library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the graduate-program data into a tibble; readr guesses the column types
# and reports the specification it used.
grad <- readr::read_csv(file = "data/graduate-programs.csv")
## Parsed with column specification:
## cols(
##   subject = col_character(),
##   Inst = col_character(),
##   AvNumPubs = col_double(),
##   AvNumCits = col_double(),
##   PctFacGrants = col_double(),
##   PctCompletion = col_double(),
##   MedianTimetoDegree = col_double(),
##   PctMinorityFac = col_double(),
##   PctFemaleFac = col_double(),
##   PctFemaleStud = col_double(),
##   PctIntlStud = col_double(),
##   AvNumPhDs = col_double(),
##   AvGREs = col_double(),
##   TotFac = col_double(),
##   PctAsstProf = col_double(),
##   NumStud = col_double()
## )
# Alternative location for a file of the same name, kept for reference;
# presumably for running from a different working directory (TODO confirm):
# grad <- read_csv("exercises/2a/data/graduate-programs.csv")
# Print the tibble to inspect its dimensions, column types and first rows.
grad
## # A tibble: 412 x 16
##    subject Inst  AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg…
##    <chr>   <chr>     <dbl>     <dbl>        <dbl>         <dbl>            <dbl>
##  1 econom… ARIZ…      0.9       1.57         31.3          31.7             5.6 
##  2 econom… AUBU…      0.79      0.64         77.6          44.4             3.84
##  3 econom… BOST…      0.51      1.03         43.5          46.8             5   
##  4 econom… BOST…      0.49      2.66         36.9          34.2             5.5 
##  5 econom… BRAN…      0.3       3.03         36.8          48.7             5.29
##  6 econom… BROW…      0.84      2.31         27.1          54.6             6   
##  7 econom… CALI…      0.99      2.31         56.4          83.3             4   
##  8 econom… CARN…      0.43      1.67         35.2          45.6             5.05
##  9 econom… CITY…      0.35      1.06         38.1          27.9             5.2 
## 10 econom… CLAR…      0.47      0.7          24.7          37.7             5.17
## # … with 402 more rows, and 9 more variables: PctMinorityFac <dbl>,
## #   PctFemaleFac <dbl>, PctFemaleStud <dbl>, PctIntlStud <dbl>,
## #   AvNumPhDs <dbl>, AvGREs <dbl>, TotFac <dbl>, PctAsstProf <dbl>,
## #   NumStud <dbl>

What is the average number of graduate students per economics program?

# Average number of students per economics program:
#   1. keep only the rows whose subject is "economics"
#   2. collapse NumStud to its mean, returned as a 1 x 1 tibble (column `mean`)
filter(grad, subject == "economics") %>%
  summarise(mean = mean(NumStud))
## # A tibble: 1 x 1
##    mean
##   <dbl>
## 1  60.7

What is the best description of the relationship between the number of students and the median time to degree among economics programs?

# Scatter plot of program size against median time to degree, restricted to
# economics programs. aspect.ratio = 1 makes the plotting panel square.
ggplot(
  data = filter(grad, subject == "economics"),
  mapping = aes(x = NumStud, y = MedianTimetoDegree)
) +
  geom_point() +
  theme(aspect.ratio = 1)