First, load library(tidyverse), and read in the data

library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# tb_raw <- read_csv("data/TB_notifications_2018-03-18.csv")
tb_raw <- read_csv(
  here::here("data/TB_notifications_2018-03-18.csv"))
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   country = col_character(),
##   iso2 = col_character(),
##   iso3 = col_character(),
##   iso_numeric = col_character(),
##   g_whoregion = col_character(),
##   new_sn_sexunk04 = col_logical(),
##   new_sn_sexunk514 = col_logical(),
##   new_sn_sexunk014 = col_logical(),
##   new_sn_sexunk15plus = col_logical(),
##   new_ep_m04 = col_logical(),
##   new_ep_sexunkageunk = col_logical(),
##   rdxsurvey_newinc = col_logical(),
##   rdxsurvey_newinc_rdx = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 103 parsing failures.
##  row                 col           expected actual                                                  file
## 1186 new_sn_sexunk04     1/0/T/F/TRUE/FALSE    22  '/cloud/project/data/TB_notifications_2018-03-18.csv'
## 1186 new_sn_sexunk514    1/0/T/F/TRUE/FALSE    33  '/cloud/project/data/TB_notifications_2018-03-18.csv'
## 1186 new_sn_sexunk014    1/0/T/F/TRUE/FALSE    55  '/cloud/project/data/TB_notifications_2018-03-18.csv'
## 1186 new_sn_sexunk15plus 1/0/T/F/TRUE/FALSE    632 '/cloud/project/data/TB_notifications_2018-03-18.csv'
## 1187 new_sn_sexunk04     1/0/T/F/TRUE/FALSE    23  '/cloud/project/data/TB_notifications_2018-03-18.csv'
## .... ................... .................. ...... .....................................................
## See problems(...) for more details.
tb_raw
## # A tibble: 7,891 x 175
##    country iso2  iso3  iso_numeric g_whoregion  year new_sp new_sn new_su new_ep
##    <chr>   <chr> <chr> <chr>       <chr>       <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1 Afghan… AF    AFG   004         EMR          1980     NA     NA     NA     NA
##  2 Afghan… AF    AFG   004         EMR          1981     NA     NA     NA     NA
##  3 Afghan… AF    AFG   004         EMR          1982     NA     NA     NA     NA
##  4 Afghan… AF    AFG   004         EMR          1983     NA     NA     NA     NA
##  5 Afghan… AF    AFG   004         EMR          1984     NA     NA     NA     NA
##  6 Afghan… AF    AFG   004         EMR          1985     NA     NA     NA     NA
##  7 Afghan… AF    AFG   004         EMR          1986     NA     NA     NA     NA
##  8 Afghan… AF    AFG   004         EMR          1987     NA     NA     NA     NA
##  9 Afghan… AF    AFG   004         EMR          1988     NA     NA     NA     NA
## 10 Afghan… AF    AFG   004         EMR          1989     NA     NA     NA     NA
## # … with 7,881 more rows, and 165 more variables: new_oth <dbl>, ret_rel <dbl>,
## #   ret_taf <dbl>, ret_tad <dbl>, ret_oth <dbl>, newret_oth <dbl>,
## #   new_labconf <dbl>, new_clindx <dbl>, ret_rel_labconf <dbl>,
## #   ret_rel_clindx <dbl>, ret_rel_ep <dbl>, ret_nrel <dbl>,
## #   notif_foreign <dbl>, c_newinc <dbl>, new_sp_m04 <dbl>, new_sp_m514 <dbl>,
## #   new_sp_m014 <dbl>, new_sp_m1524 <dbl>, new_sp_m2534 <dbl>,
## #   new_sp_m3544 <dbl>, new_sp_m4554 <dbl>, new_sp_m5564 <dbl>,
## #   new_sp_m65 <dbl>, new_sp_mu <dbl>, new_sp_f04 <dbl>, new_sp_f514 <dbl>,
## #   new_sp_f014 <dbl>, new_sp_f1524 <dbl>, new_sp_f2534 <dbl>,
## #   new_sp_f3544 <dbl>, new_sp_f4554 <dbl>, new_sp_f5564 <dbl>,
## #   new_sp_f65 <dbl>, new_sp_fu <dbl>, new_sn_m04 <dbl>, new_sn_m514 <dbl>,
## #   new_sn_m014 <dbl>, new_sn_m1524 <dbl>, new_sn_m2534 <dbl>,
## #   new_sn_m3544 <dbl>, new_sn_m4554 <dbl>, new_sn_m5564 <dbl>,
## #   new_sn_m65 <dbl>, new_sn_m15plus <dbl>, new_sn_mu <dbl>, new_sn_f04 <dbl>,
## #   new_sn_f514 <dbl>, new_sn_f014 <dbl>, new_sn_f1524 <dbl>,
## #   new_sn_f2534 <dbl>, new_sn_f3544 <dbl>, new_sn_f4554 <dbl>,
## #   new_sn_f5564 <dbl>, new_sn_f65 <dbl>, new_sn_f15plus <dbl>,
## #   new_sn_fu <dbl>, new_sn_sexunk04 <lgl>, new_sn_sexunk514 <lgl>,
## #   new_sn_sexunk014 <lgl>, new_sn_sexunk15plus <lgl>, new_ep_m04 <lgl>,
## #   new_ep_m514 <dbl>, new_ep_m014 <dbl>, new_ep_m1524 <dbl>,
## #   new_ep_m2534 <dbl>, new_ep_m3544 <dbl>, new_ep_m4554 <dbl>,
## #   new_ep_m5564 <dbl>, new_ep_m65 <dbl>, new_ep_m15plus <dbl>,
## #   new_ep_mu <dbl>, new_ep_f04 <dbl>, new_ep_f514 <dbl>, new_ep_f014 <dbl>,
## #   new_ep_f1524 <dbl>, new_ep_f2534 <dbl>, new_ep_f3544 <dbl>,
## #   new_ep_f4554 <dbl>, new_ep_f5564 <dbl>, new_ep_f65 <dbl>,
## #   new_ep_f15plus <dbl>, new_ep_fu <dbl>, new_ep_sexunk04 <dbl>,
## #   new_ep_sexunk514 <dbl>, new_ep_sexunk014 <dbl>, new_ep_sexunk15plus <dbl>,
## #   new_ep_sexunkageunk <lgl>, rel_in_agesex_flg <dbl>, newrel_m04 <dbl>,
## #   newrel_m514 <dbl>, newrel_m014 <dbl>, newrel_m1524 <dbl>,
## #   newrel_m2534 <dbl>, newrel_m3544 <dbl>, newrel_m4554 <dbl>,
## #   newrel_m5564 <dbl>, newrel_m65 <dbl>, newrel_m15plus <dbl>,
## #   newrel_mu <dbl>, newrel_f04 <dbl>, …
tb_short <-  tb_raw %>%
  select(country, # select variables country and year
         year, 
         starts_with("new_sp_")) %>%
  filter(year > 1996, 
         year < 2012) # before 2012

tb_short
## # A tibble: 3,202 x 22
##    country  year new_sp_m04 new_sp_m514 new_sp_m014 new_sp_m1524 new_sp_m2534
##    <chr>   <dbl>      <dbl>       <dbl>       <dbl>        <dbl>        <dbl>
##  1 Afghan…  1997         NA          NA           0           10            6
##  2 Afghan…  1998         NA          NA          30          129          128
##  3 Afghan…  1999         NA          NA           8           55           55
##  4 Afghan…  2000         NA          NA          52          228          183
##  5 Afghan…  2001         NA          NA         129          379          349
##  6 Afghan…  2002         NA          NA          90          476          481
##  7 Afghan…  2003         NA          NA         127          511          436
##  8 Afghan…  2004         NA          NA         139          537          568
##  9 Afghan…  2005         NA          NA         151          606          560
## 10 Afghan…  2006         NA          NA         193          837          791
## # … with 3,192 more rows, and 15 more variables: new_sp_m3544 <dbl>,
## #   new_sp_m4554 <dbl>, new_sp_m5564 <dbl>, new_sp_m65 <dbl>, new_sp_mu <dbl>,
## #   new_sp_f04 <dbl>, new_sp_f514 <dbl>, new_sp_f014 <dbl>, new_sp_f1524 <dbl>,
## #   new_sp_f2534 <dbl>, new_sp_f3544 <dbl>, new_sp_f4554 <dbl>,
## #   new_sp_f5564 <dbl>, new_sp_f65 <dbl>, new_sp_fu <dbl>
tb_short %>% top_n(20) # place 20 inside top_n
## Selecting by new_sp_fu
## # A tibble: 26 x 22
##    country  year new_sp_m04 new_sp_m514 new_sp_m014 new_sp_m1524 new_sp_m2534
##    <chr>   <dbl>      <dbl>       <dbl>       <dbl>        <dbl>        <dbl>
##  1 Argent…  2008         11          58          69          633          611
##  2 Argent…  2009          8          36          44          546          483
##  3 Argent…  2011         50          93         143          664          657
##  4 Bosnia…  2006          0           0           0           40           58
##  5 Brazil   2010        130         168         298         4405         6381
##  6 Centra…  2010         23          55          78          379          633
##  7 Centra…  2011         14          56          70          362          576
##  8 Egypt    2008          0          13          13          581          640
##  9 Greece   2007          0           1           1           21           22
## 10 Italy    2005          7           1           8           93          191
## # … with 16 more rows, and 15 more variables: new_sp_m3544 <dbl>,
## #   new_sp_m4554 <dbl>, new_sp_m5564 <dbl>, new_sp_m65 <dbl>, new_sp_mu <dbl>,
## #   new_sp_f04 <dbl>, new_sp_f514 <dbl>, new_sp_f014 <dbl>, new_sp_f1524 <dbl>,
## #   new_sp_f2534 <dbl>, new_sp_f3544 <dbl>, new_sp_f4554 <dbl>,
## #   new_sp_f5564 <dbl>, new_sp_f65 <dbl>, new_sp_fu <dbl>

pivot_longer the data into columns into columns named stuff and count

tb_longer <- tb_short %>% 
  pivot_longer(cols = starts_with("new_sp_"),
               names_to = "stuff",
               values_to = "count")

tb_longer
## # A tibble: 64,040 x 4
##    country      year stuff        count
##    <chr>       <dbl> <chr>        <dbl>
##  1 Afghanistan  1997 new_sp_m04      NA
##  2 Afghanistan  1997 new_sp_m514     NA
##  3 Afghanistan  1997 new_sp_m014      0
##  4 Afghanistan  1997 new_sp_m1524    10
##  5 Afghanistan  1997 new_sp_m2534     6
##  6 Afghanistan  1997 new_sp_m3544     3
##  7 Afghanistan  1997 new_sp_m4554     5
##  8 Afghanistan  1997 new_sp_m5564     2
##  9 Afghanistan  1997 new_sp_m65       0
## 10 Afghanistan  1997 new_sp_mu       NA
## # … with 64,030 more rows
tb_sep_step1 <- tb_longer %>%
  separate(col = stuff, 
           into = c("stuff1", "stuff2", "genderage"),
           sep = "_")

tb_sep_step1
## # A tibble: 64,040 x 6
##    country      year stuff1 stuff2 genderage count
##    <chr>       <dbl> <chr>  <chr>  <chr>     <dbl>
##  1 Afghanistan  1997 new    sp     m04          NA
##  2 Afghanistan  1997 new    sp     m514         NA
##  3 Afghanistan  1997 new    sp     m014          0
##  4 Afghanistan  1997 new    sp     m1524        10
##  5 Afghanistan  1997 new    sp     m2534         6
##  6 Afghanistan  1997 new    sp     m3544         3
##  7 Afghanistan  1997 new    sp     m4554         5
##  8 Afghanistan  1997 new    sp     m5564         2
##  9 Afghanistan  1997 new    sp     m65           0
## 10 Afghanistan  1997 new    sp     mu           NA
## # … with 64,030 more rows
tb_sep_step2 <- tb_sep_step1 %>%
  separate(col = genderage, 
           into = c("gender", "age"), 
           sep = 1)

tb_sep_step2
## # A tibble: 64,040 x 7
##    country      year stuff1 stuff2 gender age   count
##    <chr>       <dbl> <chr>  <chr>  <chr>  <chr> <dbl>
##  1 Afghanistan  1997 new    sp     m      04       NA
##  2 Afghanistan  1997 new    sp     m      514      NA
##  3 Afghanistan  1997 new    sp     m      014       0
##  4 Afghanistan  1997 new    sp     m      1524     10
##  5 Afghanistan  1997 new    sp     m      2534      6
##  6 Afghanistan  1997 new    sp     m      3544      3
##  7 Afghanistan  1997 new    sp     m      4554      5
##  8 Afghanistan  1997 new    sp     m      5564      2
##  9 Afghanistan  1997 new    sp     m      65        0
## 10 Afghanistan  1997 new    sp     m      u        NA
## # … with 64,030 more rows

Drop the columns named stuff1 and stuff2

tb_tidy <- tb_sep_step2 %>%
  select(- stuff1, 
         - stuff2) # drop stuff1 and stuff2

tb_tidy
## # A tibble: 64,040 x 5
##    country      year gender age   count
##    <chr>       <dbl> <chr>  <chr> <dbl>
##  1 Afghanistan  1997 m      04       NA
##  2 Afghanistan  1997 m      514      NA
##  3 Afghanistan  1997 m      014       0
##  4 Afghanistan  1997 m      1524     10
##  5 Afghanistan  1997 m      2534      6
##  6 Afghanistan  1997 m      3544      3
##  7 Afghanistan  1997 m      4554      5
##  8 Afghanistan  1997 m      5564      2
##  9 Afghanistan  1997 m      65        0
## 10 Afghanistan  1997 m      u        NA
## # … with 64,030 more rows