First we read in the data from the excel sheet using read_xls

Here are are telling it:

library(readxl)
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
passengers <- read_xls(
  path = here::here("data/WebAirport_FY_1986-2017.xls"),
  sheet = 3, 
  skip = 6,
  # this gives us much easier names to work with
  .name_repair = janitor::make_clean_names
  )

passengers
## # A tibble: 3,479 x 14
##    x     airport year  rank  inbound outbound  total inbound_2 outbound_2
##    <chr> <chr>   <chr> <chr>   <dbl>    <dbl>  <dbl>     <dbl>      <dbl>
##  1 TOTA… TOTAL … 1985… -      1.45e7 14543928 2.91e7   2767413    2656964
##  2 TOTA… TOTAL … 1986… -      1.50e7 15001877 3.00e7   3154920    3040061
##  3 TOTA… TOTAL … 1987… -      1.62e7 16181355 3.24e7   3692976    3518767
##  4 TOTA… TOTAL … 1988… -      1.65e7 16497826 3.30e7   4022634    3907954
##  5 TOTA… TOTAL … 1989… -      1.19e7 11861644 2.37e7   4175315    4077454
##  6 TOTA… TOTAL … 1990… -      1.65e7 16536767 3.31e7   4257165    4167346
##  7 TOTA… TOTAL … 1991… -      2.05e7 20525193 4.11e7   4539042    4503847
##  8 TOTA… TOTAL … 1992… -      2.09e7 20923187 4.18e7   4902693    4856372
##  9 TOTA… TOTAL … 1993… -      2.27e7 22668912 4.53e7   5340017    5281959
## 10 TOTA… TOTAL … 1994… -      2.51e7 25091227 5.02e7   5830311    5735442
## # … with 3,469 more rows, and 5 more variables: total_2 <dbl>, inbound_3 <dbl>,
## #   outbound_3 <dbl>, total_3 <dbl>, x_2 <chr>

Tidy the data, to produce a data set with these columns - airport: all of the airports. - year - type_of_flight: DOMESTIC, INTERNATIONAL - bound: IN or OUT

tidy_passengers <- passengers %>%
  select(-x, 
         -x_2, 
         -rank, 
         -starts_with("total")) %>%
  rename(inbound_dom = inbound,
         inbound_intl = inbound_2,
         inbound_totl = inbound_3,
         outbound_dom = outbound,
         outbound_intl = outbound_2,
         outbound_totl =outbound_3) %>%
    pivot_longer(cols = contains("bound"),
                 names_to = "where",
                 values_to = "amount") %>% 
  separate(col = where,
           into = c("bound", "type")) %>% 
  filter(type != "totl")

tidy_passengers
## # A tibble: 13,916 x 5
##    airport         year    bound    type    amount
##    <chr>           <chr>   <chr>    <chr>    <dbl>
##  1 TOTAL AUSTRALIA 1985-86 inbound  dom   14543928
##  2 TOTAL AUSTRALIA 1985-86 outbound dom   14543928
##  3 TOTAL AUSTRALIA 1985-86 inbound  intl   2767413
##  4 TOTAL AUSTRALIA 1985-86 outbound intl   2656964
##  5 TOTAL AUSTRALIA 1986-87 inbound  dom   15001877
##  6 TOTAL AUSTRALIA 1986-87 outbound dom   15001877
##  7 TOTAL AUSTRALIA 1986-87 inbound  intl   3154920
##  8 TOTAL AUSTRALIA 1986-87 outbound intl   3040061
##  9 TOTAL AUSTRALIA 1987-88 inbound  dom   16181355
## 10 TOTAL AUSTRALIA 1987-88 outbound dom   16181355
## # … with 13,906 more rows
  # gather(key = where, 
  #        value = amount, 
  #        starts_with("IN"), 
  #        starts_with("OUT"))