Note that the pnwflights14 package includes several other data sets that may be of use and/or interest: mappings from two-letter airline codes to airline names, weather conditions, etc.
data("flights", package = "pnwflights14")
data("weather", package = "pnwflights14")
data("airports", package = "pnwflights14")
data("airlines", package = "pnwflights14")
Convert month numbers to month names. Convert airport symbols to airport names. Get airline names from its symbol.
# get airline name
flights <- flights %>%
left_join(airlines, by = 'carrier') %>%
rename(airline = name)
# Convert month integers to month names
flights$month[flights$month == 1] <- "January"
flights$month[flights$month == 2] <- "February"
flights$month[flights$month == 3] <- "March"
flights$month[flights$month == 4] <- "April"
flights$month[flights$month == 5] <- "May"
flights$month[flights$month == 6] <- "June"
flights$month[flights$month == 7] <- "July"
flights$month[flights$month == 8] <- "August"
flights$month[flights$month == 9] <- "September"
flights$month[flights$month == 10] <- "October"
flights$month[flights$month == 11] <- "November"
flights$month[flights$month == 12] <- "December"
flights$month <- factor(flights$month, levels = c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"))
# get airport names
flights <- flights %>%
left_join(airports[,c('faa', 'name')], by = c("dest" = "faa")) %>%
rename(dest_airport = name) %>%
left_join(airports[,c('faa', 'name')], by = c("origin" = "faa")) %>%
rename(origin_airport = name)
What cities have the most service from Portland (defined however you like, but do make sure to define it clearly!), and which have the worst?
# group by destination and date (year + month + day combo)
dest_flights <- flights %>%
filter(origin == "PDX") %>%
group_by(dest_airport, year, month, day) %>%
summarize(Num_depart_flights = n()) %>%
group_by(dest_airport) %>%
summarize(mean_flights_per_day = round(mean(Num_depart_flights), 2)) %>%
arrange(desc(mean_flights_per_day))
## `summarise()` has grouped output by 'dest_airport', 'year', 'month'. You can
## override using the `.groups` argument.
top_dest_flights <- dest_flights %>% slice_max(mean_flights_per_day, n = 10)
bottom_dest_flights <- dest_flights %>% slice_min(mean_flights_per_day, n = 10, with_ties = FALSE)
table1 <- rbind(top_dest_flights, bottom_dest_flights)
table1 %>%
kable(caption = "Most and Least Popular Destinations from PDX Airport",
col.names = c("Destinations", "Mean Flights per Day")) %>%
kable_styling(full_width = F) %>%
row_spec(row = 1:10, background = 'lightgreen') %>%
row_spec(row = 11:20, background = 'pink')
| Destinations | Mean Flights per Day |
|---|---|
| San Francisco Intl | 14.19 |
| Denver Intl | 10.79 |
| Phoenix Sky Harbor Intl | 9.78 |
| Los Angeles Intl | 8.22 |
| Chicago Ohare Intl | 6.85 |
| Mc Carran Intl | 6.80 |
| Salt Lake City Intl | 6.75 |
| Norman Y Mineta San Jose Intl | 6.49 |
| Seattle Tacoma Intl | 6.27 |
| Metropolitan Oakland Intl | 5.33 |
| Albuquerque International Sunport | 1.00 |
| Austin Bergstrom Intl | 1.00 |
| Baltimore Washington Intl | 1.00 |
| Boise Air Terminal | 1.00 |
| Detroit Metro Wayne Co | 1.00 |
| Fairbanks Intl | 1.00 |
| Kansas City Intl | 1.00 |
| Klamath Falls Airport | 1.00 |
| Kona Intl At Keahole | 1.00 |
| Lambert St Louis Intl | 1.00 |
Which airlines had the best and worst track records of on-time departures in each month? Is it different between PDX and SEA?
# wrangle data summary
delays <- flights %>%
filter(origin == "PDX" | origin == "SEA") %>%
group_by(airline, origin_airport, year, month) %>%
summarize(mean_delay = mean(dep_delay, na.rm = TRUE), .groups = 'drop')
monthly_delay_summary <- delays %>%
group_by(year, month, origin_airport) %>%
summarise(
max_carrier = airline[which.max(mean_delay)],
min_carrier = airline[which.min(mean_delay)],
.groups = 'drop'
)
# format table for gt()
pdx_delays <- monthly_delay_summary %>%
filter(origin_airport == "Portland Intl") %>%
select(!origin_airport)
sea_delays <- monthly_delay_summary %>%
filter(origin_airport == "Seattle Tacoma Intl") %>%
select(!origin_airport)
delay_table <- pdx_delays %>%
left_join(sea_delays,
by = c('year', 'month'),
suffix = c("PDX", "SEA"))
delay_table %>%
gt() %>%
tab_header(title = "Airlines with the Best and Worst Delay Times each Month, Portland and Seattle Airports",
subtitle = "As measured by monthly mean delay time") %>%
tab_spanner(label = "PDX",
columns = c(max_carrierPDX, min_carrierPDX)) %>%
tab_spanner(label = "SEA",
columns = c(max_carrierSEA, min_carrierSEA)) %>%
cols_label(year = "Year",
month = "Month",
max_carrierPDX = "Longest Delays",
min_carrierPDX = "Shortest Delays",
max_carrierSEA = "Longest Delays",
min_carrierSEA = "Shortest Delays")
| Airlines with the Best and Worst Delay Times each Month, Portland and Seattle Airports | |||||
| As measured by monthly mean delay time | |||||
| Year | Month |
PDX
|
SEA
|
||
|---|---|---|---|---|---|
| Longest Delays | Shortest Delays | Longest Delays | Shortest Delays | ||
| 2014 | January | Frontier Airlines Inc. | Hawaiian Airlines Inc. | Frontier Airlines Inc. | Alaska Airlines Inc. |
| 2014 | February | Virgin America | Hawaiian Airlines Inc. | Frontier Airlines Inc. | Hawaiian Airlines Inc. |
| 2014 | March | American Airlines Inc. | Hawaiian Airlines Inc. | Southwest Airlines Co. | Hawaiian Airlines Inc. |
| 2014 | April | American Airlines Inc. | Hawaiian Airlines Inc. | Frontier Airlines Inc. | US Airways Inc. |
| 2014 | May | Southwest Airlines Co. | Alaska Airlines Inc. | Southwest Airlines Co. | Hawaiian Airlines Inc. |
| 2014 | June | Frontier Airlines Inc. | Hawaiian Airlines Inc. | Southwest Airlines Co. | Hawaiian Airlines Inc. |
| 2014 | July | Southwest Airlines Co. | Hawaiian Airlines Inc. | Frontier Airlines Inc. | Hawaiian Airlines Inc. |
| 2014 | August | American Airlines Inc. | Hawaiian Airlines Inc. | Hawaiian Airlines Inc. | SkyWest Airlines Inc. |
| 2014 | September | Hawaiian Airlines Inc. | Virgin America | Southwest Airlines Co. | US Airways Inc. |
| 2014 | October | American Airlines Inc. | Virgin America | American Airlines Inc. | Hawaiian Airlines Inc. |
| 2014 | November | Southwest Airlines Co. | Hawaiian Airlines Inc. | United Air Lines Inc. | US Airways Inc. |
| 2014 | December | American Airlines Inc. | Hawaiian Airlines Inc. | Virgin America | US Airways Inc. |
Time of Day: are some destinations from PDX “morning” destinations vs “evening” ones?
# construct AM PM variable
flights$day_time <- ifelse(flights$hour >= 12, "PM", "AM")
# wrangle data summary for table
day_flights <- flights %>%
filter(origin == "PDX") %>%
filter(!is.na(hour)) %>%
group_by(dest_airport, day_time) %>%
summarize(count = n(),
.groups = 'drop') %>%
pivot_wider(names_from = day_time,
values_from = count,
values_fill = 0)
# table
day_flights %>% gt() %>%
tab_header(title = "Flight Destinations from Portland",
subtitle = "Total flights in the AM and PM part of the day") %>%
tab_spanner(label = "Total Flights",
columns = c(AM, PM)) %>%
cols_label(dest_airport = "Destination")
| Flight Destinations from Portland | ||
| Total flights in the AM and PM part of the day | ||
| Destination |
Total Flights
|
|
|---|---|---|
| AM | PM | |
| Albuquerque International Sunport | 150 | 149 |
| Austin Bergstrom Intl | 63 | 0 |
| Baltimore Washington Intl | 9 | 77 |
| Bob Hope | 372 | 645 |
| Boise Air Terminal | 97 | 0 |
| Charlotte Douglas Intl | 13 | 262 |
| Chicago Midway Intl | 436 | 231 |
| Chicago Ohare Intl | 1531 | 935 |
| Dallas Fort Worth Intl | 1246 | 622 |
| Denver Intl | 2190 | 1732 |
| Detroit Metro Wayne Co | 138 | 172 |
| Fairbanks Intl | 0 | 93 |
| General Edward Lawrence Logan Intl | 404 | 97 |
| George Bush Intercontinental | 577 | 414 |
| Hartsfield Jackson Atlanta Intl | 973 | 573 |
| Honolulu Intl | 728 | 2 |
| John F Kennedy Intl | 404 | 441 |
| John Wayne Arpt Orange Co | 652 | 369 |
| Kahului | 404 | 201 |
| Kansas City Intl | 350 | 14 |
| Klamath Falls Airport | 0 | 150 |
| Kona Intl At Keahole | 168 | 0 |
| Lambert St Louis Intl | 7 | 30 |
| Lihue | 101 | 2 |
| Long Beach | 365 | 635 |
| Los Angeles Intl | 1480 | 1500 |
| Mahlon Sweet Fld | 147 | 475 |
| Mc Carran Intl | 985 | 1492 |
| Metropolitan Oakland Intl | 778 | 1160 |
| Minneapolis St Paul Intl | 744 | 618 |
| Newark Liberty Intl | 357 | 145 |
| Norman Y Mineta San Jose Intl | 975 | 1390 |
| Ontario Intl | 358 | 557 |
| Palm Springs Intl | 14 | 257 |
| Philadelphia Intl | 176 | 1 |
| Phoenix Sky Harbor Intl | 1919 | 1626 |
| Reno Tahoe Intl | 86 | 77 |
| Roberts Fld | 120 | 595 |
| Ronald Reagan Washington Natl | 360 | 2 |
| Sacramento Intl | 653 | 1159 |
| Salt Lake City Intl | 1090 | 1372 |
| San Diego Intl | 862 | 687 |
| San Francisco Intl | 2153 | 2958 |
| Santa Barbara Muni | 0 | 363 |
| Seattle Tacoma Intl | 1106 | 1142 |
| Ted Stevens Anchorage Intl | 361 | 638 |
| Tucson Intl | 360 | 2 |
| Washington Dulles Intl | 363 | 5 |
| William P Hobby | 54 | 9 |
Display fonts and some cleaning
fonts <- fonttable()
glimpse(fonts)
## Rows: 447
## Columns: 10
## $ package <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ afmfile <chr> "Amiri-Bold.afm.gz", "Amiri-BoldSlanted.afm.gz", "Amiri-Reg…
## $ fontfile <chr> "/usr/share/fonts/opentype/fonts-hosny-amiri/Amiri-Bold.ttf…
## $ FullName <chr> "Amiri Bold", "Amiri Bold Slanted", "Amiri", "Amiri Slanted…
## $ FamilyName <chr> "Amiri", "Amiri", "Amiri", "Amiri", "Amiri Quran", "Amiri Q…
## $ FontName <chr> "Amiri-Bold", "Amiri-BoldSlanted", "Amiri-Regular", "Amiri-…
## $ Bold <lgl> TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, …
## $ Italic <lgl> FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE,…
## $ Symbol <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
## $ afmsymfile <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
table(fonts$package)
## < table of extent 0 >
table(fonts$afmsymfile)
## < table of extent 0 >
# remove columsn with all NAs
fonts <- fonts %>%
select(!c("package", "afmsymfile"))
glimpse(fonts)
## Rows: 447
## Columns: 8
## $ afmfile <chr> "Amiri-Bold.afm.gz", "Amiri-BoldSlanted.afm.gz", "Amiri-Reg…
## $ fontfile <chr> "/usr/share/fonts/opentype/fonts-hosny-amiri/Amiri-Bold.ttf…
## $ FullName <chr> "Amiri Bold", "Amiri Bold Slanted", "Amiri", "Amiri Slanted…
## $ FamilyName <chr> "Amiri", "Amiri", "Amiri", "Amiri", "Amiri Quran", "Amiri Q…
## $ FontName <chr> "Amiri-Bold", "Amiri-BoldSlanted", "Amiri-Regular", "Amiri-…
## $ Bold <lgl> TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, …
## $ Italic <lgl> FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE,…
## $ Symbol <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
There are a total of 447 unique families of fonts.
fonts %>% n_distinct("FamilyName")
## [1] 447
34.4% of font families have bold faces, 21.5% have italic faces, and 21.5% have both italic and bold faces.
family_faces <- fonts %>% group_by(FamilyName) %>%
summarize(has_bold = as.integer(any(Bold, na.rm = TRUE)),
has_italic = as.integer(any(Italic, na.rm = TRUE)),
has_bold_italic = as.integer(has_bold & has_italic),
.groups = 'drop')
family_faces %>%
summarize(prop_bold = sum(has_bold)/length(has_bold),
prop_italic = sum(has_italic)/length(has_italic),
prop_both = sum(has_italic)/length(has_bold_italic))
| prop_bold | prop_italic | prop_both |
|---|---|---|
| 0.3444444 | 0.2148148 | 0.2148148 |
font_per_family <- fonts %>%
group_by(FamilyName) %>%
summarize(font_counts = n())
ggplot(data = font_per_family,
mapping = aes(x = font_counts)) +
geom_bar(fill = 'purple3', col = 'black') +
theme_classic() +
scale_y_continuous(expand = c(0,0)) +
labs(title = "ExtraFont's Font Family's Number of Fonts",
x = "Number of Fonts",
y = "Number of Font Families")
font_per_family %>%
summarize(mean_fonts = mean(font_counts),
median_fonts = median(font_counts),
min_fonts = min(font_counts),
max_fonts = max(font_counts)) %>%
kable(caption = "Statistics on Fonts per Font Family",
col.names = c("Mean", "Median", "Min", "Max")) %>%
kable_styling()
| Mean | Median | Min | Max |
|---|---|---|---|
| 1.655556 | 1 | 1 | 4 |
This font reminds me of Times New Roman, bit in a better, more compemporary way. Still formal, but not as contrained as Times New Roman, this font would work well in figures, a manuscript, or anything meant for a formal publication.
This font reminds me a lot like a terminal font, but with slightly better readability. If I were to make a RMarkdown that included code snippets, I would choose to display the code in this font.
This font would never apply to my scientific-professional life, however and hypothetically, if I were to start a hip coffee cafe that also served fresh pastries and carefully curated coffee beans, this would be the font to choose for the company name and any other important subtitles.
Font Family’s font distributions with “DM Serif Text” font. Looks pretty okay, I think I would maybe choose a different font for plots. Perhaps is best for the plot title, but too much detail for the axis labels.
font_add_google("DM Serif Text")
showtext_auto()
ggplot(data = font_per_family,
mapping = aes(x = font_counts)) +
geom_bar(fill = 'purple3', col = 'black') +
theme_classic() +
scale_y_continuous(expand = c(0,0)) +
labs(title = "ExtraFont's Font Family's Number of Fonts",
x = "Number of Fonts",
y = "Number of Font Families") +
theme(text = element_text(family = "DM Serif Text",
size = 24))