The lab instructions can be found here; we will work through its contents together. You will use this RMarkdown file as your workspace, and its knitted HTML file as the final deliverable in Sakai. Don’t forget to update the “author” metadata field at the top of the file!
# Paintings data used throughout the lab (artworks-cleaned.csv), with the
# path built by here().
moma <- read_csv(file = here("data", "artworks-cleaned.csv"))
# artworks.csv read with base R from a path relative to the working directory.
artworks <- read.csv(file = "data/artworks.csv")
“House by the Railroad” by Edward Hopper (1930).
# Earliest acquisition: order by acquisition year and keep the first row.
moma %>%
  select(title, artist, year_acquired) %>%
  arrange(year_acquired) %>%
  slice(1)
## # A tibble: 1 × 3
## title artist year_acquired
## <chr> <chr> <dbl>
## 1 House by the Railroad Edward Hopper 1930
Depending on how an individual painting is defined, MoMA holds a total of 2171 paintings with a unique combination of title and artist. Alternatively, if every observation in this dataset is truly a unique painting, then the total is the number of rows: 2253.
# Number of distinct (title, artist) combinations.
moma %>%
  distinct(title, artist) %>%
  count()
## # A tibble: 1 × 1
## n
## <int>
## 1 2171
# Total number of rows in the dataset.
moma %>% nrow()
## [1] 2253
The oldest painting is “Landscape at Daybreak” by Odilon Redon (1872).
# Oldest painting: order by creation year and keep the first row.
moma %>%
  select(title, artist, year_created) %>%
  arrange(year_created) %>%
  slice(1)
## # A tibble: 1 × 3
## title artist year_created
## <chr> <chr> <dbl>
## 1 Landscape at Daybreak Odilon Redon 1872
There are a total of 989 distinct artists at MoMA.
# Number of distinct artists.
moma %>%
  distinct(artist) %>%
  tally()
## # A tibble: 1 × 1
## n
## <int>
## 1 989
Pablo Picasso has the most paintings, with 55.
# Paintings per artist, most prolific first.
moma %>%
  count(artist, name = "count") %>%
  arrange(desc(count))
## # A tibble: 989 × 2
## artist count
## <chr> <int>
## 1 Pablo Picasso 55
## 2 Henri Matisse 32
## 3 On Kawara 32
## 4 Jacob Lawrence 30
## 5 Batiste Madalena 25
## 6 Jean Dubuffet 25
## 7 Odilon Redon 25
## 8 Ben Vautier 24
## 9 Frank Stella 23
## 10 Philip Guston 23
## # ℹ 979 more rows
There are 1991 paintings by male artists and 252 paintings by female artists; a further 10 paintings have no recorded artist gender.
# Paintings per recorded artist gender (the result stays grouped by gender).
moma %>%
  group_by(artist_gender) %>%
  count()
## # A tibble: 3 × 2
## # Groups: artist_gender [3]
## artist_gender n
## <chr> <int>
## 1 Female 252
## 2 Male 1991
## 3 <NA> 10
Counting every artist who worked on any given painting, there are 2017 male artists and 256 female artists.
# Sum the per-painting female and male artist counts over the whole dataset.
moma %>%
  summarise(
    n_female = sum(n_female_artists),
    n_male = sum(n_male_artists)
  )
## # A tibble: 1 × 2
## n_female n_male
## <dbl> <dbl>
## 1 256 2017
The year 1985 had the most paintings acquired at 86.
# Paintings acquired per year, busiest year first.
moma %>%
  count(year_acquired, name = "count") %>%
  arrange(desc(count))
## # A tibble: 88 × 2
## year_acquired count
## <dbl> <int>
## 1 1985 86
## 2 1942 71
## 3 1979 71
## 4 1991 67
## 5 2005 67
## 6 1967 65
## 7 2008 55
## 8 1961 45
## 9 1969 45
## 10 1956 42
## # ℹ 78 more rows
The largest number of paintings was created in 1977, totalling 57.
# Paintings created per year, most productive year first.
moma %>%
  count(year_created, name = "count") %>%
  arrange(desc(count))
## # A tibble: 139 × 2
## year_created count
## <dbl> <int>
## 1 1977 57
## 2 1940 56
## 3 1964 56
## 4 1961 50
## 5 1962 49
## 6 1963 44
## 7 1959 42
## 8 1968 40
## 9 1960 39
## 10 1914 37
## # ℹ 129 more rows
The first painting created by a solo female artist in MoMA’s collection was “Self-Portrait with Two Flowers in Her Raised Left Hand” by Paula Modersohn-Becker in 1907.
# Earliest-created painting with a single artist recorded as Female.
moma %>%
  filter(artist_gender == "Female", num_artists == 1) %>%
  select(title, artist, year_created) %>%
  arrange(year_created) %>%
  slice(1)
## # A tibble: 1 × 3
## title artist year_created
## <chr> <chr> <dbl>
## 1 Self-Portrait with Two Flowers in Her Raised Left Hand Paula Mod… 1907
# Scatter plot of acquisition year against creation year, one point per row
# of `moma`; the low alpha lets dense regions show through overplotting.
momaplot <- moma %>%
  ggplot(aes(x = year_created, y = year_acquired)) +
  geom_point(alpha = 0.07) +
  # Single reference line y = x: points on it were acquired in the year they
  # were painted. (Previously `intercept = c(0, 0)` drew the same line twice
  # and left the slope implicit.)
  geom_abline(intercept = 0, slope = 1, color = "red") +
  scale_x_continuous(
    breaks = c(1875, 1900, 1925, 1950, 1975, 2000),
    limits = c(1875, 2020),
    minor_breaks = NULL
  ) +
  scale_y_continuous(
    breaks = c(1925, 1950, 1975, 2000),
    limits = c(1925, 2020),
    minor_breaks = NULL
  ) +
  labs(
    title = "MoMA Keeps Its Collection Current",
    subtitle = "Year of a work's acquisition vs. year it was painted, for works acquired since 1930",
    y = "Year acquired",
    x = "Year painted"
  )
momaplot
# Same acquisition-vs-creation scatter, restricted to single-artist paintings
# with a recorded artist gender and split into one panel per gender.
moma %>%
  filter(num_artists == 1 & !is.na(artist_gender)) %>%
  ggplot(aes(x = year_created, y = year_acquired)) +
  geom_point(alpha = 0.07) +
  # Single reference line y = x: points on it were acquired in the year they
  # were painted. (Previously `intercept = c(0, 0)` drew the same line twice
  # and left the slope implicit.)
  geom_abline(intercept = 0, slope = 1, color = "red") +
  scale_x_continuous(
    breaks = c(1875, 1900, 1925, 1950, 1975, 2000),
    limits = c(1875, 2020),
    minor_breaks = NULL
  ) +
  scale_y_continuous(
    breaks = c(1925, 1950, 1975, 2000),
    limits = c(1925, 2020),
    minor_breaks = NULL
  ) +
  labs(
    title = "MoMA Keeps Its Collection Current",
    subtitle = "Year of a work's acquisition vs. year it was painted, for works acquired since 1930",
    y = "Year acquired",
    x = "Year painted"
  ) +
  facet_wrap(~artist_gender)
# Paintings under 600 cm tall and 760 cm wide, each labelled by its longer
# side and with its dimensions converted from centimetres to feet.
moma_dims <- moma %>%
  filter(height_cm < 600, width_cm < 760) %>%
  mutate(
    longest_dim = case_when(
      height_cm > width_cm ~ "Taller than wide",
      width_cm > height_cm ~ "Wider than tall",
      height_cm == width_cm ~ "Square"
    ),
    # cm -> inches (2.54 cm per inch) -> feet (12 inches per foot)
    height_ft = (height_cm / 2.54) / 12,
    width_ft = (width_cm / 2.54) / 12
  ) %>%
  # Drop any row that matched none of the three cases above.
  filter(!is.na(longest_dim))
# Height vs. width in feet, coloured by which dimension is longer; linear
# trend lines are fitted only for the two non-square groups.
moma_dims %>%
  ggplot(aes(x = width_ft, y = height_ft, colour = longest_dim)) +
  geom_point(alpha = 0.15) +
  geom_smooth(
    data = moma_dims %>%
      filter(longest_dim %in% c("Wider than tall", "Taller than wide")),
    method = "lm",
    se = FALSE,
    show.legend = FALSE
  ) +
  scale_color_manual(
    values = c(
      "Taller than wide" = "orange",
      "Wider than tall" = "purple",
      "Square" = "grey"
    )
  ) +
  theme_fivethirtyeight() +
  # Set axis titles back to the default element so the labs() titles show.
  theme(axis.title = element_text()) +
  labs(
    title = "MoMA Paintings, Tall And Wide",
    subtitle = "Dimensions of over 2,000 paintings in the collection, excluding \npieces over 25 feet wide and 15 feet tall",
    x = "Width (ft)",
    y = "Height (ft)",
    colour = NULL
  )
# Same tall-vs-wide scatter as before, using hex colours for the groups.
moma_dims %>%
  ggplot(aes(x = width_ft, y = height_ft, colour = longest_dim)) +
  geom_point(alpha = 0.15) +
  geom_smooth(
    data = moma_dims %>%
      filter(longest_dim %in% c("Wider than tall", "Taller than wide")),
    method = "lm",
    se = FALSE,
    show.legend = FALSE
  ) +
  scale_color_manual(
    values = c(
      "Taller than wide" = "#ee5863",
      "Wider than tall" = "#6999cd",
      "Square" = "grey50"
    )
  ) +
  theme_fivethirtyeight() +
  # Set axis titles back to the default element so the labs() titles show.
  theme(axis.title = element_text()) +
  labs(
    title = "MoMA Paintings, Tall And Wide",
    subtitle = "Dimensions of over 2,000 paintings in the collection, excluding \npieces over 25 feet wide and 15 feet tall",
    x = "Width (ft)",
    y = "Height (ft)",
    colour = NULL
  )
# Tall-vs-wide scatter with the colour legend suppressed; the two non-square
# groups are labelled directly on the plot with annotate() text instead.
# (A stray `geom_annotate()` token fused onto the ggplot() call has been
# removed: it made the chunk unparseable.)
ggplot(data = moma_dims,
       aes(x = width_ft,
           y = height_ft,
           col = longest_dim)) +
  geom_point(alpha = 0.15, show.legend = FALSE) +
  scale_color_manual(values = c("Taller than wide" = "#ee5863",
                                "Wider than tall" = "#6999cd",
                                "Square" = "grey50")) +
  # Linear trend lines only for the two non-square groups.
  geom_smooth(data = filter(moma_dims,
                            longest_dim %in% c("Wider than tall",
                                               "Taller than wide")),
              method = "lm", se = FALSE, show.legend = FALSE) +
  theme_fivethirtyeight() +
  # Set axis titles back to the default element so the labs() titles show.
  theme(axis.title = element_text()) +
  labs(title = "MoMA Paintings, Tall And Wide",
       subtitle = "Dimensions of over 2,000 paintings in the collection, excluding \npieces over 25 feet wide and 15 feet tall",
       y = "Height (ft)",
       x = "Width (ft)",
       col = NULL) +
  # Direct labels, coloured to match their groups (fontface 2 = bold).
  annotate(x = 2, y = 12, geom = "text", color = "#ee5863", size = 5,
           family = "Lato", label = "Taller than\n wide", hjust = 0,
           fontface = 2) +
  annotate(x = 12, y = 3, geom = "text", color = "#6999cd", size = 5,
           family = "Lato", label = "Wider than \n tall", hjust = 0,
           fontface = 2)
# Yearly and cumulative acquisition counts per artist gender.
# NOTE(review): the plot subtitle below says "single-artist paintings", but no
# num_artists filter is applied here. Confirm which is intended.
moma_artists <- moma %>%
  group_by(artist_gender, year_acquired) %>%
  summarise(n = n()) %>%
  arrange(artist_gender, year_acquired) %>%
  # Still grouped by artist_gender here, so the running total restarts for
  # each gender.
  mutate(cumulative_paintings = cumsum(n)) %>%
  ungroup() %>%
  # Fix the level order (Male, then Female); genders outside these levels
  # become NA and are dropped with the other incomplete rows.
  mutate(artist_gender = factor(artist_gender, levels = c("Male", "Female"))) %>%
  na.omit()
# Stacked bar chart of the cumulative number of acquired paintings per year,
# filled by artist gender. (Title typo "Aquisition" corrected.)
ggplot(moma_artists,
       aes(x = year_acquired,
           y = cumulative_paintings,
           fill = artist_gender)) +
  geom_col(position = "stack", width = 0.8) +
  theme_fivethirtyeight() +
  # Set axis titles back to the default element so the labs() titles show.
  theme(axis.title = element_text()) +
  labs(x = "Year",
       y = "Total Acquired Paintings since 1930",
       title = "MoMA's Painting Acquisition Through the Years",
       subtitle = "Cumulative sum of acquired paintings for single-artist paintings, by gender",
       fill = NULL) +
  scale_fill_manual(values = c("Female" = "#ee5863", "Male" = "#6999cd"))
In this bar chart, I’ve plotted the cumulative number of paintings MoMA has acquired over the years, grouped by artist gender. From the initial analysis above, it was apparent that MoMA’s collection is heavily biased towards male artists. However, given recent initiatives towards greater inclusivity, perhaps there has been a notable trend towards including more female artists in the collection through acquisition. By using cumulative sums, this graph shows the overall picture to date of MoMA’s painting acquisitions by gender. Notably, far fewer paintings by female artists are acquired each year than paintings by male artists; the cumulative number of paintings by female artists does increase through the years but, on close inspection, at a slower rate than for male artists.