diff --git a/module3/exo3/wheat.Rmd b/module3/exo3/wheat.Rmd new file mode 100644 index 0000000000000000000000000000000000000000..a0f2a0f8dc0a1c9fb32244d8fb14e5a1e6c87cf1 --- /dev/null +++ b/module3/exo3/wheat.Rmd @@ -0,0 +1,361 @@ +--- +title: "wheat" +author: "Fouad Gasmi" +date: "2025-02-18" +output: html_document +--- + + +```{r } +# -------------------------------------------------------------------- +# Setup chunk for R Markdown +# -------------------------------------------------------------------- +knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE) +``` + +```{r packages} +# -------------------------------------------------------------------- +# Check if packages are installed; if not, install them +# -------------------------------------------------------------------- +required_packages <- c("ggplot2", "dplyr") +for(p in required_packages){ + if(!requireNamespace(p, quietly = TRUE)){ + install.packages(p) + } +} + +library(ggplot2) +library(dplyr) +``` + +```{r load data} +# -------------------------------------------------------------------- +# Load data +# -------------------------------------------------------------------- +df <- read.csv("data/Wheat.csv", header = TRUE) + +# Inspect structure +str(df) +``` + +```{r adding data of monarch reign} +# -------------------------------------------------------------------- +# Create a data frame of monarch reigns (for background shading) +# -------------------------------------------------------------------- +monarch_df <- data.frame( + name = c("Elizabeth", "James I", "Charles I", "Cromwell", + "Charles II", "James II", "W&M", "Anne", + "George I", "George II", "George III", "George IV"), + start = c(1565, 1603, 1625, 1649, + 1660, 1685, 1689, 1702, + 1714, 1727, 1760, 1820), + end = c(1603, 1625, 1649, 1660, + 1685, 1689, 1702, 1714, + 1727, 1760, 1820, 1821) +) + +monarch_df$fill_color <- ifelse( + seq_len(nrow(monarch_df)) %% 2 == 1, + "white", + "gray90" +) +``` + +```{r Plot theme} +# -------------------------------------------------------------------- +# Minimalistic theme +# -------------------------------------------------------------------- +theme_playfair <- theme_minimal(base_size = 12) + + theme( + panel.grid.major = element_line(color = "grey80"), + panel.grid.minor = element_line(color = "grey90") + ) + +``` + +```{r First graph reproducing, fig.width=12, fig.height=5} +# -------------------------------------------------------------------- +# First Plot (Bars for Wheat, Area & Line for Wages) +# -------------------------------------------------------------------- +g1 <- ggplot(df, aes(x = Year)) + + # Bars for Wheat prices + geom_bar( + aes(y = Wheat), + stat = "identity", + fill = "grey70", + width = 4, + alpha = 0.8 + ) + + # Area for Wages + geom_area(aes(y = Wages), fill = "lightblue", alpha = 0.6) + + # Red line on top of the area for Wages + geom_line(aes(y = Wages), color = "red", linewidth = 1) + + labs( + x = "Year", + y = "Shillings (Combined Wheat & Wages)", + title = "Graph Replicating William Playfair's Idea", + subtitle = "Wheat price (bars) and wages (area/line) on the same scale" + ) + + theme_playfair + +# Display first version +print(g1) + +# Add monarch backgrounds and labels +g1 <- g1 + + geom_rect( + data = monarch_df, + aes( + xmin = start, + xmax = end, + ymin = -Inf, + ymax = Inf, + fill = fill_color + ), + inherit.aes = FALSE, + alpha = 0.5, + color = NA + ) + + scale_fill_identity() + + geom_text( + data = monarch_df, + aes( + x = (start + end)/2, + y = Inf, + label = name + ), + inherit.aes = FALSE, + vjust = 1.2, + size = 3, + color = "black", + fontface = "bold" + ) + +# Display updated plot +print(g1) +``` + +```{r second graph using two axis, fig.width=12, fig.height=5} +# -------------------------------------------------------------------- +# Second Plot with Two Axes (Wheat vs. Wages) +# -------------------------------------------------------------------- + +# Calculate scaling factor (to align Wages visually to Wheat scale for the same y-axis range) +scale_factor <- max(df$Wheat, na.rm = TRUE) / max(df$Wages, na.rm = TRUE) + +g2 <- ggplot(df, aes(x = Year)) + + geom_line(aes(y = Wheat), color = "blue", size = 1) + + geom_line(aes(y = Wages * scale_factor), color = "red", size = 1) + + scale_y_continuous( + name = "Wheat Price (shillings per quarter bushel)", + sec.axis = sec_axis( + trans = ~ . / scale_factor, + name = "Weekly Wages (shillings per week)" + ) + ) + + labs( + x = "Year", + title = "Wheat Price and Wages on Two Axis", + subtitle = "Left Axis: shillings/quarter bushel, Right Axis: shillings/week" + ) + + theme_playfair + +print(g2) + +# Add monarch backgrounds and labels +g2 <- g2 + + geom_rect( + data = monarch_df, + aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf, fill = fill_color), + alpha = 0.5, + inherit.aes = FALSE + ) + + scale_fill_identity() + + geom_text( + data = monarch_df, + aes(x = (start + end)/2, y = Inf, label = name), + inherit.aes = FALSE, + vjust = 1.2, + size = 3 + ) + +print(g2) +``` + +```{r other representation of second plot, fig.width=12, fig.height=5} +# -------------------------------------------------------------------- +# Third Plot: Bars for Wheat, Line for Wages on Two Axes +# -------------------------------------------------------------------- + +scale_factor <- max(df$Wheat, na.rm = TRUE) / max(df$Wages, na.rm = TRUE) + +g2b <- ggplot(df, aes(x = Year)) + + # Bars for Wheat + geom_bar( + aes(y = Wheat), + stat = "identity", + width = 4, + fill = "grey70", + alpha = 0.8 + ) + + # Red line for Wages (scaled) + geom_line( + aes(y = Wages * scale_factor), + color = "red", + size = 1 + ) + + scale_y_continuous( + name = "Wheat Price (shillings per quarter bushel)", + sec.axis = sec_axis( + trans = ~ . / scale_factor, + name = "Weekly Wages (shillings per week)" + ) + ) + + labs( + x = "Year", + title = "Wheat Price (bars) and Wages (line) on Two Axes", + subtitle = "Main Axis: Wheat, Secondary Axis: Wages" + ) + + theme_playfair + +print(g2b) + +# Add monarch backgrounds and labels +g2b <- g2b + + geom_rect( + data = monarch_df, + aes( + xmin = start, + xmax = end, + ymin = -Inf, + ymax = Inf, + fill = fill_color + ), + inherit.aes = FALSE, + alpha = 0.5 + ) + + scale_fill_identity() + + geom_text( + data = monarch_df, + aes( + x = (start + end)/2, + y = Inf, + label = name + ), + inherit.aes = FALSE, + vjust = 1.2, + size = 3, + color = "black" + ) + +print(g2b) +``` + +```{r third plot purchasing power, fig.width=12, fig.height=5} +# -------------------------------------------------------------------- +# Fourth Plot: Purchasing Power = Wages / Wheat +# -------------------------------------------------------------------- + +df$PurchasingPower <- df$Wages / df$Wheat + +g3 <- ggplot(df, aes(x = Year, y = PurchasingPower)) + + geom_line(color = "darkgreen", size = 1) + + geom_point(color = "darkgreen", size = 2) + + labs( + x = "Year", + y = "Quarters of Bushels of Wheat per Weekly Wage", + title = "Evolution of Workers' Purchasing Power (in Wheat Volume)", + subtitle = "Inspired by Playfair's demonstration of rising purchasing power over time" + ) + + theme_playfair + +print(g3) + +# Add monarch backgrounds and labels +g3 <- g3 + + geom_rect( + data = monarch_df, + aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf, fill = fill_color), + alpha = 0.5, + inherit.aes = FALSE + ) + + scale_fill_identity() + + geom_text( + data = monarch_df, + aes(x = (start + end)/2, y = Inf, label = name), + inherit.aes = FALSE, + vjust = 1.2, + size = 3 + ) + +print(g3) +``` + +```{r forth plot wheat vs wages without time axis, fig.width=12, fig.height=5} +# -------------------------------------------------------------------- +# Fifth Plot: Scatter/Path of Wheat vs. Wages (Time as a Color Gradient) +# -------------------------------------------------------------------- + +# Make sure df is sorted by Year +df <- df[order(df$Year), ] + +g4 <- ggplot(df, aes(x = Wheat, y = Wages)) + + geom_path( + aes(color = Year), + arrow = arrow(type = "open", length = unit(0.15, "inches")), + size = 1 + ) + + geom_point(aes(color = Year), size = 2) + + # Color gradient from oldest (blue) to newest (red) + scale_color_gradient(low = "blue", high = "red") + + labs( + x = "Wheat Price (shillings/quarter bushel)", + y = "Weekly Wages (shillings/week)", + color = "Year", + title = "Relationship Between Wheat Price and Weekly Wages (No Direct Time Axis)", + subtitle = "Color and arrow indicate chronological progression" + ) + + theme_minimal(base_size = 12) + +print(g4) +``` + +```{r , fig.width=12, fig.height=5} +# -------------------------------------------------------------------- +# Summarize by Decade, Then Plot (Path + Arrow + Labels) +# -------------------------------------------------------------------- +df_decade <- df %>% + mutate(decade = floor(Year / 10) * 10) %>% + group_by(decade) %>% + summarize( + Wheat = mean(Wheat, na.rm = TRUE), + Wages = mean(Wages, na.rm = TRUE) + ) %>% + ungroup() + +g_better <- ggplot(df_decade, aes(x = Wheat, y = Wages)) + + geom_path( + arrow = arrow(length = unit(0.15, "inches"), type = "open"), + color = "darkblue", + size = 1 + ) + + geom_point(color = "darkblue", size = 3) + + geom_text( + aes(label = decade), + hjust = -0.1, + vjust = -0.5, + color = "black", + size = 3 + ) + + labs( + x = "Wheat Price (shillings/quarter bushel)", + y = "Weekly Wages (shillings/week)", + title = "Wheat Price vs. Wages (Aggregated by Decade)", + subtitle = "Arrows and labels indicate progression over time (no direct time axis)" + ) + + theme_minimal(base_size = 12) + +print(g_better) +``` + +I find g3 does a better job of illustrating purchasing power. g4 and g_better become too cluttered between 1560 and 1700, making it difficult to interpret the data at a glance. Even when the data is aggregated by decade (g4 vs. g_better), it remains challenging to grasp the information quickly.