Rmd file of the wheat exercise

parent 244eb0b2
---
title: "wheat"
author: "Fouad Gasmi"
date: "2025-02-18"
output: html_document
---
```{r }
# --------------------------------------------------------------------
# Global knitr chunk options: echo the code, hide warnings and messages
# --------------------------------------------------------------------
knitr::opts_chunk$set(
  echo    = TRUE,
  message = FALSE,
  warning = FALSE
)
```
```{r packages}
# --------------------------------------------------------------------
# Check if packages are installed; if not, install them
# --------------------------------------------------------------------
required_packages <- c("ggplot2", "dplyr")

# Find every package that cannot be loaded, so that a single
# install.packages() call handles all of them at once.
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]

if (length(missing_packages) > 0) {
  # A non-interactive knit cannot prompt for a CRAN mirror: when none is
  # configured (NULL or the "@CRAN@" placeholder), fall back to the CRAN
  # cloud mirror instead of letting install.packages() stop with an error.
  cran_repos <- getOption("repos")
  if (is.null(cran_repos)) {
    cran_repos <- c(CRAN = "https://cloud.r-project.org")
  } else {
    cran_repos[cran_repos == "@CRAN@"] <- "https://cloud.r-project.org"
  }
  install.packages(missing_packages, repos = cran_repos)
}

library(ggplot2)
library(dplyr)
```
```{r load data}
# --------------------------------------------------------------------
# Read the wheat data set from the CSV file (first row = column names)
# --------------------------------------------------------------------
df <- utils::read.csv(file = "data/Wheat.csv", header = TRUE)

# Show column names and types as a quick sanity check
utils::str(object = df)
```
```{r adding data of monarch reign}
# --------------------------------------------------------------------
# Monarch reigns, one row per reign (used for background shading)
# --------------------------------------------------------------------
monarch_df <- data.frame(
  name = c(
    "Elizabeth", "James I", "Charles I", "Cromwell", "Charles II", "James II",
    "W&M", "Anne", "George I", "George II", "George III", "George IV"
  ),
  start = c(
    1565, 1603, 1625, 1649, 1660, 1685,
    1689, 1702, 1714, 1727, 1760, 1820
  ),
  end = c(
    1603, 1625, 1649, 1660, 1685, 1689,
    1702, 1714, 1727, 1760, 1820, 1821
  )
)

# Alternate the band colour from one reign to the next, starting with
# white on the first row, so that adjacent reigns can be told apart.
monarch_df$fill_color <- rep_len(c("white", "gray90"), nrow(monarch_df))
```
```{r Plot theme}
# --------------------------------------------------------------------
# Shared plot theme: theme_minimal() with light grey grid lines
# --------------------------------------------------------------------
theme_playfair <- theme_minimal(base_size = 12) +
  theme(
    panel.grid.minor = element_line(colour = "grey90"),
    panel.grid.major = element_line(colour = "grey80")
  )
```
```{r First graph reproducing, fig.width=12, fig.height=5}
# --------------------------------------------------------------------
# First Plot (Bars for Wheat, Area & Line for Wages)
# --------------------------------------------------------------------
# Data layers, shared by the plain and the annotated version of the plot:
# grey bars for the wheat price, a light blue area for the wages and a
# red line on top of that area.
g1_data_layers <- list(
  geom_col(aes(y = Wheat), fill = "grey70", width = 4, alpha = 0.8),
  geom_area(aes(y = Wages), fill = "lightblue", alpha = 0.6),
  geom_line(aes(y = Wages), color = "red", linewidth = 1)
)

# Labels and theme, also shared by both versions
g1_decorations <- list(
  labs(
    x = "Year",
    y = "Shillings (Combined Wheat & Wages)",
    title = "Graph Replicating William Playfair's Idea",
    subtitle = "Wheat price (bars) and wages (area/line) on the same scale"
  ),
  theme_playfair
)

# Display first version (no monarch annotations)
g1 <- ggplot(df, aes(x = Year)) +
  g1_data_layers +
  g1_decorations
print(g1)

# Second version with monarch backgrounds and labels. ggplot2 draws layers
# in the order they are added, so the reign bands are added BEFORE the data
# layers: they stay in the background instead of being painted over the
# bars, the area and the line.
g1 <- ggplot(df, aes(x = Year)) +
  geom_rect(
    data = monarch_df,
    aes(
      xmin = start,
      xmax = end,
      ymin = -Inf,
      ymax = Inf,
      fill = fill_color
    ),
    inherit.aes = FALSE,
    alpha = 0.5,
    color = NA
  ) +
  # fill_color already holds colour names: use them as they are
  scale_fill_identity() +
  g1_data_layers +
  # Monarch names, centred on each reign; y = Inf puts them at the top edge
  # of the panel and vjust pushes them just inside it
  geom_text(
    data = monarch_df,
    aes(
      x = (start + end) / 2,
      y = Inf,
      label = name
    ),
    inherit.aes = FALSE,
    vjust = 1.2,
    size = 3,
    color = "black",
    fontface = "bold"
  ) +
  g1_decorations

# Display updated plot
print(g1)
```
```{r second graph using two axis, fig.width=12, fig.height=5}
# --------------------------------------------------------------------
# Second Plot with Two Axes (Wheat vs. Wages)
# --------------------------------------------------------------------
# Scaling factor that stretches the wages so that their maximum reaches
# the maximum wheat price; the secondary axis undoes this scaling.
scale_factor <- max(df$Wheat, na.rm = TRUE) / max(df$Wages, na.rm = TRUE)

# Data layers, shared by the plain and the annotated version of the plot.
# `linewidth` is used for line thickness (`size` is deprecated for lines
# since ggplot2 3.4.0).
g2_data_layers <- list(
  geom_line(aes(y = Wheat), color = "blue", linewidth = 1),
  geom_line(aes(y = Wages * scale_factor), color = "red", linewidth = 1)
)

# Axes, labels and theme, also shared by both versions
g2_decorations <- list(
  scale_y_continuous(
    name = "Wheat Price (shillings per quarter bushel)",
    # The transformation is passed positionally: the argument was renamed
    # from `trans` to `transform` in ggplot2 3.5.0, and the first position
    # works with both names.
    sec.axis = sec_axis(
      ~ . / scale_factor,
      name = "Weekly Wages (shillings per week)"
    )
  ),
  labs(
    x = "Year",
    title = "Wheat Price and Wages on Two Axis",
    subtitle = "Left Axis: shillings/quarter bushel, Right Axis: shillings/week"
  ),
  theme_playfair
)

# Display first version (no monarch annotations)
g2 <- ggplot(df, aes(x = Year)) +
  g2_data_layers +
  g2_decorations
print(g2)

# Second version with monarch backgrounds and labels. The reign bands are
# added before the data layers so that they are drawn underneath the lines.
g2 <- ggplot(df, aes(x = Year)) +
  geom_rect(
    data = monarch_df,
    aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf, fill = fill_color),
    alpha = 0.5,
    inherit.aes = FALSE
  ) +
  # fill_color already holds colour names: use them as they are
  scale_fill_identity() +
  g2_data_layers +
  geom_text(
    data = monarch_df,
    aes(x = (start + end) / 2, y = Inf, label = name),
    inherit.aes = FALSE,
    vjust = 1.2,
    size = 3
  ) +
  g2_decorations
print(g2)
```
```{r other representation of second plot, fig.width=12, fig.height=5}
# --------------------------------------------------------------------
# Third Plot: Bars for Wheat, Line for Wages on Two Axes
# --------------------------------------------------------------------
# Scaling factor that stretches the wages so that their maximum reaches
# the maximum wheat price; the secondary axis undoes this scaling.
scale_factor <- max(df$Wheat, na.rm = TRUE) / max(df$Wages, na.rm = TRUE)

# Data layers, shared by the plain and the annotated version of the plot
g2b_data_layers <- list(
  # Bars for Wheat
  geom_col(aes(y = Wheat), width = 4, fill = "grey70", alpha = 0.8),
  # Red line for Wages (scaled); `linewidth` replaces the `size` argument,
  # which is deprecated for lines since ggplot2 3.4.0
  geom_line(aes(y = Wages * scale_factor), color = "red", linewidth = 1)
)

# Axes, labels and theme, also shared by both versions
g2b_decorations <- list(
  scale_y_continuous(
    name = "Wheat Price (shillings per quarter bushel)",
    # The transformation is passed positionally: the argument was renamed
    # from `trans` to `transform` in ggplot2 3.5.0, and the first position
    # works with both names.
    sec.axis = sec_axis(
      ~ . / scale_factor,
      name = "Weekly Wages (shillings per week)"
    )
  ),
  labs(
    x = "Year",
    title = "Wheat Price (bars) and Wages (line) on Two Axes",
    subtitle = "Main Axis: Wheat, Secondary Axis: Wages"
  ),
  theme_playfair
)

# Display first version (no monarch annotations)
g2b <- ggplot(df, aes(x = Year)) +
  g2b_data_layers +
  g2b_decorations
print(g2b)

# Second version with monarch backgrounds and labels. The reign bands are
# added before the data layers so that they are drawn underneath the bars
# and the line.
g2b <- ggplot(df, aes(x = Year)) +
  geom_rect(
    data = monarch_df,
    aes(
      xmin = start,
      xmax = end,
      ymin = -Inf,
      ymax = Inf,
      fill = fill_color
    ),
    inherit.aes = FALSE,
    alpha = 0.5
  ) +
  # fill_color already holds colour names: use them as they are
  scale_fill_identity() +
  g2b_data_layers +
  geom_text(
    data = monarch_df,
    aes(
      x = (start + end) / 2,
      y = Inf,
      label = name
    ),
    inherit.aes = FALSE,
    vjust = 1.2,
    size = 3,
    color = "black"
  ) +
  g2b_decorations
print(g2b)
```
```{r third plot purchasing power, fig.width=12, fig.height=5}
# --------------------------------------------------------------------
# Fourth Plot: Purchasing Power = Wages / Wheat
# --------------------------------------------------------------------
# Ratio of the weekly wage to the wheat price, stored as a new column
df$PurchasingPower <- df$Wages / df$Wheat

# Data layers, shared by the plain and the annotated version of the plot.
# `linewidth` sets the line thickness (`size` is deprecated for lines
# since ggplot2 3.4.0); `size` is still the right argument for points.
g3_data_layers <- list(
  geom_line(color = "darkgreen", linewidth = 1),
  geom_point(color = "darkgreen", size = 2)
)

# Labels and theme, also shared by both versions
g3_decorations <- list(
  labs(
    x = "Year",
    y = "Quarters of Bushels of Wheat per Weekly Wage",
    title = "Evolution of Workers' Purchasing Power (in Wheat Volume)",
    subtitle = "Inspired by Playfair's demonstration of rising purchasing power over time"
  ),
  theme_playfair
)

# Display first version (no monarch annotations)
g3 <- ggplot(df, aes(x = Year, y = PurchasingPower)) +
  g3_data_layers +
  g3_decorations
print(g3)

# Second version with monarch backgrounds and labels. The reign bands are
# added before the data layers so that they are drawn underneath the line
# and the points.
g3 <- ggplot(df, aes(x = Year, y = PurchasingPower)) +
  geom_rect(
    data = monarch_df,
    aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf, fill = fill_color),
    alpha = 0.5,
    inherit.aes = FALSE
  ) +
  # fill_color already holds colour names: use them as they are
  scale_fill_identity() +
  g3_data_layers +
  geom_text(
    data = monarch_df,
    aes(x = (start + end) / 2, y = Inf, label = name),
    inherit.aes = FALSE,
    vjust = 1.2,
    size = 3
  ) +
  g3_decorations
print(g3)
```
```{r forth plot wheat vs wages without time axis, fig.width=12, fig.height=5}
# --------------------------------------------------------------------
# Fifth Plot: Scatter/Path of Wheat vs. Wages (Time as a Color Gradient)
# --------------------------------------------------------------------
# geom_path() connects the observations in row order, so the rows must be
# sorted by Year for the path to follow the chronology.
df <- df[order(df$Year), ]

g4 <- ggplot(df, aes(x = Wheat, y = Wages)) +
  # Path through the successive (Wheat, Wages) pairs, ending with an arrow.
  # `linewidth` replaces `size`, which is deprecated for lines since
  # ggplot2 3.4.0.
  geom_path(
    aes(color = Year),
    arrow = arrow(type = "open", length = unit(0.15, "inches")),
    linewidth = 1
  ) +
  geom_point(aes(color = Year), size = 2) +
  # Color gradient from oldest (blue) to newest (red)
  scale_color_gradient(low = "blue", high = "red") +
  labs(
    x = "Wheat Price (shillings/quarter bushel)",
    y = "Weekly Wages (shillings/week)",
    color = "Year",
    title = "Relationship Between Wheat Price and Weekly Wages (No Direct Time Axis)",
    subtitle = "Color and arrow indicate chronological progression"
  ) +
  theme_minimal(base_size = 12)
print(g4)
```
```{r , fig.width=12, fig.height=5}
# --------------------------------------------------------------------
# Summarize by Decade, Then Plot (Path + Arrow + Labels)
# --------------------------------------------------------------------
# Average wheat price and wages per decade; a decade is identified by its
# first year (e.g. 1570 for the years 1570-1579).
df_decade <- df %>%
  mutate(decade = floor(Year / 10) * 10) %>%
  group_by(decade) %>%
  summarize(
    Wheat = mean(Wheat, na.rm = TRUE),
    Wages = mean(Wages, na.rm = TRUE)
  ) %>%
  ungroup()

g_better <- ggplot(df_decade, aes(x = Wheat, y = Wages)) +
  # Path through the successive decades, ending with an arrow.
  # `linewidth` replaces `size`, which is deprecated for lines since
  # ggplot2 3.4.0.
  geom_path(
    arrow = arrow(length = unit(0.15, "inches"), type = "open"),
    color = "darkblue",
    linewidth = 1
  ) +
  geom_point(color = "darkblue", size = 3) +
  # Label every point with its decade, nudged up and to the right
  geom_text(
    aes(label = decade),
    hjust = -0.1,
    vjust = -0.5,
    color = "black",
    size = 3
  ) +
  labs(
    x = "Wheat Price (shillings/quarter bushel)",
    y = "Weekly Wages (shillings/week)",
    title = "Wheat Price vs. Wages (Aggregated by Decade)",
    subtitle = "Arrows and labels indicate progression over time (no direct time axis)"
  ) +
  theme_minimal(base_size = 12)
print(g_better)
```
I find that `g3` does a better job of illustrating purchasing power. `g4` and `g_better` both become too cluttered for the years between 1560 and 1700, which makes the data difficult to interpret at a glance. Even after aggregating the data by decade (`g_better` instead of `g4`), it remains challenging to grasp the information quickly.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment