From a64a03e408568d43cc4e28fdc038fe9ce4e904bc Mon Sep 17 00:00:00 2001 From: 0c2d387b484b42dc06d4c28dcae352b6 <0c2d387b484b42dc06d4c28dcae352b6@app-learninglab.inria.fr> Date: Fri, 3 Dec 2021 23:18:22 +0000 Subject: [PATCH] Replace exercice_en.Rmd --- module3/exo3/exercice_en.Rmd | 78 +++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/module3/exo3/exercice_en.Rmd b/module3/exo3/exercice_en.Rmd index 62a380b..9cc2f16 100644 --- a/module3/exo3/exercice_en.Rmd +++ b/module3/exo3/exercice_en.Rmd @@ -6,6 +6,7 @@ output: pdf_document: default html_document: df_print: paged +urlcolor: blue --- ```{r setup, include=FALSE} @@ -16,58 +17,69 @@ options(warn = -1) ## Preamble The aim of this activity is to perform convenient visualization for data describing the evolution of wages and wheat price for English workers from the 16th to the 19th century. +The dataset at our disposal is a CSV file available at: + +<https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/HistData/Wheat.csv>. + +The following is the chart as made by William Playfair, showing at one view the price of both the quarter of wheat and wages of labour by the Week, from 1565 to 1821. + +![Evolution of the wheat price and average salaries from 1565 to 1821 *(source: [Wikimedia](https://commons.wikimedia.org/wiki/File:Chart_Showing_at_One_View_the_Price_of_the_Quarter_of_Wheat,_and_Wages_of_Labour_by_the_Week,_from_1565_to_1821.png))*](img/playfair-chart.png) + +In this document, we first try to reproduce the same chart using R. Then, we propose some enhancements to the visualization. Finally, we try to make the message behind Playfair's chart stand out better. + -We will need to use the following libraries: + +\newpage +## Preliminary steps +1. 
Importing the following R libraries: ```{r, results=FALSE, message=FALSE} -# The environment library(tidyverse) library(ggplot2) ``` -## Build the data frame -From the following link we have downloaded the data we are going to work with in the form of a csv file, and make it into **data/** folder: +2. Building the data frame: - -We build the data frame as follows, and we print a couple of rows to have a look at its structure: +From the [link](https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/HistData/Wheat.csv) cited above, we have downloaded the csv file containing the data. It is located in the `data/` folder. + +We build the data frame as follows, and we output a couple of rows to have a look at its structure: ```{r, message=FALSE} df <- read.csv("data/Wheat.csv",header=T) df[c(1,2),] ``` -## Clean the data frame -We observe that the first column indicates a sort of an identifier for each data sample. This is not an interesting parameter, so we can simply omit it: +We observe that the first column indicates a sort of an identifier for each data sample. This is not an interesting parameter, so we may simply omit it: ```{r, message=FALSE} -# only keep columns from 2 to 4 (column 1 is omitted) +# Only keep columns from 2 to 4 (column 1 is omitted): df <- df[c(2:4)] df[c(1,2),] ``` -## 1. 
Reproducing Playfair's graph +## Reproducing Playfair's graph ```{r, message=FALSE} -# create a list of colors +# Create a list of personalized colors: my_colors <- list( blue = "#3399e6", red = "#ff3333", dark = "#1f1f1f") - +``` +```{r, message=FALSE} # Start with a usual ggplot2 call: ggplot(df, aes(x=Year)) + + # Plot Wheat price with Histograms (scale=3/2): geom_col( aes(y=Wheat/1.5), width = 4.15, alpha=1, color = my_colors[["dark"]], fill = my_colors[["dark"]] ) + + # Plot Wages as a blue filled area with a red delimiter: geom_area( aes(y=Wages), size = 1, alpha=0.7, color = my_colors[["red"]], - fill=my_colors[["blue"]] ) + + fill=my_colors[["blue"]] ) + # Custom the Y scales: - scale_y_continuous( - # Features of the first axis - name = "Wages (in Shillings per week)", - # Add a second axis and specify its features - sec.axis = sec_axis( trans=~.*1.5, name="Wheat Price (in Shillings per quarter)") + scale_y_continuous( name = "Wages (in Shillings per week)", + sec.axis = sec_axis( trans=~.*1.5, name="Wheat Price (in Shillings per quarter)" ) ) + labs(title = "Evolution of wages and wheat price for English workers (16th to 19th century)") + theme(plot.title = element_text(hjust = 0.5), @@ -75,16 +87,20 @@ ggplot(df, aes(x=Year)) + axis.title.y.right = element_text(colour = my_colors[["dark"]])) ``` -## 2. Alternative representation +- \underline{Note:} The last three values of Wages are missing from the dataset. +- \underline{Comment:} Wages increased over the whole period represented in this chart, with a noticeably faster pace starting around 1700. The wheat price, on the other hand, shows no consistent trend and at times changed rapidly. + +## Alternative representation First, we represent the data as simple dots for both wages and wheat price. As it is difficult to see the pattern for the wheat values evolution, we use the `stat_smooth()` function that shows a smoothed mean (with a confidence level of 70%). 
```{r, message=FALSE} -# Set color parameters +# Set color parameters: wages_color <- "#ff5733" wheat_color <- rgb(0.2, 0.6, 0.9, 1) wheat_color_trans <- rgb(0.2, 0.6, 0.9, 0.5) - +``` +```{r, message=FALSE} # Start with a usual ggplot2 call: ggplot(df, aes(x=Year)) + geom_point( aes(y=Wages), size = 0.7, color = wages_color) + @@ -94,11 +110,7 @@ ggplot(df, aes(x=Year)) + stat_smooth(aes(y=Wheat/1.5), level = 0.7, size=0.6, color=wheat_color_trans) + # Custom the Y scales: - scale_y_continuous( - # Features of the first axis - name = "Wages (in Shillings per week)", - - # Add a second axis and specify its features + scale_y_continuous( name = "Wages (in Shillings per week)", sec.axis = sec_axis( trans=~.*1.5, name="Wheat (in Shillings per quarter)") ) + labs(title = "Evolution of wages and wheat price for English workers (16th to 19th century)") + @@ -107,22 +119,22 @@ ggplot(df, aes(x=Year)) + axis.title.y.right = element_text(colour = wheat_color)) ``` -## 3. Another Representation without an explicit time axis +## Another representation without an explicit time axis ```{r, message=FALSE} -ggplot(df, aes(x=Wages, y=Wheat)) + +df_sub <- df[,] +ggplot(df_sub, aes(x=Wages, y=Wheat)) + geom_area( size = 0.5, alpha=0.5, linetype="dashed", color = "#ff3333", fill = "#3399e6" ) + geom_point( size = .7, shape = 4 ) + - xlim(min((na.omit(df))$Wages), max((na.omit(df))$Wages)) + + xlim(min((na.omit(df_sub))$Wages), max((na.omit(df_sub))$Wages)) + - - labs(title = "Evolution Wheat price with respect to Salary of English workers (16th to 19th century)") + + labs(title = "Evolution of wheat price with respect to salary of English workers (16th to 19th century)") + theme(plot.title = element_text(hjust = 0.5)) ``` -### Comments -TBD + + -- 2.18.1