---
title: "Subject 1: CO2 concentration in the atmosphere since 1958"
author: "NJ"
date: "11 July 2025"
output: html_document
Link: https://scrippsco2.ucsd.edu/data/atmospheric_co2/primary_mlo_co2_record.html
---

```{r}
library(ggplot2)

data_url <- "https://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/in_situ_co2/monthly/monthly_in_situ_co2_mlo.csv"

# Fetch the file once and keep its raw lines in memory, so the document
# does not download the same file several times.
raw_lines <- readLines(data_url)
```

```{r}
# Locate the first data row instead of hard-coding the header length.
# NOTE(review): assumes every data row starts with a four-digit year followed
# by a comma and that no header line does -- confirm against the file.
first_data_row <- grep("^[[:space:]]*[0-9]{4}[[:space:]]*,", raw_lines)[1]
stopifnot(
  "no data row found in the downloaded file" = !is.na(first_data_row)
)

df <- read.csv(
  text = raw_lines[first_data_row:length(raw_lines)],
  header = FALSE,
  col.names = c(
    "Year", "Month", "ExcelDate", "DecimalDate",
    "CO2", "CO2_Adjusted", "Fit", "Fit_Adjusted",
    "Filled", "Filled_Adjusted", "Station"
  )
)

is.data.frame(df)
head(df)
```

```{r}
# Raw series exactly as read (missing-value sentinels are still present here;
# they are removed in the next chunk).
ggplot(df, aes(x = DecimalDate, y = CO2)) +
  geom_line(color = "steelblue") +
  labs(
    title = "Mauna Loa Atmospheric CO2",
    x = "Year",
    y = "CO2 (ppm)"
  ) +
  theme_minimal()
```

```{r}
## 1. Make a plot that shows the superposition of
## a periodic oscillation and a slower systematic evolution.

# Remove missing values (like -99.99); the is.na() guard prevents NA rows
# from being carried into the cleaned data.
df_clean <- df[!is.na(df$CO2) & df$CO2 > 0, ]

# Upper x-axis limit derived from the data, so that no observation is
# dropped by a hard-coded limit.
x_max_obs <- ceiling(max(df_clean$DecimalDate))

ggplot(df_clean, aes(x = DecimalDate, y = CO2)) +
  geom_line(color = "darkblue", linewidth = 0.6, alpha = 0.8) +
  # `span` only applies to loess, so request that method explicitly.
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.3,
    se = FALSE, color = "red", linewidth = 1.2
  ) +
  scale_x_continuous(
    limits = c(1958, x_max_obs),          # x-axis range
    breaks = seq(1960, x_max_obs, by = 5) # tick marks every 5 years
  ) +
  labs(
    title = "Superposition of Seasonal Oscillation and Long-Term CO2 Trend",
    subtitle = "Mauna Loa Observatory (MLO)",
    x = "Year",
    y = "CO2 concentration (ppm)"
  ) +
  theme_minimal()
```

```{r}
## 2. Separate these two phenomena.

# Time variable (decimal years) used by the models below.
df_clean$time <- df_clean$DecimalDate
```

```{r}
## 2.2. Find a simple model for the slow contribution and estimate its
## parameters (the extrapolation is done further below).

# Fit a quadratic model (simplified but effective)
trend_model <- lm(CO2 ~ poly(time, 2), data = df_clean)

# Add the fitted trend to the data frame; what remains after subtracting it
# is the component modelled as the periodic oscillation.
df_clean$trend <- predict(trend_model)
df_clean$seasonal <- df_clean$CO2 - df_clean$trend
```

```{r}
# Build a calendar date of the form "YYYY-MM-15" (15 = mid month)
df_clean$Date <- as.Date(
  paste(df_clean$Year, df_clean$Month, "15", sep = "-"),
  format = "%Y-%m-%d"
)
df_clean$month <- as.numeric(format(df_clean$Date, "%m"))
```

```{r}
p1 <- ggplot(df_clean, aes(x = DecimalDate)) +
  geom_line(aes(y = CO2), color = "gray60", linewidth = 0.5) +
  geom_line(aes(y = trend), color = "blue", linewidth = 1.2) +
  scale_x_continuous(
    limits = c(1958, x_max_obs),          # x-axis range
    breaks = seq(1960, x_max_obs, by = 5) # tick marks every 5 years
  ) +
  labs(
    title = "Long-Term Trend in CO2 Concentration",
    x = "Year",
    y = "CO2 (ppm)"
  ) +
  theme_minimal()
p1
```

```{r}
## 2.1. Characterize the periodic oscillation.

# Fit a sine curve with a 1-year period to the detrended series
season_model <- nls(
  seasonal ~ A * sin(2 * pi * DecimalDate) + B * cos(2 * pi * DecimalDate),
  data = df_clean,
  start = list(A = 1, B = 1)
)
summary(season_model)
```

```{r}
## Extrapolation of trend + oscillation.

# Monthly time grid from the last observation to 2030
forecast_start <- max(df_clean$time)
future <- data.frame(time = seq(from = forecast_start, to = 2030, by = 1 / 12))

# Predict trend
future$trend <- predict(trend_model, newdata = future)

# Predict the seasonal part with the fitted sine model; coefficients are
# looked up by name rather than by position.
season_coef <- coef(season_model)
A <- season_coef[["A"]]
B <- season_coef[["B"]]
future$seasonal <- A * sin(2 * pi * future$time) + B * cos(2 * pi * future$time)

# Combine to get the total CO2 estimate
future$CO2_predicted <- future$trend + future$seasonal
```

```{r}
# Predicted trend and predicted CO2 (ppm) from the last observation to 2030
ggplot() +
  geom_line(data = future, aes(x = time, y = trend), color = "blue") +
  geom_line(data = future, aes(x = time, y = CO2_predicted), color = "red") +
  labs(
    title = "CO2 Projection with Trend + Seasonal Model",
    y = "CO2 (ppm)",
    x = "Year"
  ) +
  theme_minimal()
```

```{r}
# Observed CO2 (ppm) followed by the increase predicted until 2030
ggplot() +
  geom_line(data = df_clean, aes(x = DecimalDate, y = CO2), color = "black") +
  geom_line(
    data = future, aes(x = time, y = CO2_predicted), color = "darkgreen"
  ) +
  # Mark where the prediction actually starts (the last observation).
  geom_vline(xintercept = forecast_start, linetype = "dashed", color = "red") +
  annotate(
    "text",
    x = forecast_start + 0.5, y = max(df_clean$CO2),
    label = "Prediction zone", color = "red", hjust = 0
  ) +
  scale_x_continuous(
    limits = c(1958, 2030),          # x-axis range
    breaks = seq(1960, 2030, by = 5) # tick marks every 5 years
  ) +
  labs(
    title = "CO2 Prediction Until 2030",
    x = "Year",
    y = "CO2 (ppm)"
  ) +
  theme_minimal()
```

Note: write your solution in the file module3/exo3/exercice_en.Rmd. Generate a PDF file using the KnitR tool and store it as module3/exo3/YourFileName.pdf (don't forget to commit it).