diff --git a/module3/exo3/exercice_en.Rmd b/module3/exo3/exercice_en.Rmd index a85afaf78e17c3d04946c8adbfd6dd0f512b1a63..cfcb70ac94f01d626f77c187169bfcdc3291e325 100644 --- a/module3/exo3/exercice_en.Rmd +++ b/module3/exo3/exercice_en.Rmd @@ -1,24 +1,37 @@ --- -title: "CO2 Keeling" +title: "CO2 Keeling Curve and Hisitorical Ice Core Data" author: "David Elser" date: "05-03-2021" -output: html_document +output: pdf_document --- -## Data preprocessing fetch the Data file and check if file already exists -The data is available from the Web site of the Scripps Institute "https://scrippsco2.ucsd.edu/data/atmospheric_co2/primary_mlo_co2_record.html" +#### Please note that I am not a Computer Scientist and my programming skills are very limited. + +### Data preprocessing fetch the Data file and check if file already exists + +The data is available from the Web site of the Scripps Institute + +"https://scrippsco2.ucsd.edu/data/atmospheric_co2/primary_mlo_co2_record.html" +"https://scrippsco2.ucsd.edu/assets/data/atmospheric/merged_ice_core_mlo_spo/merged_ice_core_yearly.csv" Soure script available at : https://github.com/DemirhanOzelTrojan/Keeling_Predict/blob/master/main.R + +` install.packages('forecast', dependencies = TRUE) install.packages("imputeTS") +install.packages("tidyverse") +` -```{r} +### Load librarys -install.packages("imputeTS") +```{r} +library(ggplot2) library(forecast) library(imputeTS) +library(lubridate) ``` +### Fetch CO2 measurements from Mauna Loa observatory, Hawaii, USA ```{r} data_url = "https://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/in_situ_co2/monthly/monthly_in_situ_co2_mlo.csv" @@ -29,23 +42,151 @@ if (!file.exists(data_file)) { } ``` -Skip the frist 5 lines of the file +### Fetch Ice Core CO2 Data + +```{r} + + +data_url = "https://scrippsco2.ucsd.edu/assets/data/atmospheric/merged_ice_core_mlo_spo/merged_ice_core_yearly.csv" +data_file1 = "Ice-CO2.csv" + +if (!file.exists(data_file1)) { + download.file(data_url, data_file1, method="auto") +} +``` + +### Skip the frist 36 lines + +```{r} +data_ice = read.csv(data_file1, header=FALSE, skip=36) +``` + +### Skip the frist 5 lines of the file ```{r} data = read.csv(data_file, header=TRUE, skip=5) ``` -Let's have a look at what we got: +### Let's have a look at what we got and check NA + +```{r} +head(data_ice) +tail(data_ice) + +na_records = apply(data_ice, 1, function (x) any(is.na(x))) +data[na_records,] + + + +``` +### Conversion of decimal-date into Years, rename V2 into ppm + + +```{r} + + +data_ice$date<- format(date_decimal(data_ice$V1), "%Y") +head(data_ice) +tail(data_ice) +names(data_ice)[names(data_ice) == "V2"] <- "ppm" +data_ice$Year=as.numeric(data_ice$date) +head(data_ice) +tail(data_ice) + + +``` + + + +### Let's have a look at what we got from Mauna Loa observatory and check NA + ```{r} head(data) tail(data) + +na_records = apply(data, 1, function (x) any(is.na(x))) +data[na_records,] ``` +### impute data of column X.ppm..4(CO2, seasonally filled,adjusted filled) ```{r} -data$X.ppm.[data$X.ppm. == -99.99] <- NA -data$X.ppm. <- na.interpolation(data$X.ppm.) +data$X.ppm..4[data$X.ppm..4 == -99.99] <- NA +data$X.ppm..4 <- na.interpolation(data$X.ppm..4) head(data) tail(data) ``` + +### data preparation +```{r} +data.ts <- ts(data$X.ppm..4, start = c(1958, 1), end = c(2021, 1), freq = 12) + +``` + +### split training and testing data, use 90% of available data in the co2.csv file +```{r} +train.test.split <- 0.9 +split.index <- round(length(data.ts) * train.test.split) +``` + +### train time series +```{r} +data.train.ts <- window(data.ts, start = c(1958, 1), end = c(1958, split.index)) +``` + +### test time series +```{r} +data.test.ts <- window(data.ts, start = c(1958, split.index + 1), end = c(1958, length(data.ts))) +``` + +### create a fit using the time series +```{r} +fit <- ets(data.ts, model = 'AAA') +``` + +### forecast using the fit +```{r} +predictions <- forecast(fit, 12 * 20, level = 0.95) +``` + + +## Plot Ice Core Data + +```{r} + +theme_set(theme_minimal()) + +ggplot(data = data_ice, aes(x = Year, y = ppm))+ + geom_line(color = "#00AFBB", size = 1) + + +``` + +## Plot predictions till 2025 + +```{r} + +plot(predictions, + main = '', + xlab = 'Time', + ylab = 'CO2 (ppm)', + xlim = c(1958, 2025), + shaded = TRUE, + shadecols=c('light blue')) +legend('topleft', + legend=c('Historical Data', + 'Predicted Data', + '95% CI'), + fill=c(NA, NA, 'light blue'), + col=c('black', 'blue', NA), + lty=c(1, 1, NA), + lwd=c(1, 2, NA), + border=c(NA, NA, 'black'), + inset=0.01) + + +``` + + + diff --git a/module3/exo3/exercice_en.pdf b/module3/exo3/exercice_en.pdf new file mode 100644 index 0000000000000000000000000000000000000000..472440135a5b5acf04d41e8bc430d6478cb3ac1e Binary files /dev/null and b/module3/exo3/exercice_en.pdf differ