From 4dcebe6415b22bf80b101d987525ed7d7d151341 Mon Sep 17 00:00:00 2001
From: 57e92ddd8e047446b30777d5b69846d1
<57e92ddd8e047446b30777d5b69846d1@app-learninglab.inria.fr>
Date: Sun, 12 Feb 2023 21:23:41 +0000
Subject: [PATCH] wip
---
module3/exo3/exercice.ipynb | 199 ++++++++++++++++++++++++++++++++++++
1 file changed, 199 insertions(+)
diff --git a/module3/exo3/exercice.ipynb b/module3/exo3/exercice.ipynb
index 88e4157..7dbd2ac 100644
--- a/module3/exo3/exercice.ipynb
+++ b/module3/exo3/exercice.ipynb
@@ -7,6 +7,205 @@
"# Sujet 1 : Concentration de CO2 dans l'atmosphère depuis 1958"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%matplotlib inline\n",
+ "import matplotlib.pyplot as plt\n",
+ "import pandas as pd\n",
+ "import isoweek\n",
+ "\n",
+ "# Figure sizing. Defaults noted by the original author:\n",
+ "#   plt.rcParams['figure.dpi']     -> 72\n",
+ "#   plt.rcParams['figure.figsize'] -> 6.0, 4.0\n",
+ "# Optional higher resolution (left disabled): plt.rcParams['figure.dpi'] = 100\n",
+ "plt.rcParams.update({'figure.figsize': [12.0, 4.0]})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Les données sont disponibles sur le site Web de l'[institut Scripps](https://scrippsco2.ucsd.edu/data/atmospheric_co2/primary_mlo_co2_record.html)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " date | \n",
+ " value | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1958-03-29 | \n",
+ " 316.19 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1958-04-05 | \n",
+ " 317.31 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1958-04-12 | \n",
+ " 317.69 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1958-04-19 | \n",
+ " 317.58 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1958-04-26 | \n",
+ " 316.48 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " date value\n",
+ "0 1958-03-29 316.19\n",
+ "1 1958-04-05 317.31\n",
+ "2 1958-04-12 317.69\n",
+ "3 1958-04-19 317.58\n",
+ "4 1958-04-26 316.48"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Data retrieved on 12/02 (dd/mm). comment='\"' makes read_csv skip the lines that start with a double quote.\n",
+ "data_url = \"https://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/in_situ_co2/weekly/weekly_in_situ_co2_mlo.csv\"\n",
+ "data = pd.read_csv(data_url, names=['date', 'value'], comment='\"', encoding='utf-8')\n",
+ "data.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Contrôle Qualité\n",
+ "### Recherche des lignes sans donnée"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " date | \n",
+ " value | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [date, value]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.loc[data.isna().any(axis='columns')]  # rows with at least one missing value"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Il ne manque aucune donnée.\n",
+ "\n",
+ "### Vérification des écarts entre les données\n",
+ "On attend un écart de 7 jours entre deux mesures consécutives."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Parse all dates in one vectorized call instead of one pd.to_datetime call per row\n",
+ "data['timestamp'] = pd.to_datetime(data['date'])\n",
+ "sorted_data = data.set_index('timestamp').sort_index()\n",
+ "# Days elapsed since the previous measurement (NaN for the first row)\n",
+ "gaps = sorted_data.index.to_series().diff().dt.days\n",
+ "# Rows whose distance to the previous measurement is not the expected 7 days\n",
+ "sorted_data[gaps.notna() & (gaps != 7)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "l_index = sorted_data.index  # index of sorted_data (the parsed measurement timestamps)\n",
+ "l_index[:5]  # an Index has no .head(); slice to preview the first five entries"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
--
2.18.1