From 4dcebe6415b22bf80b101d987525ed7d7d151341 Mon Sep 17 00:00:00 2001 From: 57e92ddd8e047446b30777d5b69846d1 <57e92ddd8e047446b30777d5b69846d1@app-learninglab.inria.fr> Date: Sun, 12 Feb 2023 21:23:41 +0000 Subject: [PATCH] wip --- module3/exo3/exercice.ipynb | 199 ++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) diff --git a/module3/exo3/exercice.ipynb b/module3/exo3/exercice.ipynb index 88e4157..7dbd2ac 100644 --- a/module3/exo3/exercice.ipynb +++ b/module3/exo3/exercice.ipynb @@ -7,6 +7,205 @@ "# Sujet 1 : Concentration de CO2 dans l'atmosphère depuis 1958" ] }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import isoweek\n", + "\n", + "# set diagram sizes\n", + "# print(plt.rcParams['figure.dpi']) # default = 72\n", + "# print(plt.rcParams['figure.figsize']) # default = 6.0, 4.0\n", + "# plt.rcParams['figure.dpi'] = 100\n", + "plt.rcParams['figure.figsize'] = [12.0, 4.0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Les données sont disponibles sur le site Web de l'[institut Scripps](https://scrippsco2.ucsd.edu/data/atmospheric_co2/primary_mlo_co2_record.html)." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datevalue
01958-03-29316.19
11958-04-05317.31
21958-04-12317.69
31958-04-19317.58
41958-04-26316.48
\n", + "
" + ], + "text/plain": [ + " date value\n", + "0 1958-03-29 316.19\n", + "1 1958-04-05 317.31\n", + "2 1958-04-12 317.69\n", + "3 1958-04-19 317.58\n", + "4 1958-04-26 316.48" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# récolte le 12/02\n", + "data_url = \"https://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/in_situ_co2/weekly/weekly_in_situ_co2_mlo.csv\"\n", + "data = pd.read_csv(data_url, encoding = 'utf-8', comment='\"', names=[\"date\", \"value\"])\n", + "data.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Contrôle Qualité\n", + "### Recherche des lignes sans donnée" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datevalue
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [date, value]\n", + "Index: []" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data.isnull().any(axis=1)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Il ne manque aucune donnée.\n", + "\n", + "### Vérification des écarts entre les données\n", + "On attend un écart de 7 jours entre chaque mesure." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data['timestamp'] = [ pd.to_datetime(d) for d in data['date'] ]\n", + "sorted_data = data.set_index('timestamp').sort_index()\n", + "sorted_data.head(5)\n", + "# for row1, row2 in zip(sorted_data[:-1], sorted_data[1:]):\n", + "# delta = (row2['timestamp'] - row1['timestamp']).days\n", + "# if delta != 7:\n", + "# print(row1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "l_index = sorted_data.index\n", + "l_index.head(5)" + ] + }, { "cell_type": "code", "execution_count": null, -- 2.18.1