diff --git a/module3/exo3/exercice.ipynb b/module3/exo3/exercice.ipynb index bc8660e04d86b78e18a4135530eb24e8f04ee95f..3b055bbf99123866d3ab94547aee33d155276e48 100644 --- a/module3/exo3/exercice.ipynb +++ b/module3/exo3/exercice.ipynb @@ -33,16 +33,463 @@ "metadata": {}, "outputs": [], "source": [ - "# Fichier source des données brutes au 20/08/2020\n", + "# Fichier source des données brutes au 20/08/2020 (mise à jour mensuelle)\n", + "# dans le dossier /work/module3/exo3/monthly_in_situ_co2_mlo_20200820.csv\n", + "# @see https://scrippsco2.ucsd.edu/data/atmospheric_co2/primary_mlo_co2_record.html\n", "raw_data = \"monthly_in_situ_co2_mlo_20200820.csv\"" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# https://pandas.pydata.org\n", + "# Version 0.22.0 (December 29, 2017) sur ce Jupyter !\n", + "import pandas as pd\n", + "\n", + "# print(pd.__version__) # La version de Pandas actuelle\n", + "# pd.show_versions() # Toutes les extensions installées\n", + "\n", + "# Lecture du fichier CSV et saut à la ligne 55 pour ignorer les commentaires\n", + "# Ce fichier possède 10 colonnes séparées par des virgules : \n", + "# - colonnes 1-4 : dates en plusieurs formats : Yr, Mn, Date, Date\n", + "# - colonnes 5-10 : mesures en ppm, si absente valeur par défaut = '-99.99'\n", + "data = pd.read_csv(raw_data, skiprows = 54)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YrMnDateDateCO2seasonallyfitseasonallyCO2seasonally
0adjustedadjusted fitfilledadjusted filled
1Excel[ppm][ppm][ppm][ppm][ppm][ppm]
2195801212001958.0411-99.99-99.99-99.99-99.99-99.99-99.99
3195802212311958.1260-99.99-99.99-99.99-99.99-99.99-99.99
4195803212591958.2027315.70314.44316.19314.91315.70314.44
5195804212901958.2877317.45315.16317.30314.99317.45315.16
6195805213201958.3699317.51314.71317.86315.06317.51314.71
7195806213511958.4548-99.99-99.99317.24315.14317.24315.14
8195807213811958.5370315.86315.19315.86315.22315.86315.19
9195808214121958.6219314.93316.19313.99315.29314.93316.19
\n", + "
" + ], + "text/plain": [ + " Yr Mn Date Date CO2 seasonally fit \\\n", + "0 adjusted \n", + "1 Excel [ppm] [ppm] [ppm] \n", + "2 1958 01 21200 1958.0411 -99.99 -99.99 -99.99 \n", + "3 1958 02 21231 1958.1260 -99.99 -99.99 -99.99 \n", + "4 1958 03 21259 1958.2027 315.70 314.44 316.19 \n", + "5 1958 04 21290 1958.2877 317.45 315.16 317.30 \n", + "6 1958 05 21320 1958.3699 317.51 314.71 317.86 \n", + "7 1958 06 21351 1958.4548 -99.99 -99.99 317.24 \n", + "8 1958 07 21381 1958.5370 315.86 315.19 315.86 \n", + "9 1958 08 21412 1958.6219 314.93 316.19 313.99 \n", + "\n", + " seasonally CO2 seasonally \n", + "0 adjusted fit filled adjusted filled \n", + "1 [ppm] [ppm] [ppm] \n", + "2 -99.99 -99.99 -99.99 \n", + "3 -99.99 -99.99 -99.99 \n", + "4 314.91 315.70 314.44 \n", + "5 314.99 317.45 315.16 \n", + "6 315.06 317.51 314.71 \n", + "7 315.14 317.24 315.14 \n", + "8 315.22 315.86 315.19 \n", + "9 315.29 314.93 316.19 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Vérification des 10 premières lignes, avec des valeurs absentes\n", + "# Comparer avec le fichier original !\n", + "data.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YrMnDateDateCO2seasonallyfitseasonallyCO2seasonally
748202003439052020.2049414.51412.94414.84413.24414.51412.94
749202004439362020.2896416.18413.35416.28413.43416.18413.35
750202005439662020.3716417.17413.75417.01413.60417.17413.75
751202006439972020.4563416.30413.75416.31413.77416.30413.75
752202007440272020.5383414.49413.70414.69413.94414.49413.70
753202008440582020.6230-99.99-99.99-99.99-99.99-99.99-99.99
754202009440892020.7077-99.99-99.99-99.99-99.99-99.99-99.99
755202010441192020.7896-99.99-99.99-99.99-99.99-99.99-99.99
756202011441502020.8743-99.99-99.99-99.99-99.99-99.99-99.99
757202012441802020.9563-99.99-99.99-99.99-99.99-99.99-99.99
\n", + "
" + ], + "text/plain": [ + " Yr Mn Date Date CO2 seasonally fit \\\n", + "748 2020 03 43905 2020.2049 414.51 412.94 414.84 \n", + "749 2020 04 43936 2020.2896 416.18 413.35 416.28 \n", + "750 2020 05 43966 2020.3716 417.17 413.75 417.01 \n", + "751 2020 06 43997 2020.4563 416.30 413.75 416.31 \n", + "752 2020 07 44027 2020.5383 414.49 413.70 414.69 \n", + "753 2020 08 44058 2020.6230 -99.99 -99.99 -99.99 \n", + "754 2020 09 44089 2020.7077 -99.99 -99.99 -99.99 \n", + "755 2020 10 44119 2020.7896 -99.99 -99.99 -99.99 \n", + "756 2020 11 44150 2020.8743 -99.99 -99.99 -99.99 \n", + "757 2020 12 44180 2020.9563 -99.99 -99.99 -99.99 \n", + "\n", + " seasonally CO2 seasonally \n", + "748 413.24 414.51 412.94 \n", + "749 413.43 416.18 413.35 \n", + "750 413.60 417.17 413.75 \n", + "751 413.77 416.30 413.75 \n", + "752 413.94 414.49 413.70 \n", + "753 -99.99 -99.99 -99.99 \n", + "754 -99.99 -99.99 -99.99 \n", + "755 -99.99 -99.99 -99.99 \n", + "756 -99.99 -99.99 -99.99 \n", + "757 -99.99 -99.99 -99.99 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Vérification des 10 dernières lignes, avec des valeurs absentes\n", + "# Comparer avec le fichier original !\n", + "data.tail(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO\n", + "# Récupérer les entêtes de colonnes\n", + "# Vérifier les valeurs à '-99.99'\n", + "# Compter les lignes" + ] } ], "metadata": {