MAJ exercice 3 (module 3)

parent 396b9256
......@@ -33,16 +33,463 @@
"metadata": {},
"outputs": [],
"source": [
"# Fichier source des données brutes au 20/08/2020\n",
"# Fichier source des données brutes au 20/08/2020 (mise à jour mensuelle)\n",
"# dans le dossier /work/module3/exo3/monthly_in_situ_co2_mlo_20200820.csv\n",
"# @see https://scrippsco2.ucsd.edu/data/atmospheric_co2/primary_mlo_co2_record.html\n",
"raw_data = \"monthly_in_situ_co2_mlo_20200820.csv\""
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": []
"source": [
"# https://pandas.pydata.org\n",
"# Version 0.22.0 (December 29, 2017) sur ce Jupyter !\n",
"import pandas as pd\n",
"\n",
"# print(pd.__version__) # La version de Pandas actuelle\n",
"# pd.show_versions() # Toutes les extensions installées\n",
"\n",
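"# Lecture du fichier CSV : skiprows = 54 ignore les 54 premières lignes (commentaires),\n",
"# la ligne 55 fournit donc les noms de colonnes. Attention : les 2 lignes suivantes\n",
"# (suite des entêtes et unités) sont lues comme des données (index 0 et 1).\n",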
"# Ce fichier possède 10 colonnes séparées par des virgules : \n",
"# - colonnes 1-4 : dates en plusieurs formats : Yr, Mn, Date, Date\n",
"# - colonnes 5-10 : mesures en ppm ; une mesure absente est codée par la valeur -99.99\n",
"data = pd.read_csv(raw_data, skiprows = 54)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Yr</th>\n",
" <th>Mn</th>\n",
" <th>Date</th>\n",
" <th>Date</th>\n",
" <th>CO2</th>\n",
" <th>seasonally</th>\n",
" <th>fit</th>\n",
" <th>seasonally</th>\n",
" <th>CO2</th>\n",
" <th>seasonally</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>adjusted</td>\n",
" <td></td>\n",
" <td>adjusted fit</td>\n",
" <td>filled</td>\n",
" <td>adjusted filled</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>Excel</td>\n",
" <td></td>\n",
" <td>[ppm]</td>\n",
" <td>[ppm]</td>\n",
" <td>[ppm]</td>\n",
" <td>[ppm]</td>\n",
" <td>[ppm]</td>\n",
" <td>[ppm]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1958</td>\n",
" <td>01</td>\n",
" <td>21200</td>\n",
" <td>1958.0411</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1958</td>\n",
" <td>02</td>\n",
" <td>21231</td>\n",
" <td>1958.1260</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1958</td>\n",
" <td>03</td>\n",
" <td>21259</td>\n",
" <td>1958.2027</td>\n",
" <td>315.70</td>\n",
" <td>314.44</td>\n",
" <td>316.19</td>\n",
" <td>314.91</td>\n",
" <td>315.70</td>\n",
" <td>314.44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>1958</td>\n",
" <td>04</td>\n",
" <td>21290</td>\n",
" <td>1958.2877</td>\n",
" <td>317.45</td>\n",
" <td>315.16</td>\n",
" <td>317.30</td>\n",
" <td>314.99</td>\n",
" <td>317.45</td>\n",
" <td>315.16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>1958</td>\n",
" <td>05</td>\n",
" <td>21320</td>\n",
" <td>1958.3699</td>\n",
" <td>317.51</td>\n",
" <td>314.71</td>\n",
" <td>317.86</td>\n",
" <td>315.06</td>\n",
" <td>317.51</td>\n",
" <td>314.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1958</td>\n",
" <td>06</td>\n",
" <td>21351</td>\n",
" <td>1958.4548</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>317.24</td>\n",
" <td>315.14</td>\n",
" <td>317.24</td>\n",
" <td>315.14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>1958</td>\n",
" <td>07</td>\n",
" <td>21381</td>\n",
" <td>1958.5370</td>\n",
" <td>315.86</td>\n",
" <td>315.19</td>\n",
" <td>315.86</td>\n",
" <td>315.22</td>\n",
" <td>315.86</td>\n",
" <td>315.19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>1958</td>\n",
" <td>08</td>\n",
" <td>21412</td>\n",
" <td>1958.6219</td>\n",
" <td>314.93</td>\n",
" <td>316.19</td>\n",
" <td>313.99</td>\n",
" <td>315.29</td>\n",
" <td>314.93</td>\n",
" <td>316.19</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Yr Mn Date Date CO2 seasonally fit \\\n",
"0 adjusted \n",
"1 Excel [ppm] [ppm] [ppm] \n",
"2 1958 01 21200 1958.0411 -99.99 -99.99 -99.99 \n",
"3 1958 02 21231 1958.1260 -99.99 -99.99 -99.99 \n",
"4 1958 03 21259 1958.2027 315.70 314.44 316.19 \n",
"5 1958 04 21290 1958.2877 317.45 315.16 317.30 \n",
"6 1958 05 21320 1958.3699 317.51 314.71 317.86 \n",
"7 1958 06 21351 1958.4548 -99.99 -99.99 317.24 \n",
"8 1958 07 21381 1958.5370 315.86 315.19 315.86 \n",
"9 1958 08 21412 1958.6219 314.93 316.19 313.99 \n",
"\n",
" seasonally CO2 seasonally \n",
"0 adjusted fit filled adjusted filled \n",
"1 [ppm] [ppm] [ppm] \n",
"2 -99.99 -99.99 -99.99 \n",
"3 -99.99 -99.99 -99.99 \n",
"4 314.91 315.70 314.44 \n",
"5 314.99 317.45 315.16 \n",
"6 315.06 317.51 314.71 \n",
"7 315.14 317.24 315.14 \n",
"8 315.22 315.86 315.19 \n",
"9 315.29 314.93 316.19 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Vérification des 10 premières lignes, avec des valeurs absentes\n",
"# Comparer avec le fichier original !\n",
"data.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Yr</th>\n",
" <th>Mn</th>\n",
" <th>Date</th>\n",
" <th>Date</th>\n",
" <th>CO2</th>\n",
" <th>seasonally</th>\n",
" <th>fit</th>\n",
" <th>seasonally</th>\n",
" <th>CO2</th>\n",
" <th>seasonally</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>748</th>\n",
" <td>2020</td>\n",
" <td>03</td>\n",
" <td>43905</td>\n",
" <td>2020.2049</td>\n",
" <td>414.51</td>\n",
" <td>412.94</td>\n",
" <td>414.84</td>\n",
" <td>413.24</td>\n",
" <td>414.51</td>\n",
" <td>412.94</td>\n",
" </tr>\n",
" <tr>\n",
" <th>749</th>\n",
" <td>2020</td>\n",
" <td>04</td>\n",
" <td>43936</td>\n",
" <td>2020.2896</td>\n",
" <td>416.18</td>\n",
" <td>413.35</td>\n",
" <td>416.28</td>\n",
" <td>413.43</td>\n",
" <td>416.18</td>\n",
" <td>413.35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>750</th>\n",
" <td>2020</td>\n",
" <td>05</td>\n",
" <td>43966</td>\n",
" <td>2020.3716</td>\n",
" <td>417.17</td>\n",
" <td>413.75</td>\n",
" <td>417.01</td>\n",
" <td>413.60</td>\n",
" <td>417.17</td>\n",
" <td>413.75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>751</th>\n",
" <td>2020</td>\n",
" <td>06</td>\n",
" <td>43997</td>\n",
" <td>2020.4563</td>\n",
" <td>416.30</td>\n",
" <td>413.75</td>\n",
" <td>416.31</td>\n",
" <td>413.77</td>\n",
" <td>416.30</td>\n",
" <td>413.75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>752</th>\n",
" <td>2020</td>\n",
" <td>07</td>\n",
" <td>44027</td>\n",
" <td>2020.5383</td>\n",
" <td>414.49</td>\n",
" <td>413.70</td>\n",
" <td>414.69</td>\n",
" <td>413.94</td>\n",
" <td>414.49</td>\n",
" <td>413.70</td>\n",
" </tr>\n",
" <tr>\n",
" <th>753</th>\n",
" <td>2020</td>\n",
" <td>08</td>\n",
" <td>44058</td>\n",
" <td>2020.6230</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>754</th>\n",
" <td>2020</td>\n",
" <td>09</td>\n",
" <td>44089</td>\n",
" <td>2020.7077</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>755</th>\n",
" <td>2020</td>\n",
" <td>10</td>\n",
" <td>44119</td>\n",
" <td>2020.7896</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>756</th>\n",
" <td>2020</td>\n",
" <td>11</td>\n",
" <td>44150</td>\n",
" <td>2020.8743</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>757</th>\n",
" <td>2020</td>\n",
" <td>12</td>\n",
" <td>44180</td>\n",
" <td>2020.9563</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Yr Mn Date Date CO2 seasonally fit \\\n",
"748 2020 03 43905 2020.2049 414.51 412.94 414.84 \n",
"749 2020 04 43936 2020.2896 416.18 413.35 416.28 \n",
"750 2020 05 43966 2020.3716 417.17 413.75 417.01 \n",
"751 2020 06 43997 2020.4563 416.30 413.75 416.31 \n",
"752 2020 07 44027 2020.5383 414.49 413.70 414.69 \n",
"753 2020 08 44058 2020.6230 -99.99 -99.99 -99.99 \n",
"754 2020 09 44089 2020.7077 -99.99 -99.99 -99.99 \n",
"755 2020 10 44119 2020.7896 -99.99 -99.99 -99.99 \n",
"756 2020 11 44150 2020.8743 -99.99 -99.99 -99.99 \n",
"757 2020 12 44180 2020.9563 -99.99 -99.99 -99.99 \n",
"\n",
" seasonally CO2 seasonally \n",
"748 413.24 414.51 412.94 \n",
"749 413.43 416.18 413.35 \n",
"750 413.60 417.17 413.75 \n",
"751 413.77 416.30 413.75 \n",
"752 413.94 414.49 413.70 \n",
"753 -99.99 -99.99 -99.99 \n",
"754 -99.99 -99.99 -99.99 \n",
"755 -99.99 -99.99 -99.99 \n",
"756 -99.99 -99.99 -99.99 \n",
"757 -99.99 -99.99 -99.99 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Vérification des 10 dernières lignes, avec des valeurs absentes\n",
"# Comparer avec le fichier original !\n",
"data.tail(10)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# TODO\n",
"# Récupérer les entêtes de colonnes\n",
"# Vérifier les valeurs à '-99.99'\n",
"# Compter les lignes"
]
}
],
"metadata": {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment