{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Analyse de la concentration de CO2"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import datetime\n",
"from scipy import interpolate\n",
"from IPython.core.interactiveshell import InteractiveShell\n",
"InteractiveShell.ast_node_interactivity = \"all\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Nous récupérons les données les plus récentes sur le site en pointant sur un fichier au format .csv ; si cette récupération est possible, nous enregistrons une copie de ce fichier. Si, pour une raison quelconque, nous n'arrivons pas à faire ce téléchargement, nous travaillons sur les dernières données téléchargées."
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Erreur lors du téléchargement : \n",
"Nous téléchargeons les dernières données enregistrer sur notre PC\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" | \n",
" | \n",
" Excel | \n",
" | \n",
" [ppm] | \n",
" [ppm] | \n",
" [ppm] | \n",
" [ppm] | \n",
" [ppm] | \n",
" [ppm].1 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 1958 | \n",
" 1 | \n",
" 21200 | \n",
" 1958.0411 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 1958 | \n",
" 2 | \n",
" 21231 | \n",
" 1958.1260 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 1958 | \n",
" 3 | \n",
" 21259 | \n",
" 1958.2027 | \n",
" 315.70 | \n",
" 314.44 | \n",
" 316.19 | \n",
" 314.91 | \n",
" 315.70 | \n",
" 314.44 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 1958 | \n",
" 4 | \n",
" 21290 | \n",
" 1958.2877 | \n",
" 317.45 | \n",
" 315.16 | \n",
" 317.30 | \n",
" 314.99 | \n",
" 317.45 | \n",
" 315.16 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" 1958 | \n",
" 5 | \n",
" 21320 | \n",
" 1958.3699 | \n",
" 317.51 | \n",
" 314.71 | \n",
" 317.86 | \n",
" 315.06 | \n",
" 317.51 | \n",
" 314.71 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 Excel [ppm] [ppm] \\\n",
"0 0 1958 1 21200 1958.0411 -99.99 -99.99 \n",
"1 1 1958 2 21231 1958.1260 -99.99 -99.99 \n",
"2 2 1958 3 21259 1958.2027 315.70 314.44 \n",
"3 3 1958 4 21290 1958.2877 317.45 315.16 \n",
"4 4 1958 5 21320 1958.3699 317.51 314.71 \n",
"\n",
" [ppm] [ppm] [ppm] [ppm].1 \n",
"0 -99.99 -99.99 -99.99 -99.99 \n",
"1 -99.99 -99.99 -99.99 -99.99 \n",
"2 316.19 314.91 315.70 314.44 \n",
"3 317.30 314.99 317.45 315.16 \n",
"4 317.86 315.06 317.51 314.71 "
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" | \n",
" | \n",
" Excel | \n",
" | \n",
" [ppm] | \n",
" [ppm] | \n",
" [ppm] | \n",
" [ppm] | \n",
" [ppm] | \n",
" [ppm].1 | \n",
"
\n",
" \n",
" \n",
" \n",
" 751 | \n",
" 751 | \n",
" 2020 | \n",
" 8 | \n",
" 44058 | \n",
" 2020.6230 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
"
\n",
" \n",
" 752 | \n",
" 752 | \n",
" 2020 | \n",
" 9 | \n",
" 44089 | \n",
" 2020.7077 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
"
\n",
" \n",
" 753 | \n",
" 753 | \n",
" 2020 | \n",
" 10 | \n",
" 44119 | \n",
" 2020.7896 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
"
\n",
" \n",
" 754 | \n",
" 754 | \n",
" 2020 | \n",
" 11 | \n",
" 44150 | \n",
" 2020.8743 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
"
\n",
" \n",
" 755 | \n",
" 755 | \n",
" 2020 | \n",
" 12 | \n",
" 44180 | \n",
" 2020.9563 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 Excel [ppm] [ppm] \\\n",
"751 751 2020 8 44058 2020.6230 -99.99 -99.99 \n",
"752 752 2020 9 44089 2020.7077 -99.99 -99.99 \n",
"753 753 2020 10 44119 2020.7896 -99.99 -99.99 \n",
"754 754 2020 11 44150 2020.8743 -99.99 -99.99 \n",
"755 755 2020 12 44180 2020.9563 -99.99 -99.99 \n",
"\n",
" [ppm] [ppm] [ppm] [ppm].1 \n",
"751 -99.99 -99.99 -99.99 -99.99 \n",
"752 -99.99 -99.99 -99.99 -99.99 \n",
"753 -99.99 -99.99 -99.99 -99.99 \n",
"754 -99.99 -99.99 -99.99 -99.99 \n",
"755 -99.99 -99.99 -99.99 -99.99 "
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fetch the latest monthly CO2 record; fall back to the local copy if the download fails.\n",
"try:\n",
"    raw_data = pd.read_csv(\"https://scrippsco.ucsd.edu/assets/data/atmospheric/stations/\"\n",
"                           \"in_situ_co2/monthly/monthly_in_situ_co2_mlo.csv\", skiprows=56)\n",
"except OSError as err:\n",
"    # Download failures surface as OSError (urllib's URLError derives from it).\n",
"    print(\"Erreur lors du téléchargement : {0}\".format(err))\n",
"    print(\"Nous téléchargeons les dernières données enregistrer sur notre PC\")\n",
"    # NOTE: the local copy is written with its row index (index=True below), so this read\n",
"    # yields an extra leading 'Unnamed: 0' column compared with a fresh download.\n",
"    raw_data = pd.read_csv(\"monthly_in_situ_co2_mlo\")\n",
"except Exception as err:\n",
"    # Report the exception class without `sys`, which this notebook never imports\n",
"    # (the previous sys.exc_info() call would itself have raised NameError here).\n",
"    print(\"Unexpected error:\", type(err))\n",
"    raise\n",
"else:\n",
"    # Refresh the local copy only after a successful download.\n",
"    raw_data.to_csv('monthly_in_situ_co2_mlo',index=True)\n",
"\n",
"raw_data.head(5)\n",
"raw_data.tail(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Nous voyons que les noms des colonnes ne sont pas très représentatifs ; nous les renommons."
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Unnamed: 0', ' ', ' ', ' Excel', ' ', ' [ppm]',\n",
" ' [ppm] ', ' [ppm]', ' [ppm]', ' [ppm]',\n",
" ' [ppm].1'],\n",
" dtype='object')"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Yr | \n",
" Mn | \n",
" Date 1 | \n",
" Date 2 | \n",
" s1 | \n",
" s2 | \n",
" s3 | \n",
" s4 | \n",
" s5 | \n",
" s6 | \n",
" s7 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 1958 | \n",
" 1 | \n",
" 21200 | \n",
" 1958.0411 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 1958 | \n",
" 2 | \n",
" 21231 | \n",
" 1958.1260 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
" -99.99 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 1958 | \n",
" 3 | \n",
" 21259 | \n",
" 1958.2027 | \n",
" 315.70 | \n",
" 314.44 | \n",
" 316.19 | \n",
" 314.91 | \n",
" 315.70 | \n",
" 314.44 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 1958 | \n",
" 4 | \n",
" 21290 | \n",
" 1958.2877 | \n",
" 317.45 | \n",
" 315.16 | \n",
" 317.30 | \n",
" 314.99 | \n",
" 317.45 | \n",
" 315.16 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" 1958 | \n",
" 5 | \n",
" 21320 | \n",
" 1958.3699 | \n",
" 317.51 | \n",
" 314.71 | \n",
" 317.86 | \n",
" 315.06 | \n",
" 317.51 | \n",
" 314.71 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Yr Mn Date 1 Date 2 s1 s2 s3 s4 s5 \\\n",
"0 0 1958 1 21200 1958.0411 -99.99 -99.99 -99.99 -99.99 \n",
"1 1 1958 2 21231 1958.1260 -99.99 -99.99 -99.99 -99.99 \n",
"2 2 1958 3 21259 1958.2027 315.70 314.44 316.19 314.91 \n",
"3 3 1958 4 21290 1958.2877 317.45 315.16 317.30 314.99 \n",
"4 4 1958 5 21320 1958.3699 317.51 314.71 317.86 315.06 \n",
"\n",
" s6 s7 \n",
"0 -99.99 -99.99 \n",
"1 -99.99 -99.99 \n",
"2 315.70 314.44 \n",
"3 317.45 315.16 \n",
"4 317.51 314.71 "
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"raw_data.columns\n",
"# When the data come from the local copy, the first column ('Unnamed: 0') is only the row\n",
"# index saved by to_csv; drop it so the names below line up with year, month, dates and series.\n",
"# drop() returns a new frame, so raw_data itself is left untouched.\n",
"data = raw_data.drop(columns='Unnamed: 0', errors='ignore')\n",
"# Name the four date columns explicitly and number the remaining series s1, s2, ...\n",
"data.columns = ['Yr','Mn','Date 1','Date 2'] + ['s{}'.format(k) for k in range(1, data.shape[1] - 3)]\n",
"data.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Les données manquantes sont représentées par la valeur -99.99 ; nous remplaçons cette valeur par NaN, plus adéquate, dans un autre DataFrame."
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"# -99.99 marks a missing measurement; use np.nan because the np.NaN alias was removed in NumPy 2.0.\n",
"data = data.replace(-99.99, np.nan);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"On visualise les lignes dont au moins une colonne contient une donnée manquante."
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Yr | \n",
" Mn | \n",
" Date 1 | \n",
" Date 2 | \n",
" s1 | \n",
" s2 | \n",
" s3 | \n",
" s4 | \n",
" s5 | \n",
" s6 | \n",
" s7 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 1958 | \n",
" 1 | \n",
" 21200 | \n",
" 1958.0411 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 1958 | \n",
" 2 | \n",
" 21231 | \n",
" 1958.1260 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 5 | \n",
" 5 | \n",
" 1958 | \n",
" 6 | \n",
" 21351 | \n",
" 1958.4548 | \n",
" NaN | \n",
" NaN | \n",
" 317.24 | \n",
" 315.14 | \n",
" 317.24 | \n",
" 315.14 | \n",
"
\n",
" \n",
" 9 | \n",
" 9 | \n",
" 1958 | \n",
" 10 | \n",
" 21473 | \n",
" 1958.7890 | \n",
" NaN | \n",
" NaN | \n",
" 312.44 | \n",
" 315.40 | \n",
" 312.44 | \n",
" 315.40 | \n",
"
\n",
" \n",
" 73 | \n",
" 73 | \n",
" 1964 | \n",
" 2 | \n",
" 23422 | \n",
" 1964.1257 | \n",
" NaN | \n",
" NaN | \n",
" 320.01 | \n",
" 319.36 | \n",
" 320.01 | \n",
" 319.36 | \n",
"
\n",
" \n",
" 74 | \n",
" 74 | \n",
" 1964 | \n",
" 3 | \n",
" 23451 | \n",
" 1964.2049 | \n",
" NaN | \n",
" NaN | \n",
" 320.74 | \n",
" 319.41 | \n",
" 320.74 | \n",
" 319.41 | \n",
"
\n",
" \n",
" 75 | \n",
" 75 | \n",
" 1964 | \n",
" 4 | \n",
" 23482 | \n",
" 1964.2896 | \n",
" NaN | \n",
" NaN | \n",
" 321.83 | \n",
" 319.45 | \n",
" 321.83 | \n",
" 319.45 | \n",
"
\n",
" \n",
" 745 | \n",
" 745 | \n",
" 2020 | \n",
" 2 | \n",
" 43876 | \n",
" 2020.1257 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 746 | \n",
" 746 | \n",
" 2020 | \n",
" 3 | \n",
" 43905 | \n",
" 2020.2049 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 747 | \n",
" 747 | \n",
" 2020 | \n",
" 4 | \n",
" 43936 | \n",
" 2020.2896 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 748 | \n",
" 748 | \n",
" 2020 | \n",
" 5 | \n",
" 43966 | \n",
" 2020.3716 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 749 | \n",
" 749 | \n",
" 2020 | \n",
" 6 | \n",
" 43997 | \n",
" 2020.4563 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 750 | \n",
" 750 | \n",
" 2020 | \n",
" 7 | \n",
" 44027 | \n",
" 2020.5383 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 751 | \n",
" 751 | \n",
" 2020 | \n",
" 8 | \n",
" 44058 | \n",
" 2020.6230 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 752 | \n",
" 752 | \n",
" 2020 | \n",
" 9 | \n",
" 44089 | \n",
" 2020.7077 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 753 | \n",
" 753 | \n",
" 2020 | \n",
" 10 | \n",
" 44119 | \n",
" 2020.7896 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 754 | \n",
" 754 | \n",
" 2020 | \n",
" 11 | \n",
" 44150 | \n",
" 2020.8743 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 755 | \n",
" 755 | \n",
" 2020 | \n",
" 12 | \n",
" 44180 | \n",
" 2020.9563 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Yr Mn Date 1 Date 2 s1 s2 s3 s4 s5 s6 \\\n",
"0 0 1958 1 21200 1958.0411 NaN NaN NaN NaN NaN \n",
"1 1 1958 2 21231 1958.1260 NaN NaN NaN NaN NaN \n",
"5 5 1958 6 21351 1958.4548 NaN NaN 317.24 315.14 317.24 \n",
"9 9 1958 10 21473 1958.7890 NaN NaN 312.44 315.40 312.44 \n",
"73 73 1964 2 23422 1964.1257 NaN NaN 320.01 319.36 320.01 \n",
"74 74 1964 3 23451 1964.2049 NaN NaN 320.74 319.41 320.74 \n",
"75 75 1964 4 23482 1964.2896 NaN NaN 321.83 319.45 321.83 \n",
"745 745 2020 2 43876 2020.1257 NaN NaN NaN NaN NaN \n",
"746 746 2020 3 43905 2020.2049 NaN NaN NaN NaN NaN \n",
"747 747 2020 4 43936 2020.2896 NaN NaN NaN NaN NaN \n",
"748 748 2020 5 43966 2020.3716 NaN NaN NaN NaN NaN \n",
"749 749 2020 6 43997 2020.4563 NaN NaN NaN NaN NaN \n",
"750 750 2020 7 44027 2020.5383 NaN NaN NaN NaN NaN \n",
"751 751 2020 8 44058 2020.6230 NaN NaN NaN NaN NaN \n",
"752 752 2020 9 44089 2020.7077 NaN NaN NaN NaN NaN \n",
"753 753 2020 10 44119 2020.7896 NaN NaN NaN NaN NaN \n",
"754 754 2020 11 44150 2020.8743 NaN NaN NaN NaN NaN \n",
"755 755 2020 12 44180 2020.9563 NaN NaN NaN NaN NaN \n",
"\n",
" s7 \n",
"0 NaN \n",
"1 NaN \n",
"5 315.14 \n",
"9 315.40 \n",
"73 319.36 \n",
"74 319.41 \n",
"75 319.45 \n",
"745 NaN \n",
"746 NaN \n",
"747 NaN \n",
"748 NaN \n",
"749 NaN \n",
"750 NaN \n",
"751 NaN \n",
"752 NaN \n",
"753 NaN \n",
"754 NaN \n",
"755 NaN "
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Display every row that has at least one missing value.\n",
"has_missing = data.isna().any(axis='columns')\n",
"data.loc[has_missing]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"On ajoute un index `period` au DataFrame ; cet index représente la période de mesure. \n",
"Cette date est mise dans un format compréhensible par pandas. Les lignes incomplètes visualisées ci-dessus sont ensuite supprimées."
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "year 0 is out of range",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'period'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mm\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Yr'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Mn'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'period'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdropna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'period'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mm\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Yr'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Mn'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'period'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdropna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: year 0 is out of range"
]
}
],
"source": [
"# One calendar date (first day of the month) per row, built from the year and month columns.\n",
"first_of_month = [datetime.date(year, month, 1) for year, month in zip(data['Yr'], data['Mn'])]\n",
"data['period'] = first_of_month\n",
"# Index the rows by measurement period, then keep only the rows without any missing value.\n",
"data = data.set_index('period').dropna().copy()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}