diff --git a/module3/exo3/exercice.ipynb b/module3/exo3/exercice.ipynb
index 891ae0113ae411d5c4332191b1604ed82b225413..6674ee1f28d532e6fde71bf0a801d161d7fbcf7c 100644
--- a/module3/exo3/exercice.ipynb
+++ b/module3/exo3/exercice.ipynb
@@ -9,7 +9,7 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
@@ -32,7 +32,7 @@
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": 57,
"metadata": {},
"outputs": [
{
@@ -168,7 +168,7 @@
"4 317.86 315.06 317.51 314.71 "
]
},
- "execution_count": 39,
+ "execution_count": 57,
"metadata": {},
"output_type": "execute_result"
},
@@ -297,7 +297,7 @@
"755 -99.99 -99.99 -99.99 -99.99 "
]
},
- "execution_count": 39,
+ "execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
@@ -320,9 +320,16 @@
"raw_data.tail(5)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Nous voyons que les noms des colonnes ne sont pas très représentatifs ; nous modifions donc les noms des colonnes."
+ ]
+ },
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 58,
"metadata": {},
"outputs": [
{
@@ -334,29 +341,547 @@
" dtype='object')"
]
},
- "execution_count": 43,
+ "execution_count": 58,
"metadata": {},
"output_type": "execute_result"
},
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Yr | \n",
+ " Mn | \n",
+ " Date 1 | \n",
+ " Date 2 | \n",
+ " s1 | \n",
+ " s2 | \n",
+ " s3 | \n",
+ " s4 | \n",
+ " s5 | \n",
+ " s6 | \n",
+ " s7 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1958 | \n",
+ " 1 | \n",
+ " 21200 | \n",
+ " 1958.0411 | \n",
+ " -99.99 | \n",
+ " -99.99 | \n",
+ " -99.99 | \n",
+ " -99.99 | \n",
+ " -99.99 | \n",
+ " -99.99 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1958 | \n",
+ " 2 | \n",
+ " 21231 | \n",
+ " 1958.1260 | \n",
+ " -99.99 | \n",
+ " -99.99 | \n",
+ " -99.99 | \n",
+ " -99.99 | \n",
+ " -99.99 | \n",
+ " -99.99 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 1958 | \n",
+ " 3 | \n",
+ " 21259 | \n",
+ " 1958.2027 | \n",
+ " 315.70 | \n",
+ " 314.44 | \n",
+ " 316.19 | \n",
+ " 314.91 | \n",
+ " 315.70 | \n",
+ " 314.44 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 1958 | \n",
+ " 4 | \n",
+ " 21290 | \n",
+ " 1958.2877 | \n",
+ " 317.45 | \n",
+ " 315.16 | \n",
+ " 317.30 | \n",
+ " 314.99 | \n",
+ " 317.45 | \n",
+ " 315.16 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 1958 | \n",
+ " 5 | \n",
+ " 21320 | \n",
+ " 1958.3699 | \n",
+ " 317.51 | \n",
+ " 314.71 | \n",
+ " 317.86 | \n",
+ " 315.06 | \n",
+ " 317.51 | \n",
+ " 314.71 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Yr Mn Date 1 Date 2 s1 s2 s3 s4 s5 \\\n",
+ "0 0 1958 1 21200 1958.0411 -99.99 -99.99 -99.99 -99.99 \n",
+ "1 1 1958 2 21231 1958.1260 -99.99 -99.99 -99.99 -99.99 \n",
+ "2 2 1958 3 21259 1958.2027 315.70 314.44 316.19 314.91 \n",
+ "3 3 1958 4 21290 1958.2877 317.45 315.16 317.30 314.99 \n",
+ "4 4 1958 5 21320 1958.3699 317.51 314.71 317.86 315.06 \n",
+ "\n",
+ " s6 s7 \n",
+ "0 -99.99 -99.99 \n",
+ "1 -99.99 -99.99 \n",
+ "2 315.70 314.44 \n",
+ "3 317.45 315.16 \n",
+ "4 317.51 314.71 "
+ ]
+ },
+ "execution_count": 58,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# The first column of raw_data just repeats the row number (0, 1, 2, ...), so it gets\n",
+ "# its own name; otherwise every label is shifted by one and 'Yr' holds 0 instead of 1958.\n",
+ "raw_data.columns = ['Row','Yr','Mn','Date 1','Date 2','s1','s2','s3','s4','s5','s6']\n",
+ "# Work on an independent copy so later edits to data never leak back into raw_data.\n",
+ "data = raw_data.copy()\n",
+ "data.head(5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Les données manquantes sont représentées par la valeur -99.99 ; nous remplaçons cette valeur par une valeur plus adéquate, NaN, dans un autre DataFrame."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# -99.99 marks a missing measurement; np.nan is the canonical spelling\n",
+ "# (the np.NaN alias was removed in NumPy 2.0).\n",
+ "data = data.replace(-99.99, np.nan)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "On visualise les lignes dont au moins une colonne contient une donnée manquante."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Yr | \n",
+ " Mn | \n",
+ " Date 1 | \n",
+ " Date 2 | \n",
+ " s1 | \n",
+ " s2 | \n",
+ " s3 | \n",
+ " s4 | \n",
+ " s5 | \n",
+ " s6 | \n",
+ " s7 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1958 | \n",
+ " 1 | \n",
+ " 21200 | \n",
+ " 1958.0411 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1958 | \n",
+ " 2 | \n",
+ " 21231 | \n",
+ " 1958.1260 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 5 | \n",
+ " 1958 | \n",
+ " 6 | \n",
+ " 21351 | \n",
+ " 1958.4548 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 317.24 | \n",
+ " 315.14 | \n",
+ " 317.24 | \n",
+ " 315.14 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 9 | \n",
+ " 1958 | \n",
+ " 10 | \n",
+ " 21473 | \n",
+ " 1958.7890 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 312.44 | \n",
+ " 315.40 | \n",
+ " 312.44 | \n",
+ " 315.40 | \n",
+ "
\n",
+ " \n",
+ " 73 | \n",
+ " 73 | \n",
+ " 1964 | \n",
+ " 2 | \n",
+ " 23422 | \n",
+ " 1964.1257 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 320.01 | \n",
+ " 319.36 | \n",
+ " 320.01 | \n",
+ " 319.36 | \n",
+ "
\n",
+ " \n",
+ " 74 | \n",
+ " 74 | \n",
+ " 1964 | \n",
+ " 3 | \n",
+ " 23451 | \n",
+ " 1964.2049 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 320.74 | \n",
+ " 319.41 | \n",
+ " 320.74 | \n",
+ " 319.41 | \n",
+ "
\n",
+ " \n",
+ " 75 | \n",
+ " 75 | \n",
+ " 1964 | \n",
+ " 4 | \n",
+ " 23482 | \n",
+ " 1964.2896 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 321.83 | \n",
+ " 319.45 | \n",
+ " 321.83 | \n",
+ " 319.45 | \n",
+ "
\n",
+ " \n",
+ " 745 | \n",
+ " 745 | \n",
+ " 2020 | \n",
+ " 2 | \n",
+ " 43876 | \n",
+ " 2020.1257 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 746 | \n",
+ " 746 | \n",
+ " 2020 | \n",
+ " 3 | \n",
+ " 43905 | \n",
+ " 2020.2049 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 747 | \n",
+ " 747 | \n",
+ " 2020 | \n",
+ " 4 | \n",
+ " 43936 | \n",
+ " 2020.2896 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 748 | \n",
+ " 748 | \n",
+ " 2020 | \n",
+ " 5 | \n",
+ " 43966 | \n",
+ " 2020.3716 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 749 | \n",
+ " 749 | \n",
+ " 2020 | \n",
+ " 6 | \n",
+ " 43997 | \n",
+ " 2020.4563 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 750 | \n",
+ " 750 | \n",
+ " 2020 | \n",
+ " 7 | \n",
+ " 44027 | \n",
+ " 2020.5383 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 751 | \n",
+ " 751 | \n",
+ " 2020 | \n",
+ " 8 | \n",
+ " 44058 | \n",
+ " 2020.6230 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 752 | \n",
+ " 752 | \n",
+ " 2020 | \n",
+ " 9 | \n",
+ " 44089 | \n",
+ " 2020.7077 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 753 | \n",
+ " 753 | \n",
+ " 2020 | \n",
+ " 10 | \n",
+ " 44119 | \n",
+ " 2020.7896 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 754 | \n",
+ " 754 | \n",
+ " 2020 | \n",
+ " 11 | \n",
+ " 44150 | \n",
+ " 2020.8743 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 755 | \n",
+ " 755 | \n",
+ " 2020 | \n",
+ " 12 | \n",
+ " 44180 | \n",
+ " 2020.9563 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Yr Mn Date 1 Date 2 s1 s2 s3 s4 s5 s6 \\\n",
+ "0 0 1958 1 21200 1958.0411 NaN NaN NaN NaN NaN \n",
+ "1 1 1958 2 21231 1958.1260 NaN NaN NaN NaN NaN \n",
+ "5 5 1958 6 21351 1958.4548 NaN NaN 317.24 315.14 317.24 \n",
+ "9 9 1958 10 21473 1958.7890 NaN NaN 312.44 315.40 312.44 \n",
+ "73 73 1964 2 23422 1964.1257 NaN NaN 320.01 319.36 320.01 \n",
+ "74 74 1964 3 23451 1964.2049 NaN NaN 320.74 319.41 320.74 \n",
+ "75 75 1964 4 23482 1964.2896 NaN NaN 321.83 319.45 321.83 \n",
+ "745 745 2020 2 43876 2020.1257 NaN NaN NaN NaN NaN \n",
+ "746 746 2020 3 43905 2020.2049 NaN NaN NaN NaN NaN \n",
+ "747 747 2020 4 43936 2020.2896 NaN NaN NaN NaN NaN \n",
+ "748 748 2020 5 43966 2020.3716 NaN NaN NaN NaN NaN \n",
+ "749 749 2020 6 43997 2020.4563 NaN NaN NaN NaN NaN \n",
+ "750 750 2020 7 44027 2020.5383 NaN NaN NaN NaN NaN \n",
+ "751 751 2020 8 44058 2020.6230 NaN NaN NaN NaN NaN \n",
+ "752 752 2020 9 44089 2020.7077 NaN NaN NaN NaN NaN \n",
+ "753 753 2020 10 44119 2020.7896 NaN NaN NaN NaN NaN \n",
+ "754 754 2020 11 44150 2020.8743 NaN NaN NaN NaN NaN \n",
+ "755 755 2020 12 44180 2020.9563 NaN NaN NaN NaN NaN \n",
+ "\n",
+ " s7 \n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "5 315.14 \n",
+ "9 315.40 \n",
+ "73 319.36 \n",
+ "74 319.41 \n",
+ "75 319.45 \n",
+ "745 NaN \n",
+ "746 NaN \n",
+ "747 NaN \n",
+ "748 NaN \n",
+ "749 NaN \n",
+ "750 NaN \n",
+ "751 NaN \n",
+ "752 NaN \n",
+ "753 NaN \n",
+ "754 NaN \n",
+ "755 NaN "
+ ]
+ },
+ "execution_count": 63,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Boolean mask: True for every row holding at least one NaN in any column.\n",
+ "has_missing = data.isnull().any(axis=1)\n",
+ "data[has_missing]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "On ajoute un index 'period' au DataFrame ; cet index représente la période de mesure. \n",
+ "Cette date est mise dans un format compréhensible par pandas. On supprime ensuite toutes les lignes contenant une donnée manquante, visualisées ci-dessus."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "metadata": {},
+ "outputs": [
{
"ename": "ValueError",
- "evalue": "Length mismatch: Expected axis has 11 elements, new values have 10 elements",
+ "evalue": "year 0 is out of range",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mraw_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mraw_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'Yr'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Mn'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Date 1'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Date 2'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m's1'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m's2'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m's3'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m's4'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m's5'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m's6'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
- "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__setattr__\u001b[0;34m(self, name, value)\u001b[0m\n\u001b[1;32m 3625\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3626\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3627\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__setattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3628\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3629\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/properties.pyx\u001b[0m in \u001b[0;36mpandas._libs.properties.AxisProperty.__set__\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m_set_axis\u001b[0;34m(self, axis, labels)\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 558\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_set_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 559\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 560\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_clear_item_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36mset_axis\u001b[0;34m(self, axis, new_labels)\u001b[0m\n\u001b[1;32m 3072\u001b[0m raise ValueError('Length mismatch: Expected axis has %d elements, '\n\u001b[1;32m 3073\u001b[0m \u001b[0;34m'new values have %d elements'\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3074\u001b[0;31m (old_len, new_len))\n\u001b[0m\u001b[1;32m 3075\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3076\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maxes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_labels\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mValueError\u001b[0m: Length mismatch: Expected axis has 11 elements, new values have 10 elements"
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'period'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mm\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Yr'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Mn'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'period'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdropna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'period'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mm\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Yr'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Mn'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'period'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdropna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mValueError\u001b[0m: year 0 is out of range"
]
}
],
"source": [
- "raw_data.columns \n",
- "raw_data.columns = ['Yr','Mn','Date 1','Date 2','s1','s2','s3','s4','s5','s6']"
+ "# Build a first-of-month date from the 'Yr' and 'Mn' columns, use it as the index,\n",
+ "# then keep only the rows that contain no NaN.\n",
+ "data = (\n",
+ "    data\n",
+ "    .assign(period=[datetime.date(year, month, 1) for year, month in zip(data['Yr'], data['Mn'])])\n",
+ "    .set_index('period')\n",
+ "    .dropna()\n",
+ "    .copy()\n",
+ ")"
]
},
{