diff --git a/module3/exo3/exercice.ipynb b/module3/exo3/exercice.ipynb index 891ae0113ae411d5c4332191b1604ed82b225413..6674ee1f28d532e6fde71bf0a801d161d7fbcf7c 100644 --- a/module3/exo3/exercice.ipynb +++ b/module3/exo3/exercice.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ @@ -32,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 57, "metadata": {}, "outputs": [ { @@ -168,7 +168,7 @@ "4 317.86 315.06 317.51 314.71 " ] }, - "execution_count": 39, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" }, @@ -297,7 +297,7 @@ "755 -99.99 -99.99 -99.99 -99.99 " ] }, - "execution_count": 39, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } @@ -320,9 +320,16 @@ "raw_data.tail(5)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nous voyons que les noms des colonnes ne sont pas très représentatifs, nous modifions donc les noms des colonnes" + ] + }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 58, "metadata": {}, "outputs": [ { @@ -334,29 +341,547 @@ " dtype='object')" ] }, - "execution_count": 43, + "execution_count": 58, "metadata": {}, "output_type": "execute_result" }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YrMnDate 1Date 2s1s2s3s4s5s6s7
0019581212001958.0411-99.99-99.99-99.99-99.99-99.99-99.99
1119582212311958.1260-99.99-99.99-99.99-99.99-99.99-99.99
2219583212591958.2027315.70314.44316.19314.91315.70314.44
3319584212901958.2877317.45315.16317.30314.99317.45315.16
4419585213201958.3699317.51314.71317.86315.06317.51314.71
\n", + "
" + ], + "text/plain": [ + " Yr Mn Date 1 Date 2 s1 s2 s3 s4 s5 \\\n", + "0 0 1958 1 21200 1958.0411 -99.99 -99.99 -99.99 -99.99 \n", + "1 1 1958 2 21231 1958.1260 -99.99 -99.99 -99.99 -99.99 \n", + "2 2 1958 3 21259 1958.2027 315.70 314.44 316.19 314.91 \n", + "3 3 1958 4 21290 1958.2877 317.45 315.16 317.30 314.99 \n", + "4 4 1958 5 21320 1958.3699 317.51 314.71 317.86 315.06 \n", + "\n", + " s6 s7 \n", + "0 -99.99 -99.99 \n", + "1 -99.99 -99.99 \n", + "2 315.70 314.44 \n", + "3 317.45 315.16 \n", + "4 317.51 314.71 " + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data.columns \n", + "raw_data.columns = ['Yr','Mn','Date 1','Date 2','s1','s2','s3','s4','s5','s6','s7']\n", + "data = raw_data\n", + "data.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Les données vides sont représentées par la valeur -99.99, nous remplaçons cette valeur par une valeur plus adéquate, NaN, dans une autre DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "data = data.replace(-99.99,np.NaN);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On visualise les lignes dont au moins une colonne a une donnée manquante." + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YrMnDate 1Date 2s1s2s3s4s5s6s7
0019581212001958.0411NaNNaNNaNNaNNaNNaN
1119582212311958.1260NaNNaNNaNNaNNaNNaN
5519586213511958.4548NaNNaN317.24315.14317.24315.14
99195810214731958.7890NaNNaN312.44315.40312.44315.40
737319642234221964.1257NaNNaN320.01319.36320.01319.36
747419643234511964.2049NaNNaN320.74319.41320.74319.41
757519644234821964.2896NaNNaN321.83319.45321.83319.45
74574520202438762020.1257NaNNaNNaNNaNNaNNaN
74674620203439052020.2049NaNNaNNaNNaNNaNNaN
74774720204439362020.2896NaNNaNNaNNaNNaNNaN
74874820205439662020.3716NaNNaNNaNNaNNaNNaN
74974920206439972020.4563NaNNaNNaNNaNNaNNaN
75075020207440272020.5383NaNNaNNaNNaNNaNNaN
75175120208440582020.6230NaNNaNNaNNaNNaNNaN
75275220209440892020.7077NaNNaNNaNNaNNaNNaN
753753202010441192020.7896NaNNaNNaNNaNNaNNaN
754754202011441502020.8743NaNNaNNaNNaNNaNNaN
755755202012441802020.9563NaNNaNNaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " Yr Mn Date 1 Date 2 s1 s2 s3 s4 s5 s6 \\\n", + "0 0 1958 1 21200 1958.0411 NaN NaN NaN NaN NaN \n", + "1 1 1958 2 21231 1958.1260 NaN NaN NaN NaN NaN \n", + "5 5 1958 6 21351 1958.4548 NaN NaN 317.24 315.14 317.24 \n", + "9 9 1958 10 21473 1958.7890 NaN NaN 312.44 315.40 312.44 \n", + "73 73 1964 2 23422 1964.1257 NaN NaN 320.01 319.36 320.01 \n", + "74 74 1964 3 23451 1964.2049 NaN NaN 320.74 319.41 320.74 \n", + "75 75 1964 4 23482 1964.2896 NaN NaN 321.83 319.45 321.83 \n", + "745 745 2020 2 43876 2020.1257 NaN NaN NaN NaN NaN \n", + "746 746 2020 3 43905 2020.2049 NaN NaN NaN NaN NaN \n", + "747 747 2020 4 43936 2020.2896 NaN NaN NaN NaN NaN \n", + "748 748 2020 5 43966 2020.3716 NaN NaN NaN NaN NaN \n", + "749 749 2020 6 43997 2020.4563 NaN NaN NaN NaN NaN \n", + "750 750 2020 7 44027 2020.5383 NaN NaN NaN NaN NaN \n", + "751 751 2020 8 44058 2020.6230 NaN NaN NaN NaN NaN \n", + "752 752 2020 9 44089 2020.7077 NaN NaN NaN NaN NaN \n", + "753 753 2020 10 44119 2020.7896 NaN NaN NaN NaN NaN \n", + "754 754 2020 11 44150 2020.8743 NaN NaN NaN NaN NaN \n", + "755 755 2020 12 44180 2020.9563 NaN NaN NaN NaN NaN \n", + "\n", + " s7 \n", + "0 NaN \n", + "1 NaN \n", + "5 315.14 \n", + "9 315.40 \n", + "73 319.36 \n", + "74 319.41 \n", + "75 319.45 \n", + "745 NaN \n", + "746 NaN \n", + "747 NaN \n", + "748 NaN \n", + "749 NaN \n", + "750 NaN \n", + "751 NaN \n", + "752 NaN \n", + "753 NaN \n", + "754 NaN \n", + "755 NaN " + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data.isnull().any(axis=1)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On ajoute un index 'period' à la DataFrame, cet index représente la période de mesure. \n", + "Cette date est mise dans un format compréhensible par pandas. On visualise toutes les lignes qui seront supprimées." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ { "ename": "ValueError", - "evalue": "Length mismatch: Expected axis has 11 elements, new values have 10 elements", + "evalue": "year 0 is out of range", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mraw_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mraw_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'Yr'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Mn'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Date 1'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Date 2'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m's1'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m's2'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m's3'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m's4'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m's5'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m's6'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__setattr__\u001b[0;34m(self, name, value)\u001b[0m\n\u001b[1;32m 3625\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3626\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3627\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__setattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3628\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3629\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32mpandas/_libs/properties.pyx\u001b[0m in \u001b[0;36mpandas._libs.properties.AxisProperty.__set__\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m_set_axis\u001b[0;34m(self, axis, labels)\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 558\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_set_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 559\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 560\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_clear_item_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36mset_axis\u001b[0;34m(self, axis, new_labels)\u001b[0m\n\u001b[1;32m 3072\u001b[0m raise ValueError('Length mismatch: Expected axis has %d elements, '\n\u001b[1;32m 3073\u001b[0m \u001b[0;34m'new values have %d 
elements'\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3074\u001b[0;31m (old_len, new_len))\n\u001b[0m\u001b[1;32m 3075\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3076\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maxes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_labels\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: Length mismatch: Expected axis has 11 elements, new values have 10 elements" + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'period'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mm\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Yr'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Mn'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'period'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdropna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'period'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mm\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Yr'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Mn'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'period'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdropna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: year 0 is out of range" ] } ], "source": [ - "raw_data.columns \n", - "raw_data.columns = ['Yr','Mn','Date 1','Date 2','s1','s2','s3','s4','s5','s6']" + "data['period'] = [datetime.date(y,m,1) for y,m in zip(data['Yr'],data['Mn'])]\n", + "data = data.set_index('period') \n", 
+ "data = data.dropna().copy()\n" ] }, {