no commit message

parent 5ed6218e
......@@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
......@@ -32,7 +32,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 57,
"metadata": {},
"outputs": [
{
......@@ -168,7 +168,7 @@
"4 317.86 315.06 317.51 314.71 "
]
},
"execution_count": 39,
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
},
......@@ -297,7 +297,7 @@
"755 -99.99 -99.99 -99.99 -99.99 "
]
},
"execution_count": 39,
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
......@@ -320,9 +320,16 @@
"raw_data.tail(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Nous voyons que les noms des colonnes ne sont pas très représentatifs ; nous les renommons donc."
]
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 58,
"metadata": {},
"outputs": [
{
......@@ -334,29 +341,547 @@
" dtype='object')"
]
},
"execution_count": 43,
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Yr</th>\n",
" <th>Mn</th>\n",
" <th>Date 1</th>\n",
" <th>Date 2</th>\n",
" <th>s1</th>\n",
" <th>s2</th>\n",
" <th>s3</th>\n",
" <th>s4</th>\n",
" <th>s5</th>\n",
" <th>s6</th>\n",
" <th>s7</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>1958</td>\n",
" <td>1</td>\n",
" <td>21200</td>\n",
" <td>1958.0411</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1958</td>\n",
" <td>2</td>\n",
" <td>21231</td>\n",
" <td>1958.1260</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" <td>-99.99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>1958</td>\n",
" <td>3</td>\n",
" <td>21259</td>\n",
" <td>1958.2027</td>\n",
" <td>315.70</td>\n",
" <td>314.44</td>\n",
" <td>316.19</td>\n",
" <td>314.91</td>\n",
" <td>315.70</td>\n",
" <td>314.44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>1958</td>\n",
" <td>4</td>\n",
" <td>21290</td>\n",
" <td>1958.2877</td>\n",
" <td>317.45</td>\n",
" <td>315.16</td>\n",
" <td>317.30</td>\n",
" <td>314.99</td>\n",
" <td>317.45</td>\n",
" <td>315.16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>1958</td>\n",
" <td>5</td>\n",
" <td>21320</td>\n",
" <td>1958.3699</td>\n",
" <td>317.51</td>\n",
" <td>314.71</td>\n",
" <td>317.86</td>\n",
" <td>315.06</td>\n",
" <td>317.51</td>\n",
" <td>314.71</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Yr Mn Date 1 Date 2 s1 s2 s3 s4 s5 \\\n",
"0 0 1958 1 21200 1958.0411 -99.99 -99.99 -99.99 -99.99 \n",
"1 1 1958 2 21231 1958.1260 -99.99 -99.99 -99.99 -99.99 \n",
"2 2 1958 3 21259 1958.2027 315.70 314.44 316.19 314.91 \n",
"3 3 1958 4 21290 1958.2877 317.45 315.16 317.30 314.99 \n",
"4 4 1958 5 21320 1958.3699 317.51 314.71 317.86 315.06 \n",
"\n",
" s6 s7 \n",
"0 -99.99 -99.99 \n",
"1 -99.99 -99.99 \n",
"2 315.70 314.44 \n",
"3 317.45 315.16 \n",
"4 317.51 314.71 "
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The first column of raw_data is a saved row index (0, 1, 2, ...), not the year.\n",
"# Name it explicitly and drop it, so that 'Yr' and 'Mn' really hold the year and\n",
"# the month instead of being shifted one column to the left.\n",
"raw_data.columns = ['idx','Yr','Mn','Date 1','Date 2','s1','s2','s3','s4','s5','s6']\n",
"data = raw_data.drop('idx', axis=1)\n",
"data.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Les données manquantes sont représentées par la valeur -99.99 ; nous remplaçons cette valeur par NaN, plus adéquate, dans une autre DataFrame."
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"# -99.99 marks a missing measurement in the file; turn it into a real NaN.\n",
"# np.nan is used because the np.NaN alias was removed in NumPy 2.0.\n",
"data = data.replace(-99.99, np.nan)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"On visualise les lignes dont au moins une colonne contient une donnée manquante."
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Yr</th>\n",
" <th>Mn</th>\n",
" <th>Date 1</th>\n",
" <th>Date 2</th>\n",
" <th>s1</th>\n",
" <th>s2</th>\n",
" <th>s3</th>\n",
" <th>s4</th>\n",
" <th>s5</th>\n",
" <th>s6</th>\n",
" <th>s7</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>1958</td>\n",
" <td>1</td>\n",
" <td>21200</td>\n",
" <td>1958.0411</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1958</td>\n",
" <td>2</td>\n",
" <td>21231</td>\n",
" <td>1958.1260</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>5</td>\n",
" <td>1958</td>\n",
" <td>6</td>\n",
" <td>21351</td>\n",
" <td>1958.4548</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>317.24</td>\n",
" <td>315.14</td>\n",
" <td>317.24</td>\n",
" <td>315.14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>9</td>\n",
" <td>1958</td>\n",
" <td>10</td>\n",
" <td>21473</td>\n",
" <td>1958.7890</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>312.44</td>\n",
" <td>315.40</td>\n",
" <td>312.44</td>\n",
" <td>315.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73</th>\n",
" <td>73</td>\n",
" <td>1964</td>\n",
" <td>2</td>\n",
" <td>23422</td>\n",
" <td>1964.1257</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>320.01</td>\n",
" <td>319.36</td>\n",
" <td>320.01</td>\n",
" <td>319.36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>74</td>\n",
" <td>1964</td>\n",
" <td>3</td>\n",
" <td>23451</td>\n",
" <td>1964.2049</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>320.74</td>\n",
" <td>319.41</td>\n",
" <td>320.74</td>\n",
" <td>319.41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75</th>\n",
" <td>75</td>\n",
" <td>1964</td>\n",
" <td>4</td>\n",
" <td>23482</td>\n",
" <td>1964.2896</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>321.83</td>\n",
" <td>319.45</td>\n",
" <td>321.83</td>\n",
" <td>319.45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>745</th>\n",
" <td>745</td>\n",
" <td>2020</td>\n",
" <td>2</td>\n",
" <td>43876</td>\n",
" <td>2020.1257</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>746</th>\n",
" <td>746</td>\n",
" <td>2020</td>\n",
" <td>3</td>\n",
" <td>43905</td>\n",
" <td>2020.2049</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>747</th>\n",
" <td>747</td>\n",
" <td>2020</td>\n",
" <td>4</td>\n",
" <td>43936</td>\n",
" <td>2020.2896</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>748</th>\n",
" <td>748</td>\n",
" <td>2020</td>\n",
" <td>5</td>\n",
" <td>43966</td>\n",
" <td>2020.3716</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>749</th>\n",
" <td>749</td>\n",
" <td>2020</td>\n",
" <td>6</td>\n",
" <td>43997</td>\n",
" <td>2020.4563</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>750</th>\n",
" <td>750</td>\n",
" <td>2020</td>\n",
" <td>7</td>\n",
" <td>44027</td>\n",
" <td>2020.5383</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>751</th>\n",
" <td>751</td>\n",
" <td>2020</td>\n",
" <td>8</td>\n",
" <td>44058</td>\n",
" <td>2020.6230</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>752</th>\n",
" <td>752</td>\n",
" <td>2020</td>\n",
" <td>9</td>\n",
" <td>44089</td>\n",
" <td>2020.7077</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>753</th>\n",
" <td>753</td>\n",
" <td>2020</td>\n",
" <td>10</td>\n",
" <td>44119</td>\n",
" <td>2020.7896</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>754</th>\n",
" <td>754</td>\n",
" <td>2020</td>\n",
" <td>11</td>\n",
" <td>44150</td>\n",
" <td>2020.8743</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>755</th>\n",
" <td>755</td>\n",
" <td>2020</td>\n",
" <td>12</td>\n",
" <td>44180</td>\n",
" <td>2020.9563</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Yr Mn Date 1 Date 2 s1 s2 s3 s4 s5 s6 \\\n",
"0 0 1958 1 21200 1958.0411 NaN NaN NaN NaN NaN \n",
"1 1 1958 2 21231 1958.1260 NaN NaN NaN NaN NaN \n",
"5 5 1958 6 21351 1958.4548 NaN NaN 317.24 315.14 317.24 \n",
"9 9 1958 10 21473 1958.7890 NaN NaN 312.44 315.40 312.44 \n",
"73 73 1964 2 23422 1964.1257 NaN NaN 320.01 319.36 320.01 \n",
"74 74 1964 3 23451 1964.2049 NaN NaN 320.74 319.41 320.74 \n",
"75 75 1964 4 23482 1964.2896 NaN NaN 321.83 319.45 321.83 \n",
"745 745 2020 2 43876 2020.1257 NaN NaN NaN NaN NaN \n",
"746 746 2020 3 43905 2020.2049 NaN NaN NaN NaN NaN \n",
"747 747 2020 4 43936 2020.2896 NaN NaN NaN NaN NaN \n",
"748 748 2020 5 43966 2020.3716 NaN NaN NaN NaN NaN \n",
"749 749 2020 6 43997 2020.4563 NaN NaN NaN NaN NaN \n",
"750 750 2020 7 44027 2020.5383 NaN NaN NaN NaN NaN \n",
"751 751 2020 8 44058 2020.6230 NaN NaN NaN NaN NaN \n",
"752 752 2020 9 44089 2020.7077 NaN NaN NaN NaN NaN \n",
"753 753 2020 10 44119 2020.7896 NaN NaN NaN NaN NaN \n",
"754 754 2020 11 44150 2020.8743 NaN NaN NaN NaN NaN \n",
"755 755 2020 12 44180 2020.9563 NaN NaN NaN NaN NaN \n",
"\n",
" s7 \n",
"0 NaN \n",
"1 NaN \n",
"5 315.14 \n",
"9 315.40 \n",
"73 319.36 \n",
"74 319.41 \n",
"75 319.45 \n",
"745 NaN \n",
"746 NaN \n",
"747 NaN \n",
"748 NaN \n",
"749 NaN \n",
"750 NaN \n",
"751 NaN \n",
"752 NaN \n",
"753 NaN \n",
"754 NaN \n",
"755 NaN "
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show only the rows in which at least one column is NaN.\n",
"data.loc[data.isna().any(axis='columns')]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"On ajoute un index 'period' à la DataFrame ; cet index représente la période de mesure. \n",
"Cette date est mise dans un format compréhensible par pandas. Les lignes contenant des valeurs manquantes (visualisées ci-dessus) sont ensuite supprimées."
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "Length mismatch: Expected axis has 11 elements, new values have 10 elements",
"evalue": "year 0 is out of range",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-43-13251efbef79>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mraw_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mraw_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'Yr'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Mn'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Date 1'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Date 2'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m's1'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m's2'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m's3'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m's4'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m's5'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m's6'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__setattr__\u001b[0;34m(self, name, value)\u001b[0m\n\u001b[1;32m 3625\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3626\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3627\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__setattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3628\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3629\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/properties.pyx\u001b[0m in \u001b[0;36mpandas._libs.properties.AxisProperty.__set__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m_set_axis\u001b[0;34m(self, axis, labels)\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 558\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_set_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 559\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 560\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_clear_item_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36mset_axis\u001b[0;34m(self, axis, new_labels)\u001b[0m\n\u001b[1;32m 3072\u001b[0m raise ValueError('Length mismatch: Expected axis has %d elements, '\n\u001b[1;32m 3073\u001b[0m \u001b[0;34m'new values have %d elements'\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3074\u001b[0;31m (old_len, new_len))\n\u001b[0m\u001b[1;32m 3075\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3076\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maxes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_labels\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Length mismatch: Expected axis has 11 elements, new values have 10 elements"
"\u001b[0;32m<ipython-input-64-adf0f8c9cf82>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'period'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mm\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Yr'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Mn'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'period'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdropna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-64-adf0f8c9cf82>\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'period'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mm\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Yr'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Mn'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'period'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdropna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: year 0 is out of range"
]
}
],
"source": [
"raw_data.columns \n",
"raw_data.columns = ['Yr','Mn','Date 1','Date 2','s1','s2','s3','s4','s5','s6']"
"# One date per row: the first day of the month given by the 'Yr' and 'Mn' columns.\n",
"data['period'] = [\n",
"    datetime.date(year, month, 1)\n",
"    for year, month in zip(data['Yr'], data['Mn'])\n",
"]\n",
"# Index the frame by that date, then drop every row that still contains a NaN.\n",
"data = data.set_index('period').dropna().copy()\n"
]
},
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment