Update module3/exo1/analyse-syndrome-grippal.ipynb

parent dbf327a2
......@@ -9,16 +9,16 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"# Standard library\n",
"import os\n",
"\n",
"# Third-party packages\n",
"import isoweek\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import requests"
]
},
{
......@@ -30,7 +30,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
......@@ -61,7 +61,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 3,
"metadata": {},
"outputs": [
{
......@@ -164,667 +164,260 @@
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>202451</td>\n",
" <td>3</td>\n",
" <td>201697</td>\n",
" <td>187843.0</td>\n",
" <td>215551.0</td>\n",
" <td>302</td>\n",
" <td>281.0</td>\n",
" <td>323.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>202450</td>\n",
" <td>3</td>\n",
" <td>136694</td>\n",
" <td>126369.0</td>\n",
" <td>147019.0</td>\n",
" <td>205</td>\n",
" <td>190.0</td>\n",
" <td>220.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>202449</td>\n",
" <th>2095</th>\n",
" <td>198448</td>\n",
" <td>3</td>\n",
" <td>108487</td>\n",
" <td>99037.0</td>\n",
" <td>117937.0</td>\n",
" <td>163</td>\n",
" <td>149.0</td>\n",
" <td>177.0</td>\n",
" <td>78620</td>\n",
" <td>60634.0</td>\n",
" <td>96606.0</td>\n",
" <td>143</td>\n",
" <td>110.0</td>\n",
" <td>176.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>202448</td>\n",
" <th>2096</th>\n",
" <td>198447</td>\n",
" <td>3</td>\n",
" <td>87381</td>\n",
" <td>78687.0</td>\n",
" <td>96075.0</td>\n",
" <td>72029</td>\n",
" <td>54274.0</td>\n",
" <td>89784.0</td>\n",
" <td>131</td>\n",
" <td>118.0</td>\n",
" <td>144.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>202447</td>\n",
" <td>3</td>\n",
" <td>76286</td>\n",
" <td>67626.0</td>\n",
" <td>84946.0</td>\n",
" <td>114</td>\n",
" <td>101.0</td>\n",
" <td>127.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>202446</td>\n",
" <td>3</td>\n",
" <td>56399</td>\n",
" <td>49006.0</td>\n",
" <td>63792.0</td>\n",
" <td>85</td>\n",
" <td>74.0</td>\n",
" <td>96.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>202445</td>\n",
" <td>3</td>\n",
" <td>47347</td>\n",
" <td>40843.0</td>\n",
" <td>53851.0</td>\n",
" <td>71</td>\n",
" <td>61.0</td>\n",
" <td>81.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>202444</td>\n",
" <td>3</td>\n",
" <td>36039</td>\n",
" <td>30122.0</td>\n",
" <td>41956.0</td>\n",
" <td>54</td>\n",
" <td>45.0</td>\n",
" <td>63.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>202443</td>\n",
" <td>3</td>\n",
" <td>46572</td>\n",
" <td>39928.0</td>\n",
" <td>53216.0</td>\n",
" <td>70</td>\n",
" <td>60.0</td>\n",
" <td>80.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>202442</td>\n",
" <td>3</td>\n",
" <td>67785</td>\n",
" <td>60009.0</td>\n",
" <td>75561.0</td>\n",
" <td>102</td>\n",
" <td>90.0</td>\n",
" <td>114.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>202441</td>\n",
" <td>3</td>\n",
" <td>79435</td>\n",
" <td>71386.0</td>\n",
" <td>87484.0</td>\n",
" <td>119</td>\n",
" <td>107.0</td>\n",
" <td>131.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>202440</td>\n",
" <td>3</td>\n",
" <td>84965</td>\n",
" <td>76555.0</td>\n",
" <td>93375.0</td>\n",
" <td>127</td>\n",
" <td>114.0</td>\n",
" <td>140.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>202439</td>\n",
" <td>3</td>\n",
" <td>91660</td>\n",
" <td>82937.0</td>\n",
" <td>100383.0</td>\n",
" <td>137</td>\n",
" <td>124.0</td>\n",
" <td>150.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>202438</td>\n",
" <td>3</td>\n",
" <td>91786</td>\n",
" <td>82903.0</td>\n",
" <td>100669.0</td>\n",
" <td>138</td>\n",
" <td>125.0</td>\n",
" <td>151.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>202437</td>\n",
" <td>3</td>\n",
" <td>56460</td>\n",
" <td>49319.0</td>\n",
" <td>63601.0</td>\n",
" <td>85</td>\n",
" <td>74.0</td>\n",
" <td>96.0</td>\n",
" <td>99.0</td>\n",
" <td>163.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>202436</td>\n",
" <th>2097</th>\n",
" <td>198446</td>\n",
" <td>3</td>\n",
" <td>33657</td>\n",
" <td>27906.0</td>\n",
" <td>39408.0</td>\n",
" <td>50</td>\n",
" <td>41.0</td>\n",
" <td>59.0</td>\n",
" <td>87330</td>\n",
" <td>67686.0</td>\n",
" <td>106974.0</td>\n",
" <td>159</td>\n",
" <td>123.0</td>\n",
" <td>195.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>202435</td>\n",
" <th>2098</th>\n",
" <td>198445</td>\n",
" <td>3</td>\n",
" <td>27404</td>\n",
" <td>22036.0</td>\n",
" <td>32772.0</td>\n",
" <td>41</td>\n",
" <td>33.0</td>\n",
" <td>49.0</td>\n",
" <td>135223</td>\n",
" <td>101414.0</td>\n",
" <td>169032.0</td>\n",
" <td>246</td>\n",
" <td>184.0</td>\n",
" <td>308.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>202434</td>\n",
" <th>2099</th>\n",
" <td>198444</td>\n",
" <td>3</td>\n",
" <td>26717</td>\n",
" <td>21003.0</td>\n",
" <td>32431.0</td>\n",
" <td>40</td>\n",
" <td>31.0</td>\n",
" <td>49.0</td>\n",
" <td>68422</td>\n",
" <td>20056.0</td>\n",
" <td>116788.0</td>\n",
" <td>125</td>\n",
" <td>37.0</td>\n",
" <td>213.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>202433</td>\n",
" <td>3</td>\n",
" <td>20623</td>\n",
" <td>15349.0</td>\n",
" <td>25897.0</td>\n",
" <td>31</td>\n",
" <td>23.0</td>\n",
" <td>39.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>202432</td>\n",
" <td>3</td>\n",
" <td>23187</td>\n",
" <td>17532.0</td>\n",
" <td>28842.0</td>\n",
" <td>35</td>\n",
" <td>27.0</td>\n",
" <td>43.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>202431</td>\n",
" <td>3</td>\n",
" <td>26035</td>\n",
" <td>20267.0</td>\n",
" <td>31803.0</td>\n",
" <td>39</td>\n",
" <td>30.0</td>\n",
" <td>48.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>202430</td>\n",
" <td>3</td>\n",
" <td>36393</td>\n",
" <td>28593.0</td>\n",
" <td>44193.0</td>\n",
" <td>55</td>\n",
" <td>43.0</td>\n",
" <td>67.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>202429</td>\n",
" <td>3</td>\n",
" <td>39560</td>\n",
" <td>32592.0</td>\n",
" <td>46528.0</td>\n",
" <td>59</td>\n",
" <td>49.0</td>\n",
" <td>69.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>202428</td>\n",
" <td>3</td>\n",
" <td>54342</td>\n",
" <td>45781.0</td>\n",
" <td>62903.0</td>\n",
" <td>81</td>\n",
" <td>68.0</td>\n",
" <td>94.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>202427</td>\n",
" <td>3</td>\n",
" <td>47364</td>\n",
" <td>40234.0</td>\n",
" <td>54494.0</td>\n",
" <td>71</td>\n",
" <td>60.0</td>\n",
" <td>82.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2070</th>\n",
" <td>198521</td>\n",
" <td>3</td>\n",
" <td>26096</td>\n",
" <td>19621.0</td>\n",
" <td>32571.0</td>\n",
" <td>47</td>\n",
" <td>35.0</td>\n",
" <td>59.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2071</th>\n",
" <td>198520</td>\n",
" <td>3</td>\n",
" <td>27896</td>\n",
" <td>20885.0</td>\n",
" <td>34907.0</td>\n",
" <td>51</td>\n",
" <td>38.0</td>\n",
" <td>64.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2072</th>\n",
" <td>198519</td>\n",
" <td>3</td>\n",
" <td>43154</td>\n",
" <td>32821.0</td>\n",
" <td>53487.0</td>\n",
" <td>78</td>\n",
" <td>59.0</td>\n",
" <td>97.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2073</th>\n",
" <td>198518</td>\n",
" <td>3</td>\n",
" <td>40555</td>\n",
" <td>29935.0</td>\n",
" <td>51175.0</td>\n",
" <td>74</td>\n",
" <td>55.0</td>\n",
" <td>93.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2074</th>\n",
" <td>198517</td>\n",
" <td>3</td>\n",
" <td>34053</td>\n",
" <td>24366.0</td>\n",
" <td>43740.0</td>\n",
" <td>62</td>\n",
" <td>44.0</td>\n",
" <td>80.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2075</th>\n",
" <td>198516</td>\n",
" <td>3</td>\n",
" <td>50362</td>\n",
" <td>36451.0</td>\n",
" <td>64273.0</td>\n",
" <td>91</td>\n",
" <td>66.0</td>\n",
" <td>116.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2076</th>\n",
" <td>198515</td>\n",
" <td>3</td>\n",
" <td>63881</td>\n",
" <td>45538.0</td>\n",
" <td>82224.0</td>\n",
" <td>116</td>\n",
" <td>83.0</td>\n",
" <td>149.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2077</th>\n",
" <td>198514</td>\n",
" <td>3</td>\n",
" <td>134545</td>\n",
" <td>114400.0</td>\n",
" <td>154690.0</td>\n",
" <td>244</td>\n",
" <td>207.0</td>\n",
" <td>281.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2078</th>\n",
" <td>198513</td>\n",
" <td>3</td>\n",
" <td>197206</td>\n",
" <td>176080.0</td>\n",
" <td>218332.0</td>\n",
" <td>357</td>\n",
" <td>319.0</td>\n",
" <td>395.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2079</th>\n",
" <td>198512</td>\n",
" <td>3</td>\n",
" <td>245240</td>\n",
" <td>223304.0</td>\n",
" <td>267176.0</td>\n",
" <td>445</td>\n",
" <td>405.0</td>\n",
" <td>485.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2080</th>\n",
" <td>198511</td>\n",
" <td>3</td>\n",
" <td>276205</td>\n",
" <td>252399.0</td>\n",
" <td>300011.0</td>\n",
" <td>501</td>\n",
" <td>458.0</td>\n",
" <td>544.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2081</th>\n",
" <td>198510</td>\n",
" <td>3</td>\n",
" <td>353231</td>\n",
" <td>326279.0</td>\n",
" <td>380183.0</td>\n",
" <td>640</td>\n",
" <td>591.0</td>\n",
" <td>689.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2082</th>\n",
" <td>198509</td>\n",
" <td>3</td>\n",
" <td>369895</td>\n",
" <td>341109.0</td>\n",
" <td>398681.0</td>\n",
" <td>670</td>\n",
" <td>618.0</td>\n",
" <td>722.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2083</th>\n",
" <td>198508</td>\n",
" <td>3</td>\n",
" <td>389886</td>\n",
" <td>359529.0</td>\n",
" <td>420243.0</td>\n",
" <td>707</td>\n",
" <td>652.0</td>\n",
" <td>762.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2084</th>\n",
" <td>198507</td>\n",
" <td>3</td>\n",
" <td>471852</td>\n",
" <td>432599.0</td>\n",
" <td>511105.0</td>\n",
" <td>855</td>\n",
" <td>784.0</td>\n",
" <td>926.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2085</th>\n",
" <td>198506</td>\n",
" <td>3</td>\n",
" <td>565825</td>\n",
" <td>518011.0</td>\n",
" <td>613639.0</td>\n",
" <td>1026</td>\n",
" <td>939.0</td>\n",
" <td>1113.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2086</th>\n",
" <td>198505</td>\n",
" <td>3</td>\n",
" <td>637302</td>\n",
" <td>592795.0</td>\n",
" <td>681809.0</td>\n",
" <td>1155</td>\n",
" <td>1074.0</td>\n",
" <td>1236.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2087</th>\n",
" <td>198504</td>\n",
" <td>3</td>\n",
" <td>424937</td>\n",
" <td>390794.0</td>\n",
" <td>459080.0</td>\n",
" <td>770</td>\n",
" <td>708.0</td>\n",
" <td>832.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2088</th>\n",
" <td>198503</td>\n",
" <td>3</td>\n",
" <td>213901</td>\n",
" <td>174689.0</td>\n",
" <td>253113.0</td>\n",
" <td>388</td>\n",
" <td>317.0</td>\n",
" <td>459.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tbody>\n",
"</table>\n",
"<p>2100 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" week indicator inc inc_low inc_up inc100 inc100_low \\\n",
"0 202504 3 375118 356288.0 393948.0 560 532.0 \n",
"1 202503 3 253215 239337.0 267093.0 378 357.0 \n",
"2 202502 3 257247 242991.0 271503.0 384 363.0 \n",
"3 202501 3 231549 214627.0 248471.0 345 320.0 \n",
"4 202452 3 201726 185870.0 217582.0 302 278.0 \n",
"... ... ... ... ... ... ... ... \n",
"2095 198448 3 78620 60634.0 96606.0 143 110.0 \n",
"2096 198447 3 72029 54274.0 89784.0 131 99.0 \n",
"2097 198446 3 87330 67686.0 106974.0 159 123.0 \n",
"2098 198445 3 135223 101414.0 169032.0 246 184.0 \n",
"2099 198444 3 68422 20056.0 116788.0 125 37.0 \n",
"\n",
" inc100_up geo_insee geo_name \n",
"0 588.0 FR France \n",
"1 399.0 FR France \n",
"2 405.0 FR France \n",
"3 370.0 FR France \n",
"4 326.0 FR France \n",
"... ... ... ... \n",
"2095 176.0 FR France \n",
"2096 163.0 FR France \n",
"2097 195.0 FR France \n",
"2098 308.0 FR France \n",
"2099 213.0 FR France \n",
"\n",
"[2100 rows x 10 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the CSV directly from data_url. The first line of the file is skipped,\n",
"# so the line after it supplies the column names (week, indicator, inc, ...).\n",
"raw_data = pd.read_csv(filepath_or_buffer=data_url, skiprows=1)\n",
"# Bare last expression: the notebook renders the (truncated) table.\n",
"raw_data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Modification du code pour utiliser le fichier local contenant les données :\n",
"1. Vérifie si une copie locale des données existe déjà\n",
"2. Si elle n'existe pas, télécharge les données depuis le Réseau Sentinelles"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Téléchargement des données et sauvegarde locale.\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>week</th>\n",
" <th>indicator</th>\n",
" <th>inc</th>\n",
" <th>inc_low</th>\n",
" <th>inc_up</th>\n",
" <th>inc100</th>\n",
" <th>inc100_low</th>\n",
" <th>inc100_up</th>\n",
" <th>geo_insee</th>\n",
" <th>geo_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2089</th>\n",
" <td>198502</td>\n",
" <th>0</th>\n",
" <td>202504</td>\n",
" <td>3</td>\n",
" <td>97586</td>\n",
" <td>80949.0</td>\n",
" <td>114223.0</td>\n",
" <td>177</td>\n",
" <td>147.0</td>\n",
" <td>207.0</td>\n",
" <td>375118</td>\n",
" <td>356288.0</td>\n",
" <td>393948.0</td>\n",
" <td>560</td>\n",
" <td>532.0</td>\n",
" <td>588.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2090</th>\n",
" <td>198501</td>\n",
" <th>1</th>\n",
" <td>202503</td>\n",
" <td>3</td>\n",
" <td>85489</td>\n",
" <td>65918.0</td>\n",
" <td>105060.0</td>\n",
" <td>155</td>\n",
" <td>120.0</td>\n",
" <td>190.0</td>\n",
" <td>253215</td>\n",
" <td>239337.0</td>\n",
" <td>267093.0</td>\n",
" <td>378</td>\n",
" <td>357.0</td>\n",
" <td>399.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2091</th>\n",
" <td>198452</td>\n",
" <th>2</th>\n",
" <td>202502</td>\n",
" <td>3</td>\n",
" <td>84830</td>\n",
" <td>60602.0</td>\n",
" <td>109058.0</td>\n",
" <td>154</td>\n",
" <td>110.0</td>\n",
" <td>198.0</td>\n",
" <td>257247</td>\n",
" <td>242991.0</td>\n",
" <td>271503.0</td>\n",
" <td>384</td>\n",
" <td>363.0</td>\n",
" <td>405.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2092</th>\n",
" <td>198451</td>\n",
" <th>3</th>\n",
" <td>202501</td>\n",
" <td>3</td>\n",
" <td>101726</td>\n",
" <td>80242.0</td>\n",
" <td>123210.0</td>\n",
" <td>185</td>\n",
" <td>146.0</td>\n",
" <td>224.0</td>\n",
" <td>231549</td>\n",
" <td>214627.0</td>\n",
" <td>248471.0</td>\n",
" <td>345</td>\n",
" <td>320.0</td>\n",
" <td>370.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2093</th>\n",
" <td>198450</td>\n",
" <th>4</th>\n",
" <td>202452</td>\n",
" <td>3</td>\n",
" <td>123680</td>\n",
" <td>101401.0</td>\n",
" <td>145959.0</td>\n",
" <td>225</td>\n",
" <td>184.0</td>\n",
" <td>266.0</td>\n",
" <td>201726</td>\n",
" <td>185870.0</td>\n",
" <td>217582.0</td>\n",
" <td>302</td>\n",
" <td>278.0</td>\n",
" <td>326.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2094</th>\n",
" <td>198449</td>\n",
" <td>3</td>\n",
" <td>101073</td>\n",
" <td>81684.0</td>\n",
" <td>120462.0</td>\n",
" <td>184</td>\n",
" <td>149.0</td>\n",
" <td>219.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2095</th>\n",
......@@ -903,57 +496,7 @@
"2 202502 3 257247 242991.0 271503.0 384 363.0 \n",
"3 202501 3 231549 214627.0 248471.0 345 320.0 \n",
"4 202452 3 201726 185870.0 217582.0 302 278.0 \n",
"5 202451 3 201697 187843.0 215551.0 302 281.0 \n",
"6 202450 3 136694 126369.0 147019.0 205 190.0 \n",
"7 202449 3 108487 99037.0 117937.0 163 149.0 \n",
"8 202448 3 87381 78687.0 96075.0 131 118.0 \n",
"9 202447 3 76286 67626.0 84946.0 114 101.0 \n",
"10 202446 3 56399 49006.0 63792.0 85 74.0 \n",
"11 202445 3 47347 40843.0 53851.0 71 61.0 \n",
"12 202444 3 36039 30122.0 41956.0 54 45.0 \n",
"13 202443 3 46572 39928.0 53216.0 70 60.0 \n",
"14 202442 3 67785 60009.0 75561.0 102 90.0 \n",
"15 202441 3 79435 71386.0 87484.0 119 107.0 \n",
"16 202440 3 84965 76555.0 93375.0 127 114.0 \n",
"17 202439 3 91660 82937.0 100383.0 137 124.0 \n",
"18 202438 3 91786 82903.0 100669.0 138 125.0 \n",
"19 202437 3 56460 49319.0 63601.0 85 74.0 \n",
"20 202436 3 33657 27906.0 39408.0 50 41.0 \n",
"21 202435 3 27404 22036.0 32772.0 41 33.0 \n",
"22 202434 3 26717 21003.0 32431.0 40 31.0 \n",
"23 202433 3 20623 15349.0 25897.0 31 23.0 \n",
"24 202432 3 23187 17532.0 28842.0 35 27.0 \n",
"25 202431 3 26035 20267.0 31803.0 39 30.0 \n",
"26 202430 3 36393 28593.0 44193.0 55 43.0 \n",
"27 202429 3 39560 32592.0 46528.0 59 49.0 \n",
"28 202428 3 54342 45781.0 62903.0 81 68.0 \n",
"29 202427 3 47364 40234.0 54494.0 71 60.0 \n",
"... ... ... ... ... ... ... ... \n",
"2070 198521 3 26096 19621.0 32571.0 47 35.0 \n",
"2071 198520 3 27896 20885.0 34907.0 51 38.0 \n",
"2072 198519 3 43154 32821.0 53487.0 78 59.0 \n",
"2073 198518 3 40555 29935.0 51175.0 74 55.0 \n",
"2074 198517 3 34053 24366.0 43740.0 62 44.0 \n",
"2075 198516 3 50362 36451.0 64273.0 91 66.0 \n",
"2076 198515 3 63881 45538.0 82224.0 116 83.0 \n",
"2077 198514 3 134545 114400.0 154690.0 244 207.0 \n",
"2078 198513 3 197206 176080.0 218332.0 357 319.0 \n",
"2079 198512 3 245240 223304.0 267176.0 445 405.0 \n",
"2080 198511 3 276205 252399.0 300011.0 501 458.0 \n",
"2081 198510 3 353231 326279.0 380183.0 640 591.0 \n",
"2082 198509 3 369895 341109.0 398681.0 670 618.0 \n",
"2083 198508 3 389886 359529.0 420243.0 707 652.0 \n",
"2084 198507 3 471852 432599.0 511105.0 855 784.0 \n",
"2085 198506 3 565825 518011.0 613639.0 1026 939.0 \n",
"2086 198505 3 637302 592795.0 681809.0 1155 1074.0 \n",
"2087 198504 3 424937 390794.0 459080.0 770 708.0 \n",
"2088 198503 3 213901 174689.0 253113.0 388 317.0 \n",
"2089 198502 3 97586 80949.0 114223.0 177 147.0 \n",
"2090 198501 3 85489 65918.0 105060.0 155 120.0 \n",
"2091 198452 3 84830 60602.0 109058.0 154 110.0 \n",
"2092 198451 3 101726 80242.0 123210.0 185 146.0 \n",
"2093 198450 3 123680 101401.0 145959.0 225 184.0 \n",
"2094 198449 3 101073 81684.0 120462.0 184 149.0 \n",
"2095 198448 3 78620 60634.0 96606.0 143 110.0 \n",
"2096 198447 3 72029 54274.0 89784.0 131 99.0 \n",
"2097 198446 3 87330 67686.0 106974.0 159 123.0 \n",
......@@ -966,57 +509,7 @@
"2 405.0 FR France \n",
"3 370.0 FR France \n",
"4 326.0 FR France \n",
"5 323.0 FR France \n",
"6 220.0 FR France \n",
"7 177.0 FR France \n",
"8 144.0 FR France \n",
"9 127.0 FR France \n",
"10 96.0 FR France \n",
"11 81.0 FR France \n",
"12 63.0 FR France \n",
"13 80.0 FR France \n",
"14 114.0 FR France \n",
"15 131.0 FR France \n",
"16 140.0 FR France \n",
"17 150.0 FR France \n",
"18 151.0 FR France \n",
"19 96.0 FR France \n",
"20 59.0 FR France \n",
"21 49.0 FR France \n",
"22 49.0 FR France \n",
"23 39.0 FR France \n",
"24 43.0 FR France \n",
"25 48.0 FR France \n",
"26 67.0 FR France \n",
"27 69.0 FR France \n",
"28 94.0 FR France \n",
"29 82.0 FR France \n",
"... ... ... ... \n",
"2070 59.0 FR France \n",
"2071 64.0 FR France \n",
"2072 97.0 FR France \n",
"2073 93.0 FR France \n",
"2074 80.0 FR France \n",
"2075 116.0 FR France \n",
"2076 149.0 FR France \n",
"2077 281.0 FR France \n",
"2078 395.0 FR France \n",
"2079 485.0 FR France \n",
"2080 544.0 FR France \n",
"2081 689.0 FR France \n",
"2082 722.0 FR France \n",
"2083 762.0 FR France \n",
"2084 926.0 FR France \n",
"2085 1113.0 FR France \n",
"2086 1236.0 FR France \n",
"2087 832.0 FR France \n",
"2088 459.0 FR France \n",
"2089 207.0 FR France \n",
"2090 190.0 FR France \n",
"2091 198.0 FR France \n",
"2092 224.0 FR France \n",
"2093 266.0 FR France \n",
"2094 219.0 FR France \n",
"2095 176.0 FR France \n",
"2096 163.0 FR France \n",
"2097 195.0 FR France \n",
......@@ -1026,59 +519,23 @@
"[2100 rows x 10 columns]"
]
},
"execution_count": 20,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the CSV directly from data_url. The first line of the file is skipped,\n",
"# so the line after it supplies the column names (week, indicator, inc, ...).\n",
"raw_data = pd.read_csv(filepath_or_buffer=data_url, skiprows=1)\n",
"# Bare last expression: the notebook renders the (truncated) table.\n",
"raw_data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Modification du code pour vérifier si une copie locale des données existe et pour les télécharger si elles n'existent pas :"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"ename": "ParserError",
"evalue": "Error tokenizing data. C error: Expected 1 fields in line 30, saw 21\n",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mParserError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-5-df5ef4b63c35>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mraw_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'https://app-learninglab.inria.fr/moocrr/gitlab/5212fa3d0a7441c34b57f854081c7450/mooc-rr/blob/master/module3/exo1/inc-25-PAY.csv'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'iso-8859-1'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mskiprows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mraw_data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)\u001b[0m\n\u001b[1;32m 707\u001b[0m skip_blank_lines=skip_blank_lines)\n\u001b[1;32m 708\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 709\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 710\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 711\u001b[0m \u001b[0mparser_f\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 453\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 454\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 455\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 456\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 457\u001b[0m \u001b[0mparser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, nrows)\u001b[0m\n\u001b[1;32m 1067\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'skipfooter not supported for iteration'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1068\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1069\u001b[0;31m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1070\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1071\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'as_recarray'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, nrows)\u001b[0m\n\u001b[1;32m 1837\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnrows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1838\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1839\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1840\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mStopIteration\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1841\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_first_chunk\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.read\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._read_low_memory\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._read_rows\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._tokenize_rows\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.raise_parser_error\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mParserError\u001b[0m: Error tokenizing data. C error: Expected 1 fields in line 30, saw 21\n"
]
}
],
"source": [
"# Local cache of the dataset. A path relative to the notebook keeps the\n",
"# analysis runnable on any machine and operating system, and avoids\n",
"# embedding a personal directory name in the notebook.\n",
"file_path = \"inc-25-PAY.csv\"\n",
"\n",
"if not os.path.exists(file_path):\n",
"    # No local copy yet: download once and keep the bytes for later runs.\n",
"    response = requests.get(data_url, timeout=60)\n",
"    # Stop on an HTTP error instead of caching an error page as if it were\n",
"    # the CSV (the cache would otherwise never be refreshed).\n",
"    response.raise_for_status()\n",
"    with open(file_path, \"wb\") as local_file:\n",
"        local_file.write(response.content)\n",
"    print(\"Téléchargement des données et sauvegarde locale.\")\n",
"else:\n",
"    print(\"Données locales déjà existantes.\")\n",
"\n",
"# Always parse the local copy, whether it was just downloaded or already\n",
"# present. The first line is skipped so the next one is used as the header.\n",
"raw_data = pd.read_csv(file_path, encoding='iso-8859-1', skiprows=1)\n",
"raw_data"
]
},
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment