From 73d4f4acd715cf42149b5d9b35901c603f79d985 Mon Sep 17 00:00:00 2001
From: 5212fa3d0a7441c34b57f854081c7450
<5212fa3d0a7441c34b57f854081c7450@app-learninglab.inria.fr>
Date: Sun, 2 Feb 2025 09:59:17 +0000
Subject: [PATCH] Update module3/exo1/analyse-syndrome-grippal.ipynb
---
module3/exo1/analyse-syndrome-grippal.ipynb | 973 +++++---------------
1 file changed, 215 insertions(+), 758 deletions(-)
diff --git a/module3/exo1/analyse-syndrome-grippal.ipynb b/module3/exo1/analyse-syndrome-grippal.ipynb
index c9f40a7..f497894 100644
--- a/module3/exo1/analyse-syndrome-grippal.ipynb
+++ b/module3/exo1/analyse-syndrome-grippal.ipynb
@@ -9,16 +9,16 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
+ "import os\n",
+ "import requests\n",
"import isoweek"
- "import os"
- "import requests"
]
},
{
@@ -30,7 +30,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -61,7 +61,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -164,667 +164,260 @@
"
France | \n",
" \n",
" \n",
- " 5 | \n",
- " 202451 | \n",
- " 3 | \n",
- " 201697 | \n",
- " 187843.0 | \n",
- " 215551.0 | \n",
- " 302 | \n",
- " 281.0 | \n",
- " 323.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " 202450 | \n",
- " 3 | \n",
- " 136694 | \n",
- " 126369.0 | \n",
- " 147019.0 | \n",
- " 205 | \n",
- " 190.0 | \n",
- " 220.0 | \n",
- " FR | \n",
- " France | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
"
\n",
" \n",
- " 7 | \n",
- " 202449 | \n",
+ " 2095 | \n",
+ " 198448 | \n",
" 3 | \n",
- " 108487 | \n",
- " 99037.0 | \n",
- " 117937.0 | \n",
- " 163 | \n",
- " 149.0 | \n",
- " 177.0 | \n",
+ " 78620 | \n",
+ " 60634.0 | \n",
+ " 96606.0 | \n",
+ " 143 | \n",
+ " 110.0 | \n",
+ " 176.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
- " 8 | \n",
- " 202448 | \n",
+ " 2096 | \n",
+ " 198447 | \n",
" 3 | \n",
- " 87381 | \n",
- " 78687.0 | \n",
- " 96075.0 | \n",
+ " 72029 | \n",
+ " 54274.0 | \n",
+ " 89784.0 | \n",
" 131 | \n",
- " 118.0 | \n",
- " 144.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " 202447 | \n",
- " 3 | \n",
- " 76286 | \n",
- " 67626.0 | \n",
- " 84946.0 | \n",
- " 114 | \n",
- " 101.0 | \n",
- " 127.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " 202446 | \n",
- " 3 | \n",
- " 56399 | \n",
- " 49006.0 | \n",
- " 63792.0 | \n",
- " 85 | \n",
- " 74.0 | \n",
- " 96.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " 202445 | \n",
- " 3 | \n",
- " 47347 | \n",
- " 40843.0 | \n",
- " 53851.0 | \n",
- " 71 | \n",
- " 61.0 | \n",
- " 81.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " 202444 | \n",
- " 3 | \n",
- " 36039 | \n",
- " 30122.0 | \n",
- " 41956.0 | \n",
- " 54 | \n",
- " 45.0 | \n",
- " 63.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " 202443 | \n",
- " 3 | \n",
- " 46572 | \n",
- " 39928.0 | \n",
- " 53216.0 | \n",
- " 70 | \n",
- " 60.0 | \n",
- " 80.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " 202442 | \n",
- " 3 | \n",
- " 67785 | \n",
- " 60009.0 | \n",
- " 75561.0 | \n",
- " 102 | \n",
- " 90.0 | \n",
- " 114.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " 202441 | \n",
- " 3 | \n",
- " 79435 | \n",
- " 71386.0 | \n",
- " 87484.0 | \n",
- " 119 | \n",
- " 107.0 | \n",
- " 131.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " 202440 | \n",
- " 3 | \n",
- " 84965 | \n",
- " 76555.0 | \n",
- " 93375.0 | \n",
- " 127 | \n",
- " 114.0 | \n",
- " 140.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " 202439 | \n",
- " 3 | \n",
- " 91660 | \n",
- " 82937.0 | \n",
- " 100383.0 | \n",
- " 137 | \n",
- " 124.0 | \n",
- " 150.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " 202438 | \n",
- " 3 | \n",
- " 91786 | \n",
- " 82903.0 | \n",
- " 100669.0 | \n",
- " 138 | \n",
- " 125.0 | \n",
- " 151.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " 202437 | \n",
- " 3 | \n",
- " 56460 | \n",
- " 49319.0 | \n",
- " 63601.0 | \n",
- " 85 | \n",
- " 74.0 | \n",
- " 96.0 | \n",
+ " 99.0 | \n",
+ " 163.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
- " 20 | \n",
- " 202436 | \n",
+ " 2097 | \n",
+ " 198446 | \n",
" 3 | \n",
- " 33657 | \n",
- " 27906.0 | \n",
- " 39408.0 | \n",
- " 50 | \n",
- " 41.0 | \n",
- " 59.0 | \n",
+ " 87330 | \n",
+ " 67686.0 | \n",
+ " 106974.0 | \n",
+ " 159 | \n",
+ " 123.0 | \n",
+ " 195.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
- " 21 | \n",
- " 202435 | \n",
+ " 2098 | \n",
+ " 198445 | \n",
" 3 | \n",
- " 27404 | \n",
- " 22036.0 | \n",
- " 32772.0 | \n",
- " 41 | \n",
- " 33.0 | \n",
- " 49.0 | \n",
+ " 135223 | \n",
+ " 101414.0 | \n",
+ " 169032.0 | \n",
+ " 246 | \n",
+ " 184.0 | \n",
+ " 308.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
- " 22 | \n",
- " 202434 | \n",
+ " 2099 | \n",
+ " 198444 | \n",
" 3 | \n",
- " 26717 | \n",
- " 21003.0 | \n",
- " 32431.0 | \n",
- " 40 | \n",
- " 31.0 | \n",
- " 49.0 | \n",
+ " 68422 | \n",
+ " 20056.0 | \n",
+ " 116788.0 | \n",
+ " 125 | \n",
+ " 37.0 | \n",
+ " 213.0 | \n",
" FR | \n",
" France | \n",
"
\n",
- " \n",
- " 23 | \n",
- " 202433 | \n",
- " 3 | \n",
- " 20623 | \n",
- " 15349.0 | \n",
- " 25897.0 | \n",
- " 31 | \n",
- " 23.0 | \n",
- " 39.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 24 | \n",
- " 202432 | \n",
- " 3 | \n",
- " 23187 | \n",
- " 17532.0 | \n",
- " 28842.0 | \n",
- " 35 | \n",
- " 27.0 | \n",
- " 43.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 25 | \n",
- " 202431 | \n",
- " 3 | \n",
- " 26035 | \n",
- " 20267.0 | \n",
- " 31803.0 | \n",
- " 39 | \n",
- " 30.0 | \n",
- " 48.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 26 | \n",
- " 202430 | \n",
- " 3 | \n",
- " 36393 | \n",
- " 28593.0 | \n",
- " 44193.0 | \n",
- " 55 | \n",
- " 43.0 | \n",
- " 67.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 27 | \n",
- " 202429 | \n",
- " 3 | \n",
- " 39560 | \n",
- " 32592.0 | \n",
- " 46528.0 | \n",
- " 59 | \n",
- " 49.0 | \n",
- " 69.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 28 | \n",
- " 202428 | \n",
- " 3 | \n",
- " 54342 | \n",
- " 45781.0 | \n",
- " 62903.0 | \n",
- " 81 | \n",
- " 68.0 | \n",
- " 94.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 29 | \n",
- " 202427 | \n",
- " 3 | \n",
- " 47364 | \n",
- " 40234.0 | \n",
- " 54494.0 | \n",
- " 71 | \n",
- " 60.0 | \n",
- " 82.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 2070 | \n",
- " 198521 | \n",
- " 3 | \n",
- " 26096 | \n",
- " 19621.0 | \n",
- " 32571.0 | \n",
- " 47 | \n",
- " 35.0 | \n",
- " 59.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 2071 | \n",
- " 198520 | \n",
- " 3 | \n",
- " 27896 | \n",
- " 20885.0 | \n",
- " 34907.0 | \n",
- " 51 | \n",
- " 38.0 | \n",
- " 64.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 2072 | \n",
- " 198519 | \n",
- " 3 | \n",
- " 43154 | \n",
- " 32821.0 | \n",
- " 53487.0 | \n",
- " 78 | \n",
- " 59.0 | \n",
- " 97.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 2073 | \n",
- " 198518 | \n",
- " 3 | \n",
- " 40555 | \n",
- " 29935.0 | \n",
- " 51175.0 | \n",
- " 74 | \n",
- " 55.0 | \n",
- " 93.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 2074 | \n",
- " 198517 | \n",
- " 3 | \n",
- " 34053 | \n",
- " 24366.0 | \n",
- " 43740.0 | \n",
- " 62 | \n",
- " 44.0 | \n",
- " 80.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 2075 | \n",
- " 198516 | \n",
- " 3 | \n",
- " 50362 | \n",
- " 36451.0 | \n",
- " 64273.0 | \n",
- " 91 | \n",
- " 66.0 | \n",
- " 116.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 2076 | \n",
- " 198515 | \n",
- " 3 | \n",
- " 63881 | \n",
- " 45538.0 | \n",
- " 82224.0 | \n",
- " 116 | \n",
- " 83.0 | \n",
- " 149.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 2077 | \n",
- " 198514 | \n",
- " 3 | \n",
- " 134545 | \n",
- " 114400.0 | \n",
- " 154690.0 | \n",
- " 244 | \n",
- " 207.0 | \n",
- " 281.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 2078 | \n",
- " 198513 | \n",
- " 3 | \n",
- " 197206 | \n",
- " 176080.0 | \n",
- " 218332.0 | \n",
- " 357 | \n",
- " 319.0 | \n",
- " 395.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 2079 | \n",
- " 198512 | \n",
- " 3 | \n",
- " 245240 | \n",
- " 223304.0 | \n",
- " 267176.0 | \n",
- " 445 | \n",
- " 405.0 | \n",
- " 485.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 2080 | \n",
- " 198511 | \n",
- " 3 | \n",
- " 276205 | \n",
- " 252399.0 | \n",
- " 300011.0 | \n",
- " 501 | \n",
- " 458.0 | \n",
- " 544.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 2081 | \n",
- " 198510 | \n",
- " 3 | \n",
- " 353231 | \n",
- " 326279.0 | \n",
- " 380183.0 | \n",
- " 640 | \n",
- " 591.0 | \n",
- " 689.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 2082 | \n",
- " 198509 | \n",
- " 3 | \n",
- " 369895 | \n",
- " 341109.0 | \n",
- " 398681.0 | \n",
- " 670 | \n",
- " 618.0 | \n",
- " 722.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 2083 | \n",
- " 198508 | \n",
- " 3 | \n",
- " 389886 | \n",
- " 359529.0 | \n",
- " 420243.0 | \n",
- " 707 | \n",
- " 652.0 | \n",
- " 762.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 2084 | \n",
- " 198507 | \n",
- " 3 | \n",
- " 471852 | \n",
- " 432599.0 | \n",
- " 511105.0 | \n",
- " 855 | \n",
- " 784.0 | \n",
- " 926.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 2085 | \n",
- " 198506 | \n",
- " 3 | \n",
- " 565825 | \n",
- " 518011.0 | \n",
- " 613639.0 | \n",
- " 1026 | \n",
- " 939.0 | \n",
- " 1113.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 2086 | \n",
- " 198505 | \n",
- " 3 | \n",
- " 637302 | \n",
- " 592795.0 | \n",
- " 681809.0 | \n",
- " 1155 | \n",
- " 1074.0 | \n",
- " 1236.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 2087 | \n",
- " 198504 | \n",
- " 3 | \n",
- " 424937 | \n",
- " 390794.0 | \n",
- " 459080.0 | \n",
- " 770 | \n",
- " 708.0 | \n",
- " 832.0 | \n",
- " FR | \n",
- " France | \n",
- "
\n",
- " \n",
- " 2088 | \n",
- " 198503 | \n",
- " 3 | \n",
- " 213901 | \n",
- " 174689.0 | \n",
- " 253113.0 | \n",
- " 388 | \n",
- " 317.0 | \n",
- " 459.0 | \n",
- " FR | \n",
- " France | \n",
+ " \n",
+ "\n",
+ "2100 rows × 10 columns
\n",
+ ""
+ ],
+ "text/plain": [
+ " week indicator inc inc_low inc_up inc100 inc100_low \\\n",
+ "0 202504 3 375118 356288.0 393948.0 560 532.0 \n",
+ "1 202503 3 253215 239337.0 267093.0 378 357.0 \n",
+ "2 202502 3 257247 242991.0 271503.0 384 363.0 \n",
+ "3 202501 3 231549 214627.0 248471.0 345 320.0 \n",
+ "4 202452 3 201726 185870.0 217582.0 302 278.0 \n",
+ "... ... ... ... ... ... ... ... \n",
+ "2095 198448 3 78620 60634.0 96606.0 143 110.0 \n",
+ "2096 198447 3 72029 54274.0 89784.0 131 99.0 \n",
+ "2097 198446 3 87330 67686.0 106974.0 159 123.0 \n",
+ "2098 198445 3 135223 101414.0 169032.0 246 184.0 \n",
+ "2099 198444 3 68422 20056.0 116788.0 125 37.0 \n",
+ "\n",
+ " inc100_up geo_insee geo_name \n",
+ "0 588.0 FR France \n",
+ "1 399.0 FR France \n",
+ "2 405.0 FR France \n",
+ "3 370.0 FR France \n",
+ "4 326.0 FR France \n",
+ "... ... ... ... \n",
+ "2095 176.0 FR France \n",
+ "2096 163.0 FR France \n",
+ "2097 195.0 FR France \n",
+ "2098 308.0 FR France \n",
+ "2099 213.0 FR France \n",
+ "\n",
+ "[2100 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw_data = pd.read_csv(data_url, skiprows=1)\n",
+ "raw_data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Modification du code pour utiliser le fichier local contenant les données :\n",
+ "1. Vérifie si une copie locale des données existe déjà\n",
+ "2. Si elle n'existe pas, télécharge les données depuis le Réseau Sentinelles"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Téléchargement des données et sauvegarde locale.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " week | \n",
+ " indicator | \n",
+ " inc | \n",
+ " inc_low | \n",
+ " inc_up | \n",
+ " inc100 | \n",
+ " inc100_low | \n",
+ " inc100_up | \n",
+ " geo_insee | \n",
+ " geo_name | \n",
"
\n",
+ " \n",
+ " \n",
" \n",
- " 2089 | \n",
- " 198502 | \n",
+ " 0 | \n",
+ " 202504 | \n",
" 3 | \n",
- " 97586 | \n",
- " 80949.0 | \n",
- " 114223.0 | \n",
- " 177 | \n",
- " 147.0 | \n",
- " 207.0 | \n",
+ " 375118 | \n",
+ " 356288.0 | \n",
+ " 393948.0 | \n",
+ " 560 | \n",
+ " 532.0 | \n",
+ " 588.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
- " 2090 | \n",
- " 198501 | \n",
+ " 1 | \n",
+ " 202503 | \n",
" 3 | \n",
- " 85489 | \n",
- " 65918.0 | \n",
- " 105060.0 | \n",
- " 155 | \n",
- " 120.0 | \n",
- " 190.0 | \n",
+ " 253215 | \n",
+ " 239337.0 | \n",
+ " 267093.0 | \n",
+ " 378 | \n",
+ " 357.0 | \n",
+ " 399.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
- " 2091 | \n",
- " 198452 | \n",
+ " 2 | \n",
+ " 202502 | \n",
" 3 | \n",
- " 84830 | \n",
- " 60602.0 | \n",
- " 109058.0 | \n",
- " 154 | \n",
- " 110.0 | \n",
- " 198.0 | \n",
+ " 257247 | \n",
+ " 242991.0 | \n",
+ " 271503.0 | \n",
+ " 384 | \n",
+ " 363.0 | \n",
+ " 405.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
- " 2092 | \n",
- " 198451 | \n",
+ " 3 | \n",
+ " 202501 | \n",
" 3 | \n",
- " 101726 | \n",
- " 80242.0 | \n",
- " 123210.0 | \n",
- " 185 | \n",
- " 146.0 | \n",
- " 224.0 | \n",
+ " 231549 | \n",
+ " 214627.0 | \n",
+ " 248471.0 | \n",
+ " 345 | \n",
+ " 320.0 | \n",
+ " 370.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
- " 2093 | \n",
- " 198450 | \n",
+ " 4 | \n",
+ " 202452 | \n",
" 3 | \n",
- " 123680 | \n",
- " 101401.0 | \n",
- " 145959.0 | \n",
- " 225 | \n",
- " 184.0 | \n",
- " 266.0 | \n",
+ " 201726 | \n",
+ " 185870.0 | \n",
+ " 217582.0 | \n",
+ " 302 | \n",
+ " 278.0 | \n",
+ " 326.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
- " 2094 | \n",
- " 198449 | \n",
- " 3 | \n",
- " 101073 | \n",
- " 81684.0 | \n",
- " 120462.0 | \n",
- " 184 | \n",
- " 149.0 | \n",
- " 219.0 | \n",
- " FR | \n",
- " France | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
"
\n",
" \n",
" 2095 | \n",
@@ -903,57 +496,7 @@
"2 202502 3 257247 242991.0 271503.0 384 363.0 \n",
"3 202501 3 231549 214627.0 248471.0 345 320.0 \n",
"4 202452 3 201726 185870.0 217582.0 302 278.0 \n",
- "5 202451 3 201697 187843.0 215551.0 302 281.0 \n",
- "6 202450 3 136694 126369.0 147019.0 205 190.0 \n",
- "7 202449 3 108487 99037.0 117937.0 163 149.0 \n",
- "8 202448 3 87381 78687.0 96075.0 131 118.0 \n",
- "9 202447 3 76286 67626.0 84946.0 114 101.0 \n",
- "10 202446 3 56399 49006.0 63792.0 85 74.0 \n",
- "11 202445 3 47347 40843.0 53851.0 71 61.0 \n",
- "12 202444 3 36039 30122.0 41956.0 54 45.0 \n",
- "13 202443 3 46572 39928.0 53216.0 70 60.0 \n",
- "14 202442 3 67785 60009.0 75561.0 102 90.0 \n",
- "15 202441 3 79435 71386.0 87484.0 119 107.0 \n",
- "16 202440 3 84965 76555.0 93375.0 127 114.0 \n",
- "17 202439 3 91660 82937.0 100383.0 137 124.0 \n",
- "18 202438 3 91786 82903.0 100669.0 138 125.0 \n",
- "19 202437 3 56460 49319.0 63601.0 85 74.0 \n",
- "20 202436 3 33657 27906.0 39408.0 50 41.0 \n",
- "21 202435 3 27404 22036.0 32772.0 41 33.0 \n",
- "22 202434 3 26717 21003.0 32431.0 40 31.0 \n",
- "23 202433 3 20623 15349.0 25897.0 31 23.0 \n",
- "24 202432 3 23187 17532.0 28842.0 35 27.0 \n",
- "25 202431 3 26035 20267.0 31803.0 39 30.0 \n",
- "26 202430 3 36393 28593.0 44193.0 55 43.0 \n",
- "27 202429 3 39560 32592.0 46528.0 59 49.0 \n",
- "28 202428 3 54342 45781.0 62903.0 81 68.0 \n",
- "29 202427 3 47364 40234.0 54494.0 71 60.0 \n",
"... ... ... ... ... ... ... ... \n",
- "2070 198521 3 26096 19621.0 32571.0 47 35.0 \n",
- "2071 198520 3 27896 20885.0 34907.0 51 38.0 \n",
- "2072 198519 3 43154 32821.0 53487.0 78 59.0 \n",
- "2073 198518 3 40555 29935.0 51175.0 74 55.0 \n",
- "2074 198517 3 34053 24366.0 43740.0 62 44.0 \n",
- "2075 198516 3 50362 36451.0 64273.0 91 66.0 \n",
- "2076 198515 3 63881 45538.0 82224.0 116 83.0 \n",
- "2077 198514 3 134545 114400.0 154690.0 244 207.0 \n",
- "2078 198513 3 197206 176080.0 218332.0 357 319.0 \n",
- "2079 198512 3 245240 223304.0 267176.0 445 405.0 \n",
- "2080 198511 3 276205 252399.0 300011.0 501 458.0 \n",
- "2081 198510 3 353231 326279.0 380183.0 640 591.0 \n",
- "2082 198509 3 369895 341109.0 398681.0 670 618.0 \n",
- "2083 198508 3 389886 359529.0 420243.0 707 652.0 \n",
- "2084 198507 3 471852 432599.0 511105.0 855 784.0 \n",
- "2085 198506 3 565825 518011.0 613639.0 1026 939.0 \n",
- "2086 198505 3 637302 592795.0 681809.0 1155 1074.0 \n",
- "2087 198504 3 424937 390794.0 459080.0 770 708.0 \n",
- "2088 198503 3 213901 174689.0 253113.0 388 317.0 \n",
- "2089 198502 3 97586 80949.0 114223.0 177 147.0 \n",
- "2090 198501 3 85489 65918.0 105060.0 155 120.0 \n",
- "2091 198452 3 84830 60602.0 109058.0 154 110.0 \n",
- "2092 198451 3 101726 80242.0 123210.0 185 146.0 \n",
- "2093 198450 3 123680 101401.0 145959.0 225 184.0 \n",
- "2094 198449 3 101073 81684.0 120462.0 184 149.0 \n",
"2095 198448 3 78620 60634.0 96606.0 143 110.0 \n",
"2096 198447 3 72029 54274.0 89784.0 131 99.0 \n",
"2097 198446 3 87330 67686.0 106974.0 159 123.0 \n",
@@ -966,57 +509,7 @@
"2 405.0 FR France \n",
"3 370.0 FR France \n",
"4 326.0 FR France \n",
- "5 323.0 FR France \n",
- "6 220.0 FR France \n",
- "7 177.0 FR France \n",
- "8 144.0 FR France \n",
- "9 127.0 FR France \n",
- "10 96.0 FR France \n",
- "11 81.0 FR France \n",
- "12 63.0 FR France \n",
- "13 80.0 FR France \n",
- "14 114.0 FR France \n",
- "15 131.0 FR France \n",
- "16 140.0 FR France \n",
- "17 150.0 FR France \n",
- "18 151.0 FR France \n",
- "19 96.0 FR France \n",
- "20 59.0 FR France \n",
- "21 49.0 FR France \n",
- "22 49.0 FR France \n",
- "23 39.0 FR France \n",
- "24 43.0 FR France \n",
- "25 48.0 FR France \n",
- "26 67.0 FR France \n",
- "27 69.0 FR France \n",
- "28 94.0 FR France \n",
- "29 82.0 FR France \n",
"... ... ... ... \n",
- "2070 59.0 FR France \n",
- "2071 64.0 FR France \n",
- "2072 97.0 FR France \n",
- "2073 93.0 FR France \n",
- "2074 80.0 FR France \n",
- "2075 116.0 FR France \n",
- "2076 149.0 FR France \n",
- "2077 281.0 FR France \n",
- "2078 395.0 FR France \n",
- "2079 485.0 FR France \n",
- "2080 544.0 FR France \n",
- "2081 689.0 FR France \n",
- "2082 722.0 FR France \n",
- "2083 762.0 FR France \n",
- "2084 926.0 FR France \n",
- "2085 1113.0 FR France \n",
- "2086 1236.0 FR France \n",
- "2087 832.0 FR France \n",
- "2088 459.0 FR France \n",
- "2089 207.0 FR France \n",
- "2090 190.0 FR France \n",
- "2091 198.0 FR France \n",
- "2092 224.0 FR France \n",
- "2093 266.0 FR France \n",
- "2094 219.0 FR France \n",
"2095 176.0 FR France \n",
"2096 163.0 FR France \n",
"2097 195.0 FR France \n",
@@ -1026,59 +519,23 @@
"[2100 rows x 10 columns]"
]
},
- "execution_count": 20,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "raw_data = pd.read_csv(data_url, skiprows=1)\n",
- "raw_data"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Modification du code pour vérifier si une copie locale des données existe et pour les télécharger si elles n'existent pas :"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "ename": "ParserError",
- "evalue": "Error tokenizing data. C error: Expected 1 fields in line 30, saw 21\n",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mParserError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mraw_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'https://app-learninglab.inria.fr/moocrr/gitlab/5212fa3d0a7441c34b57f854081c7450/mooc-rr/blob/master/module3/exo1/inc-25-PAY.csv'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'iso-8859-1'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mskiprows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mraw_data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)\u001b[0m\n\u001b[1;32m 707\u001b[0m skip_blank_lines=skip_blank_lines)\n\u001b[1;32m 708\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 709\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 710\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 711\u001b[0m \u001b[0mparser_f\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 453\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 454\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 455\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 456\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 457\u001b[0m \u001b[0mparser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, nrows)\u001b[0m\n\u001b[1;32m 1067\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'skipfooter not supported for iteration'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1068\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1069\u001b[0;31m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1070\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1071\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'as_recarray'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, nrows)\u001b[0m\n\u001b[1;32m 1837\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnrows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1838\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1839\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1840\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mStopIteration\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1841\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_first_chunk\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.read\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._read_low_memory\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._read_rows\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._tokenize_rows\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.raise_parser_error\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;31mParserError\u001b[0m: Error tokenizing data. C error: Expected 1 fields in line 30, saw 21\n"
- ]
- }
- ],
- "source": [
- "file_path = "https://app-learninglab.inria.fr/moocrr/gitlab/5212fa3d0a7441c34b57f854081c7450/mooc-rr/blob/master/module3/exo1/inc-25-PAY.csv" \n",
+ "file_path = \"inc-25-PAY.csv\"  # relative path: cache the CSV next to the notebook so it works on any machine\n",
"if not os.path.exists(file_path):\n",
- "response = requests.get(data_url)\n",
- "with open(file_path, "wb") as file:\n",
- " file.write(response.content)\n",
- "print("Téléchargement des données et sauvegarde locale.")\n",
+ "    response = requests.get(data_url, timeout=30)  # bounded wait so a stalled server cannot hang the notebook\n",
+ "    response.raise_for_status()  # never cache an HTTP error page as if it were the CSV\n",
+ "    with open(file_path, \"wb\") as file:\n",
+ "        file.write(response.content)\n",
+ "    print(\"Téléchargement des données et sauvegarde locale.\")\n",
"else:\n",
- " print("Données locales déjà existantes.")\n",
- "raw_data = pd.read_csv('https://app-learninglab.inria.fr/moocrr/gitlab/5212fa3d0a7441c34b57f854081c7450/mooc-rr/blob/master/module3/exo1/inc-25-PAY.csv', encoding = 'iso-8859-1', skiprows=1)\n",
+ "    print(\"Données locales déjà existantes.\")\n",
+ "\n",
+ "raw_data = pd.read_csv(file_path, encoding = 'iso-8859-1', skiprows=1)\n",
"raw_data"
]
},
--
2.18.1