diff --git a/module3/exo2/exercice.ipynb b/module3/exo2/exercice.ipynb index 0bbbe371b01e359e381e43239412d77bf53fb1fb..3505d89b3857dea95e5adabeb061b862d297ecc2 100644 --- a/module3/exo2/exercice.ipynb +++ b/module3/exo2/exercice.ipynb @@ -1,5 +1,2410 @@ { - "cells": [], + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Incidence de la varicelle en France métropolitaine" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import isoweek" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Les données de l'incidence de la varicelle sont disponibles sur le site Web du [Réseau Sentinelles](http://www.sentiweb.fr/). Nous les récupérons sous forme d'un fichier en format CSV dont chaque ligne correspond à une semaine de la période demandée. Nous téléchargeons toujours le jeu de données complet, qui commence fin 1990 et se termine avec une semaine récente." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "data_url = \"http://www.sentiweb.fr/datasets/incidence-PAY-7.csv\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "La première ligne du fichier CSV est un commentaire, que nous ignorons en précisant `skiprows=1`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_name
0202207713531977617286201426FRFrance
120220679935710412766151119FRFrance
2202205710851779713905161121FRFrance
320220479547672112373141018FRFrance
42022037139721068017264211626FRFrance
52022027849560261096413917FRFrance
62022017137931059716989211626FRFrance
7202152713239961116867201525FRFrance
8202151713326962917023201426FRFrance
92021507141281031217944211527FRFrance
102021497136741036916979211626FRFrance
11202148711549850314595171222FRFrance
12202147711419837614462171222FRFrance
132021467821657241070812816FRFrance
1420214578965646811462141018FRFrance
152021447873656361183613818FRFrance
162021437814551641112612717FRFrance
172021427944360371284914919FRFrance
182021417402122395803639FRFrance
1920214074441245464287410FRFrance
202021397229110563526315FRFrance
2120213874325226763837410FRFrance
22202137719647543174315FRFrance
232021367344117305152528FRFrance
242021357256211074017426FRFrance
25202134714293782480204FRFrance
262021337382918305828639FRFrance
272021327410818956321639FRFrance
2820213174793230172857311FRFrance
292021307719041911018911616FRFrance
.................................
15991991267176081130423912312042FRFrance
16001991257161691070021638281838FRFrance
16011991247161711007122271281739FRFrance
1602199123711947767116223211329FRFrance
1603199122715452995320951271737FRFrance
1604199121714903897520831261636FRFrance
16051991207190531274225364342345FRFrance
16061991197167391124622232291939FRFrance
16071991187213851388228888382551FRFrance
1608199117713462887718047241632FRFrance
16091991167148571006819646261834FRFrance
1610199115713975978118169251832FRFrance
1611199114712265768416846221430FRFrance
161219911379567604113093171123FRFrance
1613199112710864733114397191325FRFrance
16141991117155741118419964271935FRFrance
16151991107166431137221914292038FRFrance
1616199109713741878018702241533FRFrance
1617199108713289881317765231531FRFrance
1618199107712337807716597221529FRFrance
1619199106710877701314741191226FRFrance
1620199105710442654414340181125FRFrance
16211991047791345631126314820FRFrance
16221991037153871048420290271836FRFrance
16231991027162771104621508292038FRFrance
16241991017155651027120859271836FRFrance
16251990527193751329525455342345FRFrance
16261990517190801380724353342543FRFrance
1627199050711079666015498201228FRFrance
16281990497114302610205FRFrance
\n", + "

1629 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " week indicator inc inc_low inc_up inc100 inc100_low \\\n", + "0 202207 7 13531 9776 17286 20 14 \n", + "1 202206 7 9935 7104 12766 15 11 \n", + "2 202205 7 10851 7797 13905 16 11 \n", + "3 202204 7 9547 6721 12373 14 10 \n", + "4 202203 7 13972 10680 17264 21 16 \n", + "5 202202 7 8495 6026 10964 13 9 \n", + "6 202201 7 13793 10597 16989 21 16 \n", + "7 202152 7 13239 9611 16867 20 15 \n", + "8 202151 7 13326 9629 17023 20 14 \n", + "9 202150 7 14128 10312 17944 21 15 \n", + "10 202149 7 13674 10369 16979 21 16 \n", + "11 202148 7 11549 8503 14595 17 12 \n", + "12 202147 7 11419 8376 14462 17 12 \n", + "13 202146 7 8216 5724 10708 12 8 \n", + "14 202145 7 8965 6468 11462 14 10 \n", + "15 202144 7 8736 5636 11836 13 8 \n", + "16 202143 7 8145 5164 11126 12 7 \n", + "17 202142 7 9443 6037 12849 14 9 \n", + "18 202141 7 4021 2239 5803 6 3 \n", + "19 202140 7 4441 2454 6428 7 4 \n", + "20 202139 7 2291 1056 3526 3 1 \n", + "21 202138 7 4325 2267 6383 7 4 \n", + "22 202137 7 1964 754 3174 3 1 \n", + "23 202136 7 3441 1730 5152 5 2 \n", + "24 202135 7 2562 1107 4017 4 2 \n", + "25 202134 7 1429 378 2480 2 0 \n", + "26 202133 7 3829 1830 5828 6 3 \n", + "27 202132 7 4108 1895 6321 6 3 \n", + "28 202131 7 4793 2301 7285 7 3 \n", + "29 202130 7 7190 4191 10189 11 6 \n", + "... ... ... ... ... ... ... ... 
\n", + "1599 199126 7 17608 11304 23912 31 20 \n", + "1600 199125 7 16169 10700 21638 28 18 \n", + "1601 199124 7 16171 10071 22271 28 17 \n", + "1602 199123 7 11947 7671 16223 21 13 \n", + "1603 199122 7 15452 9953 20951 27 17 \n", + "1604 199121 7 14903 8975 20831 26 16 \n", + "1605 199120 7 19053 12742 25364 34 23 \n", + "1606 199119 7 16739 11246 22232 29 19 \n", + "1607 199118 7 21385 13882 28888 38 25 \n", + "1608 199117 7 13462 8877 18047 24 16 \n", + "1609 199116 7 14857 10068 19646 26 18 \n", + "1610 199115 7 13975 9781 18169 25 18 \n", + "1611 199114 7 12265 7684 16846 22 14 \n", + "1612 199113 7 9567 6041 13093 17 11 \n", + "1613 199112 7 10864 7331 14397 19 13 \n", + "1614 199111 7 15574 11184 19964 27 19 \n", + "1615 199110 7 16643 11372 21914 29 20 \n", + "1616 199109 7 13741 8780 18702 24 15 \n", + "1617 199108 7 13289 8813 17765 23 15 \n", + "1618 199107 7 12337 8077 16597 22 15 \n", + "1619 199106 7 10877 7013 14741 19 12 \n", + "1620 199105 7 10442 6544 14340 18 11 \n", + "1621 199104 7 7913 4563 11263 14 8 \n", + "1622 199103 7 15387 10484 20290 27 18 \n", + "1623 199102 7 16277 11046 21508 29 20 \n", + "1624 199101 7 15565 10271 20859 27 18 \n", + "1625 199052 7 19375 13295 25455 34 23 \n", + "1626 199051 7 19080 13807 24353 34 25 \n", + "1627 199050 7 11079 6660 15498 20 12 \n", + "1628 199049 7 1143 0 2610 2 0 \n", + "\n", + " inc100_up geo_insee geo_name \n", + "0 26 FR France \n", + "1 19 FR France \n", + "2 21 FR France \n", + "3 18 FR France \n", + "4 26 FR France \n", + "5 17 FR France \n", + "6 26 FR France \n", + "7 25 FR France \n", + "8 26 FR France \n", + "9 27 FR France \n", + "10 26 FR France \n", + "11 22 FR France \n", + "12 22 FR France \n", + "13 16 FR France \n", + "14 18 FR France \n", + "15 18 FR France \n", + "16 17 FR France \n", + "17 19 FR France \n", + "18 9 FR France \n", + "19 10 FR France \n", + "20 5 FR France \n", + "21 10 FR France \n", + "22 5 FR France \n", + "23 8 FR France \n", + "24 6 FR France \n", + "25 4 
FR France \n", + "26 9 FR France \n", + "27 9 FR France \n", + "28 11 FR France \n", + "29 16 FR France \n", + "... ... ... ... \n", + "1599 42 FR France \n", + "1600 38 FR France \n", + "1601 39 FR France \n", + "1602 29 FR France \n", + "1603 37 FR France \n", + "1604 36 FR France \n", + "1605 45 FR France \n", + "1606 39 FR France \n", + "1607 51 FR France \n", + "1608 32 FR France \n", + "1609 34 FR France \n", + "1610 32 FR France \n", + "1611 30 FR France \n", + "1612 23 FR France \n", + "1613 25 FR France \n", + "1614 35 FR France \n", + "1615 38 FR France \n", + "1616 33 FR France \n", + "1617 31 FR France \n", + "1618 29 FR France \n", + "1619 26 FR France \n", + "1620 25 FR France \n", + "1621 20 FR France \n", + "1622 36 FR France \n", + "1623 38 FR France \n", + "1624 36 FR France \n", + "1625 45 FR France \n", + "1626 43 FR France \n", + "1627 28 FR France \n", + "1628 5 FR France \n", + "\n", + "[1629 rows x 10 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data = pd.read_csv(data_url, encoding = 'iso-8859-1', skiprows=1)\n", + "raw_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nous allons maintenant inspecter les données afin de voir s'il y a des données manquantes: Il semble que ce ne soit pas le cas à première vue. " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_name
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [week, indicator, inc, inc_low, inc_up, inc100, inc100_low, inc100_up, geo_insee, geo_name]\n", + "Index: []" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data[raw_data.isnull().any(axis=1)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nos données utilisent une convention inhabituelle: le numéro de\n", + "semaine est collé à l'année, donnant l'impression qu'il s'agit\n", + "de nombre entier. C'est comme ça que Pandas les interprète.Un deuxième problème est que Pandas ne comprend pas les numéros de\n", + "semaine. Il faut lui fournir les dates de début et de fin de\n", + "semaine. Nous utilisons pour cela la bibliothèque `isoweek`.Comme la conversion des semaines est devenu assez complexe, nous\n", + "écrivons une petite fonction Python pour cela. Ensuite, nous\n", + "l'appliquons à tous les points de nos donnés. Les résultats vont\n", + "dans une nouvelle colonne 'period'." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_nameperiod
0202207713531977617286201426FRFrance2022-02-14/2022-02-20
120220679935710412766151119FRFrance2022-02-07/2022-02-13
2202205710851779713905161121FRFrance2022-01-31/2022-02-06
320220479547672112373141018FRFrance2022-01-24/2022-01-30
42022037139721068017264211626FRFrance2022-01-17/2022-01-23
52022027849560261096413917FRFrance2022-01-10/2022-01-16
62022017137931059716989211626FRFrance2022-01-03/2022-01-09
7202152713239961116867201525FRFrance2021-12-27/2022-01-02
8202151713326962917023201426FRFrance2021-12-20/2021-12-26
92021507141281031217944211527FRFrance2021-12-13/2021-12-19
102021497136741036916979211626FRFrance2021-12-06/2021-12-12
11202148711549850314595171222FRFrance2021-11-29/2021-12-05
12202147711419837614462171222FRFrance2021-11-22/2021-11-28
132021467821657241070812816FRFrance2021-11-15/2021-11-21
1420214578965646811462141018FRFrance2021-11-08/2021-11-14
152021447873656361183613818FRFrance2021-11-01/2021-11-07
162021437814551641112612717FRFrance2021-10-25/2021-10-31
172021427944360371284914919FRFrance2021-10-18/2021-10-24
182021417402122395803639FRFrance2021-10-11/2021-10-17
1920214074441245464287410FRFrance2021-10-04/2021-10-10
202021397229110563526315FRFrance2021-09-27/2021-10-03
2120213874325226763837410FRFrance2021-09-20/2021-09-26
22202137719647543174315FRFrance2021-09-13/2021-09-19
232021367344117305152528FRFrance2021-09-06/2021-09-12
242021357256211074017426FRFrance2021-08-30/2021-09-05
25202134714293782480204FRFrance2021-08-23/2021-08-29
262021337382918305828639FRFrance2021-08-16/2021-08-22
272021327410818956321639FRFrance2021-08-09/2021-08-15
2820213174793230172857311FRFrance2021-08-02/2021-08-08
292021307719041911018911616FRFrance2021-07-26/2021-08-01
....................................
15991991267176081130423912312042FRFrance1991-06-24/1991-06-30
16001991257161691070021638281838FRFrance1991-06-17/1991-06-23
16011991247161711007122271281739FRFrance1991-06-10/1991-06-16
1602199123711947767116223211329FRFrance1991-06-03/1991-06-09
1603199122715452995320951271737FRFrance1991-05-27/1991-06-02
1604199121714903897520831261636FRFrance1991-05-20/1991-05-26
16051991207190531274225364342345FRFrance1991-05-13/1991-05-19
16061991197167391124622232291939FRFrance1991-05-06/1991-05-12
16071991187213851388228888382551FRFrance1991-04-29/1991-05-05
1608199117713462887718047241632FRFrance1991-04-22/1991-04-28
16091991167148571006819646261834FRFrance1991-04-15/1991-04-21
1610199115713975978118169251832FRFrance1991-04-08/1991-04-14
1611199114712265768416846221430FRFrance1991-04-01/1991-04-07
161219911379567604113093171123FRFrance1991-03-25/1991-03-31
1613199112710864733114397191325FRFrance1991-03-18/1991-03-24
16141991117155741118419964271935FRFrance1991-03-11/1991-03-17
16151991107166431137221914292038FRFrance1991-03-04/1991-03-10
1616199109713741878018702241533FRFrance1991-02-25/1991-03-03
1617199108713289881317765231531FRFrance1991-02-18/1991-02-24
1618199107712337807716597221529FRFrance1991-02-11/1991-02-17
1619199106710877701314741191226FRFrance1991-02-04/1991-02-10
1620199105710442654414340181125FRFrance1991-01-28/1991-02-03
16211991047791345631126314820FRFrance1991-01-21/1991-01-27
16221991037153871048420290271836FRFrance1991-01-14/1991-01-20
16231991027162771104621508292038FRFrance1991-01-07/1991-01-13
16241991017155651027120859271836FRFrance1990-12-31/1991-01-06
16251990527193751329525455342345FRFrance1990-12-24/1990-12-30
16261990517190801380724353342543FRFrance1990-12-17/1990-12-23
1627199050711079666015498201228FRFrance1990-12-10/1990-12-16
16281990497114302610205FRFrance1990-12-03/1990-12-09
\n", + "

1629 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " week indicator inc inc_low inc_up inc100 inc100_low \\\n", + "0 202207 7 13531 9776 17286 20 14 \n", + "1 202206 7 9935 7104 12766 15 11 \n", + "2 202205 7 10851 7797 13905 16 11 \n", + "3 202204 7 9547 6721 12373 14 10 \n", + "4 202203 7 13972 10680 17264 21 16 \n", + "5 202202 7 8495 6026 10964 13 9 \n", + "6 202201 7 13793 10597 16989 21 16 \n", + "7 202152 7 13239 9611 16867 20 15 \n", + "8 202151 7 13326 9629 17023 20 14 \n", + "9 202150 7 14128 10312 17944 21 15 \n", + "10 202149 7 13674 10369 16979 21 16 \n", + "11 202148 7 11549 8503 14595 17 12 \n", + "12 202147 7 11419 8376 14462 17 12 \n", + "13 202146 7 8216 5724 10708 12 8 \n", + "14 202145 7 8965 6468 11462 14 10 \n", + "15 202144 7 8736 5636 11836 13 8 \n", + "16 202143 7 8145 5164 11126 12 7 \n", + "17 202142 7 9443 6037 12849 14 9 \n", + "18 202141 7 4021 2239 5803 6 3 \n", + "19 202140 7 4441 2454 6428 7 4 \n", + "20 202139 7 2291 1056 3526 3 1 \n", + "21 202138 7 4325 2267 6383 7 4 \n", + "22 202137 7 1964 754 3174 3 1 \n", + "23 202136 7 3441 1730 5152 5 2 \n", + "24 202135 7 2562 1107 4017 4 2 \n", + "25 202134 7 1429 378 2480 2 0 \n", + "26 202133 7 3829 1830 5828 6 3 \n", + "27 202132 7 4108 1895 6321 6 3 \n", + "28 202131 7 4793 2301 7285 7 3 \n", + "29 202130 7 7190 4191 10189 11 6 \n", + "... ... ... ... ... ... ... ... 
\n", + "1599 199126 7 17608 11304 23912 31 20 \n", + "1600 199125 7 16169 10700 21638 28 18 \n", + "1601 199124 7 16171 10071 22271 28 17 \n", + "1602 199123 7 11947 7671 16223 21 13 \n", + "1603 199122 7 15452 9953 20951 27 17 \n", + "1604 199121 7 14903 8975 20831 26 16 \n", + "1605 199120 7 19053 12742 25364 34 23 \n", + "1606 199119 7 16739 11246 22232 29 19 \n", + "1607 199118 7 21385 13882 28888 38 25 \n", + "1608 199117 7 13462 8877 18047 24 16 \n", + "1609 199116 7 14857 10068 19646 26 18 \n", + "1610 199115 7 13975 9781 18169 25 18 \n", + "1611 199114 7 12265 7684 16846 22 14 \n", + "1612 199113 7 9567 6041 13093 17 11 \n", + "1613 199112 7 10864 7331 14397 19 13 \n", + "1614 199111 7 15574 11184 19964 27 19 \n", + "1615 199110 7 16643 11372 21914 29 20 \n", + "1616 199109 7 13741 8780 18702 24 15 \n", + "1617 199108 7 13289 8813 17765 23 15 \n", + "1618 199107 7 12337 8077 16597 22 15 \n", + "1619 199106 7 10877 7013 14741 19 12 \n", + "1620 199105 7 10442 6544 14340 18 11 \n", + "1621 199104 7 7913 4563 11263 14 8 \n", + "1622 199103 7 15387 10484 20290 27 18 \n", + "1623 199102 7 16277 11046 21508 29 20 \n", + "1624 199101 7 15565 10271 20859 27 18 \n", + "1625 199052 7 19375 13295 25455 34 23 \n", + "1626 199051 7 19080 13807 24353 34 25 \n", + "1627 199050 7 11079 6660 15498 20 12 \n", + "1628 199049 7 1143 0 2610 2 0 \n", + "\n", + " inc100_up geo_insee geo_name period \n", + "0 26 FR France 2022-02-14/2022-02-20 \n", + "1 19 FR France 2022-02-07/2022-02-13 \n", + "2 21 FR France 2022-01-31/2022-02-06 \n", + "3 18 FR France 2022-01-24/2022-01-30 \n", + "4 26 FR France 2022-01-17/2022-01-23 \n", + "5 17 FR France 2022-01-10/2022-01-16 \n", + "6 26 FR France 2022-01-03/2022-01-09 \n", + "7 25 FR France 2021-12-27/2022-01-02 \n", + "8 26 FR France 2021-12-20/2021-12-26 \n", + "9 27 FR France 2021-12-13/2021-12-19 \n", + "10 26 FR France 2021-12-06/2021-12-12 \n", + "11 22 FR France 2021-11-29/2021-12-05 \n", + "12 22 FR France 2021-11-22/2021-11-28 \n", 
+ "13 16 FR France 2021-11-15/2021-11-21 \n", + "14 18 FR France 2021-11-08/2021-11-14 \n", + "15 18 FR France 2021-11-01/2021-11-07 \n", + "16 17 FR France 2021-10-25/2021-10-31 \n", + "17 19 FR France 2021-10-18/2021-10-24 \n", + "18 9 FR France 2021-10-11/2021-10-17 \n", + "19 10 FR France 2021-10-04/2021-10-10 \n", + "20 5 FR France 2021-09-27/2021-10-03 \n", + "21 10 FR France 2021-09-20/2021-09-26 \n", + "22 5 FR France 2021-09-13/2021-09-19 \n", + "23 8 FR France 2021-09-06/2021-09-12 \n", + "24 6 FR France 2021-08-30/2021-09-05 \n", + "25 4 FR France 2021-08-23/2021-08-29 \n", + "26 9 FR France 2021-08-16/2021-08-22 \n", + "27 9 FR France 2021-08-09/2021-08-15 \n", + "28 11 FR France 2021-08-02/2021-08-08 \n", + "29 16 FR France 2021-07-26/2021-08-01 \n", + "... ... ... ... ... \n", + "1599 42 FR France 1991-06-24/1991-06-30 \n", + "1600 38 FR France 1991-06-17/1991-06-23 \n", + "1601 39 FR France 1991-06-10/1991-06-16 \n", + "1602 29 FR France 1991-06-03/1991-06-09 \n", + "1603 37 FR France 1991-05-27/1991-06-02 \n", + "1604 36 FR France 1991-05-20/1991-05-26 \n", + "1605 45 FR France 1991-05-13/1991-05-19 \n", + "1606 39 FR France 1991-05-06/1991-05-12 \n", + "1607 51 FR France 1991-04-29/1991-05-05 \n", + "1608 32 FR France 1991-04-22/1991-04-28 \n", + "1609 34 FR France 1991-04-15/1991-04-21 \n", + "1610 32 FR France 1991-04-08/1991-04-14 \n", + "1611 30 FR France 1991-04-01/1991-04-07 \n", + "1612 23 FR France 1991-03-25/1991-03-31 \n", + "1613 25 FR France 1991-03-18/1991-03-24 \n", + "1614 35 FR France 1991-03-11/1991-03-17 \n", + "1615 38 FR France 1991-03-04/1991-03-10 \n", + "1616 33 FR France 1991-02-25/1991-03-03 \n", + "1617 31 FR France 1991-02-18/1991-02-24 \n", + "1618 29 FR France 1991-02-11/1991-02-17 \n", + "1619 26 FR France 1991-02-04/1991-02-10 \n", + "1620 25 FR France 1991-01-28/1991-02-03 \n", + "1621 20 FR France 1991-01-21/1991-01-27 \n", + "1622 36 FR France 1991-01-14/1991-01-20 \n", + "1623 38 FR France 1991-01-07/1991-01-13 
\n", + "1624 36 FR France 1990-12-31/1991-01-06 \n", + "1625 45 FR France 1990-12-24/1990-12-30 \n", + "1626 43 FR France 1990-12-17/1990-12-23 \n", + "1627 28 FR France 1990-12-10/1990-12-16 \n", + "1628 5 FR France 1990-12-03/1990-12-09 \n", + "\n", + "[1629 rows x 11 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def convert_week(year_and_week_int):\n", + " year_and_week_str = str(year_and_week_int)\n", + " year = int(year_and_week_str[:4])\n", + " week = int(year_and_week_str[4:])\n", + " w = isoweek.Week(year, week)\n", + " return pd.Period(w.day(0), 'W')\n", + "\n", + "raw_data['period'] = [convert_week(yw) for yw in raw_data['week']]\n", + "raw_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Il reste deux petites modifications à faire. Premièrement, nous définissons les périodes d'observation comme nouvel index de notre jeu de données. Ceci en fait une suite chronologique, ce qui sera pratique par la suite. Deuxièmement, nous trions les points par période, dans le sens chronologique." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "sorted_data = raw_data.set_index('period').sort_index()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nous vérifions la cohérence des données. Entre la fin d'une période et le début de la période qui suit, la différence temporelle doit être zéro, ou au moins très faible. Nous laissons une \"marge d'erreur\" d'une seconde. Ceci s'avère tout à fait juste : la vérification ci-dessous n'affiche aucune paire de périodes, il n'y a donc aucune interruption dans la série."
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "periods = sorted_data.index\n", + "for p1, p2 in zip(periods[:-1], periods[1:]):\n", + " delta = p2.to_timestamp() - p1.end_time\n", + " if delta > pd.Timedelta('1s'):\n", + " print(p1, p2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nous pouvons maintenant commencer à regarder les données." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sorted_data['inc'].plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Zoomons sur les dernières années afin de mieux voir d'éventuels effets de saisonnalité. On observe bien une forme de saisonnalité, avec des creux du printemps jusqu'au début de l'automne et des pics en hiver (janvier-mars environ, selon les années). Notons que ces pics sont plus larges, c'est-à-dire qu'ils durent plus longtemps, que ceux de la grippe par exemple." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sorted_data['inc'][-300:].plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Etude de l'incidence annuelle" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Etant donné que le pic de l'épidémie se situe en hiver, à cheval entre deux années civiles, nous définissons la période de référence entre deux minima de l'incidence, du 1er septembre de l'année N au 1er septembre de l'année N+1. Notre tâche est un peu compliquée par le fait que l'année ne comporte pas un nombre entier de semaines. Nous modifions donc un peu nos périodes\n", + "de référence: à la place du 1er septembre de chaque année, nous utilisons le premier jour de la semaine qui contient le 1er septembre. Comme l'incidence de la varicelle est très faible en fin d'été, cette modification ne risque pas de fausser nos conclusions. Encore un petit détail: les données commencent fin 1990, ce qui rend la première année incomplète. Nous commençons donc l'analyse en 1991." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "first_sept_week = [pd.Period(pd.Timestamp(y, 9, 1), 'W')\n", + " for y in range(1991,\n", + " sorted_data.index[-1].year)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "En partant de cette liste des semaines qui contiennent un 1er septembre, nous obtenons nos intervalles d'environ un an comme les périodes entre deux semaines adjacentes dans cette liste. Nous calculons les sommes des incidences hebdomadaires pour toutes ces périodes. Nous vérifions également que ces périodes contiennent entre 51 et 53 semaines, pour nous protéger contre d'éventuelles erreurs dans notre code."
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "year = []\n", + "yearly_incidence = []\n", + "for week1, week2 in zip(first_sept_week[:-1],\n", + " first_sept_week[1:]):\n", + " one_year = sorted_data['inc'][week1:week2-1]\n", + " assert abs(len(one_year)-52) < 2\n", + " yearly_incidence.append(one_year.sum())\n", + " year.append(week2.year)\n", + "yearly_incidence = pd.Series(data=yearly_incidence, index=year)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Voici les incidences annuelles." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "yearly_incidence.plot(style='*')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Une liste triée permet de repérer plus facilement les valeurs les plus élevées (à la fin). On observe que l'épidémie a été la plus forte en 2009, et la plus faible en 2020." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2020 221186\n", + "2021 376290\n", + "2002 516689\n", + "2018 542312\n", + "2017 551041\n", + "1996 564901\n", + "2019 584066\n", + "2015 604382\n", + "2000 617597\n", + "2001 619041\n", + "2012 624573\n", + "2005 628464\n", + "2006 632833\n", + "2011 642368\n", + "1993 643387\n", + "1995 652478\n", + "1994 661409\n", + "1998 677775\n", + "1997 683434\n", + "2014 685769\n", + "2013 698332\n", + "2007 717352\n", + "2008 749478\n", + "1999 756456\n", + "2003 758363\n", + "2004 777388\n", + "2016 782114\n", + "2010 829911\n", + "1992 832939\n", + "2009 842373\n", + "dtype: int64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "yearly_incidence.sort_values()" + ] + } + ], "metadata": { "kernelspec": { "display_name": "Python 3", @@ -16,10 +2421,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.3" + "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 } -