From 88f49a32e9454c342dcaa5b1bdc17b553f222fbd Mon Sep 17 00:00:00 2001 From: 4c0611008db969a4dbfc8fda2f0d9d72 <4c0611008db969a4dbfc8fda2f0d9d72@app-learninglab.inria.fr> Date: Fri, 18 Aug 2023 12:55:05 +0000 Subject: [PATCH] exo2 module3 --- module3/exo2/exercice.ipynb | 1385 ++++++++++++++++++++++++++++++++++- 1 file changed, 1382 insertions(+), 3 deletions(-) diff --git a/module3/exo2/exercice.ipynb b/module3/exo2/exercice.ipynb index 0bbbe37..562f54b 100644 --- a/module3/exo2/exercice.ipynb +++ b/module3/exo2/exercice.ipynb @@ -1,5 +1,1385 @@ { - "cells": [], + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Analyse incidence Varicelle" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import numpy as np\n", + "import isoweek" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Les données de l'incidence de la varicelle sont disponibles sur le [site Web du Réseau Sentinelles](https://sentiweb.fr/). Nous les récupérons sous forme d'un fichier en format CSV dont chaque ligne correspond à une semaine de la période demandée. Nous téléchargeons toujours le jeu de données complet, qui commence en 1984 et se termine avec une semaine récente." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "data_url = \"https://www.sentiweb.fr/datasets/incidence-PAY-7.csv?v=3m0ly\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Voici l'explication des colonnes données [sur le site d'origine](https://ns.sentiweb.fr/incidence/csv-schema-v1.json):\n", + "\n", + "| Nom de colonne | Libellé de colonne |\n", + "|----------------|-----------------------------------------------------------------------------------------------------------------------------------|\n", + "| week | Semaine calendaire (ISO 8601) |\n", + "| indicator | Code de l'indicateur de surveillance |\n", + "| inc | Estimation de l'incidence de consultations en nombre de cas |\n", + "| inc_low | Estimation de la borne inférieure de l'IC95% du nombre de cas de consultation |\n", + "| inc_up | Estimation de la borne supérieure de l'IC95% du nombre de cas de consultation |\n", + "| inc100 | Estimation du taux d'incidence du nombre de cas de consultation (en cas pour 100,000 habitants) |\n", + "| inc100_low | Estimation de la borne inférieure de l'IC95% du taux d'incidence du nombre de cas de consultation (en cas pour 100,000 habitants) |\n", + "| inc100_up | Estimation de la borne supérieure de l'IC95% du taux d'incidence du nombre de cas de consultation (en cas pour 100,000 habitants) |\n", + "| geo_insee | Code de la zone géographique concernée (Code INSEE) http://www.insee.fr/fr/methodes/nomenclatures/cog/ |\n", + "| geo_name | Libellé de la zone géographique (ce libellé peut être modifié sans préavis) |\n", + "\n", + "La première ligne du fichier CSV est un commentaire, que nous ignorons en précisant `skiprows=1`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_name
02023327936021481657214325FRFrance
12023317333514055265528FRFrance
220233075821326983739513FRFrance
3202329713558829718819201228FRFrance
4202328767004043935710614FRFrance
5202327772534599990711715FRFrance
620232679192622312161141018FRFrance
7202325711498825714739171222FRFrance
8202324711115796814262171222FRFrance
920232371256361341899219929FRFrance
10202322712184812516243181224FRFrance
11202321711349759815100171123FRFrance
122023207900046151338514721FRFrance
132023197934460911259714919FRFrance
14202318710671729114051161121FRFrance
152023177918461621220614919FRFrance
16202316711387801414760171222FRFrance
17202315714040761320467211131FRFrance
182023147152471103219462231729FRFrance
19202313713322970016944201525FRFrance
20202312710374721813530161121FRFrance
2120231174919288069587410FRFrance
2220231074854273169777410FRFrance
23202309770044548946011715FRFrance
242023087817553161103412816FRFrance
25202307765953782940810614FRFrance
262023067959560171317314919FRFrance
2720230576237390785679513FRFrance
2820230476299397386259612FRFrance
2920230376063379883289612FRFrance
.................................
16761991267176081130423912312042FRFrance
16771991257161691070021638281838FRFrance
16781991247161711007122271281739FRFrance
1679199123711947767116223211329FRFrance
1680199122715452995320951271737FRFrance
1681199121714903897520831261636FRFrance
16821991207190531274225364342345FRFrance
16831991197167391124622232291939FRFrance
16841991187213851388228888382551FRFrance
1685199117713462887718047241632FRFrance
16861991167148571006819646261834FRFrance
1687199115713975978118169251832FRFrance
1688199114712265768416846221430FRFrance
168919911379567604113093171123FRFrance
1690199112710864733114397191325FRFrance
16911991117155741118419964271935FRFrance
16921991107166431137221914292038FRFrance
1693199109713741878018702241533FRFrance
1694199108713289881317765231531FRFrance
1695199107712337807716597221529FRFrance
1696199106710877701314741191226FRFrance
1697199105710442654414340181125FRFrance
16981991047791345631126314820FRFrance
16991991037153871048420290271836FRFrance
17001991027162771104621508292038FRFrance
17011991017155651027120859271836FRFrance
17021990527193751329525455342345FRFrance
17031990517190801380724353342543FRFrance
1704199050711079666015498201228FRFrance
17051990497114302610205FRFrance
\n", + "

1706 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " week indicator inc inc_low inc_up inc100 inc100_low \\\n", + "0 202332 7 9360 2148 16572 14 3 \n", + "1 202331 7 3335 1405 5265 5 2 \n", + "2 202330 7 5821 3269 8373 9 5 \n", + "3 202329 7 13558 8297 18819 20 12 \n", + "4 202328 7 6700 4043 9357 10 6 \n", + "5 202327 7 7253 4599 9907 11 7 \n", + "6 202326 7 9192 6223 12161 14 10 \n", + "7 202325 7 11498 8257 14739 17 12 \n", + "8 202324 7 11115 7968 14262 17 12 \n", + "9 202323 7 12563 6134 18992 19 9 \n", + "10 202322 7 12184 8125 16243 18 12 \n", + "11 202321 7 11349 7598 15100 17 11 \n", + "12 202320 7 9000 4615 13385 14 7 \n", + "13 202319 7 9344 6091 12597 14 9 \n", + "14 202318 7 10671 7291 14051 16 11 \n", + "15 202317 7 9184 6162 12206 14 9 \n", + "16 202316 7 11387 8014 14760 17 12 \n", + "17 202315 7 14040 7613 20467 21 11 \n", + "18 202314 7 15247 11032 19462 23 17 \n", + "19 202313 7 13322 9700 16944 20 15 \n", + "20 202312 7 10374 7218 13530 16 11 \n", + "21 202311 7 4919 2880 6958 7 4 \n", + "22 202310 7 4854 2731 6977 7 4 \n", + "23 202309 7 7004 4548 9460 11 7 \n", + "24 202308 7 8175 5316 11034 12 8 \n", + "25 202307 7 6595 3782 9408 10 6 \n", + "26 202306 7 9595 6017 13173 14 9 \n", + "27 202305 7 6237 3907 8567 9 5 \n", + "28 202304 7 6299 3973 8625 9 6 \n", + "29 202303 7 6063 3798 8328 9 6 \n", + "... ... ... ... ... ... ... ... 
\n", + "1676 199126 7 17608 11304 23912 31 20 \n", + "1677 199125 7 16169 10700 21638 28 18 \n", + "1678 199124 7 16171 10071 22271 28 17 \n", + "1679 199123 7 11947 7671 16223 21 13 \n", + "1680 199122 7 15452 9953 20951 27 17 \n", + "1681 199121 7 14903 8975 20831 26 16 \n", + "1682 199120 7 19053 12742 25364 34 23 \n", + "1683 199119 7 16739 11246 22232 29 19 \n", + "1684 199118 7 21385 13882 28888 38 25 \n", + "1685 199117 7 13462 8877 18047 24 16 \n", + "1686 199116 7 14857 10068 19646 26 18 \n", + "1687 199115 7 13975 9781 18169 25 18 \n", + "1688 199114 7 12265 7684 16846 22 14 \n", + "1689 199113 7 9567 6041 13093 17 11 \n", + "1690 199112 7 10864 7331 14397 19 13 \n", + "1691 199111 7 15574 11184 19964 27 19 \n", + "1692 199110 7 16643 11372 21914 29 20 \n", + "1693 199109 7 13741 8780 18702 24 15 \n", + "1694 199108 7 13289 8813 17765 23 15 \n", + "1695 199107 7 12337 8077 16597 22 15 \n", + "1696 199106 7 10877 7013 14741 19 12 \n", + "1697 199105 7 10442 6544 14340 18 11 \n", + "1698 199104 7 7913 4563 11263 14 8 \n", + "1699 199103 7 15387 10484 20290 27 18 \n", + "1700 199102 7 16277 11046 21508 29 20 \n", + "1701 199101 7 15565 10271 20859 27 18 \n", + "1702 199052 7 19375 13295 25455 34 23 \n", + "1703 199051 7 19080 13807 24353 34 25 \n", + "1704 199050 7 11079 6660 15498 20 12 \n", + "1705 199049 7 1143 0 2610 2 0 \n", + "\n", + " inc100_up geo_insee geo_name \n", + "0 25 FR France \n", + "1 8 FR France \n", + "2 13 FR France \n", + "3 28 FR France \n", + "4 14 FR France \n", + "5 15 FR France \n", + "6 18 FR France \n", + "7 22 FR France \n", + "8 22 FR France \n", + "9 29 FR France \n", + "10 24 FR France \n", + "11 23 FR France \n", + "12 21 FR France \n", + "13 19 FR France \n", + "14 21 FR France \n", + "15 19 FR France \n", + "16 22 FR France \n", + "17 31 FR France \n", + "18 29 FR France \n", + "19 25 FR France \n", + "20 21 FR France \n", + "21 10 FR France \n", + "22 10 FR France \n", + "23 15 FR France \n", + "24 16 FR France \n", + "25 
14 FR France \n", + "26 19 FR France \n", + "27 13 FR France \n", + "28 12 FR France \n", + "29 12 FR France \n", + "... ... ... ... \n", + "1676 42 FR France \n", + "1677 38 FR France \n", + "1678 39 FR France \n", + "1679 29 FR France \n", + "1680 37 FR France \n", + "1681 36 FR France \n", + "1682 45 FR France \n", + "1683 39 FR France \n", + "1684 51 FR France \n", + "1685 32 FR France \n", + "1686 34 FR France \n", + "1687 32 FR France \n", + "1688 30 FR France \n", + "1689 23 FR France \n", + "1690 25 FR France \n", + "1691 35 FR France \n", + "1692 38 FR France \n", + "1693 33 FR France \n", + "1694 31 FR France \n", + "1695 29 FR France \n", + "1696 26 FR France \n", + "1697 25 FR France \n", + "1698 20 FR France \n", + "1699 36 FR France \n", + "1700 38 FR France \n", + "1701 36 FR France \n", + "1702 45 FR France \n", + "1703 43 FR France \n", + "1704 28 FR France \n", + "1705 5 FR France \n", + "\n", + "[1706 rows x 10 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data = pd.read_csv(data_url, skiprows=1)\n", + "raw_data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([], dtype=int64), array([], dtype=int64))" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.where( raw_data.isnull() )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pas de valeur nulle!" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "data = raw_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nos données utilisent une convention inhabituelle: le numéro de semaine est collé à l'année, donnant l'impression qu'il s'agit d'un nombre entier. 
C'est comme ça que Pandas les interprète.\n", + "\n", + "Un deuxième problème est que Pandas ne comprend pas les numéros de semaine. Il faut lui fournir les dates de début et de fin de semaine. Nous utilisons pour cela la bibliothèque isoweek.\n", + "\n", + "Comme la conversion des semaines est devenue assez complexe, nous écrivons une petite fonction Python pour cela. Ensuite, nous l'appliquons à tous les points de nos données. Les résultats vont dans une nouvelle colonne 'period'.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def convert_week(year_and_week_int):\n", + "\n", + " year_and_week_str = str(year_and_week_int)\n", + "\n", + " year = int(year_and_week_str[:4])\n", + "\n", + " week = int(year_and_week_str[4:])\n", + "\n", + " w = isoweek.Week(year, week)\n", + "\n", + " return pd.Period(w.day(0), 'W')\n", + "\n", + "\n", + "data['period'] = [convert_week(yw) for yw in data['week']]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Il reste deux petites modifications à faire.\n", + "\n", + "Premièrement, nous définissons les périodes d'observation comme nouvel index de notre jeu de données. Ceci en fait une suite chronologique, ce qui sera pratique par la suite.\n", + "\n", + "Deuxièmement, nous trions les points par période, dans le sens chronologique.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "sorted_data = data.set_index('period').sort_index()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nous vérifions la cohérence des données. Entre la fin d'une période et le début de la période qui suit, la différence temporelle doit être zéro, ou au moins très faible. Nous laissons une \"marge d'erreur\" d'une seconde.\n", + "\n", + "Ceci s'avère tout à fait juste." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "periods = sorted_data.index\n", + "for p1, p2 in zip(periods[:-1], periods[1:]):\n", + " delta = p2.to_timestamp() - p1.end_time\n", + " if delta > pd.Timedelta('1s'):\n", + " print(p1, p2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Un premier regard sur les données !" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sorted_data['inc'].plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Un zoom sur les dernières années montre mieux la situation des pics en hiver. Le creux des incidences se trouve en septembre." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sorted_data['inc'][-100:].plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Etude de l'incidence annuelle" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Etant donné que le pic de l'épidémie se situe en hiver, à cheval entre deux années civiles, nous définissons la période de référence entre deux minima de l'incidence, du 1er septembre de l'année 𝑁 au 1er septembre de l'année 𝑁+1.\n", + "\n", + "Notre tâche est un peu compliquée par le fait que l'année ne comporte pas un nombre entier de semaines. Nous modifions donc un peu nos périodes de référence: à la place du 1er septembre de chaque année, nous utilisons le premier jour de la semaine qui contient le 1er septembre.\n", + "\n", + "Comme l'incidence de la varicelle est très faible en été, cette modification ne risque pas de fausser nos conclusions." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "first_september_week = [pd.Period(pd.Timestamp(y, 9, 1), 'W')\n", + " for y in range(1991,\n", + " sorted_data.index[-1].year)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "En partant de cette liste des semaines qui contiennent un 1er septembre, nous obtenons nos intervalles d'environ un an comme les périodes entre deux semaines adjacentes dans cette liste. 
Nous calculons les sommes des incidences hebdomadaires pour toutes ces périodes.\n", + "\n", + "Nous vérifions également que ces périodes contiennent entre 51 et 53 semaines, pour nous protéger contre d'éventuelles erreurs dans notre code.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "year = []\n", + "yearly_incidence = []\n", + "pbs = []\n", + "for week1, week2 in zip(first_september_week[:-1],\n", + " first_september_week[1:]):\n", + " one_year = sorted_data['inc'][week1:week2-1]\n", + " if abs(len(one_year)-52) > 1:\n", + " pbs.append((one_year, abs(len(one_year)-52)))\n", + " yearly_incidence.append(one_year.sum())\n", + " year.append(week2.year)\n", + "assert len(pbs) == 0\n", + "yearly_incidence = pd.Series(data=yearly_incidence, index=year)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "yearly_incidence.plot(style='*')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Une liste triée permet de plus facilement repérer les valeurs les plus élevées (à la fin)." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2020 221186\n", + "2021 376290\n", + "2002 516689\n", + "2018 542312\n", + "2017 551041\n", + "1996 564901\n", + "2019 584066\n", + "2015 604382\n", + "2000 617597\n", + "2001 619041\n", + "2012 624573\n", + "2005 628464\n", + "2006 632833\n", + "2022 641397\n", + "2011 642368\n", + "1993 643387\n", + "1995 652478\n", + "1994 661409\n", + "1998 677775\n", + "1997 683434\n", + "2014 685769\n", + "2013 698332\n", + "2007 717352\n", + "2008 749478\n", + "1999 756456\n", + "2003 758363\n", + "2004 777388\n", + "2016 782114\n", + "2010 829911\n", + "1992 832939\n", + "2009 842373\n", + "dtype: int64" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "yearly_incidence.sort_values()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], "metadata": { "kernelspec": { "display_name": "Python 3", @@ -16,10 +1396,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.3" + "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 } - -- 2.18.1