{ "cells": [
 { "cell_type": "markdown", "metadata": {}, "source": [ "# Analyse de l'incidence de la varicelle" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## Chargement des données" ] },
 { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [
  "%matplotlib inline\n",
  "import os\n",
  "import urllib.request\n",
  "\n",
  "import matplotlib.pyplot as plt\n",
  "import pandas as pd\n",
  "import isoweek" ] },
 { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
  "# Remote CSV with the incidence data; fetched over HTTPS so the download\n",
  "# cannot be altered in transit.\n",
  "data_url = \"https://www.sentiweb.fr/datasets/incidence-PAY-7.csv\"" ] },
 { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [
  "data_file = \"varicelle.csv\"\n",
  "\n",
  "# Download only when no local copy exists, so that re-running the notebook\n",
  "# keeps working on the same snapshot of the data.\n",
  "if not os.path.exists(data_file):\n",
  "    urllib.request.urlretrieve(data_url, data_file)" ] },
 { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_name
020234474336185468187311FRFrance
12023437392416296219639FRFrance
220234273968121267246210FRFrance
32023417335617644948537FRFrance
42023407284514104280426FRFrance
5202339717396292849315FRFrance
6202338716632743052315FRFrance
7202337711222232021213FRFrance
82023367726101442102FRFrance
92023357961961826102FRFrance
102023347116892327204FRFrance
112023337330811845432528FRFrance
122023327799611201487212222FRFrance
132023317331813985238528FRFrance
1420233075821326983739513FRFrance
15202329713558829718819201228FRFrance
16202328767004043935710614FRFrance
17202327772534599990711715FRFrance
1820232679192622312161141018FRFrance
19202325711498825714739171222FRFrance
20202324711115796814262171222FRFrance
2120232371256361341899219929FRFrance
22202322712184812516243181224FRFrance
23202321711349759815100171123FRFrance
242023207900046151338514721FRFrance
252023197934460911259714919FRFrance
26202318710671729114051161121FRFrance
272023177918461621220614919FRFrance
28202316711387801414760171222FRFrance
29202315714040761320467211131FRFrance
.................................
16881991267176081130423912312042FRFrance
16891991257161691070021638281838FRFrance
16901991247161711007122271281739FRFrance
1691199123711947767116223211329FRFrance
1692199122715452995320951271737FRFrance
1693199121714903897520831261636FRFrance
16941991207190531274225364342345FRFrance
16951991197167391124622232291939FRFrance
16961991187213851388228888382551FRFrance
1697199117713462887718047241632FRFrance
16981991167148571006819646261834FRFrance
1699199115713975978118169251832FRFrance
1700199114712265768416846221430FRFrance
170119911379567604113093171123FRFrance
1702199112710864733114397191325FRFrance
17031991117155741118419964271935FRFrance
17041991107166431137221914292038FRFrance
1705199109713741878018702241533FRFrance
1706199108713289881317765231531FRFrance
1707199107712337807716597221529FRFrance
1708199106710877701314741191226FRFrance
1709199105710442654414340181125FRFrance
17101991047791345631126314820FRFrance
17111991037153871048420290271836FRFrance
17121991027162771104621508292038FRFrance
17131991017155651027120859271836FRFrance
17141990527193751329525455342345FRFrance
17151990517190801380724353342543FRFrance
1716199050711079666015498201228FRFrance
17171990497114302610205FRFrance
\n", "

1718 rows × 10 columns

\n", "
" ], "text/plain": [ " week indicator inc inc_low inc_up inc100 inc100_low \\\n", "0 202344 7 4336 1854 6818 7 3 \n", "1 202343 7 3924 1629 6219 6 3 \n", "2 202342 7 3968 1212 6724 6 2 \n", "3 202341 7 3356 1764 4948 5 3 \n", "4 202340 7 2845 1410 4280 4 2 \n", "5 202339 7 1739 629 2849 3 1 \n", "6 202338 7 1663 274 3052 3 1 \n", "7 202337 7 1122 223 2021 2 1 \n", "8 202336 7 726 10 1442 1 0 \n", "9 202335 7 961 96 1826 1 0 \n", "10 202334 7 1168 9 2327 2 0 \n", "11 202333 7 3308 1184 5432 5 2 \n", "12 202332 7 7996 1120 14872 12 2 \n", "13 202331 7 3318 1398 5238 5 2 \n", "14 202330 7 5821 3269 8373 9 5 \n", "15 202329 7 13558 8297 18819 20 12 \n", "16 202328 7 6700 4043 9357 10 6 \n", "17 202327 7 7253 4599 9907 11 7 \n", "18 202326 7 9192 6223 12161 14 10 \n", "19 202325 7 11498 8257 14739 17 12 \n", "20 202324 7 11115 7968 14262 17 12 \n", "21 202323 7 12563 6134 18992 19 9 \n", "22 202322 7 12184 8125 16243 18 12 \n", "23 202321 7 11349 7598 15100 17 11 \n", "24 202320 7 9000 4615 13385 14 7 \n", "25 202319 7 9344 6091 12597 14 9 \n", "26 202318 7 10671 7291 14051 16 11 \n", "27 202317 7 9184 6162 12206 14 9 \n", "28 202316 7 11387 8014 14760 17 12 \n", "29 202315 7 14040 7613 20467 21 11 \n", "... ... ... ... ... ... ... ... 
\n", "1688 199126 7 17608 11304 23912 31 20 \n", "1689 199125 7 16169 10700 21638 28 18 \n", "1690 199124 7 16171 10071 22271 28 17 \n", "1691 199123 7 11947 7671 16223 21 13 \n", "1692 199122 7 15452 9953 20951 27 17 \n", "1693 199121 7 14903 8975 20831 26 16 \n", "1694 199120 7 19053 12742 25364 34 23 \n", "1695 199119 7 16739 11246 22232 29 19 \n", "1696 199118 7 21385 13882 28888 38 25 \n", "1697 199117 7 13462 8877 18047 24 16 \n", "1698 199116 7 14857 10068 19646 26 18 \n", "1699 199115 7 13975 9781 18169 25 18 \n", "1700 199114 7 12265 7684 16846 22 14 \n", "1701 199113 7 9567 6041 13093 17 11 \n", "1702 199112 7 10864 7331 14397 19 13 \n", "1703 199111 7 15574 11184 19964 27 19 \n", "1704 199110 7 16643 11372 21914 29 20 \n", "1705 199109 7 13741 8780 18702 24 15 \n", "1706 199108 7 13289 8813 17765 23 15 \n", "1707 199107 7 12337 8077 16597 22 15 \n", "1708 199106 7 10877 7013 14741 19 12 \n", "1709 199105 7 10442 6544 14340 18 11 \n", "1710 199104 7 7913 4563 11263 14 8 \n", "1711 199103 7 15387 10484 20290 27 18 \n", "1712 199102 7 16277 11046 21508 29 20 \n", "1713 199101 7 15565 10271 20859 27 18 \n", "1714 199052 7 19375 13295 25455 34 23 \n", "1715 199051 7 19080 13807 24353 34 25 \n", "1716 199050 7 11079 6660 15498 20 12 \n", "1717 199049 7 1143 0 2610 2 0 \n", "\n", " inc100_up geo_insee geo_name \n", "0 11 FR France \n", "1 9 FR France \n", "2 10 FR France \n", "3 7 FR France \n", "4 6 FR France \n", "5 5 FR France \n", "6 5 FR France \n", "7 3 FR France \n", "8 2 FR France \n", "9 2 FR France \n", "10 4 FR France \n", "11 8 FR France \n", "12 22 FR France \n", "13 8 FR France \n", "14 13 FR France \n", "15 28 FR France \n", "16 14 FR France \n", "17 15 FR France \n", "18 18 FR France \n", "19 22 FR France \n", "20 22 FR France \n", "21 29 FR France \n", "22 24 FR France \n", "23 23 FR France \n", "24 21 FR France \n", "25 19 FR France \n", "26 21 FR France \n", "27 19 FR France \n", "28 22 FR France \n", "29 31 FR France \n", "... ... ... ... 
\n", "1688 42 FR France \n", "1689 38 FR France \n", "1690 39 FR France \n", "1691 29 FR France \n", "1692 37 FR France \n", "1693 36 FR France \n", "1694 45 FR France \n", "1695 39 FR France \n", "1696 51 FR France \n", "1697 32 FR France \n", "1698 34 FR France \n", "1699 32 FR France \n", "1700 30 FR France \n", "1701 23 FR France \n", "1702 25 FR France \n", "1703 35 FR France \n", "1704 38 FR France \n", "1705 33 FR France \n", "1706 31 FR France \n", "1707 29 FR France \n", "1708 26 FR France \n", "1709 25 FR France \n", "1710 20 FR France \n", "1711 36 FR France \n", "1712 38 FR France \n", "1713 36 FR France \n", "1714 45 FR France \n", "1715 43 FR France \n", "1716 28 FR France \n", "1717 5 FR France \n", "\n", "[1718 rows x 10 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "raw_data = pd.read_csv(data_file, encoding = 'iso-8859-1', skiprows=1)\n", "raw_data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Vérification des données" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Les données sont chargées, on vérifie s'il y a des anomalies." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_name
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [week, indicator, inc, inc_low, inc_up, inc100, inc100_low, inc100_up, geo_insee, geo_name]\n", "Index: []" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "raw_data[raw_data.isnull().any(axis=1)]" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "Ça a l'air bon..." ] },
 { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [
  "# Work on a copy: the column added below must not modify raw_data,\n",
  "# which an earlier cell has already displayed.\n",
  "data = raw_data.copy()" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "Conversion format date avec isoweek" ] },
 { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [
  "def convert_week(year_and_week_int):\n",
  "    \"\"\"Convert a week code such as 202344 into a weekly pandas Period.\n",
  "\n",
  "    The decimal string of the argument is split into its first four\n",
  "    characters (the year) and the remaining characters (the week number).\n",
  "    Returns the weekly ``pd.Period`` containing day 0 of that ``isoweek.Week``.\n",
  "    \"\"\"\n",
  "    year_and_week_str = str(year_and_week_int)\n",
  "    year = int(year_and_week_str[:4])\n",
  "    week = int(year_and_week_str[4:])\n",
  "    w = isoweek.Week(year, week)\n",
  "    return pd.Period(w.day(0), 'W')\n",
  "\n",
  "data['period'] = [convert_week(yw) for yw in data['week']]" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "Il reste deux petites modifications à faire. Premièrement, nous définissons les périodes d'observation\n", "comme nouvel index de notre jeu de données. Ceci en fait une suite chronologique, ce qui sera pratique par la suite. Deuxièmement, nous trions les points par période, dans le sens chronologique." ] },
 { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "sorted_data = data.set_index('period').sort_index()" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "Cohérence temporelle des données (marge de 1s)" ] },
 { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [
  "periods = sorted_data.index\n",
  "# Print every pair of consecutive periods whose gap (start of the second\n",
  "# minus end of the first) exceeds one second.\n",
  "for p1, p2 in zip(periods[:-1], periods[1:]):\n",
  "    delta = p2.to_timestamp() - p1.end_time\n",
  "    if delta > pd.Timedelta('1s'):\n",
  "        print(p1, p2)" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "Tout roule..."
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Analyse" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sorted_data['inc'].plot()" ] },
 { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [
  "# A yearly window runs from the week containing 1 September to the week\n",
  "# just before the next such week. Keep every boundary whose preceding week\n",
  "# is covered by the data, so the latest complete season is not dropped when\n",
  "# the series extends past 1 September of its final year.\n",
  "last_period = sorted_data.index[-1]\n",
  "first_september_week = [\n",
  "    week\n",
  "    for week in (pd.Period(pd.Timestamp(y, 9, 1), 'W')\n",
  "                 for y in range(1991, last_period.year + 1))\n",
  "    if week - 1 <= last_period\n",
  "]" ] },
 { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [
  "year = []\n",
  "yearly_incidence = []\n",
  "for week1, week2 in zip(first_september_week[:-1],\n",
  "                        first_september_week[1:]):\n",
  "    one_year = sorted_data['inc'][week1:week2-1]\n",
  "    # Sanity check: each window must hold 51 to 53 weekly observations.\n",
  "    assert abs(len(one_year)-52) < 2\n",
  "    yearly_incidence.append(one_year.sum())\n",
  "    year.append(week2.year)\n",
  "yearly_incidence = pd.Series(data=yearly_incidence, index=year)" ] },
 { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "yearly_incidence.plot(style='*')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Plus faible : 2020, plus forte : 2009**" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }