{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Analyse de l'incidence de la varicelle" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ " %matplotlib inline\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import isoweek" ] }, { "cell_type": "markdown", "metadata": { "hideCode": true }, "source": [ "Les données de l'incidence de la varicelle sont disponibles sur le site Web du [Réseau Sentinelles](http://www.sentiweb.fr/). Nous les récupérons sous forme d'un fichier au format CSV dont chaque ligne correspond à une semaine de la période demandée. Nous téléchargeons toujours le jeu de données complet, qui commence fin 1990 (semaine 49) et se termine avec une semaine récente." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "data_url = \"http://www.sentiweb.fr/datasets/incidence-PAY-7.csv\"\n", "raw_data = pd.read_csv(data_url, skiprows=1)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_name
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [week, indicator, inc, inc_low, inc_up, inc100, inc100_low, inc100_up, geo_insee, geo_name]\n", "Index: []" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "raw_data[raw_data.isnull().any(axis=1)]" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_name
0202422711317733015304171123FRFrance
120242179807692612688151119FRFrance
22024207136611020917113201525FRFrance
320241971008364131375315921FRFrance
4202418713438951417362201426FRFrance
52024177153031121919387231729FRFrance
62024167181381354022736272034FRFrance
72024157249291731532543372648FRFrance
82024147161811254419818241929FRFrance
92024137183221420622438272133FRFrance
10202412712818912816508191325FRFrance
112024117159731240019546241929FRFrance
122024107143011076117841211626FRFrance
132024097143371087117803211626FRFrance
142024087158991199119807241830FRFrance
15202407711294822614362171222FRFrance
16202406712174902015328181323FRFrance
172024057881461101151813917FRFrance
1820240479504656612442141018FRFrance
19202403769484633926310713FRFrance
20202402771254852939811814FRFrance
21202401713305921417396201426FRFrance
22202352711636735415918181224FRFrance
23202351769124227959710614FRFrance
242023507879962151138313917FRFrance
252023497781753621027212816FRFrance
26202348773514749995311715FRFrance
27202347765374277879710713FRFrance
2820234675229297374858511FRFrance
2920234575007267573398412FRFrance
.................................
17181991267176081130423912312042FRFrance
17191991257161691070021638281838FRFrance
17201991247161711007122271281739FRFrance
1721199123711947767116223211329FRFrance
1722199122715452995320951271737FRFrance
1723199121714903897520831261636FRFrance
17241991207190531274225364342345FRFrance
17251991197167391124622232291939FRFrance
17261991187213851388228888382551FRFrance
1727199117713462887718047241632FRFrance
17281991167148571006819646261834FRFrance
1729199115713975978118169251832FRFrance
1730199114712265768416846221430FRFrance
173119911379567604113093171123FRFrance
1732199112710864733114397191325FRFrance
17331991117155741118419964271935FRFrance
17341991107166431137221914292038FRFrance
1735199109713741878018702241533FRFrance
1736199108713289881317765231531FRFrance
1737199107712337807716597221529FRFrance
1738199106710877701314741191226FRFrance
1739199105710442654414340181125FRFrance
17401991047791345631126314820FRFrance
17411991037153871048420290271836FRFrance
17421991027162771104621508292038FRFrance
17431991017155651027120859271836FRFrance
17441990527193751329525455342345FRFrance
17451990517190801380724353342543FRFrance
1746199050711079666015498201228FRFrance
17471990497114302610205FRFrance
\n", "

1748 rows × 10 columns

\n", "
" ], "text/plain": [ " week indicator inc inc_low inc_up inc100 inc100_low \\\n", "0 202422 7 11317 7330 15304 17 11 \n", "1 202421 7 9807 6926 12688 15 11 \n", "2 202420 7 13661 10209 17113 20 15 \n", "3 202419 7 10083 6413 13753 15 9 \n", "4 202418 7 13438 9514 17362 20 14 \n", "5 202417 7 15303 11219 19387 23 17 \n", "6 202416 7 18138 13540 22736 27 20 \n", "7 202415 7 24929 17315 32543 37 26 \n", "8 202414 7 16181 12544 19818 24 19 \n", "9 202413 7 18322 14206 22438 27 21 \n", "10 202412 7 12818 9128 16508 19 13 \n", "11 202411 7 15973 12400 19546 24 19 \n", "12 202410 7 14301 10761 17841 21 16 \n", "13 202409 7 14337 10871 17803 21 16 \n", "14 202408 7 15899 11991 19807 24 18 \n", "15 202407 7 11294 8226 14362 17 12 \n", "16 202406 7 12174 9020 15328 18 13 \n", "17 202405 7 8814 6110 11518 13 9 \n", "18 202404 7 9504 6566 12442 14 10 \n", "19 202403 7 6948 4633 9263 10 7 \n", "20 202402 7 7125 4852 9398 11 8 \n", "21 202401 7 13305 9214 17396 20 14 \n", "22 202352 7 11636 7354 15918 18 12 \n", "23 202351 7 6912 4227 9597 10 6 \n", "24 202350 7 8799 6215 11383 13 9 \n", "25 202349 7 7817 5362 10272 12 8 \n", "26 202348 7 7351 4749 9953 11 7 \n", "27 202347 7 6537 4277 8797 10 7 \n", "28 202346 7 5229 2973 7485 8 5 \n", "29 202345 7 5007 2675 7339 8 4 \n", "... ... ... ... ... ... ... ... 
\n", "1718 199126 7 17608 11304 23912 31 20 \n", "1719 199125 7 16169 10700 21638 28 18 \n", "1720 199124 7 16171 10071 22271 28 17 \n", "1721 199123 7 11947 7671 16223 21 13 \n", "1722 199122 7 15452 9953 20951 27 17 \n", "1723 199121 7 14903 8975 20831 26 16 \n", "1724 199120 7 19053 12742 25364 34 23 \n", "1725 199119 7 16739 11246 22232 29 19 \n", "1726 199118 7 21385 13882 28888 38 25 \n", "1727 199117 7 13462 8877 18047 24 16 \n", "1728 199116 7 14857 10068 19646 26 18 \n", "1729 199115 7 13975 9781 18169 25 18 \n", "1730 199114 7 12265 7684 16846 22 14 \n", "1731 199113 7 9567 6041 13093 17 11 \n", "1732 199112 7 10864 7331 14397 19 13 \n", "1733 199111 7 15574 11184 19964 27 19 \n", "1734 199110 7 16643 11372 21914 29 20 \n", "1735 199109 7 13741 8780 18702 24 15 \n", "1736 199108 7 13289 8813 17765 23 15 \n", "1737 199107 7 12337 8077 16597 22 15 \n", "1738 199106 7 10877 7013 14741 19 12 \n", "1739 199105 7 10442 6544 14340 18 11 \n", "1740 199104 7 7913 4563 11263 14 8 \n", "1741 199103 7 15387 10484 20290 27 18 \n", "1742 199102 7 16277 11046 21508 29 20 \n", "1743 199101 7 15565 10271 20859 27 18 \n", "1744 199052 7 19375 13295 25455 34 23 \n", "1745 199051 7 19080 13807 24353 34 25 \n", "1746 199050 7 11079 6660 15498 20 12 \n", "1747 199049 7 1143 0 2610 2 0 \n", "\n", " inc100_up geo_insee geo_name \n", "0 23 FR France \n", "1 19 FR France \n", "2 25 FR France \n", "3 21 FR France \n", "4 26 FR France \n", "5 29 FR France \n", "6 34 FR France \n", "7 48 FR France \n", "8 29 FR France \n", "9 33 FR France \n", "10 25 FR France \n", "11 29 FR France \n", "12 26 FR France \n", "13 26 FR France \n", "14 30 FR France \n", "15 22 FR France \n", "16 23 FR France \n", "17 17 FR France \n", "18 18 FR France \n", "19 13 FR France \n", "20 14 FR France \n", "21 26 FR France \n", "22 24 FR France \n", "23 14 FR France \n", "24 17 FR France \n", "25 16 FR France \n", "26 15 FR France \n", "27 13 FR France \n", "28 11 FR France \n", "29 12 FR France \n", "... ... 
... ... \n", "1718 42 FR France \n", "1719 38 FR France \n", "1720 39 FR France \n", "1721 29 FR France \n", "1722 37 FR France \n", "1723 36 FR France \n", "1724 45 FR France \n", "1725 39 FR France \n", "1726 51 FR France \n", "1727 32 FR France \n", "1728 34 FR France \n", "1729 32 FR France \n", "1730 30 FR France \n", "1731 23 FR France \n", "1732 25 FR France \n", "1733 35 FR France \n", "1734 38 FR France \n", "1735 33 FR France \n", "1736 31 FR France \n", "1737 29 FR France \n", "1738 26 FR France \n", "1739 25 FR France \n", "1740 20 FR France \n", "1741 36 FR France \n", "1742 38 FR France \n", "1743 36 FR France \n", "1744 45 FR France \n", "1745 43 FR France \n", "1746 28 FR France \n", "1747 5 FR France \n", "\n", "[1748 rows x 10 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = raw_data.dropna().copy()\n", "data" ] },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert_week(year_and_week_int):\n",
    "    \"\"\"Convert a week code given as the integer YYYYWW into a weekly pandas Period.\n",
    "\n",
    "    The first four digits are read as the year and the remaining digits as\n",
    "    the week number; both are handed to isoweek.Week, and the returned\n",
    "    period is the weekly period containing day 0 of that week.\n",
    "    \"\"\"\n",
    "    year_and_week_str = str(year_and_week_int)\n",
    "    year = int(year_and_week_str[:4])\n",
    "    week = int(year_and_week_str[4:])\n",
    "    w = isoweek.Week(year, week)\n",
    "    return pd.Period(w.day(0), 'W')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "data['period'] = [convert_week(yw) for yw in data['week']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The file lists the most recent week first; index by period and sort\n",
    "# chronologically.\n",
    "sorted_data = data.set_index('period').sort_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Consistency check: print every pair of consecutive periods where the second\n",
    "# one starts more than one second after the first one ends (i.e. a gap).\n",
    "periods = sorted_data.index\n",
    "for p1, p2 in zip(periods[:-1], periods[1:]):\n",
    "    delta = p2.to_timestamp() - p1.end_time\n",
    "    if delta > pd.Timedelta('1s'):\n",
    "        print(p1, p2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sorted_data['inc'].plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sorted_data['inc'][-200:].plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Yearly totals run from one 1 September to the next. Start with the first\n",
    "# 1 September that is not earlier than the first observed week, so that every\n",
    "# summed year is fully covered by the data.\n",
    "first_week = sorted_data.index[0]\n",
    "first_year = first_week.year\n",
    "if first_week.start_time > pd.Timestamp(first_year, 9, 1):\n",
    "    first_year += 1\n",
    "first_september_week = [pd.Period(pd.Timestamp(y, 9, 1), 'W')\n",
    "                        for y in range(first_year,\n",
    "                                       sorted_data.index[-1].year)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "year = []\n",
    "yearly_incidence = []\n",
    "for week1, week2 in zip(first_september_week[:-1], first_september_week[1:]):\n",
    "    # Compare the PeriodIndex with Period bounds (not Timestamps): week1 is\n",
    "    # included, week2 already belongs to the following year.\n",
    "    in_year = (sorted_data.index >= week1) & (sorted_data.index < week2)\n",
    "    one_year = sorted_data['inc'][in_year]\n",
    "    # A year must hold 51 to 53 weekly values; anything else signals a gap.\n",
    "    assert abs(len(one_year)-52) < 2\n",
    "    yearly_incidence.append(one_year.sum())\n",
    "    year.append(week2.year)\n",
    "# The cells below call Series methods (.plot, .sort_values, .hist), so wrap\n",
    "# the totals in a Series indexed by the year in which each period ends.\n",
    "yearly_incidence = pd.Series(data=yearly_incidence, index=year)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "yearly_incidence.plot(style='*')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "yearly_incidence.sort_values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "yearly_incidence.hist(xrot=20)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}