diff --git a/module3/exo1/influenza-like-illness-analysis_Module 3_mission.ipynb b/module3/exo1/influenza-like-illness-analysis_Module 3_mission.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..0ad3b688990e018ea9d743bada13a5737ba2efd3 --- /dev/null +++ b/module3/exo1/influenza-like-illness-analysis_Module 3_mission.ipynb @@ -0,0 +1,1438 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Incidence of influenza-like illness in France" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import isoweek\n", + "import os" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The data on the incidence of influenza-like illness are available from the Web site of the [Réseau Sentinelles](http://www.sentiweb.fr/). We download them as a file in CSV format, in which each line corresponds to a week in the observation period. Only the complete dataset, starting in 1984 and ending with a recent week, is available for download." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "data_url = \"http://www.sentiweb.fr/datasets/incidence-PAY-3.csv\"\n", + "filename=\"inc-3-PAY-ds2.csv\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is the documentation of the data from [the download site](https://ns.sentiweb.fr/incidence/csv-schema-v1.json):\n", + "\n", + "| Column name | Description |\n", + "|--------------|---------------------------------------------------------------------------------------------------------------------------|\n", + "| `week` | ISO8601 Yearweek number as numeric (year times 100 + week number) |\n", + "| `indicator` | Unique identifier of the indicator, see metadata document https://www.sentiweb.fr/meta.json |\n", + "| `inc` | Estimated incidence value for the time step, in the geographic level |\n", + "| `inc_low` | Lower bound of the estimated incidence 95% Confidence Interval |\n", + "| `inc_up` | Upper bound of the estimated incidence 95% Confidence Interval |\n", + "| `inc100` | Estimated rate incidence per 100,000 inhabitants |\n", + "| `inc100_low` | Lower bound of the estimated rate incidence 95% Confidence Interval |\n", + "| `inc100_up` | Upper bound of the estimated rate incidence 95% Confidence Interval |\n", + "| `geo_insee` | Identifier of the geographic area, from INSEE https://www.insee.fr |\n", + "| `geo_name` | Geographic label of the area, corresponding to INSEE code. This label is not an id and is only provided for human reading |\n", + "\n", + "The first line of the CSV file is a comment, which we ignore with `skiprows=1`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Download the data to a local file" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To ensure reproducibility, the data link and a save name for a copy of the dataset are defined above. 
This next block checks if the local file is already available, to avoid repeated downloads." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Download once and keep a local copy, so that later runs work from the same data.\n", + "if not os.path.exists(filename):\n", + "    pd.read_csv(data_url, skiprows=1).to_csv(filename, index=False)\n", + "# Week 198919 has '-' in `inc` and `inc100`; parsing '-' as NaN keeps both columns numeric.\n", + "raw_data = pd.read_csv(filename, na_values='-')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Are there missing data points? Check if there are any empty cells." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_name
18831989193-NaNNaN-NaNNaNFRFrance
\n", + "
" + ], + "text/plain": [ + " week indicator inc inc_low inc_up inc100 inc100_low inc100_up \\\n", + "1883 198919 3 - NaN NaN - NaN NaN \n", + "\n", + " geo_insee geo_name \n", + "1883 FR France " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "absent=raw_data[raw_data.isnull().any(axis=1)]\n", + "absent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Yes, week 19 of year 1989 does not have any observed values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "raw_data[raw_data.isnull().any(axis=1)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We delete this point, which does not have big consequence for our rather simple analysis." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_name
020252432281617621.028011.03426.042.0FRFrance
120252332456419382.029746.03729.045.0FRFrance
220252231875514333.023177.02821.035.0FRFrance
320252132376018671.028849.03527.043.0FRFrance
420252032026515814.024716.03023.037.0FRFrance
520251931626412394.020134.02418.030.0FRFrance
620251831811513975.022255.02721.033.0FRFrance
720251732215017291.027009.03326.040.0FRFrance
820251632856422550.034578.04334.052.0FRFrance
920251533572129592.041850.05344.062.0FRFrance
1020251433757931232.043926.05647.065.0FRFrance
1120251333967333686.045660.05950.068.0FRFrance
1220251235254345627.059459.07868.088.0FRFrance
1320251135946952154.066784.08978.0100.0FRFrance
1420251036033453048.067620.09079.0101.0FRFrance
1520250938453174994.094068.0126112.0140.0FRFrance
162025083136020124824.0147216.0203186.0220.0FRFrance
172025073208952195988.0221916.0312293.0331.0FRFrance
182025063273519258159.0288879.0408385.0431.0FRFrance
192025053334395318416.0350374.0499475.0523.0FRFrance
202025043350043332885.0367201.0522496.0548.0FRFrance
212025033252772238917.0266627.0377356.0398.0FRFrance
222025023257247242991.0271503.0384363.0405.0FRFrance
232025013231549214627.0248471.0345320.0370.0FRFrance
242024523201726185870.0217582.0302278.0326.0FRFrance
252024513201697187843.0215551.0302281.0323.0FRFrance
262024503136694126369.0147019.0205190.0220.0FRFrance
27202449310848799037.0117937.0163149.0177.0FRFrance
2820244838738178687.096075.0131118.0144.0FRFrance
2920244737628667626.084946.0114101.0127.0FRFrance
.................................
209019852132609619621.032571.04735.059.0FRFrance
209119852032789620885.034907.05138.064.0FRFrance
209219851934315432821.053487.07859.097.0FRFrance
209319851834055529935.051175.07455.093.0FRFrance
209419851733405324366.043740.06244.080.0FRFrance
209519851635036236451.064273.09166.0116.0FRFrance
209619851536388145538.082224.011683.0149.0FRFrance
20971985143134545114400.0154690.0244207.0281.0FRFrance
20981985133197206176080.0218332.0357319.0395.0FRFrance
20991985123245240223304.0267176.0445405.0485.0FRFrance
21001985113276205252399.0300011.0501458.0544.0FRFrance
21011985103353231326279.0380183.0640591.0689.0FRFrance
21021985093369895341109.0398681.0670618.0722.0FRFrance
21031985083389886359529.0420243.0707652.0762.0FRFrance
21041985073471852432599.0511105.0855784.0926.0FRFrance
21051985063565825518011.0613639.01026939.01113.0FRFrance
21061985053637302592795.0681809.011551074.01236.0FRFrance
21071985043424937390794.0459080.0770708.0832.0FRFrance
21081985033213901174689.0253113.0388317.0459.0FRFrance
210919850239758680949.0114223.0177147.0207.0FRFrance
211019850138548965918.0105060.0155120.0190.0FRFrance
211119845238483060602.0109058.0154110.0198.0FRFrance
2112198451310172680242.0123210.0185146.0224.0FRFrance
21131984503123680101401.0145959.0225184.0266.0FRFrance
2114198449310107381684.0120462.0184149.0219.0FRFrance
211519844837862060634.096606.0143110.0176.0FRFrance
211619844737202954274.089784.013199.0163.0FRFrance
211719844638733067686.0106974.0159123.0195.0FRFrance
21181984453135223101414.0169032.0246184.0308.0FRFrance
211919844436842220056.0116788.012537.0213.0FRFrance
\n", + "

2119 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " week indicator inc inc_low inc_up inc100 inc100_low \\\n", + "0 202524 3 22816 17621.0 28011.0 34 26.0 \n", + "1 202523 3 24564 19382.0 29746.0 37 29.0 \n", + "2 202522 3 18755 14333.0 23177.0 28 21.0 \n", + "3 202521 3 23760 18671.0 28849.0 35 27.0 \n", + "4 202520 3 20265 15814.0 24716.0 30 23.0 \n", + "5 202519 3 16264 12394.0 20134.0 24 18.0 \n", + "6 202518 3 18115 13975.0 22255.0 27 21.0 \n", + "7 202517 3 22150 17291.0 27009.0 33 26.0 \n", + "8 202516 3 28564 22550.0 34578.0 43 34.0 \n", + "9 202515 3 35721 29592.0 41850.0 53 44.0 \n", + "10 202514 3 37579 31232.0 43926.0 56 47.0 \n", + "11 202513 3 39673 33686.0 45660.0 59 50.0 \n", + "12 202512 3 52543 45627.0 59459.0 78 68.0 \n", + "13 202511 3 59469 52154.0 66784.0 89 78.0 \n", + "14 202510 3 60334 53048.0 67620.0 90 79.0 \n", + "15 202509 3 84531 74994.0 94068.0 126 112.0 \n", + "16 202508 3 136020 124824.0 147216.0 203 186.0 \n", + "17 202507 3 208952 195988.0 221916.0 312 293.0 \n", + "18 202506 3 273519 258159.0 288879.0 408 385.0 \n", + "19 202505 3 334395 318416.0 350374.0 499 475.0 \n", + "20 202504 3 350043 332885.0 367201.0 522 496.0 \n", + "21 202503 3 252772 238917.0 266627.0 377 356.0 \n", + "22 202502 3 257247 242991.0 271503.0 384 363.0 \n", + "23 202501 3 231549 214627.0 248471.0 345 320.0 \n", + "24 202452 3 201726 185870.0 217582.0 302 278.0 \n", + "25 202451 3 201697 187843.0 215551.0 302 281.0 \n", + "26 202450 3 136694 126369.0 147019.0 205 190.0 \n", + "27 202449 3 108487 99037.0 117937.0 163 149.0 \n", + "28 202448 3 87381 78687.0 96075.0 131 118.0 \n", + "29 202447 3 76286 67626.0 84946.0 114 101.0 \n", + "... ... ... ... ... ... ... ... 
\n", + "2090 198521 3 26096 19621.0 32571.0 47 35.0 \n", + "2091 198520 3 27896 20885.0 34907.0 51 38.0 \n", + "2092 198519 3 43154 32821.0 53487.0 78 59.0 \n", + "2093 198518 3 40555 29935.0 51175.0 74 55.0 \n", + "2094 198517 3 34053 24366.0 43740.0 62 44.0 \n", + "2095 198516 3 50362 36451.0 64273.0 91 66.0 \n", + "2096 198515 3 63881 45538.0 82224.0 116 83.0 \n", + "2097 198514 3 134545 114400.0 154690.0 244 207.0 \n", + "2098 198513 3 197206 176080.0 218332.0 357 319.0 \n", + "2099 198512 3 245240 223304.0 267176.0 445 405.0 \n", + "2100 198511 3 276205 252399.0 300011.0 501 458.0 \n", + "2101 198510 3 353231 326279.0 380183.0 640 591.0 \n", + "2102 198509 3 369895 341109.0 398681.0 670 618.0 \n", + "2103 198508 3 389886 359529.0 420243.0 707 652.0 \n", + "2104 198507 3 471852 432599.0 511105.0 855 784.0 \n", + "2105 198506 3 565825 518011.0 613639.0 1026 939.0 \n", + "2106 198505 3 637302 592795.0 681809.0 1155 1074.0 \n", + "2107 198504 3 424937 390794.0 459080.0 770 708.0 \n", + "2108 198503 3 213901 174689.0 253113.0 388 317.0 \n", + "2109 198502 3 97586 80949.0 114223.0 177 147.0 \n", + "2110 198501 3 85489 65918.0 105060.0 155 120.0 \n", + "2111 198452 3 84830 60602.0 109058.0 154 110.0 \n", + "2112 198451 3 101726 80242.0 123210.0 185 146.0 \n", + "2113 198450 3 123680 101401.0 145959.0 225 184.0 \n", + "2114 198449 3 101073 81684.0 120462.0 184 149.0 \n", + "2115 198448 3 78620 60634.0 96606.0 143 110.0 \n", + "2116 198447 3 72029 54274.0 89784.0 131 99.0 \n", + "2117 198446 3 87330 67686.0 106974.0 159 123.0 \n", + "2118 198445 3 135223 101414.0 169032.0 246 184.0 \n", + "2119 198444 3 68422 20056.0 116788.0 125 37.0 \n", + "\n", + " inc100_up geo_insee geo_name \n", + "0 42.0 FR France \n", + "1 45.0 FR France \n", + "2 35.0 FR France \n", + "3 43.0 FR France \n", + "4 37.0 FR France \n", + "5 30.0 FR France \n", + "6 33.0 FR France \n", + "7 40.0 FR France \n", + "8 52.0 FR France \n", + "9 62.0 FR France \n", + "10 65.0 FR France \n", + "11 68.0 FR 
France \n", + "12 88.0 FR France \n", + "13 100.0 FR France \n", + "14 101.0 FR France \n", + "15 140.0 FR France \n", + "16 220.0 FR France \n", + "17 331.0 FR France \n", + "18 431.0 FR France \n", + "19 523.0 FR France \n", + "20 548.0 FR France \n", + "21 398.0 FR France \n", + "22 405.0 FR France \n", + "23 370.0 FR France \n", + "24 326.0 FR France \n", + "25 323.0 FR France \n", + "26 220.0 FR France \n", + "27 177.0 FR France \n", + "28 144.0 FR France \n", + "29 127.0 FR France \n", + "... ... ... ... \n", + "2090 59.0 FR France \n", + "2091 64.0 FR France \n", + "2092 97.0 FR France \n", + "2093 93.0 FR France \n", + "2094 80.0 FR France \n", + "2095 116.0 FR France \n", + "2096 149.0 FR France \n", + "2097 281.0 FR France \n", + "2098 395.0 FR France \n", + "2099 485.0 FR France \n", + "2100 544.0 FR France \n", + "2101 689.0 FR France \n", + "2102 722.0 FR France \n", + "2103 762.0 FR France \n", + "2104 926.0 FR France \n", + "2105 1113.0 FR France \n", + "2106 1236.0 FR France \n", + "2107 832.0 FR France \n", + "2108 459.0 FR France \n", + "2109 207.0 FR France \n", + "2110 190.0 FR France \n", + "2111 198.0 FR France \n", + "2112 224.0 FR France \n", + "2113 266.0 FR France \n", + "2114 219.0 FR France \n", + "2115 176.0 FR France \n", + "2116 163.0 FR France \n", + "2117 195.0 FR France \n", + "2118 308.0 FR France \n", + "2119 213.0 FR France \n", + "\n", + "[2119 rows x 10 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = raw_data.dropna().copy()\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Our dataset uses an uncommon encoding; the week number is attached\n", + "to the year number, leaving the impression of a six-digit integer.\n", + "That is how Pandas interprets it.\n", + "\n", + "A second problem is that Pandas does not know about week numbers.\n", + "It needs to be given the dates of the beginning and end of the week.\n", 
+ "We use the library `isoweek` for that.\n", + "\n", + "Since the conversion is a bit lengthy, we write a small Python \n", + "function for doing it. Then we apply it to all points in our dataset. \n", + "The results go into a new column 'period'." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def convert_week(year_and_week_int):\n", + " year_and_week_str = str(year_and_week_int)\n", + " year = int(year_and_week_str[:4])\n", + " week = int(year_and_week_str[4:])\n", + " w = isoweek.Week(year, week)\n", + " return pd.Period(w.day(0), 'W')\n", + "\n", + "data['period'] = [convert_week(yw) for yw in data['week']]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are two more small changes to make.\n", + "\n", + "First, we define the observation periods as the new index of\n", + "our dataset. That turns it into a time series, which will be\n", + "convenient later on.\n", + "\n", + "Second, we sort the points chronologically." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "sorted_data = data.set_index('period').sort_index()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We check the consistency of the data. Between the end of a period and\n", + "the beginning of the next one, the difference should be zero, or very small.\n", + "We tolerate an error of one second.\n", + "\n", + "This is OK except for one pair of consecutive periods between which\n", + "a whole week is missing.\n", + "\n", + "We recognize the dates: it's the week without observations that we\n", + "have deleted earlier!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "periods = sorted_data.index\n", + "for p1, p2 in zip(periods[:-1], periods[1:]):\n", + "    delta = p2.to_timestamp() - p1.end_time  # gap between the end of one week and the start of the next\n", + "    if delta > pd.Timedelta('1s'):\n", + "        print(p1, p2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A first look at the data!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sorted_data['inc'].plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A zoom on the last few years shows more clearly that the peaks are situated in winter." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sorted_data['inc'][-200:].plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Study of the annual incidence" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since the peaks of the epidemic happen in winter, near the transition\n", + "between calendar years, we define the reference period for the annual\n", + "incidence from August 1st of year $N$ to August 1st of year $N+1$. We\n", + "label this period as year $N+1$ because the peak is always located in\n", + "year $N+1$. The very low incidence in summer ensures that the arbitrariness\n", + "of the choice of reference period has no impact on our conclusions.\n", + "\n", + "Our task is a bit complicated by the fact that a year does not have an\n", + "integer number of weeks. Therefore we modify our reference period a bit:\n", + "instead of August 1st, we use the first day of the week containing August 1st.\n", + "\n", + "A final detail: the dataset starts in October 1984, so the first peak is\n", + "incomplete. We start the analysis with the first full peak." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "first_august_week = [pd.Period(pd.Timestamp(y, 8, 1), 'W')\n", + " for y in range(1985,\n", + " sorted_data.index[-1].year)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Starting from this list of weeks that contain August 1st, we obtain intervals of approximately one year as the periods between two adjacent weeks in this list. We compute the sums of weekly incidences for all these periods.\n", + "\n", + "We also check that our periods contain between 51 and 52 weeks, as a safeguard against potential mistakes in our code." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "year = []\n", + "yearly_incidence = []\n", + "for week1, week2 in zip(first_august_week[:-1],\n", + " first_august_week[1:]):\n", + " one_year = sorted_data['inc'][week1:week2-1]\n", + " assert abs(len(one_year)-52) < 2\n", + " yearly_incidence.append(one_year.sum())\n", + " year.append(week2.year)\n", + "yearly_incidence = pd.Series(data=yearly_incidence, index=year)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And here are the annual incidences." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "yearly_incidence.plot(style='*')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A sorted list makes it easier to find the highest values (at the end)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "yearly_incidence.sort_values()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, a histogram clearly shows the few very strong epidemics, which affect about 10% of the French population,\n", + "but are rare: there were three of them in the course of 35 years. The typical epidemic affects only half as many people." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "yearly_incidence.hist(xrot=20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/module3/exo1/influenza-like-illness-analysis.ipynb b/module3/exo1/influenza-like-illness-analysis_og.ipynb similarity index 99% rename from module3/exo1/influenza-like-illness-analysis.ipynb rename to module3/exo1/influenza-like-illness-analysis_og.ipynb index 87092fc69cd90ff457ea56284b789cb0de199a41..152f4644a6d49c50a858ce496adc85d65c230756 100644 --- a/module3/exo1/influenza-like-illness-analysis.ipynb +++ b/module3/exo1/influenza-like-illness-analysis_og.ipynb @@ -365,7 +365,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.1" + "version": "3.6.4" } }, "nbformat": 4, diff --git a/module3/exo2/exercice.ipynb b/module3/exo2/exercice.ipynb index 0bbbe371b01e359e381e43239412d77bf53fb1fb..a47a908ae821fce7decb417ac58afde932bfa9ce 100644 --- 
a/module3/exo2/exercice.ipynb +++ b/module3/exo2/exercice.ipynb @@ -1,5 +1,2122 @@ { - "cells": [], + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The incidence of chickenpox in France (2016-2024)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The data on the incidence of chickenpox-like illness are available from the Web site of the [Réseau Sentinelles](http://www.sentiweb.fr/). We download them as a file in CSV format, in which each line corresponds to a week in the observation period. The dataset used starts in 2016 and ends in 2024." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import os\n", + "from isoweek import Week" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "data_url = \"https://www.sentiweb.fr/datasets/all/inc-7-RDD-ds2.csv\"\n", + "filename = \"incidence-chickenpox.csv\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. Download -> if there is not a local file already" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "# Download once and keep a local copy, so that later runs work from the same data.\n", + "if not os.path.exists(filename):\n", + "    pd.read_csv(data_url, skiprows=1).to_csv(filename, index=False)\n", + "raw_data = pd.read_csv(filename)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2. Remove rows with missing values" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weekgeo_inseeindicatorincinc100inc_upinc_lowinc100_upinc100_lowperiod
0201601447574108612871552016-01-04/2016-01-10
1201601757151325209992735152016-01-04/2016-01-10
22016018472363302958176837222016-01-04/2016-01-10
320160127768624105831436112016-01-04/2016-01-10
4201601537532168741902662016-01-04/2016-01-10
5201601247394156251632462016-01-04/2016-01-10
620160194738128202502016-01-04/2016-01-10
72016011173030253788227231192016-01-04/2016-01-10
82016017678421413073772262016-01-04/2016-01-10
92016013272100342711148944242016-01-04/2016-01-10
10201601287418126871492042016-01-04/2016-01-10
11201601527102927157748142132016-01-04/2016-01-10
12201601937105321145764929132016-01-04/2016-01-10
132016024477721411224222072016-01-11/2016-01-17
142016027576571110162981752016-01-11/2016-01-17
152016028471486191928104424132016-01-11/2016-01-17
16201602277442157271572552016-01-11/2016-01-17
17201602537444137441442242016-01-11/2016-01-17
18201602247402156271772472016-01-11/2016-01-17
192016029471244201302016-01-11/2016-01-17
202016021171745142317117319102016-01-11/2016-01-17
21201602767110119164455828102016-01-11/2016-01-17
22201602327124920172077828132016-01-11/2016-01-17
23201602287106431151960944182016-01-11/2016-01-17
24201602527408117081081932016-01-11/2016-01-17
252016029371570312099104142212016-01-11/2016-01-17
262016034479851714425282592016-01-18/2016-01-24
272016037572070342769137146232016-01-18/2016-01-24
282016038472258282810170635212016-01-18/2016-01-24
29201603277100935153848053172016-01-18/2016-01-24
.................................
63792025223273001950302025-05-26/2025-06-01
63802025222870000002025-05-26/2025-06-01
6381202522527198552801302025-05-26/2025-06-01
6382202522937125424309805902025-05-26/2025-06-01
63832025234474137109001902025-06-02/2025-06-08
63842025237574427111701802025-06-02/2025-06-08
638520252384711481421561402622025-06-02/2025-06-08
63862025232770000002025-06-02/2025-06-08
6387202523537143439401102025-06-02/2025-06-08
63882025232472721066702502025-06-02/2025-06-08
6389202523947371010402902025-06-02/2025-06-08
639020252311790571763471402025-06-02/2025-06-08
63912025237677514350702025-06-02/2025-06-08
63922025233275279122602002025-06-02/2025-06-08
639320252328710833210902025-06-02/2025-06-08
63942025235270000002025-06-02/2025-06-08
63952025239373366113002202025-06-02/2025-06-08
6396202524447196374101302025-06-09/2025-06-15
639720252475714425160802025-06-09/2025-06-15
639820252484782410174502102025-06-09/2025-06-15
63992025242770000002025-06-09/2025-06-15
6400202524537164543001202025-06-09/2025-06-15
640120252424771027142305402025-06-09/2025-06-15
64022025249472889402602025-06-09/2025-06-15
64032025241179137194601602025-06-09/2025-06-15
64042025247670000002025-06-09/2025-06-15
640520252432714625070802025-06-09/2025-06-15
6406202524287140444101302025-06-09/2025-06-15
64072025245270000002025-06-09/2025-06-15
6408202524937319699501902025-06-09/2025-06-15
\n", + "

6409 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " week geo_insee indicator inc inc100 inc_up inc_low inc100_up \\\n", + "0 201601 44 7 574 10 861 287 15 \n", + "1 201601 75 7 1513 25 2099 927 35 \n", + "2 201601 84 7 2363 30 2958 1768 37 \n", + "3 201601 27 7 686 24 1058 314 36 \n", + "4 201601 53 7 532 16 874 190 26 \n", + "5 201601 24 7 394 15 625 163 24 \n", + "6 201601 94 7 38 12 82 0 25 \n", + "7 201601 11 7 3030 25 3788 2272 31 \n", + "8 201601 76 7 842 14 1307 377 22 \n", + "9 201601 32 7 2100 34 2711 1489 44 \n", + "10 201601 28 7 418 12 687 149 20 \n", + "11 201601 52 7 1029 27 1577 481 42 \n", + "12 201601 93 7 1053 21 1457 649 29 \n", + "13 201602 44 7 772 14 1122 422 20 \n", + "14 201602 75 7 657 11 1016 298 17 \n", + "15 201602 84 7 1486 19 1928 1044 24 \n", + "16 201602 27 7 442 15 727 157 25 \n", + "17 201602 53 7 444 13 744 144 22 \n", + "18 201602 24 7 402 15 627 177 24 \n", + "19 201602 94 7 12 4 42 0 13 \n", + "20 201602 11 7 1745 14 2317 1173 19 \n", + "21 201602 76 7 1101 19 1644 558 28 \n", + "22 201602 32 7 1249 20 1720 778 28 \n", + "23 201602 28 7 1064 31 1519 609 44 \n", + "24 201602 52 7 408 11 708 108 19 \n", + "25 201602 93 7 1570 31 2099 1041 42 \n", + "26 201603 44 7 985 17 1442 528 25 \n", + "27 201603 75 7 2070 34 2769 1371 46 \n", + "28 201603 84 7 2258 28 2810 1706 35 \n", + "29 201603 27 7 1009 35 1538 480 53 \n", + "... ... ... ... ... ... ... ... ... 
\n", + "6379 202522 32 7 30 0 195 0 3 \n", + "6380 202522 28 7 0 0 0 0 0 \n", + "6381 202522 52 7 198 5 528 0 13 \n", + "6382 202522 93 7 1254 24 3098 0 59 \n", + "6383 202523 44 7 413 7 1090 0 19 \n", + "6384 202523 75 7 442 7 1117 0 18 \n", + "6385 202523 84 7 1148 14 2156 140 26 \n", + "6386 202523 27 7 0 0 0 0 0 \n", + "6387 202523 53 7 143 4 394 0 11 \n", + "6388 202523 24 7 272 10 667 0 25 \n", + "6389 202523 94 7 37 10 104 0 29 \n", + "6390 202523 11 7 905 7 1763 47 14 \n", + "6391 202523 76 7 75 1 435 0 7 \n", + "6392 202523 32 7 527 9 1226 0 20 \n", + "6393 202523 28 7 108 3 321 0 9 \n", + "6394 202523 52 7 0 0 0 0 0 \n", + "6395 202523 93 7 336 6 1130 0 22 \n", + "6396 202524 44 7 196 3 741 0 13 \n", + "6397 202524 75 7 144 2 516 0 8 \n", + "6398 202524 84 7 824 10 1745 0 21 \n", + "6399 202524 27 7 0 0 0 0 0 \n", + "6400 202524 53 7 164 5 430 0 12 \n", + "6401 202524 24 7 710 27 1423 0 54 \n", + "6402 202524 94 7 28 8 94 0 26 \n", + "6403 202524 11 7 913 7 1946 0 16 \n", + "6404 202524 76 7 0 0 0 0 0 \n", + "6405 202524 32 7 146 2 507 0 8 \n", + "6406 202524 28 7 140 4 441 0 13 \n", + "6407 202524 52 7 0 0 0 0 0 \n", + "6408 202524 93 7 319 6 995 0 19 \n", + "\n", + " inc100_low period \n", + "0 5 2016-01-04/2016-01-10 \n", + "1 15 2016-01-04/2016-01-10 \n", + "2 22 2016-01-04/2016-01-10 \n", + "3 11 2016-01-04/2016-01-10 \n", + "4 6 2016-01-04/2016-01-10 \n", + "5 6 2016-01-04/2016-01-10 \n", + "6 0 2016-01-04/2016-01-10 \n", + "7 19 2016-01-04/2016-01-10 \n", + "8 6 2016-01-04/2016-01-10 \n", + "9 24 2016-01-04/2016-01-10 \n", + "10 4 2016-01-04/2016-01-10 \n", + "11 13 2016-01-04/2016-01-10 \n", + "12 13 2016-01-04/2016-01-10 \n", + "13 7 2016-01-11/2016-01-17 \n", + "14 5 2016-01-11/2016-01-17 \n", + "15 13 2016-01-11/2016-01-17 \n", + "16 5 2016-01-11/2016-01-17 \n", + "17 4 2016-01-11/2016-01-17 \n", + "18 7 2016-01-11/2016-01-17 \n", + "19 0 2016-01-11/2016-01-17 \n", + "20 10 2016-01-11/2016-01-17 \n", + "21 10 2016-01-11/2016-01-17 \n", + "22 13 
# %%
# dropna() can hand back a frame that pandas still tracks as derived from the
# original one; adding a column to such a frame may raise
# SettingWithCopyWarning. The explicit copy() makes the cleaned table an
# independent object that is safe to modify.
raw_data = raw_data.dropna().copy()

# Show only the first rows: enough to check the columns without storing the
# whole table in the notebook output.
raw_data.head()

# %% [markdown]
# 3. Convert 'week' to period
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weekgeo_inseeindicatorincinc100inc_upinc_lowinc100_upinc100_lowperiod
0201601447574108612871552016-01-04/2016-01-10
1201601757151325209992735152016-01-04/2016-01-10
22016018472363302958176837222016-01-04/2016-01-10
320160127768624105831436112016-01-04/2016-01-10
4201601537532168741902662016-01-04/2016-01-10
5201601247394156251632462016-01-04/2016-01-10
620160194738128202502016-01-04/2016-01-10
72016011173030253788227231192016-01-04/2016-01-10
82016017678421413073772262016-01-04/2016-01-10
92016013272100342711148944242016-01-04/2016-01-10
10201601287418126871492042016-01-04/2016-01-10
11201601527102927157748142132016-01-04/2016-01-10
12201601937105321145764929132016-01-04/2016-01-10
132016024477721411224222072016-01-11/2016-01-17
142016027576571110162981752016-01-11/2016-01-17
152016028471486191928104424132016-01-11/2016-01-17
16201602277442157271572552016-01-11/2016-01-17
17201602537444137441442242016-01-11/2016-01-17
18201602247402156271772472016-01-11/2016-01-17
192016029471244201302016-01-11/2016-01-17
202016021171745142317117319102016-01-11/2016-01-17
21201602767110119164455828102016-01-11/2016-01-17
22201602327124920172077828132016-01-11/2016-01-17
23201602287106431151960944182016-01-11/2016-01-17
24201602527408117081081932016-01-11/2016-01-17
252016029371570312099104142212016-01-11/2016-01-17
262016034479851714425282592016-01-18/2016-01-24
272016037572070342769137146232016-01-18/2016-01-24
282016038472258282810170635212016-01-18/2016-01-24
29201603277100935153848053172016-01-18/2016-01-24
.................................
63792025223273001950302025-05-26/2025-06-01
63802025222870000002025-05-26/2025-06-01
6381202522527198552801302025-05-26/2025-06-01
6382202522937125424309805902025-05-26/2025-06-01
63832025234474137109001902025-06-02/2025-06-08
63842025237574427111701802025-06-02/2025-06-08
638520252384711481421561402622025-06-02/2025-06-08
63862025232770000002025-06-02/2025-06-08
6387202523537143439401102025-06-02/2025-06-08
63882025232472721066702502025-06-02/2025-06-08
6389202523947371010402902025-06-02/2025-06-08
639020252311790571763471402025-06-02/2025-06-08
63912025237677514350702025-06-02/2025-06-08
63922025233275279122602002025-06-02/2025-06-08
639320252328710833210902025-06-02/2025-06-08
63942025235270000002025-06-02/2025-06-08
63952025239373366113002202025-06-02/2025-06-08
6396202524447196374101302025-06-09/2025-06-15
639720252475714425160802025-06-09/2025-06-15
639820252484782410174502102025-06-09/2025-06-15
63992025242770000002025-06-09/2025-06-15
6400202524537164543001202025-06-09/2025-06-15
640120252424771027142305402025-06-09/2025-06-15
64022025249472889402602025-06-09/2025-06-15
64032025241179137194601602025-06-09/2025-06-15
64042025247670000002025-06-09/2025-06-15
640520252432714625070802025-06-09/2025-06-15
6406202524287140444101302025-06-09/2025-06-15
64072025245270000002025-06-09/2025-06-15
6408202524937319699501902025-06-09/2025-06-15
\n", + "

6409 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " week geo_insee indicator inc inc100 inc_up inc_low inc100_up \\\n", + "0 201601 44 7 574 10 861 287 15 \n", + "1 201601 75 7 1513 25 2099 927 35 \n", + "2 201601 84 7 2363 30 2958 1768 37 \n", + "3 201601 27 7 686 24 1058 314 36 \n", + "4 201601 53 7 532 16 874 190 26 \n", + "5 201601 24 7 394 15 625 163 24 \n", + "6 201601 94 7 38 12 82 0 25 \n", + "7 201601 11 7 3030 25 3788 2272 31 \n", + "8 201601 76 7 842 14 1307 377 22 \n", + "9 201601 32 7 2100 34 2711 1489 44 \n", + "10 201601 28 7 418 12 687 149 20 \n", + "11 201601 52 7 1029 27 1577 481 42 \n", + "12 201601 93 7 1053 21 1457 649 29 \n", + "13 201602 44 7 772 14 1122 422 20 \n", + "14 201602 75 7 657 11 1016 298 17 \n", + "15 201602 84 7 1486 19 1928 1044 24 \n", + "16 201602 27 7 442 15 727 157 25 \n", + "17 201602 53 7 444 13 744 144 22 \n", + "18 201602 24 7 402 15 627 177 24 \n", + "19 201602 94 7 12 4 42 0 13 \n", + "20 201602 11 7 1745 14 2317 1173 19 \n", + "21 201602 76 7 1101 19 1644 558 28 \n", + "22 201602 32 7 1249 20 1720 778 28 \n", + "23 201602 28 7 1064 31 1519 609 44 \n", + "24 201602 52 7 408 11 708 108 19 \n", + "25 201602 93 7 1570 31 2099 1041 42 \n", + "26 201603 44 7 985 17 1442 528 25 \n", + "27 201603 75 7 2070 34 2769 1371 46 \n", + "28 201603 84 7 2258 28 2810 1706 35 \n", + "29 201603 27 7 1009 35 1538 480 53 \n", + "... ... ... ... ... ... ... ... ... 
\n", + "6379 202522 32 7 30 0 195 0 3 \n", + "6380 202522 28 7 0 0 0 0 0 \n", + "6381 202522 52 7 198 5 528 0 13 \n", + "6382 202522 93 7 1254 24 3098 0 59 \n", + "6383 202523 44 7 413 7 1090 0 19 \n", + "6384 202523 75 7 442 7 1117 0 18 \n", + "6385 202523 84 7 1148 14 2156 140 26 \n", + "6386 202523 27 7 0 0 0 0 0 \n", + "6387 202523 53 7 143 4 394 0 11 \n", + "6388 202523 24 7 272 10 667 0 25 \n", + "6389 202523 94 7 37 10 104 0 29 \n", + "6390 202523 11 7 905 7 1763 47 14 \n", + "6391 202523 76 7 75 1 435 0 7 \n", + "6392 202523 32 7 527 9 1226 0 20 \n", + "6393 202523 28 7 108 3 321 0 9 \n", + "6394 202523 52 7 0 0 0 0 0 \n", + "6395 202523 93 7 336 6 1130 0 22 \n", + "6396 202524 44 7 196 3 741 0 13 \n", + "6397 202524 75 7 144 2 516 0 8 \n", + "6398 202524 84 7 824 10 1745 0 21 \n", + "6399 202524 27 7 0 0 0 0 0 \n", + "6400 202524 53 7 164 5 430 0 12 \n", + "6401 202524 24 7 710 27 1423 0 54 \n", + "6402 202524 94 7 28 8 94 0 26 \n", + "6403 202524 11 7 913 7 1946 0 16 \n", + "6404 202524 76 7 0 0 0 0 0 \n", + "6405 202524 32 7 146 2 507 0 8 \n", + "6406 202524 28 7 140 4 441 0 13 \n", + "6407 202524 52 7 0 0 0 0 0 \n", + "6408 202524 93 7 319 6 995 0 19 \n", + "\n", + " inc100_low period \n", + "0 5 2016-01-04/2016-01-10 \n", + "1 15 2016-01-04/2016-01-10 \n", + "2 22 2016-01-04/2016-01-10 \n", + "3 11 2016-01-04/2016-01-10 \n", + "4 6 2016-01-04/2016-01-10 \n", + "5 6 2016-01-04/2016-01-10 \n", + "6 0 2016-01-04/2016-01-10 \n", + "7 19 2016-01-04/2016-01-10 \n", + "8 6 2016-01-04/2016-01-10 \n", + "9 24 2016-01-04/2016-01-10 \n", + "10 4 2016-01-04/2016-01-10 \n", + "11 13 2016-01-04/2016-01-10 \n", + "12 13 2016-01-04/2016-01-10 \n", + "13 7 2016-01-11/2016-01-17 \n", + "14 5 2016-01-11/2016-01-17 \n", + "15 13 2016-01-11/2016-01-17 \n", + "16 5 2016-01-11/2016-01-17 \n", + "17 4 2016-01-11/2016-01-17 \n", + "18 7 2016-01-11/2016-01-17 \n", + "19 0 2016-01-11/2016-01-17 \n", + "20 10 2016-01-11/2016-01-17 \n", + "21 10 2016-01-11/2016-01-17 \n", + "22 13 
# %%
def convert_week(yearweek):
    """Convert a year-week code into a weekly pandas Period.

    Parameters
    ----------
    yearweek : int-like
        ISO 8601 week encoded as ``year * 100 + week`` (e.g. ``201601``).
        Anything accepted by ``int()`` works, including a float such as
        ``201601.0`` or the string ``"201601"``.

    Returns
    -------
    pandas.Period
        Weekly period built from day 0 of that ISO week, e.g.
        ``2016-01-04/2016-01-10`` for ``201601``.
    """
    # Integer arithmetic instead of slicing str(yearweek): slicing breaks as
    # soon as the column is float-typed ("201601.0" -> week "01.0").
    year, week = divmod(int(yearweek), 100)
    return pd.Period(Week(year, week).day(0), 'W')


# One Period per row, computed from the numeric 'week' column.
raw_data['period'] = [convert_week(x) for x in raw_data['week']]

# Show only the first rows to check the new column.
raw_data.head()
# 4. Set 'period' as index and sort the dataset

# %%
data = raw_data.set_index('period').sort_index()

# %% [markdown]
# 5. Choose September 1st as the beginning of each annual period

# %%
# Take the year range from the data instead of hard-coding the first year.
# The year of the last observation is excluded (as before): the season that
# would start in that year is not guaranteed to be complete.
first_year = data.index[0].year
last_year = data.index[-1].year
sept_start = [pd.Period(pd.Timestamp(y, 9, 1), 'W')
              for y in range(first_year, last_year)]
# Drop a season start that lies before the first observation, so the first
# season is never a partial one.
sept_start = [week for week in sept_start if week >= data.index[0]]

# NOTE(review): the output stored for the next cell is a
# "TypeError: Cannot compare type 'Period' with type 'int'" raised while
# slicing data['inc'] with these Period bounds. The file is regional (one row
# per geo_insee code and week), so the 'period' index built above is not
# unique and a season holds far more than ~52 rows. Aggregate to one row per
# week (or use a national file) before slicing. TODO confirm the root cause of
# the TypeError on the pandas version in use.

# %% [markdown]
# 6. Collect the incidence per year information
\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'inc'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstart\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mend\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;31m# No need for -1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mabs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseason\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m52\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mincidence\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseason\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 662\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_bool_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 663\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 664\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_with\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 665\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 666\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_get_with\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m_get_with\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 667\u001b[0m \u001b[0;31m# other: fancy integer or otherwise\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 668\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mslice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 669\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_convert_slice_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkind\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'getitem'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 670\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 671\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mABCDataFrame\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_convert_slice_indexer\u001b[0;34m(self, key, kind)\u001b[0m\n\u001b[1;32m 1462\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1463\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1464\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mslice_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstart\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkind\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkind\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1465\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1466\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_index_slice\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mslice_indexer\u001b[0;34m(self, start, end, step, kind)\u001b[0m\n\u001b[1;32m 3455\u001b[0m \"\"\"\n\u001b[1;32m 3456\u001b[0m start_slice, end_slice = self.slice_locs(start, end, step=step,\n\u001b[0;32m-> 3457\u001b[0;31m kind=kind)\n\u001b[0m\u001b[1;32m 3458\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3459\u001b[0m \u001b[0;31m# return a slice\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mslice_locs\u001b[0;34m(self, start, end, step, kind)\u001b[0m\n\u001b[1;32m 3656\u001b[0m \u001b[0mstart_slice\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3657\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mstart\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3658\u001b[0;31m \u001b[0mstart_slice\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_slice_bound\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstart\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'left'\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mkind\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3659\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mstart_slice\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3660\u001b[0m \u001b[0mstart_slice\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_slice_bound\u001b[0;34m(self, label, side, kind)\u001b[0m\n\u001b[1;32m 3586\u001b[0m \u001b[0;31m# we need to look up the label\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3587\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3588\u001b[0;31m \u001b[0mslc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_loc_only_exact_matches\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3589\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3590\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_get_loc_only_exact_matches\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3555\u001b[0m \u001b[0mget_slice_bound\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3556\u001b[0m \"\"\"\n\u001b[0;32m-> 3557\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3558\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 
# 6. Collect the incidence per year information.
#
# Each season runs from one entry of `sept_start` (inclusive) to the next (exclusive).
# The weekly 'inc' values of a season are summed and stored under the calendar year
# in which the season ends.
#
# Why a boolean mask instead of `data['inc'][start:end]`:
#   * pandas label slices include BOTH endpoints, so the week `end` was added to two
#     consecutive seasons (a `-1` on the upper bound was in fact needed);
#   * the recorded run of the label-slice version failed with
#     "TypeError: Cannot compare type 'Period' with type 'int'" inside the index's
#     label lookup; element-wise comparison does not go through that lookup.
# NOTE(review): that traceback passes through `_get_loc_duplicates`, which suggests
# `data.index` holds repeated periods - confirm and deduplicate upstream if so.
years = []
incidence = []
for start, end in zip(sept_start[:-1], sept_start[1:]):
    # Half-open window [start, end): every week belongs to exactly one season.
    in_season = (data.index >= start) & (data.index < end)
    season = data.loc[in_season, 'inc']
    # Sanity check: a season should span roughly one year of weekly rows.
    assert abs(len(season) - 52) < 3, \
        "season %s -> %s has %d weekly rows" % (start, end, len(season))
    incidence.append(season.sum())
    years.append(end.year)

# Annual incidence, indexed by the year in which each season ends.
annual = pd.Series(data=incidence, index=years)