{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"hideCode": false,
"hidePrompt": false
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import os\n",
"import urllib.request"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"hideCode": false,
"hidePrompt": false
},
"outputs": [],
"source": [
"# Remote location of the dataset and the name of its local copy.\n",
"data_url = \"https://www.sentiweb.fr/datasets/incidence-PAY-3.csv\"\n",
"data_file = \"syndrome_grippal.csv\"\n",
"\n",
"# Download only when no local copy exists, so that re-running the notebook\n",
"# reuses the existing file instead of fetching it again.\n",
"if not os.path.exists(data_file):\n",
"    urllib.request.urlretrieve(data_url, data_file)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"hideCode": false,
"hidePrompt": false
},
"outputs": [],
"source": [
"# Skip the first line of the file (presumably a comment/metadata line - verify\n",
"# against the raw CSV); the column names are then taken from the next line.\n",
"data = pd.read_csv(filepath_or_buffer=data_file, skiprows=1)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"hideCode": false,
"hidePrompt": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" week | \n",
" indicator | \n",
" inc | \n",
" inc_low | \n",
" inc_up | \n",
" inc100 | \n",
" inc100_low | \n",
" inc100_up | \n",
" geo_insee | \n",
" geo_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 202204 | \n",
" 3 | \n",
" 72585 | \n",
" 65106.0 | \n",
" 80064.0 | \n",
" 109 | \n",
" 98.0 | \n",
" 120.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
" 1 | \n",
" 202203 | \n",
" 3 | \n",
" 74613 | \n",
" 67144.0 | \n",
" 82082.0 | \n",
" 113 | \n",
" 102.0 | \n",
" 124.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
" 2 | \n",
" 202202 | \n",
" 3 | \n",
" 55920 | \n",
" 49511.0 | \n",
" 62329.0 | \n",
" 84 | \n",
" 74.0 | \n",
" 94.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
" 3 | \n",
" 202201 | \n",
" 3 | \n",
" 57629 | \n",
" 50699.0 | \n",
" 64559.0 | \n",
" 87 | \n",
" 77.0 | \n",
" 97.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
" 4 | \n",
" 202152 | \n",
" 3 | \n",
" 54349 | \n",
" 47029.0 | \n",
" 61669.0 | \n",
" 82 | \n",
" 71.0 | \n",
" 93.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" week indicator inc inc_low inc_up inc100 inc100_low inc100_up \\\n",
"0 202204 3 72585 65106.0 80064.0 109 98.0 120.0 \n",
"1 202203 3 74613 67144.0 82082.0 113 102.0 124.0 \n",
"2 202202 3 55920 49511.0 62329.0 84 74.0 94.0 \n",
"3 202201 3 57629 50699.0 64559.0 87 77.0 97.0 \n",
"4 202152 3 54349 47029.0 61669.0 82 71.0 93.0 \n",
"\n",
" geo_insee geo_name \n",
"0 FR France \n",
"1 FR France \n",
"2 FR France \n",
"3 FR France \n",
"4 FR France "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the table.\n",
"data.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"hideCode": false,
"hidePrompt": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" week | \n",
" indicator | \n",
" inc | \n",
" inc_low | \n",
" inc_up | \n",
" inc100 | \n",
" inc100_low | \n",
" inc100_up | \n",
" geo_insee | \n",
" geo_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 1939 | \n",
" 198448 | \n",
" 3 | \n",
" 78620 | \n",
" 60634.0 | \n",
" 96606.0 | \n",
" 143 | \n",
" 110.0 | \n",
" 176.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
" 1940 | \n",
" 198447 | \n",
" 3 | \n",
" 72029 | \n",
" 54274.0 | \n",
" 89784.0 | \n",
" 131 | \n",
" 99.0 | \n",
" 163.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
" 1941 | \n",
" 198446 | \n",
" 3 | \n",
" 87330 | \n",
" 67686.0 | \n",
" 106974.0 | \n",
" 159 | \n",
" 123.0 | \n",
" 195.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
" 1942 | \n",
" 198445 | \n",
" 3 | \n",
" 135223 | \n",
" 101414.0 | \n",
" 169032.0 | \n",
" 246 | \n",
" 184.0 | \n",
" 308.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
" 1943 | \n",
" 198444 | \n",
" 3 | \n",
" 68422 | \n",
" 20056.0 | \n",
" 116788.0 | \n",
" 125 | \n",
" 37.0 | \n",
" 213.0 | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" week indicator inc inc_low inc_up inc100 inc100_low \\\n",
"1939 198448 3 78620 60634.0 96606.0 143 110.0 \n",
"1940 198447 3 72029 54274.0 89784.0 131 99.0 \n",
"1941 198446 3 87330 67686.0 106974.0 159 123.0 \n",
"1942 198445 3 135223 101414.0 169032.0 246 184.0 \n",
"1943 198444 3 68422 20056.0 116788.0 125 37.0 \n",
"\n",
" inc100_up geo_insee geo_name \n",
"1939 176.0 FR France \n",
"1940 163.0 FR France \n",
"1941 195.0 FR France \n",
"1942 308.0 FR France \n",
"1943 213.0 FR France "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the last five rows of the table.\n",
"data.tail(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"hideCode": false,
"hidePrompt": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"week int64\n",
"indicator int64\n",
"inc int64\n",
"inc_low float64\n",
"inc_up float64\n",
"inc100 int64\n",
"inc100_low float64\n",
"inc100_up float64\n",
"geo_insee object\n",
"geo_name object\n",
"dtype: object\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" week | \n",
" indicator | \n",
" inc | \n",
" inc_low | \n",
" inc_up | \n",
" inc100 | \n",
" inc100_low | \n",
" inc100_up | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 1944.000000 | \n",
" 1944.0 | \n",
" 1.944000e+03 | \n",
" 1943.000000 | \n",
" 1.943000e+03 | \n",
" 1944.000000 | \n",
" 1943.000000 | \n",
" 1943.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 200321.964506 | \n",
" 3.0 | \n",
" 6.006592e+04 | \n",
" 52573.937210 | \n",
" 6.767021e+04 | \n",
" 100.750514 | \n",
" 88.210499 | \n",
" 113.476068 | \n",
"
\n",
" \n",
" std | \n",
" 1075.707510 | \n",
" 0.0 | \n",
" 1.135715e+05 | \n",
" 107879.967912 | \n",
" 1.194434e+05 | \n",
" 193.441495 | \n",
" 183.805111 | \n",
" 203.392822 | \n",
"
\n",
" \n",
" min | \n",
" 198444.000000 | \n",
" 3.0 | \n",
" 0.000000e+00 | \n",
" 0.000000 | \n",
" 0.000000e+00 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 199407.750000 | \n",
" 3.0 | \n",
" 5.170750e+03 | \n",
" 2048.500000 | \n",
" 8.432500e+03 | \n",
" 9.000000 | \n",
" 3.000000 | \n",
" 14.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 200324.500000 | \n",
" 3.0 | \n",
" 1.618500e+04 | \n",
" 10915.000000 | \n",
" 2.161300e+04 | \n",
" 27.000000 | \n",
" 18.000000 | \n",
" 36.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 201240.250000 | \n",
" 3.0 | \n",
" 4.819375e+04 | \n",
" 38644.500000 | \n",
" 5.730750e+04 | \n",
" 82.000000 | \n",
" 66.000000 | \n",
" 97.500000 | \n",
"
\n",
" \n",
" max | \n",
" 202204.000000 | \n",
" 3.0 | \n",
" 1.001824e+06 | \n",
" 974799.000000 | \n",
" 1.028849e+06 | \n",
" 1793.000000 | \n",
" 1745.000000 | \n",
" 1841.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" week indicator inc inc_low inc_up \\\n",
"count 1944.000000 1944.0 1.944000e+03 1943.000000 1.943000e+03 \n",
"mean 200321.964506 3.0 6.006592e+04 52573.937210 6.767021e+04 \n",
"std 1075.707510 0.0 1.135715e+05 107879.967912 1.194434e+05 \n",
"min 198444.000000 3.0 0.000000e+00 0.000000 0.000000e+00 \n",
"25% 199407.750000 3.0 5.170750e+03 2048.500000 8.432500e+03 \n",
"50% 200324.500000 3.0 1.618500e+04 10915.000000 2.161300e+04 \n",
"75% 201240.250000 3.0 4.819375e+04 38644.500000 5.730750e+04 \n",
"max 202204.000000 3.0 1.001824e+06 974799.000000 1.028849e+06 \n",
"\n",
" inc100 inc100_low inc100_up \n",
"count 1944.000000 1943.000000 1943.000000 \n",
"mean 100.750514 88.210499 113.476068 \n",
"std 193.441495 183.805111 203.392822 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 9.000000 3.000000 14.000000 \n",
"50% 27.000000 18.000000 36.000000 \n",
"75% 82.000000 66.000000 97.500000 \n",
"max 1793.000000 1745.000000 1841.000000 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Column types are printed as plain text ...\n",
"print(data.dtypes)\n",
"# ... and the summary statistics of the numeric columns are shown as the cell result.\n",
"# A 'count' lower than the others reveals columns that contain missing values.\n",
"data.describe()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"hideCode": false,
"hidePrompt": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" week | \n",
" indicator | \n",
" inc | \n",
" inc_low | \n",
" inc_up | \n",
" inc100 | \n",
" inc100_low | \n",
" inc100_up | \n",
" geo_insee | \n",
" geo_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 1707 | \n",
" 198919 | \n",
" 3 | \n",
" 0 | \n",
" NaN | \n",
" NaN | \n",
" 0 | \n",
" NaN | \n",
" NaN | \n",
" FR | \n",
" France | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" week indicator inc inc_low inc_up inc100 inc100_low inc100_up \\\n",
"1707 198919 3 0 NaN NaN 0 NaN NaN \n",
"\n",
" geo_insee geo_name \n",
"1707 FR France "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep every row in which at least one column is NaN, then display those rows.\n",
"missing_data = data[data.isna().any(axis=\"columns\")]\n",
"missing_data"
]
},
{
"cell_type": "markdown",
"metadata": {
"hideCode": false,
"hidePrompt": false
},
"source": [
"In the original data file, the values for week 198919 are simply missing, hence the NaN entries shown above.\n",
"We continue the analysis without this row; as it is a single week out of 1944, removing it should have little impact on the results."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"hideCode": false,
"hidePrompt": false
},
"outputs": [],
"source": [
"# Remove the incomplete rows collected in `missing_data`.\n",
"# errors=\"ignore\" keeps this cell re-runnable: on a second execution the labels\n",
"# have already been removed from `data`, which would otherwise raise a KeyError.\n",
"data = data.drop(index=missing_data.index, errors=\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"hideCode": false,
"hidePrompt": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" week | \n",
" indicator | \n",
" inc | \n",
" inc_low | \n",
" inc_up | \n",
" inc100 | \n",
" inc100_low | \n",
" inc100_up | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 1943.000000 | \n",
" 1943.0 | \n",
" 1.943000e+03 | \n",
" 1943.000000 | \n",
" 1.943000e+03 | \n",
" 1943.000000 | \n",
" 1943.000000 | \n",
" 1943.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 200322.686567 | \n",
" 3.0 | \n",
" 6.009684e+04 | \n",
" 52573.937210 | \n",
" 6.767021e+04 | \n",
" 100.802367 | \n",
" 88.210499 | \n",
" 113.476068 | \n",
"
\n",
" \n",
" std | \n",
" 1075.513102 | \n",
" 0.0 | \n",
" 1.135925e+05 | \n",
" 107879.967912 | \n",
" 1.194434e+05 | \n",
" 193.477779 | \n",
" 183.805111 | \n",
" 203.392822 | \n",
"
\n",
" \n",
" min | \n",
" 198444.000000 | \n",
" 3.0 | \n",
" 0.000000e+00 | \n",
" 0.000000 | \n",
" 0.000000e+00 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 199408.500000 | \n",
" 3.0 | \n",
" 5.177000e+03 | \n",
" 2048.500000 | \n",
" 8.432500e+03 | \n",
" 9.000000 | \n",
" 3.000000 | \n",
" 14.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 200325.000000 | \n",
" 3.0 | \n",
" 1.618800e+04 | \n",
" 10915.000000 | \n",
" 2.161300e+04 | \n",
" 27.000000 | \n",
" 18.000000 | \n",
" 36.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 201240.500000 | \n",
" 3.0 | \n",
" 4.821350e+04 | \n",
" 38644.500000 | \n",
" 5.730750e+04 | \n",
" 82.000000 | \n",
" 66.000000 | \n",
" 97.500000 | \n",
"
\n",
" \n",
" max | \n",
" 202204.000000 | \n",
" 3.0 | \n",
" 1.001824e+06 | \n",
" 974799.000000 | \n",
" 1.028849e+06 | \n",
" 1793.000000 | \n",
" 1745.000000 | \n",
" 1841.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" week indicator inc inc_low inc_up \\\n",
"count 1943.000000 1943.0 1.943000e+03 1943.000000 1.943000e+03 \n",
"mean 200322.686567 3.0 6.009684e+04 52573.937210 6.767021e+04 \n",
"std 1075.513102 0.0 1.135925e+05 107879.967912 1.194434e+05 \n",
"min 198444.000000 3.0 0.000000e+00 0.000000 0.000000e+00 \n",
"25% 199408.500000 3.0 5.177000e+03 2048.500000 8.432500e+03 \n",
"50% 200325.000000 3.0 1.618800e+04 10915.000000 2.161300e+04 \n",
"75% 201240.500000 3.0 4.821350e+04 38644.500000 5.730750e+04 \n",
"max 202204.000000 3.0 1.001824e+06 974799.000000 1.028849e+06 \n",
"\n",
" inc100 inc100_low inc100_up \n",
"count 1943.000000 1943.000000 1943.000000 \n",
"mean 100.802367 88.210499 113.476068 \n",
"std 193.477779 183.805111 203.392822 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 9.000000 3.000000 14.000000 \n",
"50% 27.000000 18.000000 36.000000 \n",
"75% 82.000000 66.000000 97.500000 \n",
"max 1793.000000 1745.000000 1841.000000 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics after the clean-up: every column should now report the same count.\n",
"data.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
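"**TODO:** Convert the `week` column, currently stored as integers such as `202204` (year followed by week number), into a proper date/period type."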
]
}
],
"metadata": {
"celltoolbar": "Hide code",
"hide_code_all_hidden": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}