Completed

parent 755ff4b2
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## The incidence of chickenpox in France"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The data on the incidence of chickenpox-like illness are available from the Web site of the [Réseau Sentinelles](http://www.sentiweb.fr/). We download them as a file in CSV format, in which each line corresponds to a week in the observation period. "
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: isoweek in /opt/conda/lib/python3.6/site-packages (1.3.3)\r\n"
]
}
],
"source": [
"!pip install isoweek"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import isoweek \n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Remote dataset and the name of the local file used as its cache.\n",
"# NOTE(review): the URL ends in PAY-3 and the loaded rows show indicator 3,\n",
"# whereas the notebook title says chickenpox and the file name says inc-7.\n",
"# Confirm which dataset is intended before trusting the conclusions.\n",
"data_url = \"http://www.sentiweb.fr/datasets/incidence-PAY-3.csv\"\n",
"filename = \"inc-7-PAY-ds3.csv\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. Download the data, unless a local copy of the file already exists"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"if not os.path.exists(filename):\n",
"    # No local copy yet: read from the server. skiprows=1 drops the first\n",
"    # line of the remote file (presumably a comment line -- confirm).\n",
"    raw_data = pd.read_csv(data_url, encoding='iso-8859-1', skiprows=1)\n",
"    # Save the parsed table so that later runs use the exact same data\n",
"    # instead of downloading again; without this the local branch below\n",
"    # could never be reached.\n",
"    raw_data.to_csv(filename, index=False)\n",
"else:\n",
"    # The local copy was written by to_csv above, so it has a plain header\n",
"    # row and needs neither skiprows nor a special encoding.\n",
"    raw_data = pd.read_csv(filename)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2. Remove rows with missing values"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>week</th>\n",
" <th>indicator</th>\n",
" <th>inc</th>\n",
" <th>inc_low</th>\n",
" <th>inc_up</th>\n",
" <th>inc100</th>\n",
" <th>inc100_low</th>\n",
" <th>inc100_up</th>\n",
" <th>geo_insee</th>\n",
" <th>geo_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1883</th>\n",
" <td>198919</td>\n",
" <td>3</td>\n",
" <td>-</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" week indicator inc inc_low inc_up inc100 inc100_low inc100_up \\\n",
"1883 198919 3 - NaN NaN - NaN NaN \n",
"\n",
" geo_insee geo_name \n",
"1883 FR France "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"raw_data[raw_data.isnull(). any (axis= 1 )] "
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>week</th>\n",
" <th>indicator</th>\n",
" <th>inc</th>\n",
" <th>inc_low</th>\n",
" <th>inc_up</th>\n",
" <th>inc100</th>\n",
" <th>inc100_low</th>\n",
" <th>inc100_up</th>\n",
" <th>geo_insee</th>\n",
" <th>geo_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>202524</td>\n",
" <td>3</td>\n",
" <td>22816</td>\n",
" <td>17621.0</td>\n",
" <td>28011.0</td>\n",
" <td>34</td>\n",
" <td>26.0</td>\n",
" <td>42.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>202523</td>\n",
" <td>3</td>\n",
" <td>24564</td>\n",
" <td>19382.0</td>\n",
" <td>29746.0</td>\n",
" <td>37</td>\n",
" <td>29.0</td>\n",
" <td>45.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>202522</td>\n",
" <td>3</td>\n",
" <td>18755</td>\n",
" <td>14333.0</td>\n",
" <td>23177.0</td>\n",
" <td>28</td>\n",
" <td>21.0</td>\n",
" <td>35.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>202521</td>\n",
" <td>3</td>\n",
" <td>23760</td>\n",
" <td>18671.0</td>\n",
" <td>28849.0</td>\n",
" <td>35</td>\n",
" <td>27.0</td>\n",
" <td>43.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>202520</td>\n",
" <td>3</td>\n",
" <td>20265</td>\n",
" <td>15814.0</td>\n",
" <td>24716.0</td>\n",
" <td>30</td>\n",
" <td>23.0</td>\n",
" <td>37.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>202519</td>\n",
" <td>3</td>\n",
" <td>16264</td>\n",
" <td>12394.0</td>\n",
" <td>20134.0</td>\n",
" <td>24</td>\n",
" <td>18.0</td>\n",
" <td>30.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>202518</td>\n",
" <td>3</td>\n",
" <td>18115</td>\n",
" <td>13975.0</td>\n",
" <td>22255.0</td>\n",
" <td>27</td>\n",
" <td>21.0</td>\n",
" <td>33.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>202517</td>\n",
" <td>3</td>\n",
" <td>22150</td>\n",
" <td>17291.0</td>\n",
" <td>27009.0</td>\n",
" <td>33</td>\n",
" <td>26.0</td>\n",
" <td>40.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>202516</td>\n",
" <td>3</td>\n",
" <td>28564</td>\n",
" <td>22550.0</td>\n",
" <td>34578.0</td>\n",
" <td>43</td>\n",
" <td>34.0</td>\n",
" <td>52.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>202515</td>\n",
" <td>3</td>\n",
" <td>35721</td>\n",
" <td>29592.0</td>\n",
" <td>41850.0</td>\n",
" <td>53</td>\n",
" <td>44.0</td>\n",
" <td>62.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>202514</td>\n",
" <td>3</td>\n",
" <td>37579</td>\n",
" <td>31232.0</td>\n",
" <td>43926.0</td>\n",
" <td>56</td>\n",
" <td>47.0</td>\n",
" <td>65.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>202513</td>\n",
" <td>3</td>\n",
" <td>39673</td>\n",
" <td>33686.0</td>\n",
" <td>45660.0</td>\n",
" <td>59</td>\n",
" <td>50.0</td>\n",
" <td>68.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>202512</td>\n",
" <td>3</td>\n",
" <td>52543</td>\n",
" <td>45627.0</td>\n",
" <td>59459.0</td>\n",
" <td>78</td>\n",
" <td>68.0</td>\n",
" <td>88.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>202511</td>\n",
" <td>3</td>\n",
" <td>59469</td>\n",
" <td>52154.0</td>\n",
" <td>66784.0</td>\n",
" <td>89</td>\n",
" <td>78.0</td>\n",
" <td>100.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>202510</td>\n",
" <td>3</td>\n",
" <td>60334</td>\n",
" <td>53048.0</td>\n",
" <td>67620.0</td>\n",
" <td>90</td>\n",
" <td>79.0</td>\n",
" <td>101.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>202509</td>\n",
" <td>3</td>\n",
" <td>84531</td>\n",
" <td>74994.0</td>\n",
" <td>94068.0</td>\n",
" <td>126</td>\n",
" <td>112.0</td>\n",
" <td>140.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>202508</td>\n",
" <td>3</td>\n",
" <td>136020</td>\n",
" <td>124824.0</td>\n",
" <td>147216.0</td>\n",
" <td>203</td>\n",
" <td>186.0</td>\n",
" <td>220.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>202507</td>\n",
" <td>3</td>\n",
" <td>208952</td>\n",
" <td>195988.0</td>\n",
" <td>221916.0</td>\n",
" <td>312</td>\n",
" <td>293.0</td>\n",
" <td>331.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>202506</td>\n",
" <td>3</td>\n",
" <td>273519</td>\n",
" <td>258159.0</td>\n",
" <td>288879.0</td>\n",
" <td>408</td>\n",
" <td>385.0</td>\n",
" <td>431.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>202505</td>\n",
" <td>3</td>\n",
" <td>334395</td>\n",
" <td>318416.0</td>\n",
" <td>350374.0</td>\n",
" <td>499</td>\n",
" <td>475.0</td>\n",
" <td>523.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>202504</td>\n",
" <td>3</td>\n",
" <td>350043</td>\n",
" <td>332885.0</td>\n",
" <td>367201.0</td>\n",
" <td>522</td>\n",
" <td>496.0</td>\n",
" <td>548.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>202503</td>\n",
" <td>3</td>\n",
" <td>252772</td>\n",
" <td>238917.0</td>\n",
" <td>266627.0</td>\n",
" <td>377</td>\n",
" <td>356.0</td>\n",
" <td>398.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>202502</td>\n",
" <td>3</td>\n",
" <td>257247</td>\n",
" <td>242991.0</td>\n",
" <td>271503.0</td>\n",
" <td>384</td>\n",
" <td>363.0</td>\n",
" <td>405.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>202501</td>\n",
" <td>3</td>\n",
" <td>231549</td>\n",
" <td>214627.0</td>\n",
" <td>248471.0</td>\n",
" <td>345</td>\n",
" <td>320.0</td>\n",
" <td>370.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>202452</td>\n",
" <td>3</td>\n",
" <td>201726</td>\n",
" <td>185870.0</td>\n",
" <td>217582.0</td>\n",
" <td>302</td>\n",
" <td>278.0</td>\n",
" <td>326.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>202451</td>\n",
" <td>3</td>\n",
" <td>201697</td>\n",
" <td>187843.0</td>\n",
" <td>215551.0</td>\n",
" <td>302</td>\n",
" <td>281.0</td>\n",
" <td>323.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>202450</td>\n",
" <td>3</td>\n",
" <td>136694</td>\n",
" <td>126369.0</td>\n",
" <td>147019.0</td>\n",
" <td>205</td>\n",
" <td>190.0</td>\n",
" <td>220.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>202449</td>\n",
" <td>3</td>\n",
" <td>108487</td>\n",
" <td>99037.0</td>\n",
" <td>117937.0</td>\n",
" <td>163</td>\n",
" <td>149.0</td>\n",
" <td>177.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>202448</td>\n",
" <td>3</td>\n",
" <td>87381</td>\n",
" <td>78687.0</td>\n",
" <td>96075.0</td>\n",
" <td>131</td>\n",
" <td>118.0</td>\n",
" <td>144.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>202447</td>\n",
" <td>3</td>\n",
" <td>76286</td>\n",
" <td>67626.0</td>\n",
" <td>84946.0</td>\n",
" <td>114</td>\n",
" <td>101.0</td>\n",
" <td>127.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2090</th>\n",
" <td>198521</td>\n",
" <td>3</td>\n",
" <td>26096</td>\n",
" <td>19621.0</td>\n",
" <td>32571.0</td>\n",
" <td>47</td>\n",
" <td>35.0</td>\n",
" <td>59.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2091</th>\n",
" <td>198520</td>\n",
" <td>3</td>\n",
" <td>27896</td>\n",
" <td>20885.0</td>\n",
" <td>34907.0</td>\n",
" <td>51</td>\n",
" <td>38.0</td>\n",
" <td>64.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2092</th>\n",
" <td>198519</td>\n",
" <td>3</td>\n",
" <td>43154</td>\n",
" <td>32821.0</td>\n",
" <td>53487.0</td>\n",
" <td>78</td>\n",
" <td>59.0</td>\n",
" <td>97.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2093</th>\n",
" <td>198518</td>\n",
" <td>3</td>\n",
" <td>40555</td>\n",
" <td>29935.0</td>\n",
" <td>51175.0</td>\n",
" <td>74</td>\n",
" <td>55.0</td>\n",
" <td>93.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2094</th>\n",
" <td>198517</td>\n",
" <td>3</td>\n",
" <td>34053</td>\n",
" <td>24366.0</td>\n",
" <td>43740.0</td>\n",
" <td>62</td>\n",
" <td>44.0</td>\n",
" <td>80.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2095</th>\n",
" <td>198516</td>\n",
" <td>3</td>\n",
" <td>50362</td>\n",
" <td>36451.0</td>\n",
" <td>64273.0</td>\n",
" <td>91</td>\n",
" <td>66.0</td>\n",
" <td>116.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2096</th>\n",
" <td>198515</td>\n",
" <td>3</td>\n",
" <td>63881</td>\n",
" <td>45538.0</td>\n",
" <td>82224.0</td>\n",
" <td>116</td>\n",
" <td>83.0</td>\n",
" <td>149.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2097</th>\n",
" <td>198514</td>\n",
" <td>3</td>\n",
" <td>134545</td>\n",
" <td>114400.0</td>\n",
" <td>154690.0</td>\n",
" <td>244</td>\n",
" <td>207.0</td>\n",
" <td>281.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2098</th>\n",
" <td>198513</td>\n",
" <td>3</td>\n",
" <td>197206</td>\n",
" <td>176080.0</td>\n",
" <td>218332.0</td>\n",
" <td>357</td>\n",
" <td>319.0</td>\n",
" <td>395.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2099</th>\n",
" <td>198512</td>\n",
" <td>3</td>\n",
" <td>245240</td>\n",
" <td>223304.0</td>\n",
" <td>267176.0</td>\n",
" <td>445</td>\n",
" <td>405.0</td>\n",
" <td>485.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2100</th>\n",
" <td>198511</td>\n",
" <td>3</td>\n",
" <td>276205</td>\n",
" <td>252399.0</td>\n",
" <td>300011.0</td>\n",
" <td>501</td>\n",
" <td>458.0</td>\n",
" <td>544.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2101</th>\n",
" <td>198510</td>\n",
" <td>3</td>\n",
" <td>353231</td>\n",
" <td>326279.0</td>\n",
" <td>380183.0</td>\n",
" <td>640</td>\n",
" <td>591.0</td>\n",
" <td>689.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2102</th>\n",
" <td>198509</td>\n",
" <td>3</td>\n",
" <td>369895</td>\n",
" <td>341109.0</td>\n",
" <td>398681.0</td>\n",
" <td>670</td>\n",
" <td>618.0</td>\n",
" <td>722.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2103</th>\n",
" <td>198508</td>\n",
" <td>3</td>\n",
" <td>389886</td>\n",
" <td>359529.0</td>\n",
" <td>420243.0</td>\n",
" <td>707</td>\n",
" <td>652.0</td>\n",
" <td>762.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2104</th>\n",
" <td>198507</td>\n",
" <td>3</td>\n",
" <td>471852</td>\n",
" <td>432599.0</td>\n",
" <td>511105.0</td>\n",
" <td>855</td>\n",
" <td>784.0</td>\n",
" <td>926.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2105</th>\n",
" <td>198506</td>\n",
" <td>3</td>\n",
" <td>565825</td>\n",
" <td>518011.0</td>\n",
" <td>613639.0</td>\n",
" <td>1026</td>\n",
" <td>939.0</td>\n",
" <td>1113.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2106</th>\n",
" <td>198505</td>\n",
" <td>3</td>\n",
" <td>637302</td>\n",
" <td>592795.0</td>\n",
" <td>681809.0</td>\n",
" <td>1155</td>\n",
" <td>1074.0</td>\n",
" <td>1236.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2107</th>\n",
" <td>198504</td>\n",
" <td>3</td>\n",
" <td>424937</td>\n",
" <td>390794.0</td>\n",
" <td>459080.0</td>\n",
" <td>770</td>\n",
" <td>708.0</td>\n",
" <td>832.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2108</th>\n",
" <td>198503</td>\n",
" <td>3</td>\n",
" <td>213901</td>\n",
" <td>174689.0</td>\n",
" <td>253113.0</td>\n",
" <td>388</td>\n",
" <td>317.0</td>\n",
" <td>459.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2109</th>\n",
" <td>198502</td>\n",
" <td>3</td>\n",
" <td>97586</td>\n",
" <td>80949.0</td>\n",
" <td>114223.0</td>\n",
" <td>177</td>\n",
" <td>147.0</td>\n",
" <td>207.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2110</th>\n",
" <td>198501</td>\n",
" <td>3</td>\n",
" <td>85489</td>\n",
" <td>65918.0</td>\n",
" <td>105060.0</td>\n",
" <td>155</td>\n",
" <td>120.0</td>\n",
" <td>190.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2111</th>\n",
" <td>198452</td>\n",
" <td>3</td>\n",
" <td>84830</td>\n",
" <td>60602.0</td>\n",
" <td>109058.0</td>\n",
" <td>154</td>\n",
" <td>110.0</td>\n",
" <td>198.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2112</th>\n",
" <td>198451</td>\n",
" <td>3</td>\n",
" <td>101726</td>\n",
" <td>80242.0</td>\n",
" <td>123210.0</td>\n",
" <td>185</td>\n",
" <td>146.0</td>\n",
" <td>224.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2113</th>\n",
" <td>198450</td>\n",
" <td>3</td>\n",
" <td>123680</td>\n",
" <td>101401.0</td>\n",
" <td>145959.0</td>\n",
" <td>225</td>\n",
" <td>184.0</td>\n",
" <td>266.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2114</th>\n",
" <td>198449</td>\n",
" <td>3</td>\n",
" <td>101073</td>\n",
" <td>81684.0</td>\n",
" <td>120462.0</td>\n",
" <td>184</td>\n",
" <td>149.0</td>\n",
" <td>219.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2115</th>\n",
" <td>198448</td>\n",
" <td>3</td>\n",
" <td>78620</td>\n",
" <td>60634.0</td>\n",
" <td>96606.0</td>\n",
" <td>143</td>\n",
" <td>110.0</td>\n",
" <td>176.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2116</th>\n",
" <td>198447</td>\n",
" <td>3</td>\n",
" <td>72029</td>\n",
" <td>54274.0</td>\n",
" <td>89784.0</td>\n",
" <td>131</td>\n",
" <td>99.0</td>\n",
" <td>163.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2117</th>\n",
" <td>198446</td>\n",
" <td>3</td>\n",
" <td>87330</td>\n",
" <td>67686.0</td>\n",
" <td>106974.0</td>\n",
" <td>159</td>\n",
" <td>123.0</td>\n",
" <td>195.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2118</th>\n",
" <td>198445</td>\n",
" <td>3</td>\n",
" <td>135223</td>\n",
" <td>101414.0</td>\n",
" <td>169032.0</td>\n",
" <td>246</td>\n",
" <td>184.0</td>\n",
" <td>308.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2119</th>\n",
" <td>198444</td>\n",
" <td>3</td>\n",
" <td>68422</td>\n",
" <td>20056.0</td>\n",
" <td>116788.0</td>\n",
" <td>125</td>\n",
" <td>37.0</td>\n",
" <td>213.0</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2119 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" week indicator inc inc_low inc_up inc100 inc100_low \\\n",
"0 202524 3 22816 17621.0 28011.0 34 26.0 \n",
"1 202523 3 24564 19382.0 29746.0 37 29.0 \n",
"2 202522 3 18755 14333.0 23177.0 28 21.0 \n",
"3 202521 3 23760 18671.0 28849.0 35 27.0 \n",
"4 202520 3 20265 15814.0 24716.0 30 23.0 \n",
"5 202519 3 16264 12394.0 20134.0 24 18.0 \n",
"6 202518 3 18115 13975.0 22255.0 27 21.0 \n",
"7 202517 3 22150 17291.0 27009.0 33 26.0 \n",
"8 202516 3 28564 22550.0 34578.0 43 34.0 \n",
"9 202515 3 35721 29592.0 41850.0 53 44.0 \n",
"10 202514 3 37579 31232.0 43926.0 56 47.0 \n",
"11 202513 3 39673 33686.0 45660.0 59 50.0 \n",
"12 202512 3 52543 45627.0 59459.0 78 68.0 \n",
"13 202511 3 59469 52154.0 66784.0 89 78.0 \n",
"14 202510 3 60334 53048.0 67620.0 90 79.0 \n",
"15 202509 3 84531 74994.0 94068.0 126 112.0 \n",
"16 202508 3 136020 124824.0 147216.0 203 186.0 \n",
"17 202507 3 208952 195988.0 221916.0 312 293.0 \n",
"18 202506 3 273519 258159.0 288879.0 408 385.0 \n",
"19 202505 3 334395 318416.0 350374.0 499 475.0 \n",
"20 202504 3 350043 332885.0 367201.0 522 496.0 \n",
"21 202503 3 252772 238917.0 266627.0 377 356.0 \n",
"22 202502 3 257247 242991.0 271503.0 384 363.0 \n",
"23 202501 3 231549 214627.0 248471.0 345 320.0 \n",
"24 202452 3 201726 185870.0 217582.0 302 278.0 \n",
"25 202451 3 201697 187843.0 215551.0 302 281.0 \n",
"26 202450 3 136694 126369.0 147019.0 205 190.0 \n",
"27 202449 3 108487 99037.0 117937.0 163 149.0 \n",
"28 202448 3 87381 78687.0 96075.0 131 118.0 \n",
"29 202447 3 76286 67626.0 84946.0 114 101.0 \n",
"... ... ... ... ... ... ... ... \n",
"2090 198521 3 26096 19621.0 32571.0 47 35.0 \n",
"2091 198520 3 27896 20885.0 34907.0 51 38.0 \n",
"2092 198519 3 43154 32821.0 53487.0 78 59.0 \n",
"2093 198518 3 40555 29935.0 51175.0 74 55.0 \n",
"2094 198517 3 34053 24366.0 43740.0 62 44.0 \n",
"2095 198516 3 50362 36451.0 64273.0 91 66.0 \n",
"2096 198515 3 63881 45538.0 82224.0 116 83.0 \n",
"2097 198514 3 134545 114400.0 154690.0 244 207.0 \n",
"2098 198513 3 197206 176080.0 218332.0 357 319.0 \n",
"2099 198512 3 245240 223304.0 267176.0 445 405.0 \n",
"2100 198511 3 276205 252399.0 300011.0 501 458.0 \n",
"2101 198510 3 353231 326279.0 380183.0 640 591.0 \n",
"2102 198509 3 369895 341109.0 398681.0 670 618.0 \n",
"2103 198508 3 389886 359529.0 420243.0 707 652.0 \n",
"2104 198507 3 471852 432599.0 511105.0 855 784.0 \n",
"2105 198506 3 565825 518011.0 613639.0 1026 939.0 \n",
"2106 198505 3 637302 592795.0 681809.0 1155 1074.0 \n",
"2107 198504 3 424937 390794.0 459080.0 770 708.0 \n",
"2108 198503 3 213901 174689.0 253113.0 388 317.0 \n",
"2109 198502 3 97586 80949.0 114223.0 177 147.0 \n",
"2110 198501 3 85489 65918.0 105060.0 155 120.0 \n",
"2111 198452 3 84830 60602.0 109058.0 154 110.0 \n",
"2112 198451 3 101726 80242.0 123210.0 185 146.0 \n",
"2113 198450 3 123680 101401.0 145959.0 225 184.0 \n",
"2114 198449 3 101073 81684.0 120462.0 184 149.0 \n",
"2115 198448 3 78620 60634.0 96606.0 143 110.0 \n",
"2116 198447 3 72029 54274.0 89784.0 131 99.0 \n",
"2117 198446 3 87330 67686.0 106974.0 159 123.0 \n",
"2118 198445 3 135223 101414.0 169032.0 246 184.0 \n",
"2119 198444 3 68422 20056.0 116788.0 125 37.0 \n",
"\n",
" inc100_up geo_insee geo_name \n",
"0 42.0 FR France \n",
"1 45.0 FR France \n",
"2 35.0 FR France \n",
"3 43.0 FR France \n",
"4 37.0 FR France \n",
"5 30.0 FR France \n",
"6 33.0 FR France \n",
"7 40.0 FR France \n",
"8 52.0 FR France \n",
"9 62.0 FR France \n",
"10 65.0 FR France \n",
"11 68.0 FR France \n",
"12 88.0 FR France \n",
"13 100.0 FR France \n",
"14 101.0 FR France \n",
"15 140.0 FR France \n",
"16 220.0 FR France \n",
"17 331.0 FR France \n",
"18 431.0 FR France \n",
"19 523.0 FR France \n",
"20 548.0 FR France \n",
"21 398.0 FR France \n",
"22 405.0 FR France \n",
"23 370.0 FR France \n",
"24 326.0 FR France \n",
"25 323.0 FR France \n",
"26 220.0 FR France \n",
"27 177.0 FR France \n",
"28 144.0 FR France \n",
"29 127.0 FR France \n",
"... ... ... ... \n",
"2090 59.0 FR France \n",
"2091 64.0 FR France \n",
"2092 97.0 FR France \n",
"2093 93.0 FR France \n",
"2094 80.0 FR France \n",
"2095 116.0 FR France \n",
"2096 149.0 FR France \n",
"2097 281.0 FR France \n",
"2098 395.0 FR France \n",
"2099 485.0 FR France \n",
"2100 544.0 FR France \n",
"2101 689.0 FR France \n",
"2102 722.0 FR France \n",
"2103 762.0 FR France \n",
"2104 926.0 FR France \n",
"2105 1113.0 FR France \n",
"2106 1236.0 FR France \n",
"2107 832.0 FR France \n",
"2108 459.0 FR France \n",
"2109 207.0 FR France \n",
"2110 190.0 FR France \n",
"2111 198.0 FR France \n",
"2112 224.0 FR France \n",
"2113 266.0 FR France \n",
"2114 219.0 FR France \n",
"2115 176.0 FR France \n",
"2116 163.0 FR France \n",
"2117 195.0 FR France \n",
"2118 308.0 FR France \n",
"2119 213.0 FR France \n",
"\n",
"[2119 rows x 10 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = raw_data.dropna().copy()\n",
"data "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"3. Convert 'week' to period "
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def convert_week(year_and_week_int):\n",
"    \"\"\"Convert a YYYYWW week code into a pandas weekly Period.\n",
"\n",
"    year_and_week_int: six-digit code such as 198501, i.e. a four-digit\n",
"    year followed by a two-digit ISO week number.\n",
"    Returns the 'W' Period containing day 0 of that ISO week\n",
"    (day 0 should be the Monday -- see the isoweek documentation).\n",
"    \"\"\"\n",
"    # int() guarantees plain Python integers for isoweek's date arithmetic.\n",
"    iso_year, iso_week = divmod(int(year_and_week_int), 100)\n",
"    first_day = isoweek.Week(iso_year, iso_week).day(0)\n",
"    return pd.Period(first_day, 'W')\n",
"\n",
"data['period'] = [convert_week(code) for code in data['week']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"4. Set 'period' as index and sort the dataset"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"# Index by period in chronological order and make sure 'inc' holds numbers;\n",
"# entries that cannot be parsed become NaN (errors='coerce').\n",
"sorted_data = (\n",
"    data.set_index('period')\n",
"    .sort_index()\n",
"    .assign(inc=lambda frame: pd.to_numeric(frame['inc'], errors='coerce'))\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1989-05-01/1989-05-07 1989-05-15/1989-05-21\n"
]
}
],
"source": [
"periods = sorted_data.index\n",
"for p1, p2 in zip (periods[:- 1 ], periods[ 1 :]):\n",
" delta = p2.to_timestamp() - p1.end_time\n",
" if delta > pd.Timedelta( '1s' ):\n",
" print (p1, p2) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"5. Choose September 1st as the beginning of each annual period"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Period('1985-08-26/1985-09-01', 'W-SUN'),\n",
" Period('1986-09-01/1986-09-07', 'W-SUN'),\n",
" Period('1987-08-31/1987-09-06', 'W-SUN'),\n",
" Period('1988-08-29/1988-09-04', 'W-SUN'),\n",
" Period('1989-08-28/1989-09-03', 'W-SUN'),\n",
" Period('1990-08-27/1990-09-02', 'W-SUN'),\n",
" Period('1991-08-26/1991-09-01', 'W-SUN'),\n",
" Period('1992-08-31/1992-09-06', 'W-SUN'),\n",
" Period('1993-08-30/1993-09-05', 'W-SUN'),\n",
" Period('1994-08-29/1994-09-04', 'W-SUN'),\n",
" Period('1995-08-28/1995-09-03', 'W-SUN'),\n",
" Period('1996-08-26/1996-09-01', 'W-SUN'),\n",
" Period('1997-09-01/1997-09-07', 'W-SUN'),\n",
" Period('1998-08-31/1998-09-06', 'W-SUN'),\n",
" Period('1999-08-30/1999-09-05', 'W-SUN'),\n",
" Period('2000-08-28/2000-09-03', 'W-SUN'),\n",
" Period('2001-08-27/2001-09-02', 'W-SUN'),\n",
" Period('2002-08-26/2002-09-01', 'W-SUN'),\n",
" Period('2003-09-01/2003-09-07', 'W-SUN'),\n",
" Period('2004-08-30/2004-09-05', 'W-SUN'),\n",
" Period('2005-08-29/2005-09-04', 'W-SUN'),\n",
" Period('2006-08-28/2006-09-03', 'W-SUN'),\n",
" Period('2007-08-27/2007-09-02', 'W-SUN'),\n",
" Period('2008-09-01/2008-09-07', 'W-SUN'),\n",
" Period('2009-08-31/2009-09-06', 'W-SUN'),\n",
" Period('2010-08-30/2010-09-05', 'W-SUN'),\n",
" Period('2011-08-29/2011-09-04', 'W-SUN'),\n",
" Period('2012-08-27/2012-09-02', 'W-SUN'),\n",
" Period('2013-08-26/2013-09-01', 'W-SUN'),\n",
" Period('2014-09-01/2014-09-07', 'W-SUN'),\n",
" Period('2015-08-31/2015-09-06', 'W-SUN'),\n",
" Period('2016-08-29/2016-09-04', 'W-SUN'),\n",
" Period('2017-08-28/2017-09-03', 'W-SUN'),\n",
" Period('2018-08-27/2018-09-02', 'W-SUN'),\n",
" Period('2019-08-26/2019-09-01', 'W-SUN'),\n",
" Period('2020-08-31/2020-09-06', 'W-SUN'),\n",
" Period('2021-08-30/2021-09-05', 'W-SUN'),\n",
" Period('2022-08-29/2022-09-04', 'W-SUN'),\n",
" Period('2023-08-28/2023-09-03', 'W-SUN'),\n",
" Period('2024-08-26/2024-09-01', 'W-SUN')]"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"first_sept_week = [pd.Period(pd.Timestamp(y, 9 , 1 ), 'W' )\n",
" for y in range ( 1985 ,\n",
" sorted_data.index[- 1 ].year)] \n",
"first_sept_week"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"6. Compute the total incidence for each annual period"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2021 772545\n",
"2014 1601698\n",
"1991 1663610\n",
"1995 1828304\n",
"2020 2017296\n",
"2022 2057596\n",
"2012 2183912\n",
"2003 2234514\n",
"2019 2254363\n",
"2006 2297262\n",
"2017 2322818\n",
"2001 2540826\n",
"1992 2590314\n",
"1993 2699482\n",
"2018 2701716\n",
"1988 2759663\n",
"2007 2786458\n",
"2011 2852504\n",
"2016 2859019\n",
"1987 2867464\n",
"2023 2908672\n",
"2008 2984311\n",
"1998 3047298\n",
"2002 3115484\n",
"1994 3514133\n",
"1996 3540251\n",
"2009 3558474\n",
"2004 3572810\n",
"1997 3624129\n",
"2015 3647492\n",
"2024 3691245\n",
"2000 3808190\n",
"2005 3831409\n",
"1999 3914003\n",
"2010 3992174\n",
"2013 4176872\n",
"1986 5050543\n",
"1990 5214494\n",
"1989 5461328\n",
"dtype: int64"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"year = []\n",
"yearly_incidence = []\n",
"for week1, week2 in zip (first_sept_week[:- 1 ],first_sept_week[ 1 :]):\n",
" one_year = sorted_data[ 'inc' ][week1:week2- 1 ]\n",
" assert abs ( len (one_year)- 52 ) < 2 \n",
" yearly_incidence.append(one_year. sum ())\n",
" year.append(week2.year)\n",
"yearly_incidence = pd.Series(data=yearly_incidence, index=year) \n",
"yearly_incidence.sort_values()"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"yearly_incidence.hist()\n",
"plt.title(\"Distribution of Yearly Incidence\")\n",
"plt.xlabel(\"Incidence\")\n",
"plt.ylabel(\"Count\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Strongest epidemic year: 1989\n",
"Weakest epidemic year: 2021\n"
]
}
],
"source": [
"strongest = yearly_incidence.idxmax()\n",
"weakest = yearly_incidence.idxmin()\n",
"print(f\"Strongest epidemic year: {strongest}\")\n",
"print(f\"Weakest epidemic year: {weakest}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment