change

parent 4fbe9fcd
{
"cells": [],
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import isoweek"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"data_url = \"https://www.sentiweb.fr/datasets/incidence-PAY-7.csv\""
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>week</th>\n",
" <th>indicator</th>\n",
" <th>inc</th>\n",
" <th>inc_low</th>\n",
" <th>inc_up</th>\n",
" <th>inc100</th>\n",
" <th>inc100_low</th>\n",
" <th>inc100_up</th>\n",
" <th>geo_insee</th>\n",
" <th>geo_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>202109</td>\n",
" <td>7</td>\n",
" <td>11766</td>\n",
" <td>8111</td>\n",
" <td>15421</td>\n",
" <td>18</td>\n",
" <td>12</td>\n",
" <td>24</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>202108</td>\n",
" <td>7</td>\n",
" <td>11382</td>\n",
" <td>8422</td>\n",
" <td>14342</td>\n",
" <td>17</td>\n",
" <td>13</td>\n",
" <td>21</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>202107</td>\n",
" <td>7</td>\n",
" <td>13561</td>\n",
" <td>10315</td>\n",
" <td>16807</td>\n",
" <td>21</td>\n",
" <td>16</td>\n",
" <td>26</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>202106</td>\n",
" <td>7</td>\n",
" <td>13401</td>\n",
" <td>9810</td>\n",
" <td>16992</td>\n",
" <td>20</td>\n",
" <td>15</td>\n",
" <td>25</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>202105</td>\n",
" <td>7</td>\n",
" <td>12210</td>\n",
" <td>8988</td>\n",
" <td>15432</td>\n",
" <td>18</td>\n",
" <td>13</td>\n",
" <td>23</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" week indicator inc inc_low inc_up inc100 inc100_low inc100_up \\\n",
"0 202109 7 11766 8111 15421 18 12 24 \n",
"1 202108 7 11382 8422 14342 17 13 21 \n",
"2 202107 7 13561 10315 16807 21 16 26 \n",
"3 202106 7 13401 9810 16992 20 15 25 \n",
"4 202105 7 12210 8988 15432 18 13 23 \n",
"\n",
" geo_insee geo_name \n",
"0 FR France \n",
"1 FR France \n",
"2 FR France \n",
"3 FR France \n",
"4 FR France "
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"raw_data = pd.read_csv(data_url, skiprows=1)\n",
"raw_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"data = raw_data.dropna().copy()\n"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"def convert_week(year_and_week_int):\n",
"    \"\"\"Map a yyyyww week code (e.g. 202109) to the matching weekly pd.Period.\n",
"\n",
"    The first four characters of the code are read as the year and the\n",
"    remaining ones as the week number; the period is the 'W' period that\n",
"    contains day 0 of the corresponding isoweek.Week.\n",
"    \"\"\"\n",
"    code = str(year_and_week_int)\n",
"    iso_week = isoweek.Week(int(code[:4]), int(code[4:]))\n",
"    return pd.Period(iso_week.day(0), 'W')\n",
"\n",
"# One period per row, in the same order as data['week'].\n",
"data['period'] = list(map(convert_week, data['week']))"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>week</th>\n",
" <th>indicator</th>\n",
" <th>inc</th>\n",
" <th>inc_low</th>\n",
" <th>inc_up</th>\n",
" <th>inc100</th>\n",
" <th>inc100_low</th>\n",
" <th>inc100_up</th>\n",
" <th>geo_insee</th>\n",
" <th>geo_name</th>\n",
" <th>period</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>202109</td>\n",
" <td>7</td>\n",
" <td>11766</td>\n",
" <td>8111</td>\n",
" <td>15421</td>\n",
" <td>18</td>\n",
" <td>12</td>\n",
" <td>24</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" <td>2021-03-01/2021-03-07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>202108</td>\n",
" <td>7</td>\n",
" <td>11382</td>\n",
" <td>8422</td>\n",
" <td>14342</td>\n",
" <td>17</td>\n",
" <td>13</td>\n",
" <td>21</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" <td>2021-02-22/2021-02-28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>202107</td>\n",
" <td>7</td>\n",
" <td>13561</td>\n",
" <td>10315</td>\n",
" <td>16807</td>\n",
" <td>21</td>\n",
" <td>16</td>\n",
" <td>26</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" <td>2021-02-15/2021-02-21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>202106</td>\n",
" <td>7</td>\n",
" <td>13401</td>\n",
" <td>9810</td>\n",
" <td>16992</td>\n",
" <td>20</td>\n",
" <td>15</td>\n",
" <td>25</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" <td>2021-02-08/2021-02-14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>202105</td>\n",
" <td>7</td>\n",
" <td>12210</td>\n",
" <td>8988</td>\n",
" <td>15432</td>\n",
" <td>18</td>\n",
" <td>13</td>\n",
" <td>23</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" <td>2021-02-01/2021-02-07</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" week indicator inc inc_low inc_up inc100 inc100_low inc100_up \\\n",
"0 202109 7 11766 8111 15421 18 12 24 \n",
"1 202108 7 11382 8422 14342 17 13 21 \n",
"2 202107 7 13561 10315 16807 21 16 26 \n",
"3 202106 7 13401 9810 16992 20 15 25 \n",
"4 202105 7 12210 8988 15432 18 13 23 \n",
"\n",
" geo_insee geo_name period \n",
"0 FR France 2021-03-01/2021-03-07 \n",
"1 FR France 2021-02-22/2021-02-28 \n",
"2 FR France 2021-02-15/2021-02-21 \n",
"3 FR France 2021-02-08/2021-02-14 \n",
"4 FR France 2021-02-01/2021-02-07 "
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"sorted_data = data.set_index('period').sort_index()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"# Yearly reference points: the weekly period containing September 1 (month 9)\n",
"# of each year from 1991 up to, but not including, the year of the last\n",
"# observation in sorted_data.\n",
"first_september_week = [pd.Period(pd.Timestamp(y, 9, 1), 'W')\n",
"                        for y in range(1991,\n",
"                                       sorted_data.index[-1].year)]\n",
"# The original name said 'august' although the date used is September 1.\n",
"# It is kept as an alias because later cells still refer to it.\n",
"first_august_week = first_september_week"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f097c6a2b00>"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"sorted_data['inc'].plot()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>week</th>\n",
" <th>indicator</th>\n",
" <th>inc</th>\n",
" <th>inc_low</th>\n",
" <th>inc_up</th>\n",
" <th>inc100</th>\n",
" <th>inc100_low</th>\n",
" <th>inc100_up</th>\n",
" <th>geo_insee</th>\n",
" <th>geo_name</th>\n",
" </tr>\n",
" <tr>\n",
" <th>period</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1990-12-03/1990-12-09</th>\n",
" <td>199049</td>\n",
" <td>7</td>\n",
" <td>1143</td>\n",
" <td>0</td>\n",
" <td>2610</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1990-12-10/1990-12-16</th>\n",
" <td>199050</td>\n",
" <td>7</td>\n",
" <td>11079</td>\n",
" <td>6660</td>\n",
" <td>15498</td>\n",
" <td>20</td>\n",
" <td>12</td>\n",
" <td>28</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1990-12-17/1990-12-23</th>\n",
" <td>199051</td>\n",
" <td>7</td>\n",
" <td>19080</td>\n",
" <td>13807</td>\n",
" <td>24353</td>\n",
" <td>34</td>\n",
" <td>25</td>\n",
" <td>43</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1990-12-24/1990-12-30</th>\n",
" <td>199052</td>\n",
" <td>7</td>\n",
" <td>19375</td>\n",
" <td>13295</td>\n",
" <td>25455</td>\n",
" <td>34</td>\n",
" <td>23</td>\n",
" <td>45</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1990-12-31/1991-01-06</th>\n",
" <td>199101</td>\n",
" <td>7</td>\n",
" <td>15565</td>\n",
" <td>10271</td>\n",
" <td>20859</td>\n",
" <td>27</td>\n",
" <td>18</td>\n",
" <td>36</td>\n",
" <td>FR</td>\n",
" <td>France</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" week indicator inc inc_low inc_up inc100 \\\n",
"period \n",
"1990-12-03/1990-12-09 199049 7 1143 0 2610 2 \n",
"1990-12-10/1990-12-16 199050 7 11079 6660 15498 20 \n",
"1990-12-17/1990-12-23 199051 7 19080 13807 24353 34 \n",
"1990-12-24/1990-12-30 199052 7 19375 13295 25455 34 \n",
"1990-12-31/1991-01-06 199101 7 15565 10271 20859 27 \n",
"\n",
" inc100_low inc100_up geo_insee geo_name \n",
"period \n",
"1990-12-03/1990-12-09 0 5 FR France \n",
"1990-12-10/1990-12-16 12 28 FR France \n",
"1990-12-17/1990-12-23 25 43 FR France \n",
"1990-12-24/1990-12-30 23 45 FR France \n",
"1990-12-31/1991-01-06 18 36 FR France "
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sorted_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"# Pair every reference week with the following one; each pair delimits one year.\n",
"year_bounds = list(zip(first_august_week[:-1], first_august_week[1:]))\n",
"# Each total is labelled with the calendar year of the closing reference week.\n",
"year = [week2.year for _, week2 in year_bounds]\n",
"# The slice stops at week2-1 so the closing reference week counts for the next year.\n",
"# NOTE: a sanity check that each slice holds about 52 weeks\n",
"# (abs(len(one_year)-52) < 2) was disabled in the original and stays disabled.\n",
"yearly_incidence = pd.Series(\n",
"    data=[sorted_data['inc'][week1:week2-1].sum()\n",
"          for week1, week2 in year_bounds],\n",
"    index=year)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2020 221186\n",
"2002 516689\n",
"2018 542312\n",
"2017 551041\n",
"1996 564901\n",
"2019 584066\n",
"2015 604382\n",
"2000 617597\n",
"2001 619041\n",
"2012 624573\n",
"2005 628464\n",
"2006 632833\n",
"2011 642368\n",
"1993 643387\n",
"1995 652478\n",
"1994 661409\n",
"1998 677775\n",
"1997 683434\n",
"2014 685769\n",
"2013 698332\n",
"2007 717352\n",
"2008 749478\n",
"1999 756456\n",
"2003 758363\n",
"2004 777388\n",
"2016 782114\n",
"2010 829911\n",
"1992 832939\n",
"2009 842373\n",
"dtype: int64"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"yearly_incidence.sort_values()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
......@@ -16,10 +609,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment