no commit message

parent 8a5ef274
{
"cells": [],
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Analyse de l'incidence de la varicelle"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
" %matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import isoweek"
]
},
{
"cell_type": "markdown",
"metadata": {
"hideCode": true
},
"source": [
"Les données de l'incidence de la varicelle sont disponibles sur le site Web du Réseau Sentinelles. Nous les récupérons sous la forme d'un fichier au format CSV dont chaque ligne correspond à une semaine de la période demandée. Nous téléchargeons toujours le jeu de données complet, qui commence en 1984 et se termine avec une semaine récente."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"data_url = \"http://www.sentiweb.fr/datasets/incidence-PAY-7.csv\"\n",
"raw_data = pd.read_csv(data_url, encoding='iso-8859-1', skiprows=1)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" week indicator inc inc_low inc_up inc100 inc100_low inc100_up \\\n",
"0 202422 7 11317 7330 15304 17 11 23 \n",
"1 202421 7 9807 6926 12688 15 11 19 \n",
"2 202420 7 13661 10209 17113 20 15 25 \n",
"3 202419 7 10083 6413 13753 15 9 21 \n",
"4 202418 7 13438 9514 17362 20 14 26 \n",
"\n",
" geo_insee geo_name \n",
"0 FR France \n",
"1 FR France \n",
"2 FR France \n",
"3 FR France \n",
"4 FR France \n"
]
}
],
"source": [
"print(raw_data.head())"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"data = raw_data.dropna().copy()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def convert_week(year_and_week_int):\n",
"    \"\"\"Convert a year-and-week code such as 202422 into a weekly pandas Period.\n",
"\n",
"    The first four characters of the code are read as the year and the\n",
"    remaining characters as the week number; both are passed to isoweek.Week.\n",
"    The result is the Period of frequency 'W' that contains day 0 of that week.\n",
"    \"\"\"\n",
"    year_and_week_str = str(year_and_week_int)\n",
"    year = int(year_and_week_str[:4])\n",
"    week = int(year_and_week_str[4:])\n",
"    w = isoweek.Week(year, week)\n",
"    return pd.Period(w.day(0), 'W')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"data['period'] = [convert_week(yw) for yw in data['week']]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"sorted_data = data.set_index('period').sort_index()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Print every pair of consecutive periods whose gap (start of the second minus\n",
"# end of the first) exceeds one second, i.e. where the weekly series is not contiguous.\n",
"periods = sorted_data.index\n",
"for p1, p2 in zip(periods[:-1], periods[1:]):\n",
"    delta = p2.to_timestamp() - p1.end_time\n",
"    if delta > pd.Timedelta('1s'):\n",
"        print(p1, p2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sorted_data['inc'].plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sorted_data['inc'][-200:].plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Yearly windows run from the week containing 1 September up to the week before\n",
"# the next such week. The first year is derived from the data rather than\n",
"# hard-coded, so that no window starts before the first available week.\n",
"first_year = sorted_data.index[0].year\n",
"if pd.Period(pd.Timestamp(first_year, 9, 1), 'W') < sorted_data.index[0]:\n",
"    first_year += 1\n",
"first_september_week = [pd.Period(pd.Timestamp(y, 9, 1), 'W')\n",
"                        for y in range(first_year, sorted_data.index[-1].year)]\n",
"\n",
"year = []\n",
"yearly_incidence = []\n",
"for week1, week2 in zip(first_september_week[:-1], first_september_week[1:]):\n",
"    one_year = sorted_data['inc'][week1:week2-1]\n",
"    # Each window must contain 51 to 53 weekly rows.\n",
"    assert abs(len(one_year)-52) < 2\n",
"    yearly_incidence.append(one_year.sum())\n",
"    year.append(week2.year)\n",
"yearly_incidence = pd.Series(data=yearly_incidence, index=year)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"yearly_incidence.plot(style='*')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"yearly_incidence.sort_values()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"yearly_incidence.hist(xrot=20)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
......@@ -16,10 +202,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment