From fda5af46f209c0e2e549303d621e39da5c64837f Mon Sep 17 00:00:00 2001 From: 971801b35ac90e89321ea0ca9d72e0b8 <971801b35ac90e89321ea0ca9d72e0b8@app-learninglab.inria.fr> Date: Tue, 11 Jun 2024 10:42:05 +0000 Subject: [PATCH] no commit message --- module3/exo2/exercice.ipynb | 191 +++++++++++++++++++++++++++++++++++- 1 file changed, 188 insertions(+), 3 deletions(-) diff --git a/module3/exo2/exercice.ipynb b/module3/exo2/exercice.ipynb index 0bbbe37..b7c2bc9 100644 --- a/module3/exo2/exercice.ipynb +++ b/module3/exo2/exercice.ipynb @@ -1,5 +1,191 @@ { - "cells": [], + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Analyse de l'incidence de la varicelle" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + " %matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import isoweek" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hideCode": true + }, + "source": [ + " Les données de l'incidence de la varicelle sont disponibles sur le site Web du Réseau Sentinelles. Nous les récupérons sous forme d'un fichier au format CSV dont chaque ligne correspond à une semaine de la période demandée. Nous téléchargeons toujours le jeu de données complet, qui commence en 1984 et se termine avec une semaine récente."
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "data_url = \"http://www.sentiweb.fr/datasets/incidence-PAY-7.csv\"\n", + "raw_data = pd.read_csv(data_url, encoding='iso-8859-1', skiprows=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " week indicator inc inc_low inc_up inc100 inc100_low inc100_up \\\n", + "0 202422 7 11317 7330 15304 17 11 23 \n", + "1 202421 7 9807 6926 12688 15 11 19 \n", + "2 202420 7 13661 10209 17113 20 15 25 \n", + "3 202419 7 10083 6413 13753 15 9 21 \n", + "4 202418 7 13438 9514 17362 20 14 26 \n", + "\n", + " geo_insee geo_name \n", + "0 FR France \n", + "1 FR France \n", + "2 FR France \n", + "3 FR France \n", + "4 FR France \n" + ] + } + ], + "source": [ + "print(raw_data.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "data = raw_data.dropna().copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def convert_week(year_and_week_int):\n", + " year_and_week_str = str(year_and_week_int)\n", + " year = int(year_and_week_str[:4])\n", + " week = int(year_and_week_str[4:])\n", + " w = isoweek.Week(year, week)\n", + " return pd.Period(w.day(0), 'W')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "data['period'] = [convert_week(yw) for yw in data['week']]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "sorted_data = data.set_index('period').sort_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "periods = sorted_data.index\n", + "for p1, p2 in zip(periods[:-1], periods[1:]):\n", + " delta = p2.to_timestamp() - p1.end_time\n", + " if 
delta > pd.Timedelta('1s'):\n", + " print(p1, p2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sorted_data['inc'].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sorted_data['inc'][-200:].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "first_september_week = [pd.Period(pd.Timestamp(y, 9, 1), 'W')\n", + " for y in range(1985, sorted_data.index[-1].year)]\n", + "\n", + "year = []\n", + "yearly_incidence = []\n", + "for week1, week2 in zip(first_august_week[:-1], first_august_week[1:]):\n", + " one_year = sorted_data['inc'][week1:week2-1]\n", + " assert abs(len(one_year)-52) < 2\n", + " yearly_incidence.append(one_year.sum())\n", + " year.append(week2.year)\n", + "yearly_incidence = pd.Series(data=yearly_incidence, index=year)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "yearly_incidence.plot(style='*')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "yearly_incidence.sort_values()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "yearly_incidence.hist(xrot=20)" + ] + } + ], "metadata": { "kernelspec": { "display_name": "Python 3", @@ -16,10 +202,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.3" + "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 } - -- 2.18.1