# Analyse de l'incidence de la varicelle

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import isoweek
import os

Les données de l'incidence du syndrome grippal sont disponibles du site Web du Réseau Sentinelles. Nous les récupérons sous forme d'un fichier en format CSV dont chaque ligne correspond à une semaine de la période demandée. Nous téléchargeons toujours le jeu de données complet, qui commence en 1984 et se termine avec une semaine récente.

In [2]:
remote_url = "https://www.sentiweb.fr/datasets/incidence-PAY-7.csv"
local_path = r"\module3\exo3\incidence-PAY-7.csv"
if os.path.exists(local_path):
    data_url = local_path
    raw_data = pd.read_csv(local_path, skiprows=1, comment='#')
else:
    raw_data = pd.read_csv(remote_url, skiprows=1, comment='#')
    data.to_csv(local_path)
    data_url = local_path

In [3]:
raw_data = raw_data.dropna(axis=1)
raw_data.head()

Unnamed: 0,0,week,indicator,inc,inc_low,inc_up,inc100,inc100_low,inc100_up,geo_insee,geo_name
0,1,202335,7,943,0,1986,1,0,3,FR,France
1,2,202334,7,1179,11,2347,2,0,4,FR,France
2,3,202333,7,3308,1184,5432,5,2,8,FR,France
3,4,202332,7,7996,1120,14872,12,2,22,FR,France
4,5,202331,7,3318,1398,5238,5,2,8,FR,France
5,6,202330,7,5821,3269,8373,9,5,13,FR,France
6,7,202329,7,13558,8297,18819,20,12,28,FR,France
7,8,202328,7,6700,4043,9357,10,6,14,FR,France
8,9,202327,7,7253,4599,9907,11,7,15,FR,France
9,10,202326,7,9192,6223,12161,14,10,18,FR,France


On s'assure qu'il ny a pas de valeurs manquantes

In [4]:
raw_data[raw_data.isnull().any(axis=1)]

Unnamed: 0,0,week,indicator,inc,inc_low,inc_up,inc100,inc100_low,inc100_up,geo_insee,geo_name


In [6]:
data = raw_data.dropna().copy()
data.head()

Unnamed: 0,0,week,indicator,inc,inc_low,inc_up,inc100,inc100_low,inc100_up,geo_insee,geo_name
0,1,202335,7,943,0,1986,1,0,3,FR,France
1,2,202334,7,1179,11,2347,2,0,4,FR,France
2,3,202333,7,3308,1184,5432,5,2,8,FR,France
3,4,202332,7,7996,1120,14872,12,2,22,FR,France
4,5,202331,7,3318,1398,5238,5,2,8,FR,France


In [None]:
def convert_week(year_and_week_int):
    year_and_week_str = str(year_and_week_int)
    year = int(year_and_week_str[:4])
    week = int(year_and_week_str[4:])
    w = isoweek.Week(year, week)
    return pd.Period(w.day(0), 'W')

data['period'] = [convert_week(yw) for yw in data['week']]