# varicelle

Les données de l'incidence de la varicelle sont disponibles sur le site Web du Réseau Sentinelles. Nous les récupérons sous forme d'un fichier au format CSV dont chaque ligne correspond à une semaine de la période demandée. Nous téléchargeons toujours le jeu de données complet, qui commence en 1990 semaine 49 et se termine avec une semaine récente : 2022 semaine 6.


In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import isoweek

In [3]:
# Location of the CSV file holding the complete weekly incidence series.
data_url = "http://www.sentiweb.fr/datasets/incidence-PAY-7.csv"

In [4]:
# Skip the first line of the file (presumably a comment line -- confirm
# against the downloaded CSV) so the next line supplies the column names.
raw_data = pd.read_csv(data_url, skiprows=1)
type(raw_data)

pandas.core.frame.DataFrame

In [5]:
# Number of rows that were loaded.
len(raw_data)

1628

In [7]:
# Preview the first five rows as they appear in the file.
raw_data.head(5)

Unnamed: 0,week,indicator,inc,inc_low,inc_up,inc100,inc100_low,inc100_up,geo_insee,geo_name
0,202206,7,10414,7128,13700,16,11,21,FR,France
1,202205,7,10866,7758,13974,16,11,21,FR,France
2,202204,7,9547,6721,12373,14,10,18,FR,France
3,202203,7,13972,10680,17264,21,16,26,FR,France
4,202202,7,8495,6026,10964,13,9,17,FR,France


In [8]:
# Preview the last five rows as they appear in the file.
raw_data.tail(5)

Unnamed: 0,week,indicator,inc,inc_low,inc_up,inc100,inc100_low,inc100_up,geo_insee,geo_name
1623,199101,7,15565,10271,20859,27,18,36,FR,France
1624,199052,7,19375,13295,25455,34,23,45,FR,France
1625,199051,7,19080,13807,24353,34,25,43,FR,France
1626,199050,7,11079,6660,15498,20,12,28,FR,France
1627,199049,7,1143,0,2610,2,0,5,FR,France


In [9]:
# List every row that has a missing value in at least one column.
raw_data.loc[raw_data.isna().any(axis='columns')]

Unnamed: 0,week,indicator,inc,inc_low,inc_up,inc100,inc100_low,inc100_up,geo_insee,geo_name


In [11]:
# Work on a copy so that raw_data stays exactly as it was loaded.
data = raw_data.copy()

In [13]:
def convert_week(year_and_week_int):
    """Turn a ``yyyyww`` week code into a weekly ``pandas.Period``.

    The code is read as text: its first four characters are the year and
    the remaining characters are the week number within that year.

    Args:
        year_and_week_int: Week code such as ``202206`` (anything whose
            ``str()`` has that layout).

    Returns:
        A ``pd.Period`` with weekly frequency containing day 0 of that
        ``isoweek.Week``.
    """
    digits = str(year_and_week_int)
    iso_year, iso_week = int(digits[:4]), int(digits[4:])
    first_day = isoweek.Week(iso_year, iso_week).day(0)
    return pd.Period(first_day, 'W')

# Attach to every row the weekly Period derived from its 'week' code.
data['period'] = list(map(convert_week, data['week']))
len(data)

1628

In [16]:
# First five rows, now including the 'period' column.
data.head(5)

Unnamed: 0,week,indicator,inc,inc_low,inc_up,inc100,inc100_low,inc100_up,geo_insee,geo_name,period
0,202206,7,10414,7128,13700,16,11,21,FR,France,2022-02-07/2022-02-13
1,202205,7,10866,7758,13974,16,11,21,FR,France,2022-01-31/2022-02-06
2,202204,7,9547,6721,12373,14,10,18,FR,France,2022-01-24/2022-01-30
3,202203,7,13972,10680,17264,21,16,26,FR,France,2022-01-17/2022-01-23
4,202202,7,8495,6026,10964,13,9,17,FR,France,2022-01-10/2022-01-16


In [17]:
# Last five rows, now including the 'period' column.
data.tail(5)

Unnamed: 0,week,indicator,inc,inc_low,inc_up,inc100,inc100_low,inc100_up,geo_insee,geo_name,period
1623,199101,7,15565,10271,20859,27,18,36,FR,France,1990-12-31/1991-01-06
1624,199052,7,19375,13295,25455,34,23,45,FR,France,1990-12-24/1990-12-30
1625,199051,7,19080,13807,24353,34,25,43,FR,France,1990-12-17/1990-12-23
1626,199050,7,11079,6660,15498,20,12,28,FR,France,1990-12-10/1990-12-16
1627,199049,7,1143,0,2610,2,0,5,FR,France,1990-12-03/1990-12-09


In [15]:
# Index the rows by their period and sort on that index, so the table runs
# from the oldest week to the most recent one.
sorted_data = data.set_index('period').sort_index()

In [20]:
# Oldest five weeks after sorting.
sorted_data.head(5)

Unnamed: 0_level_0,week,indicator,inc,inc_low,inc_up,inc100,inc100_low,inc100_up,geo_insee,geo_name
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1990-12-03/1990-12-09,199049,7,1143,0,2610,2,0,5,FR,France
1990-12-10/1990-12-16,199050,7,11079,6660,15498,20,12,28,FR,France
1990-12-17/1990-12-23,199051,7,19080,13807,24353,34,25,43,FR,France
1990-12-24/1990-12-30,199052,7,19375,13295,25455,34,23,45,FR,France
1990-12-31/1991-01-06,199101,7,15565,10271,20859,27,18,36,FR,France


In [19]:
# Most recent five weeks after sorting.
sorted_data.tail(5)

Unnamed: 0_level_0,week,indicator,inc,inc_low,inc_up,inc100,inc100_low,inc100_up,geo_insee,geo_name
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2022-01-10/2022-01-16,202202,7,8495,6026,10964,13,9,17,FR,France
2022-01-17/2022-01-23,202203,7,13972,10680,17264,21,16,26,FR,France
2022-01-24/2022-01-30,202204,7,9547,6721,12373,14,10,18,FR,France
2022-01-31/2022-02-06,202205,7,10866,7758,13974,16,11,21,FR,France
2022-02-07/2022-02-13,202206,7,10414,7128,13700,16,11,21,FR,France


In [22]:
# Consistency check: consecutive periods in the sorted index should follow
# each other without a gap. Each offending pair is printed, and cpt counts
# how many were found.
periods = sorted_data.index
cpt = 0
for p1, p2 in zip(periods[:-1], periods[1:]):
    # Time between the end of one period and the start of the next.
    delta = p2.to_timestamp() - p1.end_time
    # end_time falls just before the next period starts, so adjacent periods
    # are less than a second apart; anything larger is a gap.
    if delta > pd.Timedelta('1s'):
        cpt += 1  # was never incremented, so the summary always said 0
        print(p1, p2)
print(f'cpt = {cpt}')

cpt = 0


In [26]:
# p1 still holds the value bound by the last iteration of the loop above.
p1

Period('2022-01-31/2022-02-06', 'W-SUN')