# Excercise

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
from isoweek import Week
import os

In [2]:
#url = "https://www.sentiweb.fr/datasets/all/inc-7-PAY.csv"
data_url = "https://www.sentiweb.fr/datasets/all/inc-7-REG.csv"
#url = "https://www.sentiweb.fr/datasets/all/inc-7-RDD.csv"
local_FileName="chickenpox_data.csv"
df = pd.read_csv(data_url, skiprows=1)

In [3]:
df = df[df["indicator"] == 7]

In [4]:
# 3. Split the weeks on the year and its number
df["year"] = df["week"] // 100
df["week_num"] = df["week"] % 100

In [5]:
#Transform into the date (Monday)
df["week_date"] = df["week"].apply(lambda x: Week(x // 100, x % 100).monday())
df["week_date"] = pd.to_datetime(df["week_date"])  # for comparison

In [6]:
# Into number
df["inc100"] = pd.to_numeric(df["inc100"], errors="coerce")

In [7]:
# List of the weeks, from the 1st September
first_septembers = [pd.Period(pd.Timestamp(y, 9, 1), 'W') for y in range(1990, 2025)]

In [8]:
# Computation for epidemiologic year
years = []
sums = []

for w1, w2 in zip(first_septembers[:-1], first_septembers[1:]):
    start = w1.start_time
    end = w2.start_time
    one_year = df[(df["week_date"] >= start) & (df["week_date"] < end)]
    if len(one_year) >= 50:  # фильтруем неполные годы
        years.append(start.year)
        sums.append(one_year["inc100"].sum())

In [9]:
epidemic_years = pd.Series(data=sums, index=years).sort_values(ascending=False)
print("the incidence of chickenpox for epidemiologic year:\n")
for year, value in epidemic_years.items():
    print(f"{year}: {int(value)}")

the incidence of chickenpox for epidemiologic year:

2008: 29521
1994: 25681
2009: 25590
1991: 24810
1998: 24759
1997: 23680
1996: 23621
1993: 23230
1992: 22850
2015: 22719
2007: 21958
2012: 21331
2003: 21146
2010: 20752
2006: 20546
1995: 19521
2014: 19310
2018: 18776
2021: 18749
2013: 18635
2004: 18493
2002: 18241
2005: 17768
2011: 17515
2017: 17398
1999: 17049
1990: 16994
2000: 16203
2016: 16014
2023: 13535
2001: 12499
2020: 11226
2022: 10647
2019: 7264


In [10]:
max_year = epidemic_years.idxmax()
min_year = epidemic_years.idxmin()
print(f"\n Max Epidemic Year: {max_year} – {int(epidemic_years[max_year])}")
print(f"\n Min Epidemic year: {min_year} – {int(epidemic_years[min_year])}")


 Max Epidemic Year: 2008 – 29521

 Min Epidemic year: 2019 – 7264


In [11]:
The_first_REG = pd.read_csv("https://www.sentiweb.fr/datasets/all/inc-7-REG.csv", skiprows=1)
The_first_REG["inc100"] = pd.to_numeric(The_first_REG["inc100"], errors="coerce")
The_first_REG["year"] = The_first_REG["week"] // 100

In [12]:
Annual = The_first_REG.groupby("year")["inc100"].sum().sort_values(ascending=False)
for y in [1990, 2025, 2020]:
    count = df[(df["week_date"] >= pd.Timestamp(y, 9, 1)) &
               (df["week_date"] < pd.Timestamp(y + 1, 9, 1))].shape[0]
    print(f"{y} — недель: {count}")
    
Annual = Annual[~Annual.index.isin([1990, 2025])]

print("\n the incidence of chickenpox by regions:\n")
for year, value in Annual.items():
    print(f"{year}: {int(value)}")

print(f"\n Max Epidemic year: {Annual.idxmax()} – {int(Annual.max())}")
print(f"\n Min Epidemic year: {Annual.idxmin()} – {int(Annual.min())}")

1990 — недель: 897
2025 — недель: 0
2020 — недель: 1196

 the incidence of chickenpox by regions:

2009: 30896
1998: 27161
1992: 26497
1995: 26191
2010: 24228
2004: 23363
2007: 22874
1994: 22734
1996: 21782
1993: 21685
2008: 21420
1997: 21036
2016: 20990
1999: 20701
2011: 20348
2015: 20197
1991: 19827
2012: 19662
2005: 18848
2014: 18820
2013: 18678
2019: 18199
2000: 17347
2018: 17085
2022: 16848
2017: 16814
2003: 16580
2006: 15106
2002: 14786
2001: 14162
2024: 12748
2021: 12574
2023: 10760
2020: 7332

 Max Epidemic year: 2009 – 30896

 Min Epidemic year: 2020 – 7332
