Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
mooc-rr
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
f8dc60cab5180566667b00ce62a51ae7
mooc-rr
Commits
ceb6fc6e
Commit
ceb6fc6e
authored
Jun 22, 2025
by
f8dc60cab5180566667b00ce62a51ae7
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
no commit message
parent
af142e4e
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
311 additions
and
0 deletions
+311
-0
.ipynb
module3/exo2/.ipynb
+311
-0
No files found.
module3/exo2/.ipynb
0 → 100644
View file @
ceb6fc6e
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Excercise"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"from isoweek import Week\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"#url = \"https://www.sentiweb.fr/datasets/all/inc-7-PAY.csv\"\n",
"data_url = \"https://www.sentiweb.fr/datasets/all/inc-7-REG.csv\"\n",
"#url = \"https://www.sentiweb.fr/datasets/all/inc-7-RDD.csv\"\n",
"local_FileName=\"chickenpox_data.csv\"\n",
"df = pd.read_csv(data_url, skiprows=1)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"df = df[df[\"indicator\"] == 7]"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"# 3. Split the weeks on the year and its number\n",
"df[\"year\"] = df[\"week\"] // 100\n",
"df[\"week_num\"] = df[\"week\"] % 100"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"#Transform into the date (Monday)\n",
"df[\"week_date\"] = df[\"week\"].apply(lambda x: Week(x // 100, x % 100).monday())\n",
"df[\"week_date\"] = pd.to_datetime(df[\"week_date\"]) # for comparison"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"# Into number\n",
"df[\"inc100\"] = pd.to_numeric(df[\"inc100\"], errors=\"coerce\")"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"# List of the weeks, from the 1st September\n",
"first_septembers = [pd.Period(pd.Timestamp(y, 9, 1), 'W') for y in range(1990, 2025)]"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"# Computation for epidemiologic year\n",
"years = []\n",
"sums = []\n",
"\n",
"for w1, w2 in zip(first_septembers[:-1], first_septembers[1:]):\n",
" start = w1.start_time\n",
" end = w2.start_time\n",
" one_year = df[(df[\"week_date\"] >= start) & (df[\"week_date\"] < end)]\n",
" if len(one_year) >= 50: # фильтруем неполные годы\n",
" years.append(start.year)\n",
" sums.append(one_year[\"inc100\"].sum())"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"the incidence of chickenpox for epidemiologic year:\n",
"\n",
"2008: 29521\n",
"1994: 25681\n",
"2009: 25590\n",
"1991: 24810\n",
"1998: 24759\n",
"1997: 23680\n",
"1996: 23621\n",
"1993: 23230\n",
"1992: 22850\n",
"2015: 22719\n",
"2007: 21958\n",
"2012: 21331\n",
"2003: 21146\n",
"2010: 20752\n",
"2006: 20546\n",
"1995: 19521\n",
"2014: 19310\n",
"2018: 18776\n",
"2021: 18749\n",
"2013: 18635\n",
"2004: 18493\n",
"2002: 18241\n",
"2005: 17768\n",
"2011: 17515\n",
"2017: 17398\n",
"1999: 17049\n",
"1990: 16994\n",
"2000: 16203\n",
"2016: 16014\n",
"2023: 13535\n",
"2001: 12499\n",
"2020: 11226\n",
"2022: 10647\n",
"2019: 7264\n"
]
}
],
"source": [
"epidemic_years = pd.Series(data=sums, index=years).sort_values(ascending=False)\n",
"print(\"the incidence of chickenpox for epidemiologic year:\\n\")\n",
"for year, value in epidemic_years.items():\n",
" print(f\"{year}: {int(value)}\")"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" Max Epidemic Year: 2008 – 29521\n",
"\n",
" Min Epidemic year: 2019 – 7264\n"
]
}
],
"source": [
"max_year = epidemic_years.idxmax()\n",
"min_year = epidemic_years.idxmin()\n",
"print(f\"\\n Max Epidemic Year: {max_year} – {int(epidemic_years[max_year])}\")\n",
"print(f\"\\n Min Epidemic year: {min_year} – {int(epidemic_years[min_year])}\")"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"The_first_REG = pd.read_csv(\"https://www.sentiweb.fr/datasets/all/inc-7-REG.csv\", skiprows=1)\n",
"The_first_REG[\"inc100\"] = pd.to_numeric(The_first_REG[\"inc100\"], errors=\"coerce\")\n",
"The_first_REG[\"year\"] = The_first_REG[\"week\"] // 100"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1990 — недель: 897\n",
"2025 — недель: 0\n",
"2020 — недель: 1196\n",
"\n",
" the incidence of chickenpox by regions:\n",
"\n",
"2009: 30896\n",
"1998: 27161\n",
"1992: 26497\n",
"1995: 26191\n",
"2010: 24228\n",
"2004: 23363\n",
"2007: 22874\n",
"1994: 22734\n",
"1996: 21782\n",
"1993: 21685\n",
"2008: 21420\n",
"1997: 21036\n",
"2016: 20990\n",
"1999: 20701\n",
"2011: 20348\n",
"2015: 20197\n",
"1991: 19827\n",
"2012: 19662\n",
"2005: 18848\n",
"2014: 18820\n",
"2013: 18678\n",
"2019: 18199\n",
"2000: 17347\n",
"2018: 17085\n",
"2022: 16848\n",
"2017: 16814\n",
"2003: 16580\n",
"2006: 15106\n",
"2002: 14786\n",
"2001: 14162\n",
"2024: 12748\n",
"2021: 12574\n",
"2023: 10760\n",
"2020: 7332\n",
"\n",
" Max Epidemic year: 2009 – 30896\n",
"\n",
" Min Epidemic year: 2020 – 7332\n"
]
}
],
"source": [
"Annual = The_first_REG.groupby(\"year\")[\"inc100\"].sum().sort_values(ascending=False)\n",
"for y in [1990, 2025, 2020]:\n",
" count = df[(df[\"week_date\"] >= pd.Timestamp(y, 9, 1)) &\n",
" (df[\"week_date\"] < pd.Timestamp(y + 1, 9, 1))].shape[0]\n",
" print(f\"{y} — недель: {count}\")\n",
" \n",
"Annual = Annual[~Annual.index.isin([1990, 2025])]\n",
"\n",
"print(\"\\n the incidence of chickenpox by regions:\\n\")\n",
"for year, value in Annual.items():\n",
" print(f\"{year}: {int(value)}\")\n",
"\n",
"print(f\"\\n Max Epidemic year: {Annual.idxmax()} – {int(Annual.max())}\")\n",
"print(f\"\\n Min Epidemic year: {Annual.idxmin()} – {int(Annual.min())}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment