Commit exercise

parent 2b35c31e
This diff is collapsed.
{
"cells": [],
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Exercise"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"from isoweek import Week\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Other dataset URLs on the same server, currently not used:\n",
"#   https://www.sentiweb.fr/datasets/all/inc-7-PAY.csv\n",
"#   https://www.sentiweb.fr/datasets/all/inc-7-RDD.csv\n",
"local_FileName = \"chickenpox_data.csv\"  # defined here; no visible cell reads or writes it\n",
"data_url = \"https://www.sentiweb.fr/datasets/all/inc-7-REG.csv\"\n",
"\n",
"# skiprows=1 skips the first line of the file, so the second line is parsed as the header.\n",
"df = pd.read_csv(filepath_or_buffer=data_url, skiprows=1)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the rows whose `indicator` column equals 7.\n",
"# .copy() makes `df` an independent frame: later cells add and overwrite columns\n",
"# on it, and doing that on a filtered selection can raise SettingWithCopyWarning.\n",
"df = df[df[\"indicator\"] == 7].copy()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# 3. Split the `week` code into two columns:\n",
"#    the quotient by 100 goes to `year`, the remainder to `week_num`.\n",
"df[\"year\"] = df[\"week\"].floordiv(100)\n",
"df[\"week_num\"] = df[\"week\"].mod(100)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Turn every week code into the date of that week's Monday (via isoweek.Week),\n",
"# then convert to pandas datetimes so the column can be compared with Timestamps.\n",
"df[\"week_date\"] = pd.to_datetime(\n",
"    df[\"week\"].map(lambda code: Week(code // 100, code % 100).monday())\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Parse the incidence column as numbers; entries that cannot be parsed become NaN.\n",
"df[\"inc100\"] = pd.to_numeric(\n",
"    df[\"inc100\"],\n",
"    errors=\"coerce\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# For every year from 1990 to 2024, the weekly period that contains 1 September.\n",
"# Consecutive entries delimit one epidemiologic year.\n",
"first_septembers = [\n",
"    pd.Timestamp(year=september_year, month=9, day=1).to_period('W')\n",
"    for september_year in range(1990, 2025)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Total incidence per epidemiologic year: from the week containing 1 September\n",
"# up to (but excluding) the week containing 1 September of the following year.\n",
"years = []\n",
"sums = []\n",
"\n",
"for w1, w2 in zip(first_septembers[:-1], first_septembers[1:]):\n",
"    start = w1.start_time\n",
"    end = w2.start_time\n",
"    one_year = df[(df[\"week_date\"] >= start) & (df[\"week_date\"] < end)]\n",
"    # Skip incomplete years. Distinct weeks are counted instead of rows: the frame\n",
"    # can hold several rows for the same week, so len(one_year) would overstate\n",
"    # the coverage and let partial years through.\n",
"    if one_year[\"week_date\"].nunique() >= 50:\n",
"        years.append(start.year)\n",
"        sums.append(one_year[\"inc100\"].sum())"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"the incidence of chickenpox for epidemiologic year:\n",
"\n",
"2008: 29521\n",
"1994: 25681\n",
"2009: 25590\n",
"1991: 24810\n",
"1998: 24759\n",
"1997: 23680\n",
"1996: 23621\n",
"1993: 23230\n",
"1992: 22850\n",
"2015: 22719\n",
"2007: 21958\n",
"2012: 21331\n",
"2003: 21146\n",
"2010: 20752\n",
"2006: 20546\n",
"1995: 19521\n",
"2014: 19310\n",
"2018: 18776\n",
"2021: 18749\n",
"2013: 18635\n",
"2004: 18493\n",
"2002: 18241\n",
"2005: 17768\n",
"2011: 17515\n",
"2017: 17398\n",
"1999: 17049\n",
"1990: 16994\n",
"2000: 16203\n",
"2016: 16014\n",
"2023: 13535\n",
"2001: 12499\n",
"2020: 11226\n",
"2022: 10647\n",
"2019: 7264\n"
]
}
],
"source": [
"# Index the yearly totals by their starting year and rank them, largest first.\n",
"epidemic_years = pd.Series(sums, index=years).sort_values(ascending=False)\n",
"\n",
"# One line per epidemiologic year; totals are truncated to integers for display.\n",
"print(\"the incidence of chickenpox for epidemiologic year:\\n\")\n",
"for year, value in epidemic_years.items():\n",
"    print(f\"{year}: {int(value)}\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" Max Epidemic Year: 2008 – 29521\n",
"\n",
" Min Epidemic year: 2019 – 7264\n"
]
}
],
"source": [
"# Index labels (starting years) of the largest and smallest yearly totals.\n",
"max_year, min_year = epidemic_years.idxmax(), epidemic_years.idxmin()\n",
"\n",
"print(f\"\\n Max Epidemic Year: {max_year} – {int(epidemic_years[max_year])}\")\n",
"print(f\"\\n Min Epidemic year: {min_year} – {int(epidemic_years[min_year])}\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Fresh, unfiltered read of the dataset with a calendar-year column.\n",
"# `data_url` from the loading cell is reused instead of repeating the address,\n",
"# so this frame always comes from the same file as `df`.\n",
"The_first_REG = pd.read_csv(data_url, skiprows=1)\n",
"# Entries of inc100 that cannot be parsed as numbers become NaN.\n",
"The_first_REG[\"inc100\"] = pd.to_numeric(The_first_REG[\"inc100\"], errors=\"coerce\")\n",
"# Calendar year = week code with its last two digits dropped.\n",
"The_first_REG[\"year\"] = The_first_REG[\"week\"] // 100"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1990 — недель: 897\n",
"2025 — недель: 0\n",
"2020 — недель: 1196\n",
"\n",
" the incidence of chickenpox by regions:\n",
"\n",
"2009: 30896\n",
"1998: 27161\n",
"1992: 26497\n",
"1995: 26191\n",
"2010: 24228\n",
"2004: 23363\n",
"2007: 22874\n",
"1994: 22734\n",
"1996: 21782\n",
"1993: 21685\n",
"2008: 21420\n",
"1997: 21036\n",
"2016: 20990\n",
"1999: 20701\n",
"2011: 20348\n",
"2015: 20197\n",
"1991: 19827\n",
"2012: 19662\n",
"2005: 18848\n",
"2014: 18820\n",
"2013: 18678\n",
"2019: 18199\n",
"2000: 17347\n",
"2018: 17085\n",
"2022: 16848\n",
"2017: 16814\n",
"2003: 16580\n",
"2006: 15106\n",
"2002: 14786\n",
"2001: 14162\n",
"2024: 12748\n",
"2021: 12574\n",
"2023: 10760\n",
"2020: 7332\n",
"\n",
" Max Epidemic year: 2009 – 30896\n",
"\n",
" Min Epidemic year: 2020 – 7332\n"
]
}
],
"source": [
"# Total of inc100 per calendar year, largest first.\n",
"Annual = The_first_REG.groupby(\"year\")[\"inc100\"].sum().sort_values(ascending=False)\n",
"\n",
"# Coverage check on `df`: number of distinct weeks in the 1 September to\n",
"# 1 September window that starts in each listed year. Distinct weeks are counted\n",
"# (not rows) because the frame can hold several rows for the same week.\n",
"for y in [1990, 2025, 2020]:\n",
"    window = df[(df[\"week_date\"] >= pd.Timestamp(y, 9, 1)) &\n",
"                (df[\"week_date\"] < pd.Timestamp(y + 1, 9, 1))]\n",
"    count = window[\"week_date\"].nunique()\n",
"    print(f\"{y} — недель: {count}\")\n",
"\n",
"# Drop calendar years with incomplete coverage. They are detected from the data\n",
"# (fewer than 50 distinct weeks) rather than listed by hand, so the filter stays\n",
"# correct when the dataset gains new weeks.\n",
"weeks_per_year = The_first_REG.groupby(\"year\")[\"week\"].nunique()\n",
"complete_years = weeks_per_year[weeks_per_year >= 50].index\n",
"Annual = Annual[Annual.index.isin(complete_years)]\n",
"\n",
"print(\"\\n the incidence of chickenpox by regions:\\n\")\n",
"for year, value in Annual.items():\n",
"    print(f\"{year}: {int(value)}\")\n",
"\n",
"print(f\"\\n Max Epidemic year: {Annual.idxmax()} – {int(Annual.max())}\")\n",
"print(f\"\\n Min Epidemic year: {Annual.idxmin()} – {int(Annual.min())}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
......@@ -16,10 +289,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment