{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise\n",
    "\n",
    "Yearly totals of the weekly chickenpox incidence (`inc100` column) from the regional (`REG`) dataset published on sentiweb.fr, computed first per epidemiological year (September to September) and then per calendar year."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "from isoweek import Week\n",
    "import os"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration and data loading\n",
    "\n",
    "The CSV is downloaded once; every later cell works on a copy of `raw_df`, so both analyses below use the same snapshot of the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#url = \"https://www.sentiweb.fr/datasets/all/inc-7-PAY.csv\"\n",
    "data_url = \"https://www.sentiweb.fr/datasets/all/inc-7-REG.csv\"\n",
    "#url = \"https://www.sentiweb.fr/datasets/all/inc-7-RDD.csv\"\n",
    "# NOTE(review): local_FileName is defined but not used anywhere in this notebook.\n",
    "local_FileName=\"chickenpox_data.csv\"\n",
    "\n",
    "# Epidemiological years are delimited by the weeks containing 1 September\n",
    "# of each of these years (the end of the range is exclusive).\n",
    "FIRST_SEPTEMBER_YEARS = range(1990, 2025)\n",
    "# An epidemiological year is kept only if it has at least this many distinct weeks.\n",
    "MIN_WEEKS_PER_YEAR = 50\n",
    "# Calendar years excluded from the calendar-year totals.\n",
    "INCOMPLETE_CALENDAR_YEARS = [1990, 2025]\n",
    "\n",
    "# The first line of the file is skipped; the second line is used as the header.\n",
    "raw_df = pd.read_csv(data_url, skiprows=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prepare the weekly data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Explicit copy: the columns added below must not be written to a view of raw_df\n",
    "# (avoids SettingWithCopyWarning and keeps raw_df untouched).\n",
    "df = raw_df[raw_df[\"indicator\"] == 7].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the week identifier (year * 100 + week number) into its two parts\n",
    "df[\"year\"] = df[\"week\"] // 100\n",
    "df[\"week_num\"] = df[\"week\"] % 100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert each week identifier into the date of its Monday\n",
    "df[\"week_date\"] = df[\"week\"].apply(lambda x: Week(x // 100, x % 100).monday())\n",
    "df[\"week_date\"] = pd.to_datetime(df[\"week_date\"])  # datetime64, so it can be compared with Timestamps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make the incidence column numeric; values that cannot be parsed become NaN\n",
    "df[\"inc100\"] = pd.to_numeric(df[\"inc100\"], errors=\"coerce\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Totals per epidemiological year\n",
    "\n",
    "Each epidemiological year runs from the start of the week containing 1 September to the start of the week containing 1 September of the following year, and is labelled by the year in which it starts."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Weekly periods containing 1 September of each configured year\n",
    "first_septembers = [pd.Period(pd.Timestamp(y, 9, 1), 'W') for y in FIRST_SEPTEMBER_YEARS]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sum inc100 over each epidemiological year\n",
    "years = []\n",
    "sums = []\n",
    "\n",
    "for w1, w2 in zip(first_septembers[:-1], first_septembers[1:]):\n",
    "    start = w1.start_time\n",
    "    end = w2.start_time\n",
    "    one_year = df[(df[\"week_date\"] >= start) & (df[\"week_date\"] < end)]\n",
    "    # Filter out incomplete years. The data holds several rows per week,\n",
    "    # so the number of rows says nothing about coverage: count distinct weeks.\n",
    "    if one_year[\"week\"].nunique() >= MIN_WEEKS_PER_YEAR:\n",
    "        years.append(start.year)\n",
    "        sums.append(one_year[\"inc100\"].sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "epidemic_years = pd.Series(data=sums, index=years).sort_values(ascending=False)\n",
    "print(\"the incidence of chickenpox for epidemiologic year:\\n\")\n",
    "for year, value in epidemic_years.items():\n",
    "    print(f\"{year}: {int(value)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "max_year = epidemic_years.idxmax()\n",
    "min_year = epidemic_years.idxmin()\n",
    "print(f\"\\n Max Epidemic Year: {max_year} – {int(epidemic_years[max_year])}\")\n",
    "print(f\"\\n Min Epidemic year: {min_year} – {int(epidemic_years[min_year])}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Coverage check\n",
    "\n",
    "Number of distinct weeks available between 1 September of a few selected years and 1 September of the following year."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for y in [1990, 2025, 2020]:\n",
    "    in_window = (df[\"week_date\"] >= pd.Timestamp(y, 9, 1)) & (df[\"week_date\"] < pd.Timestamp(y + 1, 9, 1))\n",
    "    # Distinct weeks, not rows: there are several rows for each week.\n",
    "    count = df.loc[in_window, \"week\"].nunique()\n",
    "    print(f\"{y} — недель: {count}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Totals per calendar year"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Work on a fresh copy of the downloaded data (no second download, no indicator filter)\n",
    "The_first_REG = raw_df.copy()\n",
    "The_first_REG[\"inc100\"] = pd.to_numeric(The_first_REG[\"inc100\"], errors=\"coerce\")\n",
    "The_first_REG[\"year\"] = The_first_REG[\"week\"] // 100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "Annual = The_first_REG.groupby(\"year\")[\"inc100\"].sum().sort_values(ascending=False)\n",
    "# Drop the calendar years listed as incomplete in the configuration cell\n",
    "Annual = Annual[~Annual.index.isin(INCOMPLETE_CALENDAR_YEARS)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"\\n the incidence of chickenpox by regions:\\n\")\n",
    "for year, value in Annual.items():\n",
    "    print(f\"{year}: {int(value)}\")\n",
    "\n",
    "print(f\"\\n Max Epidemic year: {Annual.idxmax()} – {int(Annual.max())}\")\n",
    "print(f\"\\n Min Epidemic year: {Annual.idxmin()} – {int(Annual.min())}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}