From ceb6fc6e0b32a6fd22b475ee7f3b16b6c724f19a Mon Sep 17 00:00:00 2001 From: f8dc60cab5180566667b00ce62a51ae7 Date: Sun, 22 Jun 2025 18:11:23 +0000 Subject: [PATCH] no commit message --- module3/exo2/.ipynb | 311 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 311 insertions(+) create mode 100644 module3/exo2/.ipynb diff --git a/module3/exo2/.ipynb b/module3/exo2/.ipynb new file mode 100644 index 0000000..5543db9 --- /dev/null +++ b/module3/exo2/.ipynb @@ -0,0 +1,311 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Exercise" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "from isoweek import Week\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "#url = \"https://www.sentiweb.fr/datasets/all/inc-7-PAY.csv\"\n", + "data_url = \"https://www.sentiweb.fr/datasets/all/inc-7-REG.csv\"\n", + "#url = \"https://www.sentiweb.fr/datasets/all/inc-7-RDD.csv\"\n", + "local_FileName=\"chickenpox_data.csv\"\n", + "df = pd.read_csv(data_url, skiprows=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "df = df[df[\"indicator\"] == 7]" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "# 3. 
Split the weeks on the year and its number\n", + "df[\"year\"] = df[\"week\"] // 100\n", + "df[\"week_num\"] = df[\"week\"] % 100" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "#Transform into the date (Monday)\n", + "df[\"week_date\"] = df[\"week\"].apply(lambda x: Week(x // 100, x % 100).monday())\n", + "df[\"week_date\"] = pd.to_datetime(df[\"week_date\"]) # for comparison" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "# Into number\n", + "df[\"inc100\"] = pd.to_numeric(df[\"inc100\"], errors=\"coerce\")" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "# List of the weeks, from the 1st September\n", + "first_septembers = [pd.Period(pd.Timestamp(y, 9, 1), 'W') for y in range(1990, 2025)]" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "# Computation for epidemiologic year\n", + "years = []\n", + "sums = []\n", + "\n", + "for w1, w2 in zip(first_septembers[:-1], first_septembers[1:]):\n", + " start = w1.start_time\n", + " end = w2.start_time\n", + " one_year = df[(df[\"week_date\"] >= start) & (df[\"week_date\"] < end)]\n", + " if len(one_year) >= 50: # filter out incomplete years\n", + " years.append(start.year)\n", + " sums.append(one_year[\"inc100\"].sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "the incidence of chickenpox for epidemiologic year:\n", + "\n", + "2008: 29521\n", + "1994: 25681\n", + "2009: 25590\n", + "1991: 24810\n", + "1998: 24759\n", + "1997: 23680\n", + "1996: 23621\n", + "1993: 23230\n", + "1992: 22850\n", + "2015: 22719\n", + "2007: 21958\n", + "2012: 21331\n", + "2003: 21146\n", + "2010: 20752\n", + "2006: 20546\n", + "1995: 19521\n", + "2014: 19310\n", + "2018: 
18776\n", + "2021: 18749\n", + "2013: 18635\n", + "2004: 18493\n", + "2002: 18241\n", + "2005: 17768\n", + "2011: 17515\n", + "2017: 17398\n", + "1999: 17049\n", + "1990: 16994\n", + "2000: 16203\n", + "2016: 16014\n", + "2023: 13535\n", + "2001: 12499\n", + "2020: 11226\n", + "2022: 10647\n", + "2019: 7264\n" + ] + } + ], + "source": [ + "epidemic_years = pd.Series(data=sums, index=years).sort_values(ascending=False)\n", + "print(\"the incidence of chickenpox for epidemiologic year:\\n\")\n", + "for year, value in epidemic_years.items():\n", + " print(f\"{year}: {int(value)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " Max Epidemic Year: 2008 – 29521\n", + "\n", + " Min Epidemic year: 2019 – 7264\n" + ] + } + ], + "source": [ + "max_year = epidemic_years.idxmax()\n", + "min_year = epidemic_years.idxmin()\n", + "print(f\"\\n Max Epidemic Year: {max_year} – {int(epidemic_years[max_year])}\")\n", + "print(f\"\\n Min Epidemic year: {min_year} – {int(epidemic_years[min_year])}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "The_first_REG = pd.read_csv(\"https://www.sentiweb.fr/datasets/all/inc-7-REG.csv\", skiprows=1)\n", + "The_first_REG[\"inc100\"] = pd.to_numeric(The_first_REG[\"inc100\"], errors=\"coerce\")\n", + "The_first_REG[\"year\"] = The_first_REG[\"week\"] // 100" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1990 — недель: 897\n", + "2025 — недель: 0\n", + "2020 — недель: 1196\n", + "\n", + " the incidence of chickenpox by regions:\n", + "\n", + "2009: 30896\n", + "1998: 27161\n", + "1992: 26497\n", + "1995: 26191\n", + "2010: 24228\n", + "2004: 23363\n", + "2007: 22874\n", + "1994: 22734\n", + "1996: 21782\n", + "1993: 21685\n", + "2008: 21420\n", + 
"1997: 21036\n", + "2016: 20990\n", + "1999: 20701\n", + "2011: 20348\n", + "2015: 20197\n", + "1991: 19827\n", + "2012: 19662\n", + "2005: 18848\n", + "2014: 18820\n", + "2013: 18678\n", + "2019: 18199\n", + "2000: 17347\n", + "2018: 17085\n", + "2022: 16848\n", + "2017: 16814\n", + "2003: 16580\n", + "2006: 15106\n", + "2002: 14786\n", + "2001: 14162\n", + "2024: 12748\n", + "2021: 12574\n", + "2023: 10760\n", + "2020: 7332\n", + "\n", + " Max Epidemic year: 2009 – 30896\n", + "\n", + " Min Epidemic year: 2020 – 7332\n" + ] + } + ], + "source": [ + "Annual = The_first_REG.groupby(\"year\")[\"inc100\"].sum().sort_values(ascending=False)\n", + "for y in [1990, 2025, 2020]:\n", + " count = df[(df[\"week_date\"] >= pd.Timestamp(y, 9, 1)) &\n", + " (df[\"week_date\"] < pd.Timestamp(y + 1, 9, 1))].shape[0]\n", + " print(f\"{y} — недель: {count}\")\n", + " \n", + "Annual = Annual[~Annual.index.isin([1990, 2025])]\n", + "\n", + "print(\"\\n the incidence of chickenpox by regions:\\n\")\n", + "for year, value in Annual.items():\n", + " print(f\"{year}: {int(value)}\")\n", + "\n", + "print(f\"\\n Max Epidemic year: {Annual.idxmax()} – {int(Annual.max())}\")\n", + "print(f\"\\n Min Epidemic year: {Annual.idxmin()} – {int(Annual.min())}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 
2 +} -- 2.18.1