Commit exercise

658b8a5d · f8dc60cab5180566667b00ce62a51ae7 · 2b35c31e · 658b8a5d · 658b8a5d
Commit 658b8a5d authored Jun 22, 2025 by f8dc60cab5180566667b00ce62a51ae7
Expand all Show whitespace changes
Inline Side-by-side

Showing with 2078 additions and 3 deletions

chickenpox_data.csv module3/exo2/chickenpox_data.csv +1803 -0

exercice.ipynb module3/exo2/exercice.ipynb +275 -3

No files found.
--- a/module3/exo2/chickenpox_data.csv
+++ b/module3/exo2/chickenpox_data.csv
--- a/module3/exo2/exercice.ipynb
+++ b/module3/exo2/exercice.ipynb
 {
- "cells": [],
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Exercise"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pandas as pd\n",
+    "from isoweek import Week\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#url = \"https://www.sentiweb.fr/datasets/all/inc-7-PAY.csv\"\n",
+    "data_url = \"https://www.sentiweb.fr/datasets/all/inc-7-REG.csv\"\n",
+    "#url = \"https://www.sentiweb.fr/datasets/all/inc-7-RDD.csv\"\n",
+    "local_FileName=\"chickenpox_data.csv\"\n",
+    "df = pd.read_csv(data_url, skiprows=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = df[df[\"indicator\"] == 7]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 3. Split the week code into its year and its week number\n",
+    "df[\"year\"] = df[\"week\"] // 100\n",
+    "df[\"week_num\"] = df[\"week\"] % 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Convert each week code to the date of that week's Monday\n",
+    "df[\"week_date\"] = df[\"week\"].apply(lambda x: Week(x // 100, x % 100).monday())\n",
+    "df[\"week_date\"] = pd.to_datetime(df[\"week_date\"])  # for comparison"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Convert inc100 to a numeric column; values that cannot be parsed become NaN\n",
+    "df[\"inc100\"] = pd.to_numeric(df[\"inc100\"], errors=\"coerce\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Weekly periods containing September 1st, one per year from 1990 through 2024\n",
+    "first_septembers = [pd.Period(pd.Timestamp(y, 9, 1), 'W') for y in range(1990, 2025)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sum the incidence over each epidemiological year (one September 1st week to the next)\n",
+    "years = []\n",
+    "sums = []\n",
+    "\n",
+    "for w1, w2 in zip(first_septembers[:-1], first_septembers[1:]):\n",
+    "    start = w1.start_time\n",
+    "    end = w2.start_time\n",
+    "    one_year = df[(df[\"week_date\"] >= start) & (df[\"week_date\"] < end)]\n",
+    "    if len(one_year) >= 50:  # filter out incomplete years\n",
+    "        years.append(start.year)\n",
+    "        sums.append(one_year[\"inc100\"].sum())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "the incidence of chickenpox for epidemiologic year:\n",
+      "\n",
+      "2008: 29521\n",
+      "1994: 25681\n",
+      "2009: 25590\n",
+      "1991: 24810\n",
+      "1998: 24759\n",
+      "1997: 23680\n",
+      "1996: 23621\n",
+      "1993: 23230\n",
+      "1992: 22850\n",
+      "2015: 22719\n",
+      "2007: 21958\n",
+      "2012: 21331\n",
+      "2003: 21146\n",
+      "2010: 20752\n",
+      "2006: 20546\n",
+      "1995: 19521\n",
+      "2014: 19310\n",
+      "2018: 18776\n",
+      "2021: 18749\n",
+      "2013: 18635\n",
+      "2004: 18493\n",
+      "2002: 18241\n",
+      "2005: 17768\n",
+      "2011: 17515\n",
+      "2017: 17398\n",
+      "1999: 17049\n",
+      "1990: 16994\n",
+      "2000: 16203\n",
+      "2016: 16014\n",
+      "2023: 13535\n",
+      "2001: 12499\n",
+      "2020: 11226\n",
+      "2022: 10647\n",
+      "2019: 7264\n"
+     ]
+    }
+   ],
+   "source": [
+    "epidemic_years = pd.Series(data=sums, index=years).sort_values(ascending=False)\n",
+    "print(\"the incidence of chickenpox for epidemiologic year:\\n\")\n",
+    "for year, value in epidemic_years.items():\n",
+    "    print(f\"{year}: {int(value)}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      " Max Epidemic Year: 2008 – 29521\n",
+      "\n",
+      " Min Epidemic year: 2019 – 7264\n"
+     ]
+    }
+   ],
+   "source": [
+    "max_year = epidemic_years.idxmax()\n",
+    "min_year = epidemic_years.idxmin()\n",
+    "print(f\"\\n Max Epidemic Year: {max_year} – {int(epidemic_years[max_year])}\")\n",
+    "print(f\"\\n Min Epidemic year: {min_year} – {int(epidemic_years[min_year])}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "The_first_REG = pd.read_csv(\"https://www.sentiweb.fr/datasets/all/inc-7-REG.csv\", skiprows=1)\n",
+    "The_first_REG[\"inc100\"] = pd.to_numeric(The_first_REG[\"inc100\"], errors=\"coerce\")\n",
+    "The_first_REG[\"year\"] = The_first_REG[\"week\"] // 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1990 — недель: 897\n",
+      "2025 — недель: 0\n",
+      "2020 — недель: 1196\n",
+      "\n",
+      " the incidence of chickenpox by regions:\n",
+      "\n",
+      "2009: 30896\n",
+      "1998: 27161\n",
+      "1992: 26497\n",
+      "1995: 26191\n",
+      "2010: 24228\n",
+      "2004: 23363\n",
+      "2007: 22874\n",
+      "1994: 22734\n",
+      "1996: 21782\n",
+      "1993: 21685\n",
+      "2008: 21420\n",
+      "1997: 21036\n",
+      "2016: 20990\n",
+      "1999: 20701\n",
+      "2011: 20348\n",
+      "2015: 20197\n",
+      "1991: 19827\n",
+      "2012: 19662\n",
+      "2005: 18848\n",
+      "2014: 18820\n",
+      "2013: 18678\n",
+      "2019: 18199\n",
+      "2000: 17347\n",
+      "2018: 17085\n",
+      "2022: 16848\n",
+      "2017: 16814\n",
+      "2003: 16580\n",
+      "2006: 15106\n",
+      "2002: 14786\n",
+      "2001: 14162\n",
+      "2024: 12748\n",
+      "2021: 12574\n",
+      "2023: 10760\n",
+      "2020: 7332\n",
+      "\n",
+      " Max Epidemic year: 2009 – 30896\n",
+      "\n",
+      " Min Epidemic year: 2020 – 7332\n"
+     ]
+    }
+   ],
+   "source": [
+    "Annual = The_first_REG.groupby(\"year\")[\"inc100\"].sum().sort_values(ascending=False)\n",
+    "for y in [1990, 2025, 2020]:\n",
+    "    count = df[(df[\"week_date\"] >= pd.Timestamp(y, 9, 1)) &\n",
+    "               (df[\"week_date\"] < pd.Timestamp(y + 1, 9, 1))].shape[0]\n",
+    "    print(f\"{y} — недель: {count}\")\n",
+    "    \n",
+    "Annual = Annual[~Annual.index.isin([1990, 2025])]\n",
+    "\n",
+    "print(\"\\n the incidence of chickenpox by regions:\\n\")\n",
+    "for year, value in Annual.items():\n",
+    "    print(f\"{year}: {int(value)}\")\n",
+    "\n",
+    "print(f\"\\n Max Epidemic year: {Annual.idxmax()} – {int(Annual.max())}\")\n",
+    "print(f\"\\n Min Epidemic year: {Annual.idxmin()} – {int(Annual.min())}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
@@ -16,10 +289,9 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.6.3"
+   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
-