From 2aa0ba4595ee8afef1ef1fca766dc4f1f9645b15 Mon Sep 17 00:00:00 2001 From: 6c4b0fdbf8f14f2bf5b4f27e84287421 <6c4b0fdbf8f14f2bf5b4f27e84287421@app-learninglab.inria.fr> Date: Sun, 12 Mar 2023 16:57:50 +0000 Subject: [PATCH] no commit message --- .../influenza-like-illness-analysis.ipynb | 69 ++++++++----------- 1 file changed, 28 insertions(+), 41 deletions(-) diff --git a/module3/exo1/influenza-like-illness-analysis.ipynb b/module3/exo1/influenza-like-illness-analysis.ipynb index d78c3cd..991ab68 100644 --- a/module3/exo1/influenza-like-illness-analysis.ipynb +++ b/module3/exo1/influenza-like-illness-analysis.ipynb @@ -2517,59 +2517,46 @@ } ], "source": [ - "%matplotlib inline\n", - "import matplotlib.pyplot as plt\n", - "import pandas as pd\n", - "import isoweek\n", "import os\n", - "import urllib\n", + "import urllib.request\n", + "import pandas as pd\n", "\n", - "data_file = \"incidence-PAY-3.csv\"\n", - "if not os.path.isfile(data_file):\n", - " url = \"http://www.sentiweb.fr/datasets/incidence-PAY-3.csv\"\n", - " urllib.request.urlretrieve(url, data_file)\n", + "# Define the URL for the data\n", + "url = 'https://raw.githubusercontent.com/MicrosoftDocs/mslearn-introduction-to-machine-learning/main/Data/ml-basics/crime.csv'\n", "\n", - "raw_data = pd.read_csv(data_file, skiprows=1)\n", - "raw_data[raw_data.isnull().any(axis=1)]\n", + "# Define the local filename\n", + "local_data_file = 'crime.csv'\n", "\n", - "data = raw_data.dropna().copy()\n", + "# If the local file doesn't exist, download the data and save to the local file\n", + "if not os.path.exists(local_data_file):\n", + " urllib.request.urlretrieve(url, local_data_file)\n", "\n", - "def convert_week(year_and_week_int):\n", - " year_and_week_str = str(year_and_week_int)\n", - " year = int(year_and_week_str[:4])\n", - " week = int(year_and_week_str[4:])\n", - " w = isoweek.Week(year, week)\n", - " return pd.Period(w.day(0), 'W')\n", + "# Read the data from the local file into a Pandas DataFrame\n", + "raw_data = pd.read_csv(local_data_file)\n", "\n", - "data['period'] = [convert_week(yw) for yw in data['week']]\n", - "sorted_data = data.set_index('period').sort_index()\n", - "periods = sorted_data.index\n", + "# Drop the first column, which contains the crime codes\n", + "data = raw_data.drop(columns=['Offence Code'])\n", "\n", - "for p1, p2 in zip(periods[:-1], periods[1:]):\n", - " delta = p2.to_timestamp() - p1.end_time\n", - " if delta > pd.Timedelta('1s'):\n", - " print(p1, p2)\n", + "# Convert the 'Reported Date' column to a Pandas datetime type\n", + "data['Reported Date'] = pd.to_datetime(data['Reported Date'], format='%d/%m/%Y')\n", "\n", - "sorted_data['inc'].plot()\n", - "sorted_data['inc'][-200:].plot()\n", - "first_august_week = [pd.Period(pd.Timestamp(y, 8, 1), 'W')\n", - " for y in range(1985,\n", - " sorted_data.index[-1].year)]\n", - "year = []\n", - "yearly_incidence = []\n", + "# Filter the data to include only the weeks of interest\n", + "data = data[(data['Reported Date'] >= '1989-05-01') & (data['Reported Date'] <= '1989-05-07') | \n", + " (data['Reported Date'] >= '1989-05-15') & (data['Reported Date'] <= '1989-05-21')]\n", "\n", - "for week1, week2 in zip(first_august_week[:-1],\n", - " first_august_week[1:]):\n", - " one_year = sorted_data['inc'][week1:week2-1]\n", - " assert abs(len(one_year)-52) < 2\n", - " yearly_incidence.append(one_year.sum())\n", - " year.append(week2.year)\n", - " \n", - "yearly_incidence = pd.Series(data=yearly_incidence, index=year)\n", + "# Compute the weekly crime incidence\n", + "weekly_incidence = data.groupby(pd.Grouper(key='Reported Date', freq='W')).size()\n", + "\n", + "# Compute the yearly crime incidence\n", + "yearly_incidence = weekly_incidence.groupby(weekly_incidence.index.year).sum()\n", + "year = yearly_incidence.index.astype(str)\n", + "yearly_incidence.index = year\n", "\n", + "# Plot the yearly crime incidence\n", "yearly_incidence.plot(style='*')\n", "yearly_incidence.sort_values()\n", - "yearly_incidence.hist(xrot=20)\n" + "yearly_incidence.hist(xrot=20)\n", + "\n" ] }, { -- 2.18.1