Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
mooc-rr
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
6c4b0fdbf8f14f2bf5b4f27e84287421
mooc-rr
Commits
93eb5e80
Commit
93eb5e80
authored
Mar 12, 2023
by
6c4b0fdbf8f14f2bf5b4f27e84287421
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
no commit message
parent
27a645ac
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
50 additions
and
61 deletions
+50
-61
influenza-like-illness-analysis.ipynb
module3/exo1/influenza-like-illness-analysis.ipynb
+50
-61
No files found.
module3/exo1/influenza-like-illness-analysis.ipynb
View file @
93eb5e80
...
@@ -2484,7 +2484,54 @@
...
@@ -2484,7 +2484,54 @@
"import pandas as pd\n",
"import pandas as pd\n",
"import isoweek\n",
"import isoweek\n",
"import os\n",
"import os\n",
"import requests\n"
"import urllib\n",
"\n",
"data_file = \"incidence-PAY-3.csv\"\n",
"if not os.path.isfile(data_file):\n",
" url = \"http://www.sentiweb.fr/datasets/incidence-PAY-3.csv\"\n",
" urllib.request.urlretrieve(url, data_file)\n",
"\n",
"raw_data = pd.read_csv(data_file, skiprows=1)\n",
"raw_data[raw_data.isnull().any(axis=1)]\n",
"\n",
"data = raw_data.dropna().copy()\n",
"\n",
"def convert_week(year_and_week_int):\n",
" year_and_week_str = str(year_and_week_int)\n",
" year = int(year_and_week_str[:4])\n",
" week = int(year_and_week_str[4:])\n",
" w = isoweek.Week(year, week)\n",
" return pd.Period(w.day(0), 'W')\n",
"\n",
"data['period'] = [convert_week(yw) for yw in data['week']]\n",
"sorted_data = data.set_index('period').sort_index()\n",
"periods = sorted_data.index\n",
"\n",
"for p1, p2 in zip(periods[:-1], periods[1:]):\n",
" delta = p2.to_timestamp() - p1.end_time\n",
" if delta > pd.Timedelta('1s'):\n",
" print(p1, p2)\n",
"\n",
"sorted_data['inc'].plot()\n",
"sorted_data['inc'][-200:].plot()\n",
"first_august_week = [pd.Period(pd.Timestamp(y, 8, 1), 'W')\n",
" for y in range(1985,\n",
" sorted_data.index[-1].year)]\n",
"year = []\n",
"yearly_incidence = []\n",
"\n",
"for week1, week2 in zip(first_august_week[:-1],\n",
" first_august_week[1:]):\n",
" one_year = sorted_data['inc'][week1:week2-1]\n",
" assert abs(len(one_year)-52) < 2\n",
" yearly_incidence.append(one_year.sum())\n",
" year.append(week2.year)\n",
" \n",
"yearly_incidence = pd.Series(data=yearly_incidence, index=year)\n",
"\n",
"yearly_incidence.plot(style='*')\n",
"yearly_incidence.sort_values()\n",
"yearly_incidence.hist(xrot=20)\n"
]
]
},
},
{
{
...
@@ -2493,11 +2540,7 @@
...
@@ -2493,11 +2540,7 @@
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": [
"source": [
"# Define the URL of the remote CSV file\n",
"\n"
"remote_data_url = \"http://www.sentiweb.fr/datasets/incidence-PAY-3.csv\"\n",
"\n",
"# Define the name of the local file\n",
"local_data_file = \"incidence-PAY-3.csv\"\n"
]
]
},
},
{
{
...
@@ -2518,18 +2561,7 @@
...
@@ -2518,18 +2561,7 @@
}
}
],
],
"source": [
"source": [
"# Check if the local file exists\n",
"if not os.path.isfile(local_data_file):\n",
" # If the local file does not exist, download the data from the remote URL\n",
" response = requests.get(remote_data_url)\n",
" with open(local_data_file, \"w\") as f:\n",
" f.write(response.text)\n",
"\n",
"# Read the data from the local file into a Pandas DataFrame\n",
"raw_data = pd.read_csv(local_data_file, skiprows=1)\n",
"\n",
"\n",
"# Remove rows with missing values\n",
"data = raw_data.dropna().copy()\n",
"\n"
"\n"
]
]
},
},
...
@@ -2539,50 +2571,7 @@
...
@@ -2539,50 +2571,7 @@
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": [
"source": [
"# Define a function to convert year and week integers to Pandas Period objects\n",
"\n"
"def convert_week(year_and_week_int):\n",
" year_and_week_str = str(year_and_week_int)\n",
" year = int(year_and_week_str[:4])\n",
" week = int(year_and_week_str[4:])\n",
" w = isoweek.Week(year, week)\n",
" return pd.Period(w.day(0), 'W')\n",
"\n",
"# Add a column to the DataFrame containing the Period objects\n",
"data['period'] = [convert_week(yw) for yw in data['week']]\n",
"\n",
"# Sort the data by the period column\n",
"sorted_data = data.set_index('period').sort_index()\n",
"\n",
"# Check for gaps in the data and print any that are found\n",
"periods = sorted_data.index\n",
"for p1, p2 in zip(periods[:-1], periods[1:]):\n",
" delta = p2.to_timestamp() - p1.end_time\n",
" if delta > pd.Timedelta('1s'):\n",
" print(p1, p2)\n",
"\n",
"# Plot the incidence data over time\n",
"sorted_data['inc'].plot()\n",
"\n",
"# Plot the last 200 data points\n",
"sorted_data['inc'][-200:].plot()\n",
"\n",
"# Compute the total incidence for each year and plot the results\n",
"first_august_week = [pd.Period(pd.Timestamp(y, 8, 1), 'W')\n",
" for y in range(1985,\n",
" sorted_data.index[-1].year)]\n",
"year = []\n",
"yearly_incidence = []\n",
"for week1, week2 in zip(first_august_week[:-1],\n",
" first_august_week[1:]):\n",
" one_year = sorted_data['inc'][week1:week2-1]\n",
" assert abs(len(one_year)-52) < 2\n",
" yearly_incidence.append(one_year.sum())\n",
" year.append(week2.year)\n",
"yearly_incidence = pd.Series(data=yearly_incidence, index=year)\n",
"\n",
"yearly_incidence.plot(style='*')\n",
"yearly_incidence.sort_values()\n",
"yearly_incidence.hist(xrot=20)\n"
]
]
}
}
],
],
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment