Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
mooc-rr
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
732afbed1f51733fba4ec4ed0e9bd727
mooc-rr
Commits
c978399f
Commit
c978399f
authored
Oct 07, 2023
by
732afbed1f51733fba4ec4ed0e9bd727
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
module3/exo1/analyse-syndrome-grippal.ipynb
parent
c77fe086
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
93 additions
and
33 deletions
+93
-33
analyse-syndrome-grippal.ipynb
module3/exo1/analyse-syndrome-grippal.ipynb
+93
-33
No files found.
module3/exo1/analyse-syndrome-grippal.ipynb
View file @
c978399f
...
@@ -13,18 +13,22 @@
...
@@ -13,18 +13,22 @@
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": [
"source": [
"# Activer l'affichage des graphiques dans le notebook (jupyter) en ligne.\n",
"%matplotlib inline\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import pandas as pd\n",
"# Importer les bibliothèques nécessaires\n",
"import isoweek"
"import matplotlib.pyplot as plt # Pour la création de graphiques\n",
"import pandas as pd # Pour la manipulation des données\n",
"import isoweek # Pour gérer les semaines ISO"
]
]
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
24
,
"execution_count":
41
,
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": [
"source": [
"# URL où les données d'incidence du syndrome grippal sont téléchargées\n",
"data_url = \"http://www.sentiweb.fr/datasets/incidence-PAY-3.csv\""
"data_url = \"http://www.sentiweb.fr/datasets/incidence-PAY-3.csv\""
]
]
},
},
...
@@ -37,15 +41,20 @@
...
@@ -37,15 +41,20 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
25
,
"execution_count":
42
,
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": [
"source": [
"# Nom du fichier local où les données seront stockées\n",
"data_file = \"syndrome-grippal.csv\"\n",
"data_file = \"syndrome-grippal.csv\"\n",
"\n",
"\n",
"# Vérifier si le fichier local existe, et s'il n'existe pas, le télécharger depuis l'URL\n",
"import os\n",
"import os\n",
"import urllib.request\n",
"import urllib.request\n",
"\n",
"# Vérifier si le fichier local n'existe pas\n",
"if not os.path.exists(data_file):\n",
"if not os.path.exists(data_file):\n",
" # Télécharger les données depuis l'URL et les enregistrer dans le fichier local\n",
" urllib.request.urlretrieve(data_url, data_file)"
" urllib.request.urlretrieve(data_url, data_file)"
]
]
},
},
...
@@ -73,7 +82,7 @@
...
@@ -73,7 +82,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
26
,
"execution_count":
43
,
"metadata": {
"metadata": {
"scrolled": true
"scrolled": true
},
},
...
@@ -1040,13 +1049,16 @@
...
@@ -1040,13 +1049,16 @@
"[2031 rows x 10 columns]"
"[2031 rows x 10 columns]"
]
]
},
},
"execution_count":
26
,
"execution_count":
43
,
"metadata": {},
"metadata": {},
"output_type": "execute_result"
"output_type": "execute_result"
}
}
],
],
"source": [
"source": [
"# Lire les données depuis le fichier local CSV en sautant la première ligne (commentaire)\n",
"raw_data = pd.read_csv(data_file, skiprows=1)\n",
"raw_data = pd.read_csv(data_file, skiprows=1)\n",
"\n",
"# Afficher les données brutes\n",
"raw_data"
"raw_data"
]
]
},
},
...
@@ -1059,7 +1071,7 @@
...
@@ -1059,7 +1071,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
27
,
"execution_count":
44
,
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
{
{
...
@@ -1121,12 +1133,13 @@
...
@@ -1121,12 +1133,13 @@
"1794 FR France "
"1794 FR France "
]
]
},
},
"execution_count":
27
,
"execution_count":
44
,
"metadata": {},
"metadata": {},
"output_type": "execute_result"
"output_type": "execute_result"
}
}
],
],
"source": [
"source": [
"# Sélectionner les lignes contenant au moins une valeur manquante (NaN)\n",
"raw_data[raw_data.isnull().any(axis=1)]"
"raw_data[raw_data.isnull().any(axis=1)]"
]
]
},
},
...
@@ -1139,7 +1152,7 @@
...
@@ -1139,7 +1152,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
28
,
"execution_count":
45
,
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
{
{
...
@@ -2104,13 +2117,16 @@
...
@@ -2104,13 +2117,16 @@
"[2030 rows x 10 columns]"
"[2030 rows x 10 columns]"
]
]
},
},
"execution_count":
28
,
"execution_count":
45
,
"metadata": {},
"metadata": {},
"output_type": "execute_result"
"output_type": "execute_result"
}
}
],
],
"source": [
"source": [
"# Supprimer les lignes contenant des valeurs manquantes (NaN) à partir des données brutes\n",
"data = raw_data.dropna().copy()\n",
"data = raw_data.dropna().copy()\n",
"\n",
"# Afficher les données nettoyées (sans valeurs manquantes) et en créer une copie\n",
"data"
"data"
]
]
},
},
...
@@ -2134,17 +2150,28 @@
...
@@ -2134,17 +2150,28 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
29
,
"execution_count":
46
,
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": [
"source": [
"# Définition d'une fonction pour convertir l'année et la semaine en période\n",
"def convert_week(year_and_week_int):\n",
"def convert_week(year_and_week_int):\n",
" # Convertir l'entier en une chaîne de caractères\n",
" year_and_week_str = str(year_and_week_int)\n",
" year_and_week_str = str(year_and_week_int)\n",
" \n",
" # Extraire l'année (les 4 premiers caractères de la chaîne)\n",
" year = int(year_and_week_str[:4])\n",
" year = int(year_and_week_str[:4])\n",
" \n",
" # Extraire le numéro de semaine (le reste de la chaîne)\n",
" week = int(year_and_week_str[4:])\n",
" week = int(year_and_week_str[4:])\n",
" \n",
" # Créer un objet isoweek.Week avec l'année et la semaine\n",
" w = isoweek.Week(year, week)\n",
" w = isoweek.Week(year, week)\n",
" \n",
" # Convertir l'objet isoweek.Week en une période pandas\n",
" return pd.Period(w.day(0), 'W')\n",
" return pd.Period(w.day(0), 'W')\n",
"\n",
"\n",
"# Appliquer la fonction convert_week à la colonne 'week' et créer une nouvelle colonne 'period'\n",
"data['period'] = [convert_week(yw) for yw in data['week']]"
"data['period'] = [convert_week(yw) for yw in data['week']]"
]
]
},
},
...
@@ -2164,10 +2191,11 @@
...
@@ -2164,10 +2191,11 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
30
,
"execution_count":
47
,
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": [
"source": [
"# Définir la colonne 'period' comme index du DataFrame et trier le DataFrame par cet index\n",
"sorted_data = data.set_index('period').sort_index()"
"sorted_data = data.set_index('period').sort_index()"
]
]
},
},
...
@@ -2189,7 +2217,7 @@
...
@@ -2189,7 +2217,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
32
,
"execution_count":
48
,
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
{
{
...
@@ -2201,10 +2229,17 @@
...
@@ -2201,10 +2229,17 @@
}
}
],
],
"source": [
"source": [
"# Obtenir l'index (colonne 'period') du DataFrame trié\n",
"periods = sorted_data.index\n",
"periods = sorted_data.index\n",
"\n",
"# Parcourir les périodes consécutives et vérifier la différence temporelle entre elles\n",
"for p1, p2 in zip(periods[:-1], periods[1:]):\n",
"for p1, p2 in zip(periods[:-1], periods[1:]):\n",
" # Calculer la différence temporelle entre la fin de la période p1 et le début de la période p2\n",
" delta = p2.to_timestamp() - p1.end_time\n",
" delta = p2.to_timestamp() - p1.end_time\n",
" \n",
" # Vérifier si la différence temporelle est supérieure à 1 seconde\n",
" if delta > pd.Timedelta('1s'):\n",
" if delta > pd.Timedelta('1s'):\n",
" # Afficher les paires de périodes consécutives qui ont une différence temporelle inattendue\n",
" print(p1, p2)"
" print(p1, p2)"
]
]
},
},
...
@@ -2217,7 +2252,7 @@
...
@@ -2217,7 +2252,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
33
,
"execution_count":
49
,
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
{
{
...
@@ -2231,21 +2266,21 @@
...
@@ -2231,21 +2266,21 @@
"source": [
"source": [
"# Convertir la colonne 'inc' en type numérique (float)\n",
"# Convertir la colonne 'inc' en type numérique (float)\n",
"sorted_data['inc'] = pd.to_numeric(sorted_data['inc'], errors='coerce')\n",
"sorted_data['inc'] = pd.to_numeric(sorted_data['inc'], errors='coerce')\n",
"print(sorted_data['inc'].dtypes)
\n
"
"print(sorted_data['inc'].dtypes)"
]
]
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
34
,
"execution_count":
50
,
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
{
{
"data": {
"data": {
"text/plain": [
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f2fc7
e208d0
>"
"<matplotlib.axes._subplots.AxesSubplot at 0x7f2fc7
b8bf28
>"
]
]
},
},
"execution_count":
34
,
"execution_count":
50
,
"metadata": {},
"metadata": {},
"output_type": "execute_result"
"output_type": "execute_result"
},
},
...
@@ -2263,6 +2298,7 @@
...
@@ -2263,6 +2298,7 @@
}
}
],
],
"source": [
"source": [
"# Tracer un graphique de la colonne 'inc' du DataFrame trié\n",
"sorted_data['inc'].plot()"
"sorted_data['inc'].plot()"
]
]
},
},
...
@@ -2275,16 +2311,16 @@
...
@@ -2275,16 +2311,16 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
35
,
"execution_count":
51
,
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
{
{
"data": {
"data": {
"text/plain": [
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f2fc7
d585c
0>"
"<matplotlib.axes._subplots.AxesSubplot at 0x7f2fc7
9da78
0>"
]
]
},
},
"execution_count":
35
,
"execution_count":
51
,
"metadata": {},
"metadata": {},
"output_type": "execute_result"
"output_type": "execute_result"
},
},
...
@@ -2302,6 +2338,7 @@
...
@@ -2302,6 +2338,7 @@
}
}
],
],
"source": [
"source": [
"# Tracer un graphique des 200 dernières entrées de la colonne 'inc' du DataFrame trié\n",
"sorted_data['inc'][-200:].plot()"
"sorted_data['inc'][-200:].plot()"
]
]
},
},
...
@@ -2335,10 +2372,11 @@
...
@@ -2335,10 +2372,11 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
36
,
"execution_count":
52
,
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": [
"source": [
"# Créer une liste des premières semaines d'août pour chaque année entre 1985 et la dernière année de l'index de sorted_data\n",
"first_august_week = [pd.Period(pd.Timestamp(y, 8, 1), 'W')\n",
"first_august_week = [pd.Period(pd.Timestamp(y, 8, 1), 'W')\n",
" for y in range(1985,\n",
" for y in range(1985,\n",
" sorted_data.index[-1].year)]"
" sorted_data.index[-1].year)]"
...
@@ -2355,19 +2393,31 @@
...
@@ -2355,19 +2393,31 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
37
,
"execution_count":
53
,
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": [
"source": [
"year = []\n",
"# Initialisation des listes pour stocker les données annuelles\n",
"yearly_incidence = []\n",
"year = [] # Liste des années\n",
"yearly_incidence = [] # Liste des incidences annuelles\n",
"\n",
"# Parcourir les paires d'intervalles annuels définies par first_august_week\n",
"for week1, week2 in zip(first_august_week[:-1],\n",
"for week1, week2 in zip(first_august_week[:-1],\n",
" first_august_week[1:]):\n",
" first_august_week[1:]):\n",
" # Extraire les données d'incidence pour une année donnée\n",
" one_year = sorted_data['inc'][week1:week2-1]\n",
" one_year = sorted_data['inc'][week1:week2-1]\n",
" assert abs(len(one_year)-52) < 2\n",
" \n",
" # Vérifier que chaque année a environ 52 semaines d'incidence\n",
" assert abs(len(one_year) - 52) < 2\n",
" \n",
" # Ajouter la somme des incidences de l'année à la liste yearly_incidence\n",
" yearly_incidence.append(one_year.sum())\n",
" yearly_incidence.append(one_year.sum())\n",
" \n",
" # Ajouter l'année correspondante à la liste year\n",
" year.append(week2.year)\n",
" year.append(week2.year)\n",
"yearly_incidence = pd.Series(data=yearly_incidence, index=year)"
"\n",
"# Créer une série pandas avec les données annuelles et les années comme index\n",
"yearly_incidence = pd.Series(data=yearly_incidence, index=year)\n"
]
]
},
},
{
{
...
@@ -2406,6 +2456,7 @@
...
@@ -2406,6 +2456,7 @@
}
}
],
],
"source": [
"source": [
"# Tracer un graphique de dispersion des données d'incidence annuelle avec un style en étoile\n",
"yearly_incidence.plot(style='*')"
"yearly_incidence.plot(style='*')"
]
]
},
},
...
@@ -2418,7 +2469,7 @@
...
@@ -2418,7 +2469,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
39
,
"execution_count":
54
,
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
{
{
...
@@ -2464,12 +2515,13 @@
...
@@ -2464,12 +2515,13 @@
"dtype: int64"
"dtype: int64"
]
]
},
},
"execution_count":
39
,
"execution_count":
54
,
"metadata": {},
"metadata": {},
"output_type": "execute_result"
"output_type": "execute_result"
}
}
],
],
"source": [
"source": [
"# Trier les données d'incidence annuelle par ordre croissant\n",
"yearly_incidence.sort_values()"
"yearly_incidence.sort_values()"
]
]
},
},
...
@@ -2483,16 +2535,16 @@
...
@@ -2483,16 +2535,16 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
40
,
"execution_count":
55
,
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
{
{
"data": {
"data": {
"text/plain": [
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f2fc7
c4258
8>"
"<matplotlib.axes._subplots.AxesSubplot at 0x7f2fc7
b7b5f
8>"
]
]
},
},
"execution_count":
40
,
"execution_count":
55
,
"metadata": {},
"metadata": {},
"output_type": "execute_result"
"output_type": "execute_result"
},
},
...
@@ -2510,8 +2562,16 @@
...
@@ -2510,8 +2562,16 @@
}
}
],
],
"source": [
"source": [
"# Tracer un histogramme des données d'incidence annuelle avec une rotation de l'axe des x de 20 degrés\n",
"yearly_incidence.hist(xrot=20)"
"yearly_incidence.hist(xrot=20)"
]
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
}
],
],
"metadata": {
"metadata": {
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment