correction

parent 5a76d241
......@@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Incidence du syndrome grippal"
"# Incidence du syndrome grippal CORRECTION"
]
},
{
......@@ -32,11 +32,6 @@
"metadata": {},
"outputs": [],
"source": [
"#data_url = \"http://www.sentiweb.fr/datasets/incidence-PAY-3.csv\"\n",
"#data_url3 = \"https://www.sentiweb.fr/datasets/all/inc-3-PAY.csv\"\n",
"data_url = \"https://app-learninglab.inria.fr/moocrr/gitlab/dfa5b61add2096b8c3911f0d73f434f3/mooc-rr/blob/master/module3/exo1/inc-3-PAY.csv\"\n",
"data_url = \"module3/exo1/incidence-PAY-3.csv\"\n",
"data_url = \"https://app-learninglab.inria.fr/moocrr/gitlab/dfa5b61add2096b8c3911f0d73f434f3/mooc-rr/blob/master/module3/exo1/inc-3-PAY-mod.csv\"\n",
"data_url = \"http://www.sentiweb.fr/datasets/incidence-PAY-3.csv\""
]
},
......@@ -62,10 +57,31 @@
"La première ligne du fichier CSV est un commentaire, que nous ignorons en précisant `skiprows=1`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Pour nous protéger contre une éventuelle disparition ou modification du serveur du Réseau Sentinelles, nous faisons une copie locale de ce jeu de données que nous préservons avec notre analyse. Il est inutile et même risqué de télécharger les données à chaque exécution, car dans le cas d'une panne nous pourrions remplacer nos données par un fichier défectueux. Pour cette raison, nous téléchargeons les données seulement si la copie locale n'existe pas."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"data_file = \"syndrome-grippal.csv\"\n",
"\n",
"import os\n",
"import urllib.request\n",
"if not os.path.exists(data_file):\n",
" urllib.request.urlretrieve(data_url, data_file)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
......@@ -1029,20 +1045,13 @@
"[2146 rows x 10 columns]"
]
},
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"# je ne comprends pas pourquoi j'ai une erreur :\n",
"# ParserError: Error tokenizing data. C error: Expected 1 fields in line 30, saw 21\n",
"# quand j'essaye d'utiliser les données qui sont dans le même dossier\n",
"# que celui où se trouve ce notebook, alors qu'avec le lien url ça fonctionne\n",
"# je n'y arrive pas même avec un fichier modifié (sans la ligne qui pose problème)\n",
"\n",
"raw_data = pd.read_csv(data_url, encoding = 'iso-8859-1', skiprows=1)\n",
"raw_data = pd.read_csv(data_file, skiprows=1)\n",
"raw_data"
]
},
......@@ -1055,7 +1064,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"outputs": [
{
......@@ -1117,13 +1126,14 @@
"1909 FR France "
]
},
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"raw_data[raw_data.isnull().any(axis=1)]"
"raw_data[raw_data.isnull().any(axis=1)]\n",
"#raw_data.isnull().any(axis=1)"
]
},
{
......@@ -1135,7 +1145,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [
{
......@@ -2100,7 +2110,7 @@
"[2145 rows x 10 columns]"
]
},
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
......@@ -2130,7 +2140,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
......@@ -2139,9 +2149,11 @@
" year = int(year_and_week_str[:4])\n",
" week = int(year_and_week_str[4:])\n",
" w = isoweek.Week(year, week)\n",
" #print(w)\n",
" return pd.Period(w.day(0), 'W')\n",
"\n",
"data['period'] = [convert_week(yw) for yw in data['week']]"
"data['period'] = [convert_week(yw) for yw in data['week']]\n",
"#data"
]
},
{
......@@ -2160,7 +2172,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
......@@ -2185,7 +2197,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [
{
......@@ -2198,6 +2210,7 @@
],
"source": [
"periods = sorted_data.index\n",
"#print(periods)\n",
"for p1, p2 in zip(periods[:-1], periods[1:]):\n",
" delta = p2.to_timestamp() - p1.end_time\n",
" if delta > pd.Timedelta('1s'):\n",
......@@ -2215,16 +2228,16 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f023ae21588>"
"<matplotlib.axes._subplots.AxesSubplot at 0x7f59df282160>"
]
},
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
},
......@@ -2255,16 +2268,16 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f0238c8bd68>"
"<matplotlib.axes._subplots.AxesSubplot at 0x7f59dd147588>"
]
},
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
},
......@@ -2315,13 +2328,14 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"first_august_week = [pd.Period(pd.Timestamp(y, 8, 1), 'W')\n",
" for y in range(1985,\n",
" sorted_data.index[-1].year)]"
" sorted_data.index[-1].year)]\n",
"#print(first_august_week)"
]
},
{
......@@ -2335,7 +2349,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
......@@ -2359,16 +2373,18 @@
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"execution_count": 14,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f0238c23f60>"
"<matplotlib.axes._subplots.AxesSubplot at 0x7f59dd0edc88>"
]
},
"execution_count": 13,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
},
......@@ -2398,7 +2414,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 15,
"metadata": {},
"outputs": [
{
......@@ -2446,7 +2462,7 @@
"dtype: int64"
]
},
"execution_count": 14,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
......@@ -2465,16 +2481,16 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f0238b719b0>"
"<matplotlib.axes._subplots.AxesSubplot at 0x7f59dd03d5c0>"
]
},
"execution_count": 15,
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
},
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment