début

35e8b32d · 068b82956f7db4f13a87eb46523a105a · 4c6c8e79 · 35e8b32d · 35e8b32d
Commit 35e8b32d authored Feb 18, 2022 by 068b82956f7db4f13a87eb46523a105a
Show whitespace changes
Inline Side-by-side

Showing with 1203 additions and 4 deletions

analyse-syndrome-grippal.ipynb module3/exo1/analyse-syndrome-grippal.ipynb +1 -1

exercice.ipynb module3/exo2/exercice.ipynb +1202 -3

No files found.
--- a/module3/exo1/analyse-syndrome-grippal.ipynb
+++ b/module3/exo1/analyse-syndrome-grippal.ipynb
@@ -364,7 +364,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.6.1"
+   "version": "3.6.4"
  }
 },
 "nbformat": 4,

--- a/module3/exo2/exercice.ipynb
+++ b/module3/exo2/exercice.ipynb
 {
- "cells": [],
+ "cells": [
+  {
+   "cell_type": "markdown",
   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "source": [
+    "# varicelle"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "source": [
+    "Les données de l'incidence de la varicelle sont disponibles sur le site Web du Réseau Sentinelles. Nous les récupérons sous forme d'un fichier au format CSV dont chaque ligne correspond à une semaine de la période demandée. Nous téléchargeons toujours le jeu de données complet, qui commence en 1990 semaine 49 et se termine avec une semaine récente : 2022 semaine 6.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pandas as pd\n",
+    "import isoweek"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "outputs": [],
+   "source": [
+    "data_url = \"http://www.sentiweb.fr/datasets/incidence-PAY-7.csv\""
+   ]
+  },
+  {
+   "cell_type": "raw",
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "source": [
+    "La première ligne du fichier CSV est un commentaire, que nous ignorons en précisant `skiprows=1`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "pandas.core.frame.DataFrame"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "raw_data = pd.read_csv(data_url, skiprows=1)\n",
+    "type(raw_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1628"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(raw_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>week</th>\n",
+       "      <th>indicator</th>\n",
+       "      <th>inc</th>\n",
+       "      <th>inc_low</th>\n",
+       "      <th>inc_up</th>\n",
+       "      <th>inc100</th>\n",
+       "      <th>inc100_low</th>\n",
+       "      <th>inc100_up</th>\n",
+       "      <th>geo_insee</th>\n",
+       "      <th>geo_name</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>202206</td>\n",
+       "      <td>7</td>\n",
+       "      <td>10414</td>\n",
+       "      <td>7128</td>\n",
+       "      <td>13700</td>\n",
+       "      <td>16</td>\n",
+       "      <td>11</td>\n",
+       "      <td>21</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>202205</td>\n",
+       "      <td>7</td>\n",
+       "      <td>10866</td>\n",
+       "      <td>7758</td>\n",
+       "      <td>13974</td>\n",
+       "      <td>16</td>\n",
+       "      <td>11</td>\n",
+       "      <td>21</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>202204</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9547</td>\n",
+       "      <td>6721</td>\n",
+       "      <td>12373</td>\n",
+       "      <td>14</td>\n",
+       "      <td>10</td>\n",
+       "      <td>18</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>202203</td>\n",
+       "      <td>7</td>\n",
+       "      <td>13972</td>\n",
+       "      <td>10680</td>\n",
+       "      <td>17264</td>\n",
+       "      <td>21</td>\n",
+       "      <td>16</td>\n",
+       "      <td>26</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>202202</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8495</td>\n",
+       "      <td>6026</td>\n",
+       "      <td>10964</td>\n",
+       "      <td>13</td>\n",
+       "      <td>9</td>\n",
+       "      <td>17</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     week  indicator    inc  inc_low  inc_up  inc100  inc100_low  inc100_up  \\\n",
+       "0  202206          7  10414     7128   13700      16          11         21   \n",
+       "1  202205          7  10866     7758   13974      16          11         21   \n",
+       "2  202204          7   9547     6721   12373      14          10         18   \n",
+       "3  202203          7  13972    10680   17264      21          16         26   \n",
+       "4  202202          7   8495     6026   10964      13           9         17   \n",
+       "\n",
+       "  geo_insee geo_name  \n",
+       "0        FR   France  \n",
+       "1        FR   France  \n",
+       "2        FR   France  \n",
+       "3        FR   France  \n",
+       "4        FR   France  "
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "raw_data[:5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>week</th>\n",
+       "      <th>indicator</th>\n",
+       "      <th>inc</th>\n",
+       "      <th>inc_low</th>\n",
+       "      <th>inc_up</th>\n",
+       "      <th>inc100</th>\n",
+       "      <th>inc100_low</th>\n",
+       "      <th>inc100_up</th>\n",
+       "      <th>geo_insee</th>\n",
+       "      <th>geo_name</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>1623</th>\n",
+       "      <td>199101</td>\n",
+       "      <td>7</td>\n",
+       "      <td>15565</td>\n",
+       "      <td>10271</td>\n",
+       "      <td>20859</td>\n",
+       "      <td>27</td>\n",
+       "      <td>18</td>\n",
+       "      <td>36</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1624</th>\n",
+       "      <td>199052</td>\n",
+       "      <td>7</td>\n",
+       "      <td>19375</td>\n",
+       "      <td>13295</td>\n",
+       "      <td>25455</td>\n",
+       "      <td>34</td>\n",
+       "      <td>23</td>\n",
+       "      <td>45</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1625</th>\n",
+       "      <td>199051</td>\n",
+       "      <td>7</td>\n",
+       "      <td>19080</td>\n",
+       "      <td>13807</td>\n",
+       "      <td>24353</td>\n",
+       "      <td>34</td>\n",
+       "      <td>25</td>\n",
+       "      <td>43</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1626</th>\n",
+       "      <td>199050</td>\n",
+       "      <td>7</td>\n",
+       "      <td>11079</td>\n",
+       "      <td>6660</td>\n",
+       "      <td>15498</td>\n",
+       "      <td>20</td>\n",
+       "      <td>12</td>\n",
+       "      <td>28</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1627</th>\n",
+       "      <td>199049</td>\n",
+       "      <td>7</td>\n",
+       "      <td>1143</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2610</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        week  indicator    inc  inc_low  inc_up  inc100  inc100_low  \\\n",
+       "1623  199101          7  15565    10271   20859      27          18   \n",
+       "1624  199052          7  19375    13295   25455      34          23   \n",
+       "1625  199051          7  19080    13807   24353      34          25   \n",
+       "1626  199050          7  11079     6660   15498      20          12   \n",
+       "1627  199049          7   1143        0    2610       2           0   \n",
+       "\n",
+       "      inc100_up geo_insee geo_name  \n",
+       "1623         36        FR   France  \n",
+       "1624         45        FR   France  \n",
+       "1625         43        FR   France  \n",
+       "1626         28        FR   France  \n",
+       "1627          5        FR   France  "
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "raw_data[-5:]"
+   ]
+  },
+  {
+   "cell_type": "raw",
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "source": [
+    "Y a-t-il des points manquants dans ce jeu de données ? Non."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>week</th>\n",
+       "      <th>indicator</th>\n",
+       "      <th>inc</th>\n",
+       "      <th>inc_low</th>\n",
+       "      <th>inc_up</th>\n",
+       "      <th>inc100</th>\n",
+       "      <th>inc100_low</th>\n",
+       "      <th>inc100_up</th>\n",
+       "      <th>geo_insee</th>\n",
+       "      <th>geo_name</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "Empty DataFrame\n",
+       "Columns: [week, indicator, inc, inc_low, inc_up, inc100, inc100_low, inc100_up, geo_insee, geo_name]\n",
+       "Index: []"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "raw_data[raw_data.isnull().any(axis=1)]"
+   ]
+  },
+  {
+   "cell_type": "raw",
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "source": [
+    "Copie des données raw_data dans data, sans filtrage car il n'y a pas de données manquantes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "outputs": [],
+   "source": [
+    "data = raw_data.copy()"
+   ]
+  },
+  {
+   "cell_type": "raw",
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "source": [
+    "Nos données utilisent une convention inhabituelle : le numéro de semaine est collé à l'année, donnant l'impression qu'il s'agit d'un nombre entier. C'est comme ça que Pandas les interprète.\n",
+    "\n",
+    "Un deuxième problème est que Pandas ne comprend pas les numéros de semaine. Il faut lui fournir les dates de début et de fin de semaine. Nous utilisons pour cela la bibliothèque isoweek.\n",
+    "\n",
+    "Comme la conversion des semaines est devenue assez complexe, nous écrivons une petite fonction Python pour cela. Ensuite, nous l'appliquons à tous les points de nos données. Les résultats vont dans une nouvelle colonne 'period'.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1628"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def convert_week(year_and_week_int):\n",
+    "    year_and_week_str = str(year_and_week_int)\n",
+    "    year = int(year_and_week_str[:4])\n",
+    "    week = int(year_and_week_str[4:])\n",
+    "    w = isoweek.Week(year, week)\n",
+    "    return pd.Period(w.day(0), 'W')\n",
+    "\n",
+    "data['period'] = [convert_week(yw) for yw in data['week']]\n",
+    "len(data)"
+   ]
+  },
+  {
+   "cell_type": "raw",
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "source": [
+    "Il reste deux modifications à faire.\n",
+    "\n",
+    "Premièrement, nous définissons les périodes d'observation comme nouvel index de notre jeu de données. Ceci en fait une suite chronologique, ce qui sera pratique par la suite.\n",
+    "\n",
+    "Deuxièmement, nous trions les points par période, dans le sens chronologique.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>week</th>\n",
+       "      <th>indicator</th>\n",
+       "      <th>inc</th>\n",
+       "      <th>inc_low</th>\n",
+       "      <th>inc_up</th>\n",
+       "      <th>inc100</th>\n",
+       "      <th>inc100_low</th>\n",
+       "      <th>inc100_up</th>\n",
+       "      <th>geo_insee</th>\n",
+       "      <th>geo_name</th>\n",
+       "      <th>period</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>202206</td>\n",
+       "      <td>7</td>\n",
+       "      <td>10414</td>\n",
+       "      <td>7128</td>\n",
+       "      <td>13700</td>\n",
+       "      <td>16</td>\n",
+       "      <td>11</td>\n",
+       "      <td>21</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "      <td>2022-02-07/2022-02-13</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>202205</td>\n",
+       "      <td>7</td>\n",
+       "      <td>10866</td>\n",
+       "      <td>7758</td>\n",
+       "      <td>13974</td>\n",
+       "      <td>16</td>\n",
+       "      <td>11</td>\n",
+       "      <td>21</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "      <td>2022-01-31/2022-02-06</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>202204</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9547</td>\n",
+       "      <td>6721</td>\n",
+       "      <td>12373</td>\n",
+       "      <td>14</td>\n",
+       "      <td>10</td>\n",
+       "      <td>18</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "      <td>2022-01-24/2022-01-30</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>202203</td>\n",
+       "      <td>7</td>\n",
+       "      <td>13972</td>\n",
+       "      <td>10680</td>\n",
+       "      <td>17264</td>\n",
+       "      <td>21</td>\n",
+       "      <td>16</td>\n",
+       "      <td>26</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "      <td>2022-01-17/2022-01-23</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>202202</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8495</td>\n",
+       "      <td>6026</td>\n",
+       "      <td>10964</td>\n",
+       "      <td>13</td>\n",
+       "      <td>9</td>\n",
+       "      <td>17</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "      <td>2022-01-10/2022-01-16</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     week  indicator    inc  inc_low  inc_up  inc100  inc100_low  inc100_up  \\\n",
+       "0  202206          7  10414     7128   13700      16          11         21   \n",
+       "1  202205          7  10866     7758   13974      16          11         21   \n",
+       "2  202204          7   9547     6721   12373      14          10         18   \n",
+       "3  202203          7  13972    10680   17264      21          16         26   \n",
+       "4  202202          7   8495     6026   10964      13           9         17   \n",
+       "\n",
+       "  geo_insee geo_name                period  \n",
+       "0        FR   France 2022-02-07/2022-02-13  \n",
+       "1        FR   France 2022-01-31/2022-02-06  \n",
+       "2        FR   France 2022-01-24/2022-01-30  \n",
+       "3        FR   France 2022-01-17/2022-01-23  \n",
+       "4        FR   France 2022-01-10/2022-01-16  "
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data[:5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>week</th>\n",
+       "      <th>indicator</th>\n",
+       "      <th>inc</th>\n",
+       "      <th>inc_low</th>\n",
+       "      <th>inc_up</th>\n",
+       "      <th>inc100</th>\n",
+       "      <th>inc100_low</th>\n",
+       "      <th>inc100_up</th>\n",
+       "      <th>geo_insee</th>\n",
+       "      <th>geo_name</th>\n",
+       "      <th>period</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>1623</th>\n",
+       "      <td>199101</td>\n",
+       "      <td>7</td>\n",
+       "      <td>15565</td>\n",
+       "      <td>10271</td>\n",
+       "      <td>20859</td>\n",
+       "      <td>27</td>\n",
+       "      <td>18</td>\n",
+       "      <td>36</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "      <td>1990-12-31/1991-01-06</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1624</th>\n",
+       "      <td>199052</td>\n",
+       "      <td>7</td>\n",
+       "      <td>19375</td>\n",
+       "      <td>13295</td>\n",
+       "      <td>25455</td>\n",
+       "      <td>34</td>\n",
+       "      <td>23</td>\n",
+       "      <td>45</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "      <td>1990-12-24/1990-12-30</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1625</th>\n",
+       "      <td>199051</td>\n",
+       "      <td>7</td>\n",
+       "      <td>19080</td>\n",
+       "      <td>13807</td>\n",
+       "      <td>24353</td>\n",
+       "      <td>34</td>\n",
+       "      <td>25</td>\n",
+       "      <td>43</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "      <td>1990-12-17/1990-12-23</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1626</th>\n",
+       "      <td>199050</td>\n",
+       "      <td>7</td>\n",
+       "      <td>11079</td>\n",
+       "      <td>6660</td>\n",
+       "      <td>15498</td>\n",
+       "      <td>20</td>\n",
+       "      <td>12</td>\n",
+       "      <td>28</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "      <td>1990-12-10/1990-12-16</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1627</th>\n",
+       "      <td>199049</td>\n",
+       "      <td>7</td>\n",
+       "      <td>1143</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2610</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "      <td>1990-12-03/1990-12-09</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        week  indicator    inc  inc_low  inc_up  inc100  inc100_low  \\\n",
+       "1623  199101          7  15565    10271   20859      27          18   \n",
+       "1624  199052          7  19375    13295   25455      34          23   \n",
+       "1625  199051          7  19080    13807   24353      34          25   \n",
+       "1626  199050          7  11079     6660   15498      20          12   \n",
+       "1627  199049          7   1143        0    2610       2           0   \n",
+       "\n",
+       "      inc100_up geo_insee geo_name                period  \n",
+       "1623         36        FR   France 1990-12-31/1991-01-06  \n",
+       "1624         45        FR   France 1990-12-24/1990-12-30  \n",
+       "1625         43        FR   France 1990-12-17/1990-12-23  \n",
+       "1626         28        FR   France 1990-12-10/1990-12-16  \n",
+       "1627          5        FR   France 1990-12-03/1990-12-09  "
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data[-5:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "outputs": [],
+   "source": [
+    "sorted_data = data.set_index('period').sort_index()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>week</th>\n",
+       "      <th>indicator</th>\n",
+       "      <th>inc</th>\n",
+       "      <th>inc_low</th>\n",
+       "      <th>inc_up</th>\n",
+       "      <th>inc100</th>\n",
+       "      <th>inc100_low</th>\n",
+       "      <th>inc100_up</th>\n",
+       "      <th>geo_insee</th>\n",
+       "      <th>geo_name</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>period</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>1990-12-03/1990-12-09</th>\n",
+       "      <td>199049</td>\n",
+       "      <td>7</td>\n",
+       "      <td>1143</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2610</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1990-12-10/1990-12-16</th>\n",
+       "      <td>199050</td>\n",
+       "      <td>7</td>\n",
+       "      <td>11079</td>\n",
+       "      <td>6660</td>\n",
+       "      <td>15498</td>\n",
+       "      <td>20</td>\n",
+       "      <td>12</td>\n",
+       "      <td>28</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1990-12-17/1990-12-23</th>\n",
+       "      <td>199051</td>\n",
+       "      <td>7</td>\n",
+       "      <td>19080</td>\n",
+       "      <td>13807</td>\n",
+       "      <td>24353</td>\n",
+       "      <td>34</td>\n",
+       "      <td>25</td>\n",
+       "      <td>43</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1990-12-24/1990-12-30</th>\n",
+       "      <td>199052</td>\n",
+       "      <td>7</td>\n",
+       "      <td>19375</td>\n",
+       "      <td>13295</td>\n",
+       "      <td>25455</td>\n",
+       "      <td>34</td>\n",
+       "      <td>23</td>\n",
+       "      <td>45</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1990-12-31/1991-01-06</th>\n",
+       "      <td>199101</td>\n",
+       "      <td>7</td>\n",
+       "      <td>15565</td>\n",
+       "      <td>10271</td>\n",
+       "      <td>20859</td>\n",
+       "      <td>27</td>\n",
+       "      <td>18</td>\n",
+       "      <td>36</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                         week  indicator    inc  inc_low  inc_up  inc100  \\\n",
+       "period                                                                     \n",
+       "1990-12-03/1990-12-09  199049          7   1143        0    2610       2   \n",
+       "1990-12-10/1990-12-16  199050          7  11079     6660   15498      20   \n",
+       "1990-12-17/1990-12-23  199051          7  19080    13807   24353      34   \n",
+       "1990-12-24/1990-12-30  199052          7  19375    13295   25455      34   \n",
+       "1990-12-31/1991-01-06  199101          7  15565    10271   20859      27   \n",
+       "\n",
+       "                       inc100_low  inc100_up geo_insee geo_name  \n",
+       "period                                                           \n",
+       "1990-12-03/1990-12-09           0          5        FR   France  \n",
+       "1990-12-10/1990-12-16          12         28        FR   France  \n",
+       "1990-12-17/1990-12-23          25         43        FR   France  \n",
+       "1990-12-24/1990-12-30          23         45        FR   France  \n",
+       "1990-12-31/1991-01-06          18         36        FR   France  "
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sorted_data[:5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>week</th>\n",
+       "      <th>indicator</th>\n",
+       "      <th>inc</th>\n",
+       "      <th>inc_low</th>\n",
+       "      <th>inc_up</th>\n",
+       "      <th>inc100</th>\n",
+       "      <th>inc100_low</th>\n",
+       "      <th>inc100_up</th>\n",
+       "      <th>geo_insee</th>\n",
+       "      <th>geo_name</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>period</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2022-01-10/2022-01-16</th>\n",
+       "      <td>202202</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8495</td>\n",
+       "      <td>6026</td>\n",
+       "      <td>10964</td>\n",
+       "      <td>13</td>\n",
+       "      <td>9</td>\n",
+       "      <td>17</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2022-01-17/2022-01-23</th>\n",
+       "      <td>202203</td>\n",
+       "      <td>7</td>\n",
+       "      <td>13972</td>\n",
+       "      <td>10680</td>\n",
+       "      <td>17264</td>\n",
+       "      <td>21</td>\n",
+       "      <td>16</td>\n",
+       "      <td>26</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2022-01-24/2022-01-30</th>\n",
+       "      <td>202204</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9547</td>\n",
+       "      <td>6721</td>\n",
+       "      <td>12373</td>\n",
+       "      <td>14</td>\n",
+       "      <td>10</td>\n",
+       "      <td>18</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2022-01-31/2022-02-06</th>\n",
+       "      <td>202205</td>\n",
+       "      <td>7</td>\n",
+       "      <td>10866</td>\n",
+       "      <td>7758</td>\n",
+       "      <td>13974</td>\n",
+       "      <td>16</td>\n",
+       "      <td>11</td>\n",
+       "      <td>21</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2022-02-07/2022-02-13</th>\n",
+       "      <td>202206</td>\n",
+       "      <td>7</td>\n",
+       "      <td>10414</td>\n",
+       "      <td>7128</td>\n",
+       "      <td>13700</td>\n",
+       "      <td>16</td>\n",
+       "      <td>11</td>\n",
+       "      <td>21</td>\n",
+       "      <td>FR</td>\n",
+       "      <td>France</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                         week  indicator    inc  inc_low  inc_up  inc100  \\\n",
+       "period                                                                     \n",
+       "2022-01-10/2022-01-16  202202          7   8495     6026   10964      13   \n",
+       "2022-01-17/2022-01-23  202203          7  13972    10680   17264      21   \n",
+       "2022-01-24/2022-01-30  202204          7   9547     6721   12373      14   \n",
+       "2022-01-31/2022-02-06  202205          7  10866     7758   13974      16   \n",
+       "2022-02-07/2022-02-13  202206          7  10414     7128   13700      16   \n",
+       "\n",
+       "                       inc100_low  inc100_up geo_insee geo_name  \n",
+       "period                                                           \n",
+       "2022-01-10/2022-01-16           9         17        FR   France  \n",
+       "2022-01-17/2022-01-23          16         26        FR   France  \n",
+       "2022-01-24/2022-01-30          10         18        FR   France  \n",
+       "2022-01-31/2022-02-06          11         21        FR   France  \n",
+       "2022-02-07/2022-02-13          11         21        FR   France  "
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sorted_data[-5:]"
+   ]
+  },
+  {
+   "cell_type": "raw",
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "source": [
+    "Nous vérifions la cohérence des données. Entre la fin d'une période et le début de la période qui suit, la différence temporelle doit être nulle, ou au moins très faible. Nous laissons une \"marge d'erreur\" d'une seconde.\n",
+    "Normalement, aucune paire de périodes ne sera affichée et le compteur restera à zéro."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "cpt = 0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Check that consecutive periods are contiguous: the gap between the end of\n",
+    "# one period and the start of the next must not exceed one second.\n",
+    "periods = sorted_data.index\n",
+    "cpt = 0  # number of gaps found between consecutive periods\n",
+    "for p1, p2 in zip(periods[:-1], periods[1:]):\n",
+    "    delta = p2.to_timestamp() - p1.end_time\n",
+    "    if delta > pd.Timedelta('1s'):\n",
+    "        # Count the gap so the summary printed below reflects what was found.\n",
+    "        cpt += 1\n",
+    "        print(p1, p2)\n",
+    "print(f'cpt = {cpt}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Period('2022-01-31/2022-02-06', 'W-SUN')"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "p1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true,
+    "hideCode": true,
+    "hideOutput": true,
+    "hidePrompt": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "hide_code_all_hidden": true,
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
@@ -16,10 +1216,9 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.6.3"
+   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
-