diff --git a/module3/exo2/exercice_fr.ipynb b/module3/exo2/exercice_fr.ipynb index 0bbbe371b01e359e381e43239412d77bf53fb1fb..05143c312e7a17f4096a113ae23a5f6bb1f1ebac 100644 --- a/module3/exo2/exercice_fr.ipynb +++ b/module3/exo2/exercice_fr.ipynb @@ -1,5 +1,964 @@ { - "cells": [], + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#
Incidence du syndrome grippal" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import isoweek" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nous récupérons les données depuis le site en pointant sur un fichier au format .csv. Pour la lecture des données ...." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_name
020205231854613789.023303.02821.035.0FRFrance
120205132103216856.025208.03226.038.0FRFrance
220205031684513220.020470.02620.032.0FRFrance
32020493129399923.015955.02015.025.0FRFrance
420204831380410641.016967.02116.026.0FRFrance
\n", + "
" + ], + "text/plain": [ + " week indicator inc inc_low inc_up inc100 inc100_low inc100_up \\\n", + "0 202052 3 18546 13789.0 23303.0 28 21.0 35.0 \n", + "1 202051 3 21032 16856.0 25208.0 32 26.0 38.0 \n", + "2 202050 3 16845 13220.0 20470.0 26 20.0 32.0 \n", + "3 202049 3 12939 9923.0 15955.0 20 15.0 25.0 \n", + "4 202048 3 13804 10641.0 16967.0 21 16.0 26.0 \n", + "\n", + " geo_insee geo_name \n", + "0 FR France \n", + "1 FR France \n", + "2 FR France \n", + "3 FR France \n", + "4 FR France " + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_url = \"http://www.sentiweb.fr/datasets/incidence-PAY-3.csv\"\n", + "raw_data = pd.read_csv(data_url, skiprows=1)\n", + "raw_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_name
188219844837862060634.096606.0143110.0176.0FRFrance
188319844737202954274.089784.013199.0163.0FRFrance
188419844638733067686.0106974.0159123.0195.0FRFrance
18851984453135223101414.0169032.0246184.0308.0FRFrance
188619844436842220056.0116788.012537.0213.0FRFrance
\n", + "
" + ], + "text/plain": [ + " week indicator inc inc_low inc_up inc100 inc100_low \\\n", + "1882 198448 3 78620 60634.0 96606.0 143 110.0 \n", + "1883 198447 3 72029 54274.0 89784.0 131 99.0 \n", + "1884 198446 3 87330 67686.0 106974.0 159 123.0 \n", + "1885 198445 3 135223 101414.0 169032.0 246 184.0 \n", + "1886 198444 3 68422 20056.0 116788.0 125 37.0 \n", + "\n", + " inc100_up geo_insee geo_name \n", + "1882 176.0 FR France \n", + "1883 163.0 FR France \n", + "1884 195.0 FR France \n", + "1885 308.0 FR France \n", + "1886 213.0 FR France " + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data.tail()\n", + "#raw_data.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On recherche les données manquantes dans la dataframe. On visualise les lignes contenant des données manquantes." + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_name
165019891930NaNNaN0NaNNaNFRFrance
\n", + "
" + ], + "text/plain": [ + " week indicator inc inc_low inc_up inc100 inc100_low inc100_up \\\n", + "1650 198919 3 0 NaN NaN 0 NaN NaN \n", + "\n", + " geo_insee geo_name \n", + "1650 FR France " + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data[raw_data.isnull().any(axis=1)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nous travaillerons sur une dataframe dont les lignes contenant des données manquantes ont été supprimées. Nous vérifions ensuite que cette nouvelle dataframe ne contient plus aucune ligne avec un élément manquant." + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_name
020205231854613789.023303.02821.035.0FRFrance
120205132103216856.025208.03226.038.0FRFrance
220205031684513220.020470.02620.032.0FRFrance
32020493129399923.015955.02015.025.0FRFrance
420204831380410641.016967.02116.026.0FRFrance
\n", + "
" + ], + "text/plain": [ + " week indicator inc inc_low inc_up inc100 inc100_low inc100_up \\\n", + "0 202052 3 18546 13789.0 23303.0 28 21.0 35.0 \n", + "1 202051 3 21032 16856.0 25208.0 32 26.0 38.0 \n", + "2 202050 3 16845 13220.0 20470.0 26 20.0 32.0 \n", + "3 202049 3 12939 9923.0 15955.0 20 15.0 25.0 \n", + "4 202048 3 13804 10641.0 16967.0 21 16.0 26.0 \n", + "\n", + " geo_insee geo_name \n", + "0 FR France \n", + "1 FR France \n", + "2 FR France \n", + "3 FR France \n", + "4 FR France " + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = raw_data.dropna().copy()\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_name
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [week, indicator, inc, inc_low, inc_up, inc100, inc100_low, inc100_up, geo_insee, geo_name]\n", + "Index: []" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data.isnull().any(axis=1)]" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "year_and_week_int = 198501\n", + "year_and_week_str = str(year_and_week_int)\n", + "year = int(year_and_week_str[:4])\n", + "week = int(year_and_week_str[4:6])\n", + "\n", + "w = isoweek.Week(year,week)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Premier jour de cette semaine 1985 01" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1984-12-31\n" + ] + } + ], + "source": [ + "print(w.day(0))" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Period('1984-12-31/1985-01-06', 'W-SUN')" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.Period(w.day(0),'W')" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "def convert_week(year_and_week_int):\n", + " year_and_week_str = str(year_and_week_int)\n", + " year = int(year_and_week_str[:4])\n", + " week = int(year_and_week_str[4:6])\n", + " w = isoweek.Week(year,week)\n", + " return pd.Period(w.day(0),'W')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On ajoute une nouvelle colonne à notre jeu de données ; cette colonne représente une période.\n", + "Notre jeu de données est en ordre inverse ; nous lui appliquons une fonction de tri pour obtenir un ordre chronologique.
" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_nameperiod
020205231854613789.023303.02821.035.0FRFrance2020-12-21/2020-12-27
120205132103216856.025208.03226.038.0FRFrance2020-12-14/2020-12-20
220205031684513220.020470.02620.032.0FRFrance2020-12-07/2020-12-13
32020493129399923.015955.02015.025.0FRFrance2020-11-30/2020-12-06
420204831380410641.016967.02116.026.0FRFrance2020-11-23/2020-11-29
\n", + "
" + ], + "text/plain": [ + " week indicator inc inc_low inc_up inc100 inc100_low inc100_up \\\n", + "0 202052 3 18546 13789.0 23303.0 28 21.0 35.0 \n", + "1 202051 3 21032 16856.0 25208.0 32 26.0 38.0 \n", + "2 202050 3 16845 13220.0 20470.0 26 20.0 32.0 \n", + "3 202049 3 12939 9923.0 15955.0 20 15.0 25.0 \n", + "4 202048 3 13804 10641.0 16967.0 21 16.0 26.0 \n", + "\n", + " geo_insee geo_name period \n", + "0 FR France 2020-12-21/2020-12-27 \n", + "1 FR France 2020-12-14/2020-12-20 \n", + "2 FR France 2020-12-07/2020-12-13 \n", + "3 FR France 2020-11-30/2020-12-06 \n", + "4 FR France 2020-11-23/2020-11-29 " + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['period'] = [convert_week(yw) for yw in data['week']]\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [], + "source": [ + "sorted_data = data.set_index('period').sort_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_name
period
1984-10-29/1984-11-0419844436842220056.0116788.012537.0213.0FRFrance
1984-11-05/1984-11-111984453135223101414.0169032.0246184.0308.0FRFrance
1984-11-12/1984-11-1819844638733067686.0106974.0159123.0195.0FRFrance
1984-11-19/1984-11-2519844737202954274.089784.013199.0163.0FRFrance
1984-11-26/1984-12-0219844837862060634.096606.0143110.0176.0FRFrance
\n", + "
" + ], + "text/plain": [ + " week indicator inc inc_low inc_up inc100 \\\n", + "period \n", + "1984-10-29/1984-11-04 198444 3 68422 20056.0 116788.0 125 \n", + "1984-11-05/1984-11-11 198445 3 135223 101414.0 169032.0 246 \n", + "1984-11-12/1984-11-18 198446 3 87330 67686.0 106974.0 159 \n", + "1984-11-19/1984-11-25 198447 3 72029 54274.0 89784.0 131 \n", + "1984-11-26/1984-12-02 198448 3 78620 60634.0 96606.0 143 \n", + "\n", + " inc100_low inc100_up geo_insee geo_name \n", + "period \n", + "1984-10-29/1984-11-04 37.0 213.0 FR France \n", + "1984-11-05/1984-11-11 184.0 308.0 FR France \n", + "1984-11-12/1984-11-18 123.0 195.0 FR France \n", + "1984-11-19/1984-11-25 99.0 163.0 FR France \n", + "1984-11-26/1984-12-02 110.0 176.0 FR France " + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sorted_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], "metadata": { "kernelspec": { "display_name": "Python 3", @@ -16,10 +975,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.3" + "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 } -