{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# !pip install folium scikit-learn scipy" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import folium\n", "import matplotlib.pyplot as plt\n", "from sklearn.cluster import KMeans\n", "from scipy.spatial import distance\n", "%matplotlib inline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# L'épidémie de choléra à Londres en 1854\n", "\n", "Cette étude porte sur la construction d'une **carte épidémiologique** afin de mieux comprendre l'épidémie de choléra dans le quartier de Soho à Londres en 1854. Par l'analyse des données, nous cherchons à trouver le **centre de l'épidémie** et à prouver sa proximité avec l'une des pompes du quartier." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Chargement et aperçu des données" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Load the three CSV files from paths relative to the working directory.\n", "data_death = pd.read_csv(\"deaths.csv\")\n", "data_pumps = pd.read_csv(\"pumps.csv\")\n", "data_death_pumps = pd.read_csv(\"deaths_and_pumps.csv\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Death dataset columns : ['Death', 'X coordinate', 'Y coordinate']\n", "Pumps dataset columns : ['Pump Name', 'X coordinate', 'Y coordinate']\n", "Death/Pumps dataset columns : ['Number of deaths', 'X coordinate', 'Y coordinate']\n", "\n" ] } ], "source": [ "# Show the raw column names of every frame, with one blank line before and after.\n", "print()\n", "for label, frame in ((\"Death\", data_death), (\"Pumps\", data_pumps), (\"Death/Pumps\", data_death_pumps)):\n", "    print(\"{} dataset columns : {}\".format(label, list(frame.columns)))\n", "print()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "On renomme les colonnes pour éviter les fautes de frappe dues aux majuscules et aux espaces." 
] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Map the first three columns of each frame, by position, to short lowercase names.\n", "death_cols = dict(zip(data_death.columns[:3], ('d_count', 'x', 'y')))\n", "pump_cols = dict(zip(data_pumps.columns[:3], ('name', 'x', 'y')))\n", "d_p_cols = dict(zip(data_death_pumps.columns[:3], ('death_per_pumps', 'x', 'y')))\n", "\n", "# Rename in place so the frame objects loaded earlier are the ones that change.\n", "for frame, mapping in ((data_death, death_cols), (data_pumps, pump_cols), (data_death_pumps, d_p_cols)):\n", "    frame.rename(columns=mapping, inplace=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Un rapide coup d'œil aux données." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " d_count x y\n", "0 1 51.513418 -0.137930\n", "1 1 51.513418 -0.137930\n", "2 1 51.513418 -0.137930\n", "3 1 51.513361 -0.137883\n", "4 1 51.513361 -0.137883\n", "\n", "\n", " name x y\n", "0 Broad St. 51.513341 -0.136668\n", "1 Crown Chapel 51.513876 -0.139586\n", "2 Gt Marlborough 51.514906 -0.139671\n", "3 Dean St. 
51.512354 -0.131630\n", "4 So Soho 51.512139 -0.133594\n", "\n", "\n", " death_per_pumps x y\n", "0 3 51.513418 -0.137930\n", "1 2 51.513361 -0.137883\n", "2 1 51.513317 -0.137853\n", "3 1 51.513262 -0.137812\n", "4 4 51.513204 -0.137767\n" ] } ], "source": [ "# Print the first rows of each frame, separated by two blank lines.\n", "print(data_death.head(), data_pumps.head(), data_death_pumps.head(), sep='\\n\\n\\n')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Donnée manquante dans le dataset death.csv : 0\n", "Donnée manquante dans le dataset pumps.csv : 0\n", "Donnée manquante dans le dataset death_and_pumps.csv : 0\n" ] } ], "source": [ "# For each frame, count the rows that contain at least one missing value.\n", "for label, frame in ((\"death.csv\", data_death), (\"pumps.csv\", data_pumps), (\"death_and_pumps.csv\", data_death_pumps)):\n", "    incomplete_rows = frame.isnull().any(axis=1)\n", "    print(\"Donnée manquante dans le dataset {} : {}\".format(label, len(frame[incomplete_rows])))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Création de la carte\n", "\n", "### Les décès\n", "\n", "On commence par afficher les décès sur la carte en centrant celle-ci sur une coordonnée disponible dans le dataset." 
] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# One row per distinct (x, y) location, with the number of death records found there.\n", "data_death_df = data_death.groupby(['x', 'y']).d_count.count().reset_index()\n", "# Plain [x, y] pairs, in the same row order as data_death_df.\n", "death_coordinates = data_death_df[[\"x\", \"y\"]].values.tolist()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Centre the map on the first death location.\n", "# NOTE(review): 'x' is passed as the first component of each location, which folium\n", "# reads as the latitude; confirm that 'x' really holds latitudes.\n", "soho_c = death_coordinates[0]\n", "# 'Stamen Toner' is no longer a built-in folium tileset in recent releases, so\n", "# folium.Map fails on a fresh install; use a built-in light basemap instead.\n", "death_map = folium.Map(location=soho_c, tiles='CartoDB positron', zoom_start=17)\n", "# One circle per location; the radius is twice the number of deaths recorded there.\n", "for coords, n_deaths in zip(death_coordinates, data_death_df['d_count']):\n", "    folium.CircleMarker(coords, radius=2 * int(n_deaths),\n", "                        color='blue', fill=True, fill_color='blue',\n", "                        opacity=0.4).add_to(death_map)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "