From a4f73fe2ec92958d9a042c7d3aba5441a8726c8c Mon Sep 17 00:00:00 2001
From: 42e498459df1b5cd1010918695b12887
<42e498459df1b5cd1010918695b12887@app-learninglab.inria.fr>
Date: Tue, 15 Sep 2020 20:31:35 +0000
Subject: [PATCH] no commit message
---
module3/exo3/.ipynb | 1109 +++++++++++++++++++++++++++++++++++
module3/exo3/exercice.ipynb | 1088 +++++++++++++++++++++++++++++++++-
2 files changed, 2194 insertions(+), 3 deletions(-)
create mode 100644 module3/exo3/.ipynb
diff --git a/module3/exo3/.ipynb b/module3/exo3/.ipynb
new file mode 100644
index 0000000..8de5488
--- /dev/null
+++ b/module3/exo3/.ipynb
@@ -0,0 +1,1109 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Autour du paradoxe de Simpson"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Ce sujet a pour but d'étudier un sondage réalisé en 1972-1974 sur un panel de 1314 femmes n'ayant jamais fumé ou fumant actuellement (au moment du sondage). Ce sondage a été effectué afin d'éclairer des travaux portant sur l'impact du tabagisme sur des maladies thyroïdiennes et cardiaques (Tunbridge *et al.* 1977). Une suite de cette étude a été menée vingt ans plus tard (Vanderpump *et al.* 1995) afin de savoir si les personnes sont décédées ou non. \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%matplotlib inline\n",
+ "import matplotlib.pyplot as plt\n",
+ "import pandas as pd\n",
+ "import isoweek"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Le fichier récupéré est au format CSV. Sur chaque ligne se trouvent les informations suivantes : la personne fume ou non, elle est vivante ou décédée au moment de la seconde étude, et son âge lors du premier sondage."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ " data_url = \"https://gitlab.inria.fr/learninglab/mooc-rr/mooc-rr-ressources/-/raw/master/module3/Practical_session/Subject6_smoking.csv\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Ici, pas besoin de sauter la première ligne du fichier CSV : il ne s'agit pas d'un commentaire mais de l'en-tête des colonnes (Smoker, Status, Age). En effet, si nous sautions cette ligne avec `skiprows=1`, la première ligne de valeurs serait interprétée comme en-tête et nous la perdrions."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Smoker | \n",
+ " Status | \n",
+ " Age | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 21.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 19.3 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 57.5 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 47.1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 81.4 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 36.8 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 23.8 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 57.5 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 24.8 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 49.5 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 30.0 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 66.0 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 49.2 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 58.4 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 60.6 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 25.1 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 43.5 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 27.1 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 58.3 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 65.7 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 73.2 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 38.3 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 33.4 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 62.3 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 18.0 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 56.2 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 59.2 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 25.8 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 36.9 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 20.2 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1284 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 36.0 | \n",
+ "
\n",
+ " \n",
+ " 1285 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 48.3 | \n",
+ "
\n",
+ " \n",
+ " 1286 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 63.1 | \n",
+ "
\n",
+ " \n",
+ " 1287 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 60.8 | \n",
+ "
\n",
+ " \n",
+ " 1288 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 39.3 | \n",
+ "
\n",
+ " \n",
+ " 1289 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 36.7 | \n",
+ "
\n",
+ " \n",
+ " 1290 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 63.8 | \n",
+ "
\n",
+ " \n",
+ " 1291 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 71.3 | \n",
+ "
\n",
+ " \n",
+ " 1292 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 57.7 | \n",
+ "
\n",
+ " \n",
+ " 1293 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 63.2 | \n",
+ "
\n",
+ " \n",
+ " 1294 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 46.6 | \n",
+ "
\n",
+ " \n",
+ " 1295 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 82.4 | \n",
+ "
\n",
+ " \n",
+ " 1296 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 38.3 | \n",
+ "
\n",
+ " \n",
+ " 1297 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 32.7 | \n",
+ "
\n",
+ " \n",
+ " 1298 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 39.7 | \n",
+ "
\n",
+ " \n",
+ " 1299 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 60.0 | \n",
+ "
\n",
+ " \n",
+ " 1300 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 71.0 | \n",
+ "
\n",
+ " \n",
+ " 1301 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 20.5 | \n",
+ "
\n",
+ " \n",
+ " 1302 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 44.4 | \n",
+ "
\n",
+ " \n",
+ " 1303 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 31.2 | \n",
+ "
\n",
+ " \n",
+ " 1304 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 47.8 | \n",
+ "
\n",
+ " \n",
+ " 1305 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 60.9 | \n",
+ "
\n",
+ " \n",
+ " 1306 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 61.4 | \n",
+ "
\n",
+ " \n",
+ " 1307 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 43.0 | \n",
+ "
\n",
+ " \n",
+ " 1308 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 42.1 | \n",
+ "
\n",
+ " \n",
+ " 1309 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 35.9 | \n",
+ "
\n",
+ " \n",
+ " 1310 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 22.3 | \n",
+ "
\n",
+ " \n",
+ " 1311 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 62.1 | \n",
+ "
\n",
+ " \n",
+ " 1312 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 88.6 | \n",
+ "
\n",
+ " \n",
+ " 1313 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 39.1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1314 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Smoker Status Age\n",
+ "0 Yes Alive 21.0\n",
+ "1 Yes Alive 19.3\n",
+ "2 No Dead 57.5\n",
+ "3 No Alive 47.1\n",
+ "4 Yes Alive 81.4\n",
+ "5 No Alive 36.8\n",
+ "6 No Alive 23.8\n",
+ "7 Yes Dead 57.5\n",
+ "8 Yes Alive 24.8\n",
+ "9 Yes Alive 49.5\n",
+ "10 Yes Alive 30.0\n",
+ "11 No Dead 66.0\n",
+ "12 Yes Alive 49.2\n",
+ "13 No Alive 58.4\n",
+ "14 No Dead 60.6\n",
+ "15 No Alive 25.1\n",
+ "16 No Alive 43.5\n",
+ "17 No Alive 27.1\n",
+ "18 No Alive 58.3\n",
+ "19 Yes Alive 65.7\n",
+ "20 No Dead 73.2\n",
+ "21 Yes Alive 38.3\n",
+ "22 No Alive 33.4\n",
+ "23 Yes Dead 62.3\n",
+ "24 No Alive 18.0\n",
+ "25 No Alive 56.2\n",
+ "26 Yes Alive 59.2\n",
+ "27 No Alive 25.8\n",
+ "28 No Dead 36.9\n",
+ "29 No Alive 20.2\n",
+ "... ... ... ...\n",
+ "1284 Yes Dead 36.0\n",
+ "1285 Yes Alive 48.3\n",
+ "1286 No Alive 63.1\n",
+ "1287 No Alive 60.8\n",
+ "1288 Yes Dead 39.3\n",
+ "1289 No Alive 36.7\n",
+ "1290 No Alive 63.8\n",
+ "1291 No Dead 71.3\n",
+ "1292 No Alive 57.7\n",
+ "1293 No Alive 63.2\n",
+ "1294 No Alive 46.6\n",
+ "1295 Yes Dead 82.4\n",
+ "1296 Yes Alive 38.3\n",
+ "1297 Yes Alive 32.7\n",
+ "1298 No Alive 39.7\n",
+ "1299 Yes Dead 60.0\n",
+ "1300 No Dead 71.0\n",
+ "1301 No Alive 20.5\n",
+ "1302 No Alive 44.4\n",
+ "1303 Yes Alive 31.2\n",
+ "1304 Yes Alive 47.8\n",
+ "1305 Yes Alive 60.9\n",
+ "1306 No Dead 61.4\n",
+ "1307 Yes Alive 43.0\n",
+ "1308 No Alive 42.1\n",
+ "1309 Yes Alive 35.9\n",
+ "1310 No Alive 22.3\n",
+ "1311 Yes Dead 62.1\n",
+ "1312 No Dead 88.6\n",
+ "1313 No Alive 39.1\n",
+ "\n",
+ "[1314 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw_data = pd.read_csv(data_url)\n",
+ "raw_data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Y a-t-il des points manquants dans ce jeu de données ? Vérifions cela."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Smoker | \n",
+ " Status | \n",
+ " Age | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [Smoker, Status, Age]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw_data[raw_data.isnull().any(axis=1)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A priori, il n'y a pas de donnée manquante. Il n'y a donc pas de donnée à supprimer.\n",
+ "Cependant, afin de clarifier la suite du code, nous définissons plus simplement les données par \"data\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Smoker | \n",
+ " Status | \n",
+ " Age | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 21.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 19.3 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 57.5 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 47.1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 81.4 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 36.8 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 23.8 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 57.5 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 24.8 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 49.5 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 30.0 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 66.0 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 49.2 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 58.4 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 60.6 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 25.1 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 43.5 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 27.1 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 58.3 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 65.7 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 73.2 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 38.3 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 33.4 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 62.3 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 18.0 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 56.2 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 59.2 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 25.8 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 36.9 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 20.2 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1284 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 36.0 | \n",
+ "
\n",
+ " \n",
+ " 1285 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 48.3 | \n",
+ "
\n",
+ " \n",
+ " 1286 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 63.1 | \n",
+ "
\n",
+ " \n",
+ " 1287 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 60.8 | \n",
+ "
\n",
+ " \n",
+ " 1288 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 39.3 | \n",
+ "
\n",
+ " \n",
+ " 1289 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 36.7 | \n",
+ "
\n",
+ " \n",
+ " 1290 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 63.8 | \n",
+ "
\n",
+ " \n",
+ " 1291 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 71.3 | \n",
+ "
\n",
+ " \n",
+ " 1292 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 57.7 | \n",
+ "
\n",
+ " \n",
+ " 1293 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 63.2 | \n",
+ "
\n",
+ " \n",
+ " 1294 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 46.6 | \n",
+ "
\n",
+ " \n",
+ " 1295 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 82.4 | \n",
+ "
\n",
+ " \n",
+ " 1296 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 38.3 | \n",
+ "
\n",
+ " \n",
+ " 1297 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 32.7 | \n",
+ "
\n",
+ " \n",
+ " 1298 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 39.7 | \n",
+ "
\n",
+ " \n",
+ " 1299 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 60.0 | \n",
+ "
\n",
+ " \n",
+ " 1300 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 71.0 | \n",
+ "
\n",
+ " \n",
+ " 1301 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 20.5 | \n",
+ "
\n",
+ " \n",
+ " 1302 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 44.4 | \n",
+ "
\n",
+ " \n",
+ " 1303 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 31.2 | \n",
+ "
\n",
+ " \n",
+ " 1304 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 47.8 | \n",
+ "
\n",
+ " \n",
+ " 1305 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 60.9 | \n",
+ "
\n",
+ " \n",
+ " 1306 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 61.4 | \n",
+ "
\n",
+ " \n",
+ " 1307 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 43.0 | \n",
+ "
\n",
+ " \n",
+ " 1308 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 42.1 | \n",
+ "
\n",
+ " \n",
+ " 1309 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 35.9 | \n",
+ "
\n",
+ " \n",
+ " 1310 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 22.3 | \n",
+ "
\n",
+ " \n",
+ " 1311 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 62.1 | \n",
+ "
\n",
+ " \n",
+ " 1312 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 88.6 | \n",
+ "
\n",
+ " \n",
+ " 1313 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 39.1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1314 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Smoker Status Age\n",
+ "0 Yes Alive 21.0\n",
+ "1 Yes Alive 19.3\n",
+ "2 No Dead 57.5\n",
+ "3 No Alive 47.1\n",
+ "4 Yes Alive 81.4\n",
+ "5 No Alive 36.8\n",
+ "6 No Alive 23.8\n",
+ "7 Yes Dead 57.5\n",
+ "8 Yes Alive 24.8\n",
+ "9 Yes Alive 49.5\n",
+ "10 Yes Alive 30.0\n",
+ "11 No Dead 66.0\n",
+ "12 Yes Alive 49.2\n",
+ "13 No Alive 58.4\n",
+ "14 No Dead 60.6\n",
+ "15 No Alive 25.1\n",
+ "16 No Alive 43.5\n",
+ "17 No Alive 27.1\n",
+ "18 No Alive 58.3\n",
+ "19 Yes Alive 65.7\n",
+ "20 No Dead 73.2\n",
+ "21 Yes Alive 38.3\n",
+ "22 No Alive 33.4\n",
+ "23 Yes Dead 62.3\n",
+ "24 No Alive 18.0\n",
+ "25 No Alive 56.2\n",
+ "26 Yes Alive 59.2\n",
+ "27 No Alive 25.8\n",
+ "28 No Dead 36.9\n",
+ "29 No Alive 20.2\n",
+ "... ... ... ...\n",
+ "1284 Yes Dead 36.0\n",
+ "1285 Yes Alive 48.3\n",
+ "1286 No Alive 63.1\n",
+ "1287 No Alive 60.8\n",
+ "1288 Yes Dead 39.3\n",
+ "1289 No Alive 36.7\n",
+ "1290 No Alive 63.8\n",
+ "1291 No Dead 71.3\n",
+ "1292 No Alive 57.7\n",
+ "1293 No Alive 63.2\n",
+ "1294 No Alive 46.6\n",
+ "1295 Yes Dead 82.4\n",
+ "1296 Yes Alive 38.3\n",
+ "1297 Yes Alive 32.7\n",
+ "1298 No Alive 39.7\n",
+ "1299 Yes Dead 60.0\n",
+ "1300 No Dead 71.0\n",
+ "1301 No Alive 20.5\n",
+ "1302 No Alive 44.4\n",
+ "1303 Yes Alive 31.2\n",
+ "1304 Yes Alive 47.8\n",
+ "1305 Yes Alive 60.9\n",
+ "1306 No Dead 61.4\n",
+ "1307 Yes Alive 43.0\n",
+ "1308 No Alive 42.1\n",
+ "1309 Yes Alive 35.9\n",
+ "1310 No Alive 22.3\n",
+ "1311 Yes Dead 62.1\n",
+ "1312 No Dead 88.6\n",
+ "1313 No Alive 39.1\n",
+ "\n",
+ "[1314 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data = raw_data\n",
+ "data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/module3/exo3/exercice.ipynb b/module3/exo3/exercice.ipynb
index 0bbbe37..eb42c23 100644
--- a/module3/exo3/exercice.ipynb
+++ b/module3/exo3/exercice.ipynb
@@ -1,5 +1,1088 @@
{
- "cells": [],
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Autour du paradoxe de Simpson"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Ce sujet a pour but d'étudier un sondage réalisé en 1972-1974 sur un panel de 1314 femmes n'ayant jamais fumé ou fumant actuellement (au moment du sondage). Ce sondage a été effectué afin d'éclairer des travaux portant sur l'impact du tabagisme sur des maladies thyroïdiennes et cardiaques (Tunbridge *et al.* 1977). Une suite de cette étude a été menée vingt ans plus tard (Vanderpump *et al.* 1995) afin de savoir si les personnes sont décédées ou non. \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%matplotlib inline\n",
+ "import matplotlib.pyplot as plt\n",
+ "import pandas as pd\n",
+ "import isoweek"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Le fichier récupéré est au format CSV. Sur chaque ligne se trouvent les informations suivantes : la personne fume ou non, elle est vivante ou décédée au moment de la seconde étude, et son âge lors du premier sondage."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ " data_url = \"https://gitlab.inria.fr/learninglab/mooc-rr/mooc-rr-ressources/-/raw/master/module3/Practical_session/Subject6_smoking.csv\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Ici, pas besoin de sauter la première ligne du fichier CSV : il ne s'agit pas d'un commentaire mais de l'en-tête des colonnes (Smoker, Status, Age). En effet, si nous sautions cette ligne avec `skiprows=1`, la première ligne de valeurs serait interprétée comme en-tête et nous la perdrions."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Smoker | \n",
+ " Status | \n",
+ " Age | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 21.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 19.3 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 57.5 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 47.1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 81.4 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 36.8 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 23.8 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 57.5 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 24.8 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 49.5 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 30.0 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 66.0 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 49.2 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 58.4 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 60.6 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 25.1 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 43.5 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 27.1 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 58.3 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 65.7 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 73.2 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 38.3 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 33.4 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 62.3 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 18.0 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 56.2 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 59.2 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 25.8 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 36.9 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 20.2 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1284 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 36.0 | \n",
+ "
\n",
+ " \n",
+ " 1285 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 48.3 | \n",
+ "
\n",
+ " \n",
+ " 1286 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 63.1 | \n",
+ "
\n",
+ " \n",
+ " 1287 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 60.8 | \n",
+ "
\n",
+ " \n",
+ " 1288 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 39.3 | \n",
+ "
\n",
+ " \n",
+ " 1289 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 36.7 | \n",
+ "
\n",
+ " \n",
+ " 1290 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 63.8 | \n",
+ "
\n",
+ " \n",
+ " 1291 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 71.3 | \n",
+ "
\n",
+ " \n",
+ " 1292 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 57.7 | \n",
+ "
\n",
+ " \n",
+ " 1293 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 63.2 | \n",
+ "
\n",
+ " \n",
+ " 1294 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 46.6 | \n",
+ "
\n",
+ " \n",
+ " 1295 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 82.4 | \n",
+ "
\n",
+ " \n",
+ " 1296 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 38.3 | \n",
+ "
\n",
+ " \n",
+ " 1297 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 32.7 | \n",
+ "
\n",
+ " \n",
+ " 1298 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 39.7 | \n",
+ "
\n",
+ " \n",
+ " 1299 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 60.0 | \n",
+ "
\n",
+ " \n",
+ " 1300 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 71.0 | \n",
+ "
\n",
+ " \n",
+ " 1301 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 20.5 | \n",
+ "
\n",
+ " \n",
+ " 1302 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 44.4 | \n",
+ "
\n",
+ " \n",
+ " 1303 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 31.2 | \n",
+ "
\n",
+ " \n",
+ " 1304 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 47.8 | \n",
+ "
\n",
+ " \n",
+ " 1305 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 60.9 | \n",
+ "
\n",
+ " \n",
+ " 1306 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 61.4 | \n",
+ "
\n",
+ " \n",
+ " 1307 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 43.0 | \n",
+ "
\n",
+ " \n",
+ " 1308 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 42.1 | \n",
+ "
\n",
+ " \n",
+ " 1309 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 35.9 | \n",
+ "
\n",
+ " \n",
+ " 1310 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 22.3 | \n",
+ "
\n",
+ " \n",
+ " 1311 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 62.1 | \n",
+ "
\n",
+ " \n",
+ " 1312 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 88.6 | \n",
+ "
\n",
+ " \n",
+ " 1313 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 39.1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1314 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Smoker Status Age\n",
+ "0 Yes Alive 21.0\n",
+ "1 Yes Alive 19.3\n",
+ "2 No Dead 57.5\n",
+ "3 No Alive 47.1\n",
+ "4 Yes Alive 81.4\n",
+ "5 No Alive 36.8\n",
+ "6 No Alive 23.8\n",
+ "7 Yes Dead 57.5\n",
+ "8 Yes Alive 24.8\n",
+ "9 Yes Alive 49.5\n",
+ "10 Yes Alive 30.0\n",
+ "11 No Dead 66.0\n",
+ "12 Yes Alive 49.2\n",
+ "13 No Alive 58.4\n",
+ "14 No Dead 60.6\n",
+ "15 No Alive 25.1\n",
+ "16 No Alive 43.5\n",
+ "17 No Alive 27.1\n",
+ "18 No Alive 58.3\n",
+ "19 Yes Alive 65.7\n",
+ "20 No Dead 73.2\n",
+ "21 Yes Alive 38.3\n",
+ "22 No Alive 33.4\n",
+ "23 Yes Dead 62.3\n",
+ "24 No Alive 18.0\n",
+ "25 No Alive 56.2\n",
+ "26 Yes Alive 59.2\n",
+ "27 No Alive 25.8\n",
+ "28 No Dead 36.9\n",
+ "29 No Alive 20.2\n",
+ "... ... ... ...\n",
+ "1284 Yes Dead 36.0\n",
+ "1285 Yes Alive 48.3\n",
+ "1286 No Alive 63.1\n",
+ "1287 No Alive 60.8\n",
+ "1288 Yes Dead 39.3\n",
+ "1289 No Alive 36.7\n",
+ "1290 No Alive 63.8\n",
+ "1291 No Dead 71.3\n",
+ "1292 No Alive 57.7\n",
+ "1293 No Alive 63.2\n",
+ "1294 No Alive 46.6\n",
+ "1295 Yes Dead 82.4\n",
+ "1296 Yes Alive 38.3\n",
+ "1297 Yes Alive 32.7\n",
+ "1298 No Alive 39.7\n",
+ "1299 Yes Dead 60.0\n",
+ "1300 No Dead 71.0\n",
+ "1301 No Alive 20.5\n",
+ "1302 No Alive 44.4\n",
+ "1303 Yes Alive 31.2\n",
+ "1304 Yes Alive 47.8\n",
+ "1305 Yes Alive 60.9\n",
+ "1306 No Dead 61.4\n",
+ "1307 Yes Alive 43.0\n",
+ "1308 No Alive 42.1\n",
+ "1309 Yes Alive 35.9\n",
+ "1310 No Alive 22.3\n",
+ "1311 Yes Dead 62.1\n",
+ "1312 No Dead 88.6\n",
+ "1313 No Alive 39.1\n",
+ "\n",
+ "[1314 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw_data = pd.read_csv(data_url)\n",
+ "raw_data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Y a-t-il des points manquants dans ce jeu de données ? Vérifions cela."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Smoker | \n",
+ " Status | \n",
+ " Age | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [Smoker, Status, Age]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw_data[raw_data.isnull().any(axis=1)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A priori, il n'y a pas de donnée manquante. Il n'y a donc pas de donnée à supprimer.\n",
+ "Cependant, afin de clarifier la suite du code, nous définissons plus simplement les données par \"data\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Smoker | \n",
+ " Status | \n",
+ " Age | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 21.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 19.3 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 57.5 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 47.1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 81.4 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 36.8 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 23.8 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 57.5 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 24.8 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 49.5 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 30.0 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 66.0 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 49.2 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 58.4 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 60.6 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 25.1 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 43.5 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 27.1 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 58.3 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 65.7 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 73.2 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 38.3 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 33.4 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 62.3 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 18.0 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 56.2 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 59.2 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 25.8 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 36.9 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 20.2 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1284 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 36.0 | \n",
+ "
\n",
+ " \n",
+ " 1285 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 48.3 | \n",
+ "
\n",
+ " \n",
+ " 1286 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 63.1 | \n",
+ "
\n",
+ " \n",
+ " 1287 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 60.8 | \n",
+ "
\n",
+ " \n",
+ " 1288 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 39.3 | \n",
+ "
\n",
+ " \n",
+ " 1289 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 36.7 | \n",
+ "
\n",
+ " \n",
+ " 1290 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 63.8 | \n",
+ "
\n",
+ " \n",
+ " 1291 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 71.3 | \n",
+ "
\n",
+ " \n",
+ " 1292 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 57.7 | \n",
+ "
\n",
+ " \n",
+ " 1293 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 63.2 | \n",
+ "
\n",
+ " \n",
+ " 1294 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 46.6 | \n",
+ "
\n",
+ " \n",
+ " 1295 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 82.4 | \n",
+ "
\n",
+ " \n",
+ " 1296 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 38.3 | \n",
+ "
\n",
+ " \n",
+ " 1297 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 32.7 | \n",
+ "
\n",
+ " \n",
+ " 1298 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 39.7 | \n",
+ "
\n",
+ " \n",
+ " 1299 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 60.0 | \n",
+ "
\n",
+ " \n",
+ " 1300 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 71.0 | \n",
+ "
\n",
+ " \n",
+ " 1301 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 20.5 | \n",
+ "
\n",
+ " \n",
+ " 1302 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 44.4 | \n",
+ "
\n",
+ " \n",
+ " 1303 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 31.2 | \n",
+ "
\n",
+ " \n",
+ " 1304 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 47.8 | \n",
+ "
\n",
+ " \n",
+ " 1305 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 60.9 | \n",
+ "
\n",
+ " \n",
+ " 1306 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 61.4 | \n",
+ "
\n",
+ " \n",
+ " 1307 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 43.0 | \n",
+ "
\n",
+ " \n",
+ " 1308 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 42.1 | \n",
+ "
\n",
+ " \n",
+ " 1309 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 35.9 | \n",
+ "
\n",
+ " \n",
+ " 1310 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 22.3 | \n",
+ "
\n",
+ " \n",
+ " 1311 | \n",
+ " Yes | \n",
+ " Dead | \n",
+ " 62.1 | \n",
+ "
\n",
+ " \n",
+ " 1312 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 88.6 | \n",
+ "
\n",
+ " \n",
+ " 1313 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 39.1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1314 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Smoker Status Age\n",
+ "0 Yes Alive 21.0\n",
+ "1 Yes Alive 19.3\n",
+ "2 No Dead 57.5\n",
+ "3 No Alive 47.1\n",
+ "4 Yes Alive 81.4\n",
+ "5 No Alive 36.8\n",
+ "6 No Alive 23.8\n",
+ "7 Yes Dead 57.5\n",
+ "8 Yes Alive 24.8\n",
+ "9 Yes Alive 49.5\n",
+ "10 Yes Alive 30.0\n",
+ "11 No Dead 66.0\n",
+ "12 Yes Alive 49.2\n",
+ "13 No Alive 58.4\n",
+ "14 No Dead 60.6\n",
+ "15 No Alive 25.1\n",
+ "16 No Alive 43.5\n",
+ "17 No Alive 27.1\n",
+ "18 No Alive 58.3\n",
+ "19 Yes Alive 65.7\n",
+ "20 No Dead 73.2\n",
+ "21 Yes Alive 38.3\n",
+ "22 No Alive 33.4\n",
+ "23 Yes Dead 62.3\n",
+ "24 No Alive 18.0\n",
+ "25 No Alive 56.2\n",
+ "26 Yes Alive 59.2\n",
+ "27 No Alive 25.8\n",
+ "28 No Dead 36.9\n",
+ "29 No Alive 20.2\n",
+ "... ... ... ...\n",
+ "1284 Yes Dead 36.0\n",
+ "1285 Yes Alive 48.3\n",
+ "1286 No Alive 63.1\n",
+ "1287 No Alive 60.8\n",
+ "1288 Yes Dead 39.3\n",
+ "1289 No Alive 36.7\n",
+ "1290 No Alive 63.8\n",
+ "1291 No Dead 71.3\n",
+ "1292 No Alive 57.7\n",
+ "1293 No Alive 63.2\n",
+ "1294 No Alive 46.6\n",
+ "1295 Yes Dead 82.4\n",
+ "1296 Yes Alive 38.3\n",
+ "1297 Yes Alive 32.7\n",
+ "1298 No Alive 39.7\n",
+ "1299 Yes Dead 60.0\n",
+ "1300 No Dead 71.0\n",
+ "1301 No Alive 20.5\n",
+ "1302 No Alive 44.4\n",
+ "1303 Yes Alive 31.2\n",
+ "1304 Yes Alive 47.8\n",
+ "1305 Yes Alive 60.9\n",
+ "1306 No Dead 61.4\n",
+ "1307 Yes Alive 43.0\n",
+ "1308 No Alive 42.1\n",
+ "1309 Yes Alive 35.9\n",
+ "1310 No Alive 22.3\n",
+ "1311 Yes Dead 62.1\n",
+ "1312 No Dead 88.6\n",
+ "1313 No Alive 39.1\n",
+ "\n",
+ "[1314 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data = raw_data\n",
+ "data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": []
+ }
+ ],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
@@ -16,10 +1099,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.3"
+ "version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
-
--
2.18.1