{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Autour du Paradoxe de Simpson" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "import os\n", "import urllib.request\n", "\n", "import matplotlib.pyplot as plt\n", "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Obtention et pré-traitement des données\n", "\n", "Les données sont présentes sur le Gitlab du MOOC. Par sécurité elles sont téléchargées localement. Il n'est néanmoins pas nécessaire (et même contre-productif) de re-télécharger le fichier à chaque exécution : le téléchargement n'a lieu que si le fichier de données n'est pas présent sur la machine.\n", "\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Remote CSV on the MOOC GitLab and the local path it is cached under.\n", "data_url=\"https://gitlab.inria.fr/learninglab/mooc-rr/mooc-rr-ressources/-/raw/master/module3/Practical_session/Subject6_smoking.csv?inline=false\"\n", "data_file=\"Subject6_smoking.csv\"\n", "\n", "# Download only when no local copy exists, so re-running the notebook does not hit the network again.\n", "if not os.path.exists(data_file):\n", "    urllib.request.urlretrieve(data_url, data_file)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "On affiche un aperçu des données :" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmokerStatusAge
0YesAlive21.0
1YesAlive19.3
2NoDead57.5
3NoAlive47.1
4YesAlive81.4
5NoAlive36.8
6NoAlive23.8
7YesDead57.5
8YesAlive24.8
9YesAlive49.5
10YesAlive30.0
11NoDead66.0
12YesAlive49.2
13NoAlive58.4
14NoDead60.6
15NoAlive25.1
16NoAlive43.5
17NoAlive27.1
18NoAlive58.3
19YesAlive65.7
20NoDead73.2
21YesAlive38.3
22NoAlive33.4
23YesDead62.3
24NoAlive18.0
25NoAlive56.2
26YesAlive59.2
27NoAlive25.8
28NoDead36.9
29NoAlive20.2
............
1284YesDead36.0
1285YesAlive48.3
1286NoAlive63.1
1287NoAlive60.8
1288YesDead39.3
1289NoAlive36.7
1290NoAlive63.8
1291NoDead71.3
1292NoAlive57.7
1293NoAlive63.2
1294NoAlive46.6
1295YesDead82.4
1296YesAlive38.3
1297YesAlive32.7
1298NoAlive39.7
1299YesDead60.0
1300NoDead71.0
1301NoAlive20.5
1302NoAlive44.4
1303YesAlive31.2
1304YesAlive47.8
1305YesAlive60.9
1306NoDead61.4
1307YesAlive43.0
1308NoAlive42.1
1309YesAlive35.9
1310NoAlive22.3
1311YesDead62.1
1312NoDead88.6
1313NoAlive39.1
\n", "

1314 rows × 3 columns

\n", "
" ], "text/plain": [ " Smoker Status Age\n", "0 Yes Alive 21.0\n", "1 Yes Alive 19.3\n", "2 No Dead 57.5\n", "3 No Alive 47.1\n", "4 Yes Alive 81.4\n", "5 No Alive 36.8\n", "6 No Alive 23.8\n", "7 Yes Dead 57.5\n", "8 Yes Alive 24.8\n", "9 Yes Alive 49.5\n", "10 Yes Alive 30.0\n", "11 No Dead 66.0\n", "12 Yes Alive 49.2\n", "13 No Alive 58.4\n", "14 No Dead 60.6\n", "15 No Alive 25.1\n", "16 No Alive 43.5\n", "17 No Alive 27.1\n", "18 No Alive 58.3\n", "19 Yes Alive 65.7\n", "20 No Dead 73.2\n", "21 Yes Alive 38.3\n", "22 No Alive 33.4\n", "23 Yes Dead 62.3\n", "24 No Alive 18.0\n", "25 No Alive 56.2\n", "26 Yes Alive 59.2\n", "27 No Alive 25.8\n", "28 No Dead 36.9\n", "29 No Alive 20.2\n", "... ... ... ...\n", "1284 Yes Dead 36.0\n", "1285 Yes Alive 48.3\n", "1286 No Alive 63.1\n", "1287 No Alive 60.8\n", "1288 Yes Dead 39.3\n", "1289 No Alive 36.7\n", "1290 No Alive 63.8\n", "1291 No Dead 71.3\n", "1292 No Alive 57.7\n", "1293 No Alive 63.2\n", "1294 No Alive 46.6\n", "1295 Yes Dead 82.4\n", "1296 Yes Alive 38.3\n", "1297 Yes Alive 32.7\n", "1298 No Alive 39.7\n", "1299 Yes Dead 60.0\n", "1300 No Dead 71.0\n", "1301 No Alive 20.5\n", "1302 No Alive 44.4\n", "1303 Yes Alive 31.2\n", "1304 Yes Alive 47.8\n", "1305 Yes Alive 60.9\n", "1306 No Dead 61.4\n", "1307 Yes Alive 43.0\n", "1308 No Alive 42.1\n", "1309 Yes Alive 35.9\n", "1310 No Alive 22.3\n", "1311 Yes Dead 62.1\n", "1312 No Dead 88.6\n", "1313 No Alive 39.1\n", "\n", "[1314 rows x 3 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "raw_data = pd.read_csv(data_file)\n", "raw_data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "On vérifie qu'aucune ligne ne soit vide de valeur." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmokerStatusAge
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [Smoker, Status, Age]\n", "Index: []" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "raw_data[raw_data.isnull().any(axis=1)]\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Aucun souci n'a été repéré sur les données : elles semblent exploitables en l'état." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Work on a copy so that columns added later never modify raw_data.\n", "data = raw_data.copy()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Première exploitation des données\n", "\n", "On effectue une analyse simple (simpliste ?) sur les données. On commence par compter le nombre de fumeurs et de non-fumeurs." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Nombre de fumeurs = 582\n", "Nombre de non fumeurs = 732\n", "Taille de l'échantillon = 1314\n" ] } ], "source": [ "# Summing a boolean mask counts the rows where the condition holds.\n", "smokers = (data['Smoker'] == 'Yes').sum()\n", "print('Nombre de fumeurs =',smokers)\n", "non_smokers = (data['Smoker'] == 'No').sum()\n", "print('Nombre de non fumeurs =',non_smokers)\n", "total = smokers + non_smokers\n", "print('Taille de l\\'échantillon =',total)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "On calcule maintenant le taux de mortalité pour ces deux groupes :" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mortalité fumeur = 0.239\n", "Mortalité non fumeur = 0.314\n", "Mortalité de l'échantillon = 0.281\n" ] } ], "source": [ "# Death count per group, then death rate = deaths / group size.\n", "deaths_smokers = ((data['Smoker'] == 'Yes') & (data['Status'] == 'Dead')).sum()\n", "death_rate_smokers = deaths_smokers / smokers\n", "deaths_non_smokers = ((data['Smoker'] == 'No') & (data['Status'] == 'Dead')).sum()\n", "death_rate_non_smokers = deaths_non_smokers / non_smokers\n", 
"death_rate_total=(deaths_smokers+deaths_non_smokers)/total\n", "print('Mortalité fumeur =',round(death_rate_smokers,3))\n", "print('Mortalité non fumeur =', round(death_rate_non_smokers,3))\n", "print('Mortalité de l\\'échantillon =',round(death_rate_total,3))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "On arrange ces informations sous forme d'un tableau" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Fumeurs Non-fumeurs Total\n", " ------------------------------------\n", "Taille du groupe 582 732 1314\n", "Vivant 443 502 945\n", "Mort 139 230 369\n", "Mortalité 0.239 0.314 0.281\n" ] } ], "source": [ "print(' Fumeurs Non-fumeurs Total')\n", "print(' ------------------------------------')\n", "print('Taille du groupe ',smokers,' ',non_smokers,' ',total)\n", "print('Vivant ',smokers-deaths_smokers,' ',non_smokers-deaths_non_smokers,' ',total-deaths_smokers-deaths_non_smokers)\n", "print('Mort ',deaths_smokers,' ',deaths_non_smokers,' ',deaths_smokers+deaths_non_smokers)\n", "print('Mortalité ',round(death_rate_smokers,3),' ',round(death_rate_non_smokers,3),' ',round(death_rate_total,3))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "On peut également les représenter sous forme de graphique circulaire" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Share of each group in the whole sample, listed in the same order as the labels.\n", "labels = ('Non-fumeurs', 'Fumeurs')\n", "sizes = [group_size / total for group_size in (non_smokers, smokers)]\n", "\n", "fig1, ax1 = plt.subplots()\n", "ax1.pie(sizes, labels=labels, autopct='%1.1f%%', shadow=True, startangle=90)\n", "ax1.axis('equal')  # equal aspect ratio keeps the pie circular\n", "ax1.set_title('Répartition de l\\'échantillon')\n", "plt.show()\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "def plot_mortality_pie(death_rate, title):\n", "    \"\"\"Draw an alive/dead pie chart for one group.\n", "\n", "    death_rate: fraction of the group that died (deaths / group size).\n", "    title: text displayed above the chart.\n", "    \"\"\"\n", "    fig, ax = plt.subplots()\n", "    ax.pie([1 - death_rate, death_rate], labels=('Vivants', 'Morts'), explode=(0, 0.1),\n", "           startangle=90, shadow=True, autopct='%1.1f%%', colors=('green', 'red'))\n", "    ax.axis('equal')  # equal aspect ratio keeps the pie circular\n", "    ax.set_title(title)\n", "    plt.show()\n", "\n", "\n", "plot_mortality_pie(death_rate_smokers, 'Mortalité échantillon de fumeurs')" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plot_mortality_pie(death_rate_non_smokers, 'Mortalité échantillon de non-fumeurs')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**TODO :** intervalle de confiance ?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Il apparaît alors que la mortalité est plus importante au sein de l'échantillon 'non-fumeurs' ; une conclusion hâtive pourrait donc nous amener à mettre en doute la plus connue des inscriptions figurant sur les paquets de cigarettes actuels." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Prise en compte de l'âge\n", "\n", "Notre analyse précédente nous mène à une contradiction avec le célèbre _Fumer Tue_. On se penche donc sur la répartition d'âge au sein des groupes afin de voir si cela peut mener à une explication.\n", "On commence par regrouper par tranche d'âge (18-34, 35-54, 55-64, 65+)." ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "# Half-open bins [low, high): A = under 35, B = 35-54, C = 55-64, D = 65 and over.\n", "age_bins = [-float('inf'), 35, 55, 65, float('inf')]\n", "age_category = pd.cut(data['Age'], bins=age_bins, right=False, labels=['A', 'B', 'C', 'D'])\n", "# assign() returns a new frame instead of writing into the existing one;\n", "# astype(object) keeps the labels as plain strings rather than a categorical dtype.\n", "data = data.assign(**{'Categorie d\\'âge': age_category.astype(object)})" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmokerStatusAgeCatégorie d'âgeCategorieCategorie d'âge
0YesAlive21.0ANaNA
1YesAlive19.3AAA
2NoDead57.5ANaNC
3NoAlive47.1ANaNB
4YesAlive81.4ANaND
5NoAlive36.8ANaNB
6NoAlive23.8ANaNA
7YesDead57.5ANaNC
8YesAlive24.8ANaNA
9YesAlive49.5ANaNB
10YesAlive30.0ANaNA
11NoDead66.0ANaND
12YesAlive49.2ANaNB
13NoAlive58.4ANaNC
14NoDead60.6ANaNC
15NoAlive25.1ANaNA
16NoAlive43.5ANaNB
17NoAlive27.1ANaNA
18NoAlive58.3ANaNC
19YesAlive65.7ANaND
20NoDead73.2ANaND
21YesAlive38.3ANaNB
22NoAlive33.4ANaNA
23YesDead62.3ANaNC
24NoAlive18.0AAA
25NoAlive56.2ANaNC
26YesAlive59.2ANaNC
27NoAlive25.8ANaNA
28NoDead36.9ANaNB
29NoAlive20.2AAA
.....................
1284YesDead36.0ANaNB
1285YesAlive48.3ANaNB
1286NoAlive63.1ANaNC
1287NoAlive60.8ANaNC
1288YesDead39.3ANaNB
1289NoAlive36.7ANaNB
1290NoAlive63.8ANaNC
1291NoDead71.3ANaND
1292NoAlive57.7ANaNC
1293NoAlive63.2ANaNC
1294NoAlive46.6ANaNB
1295YesDead82.4ANaND
1296YesAlive38.3ANaNB
1297YesAlive32.7ANaNA
1298NoAlive39.7ANaNB
1299YesDead60.0ANaNC
1300NoDead71.0ANaND
1301NoAlive20.5AAA
1302NoAlive44.4ANaNB
1303YesAlive31.2ANaNA
1304YesAlive47.8ANaNB
1305YesAlive60.9ANaNC
1306NoDead61.4ANaNC
1307YesAlive43.0ANaNB
1308NoAlive42.1ANaNB
1309YesAlive35.9ANaNB
1310NoAlive22.3ANaNA
1311YesDead62.1ANaNC
1312NoDead88.6ANaND
1313NoAlive39.1ANaNB
\n", "

1314 rows × 6 columns

\n", "
" ], "text/plain": [ " Smoker Status Age Catégorie d'âge Categorie Categorie d'âge\n", "0 Yes Alive 21.0 A NaN A\n", "1 Yes Alive 19.3 A A A\n", "2 No Dead 57.5 A NaN C\n", "3 No Alive 47.1 A NaN B\n", "4 Yes Alive 81.4 A NaN D\n", "5 No Alive 36.8 A NaN B\n", "6 No Alive 23.8 A NaN A\n", "7 Yes Dead 57.5 A NaN C\n", "8 Yes Alive 24.8 A NaN A\n", "9 Yes Alive 49.5 A NaN B\n", "10 Yes Alive 30.0 A NaN A\n", "11 No Dead 66.0 A NaN D\n", "12 Yes Alive 49.2 A NaN B\n", "13 No Alive 58.4 A NaN C\n", "14 No Dead 60.6 A NaN C\n", "15 No Alive 25.1 A NaN A\n", "16 No Alive 43.5 A NaN B\n", "17 No Alive 27.1 A NaN A\n", "18 No Alive 58.3 A NaN C\n", "19 Yes Alive 65.7 A NaN D\n", "20 No Dead 73.2 A NaN D\n", "21 Yes Alive 38.3 A NaN B\n", "22 No Alive 33.4 A NaN A\n", "23 Yes Dead 62.3 A NaN C\n", "24 No Alive 18.0 A A A\n", "25 No Alive 56.2 A NaN C\n", "26 Yes Alive 59.2 A NaN C\n", "27 No Alive 25.8 A NaN A\n", "28 No Dead 36.9 A NaN B\n", "29 No Alive 20.2 A A A\n", "... ... ... ... ... ... ...\n", "1284 Yes Dead 36.0 A NaN B\n", "1285 Yes Alive 48.3 A NaN B\n", "1286 No Alive 63.1 A NaN C\n", "1287 No Alive 60.8 A NaN C\n", "1288 Yes Dead 39.3 A NaN B\n", "1289 No Alive 36.7 A NaN B\n", "1290 No Alive 63.8 A NaN C\n", "1291 No Dead 71.3 A NaN D\n", "1292 No Alive 57.7 A NaN C\n", "1293 No Alive 63.2 A NaN C\n", "1294 No Alive 46.6 A NaN B\n", "1295 Yes Dead 82.4 A NaN D\n", "1296 Yes Alive 38.3 A NaN B\n", "1297 Yes Alive 32.7 A NaN A\n", "1298 No Alive 39.7 A NaN B\n", "1299 Yes Dead 60.0 A NaN C\n", "1300 No Dead 71.0 A NaN D\n", "1301 No Alive 20.5 A A A\n", "1302 No Alive 44.4 A NaN B\n", "1303 Yes Alive 31.2 A NaN A\n", "1304 Yes Alive 47.8 A NaN B\n", "1305 Yes Alive 60.9 A NaN C\n", "1306 No Dead 61.4 A NaN C\n", "1307 Yes Alive 43.0 A NaN B\n", "1308 No Alive 42.1 A NaN B\n", "1309 Yes Alive 35.9 A NaN B\n", "1310 No Alive 22.3 A NaN A\n", "1311 Yes Dead 62.1 A NaN C\n", "1312 No Dead 88.6 A NaN D\n", "1313 No Alive 39.1 A NaN B\n", "\n", "[1314 rows x 6 
columns]" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }