From 3a50438364e592caf3c1b3f200dd21bbb7f0ac0e Mon Sep 17 00:00:00 2001 From: fca8854efe3446a788acc25bed5b298c Date: Fri, 9 Sep 2022 13:10:36 +0000 Subject: [PATCH] no commit message --- module3/exo3/exercice.ipynb | 1466 ++++++++++++++++++++++++++++++++++- 1 file changed, 1463 insertions(+), 3 deletions(-) diff --git a/module3/exo3/exercice.ipynb b/module3/exo3/exercice.ipynb index 0bbbe37..6289c5a 100644 --- a/module3/exo3/exercice.ipynb +++ b/module3/exo3/exercice.ipynb @@ -1,5 +1,1466 @@ { - "cells": [], + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sujet 6 : Autour du Paradoxe de Simpson\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Dans un premier temps j'importe l'ensemble des bibliothèques nécessaires pour réaliser l'exercice. " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import pandas as pd \n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Puis j'importe les données brutes grâce à l'URL associée. Ici je garde la première ligne qui correspond aux légendes \"Smoker, Status et Age\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SmokerStatusAge
0YesAlive21.0
1YesAlive19.3
2NoDead57.5
3NoAlive47.1
4YesAlive81.4
5NoAlive36.8
6NoAlive23.8
7YesDead57.5
8YesAlive24.8
9YesAlive49.5
10YesAlive30.0
11NoDead66.0
12YesAlive49.2
13NoAlive58.4
14NoDead60.6
15NoAlive25.1
16NoAlive43.5
17NoAlive27.1
18NoAlive58.3
19YesAlive65.7
20NoDead73.2
21YesAlive38.3
22NoAlive33.4
23YesDead62.3
24NoAlive18.0
25NoAlive56.2
26YesAlive59.2
27NoAlive25.8
28NoDead36.9
29NoAlive20.2
............
1284YesDead36.0
1285YesAlive48.3
1286NoAlive63.1
1287NoAlive60.8
1288YesDead39.3
1289NoAlive36.7
1290NoAlive63.8
1291NoDead71.3
1292NoAlive57.7
1293NoAlive63.2
1294NoAlive46.6
1295YesDead82.4
1296YesAlive38.3
1297YesAlive32.7
1298NoAlive39.7
1299YesDead60.0
1300NoDead71.0
1301NoAlive20.5
1302NoAlive44.4
1303YesAlive31.2
1304YesAlive47.8
1305YesAlive60.9
1306NoDead61.4
1307YesAlive43.0
1308NoAlive42.1
1309YesAlive35.9
1310NoAlive22.3
1311YesDead62.1
1312NoDead88.6
1313NoAlive39.1
\n", + "

1314 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " Smoker Status Age\n", + "0 Yes Alive 21.0\n", + "1 Yes Alive 19.3\n", + "2 No Dead 57.5\n", + "3 No Alive 47.1\n", + "4 Yes Alive 81.4\n", + "5 No Alive 36.8\n", + "6 No Alive 23.8\n", + "7 Yes Dead 57.5\n", + "8 Yes Alive 24.8\n", + "9 Yes Alive 49.5\n", + "10 Yes Alive 30.0\n", + "11 No Dead 66.0\n", + "12 Yes Alive 49.2\n", + "13 No Alive 58.4\n", + "14 No Dead 60.6\n", + "15 No Alive 25.1\n", + "16 No Alive 43.5\n", + "17 No Alive 27.1\n", + "18 No Alive 58.3\n", + "19 Yes Alive 65.7\n", + "20 No Dead 73.2\n", + "21 Yes Alive 38.3\n", + "22 No Alive 33.4\n", + "23 Yes Dead 62.3\n", + "24 No Alive 18.0\n", + "25 No Alive 56.2\n", + "26 Yes Alive 59.2\n", + "27 No Alive 25.8\n", + "28 No Dead 36.9\n", + "29 No Alive 20.2\n", + "... ... ... ...\n", + "1284 Yes Dead 36.0\n", + "1285 Yes Alive 48.3\n", + "1286 No Alive 63.1\n", + "1287 No Alive 60.8\n", + "1288 Yes Dead 39.3\n", + "1289 No Alive 36.7\n", + "1290 No Alive 63.8\n", + "1291 No Dead 71.3\n", + "1292 No Alive 57.7\n", + "1293 No Alive 63.2\n", + "1294 No Alive 46.6\n", + "1295 Yes Dead 82.4\n", + "1296 Yes Alive 38.3\n", + "1297 Yes Alive 32.7\n", + "1298 No Alive 39.7\n", + "1299 Yes Dead 60.0\n", + "1300 No Dead 71.0\n", + "1301 No Alive 20.5\n", + "1302 No Alive 44.4\n", + "1303 Yes Alive 31.2\n", + "1304 Yes Alive 47.8\n", + "1305 Yes Alive 60.9\n", + "1306 No Dead 61.4\n", + "1307 Yes Alive 43.0\n", + "1308 No Alive 42.1\n", + "1309 Yes Alive 35.9\n", + "1310 No Alive 22.3\n", + "1311 Yes Dead 62.1\n", + "1312 No Dead 88.6\n", + "1313 No Alive 39.1\n", + "\n", + "[1314 rows x 3 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data = pd.read_csv(\"https://gitlab.inria.fr/learninglab/mooc-rr/mooc-rr-ressources/-/raw/master/module3/Practical_session/Subject6_smoking.csv?inline=false\")\n", + "raw_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Je vérifie qu'il n'y ai 
pas de ligne avec des données manquantes dans une des lignes, auquel cas je devrais la supprimer." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "hideOutput": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SmokerStatusAge
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [Smoker, Status, Age]\n", + "Index: []" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data[raw_data.isnull().any(axis=1)] " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Le tableau obtenu est vide : aucune ligne de \"vraies\" données n'est incomplète (la première ligne du fichier correspond en fait aux légendes et sert d'en-tête), je n'ai donc pas besoin de supprimer de ligne." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hideOutput": true + }, + "source": [ + "## Question 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Je ne sais pas faire de tableau (j'apprends tout juste à coder) ; par contre je sais calculer le taux de mortalité (dead / total) pour chaque groupe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Je commence par regarder ce qu'il se passe chez les fumeuses. Pour commencer je ne sélectionne que les fumeuses et je regarde combien il y en a." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Smoker Status Age\n", + "0 Yes Alive 21.0\n", + "1 Yes Alive 19.3\n", + "4 Yes Alive 81.4\n", + "7 Yes Dead 57.5\n", + "8 Yes Alive 24.8\n", + "9 Yes Alive 49.5\n", + "10 Yes Alive 30.0\n", + "12 Yes Alive 49.2\n", + "19 Yes Alive 65.7\n", + "21 Yes Alive 38.3\n", + "23 Yes Dead 62.3\n", + "26 Yes Alive 59.2\n", + "30 Yes Alive 34.6\n", + "31 Yes Alive 51.9\n", + "32 Yes Alive 49.9\n", + "35 Yes Alive 46.7\n", + "36 Yes Alive 44.4\n", + "37 Yes Alive 29.5\n", + "38 Yes Dead 33.0\n", + "39 Yes Alive 35.6\n", + "40 Yes Alive 39.1\n", + "42 Yes Alive 35.7\n", + "46 Yes Dead 44.3\n", + "48 Yes Alive 37.5\n", + "49 Yes Alive 22.1\n", + "53 Yes Alive 39.0\n", + "56 Yes Alive 40.1\n", + "60 Yes Alive 58.1\n", + "61 Yes Alive 37.3\n", + "63 Yes Dead 36.3\n", + "... ... ... ...\n", + "1240 Yes Alive 29.7\n", + "1243 Yes Alive 40.1\n", + "1251 Yes Alive 27.8\n", + "1252 Yes Alive 52.4\n", + "1253 Yes Alive 27.8\n", + "1254 Yes Alive 41.0\n", + "1259 Yes Alive 40.8\n", + "1260 Yes Alive 20.4\n", + "1263 Yes Alive 20.9\n", + "1264 Yes Alive 45.5\n", + "1269 Yes Alive 38.8\n", + "1270 Yes Alive 55.5\n", + "1271 Yes Alive 24.9\n", + "1273 Yes Alive 55.7\n", + "1276 Yes Alive 58.5\n", + "1278 Yes Alive 43.7\n", + "1282 Yes Alive 51.2\n", + "1284 Yes Dead 36.0\n", + "1285 Yes Alive 48.3\n", + "1288 Yes Dead 39.3\n", + "1295 Yes Dead 82.4\n", + "1296 Yes Alive 38.3\n", + "1297 Yes Alive 32.7\n", + "1299 Yes Dead 60.0\n", + "1303 Yes Alive 31.2\n", + "1304 Yes Alive 47.8\n", + "1305 Yes Alive 60.9\n", + "1307 Yes Alive 43.0\n", + "1309 Yes Alive 35.9\n", + "1311 Yes Dead 62.1\n", + "\n", + "[582 rows x 3 columns]\n", + "582\n" + ] + } + ], + "source": [ + "fumeuse = raw_data[raw_data[\"Smoker\"] == \"Yes\"]\n", + "print(fumeuse)\n", + "print(len(fumeuse))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": 
[ + "Puis à partir des fumeuse je regarde celles qui sont mortes" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Smoker Status Age\n", + "7 Yes Dead 57.5\n", + "23 Yes Dead 62.3\n", + "38 Yes Dead 33.0\n", + "46 Yes Dead 44.3\n", + "63 Yes Dead 36.3\n", + "64 Yes Dead 80.7\n", + "87 Yes Dead 53.6\n", + "97 Yes Dead 55.5\n", + "101 Yes Dead 61.0\n", + "109 Yes Dead 62.8\n", + "112 Yes Dead 66.5\n", + "122 Yes Dead 63.8\n", + "129 Yes Dead 87.8\n", + "132 Yes Dead 35.7\n", + "136 Yes Dead 71.7\n", + "139 Yes Dead 40.8\n", + "158 Yes Dead 59.4\n", + "165 Yes Dead 59.9\n", + "170 Yes Dead 62.3\n", + "171 Yes Dead 48.4\n", + "190 Yes Dead 78.3\n", + "199 Yes Dead 68.4\n", + "201 Yes Dead 89.2\n", + "204 Yes Dead 57.4\n", + "207 Yes Dead 63.4\n", + "212 Yes Dead 57.6\n", + "223 Yes Dead 83.1\n", + "235 Yes Dead 47.8\n", + "248 Yes Dead 81.0\n", + "253 Yes Dead 58.9\n", + "... ... ... 
...\n", + "991 Yes Dead 61.0\n", + "997 Yes Dead 82.0\n", + "1001 Yes Dead 83.6\n", + "1011 Yes Dead 44.3\n", + "1016 Yes Dead 32.6\n", + "1023 Yes Dead 56.5\n", + "1040 Yes Dead 78.3\n", + "1050 Yes Dead 55.7\n", + "1063 Yes Dead 42.9\n", + "1064 Yes Dead 56.1\n", + "1069 Yes Dead 44.9\n", + "1078 Yes Dead 52.6\n", + "1080 Yes Dead 88.6\n", + "1093 Yes Dead 84.3\n", + "1114 Yes Dead 31.3\n", + "1115 Yes Dead 63.3\n", + "1120 Yes Dead 49.6\n", + "1130 Yes Dead 39.2\n", + "1142 Yes Dead 71.0\n", + "1158 Yes Dead 41.7\n", + "1168 Yes Dead 45.0\n", + "1182 Yes Dead 85.2\n", + "1221 Yes Dead 56.2\n", + "1222 Yes Dead 87.9\n", + "1234 Yes Dead 63.3\n", + "1284 Yes Dead 36.0\n", + "1288 Yes Dead 39.3\n", + "1295 Yes Dead 82.4\n", + "1299 Yes Dead 60.0\n", + "1311 Yes Dead 62.1\n", + "\n", + "[139 rows x 3 columns]\n", + "139\n" + ] + } + ], + "source": [ + "fumeuse_morte = fumeuse[fumeuse[\"Status\"] == \"Dead\"]\n", + "print(fumeuse_morte)\n", + "print(len(fumeuse_morte))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Enfin je calcule le taux de mortalité chez les fumeuses" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.23883161512027493\n" + ] + } + ], + "source": [ + "mortalite_fumeuse = len(fumeuse_morte)/len(fumeuse)\n", + "print(mortalite_fumeuse)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Maintenant je regarde les non-fumeuse. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Smoker Status Age\n", + "2 No Dead 57.5\n", + "3 No Alive 47.1\n", + "5 No Alive 36.8\n", + "6 No Alive 23.8\n", + "11 No Dead 66.0\n", + "13 No Alive 58.4\n", + "14 No Dead 60.6\n", + "15 No Alive 25.1\n", + "16 No Alive 43.5\n", + "17 No Alive 27.1\n", + "18 No Alive 58.3\n", + "20 No Dead 73.2\n", + "22 No Alive 33.4\n", + "24 No Alive 18.0\n", + "25 No Alive 56.2\n", + "27 No Alive 25.8\n", + "28 No Dead 36.9\n", + "29 No Alive 20.2\n", + "33 No Alive 19.4\n", + "34 No Alive 56.9\n", + "41 No Dead 69.7\n", + "43 No Dead 75.8\n", + "44 No Alive 25.3\n", + "45 No Dead 83.0\n", + "47 No Alive 18.5\n", + "50 No Alive 82.8\n", + "51 No Alive 45.0\n", + "52 No Dead 73.3\n", + "54 No Alive 28.4\n", + "55 No Dead 73.7\n", + "... ... ... ...\n", + "1262 No Alive 41.2\n", + "1265 No Alive 26.7\n", + "1266 No Alive 41.8\n", + "1267 No Alive 33.7\n", + "1268 No Alive 56.5\n", + "1272 No Alive 33.0\n", + "1274 No Alive 25.7\n", + "1275 No Alive 19.5\n", + "1277 No Alive 23.4\n", + "1279 No Alive 34.4\n", + "1280 No Dead 83.9\n", + "1281 No Alive 34.9\n", + "1283 No Dead 86.3\n", + "1286 No Alive 63.1\n", + "1287 No Alive 60.8\n", + "1289 No Alive 36.7\n", + "1290 No Alive 63.8\n", + "1291 No Dead 71.3\n", + "1292 No Alive 57.7\n", + "1293 No Alive 63.2\n", + "1294 No Alive 46.6\n", + "1298 No Alive 39.7\n", + "1300 No Dead 71.0\n", + "1301 No Alive 20.5\n", + "1302 No Alive 44.4\n", + "1306 No Dead 61.4\n", + "1308 No Alive 42.1\n", + "1310 No Alive 22.3\n", + "1312 No Dead 88.6\n", + "1313 No Alive 39.1\n", + "\n", + "[732 rows x 3 columns]\n", + "732\n", + " Smoker Status Age\n", + "2 No Dead 57.5\n", + "11 No Dead 66.0\n", + "14 No Dead 60.6\n", + "20 No Dead 73.2\n", + "28 No Dead 36.9\n", + "41 No Dead 69.7\n", + "43 No Dead 75.8\n", + "45 No Dead 83.0\n", + "52 No Dead 73.3\n", + "55 No Dead 73.7\n", + "69 No 
Dead 77.6\n", + "70 No Dead 58.1\n", + "78 No Dead 85.0\n", + "82 No Dead 72.8\n", + "85 No Dead 55.9\n", + "99 No Dead 65.7\n", + "100 No Dead 76.5\n", + "104 No Dead 70.5\n", + "105 No Dead 81.8\n", + "114 No Dead 89.3\n", + "131 No Dead 88.4\n", + "134 No Dead 62.3\n", + "146 No Dead 26.3\n", + "147 No Dead 66.0\n", + "149 No Dead 58.3\n", + "150 No Dead 86.8\n", + "153 No Dead 75.8\n", + "155 No Dead 63.5\n", + "163 No Dead 89.7\n", + "169 No Dead 76.7\n", + "... ... ... ...\n", + "1151 No Dead 75.9\n", + "1152 No Dead 88.0\n", + "1153 No Dead 66.8\n", + "1156 No Dead 83.8\n", + "1164 No Dead 51.1\n", + "1166 No Dead 56.9\n", + "1171 No Dead 86.7\n", + "1173 No Dead 71.6\n", + "1174 No Dead 78.3\n", + "1176 No Dead 84.8\n", + "1185 No Dead 73.8\n", + "1194 No Dead 83.3\n", + "1196 No Dead 76.2\n", + "1199 No Dead 67.8\n", + "1214 No Dead 62.5\n", + "1224 No Dead 75.1\n", + "1225 No Dead 87.9\n", + "1238 No Dead 43.3\n", + "1241 No Dead 79.0\n", + "1242 No Dead 65.1\n", + "1246 No Dead 89.2\n", + "1255 No Dead 28.5\n", + "1258 No Dead 74.4\n", + "1261 No Dead 42.1\n", + "1280 No Dead 83.9\n", + "1283 No Dead 86.3\n", + "1291 No Dead 71.3\n", + "1300 No Dead 71.0\n", + "1306 No Dead 61.4\n", + "1312 No Dead 88.6\n", + "\n", + "[230 rows x 3 columns]\n", + "230\n" + ] + } + ], + "source": [ + "non_fumeuse = raw_data[raw_data[\"Smoker\"] == \"No\"]\n", + "print(non_fumeuse)\n", + "print(len(non_fumeuse))\n", + "\n", + "non_fumeuse_morte = non_fumeuse[non_fumeuse[\"Status\"] == \"Dead\"]\n", + "print(non_fumeuse_morte)\n", + "print(len(non_fumeuse_morte))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Avant de continuer je vérifie que le totale des non fumeuses et non fumeuses font bien 1314" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1314\n" + ] + } + ], + "source": [ + "print(len(non_fumeuse)+ len(fumeuse))" + ] + }, + { 
+ "cell_type": "markdown", + "metadata": {}, + "source": [ + "Maintenant que je suis rassurée je calcule le taux de mortalité chez les non fumeuses" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.31420765027322406\n" + ] + } + ], + "source": [ + "mortalite_non_fumeuse = len(non_fumeuse_morte)/len(non_fumeuse)\n", + "print(mortalite_non_fumeuse)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ces résultats sont surprenants car ils tendent à dire que les femmes qui ne fument pas ont un taux de mortalité plus élevé alors que l'on s'attend à l'inverse (PS: je n'ai pas envie de faire une représentation graphique)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Question 2\n", + "\n", + "Je refais les calculs du taux de mortalité en ajoutant les tranches d'âge 18-34 ans, 35-54 ans, 55-64 ans, plus de 65 ans." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Smoker Status Age\n", + "38 Yes Dead 33.0\n", + "827 Yes Dead 22.6\n", + "972 Yes Dead 28.3\n", + "1016 Yes Dead 32.6\n", + "1114 Yes Dead 31.3\n", + "5\n", + " Smoker Status Age\n", + "46 Yes Dead 44.3\n", + "63 Yes Dead 36.3\n", + "87 Yes Dead 53.6\n", + "132 Yes Dead 35.7\n", + "139 Yes Dead 40.8\n", + "171 Yes Dead 48.4\n", + "235 Yes Dead 47.8\n", + "258 Yes Dead 35.4\n", + "310 Yes Dead 36.5\n", + "326 Yes Dead 35.2\n", + "330 Yes Dead 34.3\n", + "336 Yes Dead 42.3\n", + "372 Yes Dead 44.3\n", + "387 Yes Dead 38.5\n", + "401 Yes Dead 43.7\n", + "426 Yes Dead 35.5\n", + "450 Yes Dead 45.6\n", + "517 Yes Dead 35.7\n", + "535 Yes Dead 36.2\n", + "666 Yes Dead 46.6\n", + "727 Yes Dead 51.6\n", + "747 Yes Dead 53.6\n", + "757 Yes Dead 50.2\n", + "787 Yes Dead 36.9\n", + "864 Yes Dead 42.4\n", + "875 Yes Dead 44.9\n", + "937 Yes Dead 
38.8\n", + "947 Yes Dead 37.1\n", + "950 Yes Dead 43.3\n", + "963 Yes Dead 34.5\n", + "976 Yes Dead 42.5\n", + "1011 Yes Dead 44.3\n", + "1063 Yes Dead 42.9\n", + "1069 Yes Dead 44.9\n", + "1078 Yes Dead 52.6\n", + "1120 Yes Dead 49.6\n", + "1130 Yes Dead 39.2\n", + "1158 Yes Dead 41.7\n", + "1168 Yes Dead 45.0\n", + "1284 Yes Dead 36.0\n", + "1288 Yes Dead 39.3\n", + "41\n", + " Smoker Status Age\n", + "7 Yes Dead 57.5\n", + "23 Yes Dead 62.3\n", + "97 Yes Dead 55.5\n", + "101 Yes Dead 61.0\n", + "109 Yes Dead 62.8\n", + "122 Yes Dead 63.8\n", + "158 Yes Dead 59.4\n", + "165 Yes Dead 59.9\n", + "170 Yes Dead 62.3\n", + "204 Yes Dead 57.4\n", + "207 Yes Dead 63.4\n", + "212 Yes Dead 57.6\n", + "253 Yes Dead 58.9\n", + "279 Yes Dead 56.7\n", + "283 Yes Dead 59.3\n", + "316 Yes Dead 60.1\n", + "379 Yes Dead 57.7\n", + "395 Yes Dead 58.0\n", + "402 Yes Dead 61.1\n", + "419 Yes Dead 56.9\n", + "478 Yes Dead 60.7\n", + "482 Yes Dead 63.4\n", + "493 Yes Dead 62.3\n", + "518 Yes Dead 56.6\n", + "531 Yes Dead 61.8\n", + "544 Yes Dead 63.2\n", + "549 Yes Dead 55.3\n", + "573 Yes Dead 55.2\n", + "626 Yes Dead 58.6\n", + "652 Yes Dead 62.4\n", + "675 Yes Dead 60.2\n", + "699 Yes Dead 62.1\n", + "734 Yes Dead 57.9\n", + "754 Yes Dead 62.7\n", + "764 Yes Dead 63.4\n", + "778 Yes Dead 61.1\n", + "785 Yes Dead 59.7\n", + "802 Yes Dead 56.5\n", + "805 Yes Dead 58.9\n", + "973 Yes Dead 61.9\n", + "983 Yes Dead 58.2\n", + "991 Yes Dead 61.0\n", + "1023 Yes Dead 56.5\n", + "1050 Yes Dead 55.7\n", + "1064 Yes Dead 56.1\n", + "1115 Yes Dead 63.3\n", + "1221 Yes Dead 56.2\n", + "1234 Yes Dead 63.3\n", + "1299 Yes Dead 60.0\n", + "1311 Yes Dead 62.1\n", + "50\n", + " Smoker Status Age\n", + "64 Yes Dead 80.7\n", + "112 Yes Dead 66.5\n", + "129 Yes Dead 87.8\n", + "136 Yes Dead 71.7\n", + "190 Yes Dead 78.3\n", + "199 Yes Dead 68.4\n", + "201 Yes Dead 89.2\n", + "223 Yes Dead 83.1\n", + "248 Yes Dead 81.0\n", + "305 Yes Dead 65.8\n", + "317 Yes Dead 77.6\n", + "333 Yes Dead 74.1\n", + 
"410 Yes Dead 67.5\n", + "414 Yes Dead 79.1\n", + "425 Yes Dead 75.9\n", + "452 Yes Dead 73.9\n", + "461 Yes Dead 80.5\n", + "468 Yes Dead 86.8\n", + "521 Yes Dead 78.2\n", + "525 Yes Dead 88.7\n", + "536 Yes Dead 87.9\n", + "541 Yes Dead 82.6\n", + "555 Yes Dead 76.9\n", + "558 Yes Dead 75.2\n", + "567 Yes Dead 66.1\n", + "572 Yes Dead 66.8\n", + "650 Yes Dead 81.8\n", + "657 Yes Dead 88.3\n", + "693 Yes Dead 65.6\n", + "700 Yes Dead 66.1\n", + "760 Yes Dead 67.2\n", + "913 Yes Dead 84.4\n", + "974 Yes Dead 74.8\n", + "997 Yes Dead 82.0\n", + "1001 Yes Dead 83.6\n", + "1040 Yes Dead 78.3\n", + "1080 Yes Dead 88.6\n", + "1093 Yes Dead 84.3\n", + "1142 Yes Dead 71.0\n", + "1182 Yes Dead 85.2\n", + "1222 Yes Dead 87.9\n", + "1295 Yes Dead 82.4\n", + "42\n" + ] + } + ], + "source": [ + "fumeuse_morte_18_34 = fumeuse[(fumeuse[\"Status\"]==\"Dead\") & (fumeuse[\"Age\"] >= 18) & (fumeuse[\"Age\"] <= 34)]\n", + "print(fumeuse_morte_18_34)\n", + "print(len(fumeuse_morte_18_34))\n", + "\n", + "fumeuse_morte_35_54 = fumeuse[(fumeuse[\"Status\"]==\"Dead\") & (fumeuse[\"Age\"] > 34) & (fumeuse[\"Age\"] <= 54)]\n", + "print(fumeuse_morte_35_54)\n", + "print(len(fumeuse_morte_35_54))\n", + "\n", + "fumeuse_morte_55_64 = fumeuse[(fumeuse[\"Status\"]==\"Dead\") & (fumeuse[\"Age\"] > 55) & (fumeuse[\"Age\"] <= 64)]\n", + "print(fumeuse_morte_55_64)\n", + "print(len(fumeuse_morte_55_64))\n", + "\n", + "fumeuse_morte_64plus = fumeuse[(fumeuse[\"Status\"]==\"Dead\") & (fumeuse[\"Age\"] > 64) ]\n", + "print(fumeuse_morte_64plus)\n", + "print(len(fumeuse_morte_64plus))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Parmis les fumeuses qui sont mortes, il y a 5 personnes qui ont entre 18 et 35 ans, 41 qui ont entre 35 et 54 ans, 50 qui ont entre 55 et 64 ans et 42 qui ont plus de 64 ans. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Smoker Status Age\n", + "146 No Dead 26.3\n", + "515 No Dead 25.3\n", + "564 No Dead 29.8\n", + "627 No Dead 29.3\n", + "674 No Dead 20.2\n", + "1255 No Dead 28.5\n", + "6\n", + " Smoker Status Age\n", + "28 No Dead 36.9\n", + "214 No Dead 35.2\n", + "298 No Dead 52.4\n", + "308 No Dead 47.9\n", + "343 No Dead 47.0\n", + "607 No Dead 47.2\n", + "684 No Dead 42.6\n", + "698 No Dead 47.3\n", + "729 No Dead 40.0\n", + "752 No Dead 40.1\n", + "852 No Dead 43.7\n", + "858 No Dead 41.9\n", + "914 No Dead 39.1\n", + "934 No Dead 41.3\n", + "970 No Dead 47.5\n", + "1020 No Dead 50.2\n", + "1164 No Dead 51.1\n", + "1238 No Dead 43.3\n", + "1261 No Dead 42.1\n", + "19\n", + " Smoker Status Age\n", + "2 No Dead 57.5\n", + "14 No Dead 60.6\n", + "70 No Dead 58.1\n", + "85 No Dead 55.9\n", + "134 No Dead 62.3\n", + "149 No Dead 58.3\n", + "155 No Dead 63.5\n", + "193 No Dead 60.1\n", + "224 No Dead 58.4\n", + "259 No Dead 55.1\n", + "269 No Dead 58.6\n", + "312 No Dead 56.1\n", + "322 No Dead 56.3\n", + "337 No Dead 63.2\n", + "390 No Dead 61.2\n", + "408 No Dead 57.2\n", + "412 No Dead 55.6\n", + "463 No Dead 59.0\n", + "511 No Dead 63.8\n", + "570 No Dead 56.2\n", + "588 No Dead 55.3\n", + "596 No Dead 62.7\n", + "616 No Dead 61.9\n", + "665 No Dead 55.9\n", + "669 No Dead 57.9\n", + "707 No Dead 58.4\n", + "721 No Dead 58.5\n", + "748 No Dead 60.6\n", + "776 No Dead 62.1\n", + "820 No Dead 56.1\n", + "839 No Dead 63.2\n", + "841 No Dead 63.8\n", + "846 No Dead 61.8\n", + "943 No Dead 64.0\n", + "959 No Dead 59.9\n", + "1065 No Dead 60.2\n", + "1092 No Dead 63.5\n", + "1166 No Dead 56.9\n", + "1214 No Dead 62.5\n", + "1306 No Dead 61.4\n", + "40\n", + " Smoker Status Age\n", + "11 No Dead 66.0\n", + "20 No Dead 73.2\n", + "41 No Dead 69.7\n", + "43 No Dead 75.8\n", + "45 No Dead 83.0\n", + "52 No Dead 73.3\n", + "55 
No Dead 73.7\n", + "69 No Dead 77.6\n", + "78 No Dead 85.0\n", + "82 No Dead 72.8\n", + "99 No Dead 65.7\n", + "100 No Dead 76.5\n", + "104 No Dead 70.5\n", + "105 No Dead 81.8\n", + "114 No Dead 89.3\n", + "131 No Dead 88.4\n", + "147 No Dead 66.0\n", + "150 No Dead 86.8\n", + "153 No Dead 75.8\n", + "163 No Dead 89.7\n", + "169 No Dead 76.7\n", + "175 No Dead 82.9\n", + "181 No Dead 75.0\n", + "182 No Dead 66.3\n", + "183 No Dead 87.0\n", + "184 No Dead 79.5\n", + "196 No Dead 87.6\n", + "221 No Dead 65.8\n", + "230 No Dead 78.3\n", + "249 No Dead 84.3\n", + "... ... ... ...\n", + "1085 No Dead 84.7\n", + "1086 No Dead 85.0\n", + "1088 No Dead 85.0\n", + "1108 No Dead 89.9\n", + "1118 No Dead 74.8\n", + "1126 No Dead 89.5\n", + "1139 No Dead 71.1\n", + "1151 No Dead 75.9\n", + "1152 No Dead 88.0\n", + "1153 No Dead 66.8\n", + "1156 No Dead 83.8\n", + "1171 No Dead 86.7\n", + "1173 No Dead 71.6\n", + "1174 No Dead 78.3\n", + "1176 No Dead 84.8\n", + "1185 No Dead 73.8\n", + "1194 No Dead 83.3\n", + "1196 No Dead 76.2\n", + "1199 No Dead 67.8\n", + "1224 No Dead 75.1\n", + "1225 No Dead 87.9\n", + "1241 No Dead 79.0\n", + "1242 No Dead 65.1\n", + "1246 No Dead 89.2\n", + "1258 No Dead 74.4\n", + "1280 No Dead 83.9\n", + "1283 No Dead 86.3\n", + "1291 No Dead 71.3\n", + "1300 No Dead 71.0\n", + "1312 No Dead 88.6\n", + "\n", + "[165 rows x 3 columns]\n", + "165\n" + ] + } + ], + "source": [ + "non_fumeuse_morte_18_34 = non_fumeuse[(non_fumeuse[\"Status\"]==\"Dead\") & (non_fumeuse[\"Age\"] >= 18) & (non_fumeuse[\"Age\"] <= 34)]\n", + "print(non_fumeuse_morte_18_34)\n", + "print(len(non_fumeuse_morte_18_34))\n", + "\n", + "non_fumeuse_morte_35_54 = non_fumeuse[(non_fumeuse[\"Status\"]==\"Dead\") & (non_fumeuse[\"Age\"] > 34) & (non_fumeuse[\"Age\"] <= 54)]\n", + "print(non_fumeuse_morte_35_54)\n", + "print(len(non_fumeuse_morte_35_54))\n", + "\n", + "non_fumeuse_morte_55_64 = non_fumeuse[(non_fumeuse[\"Status\"]==\"Dead\") & (non_fumeuse[\"Age\"] > 55) & 
(non_fumeuse[\"Age\"] <= 64)]\n", + "print(non_fumeuse_morte_55_64)\n", + "print(len(non_fumeuse_morte_55_64))\n", + "\n", + "non_fumeuse_morte_64plus = non_fumeuse[(non_fumeuse[\"Status\"]==\"Dead\") & (non_fumeuse[\"Age\"] > 64) ]\n", + "print(non_fumeuse_morte_64plus)\n", + "print(len(non_fumeuse_morte_64plus))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Parmi les non fumeuses qui sont mortes, il y a 6 personnes entre 18 et 34 ans, 19 entre 35 et 54 ans, 40 entre 55 et 64 ans et 165 qui ont plus de 64 ans. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Je peux maintenant calculer le taux de mortalité dans chaque catégorie\n" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mortalite_fumeuse_18_34 0.00859106529209622\n", + "mortalite_non_fumeuse_18_34 0.00819672131147541 \n", + "\n", + "mortalite_fumeuse_35_54 0.070446735395189\n", + "mortalite_non_fumeuse_35_54 0.025956284153005466 \n", + "\n", + "mortalite_fumeuse_55_64 0.0859106529209622\n", + "mortalite_non_fumeuse_55_64 0.0546448087431694 \n", + "\n", + "mortalite_fumeuse_64plus 0.07216494845360824\n", + "mortalite_non_fumeuse_64plus 0.22540983606557377\n" + ] + } + ], + "source": [ + "mortalite_fumeuse_18_34 = len(fumeuse_morte_18_34)/len(fumeuse)\n", + "print(\"mortalite_fumeuse_18_34\", mortalite_fumeuse_18_34 )\n", + "mortalite_non_fumeuse_18_34 = len(non_fumeuse_morte_18_34)/len(non_fumeuse)\n", + "print(\"mortalite_non_fumeuse_18_34\", mortalite_non_fumeuse_18_34, \"\\n\")\n", + "\n", + "mortalite_fumeuse_35_54 = len(fumeuse_morte_35_54)/len(fumeuse)\n", + "print(\"mortalite_fumeuse_35_54\", mortalite_fumeuse_35_54 )\n", + "mortalite_non_fumeuse_35_54 = len(non_fumeuse_morte_35_54)/len(non_fumeuse)\n", + "print(\"mortalite_non_fumeuse_35_54\", mortalite_non_fumeuse_35_54,\"\\n\")\n", + "\n", + "mortalite_fumeuse_55_64 = 
len(fumeuse_morte_55_64)/len(fumeuse)\n", + "print(\"mortalite_fumeuse_55_64\", mortalite_fumeuse_55_64 )\n", + "mortalite_non_fumeuse_55_64 = len(non_fumeuse_morte_55_64)/len(non_fumeuse)\n", + "print(\"mortalite_non_fumeuse_55_64\", mortalite_non_fumeuse_55_64, \"\\n\" )\n", + "\n", + "mortalite_fumeuse_64plus = len(fumeuse_morte_64plus)/len(fumeuse)\n", + "print(\"mortalite_fumeuse_64plus\", mortalite_fumeuse_64plus)\n", + "mortalite_non_fumeuse_64plus = len(non_fumeuse_morte_64plus)/len(non_fumeuse)\n", + "print(\"mortalite_non_fumeuse_64plus\", mortalite_non_fumeuse_64plus)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On remarque qu'entre 18 et 34 ans, le taux de mortalité est quasiment le même entre les fumeuses et les non fumeuses. Cependant, entre 35 et 64 ans, le taux de mortalité est bien plus élevé chez les fumeuses. On peut donc en conclure que le tabac semble induire une mortalité précoce. Étonnamment, chez les plus de 64 ans, le taux de mortalité est plus élevé chez les non fumeuses. On peut expliquer ces résultats par le fait que la plupart des femmes sont mortes avant 64 ans dans le groupe des fumeuses" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Question 3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Cette question est trop compliquée pour mon niveau de code et il ne reste pas assez de temps avant l'évaluation du MOOC pour que j'apprenne comment faire. Désolée" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], "metadata": { "kernelspec": { "display_name": "Python 3", @@ -16,10 +1477,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.3" + "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 } - -- 2.18.1