From 1e241dd3729f2c28d190f8a2417a6f4cb43fec9d Mon Sep 17 00:00:00 2001 From: 19cb9366d9071c7a3249219d01022f01 <19cb9366d9071c7a3249219d01022f01@app-learninglab.inria.fr> Date: Sun, 16 Oct 2022 18:33:15 +0000 Subject: [PATCH] Paradoxe_de_Sympson.ipynb --- module3/exo3/Paradoxe_de_Sympson.ipynb | 1182 ++++++++++++++++++++++++ 1 file changed, 1182 insertions(+) create mode 100644 module3/exo3/Paradoxe_de_Sympson.ipynb diff --git a/module3/exo3/Paradoxe_de_Sympson.ipynb b/module3/exo3/Paradoxe_de_Sympson.ipynb new file mode 100644 index 0000000..b03fa30 --- /dev/null +++ b/module3/exo3/Paradoxe_de_Sympson.ipynb @@ -0,0 +1,1182 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Autour du paradoxe de Simpson" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import roc_auc_score \n", + "from sklearn.metrics import roc_curve\n", + "import pandas as pd\n", + "import numpy as np\n", + "import isoweek" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "data_url = \"https://gitlab.inria.fr/learninglab/mooc-rr/mooc-rr-ressources/-/raw/master/module3/Practical_session/Subject6_smoking.csv\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SmokerStatusAge
0YesAlive21.0
1YesAlive19.3
2NoDead57.5
3NoAlive47.1
4YesAlive81.4
5NoAlive36.8
6NoAlive23.8
7YesDead57.5
8YesAlive24.8
9YesAlive49.5
10YesAlive30.0
11NoDead66.0
12YesAlive49.2
13NoAlive58.4
14NoDead60.6
15NoAlive25.1
16NoAlive43.5
17NoAlive27.1
18NoAlive58.3
19YesAlive65.7
20NoDead73.2
21YesAlive38.3
22NoAlive33.4
23YesDead62.3
24NoAlive18.0
25NoAlive56.2
26YesAlive59.2
27NoAlive25.8
28NoDead36.9
29NoAlive20.2
............
1284YesDead36.0
1285YesAlive48.3
1286NoAlive63.1
1287NoAlive60.8
1288YesDead39.3
1289NoAlive36.7
1290NoAlive63.8
1291NoDead71.3
1292NoAlive57.7
1293NoAlive63.2
1294NoAlive46.6
1295YesDead82.4
1296YesAlive38.3
1297YesAlive32.7
1298NoAlive39.7
1299YesDead60.0
1300NoDead71.0
1301NoAlive20.5
1302NoAlive44.4
1303YesAlive31.2
1304YesAlive47.8
1305YesAlive60.9
1306NoDead61.4
1307YesAlive43.0
1308NoAlive42.1
1309YesAlive35.9
1310NoAlive22.3
1311YesDead62.1
1312NoDead88.6
1313NoAlive39.1
\n", + "

1314 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " Smoker Status Age\n", + "0 Yes Alive 21.0\n", + "1 Yes Alive 19.3\n", + "2 No Dead 57.5\n", + "3 No Alive 47.1\n", + "4 Yes Alive 81.4\n", + "5 No Alive 36.8\n", + "6 No Alive 23.8\n", + "7 Yes Dead 57.5\n", + "8 Yes Alive 24.8\n", + "9 Yes Alive 49.5\n", + "10 Yes Alive 30.0\n", + "11 No Dead 66.0\n", + "12 Yes Alive 49.2\n", + "13 No Alive 58.4\n", + "14 No Dead 60.6\n", + "15 No Alive 25.1\n", + "16 No Alive 43.5\n", + "17 No Alive 27.1\n", + "18 No Alive 58.3\n", + "19 Yes Alive 65.7\n", + "20 No Dead 73.2\n", + "21 Yes Alive 38.3\n", + "22 No Alive 33.4\n", + "23 Yes Dead 62.3\n", + "24 No Alive 18.0\n", + "25 No Alive 56.2\n", + "26 Yes Alive 59.2\n", + "27 No Alive 25.8\n", + "28 No Dead 36.9\n", + "29 No Alive 20.2\n", + "... ... ... ...\n", + "1284 Yes Dead 36.0\n", + "1285 Yes Alive 48.3\n", + "1286 No Alive 63.1\n", + "1287 No Alive 60.8\n", + "1288 Yes Dead 39.3\n", + "1289 No Alive 36.7\n", + "1290 No Alive 63.8\n", + "1291 No Dead 71.3\n", + "1292 No Alive 57.7\n", + "1293 No Alive 63.2\n", + "1294 No Alive 46.6\n", + "1295 Yes Dead 82.4\n", + "1296 Yes Alive 38.3\n", + "1297 Yes Alive 32.7\n", + "1298 No Alive 39.7\n", + "1299 Yes Dead 60.0\n", + "1300 No Dead 71.0\n", + "1301 No Alive 20.5\n", + "1302 No Alive 44.4\n", + "1303 Yes Alive 31.2\n", + "1304 Yes Alive 47.8\n", + "1305 Yes Alive 60.9\n", + "1306 No Dead 61.4\n", + "1307 Yes Alive 43.0\n", + "1308 No Alive 42.1\n", + "1309 Yes Alive 35.9\n", + "1310 No Alive 22.3\n", + "1311 Yes Dead 62.1\n", + "1312 No Dead 88.6\n", + "1313 No Alive 39.1\n", + "\n", + "[1314 rows x 3 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data = pd.read_csv(data_url)\n", + "raw_data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "data = raw_data.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ 
+ { + "name": "stdout", + "output_type": "stream", + "text": [ + "fumeuse_vivante 443\n", + "fumeuse_decedee 139\n", + "non_fumeuse_vivante 502\n", + "non_fumeuse_decedee 230\n", + "nb de fumeuses 582\n", + "nb de non fumeuses 732\n", + "nb femmes 1314\n" + ] + } + ], + "source": [ + "fumeuse_vivante=0\n", + "fumeuse_decedee=0\n", + "non_fumeuse_vivante=0\n", + "non_fumeuse_decedee=0\n", + "\n", + "\n", + "for i in range(1314):\n", + " if (data['Smoker'][i]== 'Yes') and (data['Status'][i] == 'Alive'): # si elle est fumeuse et qu'elle est vivante \n", + " fumeuse_vivante= fumeuse_vivante+1\n", + " \n", + " elif (data['Smoker'][i]== 'Yes') and (data['Status'][i] == 'Dead'): # si elle est fumeuse et qu'elle est décédée \n", + " fumeuse_decedee= fumeuse_decedee+1 \n", + " \n", + " elif (data['Smoker'][i]== 'No') and (data['Status'][i] == 'Alive'): # si elle est non fumeuse et qu'elle est vivante \n", + " non_fumeuse_vivante= non_fumeuse_vivante+1\n", + " \n", + " elif (data['Smoker'][i]== 'No') and (data['Status'][i] == 'Dead'): # si elle est non fumeuse et qu'elle est décédée\n", + " non_fumeuse_decedee= non_fumeuse_decedee+1\n", + " \n", + "print('fumeuse_vivante',fumeuse_vivante)\n", + "print('fumeuse_decedee',fumeuse_decedee)\n", + "print('non_fumeuse_vivante',non_fumeuse_vivante)\n", + "print('non_fumeuse_decedee',non_fumeuse_decedee)\n", + "\n", + "print('nb de fumeuses', fumeuse_vivante+ fumeuse_decedee)\n", + "print('nb de non fumeuses', non_fumeuse_vivante+ non_fumeuse_decedee)\n", + "\n", + "\n", + "\n", + "\n", + "nb_femmes= fumeuse_vivante+fumeuse_decedee+non_fumeuse_vivante+non_fumeuse_decedee\n", + "print('nb femmes', nb_femmes)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Effectif dans chaque catégorie \n", + "Nombres = {\"Smoker_Alive\": fumeuse_vivante, \"Smoker_Dead\": fumeuse_decedee, \"No_Smoker_Alive\": non_fumeuse_vivante, \"No_Smoker_Dead\": non_fumeuse_decedee}\n", + "\n", + "x = [1,2,3,4]\n", + "height = [fumeuse_vivante, fumeuse_decedee, non_fumeuse_vivante, non_fumeuse_decedee]\n", + "width = 0.05\n", + "BarName = ['Fumeuse vivante', 'Fumeuse décédée', 'Non fumeuse vivante', 'Non fumeuse décédée']\n", + "\n", + "plt.bar(x, height , width )\n", + "\n", + "plt.xlim(0,4.5)\n", + "plt.ylim(0,510)\n", + "plt.ylabel('Effectifs ')\n", + "plt.xticks(x, BarName, rotation=40)\n", + "\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "taux de mortalité des fumeuses 0.23883161512027493\n", + "taux de mortalité des non fumeuses 0.31420765027322406\n" + ] + } + ], + "source": [ + "#Taux de mortalité\n", + "\n", + "taux_mortalite_fumeuse= Nombres[\"Smoker_Dead\"]/ (Nombres[\"Smoker_Dead\"]+Nombres[\"Smoker_Alive\"])\n", + "taux_mortalite_non_fumeuse= Nombres[\"No_Smoker_Dead\"]/ (Nombres[\"No_Smoker_Dead\"]+Nombres[\"No_Smoker_Alive\"])\n", + "\n", + "print('taux de mortalité des fumeuses',taux_mortalite_fumeuse)\n", + "print('taux de mortalité des non fumeuses',taux_mortalite_non_fumeuse)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYwAAAElCAYAAAD3KtVsAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAHABJREFUeJzt3X2YH2V97/H3x8RIjTyorKAJUcS0mLagEAMKilFBQG1AsIAoyoMRjyn1cKyHeo62Ph5ti56DDcZoo0XF2HrARggCRSxHgZKgPAgSXAPIGpTwLAiEwOf8MbP6Y/nt7r0hs7/J7ud1XXvtb2bumf1yXRs+O/fMfd+yTURExGie0usCIiJiy5DAiIiIIgmMiIgoksCIiIgiCYyIiCiSwIiIiCKNBoakAyWtkdQv6ZQuxxdIukbSVZJWS9q349jNkq4dPNZknRERMTo1NQ5D0hTgRmB/YABYBRxl+/qONs8AHrBtSbsB/2J71/rYzcBc23c0UmBERIxJk3cY84B+22ttbwCWAws6G9i+379PrOlARhFGRLRUk4ExA7i1Y3ug3vc4kg6VdANwLnBcxyEDF0i6UtLCBuuMiIgCUxu8trrse8IdhO2zgbMlvQr4GPC6+tA+ttdJeg5woaQbbF/yhB9ShclCgOnTp++56667brb/gIiIie7KK6+8w3ZfSdsmA2MA2KljeyawbrjGti+RtIuk7W3fYXtdvf92SWdTdXE9ITBsLwWWAsydO9erV+f5eEREKUm3lLZtsktqFTBb0s6SpgFHAis6G0h6kSTVn/cApgF3Spouaet6/3TgAOAnDdYaERGjaOwOw/ZGSYuA84EpwDLb10k6sT6+BDgMOEbSI8CDwBH1G1M7UHVTDdZ4pu3vNlVrRESMrrHXanshXVIREWMj6Urbc0vaZqR3REQUSWBERESRBEZERBRJYERERJEERkREFElgREREkQRGREQUSWBERESRBEZERBRJYERERJEERkREFElgREREkQRGREQUSWBERESRBEZERBRJYERERJEERkREFGlsidaIiDZ5wSnnDnvs5k+9YRwr2XLlDiMiIookMCIiokgCIyIiiiQwIiKiSKOBIelASWsk9Us6pcvxBZKukXSVpNWS9i09NyIixldjgSFpCrAYOAiYAxwlac6QZhcBu9t+CXAc8KUxnBsREeOoyTuMeUC/7bW2NwDLgQWdDWzfb9v15nTApedGRMT4ajIwZgC3dmwP1PseR9Khkm4AzqW6yyg+NyIixk+TgaEu+/yEHfbZtncFDgE+NpZzASQtrJ9/rF6/fv0mFxsRESNrMjAGgJ06tmcC64ZrbPsSYBdJ24/lXNtLbc+1Pbevr+/JVx0REV01GRirgNmSdpY0DTgSWNHZQNKLJKn+vAcwDbiz5NyIiBhfjc0lZXujpEXA+cAUYJnt6ySdWB9fAhwGHCPpEeBB4Ij6IXjXc5uqNSIiRtfo5IO2VwIrh+xb0vH508CnS8+NiIjeyUjviIgoksCIiIgiCYyIiCiSwIiIiCIJjIiIKJLAiIiIIgmMiIgoksCIiIgiCYyIiCiSwIiIiCIJjIiIKJLAiIiIIgmMiIgoksCIiIgiCYyIiCiSwIiIiCIJjIiIKJLAiIiIIgmMiIgoksCIiIgiCYyIiCiSwIiIiCKNBoakAyWtkdQv6ZQux4+WdE39damk3TuO3SzpWklXSVrdZJ0RETG6qU1dWNIUYDGwPzAArJK0wvb1Hc1uAvazfbekg4ClwF4dx+fbvqOpGiMiolyTdxjzgH7ba21vAJYDCzob2L7U9t315uXAzAbriYiIJ6HJwJgB3NqxPVDvG87xwHkd2wYukHSlpIUN1BcREWPQWJcUoC773LWhNJ8qMPbt2L2P7XWSngNcKOkG25d0OXchsBBg1qxZT77qiIjoqsk7jAFgp47tmcC6oY0k7QZ8CVhg+87B/bbX1d9vB86m6uJ6AttLbc+1Pbe
vr28zlh8REZ2aDIxVwGxJO0uaBhwJrOhsIGkWcBbwdts3duyfLmnrwc/AAcBPGqw1IiJG0ViXlO2NkhYB5wNTgGW2r5N0Yn18CfBh4NnA6ZIANtqeC+wAnF3vmwqcafu7TdUaERGja/IZBrZXAiuH7FvS8fkE4IQu560Fdh+6PyIieqe4S0rS05osJCIi2q1rYEia2vF5nqRrgZ/V27tL+tw41RcRES0xXJfUuyX9yPZlwGnAG4FvA9i+un4NNqKVXnDKucMeu/lTbxjHSiImluG6pL4AHD3YxvYtQ44/2lxJERHRRl3vMGxvBBbVm7dKmge4nh/qL4Abu50XERETV8lD7/cAJwOzgF8DewMnNllURES0T8lrtS+0fWTnDkl7A3cO0z4iIiagkjuM07vsW7y5C4mIiHYb9g6jfm7xcqBP0kkdh7YBntp0YRER0S4jdUlNB7av23TO6vcb4C1NFhUREe0zbGDYvhi4WNKX66k6IiJiEhupS+pU2/8NOFXSE9axsP3mRiuLiIhWGalL6pv1938cj0IiIqLdRuqSuqL+ftH4lRMREW01UpfUjxlmSVUA23s0UlFERLTSSF1Sh49bFRER0XojdUn9fDwLiYiIdht1pLekl0m6XNK9kh6S9LCk+8ajuIiIaI+SuaROB94GLAfmAe8EdmqwpoiIaKGSuaSeYnsNMNX2I7a/CLyu4boiIqJlSu4wHpA0Dbha0ieB24BnNFtWRES0TckdxjvrdouoVtqbDRzWYE0REdFCJYFxsO2HbN9j+0O2TwJeX3JxSQdKWiOpX9IpXY4fLema+utSSbuXnhsREeOrJDCO67Lv+NFOqpdzXQwcBMwBjpI0Z0izm4D9bO8GfAxYOoZzIyJiHI000vsI4EjghZLO6ji0DXBPwbXnAf2DM91KWg4sAK4fbGD70o72lwMzS8+NiIjxNdJD7yuolmGdyeNX2PsN8OOCa88Abu3YHgD2GqH98cB5m3huREQ0bKSR3jdJ+gVw1yZOQKhul+3aUJpPFRj7bsK5C4GFALNmzRp7lRERUWTEZxi2HwU2SNpmE649wOMH+M0E1g1tJGk34EvAAtt3juXcusaltufantvX19etSUREbAYl4zDupxqDcQHwwOBO2yePct4qYLaknYFfUj0PeWtnA0mzgLOAt9u+cSznRkTE+CoJjH+vv8bE9kZJi4DzgSnAMtvXSTqxPr4E+DDwbOB0SQAb67uFrueOtYaIiNh8Rg0M2/8kaSrwonpXv+2NJRe3vRJYOWTfko7PJwAnlJ4bERG9M2pgSHol8FWqriEBO0p6u+0fNl1cRES0R0mX1GepRntfDyDpxVQBMrfJwiIiol1KRnpPGwwLANs/BaY1V1JERLRRyR3GjyR9gequAuBoygbuRUTEBFISGCcCJwEfoHqGcQnwuSaLioiI9il5S+oh4O/qr4iImKRK1vQ+UNIqSbdLukvS3ZLuGo/iIiKiPUq6pP4R+HPgWuCxZsuJiIi2KgmMAeAq2wmLiIhJrCQwPgB8R9L3gYcHd9o+ramiIiKifUoC4yPAI8B2pEsqImLSKgmM59jes/FKIiKi1UpGel8k6TWNVxIREa1WEhjvAv5d0v15rTYiYvIq6ZLavvEqIiKi9UpGej86HoVERES7lXRJRUREJDAiIqJMUWBI2lvSMfXnZ0ua1WxZERHRNiVLtP5PYB9gF+AMYCvgTGDfZkuLiIg2KbnDOBw4GHgAwPYvgW2aLCoiItqnJDAetm3AAJKe3mxJERHRRiWBcZakxcC2ko4FLgCWlVy8XktjjaR+Sad0Ob6rpMskPSzp/UOO3SzpWklXSVpd8vMiIqI5JeMwPi3pIGADsDvwCdvnjXaepCnAYmB/qinSV0laYfv6jmZ3US3/esgwl5lv+47RflZERDSvZKQ3dUCMGhJDzAP6ba8FkLQcWAD8LjBs3w7cLukNY7x2RESMs2EDQ9Ld1M8turH9rFGuPQO4tWN7ANhrDLU
ZuECSgS/YXjqGcyMiYjMb6Q5je0DA3wDrga/W20cDJQ++1WXfsAHUxT6210l6DnChpBtsX/KEHyItBBYCzJqV4SEREU0Z9qG37UdtbwQOsH2a7btt32X7c8ChBdceAHbq2J4JrCstzPa6+vvtwNlUXVzd2i21Pdf23L6+vtLLR0TEGJW8JWVJR0gSgKQjCq+9CpgtaWdJ04AjgRUlJ0qaLmnrwc/AAcBPCn9uREQ0oOSh91uBzwGfl/QYcDlVt9SIbG+UtAg4H5gCLLN9naQT6+NLJO0IrKYaCPiYpPcBc6i6w86uM2oqcKbt7475vy4iIjabktdq1wKb9BaT7ZXAyiH7lnR8/hVVV9VQ91G9whsRES2R2WojIqJIAiMiIookMCIiosiogSHpy4NvLNXbMyVd0GxZERHRNiV3GKuBKyQdUE8+eDHw+WbLioiItil5S2qxpKupguIOYA/btzVeWUREtEpJl9RRVNOZHwd8DfiOpD9purCIiGiXkoF7RwOvqsdMIOnlwNfJOImIiEmlpEvqjUO2L5M0lllnIyJiAhg1MCQNN634ws1cS0REtFhJl9RFHZ+3opqp9tZh2kZExARV0iX1zc5tSV8FLmysooiIaKVNGem9M/D8zV1IRES0W8kzjM6lWp8C3AWc0mRRERHRPiXPMLbv+PyY7bEssxoRERNEyTOMRyVtC+wCbFUvaoTtSxuuLSIiWqSkS+p44GRgBnAt8DKqVfde3WhlERHRKiUPvd8HzAVutv1KYE8gc0lFREwyJYHxkO0HASRNs30dsGuzZUVERNsM2yUlaartjcBtkrYDvgOcL+ku4NfjVWBERLTDSM8wrqCayvzP6u0PSXotsC1wbuOVRUREq4wUGBq6w/ZF3RpGRMTEN1Jg9Ek6ebiDtj/TQD0REdFSIz30ngI8A9h6mK9RSTpQ0hpJ/ZKeMDpc0q6SLpP0sKT3j+XciIgYXyPdYdxm+6ObemFJU4DFwP7AALBK0grb13c0uws4CThkE86NiIhxNNIdxhOeYYzRPKDf9lrbG4DlwILOBrZvt70KeGSs50ZExPgaKTBe+ySvPYPHr5sxUO/brOdKWihptaTV69ev36RCIyJidMMGhu27nuS1u92hlE5cWHyu7aW259qe29fXV1xcRESMzaash1FqANipY3smsG4czo2IiAY0GRirgNmSdpY0DTgSWDEO50ZERANK1sPYJLY3SloEnE/1iu4y29dJOrE+vkTSjsBqYBvgMUnvA+bYvq/buU3VGhERo2ssMABsrwRWDtm3pOPzr6i6m4rOjYiI3mmySyoiIiaQBEZERBRJYERERJEERkREFElgREREkQRGREQUSWBERESRBEZERBRJYERERJEERkREFElgREREkQRGREQUSWBERESRBEZERBRJYERERJEERkREFElgREREkQRGREQUSWBERESRBEZERBRJYERERJEERkREFGk0MCQdKGmNpH5Jp3Q5Lkmn1cevkbRHx7GbJV0r6SpJq5usMyIiRje1qQtLmgIsBvYHBoBVklbYvr6j2UHA7PprL+Dz9fdB823f0VSNERFRrsk7jHlAv+21tjcAy4EFQ9osAM5w5XJgO0nPbbCmiIjYRE0Gxgzg1o7tgXpfaRsDF0i6UtLC4X6IpIWSVktavX79+s1QdkREdNNkYKjLPo+hzT6296DqtnqvpFd1+yG2l9qea3tuX1/fplcbEREjajIwBoCdOrZnAutK29ge/H47cDZVF1dERPRIk4GxCpgtaWdJ04AjgRVD2qwAjqnfltobuNf2bZKmS9oaQNJ04ADgJw3WGhERo2jsLSnbGyUtAs4HpgDLbF8n6cT6+BJgJXAw0A/8Fji2Pn0H4GxJgzWeafu7TdUaERGjaywwAGyvpAqFzn1LOj4beG+X89YCuzdZW0REjE1GekdERJEERkREFElgREREkQRGREQUSWBERESRBEZERBRJYERERJEERkREFElgREREkQRGREQ
USWBERESRBEZERBRJYERERJEERkREFElgREREkQRGREQUSWBERESRBEZERBRJYERERJEERkREFElgREREkQRGREQUaTQwJB0oaY2kfkmndDkuSafVx6+RtEfpuRERMb4aCwxJU4DFwEHAHOAoSXOGNDsImF1/LQQ+P4ZzIyJiHDV5hzEP6Le91vYGYDmwYEibBcAZrlwObCfpuYXnRkTEOJra4LVnALd2bA8AexW0mVF4LgCSFlLdnQDcL2nNk6g5Jp7tgTsGN/TpHlYSbZLfi997fmnDJgNDXfa5sE3JudVOeymwdGylxWQhabXtub2uI9olvxebpsnAGAB26tieCawrbDOt4NyIiBhHTT7DWAXMlrSzpGnAkcCKIW1WAMfUb0vtDdxr+7bCcyMiYhw1dodhe6OkRcD5wBRgme3rJJ1YH18CrAQOBvqB3wLHjnRuU7XGhJbuyugmvxebQHbXRwMRERGPk5HeERFRJIERERFFEhgREVEkgRER8SRJ6jZ2bMJJYER0GPyHL2lqx778O4lhSXqK67eHJD2t1/U0KW9JRQwhaX/g3cCvgOW2f9DjkmILIOnDwC7ABmCx7at6XNJml7+cIjpIOhT4W+BDVDMOLJU0vadFRetJ+nvgD4H3A38M/NfeVtSMBEZMavVU+p2eAfwlsDuwG3C87QfGvbBotS7dlOuB91D9oXEbsHAidmVOuP+giFKSpth+tP58aL37j4AvAYcDR9i+TNJ8Scf0qs5oj8FnXLYfkzRb0kH1of2AnwPrbR9m+2HgBEkH9KrWJiQwYtKy/aikGZJ+QLUGC8BpwHbAhbZvqec4Ow14pFd1Rnt0PNw+DPgOMNhdeTKwEbiqPv4BqjuOtT0oszF56B2TiiR1/KN/OvAV4Czby+vuqT+gWo9lKfBL4IXAx22f06OSowXqN6Eeqz/vCnwZeIvtAUl9VPPyzaRaNfTnVF2bx9q+vVc1N6HJ6c0jWqcjLPpsr697GOZJ2gV4EdWywCcDr6ZaZOcptn/do3KjJTrCYhuqZxS3AX8t6T7gT6n+sHgX8Apga9t39qrWJuUOIyYdSfOBtwCnAw8C/wc4C7gU2BE4GlhU90PHJDd4Vyrpg8CzgP8NvIDq1esvA2uANwEP2v7nnhU6DvIMIyajn1E9kzgMeMD2G20vo5pi/78DaxMW0eGp9fdvU42zeBNwue232/4e1dt0f0m1INyElsCISUHS0yT9g6Q/tj0A/BPwHOCtkmZK2hk4F7jI9v/qabHRGnVX5SckzbJ9PdULEAcDh0raVtKBwN8BJ9i+qJe1jocERkxIXeb2eRrwGPBBSdvavgY4B3gncAhwL3C47X8Y10KjVbqMy9mG6kWI4yVtZftiqoXfPgPsDfwY2Nf2D8e30t5IYMSEM+RNqP0kHUzVrbCY6mHlxwBsn1dvzwA22l7To5KjBeo3oQbH5RxT/95cD3wd2Bb4L3XTpcBdwCzbv7Z9b08K7oE89I4JS9IpwJuBi4HXACcBDwAfBh6uP08BTspo7oDfTTp5JnA31R8Zs4G3Ai8F3gbcB+wMfMv253tVZ6/kDiMmJEm7AS+2PY9qzfgpwI11V9SJwE3AOtuZ+iM6vRn4qe13UwVGv+1bqbovPwT8Glg2GcMCcocRE0TnwKp6ewbwPqqxFVOAt9q+X9JLJuIsojF2Q39n6n0HA/OpxlN83/b/qAd4zrG9uhd1tkkCIyYMSXOoJg28CVgFfAt4CHiH7Q2S3k41XcNhwK+cX/4A6t+LDcBPgXuAC4Gltk+tj58OPBN4B/DIZP69SWDEFqvLdA0XUYXEAuB4qv7mDwC/qT/PB95l+4reVBxtMGTSycOBT1E9tzgQOBaYQzVq+2fATlRv1/257Q29qbg9EhixRRr6JhRVt9Mf2D5X0huBr1I9qHyYaibRHYEzJ9rcPjE2Q/7IWADsAJxn+1ZJx1G9CbU/1RidFwPb2f5Kr+ptm8wlFVucIWHxEeAIqi6FH0r6ge1
zJH2caqqPObaX97DcaIn69+YxSc+kuqN4OvB84GFJy20vk7QT8F1gv7xm/UR5Syq2OPW8Pk+VdAbwsO1dgU9TTe3xxrrNqVSv076td5VGG3SsYeF6RP+3gG/Y3o9qtuLdgJfUzT9GNRjvxT0otfXSJRVbnMFuBUm3UL32+FpJWwEnUE0Kd5nt/9vTIqOVJL0AuIxqHMVfSHoGVUj8FvgX21d33sHG4+UOI7Yo9ZtQH5X0NGBP4KWSjrf9EPANqreidpO0TZfpQWIS6VwiVdJxkj5q+2aqt+ReK+kQ2/dTTfPxR9R3FQmL4eUOI1pt6F97kmYBXwQusH2qpL2o+pwPsf0f9fF7bN/Xo5KjxyRNtb1xyL4/AZYDn7D9DUlvAf6aas32H0t6vu1belHvliR3GNFakl7Z8XD7zZJm2/4F1Yjb+ZLeYfs/qaaWvljSs23/ImExeUmaDZxXr7c9XdJRkp5r+yfAX1EterSf7X+lepZxRn23+ote1r2lyFtS0Tr1WyzfAm4E/l89xfR8YG9JH7d9haSvAB+XdJftMyStn6irnEWZ+n/87wXOAG6hmol4T2C6pK/ZPq8er/N1Sa+z/UlJ38naJ+VyhxGtIumlwPeo1tl+D4Dtn1OtbDaVaqQ2tr9FtWDNcZK2r2eejUms/h//Q1SD7lYClwOXUC2h+qa6zWepFs9aVL88cW2Pyt0i5Q4j2mYasLvtxQCSPke1BOZi4HnAm+qxF2uA9cDf276jV8VGO3QMyLuaKjDOsX2NpBup1mZ/jaQdqNbe/mfg1KHzSMXo8tA7WkfSe6lmDV0HiGo6jwfrLoc9qEbj9gHvre8+IoDfPcPYCvgm8FnbX5T0LOBVwOFUS/K+u5c1bskSGNFK9Z3FQbZfVG9P65zLp3M+oIhBg78XkuYDy4BjbX+/PpbxFU9SnmFEW50ErJc0uMrZRnjcqN2ExSTVOb5G0rTBZVU7wkKullL9CHCmpBdCxldsDnmGEa1UT+MwH7hB0i31pIL5C3GSGzKP2OHA0cAdkv7K9j2dYzBsf0XS86iei8VmkC6paLX6H/yVVA8rH0pgBICkVwN/A3ySauT2XrZfWh+bQvU3Rx5qb2YJjGg9SVvVU3/EJDXkzuIQqterL7P9t/W+84Hf2D58aPvYfPIMI1ovYTG51a/MuuPZxX9QvVY9U9IfAth+PbCnpMX1dsKiAQmMiGitjpmJZ1M9wP4Q8Grgg1TPJl4vaWbd/BVUA/WiIQmMiGgdSdsD1GHxp1RTxawC+oHTgNcBHwX2ogqNZ9m+zfY3e1XzZJC3pCKiVSS9EniLpC/Yvg54JvA925+pj68Bvg3Mo5qp+MXAA72qdzJJYERE2zxCtaDRmyWtpxq5/YrBg7Z/JOkc4Hm2v9ajGieldElFRM/VC17tC2D7cuDfqOaAOsr2BcBtkr4p6en1QLx5VEES4yiv1UZET9XjJq4G5lCtfncZVZfT64DXA1fYXi7p28A9VPOJLbb9hR6VPGklMCKi5yS9DLgQOAe4CXg51ZK7OwDbUAXIfwLTgR1t9/eo1EktgRERrSDpaKo7jBcC+wCvBN4GPAjcS7Wc6vW9qzASGBHRGpI+A7zG9kvq7ddQhcfewIKha3XH+EpgRESrSPo3YJrtg3pdSzxeAiMiWkdSP3Cx7Xf1upb4vYzDiIg2ehnV21DRIrnDiIiIIhm4FxERRRIYERFRJIERERFFEhgREVEkgREREUUSGBERUSSBERERRf4/QdSOQLml3MsAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "x = [1,2]\n", + "height = [taux_mortalite_fumeuse, taux_mortalite_non_fumeuse]\n", + "width = 0.05\n", + "BarName = ['Fumeuse', 'Non fumeuse']\n", + "\n", + "plt.bar(x, height , width )\n", + "\n", + "plt.xlim(0,2.5)\n", + "plt.ylim(0,0.35)\n", + "\n", + "plt.ylabel('Taux de mortalité ')\n", + "\n", + "plt.xticks(x, BarName, rotation=40)\n", + "\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Intervalle de confiance des fumeuses: [0.21124475 0.26641848]\n", + "Intervalle de confiance des non fumeuses: [0.28662079 0.34179451]\n" + ] + } + ], + "source": [ + "#Intervalles de confiance par rapport aux taux de mortalité\n", + "\n", + "print('Intervalle de confiance des fumeuses:', np.array( [taux_mortalite_fumeuse-1/np.sqrt(nb_femmes), taux_mortalite_fumeuse + 1/np.sqrt(nb_femmes)]))\n", + "\n", + "print('Intervalle de confiance des non fumeuses:', np.array( [taux_mortalite_non_fumeuse-1/np.sqrt(nb_femmes), taux_mortalite_non_fumeuse + 1/np.sqrt(nb_femmes)]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "L'intervalle de confiance de la mortalité chez les non fumeuses a des valeurs plus élévées que celui chez les fumeuses. Intuitivement, on pourrait penser le contraire. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Classes d'âge" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "catégorie1_fumeuse 181\n", + "catégorie2_fumeuse 237\n", + "catégorie3_fumeuse 115\n", + "catégorie4_fumeuse 49\n", + "nb_fumeuse 582\n", + "catégorie1_non_fumeuse 219\n", + "catégorie2_non_fumeuse 199\n", + "catégorie3_non_fumeuse 121\n", + "catégorie4_fumeuse 193\n", + "nb de non fumeuse 732\n" + ] + } + ], + "source": [ + "# Effectifs selon les tranches d'âge \n", + "\n", + "catégorie1_fumeuse=0 #18-34 ans\n", + "catégorie2_fumeuse=0 # 34-54 ans\n", + "catégorie3_fumeuse=0 # 55-64 ans\n", + "catégorie4_fumeuse=0 # plus de 65 ans\n", + "\n", + "\n", + "catégorie1_non_fumeuse=0\n", + "catégorie2_non_fumeuse=0\n", + "catégorie3_non_fumeuse=0\n", + "catégorie4_non_fumeuse=0\n", + "\n", + "\n", + " \n", + "for i in range(1314):\n", + " \n", + " # tranches d'age pour les fumeuses\n", + " if (data['Smoker'][i]== 'Yes') and (18<=data['Age'][i] <=34): # si elle est fumeuse et qu'elle est dans la catégorie 1\n", + " catégorie1_fumeuse= catégorie1_fumeuse+1\n", + " \n", + " elif (data['Smoker'][i]== 'Yes') and (3464): # si elle est fumeuse et qu'elle est dans la catégorie 4\n", + " catégorie4_fumeuse= catégorie4_fumeuse+1 \n", + " \n", + " \n", + " # tranches d'age pour les non fumeuses\n", + " if (data['Smoker'][i]== 'No') and (18<=data['Age'][i] <=34): # si elle est non fumeuse et qu'elle est dans la catégorie 1\n", + " catégorie1_non_fumeuse= catégorie1_non_fumeuse+1\n", + " \n", + " elif (data['Smoker'][i]== 'No') and (3464): # si elle est non fumeuse et qu'elle est dans la catégorie 4\n", + " catégorie4_non_fumeuse= catégorie4_non_fumeuse+1 \n", + " \n", + " \n", + "print('catégorie1_fumeuse',catégorie1_fumeuse)\n", + "print('catégorie2_fumeuse',catégorie2_fumeuse)\n", + 
"print('catégorie3_fumeuse',catégorie3_fumeuse)\n", + "print('catégorie4_fumeuse',catégorie4_fumeuse)\n", + "\n", + "print('nb_fumeuse',catégorie1_fumeuse+catégorie2_fumeuse+catégorie3_fumeuse+catégorie4_fumeuse)\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "print('catégorie1_non_fumeuse',catégorie1_non_fumeuse)\n", + "print('catégorie2_non_fumeuse',catégorie2_non_fumeuse)\n", + "print('catégorie3_non_fumeuse',catégorie3_non_fumeuse)\n", + "print('catégorie4_fumeuse',catégorie4_non_fumeuse)\n", + "\n", + "print('nb de non fumeuse',catégorie1_non_fumeuse+catégorie2_non_fumeuse+catégorie3_non_fumeuse+catégorie4_non_fumeuse)\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "x = [1,2,3,4]\n", + "height = [catégorie1_fumeuse, catégorie2_fumeuse, catégorie3_fumeuse, catégorie4_fumeuse]\n", + "width = 0.05\n", + "\n", + "BarName = ['18-34 ans', '34-54 ans', '55-64 ans','plus de 65 ans']\n", + "\n", + "plt.bar(x, height , width )\n", + "\n", + "plt.xlim(0,4.5)\n", + "plt.ylim(0,240)\n", + "#plt.grid()\n", + "\n", + "plt.ylabel('Effectifs')\n", + "plt.title(\"Effectifs des fumeuses selon la tranche d'âge \")\n", + "\n", + "plt.xticks(x, BarName, rotation=40)\n", + "\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "x = [1,2,3,4]\n", + "height = [catégorie1_non_fumeuse, catégorie2_non_fumeuse, catégorie3_non_fumeuse, catégorie4_non_fumeuse]\n", + "width = 0.05\n", + "\n", + "BarName = ['18-34 ans', '34-54 ans', '55-64 ans','plus de 65 ans']\n", + "\n", + "plt.bar(x, height , width )\n", + "\n", + "plt.xlim(0,4.5)\n", + "plt.ylim(0,225)\n", + "\n", + "plt.ylabel('Effectifs')\n", + "plt.title(\"Effectifs des non fumeuses selon la tranche d'âge \")\n", + "\n", + "plt.xticks(x, BarName, rotation=40)\n", + "\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fumeuse décédée dans la catégorie 1 5\n", + "fumeuse décédée dans la catégorie 2 41\n", + "fumeuse décédée dans la catégorie 3 51\n", + "fumeuse décédée dans la catégorie 4 42\n", + "non fumeuse décédée dans la catégorie 1 6\n", + "non fumeuse décédée dans la catégorie 2 19\n", + "non fumeuse décédée dans la catégorie 3 40\n", + "non fumeuse décédée dans la catégorie 4 165\n" + ] + } + ], + "source": [ + "# Décès des fumeuses et des non fumeuses selon la tranche d'âge\n", + "\n", + "fumeuse_decedee1=0 # fumeuse décédée de la premiere tranche d'age\n", + "fumeuse_decedee2=0 # fumeuse décédée de la premiere tranche d'age \n", + "fumeuse_decedee3=0 # fumeuse décédée de la premiere tranche d'age\n", + "fumeuse_decedee4=0 # fumeuse décédée de la premiere tranche d'age\n", + "\n", + "\n", + "non_fumeuse_decedee1=0 \n", + "non_fumeuse_decedee2=0 \n", + "non_fumeuse_decedee3=0 \n", + "non_fumeuse_decedee4=0 \n", + "\n", + "\n", + " \n", + "for i in range(1314):\n", + " \n", + " # fumeuses décédée selon la tranche d'âge \n", + " if (data['Smoker'][i]== 'Yes') and (data['Status'][i]== 'Dead') and (18<=data['Age'][i] <=34): # si elle est fumeuse et qu'elle est dans la catégorie 1\n", + " 
fumeuse_decedee1= fumeuse_decedee1+1\n", + " \n", + " elif (data['Smoker'][i]== 'Yes') and (data['Status'][i]== 'Dead') and (3464): # si elle est fumeuse et qu'elle est dans la catégorie 4\n", + " fumeuse_decedee4= fumeuse_decedee4+1 \n", + " \n", + " \n", + " # tranches d'age pour les non fumeuses\n", + " if (data['Smoker'][i]== 'No') and (data['Status'][i]== 'Dead') and (18<=data['Age'][i] <=34): # si elle est non fumeuse et qu'elle est dans la catégorie 1\n", + " non_fumeuse_decedee1= non_fumeuse_decedee1+1\n", + " \n", + " elif (data['Smoker'][i]== 'No') and (data['Status'][i]== 'Dead') and (3464): # si elle est non fumeuse et qu'elle est dans la catégorie 4\n", + " non_fumeuse_decedee4= non_fumeuse_decedee4+1 \n", + " \n", + " \n", + "print('fumeuse décédée dans la catégorie 1',fumeuse_decedee1)\n", + "print('fumeuse décédée dans la catégorie 2',fumeuse_decedee2)\n", + "print('fumeuse décédée dans la catégorie 3',fumeuse_decedee3)\n", + "print('fumeuse décédée dans la catégorie 4',fumeuse_decedee4)\n", + "\n", + "print('non fumeuse décédée dans la catégorie 1',non_fumeuse_decedee1)\n", + "print('non fumeuse décédée dans la catégorie 2',non_fumeuse_decedee2)\n", + "print('non fumeuse décédée dans la catégorie 3',non_fumeuse_decedee3)\n", + "print('non fumeuse décédée dans la catégorie 4',non_fumeuse_decedee4)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# taux de mortalité des fumeuses et des non fumeuses selon la tranche d'âge \n", + "\n", + "taux1_fumeuse= fumeuse_decedee1 / catégorie1_fumeuse\n", + "taux2_fumeuse= fumeuse_decedee2 / catégorie2_fumeuse\n", + "taux3_fumeuse= fumeuse_decedee3 / catégorie3_fumeuse\n", + "taux4_fumeuse= fumeuse_decedee4 / catégorie4_fumeuse\n", + "\n", + "taux1_non_fumeuse= non_fumeuse_decedee1 / catégorie1_non_fumeuse\n", + "taux2_non_fumeuse= non_fumeuse_decedee2 / catégorie2_non_fumeuse\n", + "taux3_non_fumeuse= non_fumeuse_decedee3 / catégorie3_non_fumeuse\n", + "taux4_non_fumeuse= non_fumeuse_decedee4 / catégorie4_non_fumeuse\n", + "\n", + "\n", + "\n", + "x = [1, 2, 4, 5 ,7, 8, 10, 11]\n", + "height = [taux1_fumeuse, taux1_non_fumeuse, taux2_fumeuse,taux2_non_fumeuse,taux3_fumeuse, taux3_non_fumeuse, taux4_fumeuse,taux4_non_fumeuse]\n", + "width = 0.05\n", + "\n", + "BarName = ['fumeuse','non fumeuse', 'fumeuse','non fumeuse', 'fumeuse','non fumeuse','fumeuse','non fumeuse']\n", + "\n", + "plt.bar(x, height , width )\n", + "\n", + "plt.xlim(0,13)\n", + "plt.ylim(0,1)\n", + "#plt.grid()\n", + "\n", + "plt.ylabel('Taux de mortalité')\n", + "\n", + "plt.xticks(x, BarName, rotation=90)\n", + "\n", + "\n", + "plt.show()\n", + "\n", + "#print(taux1_fumeuse>taux1_non_fumeuse, taux4_fumeuse> taux4_non_fumeuse)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Lorsqu'on observait le taux de mortalité entre les fumeuses et les non fumeuses sans prendre en compte de tranche d'age, le taux de mortalité était plus élevé chez les non fumeuses. Cette fois, on prend en compte les tranches d'age et le taux de mortalité est plus élevé chez les fumeuses. 
Cela peut s'expliquer par le fait que l'effectif des non fumeuses est assez bien réparti sur les différentes tranches d'âge, alors que celui des fumeuses est principalement concentré avant 54 ans. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Régression logistique" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "Death = np.zeros((1314,1))\n", + "\n", + "\n", + "# 1 pour vivant \n", + "for i in range(1314):\n", + " if data['Status'][i]=='Alive':\n", + " Death[i][0]=1\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
ODc/b9uey7bGxnchT11juOvtsHzPd+PnM3oly3wcsb5s+Bzgw1cDMvCMzBzNzsL9/1jc1k7RAGjHVMVvL2X2987b9uWx7bGwn8tQ1lrvOPtvHTDd+PrN3oty3Ae+rrprZALw42/n2o3HmqYs7vUlJ09i0fvm0y26+YjU903c/zcYMC2fZ/s1XrKY508Yrvc0GN1+xenyd3mZj1nU6YSz3bPtsNmI833Tj22/DfKhzKeTdwF8DqyNiX0R8ICJuioibqiH3A08De4D/CPyz+Qj60G++y4LXcXXmqYtZdcbShY4xqwAufdNpDPT1ErSu1ujrbRK0rohZtqQJcEQh9/U2ue29a9m8YcX4EWkjYtarWTauG+DTv7iWJc2J9dHb7OG2965ly3VrxrNMHgMzP1m7cd0AW65fQ19vc3zesiVNNm9YMb7Ngb7eCU9Eblw3wK3XXji+fOw2t98XY7dt7HYvXTyxaJcubozfZwN9vWzesGJC9oiJudv3CRPPTS9b0pzwZOpUGSffhvlQ62qZ+TDXq2UkScf3ahlJUpex3CWpQJa7JBXIcpekAlnuklQgy12SCmS5S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVCDLXZIKZLlLUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SClSr3CPiyojYHRF7IuKWKZa/ISL+PCIejYhdEfH+zkeVJNU1a7lHRAP4HHAVcD6wKSLOnzTsg8C3MnMNcBnwuxGxuMNZJUk11TlyvwTYk5lPZ+YrwD3ANZPGJHBqRATwOuAF4NWOJpUk1Van3AeAvW3T+6p57T4L/BRwAHgc+HBmHp68oYi4MSKGImLo4MGDRxlZkjSbOuUeU8zLSdNXAI8AZwNrgc9GxOuPWCnzjswczMzB/v7+OYeVJNVTp9z3Acvbps+hdYTe7v3AvdmyB/gucF5nIkqS5qpOuT8MrIqIc6snSW8Atk0a8yxwOUBEnAmsBp7uZFBJUn2LZhuQma9GxIeA7UADuCszd0XETdXy24GPA1+IiMdpncb5jcx8fh5zS5JmMGu5A2Tm/cD9k+bd3vbzAeBnOxtNknS0fIWqJBXIcpekAlnuklQgy12SCmS5S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVCDLXZIKZLlLUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCmS5S1KBapV7RFwZEbsjYk9E3DLNmMsi4pGI2BURf9nZmJKkuVg024CIaACfA94F7AMejohtmfmttjF9wOeBKzPz2Yg4Y74CS5JmV+fI/RJgT2Y+nZmvAPcA10wa80vAvZn5LEBmPtfZmJKkuahT7gPA3rbpfdW8dn8PWBYRX4+IHRHxvk4FlCTN3aynZYCYYl5OsZ23AJcDvcBfR8SDmfmdCRuKuBG4EWDFihVzTytJqqXOkfs+YHnb9DnAgSnGfCUzX8rM54H/AayZvKHMvCMzBzNzsL+//2gzS5JmUafcHwZWRcS5EbEYuAHYNmnMnwH/ICIWRcQSYD3wZGejSpLqmvW0TGa+GhEfArYDDeCuzNwVETdVy2/PzCcj4ivAY8Bh4M7MfGI+g0uSpheZk0+fHx+Dg4M5NDS0IPuWpBNVROzIzMHZxvkKVUkqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCmS5S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVCDLXZIKZLlLUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklSgWuUeEVdGxO6I2BMRt8ww7u9HxGhEXNe5iJKkuZq13COiAXwOuAo4H9g
UEedPM+5TwPZOh5QkzU2dI/dLgD2Z+XRmvgLcA1wzxbhfA74MPNfBfJKko1Cn3AeAvW3T+6p54yJiAHgPcPtMG4qIGyNiKCKGDh48ONeskqSa6pR7TDEvJ03fBvxGZo7OtKHMvCMzBzNzsL+/v25GSdIcLaoxZh+wvG36HODApDGDwD0RAXA6cHVEvJqZWzuSUpI0J3XK/WFgVUScC+wHbgB+qX1AZp479nNEfAG4z2KXpIUza7ln5qsR8SFaV8E0gLsyc1dE3FQtn/E8uyTp+Ktz5E5m3g/cP2nelKWemb967LEkScfCV6hKUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCmS5S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVCDLXZIKZLlLUoEsd0kqkOUuSQWy3CWpQJa7JBWoVrlHxJURsTsi9kTELVMs/+WIeKz6+quIWNP5qJKkumYt94hoAJ8DrgLOBzZFxPmThn0X+IeZeRHwceCOTgeVJNVX58j9EmBPZj6dma8A9wDXtA/IzL/KzB9Xkw8C53Q2piRpLuqU+wCwt216XzVvOh8A/tuxhJIkHZtFNcbEFPNyyoER76BV7j89zfIbgRsBVqxYUTOiJGmu6hy57wOWt02fAxyYPCgiLgLuBK7JzB9NtaHMvCMzBzNzsL+//2jySpJqqFPuDwOrIuLciFgM3ABsax8QESuAe4F/nJnf6XxMSdJczHpaJjNfjYgPAduBBnBXZu6KiJuq5bcDvwW8Efh8RAC8mpmD8xdbkjSTyJzy9Pm8GxwczKGhoQXZtySdqCJiR52DZ1+hKkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVCDLXZIKZLlLUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCmS5S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgq0qM6giLgS+AzQAO7MzH83aXlUy68GXgZ+NTO/2eGstW3duZ8t23dz4NAwZ/f1cvMVq9m4buCox003Fphx/a079/Oxbbs4NDwCwNLFDZqNHg4Nj9CIYDSTgUnrzbSf/YeGj1gPmLCPUxb18LevHq51Py1d3OCT77mQjesGjsjablFPMHo4SaAnWvsYHjk8nuVYBZDAQF8v7zivn699+yD7Dw3Put7AFL+HJYsbvPzKKAk0Itjwk8t45kfDE+67ZUua/L+RUYZHWvfTsiVNzj/rVB58+seMZtKIYNP65Xxi44X8m62P80cPPnvMt3E+nbKoh0/9wkUMfe8F/vjBZzn238hEAfzdM5by9MGXO/L7VsuyJU1+++ffPG3nHKvIWX5ZEdEAvgO8C9gHPAxsysxvtY25Gvg1WuW+HvhMZq6fabuDg4M5NDR0bOmnsHXnfj567+MMj4yOz+ttNrj12guPKN4646Yb22wEJIwczinX37pzPzd/6dEJy6czth5Qaz/jy3paZVVjF9Nq9ASbLlnOf/nG3lpZu02zJyBgZLTz2VedsZSnnnup49uVxjQbwZbr1syp4CNiR2YOzjauzmmZS4A9mfl0Zr4C3ANcM2nMNcAXs+VBoC8izqqdtoO2bN89oRwBhkdG2bJ991GNm27syGgeUYbt62/Zvrt2WY6tV3c/48sOH1uxA4weTu5+6MQsdmjdB/NR7IDFrnk3MppTdk4n1Cn3AWBv2/S+at5cxxARN0bEUEQMHTx4cK5ZazkwzcP5yfPrjptp7Ezrz2WdsfFzXadTfKgtLZz5+n9fp9xjinmT26DOGDLzjswczMzB/v7+Ovnm7Oy+3lrz646baexM689lnbHxc12nUxox1a9P0vEwX//v65T7PmB52/Q5wIGjGHNc3HzFanqbjQnzepuN8Sfe5jpuurH
NRrTO906z/s1XrD5i+XTG1qu7n/FlPUHNXUyr0dN68rBu1m7T7InW8xLzYNUZS+dlu9KYZiOm7JxOqFPuDwOrIuLciFgM3ABsmzRmG/C+aNkAvJiZ3+9w1lo2rhvg1msvZKCvl6B1RcVUT5LWHTfd2C3XrWHL9WumXX/jugG2XL+Gvt7m+HaWLm6MT48dLbevN9t+Jq+35fo1fPoX107YxymL6l/dunRxg9+9fg2f2HjhEVnbLeqJ8YdmPQG9zZ4JWY7V2FYG+nrZvGHF+G2dzdh9sOW6134PSxc3xrfXiODSN512xH23bElz/DaMTV/6ptPGlzci2LxhBQ/8+mVs3rCiEzdxXp2yqIfb3ruWzRtWTPkQ+lgFrT90PsLrrGVLmnN+MnUuZr1aBsavhrmN1qWQd2XmJyPiJoDMvL26FPKzwJW0LoV8f2bOeCnMfF0tI0klq3u1TK3r3DPzfuD+SfNub/s5gQ/ONaQkaX74ClVJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgpU60VM87LjiIPA9+ZxF6cDz8/j9jvlRMkJJ05Wc3beiZL1ZMj5E5k565tzLVi5z7eIGKrzKq6FdqLkhBMnqzk770TJas7XeFpGkgpkuUtSgUou9zsWOkBNJ0pOOHGymrPzTpSs5qwUe85dkk5mJR+5S9JJ64Qv94hYHhFfi4gnI2JXRHy4mn9aRDwQEU9V35d1Qda/ExHfiIhHq6y/061ZASKiERE7I+K+arrrckbEMxHxeEQ8EhFD3ZoTICL6IuJPIuLb1b/Xt3Zb1ohYXd2XY19/ExEf6bacVdZ/Uf0/eiIi7q7+f3Vjzg9XGXdFxEeqefOe84Qvd+BV4F9m5k8BG4APRsT5wC3AVzNzFfDVanqh/S3wzsxcA6wFrqw+uaobswJ8GHiybbpbc74jM9e2XVrWrTk/A3wlM88D1tC6b7sqa2buru7LtcBbaH34zp/SZTkjYgD458BgZl5A64OEbqD7cl4A/FPgElq/85+LiFUcj5yZWdQX8GfAu4DdwFnVvLOA3QudbVLOJcA3gfXdmJXW5+B+FXgncF81rxtzPgOcPmleN+Z8PfBdque5ujlrW7afBf53N+YEBoC9wGm0PnTovipvt+W8HrizbfrfAv/qeOQs4ch9XESsBNYBDwFnZvU5rtX3MxYu2WuqUx2PAM8BD2Rmt2a9jdY/wsNt87oxZwJ/ERE7IuLGal435vxJ4CDwn6pTXXdGxFK6M+uYG4C7q5+7Kmdm7gf+PfAs8H1an9v8F3RZTuAJ4O0R8caIWAJcDSznOOQsptwj4nXAl4GPZObfLHSe6WTmaLYe8p4DXFI9bOsqEfFzwHOZuWOhs9RwaWZeDFxF65Tc2xc60DQWARcDv5+Z64CX6J7TRUeIiMXAu4EvLXSWqVTnqK8BzgXOBpZGxOaFTXWkzHwS+BTwAPAV4FFap5LnXRHlHhFNWsX+x5l5bzX7hxFxVrX8LFpHyl0jMw8BX6f1oeLdlvVS4N0R8QxwD/DOiPgjui8nmXmg+v4crXPDl9CFOYF9wL7qkRrAn9Aq+27MCq0/lt/MzB9W092W82eA72bmwcwcAe4F3kb35SQz/zAzL87MtwMvAE9xHHKe8OUeEQH8IfBkZn66bdE24Feqn3+F1rn4BRUR/RHRV/3cS+sf6LfpsqyZ+dHMPCczV9J6aP7fM3MzXZYzIpZGxKljP9M65/oEXZYTIDN/AOyNiNXVrMuBb9GFWSubeO2UDHRfzmeBDRGxpOqAy2k9Qd1tOYmIM6rvK4Brad2v859zIZ9s6NATFj9N67zrY8Aj1dfVwBtpPSH4VPX9tC7IehGws8r6BPBb1fyuy9qW+TJee0K1q3LSOo/9aPW1C/jNbszZlnctMFT9/rcCy7oxK60n+38EvKFtXjfm/B1aB0dPAP8ZOKVLc/5PWn/IHwUuP173p69QlaQCnfCnZSRJR7LcJalAlrskFchyl6QCWe6SVCD
LXSeliHhPRGREnLfQWaT5YLnrZLUJ+F+0XqQlFcdy10mneh+iS4EPUJV7RPRExOer99y+LyLuj4jrqmVviYi/rN6cbPvYy8albma562S0kdb7qn8HeCEiLqb1svCVwIXAPwHeCuPvW/QfgOsy8y3AXcAnFyK0NBeLFjqAtAA20XpLY2i9MdomoAl8KTMPAz+IiK9Vy1cDFwAPtN7ChAatt5iVuprlrpNKRLyR1geQXBARSausk9Y7Sk65CrArM996nCJKHeFpGZ1srgO+mJk/kZkrM3M5rU9Ieh74herc+5m03jANWp+Y0x8R46dpIuLNCxFcmgvLXSebTRx5lP5lWh/4sI/WOwz+Aa1P83oxM1+h9QfhUxHxKK13HX3b8YsrHR3fFVKqRMTrMvP/VqduvkHrU55+sNC5pKPhOXfpNfdVH6ayGPi4xa4TmUfuklQgz7lLUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAv1/tvSSk1DFa9MAAAAASUVORK5CYII=\n", + "text/plain": [ + "
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Single explanatory variable: age, as an (n_rows, 1) float matrix -- the 2-D
# layout scikit-learn estimators expect for X. The row count is taken from
# the data instead of being hard-coded.
Age0 = data['Age']
Age_np = Age0.values.astype(float).reshape(-1, 1)

# Raw view of the 0/1 target against age, before fitting anything.
plt.scatter(Age_np, Death)
plt.xlabel('Age')
plt.show()

# Hold out part of the rows for evaluation; random_state pins the shuffle so
# the split (and therefore the confusion matrix below) is reproducible.
x_train, x_test, y_train, y_test = train_test_split(Age_np, Death, random_state=1)

logReg = LogisticRegression()
# fit() expects a 1-D target of shape (n_samples,). np.ravel flattens a
# column vector (and is a no-op on an already flat array), which avoids
# scikit-learn's DataConversionWarning.
logReg.fit(x_train, np.ravel(y_train))
y_pred = logReg.predict(x_test)

# Confusion matrix on the held-out rows (rows: true class, columns:
# predicted class).
conf = confusion_matrix(y_test, y_pred)
print(conf)
# ROC curve of the fitted logistic regression, evaluated on the held-out rows.

# Column 1 of predict_proba holds the score of the model's second class
# (logReg.classes_[1]); roc_curve sweeps the decision threshold over it.
scores = logReg.predict_proba(x_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, scores)

axes = plt.gca()
axes.plot(fpr, tpr)        # the model's ROC curve
axes.plot([0, 1], [0, 1])  # diagonal y = x as a visual reference

axes.set_xlim([0.0, 1])
axes.set_ylim([0.0, 1.05])

axes.set_xlabel('Taux de faux positifs')
axes.set_ylabel('Taux de vrais positifs')
axes.set_title('ROC')

plt.show()