{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Première analyse rapide des données"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import des packages importants\n",
    "import numpy as np \n",
    "import matplotlib.pyplot as plt \n",
    "import pandas as pd \n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_url = \"https://gitlab.inria.fr/learninglab/mooc-rr/mooc-rr-ressources/-/raw/master/module3/Practical_session/Subject6_smoking.csv?inline=false\" "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Smoker</th>\n",
       "      <th>Status</th>\n",
       "      <th>Age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>21.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>19.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>No</td>\n",
       "      <td>Dead</td>\n",
       "      <td>57.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>No</td>\n",
       "      <td>Alive</td>\n",
       "      <td>47.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>81.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>No</td>\n",
       "      <td>Alive</td>\n",
       "      <td>36.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>No</td>\n",
       "      <td>Alive</td>\n",
       "      <td>23.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Dead</td>\n",
       "      <td>57.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>24.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>49.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Smoker Status   Age\n",
       "0    Yes  Alive  21.0\n",
       "1    Yes  Alive  19.3\n",
       "2     No   Dead  57.5\n",
       "3     No  Alive  47.1\n",
       "4    Yes  Alive  81.4\n",
       "5     No  Alive  36.8\n",
       "6     No  Alive  23.8\n",
       "7    Yes   Dead  57.5\n",
       "8    Yes  Alive  24.8\n",
       "9    Yes  Alive  49.5"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "raw_data = pd.read_csv(data_url)\n",
    "raw_data[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Smoker</th>\n",
       "      <th>Status</th>\n",
       "      <th>Age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [Smoker, Status, Age]\n",
       "Index: []"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "raw_data[raw_data.isnull().any(axis=1)] "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Donc ici pas de point manquant -> pas besoin de modifier les données."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "On compte les morts."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number alive = 945\n",
      "number dead = 369\n",
      "total number = 1314\n",
      "number smoker = 582\n",
      "number non smoker = 732\n"
     ]
    }
   ],
   "source": [
    "dead = raw_data['Status'].value_counts()['Dead']\n",
    "alive = raw_data['Status'].value_counts()['Alive']\n",
    "print(f'number alive = {alive}')\n",
    "print(f'number dead = {dead}')\n",
    "print(f'total number = {alive + dead}')\n",
    "smoker = raw_data['Smoker'].value_counts()['Yes']\n",
    "non_smoker = raw_data['Smoker'].value_counts()['No']\n",
    "print(f'number smoker = {smoker}')\n",
    "print(f'number non smoker = {non_smoker}')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Question 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Smoker</th>\n",
       "      <th>No</th>\n",
       "      <th>Yes</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Status</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Alive</th>\n",
       "      <td>502</td>\n",
       "      <td>443</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dead</th>\n",
       "      <td>230</td>\n",
       "      <td>139</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Smoker   No  Yes\n",
       "Status          \n",
       "Alive   502  443\n",
       "Dead    230  139"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tab = pd.crosstab(raw_data.Status, raw_data.Smoker) # on a bien le bon nombre de vivants et de morts\n",
    "tab"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[502 443]\n",
      " [230 139]]\n",
      "ratio_smoker = 0.238832\n",
      "ratio_non_smoker = 0.314208\n"
     ]
    }
   ],
   "source": [
    "numpy_data = np.array(tab)\n",
    "print(numpy_data)\n",
    "ratio_smoker = numpy_data[1,1]/smoker\n",
    "ratio_non_smoker = numpy_data[1,0]/non_smoker\n",
    "print(f'ratio_smoker = {ratio_smoker:.6f}')\n",
    "print(f'ratio_non_smoker = {ratio_non_smoker:.6f}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Ah bah c'est dommage, les gens qui ne fument pas meurent plus que les gens qui fument... embêtant."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Question 2 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Smoker</th>\n",
       "      <th>Status</th>\n",
       "      <th>Age</th>\n",
       "      <th>AgeGroup</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>21.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>19.3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>No</td>\n",
       "      <td>Dead</td>\n",
       "      <td>57.5</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>No</td>\n",
       "      <td>Alive</td>\n",
       "      <td>47.1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>81.4</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>No</td>\n",
       "      <td>Alive</td>\n",
       "      <td>36.8</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>No</td>\n",
       "      <td>Alive</td>\n",
       "      <td>23.8</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Dead</td>\n",
       "      <td>57.5</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>24.8</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>49.5</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Smoker Status   Age AgeGroup\n",
       "0    Yes  Alive  21.0        1\n",
       "1    Yes  Alive  19.3        1\n",
       "2     No   Dead  57.5        3\n",
       "3     No  Alive  47.1        2\n",
       "4    Yes  Alive  81.4        4\n",
       "5     No  Alive  36.8        2\n",
       "6     No  Alive  23.8        1\n",
       "7    Yes   Dead  57.5        3\n",
       "8    Yes  Alive  24.8        1\n",
       "9    Yes  Alive  49.5        2"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "raw_data.loc[((raw_data.Age < 34) & (raw_data.Age >= 18) ),  'AgeGroup'] = '1'\n",
    "raw_data.loc[((raw_data.Age < 55) & (raw_data.Age >= 34) ),  'AgeGroup'] = '2'\n",
    "raw_data.loc[((raw_data.Age < 65) & (raw_data.Age >= 55) ),  'AgeGroup'] = '3'\n",
    "raw_data.loc[(raw_data.Age >= 65),  'AgeGroup'] = '4'\n",
    "\n",
    "raw_data[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "##################\n",
      "Groupe d'age : 1\n",
      "number alive = 387\n",
      "number dead = 11\n",
      "total number = 398\n",
      "number smoker = 179\n",
      "number non smoker = 219\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Smoker</th>\n",
       "      <th>No</th>\n",
       "      <th>Yes</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Status</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Alive</th>\n",
       "      <td>213</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dead</th>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Smoker   No  Yes\n",
       "Status          \n",
       "Alive   213  174\n",
       "Dead      6    5"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ratio_smoker = 0.027933\n",
      "ratio_non_smoker = 0.027397\n",
      "##################\n",
      "\n",
      "##################\n",
      "Groupe d'age : 2\n",
      "number alive = 378\n",
      "number dead = 60\n",
      "total number = 438\n",
      "number smoker = 239\n",
      "number non smoker = 199\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Smoker</th>\n",
       "      <th>No</th>\n",
       "      <th>Yes</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Status</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Alive</th>\n",
       "      <td>180</td>\n",
       "      <td>198</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dead</th>\n",
       "      <td>19</td>\n",
       "      <td>41</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Smoker   No  Yes\n",
       "Status          \n",
       "Alive   180  198\n",
       "Dead     19   41"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ratio_smoker = 0.171548\n",
      "ratio_non_smoker = 0.095477\n",
      "##################\n",
      "\n",
      "##################\n",
      "Groupe d'age : 3\n",
      "number alive = 145\n",
      "number dead = 91\n",
      "total number = 236\n",
      "number smoker = 115\n",
      "number non smoker = 121\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Smoker</th>\n",
       "      <th>No</th>\n",
       "      <th>Yes</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Status</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Alive</th>\n",
       "      <td>81</td>\n",
       "      <td>64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dead</th>\n",
       "      <td>40</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Smoker  No  Yes\n",
       "Status         \n",
       "Alive   81   64\n",
       "Dead    40   51"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ratio_smoker = 0.443478\n",
      "ratio_non_smoker = 0.330579\n",
      "##################\n",
      "\n",
      "##################\n",
      "Groupe d'age : 4\n",
      "number alive = 35\n",
      "number dead = 207\n",
      "total number = 242\n",
      "number smoker = 49\n",
      "number non smoker = 193\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Smoker</th>\n",
       "      <th>No</th>\n",
       "      <th>Yes</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Status</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Alive</th>\n",
       "      <td>28</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dead</th>\n",
       "      <td>165</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Smoker   No  Yes\n",
       "Status          \n",
       "Alive    28    7\n",
       "Dead    165   42"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ratio_smoker = 0.857143\n",
      "ratio_non_smoker = 0.854922\n",
      "##################\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for age_group in ['1', '2', '3', '4']:\n",
    "    print('##################')\n",
    "    print(f'Groupe d\\'age : {age_group}')\n",
    "    tab_class = raw_data.loc[(raw_data.AgeGroup == age_group)]\n",
    "    dead = tab_class['Status'].value_counts()['Dead']\n",
    "    alive = tab_class['Status'].value_counts()['Alive']\n",
    "    print(f'number alive = {alive}')\n",
    "    print(f'number dead = {dead}')\n",
    "    print(f'total number = {alive + dead}')\n",
    "    smoker = tab_class['Smoker'].value_counts()['Yes']\n",
    "    non_smoker = tab_class['Smoker'].value_counts()['No']\n",
    "    print(f'number smoker = {smoker}')\n",
    "    print(f'number non smoker = {non_smoker}')\n",
    "    tab_class = pd.crosstab(tab_class.Status, tab_class.Smoker) # on a bien le bon nombre de vivants et de morts\n",
    "    display(tab_class)\n",
    "    numpy_data = np.array(tab_class)\n",
    "  \n",
    "    ratio_smoker = numpy_data[1,1]/smoker\n",
    "    ratio_non_smoker = numpy_data[1,0]/non_smoker\n",
    "    print(f'ratio_smoker = {ratio_smoker:.6f}')\n",
    "    print(f'ratio_non_smoker = {ratio_non_smoker:.6f}')\n",
    "    print('##################\\n')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Analyse :** \n",
    "- Dans le groupe 1 : chez les plus jeunes (18-34 ans), le fait de fumer n'influe pas énormément sur le taux de mortalité. \n",
    "- Dans le groupe 2 : fumer tue, le taux de mortalité est presque 2 fois plus élévé chez les fumeurs. \n",
    "- Dans le groupe 3 : tout le monde meurt. Mais un peu plus souvent chez les fumeurs. \n",
    "- Dans le groupe 4 : c'est catastrophique (mais normal, ils sont vieux), tout le monde meurt. "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "C'est donc le nombre de membres de la catégorie 4 qui biaise les données globales : il y en a beaucoup, dont une grande partie de non-fumeurs, là où pour les autres catégories, la proportion de fumeurs et de non-fumeurs est presque équivalente. Puisque beaucoup de vieux meurent, la consommation de tabac ne semble pas faire varier le taux, ce qui biaise le ratio global."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}