{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Première analyse rapide des données"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import des packages importants\n",
    "import numpy as np \n",
    "import matplotlib.pyplot as plt \n",
    "import pandas as pd \n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_url = \"https://gitlab.inria.fr/learninglab/mooc-rr/mooc-rr-ressources/-/raw/master/module3/Practical_session/Subject6_smoking.csv?inline=false\" "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Smoker</th>\n",
       "      <th>Status</th>\n",
       "      <th>Age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>21.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>19.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>No</td>\n",
       "      <td>Dead</td>\n",
       "      <td>57.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>No</td>\n",
       "      <td>Alive</td>\n",
       "      <td>47.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>81.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>No</td>\n",
       "      <td>Alive</td>\n",
       "      <td>36.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>No</td>\n",
       "      <td>Alive</td>\n",
       "      <td>23.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Dead</td>\n",
       "      <td>57.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>24.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>49.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Smoker Status   Age\n",
       "0    Yes  Alive  21.0\n",
       "1    Yes  Alive  19.3\n",
       "2     No   Dead  57.5\n",
       "3     No  Alive  47.1\n",
       "4    Yes  Alive  81.4\n",
       "5     No  Alive  36.8\n",
       "6     No  Alive  23.8\n",
       "7    Yes   Dead  57.5\n",
       "8    Yes  Alive  24.8\n",
       "9    Yes  Alive  49.5"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "raw_data = pd.read_csv(data_url)\n",
    "raw_data[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Smoker</th>\n",
       "      <th>Status</th>\n",
       "      <th>Age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [Smoker, Status, Age]\n",
       "Index: []"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "raw_data[raw_data.isnull().any(axis=1)] "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Donc ici pas de point manquant -> pas besoin de modifier les données."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "On compte les morts, les vivants, les fumeurs et non-fumeurs. Cela permet notamment de vérifier rapidement l'intégrité des données. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number alive = 945\n",
      "number dead = 369\n",
      "total number = 1314\n",
      "number smoker = 582\n",
      "number non smoker = 732\n",
      "total number = 1314\n",
      "Number of data : 1314\n"
     ]
    }
   ],
   "source": [
    "dead = raw_data['Status'].value_counts()['Dead']\n",
    "alive = raw_data['Status'].value_counts()['Alive']\n",
    "print(f'number alive = {alive}')\n",
    "print(f'number dead = {dead}')\n",
    "print(f'total number = {alive + dead}')\n",
    "smoker = raw_data['Smoker'].value_counts()['Yes']\n",
    "non_smoker = raw_data['Smoker'].value_counts()['No']\n",
    "print(f'number smoker = {smoker}')\n",
    "print(f'number non smoker = {non_smoker}')\n",
    "print(f'total number = {smoker + non_smoker}')\n",
    "\n",
    "print(f'Number of data : {len(raw_data)}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Question 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Smoker</th>\n",
       "      <th>No</th>\n",
       "      <th>Yes</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Status</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Alive</th>\n",
       "      <td>502</td>\n",
       "      <td>443</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dead</th>\n",
       "      <td>230</td>\n",
       "      <td>139</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Smoker   No  Yes\n",
       "Status          \n",
       "Alive   502  443\n",
       "Dead    230  139"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tab = pd.crosstab(raw_data.Status, raw_data.Smoker) # on a bien le bon nombre de vivants et de morts\n",
    "tab"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[502 443]\n",
      " [230 139]]\n",
      "ratio_smoker = 0.238832\n",
      "ratio_non_smoker = 0.314208\n"
     ]
    }
   ],
   "source": [
    "numpy_data = np.array(tab)\n",
    "print(numpy_data)\n",
    "ratio_smoker = numpy_data[1,1]/smoker\n",
    "ratio_non_smoker = numpy_data[1,0]/non_smoker\n",
    "print(f'ratio_smoker = {ratio_smoker:.6f}')\n",
    "print(f'ratio_non_smoker = {ratio_non_smoker:.6f}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "C'est dommage, il semblerait que les gens qui ne fument pas meurent plus que les gens qui fument... embêtant."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Question 2 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Smoker</th>\n",
       "      <th>Status</th>\n",
       "      <th>Age</th>\n",
       "      <th>AgeGroup</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>21.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>19.3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>No</td>\n",
       "      <td>Dead</td>\n",
       "      <td>57.5</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>No</td>\n",
       "      <td>Alive</td>\n",
       "      <td>47.1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>81.4</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>No</td>\n",
       "      <td>Alive</td>\n",
       "      <td>36.8</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>No</td>\n",
       "      <td>Alive</td>\n",
       "      <td>23.8</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Dead</td>\n",
       "      <td>57.5</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>24.8</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>49.5</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Smoker Status   Age AgeGroup\n",
       "0    Yes  Alive  21.0        1\n",
       "1    Yes  Alive  19.3        1\n",
       "2     No   Dead  57.5        3\n",
       "3     No  Alive  47.1        2\n",
       "4    Yes  Alive  81.4        4\n",
       "5     No  Alive  36.8        2\n",
       "6     No  Alive  23.8        1\n",
       "7    Yes   Dead  57.5        3\n",
       "8    Yes  Alive  24.8        1\n",
       "9    Yes  Alive  49.5        2"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "raw_data.loc[((raw_data.Age < 34) & (raw_data.Age >= 18) ),  'AgeGroup'] = '1'\n",
    "raw_data.loc[((raw_data.Age < 55) & (raw_data.Age >= 34) ),  'AgeGroup'] = '2'\n",
    "raw_data.loc[((raw_data.Age < 65) & (raw_data.Age >= 55) ),  'AgeGroup'] = '3'\n",
    "raw_data.loc[(raw_data.Age >= 65),  'AgeGroup'] = '4'\n",
    "\n",
    "raw_data[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "##################\n",
      "Groupe d'age : 1\n",
      "number alive = 387\n",
      "number dead = 11\n",
      "total number = 398\n",
      "number smoker = 179\n",
      "number non smoker = 219\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Smoker</th>\n",
       "      <th>No</th>\n",
       "      <th>Yes</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Status</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Alive</th>\n",
       "      <td>213</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dead</th>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Smoker   No  Yes\n",
       "Status          \n",
       "Alive   213  174\n",
       "Dead      6    5"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ratio_smoker = 0.027933\n",
      "ratio_non_smoker = 0.027397\n",
      "##################\n",
      "\n",
      "##################\n",
      "Groupe d'age : 2\n",
      "number alive = 378\n",
      "number dead = 60\n",
      "total number = 438\n",
      "number smoker = 239\n",
      "number non smoker = 199\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Smoker</th>\n",
       "      <th>No</th>\n",
       "      <th>Yes</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Status</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Alive</th>\n",
       "      <td>180</td>\n",
       "      <td>198</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dead</th>\n",
       "      <td>19</td>\n",
       "      <td>41</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Smoker   No  Yes\n",
       "Status          \n",
       "Alive   180  198\n",
       "Dead     19   41"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ratio_smoker = 0.171548\n",
      "ratio_non_smoker = 0.095477\n",
      "##################\n",
      "\n",
      "##################\n",
      "Groupe d'age : 3\n",
      "number alive = 145\n",
      "number dead = 91\n",
      "total number = 236\n",
      "number smoker = 115\n",
      "number non smoker = 121\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Smoker</th>\n",
       "      <th>No</th>\n",
       "      <th>Yes</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Status</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Alive</th>\n",
       "      <td>81</td>\n",
       "      <td>64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dead</th>\n",
       "      <td>40</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Smoker  No  Yes\n",
       "Status         \n",
       "Alive   81   64\n",
       "Dead    40   51"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ratio_smoker = 0.443478\n",
      "ratio_non_smoker = 0.330579\n",
      "##################\n",
      "\n",
      "##################\n",
      "Groupe d'age : 4\n",
      "number alive = 35\n",
      "number dead = 207\n",
      "total number = 242\n",
      "number smoker = 49\n",
      "number non smoker = 193\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Smoker</th>\n",
       "      <th>No</th>\n",
       "      <th>Yes</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Status</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Alive</th>\n",
       "      <td>28</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Dead</th>\n",
       "      <td>165</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Smoker   No  Yes\n",
       "Status          \n",
       "Alive    28    7\n",
       "Dead    165   42"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ratio_smoker = 0.857143\n",
      "ratio_non_smoker = 0.854922\n",
      "##################\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for age_group in ['1', '2', '3', '4']:\n",
    "    print('##################')\n",
    "    print(f'Groupe d\\'age : {age_group}')\n",
    "    tab_class = raw_data.loc[(raw_data.AgeGroup == age_group)]\n",
    "    dead = tab_class['Status'].value_counts()['Dead']\n",
    "    alive = tab_class['Status'].value_counts()['Alive']\n",
    "    print(f'number alive = {alive}')\n",
    "    print(f'number dead = {dead}')\n",
    "    print(f'total number = {alive + dead}')\n",
    "    smoker = tab_class['Smoker'].value_counts()['Yes']\n",
    "    non_smoker = tab_class['Smoker'].value_counts()['No']\n",
    "    print(f'number smoker = {smoker}')\n",
    "    print(f'number non smoker = {non_smoker}')\n",
    "    tab_class = pd.crosstab(tab_class.Status, tab_class.Smoker) # on a bien le bon nombre de vivants et de morts\n",
    "    display(tab_class)\n",
    "    numpy_data = np.array(tab_class)\n",
    "  \n",
    "    ratio_smoker = numpy_data[1,1]/smoker\n",
    "    ratio_non_smoker = numpy_data[1,0]/non_smoker\n",
    "    print(f'ratio_smoker = {ratio_smoker:.6f}')\n",
    "    print(f'ratio_non_smoker = {ratio_non_smoker:.6f}')\n",
    "    print('##################\\n')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Analyse :** \n",
    "- Dans le groupe 1 : chez les plus jeunes (18-34 ans), le fait de fumer n'influe pas énormément sur le taux de mortalité. \n",
    "- Dans le groupe 2 : fumer tue, le taux de mortalité est presque 2 fois plus élévé chez les fumeurs. \n",
    "- Dans le groupe 3 : ici, le tabac semble augmenter un peu le taux de mortalité. \n",
    "- Dans le groupe 4 : c'est catastrophique, tout le monde meurt. Les fumeurs ont le même taux de mortalité que les non-fumeurs, ce qui est normal puisque c'est la catégorie où l'âge est le plus élevé. "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "C'est donc la catégorie 4 qui semble biaiser les données globales."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Question 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "raw_data.loc[(raw_data.Status =='Dead'),'Death'] = 1\n",
    "raw_data.loc[(raw_data.Status =='Alive'),'Death'] = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Smoker</th>\n",
       "      <th>Status</th>\n",
       "      <th>Age</th>\n",
       "      <th>AgeGroup</th>\n",
       "      <th>Death</th>\n",
       "      <th>Intercept</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>21.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>19.3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>No</td>\n",
       "      <td>Dead</td>\n",
       "      <td>57.5</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>No</td>\n",
       "      <td>Alive</td>\n",
       "      <td>47.1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>81.4</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>No</td>\n",
       "      <td>Alive</td>\n",
       "      <td>36.8</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>No</td>\n",
       "      <td>Alive</td>\n",
       "      <td>23.8</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Dead</td>\n",
       "      <td>57.5</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>24.8</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Yes</td>\n",
       "      <td>Alive</td>\n",
       "      <td>49.5</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Smoker Status   Age AgeGroup  Death  Intercept\n",
       "0    Yes  Alive  21.0        1      0          1\n",
       "1    Yes  Alive  19.3        1      0          1\n",
       "2     No   Dead  57.5        3      1          1\n",
       "3     No  Alive  47.1        2      0          1\n",
       "4    Yes  Alive  81.4        4      0          1\n",
       "5     No  Alive  36.8        2      0          1\n",
       "6     No  Alive  23.8        1      0          1\n",
       "7    Yes   Dead  57.5        3      1          1\n",
       "8    Yes  Alive  24.8        1      0          1\n",
       "9    Yes  Alive  49.5        2      0          1"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(raw_data[:10])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}