diff --git a/module3/exo3/exercice_fr.ipynb b/module3/exo3/exercice_fr.ipynb
index 0bbbe371b01e359e381e43239412d77bf53fb1fb..872294e192291c0cc3421df630894c9668266314 100644
--- a/module3/exo3/exercice_fr.ipynb
+++ b/module3/exo3/exercice_fr.ipynb
@@ -1,5 +1,1057 @@
{
- "cells": [],
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "import statsmodels.api as sm\n",
+ "import statsmodels.formula.api as smf"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pd.set_option('display.precision', 3)\n",
+ "sns.set(style=\"whitegrid\", palette=\"pastel\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "url = \"https://gitlab.inria.fr/learninglab/mooc-rr/mooc-rr-ressources/-/raw/master/module3/Practical_session/Subject6_smoking.csv\"\n",
+ "data = pd.read_csv(url)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Aperçu du jeu de données :\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Smoker | \n",
+ " Status | \n",
+ " Age | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 21.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 19.3 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " No | \n",
+ " Dead | \n",
+ " 57.5 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " No | \n",
+ " Alive | \n",
+ " 47.1 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " 81.4 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Smoker Status Age\n",
+ "0 Yes Alive 21.0\n",
+ "1 Yes Alive 19.3\n",
+ "2 No Dead 57.5\n",
+ "3 No Alive 47.1\n",
+ "4 Yes Alive 81.4"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"Aperçu du jeu de données :\")\n",
+ "display(data.head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Informations sur les variables :\n",
+ "\n",
+ "RangeIndex: 1314 entries, 0 to 1313\n",
+ "Data columns (total 3 columns):\n",
+ "Smoker 1314 non-null object\n",
+ "Status 1314 non-null object\n",
+ "Age 1314 non-null float64\n",
+ "dtypes: float64(1), object(2)\n",
+ "memory usage: 30.9+ KB\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"\\nInformations sur les variables :\")\n",
+ "# DataFrame.info() prints its report itself and returns None, so wrapping it in\n",
+ "# display() only appended a spurious 'None' output after the report.\n",
+ "data.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Valeurs uniques dans chaque variable :\n",
+ "Smoker: ['Yes' 'No']\n",
+ "Status: ['Alive' 'Dead']\n",
+ "Age: [21. 19.3 57.5 47.1 81.4 36.8 23.8 24.8 49.5 30. 66. 49.2 58.4 60.6\n",
+ " 25.1 43.5 27.1 58.3 65.7 73.2 38.3 33.4 62.3 18. 56.2 59.2 25.8 36.9\n",
+ " 20.2 34.6 51.9 49.9 19.4 56.9 46.7 44.4 29.5 33. 35.6 39.1 69.7 35.7\n",
+ " 75.8 25.3 83. 44.3 18.5 37.5 22.1 82.8 45. 73.3 39. 28.4 73.7 40.1\n",
+ " 51.2 22.9 41.9 58.1 37.3 41.7 36.3 80.7 38.6 27.9 47.6 77.6 26.2 45.4\n",
+ " 62.4 62.5 39.5 27.6 31.4 85. 18.9 35.3 25.4 72.8 27.3 55.9 32.8 53.6\n",
+ " 48. 56.1 18.3 62.8 18.6 46.3 36. 55.5 76.5 61. 26.8 70.5 81.8 32.5\n",
+ " 23. 83.7 45.9 59.9 66.5 47.5 89.3 57.2 21.3 34. 59.5 50.1 30.6 63.8\n",
+ " 27.4 22.5 24.2 56.8 28.9 87.8 88.4 33.7 63.6 71.7 48.5 82. 40.8 31.3\n",
+ " 24.4 32.2 53.1 26.3 41. 86.8 49.7 50.5 63.5 33.1 30.7 59.4 67.2 20.7\n",
+ " 41.6 37.7 89.7 45.2 55.4 44.8 76.7 48.4 82.7 27. 30.9 82.9 40.5 26.5\n",
+ " 29.9 75. 66.3 87. 79.5 51.7 78.2 80. 62.9 78.3 49.8 36.5 60.1 62.\n",
+ " 19. 87.6 24.3 22.2 68.4 89.2 44.5 43.3 57.4 45.6 18.1 63.4 67. 55.6\n",
+ " 23.3 57.6 38.4 35.2 60.3 48.7 23.7 46.9 65.8 83.1 23.2 66.7 58.8 56.7\n",
+ " 21.5 51.5 57.8 47.8 53.9 45.5 63.9 20.6 46.1 49.6 25.9 46.8 81. 84.3\n",
+ " 30.8 52.4 20.1 58.9 72.1 19.6 52.6 35. 35.4 55.1 49.1 39.7 66.4 58.6\n",
+ " 36.2 38.8 47.9 55.3 87.7 74.1 59.3 39.8 55. 42.8 34.2 81.7 61.3 58.5\n",
+ " 52.9 38.5 38.7 49.3 31.9 57.7 21.1 22.7 19.7 67.6 37. 79.9 56.3 31.1\n",
+ " 40.9 24.5 34.3 20.5 29. 43.6 42.3 63.2 53.2 53.7 62.7 39.3 47. 35.8\n",
+ " 49.4 51.3 85.2 25.2 79.4 31.6 74.6 81.3 71.4 56.4 59.1 61.8 36.1 33.5\n",
+ " 24.1 72.5 58.7 40.7 75.6 34.7 22. 61.2 37.2 25.7 88.8 65.6 58. 20.4\n",
+ " 46.2 29.7 43.7 61.1 78. 63. 82.3 36.7 67.5 86.2 79.1 75.1 52. 40.3\n",
+ " 75.9 35.5 76.2 21.7 50.6 42.2 33.6 61.6 23.6 57.1 21.8 43.2 26.6 45.7\n",
+ " 73.9 80.8 52.8 80.5 59. 42.5 76.9 33.3 80.2 30.5 19.8 84.5 56. 50.3\n",
+ " 60.7 32.9 86.9 41.5 45.3 77.2 69.4 49. 44.7 27.7 70.7 38. 32.3 24.9\n",
+ " 63.1 35.9 24. 88.5 82.4 87.4 69.5 59.6 56.6 34.5 48.3 88.7 25.5 21.2\n",
+ " 35.1 87.9 76.1 53.3 82.6 86.3 88.1 71. 62.1 52.2 25.6 75.3 77.5 75.2\n",
+ " 83.9 53. 50.9 29.8 50.7 66.1 27.2 38.1 66.8 55.2 51.6 41.4 65.4 67.7\n",
+ " 37.8 23.9 34.8 28.2 79.3 51.8 23.4 56.5 83.5 43.8 47.2 23.5 68.5 43.4\n",
+ " 19.5 62.2 19.2 61.9 30.2 29.4 29.6 29.3 40. 81.6 42.7 40.4 85.7 55.8\n",
+ " 28.8 31.8 78.4 43. 88.3 52.7 81.9 71.8 46.6 57.9 22.6 60.2 39.6 42.6\n",
+ " 72.6 44.1 26. 47.3 24.6 85.5 42.1 67.4 21.4 74. 42. 68.1 78.7 31.\n",
+ " 66.6 52.1 30.4 23.1 46.5 48.1 32.4 45.1 41.8 84.9 50.2 32.7 59.8 22.3\n",
+ " 47.7 18.7 30.3 43.1 59.7 18.8 27.5 86. 85.8 28.7 61.4 19.9 32.6 82.5\n",
+ " 76.8 26.4 50.8 69.6 21.9 33.9 77.1 37.1 83.4 24.7 38.2 77.8 42.4 28.5\n",
+ " 71.5 44.9 33.8 69. 60.5 44. 36.6 84.4 47.4 20.3 44.6 84.8 26.9 77.4\n",
+ " 41.3 53.4 82.2 64. 25. 19.1 52.3 51.1 73.8 65.3 28.3 74.8 51. 32.1\n",
+ " 55.7 58.2 60. 84.7 85.1 65.2 83.6 38.9 40.6 48.6 78.1 71.3 61.5 62.6\n",
+ " 53.8 41.1 42.9 29.1 80.9 31.5 33.2 81.1 88.6 48.9 89.9 30.1 63.3 65.\n",
+ " 89.5 39.2 28. 71.1 88. 34.9 83.8 86.7 71.6 52.5 34.1 20. 83.3 67.8\n",
+ " 28.1 79. 65.1 46. 40.2 48.8 27.8 26.7 74.4 41.2 20.9 34.4 60.8 31.2\n",
+ " 60.9]\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"\\nValeurs uniques dans chaque variable :\")\n",
+ "for col in data.columns:\n",
+ " print(f\"{col}: {data[col].unique()}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "KeyError",
+ "evalue": "'smoker'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2524\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2525\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2526\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;31mKeyError\u001b[0m: 'smoker'",
+ "\nDuring handling of the above exception, another exception occurred:\n",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtable_smoking\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcrosstab\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'smoker'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'outcome'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Tableau de contingence (nombre de femmes vivantes et décédées) :\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mdisplay\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtable_smoking\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2137\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2138\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2139\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2141\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_getitem_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_getitem_column\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2144\u001b[0m \u001b[0;31m# get column\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2145\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_unique\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2146\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_item_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2147\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2148\u001b[0m \u001b[0;31m# duplicate columns & possible reduce dimensionality\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m_get_item_cache\u001b[0;34m(self, item)\u001b[0m\n\u001b[1;32m 1840\u001b[0m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcache\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1841\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mres\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1842\u001b[0;31m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1843\u001b[0m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_box_item_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1844\u001b[0m \u001b[0mcache\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, item, fastpath)\u001b[0m\n\u001b[1;32m 3841\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3842\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3843\u001b[0;31m \u001b[0mloc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3844\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3845\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2525\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2526\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2527\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_cast_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2528\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2529\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtolerance\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtolerance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
+ "\u001b[0;31mKeyError\u001b[0m: 'smoker'"
+ ]
+ }
+ ],
+ "source": [
+ "# The CSV columns are named 'Smoker' and 'Status' (capitalised). The lowercase\n",
+ "# names 'smoker' / 'outcome' do not exist in the frame and raised a KeyError,\n",
+ "# which stops a Restart & Run All at this cell.\n",
+ "table_smoking = pd.crosstab(data['Smoker'], data['Status'])\n",
+ "print(\"Tableau de contingence (nombre de femmes vivantes et décédées) :\")\n",
+ "display(table_smoking)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Colonnes du DataFrame : ['Smoker', 'Status', 'Age']\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Colonnes du DataFrame :\", list(data.columns))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "candidates_smoker = [c for c in data.columns if any(k in c.lower() for k in ['smok', 'tabag', 'cigs', 'cig'])]\n",
+ "candidates_outcome = [c for c in data.columns if any(k in c.lower() for k in ['outcome', 'status', 'dead', 'alive', 'death'])]\n",
+ "candidates_age = [c for c in data.columns if 'age' in c.lower()]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Candidats possibles pour 'smoker' : ['Smoker']\n",
+ "Candidats possibles pour 'outcome' : ['Status']\n",
+ "Candidats possibles pour 'age' : ['Age']\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"\\nCandidats possibles pour 'smoker' :\", candidates_smoker)\n",
+ "print(\"Candidats possibles pour 'outcome' :\", candidates_outcome)\n",
+ "print(\"Candidats possibles pour 'age' :\", candidates_age)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Utilisation de : smoker_col = 'Smoker', outcome_col = 'Status', age_col = 'Age'\n"
+ ]
+ }
+ ],
+ "source": [
+ "if len(candidates_smoker) == 0 or len(candidates_outcome) == 0 or len(candidates_age) == 0:\n",
+ " raise ValueError(\"Impossible de détecter automatiquement les colonnes smoker/outcome/age. \"\n",
+ " \"Regardez la liste des colonnes ci-dessus et remplacez manuellement les noms dans le code.\")\n",
+ "smoker_col = candidates_smoker[0]\n",
+ "outcome_col = candidates_outcome[0]\n",
+ "age_col = candidates_age[0]\n",
+ "\n",
+ "print(f\"\\nUtilisation de : smoker_col = '{smoker_col}', outcome_col = '{outcome_col}', age_col = '{age_col}'\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Valeurs uniques (extrait) :\n",
+ " - Smoker: ['Yes' 'No']\n",
+ " - Status: ['Alive' 'Dead']\n",
+ " - Age: [21. 19.3 57.5 47.1 81.4 36.8 23.8 24.8 49.5 30. 66. 49.2 58.4 60.6\n",
+ " 25.1 43.5 27.1 58.3 65.7 73.2]\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"\\nValeurs uniques (extrait) :\")\n",
+ "for col in [smoker_col, outcome_col, age_col]:\n",
+ " try:\n",
+ " print(f\" - {col}: {pd.unique(data[col])[:20]}\")\n",
+ " except Exception as e:\n",
+ " print(f\" - {col}: erreur lors de l'accès -> {e}\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def map_smoker(val):\n",
+ "    \"\"\"Normalise a raw smoking-status label to 'Yes' or 'No'.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    val : object\n",
+ "        Raw cell value; it is converted with str() before matching.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    str or float\n",
+ "        'Yes', 'No', np.nan for a missing value, or the capitalised label when\n",
+ "        nothing matches (so unexpected categories stay visible to the caller).\n",
+ "    \"\"\"\n",
+ "    if pd.isna(val):\n",
+ "        return np.nan\n",
+ "    s = str(val).strip().lower()\n",
+ "    # Frequent exact labels.\n",
+ "    if s in ('yes', 'y', 'current', 'smoker', 'smokes', 'smoking', 'yes (current)'):\n",
+ "        return 'Yes'\n",
+ "    if s in ('no', 'n', 'never', 'non', 'never smoked'):\n",
+ "        return 'No'\n",
+ "    # Detect a negation on whole words only. A bare substring test for 'no' also\n",
+ "    # fires inside unrelated words ('unknown', 'normal', 'not recorded') and\n",
+ "    # would silently count those rows as non-smokers.\n",
+ "    words = ''.join(ch if ch.isalnum() else ' ' for ch in s).split()\n",
+ "    negated = any(w in ('no', 'never', 'none') or w.startswith('non') for w in words)\n",
+ "    # 'not' only counts as a negation when the label is about smoking, so that\n",
+ "    # 'not known' is left unclassified instead of becoming 'No'.\n",
+ "    if negated or ('not' in words and 'smok' in s):\n",
+ "        return 'No'\n",
+ "    if 'current' in s or 'smok' in s or 'yes' in s:\n",
+ "        return 'Yes'\n",
+ "    return s.capitalize()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def map_outcome(val):\n",
+ "    \"\"\"Normalise a raw vital-status value to 'Dead' or 'Alive'.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    val : object\n",
+ "        Raw cell value; it is converted with str() before matching.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    str or float\n",
+ "        'Dead', 'Alive', np.nan for a missing value, or the capitalised label\n",
+ "        when nothing matches.\n",
+ "    \"\"\"\n",
+ "    if pd.isna(val):\n",
+ "        return np.nan\n",
+ "    s = str(val).strip().lower()\n",
+ "    if any(k in s for k in ('dead', 'died', 'death', 'deceased')):\n",
+ "        return 'Dead'\n",
+ "    if any(k in s for k in ('alive', 'living')):\n",
+ "        return 'Alive'\n",
+ "    # 0/1 coding (1 = dead). A numeric column stored as float becomes '1.0' /\n",
+ "    # '0.0' after str(), so both spellings are accepted.\n",
+ "    if s in ('1', '1.0'):\n",
+ "        return 'Dead'\n",
+ "    if s in ('0', '0.0'):\n",
+ "        return 'Alive'\n",
+ "    return s.capitalize()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data['smoker_std'] = data[smoker_col].apply(map_smoker)\n",
+ "data['outcome_std'] = data[outcome_col].apply(map_outcome)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data['age_num'] = pd.to_numeric(data[age_col], errors='coerce')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Après standardisation — exemples :\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Smoker | \n",
+ " smoker_std | \n",
+ " Status | \n",
+ " outcome_std | \n",
+ " Age | \n",
+ " age_num | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Yes | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " Alive | \n",
+ " 21.0 | \n",
+ " 21.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Yes | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " Alive | \n",
+ " 19.3 | \n",
+ " 19.3 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " No | \n",
+ " No | \n",
+ " Dead | \n",
+ " Dead | \n",
+ " 57.5 | \n",
+ " 57.5 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " No | \n",
+ " No | \n",
+ " Alive | \n",
+ " Alive | \n",
+ " 47.1 | \n",
+ " 47.1 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Yes | \n",
+ " Yes | \n",
+ " Alive | \n",
+ " Alive | \n",
+ " 81.4 | \n",
+ " 81.4 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Smoker smoker_std Status outcome_std Age age_num\n",
+ "0 Yes Yes Alive Alive 21.0 21.0\n",
+ "1 Yes Yes Alive Alive 19.3 19.3\n",
+ "2 No No Dead Dead 57.5 57.5\n",
+ "3 No No Alive Alive 47.1 47.1\n",
+ "4 Yes Yes Alive Alive 81.4 81.4"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"\\nAprès standardisation — exemples :\")\n",
+ "display(data[[smoker_col, 'smoker_std', outcome_col, 'outcome_std', age_col, 'age_num']].head())\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Comptes des valeurs manquantes :\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "smoker_std 0\n",
+ "outcome_std 0\n",
+ "age_num 0\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "print(\"\\nComptes des valeurs manquantes :\")\n",
+ "display(data[['smoker_std','outcome_std','age_num']].isna().sum())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Taille après filtrage (Yes/No, Alive/Dead, age notna) : 1314 lignes\n"
+ ]
+ }
+ ],
+ "source": [
+ "data_filtered = data[data['smoker_std'].isin(['Yes','No']) & data['outcome_std'].isin(['Alive','Dead']) & data['age_num'].notna()].copy()\n",
+ "print(f\"\\nTaille après filtrage (Yes/No, Alive/Dead, age notna) : {len(data_filtered)} lignes\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Tableau de contingence (nombre de femmes vivantes et décédées) :\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | outcome_std | \n",
+ " Alive | \n",
+ " Dead | \n",
+ " total | \n",
+ " taux_mortalite | \n",
+ "
\n",
+ " \n",
+ " | smoker_std | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | No | \n",
+ " 502 | \n",
+ " 230 | \n",
+ " 732 | \n",
+ " 0.314 | \n",
+ "
\n",
+ " \n",
+ " | Yes | \n",
+ " 443 | \n",
+ " 139 | \n",
+ " 582 | \n",
+ " 0.239 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "outcome_std Alive Dead total taux_mortalite\n",
+ "smoker_std \n",
+ "No 502 230 732 0.314\n",
+ "Yes 443 139 582 0.239"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "table_smoking = pd.crosstab(data_filtered['smoker_std'], data_filtered['outcome_std'])\n",
+ "table_smoking['total'] = table_smoking.sum(axis=1)\n",
+ "table_smoking['taux_mortalite'] = table_smoking.get('Dead', 0) / table_smoking['total']\n",
+ "print(\"\\nTableau de contingence (nombre de femmes vivantes et décédées) :\")\n",
+ "display(table_smoking)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "plt.figure(figsize=(6,4))\n",
+ "sns.barplot(x=table_smoking.index, y=table_smoking['taux_mortalite'].values)\n",
+ "plt.title(\"Taux de mortalité selon le tabagisme (standardisé)\")\n",
+ "plt.ylabel(\"Taux de mortalité\")\n",
+ "plt.xlabel(\"Statut tabagique\")\n",
+ "plt.ylim(0, table_smoking['taux_mortalite'].max()*1.15)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Age classes are right-closed intervals: [18, 34], (34, 54], (54, 64], (64, 120].\n",
+ "bins = [18, 34, 54, 64, 120]\n",
+ "labels = ['18-34', '35-54', '55-64', '65+']\n",
+ "# include_lowest=True closes the first interval on the left so that women aged\n",
+ "# exactly 18.0 fall in '18-34'. Without it pd.cut returns NaN for them and the\n",
+ "# later groupby on age_class drops those rows from the stratified table.\n",
+ "data_filtered['age_class'] = pd.cut(data_filtered['age_num'], bins=bins, labels=labels, right=True, include_lowest=True)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Tableau par classe d'âge et tabagisme :\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " outcome_std | \n",
+ " Alive | \n",
+ " Dead | \n",
+ " total | \n",
+ " taux_mortalite | \n",
+ "
\n",
+ " \n",
+ " | age_class | \n",
+ " smoker_std | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 18-34 | \n",
+ " No | \n",
+ " 212 | \n",
+ " 6 | \n",
+ " 218 | \n",
+ " 0.028 | \n",
+ "
\n",
+ " \n",
+ " | Yes | \n",
+ " 172 | \n",
+ " 5 | \n",
+ " 177 | \n",
+ " 0.028 | \n",
+ "
\n",
+ " \n",
+ " | 35-54 | \n",
+ " No | \n",
+ " 180 | \n",
+ " 19 | \n",
+ " 199 | \n",
+ " 0.095 | \n",
+ "
\n",
+ " \n",
+ " | Yes | \n",
+ " 196 | \n",
+ " 41 | \n",
+ " 237 | \n",
+ " 0.173 | \n",
+ "
\n",
+ " \n",
+ " | 55-64 | \n",
+ " No | \n",
+ " 81 | \n",
+ " 40 | \n",
+ " 121 | \n",
+ " 0.331 | \n",
+ "
\n",
+ " \n",
+ " | Yes | \n",
+ " 64 | \n",
+ " 51 | \n",
+ " 115 | \n",
+ " 0.443 | \n",
+ "
\n",
+ " \n",
+ " | 65+ | \n",
+ " No | \n",
+ " 28 | \n",
+ " 165 | \n",
+ " 193 | \n",
+ " 0.855 | \n",
+ "
\n",
+ " \n",
+ " | Yes | \n",
+ " 7 | \n",
+ " 42 | \n",
+ " 49 | \n",
+ " 0.857 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "outcome_std Alive Dead total taux_mortalite\n",
+ "age_class smoker_std \n",
+ "18-34 No 212 6 218 0.028\n",
+ " Yes 172 5 177 0.028\n",
+ "35-54 No 180 19 199 0.095\n",
+ " Yes 196 41 237 0.173\n",
+ "55-64 No 81 40 121 0.331\n",
+ " Yes 64 51 115 0.443\n",
+ "65+ No 28 165 193 0.855\n",
+ " Yes 7 42 49 0.857"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "table_age = data_filtered.groupby(['age_class','smoker_std','outcome_std']).size().unstack(fill_value=0)\n",
+ "table_age['total'] = table_age.sum(axis=1)\n",
+ "table_age['taux_mortalite'] = table_age.get('Dead', 0) / table_age['total']\n",
+ "print(\"\\nTableau par classe d'âge et tabagisme :\")\n",
+ "display(table_age)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 46,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "plt.figure(figsize=(8,5))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/conda/lib/python3.6/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.\n",
+ " return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Binary death indicator (1 = dead) so that the bar height is the mortality rate.\n",
+ "data_filtered['dead_bin'] = (data_filtered['outcome_std']=='Dead').astype(int)\n",
+ "# Create the figure in the same cell as the plot: a figure opened in a separate\n",
+ "# cell is rendered there (empty), and this plot would then use the default size.\n",
+ "plt.figure(figsize=(8,5))\n",
+ "sns.barplot(data=data_filtered, x='age_class', y='dead_bin', hue='smoker_std', estimator=np.mean)\n",
+ "plt.title(\"Taux de mortalité par tranche d'âge et tabagisme\")\n",
+ "plt.ylabel(\"Taux de mortalité\")\n",
+ "plt.xlabel(\"Classe d'âge\")\n",
+ "plt.ylim(0, None)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import statsmodels.formula.api as smf\n",
+ "data_filtered['Death'] = data_filtered['dead_bin']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Résumé du modèle : Death ~ age\n",
+ " Logit Regression Results \n",
+ "==============================================================================\n",
+ "Dep. Variable: Death No. Observations: 1314\n",
+ "Model: Logit Df Residuals: 1312\n",
+ "Method: MLE Df Model: 1\n",
+ "Date: Mon, 10 Nov 2025 Pseudo R-squ.: 0.3560\n",
+ "Time: 09:38:47 Log-Likelihood: -502.39\n",
+ "converged: True LL-Null: -780.16\n",
+ " LLR p-value: 7.883e-123\n",
+ "==============================================================================\n",
+ " coef std err z P>|z| [0.025 0.975]\n",
+ "------------------------------------------------------------------------------\n",
+ "Intercept -6.1045 0.321 -18.992 0.000 -6.735 -5.475\n",
+ "age_num 0.0977 0.006 17.578 0.000 0.087 0.109\n",
+ "==============================================================================\n"
+ ]
+ }
+ ],
+ "source": [
+ "model_age = smf.logit(\"Death ~ age_num\", data=data_filtered).fit(disp=False)\n",
+ "print(\"\\nRésumé du modèle : Death ~ age\")\n",
+ "print(model_age.summary())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Résumé du modèle : Death ~ age + tabagisme (C(smoker_std))\n",
+ " Logit Regression Results \n",
+ "==============================================================================\n",
+ "Dep. Variable: Death No. Observations: 1314\n",
+ "Model: Logit Df Residuals: 1311\n",
+ "Method: MLE Df Model: 2\n",
+ "Date: Mon, 10 Nov 2025 Pseudo R-squ.: 0.3579\n",
+ "Time: 09:38:54 Log-Likelihood: -500.95\n",
+ "converged: True LL-Null: -780.16\n",
+ " LLR p-value: 5.534e-122\n",
+ "========================================================================================\n",
+ " coef std err z P>|z| [0.025 0.975]\n",
+ "----------------------------------------------------------------------------------------\n",
+ "Intercept -6.3519 0.360 -17.637 0.000 -7.058 -5.646\n",
+ "C(smoker_std)[T.Yes] 0.2787 0.165 1.689 0.091 -0.045 0.602\n",
+ "age_num 0.0998 0.006 17.290 0.000 0.089 0.111\n",
+ "========================================================================================\n"
+ ]
+ }
+ ],
+ "source": [
+ "model_age_smoke = smf.logit(\"Death ~ age_num + C(smoker_std)\", data=data_filtered).fit(disp=False)\n",
+ "print(\"\\nRésumé du modèle : Death ~ age + tabagisme (C(smoker_std))\")\n",
+ "print(model_age_smoke.summary())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Interprétation rapide :\n",
+ "- Vérifiez le tableau de contingence : si le taux global semble plus bas chez les fumeuses,\n",
+ " c'est probablement parce que les fumeuses sont en moyenne plus jeunes (paradoxe de Simpson).\n",
+ "- Après ajustement sur l'âge (régression logistique), le tabagisme montre en général un effet de risque positif.\n"
+ ]
+ }
+ ],
+ "source": [
+ "age_range = np.linspace(data_filtered['age_num'].min(), data_filtered['age_num'].max(), 100)\n",
+ "pred_df_no = pd.DataFrame({'age_num': age_range, 'smoker_std':'No'})\n",
+ "pred_df_yes = pd.DataFrame({'age_num': age_range, 'smoker_std':'Yes'})\n",
+ "\n",
+ "pred_no = model_age_smoke.predict(pred_df_no)\n",
+ "pred_yes = model_age_smoke.predict(pred_df_yes)\n",
+ "\n",
+ "plt.figure(figsize=(8,5))\n",
+ "plt.plot(age_range, pred_no, label='Non fumeuses', linestyle='--')\n",
+ "plt.plot(age_range, pred_yes, label='Fumeuses', linestyle='-')\n",
+ "plt.title(\"Probabilité prédite de décès à 20 ans selon l'âge et le tabagisme\")\n",
+ "plt.xlabel(\"Âge au début de l'étude\")\n",
+ "plt.ylabel(\"Probabilité prédite de décès\")\n",
+ "plt.legend()\n",
+ "plt.show()\n",
+ "\n",
+ "print(\"\\nInterprétation rapide :\")\n",
+ "print(\"- Vérifiez le tableau de contingence : si le taux global semble plus bas chez les fumeuses,\")\n",
+ "print(\" c'est probablement parce que les fumeuses sont en moyenne plus jeunes (paradoxe de Simpson).\")\n",
+ "print(\"- Après ajustement sur l'âge (régression logistique), le tabagisme montre en général un effet de risque positif.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
@@ -16,10 +1068,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.3"
+ "version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
-