FInito

parent df81b418
...@@ -31,13 +31,16 @@ ...@@ -31,13 +31,16 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 1,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"%matplotlib inline\n", "%matplotlib inline\n",
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
"import pandas as pd\n" "import pandas as pd\n",
"from scipy import stats\n",
"import numpy as np\n",
"import seaborn as sns\n"
] ]
}, },
{ {
...@@ -49,7 +52,7 @@ ...@@ -49,7 +52,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -58,7 +61,7 @@ ...@@ -58,7 +61,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 20, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
...@@ -526,7 +529,7 @@ ...@@ -526,7 +529,7 @@
"[1314 rows x 3 columns]" "[1314 rows x 3 columns]"
] ]
}, },
"execution_count": 20, "execution_count": 3,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
...@@ -538,7 +541,7 @@ ...@@ -538,7 +541,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 21, "execution_count": 4,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
...@@ -578,7 +581,7 @@ ...@@ -578,7 +581,7 @@
"Index: []" "Index: []"
] ]
}, },
"execution_count": 21, "execution_count": 4,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
...@@ -596,7 +599,7 @@ ...@@ -596,7 +599,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -612,41 +615,1138 @@ ...@@ -612,41 +615,1138 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 23, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Smoker x Status contingency table; margins=True appends the 'All' row and column.\n",
"sm_st = pd.crosstab(index=raw_data['Smoker'], columns=raw_data['Status'], margins=True)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n", "\n",
"fumeuse = raw_data.loc[raw_data.Smoker == \"Yes\"]\n", " .dataframe thead th {\n",
"non_fumeuse = raw_data.loc[raw_data.Smoker == \"No\"]" " text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Status</th>\n",
" <th>Alive</th>\n",
" <th>Dead</th>\n",
" <th>All</th>\n",
" <th>MortalityRate</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Smoker</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>No</th>\n",
" <td>502</td>\n",
" <td>230</td>\n",
" <td>732</td>\n",
" <td>0.314208</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Yes</th>\n",
" <td>443</td>\n",
" <td>139</td>\n",
" <td>582</td>\n",
" <td>0.238832</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All</th>\n",
" <td>945</td>\n",
" <td>369</td>\n",
" <td>1314</td>\n",
" <td>0.280822</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Status Alive Dead All MortalityRate\n",
"Smoker \n",
"No 502 230 732 0.314208\n",
"Yes 443 139 582 0.238832\n",
"All 945 369 1314 0.280822"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Share of deaths in each row of the contingency table (the 'All' margin row included).\n",
"sm_st['MortalityRate'] = sm_st['Dead'] / sm_st['All']\n",
"sm_st"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAd8AAAFDCAYAAAB7pARgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAEyZJREFUeJzt3X+sX3V9x/Hny3adScWZyBWxLZZt9UcXAfGKMoiKEUJFV38twIi/Jmma2Rm3uK2JiSFzM+KPbHOrNg3pHIuOuU2WTivF4RhbkNlLZPwKNV0FuSmOi6LIVKDy3h/fc/Xr5cI9F+79lNvv85Hc9HvO+XzOPd8mt8/vOd9vz01VIUmS2nnK4T4ASZJGjfGVJKkx4ytJUmPGV5KkxoyvJEmNGV9JkhozvpIkNWZ8JUlqzPhKktTY8sN9ALM5+uija+3atYf7MCRJmpfrr7/+nqoam2vckzK+a9euZWJi4nAfhiRJ85Lkjj7jvOwsSVJjxleSpMaMryRJjRlfSZIaM76SJDVmfCVJasz4SpLUmPGVJKkx4ytJUmPGV5KkxoyvJEmNGV9Jkhp7Uv5iBUlLy9qtXzzchyA9Lrd/+JzD8n0985UkqTHjK0lSY8ZXkqTGjK8kSY31im+Ss5PsS7I/ydZZtm9McmOSG5JMJDm971xJkkbNnPFNsgzYBmwA1gPnJ1k/Y9hVwIlVdRLw28Al85grSdJI6XPmewqwv6oOVNWDwGXAxuEBVXV/VVW3uBKovnMlSRo1feK7CrhzaHmyW/dzkrwxyW3AFxmc/fae283f1F2ynpiamupz7JIkLUl94ptZ1tUjVlRdXlUvAN4AfHA+c7v5O6pqvKrGx8bGehyWJElLU5/4TgJrhpZXAwcfbXBVXQP8SpKj5ztXkqRR0Ce+e4F1SY5PsgI4D9g1PCDJryZJ9/hkYAXwnT5zJUkaNXPe27mqDiXZAuwBlgE7q+qWJJu77duBNwNvS/IQ8CPg3O4DWLPOXaTnIknSktDrFytU1W5g94x124ceXwxc3HeuJEmjzDtcSZLUmPGVJKkx4ytJUmPGV5KkxoyvJEmNGV9JkhozvpIkNWZ8JUlqzPhKktSY8ZUkqTHjK0lSY8ZXkqTGjK8kSY0ZX0mSGjO+kiQ1ZnwlSWrM+EqS1JjxlSSpMeMrSVJjxleSpMaMryRJjRlfSZIaM76SJDVmfCVJasz4SpLUmPGVJKkx4ytJUmPGV5KkxoyvJEmNGV9JkhozvpIkNWZ8JUlqzPhKktSY8ZUkqTHjK0lSY8ZXkqTGesU3ydlJ9iXZn2TrLNsvSHJj93VtkhOHtt2e5KYkNySZWMiDlyRpKVo+14Aky4BtwJnAJLA3ya6qunVo2DeBV1bVvUk2ADuAlw1tP6Oq7lnA45Ykacnqc+Z7CrC/qg5U1YPAZcDG4QFVdW1V3dstXgesXtjDlCTpyNEnvquAO4eWJ7t1j+ZdwJeGlgu4Msn1STY92qQkm5JMJJmYmprqcViSJC1Nc152BjLLupp1YHIGg/iePrT6tKo6mORZwJeT3FZV1zxih1U7GFyuZnx8fNb9S5J0JOhz5jsJrBlaXg0cnDkoyQnAJcDGqvrO9PqqOtj9eTdwOYPL2JIkjaw+8d0LrEtyfJIVwHnAruEBSY4DPg+8taq+MbR+ZZKjph8DZwE3L9TBS5K0FM152bmqDiXZAuwBlgE7q+qWJJu77duBDwDPBD6ZBOBQVY0DxwCXd+uWA5+tqisW5ZlIkrRE9HnPl6raDeyesW770OMLgQtnmXcAOHHmekmSRpl3uJIkqTHjK0lSY8ZXkqTGjK8kSY0ZX0mSGjO+kiQ1ZnwlSWrM+EqS1JjxlSSpMeMrSVJjxleSpMZ63dt5KVu79YuH+xCkx+X2D59zuA9B0iLxzFeSpMaMryRJjRlfSZIaM76SJDVmfCVJasz4SpLUmPGVJKkx4ytJUmPGV5KkxoyvJE
mNGV9JkhozvpIkNWZ8JUlqzPhKktSY8ZUkqTHjK0lSY8ZXkqTGjK8kSY0ZX0mSGjO+kiQ1ZnwlSWrM+EqS1JjxlSSpsV7xTXJ2kn1J9ifZOsv2C5Lc2H1dm+TEvnMlSRo1c8Y3yTJgG7ABWA+cn2T9jGHfBF5ZVScAHwR2zGOuJEkjpc+Z7ynA/qo6UFUPApcBG4cHVNW1VXVvt3gdsLrvXEmSRk2f+K4C7hxanuzWPZp3AV+a79wkm5JMJJmYmprqcViSJC1NfeKbWdbVrAOTMxjE94/mO7eqdlTVeFWNj42N9TgsSZKWpuU9xkwCa4aWVwMHZw5KcgJwCbChqr4zn7mSJI2SPme+e4F1SY5PsgI4D9g1PCDJccDngbdW1TfmM1eSpFEz55lvVR1KsgXYAywDdlbVLUk2d9u3Ax8Angl8MgnAoe4S8qxzF+m5SJK0JPS57ExV7QZ2z1i3fejxhcCFfedKkjTKvMOVJEmNGV9JkhozvpIkNWZ8JUlqzPhKktSY8ZUkqTHjK0lSY8ZXkqTGjK8kSY0ZX0mSGjO+kiQ1ZnwlSWrM+EqS1JjxlSSpMeMrSVJjxleSpMaMryRJjRlfSZIaM76SJDVmfCVJasz4SpLUmPGVJKkx4ytJUmPGV5KkxoyvJEmNGV9JkhozvpIkNWZ8JUlqzPhKktSY8ZUkqTHjK0lSY8ZXkqTGjK8kSY0ZX0mSGjO+kiQ11iu+Sc5Osi/J/iRbZ9n+giRfTfJAkvfN2HZ7kpuS3JBkYqEOXJKkpWr5XAOSLAO2AWcCk8DeJLuq6tahYd8F3gO84VF2c0ZV3fNED1aSpCNBnzPfU4D9VXWgqh4ELgM2Dg+oqrurai/w0CIcoyRJR5Q+8V0F3Dm0PNmt66uAK5Ncn2TTow1KsinJRJKJqampeexekqSlpU98M8u6msf3OK2qTgY2AO9O8orZBlXVjqoar6rxsbGxeexekqSlpU98J4E1Q8urgYN9v0FVHez+vBu4nMFlbEmSRlaf+O4F1iU5PskK4DxgV5+dJ1mZ5Kjpx8BZwM2P92AlSToSzPlp56o6lGQLsAdYBuysqluSbO62b0/ybGACeDrwcJL3AuuBo4HLk0x/r89W1RWL81QkSVoa5owvQFXtBnbPWLd96PG3GVyOnuk+4MQncoCSJB1pvMOVJEmNGV9JkhozvpIkNWZ8JUlqzPhKktSY8ZUkqTHjK0lSY8ZXkqTGjK8kSY0ZX0mSGjO+kiQ1ZnwlSWrM+EqS1JjxlSSpMeMrSVJjxleSpMaMryRJjRlfSZIaM76SJDVmfCVJasz4SpLUmPGVJKkx4ytJUmPGV5KkxoyvJEmNGV9JkhozvpIkNWZ8JUlqzPhKktSY8ZUkqTHjK0lSY8ZXkqTGjK8kSY0ZX0mSGjO+kiQ11iu+Sc5Osi/J/iRbZ9n+giRfTfJAkvfNZ64kSaNmzvgmWQZsAzYA64Hzk6yfMey7wHuAjz2OuZIkjZQ+Z76nAPur6kBVPQhcBmwcHlBVd1fVXuCh+c6VJGnU9InvKuDOoeXJbl0fvecm2ZRkIsnE1NRUz91LkrT09IlvZllXPfffe25V7aiq8aoaHxsb67l7SZKWnj7xnQTWDC2vBg723P8TmStJ0hGpT3z3AuuSHJ9kBXAesKvn/p/IXEmSjkjL5xpQVYeSbAH2AMuAnVV1S5LN3fbtSZ4NTABPBx5O8l5gfVXdN9vcxXoykiQtBXPGF6CqdgO7Z6zbPvT42wwuKfeaK0nSKPMOV5IkNWZ8JUlqzPhKktSY8ZUkqTHjK0lSY8ZXkqTGjK8kSY0ZX0mSGjO+kiQ1ZnwlSWrM+EqS1JjxlSSpMeMrSVJjxleSpMaMryRJjRlfSZIaM76SJDVmfCVJasz4SpLUmPGVJKkx4ytJUmPGV5KkxoyvJEmNGV9JkhozvpIkNWZ8JUlqzPhKktSY8ZUkqTHjK0lSY8ZXkqTGjK8kSY0ZX0mSGjO+kiQ1ZnwlSWrM+EqS1F
iv+CY5O8m+JPuTbJ1le5J8ott+Y5KTh7bdnuSmJDckmVjIg5ckaSlaPteAJMuAbcCZwCSwN8muqrp1aNgGYF339TLgU92f086oqnsW7KglSVrC+pz5ngLsr6oDVfUgcBmwccaYjcClNXAd8Iwkxy7wsUqSdEToE99VwJ1Dy5Pdur5jCrgyyfVJNj3eA5Uk6Ugx52VnILOsq3mMOa2qDiZ5FvDlJLdV1TWP+CaDMG8COO6443ocliRJS1OfM99JYM3Q8mrgYN8xVTX9593A5QwuYz9CVe2oqvGqGh8bG+t39JIkLUF94rsXWJfk+CQrgPOAXTPG7ALe1n3q+eXA96vqriQrkxwFkGQlcBZw8wIevyRJS86cl52r6lCSLcAeYBmws6puSbK5274d2A28FtgP/BB4Zzf9GODyJNPf67NVdcWCPwtJkpaQPu/5UlW7GQR2eN32occFvHuWeQeAE5/gMUqSdETxDleSJDVmfCVJasz4SpLUmPGVJKkx4ytJUmPGV5KkxoyvJEmNGV9JkhozvpIkNWZ8JUlqzPhKktSY8ZUkqTHjK0lSY8ZXkqTGjK8kSY0ZX0mSGjO+kiQ1ZnwlSWrM+EqS1JjxlSSpMeMrSVJjxleSpMaMryRJjRlfSZIaM76SJDVmfCVJasz4SpLUmPGVJKkx4ytJUmPGV5KkxoyvJEmNGV9JkhozvpIkNWZ8JUlqzPhKktSY8ZUkqbFe8U1ydpJ9SfYn2TrL9iT5RLf9xiQn950rSdKomTO+SZYB24ANwHrg/CTrZwzbAKzrvjYBn5rHXEmSRkqfM99TgP1VdaCqHgQuAzbOGLMRuLQGrgOekeTYnnMlSRopy3uMWQXcObQ8Cbysx5hVPecCkGQTg7NmgPuT7OtxbDq8jgbuOdwHcaTKxYf7CPQk4s/aIlmEn7Pn9hnUJ76ZZV31HNNn7mBl1Q5gR4/j0ZNEkomqGj/cxyEd6fxZO/L0ie8ksGZoeTVwsOeYFT3mSpI0Uvq857sXWJfk+CQrgPOAXTPG7ALe1n3q+eXA96vqrp5zJUkaKXOe+VbVoSRbgD3AMmBnVd2SZHO3fTuwG3gtsB/4IfDOx5q7KM9Eh4NvE0ht+LN2hEnVrG/BSpKkReIdriRJasz4SpLUmPEdUUnen+SW7nagNySZ9f9fz2N/r0ryhYU6Pkk6khnfEZTkVOB1wMlVdQLwGn7+Ziitj6fPf3mTDqskleTjQ8vvS3LRAu7/Kd098m9OclOSvUmOX4D9viPJXy3EMWrhGN/RdCxwT1U9AFBV91TVwSS3J/lQkq8mmUhycpI9Sf5n+tPt3X8n++jQPxDnztx5kpcm+XqSX06yMsnO7h+SryfZ2I15R5J/SPIvwJUtn7z0OD0AvCnJ0Yu0/3OB5wAnVNWLgDcC31uk79WLL4wXj/EdTVcCa5J8I8knk7xyaNudVXUq8B/Ap4G3AC8H/rjb/ibgJOBEBmfMH+3u4w1Akl8HtgMbq+oA8H7gK1X1UuCMbvzKbvipwNur6tWL9DylhXSIwX/5+b2ZG5I8N8lV3ds4VyU5rlv/6e5s9tokB5K85TH2fyxwV1U9DFBVk1V1b7ef+5NcnOT6JP+a5JQkV3f7/I1uzFOT/HX3ovjrSc6Y5TjP6V5cH51kLMk/dS+M9yY5rRtzUZIdSa4ELn2Cf2d6FMZ3BFXV/cBLGNxLewr4+yTv6DZP3wTlJuC/quoHVTUF/DjJM4DTgb+rqp9U1f8C/w68tJvzQgb/OL2+qr7VrTsL2JrkBuBq4KnAcd22L1fVdxfpaUqLYRtwQZJfmrH+rxj8cpkTgM8AnxjadiyDn5vXAR9+jH1/Dnh99xmMjyd58dC2lcDVVfUS4AfAnwBnMjg7nn5h/G6A7qz5fOBvkjx1egdJ3ghsBV5bVfcAfwH8WffC+M3AJUPf7yUMXkD/1mP+behx85LCiKqqnzCI4dVJbgLe3m16oPvz4aHH08vLmf1+3dPuYhDXF/Oz24gGeH
NV/dwvyug+4PV/T+ApSM1V1X1JLgXeA/xoaNOpDK4KAfwt8JGhbf/cnc3emuSYx9j3ZJLnA6/uvq5K8ptVdRXwIHBFN/Qm4IGqeqj72V3brT8d+MtuX7cluQN4XrftDGAcOKuq7uvWvQZYn/z0R/rpSY7qHu+qquHnpwXmme8ISvL8JOuGVp0E3NFz+jXAuUmWJRkDXgF8rdv2PeAc4ENJXtWt2wP8brqf8Bmv5qWl6M+BdzE4G300w3cvGn4R+1gvXqmqB6rqS1X1B8CHgDd0mx6qn90R6acvjLuoT59EPda+DwBH8bMYw+Df/1Or6qTua1VV/aDb5gvjRWZ8R9PTGFySujXJjcB64KKecy8HbgT+G/gK8IdV9e3pjd2l6NcD27qz2w8CvwDcmOTmbllasrq3Sj7HIMDTrmVw73qAC4D/nO9+uw84Pqd7/BTgBPq/KIbBC+MLuvnPY/D2zvQVpzsYnJlfmuTXunVXAluGvv9J8z1mPX7eXlKSekhyf1U9rXt8DPBN4CNVdVGStcBOBr93dwp4Z1V9K8mngS9U1T/O3Mcs+z8b+FPgF7tVXwN+p6p+PON7XwTcX1UfG95n9/7udgbv1x4Cfr+q/q37PMd4VW3prjx9hsEL5O8zeA/7hQzOnq+pqs0z96/FYXwlSWrMy86SJDXmp50lqaEkL2LwiehhD1TVE7rFq5YWLztLktSYl50lSWrM+EqS1JjxlSSpMeMrSVJj/w/I8xdz0Wfx+gAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Overall mortality rate per smoking status, read from the contingency table\n",
"# instead of being retyped (and truncated) by hand.\n",
"groups = ['Yes', 'No']\n",
"rates = (sm_st.loc[groups, 'Dead'] / sm_st.loc[groups, 'All']).tolist()\n",
"\n",
"fig, ax = plt.subplots()\n",
"ax.bar(['Smoker', 'Non_Smoker'], rates)\n",
"ax.set_ylabel('Mortality Rate')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Le taux de mortalité est plus élevé chez les femmes non fumeuses que chez les femmes fumeuses, ce qui semble surprenant. \n",
"On peut regarder la significativité de ce résultat en faisant un test de comparaison des proportions de ces deux populations différentes (fumeuses et non fumeuses).\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 27, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"str" "False"
] ]
}, },
"execution_count": 27, "execution_count": 9,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"type(fumeuse.Status[1])" "from statsmodels.stats.proportion import proportions_ztest\n",
"# Two-sample z-test on the death proportions of non-smokers vs smokers.\n",
"count = sm_st.loc[['No', 'Yes'], 'Dead'].values\n",
"nobs = sm_st.loc[['No', 'Yes'], 'All'].values\n",
"stat, pval = proportions_ztest(count, nobs)\n",
"pval > 0.05"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"La p-value est inférieure à 0.05 ; nous considérons donc que le taux de mortalité des fumeuses est significativement différent de celui des non-fumeuses."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Taux de mortalité: Non fumeuse VS Fumeuse en fonction de la tranche d'âge"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"On considère maintenant les classes suivantes : 18-34 ans, 35-54 ans, 55-64 ans, plus de 65 ans afin de voir s'il y a des différences de taux de mortalité entre les fumeuses et les non-fumeuses."
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
"source": [] {
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Smoker</th>\n",
" <th>Status</th>\n",
" <th>Age</th>\n",
" <th>AgeGroup</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>21.0</td>\n",
" <td>18-34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>19.3</td>\n",
" <td>18-34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>57.5</td>\n",
" <td>55-64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>47.1</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>81.4</td>\n",
" <td>65-Plus</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>36.8</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>23.8</td>\n",
" <td>18-34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>57.5</td>\n",
" <td>55-64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>24.8</td>\n",
" <td>18-34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>49.5</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>30.0</td>\n",
" <td>18-34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>66.0</td>\n",
" <td>65-Plus</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>49.2</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>58.4</td>\n",
" <td>55-64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>60.6</td>\n",
" <td>55-64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>25.1</td>\n",
" <td>18-34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>43.5</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>27.1</td>\n",
" <td>18-34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>58.3</td>\n",
" <td>55-64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>65.7</td>\n",
" <td>65-Plus</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>73.2</td>\n",
" <td>65-Plus</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>38.3</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>33.4</td>\n",
" <td>18-34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>62.3</td>\n",
" <td>55-64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>18.0</td>\n",
" <td>18-34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>56.2</td>\n",
" <td>55-64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>59.2</td>\n",
" <td>55-64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>25.8</td>\n",
" <td>18-34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>36.9</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>20.2</td>\n",
" <td>18-34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1284</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>36.0</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1285</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>48.3</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1286</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>63.1</td>\n",
" <td>55-64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1287</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>60.8</td>\n",
" <td>55-64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1288</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>39.3</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1289</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>36.7</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1290</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>63.8</td>\n",
" <td>55-64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1291</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>71.3</td>\n",
" <td>65-Plus</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1292</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>57.7</td>\n",
" <td>55-64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1293</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>63.2</td>\n",
" <td>55-64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1294</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>46.6</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1295</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>82.4</td>\n",
" <td>65-Plus</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1296</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>38.3</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1297</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>32.7</td>\n",
" <td>18-34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1298</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>39.7</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1299</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>60.0</td>\n",
" <td>55-64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1300</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>71.0</td>\n",
" <td>65-Plus</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1301</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>20.5</td>\n",
" <td>18-34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1302</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>44.4</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1303</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>31.2</td>\n",
" <td>18-34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1304</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>47.8</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1305</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>60.9</td>\n",
" <td>55-64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1306</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>61.4</td>\n",
" <td>55-64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1307</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>43.0</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1308</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>42.1</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1309</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>35.9</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1310</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>22.3</td>\n",
" <td>18-34</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1311</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>62.1</td>\n",
" <td>55-64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1312</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>88.6</td>\n",
" <td>65-Plus</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1313</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>39.1</td>\n",
" <td>35-54</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1314 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" Smoker Status Age AgeGroup\n",
"0 Yes Alive 21.0 18-34\n",
"1 Yes Alive 19.3 18-34\n",
"2 No Dead 57.5 55-64\n",
"3 No Alive 47.1 35-54\n",
"4 Yes Alive 81.4 65-Plus\n",
"5 No Alive 36.8 35-54\n",
"6 No Alive 23.8 18-34\n",
"7 Yes Dead 57.5 55-64\n",
"8 Yes Alive 24.8 18-34\n",
"9 Yes Alive 49.5 35-54\n",
"10 Yes Alive 30.0 18-34\n",
"11 No Dead 66.0 65-Plus\n",
"12 Yes Alive 49.2 35-54\n",
"13 No Alive 58.4 55-64\n",
"14 No Dead 60.6 55-64\n",
"15 No Alive 25.1 18-34\n",
"16 No Alive 43.5 35-54\n",
"17 No Alive 27.1 18-34\n",
"18 No Alive 58.3 55-64\n",
"19 Yes Alive 65.7 65-Plus\n",
"20 No Dead 73.2 65-Plus\n",
"21 Yes Alive 38.3 35-54\n",
"22 No Alive 33.4 18-34\n",
"23 Yes Dead 62.3 55-64\n",
"24 No Alive 18.0 18-34\n",
"25 No Alive 56.2 55-64\n",
"26 Yes Alive 59.2 55-64\n",
"27 No Alive 25.8 18-34\n",
"28 No Dead 36.9 35-54\n",
"29 No Alive 20.2 18-34\n",
"... ... ... ... ...\n",
"1284 Yes Dead 36.0 35-54\n",
"1285 Yes Alive 48.3 35-54\n",
"1286 No Alive 63.1 55-64\n",
"1287 No Alive 60.8 55-64\n",
"1288 Yes Dead 39.3 35-54\n",
"1289 No Alive 36.7 35-54\n",
"1290 No Alive 63.8 55-64\n",
"1291 No Dead 71.3 65-Plus\n",
"1292 No Alive 57.7 55-64\n",
"1293 No Alive 63.2 55-64\n",
"1294 No Alive 46.6 35-54\n",
"1295 Yes Dead 82.4 65-Plus\n",
"1296 Yes Alive 38.3 35-54\n",
"1297 Yes Alive 32.7 18-34\n",
"1298 No Alive 39.7 35-54\n",
"1299 Yes Dead 60.0 55-64\n",
"1300 No Dead 71.0 65-Plus\n",
"1301 No Alive 20.5 18-34\n",
"1302 No Alive 44.4 35-54\n",
"1303 Yes Alive 31.2 18-34\n",
"1304 Yes Alive 47.8 35-54\n",
"1305 Yes Alive 60.9 55-64\n",
"1306 No Dead 61.4 55-64\n",
"1307 Yes Alive 43.0 35-54\n",
"1308 No Alive 42.1 35-54\n",
"1309 Yes Alive 35.9 35-54\n",
"1310 No Alive 22.3 18-34\n",
"1311 Yes Dead 62.1 55-64\n",
"1312 No Dead 88.6 65-Plus\n",
"1313 No Alive 39.1 35-54\n",
"\n",
"[1314 rows x 4 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Left-closed bins [18, 35), [35, 55), [55, 65), [65, 200) so that every edge\n",
"# agrees with its label; 200 is presumably just a bound above any recorded age.\n",
"bins = [18, 35, 55, 65, 200]\n",
"labels = ['18-34', '35-54', '55-64', '65-Plus']\n",
"raw_data['AgeGroup'] = pd.cut(raw_data['Age'], bins=bins, labels=labels, right=False)\n",
"raw_data"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>AgeGroup</th>\n",
" <th>Smoker</th>\n",
" <th>Status</th>\n",
" <th>counts</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>18-34</td>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>213</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18-34</td>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>18-34</td>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>174</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>18-34</td>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>35-54</td>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>180</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>35-54</td>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>35-54</td>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>198</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>35-54</td>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>55-64</td>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>55-64</td>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>55-64</td>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>55-64</td>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>51</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>65-Plus</td>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>65-Plus</td>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>166</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>65-Plus</td>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>65-Plus</td>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>42</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" AgeGroup Smoker Status counts\n",
"0 18-34 No Alive 213\n",
"1 18-34 No Dead 6\n",
"2 18-34 Yes Alive 174\n",
"3 18-34 Yes Dead 5\n",
"4 35-54 No Alive 180\n",
"5 35-54 No Dead 19\n",
"6 35-54 Yes Alive 198\n",
"7 35-54 Yes Dead 41\n",
"8 55-64 No Alive 80\n",
"9 55-64 No Dead 39\n",
"10 55-64 Yes Alive 64\n",
"11 55-64 Yes Dead 51\n",
"12 65-Plus No Alive 29\n",
"13 65-Plus No Dead 166\n",
"14 65-Plus Yes Alive 7\n",
"15 65-Plus Yes Dead 42"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of women for each (age group, smoking status, vital status) combination.\n",
"group_keys = ['AgeGroup', 'Smoker', 'Status']\n",
"sm_st_age = raw_data.groupby(group_keys).size().reset_index(name='counts')\n",
"sm_st_age"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# width of the bars\n",
"barWidth = 0.2\n",
"\n",
"# Mortality rate per age group and smoking status, computed from the data\n",
"# (mean of a boolean 'is dead' flag) rather than retyped from the counts table.\n",
"is_dead = raw_data['Status'].eq('Dead')\n",
"age_rates = is_dead.groupby([raw_data['AgeGroup'], raw_data['Smoker']]).mean().unstack('Smoker')\n",
"\n",
"# Heights of the blue bars - Smoker\n",
"bars1 = age_rates['Yes'].tolist()\n",
"\n",
"# Heights of the cyan bars - Non Smoker\n",
"bars2 = age_rates['No'].tolist()\n",
"\n",
"# The x position of bars\n",
"r1 = np.arange(len(bars1))\n",
"r2 = [x + barWidth for x in r1]\n",
"\n",
"# Create blue bars\n",
"plt.bar(r1, bars1, width=barWidth, color='blue', edgecolor='black', label='Smoker')\n",
"\n",
"# Create cyan bars\n",
"plt.bar(r2, bars2, width=barWidth, color='cyan', edgecolor='black', label='Non Smoker')\n",
"\n",
"# general layout: one tick per age group, centred between its two bars\n",
"plt.xticks(r1 + barWidth / 2, age_rates.index.tolist())\n",
"plt.ylabel('Mortality Rate')\n",
"plt.legend()\n",
"\n",
"# Show graphic\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"En considérant les catégories d'âge, il n'y a pas de différence notable entre le taux de mortalité des fumeuses et celui des non-fumeuses pour les 18-34 ans et pour les 65 ans et plus. \n",
"En revanche pour les catégories 35-54 et 55-64, le taux de mortalité des fumeuses est plus élevé. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Régression logistique: Mortalité en fonction de l'âge"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Afin d'éviter un biais induit par des regroupements en tranches d'âges arbitraires et non régulières, il est envisageable d'essayer de réaliser une régression logistique. Si on introduit une variable Death valant 1 ou 0 pour indiquer si l'individu est décédé durant la période de 20 ans, on peut étudier le modèle Death ~ Age pour étudier la probabilité de décès en fonction de l'âge selon que l'on considère le groupe des fumeuses ou des non fumeuses. "
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" \"\"\"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[4.63502546]\n",
"[[-0.0730934]]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.6/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.\n",
" return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval\n"
]
},
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7fe07be691d0>"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Smokers\n",
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"# .copy() yields an independent frame, so adding a column below does not\n",
"# raise SettingWithCopyWarning.\n",
"fum = raw_data.loc[raw_data.Smoker == 'Yes'].copy()\n",
"\n",
"# NOTE: despite its name, 'Death' is coded 1 = Alive and 0 = Dead, so the model\n",
"# and the plot below describe the probability of being alive.\n",
"fum['Death'] = fum.Status.map({'Dead': 0, 'Alive': 1})\n",
"X_f = np.array(fum.Age).reshape(-1, 1)\n",
"y_f = np.array(fum.Death)\n",
"clf_f = LogisticRegression(random_state=0).fit(X_f, y_f)\n",
"\n",
"# Check trained model intercept\n",
"print(clf_f.intercept_)\n",
"\n",
"# Check trained model regression coefficients\n",
"print(clf_f.coef_)\n",
"\n",
"# Make predictions\n",
"preds_f = clf_f.predict(X=X_f)\n",
"\n",
"# Mean accuracy on the training data (printed: a bare mid-cell expression is discarded)\n",
"print(clf_f.score(X=X_f, y=y_f))\n",
"\n",
"sns.regplot(x='Age', y='Death', data=fum, logistic=True)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" after removing the cwd from sys.path.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[5.70698877]\n",
"[[-0.08999613]]\n"
]
},
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7fe078c73f28>"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Non-smokers\n",
"# .copy() yields an independent frame, so adding a column below does not\n",
"# raise SettingWithCopyWarning.\n",
"nfum = raw_data.loc[raw_data.Smoker == 'No'].copy()\n",
"\n",
"# NOTE: despite its name, 'Death' is coded 1 = Alive and 0 = Dead, so the model\n",
"# and the plot below describe the probability of being alive.\n",
"nfum['Death'] = nfum.Status.map({'Dead': 0, 'Alive': 1})\n",
"X_nf = np.array(nfum.Age).reshape(-1, 1)\n",
"y_nf = np.array(nfum.Death)\n",
"clf_nf = LogisticRegression(random_state=0).fit(X_nf, y_nf)\n",
"\n",
"# Check trained model intercept\n",
"print(clf_nf.intercept_)\n",
"\n",
"# Check trained model regression coefficients\n",
"print(clf_nf.coef_)\n",
"\n",
"# Make predictions with the non-smoker model (not the smoker model clf_f)\n",
"preds_nf = clf_nf.predict(X=X_nf)\n",
"\n",
"# Mean accuracy on the training data (printed: a bare mid-cell expression is discarded)\n",
"print(clf_nf.score(X=X_nf, y=y_nf))\n",
"\n",
"sns.regplot(x='Age', y='Death', data=nfum, logistic=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Le coefficient de régression du modèle ajusté sur les fumeuses est supérieur à celui du modèle ajusté sur les non-fumeuses ; la mortalité des jeunes commencerait plus tôt chez les fumeuses. \n",
"Ces régressions ne permettent pas de conclure sur la nocivité du tabagisme.\n",
"\n"
]
} }
], ],
"metadata": { "metadata": {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment