From 7477f6632af6d4b0cb38d9e73b45e5864dc0873b Mon Sep 17 00:00:00 2001 From: 86d2379a8cd828206f6e8576c862739f <86d2379a8cd828206f6e8576c862739f@app-learninglab.inria.fr> Date: Fri, 28 Aug 2020 14:43:56 +0000 Subject: [PATCH] no commit message --- module3/exo3/exercice.ipynb | 54 ++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/module3/exo3/exercice.ipynb b/module3/exo3/exercice.ipynb index deadaf0..520aa14 100644 --- a/module3/exo3/exercice.ipynb +++ b/module3/exo3/exercice.ipynb @@ -764,25 +764,59 @@ "On remarque que le taux de mortalité est - nettement - plus élevé dans le groupe des non fumeuses, ce qui constitue le paradoxe de Simpson" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Taux de mortalité par classes d'âge" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nous allons regarder si les résultats persistent en prenant en compte les différentes classes d'âge" + ] + }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 49, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "443" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " Smokers Non-Smokers\n", + "18-34 0.037037 0.026432\n", + "35-54 0.170306 0.099476\n", + "55-64 0.443478 0.330579\n", + "65+ 0.857143 0.854922\n" + ] } ], "source": [ - "len(data[(data['Smoker'] == \"Yes\") & (data['Status'] == \"Alive\")].index)" + "classes_breaks = [0,35,55,65,150] \n", + "tab_alive = [ [len(data[(data['Smoker'] == \"Yes\") & (data['Status'] == \"Alive\") & (classes_breaks[i] <= data['Age']) & (data['Age'] < classes_breaks[i+1])]), len(data[(data['Smoker'] == \"No\") & (data['Status'] == \"Alive\") & (classes_breaks[i] <= data['Age']) & (data['Age'] < classes_breaks[i+1])])] for i in [0,1,2,3]]\n", + "tab_dead = [ [len(data[(data['Smoker'] == \"Yes\") & (data['Status'] == \"Dead\") & 
(classes_breaks[i] <= data['Age']) & (data['Age'] < classes_breaks[i+1])]), len(data[(data['Smoker'] == \"No\") & (data['Status'] == \"Dead\") & (classes_breaks[i] <= data['Age']) & (data['Age'] < classes_breaks[i+1])])] for i in [0,1,2,3]]\n", + "tab_deathrate = [ [tab_dead[i][0]/(tab_dead[i][0]+tab_alive[i][0]) , tab_dead[i][1]/(tab_dead[i][1]+tab_alive[i][1])] for i in [0,1,2,3]]\n", + "\n", + "print(pd.DataFrame(tab_deathrate, [\"18-34\",\"35-54\",\"55-64\",\"65+\"], [\"Smokers\", \"Non-Smokers\"]))" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On remarque cette fois que, pour chaque classe d'âge, le résultat est conforme à ce qui est attendu : le taux de mortalité est nettement supérieur pour le groupe des fumeuses, sauf pour les plus de 65 ans, où les résultats sont sensiblement égaux." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { -- 2.18.1