diff --git a/module3/exo3/exercice.ipynb b/module3/exo3/exercice.ipynb index 5470b3b83e0d1bf735b33e63d81e05f014a8958e..111dff91b1e651631921ded1698b31f8f5a8dfd7 100644 --- a/module3/exo3/exercice.ipynb +++ b/module3/exo3/exercice.ipynb @@ -4,7 +4,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Simpson's paradox" + "# Simpson's paradox\n", + "\n", + "![SegmentLocal](simpson.gif \"segment\")" ] }, { @@ -16,14 +18,16 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns" + "import numpy as np # For maths\n", + "import pandas as pd # For data frames\n", + "import matplotlib.pyplot as plt # For plots\n", + "import seaborn as sns # For nice plots\n", + "from matplotlib.ticker import FuncFormatter # To format % in axis\n", + "from IPython.core.display import display, HTML # For nice display of data frames" ] }, { @@ -35,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -58,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -131,7 +135,7 @@ "4 Yes Alive 81.4" ] }, - "execution_count": 11, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -141,12 +145,982 @@ "raw_data.head()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Preprocessing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check that \"Smoker\" and \"Satus\" have only two possible values" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array(['Yes', 'No'], dtype=object), array(['Alive', 'Dead'], dtype=object))" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data[\"Smoker\"].unique(), raw_data[\"Status\"].unique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Convert them to boolean for a more standardised analysis downstream.\n", + "In particular, there is no missing values for those (otherwise they would show as NaN or some other tag). Also, rename \"Status\" for clarity." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SmokerDeadAge
0TrueFalse21.0
1TrueFalse19.3
2FalseTrue57.5
3FalseFalse47.1
4TrueFalse81.4
\n", + "
" + ], + "text/plain": [ + " Smoker Dead Age\n", + "0 True False 21.0\n", + "1 True False 19.3\n", + "2 False True 57.5\n", + "3 False False 47.1\n", + "4 True False 81.4" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = raw_data.copy()\n", + " \n", + "df[\"Smoker\"] = df[\"Smoker\"] == \"Yes\"\n", + "df[\"Status\"] = df[\"Status\"] == \"Dead\"\n", + "\n", + "df.rename(columns={\"Status\": \"Dead\"}, inplace=True)\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check for missing values in \"Age\"." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 missing value(s) in 'Age'\n" + ] + } + ], + "source": [ + "print(\"{:.0f} missing value(s) in 'Age'\".format(df[\"Age\"].isna().sum())) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Mortality rates among smokers vs. non-smokers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, group by \"Smoker\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "df_by_smoker = df.groupby(by=[\"Smoker\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Number of deaths" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Dead
Smoker
False230
True139
\n", + "
" + ], + "text/plain": [ + " Dead\n", + "Smoker \n", + "False 230\n", + "True 139" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_by_smoker.sum()[[\"Dead\"]].astype(int)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Total number per class (smokers vs. non-smokers)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Dead
Smoker
False732
True582
\n", + "
" + ], + "text/plain": [ + " Dead\n", + "Smoker \n", + "False 732\n", + "True 582" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_by_smoker.count()[[\"Dead\"]].astype(int)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Mortality rate (ratio of dead / number within the class)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Dead
Smoker
False31.4%
True23.9%
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(\n", + " HTML(\n", + " df_by_smoker.mean()[[\"Dead\"]].to_html(formatters={\"Dead\": \"{:,.1%}\".format})\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Visualisation\n", + "\n", + "We summarise these results using a bar plot. The black lines represent 95% confidence intervals around the mean mortality rates." + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfAAAAFgCAYAAABEyiulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3XtUVXX+//HXORzAFEO5manfcblGyulitRSjRll5oQseQLG1RidLoqJQEZmaRbmivtRkxrK+hGmL5UiMKeNlBFKnGcOme0sMLW1Su0xCmoEjIl7OQTnu3x9956zhlzr61X0OH30+/gH22WfzxsRne5999nZYlmUJAAAYxRnsAQAAwLkj4AAAGIiAAwBgIAIOAICBCDgAAAZyBXuA87F//+FgjwAAgK1iY3uecjl74AAAGIiAAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAIOAICBCDgAAAayLeDt7e2aNGmSUlNTlZKSopdfflmSVFpaqpEjRyotLU1paWl69913JUn19fVyu93KyMhQQ0ODJKmtrU1ZWVnijqcAAHRm25XYwsLCVFFRoR49eujEiROaMmWKRo0aJUmaNm2asrKyOq1fXl6u0tJS7d27V5WVlSooKNDChQuVnZ0th8Nh15gAABjJtj1wh8OhHj16SJI6OjrU0dFxxhC7XC55vV55PB65XC41NjaqqalJCQkJdo0IAICxbH0N3OfzKS0tTbfccotuueUWDR06VJK0bNkyud1uPf744zp06JAkKTs7W4WFhaqoqNA999yjl156SbNmzbJzPAAAjOWwAvACc1tbm6ZPn64nn3xSUVFR6t27txwOh0pKStTc3Ky5c+d2Wn/z5s2qra3Vr371K5WUlMjlcqmgoEAxMTGd1vN4jsvlCrF7fAAAgiY09NSdC0jAJWnBggW67LLLOr32vWfPHj388MNat26df5llWcrKytJLL72koqIi5eTkaO/evaqvr9fs2bM7bZO7kZlpy5ZPtHZtldzuCbrppmHBHgcAurSA342spaVFbW1tkiSv16uPPvpIgwYNUnNzs3+d2tpaDR48uNPzqqqqlJSUpMjISHm9XjmdTjmdTnk8HrtGRYCtWrVcO3b8XatWLQ/2KABgLNvOQm9ublZBQYF8Pp8sy9Idd9yh2267TY899ph27twpSerXr5+Kior8z/F4PKqqqtKSJUskSZmZmcrNzVVoaKjmz59v16gIMI/H2+kjAODcBewQuh04hG6mvLwc/fDD97riiiv1P/+zMNjjAECXFvBD6AAAwD4EHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwECuYA/QFSUkHAj2CBe1/v19CguTGht9/FnbpK4uOtgjALAZe+AAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGsi3g7e3tmjRpklJTU5WSkqKXX35ZktTa2qrMzEwlJycrMzNThw4dkiTV19fL7XYrIyNDDQ0NkqS2tjZlZWXJsiy7xgQAwEi2BTwsLEwVFRV64403VF1drffff1+ffvqpysrKlJiYqA0bNigxMVFlZWWSpPLycpWWlio/P1+VlZWSpIULFyo7O1sOh8OuMQEAMJJtAXc4HOrRo4ckqaOjQx0dHXI4HNq4caPS09MlSenp6aqtrZUkuVwueb1eeTweuVwuNTY2qqmpSQkJCXaNCACAsWy9FrrP59PEiRPV2NioKVOmaOjQoTpw4IDi4uIkSXFxcWppaZEkZWdnq7CwUOHh4SouLta8efM0a9asM24/IiJcLleIDZNzfW6YrVev7sEeAYDNbA14SEiIampq1NbWpunTp+vLL7887bpDhgzRypUrJUmbN29WXFycLMtSXl6eXC6XCgoKFBMT0+k5R4602zk+YKzW1mPBHgHABRIb2/OUywNyFvrll1+uESNG6P3331d0dLSam5slSc3NzYqKiuq0rmVZWrRokXJycrRgwQLNnDlTqampWrp0aSBGBQDACLYFvKWlRW1tbZIkr9erjz76SIMGDdLo0aNVXV0tSaqurtaYMWM6Pa+qqkpJSUmKjIyU1+uV0+mU0+mUx+Oxa1QAAIxj2yH05uZmFRQUyOfzybIs3XHHHbrtttt0ww03KC8vT6tXr1bfvn1VUlLif47H41FVVZWWLFkiScrMzFRubq5CQ0M1f/58u0YFAMA4DsvgN1nv33/Ylu0mJHASm536939CYWFNOn68j/bseS7Y41yU6uqigz0CgAskqK+BAwCAC4uAAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAKOgLOsbp0+AgDOHQFHwLW0pMnjuUotLWnBHgUAjGXb3ciA0/F4hsrjGRrsMQDAaOyBAwBgIAIOAICBCDgAAAYi4ABwkduy5RP993/P0ZYtnwR7FFxAnMQGABe5VauW69tv/yGv16ObbhoW7HFwgbAHDgAXOY/H2+kjLg4EHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADCQbQHft2+fpk6dqjvvvFMpKSmqqKiQJJWWlmrkyJFKS0tTWlqa3n33XUlSfX293G63MjIy1NDQIElqa2tTVlaWLMuya0wAAIzksmvDISEhKigo0DXXXKMjR44oIyNDt956qyRp2rRpysrK6rR+eXm5SktLtXfvXlVWVqqgoEALFy5Udna2HA6HXWMCAGAk2wIeFxenuLg4SVJERIQGDRqkpqam0w/icsnr9crj8cjlcqmxsVFNTU1KSEiwa0QAAIxlW8D/3Z49e7Rjxw4NHTpUW7Zs0bJly1RdXa1rr71WBQUFioyMVHZ2tgoLCxUeHq7i4mLNmzdPs2bNOuN2IyLC5XKF2DDxARu2CQROr17dgz0CupCQEIf/I383Lh62B/zo0aPKzc3VE088oYiICE2ePFk5OTlyOBwqKSnR888/r7lz52rIkCFauXKlJGnz5s2Ki4uTZVnKy8uTy+VSQUGBYmJiOm37yJF2u8cHjNTaeizYI6AL8fks/0f+bpgnNrbnKZfbehb6iRMnlJubK7fbreTkZElSTEyMQkJC5HQ6dffdd2v79u2dnmNZlhYtWqScnBwtWLBAM2fOVGpqqpYuXWrnqAAAGMW2gFuWpTlz5mjQoEHKzMz0L29ubvZ/Xltbq8GDB3d6XlVVlZKSkhQZGSmv1yun0ymn0ymPx2PXqAAAGMe2Q+j19fWqqalRfHy80tLSJEn5+flat26ddu7cKUnq16+fioqK/M/xeDyqqqrSkiVLJEmZmZnKzc1VaGio5s+fb9eoAAAYx7aADxs2TLt27frJ8qSkpNM+57LLLut0qHzYsGFau3atLfMBAGAyrsQGAICBCDgAAAYi4AAAGIiAAwBgoIBciQ0AzuRowvXBHuGiZvXvL4WFyWrczZ+1TXrUbQv492QPHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEC2BXzfvn2aOnWq7rzzTqWkpKiiokKS1NraqszMTCUnJyszM1OHDh2SJNXX18vtdisjI0MNDQ2SpLa2NmVlZcmyLLvGBADASLYFPCQkRAUFBXrzzTe1YsUKLV++XF9//bXKysqUmJioDRs2KDExUWVlZZKk8vJylZaWKj8/X5WVlZKkhQsXKjs7Ww6Hw64xAeCi1+1/d4K6sTN0UbEt4HFxcbrmmmskSRERERo0aJCampq0ceNGpaenS5LS09NVW1srSXK5XPJ6vfJ4PHK5XGpsbFRTU5MSEhLsGhEALglpLS26yuNRWktLsEfBBeQKxDfZs2ePduzYoaFDh+rAgQOKi4uT9GPkW/73L1R2drYKCwsVHh6u4uJizZs3T7NmzQrEeABwURvq8WioxxPsMXCB2R7wo0ePKjc3V0888YQiIiJOu96QIUO0cuVKSdLmzZsVFxcny7KUl5cnl8ulgoICxcTEdHpORES4XK4QG6Y+YMM2gcDp1at7sEc4J0eDPQBwnoLxO2drwE+cOKHc3Fy53W4lJydLkqKjo9Xc3Ky4uDg1NzcrKiqq03Msy9KiRYv00ksvqaioSDNnztTevXu1dOlSzZ49u9O6R4602zk+YKzW1mPBHgG4pNj5Oxcb2/OUy217DdyyLM2ZM0eDBg1SZmamf/no0aNVXV0tSaqurtaYMWM6Pa+qqkpJSUmKjIyU1+uV0+mU0+mUh8M/AAD42bYHXl9fr5qaGsXHxystLU2SlJ+fr4ceekh5eXlavXq1+vbtq5KSEv9zPB6PqqqqtGTJEklSZmamcnNzFRoaqvnz59s1KgAAxnFYBr/Jev/+w7ZsNyGB18Bhtrq66GCPcE6OJlwf7BGA89Kjbptt2z7dIfQz7oGXl5efcaP/fmgcAAAEzhkDfvToj+eGfvvtt9q+fbtGjx4tSfrb3/6mYcOG2T8dAAA4pTMGfMaMGZKk+++/X2vWrPG/DWzGjBm8RxsAgCA6q7PQv//+e4WFhfm/DgsL0969e20bCgAAnNlZnYWelpamSZMmady4cXI4HHrrrbf8l0MFAACBd1YBf+SRRzRy5EjV19dLkubOnatf/OIXtg4GAABO76zfB37ttdeqb9++am//8epn33//va688krbBgMAAKd3VgHfuHGj5s2b57/06b59+zRo0CCtX7/e7vkAAMApnNVJbCUlJVqxYoUGDhyot99+W+Xl5brpppvsng0AAJzGWQXc5XKpd+/eOnnypE6ePKmbb75ZO3bssHs2AABwGmd1CP3yyy/X0aNHNWzYMD366KOKioqSyxWQW4kDAIBTOKtroR87dkzdunXTyZMntXbtWh0+fFhut1u9e/cOxIynxbXQgVPjWuhAYHW5a6H/S/fu3bV37141NDRowoQJ8ng88vl8F3RAAABw9s7qNfCVK1cqNzdXhYWFkqSmpiZNnz7d1sEAAMDpnVXAly1bpsrKSv+10AcOHKiWlhZbBwMAAKd3VgEPCwvrdC30jo4O2wYCAAD/2Vm9Bj58+HC9+uqr8nq9+vDDD7V8+XL/rUUBAEDgndUe+L/eOhYfH68VK1YoKSlJeXl5ds8GAABO46z2wJ1Op8aOHauxY8cqKirK7pkAAMB/cMaAW5alBQsW6PXXX/d/7XQ6dc8992jGjBkBGRAAAPzUGQ+hV1RUaMuWLVq9erU2bdqkuro6rVq1Slu3btVrr70WoBEBAMD/74wBr66u1vz58zVgwAD/sgEDBqi4uFjV1dW2DwcAAE7tjAHv6Og45WveUVFRvJUMAIAgOmPAQ0ND/0+PAQAAe53xJLadO3ee8r7flmXp+PHjtg0FAADO7IwB557fAAB0TWd1IRcAANC1EHAAAAxEwAEAMBABBwDAQAQcAAADEXAAAAxEwAEAMBABBwDAQLYF/PHHH1diYqLGjx/vX1ZaWqqRI0cqLS1NaWlpevfddyVJ9fX1crvdysjIUENDgySpra1NWVlZsizLrhEBADCWbQGfOHGiFi9e/JPl06ZNU01NjWpqapSUlCRJKi8vV2lpqfLz81VZWSlJWrhwobKzs+VwOOwaEQAAY9kW8OHDhysyMvKs1nW5XPJ6vfJ4PHK5XGpsbFRTU5MSEhLsGg8AAKOd8Vrodli2bJmqq6t17bXXqqCgQJGRkcrOzlZhYaHCw8NVXFysefPmadasWf9xWxER4XK5QmyY8oAN2wQCp1ev7sEe4ZwcDfYAwHkKxu9cQAM+efJk5eTkyOFwqKSkRM8//7zmzp2rIUOGaOXKlZKkzZs3Ky4uTpZlKS8vTy6XSwUFBYqJifnJ9o4caQ/k+IAxWluPBXsE4JJi5+9cbGzPUy4P6FnoMTExCgkJkdPp1N13363t27d3etyyLC1atEg5OTlasGCBZs6cqdTUVC1dujSQYwIA0OUFNODNzc3+z2trazV48OBOj1dVVSkpKUmRkZHyer1yOp1yOp3yeDyBHBMAgC7PtkPo+fn5qqur08GDBzVq1CjNnDlTdXV12rlzpySpX79+Kioq8q/v8XhUVVWlJUuWSJIyMzOVm5ur0NBQzZ8/364xAQAwksMy+I3W+/cftmW7CQmcxAaz1dVFB3uEc3I04fpgjwCclx5122zbdpd4DRwAAFwYBBwAAAMRcAAADETAAQAwEAEHAMBABBwAAAMRcAAADETAAQAwEAEHAMBABBwAAAMRcAAADETAAQAwEAEHAMBABBwAAAMRcAAADETAAQAwEAEHAMBABBwAAAMRcAAADETAAQAwEAEHAMBABBwAAAMRcAAADETAAQAwEAEHAMBABBwAAAMRcAAADETAAQAwEAEHAMBABBwAAAMRcAAADETAAQAwEAEHAMBABBwAAAPZFvDHH39ciYmJGj9+vH9Za2urMjMzlZycrMzMTB06dEiSVF9fL7fbrYyMDDU0NEiS2tralJWVJcuy7BoRAABj2RbwiRMnavHixZ2WlZWVKTExURs2bFBiYqLKysokSeXl5SotLVV+fr4qKyslSQsXLlR2drYcDoddIwIAYCzbAj58+HBFRkZ2WrZx40alp6dLktLT01VbWytJcrlc8nq98ng8crlcamxsVFNTkxISEuwaDwAAo7kC+c0OHDiguLg4SVJcXJxaWlokSdnZ2SosLFR4eLiKi4s1b948zZo16z9uLyIiXC5XiB2T2rBNIHB69eoe7BHOydFgDwCcp2D8zgU04KczZMgQrVy5UpK0efNmxcXFybIs5eXlyeVyqaCgQDExMT953pEj7YEeFTBCa+uxYI8AXFLs/J2Lje15yuUBPQs9Ojpazc3NkqTm5mZFRUV1etyyLC1atEg5OTlasGCBZs6cqdTUVC1dujSQYwIA0OUFNOCjR49WdXW1JKm6ulpjxozp9HhVVZWSkpIUGRkpr9crp9Mpp9Mpj8cTyDEBAOjybDuEnp+fr7q6Oh08eFCjRo3SzJkz9dBDDykvL0+rV69W3759VVJS4l/f4/GoqqpKS5YskSRlZmYqNzdXoaGhmj9/vl1jAgBgJIdl8But9+8/bMt2ExI4iQ1mq6uLDvYI5+RowvXBHgE4Lz3qttm27S7xGjgAALgwCDgAAAYi4AAAGIiAAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAIOAICBXMH4pqNHj1aPHj3kdDoVEhKiNWvWqLi4WO+9956GDBmiF154QZJUXV2tQ4cO6b777gvGmAAAdFlB2wOvqKhQTU2N1qxZo8OHD2vr1q1au3atfD6fdu3aJa/Xq6qqKk2ZMiVYIwIA0GV1iUPoDodDJ06ckGVZam9vl8vl0uLFizV16lSFhoYGezwAALqcoAU8KytLEydO1IoVKxQREaHk5GSlp6erf//+6tmzpz7//HONHTs2WOMBANClOSzLsgL9TZuamtSnTx8dOHBAmZmZevLJJzV8+HD/43PmzNGvf/1r/f3vf9cHH3ygq666Sjk5OT/ZjsdzXC5XyAWfLz7+uwu+TSCQvvxyQLBHOCd7438e7BGA89Lvy69t23Zo6Kk7F5ST2Pr06SNJio6O1rhx47Rt2zZ/wL/44gtJ0sCBA/W73/1Oy5Yt0+zZs7V7924NHDiw03aOHGkP6NyAKVpbjwV7BOCSYufvXGxsz1MuD/gh9GPHjunIkSP+zz/88EMNHjzY/3hJSYlyc3PV0dEhn8/345BOp7xeb6BHBQCgywr4HviBAwc0ffp0SZLP59P48eM1atQoSVJtba2uu+46/x76jTfeKLfbrfj4eF199dWBHhUAgC4rKK+BXyj79x+2ZbsJCQds2S4QKHV10cEe4ZwcTbg+2CMA56VH3Tbbtt1lDqEDAIDzR8ABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADAQAQcAwEAEHAAAAxFwAAAMRMABADBQUAL+3nvv6fbbb9e4ceNUVlYmSSouLpbb7dZvf/tb/3rV1dWqqKgIxogAAHRpAQ+4z+dTUVGRFi9erPXr12vdunXauXOntm7dqrVr18rn82nXrl3yer2qqqrSlClTAj0iAABdnivQ33Dbtm362c9+pgEDBkiSUlJStHHjRp04cUKWZam9vV0ul0uLFy/W1KlTFRoaGugRAQDo8gIe8KamJl1xxRX+r/v06aNt27YpOTlZ6enpSkxMVM+ePfX5559rxowZZ9xWbGxPW2b89lt7tgvg1GK//TbYIwDGCXjALcv6yTKHw6EHH3xQDz74oCRpzpw5ys3N1apVq/TBBx/oqquuUk5OTqBHBQCgywr4a+BXXHGFfvjhB//XTU1NiouL83/9xRdfSJIGDhyo6upqlZSU6KuvvtLu3bsDPSoAAF1WwAN+3XXXaffu3fruu+90/PhxrV+/XqNHj/Y/XlJSotzcXHV0dMjn8/04pNMpr9cb6FEBAOiyAn4I3eVyqbCwUA888IB8Pp8yMjI0ePBgSVJtba2uu+469enTR5J04403yu12Kz4+XldffXWgRwUAoMtyWKd6URo4B0OGDFF8fLz/61deeUX9+/c/5bp79uzRww8/rHXr1gVqPOCidfDgQU2bNk2S9M9//lNOp1NRUVGSpFWrViksLCyI08FuAd8Dx8WnW7duqqmpCfYYwCWnd+/e/t+90tJSde/eXVlZWZ3WsSxLlmXJ6eTCmxcb/ovCFnv27NGUKVM0YcIETZgwQVu2bPnJOl999ZUmTZqktLQ0ud1u/4mKNTU1/uWFhYX+cyEAnJ2GhgaNHz9ehYWFmjBhgvbt26dhw4b5H1+/fr3mzJkj6cc99xkzZmjixImaNGmSPv3002CNjXPEHjjOm9frVVpamiSpf//+euWVVxQdHa3y8nKFh4dr9+7dys/P15o1azo9749//KPuvfdepaam6vjx4zp58qS++eYbvfnmm6qsrFRoaKiefvpprV27Vunp6cH40QBjff3113ruuedUVFSkjo6O06737LPP6oEHHtANN9zAS1yGIeA4b6c6hN7R0aGioiLt3LlTTqfzlG8DvOGGG/Tqq6/qhx9+UHJysgYOHKiPP/5Yn3/+uSZNmiTpx/85iI6ODsSPAVxU/uu//kvXX3/9f1zv448/1rf/diGdQ4cOyev1qlu3bnaOhwuAgMMWr732mmJiYlRTU6OTJ0+e8h8St9utoUOH6p133lFWVpaeffZZWZalCRMm6De/+U0QpgYuHpdddpn/c6fT2ekiWu3t7f7PLcvihDdD8Ro4bHH48GHFxsbK6XSqpqbmlK9jf/fddxowYIDuvfdejR49Wrt27VJiYqL++te/6sCBA5Kk1tZW7d27N9DjAxcVp9OpyMhI7d69WydPntRbb73lfywxMVHLly/3f71jx45gjIj/A/bAYYspU6Zo5syZ+stf/qIRI0aoe/fuP1nnz3/+s9544w25XC7FxMRo+vTp6tWrl/Ly8nT//ffr5MmTCg0NVWFhofr16xeEnwK4eDz66KN64IEH1LdvX/385z/X8ePHJUlPPfWUnn76af3pT3+Sz+fTiBEj9NRTTwV5WpwN3gcOAICBOIQOAICBCDgAAAYi4AAAGIiAAwBgIAIOAICBCDhwiVi0aJFSUlLkdruVlpamzz777Ly2t2nTJmVnZ1+g6QCcK94HDlwCtm7dqnfeeUdVVVUKCwtTS0uLTpw4EbR5Ojo65HLxzw9wPvgNAi4B+/fvV+/evf2Xy/zXPaNHjx6t8ePHa9OmTTpx4oSeeeYZvfjii2poaFBWVpYmT54sy7L0wgsv6P3335fD4dAjjzyiu+66q9P2t23bpsLCQpWWlio6OlrPPPOMvvzyS/l8Ps2YMUNjx47VmjVr9M477+j48eM6duyY/vCHPwT8zwG4mBBw4BJw66236pVXXtHtt9+uxMRE3XXXXUpISJAkXXHFFVqxYoWee+45FRQUqLKyUsePH1dKSoomT56sDRs2aOfOnaqpqdHBgwc1adKkTrem3LJli5599lktXLhQV155pV588UXdfPPNmjt3rtra2nT33XfrlltukSR9+umneuONN9SrV6+g/DkAFxMCDlwCevTooTVr1uiTTz7Rpk2bNHv2bP8NY8aMGSNJio+P17FjxxQRESFJCg8PV1tbm+rr65WSkqKQkBDFxMRo+PDh2r59uyIiIvTNN9+osLBQv//979WnTx9J0gcffKC3335bS5YskfTjjTP27dsn6cf/kSDewIVBwIFLREhIiEaMGKERI0YoPj5e1dXVkqTQ0FBJP97w4t/vSOV0OtXR0aEzXW05NjZW7e3t2rFjhz/gkvTyyy9r0KBBndb97LPPOt0hC8D54Sx04BLwj3/8o9M92Xfs2KErr7zyrJ47fPhwvfnmm/L5fGppadEnn3zivz3s5ZdfrrKyMr344ovatGmTJOmXv/ylXn/9dX/4v/jiiwv7wwCQRMCBS8KxY8dUUFCgu+66S263W998841mzJhxVs8c4UYWAAAAb0lEQVQdN26c4uPjlZaWpvvuu0+PPfaYYmNj/Y/HxMTo1VdfVVFRkT777DPl5OSoo6NDqampGj9+vEpKSuz6sYBLGncjAwDAQOyBAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAIOAICBCDgAAAb6fygaKPce1U3dAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with sns.axes_style('darkgrid'):\n", + " fig, ax = plt.subplots(figsize=(7, 5), nrows=1, ncols=1)\n", + "\n", + " sns.barplot(data=df, x=\"Smoker\", y=\"Dead\", ax=ax, ci=95, palette=(\"blue\", \"red\")) # Note: in recent versions of seaborn, we should rather use errorbar=('ci', 95).\n", + "\n", + " ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))\n", + "\n", + " plt.tight_layout()\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "At first, this is surprising: the mortality rate appears to be lower in the smoker group.\n", + "\n", + "To confirm this, let's perform a statistical test (this is actually equivalent to the confidence intervals suggested in the question). A simple t-test will do (although it is not the most appropriate here -- categorical data -- it is simple and it is not the focus of this exercise)." + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Ttest_indResult(statistic=-3.057733462432345, pvalue=0.002276255348902961)" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from scipy.stats import ttest_ind\n", + "\n", + "df_smoker = df[df[\"Smoker\"]]\n", + "df_nonsmoker = df[~df[\"Smoker\"]]\n", + "ttest_ind(df_smoker[\"Dead\"], df_nonsmoker[\"Dead\"], equal_var=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The p-value is less than 0.05, i.e. we reject (at significance level 95%) the null hypothesis that both smokers and non-smokers have the same mortality rate.\n", + "\n", + "**Conclusion**: the mortality rate among smokers is significantly lower than among nonsmokers." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Mortality rates among smokers vs. non-smokers by age class" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let us split the data frame in four age classes: 18-34, 35-54, 55-64, 65+. To this end, let us use a function." + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [], + "source": [ + "def get_age_class(age):\n", + " if age < 18:\n", + " return \"18-\" # To check that we do not have age < 18\n", + " elif age <= 34:\n", + " return \"18-34\"\n", + " elif age <= 54:\n", + " return \"35-54\"\n", + " elif age <= 64:\n", + " return \"55-64\"\n", + " else:\n", + " return \"65+\"\n", + "\n", + "\n", + "df[\"Age_class\"] = df[\"Age\"].apply(get_age_class) # Remark: this is kind of slow, but it is clear and it will do for this simple analysis.\n", + "age_classes = np.sort(df[\"Age_class\"].unique())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check that nobody is younger than 18" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 individual(s) younger than 18\n" + ] + } + ], + "source": [ + "print(\"{:.0f} individual(s) younger than 18\".format((df[\"Age_class\"] == \"18-\").sum()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Display the mean mortality rates split by age class" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Dead
Age_classSmoker
18-34False2.7%
True2.8%
35-54False9.5%
True17.3%
55-64False33.1%
True44.3%
65+False85.5%
True85.7%
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(\n", + " HTML(\n", + " df.groupby(by=[\"Age_class\", \"Smoker\"]).mean()[[\"Dead\"]].to_html(formatters={\"Dead\": \"{:,.1%}\".format})\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualisation" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfAAAAFgCAYAAABEyiulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3Wt8VNW9//HvTCYJBBBIIAQNPRRO9VCVHGIIpUfNn1vEQiQ22lM5WoyIVCohRkQuFQWpEZBiwEKNysUoKZea8AK8YALioUoJhFOKgFBJCCAmbcItYSaXyf4/oKaNAsaaPZMVPu8n25lZe81vZgFf156993JYlmUJAAAYxenvAgAAwDdHgAMAYCACHAAAAxHgAAAYiAAHAMBALn8X8G389a/n/F0CAAC26tq1w0WfZwYOAICBCHAAAAxEgAMAYCACHAAAAxHgAAAYiAAHAMBABDgAAAYiwAEAMBABDgCAgQjwv1u58lXde+9PNGbMT3X//aP18cf7vlV/hYW7NGVKajNVBwBAY0bfSrW57Nu3Vx9+uF3Llr2uoKAgnT59WnV1tX6rp66uTi4XQwMAuDRSQlJ5+d/UsWMnBQUFSZI6deokSbrrrgQNGzZchYW7VFdXpylTZuill17U8ePHNHr0fUpMvEuWZWnJkkXaseMPcjgcGjNmrIYMiW/U/4EDH2vevF9pzpx5Cg0N08KF83TkyKfyeuv0wAMP6ZZb/p/eemuDPvxwu2pqauTxuLVo0W99/j0AAMxBgEvq3/8HWr78Ff30pz9WTEyshgwZpn79bpIkhYd300svLdeiRQv07LNPa+nSV1VdXaP77vuJEhPv0rZtW3T48CdasSJbZ86c1oMP/kxRUdENff/5z3/SwoXzlZ7+a0VEROill36jm27qr+nTn9K5c+c0btwYxcQMkCR9/PGftXJltq66qqNfvgcAgDkIcEkhISF69dUs/elPe7Rnz2499dR0/fznj0iSbr75VklSr17/LrfbrZCQdgoJaaegoCCdO3dOe/f+n4YOvU0BAQEKDQ1Tv37ROnjwY4WEtFNxcZHmzfuVFi78jbp06SpJ2rlzh7Zv36bs7NclSTU11Sot/VyS1L//AMIbAFqAwsJd2rAhRwkJdyo6Osbf5VwUAf53AQEBio6OUXR0jHr16q23394kSQoMvHBY3el0KjAwsKG90+mU1+uVZV26z7CwLqqpqdGhQ580BLhlWfrVr+bpO9/p2ajt/v371KZNm+b9UACAf8natatUVHREHo+7xQY4Z6FLKikp1rFjJQ2PDx8+pIiIiCbt+5//2U9btrwnr9erU6dO6f/+b4/69LlektShQwfNn/+CXnrpNyos3CVJGjBgoNatWy3r78l/6NDBZv40AIBvy+32NNq2RMzAJZ0/79YLL8xXZeU5BQQE6JpremjKlBn68MPtX7vvrbcO0r59f9b9998jh8OhCRNSFBbWRUePFkuSQkPDNHfuQk2enKJp02bq/vvHKiNjgcaM+aksy1L37ldr3rwXbP6EAIDWxmFZlzsI3LL99a/n/F0CAKAVSk2doM8//0wREVfrhReW+LWWrl07XPR5DqEDAGAgAhwAAAMR4AAAGIgABwDAQAQ4AAAGIsABADBQq78OPDa2vFn727kz7Gvb3HprrHr1+veGx+npz6t796sv2vbkyc80ZUqqsrLWNFuNAIDWr9UHuD8EBwdrxYpV/i4DAFoME+4tbhoC3EdOnvxMzzwzUx6PW5L06KNTdOONUY3aHDnyqdLTZ6m2tk6WVa85c+apR4/v6N1339K6db9TbW2dvv/96/XYY1MVEBDgj48BAP8SE+4tbhoC3AbV1dW6//7RkqTu3a9Wevrz6tw5VAsX/kbBwcE6dqxETz89Q6++mtVov/Xrf6+7775H8fG3q7a2VvX1XhUXFyk//z0tXbpMLpdLzz//nDZvflu33z7SHx8NAP4lJtxb3DQEuA0udgi9rq5OCxfO1eHDh+R0BujYsaNf2e/66/vqtdeWqaysVHFxg9Wjx3e0e/dOffLJAT344M8kSdXVHnXu3NknnwMA0HIR4D6yevUb6tw5TCtWZKu+vl5DhvzXV9rExw/X9dffoA8/3K60tImaOvWXsixLt98+smF9cgAAJC4j85mqqkqFhXWR0+nUu+++Ja/X+5U2J04c19VXX6O77/6pbr75Vn366WHddFOs3n8/X6dOVUiSzp49o88/P+nr8gEALUyrn4E35bIvX7jzzrv1y19O0dateYqOjlHbtm2/0mbLlvf07rtvy+VyKTQ0TMnJD+qqqzpq3LiH9eijj8iy6hUQ4FJa2hOKiOjuh08BAGgpWE4UAGC7lrQ8Z1O0pHpZThQAgFaEAAcAwEAEOAAABiLAAQAwEAEOAICBCHAAAAzU6q8Dr4rt26z9tdu597KvnzlzWpMmTZAkVVSUy+l0qlOnC7c+ffnllQoMDGzWegAAV6ZWH+C+1rFjp4b7oL/66ktq2zZEo0ff16iNZVmyLEtOJwdAAODbau6JmiRZkZFSUJCskmJb+v+6yWBTEOA+cvz4MU2b9pj69v1P7d+/T+npC3T//ffonXfelyTl5b2rXbt2aurUJ1VRUa7nn39OpaWfy+l0aNKkx3XDDTf69wMAuGLExpY3e5+RkV4FBUklJd5m739rs/ZmDgLch4qLizR9+lN6/PHpqquru2S7F154XqNH/0w33HCjTp78TFOmpCora40PKwUAtHQEuA9dc02k+vS5/mvb7dq1UyUl/1hu9Ny5c6qu9ig4uI2d5QEADEKA+1CbNv9YwMTpdOqfb0NfU1PT8N+WZXHCGwDgsjiLyk+cTqc6dLhKx46VqL6+Xh988I9fcWJiYvXmm/84ZH748Cf+KBEA0IK1+hl4c5zpZ5eHH56oxx6bqG7dItSzZy/V1l6YhaelPaEFC9L11lsb5PV61a9fjB577Ak/VwsAaElYThQA0Ig9Z6FPV1BQqWpquun48Webte+tGtSs/UnS9MhIlQYFqVtNjZ49frzZ+/8mk0uWEwUAoBUhwAEAMBABDgCAgQhwAAAMRIADAGAgAhwAAAMR4AAAGMjWAF+xYoVGjBihkSNHKi0tTdXV1Tp9+rSSk5MVHx+v5ORknTlzRpK0e/duJSQkKCkpSUePXrgP+NmzZzV27FgZfKk6AECSZbVptMW3Z1uAl5aW6rXXXtPvf/97bdy4UV6vV5s2bVJmZqYGDhyozZs3a+DAgcrMzJQkLV++XIsXL1ZaWpqys7MlSUuWLNH48ePlcDjsKhMA4AMVFaPkdl+niopR/i6l1bB1Bu71euXxeFRXVyePx6Pw8HDl5+crMTFRkpSYmKi8vDxJksvlksfjkdvtlsvlUklJiUpLSxUbG2tniQAAH3C7o3Ty5BS53VH+LqXVsO1e6N26ddMDDzygQYMGKTg4WP/1X/+lm2++WeXl5QoPD5ckhYeHq6KiQpI0fvx4zZw5U8HBwZo/f77mzp2rSZMmXfY92rcPlssVYNdHAIArVPPfShWNdeoU8q37sC3Az5w5o/z8fOXn56tDhw6aNGmS1q9ff8n2ffr00Zo1F1bgKigoUHh4uCzLUmpqqlwul6ZOnaouXbo02qeystqu8gEAsM3p0+eb3Nbn90L/8MMPFRkZqdDQUAUGBio+Pl579uxRWFiYysrKJEllZWUKDQ1ttJ9lWVq6dKkmTJigF198URMnTtQdd9yhrKwsu0oFAMA4tgX41VdfrT/96U9yu92yLEsfffSRevfurcGDBys3N1eSlJubqyFDhjTaLycnR3FxcerYsaM8Ho+cTqecTqfcbrddpQIAYBzbDqFHRUXptttu05133imXy6U+ffrov//7v1VVVaXU1FStW7dO3bt3V0ZGRsM+brdbOTk5WrZsmSQpOTlZKSkpCgwM1IIFC+wqFQAA47AeOACgETvWA7cT64EDAABjEOAAABiIAAcAwEAEOAAABiLAAQAwEAEOAICBCHAAAAxEgAMAYCACHACAL2nz93uctWnB9zojwAEA+JJRFRW6zu3WqL8ved0S2XYvdAAATBXldiuqhS+ixQwcAAADEeAAABiIAAcAwEAEOAAABiLAAQAwEAEOAICBCHAAAAxEgAMAYCACHAAAAxHgAAAYiAAHAMBABDgAAAYiwAEAMBABDgCAgQhwAAAMRIADAGAgAhwAAAMR4AAAGIgABwDAQAQ4AAAGIsABADAQAQ4AgIEIcAAADESAAwBgIAIcAAADEeAAABiIAAcAwEAEOAAABiLAAQAwEAEOAICBCHAAAAxEgAMAYCACHAAAAxHgAAAYiAAHAMBABDgAAAYiwAEAMBABDgCAgQhwAAAMRIADAGAgAhwAAAMR4AAAGIgABwDAQLYG+NmzZ5WSkqLhw4fr9ttv1549e3T69GklJycrPj5eycnJOnPmjCRp9+7dSkhIUFJSko4ePdqw/9ixY2VZlp1lAgBgHFsD/Fe/+pVuueUWvfPOO1q/fr169+6tzMxMDRw4UJs3b9bAgQOVmZkpSVq+fLkWL16stLQ0ZWdnS5KWLFmi8ePHy+Fw2FkmAADGsS3AKysrVVBQoLvuukuSFBQUpKuuukr5+flKTEyUJCUmJiovL0+S5HK55PF45Ha75XK5VFJSotLSUsXGxtpVIgAAxnLZ1fGxY8cUGhqqadOm6eDBg7r++us1Y8YMlZeXKzw8XJIUHh6uiooKSdL48eM1c+ZMBQcHa/78+Zo7d64mTZp02fdo3z5YLleAXR8BAK5Q5f4uoNXr1CnkW/dhW4DX1dVp//79evLJJxUVFaU5c+Y0HC6/mD59+mjNmjWSpIKCAoWHh8uyLKWmpsrlcmnq1Knq0qVLo30qK6vtKh8AANucPn2+yW27du1w0edtO4QeERGhiIgIRUVFSZKGDx+u/fv3KywsTGVlZZKksrIyhYaGNtrPsiwtXbpUEyZM0IsvvqiJEyfqjjvuUFZWll2lAgBgHNsCvGvXroqIiNCRI0ckSR999JF69+6twYMHKzc3V5KUm5urIUOGNNovJydHcXFx6tixozwej5xOp5xOp9xut12lAgBgHNsOoUvSk08+qcmTJ6u2tlY9evRQenq66uvrlZqaqnXr1ql79+7KyMhoaO92u5WTk6Nly5ZJkpKTk5WSkqLAwEAtWLDAzlIBADCKwzL4Iuu//vWcv0sAgFYnNtask9i2apC/S/jG2u3c2+S2Pv8NHAAA2IcABwDAQAQ4AAAGIsABADAQAQ4AgIEIcAAADESAAwBgIAIcAAADEeAAABiIAAcAwEAEOAAABiLAAQAwEAEOADYqLNylWbNmqLBwl79LQStj63KiAHClW7t2lYqKjsjjcSs6Osbf5aAVYQYOADZyuz2NtkBzIcABADAQAQ4AgIEu+xv48uXLL7tzcnJysxYDAACa5rIBXlVVJUkqKirSn//8Zw0ePFiStHXrVsXEcDIGAAD+ctkAf+SRRyRJDzzwgN588021b9++4flJkybZXx0AALioJv0G/tlnnykoKKjhcVBQkE6cOGFbUQAA4PKadB34qFGjdNddd2nYsGFyOBx67733lJiYaHdtAADgEpoU4A8//LBuueUW7d69W5KUnp6u73//+7YWBgAALq3Jd2K74YYb1L17d1VXV0u6cFj96quvtq0wAABwaU0K8Pz8fM2dO1dlZWUKDQ3VyZMn1atXL23atMnu+gAAwEU06SS2jIwMrV69Wj179tSWLVu0fPlyRUdH210bAAC4hCYFuMvlUufOnVVfX6/6+nr94Ac/0IEDB+yuDQAAXEKTDqFfddVVqqqqUkxMjCZPnqzQ0FC5XCxkBgCAvzRpBr5kyRK1bdtW06dP1y233KLvfOc7Wrp0qd21AQCAS2jSNDokJEQnTpzQ0aNHdeedd8rtdsvr9dpdGwAAuIQmzcDXrFmjlJQUzZw5U5JUWlqqX/ziF7YWBgAALq1JAf7GG28oOzu74V7oPXv2VEVFha2FAQCAS2tSgAcFBTW6F3pdXZ1tBQEAgK/XpN/A+/fvr9/+9rfyeDz6wx/+oFWrVjUsLQoAAHyvSTPwLy4du/baa7V69WrFxcUpNTXV7toAAMAlNGkG7nQ6NXToUA0dOlShoaF21wQAAL7GZQPcsiy9+OKLev311xseO51O3XvvvXrkkUd8UiAAAPiqyx5CX7lypQoLC7Vu3Tr98Y9/1M6dO7V27Vrt2bNHK1as8FGJAADgyy4b4Lm5uVqwYIF69OjR8FyPHj00f/585ebm2l4cAAC4uMsGeF1d3UV/8w4NDeVSMgAA/OiyAR4YGPgvvQYAAOx12ZPYDh48eNF1vy3LUk1NjW1FAQCAy7tsgLPmNwAALROLegPA31XF9m32Pq3ISCkoSFZJcbP3327n3mbtD2Zp0p3YAABAy0KAAwBgIAIcAAADEeAAABiIAAcAwEAEOAAABiLAAQAwEAEOwBiFhbs0a9YMFRbu8ncpgN9xIxcAxli7dpWKio7I43ErOjrG3+UAfmX7DNzr9SoxMVHjx4+XJJ0+fVrJycmKj49XcnKyzpw5I0navXu3EhISlJSUpKNHj0qSzp49q7Fjx8qyLLvLBGAAt9vTaAtcyWwP8Ndee029e/dueJyZmamBAwdq8+bNGjhwoDIzMyVJy5cv1+LFi5WWlqbs7GxJ0pIlSzR+/Hg5HA67ywQAwCi2Bvjnn3+u999/X3fddVfDc/n5+UpMTJQkJSYmKi8vT5Lkcrnk8XjkdrvlcrlUUlKi0tJSxcbG2lkiAABGsvU38GeffVaPP/64qqqqGp4rLy9XeHi4JCk8PFwVFRWSpPHjx2vmzJkKDg7W/PnzNXfuXE2aNMnO8gAAMJZtAb5161aFhobqhhtu0B//+Mevbd+nTx+tWbNGklRQUKDw8HBZlqXU1FS5XC5NnTpVXbp0abRP+/bBcrkCbKkfQMsTEOBo2HbqFNLs/Vd9fZMWxY7v4IJym/rFF5pj7GwL8MLCQm3ZskUffPCBqqurVVlZqcmTJyssLExlZWUKDw9XWVmZQkNDG+1nWZaWLl2qhQsXavbs2Zo4caJOnDihrKwsPfroo43aVlZW21U+gBbI67UatqdPn/dzNf7Hd2CubzJ2Xbt2uOjztv0G/thjj+mDDz7Qli1b9Otf/1o/+MEP9Pzzz2vw4MHKzc2VJOXm5mrIkCGN9svJyVFcXJw6duwoj8cjp9Mpp9Mpt9ttV6kAABjH59eBP/TQQ0pNTdW6devUvXt3ZWRkNLzmdruVk5OjZcuWSZKSk5OVkpKiwMBALViwwNelAgDQYvkkwAcMGKABAwZIkjp37qyVK1detF3btm2VlZXV8DgmJkYbNmzwRYkAABiFW6kCAGAgAhwAAAMR4AAAGIgABwDAQAQ4AAAGIsABADAQAQ4AgIEIcAAADESAA4CN2lhWoy3QXAhwALDRqIoKXed2a9Tfl04GmovP74UOAFeSKLdbUSzGBBswAwcAwEAEOAAABiLAAQAwEAEOAICBCHAAAAzEWegAbBEbW97sfUZGehUUJJWUeG3pf2uz9wjYhxk4AAAGIsABADAQAQ4AgIEIcAAADESAAwBgIAIcAAADEeAAABiIAAcAwEAEOAAABiLAAQAwEAEOAICBCHAAAAxEgAMAYCACHAAAAxHgAAAYiAAHAMBABDgAAAYiwAEAMBABDgCAgQhwAAAMRIADAGAgAhwAAAMR4AAAGIgABwDAQAQ4AAAGIsABADAQAQ4AgIEIcAAADESAAzCGZbVptAWuZAQ4AGNUVIyS232dKipG+bsUwO9c/i4AAJrK7Y6S2x3l7zKAFoEZOAAABiLAAQMUFu7SrFkzVFi4y9+lAGghOIQOGGDt2lUqKjoij8et6OgYf5cDoAVgBg4YwO32NNoCAAEOAICBCHAAAAxkW4CfPHlS9913n26//XaNGDFCK1eulCSdPn1aycnJio+PV3Jyss6cOSNJ2r17txISEpSUlKSjR49Kks6ePauxY8fKsiy7ygQAwEi2BXhAQICmTp2qt99+W6tXr9aqVav0l7/8RZmZmRo4cKA2b96sgQMHKjMzU5K0fPlyLV68WGlpacrOzpYkLVmyROPHj5fD4bCrTAAAjGRbgIeHh+v666+XJLVv3169evVSaWmp8vPzlZiYKElKTExUXl6eJMnlcsnj8cjtdsvlcqmkpESlpaWKjY21q0QAAIzlk8vIjh8/rgMHDigqKkrl5eUKDw+XdCHkKyoqJEnjx4/XzJkzFRwcrPnz52vu3LmaNGnSZftt3z5YLleA7fUD/hYQ4GjYduoU4udqmqrc3wW0evb9WWDs7NYcY2d7gFdVVSklJUXTp09X+/btL9muT58+WrNmjSSpoKBA4eHhsixLqampcrlcmjp1qrp06dJon8rKaltrB1oKr9dq2J4+fd7P1aCl4M+Cub7J2HXt2uGiz9t6Fnptba1SUlKUkJCg+Ph4SVJYWJjKysokSWVlZQoNDW20j2VZWrp0qSZMmKAXX3xREydO1B133KGsrCw7SwUAwCi2BbhlWZoxY4Z69eql5OTkhucHDx6s3NxcSVJubq6GDBnSaL+cnBzFxcWpY8eO8ng8cjqdcjqdcrvddpUKAIBxbDuEvnv3bq1fv17XXnutRo26sPRfWlqaHnroIaWmpmrdunXq3r27MjIyGvZxu93KycnRsmXLJEnJyclKSUlRYGCgFixYYFepAAAYx7YAj4mJ0SeffHLR1764JvzL2rZt2+hQeUxMjDZs2GBLfQAAmIw7sQEAYCACHAAAAxHgAAAYiPXAgWZWFdu32fu0IiOloCBZJcXN3n+7nXubtT8AvsEMHAAAAxHgAAAYiAAHAMBABDgAAAYiwAEAMBABDgCAgQhwAAAMRIADAGAgAhwAAAMR4AAAGIgAxxWpsHCXZs2aocLCXf4uBQD+JdwLHVektWtXqajoiDwet6KjY/xdDgB8Y8zAcUVyuz2NtgBgGgIcAAADEeCAAdpYVqMtABDggAFGVVToOrdboyoq/F0KgBaCk9gAA0S53Ypyu/1dBoAWhBk4AAAGIsABADAQAQ4AgIEIcAAADESAAwBgIAIcAAADcRkZWrzY2PJm7zMy0qugIKmkxNvs/W9t1t4A4OKYgQMAYCACHAAAAxHgAAAYiAAHAMBABDgAAAYiwAEAMBABDgCAgQhwAAAMRIADAGAgAhwAAAMR4LgiWVabRlsAMA0BfgUpLNylWbNmqLBwl79L8buKilFyu69TRcUof5cCAP8SFjO5gqxdu0pFRUfk8bgVHR3j73L8yu2Oktsd5e8yAOBfxgz8WzBtRut2exptAQDmYgb+LTCjBQD4CzPwb4EZLQDAXwhwAAAMRIADAGAgAhwAAAMR4AAAGIgABwDAQFfMZWSxseXN3mdkpFdBQVJJibfZ+9+qQc3anyRZkZFSUJCskmJVxfZt1r7b7dzbrP0BAC6PGTgAAAYiwAEAMBABDgCAgQhwAAAM5JcA/+CDD3Tbbbdp2LBhyszMlCTNnz9fCQkJmjJlSkO73NxcrVy50h8lAgDQovk8wL1er2bPnq1XXnlFmzZt0saNG3Xw4EHt2bNHGzZskNfr1SeffCKPx6OcnByNHj3a1yU2mWW1abQFAMBXfB7ge/fu1b/927+pR48eCgoK0ogRI5Sfn6/a2lpZlqXq6mq5XC698soruu+++xQYGOjrEpusomKU3O7rVFExyt+lNEkby2q0BQCYy+fXgZeWlioiIqLhcbdu3bR3717Fx8crMTFRAwcOVIcOHbRv3z498sgjl+2ra9cOTX7foqKmt226npLsCu+iZu9xWbP36Bv2jJ2dmn/sTGTeuEmM3QXmjd2VOW4+D3DrIrM/h8OhcePGady4cZKkGTNmKCUlRWvXrtX27dt13XXXacKECb4uFQCAFsvnh9AjIiL0+eefNzwuLS1VeHh4w+P9+/dLknr27Knc3FxlZGTo8OHDKi4u9nWpAAC0WD4P8BtvvFHFxcU6duyYampqtGnTJg0ePLjh9YyMDKWkpKiurk5er/dCkU6nPB6Pr0sFAKDF8vkhdJfLpZkzZ+rBBx+U1+tVUlKSvve970mS8vLydOONN6pbt26SpH79+ikhIUHXXnut/uM//sPXpQIA0GI5rIv9KI1mNW3aNL3//vsKCwvTxo0bJUkHDhzQU089perqagUEBOjpp59W375fXWDkhRdeUH5+vpxOp8LCwpSent7wPziS9Nlnn2nEiBF65JFHNHbsWJ99pitBdXW1/ud//kc1NTXyer267bbblJKSosWLF2vNmjUKDQ2VJKWlpSkuLu4r+39dO8bOXoMHD1a7du3kdDoVEBCgN998s8ljJ0lZWVl6/fXX5XK5FBcX1+geFYyd75w9e1a//OUvdejQITkcDj377LPavn17k8exNSPAfaCgoEAhISF64oknGgL8gQce0JgxYxQXF6dt27bplVdeUVZW1lf2raysVPv27SVJr732mv7yl79o9uzZDa9PnDhRDodDUVFR/EPSzCzL0vnz59WuXTvV1tZq9OjRmjFjhv73f/9XISEhX/t9L168+LLtGDt7DR48WOvWrWv4R176+jH5wo4dO/Tb3/5WmZmZCgoKUnl5ucLCwhpeZ+x854knnlBMTIzuvvtu1dTUyOPxaOXKlV87josXL9Y111yjH//4xz6s1reumOVE/al///46fvx4o+ccDoeqqqokSefOnWt0It8/+yK8JcntdsvhcDQ8zsvLU2RkpEJCQmyoGg6HQ+3atZMk1dXVqa6urtH3/20wdi1bdna2HnroIQUFBUlSo/Bm7HynsrJSBQUFeu655yRJQUFBDWMC7oXuN9OnT9e8efMUFxenuXPnKi0t7ZJtFy5cqLi4OG3YsEGTJk2SJJ0/f14vv/zy114rj2/H6/Vq1KhR+uEPf6gf/vCHioqKkiS98cYbSkhI0LRp03TmzJlL7n+xdoyd74wdO1Y//vGPtXr16obnmjJ2xcXF2rVrl+6++27de++92rv3wnr3jJ1vHTt2TKGhoZo2bZoSExM1Y8YMnT9/XlLT/w62ZgS4n2RnZ2vatGnatm2bpk2bphkzZlyy7aOPPqpt27YpISFBr7+IdrW/AAAGzUlEQVT+uqQLh4fGjBnTMEOEPQICArR+/Xpt27ZNe/fu1aFDh3TPPffovffe0/r16xUeHt4wO/iyS7Vj7HwjOztbOTk5evnll/XGG2+ooKCgyWPn9Xp19uxZrVmzRlOmTFFqaqosy2LsfKyurk779+/XPffco9zcXLVt21aZmZmXHMdPPvlEo0aN0qhRo/S73/1OixYtanh86tQpP38aG1jwiWPHjlkjRoxoeBwdHW3V19dblmVZ9fX1Vr9+/SzLsqypU6dad9xxh/Xggw9+pY/jx4839HHPPfdYgwYNsgYNGmTddNNNVv/+/a2srCwffJIr1+LFi61XXnml0XP/PK6XG7t/bsfY+d6iRYu+0dg98MAD1o4dOxraDhkyxCovL2fsfKysrMwaNGhQw+OCggJr3Lhxjdp8+d/WLyxatMj6/e9/b3uN/sRv4H4SHh6unTt3asCAAdqxY4d69uwpSUpPT2/Urri4uOG1LVu2qFevXpKkVatWNbT54sSce++91ye1XykqKirkcrl01VVXyePx6MMPP9S4ceNUVlbWcM5CXl5ew2WQXx67S7Vj7Ox3/vx51dfXq3379jp//rz+8Ic/aMKECU0eu6FDh2rHjh0aMGCAioqKVFtbq86dOzN2Pta1a1dFREToyJEj6tWrlz766CP17t37kuN4pSHAfSAtLU07d+7UqVOndOutt2rixIl65pln9Oyzz6qurk7BwcGNziz/ZwsWLFBRUZEcDoeuueYazZo1y8fVX7nKyso0depUeb1eWZal4cOHa9CgQXr88cd18OBBSdI111xzybGbP39+k9qh+ZWXl+sXv/iFpAuHw0eOHKlbb721yWOXlJSk6dOna+TIkQoMDNRzzz3XbCcw4pt58sknNXnyZNXW1qpHjx5KT0/XnDlz+LslLiMDAMBInMQGAICBCHAAAAxEgAMAYCACHAAAAxHgAAAYiAAHAMBABDjQCr333nu67rrr9Omnn9r+XsePH9fIkSNtfx8AjRHgQCu0ceNG3XTTTXrrrbf8XQoAm3AnNqCVqaqqUmFhoV577TU9/PDDmjhxourr6zV79mwVFBQoMjJS9fX1SkpK0vDhw7Vv3z4999xzOn/+vDp37qz09PRLLm979OhRPfXUU6qoqFBAQIAyMjLkdP5jHnD8+HFNmTJFbrdb0oW7aEVHR6usrEyPPvqoKisr5fV69fTTT6tfv36aMWOG9u3bJ4fDoaSkJN1///2++IqAVoEAB1qZvLw83XLLLfrud7+rTp066eOPP9axY8d04sQJbdiwQeXl5frRj36kpKQk1dbWas6cOVqyZIlCQ0P11ltvaeHChV+5N/gXJk+erIceekjDhg1TdXW16uvrVV5e3vB6WFiYli9fruDgYBUXFystLU1vvvmmNm7cqJtvvlkPP/ywvF6v3G63Dhw4oNLSUm3cuFGSdPbsWZ98P0BrQYADrcymTZs0ZswYSdKPfvQjbdy4UXV1dRo+fLicTqe6du2qAQMGSJKKiop06NAhJScnS5Lq6+vVtWvXi/ZbWVmp0tJSDRs2TJIUHBz8lTZ1dXWaPXu2Dh48KKfTqeLiYknSjTfeqOnTp6uurk5Dhw5Vnz591KNHDx07dkzPPPOM4uLidPPNNzf3VwG0agQ40IqcOnVKO3bs0OHDh+VwOOT1euVwODR06NCLtrcsS9/73ve0evXqZnn/FStWqEuXLlq/fr3q6+vVt29fSVL//v31+uuva9u2bZoyZYrGjh2rxMRErV+/Xtu3b9eqVav09ttvX3LmD+CrOIkNaEXeffddJSYmauvWrdqyZYu2bdumyMhIde7cWZs3b1Z9fb3+9re/aefOnZKk7373u6qoqNCePXskSbW1tTp8+PBF+27fvr0iIiKUl5cnSaqpqWn4rfsL586dU9euXeV0OrV+/Xp5vV5J0okTJxQWFqaf/OQnSkpK0scff6yKigpZlqXbbrtNkyZN0v79++36WoBWiRk40Ips2rRJ48aNa/RcfHy8Pv30U3Xr1k0jR45Uz5491bdvX3Xo0EFBQUFatGiR5syZo3Pnzsnr9WrMmDGXXF953rx5mjlzpjIyMhQYGKiMjIxGy2yOHj1aEydO1DvvvKMBAwYoJCREkrRz5069+uqrcrlcCgkJ0dy5c1VWVqZp06apvr5e0oVldwE0HcuJAleIqqoqtWvXTqdOndLdd9+t7OzsS/7eDaDlYwYOXCF+/vOf6+zZs6qtrdWECRMIb8BwzMABfMWsWbNUWFjY6Lmf/exnSkpK8lNFAL6MAAcAwECchQ4AgIEIcAAADESAAwBgIAIcAAAD/X8D76BxHgckHwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with sns.axes_style('darkgrid'):\n", + " fig, ax = plt.subplots(figsize=(7, 5), nrows=1, ncols=1)\n", + "\n", + " sns.barplot(data=df, x=\"Age_class\", y=\"Dead\", hue=\"Smoker\", ax=ax, ci=95, palette=(\"blue\", \"red\"), order=tuple(age_classes)) # Note: in recent versions of seaborn, we should rather use errorbar=('ci', 95).\n", + "\n", + " ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))\n", + "\n", + " plt.tight_layout()\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Redo the statistical tests" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Ttest_indResult(statistic=0.013780528783028018, pvalue=0.9890122465905076)\n", + "Ttest_indResult(statistic=1.783075769977916, pvalue=0.07588466008529735)\n", + "Ttest_indResult(statistic=2.4009857289248324, pvalue=0.01677328896330688)\n", + "Ttest_indResult(statistic=0.03927297913991199, pvalue=0.9687782291030265)\n" + ] + } + ], + "source": [ + "df_smoker = df[df[\"Smoker\"]]\n", + "df_nonsmoker = df[~df[\"Smoker\"]]\n", + "\n", + "for age_class in age_classes:\n", + " df_smoker_age_class = df_smoker[df_smoker[\"Age_class\"] == age_class]\n", + " df_nonsmoker_age_class = df_nonsmoker[df_nonsmoker[\"Age_class\"] == age_class]\n", + " print(ttest_ind(df_smoker_age_class[\"Dead\"], df_nonsmoker_age_class[\"Dead\"], equal_var=False))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Conclusion**: when inspecting by age class, the conclusion is reversed, i.e. within each age class, the mortality rate is higher among smokers. \n", + "\n", + "Note that this increase of mortality rate is not significant for the youngest and oldest age classes ($p\\gg0.05$). This is intuitive: tobacco-related health conditions take many years to develop, therefore they do not significantly impact the mortality rate of young people. On the other hand, the elderly suffer from many possible health threats that are unrelated to their smoking status, which tend to erase disparities between smokers and non-smokers. \n", + "\n", + "For the other two age classes (35-54 and 55-64), the increases of mortality rates are respectively weakly significant ($p=0.076$) and significant ($p=0.017$). In particular, this last class comprises aging people for which a lifetime of smoking may is likely to have had the time to trigger tobacco-related conditions." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Why is it seemingly contradictory with the age-agnostic analysis of question 1?\n", + "\n", + "In this study, $Age$ is a typical *confounding* variable. In simple terms, $Age$ conditions the distributions of the $Smoker$ variable. In the figure below, we report the percentage of smokers per age class." + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfAAAAFgCAYAAABEyiulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3X9YlHW+//HXDAOkoijIqCe9Srtq1+85abam0VasmD92FSXZTqe2tiZSNgtEtgykKF03M4+1iJd2sR49RitlrsBiv1x01356RO1cbmm/Nn/XMi2IiM4oDPf3j67mLCk6tt0zfPT5uK4u5Oa+b97wyZ7NzXCPw7IsSwAAwCjOSA8AAADOHQEHAMBABBwAAAMRcAAADETAAQAwkCvSA/wzvvzyaKRHAADAVklJ3U+7nUfgAAAYiIADAGAgAg4AgIEIOAAABiLgAAAYiIADAGAgWwPe1NSknJwcjR8/Xj/+8Y/13nvvqbGxUR6PR2PHjpXH49GRI0ckSdu3b1daWpoyMjK0b9++4PGZmZni9VYAAGjP1oD/+te/1g033KDXXntNVVVVuuyyy1RaWqrk5GRt2LBBycnJKi0tlSStXLlSJSUlysvLU3l5uSRp6dKlysrKksPhsHNMAACMY1vAm5ubVVtbq5/+9KeSpJiYGPXo0UMbN25Uenq6JCk9PV01NTWSJJfLJb/fL5/PJ5fLpf3796uurk4jRoywa0QAAIxl253YDhw4oISEBBUUFOjDDz/Uv/7rv6qwsFD19fVyu92SJLfbrYaGBklSVlaWioqKFBsbq4ULF2rBggWaMWPGGT9HXFysXK4ou74EAAA6LdsC3traql27dunRRx/V0KFDNW/evODl8tMZPHiw1qxZI0mqra2V2+2WZVnKzc2Vy+VSfn6+evfu3e6Y5uYTdo0PAECnEPZbqfbt21d9+/bV0KFDJUnjx4/Xrl27lJiYKK/XK0nyer1KSEhod5xlWVq2bJmmT5+uJUuWKDs7W5MmTVJZWZldowIAYBzbAp6UlKS+ffvqs88+kyS9++67uuyyy5SamqrKykpJUmVlpUaPHt3uuIqKCqWkpCg+Pl5+v19Op1NOp1M+n8+uUQEAMI7DsvF3tHbv3q3CwkK1tLRowIABmj9/vtra2pSbm6svvvhC/fr1U3FxsXr27ClJ8vl8mjZtmlasWKHo6Ght27ZNc+bMUXR0tBYtWqSBAwe2Oz+vRobOZseObaqurlBa2s26+urhkR4HwHmgo0votgbcbgQcnU1BQZ727PlMAwcO0vz5T0d6HADnAV5OFAgDn8/f7i0A2IWAAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAIOAICBCDgAAAYi4AAgaceObZozp1A7dmyL9ChASFyRHgAAOoOXXlqtPXs+k9/v09VXD4/0OMBZEXB0er5l10V6hJBZRxIkuWQd2W/M3F3ueyfSI3QKPp+/3Vugs+MSOgAABiLgAAAYiIADAGAgAg4AgIEIOAAABiLgAAAYiIADAGAgAg4AgIEIOAAABiLgAAAYiIADAGAgAg4AgIEIOAAABiLgAAAYiIADAGAgAg4AgIEIOAAABiLgAAAYyNaAp6amKi0tTZMnT9aUKVMkSY2NjfJ4PBo7dqw8Ho+OHDkiSdq+fbvS0tKUkZGhffv2SZKampqUmZkpy7LsHBMAAOPY/gh81apVqqqq0rp16yRJpaWlSk5O1oYNG5ScnKzS0lJJ0sqVK1VSUqK8vDyVl5dLkpYuXaqsrCw5HA67x+xUduzYpjlzCrVjx7ZIjwIA6KTCfgl948aNSk9PlySlp6erpqZGkuRyueT3++Xz+eRyubR//37V1dVpxIgR4R4x4l56abV27/5AL720OtKjAAA6KZfdnyAzM1MOh0O33nqrbr31VtXX18vtdkuS3G63GhoaJElZWVkqKipSbGysFi5cqAULFmjGjBlnPHdcXKxcrii7v4SwO3nyRPBtz55dIzxN5PkiPcB5jn/HvhIV5Qi+5XsCE9ga8PLycvXp00f19fXyeDwaNGhQh/sOHjxYa9askSTV1tbK7XbLsizl5ubK5XIpPz9fvXv3bndMc/MJO8ePmEDACr5tbDwe4WlwLi6Kstq9NQH/jn2Fv3forJKSup92u62X0Pv06SNJSkxM1JgxY7Rz504lJibK6/VKkrxerxISEtodY1mWli1bpunTp2vJkiXKzs7WpEmTVFZWZueowHfi5kuP6fvxJ3XzpcciPQqA85xtAT9+/Liam5uDf3777bd1+eWXKzU1VZWVlZKkyspKjR49ut1xFRUVSklJUXx8vPx+v5xOp5xOp3w+LqSi8xuaeFIPX9WooYknIz0KgPOcbZfQ6+vrdf/990uSAoGAJk6cqBtvvFFXXnmlcnNztXbtWvXr10/FxcXBY3w+nyoqKrRixQpJksfjUU5OjqKjo7Vo0SK7RgUAwDi2BXzAgAH6wx/+cMr2Xr16adWqVac9pkuXLu0ulQ8fPlzV1dV2jQgAgLFsfxY6gAtT+gvjIz3COYk5GiOnnPr86EFjZq/8j9ciPQIiiFupAgBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIFekBwiX8U9VRHqEkHU/3KwoSYcONxsz92uzbo70CABwQeEROAAABiLgAAAYiIADAGAgAg4AgIEIOAAABiLgAAAYiIADAGAgAg4AgIEIOAAABiLgAAAYiIADAGAgAg4AgIEIOAAABiLgACD932szXjCv0QjTEXAAkNQ6uFWB3gG1Dm6N9ChASPh/TQCQ1Na3TW192yI9BhAyHoEDAGAgAg4AgIEIOAAABiLgAAAYiIADAGAgAg4AgIFsD3ggEFB6erqysrIkSY2NjfJ4PBo7dqw8Ho+OHDkiSdq+fbvS0tKUkZGhffv2SZKampqUmZkpy7LsHhMAAKPYHvDnnntOl112WfD90tJSJScna8OGDUpOTlZpaakkaeXKlSopKVFeXp7Ky8slSUuXLlVWVpYcDofdY3YqltPV7i0AAN9ka8D/9re/6c9//rN++tOfBrdt3LhR6enpkqT09HTV1NRIklwul/x+v3w+n1wul/bv36+6ujqNGDHCzhE7Jf+/DFNLXF/5/2VYpEcBAHRStj7Ee+KJJ/TQQw/p2LFjwW319fVyu92SJLfbrYaGBklSVlaWioqKFBsbq4ULF2rBggWaMWPGGc8fFxcrlyvKvi8gQlrj+6s1vn+kxzgnPXt2te3cPtvODMnetYO9WLsLm20B/9Of/qSEhAT927/9m/7nf/7nrPsPHjxYa9askSTV1tbK7XbLsizl5ubK5XIpPz9fvXv3bndMc/MJW2bHuWtsPB7pEfAtsXbmYu0uDElJ3U+73baA79ixQ5s2bdIbb7yhEydOqLm5WQ8++KASExPl9Xrldrvl9XqVkJDQ7jjLsrRs2TI988wzmjt3rrKzs3Xo0CGVlZVp5syZdo0LAIBRbPsZ+C9/+Uu98cYb2rRpk55++mlde+21+s///E+lpqaqsrJSklRZWanRo0e3O66iokIpKSmKj4+X3++X0+mU0+mUz8eFVAAAvhb2pzlPmzZNubm5Wrt2rfr166fi4uLgx3w+nyoqKrRixQpJksfjUU5OjqKjo7Vo0aJwjwoAQKcVloCPHDlSI0eOlCT16tVLq1atOu1+Xbp0UVlZWfD94cOHq7q6OhwjAgBgFO7EBgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABjorAEPBAK6++67wzAKAAAI1VkDHhUVpYsuukhHjx4NxzwAACAErlB2io2NVVpamq677jp17do1uP2RRx6xbTAAANCxkAL+ox/9SD/60Y9sHgUAAIQqpIDffPPN8vv9+vzzzzVo0CC7ZwIAAGcR0rPQN23apMmTJ+vee++VJO3evVu/+MUvbB0MAAB0LKSAL1myRGvXrlWPHj0kSYMHD9ahQ4dsHQwAAHQspIBHRUWpe/fuds8CAABCFNLPwC+//HJVV1crEAho7969Kisr07Bhw+yeDQAAdCCkR+CPPvqoPv30U8XExOiXv/yl4uLiVFhYaPdsAACgAyE9Aq+vr9fMmTM1c+bM4LadO3dqyJAhtg0GAAA6FtIj8OzsbNXV1QXfr62t5RE4AAARFFLA58yZo+nTp+vLL7/U5s2bNW/ePJWWlto9GwAA6EBIl9CHDBmiRx55RPfcc49iY2O1cuVKJSQk2D0bAADowBkD/s2btfj9fnXv3l2zZ8+WJD377LP2TQYAADp0xoDfc8894ZoDAACcgzMGfMSIEcE///3vf9df/vIXSV9dUk9MTLR3MgAA0KGQnsT2yiuv6JZbbtFrr72mV199NfhnAAAQGSE9ie3ZZ5/V2rVrg4+6GxoadPfdd2v8+PEdHnPixAn97Gc/08mTJxUIBDRu3Djl5OSosbFRM2fO1KFDh3TxxRfrN7/5jeLj47V9+3Y9/vjjiomJ0dNPP61LLrlETU1NmjlzppYvXy6Hw/HdfMUAAJwHQnoEbllWu0vmPXv2lGVZZzwmJiZGq1at0h/+8AdVVlbqzTff1P/+7/+qtLRUycnJ2rBhg5KTk4O/jrZy5UqVlJQoLy9P5eXlkqSlS5cqKyuLeAMA8A0hBfz6669XZmam1q1bp3Xr1mnatGm64YYbzniMw+FQt27dJEmtra1qbW2Vw+HQxo0blZ6eLklKT09XTU2NJMnlcsnv98vn88nlcmn//v2qq6tr93N4AADwlZAuoT/88MN6/fXXtWPHDlmWpVtvvVVjxow563GBQEBTpkzR/v37dfvtt2vo0KGqr6+X2+2WJLndbjU0NEiSsrKyVFRUpNjYWC1cuFALFizQjBkzznj+uLhYuVxRoXwJsFnPnl1tO7fPtjNDsnftYC/W7sIWUsAlady4cfrhD3+o1tZWSVJjY6N69ux5xmOioqJUVVWlpqYm3X///fr444873Hfw4MFas2aNpK9u1ep2u2VZlnJzc+VyuZSfn6/evXu3O6a5+USo48NmjY3HIz0CviXWzlys3YUhKen0L+cdUsBfeOEFLV68WBdddJEcDocsywpeDg9Fjx49NHLkSL355ptKTEyU1+uV2+2W1+s95Y5ulmVp2bJleuaZZzR37lxlZ2fr0KFDKisra/diKgAAXMhCCviKFSu0fv36c7p9akNDg1wul3r06CG/36933nlHU6dOVWpqqiorKzVt2jRVVlZq9OjR7Y6rqKhQSkqK4uPj5ff75XQ65XQ65fNxIRUAgK+FFPABAwaoS5cu53Rir9er/Px8BQIBWZal8ePHa9SoUbrqqquUm5urtWvXql+/fiouLg4e4/P5VFFRoRUrVkiSPB6PcnJyFB0drUWLFp3T5wcA4HzmsM72+2CSdu3apYKCAg0dOlQxMTHB7Y888oitw53Nl18eDXnf8U9V2DgJXpt1s23n9i27zrZzQ+py3zu2nDf9hY7vE4HvRuV/cEOtC8E/9TPwoqIiXXvttbriiivkdIb0m2cAAMBGIQXc5XKpoKDA7lkAAECIQno4PXLkSL344ovyer1qbGwM/gMAACIjpEfg1dXVkhS87enXQv01MgAA8N06Y8B37typfv36adOmTZK++hWv119/Xf3799cDDzwQlgEBAMCpzngJ/bHHHlN0dLSkr+6OtmjRIt18882Ki4tTUVFRWAYEAACnOmPAA4FA8Hapr7zyim699VaNGzdOubm52rdvX1gGBAAApzpjwNva2oL3Pn/33Xd17bXXBj8WCATsnQwAAHTojD8DnzBhgu644w716tVLF110kYYPHy5J2rdvn+Li4sIyIAAAONUZA37fffcpOTlZX375pX74wx/K4XBI+uqR+aOPPhqWAQEAwKnO+mtkV1111SnbBg4caMswAAAgNNwXFQAAAxFwAAAMRMABAMbasWOb5swp1I4d2yI9StiFdCtVAAA6o5deWq09ez6T3+/T1VcPj/Q4YcUjcACAsXw+f7u3FxICDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgWwL+BdffKE777xTP/7xjzVhwgStWrVKktTY2CiPx6OxY8fK4/HoyJEjkqTt27crLS1NGRkZ2rdvnySpqalJmZmZsizLrjEBADCSbQGPiopSfn6+Xn31Vb344otavXq1Pv30U5WWlio5OVkbNmxQcnKySktLJUkrV65USUmJ8vLyVF5eLklaunSpsrKy5HA47BoTAAAjuew6sdvtltvtliTFxcVp0KBBqqur08aNG1VWViZJSk9P15133qmHHnpILpdLfr9fPp9PLpdL+/fvV11dnUaMGGHXiACA09iVMTHSI4TspCtKcjh08otDRs39/36//p8+h20B/0cHDx7U7t27NXToUNXX1wfD7na71dDQIEnKyspSUVGRYmNjtXDhQi1YsEAzZsw443nj4mLlckXZPj/OrmfPrrad22fbmSHZu3awF2tnru9i7WwP+LFjx5STk6PZs2crLi6uw/0GDx6sNWvWSJJqa2vldrtlWZZyc3PlcrmUn5+v3r17tzumufmErbMjdI2NxyM9Ar4l1s5crJ25zmXtkpK6n3a7rc9Cb2lpUU5OjtLS0jR27FhJUmJiorxeryTJ6/UqISGh3TGWZWnZsmWaPn26lixZouzsbE2aNCl42R0AANgYcMuyVFhYqEGDBsnj8QS3p6amqrKyUpJUWVmp0aNHtzuuoqJCKSkpio+Pl9/vl9PplNPplM/HhVQAAL5m2yX07du3q6qqSldccYUmT54sScrLy9O0adOUm5urtWvXql+/fiouLg4e4/P5VFFRoRUrVkiSPB6PcnJyFB0drUWLFtk1KgAAxrEt4MOHD9dHH3102o99/Tvh39SlS5d2l8qHDx+u6upqW+YDAMBk3IkNAAADEXAAAAxEwAEAMBABBwDAQAQcAAADEXAAAAxEwAEAMBABBwDAQAQcAAADEXAAAAxEwAEAMBABBwDAQAQcAAADEXAAAAxEwAEAMBABBwDAQAQcAAADEXAAAAxEwAEAxor9xtsLCQEHABgrJdCmS9ralBJoi/QoYeeK9AAAAHxbV1iWrghYkR4jIngEDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCDbAl5QUKDk5GRNnDgxuK2xsVEej0djx46Vx+PRkSNHJEnbt29XWlqaMjIytG/fPklSU1OTMjMzZVmWXSMCAGAs2wI+ZcoULV++vN220tJSJScna8OGDUpOTlZpaakkaeXKlSopKVFeXp7Ky8slSUuXLlVWVpYcDoddIwIAYCzbAn7NNdcoPj6+3baNGzcqPT1dkpSenq6amhpJksvlkt/vl8/nk8vl0v79+1VXV6cRI0bYNR4AAEZzhfOT1dfXy+12S5LcbrcaGhokSVlZWSoqKlJsbKwWLlyoBQsWaMaMGWc9X1xcrFyuKFtnRmh69uxq27l9tp0Zkr1rB3uxdub6LtYurAHvyODBg7VmzRpJUm1trdxutyzLUm5urlwul/Lz89W7d+9TjmtuPhHuUdGBxsbjkR4B3xJrZy7WzlznsnZJSd1Puz2sz0JPTEyU1+uVJHm9XiUkJLT7uGVZWrZsmaZPn64lS5YoOztbkyZNUllZWTjHBACg0wtrwFNTU1VZWSlJqqys1OjRo9t9vKKiQikpKYqPj5ff75fT6ZTT6ZTPx0VUAAD+kW2X0PPy8rR161YdPnxYN954o7KzszVt2jTl5uZq7dq16tevn4qLi4P7+3w+VVRUaMWKFZIkj8ejnJwcRUdHa9GiRXaNCQCAkWwL+NNPP33a7atWrTrt9i5durS7VD58+HBVV1fbMhsAAKbjTmwAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABiIgAMAYCACDgCAgQg4AAAGIuAAABgoIgF/4403NG7cOI0ZM0alpaWSpIULFyotLU2zZs0K7ldZWalVq1ZFYkQAADq1sAc8EAho7ty5Wr58uV5++WWtX79eH374od577z1VV1crEAjoo48+kt/vV0VFhW6//fZwjwgAQKfnCvcn3Llzpy655BINGDBAkjRhwgRt3LhRLS0tsixLJ06ckMvl0vLly3XnnXcqOjo63CMCANDphT3gdXV16tu3b/D9Pn36aOfOnRo7dqzS09OVnJys7t276/3339cDDzxwxnMlJXUP+fNuX/jzbz0zIqzoL5GeAN/C29lvR3oEfEspb2yO9AgIQdgDblnWKdscDoemTp2qqVOnSpIKCwuVk5Ojl156SW+99Za+973vafr06eEeFQCATivsPwPv27ev/va3vwXfr6urk9vtDr6/a9cuSdKll16qyspKFRcX65NPPtHevXvDPSoAAJ1W2AN+5ZVXau/evTpw4IBOnjypl19+WampqcGPFxcXKycnR62trQoEAl8N6XTK7/eHe1QAADqtsF9Cd7lcKioq0r333qtAIKCMjAxdfvnlkqSamhpdeeWV6tOnjyRp2LBhSktL0xVXXKHvf//74R4VAIBOy2Gd7ofS+E4VFBToz3/+sxITE7V+/XpJ0u7du/XYY4/pxIkTioqK0uOPP64hQ4accuxvfvMbbdy4UU6nU4mJiZo/f37wf3Ak6fPPP9eECRP0wAMPKDMzM2xf04XgxIkT+tnPfqaTJ08qEAho3LhxysnJUUlJidasWaOEhARJUl5enlJSUk45/mz7sXb2Sk1NVbdu3eR0OhUVFaV169aFvHaSVFZWpueff14ul0spKSnt7lHB2oVPU1OTHnnkEX388cdyOBx64okn9NZbb4W8juczAh4GtbW16tq1qx5++OFgwO+55x7dddddSklJ0ebNm7V8+XKVlZWdcmxzc7Pi4uIkSc8995w+/fRTzZ07N/jx7OxsORwODR06lP+QfMcsy9Lx48fVrVs3tbS06Pbbb1dhYaHefPNNde3a9azf75KSkjPux9rZKzU1VWvXrg3+R146+5p8bcuWLXr22WdVWlqqmJgY1dfXKzExMfhx1i58Hn74YQ0fPly33HKLTp48Kb/fr1WrVp11HUtKSnTxxRdrypQpYZw2vMJ+Cf1CdM011+jgwYPttjkcDh07dkySdPTo0XZP5PtHX8dbknw+nxwOR/D9mpoa9e/fX127drVhajgcDnXr1k2S1NraqtbW1nbf/38Ga9e5lZeXa9q0aYqJiZGkdvFm7cKnublZtbW1evLJJyVJMTExwTUB90KPmNmzZ+upp55SSkqKFixYoLy8vA73feaZZ5SSkqLq6mrNmDFDknT8+HH99re/PevvyuOfEwgENHnyZF133XW67rrrNHToUEnS7373O6WlpamgoEBHjhzp8PjT7cfahU9mZqamTJmiF198MbgtlLXbu3evtm3bpltuuUV33HGHdu7cKYm1C7cDBw4oISFBBQUFSk9PV2FhoY4fPy4p9L+D5zMCHiHl5eUqKCjQ5s2bVVBQoMLCwg73nTlzpjZv3qy0tDQ9//zzkr66PHTXXXcFHyHCHlFRUaqqqtLmzZu1c+dOffzxx7rtttv0xz/+UVVVVXK73cFHB9/U0X6sXXiUl5eroqJCv/3tb/W73/1OtbW1Ia9dIBBQU1OT1qxZo1mzZik3N1eWZbF2Ydba2qpdu3bptttuU2Vlpbp06aLS0tIO1/Gjjz7S5MmTNXnyZL3wwgtavHhx8P3Dhw9H+KuxgYWwOHDggDVhwoTg+1dffbXV1tZmWZZltbW1WcOGDbMsy7Ly8/OtSZMmWffee+8p5zh48GDwHLfddps1atQoa9SoUdYPfvAD65prrrHKysrC8JVcuEpKSqzly5e32/aP63qmtfvH/Vi78Fu8ePE5rd0999xjbdmyJbjv6NGjrfr6etYuzLxerzVq1Kjg+7W1tdbUqVPb7fPN/7Z+bfHixdbvf/9722eMJH4GHiFut1tbt27VyJEjtWXLFl166aWSpPnz57fbb+/evcGPbdq0SYMGDZIkrV69OrjP10/MueOOO8Iy+4WioaFBLpdLPXr0kN/v1zvvvKOpU6fK6/UGn7NQU1MT/DXIb65dR/uxdvY7fvy42traFBcXp+PHj+vtt9/W9OnTQ167m266SVu2bNHIkSO1Z88etbS0qFevXqxdmCUlJalv37767LPPNGjQIL377ru67LLLOlzHCw0BD4O8vDxt3bpVhw8f1o033qjs7Gz96le/0hNPPKHW1lbFxsa2e2b5P1q0aJH27Nkjh8Ohiy++WHPmzAnz9Bcur9er/Px8BQIBWZal8ePHa9SoUXrooYf04YcfSpIuvvjiDtdu4cKFIe2H7159fb3uv/9+SV9dDp84caJuvPHGkNcuIyNDs2fP1sSJExUdHa0nn3zyO3sCI87No48+qgcffFAtLS08jP64AAAEP0lEQVQaMGCA5s+fr3nz5vF3S/waGQAARuJJbAAAGIiAAwBgIAIOAICBCDgAAAYi4AAAGIiAAwBgIAIOnIf++Mc/6nvf+57++te/2v65Dh48qIkTJ9r+eQC0R8CB89D69ev1gx/8QK+88kqkRwFgE+7EBpxnjh07ph07dui5557Tfffdp+zsbLW1tWnu3Lmqra1V//791dbWpoyMDI0fP17vv/++nnzySR0/fly9evXS/PnzO3x523379umxxx5TQ0ODoqKiVFxcLKfz/x4HHDx4ULNmzZLP55P01V20rr76anm9Xs2cOVPNzc0KBAJ6/PHHNWzYMBUWFur999+Xw+FQRkaG7r777nB8i4DzAgEHzjM1NTW64YYbNHDgQPXs2VMffPCBDhw4oEOHDqm6ulr19fX6yU9+ooyMDLW0tGjevHlaunSpEhIS9Morr+iZZ5455d7gX3vwwQc1bdo0jRkzRidOnFBbW5vq6+uDH09MTNTKlSsVGxurvXv3Ki8vT+vWrdP69et1/fXX67777lMgEJDP59Pu3btVV1en9evXS5KamprC8v0BzhcEHDjPvPzyy7rrrrskST/5yU+0fv16tba2avz48XI6nUpKStLIkSMlSXv27NHHH38sj8cjSWpra1NSUtJpz9vc3Ky6ujqNGTNGkhQbG3vKPq2trZo7d64+/PBDOZ1O7d27V5J05ZVXavbs2WptbdVNN92kwYMHa8CAATpw4IB+9atfKSUlRddff/13/a0AzmsEHDiPHD58WFu2bNEnn3wih8OhQCAgh8Ohm2666bT7W5alyy+/XC+++OJ38vn/+7//W71791ZVVZXa2to0ZMgQSdI111yj559/Xps3b9asWbOUmZmp9PR0VVVV6a233tLq1av16quvdvjIH8CpeBIbcB55/fXXlZ6erj/96U/atGmTNm/erP79+6tXr17asGGD2tra9Pe//11bt26VJA0cOFANDQ167733JEktLS365JNPTnvuuLg49e3bVzU1NZKkkydPBn/W/bWjR48qKSlJTqdTVVVVCgQCkqRDhw4pMTFR//7v/66MjAx98MEHamhokGVZGjdunGbMmKFdu3bZ9W0Bzks8AgfOIy+//LKmTp3abtvYsWP117/+VX369NHEiRN16aWXasiQIerevbtiYmK0ePFizZs3T0ePHlUgENBdd93V4esrP/XUUyoqKlJxcbGio6NVXFzc7mU2b7/9dmVnZ+u1117TyJEj1bVrV0nS1q1b9V//9V9yuVzq2rWrFixYIK/Xq4KCArW1tUn66mV3AYSOlxMFLhDHjh1Tt27ddPjwYd1yyy0qLy/v8OfdADo/HoEDF4hf/OIXampqUktLi6ZPn068AcPxCBzAKebMmaMdO3a02/bzn/9cGRkZEZoIwDcRcAAADMSz0AEAMBABBwDAQAQcAAADEXAAAAz0/wH4ekCKZwl23AAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with sns.axes_style('darkgrid'):\n", + " fig, ax = plt.subplots(figsize=(7, 5), nrows=1, ncols=1)\n", + "\n", + " sns.barplot(data=df, x=\"Age_class\", y=\"Smoker\", ax=ax, ci=95, order=tuple(age_classes)) # Note: in recent versions of seaborn, we should rather use errorbar=('ci', 95).\n", + "\n", + " ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))\n", + "\n", + " plt.tight_layout()\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We see that smokers are underrepresented among the elderly compared to the other three age classes (probably because many long-time smokers quit when they grow old and start developing tobacco-related health conditions). Therefore, in the overall analysis of question 1, $Age$ acted as a proxy for the absence of smoking habit. Since the mean mortality rate is higher among the elderly (irrespective of the cause -- after all, they may simply die of old age), this creates a spurious negative correlation between $Smoker$ and $Dead$ (non-smokers are more likely older, and thus more likely to have a high mortality rate). " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Mortality rates among smokers vs. non-smokers by age using logistic regression" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We perform a logistic regression for the model $Dead \\sim Age$" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimization terminated successfully.\n", + " Current function value: 0.629619\n", + " Iterations 5\n", + "Optimization terminated successfully.\n", + " Current function value: 0.633082\n", + " Iterations 4\n", + "Optimization terminated successfully.\n", + " Current function value: 0.687904\n", + " Iterations 3\n" + ] + } + ], + "source": [ + "import statsmodels.api as sm\n", + "\n", + "log_reg = sm.Logit(df[\"Dead\"], df[[\"Age\", \"Smoker\"]].astype(int)).fit()\n", + "\n", + "log_reg_smoker = sm.Logit(df_smoker[\"Dead\"], df_smoker[\"Age\"]).fit()\n", + "log_reg_nonsmoker = sm.Logit(df_nonsmoker[\"Dead\"], df_nonsmoker[\"Age\"]).fit()" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
Logit Regression Results
Dep. Variable: Dead No. Observations: 1314
Model: Logit Df Residuals: 1312
Method: MLE Df Model: 1
Date: Tue, 26 Sep 2023 Pseudo R-squ.: -0.06045
Time: 21:06:14 Log-Likelihood: -827.32
converged: True LL-Null: -780.16
LLR p-value: 1.000
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
coef std err z P>|z| [0.025 0.975]
Age 0.0002 0.001 0.112 0.911 -0.002 0.003
Smoker -1.1657 0.114 -10.253 0.000 -1.389 -0.943
" + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " Logit Regression Results \n", + "==============================================================================\n", + "Dep. Variable: Dead No. Observations: 1314\n", + "Model: Logit Df Residuals: 1312\n", + "Method: MLE Df Model: 1\n", + "Date: Tue, 26 Sep 2023 Pseudo R-squ.: -0.06045\n", + "Time: 21:06:14 Log-Likelihood: -827.32\n", + "converged: True LL-Null: -780.16\n", + " LLR p-value: 1.000\n", + "==============================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "Age 0.0002 0.001 0.112 0.911 -0.002 0.003\n", + "Smoker -1.1657 0.114 -10.253 0.000 -1.389 -0.943\n", + "==============================================================================\n", + "\"\"\"" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "log_reg.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
Logit Regression Results
Dep. Variable: Dead No. Observations: 582
Model: Logit Df Residuals: 581
Method: MLE Df Model: 0
Date: Tue, 26 Sep 2023 Pseudo R-squ.: -0.1516
Time: 21:06:17 Log-Likelihood: -368.45
converged: True LL-Null: -319.94
LLR p-value: nan
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
coef std err z P>|z| [0.025 0.975]
Age -0.0154 0.002 -7.982 0.000 -0.019 -0.012
" + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " Logit Regression Results \n", + "==============================================================================\n", + "Dep. Variable: Dead No. Observations: 582\n", + "Model: Logit Df Residuals: 581\n", + "Method: MLE Df Model: 0\n", + "Date: Tue, 26 Sep 2023 Pseudo R-squ.: -0.1516\n", + "Time: 21:06:17 Log-Likelihood: -368.45\n", + "converged: True LL-Null: -319.94\n", + " LLR p-value: nan\n", + "==============================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "Age -0.0154 0.002 -7.982 0.000 -0.019 -0.012\n", + "==============================================================================\n", + "\"\"\"" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "log_reg_smoker.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
Logit Regression Results
Dep. Variable: Dead No. Observations: 732
Model: Logit Df Residuals: 731
Method: MLE Df Model: 0
Date: Tue, 26 Sep 2023 Pseudo R-squ.: -0.1052
Time: 21:06:17 Log-Likelihood: -503.55
converged: True LL-Null: -455.62
LLR p-value: nan
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
coef std err z P>|z| [0.025 0.975]
Age -0.0038 0.001 -2.759 0.006 -0.007 -0.001
" + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " Logit Regression Results \n", + "==============================================================================\n", + "Dep. Variable: Dead No. Observations: 732\n", + "Model: Logit Df Residuals: 731\n", + "Method: MLE Df Model: 0\n", + "Date: Tue, 26 Sep 2023 Pseudo R-squ.: -0.1052\n", + "Time: 21:06:17 Log-Likelihood: -503.55\n", + "converged: True LL-Null: -455.62\n", + " LLR p-value: nan\n", + "==============================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "Age -0.0038 0.001 -2.759 0.006 -0.007 -0.001\n", + "==============================================================================\n", + "\"\"\"" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "log_reg_nonsmoker.summary()" + ] }, { "cell_type": "code", diff --git a/module3/exo3/simpson.gif b/module3/exo3/simpson.gif new file mode 100644 index 0000000000000000000000000000000000000000..92a8aeb78ce598de79e3d9af03cbc41a974bedc9 Binary files /dev/null and b/module3/exo3/simpson.gif differ