régression logistique in progress ?

parent bcc3b457
......@@ -15,7 +15,9 @@
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd"
"import pandas as pd\n",
"import numpy as np\n",
"import statsmodels.api as sm"
]
},
{
......@@ -729,11 +731,11 @@
"source": [
"labels = 'Non-fumeurs', 'Fumeurs'\n",
"sizes = [non_smokers/total,(smokers/total)]\n",
"#explode = (0, 0.1, 0, 0) # only \"explode\" the 2nd slice (i.e. 'Hogs')\n",
"\n",
"\n",
"fig1, ax1 = plt.subplots()\n",
"ax1.pie(sizes, labels=labels,shadow=True,startangle=90,autopct='%1.1f%%')\n",
"ax1.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.\n",
"ax1.axis('equal') \n",
"plt.title('Répartition de l\\'échantillon')\n",
"plt.show()\n"
]
......@@ -761,7 +763,7 @@
"\n",
"fig1, ax1 = plt.subplots()\n",
"ax1.pie(sizes, labels=labels,explode=explode,startangle=90,shadow=True,autopct='%1.1f%%',colors=('green','red'))\n",
"ax1.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.\n",
"ax1.axis('equal') \n",
"plt.title('Mortalité échantillon de fumeurs')\n",
"plt.show()"
]
......@@ -789,7 +791,7 @@
"\n",
"fig1, ax1 = plt.subplots()\n",
"ax1.pie(sizes, labels=labels,explode=explode,startangle=90,shadow=True,autopct='%1.1f%%',colors=('green','red'))\n",
"ax1.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.\n",
"ax1.axis('equal') \n",
"plt.title('Mortalité échantillon de non-fumeurs')\n",
"plt.show()"
]
......@@ -820,680 +822,540 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"data.loc[data['Age']<35,'Categorie d\\'age'] = 'A'\n",
"data.loc[(data['Age']<55) & (data['Age']>=35),'Categorie d\\'age'] = 'B'\n",
"data.loc[(data['Age']<65) & (data['Age']>=55),'Categorie d\\'age'] = 'C'\n",
"data.loc[data['Age']>=65,'Categorie d\\'age'] = 'D'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"On vérifie que la somme des tailles des sous-groupes est bien égale au nombre total de données."
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Categorie d'âge 18-34 35-54 55-64 65+ total\n",
"-------------------------------------------------------------------------------------------\n",
"Taille de l'échantillon 416 420 236 242 1314\n"
]
}
],
"source": [
"A_total=pd.DataFrame.sum((data['Categorie d\\'age']=='A'))\n",
"B_total=pd.DataFrame.sum((data['Categorie d\\'age']=='B'))\n",
"C_total=pd.DataFrame.sum((data['Categorie d\\'age']=='C'))\n",
"D_total=pd.DataFrame.sum((data['Categorie d\\'age']=='D'))\n",
"print('Categorie d\\'âge 18-34 35-54 55-64 65+ total')\n",
"print('-------------------------------------------------------------------------------------------')\n",
"print('Taille de l\\'échantillon ',A_total,' ',B_total,' ',C_total,' ',D_total,' ',A_total+B_total+C_total+D_total)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Il ne semble pas y avoir d'erreur dans le découpage en sous-échantillons ; on procède donc aux mêmes analyses que précédemment, appliquées cette fois-ci par tranche d'âge."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Catégorie d'âge 18-34"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Fumeurs Non-fumeurs Total\n",
" ------------------------------------\n",
"Taille du groupe 189 227 416\n",
"Vivant 182 221 403\n",
"Mort 7 6 13\n",
"Mortalité 0.037 0.026 0.031\n"
]
}
],
"source": [
"A_smokers=pd.DataFrame.sum((data['Smoker']=='Yes')& (data['Categorie d\\'age']=='A'))\n",
"A_non_smokers=pd.DataFrame.sum((data['Smoker']=='No')& (data['Categorie d\\'age']=='A'))\n",
"A_total=A_smokers+A_non_smokers\n",
"\n",
"A_deaths_smokers=pd.DataFrame.sum((data['Smoker']=='Yes')&(data['Status']=='Dead')&(data['Categorie d\\'age']=='A'))\n",
"A_death_rate_smokers=A_deaths_smokers/A_smokers\n",
"A_deaths_non_smokers=pd.DataFrame.sum((data['Smoker']=='No')&(data['Status']=='Dead')&(data['Categorie d\\'age']=='A'))\n",
"A_death_rate_non_smokers=A_deaths_non_smokers/A_non_smokers\n",
"A_death_rate_total=(A_deaths_smokers+A_deaths_non_smokers)/A_total\n",
"\n",
"\n",
"print(' Fumeurs Non-fumeurs Total')\n",
"print(' ------------------------------------')\n",
"print('Taille du groupe ',A_smokers,' ',A_non_smokers,' ',A_total)\n",
"print('Vivant ',A_smokers-A_deaths_smokers,' ',A_non_smokers-A_deaths_non_smokers,' ',A_total-A_deaths_smokers-A_deaths_non_smokers)\n",
"print('Mort ',A_deaths_smokers,' ',A_deaths_non_smokers,' ',A_deaths_smokers+A_deaths_non_smokers)\n",
"print('Mortalité ',round(A_death_rate_smokers,3),' ',round(A_death_rate_non_smokers,3),' ',round(A_death_rate_total,3))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Catégorie d'âge 35-54"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Fumeurs Non-fumeurs Total\n",
" ------------------------------------\n",
"Taille du groupe 229 191 420\n",
"Vivant 190 172 362\n",
"Mort 39 19 58\n",
"Mortalité 0.17 0.099 0.138\n"
]
}
],
"source": [
"B_smokers=pd.DataFrame.sum((data['Smoker']=='Yes')& (data['Categorie d\\'age']=='B'))\n",
"B_non_smokers=pd.DataFrame.sum((data['Smoker']=='No')& (data['Categorie d\\'age']=='B'))\n",
"B_total=B_smokers+B_non_smokers\n",
"\n",
"B_deaths_smokers=pd.DataFrame.sum((data['Smoker']=='Yes')&(data['Status']=='Dead')&(data['Categorie d\\'age']=='B'))\n",
"B_death_rate_smokers=B_deaths_smokers/B_smokers\n",
"B_deaths_non_smokers=pd.DataFrame.sum((data['Smoker']=='No')&(data['Status']=='Dead')&(data['Categorie d\\'age']=='B'))\n",
"B_death_rate_non_smokers=B_deaths_non_smokers/B_non_smokers\n",
"B_death_rate_total=(B_deaths_smokers+B_deaths_non_smokers)/B_total\n",
"\n",
"\n",
"print(' Fumeurs Non-fumeurs Total')\n",
"print(' ------------------------------------')\n",
"print('Taille du groupe ',B_smokers,' ',B_non_smokers,' ',B_total)\n",
"print('Vivant ',B_smokers-B_deaths_smokers,' ',B_non_smokers-B_deaths_non_smokers,' ',B_total-B_deaths_smokers-B_deaths_non_smokers)\n",
"print('Mort ',B_deaths_smokers,' ',B_deaths_non_smokers,' ',B_deaths_smokers+B_deaths_non_smokers)\n",
"print('Mortalité ',round(B_death_rate_smokers,3),' ',round(B_death_rate_non_smokers,3),' ',round(B_death_rate_total,3))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Catégorie d'âge 55-64"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Fumeurs Non-fumeurs Total\n",
" ------------------------------------\n",
"Taille du groupe 115 121 236\n",
"Vivant 64 81 145\n",
"Mort 51 40 91\n",
"Mortalité 0.443 0.331 0.386\n"
]
}
],
"source": [
"C_smokers=pd.DataFrame.sum((data['Smoker']=='Yes')& (data['Categorie d\\'age']=='C'))\n",
"C_non_smokers=pd.DataFrame.sum((data['Smoker']=='No')& (data['Categorie d\\'age']=='C'))\n",
"C_total=C_smokers+C_non_smokers\n",
"\n",
"C_deaths_smokers=pd.DataFrame.sum((data['Smoker']=='Yes')&(data['Status']=='Dead')&(data['Categorie d\\'age']=='C'))\n",
"C_death_rate_smokers=C_deaths_smokers/C_smokers\n",
"C_deaths_non_smokers=pd.DataFrame.sum((data['Smoker']=='No')&(data['Status']=='Dead')&(data['Categorie d\\'age']=='C'))\n",
"C_death_rate_non_smokers=C_deaths_non_smokers/C_non_smokers\n",
"C_death_rate_total=(C_deaths_smokers+C_deaths_non_smokers)/C_total\n",
"\n",
"\n",
"print(' Fumeurs Non-fumeurs Total')\n",
"print(' ------------------------------------')\n",
"print('Taille du groupe ',C_smokers,' ',C_non_smokers,' ',C_total)\n",
"print('Vivant ',C_smokers-C_deaths_smokers,' ',C_non_smokers-C_deaths_non_smokers,' ',C_total-C_deaths_smokers-C_deaths_non_smokers)\n",
"print('Mort ',C_deaths_smokers,' ',C_deaths_non_smokers,' ',C_deaths_smokers+C_deaths_non_smokers)\n",
"print('Mortalité ',round(C_death_rate_smokers,3),' ',round(C_death_rate_non_smokers,3),' ',round(C_death_rate_total,3))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Catégorie d'âge 65+"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Fumeurs Non-fumeurs Total\n",
" ------------------------------------\n",
"Taille du groupe 49 193 242\n",
"Vivant 7 28 35\n",
"Mort 42 165 207\n",
"Mortalité 0.857 0.855 0.855\n"
]
}
],
"source": [
"D_smokers=pd.DataFrame.sum((data['Smoker']=='Yes')& (data['Categorie d\\'age']=='D'))\n",
"D_non_smokers=pd.DataFrame.sum((data['Smoker']=='No')& (data['Categorie d\\'age']=='D'))\n",
"D_total=D_smokers+D_non_smokers\n",
"\n",
"D_deaths_smokers=pd.DataFrame.sum((data['Smoker']=='Yes')&(data['Status']=='Dead')&(data['Categorie d\\'age']=='D'))\n",
"D_death_rate_smokers=D_deaths_smokers/D_smokers\n",
"D_deaths_non_smokers=pd.DataFrame.sum((data['Smoker']=='No')&(data['Status']=='Dead')&(data['Categorie d\\'age']=='D'))\n",
"D_death_rate_non_smokers=D_deaths_non_smokers/D_non_smokers\n",
"D_death_rate_total=(D_deaths_smokers+D_deaths_non_smokers)/D_total\n",
"\n",
"\n",
"print(' Fumeurs Non-fumeurs Total')\n",
"print(' ------------------------------------')\n",
"print('Taille du groupe ',D_smokers,' ',D_non_smokers,' ',D_total)\n",
"print('Vivant ',D_smokers-D_deaths_smokers,' ',D_non_smokers-D_deaths_non_smokers,' ',D_total-D_deaths_smokers-D_deaths_non_smokers)\n",
"print('Mort ',D_deaths_smokers,' ',D_deaths_non_smokers,' ',D_deaths_smokers+D_deaths_non_smokers)\n",
"print('Mortalité ',round(D_death_rate_smokers,3),' ',round(D_death_rate_non_smokers,3),' ',round(D_death_rate_total,3))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Analyse"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"labels = ['18-34', '35-54', '55-64', '65+']\n",
"nn_smkers_dth_rt = [A_death_rate_non_smokers,B_death_rate_non_smokers,C_death_rate_non_smokers,D_death_rate_non_smokers]\n",
"nn_smkers_dth_rt = [round(num, 2) for num in nn_smkers_dth_rt]\n",
"smkers_dth_rt = [A_death_rate_smokers,B_death_rate_smokers,C_death_rate_smokers,D_death_rate_smokers]\n",
"smkers_dth_rt = [round(num, 2) for num in smkers_dth_rt]\n",
"\n",
"x = np.arange(len(labels)) # the label locations\n",
"width = 0.35 # the width of the bars\n",
"\n",
"fig, ax = plt.subplots()\n",
"rects1 = ax.bar(x - width/2, nn_smkers_dth_rt, width, label='Non-fumeur')\n",
"rects2 = ax.bar(x + width/2, smkers_dth_rt, width, label='Fumeur')\n",
"\n",
"# Add some text for labels, title and custom x-axis tick labels, etc.\n",
"ax.set_ylabel('Taux de mortalité')\n",
"ax.set_title('Taux de mortalité par tabagisme et catégorie d\\'âge')\n",
"ax.set_xticks(x)\n",
"ax.set_xticklabels(labels)\n",
"ax.legend()\n",
"\n",
"\n",
"def autolabel(rects):\n",
" \n",
" for rect in rects:\n",
" height = rect.get_height()\n",
" ax.annotate('{}'.format(height),\n",
" xy=(rect.get_x() + rect.get_width() / 2, height),\n",
" xytext=(0, 3), # 3 points vertical offset\n",
" textcoords=\"offset points\",\n",
" ha='center', va='bottom')\n",
"\n",
"\n",
"autolabel(rects1)\n",
"autolabel(rects2)\n",
"\n",
"fig.tight_layout()\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"labels = ['18-34', '35-54', '55-64', '65+']\n",
"nn_smkrs = [A_non_smokers,B_non_smokers,C_non_smokers,D_non_smokers]\n",
"smkrs = [A_smokers,B_smokers,C_smokers,D_smokers]\n",
"\n",
"x = np.arange(len(labels)) # the label locations\n",
"width = 0.35 # the width of the bars\n",
"\n",
"fig, ax = plt.subplots()\n",
"rects1 = ax.bar(x - width/2, nn_smkrs, width, label='Non-fumeur')\n",
"rects2 = ax.bar(x + width/2, smkrs, width, label='Fumeur')\n",
"\n",
"# Add some text for labels, title and custom x-axis tick labels, etc.\n",
"ax.set_ylabel('Taille du groupe')\n",
"ax.set_title('Répartition du tabagisme en fonction de la catégorie d\\'âge')\n",
"ax.set_xticks(x)\n",
"ax.set_xticklabels(labels)\n",
"ax.legend()\n",
"\n",
"\n",
"def autolabel(rects):\n",
" \n",
" for rect in rects:\n",
" height = rect.get_height()\n",
" ax.annotate('{}'.format(height),\n",
" xy=(rect.get_x() + rect.get_width() / 2, height),\n",
" xytext=(0, 3), # 3 points vertical offset\n",
" textcoords=\"offset points\",\n",
" ha='center', va='bottom')\n",
"\n",
"\n",
"autolabel(rects1)\n",
"autolabel(rects2)\n",
"\n",
"fig.tight_layout()\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ces deux graphiques mettent en évidence plusieurs choses :\n",
" - le taux de mortalité à 20 ans est très dépendant de l'âge (ce qui, après réflexion, semble évident),\n",
" - la proportion de fumeurs dépend de l'âge,\n",
" - pour chaque catégorie d'âge, la mortalité des fumeurs est plus importante que celle des non-fumeurs."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Régression logistique"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Afin d'éviter un biais induit par des regroupements en tranches d'âge arbitraires et non régulières, on réalise une régression logistique. Pour cela, on introduit la variable `Death`, qui vaut 1 si l'individu est décédé dans la période de 20 ans et 0 sinon."
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"data['Death']=0\n",
"data.loc[data['Status']=='Dead','Death'] = 1\n"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"data.loc[data['Age']<35,'Categorie d\\'âge'] = 'A'\n",
"data.loc[(data['Age']<55) & (data['Age']>=35),'Categorie d\\'âge'] = 'B'\n",
"data.loc[(data['Age']<65) & (data['Age']>=55),'Categorie d\\'âge'] = 'C'\n",
"data.loc[data['Age']>=65,'Categorie d\\'âge'] = 'D'"
"x=data['Age']\n",
"x=sm.add_constant(x)\n",
"y=data['Death']"
]
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Optimization terminated successfully.\n",
" Current function value: 0.382339\n",
" Iterations 7\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Smoker</th>\n",
" <th>Status</th>\n",
" <th>Age</th>\n",
" <th>Catégorie d'âge</th>\n",
" <th>Categorie</th>\n",
" <th>Categorie d'âge</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>21.0</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>19.3</td>\n",
" <td>A</td>\n",
" <td>A</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>57.5</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>47.1</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>81.4</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>36.8</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>23.8</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>57.5</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>24.8</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>49.5</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>30.0</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>66.0</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>49.2</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>58.4</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>60.6</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>25.1</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>43.5</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>27.1</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>58.3</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>65.7</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>73.2</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>38.3</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>33.4</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>62.3</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>18.0</td>\n",
" <td>A</td>\n",
" <td>A</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>56.2</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>59.2</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>25.8</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>36.9</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>20.2</td>\n",
" <td>A</td>\n",
" <td>A</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1284</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>36.0</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1285</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>48.3</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1286</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>63.1</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1287</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>60.8</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1288</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>39.3</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1289</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>36.7</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1290</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>63.8</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1291</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>71.3</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1292</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>57.7</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1293</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>63.2</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1294</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>46.6</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1295</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>82.4</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1296</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>38.3</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1297</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>32.7</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1298</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>39.7</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1299</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>60.0</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1300</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>71.0</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1301</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>20.5</td>\n",
" <td>A</td>\n",
" <td>A</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1302</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>44.4</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1303</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>31.2</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1304</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>47.8</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1305</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>60.9</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1306</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>61.4</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1307</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>43.0</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1308</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>42.1</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1309</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>35.9</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1310</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>22.3</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1311</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>62.1</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1312</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>88.6</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1313</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>39.1</td>\n",
" <td>A</td>\n",
" <td>NaN</td>\n",
" <td>B</td>\n",
" </tr>\n",
" </tbody>\n",
"<table class=\"simpletable\">\n",
"<caption>Logit Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>Death</td> <th> No. Observations: </th> <td> 1314</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>Logit</td> <th> Df Residuals: </th> <td> 1312</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>MLE</td> <th> Df Model: </th> <td> 1</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Tue, 28 Apr 2020</td> <th> Pseudo R-squ.: </th> <td>0.3560</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>22:33:45</td> <th> Log-Likelihood: </th> <td> -502.39</td> \n",
"</tr>\n",
"<tr>\n",
" <th>converged:</th> <td>True</td> <th> LL-Null: </th> <td> -780.16</td> \n",
"</tr>\n",
"<tr>\n",
" <th> </th> <td> </td> <th> LLR p-value: </th> <td>7.883e-123</td>\n",
"</tr>\n",
"</table>\n",
"<p>1314 rows × 6 columns</p>\n",
"</div>"
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>z</th> <th>P>|z|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>const</th> <td> -6.1045</td> <td> 0.321</td> <td> -18.992</td> <td> 0.000</td> <td> -6.735</td> <td> -5.475</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Age</th> <td> 0.0977</td> <td> 0.006</td> <td> 17.578</td> <td> 0.000</td> <td> 0.087</td> <td> 0.109</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
" Smoker Status Age Catégorie d'âge Categorie Categorie d'âge\n",
"0 Yes Alive 21.0 A NaN A\n",
"1 Yes Alive 19.3 A A A\n",
"2 No Dead 57.5 A NaN C\n",
"3 No Alive 47.1 A NaN B\n",
"4 Yes Alive 81.4 A NaN D\n",
"5 No Alive 36.8 A NaN B\n",
"6 No Alive 23.8 A NaN A\n",
"7 Yes Dead 57.5 A NaN C\n",
"8 Yes Alive 24.8 A NaN A\n",
"9 Yes Alive 49.5 A NaN B\n",
"10 Yes Alive 30.0 A NaN A\n",
"11 No Dead 66.0 A NaN D\n",
"12 Yes Alive 49.2 A NaN B\n",
"13 No Alive 58.4 A NaN C\n",
"14 No Dead 60.6 A NaN C\n",
"15 No Alive 25.1 A NaN A\n",
"16 No Alive 43.5 A NaN B\n",
"17 No Alive 27.1 A NaN A\n",
"18 No Alive 58.3 A NaN C\n",
"19 Yes Alive 65.7 A NaN D\n",
"20 No Dead 73.2 A NaN D\n",
"21 Yes Alive 38.3 A NaN B\n",
"22 No Alive 33.4 A NaN A\n",
"23 Yes Dead 62.3 A NaN C\n",
"24 No Alive 18.0 A A A\n",
"25 No Alive 56.2 A NaN C\n",
"26 Yes Alive 59.2 A NaN C\n",
"27 No Alive 25.8 A NaN A\n",
"28 No Dead 36.9 A NaN B\n",
"29 No Alive 20.2 A A A\n",
"... ... ... ... ... ... ...\n",
"1284 Yes Dead 36.0 A NaN B\n",
"1285 Yes Alive 48.3 A NaN B\n",
"1286 No Alive 63.1 A NaN C\n",
"1287 No Alive 60.8 A NaN C\n",
"1288 Yes Dead 39.3 A NaN B\n",
"1289 No Alive 36.7 A NaN B\n",
"1290 No Alive 63.8 A NaN C\n",
"1291 No Dead 71.3 A NaN D\n",
"1292 No Alive 57.7 A NaN C\n",
"1293 No Alive 63.2 A NaN C\n",
"1294 No Alive 46.6 A NaN B\n",
"1295 Yes Dead 82.4 A NaN D\n",
"1296 Yes Alive 38.3 A NaN B\n",
"1297 Yes Alive 32.7 A NaN A\n",
"1298 No Alive 39.7 A NaN B\n",
"1299 Yes Dead 60.0 A NaN C\n",
"1300 No Dead 71.0 A NaN D\n",
"1301 No Alive 20.5 A A A\n",
"1302 No Alive 44.4 A NaN B\n",
"1303 Yes Alive 31.2 A NaN A\n",
"1304 Yes Alive 47.8 A NaN B\n",
"1305 Yes Alive 60.9 A NaN C\n",
"1306 No Dead 61.4 A NaN C\n",
"1307 Yes Alive 43.0 A NaN B\n",
"1308 No Alive 42.1 A NaN B\n",
"1309 Yes Alive 35.9 A NaN B\n",
"1310 No Alive 22.3 A NaN A\n",
"1311 Yes Dead 62.1 A NaN C\n",
"1312 No Dead 88.6 A NaN D\n",
"1313 No Alive 39.1 A NaN B\n",
"\n",
"[1314 rows x 6 columns]"
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" Logit Regression Results \n",
"==============================================================================\n",
"Dep. Variable: Death No. Observations: 1314\n",
"Model: Logit Df Residuals: 1312\n",
"Method: MLE Df Model: 1\n",
"Date: Tue, 28 Apr 2020 Pseudo R-squ.: 0.3560\n",
"Time: 22:33:45 Log-Likelihood: -502.39\n",
"converged: True LL-Null: -780.16\n",
" LLR p-value: 7.883e-123\n",
"==============================================================================\n",
" coef std err z P>|z| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"const -6.1045 0.321 -18.992 0.000 -6.735 -5.475\n",
"Age 0.0977 0.006 17.578 0.000 0.087 0.109\n",
"==============================================================================\n",
"\"\"\""
]
},
"execution_count": 28,
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data\n",
"\n"
"model = sm.Logit(y, x)\n",
"result = model.fit(method='newton')\n",
"result.summary()\n"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "'Frequency'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2524\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2525\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2526\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'Frequency'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-39-eea08db42492>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mdata_pred\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Frequency'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_pred\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Constant'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Age'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mdata_pred\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"Age\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"Frequency\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mkind\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"line\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mylim\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscatter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Age\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Frequency\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrid\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2137\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2138\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2139\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2140\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2141\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_getitem_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_getitem_column\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2144\u001b[0m \u001b[0;31m# get column\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2145\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_unique\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2146\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_item_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2147\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2148\u001b[0m \u001b[0;31m# duplicate columns & possible reduce dimensionality\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m_get_item_cache\u001b[0;34m(self, item)\u001b[0m\n\u001b[1;32m 1840\u001b[0m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcache\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1841\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mres\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1842\u001b[0;31m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1843\u001b[0m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_box_item_values\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1844\u001b[0m \u001b[0mcache\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, item, fastpath)\u001b[0m\n\u001b[1;32m 3841\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3842\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3843\u001b[0;31m \u001b[0mloc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3844\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3845\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2525\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2526\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2527\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_cast_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2528\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2529\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtolerance\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtolerance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'Frequency'"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Evenly spaced age grid on which the fitted curve is evaluated. The 'Constant'\n",
"# column of ones is the intercept regressor handed to predict() below.\n",
"data_pred = pd.DataFrame({'Age': np.linspace(start=18, stop=100, num=100), 'Constant': 1})\n",
"data_pred['Frequency'] = result.predict(data_pred[['Constant','Age']])\n",
"\n",
"# `data` has no 'Frequency' column (only `data_pred` does), so the previous\n",
"# scatter of data[\"Frequency\"] raised KeyError. Overlay the observations the\n",
"# model was fitted on instead, so points and curve describe the same sample.\n",
"# NOTE(review): assumes `result` is a statsmodels results object fitted with\n",
"# regressors ordered ['Constant', 'Age'], as in the predict() call - confirm.\n",
"observed_age = result.model.exog[:, 1]\n",
"observed_outcome = result.model.endog\n",
"\n",
"# Explicit figure/axes so the predicted line and the observed points are\n",
"# guaranteed to be drawn on the same axes.\n",
"fig, ax = plt.subplots()\n",
"data_pred.plot(x=\"Age\",y=\"Frequency\",kind=\"line\",ylim=[0,1],ax=ax)\n",
"ax.scatter(x=observed_age,y=observed_outcome)\n",
"ax.grid(True)"
]
},
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment