no commit message

parent c701238b
...@@ -834,578 +834,328 @@ ...@@ -834,578 +834,328 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 45,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"dead_bool = [(data['Status'][i] == \"Dead\") for i in range(len(data))]" "dead_bool = [(data['Status'][i] == \"Dead\") for i in range(len(data))]\n",
"data['Dead?'] = dead_bool\n",
"# Vectorised comparison instead of data['Smoker'][i]: the [i] form is a label lookup,\n",
"# so it breaks as soon as the index is not exactly 0..n-1 (e.g. after filtering rows).\n",
"smoke_bool = (data['Smoker'] == \"Yes\").tolist()\n",
"data['Smoke?'] = smoke_bool"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" Nous allons tester les hypothèses par régression logistique."
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 19,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"data.insert(4, \"Dead?\",data)" "import numpy as np\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import classification_report, confusion_matrix"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LogisticRegression(C=10.0, class_weight=None, dual=False, fit_intercept=True,\n",
" intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n",
" penalty='l2', random_state=0, solver='liblinear', tol=0.0001,\n",
" verbose=0, warm_start=False)"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit P(Dead? | Age) on the smoker rows only; the mask is computed once and reused.\n",
"is_smoker = data['Smoker'] == \"Yes\"\n",
"smoker_age = data.loc[is_smoker, 'Age'].values.reshape(-1, 1)\n",
"smoker_dead = data.loc[is_smoker, 'Dead?']\n",
"model = LogisticRegression(solver='liblinear', C=10.0, random_state=0)\n",
"model.fit(smoker_age, smoker_dead)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 25,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"data['Dead?'] = dead_bool" "p_pred = model.predict_proba(data['Age'].values.reshape(-1,1))"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0.98225578 0.01774422]\n",
" [0.98490288 0.01509712]\n",
" [0.61947454 0.38052546]\n",
" ...\n",
" [0.51071991 0.48928009]\n",
" [0.07464525 0.92535475]\n",
" [0.90594064 0.09405936]]\n"
]
}
],
"source": [
"print(p_pred)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import statsmodels.api as sm\n"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'list' object has no attribute 'reshape'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-41-ddab106694e4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Age'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Smoker'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_constant\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Dead?'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'reshape'"
]
}
],
"source": [
"# Age-only design matrix: a column of ones (intercept) followed by Age.\n",
"# The constant is added to x1 directly so this cell does not depend on an `x`\n",
"# left over in the kernel from an earlier run.\n",
"x1 = data['Age'].values.reshape(-1,1)\n",
"x = sm.add_constant(x1)\n",
"y = data['Dead?']"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Optimization terminated successfully.\n",
" Current function value: 0.382339\n",
" Iterations 7\n"
]
}
],
"source": [
"# Logit of y on the design matrix x, fitted with the 'newton' optimiser.\n",
"model = sm.Logit(endog=y, exog=x)\n",
"result = model.fit(method=\"newton\")"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/html": [ "text/html": [
"<div>\n", "<table class=\"simpletable\">\n",
"<style scoped>\n", "<caption>Logit Regression Results</caption>\n",
" .dataframe tbody tr th:only-of-type {\n", "<tr>\n",
" vertical-align: middle;\n", " <th>Dep. Variable:</th> <td>Dead?</td> <th> No. Observations: </th> <td> 1314</td> \n",
" }\n", "</tr>\n",
"\n", "<tr>\n",
" .dataframe tbody tr th {\n", " <th>Model:</th> <td>Logit</td> <th> Df Residuals: </th> <td> 1312</td> \n",
" vertical-align: top;\n", "</tr>\n",
" }\n", "<tr>\n",
"\n", " <th>Method:</th> <td>MLE</td> <th> Df Model: </th> <td> 1</td> \n",
" .dataframe thead th {\n", "</tr>\n",
" text-align: right;\n", "<tr>\n",
" }\n", " <th>Date:</th> <td>Mon, 31 Aug 2020</td> <th> Pseudo R-squ.: </th> <td>0.3560</td> \n",
"</style>\n", "</tr>\n",
"<table border=\"1\" class=\"dataframe\">\n", "<tr>\n",
" <thead>\n", " <th>Time:</th> <td>15:21:58</td> <th> Log-Likelihood: </th> <td> -502.39</td> \n",
" <tr style=\"text-align: right;\">\n", "</tr>\n",
" <th></th>\n", "<tr>\n",
" <th>Smoker</th>\n", " <th>converged:</th> <td>True</td> <th> LL-Null: </th> <td> -780.16</td> \n",
" <th>Status</th>\n", "</tr>\n",
" <th>Age</th>\n", "<tr>\n",
" <th>Dead?</th>\n", " <th> </th> <td> </td> <th> LLR p-value: </th> <td>7.883e-123</td>\n",
" </tr>\n", "</tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>21.0</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>19.3</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>57.5</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>47.1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>81.4</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>36.8</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>23.8</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>57.5</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>24.8</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>49.5</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>30.0</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>66.0</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>49.2</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>58.4</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>60.6</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>25.1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>43.5</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>27.1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>58.3</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>65.7</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>73.2</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>38.3</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>33.4</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>62.3</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>18.0</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>56.2</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>59.2</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>25.8</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>36.9</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>20.2</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1284</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>36.0</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1285</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>48.3</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1286</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>63.1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1287</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>60.8</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1288</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>39.3</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1289</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>36.7</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1290</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>63.8</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1291</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>71.3</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1292</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>57.7</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1293</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>63.2</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1294</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>46.6</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1295</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>82.4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1296</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>38.3</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1297</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>32.7</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1298</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>39.7</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1299</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>60.0</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1300</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>71.0</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1301</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>20.5</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1302</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>44.4</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1303</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>31.2</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1304</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>47.8</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1305</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>60.9</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1306</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>61.4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1307</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>43.0</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1308</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>42.1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1309</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>35.9</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1310</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>22.3</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1311</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>62.1</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1312</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>88.6</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1313</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>39.1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n", "</table>\n",
"<p>1314 rows × 4 columns</p>\n", "<table class=\"simpletable\">\n",
"</div>" "<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>z</th> <th>P>|z|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>const</th> <td> -6.1045</td> <td> 0.321</td> <td> -18.992</td> <td> 0.000</td> <td> -6.735</td> <td> -5.475</td>\n",
"</tr>\n",
"<tr>\n",
" <th>x1</th> <td> 0.0977</td> <td> 0.006</td> <td> 17.578</td> <td> 0.000</td> <td> 0.087</td> <td> 0.109</td>\n",
"</tr>\n",
"</table>"
], ],
"text/plain": [ "text/plain": [
" Smoker Status Age Dead?\n", "<class 'statsmodels.iolib.summary.Summary'>\n",
"0 Yes Alive 21.0 False\n", "\"\"\"\n",
"1 Yes Alive 19.3 False\n", " Logit Regression Results \n",
"2 No Dead 57.5 True\n", "==============================================================================\n",
"3 No Alive 47.1 False\n", "Dep. Variable: Dead? No. Observations: 1314\n",
"4 Yes Alive 81.4 False\n", "Model: Logit Df Residuals: 1312\n",
"5 No Alive 36.8 False\n", "Method: MLE Df Model: 1\n",
"6 No Alive 23.8 False\n", "Date: Mon, 31 Aug 2020 Pseudo R-squ.: 0.3560\n",
"7 Yes Dead 57.5 True\n", "Time: 15:21:58 Log-Likelihood: -502.39\n",
"8 Yes Alive 24.8 False\n", "converged: True LL-Null: -780.16\n",
"9 Yes Alive 49.5 False\n", " LLR p-value: 7.883e-123\n",
"10 Yes Alive 30.0 False\n", "==============================================================================\n",
"11 No Dead 66.0 True\n", " coef std err z P>|z| [0.025 0.975]\n",
"12 Yes Alive 49.2 False\n", "------------------------------------------------------------------------------\n",
"13 No Alive 58.4 False\n", "const -6.1045 0.321 -18.992 0.000 -6.735 -5.475\n",
"14 No Dead 60.6 True\n", "x1 0.0977 0.006 17.578 0.000 0.087 0.109\n",
"15 No Alive 25.1 False\n", "==============================================================================\n",
"16 No Alive 43.5 False\n", "\"\"\""
"17 No Alive 27.1 False\n",
"18 No Alive 58.3 False\n",
"19 Yes Alive 65.7 False\n",
"20 No Dead 73.2 True\n",
"21 Yes Alive 38.3 False\n",
"22 No Alive 33.4 False\n",
"23 Yes Dead 62.3 True\n",
"24 No Alive 18.0 False\n",
"25 No Alive 56.2 False\n",
"26 Yes Alive 59.2 False\n",
"27 No Alive 25.8 False\n",
"28 No Dead 36.9 True\n",
"29 No Alive 20.2 False\n",
"... ... ... ... ...\n",
"1284 Yes Dead 36.0 True\n",
"1285 Yes Alive 48.3 False\n",
"1286 No Alive 63.1 False\n",
"1287 No Alive 60.8 False\n",
"1288 Yes Dead 39.3 True\n",
"1289 No Alive 36.7 False\n",
"1290 No Alive 63.8 False\n",
"1291 No Dead 71.3 True\n",
"1292 No Alive 57.7 False\n",
"1293 No Alive 63.2 False\n",
"1294 No Alive 46.6 False\n",
"1295 Yes Dead 82.4 True\n",
"1296 Yes Alive 38.3 False\n",
"1297 Yes Alive 32.7 False\n",
"1298 No Alive 39.7 False\n",
"1299 Yes Dead 60.0 True\n",
"1300 No Dead 71.0 True\n",
"1301 No Alive 20.5 False\n",
"1302 No Alive 44.4 False\n",
"1303 Yes Alive 31.2 False\n",
"1304 Yes Alive 47.8 False\n",
"1305 Yes Alive 60.9 False\n",
"1306 No Dead 61.4 True\n",
"1307 Yes Alive 43.0 False\n",
"1308 No Alive 42.1 False\n",
"1309 Yes Alive 35.9 False\n",
"1310 No Alive 22.3 False\n",
"1311 Yes Dead 62.1 True\n",
"1312 No Dead 88.6 True\n",
"1313 No Alive 39.1 False\n",
"\n",
"[1314 rows x 4 columns]"
] ]
}, },
"execution_count": 17, "execution_count": 38,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"data" "result.summary()"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 60,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": [
"# Two-regressor design matrix: intercept, Age, and the boolean smoker flag.\n",
"y = data['Dead?']\n",
"x1 = data['Age'].values.reshape(-1, 1)\n",
"x2 = data['Smoke?'].values.reshape(-1, 1)\n",
"x = sm.add_constant(np.column_stack((x1, x2)))"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Optimization terminated successfully.\n",
" Current function value: 0.381244\n",
" Iterations 7\n"
]
}
],
"source": [
"# Logit of y on the design matrix x, fitted with the 'newton' optimiser.\n",
"model = sm.Logit(endog=y, exog=x)\n",
"result = model.fit(method=\"newton\")"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>Logit Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>Dead?</td> <th> No. Observations: </th> <td> 1314</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>Logit</td> <th> Df Residuals: </th> <td> 1311</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>MLE</td> <th> Df Model: </th> <td> 2</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Mon, 31 Aug 2020</td> <th> Pseudo R-squ.: </th> <td>0.3579</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>15:35:59</td> <th> Log-Likelihood: </th> <td> -500.95</td> \n",
"</tr>\n",
"<tr>\n",
" <th>converged:</th> <td>True</td> <th> LL-Null: </th> <td> -780.16</td> \n",
"</tr>\n",
"<tr>\n",
" <th> </th> <td> </td> <th> LLR p-value: </th> <td>5.534e-122</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>z</th> <th>P>|z|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>const</th> <td> -6.3519</td> <td> 0.360</td> <td> -17.637</td> <td> 0.000</td> <td> -7.058</td> <td> -5.646</td>\n",
"</tr>\n",
"<tr>\n",
" <th>x1</th> <td> 0.0998</td> <td> 0.006</td> <td> 17.290</td> <td> 0.000</td> <td> 0.089</td> <td> 0.111</td>\n",
"</tr>\n",
"<tr>\n",
" <th>x2</th> <td> 0.2787</td> <td> 0.165</td> <td> 1.689</td> <td> 0.091</td> <td> -0.045</td> <td> 0.602</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" Logit Regression Results \n",
"==============================================================================\n",
"Dep. Variable: Dead? No. Observations: 1314\n",
"Model: Logit Df Residuals: 1311\n",
"Method: MLE Df Model: 2\n",
"Date: Mon, 31 Aug 2020 Pseudo R-squ.: 0.3579\n",
"Time: 15:35:59 Log-Likelihood: -500.95\n",
"converged: True LL-Null: -780.16\n",
" LLR p-value: 5.534e-122\n",
"==============================================================================\n",
" coef std err z P>|z| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"const -6.3519 0.360 -17.637 0.000 -7.058 -5.646\n",
"x1 0.0998 0.006 17.290 0.000 0.089 0.111\n",
"x2 0.2787 0.165 1.689 0.091 -0.045 0.602\n",
"==============================================================================\n",
"\"\"\""
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result.summary()"
]
} }
], ],
"metadata": { "metadata": {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment