Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
mooc-rr
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
86d2379a8cd828206f6e8576c862739f
mooc-rr
Commits
9a0a4620
Commit
9a0a4620
authored
Aug 31, 2020
by
86d2379a8cd828206f6e8576c862739f
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
no commit message
parent
c701238b
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
280 additions
and
530 deletions
+280
-530
exercice.ipynb
module3/exo3/exercice.ipynb
+280
-530
No files found.
module3/exo3/exercice.ipynb
View file @
9a0a4620
...
@@ -834,578 +834,328 @@
...
@@ -834,578 +834,328 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
1
5,
"execution_count":
4
5,
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": [
"source": [
"dead_bool = [(data['Status'][i] == \"Dead\") for i in range(len(data))]"
"dead_bool = [(data['Status'][i] == \"Dead\") for i in range(len(data))]\n",
"data['Dead?'] = dead_bool\n",
"smoke_bool = [(data['Smoker'][i] == \"Yes\") for i in range(len(data))]\n",
"data['Smoke?'] = smoke_bool"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" Nous allons tester les hypothèses par régression logistique."
]
]
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
null
,
"execution_count":
19
,
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": [
"source": [
"data.insert(4, \"Dead?\",data)"
"import numpy as np\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import classification_report, confusion_matrix"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LogisticRegression(C=10.0, class_weight=None, dual=False, fit_intercept=True,\n",
" intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n",
" penalty='l2', random_state=0, solver='liblinear', tol=0.0001,\n",
" verbose=0, warm_start=False)"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = LogisticRegression(solver='liblinear', C=10.0, random_state=0)\n",
"model.fit(data[data['Smoker'] == \"Yes\"]['Age'].values.reshape(-1,1), data[data['Smoker'] == \"Yes\"]['Dead?'])\n"
]
]
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
16
,
"execution_count":
25
,
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": [
"source": [
"
data['Dead?'] = dead_bool
"
"
p_pred = model.predict_proba(data['Age'].values.reshape(-1,1))
"
]
]
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 17,
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0.98225578 0.01774422]\n",
" [0.98490288 0.01509712]\n",
" [0.61947454 0.38052546]\n",
" ...\n",
" [0.51071991 0.48928009]\n",
" [0.07464525 0.92535475]\n",
" [0.90594064 0.09405936]]\n"
]
}
],
"source": [
"print(p_pred)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import statsmodels.api as sm\n"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'list' object has no attribute 'reshape'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-41-ddab106694e4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Age'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Smoker'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_constant\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Dead?'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'reshape'"
]
}
],
"source": [
"x1 = data['Age'].values.reshape(-1,1)\n",
"x2 = data['Smoke?'].values.reshape(-1,1)\n",
"x = sm.add_constant(x)\n",
"y = data['Dead?']"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Optimization terminated successfully.\n",
" Current function value: 0.382339\n",
" Iterations 7\n"
]
}
],
"source": [
"model = sm.Logit(y, x)\n",
"result = model.fit(method='newton')"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
{
{
"data": {
"data": {
"text/html": [
"text/html": [
"<div>\n",
"<table class=\"simpletable\">\n",
"<style scoped>\n",
"<caption>Logit Regression Results</caption>\n",
" .dataframe tbody tr th:only-of-type {\n",
"<tr>\n",
" vertical-align: middle;\n",
" <th>Dep. Variable:</th> <td>Dead?</td> <th> No. Observations: </th> <td> 1314</td> \n",
" }\n",
"</tr>\n",
"\n",
"<tr>\n",
" .dataframe tbody tr th {\n",
" <th>Model:</th> <td>Logit</td> <th> Df Residuals: </th> <td> 1312</td> \n",
" vertical-align: top;\n",
"</tr>\n",
" }\n",
"<tr>\n",
"\n",
" <th>Method:</th> <td>MLE</td> <th> Df Model: </th> <td> 1</td> \n",
" .dataframe thead th {\n",
"</tr>\n",
" text-align: right;\n",
"<tr>\n",
" }\n",
" <th>Date:</th> <td>Mon, 31 Aug 2020</td> <th> Pseudo R-squ.: </th> <td>0.3560</td> \n",
"</style>\n",
"</tr>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"<tr>\n",
" <thead>\n",
" <th>Time:</th> <td>15:21:58</td> <th> Log-Likelihood: </th> <td> -502.39</td> \n",
" <tr style=\"text-align: right;\">\n",
"</tr>\n",
" <th></th>\n",
"<tr>\n",
" <th>Smoker</th>\n",
" <th>converged:</th> <td>True</td> <th> LL-Null: </th> <td> -780.16</td> \n",
" <th>Status</th>\n",
"</tr>\n",
" <th>Age</th>\n",
"<tr>\n",
" <th>Dead?</th>\n",
" <th> </th> <td> </td> <th> LLR p-value: </th> <td>7.883e-123</td>\n",
" </tr>\n",
"</tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>21.0</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>19.3</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>57.5</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>47.1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>81.4</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>36.8</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>23.8</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>57.5</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>24.8</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>49.5</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>30.0</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>66.0</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>49.2</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>58.4</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>60.6</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>25.1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>43.5</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>27.1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>58.3</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>65.7</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>73.2</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>38.3</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>33.4</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>62.3</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>18.0</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>56.2</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>59.2</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>25.8</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>36.9</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>20.2</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1284</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>36.0</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1285</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>48.3</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1286</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>63.1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1287</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>60.8</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1288</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>39.3</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1289</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>36.7</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1290</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>63.8</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1291</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>71.3</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1292</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>57.7</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1293</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>63.2</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1294</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>46.6</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1295</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>82.4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1296</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>38.3</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1297</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>32.7</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1298</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>39.7</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1299</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>60.0</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1300</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>71.0</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1301</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>20.5</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1302</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>44.4</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1303</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>31.2</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1304</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>47.8</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1305</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>60.9</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1306</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>61.4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1307</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>43.0</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1308</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>42.1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1309</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>35.9</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1310</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>22.3</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1311</th>\n",
" <td>Yes</td>\n",
" <td>Dead</td>\n",
" <td>62.1</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1312</th>\n",
" <td>No</td>\n",
" <td>Dead</td>\n",
" <td>88.6</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1313</th>\n",
" <td>No</td>\n",
" <td>Alive</td>\n",
" <td>39.1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</table>\n",
"<p>1314 rows × 4 columns</p>\n",
"<table class=\"simpletable\">\n",
"</div>"
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>z</th> <th>P>|z|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>const</th> <td> -6.1045</td> <td> 0.321</td> <td> -18.992</td> <td> 0.000</td> <td> -6.735</td> <td> -5.475</td>\n",
"</tr>\n",
"<tr>\n",
" <th>x1</th> <td> 0.0977</td> <td> 0.006</td> <td> 17.578</td> <td> 0.000</td> <td> 0.087</td> <td> 0.109</td>\n",
"</tr>\n",
"</table>"
],
],
"text/plain": [
"text/plain": [
" Smoker Status Age Dead?\n",
"<class 'statsmodels.iolib.summary.Summary'>\n",
"0 Yes Alive 21.0 False\n",
"\"\"\"\n",
"1 Yes Alive 19.3 False\n",
" Logit Regression Results \n",
"2 No Dead 57.5 True\n",
"==============================================================================\n",
"3 No Alive 47.1 False\n",
"Dep. Variable: Dead? No. Observations: 1314\n",
"4 Yes Alive 81.4 False\n",
"Model: Logit Df Residuals: 1312\n",
"5 No Alive 36.8 False\n",
"Method: MLE Df Model: 1\n",
"6 No Alive 23.8 False\n",
"Date: Mon, 31 Aug 2020 Pseudo R-squ.: 0.3560\n",
"7 Yes Dead 57.5 True\n",
"Time: 15:21:58 Log-Likelihood: -502.39\n",
"8 Yes Alive 24.8 False\n",
"converged: True LL-Null: -780.16\n",
"9 Yes Alive 49.5 False\n",
" LLR p-value: 7.883e-123\n",
"10 Yes Alive 30.0 False\n",
"==============================================================================\n",
"11 No Dead 66.0 True\n",
" coef std err z P>|z| [0.025 0.975]\n",
"12 Yes Alive 49.2 False\n",
"------------------------------------------------------------------------------\n",
"13 No Alive 58.4 False\n",
"const -6.1045 0.321 -18.992 0.000 -6.735 -5.475\n",
"14 No Dead 60.6 True\n",
"x1 0.0977 0.006 17.578 0.000 0.087 0.109\n",
"15 No Alive 25.1 False\n",
"==============================================================================\n",
"16 No Alive 43.5 False\n",
"\"\"\""
"17 No Alive 27.1 False\n",
"18 No Alive 58.3 False\n",
"19 Yes Alive 65.7 False\n",
"20 No Dead 73.2 True\n",
"21 Yes Alive 38.3 False\n",
"22 No Alive 33.4 False\n",
"23 Yes Dead 62.3 True\n",
"24 No Alive 18.0 False\n",
"25 No Alive 56.2 False\n",
"26 Yes Alive 59.2 False\n",
"27 No Alive 25.8 False\n",
"28 No Dead 36.9 True\n",
"29 No Alive 20.2 False\n",
"... ... ... ... ...\n",
"1284 Yes Dead 36.0 True\n",
"1285 Yes Alive 48.3 False\n",
"1286 No Alive 63.1 False\n",
"1287 No Alive 60.8 False\n",
"1288 Yes Dead 39.3 True\n",
"1289 No Alive 36.7 False\n",
"1290 No Alive 63.8 False\n",
"1291 No Dead 71.3 True\n",
"1292 No Alive 57.7 False\n",
"1293 No Alive 63.2 False\n",
"1294 No Alive 46.6 False\n",
"1295 Yes Dead 82.4 True\n",
"1296 Yes Alive 38.3 False\n",
"1297 Yes Alive 32.7 False\n",
"1298 No Alive 39.7 False\n",
"1299 Yes Dead 60.0 True\n",
"1300 No Dead 71.0 True\n",
"1301 No Alive 20.5 False\n",
"1302 No Alive 44.4 False\n",
"1303 Yes Alive 31.2 False\n",
"1304 Yes Alive 47.8 False\n",
"1305 Yes Alive 60.9 False\n",
"1306 No Dead 61.4 True\n",
"1307 Yes Alive 43.0 False\n",
"1308 No Alive 42.1 False\n",
"1309 Yes Alive 35.9 False\n",
"1310 No Alive 22.3 False\n",
"1311 Yes Dead 62.1 True\n",
"1312 No Dead 88.6 True\n",
"1313 No Alive 39.1 False\n",
"\n",
"[1314 rows x 4 columns]"
]
]
},
},
"execution_count":
17
,
"execution_count":
38
,
"metadata": {},
"metadata": {},
"output_type": "execute_result"
"output_type": "execute_result"
}
}
],
],
"source": [
"source": [
"
data
"
"
result.summary()
"
]
]
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count":
null
,
"execution_count":
60
,
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": []
"source": [
"x1 = data['Age'].values.reshape(-1,1)\n",
"x2 = data['Smoke?'].values.reshape(-1,1)\n",
"x = np.hstack((x1,x2))\n",
"x = sm.add_constant(x)\n",
"y = data['Dead?']"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Optimization terminated successfully.\n",
" Current function value: 0.381244\n",
" Iterations 7\n"
]
}
],
"source": [
"model = sm.Logit(y, x)\n",
"result = model.fit(method='newton')"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>Logit Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>Dead?</td> <th> No. Observations: </th> <td> 1314</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>Logit</td> <th> Df Residuals: </th> <td> 1311</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>MLE</td> <th> Df Model: </th> <td> 2</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Mon, 31 Aug 2020</td> <th> Pseudo R-squ.: </th> <td>0.3579</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>15:35:59</td> <th> Log-Likelihood: </th> <td> -500.95</td> \n",
"</tr>\n",
"<tr>\n",
" <th>converged:</th> <td>True</td> <th> LL-Null: </th> <td> -780.16</td> \n",
"</tr>\n",
"<tr>\n",
" <th> </th> <td> </td> <th> LLR p-value: </th> <td>5.534e-122</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>z</th> <th>P>|z|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>const</th> <td> -6.3519</td> <td> 0.360</td> <td> -17.637</td> <td> 0.000</td> <td> -7.058</td> <td> -5.646</td>\n",
"</tr>\n",
"<tr>\n",
" <th>x1</th> <td> 0.0998</td> <td> 0.006</td> <td> 17.290</td> <td> 0.000</td> <td> 0.089</td> <td> 0.111</td>\n",
"</tr>\n",
"<tr>\n",
" <th>x2</th> <td> 0.2787</td> <td> 0.165</td> <td> 1.689</td> <td> 0.091</td> <td> -0.045</td> <td> 0.602</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" Logit Regression Results \n",
"==============================================================================\n",
"Dep. Variable: Dead? No. Observations: 1314\n",
"Model: Logit Df Residuals: 1311\n",
"Method: MLE Df Model: 2\n",
"Date: Mon, 31 Aug 2020 Pseudo R-squ.: 0.3579\n",
"Time: 15:35:59 Log-Likelihood: -500.95\n",
"converged: True LL-Null: -780.16\n",
" LLR p-value: 5.534e-122\n",
"==============================================================================\n",
" coef std err z P>|z| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"const -6.3519 0.360 -17.637 0.000 -7.058 -5.646\n",
"x1 0.0998 0.006 17.290 0.000 0.089 0.111\n",
"x2 0.2787 0.165 1.689 0.091 -0.045 0.602\n",
"==============================================================================\n",
"\"\"\""
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result.summary()"
]
}
}
],
],
"metadata": {
"metadata": {
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment