From 16dd6981b97e4a91f453058ffd2898add7dbd04d Mon Sep 17 00:00:00 2001 From: 1cda4be30895c9886fda013d21530393 <1cda4be30895c9886fda013d21530393@app-learninglab.inria.fr> Date: Thu, 5 Nov 2020 02:29:49 +0000 Subject: [PATCH] final --- module2/exo5/exo5_en.ipynb | 48 +++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/module2/exo5/exo5_en.ipynb b/module2/exo5/exo5_en.ipynb index b310181..db511a8 100644 --- a/module2/exo5/exo5_en.ipynb +++ b/module2/exo5/exo5_en.ipynb @@ -495,7 +495,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -504,10 +504,10 @@ "\n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", " \n", @@ -516,16 +516,16 @@ " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "
Generalized Linear Model Regression Results
Dep. Variable: Frequency No. Observations: 23Dep. Variable: Frequency No. Observations: 7
Model: GLM Df Residuals: 21Model: GLM Df Residuals: 5
Model Family: Binomial Df Model: 1Link Function: logit Scale: 1.0000
Method: IRLS Log-Likelihood: -3.9210Method: IRLS Log-Likelihood: -2.5250
Date: Thu, 22 Oct 2020 Deviance: 3.0144Date: Thu, 05 Nov 2020 Deviance: 0.22231
Time: 11:23:38 Pearson chi2: 5.00Time: 02:29:10 Pearson chi2: 0.236
No. Iterations: 6 Covariance Type: nonrobustNo. Iterations: 4 Covariance Type: nonrobust
\n", "\n", @@ -533,10 +533,10 @@ " \n", "\n", "\n", - " \n", + " \n", "\n", "\n", - " \n", + " \n", "\n", "
coef std err z P>|z| [0.025 0.975]
Intercept 5.0850 7.477 0.680 0.496 -9.570 19.740Intercept -1.3895 7.828 -0.178 0.859 -16.732 13.953
Temperature -0.1156 0.115 -1.004 0.316 -0.341 0.110Temperature 0.0014 0.122 0.012 0.991 -0.238 0.240
" ], @@ -545,24 +545,24 @@ "\"\"\"\n", " Generalized Linear Model Regression Results \n", "==============================================================================\n", - "Dep. Variable: Frequency No. Observations: 23\n", - "Model: GLM Df Residuals: 21\n", + "Dep. Variable: Frequency No. Observations: 7\n", + "Model: GLM Df Residuals: 5\n", "Model Family: Binomial Df Model: 1\n", "Link Function: logit Scale: 1.0000\n", - "Method: IRLS Log-Likelihood: -3.9210\n", - "Date: Thu, 22 Oct 2020 Deviance: 3.0144\n", - "Time: 11:23:38 Pearson chi2: 5.00\n", - "No. Iterations: 6 Covariance Type: nonrobust\n", + "Method: IRLS Log-Likelihood: -2.5250\n", + "Date: Thu, 05 Nov 2020 Deviance: 0.22231\n", + "Time: 02:29:10 Pearson chi2: 0.236\n", + "No. Iterations: 4 Covariance Type: nonrobust\n", "===============================================================================\n", " coef std err z P>|z| [0.025 0.975]\n", "-------------------------------------------------------------------------------\n", - "Intercept 5.0850 7.477 0.680 0.496 -9.570 19.740\n", - "Temperature -0.1156 0.115 -1.004 0.316 -0.341 0.110\n", + "Intercept -1.3895 7.828 -0.178 0.859 -16.732 13.953\n", + "Temperature 0.0014 0.122 0.012 0.991 -0.238 0.240\n", "===============================================================================\n", "\"\"\"" ] }, - "execution_count": 17, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -691,7 +691,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -721,12 +721,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "

As we can see using visual inspection there is some tendency the tests with temperatures between 65 and 90 have less failure. Of course we can't really conclude using only this observation, however at least we know that ...

" + "

As we can see from visual inspection, there is a tendency for tests with temperatures between 65 and 90 to have fewer failures. Of course, we can't really conclude anything from this observation alone, since we don't have enough data at the lower temperatures. However, we can clearly see that 2 failures occurred at the lowest temperature, 53, and that there is no successful experiment below 65. This visual inspection should have raised suspicion before launching the Challenger shuttle.

" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -743,6 +743,10 @@ } ], "source": [ + "data[\"Success\"]=data.Count-data.Malfunction\n", + "data[\"Intercept\"]=1\n", + "logmodel=sm.GLM(data['Frequency'], data[['Intercept','Temperature']], family=sm.families.Binomial(sm.families.links.logit)).fit()\n", + "\n", "%matplotlib inline\n", "data_pred = pd.DataFrame({'Temperature': np.linspace(start=30, stop=90, num=121), 'Intercept': 1})\n", "data_pred['Frequency'] = logmodel.predict(data_pred[['Intercept','Temperature']])\n", @@ -755,7 +759,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "

Also when we plot the regression now we can see that lower temperatures seem to have more ...

" + "

On the plot above we reuse the same regression technique, but this time we include the whole dataset. Now we can see clearly that the temperature of the experiment does have an influence on the probability of failure. From the plot, the predicted failure probability at the lowest temperatures is around 80 percent, which is quite high.

" ] }, { -- 2.18.1