Commit bd72d97a authored by Agathe Schmider's avatar Agathe Schmider

end exercice 2

parent 9c9ca2c3
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -538,10 +538,10 @@ ...@@ -538,10 +538,10 @@
" <th>Method:</th> <td>IRLS</td> <th> Log-Likelihood: </th> <td> -3.9210</td>\n", " <th>Method:</th> <td>IRLS</td> <th> Log-Likelihood: </th> <td> -3.9210</td>\n",
"</tr>\n", "</tr>\n",
"<tr>\n", "<tr>\n",
" <th>Date:</th> <td>Tue, 14 Apr 2020</td> <th> Deviance: </th> <td> 3.0144</td>\n", " <th>Date:</th> <td>Wed, 15 Apr 2020</td> <th> Deviance: </th> <td> 3.0144</td>\n",
"</tr>\n", "</tr>\n",
"<tr>\n", "<tr>\n",
" <th>Time:</th> <td>10:27:51</td> <th> Pearson chi2: </th> <td> 5.00</td> \n", " <th>Time:</th> <td>15:47:34</td> <th> Pearson chi2: </th> <td> 5.00</td> \n",
"</tr>\n", "</tr>\n",
"<tr>\n", "<tr>\n",
" <th>No. Iterations:</th> <td>6</td> <th> </th> <td> </td> \n", " <th>No. Iterations:</th> <td>6</td> <th> </th> <td> </td> \n",
...@@ -572,8 +572,8 @@ ...@@ -572,8 +572,8 @@
"Model Family: Binomial Df Model: 1\n", "Model Family: Binomial Df Model: 1\n",
"Link Function: logit Scale: 1.0000\n", "Link Function: logit Scale: 1.0000\n",
"Method: IRLS Log-Likelihood: -3.9210\n", "Method: IRLS Log-Likelihood: -3.9210\n",
"Date: Tue, 14 Apr 2020 Deviance: 3.0144\n", "Date: Wed, 15 Apr 2020 Deviance: 3.0144\n",
"Time: 10:27:51 Pearson chi2: 5.00\n", "Time: 15:47:34 Pearson chi2: 5.00\n",
"No. Iterations: 6 \n", "No. Iterations: 6 \n",
"Covariance Type: nonrobust \n", "Covariance Type: nonrobust \n",
"===============================================================================\n", "===============================================================================\n",
...@@ -644,10 +644,10 @@ ...@@ -644,10 +644,10 @@
" <th>Method:</th> <td>IRLS</td> <th> Log-Likelihood: </th> <td> -23.526</td>\n", " <th>Method:</th> <td>IRLS</td> <th> Log-Likelihood: </th> <td> -23.526</td>\n",
"</tr>\n", "</tr>\n",
"<tr>\n", "<tr>\n",
" <th>Date:</th> <td>Tue, 14 Apr 2020</td> <th> Deviance: </th> <td> 18.086</td>\n", " <th>Date:</th> <td>Wed, 15 Apr 2020</td> <th> Deviance: </th> <td> 18.086</td>\n",
"</tr>\n", "</tr>\n",
"<tr>\n", "<tr>\n",
" <th>Time:</th> <td>10:27:51</td> <th> Pearson chi2: </th> <td> 30.0</td> \n", " <th>Time:</th> <td>15:47:34</td> <th> Pearson chi2: </th> <td> 30.0</td> \n",
"</tr>\n", "</tr>\n",
"<tr>\n", "<tr>\n",
" <th>No. Iterations:</th> <td>6</td> <th> </th> <td> </td> \n", " <th>No. Iterations:</th> <td>6</td> <th> </th> <td> </td> \n",
...@@ -678,8 +678,8 @@ ...@@ -678,8 +678,8 @@
"Model Family: Binomial Df Model: 1\n", "Model Family: Binomial Df Model: 1\n",
"Link Function: logit Scale: 1.0000\n", "Link Function: logit Scale: 1.0000\n",
"Method: IRLS Log-Likelihood: -23.526\n", "Method: IRLS Log-Likelihood: -23.526\n",
"Date: Tue, 14 Apr 2020 Deviance: 18.086\n", "Date: Wed, 15 Apr 2020 Deviance: 18.086\n",
"Time: 10:27:51 Pearson chi2: 30.0\n", "Time: 15:47:34 Pearson chi2: 30.0\n",
"No. Iterations: 6 \n", "No. Iterations: 6 \n",
"Covariance Type: nonrobust \n", "Covariance Type: nonrobust \n",
"===============================================================================\n", "===============================================================================\n",
...@@ -780,55 +780,6 @@ ...@@ -780,55 +780,6 @@
"plt.grid(True)" "plt.grid(True)"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"There were warnings during the construction of the log model. Let's try to change it."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"ename": "DistributionNotFound",
"evalue": "The 'statsmodel==0.9.0' distribution was not found and is required by the application",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mDistributionNotFound\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-12-0d6a0b26ac77>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpkg_resources\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpkg_resources\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"statsmodel==0.9.0\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mstatsmodel\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/lib/python3.7/site-packages/pkg_resources/__init__.py\u001b[0m in \u001b[0;36mrequire\u001b[0;34m(self, *requirements)\u001b[0m\n\u001b[1;32m 899\u001b[0m \u001b[0mincluded\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meven\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mthey\u001b[0m \u001b[0mwere\u001b[0m \u001b[0malready\u001b[0m \u001b[0mactivated\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mthis\u001b[0m \u001b[0mworking\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 900\u001b[0m \"\"\"\n\u001b[0;32m--> 901\u001b[0;31m \u001b[0mneeded\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresolve\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparse_requirements\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequirements\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 902\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 903\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mdist\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mneeded\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/lib/python3.7/site-packages/pkg_resources/__init__.py\u001b[0m in \u001b[0;36mresolve\u001b[0;34m(self, requirements, env, installer, replace_conflicting, extras)\u001b[0m\n\u001b[1;32m 785\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdist\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 786\u001b[0m \u001b[0mrequirers\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrequired_by\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreq\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 787\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mDistributionNotFound\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreq\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrequirers\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 788\u001b[0m \u001b[0mto_activate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 789\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdist\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mreq\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mDistributionNotFound\u001b[0m: The 'statsmodel==0.9.0' distribution was not found and is required by the application"
]
}
],
"source": [
"import pkg_resources\n",
"pkg_resources.require(\"statsmodel==0.9.0\")\n",
"import statsmodel"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import statsmodels.api as sm\n",
"\n",
"data[\"Success\"]=data.Count-data.Malfunction\n",
"data[\"Intercept\"]=1\n",
"\n",
"logmodel=sm.GLM(data['Frequency'], data[['Intercept','Temperature']], \n",
" family=sm.families.Binomial(sm.families.links.logit)).fit()\n",
"\n",
"logmodel.summary()\n"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
...@@ -861,7 +812,7 @@ ...@@ -861,7 +812,7 @@
"outputs": [ "outputs": [
{ {
"data": { "data": {
"image/png": "\n", "image/png": "\n",
"text/plain": [ "text/plain": [
"<Figure size 432x288 with 1 Axes>" "<Figure size 432x288 with 1 Axes>"
] ]
...@@ -884,174 +835,6 @@ ...@@ -884,174 +835,6 @@
"source": [ "source": [
"**I think I have managed to correctly compute and plot the uncertainty of my prediction.** Although the shaded area seems very similar to [the one obtained with R](https://app-learninglab.inria.fr/moocrr/gitlab/moocrr-session3/moocrr-reproducibility-study/tree/master/challenger.pdf), I can spot a few differences (e.g., the blue point for temperature 63 is outside)... Could this be a numerical error? Or a difference in the statistical method? It is not clear which one is \"right\"." "**I think I have managed to correctly compute and plot the uncertainty of my prediction.** Although the shaded area seems very similar to [the one obtained with R](https://app-learninglab.inria.fr/moocrr/gitlab/moocrr-session3/moocrr-reproducibility-study/tree/master/challenger.pdf), I can spot a few differences (e.g., the blue point for temperature 63 is outside)... Could this be a numerical error? Or a difference in the statistical method? It is not clear which one is \"right\"."
] ]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Temperature</th>\n",
" <th>Intercept</th>\n",
" <th>Frequency</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30.0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>30.5</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>31.0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>31.5</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>32.0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>116</th>\n",
" <td>88.0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>117</th>\n",
" <td>88.5</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>118</th>\n",
" <td>89.0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>119</th>\n",
" <td>89.5</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>120</th>\n",
" <td>90.0</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>121 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" Temperature Intercept Frequency\n",
"0 30.0 1 1.0\n",
"1 30.5 1 1.0\n",
"2 31.0 1 1.0\n",
"3 31.5 1 1.0\n",
"4 32.0 1 1.0\n",
".. ... ... ...\n",
"116 88.0 1 1.0\n",
"117 88.5 1 1.0\n",
"118 89.0 1 1.0\n",
"119 89.5 1 1.0\n",
"120 90.0 1 1.0\n",
"\n",
"[121 rows x 3 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_pred"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 1.0\n",
"1 1.0\n",
"2 1.0\n",
"3 1.0\n",
"4 1.0\n",
" ... \n",
"116 1.0\n",
"117 1.0\n",
"118 1.0\n",
"119 1.0\n",
"120 1.0\n",
"Length: 121, dtype: float64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_pred = pd.DataFrame({'Temperature': np.linspace(start=30, stop=90, num=121), 'Intercept': 1})\n",
"logmodel.predict(data_pred)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment