From 9339b2e722465a15e8bbd82f386baf27b685d490 Mon Sep 17 00:00:00 2001 From: Marie-Gabrielle Dondon <85bc36e0a8096c618fbd5993d1cca191@app-learninglab.inria.fr> Date: Mon, 12 Nov 2018 15:17:36 +0000 Subject: [PATCH] Replace challenger_Python_org.org --- src/Python3/challenger_Python_org.org | 205 ++------------------------ 1 file changed, 14 insertions(+), 191 deletions(-) diff --git a/src/Python3/challenger_Python_org.org b/src/Python3/challenger_Python_org.org index 87b6b6d..7d5351d 100644 --- a/src/Python3/challenger_Python_org.org +++ b/src/Python3/challenger_Python_org.org @@ -47,106 +47,6 @@ print_sys_info() print_imported_modules() #+end_src - #+RESULTS: - #+begin_example -Python 3.7.0 (v3.7.0:1bf9cc5093, Jun 27 2018, 04:59:51) [MSC v.1914 64 bit (AMD64)] on win32 -Type "help", "copyright", "credits" or "license" for more information. -3.7.0 (v3.7.0:1bf9cc5093, Jun 27 2018, 04:59:51) [MSC v.1914 64 bit (AMD64)] -uname_result(system='Windows', node='MGDONDON', release='7', version='6.1.7601', machine='AMD64', processor='Intel64 Family 6 Model 94 Stepping 3, GenuineIntel') -IPython 6.5.0 -IPython.core.release 6.5.0 -_csv 1.0 -_ctypes 1.1.0 -decimal 1.70 -argparse 1.1 -backcall 0.1.0 -colorama 0.3.9 -csv 1.0 -ctypes 1.1.0 -cycler 0.10.0 -dateutil 2.7.3 -decimal 1.70 -decorator 4.3.0 -distutils 3.7.0 -ipykernel 4.8.2 -ipykernel._version 4.8.2 -ipython_genutils 0.2.0 -ipython_genutils._version 0.2.0 -ipywidgets 7.4.0 -ipywidgets._version 7.4.0 -jedi 0.12.1 -json 2.0.9 -jupyter_client 5.2.3 -jupyter_client._version 5.2.3 -jupyter_core 4.4.0 -jupyter_core.version 4.4.0 -kiwisolver 1.0.1 -logging 0.5.1.2 -matplotlib 2.2.3 -matplotlib.backends.backend_agg 2.2.3 -numpy 1.15.0 -numpy.core 1.15.0 -numpy.core.multiarray 3.1 -numpy.lib 1.15.0 -numpy.linalg._umath_linalg b'0.1.5' -numpy.matlib 1.15.0 -pandas 0.23.4 -_libjson 1.33 -parso 0.3.1 -patsy 0.5.0 -patsy.version 0.5.0 -pickleshare 0.7.4 -platform 1.0.8 -prompt_toolkit 1.0.15 -pygments 2.2.0 -pyparsing 2.2.0 -pytz 2018.5 -re 2.2.1 -scipy 1.1.0 -scipy._lib.decorator 4.0.5 -scipy._lib.six 1.2.0 -scipy.fftpack._fftpack b'$Revision: $' -scipy.fftpack.convolve b'$Revision: $' -scipy.integrate._dop b'$Revision: $' -scipy.integrate._ode $Id$ -scipy.integrate._odepack 1.9 -scipy.integrate._quadpack 1.13 -scipy.integrate.lsoda b'$Revision: $' -scipy.integrate.vode b'$Revision: $' -scipy.interpolate._fitpack 1.7 -scipy.interpolate.dfitpack b'$Revision: $' -scipy.linalg 0.4.9 -scipy.linalg._fblas b'$Revision: $' -scipy.linalg._flapack b'$Revision: $' -scipy.linalg._flinalg b'$Revision: $' -scipy.ndimage 2.0 -scipy.optimize._cobyla b'$Revision: $' -scipy.optimize._lbfgsb b'$Revision: $' -scipy.optimize._minpack 1.10 -scipy.optimize._nnls b'$Revision: $' -scipy.optimize._slsqp b'$Revision: $' -scipy.optimize.minpack2 b'$Revision: $' -scipy.signal.spline 0.2 -scipy.sparse.linalg.eigen.arpack._arpack b'$Revision: $' -scipy.sparse.linalg.isolve._iterative b'$Revision: $' -scipy.special.specfun b'$Revision: $' -scipy.stats.mvn b'$Revision: $' -scipy.stats.statlib b'$Revision: $' -seaborn 0.9.0 -seaborn.external.husl 2.1.0 -seaborn.external.six 1.10.0 -six 1.11.0 -statsmodels 0.9.0 -statsmodels.__init__ 0.9.0 -traitlets 4.3.2 -traitlets._version 4.3.2 -urllib.request 3.7 -zlib 1.0 -zmq 17.1.2 -zmq.sugar 17.1.2 -zmq.sugar.version 17.1.2 - #+end_example - *** Loading and inspecting data Let's start by reading data. 
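
Besides printing the raw frame, a couple of pandas calls give a quick
structural view of what was just read (column types, dimensions,
summary statistics). This is only a minimal inspection sketch; it
assumes the =data= frame produced by the =pd.read_csv= call shown
just below is available in the session.

#+begin_src python :results output :session :exports both
# Quick structural check of the Challenger data set: column types,
# shape, and basic summary statistics.
print(data.dtypes)
print(data.shape)
print(data.describe())
#+end_src
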
@@ -156,34 +56,6 @@ data = pd.read_csv("https://app-learninglab.inria.fr/gitlab/moocrr-session1/mooc
print(data)
#+end_src

- #+RESULTS:
- #+begin_example
-          Date  Count  Temperature  Pressure  Malfunction
- 0     4/12/81      6           66        50            0
- 1    11/12/81      6           70        50            1
- 2     3/22/82      6           69        50            0
- 3    11/11/82      6           68        50            0
- 4     4/04/83      6           67        50            0
- 5     6/18/82      6           72        50            0
- 6     8/30/83      6           73       100            0
- 7    11/28/83      6           70       100            0
- 8     2/03/84      6           57       200            1
- 9     4/06/84      6           63       200            1
- 10    8/30/84      6           70       200            1
- 11   10/05/84      6           78       200            0
- 12   11/08/84      6           67       200            0
- 13    1/24/85      6           53       200            2
- 14    4/12/85      6           67       200            0
- 15    4/29/85      6           75       200            0
- 16    6/17/85      6           70       200            0
- 17  7/2903/85      6           81       200            0
- 18    8/27/85      6           76       200            0
- 19   10/03/85      6           79       200            0
- 20   10/30/85      6           75       200            2
- 21   11/26/85      6           76       200            0
- 22    1/12/86      6           58       200            1
- #+end_example
-
We know from our previous experience on this data set that filtering
data is a really bad idea. We will therefore process the complete,
unfiltered data set.

@@ -200,9 +72,6 @@ plt.savefig(matplot_lib_filename)
matplot_lib_filename
#+end_src

- #+RESULTS:
- [[file:c:/Users/dondon/AppData/Local/Temp/babel-aNPFF5/figureFG8KBj.png]]
-
*** Logistic regression

Let's assume O-rings independently fail with the same probability
@@ -221,26 +90,6 @@ logmodel=sm.GLM(data['Frequency'], data[['Intercept','Temperature']],
print(logmodel.summary())
#+end_src

- #+RESULTS:
- #+begin_example
-Generalized Linear Model Regression Results
-==============================================================================
-Dep. Variable:              Frequency   No. Observations:                   23
-Model:                            GLM   Df Residuals:                       21
-Model Family:                Binomial   Df Model:                            1
-Link Function:                  logit   Scale:                          1.0000
-Method:                          IRLS   Log-Likelihood:                -3.9210
-Date:                Mon, 12 Nov 2018   Deviance:                       3.0144
-Time:                        13:13:31   Pearson chi2:                     5.00
-No. Iterations:                     6   Covariance Type:             nonrobust
-===============================================================================
-                  coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
-Intercept       5.0850      7.477      0.680      0.496      -9.570      19.740
-Temperature    -0.1156      0.115     -1.004      0.316      -0.341       0.110
-===============================================================================
- #+end_example
-
The maximum likelihood estimators of the intercept and of Temperature
are thus *$\hat{\alpha}$ = 5.0850* and *$\hat{\beta}$ = -0.1156*. This
*corresponds* to the values reported in the article of Dalal /et al./ The standard errors are
@@ -262,26 +111,6 @@ logmodel=sm.GLM(data['Frequency'], data[['Intercept','Temperature']],
print(logmodel.summary())
#+end_src

- #+RESULTS:
- #+begin_example
-Generalized Linear Model Regression Results
-==============================================================================
-Dep. Variable:              Frequency   No. Observations:                   23
-Model:                            GLM   Df Residuals:                       21
-Model Family:                Binomial   Df Model:                            1
-Link Function:                  logit   Scale:                          1.0000
-Method:                          IRLS   Log-Likelihood:                -23.526
-Date:                Mon, 12 Nov 2018   Deviance:                       18.086
-Time:                        13:13:39   Pearson chi2:                     30.0
-No. Iterations:                     6   Covariance Type:             nonrobust
-===============================================================================
-                  coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
-Intercept       5.0850      3.052      1.666      0.096      -0.898      11.068
-Temperature    -0.1156      0.047     -2.458      0.014      -0.208      -0.023
-===============================================================================
- #+end_example
-
Good, now I have recovered the asymptotic standard errors
*$s_{\hat{\alpha}}$ = 3.052* and *$s_{\hat{\beta}}$ = 0.047*.
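
The same quantities can also be pulled programmatically from the
fitted model instead of being read off the printed summary, which
makes them easier to compare with the values reported by Dalal /et
al./ This is a small sketch, assuming the weighted =logmodel= fitted
above is still in the session; =params=, =bse=, =conf_int=,
=deviance= and =df_resid= are standard attributes of statsmodels GLM
results.

#+begin_src python :results output :session :exports both
# Estimates, asymptotic standard errors, 95% Wald confidence
# intervals, and goodness-of-fit quantities of the fitted GLM.
print(logmodel.params)                       # alpha-hat and beta-hat
print(logmodel.bse)                          # asymptotic standard errors
print(logmodel.conf_int())                   # 95% confidence intervals
print(logmodel.deviance, logmodel.df_resid)  # deviance G^2 and residual df
#+end_src
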
The Goodness of fit (Deviance) indicated for this model is *$G^2$ = 18.086* with *21* degrees
@@ -307,19 +136,16 @@ plt.savefig(matplot_lib_filename)
matplot_lib_filename
#+end_src

- #+RESULTS:
- [[file:c:/Users/dondon/AppData/Local/Temp/babel-aNPFF5/figure51z7PH.png]]
-
This figure is very similar to Figure 4 of Dalal /et al./ *I have
managed to replicate Figure 4 of the Dalal /et al./ article.*

-** Computing and plotting uncertainty
+*** Computing and plotting uncertainty

-Following the documentation of
-[Seaborn](https://seaborn.pydata.org/generated/seaborn.regplot.html),
-I use regplot.
+ Following the documentation of
+ [[https://seaborn.pydata.org/generated/seaborn.regplot.html][Seaborn]],
+ I use =regplot=.

-#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
+ #+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
sns.set(color_codes=True)
plt.xlim(30,90)
plt.ylim(0,1)
sns.regplot(x='Temperature', y='Frequency', data=data, logistic=True)
plt.show()
plt.savefig(matplot_lib_filename)
matplot_lib_filename
-#+end_src
-
-#+RESULTS:
-[[file:c:/Users/dondon/AppData/Local/Temp/babel-aNPFF5/figurebq7jid.png]]
-
-**I think I have managed to correctly compute and plot the uncertainty
- of my prediction.** Although the shaded area seems very similar to
- [the one obtained by with
- R](https://app-learninglab.inria.fr/gitlab/moocrr-session1/moocrr-reproducibility-study/raw/5c9dbef11b4d7638b7ddf2ea71026e7bf00fcfb0/challenger.pdf),
- I can spot a few differences (e.g., the blue point for temperature
- 63 is outside)... Could this be a numerical error ? Or a difference
- in the statistical method ? It is not clear which one is "right".
+ #+end_src
+
+ *I think I have managed to correctly compute and plot the uncertainty
+ of my prediction.* Although the shaded area seems very similar to
+ [[https://app-learninglab.inria.fr/gitlab/moocrr-session1/moocrr-reproducibility-study/raw/5c9dbef11b4d7638b7ddf2ea71026e7bf00fcfb0/challenger.pdf][the one obtained with R]],
+ I can spot a few differences (e.g., the blue point for temperature
+ 63 is outside)... Could this be a numerical error? Or a difference
+ in the statistical method? It is not clear which one is "right".
-- 
2.18.1
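
To dig into that last difference, one possible cross-check is to draw
a confidence band computed directly from the fitted GLM instead of
relying on seaborn: apply the delta method to the linear predictor
using the estimated covariance matrix, then map the band through the
inverse logit. This is only a sketch under a few assumptions: the
weighted =logmodel= and the =data= frame are still in the session,
and 1.96 is used for an approximate 95% Wald band. As far as I
understand, =regplot= obtains its band by bootstrapping, so small
visual differences between the two approaches are expected.

#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import expit  # inverse logit

# Wald-type (delta-method) 95% confidence band for the predicted
# failure probability as a function of temperature.
temps = np.linspace(30, 90, 121)
X = np.column_stack([np.ones_like(temps), temps])   # columns: Intercept, Temperature
eta = X @ logmodel.params.values                    # linear predictor
cov = logmodel.cov_params().values                  # estimated covariance of (alpha, beta)
se_eta = np.sqrt(np.einsum('ij,jk,ik->i', X, cov, X))

plt.figure()
plt.fill_between(temps, expit(eta - 1.96 * se_eta),
                 expit(eta + 1.96 * se_eta), alpha=0.3)
plt.plot(temps, expit(eta))
plt.scatter(data['Temperature'], data['Frequency'])
plt.xlim(30, 90)
plt.ylim(0, 1)
plt.xlabel('Temperature')
plt.ylabel('Frequency')
plt.savefig(matplot_lib_filename)
matplot_lib_filename
#+end_src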