def print_imported_modules():
    """Print the name and version of every loaded module that declares one.

    Walks ``sys.modules`` in order of registry key and prints one
    ``<name> <version>`` line for each entry that exposes a ``__version__``
    attribute.  Entries without a version are skipped silently.

    The printed name is the entry's own ``__name__``; it can differ from the
    registry key, so a module registered under several keys is listed once
    per key.  When an entry has no ``__name__`` at all, the registry key is
    printed instead of failing.
    """
    import sys

    # sorted() builds a list first, so the registry is not being iterated
    # while the attributes below are probed.
    for name, val in sorted(sys.modules.items()):
        if not hasattr(val, '__version__'):
            continue
        # sys.modules can hold arbitrary objects, not only real modules;
        # fall back to the key rather than raising AttributeError.
        print(getattr(val, '__name__', name), val.__version__)
def print_sys_info():
    """Report the interpreter build, the host description and the Python version.

    Writes three items to standard output, each with its own ``print`` call
    and in this order: ``sys.version``, the result of ``platform.uname()``,
    and ``platform.python_version()``.
    """
    import platform
    import sys

    report = (
        sys.version,
        platform.uname(),
        platform.python_version(),
    )
    for entry in report:
        print(entry)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns

# Record the execution environment before the analysis: interpreter and host
# first, then the versions of the modules loaded so far. The module listing
# only covers what is already in sys.modules, hence it runs after the imports.
print_sys_info()
print_imported_modules()

3.7.16 (default, Jan 17 2023, 16:06:28) [MSC v.1916 64 bit (AMD64)]
uname_result(system='Windows', node='CLT-PHAC2105-P', release='10', version='10.0.19041', machine='AMD64', processor='Intel64 Family 6 Model 140 Stepping 1, GenuineIntel')
3.7.16
IPython 7.31.1
IPython.core.release 7.31.1
PIL 9.3.0
PIL.Image 9.3.0
PIL._deprecate 9.3.0
PIL._version 9.3.0
_csv 1.0
_ctypes 1.1.0
decimal 1.70
_pydev_bundle.fsnotify 0.1.5
_pydevd_frame_eval.vendored.bytecode 0.13.0.dev
argparse 1.1
backcall 0.2.0
bottleneck 1.3.5
cffi 1.15.1
colorama 0.4.6
csv 1.0
ctypes 1.1.0
cycler 0.10.0
dateutil 2.8.2
debugpy 1.5.1
decimal 1.70
decorator 5.1.1
defusedxml 0.7.1
entrypoints 0.4
http.server 0.6
ipykernel 6.15.2
ipykernel._version 6.15.2
ipython_genutils 0.2.0
ipython_genutils._version 0.2.0
ipywidgets 7.6.5
ipywidgets._version 7.6.5
jedi 0.18.1
json 2.0.9
jupyter_client 7.4.9
jupyter_client._version 7.4.9
jupyter_core 4.11.2
jupyter_core.version 4.11.2
kiwisolver 1.4.4
kiwisolver._cext 1.4.4
logging 0.5.1.2
matplotlib 3.5.3
mkl 2.4.0
numexpr 2.8.4
numpy 1.21.5
numpy.core 1.21.5
numpy.core._multiarray_umath 3.1
numpy.lib 1.21.5
numpy.linalg._umath_linalg 0.1.5
packaging 22.0
packaging.__about__ 22.0
pandas 1.3.5
parso 0.8.3
patsy 0.5.6
patsy.version 0.5.6
pickleshare 0.7.5
pkg_resources._vendor.appdirs 1.4.3
pkg_resources._vendor.more_itertools 8.12.0
pkg_resources._vendor.packaging 21.3
pkg_resources._vendor.packaging.__about__ 21.3
pkg_resources._vendor.pyparsing 3.0.9
pkg_resources._vendor.appdirs 1.4.3
pkg_resources._vendor.more_itertools 8.12.0
pkg_resources._vendor.packaging 21.3
pkg_resources._vendor.pyparsing 3.0.9
platform 1.0.8
prompt_toolkit 3.0.36
psutil 5.9.0
pydevd 2.6.0
pygments 2.11.2
pyparsing 3.0.9
pytz 2022.7
re 2.2.1
scipy 1.7.3
scipy._lib._uarray 0.5.1+49.g4c3f1d7.scipy
scipy._lib.decorator 4.0.5
scipy.integrate._dop b'$Revision: $'
scipy.integrate._ode $Id$
scipy.integrate._odepack  1.9 
scipy.integrate._quadpack  1.13 
scipy.integrate.lsoda b'$Revision: $'
scipy.integrate.vode b'$Revision: $'
scipy.interpolate._fitpack  1.7 
scipy.interpolate.dfitpack b'$Revision: $'
scipy.linalg._fblas b'$Revision: $'
scipy.linalg._flapack b'$Revision: $'
scipy.linalg._flinalg b'$Revision: $'
scipy.linalg._interpolative b'$Revision: $'
scipy.ndimage 2.0
scipy.optimize.__nnls b'$Revision: $'
scipy.optimize._cobyla b'$Revision: $'
scipy.optimize._lbfgsb b'$Revision: $'
scipy.optimize._minpack  1.10 
scipy.optimize._slsqp b'$Revision: $'
scipy.optimize.minpack2 b'$Revision: $'
scipy.signal.spline 0.2
scipy.sparse.linalg.eigen.arpack._arpack b'$Revision: $'
scipy.sparse.linalg.isolve._iterative b'$Revision: $'
scipy.special.specfun b'$Revision: $'
scipy.stats.mvn b'$Revision: $'
scipy.stats.statlib b'$Revision: $'
seaborn 0.12.2
seaborn.external.appdirs 1.4.4
seaborn.external.husl 2.1.0
six 1.16.0
socketserver 0.4
statsmodels 0.13.5
statsmodels.__init__ 0.13.5
statsmodels._version 0.13.5
statsmodels.api 0.13.5
statsmodels.tools.web 0.13.5
traitlets 5.7.1
traitlets._version 5.7.1
urllib.request 3.7
wcwidth 0.2.5
xmlrpc.client 3.7
zlib 1.0
zmq 23.2.0
zmq.sugar 23.2.0
zmq.sugar.version 23.2.0


# Load the shuttle data set directly from the course GitLab repository.
# The path was changed from ".../blob/..." to ".../raw/..." -- presumably so
# that the CSV file itself is fetched rather than the web page showing it.
data = pd.read_csv("https://app-learninglab.inria.fr/moocrr/gitlab/moocrr-session3/moocrr-reproducibility-study/raw/master/data/shuttle.csv")
# Bare expression: shows the table when this runs as a notebook cell.
data


%matplotlib inline
pd.set_option('mode.chained_assignment',None) # this removes a useless warning from pandas
import matplotlib.pyplot as plt

data["Frequency"]=data.Malfunction/data.Count
data.plot(x="Temperature",y="Frequency",kind="scatter",ylim=[0,1])
plt.grid(True)


import statsmodels.api as sm

# Helper columns: the complement of Malfunction within Count, and a constant
# column that serves as the intercept term of the design matrix.
# (Success is not referenced by the fits visible in this file.)
data["Success"] = data["Count"] - data["Malfunction"]
data["Intercept"] = 1

# Binomial GLM of Frequency on Temperature with a logit link; no weights are
# supplied to this fit.
# The link has to be passed as an instance, hence the call "logit()".
# NOTE(review): newer statsmodels releases reportedly deprecate the lowercase
# "logit" alias in favour of "Logit" -- confirm before upgrading.
logmodel = sm.GLM(
    endog=data["Frequency"],
    exog=data[["Intercept", "Temperature"]],
    family=sm.families.Binomial(link=sm.families.links.logit()),
).fit()

logmodel.summary()


# Same binomial/logit model as before, now weighting each row by its Count
# through var_weights. The link is again passed as an instance ("logit()").
logmodel = sm.GLM(
    endog=data["Frequency"],
    exog=data[["Intercept", "Temperature"]],
    family=sm.families.Binomial(link=sm.families.links.logit()),
    var_weights=data["Count"],
).fit()

logmodel.summary()


%matplotlib inline
data_pred = pd.DataFrame({'Temperature': np.linspace(start=30, stop=90, num=121), 'Intercept': 1})
data_pred['Frequency'] = logmodel.predict(data_pred)
data_pred.plot(x="Temperature",y="Frequency",kind="line",ylim=[0,1])
plt.scatter(x=data["Temperature"],y=data["Frequency"])
plt.grid(True)


# Logistic regression plot drawn by seaborn on the observed data.
# NOTE(review): regplot presumably adds a bootstrapped confidence band, which
# would vary slightly between runs unless a seed is supplied -- confirm.
sns.set(color_codes=True)
# Fix the axis window before plotting: x from 30 to 90, y from 0 to 1.
plt.xlim(30,90)
plt.ylim(0,1)
sns.regplot(x='Temperature', y='Frequency', data=data, logistic=True)
plt.show()

	Date	Count	Temperature	Pressure	Malfunction
0	4/12/81	6	66	50	0
1	11/12/81	6	70	50	1
2	3/22/82	6	69	50	0
3	11/11/82	6	68	50	0
4	4/04/83	6	67	50	0
5	6/18/82	6	72	50	0
6	8/30/83	6	73	100	0
7	11/28/83	6	70	100	0
8	2/03/84	6	57	200	1
9	4/06/84	6	63	200	1
10	8/30/84	6	70	200	1
11	10/05/84	6	78	200	0
12	11/08/84	6	67	200	0
13	1/24/85	6	53	200	2
14	4/12/85	6	67	200	0
15	4/29/85	6	75	200	0
16	6/17/85	6	70	200	0
17	7/2903/85	6	81	200	0
18	8/27/85	6	76	200	0
19	10/03/85	6	79	200	0
20	10/30/85	6	75	200	2
21	11/26/85	6	76	200	0
22	1/12/86	6	58	200	1

Dep. Variable:	Frequency	No. Observations:	23
Model:	GLM	Df Residuals:	21
Model Family:	Binomial	Df Model:	1
Link Function:	logit	Scale:	1.0000
Method:	IRLS	Log-Likelihood:	-3.9210
Date:	Thu, 22 Aug 2024	Deviance:	3.0144
Time:	09:30:41	Pearson chi2:	5.00
No. Iterations:	6	Pseudo R-squ. (CS):	0.04355
Covariance Type:	nonrobust

	coef	std err	z	P>|z|	[0.025	0.975]
Intercept	5.0850	7.477	0.680	0.496	-9.570	19.740
Temperature	-0.1156	0.115	-1.004	0.316	-0.341	0.110

Dep. Variable:	Frequency	No. Observations:	23
Model:	GLM	Df Residuals:	21
Model Family:	Binomial	Df Model:	1
Link Function:	logit	Scale:	1.0000
Method:	IRLS	Log-Likelihood:	-23.526
Date:	Thu, 22 Aug 2024	Deviance:	18.086
Time:	09:30:41	Pearson chi2:	30.0
No. Iterations:	6	Pseudo R-squ. (CS):	0.2344
Covariance Type:	nonrobust

	coef	std err	z	P>|z|	[0.025	0.975]
Intercept	5.0850	3.052	1.666	0.096	-0.898	11.068
Temperature	-0.1156	0.047	-2.458	0.014	-0.208	-0.023

Risk Analysis of the Space Shuttle: Pre-Challenger Prediction of Failure

Technical information on the computer on which the analysis is run

Loading and inspecting data

Logistic regression

Predicting failure probability

Computing and plotting uncertainty