diff --git a/module4/challenger.org b/module4/challenger.org index 58de4b1ba5d3ff014353562b0b9f63fc6c38fd57..0dd460fd4afd147ff776614195f7cf0000f374e4 100644 --- a/module4/challenger.org +++ b/module4/challenger.org @@ -181,7 +181,7 @@ data We know from our previous experience on this data set that filtering data is a really bad idea. We will therefore process it as such. -#+BEGIN_SRC python :session :exports both :results output +#+BEGIN_SRC python :session :exports both :results output #%matplotlib inline pd.set_option('mode.chained_assignment',None) # this removes a useless warning from pandas import matplotlib.pyplot as plt @@ -193,6 +193,8 @@ plt.savefig("fig1.png") #+END_SRC #+RESULTS: +: /home/eliox/miniconda3/envs/mooc-rr-emacs/lib/python3.8/site-packages/pandas/plotting/_matplotlib/core.py:320: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`). +: fig = self.plt.figure(figsize=self.figsize) : 'c' argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with 'x' & 'y'. Please use a 2-D array with a single row if you really want to specify the same RGB or RGBA value for all points. [[./fig1.png]] @@ -206,7 +208,7 @@ Let's assume O-rings independently fail with the same probability which solely depends on temperature. A logistic regression should allow us to estimate the influence of temperature. -#+BEGIN_SRC python :session :exports both :results value +#+BEGIN_SRC python :session :exports both :results output import statsmodels.api as sm data["Success"]=data.Count-data.Malfunction @@ -215,11 +217,14 @@ data["Intercept"]=1 logmodel=sm.GLM(data['Frequency'], data[['Intercept','Temperature']], family=sm.families.Binomial(sm.families.links.logit)).fit() -logmodel.summary() +print(logmodel.summary()) #+END_SRC #+RESULTS: #+begin_example +/tmp/babel-tMc2Ef/python-Skn7ZF:7: DeprecationWarning: Calling Family(..) with a link class as argument is deprecated. +Use an instance of a link class instead. + family=sm.families.Binomial(sm.families.links.logit)).fit() Generalized Linear Model Regression Results ============================================================================== Dep. Variable: Frequency No. Observations: 23 @@ -227,8 +232,8 @@ Model: GLM Df Residuals: 21 Model Family: Binomial Df Model: 1 Link Function: logit Scale: 1.0000 Method: IRLS Log-Likelihood: -3.9210 -Date: mar., 05 mai 2020 Deviance: 3.0144 -Time: 22:25:31 Pearson chi2: 5.00 +Date: mer., 06 mai 2020 Deviance: 3.0144 +Time: 18:03:21 Pearson chi2: 5.00 No. Iterations: 6 Covariance Type: nonrobust =============================================================================== @@ -269,8 +274,8 @@ Model: GLM Df Residuals: 21 Model Family: Binomial Df Model: 1 Link Function: logit Scale: 1.0000 Method: IRLS Log-Likelihood: -23.526 -Date: mar., 05 mai 2020 Deviance: 18.086 -Time: 22:25:31 Pearson chi2: 30.0 +Date: mer., 06 mai 2020 Deviance: 18.086 +Time: 18:03:21 Pearson chi2: 30.0 No. Iterations: 6 Covariance Type: nonrobust =============================================================================== @@ -300,14 +305,31 @@ estimate the failure probability for such temperature using our model.: #+BEGIN_SRC python :session :results output :exports both #%matplotlib inline data_pred = pd.DataFrame({'Temperature': np.linspace(start=30, stop=90, num=121), 'Intercept': 1}) +print(logmodel.predict(data_pred)) +""" data_pred['Frequency'] = logmodel.predict(data_pred) -data_pred.plot(x="Temperature",y="Frequency",kind="line",ylim=[0,1]) +data_pred.plot(x="Temperature",y="Frequency",kind="scatter",ylim=[0,1]) plt.scatter(x=data["Temperature"],y=data["Frequency"]) plt.grid(True) plt.savefig("fig2.png") +""" #+END_SRC #+RESULTS: +#+begin_example +0 1.0 +1 1.0 +2 1.0 +3 1.0 +4 1.0 + ... +116 1.0 +117 1.0 +118 1.0 +119 1.0 +120 1.0 +Length: 121, dtype: float64 +#+end_example [[./fig2.png]] @@ -325,6 +347,7 @@ I use regplot. #+BEGIN_SRC python :session :results output :exports both sns.set(color_codes=True) +plt.figure(figsize=(5,3)) plt.xlim(30,90) plt.ylim(0,1) sns.regplot(x='Temperature', y='Frequency', data=data, logistic=True) @@ -332,6 +355,8 @@ plt.savefig("fig3.png") #+END_SRC #+RESULTS: +: /tmp/babel-tMc2Ef/python-j6HG2E:2: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`). +: plt.figure(figsize=(5,3)) [[./fig3.png]]