diff --git a/module2/exo1/cosxsx.png b/module2/exo1/cosxsx.png index cddaddf5bc376e73d8f3698c2b32903e6c3e4d3d..42736ffdd8056ac519dd511816ce51d7a933e780 100644 Binary files a/module2/exo1/cosxsx.png and b/module2/exo1/cosxsx.png differ diff --git a/module2/exo5/exo5_python_en.org b/module2/exo5/exo5_python_en.org index 39a203bed10eed51f00f5222dee62329bf13ec72..9ff283dcfe9348c7b3911968be250e13fd43f258 100644 --- a/module2/exo5/exo5_python_en.org +++ b/module2/exo5/exo5_python_en.org @@ -33,7 +33,7 @@ Challenger. * Loading the data We start by loading this data: -#+begin_src python :results value :session *python* :exports both +#+begin_src python :results value :session :exports both import numpy as np import pandas as pd data = pd.read_csv("shuttle.csv") @@ -42,30 +42,30 @@ data #+RESULTS: #+begin_example - Date Count Temperature Pressure Malfunction -0 4/12/81 6 66 50 0 -1 11/12/81 6 70 50 1 -2 3/22/82 6 69 50 0 -3 11/11/82 6 68 50 0 -4 4/04/83 6 67 50 0 -5 6/18/82 6 72 50 0 -6 8/30/83 6 73 100 0 -7 11/28/83 6 70 100 0 -8 2/03/84 6 57 200 1 -9 4/06/84 6 63 200 1 -10 8/30/84 6 70 200 1 -11 10/05/84 6 78 200 0 -12 11/08/84 6 67 200 0 -13 1/24/85 6 53 200 2 -14 4/12/85 6 67 200 0 -15 4/29/85 6 75 200 0 -16 6/17/85 6 70 200 0 -17 7/29/85 6 81 200 0 -18 8/27/85 6 76 200 0 -19 10/03/85 6 79 200 0 -20 10/30/85 6 75 200 2 -21 11/26/85 6 76 200 0 -22 1/12/86 6 58 200 1 + Date Count Temperature Pressure Malfunction +0 4/12/81 6 66 50 0 +1 11/12/81 6 70 50 1 +2 3/22/82 6 69 50 0 +3 11/11/82 6 68 50 0 +4 4/04/83 6 67 50 0 +5 6/18/82 6 72 50 0 +6 8/30/83 6 73 100 0 +7 11/28/83 6 70 100 0 +8 2/03/84 6 57 200 1 +9 4/06/84 6 63 200 1 +10 8/30/84 6 70 200 1 +11 10/05/84 6 78 200 0 +12 11/08/84 6 67 200 0 +13 1/24/85 6 53 200 2 +14 4/12/85 6 67 200 0 +15 4/29/85 6 75 200 0 +16 6/17/85 6 70 200 0 +17 7/29/85 6 81 200 0 +18 8/27/85 6 76 200 0 +19 10/03/85 6 79 200 0 +20 10/30/85 6 75 200 2 +21 11/26/85 6 76 200 0 +22 1/12/86 6 58 200 1 #+end_example The data set shows us the date of 
each test, the number of O-rings @@ -78,8 +78,13 @@ Flights without incidents do not provide any information on the influence of temperature or pressure on malfunction. We thus focus on the experiments in which at least one O-ring was defective. -#+begin_src python :results value :session *python* :exports both -data = data[data.Malfunction>0] +#+begin_quote +This is suspect. What if launches without defects were predominately +at higher temperatures? +#+end_quote + +#+begin_src python :results value :session :exports both +data = data[data.Malfunction > 0] data #+end_src @@ -97,8 +102,12 @@ We have a high temperature variability but the pressure is almost always 200, which should simplify the analysis. +#+begin_quote +"Almost always" is alarming. +#+end_quote + How does the frequency of failure vary with temperature? -#+begin_src python :results output file :var matplot_lib_filename="freq_temp_python.png" :exports both :session *python* +#+begin_src python :results output file :var matplot_lib_filename="freq_temp_python.png" :exports both :session import matplotlib.pyplot as plt plt.clf() @@ -120,37 +129,43 @@ estimate the impact of temperature $t$ on the probability of O-ring malfunction. Suppose that each of the six O-rings is damaged with the same probability and independently of the others and that this probability -depends only on the temperature. If $p(t)$ is this probability, the +depends only on the temperature. +#+begin_quote +OK, yes, let's suppose that. +#+end_quote +If $p(t)$ is this probability, the number $D$ of malfunctioning O-rings during a flight at temperature $t$ follows a binomial law with parameters $n=6$ and $p=p(t)$. To link $p(t)$ to $t$, we will therefore perform a logistic regression. 
-#+begin_src python :results value :session *python* :exports both +#+begin_src python :results value :session :exports both import statsmodels.api as sm data["Success"]=data.Count-data.Malfunction data["Intercept"]=1 -# logit_model=sm.Logit(data["Frequency"],data[["Intercept","Temperature"]]).fit() -logmodel=sm.GLM(data['Frequency'], data[['Intercept','Temperature']], family=sm.families.Binomial(sm.families.links.logit)).fit() +# logit_model=sm.Logit(data["Frequency"],data[["Intercept","Temperature"]]).fit() +link = sm.families.links.Logit() +logmodel=sm.GLM(data['Frequency'], data[['Intercept','Temperature']], family=sm.families.Binomial(link)).fit() logmodel.summary() #+end_src #+RESULTS: #+begin_example - Generalized Linear Model Regression Results -============================================================================== -Dep. Variable: Frequency No. Observations: 7 -Model: GLM Df Residuals: 5 -Model Family: Binomial Df Model: 1 -Link Function: logit Scale: 1.0 -Method: IRLS Log-Likelihood: -3.6370 -Date: Fri, 20 Jul 2018 Deviance: 3.3763 -Time: 16:56:08 Pearson chi2: 0.236 -No. Iterations: 5 + Generalized Linear Model Regression Results +=============================================================================== +Dep. Variable: Frequency No. Observations: 7 +Model: GLM Df Residuals: 5 +Model Family: Binomial Df Model: 1 +Link Function: Logit Scale: 1.0000 +Method: IRLS Log-Likelihood: -2.5250 +Date: jeu., 11 avril 2024 Deviance: 0.22231 +Time: 15:05:37 Pearson chi2: 0.236 +No. Iterations: 4 Pseudo R-squ. 
(CS): 1.926e-05 +Covariance Type: nonrobust =============================================================================== coef std err z P>|z| [0.025 0.975] ------------------------------------------------------------------------------- @@ -163,13 +178,23 @@ The most likely estimator of the temperature parameter is 0.0014 and the standard error of this estimator is 0.122, in other words we cannot distinguish any particular impact and we must take our estimates with caution. +#+begin_quote +Indeed... and look at that /p value/ (0.991) which more-or-less says, +for the subset of data, that temperature likely has /no effect/ on +likelihood of malfunction. +#+end_quote * Estimation of the probability of O-ring malfunction -The expected temperature on the take-off day is 31°F. Let's try to +The expected temperature on the take-off day is 31°F. +#+begin_quote +A temperature at/around which we have /no data/. Extrapolating from +higher temperatures — bad idea. +#+end_quote +Let's try to estimate the probability of O-ring malfunction at this temperature from the model we just built: -#+begin_src python :results output file :var matplot_lib_filename="proba_estimate_python.png" :exports both :session *python* +#+begin_src python :results output file :var matplot_lib_filename="proba_estimate_python.png" :exports both :session import matplotlib.pyplot as plt data_pred = pd.DataFrame({'Temperature': np.linspace(start=30, stop=90, num=121), 'Intercept': 1}) @@ -188,9 +213,14 @@ print(matplot_lib_filename) As expected from the initial data, the temperature has no significant impact on the probability of failure of the O-rings. It will be about 0.2, as in the tests -where we had a failure of at least one joint. Let's get back to the initial dataset to estimate the probability of failure: - -#+begin_src python :results output :session *python* :exports both +where we had a failure of at least one joint. 
+#+begin_quote +Opting not to exclude the entries where no malfunction occurred +reveals a strikingly different picture. +#+end_quote +Let's get back to the initial dataset to estimate the probability of failure: + +#+begin_src python :results output :session :exports both data = pd.read_csv("shuttle.csv") print(np.sum(data.Malfunction)/np.sum(data.Count)) #+end_src @@ -198,11 +228,17 @@ print(np.sum(data.Malfunction)/np.sum(data.Count)) #+RESULTS: : 0.06521739130434782 -This probability is thus about $p=0.065$. Knowing that there is +This probability is thus about $p=0.065$. +#+begin_quote +This has an air of desperation about it. So now, we're just taking the +proportion of failures to total O-rings... across all the flights, +ignoring temperature altogether? +#+end_quote +Knowing that there is a primary and a secondary O-ring on each of the three parts of the launcher, the probability of failure of both joints of a launcher is $p^2 \approx 0.00425$. The probability of failure of any one of the -launchers is $1-(1-p^2)^3 \approx 1.2%$. That would really be +launchers is $1-(1-p^2)^3 \approx 1.2\%$. That would really be bad luck.... Everything is under control, so the takeoff can happen tomorrow as planned. diff --git a/module2/exo5/freq_temp_python.png b/module2/exo5/freq_temp_python.png index 93cb9e626441d23f6dff59ed252d7b14eb37abdb..e5d55d1abf0eb0e665dd23b7c19ca916310cf190 100644 Binary files a/module2/exo5/freq_temp_python.png and b/module2/exo5/freq_temp_python.png differ diff --git a/module2/exo5/proba_estimate_python.png b/module2/exo5/proba_estimate_python.png index 77fc4b275dd8815b1ab91cd3b67b1beb93e00748..7124bb498cb0f155dad9ede93189cc75c8308042 100644 Binary files a/module2/exo5/proba_estimate_python.png and b/module2/exo5/proba_estimate_python.png differ