diff --git a/src/R/study_vd.html b/src/R/study_vd.html new file mode 100644 index 0000000000000000000000000000000000000000..cdbe77b6c445c188e303824f6f02dba84230ea35 --- /dev/null +++ b/src/R/study_vd.html @@ -0,0 +1,299 @@ + + + + + + + + + + + + + + +Challenger + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + +
+

Environnement de travail

+
library(readr)
+library(dplyr)
+
## 
+## Attaching package: 'dplyr'
+
## The following objects are masked from 'package:stats':
+## 
+##     filter, lag
+
## The following objects are masked from 'package:base':
+## 
+##     intersect, setdiff, setequal, union
+
library(ggplot2)
+sessionInfo()
+
## R version 3.5.1 (2018-07-02)
+## Platform: x86_64-w64-mingw32/x64 (64-bit)
+## Running under: Windows 7 x64 (build 7601) Service Pack 1
+## 
+## Matrix products: default
+## 
+## locale:
+## [1] LC_COLLATE=French_France.1252  LC_CTYPE=French_France.1252   
+## [3] LC_MONETARY=French_France.1252 LC_NUMERIC=C                  
+## [5] LC_TIME=French_France.1252    
+## 
+## attached base packages:
+## [1] stats     graphics  grDevices utils     datasets  methods   base     
+## 
+## other attached packages:
+## [1] ggplot2_3.0.0 dplyr_0.7.6   readr_1.1.1  
+## 
+## loaded via a namespace (and not attached):
+##  [1] Rcpp_0.12.18     knitr_1.20       bindr_0.1.1      magrittr_1.5    
+##  [5] hms_0.4.2        munsell_0.5.0    tidyselect_0.2.4 colorspace_1.3-2
+##  [9] R6_2.2.2         rlang_0.2.2      plyr_1.8.4       stringr_1.3.1   
+## [13] tools_3.5.1      grid_3.5.1       gtable_0.2.0     withr_2.1.2     
+## [17] htmltools_0.3.6  lazyeval_0.2.1   yaml_2.2.0       rprojroot_1.3-2 
+## [21] digest_0.6.16    assertthat_0.2.0 tibble_1.4.2     crayon_1.3.4    
+## [25] bindrcpp_0.2.2   purrr_0.2.5      glue_1.3.0       evaluate_0.11   
+## [29] rmarkdown_1.10   stringi_1.1.7    compiler_3.5.1   pillar_1.3.0    
+## [33] scales_1.0.0     backports_1.1.2  pkgconfig_2.0.2
+
+
+

Data

+
df <- read_csv("data/shuttle.csv")
+
## Parsed with column specification:
+## cols(
+##   Date = col_character(),
+##   Count = col_integer(),
+##   Temperature = col_integer(),
+##   Pressure = col_integer(),
+##   Malfunction = col_integer()
+## )
+
head(df)
+
## # A tibble: 6 x 5
+##   Date     Count Temperature Pressure Malfunction
+##   <chr>    <int>       <int>    <int>       <int>
+## 1 4/12/81      6          66       50           0
+## 2 11/12/81     6          70       50           1
+## 3 3/22/82      6          69       50           0
+## 4 11/11/82     6          68       50           0
+## 5 4/04/83      6          67       50           0
+## 6 6/18/82      6          72       50           0
+
+
+

Analyse primaire

+
summary(df)
+
##      Date               Count    Temperature       Pressure    
+##  Length:23          Min.   :6   Min.   :53.00   Min.   : 50.0  
+##  Class :character   1st Qu.:6   1st Qu.:67.00   1st Qu.: 75.0  
+##  Mode  :character   Median :6   Median :70.00   Median :200.0  
+##                     Mean   :6   Mean   :69.57   Mean   :152.2  
+##                     3rd Qu.:6   3rd Qu.:75.00   3rd Qu.:200.0  
+##                     Max.   :6   Max.   :81.00   Max.   :200.0  
+##   Malfunction    
+##  Min.   :0.0000  
+##  1st Qu.:0.0000  
+##  Median :0.0000  
+##  Mean   :0.3913  
+##  3rd Qu.:1.0000  
+##  Max.   :2.0000
+
ggplot(df) + geom_point(aes(x=Temperature,y=Malfunction/Count))
+

+
ggplot(df) + geom_point(aes(x=Pressure,y=Malfunction/Count))
+

+
+
+

Régression logistique

+
logreg <- glm( Malfunction/Count ~ Temperature, data = df,family = binomial(link="logit"),weights=Count)
+summary(logreg)
+
## 
+## Call:
+## glm(formula = Malfunction/Count ~ Temperature, family = binomial(link = "logit"), 
+##     data = df, weights = Count)
+## 
+## Deviance Residuals: 
+##      Min        1Q    Median        3Q       Max  
+## -0.95227  -0.78299  -0.54117  -0.04379   2.65152  
+## 
+## Coefficients:
+##             Estimate Std. Error z value Pr(>|z|)  
+## (Intercept)  5.08498    3.05247   1.666   0.0957 .
+## Temperature -0.11560    0.04702  -2.458   0.0140 *
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+## 
+## (Dispersion parameter for binomial family taken to be 1)
+## 
+##     Null deviance: 24.230  on 22  degrees of freedom
+## Residual deviance: 18.086  on 21  degrees of freedom
+## AIC: 35.647
+## 
+## Number of Fisher Scoring iterations: 5
+
+
+

Prévision

+
temp <- data.frame(Temperature = seq(30,90,0.5))
+pred <- predict(logreg,newdata= temp,type="response")
+ggplot() + 
+  geom_point(aes(x=df$Temperature,y=df$Malfunction/df$Count)) + 
+  geom_line(aes(x=temp$Temperature,y=pred))  +
+  xlab("Temperature") + 
+  ylab("Malfunction/Count") + 
+  theme_bw()
+

## Intervalle de prévision

+
pred_se <- predict(logreg,newdata= temp,type="link",se.fit=TRUE) #type=link est le défaut
+ICinf <- logreg$family$linkinv(pred_se$fit-1.96*pred_se$se.fit)
+ICsup <- logreg$family$linkinv(pred_se$fit+1.96*pred_se$se.fit)
+ggplot() + 
+  geom_point(aes(x=df$Temperature,y=df$Malfunction/df$Count)) + 
+  geom_line(aes(x=temp$Temperature,y=pred)) +
+  geom_line(aes(x=temp$Temperature,y=ICinf),col='red') +
+  geom_line(aes(x=temp$Temperature,y=ICsup),col='red') +
+  xlab("Temperature") + 
+  ylab("Malfunction/Count") + 
+  theme_bw()
+

+
+ + + + +
+ + + + + + + +