diff --git a/src/R/study_vd.html b/src/R/study_vd.html new file mode 100644 index 0000000000000000000000000000000000000000..cdbe77b6c445c188e303824f6f02dba84230ea35 --- /dev/null +++ b/src/R/study_vd.html @@ -0,0 +1,299 @@ + + + + +
+ + + + + + + + + +library(readr)
+library(dplyr)
+##
+## Attaching package: 'dplyr'
+## The following objects are masked from 'package:stats':
+##
+## filter, lag
+## The following objects are masked from 'package:base':
+##
+## intersect, setdiff, setequal, union
+library(ggplot2)
+sessionInfo()
+## R version 3.5.1 (2018-07-02)
+## Platform: x86_64-w64-mingw32/x64 (64-bit)
+## Running under: Windows 7 x64 (build 7601) Service Pack 1
+##
+## Matrix products: default
+##
+## locale:
+## [1] LC_COLLATE=French_France.1252 LC_CTYPE=French_France.1252
+## [3] LC_MONETARY=French_France.1252 LC_NUMERIC=C
+## [5] LC_TIME=French_France.1252
+##
+## attached base packages:
+## [1] stats graphics grDevices utils datasets methods base
+##
+## other attached packages:
+## [1] ggplot2_3.0.0 dplyr_0.7.6 readr_1.1.1
+##
+## loaded via a namespace (and not attached):
+## [1] Rcpp_0.12.18 knitr_1.20 bindr_0.1.1 magrittr_1.5
+## [5] hms_0.4.2 munsell_0.5.0 tidyselect_0.2.4 colorspace_1.3-2
+## [9] R6_2.2.2 rlang_0.2.2 plyr_1.8.4 stringr_1.3.1
+## [13] tools_3.5.1 grid_3.5.1 gtable_0.2.0 withr_2.1.2
+## [17] htmltools_0.3.6 lazyeval_0.2.1 yaml_2.2.0 rprojroot_1.3-2
+## [21] digest_0.6.16 assertthat_0.2.0 tibble_1.4.2 crayon_1.3.4
+## [25] bindrcpp_0.2.2 purrr_0.2.5 glue_1.3.0 evaluate_0.11
+## [29] rmarkdown_1.10 stringi_1.1.7 compiler_3.5.1 pillar_1.3.0
+## [33] scales_1.0.0 backports_1.1.2 pkgconfig_2.0.2
+df <- read_csv("data/shuttle.csv")
+## Parsed with column specification:
+## cols(
+## Date = col_character(),
+## Count = col_integer(),
+## Temperature = col_integer(),
+## Pressure = col_integer(),
+## Malfunction = col_integer()
+## )
+head(df)
+## # A tibble: 6 x 5
+## Date Count Temperature Pressure Malfunction
+## <chr> <int> <int> <int> <int>
+## 1 4/12/81 6 66 50 0
+## 2 11/12/81 6 70 50 1
+## 3 3/22/82 6 69 50 0
+## 4 11/11/82 6 68 50 0
+## 5 4/04/83 6 67 50 0
+## 6 6/18/82 6 72 50 0
+summary(df)
+## Date Count Temperature Pressure
+## Length:23 Min. :6 Min. :53.00 Min. : 50.0
+## Class :character 1st Qu.:6 1st Qu.:67.00 1st Qu.: 75.0
+## Mode :character Median :6 Median :70.00 Median :200.0
+## Mean :6 Mean :69.57 Mean :152.2
+## 3rd Qu.:6 3rd Qu.:75.00 3rd Qu.:200.0
+## Max. :6 Max. :81.00 Max. :200.0
+## Malfunction
+## Min. :0.0000
+## 1st Qu.:0.0000
+## Median :0.0000
+## Mean :0.3913
+## 3rd Qu.:1.0000
+## Max. :2.0000
+ggplot(df) + geom_point(aes(x=Temperature,y=Malfunction/Count))
+ggplot(df) + geom_point(aes(x=Pressure,y=Malfunction/Count))
+logreg <- glm( Malfunction/Count ~ Temperature, data = df,family = binomial(link="logit"),weights=Count)
+summary(logreg)
+##
+## Call:
+## glm(formula = Malfunction/Count ~ Temperature, family = binomial(link = "logit"),
+## data = df, weights = Count)
+##
+## Deviance Residuals:
+## Min 1Q Median 3Q Max
+## -0.95227 -0.78299 -0.54117 -0.04379 2.65152
+##
+## Coefficients:
+## Estimate Std. Error z value Pr(>|z|)
+## (Intercept) 5.08498 3.05247 1.666 0.0957 .
+## Temperature -0.11560 0.04702 -2.458 0.0140 *
+## ---
+## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+##
+## (Dispersion parameter for binomial family taken to be 1)
+##
+## Null deviance: 24.230 on 22 degrees of freedom
+## Residual deviance: 18.086 on 21 degrees of freedom
+## AIC: 35.647
+##
+## Number of Fisher Scoring iterations: 5
+temp <- data.frame(Temperature = seq(30,90,0.5))
+pred <- predict(logreg,newdata= temp,type="response")
+ggplot() +
+ geom_point(aes(x=df$Temperature,y=df$Malfunction/df$Count)) +
+ geom_line(aes(x=temp$Temperature,y=pred)) +
+ xlab("Temperature") +
+ ylab("Malfunction/Count") +
+ theme_bw()
+ ## Intervalle de prévision
pred_se <- predict(logreg,newdata= temp,type="link",se.fit=TRUE) #type=link est le défaut
+ICinf <- logreg$family$linkinv(pred_se$fit-1.96*pred_se$se.fit)
+ICsup <- logreg$family$linkinv(pred_se$fit+1.96*pred_se$se.fit)
+ggplot() +
+ geom_point(aes(x=df$Temperature,y=df$Malfunction/df$Count)) +
+ geom_line(aes(x=temp$Temperature,y=pred)) +
+ geom_line(aes(x=temp$Temperature,y=ICinf),col='red') +
+ geom_line(aes(x=temp$Temperature,y=ICsup),col='red') +
+ xlab("Temperature") +
+ ylab("Malfunction/Count") +
+ theme_bw()
+