library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 7 x64 (build 7601) Service Pack 1
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=French_France.1252 LC_CTYPE=French_France.1252
## [3] LC_MONETARY=French_France.1252 LC_NUMERIC=C
## [5] LC_TIME=French_France.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_3.0.0 dplyr_0.7.6 readr_1.1.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.18 knitr_1.20 bindr_0.1.1 magrittr_1.5
## [5] hms_0.4.2 munsell_0.5.0 tidyselect_0.2.4 colorspace_1.3-2
## [9] R6_2.2.2 rlang_0.2.2 plyr_1.8.4 stringr_1.3.1
## [13] tools_3.5.1 grid_3.5.1 gtable_0.2.0 withr_2.1.2
## [17] htmltools_0.3.6 lazyeval_0.2.1 yaml_2.2.0 rprojroot_1.3-2
## [21] digest_0.6.16 assertthat_0.2.0 tibble_1.4.2 crayon_1.3.4
## [25] bindrcpp_0.2.2 purrr_0.2.5 glue_1.3.0 evaluate_0.11
## [29] rmarkdown_1.10 stringi_1.1.7 compiler_3.5.1 pillar_1.3.0
## [33] scales_1.0.0 backports_1.1.2 pkgconfig_2.0.2
df <- read_csv("data/shuttle.csv")
## Parsed with column specification:
## cols(
## Date = col_character(),
## Count = col_integer(),
## Temperature = col_integer(),
## Pressure = col_integer(),
## Malfunction = col_integer()
## )
head(df)
## # A tibble: 6 x 5
## Date Count Temperature Pressure Malfunction
## <chr> <int> <int> <int> <int>
## 1 4/12/81 6 66 50 0
## 2 11/12/81 6 70 50 1
## 3 3/22/82 6 69 50 0
## 4 11/11/82 6 68 50 0
## 5 4/04/83 6 67 50 0
## 6 6/18/82 6 72 50 0
summary(df)
## Date Count Temperature Pressure
## Length:23 Min. :6 Min. :53.00 Min. : 50.0
## Class :character 1st Qu.:6 1st Qu.:67.00 1st Qu.: 75.0
## Mode :character Median :6 Median :70.00 Median :200.0
## Mean :6 Mean :69.57 Mean :152.2
## 3rd Qu.:6 3rd Qu.:75.00 3rd Qu.:200.0
## Max. :6 Max. :81.00 Max. :200.0
## Malfunction
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.3913
## 3rd Qu.:1.0000
## Max. :2.0000
ggplot(df) + geom_point(aes(x=Temperature,y=Malfunction/Count))
ggplot(df) + geom_point(aes(x=Pressure,y=Malfunction/Count))
logreg <- glm( Malfunction/Count ~ Temperature, data = df,family = binomial(link="logit"),weights=Count)
summary(logreg)
##
## Call:
## glm(formula = Malfunction/Count ~ Temperature, family = binomial(link = "logit"),
## data = df, weights = Count)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.95227 -0.78299 -0.54117 -0.04379 2.65152
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.08498 3.05247 1.666 0.0957 .
## Temperature -0.11560 0.04702 -2.458 0.0140 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 24.230 on 22 degrees of freedom
## Residual deviance: 18.086 on 21 degrees of freedom
## AIC: 35.647
##
## Number of Fisher Scoring iterations: 5
temp <- data.frame(Temperature = seq(30,90,0.5))
pred <- predict(logreg,newdata= temp,type="response")
ggplot() +
geom_point(aes(x=df$Temperature,y=df$Malfunction/df$Count)) +
geom_line(aes(x=temp$Temperature,y=pred)) +
xlab("Temperature") +
ylab("Malfunction/Count") +
theme_bw()
## Intervalle de prévision
pred_se <- predict(logreg,newdata= temp,type="link",se.fit=TRUE) #type=link est le défaut
ICinf <- logreg$family$linkinv(pred_se$fit-1.96*pred_se$se.fit)
ICsup <- logreg$family$linkinv(pred_se$fit+1.96*pred_se$se.fit)
ggplot() +
geom_point(aes(x=df$Temperature,y=df$Malfunction/df$Count)) +
geom_line(aes(x=temp$Temperature,y=pred)) +
geom_line(aes(x=temp$Temperature,y=ICinf),col='red') +
geom_line(aes(x=temp$Temperature,y=ICsup),col='red') +
xlab("Temperature") +
ylab("Malfunction/Count") +
theme_bw()