---
title: "Analyse de l'incidence du syndrome grippal"
author: "John"
date: "2023-05-24"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r}
# Download the incidence table; the first line of the file is skipped.
url_data <- "https://www.sentiweb.fr/datasets/incidence-PAY-3.csv"
data <- read.csv(url_data, skip = 1)
head(data)
```

On retire les données à partir de 2020 pour éviter la perturbation liée au Covid.

```{r}
# `week` is a year-week number (e.g. 199501), so this keeps weeks before 2020.
data <- data[data$week < 202001, ]
head(data)
```

On repère la ou les lignes contenant des valeurs manquantes.

```{r}
# Flag every row that holds at least one missing value.
lignes_na <- !complete.cases(data)
data_na <- data[lignes_na, ]
# Drop the flagged rows with logical negation. Negating the mask with `-`
# would turn it into 0 / -1 indices and remove the first row (or every row
# when nothing is flagged) instead of the rows containing NA.
data_nona <- data[!lignes_na, ]
nrow(data_nona)
nrow(data)
```

On vérifie le format des colonnes.

```{r}
head(data_nona)
```

Chargement de parsedate et définition de la fonction de conversion.

```{r}
library(parsedate)

# Worked example on a single week number before wrapping it in a function.
semaine_test <- 199501
ws <- as.character(semaine_test)
iso <- paste0(substring(ws, 1, 4), "-W", substring(ws, 5, 6))
parse_iso_8601(iso)

# Convert year-week numbers (e.g. 199501) to Date values.
#
# date: vector of numbers whose first four digits are the year and whose
#       next two digits are the week.
# Returns the Date obtained by parsing the "YYYY-Www" string built from
# those digits with parsedate::parse_iso_8601().
convert_week <- function(date) {
  ws <- as.character(date)
  iso <- paste0(substring(ws, 1, 4), "-W", substring(ws, 5, 6))
  as.Date(parse_iso_8601(iso))
}
```

```{r}
data$date <- convert_week(data$week)
head(data)
class(data$date)
```

Tri des données dans l'ordre chronologique.

```{r}
data <- data[order(data$date), ]
head(data)
```

Test de la chronologie : deux dates consécutives doivent être séparées de 7 jours.

```{r}
all(diff(data$date) == 7)
```

Représentation visuelle.

```{r}
with(data, plot(date, inc, type = "l"))
```

```{r}
# Zoom on the last 250 rows.
with(tail(data, 250), plot(date, inc, type = "l"))
```

Calcul de l'incidence sur une année, du 1er août de l'année précédente au 1er août de l'année considérée.

```{r}
annee <- 1990
debut <- as.Date(paste0(annee - 1, "-08-01"))
fin <- as.Date(paste0(annee, "-08-01"))
semaines <- data$date > debut & data$date <= fin
sum(data$inc[semaines], na.rm = TRUE)
```

Généralisation.

```{r}
# Sum the incidence over the window ending on August 1st of `annee`.
#
# annee:   year closing the window; the window is
#          (August 1st of annee - 1, August 1st of annee].
# donnees: data frame with a Date column `date` and a column `inc`;
#          defaults to the global `data` so existing calls are unchanged.
# Returns the sum of `inc` over the window, ignoring missing values.
pic_annuel <- function(annee, donnees = data) {
  debut <- as.Date(paste0(annee - 1, "-08-01"))
  fin <- as.Date(paste0(annee, "-08-01"))
  semaines <- donnees$date > debut & donnees$date <= fin
  sum(donnees$inc[semaines], na.rm = TRUE)
}
```

Calcul pour les années 1986 à 2017 (l'année 1985 est retirée).

```{r}
annees <- 1986:2017
# vapply() guarantees exactly one numeric value per year.
icd_annuelle <- data.frame(
  annee = annees,
  icd = vapply(annees, pic_annuel, numeric(1))
)
head(icd_annuelle)
```

Représentation graphique.

```{r}
plot(icd_annuelle, type = "p")
```

Années aux incidences annuelles les plus élevées.

```{r}
head(icd_annuelle[order(-icd_annuelle$icd), ])
```

Représentation graphique.

```{r}
# Distribution of the annual incidence values, split into about 10 bins.
hist(icd_annuelle$icd, breaks = 10)
```