From 1bf2ebfcd04f17a1aa27e41227e514333c8429e4 Mon Sep 17 00:00:00 2001 From: Konrad Hinsen Date: Tue, 24 Sep 2019 16:00:00 +0200 Subject: [PATCH] =?UTF-8?q?Supprimer=20les=20fichiers=20obsol=C3=A8tes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Snakefile | 48 ------------- .../scripts/annual-incidence-histogram.R | 11 --- .../scripts/annual-incidence.R | 23 ------- .../scripts/incidence-plots.R | 13 ---- .../scripts/preprocess.py | 69 ------------------- 5 files changed, 164 deletions(-) delete mode 100644 module6/ressources/incidence_syndrome_grippal_snakemake/Snakefile delete mode 100644 module6/ressources/incidence_syndrome_grippal_snakemake/scripts/annual-incidence-histogram.R delete mode 100644 module6/ressources/incidence_syndrome_grippal_snakemake/scripts/annual-incidence.R delete mode 100644 module6/ressources/incidence_syndrome_grippal_snakemake/scripts/incidence-plots.R delete mode 100644 module6/ressources/incidence_syndrome_grippal_snakemake/scripts/preprocess.py diff --git a/module6/ressources/incidence_syndrome_grippal_snakemake/Snakefile b/module6/ressources/incidence_syndrome_grippal_snakemake/Snakefile deleted file mode 100644 index c2f3328..0000000 --- a/module6/ressources/incidence_syndrome_grippal_snakemake/Snakefile +++ /dev/null @@ -1,48 +0,0 @@ -rule download: - output: - "data/weekly-incidence.csv" - shell: - "wget -O {output} http://www.sentiweb.fr/datasets/incidence-PAY-3.csv" - -rule preprocess: - input: - "data/weekly-incidence.csv" - output: - data="data/preprocessed-weekly-incidence.csv", - errorlog="data/errors-from-preprocessing.txt" - script: - "scripts/preprocess.py" - -rule plot: - input: - "data/preprocessed-weekly-incidence.csv" - output: - "data/weekly-incidence-plot.png", - "data/weekly-incidence-plot-last-years.png" - script: - "scripts/incidence-plots.R" - -rule annual_incidence: - input: - "data/preprocessed-weekly-incidence.csv" - output: - "data/annual-incidence.csv" - script: - "scripts/annual-incidence.R" - -rule histogram: - input: - "data/annual-incidence.csv" - output: - "data/annual-incidence-histogram.png" - script: - "scripts/annual-incidence-histogram.R" - -rule all: - input: - "data/weekly-incidence.csv", - "data/preprocessed-weekly-incidence.csv", - "data/weekly-incidence-plot.png", - "data/weekly-incidence-plot-last-years.png", - "data/annual-incidence.csv", - "data/annual-incidence-histogram.png" diff --git a/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/annual-incidence-histogram.R b/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/annual-incidence-histogram.R deleted file mode 100644 index 0871acf..0000000 --- a/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/annual-incidence-histogram.R +++ /dev/null @@ -1,11 +0,0 @@ -# Read in the data and convert the dates -data = read.csv(snakemake@input[[1]]) - -# Plot the histogram -png(filename=snakemake@output[[1]]) -hist(data$incidence, - breaks=10, - xlab="Annual incidence", - ylab="Number of observations", - main="") -dev.off() diff --git a/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/annual-incidence.R b/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/annual-incidence.R deleted file mode 100644 index aa2b2c8..0000000 --- a/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/annual-incidence.R +++ /dev/null @@ -1,23 +0,0 @@ -# Read in the data and convert the dates -data = read.csv(snakemake@input[[1]]) -names(data) <- c("date", "incidence") -data$date <- as.Date(data$date) - -# A function that extracts the peak for year N -yearly_peak = function(year) { - start = paste0(year-1,"-08-01") - end = paste0(year,"-08-01") - records = data$date > start & data$date <= end - sum(data$incidence[records]) - } - -# The years for which we have the full peak -years <- 1986:2018 - -# Make a new data frame for the annual incidences -annual_data = data.frame(year = years, - incidence = sapply(years, yearly_peak)) -# write output file -write.csv(annual_data, - file=snakemake@output[[1]], - row.names=FALSE) diff --git a/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/incidence-plots.R b/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/incidence-plots.R deleted file mode 100644 index abdd136..0000000 --- a/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/incidence-plots.R +++ /dev/null @@ -1,13 +0,0 @@ -# Read in the data and convert the dates -data = read.csv(snakemake@input[[1]]) -data$week_starting <- as.Date(data$week_starting) - -# Plot the complete incidence dataset -png(filename=snakemake@output[[1]]) -plot(data, type="l", xlab="Date", ylab="Weekly incidence") -dev.off() - -# Zoom on the last four years -png(filename=snakemake@output[[2]]) -plot(tail(data, 4*52), type="l", xlab="Date", ylab="Weekly incidence") -dev.off() diff --git a/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/preprocess.py b/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/preprocess.py deleted file mode 100644 index c3aef7b..0000000 --- a/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/preprocess.py +++ /dev/null @@ -1,69 +0,0 @@ -# Libraries used by this script: -import datetime # for date conversion -import csv # for writing output to a CSV file - -# Read the CSV file into memory -data = open(snakemake.input[0], 'rb').read() -# Decode the Latin-1 character set, -# remove white space at both ends, -# and split into lines. -lines = data.decode('latin-1') \ - .strip() \ - .split('\n') -# Discard the first line, which contains a comment -data_lines = lines[1:] -# Split each line into columns -table = [line.split(',') for line in data_lines] - -# Remove records with missing data and write -# the removed records to a separate file for inspection. -with open(snakemake.output.errorlog, "w") as errorlog: - valid_table = [] - for row in table: - missing = any([column == '' for column in row]) - if missing: - errorlog.write("Missing data in record\n") - errorlog.write(str(row)) - errorlog.write("\n") - else: - valid_table.append(row) - -# Extract the two relevant columns, "week" and "inc" -week = [row[0] for row in valid_table] -assert week[0] == 'week' -del week[0] -inc = [row[2] for row in valid_table] -assert inc[0] == 'inc' -del inc[0] -data = list(zip(week, inc)) - -# Check for obviously out-of-range values -with open(snakemake.output.errorlog, "a") as errorlog: - for week, inc in data: - if len(week) != 6 or not week.isdigit(): - errorlog.write("Suspect value in column 'week': {week}\n") - if not inc.isdigit(): - errorlog.write("Suspect value in column 'inc': {inc}\n") - -# Convert year/week by date of the corresponding Monday, -# then sort by increasing date -converted_data = \ - [(datetime.datetime.strptime(year_and_week + ":1" , '%G%V:%u').date(), inc) - for year_and_week, inc in data] -converted_data.sort(key = lambda record: record[0]) - -# Check that consecutive dates are seven days apart -with open(snakemake.output.errorlog, "a") as errorlog: - dates = [date for date, _ in converted_data] - for date1, date2 in zip(dates[:-1], dates[1:]): - if date2-date1 != datetime.timedelta(weeks=1): - errorlog.write(f"{date2-date1} between {date1} and {date2}\n") - -# Write data to a CSV file with two columns: -# 1. the date of the Monday of each week, in ISO format -# 2. the incidence estimate for that week -with open(snakemake.output.data, "w") as csvfile: - csv_writer = csv.writer(csvfile) - csv_writer.writerow(["week_starting", "incidence"]) - for row in converted_data: - csv_writer.writerow(row) -- 2.18.1