Supprimer les fichiers obsolètes

1bf2ebfc · Konrad Hinsen · b8a7cbf2 · b8a7cbf2 · b8a7cbf2 · b8a7cbf2
Commit 1bf2ebfc authored Sep 24, 2019 by Konrad Hinsen
5 changed files
--- a/module6/ressources/incidence_syndrome_grippal_snakemake/Snakefile
+++ b/module6/ressources/incidence_syndrome_grippal_snakemake/Snakefile
-rule download:
-     output:
-        "data/weekly-incidence.csv"
-     shell:
-        "wget -O {output} http://www.sentiweb.fr/datasets/incidence-PAY-3.csv"
-
-rule preprocess:
-     input:
-        "data/weekly-incidence.csv"
-     output:
-        data="data/preprocessed-weekly-incidence.csv",
-        errorlog="data/errors-from-preprocessing.txt"
-     script:
-        "scripts/preprocess.py"
-
-rule plot:
-     input:
-        "data/preprocessed-weekly-incidence.csv"
-     output:
-        "data/weekly-incidence-plot.png",
-        "data/weekly-incidence-plot-last-years.png"
-     script:
-        "scripts/incidence-plots.R"
-
-rule annual_incidence:
-     input:
-        "data/preprocessed-weekly-incidence.csv"
-     output:
-        "data/annual-incidence.csv"
-     script:
-        "scripts/annual-incidence.R"
-
-rule histogram:
-     input:
-        "data/annual-incidence.csv"
-     output:
-        "data/annual-incidence-histogram.png"
-     script:
-        "scripts/annual-incidence-histogram.R"
-
-rule all:
-     input:
-        "data/weekly-incidence.csv",
-        "data/preprocessed-weekly-incidence.csv",
-        "data/weekly-incidence-plot.png",
-        "data/weekly-incidence-plot-last-years.png",
-        "data/annual-incidence.csv",
-        "data/annual-incidence-histogram.png"
--- a/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/annual-incidence-histogram.R
+++ b/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/annual-incidence-histogram.R
-# Read in the data and convert the dates
-data = read.csv(snakemake@input[[1]])
-
-# Plot the histogram
-png(filename=snakemake@output[[1]])
-hist(data$incidence,
-     breaks=10,
-     xlab="Annual incidence",
-     ylab="Number of observations",
-     main="")
-dev.off()
--- a/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/annual-incidence.R
+++ b/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/annual-incidence.R
-# Read in the data and convert the dates
-data = read.csv(snakemake@input[[1]])
-names(data) <- c("date", "incidence")
-data$date <- as.Date(data$date)
-
-# A function that extracts the peak for year N
-yearly_peak = function(year) {
-      start = paste0(year-1,"-08-01")
-      end = paste0(year,"-08-01")
-      records = data$date > start & data$date <= end
-      sum(data$incidence[records])
-      }
-
-# The years for which we have the full peak
-years <- 1986:2018
-
-# Make a new data frame for the annual incidences
-annual_data = data.frame(year = years,
-                         incidence = sapply(years, yearly_peak))
-# write output file
-write.csv(annual_data,
-          file=snakemake@output[[1]],
-          row.names=FALSE)
--- a/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/incidence-plots.R
+++ b/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/incidence-plots.R
-# Read in the data and convert the dates
-data = read.csv(snakemake@input[[1]])
-data$week_starting <- as.Date(data$week_starting)
-
-# Plot the complete incidence dataset
-png(filename=snakemake@output[[1]])
-plot(data, type="l", xlab="Date", ylab="Weekly incidence")
-dev.off()
-
-# Zoom on the last four years
-png(filename=snakemake@output[[2]])
-plot(tail(data, 4*52), type="l", xlab="Date", ylab="Weekly incidence")
-dev.off()
--- a/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/preprocess.py
+++ b/module6/ressources/incidence_syndrome_grippal_snakemake/scripts/preprocess.py
-# Libraries used by this script:
-import datetime  # for date conversion
-import csv       # for writing output to a CSV file
-
-# Read the CSV file into memory
-data = open(snakemake.input[0], 'rb').read()
-# Decode the Latin-1 character set,
-# remove white space at both ends,
-# and split into lines.
-lines = data.decode('latin-1') \
-         .strip() \
-         .split('\n')
-# Discard the first line, which contains a comment
-data_lines = lines[1:]
-# Split each line into columns
-table = [line.split(',') for line in data_lines]
-
-# Remove records with missing data and write
-# the removed records to a separate file for inspection.
-with open(snakemake.output.errorlog, "w") as errorlog:
-    valid_table = []
-    for row in table:
-        missing = any([column == '' for column in row])
-        if missing:
-            errorlog.write("Missing data in record\n")
-            errorlog.write(str(row))
-            errorlog.write("\n")
-        else:
-            valid_table.append(row)
-
-# Extract the two relevant columns, "week" and "inc"
-week = [row[0] for row in valid_table]
-assert week[0] == 'week'
-del week[0]
-inc = [row[2] for row in valid_table]
-assert inc[0] == 'inc'
-del inc[0]
-data = list(zip(week, inc))
-
-# Check for obviously out-of-range values
-with open(snakemake.output.errorlog, "a") as errorlog:
-    for week, inc in data:
-        if len(week) != 6 or not week.isdigit():
-            errorlog.write("Suspect value in column 'week': {week}\n")
-        if not inc.isdigit():
-            errorlog.write("Suspect value in column 'inc': {inc}\n")
-
-# Convert year/week by date of the corresponding Monday,
-# then sort by increasing date
-converted_data = \
-    [(datetime.datetime.strptime(year_and_week + ":1" , '%G%V:%u').date(), inc)
-     for year_and_week, inc in data]
-converted_data.sort(key = lambda record: record[0])
-
-# Check that consecutive dates are seven days apart
-with open(snakemake.output.errorlog, "a") as errorlog:
-    dates = [date for date, _ in converted_data]
-    for date1, date2 in zip(dates[:-1], dates[1:]):
-        if date2-date1 != datetime.timedelta(weeks=1):
-            errorlog.write(f"{date2-date1} between {date1} and {date2}\n")
-
-# Write data to a CSV file with two columns:
-#  1. the date of the Monday of each week, in ISO format
-#  2. the incidence estimate for that week
-with open(snakemake.output.data, "w") as csvfile:
-    csv_writer = csv.writer(csvfile)
-    csv_writer.writerow(["week_starting", "incidence"])
-    for row in converted_data:
-        csv_writer.writerow(row)