Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
mooc-rr-ressources
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
9b01737421a268a54ae68902c9bdbf26
mooc-rr-ressources
Commits
57d0e5ff
You need to sign in or sign up before continuing.
Commit
57d0e5ff
authored
Aug 28, 2019
by
Konrad Hinsen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Debut d'un tutoriel pour snakemake
parent
5b4af3a7
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
172 additions
and
2 deletions
+172
-2
.gitignore
.gitignore
+3
-1
Makefile
Makefile
+1
-1
Snakefile
...ressources/incidence_syndrome_grippal_snakemake/Snakefile
+39
-0
annual-incidence-histogram.R
...me_grippal_snakemake/scripts/annual-incidence-histogram.R
+11
-0
annual-incidence.R
...nce_syndrome_grippal_snakemake/scripts/annual-incidence.R
+23
-0
incidence-plots.R
...ence_syndrome_grippal_snakemake/scripts/incidence-plots.R
+14
-0
preprocess.py
...ncidence_syndrome_grippal_snakemake/scripts/preprocess.py
+68
-0
snakemake_tutorial_fr.org
module6/ressources/snakemake_tutorial_fr.org
+13
-0
No files found.
.gitignore
View file @
57d0e5ff
...
...
@@ -9,4 +9,6 @@
*.tex
_minted*
svg-inkscape*
*-svg.pdf
\ No newline at end of file
*-svg.pdf
.Rhistory
.snakemake
Makefile
View file @
57d0e5ff
ressources-md
:
for
i
in
module1/ressources module2/ressources module2/slides module3/ressources
\
module4/ressources module5/ressources
;
do
\
module4/ressources module5/ressources
module6/ressources
;
do
\
make
-C
$$
i ressources-md
;
\
done
module6/ressources/incidence_syndrome_grippal_snakemake/Snakefile
0 → 100644
View file @
57d0e5ff
# Fetch the raw weekly incidence dataset with wget and store it
# under the path declared as this rule's output.
rule download:
    output:
        "data/weekly-incidence.csv"
    shell:
        # {output} is replaced by Snakemake with the output path above.
        "wget -O {output} http://www.sentiweb.fr/datasets/incidence-PAY-3.csv"
# Run scripts/preprocess.py on the downloaded file. The script reads
# snakemake.input[0] and writes the two named outputs below
# (snakemake.output.data and snakemake.output.errorlog).
rule preprocess:
    input:
        "data/weekly-incidence.csv"
    output:
        data="data/preprocessed-weekly-incidence.csv",
        errorlog="data/errors-from-preprocessing.csv"
    script:
        "scripts/preprocess.py"
# Run scripts/incidence-plots.R on the preprocessed data. The script
# writes two PNG files: output 1 is the plot of the complete dataset,
# output 2 is the zoom on the last four years.
rule plot:
    input:
        "data/preprocessed-weekly-incidence.csv"
    output:
        "data/weekly-incidence-plot.png",
        "data/weekly-incidence-plot-last-years.png"
    script:
        "scripts/incidence-plots.R"
# Run scripts/annual-incidence.R to aggregate the preprocessed weekly
# data into one incidence value per year, written as a CSV file.
rule annual_incidence:
    input:
        "data/preprocessed-weekly-incidence.csv"
    output:
        "data/annual-incidence.csv"
    script:
        "scripts/annual-incidence.R"
# Run scripts/annual-incidence-histogram.R to draw a histogram of the
# annual incidence values into a PNG file.
rule histogram:
    input:
        "data/annual-incidence.csv"
    output:
        "data/annual-incidence-histogram.png"
    script:
        "scripts/annual-incidence-histogram.R"
module6/ressources/incidence_syndrome_grippal_snakemake/scripts/annual-incidence-histogram.R
0 → 100644
View file @
57d0e5ff
# Histogram of the annual incidence values.
#
# Run by Snakemake, which provides the `snakemake` object:
#   snakemake@input[[1]]   CSV file with an `incidence` column
#   snakemake@output[[1]]  PNG file to create

# Load the annual incidence table (no date handling is needed here).
annual <- read.csv(snakemake@input[[1]])

# Open the PNG device, draw the histogram, then close the device so
# that the image file is actually written.
histogram_file <- snakemake@output[[1]]
png(filename = histogram_file)
hist(annual$incidence,
     breaks = 10,
     xlab = "Annual incidence",
     ylab = "Number of observations",
     main = "")
dev.off()
module6/ressources/incidence_syndrome_grippal_snakemake/scripts/annual-incidence.R
0 → 100644
View file @
57d0e5ff
# Aggregate weekly incidence into one value per year.
#
# Run by Snakemake, which provides the `snakemake` object:
#   snakemake@input[[1]]   two-column CSV file (date, weekly incidence)
#   snakemake@output[[1]]  CSV file to create (columns: year, incidence)

# Load the weekly data, name the two columns and parse the dates.
# NOTE(review): read.csv defaults to header = TRUE, so the first line of
# the input file is consumed as column names - confirm that the input
# file starts with a header row.
weekly <- read.csv(snakemake@input[[1]])
names(weekly) <- c("date", "incidence")
weekly$date <- as.Date(weekly$date)

# Total incidence of the season attributed to `year`: every record dated
# after August 1st of the previous year, up to and including August 1st
# of `year`.
yearly_peak <- function(year) {
    season_start <- paste0(year - 1, "-08-01")
    season_end <- paste0(year, "-08-01")
    in_season <- weekly$date > season_start & weekly$date <= season_end
    sum(weekly$incidence[in_season])
}

# The years for which we have the full peak
years <- 1986:2018

# One row per year: the year and its summed incidence.
incidence_per_year <- sapply(years, yearly_peak)
annual_data <- data.frame(year = years,
                          incidence = incidence_per_year)

# Write the result without R's row-name column.
write.csv(annual_data,
          file = snakemake@output[[1]],
          row.names = FALSE)
module6/ressources/incidence_syndrome_grippal_snakemake/scripts/incidence-plots.R
0 → 100644
View file @
57d0e5ff
# Line plots of the weekly incidence.
#
# Run by Snakemake, which provides the `snakemake` object:
#   snakemake@input[[1]]   two-column CSV file (date, weekly incidence)
#   snakemake@output[[1]]  PNG file for the plot of the complete dataset
#   snakemake@output[[2]]  PNG file for the plot of the last four years

# Load the weekly data, name the two columns and parse the dates.
# NOTE(review): read.csv defaults to header = TRUE, so the first line of
# the input file is consumed as column names - confirm that the input
# file starts with a header row.
weekly <- read.csv(snakemake@input[[1]])
names(weekly) <- c("date", "incidence")
weekly$date <- as.Date(weekly$date)

# Plot the complete incidence dataset
full_plot_file <- snakemake@output[[1]]
png(filename = full_plot_file)
plot(weekly,
     type = "l",
     xlab = "Date",
     ylab = "Weekly incidence")
dev.off()

# Zoom on the last four years, taken as the final 4 * 52 records.
recent_plot_file <- snakemake@output[[2]]
recent <- tail(weekly, 4 * 52)
png(filename = recent_plot_file)
plot(recent,
     type = "l",
     xlab = "Date",
     ylab = "Weekly incidence")
dev.off()
module6/ressources/incidence_syndrome_grippal_snakemake/scripts/preprocess.py
0 → 100644
View file @
57d0e5ff
# Preprocess the raw weekly incidence file.
#
# This script is run by Snakemake, which injects the global `snakemake`:
#   snakemake.input[0]         raw CSV file, Latin-1 encoded
#   snakemake.output.data      cleaned CSV file (Monday of the week, incidence)
#   snakemake.output.errorlog  text file listing the anomalies that were found

# Libraries used by this script:
import datetime  # for date conversion
import csv       # for writing output to a CSV file

# Read the CSV file into memory. The context manager makes sure the
# file handle is closed again.
with open(snakemake.input[0], 'rb') as raw_file:
    raw_bytes = raw_file.read()

# Decode the Latin-1 character set,
# remove white space at both ends,
# and split into lines.
lines = raw_bytes.decode('latin-1').strip().split('\n')

# Discard the first line, which contains a comment
data_lines = lines[1:]

# Split each line into columns
table = [line.split(',') for line in data_lines]

# Remove records with missing data and write
# the removed records to a separate file for inspection.
with open(snakemake.output.errorlog, "w") as errorlog:
    valid_table = []
    for row in table:
        missing = any(column == '' for column in row)
        if missing:
            errorlog.write("Missing data in record\n")
            errorlog.write(str(row))
            errorlog.write("\n")
        else:
            valid_table.append(row)

# Extract the two relevant columns, "week" and "inc".
# The first valid row must be the header row. It is checked with explicit
# exceptions rather than `assert`, which is skipped under `python -O`.
if not valid_table:
    raise ValueError("no complete record found in the input file")

weeks = [row[0] for row in valid_table]
if weeks[0] != 'week':
    raise ValueError(f"expected column 0 to be 'week', found {weeks[0]!r}")
del weeks[0]

incidences = [row[2] for row in valid_table]
if incidences[0] != 'inc':
    raise ValueError(f"expected column 2 to be 'inc', found {incidences[0]!r}")
del incidences[0]

records = list(zip(weeks, incidences))

# Check for obviously out-of-range values.
# The messages must be f-strings, otherwise the literal text "{week}" or
# "{inc}" ends up in the log instead of the offending value.
with open(snakemake.output.errorlog, "a") as errorlog:
    for week, inc in records:
        if len(week) != 6 or not week.isdigit():
            errorlog.write(f"Suspect value in column 'week': {week}\n")
        if not inc.isdigit():
            errorlog.write(f"Suspect value in column 'inc': {inc}\n")

# Convert year/week by date of the corresponding Monday,
# then sort by increasing date.
# "%G%V:%u" parses ISO year, ISO week number and ISO weekday; the
# appended ":1" selects weekday 1.
converted_data = [
    (datetime.datetime.strptime(year_and_week + ":1", '%G%V:%u').date(), inc)
    for year_and_week, inc in records
]
converted_data.sort(key=lambda record: record[0])

# Check that consecutive dates are seven days apart
with open(snakemake.output.errorlog, "a") as errorlog:
    dates = [date for date, _ in converted_data]
    one_week = datetime.timedelta(weeks=1)
    for date1, date2 in zip(dates[:-1], dates[1:]):
        if date2 - date1 != one_week:
            errorlog.write(f"{date2-date1} between {date1} and {date2}\n")

# Write data to a CSV file with two columns:
# 1. the date of the Monday of each week, in ISO format
# 2. the incidence estimate for that week
# A header row is written first: the R scripts of this workflow read the
# file with read.csv(), which treats the first line as column names, so
# without a header the earliest record would be silently dropped.
# newline="" is what the csv module asks for on files it writes to.
with open(snakemake.output.data, "w", newline="") as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(["week_starting", "incidence"])
    csv_writer.writerows(converted_data)
module6/ressources/snakemake_tutorial_fr.org
0 → 100644
View file @
57d0e5ff
# -*- mode: org -*-
#+TITLE: Gérer un workflow avec snakemake
#+DATE: August, 2019
#+STARTUP: overview indent
#+OPTIONS: num:nil toc:t
#+PROPERTY: header-args :eval never-export
* Installer snakemake
TODO
* L'analyse de l'incidence du syndrome grippal revisitée
Nous allons reprendre l'exemple du module 3, l'analyse de l'incidence du syndrome grippal.
** 1ère étape: le téléchargement des données
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment