Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
mooc-rr-ressources
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
4
Merge Requests
4
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Learning Lab
mooc-rr-ressources
Commits
57d0e5ff
Commit
57d0e5ff
authored
Aug 28, 2019
by
Konrad Hinsen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Debut d'un tutoriel pour snakemake
parent
5b4af3a7
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
172 additions
and
2 deletions
+172
-2
.gitignore
.gitignore
+3
-1
Makefile
Makefile
+1
-1
Snakefile
...ressources/incidence_syndrome_grippal_snakemake/Snakefile
+39
-0
annual-incidence-histogram.R
...me_grippal_snakemake/scripts/annual-incidence-histogram.R
+11
-0
annual-incidence.R
...nce_syndrome_grippal_snakemake/scripts/annual-incidence.R
+23
-0
incidence-plots.R
...ence_syndrome_grippal_snakemake/scripts/incidence-plots.R
+14
-0
preprocess.py
...ncidence_syndrome_grippal_snakemake/scripts/preprocess.py
+68
-0
snakemake_tutorial_fr.org
module6/ressources/snakemake_tutorial_fr.org
+13
-0
No files found.
.gitignore
View file @
57d0e5ff
...
@@ -10,3 +10,5 @@
...
@@ -10,3 +10,5 @@
_minted*
_minted*
svg-inkscape*
svg-inkscape*
*-svg.pdf
*-svg.pdf
.Rhistory
.snakemake
Makefile
View file @
57d0e5ff
ressources-md
:
ressources-md
:
for
i
in
module1/ressources module2/ressources module2/slides module3/ressources
\
for
i
in
module1/ressources module2/ressources module2/slides module3/ressources
\
module4/ressources module5/ressources
;
do
\
module4/ressources module5/ressources
module6/ressources
;
do
\
make
-C
$$
i ressources-md
;
\
make
-C
$$
i ressources-md
;
\
done
done
module6/ressources/incidence_syndrome_grippal_snakemake/Snakefile
0 → 100644
View file @
57d0e5ff
# Workflow for the influenza-like-illness incidence analysis.
# Each rule declares the files it reads and writes; all paths and
# commands below are relative to the directory holding this Snakefile.

# Fetch the raw weekly incidence dataset with wget.
rule download:
    output:
        "data/weekly-incidence.csv"
    shell:
        "wget -O {output} http://www.sentiweb.fr/datasets/incidence-PAY-3.csv"

# Clean the raw dataset. Two named outputs: the cleaned data and a log
# of the records that were rejected or flagged.
rule preprocess:
    input:
        "data/weekly-incidence.csv"
    output:
        data="data/preprocessed-weekly-incidence.csv",
        errorlog="data/errors-from-preprocessing.csv"
    script:
        "scripts/preprocess.py"

# Plot the weekly incidence: first output is the full series, second
# output is the zoom on the most recent years.
rule plot:
    input:
        "data/preprocessed-weekly-incidence.csv"
    output:
        "data/weekly-incidence-plot.png",
        "data/weekly-incidence-plot-last-years.png"
    script:
        "scripts/incidence-plots.R"

# Aggregate the weekly values into one incidence value per year.
rule annual_incidence:
    input:
        "data/preprocessed-weekly-incidence.csv"
    output:
        "data/annual-incidence.csv"
    script:
        "scripts/annual-incidence.R"

# Draw the histogram of the annual incidence values.
rule histogram:
    input:
        "data/annual-incidence.csv"
    output:
        "data/annual-incidence-histogram.png"
    script:
        "scripts/annual-incidence-histogram.R"
module6/ressources/incidence_syndrome_grippal_snakemake/scripts/annual-incidence-histogram.R
0 → 100644
View file @
57d0e5ff
# Draw a histogram of annual incidence values.
#
# Files are taken from the `snakemake` object:
#   snakemake@input[[1]]   CSV file read with read.csv; its "incidence"
#                          column is the quantity being plotted
#   snakemake@output[[1]]  PNG file that receives the histogram

# Read in the annual data (no date handling is needed here)
annual <- read.csv(snakemake@input[[1]])

# Open the PNG device, draw the histogram, then close the device so
# that the file is flushed to disk
png(filename = snakemake@output[[1]])
hist(annual$incidence,
     breaks = 10,
     xlab = "Annual incidence",
     ylab = "Number of observations",
     main = "")
dev.off()
module6/ressources/incidence_syndrome_grippal_snakemake/scripts/annual-incidence.R
0 → 100644
View file @
57d0e5ff
# Compute one incidence total per year from the weekly incidence data.
#
# Files are taken from the `snakemake` object:
#   snakemake@input[[1]]   CSV file with two columns (date, incidence)
#   snakemake@output[[1]]  CSV file with columns "year" and "incidence"

# Read in the data, name the two columns, and convert the dates
data <- read.csv(snakemake@input[[1]])
names(data) <- c("date", "incidence")
data$date <- as.Date(data$date)

# Total incidence attributed to year N: the sum over all records dated
# after 1 August of year N-1 and up to (and including) 1 August of
# year N.
yearly_peak <- function(year) {
    window_start <- paste0(year - 1, "-08-01")
    window_end <- paste0(year, "-08-01")
    in_window <- data$date > window_start & data$date <= window_end
    sum(data$incidence[in_window])
}

# The years for which we have the full peak
years <- 1986:2018

# Make a new data frame for the annual incidences
annual_data <- data.frame(year = years,
                          incidence = sapply(years, yearly_peak))

# Write the output file, without the row-name column
write.csv(annual_data,
          file = snakemake@output[[1]],
          row.names = FALSE)
module6/ressources/incidence_syndrome_grippal_snakemake/scripts/incidence-plots.R
0 → 100644
View file @
57d0e5ff
# Plot the weekly incidence data as two line charts.
#
# Files are taken from the `snakemake` object:
#   snakemake@input[[1]]   CSV file with two columns (date, incidence)
#   snakemake@output[[1]]  PNG file for the plot of the complete dataset
#   snakemake@output[[2]]  PNG file for the plot of the last four years

# Read in the data, name the two columns, and convert the dates
weekly <- read.csv(snakemake@input[[1]])
names(weekly) <- c("date", "incidence")
weekly$date <- as.Date(weekly$date)

# Plot the complete incidence dataset
png(filename = snakemake@output[[1]])
plot(weekly,
     type = "l",
     xlab = "Date",
     ylab = "Weekly incidence")
dev.off()

# Zoom on the last four years, taken as the final 4 * 52 records
last_years <- tail(weekly, 4 * 52)
png(filename = snakemake@output[[2]])
plot(last_years,
     type = "l",
     xlab = "Date",
     ylab = "Weekly incidence")
dev.off()
module6/ressources/incidence_syndrome_grippal_snakemake/scripts/preprocess.py
0 → 100644
View file @
57d0e5ff
# Preprocess the weekly incidence data.
#
# This script is run by snakemake through a `script:` directive, which
# provides the global `snakemake` object used below:
#   snakemake.input[0]         raw CSV file, Latin-1 encoded
#   snakemake.output.data      cleaned CSV file (date, incidence)
#   snakemake.output.errorlog  text log of rejected or suspect records

# Libraries used by this script:
import datetime  # for date conversion
import csv       # for writing output to a CSV file

# Read the CSV file into memory. The context manager makes sure the
# file is closed again.
with open(snakemake.input[0], 'rb') as raw_file:
    raw_bytes = raw_file.read()

# Decode the Latin-1 character set,
# remove white space at both ends,
# and split into lines.
lines = raw_bytes.decode('latin-1').strip().split('\n')

# Discard the first line, which contains a comment
data_lines = lines[1:]

# Split each line into columns
table = [line.split(',') for line in data_lines]

# Remove records with missing data and write
# the removed records to a separate file for inspection.
with open(snakemake.output.errorlog, "w") as errorlog:
    valid_table = []
    for row in table:
        missing = any(column == '' for column in row)
        if missing:
            errorlog.write("Missing data in record\n")
            errorlog.write(str(row))
            errorlog.write("\n")
        else:
            valid_table.append(row)

# Extract the two relevant columns, "week" and "inc".
# The first valid row is the header; it is checked, then dropped.
week = [row[0] for row in valid_table]
assert week[0] == 'week'
del week[0]
inc = [row[2] for row in valid_table]
assert inc[0] == 'inc'
del inc[0]
data = list(zip(week, inc))

# Check for obviously out-of-range values.
# The messages are f-strings so that the offending value is logged
# (without the prefix, the literal text "{week}" would be written).
# Loop variables have their own names to avoid rebinding the column
# lists `week` and `inc`.
with open(snakemake.output.errorlog, "a") as errorlog:
    for week_value, inc_value in data:
        if len(week_value) != 6 or not week_value.isdigit():
            errorlog.write(
                f"Suspect value in column 'week': {week_value}\n")
        if not inc_value.isdigit():
            errorlog.write(
                f"Suspect value in column 'inc': {inc_value}\n")

# Convert year/week by date of the corresponding Monday
# (ISO year %G, ISO week %V, weekday %u = 1),
# then sort by increasing date
converted_data = [
    (datetime.datetime.strptime(year_and_week + ":1", '%G%V:%u').date(),
     inc_value)
    for year_and_week, inc_value in data
]
converted_data.sort(key=lambda record: record[0])

# Check that consecutive dates are seven days apart
with open(snakemake.output.errorlog, "a") as errorlog:
    dates = [date for date, _ in converted_data]
    for date1, date2 in zip(dates[:-1], dates[1:]):
        if date2 - date1 != datetime.timedelta(weeks=1):
            errorlog.write(
                f"{date2-date1} between {date1} and {date2}\n")

# Write data to a CSV file with two columns:
# 1. the date of the Monday of each week, in ISO format
# 2. the incidence estimate for that week
# A header row comes first: the R scripts of this workflow read the file
# with read.csv, whose default treats the first line as column names, so
# without a header the first record would be lost.
# newline="" is what the csv module asks for on files it writes to.
with open(snakemake.output.data, "w", newline="") as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(["date", "incidence"])
    csv_writer.writerows(converted_data)
module6/ressources/snakemake_tutorial_fr.org
0 → 100644
View file @
57d0e5ff
# -*- mode: org -*-
#+TITLE: Gérer un workflow avec snakemake
#+DATE: August, 2019
#+STARTUP: overview indent
#+OPTIONS: num:nil toc:t
#+PROPERTY: header-args :eval never-export
* Installer snakemake
TODO
* L'analyse de l'incidence du syndrome grippal revisitée
Nous allons reprendre l'exemple du module 3, l'analyse de l'incidence du syndrome grippal.
** 1ère étape: le téléchargement des données
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment