Commit 97687917 authored by boulanlo's avatar boulanlo

Peer-reviewed article

parent 8c01d519
% Highlighted Python listing: replaces the entries of extracted_data[0]
% from index 1 onward with datetime objects parsed using "%m/%d/%y".
\begin{Verbatim}[commandchars=\\\{\}]
\PYG{n}{extracted\PYGZus{}data}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{][}\PYG{l+m+mi}{1}\PYG{p}{:]} \PYG{o}{=} \PYG{p}{[}\PYG{n}{datetime}\PYG{o}{.}\PYG{n}{datetime}\PYG{o}{.}\PYG{n}{strptime}\PYG{p}{(}\PYG{n}{date}\PYG{p}{,} \PYG{l+s+s2}{\PYGZdq{}\PYGZpc{}m/}\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s+s2}{/\PYGZpc{}y\PYGZdq{}}\PYG{p}{)} \PYG{k}{for} \PYG{n}{date} \PYG{o+ow}{in} \PYG{n}{extracted\PYGZus{}data}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{][}\PYG{l+m+mi}{1}\PYG{p}{:]]}
\end{Verbatim}
% Highlighted R listing: converts the Date column of `data` with as.Date()
% and then calls summary() on the data frame.
\begin{Verbatim}[commandchars=\\\{\}]
\PYG{n}{data}\PYG{o}{\PYGZdl{}}\PYG{n}{Date} \PYG{o}{\PYGZlt{}\PYGZhy{}} \PYG{n+nf}{as.Date}\PYG{p}{(}\PYG{n}{data}\PYG{o}{\PYGZdl{}}\PYG{n}{Date}\PYG{p}{)}
\PYG{n+nf}{summary}\PYG{p}{(}\PYG{n}{data}\PYG{p}{)}
\end{Verbatim}
% Highlighted Python listing: prints a warning when the interpreter is older
% than Python 3.6. The check compares sys.version_info with the tuple (3, 6)
% so that a later major version with a small minor number (e.g. 4.0) is not
% rejected, which the former "major < 3 or minor < 6" test would do.
\begin{Verbatim}[commandchars=\\\{\}]
\PYG{k+kn}{import} \PYG{n+nn}{sys}
\PYG{k}{if} \PYG{n}{sys}\PYG{o}{.}\PYG{n}{version\PYGZus{}info} \PYG{o}{\PYGZlt{}} \PYG{p}{(}\PYG{l+m+mi}{3}\PYG{p}{,} \PYG{l+m+mi}{6}\PYG{p}{):}
    \PYG{n+nb}{print}\PYG{p}{(}\PYG{l+s+s2}{\PYGZdq{}Please use Python 3.6 (or higher)!\PYGZdq{}}\PYG{p}{)}
\end{Verbatim}
% Highlighted Python listing: builds a list made of the first row of
% flipped_data, a None entry, and then the remaining rows.
% NOTE(review): None is presumably rendered as a horizontal rule in the
% resulting org table -- confirm against the org-babel documentation.
\begin{Verbatim}[commandchars=\\\{\}]
\PYG{p}{[}\PYG{n}{flipped\PYGZus{}data}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{],} \PYG{k+kc}{None}\PYG{p}{]} \PYG{o}{+} \PYG{n}{flipped\PYGZus{}data}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{:]}
\end{Verbatim}
% Highlighted Emacs Lisp listing: prints a reminder when the ob-python or
% ob-R feature is not loaded. The messages name org-babel-do-load-languages,
% the function used to enable Babel languages (previously misspelled as
% "org-babel-do-lnaguages").
\begin{Verbatim}[commandchars=\\\{\}]
\PYG{p}{(}\PYG{n+nb}{unless} \PYG{p}{(}\PYG{n+nb}{featurep} \PYG{l+s+ss}{\PYGZsq{}ob\PYGZhy{}python}\PYG{p}{)}
  \PYG{p}{(}\PYG{n+nf}{print} \PYG{l+s}{\PYGZdq{}Please activate python in org\PYGZhy{}babel (org\PYGZhy{}babel\PYGZhy{}do\PYGZhy{}load\PYGZhy{}languages)!\PYGZdq{}}\PYG{p}{))}
\PYG{p}{(}\PYG{n+nb}{unless} \PYG{p}{(}\PYG{n+nb}{featurep} \PYG{l+s+ss}{\PYGZsq{}ob\PYGZhy{}R}\PYG{p}{)}
  \PYG{p}{(}\PYG{n+nf}{print} \PYG{l+s}{\PYGZdq{}Please activate R in org\PYGZhy{}babel (org\PYGZhy{}babel\PYGZhy{}do\PYGZhy{}load\PYGZhy{}languages)!\PYGZdq{}}\PYG{p}{))}
\end{Verbatim}
% Highlighted Python listing: the table of [province, country] selectors and
% the predicate is_target_country(province, country) that tests a row against
% them. A selector [P, None] matches province P, [None, C] matches country C
% when the province field is empty, and [P, C] matches country C for every
% province other than P. Block indentation is restored so that the list
% items and the function body read as valid Python.
\begin{Verbatim}[commandchars=\\\{\}]
\PYG{n}{target\PYGZus{}countries} \PYG{o}{=} \PYG{p}{[}
    \PYG{p}{[}\PYG{k+kc}{None}\PYG{p}{,} \PYG{l+s+s2}{\PYGZdq{}Belgium\PYGZdq{}}\PYG{p}{],}
    \PYG{p}{[}\PYG{l+s+s2}{\PYGZdq{}Hong Kong\PYGZdq{}}\PYG{p}{,} \PYG{k+kc}{None}\PYG{p}{],}
    \PYG{p}{[}\PYG{l+s+s2}{\PYGZdq{}Hong Kong\PYGZdq{}}\PYG{p}{,} \PYG{l+s+s2}{\PYGZdq{}China\PYGZdq{}}\PYG{p}{],} \PYG{c+c1}{\PYGZsh{} China without Hong Kong}
    \PYG{p}{[}\PYG{k+kc}{None}\PYG{p}{,} \PYG{l+s+s2}{\PYGZdq{}France\PYGZdq{}}\PYG{p}{],}
    \PYG{p}{[}\PYG{k+kc}{None}\PYG{p}{,} \PYG{l+s+s2}{\PYGZdq{}Germany\PYGZdq{}}\PYG{p}{],}
    \PYG{p}{[}\PYG{k+kc}{None}\PYG{p}{,} \PYG{l+s+s2}{\PYGZdq{}Iran\PYGZdq{}}\PYG{p}{],}
    \PYG{p}{[}\PYG{k+kc}{None}\PYG{p}{,} \PYG{l+s+s2}{\PYGZdq{}Italy\PYGZdq{}}\PYG{p}{],}
    \PYG{p}{[}\PYG{k+kc}{None}\PYG{p}{,} \PYG{l+s+s2}{\PYGZdq{}Japan\PYGZdq{}}\PYG{p}{],}
    \PYG{p}{[}\PYG{k+kc}{None}\PYG{p}{,} \PYG{l+s+s2}{\PYGZdq{}South Korea\PYGZdq{}}\PYG{p}{],}
    \PYG{p}{[}\PYG{k+kc}{None}\PYG{p}{,} \PYG{l+s+s2}{\PYGZdq{}Netherlands\PYGZdq{}}\PYG{p}{],}
    \PYG{p}{[}\PYG{k+kc}{None}\PYG{p}{,} \PYG{l+s+s2}{\PYGZdq{}United Kingdom\PYGZdq{}}\PYG{p}{],}
    \PYG{p}{[}\PYG{k+kc}{None}\PYG{p}{,} \PYG{l+s+s2}{\PYGZdq{}US\PYGZdq{}}\PYG{p}{]}
\PYG{p}{]}
\PYG{k}{def} \PYG{n+nf}{is\PYGZus{}target\PYGZus{}country}\PYG{p}{(}\PYG{n}{province}\PYG{p}{,} \PYG{n}{country}\PYG{p}{):}
    \PYG{n}{specific\PYGZus{}province} \PYG{o}{=} \PYG{k}{lambda} \PYG{n}{t}\PYG{p}{,} \PYG{n}{p}\PYG{p}{,} \PYG{n}{c}\PYG{p}{:} \PYG{n}{t}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]} \PYG{o}{==} \PYG{n}{p} \PYG{o+ow}{and} \PYG{n}{t}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]} \PYG{o+ow}{is} \PYG{k+kc}{None}
    \PYG{n}{without\PYGZus{}specific\PYGZus{}province} \PYG{o}{=} \PYG{k}{lambda} \PYG{n}{t}\PYG{p}{,} \PYG{n}{p}\PYG{p}{,} \PYG{n}{c}\PYG{p}{:} \PYG{n}{t}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]} \PYG{o+ow}{is} \PYG{o+ow}{not} \PYG{k+kc}{None} \PYG{o+ow}{and} \PYG{n}{t}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]} \PYG{o}{!=} \PYG{n}{p} \PYG{o+ow}{and} \PYG{n}{t}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]} \PYG{o}{==} \PYG{n}{c}
    \PYG{n}{without\PYGZus{}provinces} \PYG{o}{=} \PYG{k}{lambda} \PYG{n}{t}\PYG{p}{,} \PYG{n}{p}\PYG{p}{,} \PYG{n}{c}\PYG{p}{:} \PYG{n}{t}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]} \PYG{o+ow}{is} \PYG{k+kc}{None} \PYG{o+ow}{and} \PYG{n}{p} \PYG{o}{==} \PYG{l+s+s2}{\PYGZdq{}\PYGZdq{}} \PYG{o+ow}{and} \PYG{n}{t}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]} \PYG{o}{==} \PYG{n}{c}
    \PYG{n}{check} \PYG{o}{=} \PYG{k}{lambda} \PYG{n}{t}\PYG{p}{,} \PYG{n}{p}\PYG{p}{,} \PYG{n}{c}\PYG{p}{:} \PYG{n}{specific\PYGZus{}province}\PYG{p}{(}\PYG{n}{t}\PYG{p}{,} \PYG{n}{p}\PYG{p}{,} \PYG{n}{c}\PYG{p}{)} \PYG{o+ow}{or} \PYG{n}{without\PYGZus{}specific\PYGZus{}province}\PYG{p}{(}\PYG{n}{t}\PYG{p}{,} \PYG{n}{p}\PYG{p}{,} \PYG{n}{c}\PYG{p}{)} \PYG{o+ow}{or} \PYG{n}{without\PYGZus{}provinces}\PYG{p}{(}\PYG{n}{t}\PYG{p}{,} \PYG{n}{p}\PYG{p}{,} \PYG{n}{c}\PYG{p}{)}
    \PYG{n}{res} \PYG{o}{=} \PYG{p}{[}\PYG{n}{check}\PYG{p}{(}\PYG{n}{target}\PYG{p}{,} \PYG{n}{province}\PYG{p}{,} \PYG{n}{country}\PYG{p}{)} \PYG{k}{for} \PYG{n}{target} \PYG{o+ow}{in} \PYG{n}{target\PYGZus{}countries}\PYG{p}{]}
    \PYG{k}{return} \PYG{n+nb}{any}\PYG{p}{(}\PYG{n}{res}\PYG{p}{)}
\PYG{n}{extracted\PYGZus{}data} \PYG{o}{=} \PYG{p}{[]}
\PYG{n}{extracted\PYGZus{}data}\PYG{o}{.}\PYG{n}{append}\PYG{p}{([}\PYG{n}{data}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{][}\PYG{l+m+mi}{1}\PYG{p}{]]} \PYG{o}{+} \PYG{n}{data}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{][}\PYG{l+m+mi}{4}\PYG{p}{:])}
\PYG{k}{for} \PYG{n}{row} \PYG{o+ow}{in} \PYG{n}{valid\PYGZus{}data}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{:]:}
    \PYG{k}{if} \PYG{l+s+s2}{\PYGZdq{}Korea\PYGZdq{}} \PYG{o+ow}{in} \PYG{n}{row}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]:}
        \PYG{n+nb}{print}\PYG{p}{(}\PYG{n}{row}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{])}
    \PYG{k}{if} \PYG{n}{is\PYGZus{}target\PYGZus{}country}\PYG{p}{(}\PYG{n}{row}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{],} \PYG{n}{row}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]):}
        \PYG{c+c1}{\PYGZsh{} print(row[0])}
        \PYG{k}{if} \PYG{n}{row}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]} \PYG{o}{==} \PYG{l+s+s2}{\PYGZdq{}Hong Kong\PYGZdq{}}\PYG{p}{:}
            \PYG{n}{extracted\PYGZus{}data}\PYG{o}{.}\PYG{n}{append}\PYG{p}{([}\PYG{l+s+s2}{\PYGZdq{}Hong Kong\PYGZdq{}}\PYG{p}{]} \PYG{o}{+} \PYG{n}{row}\PYG{p}{[}\PYG{l+m+mi}{4}\PYG{p}{:])}
        \PYG{k}{elif} \PYG{n}{row}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]} \PYG{o}{==} \PYG{l+s+s2}{\PYGZdq{}China\PYGZdq{}}\PYG{p}{:}
            \PYG{k}{try}\PYG{p}{:}
                \PYG{n}{idx} \PYG{o}{=} \PYG{p}{[}\PYG{n}{row}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]} \PYG{k}{for} \PYG{n}{row} \PYG{o+ow}{in} \PYG{n}{extracted\PYGZus{}data}\PYG{p}{]}\PYG{o}{.}\PYG{n}{index}\PYG{p}{(}\PYG{l+s+s2}{\PYGZdq{}China\PYGZdq{}}\PYG{p}{)}
                \PYG{c+c1}{\PYGZsh{} Add this province to the running China total}
                \PYG{n}{extracted\PYGZus{}data}\PYG{p}{[}\PYG{n}{idx}\PYG{p}{][}\PYG{l+m+mi}{1}\PYG{p}{:]} \PYG{o}{=} \PYG{p}{[}\PYG{n+nb}{int}\PYG{p}{(}\PYG{n}{a}\PYG{p}{)} \PYG{o}{+} \PYG{n+nb}{int}\PYG{p}{(}\PYG{n}{b}\PYG{p}{)} \PYG{k}{for} \PYG{n}{a}\PYG{p}{,} \PYG{n}{b} \PYG{o+ow}{in} \PYG{n+nb}{zip}\PYG{p}{(}\PYG{n}{extracted\PYGZus{}data}\PYG{p}{[}\PYG{n}{idx}\PYG{p}{][}\PYG{l+m+mi}{1}\PYG{p}{:],} \PYG{n}{row}\PYG{p}{[}\PYG{l+m+mi}{4}\PYG{p}{:])]}
            \PYG{k}{except} \PYG{n+ne}{ValueError}\PYG{p}{:}
                \PYG{n}{extracted\PYGZus{}data}\PYG{o}{.}\PYG{n}{append}\PYG{p}{([}\PYG{l+s+s2}{\PYGZdq{}China\PYGZdq{}}\PYG{p}{]} \PYG{o}{+} \PYG{n}{row}\PYG{p}{[}\PYG{l+m+mi}{4}\PYG{p}{:])}
        \PYG{k}{else}\PYG{p}{:}
            \PYG{n}{extracted\PYGZus{}data}\PYG{o}{.}\PYG{n}{append}\PYG{p}{([}\PYG{n}{row}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{]]} \PYG{o}{+} \PYG{n}{row}\PYG{p}{[}\PYG{l+m+mi}{4}\PYG{p}{:])}
\end{Verbatim}
% Highlighted R listing: loads tidyverse, ggrepel and scales, builds
% `last_date` (rows at the final date for US, France, Germany and China),
% defines a 12-colour palette, and draws cumulative cases per country over
% time with ggplot2, labelling the lines with the countries in `last_date`.
\begin{Verbatim}[commandchars=\\\{\}]
\PYG{n+nf}{library}\PYG{p}{(}\PYG{n}{tidyverse}\PYG{p}{)}
\PYG{n+nf}{library}\PYG{p}{(}\PYG{n}{ggrepel}\PYG{p}{)}
\PYG{n+nf}{library}\PYG{p}{(}\PYG{n}{scales}\PYG{p}{)}
\PYG{n}{last\PYGZus{}date} \PYG{o}{\PYGZlt{}\PYGZhy{}} \PYG{n}{data} \PYG{o}{\PYGZpc{}\PYGZgt{}\PYGZpc{}}
\PYG{n+nf}{gather}\PYG{p}{(}\PYG{n}{Country}\PYG{p}{,} \PYG{n}{Cases}\PYG{p}{,} \PYG{n}{Belgium}\PYG{o}{:}\PYG{n}{UnitedKingdom}\PYG{p}{)} \PYG{o}{\PYGZpc{}\PYGZgt{}\PYGZpc{}}
\PYG{n}{dplyr}\PYG{o}{::}\PYG{n+nf}{filter}\PYG{p}{(}\PYG{n}{Date} \PYG{o}{==} \PYG{n+nf}{tail}\PYG{p}{(}\PYG{n}{data}\PYG{o}{\PYGZdl{}}\PYG{n}{Date}\PYG{p}{,} \PYG{l+m}{1}\PYG{p}{),} \PYG{n}{Country} \PYG{o}{\PYGZpc{}in\PYGZpc{}} \PYG{n+nf}{c}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}US\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}France\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}Germany\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}China\PYGZdq{}}\PYG{p}{))}
\PYG{c+c1}{\PYGZsh{} Color\PYGZhy{}blind friendly palette taken from}
\PYG{c+c1}{\PYGZsh{} https://bconnelly.net/posts/creating\PYGZus{}colorblind\PYGZhy{}friendly\PYGZus{}figures/}
\PYG{c+c1}{\PYGZsh{} with added grayscale values}
\PYG{n}{palette} \PYG{o}{\PYGZlt{}\PYGZhy{}} \PYG{n+nf}{c}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}\PYGZsh{}000000\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}\PYGZsh{}E69F00\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}\PYGZsh{}56B4E9\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}\PYGZsh{}009E73\PYGZdq{}}\PYG{p}{,}
\PYG{l+s}{\PYGZdq{}\PYGZsh{}292929\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}\PYGZsh{}555555\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}\PYGZsh{}999999\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}\PYGZsh{}BBBBBB\PYGZdq{}}\PYG{p}{,}
\PYG{l+s}{\PYGZdq{}\PYGZsh{}F0E442\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}\PYGZsh{}0072B2\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}\PYGZsh{}D55E00\PYGZdq{}}\PYG{p}{,} \PYG{l+s}{\PYGZdq{}\PYGZsh{}CC79A7\PYGZdq{}}\PYG{p}{)}
\PYG{n}{data} \PYG{o}{\PYGZpc{}\PYGZgt{}\PYGZpc{}}
\PYG{n+nf}{gather}\PYG{p}{(}\PYG{n}{Country}\PYG{p}{,} \PYG{n}{Cases}\PYG{p}{,} \PYG{n}{Belgium}\PYG{o}{:}\PYG{n}{UnitedKingdom}\PYG{p}{)} \PYG{o}{\PYGZpc{}\PYGZgt{}\PYGZpc{}}
\PYG{n+nf}{ggplot}\PYG{p}{(}\PYG{n+nf}{aes}\PYG{p}{(}\PYG{n}{x}\PYG{o}{=}\PYG{n}{Date}\PYG{p}{,} \PYG{n}{y}\PYG{o}{=}\PYG{n}{Cases}\PYG{p}{,} \PYG{n}{colour}\PYG{o}{=}\PYG{n}{Country}\PYG{p}{))} \PYG{o}{+}
\PYG{n+nf}{geom\PYGZus{}line}\PYG{p}{()} \PYG{o}{+}
\PYG{n+nf}{scale\PYGZus{}x\PYGZus{}date}\PYG{p}{(}\PYG{n}{breaks} \PYG{o}{=} \PYG{n+nf}{pretty\PYGZus{}breaks}\PYG{p}{(}\PYG{l+m}{8}\PYG{p}{),} \PYG{n}{labels} \PYG{o}{=} \PYG{n+nf}{date\PYGZus{}format}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}\PYGZpc{}b \PYGZpc{}Y\PYGZdq{}}\PYG{p}{))} \PYG{o}{+}
\PYG{n+nf}{scale\PYGZus{}color\PYGZus{}manual}\PYG{p}{(}\PYG{n}{values}\PYG{o}{=}\PYG{n}{palette}\PYG{p}{)} \PYG{o}{+}
\PYG{n+nf}{scale\PYGZus{}y\PYGZus{}continuous}\PYG{p}{(}\PYG{n}{labels} \PYG{o}{=} \PYG{n+nf}{comma\PYGZus{}format}\PYG{p}{(),} \PYG{n}{breaks} \PYG{o}{=} \PYG{n+nf}{pretty\PYGZus{}breaks}\PYG{p}{(}\PYG{l+m}{8}\PYG{p}{))} \PYG{o}{+}
\PYG{n+nf}{geom\PYGZus{}text\PYGZus{}repel}\PYG{p}{(}\PYG{n}{data}\PYG{o}{=}\PYG{n}{last\PYGZus{}date}\PYG{p}{,} \PYG{n+nf}{aes}\PYG{p}{(}\PYG{n}{label} \PYG{o}{=} \PYG{n}{Country}\PYG{p}{))} \PYG{o}{+}
\PYG{n+nf}{ggtitle}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}Cumulative confirmed cases of Covid\PYGZhy{}19\PYGZdq{}}\PYG{p}{,} \PYG{n}{subtitle}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}In selected countries since the beginning of 2020\PYGZdq{}}\PYG{p}{)} \PYG{o}{+}
\PYG{n+nf}{theme\PYGZus{}bw}\PYG{p}{()}
\end{Verbatim}
% Highlighted Python listing: transposes extracted_data into flipped_data
% (each cell converted with str), sets the top-left cell to "Date", and
% removes spaces from every entry of the first row.
\begin{Verbatim}[commandchars=\\\{\}]
\PYG{n}{flipped\PYGZus{}data} \PYG{o}{=} \PYG{p}{[[}\PYG{n+nb}{str}\PYG{p}{(}\PYG{n}{row}\PYG{p}{[}\PYG{n}{i}\PYG{p}{])} \PYG{k}{for} \PYG{n}{row} \PYG{o+ow}{in} \PYG{n}{extracted\PYGZus{}data}\PYG{p}{]} \PYG{k}{for} \PYG{n}{i} \PYG{o+ow}{in} \PYG{n+nb}{range}\PYG{p}{(}\PYG{l+m+mi}{0}\PYG{p}{,} \PYG{n+nb}{len}\PYG{p}{(}\PYG{n}{extracted\PYGZus{}data}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]))]}
\PYG{n}{flipped\PYGZus{}data}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{][}\PYG{l+m+mi}{0}\PYG{p}{]} \PYG{o}{=} \PYG{l+s+s2}{\PYGZdq{}Date\PYGZdq{}}
\PYG{n}{flipped\PYGZus{}data}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]} \PYG{o}{=} \PYG{p}{[}\PYG{n}{s}\PYG{o}{.}\PYG{n}{replace}\PYG{p}{(}\PYG{l+s+s2}{\PYGZdq{} \PYGZdq{}}\PYG{p}{,} \PYG{l+s+s2}{\PYGZdq{}\PYGZdq{}}\PYG{p}{)} \PYG{k}{for} \PYG{n}{s} \PYG{o+ow}{in} \PYG{n}{flipped\PYGZus{}data}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{]]}
\end{Verbatim}
% Highlighted R listing: base-graphics plot of the France column of `data`
% against its Date column, with explicit axis labels.
\begin{Verbatim}[commandchars=\\\{\}]
\PYG{n+nf}{plot}\PYG{p}{(}\PYG{n}{data}\PYG{p}{[,}\PYG{l+s}{\PYGZsq{}Date\PYGZsq{}}\PYG{p}{],} \PYG{n}{data}\PYG{p}{[,}\PYG{l+s}{\PYGZsq{}France\PYGZsq{}}\PYG{p}{],} \PYG{n}{xlab}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}Date\PYGZdq{}}\PYG{p}{,}
\PYG{n}{ylab}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}Confirmed cases in Metropolitan France, cumulative\PYGZdq{}}\PYG{p}{)}
\end{Verbatim}
% Highlighted Python listing: download_data() fetches data_url and writes the
% bytes to data.csv; read_data() reads that file, renames the quoted
% "Korea, South" field to South Korea, splits every line on commas and returns
% the rows without the last two entries. data_url is not defined in this
% listing. Block indentation is restored so that the function bodies read as
% valid Python.
\begin{Verbatim}[commandchars=\\\{\}]
\PYG{k+kn}{from} \PYG{n+nn}{urllib.request} \PYG{k+kn}{import} \PYG{n}{urlopen}
\PYG{k+kn}{import} \PYG{n+nn}{datetime}
\PYG{n}{temp\PYGZus{}file\PYGZus{}name} \PYG{o}{=} \PYG{l+s+s1}{\PYGZsq{}data.csv\PYGZsq{}}
\PYG{c+c1}{\PYGZsh{} Downloads the data from GitHub}
\PYG{k}{def} \PYG{n+nf}{download\PYGZus{}data}\PYG{p}{():}
    \PYG{n}{data} \PYG{o}{=} \PYG{n}{urlopen}\PYG{p}{(}\PYG{n}{data\PYGZus{}url}\PYG{p}{)}\PYG{o}{.}\PYG{n}{read}\PYG{p}{()}
    \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{temp\PYGZus{}file\PYGZus{}name}\PYG{p}{,} \PYG{l+s+s1}{\PYGZsq{}wb\PYGZsq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{f}\PYG{p}{:}
        \PYG{n}{f}\PYG{o}{.}\PYG{n}{write}\PYG{p}{(}\PYG{n}{data}\PYG{p}{)}
\PYG{c+c1}{\PYGZsh{} Tries to read data from the local file and returns the content}
\PYG{c+c1}{\PYGZsh{} parsed as a series of lines}
\PYG{k}{def} \PYG{n+nf}{read\PYGZus{}data}\PYG{p}{():}
    \PYG{k}{try}\PYG{p}{:}
        \PYG{k}{with} \PYG{n+nb}{open}\PYG{p}{(}\PYG{n}{temp\PYGZus{}file\PYGZus{}name}\PYG{p}{,} \PYG{l+s+s1}{\PYGZsq{}r\PYGZsq{}}\PYG{p}{)} \PYG{k}{as} \PYG{n}{f}\PYG{p}{:}
            \PYG{n}{data} \PYG{o}{=} \PYG{n}{f}\PYG{o}{.}\PYG{n}{read}\PYG{p}{()}
        \PYG{n}{lines} \PYG{o}{=} \PYG{n}{data}\PYG{o}{.}\PYG{n}{split}\PYG{p}{(}\PYG{l+s+s1}{\PYGZsq{}}\PYG{l+s+se}{\PYGZbs{}n}\PYG{l+s+s1}{\PYGZsq{}}\PYG{p}{)}
        \PYG{n}{table} \PYG{o}{=} \PYG{p}{[}\PYG{n}{line}\PYG{o}{.}\PYG{n}{replace}\PYG{p}{(}\PYG{l+s+s2}{\PYGZdq{}}\PYG{l+s+se}{\PYGZbs{}\PYGZdq{}}\PYG{l+s+s2}{Korea, South}\PYG{l+s+se}{\PYGZbs{}\PYGZdq{}}\PYG{l+s+s2}{\PYGZdq{}}\PYG{p}{,} \PYG{l+s+s2}{\PYGZdq{}South Korea\PYGZdq{}}\PYG{p}{)}\PYG{o}{.}\PYG{n}{split}\PYG{p}{(}\PYG{l+s+s1}{\PYGZsq{},\PYGZsq{}}\PYG{p}{)} \PYG{k}{for} \PYG{n}{line} \PYG{o+ow}{in} \PYG{n}{lines}\PYG{p}{]}
        \PYG{k}{return} \PYG{n}{table}\PYG{p}{[:}\PYG{o}{\PYGZhy{}}\PYG{l+m+mi}{2}\PYG{p}{]} \PYG{c+c1}{\PYGZsh{} Removing the empty last line}
    \PYG{k}{except} \PYG{n+ne}{IOError} \PYG{k}{as} \PYG{n}{e}\PYG{p}{:}
        \PYG{k}{raise} \PYG{n}{e}
\PYG{c+c1}{\PYGZsh{} Decides whether or not to download the file from GitHub based on the}
\PYG{c+c1}{\PYGZsh{} presence of a local file and the last recorded date in the local}
\PYG{c+c1}{\PYGZsh{} file}
\PYG{k}{def} \PYG{n+nf}{try\PYGZus{}download\PYGZus{}data}\PYG{p}{():}
    \PYG{n}{data} \PYG{o}{=} \PYG{k+kc}{None}
    \PYG{k}{try}\PYG{p}{:}
        \PYG{n}{data} \PYG{o}{=} \PYG{n}{read\PYGZus{}data}\PYG{p}{()}
        \PYG{n}{last\PYGZus{}date} \PYG{o}{=} \PYG{n}{datetime}\PYG{o}{.}\PYG{n}{datetime}\PYG{o}{.}\PYG{n}{strptime}\PYG{p}{(}\PYG{n}{data}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{][}\PYG{o}{\PYGZhy{}}\PYG{l+m+mi}{1}\PYG{p}{],} \PYG{l+s+s2}{\PYGZdq{}\PYGZpc{}m/}\PYG{l+s+si}{\PYGZpc{}d}\PYG{l+s+s2}{/\PYGZpc{}y\PYGZdq{}}\PYG{p}{)}
        \PYG{n}{today} \PYG{o}{=} \PYG{n}{datetime}\PYG{o}{.}\PYG{n}{datetime}\PYG{o}{.}\PYG{n}{today}\PYG{p}{()}
        \PYG{c+c1}{\PYGZsh{} Refresh when the newest column is more than one day old}
        \PYG{k}{if} \PYG{n}{today} \PYG{o}{\PYGZhy{}} \PYG{n}{last\PYGZus{}date} \PYG{o}{\PYGZgt{}} \PYG{n}{datetime}\PYG{o}{.}\PYG{n}{timedelta}\PYG{p}{(}\PYG{n}{days}\PYG{o}{=}\PYG{l+m+mi}{1}\PYG{p}{):}
            \PYG{n+nb}{print}\PYG{p}{(}\PYG{l+s+s2}{\PYGZdq{}Data obsolete, downloading new data...\PYGZdq{}}\PYG{p}{)}
            \PYG{n}{download\PYGZus{}data}\PYG{p}{()}
            \PYG{n}{data} \PYG{o}{=} \PYG{n}{read\PYGZus{}data}\PYG{p}{()}
    \PYG{k}{except} \PYG{n+ne}{IOError}\PYG{p}{:}
        \PYG{n}{download\PYGZus{}data}\PYG{p}{()}
        \PYG{n}{data} \PYG{o}{=} \PYG{n}{read\PYGZus{}data}\PYG{p}{()}
    \PYG{k}{finally}\PYG{p}{:}
        \PYG{k}{return} \PYG{n}{data}
\PYG{n}{data} \PYG{o}{=} \PYG{n}{try\PYGZus{}download\PYGZus{}data}\PYG{p}{()}
\end{Verbatim}
% Highlighted R listing: rebuilds `last_date` for every country at the final
% date and draws the same cumulative-cases plot with a log10 y axis.
% `palette` is used here but not defined in this listing.
\begin{Verbatim}[commandchars=\\\{\}]
\PYG{n}{last\PYGZus{}date} \PYG{o}{\PYGZlt{}\PYGZhy{}} \PYG{n}{data} \PYG{o}{\PYGZpc{}\PYGZgt{}\PYGZpc{}}
\PYG{n+nf}{gather}\PYG{p}{(}\PYG{n}{Country}\PYG{p}{,} \PYG{n}{Cases}\PYG{p}{,} \PYG{n}{Belgium}\PYG{o}{:}\PYG{n}{UnitedKingdom}\PYG{p}{)} \PYG{o}{\PYGZpc{}\PYGZgt{}\PYGZpc{}}
\PYG{n}{dplyr}\PYG{o}{::}\PYG{n+nf}{filter}\PYG{p}{(}\PYG{n}{Date} \PYG{o}{==} \PYG{n+nf}{tail}\PYG{p}{(}\PYG{n}{data}\PYG{o}{\PYGZdl{}}\PYG{n}{Date}\PYG{p}{,} \PYG{l+m}{1}\PYG{p}{))}
\PYG{n}{data} \PYG{o}{\PYGZpc{}\PYGZgt{}\PYGZpc{}}
\PYG{n+nf}{gather}\PYG{p}{(}\PYG{n}{Country}\PYG{p}{,} \PYG{n}{Cases}\PYG{p}{,} \PYG{n}{Belgium}\PYG{o}{:}\PYG{n}{UnitedKingdom}\PYG{p}{)} \PYG{o}{\PYGZpc{}\PYGZgt{}\PYGZpc{}}
\PYG{n+nf}{ggplot}\PYG{p}{(}\PYG{n+nf}{aes}\PYG{p}{(}\PYG{n}{x}\PYG{o}{=}\PYG{n}{Date}\PYG{p}{,} \PYG{n}{y}\PYG{o}{=}\PYG{n}{Cases}\PYG{p}{,} \PYG{n}{colour}\PYG{o}{=}\PYG{n}{Country}\PYG{p}{))} \PYG{o}{+}
\PYG{n+nf}{geom\PYGZus{}line}\PYG{p}{()} \PYG{o}{+}
\PYG{n+nf}{scale\PYGZus{}x\PYGZus{}date}\PYG{p}{(}\PYG{n}{breaks} \PYG{o}{=} \PYG{n+nf}{pretty\PYGZus{}breaks}\PYG{p}{(}\PYG{l+m}{8}\PYG{p}{),} \PYG{n}{labels} \PYG{o}{=} \PYG{n+nf}{date\PYGZus{}format}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}\PYGZpc{}b \PYGZpc{}Y\PYGZdq{}}\PYG{p}{))} \PYG{o}{+}
\PYG{n+nf}{scale\PYGZus{}color\PYGZus{}manual}\PYG{p}{(}\PYG{n}{values}\PYG{o}{=}\PYG{n}{palette}\PYG{p}{)} \PYG{o}{+}
\PYG{n+nf}{scale\PYGZus{}y\PYGZus{}continuous}\PYG{p}{(}\PYG{n}{trans}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}log10\PYGZdq{}}\PYG{p}{,} \PYG{n}{labels} \PYG{o}{=} \PYG{n+nf}{comma\PYGZus{}format}\PYG{p}{())} \PYG{o}{+}
\PYG{n+nf}{geom\PYGZus{}text\PYGZus{}repel}\PYG{p}{(}\PYG{n}{data}\PYG{o}{=}\PYG{n}{last\PYGZus{}date}\PYG{p}{,} \PYG{n+nf}{aes}\PYG{p}{(}\PYG{n}{label} \PYG{o}{=} \PYG{n}{Country}\PYG{p}{))} \PYG{o}{+}
\PYG{n+nf}{ggtitle}\PYG{p}{(}\PYG{l+s}{\PYGZdq{}Cumulative confirmed cases of Covid\PYGZhy{}19\PYGZdq{}}\PYG{p}{,} \PYG{n}{subtitle}\PYG{o}{=}\PYG{l+s}{\PYGZdq{}In selected countries since the beginning of 2020, on a logarithmic scale\PYGZdq{}}\PYG{p}{)} \PYG{o}{+}
\PYG{n+nf}{theme\PYGZus{}bw}\PYG{p}{()}
\end{Verbatim}
% Highlighted Python listing: copies the header row of `data` into valid_data,
% then keeps every later row whose fields from index 4 onward contain no empty
% string; rows with a missing value are printed instead of kept. Block
% indentation is restored so that the loop and if/else bodies read as valid
% Python.
\begin{Verbatim}[commandchars=\\\{\}]
\PYG{n}{valid\PYGZus{}data} \PYG{o}{=} \PYG{p}{[]}
\PYG{n}{valid\PYGZus{}data}\PYG{o}{.}\PYG{n}{append}\PYG{p}{(}\PYG{n}{data}\PYG{p}{[}\PYG{l+m+mi}{0}\PYG{p}{])}
\PYG{k}{for} \PYG{n}{row} \PYG{o+ow}{in} \PYG{n}{data}\PYG{p}{[}\PYG{l+m+mi}{1}\PYG{p}{:]:}
    \PYG{n}{missing} \PYG{o}{=} \PYG{n+nb}{any}\PYG{p}{([}\PYG{n}{value} \PYG{o}{==} \PYG{l+s+s1}{\PYGZsq{}\PYGZsq{}} \PYG{k}{for} \PYG{n}{value} \PYG{o+ow}{in} \PYG{n}{row}\PYG{p}{[}\PYG{l+m+mi}{4}\PYG{p}{:]])}
    \PYG{k}{if} \PYG{n}{missing}\PYG{p}{:}
        \PYG{n+nb}{print}\PYG{p}{(}\PYG{n}{row}\PYG{p}{)}
    \PYG{k}{else}\PYG{p}{:}
        \PYG{n}{valid\PYGZus{}data}\PYG{o}{.}\PYG{n}{append}\PYG{p}{(}\PYG{n}{row}\PYG{p}{)}
\end{Verbatim}
\makeatletter
% Reset the six per-token style hooks (\PYG@it, \PYG@bf, \PYG@ul, \PYG@tc,
% \PYG@bc, \PYG@ff) to \relax so no styling carries over between tokens.
\def\PYG@reset{\let\PYG@it=\relax \let\PYG@bf=\relax%
\let\PYG@ul=\relax \let\PYG@tc=\relax%
\let\PYG@bc=\relax \let\PYG@ff=\relax}
% Invoke the style macro named PYG@tok@<#1> for a single token type.
\def\PYG@tok#1{\csname PYG@tok@#1\endcsname}
% Walk a "+"-separated list of token types, calling \PYG@tok on each one and
% stopping at the \relax sentinel appended by \PYG.
\def\PYG@toks#1+{\ifx\relax#1\empty\else%
\PYG@tok{#1}\expandafter\PYG@toks\fi}
% Typeset #1 wrapped in whichever style hooks are currently set.
\def\PYG@do#1{\PYG@bc{\PYG@tc{\PYG@ul{%
\PYG@it{\PYG@bf{\PYG@ff{#1}}}}}}}
% \PYG{<types>}{<text>}: reset the hooks, apply the styles for the
% "+"-separated token types in #1, then typeset #2.
\def\PYG#1#2{\PYG@reset\PYG@toks#1+\relax+\PYG@do{#2}}
% Style table: one \PYG@tok@<type> macro per token type. Each one sets the
% text-colour hook \PYG@tc and/or the bold and italic hooks; the "err" type
% instead sets \PYG@bc to draw a red-framed box around the token.
\expandafter\def\csname PYG@tok@w\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.73,0.73}{##1}}}
\expandafter\def\csname PYG@tok@c\endcsname{\let\PYG@it=\textit\def\PYG@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PYG@tok@cp\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.74,0.48,0.00}{##1}}}
\expandafter\def\csname PYG@tok@k\endcsname{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYG@tok@kp\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYG@tok@kt\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.69,0.00,0.25}{##1}}}
\expandafter\def\csname PYG@tok@o\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PYG@tok@ow\endcsname{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}}
\expandafter\def\csname PYG@tok@nb\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYG@tok@nf\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PYG@tok@nc\endcsname{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PYG@tok@nn\endcsname{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PYG@tok@ne\endcsname{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.82,0.25,0.23}{##1}}}
\expandafter\def\csname PYG@tok@nv\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PYG@tok@no\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.53,0.00,0.00}{##1}}}
\expandafter\def\csname PYG@tok@nl\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.63,0.63,0.00}{##1}}}
\expandafter\def\csname PYG@tok@ni\endcsname{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.60,0.60,0.60}{##1}}}
\expandafter\def\csname PYG@tok@na\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.49,0.56,0.16}{##1}}}
\expandafter\def\csname PYG@tok@nt\endcsname{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYG@tok@nd\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}}
\expandafter\def\csname PYG@tok@s\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PYG@tok@sd\endcsname{\let\PYG@it=\textit\def\PYG@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PYG@tok@si\endcsname{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}}
\expandafter\def\csname PYG@tok@se\endcsname{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.73,0.40,0.13}{##1}}}
\expandafter\def\csname PYG@tok@sr\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}}
\expandafter\def\csname PYG@tok@ss\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PYG@tok@sx\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYG@tok@m\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PYG@tok@gh\endcsname{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}}
\expandafter\def\csname PYG@tok@gu\endcsname{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.50,0.00,0.50}{##1}}}
\expandafter\def\csname PYG@tok@gd\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.63,0.00,0.00}{##1}}}
\expandafter\def\csname PYG@tok@gi\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.00,0.63,0.00}{##1}}}
\expandafter\def\csname PYG@tok@gr\endcsname{\def\PYG@tc##1{\textcolor[rgb]{1.00,0.00,0.00}{##1}}}
\expandafter\def\csname PYG@tok@ge\endcsname{\let\PYG@it=\textit}
\expandafter\def\csname PYG@tok@gs\endcsname{\let\PYG@bf=\textbf}
\expandafter\def\csname PYG@tok@gp\endcsname{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}}
\expandafter\def\csname PYG@tok@go\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.53,0.53,0.53}{##1}}}
\expandafter\def\csname PYG@tok@gt\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.00,0.27,0.87}{##1}}}
\expandafter\def\csname PYG@tok@err\endcsname{\def\PYG@bc##1{\setlength{\fboxsep}{0pt}\fcolorbox[rgb]{1.00,0.00,0.00}{1,1,1}{\strut ##1}}}
\expandafter\def\csname PYG@tok@kc\endcsname{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYG@tok@kd\endcsname{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYG@tok@kn\endcsname{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYG@tok@kr\endcsname{\let\PYG@bf=\textbf\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYG@tok@bp\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYG@tok@fm\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PYG@tok@vc\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PYG@tok@vg\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PYG@tok@vi\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PYG@tok@vm\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PYG@tok@sa\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PYG@tok@sb\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PYG@tok@sc\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PYG@tok@dl\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PYG@tok@s2\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PYG@tok@sh\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PYG@tok@s1\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PYG@tok@mb\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PYG@tok@mf\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PYG@tok@mh\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PYG@tok@mi\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PYG@tok@il\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PYG@tok@mo\endcsname{\def\PYG@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PYG@tok@ch\endcsname{\let\PYG@it=\textit\def\PYG@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PYG@tok@cm\endcsname{\let\PYG@it=\textit\def\PYG@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PYG@tok@cpf\endcsname{\let\PYG@it=\textit\def\PYG@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PYG@tok@c1\endcsname{\let\PYG@it=\textit\def\PYG@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PYG@tok@cs\endcsname{\let\PYG@it=\textit\def\PYG@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
% Escape macros: each \PYGZxx prints one literal character through \char so
% that characters with special meaning to TeX can appear inside the listings
% (backslash, underscore, braces, caret, ampersand, angle brackets, hash,
% percent, dollar, hyphen, single and double quote, tilde).
\def\PYGZbs{\char`\\}
\def\PYGZus{\char`\_}
\def\PYGZob{\char`\{}
\def\PYGZcb{\char`\}}
\def\PYGZca{\char`\^}
\def\PYGZam{\char`\&}
\def\PYGZlt{\char`\<}
\def\PYGZgt{\char`\>}
\def\PYGZsh{\char`\#}
\def\PYGZpc{\char`\%}
\def\PYGZdl{\char`\$}
\def\PYGZhy{\char`\-}
\def\PYGZsq{\char`\'}
\def\PYGZdq{\char`\"}
\def\PYGZti{\char`\~}
% for compatibility with earlier versions
% (these three expand to the plain characters @, [ and ])
\def\PYGZat{@}
\def\PYGZlb{[}
\def\PYGZrb{]}
\makeatother
\makeatletter
\def\PYGdefault@reset{\let\PYGdefault@it=\relax \let\PYGdefault@bf=\relax%
\let\PYGdefault@ul=\relax \let\PYGdefault@tc=\relax%
\let\PYGdefault@bc=\relax \let\PYGdefault@ff=\relax}
\def\PYGdefault@tok#1{\csname PYGdefault@tok@#1\endcsname}
\def\PYGdefault@toks#1+{\ifx\relax#1\empty\else%
\PYGdefault@tok{#1}\expandafter\PYGdefault@toks\fi}
\def\PYGdefault@do#1{\PYGdefault@bc{\PYGdefault@tc{\PYGdefault@ul{%
\PYGdefault@it{\PYGdefault@bf{\PYGdefault@ff{#1}}}}}}}
\def\PYGdefault#1#2{\PYGdefault@reset\PYGdefault@toks#1+\relax+\PYGdefault@do{#2}}
\expandafter\def\csname PYGdefault@tok@w\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.73,0.73,0.73}{##1}}}
\expandafter\def\csname PYGdefault@tok@c\endcsname{\let\PYGdefault@it=\textit\def\PYGdefault@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PYGdefault@tok@cp\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.74,0.48,0.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@k\endcsname{\let\PYGdefault@bf=\textbf\def\PYGdefault@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@kp\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@kt\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.69,0.00,0.25}{##1}}}
\expandafter\def\csname PYGdefault@tok@o\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PYGdefault@tok@ow\endcsname{\let\PYGdefault@bf=\textbf\def\PYGdefault@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@nb\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@nf\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@nc\endcsname{\let\PYGdefault@bf=\textbf\def\PYGdefault@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@nn\endcsname{\let\PYGdefault@bf=\textbf\def\PYGdefault@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@ne\endcsname{\let\PYGdefault@bf=\textbf\def\PYGdefault@tc##1{\textcolor[rgb]{0.82,0.25,0.23}{##1}}}
\expandafter\def\csname PYGdefault@tok@nv\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PYGdefault@tok@no\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.53,0.00,0.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@nl\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.63,0.63,0.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@ni\endcsname{\let\PYGdefault@bf=\textbf\def\PYGdefault@tc##1{\textcolor[rgb]{0.60,0.60,0.60}{##1}}}
\expandafter\def\csname PYGdefault@tok@na\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.49,0.56,0.16}{##1}}}
\expandafter\def\csname PYGdefault@tok@nt\endcsname{\let\PYGdefault@bf=\textbf\def\PYGdefault@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@nd\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@s\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PYGdefault@tok@sd\endcsname{\let\PYGdefault@it=\textit\def\PYGdefault@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PYGdefault@tok@si\endcsname{\let\PYGdefault@bf=\textbf\def\PYGdefault@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}}
\expandafter\def\csname PYGdefault@tok@se\endcsname{\let\PYGdefault@bf=\textbf\def\PYGdefault@tc##1{\textcolor[rgb]{0.73,0.40,0.13}{##1}}}
\expandafter\def\csname PYGdefault@tok@sr\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}}
\expandafter\def\csname PYGdefault@tok@ss\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PYGdefault@tok@sx\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@m\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PYGdefault@tok@gh\endcsname{\let\PYGdefault@bf=\textbf\def\PYGdefault@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}}
\expandafter\def\csname PYGdefault@tok@gu\endcsname{\let\PYGdefault@bf=\textbf\def\PYGdefault@tc##1{\textcolor[rgb]{0.50,0.00,0.50}{##1}}}
\expandafter\def\csname PYGdefault@tok@gd\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.63,0.00,0.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@gi\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.00,0.63,0.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@gr\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{1.00,0.00,0.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@ge\endcsname{\let\PYGdefault@it=\textit}
\expandafter\def\csname PYGdefault@tok@gs\endcsname{\let\PYGdefault@bf=\textbf}
\expandafter\def\csname PYGdefault@tok@gp\endcsname{\let\PYGdefault@bf=\textbf\def\PYGdefault@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}}
\expandafter\def\csname PYGdefault@tok@go\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.53,0.53,0.53}{##1}}}
\expandafter\def\csname PYGdefault@tok@gt\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.00,0.27,0.87}{##1}}}
\expandafter\def\csname PYGdefault@tok@err\endcsname{\def\PYGdefault@bc##1{\setlength{\fboxsep}{0pt}\fcolorbox[rgb]{1.00,0.00,0.00}{1,1,1}{\strut ##1}}}
\expandafter\def\csname PYGdefault@tok@kc\endcsname{\let\PYGdefault@bf=\textbf\def\PYGdefault@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@kd\endcsname{\let\PYGdefault@bf=\textbf\def\PYGdefault@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@kn\endcsname{\let\PYGdefault@bf=\textbf\def\PYGdefault@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@kr\endcsname{\let\PYGdefault@bf=\textbf\def\PYGdefault@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@bp\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@fm\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PYGdefault@tok@vc\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PYGdefault@tok@vg\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PYGdefault@tok@vi\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PYGdefault@tok@vm\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PYGdefault@tok@sa\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PYGdefault@tok@sb\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PYGdefault@tok@sc\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PYGdefault@tok@dl\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PYGdefault@tok@s2\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PYGdefault@tok@sh\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PYGdefault@tok@s1\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PYGdefault@tok@mb\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PYGdefault@tok@mf\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PYGdefault@tok@mh\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PYGdefault@tok@mi\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PYGdefault@tok@il\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PYGdefault@tok@mo\endcsname{\def\PYGdefault@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PYGdefault@tok@ch\endcsname{\let\PYGdefault@it=\textit\def\PYGdefault@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PYGdefault@tok@cm\endcsname{\let\PYGdefault@it=\textit\def\PYGdefault@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PYGdefault@tok@cpf\endcsname{\let\PYGdefault@it=\textit\def\PYGdefault@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PYGdefault@tok@c1\endcsname{\let\PYGdefault@it=\textit\def\PYGdefault@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PYGdefault@tok@cs\endcsname{\let\PYGdefault@it=\textit\def\PYGdefault@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\def\PYGdefaultZbs{\char`\\}
\def\PYGdefaultZus{\char`\_}
\def\PYGdefaultZob{\char`\{}
\def\PYGdefaultZcb{\char`\}}
\def\PYGdefaultZca{\char`\^}
\def\PYGdefaultZam{\char`\&}
\def\PYGdefaultZlt{\char`\<}
\def\PYGdefaultZgt{\char`\>}
\def\PYGdefaultZsh{\char`\#}
\def\PYGdefaultZpc{\char`\%}
\def\PYGdefaultZdl{\char`\$}
\def\PYGdefaultZhy{\char`\-}
\def\PYGdefaultZsq{\char`\'}
\def\PYGdefaultZdq{\char`\"}
\def\PYGdefaultZti{\char`\~}
% for compatibility with earlier versions
\def\PYGdefaultZat{@}
\def\PYGdefaultZlb{[}
\def\PYGdefaultZrb{]}
\makeatother
This source diff could not be displayed because it is too large. You can view the blob instead.
{
"cells": [],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
#+TITLE: Your title #+Title: Basic analysis of the SARS-CoV-2 (Covid-19) pandemic
#+AUTHOR: Your name #+Language: en
#+DATE: Today's date #+Author: Louis Boulanger
#+LANGUAGE: en
# #+PROPERTY: header-args :eval never-export * Foreword
In order to process this computational document, you will need to install:
#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="http://www.pirilampo.org/styles/readtheorg/css/htmlize.css"/> - *Emacs* 25.0 or greater (no guarantees on previous versions of Emacs)
#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="http://www.pirilampo.org/styles/readtheorg/css/readtheorg.css"/> - *Python* 3.6.0 or greater
#+HTML_HEAD: <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.3/jquery.min.js"></script> - *R* 3.4
#+HTML_HEAD: <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script>
#+HTML_HEAD: <script type="text/javascript" src="http://www.pirilampo.org/styles/lib/js/jquery.stickytableheaders.js"></script> #+Begin_src python :results output
#+HTML_HEAD: <script type="text/javascript" src="http://www.pirilampo.org/styles/readtheorg/js/readtheorg.js"></script> import sys
if sys.version_info.major < 3 or sys.version_info.minor < 6:
* Some explanations print("Please use Python 3.6 (or higher)!")
#+End_src
This is an org-mode document with code examples in R. Once opened in
Emacs, this document can easily be exported to HTML, PDF, and Office #+RESULTS:
formats. For more information on org-mode, see
https://orgmode.org/guide/. #+Begin_src elisp :results output
(unless (featurep 'ob-python)
When you type the shortcut =C-c C-e h o=, this document will be (print "Please activate python in org-babel (org-babel-do-lnaguages)!"))
exported as HTML. All the code in it will be re-executed, and the
results will be retrieved and included into the exported document. If (unless (featurep 'ob-R)
you do not want to re-execute all code each time, you can delete the # (print "Please activate R in org-babel (org-babel-do-lnaguages)!"))
and the space before ~#+PROPERTY:~ in the header of this document. #+End_src
Like we showed in the video, R code is included as follows (and is #+RESULTS:
executed by typing ~C-c C-c~):
* Introduction
#+begin_src R :results output :exports both The goal of this document is to provide an analysis of the Coronavirus
print("Hello world!") pandemic numbers, in particular the number of cases for a select
#+end_src number of countries since the beginning of the pandemic. The data is
provided by the [[https://systems.jhu.edu][Johns Hopkins University Center for Systems Science and
Engineering (JHU CSSE)]], and freely available on [[https://github.com/CSSEGISandData/COVID-19][GitHub]].
The analysis focuses primarily on the
=time_series_covid19_confirmed_global.csv= file, containing time series
for the confirmed cases for each state/province of each affected
country.
Following the data pre-processing, the analysis will show the
evolution of the cases in:
- Belgium
- China (all provinces except Hong-Kong)
- Hong-Kong
- France (except DOM/TOMs)
- Germany
- Iran
- Italy
- Japan
- South Korea
- The Netherlands (except colonies)
- Portugal
- Spain
- United Kingdom (except colonies)
- United States of America
* Data pre-processing
The data containing the amount of confirmed cases of Covid-19 is taken
from the aforementioned JHU CSSE GitHub repository; specifically, the
file used is:
#+Name: data-url
https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv
. According to the information on the repository, the data is updated
every day at 23:59 UTC. The file contains the following fields:
#+ATTR_LATEX: :environment tabularx :width \textwidth :align llX
| Column name | Type | Description |
|----------------+---------------------+-----------------------------------------------------------------------------|
| Province/State | Text (can be empty) | The state or province of a country, if any. |
| Country/Region | Text | A country or region affected by the Covid-19 pandemic. |
| Lat | Floating number | The latitude of the general location of the country or region |
| Long | Floating number | The longitude of the general location of the country or region |
| <mm/dd/yy> | Integer | The number of cases for the specified country/province on the specified day |
Each of the columns holding the data for a given date is labelled
in the American date format of month/day/year.
** Downloading the data
In order to save time and resources, the data is downloaded only if:
- the file has not been downloaded before,
- or if the file is obsolete, as in the last day recorded is in the
past.
Particular care must be taken for the row containing "Korea, South":
since the document uses commas to separate the fields, we need to make
sure that we don't separate the country name into two separate fields.
#+Begin_src python :session :results output :var data_url=data-url
from urllib.request import urlopen
import datetime
temp_file_name = 'data.csv'

# Downloads the data from GitHub (URL passed in by org-babel as
# `data_url`) and stores the raw bytes in the local file
# `temp_file_name`
def download_data():
    # The context manager closes the HTTP response even if reading fails
    with urlopen(data_url) as response:
        data = response.read()
    # The file is only opened once the download succeeded, so a failed
    # download never truncates an existing local copy
    with open(temp_file_name, 'wb') as f:
        f.write(data)
# Reads the local file and returns its content as a table: a list of
# rows, each row being a list of strings. Raises IOError if the file
# cannot be opened.
def read_data():
    import csv  # local import: only this function needs it

    # newline='' lets the csv module do its own line-ending handling
    with open(temp_file_name, 'r', newline='') as f:
        # csv.reader keeps quoted fields containing commas (such as
        # "Korea, South") in one piece; blank lines, including the
        # trailing one, come out as empty rows and are skipped
        table = [row for row in csv.reader(f) if row]

    # The rest of the document refers to this country as "South Korea"
    return [["South Korea" if field == "Korea, South" else field for field in row]
            for row in table]
# Decides whether or not to download the file from GitHub based on the
# presence of a local file and the last recorded date in the local
# file, and returns the resulting table.
def try_download_data():
    try:
        data = read_data()
    except IOError:
        # No readable local copy: fetch one
        download_data()
        return read_data()

    # An empty local file is treated the same way as an obsolete one
    obsolete = True
    if data and data[0]:
        # The last column header is the most recent recorded date
        last_date = datetime.datetime.strptime(data[0][-1], "%m/%d/%y")
        today = datetime.datetime.today()
        # The keyword is `days`: `day=1` raises a TypeError
        obsolete = today - last_date > datetime.timedelta(days=1)

    if obsolete:
        print("Data obsolete, downloading new data...")
        download_data()
        data = read_data()
    # No `finally: return`: it would silently discard any exception
    # raised above
    return data

data = try_download_data()
#+End_src
#+RESULTS:
: Python 3.8.6 (default, Sep 30 2020, 04:00:38)
: [GCC 10.2.0] on linux
: Type "help", "copyright", "credits" or "license" for more information.
: >>> python.el: native completion setup loaded
Let's print the first few rows, showing the first five columns and the last two dates.
#+Begin_src python :session :results value :exports results
[[x for x in y[:5] + y[-2:]] for y in data[:1]] + [None] + [[x for x in y[:5] + y[-2:]] for y in data[1:5]]
#+End_src
#+RESULTS:
| Province/State | Country/Region | Lat | Long | 1/22/20 | 12/14/20 | 12/15/20 |
|----------------+----------------+----------+-----------+---------+----------+----------|
| | Afghanistan | 33.93911 | 67.709953 | 0 | 48718 | 48952 |
| | Albania | 41.1533 | 20.1683 | 0 | 49191 | 50000 |
| | Algeria | 28.0339 | 1.6596 | 0 | 92597 | 93065 |
| | Andorra | 42.5063 | 1.5218 | 0 | 7382 | 7382 |
** Checking for missing data
The data is generated automatically, but one can never be too prudent:
let's check whether some data is malformed or missing. We can assume that the
first 4 columns, containing information about the countries and
provinces, are correct, since they are the key of the real data.
#+Begin_src python :session :results output
# Keep the header row, then every row with no empty cell among its
# date columns (index 4 onwards); incomplete rows are printed instead
valid_data = [data[0]]
for record in data[1:]:
    if '' in record[4:]:
        print(record)
    else:
        valid_data.append(record)
#+End_src
#+RESULTS:
** Extracting the relevant information
As mentioned in the introduction, we only care for a few countries in
the list; we will filter the rows in which we are not
interested. There is also little use for the =Long= and =Lat= columns, so
we will drop them.
We also need to group the different Chinese provinces into one row,
and add the values together, while counting Hong Kong as a separate
country.
#+Begin_src python :session :results output
# Selection rules, as [province, country] pairs:
#  - [None, country]: the row of that country whose province is empty
#  - [province, None]: the rows of that province
#  - [province, country]: the rows of that country, except that province
target_countries = [
    [None, "Belgium"],
    ["Hong Kong", None],
    ["Hong Kong", "China"],  # China without Hong Kong
    [None, "France"],
    [None, "Germany"],
    [None, "Iran"],
    [None, "Italy"],
    [None, "Japan"],
    [None, "South Korea"],
    [None, "Netherlands"],
    [None, "United Kingdom"],
    [None, "US"]
]

# Returns True when the (province, country) pair of a row matches at
# least one of the rules in `target_countries`
def is_target_country(province, country):
    for wanted_province, wanted_country in target_countries:
        # Rule [province, None]
        if wanted_province == province and wanted_country is None:
            return True
        # Rule [province, country]
        if wanted_province is not None and wanted_province != province and wanted_country == country:
            return True
        # Rule [None, country]
        if wanted_province is None and province == "" and wanted_country == country:
            return True
    return False
# Builds `extracted_data`: a header row (country column followed by the
# date columns), then one row per selected country. Hong Kong gets its
# own row; all other Chinese provinces are summed into one "China" row.
extracted_data = []
extracted_data.append([data[0][1]] + data[0][4:])
for row in valid_data[1:]:
    if "Korea" in row[1]:
        print(row[1])
    if is_target_country(row[0], row[1]):
        if row[0] == "Hong Kong":
            extracted_data.append(["Hong Kong"] + row[4:])
        elif row[1] == "China":
            # Position of the China row, if one was already created
            china_idx = next((i for i, r in enumerate(extracted_data) if r[0] == "China"), None)
            if china_idx is None:
                extracted_data.append(["China"] + row[4:])
            else:
                # Accumulate into the China row itself. Using the last
                # row here would add to Hong Kong (or any other row
                # appended since) as soon as China is no longer last.
                extracted_data[china_idx][1:] = [int(a) + int(b) for a, b in zip(extracted_data[china_idx][1:], row[4:])]
        else:
            extracted_data.append([row[1]] + row[4:])
#+End_src
#+RESULTS:
: South Korea
Let's look at the last five days of the countries we selected.
#+Begin_src python :session :results value :exports results
# [[x for x in y[:1] + y[-5:]] for y in extracted_data]
[extracted_data[0][:1] + extracted_data[0][-5:]] + [None] + [[x for x in y[:1] + y[-5:]] for y in extracted_data[1:]]
#+End_src
#+RESULTS:
| Country/Region | 12/11/20 | 12/12/20 | 12/13/20 | 12/14/20 | 12/15/20 |
|----------------+----------+----------+----------+----------+----------|
| Belgium | 600397 | 603159 | 608137 | 609211 | 611422 |
| China | 8673 | 8742 | 8838 | 8920 | 9018 |
| Hong Kong | 7377 | 7446 | 7541 | 7623 | 7721 |
| France | 2350923 | 2350793 | 2376228 | 2379291 | 2390419 |
| Germany | 1314309 | 1336101 | 1350810 | 1357261 | 1391086 |
| Iran | 1092407 | 1100818 | 1108269 | 1115770 | 1123474 |
| Italy | 1805873 | 1825775 | 1843712 | 1855737 | 1870576 |
| Japan | 175310 | 178272 | 180639 | 182311 | 184752 |
| South Korea | 41736 | 42766 | 43484 | 44364 | 45442 |
| Netherlands | 594523 | 603603 | 613487 | 621944 | 628577 |
| US | 15913292 | 16134237 | 16325615 | 16518420 | 16716777 |
| United Kingdom | 1809455 | 1830956 | 1849403 | 1869666 | 1888116 |
** Date conversion
The dates are currently expressed using the American format
=mm/dd/yy=. We will need to convert them into proper dates in order to
analyze the data further.
#+Begin_src python :session :results none
extracted_data[0][1:] = [datetime.datetime.strptime(date, "%m/%d/%y") for date in extracted_data[0][1:]]
#+End_src
Now, let's take a look at the last two days again:
#+Begin_src python :session :results value :exports results
# [[str(x) for x in y[:1] + y[-5:]] for y in extracted_data]
[extracted_data[0][:1] + [str(x) for x in extracted_data[0][-2:]]] + [None] + [[str(x) for x in y[:1] + y[-2:]] for y in extracted_data[1:]]
#+End_src
#+RESULTS:
| Country/Region | 2020-12-14 00:00:00 | 2020-12-15 00:00:00 |
|----------------+---------------------+---------------------|
| Belgium | 609211 | 611422 |
| China | 8920 | 9018 |
| Hong Kong | 7623 | 7721 |
| France | 2379291 | 2390419 |
| Germany | 1357261 | 1391086 |
| Iran | 1115770 | 1123474 |
| Italy | 1855737 | 1870576 |
| Japan | 182311 | 184752 |
| South Korea | 44364 | 45442 |
| Netherlands | 621944 | 628577 |
| US | 16518420 | 16716777 |
| United Kingdom | 1869666 | 1888116 |
** Conversion into a regular table
Right now, each date is represented as a column; we will flip the
table and have the dates as rows, and the countries as columns.
#+Begin_src python :session :results none
# Transpose the table: one row per date, one column per country, with
# every cell converted to a string
column_count = len(extracted_data[0])
flipped_data = [[str(record[col]) for record in extracted_data] for col in range(column_count)]
# The first row now holds the column names; strip the spaces from them
flipped_data[0][0] = "Date"
flipped_data[0] = [name.replace(" ", "") for name in flipped_data[0]]
#+End_src
Let's look at the data for a few countries now:
#+Begin_src python :session :results value :exports results
[flipped_data[0][:4]] + [None] + [x[:4] for x in flipped_data[-5:]]
#+End_src
#+RESULTS:
| Date | Belgium | China | HongKong |
|---------------------+---------+-------+----------|
| 2020-12-11 00:00:00 | 600397 | 8673 | 7377 |
| 2020-12-12 00:00:00 | 603159 | 8742 | 7446 |
| 2020-12-13 00:00:00 | 608137 | 8838 | 7541 |
| 2020-12-14 00:00:00 | 609211 | 8920 | 7623 |
| 2020-12-15 00:00:00 | 611422 | 9018 | 7721 |
This format is much more usable now.
** Transferring the data from Python to R
We will switch from Python to R for the analysis, since R is a much
better tool than Python for that. We will use org-mode's data exchange
utility in order to transfer the data.
#+Name: data-for-R
#+Begin_src python :session :results silent
[flipped_data[0], None] + flipped_data[1:]
#+End_src
In R, we get the data in the form of a data-frame, and the strings must
be converted.
#+Begin_src R :session *R* :results output :var data=data-for-R :exports both
data$Date <- as.Date(data$Date)
summary(data)
#+End_src
#+RESULTS: #+RESULTS:
: [1] "Hello world!" #+begin_example
Date Belgium China HongKong
Min. :2020-01-22 Min. : 0 Min. : 10 Min. : 0
1st Qu.:2020-04-13 1st Qu.: 30589 1st Qu.:2276 1st Qu.:1009
Median :2020-07-04 Median : 61838 Median :2527 Median :1258
Mean :2020-07-04 Mean :132884 Mean :3903 Mean :2682
3rd Qu.:2020-09-24 3rd Qu.:108768 3rd Qu.:6338 3rd Qu.:5056
Max. :2020-12-15 Max. :611422 Max. :9018 Max. :7721
France Germany Iran Italy
Min. : 0 Min. : 0 Min. : 0 Min. : 0
1st Qu.: 110836 1st Qu.: 130072 1st Qu.: 73303 1st Qu.: 159516
Median : 197994 Median : 197198 Median : 237878 Median : 241419
Mean : 494998 Mean : 278940 Mean : 301025 Mean : 362680
3rd Qu.: 513732 3rd Qu.: 281346 3rd Qu.: 436319 3rd Qu.: 304323
Max. :2390419 Max. :1391086 Max. :1123474 Max. :1870576
Japan SouthKorea Netherlands US
Min. : 2 Min. : 1 Min. : 0 Min. : 1
1st Qu.: 7773 1st Qu.:10537 1st Qu.: 26551 1st Qu.: 585518
Median : 19461 Median :13091 Median : 50548 Median : 2833290
Mean : 45932 Mean :15651 Mean :118150 Mean : 4329085
3rd Qu.: 80490 3rd Qu.:23455 3rd Qu.:103141 3rd Qu.: 6972152
Max. :184752 Max. :45442 Max. :628577 Max. :16716777
UnitedKingdom
Min. : 0
1st Qu.: 97068
Median : 284900
Mean : 420031
3rd Qu.: 416363
Max. :1888116
#+end_example
And now the same but in an R session. This is the most frequent ** Quick analysis of the data
situation, because R is really an interactive language. With a Now, we can inspect the data and look at the curve for a country, for
session, R's state, i.e. the values of all the variables, remains example, France.
persistent from one code block to the next. The code is still executed
using ~C-c C-c~.
#+begin_src R :results output :session *R* :exports both #+Begin_src R :session *R* :results output graphics file :file first_plot.png :exports both
summary(cars) plot(data[,'Date'], data[,'France'], xlab="Date",
#+end_src ylab="Confirmed cases in Metropolitan France, cumulative")
#+End_src
#+RESULTS: #+RESULTS:
: speed dist [[file:first_plot.png]]
: Min. : 4.0 Min. : 2.00
: 1st Qu.:12.0 1st Qu.: 26.00 * Comparative analysis of the accumulated cases in the selected countries
: Median :15.0 Median : 36.00 ** Linear scale
: Mean :15.4 Mean : 42.98 Let's build a graph showing the confirmed cases for all of the
: 3rd Qu.:19.0 3rd Qu.: 56.00 selected countries, on the same graph. The goal of such a graphic is
: Max. :25.0 Max. :120.00 to compare the countries, determine outliers from a glance, and see
the general shape of the phenomenon across different places in the
Finally, an example for graphical output: world. An /interactive/ graph might be easier to read and parse, such as
#+begin_src R :results output graphics :file "./cars.png" :exports results :width 600 :height 400 :session *R* the excellent ones in [[https://www.ft.com/content/a2901ce8-5eb7-4633-b89c-cbdf5b386938][the Financial Times website]].
plot(cars) #+Begin_src R :session *R* :results output graphics file :file comparative.png :width 1000 :exports both
#+end_src library(tidyverse)
library(ggrepel)
library(scales)
last_date <- data %>%
gather(Country, Cases, Belgium:UnitedKingdom) %>%
dplyr::filter(Date == tail(data$Date, 1), Country %in% c("US", "France", "Germany", "China"))
# Color-blind friendly palette taken from
# https://bconnelly.net/posts/creating_colorblind-friendly_figures/
# with added grayscale values
palette <- c("#000000", "#E69F00", "#56B4E9", "#009E73",
"#292929", "#555555", "#999999", "#BBBBBB",
"#F0E442", "#0072B2", "#D55E00", "#CC79A7")
data %>%
gather(Country, Cases, Belgium:UnitedKingdom) %>%
ggplot(aes(x=Date, y=Cases, colour=Country)) +
geom_line() +
scale_x_date(breaks = pretty_breaks(8), labels = date_format("%b %Y")) +
scale_color_manual(values=palette) +
scale_y_continuous(labels = comma_format(), breaks = pretty_breaks(8)) +
geom_text_repel(data=last_date, aes(label = Country)) +
ggtitle("Cumulative confirmed cases of Covid-19", subtitle="In selected countries since the beginning of 2020") +
theme_bw()
#+End_src
#+RESULTS:
[[file:comparative.png]]
This graph is dominated by the spike of the confirmed cases in the
United States, which outweighs the other countries'. It is also clear
that the second wave, which started for the selected countries in
mid-October of 2020, was significantly larger than the first wave.
** Logarithmic scale
Let's now take a look at the same graph, but with a logarithmic scale.
#+Begin_src R :session *R* :results output graphics file :file comparative_log.png :width 1000 :exports both
# Same graph as the linear-scale one, with a log10 y axis and a label
# for every country. `palette` and the loaded libraries come from the
# linear-scale block, which runs in the same *R* session.
# Rows of the most recent date only, one per country, used as the
# data for the country-name labels
last_date <- data %>%
gather(Country, Cases, Belgium:UnitedKingdom) %>%
dplyr::filter(Date == tail(data$Date, 1))
# NOTE(review): several countries have 0 cases at the earliest dates,
# and log10(0) is -Inf; presumably ggplot2 warns about or drops those
# points -- confirm.
data %>%
gather(Country, Cases, Belgium:UnitedKingdom) %>%
ggplot(aes(x=Date, y=Cases, colour=Country)) +
geom_line() +
scale_x_date(breaks = pretty_breaks(8), labels = date_format("%b %Y")) +
scale_color_manual(values=palette) +
scale_y_continuous(trans="log10", labels = comma_format()) +
geom_text_repel(data=last_date, aes(label = Country)) +
ggtitle("Cumulative confirmed cases of Covid-19", subtitle="In selected countries since the beginning of 2020, on a logarithmic scale") +
theme_bw()
#+End_src
#+RESULTS: #+RESULTS:
[[file:./cars.png]] [[file:comparative_log.png]]
Note the parameter ~:exports results~, which indicates that the code This graph shows in better details the different waves of infection
will not appear in the exported document. We recommend that in the among the selected countries. We can clearly see how the pandemic
context of this MOOC, you always leave this parameter setting as started in China, starting in January, and was mitigated in
~:exports both~, because we want your analyses to be perfectly mid-February; while the other countries experienced the rapid growth
transparent and reproducible. of the cases in March. The period of international lockdown during the
spring and summer is visible, as well as the different waves of
Watch out: the figure generated by the code block is /not/ stored in infections later in the year.
the org document. It's a plain file, here named ~cars.png~. You have
to commit it explicitly if you want your analysis to be legible and
understandable on GitLab.
Finally, don't forget that we provide in the resource section of this
MOOC a configuration with a few keyboard shortcuts that allow you to
quickly create code blocks in R by typing ~<r~ or ~<R~ followed by
~Tab~.
Now it's your turn! You can delete all this information and replace it
by your computational document.
% Created 2020-12-19 Sat 02:39
% Intended LaTeX compiler: xelatex
\documentclass[11pt]{article}
\usepackage{graphicx}
\usepackage{grffile}
\usepackage{longtable}
\usepackage{wrapfig}
\usepackage{rotating}
\usepackage[normalem]{ulem}
\usepackage{amsmath}
\usepackage{textcomp}
\usepackage{amssymb}
\usepackage{capt-of}
\usepackage{hyperref}
\usepackage{minted}
\author{Louis Boulanger}
\date{\today}
\title{Basic analysis of the SARS-CoV-2 (Covid-19) pandemic}
\hypersetup{
pdfauthor={Louis Boulanger},
pdftitle={Basic analysis of the SARS-CoV-2 (Covid-19) pandemic},
pdfkeywords={},
pdfsubject={},
pdfcreator={Emacs 27.1 (Org mode 9.3)},
pdflang={English}}
\begin{document}
\maketitle
\tableofcontents
\section{Foreword}
\label{sec:org99ae971}
In order to process this computational document, you will need to install:
\begin{itemize}
\item \textbf{Emacs} 25.0 or greater (no guarantees on previous versions of Emacs)
\item \textbf{Python} 3.6.0 or greater
\item \textbf{R} 3.4
\end{itemize}
\begin{minted}[breaklines=true,breakanywhere=true]{python}
import sys
# Compare the (major, minor) pair as a whole: testing the two numbers
# separately would wrongly reject a release such as 4.0
if sys.version_info < (3, 6):
    print("Please use Python 3.6 (or higher)!")
\end{minted}
\begin{minted}[breaklines=true,breakanywhere=true]{elisp}
;; Warn when the org-babel backends needed by this document are not
;; loaded; they are enabled through `org-babel-do-load-languages'.
(unless (featurep 'ob-python)
  (print "Please activate python in org-babel (org-babel-do-load-languages)!"))
(unless (featurep 'ob-R)
  (print "Please activate R in org-babel (org-babel-do-load-languages)!"))
\end{minted}
\section{Introduction}
\label{sec:org0b75636}
The goal of this document is to provide an analysis of the Coronavirus
pandemic numbers, in particular the number of cases for a select
number of countries since the beginning of the pandemic. The data is
provided by the \href{https://systems.jhu.edu}{Johns Hopkins University Center for Systems Science and
Engineering (JHU CSSE)}, and freely available on \href{https://github.com/CSSEGISandData/COVID-19}{GitHub}.
The analysis focuses primarily on the
\texttt{time\_series\_covid19\_confirmed\_global.csv} file, containing time series
for the confirmed cases for each state/province of each affected
country.
Following the data pre-processing, the analysis will show the
evolution of the cases in:
\begin{itemize}
\item Belgium
\item China (all provinces except Hong-Kong)
\item Hong-Kong
\item France (except DOM/TOMs)
\item Germany
\item Iran
\item Italy
\item Japan
\item South Korea
\item The Netherlands (except colonies)
\item Portugal
\item Spain
\item United Kingdom (except colonies)
\item United States of America
\end{itemize}
\section{Data pre-processing}
\label{sec:orgdf640b3}
The data containing the amount of confirmed cases of Covid-19 is taken
from the aforementioned JHU CSSE GitHub repository; specifically, the
file used is:
\url{https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse\_covid\_19\_data/csse\_covid\_19\_time\_series/time\_series\_covid19\_confirmed\_global.csv}
. According to the information on the repository, the data is updated
every day at 23:59 UTC. The file contains the following fields:
\begin{center}
\begin{tabularx}{\textwidth}{llX}
Column name & Type & Description\\
\hline
Province/State & Text (can be empty) & The state or province of a country, if any.\\
Country/Region & Text & A country or region affected by the Covid-19 pandemic.\\
Lat & Floating number & The latitude of the general location of the country or region\\
Long & Floating number & The longitude of the general location of the country or region\\
<mm/dd/yy> & Integer & The number of cases for the specified country/province on the specified day\\
\end{tabularx}
\end{center}
Each of the columns holding the data for a given date is labelled
in the American date format of month/day/year.
\subsection{Downloading the data}
\label{sec:org11c5886}
In order to save time and resources, the data is downloaded only if:
\begin{itemize}
\item the file has not been downloaded before,
\item or if the file is obsolete, as in the last day recorded is in the
past.
\end{itemize}
Particular care must be taken for the row containing "Korea, South":
since the document uses commas to separate the fields, we need to make
sure that we don't separate the country name into two separate fields.
\begin{minted}[breaklines=true,breakanywhere=true]{python}
from urllib.request import urlopen
import datetime
temp_file_name = 'data.csv'

# Downloads the data from GitHub and stores the raw bytes in the local
# file `temp_file_name`
def download_data():
    # The context manager closes the HTTP response even if reading fails
    with urlopen(data_url) as response:
        data = response.read()
    # The file is only opened once the download succeeded, so a failed
    # download never truncates an existing local copy
    with open(temp_file_name, 'wb') as f:
        f.write(data)
# Reads the local file and returns its content as a table: a list of
# rows, each row being a list of strings. Raises IOError if the file
# cannot be opened.
def read_data():
    import csv  # local import: only this function needs it

    # newline='' lets the csv module do its own line-ending handling
    with open(temp_file_name, 'r', newline='') as f:
        # csv.reader keeps quoted fields containing commas (such as
        # "Korea, South") in one piece; blank lines, including the
        # trailing one, come out as empty rows and are skipped
        table = [row for row in csv.reader(f) if row]

    # The rest of the document refers to this country as "South Korea"
    return [["South Korea" if field == "Korea, South" else field for field in row]
            for row in table]
# Decides whether or not to download the file from GitHub based on the
# presence of a local file and the last recorded date in the local
# file, and returns the resulting table.
def try_download_data():
    try:
        data = read_data()
    except IOError:
        # No readable local copy: fetch one
        download_data()
        return read_data()

    # An empty local file is treated the same way as an obsolete one
    obsolete = True
    if data and data[0]:
        # The last column header is the most recent recorded date
        last_date = datetime.datetime.strptime(data[0][-1], "%m/%d/%y")
        today = datetime.datetime.today()
        # The keyword is `days`: `day=1` raises a TypeError
        obsolete = today - last_date > datetime.timedelta(days=1)

    if obsolete:
        print("Data obsolete, downloading new data...")
        download_data()
        data = read_data()
    # No `finally: return`: it would silently discard any exception
    # raised above
    return data

data = try_download_data()
\end{minted}
Let's print the first few rows, showing the first five columns and the last two dates.
\begin{center}
\begin{tabular}{llrrrrr}
Province/State & Country/Region & Lat & Long & 1/22/20 & 12/14/20 & 12/15/20\\
\hline
& Afghanistan & 33.93911 & 67.709953 & 0 & 48718 & 48952\\
& Albania & 41.1533 & 20.1683 & 0 & 49191 & 50000\\
& Algeria & 28.0339 & 1.6596 & 0 & 92597 & 93065\\
& Andorra & 42.5063 & 1.5218 & 0 & 7382 & 7382\\
\end{tabular}
\end{center}
\subsection{Checking for missing data}
\label{sec:orgd84b1ee}
The data is generated automatically, but one can never be too prudent:
let's check whether some data is malformed or missing. We can assume that the
first 4 columns, containing information about the countries and
provinces, are correct, since they are the key of the real data.
\begin{minted}[breaklines=true,breakanywhere=true]{python}
valid_data = []
valid_data.append(data[0])
for row in data[1:]:
missing = any([value == '' for value in row[4:]])
if missing:
print(row)
else:
valid_data.append(row)
\end{minted}
\subsection{Extracting the relevant information}
\label{sec:org59c8f3a}
As mentioned in the introduction, we only care for a few countries in
the list; we will filter the rows in which we are not
interested. There is also little use for the \texttt{Long} and \texttt{Lat} columns, so
we will drop them.
We also need to group the different Chinese provinces into one row,
and add the values together, while counting Hong Kong as a separate
country.
\begin{minted}[breaklines=true,breakanywhere=true]{python}
# Each entry is a [province, country] pattern read by is_target_country():
#   [None, country]     -> the row of that country whose province is empty
#   [province, None]    -> that province on its own, whatever its country
#   [province, country] -> every row of that country EXCEPT that province
target_countries = [
    [None, "Belgium"],
    ["Hong Kong", None],
    ["Hong Kong", "China"],  # China without Hong Kong
    [None, "France"],
    [None, "Germany"],
    [None, "Iran"],
    [None, "Italy"],
    [None, "Japan"],
    [None, "South Korea"],
    [None, "Netherlands"],
    [None, "United Kingdom"],
    [None, "US"],
]


def is_target_country(province, country):
    """Tell whether a (province, country) pair matches a pattern of target_countries.

    province -- content of the Province/State column ("" when there is none)
    country  -- content of the Country/Region column

    Returns True as soon as one pattern matches, False otherwise.
    """
    for wanted_province, wanted_country in target_countries:
        # Pattern [province, None]: the province alone is enough.
        province_alone = wanted_province == province and wanted_country is None
        # Pattern [province, country]: the country, minus the named province.
        country_minus_province = (
            wanted_province is not None
            and wanted_province != province
            and wanted_country == country
        )
        # Pattern [None, country]: only the row that has no province.
        country_without_provinces = (
            wanted_province is None
            and province == ""
            and wanted_country == country
        )
        if province_alone or country_minus_province or country_without_provinces:
            return True
    return False
# Build the reduced table: a header row (country label + one column per
# date), then one row per selected country with its case counts. The
# Lat/Long columns (indices 2 and 3) are dropped.
extracted_data = [[data[0][1]] + data[0][4:]]

# Row that accumulates all Chinese provinces except Hong Kong; it is
# created when the first such province is met.
china_row = None

for row in valid_data[1:]:
    # Looks like a debugging aid: shows how Korea is spelled in the dataset.
    if "Korea" in row[1]:
        print(row[1])

    if not is_target_country(row[0], row[1]):
        continue

    counts = row[4:]
    if row[0] == "Hong Kong":
        # Hong Kong is reported as a separate entry.
        extracted_data.append(["Hong Kong"] + counts)
    elif row[1] == "China":
        if china_row is None:
            china_row = ["China"] + counts
            extracted_data.append(china_row)
        else:
            # Add this province to the China row itself. Summing into the
            # last appended row would be wrong: that row can be Hong Kong's.
            china_row[1:] = [int(a) + int(b) for a, b in zip(china_row[1:], counts)]
    else:
        extracted_data.append([row[1]] + counts)
\end{minted}
Let's look at the last five days of the countries we selected.
\begin{center}
\begin{tabular}{lrrrrr}
Country/Region & 12/11/20 & 12/12/20 & 12/13/20 & 12/14/20 & 12/15/20\\
\hline
Belgium & 600397 & 603159 & 608137 & 609211 & 611422\\
China & 8673 & 8742 & 8838 & 8920 & 9018\\
Hong Kong & 7377 & 7446 & 7541 & 7623 & 7721\\
France & 2350923 & 2350793 & 2376228 & 2379291 & 2390419\\
Germany & 1314309 & 1336101 & 1350810 & 1357261 & 1391086\\
Iran & 1092407 & 1100818 & 1108269 & 1115770 & 1123474\\
Italy & 1805873 & 1825775 & 1843712 & 1855737 & 1870576\\
Japan & 175310 & 178272 & 180639 & 182311 & 184752\\
South Korea & 41736 & 42766 & 43484 & 44364 & 45442\\
Netherlands & 594523 & 603603 & 613487 & 621944 & 628577\\
US & 15913292 & 16134237 & 16325615 & 16518420 & 16716777\\
United Kingdom & 1809455 & 1830956 & 1849403 & 1869666 & 1888116\\
\end{tabular}
\end{center}
\subsection{Date conversion}
\label{sec:orge0c1a46}
The dates are currently expressed using the American format
\texttt{mm/dd/yy}. We will need to convert them into proper dates in order to
analyze the data further.
\begin{minted}[breaklines=true,breakanywhere=true]{python}
# Replace the date labels of the header row (everything after the first
# cell) with datetime objects parsed from the mm/dd/yy format.
extracted_data[0][1:] = [datetime.datetime.strptime(date, "%m/%d/%y") for date in extracted_data[0][1:]]
\end{minted}
Now, let's take a look at the last two days again:
\begin{center}
\begin{tabular}{lrr}
Country/Region & 2020-12-14 00:00:00 & 2020-12-15 00:00:00\\
\hline
Belgium & 609211 & 611422\\
China & 8920 & 9018\\
Hong Kong & 7623 & 7721\\
France & 2379291 & 2390419\\
Germany & 1357261 & 1391086\\
Iran & 1115770 & 1123474\\
Italy & 1855737 & 1870576\\
Japan & 182311 & 184752\\
South Korea & 44364 & 45442\\
Netherlands & 621944 & 628577\\
US & 16518420 & 16716777\\
United Kingdom & 1869666 & 1888116\\
\end{tabular}
\end{center}
\subsection{Conversion into a regular table}
\label{sec:orga28a7b4}
Right now, each date is represented as a column; we will flip the
table and have the dates as rows, and the countries as columns.
\begin{minted}[breaklines=true,breakanywhere=true]{python}
# Transpose the table: line k of flipped_data gathers, as strings, the
# k-th cell of every row of extracted_data (one line per date, one
# column per country).
flipped_data = [
    [str(record[position]) for record in extracted_data]
    for position in range(len(extracted_data[0]))
]
# The top-left cell now heads the column of dates.
flipped_data[0][0] = "Date"
# Remove spaces from the header names (presumably so that they can be
# used as column names on the R side).
flipped_data[0] = [name.replace(" ", "") for name in flipped_data[0]]
\end{minted}
Let's look at the data for a few countries now:
\begin{center}
\begin{tabular}{lrrr}
Date & Belgium & China & HongKong\\
\hline
2020-12-11 00:00:00 & 600397 & 8673 & 7377\\
2020-12-12 00:00:00 & 603159 & 8742 & 7446\\
2020-12-13 00:00:00 & 608137 & 8838 & 7541\\
2020-12-14 00:00:00 & 609211 & 8920 & 7623\\
2020-12-15 00:00:00 & 611422 & 9018 & 7721\\
\end{tabular}
\end{center}
This format is much more usable now.
\subsection{Transferring the data from Python to R}
\label{sec:org81b54b0}
We will switch from Python to R for the analysis, since R is a much
better tool than Python for that. We will use org-mode's data exchange
utility in order to transfer the data.
\begin{minted}[breaklines=true,breakanywhere=true]{python}
# Value handed over to org-mode: the header line, a None entry, then the
# data lines. NOTE(review): the None is presumably rendered by org-babel
# as a horizontal rule under the header -- confirm.
[flipped_data[0], None] + flipped_data[1:]
\end{minted}
In R, we get the data in the form of a data-frame, and the strings must
be converted.
\begin{minted}[breaklines=true,breakanywhere=true]{r}
# The Date column arrives as strings: convert it to R Date values.
data$Date <- as.Date(data$Date)
# Per-column summary, as a quick sanity check of the imported table.
summary(data)
\end{minted}
\begin{verbatim}
Date Belgium China HongKong
Min. :2020-01-22 Min. : 0 Min. : 10 Min. : 0
1st Qu.:2020-04-13 1st Qu.: 30589 1st Qu.:2276 1st Qu.:1009
Median :2020-07-04 Median : 61838 Median :2527 Median :1258
Mean :2020-07-04 Mean :132884 Mean :3903 Mean :2682
3rd Qu.:2020-09-24 3rd Qu.:108768 3rd Qu.:6338 3rd Qu.:5056
Max. :2020-12-15 Max. :611422 Max. :9018 Max. :7721
France Germany Iran Italy
Min. : 0 Min. : 0 Min. : 0 Min. : 0
1st Qu.: 110836 1st Qu.: 130072 1st Qu.: 73303 1st Qu.: 159516
Median : 197994 Median : 197198 Median : 237878 Median : 241419
Mean : 494998 Mean : 278940 Mean : 301025 Mean : 362680
3rd Qu.: 513732 3rd Qu.: 281346 3rd Qu.: 436319 3rd Qu.: 304323
Max. :2390419 Max. :1391086 Max. :1123474 Max. :1870576
Japan SouthKorea Netherlands US
Min. : 2 Min. : 1 Min. : 0 Min. : 1
1st Qu.: 7773 1st Qu.:10537 1st Qu.: 26551 1st Qu.: 585518
Median : 19461 Median :13091 Median : 50548 Median : 2833290
Mean : 45932 Mean :15651 Mean :118150 Mean : 4329085
3rd Qu.: 80490 3rd Qu.:23455 3rd Qu.:103141 3rd Qu.: 6972152
Max. :184752 Max. :45442 Max. :628577 Max. :16716777
UnitedKingdom
Min. : 0
1st Qu.: 97068
Median : 284900
Mean : 420031
3rd Qu.: 416363
Max. :1888116
\end{verbatim}
\subsection{Quick analysis of the data}
\label{sec:orgc158620}
Now, we can inspect the data and look at the curve for a country, for
example, France.
\begin{minted}[breaklines=true,breakanywhere=true]{r}
# Scatter plot of the France column against the dates.
plot(data[,'Date'], data[,'France'], xlab="Date",
ylab="Confirmed cases in Metropolitan France, cumulative")
\end{minted}
\begin{center}
\includegraphics[width=.9\linewidth]{first_plot.png}
\end{center}
\section{Comparative analysis of the accumulated cases in the selected countries}
\label{sec:orgff34256}
\subsection{Linear scale}
\label{sec:orgb6a521b}
Let's build a graph showing the confirmed cases for all of the
selected countries, on the same graph. The goal of such a graphic is
to compare the countries, spot outliers at a glance, and see
the general shape of the phenomenon across different places in the
world. An \emph{interactive} graph might be easier to read and parse, such as
the excellent ones on \href{https://www.ft.com/content/a2901ce8-5eb7-4633-b89c-cbdf5b386938}{the Financial Times website}.
\begin{minted}[breaklines=true,breakanywhere=true]{r}
# tidyverse supplies the pipe, gather() and ggplot(); ggrepel supplies
# geom_text_repel(); scales supplies the break and label helpers.
library(tidyverse)
library(ggrepel)
library(scales)
# Long-format rows (one per country) for the date of the last row of
# data, restricted to four countries. These rows are the data of the
# text labels drawn by geom_text_repel() below.
last_date <- data %>%
gather(Country, Cases, Belgium:UnitedKingdom) %>%
dplyr::filter(Date == tail(data$Date, 1), Country %in% c("US", "France", "Germany", "China"))
# Color-blind friendly palette taken from
# https://bconnelly.net/posts/creating_colorblind-friendly_figures/
# with added grayscale values
# (12 colours in total, passed to scale_color_manual() below)
palette <- c("#000000", "#E69F00", "#56B4E9", "#009E73",
"#292929", "#555555", "#999999", "#BBBBBB",
"#F0E442", "#0072B2", "#D55E00", "#CC79A7")
# Reshape the country columns into (Country, Cases) pairs and draw one
# line per country on a linear y axis.
data %>%
gather(Country, Cases, Belgium:UnitedKingdom) %>%
ggplot(aes(x=Date, y=Cases, colour=Country)) +
geom_line() +
scale_x_date(breaks = pretty_breaks(8), labels = date_format("%b %Y")) +
scale_color_manual(values=palette) +
scale_y_continuous(labels = comma_format(), breaks = pretty_breaks(8)) +
geom_text_repel(data=last_date, aes(label = Country)) +
ggtitle("Cumulative confirmed cases of Covid-19", subtitle="In selected countries since the beginning of 2020") +
theme_bw()
\end{minted}
\begin{center}
\includegraphics[width=.9\linewidth]{comparative.png}
\end{center}
This graph is dominated by the spike of the confirmed cases in the
United States, which outweighs the other countries'. It is also clear
that the second wave, which started for the selected countries in
mid-October of 2020, was significantly larger than the first wave.
\subsection{Logarithmic scale}
\label{sec:org1f0602d}
Let's now take a look at the same graph, but with a logarithmic scale.
\begin{minted}[breaklines=true,breakanywhere=true]{r}
# Long-format rows for the date of the last row of data, this time for
# every country: each line of the plot gets a text label.
last_date <- data %>%
gather(Country, Cases, Belgium:UnitedKingdom) %>%
dplyr::filter(Date == tail(data$Date, 1))
# Same plot as on the linear scale, except that the y axis uses a log10
# transformation. It reuses the `palette` vector, which is not defined in
# this block.
data %>%
gather(Country, Cases, Belgium:UnitedKingdom) %>%
ggplot(aes(x=Date, y=Cases, colour=Country)) +
geom_line() +
scale_x_date(breaks = pretty_breaks(8), labels = date_format("%b %Y")) +
scale_color_manual(values=palette) +
scale_y_continuous(trans="log10", labels = comma_format()) +
geom_text_repel(data=last_date, aes(label = Country)) +
ggtitle("Cumulative confirmed cases of Covid-19", subtitle="In selected countries since the beginning of 2020, on a logarithmic scale") +
theme_bw()
\end{minted}
\begin{center}
\includegraphics[width=.9\linewidth]{comparative_log.png}
\end{center}
This graph shows in greater detail the different waves of infection
among the selected countries. We can clearly see how the pandemic
started in China, starting in January, and was mitigated in
mid-February; while the other countries experienced the rapid growth
of the cases in March. The period of international lockdown during the
spring and summer is visible, as well as the different waves of
infections later in the year.
\end{document}
#+TITLE: Votre titre
#+AUTHOR: Votre nom
#+DATE: La date du jour
#+LANGUAGE: fr
# #+PROPERTY: header-args :eval never-export
#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="http://www.pirilampo.org/styles/readtheorg/css/htmlize.css"/>
#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="http://www.pirilampo.org/styles/readtheorg/css/readtheorg.css"/>
#+HTML_HEAD: <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.3/jquery.min.js"></script>
#+HTML_HEAD: <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script>
#+HTML_HEAD: <script type="text/javascript" src="http://www.pirilampo.org/styles/lib/js/jquery.stickytableheaders.js"></script>
#+HTML_HEAD: <script type="text/javascript" src="http://www.pirilampo.org/styles/readtheorg/js/readtheorg.js"></script>
* Quelques explications
Ceci est un document org-mode avec quelques exemples de code
R. Une fois ouvert dans emacs, ce document peut aisément être
exporté au format HTML, PDF, et Office. Pour plus de détails sur
org-mode vous pouvez consulter https://orgmode.org/guide/.
Lorsque vous utiliserez le raccourci =C-c C-e h o=, ce document sera
compilé en html. Tout le code contenu sera ré-exécuté, les résultats
récupérés et inclus dans un document final. Si vous ne souhaitez pas
ré-exécuter tout le code à chaque fois, il vous suffit de supprimer
le # et l'espace qui sont devant le ~#+PROPERTY:~ au début de ce
document.
Comme nous vous l'avons montré dans la vidéo, on inclut du code
R de la façon suivante (et on l'exécute en faisant ~C-c C-c~):
#+begin_src R :results output :exports both
print("Hello world!")
#+end_src
#+RESULTS:
: [1] "Hello world!"
Voici la même chose, mais avec une session R (c'est le cas le
plus courant, R étant vraiment un langage interactif), donc une
persistance d'un bloc à l'autre (et on l'exécute toujours en faisant
~C-c C-c~).
#+begin_src R :results output :session *R* :exports both
summary(cars)
#+end_src
#+RESULTS:
: speed dist
: Min. : 4.0 Min. : 2.00
: 1st Qu.:12.0 1st Qu.: 26.00
: Median :15.0 Median : 36.00
: Mean :15.4 Mean : 42.98
: 3rd Qu.:19.0 3rd Qu.: 56.00
: Max. :25.0 Max. :120.00
Et enfin, voici un exemple de sortie graphique:
#+begin_src R :results output graphics :file "./cars.png" :exports results :width 600 :height 400 :session *R*
plot(cars)
#+end_src
#+RESULTS:
[[file:./cars.png]]
Vous remarquerez le paramètre ~:exports results~ qui indique que le code
ne doit pas apparaître dans la version finale du document. Nous vous
recommandons dans le cadre de ce MOOC de ne pas changer ce paramètre
(indiquer ~both~) car l'objectif est que vos analyses de données soient
parfaitement transparentes pour être reproductibles.
Attention, la figure ainsi générée n'est pas stockée dans le document
org. C'est un fichier ordinaire, ici nommé ~cars.png~. N'oubliez pas
de le committer si vous voulez que votre analyse soit lisible et
compréhensible sur GitLab.
Enfin, pour les prochains exercices, nous ne vous fournirons pas
forcément de fichier de départ, ça sera à vous de le créer, par
exemple en repartant de ce document et de le commiter vers
gitlab. N'oubliez pas que nous vous fournissons dans les ressources de
ce MOOC une configuration avec un certain nombre de raccourcis
claviers permettant de créer rapidement les blocs de code R (en
faisant ~<r~ ou ~<R~ suivi de ~Tab~).
Maintenant, à vous de jouer! Vous pouvez effacer toutes ces
informations et les remplacer par votre document computationnel.
---
title: "Your title"
author: "Your name"
date: "Today's date"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
## Some explanations
This is an R Markdown document that you can easily export to HTML, PDF, and MS Word formats. For more information on R Markdown, see <http://rmarkdown.rstudio.com>.
When you click on the button **Knit**, the document will be compiled in order to re-execute the R code and to include the results into the final document. As we have shown in the video, R code is inserted as follows:
```{r cars}
summary(cars)
```
It is also straightforward to include figures. For example:
```{r pressure, echo=FALSE}
plot(pressure)
```
Note the parameter `echo = FALSE` that indicates that the code will not appear in the final version of the document. We recommend not to use this parameter in the context of this MOOC, because we want your data analyses to be perfectly transparent and reproducible.
Since the results are not stored in Rmd files, you should generate an HTML or PDF version of your exercises and commit them. Otherwise reading and checking your analysis will be difficult for anyone else but you.
Now it's your turn! You can delete all this information and replace it by your computational document.
{
"cells": [],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
---
title: "Votre titre"
author: "Votre nom"
date: "La date du jour"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
## Quelques explications
Ceci est un document R markdown que vous pouvez aisément exporter au format HTML, PDF, et MS Word. Pour plus de détails sur R Markdown consultez <http://rmarkdown.rstudio.com>.
Lorsque vous cliquerez sur le bouton **Knit** ce document sera compilé afin de ré-exécuter le code R et d'inclure les résultats dans un document final. Comme nous vous l'avons montré dans la vidéo, on inclut du code R de la façon suivante:
```{r cars}
summary(cars)
```
Et on peut aussi aisément inclure des figures. Par exemple:
```{r pressure, echo=FALSE}
plot(pressure)
```
Vous remarquerez le paramètre `echo = FALSE` qui indique que le code ne doit pas apparaître dans la version finale du document. Nous vous recommandons dans le cadre de ce MOOC de ne pas utiliser ce paramètre car l'objectif est que vos analyses de données soient parfaitement transparentes pour être reproductibles.
Comme les résultats ne sont pas stockés dans les fichiers Rmd, pour faciliter la relecture de vos analyses par d'autres personnes, vous aurez donc intérêt à générer un HTML ou un PDF et à le commiter.
Maintenant, à vous de jouer! Vous pouvez effacer toutes ces informations et les remplacer par votre document computationnel.
{
"cells": [],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
#+TITLE: Your title
#+AUTHOR: Your name
#+DATE: Today's date
#+LANGUAGE: en
# #+PROPERTY: header-args :eval never-export
#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="http://www.pirilampo.org/styles/readtheorg/css/htmlize.css"/>
#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="http://www.pirilampo.org/styles/readtheorg/css/readtheorg.css"/>
#+HTML_HEAD: <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.3/jquery.min.js"></script>
#+HTML_HEAD: <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script>
#+HTML_HEAD: <script type="text/javascript" src="http://www.pirilampo.org/styles/lib/js/jquery.stickytableheaders.js"></script>
#+HTML_HEAD: <script type="text/javascript" src="http://www.pirilampo.org/styles/readtheorg/js/readtheorg.js"></script>
* Some explanations
This is an org-mode document with code examples in R. Once opened in
Emacs, this document can easily be exported to HTML, PDF, and Office
formats. For more information on org-mode, see
https://orgmode.org/guide/.
When you type the shortcut =C-c C-e h o=, this document will be
exported as HTML. All the code in it will be re-executed, and the
results will be retrieved and included into the exported document. If
you do not want to re-execute all code each time, you can delete the #
and the space before ~#+PROPERTY:~ in the header of this document.
Like we showed in the video, Python code is included as follows (and
is executed by typing ~C-c C-c~):
#+begin_src python :results output :exports both
print("Hello world!")
#+end_src
#+RESULTS:
: Hello world!
And now the same but in a Python session. With a session, Python's
state, i.e. the values of all the variables, remains persistent from
one code block to the next. The code is still executed using ~C-c
C-c~.
#+begin_src python :results output :session :exports both
import numpy
x=numpy.linspace(-15,15)
print(x)
#+end_src
#+RESULTS:
#+begin_example
[-15. -14.3877551 -13.7755102 -13.16326531 -12.55102041
-11.93877551 -11.32653061 -10.71428571 -10.10204082 -9.48979592
-8.87755102 -8.26530612 -7.65306122 -7.04081633 -6.42857143
-5.81632653 -5.20408163 -4.59183673 -3.97959184 -3.36734694
-2.75510204 -2.14285714 -1.53061224 -0.91836735 -0.30612245
0.30612245 0.91836735 1.53061224 2.14285714 2.75510204
3.36734694 3.97959184 4.59183673 5.20408163 5.81632653
6.42857143 7.04081633 7.65306122 8.26530612 8.87755102
9.48979592 10.10204082 10.71428571 11.32653061 11.93877551
12.55102041 13.16326531 13.7755102 14.3877551 15. ]
#+end_example
Finally, an example for graphical output:
#+begin_src python :results output file :session :var matplot_lib_filename="./cosxsx.png" :exports results
import matplotlib.pyplot as plt
plt.figure(figsize=(10,5))
plt.plot(x,numpy.cos(x)/x)
plt.tight_layout()
plt.savefig(matplot_lib_filename)
print(matplot_lib_filename)
#+end_src
#+RESULTS:
[[file:./cosxsx.png]]
Note the parameter ~:exports results~, which indicates that the code
will not appear in the exported document. We recommend that in the
context of this MOOC, you always leave this parameter setting as
~:exports both~, because we want your analyses to be perfectly
transparent and reproducible.
Watch out: the figure generated by the code block is /not/ stored in
the org document. It's a plain file, here named ~cosxsx.png~. You have
to commit it explicitly if you want your analysis to be legible and
understandable on GitLab.
Finally, don't forget that we provide in the resource section of this
MOOC a configuration with a few keyboard shortcuts that allow you to
quickly create code blocks in Python by typing ~<p~, ~<P~ or ~<PP~
followed by ~Tab~.
Now it's your turn! You can delete all this information and replace it
by your computational document.
#+TITLE: Votre titre
#+AUTHOR: Votre nom
#+DATE: La date du jour
#+LANGUAGE: fr
# #+PROPERTY: header-args :eval never-export
#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="http://www.pirilampo.org/styles/readtheorg/css/htmlize.css"/>
#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="http://www.pirilampo.org/styles/readtheorg/css/readtheorg.css"/>
#+HTML_HEAD: <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.3/jquery.min.js"></script>
#+HTML_HEAD: <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script>
#+HTML_HEAD: <script type="text/javascript" src="http://www.pirilampo.org/styles/lib/js/jquery.stickytableheaders.js"></script>
#+HTML_HEAD: <script type="text/javascript" src="http://www.pirilampo.org/styles/readtheorg/js/readtheorg.js"></script>
* Quelques explications
Ceci est un document org-mode avec quelques exemples de code
python. Une fois ouvert dans emacs, ce document peut aisément être
exporté au format HTML, PDF, et Office. Pour plus de détails sur
org-mode vous pouvez consulter https://orgmode.org/guide/.
Lorsque vous utiliserez le raccourci =C-c C-e h o=, ce document sera
compilé en html. Tout le code contenu sera ré-exécuté, les résultats
récupérés et inclus dans un document final. Si vous ne souhaitez pas
ré-exécuter tout le code à chaque fois, il vous suffit de supprimer
le # et l'espace qui sont devant le ~#+PROPERTY:~ au début de ce
document.
Comme nous vous l'avons montré dans la vidéo, on inclut du code
python de la façon suivante (et on l'exécute en faisant ~C-c C-c~):
#+begin_src python :results output :exports both
print("Hello world!")
#+end_src
#+RESULTS:
: Hello world!
Voici la même chose, mais avec une session python, donc une
persistance d'un bloc à l'autre (et on l'exécute toujours en faisant
~C-c C-c~).
#+begin_src python :results output :session :exports both
import numpy
x=numpy.linspace(-15,15)
print(x)
#+end_src
#+RESULTS:
#+begin_example
[-15. -14.3877551 -13.7755102 -13.16326531 -12.55102041
-11.93877551 -11.32653061 -10.71428571 -10.10204082 -9.48979592
-8.87755102 -8.26530612 -7.65306122 -7.04081633 -6.42857143
-5.81632653 -5.20408163 -4.59183673 -3.97959184 -3.36734694
-2.75510204 -2.14285714 -1.53061224 -0.91836735 -0.30612245
0.30612245 0.91836735 1.53061224 2.14285714 2.75510204
3.36734694 3.97959184 4.59183673 5.20408163 5.81632653
6.42857143 7.04081633 7.65306122 8.26530612 8.87755102
9.48979592 10.10204082 10.71428571 11.32653061 11.93877551
12.55102041 13.16326531 13.7755102 14.3877551 15. ]
#+end_example
Et enfin, voici un exemple de sortie graphique:
#+begin_src python :results output file :session :var matplot_lib_filename="./cosxsx.png" :exports results
import matplotlib.pyplot as plt
plt.figure(figsize=(10,5))
plt.plot(x,numpy.cos(x)/x)
plt.tight_layout()
plt.savefig(matplot_lib_filename)
print(matplot_lib_filename)
#+end_src
#+RESULTS:
[[file:./cosxsx.png]]
Vous remarquerez le paramètre ~:exports results~ qui indique que le code
ne doit pas apparaître dans la version finale du document. Nous vous
recommandons dans le cadre de ce MOOC de ne pas changer ce paramètre
(indiquer ~both~) car l'objectif est que vos analyses de données soient
parfaitement transparentes pour être reproductibles.
Attention, la figure ainsi générée n'est pas stockée dans le document
org. C'est un fichier ordinaire, ici nommé ~cosxsx.png~. N'oubliez pas
de le committer si vous voulez que votre analyse soit lisible et
compréhensible sur GitLab.
Enfin, n'oubliez pas que nous vous fournissons dans les ressources de
ce MOOC une configuration avec un certain nombre de raccourcis
claviers permettant de créer rapidement les blocs de code python (en
faisant ~<p~, ~<P~ ou ~<PP~ suivi de ~Tab~).
Maintenant, à vous de jouer! Vous pouvez effacer toutes ces
informations et les remplacer par votre document computationnel.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment