diff --git a/module3/exo2/annual_incidence.png b/module3/exo2/annual_incidence.png new file mode 100644 index 0000000000000000000000000000000000000000..047cd5afd28d57b8bf9e269a5ecfb6fb8f15bee6 Binary files /dev/null and b/module3/exo2/annual_incidence.png differ diff --git a/module3/exo2/exercice_python_en.org b/module3/exo2/exercice_python_en.org index 88c2aa5fbf3be2f6a73c4453c3f701479e137d09..6a6ed478011a87604e74af2c4cff2163ea2d8fdb 100644 --- a/module3/exo2/exercice_python_en.org +++ b/module3/exo2/exercice_python_en.org @@ -61,8 +61,8 @@ Alright, looks good 👍 ** Sanitise the data -But there may be problems with the data. Let's check for the obvious -case of missing/empty data: +But the data may not be without its issues. Let's check for the +obvious case of missing/empty data: #+begin_src python :results output :exports both valid_table = [] @@ -97,29 +97,29 @@ separators in the resulting table (which contains the first five and last five weeks' data): #+begin_src python :results value :exports both -[('week', 'inc'), None] + data[:5] + [None] + data[-5:] +[('week', 'incidence'), None] + data[:5] + [None] + data[-5:] #+end_src #+RESULTS: -| week | inc | -|--------+-------| -| 202416 | 19330 | -| 202415 | 24807 | -| 202414 | 16181 | -| 202413 | 18322 | -| 202412 | 12818 | -|--------+-------| -| 199101 | 15565 | -| 199052 | 19375 | -| 199051 | 19080 | -| 199050 | 11079 | -| 199049 | 1143 | +| week | incidence | +|--------+-----------| +| 202416 | 19330 | +| 202415 | 24807 | +| 202414 | 16181 | +| 202413 | 18322 | +| 202412 | 12818 | +|--------+-----------| +| 199101 | 15565 | +| 199052 | 19375 | +| 199051 | 19080 | +| 199050 | 11079 | +| 199049 | 1143 | ** Convert dates Dates are represented in ISO 8601 format (YYYYWW) so let's parse -those. It should already be sorted chronologically, but let's make -sure of that too. +those. Entries are sorted chronologically, but in reverse, so we'll +fix that here too. 
#+begin_src python :results silent :exports both import datetime @@ -133,40 +133,42 @@ Let's check again: #+begin_src python :results value :exports both data_as_str = [(str(date), str(inc)) for date, inc in converted_data] -[('date', 'inc'), None] + data_as_str[:5] + [None] + data_as_str[-5:] +[('date', 'incidence'), None] + data_as_str[:5] + [None] + data_as_str[-5:] #+end_src #+RESULTS: -| date | inc | -|------------+-------| -| 1990-12-03 | 1143 | -| 1990-12-10 | 11079 | -| 1990-12-17 | 19080 | -| 1990-12-24 | 19375 | -| 1990-12-31 | 15565 | -|------------+-------| -| 2024-03-18 | 12818 | -| 2024-03-25 | 18322 | -| 2024-04-01 | 16181 | -| 2024-04-08 | 24807 | -| 2024-04-15 | 19330 | +| date | incidence | +|------------+-----------| +| 1990-12-03 | 1143 | +| 1990-12-10 | 11079 | +| 1990-12-17 | 19080 | +| 1990-12-24 | 19375 | +| 1990-12-31 | 15565 | +|------------+-----------| +| 2024-03-18 | 12818 | +| 2024-03-25 | 18322 | +| 2024-04-01 | 16181 | +| 2024-04-08 | 24807 | +| 2024-04-15 | 19330 | ** Visual inspection So, now we can take a look at incidence over time. (The 'flu notebook switches to R here, but we're going to stick with python.) 
-#+begin_src python :results output file :var filename="./incidence.png" :exports both +#+begin_src python :results value file :var filename="./incidence.png" :exports both import matplotlib.pyplot as plt plt.clf() date,incidence = zip(*converted_data) +plt.figure(figsize=(7.5,5)) + plt.plot(date,incidence) -plt.tight_layout() +# plt.tight_layout() plt.savefig(filename) -print(filename) +filename #+end_src #+RESULTS: @@ -174,17 +176,17 @@ print(filename) And we can zoom in on a period of, say, five years: -#+begin_src python :results output file :var filename="./incidence-zoom.png" :exports both +#+begin_src python :results value file :var filename="./incidence-zoom.png" :exports both plt.clf() -start = 10 +start = 15 years = 5 date,incidence = zip(*converted_data[52*start:52*(start+years)]) plt.plot(date,incidence) -plt.tight_layout() +# plt.tight_layout() plt.savefig(filename) -print(filename) +filename #+end_src #+RESULTS: @@ -192,3 +194,78 @@ print(filename) It looks like incidence peaks in the spring, with lowest numbers around September. + +* Study of annual incidence + +** Compute annual incidence + +So, let's calculate the incidence for each year. We'll define this +as the sum of the incidence reports from the beginning of September in +year /N-1/ to the end of August in year /N/. + +#+begin_src python :results silent :exports both +def annual_incidence(year): + start = datetime.datetime.strptime(f"{year-1}-09-01", '%Y-%m-%d').date() + end = datetime.datetime.strptime(f"{year}-09-01", '%Y-%m-%d').date() + weeks = [d for d in converted_data if d[0] > start and d[0] <= end] + return sum([w[1] for w in weeks]) +#+end_src + +That was quick and dirty and not entirely a nice time. + +Now we can define the years we're interested in. These are the years +for which we have a full year's worth of data: + +#+begin_src python :results silent :exports both +years = range(1991, 2024) +#+end_src + +NB. The second argument to ~range~ is non-inclusive.
+ +Now we can perform a list comprehension to get the incidence for each +year: + +#+begin_src python :results value :exports both +incidence_per_year = [(y,annual_incidence(y)) for y in years] +head, *tail = incidence_per_year +head +#+end_src + +#+RESULTS: +| 1991 | 553895 | + +** Visual Inspection + +Now we can plot incidence against year. + +#+begin_src python :results value file :var filename="./annual_incidence.png" :exports both +plt.clf() + +years,incidences = zip(*incidence_per_year) +plt.bar(years,incidences) +plt.ylabel("Annual incidence") + +plt.savefig(filename) +filename +#+end_src + +#+RESULTS: +[[file:./annual_incidence.png]] + +Eyeballing the plots, it looks like although 2003-04 had the greatest +spikes in terms of weekly cases, 2009-10 featured longer spells of +consistently high numbers of cases. + +Anyway, let's get the strongest and weakest epidemics: + +#+begin_src python :results value :exports both +key = lambda y: y[1] +strongest = max(incidence_per_year, key=key) +weakest = min(incidence_per_year, key=key) + +(strongest,weakest) +#+end_src + +#+RESULTS: +| 2009 | 841233 | +| 2020 | 221183 | diff --git a/module3/exo2/incidence-zoom.png b/module3/exo2/incidence-zoom.png index cdd883514a1265f8ce5adc1f004cfd21cf52bacf..7bdbabf7ed83d7d2d07f430c3b8a5db5e6498431 100644 Binary files a/module3/exo2/incidence-zoom.png and b/module3/exo2/incidence-zoom.png differ diff --git a/module3/exo2/incidence.png b/module3/exo2/incidence.png index 6272d94af348d9c9caa02fe799eb8c41e5c22169..490e610eddad45aeb2d71134df3cbbb55deb71ed 100644 Binary files a/module3/exo2/incidence.png and b/module3/exo2/incidence.png differ