Travail évalué par les pairs
+Table des matières
+ +1 Préparation des données
++Voici le lien qui correspond aux données traitées dans ce +document. Elles ont été téléchargées le 10 Juin 2020 à 21:51 +
+ +1.1 Téléchargement des données
++Nous vérifions si le fichier existe. Si ce n'est pas le cas nous le +téléchargeons. +
import os
import urllib.request

# Local cache of the dataset; the download below is skipped when it exists.
donnees_file = 'cas-confirmes-covid19.csv'

# NOTE(review): donnees_url is not defined in this snippet -- presumably it is
# injected by an org-mode header argument (:var); confirm before running
# this code outside of org-mode.
if not os.path.exists(donnees_file):
    urllib.request.urlretrieve(donnees_url, donnees_file)
+Nous commençons le traitement par l'extraction des données. +Ensuite nous découpons le contenu du fichier en lignes, en éliminant les espaces qui pourraient se trouver autour des mots. +Nous conservons toutes les lignes qui sont découpées en colonnes en utilisant le séparateur ','. +
# Read the raw bytes of the local CSV copy; the context manager guarantees
# that the file handle is closed once the content has been read.
with open(donnees_file, 'rb') as fichier:
    donnees = fichier.read()

# Split the decoded text into lines. The file uses Windows line endings, so
# splitting on '\n' alone would leave a trailing '\r' on the last field of
# every row; it is removed here.
lines = [line.rstrip('\r') for line in donnees.decode('latin-1').strip().split('\n')]
donnees_lines = lines[0:]

# NOTE(review): a plain split(',') does not honour CSV quoting, so a row whose
# quoted field contains a comma gets one extra column. The csv module would
# handle this, but it would shift the column indexes used further down.
table = [line.split(',') for line in donnees_lines]
1.2 Tri des données
++Nous affichons le début du tableau afin d'avoir un aperçu des données +
# Preview: the header row followed by the first nine data rows.
table[0:10]
Province/State | +Country/Region | +Lat | +Long | +1/22/20 | +1/23/20 | +1/24/20 | +1/25/20 | +1/26/20 | +1/27/20 | +1/28/20 | +1/29/20 | +1/30/20 | +1/31/20 | +2/1/20 | +2/2/20 | +2/3/20 | +2/4/20 | +2/5/20 | +2/6/20 | +2/7/20 | +2/8/20 | +2/9/20 | +2/10/20 | +2/11/20 | +2/12/20 | +2/13/20 | +2/14/20 | +2/15/20 | +2/16/20 | +2/17/20 | +2/18/20 | +2/19/20 | +2/20/20 | +2/21/20 | +2/22/20 | +2/23/20 | +2/24/20 | +2/25/20 | +2/26/20 | +2/27/20 | +2/28/20 | +2/29/20 | +3/1/20 | +3/2/20 | +3/3/20 | +3/4/20 | +3/5/20 | +3/6/20 | +3/7/20 | +3/8/20 | +3/9/20 | +3/10/20 | +3/11/20 | +3/12/20 | +3/13/20 | +3/14/20 | +3/15/20 | +3/16/20 | +3/17/20 | +3/18/20 | +3/19/20 | +3/20/20 | +3/21/20 | +3/22/20 | +3/23/20 | +3/24/20 | +3/25/20 | +3/26/20 | +3/27/20 | +3/28/20 | +3/29/20 | +3/30/20 | +3/31/20 | +4/1/20 | +4/2/20 | +4/3/20 | +4/4/20 | +4/5/20 | +4/6/20 | +4/7/20 | +4/8/20 | +4/9/20 | +4/10/20 | +4/11/20 | +4/12/20 | +4/13/20 | +4/14/20 | +4/15/20 | +4/16/20 | +4/17/20 | +4/18/20 | +4/19/20 | +4/20/20 | +4/21/20 | +4/22/20 | +4/23/20 | +4/24/20 | +4/25/20 | +4/26/20 | +4/27/20 | +4/28/20 | +4/29/20 | +4/30/20 | +5/1/20 | +5/2/20 | +5/3/20 | +5/4/20 | +5/5/20 | +5/6/20 | +5/7/20 | +5/8/20 | +5/9/20 | +5/10/20 | +5/11/20 | +5/12/20 | +5/13/20 | +5/14/20 | +5/15/20 | +5/16/20 | +5/17/20 | +5/18/20 | +5/19/20 | +5/20/20 | +5/21/20 | +5/22/20 | +5/23/20 | +5/24/20 | +5/25/20 | +5/26/20 | +5/27/20 | +5/28/20 | +5/29/20 | +5/30/20 | +5/31/20 | +6/1/20 | +6/2/20 | +6/3/20 | +6/4/20 | +6/5/20 | +6/6/20 | +6/7/20 | +6/8/20 | +6/9/20\r | +
+ | Afghanistan | +33.0 | +65.0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +4 | +4 | +5 | +7 | +7 | +7 | +11 | +16 | +21 | +22 | +22 | +22 | +24 | +24 | +40 | +40 | +74 | +84 | +94 | +110 | +110 | +120 | +170 | +174 | +237 | +273 | +281 | +299 | +349 | +367 | +423 | +444 | +484 | +521 | +555 | +607 | +665 | +714 | +784 | +840 | +906 | +933 | +996 | +1026 | +1092 | +1176 | +1279 | +1351 | +1463 | +1531 | +1703 | +1828 | +1939 | +2171 | +2335 | +2469 | +2704 | +2894 | +3224 | +3392 | +3563 | +3778 | +4033 | +4402 | +4687 | +4963 | +5226 | +5639 | +6053 | +6402 | +6664 | +7072 | +7653 | +8145 | +8676 | +9216 | +9998 | +10582 | +11173 | +11831 | +12456 | +13036 | +13659 | +14525 | +15205 | +15750 | +16509 | +17267 | +18054 | +18969 | +19551 | +20342 | +20917 | +21459\r | +
+ | Albania | +41.1533 | +20.1683 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +2 | +10 | +12 | +23 | +33 | +38 | +42 | +51 | +55 | +59 | +64 | +70 | +76 | +89 | +104 | +123 | +146 | +174 | +186 | +197 | +212 | +223 | +243 | +259 | +277 | +304 | +333 | +361 | +377 | +383 | +400 | +409 | +416 | +433 | +446 | +467 | +475 | +494 | +518 | +539 | +548 | +562 | +584 | +609 | +634 | +663 | +678 | +712 | +726 | +736 | +750 | +766 | +773 | +782 | +789 | +795 | +803 | +820 | +832 | +842 | +850 | +856 | +868 | +872 | +876 | +880 | +898 | +916 | +933 | +946 | +948 | +949 | +964 | +969 | +981 | +989 | +998 | +1004 | +1029 | +1050 | +1076 | +1099 | +1122 | +1137 | +1143 | +1164 | +1184 | +1197 | +1212 | +1232 | +1246 | +1263 | +1299\r | +
+ | Algeria | +28.0339 | +1.6596 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +1 | +1 | +1 | +1 | +1 | +1 | +3 | +5 | +12 | +12 | +17 | +17 | +19 | +20 | +20 | +20 | +24 | +26 | +37 | +48 | +54 | +60 | +74 | +87 | +90 | +139 | +201 | +230 | +264 | +302 | +367 | +409 | +454 | +511 | +584 | +716 | +847 | +986 | +1171 | +1251 | +1320 | +1423 | +1468 | +1572 | +1666 | +1761 | +1825 | +1914 | +1983 | +2070 | +2160 | +2268 | +2418 | +2534 | +2629 | +2718 | +2811 | +2910 | +3007 | +3127 | +3256 | +3382 | +3517 | +3649 | +3848 | +4006 | +4154 | +4295 | +4474 | +4648 | +4838 | +4997 | +5182 | +5369 | +5558 | +5723 | +5891 | +6067 | +6253 | +6442 | +6629 | +6821 | +7019 | +7201 | +7377 | +7542 | +7728 | +7918 | +8113 | +8306 | +8503 | +8697 | +8857 | +8997 | +9134 | +9267 | +9394 | +9513 | +9626 | +9733 | +9831 | +9935 | +10050 | +10154 | +10265 | +10382\r | +
+ | Andorra | +42.5063 | +1.5218 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +2 | +39 | +39 | +53 | +75 | +88 | +113 | +133 | +164 | +188 | +224 | +267 | +308 | +334 | +370 | +376 | +390 | +428 | +439 | +466 | +501 | +525 | +545 | +564 | +583 | +601 | +601 | +638 | +646 | +659 | +673 | +673 | +696 | +704 | +713 | +717 | +717 | +723 | +723 | +731 | +738 | +738 | +743 | +743 | +743 | +745 | +745 | +747 | +748 | +750 | +751 | +751 | +752 | +752 | +754 | +755 | +755 | +758 | +760 | +761 | +761 | +761 | +761 | +761 | +761 | +762 | +762 | +762 | +762 | +762 | +763 | +763 | +763 | +763 | +764 | +764 | +764 | +765 | +844 | +851 | +852 | +852 | +852 | +852 | +852 | +852\r | +
+ | Angola | +-11.2027 | +17.8739 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +1 | +2 | +2 | +3 | +3 | +3 | +4 | +4 | +5 | +7 | +7 | +7 | +8 | +8 | +8 | +10 | +14 | +16 | +17 | +19 | +19 | +19 | +19 | +19 | +19 | +19 | +19 | +19 | +19 | +24 | +24 | +24 | +24 | +25 | +25 | +25 | +25 | +26 | +27 | +27 | +27 | +27 | +30 | +35 | +35 | +35 | +36 | +36 | +36 | +43 | +43 | +45 | +45 | +45 | +45 | +48 | +48 | +48 | +48 | +50 | +52 | +52 | +58 | +60 | +61 | +69 | +70 | +70 | +71 | +74 | +81 | +84 | +86 | +86 | +86 | +86 | +86 | +86 | +88 | +91 | +92 | +96\r | +
+ | Antigua and Barbuda | +17.0608 | +-61.7964 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +3 | +3 | +3 | +7 | +7 | +7 | +7 | +7 | +7 | +7 | +9 | +15 | +15 | +15 | +15 | +19 | +19 | +19 | +19 | +21 | +21 | +23 | +23 | +23 | +23 | +23 | +23 | +23 | +23 | +23 | +24 | +24 | +24 | +24 | +24 | +24 | +24 | +24 | +24 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +25 | +26 | +26 | +26 | +26 | +26 | +26 | +26 | +26 | +26 | +26\r | +
+ | Argentina | +-38.4161 | +-63.6167 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +1 | +1 | +1 | +2 | +8 | +12 | +12 | +17 | +19 | +19 | +31 | +34 | +45 | +56 | +68 | +79 | +97 | +128 | +158 | +266 | +301 | +387 | +387 | +502 | +589 | +690 | +745 | +820 | +1054 | +1054 | +1133 | +1265 | +1451 | +1451 | +1554 | +1628 | +1715 | +1795 | +1975 | +1975 | +2142 | +2208 | +2277 | +2443 | +2571 | +2669 | +2758 | +2839 | +2941 | +3031 | +3144 | +3435 | +3607 | +3780 | +3892 | +4003 | +4127 | +4285 | +4428 | +4532 | +4681 | +4783 | +4887 | +5020 | +5208 | +5371 | +5611 | +5776 | +6034 | +6278 | +6563 | +6879 | +7134 | +7479 | +7805 | +8068 | +8371 | +8809 | +9283 | +9931 | +10649 | +11353 | +12076 | +12628 | +13228 | +13933 | +14702 | +15419 | +16214 | +16851 | +17415 | +18319 | +19268 | +20197 | +21037 | +22020 | +22794 | +23620 | +24761\r | +
+ | Armenia | +40.0691 | +45.0382 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +1 | +4 | +8 | +18 | +26 | +52 | +78 | +84 | +115 | +136 | +160 | +194 | +235 | +249 | +265 | +290 | +329 | +407 | +424 | +482 | +532 | +571 | +663 | +736 | +770 | +822 | +833 | +853 | +881 | +921 | +937 | +967 | +1013 | +1039 | +1067 | +1111 | +1159 | +1201 | +1248 | +1291 | +1339 | +1401 | +1473 | +1523 | +1596 | +1677 | +1746 | +1808 | +1867 | +1932 | +2066 | +2148 | +2273 | +2386 | +2507 | +2619 | +2782 | +2884 | +3029 | +3175 | +3313 | +3392 | +3538 | +3718 | +3860 | +4044 | +4283 | +4472 | +4823 | +5041 | +5271 | +5606 | +5928 | +6302 | +6661 | +7113 | +7402 | +7774 | +8216 | +8676 | +8927 | +9282 | +9492 | +10009 | +10524 | +11221 | +11817 | +12364 | +13130 | +13325 | +13675\r | +
Australian Capital Territory | +Australia | +-35.4735 | +149.0124 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +0 | +1 | +1 | +1 | +2 | +2 | +3 | +4 | +6 | +9 | +19 | +32 | +39 | +39 | +53 | +62 | +71 | +77 | +78 | +80 | +84 | +87 | +91 | +93 | +96 | +96 | +96 | +99 | +100 | +103 | +103 | +103 | +102 | +103 | +103 | +103 | +103 | +103 | +103 | +104 | +104 | +104 | +104 | +105 | +106 | +106 | +106 | +106 | +106 | +106 | +106 | +106 | +106 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +107 | +108 | +108 | +108 | +108\r | +
+Il y a plusieurs lignes qui nous intéressent: +
+-
+
- la Belgique (Belgium) -> ligne 24 +
- la Chine (China) hors Hong-Kong -> lignes 50 à 61 puis 63 à 82 +
- Hong-Kong -> ligne 62 +
- La France métropolitaine (France) -> ligne 117 +
- L'Allemagne (ligne 121) +
- l'Iran (ligne 134) +
- l'Italie (ligne 138) +
- le Japon (ligne 140) +
- la Corée du Sud (ligne 144) +
- la Hollande sans les colonies (ligne 170) +
- le Portugal (ligne 185) +
- l'Espagne (ligne 202) +
- le Royaume-Uni sans les colonies (ligne 224) +
- les Etats-Unis (ligne 226) +
+On choisit les lignes qui nous intéressent. +
# Select the rows of interest. The first four columns of every row hold
# metadata (Province/State, Country/Region, Lat, Long); the daily values start
# at index 4. Slices are used instead of `del row[:4]` so that the rows of
# `table` are left untouched and this snippet can safely be re-run.
date = table[0][4:]
belgique = table[24][4:]

# Chinese provinces are kept as full rows (metadata included); Hong Kong
# (row 62) is excluded and handled as its own series.
chine1 = table[50:62]
chine2 = table[63:83]
chine = chine1 + chine2
hong_kong = table[62][4:]

france_metropolitaine = table[117][4:]
allemagne = table[121][4:]
iran = table[134][4:]
italie = table[138][4:]
japon = table[140][4:]
# This row drops five leading fields instead of four -- presumably the naive
# comma split breaks the quoted country name into two fields; confirm against
# the CSV file.
coree_du_sud = table[144][5:]
hollande = table[170][4:]
portugal = table[185][4:]
espagne = table[202][4:]
royaume_unis = table[224][4:]
etats_unis = table[226][4:]
+Pour le cas de la Chine, il faut faire la somme des valeurs de +plusieurs provinces sans Hong-Kong. Pour cela, je vais ajouter les valeurs de chaque +ligne correspondant à des provinces chinoises (sauf Hong-Kong) +
import numpy as np

# Sum the values of all selected Chinese provinces, date by date.
# Columns 0-3 are skipped; every remaining column is summed, including the
# last one (the previous `4:-1` slice dropped the most recent date, which then
# truncated every other series when they were zipped together).
# Values are stripped first so that a trailing '\r' on the last field cannot
# break the integer conversion.
sum_chine = np.sum(
    np.array([[valeur.strip() for valeur in ligne[4:]] for ligne in chine]).astype(np.int32),
    axis=0,
).tolist()
+Je vais maintenant ajouter les dates et tous les pays dans une liste. +
# Gather the date labels and every country series, then transpose them into
# one tuple per date. zip() stops at the shortest series.
colonnes = (
    date,
    belgique,
    hong_kong,
    france_metropolitaine,
    allemagne,
    iran,
    italie,
    japon,
    coree_du_sud,
    hollande,
    portugal,
    espagne,
    royaume_unis,
    etats_unis,
    sum_chine,
)
donnees = list(zip(*colonnes))
1.3 Passage Python -> R
++Nous passons au langage R pour inspecter nos données, parce que l'analyse et la préparation de graphiques sont plus concises en R, sans nécessiter aucune bibliothèque supplémentaire. +
+ ++Nous utilisons le mécanisme d'échange de données proposé par org-mode, ce qui nécessite un peu de code Python pour transformer les données dans le bon format. +
# Value handed back to org-mode: a header row, a None entry (presumably
# rendered by org-mode as a horizontal separator -- confirm), then one row per
# date with the date forced to a string.
[
    ('Date', 'Belgique', 'Hong-Kong', 'France', 'Allemagne', 'Iran', 'Italie', 'Japon', 'Corée du Sud', 'Hollande', 'Portugal', 'Espagne', 'Royaume-Unis', 'Etats-Unis', 'Chine'),
    None,
] + [(str(ligne[0]),) + tuple(ligne[1:]) for ligne in donnees]
+En R, les données arrivent sous forme d'un data frame, mais il faut encore convertir les dates, qui arrivent comme chaînes de caractères. +
# Convert the date strings (month/day/two-digit year, e.g. "1/22/20") to Date.
# %y parses a two-digit year; %Y expects the full year and would read "20"
# as the year 0020.
date_evt_bis=as.Date(donnees$Date, format="%m/%d/%y")
+
+2 Analyse
++On peut maintenant faire le graphe du nombre cumulé de cas confirmés en échelle linéaire. +
# Linear-scale plot, one curve per country.
# The y label names confirmed cases, since the data file holds confirmed
# cases rather than deaths.
plot(date_evt_bis, donnees$Belgique, type='l', col='brown', lwd=2,
     ylab='Cas confirmés cumulés',
     xlab='Date',
     ylim=c(0,2500000))
lines(date_evt_bis, donnees$Hong.Kong, type='l', col='blue', lwd=2)
lines(date_evt_bis, donnees$France, type='l', col='aquamarine3', lwd=2)
lines(date_evt_bis, donnees$Allemagne, type='l', col='darkorange', lwd=2)
lines(date_evt_bis, donnees$Iran, type='l', col='gray', lwd=2)
lines(date_evt_bis, donnees$Italie, type='l', col='darkgreen', lwd=2)
lines(date_evt_bis, donnees$Japon, type='l', col='darkblue', lwd=2)
lines(date_evt_bis, donnees$Corée.du.Sud, type='l', col='deeppink', lwd=2)
lines(date_evt_bis, donnees$Hollande, type='l', col='darkviolet', lwd=2)
lines(date_evt_bis, donnees$Portugal, type='l', col='green', lwd=2)
lines(date_evt_bis, donnees$Espagne, type='l', col='red', lwd=2)
lines(date_evt_bis, donnees$Royaume.Unis, type='l', col='darkred', lwd=2)
lines(date_evt_bis, donnees$Etats.Unis, type='l', col='yellow2', lwd=2)
lines(date_evt_bis, donnees$Chine, type='l', col='black', lwd=2)
# The legend lists every plotted curve, including the first one (Belgique),
# in the same order as the colours.
legend('topleft', inset=0.05,
       legend=c('Belgique', 'Hong-Kong', 'France', 'Allemagne', 'Iran', 'Italie', 'Japon', 'Corée du Sud', 'Hollande', 'Portugal', 'Espagne', 'Royaume-Uni', 'Etats-Unis', 'Chine'),
       col=c('brown', 'blue', 'aquamarine3', 'darkorange', 'gray', 'darkgreen', 'darkblue', 'deeppink', 'darkviolet', 'green', 'red', 'darkred', 'yellow2', 'black'),
       lty=1, lwd=2)
+
+Puis en échelle logarithmique +
# Same curves on a logarithmic y axis. The lower y limit is 1 because a log
# axis cannot start at 0.
# The y label names confirmed cases, since the data file holds confirmed
# cases rather than deaths.
plot(date_evt_bis, donnees$Belgique, type='l', col='brown', lwd=2,
     ylab='Cas confirmés cumulés',
     xlab='Date',
     log='y',
     ylim=c(1,2500000))
lines(date_evt_bis, donnees$Hong.Kong, type='l', col='blue', lwd=2)
lines(date_evt_bis, donnees$France, type='l', col='aquamarine3', lwd=2)
lines(date_evt_bis, donnees$Allemagne, type='l', col='darkorange', lwd=2)
lines(date_evt_bis, donnees$Iran, type='l', col='gray', lwd=2)
lines(date_evt_bis, donnees$Italie, type='l', col='darkgreen', lwd=2)
lines(date_evt_bis, donnees$Japon, type='l', col='darkblue', lwd=2)
lines(date_evt_bis, donnees$Corée.du.Sud, type='l', col='deeppink', lwd=2)
lines(date_evt_bis, donnees$Hollande, type='l', col='darkviolet', lwd=2)
lines(date_evt_bis, donnees$Portugal, type='l', col='green', lwd=2)
lines(date_evt_bis, donnees$Espagne, type='l', col='red', lwd=2)
lines(date_evt_bis, donnees$Royaume.Unis, type='l', col='darkred', lwd=2)
lines(date_evt_bis, donnees$Etats.Unis, type='l', col='yellow2', lwd=2)
lines(date_evt_bis, donnees$Chine, type='l', col='black', lwd=2)
+