# Analyse statistique des données liées à l'épidémie de COVID-19

In [120]:
%matplotlib inline
import os
import urllib
import urllib.request

import isoweek
import matplotlib.pyplot as plt
import pandas as pd

Dans ce fichier, on s'intéresse à l'évolution du nombre de cas de COVID-19 dans les pays et territoires suivants :
    - la Belgique (Belgium),
    - la Chine, sans Hong Kong (China),
    - Hong Kong (China, Hong Kong),
    - la France métropolitaine (France),
    - l'Allemagne (Germany),
    - l'Iran (Iran),
    - l'Italie (Italy),
    - le Japon (Japan),
    - la Corée du Sud (Korea, South),
    - les Pays-Bas métropolitains (Netherlands),
    - le Portugal (Portugal),
    - l'Espagne (Spain),
    - le Royaume-Uni métropolitain (United Kingdom),
    - les États-Unis (US).

In [158]:
# Raw CSV of the global confirmed-cases time series, hosted in the
# CSSEGISandData/COVID-19 GitHub repository.
data_url = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
    "csse_covid_19_data/csse_covid_19_time_series/"
    "time_series_covid19_confirmed_global.csv"
)

In [159]:
# Load the confirmed-cases time series from a local copy, downloading it
# from `data_url` first when no local copy exists yet. Keeping a local copy
# means later runs read the same file instead of the continuously updated
# upstream one.
path_to_file = "../Sujet7_StatCOVID19/DonneeBrute"
csv_file = "donnee_JohnHopkins.csv"
local_csv = os.path.join(path_to_file, csv_file)

if not os.path.exists(local_csv):
    # makedirs also creates missing parent directories and does not fail
    # when the directory is already there (os.mkdir does neither).
    os.makedirs(path_to_file, exist_ok=True)
    urllib.request.urlretrieve(data_url, local_csv)

# Single read, whether the file was already present or just downloaded.
raw_data = pd.read_csv(local_csv)

raw_data

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,11/30/20,12/1/20,12/2/20,12/3/20,12/4/20,12/5/20,12/6/20,12/7/20,12/8/20,12/9/20
0,,Afghanistan,33.939110,67.709953,0,0,0,0,0,0,...,46274,46516,46718,46837,46837,47072,47306,47516,47716,47851
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,38182,39014,39719,40501,41302,42148,42988,43683,44436,45188
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,83199,84152,85084,85927,86730,87502,88252,88825,89416,90014
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,6745,6790,6842,6904,6955,7005,7050,7084,7127,7162
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,15139,15251,15319,15361,15493,15536,15591,15648,15729,15804
5,,Antigua and Barbuda,17.060800,-61.796400,0,0,0,0,0,0,...,141,142,144,144,144,144,144,146,146,146
6,,Argentina,-38.416100,-63.616700,0,0,0,0,0,0,...,1424533,1432570,1440103,1447732,1454631,1459832,1463110,1466309,1469919,1475222
7,,Armenia,40.069100,45.038200,0,0,0,0,0,0,...,135124,135967,137231,138508,139692,140959,141937,142344,142928,144066
8,Australian Capital Territory,Australia,-35.473500,149.012400,0,0,0,0,0,0,...,117,117,117,117,117,117,117,117,117,117
9,New South Wales,Australia,-33.868800,151.209300,0,0,0,0,3,4,...,4582,4588,4597,4603,4605,4610,4614,4620,4622,4624


## Sélection des données des pays d'intérêt

On crée une liste des pays qui nous intéressent.

In [160]:
# Countries of interest, spelled as they appear in the 'Country/Region' column.
country_list = [
    'Belgium',
    'China',
    'France',
    'Germany',
    'Iran',
    'Italy',
    'Japan',
    'Korea, South',
    'Netherlands',
    'Portugal',
    'Spain',
    'United Kingdom',
    'US',
]
# Keep every row (provinces and territories included) belonging to one of them.
data = raw_data.loc[raw_data['Country/Region'].isin(country_list)]

In [161]:
# Display the current contents of `data` as the cell output.
data

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,11/30/20,12/1/20,12/2/20,12/3/20,12/4/20,12/5/20,12/6/20,12/7/20,12/8/20,12/9/20
23,,Belgium,50.833300,4.469936,0,0,0,0,0,0,...,577345,579212,582252,584857,587439,589942,591756,592615,594572,597643
58,Anhui,China,31.825700,117.226400,1,9,15,39,60,70,...,992,992,992,992,992,992,992,992,992,992
59,Beijing,China,40.182400,116.414200,14,22,36,41,68,80,...,951,951,951,951,952,952,952,952,952,952
60,Chongqing,China,30.057200,107.874000,6,9,27,57,75,110,...,590,590,590,590,590,590,590,590,590,590
61,Fujian,China,26.078900,117.987400,1,5,10,18,35,59,...,489,490,490,492,493,499,500,500,500,501
62,Gansu,China,35.751800,104.286100,0,2,2,4,7,14,...,182,182,182,182,182,182,182,182,182,182
63,Guangdong,China,23.341700,113.424400,26,32,53,78,111,151,...,1989,1992,1996,1997,2000,2002,2004,2007,2009,2010
64,Guangxi,China,23.829800,108.788100,2,5,23,23,36,46,...,263,263,263,263,263,263,263,263,264,264
65,Guizhou,China,26.815400,106.874800,1,3,3,4,5,7,...,147,147,147,147,147,147,147,147,147,147
66,Hainan,China,19.195900,109.745300,4,5,8,19,22,33,...,171,171,171,171,171,171,171,171,171,171


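Prise en compte des cas particuliers (Hong Kong, France, Pays-Bas et Royaume-Uni) : on ne conserve que les lignes sans province (territoires métropolitains) ainsi que les provinces chinoises, puis on isole Hong Kong du reste de la Chine.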

In [162]:
# Special cases: overseas territories (France, Netherlands, United Kingdom)
# and Hong Kong, which must be reported separately from the rest of China.

# Keep country-level rows (no Province/State) plus every Chinese province;
# this drops the rows that carry a Province/State for the other countries.
data = data[data['Province/State'].isna() | (data['Country/Region'] == 'China')]

# Extract the Hong Kong row and relabel it as its own "country".
# Province/State is set to a real missing value (not the string 'NaN') so the
# row looks like every other country-level row and survives a re-run of the
# isna() filter above.
is_hong_kong = (data['Country/Region'] == 'China') & (data['Province/State'] == 'Hong Kong')
hkdata = data[is_hong_kong].assign(**{
    'Country/Region': 'China, Hong Kong',
    'Province/State': float('nan'),
})

# Swap the original Hong Kong row for the relabelled one. pd.concat replaces
# DataFrame.append (removed in pandas 2.0), and the sorted result is assigned
# back so that `data` itself is ordered by its original row index.
data = pd.concat([data[~is_hong_kong], hkdata]).sort_index()

data

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,11/30/20,12/1/20,12/2/20,12/3/20,12/4/20,12/5/20,12/6/20,12/7/20,12/8/20,12/9/20
23,,Belgium,50.8333,4.469936,0,0,0,0,0,0,...,577345,579212,582252,584857,587439,589942,591756,592615,594572,597643
58,Anhui,China,31.8257,117.2264,1,9,15,39,60,70,...,992,992,992,992,992,992,992,992,992,992
59,Beijing,China,40.1824,116.4142,14,22,36,41,68,80,...,951,951,951,951,952,952,952,952,952,952
60,Chongqing,China,30.0572,107.874,6,9,27,57,75,110,...,590,590,590,590,590,590,590,590,590,590
61,Fujian,China,26.0789,117.9874,1,5,10,18,35,59,...,489,490,490,492,493,499,500,500,500,501
62,Gansu,China,35.7518,104.2861,0,2,2,4,7,14,...,182,182,182,182,182,182,182,182,182,182
63,Guangdong,China,23.3417,113.4244,26,32,53,78,111,151,...,1989,1992,1996,1997,2000,2002,2004,2007,2009,2010
64,Guangxi,China,23.8298,108.7881,2,5,23,23,36,46,...,263,263,263,263,263,263,263,263,264,264
65,Guizhou,China,26.8154,106.8748,1,3,3,4,5,7,...,147,147,147,147,147,147,147,147,147,147
66,Hainan,China,19.1959,109.7453,4,5,8,19,22,33,...,171,171,171,171,171,171,171,171,171,171
