Commit b918ae3d authored by Samuel MEYNARD's avatar Samuel MEYNARD

Ajout etude stackoverflow

parent 19885907
...@@ -36,7 +36,8 @@ _Votre mission si vous l'acceptez :_ ...@@ -36,7 +36,8 @@ _Votre mission si vous l'acceptez :_
5. Répétez les étapes précédentes avec le second jeu de données (stackoverflow) 5. Répétez les étapes précédentes avec le second jeu de données (stackoverflow)
6. Déposer dans FUN votre résultat 6. Déposer dans FUN votre résultat
* Récupération du 1^er jeu de donnée * Liglab2
** Récupération du 1^er jeu de données
** Téléchargement ** Téléchargement
#+BEGIN_SRC python :session :file step1.txt :results file #+BEGIN_SRC python :session :file step1.txt :results file
from urllib.request import urlretrieve from urllib.request import urlretrieve
...@@ -75,20 +76,19 @@ cleantable[:4] ...@@ -75,20 +76,19 @@ cleantable[:4]
| [1421761682.502054] | 262 | bytes | from | lig-publig.imag.fr | (129.88.11.7): | icmp_seq=1 | ttl=60 | time=21.2 | ms | | [1421761682.502054] | 262 | bytes | from | lig-publig.imag.fr | (129.88.11.7): | icmp_seq=1 | ttl=60 | time=21.2 | ms |
| [1421761682.729257] | 1107 | bytes | from | lig-publig.imag.fr | (129.88.11.7): | icmp_seq=1 | ttl=60 | time=23.3 | ms | | [1421761682.729257] | 1107 | bytes | from | lig-publig.imag.fr | (129.88.11.7): | icmp_seq=1 | ttl=60 | time=23.3 | ms |
#+BEGIN_SRC python :session :results output replace #+BEGIN_SRC python :session :results replace
from datetime import datetime from datetime import datetime
date = [datetime.utcfromtimestamp(float(row[0][1:-1])) for row in cleantable] date = [datetime.utcfromtimestamp(float(row[0][1:-1])) for row in cleantable]
donnee = [int(row[1]) for row in cleantable] S = [int(row[1]) for row in cleantable]
source = [str(row[4]) for row in cleantable] source = [str(row[4]) for row in cleantable]
ip = [str(row[5][1:-1]) for row in cleantable] ip = [str(row[5][1:-1]) for row in cleantable]
ltime = [float(row[8].split('=')[1]) for row in cleantable] T = [float(row[8].split('=')[1]) for row in cleantable]
dataset = list(zip(date,donnee, ltime)) dataset = list(zip(date,donnee, ltime))
print(dataset[:4]) T[:10]
#+END_SRC #+END_SRC
#+RESULTS: #+RESULTS:
: [(datetime.datetime(2015, 1, 20, 13, 48, 2, 52172), 665, 22.5), (datetime.datetime(2015, 1, 20, 13, 48, 2, 277315), 1373, 21.2), (datetime.datetime(2015, 1, 20, 13, 48, 2, 502054), 262, 21.2), (datetime.datetime(2015, 1, 20, 13, 48, 2, 729257), 1107, 23.3)] | 22.5 | 21.2 | 21.2 | 23.3 | 1.41 | 21.9 | 78.7 | 25.1 | 24.0 | 19.5 |
#+BEGIN_SRC python :session :results silent :file test.png #+BEGIN_SRC python :session :results silent :file test.png
import matplotlib import matplotlib
...@@ -98,20 +98,275 @@ fig, ax = plt.subplots(figsize=(12, 12)) ...@@ -98,20 +98,275 @@ fig, ax = plt.subplots(figsize=(12, 12))
# Add x-axis and y-axis # Add x-axis and y-axis
ax.scatter(date, ax.scatter(date,
ltime, T,
color='purple') color='purple')
# Set title and labels for axes # Set title and labels for axes
ax.set(xlabel="Date", ax.set(xlabel="Date",
ylabel="Latence", ylabel="Temps de transmission",
title="Evolution de la latence dans le temps") title="Evolution de la temps de transmission dans le temps")
plt.savefig('evol_temps_transmission_dans_le_temps.png')
#+END_SRC
#+RESULTS:
: None
Il ne semble pas y avoir d'évolution du temps de transmission au cours du temps
** Evolution du temps de transmission en fonction de la taille des données
#+BEGIN_SRC python :session :results silent :file test2.png
import matplotlib
import matplotlib.pyplot as plt

# One 12x12-inch figure holding a single set of axes.
fig, ax = plt.subplots(figsize=(12, 12))

# Scatter of T against S (both lists are built by an earlier cell).
ax.scatter(S, T, color='purple')

# Axis labels and title, set one property at a time.
ax.set_xlabel("Taille des donnee")
ax.set_ylabel("Temps de transmission")
ax.set_title("Evolution du temps de transmission en fonction de la taille des données")

# Write the current figure to disk.
plt.savefig('evol_temps_transmission_en_fonction_de_la_taille.png')
#+END_SRC
Ici, on voit l'impact de la MTU (certainement fixée à 1500) sur le temps de transmission
** Différenciation par rapport à la taille
*** Inférieur à la MTU
#+BEGIN_SRC python :session
# Rows whose size field (column 1) is at most 1485.
table_l1500 = [fields for fields in cleantable if int(fields[1]) <= 1485]
# One (timestamp, size, time) triple per kept row: column 0 is an epoch value
# with one wrapping character on each side, column 8 is a "key=value" token.
dataset_l1500 = [
    (datetime.utcfromtimestamp(float(fields[0][1:-1])),
     int(fields[1]),
     float(fields[8].split('=')[1]))
    for fields in table_l1500
]
# Per-column views of the same triples.
date_l1500 = [when for when, _, _ in dataset_l1500]
S_l1500 = [size for _, size, _ in dataset_l1500]
T_l1500 = [delay for _, _, delay in dataset_l1500]
# Value of the cell: preview of the first ten triples.
dataset_l1500[:10]
#+END_SRC
#+RESULTS:
| datetime.datetime | (2015 1 20 13 48 2 52172) | 665 | 22.5 |
| datetime.datetime | (2015 1 20 13 48 2 277315) | 1373 | 21.2 |
| datetime.datetime | (2015 1 20 13 48 2 502054) | 262 | 21.2 |
| datetime.datetime | (2015 1 20 13 48 2 729257) | 1107 | 23.3 |
| datetime.datetime | (2015 1 20 13 48 2 934648) | 1128 | 1.41 |
| datetime.datetime | (2015 1 20 13 48 3 160397) | 489 | 21.9 |
| datetime.datetime | (2015 1 20 13 48 3 672157) | 1146 | 25.1 |
| datetime.datetime | (2015 1 20 13 48 3 899933) | 884 | 24.0 |
| datetime.datetime | (2015 1 20 13 48 4 122687) | 1422 | 19.5 |
| datetime.datetime | (2015 1 20 13 48 4 344135) | 1180 | 18.0 |
#+BEGIN_SRC python :session :results silent :file test2.png
import matplotlib
import matplotlib.pyplot as plt

# One 12x12-inch figure holding a single set of axes.
fig, ax = plt.subplots(figsize=(12, 12))

# Scatter of T_l1500 against S_l1500 (the at-most-1485 subset).
ax.scatter(S_l1500, T_l1500, color='purple')

# Axis labels and title, set one property at a time.
ax.set_xlabel("Taille des donnee")
ax.set_ylabel("Temps de transmission")
ax.set_title("Evolution du temps de transmission en fonction de la taille des données")

# Write the current figure to disk.
plt.savefig('l1500_evol_T-f(S).png')
#+END_SRC
*** Supérieur à la MTU
Calcul d'un tableau avec les données supérieures à la MTU
#+BEGIN_SRC python :session
# Rows whose size field (column 1) is strictly above 1485. The comparison is
# strict so that a row of exactly 1485 is not also part of the `<= 1485`
# subset (table_l1500): the two subsets must not share rows.
table_g1500 = [row for row in cleantable if int(row[1]) > 1485]
# Column 0 is an epoch value with one wrapping character on each side.
date_g1500 = [datetime.utcfromtimestamp(float(row[0][1:-1])) for row in table_g1500]
# Column 1 is the size, column 8 a "key=value" token holding the time.
S_g1500 = [int(row[1]) for row in table_g1500]
T_g1500 = [float(row[8].split('=')[1]) for row in table_g1500]
dataset_g1500 = list(zip(date_g1500, S_g1500, T_g1500))
# Value of the cell: preview of the first ten triples.
dataset_g1500[:10]
#+END_SRC
#+RESULTS:
| datetime.datetime | (2015 1 20 13 48 3 443055) | 1759 | 78.7 |
| datetime.datetime | (2015 1 20 13 48 5 620117) | 1843 | 2.31 |
| datetime.datetime | (2015 1 20 13 48 6 234464) | 1511 | 2.18 |
| datetime.datetime | (2015 1 20 13 48 7 463275) | 1510 | 2.17 |
| datetime.datetime | (2015 1 20 13 48 7 874230) | 1966 | 2.2 |
| datetime.datetime | (2015 1 20 13 48 8 694652) | 1518 | 2.19 |
| datetime.datetime | (2015 1 20 13 48 10 335289) | 1732 | 2.29 |
| datetime.datetime | (2015 1 20 13 48 10 950126) | 1500 | 2.14 |
| datetime.datetime | (2015 1 20 13 48 11 359824) | 1520 | 2.1 |
| datetime.datetime | (2015 1 20 13 48 11 974735) | 1509 | 2.23 |
#+BEGIN_SRC python :session :results silent :file test2.png
import matplotlib
import matplotlib.pyplot as plt

# One 12x12-inch figure holding a single set of axes.
fig, ax = plt.subplots(figsize=(12, 12))

# Scatter of T_g1500 against S_g1500 (the above-threshold subset).
ax.scatter(S_g1500, T_g1500, color='purple')

# Axis labels and title, set one property at a time.
ax.set_xlabel("Taille des donnee")
ax.set_ylabel("Temps de transmission")
ax.set_title("Evolution du temps de transmission en fonction de la taille des données")

# Write the current figure to disk.
plt.savefig('g1500_evol_T-f(S).png')
#+END_SRC
** Régression linéaire
*** Cas inférieur à la MTU
#+BEGIN_SRC python :session :results replace
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import dates
import numpy as np
from scipy import stats
import seaborn as sns
import statsmodels.api as sm
from sklearn import linear_model


@plt.FuncFormatter
def fake_dates(x, pos):
    """Format a Matplotlib date number as a ``YYYY-MM-DD`` tick label."""
    as_datetime = dates.num2date(x)
    return as_datetime.strftime('%Y-%m-%d')


sns.set(color_codes=True)

# Frame for the at-most-1485 subset; ``datenum`` is the numeric form of the
# timestamps, which is what the regression plot puts on the x axis.
df = pd.DataFrame(dict(
    date=pd.to_datetime(date_l1500),
    datenum=dates.date2num(date_l1500),
    T=T_l1500,
    S=S_l1500,
))

fig, ax = plt.subplots()
# NOTE(review): the output file name says T-f(S) but x is "datenum" - confirm
# which regression was intended.
sns.regplot(data=df, x="datenum", y="T", color='purple', ax=ax)
# Render the numeric x ticks as calendar dates, rotated to stay legible.
ax.xaxis.set_major_formatter(fake_dates)
ax.tick_params(labelrotation=30)
fig.savefig('l1500_reglineaireT-f(S).png')
#+END_SRC
#+RESULTS:
: None
#+BEGIN_SRC python :session
# Preview the first nine sizes as a column (one row per sample, one feature).
# With reshape(1, -1) the leading axis has length 1, so [:9] selected nothing.
np.array(S_l1500).reshape(-1, 1)[:9]
#+END_SRC
#+RESULTS:
| 1759 | 1843 | 1511 | ... | 1503 | 1515 | 1875 |
#+BEGIN_SRC python :session
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# One single-element row per observation, as a 2-D feature array.
S_tt = [[size] for size in S_l1500]
my_s, my_t = np.array(S_tt), np.array(T_l1500)
# Fit T on S for the at-most-1485 subset; fit() returns the estimator itself.
lmodel = LinearRegression().fit(my_s, my_t)
# Value of the cell: intercept as L, inverse of the slope as C.
f"Les coeff sont L = {lmodel.intercept_} et C = { 1 / lmodel.coef_}"
#+END_SRC
#+RESULTS:
: Les coeff sont L = 3.257592785874401 et C = [2761.3155395]
*** Cas supérieur à la MTU
#+BEGIN_SRC python :session
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# One single-element row per observation, as a 2-D feature array.
S_tt = [[size] for size in S_g1500]
my_s, my_t = np.array(S_tt), np.array(T_g1500)
# Fit T on S for the above-threshold subset; fit() returns the estimator itself.
lmodel = LinearRegression().fit(my_s, my_t)
# Value of the cell: intercept as L, inverse of the slope as C.
f"Les coeff sont L = {lmodel.intercept_} et C = { 1 / lmodel.coef_}"
plt.savefig('test.png') #+END_SRC
#+RESULTS:
: Les coeff sont L = 5.867233082184833 et C = [441.71908009]
* Stackoverflow
** Récupération du jeu de données
*** Téléchargement
#+BEGIN_SRC python :session :file step1.txt :results replace
from urllib.request import urlretrieve
from os import path

stacko_file = "stackoverflow.log"
stacko_filegz = stacko_file + ".gz"
url = "http://mescal.imag.fr/membres/arnaud.legrand/teaching/2014/RICM4_EP_ping/stackoverflow.log.gz"
# Download only when the archive is not already on disk. The guard tests the
# .gz path because that is the only file this cell creates; testing the
# uncompressed name would trigger a new download on every run.
if not path.exists(stacko_filegz):
    urlretrieve(url, stacko_filegz)
#+END_SRC #+END_SRC
#+RESULTS: #+RESULTS:
: None : None
* Evolution du temps de transmission à travers le temps *** Lecture du fichier
#+BEGIN_SRC python :session :results output
import gzip

# Read the whole archive, decode it as latin-1 and split it into lines.
# The context manager closes the file even if reading or decoding raises.
with gzip.open(stacko_filegz) as f:
    data = f.read().decode('latin-1').strip().splitlines()
#+END_SRC
#+RESULTS:
#+BEGIN_SRC python :session :results replace
# Split every log line on single spaces, then keep only the lines that
# produce exactly ten fields.
table = [line.split(' ') for line in data]
cleantable = [fields for fields in table if len(fields) == 10]
# Value of the cell: preview of the first four kept rows.
cleantable[:4]
#+END_SRC
#+RESULTS:
| [1421771203.082701] | 1257 | bytes | from | stackoverflow.com | (198.252.206.140): | icmp_seq=1 | ttl=50 | time=120 | ms |
| [1421771203.408254] | 454 | bytes | from | stackoverflow.com | (198.252.206.140): | icmp_seq=1 | ttl=50 | time=120 | ms |
| [1421771203.739730] | 775 | bytes | from | stackoverflow.com | (198.252.206.140): | icmp_seq=1 | ttl=50 | time=126 | ms |
| [1421771204.056630] | 1334 | bytes | from | stackoverflow.com | (198.252.206.140): | icmp_seq=1 | ttl=50 | time=112 | ms |
#+BEGIN_SRC python :session :results replace
from datetime import datetime

# Column 0 looks like "[1421771203.082701]": drop the brackets, parse the epoch.
date = [datetime.utcfromtimestamp(float(row[0][1:-1])) for row in cleantable]
# Column 1 is the size (followed by the literal "bytes" in column 2).
S = [int(row[1]) for row in cleantable]
source = [str(row[4]) for row in cleantable]
# Column 5 looks like "(198.252.206.140):"; strip the parentheses and the
# colon (a plain [1:-1] slice left the closing parenthesis in place).
ip = [str(row[5]).strip('():') for row in cleantable]
# Column 8 looks like "time=120": keep the number after "=".
T = [float(row[8].split('=')[1]) for row in cleantable]
# Zip the lists defined in this cell; the previous names (donnee, ltime) no
# longer exist after the rename to S and T.
dataset = list(zip(date, S, T))
# Value of the cell: preview of the first ten times.
T[:10]
#+END_SRC
#+RESULTS:
| 120.0 | 120.0 | 126.0 | 112.0 | 111.0 | 111.0 | 112.0 | 111.0 | 111.0 | 111.0 |
#+BEGIN_SRC python :session :results silent :file test.png
import matplotlib
import matplotlib.pyplot as plt

# Create figure and plot space
fig, ax = plt.subplots(figsize=(12, 12))

# Scatter of T against the timestamps (both lists are built by an earlier cell).
ax.scatter(date, T, color='purple')

# Labels and title; the title uses "du temps" like the other plot titles of
# this document (it previously read "de la temps").
ax.set(xlabel="Date",
       ylabel="Temps de transmission",
       title="Evolution du temps de transmission dans le temps")

plt.savefig('stacko_evol_temps_transmission_dans_le_temps.png')
#+END_SRC
#+RESULTS:
: None
Il ne semble pas y avoir d'évolution du temps de transmission au cours du temps
** Evolution du temps de transmission en fonction de la taille des données
#+BEGIN_SRC python :session :results silent :file test2.png #+BEGIN_SRC python :session :results silent :file test2.png
import matplotlib import matplotlib
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
...@@ -119,16 +374,173 @@ import matplotlib.pyplot as plt ...@@ -119,16 +374,173 @@ import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(12, 12)) fig, ax = plt.subplots(figsize=(12, 12))
# Add x-axis and y-axis # Add x-axis and y-axis
ax.scatter(donnee, ax.scatter(S,
ltime, T,
color='purple') color='purple')
# Set title and labels for axes # Set title and labels for axes
ax.set(xlabel="Taille des donnee", ax.set(xlabel="Taille des donnee",
ylabel="Latence", ylabel="Temps de transmission",
title="Evolution de la latence dans le temps") title="Evolution du temps de transmission en fonction de la taille des données")
plt.savefig('test2.png') plt.savefig('stacko_evol_temps_transmission_en_fonction_de_la_taille.png')
#+END_SRC #+END_SRC
Ici, on voit l'impact de la MTU ici certainement à 1500 sur le temps de transport Ici, on voit l'impact de la MTU ici certainement à 1500 sur le temps de transport
*** Différenciation par rapport à la taille
**** Inférieur à la MTU
#+BEGIN_SRC python :session
# Rows whose size field (column 1) is at most 1485.
table_l1500 = [fields for fields in cleantable if int(fields[1]) <= 1485]
# One (timestamp, size, time) triple per kept row: column 0 is an epoch value
# with one wrapping character on each side, column 8 is a "key=value" token.
dataset_l1500 = [
    (datetime.utcfromtimestamp(float(fields[0][1:-1])),
     int(fields[1]),
     float(fields[8].split('=')[1]))
    for fields in table_l1500
]
# Per-column views of the same triples.
date_l1500 = [when for when, _, _ in dataset_l1500]
S_l1500 = [size for _, size, _ in dataset_l1500]
T_l1500 = [delay for _, _, delay in dataset_l1500]
# Value of the cell: preview of the first ten triples.
dataset_l1500[:10]
#+END_SRC
#+RESULTS:
| datetime.datetime | (2015 1 20 16 26 43 82701) | 1257 | 120.0 |
| datetime.datetime | (2015 1 20 16 26 43 408254) | 454 | 120.0 |
| datetime.datetime | (2015 1 20 16 26 43 739730) | 775 | 126.0 |
| datetime.datetime | (2015 1 20 16 26 44 56630) | 1334 | 112.0 |
| datetime.datetime | (2015 1 20 16 26 44 372224) | 83 | 111.0 |
| datetime.datetime | (2015 1 20 16 26 44 688367) | 694 | 111.0 |
| datetime.datetime | (2015 1 20 16 26 45 321112) | 632 | 111.0 |
| datetime.datetime | (2015 1 20 16 26 45 637464) | 405 | 111.0 |
| datetime.datetime | (2015 1 20 16 26 45 953472) | 1419 | 111.0 |
| datetime.datetime | (2015 1 20 16 26 46 269163) | 329 | 111.0 |
#+BEGIN_SRC python :session :results silent :file test2.png
import matplotlib
import matplotlib.pyplot as plt

# One 12x12-inch figure holding a single set of axes.
fig, ax = plt.subplots(figsize=(12, 12))

# Scatter of T_l1500 against S_l1500 (the at-most-1485 subset).
ax.scatter(S_l1500, T_l1500, color='purple')

# Axis labels and title, set one property at a time.
ax.set_xlabel("Taille des donnee")
ax.set_ylabel("Temps de transmission")
ax.set_title("Evolution du temps de transmission en fonction de la taille des données")

# Write the current figure to disk.
plt.savefig('stacko_l1500_evol_T-f(S).png')
#+END_SRC
**** Supérieur à la MTU
Calcul d'un tableau avec les données supérieures à la MTU
#+BEGIN_SRC python :session
# Rows whose size field (column 1) is strictly above 1485. The comparison is
# strict so that a row of exactly 1485 is not also part of the `<= 1485`
# subset (table_l1500): the two subsets must not share rows.
table_g1500 = [row for row in cleantable if int(row[1]) > 1485]
# Column 0 is an epoch value with one wrapping character on each side.
date_g1500 = [datetime.utcfromtimestamp(float(row[0][1:-1])) for row in table_g1500]
# Column 1 is the size, column 8 a "key=value" token holding the time.
S_g1500 = [int(row[1]) for row in table_g1500]
T_g1500 = [float(row[8].split('=')[1]) for row in table_g1500]
dataset_g1500 = list(zip(date_g1500, S_g1500, T_g1500))
# Value of the cell: preview of the first ten triples.
dataset_g1500[:10]
#+END_SRC
#+RESULTS:
| datetime.datetime | (2015 1 20 16 26 45 5514) | 1577 | 112.0 |
| datetime.datetime | (2015 1 20 16 26 46 901972) | 1714 | 112.0 |
| datetime.datetime | (2015 1 20 16 26 47 851148) | 1598 | 112.0 |
| datetime.datetime | (2015 1 20 16 26 52 272504) | 1619 | 112.0 |
| datetime.datetime | (2015 1 20 16 26 55 749652) | 1655 | 112.0 |
| datetime.datetime | (2015 1 20 16 26 56 66885) | 1556 | 112.0 |
| datetime.datetime | (2015 1 20 16 26 57 648057) | 1839 | 112.0 |
| datetime.datetime | (2015 1 20 16 26 58 280820) | 1572 | 112.0 |
| datetime.datetime | (2015 1 20 16 27 1 133246) | 1491 | 120.0 |
| datetime.datetime | (2015 1 20 16 27 1 765499) | 1978 | 112.0 |
#+BEGIN_SRC python :session :results silent :file test2.png
import matplotlib
import matplotlib.pyplot as plt

# One 12x12-inch figure holding a single set of axes.
fig, ax = plt.subplots(figsize=(12, 12))

# Scatter of T_g1500 against S_g1500 (the above-threshold subset).
ax.scatter(S_g1500, T_g1500, color='purple')

# Axis labels and title, set one property at a time.
ax.set_xlabel("Taille des donnee")
ax.set_ylabel("Temps de transmission")
ax.set_title("Evolution du temps de transmission en fonction de la taille des données")

# Write the current figure to disk.
plt.savefig('stacko_g1500_evol_T-f(S).png')
#+END_SRC
** Régression linéaire
*** Cas inférieur à la MTU
#+BEGIN_SRC python :session :results replace
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import dates
import numpy as np
from scipy import stats
import seaborn as sns
import statsmodels.api as sm
from sklearn import linear_model


@plt.FuncFormatter
def fake_dates(x, pos):
    """Format a Matplotlib date number as a ``YYYY-MM-DD`` tick label."""
    as_datetime = dates.num2date(x)
    return as_datetime.strftime('%Y-%m-%d')


sns.set(color_codes=True)

# Frame for the at-most-1485 subset; ``datenum`` is the numeric form of the
# timestamps, which is what the regression plot puts on the x axis.
df = pd.DataFrame(dict(
    date=pd.to_datetime(date_l1500),
    datenum=dates.date2num(date_l1500),
    T=T_l1500,
    S=S_l1500,
))

fig, ax = plt.subplots()
# NOTE(review): the output file name says T-f(S) but x is "datenum" - confirm
# which regression was intended.
sns.regplot(data=df, x="datenum", y="T", color='purple', ax=ax)
# Render the numeric x ticks as calendar dates, rotated to stay legible.
ax.xaxis.set_major_formatter(fake_dates)
ax.tick_params(labelrotation=30)
fig.savefig('stacko_l1500_reglineaireT-f(S).png')
#+END_SRC
#+RESULTS:
: None
#+BEGIN_SRC python :session
# Preview the first nine sizes as a column (one row per sample, one feature).
# With reshape(1, -1) the leading axis has length 1, so [:9] selected nothing.
np.array(S_l1500).reshape(-1, 1)[:9]
#+END_SRC
#+RESULTS:
| 1759 | 1843 | 1511 | ... | 1503 | 1515 | 1875 |
#+BEGIN_SRC python :session
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# One single-element row per observation, as a 2-D feature array.
S_tt = [[size] for size in S_l1500]
my_s, my_t = np.array(S_tt), np.array(T_l1500)
# Fit T on S for the at-most-1485 subset; fit() returns the estimator itself.
lmodel = LinearRegression().fit(my_s, my_t)
# Value of the cell: intercept as L, inverse of the slope as C.
f"Les coeff sont L = {lmodel.intercept_} et C = { 1 / lmodel.coef_}"
#+END_SRC
#+RESULTS:
: Les coeff sont L = 3.257592785874401 et C = [2761.3155395]
*** Cas supérieur à la MTU
#+BEGIN_SRC python :session
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# One single-element row per observation, as a 2-D feature array.
S_tt = [[size] for size in S_g1500]
my_s, my_t = np.array(S_tt), np.array(T_g1500)
# Fit T on S for the above-threshold subset; fit() returns the estimator itself.
lmodel = LinearRegression().fit(my_s, my_t)
# Value of the cell: intercept as L, inverse of the slope as C.
f"Les coeff sont L = {lmodel.intercept_} et C = { 1 / lmodel.coef_}"
#+END_SRC
#+RESULTS:
: Les coeff sont L = 5.867233082184833 et C = [441.71908009]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment