Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
M
mooc-rr
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
006b4500d188491bec31dcfa180916c2
mooc-rr
Commits
b918ae3d
Commit
b918ae3d
authored
Jun 07, 2020
by
Samuel MEYNARD
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Ajout etude stackoverflow
parent
19885907
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
429 additions
and
17 deletions
+429
-17
exercice_python_fr.org
module3/exo3/exercice_python_fr.org
+429
-17
No files found.
module3/exo3/exercice_python_fr.org
View file @
b918ae3d
...
@@ -36,7 +36,8 @@ _Votre mission si vous l'acceptez :_
...
@@ -36,7 +36,8 @@ _Votre mission si vous l'acceptez :_
5. Répétez les étapes précédentes avec le second jeu de données (stackoverflow)
5. Répétez les étapes précédentes avec le second jeu de données (stackoverflow)
6. Déposer dans FUN votre résultat
6. Déposer dans FUN votre résultat
* Récupération du 1^er jeu de donnée
* Liglab2
** Récupération du 1^er jeu de donnée
** Téléchargement
** Téléchargement
#+BEGIN_SRC python :session :file step1.txt :results file
#+BEGIN_SRC python :session :file step1.txt :results file
from urllib.request import urlretrieve
from urllib.request import urlretrieve
...
@@ -75,20 +76,19 @@ cleantable[:4]
...
@@ -75,20 +76,19 @@ cleantable[:4]
| [1421761682.502054] | 262 | bytes | from | lig-publig.imag.fr | (129.88.11.7): | icmp_seq=1 | ttl=60 | time=21.2 | ms |
| [1421761682.502054] | 262 | bytes | from | lig-publig.imag.fr | (129.88.11.7): | icmp_seq=1 | ttl=60 | time=21.2 | ms |
| [1421761682.729257] | 1107 | bytes | from | lig-publig.imag.fr | (129.88.11.7): | icmp_seq=1 | ttl=60 | time=23.3 | ms |
| [1421761682.729257] | 1107 | bytes | from | lig-publig.imag.fr | (129.88.11.7): | icmp_seq=1 | ttl=60 | time=23.3 | ms |
#+BEGIN_SRC python :session :results
output
replace
#+BEGIN_SRC python :session :results replace
from datetime import datetime
from datetime import datetime
date = [datetime.utcfromtimestamp(float(row[0][1:-1])) for row in cleantable]
date = [datetime.utcfromtimestamp(float(row[0][1:-1])) for row in cleantable]
donnee
= [int(row[1]) for row in cleantable]
S
= [int(row[1]) for row in cleantable]
source = [str(row[4]) for row in cleantable]
source = [str(row[4]) for row in cleantable]
ip = [str(row[5][1:-1]) for row in cleantable]
ip = [str(row[5][1:-1]) for row in cleantable]
ltime = [float(row[8].split('=')[1]) for row in cleantable]
T = [float(row[8].split('=')[1]) for row in cleantable]
dataset = list(zip(date,donnee, ltime))
dataset = list(zip(date,donnee, ltime))
print(dataset[:4])
T[:10]
#+END_SRC
#+END_SRC
#+RESULTS:
#+RESULTS:
: [(datetime.datetime(2015, 1, 20, 13, 48, 2, 52172), 665, 22.5), (datetime.datetime(2015, 1, 20, 13, 48, 2, 277315), 1373, 21.2), (datetime.datetime(2015, 1, 20, 13, 48, 2, 502054), 262, 21.2), (datetime.datetime(2015, 1, 20, 13, 48, 2, 729257), 1107, 23.3)]
| 22.5 | 21.2 | 21.2 | 23.3 | 1.41 | 21.9 | 78.7 | 25.1 | 24.0 | 19.5 |
#+BEGIN_SRC python :session :results silent :file test.png
#+BEGIN_SRC python :session :results silent :file test.png
import matplotlib
import matplotlib
...
@@ -98,20 +98,275 @@ fig, ax = plt.subplots(figsize=(12, 12))
...
@@ -98,20 +98,275 @@ fig, ax = plt.subplots(figsize=(12, 12))
# Add x-axis and y-axis
# Add x-axis and y-axis
ax.scatter(date,
ax.scatter(date,
ltime
,
T
,
color='purple')
color='purple')
# Set title and labels for axes
# Set title and labels for axes
ax.set(xlabel="Date",
ax.set(xlabel="Date",
ylabel="Latence",
ylabel="Temps de transmission",
title="Evolution de la latence dans le temps")
title="Evolution de la temps de transmission dans le temps")
plt.savefig('evol_temps_transmission_dans_le_temps.png')
#+END_SRC
#+RESULTS:
: None
Il ne semble pas y avoir d'impact au cours du temps
** Evolution du temps de transmission en fonction de la taille des données
#+BEGIN_SRC python :session :results silent :file test2.png
import matplotlib
import matplotlib.pyplot as plt

# Square 12x12-inch canvas holding a single set of axes.
fig, ax = plt.subplots(figsize=(12, 12))

# One purple point per ping: message size on x, transmission time on y.
ax.scatter(S, T, color='purple')

# Axis labels and title, set one at a time.
ax.set_xlabel("Taille des donnee")
ax.set_ylabel("Temps de transmission")
ax.set_title("Evolution du temps de transmission en fonction de la taille des données")

plt.savefig('evol_temps_transmission_en_fonction_de_la_taille.png')
#+END_SRC
Ici, on voit l'impact de la MTU, certainement fixée à 1500 octets, sur le temps de transmission
** Différenciation par rapport à la taille
*** Inférieur à la MTU
#+BEGIN_SRC python :session
# Rows whose size field (column 1) is at most 1485 bytes.
table_l1500 = [fields for fields in cleantable if int(fields[1]) <= 1485]
# Bracketed epoch timestamps from column 0, converted to naive UTC datetimes.
date_l1500 = [
    datetime.utcfromtimestamp(float(fields[0][1:-1])) for fields in table_l1500
]
# Message sizes, and the numeric part of the "time=<value>" field (column 8).
S_l1500 = [int(fields[1]) for fields in table_l1500]
T_l1500 = [float(fields[8].split('=')[1]) for fields in table_l1500]
dataset_l1500 = list(zip(date_l1500, S_l1500, T_l1500))
# Preview of the first ten (date, size, time) tuples.
dataset_l1500[:10]
#+END_SRC
#+RESULTS:
| datetime.datetime | (2015 1 20 13 48 2 52172) | 665 | 22.5 |
| datetime.datetime | (2015 1 20 13 48 2 277315) | 1373 | 21.2 |
| datetime.datetime | (2015 1 20 13 48 2 502054) | 262 | 21.2 |
| datetime.datetime | (2015 1 20 13 48 2 729257) | 1107 | 23.3 |
| datetime.datetime | (2015 1 20 13 48 2 934648) | 1128 | 1.41 |
| datetime.datetime | (2015 1 20 13 48 3 160397) | 489 | 21.9 |
| datetime.datetime | (2015 1 20 13 48 3 672157) | 1146 | 25.1 |
| datetime.datetime | (2015 1 20 13 48 3 899933) | 884 | 24.0 |
| datetime.datetime | (2015 1 20 13 48 4 122687) | 1422 | 19.5 |
| datetime.datetime | (2015 1 20 13 48 4 344135) | 1180 | 18.0 |
#+BEGIN_SRC python :session :results silent :file test2.png
import matplotlib
import matplotlib.pyplot as plt

# New 12x12-inch figure with one set of axes.
fig, ax = plt.subplots(figsize=(12, 12))

# Scatter of the "<= 1485 bytes" subset: size on x, transmission time on y.
ax.scatter(S_l1500, T_l1500, color='purple')

# Labels and title applied through the individual setters.
ax.set_xlabel("Taille des donnee")
ax.set_ylabel("Temps de transmission")
ax.set_title("Evolution du temps de transmission en fonction de la taille des données")

plt.savefig('l1500_evol_T-f(S).png')
#+END_SRC
*** Supérieur à la MTU
Calcul d'un tableau avec les données de taille supérieure à la MTU
#+BEGIN_SRC python :session
# Rows strictly larger than the 1485-byte threshold. The comparison is strict
# so that a message of exactly 1485 bytes, already kept by the "<= 1485"
# subset, is not counted in both groups.
table_g1500 = [row for row in cleantable if int(row[1]) > 1485]
# Column 0 holds a bracketed epoch timestamp; [1:-1] removes the brackets.
date_g1500 = [datetime.utcfromtimestamp(float(row[0][1:-1])) for row in table_g1500]
# Column 1 is the message size, column 8 the "time=<value>" field.
S_g1500 = [int(row[1]) for row in table_g1500]
T_g1500 = [float(row[8].split('=')[1]) for row in table_g1500]
dataset_g1500 = list(zip(date_g1500, S_g1500, T_g1500))
# Preview of the first ten (date, size, time) tuples.
dataset_g1500[:10]
#+END_SRC
#+RESULTS:
| datetime.datetime | (2015 1 20 13 48 3 443055) | 1759 | 78.7 |
| datetime.datetime | (2015 1 20 13 48 5 620117) | 1843 | 2.31 |
| datetime.datetime | (2015 1 20 13 48 6 234464) | 1511 | 2.18 |
| datetime.datetime | (2015 1 20 13 48 7 463275) | 1510 | 2.17 |
| datetime.datetime | (2015 1 20 13 48 7 874230) | 1966 | 2.2 |
| datetime.datetime | (2015 1 20 13 48 8 694652) | 1518 | 2.19 |
| datetime.datetime | (2015 1 20 13 48 10 335289) | 1732 | 2.29 |
| datetime.datetime | (2015 1 20 13 48 10 950126) | 1500 | 2.14 |
| datetime.datetime | (2015 1 20 13 48 11 359824) | 1520 | 2.1 |
| datetime.datetime | (2015 1 20 13 48 11 974735) | 1509 | 2.23 |
#+BEGIN_SRC python :session :results silent :file test2.png
import matplotlib
import matplotlib.pyplot as plt

# Fresh 12x12-inch figure and its axes.
fig, ax = plt.subplots(figsize=(12, 12))

# Scatter of the large-message subset: size on x, transmission time on y.
ax.scatter(S_g1500, T_g1500, color='purple')

# Labels and title applied through the individual setters.
ax.set_xlabel("Taille des donnee")
ax.set_ylabel("Temps de transmission")
ax.set_title("Evolution du temps de transmission en fonction de la taille des données")

plt.savefig('g1500_evol_T-f(S).png')
#+END_SRC
** Régression linéaire
*** Cas inférieur à la MTU
#+BEGIN_SRC python :session :results replace
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import dates
import numpy as np
from scipy import stats
import seaborn as sns
import statsmodels.api as sm
from sklearn import linear_model


@plt.FuncFormatter
def fake_dates(x, pos):
    """Format a matplotlib date number as a YYYY-MM-DD tick label.

    Kept so the name stays defined in the session; it is no longer applied
    to the x axis below because that axis now carries sizes, not dates.
    """
    return dates.num2date(x).strftime('%Y-%m-%d')


sns.set(color_codes=True)
df = pd.DataFrame({
    'date': pd.to_datetime(date_l1500),
    'datenum': dates.date2num(date_l1500),
    'T': T_l1500,
    'S': S_l1500})
fig, ax = plt.subplots()
# Regress T on S: the figure is saved as "reglineaireT-f(S)" and the cells
# that follow fit T against S, so the x axis must be the size column rather
# than the date number.
sns.regplot(x="S", y="T", color='purple', data=df, ax=ax)
# legible labels
ax.tick_params(labelrotation=30)
fig.savefig('l1500_reglineaireT-f(S).png')
#+END_SRC
#+RESULTS:
: None
#+BEGIN_SRC python :session
# reshape(1, -1) yields a single row holding every size, so the [:9] slice
# (taken along the first axis) returns that whole row, not the first 9 values.
np.array(S_l1500).reshape(1, -1)[:9]
#+END_SRC
#+RESULTS:
| 1759 | 1843 | 1511 | ... | 1503 | 1515 | 1875 |
#+BEGIN_SRC python :session
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# scikit-learn expects a 2-D feature matrix: wrap every size in its own row.
S_tt = [[size] for size in S_l1500]
my_s, my_t = np.array(S_tt), np.array(T_l1500)
# Least-squares fit of transmission time against message size.
lmodel = LinearRegression()
lmodel.fit(my_s, my_t)
# The intercept is reported as L and the inverse of the slope as C.
f"Les coeff sont L = {lmodel.intercept_} et C = { 1 / lmodel.coef_}"
#+END_SRC
#+RESULTS:
: Les coeff sont L = 3.257592785874401 et C = [2761.3155395]
*** Cas supérieur à la MTU
#+BEGIN_SRC python :session
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# scikit-learn expects a 2-D feature matrix: one single-element row per size.
S_tt = [[value] for value in S_g1500]
my_s = np.array(S_tt)
my_t = np.array(T_g1500)
# Least-squares fit of transmission time against message size.
lmodel = LinearRegression()
lmodel.fit(my_s, my_t)
# The summary string is the last statement so that it is the cell's value.
# The stray plt.savefig('test.png') that used to follow it has been removed:
# this cell draws nothing, so it only re-saved an earlier figure.
f"Les coeff sont L = {lmodel.intercept_} et C = { 1 / lmodel.coef_}"
#+END_SRC
#+RESULTS:
: Les coeff sont L = 5.867233082184833 et C = [441.71908009]
* Stackoverflow
** Récupération du jeu de données
*** Téléchargement
#+BEGIN_SRC python :session :file step1.txt :results replace
from urllib.request import urlretrieve
from os import path

stacko_file = "stackoverflow.log"
stacko_filegz = stacko_file + ".gz"
url = "http://mescal.imag.fr/membres/arnaud.legrand/teaching/2014/RICM4_EP_ping/stackoverflow.log.gz"
# Only the compressed archive is written to disk, so that is the file whose
# presence must be tested. Testing the uncompressed name, which nothing in
# the visible cells creates, would trigger a new download on every run.
if not path.exists(stacko_filegz):
    urlretrieve(url, stacko_filegz)
#+END_SRC
#+END_SRC
#+RESULTS:
#+RESULTS:
: None
: None
* Evolution du temps de transmission à travers le temps
*** Lecture du fichier
#+BEGIN_SRC python :session :results output
import gzip

# The context manager closes the archive even if reading or decoding fails.
# The whole file is decoded as latin-1 and split into one string per line.
with gzip.open(stacko_filegz) as f:
    data = f.read().decode('latin-1').strip().splitlines()
#+END_SRC
#+RESULTS:
#+BEGIN_SRC python :session :results replace
# Split every log line on single spaces.
table = [line.split(' ') for line in data]
# Keep only the lines that produced exactly ten fields.
cleantable = [fields for fields in table if len(fields) == 10]
# Preview of the first four retained rows.
cleantable[:4]
#+END_SRC
#+RESULTS:
| [1421771203.082701] | 1257 | bytes | from | stackoverflow.com | (198.252.206.140): | icmp_seq=1 | ttl=50 | time=120 | ms |
| [1421771203.408254] | 454 | bytes | from | stackoverflow.com | (198.252.206.140): | icmp_seq=1 | ttl=50 | time=120 | ms |
| [1421771203.739730] | 775 | bytes | from | stackoverflow.com | (198.252.206.140): | icmp_seq=1 | ttl=50 | time=126 | ms |
| [1421771204.056630] | 1334 | bytes | from | stackoverflow.com | (198.252.206.140): | icmp_seq=1 | ttl=50 | time=112 | ms |
#+BEGIN_SRC python :session :results replace
from datetime import datetime

# Column 0 is a bracketed epoch timestamp, e.g. "[1421771203.082701]".
date = [datetime.utcfromtimestamp(float(row[0][1:-1])) for row in cleantable]
# Column 1 is the message size.
S = [int(row[1]) for row in cleantable]
source = [str(row[4]) for row in cleantable]
# Column 5 looks like "(198.252.206.140):". Strip the parentheses and the
# colon; the previous [1:-1] slice left the closing parenthesis in place.
ip = [str(row[5].strip('():')) for row in cleantable]
# Column 8 is "time=<value>"; keep the numeric part.
T = [float(row[8].split('=')[1]) for row in cleantable]
# Zip the columns actually built above. The old names `donnee` and `ltime`
# are no longer defined since the lists were renamed to S and T.
dataset = list(zip(date, S, T))
T[:10]
#+END_SRC
#+RESULTS:
| 120.0 | 120.0 | 126.0 | 112.0 | 111.0 | 111.0 | 112.0 | 111.0 | 111.0 | 111.0 |
#+BEGIN_SRC python :session :results silent :file test.png
import matplotlib
import matplotlib.pyplot as plt

# Square 12x12-inch canvas holding a single set of axes.
fig, ax = plt.subplots(figsize=(12, 12))

# One purple point per ping: timestamp on x, transmission time on y.
ax.scatter(date, T, color='purple')

# Axis labels and title, set one at a time.
ax.set_xlabel("Date")
ax.set_ylabel("Temps de transmission")
ax.set_title("Evolution de la temps de transmission dans le temps")

plt.savefig('stacko_evol_temps_transmission_dans_le_temps.png')
#+END_SRC
#+RESULTS:
: None
Il ne semble pas y avoir d'impact au cours du temps
** Evolution du temps de transmission en fonction de la taille des données
#+BEGIN_SRC python :session :results silent :file test2.png
#+BEGIN_SRC python :session :results silent :file test2.png
import matplotlib
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
...
@@ -119,16 +374,173 @@ import matplotlib.pyplot as plt
...
@@ -119,16 +374,173 @@ import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(12, 12))
fig, ax = plt.subplots(figsize=(12, 12))
# Add x-axis and y-axis
# Add x-axis and y-axis
ax.scatter(
donnee
,
ax.scatter(
S
,
ltime
,
T
,
color='purple')
color='purple')
# Set title and labels for axes
# Set title and labels for axes
ax.set(xlabel="Taille des donnee",
ax.set(xlabel="Taille des donnee",
ylabel="
Latence
",
ylabel="
Temps de transmission
",
title="Evolution d
e la latence dans le temp
s")
title="Evolution d
u temps de transmission en fonction de la taille des donnée
s")
plt.savefig('
test2
.png')
plt.savefig('
stacko_evol_temps_transmission_en_fonction_de_la_taille
.png')
#+END_SRC
#+END_SRC
Ici, on voit l'impact de la MTU ici certainement à 1500 sur le temps de transport
Ici, on voit l'impact de la MTU ici certainement à 1500 sur le temps de transport
*** Différenciation par rapport à la taille
**** Inférieur à la MTU
#+BEGIN_SRC python :session
# Rows whose size field (column 1) does not exceed 1485 bytes.
table_l1500 = [entry for entry in cleantable if int(entry[1]) <= 1485]
# Column 0: bracketed epoch timestamp, converted to a naive UTC datetime.
date_l1500 = [
    datetime.utcfromtimestamp(float(entry[0][1:-1])) for entry in table_l1500
]
# Column 1: message size. Column 8: "time=<value>", numeric part only.
S_l1500 = [int(entry[1]) for entry in table_l1500]
T_l1500 = [float(entry[8].split('=')[1]) for entry in table_l1500]
dataset_l1500 = list(zip(date_l1500, S_l1500, T_l1500))
# Preview of the first ten (date, size, time) tuples.
dataset_l1500[:10]
#+END_SRC
#+RESULTS:
| datetime.datetime | (2015 1 20 16 26 43 82701) | 1257 | 120.0 |
| datetime.datetime | (2015 1 20 16 26 43 408254) | 454 | 120.0 |
| datetime.datetime | (2015 1 20 16 26 43 739730) | 775 | 126.0 |
| datetime.datetime | (2015 1 20 16 26 44 56630) | 1334 | 112.0 |
| datetime.datetime | (2015 1 20 16 26 44 372224) | 83 | 111.0 |
| datetime.datetime | (2015 1 20 16 26 44 688367) | 694 | 111.0 |
| datetime.datetime | (2015 1 20 16 26 45 321112) | 632 | 111.0 |
| datetime.datetime | (2015 1 20 16 26 45 637464) | 405 | 111.0 |
| datetime.datetime | (2015 1 20 16 26 45 953472) | 1419 | 111.0 |
| datetime.datetime | (2015 1 20 16 26 46 269163) | 329 | 111.0 |
#+BEGIN_SRC python :session :results silent :file test2.png
import matplotlib
import matplotlib.pyplot as plt

# New 12x12-inch figure with one set of axes.
fig, ax = plt.subplots(figsize=(12, 12))

# Scatter of the "<= 1485 bytes" subset: size on x, transmission time on y.
ax.scatter(S_l1500, T_l1500, color='purple')

# Labels and title applied through the individual setters.
ax.set_xlabel("Taille des donnee")
ax.set_ylabel("Temps de transmission")
ax.set_title("Evolution du temps de transmission en fonction de la taille des données")

plt.savefig('stacko_l1500_evol_T-f(S).png')
#+END_SRC
**** Supérieur à la MTU
Calcul d'un tableau avec les données de taille supérieure à la MTU
#+BEGIN_SRC python :session
# Rows strictly larger than the 1485-byte threshold. The comparison is strict
# so that a message of exactly 1485 bytes, already kept by the "<= 1485"
# subset, is not counted in both groups.
table_g1500 = [row for row in cleantable if int(row[1]) > 1485]
# Column 0 holds a bracketed epoch timestamp; [1:-1] removes the brackets.
date_g1500 = [datetime.utcfromtimestamp(float(row[0][1:-1])) for row in table_g1500]
# Column 1 is the message size, column 8 the "time=<value>" field.
S_g1500 = [int(row[1]) for row in table_g1500]
T_g1500 = [float(row[8].split('=')[1]) for row in table_g1500]
dataset_g1500 = list(zip(date_g1500, S_g1500, T_g1500))
# Preview of the first ten (date, size, time) tuples.
dataset_g1500[:10]
#+END_SRC
#+RESULTS:
| datetime.datetime | (2015 1 20 16 26 45 5514) | 1577 | 112.0 |
| datetime.datetime | (2015 1 20 16 26 46 901972) | 1714 | 112.0 |
| datetime.datetime | (2015 1 20 16 26 47 851148) | 1598 | 112.0 |
| datetime.datetime | (2015 1 20 16 26 52 272504) | 1619 | 112.0 |
| datetime.datetime | (2015 1 20 16 26 55 749652) | 1655 | 112.0 |
| datetime.datetime | (2015 1 20 16 26 56 66885) | 1556 | 112.0 |
| datetime.datetime | (2015 1 20 16 26 57 648057) | 1839 | 112.0 |
| datetime.datetime | (2015 1 20 16 26 58 280820) | 1572 | 112.0 |
| datetime.datetime | (2015 1 20 16 27 1 133246) | 1491 | 120.0 |
| datetime.datetime | (2015 1 20 16 27 1 765499) | 1978 | 112.0 |
#+BEGIN_SRC python :session :results silent :file test2.png
import matplotlib
import matplotlib.pyplot as plt

# Fresh 12x12-inch figure and its axes.
fig, ax = plt.subplots(figsize=(12, 12))

# Scatter of the large-message subset: size on x, transmission time on y.
ax.scatter(S_g1500, T_g1500, color='purple')

# Labels and title applied through the individual setters.
ax.set_xlabel("Taille des donnee")
ax.set_ylabel("Temps de transmission")
ax.set_title("Evolution du temps de transmission en fonction de la taille des données")

plt.savefig('stacko_g1500_evol_T-f(S).png')
#+END_SRC
** Régression linéaire
*** Cas inférieur à la MTU
#+BEGIN_SRC python :session :results replace
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import dates
import numpy as np
from scipy import stats
import seaborn as sns
import statsmodels.api as sm
from sklearn import linear_model


@plt.FuncFormatter
def fake_dates(x, pos):
    """Format a matplotlib date number as a YYYY-MM-DD tick label.

    Kept so the name stays defined in the session; it is no longer applied
    to the x axis below because that axis now carries sizes, not dates.
    """
    return dates.num2date(x).strftime('%Y-%m-%d')


sns.set(color_codes=True)
df = pd.DataFrame({
    'date': pd.to_datetime(date_l1500),
    'datenum': dates.date2num(date_l1500),
    'T': T_l1500,
    'S': S_l1500})
fig, ax = plt.subplots()
# Regress T on S: the figure is saved as "reglineaireT-f(S)" and the cells
# that follow fit T against S, so the x axis must be the size column rather
# than the date number.
sns.regplot(x="S", y="T", color='purple', data=df, ax=ax)
# legible labels
ax.tick_params(labelrotation=30)
fig.savefig('stacko_l1500_reglineaireT-f(S).png')
#+END_SRC
#+RESULTS:
: None
#+BEGIN_SRC python :session
# reshape(1, -1) yields a single row holding every size, so the [:9] slice
# (taken along the first axis) returns that whole row, not the first 9 values.
np.array(S_l1500).reshape(1, -1)[:9]
#+END_SRC
#+RESULTS:
| 1759 | 1843 | 1511 | ... | 1503 | 1515 | 1875 |
#+BEGIN_SRC python :session
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# scikit-learn expects a 2-D feature matrix: wrap every size in its own row.
S_tt = [[size] for size in S_l1500]
my_s, my_t = np.array(S_tt), np.array(T_l1500)
# Least-squares fit of transmission time against message size.
lmodel = LinearRegression()
lmodel.fit(my_s, my_t)
# The intercept is reported as L and the inverse of the slope as C.
f"Les coeff sont L = {lmodel.intercept_} et C = { 1 / lmodel.coef_}"
#+END_SRC
#+RESULTS:
: Les coeff sont L = 3.257592785874401 et C = [2761.3155395]
*** Cas supérieur à la MTU
#+BEGIN_SRC python :session
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Feature matrix for scikit-learn: one single-element row per message size.
S_tt = [[size] for size in S_g1500]
my_s, my_t = np.array(S_tt), np.array(T_g1500)
# Least-squares fit of transmission time against message size.
lmodel = LinearRegression()
lmodel.fit(my_s, my_t)
# The intercept is reported as L and the inverse of the slope as C.
f"Les coeff sont L = {lmodel.intercept_} et C = { 1 / lmodel.coef_}"
#+END_SRC
#+RESULTS:
: Les coeff sont L = 5.867233082184833 et C = [441.71908009]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment