diff --git "a/module3/exo3/Estimation de la latence et de la capacit\303\251 d\342\200\231une connexion \303\240 partir de mesures asym\303\251triques.ipynb" "b/module3/exo3/Estimation de la latence et de la capacit\303\251 d\342\200\231une connexion \303\240 partir de mesures asym\303\251triques.ipynb" new file mode 100644 index 0000000000000000000000000000000000000000..450ed7884f431c55e21d1b18b137a856ebe9d60c --- /dev/null +++ "b/module3/exo3/Estimation de la latence et de la capacit\303\251 d\342\200\231une connexion \303\240 partir de mesures asym\303\251triques.ipynb" @@ -0,0 +1,163 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Extraction, lecture et vérification des données\n", + "\n", + "## Extraction et lecture\n", + "\n", + "On commence par récupérer les jeux de données et on les sauvegarde en local pour une utilisation ultérieure." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reading local version of liglab2.log.gz\n", + "[1421761682.052172] 665 bytes from lig-publig.imag.fr (129.88.11.7): icmp_seq=1 ttl=60 time=22.5 ms\n", + "\n", + "Reading local version of stackoverflow.log.gz\n", + "[1421771203.082701] 1257 bytes from stackoverflow.com (198.252.206.140): icmp_seq=1 ttl=50 time=120 ms\n", + "\n" + ] + } + ], + "source": [ + "%matplotlib inline\n", + "import urllib.request  ## a bare 'import urllib' does not load the request submodule\n", + "import os, gzip\n", + "data_url = [\"http://mescal.imag.fr/membres/arnaud.legrand/teaching/2014/RICM4_EP_ping/liglab2.log.gz\",\n", + "            \"http://mescal.imag.fr/membres/arnaud.legrand/teaching/2014/RICM4_EP_ping/stackoverflow.log.gz\"]\n", + "filenames = []  ## local file names, in the same order as data_url\n", + "raw_data = {}  ## maps each file name to the list of its raw text lines\n", + "for url in data_url:\n", + "    fname = url.split('/')[-1]  ## get file name from url, which is everything after the last '/'\n", + "    filenames.append(fname)\n", + "    if os.path.isfile(fname):\n", + "        print(\"Reading local version of\", fname)\n",
+ "    else:\n", + "        print(\"Downloading remote version for\", url)\n", + "        urllib.request.urlretrieve(url, fname)  ## this downloads url and saves the file to fname\n", + "\n", + "    with gzip.open(fname, 'rt') as file:  ## 'rt' decompresses and decodes to text\n", + "        raw_data[fname] = file.readlines()\n", + "    print(raw_data[fname][0])  ## print first line to check it worked" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Vérification des données\n", + "\n", + "Les données sont des fichiers textes où chaque ligne est de la forme:\n", + "\n", + "\\[**timestamp**\\] **size** bytes from **url** (**ip**): icmp_seq=**icmp_seq** ttl=**ttl** time=**time**\n", + "\n", + "- **timestamp** est l'instant d'émission de la requête (flottant);\n", + "- **size** est la taille de la requête en octets (entier);\n", + "- **url** est l'url vers laquelle la requête a été envoyée (chaîne de caractères);\n", + "- **ip** est l'adresse ip de l'url précédente (chaîne de caractères);\n", + "- **icmp_seq** et **ttl** sont ignorées;\n", + "- **time** est le temps aller-retour entre l'ordinateur d'envoi et l'url spécifiée (flottant + chaîne de caractères).\n", + "\n", + "Pour vérifier les données, nous utilisons des expressions régulières. Les données vérifiées sont ensuite insérées dans un DataFrame de pandas pour traitement." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hideOutput": true + }, + "outputs": [], + "source": [ + "import re\n", + "import pandas as pd\n", + "\n", + "## One ping log line; each named group below is read back with m.group(<name>).\n", + "pingoutput = re.compile(r'\\[(?P<timestamp>\\d*\\.\\d*)\\]'  ## match timestamp as floating number\n", + "                        r' (?P<size>\\d*) bytes from '  ## match size as integer\n", + "                        r'(?P<url>(\\w[\\w\\-]*\\.)*\\w*) '  ## match simple urls\n", + "                        r'\\((?P<ip>\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})\\)'  ## match ips\n", + "                        r': icmp_seq=(?P<icmp_seq>\\d*) '  ## match icmp_seq\n", + "                        r'ttl=(?P<ttl>\\d*) '  ## match ttl\n", + "                        r'time=(?P<ping>\\d*\\.?\\d*) ms'  ## match time with unit\n", + "                        , flags=re.ASCII|re.IGNORECASE)\n", + "columns = ['timestamp', 'size', 'url', 'ip', 'icmp_seq', 'ttl', 'ping']\n", + "data = {}  ## maps each file name to the DataFrame of its successfully parsed lines\n", + "for fname in filenames:\n", + "    rdata = []\n", + "    errors = 0\n", + "    for line in raw_data[fname]:\n", + "        m = pingoutput.match(line)\n", + "        if m is None:  ## line does not follow the expected format: count it and skip it\n", + "            errors = errors + 1\n", + "            continue\n", + "        rdata.append({'timestamp':pd.Timestamp(float(m.group('timestamp')), unit='s'),\n", + "                      'size':int(m.group('size')), 'url':m.group('url'),\n", + "                      'ip':m.group('ip'),'icmp_seq':int(m.group('icmp_seq')),\n", + "                      'ttl':int(m.group('ttl')), 'ping':float(m.group('ping'))})\n", + "    ## build the frame once; explicit columns keep the schema even when no line parsed\n", + "    data[fname] = pd.DataFrame(rdata, columns=columns)\n", + "    print('{:d} lines failed parsing in {:s}'.format(errors, fname))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nous pouvons désormais observer l'évolution du ping en fonction du temps, ici dans le cas du premier fichier." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mydata = data[filenames[0]]\n", + "mydata.plot(x=\"timestamp\", y=\"ping\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/module3/exo3/exercice.ipynb b/module3/exo3/exercice.ipynb deleted file mode 100644 index 0bbbe371b01e359e381e43239412d77bf53fb1fb..0000000000000000000000000000000000000000 --- a/module3/exo3/exercice.ipynb +++ /dev/null @@ -1,25 +0,0 @@ -{ - "cells": [], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - 
"language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} - diff --git a/module3/exo3/liglab2.log.gz b/module3/exo3/liglab2.log.gz new file mode 100644 index 0000000000000000000000000000000000000000..67cd38790071d15567564e59af7528c88c8b08ae Binary files /dev/null and b/module3/exo3/liglab2.log.gz differ diff --git a/module3/exo3/stackoverflow.log.gz b/module3/exo3/stackoverflow.log.gz new file mode 100644 index 0000000000000000000000000000000000000000..cce2629a0f75e566eddf238a7995d8007327713d Binary files /dev/null and b/module3/exo3/stackoverflow.log.gz differ