{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [
 "# Estimating network latency and capacity from asymmetric measures\n",
 "\n",
 "We first load our dependencies and download the provided datasets if required."
 ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [
 "%matplotlib inline\n",
 "import os\n",
 "from os import path\n",
 "from urllib.request import urlretrieve\n",
 "import gzip\n",
 "\n",
 "import numpy as np\n",
 "import pandas as pd\n",
 "import statsmodels.api as sm\n",
 "import matplotlib.pyplot as plt"
 ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
 "def download_if_missing(url, filename):\n",
 "    \"\"\"Download `url` to `filename` unless `filename` already exists.\n",
 "\n",
 "    The data is first written to a temporary `.part` file that is renamed once\n",
 "    the transfer completes, so an interrupted download never leaves a truncated\n",
 "    file that a later run would mistake for a valid cached copy.\n",
 "    \"\"\"\n",
 "    if path.exists(filename):\n",
 "        return\n",
 "    partial_file = filename + '.part'\n",
 "    try:\n",
 "        urlretrieve(url, partial_file)\n",
 "        os.replace(partial_file, filename)\n",
 "    finally:\n",
 "        # Still present only if the transfer or the rename failed: discard it.\n",
 "        if path.exists(partial_file):\n",
 "            os.remove(partial_file)\n",
 "\n",
 "\n",
 "lan_url = 'http://mescal.imag.fr/membres/arnaud.legrand/teaching/2014/RICM4_EP_ping/liglab2.log.gz'\n",
 "lan_file = 'liglab2.log.gz'\n",
 "download_if_missing(lan_url, lan_file)\n",
 "\n",
 "wan_url = 'http://mescal.imag.fr/membres/arnaud.legrand/teaching/2014/RICM4_EP_ping/stackoverflow.log.gz'\n",
 "wan_file = 'stackoverflow.log.gz'\n",
 "download_if_missing(wan_url, wan_file)"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [
 "## First dataset: Local Area Network conditions\n",
 "\n",
 "### Data extraction\n",
 "\n",
 "The first dataset contains ping results. Here is an extract of the provided data."
] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
 "b'[1421761682.052172] 665 bytes from lig-publig.imag.fr (129.88.11.7): icmp_seq=1 ttl=60 time=22.5 ms\\n'\n",
 "b'[1421761682.277315] 1373 bytes from lig-publig.imag.fr (129.88.11.7): icmp_seq=1 ttl=60 time=21.2 ms\\n'\n",
 "b'[1421761682.502054] 262 bytes from lig-publig.imag.fr (129.88.11.7): icmp_seq=1 ttl=60 time=21.2 ms\\n'\n",
 "b'[1421761682.729257] 1107 bytes from lig-publig.imag.fr (129.88.11.7): icmp_seq=1 ttl=60 time=23.3 ms\\n'\n",
 "b'[1421761682.934648] 1128 bytes from lig-publig.imag.fr (129.88.11.7): icmp_seq=1 ttl=60 time=1.41 ms\\n'\n"
 ] } ], "source": [
 "with gzip.open(lan_file, 'rb') as f:\n",
 "    for _ in range(5):\n",
 "        print(next(f))"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [
 "We create a function to parse these log lines and test it."
 ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [
 "def parse_ping_line(l):\n",
 "    \"\"\"Parse one line of timestamped ping output.\n",
 "\n",
 "    Returns `[timestamp, size, rtt]`: `timestamp` is a `pd.Timestamp` built from\n",
 "    the bracketed epoch seconds, `size` is the integer preceding the `bytes`\n",
 "    token and `rtt` is the float following `time=`. `size` and `rtt` stay NaN\n",
 "    when the line does not report them.\n",
 "\n",
 "    Raises `AssertionError` for every line that cannot be parsed (empty line,\n",
 "    missing brackets, non-numeric fields, more than one `bytes` token).\n",
 "    \"\"\"\n",
 "    # `l[:1]` is '' for an empty line, so this covers both the emptiness check\n",
 "    # and the leading-bracket check.\n",
 "    if l[:1] != '[':\n",
 "        raise AssertionError('line does not start with a timestamp: %r' % l)\n",
 "\n",
 "    try:\n",
 "        timestamp = pd.Timestamp(float(l[1:l.index(']')]), unit='s')\n",
 "        size = np.nan\n",
 "        rtt = np.nan\n",
 "\n",
 "        tokens = l.split(' ')\n",
 "        for i, t in enumerate(tokens):\n",
 "            if t == 'bytes':\n",
 "                if not np.isnan(size):\n",
 "                    raise AssertionError('several sizes in line: %r' % l)\n",
 "                size = int(tokens[i-1])\n",
 "            elif t.startswith('time='):\n",
 "                rtt = float(t[len('time='):])\n",
 "    except ValueError as e:\n",
 "        # A missing ']' or a non-numeric field raises ValueError; report it\n",
 "        # with the same exception type as every other invalid line.\n",
 "        raise AssertionError('malformed ping line: %r' % l) from e\n",
 "\n",
 "    return [timestamp, size, rtt]\n",
 "\n",
 "\n",
 "test_line = '[1421761682.052172] 665 bytes from lig-publig.imag.fr (129.88.11.7): icmp_seq=1 ttl=60 time=22.5 ms\\n'\n",
 "[timestamp, size, rtt] = parse_ping_line(test_line)\n",
 "assert timestamp == pd.Timestamp('2015-01-20 13:48:02.052172')\n",
 "assert size == 665\n",
 "assert rtt == 22.5\n",
 "\n",
 "# A truncated line (no closing bracket) must be rejected cleanly.\n",
 "try:\n",
 "    parse_ping_line('[1421761682.052172 665 bytes')\n",
 "except AssertionError:\n",
 "    pass\n",
 "else:\n",
 "    raise AssertionError('malformed line was accepted')"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [
 "We can now process our full dataset and check if any data is missing."
] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
 " size rtt\n",
 "timestamp \n",
 "2015-01-20 13:48:04.770828000 21 NaN\n",
 "2015-01-20 13:48:26.146320000 9 NaN\n",
 "2015-01-20 13:49:08.647974000 9 NaN\n",
 "2015-01-20 13:49:48.500515999 9 NaN\n",
 "2015-01-20 13:50:05.331125000 18 NaN\n"
 ] } ], "source": [
 "rows = []\n",
 "with gzip.open(lan_file, 'rb') as f:\n",
 "    for l in f:\n",
 "        try:\n",
 "            rows.append(parse_ping_line(l.decode()))\n",
 "        except (AssertionError, ValueError):\n",
 "            # ValueError covers undecodable bytes (UnicodeDecodeError) and\n",
 "            # malformed numeric fields: report the line instead of aborting the load.\n",
 "            print('invalid line', l)\n",
 "\n",
 "# Index the measurements by (sorted) timestamp, then show the first rows with a missing value.\n",
 "df = pd.DataFrame(rows, columns=['timestamp', 'size', 'rtt']).set_index('timestamp').sort_index()\n",
 "print(df[df.isnull().any(axis=1)].head())"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [
 "The value `time` is missing from some rows. We remove these measurements."
 ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | size | \n", "rtt | \n", "
---|---|---|
count | \n", "44036.000000 | \n", "44036.000000 | \n", "
mean | \n", "1000.025729 | \n", "5.136663 | \n", "
std | \n", "573.659666 | \n", "12.221312 | \n", "
min | \n", "24.000000 | \n", "1.000000 | \n", "
25% | \n", "499.000000 | \n", "1.270000 | \n", "
50% | \n", "989.000000 | \n", "1.440000 | \n", "
75% | \n", "1499.000000 | \n", "2.310000 | \n", "
max | \n", "2007.000000 | \n", "276.000000 | \n", "