{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Traitement du fichier de données\n",
    "## Chargement de quelques librairies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Lecture des données"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "filename = \"Emails_par_jours.csv\"\n",
    "\n",
    "# Read the file once, dropping blank lines and line endings. The context\n",
    "# manager closes the file handle as soon as the lines are in memory.\n",
    "with open(filename, \"r\") as file:\n",
    "    raw_lines = [line.strip() for line in file if line.strip()]\n",
    "\n",
    "for line in raw_lines:\n",
    "    print(line.split(\",\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Mettre les données dans des tableaux"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Header row: the column names of the CSV file.\n",
    "names = raw_lines[0].split(\",\")\n",
    "Ncol = len(names)\n",
    "\n",
    "# The first field of each data line is a date written as YYYY-MM-DD, which\n",
    "# int() cannot parse directly. It is split into three integers, so each row\n",
    "# of the array is [year, month, day, emails, answers]: two columns more than\n",
    "# the CSV file has.\n",
    "rows = []\n",
    "for line in raw_lines[1:]:\n",
    "    date_field, *counts = line.split(\",\")\n",
    "    date_parts = [int(part) for part in date_field.split(\"-\")]\n",
    "    rows.append(date_parts + [int(count) for count in counts])\n",
    "\n",
    "# reshape keeps the array two-dimensional even when there are no data rows.\n",
    "datatab = np.array(rows, dtype=int).reshape(-1, Ncol + 2)\n",
    "\n",
    "# Number of data rows (the header is not counted). Defined once, here, so\n",
    "# that re-running cells in any order cannot change its meaning.\n",
    "Nlines = len(datatab)\n",
    "\n",
    "print(datatab)\n",
    "year = datatab[:, 0]\n",
    "month = datatab[:, 1]\n",
    "day = datatab[:, 2]\n",
    "Nemail = datatab[:, 3]\n",
    "Nanswer = datatab[:, 4]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Afficher les données"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One tick label per data row, rebuilt from the year/month/day columns.\n",
    "datelabels = [\n",
    "    \"{}-{}-{}\".format(y, m, d) for y, m, d in zip(year, month, day)\n",
    "]\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "\n",
    "# Rows are plotted against their position; the dates are used as tick labels.\n",
    "I_x = list(range(Nlines))\n",
    "ax.plot(I_x, Nemail)\n",
    "ax.set_xticks(I_x)\n",
    "ax.set_xticklabels(datelabels)\n",
    "ax.set_xlabel(\"Date\")\n",
    "ax.set_ylabel(\"Number of emails\")\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Some numbers"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Mean"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.mean(Nemail)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Median"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.median(Nemail)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Traitement des données avec pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "raw_data = pd.read_csv(filename)\n",
    "raw_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a new frame indexed by date. raw_data itself is left untouched, so\n",
    "# the table displayed by the previous cell stays accurate and this cell can\n",
    "# be re-run safely.\n",
    "date = pd.to_datetime(raw_data['Date'], format='%Y-%m-%d')\n",
    "data = raw_data.assign(Date=date).set_index('Date')\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ax = data['Number of emails'].plot()\n",
    "# The trailing semicolon hides the text repr of the returned label object.\n",
    "ax.set_ylabel(\"Number of emails\");"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}