diff --git a/module2/exo4/exercice.ipynb b/module2/exo4/exercice.ipynb index 15db27e73f0e747e868267d9616e791d411bd522..3b38e22e192b7a793a2f48e7cb0552d85a5c0c8c 100644 --- a/module2/exo4/exercice.ipynb +++ b/module2/exo4/exercice.ipynb @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -45,6 +45,137 @@ " sortie.write(f\"{date};{mot}\\n\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analyse des données\n", + "On peut regarder l'aspect des données." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0 1\n", + "0 12/07/2020 Mooc\n", + "1 12/07/2020 Installation\n", + "2 12/07/2020 Python\n", + "3 12/07/2020 Jupiter\n", + "4 12/07/2020 R \n", + "...\n", + " 0 1\n", + "33 22/08/2020 Réunion\n", + "34 22/08/2020 Python\n", + "35 28/08/2020 Lecture\n", + "36 28/08/2020 Python\n", + "37 28/08/2020 GUI\n" + ] + } + ], + "source": [ + "NomFichentree = \"./Info_Journal.csv\"\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "donnees = pd.read_csv(NomFichentree, sep=';', header=None,\n", + " infer_datetime_format=True, keep_date_col=True,\n", + " dayfirst=True)\n", + "\n", + "print(donnees.head(), '\\n...\\n', donnees.tail())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nous pouvons ensuite regarder:\n", + "- le nombre total de données (le même que le nombre de lignes),\n", + "- le nombre de données uniques (pour les dates et pour les mots-clés),\n", + "- la date et le mot-clé les plus fréquents, avec leurs fréquences respectives."
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0 1\n", + "count 38 38\n", + "unique 13 15\n", + "top 12/07/2020 Mooc\n", + "freq 5 7\n" + ] + } + ], + "source": [ + "print(donnees.describe())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nous pouvons également regarder les fréquences respectives de chaque date et de chaque mot-clé." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "12/07/2020 5\n", + "17/07/2020 5\n", + "28/08/2020 3\n", + "13/08/2020 3\n", + "23/07/2020 3\n", + "08/08/2020 3\n", + "17/08/2020 3\n", + "10/08/2020 3\n", + "21/07/2020 2\n", + "20/07/2020 2\n", + "25/07/2020 2\n", + "02/08/2020 2\n", + "22/08/2020 2\n", + "Name: 0, dtype: int64\n", + "Mooc 7\n", + "Python 5\n", + "Lecture 3\n", + "Module 2 3\n", + "Réunion 3\n", + "GUI 3\n", + "Exercices 3\n", + "Module 1 2\n", + "R 2\n", + "Jupiter 2\n", + "Git 1\n", + "Pannes oscillatoires 1\n", + "Génération ACS 1\n", + "Configuration 1\n", + "Installation 1\n", + "Name: 1, dtype: int64\n" + ] + } + ], + "source": [ + "print(pd.value_counts(donnees[0]))\n", + "print(pd.value_counts(donnees[1]))" + ] + }, { "cell_type": "code", "execution_count": null,