From 200490838b1dd15ee92c35de52def0eb30411172 Mon Sep 17 00:00:00 2001 From: Louis Lacoste Date: Sun, 20 Nov 2022 20:51:53 +0100 Subject: [PATCH] =?UTF-8?q?Premi=C3=A8re=20analyse?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- module2/exo4/analyse-journal.org | 83 ++++++++++++++++++++++++++------ 1 file changed, 69 insertions(+), 14 deletions(-) diff --git a/module2/exo4/analyse-journal.org b/module2/exo4/analyse-journal.org index adcb0a7..dc5cac9 100644 --- a/module2/exo4/analyse-journal.org +++ b/module2/exo4/analyse-journal.org @@ -33,23 +33,78 @@ head -n 5 data.csv #+NAME: import-python #+begin_src python :results output :session :exports both :dir /home/polarolouis/Documents/MOOC/RR/mooc-rr/module2/exo4 import csv +from collections import Counter + +temporaryList = [] + with open('data.csv', 'r', encoding='utf8') as csvfile: reader = csv.reader(csvfile) for row in reader: - print(row) + temporaryList.append(row[0]) + +tagCount = Counter(temporaryList) + +tagList = [] +countList = [] + +for tag in tagCount: + tagList.append(tag) + countList.append(tagCount[tag]) + #+end_src +Ici on convertit les données en dataframe =pandas= afin de pouvoir faire +l'affichage plus facilement. + #+RESULTS: import-python -#+begin_example - -['informatique'] -['wikipedia'] -['biologie'] -['virus'] -['allergie'] -['biologie'] -['LOGBOOK'] -['END'] -['LOGBOOK'] -['END'] -#+end_example + +#+NAME: conversion-dataframe +#+begin_src python :results output :session :exports both +import pandas as pd + +preDataframe = dict(tagCount) + + +print(preDataframe) + +tagCountDataframe = pd.DataFrame.from_dict({'tags':list(preDataframe), 'values':list(preDataframe.values())}) +tagCountDataframe['values'] = pd.to_numeric(tagCountDataframe['values']) + +print(tagCountDataframe) +#+end_src + +#+RESULTS: conversion-dataframe +: {'informatique': 1, 'wikipedia': 1, 'biologie': 2, 'virus': 1, 'allergie': 1, 'LOGBOOK': 2, 'END': 2} +: tags values +: 0 informatique 1 +: 1 wikipedia 1 +: 2 biologie 2 +: 3 virus 1 +: 4 allergie 1 +: 5 LOGBOOK 2 +: 6 END 2 + +* Affichage des données +** Diverses infos + #+begin_src python :results output :session :exports both +print(f"Les tags les plus cités : {tagCount.most_common(3)}") + #+end_src + + #+RESULTS: + : Les tags les plus cités : [('biologie', 2), ('LOGBOOK', 2), ('END', 2)] +** Graphiques + #+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both +import matplotlib.pyplot as plt +plt.figure(figsize=(10,5)) +plt.tight_layout() + +# Affichage +ax = tagCountDataframe.plot(x="tags", y="values", kind='bar') + +plt.savefig(matplot_lib_filename) +matplot_lib_filename + +#+end_src + +#+RESULTS: +[[file:/tmp/babel-DfWAKd/figureDe0QxU.png]] -- 2.18.1