#+TITLE: Analyse du journal
#+AUTHOR: Louis Lacoste
#+DATE: 2022-11-20
#+LANGUAGE: fr
# #+PROPERTY: header-args :eval never-export
#+HTML_HEAD:
#+HTML_HEAD:
#+HTML_HEAD:
#+HTML_HEAD:
#+HTML_HEAD:
#+HTML_HEAD:

* Récupération des données du journal

Ici nous allons importer les étiquettes et les exporter dans un fichier =data.csv=.

#+begin_src shell :results output :exports both
grep -oP "(?<=:)([a-zA-Z]*)(?=:)" ~/org/journal.org > data.csv
head -n 5 data.csv
#+end_src

#+RESULTS:
: informatique
: wikipedia
: biologie
: virus
: allergie

* Traitement des données

#+NAME: import-python
#+begin_src python :results output :session :exports both :dir /home/polarolouis/Documents/MOOC/RR/mooc-rr/module2/exo4
import csv
from collections import Counter

# Read the first field of every row of data.csv (one tag per line).
# The csv module asks for files opened with newline=''; csv.reader yields
# an empty list for a blank line, so such rows are skipped instead of
# raising IndexError on row[0].
with open('data.csv', 'r', encoding='utf8', newline='') as csvfile:
    temporaryList = [row[0] for row in csv.reader(csvfile) if row]

# Number of occurrences of each tag.
tagCount = Counter(temporaryList)

# Parallel lists: tagList[i] occurs countList[i] times.
tagList = list(tagCount)
countList = list(tagCount.values())
#+end_src

Ici on convertit les données en dataframe =pandas= afin de pouvoir faire l'affichage plus facilement.
#+RESULTS: import-python

#+NAME: conversion-dataframe
#+begin_src python :results output :session :exports both
import pandas as pd

# Org drawer delimiters (":LOGBOOK:", ":END:", ...) have the same ":word:"
# shape as tags, so the extraction step counts them as well. They are not
# tags: drop them from the session-wide counter before any reporting.
for drawerKeyword in ('LOGBOOK', 'PROPERTIES', 'END'):
    tagCount.pop(drawerKeyword, None)

# Plain dict view of the counter, printed for inspection.
preDataframe = dict(tagCount)
print(preDataframe)

# Two-column frame: one row per tag with its number of occurrences.
tagCountDataframe = pd.DataFrame.from_dict({'tags':list(preDataframe), 'values':list(preDataframe.values())})
tagCountDataframe['values'] = pd.to_numeric(tagCountDataframe['values'])
print(tagCountDataframe)
#+end_src

#+RESULTS: conversion-dataframe
: {'informatique': 1, 'wikipedia': 1, 'biologie': 2, 'virus': 1, 'allergie': 1}
:            tags  values
: 0  informatique       1
: 1     wikipedia       1
: 2      biologie       2
: 3         virus       1
: 4      allergie       1

* Affichage des données

** Diverses infos

#+begin_src python :results output :session :exports both
print(f"Les tags les plus cités : {tagCount.most_common(3)}")
#+end_src

#+RESULTS:
: Les tags les plus cités : [('biologie', 2), ('informatique', 1), ('wikipedia', 1)]

** Graphiques

#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
import matplotlib.pyplot as plt

# Create the figure and axes first and hand the axes to pandas:
# DataFrame.plot opens a figure of its own when no ax is given, which
# would leave a figure created beforehand empty and ignore its figsize.
fig, ax = plt.subplots(figsize=(10, 5))

# Display
tagCountDataframe.plot(x="tags", y="values", kind='bar', ax=ax)

# Adjust the layout only once the bars and tick labels exist, then save
# this exact figure and release it (the session outlives the block).
fig.tight_layout()
fig.savefig(matplot_lib_filename)
plt.close(fig)

# Last expression: the path that org-babel inserts as the file result.
matplot_lib_filename
#+end_src

#+RESULTS:
[[file:/tmp/babel-DfWAKd/figureDe0QxU.png]]