Commit 20049083 authored by Louis Lacoste

Première analyse

parent 912e8519
...@@ -33,23 +33,78 @@ head -n 5 data.csv ...@@ -33,23 +33,78 @@ head -n 5 data.csv
#+NAME: import-python #+NAME: import-python
#+begin_src python :results output :session :exports both :dir /home/polarolouis/Documents/MOOC/RR/mooc-rr/module2/exo4 #+begin_src python :results output :session :exports both :dir /home/polarolouis/Documents/MOOC/RR/mooc-rr/module2/exo4
import csv import csv
from collections import Counter
temporaryList = []
with open('data.csv', 'r', encoding='utf8') as csvfile: with open('data.csv', 'r', encoding='utf8') as csvfile:
reader = csv.reader(csvfile) reader = csv.reader(csvfile)
for row in reader: for row in reader:
print(row) temporaryList.append(row[0])
# Count how many times each tag (first CSV column) was collected above.
tagCount = Counter(temporaryList)
# Split the counter into two parallel lists. A Counter iterates its keys in
# first-seen order and .values() follows that same order, so the two lists
# stay aligned index by index.
tagList = list(tagCount)
countList = list(tagCount.values())
#+end_src #+end_src
Ici on convertit les données en dataframe =pandas= afin de pouvoir faire
l'affichage plus facilement.
#+RESULTS: import-python #+RESULTS: import-python
#+begin_example
#+NAME: conversion-dataframe
['informatique'] #+begin_src python :results output :session :exports both
['wikipedia'] import pandas as pd
['biologie']
['virus'] preDataframe = dict(tagCount)
['allergie']
['biologie']
['LOGBOOK'] print(preDataframe)
['END']
['LOGBOOK'] tagCountDataframe = pd.DataFrame.from_dict({'tags':list(preDataframe), 'values':list(preDataframe.values())})
['END'] tagCountDataframe['values'] = pd.to_numeric(tagCountDataframe['values'])
#+end_example
print(tagCountDataframe)
#+end_src
#+RESULTS: conversion-dataframe
: {'informatique': 1, 'wikipedia': 1, 'biologie': 2, 'virus': 1, 'allergie': 1, 'LOGBOOK': 2, 'END': 2}
: tags values
: 0 informatique 1
: 1 wikipedia 1
: 2 biologie 2
: 3 virus 1
: 4 allergie 1
: 5 LOGBOOK 2
: 6 END 2
* Affichage des données
** Diverses infos
#+begin_src python :results output :session :exports both
# Report the three most frequent tags as (tag, count) pairs.
topTags = tagCount.most_common(3)
print(f"Les tags les plus cités : {topTags}")
#+end_src
#+RESULTS:
: Les tags les plus cités : [('biologie', 2), ('LOGBOOK', 2), ('END', 2)]
** Graphiques
#+begin_src python :results file :session :var matplot_lib_filename=(org-babel-temp-file "figure" ".png") :exports both
import matplotlib.pyplot as plt

# Create the figure and its axes up front and hand the axes to pandas.
# Without an explicit ``ax``, DataFrame.plot opens a figure of its own, so a
# size requested on a separately created figure never reaches the saved image.
fig, ax = plt.subplots(figsize=(10, 5))

# Display: one bar per tag, bar height = number of occurrences.
tagCountDataframe.plot(x="tags", y="values", kind='bar', ax=ax)

# Adjust the layout only after drawing, once the bars and the tick labels
# exist; calling it on an empty figure has nothing to arrange.
fig.tight_layout()

# Save the figure that was actually drawn on, then close it so that
# re-evaluating this block in the same session does not pile up open figures.
fig.savefig(matplot_lib_filename)
plt.close(fig)

# Last expression of the block: the image path, which the block's
# ``:results file`` header turns into the file link shown under #+RESULTS.
matplot_lib_filename
#+end_src
#+RESULTS:
[[file:/tmp/babel-DfWAKd/figureDe0QxU.png]]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment