diff --git a/module3/exo2/Analyse de des dialogues de l'avare de Moliere.ipynb b/module3/exo2/Analyse de des dialogues de l'avare de Moliere.ipynb deleted file mode 100644 index 3835f130f4207b77389c1292d18b5ede780fe939..0000000000000000000000000000000000000000 --- a/module3/exo2/Analyse de des dialogues de l'avare de Moliere.ipynb +++ /dev/null @@ -1,139 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "hideCode": true, - "hidePrompt": true - }, - "source": [ - "# Etape 1 : classer les personnages selon le nombre de mots prononcés" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "hideCode": true, - "hidePrompt": true - }, - "source": [ - "Tout d'abord, il faut commencer par inclure les bibliothèques dont on aura besoin." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import re\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "from collections import Counter" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: 'l_avare.md'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 37\u001b[0m \u001b[0;31m# Chargement et traitement du texte au format Markdown\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 38\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"l_avare.md\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"r\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"utf-8\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 39\u001b[0m \u001b[0mtext\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfile\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'l_avare.md'" - ] - } - ], - "source": [ - "def extract_dialogues_markdown(text):\n", - " # Séparation des scènes\n", - " scenes = re.split(r'##\\s*SCENE\\s+\\d+', text) # Divise le texte par les scènes\n", - " dialogue_data = []\n", - "\n", - " for scene_id, scene in enumerate(scenes):\n", - " lines = scene.split('\\n')\n", - " current_character = None\n", - " dialogue = \"\"\n", - " \n", - " for line in lines:\n", - " # Utilisation de regex pour détecter les noms de personnages en majuscules suivis de ':'\n", - " match = re.match(r'^([A-ZÉÈÀÙÂÊÎÔÛÄËÏÖÜÇ]+):\\s*(.*)', line)\n", - " \n", - " if match:\n", - " # Si un nom est détecté, sauvegardons la réplique précédente\n", - " if current_character and dialogue:\n", - " word_count = len(dialogue.split())\n", - " dialogue_data.append([scene_id, current_character, word_count])\n", - " \n", - " # Actualisation du personnage actuel et début de nouvelle réplique\n", - " current_character = match.group(1).lower() # Nom en minuscule pour uniformité\n", - " dialogue = match.group(2) # Commence une nouvelle réplique\n", - "\n", - " elif current_character:\n", - " # Continuation de la réplique sur plusieurs lignes\n", - " dialogue += ' ' + line.strip()\n", - "\n", - " # Enregistrement de la dernière réplique de la scène\n", - " if current_character and dialogue:\n", - " word_count = len(dialogue.split())\n", - " dialogue_data.append([scene_id, current_character, word_count])\n", - "\n", - " # Conversion en DataFrame\n", - " return pd.DataFrame(dialogue_data, columns=[\"scene_id\", \"character\", \"word_count\"])\n", - "\n", - "# Chargement et traitement du texte au format Markdown\n", - "with open(\"l_avare.md\", \"r\", encoding=\"utf-8\") as file:\n", - " text = file.read()\n", - "\n", - "# Extraction des dialogues\n", - "dialogue_df = extract_dialogues_markdown(text)\n", - "\n", - "# Analyse : nombre de mots par scène et personnage\n", - "scene_word_counts = dialogue_df.groupby([\"scene_id\", \"character\"])[\"word_count\"].sum().unstack(fill_value=0)\n", - "scene_totals = scene_word_counts.sum(axis=1)\n", - "\n", - "# Création du graphique empilé\n", - "scene_word_counts.div(scene_totals, axis=0).plot(kind='bar', stacked=True, colormap='tab20', figsize=(12, 8))\n", - "plt.xlabel(\"Scène\")\n", - "plt.ylabel(\"Proportion de mots\")\n", - "plt.title(\"Répartition de la parole par personnage dans chaque scène\")\n", - "plt.legend(title=\"Personnage\", bbox_to_anchor=(1.05, 1), loc='upper left')\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "hide_code_all_hidden": true, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.4" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/module3/exo2/exerciceTabac.ipynb b/module3/exo2/exerciceTabac.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..dc2f28419052355ffe39169aae39b87c23eda8cb --- /dev/null +++ b/module3/exo2/exerciceTabac.ipynb @@ -0,0 +1,87 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "hideCode": true, + "hidePrompt": true + }, + "source": [ + "# titre" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hideCode": true, + "hidePrompt": true + }, + "source": [ + "Tout d'abord, il faut commencer par inclure les bibliothèques dont on aura besoin." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import isoweek" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'Index' object has no attribute 'labels'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0;31m# Comptage des mots par personnage et par scène\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 52\u001b[0;31m \u001b[0mscene_word_counts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdialogue_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"scene_id\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"character\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"word_count\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfill_value\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 53\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[0;31m# Vérification que les données sont bien formatées avant le graphique\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36munstack\u001b[0;34m(self, level, fill_value)\u001b[0m\n\u001b[1;32m 2222\u001b[0m \"\"\"\n\u001b[1;32m 2223\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0munstack\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2224\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0munstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfill_value\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2225\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2226\u001b[0m \u001b[0;31m# ----------------------------------------------------------------------\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/reshape/reshape.py\u001b[0m in \u001b[0;36munstack\u001b[0;34m(obj, level, fill_value)\u001b[0m\n\u001b[1;32m 472\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 473\u001b[0m unstacker = _Unstacker(obj.values, obj.index, level=level,\n\u001b[0;32m--> 474\u001b[0;31m fill_value=fill_value)\n\u001b[0m\u001b[1;32m 475\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0munstacker\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 476\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/reshape/reshape.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, values, index, level, value_columns, fill_value)\u001b[0m\n\u001b[1;32m 105\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 106\u001b[0m \u001b[0;31m# when index includes `nan`, need to lift levels/strides by 1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 107\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlift\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlevel\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 108\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 109\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnew_index_levels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlevels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAttributeError\u001b[0m: 'Index' object has no attribute 'labels'" + ] + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "hide_code_all_hidden": true, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/module3/exo2/untitled.txt b/module3/exo2/untitled.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391