no commit message

56f34492 · 264af2e9a1e4e844f861df089a5604e3 · 25eb9b3a · 25eb9b3a · 56f34492 · 56f34492
Commit 56f34492 authored Oct 29, 2024 by 264af2e9a1e4e844f861df089a5604e3
3 changed files
--- a/module3/exo2/Analyse de des dialogues de l'avare de Moliere.ipynb
+++ b/module3/exo2/Analyse de des dialogues de l'avare de Moliere.ipynb
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "hideCode": true,
-    "hidePrompt": true
-   },
-   "source": [
-    "# Étape 1 : classer les personnages selon le nombre de mots prononcés"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "hideCode": true,
-    "hidePrompt": true
-   },
-   "source": [
-    "Tout d'abord, importons les bibliothèques dont on aura besoin."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import re\n",
-    "import pandas as pd\n",
-    "import matplotlib.pyplot as plt\n",
-    "from collections import Counter"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "FileNotFoundError",
-     "evalue": "[Errno 2] No such file or directory: 'l_avare.md'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-4-36d7bdb3cdc6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     36\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     37\u001b[0m \u001b[0;31m# Chargement et traitement du texte au format Markdown\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 38\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"l_avare.md\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"r\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"utf-8\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfile\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     39\u001b[0m     \u001b[0mtext\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfile\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     40\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'l_avare.md'"
-     ]
-    }
-   ],
-   "source": [
-    "def extract_dialogues_markdown(text):\n",
-    "    \"\"\"Count the words of every reply in a Markdown play script.\n",
-    "\n",
-    "    The text is cut on '## SCENE <n>' headings. Inside each piece, a line\n",
-    "    that starts with an upper-case speaker name followed by ':' opens a new\n",
-    "    reply; the following lines are appended to it until the next speaker.\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    text : str\n",
-    "        Full Markdown text of the play.\n",
-    "\n",
-    "    Returns\n",
-    "    -------\n",
-    "    pandas.DataFrame\n",
-    "        One row per non-empty reply, with columns 'scene_id' (position of\n",
-    "        the piece in the split, so 0 is whatever precedes the first\n",
-    "        heading), 'character' (lower-cased speaker name) and 'word_count'.\n",
-    "    \"\"\"\n",
-    "    # Speaker names may hold several words, hyphens or apostrophes\n",
-    "    # (e.g. MAÎTRE JACQUES, LA FLÈCHE). The lazy quantifier keeps group 1\n",
-    "    # free of trailing blanks before the colon.\n",
-    "    speaker_re = re.compile(\n",
-    "        r\"^([A-ZÉÈÀÙÂÊÎÔÛÄËÏÖÜÇ][A-ZÉÈÀÙÂÊÎÔÛÄËÏÖÜÇ' -]*?)\\s*:\\s*(.*)\"\n",
-    "    )\n",
-    "    scenes = re.split(r'##\\s*SCENE\\s+\\d+', text)\n",
-    "    dialogue_data = []\n",
-    "\n",
-    "    def _store_reply(scene_id, character, dialogue):\n",
-    "        # Keep the finished reply unless it has no speaker or no word;\n",
-    "        # zero-word rows would later produce 0/0 when computing proportions.\n",
-    "        word_count = len(dialogue.split())\n",
-    "        if character and word_count:\n",
-    "            dialogue_data.append([scene_id, character, word_count])\n",
-    "\n",
-    "    for scene_id, scene in enumerate(scenes):\n",
-    "        current_character = None\n",
-    "        dialogue = \"\"\n",
-    "\n",
-    "        for line in scene.split('\\n'):\n",
-    "            match = speaker_re.match(line)\n",
-    "\n",
-    "            if match:\n",
-    "                # A new speaker closes the reply in progress\n",
-    "                _store_reply(scene_id, current_character, dialogue)\n",
-    "                current_character = match.group(1).lower()  # lower case for uniformity\n",
-    "                dialogue = match.group(2)\n",
-    "            elif current_character:\n",
-    "                # Reply continuing over several lines\n",
-    "                dialogue += ' ' + line.strip()\n",
-    "\n",
-    "        # Last reply of the scene\n",
-    "        _store_reply(scene_id, current_character, dialogue)\n",
-    "\n",
-    "    return pd.DataFrame(dialogue_data, columns=[\"scene_id\", \"character\", \"word_count\"])\n",
-    "\n",
-    "# Read the Markdown transcript of the play (path relative to the working directory)\n",
-    "with open(\"l_avare.md\", \"r\", encoding=\"utf-8\") as file:\n",
-    "    text = file.read()\n",
-    "\n",
-    "# One row per reply: scene, speaker, number of words\n",
-    "dialogue_df = extract_dialogues_markdown(text)\n",
-    "\n",
-    "# Scene x character table of word totals; absent pairs count as 0\n",
-    "scene_word_counts = (\n",
-    "    dialogue_df\n",
-    "    .groupby([\"scene_id\", \"character\"])[\"word_count\"]\n",
-    "    .sum()\n",
-    "    .unstack(fill_value=0)\n",
-    ")\n",
-    "scene_totals = scene_word_counts.sum(axis=1)\n",
-    "\n",
-    "# Stacked bars: share of the words spoken by each character in every scene\n",
-    "scene_shares = scene_word_counts.div(scene_totals, axis=0)\n",
-    "ax = scene_shares.plot(kind='bar', stacked=True, colormap='tab20', figsize=(12, 8))\n",
-    "ax.set_xlabel(\"Scène\")\n",
-    "ax.set_ylabel(\"Proportion de mots\")\n",
-    "ax.set_title(\"Répartition de la parole par personnage dans chaque scène\")\n",
-    "ax.legend(title=\"Personnage\", bbox_to_anchor=(1.05, 1), loc='upper left')\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "hide_code_all_hidden": true,
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.4"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
--- a/module3/exo2/exerciceTabac.ipynb
+++ b/module3/exo2/exerciceTabac.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "source": [
+    "# titre"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "hideCode": true,
+    "hidePrompt": true
+   },
+   "source": [
+    "Tout d'abord, importons les bibliothèques dont on aura besoin."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "import matplotlib.pyplot as plt\n",
+    "import pandas as pd\n",
+    "import isoweek"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "AttributeError",
+     "evalue": "'Index' object has no attribute 'labels'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-12-d45da91e46f1>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     50\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     51\u001b[0m \u001b[0;31m# Comptage des mots par personnage et par scène\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 52\u001b[0;31m \u001b[0mscene_word_counts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdialogue_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"scene_id\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"character\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"word_count\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfill_value\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     53\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     54\u001b[0m \u001b[0;31m# Vérification que les données sont bien formatées avant le graphique\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36munstack\u001b[0;34m(self, level, fill_value)\u001b[0m\n\u001b[1;32m   2222\u001b[0m         \"\"\"\n\u001b[1;32m   2223\u001b[0m         \u001b[0;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0munstack\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2224\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0munstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfill_value\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2225\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2226\u001b[0m     \u001b[0;31m# ----------------------------------------------------------------------\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/reshape/reshape.py\u001b[0m in \u001b[0;36munstack\u001b[0;34m(obj, level, fill_value)\u001b[0m\n\u001b[1;32m    472\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    473\u001b[0m         unstacker = _Unstacker(obj.values, obj.index, level=level,\n\u001b[0;32m--> 474\u001b[0;31m                                fill_value=fill_value)\n\u001b[0m\u001b[1;32m    475\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0munstacker\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    476\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/core/reshape/reshape.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, values, index, level, value_columns, fill_value)\u001b[0m\n\u001b[1;32m    105\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    106\u001b[0m         \u001b[0;31m# when index includes `nan`, need to lift levels/strides by 1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 107\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlift\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlevel\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    108\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    109\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnew_index_levels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlevels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mAttributeError\u001b[0m: 'Index' object has no attribute 'labels'"
+     ]
+    }
+   ],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "hide_code_all_hidden": true,
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
--- a/module3/exo2/untitled.txt
+++ b/module3/exo2/untitled.txt