Ongoing work on question 2

parent 64dfb5dc
......@@ -185,7 +185,8 @@
"metadata": {},
"outputs": [],
"source": [
"import re"
"import re\n",
"import copy"
]
},
{
......@@ -344,7 +345,11 @@
" # la seconde partie définit le personnage du clerc\n",
" # et commence par \" et \" \n",
" emptyPersoDict[parts[0]] = persoCaracs\n",
" emptyPersoDict[parts[1][4:]] = persoCaracs\n",
" # On fait une copie profonde pour ne pas\n",
" # que l'objet partage la même adresse mémoire\n",
" clercCaracs = copy.deepcopy(persoCaracs)\n",
" clercCaracs[\"links\"].append(\"assistant du commissaire\")\n",
" emptyPersoDict[parts[1][4:]] = clercCaracs\n",
" \n",
" currentLine = fileToAnalyse.readline()\n",
" lineNum += 1\n",
......@@ -624,7 +629,13 @@
" # La ligne n'est pas vide, ce n'est pas l'exception\n",
" for namePerso in persoDict:\n",
" # On vérifie si namePerso fait partie des protagonistes\n",
" m = re.search(namePerso, currentLine, re.IGNORECASE)\n",
" # Pour prendre en compte le problème avec les maitres/maîtres\n",
" # il faut adapter name Perso pour être une expression régulière\n",
" persoRegex = namePerso\n",
" if namePerso.startswith(\"Maitre\"):\n",
" persoRegex = namePerso.replace('Maitre','Maître')\n",
" \n",
" m = re.search(persoRegex, currentLine, re.IGNORECASE)\n",
" if m is not None:\n",
" # Le résultat de la recherche n'est pas vide,\n",
" # le personnage fait partie des protagonistes\n",
......@@ -933,7 +944,7 @@
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>841</th>\n",
" <th>931</th>\n",
" <td>5</td>\n",
" <td>Harpagon</td>\n",
" <td>Valère</td>\n",
......@@ -941,31 +952,23 @@
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>842</th>\n",
" <td>5</td>\n",
" <td>Valère</td>\n",
" <td>Harpagon</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>843</th>\n",
" <th>932</th>\n",
" <td>5</td>\n",
" <td>Harpagon</td>\n",
" <td>Valère</td>\n",
" <td>Maître Jacques</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>844</th>\n",
" <th>933</th>\n",
" <td>5</td>\n",
" <td>Valère</td>\n",
" <td>Maître Jacques</td>\n",
" <td>Harpagon</td>\n",
" <td>5</td>\n",
" <td>13</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>845</th>\n",
" <th>934</th>\n",
" <td>5</td>\n",
" <td>Harpagon</td>\n",
" <td>Valère</td>\n",
......@@ -973,7 +976,7 @@
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>846</th>\n",
" <th>935</th>\n",
" <td>5</td>\n",
" <td>Valère</td>\n",
" <td>Harpagon</td>\n",
......@@ -981,7 +984,7 @@
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>847</th>\n",
" <th>936</th>\n",
" <td>5</td>\n",
" <td>Harpagon</td>\n",
" <td>Valère</td>\n",
......@@ -989,7 +992,7 @@
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>848</th>\n",
" <th>937</th>\n",
" <td>5</td>\n",
" <td>Cléante</td>\n",
" <td>Harpagon</td>\n",
......@@ -997,7 +1000,7 @@
" <td>41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>849</th>\n",
" <th>938</th>\n",
" <td>5</td>\n",
" <td>Harpagon</td>\n",
" <td>Cléante</td>\n",
......@@ -1005,7 +1008,7 @@
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>850</th>\n",
" <th>939</th>\n",
" <td>5</td>\n",
" <td>Cléante</td>\n",
" <td>Harpagon</td>\n",
......@@ -1013,7 +1016,7 @@
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>851</th>\n",
" <th>940</th>\n",
" <td>5</td>\n",
" <td>Harpagon</td>\n",
" <td>Cléante</td>\n",
......@@ -1021,7 +1024,7 @@
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>852</th>\n",
" <th>941</th>\n",
" <td>5</td>\n",
" <td>Cléante</td>\n",
" <td>Mariane</td>\n",
......@@ -1029,7 +1032,7 @@
" <td>36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>853</th>\n",
" <th>942</th>\n",
" <td>5</td>\n",
" <td>Mariane</td>\n",
" <td>Anselme</td>\n",
......@@ -1037,7 +1040,7 @@
" <td>36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>854</th>\n",
" <th>943</th>\n",
" <td>5</td>\n",
" <td>Anselme</td>\n",
" <td>Harpagon</td>\n",
......@@ -1045,7 +1048,7 @@
" <td>61</td>\n",
" </tr>\n",
" <tr>\n",
" <th>855</th>\n",
" <th>944</th>\n",
" <td>5</td>\n",
" <td>Harpagon</td>\n",
" <td>Cléante</td>\n",
......@@ -1053,7 +1056,7 @@
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>856</th>\n",
" <th>945</th>\n",
" <td>5</td>\n",
" <td>Cléante</td>\n",
" <td>Harpagon</td>\n",
......@@ -1061,7 +1064,7 @@
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>857</th>\n",
" <th>946</th>\n",
" <td>5</td>\n",
" <td>Harpagon</td>\n",
" <td>Anselme</td>\n",
......@@ -1069,7 +1072,7 @@
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>858</th>\n",
" <th>947</th>\n",
" <td>5</td>\n",
" <td>Anselme</td>\n",
" <td>Harpagon</td>\n",
......@@ -1077,7 +1080,7 @@
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>859</th>\n",
" <th>948</th>\n",
" <td>5</td>\n",
" <td>Harpagon</td>\n",
" <td>Anselme</td>\n",
......@@ -1085,7 +1088,7 @@
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>860</th>\n",
" <th>949</th>\n",
" <td>5</td>\n",
" <td>Anselme</td>\n",
" <td>Harpagon</td>\n",
......@@ -1093,7 +1096,7 @@
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>861</th>\n",
" <th>950</th>\n",
" <td>5</td>\n",
" <td>Harpagon</td>\n",
" <td>Anselme</td>\n",
......@@ -1101,7 +1104,7 @@
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>862</th>\n",
" <th>951</th>\n",
" <td>5</td>\n",
" <td>Anselme</td>\n",
" <td>le Commissaire</td>\n",
......@@ -1109,7 +1112,7 @@
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>863</th>\n",
" <th>952</th>\n",
" <td>5</td>\n",
" <td>le Commissaire</td>\n",
" <td>Harpagon</td>\n",
......@@ -1117,7 +1120,7 @@
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>864</th>\n",
" <th>953</th>\n",
" <td>5</td>\n",
" <td>Harpagon</td>\n",
" <td>le Commissaire</td>\n",
......@@ -1125,7 +1128,7 @@
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>865</th>\n",
" <th>954</th>\n",
" <td>5</td>\n",
" <td>le Commissaire</td>\n",
" <td>Harpagon</td>\n",
......@@ -1133,15 +1136,23 @@
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>866</th>\n",
" <th>955</th>\n",
" <td>5</td>\n",
" <td>Harpagon</td>\n",
" <td>Maître Jacques</td>\n",
" <td>6</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>956</th>\n",
" <td>5</td>\n",
" <td>Maître Jacques</td>\n",
" <td>Anselme</td>\n",
" <td>6</td>\n",
" <td>35</td>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>867</th>\n",
" <th>957</th>\n",
" <td>5</td>\n",
" <td>Anselme</td>\n",
" <td>Harpagon</td>\n",
......@@ -1149,7 +1160,7 @@
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>868</th>\n",
" <th>958</th>\n",
" <td>5</td>\n",
" <td>Harpagon</td>\n",
" <td>Anselme</td>\n",
......@@ -1157,7 +1168,7 @@
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>869</th>\n",
" <th>959</th>\n",
" <td>5</td>\n",
" <td>Anselme</td>\n",
" <td>Harpagon</td>\n",
......@@ -1165,7 +1176,7 @@
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>870</th>\n",
" <th>960</th>\n",
" <td>5</td>\n",
" <td>Harpagon</td>\n",
" <td>Anselme</td>\n",
......@@ -1174,7 +1185,7 @@
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>871 rows × 5 columns</p>\n",
"<p>961 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
......@@ -1210,38 +1221,38 @@
"28 1 Cléante Élise 2 150\n",
"29 1 Élise Cléante 2 27\n",
".. ... ... ... ... ...\n",
"841 5 Harpagon Valère 5 2\n",
"842 5 Valère Harpagon 5 4\n",
"843 5 Harpagon Valère 5 2\n",
"844 5 Valère Harpagon 5 13\n",
"845 5 Harpagon Valère 5 10\n",
"846 5 Valère Harpagon 5 10\n",
"847 5 Harpagon Valère 5 9\n",
"848 5 Cléante Harpagon 6 41\n",
"849 5 Harpagon Cléante 6 3\n",
"850 5 Cléante Harpagon 6 47\n",
"851 5 Harpagon Cléante 6 7\n",
"852 5 Cléante Mariane 6 36\n",
"853 5 Mariane Anselme 6 36\n",
"854 5 Anselme Harpagon 6 61\n",
"855 5 Harpagon Cléante 6 11\n",
"856 5 Cléante Harpagon 6 6\n",
"857 5 Harpagon Anselme 6 13\n",
"858 5 Anselme Harpagon 6 13\n",
"859 5 Harpagon Anselme 6 12\n",
"860 5 Anselme Harpagon 6 8\n",
"861 5 Harpagon Anselme 6 12\n",
"862 5 Anselme le Commissaire 6 13\n",
"863 5 le Commissaire Harpagon 6 14\n",
"864 5 Harpagon le Commissaire 6 8\n",
"865 5 le Commissaire Harpagon 6 12\n",
"866 5 Harpagon Anselme 6 35\n",
"867 5 Anselme Harpagon 6 8\n",
"868 5 Harpagon Anselme 6 5\n",
"869 5 Anselme Harpagon 6 11\n",
"870 5 Harpagon Anselme 6 6\n",
"931 5 Harpagon Valère 5 2\n",
"932 5 Valère Maître Jacques 5 6\n",
"933 5 Maître Jacques Harpagon 5 7\n",
"934 5 Harpagon Valère 5 10\n",
"935 5 Valère Harpagon 5 10\n",
"936 5 Harpagon Valère 5 9\n",
"937 5 Cléante Harpagon 6 41\n",
"938 5 Harpagon Cléante 6 3\n",
"939 5 Cléante Harpagon 6 47\n",
"940 5 Harpagon Cléante 6 7\n",
"941 5 Cléante Mariane 6 36\n",
"942 5 Mariane Anselme 6 36\n",
"943 5 Anselme Harpagon 6 61\n",
"944 5 Harpagon Cléante 6 11\n",
"945 5 Cléante Harpagon 6 6\n",
"946 5 Harpagon Anselme 6 13\n",
"947 5 Anselme Harpagon 6 13\n",
"948 5 Harpagon Anselme 6 12\n",
"949 5 Anselme Harpagon 6 8\n",
"950 5 Harpagon Anselme 6 12\n",
"951 5 Anselme le Commissaire 6 13\n",
"952 5 le Commissaire Harpagon 6 14\n",
"953 5 Harpagon le Commissaire 6 8\n",
"954 5 le Commissaire Harpagon 6 12\n",
"955 5 Harpagon Maître Jacques 6 12\n",
"956 5 Maître Jacques Anselme 6 23\n",
"957 5 Anselme Harpagon 6 8\n",
"958 5 Harpagon Anselme 6 5\n",
"959 5 Anselme Harpagon 6 11\n",
"960 5 Harpagon Anselme 6 6\n",
"\n",
"[871 rows x 5 columns]"
"[961 rows x 5 columns]"
]
},
"execution_count": 13,
......@@ -1281,50 +1292,10 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Réponse à la première question\n",
"Maintenant, nous pouvons répondre à la première question."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Harpagon a prononcé 6992 mots dans toute la pièce.\n",
"Cléante a prononcé 3555 mots dans toute la pièce.\n",
"Élise a prononcé 1067 mots dans toute la pièce.\n",
"Valère a prononcé 3016 mots dans toute la pièce.\n",
"Mariane a prononcé 910 mots dans toute la pièce.\n",
"Anselme a prononcé 517 mots dans toute la pièce.\n",
"Frosine a prononcé 2381 mots dans toute la pièce.\n",
"Maitre Simon a prononcé 0 mots dans toute la pièce.\n",
"Maitre Jacques a prononcé 0 mots dans toute la pièce.\n",
"La Flèche a prononcé 1520 mots dans toute la pièce.\n",
"Dame Claude a prononcé 0 mots dans toute la pièce.\n",
"Brindavoine a prononcé 43 mots dans toute la pièce.\n",
"La Merluche a prononcé 47 mots dans toute la pièce.\n",
"Le commissaire a prononcé 409 mots dans toute la pièce.\n",
"son clerc a prononcé 0 mots dans toute la pièce.\n"
]
}
],
"source": [
"# On va parcourir le dictionnaire des personnages\n",
"# et compter le nombre total de mots prononcés\n",
"# pour chacun.\n",
"import numpy as np\n",
"persoList = []\n",
"persoNbWords = np.zeros(len(avarePersoDict),dtype=int)\n",
"#print(textDataSynthesisTableDf.shape)\n",
"for perso in avarePersoDict:\n",
" m = textDataSynthesisTableDf['author'].str.match(perso, case=False, na=False)\n",
" tmpDf = textDataSynthesisTableDf[m].copy()\n",
" print(\"{} a prononcé {} mots dans toute la pièce.\".format(perso,tmpDf['speech_length'].sum()))"
]
},
{
"cell_type": "markdown",
"metadata": {},
......@@ -1352,6 +1323,167 @@
"Nous avons donc corrigé le code pour prendre en compte ces deux problèmes."
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>perso</th>\n",
" </tr>\n",
" <tr>\n",
" <th>speech_length</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Dame Claude</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>son clerc</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>Brindavoine</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>La Merluche</td>\n",
" </tr>\n",
" <tr>\n",
" <th>197</th>\n",
" <td>Maitre Simon</td>\n",
" </tr>\n",
" <tr>\n",
" <th>294</th>\n",
" <td>Le commissaire</td>\n",
" </tr>\n",
" <tr>\n",
" <th>517</th>\n",
" <td>Anselme</td>\n",
" </tr>\n",
" <tr>\n",
" <th>910</th>\n",
" <td>Mariane</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1067</th>\n",
" <td>Élise</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1520</th>\n",
" <td>La Flèche</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1668</th>\n",
" <td>Maitre Jacques</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2370</th>\n",
" <td>Frosine</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2740</th>\n",
" <td>Valère</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3331</th>\n",
" <td>Cléante</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5753</th>\n",
" <td>Harpagon</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" perso\n",
"speech_length \n",
"0 Dame Claude\n",
"0 son clerc\n",
"43 Brindavoine\n",
"47 La Merluche\n",
"197 Maitre Simon\n",
"294 Le commissaire\n",
"517 Anselme\n",
"910 Mariane\n",
"1067 Élise\n",
"1520 La Flèche\n",
"1668 Maitre Jacques\n",
"2370 Frosine\n",
"2740 Valère\n",
"3331 Cléante\n",
"5753 Harpagon"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# On va parcourir le dictionnaire des personnages\n",
"# et compter le nombre total de mots prononcés\n",
"# pour chacun.\n",
"import numpy as np\n",
"persoList = []\n",
"persoNbWords = np.zeros(len(avarePersoDict),dtype=int)\n",
"#print(textDataSynthesisTableDf.shape)\n",
"for index, perso in enumerate(avarePersoDict):\n",
" persoRegex = perso\n",
" if perso.startswith(\"Maitre\"):\n",
" persoRegex = perso.replace('Maitre','Maître')\n",
" m = textDataSynthesisTableDf['author'].str.match(persoRegex, case=False, na=False)\n",
" tmpDf = textDataSynthesisTableDf[m].copy()\n",
" persoList.append(perso)\n",
" persoNbWords[index] = tmpDf['speech_length'].sum()\n",
"\n",
"data = pd.DataFrame({\"perso\":persoList, \"speech_length\":persoNbWords})\n",
" \n",
"sortedData = data.set_index('speech_length').sort_index()\n",
"sortedData\n",
" \n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Nous pouvons désormais finir de répondre à la première question posée. Le personnage qui s'exprime le plus dans la pièce _L'Avare_ de Molière, et qui en est donc le protagoniste principal, n'est autre qu'**Harpagon**. Quant aux personnages qui ne s'expriment pas, nous en avons deux : le **clerc** du commissaire ainsi que **Dame Claude**, qui est la servante d'Harpagon."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"On va utiliser le classement pour associer à chaque personnage de la pièce une couleur unique. On va donc reprendre le dictionnaire des personnages et régler la propriété de couleur."
]
},
{
"cell_type": "code",
"execution_count": null,
......@@ -1359,6 +1491,71 @@
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Réponse à la seconde question"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"En nous appuyant sur une page internet présentant un exemple d'affichage similaire à ce qui est fait dans l'étude \\(à savoir un affichage par acte, sous forme de barres horizontales, une par scène de l'acte, donnant la répartition de la parole entre les différents protagonistes de la scène\\), on va pouvoir répondre à la seconde question. Voici un lien vers cette page : [geeksforgeeks_stacked-percentage-bar-plot](https://www.geeksforgeeks.org/stacked-percentage-bar-plot-in-matplotlib/)."
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5\n"
]
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Il va nous falloir compter le nombre d'actes de la pièce\n",
"# et faire une boucle sur ces derniers pour construire \n",
"# un dataframe par acte à partir du dataframe global\n",
"# que dont on va caler la structure en fonction de\n",
"# l'exemple disponible sur la page.\n",
"# Le dataframe doit contenir une ligne par scène de l'acte\n",
"# courant et une colonne par protagoniste de la scène\n",
"actNums = pd.unique(textDataSynthesisTableDf[\"act\"])\n",
"nbActs = actNums.size\n",
"\n",
"fig = plt.subplots(nbActs, 1)\n",
"\n",
"for actNum in actNums:\n",
" actDf = textDataSynthesisTableDf[textDataSynthesisTableDf==actNum].copy()\n",
" sceneNums = pd.unique(actDf[\"scene\"])\n",
" actPersos = pd.unique(actDf[\"author\"])\n",
" actDict = {\"scene\":[]}\n",
" for perso in actPersos:\n",
" # Création d'une colonne par personnage de l'acte\n",
" actDict[perso] = []\n",
" \n",
" for sceneNum in sceneNums:\n",
" actDict[\"scene\"].append(sceneNum)\n",
" sceneDf = actDf[actDf[\"scene\"]==sceneNum].copy()\n",
" scenePersos = pd.unique(sceneDf)\n",
" for perso in actPersos:\n",
" tmpDf = sceneDf[sceneDf[\"author\"]==perso].copy()\n",
" if tmpDf:\n",
" currentPersoNbWordsInScene = tmpDf[\"speech_length\"].sum()\n",
" actDict[perso] = currentPersoNbWordsInScene\n",
" else:\n",
" actDict[perso] = 0\n",
" pass"
]
},
{
"cell_type": "markdown",
"metadata": {},
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment