From 48defd270ab64ea75be93b88462d01a79a6f1fb2 Mon Sep 17 00:00:00 2001 From: 3d1cde3613956104173df2e357578f04 <3d1cde3613956104173df2e357578f04@app-learninglab.inria.fr> Date: Sat, 30 May 2020 16:47:39 +0000 Subject: [PATCH] version provisoire --- module3/exo3/exercice.ipynb | 636 ++++++++++++++++++++++++++++++++++-- 1 file changed, 602 insertions(+), 34 deletions(-) diff --git a/module3/exo3/exercice.ipynb b/module3/exo3/exercice.ipynb index 59975f5..8636871 100644 --- a/module3/exo3/exercice.ipynb +++ b/module3/exo3/exercice.ipynb @@ -7,6 +7,13 @@ "# Playfair analysis" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Importation de la librairie et chargement des données." + ] + }, { "cell_type": "code", "execution_count": 1, @@ -17,9 +24,16 @@ "playfair = pd.read_csv(\"https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/HistData/Wheat.csv\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On regarde le début et la fin du dataframe pour avoir un premier sentiment sur les données" + ] + }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -43,7 +57,6 @@ " \n", " \n", " \n", - " Unnamed: 0\n", " Year\n", " Wheat\n", " Wages\n", @@ -51,54 +64,49 @@ " \n", " \n", " \n", - " 48\n", - " 49\n", - " 1805\n", - " 81.0\n", - " 29.5\n", + " 0\n", + " 1565\n", + " 41.0\n", + " 5.00\n", " \n", " \n", - " 49\n", - " 50\n", - " 1810\n", - " 99.0\n", - " 30.0\n", + " 1\n", + " 1570\n", + " 45.0\n", + " 5.05\n", " \n", " \n", - " 50\n", - " 51\n", - " 1815\n", - " 78.0\n", - " NaN\n", + " 2\n", + " 1575\n", + " 42.0\n", + " 5.08\n", " \n", " \n", - " 51\n", - " 52\n", - " 1820\n", - " 54.0\n", - " NaN\n", + " 3\n", + " 1580\n", + " 49.0\n", + " 5.12\n", " \n", " \n", - " 52\n", - " 53\n", - " 1821\n", - " 54.0\n", - " NaN\n", + " 4\n", + " 1585\n", + " 41.5\n", + " 5.15\n", " \n", " \n", "\n", "" ], "text/plain": [ - " Unnamed: 0 Year Wheat Wages\n", - "48 49 
1805 81.0 29.5\n", - "49 50 1810 99.0 30.0\n", - "50 51 1815 78.0 NaN\n", - "51 52 1820 54.0 NaN\n", - "52 53 1821 54.0 NaN" + " Year Wheat Wages\n", + "0 1565 41.0 5.00\n", + "1 1570 45.0 5.05\n", + "2 1575 42.0 5.08\n", + "3 1580 49.0 5.12\n", + "4 1585 41.5 5.15" ] }, - "execution_count": 5, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -197,6 +205,15 @@ "playfair.tail()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Comme il n'y a pas beaucoup de données, on peut vérifier la qualité par de simples graphiques.\n", + "\n", + "On importe matplotlib et on visualise les deux variables principales; on ne constate pas de valeurs anormales. La variabilité du prix du blé est plus grande que celle des salaires. Cela parait normal. " + ] + }, { "cell_type": "code", "execution_count": 7, @@ -270,6 +287,557 @@ "plt.plot(playfair['Wages'])" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On prend l'année comme index. Cela permettra que l'année figure comme abscisse dans les graphiques." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0WheatWages
Year
1565141.05.00
1570245.05.05
1575342.05.08
1580449.05.12
1585541.55.15
1590647.05.25
1595764.05.54
1600827.05.61
1605933.05.69
16101032.05.78
16151133.05.94
16201235.06.01
16251333.06.12
16301445.06.22
16351533.06.30
16401639.06.37
16451753.06.45
16501842.06.50
16551940.56.60
16602046.56.75
16652132.06.80
16702237.06.90
16752343.07.00
16802435.07.30
16852527.07.60
16902640.08.00
16952750.08.50
17002830.09.00
17052932.010.00
17103044.011.00
17153133.011.75
17203229.012.50
17253339.013.00
17303426.013.30
17353532.013.60
17403627.014.00
17453727.514.50
17503831.015.00
17553935.515.70
17604031.016.50
17654143.017.60
17704247.018.50
17754344.019.50
17804446.021.00
17854542.023.00
17904647.525.50
17954776.027.50
18004879.028.50
18054981.029.50
18105099.030.00
18155178.0NaN
18205254.0NaN
18215354.0NaN
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 Wheat Wages\n", + "Year \n", + "1565 1 41.0 5.00\n", + "1570 2 45.0 5.05\n", + "1575 3 42.0 5.08\n", + "1580 4 49.0 5.12\n", + "1585 5 41.5 5.15\n", + "1590 6 47.0 5.25\n", + "1595 7 64.0 5.54\n", + "1600 8 27.0 5.61\n", + "1605 9 33.0 5.69\n", + "1610 10 32.0 5.78\n", + "1615 11 33.0 5.94\n", + "1620 12 35.0 6.01\n", + "1625 13 33.0 6.12\n", + "1630 14 45.0 6.22\n", + "1635 15 33.0 6.30\n", + "1640 16 39.0 6.37\n", + "1645 17 53.0 6.45\n", + "1650 18 42.0 6.50\n", + "1655 19 40.5 6.60\n", + "1660 20 46.5 6.75\n", + "1665 21 32.0 6.80\n", + "1670 22 37.0 6.90\n", + "1675 23 43.0 7.00\n", + "1680 24 35.0 7.30\n", + "1685 25 27.0 7.60\n", + "1690 26 40.0 8.00\n", + "1695 27 50.0 8.50\n", + "1700 28 30.0 9.00\n", + "1705 29 32.0 10.00\n", + "1710 30 44.0 11.00\n", + "1715 31 33.0 11.75\n", + "1720 32 29.0 12.50\n", + "1725 33 39.0 13.00\n", + "1730 34 26.0 13.30\n", + "1735 35 32.0 13.60\n", + "1740 36 27.0 14.00\n", + "1745 37 27.5 14.50\n", + "1750 38 31.0 15.00\n", + "1755 39 35.5 15.70\n", + "1760 40 31.0 16.50\n", + "1765 41 43.0 17.60\n", + "1770 42 47.0 18.50\n", + "1775 43 44.0 19.50\n", + "1780 44 46.0 21.00\n", + "1785 45 42.0 23.00\n", + "1790 46 47.5 25.50\n", + "1795 47 76.0 27.50\n", + "1800 48 79.0 28.50\n", + "1805 49 81.0 29.50\n", + "1810 50 99.0 30.00\n", + "1815 51 78.0 NaN\n", + "1820 52 54.0 NaN\n", + "1821 53 54.0 NaN" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "playfair.set_index('Year')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On n'a plus besoin de la colonne qui numérote les observations" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "playfair=playfair.drop(columns='Unnamed: 0')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On vérifie le type des variables et le nombre de variables non nulles. 
Les 3 NaN du salaire correspondent aux trois dernières observations." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 53 entries, 0 to 52\n", + "Data columns (total 3 columns):\n", + "Year 53 non-null int64\n", + "Wheat 53 non-null float64\n", + "Wages 50 non-null float64\n", + "dtypes: float64(2), int64(1)\n", + "memory usage: 1.3 KB\n" + ] + } + ], + "source": [ + "playfair.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Les années vont de 5 en 5 sauf la dernière, qui a les mêmes valeurs que l'avant-dernière. On élimine donc la dernière observation qui n'apporte rien. " + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 52 entries, 0 to 51\n", + "Data columns (total 3 columns):\n", + "Year 52 non-null int64\n", + "Wheat 52 non-null float64\n", + "Wages 50 non-null float64\n", + "dtypes: float64(2), int64(1)\n", + "memory usage: 1.3 KB\n" + ] + } + ], + "source": [ + "playfair=playfair[:-1]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## TODO vérifier que les années vont de 5 en 5" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "playfair.plot.bar(y='Wheat')" + ] + }, { "cell_type": "code", "execution_count": null, -- 2.18.1