From bc98490cf17e3bfa7467dd5df0c75e45a10118ae Mon Sep 17 00:00:00 2001 From: e5e62e6e8091e12ec477af239ac0a4fc Date: Fri, 28 Jul 2023 11:46:44 +0000 Subject: [PATCH] anomalies sur les semaines manquantes --- module3/exo3/exercice.ipynb | 89 ++++++++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 21 deletions(-) diff --git a/module3/exo3/exercice.ipynb b/module3/exo3/exercice.ipynb index 1db355d..f7f6345 100644 --- a/module3/exo3/exercice.ipynb +++ b/module3/exo3/exercice.ipynb @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -49,7 +49,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -80,7 +80,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -141,7 +141,7 @@ "1958-04-26 316.48" ] }, - "execution_count": 3, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -154,7 +154,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -183,7 +183,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -228,7 +228,7 @@ "Index: []" ] }, - "execution_count": 5, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -240,7 +240,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -264,28 +264,34 @@ " \n", " \n", " \n", + " index\n", " ppm\n", " \n", " \n", " \n", " \n", - " 1958-03-29\n", + " 0\n", + " 1958-03-29\n", " 316.19\n", " \n", " \n", - " 1958-04-05\n", + " 1\n", + " 1958-04-05\n", " 317.31\n", " \n", " \n", - " 1958-04-12\n", + " 2\n", + " 1958-04-12\n", " 317.69\n", " \n", " \n", - " 1958-04-19\n", + " 3\n", + " 1958-04-19\n", " 317.58\n", " \n", " \n", - " 1958-04-26\n", + " 4\n", + " 1958-04-26\n", " 316.48\n", " 
\n", " \n", @@ -293,24 +299,65 @@ "" ], "text/plain": [ - " ppm\n", - "1958-03-29 316.19\n", - "1958-04-05 317.31\n", - "1958-04-12 317.69\n", - "1958-04-19 317.58\n", - "1958-04-26 316.48" + " index ppm\n", + "0 1958-03-29 316.19\n", + "1 1958-04-05 317.31\n", + "2 1958-04-12 317.69\n", + "3 1958-04-19 317.58\n", + "4 1958-04-26 316.48" ] }, - "execution_count": 6, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "data = raw_data.copy()\n", + "data = raw_data.copy().reset_index()\n", "data.head()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On vérifie si toutes les semaines sont présentes dans les données : ce n'est pas le cas pour les années ci-dessous. Deux cas de figure se présentent :\n", + "- pour l'année 1958 (première date de la série) et l'année 2023 (dernière date de la série) $\\rightarrow$ pas de problème\n", + "- pour les autres années $\\rightarrow$ quelques semaines sont manquantes, on décide de continuer l'analyse telle quelle mais on garde l'information en tête" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1958 25\n", + "1959 48\n", + "1962 48\n", + "1963 49\n", + "1964 31\n", + "1966 49\n", + "1984 48\n", + "2003 49\n", + "2005 49\n", + "2023 26\n" + ] + } + ], + "source": [ + "# Count the records available per year (year = first 4 characters of the date string) and print the years below the threshold.\n", + "data['year'] = data['index'].str[:4]\n", + "weeks_per_year = data.groupby('year')['year'].count()\n", + "years, nb_weeks = weeks_per_year.index, weeks_per_year.values\n", + "for year, n_weeks in zip(years, nb_weeks):\n", + "    if n_weeks < 50:  # NOTE(review): a complete year has 52-53 weekly records, so years with 50-51 are not listed - confirm the threshold\n", + "        print(year, n_weeks)" + ] + }, { "cell_type": "markdown", "metadata": {}, -- 2.18.1