diff --git a/module3/exo3/exercice.ipynb b/module3/exo3/exercice.ipynb index c01f392648d74e1b3e62424645d1f73a112ec1e7..a410a65032c5b396a2729dd0451697fbaa452b3e 100644 --- a/module3/exo3/exercice.ipynb +++ b/module3/exo3/exercice.ipynb @@ -15,7 +15,7 @@ "source": [ "# Oscillation périodique\n", "\n", - "La première étape est de charger les données du fichier et de s'assurer de son contenu" + "La première étape est de charger les données du fichier et de s'assurer de son contenu. D'après les commentaires en en-tête de fichier, la première colonne indique la date et la seconde la concentration de CO2 en micro-moles par mole (ppm). Les mesures sont alignées sur 12h00 sur le premier jour de la semaine." ] }, { @@ -32,34 +32,88 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Data taken from\n", + "# https://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/in_situ_co2/weekly/weekly_in_situ_co2_mlo.csv\n", + "\n", + "path = \"weekly_in_situ_co2_mlo.csv\"\n", + "raw_data = pd.read_csv(path, comment='\"', names=[\"Date\", \"Concentration\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Vérifions les semaines manquantes dans le dataset." + ] + }, + { + "cell_type": "code", + "execution_count": 29, "metadata": {}, "outputs": [ { - "ename": "FileNotFoundError", - "evalue": "File b'weekly_in_situ_co2_mlo.csv' does not exist", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"weekly_in_situ_co2_mlo.csv\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mraw_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)\u001b[0m\n\u001b[1;32m 707\u001b[0m skip_blank_lines=skip_blank_lines)\n\u001b[1;32m 708\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 709\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 710\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 711\u001b[0m \u001b[0mparser_f\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 447\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 448\u001b[0m \u001b[0;31m# Create the parser.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 449\u001b[0;31m \u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 450\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 451\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 816\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'has_index_names'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'has_index_names'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 817\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 818\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 819\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 820\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[0;34m(self, engine)\u001b[0m\n\u001b[1;32m 1047\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'c'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1048\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'c'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1049\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mCParserWrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1050\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1051\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'python'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, src, **kwds)\u001b[0m\n\u001b[1;32m 1693\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'allow_leading_cols'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex_col\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1694\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1695\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparsers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTextReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1696\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1697\u001b[0m \u001b[0;31m# XXX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._setup_parser_source\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mFileNotFoundError\u001b[0m: File b'weekly_in_situ_co2_mlo.csv' does not exist" + "name": "stdout", + "output_type": "stream", + "text": [ + "Missing weeks (74 in total):\n", + "1958-05-10,\t1958-05-31,\t1958-06-07,\t1958-06-14,\t1958-06-21,\t1958-06-28,\t1958-08-23,\t1958-09-13,\t1958-09-20,\t1958-09-27,\t1958-10-04,\t1958-10-11,\t1958-10-18,\t1958-10-25,\t1958-11-01,\t1959-02-07,\t1959-03-14,\t1959-05-30,\t1959-08-15,\t1962-08-25,\t1962-09-01,\t1962-09-08,\t1962-12-29,\t1963-02-16,\t1963-05-04,\t1963-11-23,\t1964-01-25,\t1964-02-01,\t1964-02-08,\t1964-02-15,\t1964-02-22,\t1964-02-29,\t1964-03-07,\t1964-03-14,\t1964-03-21,\t1964-03-28,\t1964-04-04,\t1964-04-11,\t1964-04-18,\t1964-04-25,\t1964-05-02,\t1964-05-09,\t1964-05-16,\t1964-05-23,\t1964-06-13,\t1964-06-20,\t1964-08-08,\t1966-07-16,\t1966-07-23,\t1966-07-30,\t1966-11-05,\t1967-01-21,\t1967-01-28,\t1976-06-26,\t1984-03-31,\t1984-04-07,\t1984-04-14,\t1984-04-21,\t1985-08-03,\t2003-06-14,\t2003-10-11,\t2003-10-18,\t2005-02-26,\t2005-03-05,\t2005-03-12,\t2005-03-19,\t2006-02-11,\t2006-02-18,\t2007-01-27,\t2012-10-06,\t2012-10-13,\t2020-01-18,\t2022-12-03,\t2022-12-10\n", + "\n", + "Missing weeks per year:\n", + "1958 15\n", + "1959 4\n", + "1962 4\n", + "1963 3\n", + "1964 21\n", + "1966 4\n", + "1967 2\n", + "1976 1\n", + "1984 4\n", + "1985 1\n", + "2003 3\n", + "2005 4\n", + "2006 2\n", + "2007 1\n", + "2012 2\n", + "2020 1\n", + "2022 2\n" ] } ], "source": [ - "# Data taken from\n", - "# https://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/in_situ_co2/weekly/weekly_in_situ_co2_mlo.csv\n", + "# Check if every line is the corresponding week\n", + "expected_date = set()\n", + "w = 0\n", + "while True:\n", + " new_week = str(isoweek.Week(1958, 13+w).saturday())\n", + " if new_week >= '2024-01-01':\n", + " break\n", + " expected_date.add(new_week)\n", + " w += 1\n", "\n", - "path = \"weekly_in_situ_co2_mlo.csv\"\n", - "raw_data = pd.read_csv(path)\n" + "# Remove line if found\n", + "for w in raw_data.index:\n", + " stored_date = raw_data[\"Date\"][w]\n", + " expected_date.remove(stored_date)\n", + "\n", + "missing_weeks = sorted(expected_date)\n", + "print(f\"Missing weeks ({len(missing_weeks)} in total):\")\n", + "print(\",\\t\".join(missing_weeks))\n", + "\n", + "missing_weeks_keep_year = [e[:4] for e in missing_weeks]\n", + "missing_weeks_per_year = dict()\n", + "for e in missing_weeks_keep_year:\n", + " if e in missing_weeks_per_year.keys():\n", + " missing_weeks_per_year[e] += 1\n", + " else:\n", + " missing_weeks_per_year[e] = 1\n", + " \n", + "print(f\"\\nMissing weeks per year:\")\n", + "for y in sorted(missing_weeks_per_year):\n", + " print(y, missing_weeks_per_year[y])" ] } ],