{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_name
0202425712129813016128181224FRFrance
1202424712731943816024191424FRFrance
22024237146571133917975221727FRFrance
3202422711628836114895171222FRFrance
420242179701685112551151119FRFrance
52024207136611020917113201525FRFrance
620241971008364131375315921FRFrance
7202418713438951417362201426FRFrance
82024177153031121919387231729FRFrance
92024167181381354022736272034FRFrance
102024157249291731532543372648FRFrance
112024147161811254419818241929FRFrance
122024137183221420622438272133FRFrance
13202412712818912816508191325FRFrance
142024117159731240019546241929FRFrance
152024107143011076117841211626FRFrance
162024097143371087117803211626FRFrance
172024087158991199119807241830FRFrance
18202407711294822614362171222FRFrance
19202406712174902015328181323FRFrance
202024057881461101151813917FRFrance
2120240479504656612442141018FRFrance
22202403769484633926310713FRFrance
23202402771254852939811814FRFrance
24202401713305921417396201426FRFrance
25202352711636735415918181224FRFrance
26202351769124227959710614FRFrance
272023507879962151138313917FRFrance
282023497781753621027212816FRFrance
29202348773514749995311715FRFrance
.................................
17211991267176081130423912312042FRFrance
17221991257161691070021638281838FRFrance
17231991247161711007122271281739FRFrance
1724199123711947767116223211329FRFrance
1725199122715452995320951271737FRFrance
1726199121714903897520831261636FRFrance
17271991207190531274225364342345FRFrance
17281991197167391124622232291939FRFrance
17291991187213851388228888382551FRFrance
1730199117713462887718047241632FRFrance
17311991167148571006819646261834FRFrance
1732199115713975978118169251832FRFrance
1733199114712265768416846221430FRFrance
173419911379567604113093171123FRFrance
1735199112710864733114397191325FRFrance
17361991117155741118419964271935FRFrance
17371991107166431137221914292038FRFrance
1738199109713741878018702241533FRFrance
1739199108713289881317765231531FRFrance
1740199107712337807716597221529FRFrance
1741199106710877701314741191226FRFrance
1742199105710442654414340181125FRFrance
17431991047791345631126314820FRFrance
17441991037153871048420290271836FRFrance
17451991027162771104621508292038FRFrance
17461991017155651027120859271836FRFrance
17471990527193751329525455342345FRFrance
17481990517190801380724353342543FRFrance
1749199050711079666015498201228FRFrance
17501990497114302610205FRFrance
\n", "

1751 rows × 10 columns

\n", "
" ], "text/plain": [ " week indicator inc inc_low inc_up inc100 inc100_low \\\n", "0 202425 7 12129 8130 16128 18 12 \n", "1 202424 7 12731 9438 16024 19 14 \n", "2 202423 7 14657 11339 17975 22 17 \n", "3 202422 7 11628 8361 14895 17 12 \n", "4 202421 7 9701 6851 12551 15 11 \n", "5 202420 7 13661 10209 17113 20 15 \n", "6 202419 7 10083 6413 13753 15 9 \n", "7 202418 7 13438 9514 17362 20 14 \n", "8 202417 7 15303 11219 19387 23 17 \n", "9 202416 7 18138 13540 22736 27 20 \n", "10 202415 7 24929 17315 32543 37 26 \n", "11 202414 7 16181 12544 19818 24 19 \n", "12 202413 7 18322 14206 22438 27 21 \n", "13 202412 7 12818 9128 16508 19 13 \n", "14 202411 7 15973 12400 19546 24 19 \n", "15 202410 7 14301 10761 17841 21 16 \n", "16 202409 7 14337 10871 17803 21 16 \n", "17 202408 7 15899 11991 19807 24 18 \n", "18 202407 7 11294 8226 14362 17 12 \n", "19 202406 7 12174 9020 15328 18 13 \n", "20 202405 7 8814 6110 11518 13 9 \n", "21 202404 7 9504 6566 12442 14 10 \n", "22 202403 7 6948 4633 9263 10 7 \n", "23 202402 7 7125 4852 9398 11 8 \n", "24 202401 7 13305 9214 17396 20 14 \n", "25 202352 7 11636 7354 15918 18 12 \n", "26 202351 7 6912 4227 9597 10 6 \n", "27 202350 7 8799 6215 11383 13 9 \n", "28 202349 7 7817 5362 10272 12 8 \n", "29 202348 7 7351 4749 9953 11 7 \n", "... ... ... ... ... ... ... ... \n", "1721 199126 7 17608 11304 23912 31 20 \n", "1722 199125 7 16169 10700 21638 28 18 \n", "1723 199124 7 16171 10071 22271 28 17 \n", "1724 199123 7 11947 7671 16223 21 13 \n", "1725 199122 7 15452 9953 20951 27 17 \n", "1726 199121 7 14903 8975 20831 26 16 \n", "1727 199120 7 19053 12742 25364 34 23 \n", "1728 199119 7 16739 11246 22232 29 19 \n", "1729 199118 7 21385 13882 28888 38 25 \n", "1730 199117 7 13462 8877 18047 24 16 \n", "1731 199116 7 14857 10068 19646 26 18 \n", "1732 199115 7 13975 9781 18169 25 18 \n", "1733 199114 7 12265 7684 16846 22 14 \n", "1734 199113 7 9567 6041 13093 17 11 \n", "1735 199112 7 10864 7331 14397 19 13 \n", "1736 199111 7 15574 11184 19964 27 19 \n", "1737 199110 7 16643 11372 21914 29 20 \n", "1738 199109 7 13741 8780 18702 24 15 \n", "1739 199108 7 13289 8813 17765 23 15 \n", "1740 199107 7 12337 8077 16597 22 15 \n", "1741 199106 7 10877 7013 14741 19 12 \n", "1742 199105 7 10442 6544 14340 18 11 \n", "1743 199104 7 7913 4563 11263 14 8 \n", "1744 199103 7 15387 10484 20290 27 18 \n", "1745 199102 7 16277 11046 21508 29 20 \n", "1746 199101 7 15565 10271 20859 27 18 \n", "1747 199052 7 19375 13295 25455 34 23 \n", "1748 199051 7 19080 13807 24353 34 25 \n", "1749 199050 7 11079 6660 15498 20 12 \n", "1750 199049 7 1143 0 2610 2 0 \n", "\n", " inc100_up geo_insee geo_name \n", "0 24 FR France \n", "1 24 FR France \n", "2 27 FR France \n", "3 22 FR France \n", "4 19 FR France \n", "5 25 FR France \n", "6 21 FR France \n", "7 26 FR France \n", "8 29 FR France \n", "9 34 FR France \n", "10 48 FR France \n", "11 29 FR France \n", "12 33 FR France \n", "13 25 FR France \n", "14 29 FR France \n", "15 26 FR France \n", "16 26 FR France \n", "17 30 FR France \n", "18 22 FR France \n", "19 23 FR France \n", "20 17 FR France \n", "21 18 FR France \n", "22 13 FR France \n", "23 14 FR France \n", "24 26 FR France \n", "25 24 FR France \n", "26 14 FR France \n", "27 17 FR France \n", "28 16 FR France \n", "29 15 FR France \n", "... ... ... ... \n", "1721 42 FR France \n", "1722 38 FR France \n", "1723 39 FR France \n", "1724 29 FR France \n", "1725 37 FR France \n", "1726 36 FR France \n", "1727 45 FR France \n", "1728 39 FR France \n", "1729 51 FR France \n", "1730 32 FR France \n", "1731 34 FR France \n", "1732 32 FR France \n", "1733 30 FR France \n", "1734 23 FR France \n", "1735 25 FR France \n", "1736 35 FR France \n", "1737 38 FR France \n", "1738 33 FR France \n", "1739 31 FR France \n", "1740 29 FR France \n", "1741 26 FR France \n", "1742 25 FR France \n", "1743 20 FR France \n", "1744 36 FR France \n", "1745 38 FR France \n", "1746 36 FR France \n", "1747 45 FR France \n", "1748 43 FR France \n", "1749 28 FR France \n", "1750 5 FR France \n", "\n", "[1751 rows x 10 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import isoweek\n", "\n", "data_url = \"https://www.sentiweb.fr/datasets/all/inc-7-PAY.csv\"\n", "raw_data = pd.read_csv(data_url, skiprows=1)\n", "raw_data" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_name
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [week, indicator, inc, inc_low, inc_up, inc100, inc100_low, inc100_up, geo_insee, geo_name]\n", "Index: []" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "raw_data[raw_data.isnull().any(axis=1)]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "no data missing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "convert data to year" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "data = raw_data\n", "def convert_week(year_and_week_int):\n", " year_and_week_str = str(year_and_week_int)\n", " year = int(year_and_week_str[:4])\n", " week = int(year_and_week_str[4:])\n", " w = isoweek.Week(year, week)\n", " return pd.Period(w.day(0), 'W')\n", "\n", "data['period'] = [convert_week(yw) for yw in data['week']]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "sorted_data = data.set_index('period').sort_index()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "#see some missing?\n", "periods = sorted_data.index\n", "for p1, p2 in zip(periods[:-1], periods[1:]):\n", " delta = p2.to_timestamp() - p1.end_time\n", " if delta > pd.Timedelta('1s'):\n", " print(p1, p2)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1991-07-29/1991-08-04\n", "1992-07-27/1992-08-02\n", "1993-07-26/1993-08-01\n", "1994-08-01/1994-08-07\n", "1995-07-31/1995-08-06\n", "1996-07-29/1996-08-04\n", "1997-07-28/1997-08-03\n", "1998-07-27/1998-08-02\n", "1999-07-26/1999-08-01\n", "2000-07-31/2000-08-06\n", "2001-07-30/2001-08-05\n", "2002-07-29/2002-08-04\n", "2003-07-28/2003-08-03\n", "2004-07-26/2004-08-01\n", "2005-08-01/2005-08-07\n", "2006-07-31/2006-08-06\n", "2007-07-30/2007-08-05\n", "2008-07-28/2008-08-03\n", "2009-07-27/2009-08-02\n", "2010-07-26/2010-08-01\n", "2011-08-01/2011-08-07\n", "2012-07-30/2012-08-05\n", "2013-07-29/2013-08-04\n", "2014-07-28/2014-08-03\n", "2015-07-27/2015-08-02\n", "2016-08-01/2016-08-07\n", "2017-07-31/2017-08-06\n", "2018-07-30/2018-08-05\n", "2019-07-29/2019-08-04\n", "2020-07-27/2020-08-02\n", "2021-07-26/2021-08-01\n", "2022-08-01/2022-08-07\n", "2023-07-31/2023-08-06\n" ] } ], "source": [ "first_august_week = [pd.Period(pd.Timestamp(y, 8, 1), 'W')\n", " for y in range(1991,\n", " sorted_data.index[-1].year)]\n", "\n", "for i in first_august_week:\n", " print(i)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Year 1991: Total 'inc' sum = 679607\n", "Year 1992: Total 'inc' sum = 843500\n", "Year 1993: Total 'inc' sum = 611264\n", "Year 1994: Total 'inc' sum = 670292\n", "Year 1995: Total 'inc' sum = 667294\n", "Year 1996: Total 'inc' sum = 646644\n", "Year 1997: Total 'inc' sum = 643524\n", "Year 1998: Total 'inc' sum = 785055\n", "Year 1999: Total 'inc' sum = 670819\n", "Year 2000: Total 'inc' sum = 656975\n", "Year 2001: Total 'inc' sum = 569744\n", "Year 2002: Total 'inc' sum = 612129\n", "Year 2003: Total 'inc' sum = 703899\n", "Year 2004: Total 'inc' sum = 850765\n", "Year 2005: Total 'inc' sum = 672315\n", "Year 2006: Total 'inc' sum = 574493\n", "Year 2007: Total 'inc' sum = 798899\n", "Year 2008: Total 'inc' sum = 766066\n", "Year 2009: Total 'inc' sum = 875937\n", "Year 2010: Total 'inc' sum = 794691\n", "Year 2011: Total 'inc' sum = 655146\n", "Year 2012: Total 'inc' sum = 720814\n", "Year 2013: Total 'inc' sum = 680822\n", "Year 2014: Total 'inc' sum = 664026\n", "Year 2015: Total 'inc' sum = 649742\n", "Year 2016: Total 'inc' sum = 744389\n", "Year 2017: Total 'inc' sum = 564245\n", "Year 2018: Total 'inc' sum = 577351\n", "Year 2019: Total 'inc' sum = 550709\n", "Year 2020: Total 'inc' sum = 229985\n", "Year 2021: Total 'inc' sum = 441771\n", "Year 2022: Total 'inc' sum = 582916\n", "Year 2023: Total 'inc' sum = 382480\n" ] } ], "source": [ "import pandas as pd\n", "\n", "# Initialize a dictionary to store annual sums\n", "annual_sums = {}\n", "\n", "# Iterate over each year's first week of August\n", "for week_start in first_august_week:\n", " year = week_start.year\n", "\n", " year_data = sorted_data[str(year)]\n", " annual_sum = year_data['inc'].sum()\n", " \n", " # Store the annual sum in the dictionary\n", " annual_sums[year] = annual_sum\n", "\n", "# Print annual sums\n", "for year, total_inc in annual_sums.items():\n", " print(f\"Year {year}: Total 'inc' sum = {total_inc}\")\n" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "875937\n", "229985\n" ] } ], "source": [ "max_sum = max(annual_sums.values())\n", "min_sum = min(annual_sums.values())\n", "print(max_sum)\n", "print(min_sum)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }