diff --git a/module3/exo2/exercice.ipynb b/module3/exo2/exercice.ipynb index 0bbbe371b01e359e381e43239412d77bf53fb1fb..b15796cf468621428c81ffbab6b71745980e47c6 100644 --- a/module3/exo2/exercice.ipynb +++ b/module3/exo2/exercice.ipynb @@ -1,5 +1,1045 @@ { - "cells": [], + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Analyse des épidémies de varicelle" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from matplotlib import pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
weekindicatorincinc_lowinc_upinc100inc100_lowinc100_upgeo_inseegeo_name
02024507753243841068011616FRFrance
120244976015357684549513FRFrance
220244874189145469246210FRFrance
3202447719317263136315FRFrance
4202446722608633657315FRFrance
52024457271312164210426FRFrance
6202444721356763594315FRFrance
7202443721246413607315FRFrance
82024427262112463996426FRFrance
9202441720353813689315FRFrance
10202440721257253525315FRFrance
112024397289813334463426FRFrance
12202438775101513102FRFrance
132024377916281804102FRFrance
14202436722358703600315FRFrance
15202435716202852955204FRFrance
16202434725606224498417FRFrance
17202433719715363406315FRFrance
1820243274399194468547311FRFrance
1920243174500221367877410FRFrance
20202430770044278973011715FRFrance
2120242979270630312237141018FRFrance
2220242879364649812230141018FRFrance
23202427710247709013404151020FRFrance
242024267143681039918337221628FRFrance
25202425711174803914309171222FRFrance
26202424712621935715885191424FRFrance
272024237146571133917975221727FRFrance
28202422711628836114895171222FRFrance
2920242179701685112551151119FRFrance
.................................
17461991267176081130423912312042FRFrance
17471991257161691070021638281838FRFrance
17481991247161711007122271281739FRFrance
1749199123711947767116223211329FRFrance
1750199122715452995320951271737FRFrance
1751199121714903897520831261636FRFrance
17521991207190531274225364342345FRFrance
17531991197167391124622232291939FRFrance
17541991187213851388228888382551FRFrance
1755199117713462887718047241632FRFrance
17561991167148571006819646261834FRFrance
1757199115713975978118169251832FRFrance
1758199114712265768416846221430FRFrance
175919911379567604113093171123FRFrance
1760199112710864733114397191325FRFrance
17611991117155741118419964271935FRFrance
17621991107166431137221914292038FRFrance
1763199109713741878018702241533FRFrance
1764199108713289881317765231531FRFrance
1765199107712337807716597221529FRFrance
1766199106710877701314741191226FRFrance
1767199105710442654414340181125FRFrance
17681991047791345631126314820FRFrance
17691991037153871048420290271836FRFrance
17701991027162771104621508292038FRFrance
17711991017155651027120859271836FRFrance
17721990527193751329525455342345FRFrance
17731990517190801380724353342543FRFrance
1774199050711079666015498201228FRFrance
17751990497114302610205FRFrance
\n", + "

1776 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " week indicator inc inc_low inc_up inc100 inc100_low \\\n", + "0 202450 7 7532 4384 10680 11 6 \n", + "1 202449 7 6015 3576 8454 9 5 \n", + "2 202448 7 4189 1454 6924 6 2 \n", + "3 202447 7 1931 726 3136 3 1 \n", + "4 202446 7 2260 863 3657 3 1 \n", + "5 202445 7 2713 1216 4210 4 2 \n", + "6 202444 7 2135 676 3594 3 1 \n", + "7 202443 7 2124 641 3607 3 1 \n", + "8 202442 7 2621 1246 3996 4 2 \n", + "9 202441 7 2035 381 3689 3 1 \n", + "10 202440 7 2125 725 3525 3 1 \n", + "11 202439 7 2898 1333 4463 4 2 \n", + "12 202438 7 751 0 1513 1 0 \n", + "13 202437 7 916 28 1804 1 0 \n", + "14 202436 7 2235 870 3600 3 1 \n", + "15 202435 7 1620 285 2955 2 0 \n", + "16 202434 7 2560 622 4498 4 1 \n", + "17 202433 7 1971 536 3406 3 1 \n", + "18 202432 7 4399 1944 6854 7 3 \n", + "19 202431 7 4500 2213 6787 7 4 \n", + "20 202430 7 7004 4278 9730 11 7 \n", + "21 202429 7 9270 6303 12237 14 10 \n", + "22 202428 7 9364 6498 12230 14 10 \n", + "23 202427 7 10247 7090 13404 15 10 \n", + "24 202426 7 14368 10399 18337 22 16 \n", + "25 202425 7 11174 8039 14309 17 12 \n", + "26 202424 7 12621 9357 15885 19 14 \n", + "27 202423 7 14657 11339 17975 22 17 \n", + "28 202422 7 11628 8361 14895 17 12 \n", + "29 202421 7 9701 6851 12551 15 11 \n", + "... ... ... ... ... ... ... ... 
\n", + "1746 199126 7 17608 11304 23912 31 20 \n", + "1747 199125 7 16169 10700 21638 28 18 \n", + "1748 199124 7 16171 10071 22271 28 17 \n", + "1749 199123 7 11947 7671 16223 21 13 \n", + "1750 199122 7 15452 9953 20951 27 17 \n", + "1751 199121 7 14903 8975 20831 26 16 \n", + "1752 199120 7 19053 12742 25364 34 23 \n", + "1753 199119 7 16739 11246 22232 29 19 \n", + "1754 199118 7 21385 13882 28888 38 25 \n", + "1755 199117 7 13462 8877 18047 24 16 \n", + "1756 199116 7 14857 10068 19646 26 18 \n", + "1757 199115 7 13975 9781 18169 25 18 \n", + "1758 199114 7 12265 7684 16846 22 14 \n", + "1759 199113 7 9567 6041 13093 17 11 \n", + "1760 199112 7 10864 7331 14397 19 13 \n", + "1761 199111 7 15574 11184 19964 27 19 \n", + "1762 199110 7 16643 11372 21914 29 20 \n", + "1763 199109 7 13741 8780 18702 24 15 \n", + "1764 199108 7 13289 8813 17765 23 15 \n", + "1765 199107 7 12337 8077 16597 22 15 \n", + "1766 199106 7 10877 7013 14741 19 12 \n", + "1767 199105 7 10442 6544 14340 18 11 \n", + "1768 199104 7 7913 4563 11263 14 8 \n", + "1769 199103 7 15387 10484 20290 27 18 \n", + "1770 199102 7 16277 11046 21508 29 20 \n", + "1771 199101 7 15565 10271 20859 27 18 \n", + "1772 199052 7 19375 13295 25455 34 23 \n", + "1773 199051 7 19080 13807 24353 34 25 \n", + "1774 199050 7 11079 6660 15498 20 12 \n", + "1775 199049 7 1143 0 2610 2 0 \n", + "\n", + " inc100_up geo_insee geo_name \n", + "0 16 FR France \n", + "1 13 FR France \n", + "2 10 FR France \n", + "3 5 FR France \n", + "4 5 FR France \n", + "5 6 FR France \n", + "6 5 FR France \n", + "7 5 FR France \n", + "8 6 FR France \n", + "9 5 FR France \n", + "10 5 FR France \n", + "11 6 FR France \n", + "12 2 FR France \n", + "13 2 FR France \n", + "14 5 FR France \n", + "15 4 FR France \n", + "16 7 FR France \n", + "17 5 FR France \n", + "18 11 FR France \n", + "19 10 FR France \n", + "20 15 FR France \n", + "21 18 FR France \n", + "22 18 FR France \n", + "23 20 FR France \n", + "24 28 FR France \n", + "25 22 FR France 
\n", + "26 24 FR France \n", + "27 27 FR France \n", + "28 22 FR France \n", + "29 19 FR France \n", + "... ... ... ... \n", + "1746 42 FR France \n", + "1747 38 FR France \n", + "1748 39 FR France \n", + "1749 29 FR France \n", + "1750 37 FR France \n", + "1751 36 FR France \n", + "1752 45 FR France \n", + "1753 39 FR France \n", + "1754 51 FR France \n", + "1755 32 FR France \n", + "1756 34 FR France \n", + "1757 32 FR France \n", + "1758 30 FR France \n", + "1759 23 FR France \n", + "1760 25 FR France \n", + "1761 35 FR France \n", + "1762 38 FR France \n", + "1763 33 FR France \n", + "1764 31 FR France \n", + "1765 29 FR France \n", + "1766 26 FR France \n", + "1767 25 FR France \n", + "1768 20 FR France \n", + "1769 36 FR France \n", + "1770 38 FR France \n", + "1771 36 FR France \n", + "1772 45 FR France \n", + "1773 43 FR France \n", + "1774 28 FR France \n", + "1775 5 FR France \n", + "\n", + "[1776 rows x 10 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data=pd.read_csv('inc-7-PAY.csv',skiprows=1)\n", + "raw_data" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1776 entries, 0 to 1775\n", + "Data columns (total 10 columns):\n", + "week 1776 non-null int64\n", + "indicator 1776 non-null int64\n", + "inc 1776 non-null int64\n", + "inc_low 1776 non-null int64\n", + "inc_up 1776 non-null int64\n", + "inc100 1776 non-null int64\n", + "inc100_low 1776 non-null int64\n", + "inc100_up 1776 non-null int64\n", + "geo_insee 1776 non-null object\n", + "geo_name 1776 non-null object\n", + "dtypes: int64(8), object(2)\n", + "memory usage: 138.8+ KB\n" + ] + } + ], + "source": [ + "raw_data.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On voit ici qu'il n'y a aucune données manquantes entre fin 1990 et fin 2024, et toutes les 
données numériques sont au format `int`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], "metadata": { "kernelspec": { "display_name": "Python 3", @@ -16,10 +1056,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.3" + "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 } - diff --git a/module3/exo3/exercice_fr.ipynb b/module3/exo3/exercice_fr.ipynb index 0bbbe371b01e359e381e43239412d77bf53fb1fb..1ff3ad270886dad506371bf28fd5835fd77530c5 100644 --- a/module3/exo3/exercice_fr.ipynb +++ b/module3/exo3/exercice_fr.ipynb @@ -1,6 +1,2242 @@ { - "cells": [], + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "hideCode": false, + "hidePrompt": true + }, + "source": [ + "# Autour du paradoxe de Simpson" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "hideCode": false, + "hidePrompt": true + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "hideCode": false, + "hidePrompt": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SmokerStatusAge
0YesAlive21.0
1YesAlive19.3
2NoDead57.5
3NoAlive47.1
4YesAlive81.4
5NoAlive36.8
6NoAlive23.8
7YesDead57.5
8YesAlive24.8
9YesAlive49.5
10YesAlive30.0
11NoDead66.0
12YesAlive49.2
13NoAlive58.4
14NoDead60.6
15NoAlive25.1
16NoAlive43.5
17NoAlive27.1
18NoAlive58.3
19YesAlive65.7
20NoDead73.2
21YesAlive38.3
22NoAlive33.4
23YesDead62.3
24NoAlive18.0
25NoAlive56.2
26YesAlive59.2
27NoAlive25.8
28NoDead36.9
29NoAlive20.2
............
1284YesDead36.0
1285YesAlive48.3
1286NoAlive63.1
1287NoAlive60.8
1288YesDead39.3
1289NoAlive36.7
1290NoAlive63.8
1291NoDead71.3
1292NoAlive57.7
1293NoAlive63.2
1294NoAlive46.6
1295YesDead82.4
1296YesAlive38.3
1297YesAlive32.7
1298NoAlive39.7
1299YesDead60.0
1300NoDead71.0
1301NoAlive20.5
1302NoAlive44.4
1303YesAlive31.2
1304YesAlive47.8
1305YesAlive60.9
1306NoDead61.4
1307YesAlive43.0
1308NoAlive42.1
1309YesAlive35.9
1310NoAlive22.3
1311YesDead62.1
1312NoDead88.6
1313NoAlive39.1
\n", + "

1314 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " Smoker Status Age\n", + "0 Yes Alive 21.0\n", + "1 Yes Alive 19.3\n", + "2 No Dead 57.5\n", + "3 No Alive 47.1\n", + "4 Yes Alive 81.4\n", + "5 No Alive 36.8\n", + "6 No Alive 23.8\n", + "7 Yes Dead 57.5\n", + "8 Yes Alive 24.8\n", + "9 Yes Alive 49.5\n", + "10 Yes Alive 30.0\n", + "11 No Dead 66.0\n", + "12 Yes Alive 49.2\n", + "13 No Alive 58.4\n", + "14 No Dead 60.6\n", + "15 No Alive 25.1\n", + "16 No Alive 43.5\n", + "17 No Alive 27.1\n", + "18 No Alive 58.3\n", + "19 Yes Alive 65.7\n", + "20 No Dead 73.2\n", + "21 Yes Alive 38.3\n", + "22 No Alive 33.4\n", + "23 Yes Dead 62.3\n", + "24 No Alive 18.0\n", + "25 No Alive 56.2\n", + "26 Yes Alive 59.2\n", + "27 No Alive 25.8\n", + "28 No Dead 36.9\n", + "29 No Alive 20.2\n", + "... ... ... ...\n", + "1284 Yes Dead 36.0\n", + "1285 Yes Alive 48.3\n", + "1286 No Alive 63.1\n", + "1287 No Alive 60.8\n", + "1288 Yes Dead 39.3\n", + "1289 No Alive 36.7\n", + "1290 No Alive 63.8\n", + "1291 No Dead 71.3\n", + "1292 No Alive 57.7\n", + "1293 No Alive 63.2\n", + "1294 No Alive 46.6\n", + "1295 Yes Dead 82.4\n", + "1296 Yes Alive 38.3\n", + "1297 Yes Alive 32.7\n", + "1298 No Alive 39.7\n", + "1299 Yes Dead 60.0\n", + "1300 No Dead 71.0\n", + "1301 No Alive 20.5\n", + "1302 No Alive 44.4\n", + "1303 Yes Alive 31.2\n", + "1304 Yes Alive 47.8\n", + "1305 Yes Alive 60.9\n", + "1306 No Dead 61.4\n", + "1307 Yes Alive 43.0\n", + "1308 No Alive 42.1\n", + "1309 Yes Alive 35.9\n", + "1310 No Alive 22.3\n", + "1311 Yes Dead 62.1\n", + "1312 No Dead 88.6\n", + "1313 No Alive 39.1\n", + "\n", + "[1314 rows x 3 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "donnees = pd.read_csv('Subject6_smoking.csv')\n", + "donnees" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "hideCode": false, + "hidePrompt": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + 
"RangeIndex: 1314 entries, 0 to 1313\n", + "Data columns (total 3 columns):\n", + "Smoker 1314 non-null object\n", + "Status 1314 non-null object\n", + "Age 1314 non-null float64\n", + "dtypes: float64(1), object(2)\n", + "memory usage: 30.9+ KB\n" + ] + } + ], + "source": [ + "donnees.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hideCode": false, + "hidePrompt": true + }, + "source": [ + "Pour faire un peu de statistiques sur ces données, on peut représenter `Yes` par `1` et `No` par `0`, et `Alive` par `1` et `Dead` par `0`. On va en fait procéder autrement mais c'était ma première approche naïve, je laisse donc mon code initial ainsi que les résultats obtenus. Au lieu d'utiliser la méthode `apply` j'aurais également pu utiliser la méthode `replace`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hideCode": false, + "hidePrompt": true + }, + "source": [ + "``` \n", + "def convert(x):\n", + " if (x=='Yes') | (x==\"Alive\"):\n", + " return 1\n", + " elif (x=='No') | (x=='Dead'):\n", + " return 0\n", + "\n", + "donnees['Smoker'] = donnees['Smoker'].apply(convert)\n", + "donnees['Status'] = donnees['Status'].apply(convert)\n", + "donnees[['Smoker','Status']].sum()\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hideCode": false, + "hidePrompt": true + }, + "source": [ + "Parmi les 1314 femmes sondées, il y a donc 582 fumeuses, et 945 des femmes (fumeuses et non fumeuses) sont encore vivantes 20 ans après." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hideCode": false, + "hidePrompt": true + }, + "source": [ + "Ré-obtenons ces informations à l'aide des méthodes de regroupement."
+ ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "hideCode": false, + "hidePrompt": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "No 732\n", + "Yes 582\n", + "Name: Smoker, dtype: int64" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "donnees['Smoker'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "hideCode": false, + "hidePrompt": true, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Alive 945\n", + "Dead 369\n", + "Name: Status, dtype: int64" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "donnees['Status'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hideCode": false, + "hidePrompt": true + }, + "source": [ + "Maintenant regardons les infos jointes, pour essayer de comprendre les dépendances. Pour ça, on pourrait écrire \n", + "```\n", + "donnees[['Smoker','Status']].value_counts()\n", + "```\n", + "pour avoir le tableau souhaité, mais ce notebook jupyter utilise une version non à jour de pandas où on ne peut pas utiliser `value_counts` sur un `dataframe`, donc on ruse." + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "hideCode": false, + "hidePrompt": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Total
SmokerStatus
NoAlive502
Dead230
YesAlive443
Dead139
\n", + "
" + ], + "text/plain": [ + " Total\n", + "Smoker Status \n", + "No Alive 502\n", + " Dead 230\n", + "Yes Alive 443\n", + " Dead 139" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tableau = donnees.groupby(['Smoker','Status']).count()\n", + "tableau = tableau.rename(columns={'Age':'Total'})\n", + "tableau" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "hideCode": false, + "hidePrompt": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "MultiIndex: 4 entries, (No, Alive) to (Yes, Dead)\n", + "Data columns (total 1 columns):\n", + "Total 4 non-null int64\n", + "dtypes: int64(1)\n", + "memory usage: 238.0+ bytes\n" + ] + } + ], + "source": [ + "tableau.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Un autre moyen d'obtenir le même tableau (toujours avec le même souci de nom)." + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age
SmokerStatus
NoAlive502
Dead230
YesAlive443
Dead139
All1314
\n", + "
" + ], + "text/plain": [ + " Age\n", + "Smoker Status \n", + "No Alive 502\n", + " Dead 230\n", + "Yes Alive 443\n", + " Dead 139\n", + "All 1314" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "donnees.pivot_table(index = ['Smoker','Status'], aggfunc='count', margins=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Une manière plus agréable visuellement d'avoir les mêmes données, et en supprimant le problème de nom :" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StatusAliveDeadAll
Smoker
No502230732
Yes443139582
All9453691314
\n", + "
" + ], + "text/plain": [ + "Status Alive Dead All\n", + "Smoker \n", + "No 502 230 732\n", + "Yes 443 139 582\n", + "All 945 369 1314" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "table2 = donnees.pivot_table(index = 'Smoker',values='Age', columns='Status', aggfunc='count', margins=True) \n", + "table2" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 3 entries, No to All\n", + "Data columns (total 3 columns):\n", + "Alive 3 non-null int64\n", + "Dead 3 non-null int64\n", + "All 3 non-null int64\n", + "dtypes: int64(3)\n", + "memory usage: 96.0+ bytes\n" + ] + } + ], + "source": [ + "table2.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Remarquons que si on ne précise pas `values='Age'`, on obtient quelque chose de proche mais avec un souci de nom à nouveau." + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age
StatusAliveDeadAll
Smoker
No502230732
Yes443139582
All9453691314
\n", + "
" + ], + "text/plain": [ + " Age \n", + "Status Alive Dead All\n", + "Smoker \n", + "No 502 230 732\n", + "Yes 443 139 582\n", + "All 945 369 1314" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "table3 = donnees.pivot_table(index = 'Smoker', columns='Status', aggfunc='count', margins=True) \n", + "table3" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 3 entries, No to All\n", + "Data columns (total 3 columns):\n", + "(Age, Alive) 3 non-null int64\n", + "(Age, Dead) 3 non-null int64\n", + "(Age, All) 3 non-null int64\n", + "dtypes: int64(3)\n", + "memory usage: 96.0+ bytes\n" + ] + } + ], + "source": [ + "table3.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hideCode": false, + "hidePrompt": true + }, + "source": [ + "Evaluons maintenant le taux de mortalité selon si on fume ou non." + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StatusAliveDeadAllratio_deces
Smoker
No5022307320.314208
Yes4431395820.238832
All94536913140.280822
\n", + "
" + ], + "text/plain": [ + "Status Alive Dead All ratio_deces\n", + "Smoker \n", + "No 502 230 732 0.314208\n", + "Yes 443 139 582 0.238832\n", + "All 945 369 1314 0.280822" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "table2['ratio_deces'] = table2['Dead']/table2['All']\n", + "table2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On observe donc que les fumeuses ont eu un taux de décès plus faible !\n", + "\n", + "Le problème avec notre étude, c'est que l'âge (qui est clairement un facteur dans la mort des individus) des participants n'a pas été pris en compte, or le fait de fumer (ou non) pour une femme dans les années 70 est corrélé avec l'âge et nos groupes de sont pas équivalent du point de vue de l'âge. On peut facilement le vérifier ici :" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StatusAliveDeadAll
Smoker
No40.34741070.48173949.815847
Yes39.64898458.99640344.269759
All40.02000066.15528547.359361
\n", + "
" + ], + "text/plain": [ + "Status Alive Dead All\n", + "Smoker \n", + "No 40.347410 70.481739 49.815847\n", + "Yes 39.648984 58.996403 44.269759\n", + "All 40.020000 66.155285 47.359361" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "donnees.pivot_table(index = 'Smoker',values='Age', columns='Status', aggfunc='mean', margins=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ajoutons des tranches d'ages pour prendre en compte cette composante." + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "def classifie(x):\n", + " if x <= 34:\n", + " return '18-34 ans'\n", + " elif x <= 54:\n", + " return '35-54 ans'\n", + " elif x <= 65:\n", + " return '55-65 ans'\n", + " else:\n", + " return '>65 ans'\n", + " \n", + "donnees['tranche']=donnees['Age'].apply(classifie)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SmokerStatusAgetranche
0YesAlive21.018-34 ans
1YesAlive19.318-34 ans
2NoDead57.555-65 ans
3NoAlive47.135-54 ans
4YesAlive81.4>65 ans
5NoAlive36.835-54 ans
6NoAlive23.818-34 ans
7YesDead57.555-65 ans
8YesAlive24.818-34 ans
9YesAlive49.535-54 ans
10YesAlive30.018-34 ans
11NoDead66.0>65 ans
12YesAlive49.235-54 ans
13NoAlive58.455-65 ans
14NoDead60.655-65 ans
15NoAlive25.118-34 ans
16NoAlive43.535-54 ans
17NoAlive27.118-34 ans
18NoAlive58.355-65 ans
19YesAlive65.7>65 ans
20NoDead73.2>65 ans
21YesAlive38.335-54 ans
22NoAlive33.418-34 ans
23YesDead62.355-65 ans
24NoAlive18.018-34 ans
25NoAlive56.255-65 ans
26YesAlive59.255-65 ans
27NoAlive25.818-34 ans
28NoDead36.935-54 ans
29NoAlive20.218-34 ans
...............
1284YesDead36.035-54 ans
1285YesAlive48.335-54 ans
1286NoAlive63.155-65 ans
1287NoAlive60.855-65 ans
1288YesDead39.335-54 ans
1289NoAlive36.735-54 ans
1290NoAlive63.855-65 ans
1291NoDead71.3>65 ans
1292NoAlive57.755-65 ans
1293NoAlive63.255-65 ans
1294NoAlive46.635-54 ans
1295YesDead82.4>65 ans
1296YesAlive38.335-54 ans
1297YesAlive32.718-34 ans
1298NoAlive39.735-54 ans
1299YesDead60.055-65 ans
1300NoDead71.0>65 ans
1301NoAlive20.518-34 ans
1302NoAlive44.435-54 ans
1303YesAlive31.218-34 ans
1304YesAlive47.835-54 ans
1305YesAlive60.955-65 ans
1306NoDead61.455-65 ans
1307YesAlive43.035-54 ans
1308NoAlive42.135-54 ans
1309YesAlive35.935-54 ans
1310NoAlive22.318-34 ans
1311YesDead62.155-65 ans
1312NoDead88.6>65 ans
1313NoAlive39.135-54 ans
\n", + "

1314 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " Smoker Status Age tranche\n", + "0 Yes Alive 21.0 18-34 ans\n", + "1 Yes Alive 19.3 18-34 ans\n", + "2 No Dead 57.5 55-65 ans\n", + "3 No Alive 47.1 35-54 ans\n", + "4 Yes Alive 81.4 >65 ans\n", + "5 No Alive 36.8 35-54 ans\n", + "6 No Alive 23.8 18-34 ans\n", + "7 Yes Dead 57.5 55-65 ans\n", + "8 Yes Alive 24.8 18-34 ans\n", + "9 Yes Alive 49.5 35-54 ans\n", + "10 Yes Alive 30.0 18-34 ans\n", + "11 No Dead 66.0 >65 ans\n", + "12 Yes Alive 49.2 35-54 ans\n", + "13 No Alive 58.4 55-65 ans\n", + "14 No Dead 60.6 55-65 ans\n", + "15 No Alive 25.1 18-34 ans\n", + "16 No Alive 43.5 35-54 ans\n", + "17 No Alive 27.1 18-34 ans\n", + "18 No Alive 58.3 55-65 ans\n", + "19 Yes Alive 65.7 >65 ans\n", + "20 No Dead 73.2 >65 ans\n", + "21 Yes Alive 38.3 35-54 ans\n", + "22 No Alive 33.4 18-34 ans\n", + "23 Yes Dead 62.3 55-65 ans\n", + "24 No Alive 18.0 18-34 ans\n", + "25 No Alive 56.2 55-65 ans\n", + "26 Yes Alive 59.2 55-65 ans\n", + "27 No Alive 25.8 18-34 ans\n", + "28 No Dead 36.9 35-54 ans\n", + "29 No Alive 20.2 18-34 ans\n", + "... ... ... ... 
...\n", + "1284 Yes Dead 36.0 35-54 ans\n", + "1285 Yes Alive 48.3 35-54 ans\n", + "1286 No Alive 63.1 55-65 ans\n", + "1287 No Alive 60.8 55-65 ans\n", + "1288 Yes Dead 39.3 35-54 ans\n", + "1289 No Alive 36.7 35-54 ans\n", + "1290 No Alive 63.8 55-65 ans\n", + "1291 No Dead 71.3 >65 ans\n", + "1292 No Alive 57.7 55-65 ans\n", + "1293 No Alive 63.2 55-65 ans\n", + "1294 No Alive 46.6 35-54 ans\n", + "1295 Yes Dead 82.4 >65 ans\n", + "1296 Yes Alive 38.3 35-54 ans\n", + "1297 Yes Alive 32.7 18-34 ans\n", + "1298 No Alive 39.7 35-54 ans\n", + "1299 Yes Dead 60.0 55-65 ans\n", + "1300 No Dead 71.0 >65 ans\n", + "1301 No Alive 20.5 18-34 ans\n", + "1302 No Alive 44.4 35-54 ans\n", + "1303 Yes Alive 31.2 18-34 ans\n", + "1304 Yes Alive 47.8 35-54 ans\n", + "1305 Yes Alive 60.9 55-65 ans\n", + "1306 No Dead 61.4 55-65 ans\n", + "1307 Yes Alive 43.0 35-54 ans\n", + "1308 No Alive 42.1 35-54 ans\n", + "1309 Yes Alive 35.9 35-54 ans\n", + "1310 No Alive 22.3 18-34 ans\n", + "1311 Yes Dead 62.1 55-65 ans\n", + "1312 No Dead 88.6 >65 ans\n", + "1313 No Alive 39.1 35-54 ans\n", + "\n", + "[1314 rows x 4 columns]" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "donnees" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StatusAliveDeadAllratio_deces
Smoker
No22162270.026432
Yes18271890.037037
All403134160.031250
\n", + "
" + ], + "text/plain": [ + "Status Alive Dead All ratio_deces\n", + "Smoker \n", + "No 221 6 227 0.026432\n", + "Yes 182 7 189 0.037037\n", + "All 403 13 416 0.031250" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "table18_34 = pd.pivot_table(donnees[donnees['Age']<35],index = 'Smoker',values='Age', columns='Status', aggfunc='count', margins=True)\n", + "table18_34['ratio_deces'] = table18_34['Dead']/table18_34['All']\n", + "table18_34" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StatusAliveDeadAllratio_deces
Smoker
No172191910.099476
Yes190392290.170306
All362584200.138095
\n", + "
" + ], + "text/plain": [ + "Status Alive Dead All ratio_deces\n", + "Smoker \n", + "No 172 19 191 0.099476\n", + "Yes 190 39 229 0.170306\n", + "All 362 58 420 0.138095" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "table35_54 = pd.pivot_table(donnees[(donnees['Age']>=35) & (donnees['Age']<55)],index = 'Smoker',values='Age', columns='Status', aggfunc='count', margins=True)\n", + "table35_54['ratio_deces'] = table35_54['Dead']/table35_54['All']\n", + "table35_54" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StatusAliveDeadAllratio_deces
Smoker
No82491310.374046
Yes65531180.449153
All1471022490.409639
\n", + "
" + ], + "text/plain": [ + "Status Alive Dead All ratio_deces\n", + "Smoker \n", + "No 82 49 131 0.374046\n", + "Yes 65 53 118 0.449153\n", + "All 147 102 249 0.409639" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "table55_65 = pd.pivot_table(donnees[(donnees['Age']>=55) & (donnees['Age']<66)],index = 'Smoker',values='Age', columns='Status', aggfunc='count', margins=True)\n", + "table55_65['ratio_deces'] = table55_65['Dead']/table55_65['All']\n", + "table55_65" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StatusAliveDeadAllratio_deces
Smoker
No271651920.859375
Yes742490.857143
All342072410.858921
\n", + "
" + ], + "text/plain": [ + "Status Alive Dead All ratio_deces\n", + "Smoker \n", + "No 27 165 192 0.859375\n", + "Yes 7 42 49 0.857143\n", + "All 34 207 241 0.858921" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "table65plus = pd.pivot_table(donnees[(donnees['Age']>65)],index = 'Smoker',values='Age', columns='Status', aggfunc='count', margins=True)\n", + "table65plus['ratio_deces'] = table65plus['Dead']/table65plus['All']\n", + "table65plus" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On remarque que dans chaque tranche d'age, les fumeuses ont un taux de décès plus fort. La tendance s'inverse quand on ne regarde plus l'âge car dans nos données les fumeuses sont plutôt plus jeunes que les non fumeuses." + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "copy = donnees.copy()\n", + "copy['Smoker']=donnees['Smoker'].replace({'Yes':1, 'No':0})\n", + "copy.plot.scatter(x='Smoker', y='Age', figsize=(10,15),s=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Le graphe si dessus n'est pas si parlant, on peut sinon simplement calculer la proportion de fumeuses par tranche d'age et voir que cette proportion s'effondre pour les personnes de plus de 65 ans." + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tranche\n", + "18-34 ans 0.452500\n", + "35-54 ans 0.543578\n", + "55-65 ans 0.485232\n", + ">65 ans 0.203320\n", + "Name: Smoker, dtype: float64" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "copy.groupby('tranche')['Smoker'].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], "metadata": { + "hide_code_all_hidden": true, "kernelspec": { "display_name": "Python 3", "language": "python", @@ -16,10 +2252,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.3" + "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 } -