From 5b31e1f821de4d3483d1ca7d55b989c60aa5c965 Mon Sep 17 00:00:00 2001 From: 1f2b5799c7e6329c0238867cf93010b4 <1f2b5799c7e6329c0238867cf93010b4@app-learninglab.inria.fr> Date: Tue, 26 Sep 2023 22:38:57 +0000 Subject: [PATCH] done --- module3/exo3/correlation.png | Bin 0 -> 17400 bytes module3/exo3/exercice.ipynb | 378 ++++++++++++++--------------------- 2 files changed, 146 insertions(+), 232 deletions(-) create mode 100644 module3/exo3/correlation.png diff --git a/module3/exo3/correlation.png b/module3/exo3/correlation.png new file mode 100644 index 0000000000000000000000000000000000000000..dd0baa2a774171d41ce4bd08a431cfe041f27ae7 GIT binary patch literal 17400 zcmV)iK%&2iP)U8P*7-ZbZ>KLZ*U+lnSp_Ufq@}0xwybFAi#%#fq@|}KQEO56)-X|e7nZL z$iTqBa9P*U#mSX{G{Bl%P*lRez;J+pfx##xwK$o9f#C}S14DXwNkIt%17i#W1A|CX zc0maP17iUL1A|C*NRTrF17iyV0~1e4YDEbH0|SF|enDkXW_m`6f}y3QrGjHhep0GJ zaAk2xYHqQDXI^rCQ9*uDVo7QW0|Nup4h9AW240u^5(W3f%sd4n162kpgNVo|1qcff zJ_s=cNG>fZg9jx8g8+j9g8_pBLjXe}Lp{R+hNBE`7{wV~7)u#fFy3PlV+vxLz;uCG zm^qSpA@ds+OO_6nTdaDlt*rOhEZL^9ePa)2-_4=K(Z%tFGm-NGmm}8}ZcXk5JW@PU zd4+f<@d@)yL(o<5icqT158+-B6_LH7;i6x}CW#w~Uy-Pgl#@Irl`kzV zeL|*8R$ca%T%Wv){2zs_iiJvgN^h0dsuZZ2sQy$tsNSU!s;Q*;LF<6_B%M@UD?LHI zSNcZ`78uqV#TeU~$eS{ozBIdFzSClfs*^S+dw;4dus<{M;#|MXC)T}S9v!D zcV!QCPhBq)ZyO(X-(bH4|NMaZz==UigLj2o41F2S6d@OB6%`R(5i>J(Puzn9wnW{e zu;hl6HK{k#IWjCVGqdJqU(99Cv(K+6*i`tgSi2;vbXD1#3jNBGs$DgVwO(~o>mN4i zHPtkqZIx>)Y(Ls5-Br|mx>vQYvH$Kwn@O`L|D75??eGkZnfg$5<;Xeg_o%+-I&+-3%01W^SH2RkDT>t<81ZP1_K>z@;j(q!3lK=n! zAY({UO#lFTB>(_`g8%^e{{R4h=>PzAFaQARU;qF*m;eA5Z<1fdMgRanN=ZaPRCwC$ zon=5&>D$JyA%^Y{EIYBvcSYx@)fY z!Yg{}VORw}VdqVM#bjr!G zbkltj|Fzt!m{0D2xn(53*74aSJIirj`htL`{kEJ!eF{_^fQ3w6NRf zPwf9ERqj{u+F_YIrbFk(O+6blE3f-!KEFKJ?1S9@62e2?1%^tmYlCaU=C!JLl&tuc zOvt=LuO8<4SftKwi7RE>&ca{Pznovy zuWktgU6Dxk%>w8HI%2)jpA#lIHa?n4k|BLX*w2Imp*hcBD^Pkxx)PR>1JwYOzmhMz zM8-XPBi&2fwhk5DhRGhBB;i9!{_oG@?Sm`m13JA(GJR_xnfmtd~4! 
z*f(=4Nd}=eci!Yo=-nlmAbw@u+N*2%vtX?039^mv(QJlkvjvMxab1F@Az7U?xe?PGP;=4Azr8iPg6;k0gg+jV&xia9Ekl5hykanM0|0czvc)6n*I%!n#$q~EC&^bhcpCa1Ui%*&e5Y9Uxq-S_e>(+llJr5l z-snP-$1qa*V3(~&k||a3OuBwK+eLCdL|txhBu>b7a211j8(GON_dPi6oz+&1&eJAnA=26~r$t zcQZmaj+9OHB+10m`T-=_60vh}`82SHtZmTeiVGq0_8H5FoFiRCV_JeNq ztiSESDw0eo2Qdzl}%&P`@T2qu$A0~9*|=IuJ8SGS%5dCb;@y5HH{)lOApC zgB~p7$?o{H3?VK1##Q4qe${QgCoY}btX#dxdrnmmNhboB>Bd~>Afca^US5?XJP*M3pdWf7Mn?q&zjLQ<^QrxcmPvx ztsh?wBWTf&66GvoZA)ee;D>~FIjOW;J$UwLJ&(Hppe;$ZER`xe(2u0scEb%vyU!$< zE`T)cWi|tEbmBP03!;bU*5#uQo#$b2x|%eZiys&BVs;Q zb2H9nsm$g%=h*5VWTc_=ZEFGNSdwHL0J?YOw`&eS$E27}i8zI_&znX7N=?6OVe*0G zzX0ZtWOdKAShN=3^A4u8Hi#g}hIk$4-O95R)@HKYD@Zs;E&)z4B&TEg-f5S!pKs-g zyaoU;M%!dNl&Dl|xiXf?PWD;1?gGY;DG!b>%u2GfbGKquzY}(WMi|wLwa|$ z@JRB2bGh=U4zS9}K3&S}8>z`myGn;slNiL>gBpFexa^_pG zPngWTeF4!rlCuGzPmc5TB{H?SMpk<)EL1a0*9=(K*ddX8=8W4+k&t8Yqp*tU%>b{;Wn{av}b?iOyknos8cOhi%6m9qUyw@8E8?e$fDX zQKr0}YTA-o=q3_|J@`|Mmx&WgDp%i z-J2w9G@?u8TaPcU>}uSBCv{$s>3ty6b|eX(#=@fsJyoy~`3@M=%NuJf59uLB>>?LXvyWFupDT`@hJ?CIB37$#&r( zNN=--)MekjBzIT=IywMgFUK+|jSP(^$#n(~O9L?IdQrg~y9^N7H|^54&$3LRJbI8W z8by*ZzqKEHkz~r_i~stEMy4mN-*)W5yHt6hS;CBcEo+VOm*08Bs%XBI%Gcrdm=ei| zaA77qkxc&MOOihIQ9e{NKZzvMjCmrI7f1fsbNJp5xy<7WAS%^k{*Ay7afyoF|D-^9 zUr|cV-AIy^*w$kvJGo9xAbIQ1Yhj1ID(xW2_)HydMeTuGBS`Ym$o{LdTv5;0Px5Nt zdvs7hs8rDi-dx+EtY_CXPm=OHQ@`zezHjf1FoinF@=|l2;yyidIq;irpE++h$r{pXOhVg{`W3!`728L`{Y!?@)IBFFS{EN zF9qc>Ay~UC!+PqO49+W@EnV2&-{;x$$1;`Cr+olP#*Mr}ru@5M-R0XKDDz_->DFM- z6$wdZe7Le=K!<~m+ql}4nG&NO%d>VsS$UXgAA@2fBpJMN@RS?U`;+qF=B`z~3ildy zLC$8LkW=Rk8M2Q2w=w`Ue96nb6G>)_KF#FKy&6}rGj@KGv87(wPRnBzu{;2$*Am`K zOg-^qWk+3|VsB%eVen#06DKP}0C2DOiDb%Z=lbNA_)Li76Y4(Ws8W%+}^fjF2SXwi+2vlF89&(TNO=AoQ`mFte@lZx!fT0#)8AVA(}#fBq4Gj)z+-$qdxNgbuI} z88+`eYT*!EVCs&lfMGGgwrAKR`Th=)N1;8gb-%bfjQVpUnlwNzQ-~4u%O)7yD4lxqLdwKy2g$cwP4WoEY5{Lm$UI8;*t< z`LaxNwGjL{TgToRfS2S`ohq@O#a=y+Dqn#lH{;l&+CcZ450hTJ@^h*f8kNRz2w;E% z{z_jOydP+CNjhB|6QlLXZ4L*j?T|hkHR4I~dSw7sPe?MLWAV?*SWd%2;X_5iza3N3 
zzAZ+uE~;%0`ncvG>%*JzOgJRiF!@Q`og`VM7{}lb;kyP1BI$<)B%_-G@OqIb+@k{| zlN-y$FDYkP558n#!V@bPp=t()VsISE^>C?)9wfOOcizJDrtq$JD=92M*U_yb0El!T zcEsZEtW!TYU>$FZ>vAD5gN!n6NRm8-(^;n7I2ZA+5egJrCPLvHvv zktAP{_7>NMDA-*AYobsL$#`t0-5=q9})Fj^yhHHJ;H0v8e$^w@HFkE$(lZ4=oyQq@H z;Ew432+c_z5>?<)EEcf6dW;&2#!Y!b(xYrTNxs7OVcA6n?y zJ|h?5DyO3Ew4Yn+mrLOq_>!E4$0*8*5bBd;Vi^F2zs0eI?x};*K9XF8 z(c;F`)TB~^q7kPgC&@=yl=;5aFSJS@CJW7GK*b@pDp^^r_;s7jJ?W#G}Gt_!{= zx8f;Dk9urn9!v6r&ZM8jiAV8UhN+>*9~ms-U${lGX#?pmezHzru)u3X>6>yv z!m$(Z7s+`L{QEDlgk(x(I9md!oWbx*(`&%?H21U;8*fs^^0E+iM=}r-NOBGgDt*7Y zyEN>31vZl;2gt-&#+%QU&dsOoT?7YLw55lHWO|v_`**w{S*w`T;W`)R$=40x^=8~n z6hGW$%DwT&W$+g8pPzl5AJ|PFDCixfDmp%tvYm z?vcC;z@g8aI?^bgn;1eepcD2fVVT-ce3m5tcC9Q>t!wd;8ljQ6=;A6H&q_uU2|^dh~ync#`!Ok7QvZ>G1HZL=b-m9Z=?JRt1_ zeb1@iF_@4$nY9P-55 zyjG&K2YFE>lD-T>b>kmk5lM2SD4e}Z3b$ks0El&A_9;w|wfGhb`R>UMQ;$#u`sGl} zA-yr0WYe;2ZZ2pj!_Csp<2N#`bPN9R8@bb0O z`|Euq8JRvyC!FLWtbGvhZlp{pYsLc^+lV z;Ch?vC=Ih$VZE$k_LTU5Tq+i<=0;_&Pi4vh>- z&g%L7j3zn(Bsm%H*#-V&vSTAA)qx1GxgV`xhvZC%?nx(@_wS{_^TY6-!=?E*i6>W$ z`=Q0y(Ovmb-23pc({~;pF|e=^0q$=U*?p4Rpg+=0!| zr3yUXll)uHpQNPH3XVa;b+g<}s>2M1!Yi^8LoToBZP0zFdB~NuzMd6?*)*e%g`hLWcz?{O1E6~{y-=krgj_3<M^ zer_xnP5L^-B-@OX>H#MhlijkHp3L~t!rALpu`1iB&Gb)-?Xd~HfijOtGUO9KO9MgU zZ}79tpCtGD@KHGwpAkip;X*8$e>~&70F=BC)2aMasX!Ct3;^zzX2TK?VJJy*jb7|O zuq=gUpINo~HaBeN@0l3#wZ}gG>qI!o17g6s#n4Qn|C=!*H^h@4o57&&>bxR&5(l&% zvH5*gfKj^iWAckzC$F5pn?|Nbsg`;x$MS(K#Qm>?@qmGj9lE9Q4u+HbTo0G;2zvC< zR*xCF#%t`VQ~v3UJb&-g+xuq)`7JS!OwGqlktU7azqglsD40D%55(oQ<!X1kH^!5Xh18%h@3&#S zU;n@NMJMKbyae8#w`>R|`RIJ|f7*YUdNAtCpWnB*g8zlMf^RJIVrllDVfj~~vrJcu zWs2+9#?tgZ!*aL!EZ?90ed;gc*Qyv9^soCpwOE$@#!?F`6Qwa9A$cV*BuoDoFs81a zpy*_VQ>WiqGh^1WV=ttR^lMoW;u*=L7mj^nS>!C+v=$`)HuXp{Fnb>7kJ7S6$DrHT zd;8`+-Cd)zd|bkqm`ZZ40j6dC$h$rRjVh+3J=-~ z7f$k}k>gb|A?WG;Y0Ee6I~)5Ujob?u&Ws@63K?eoalI&51{gzpoFp5Rm9RSi5lUWp zY9N@{(@OQo{K-YE$hO1nI8Bm~&9UJWEKc?XMsGmxbdn>D(oPQ_1-(wqF#W;5JC~CXjxAJmYvpo6PyEP# z(ShVuG~;fVOj&-3WP1y5Q|w<~bV#eULd@443aB=vl-cQPOnORil40^zCcQN0RTz{lcS1D-=_8^ 
zp9{qDlZj3Amt`66fhmG^6{y=hsk~n39>Fiu+$g-c)EjjN!+28H@iX}Z<=IC?8M+BoAFEwOxsmGrQQW{mOhU{pya$M-?Nq^KLc>v><3DrZ!;E34kFSOx4!LOdrSfU%DC6=E=bGZV4GZ4%7 z36_7(vssLXtXG}fgsd2yxyYVE|3Es&j)BzKKce_x94ucVf^}*LTLj~mg0W04i2cIa^^oSyO-CzMwf6NIpe7lG|{kuAVGRANcUx{n-3J$K2uF$QUp2Gb{uny-$WcegUh_YvcpBKWnZtClZd`a>ymX1e+ zH|A3sW5#RJIxw2+myO_4Fe;M#Qe%?BRblxev-r;iSPvc26EzPA;SRlZJ~B2nqB(rt z3zaGGJ$VscB-db{xo~9@^+sSbLu{QV%OZCRnPsf$7;*wm@5Rk!xJr@%;=R7O-ygWb zy3#jHl9JAL_{aCix~w~X0KO#OlqoeF4@icTZOlFfG0`s8r}VqX4syMKEQd?q{d~tm zf5tvNf9l5V7v#%4l>1{=@{d#8>022L4kO9Y&d2@cjKGC)ZwP(r$D8tB2rC239QL z$DVl2jWC5!A&65DLOys}0G3OUXHne8cVusu_= z`P2g#PEUtGGSI?i19Sn@d=TSmtq(wmoy@_~2hvhD(T?^HzhoaY2-%yPV#%4Dp@X_I0j@tcraobe7@z6G@H+sxjQ!@jg># zU^D$pP&27NP(u1J?X}j|GxuoRW!}N8oUY?==~xUU&N0N^c+q&B($C33Zi(a|l$APZ zC)OlC+Luh!oaG*&XkgCKN*eRWcOsKnK92y{$_!6* zs)0Y3q60Yu-(3ZI{N4e^dyAu(XUpboKZjoT z_qrH-8ii{lFJT0^5bH>GfL~u+BiCY4q@G6x$r-?jYseuw76!`)=tnZH5$2LhpjV?L zhLG&4f0G&8+hsnhNg*FT<%p442I!Pt97pnOIp_}zCz7h}z18&am80H25L%G~OnB*IDB$r?l z$%C_b1x{OfqCejVx=4Ioj|h$``L57e-WO5_-YtkDd8VbU$CeC|u|Z0Am~iV>kQ7GZ z<9t&vQ_jZ~fTd5SN|0^Lmf0Xy;bv#?#*?>VNcO{1(jViqZRv|0>mkCCj~K=gxI(VQ z&SYmjBg|%$xR>Ph476hvm$ZE4*bnY7wt?w_9&W-TGJ6y!0`|TnKQHSu_G}u{gmrec z4;j2%7KXG=ADBtT*Az@;aS@~>PEzPBQ%A}7KaeD8(%C|zp$f}q(mLfi#&KD6dmp{1 z?f_6nLN>67ABxi3lDjH8NYHX&?c3U2E!TxhR<2KtpB|i8Tr_K zqLwslI>C>TS=RORzt<=aIC`?~5lN=1QXOcrVYHT5ezBnn%MiT=dHvKM9hm++jmeI! 
z*i>&szd8%aRHqsw_u_K4Eid8^KA(|DA8Zac(l4bC;CrT4!wE7%ciIF1vr=3k1`d2V zpI5_*{9j-xD@>bnR4Bziv6m09jtzt&=ZrG!mIupoSS`KY3+JJ0T*kv4^~oa`Oom}9 zgZH^&YaJL?>g)H#Sf&*3^gqEeqpWA1zindhx(XU`4eqeIR62^#9YOLG4rbf%s5Dt& z4t!SPXiRDb8=6Nk(!x2J;X1QP`kA}i)(p*1;PWc%@hpe`>HjNBc7$JEUr#|DU2C(@ zpWv5K!hZXsfI#23F(%)We1##D#HKML)1Wa=`hzz^gk4R-HOAXJB{RyPJ4tp2DoVYi z?*<&i@9>2|3;~>9`40zH8c{xSJ%g6aZZv;B;r5bJu_VtG?N;CT zjxfgc0jTGu4~XAykrlWvQkIqL@_&Qn*r^mMLypm;%0DmS+rZeQA~_aiNG<(o3rC zYj-G)FD^QkJ%8etX89nq#bcqfJeHXhuf>tk?Znr?a`3e;&N4(U6R40`M%bEs(E2TW zlvR?kC|O3_S~sKLcR$X!km-C*mwa)S(oSwg!IJ%z6ywok?82rZXBqluQ%hO4ly&#J z1=*E$S^i`lUsNoQDGcD(Z2XG*MaTK0KO$PrlcpJKU1CdikVmih?BhJl-{gG_%2u9bPM)jVLMsC80fc;)&1A3~4!uYe^N zWEaHlnggimed@vE>u1MUS^$QNzEYNvC8{K8&axi>n6%Uf0Hdk%0d%SiMdpl$I{7HG z+obvAUjfUEP8gf@X2#V(yJ@>*0T~gW&UFDsew{35WcIAmfaQ(M*Ad3ULN2>XKPw1V zUWRIUJI0sF5&{dGr6eICDPFO2KB^U3W!9FM3HKazCe}e=X7?9k`BbM1HD}pdM$&O< z^v8ihTLf`Ol+NGw@Ev~4`w!#tO@Y%Q%ji=RTh(-OE^S+`?*Te zo8{|aLKil;cvu0|SFcfE+gLVGI;$RlW3I@&v>I6ds-Yfv?rB%@TSScDWROMh#?apT?DW#mBiY?_bVLT;?dyWU52`JPXjv&`s@ z9Al2cvxzTCs?YLN<9<`rH3 zR;>403oM@-0f4C6+ZCwwhYLypKs813Ps=RqD7YVfGfP(2yo~us3xtF$lIu=)13r^{ zr{|iYWald}CXa8u&|VArWs)0!x*=(O@xx>TjOh*h!mf&Jy$kX(nq5O{EwJH zajpeMvA^SDj_N_=a|fXZd||Roa0akB?BT-@c^An}xo(sBjO%dE9VXFUaldp`S=I+S zRsjIF4Y%t6eWFRe7v*2T{GHe)57W*Kw9o>}+7OA*Ir}X2+{{3UEoBPKvMr96OQTy# zVY8G}2kdfbuoIQr3c`zFW6tmH>fodnSjw7t0&Sb$S zR)Xo60(<{8)X!5hm|1X^&s@b~JpeHDdQFmPH;0&^OPmTfzdN8)wo{KfCIE^RICoVg zmfL{3IbWt@3MK9=NmeAUpr_7-v?+H%S-#MR2(azD(pT=@&Q_2DaV1Z6xUH;yOo7%* zP6A*g0w8WS?#DhA0nr%MR=%tKO96mwuXUE_m{JrhHv>44Yu??X%Kpa#BV|u$#I$@T0Ca^;b;U+JRe7fg56_*wAIrNy zK^p?7#&Rxz%GvLmzmapCN(b0z1ox$PLq0v&Gr$>8F8iv;GQb$QFeKP3%7ADgUpien z_CuW{!~9t$yIUG;-)|3>n|*a{rr*$@Ud=L~z{#ts+GCN!95*TN^7VPy&YA--G6rB= zPY+kM#4_0(C1P`Y=d19XHXv7URz_X?rWKaydxAqGYz^breDhfo42k6dRLdv=mNft% z+nl-@?keZ5r5)^C+|aV}kONWIpu94es`}-RxqYMa6-MvJmdUl6Zh*@tt*|6X9s*>A zrPUsn0Kn)?;j{c~4*<~3a(Mp+z0>L(B`zc?on$vCPkd38t1P9-hMZn=K&MEl>+AHvvH2b#0Z7hdO}m 
z9lHXS)KP(DaO(+MVOJbFGNO9jHJaEDDbIaUgXM1kn#=w1jGbcJAW7bF1N4g7nOL}0 zcT^!hNIjN9XhVICDC~xn*e0p3n)8H4vJcZ%-#hffabuul?|O}9szvhh0hxr}1ArK= ziP~W~2Ofs7nkR%(M<3__tW|z8@yk36^bE{&0a4SR)RJM9vJ5f>P|q&^=w?NuJDM1| zb9Wv=EZS!&$jvT!nPrt-e?>S7m*;1UifC(#TiRjS7qg83ShX0`r%Mw%ptx$&etMAq z`-m4eF27-We6LNGHvnMz0QD>0lYDMqJPS?eZ&)yMff_82129*+cE>ysx=Rg#9=~I1 z2WTf}Pg2#qe^vmV$aSH*-A#RTU>T%Emdjvdh!rQ4t|57ik!ZaOZOQuhvxr!B1c0+@ zA7KS-LrAh6W^cjLmCD3+6SKDdZ>C4h-@Fhi4yWA%WX zKAeKEd~IO#sWaBFsXE4b>DYogT#Di3J(zze5|*<7(<*47iK)>wmRy5+oQ%KHipljS zE(l99W7An{U?bmYA~0M5piWGIGL~!bJ-cE7|AgA@*E8y39S2|vnOXsRi-Ki_C*ZgY zCV}efl{GPoY=rq7gk9{R#DuI)KFI_CgEYJDaj?;Y=&sgSw!{{8#e4=B8NXF`|!H>R3YpWgb5@)%0R6#>if0KR8cXi$7FxjMQH#Zbl;!y}$SQ*E(a z005e`HaLL_#{pDK)Dp{+m_h!4N#uFdQ}0wl9k7U>t@Hy(1{xQnGPcSrI|CTZUU^0@ zD;qbA07D-Jimk%=TGup4J1pac3J)5&CVs^je>8MeuWy3_mZNZ+9ElNRyi48*TnfaA zl2LpJmn4#Gthktic37sA2QY@qp;5uR-9T5;2bD9JQW3kg!}2srxuKMTTq*??gQ__C z22;J2SmRY@8E)B-@C3i~Wbmu9EKGPb@8$wRpITX{#*Xo6*n1Kok$t3d@6-Npd5;CwU*m)hSE57M6kJ&(eU4 z%j*4DGnV5}xi;o7%odusNEq{X9Jw29NM2B^VXDkB%>$3x;a&^X;*PS7kkVMpbJ)kW z*sc|p196Dt8I(&U*;_5^O#^f$Uztkt&0L zZLquxr=xqrQ)QM*Fqk#rmsZ*=SZy7t)O&I{?(mdaMzefb zUdIn48DWFFB-3lD@wWFzwKVcO$fWz#d8P7cm!&sqvNh(B|3XbMO>XoS$|_7&SSej) zmbKx_I(Ws!>ehXGV}}hS$KWx8#g!ylV7XpMo=>kNg!1n~Syh!@Up=fL6Uzc-0aCfd zB&d*C4#SV^gV|(~o91;c8e^+qb*Pe2aF$Qult@q9V}PNBTF#rKhUiL?eGoua$6GD1 z942UmK03k#8R;Z#m8yJnfqM)YYz{~(nuObF?kiD|Wj!q7FbpL*9Gx^gRBA;*ObNGF z^lGchasz%KJ@Jfet2{|be*6O0w<4X&nM}Iu-^&Ld63isN@PP zon`5IeHEv{6`bXB1Ki{oj3RjyCK`6DyCw6w$llJPz&CTyjQADc#U3tG>VCEGvm^|EkTZbq>Pj>Smf ztLQ~Cq?+X|7(6d1%jM`uvH|Xr+z4k? 
z??XgobP*QoWLMlF*#@@?&QdxC!h(cwX{5TpF|LqYqBP}Ib(YP{!ui_3QGLT$Jsf7u zg5|fX%cei7tUL- z2rUAZ$?lj${tlpA0?C-&8uT*e8!?_9M~P@<+o{H~5xhx;S_m2|m%~AoK$*}VGP6h} z)qu)rBzLPGnV^K_T_G5%0UoP)Yc`+>8CP0Sp(*XXmp=@TAxEHc@dB*Rdr*guptzBp z$}Atj(2wL5plTXPrnXfx+=Id9fWa%0DXXt(iRDJ2p3m>E=9-9RZNEPuuuk_<7arrxGu5w&Kd}&->gN7O(s1Y8L&6S6ksKQcGOIRNV8;r-#nw`!H^k1iL+?NuT zt;}PkrV#bQRvtq4Imv4<@-HHm&voE`Me}sePv$O3IKB-2!WvDzhAmAIYCoDg=|a zX1A8)KPaE71(q2B=_L6I^En&qG&!9cFn!Pdx#~O@gypr&-c3W*%A>pw>6-tG#tnni zW!V@T$^Ebl*6cV9@sQ*rh@NPHrF87W1)fwpTZ<$)8{d;l)l8;U%<}il{&;893U%;? zyO0ij08Vj5!ZOgv^c@*sgcBMa#$S+{KHDly99Dzn1OzhJsH7%Egrum68ik{h{iW`M4obuRRc9G( z(TZeuXkh8T3#%|vQUj;7#xhX1VrI0&5e!v%rxUWu`fP$zY!R?buPpR(JPNf_U!>}= zAa4Guv{9$(EFWSO$#FQW{`ssD^3^6NY4)uS%d>I|cc?{4RkfYy5?PspLFO8yx2n(b zxwOgJTVsvp^jq*qm)f~%YK`SyTqAi%S zDmQ!mS_CXN$YcrV0USw}$CS+=%`Cz2FWbnBg5MgFwUa(>qL z^)+aLJ1MJ}^BFjVYlr0mXu z0hh=t+*B#Y=P^vOU*88ao3O%Vc>)emS(BvNhL%0@*R>xd zW3|EZOr~DBMqSgFS5_3q8HKqms3xt?e7Wz;^} zNco8)yqMykAyrv=XBI_z3|qBAQd7%kElLR`EhDtSa#JU+!^ajueUC)H^% zrUA<|5Bb~QNl`PqeC&VnQ9j=(Hp!~WvZ;w|sz|SzM^$_U;~C^)5qhanxI+QUVY#AW zpBktT=r%ahPbM2Nr8RS|tIG1FNIuuW8)eiBt$3xM%|0+&vCCCOmLZ09V?Pbl}MZ%{lktYhs^0#bevb(@NwGMFrrDC%!bP9{UsmO9a z=4O8Og_SyC!VTpW(tp)xW~!j4MOo`y0^tKarS-@YoO4GYJkrsBScojMtCZ2jr2)pO ziyGPdWFM^+dNx#K*JI4`M?A@4ZJYVO!*Zr+ z4$sKZHfmf?l3Om5=O#tMlS(W%oco)oTncqdPQyxCWmN+@{vWU`+MyE5cl)z9+LpXhIlDJ^yw(I&tZm;| z78c6_jQ+UEJS-NVSWxL#Vg@!5?t4^%|nEz*3{{5A)Om)*b%fsDrIS4nx zI`A9I{~*h}#+cf{-&p=XVVR+tKl&TX{{TxhYz5y~eq;HK&l9O;=1^X vhnpPBebD>Trz1jjIhIu+`igFqp7{R&B9`h1^|ElR00000NkvXXu0mjfpWl&= literal 0 HcmV?d00001 diff --git a/module3/exo3/exercice.ipynb b/module3/exo3/exercice.ipynb index 111dff9..11bb722 100644 --- a/module3/exo3/exercice.ipynb +++ b/module3/exo3/exercice.ipynb @@ -4,9 +4,55 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Simpson's paradox\n", + "# Subject 6: Around Simpson's Paradox\n", "\n", - "![SegmentLocal](simpson.gif \"segment\")" + "![SegmentLocal](simpson.gif \"segment\")\n", + "(Copyright: 
[tenor.com](https://tenor.com/search/homer-thinking-gifs))\n", + "\n", + " __Prerequisites__ : Averaging and ratio calculation, simple graphical presentation techniques, possibly logistic regression\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "In 1972-1974, in Whickham, a town in the north-east of England,\n", + "located approximately 6.5 kilometres south-west of Newcastle upon Tyne,\n", + "a survey of one-sixth of the electorate was conducted in order to inform\n", + "work on thyroid and heart disease (Tunbridge et\n", + "al. 1977). A continuation of this study was carried out twenty years later\n", + "(Vanderpump et al. 1995). Some of the results were related to\n", + "smoking and whether individuals were still alive at the time of the\n", + "second study. For the purpose of simplicity, we will restrict the data to women and among these to the 1314 that were categorized as \"smoking currently\" or \"never smoked\". There were relatively few\n", + "women in the initial survey who smoked but have since quit\n", + "(162) and very few for which information was not available\n", + "(18). Survival at 20 years was determined for all women of the first survey.\n", + "\n", + "All these data are available in this [CSV\n", + "file](https://gitlab.inria.fr/learninglab/mooc-rr/mooc-rr-ressources/blob/master/module3/Practical_session/Subject6_smoking.csv). You will find on each line if the\n", + "person smokes or not, whether alive or dead at the time of the\n", + "second study, and her age at the time of the first survey. \n", + "\n", + "This exercise can be done in either R or Python.\n", + "\n", + "__Your mission, should you choose to accept it:__\n", + "\n", + "1. Tabulate the total number of women alive and dead over the period according to their smoking habits.
Calculate in each group (smoking/non-smoking) the mortality rate (the ratio of the number of women who died in a group to the total number of women in that group). You can graph these data and calculate confidence intervals if you wish. Why is this result surprising?\n", + "2. Go back to question 1 (numbers and mortality rates) and add a new category related to the age group. For example, the following classes will be considered: 18-34 years, 35-54 years, 55-64 years, over 65 years.\n", + "\n", + " Why is this result surprising? Can you explain this paradox? Similarly, you may wish to provide a graphical representation of the data to support your explanations.\n", + "3. In order to avoid a bias induced by arbitrary and non-regular age groupings, it is possible to try to perform a logistic regression. If we introduce a `Death` variable of `1` or `0` to indicate whether the individual died during the 20-year period, we can study the `Death ~ Age` model to study the probability of death as a function of age according to whether one considers the group of smokers or non-smokers. Do these regressions allow you to conclude or not on the harmfulness of smoking? You will be able to propose a graphical representation of these regressions (without omitting the regions of confidence).\n", + "4. Submit your work in FUN" ] }, { @@ -18,7 +64,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -156,7 +202,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Check that \"Smoker\" and \"Satus\" have only two possible values" + "Check that \"Smoker\" and \"Status\" have only two possible values:" ] }, { @@ -282,7 +328,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Check for missing values in \"Age\"."
+ "Check for missing values in \"Age\":" ] }, { @@ -313,7 +359,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "First, group by \"Smoker\"" + "First, group by $Smoker$:" ] }, { @@ -329,7 +375,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Number of deaths" + "Number of deaths:" ] }, { @@ -398,7 +444,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Total number per class (smokers vs. non-smokers)" + "Total number per class (smokers vs. non-smokers):" ] }, { @@ -467,7 +513,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Mortality rate (ratio of dead / number within the class)" + "Mortality rate (ratio of dead / number within the class):" ] }, { @@ -528,12 +574,20 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 11, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.6/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.\n", + " return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval\n" + ] + }, { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -549,7 +603,10 @@ " sns.barplot(data=df, x=\"Smoker\", y=\"Dead\", ax=ax, ci=95, palette=(\"blue\", \"red\")) # Note: in recent versions of seaborn, we should rather use errorbar=('ci', 95).\n", "\n", " ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))\n", - "\n", + " ax.tick_params(axis='both', which='major', labelsize=15)\n", + " ax.xaxis.get_label().set_fontsize(15)\n", + " ax.yaxis.get_label().set_fontsize(15)\n", + " \n", " plt.tight_layout()\n", " plt.show()" ] @@ -565,7 +622,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -574,7 +631,7 @@ "Ttest_indResult(statistic=-3.057733462432345, pvalue=0.002276255348902961)" ] }, - "execution_count": 40, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -607,12 +664,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "First, let us split the data frame in four age classes: 18-34, 35-54, 55-64, 65+. To this end, let us use a function." + "First, let us split the data frame in four age classes: 18-34, 35-54, 55-64, 65+. We encapsulate this in a function." ] }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -637,12 +694,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Check that nobody is younger than 18" + "Check that nobody is younger than 18." ] }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -661,12 +718,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Display the mean mortality rates split by age class" + "Display the mean mortality rates split by age class." 
] }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -750,12 +807,12 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 16, "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -771,7 +828,10 @@ " sns.barplot(data=df, x=\"Age_class\", y=\"Dead\", hue=\"Smoker\", ax=ax, ci=95, palette=(\"blue\", \"red\"), order=tuple(age_classes)) # Note: in recent versions of seaborn, we should rather use errorbar=('ci', 95).\n", "\n", " ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))\n", - "\n", + " ax.tick_params(axis='both', which='major', labelsize=15)\n", + " ax.xaxis.get_label().set_fontsize(15)\n", + " ax.yaxis.get_label().set_fontsize(15)\n", + " \n", " plt.tight_layout()\n", " plt.show()" ] @@ -780,12 +840,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Redo the statistical tests" + "We redo the statistical tests." ] }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -793,8 +853,8 @@ "output_type": "stream", "text": [ "Ttest_indResult(statistic=0.013780528783028018, pvalue=0.9890122465905076)\n", - "Ttest_indResult(statistic=1.783075769977916, pvalue=0.07588466008529735)\n", "Ttest_indResult(statistic=2.4009857289248324, pvalue=0.01677328896330688)\n", + "Ttest_indResult(statistic=1.783075769977916, pvalue=0.07588466008529735)\n", "Ttest_indResult(statistic=0.03927297913991199, pvalue=0.9687782291030265)\n" ] } @@ -831,12 +891,12 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 18, "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -852,7 +912,10 @@ " sns.barplot(data=df, x=\"Age_class\", y=\"Smoker\", ax=ax, ci=95, order=tuple(age_classes)) # Note: in recent versions of seaborn, we should rather use errorbar=('ci', 95).\n", "\n", " ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))\n", - "\n", + " ax.tick_params(axis='both', which='major', labelsize=15)\n", + " ax.xaxis.get_label().set_fontsize(15)\n", + " ax.yaxis.get_label().set_fontsize(15)\n", + " \n", " plt.tight_layout()\n", " plt.show()" ] @@ -861,7 +924,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We see that smokers are underrepresented among the elderly compared to the other three age classes (probably because many long-time smokers quit when they grow old and start developing tobacco-related health conditions). Therefore, in the overall analysis of question 1, $Age$ acted as a proxy for the absence of smoking habit. Since the mean mortality rate is higher among the elderly (irrespective of the cause -- after all, they may simply die of old age), this creates a spurious negative correlation between $Smoker$ and $Dead$ (non-smokers are more likely older, and thus more likely to have a high mortality rate). " + "We see that smokers are underrepresented among the elderly compared to the other three age classes (probably because many long-time smokers quit when they grow old and start developing tobacco-related health conditions). Therefore, in the overall analysis of question 1, $Age$ acted as a proxy for the absence of smoking habit. 
Since the mean mortality rate is higher among the elderly (irrespective of the cause -- after all, they may simply die of old age), this creates a spurious negative correlation between $Smoker$ and $Dead$ (non-smokers are more likely older, and thus more likely to have a high mortality rate).\n", + "\n", + "In other words, what was detected in question 1 was a *correlation*, but not the underlying mechanism of *causality*.\n", + "\n", + "![SegmentLocal](correlation.png \"segment\")\n", + "\n", + "(Copyright: [xkcd](https://xkcd.com/552/))" ] }, { @@ -875,12 +944,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We perform a logistic regression for the model $Dead \\sim Age$" + "We perform a logistic regression for the model $\\mathrm{logit}(P(Dead = 1)) = \\beta \\times Age + \\alpha$ ($\\beta$ is the regression coefficient, $\\alpha$ is the intercept). Compared to the previous question, $Age$ is treated as a continuous covariate rather than categorised into age classes." ] }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -888,238 +957,83 @@ "output_type": "stream", "text": [ "Optimization terminated successfully.\n", - " Current function value: 0.629619\n", - " Iterations 5\n", - "Optimization terminated successfully.\n", - " Current function value: 0.633082\n", - " Iterations 4\n", + " Current function value: 0.412727\n", + " Iterations 7\n", "Optimization terminated successfully.\n", - " Current function value: 0.687904\n", - " Iterations 3\n" + " Current function value: 0.354560\n", + " Iterations 7\n" ] } ], "source": [ "import statsmodels.api as sm\n", "\n", - "log_reg = sm.Logit(df[\"Dead\"], df[[\"Age\", \"Smoker\"]].astype(int)).fit()\n", - "\n", - "log_reg_smoker = sm.Logit(df_smoker[\"Dead\"], df_smoker[\"Age\"]).fit()\n", - "log_reg_nonsmoker = sm.Logit(df_nonsmoker[\"Dead\"], df_nonsmoker[\"Age\"]).fit()" + "log_reg_smoker = sm.Logit(df_smoker[\"Dead\"], sm.add_constant(df_smoker[\"Age\"])).fit()\n", +
"log_reg_nonsmoker = sm.Logit(df_nonsmoker[\"Dead\"], sm.add_constant(df_nonsmoker[\"Age\"])).fit()" ] }, { - "cell_type": "code", - "execution_count": 71, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "
Logit Regression Results
Dep. Variable: Dead No. Observations: 1314
Model: Logit Df Residuals: 1312
Method: MLE Df Model: 1
Date: Tue, 26 Sep 2023 Pseudo R-squ.: -0.06045
Time: 21:06:14 Log-Likelihood: -827.32
converged: True LL-Null: -780.16
LLR p-value: 1.000
\n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "
coef std err z P>|z| [0.025 0.975]
Age 0.0002 0.001 0.112 0.911 -0.002 0.003
Smoker -1.1657 0.114 -10.253 0.000 -1.389 -0.943
" - ], - "text/plain": [ - "\n", - "\"\"\"\n", - " Logit Regression Results \n", - "==============================================================================\n", - "Dep. Variable: Dead No. Observations: 1314\n", - "Model: Logit Df Residuals: 1312\n", - "Method: MLE Df Model: 1\n", - "Date: Tue, 26 Sep 2023 Pseudo R-squ.: -0.06045\n", - "Time: 21:06:14 Log-Likelihood: -827.32\n", - "converged: True LL-Null: -780.16\n", - " LLR p-value: 1.000\n", - "==============================================================================\n", - " coef std err z P>|z| [0.025 0.975]\n", - "------------------------------------------------------------------------------\n", - "Age 0.0002 0.001 0.112 0.911 -0.002 0.003\n", - "Smoker -1.1657 0.114 -10.253 0.000 -1.389 -0.943\n", - "==============================================================================\n", - "\"\"\"" - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "log_reg.summary()" + "We plot the resulting hazard curves (mortality rates as a function of age). \n", + "\n", + "The confidence bands are obtained using the classical transformation of hazard rates: use the Gaussian 95% confidence interval $[mean \\pm 1.96 \\times standard\\ error]$ in logit space, then transform back into a probability (using the *expit* function)." ] }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 20, "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "\n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "
Logit Regression Results
Dep. Variable: Dead No. Observations: 582
Model: Logit Df Residuals: 581
Method: MLE Df Model: 0
Date: Tue, 26 Sep 2023 Pseudo R-squ.: -0.1516
Time: 21:06:17 Log-Likelihood: -368.45
converged: True LL-Null: -319.94
LLR p-value: nan
\n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "
coef std err z P>|z| [0.025 0.975]
Age -0.0154 0.002 -7.982 0.000 -0.019 -0.012
" - ], + "image/png": "\n", "text/plain": [ - "\n", - "\"\"\"\n", - " Logit Regression Results \n", - "==============================================================================\n", - "Dep. Variable: Dead No. Observations: 582\n", - "Model: Logit Df Residuals: 581\n", - "Method: MLE Df Model: 0\n", - "Date: Tue, 26 Sep 2023 Pseudo R-squ.: -0.1516\n", - "Time: 21:06:17 Log-Likelihood: -368.45\n", - "converged: True LL-Null: -319.94\n", - " LLR p-value: nan\n", - "==============================================================================\n", - " coef std err z P>|z| [0.025 0.975]\n", - "------------------------------------------------------------------------------\n", - "Age -0.0154 0.002 -7.982 0.000 -0.019 -0.012\n", - "==============================================================================\n", - "\"\"\"" + "
" ] }, - "execution_count": 72, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "log_reg_smoker.summary()" + "from scipy.special import expit, logit\n", + "\n", + "ages = np.linspace(df[\"Age\"].min(), df[\"Age\"].max(), 100)\n", + "\n", + "pred_smoker = log_reg_smoker.predict(sm.add_constant(ages))\n", + "pred_nonsmoker = log_reg_nonsmoker.predict(sm.add_constant(ages))\n", + "\n", + "se_smoker = np.sqrt(np.array([x @ log_reg_smoker.cov_params() @ x for x in sm.add_constant(ages)]))\n", + "se_nonsmoker = np.sqrt(np.array([x @ log_reg_nonsmoker.cov_params() @ x for x in sm.add_constant(ages)]))\n", + "\n", + "\n", + "with sns.axes_style('darkgrid'):\n", + " fig, ax = plt.subplots(figsize=(7, 5), nrows=1, ncols=1)\n", + "\n", + " ax.plot(ages, pred_smoker, color=\"red\", label=\"Smoker\")\n", + " ax.fill_between(ages, y1=expit(logit(pred_smoker) - 1.96 * se_smoker), y2=expit(logit(pred_smoker) + 1.96 * se_smoker), alpha=0.1, color=\"red\")\n", + " \n", + " ax.plot(ages, pred_nonsmoker, color=\"blue\", label=\"Non-smoker\")\n", + " ax.fill_between(ages, y1=expit(logit(pred_nonsmoker) - 1.96 * se_nonsmoker), y2=expit(logit(pred_nonsmoker) + 1.96 * se_nonsmoker), alpha=0.1, color=\"blue\")\n", + " \n", + " ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))\n", + "\n", + " ax.set_xlabel(\"Age\", fontsize=15)\n", + " ax.set_ylabel(\"Predicted mortality rate\", fontsize=15)\n", + " ax.legend(loc='upper left', prop={'size': 15})\n", + " ax.tick_params(axis='both', which='major', labelsize=15)\n", + "\n", + " plt.tight_layout()\n", + " plt.show()" ] }, { - "cell_type": "code", - "execution_count": 73, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "
Logit Regression Results
Dep. Variable: Dead No. Observations: 732
Model: Logit Df Residuals: 731
Method: MLE Df Model: 0
Date: Tue, 26 Sep 2023 Pseudo R-squ.: -0.1052
Time: 21:06:17 Log-Likelihood: -503.55
converged: True LL-Null: -455.62
LLR p-value: nan
\n", - "\n", - "\n", - " \n", - "\n", - "\n", - " \n", - "\n", - "
coef std err z P>|z| [0.025 0.975]
Age -0.0038 0.001 -2.759 0.006 -0.007 -0.001
" - ], - "text/plain": [ - "\n", - "\"\"\"\n", - " Logit Regression Results \n", - "==============================================================================\n", - "Dep. Variable: Dead No. Observations: 732\n", - "Model: Logit Df Residuals: 731\n", - "Method: MLE Df Model: 0\n", - "Date: Tue, 26 Sep 2023 Pseudo R-squ.: -0.1052\n", - "Time: 21:06:17 Log-Likelihood: -503.55\n", - "converged: True LL-Null: -455.62\n", - " LLR p-value: nan\n", - "==============================================================================\n", - " coef std err z P>|z| [0.025 0.975]\n", - "------------------------------------------------------------------------------\n", - "Age -0.0038 0.001 -2.759 0.006 -0.007 -0.001\n", - "==============================================================================\n", - "\"\"\"" - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "log_reg_nonsmoker.summary()" + "**Conclusion**: the logistic regression confirms the analysis of question 2, namely that middle-aged people exhibit a higher mortality rate when smoking, while the young and the elderly have similar mortality rates." ] }, { -- 2.18.1