Update question 1

parent 7de8743a
...@@ -62,9 +62,16 @@ ...@@ -62,9 +62,16 @@
" urllib.request.urlretrieve(data_url, data_file)" " urllib.request.urlretrieve(data_url, data_file)"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Le document comporte trois colonnes : la première colonne indique l'habitude de tabagisme de la personne, la deuxième renseigne si la personne est vivante ou décédée au moment de la seconde étude et enfin, la troisième colonne indique son âge lors de la première étude."
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
...@@ -88,9 +95,9 @@ ...@@ -88,9 +95,9 @@
" <thead>\n", " <thead>\n",
" <tr style=\"text-align: right;\">\n", " <tr style=\"text-align: right;\">\n",
" <th></th>\n", " <th></th>\n",
" <th>Yes</th>\n", " <th>Smoker</th>\n",
" <th>Alive</th>\n", " <th>Status</th>\n",
" <th>21</th>\n", " <th>Age</th>\n",
" </tr>\n", " </tr>\n",
" </thead>\n", " </thead>\n",
" <tbody>\n", " <tbody>\n",
...@@ -98,447 +105,447 @@ ...@@ -98,447 +105,447 @@
" <th>0</th>\n", " <th>0</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>19.3</td>\n", " <td>21.0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1</th>\n", " <th>1</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>19.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Dead</td>\n", " <td>Dead</td>\n",
" <td>57.5</td>\n", " <td>57.5</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>2</th>\n", " <th>3</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>47.1</td>\n", " <td>47.1</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>3</th>\n", " <th>4</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>81.4</td>\n", " <td>81.4</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>4</th>\n", " <th>5</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>36.8</td>\n", " <td>36.8</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>5</th>\n", " <th>6</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>23.8</td>\n", " <td>23.8</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>6</th>\n", " <th>7</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Dead</td>\n", " <td>Dead</td>\n",
" <td>57.5</td>\n", " <td>57.5</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>7</th>\n", " <th>8</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>24.8</td>\n", " <td>24.8</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>8</th>\n", " <th>9</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>49.5</td>\n", " <td>49.5</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>9</th>\n", " <th>10</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>30.0</td>\n", " <td>30.0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>10</th>\n", " <th>11</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Dead</td>\n", " <td>Dead</td>\n",
" <td>66.0</td>\n", " <td>66.0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>11</th>\n", " <th>12</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>49.2</td>\n", " <td>49.2</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>12</th>\n", " <th>13</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>58.4</td>\n", " <td>58.4</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>13</th>\n", " <th>14</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Dead</td>\n", " <td>Dead</td>\n",
" <td>60.6</td>\n", " <td>60.6</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>14</th>\n", " <th>15</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>25.1</td>\n", " <td>25.1</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>15</th>\n", " <th>16</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>43.5</td>\n", " <td>43.5</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>16</th>\n", " <th>17</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>27.1</td>\n", " <td>27.1</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>17</th>\n", " <th>18</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>58.3</td>\n", " <td>58.3</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>18</th>\n", " <th>19</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>65.7</td>\n", " <td>65.7</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>19</th>\n", " <th>20</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Dead</td>\n", " <td>Dead</td>\n",
" <td>73.2</td>\n", " <td>73.2</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>20</th>\n", " <th>21</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>38.3</td>\n", " <td>38.3</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>21</th>\n", " <th>22</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>33.4</td>\n", " <td>33.4</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>22</th>\n", " <th>23</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Dead</td>\n", " <td>Dead</td>\n",
" <td>62.3</td>\n", " <td>62.3</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>23</th>\n", " <th>24</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>18.0</td>\n", " <td>18.0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>24</th>\n", " <th>25</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>56.2</td>\n", " <td>56.2</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>25</th>\n", " <th>26</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>59.2</td>\n", " <td>59.2</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>26</th>\n", " <th>27</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>25.8</td>\n", " <td>25.8</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>27</th>\n", " <th>28</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Dead</td>\n", " <td>Dead</td>\n",
" <td>36.9</td>\n", " <td>36.9</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>28</th>\n", " <th>29</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>20.2</td>\n", " <td>20.2</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>29</th>\n",
" <td>Yes</td>\n",
" <td>Alive</td>\n",
" <td>34.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n", " <th>...</th>\n",
" <td>...</td>\n", " <td>...</td>\n",
" <td>...</td>\n", " <td>...</td>\n",
" <td>...</td>\n", " <td>...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1283</th>\n", " <th>1284</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Dead</td>\n", " <td>Dead</td>\n",
" <td>36.0</td>\n", " <td>36.0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1284</th>\n", " <th>1285</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>48.3</td>\n", " <td>48.3</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1285</th>\n", " <th>1286</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>63.1</td>\n", " <td>63.1</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1286</th>\n", " <th>1287</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>60.8</td>\n", " <td>60.8</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1287</th>\n", " <th>1288</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Dead</td>\n", " <td>Dead</td>\n",
" <td>39.3</td>\n", " <td>39.3</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1288</th>\n", " <th>1289</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>36.7</td>\n", " <td>36.7</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1289</th>\n", " <th>1290</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>63.8</td>\n", " <td>63.8</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1290</th>\n", " <th>1291</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Dead</td>\n", " <td>Dead</td>\n",
" <td>71.3</td>\n", " <td>71.3</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1291</th>\n", " <th>1292</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>57.7</td>\n", " <td>57.7</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1292</th>\n", " <th>1293</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>63.2</td>\n", " <td>63.2</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1293</th>\n", " <th>1294</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>46.6</td>\n", " <td>46.6</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1294</th>\n", " <th>1295</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Dead</td>\n", " <td>Dead</td>\n",
" <td>82.4</td>\n", " <td>82.4</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1295</th>\n", " <th>1296</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>38.3</td>\n", " <td>38.3</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1296</th>\n", " <th>1297</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>32.7</td>\n", " <td>32.7</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1297</th>\n", " <th>1298</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>39.7</td>\n", " <td>39.7</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1298</th>\n", " <th>1299</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Dead</td>\n", " <td>Dead</td>\n",
" <td>60.0</td>\n", " <td>60.0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1299</th>\n", " <th>1300</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Dead</td>\n", " <td>Dead</td>\n",
" <td>71.0</td>\n", " <td>71.0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1300</th>\n", " <th>1301</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>20.5</td>\n", " <td>20.5</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1301</th>\n", " <th>1302</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>44.4</td>\n", " <td>44.4</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1302</th>\n", " <th>1303</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>31.2</td>\n", " <td>31.2</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1303</th>\n", " <th>1304</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>47.8</td>\n", " <td>47.8</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1304</th>\n", " <th>1305</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>60.9</td>\n", " <td>60.9</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1305</th>\n", " <th>1306</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Dead</td>\n", " <td>Dead</td>\n",
" <td>61.4</td>\n", " <td>61.4</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1306</th>\n", " <th>1307</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>43.0</td>\n", " <td>43.0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1307</th>\n", " <th>1308</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>42.1</td>\n", " <td>42.1</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1308</th>\n", " <th>1309</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>35.9</td>\n", " <td>35.9</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1309</th>\n", " <th>1310</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>22.3</td>\n", " <td>22.3</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1310</th>\n", " <th>1311</th>\n",
" <td>Yes</td>\n", " <td>Yes</td>\n",
" <td>Dead</td>\n", " <td>Dead</td>\n",
" <td>62.1</td>\n", " <td>62.1</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1311</th>\n", " <th>1312</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Dead</td>\n", " <td>Dead</td>\n",
" <td>88.6</td>\n", " <td>88.6</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1312</th>\n", " <th>1313</th>\n",
" <td>No</td>\n", " <td>No</td>\n",
" <td>Alive</td>\n", " <td>Alive</td>\n",
" <td>39.1</td>\n", " <td>39.1</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
"<p>1313 rows × 3 columns</p>\n", "<p>1314 rows × 3 columns</p>\n",
"</div>" "</div>"
], ],
"text/plain": [ "text/plain": [
" Yes Alive 21\n", " Smoker Status Age\n",
"0 Yes Alive 19.3\n", "0 Yes Alive 21.0\n",
"1 No Dead 57.5\n", "1 Yes Alive 19.3\n",
"2 No Alive 47.1\n", "2 No Dead 57.5\n",
"3 Yes Alive 81.4\n", "3 No Alive 47.1\n",
"4 No Alive 36.8\n", "4 Yes Alive 81.4\n",
"5 No Alive 23.8\n", "5 No Alive 36.8\n",
"6 Yes Dead 57.5\n", "6 No Alive 23.8\n",
"7 Yes Alive 24.8\n", "7 Yes Dead 57.5\n",
"8 Yes Alive 49.5\n", "8 Yes Alive 24.8\n",
"9 Yes Alive 30.0\n", "9 Yes Alive 49.5\n",
"10 No Dead 66.0\n", "10 Yes Alive 30.0\n",
"11 Yes Alive 49.2\n", "11 No Dead 66.0\n",
"12 No Alive 58.4\n", "12 Yes Alive 49.2\n",
"13 No Dead 60.6\n", "13 No Alive 58.4\n",
"14 No Alive 25.1\n", "14 No Dead 60.6\n",
"15 No Alive 43.5\n", "15 No Alive 25.1\n",
"16 No Alive 27.1\n", "16 No Alive 43.5\n",
"17 No Alive 58.3\n", "17 No Alive 27.1\n",
"18 Yes Alive 65.7\n", "18 No Alive 58.3\n",
"19 No Dead 73.2\n", "19 Yes Alive 65.7\n",
"20 Yes Alive 38.3\n", "20 No Dead 73.2\n",
"21 No Alive 33.4\n", "21 Yes Alive 38.3\n",
"22 Yes Dead 62.3\n", "22 No Alive 33.4\n",
"23 No Alive 18.0\n", "23 Yes Dead 62.3\n",
"24 No Alive 56.2\n", "24 No Alive 18.0\n",
"25 Yes Alive 59.2\n", "25 No Alive 56.2\n",
"26 No Alive 25.8\n", "26 Yes Alive 59.2\n",
"27 No Dead 36.9\n", "27 No Alive 25.8\n",
"28 No Alive 20.2\n", "28 No Dead 36.9\n",
"29 Yes Alive 34.6\n", "29 No Alive 20.2\n",
"... ... ... ...\n", "... ... ... ...\n",
"1283 Yes Dead 36.0\n", "1284 Yes Dead 36.0\n",
"1284 Yes Alive 48.3\n", "1285 Yes Alive 48.3\n",
"1285 No Alive 63.1\n", "1286 No Alive 63.1\n",
"1286 No Alive 60.8\n", "1287 No Alive 60.8\n",
"1287 Yes Dead 39.3\n", "1288 Yes Dead 39.3\n",
"1288 No Alive 36.7\n", "1289 No Alive 36.7\n",
"1289 No Alive 63.8\n", "1290 No Alive 63.8\n",
"1290 No Dead 71.3\n", "1291 No Dead 71.3\n",
"1291 No Alive 57.7\n", "1292 No Alive 57.7\n",
"1292 No Alive 63.2\n", "1293 No Alive 63.2\n",
"1293 No Alive 46.6\n", "1294 No Alive 46.6\n",
"1294 Yes Dead 82.4\n", "1295 Yes Dead 82.4\n",
"1295 Yes Alive 38.3\n", "1296 Yes Alive 38.3\n",
"1296 Yes Alive 32.7\n", "1297 Yes Alive 32.7\n",
"1297 No Alive 39.7\n", "1298 No Alive 39.7\n",
"1298 Yes Dead 60.0\n", "1299 Yes Dead 60.0\n",
"1299 No Dead 71.0\n", "1300 No Dead 71.0\n",
"1300 No Alive 20.5\n", "1301 No Alive 20.5\n",
"1301 No Alive 44.4\n", "1302 No Alive 44.4\n",
"1302 Yes Alive 31.2\n", "1303 Yes Alive 31.2\n",
"1303 Yes Alive 47.8\n", "1304 Yes Alive 47.8\n",
"1304 Yes Alive 60.9\n", "1305 Yes Alive 60.9\n",
"1305 No Dead 61.4\n", "1306 No Dead 61.4\n",
"1306 Yes Alive 43.0\n", "1307 Yes Alive 43.0\n",
"1307 No Alive 42.1\n", "1308 No Alive 42.1\n",
"1308 Yes Alive 35.9\n", "1309 Yes Alive 35.9\n",
"1309 No Alive 22.3\n", "1310 No Alive 22.3\n",
"1310 Yes Dead 62.1\n", "1311 Yes Dead 62.1\n",
"1311 No Dead 88.6\n", "1312 No Dead 88.6\n",
"1312 No Alive 39.1\n", "1313 No Alive 39.1\n",
"\n", "\n",
"[1313 rows x 3 columns]" "[1314 rows x 3 columns]"
] ]
}, },
"execution_count": 4, "execution_count": 6,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"raw_data = pd.read_csv(data_file, skiprows=1)\n", "raw_data = pd.read_csv(data_file)\n",
"raw_data" "raw_data"
] ]
}, },
...@@ -546,7 +553,157 @@ ...@@ -546,7 +553,157 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Le document comporte trois colonnes : la première colonne indique leur habitude de tabagisme, la deuxième renseigne si la personne est vivante ou décédée au moment de la seconde étude et enfin, la troisième colonne indique leur âge lors de la première étude" "Pour nous assurer que le jeu de données est complet, nous vérifions qu'il n'y a pas d'informations manquantes concernant l'une des personnes du sondage. Après vérification, il n'y a pas de données manquantes."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Smoker</th>\n",
" <th>Status</th>\n",
" <th>Age</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [Smoker, Status, Age]\n",
"Index: []"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"raw_data[raw_data.isnull().any(axis=1)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Nous calculons le nombre total de femmes vivantes et décédées sur la période en fonction de leur habitude de tabagisme."
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"alive_and_smoker = 0\n",
"alive_and_non_smoker = 0\n",
"dead_and_smoker = 0\n",
"dead_and_non_smoker = 0\n",
"for i in range(len(raw_data)):\n",
" if raw_data.iloc[i][0] == \"Yes\":\n",
" if raw_data.iloc[i][1] == \"Alive\":\n",
" alive_and_smoker += 1\n",
" else :\n",
" dead_and_smoker += 1\n",
" else :\n",
" if raw_data.iloc[i][1] == \"Alive\":\n",
" alive_and_non_smoker += 1\n",
" else :\n",
" dead_and_non_smoker += 1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Nous représentons ensuite ces données sous la forme d'un tableau. "
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Smoker</th>\n",
" <th>Non-Smoker</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Alive</th>\n",
" <td>443</td>\n",
" <td>502</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Dead</th>\n",
" <td>139</td>\n",
" <td>230</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Smoker Non-Smoker\n",
"Alive 443 502\n",
"Dead 139 230"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = [[alive_and_smoker,alive_and_non_smoker],[dead_and_smoker, dead_and_non_smoker]]\n",
"\n",
"pd.DataFrame(data, columns=[\"Smoker\", \"Non-Smoker\"], index = [\"Alive\", \"Dead\"])"
] ]
}, },
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment