diff --git a/module3/exo3/exercice.ipynb b/module3/exo3/exercice.ipynb index 0bbbe371b01e359e381e43239412d77bf53fb1fb..d7e28c17401edf81a4c6eeeeeced6d8613529f71 100644 --- a/module3/exo3/exercice.ipynb +++ b/module3/exo3/exercice.ipynb @@ -1,5 +1,667 @@ { - "cells": [], + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Autour du paradoxe de Simpson - LEZIN Chloé" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "raw_data = pd.read_csv(\"https://gitlab.inria.fr/learninglab/mooc-rr/mooc-rr-ressources/-/raw/master/module3/Practical_session/Subject6_smoking.csv?inline=false\")  # keep line 1: it is the CSV header; skipping it turns the first record into column names" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YesAlive21
0YesAlive19.3
1NoDead57.5
2NoAlive47.1
3YesAlive81.4
4NoAlive36.8
5NoAlive23.8
6YesDead57.5
7YesAlive24.8
8YesAlive49.5
9YesAlive30.0
10NoDead66.0
11YesAlive49.2
12NoAlive58.4
13NoDead60.6
14NoAlive25.1
15NoAlive43.5
16NoAlive27.1
17NoAlive58.3
18YesAlive65.7
19NoDead73.2
20YesAlive38.3
21NoAlive33.4
22YesDead62.3
23NoAlive18.0
24NoAlive56.2
25YesAlive59.2
26NoAlive25.8
27NoDead36.9
28NoAlive20.2
29YesAlive34.6
............
1283YesDead36.0
1284YesAlive48.3
1285NoAlive63.1
1286NoAlive60.8
1287YesDead39.3
1288NoAlive36.7
1289NoAlive63.8
1290NoDead71.3
1291NoAlive57.7
1292NoAlive63.2
1293NoAlive46.6
1294YesDead82.4
1295YesAlive38.3
1296YesAlive32.7
1297NoAlive39.7
1298YesDead60.0
1299NoDead71.0
1300NoAlive20.5
1301NoAlive44.4
1302YesAlive31.2
1303YesAlive47.8
1304YesAlive60.9
1305NoDead61.4
1306YesAlive43.0
1307NoAlive42.1
1308YesAlive35.9
1309NoAlive22.3
1310YesDead62.1
1311NoDead88.6
1312NoAlive39.1
\n", + "

1313 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " Yes Alive 21\n", + "0 Yes Alive 19.3\n", + "1 No Dead 57.5\n", + "2 No Alive 47.1\n", + "3 Yes Alive 81.4\n", + "4 No Alive 36.8\n", + "5 No Alive 23.8\n", + "6 Yes Dead 57.5\n", + "7 Yes Alive 24.8\n", + "8 Yes Alive 49.5\n", + "9 Yes Alive 30.0\n", + "10 No Dead 66.0\n", + "11 Yes Alive 49.2\n", + "12 No Alive 58.4\n", + "13 No Dead 60.6\n", + "14 No Alive 25.1\n", + "15 No Alive 43.5\n", + "16 No Alive 27.1\n", + "17 No Alive 58.3\n", + "18 Yes Alive 65.7\n", + "19 No Dead 73.2\n", + "20 Yes Alive 38.3\n", + "21 No Alive 33.4\n", + "22 Yes Dead 62.3\n", + "23 No Alive 18.0\n", + "24 No Alive 56.2\n", + "25 Yes Alive 59.2\n", + "26 No Alive 25.8\n", + "27 No Dead 36.9\n", + "28 No Alive 20.2\n", + "29 Yes Alive 34.6\n", + "... ... ... ...\n", + "1283 Yes Dead 36.0\n", + "1284 Yes Alive 48.3\n", + "1285 No Alive 63.1\n", + "1286 No Alive 60.8\n", + "1287 Yes Dead 39.3\n", + "1288 No Alive 36.7\n", + "1289 No Alive 63.8\n", + "1290 No Dead 71.3\n", + "1291 No Alive 57.7\n", + "1292 No Alive 63.2\n", + "1293 No Alive 46.6\n", + "1294 Yes Dead 82.4\n", + "1295 Yes Alive 38.3\n", + "1296 Yes Alive 32.7\n", + "1297 No Alive 39.7\n", + "1298 Yes Dead 60.0\n", + "1299 No Dead 71.0\n", + "1300 No Alive 20.5\n", + "1301 No Alive 44.4\n", + "1302 Yes Alive 31.2\n", + "1303 Yes Alive 47.8\n", + "1304 Yes Alive 60.9\n", + "1305 No Dead 61.4\n", + "1306 Yes Alive 43.0\n", + "1307 No Alive 42.1\n", + "1308 Yes Alive 35.9\n", + "1309 No Alive 22.3\n", + "1310 Yes Dead 62.1\n", + "1311 No Dead 88.6\n", + "1312 No Alive 39.1\n", + "\n", + "[1313 rows x 3 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# vérifions s'il y a des données manquantes" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 False\n", + "1 
False\n", + "2 False\n", + "3 False\n", + "4 False\n", + "5 False\n", + "6 False\n", + "7 False\n", + "8 False\n", + "9 False\n", + "10 False\n", + "11 False\n", + "12 False\n", + "13 False\n", + "14 False\n", + "15 False\n", + "16 False\n", + "17 False\n", + "18 False\n", + "19 False\n", + "20 False\n", + "21 False\n", + "22 False\n", + "23 False\n", + "24 False\n", + "25 False\n", + "26 False\n", + "27 False\n", + "28 False\n", + "29 False\n", + " ... \n", + "1283 False\n", + "1284 False\n", + "1285 False\n", + "1286 False\n", + "1287 False\n", + "1288 False\n", + "1289 False\n", + "1290 False\n", + "1291 False\n", + "1292 False\n", + "1293 False\n", + "1294 False\n", + "1295 False\n", + "1296 False\n", + "1297 False\n", + "1298 False\n", + "1299 False\n", + "1300 False\n", + "1301 False\n", + "1302 False\n", + "1303 False\n", + "1304 False\n", + "1305 False\n", + "1306 False\n", + "1307 False\n", + "1308 False\n", + "1309 False\n", + "1310 False\n", + "1311 False\n", + "1312 False\n", + "Length: 1313, dtype: bool" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data.isnull().any(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YesAlive21
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [Yes, Alive, 21]\n", + "Index: []" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data[raw_data.isnull().any(axis=1)]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hideOutput": true + }, + "source": [ + "# résultats : aucune données manquantes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], "metadata": { "kernelspec": { "display_name": "Python 3", @@ -16,10 +678,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.3" + "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 } -