diff --git a/module3/exo2/exercice.ipynb b/module3/exo2/exercice.ipynb index 791b4496acc6f02bc5083e6ef61165848561cdd6..2258e603a6e8bf3080e3604bde934efdf007aa4e 100644 --- a/module3/exo2/exercice.ipynb +++ b/module3/exo2/exercice.ipynb @@ -1530,7 +1530,20 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.4" + "version": "3.7.6" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false } }, "nbformat": 4, diff --git a/module4/challenger.ipynb b/module4/challenger.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..8ce3c9d37ea640101486e2f776b953dcc1b370ea --- /dev/null +++ b/module4/challenger.ipynb @@ -0,0 +1,664 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Risk Analysis of the Space Shuttle: Pre-Challenger Prediction of Failure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this document we reproduce part of the analysis presented in \n", + "*Risk Analysis of the Space Shuttle: Pre-Challenger Prediction of Failure* by *Siddhartha R. Dalal, Edward B. Fowlkes, Bruce Hoadley* published in the *Journal of the American Statistical Association*, Vol. 84, No. 408 (Dec., 1989), pp. 945-957 and available at http://www.jstor.org/stable/2290069. \n", + "\n", + "On the fourth page of this article, the authors report that the maximum likelihood estimates of the logistic regression using only temperature are: $\\hat{\\alpha}=5.085$ and $\\hat{\\beta}=-0.1156$ and their asymptotic standard errors are $s_{\\hat{\\alpha}}=3.052$ and $s_{\\hat{\\beta}}=0.047$. The goodness of fit reported for this model was $G^2=18.086$ with 21 degrees of freedom. 
Our goal is to reproduce the computation behind these values and Figure 4 of this article, possibly in a nicer-looking way." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Technical information on the computer on which the analysis is run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will be using the Python 3 language with the pandas, statsmodels, numpy, matplotlib and seaborn libraries." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#!pip install statsmodels \n", + "#!pip install seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3.7.6 (default, Jan 8 2020, 19:59:22) \n", + "[GCC 7.3.0]\n", + "uname_result(system='Linux', node='H2-SCHMIDER', release='5.3.0-46-generic', version='#38~18.04.1-Ubuntu SMP Tue Mar 31 04:17:56 UTC 2020', machine='x86_64', processor='x86_64')\n", + "IPython 7.13.0\n", + "IPython.core.release 7.13.0\n", + "html 6.0.3\n", + "_csv 1.0\n", + "_ctypes 1.1.0\n", + "_curses b'2.2'\n", + "decimal 1.70\n", + "argparse 1.1\n", + "backcall 0.1.0\n", + "csv 1.0\n", + "ctypes 1.1.0\n", + "cycler 0.10.0\n", + "dateutil 2.8.1\n", + "decimal 1.70\n", + "decorator 4.4.2\n", + "distutils 3.7.6\n", + "ipykernel 5.1.4\n", + "ipykernel._version 5.1.4\n", + "ipython_genutils 0.2.0\n", + "ipython_genutils._version 0.2.0\n", + "jedi 0.16.0\n", + "json 2.0.9\n", + "jupyter_client 6.1.2\n", + "jupyter_client._version 6.1.2\n", + "jupyter_core 4.6.3\n", + "jupyter_core.version 4.6.3\n", + "kiwisolver 1.2.0\n", + "logging 0.5.1.2\n", + "matplotlib 3.2.1\n", + "matplotlib.backends.backend_agg 3.2.1\n", + "numpy 1.18.2\n", + "numpy.core 1.18.2\n", + "numpy.core._multiarray_umath 3.1\n", + "numpy.lib 1.18.2\n", + "numpy.linalg._umath_linalg b'0.1.5'\n", + "pandas 1.0.3\n", + "parso 0.6.2\n", + "patsy 0.5.1\n", + 
"patsy.version 0.5.1\n", + "pexpect 4.8.0\n", + "pickleshare 0.7.5\n", + "platform 1.0.8\n", + "prompt_toolkit 3.0.4\n", + "ptyprocess 0.6.0\n", + "pygments 2.6.1\n", + "pyparsing 2.4.6\n", + "pytz 2019.3\n", + "re 2.2.1\n", + "scipy 1.4.1\n", + "scipy._lib._uarray 0.5.1+5.ga864a57.scipy\n", + "scipy._lib.decorator 4.0.5\n", + "scipy._lib.six 1.2.0\n", + "scipy.integrate._dop b'$Revision: $'\n", + "scipy.integrate._ode $Id$\n", + "scipy.integrate._odepack 1.9 \n", + "scipy.integrate._quadpack 1.13 \n", + "scipy.integrate.lsoda b'$Revision: $'\n", + "scipy.integrate.vode b'$Revision: $'\n", + "scipy.interpolate._fitpack 1.7 \n", + "scipy.interpolate.dfitpack b'$Revision: $'\n", + "scipy.linalg 0.4.9\n", + "scipy.linalg._fblas b'$Revision: $'\n", + "scipy.linalg._flapack b'$Revision: $'\n", + "scipy.linalg._flinalg b'$Revision: $'\n", + "scipy.ndimage 2.0\n", + "scipy.optimize._cobyla b'$Revision: $'\n", + "scipy.optimize._lbfgsb b'$Revision: $'\n", + "scipy.optimize._minpack 1.10 \n", + "scipy.optimize._nnls b'$Revision: $'\n", + "scipy.optimize._slsqp b'$Revision: $'\n", + "scipy.optimize.minpack2 b'$Revision: $'\n", + "scipy.signal.spline 0.2\n", + "scipy.sparse.linalg.eigen.arpack._arpack b'$Revision: $'\n", + "scipy.sparse.linalg.isolve._iterative b'$Revision: $'\n", + "scipy.special.specfun b'$Revision: $'\n", + "scipy.stats.mvn b'$Revision: $'\n", + "scipy.stats.statlib b'$Revision: $'\n", + "seaborn 0.10.0\n", + "seaborn.external.husl 2.1.0\n", + "six 1.14.0\n", + "statsmodels 0.11.1\n", + "statsmodels.__init__ 0.11.1\n", + "statsmodels.api 0.11.1\n", + "statsmodels.tools.web 0.11.1\n", + "traitlets 4.3.3\n", + "traitlets._version 4.3.3\n", + "urllib.request 3.7\n", + "zlib 1.0\n", + "zmq 18.1.1\n", + "zmq.sugar 18.1.1\n", + "zmq.sugar.version 18.1.1\n" + ] + } + ], + "source": [ + "def print_imported_modules():\n", + " import sys\n", + " for name, val in sorted(sys.modules.items()):\n", + " if(hasattr(val, '__version__')): \n", + " print(val.__name__, 
val.__version__)\n", + "# else:\n", + "# print(val.__name__, \"(unknown version)\")\n", + "def print_sys_info():\n", + " import sys\n", + " import platform\n", + " print(sys.version)\n", + " print(platform.uname())\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import statsmodels.api as sm\n", + "import seaborn as sns\n", + "\n", + "print_sys_info()\n", + "print_imported_modules()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading and inspecting data\n", + "Let's start by reading data." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/home/aschmide/Documents/formation_MOOC_RR/mooc-rr/module4'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pwd" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "file downloaded on Sun Apr 12 17:22:23 2020\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " | Date | \n", + "Count | \n", + "Temperature | \n", + "Pressure | \n", + "Malfunction | \n", + "
---|---|---|---|---|---|
0 | \n", + "4/12/81 | \n", + "6 | \n", + "66 | \n", + "50 | \n", + "0 | \n", + "
1 | \n", + "11/12/81 | \n", + "6 | \n", + "70 | \n", + "50 | \n", + "1 | \n", + "
2 | \n", + "3/22/82 | \n", + "6 | \n", + "69 | \n", + "50 | \n", + "0 | \n", + "
3 | \n", + "11/11/82 | \n", + "6 | \n", + "68 | \n", + "50 | \n", + "0 | \n", + "
4 | \n", + "4/04/83 | \n", + "6 | \n", + "67 | \n", + "50 | \n", + "0 | \n", + "
5 | \n", + "6/18/82 | \n", + "6 | \n", + "72 | \n", + "50 | \n", + "0 | \n", + "
6 | \n", + "8/30/83 | \n", + "6 | \n", + "73 | \n", + "100 | \n", + "0 | \n", + "
7 | \n", + "11/28/83 | \n", + "6 | \n", + "70 | \n", + "100 | \n", + "0 | \n", + "
8 | \n", + "2/03/84 | \n", + "6 | \n", + "57 | \n", + "200 | \n", + "1 | \n", + "
9 | \n", + "4/06/84 | \n", + "6 | \n", + "63 | \n", + "200 | \n", + "1 | \n", + "
10 | \n", + "8/30/84 | \n", + "6 | \n", + "70 | \n", + "200 | \n", + "1 | \n", + "
11 | \n", + "10/05/84 | \n", + "6 | \n", + "78 | \n", + "200 | \n", + "0 | \n", + "
12 | \n", + "11/08/84 | \n", + "6 | \n", + "67 | \n", + "200 | \n", + "0 | \n", + "
13 | \n", + "1/24/85 | \n", + "6 | \n", + "53 | \n", + "200 | \n", + "2 | \n", + "
14 | \n", + "4/12/85 | \n", + "6 | \n", + "67 | \n", + "200 | \n", + "0 | \n", + "
15 | \n", + "4/29/85 | \n", + "6 | \n", + "75 | \n", + "200 | \n", + "0 | \n", + "
16 | \n", + "6/17/85 | \n", + "6 | \n", + "70 | \n", + "200 | \n", + "0 | \n", + "
17 | \n", + "7/2903/85 | \n", + "6 | \n", + "81 | \n", + "200 | \n", + "0 | \n", + "
18 | \n", + "8/27/85 | \n", + "6 | \n", + "76 | \n", + "200 | \n", + "0 | \n", + "
19 | \n", + "10/03/85 | \n", + "6 | \n", + "79 | \n", + "200 | \n", + "0 | \n", + "
20 | \n", + "10/30/85 | \n", + "6 | \n", + "75 | \n", + "200 | \n", + "2 | \n", + "
21 | \n", + "11/26/85 | \n", + "6 | \n", + "76 | \n", + "200 | \n", + "0 | \n", + "
22 | \n", + "1/12/86 | \n", + "6 | \n", + "58 | \n", + "200 | \n", + "1 | \n", + "