diff --git a/module4/src_Python3_challenger.ipynb b/module4/src_Python3_challenger.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..f04b6ee938ae0e3e130d550fe73c02318fe9f2bb
--- /dev/null
+++ b/module4/src_Python3_challenger.ipynb
@@ -0,0 +1,741 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "3.7.4 (default, Aug 9 2019, 18:34:13) [MSC v.1915 64 bit (AMD64)]\n",
+ "uname_result(system='Windows', node='LAPTOP-TL7KUKIV', release='10', version='10.0.18362', machine='AMD64', processor='Intel64 Family 6 Model 158 Stepping 10, GenuineIntel')\n",
+ "IPython 7.8.0\n",
+ "IPython.core.release 7.8.0\n",
+ "PIL 6.2.0\n",
+ "PIL.Image 6.2.0\n",
+ "PIL._version 6.2.0\n",
+ "_csv 1.0\n",
+ "_ctypes 1.1.0\n",
+ "decimal 1.70\n",
+ "argparse 1.1\n",
+ "backcall 0.1.0\n",
+ "bottleneck 1.2.1\n",
+ "bottleneck.version 1.2.1\n",
+ "cffi 1.12.3\n",
+ "colorama 0.4.1\n",
+ "csv 1.0\n",
+ "ctypes 1.1.0\n",
+ "cycler 0.10.0\n",
+ "dateutil 2.8.0\n",
+ "decimal 1.70\n",
+ "decorator 4.4.0\n",
+ "distutils 3.7.4\n",
+ "ipykernel 5.1.2\n",
+ "ipykernel._version 5.1.2\n",
+ "ipython_genutils 0.2.0\n",
+ "ipython_genutils._version 0.2.0\n",
+ "ipywidgets 7.5.1\n",
+ "ipywidgets._version 7.5.1\n",
+ "jedi 0.15.1\n",
+ "joblib 0.13.2\n",
+ "joblib.externals.cloudpickle 0.8.0\n",
+ "joblib.externals.loky 2.4.2\n",
+ "json 2.0.9\n",
+ "jupyter_client 5.3.3\n",
+ "jupyter_client._version 5.3.3\n",
+ "jupyter_core 4.5.0\n",
+ "jupyter_core.version 4.5.0\n",
+ "kiwisolver 1.1.0\n",
+ "logging 0.5.1.2\n",
+ "matplotlib 3.1.1\n",
+ "matplotlib.backends.backend_agg 3.1.1\n",
+ "numpy 1.19.0\n",
+ "numpy.core 1.19.0\n",
+ "numpy.core._multiarray_umath 3.1\n",
+ "numpy.lib 1.19.0\n",
+ "numpy.linalg._umath_linalg b'0.1.5'\n",
+ "pandas 0.25.1\n",
+ "_libjson 1.33\n",
+ "parso 0.5.1\n",
+ "patsy 0.5.1\n",
+ "patsy.version 0.5.1\n",
+ "pickleshare 0.7.5\n",
+ "platform 1.0.8\n",
+ "prompt_toolkit 2.0.10\n",
+ "psutil 5.6.3\n",
+ "pygments 2.4.2\n",
+ "pyparsing 2.4.2\n",
+ "pytz 2019.3\n",
+ "re 2.2.1\n",
+ "scipy 1.5.0\n",
+ "scipy._lib._uarray 0.5.1+49.g4c3f1d7.scipy\n",
+ "scipy._lib.decorator 4.0.5\n",
+ "scipy.integrate._dop b'$Revision: $'\n",
+ "scipy.integrate._ode $Id$\n",
+ "scipy.integrate._odepack 1.9 \n",
+ "scipy.integrate._quadpack 1.13 \n",
+ "scipy.integrate.lsoda b'$Revision: $'\n",
+ "scipy.integrate.vode b'$Revision: $'\n",
+ "scipy.interpolate._fitpack 1.7 \n",
+ "scipy.interpolate.dfitpack b'$Revision: $'\n",
+ "scipy.linalg._fblas b'$Revision: $'\n",
+ "scipy.linalg._flapack b'$Revision: $'\n",
+ "scipy.linalg._flinalg b'$Revision: $'\n",
+ "scipy.ndimage 2.0\n",
+ "scipy.optimize.__nnls b'$Revision: $'\n",
+ "scipy.optimize._cobyla b'$Revision: $'\n",
+ "scipy.optimize._lbfgsb b'$Revision: $'\n",
+ "scipy.optimize._minpack 1.10 \n",
+ "scipy.optimize._slsqp b'$Revision: $'\n",
+ "scipy.optimize.minpack2 b'$Revision: $'\n",
+ "scipy.signal.spline 0.2\n",
+ "scipy.sparse.linalg.eigen.arpack._arpack b'$Revision: $'\n",
+ "scipy.sparse.linalg.isolve._iterative b'$Revision: $'\n",
+ "scipy.special.specfun b'$Revision: $'\n",
+ "scipy.stats.mvn b'$Revision: $'\n",
+ "scipy.stats.statlib b'$Revision: $'\n",
+ "seaborn 0.9.0\n",
+ "seaborn.external.husl 2.1.0\n",
+ "seaborn.external.six 1.10.0\n",
+ "six 1.12.0\n",
+ "statsmodels 0.10.1\n",
+ "statsmodels.__init__ 0.10.1\n",
+ "statsmodels.api 0.10.1\n",
+ "statsmodels.tools.web 0.10.1\n",
+ "traitlets 4.3.3\n",
+ "traitlets._version 4.3.3\n",
+ "urllib.request 3.7\n",
+ "zlib 1.0\n",
+ "zmq 18.1.0\n",
+ "zmq.sugar 18.1.0\n",
+ "zmq.sugar.version 18.1.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "def print_imported_modules():\n",
+ "    \"\"\"Print the name and version of every loaded module exposing ``__version__``.\"\"\"\n",
+ "    import sys\n",
+ "    loaded = sorted(sys.modules.items())  # ordered by the sys.modules key\n",
+ "    for _key, module in loaded:\n",
+ "        if not hasattr(module, '__version__'):\n",
+ "            continue  # nothing to report for modules without a version attribute\n",
+ "        print(module.__name__, module.__version__)\n",
+ "def print_sys_info():\n",
+ "    \"\"\"Print the Python version string, then the platform.uname() record.\"\"\"\n",
+ "    import platform\n",
+ "    import sys\n",
+ "    interpreter = sys.version\n",
+ "    # NOTE: the uname record includes the machine's node (host) name, which ends up in the saved output.\n",
+ "    host = platform.uname()\n",
+ "    print(interpreter)\n",
+ "    print(host)\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "import statsmodels.api as sm\n",
+ "import seaborn as sns\n",
+ "\n",
+ "print_sys_info()\n",
+ "print_imported_modules()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Loading and inspecting data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's start by reading data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Date | \n",
+ " Count | \n",
+ " Temperature | \n",
+ " Pressure | \n",
+ " Malfunction | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 4/12/81 | \n",
+ " 6 | \n",
+ " 66 | \n",
+ " 50 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 11/12/81 | \n",
+ " 6 | \n",
+ " 70 | \n",
+ " 50 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3/22/82 | \n",
+ " 6 | \n",
+ " 69 | \n",
+ " 50 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 11/11/82 | \n",
+ " 6 | \n",
+ " 68 | \n",
+ " 50 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4/04/83 | \n",
+ " 6 | \n",
+ " 67 | \n",
+ " 50 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 6/18/82 | \n",
+ " 6 | \n",
+ " 72 | \n",
+ " 50 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 8/30/83 | \n",
+ " 6 | \n",
+ " 73 | \n",
+ " 100 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 11/28/83 | \n",
+ " 6 | \n",
+ " 70 | \n",
+ " 100 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 2/03/84 | \n",
+ " 6 | \n",
+ " 57 | \n",
+ " 200 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 4/06/84 | \n",
+ " 6 | \n",
+ " 63 | \n",
+ " 200 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 8/30/84 | \n",
+ " 6 | \n",
+ " 70 | \n",
+ " 200 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 10/05/84 | \n",
+ " 6 | \n",
+ " 78 | \n",
+ " 200 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 11/08/84 | \n",
+ " 6 | \n",
+ " 67 | \n",
+ " 200 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 1/24/85 | \n",
+ " 6 | \n",
+ " 53 | \n",
+ " 200 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 4/12/85 | \n",
+ " 6 | \n",
+ " 67 | \n",
+ " 200 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 4/29/85 | \n",
+ " 6 | \n",
+ " 75 | \n",
+ " 200 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 6/17/85 | \n",
+ " 6 | \n",
+ " 70 | \n",
+ " 200 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " 7/2903/85 | \n",
+ " 6 | \n",
+ " 81 | \n",
+ " 200 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " 8/27/85 | \n",
+ " 6 | \n",
+ " 76 | \n",
+ " 200 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 10/03/85 | \n",
+ " 6 | \n",
+ " 79 | \n",
+ " 200 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 10/30/85 | \n",
+ " 6 | \n",
+ " 75 | \n",
+ " 200 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " 11/26/85 | \n",
+ " 6 | \n",
+ " 76 | \n",
+ " 200 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " 1/12/86 | \n",
+ " 6 | \n",
+ " 58 | \n",
+ " 200 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Date Count Temperature Pressure Malfunction\n",
+ "0 4/12/81 6 66 50 0\n",
+ "1 11/12/81 6 70 50 1\n",
+ "2 3/22/82 6 69 50 0\n",
+ "3 11/11/82 6 68 50 0\n",
+ "4 4/04/83 6 67 50 0\n",
+ "5 6/18/82 6 72 50 0\n",
+ "6 8/30/83 6 73 100 0\n",
+ "7 11/28/83 6 70 100 0\n",
+ "8 2/03/84 6 57 200 1\n",
+ "9 4/06/84 6 63 200 1\n",
+ "10 8/30/84 6 70 200 1\n",
+ "11 10/05/84 6 78 200 0\n",
+ "12 11/08/84 6 67 200 0\n",
+ "13 1/24/85 6 53 200 2\n",
+ "14 4/12/85 6 67 200 0\n",
+ "15 4/29/85 6 75 200 0\n",
+ "16 6/17/85 6 70 200 0\n",
+ "17 7/2903/85 6 81 200 0\n",
+ "18 8/27/85 6 76 200 0\n",
+ "19 10/03/85 6 79 200 0\n",
+ "20 10/30/85 6 75 200 2\n",
+ "21 11/26/85 6 76 200 0\n",
+ "22 1/12/86 6 58 200 1"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import os\n",
+ "\n",
+ "# The CSV location can be overridden through the SHUTTLE_CSV environment variable,\n",
+ "# so the notebook is not tied to one machine; the default is the path it was first run with.\n",
+ "SHUTTLE_CSV = os.environ.get(\"SHUTTLE_CSV\", \"C:/Users/hugoc/Downloads/shuttle.csv\")\n",
+ "data = pd.read_csv(SHUTTLE_CSV, delimiter=',')\n",
+ "data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We know from our previous experience on this data set that filtering data is a really bad idea. We will therefore process it as is, without removing any observation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "%matplotlib inline\n",
+ "pd.set_option('mode.chained_assignment',None) # this removes a useless warning from pandas\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "# Per-row ratio of the Malfunction column to the Count column.\n",
+ "data[\"Frequency\"] = data[\"Malfunction\"] / data[\"Count\"]\n",
+ "# Scatter of Frequency against Temperature on a fixed [0, 1] y-range, with a grid.\n",
+ "data.plot.scatter(x=\"Temperature\", y=\"Frequency\", ylim=[0,1]).grid(True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Logistic regression"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's assume O-rings independently fail with the same probability which solely depends on temperature. A logistic regression should allow us to estimate the influence of temperature."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\hugoc\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:7: DeprecationWarning: Calling Family(..) with a link class as argument is deprecated.\n",
+ "Use an instance of a link class instead.\n",
+ " import sys\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "Generalized Linear Model Regression Results\n",
+ "\n",
+ " Dep. Variable: | Frequency | No. Observations: | 23 | \n",
+ "
\n",
+ "\n",
+ " Model: | GLM | Df Residuals: | 21 | \n",
+ "
\n",
+ "\n",
+ " Model Family: | Binomial | Df Model: | 1 | \n",
+ "
\n",
+ "\n",
+ " Link Function: | logit | Scale: | 1.0000 | \n",
+ "
\n",
+ "\n",
+ " Method: | IRLS | Log-Likelihood: | -3.9210 | \n",
+ "
\n",
+ "\n",
+ " Date: | Wed, 12 Aug 2020 | Deviance: | 3.0144 | \n",
+ "
\n",
+ "\n",
+ " Time: | 16:46:29 | Pearson chi2: | 5.00 | \n",
+ "
\n",
+ "\n",
+ " No. Iterations: | 6 | | | \n",
+ "
\n",
+ "\n",
+ " Covariance Type: | nonrobust | | | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ " | coef | std err | z | P>|z| | [0.025 | 0.975] | \n",
+ "
\n",
+ "\n",
+ " Intercept | 5.0850 | 7.477 | 0.680 | 0.496 | -9.570 | 19.740 | \n",
+ "
\n",
+ "\n",
+ " Temperature | -0.1156 | 0.115 | -1.004 | 0.316 | -0.341 | 0.110 | \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "\n",
+ "\"\"\"\n",
+ " Generalized Linear Model Regression Results \n",
+ "==============================================================================\n",
+ "Dep. Variable: Frequency No. Observations: 23\n",
+ "Model: GLM Df Residuals: 21\n",
+ "Model Family: Binomial Df Model: 1\n",
+ "Link Function: logit Scale: 1.0000\n",
+ "Method: IRLS Log-Likelihood: -3.9210\n",
+ "Date: Wed, 12 Aug 2020 Deviance: 3.0144\n",
+ "Time: 16:46:29 Pearson chi2: 5.00\n",
+ "No. Iterations: 6 \n",
+ "Covariance Type: nonrobust \n",
+ "===============================================================================\n",
+ " coef std err z P>|z| [0.025 0.975]\n",
+ "-------------------------------------------------------------------------------\n",
+ "Intercept 5.0850 7.477 0.680 0.496 -9.570 19.740\n",
+ "Temperature -0.1156 0.115 -1.004 0.316 -0.341 0.110\n",
+ "===============================================================================\n",
+ "\"\"\""
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import statsmodels.api as sm\n",
+ "\n",
+ "data[\"Success\"]=data.Count-data.Malfunction\n",
+ "data[\"Intercept\"]=1\n",
+ "\n",
+ "# Binomial() expects a link *instance*; passing the link class itself is deprecated\n",
+ "# and emits a DeprecationWarning.\n",
+ "logmodel=sm.GLM(data['Frequency'], data[['Intercept','Temperature']], \n",
+ "                family=sm.families.Binomial(sm.families.links.logit())).fit()\n",
+ "\n",
+ "logmodel.summary()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\hugoc\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:2: DeprecationWarning: Calling Family(..) with a link class as argument is deprecated.\n",
+ "Use an instance of a link class instead.\n",
+ " \n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "Generalized Linear Model Regression Results\n",
+ "\n",
+ " Dep. Variable: | Frequency | No. Observations: | 23 | \n",
+ "
\n",
+ "\n",
+ " Model: | GLM | Df Residuals: | 21 | \n",
+ "
\n",
+ "\n",
+ " Model Family: | Binomial | Df Model: | 1 | \n",
+ "
\n",
+ "\n",
+ " Link Function: | logit | Scale: | 1.0000 | \n",
+ "
\n",
+ "\n",
+ " Method: | IRLS | Log-Likelihood: | -23.526 | \n",
+ "
\n",
+ "\n",
+ " Date: | Wed, 12 Aug 2020 | Deviance: | 18.086 | \n",
+ "
\n",
+ "\n",
+ " Time: | 16:46:29 | Pearson chi2: | 30.0 | \n",
+ "
\n",
+ "\n",
+ " No. Iterations: | 6 | | | \n",
+ "
\n",
+ "\n",
+ " Covariance Type: | nonrobust | | | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ " | coef | std err | z | P>|z| | [0.025 | 0.975] | \n",
+ "
\n",
+ "\n",
+ " Intercept | 5.0850 | 3.052 | 1.666 | 0.096 | -0.898 | 11.068 | \n",
+ "
\n",
+ "\n",
+ " Temperature | -0.1156 | 0.047 | -2.458 | 0.014 | -0.208 | -0.023 | \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "\n",
+ "\"\"\"\n",
+ " Generalized Linear Model Regression Results \n",
+ "==============================================================================\n",
+ "Dep. Variable: Frequency No. Observations: 23\n",
+ "Model: GLM Df Residuals: 21\n",
+ "Model Family: Binomial Df Model: 1\n",
+ "Link Function: logit Scale: 1.0000\n",
+ "Method: IRLS Log-Likelihood: -23.526\n",
+ "Date: Wed, 12 Aug 2020 Deviance: 18.086\n",
+ "Time: 16:46:29 Pearson chi2: 30.0\n",
+ "No. Iterations: 6 \n",
+ "Covariance Type: nonrobust \n",
+ "===============================================================================\n",
+ " coef std err z P>|z| [0.025 0.975]\n",
+ "-------------------------------------------------------------------------------\n",
+ "Intercept 5.0850 3.052 1.666 0.096 -0.898 11.068\n",
+ "Temperature -0.1156 0.047 -2.458 0.014 -0.208 -0.023\n",
+ "===============================================================================\n",
+ "\"\"\""
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Same model as before, but each observed frequency is weighted by its Count column.\n",
+ "# Binomial() expects a link *instance*; passing the link class itself is deprecated.\n",
+ "logmodel=sm.GLM(data['Frequency'], data[['Intercept','Temperature']], \n",
+ "                family=sm.families.Binomial(sm.families.links.logit()),\n",
+ "                var_weights=data['Count']).fit()\n",
+ "\n",
+ "logmodel.summary()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "%matplotlib inline\n",
+ "data_pred = pd.DataFrame({'Temperature': np.linspace(start=30, stop=90, num=121), 'Intercept': 1})\n",
+ "# The model was fitted on ['Intercept','Temperature']. predict() on a non-formula model uses the\n",
+ "# columns by position, not by name, so pass them explicitly in the fitted order; otherwise the\n",
+ "# intercept coefficient would be applied to the temperature values.\n",
+ "data_pred['Frequency'] = logmodel.predict(data_pred[['Intercept','Temperature']])\n",
+ "data_pred.plot(x=\"Temperature\",y=\"Frequency\",kind=\"line\",ylim=[0,1])\n",
+ "plt.scatter(x=data[\"Temperature\"],y=data[\"Frequency\"])\n",
+ "plt.grid(True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "sns.set(color_codes=True)\n",
+ "# Fix the viewing window first, then draw the logistic fit on those same axes.\n",
+ "fig, ax = plt.subplots()\n",
+ "ax.set_xlim(30, 90)\n",
+ "ax.set_ylim(0, 1)\n",
+ "sns.regplot(x='Temperature', y='Frequency', data=data, logistic=True, ax=ax)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}