From 07e5ee3bf02f7a7e94b03a79876508e14914f71f Mon Sep 17 00:00:00 2001 From: Cyril Date: Mon, 15 Mar 2021 14:29:30 +0100 Subject: [PATCH] repro --- module4/install.sh | 5 + module4/repro.ipynb | 801 +++++++++++++++++++++++++++++++++++++++ module4/requirements.txt | 67 ++++ 3 files changed, 873 insertions(+) create mode 100755 module4/install.sh create mode 100644 module4/repro.ipynb create mode 100644 module4/requirements.txt diff --git a/module4/install.sh b/module4/install.sh new file mode 100755 index 0000000..4225642 --- /dev/null +++ b/module4/install.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +python3.9 -m venv venv +source venv/bin/activate +pip install -r requirements.txt diff --git a/module4/repro.ipynb b/module4/repro.ipynb new file mode 100644 index 0000000..83b8e6b --- /dev/null +++ b/module4/repro.ipynb @@ -0,0 +1,801 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "considerable-stanford", + "metadata": {}, + "source": [ + "# Risk Analysis of the Space Shuttle: Pre-Challenger Prediction of Failure\n", + "(copy with some modifications from https://app-learninglab.inria.fr/moocrr/gitlab/moocrr-session3/moocrr-reproducibility-study/blob/master/src_Python3_challenger__1_.ipynb graphs doesnt give the same output)\n", + "\n", + "---\n", + "\n", + "In this document we reperform some of the analysis provided in *Risk Analysis of the Space Shuttle: Pre-Challenger Prediction of Failure* by *Siddhartha R. Dalal, Edward B. Fowlkes, Bruce Hoadley* published in *Journal of the American Statistical Association*, Vol. 84, No. 408 (Dec., 1989), pp. 945-957 and available at \n", + "http://www.jstor.org/stable/2290069.\n", + "\n", + "On the fourth page of this article, they indicate that the maximum likelihood estimates of the logistic regression using only temperature are: $\\\\hat{\\\\alpha}=5.085$ and $\\\\hat{\\\\beta}=-0.1156$ and their asymptotic standard errors are $s_{\\\\hat{\\\\alpha}}=3.052$ and $s_{\\\\hat{\\\\beta}}=0.047$. 
The Goodness of fit indicated for this model was $G^2=18.086$ with 21 degrees of freedom. \n", + "\n", + "Our goal is to reproduce the computation behind these values and the Figure 4 of this article,\n", + "possibly in a nicer looking way.\n", + "\n", + "## Technical information on the computer on which the analysis is run\n", + "\n", + "We will be using the python3.9 language using the pandas, statsmodels, numpy, matplotlib and seaborn libraries." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "suffering-night", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3.9.0+ (default, Oct 19 2020, 09:51:18) \n", + "[GCC 10.2.0]\n", + "uname_result(system='Linux', node='dell', release='5.8.0-44-generic', version='#50-Ubuntu SMP Tue Feb 9 06:29:41 UTC 2021', machine='x86_64')\n", + "IPython 7.21.0\n", + "IPython.core.release 7.21.0\n", + "PIL 8.1.2\n", + "PIL.Image 8.1.2\n", + "PIL._version 8.1.2\n", + "_csv 1.0\n", + "_ctypes 1.1.0\n", + "_curses b'2.2'\n", + "decimal 1.70\n", + "argparse 1.1\n", + "backcall 0.2.0\n", + "cffi 1.14.5\n", + "csv 1.0\n", + "ctypes 1.1.0\n", + "cycler 0.10.0\n", + "dateutil 2.8.1\n", + "decimal 1.70\n", + "decorator 4.4.2\n", + "distutils 3.9.0+\n", + "ipykernel 5.5.0\n", + "ipykernel._version 5.5.0\n", + "ipython_genutils 0.2.0\n", + "ipython_genutils._version 0.2.0\n", + "ipywidgets 7.6.3\n", + "ipywidgets._version 7.6.3\n", + "jedi 0.18.0\n", + "json 2.0.9\n", + "jupyter_client 6.1.11\n", + "jupyter_client._version 6.1.11\n", + "jupyter_core 4.7.1\n", + "jupyter_core.version 4.7.1\n", + "kiwisolver 1.3.1\n", + "logging 0.5.1.2\n", + "matplotlib 3.3.4\n", + "numpy 1.20.1\n", + "numpy.core 1.20.1\n", + "numpy.core._multiarray_umath 3.1\n", + "numpy.lib 1.20.1\n", + "numpy.linalg._umath_linalg 0.1.5\n", + "pandas 1.2.3\n", + "pandas.compat.numpy.function 1.20.1\n", + "parso 0.8.1\n", + "patsy 0.5.1\n", + "patsy.version 0.5.1\n", + "pexpect 4.8.0\n", + "pickleshare 
0.7.5\n", + "platform 1.0.8\n", + "prompt_toolkit 3.0.17\n", + "ptyprocess 0.7.0\n", + "pygments 2.8.1\n", + "pyparsing 2.4.7\n", + "pytz 2021.1\n", + "re 2.2.1\n", + "scipy 1.6.1\n", + "scipy._lib._uarray 0.5.1+49.g4c3f1d7.scipy\n", + "scipy._lib.decorator 4.0.5\n", + "scipy.integrate._dop b'$Revision: $'\n", + "scipy.integrate._ode $Id$\n", + "scipy.integrate._odepack 1.9 \n", + "scipy.integrate._quadpack 1.13 \n", + "scipy.integrate.lsoda b'$Revision: $'\n", + "scipy.integrate.vode b'$Revision: $'\n", + "scipy.interpolate._fitpack 1.7 \n", + "scipy.interpolate.dfitpack b'$Revision: $'\n", + "scipy.linalg._fblas b'$Revision: $'\n", + "scipy.linalg._flapack b'$Revision: $'\n", + "scipy.linalg._flinalg b'$Revision: $'\n", + "scipy.linalg._interpolative b'$Revision: $'\n", + "scipy.ndimage 2.0\n", + "scipy.optimize.__nnls b'$Revision: $'\n", + "scipy.optimize._cobyla b'$Revision: $'\n", + "scipy.optimize._lbfgsb b'$Revision: $'\n", + "scipy.optimize._minpack 1.10 \n", + "scipy.optimize._slsqp b'$Revision: $'\n", + "scipy.optimize.minpack2 b'$Revision: $'\n", + "scipy.signal.spline 0.2\n", + "scipy.sparse.linalg.eigen.arpack._arpack b'$Revision: $'\n", + "scipy.sparse.linalg.isolve._iterative b'$Revision: $'\n", + "scipy.special.specfun b'$Revision: $'\n", + "scipy.stats.mvn b'$Revision: $'\n", + "scipy.stats.statlib b'$Revision: $'\n", + "seaborn 0.11.1\n", + "seaborn.external.husl 2.1.0\n", + "six 1.15.0\n", + "statsmodels 0.12.2\n", + "statsmodels.__init__ 0.12.2\n", + "statsmodels.api 0.12.2\n", + "statsmodels.tools.web 0.12.2\n", + "traitlets 5.0.5\n", + "traitlets._version 5.0.5\n", + "urllib.request 3.9\n", + "wcwidth 0.2.5\n", + "zlib 1.0\n", + "zmq 22.0.3\n", + "zmq.sugar 22.0.3\n", + "zmq.sugar.version 22.0.3\n" + ] + } + ], + "source": [ + "def print_imported_modules():\n", + " import sys\n", + " for name, val in sorted(sys.modules.items()):\n", + " if(hasattr(val, '__version__')): \n", + " print(val.__name__, val.__version__)\n", + "# else:\n", + "# 
print(val.__name__, \"(unknown version)\")\n", + "def print_sys_info():\n", + " import sys\n", + " import platform\n", + " print(sys.version)\n", + " print(platform.uname())\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import statsmodels.api as sm\n", + "import seaborn as sns\n", + "\n", + "print_sys_info()\n", + "print_imported_modules()" + ] + }, + { + "cell_type": "markdown", + "id": "purple-budapest", + "metadata": {}, + "source": [ + "## Loading and inspecting data\n", + "---\n", + "Let's start by reading data." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "aquatic-starter", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateCountTemperaturePressureMalfunction
04/12/81666500
111/12/81670501
23/22/82669500
311/11/82668500
44/04/83667500
56/18/82672500
68/30/836731000
711/28/836701000
82/03/846572001
94/06/846632001
108/30/846702001
1110/05/846782000
1211/08/846672000
131/24/856532002
144/12/856672000
154/29/856752000
166/17/856702000
177/2903/856812000
188/27/856762000
1910/03/856792000
2010/30/856752002
2111/26/856762000
221/12/866582001
\n", + "
" + ], + "text/plain": [ + " Date Count Temperature Pressure Malfunction\n", + "0 4/12/81 6 66 50 0\n", + "1 11/12/81 6 70 50 1\n", + "2 3/22/82 6 69 50 0\n", + "3 11/11/82 6 68 50 0\n", + "4 4/04/83 6 67 50 0\n", + "5 6/18/82 6 72 50 0\n", + "6 8/30/83 6 73 100 0\n", + "7 11/28/83 6 70 100 0\n", + "8 2/03/84 6 57 200 1\n", + "9 4/06/84 6 63 200 1\n", + "10 8/30/84 6 70 200 1\n", + "11 10/05/84 6 78 200 0\n", + "12 11/08/84 6 67 200 0\n", + "13 1/24/85 6 53 200 2\n", + "14 4/12/85 6 67 200 0\n", + "15 4/29/85 6 75 200 0\n", + "16 6/17/85 6 70 200 0\n", + "17 7/2903/85 6 81 200 0\n", + "18 8/27/85 6 76 200 0\n", + "19 10/03/85 6 79 200 0\n", + "20 10/30/85 6 75 200 2\n", + "21 11/26/85 6 76 200 0\n", + "22 1/12/86 6 58 200 1" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv(\"https://app-learninglab.inria.fr/moocrr/gitlab/moocrr-session3/moocrr-reproducibility-study/raw/master/data/shuttle.csv\")\n", + "data" + ] + }, + { + "cell_type": "markdown", + "id": "steady-seven", + "metadata": {}, + "source": [ + "We know from our previous experience on this data set that filtering data is a really bad idea. We will therefore process it as such." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "diagnostic-optimum", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Silence the pandas chained-assignment warning for the rest of the session.\n", + "pd.set_option(\"mode.chained_assignment\", None)\n", + "\n", + "# Fraction of malfunctions per launch (Malfunction / Count), plotted against temperature.\n", + "data[\"Frequency\"] = data[\"Malfunction\"] / data[\"Count\"]\n", + "data.plot.scatter(x=\"Temperature\", y=\"Frequency\", ylim=[0, 1])\n", + "plt.grid(True)" + ] + }, + { + "cell_type": "markdown", + "id": "focal-screening", + "metadata": {}, + "source": [ + "## Logistic regression\n", + "---\n", + "Let's assume O-rings independently fail with the same probability which solely depends on temperature. A logistic regression should allow us to estimate the influence of temperature." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "tribal-sleep", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":7: DeprecationWarning: Calling Family(..) with a link class as argument is deprecated.\n", + "Use an instance of a link class instead.\n", + " family=sm.families.Binomial(sm.families.links.logit)).fit()\n" + ] + }, + { + "data": { + "text/html": [ + "
Generalized Linear Model Regression Results
Dep. Variable: Frequency No. Observations: 23
Model: GLM Df Residuals: 21
Model Family: Binomial Df Model: 1
Link Function: logit Scale: 1.0000
Method: IRLS Log-Likelihood: -3.9210
Date: Mon, 15 Mar 2021 Deviance: 3.0144
Time: 14:12:16 Pearson chi2: 5.00
No. Iterations: 6
Covariance Type: nonrobust
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
coef std err z P>|z| [0.025 0.975]
Intercept 5.0850 7.477 0.680 0.496 -9.570 19.740
Temperature -0.1156 0.115 -1.004 0.316 -0.341 0.110
" + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " Generalized Linear Model Regression Results \n", + "==============================================================================\n", + "Dep. Variable: Frequency No. Observations: 23\n", + "Model: GLM Df Residuals: 21\n", + "Model Family: Binomial Df Model: 1\n", + "Link Function: logit Scale: 1.0000\n", + "Method: IRLS Log-Likelihood: -3.9210\n", + "Date: Mon, 15 Mar 2021 Deviance: 3.0144\n", + "Time: 14:12:16 Pearson chi2: 5.00\n", + "No. Iterations: 6 \n", + "Covariance Type: nonrobust \n", + "===============================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "-------------------------------------------------------------------------------\n", + "Intercept 5.0850 7.477 0.680 0.496 -9.570 19.740\n", + "Temperature -0.1156 0.115 -1.004 0.316 -0.341 0.110\n", + "===============================================================================\n", + "\"\"\"" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import statsmodels.api as sm\n", + "\n", + "data[\"Success\"]=data.Count-data.Malfunction\n", + "data[\"Intercept\"]=1\n", + "\n", + "# Unweighted fit: no weights are passed, so every launch counts as one observation.\n", + "# The link is given as an instance; passing the link class itself is deprecated\n", + "# (statsmodels emits a DeprecationWarning for it).\n", + "logmodel=sm.GLM(data['Frequency'], data[['Intercept','Temperature']],\n", + "                family=sm.families.Binomial(sm.families.links.logit())).fit()\n", + "\n", + "logmodel.summary()" + ] + }, + { + "cell_type": "markdown", + "id": "tested-jungle", + "metadata": {}, + "source": [ + " The maximum likelihood estimates of the intercept and of Temperature are thus $\\hat{\\alpha}=5.0850$ and $\\hat{\\beta}=-0.1156$. This corresponds to the values from the article of Dalal et al. The standard errors are $s_{\\hat{\\alpha}} = 7.477$ and $s_{\\hat{\\beta}} = 0.115$, which are different from the $3.052$ and $0.04702$ reported by Dalal et al. The deviance is $3.01444$ with 21 degrees of freedom.
I cannot find any value similar to the Goodness of fit ($G^2=18.086$) reported by Dalal et al. There seems to be something wrong. Oh I know, I haven't indicated that my observations are actually the result of 6 observations for each rocket launch. Let's indicate these weights (since the weights are always the same throughout all experiments, it does not change the estimates of the fit but it does influence the variance estimates)." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "administrative-topic", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + ":2: DeprecationWarning: Calling Family(..) with a link class as argument is deprecated.\n", + "Use an instance of a link class instead.\n", + " family=sm.families.Binomial(sm.families.links.logit),\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
Generalized Linear Model Regression Results
Dep. Variable: Frequency No. Observations: 23
Model: GLM Df Residuals: 21
Model Family: Binomial Df Model: 1
Link Function: logit Scale: 1.0000
Method: IRLS Log-Likelihood: -23.526
Date: Mon, 15 Mar 2021 Deviance: 18.086
Time: 14:12:16 Pearson chi2: 30.0
No. Iterations: 6
Covariance Type: nonrobust
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
coef std err z P>|z| [0.025 0.975]
Intercept 5.0850 3.052 1.666 0.096 -0.898 11.068
Temperature -0.1156 0.047 -2.458 0.014 -0.208 -0.023
" + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " Generalized Linear Model Regression Results \n", + "==============================================================================\n", + "Dep. Variable: Frequency No. Observations: 23\n", + "Model: GLM Df Residuals: 21\n", + "Model Family: Binomial Df Model: 1\n", + "Link Function: logit Scale: 1.0000\n", + "Method: IRLS Log-Likelihood: -23.526\n", + "Date: Mon, 15 Mar 2021 Deviance: 18.086\n", + "Time: 14:12:16 Pearson chi2: 30.0\n", + "No. Iterations: 6 \n", + "Covariance Type: nonrobust \n", + "===============================================================================\n", + " coef std err z P>|z| [0.025 0.975]\n", + "-------------------------------------------------------------------------------\n", + "Intercept 5.0850 3.052 1.666 0.096 -0.898 11.068\n", + "Temperature -0.1156 0.047 -2.458 0.014 -0.208 -0.023\n", + "===============================================================================\n", + "\"\"\"" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Same model as before, now weighted by the per-launch Count column.\n", + "# The link is given as an instance; passing the link class itself is deprecated\n", + "# (statsmodels emits a DeprecationWarning for it).\n", + "logmodel=sm.GLM(data['Frequency'], data[['Intercept','Temperature']],\n", + "                family=sm.families.Binomial(sm.families.links.logit()),\n", + "                var_weights=data['Count']).fit()\n", + "\n", + "logmodel.summary()" + ] + }, + { + "cell_type": "markdown", + "id": "martial-colleague", + "metadata": {}, + "source": [ + "Good, now I have recovered the asymptotic standard errors $s_{\\hat{\\alpha}}=3.052$ and $s_{\\hat{\\beta}}=0.047$. The Goodness of fit (Deviance) indicated for this model is $G^2=18.086$ with 21 degrees of freedom (Df Residuals).\n", + "\n", + "*I have therefore managed to fully replicate the results of the Dalal et al. article.*\n", + "\n", + "## Predicting failure probability\n", + "---\n", + "\n", + "The temperature when launching the shuttle was 31°F.
Let's try to estimate the failure probability for such a temperature using our model:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "becoming-plaintiff", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib inline\n", + "data_pred = pd.DataFrame({'Temperature': np.linspace(start=30, stop=90, num=121), 'Intercept': 1})\n", + "# logmodel was fitted on a plain design matrix with columns ['Intercept', 'Temperature'].\n", + "# For such a (non-formula) model, predict() matches columns by position, not by name,\n", + "# so the columns must be passed in the fitting order; otherwise the two coefficients\n", + "# are swapped and the predicted probability saturates at 1.\n", + "data_pred['Frequency'] = logmodel.predict(data_pred[['Intercept', 'Temperature']])\n", + "data_pred.plot(x=\"Temperature\",y=\"Frequency\",kind=\"line\",ylim=[0,1])\n", + "plt.scatter(x=data[\"Temperature\"],y=data[\"Frequency\"])\n", + "plt.grid(True)" + ] + }, + { + "cell_type": "markdown", + "id": "breathing-junction", + "metadata": {}, + "source": [ + "This figure is very similar to the Figure 4 of Dalal et al. *I haven't managed to replicate the Figure 4 of the Dalal et al. article. The curve is missing*.\n", + "\n", + "## Computing and plotting uncertainty\n", + "---\n", + "Following the documentation of Seaborn, I use regplot." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "loaded-collapse", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.set(color_codes=True)\n", + "plt.xlim(30,90)\n", + "plt.ylim(0,1)\n", + "sns.regplot(x='Temperature', y='Frequency', data=data, logistic=True)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "vanilla-baseline", + "metadata": {}, + "source": [ + "*I think I haven't managed to correctly compute and plot the uncertainty of my prediction.* Although the shaded area seems very similar to the one obtained with R, I can spot a few differences (e.g., the blue point for temperature 63 is outside)... Could this be a numerical error? Or a difference in the statistical method? It is not clear which one is \"right\"." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "innovative-compensation", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.0+" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/module4/requirements.txt b/module4/requirements.txt new file mode 100644 index 0000000..44b08df --- /dev/null +++ b/module4/requirements.txt @@ -0,0 +1,68 @@ +argon2-cffi==20.1.0 +async-generator==1.10 +attrs==20.3.0 +backcall==0.2.0 +bleach==3.3.0 +cffi==1.14.5 +cycler==0.10.0 +decorator==4.4.2 +defusedxml==0.7.1 +entrypoints==0.3 +ipykernel==5.5.0 +ipython==7.21.0 +ipython-genutils==0.2.0 +ipywidgets==7.6.3 +jedi==0.18.0 +Jinja2==2.11.3 +jsonschema==3.2.0 +jupyter==1.0.0 +jupyter-client==6.1.11 +jupyter-console==6.2.0 +jupyter-core==4.7.1 +jupyterlab-pygments==0.1.2 +jupyterlab-widgets==1.0.0 +kiwisolver==1.3.1 +MarkupSafe==1.1.1 +matplotlib==3.3.4 +mistune==0.8.4
+mpmath==1.2.1 +nbclient==0.5.3 +nbconvert==6.0.7 +nbformat==5.1.2 +nest-asyncio==1.5.1 +nose==1.3.7 +notebook==6.2.0 +numpy==1.20.1 +packaging==20.9 +pandas==1.2.3 +pandocfilters==1.4.3 +parso==0.8.1 +patsy==0.5.1 +pexpect==4.8.0 +pickleshare==0.7.5 +Pillow==8.1.2 +prometheus-client==0.9.0 +prompt-toolkit==3.0.17 +ptyprocess==0.7.0 +pycparser==2.20 +Pygments==2.8.1 +pyparsing==2.4.7 +pyrsistent==0.17.3 +python-dateutil==2.8.1 +pytz==2021.1 +pyzmq==22.0.3 +qtconsole==5.0.2 +QtPy==1.9.0 +scipy==1.6.1 +seaborn==0.11.1 +Send2Trash==1.5.0 +six==1.15.0 +statsmodels==0.12.2 +sympy==1.7.1 +terminado==0.9.2 +testpath==0.4.4 +tornado==6.1 +traitlets==5.0.5 +wcwidth==0.2.5 +webencodings==0.5.1 +widgetsnbextension==3.5.1 -- 2.18.1