# Analysis of malfunction frequency as a function of temperature.
#
# Loads data_shuttle.csv, fits a binomial GLM with a logit link on
# Temperature (statsmodels), and plots observed and predicted malfunction
# frequencies. Written to run both inside IPython/Jupyter and as a script.

import platform
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm


def print_imported_modules():
    """Print the name and version of every loaded module that exposes one.

    Iterates over ``sys.modules`` in name order; entries without a
    ``__version__`` attribute are skipped silently.
    """
    for _, module in sorted(sys.modules.items()):
        if hasattr(module, "__version__"):
            print(module.__name__, module.__version__)


def print_sys_info():
    """Print the Python interpreter version and the platform ``uname`` tuple."""
    print(sys.version)
    print(platform.uname())


def _enable_inline_plots():
    """Activate matplotlib's inline backend when running under IPython.

    Equivalent to the ``%matplotlib inline`` line magic, but valid Python:
    outside IPython ``get_ipython`` is undefined and this is a no-op.
    """
    try:
        shell = get_ipython()  # noqa: F821 - injected by IPython only
    except NameError:
        return
    if shell is not None:
        shell.run_line_magic("matplotlib", "inline")


# Column order used when fitting; predictions must use the same order because
# statsmodels matches exog columns by position, not by name.
EXOG_COLUMNS = ["Intercept", "Temperature"]

print_sys_info()
print_imported_modules()

data = pd.read_csv("data_shuttle.csv")
# Printed explicitly: a bare expression only displays as the last statement
# of a notebook cell.
print(data)

_enable_inline_plots()
pd.set_option('mode.chained_assignment', None)  # this removes a useless warning from pandas

# Observed malfunction frequency per row.
data["Frequency"] = data.Malfunction / data.Count
data.plot(x="Temperature", y="Frequency", kind="scatter", ylim=[0, 1])
plt.grid(True)

data["Success"] = data.Count - data.Malfunction
data["Intercept"] = 1

# First fit: every row counted equally, regardless of its Count.
# The link is passed as an instance; passing the class itself is rejected by
# current statsmodels releases.
logmodel = sm.GLM(
    data['Frequency'],
    data[EXOG_COLUMNS],
    family=sm.families.Binomial(sm.families.links.Logit()),
).fit()
print(logmodel.summary())

# Second fit: same model, with each row weighted by its Count through
# var_weights. This is the fit used for the predictions below.
logmodel = sm.GLM(
    data['Frequency'],
    data[EXOG_COLUMNS],
    family=sm.families.Binomial(sm.families.links.Logit()),
    var_weights=data['Count'],
).fit()
print(logmodel.summary())

# Predicted frequency on a regular temperature grid (30 to 90, step 0.5).
data_pred = pd.DataFrame({'Temperature': np.linspace(start=30, stop=90, num=121),
                          'Intercept': 1})
# Select the columns explicitly in fit order: the DataFrame above is ordered
# [Temperature, Intercept], which would otherwise swap the coefficients.
data_pred['Frequency'] = logmodel.predict(data_pred[EXOG_COLUMNS])
data_pred.plot(x="Temperature", y="Frequency", kind="line", ylim=[0, 1])
plt.scatter(x=data["Temperature"], y=data["Frequency"])
plt.grid(True)

# Start a fresh figure so the seaborn fit is not drawn on top of the
# prediction plot above when everything runs in a single pass.
plt.figure()
sns.set(color_codes=True)
plt.xlim(30, 90)
plt.ylim(0, 1)
sns.regplot(x='Temperature', y='Frequency', data=data, logistic=True)
plt.show()