{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting plotly\n", " Downloading plotly-5.11.0-py2.py3-none-any.whl (15.3 MB)\n", "\u001b[K |████████████████████████████████| 15.3 MB 453 kB/s eta 0:00:01\n", "\u001b[?25hCollecting tenacity>=6.2.0\n", " Downloading tenacity-8.1.0-py3-none-any.whl (23 kB)\n", "Installing collected packages: tenacity, plotly\n", "Successfully installed plotly-5.11.0 tenacity-8.1.0\n" ] } ], "source": [ "!pip install plotly" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Import the core libraries for the analysis\n", "\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import warnings\n", "plt.style.use(\"ggplot\") # use the ggplot style for all matplotlib figures\n", "\n", "%matplotlib inline\n", "import plotly.graph_objects as go\n", "import plotly.express as px" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('Subject6_smoking.csv',encoding='utf8')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SmokerStatusAge
0YesAlive21.0
1YesAlive19.3
2NoDead57.5
3NoAlive47.1
4YesAlive81.4
5NoAlive36.8
6NoAlive23.8
7YesDead57.5
8YesAlive24.8
9YesAlive49.5
\n", "
" ], "text/plain": [ " Smoker Status Age\n", "0 Yes Alive 21.0\n", "1 Yes Alive 19.3\n", "2 No Dead 57.5\n", "3 No Alive 47.1\n", "4 Yes Alive 81.4\n", "5 No Alive 36.8\n", "6 No Alive 23.8\n", "7 Yes Dead 57.5\n", "8 Yes Alive 24.8\n", "9 Yes Alive 49.5" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(10)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Share of smokers vs. non-smokers in the dataset\n", "\n", "df['Smoker'].value_counts().plot.pie(explode=[0,0.1],autopct='%1.1f%%',shadow=True)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# how much percentage Gender in the dataset\n", "\n", "df['Status'].value_counts().plot.pie(explode=[0,0.1],autopct='%1.1f%%',shadow=True)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAADaZJREFUeJzt3V9sZOdZx/GvG2dNA0VOGHaxp0UL0iotWakRiVBFpGppasSfqLtU6qOgFplS4puqBIREt1wQbkDLDWpuTYtwRRT2IU21C4iqkSFEvSCQRCltmlYrYAmxzS4WG9G0yGbLcOET2Aur59ie8Xgffz+SNfOeeY/fx9LRz0fPzJkzMRgMkCTd/N407gIkScNhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBUxuc/reVmqJO3ORNuE/Q50VldX93tJqVWv12N9fX3cZUjbmp2d7TTPloskFWGgS1IRBrokFWGgS1IRnd4UjYiHgYfYepf1DzLzkxFxB3AeOA5cBiIzr42oTklSi9Yz9Ig4yVaY/xjwTuCBiDgBnAWWM/MEsNyMJUlj0qXl8g7gbzPzW5l5Hfgb4OeA08BSM2cJODOaEiVJXXQJ9K8A746I74uI24CfAd4GHMvMNYDm8ejoypQktWntoWfmyxHxe8BTwOvAl4DrXReIiAVgofld9Hq9XZYqdTc1NbUv62xsbOzLOlIXEzu9SXRE/C7wKvAwcCoz1yJiBng6M+9s2X3glaI6iPr9PisrK+MuQ9pWc6Vo66X/nT62GBFHm8cfBN4PPA5cBOabKfPAhd0UKkkajq6fQ/9sRHwV+DPgo83HE88BcxFxCZhrxpKkMdlxy2WPbLnoQLLlooNsqC0XSdLBZ6BLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVMdllUkT8GvDLwAD4MvBh4DbgPHAcuAxEc/NoSdIYtJ6hR0Qf+BXg3sw8CdwCPAicBZYz8wSw3IwlSWPSteUyCbw5IibZOjNfBU4DS83rS8CZ4ZcnSepqYjAYtE6KiIeB3wH+C/hCZn4wIl7LzOkb5lzLzNu32XcBWADIzHs2NzeHVrw0LFNTU2xsbIy7DGlbR44cAZhom9faQ4+I29k6G/8h4DXgTyPiQ10LycxFYLEZDtbX17vuKu0rj00dVLOzs53mdWm5vBf458z898z8b+BJ4MeBKxExA9A8Xt1lrZKkIejyKZdXgHdFxG1stVzuB54DvgnMA+eaxwujKlKS1K71DD0znwWeAF5g6yOLb2KrhXIOmIuIS8BcM5YkjUmnN0WHaLC6urqf60md9Pt9VlZWxl2GtK2mh976pqhXikpSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBXRepPoiLgTOH/Dph8Gfgv4TLP9OHAZiMy8NvwSJUlddLlJ9Ncz8+7MvBu4B/gW8DngLLCcmSeA5WYsSRqTnbZc7gf+MTP/BTgNLDXbl4AzwyxMkrQzOw30B4HHm+fHMnMNoHk8OszCJEk7
09pDf0NEHAHeB3xiJwtExAKwAJCZ9Hq9HRUo7RePTd3sOgc68NPAC5l5pRlfiYiZzFyLiBng6nY7ZeYisNgMB+vr67uvVhohj00dVLOzs53m7aTl8vP8f7sF4CIw3zyfBy7s4HdJkoasU6BHxG3AHPDkDZvPAXMRcal57dzwy5MkdTUxGAz2c73B6urqfq4nddLv91lZWRl3GdK2mpbLRNs8rxSVpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCImu0yKiGngU8BJYAD8EvB14DxwHLgMRGZeG0mVkqRWXc/QHwU+n5lvB94JvAycBZYz8wSw3IwlSWPSeoYeEd8LvBv4RYDM3AQ2I+I0cKqZtgQ8DXx8FEXqcLvrrrt47bXXRr5Ov98f6e+fnp7mpZdeGukaOtwmBoPBd5wQEXcDi8BX2To7fx54GFjJzOkb5l3LzNu32X8BWADIzHs2NzeHV70OhampKTY2Nka6xuTkJNevXx/pGvvxd6imI0eOAEy0zevSQ58EfhT4WGY+GxGPsoP2SmYusvUPAWCwvr7edVfp/4z6uOn1eiNfA0b/d6im2dnZTvO69NBfBV7NzGeb8RNsBfyViJgBaB6v7qJOSdKQtAZ6Zv4b8K8RcWez6X622i8Xgflm2zxwYSQVSpI66fSxReBjwGMRcQT4J+DDbP0zyIj4CPAK8IHRlChJ6qJToGfmi8C927x0/3DLkSTtlleKSlIRBrokFWGgS1IRBrokFWGgS1IRBrokFWGgS1IRBrokFWGgS1IRBrokFWGgS1IRBrokFWGgS1IRBrokFWGgS1IRBrokFWGgS1IRBrokFdHpFnQRcRn4BvBt4Hpm3hsRdwDngePAZSAy89poypQktdnJGfpPZObdmfnGvUXPAsuZeQJYbsaSpDHZS8vlNLDUPF8Czuy9HEnSbnUN9AHwhYh4PiIWmm3HMnMNoHk8OooCJUnddOqhA/dl5mpEHAWeioivdV2g+QewAJCZ9Hq9XZSpw27Ux83k5OS+HJse/xqlicFgsKMdIuK3gdeBh4BTmbkWETPA05l5Z8vug9XV1V0VqsOr3++zsrIy0jV6vR7r6+sjXWM//g7VNDs7CzDRNq+15RIR3x0Rb3njOfCTwFeAi8B8M20euLDbYiVJe9elh34M+GJEfAn4O+AvMvPzwDlgLiIuAXPNWJI0JjtuueyRLRftmC0XHXZDa7lIkm4OBrokFWGgS1IRBrokFWGgS1IRBrokFWGgS1IRBrokFWGgS1IRXimqA+/0Y52/3PPAu/DBt4+7BN2Eul4paqDrwPPSfx12XvovSYeMgS5JRRjoklSEgS5JRRjoklSEgS5JRRjoklSEgS5JRUx2nRgRtwDPASuZ+UBE3AGcB44Dl4HIzGujKFKS1G4nZ+gPAy/fMD4LLGfmCWC5GUuSxqRToEfEW4GfBT51w+bTwFLzfAk4M9zSJEk70bXl8kngN4C33LDtWGauAWTmWkQc3W7HiFgAFpp59Hq9PZSrw2rUx83k5OS+HJse/xql1kCPiAeAq5n5fESc2ukCmbkILDbDwai/AEk1jfq42Y8v54LR/x2qqflyrlZdWi73Ae+LiMvAnwDviYg/Bq5ExAxA83h1d6VKkoahNdAz8xOZ+dbMPA48CPxVZn4IuAjMN9PmgQsjq1KS1Govn0M/B8xFxCVgrhlLksbEG1zowPMGFzrsvMGFJB0yBrokFWGgS1IRBrokFWGgS1IRBrokFWGgS1IRBrokFWGgS1IRBrokFWGgS1IRBrokFWGgS1IRBrokFWGgS1IRBrokFWGgS1IRBrokFTHZNiEivgt4Bphq5j+RmY9ExB3AeeA4cBmIzLw2ulJ1mPX7/XGXsGfT09PjLkHFtQY6sAG8JzNfj4hbgS9GxF8C7weWM/NcRJwFzgIfH2Gt
OqT24z6c3u9TFbQGemYOgNeb4a3NzwA4DZxqti8BT2OgS9LYdOqhR8QtEfEicBV4KjOfBY5l5hpA83h0dGVKktp0abmQmd8G7o6IaeBzEXGy6wIRsQAsNL+HXq+3q0KlUfPY1M1uYjAY7GiHiHgE+CbwEHAqM9ciYgZ4OjPvbNl9sLq6urtKpRGyh66DbHZ2FmCibV5ryyUivr85Myci3gy8F/gacBGYb6bNAxd2W6wkae+69NBngL+OiH8A/p6tHvqfA+eAuYi4BMw1Y0nSmOy45bJHtlx0INly0UE2tJaLJOnmYKBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVMdk2ISLeBnwG+AHgf4DFzHw0Iu4AzgPHgctAZOa10ZUqSfpOupyhXwd+PTPfAbwL+GhE/AhwFljOzBPAcjOWJI1Ja6Bn5lpmvtA8/wbwMtAHTgNLzbQl4MyoipQktZsYDAadJ0fEceAZ4CTwSmZO3/Datcy8fZt9FoAFgMy8Z3Nzc681S0M3NTXFxsbGuMuQtnXkyBGAibZ5rT30N0TE9wCfBX41M/8zIjrtl5mLwGIzHKyvr3ddUtpXHps6qGZnZzvN6/Qpl4i4la0wfywzn2w2X4mImeb1GeDqLuqUJA1Ja6BHxATwaeDlzPz9G166CMw3z+eBC8MvT5LUVZeWy33ALwBfjogXm22/CZwDMiI+ArwCfGA0JUqSutjRm6JDMFhdXd3P9aRO+v0+Kysr4y5D2lbTQx/em6LSzaTf7+/LPv4T0EFioKuknQZtr9fzUy666fldLpJUhIEuSUUY6JJUhIEuSUUY6JJUhIEuSUUY6JJUhIEuSUXs+6X/+7mYJBVy4C79by1IGoeIeC4z7x13HdJe2HKRpCIMdEkqwkCXtiy2T5EOtv1+U1SSNCKeoUtSEX4fug61iPhD4AHgamaeHHc90l54hq7D7o+Anxp3EdIwGOg61DLzGeA/xl2HNAwGuiQVYaBLUhEGuiQVYaBLUhFeWKRDLSIeB04BPeAK8EhmfnqsRUm7ZKBLUhG2XCSpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkor4X34Od/+pT7dLAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Box plot summarizing the distribution of Age\n", "\n", "plt.boxplot(df[\"Age\"])\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AgeSmoker_NoSmoker_YesStatus_AliveStatus_Dead
021.00110
119.30110
257.51001
347.11010
481.40110
536.81010
623.81010
757.50101
824.80110
949.50110
\n", "
" ], "text/plain": [ " Age Smoker_No Smoker_Yes Status_Alive Status_Dead\n", "0 21.0 0 1 1 0\n", "1 19.3 0 1 1 0\n", "2 57.5 1 0 0 1\n", "3 47.1 1 0 1 0\n", "4 81.4 0 1 1 0\n", "5 36.8 1 0 1 0\n", "6 23.8 1 0 1 0\n", "7 57.5 0 1 0 1\n", "8 24.8 0 1 1 0\n", "9 49.5 0 1 1 0" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# One-hot encode the categorical columns (Smoker, Status) and preview the result\n", "dfobject = pd.get_dummies(df)\n", "dfobject.head(10)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "# Keep one indicator per category: Smoker_Yes and Status_Dead\n", "df = dfobject.drop(columns=['Smoker_No', 'Status_Alive'])" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AgeSmoker_YesStatus_Dead
021.010
119.310
257.501
347.100
481.410
536.800
623.800
757.511
824.810
949.510
\n", "
" ], "text/plain": [ " Age Smoker_Yes Status_Dead\n", "0 21.0 1 0\n", "1 19.3 1 0\n", "2 57.5 0 1\n", "3 47.1 0 0\n", "4 81.4 1 0\n", "5 36.8 0 0\n", "6 23.8 0 0\n", "7 57.5 1 1\n", "8 24.8 1 0\n", "9 49.5 1 0" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(10)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "# Shorter column names: smoker is the Smoker_Yes indicator, status is the Status_Dead indicator\n", "df = df.rename(columns={'Smoker_Yes': 'smoker', 'Status_Dead': 'status'})" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Agesmokerstatus
021.010
119.310
257.501
347.100
481.410
\n", "
" ], "text/plain": [ " Age smoker status\n", "0 21.0 1 0\n", "1 19.3 1 0\n", "2 57.5 0 1\n", "3 47.1 0 0\n", "4 81.4 1 0" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(5)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "smoker\n", "0 230\n", "1 139\n", "Name: status, dtype: uint8" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "summary=df.groupby([\"smoker\"])[\"status\"].sum().round(0)\n", "summary" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# Bin Age into the half-open intervals [18, 34), [34, 55), [55, 65) and [65, inf)\n", "# so that every age from 18 upwards gets exactly one label. The previous\n", "# between() masks left ages in (54, 55), (64, 65) and exactly 65 unlabelled (NaN)\n", "# and assigned age 34 twice. Labels are unchanged; ages below 18 still get NaN.\n", "# pd.cut returns an ordered categorical, so grouped output follows age order.\n", "age_bins = [18, 34, 55, 65, np.inf]\n", "age_labels = ['18-34', '34-54', '55-64', '+ 65']\n", "df['age_group'] = pd.cut(df['Age'], bins=age_bins, labels=age_labels, right=False)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Agesmokerstatusage_group
021.01018-34
119.31018-34
257.50155-64
347.10034-54
481.410+ 65
\n", "
" ], "text/plain": [ " Age smoker status age_group\n", "0 21.0 1 0 18-34\n", "1 19.3 1 0 18-34\n", "2 57.5 0 1 55-64\n", "3 47.1 0 0 34-54\n", "4 81.4 1 0 + 65" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(5)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "from os import getcwd, path\n", "import plotly.express as px\n", "import plotly.offline as pyo\n", "pyo.init_notebook_mode()\n" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "status\n", "0 AxesSubplot(0.125,0.125;0.775x0.755)\n", "1 AxesSubplot(0.125,0.125;0.775x0.755)\n", "Name: Age, dtype: object" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "df.groupby('status').Age.plot(kind='kde')" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Count rows per (age_group, status) and plot them as grouped bars.\n", "# The table is kept in its own variable: rebinding df here would discard the\n", "# Age / smoker / status columns that the modelling cells below still read.\n", "age_status_counts = df.groupby(['age_group','status']).size().unstack()\n", "age_status_counts.plot(kind='bar')" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LogisticRegression" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [], "source": [ "#split dataset in features and target variable\n", "feature_cols = ['Age','smoker']\n", "X = df[feature_cols] # Features\n", "y = df.status # Target variable" ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [], "source": [ "# split X and y into training and testing sets\n", "from sklearn.model_selection import train_test_split\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=16)" ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [], "source": [ "from sklearn import preprocessing\n", "from sklearn import utils\n", "lab_enc = preprocessing.LabelEncoder()\n", "y_train = lab_enc.fit_transform(y_train)" ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", " intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n", " penalty='l2', random_state=0, solver='liblinear', tol=0.0001,\n", " verbose=0, warm_start=False)" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#REG LOG\n", "from sklearn.linear_model import LogisticRegression\n", "classifier = LogisticRegression(random_state = 0)\n", "classifier.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [], "source": [ "log_reg=LogisticRegression()\n", "log_model=log_reg.fit(X_train,y_train)\n", 
"ypred_lr_test=log_model.predict(X_test)\n", "ypred_lr_train=log_model.predict(X_train)\n", "ypred_lr_probability=log_model.predict_proba(X_test)[:,1]" ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Confusion Matrix form Logistic Regression Model : \n", "------------------------------------------------ \n", "\n", "[[220 27]\n", " [ 28 54]]\n" ] } ], "source": [ "from sklearn.metrics import confusion_matrix,accuracy_score,roc_auc_score,roc_curve,log_loss,classification_report\n", "confusion_mat=confusion_matrix(y_test,ypred_lr_test)\n", "tn = confusion_mat[0,0]\n", "tp = confusion_mat[1,1]\n", "fp = confusion_mat[0,1]\n", "fn = confusion_mat[1,0]\n", "print('Confusion Matrix form Logistic Regression Model : ')\n", "print('------------------------------------------------','\\n')\n", "print(confusion_mat)" ] }, { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Sensitivity of the Logistic Regression Model: \n", "--------------------------------------------- \n", "\n", "0.6585365853658537\n", "\n", "\n", "Specificity of the Logistic Regression Model: \n", "--------------------------------------------- \n", "\n", "0.8906882591093117\n" ] } ], "source": [ "Sensitivity_Logistic=(tp/(tp+fn))\n", "print('Sensitivity of the Logistic Regression Model: ')\n", "print('---------------------------------------------','\\n')\n", "print(Sensitivity_Logistic)\n", "print('\\n')\n", "Specificity_Logistic=(tn/(tn+fp))\n", "print('Specificity of the Logistic Regression Model: ')\n", "print('---------------------------------------------','\\n')\n", "print(Specificity_Logistic)" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The Accuracy score of the test data for Logistic Regression model : \n", "0.8328267477203647 \n", "\n", "The 
Accuracy score of the train data for Logistic Regression model : \n", "0.8527918781725888\n" ] } ], "source": [ "print('The Accuracy score of the test data for Logistic Regression model : ')\n", "print(accuracy_score(y_test,ypred_lr_test),'\\n')\n", "print('The Accuracy score of the train data for Logistic Regression model : ')\n", "print(accuracy_score(y_train,ypred_lr_train))" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Logit model classification report: \n", "------------------------------------ \n", "\n", " precision recall f1-score support\n", "\n", " 0 0.89 0.89 0.89 247\n", " 1 0.67 0.66 0.66 82\n", "\n", "avg / total 0.83 0.83 0.83 329\n", "\n" ] } ], "source": [ "# Computing the classification report:\n", "\n", "logistic_report=classification_report(y_test,ypred_lr_test)\n", "print('Logit model classification report: ')\n", "print('------------------------------------','\\n')\n", "print(logistic_report)" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fpr,tpr,th=roc_curve(y_test,ypred_lr_probability)\n", "plt.figure(figsize=(10,8))\n", "plt.plot(fpr,tpr,color='green')\n", "plt.xlim([-0.05,1.05])\n", "plt.ylim([-0.05,1.05])\n", "plt.xlabel('FPR (1-Specificity)')\n", "plt.ylabel('TPR (Sensitivity)')\n", "plt.title('ROC curve for Logistic Regression Model')\n", "plt.grid()\n", "plt.plot([-0.05,1.05],[-0.05,1.05],'r--',linewidth=2)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }