Diagram

parent b8ea82a1
......@@ -628,32 +628,21 @@
"metadata": {},
"outputs": [],
"source": [
"# Tally the Smoker x Status contingency counts with boolean masks instead of\n",
"# indexing every row through raw_data.iloc[i][k].\n",
"# Any Smoker value other than \"Yes\" counts as non-smoker, and any Status value\n",
"# other than \"Alive\" counts as dead.\n",
"# NOTE: Status must still hold the \"Alive\"/\"Dead\" labels when this cell runs;\n",
"# a later cell recodes the column to \"0\"/\"1\".\n",
"is_smoker = raw_data[\"Smoker\"] == \"Yes\"\n",
"is_alive = raw_data[\"Status\"] == \"Alive\"\n",
"\n",
"alive_and_smoker = int((is_alive & is_smoker).sum())\n",
"alive_and_non_smoker = int((is_alive & ~is_smoker).sum())\n",
"dead_and_smoker = int((~is_alive & is_smoker).sum())\n",
"dead_and_non_smoker = int((~is_alive & ~is_smoker).sum())\n",
"\n",
"# Marginal totals are derived from the joint counts so they always agree.\n",
"total_smoker = alive_and_smoker + dead_and_smoker\n",
"total_non_smoker = alive_and_non_smoker + dead_and_non_smoker\n",
"total_alive = alive_and_smoker + alive_and_non_smoker\n",
"total_dead = dead_and_smoker + dead_and_non_smoker\n"
]
},
{
......@@ -730,7 +719,7 @@
}
],
"source": [
"# Build the 3x3 summary table (rows: Alive, Dead, Total; columns: Smoker,\n",
"# Non-Smoker, Total). Every total is computed from the four joint counts.\n",
"alive_row = [alive_and_smoker, alive_and_non_smoker]\n",
"dead_row = [dead_and_smoker, dead_and_non_smoker]\n",
"total_row = [alive + dead for alive, dead in zip(alive_row, dead_row)]\n",
"\n",
"# Append each row's own sum as its \"Total\" column.\n",
"data = [row + [sum(row)] for row in (alive_row, dead_row, total_row)]\n",
"\n",
"pd.DataFrame(data, columns=[\"Smoker\", \"Non-Smoker\", \"Total\"], index=[\"Alive\", \"Dead\", \"Total\"])"
]
......@@ -854,21 +843,43 @@
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"582 732\n"
]
}
],
"source": [
"# Recode the outcome as \"1\" (dead) / \"0\" (alive). The result is assigned back\n",
"# instead of calling replace(..., inplace=True) on the selected column: under\n",
"# pandas Copy-on-Write the in-place form only changes a temporary object and\n",
"# leaves raw_data untouched.\n",
"raw_data[\"Status\"] = raw_data[\"Status\"].replace({\"Dead\": \"1\", \"Alive\": \"0\"})\n",
"\n",
"# Age-class buckets; whole rows are stored so each entry keeps its\n",
"# Smoker / Status / Age fields.\n",
"class_18_to_35 = []\n",
"class_35_to_55 = []\n",
"class_55_to_64 = []\n",
"class_over_65 = []\n",
"\n",
"# Smoking-status buckets.\n",
"smoker = []\n",
"non_smoker = []\n",
"\n",
"for _, row in raw_data.iterrows():\n",
"    age = row[\"Age\"]\n",
"    if age < 35:\n",
"        class_18_to_35.append(row)\n",
"    elif age < 55:\n",
"        class_35_to_55.append(row)\n",
"    elif age < 65:\n",
"        class_55_to_64.append(row)\n",
"    else:\n",
"        # 65 and above; a missing age also lands here because every\n",
"        # comparison above is False for NaN.\n",
"        class_over_65.append(row)\n",
"\n",
"    # Anything other than \"Yes\" is treated as a non-smoker.\n",
"    if row[\"Smoker\"] == \"Yes\":\n",
"        smoker.append(row)\n",
"    else:\n",
"        non_smoker.append(row)\n",
"\n",
"print(len(smoker), len(non_smoker))"
]
},
{
......@@ -878,68 +889,64 @@
"outputs": [],
"source": [
"# (label, exclusive upper age bound) for every class except the open-ended last one.\n",
"AGE_CLASS_UPPER_BOUNDS = ((\"18to35\", 35), (\"35to55\", 55), (\"55to64\", 65))\n",
"\n",
"\n",
"def count_by_status_and_age(rows):\n",
"    \"\"\"Tally rows by survival status and age class.\n",
"\n",
"    rows: iterable of raw_data rows (Series with \"Status\" and \"Age\" entries).\n",
"    Status \"0\" counts as alive; any other value counts as dead. Ages that fall\n",
"    under no bound in AGE_CLASS_UPPER_BOUNDS (65 and above, or missing) are\n",
"    counted as \"over65\".\n",
"    Returns a dict keyed by (\"alive\" | \"dead\", age-class label).\n",
"    \"\"\"\n",
"    labels = [name for name, _ in AGE_CLASS_UPPER_BOUNDS] + [\"over65\"]\n",
"    counts = {(status, name): 0 for status in (\"alive\", \"dead\") for name in labels}\n",
"    for row in rows:\n",
"        status = \"alive\" if row[\"Status\"] == \"0\" else \"dead\"\n",
"        age = row[\"Age\"]\n",
"        # Bounds are ascending, so the first one the age is below is its class.\n",
"        label = next(\n",
"            (name for name, upper in AGE_CLASS_UPPER_BOUNDS if age < upper),\n",
"            \"over65\",\n",
"        )\n",
"        counts[status, label] += 1\n",
"    return counts\n",
"\n",
"\n",
"smoker_counts = count_by_status_and_age(smoker)\n",
"alive_and_smoker_18to35 = smoker_counts[\"alive\", \"18to35\"]\n",
"dead_and_smoker_18to35 = smoker_counts[\"dead\", \"18to35\"]\n",
"alive_and_smoker_35to55 = smoker_counts[\"alive\", \"35to55\"]\n",
"dead_and_smoker_35to55 = smoker_counts[\"dead\", \"35to55\"]\n",
"alive_and_smoker_55to64 = smoker_counts[\"alive\", \"55to64\"]\n",
"dead_and_smoker_55to64 = smoker_counts[\"dead\", \"55to64\"]\n",
"alive_and_smoker_over65 = smoker_counts[\"alive\", \"over65\"]\n",
"dead_and_smoker_over65 = smoker_counts[\"dead\", \"over65\"]\n",
"\n",
"non_smoker_counts = count_by_status_and_age(non_smoker)\n",
"alive_and_non_smoker_18to35 = non_smoker_counts[\"alive\", \"18to35\"]\n",
"dead_and_non_smoker_18to35 = non_smoker_counts[\"dead\", \"18to35\"]\n",
"alive_and_non_smoker_35to55 = non_smoker_counts[\"alive\", \"35to55\"]\n",
"dead_and_non_smoker_35to55 = non_smoker_counts[\"dead\", \"35to55\"]\n",
"alive_and_non_smoker_55to64 = non_smoker_counts[\"alive\", \"55to64\"]\n",
"dead_and_non_smoker_55to64 = non_smoker_counts[\"dead\", \"55to64\"]\n",
"alive_and_non_smoker_over65 = non_smoker_counts[\"alive\", \"over65\"]\n",
"dead_and_non_smoker_over65 = non_smoker_counts[\"dead\", \"over65\"]\n"
]
},
{
......@@ -1107,482 +1114,69 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Smoker</th>\n",
" <th>Status</th>\n",
" <th>Age</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>21.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>19.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>No</td>\n",
" <td>1</td>\n",
" <td>57.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>47.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>81.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>36.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>23.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Yes</td>\n",
" <td>1</td>\n",
" <td>57.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>24.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>49.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>30.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>No</td>\n",
" <td>1</td>\n",
" <td>66.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>49.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>58.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>No</td>\n",
" <td>1</td>\n",
" <td>60.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>25.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>43.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>27.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>58.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>65.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>No</td>\n",
" <td>1</td>\n",
" <td>73.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>38.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>33.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>Yes</td>\n",
" <td>1</td>\n",
" <td>62.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>18.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>56.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>59.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>25.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>No</td>\n",
" <td>1</td>\n",
" <td>36.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>20.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1284</th>\n",
" <td>Yes</td>\n",
" <td>1</td>\n",
" <td>36.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1285</th>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>48.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1286</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>63.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1287</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>60.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1288</th>\n",
" <td>Yes</td>\n",
" <td>1</td>\n",
" <td>39.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1289</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>36.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1290</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>63.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1291</th>\n",
" <td>No</td>\n",
" <td>1</td>\n",
" <td>71.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1292</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>57.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1293</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>63.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1294</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>46.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1295</th>\n",
" <td>Yes</td>\n",
" <td>1</td>\n",
" <td>82.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1296</th>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>38.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1297</th>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>32.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1298</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>39.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1299</th>\n",
" <td>Yes</td>\n",
" <td>1</td>\n",
" <td>60.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1300</th>\n",
" <td>No</td>\n",
" <td>1</td>\n",
" <td>71.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1301</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>20.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1302</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>44.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1303</th>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>31.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1304</th>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>47.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1305</th>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>60.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1306</th>\n",
" <td>No</td>\n",
" <td>1</td>\n",
" <td>61.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1307</th>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>43.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1308</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>42.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1309</th>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>35.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1310</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>22.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1311</th>\n",
" <td>Yes</td>\n",
" <td>1</td>\n",
" <td>62.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1312</th>\n",
" <td>No</td>\n",
" <td>1</td>\n",
" <td>88.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1313</th>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>39.1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1314 rows × 3 columns</p>\n",
"</div>"
"name": "stdout",
"output_type": "stream",
"text": [
"Status Smoker\n",
"0 No 502\n",
" Yes 443\n",
"1 No 230\n",
" Yes 139\n",
"dtype: int64\n"
]
}
],
"text/plain": [
" Smoker Status Age\n",
"0 Yes 0 21.0\n",
"1 Yes 0 19.3\n",
"2 No 1 57.5\n",
"3 No 0 47.1\n",
"4 Yes 0 81.4\n",
"5 No 0 36.8\n",
"6 No 0 23.8\n",
"7 Yes 1 57.5\n",
"8 Yes 0 24.8\n",
"9 Yes 0 49.5\n",
"10 Yes 0 30.0\n",
"11 No 1 66.0\n",
"12 Yes 0 49.2\n",
"13 No 0 58.4\n",
"14 No 1 60.6\n",
"15 No 0 25.1\n",
"16 No 0 43.5\n",
"17 No 0 27.1\n",
"18 No 0 58.3\n",
"19 Yes 0 65.7\n",
"20 No 1 73.2\n",
"21 Yes 0 38.3\n",
"22 No 0 33.4\n",
"23 Yes 1 62.3\n",
"24 No 0 18.0\n",
"25 No 0 56.2\n",
"26 Yes 0 59.2\n",
"27 No 0 25.8\n",
"28 No 1 36.9\n",
"29 No 0 20.2\n",
"... ... ... ...\n",
"1284 Yes 1 36.0\n",
"1285 Yes 0 48.3\n",
"1286 No 0 63.1\n",
"1287 No 0 60.8\n",
"1288 Yes 1 39.3\n",
"1289 No 0 36.7\n",
"1290 No 0 63.8\n",
"1291 No 1 71.3\n",
"1292 No 0 57.7\n",
"1293 No 0 63.2\n",
"1294 No 0 46.6\n",
"1295 Yes 1 82.4\n",
"1296 Yes 0 38.3\n",
"1297 Yes 0 32.7\n",
"1298 No 0 39.7\n",
"1299 Yes 1 60.0\n",
"1300 No 1 71.0\n",
"1301 No 0 20.5\n",
"1302 No 0 44.4\n",
"1303 Yes 0 31.2\n",
"1304 Yes 0 47.8\n",
"1305 Yes 0 60.9\n",
"1306 No 1 61.4\n",
"1307 Yes 0 43.0\n",
"1308 No 0 42.1\n",
"1309 Yes 0 35.9\n",
"1310 No 0 22.3\n",
"1311 Yes 1 62.1\n",
"1312 No 1 88.6\n",
"1313 No 0 39.1\n",
"source": [
"# Number of rows for each (Status, Smoker) combination.\n",
"count = raw_data.groupby(['Status', 'Smoker']).size()\n",
"print(count)"
]
},
"execution_count": 17,
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Make Status numeric so it can be plotted on the y axis: \"Dead\" -> 1, \"Alive\" -> 0.\n",
"# The result is assigned back rather than using replace(..., inplace=True) on the\n",
"# selected column, which does not update raw_data under pandas Copy-on-Write.\n",
"# replace() finds nothing to change if the labels were already recoded.\n",
"raw_data[\"Status\"] = (\n",
"    raw_data[\"Status\"].replace({\"Dead\": \"1\", \"Alive\": \"0\"}).astype(int)\n",
")\n",
"\n",
"df_smoker = raw_data[raw_data['Smoker'] == 'Yes']\n",
"df_non_smoker = raw_data[raw_data['Smoker'] == 'No']\n",
"\n",
"# One figure per group; titles let each figure stand on its own.\n",
"df_smoker.plot(kind='scatter', x='Age', y='Status', color='#E69F00', title='Smokers')\n",
"df_non_smoker.plot(kind='scatter', x='Age', y='Status', color='#56B4E9', title='Non-smokers')\n",
"plt.show()"
]
},
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment