Finalizing Naive Bayes
This commit is contained in:
parent
2710b772ed
commit
de18a51aa2
@ -1 +0,0 @@
|
|||||||
,noah,NovaArchSys,29.03.2023 17:55,file:///home/noah/.config/libreoffice/4;
|
|
||||||
@ -578,11 +578,12 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 202,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# c."
|
"# c.\n",
|
||||||
|
"trainData, validData = train_test_split(accidents_df, test_size=0.4, random_state=26)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -595,12 +596,23 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"attachments": {},
|
||||||
"execution_count": null,
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
"source": [
|
||||||
"# i"
|
"# i. \n",
|
||||||
|
"HOUR_I_R \n",
|
||||||
|
"ALIGN_I \n",
|
||||||
|
"WRK_ZONE \n",
|
||||||
|
"WKDY_I_R \n",
|
||||||
|
"INT_HWY \n",
|
||||||
|
"LGTCON_I_R \n",
|
||||||
|
"PROFIL_I_R \n",
|
||||||
|
"SPD_LIM \n",
|
||||||
|
"SUR_COND \n",
|
||||||
|
"TRAF_CON_R \n",
|
||||||
|
"TRAF_WAY \n",
|
||||||
|
"WEATHER_R"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -609,16 +621,53 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# ii. \n",
|
"# ii. \n",
|
||||||
"Run a naive Bayes classifier on the complete training set with the relevant predictors (and INJURY as the response). Note that all predictors are categorical. Show the confusion matrix.\n"
|
"Run a naive Bayes classifier on the complete training set with the relevant predictors (and INJURY as the response). Note that all predictors are categorical. Show the confusion matrix."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 209,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Training\n",
|
||||||
|
"Confusion Matrix (Accuracy 0.5291)\n",
|
||||||
|
"\n",
|
||||||
|
" Prediction\n",
|
||||||
|
"Actual 0 1\n",
|
||||||
|
" 0 4197 8195\n",
|
||||||
|
" 1 3724 9193\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# ii."
|
"# ii.\n",
|
||||||
|
"predictors = ['HOUR_I_R', 'ALIGN_I', 'WRK_ZONE', 'WKDY_I_R', 'INT_HWY',\n",
|
||||||
|
" 'LGTCON_I_R', 'PROFIL_I_R', 'SPD_LIM', 'SUR_COND', \n",
|
||||||
|
" 'TRAF_CON_R', 'TRAF_WAY', 'WEATHER_R']\n",
|
||||||
|
"\n",
|
||||||
|
"X = pd.get_dummies(accidents_df[predictors])\n",
|
||||||
|
"y = accidents_df['Injury']\n",
|
||||||
|
"\n",
|
||||||
|
"X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.40, random_state=1)\n",
|
||||||
|
"\n",
|
||||||
|
"# run naive Bayes\n",
|
||||||
|
"delays_nb = MultinomialNB(alpha=0.01)\n",
|
||||||
|
"delays_nb.fit(X_train, y_train)\n",
|
||||||
|
"\n",
|
||||||
|
"# predict probabilities\n",
|
||||||
|
"predProb_train = delays_nb.predict_proba(X_train)\n",
|
||||||
|
"predProb_valid = delays_nb.predict_proba(X_valid)\n",
|
||||||
|
"\n",
|
||||||
|
"# predict class membership\n",
|
||||||
|
"y_valid_pred = delays_nb.predict(X_valid)\n",
|
||||||
|
"y_train_pred = delays_nb.predict(X_train)\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Training\")\n",
|
||||||
|
"classificationSummary(y_train, y_train_pred) "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -627,16 +676,32 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# iii. \n",
|
"# iii. \n",
|
||||||
"What is the overall error for the validation set?\n"
|
"What is the overall error for the validation set?"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 210,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Validation\n",
|
||||||
|
"Confusion Matrix (Accuracy 0.5288)\n",
|
||||||
|
"\n",
|
||||||
|
" Prediction\n",
|
||||||
|
"Actual 0 1\n",
|
||||||
|
" 0 2838 5491\n",
|
||||||
|
" 1 2460 6085\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# iii."
|
"# iii.\n",
|
||||||
|
"print(\"Validation\")\n",
|
||||||
|
"classificationSummary(y_valid, y_valid_pred) "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -650,11 +715,21 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 217,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"0.057\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# iv."
|
"# iv.\n",
|
||||||
|
"pctg_inc = round(100* abs(0.5288 - 0.5291)/(0.5291), 3)\n",
|
||||||
|
"print(pctg_inc)\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -667,12 +742,12 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"attachments": {},
|
||||||
"execution_count": null,
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
"source": [
|
||||||
"# v."
|
"The probability is rounded to 0 due to the extremely low likelihood of sustaining an injury at such low speeds. \n",
|
||||||
|
"The pivot tables display values on the order of 1e-6 to 1e-9, which are treated as 0."
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user