Finalizing Naive Bayes
This commit is contained in:
parent
2710b772ed
commit
de18a51aa2
@ -1 +0,0 @@
|
||||
,noah,NovaArchSys,29.03.2023 17:55,file:///home/noah/.config/libreoffice/4;
|
||||
@ -578,11 +578,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 202,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# c."
|
||||
"# c.\n",
|
||||
"trainData, validData = train_test_split(accidents_df, test_size=0.4, random_state=26)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -595,12 +596,23 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# i"
|
||||
"# i. \n",
|
||||
"HOUR_I_R \n",
|
||||
"ALIGN_I \n",
|
||||
"WRK_ZONE \n",
|
||||
"WKDY_I_R \n",
|
||||
"INT_HWY \n",
|
||||
"LGTCON_I_R \n",
|
||||
"PROFIL_I_R \n",
|
||||
"SPD_LIM \n",
|
||||
"SUR_COND \n",
|
||||
"TRAF_CON_R \n",
|
||||
"TRAF_WAY \n",
|
||||
"WEATHER_R"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -609,16 +621,53 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ii. \n",
|
||||
"Run a naive Bayes classifier on the complete training set with the relevant predictors (and INJURY as the response). Note that all predictors are categorical. Show the confusion matrix.\n"
|
||||
"Run a naive Bayes classifier on the complete training set with the relevant predictors (and INJURY as the response). Note that all predictors are categorical. Show the confusion matrix."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 209,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Training\n",
|
||||
"Confusion Matrix (Accuracy 0.5291)\n",
|
||||
"\n",
|
||||
" Prediction\n",
|
||||
"Actual 0 1\n",
|
||||
" 0 4197 8195\n",
|
||||
" 1 3724 9193\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# ii."
|
||||
"# ii.\n",
|
||||
"predictors = ['HOUR_I_R', 'ALIGN_I', 'WRK_ZONE', 'WKDY_I_R', 'INT_HWY',\n",
|
||||
" 'LGTCON_I_R', 'PROFIL_I_R', 'SPD_LIM', 'SUR_COND', \n",
|
||||
" 'TRAF_CON_R', 'TRAF_WAY', 'WEATHER_R']\n",
|
||||
"\n",
|
||||
"X = pd.get_dummies(accidents_df[predictors])\n",
|
||||
"y = accidents_df['Injury']\n",
|
||||
"\n",
|
||||
"X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.40, random_state=1)\n",
|
||||
"\n",
|
||||
"# run naive Bayes\n",
|
||||
"delays_nb = MultinomialNB(alpha=0.01)\n",
|
||||
"delays_nb.fit(X_train, y_train)\n",
|
||||
"\n",
|
||||
"# predict probabilities\n",
|
||||
"predProb_train = delays_nb.predict_proba(X_train)\n",
|
||||
"predProb_valid = delays_nb.predict_proba(X_valid)\n",
|
||||
"\n",
|
||||
"# predict class membership\n",
|
||||
"y_valid_pred = delays_nb.predict(X_valid)\n",
|
||||
"y_train_pred = delays_nb.predict(X_train)\n",
|
||||
"\n",
|
||||
"print(\"Training\")\n",
|
||||
"classificationSummary(y_train, y_train_pred) "
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -627,16 +676,32 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# iii. \n",
|
||||
"What is the overall error for the validation set?\n"
|
||||
"What is the overall error for the validation set?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 210,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Validation\n",
|
||||
"Confusion Matrix (Accuracy 0.5288)\n",
|
||||
"\n",
|
||||
" Prediction\n",
|
||||
"Actual 0 1\n",
|
||||
" 0 2838 5491\n",
|
||||
" 1 2460 6085\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# iii."
|
||||
"# iii.\n",
|
||||
"print(\"Validation\")\n",
|
||||
"classificationSummary(y_valid, y_valid_pred) "
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -650,11 +715,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 217,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.057\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# iv."
|
||||
"# iv.\n",
|
||||
"pctg_inc = round(100* abs(0.5288 - 0.5291)/(0.5291), 3)\n",
|
||||
"print(pctg_inc)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -667,12 +742,12 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# v."
|
||||
"The probability is rounded to 0 due to the extremely low likelihood of sustaining an injury at such low speeds. \n",
|
||||
"The pivot tables display values on the order of 1e-6 to 1e-9, which are treated as 0."
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user