431 lines
29 KiB
Plaintext
431 lines
29 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Learning Practice 9 for the University of Tulsa's QM-7063 Data Mining Course\n",
|
|
"# Support Vector Machines\n",
|
|
"# Professor: Dr. Abdulrashid, Spring 2023\n",
|
|
"# Noah L. Schrick - 1492657"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 28,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Imports\n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import seaborn as sns\n",
|
|
"from sklearn import preprocessing\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.svm import SVC\n",
|
|
"from sklearn.model_selection import GridSearchCV\n",
|
|
"from sklearn.metrics import classification_report,confusion_matrix\n",
|
|
"\n",
|
|
"%matplotlib inline"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# a. \n",
|
|
"Numerisize the dataset"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# a\n",
|
|
"accidents_df = pd.read_csv('accidentsFull.csv')\n",
|
|
"accidents_df['Injury'] = (accidents_df['MAX_SEV_IR'] > 0).astype(int)\n",
|
|
"accidents_df = accidents_df.apply(pd.to_numeric) # convert all columns of DataFrame\n"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# b. \n",
|
|
"Transform the data by either normalizing or standardizing it."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# b. and c.\n",
|
|
"scaler = preprocessing.StandardScaler()\n",
|
|
"\n",
|
|
"accident_cols = accidents_df.columns.values.tolist()\n",
|
|
"accident_cols.remove('Injury')\n",
|
|
"\n",
|
|
"# split into training and validation\n",
|
|
"trainData, validData = train_test_split(accidents_df, test_size=0.40, random_state=20)\n",
|
|
"\n",
|
|
"scaler.fit(trainData[accident_cols]) # Note the use of an array of column names\n",
|
|
"\n",
|
|
"# Transform the full dataset\n",
|
|
"accidentNorm = pd.concat([pd.DataFrame(scaler.transform(accidents_df[accident_cols]), \n",
|
|
" columns=accident_cols),\n",
|
|
" accidents_df[['Injury']]], axis=1)\n",
|
|
"\n",
|
|
"trainNorm = accidentNorm.iloc[trainData.index]\n",
|
|
"validNorm = accidentNorm.iloc[validData.index]"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# c. \n",
|
|
"Use train, test, and split function to split the data into training and testing sets."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# c. \n",
|
|
"train_X = trainNorm[accident_cols]\n",
|
|
"train_y = trainNorm['Injury']\n",
|
|
"valid_X = validNorm[accident_cols]\n",
|
|
"valid_y = validNorm['Injury']"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# d.\n",
|
|
"Select your preferred kernel type and determine the kernel values by using either grid-search or v-fold cross validation."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 29,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Fitting 5 folds for each of 25 candidates, totalling 125 fits\n",
|
|
"[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.772 total time= 33.9s\n",
|
|
"[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.762 total time= 46.2s\n",
|
|
"[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.769 total time= 42.3s\n",
|
|
"[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.762 total time= 36.2s\n",
|
|
"[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.761 total time= 34.8s\n",
|
|
"[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.999 total time= 5.6s\n",
|
|
"[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.996 total time= 5.8s\n",
|
|
"[CV 3/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.998 total time= 5.6s\n",
|
|
"[CV 4/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.995 total time= 5.6s\n",
|
|
"[CV 5/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.997 total time= 5.7s\n",
|
|
"[CV 1/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=1.000 total time= 1.1s\n",
|
|
"[CV 2/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=1.000 total time= 1.1s\n",
|
|
"[CV 3/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=1.000 total time= 1.2s\n",
|
|
"[CV 4/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.999 total time= 1.4s\n",
|
|
"[CV 5/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=1.000 total time= 1.0s\n",
|
|
"[CV 1/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time= 4.4s\n",
|
|
"[CV 2/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time= 4.3s\n",
|
|
"[CV 3/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time= 4.2s\n",
|
|
"[CV 4/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time= 4.3s\n",
|
|
"[CV 5/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time= 4.4s\n",
|
|
"[CV 1/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 26.5s\n",
|
|
"[CV 2/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 25.9s\n",
|
|
"[CV 3/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 27.3s\n",
|
|
"[CV 4/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 25.9s\n",
|
|
"[CV 5/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 26.6s\n",
|
|
"[CV 1/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.923 total time= 1.1min\n",
|
|
"[CV 2/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.913 total time= 1.3min\n",
|
|
"[CV 3/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.917 total time= 1.1min\n",
|
|
"[CV 4/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.916 total time= 1.1min\n",
|
|
"[CV 5/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.912 total time= 1.1min\n",
|
|
"[CV 1/5] END ........C=1, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.4s\n",
|
|
"[CV 2/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.3s\n",
|
|
"[CV 3/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.4s\n",
|
|
"[CV 4/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.3s\n",
|
|
"[CV 5/5] END ........C=1, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.4s\n",
|
|
"[CV 1/5] END .......C=1, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n",
|
|
"[CV 2/5] END .......C=1, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n",
|
|
"[CV 3/5] END .......C=1, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n",
|
|
"[CV 4/5] END .......C=1, gamma=0.01, kernel=rbf;, score=0.999 total time= 0.4s\n",
|
|
"[CV 5/5] END .......C=1, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n",
|
|
"[CV 1/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.6s\n",
|
|
"[CV 2/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.6s\n",
|
|
"[CV 3/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.6s\n",
|
|
"[CV 4/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.6s\n",
|
|
"[CV 5/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.7s\n",
|
|
"[CV 1/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 4.6s\n",
|
|
"[CV 2/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 4.9s\n",
|
|
"[CV 3/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 4.4s\n",
|
|
"[CV 4/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 5.0s\n",
|
|
"[CV 5/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 4.9s\n",
|
|
"[CV 1/5] END .........C=10, gamma=1, kernel=rbf;, score=0.928 total time= 1.1min\n",
|
|
"[CV 2/5] END .........C=10, gamma=1, kernel=rbf;, score=0.918 total time= 1.2min\n",
|
|
"[CV 3/5] END .........C=10, gamma=1, kernel=rbf;, score=0.923 total time= 1.3min\n",
|
|
"[CV 4/5] END .........C=10, gamma=1, kernel=rbf;, score=0.920 total time= 1.0min\n",
|
|
"[CV 5/5] END .........C=10, gamma=1, kernel=rbf;, score=0.918 total time= 1.1min\n",
|
|
"[CV 1/5] END .......C=10, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.2s\n",
|
|
"[CV 2/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.1s\n",
|
|
"[CV 3/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.0s\n",
|
|
"[CV 4/5] END .......C=10, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.1s\n",
|
|
"[CV 5/5] END .......C=10, gamma=0.1, kernel=rbf;, score=1.000 total time= 4.1s\n",
|
|
"[CV 1/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.5s\n",
|
|
"[CV 2/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.5s\n",
|
|
"[CV 3/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n",
|
|
"[CV 4/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.5s\n",
|
|
"[CV 5/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n",
|
|
"[CV 1/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n",
|
|
"[CV 2/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n",
|
|
"[CV 3/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n",
|
|
"[CV 4/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n",
|
|
"[CV 5/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n",
|
|
"[CV 1/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time= 1.0s\n",
|
|
"[CV 2/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time= 1.0s\n",
|
|
"[CV 3/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time= 1.0s\n",
|
|
"[CV 4/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.8s\n",
|
|
"[CV 5/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.9s\n",
|
|
"[CV 1/5] END ........C=100, gamma=1, kernel=rbf;, score=0.928 total time= 1.1min\n",
|
|
"[CV 2/5] END ........C=100, gamma=1, kernel=rbf;, score=0.918 total time= 1.1min\n",
|
|
"[CV 3/5] END ........C=100, gamma=1, kernel=rbf;, score=0.923 total time= 1.1min\n",
|
|
"[CV 4/5] END ........C=100, gamma=1, kernel=rbf;, score=0.920 total time= 59.7s\n",
|
|
"[CV 5/5] END ........C=100, gamma=1, kernel=rbf;, score=0.918 total time= 1.0min\n",
|
|
"[CV 1/5] END ......C=100, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.2s\n",
|
|
"[CV 2/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.0s\n",
|
|
"[CV 3/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.1s\n",
|
|
"[CV 4/5] END ......C=100, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.1s\n",
|
|
"[CV 5/5] END ......C=100, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.2s\n",
|
|
"[CV 1/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n",
|
|
"[CV 2/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n",
|
|
"[CV 3/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n",
|
|
"[CV 4/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n",
|
|
"[CV 5/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n",
|
|
"[CV 1/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n",
|
|
"[CV 2/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n",
|
|
"[CV 3/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n",
|
|
"[CV 4/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n",
|
|
"[CV 5/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n",
|
|
"[CV 1/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.3s\n",
|
|
"[CV 2/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.4s\n",
|
|
"[CV 3/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.4s\n",
|
|
"[CV 4/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.4s\n",
|
|
"[CV 5/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.5s\n",
|
|
"[CV 1/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.928 total time= 57.9s\n",
|
|
"[CV 2/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.918 total time= 1.1min\n",
|
|
"[CV 3/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.923 total time= 1.1min\n",
|
|
"[CV 4/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.920 total time= 58.9s\n",
|
|
"[CV 5/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.918 total time= 59.8s\n",
|
|
"[CV 1/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=1.000 total time= 2.8s\n",
|
|
"[CV 2/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=0.999 total time= 2.7s\n",
|
|
"[CV 3/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=0.999 total time= 2.6s\n",
|
|
"[CV 4/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=1.000 total time= 2.7s\n",
|
|
"[CV 5/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=1.000 total time= 2.6s\n",
|
|
"[CV 1/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n",
|
|
"[CV 2/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n",
|
|
"[CV 3/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n",
|
|
"[CV 4/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n",
|
|
"[CV 5/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n",
|
|
"[CV 1/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n",
|
|
"[CV 2/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n",
|
|
"[CV 3/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n",
|
|
"[CV 4/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n",
|
|
"[CV 5/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n",
|
|
"[CV 1/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.4s\n",
|
|
"[CV 2/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.4s\n",
|
|
"[CV 3/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.3s\n",
|
|
"[CV 4/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.4s\n",
|
|
"[CV 5/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.6s\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<style>#sk-container-id-3 {color: black;background-color: white;}#sk-container-id-3 pre{padding: 0;}#sk-container-id-3 div.sk-toggleable {background-color: white;}#sk-container-id-3 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-3 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-3 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-3 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-3 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-3 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-3 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-3 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-3 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-3 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-3 div.sk-item {position: relative;z-index: 1;}#sk-container-id-3 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-3 div.sk-item::before, #sk-container-id-3 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-3 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-3 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-3 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-3 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-3 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-3 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-3 div.sk-label-container {text-align: center;}#sk-container-id-3 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-3 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-3\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>GridSearchCV(estimator=SVC(),\n",
|
|
" param_grid={'C': [0.1, 1, 10, 100, 1000],\n",
|
|
" 'gamma': [1, 0.1, 0.01, 0.001, 0.0001],\n",
|
|
" 'kernel': ['rbf']},\n",
|
|
" verbose=3)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">GridSearchCV</label><div class=\"sk-toggleable__content\"><pre>GridSearchCV(estimator=SVC(),\n",
|
|
" param_grid={'C': [0.1, 1, 10, 100, 1000],\n",
|
|
" 'gamma': [1, 0.1, 0.01, 0.001, 0.0001],\n",
|
|
" 'kernel': ['rbf']},\n",
|
|
" verbose=3)</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">estimator: SVC</label><div class=\"sk-toggleable__content\"><pre>SVC()</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SVC</label><div class=\"sk-toggleable__content\"><pre>SVC()</pre></div></div></div></div></div></div></div></div></div></div>"
|
|
],
|
|
"text/plain": [
|
|
"GridSearchCV(estimator=SVC(),\n",
|
|
" param_grid={'C': [0.1, 1, 10, 100, 1000],\n",
|
|
" 'gamma': [1, 0.1, 0.01, 0.001, 0.0001],\n",
|
|
" 'kernel': ['rbf']},\n",
|
|
" verbose=3)"
|
|
]
|
|
},
|
|
"execution_count": 29,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# d.\n",
|
|
"param_grid = {'C': [0.1,1, 10, 100, 1000], 'gamma': [1,0.1,0.01,0.001,0.0001], 'kernel': ['rbf']} \n",
|
|
"grid = GridSearchCV(SVC(),param_grid,refit=True,verbose=3)\n",
|
|
"grid.fit(train_X,train_y)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 38,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<style>#sk-container-id-5 {color: black;background-color: white;}#sk-container-id-5 pre{padding: 0;}#sk-container-id-5 div.sk-toggleable {background-color: white;}#sk-container-id-5 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-5 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-5 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-5 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-5 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-5 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-5 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-5 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-5 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-5 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-5 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-5 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-5 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-5 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-5 div.sk-item {position: relative;z-index: 1;}#sk-container-id-5 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-5 div.sk-item::before, #sk-container-id-5 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-5 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-5 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-5 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-5 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-5 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-5 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-5 div.sk-label-container {text-align: center;}#sk-container-id-5 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-5 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-5\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>SVC(C=0.1, gamma=0.001)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" checked><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SVC</label><div class=\"sk-toggleable__content\"><pre>SVC(C=0.1, gamma=0.001)</pre></div></div></div></div></div>"
|
|
],
|
|
"text/plain": [
|
|
"SVC(C=0.1, gamma=0.001)"
|
|
]
|
|
},
|
|
"execution_count": 38,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Best options:\n",
|
|
"grid.best_params_\n",
|
|
"print()\n",
|
|
"grid.best_estimator_"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# e.\n",
|
|
"Run a SVM classifier using identified kernel values found in (d)."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 42,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# e.\n",
|
|
"grid_predictions = grid.predict(valid_X)"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# f.\n",
|
|
"Obtain the confusion matrix."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 43,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[[8270 0]\n",
|
|
" [ 0 8604]]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# f. \n",
|
|
"print(confusion_matrix(valid_y,grid_predictions))"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# g.\n",
|
|
"What is the overall error for the validation set?"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 44,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" precision recall f1-score support\n",
|
|
"\n",
|
|
" 0 1.00 1.00 1.00 8270\n",
|
|
" 1 1.00 1.00 1.00 8604\n",
|
|
"\n",
|
|
" accuracy 1.00 16874\n",
|
|
" macro avg 1.00 1.00 1.00 16874\n",
|
|
"weighted avg 1.00 1.00 1.00 16874\n",
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# g. \n",
|
|
"print(classification_report(valid_y,grid_predictions))"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.10"
|
|
},
|
|
"orig_nbformat": 4
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|