{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Learning Practice 9 for the University of Tulsa's QM-7063 Data Mining Course\n",
    "# Support Vector Machines\n",
    "# Professor: Dr. Abdulrashid, Spring 2023\n",
    "# Noah L. Schrick - 1492657"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imports\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from sklearn import preprocessing\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.metrics import classification_report,confusion_matrix\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# a. \n",
    "Numericize the dataset (convert every column to a numeric type)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# a. Load the raw data, derive the binary target, and make every column numeric.\n",
    "accidents_raw = pd.read_csv('accidentsFull.csv')\n",
    "\n",
    "# Injury is 1 when MAX_SEV_IR > 0 and 0 otherwise.\n",
    "# Built from accidents_raw so re-running this cell always gives the same frame.\n",
    "accidents_df = (\n",
    "    accidents_raw\n",
    "    .assign(Injury=(accidents_raw['MAX_SEV_IR'] > 0).astype(int))\n",
    "    .apply(pd.to_numeric)  # raises if a column cannot be parsed as numeric\n",
    ")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# b. \n",
    "Transform the data by either normalizing or standardizing it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# b. and c.\n",
    "target_col = 'Injury'\n",
    "\n",
    "# Injury is computed directly from MAX_SEV_IR, so leaving MAX_SEV_IR among the\n",
    "# predictors leaks the label into the features. Exclude it along with the target.\n",
    "leakage_cols = ['MAX_SEV_IR']\n",
    "accident_cols = [col for col in accidents_df.columns\n",
    "                 if col != target_col and col not in leakage_cols]\n",
    "\n",
    "# Split into training (60%) and validation (40%) before fitting the scaler,\n",
    "# so the scaling statistics are learned from the training rows only.\n",
    "trainData, validData = train_test_split(accidents_df, test_size=0.40, random_state=20)\n",
    "\n",
    "scaler = preprocessing.StandardScaler()\n",
    "scaler.fit(trainData[accident_cols])  # Note the use of a list of column names\n",
    "\n",
    "# Standardize the full dataset. The original row index is kept so that the\n",
    "# train/validation rows can be selected by label below.\n",
    "accidentNorm = pd.concat(\n",
    "    [pd.DataFrame(scaler.transform(accidents_df[accident_cols]),\n",
    "                  columns=accident_cols, index=accidents_df.index),\n",
    "     accidents_df[[target_col]]],\n",
    "    axis=1)\n",
    "\n",
    "# trainData.index / validData.index hold index labels, so select with .loc\n",
    "# (.iloc is positional and only matches when the index is a default RangeIndex).\n",
    "trainNorm = accidentNorm.loc[trainData.index]\n",
    "validNorm = accidentNorm.loc[validData.index]"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# c. \n",
    "Use the `train_test_split` function to split the data into training and testing sets."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# c. Separate the standardized predictors from the target for each partition.\n",
    "train_X = trainNorm[accident_cols]\n",
    "train_y = trainNorm['Injury']\n",
    "valid_X = validNorm[accident_cols]\n",
    "valid_y = validNorm['Injury']\n",
    "\n",
    "# Sanity checks: the two partitions cover every row, and the column the target\n",
    "# was derived from is not among the predictors.\n",
    "assert len(train_X) + len(valid_X) == len(accidentNorm)\n",
    "assert 'MAX_SEV_IR' not in train_X.columns"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# d.\n",
    "Select your preferred kernel type and determine the kernel values by using either grid-search or v-fold cross validation."
] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fitting 5 folds for each of 25 candidates, totalling 125 fits\n", "[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.772 total time= 33.9s\n", "[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.762 total time= 46.2s\n", "[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.769 total time= 42.3s\n", "[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.762 total time= 36.2s\n", "[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.761 total time= 34.8s\n", "[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.999 total time= 5.6s\n", "[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.996 total time= 5.8s\n", "[CV 3/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.998 total time= 5.6s\n", "[CV 4/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.995 total time= 5.6s\n", "[CV 5/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.997 total time= 5.7s\n", "[CV 1/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=1.000 total time= 1.1s\n", "[CV 2/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=1.000 total time= 1.1s\n", "[CV 3/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=1.000 total time= 1.2s\n", "[CV 4/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.999 total time= 1.4s\n", "[CV 5/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=1.000 total time= 1.0s\n", "[CV 1/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time= 4.4s\n", "[CV 2/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time= 4.3s\n", "[CV 3/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time= 4.2s\n", "[CV 4/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time= 4.3s\n", "[CV 5/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time= 4.4s\n", "[CV 1/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 26.5s\n", "[CV 2/5] END ...C=0.1, 
gamma=0.0001, kernel=rbf;, score=1.000 total time= 25.9s\n", "[CV 3/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 27.3s\n", "[CV 4/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 25.9s\n", "[CV 5/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 26.6s\n", "[CV 1/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.923 total time= 1.1min\n", "[CV 2/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.913 total time= 1.3min\n", "[CV 3/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.917 total time= 1.1min\n", "[CV 4/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.916 total time= 1.1min\n", "[CV 5/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.912 total time= 1.1min\n", "[CV 1/5] END ........C=1, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.4s\n", "[CV 2/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.3s\n", "[CV 3/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.4s\n", "[CV 4/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.3s\n", "[CV 5/5] END ........C=1, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.4s\n", "[CV 1/5] END .......C=1, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n", "[CV 2/5] END .......C=1, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n", "[CV 3/5] END .......C=1, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n", "[CV 4/5] END .......C=1, gamma=0.01, kernel=rbf;, score=0.999 total time= 0.4s\n", "[CV 5/5] END .......C=1, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n", "[CV 1/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.6s\n", "[CV 2/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.6s\n", "[CV 3/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.6s\n", "[CV 4/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.6s\n", "[CV 5/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time= 
0.7s\n", "[CV 1/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 4.6s\n", "[CV 2/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 4.9s\n", "[CV 3/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 4.4s\n", "[CV 4/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 5.0s\n", "[CV 5/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 4.9s\n", "[CV 1/5] END .........C=10, gamma=1, kernel=rbf;, score=0.928 total time= 1.1min\n", "[CV 2/5] END .........C=10, gamma=1, kernel=rbf;, score=0.918 total time= 1.2min\n", "[CV 3/5] END .........C=10, gamma=1, kernel=rbf;, score=0.923 total time= 1.3min\n", "[CV 4/5] END .........C=10, gamma=1, kernel=rbf;, score=0.920 total time= 1.0min\n", "[CV 5/5] END .........C=10, gamma=1, kernel=rbf;, score=0.918 total time= 1.1min\n", "[CV 1/5] END .......C=10, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.2s\n", "[CV 2/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.1s\n", "[CV 3/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.0s\n", "[CV 4/5] END .......C=10, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.1s\n", "[CV 5/5] END .......C=10, gamma=0.1, kernel=rbf;, score=1.000 total time= 4.1s\n", "[CV 1/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.5s\n", "[CV 2/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.5s\n", "[CV 3/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n", "[CV 4/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.5s\n", "[CV 5/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n", "[CV 1/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n", "[CV 2/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n", "[CV 3/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n", "[CV 4/5] END .....C=10, gamma=0.001, 
kernel=rbf;, score=1.000 total time= 0.3s\n", "[CV 5/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n", "[CV 1/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time= 1.0s\n", "[CV 2/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time= 1.0s\n", "[CV 3/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time= 1.0s\n", "[CV 4/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.8s\n", "[CV 5/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.9s\n", "[CV 1/5] END ........C=100, gamma=1, kernel=rbf;, score=0.928 total time= 1.1min\n", "[CV 2/5] END ........C=100, gamma=1, kernel=rbf;, score=0.918 total time= 1.1min\n", "[CV 3/5] END ........C=100, gamma=1, kernel=rbf;, score=0.923 total time= 1.1min\n", "[CV 4/5] END ........C=100, gamma=1, kernel=rbf;, score=0.920 total time= 59.7s\n", "[CV 5/5] END ........C=100, gamma=1, kernel=rbf;, score=0.918 total time= 1.0min\n", "[CV 1/5] END ......C=100, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.2s\n", "[CV 2/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.0s\n", "[CV 3/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.1s\n", "[CV 4/5] END ......C=100, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.1s\n", "[CV 5/5] END ......C=100, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.2s\n", "[CV 1/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n", "[CV 2/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n", "[CV 3/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n", "[CV 4/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n", "[CV 5/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n", "[CV 1/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n", "[CV 2/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n", "[CV 3/5] 
END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n", "[CV 4/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n", "[CV 5/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n", "[CV 1/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.3s\n", "[CV 2/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.4s\n", "[CV 3/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.4s\n", "[CV 4/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.4s\n", "[CV 5/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.5s\n", "[CV 1/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.928 total time= 57.9s\n", "[CV 2/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.918 total time= 1.1min\n", "[CV 3/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.923 total time= 1.1min\n", "[CV 4/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.920 total time= 58.9s\n", "[CV 5/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.918 total time= 59.8s\n", "[CV 1/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=1.000 total time= 2.8s\n", "[CV 2/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=0.999 total time= 2.7s\n", "[CV 3/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=0.999 total time= 2.6s\n", "[CV 4/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=1.000 total time= 2.7s\n", "[CV 5/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=1.000 total time= 2.6s\n", "[CV 1/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n", "[CV 2/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n", "[CV 3/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n", "[CV 4/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n", "[CV 5/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n", "[CV 1/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total 
time= 0.2s\n", "[CV 2/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n", "[CV 3/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n", "[CV 4/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n", "[CV 5/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n", "[CV 1/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.4s\n", "[CV 2/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.4s\n", "[CV 3/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.3s\n", "[CV 4/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.4s\n", "[CV 5/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.6s\n" ] }, { "data": { "text/html": [ "
GridSearchCV(estimator=SVC(),\n",
" param_grid={'C': [0.1, 1, 10, 100, 1000],\n",
" 'gamma': [1, 0.1, 0.01, 0.001, 0.0001],\n",
" 'kernel': ['rbf']},\n",
" verbose=3)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. GridSearchCV(estimator=SVC(),\n",
" param_grid={'C': [0.1, 1, 10, 100, 1000],\n",
" 'gamma': [1, 0.1, 0.01, 0.001, 0.0001],\n",
" 'kernel': ['rbf']},\n",
" verbose=3)SVC()
SVC()
SVC(C=0.1, gamma=0.001)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
SVC(C=0.1, gamma=0.001)