QM-7063-Learning-Practice-9/Schrick-Noah_Learning-Practice-9.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Learning Practice 9 for the University of Tulsa's QM-7063 Data Mining Course\n",
    "# Support Vector Machines\n",
    "# Professor: Dr. Abdulrashid, Spring 2023\n",
    "# Noah L. Schrick - 1492657"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imports\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from sklearn import preprocessing\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.metrics import classification_report,confusion_matrix\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# a. \n",
    "Numerisize the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# a\n",
    "accidents_df = pd.read_csv('accidentsFull.csv')\n",
    "accidents_df['Injury'] = (accidents_df['MAX_SEV_IR'] > 0).astype(int)\n",
    "accidents_df = accidents_df.apply(pd.to_numeric) # convert all columns of DataFrame\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# b. \n",
    "Transform the data by either normalizing or standardizing it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# b. and c.\n",
    "scaler = preprocessing.StandardScaler()\n",
    "\n",
    "accident_cols = accidents_df.columns.values.tolist()\n",
    "accident_cols.remove('Injury')\n",
    "\n",
    "# split into training and validation\n",
    "trainData, validData = train_test_split(accidents_df, test_size=0.40, random_state=20)\n",
    "\n",
    "scaler.fit(trainData[accident_cols])  # Note the use of an array of column names\n",
    "\n",
    "# Transform the full dataset\n",
    "accidentNorm = pd.concat([pd.DataFrame(scaler.transform(accidents_df[accident_cols]), \n",
    "                                    columns=accident_cols),\n",
    "                        accidents_df[['Injury']]], axis=1)\n",
    "\n",
    "trainNorm = accidentNorm.iloc[trainData.index]\n",
    "validNorm = accidentNorm.iloc[validData.index]"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# c. \n",
    "Use train, test, and split function to split the data into training and testing sets."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# c. \n",
    "train_X = trainNorm[accident_cols]\n",
    "train_y = trainNorm['Injury']\n",
    "valid_X = validNorm[accident_cols]\n",
    "valid_y = validNorm['Injury']"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# d.\n",
    "Select your preferred kernel type and determine the kernel values by using either grid-search or v-fold cross validation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 25 candidates, totalling 125 fits\n",
      "[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.772 total time=  33.9s\n",
      "[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.762 total time=  46.2s\n",
      "[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.769 total time=  42.3s\n",
      "[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.762 total time=  36.2s\n",
      "[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.761 total time=  34.8s\n",
      "[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.999 total time=   5.6s\n",
      "[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.996 total time=   5.8s\n",
      "[CV 3/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.998 total time=   5.6s\n",
      "[CV 4/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.995 total time=   5.6s\n",
      "[CV 5/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.997 total time=   5.7s\n",
      "[CV 1/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=1.000 total time=   1.1s\n",
      "[CV 2/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=1.000 total time=   1.1s\n",
      "[CV 3/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=1.000 total time=   1.2s\n",
      "[CV 4/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.999 total time=   1.4s\n",
      "[CV 5/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=1.000 total time=   1.0s\n",
      "[CV 1/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time=   4.4s\n",
      "[CV 2/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time=   4.3s\n",
      "[CV 3/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time=   4.2s\n",
      "[CV 4/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time=   4.3s\n",
      "[CV 5/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time=   4.4s\n",
      "[CV 1/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time=  26.5s\n",
      "[CV 2/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time=  25.9s\n",
      "[CV 3/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time=  27.3s\n",
      "[CV 4/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time=  25.9s\n",
      "[CV 5/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time=  26.6s\n",
      "[CV 1/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.923 total time= 1.1min\n",
      "[CV 2/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.913 total time= 1.3min\n",
      "[CV 3/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.917 total time= 1.1min\n",
      "[CV 4/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.916 total time= 1.1min\n",
      "[CV 5/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.912 total time= 1.1min\n",
      "[CV 1/5] END ........C=1, gamma=0.1, kernel=rbf;, score=1.000 total time=   3.4s\n",
      "[CV 2/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.999 total time=   3.3s\n",
      "[CV 3/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.999 total time=   3.4s\n",
      "[CV 4/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.999 total time=   3.3s\n",
      "[CV 5/5] END ........C=1, gamma=0.1, kernel=rbf;, score=1.000 total time=   3.4s\n",
      "[CV 1/5] END .......C=1, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.3s\n",
      "[CV 2/5] END .......C=1, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.4s\n",
      "[CV 3/5] END .......C=1, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.4s\n",
      "[CV 4/5] END .......C=1, gamma=0.01, kernel=rbf;, score=0.999 total time=   0.4s\n",
      "[CV 5/5] END .......C=1, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.4s\n",
      "[CV 1/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.6s\n",
      "[CV 2/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.6s\n",
      "[CV 3/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.6s\n",
      "[CV 4/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.6s\n",
      "[CV 5/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.7s\n",
      "[CV 1/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time=   4.6s\n",
      "[CV 2/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time=   4.9s\n",
      "[CV 3/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time=   4.4s\n",
      "[CV 4/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time=   5.0s\n",
      "[CV 5/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time=   4.9s\n",
      "[CV 1/5] END .........C=10, gamma=1, kernel=rbf;, score=0.928 total time= 1.1min\n",
      "[CV 2/5] END .........C=10, gamma=1, kernel=rbf;, score=0.918 total time= 1.2min\n",
      "[CV 3/5] END .........C=10, gamma=1, kernel=rbf;, score=0.923 total time= 1.3min\n",
      "[CV 4/5] END .........C=10, gamma=1, kernel=rbf;, score=0.920 total time= 1.0min\n",
      "[CV 5/5] END .........C=10, gamma=1, kernel=rbf;, score=0.918 total time= 1.1min\n",
      "[CV 1/5] END .......C=10, gamma=0.1, kernel=rbf;, score=1.000 total time=   3.2s\n",
      "[CV 2/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.999 total time=   3.1s\n",
      "[CV 3/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.999 total time=   3.0s\n",
      "[CV 4/5] END .......C=10, gamma=0.1, kernel=rbf;, score=1.000 total time=   3.1s\n",
      "[CV 5/5] END .......C=10, gamma=0.1, kernel=rbf;, score=1.000 total time=   4.1s\n",
      "[CV 1/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.5s\n",
      "[CV 2/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.5s\n",
      "[CV 3/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.4s\n",
      "[CV 4/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.5s\n",
      "[CV 5/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.4s\n",
      "[CV 1/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.3s\n",
      "[CV 2/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.3s\n",
      "[CV 3/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.3s\n",
      "[CV 4/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.3s\n",
      "[CV 5/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.3s\n",
      "[CV 1/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time=   1.0s\n",
      "[CV 2/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time=   1.0s\n",
      "[CV 3/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time=   1.0s\n",
      "[CV 4/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time=   0.8s\n",
      "[CV 5/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time=   0.9s\n",
      "[CV 1/5] END ........C=100, gamma=1, kernel=rbf;, score=0.928 total time= 1.1min\n",
      "[CV 2/5] END ........C=100, gamma=1, kernel=rbf;, score=0.918 total time= 1.1min\n",
      "[CV 3/5] END ........C=100, gamma=1, kernel=rbf;, score=0.923 total time= 1.1min\n",
      "[CV 4/5] END ........C=100, gamma=1, kernel=rbf;, score=0.920 total time=  59.7s\n",
      "[CV 5/5] END ........C=100, gamma=1, kernel=rbf;, score=0.918 total time= 1.0min\n",
      "[CV 1/5] END ......C=100, gamma=0.1, kernel=rbf;, score=1.000 total time=   3.2s\n",
      "[CV 2/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.999 total time=   3.0s\n",
      "[CV 3/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.999 total time=   3.1s\n",
      "[CV 4/5] END ......C=100, gamma=0.1, kernel=rbf;, score=1.000 total time=   3.1s\n",
      "[CV 5/5] END ......C=100, gamma=0.1, kernel=rbf;, score=1.000 total time=   3.2s\n",
      "[CV 1/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.3s\n",
      "[CV 2/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.4s\n",
      "[CV 3/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.3s\n",
      "[CV 4/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.3s\n",
      "[CV 5/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.3s\n",
      "[CV 1/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.2s\n",
      "[CV 2/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.2s\n",
      "[CV 3/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.2s\n",
      "[CV 4/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.2s\n",
      "[CV 5/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.2s\n",
      "[CV 1/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time=   0.3s\n",
      "[CV 2/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time=   0.4s\n",
      "[CV 3/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time=   0.4s\n",
      "[CV 4/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time=   0.4s\n",
      "[CV 5/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time=   0.5s\n",
      "[CV 1/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.928 total time=  57.9s\n",
      "[CV 2/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.918 total time= 1.1min\n",
      "[CV 3/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.923 total time= 1.1min\n",
      "[CV 4/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.920 total time=  58.9s\n",
      "[CV 5/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.918 total time=  59.8s\n",
      "[CV 1/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=1.000 total time=   2.8s\n",
      "[CV 2/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=0.999 total time=   2.7s\n",
      "[CV 3/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=0.999 total time=   2.6s\n",
      "[CV 4/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=1.000 total time=   2.7s\n",
      "[CV 5/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=1.000 total time=   2.6s\n",
      "[CV 1/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.3s\n",
      "[CV 2/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.3s\n",
      "[CV 3/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.3s\n",
      "[CV 4/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.4s\n",
      "[CV 5/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time=   0.4s\n",
      "[CV 1/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.2s\n",
      "[CV 2/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.2s\n",
      "[CV 3/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.3s\n",
      "[CV 4/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.2s\n",
      "[CV 5/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time=   0.3s\n",
      "[CV 1/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time=   0.4s\n",
      "[CV 2/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time=   0.4s\n",
      "[CV 3/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time=   0.3s\n",
      "[CV 4/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time=   0.4s\n",
      "[CV 5/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time=   0.6s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-3 {color: black;background-color: white;}#sk-container-id-3 pre{padding: 0;}#sk-container-id-3 div.sk-toggleable {background-color: white;}#sk-container-id-3 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-3 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-3 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-3 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-3 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-3 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-3 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-3 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-3 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-3 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-3 div.sk-item {position: relative;z-index: 1;}#sk-container-id-3 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-3 div.sk-item::before, #sk-container-id-3 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-3 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-3 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-3 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-3 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-3 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-3 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-3 div.sk-label-container {text-align: center;}#sk-container-id-3 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-3 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-3\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>GridSearchCV(estimator=SVC(),\n",
       "             param_grid={&#x27;C&#x27;: [0.1, 1, 10, 100, 1000],\n",
       "                         &#x27;gamma&#x27;: [1, 0.1, 0.01, 0.001, 0.0001],\n",
       "                         &#x27;kernel&#x27;: [&#x27;rbf&#x27;]},\n",
       "             verbose=3)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">GridSearchCV</label><div class=\"sk-toggleable__content\"><pre>GridSearchCV(estimator=SVC(),\n",
       "             param_grid={&#x27;C&#x27;: [0.1, 1, 10, 100, 1000],\n",
       "                         &#x27;gamma&#x27;: [1, 0.1, 0.01, 0.001, 0.0001],\n",
       "                         &#x27;kernel&#x27;: [&#x27;rbf&#x27;]},\n",
       "             verbose=3)</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">estimator: SVC</label><div class=\"sk-toggleable__content\"><pre>SVC()</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SVC</label><div class=\"sk-toggleable__content\"><pre>SVC()</pre></div></div></div></div></div></div></div></div></div></div>"
      ],
      "text/plain": [
       "GridSearchCV(estimator=SVC(),\n",
       "             param_grid={'C': [0.1, 1, 10, 100, 1000],\n",
       "                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001],\n",
       "                         'kernel': ['rbf']},\n",
       "             verbose=3)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# d.\n",
    "param_grid = {'C': [0.1,1, 10, 100, 1000], 'gamma': [1,0.1,0.01,0.001,0.0001], 'kernel': ['rbf']} \n",
    "grid = GridSearchCV(SVC(),param_grid,refit=True,verbose=3)\n",
    "grid.fit(train_X,train_y)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-5 {color: black;background-color: white;}#sk-container-id-5 pre{padding: 0;}#sk-container-id-5 div.sk-toggleable {background-color: white;}#sk-container-id-5 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-5 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-5 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-5 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-5 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-5 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-5 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-5 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-5 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-5 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-5 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-5 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-5 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-5 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-5 div.sk-item {position: relative;z-index: 1;}#sk-container-id-5 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-5 div.sk-item::before, #sk-container-id-5 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-5 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-5 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-5 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-5 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-5 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-5 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-5 div.sk-label-container {text-align: center;}#sk-container-id-5 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-5 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-5\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>SVC(C=0.1, gamma=0.001)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" checked><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SVC</label><div class=\"sk-toggleable__content\"><pre>SVC(C=0.1, gamma=0.001)</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "SVC(C=0.1, gamma=0.001)"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Best options:\n",
    "grid.best_params_\n",
    "print()\n",
    "grid.best_estimator_"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# e.\n",
    "Run a SVM classifier using identified kernel values found in (d)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "# e.\n",
    "grid_predictions = grid.predict(valid_X)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# f.\n",
    "Obtain the confusion matrix."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[8270    0]\n",
      " [   0 8604]]\n"
     ]
    }
   ],
   "source": [
    "# f. \n",
    "print(confusion_matrix(valid_y,grid_predictions))"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# g.\n",
    "What is the overall error for the validation set?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       1.00      1.00      1.00      8270\n",
      "           1       1.00      1.00      1.00      8604\n",
      "\n",
      "    accuracy                           1.00     16874\n",
      "   macro avg       1.00      1.00      1.00     16874\n",
      "weighted avg       1.00      1.00      1.00     16874\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# g. \n",
    "print(classification_report(valid_y,grid_predictions))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}