{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Learning Practice 9 for the University of Tulsa's QM-7063 Data Mining Course\n",
    "# Support Vector Machines\n",
    "# Professor: Dr. Abdulrashid, Spring 2023\n",
    "# Noah L. Schrick - 1492657"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imports\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from sklearn import preprocessing\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# a. \n",
    "Numerisize the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# a\n",
    "accidents_df = pd.read_csv('accidentsFull.csv')\n",
    "accidents_df = accidents_df.apply(pd.to_numeric) # convert all columns of DataFrame\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# b. \n",
    "Transform the data by either normalizing or standardizing it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# b.\n",
    "## Normalize\n",
    "scaler = preprocessing.StandardScaler()\n",
    "\n",
    "accident_cols = accidents_df.columns.values.tolist()\n",
    "\n",
    "scaler.fit(accidents_df[accident_cols])  # Note the use of an array of column names\n",
    "\n",
    "# Transform the full dataset\n",
    "accidentsNorm = pd.DataFrame(scaler.transform(accidents_df[accident_cols]), \n",
    "                                    columns=accident_cols)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# c. \n",
    "Use train, test, and split function to split the data into training and testing sets."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# c.\n",
    "trainData, validData = train_test_split(accidentsNorm, test_size=0.4, random_state=26)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# d.\n",
    "Select your preferred kernel type and determine the kernel values by using either grid-search or v-fold cross validation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# d."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# e.\n",
    "Run a SVM classifier using identified kernel values found in (d)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# e."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# f.\n",
    "Obtain the confusion matrix."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# f. "
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# g.\n",
    "What is the overall error for the validation set?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# g. "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}