diff --git a/Schrick-Noah_Learning-Practice-9.ipynb b/Schrick-Noah_Learning-Practice-9.ipynb index 03a1508..d617e76 100644 --- a/Schrick-Noah_Learning-Practice-9.ipynb +++ b/Schrick-Noah_Learning-Practice-9.ipynb @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -25,7 +25,9 @@ "import seaborn as sns\n", "from sklearn import preprocessing\n", "from sklearn.model_selection import train_test_split\n", - "\n", + "from sklearn.svm import SVC\n", + "from sklearn.model_selection import GridSearchCV\n", + "from sklearn.metrics import classification_report,confusion_matrix\n", "\n", "%matplotlib inline" ] @@ -41,12 +43,13 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# a\n", "accidents_df = pd.read_csv('accidentsFull.csv')\n", + "accidents_df['Injury'] = (accidents_df['MAX_SEV_IR'] > 0).astype(int)\n", "accidents_df = accidents_df.apply(pd.to_numeric) # convert all columns of DataFrame\n" ] }, @@ -61,21 +64,28 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ - "# b.\n", - "## Normalize\n", + "# b. and c.\n", "scaler = preprocessing.StandardScaler()\n", "\n", "accident_cols = accidents_df.columns.values.tolist()\n", + "accident_cols.remove('Injury')\n", "\n", - "scaler.fit(accidents_df[accident_cols]) # Note the use of an array of column names\n", + "# split into training and validation\n", + "trainData, validData = train_test_split(accidents_df, test_size=0.40, random_state=20)\n", + "\n", + "scaler.fit(trainData[accident_cols]) # Note the use of an array of column names\n", "\n", "# Transform the full dataset\n", - "accidentsNorm = pd.DataFrame(scaler.transform(accidents_df[accident_cols]), \n", - " columns=accident_cols)" + "accidentNorm = pd.concat([pd.DataFrame(scaler.transform(accidents_df[accident_cols]), \n", + " columns=accident_cols),\n", + " accidents_df[['Injury']]], axis=1)\n", + "\n", + "trainNorm = accidentNorm.iloc[trainData.index]\n", + "validNorm = accidentNorm.iloc[validData.index]" ] }, { @@ -89,12 +99,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ - "# c.\n", - "trainData, validData = train_test_split(accidentsNorm, test_size=0.4, random_state=26)" + "# c. \n", + "train_X = trainNorm[accident_cols]\n", + "train_y = trainNorm['Injury']\n", + "valid_X = validNorm[accident_cols]\n", + "valid_y = validNorm['Injury']" ] }, { @@ -108,11 +121,205 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 5 folds for each of 25 candidates, totalling 125 fits\n", + "[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.772 total time= 33.9s\n", + "[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.762 total time= 46.2s\n", + "[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.769 total time= 42.3s\n", + "[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.762 total time= 36.2s\n", + "[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.761 total time= 34.8s\n", + "[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.999 total time= 5.6s\n", + "[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.996 total time= 5.8s\n", + "[CV 3/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.998 total time= 5.6s\n", + "[CV 4/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.995 total time= 5.6s\n", + "[CV 5/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.997 total time= 5.7s\n", + "[CV 1/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=1.000 total time= 1.1s\n", + "[CV 2/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=1.000 total time= 1.1s\n", + "[CV 3/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=1.000 total time= 1.2s\n", + "[CV 4/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.999 total time= 1.4s\n", + "[CV 5/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=1.000 total time= 1.0s\n", + "[CV 1/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time= 4.4s\n", + "[CV 2/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time= 4.3s\n", + "[CV 3/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time= 4.2s\n", + "[CV 4/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time= 4.3s\n", + "[CV 5/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=1.000 total time= 4.4s\n", + "[CV 1/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 26.5s\n", + "[CV 2/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 25.9s\n", + "[CV 3/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 27.3s\n", + "[CV 4/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 25.9s\n", + "[CV 5/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 26.6s\n", + "[CV 1/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.923 total time= 1.1min\n", + "[CV 2/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.913 total time= 1.3min\n", + "[CV 3/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.917 total time= 1.1min\n", + "[CV 4/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.916 total time= 1.1min\n", + "[CV 5/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.912 total time= 1.1min\n", + "[CV 1/5] END ........C=1, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.4s\n", + "[CV 2/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.3s\n", + "[CV 3/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.4s\n", + "[CV 4/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.3s\n", + "[CV 5/5] END ........C=1, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.4s\n", + "[CV 1/5] END .......C=1, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n", + "[CV 2/5] END .......C=1, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n", + "[CV 3/5] END .......C=1, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n", + "[CV 4/5] END .......C=1, gamma=0.01, kernel=rbf;, score=0.999 total time= 0.4s\n", + "[CV 5/5] END .......C=1, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n", + "[CV 1/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.6s\n", + "[CV 2/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.6s\n", + "[CV 3/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.6s\n", + "[CV 4/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.6s\n", + "[CV 5/5] END ......C=1, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.7s\n", + "[CV 1/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 4.6s\n", + "[CV 2/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 4.9s\n", + "[CV 3/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 4.4s\n", + "[CV 4/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 5.0s\n", + "[CV 5/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=1.000 total time= 4.9s\n", + "[CV 1/5] END .........C=10, gamma=1, kernel=rbf;, score=0.928 total time= 1.1min\n", + "[CV 2/5] END .........C=10, gamma=1, kernel=rbf;, score=0.918 total time= 1.2min\n", + "[CV 3/5] END .........C=10, gamma=1, kernel=rbf;, score=0.923 total time= 1.3min\n", + "[CV 4/5] END .........C=10, gamma=1, kernel=rbf;, score=0.920 total time= 1.0min\n", + "[CV 5/5] END .........C=10, gamma=1, kernel=rbf;, score=0.918 total time= 1.1min\n", + "[CV 1/5] END .......C=10, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.2s\n", + "[CV 2/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.1s\n", + "[CV 3/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.0s\n", + "[CV 4/5] END .......C=10, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.1s\n", + "[CV 5/5] END .......C=10, gamma=0.1, kernel=rbf;, score=1.000 total time= 4.1s\n", + "[CV 1/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.5s\n", + "[CV 2/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.5s\n", + "[CV 3/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n", + "[CV 4/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.5s\n", + "[CV 5/5] END ......C=10, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n", + "[CV 1/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n", + "[CV 2/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n", + "[CV 3/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n", + "[CV 4/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n", + "[CV 5/5] END .....C=10, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n", + "[CV 1/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time= 1.0s\n", + "[CV 2/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time= 1.0s\n", + "[CV 3/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time= 1.0s\n", + "[CV 4/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.8s\n", + "[CV 5/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.9s\n", + "[CV 1/5] END ........C=100, gamma=1, kernel=rbf;, score=0.928 total time= 1.1min\n", + "[CV 2/5] END ........C=100, gamma=1, kernel=rbf;, score=0.918 total time= 1.1min\n", + "[CV 3/5] END ........C=100, gamma=1, kernel=rbf;, score=0.923 total time= 1.1min\n", + "[CV 4/5] END ........C=100, gamma=1, kernel=rbf;, score=0.920 total time= 59.7s\n", + "[CV 5/5] END ........C=100, gamma=1, kernel=rbf;, score=0.918 total time= 1.0min\n", + "[CV 1/5] END ......C=100, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.2s\n", + "[CV 2/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.0s\n", + "[CV 3/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.999 total time= 3.1s\n", + "[CV 4/5] END ......C=100, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.1s\n", + "[CV 5/5] END ......C=100, gamma=0.1, kernel=rbf;, score=1.000 total time= 3.2s\n", + "[CV 1/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n", + "[CV 2/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n", + "[CV 3/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n", + "[CV 4/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n", + "[CV 5/5] END .....C=100, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n", + "[CV 1/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n", + "[CV 2/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n", + "[CV 3/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n", + "[CV 4/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n", + "[CV 5/5] END ....C=100, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n", + "[CV 1/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.3s\n", + "[CV 2/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.4s\n", + "[CV 3/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.4s\n", + "[CV 4/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.4s\n", + "[CV 5/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.5s\n", + "[CV 1/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.928 total time= 57.9s\n", + "[CV 2/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.918 total time= 1.1min\n", + "[CV 3/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.923 total time= 1.1min\n", + "[CV 4/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.920 total time= 58.9s\n", + "[CV 5/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.918 total time= 59.8s\n", + "[CV 1/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=1.000 total time= 2.8s\n", + "[CV 2/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=0.999 total time= 2.7s\n", + "[CV 3/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=0.999 total time= 2.6s\n", + "[CV 4/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=1.000 total time= 2.7s\n", + "[CV 5/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=1.000 total time= 2.6s\n", + "[CV 1/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n", + "[CV 2/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n", + "[CV 3/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.3s\n", + "[CV 4/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n", + "[CV 5/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=1.000 total time= 0.4s\n", + "[CV 1/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n", + "[CV 2/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n", + "[CV 3/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n", + "[CV 4/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.2s\n", + "[CV 5/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=1.000 total time= 0.3s\n", + "[CV 1/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.4s\n", + "[CV 2/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.4s\n", + "[CV 3/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.3s\n", + "[CV 4/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.4s\n", + "[CV 5/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=1.000 total time= 0.6s\n" + ] + }, + { + "data": { + "text/html": [ + "
GridSearchCV(estimator=SVC(),\n",
+ " param_grid={'C': [0.1, 1, 10, 100, 1000],\n",
+ " 'gamma': [1, 0.1, 0.01, 0.001, 0.0001],\n",
+ " 'kernel': ['rbf']},\n",
+ " verbose=3)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. GridSearchCV(estimator=SVC(),\n",
+ " param_grid={'C': [0.1, 1, 10, 100, 1000],\n",
+ " 'gamma': [1, 0.1, 0.01, 0.001, 0.0001],\n",
+ " 'kernel': ['rbf']},\n",
+ " verbose=3)SVC()
SVC()
SVC(C=0.1, gamma=0.001)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
SVC(C=0.1, gamma=0.001)