From 3555a14f54e0799952716a869d1633dfb442ad16 Mon Sep 17 00:00:00 2001 From: noah Date: Sat, 1 Apr 2023 20:30:24 -0500 Subject: [PATCH] Data pre-processing work --- Schrick-Noah_Learning-Practice-9.ipynb | 52 ++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/Schrick-Noah_Learning-Practice-9.ipynb b/Schrick-Noah_Learning-Practice-9.ipynb index 6f64da5..03a1508 100644 --- a/Schrick-Noah_Learning-Practice-9.ipynb +++ b/Schrick-Noah_Learning-Practice-9.ipynb @@ -14,11 +14,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "# Imports" + "# Imports\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "from sklearn import preprocessing\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "\n", + "%matplotlib inline" ] }, { @@ -32,11 +41,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "# a" + "# a\n", + "accidents_df = pd.read_csv('accidentsFull.csv')\n", + "accidents_df = accidents_df.apply(pd.to_numeric) # convert all columns of DataFrame\n" ] }, { @@ -50,11 +61,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "# b." + "# b.\n", + "## Normalize\n", + "scaler = preprocessing.StandardScaler()\n", + "\n", + "accident_cols = accidents_df.columns.values.tolist()\n", + "\n", + "scaler.fit(accidents_df[accident_cols]) # Note the use of an array of column names\n", + "\n", + "# Transform the full dataset\n", + "accidentsNorm = pd.DataFrame(scaler.transform(accidents_df[accident_cols]), \n", + " columns=accident_cols)" ] }, { @@ -72,7 +93,8 @@ "metadata": {}, "outputs": [], "source": [ - "# c." + "# c.\n", + "trainData, validData = train_test_split(accidentsNorm, test_size=0.4, random_state=26)" ] }, { @@ -149,8 +171,22 @@ } ], "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" }, "orig_nbformat": 4 },