Data pre-processing work

2023-04-01 20:30:24 -05:00 · 2023-04-01 20:30:24 -05:00 · 3555a14f54
commit 3555a14f54
parent 57d398b6bc
1 changed file with 44 additions and 8 deletions
--- a/Schrick-Noah_Learning-Practice-9.ipynb
+++ b/Schrick-Noah_Learning-Practice-9.ipynb
@@ -14,11 +14,20 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
-    "# Imports"
+    "# Imports\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "from sklearn import preprocessing\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "\n",
+    "%matplotlib inline"
   ]
  },
  {
@@ -32,11 +41,13 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
-    "# a"
+    "# a\n",
+    "accidents_df = pd.read_csv('accidentsFull.csv')\n",
+    "accidents_df = accidents_df.apply(pd.to_numeric) # convert every column of the DataFrame to a numeric dtype\n"
   ]
  },
  {
@@ -50,11 +61,21 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
-    "# b."
+    "# b.\n",
+    "## Normalize (standardize each column to zero mean and unit variance)\n",
+    "scaler = preprocessing.StandardScaler()\n",
+    "\n",
+    "accident_cols = accidents_df.columns.values.tolist()\n",
+    "\n",
+    "scaler.fit(accidents_df[accident_cols])  # accident_cols is a list of every column name, so the scaler is fit on all columns\n",
+    "\n",
+    "# Transform the full dataset\n",
+    "accidentsNorm = pd.DataFrame(scaler.transform(accidents_df[accident_cols]), \n",
+    "                                    columns=accident_cols)"
   ]
  },
  {
@@ -72,7 +93,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "# c."
+    "# c.\n",
+    "trainData, validData = train_test_split(accidentsNorm, test_size=0.4, random_state=26)"
   ]
  },
  {
@@ -149,8 +171,22 @@
  }
 ],
 "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
  "language_info": {
-   "name": "python"
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.10"
  },
  "orig_nbformat": 4
 },