diff --git a/Schrick-Noah_Lecture-Work.ipynb b/Schrick-Noah_Lecture-Work.ipynb index 8ffa337..3ef03f0 100644 --- a/Schrick-Noah_Lecture-Work.ipynb +++ b/Schrick-Noah_Lecture-Work.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -12,12 +12,13 @@ "# Noah L. Schrick - 1492657\n", "\n", "import pandas as pd\n", + "import numpy as np\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -229,7 +230,7 @@ "CAT. MEDV 0 " ] }, - "execution_count": 3, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -253,7 +254,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -570,7 +571,7 @@ "CAT. MEDV -0.44 -0.47 0.79 1.00 " ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -579,6 +580,153 @@ "# Correlation Matrix\n", "bostonHousing_df.corr().round(2)" ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RM_bin CHAS\n", + "3 0 25.300000\n", + "4 0 15.407143\n", + "5 0 17.200000\n", + " 1 22.218182\n", + "6 0 21.769170\n", + " 1 25.918750\n", + "7 0 35.964444\n", + " 1 44.066667\n", + "8 0 45.700000\n", + " 1 35.950000\n", + "Name: MEDV, dtype: float64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create bins of size 1 for variables\n", + "\n", + "bostonHousing_df['RM_bin'] = pd.cut(bostonHousing_df.RM, range(0, 10), labels=False)\n", + "\n", + "# Compute average of MEDV by (binned) RM and CHAS. \n", + "# Group the data using groupby, then restrict the analysis to MEDV and determine the mean for each group.\n", + "\n", + "bostonHousing_df.groupby(['RM_bin', 'CHAS'])['MEDV'].mean()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
| CHAS | \n", + "0 | \n", + "1 | \n", + "All | \n", + "
|---|---|---|---|
| RM_bin | \n", + "\n", + " | \n", + " | \n", + " |
| 3 | \n", + "25.300000 | \n", + "NaN | \n", + "25.300000 | \n", + "
| 4 | \n", + "15.407143 | \n", + "NaN | \n", + "15.407143 | \n", + "
| 5 | \n", + "17.200000 | \n", + "22.218182 | \n", + "17.551592 | \n", + "
| 6 | \n", + "21.769170 | \n", + "25.918750 | \n", + "22.015985 | \n", + "
| 7 | \n", + "35.964444 | \n", + "44.066667 | \n", + "36.917647 | \n", + "
| 8 | \n", + "45.700000 | \n", + "35.950000 | \n", + "44.200000 | \n", + "
| All | \n", + "22.093843 | \n", + "28.440000 | \n", + "22.532806 | \n", + "