diff --git a/Schrick-Noah_Lecture-Work.ipynb b/Schrick-Noah_Lecture-Work.ipynb index 8ffa337..3ef03f0 100644 --- a/Schrick-Noah_Lecture-Work.ipynb +++ b/Schrick-Noah_Lecture-Work.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -12,12 +12,13 @@ "# Noah L. Schrick - 1492657\n", "\n", "import pandas as pd\n", + "import numpy as np\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -229,7 +230,7 @@ "CAT. MEDV 0 " ] }, - "execution_count": 3, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -253,7 +254,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -570,7 +571,7 @@ "CAT. MEDV -0.44 -0.47 0.79 1.00 " ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -579,6 +580,153 @@ "# Correlation Matrix\n", "bostonHousing_df.corr().round(2)" ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RM_bin CHAS\n", + "3 0 25.300000\n", + "4 0 15.407143\n", + "5 0 17.200000\n", + " 1 22.218182\n", + "6 0 21.769170\n", + " 1 25.918750\n", + "7 0 35.964444\n", + " 1 44.066667\n", + "8 0 45.700000\n", + " 1 35.950000\n", + "Name: MEDV, dtype: float64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create bins of size 1 for variables\n", + "\n", + "bostonHousing_df['RM_bin'] = pd.cut(bostonHousing_df.RM, range(0, 10), labels=False)\n", + "\n", + "# Compute average of MEDV by (binned) RM and CHAS. \n", + "# Group the data using groupby, then restrict the analysis to MEDV and determine the mean for each group.\n", + "\n", + "bostonHousing_df.groupby(['RM_bin', 'CHAS'])['MEDV'].mean()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CHAS01All
RM_bin
325.300000NaN25.300000
415.407143NaN15.407143
517.20000022.21818217.551592
621.76917025.91875022.015985
735.96444444.06666736.917647
845.70000035.95000044.200000
All22.09384328.44000022.532806
\n", + "
" + ], + "text/plain": [ + "CHAS 0 1 All\n", + "RM_bin \n", + "3 25.300000 NaN 25.300000\n", + "4 15.407143 NaN 15.407143\n", + "5 17.200000 22.218182 17.551592\n", + "6 21.769170 25.918750 22.015985\n", + "7 35.964444 44.066667 36.917647\n", + "8 45.700000 35.950000 44.200000\n", + "All 22.093843 28.440000 22.532806" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# create bins of size 1 for RM\n", + "bostonHousing_df['RM_bin'] = pd.cut(bostonHousing_df.RM, range(0, 10), labels=False)\n", + "\n", + "# use pivot_table() to reshape data and generate pivot table\n", + "pd.pivot_table(bostonHousing_df, values='MEDV', index=['RM_bin'], columns=['CHAS'], aggfunc=np.mean, margins=True)" + ] } ], "metadata": { @@ -587,6 +735,18 @@ "language": "python", "name": "python3" }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, "orig_nbformat": 4 }, "nbformat": 4,