Group by and pivot for bins of data
This commit is contained in:
parent
b7b8769e63
commit
d612119625
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -12,12 +12,13 @@
|
||||
"# Noah L. Schrick - 1492657\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -229,7 +230,7 @@
|
||||
"CAT. MEDV 0 "
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -253,7 +254,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -570,7 +571,7 @@
|
||||
"CAT. MEDV -0.44 -0.47 0.79 1.00 "
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -579,6 +580,153 @@
|
||||
"# Correlation Matrix\n",
|
||||
"bostonHousing_df.corr().round(2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"RM_bin CHAS\n",
|
||||
"3 0 25.300000\n",
|
||||
"4 0 15.407143\n",
|
||||
"5 0 17.200000\n",
|
||||
" 1 22.218182\n",
|
||||
"6 0 21.769170\n",
|
||||
" 1 25.918750\n",
|
||||
"7 0 35.964444\n",
|
||||
" 1 44.066667\n",
|
||||
"8 0 45.700000\n",
|
||||
" 1 35.950000\n",
|
||||
"Name: MEDV, dtype: float64"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create bins of size 1 for RM\n",
|
||||
"\n",
|
||||
"bostonHousing_df['RM_bin'] = pd.cut(bostonHousing_df.RM, range(0, 10), labels=False)\n",
|
||||
"\n",
|
||||
"# Compute average of MEDV by (binned) RM and CHAS. \n",
|
||||
"# Group the data using groupby, then restrict the analysis to MEDV and determine the mean for each group.\n",
|
||||
"\n",
|
||||
"bostonHousing_df.groupby(['RM_bin', 'CHAS'])['MEDV'].mean()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th>CHAS</th>\n",
|
||||
" <th>0</th>\n",
|
||||
" <th>1</th>\n",
|
||||
" <th>All</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>RM_bin</th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>25.300000</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>25.300000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>15.407143</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>15.407143</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5</th>\n",
|
||||
" <td>17.200000</td>\n",
|
||||
" <td>22.218182</td>\n",
|
||||
" <td>17.551592</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>6</th>\n",
|
||||
" <td>21.769170</td>\n",
|
||||
" <td>25.918750</td>\n",
|
||||
" <td>22.015985</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>7</th>\n",
|
||||
" <td>35.964444</td>\n",
|
||||
" <td>44.066667</td>\n",
|
||||
" <td>36.917647</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>8</th>\n",
|
||||
" <td>45.700000</td>\n",
|
||||
" <td>35.950000</td>\n",
|
||||
" <td>44.200000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>All</th>\n",
|
||||
" <td>22.093843</td>\n",
|
||||
" <td>28.440000</td>\n",
|
||||
" <td>22.532806</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"CHAS 0 1 All\n",
|
||||
"RM_bin \n",
|
||||
"3 25.300000 NaN 25.300000\n",
|
||||
"4 15.407143 NaN 15.407143\n",
|
||||
"5 17.200000 22.218182 17.551592\n",
|
||||
"6 21.769170 25.918750 22.015985\n",
|
||||
"7 35.964444 44.066667 36.917647\n",
|
||||
"8 45.700000 35.950000 44.200000\n",
|
||||
"All 22.093843 28.440000 22.532806"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# create bins of size 1 for RM\n",
|
||||
"bostonHousing_df['RM_bin'] = pd.cut(bostonHousing_df.RM, range(0, 10), labels=False)\n",
|
||||
"\n",
|
||||
"# use pivot_table() to reshape data and generate pivot table\n",
|
||||
"# aggfunc is given by name ('mean'): passing the np.mean callable raises a FutureWarning in pandas >= 2.1\n",
"pd.pivot_table(bostonHousing_df, values='MEDV', index=['RM_bin'], columns=['CHAS'], aggfunc='mean', margins=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@ -587,6 +735,18 @@
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user