{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Learning Practice 2 for the University of Tulsa's QM-7063 Data Mining Course\n", "# Dimension Reduction\n", "# Professor: Dr. Abdulrashid, Spring 2023\n", "# Noah L. Schrick - 1492657\n", "\n", "import pandas as pd\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meansdminmaxmedianlengthmiss.val
CRIM3.6135248.6015450.0063288.97620.256515060
ZN11.36363623.3224530.00000100.00000.000005060
INDUS11.1367796.8603530.4600027.74009.690005060
CHAS0.0691700.2539940.000001.00000.000005060
NOX0.5546950.1158780.385000.87100.538005060
RM6.2846340.7026173.561008.78006.208505060
AGE68.57490128.1488612.90000100.000077.500005060
DIS3.7950432.1057101.1296012.12653.207455060
RAD9.5494078.7072591.0000024.00005.000005060
TAX408.237154168.537116187.00000711.0000330.000005060
PTRATIO18.4555342.16494612.6000022.000019.050005060
LSTAT12.6530637.1410621.7300037.970011.360005060
MEDV22.5328069.1971045.0000050.000021.200005060
CAT. MEDV0.1660080.3724560.000001.00000.000005060
\n", "
" ], "text/plain": [ " mean sd min max median length \\\n", "CRIM 3.613524 8.601545 0.00632 88.9762 0.25651 506 \n", "ZN 11.363636 23.322453 0.00000 100.0000 0.00000 506 \n", "INDUS 11.136779 6.860353 0.46000 27.7400 9.69000 506 \n", "CHAS 0.069170 0.253994 0.00000 1.0000 0.00000 506 \n", "NOX 0.554695 0.115878 0.38500 0.8710 0.53800 506 \n", "RM 6.284634 0.702617 3.56100 8.7800 6.20850 506 \n", "AGE 68.574901 28.148861 2.90000 100.0000 77.50000 506 \n", "DIS 3.795043 2.105710 1.12960 12.1265 3.20745 506 \n", "RAD 9.549407 8.707259 1.00000 24.0000 5.00000 506 \n", "TAX 408.237154 168.537116 187.00000 711.0000 330.00000 506 \n", "PTRATIO 18.455534 2.164946 12.60000 22.0000 19.05000 506 \n", "LSTAT 12.653063 7.141062 1.73000 37.9700 11.36000 506 \n", "MEDV 22.532806 9.197104 5.00000 50.0000 21.20000 506 \n", "CAT. MEDV 0.166008 0.372456 0.00000 1.0000 0.00000 506 \n", "\n", " miss.val \n", "CRIM 0 \n", "ZN 0 \n", "INDUS 0 \n", "CHAS 0 \n", "NOX 0 \n", "RM 0 \n", "AGE 0 \n", "DIS 0 \n", "RAD 0 \n", "TAX 0 \n", "PTRATIO 0 \n", "LSTAT 0 \n", "MEDV 0 \n", "CAT. MEDV 0 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bostonHousing_df = pd.read_csv('BostonHousing.csv')\n", "bostonHousing_df = bostonHousing_df.rename(columns={'CAT.MEDV': 'CAT_MEDV'})\n", "\n", "# Compute mean, standard dev., min, max, median, length, and missing values for all variables\n", "pd.DataFrame({'mean': bostonHousing_df.mean(),\n", "'sd': bostonHousing_df.std(),\n", "'min': bostonHousing_df.min(),\n", "'max': bostonHousing_df.max(),\n", "'median': bostonHousing_df.median(),\n", "'length': len(bostonHousing_df),\n", "'miss.val': bostonHousing_df.isnull().sum(),\n", "})\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CRIMZNINDUSCHASNOXRMAGEDISRADTAXPTRATIOLSTATMEDVCAT. MEDV
CRIM1.00-0.200.41-0.060.42-0.220.35-0.380.630.580.290.46-0.39-0.15
ZN-0.201.00-0.53-0.04-0.520.31-0.570.66-0.31-0.31-0.39-0.410.360.37
INDUS0.41-0.531.000.060.76-0.390.64-0.710.600.720.380.60-0.48-0.37
CHAS-0.06-0.040.061.000.090.090.09-0.10-0.01-0.04-0.12-0.050.180.11
NOX0.42-0.520.760.091.00-0.300.73-0.770.610.670.190.59-0.43-0.23
RM-0.220.31-0.390.09-0.301.00-0.240.21-0.21-0.29-0.36-0.610.700.64
AGE0.35-0.570.640.090.73-0.241.00-0.750.460.510.260.60-0.38-0.19
DIS-0.380.66-0.71-0.10-0.770.21-0.751.00-0.49-0.53-0.23-0.500.250.12
RAD0.63-0.310.60-0.010.61-0.210.46-0.491.000.910.460.49-0.38-0.20
TAX0.58-0.310.72-0.040.67-0.290.51-0.530.911.000.460.54-0.47-0.27
PTRATIO0.29-0.390.38-0.120.19-0.360.26-0.230.460.461.000.37-0.51-0.44
LSTAT0.46-0.410.60-0.050.59-0.610.60-0.500.490.540.371.00-0.74-0.47
MEDV-0.390.36-0.480.18-0.430.70-0.380.25-0.38-0.47-0.51-0.741.000.79
CAT. MEDV-0.150.37-0.370.11-0.230.64-0.190.12-0.20-0.27-0.44-0.470.791.00
\n", "
" ], "text/plain": [ " CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \\\n", "CRIM 1.00 -0.20 0.41 -0.06 0.42 -0.22 0.35 -0.38 0.63 0.58 \n", "ZN -0.20 1.00 -0.53 -0.04 -0.52 0.31 -0.57 0.66 -0.31 -0.31 \n", "INDUS 0.41 -0.53 1.00 0.06 0.76 -0.39 0.64 -0.71 0.60 0.72 \n", "CHAS -0.06 -0.04 0.06 1.00 0.09 0.09 0.09 -0.10 -0.01 -0.04 \n", "NOX 0.42 -0.52 0.76 0.09 1.00 -0.30 0.73 -0.77 0.61 0.67 \n", "RM -0.22 0.31 -0.39 0.09 -0.30 1.00 -0.24 0.21 -0.21 -0.29 \n", "AGE 0.35 -0.57 0.64 0.09 0.73 -0.24 1.00 -0.75 0.46 0.51 \n", "DIS -0.38 0.66 -0.71 -0.10 -0.77 0.21 -0.75 1.00 -0.49 -0.53 \n", "RAD 0.63 -0.31 0.60 -0.01 0.61 -0.21 0.46 -0.49 1.00 0.91 \n", "TAX 0.58 -0.31 0.72 -0.04 0.67 -0.29 0.51 -0.53 0.91 1.00 \n", "PTRATIO 0.29 -0.39 0.38 -0.12 0.19 -0.36 0.26 -0.23 0.46 0.46 \n", "LSTAT 0.46 -0.41 0.60 -0.05 0.59 -0.61 0.60 -0.50 0.49 0.54 \n", "MEDV -0.39 0.36 -0.48 0.18 -0.43 0.70 -0.38 0.25 -0.38 -0.47 \n", "CAT. MEDV -0.15 0.37 -0.37 0.11 -0.23 0.64 -0.19 0.12 -0.20 -0.27 \n", "\n", " PTRATIO LSTAT MEDV CAT. MEDV \n", "CRIM 0.29 0.46 -0.39 -0.15 \n", "ZN -0.39 -0.41 0.36 0.37 \n", "INDUS 0.38 0.60 -0.48 -0.37 \n", "CHAS -0.12 -0.05 0.18 0.11 \n", "NOX 0.19 0.59 -0.43 -0.23 \n", "RM -0.36 -0.61 0.70 0.64 \n", "AGE 0.26 0.60 -0.38 -0.19 \n", "DIS -0.23 -0.50 0.25 0.12 \n", "RAD 0.46 0.49 -0.38 -0.20 \n", "TAX 0.46 0.54 -0.47 -0.27 \n", "PTRATIO 1.00 0.37 -0.51 -0.44 \n", "LSTAT 0.37 1.00 -0.74 -0.47 \n", "MEDV -0.51 -0.74 1.00 0.79 \n", "CAT. MEDV -0.44 -0.47 0.79 1.00 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Correlation Matrix\n", "bostonHousing_df.corr().round(2)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }