27 lines
890 B
Python
27 lines
890 B
Python
# Learning Practice 1 for the University of Tulsa's QM-7063 Data Mining Course
# Intro to data visualization
# Professor: Dr. Abdulrashid, Spring 2023
# Noah L. Schrick - 1492657
|
|
|
|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
## Amtrak data: read the CSV and build a date-indexed ridership series
# squeeze("columns") only collapses a frame that has exactly one column;
# the lines below use two columns (Month, Ridership), so a DataFrame is expected here.
Amtrak_df = pd.read_csv('Amtrak.csv').squeeze("columns")

# Parse the Month column as day/month/year text and store it as a new Date column.
Amtrak_df['Date'] = pd.to_datetime(Amtrak_df['Month'], format='%d/%m/%Y')

# Pass the bare ridership values so the new Series is labelled by Date
# rather than aligned against the frame's original row index.
ridership_ts = pd.Series(Amtrak_df['Ridership'].values, index=Amtrak_df['Date'])
|
|
|
|
## Boston housing data
housing_df = pd.read_csv('BostonHousing.csv')
# Rename 'CAT. MEDV' so the column name contains no space or period.
housing_df = housing_df.rename(columns={'CAT. MEDV': 'CAT_MEDV'})

# Compute mean MEDV per CHAS = (0, 1).
# MEDV is selected before aggregating so only that column is averaged; taking
# the mean of the whole frame first does needless work and raises TypeError
# on pandas >= 2.0 if any column is non-numeric.
dataForPlot = housing_df.groupby('CHAS')['MEDV'].mean()

# Bar chart: one bar per CHAS group, bar height is that group's mean MEDV.
fig, ax = plt.subplots()
ax.bar(dataForPlot.index, dataForPlot, color=['C5', 'C1'])
# Place ticks on the group values only, so the axis does not show fractional
# positions between the bars. Only the tick locations are passed: the second
# positional argument of set_xticks is `labels`, not `minor`.
ax.set_xticks(list(dataForPlot.index))
ax.set_xlabel('CHAS')
ax.set_ylabel('Avg. MEDV')
|
|
|