27 lines
890 B
Python

# Learning Practice 1 for the University of Tulsa's QM-7063 Data Mining Course
# Intro to data visualization
# Professor: Dr. Abdulrashid, Spring 2023
# Noah L. Schrick - 1492657
import pandas as pd
import matplotlib.pyplot as plt
## Read the Amtrak ridership CSV and build a date-indexed time series
# NOTE(review): squeeze("columns") only alters a single-column frame; the
# Month/Ridership accesses below need several columns, so it looks like a
# no-op here -- confirm before removing.
Amtrak_df = pd.read_csv('Amtrak.csv').squeeze("columns")
# Parse Month with an explicit day/month/year format instead of relying on
# format inference, and keep the result as a new Date column.
Amtrak_df['Date'] = pd.to_datetime(Amtrak_df['Month'], format='%d/%m/%Y')
# Ridership values as a Series indexed by the parsed dates.
ridership_ts = pd.Series(data=Amtrak_df['Ridership'].values,
                         index=Amtrak_df['Date'])
## Boston housing data: bar chart of average MEDV for each CHAS group
housing_df = pd.read_csv('BostonHousing.csv')
# 'CAT. MEDV' contains a period and a space, so give the column an
# identifier-friendly name.
housing_df = housing_df.rename(columns={'CAT. MEDV': 'CAT_MEDV'})
# Mean MEDV per CHAS = (0, 1). MEDV is selected before aggregating so the
# other columns are not averaged only to be thrown away.
dataForPlot = housing_df.groupby('CHAS')['MEDV'].mean()
fig, ax = plt.subplots()
ax.bar(dataForPlot.index, dataForPlot, color=['C5', 'C1'])
# Tick only at the two CHAS values. The ticks are passed on their own because
# the second positional argument of set_xticks is `labels` in current
# Matplotlib, so the older `set_xticks([0, 1], False)` spelling no longer
# means minor=False.
ax.set_xticks([0, 1])
ax.set_xlabel('CHAS')
ax.set_ylabel('Avg. MEDV')