Correlation and Matrix Plots. Linear Regression
This commit is contained in:
commit
cce3b9fecb
231
timing-analysis.ipynb
Normal file
231
timing-analysis.ipynb
Normal file
@ -0,0 +1,231 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Imports\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"import seaborn as sns\n",
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
|
"from sklearn.linear_model import LinearRegression\n",
|
||||||
|
"from dmba import regressionSummary"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Import data\n",
|
||||||
|
"timing_df = pd.read_csv('TimingData.csv')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Compute mean, median, min, max, and standard deviation for quantitative variables\n",
|
||||||
|
"pd.DataFrame({'mean': timing_df.mean(),\n",
|
||||||
|
"'median': timing_df.median(),\n",
|
||||||
|
"'min': timing_df.min(),\n",
|
||||||
|
"'max': timing_df.max(),\n",
|
||||||
|
"'sd': timing_df.std()\n",
|
||||||
|
"})"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Show the average timing for each value of each variable (e.g., average time for 12 nodes, 11, ..., 1; average time for 6 exploits, ..., 49152)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Correlation and Matrix Plot\n",
|
||||||
|
"timing_corr = timing_df.corr().round(3)\n",
|
||||||
|
"print(timing_corr)\n",
|
||||||
|
"fig, ax = plt.subplots()\n",
|
||||||
|
"fig.set_size_inches(11, 7)\n",
|
||||||
|
"sns.heatmap(timing_corr, annot=True, fmt=\".1f\", cmap=\"RdBu\", center=0, ax=ax)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Linear Regression - Overall\n",
|
||||||
|
"predictors = [\"nodes\", \"exploits\", \"appl\", \"load\"]\n",
|
||||||
|
"overall_outcome = [\"runtime\"]\n",
|
||||||
|
"\n",
|
||||||
|
"# partition data\n",
|
||||||
|
"X = pd.get_dummies(timing_df[predictors], drop_first=True)\n",
|
||||||
|
"overall_y = timing_df[overall_outcome]\n",
|
||||||
|
"train_X, valid_X, train_y, valid_y = train_test_split(X, overall_y, test_size=0.4, random_state=1)\n",
|
||||||
|
"housing_lm = LinearRegression()\n",
|
||||||
|
"housing_lm.fit(train_X, train_y)\n",
|
||||||
|
"# print coefficients\n",
|
||||||
|
"print('intercept ', housing_lm.intercept_)\n",
|
||||||
|
"print(pd.DataFrame({'Predictor': X.columns, 'coefficient': housing_lm.coef_.ravel()}))  # coef_ is 2-D (1, n_features) because the target is a one-column DataFrame; flatten it\n",
|
||||||
|
"# print performance measures\n",
|
||||||
|
"regressionSummary(train_y, housing_lm.predict(train_X))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Linear Regression - Task 0\n",
|
||||||
|
"t0_outcome = [\"task0\"]\n",
|
||||||
|
"\n",
|
||||||
|
"# partition data\n",
|
||||||
|
"X = pd.get_dummies(timing_df[predictors], drop_first=True)\n",
|
||||||
|
"overall_y = timing_df[t0_outcome]\n",
|
||||||
|
"train_X, valid_X, train_y, valid_y = train_test_split(X, overall_y, test_size=0.4, random_state=1)\n",
|
||||||
|
"housing_lm = LinearRegression()\n",
|
||||||
|
"housing_lm.fit(train_X, train_y)\n",
|
||||||
|
"# print coefficients\n",
|
||||||
|
"print('intercept ', housing_lm.intercept_)\n",
|
||||||
|
"print(pd.DataFrame({'Predictor': X.columns, 'coefficient': housing_lm.coef_.ravel()}))  # coef_ is 2-D (1, n_features) because the target is a one-column DataFrame; flatten it\n",
|
||||||
|
"# print performance measures\n",
|
||||||
|
"regressionSummary(train_y, housing_lm.predict(train_X))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Linear Regression - Task 1\n",
|
||||||
|
"t1_outcome = [\"task1\"]\n",
|
||||||
|
"\n",
|
||||||
|
"# partition data\n",
|
||||||
|
"X = pd.get_dummies(timing_df[predictors], drop_first=True)\n",
|
||||||
|
"overall_y = timing_df[t1_outcome]\n",
|
||||||
|
"train_X, valid_X, train_y, valid_y = train_test_split(X, overall_y, test_size=0.4, random_state=1)\n",
|
||||||
|
"housing_lm = LinearRegression()\n",
|
||||||
|
"housing_lm.fit(train_X, train_y)\n",
|
||||||
|
"# print coefficients\n",
|
||||||
|
"print('intercept ', housing_lm.intercept_)\n",
|
||||||
|
"print(pd.DataFrame({'Predictor': X.columns, 'coefficient': housing_lm.coef_.ravel()}))  # coef_ is 2-D (1, n_features) because the target is a one-column DataFrame; flatten it\n",
|
||||||
|
"# print performance measures\n",
|
||||||
|
"regressionSummary(train_y, housing_lm.predict(train_X))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Linear Regression - Task 2\n",
|
||||||
|
"t2_outcome = [\"task2\"]\n",
|
||||||
|
"\n",
|
||||||
|
"# partition data\n",
|
||||||
|
"X = pd.get_dummies(timing_df[predictors], drop_first=True)\n",
|
||||||
|
"overall_y = timing_df[t2_outcome]\n",
|
||||||
|
"train_X, valid_X, train_y, valid_y = train_test_split(X, overall_y, test_size=0.4, random_state=1)\n",
|
||||||
|
"housing_lm = LinearRegression()\n",
|
||||||
|
"housing_lm.fit(train_X, train_y)\n",
|
||||||
|
"# print coefficients\n",
|
||||||
|
"print('intercept ', housing_lm.intercept_)\n",
|
||||||
|
"print(pd.DataFrame({'Predictor': X.columns, 'coefficient': housing_lm.coef_.ravel()}))  # coef_ is 2-D (1, n_features) because the target is a one-column DataFrame; flatten it\n",
|
||||||
|
"# print performance measures\n",
|
||||||
|
"regressionSummary(train_y, housing_lm.predict(train_X))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Linear Regression - Task 3\n",
|
||||||
|
"t3_outcome = [\"task3\"]\n",
|
||||||
|
"\n",
|
||||||
|
"# partition data\n",
|
||||||
|
"X = pd.get_dummies(timing_df[predictors], drop_first=True)\n",
|
||||||
|
"overall_y = timing_df[t3_outcome]\n",
|
||||||
|
"train_X, valid_X, train_y, valid_y = train_test_split(X, overall_y, test_size=0.4, random_state=1)\n",
|
||||||
|
"housing_lm = LinearRegression()\n",
|
||||||
|
"housing_lm.fit(train_X, train_y)\n",
|
||||||
|
"# print coefficients\n",
|
||||||
|
"print('intercept ', housing_lm.intercept_)\n",
|
||||||
|
"print(pd.DataFrame({'Predictor': X.columns, 'coefficient': housing_lm.coef_.ravel()}))  # coef_ is 2-D (1, n_features) because the target is a one-column DataFrame; flatten it\n",
|
||||||
|
"# print performance measures\n",
|
||||||
|
"regressionSummary(train_y, housing_lm.predict(train_X))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Linear Regression - Task 4\n",
|
||||||
|
"t4_outcome = [\"task4\"]\n",
|
||||||
|
"\n",
|
||||||
|
"# partition data\n",
|
||||||
|
"X = pd.get_dummies(timing_df[predictors], drop_first=True)\n",
|
||||||
|
"overall_y = timing_df[t4_outcome]\n",
|
||||||
|
"train_X, valid_X, train_y, valid_y = train_test_split(X, overall_y, test_size=0.4, random_state=1)\n",
|
||||||
|
"housing_lm = LinearRegression()\n",
|
||||||
|
"housing_lm.fit(train_X, train_y)\n",
|
||||||
|
"# print coefficients\n",
|
||||||
|
"print('intercept ', housing_lm.intercept_)\n",
|
||||||
|
"print(pd.DataFrame({'Predictor': X.columns, 'coefficient': housing_lm.coef_.ravel()}))  # coef_ is 2-D (1, n_features) because the target is a one-column DataFrame; flatten it\n",
|
||||||
|
"# print performance measures\n",
|
||||||
|
"regressionSummary(train_y, housing_lm.predict(train_X))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Linear Regression - Task 5\n",
|
||||||
|
"t5_outcome = [\"task5\"]\n",
|
||||||
|
"\n",
|
||||||
|
"# partition data\n",
|
||||||
|
"X = pd.get_dummies(timing_df[predictors], drop_first=True)\n",
|
||||||
|
"overall_y = timing_df[t5_outcome]\n",
|
||||||
|
"train_X, valid_X, train_y, valid_y = train_test_split(X, overall_y, test_size=0.4, random_state=1)\n",
|
||||||
|
"housing_lm = LinearRegression()\n",
|
||||||
|
"housing_lm.fit(train_X, train_y)\n",
|
||||||
|
"# print coefficients\n",
|
||||||
|
"print('intercept ', housing_lm.intercept_)\n",
|
||||||
|
"print(pd.DataFrame({'Predictor': X.columns, 'coefficient': housing_lm.coef_.ravel()}))  # coef_ is 2-D (1, n_features) because the target is a one-column DataFrame; flatten it\n",
|
||||||
|
"# print performance measures\n",
|
||||||
|
"regressionSummary(train_y, housing_lm.predict(train_X))"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"language_info": {
|
||||||
|
"name": "python"
|
||||||
|
},
|
||||||
|
"orig_nbformat": 4
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user