Fitting LM
This commit is contained in:
parent
7fc3a898dc
commit
4e7e4fc474
@ -638,22 +638,18 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Linear? \n",
|
"# Linear? \n",
|
||||||
"There does not appear to be a linear relationship between spending and frequency or spending and last update days ago.\n",
|
"There does not appear to be a linear relationship between spending and last update days ago.\n",
|
||||||
"An argument could be made for Frequency and Spending as spending gets larger, but both scatter plots do not seem to indicate a linear relationship."
|
"An argument could be made for Frequency and Spending as spending gets larger, but both scatter plots do not seem to indicate a linear relationship. The linear fit for frequency and spending would have a low R squared value."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 107,
|
"execution_count": 109,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# c. i\n",
|
"# c. i\n",
|
||||||
"predictors = ['sequence_number', 'US', 'source_a', 'source_c', 'source_b', 'source_d', 'source_e',\n",
|
"predictors = ['US','Freq', 'last_update_days_ago', 'Web order', 'Gender=male', 'Address_is_res']\n",
|
||||||
" 'source_m', 'source_o', 'source_h', 'source_r', 'source_s', 'source_t',\n",
|
|
||||||
" 'source_u', 'source_p', 'source_x', 'source_w', 'Freq', 'last_update_days_ago',\n",
|
|
||||||
" '1st_update_days_ago', 'Web order', 'Gender=male', 'Address_is_res', 'Purchase',\n",
|
|
||||||
" 'Spending']\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"outcome = 'Spending'\n",
|
"outcome = 'Spending'\n",
|
||||||
"X = pd.get_dummies(tayko_df[predictors], drop_first=True)\n",
|
"X = pd.get_dummies(tayko_df[predictors], drop_first=True)\n",
|
||||||
@ -663,11 +659,47 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 111,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"intercept 10.17629741458822\n",
|
||||||
|
" Predictor coefficient\n",
|
||||||
|
"0 US -4.620293\n",
|
||||||
|
"1 Freq 91.274450\n",
|
||||||
|
"2 last_update_days_ago -0.010374\n",
|
||||||
|
"3 Web order 18.628731\n",
|
||||||
|
"4 Gender=male -9.111366\n",
|
||||||
|
"5 Address_is_res -75.815354\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ename": "ValueError",
|
||||||
|
"evalue": "operands could not be broadcast together with shapes (800,) (1200,) ",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||||
|
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
|
||||||
|
"Cell \u001b[0;32mIn[111], line 10\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[39mprint\u001b[39m(pd\u001b[39m.\u001b[39mDataFrame({\u001b[39m'\u001b[39m\u001b[39mPredictor\u001b[39m\u001b[39m'\u001b[39m: X\u001b[39m.\u001b[39mcolumns, \u001b[39m'\u001b[39m\u001b[39mcoefficient\u001b[39m\u001b[39m'\u001b[39m: tayko_lm\u001b[39m.\u001b[39mcoef_}))\n\u001b[1;32m 9\u001b[0m \u001b[39m# print performance measures\u001b[39;00m\n\u001b[0;32m---> 10\u001b[0m regressionSummary(valid_y, tayko_lm\u001b[39m.\u001b[39;49mpredict(train_X))\n",
|
||||||
|
"File \u001b[0;32m~/.local/lib/python3.10/site-packages/dmba/metric.py:71\u001b[0m, in \u001b[0;36mregressionSummary\u001b[0;34m(y_true, y_pred)\u001b[0m\n\u001b[1;32m 69\u001b[0m y_true \u001b[39m=\u001b[39m _toArray(y_true)\n\u001b[1;32m 70\u001b[0m y_pred \u001b[39m=\u001b[39m _toArray(y_pred)\n\u001b[0;32m---> 71\u001b[0m y_res \u001b[39m=\u001b[39m y_true \u001b[39m-\u001b[39;49m y_pred\n\u001b[1;32m 72\u001b[0m metrics \u001b[39m=\u001b[39m [\n\u001b[1;32m 73\u001b[0m (\u001b[39m'\u001b[39m\u001b[39mMean Error (ME)\u001b[39m\u001b[39m'\u001b[39m, \u001b[39msum\u001b[39m(y_res) \u001b[39m/\u001b[39m \u001b[39mlen\u001b[39m(y_res)),\n\u001b[1;32m 74\u001b[0m (\u001b[39m'\u001b[39m\u001b[39mRoot Mean Squared Error (RMSE)\u001b[39m\u001b[39m'\u001b[39m, math\u001b[39m.\u001b[39msqrt(mean_squared_error(y_true, y_pred))),\n\u001b[1;32m 75\u001b[0m (\u001b[39m'\u001b[39m\u001b[39mMean Absolute Error (MAE)\u001b[39m\u001b[39m'\u001b[39m, \u001b[39msum\u001b[39m(\u001b[39mabs\u001b[39m(y_res)) \u001b[39m/\u001b[39m \u001b[39mlen\u001b[39m(y_res)),\n\u001b[1;32m 76\u001b[0m ]\n\u001b[1;32m 77\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mall\u001b[39m(yt \u001b[39m!=\u001b[39m \u001b[39m0\u001b[39m \u001b[39mfor\u001b[39;00m yt \u001b[39min\u001b[39;00m y_true):\n",
|
||||||
|
"\u001b[0;31mValueError\u001b[0m: operands could not be broadcast together with shapes (800,) (1200,) "
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# c. ii"
|
"# c. ii\n",
|
||||||
|
"tayko_lm = LinearRegression()\n",
|
||||||
|
"tayko_lm.fit(train_X, train_y)\n",
|
||||||
|
"\n",
|
||||||
|
"# print coefficients\n",
|
||||||
|
"print('intercept ', tayko_lm.intercept_)\n",
|
||||||
|
"print(pd.DataFrame({'Predictor': X.columns, 'coefficient': tayko_lm.coef_}))\n",
|
||||||
|
"\n",
|
||||||
|
"# print performance measures\n",
|
||||||
|
"regressionSummary(valid_y, tayko_lm.predict(valid_X))"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user