From 58b92cfb7a242f795e74d8b421d63e07fbee8b37 Mon Sep 17 00:00:00 2001 From: noah Date: Tue, 28 Feb 2023 12:48:35 -0600 Subject: [PATCH] Conceptual responses and residuals --- .~lock.Tayko.csv# | 1 - Schrick-Noah_Learning-Practice-5.ipynb | 174 ++++++++++++++++++++++--- 2 files changed, 159 insertions(+), 16 deletions(-) delete mode 100644 .~lock.Tayko.csv# diff --git a/.~lock.Tayko.csv# b/.~lock.Tayko.csv# deleted file mode 100644 index 026fb0e..0000000 --- a/.~lock.Tayko.csv# +++ /dev/null @@ -1 +0,0 @@ -,noah,NovaArchSys,27.02.2023 17:02,file:///home/noah/.config/libreoffice/4; \ No newline at end of file diff --git a/Schrick-Noah_Learning-Practice-5.ipynb b/Schrick-Noah_Learning-Practice-5.ipynb index a124100..4e0ff37 100644 --- a/Schrick-Noah_Learning-Practice-5.ipynb +++ b/Schrick-Noah_Learning-Practice-5.ipynb @@ -644,7 +644,7 @@ }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 114, "metadata": {}, "outputs": [], "source": [ @@ -659,7 +659,7 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 115, "metadata": {}, "outputs": [ { @@ -673,19 +673,13 @@ "2 last_update_days_ago -0.010374\n", "3 Web order 18.628731\n", "4 Gender=male -9.111366\n", - "5 Address_is_res -75.815354\n" - ] - }, - { - "ename": "ValueError", - "evalue": "operands could not be broadcast together with shapes (800,) (1200,) ", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[111], line 10\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[39mprint\u001b[39m(pd\u001b[39m.\u001b[39mDataFrame({\u001b[39m'\u001b[39m\u001b[39mPredictor\u001b[39m\u001b[39m'\u001b[39m: X\u001b[39m.\u001b[39mcolumns, \u001b[39m'\u001b[39m\u001b[39mcoefficient\u001b[39m\u001b[39m'\u001b[39m: tayko_lm\u001b[39m.\u001b[39mcoef_}))\n\u001b[1;32m 9\u001b[0m \u001b[39m# print performance 
measures\u001b[39;00m\n\u001b[0;32m---> 10\u001b[0m regressionSummary(valid_y, tayko_lm\u001b[39m.\u001b[39;49mpredict(train_X))\n", - "File \u001b[0;32m~/.local/lib/python3.10/site-packages/dmba/metric.py:71\u001b[0m, in \u001b[0;36mregressionSummary\u001b[0;34m(y_true, y_pred)\u001b[0m\n\u001b[1;32m 69\u001b[0m y_true \u001b[39m=\u001b[39m _toArray(y_true)\n\u001b[1;32m 70\u001b[0m y_pred \u001b[39m=\u001b[39m _toArray(y_pred)\n\u001b[0;32m---> 71\u001b[0m y_res \u001b[39m=\u001b[39m y_true \u001b[39m-\u001b[39;49m y_pred\n\u001b[1;32m 72\u001b[0m metrics \u001b[39m=\u001b[39m [\n\u001b[1;32m 73\u001b[0m (\u001b[39m'\u001b[39m\u001b[39mMean Error (ME)\u001b[39m\u001b[39m'\u001b[39m, \u001b[39msum\u001b[39m(y_res) \u001b[39m/\u001b[39m \u001b[39mlen\u001b[39m(y_res)),\n\u001b[1;32m 74\u001b[0m (\u001b[39m'\u001b[39m\u001b[39mRoot Mean Squared Error (RMSE)\u001b[39m\u001b[39m'\u001b[39m, math\u001b[39m.\u001b[39msqrt(mean_squared_error(y_true, y_pred))),\n\u001b[1;32m 75\u001b[0m (\u001b[39m'\u001b[39m\u001b[39mMean Absolute Error (MAE)\u001b[39m\u001b[39m'\u001b[39m, \u001b[39msum\u001b[39m(\u001b[39mabs\u001b[39m(y_res)) \u001b[39m/\u001b[39m \u001b[39mlen\u001b[39m(y_res)),\n\u001b[1;32m 76\u001b[0m ]\n\u001b[1;32m 77\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mall\u001b[39m(yt \u001b[39m!=\u001b[39m \u001b[39m0\u001b[39m \u001b[39mfor\u001b[39;00m yt \u001b[39min\u001b[39;00m y_true):\n", - "\u001b[0;31mValueError\u001b[0m: operands could not be broadcast together with shapes (800,) (1200,) " + "5 Address_is_res -75.815354\n", + "\n", + "Regression statistics\n", + "\n", + " Mean Error (ME) : 7.1933\n", + "Root Mean Squared Error (RMSE) : 136.7397\n", + " Mean Absolute Error (MAE) : 83.6010\n" ] } ], @@ -701,6 +695,156 @@ "# print performance measures\n", "regressionSummary(valid_y, tayko_lm.predict(valid_X))" ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# iii Based on this model, what type of purchaser is most 
likely to spend a large amount of money?\n", + "Women outside the US who do not have a residential address, who place web orders, and who made many transactions in the previous year." + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Backward\n", + "Variables: US, Freq, last_update_days_ago, Web order, Gender=male, Address_is_res\n", + "Start: score=15028.53\n", + "Step: score=15026.76, remove US\n", + "Step: score=15026.38, remove Gender=male\n", + "Step: score=15026.38, remove None\n", + "['Freq', 'last_update_days_ago', 'Web order', 'Address_is_res']\n", + "\n" + ] + } + ], + "source": [ + "#iv. If we used backward elimination to reduce the number\n", + "# of predictors, which predictor would be dropped first \n", + "# from the model?\n", + "\n", + "def train_model(variables):\n", + " if len(variables) == 0:\n", + " return None\n", + " model = LinearRegression()\n", + " model.fit(train_X[variables], train_y)\n", + " return model\n", + "\n", + "def score_model(model, variables):\n", + " if len(variables) == 0:\n", + " return AIC_score(train_y, [train_y.mean()] * len(train_y), model, df=1)\n", + " return AIC_score(train_y, model.predict(train_X[variables]), model)\n", + "\n", + "print(\"Backward\")\n", + "best_back_model, best_back_variables = backward_elimination(train_X.columns, train_model, score_model, verbose=True)\n", + "print(best_back_variables)\n", + "\n", + "# 'US' dropped first" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# v. 
Show how the prediction and the prediction error are computed for the first purchase in the validation set.\n", + "\n", + "After the model is trained, we have the intercept and the regression coefficients.\n", + "To predict a new purchase, each predictor value is multiplied by its coefficient.\n", + "For the first purchase in the validation set, these products are summed and added to the intercept to give the prediction.\n", + "\n", + "The prediction error (residual) is the actual value minus the predicted value; for the first validation purchase (index 674) this is 0 - 89.21 = -89.21." + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Predicted Actual Residual\n", + "674 89.214915 0 -89.214915\n", + "1699 202.231362 184 -18.231362\n", + "1282 49.159303 0 -49.159303\n", + "1315 824.841659 1289 464.158341\n", + "1210 0.121196 0 -0.121196\n", + "1636 86.766675 0 -86.766675\n", + "613 58.018614 0 -58.018614\n", + "447 247.428569 1255 1007.571431\n", + "1131 67.036615 0 -67.036615\n", + "808 67.825031 0 -67.825031\n", + "1496 -7.098168 0 7.098168\n", + "1468 194.814024 411 216.185976\n", + "1682 -13.480101 0 13.480101\n", + "1149 -32.457046 0 32.457046\n", + "442 61.247979 0 -61.247979\n", + "1813 4.497885 173 168.502115\n", + "654 -46.046854 0 46.046854\n", + "1264 -32.315195 0 32.315195\n", + "858 80.219048 0 -80.219048\n", + "1482 51.783900 0 -51.783900\n", + "\n", + "Regression statistics\n", + "\n", + " Mean Error (ME) : 7.1933\n", + "Root Mean Squared Error (RMSE) : 136.7397\n", + " Mean Absolute Error (MAE) : 83.6010\n" + ] + } + ], + "source": [ + "#vi. 
Evaluate the predictive accuracy of the model by\n", + "# examining its performance on the validation set.\n", + "\n", + "tayko_lm_pred = tayko_lm.predict(valid_X)\n", + "\n", + "result = pd.DataFrame({'Predicted': tayko_lm_pred, 'Actual': valid_y,\n", + " 'Residual': valid_y - tayko_lm_pred})\n", + "print(result.head(20))\n", + "\n", + "# Compute common accuracy measures\n", + "regressionSummary(valid_y, tayko_lm_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAx/klEQVR4nO3de1xVdb7/8TfXraiIqIAk3vNu5dEgyowSwcvoWDbl5VQ6pl2wG41jzpSi1WjW0R45pjWn5JyH2b3UzDFRS8dCUyczL/EQjmmmYOkP0MzNVr6/P+awjztQWQps+fJ6Ph77oWut71rr+/3wFd+sxV47wBhjBAAAgFov0N8dAAAAQNUg2AEAAFiCYAcAAGAJgh0AAIAlCHYAAACWINgBAABYgmAHAABgCYIdAACAJQh2AAAAliDYAcD/ysjIUEBAQKXaBgQEKCMjo1r7k5SUpKSkpGo9BwC7EOwAXJYyMzMVEBDgfQUHB+uKK67QmDFj9MMPP/i7ewBwWQr2dwcA4HxmzJihtm3b6tSpU9q0aZMyMzO1ceNG7dy5U/Xq1avScz355JN64oknqvSYAFCTCHYALmsDBw5U7969JUn33nuvmjVrpueee07Lly/XHXfcUaXnCg4OVnAw3xYB1F7cigVQq9x4442SpLy8PO+6b7/9VrfffrsiIyNVr1499e7dW8uXL/fZz+PxaPr06bryyitVr149NW3aVH369FFWVpa3TUW/Y+d2u/XYY4+pefPmatSokYYOHaqDBw+W69eYMWPUpk2bcusrOuaiRYt0yy23KCoqSi6XS127dtWCBQsqNf558+apW7duCgsLU5MmTdS7d28tWbKkUvsCsB8/mgKoVb777jtJUpMmTSRJu3bt0g033KArrrhCTzzxhBo0aKB33nlHw4YN0/vvv69bb71V0r8C1syZM3XvvfcqPj5excXF2rp1q/75z3+qf//+5zzfvffeq8WLF2vUqFG6/vrrtW7dOg0ePPiSxrBgwQJ169ZNQ4cOVXBwsD766CM9+OCDKi0tVVpa2jn3+9vf/qaHH35Yt99+ux555BGdOnVKO3bs0ObNmzVq1KhL6hMAOxDsAFzWioqK9NNPP+nUqVPavHmzpk+fLpfLpd/85jeSpEceeUStWrXSli1b5HK5JEkPPvig+vTpo8mTJ3uD3ccff6xBgwbp1VdfrfS5v/76ay1evFgPPvig5s+fL0lKS0vT6NGjtWPHjose0/r161W/fn3v8sSJEzVgwADNmTPnvMHu448/Vrdu3fTuu+9e9LkB2I1bsQAua8nJyWrevLni4uJ0++23q0GDBlq+fLlatmypY8eOad26dbrjjjt0/Phx/fTTT/rpp5909OhRpaamau/evd530EZERGjXrl3au3dvpc+9cuVKSdLDDz/ss/7RRx+9pDGdHerKgutNN92k//mf/1FRUdE5
94uIiNDBgwe1ZcuWSzo/AHsR7ABc1ubPn6+srCy99957GjRokH766Sfvlbnc3FwZY/TUU0+pefPmPq9p06ZJko4cOSLpX++uLSwsVMeOHdWjRw9NmjTpglfd9u/fr8DAQLVv395nfadOnS5pTJ9//rmSk5PVoEEDRUREqHnz5vrTn/4kSecNdpMnT1bDhg0VHx+vK6+8Umlpafr8888vqS8A7MKtWACXtfj4eO+7YocNG6Y+ffpo1KhRysnJUWlpqSTpD3/4g1JTUyvcv0OHDpKkvn37Ki8vT8uWLdPq1av1n//5n5o7d64WLlyoe++995L7ea4HG585c8ZnOS8vT/369VPnzp01Z84cxcXFKTQ0VCtXrtTcuXO9Y6pIly5dlJOToxUrVmjVqlV6//339fLLL2vq1KmaPn36JY8BQO1HsANQawQFBWnmzJm6+eab9de//lW///3vJUkhISFKTk6+4P6RkZEaO3asxo4dqxMnTqhv377KyMg4Z7Br3bq1SktLlZeX53OVLicnp1zbJk2aqLCwsNz6/fv3+yx/9NFHcrvdWr58uVq1auVd/+mnn16w/5LUoEED3XnnnbrzzjtVUlKi2267Tc8++6ymTJlS5c/1A1D7cCsWQK2SlJSk+Ph4vfjiiwoPD1dSUpJeeeUVHT58uFzbH3/80fv3o0eP+mxr2LChOnToILfbfc5zDRw4UJL00ksv+ax/8cUXy7Vt3769ioqKfG7vHj58WB9++KFPu6CgIEmSMca7rqioSIsWLTpnP841htDQUHXt2lXGGHk8ngvuD8B+XLEDUOtMmjRJv/vd75SZman58+erT58+6tGjh8aPH6927dqpoKBA2dnZOnjwoL7++mtJUteuXZWUlKRevXopMjJSW7du1XvvvaeJEyee8zzXXHONRo4cqZdffllFRUW6/vrrtXbtWuXm5pZrO2LECO+7cB9++GGdPHlSCxYsUMeOHfXPf/7T2y4lJUWhoaEaMmSI7rvvPp04cUJ/+9vfFBUVVWE4PVtKSopiYmJ0ww03KDo6Wnv27NFf//pXDR48WI0aNbrIagKwigGAy9CiRYuMJLNly5Zy286cOWPat29v2rdvb06fPm3y8vLM3XffbWJiYkxISIi54oorzG9+8xvz3nvvefd55plnTHx8vImIiDD169c3nTt3Ns8++6wpKSnxtpk2bZr59bfFX375xTz88MOmadOmpkGDBmbIkCHm+++/N5LMtGnTfNquXr3adO/e3YSGhppOnTqZxYsXV3jM5cuXm6uuusrUq1fPtGnTxjz33HPm9ddfN5LMvn37vO1uuukmc9NNN3mXX3nlFdO3b1/TtGlT43K5TPv27c2kSZNMUVHRRVQYgI0CjDnrfgAAAABqLX7HDgAAwBIEOwAAAEsQ7AAAACxBsAMAALAEwQ4AAMASBDsAAABL1MoHFJeWlurQoUNq1KjROT+fEQAAwAbGGB0/flyxsbEKDDz/NblaGewOHTqkuLg4f3cDAACgxnz//fdq2bLledvUymBX9tE533//vcLDw/3cm5rl8Xi0evVqpaSkKCQkxN/duexRL+eomTPUyzlq5gz1csbGehUXFysuLq5SHx1YK4Nd2e3X8PDwOhnswsLCFB4ebs2ErU7Uyzlq5gz1co6aOUO9nLG5XpX59TPePAEAAGAJgh0AAIAlCHYAAACWINgBAABYgmAHAABgCYIdAACAJQh2AAAAliDYAQAAWIJgBwAAYAmCHQAAgCUIdgAAAJYg2AEAAFiCYAcAAGAJgh0AAIAlHAW7mTNn6tprr1WjRo0UFRWlYcOGKScnx6dNUlKSAgICfF7333+/T5sDBw5o8ODBCgsLU1RUlCZNmqTTp09f+mgAAADqsGAnjdevX6+0tDRde+21On36tP70pz8pJSVFu3fvVoMGDbztxo8frxkzZniXw8LCvH8/c+aMBg8erJiYGH3xxRc6fPiw7r77boWEhOgvf/lLFQwJAACgbnIU
7FatWuWznJmZqaioKG3btk19+/b1rg8LC1NMTEyFx1i9erV2796tNWvWKDo6Wtdcc42efvppTZ48WRkZGQoNDb2IYQAAAMBRsPu1oqIiSVJkZKTP+jfeeEOLFy9WTEyMhgwZoqeeesp71S47O1s9evRQdHS0t31qaqoeeOAB7dq1Sz179ix3HrfbLbfb7V0uLi6WJHk8Hnk8nksZQq1TNt66Nu7z6Z7xyTm3uQKNnu4t9ZqxSu7SgEodb2dGalV1rVZijjlDvZyjZs5QL2dsrJeTsQQYY8zFnKS0tFRDhw5VYWGhNm7c6F3/6quvqnXr1oqNjdWOHTs0efJkxcfH64MPPpAkTZgwQfv379cnn/zff8YnT55UgwYNtHLlSg0cOLDcuTIyMjR9+vRy65csWeJzmxcAAMA2J0+e1KhRo1RUVKTw8PDztr3oK3ZpaWnauXOnT6iT/hXcyvTo0UMtWrRQv379lJeXp/bt21/UuaZMmaL09HTvcnFxseLi4pSSknLBAdrG4/EoKytL/fv3V0hIiL+7c1m48BW7Uj21NZArdpXEHHOGejlHzZyhXs7YWK+yO5WVcVHBbuLEiVqxYoU2bNigli1bnrdtQkKCJCk3N1ft27dXTEyMvvzyS582BQUFknTO38tzuVxyuVzl1oeEhFjzRXOqLo/919xnLhzY3KUBlWonibr+L+aYM9TLOWrmDPVyxqZ6ORmHo8edGGM0ceJEffjhh1q3bp3atm17wX22b98uSWrRooUkKTExUd98842OHDnibZOVlaXw8HB17drVSXcAAABwFkdX7NLS0rRkyRItW7ZMjRo1Un5+viSpcePGql+/vvLy8rRkyRINGjRITZs21Y4dO/TYY4+pb9++uuqqqyRJKSkp6tq1q+666y7Nnj1b+fn5evLJJ5WWllbhVTkAAABUjqMrdgsWLFBRUZGSkpLUokUL7+vtt9+WJIWGhmrNmjVKSUlR586d9fjjj2v48OH66KOPvMcICgrSihUrFBQUpMTERP37v/+77r77bp/n3gEAAMA5R1fsLvQG2ri4OK1fv/6Cx2ndurVWrlzp5NQAAAC4AD4rFgAAwBIEOwAAAEsQ7AAAACxBsAMAALAEwQ4AAMASBDsAAABLEOwAAAAsQbADAACwBMEOAADAEgQ7AAAASxDsAAAALEGwAwAAsATBDgAAwBLB/u4A6pY2T3zs7y4AAGAtrtgBAABYgmAHAABgCYIdAACAJQh2AAAAliDYAQAAWIJgBwAAYAmCHQAAgCUIdgAAAJYg2AEAAFiCYAcAAGAJgh0AAIAlCHYAAACWINgBAABYgmAHAABgCYIdAACAJQh2AAAAliDYAQAAWIJgBwAAYAmCHQAAgCUIdgAAAJYg2AEAAFiCYAcAAGAJgh0AAIAlCHYAAACWINgBAABYgmAHAABgCYIdAACAJQh2AAAAliDYAQAAWIJgBwAAYAmCHQAAgCUIdgAAAJYg2AEAAFiCYAcAAGAJgh0AAIAlCHYAAACWINgBAABYgmAHAABgCYIdAACAJQh2AAAAliDYAQAAWIJgBwAAYAmCHQAAgCUIdgAAAJYg2AEAAFiCYAcAAGAJgh0AAIAlCHYAAACWINgBAABYgmAHAABgCYIdAACAJRwFu5kzZ+raa69Vo0aNFBUVpWHDhiknJ8enzalTp5SWlqamTZuqYcOGGj58uAoKCnzaHDhwQIMHD1ZYWJiioqI0adIknT59+tJHAwAAUIc5Cnbr169XWlqaNm3apKysLHk8HqWkpOjnn3/2tnnsscf00Ucf6d1339X69et16NAh3Xbbbd7tZ86c0eDBg1VSUqIvvvhC//Vf/6XMzExNnTq16kYFAABQBwU7abxq1Sqf5czMTEVFRWnbtm3q27evioqK9Nprr2nJkiW65ZZbJEmLFi1Sly5dtGnTJl133XVavXq1du/erTVr1ig6OlrXXHONnn76aU2ePFkZGRkKDQ2tutEBAADUIZf0O3ZF
RUWSpMjISEnStm3b5PF4lJyc7G3TuXNntWrVStnZ2ZKk7Oxs9ejRQ9HR0d42qampKi4u1q5duy6lOwAAAHWaoyt2ZystLdWjjz6qG264Qd27d5ck5efnKzQ0VBERET5to6OjlZ+f721zdqgr2162rSJut1tut9u7XFxcLEnyeDzyeDwXO4RaqWy8tXXcriBTs+cLND5/VkZtrW1Vqe1zrKZRL+eomTPUyxkb6+VkLBcd7NLS0rRz505t3LjxYg9RaTNnztT06dPLrV+9erXCwsKq/fyXo6ysLH934aLMjvfPeZ/uXVrptitXrqzGntQetXWO+Qv1co6aOUO9nLGpXidPnqx024sKdhMnTtSKFSu0YcMGtWzZ0rs+JiZGJSUlKiws9LlqV1BQoJiYGG+bL7/80ud4Ze+aLWvza1OmTFF6erp3ubi4WHFxcUpJSVF4ePjFDKHW8ng8ysrKUv/+/RUSEuLv7jjWPeOTGj2fK9Do6d6lemproNylAZXaZ2dGajX36vJW2+dYTaNezlEzZ6iXMzbWq+xOZWU4CnbGGD300EP68MMP9dlnn6lt27Y+23v16qWQkBCtXbtWw4cPlyTl5OTowIEDSkxMlCQlJibq2Wef1ZEjRxQVFSXpX6k6PDxcXbt2rfC8LpdLLper3PqQkBBrvmhO1daxu89ULlxV+XlLAyp97tpY1+pQW+eYv1Av56iZM9TLGZvq5WQcjoJdWlqalixZomXLlqlRo0be34lr3Lix6tevr8aNG2vcuHFKT09XZGSkwsPD9dBDDykxMVHXXXedJCklJUVdu3bVXXfdpdmzZys/P19PPvmk0tLSKgxvAAAAqBxHwW7BggWSpKSkJJ/1ixYt0pgxYyRJc+fOVWBgoIYPHy63263U1FS9/PLL3rZBQUFasWKFHnjgASUmJqpBgwa65557NGPGjEsbCQAAQB3n+FbshdSrV0/z58/X/Pnzz9mmdevW/II6AABAFeOzYgEAACxBsAMAALAEwQ4AAMASBDsAAABLEOwAAAAsQbADAACwBMEOAADAEgQ7AAAASxDsAAAALEGwAwAAsATBDgAAwBIEOwAAAEsQ7AAAACxBsAMAALAEwQ4AAMASBDsAAABLEOwAAAAsQbADAACwBMEOAADAEgQ7AAAASxDsAAAALEGwAwAAsATBDgAAwBIEOwAAAEsQ7AAAACxBsAMAALAEwQ4AAMASBDsAAABLEOwAAAAsQbADAACwBMEOAADAEgQ7AAAASxDsAAAALEGwAwAAsATBDgAAwBIEOwAAAEsQ7AAAACxBsAMAALAEwQ4AAMASBDsAAABLEOwAAAAsQbADAACwBMEOAADAEgQ7AAAASxDsAAAALEGwAwAAsATBDgAAwBIEOwAAAEsQ7AAAACxBsAMAALAEwQ4AAMASBDsAAABLEOwAAAAsQbADAACwBMEOAADAEgQ7AAAASxDsAAAALEGwAwAAsATBDgAAwBIEOwAAAEsQ7AAAACxBsAMAALAEwQ4AAMASBDsAAABLEOwAAAAsQbADAACwhONgt2HDBg0ZMkSxsbEKCAjQ0qVLfbaPGTNGAQEBPq8BAwb4tDl27JhGjx6t8PBwRUREaNy4cTpx4sQlDQQAAKCucxzsfv75Z1199dWaP3/+OdsMGDBAhw8f9r7efPNNn+2jR4/Wrl27lJWVpRUrVmjDhg2aMGGC894DAADAK9jpDgMHDtTAgQPP28blcikmJqbCbXv27NGqVau0ZcsW9e7dW5I0b948DRo0SC+88IJiY2OddgkAAAC6iGBXGZ999pmioqLUpEkT3XLLLXrmmWfUtGlTSVJ2drYiIiK8oU6SkpOTFRgYqM2bN+vWW28tdzy32y232+1dLi4uliR5PB55PJ7qGMJlq2y8tXXcriBTs+cLND5/VkZtrW1Vqe1zrKZRL+eomTPUyxkb6+VkLFUe7AYMGKDbbrtNbdu2VV5env70pz9p4MCBys7OVlBQkPLz8xUVFeXb
ieBgRUZGKj8/v8Jjzpw5U9OnTy+3fvXq1QoLC6vqIdQKWVlZ/u7CRZkd75/zPt27tNJtV65cWY09qT1q6xzzF+rlHDVzhno5Y1O9Tp48Wem2VR7sRowY4f17jx49dNVVV6l9+/b67LPP1K9fv4s65pQpU5Senu5dLi4uVlxcnFJSUhQeHn7Jfa5NPB6PsrKy1L9/f4WEhPi7O451z/ikRs/nCjR6unepntoaKHdpQKX22ZmRWs29urzV9jlW06iXc9TMGerljI31KrtTWRnVciv2bO3atVOzZs2Um5urfv36KSYmRkeOHPFpc/r0aR07duycv5fncrnkcrnKrQ8JCbHmi+ZUbR27+0zlwlWVn7c0oNLnro11rQ61dY75C/Vyjpo5Q72csaleTsZR7c+xO3jwoI4ePaoWLVpIkhITE1VYWKht27Z526xbt06lpaVKSEio7u4AAABYy/EVuxMnTig3N9e7vG/fPm3fvl2RkZGKjIzU9OnTNXz4cMXExCgvL09//OMf1aFDB6Wm/uv2VpcuXTRgwACNHz9eCxculMfj0cSJEzVixAjeEQsAAHAJHF+x27p1q3r27KmePXtKktLT09WzZ09NnTpVQUFB2rFjh4YOHaqOHTtq3Lhx6tWrl/7xj3/43Ep944031LlzZ/Xr10+DBg1Snz599Oqrr1bdqAAAAOogx1fskpKSZMy5Hx3xyScX/uX4yMhILVmyxOmpAQAAcB58ViwAAIAlCHYAAACWINgBAABYgmAHAABgCYIdAACAJQh2AAAAliDYAQAAWIJgBwAAYAmCHQAAgCUIdgAAAJYg2AEAAFiCYAcAAGAJgh0AAIAlCHYAAACWINgBAABYgmAHAABgCYIdAACAJQh2AAAAliDYAQAAWCLY3x0ALjdtnvi4yo/53azBVX5MAAB+jSt2AAAAliDYAQAAWIJgBwAAYAmCHQAAgCUIdgAAAJYg2AEAAFiCYAcAAGAJgh0AAIAlCHYAAACWINgBAABYgmAHAABgCYIdAACAJQh2AAAAliDYAQAAWIJgBwAAYAmCHQAAgCUIdgAAAJYg2AEAAFiCYAcAAGAJgh0AAIAlCHYAAACWINgBAABYgmAHAABgCYIdAACAJQh2AAAAliDYAQAAWIJgBwAAYAmCHQAAgCUIdgAAAJYg2AEAAFiCYAcAAGAJgh0AAIAlCHYAAACWINgBAABYgmAHAABgCYIdAACAJQh2AAAAliDYAQAAWIJgBwAAYAmCHQAAgCUIdgAAAJYg2AEAAFiCYAcAAGAJgh0AAIAlCHYAAACWINgBAABYwnGw27Bhg4YMGaLY2FgFBARo6dKlPtuNMZo6dapatGih+vXrKzk5WXv37vVpc+zYMY0ePVrh4eGKiIjQuHHjdOLEiUsaCAAAQF3nONj9/PPPuvrqqzV//vwKt8+ePVsvvfSSFi5cqM2bN6tBgwZKTU3VqVOnvG1Gjx6tXbt2KSsrSytWrNCGDRs0YcKEix8FAAAAFOx0h4EDB2rgwIEVbjPG6MUXX9STTz6p3/72t5Kk//7v/1Z0dLSWLl2qESNGaM+ePVq1apW2bNmi3r17S5LmzZunQYMG6YUXXlBsbOwlDAcAAKDuchzszmffvn3Kz89XcnKyd13jxo2VkJCg7OxsjRgxQtnZ2YqIiPCGOklKTk5WYGCgNm/erFtvvbXccd1ut9xut3e5uLhYkuTxeOTxeKpyCJe9svHW1nG7gkzNni/Q+PzpL7Xp61Xb51hNo17OUTNnqJczNtbLyViqNNjl5+dLkqKjo33WR0dHe7fl5+crKirKtxPBwYqMjPS2+bWZM2dq+vTp5davXr1aYWFhVdH1WicrK8vfXbgos+P9c96ne5f658T/a+XKlX49/8WorXPMX6iXc9TMGerljE31OnnyZKXbVmmwqy5TpkxRenq6d7m4uFhxcXFKSUlReHi4H3tW8zwej7KystS/f3+FhIT4uzuOdc/4pEbP5wo0erp3qZ7a
Gih3aUCNnvtsOzNS/XZup2r7HKtp1Ms5auYM9XLGxnqV3amsjCoNdjExMZKkgoICtWjRwru+oKBA11xzjbfNkSNHfPY7ffq0jh075t3/11wul1wuV7n1ISEh1nzRnKqtY3ef8U+4cpcG+O3ckmrl16q2zjF/oV7OUTNnqJczNtXLyTiq9Dl2bdu2VUxMjNauXetdV1xcrM2bNysxMVGSlJiYqMLCQm3bts3bZt26dSotLVVCQkJVdgcAAKBOcXzF7sSJE8rNzfUu79u3T9u3b1dkZKRatWqlRx99VM8884yuvPJKtW3bVk899ZRiY2M1bNgwSVKXLl00YMAAjR8/XgsXLpTH49HEiRM1YsQI3hELAABwCRwHu61bt+rmm2/2Lpf97ts999yjzMxM/fGPf9TPP/+sCRMmqLCwUH369NGqVatUr1497z5vvPGGJk6cqH79+ikwMFDDhw/XSy+9VAXDAQAAqLscB7ukpCQZc+5HRwQEBGjGjBmaMWPGOdtERkZqyZIlTk8NAACA8+CzYgEAACxBsAMAALAEwQ4AAMASBDsAAABLEOwAAAAsQbADAACwBMEOAADAEgQ7AAAASxDsAAAALEGwAwAAsATBDgAAwBIEOwAAAEsQ7AAAACxBsAMAALAEwQ4AAMASBDsAAABLEOwAAAAsQbADAACwBMEOAADAEgQ7AAAASxDsAAAALEGwAwAAsATBDgAAwBIEOwAAAEsQ7AAAACxBsAMAALAEwQ4AAMASBDsAAABLEOwAAAAsQbADAACwBMEOAADAEgQ7AAAASxDsAAAALEGwAwAAsATBDgAAwBIEOwAAAEsQ7AAAACxBsAMAALAEwQ4AAMASwf7uAC5fbZ742N9dAAAADnDFDgAAwBIEOwAAAEsQ7AAAACxBsAMAALAEwQ4AAMASBDsAAABLEOwAAAAsQbADAACwBMEOAADAEgQ7AAAASxDsAAAALEGwAwAAsATBDgAAwBIEOwAAAEsQ7AAAACxBsAMAALAEwQ4AAMASBDsAAABLEOwAAAAsQbADAACwBMEOAADAEgQ7AAAASxDsAAAALEGwAwAAsATBDgAAwBJVHuwyMjIUEBDg8+rcubN3+6lTp5SWlqamTZuqYcOGGj58uAoKCqq6GwAAAHVOtVyx69atmw4fPux9bdy40bvtscce00cffaR3331X69ev16FDh3TbbbdVRzcAAADqlOBqOWhwsGJiYsqtLyoq0muvvaYlS5bolltukSQtWrRIXbp00aZNm3TddddVR3cAAADqhGq5Yrd3717FxsaqXbt2Gj16tA4cOCBJ2rZtmzwej5KTk71tO3furFatWik7O7s6ugIAAFBnVPkVu4SEBGVmZqpTp046fPiwpk+frhtvvFE7d+5Ufn6+QkNDFRER4bNPdHS08vPzz3lMt9stt9vtXS4uLpYkeTweeTyeqh7CZa1svDUxbleQqfZzVDdXoPH5019q0zytyTlmA+rlHDVzhno5Y2O9nIwlwBhTrf/jFRYWqnXr1pozZ47q16+vsWPH+oQ0SYqPj9fNN9+s5557rsJjZGRkaPr06eXWL1myRGFhYdXSbwAAgMvByZMnNWrUKBUVFSk8PPy8bavld+zOFhERoY4dOyo3N1f9+/dXSUmJCgsLfa7aFRQUVPg7eWWmTJmi9PR073JxcbHi4uKUkpJywQHaxuPxKCsrS/3791dISEi1nqt7xifVevya4Ao0erp3qZ7aGih3aYDf+rEzI9Vv53aqJueYDaiXc9TMGerljI31KrtTWRnVHuxOnDihvLw83XXXXerVq5dCQkK0du1aDR8+XJKUk5OjAwcOKDEx8ZzHcLlccrlc5daHhIRY80VzqibG7j7jvyBU1dylAX4dT22cp3X539fFoF7OUTNnqJczNtXLyTiqPNj94Q9/0JAhQ9S6dWsdOnRI06ZNU1BQkEaOHKnGjRtr3LhxSk9PV2RkpMLDw/XQQw8pMTGRd8QCAABcoioPdgcP
HtTIkSN19OhRNW/eXH369NGmTZvUvHlzSdLcuXMVGBio4cOHy+12KzU1VS+//HJVdwMAAKDOqfJg99Zbb513e7169TR//nzNnz+/qk8NAABQp/FZsQAAAJYg2AEAAFiCYAcAAGAJgh0AAIAlCHYAAACWINgBAABYgmAHAABgCYIdAACAJQh2AAAAliDYAQAAWIJgBwAAYAmCHQAAgCWC/d0BoC5o88THVX7M72YNrvJjAgBqN67YAQAAWIJgBwAAYAmCHQAAgCUIdgAAAJYg2AEAAFiCYAcAAGAJgh0AAIAlCHYAAACWINgBAABYgmAHAABgCYIdAACAJQh2AAAAliDYAQAAWIJgBwAAYAmCHQAAgCUIdgAAAJYg2AEAAFiCYAcAAGAJgh0AAIAlCHYAAACWINgBAABYgmAHAABgiWB/dwDAxWnzxMdVfszvZg2u8mMCAGoOV+wAAAAsQbADAACwBMEOAADAEgQ7AAAASxDsAAAALEGwAwAAsATBDgAAwBIEOwAAAEsQ7AAAACxBsAMAALAEHylmier4eCkAAFC7cMUOAADAEgQ7AAAASxDsAAAALEGwAwAAsATBDgAAwBIEOwAAAEsQ7AAAACxBsAMAALAEwQ4AAMASBDsAAABLEOwAAAAswWfFAvBq88THcgUZzY6Xumd8IveZAH93qULfzRrs7y4AwGWJK3YAAACWINgBAABYgmAHAABgCYIdAACAJQh2AAAAliDYAQAAWIJgBwAAYAm/Pcdu/vz5ev7555Wfn6+rr75a8+bNU3x8vL+6c05tnvi4yo/JM7gAAEB18Euwe/vtt5Wenq6FCxcqISFBL774olJTU5WTk6OoqCh/dKlGXUpYrA0PjwWqW3X8wHWxzvVvsjp+gOMHzctfdXyN9j6dUuXHhL38cit2zpw5Gj9+vMaOHauuXbtq4cKFCgsL0+uvv+6P7gAAAFihxq/YlZSUaNu2bZoyZYp3XWBgoJKTk5WdnV3hPm63W26327tcVFQkSTp27Jg8Hk+19jf49M/VenyngkuNTp4sVbAnUGdKuWJ3IdTLOWrmzLnqdfTo0ao/VzV8P6qOfl6Ix+PRyZMndfToUYWEhNT4+atTdXyNrvnzB3qyZ6mu+fMHcl+m/yY3T+nn7y54nW9+JcxcW+Xnq4mxHz9+XJJkjLlwY1PDfvjhByPJfPHFFz7rJ02aZOLj4yvcZ9q0aUYSL168ePHixYtXnX19//33F8xZfnvzhBNTpkxRenq6d7m0tFTHjh1T06ZNFRBwef70Ul2Ki4sVFxen77//XuHh4f7uzmWPejlHzZyhXs5RM2eolzM21ssYo+PHjys2NvaCbWs82DVr1kxBQUEqKCjwWV9QUKCYmJgK93G5XHK5XD7rIiIiqquLtUJ4eLg1E7YmUC/nqJkz1Ms5auYM9XLGtno1bty4Uu1q/M0ToaGh6tWrl9au/b/73KWlpVq7dq0SExNrujsAAADW8Mut2PT0dN1zzz3q3bu34uPj9eKLL+rnn3/W2LFj/dEdAAAAK/gl2N1555368ccfNXXqVOXn5+uaa67RqlWrFB0d7Y/u1Coul0vTpk0rd2saFaNezlEzZ6iXc9TMGerlTF2vV4AxlXnvLAAAAC53fFYsAACAJQh2AAAAliDYAQAAWIJgBwAAYAmC3WXqu+++07hx49S2bVvVr19f7du317Rp01RSUuLTJiAgoNxr06ZNPsd699131blzZ9WrV089evTQypUra3o4fjN//ny1adNG9erVU0JCgr788kt/d8kvZs6cqWuvvVaNGjVSVFSUhg0bppycHJ82SUlJ5ebS/fff79PmwIEDGjx4sMLCwhQVFaVJkybp9OnTNTmUGpGRkVGuFp07d/ZuP3XqlNLS0tS0aVM1bNhQw4cPL/fQ9bpSqzJt2rSp8PtRWlqaJObXhg0bNGTIEMXGxiogIEBLly712W6M0dSpU9WiRQvVr19fycnJ2rt3r0+bY8eOafTo0QoP
D1dERITGjRunEydO+LTZsWOHbrzxRtWrV09xcXGaPXt2dQ+tWpyvXh6PR5MnT1aPHj3UoEEDxcbG6u6779ahQ4d8jlHRnJw1a5ZPG1vq5eOSP/wV1eLvf/+7GTNmjPnkk09MXl6eWbZsmYmKijKPP/64t82+ffuMJLNmzRpz+PBh76ukpMTb5vPPPzdBQUFm9uzZZvfu3ebJJ580ISEh5ptvvvHHsGrUW2+9ZUJDQ83rr79udu3aZcaPH28iIiJMQUGBv7tW41JTU82iRYvMzp07zfbt282gQYNMq1atzIkTJ7xtbrrpJjN+/HifuVRUVOTdfvr0adO9e3eTnJxsvvrqK7Ny5UrTrFkzM2XKFH8MqVpNmzbNdOvWzacWP/74o3f7/fffb+Li4szatWvN1q1bzXXXXWeuv/567/a6VKsyR44c8alXVlaWkWQ+/fRTYwzza+XKlebPf/6z+eCDD4wk8+GHH/psnzVrlmncuLFZunSp+frrr83QoUNN27ZtzS+//OJtM2DAAHP11VebTZs2mX/84x+mQ4cOZuTIkd7tRUVFJjo62owePdrs3LnTvPnmm6Z+/frmlVdeqalhVpnz1auwsNAkJyebt99+23z77bcmOzvbxMfHm169evkco3Xr1mbGjBk+c+7s73k21etsBLtaZPbs2aZt27be5bJg99VXX51znzvuuMMMHjzYZ11CQoK57777qqubl434+HiTlpbmXT5z5oyJjY01M2fO9GOvLg9Hjhwxksz69eu962666SbzyCOPnHOflStXmsDAQJOfn+9dt2DBAhMeHm7cbnd1drfGTZs2zVx99dUVbissLDQhISHm3Xff9a7bs2ePkWSys7ONMXWrVufyyCOPmPbt25vS0lJjDPPrbL8OKqWlpSYmJsY8//zz3nWFhYXG5XKZN9980xhjzO7du40ks2XLFm+bv//97yYgIMD88MMPxhhjXn75ZdOkSROfek2ePNl06tSpmkdUvSoKwr/25ZdfGklm//793nWtW7c2c+fOPec+ttaLW7G1SFFRkSIjI8utHzp0qKKiotSnTx8tX77cZ1t2draSk5N91qWmpio7O7ta++pvJSUl2rZtm8/YAwMDlZycbP3YK6OoqEiSys2nN954Q82aNVP37t01ZcoUnTx50rstOztbPXr08HmQeGpqqoqLi7Vr166a6XgN2rt3r2JjY9WuXTuNHj1aBw4ckCRt27ZNHo/HZ2517txZrVq18s6tularXyspKdHixYv1+9//XgEBAd71zK+K7du3T/n5+T5zqnHjxkpISPCZUxEREerdu7e3TXJysgIDA7V582Zvm759+yo0NNTbJjU1VTk5Ofp//+//1dBo/KOoqEgBAQHlPkd+1qxZatq0qXr27Knnn3/e59a+rfXyyydPwLnc3FzNmzdPL7zwgnddw4YN9R//8R+64YYbFBgYqPfff1/Dhg3T0qVLNXToUElSfn5+uU/0iI6OVn5+fo32v6b99NNPOnPmTIVj//bbb/3Uq8tDaWmpHn30Ud1www3q3r27d/2oUaPUunVrxcbGaseOHZo8ebJycnL0wQcfSDr3XCrbZpOEhARlZmaqU6dOOnz4sKZPn64bb7xRO3fuVH5+vkJDQ8v9B3L2v6u6VKuKLF26VIWFhRozZox3HfPr3MrGd77v1fn5+YqKivLZHhwcrMjISJ82bdu2LXeMsm1NmjSplv7726lTpzR58mSNHDlS4eHh3vUPP/yw/u3f/k2RkZH64osvNGXKFB0+fFhz5syRZG+9CHY17IknntBzzz133jZ79uzx+UXtH374QQMGDNDvfvc7jR8/3ru+WbNmSk9P9y5fe+21OnTokJ5//nlvsAN+LS0tTTt37tTGjRt91k+YMMH79x49eqhFixbq16+f8vLy1L59+5rupl8NHDjQ+/errrpKCQkJat26td555x3Vr1/fjz2rHV577TUNHDhQsbGx3nXML1QHj8ejO+64Q8YYLViw
wGfb2f8/XnXVVQoNDdV9992nmTNnWv1xY9yKrWGPP/649uzZc95Xu3btvO0PHTqkm2++Wddff71effXVCx4/ISFBubm53uWYmJhy79YrKChQTExM1Q3qMtSsWTMFBQXVybGfz8SJE7VixQp9+umnatmy5XnbJiQkSJJ3Pp1rLpVts1lERIQ6duyo3NxcxcTEqKSkRIWFhT5tzp5bdblW+/fv15o1a3Tvvfeetx3z6/+Uje98369iYmJ05MgRn+2nT5/WsWPH6uy8Kwt1+/fvV1ZWls/VuookJCTo9OnT+u677yTZWy+CXQ1r3ry5OnfufN5X2f3+H374QUlJSerVq5cWLVqkwMALf7m2b9+uFi1aeJcTExO1du1anzZZWVlKTEys2oFdZkJDQ9WrVy+fsZeWlmrt2rXWj70ixhhNnDhRH374odatW1fu9kNFtm/fLkne+ZSYmKhvvvnG5z+Xsm+mXbt2rZZ+Xy5OnDihvLw8tWjRQr169VJISIjP3MrJydGBAwe8c6su12rRokWKiorS4MGDz9uO+fV/2rZtq5iYGJ85VVxcrM2bN/vMqcLCQm3bts3bZt26dSotLfWG5MTERG3YsEEej8fbJisrS506daq1txXPpSzU7d27V2vWrFHTpk0vuM/27dsVGBjovaVtbb38/e4NVOzgwYOmQ4cOpl+/fubgwYM+b9cuk5mZaZYsWWL27Nlj9uzZY5599lkTGBhoXn/9dW+bzz//3AQHB5sXXnjB7Nmzx0ybNq1OPe7E5XKZzMxMs3v3bjNhwgQTERHh8667uuKBBx4wjRs3Np999pnPXDp58qQxxpjc3FwzY8YMs3XrVrNv3z6zbNky065dO9O3b1/vMcoeR5GSkmK2b99uVq1aZZo3b27N4yjO9vjjj5vPPvvM7Nu3z3z++ecmOTnZNGvWzBw5csQY86/HnbRq1cqsW7fObN261SQmJprExETv/nWpVmc7c+aMadWqlZk8ebLPeuaXMcePHzdfffWV+eqrr4wkM2fOHPPVV19538U5a9YsExERYZYtW2Z27Nhhfvvb31b4uJOePXuazZs3m40bN5orr7zS53EnhYWFJjo62tx1111m586d5q233jJhYWG18vEd56tXSUmJGTp0qGnZsqXZvn27z/e0sne4fvHFF2bu3Llm+/btJi8vzyxevNg0b97c3H333d5z2FSvsxHsLlOLFi0ykip8lcnMzDRdunQxYWFhJjw83MTHx/s8gqHMO++8Yzp27GhCQ0NNt27dzMcff1yTQ/GrefPmmVatWpnQ0FATHx9vNm3a5O8u+cW55tKiRYuMMcYcOHDA9O3b10RGRhqXy2U6dOhgJk2a5POcMWOM+e6778zAgQNN/fr1TbNmzczjjz9uPB6PH0ZUve68807TokULExoaaq644gpz5513mtzcXO/2X375xTz44IOmSZMmJiwszNx6660+P3QZU3dqdbZPPvnESDI5OTk+65lfxnz66acV/hu85557jDH/euTJU089ZaKjo43L5TL9+vUrV8ejR4+akSNHmoYNG5rw8HAzduxYc/z4cZ82X3/9tenTp49xuVzmiiuuMLNmzaqpIVap89Wr7FFfFb3Knpu4bds2k5CQYBo3bmzq1atnunTpYv7yl7+YU6dO+ZzHlnqdLcAYY2rgwiAAAACqGb9jBwAAYAmCHQAAgCUIdgAAAJYg2AEAAFiCYAcAAGAJgh0AAIAlCHYAAACWINgBAABYgmAHAABgCYIdAACAJQh2AAAAliDYAQAAWOL/A9XSwqocVcsQAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#vii. Create a histogram of the model residuals. \n", + "# Do they appear to follow a normal distribution? \n", + "# How does this affect the predictive performance of the model?\n", + "\n", + "tayko_lm_pred = tayko_lm.predict(valid_X)\n", + "all_residuals = valid_y - tayko_lm_pred\n", + "\n", + "ax = pd.DataFrame({'Residuals': all_residuals}).hist(bins=25)\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] } ], "metadata": {