927 lines
54 KiB
Plaintext
927 lines
54 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 52,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Lecture 3 for the University of Tulsa's QM-7063 Data Mining Course\n",
|
|
"# Dimension Reduction\n",
|
|
"# Professor: Dr. Abdulrashid, Spring 2023\n",
|
|
"# Noah L. Schrick - 1492657\n",
|
|
"\n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"from sklearn.decomposition import PCA\n",
|
|
"import matplotlib.pyplot as plt"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 36,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>mean</th>\n",
|
|
" <th>sd</th>\n",
|
|
" <th>min</th>\n",
|
|
" <th>max</th>\n",
|
|
" <th>median</th>\n",
|
|
" <th>length</th>\n",
|
|
" <th>miss.val</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>CRIM</th>\n",
|
|
" <td>3.613524</td>\n",
|
|
" <td>8.601545</td>\n",
|
|
" <td>0.00632</td>\n",
|
|
" <td>88.9762</td>\n",
|
|
" <td>0.25651</td>\n",
|
|
" <td>506</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>ZN</th>\n",
|
|
" <td>11.363636</td>\n",
|
|
" <td>23.322453</td>\n",
|
|
" <td>0.00000</td>\n",
|
|
" <td>100.0000</td>\n",
|
|
" <td>0.00000</td>\n",
|
|
" <td>506</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>INDUS</th>\n",
|
|
" <td>11.136779</td>\n",
|
|
" <td>6.860353</td>\n",
|
|
" <td>0.46000</td>\n",
|
|
" <td>27.7400</td>\n",
|
|
" <td>9.69000</td>\n",
|
|
" <td>506</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>CHAS</th>\n",
|
|
" <td>0.069170</td>\n",
|
|
" <td>0.253994</td>\n",
|
|
" <td>0.00000</td>\n",
|
|
" <td>1.0000</td>\n",
|
|
" <td>0.00000</td>\n",
|
|
" <td>506</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>NOX</th>\n",
|
|
" <td>0.554695</td>\n",
|
|
" <td>0.115878</td>\n",
|
|
" <td>0.38500</td>\n",
|
|
" <td>0.8710</td>\n",
|
|
" <td>0.53800</td>\n",
|
|
" <td>506</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>RM</th>\n",
|
|
" <td>6.284634</td>\n",
|
|
" <td>0.702617</td>\n",
|
|
" <td>3.56100</td>\n",
|
|
" <td>8.7800</td>\n",
|
|
" <td>6.20850</td>\n",
|
|
" <td>506</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>AGE</th>\n",
|
|
" <td>68.574901</td>\n",
|
|
" <td>28.148861</td>\n",
|
|
" <td>2.90000</td>\n",
|
|
" <td>100.0000</td>\n",
|
|
" <td>77.50000</td>\n",
|
|
" <td>506</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>DIS</th>\n",
|
|
" <td>3.795043</td>\n",
|
|
" <td>2.105710</td>\n",
|
|
" <td>1.12960</td>\n",
|
|
" <td>12.1265</td>\n",
|
|
" <td>3.20745</td>\n",
|
|
" <td>506</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>RAD</th>\n",
|
|
" <td>9.549407</td>\n",
|
|
" <td>8.707259</td>\n",
|
|
" <td>1.00000</td>\n",
|
|
" <td>24.0000</td>\n",
|
|
" <td>5.00000</td>\n",
|
|
" <td>506</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>TAX</th>\n",
|
|
" <td>408.237154</td>\n",
|
|
" <td>168.537116</td>\n",
|
|
" <td>187.00000</td>\n",
|
|
" <td>711.0000</td>\n",
|
|
" <td>330.00000</td>\n",
|
|
" <td>506</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>PTRATIO</th>\n",
|
|
" <td>18.455534</td>\n",
|
|
" <td>2.164946</td>\n",
|
|
" <td>12.60000</td>\n",
|
|
" <td>22.0000</td>\n",
|
|
" <td>19.05000</td>\n",
|
|
" <td>506</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>LSTAT</th>\n",
|
|
" <td>12.653063</td>\n",
|
|
" <td>7.141062</td>\n",
|
|
" <td>1.73000</td>\n",
|
|
" <td>37.9700</td>\n",
|
|
" <td>11.36000</td>\n",
|
|
" <td>506</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>MEDV</th>\n",
|
|
" <td>22.532806</td>\n",
|
|
" <td>9.197104</td>\n",
|
|
" <td>5.00000</td>\n",
|
|
" <td>50.0000</td>\n",
|
|
" <td>21.20000</td>\n",
|
|
" <td>506</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>CAT_MEDV</th>\n",
|
|
" <td>0.166008</td>\n",
|
|
" <td>0.372456</td>\n",
|
|
" <td>0.00000</td>\n",
|
|
" <td>1.0000</td>\n",
|
|
" <td>0.00000</td>\n",
|
|
" <td>506</td>\n",
|
|
" <td>0</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" mean sd min max median length \\\n",
|
|
"CRIM 3.613524 8.601545 0.00632 88.9762 0.25651 506 \n",
|
|
"ZN 11.363636 23.322453 0.00000 100.0000 0.00000 506 \n",
|
|
"INDUS 11.136779 6.860353 0.46000 27.7400 9.69000 506 \n",
|
|
"CHAS 0.069170 0.253994 0.00000 1.0000 0.00000 506 \n",
|
|
"NOX 0.554695 0.115878 0.38500 0.8710 0.53800 506 \n",
|
|
"RM 6.284634 0.702617 3.56100 8.7800 6.20850 506 \n",
|
|
"AGE 68.574901 28.148861 2.90000 100.0000 77.50000 506 \n",
|
|
"DIS 3.795043 2.105710 1.12960 12.1265 3.20745 506 \n",
|
|
"RAD 9.549407 8.707259 1.00000 24.0000 5.00000 506 \n",
|
|
"TAX 408.237154 168.537116 187.00000 711.0000 330.00000 506 \n",
|
|
"PTRATIO 18.455534 2.164946 12.60000 22.0000 19.05000 506 \n",
|
|
"LSTAT 12.653063 7.141062 1.73000 37.9700 11.36000 506 \n",
|
|
"MEDV 22.532806 9.197104 5.00000 50.0000 21.20000 506 \n",
|
|
"CAT_MEDV 0.166008 0.372456 0.00000 1.0000 0.00000 506 \n",
|
|
"\n",
|
|
" miss.val \n",
|
|
"CRIM 0 \n",
|
|
"ZN 0 \n",
|
|
"INDUS 0 \n",
|
|
"CHAS 0 \n",
|
|
"NOX 0 \n",
|
|
"RM 0 \n",
|
|
"AGE 0 \n",
|
|
"DIS 0 \n",
|
|
"RAD 0 \n",
|
|
"TAX 0 \n",
|
|
"PTRATIO 0 \n",
|
|
"LSTAT 0 \n",
|
|
"MEDV 0 \n",
|
|
"CAT_MEDV 0 "
|
|
]
|
|
},
|
|
"execution_count": 36,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"bostonHousing_df = pd.read_csv('BostonHousing.csv')\n",
|
|
"bostonHousing_df = bostonHousing_df.rename(columns={'CAT.MEDV': 'CAT_MEDV'})\n",
|
|
"\n",
|
|
"# Compute mean, standard deviation, min, max, median, length, and missing-value count for all variables\n",
|
|
"pd.DataFrame({'mean': bostonHousing_df.mean(),\n",
|
|
"'sd': bostonHousing_df.std(),\n",
|
|
"'min': bostonHousing_df.min(),\n",
|
|
"'max': bostonHousing_df.max(),\n",
|
|
"'median': bostonHousing_df.median(),\n",
|
|
"'length': len(bostonHousing_df),\n",
|
|
"'miss.val': bostonHousing_df.isnull().sum(),\n",
|
|
"})\n",
|
|
"\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 37,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>CRIM</th>\n",
|
|
" <th>ZN</th>\n",
|
|
" <th>INDUS</th>\n",
|
|
" <th>CHAS</th>\n",
|
|
" <th>NOX</th>\n",
|
|
" <th>RM</th>\n",
|
|
" <th>AGE</th>\n",
|
|
" <th>DIS</th>\n",
|
|
" <th>RAD</th>\n",
|
|
" <th>TAX</th>\n",
|
|
" <th>PTRATIO</th>\n",
|
|
" <th>LSTAT</th>\n",
|
|
" <th>MEDV</th>\n",
|
|
" <th>CAT_MEDV</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>CRIM</th>\n",
|
|
" <td>1.00</td>\n",
|
|
" <td>-0.20</td>\n",
|
|
" <td>0.41</td>\n",
|
|
" <td>-0.06</td>\n",
|
|
" <td>0.42</td>\n",
|
|
" <td>-0.22</td>\n",
|
|
" <td>0.35</td>\n",
|
|
" <td>-0.38</td>\n",
|
|
" <td>0.63</td>\n",
|
|
" <td>0.58</td>\n",
|
|
" <td>0.29</td>\n",
|
|
" <td>0.46</td>\n",
|
|
" <td>-0.39</td>\n",
|
|
" <td>-0.15</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>ZN</th>\n",
|
|
" <td>-0.20</td>\n",
|
|
" <td>1.00</td>\n",
|
|
" <td>-0.53</td>\n",
|
|
" <td>-0.04</td>\n",
|
|
" <td>-0.52</td>\n",
|
|
" <td>0.31</td>\n",
|
|
" <td>-0.57</td>\n",
|
|
" <td>0.66</td>\n",
|
|
" <td>-0.31</td>\n",
|
|
" <td>-0.31</td>\n",
|
|
" <td>-0.39</td>\n",
|
|
" <td>-0.41</td>\n",
|
|
" <td>0.36</td>\n",
|
|
" <td>0.37</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>INDUS</th>\n",
|
|
" <td>0.41</td>\n",
|
|
" <td>-0.53</td>\n",
|
|
" <td>1.00</td>\n",
|
|
" <td>0.06</td>\n",
|
|
" <td>0.76</td>\n",
|
|
" <td>-0.39</td>\n",
|
|
" <td>0.64</td>\n",
|
|
" <td>-0.71</td>\n",
|
|
" <td>0.60</td>\n",
|
|
" <td>0.72</td>\n",
|
|
" <td>0.38</td>\n",
|
|
" <td>0.60</td>\n",
|
|
" <td>-0.48</td>\n",
|
|
" <td>-0.37</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>CHAS</th>\n",
|
|
" <td>-0.06</td>\n",
|
|
" <td>-0.04</td>\n",
|
|
" <td>0.06</td>\n",
|
|
" <td>1.00</td>\n",
|
|
" <td>0.09</td>\n",
|
|
" <td>0.09</td>\n",
|
|
" <td>0.09</td>\n",
|
|
" <td>-0.10</td>\n",
|
|
" <td>-0.01</td>\n",
|
|
" <td>-0.04</td>\n",
|
|
" <td>-0.12</td>\n",
|
|
" <td>-0.05</td>\n",
|
|
" <td>0.18</td>\n",
|
|
" <td>0.11</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>NOX</th>\n",
|
|
" <td>0.42</td>\n",
|
|
" <td>-0.52</td>\n",
|
|
" <td>0.76</td>\n",
|
|
" <td>0.09</td>\n",
|
|
" <td>1.00</td>\n",
|
|
" <td>-0.30</td>\n",
|
|
" <td>0.73</td>\n",
|
|
" <td>-0.77</td>\n",
|
|
" <td>0.61</td>\n",
|
|
" <td>0.67</td>\n",
|
|
" <td>0.19</td>\n",
|
|
" <td>0.59</td>\n",
|
|
" <td>-0.43</td>\n",
|
|
" <td>-0.23</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>RM</th>\n",
|
|
" <td>-0.22</td>\n",
|
|
" <td>0.31</td>\n",
|
|
" <td>-0.39</td>\n",
|
|
" <td>0.09</td>\n",
|
|
" <td>-0.30</td>\n",
|
|
" <td>1.00</td>\n",
|
|
" <td>-0.24</td>\n",
|
|
" <td>0.21</td>\n",
|
|
" <td>-0.21</td>\n",
|
|
" <td>-0.29</td>\n",
|
|
" <td>-0.36</td>\n",
|
|
" <td>-0.61</td>\n",
|
|
" <td>0.70</td>\n",
|
|
" <td>0.64</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>AGE</th>\n",
|
|
" <td>0.35</td>\n",
|
|
" <td>-0.57</td>\n",
|
|
" <td>0.64</td>\n",
|
|
" <td>0.09</td>\n",
|
|
" <td>0.73</td>\n",
|
|
" <td>-0.24</td>\n",
|
|
" <td>1.00</td>\n",
|
|
" <td>-0.75</td>\n",
|
|
" <td>0.46</td>\n",
|
|
" <td>0.51</td>\n",
|
|
" <td>0.26</td>\n",
|
|
" <td>0.60</td>\n",
|
|
" <td>-0.38</td>\n",
|
|
" <td>-0.19</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>DIS</th>\n",
|
|
" <td>-0.38</td>\n",
|
|
" <td>0.66</td>\n",
|
|
" <td>-0.71</td>\n",
|
|
" <td>-0.10</td>\n",
|
|
" <td>-0.77</td>\n",
|
|
" <td>0.21</td>\n",
|
|
" <td>-0.75</td>\n",
|
|
" <td>1.00</td>\n",
|
|
" <td>-0.49</td>\n",
|
|
" <td>-0.53</td>\n",
|
|
" <td>-0.23</td>\n",
|
|
" <td>-0.50</td>\n",
|
|
" <td>0.25</td>\n",
|
|
" <td>0.12</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>RAD</th>\n",
|
|
" <td>0.63</td>\n",
|
|
" <td>-0.31</td>\n",
|
|
" <td>0.60</td>\n",
|
|
" <td>-0.01</td>\n",
|
|
" <td>0.61</td>\n",
|
|
" <td>-0.21</td>\n",
|
|
" <td>0.46</td>\n",
|
|
" <td>-0.49</td>\n",
|
|
" <td>1.00</td>\n",
|
|
" <td>0.91</td>\n",
|
|
" <td>0.46</td>\n",
|
|
" <td>0.49</td>\n",
|
|
" <td>-0.38</td>\n",
|
|
" <td>-0.20</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>TAX</th>\n",
|
|
" <td>0.58</td>\n",
|
|
" <td>-0.31</td>\n",
|
|
" <td>0.72</td>\n",
|
|
" <td>-0.04</td>\n",
|
|
" <td>0.67</td>\n",
|
|
" <td>-0.29</td>\n",
|
|
" <td>0.51</td>\n",
|
|
" <td>-0.53</td>\n",
|
|
" <td>0.91</td>\n",
|
|
" <td>1.00</td>\n",
|
|
" <td>0.46</td>\n",
|
|
" <td>0.54</td>\n",
|
|
" <td>-0.47</td>\n",
|
|
" <td>-0.27</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>PTRATIO</th>\n",
|
|
" <td>0.29</td>\n",
|
|
" <td>-0.39</td>\n",
|
|
" <td>0.38</td>\n",
|
|
" <td>-0.12</td>\n",
|
|
" <td>0.19</td>\n",
|
|
" <td>-0.36</td>\n",
|
|
" <td>0.26</td>\n",
|
|
" <td>-0.23</td>\n",
|
|
" <td>0.46</td>\n",
|
|
" <td>0.46</td>\n",
|
|
" <td>1.00</td>\n",
|
|
" <td>0.37</td>\n",
|
|
" <td>-0.51</td>\n",
|
|
" <td>-0.44</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>LSTAT</th>\n",
|
|
" <td>0.46</td>\n",
|
|
" <td>-0.41</td>\n",
|
|
" <td>0.60</td>\n",
|
|
" <td>-0.05</td>\n",
|
|
" <td>0.59</td>\n",
|
|
" <td>-0.61</td>\n",
|
|
" <td>0.60</td>\n",
|
|
" <td>-0.50</td>\n",
|
|
" <td>0.49</td>\n",
|
|
" <td>0.54</td>\n",
|
|
" <td>0.37</td>\n",
|
|
" <td>1.00</td>\n",
|
|
" <td>-0.74</td>\n",
|
|
" <td>-0.47</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>MEDV</th>\n",
|
|
" <td>-0.39</td>\n",
|
|
" <td>0.36</td>\n",
|
|
" <td>-0.48</td>\n",
|
|
" <td>0.18</td>\n",
|
|
" <td>-0.43</td>\n",
|
|
" <td>0.70</td>\n",
|
|
" <td>-0.38</td>\n",
|
|
" <td>0.25</td>\n",
|
|
" <td>-0.38</td>\n",
|
|
" <td>-0.47</td>\n",
|
|
" <td>-0.51</td>\n",
|
|
" <td>-0.74</td>\n",
|
|
" <td>1.00</td>\n",
|
|
" <td>0.79</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>CAT_MEDV</th>\n",
|
|
" <td>-0.15</td>\n",
|
|
" <td>0.37</td>\n",
|
|
" <td>-0.37</td>\n",
|
|
" <td>0.11</td>\n",
|
|
" <td>-0.23</td>\n",
|
|
" <td>0.64</td>\n",
|
|
" <td>-0.19</td>\n",
|
|
" <td>0.12</td>\n",
|
|
" <td>-0.20</td>\n",
|
|
" <td>-0.27</td>\n",
|
|
" <td>-0.44</td>\n",
|
|
" <td>-0.47</td>\n",
|
|
" <td>0.79</td>\n",
|
|
" <td>1.00</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \\\n",
|
|
"CRIM 1.00 -0.20 0.41 -0.06 0.42 -0.22 0.35 -0.38 0.63 0.58 \n",
|
|
"ZN -0.20 1.00 -0.53 -0.04 -0.52 0.31 -0.57 0.66 -0.31 -0.31 \n",
|
|
"INDUS 0.41 -0.53 1.00 0.06 0.76 -0.39 0.64 -0.71 0.60 0.72 \n",
|
|
"CHAS -0.06 -0.04 0.06 1.00 0.09 0.09 0.09 -0.10 -0.01 -0.04 \n",
|
|
"NOX 0.42 -0.52 0.76 0.09 1.00 -0.30 0.73 -0.77 0.61 0.67 \n",
|
|
"RM -0.22 0.31 -0.39 0.09 -0.30 1.00 -0.24 0.21 -0.21 -0.29 \n",
|
|
"AGE 0.35 -0.57 0.64 0.09 0.73 -0.24 1.00 -0.75 0.46 0.51 \n",
|
|
"DIS -0.38 0.66 -0.71 -0.10 -0.77 0.21 -0.75 1.00 -0.49 -0.53 \n",
|
|
"RAD 0.63 -0.31 0.60 -0.01 0.61 -0.21 0.46 -0.49 1.00 0.91 \n",
|
|
"TAX 0.58 -0.31 0.72 -0.04 0.67 -0.29 0.51 -0.53 0.91 1.00 \n",
|
|
"PTRATIO 0.29 -0.39 0.38 -0.12 0.19 -0.36 0.26 -0.23 0.46 0.46 \n",
|
|
"LSTAT 0.46 -0.41 0.60 -0.05 0.59 -0.61 0.60 -0.50 0.49 0.54 \n",
|
|
"MEDV -0.39 0.36 -0.48 0.18 -0.43 0.70 -0.38 0.25 -0.38 -0.47 \n",
|
|
"CAT_MEDV -0.15 0.37 -0.37 0.11 -0.23 0.64 -0.19 0.12 -0.20 -0.27 \n",
|
|
"\n",
|
|
" PTRATIO LSTAT MEDV CAT_MEDV \n",
|
|
"CRIM 0.29 0.46 -0.39 -0.15 \n",
|
|
"ZN -0.39 -0.41 0.36 0.37 \n",
|
|
"INDUS 0.38 0.60 -0.48 -0.37 \n",
|
|
"CHAS -0.12 -0.05 0.18 0.11 \n",
|
|
"NOX 0.19 0.59 -0.43 -0.23 \n",
|
|
"RM -0.36 -0.61 0.70 0.64 \n",
|
|
"AGE 0.26 0.60 -0.38 -0.19 \n",
|
|
"DIS -0.23 -0.50 0.25 0.12 \n",
|
|
"RAD 0.46 0.49 -0.38 -0.20 \n",
|
|
"TAX 0.46 0.54 -0.47 -0.27 \n",
|
|
"PTRATIO 1.00 0.37 -0.51 -0.44 \n",
|
|
"LSTAT 0.37 1.00 -0.74 -0.47 \n",
|
|
"MEDV -0.51 -0.74 1.00 0.79 \n",
|
|
"CAT_MEDV -0.44 -0.47 0.79 1.00 "
|
|
]
|
|
},
|
|
"execution_count": 37,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Correlation Matrix\n",
|
|
"bostonHousing_df.corr().round(2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 38,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"RM_bin CHAS\n",
|
|
"3 0 25.300000\n",
|
|
"4 0 15.407143\n",
|
|
"5 0 17.200000\n",
|
|
" 1 22.218182\n",
|
|
"6 0 21.769170\n",
|
|
" 1 25.918750\n",
|
|
"7 0 35.964444\n",
|
|
" 1 44.066667\n",
|
|
"8 0 45.700000\n",
|
|
" 1 35.950000\n",
|
|
"Name: MEDV, dtype: float64"
|
|
]
|
|
},
|
|
"execution_count": 38,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Create bins of size 1 for the RM variable\n",
|
|
"\n",
|
|
"bostonHousing_df['RM_bin'] = pd.cut(bostonHousing_df.RM, range(0, 10), labels=False)\n",
|
|
"\n",
|
|
"# Compute average of MEDV by (binned) RM and CHAS. \n",
|
|
"# Group the data using groupby, then restrict the analysis to MEDV and determine the mean for each group.\n",
|
|
"\n",
|
|
"bostonHousing_df.groupby(['RM_bin', 'CHAS'])['MEDV'].mean()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th>CHAS</th>\n",
|
|
" <th>0</th>\n",
|
|
" <th>1</th>\n",
|
|
" <th>All</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>RM_bin</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>25.300000</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>25.300000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>15.407143</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>15.407143</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>17.200000</td>\n",
|
|
" <td>22.218182</td>\n",
|
|
" <td>17.551592</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>21.769170</td>\n",
|
|
" <td>25.918750</td>\n",
|
|
" <td>22.015985</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>35.964444</td>\n",
|
|
" <td>44.066667</td>\n",
|
|
" <td>36.917647</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>45.700000</td>\n",
|
|
" <td>35.950000</td>\n",
|
|
" <td>44.200000</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>All</th>\n",
|
|
" <td>22.093843</td>\n",
|
|
" <td>28.440000</td>\n",
|
|
" <td>22.532806</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
"CHAS 0 1 All\n",
|
|
"RM_bin \n",
|
|
"3 25.300000 NaN 25.300000\n",
|
|
"4 15.407143 NaN 15.407143\n",
|
|
"5 17.200000 22.218182 17.551592\n",
|
|
"6 21.769170 25.918750 22.015985\n",
|
|
"7 35.964444 44.066667 36.917647\n",
|
|
"8 45.700000 35.950000 44.200000\n",
|
|
"All 22.093843 28.440000 22.532806"
|
|
]
|
|
},
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# create bins of size 1 for RM\n",
|
|
"bostonHousing_df['RM_bin'] = pd.cut(bostonHousing_df.RM, range(0, 10), labels=False)\n",
|
|
"\n",
|
|
"# use pivot_table() to reshape data and generate pivot table\n",
|
|
"pd.pivot_table(bostonHousing_df, values='MEDV', index=['RM_bin'], columns=['CHAS'], aggfunc='mean', margins=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 49,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmUAAAHgCAYAAADkGhrRAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAABIt0lEQVR4nO3df3zP9f7/8ft7m/1gfrPN/NgUQmHyY00/cFotOcqpTn6GfaNyktAvFNNPOkWcIuUc1ZHfnaI+ojTpp5JJOpUTsRAbImZibI/vH87ex9t+eL/nve1lbtfL5fXH+/Xj8Xq+XnvtvfteP54vl5mZAAAAUK4CyrsBAAAAIJQBAAA4AqEMAADAAQhlAAAADkAoAwAAcABCGQAAgAMQygAAAByAUAYAAOAAhDIAAAAHIJThnDBhwgS5XK4yWVeXLl3UpUsX9+fVq1fL5XLpjTfeKJP1Dxo0SLGxsWWyrpI6fPiwBg8erKioKLlcLo0YMaK8m4TTdOnSRZdcckl5NwOADwhlKHOvvvqqXC6XewgNDVV0dLSSkpL0t7/9TVlZWX5Zz65duzRhwgRt2LDBL/X8yclt88ZTTz2lV199VUOHDtWcOXN02223FTt/bm6uXnnlFXXp0kW1atVSSEiIYmNjlZycrHXr1hW6zIwZM+RyuRQfH+8xPjY21uP4KWp49dVXvdqWQYMGyeVyqVq1avr9998LTN+8ebO75rPPPusenx/WixoWLFhQaJsDAgJUo0YNtWrVSnfccYe+/PJLj/UNHz5cLpdLW7ZsKbLNDz/8sFwulzZu3OjVNpaW/H1X3HDqPxj587du3VqFveHP5XJp2LBhZbgFgLMElXcDcP567LHH1LhxYx0/flwZGRlavXq1RowYoSlTpujtt99W69at3fM+8sgjGj16tE/1d+3apUcffVSxsbGKi4vzern333/fp/WURHFtmzVrlvLy8kq9DWdj1apVuuyyy5SSknLGeX///XfddNNNWrFiha666iqNHTtWtWrVUnp6uhYtWqTXXntN27dvV4MGDTyWmzt3rmJjY7V27Vpt2bJFTZo0kSRNnTpVhw8fds/37rvvav78+XruuedUp04d9/hOnTp5vT1BQUE6cuSI3nnnHd16660F2hEaGqqjR48Wuuzw4cPVoUOHAuMTEhI8PsfFxem+++6TJGVlZemHH37Q4sWLNWvWLI0cOVJTpkyRJPXr10/PP/+85s2bp/Hjxxe6zvnz56tVq1YevyPl4c4771RiYmKh01JTU/Xqq6/qsssuKzDt22+/1Ztvvqmbb765tJsInFsMKGOvvPKKSbKvvvqqwLTU1FQLCwuzmJgYO3LkyFmt56uvvjJJ9sorr3g1f3Z2dqHjP/zwQ5NkixcvPqv2nE3bnKZx48bWvXt3r+a9++67TZI999xzBaadOHHCnnnmGduxY4fH+K1bt5oke/PNN61u3bo2YcKEIus/88wzJsm2bdvmyya4DRw40KpUqWLXXnut9ezZs8D0pk2b2s0332yS7JlnnnGP9+W4iImJKXR/HTlyxHr27GmSbMaMGe7xTZo0sebNmxda6/PPPzdJNmnSpGLX2blzZ7v44ovP2LbSsGvXLqtbt67FxMTY/v373eMHDhxoYWFh1qxZM2vdurXl5eV5LCfJ7r777rJuLuAYXL6Eo/zhD3/QuHHj9PPPP+v11193jy/snrKVK1fqiiuuUI0aNRQeHq6LLrpIY8eOlXTy0lL+2Yvk5OQCl7Ty77dJS0vTVVddpcqVK7uXPf2esny5ubkaO3asoqKiVKVKFd1www3asWOHxzyxsbEaNGhQgWVPrXmmthV2T1l2drbuu+8+NWzYUCEhIbrooov07LPPFrgElH/5Z8mSJbrkkksUEhKiiy++WCtWrCh8h59mz549uv322xUZGanQ0FC1adNGr732mnt6/iW7bdu2admyZe62p6enF1pv586deumll3TNNdcUet9ZYGCg7r
///kLPktWsWVPdu3fXLbfcorlz53rV/rPRt29fLV++XL/99pt73FdffaXNmzerb9++pbLOsLAwzZkzR7Vq1dKTTz7p/nn269dPmzZt0vr16wssM2/ePLlcLvXp08erdaSlpalTp04KCwtT48aNNXPmTPe0w4cPq0qVKrr33nsLLLdz504FBgZq4sSJPm1TXl6e+vXrpwMHDmjevHmqWbOmx/SAgAA98sgj2rhxo9566y2fagMVHaEMjpN/f1JxlxG/++47/fGPf9SxY8f02GOPafLkybrhhhv02WefSZJatGihxx57TJJ0xx13aM6cOZozZ46uuuoqd41ff/1V3bp1U1xcnKZOnaquXbsW264nn3xSy5Yt00MPPaThw4dr5cqVSkxMLPQ+pOJ407ZTmZluuOEGPffcc7ruuus0ZcoUXXTRRXrggQc0atSoAvN/+umn+stf/qLevXvrr3/9q44ePaqbb75Zv/76a7Ht+v3339WlSxfNmTNH/fr10zPPPKPq1atr0KBBmjZtmrvtc+bMUZ06dRQXF+due926dQutuXz5cp04ceKM95ydbu7cubrpppsUHBysPn36aPPmzfrqq698quGrm266SS6XS2+++aZ73Lx589S8eXNdeumlRS6XlZWlffv2FRhOD8xFCQ8P15/+9Cf98ssv+v777yWdDGX56z9Vbm6uFi1apCuvvFKNGjU6Y+0DBw7o+uuvV7t27fTXv/5VDRo00NChQzV79myPdS9cuFC5ubkey86fP19m5m6Ltx5//HF9+OGHevTRR4u8hNy3b181bdpUjz32mNf7CTgvlOdpOpyfirt8ma969erWtm1b9+eUlBQ79XB97rnnTJLt3bu3yBrFXSLs3LmzSbKZM2cWOq1z587uz/mXqerXr2+HDh1yj1+0aJFJsmnTprnHxcTE2MCBA89Ys7i2DRw40GJiYtyflyxZYpLsiSee8JjvlltuMZfLZVu2bHGPk2TBwcEe47755huTZM8//3yBdZ1q6tSpJslef/1197icnBxLSEiw8PBwj20v6nLc6UaOHGmS7Ouvvz7jvPnWrVtnkmzlypVmZpaXl2cNGjSwe++9t9D5/XX50uzkPr366qvNzCw3N9eioqLs0UcftW3bthV5+bKoYffu3e55z7S/8o/npUuXusd16NDBGjRoYLm5ue5xK1asMEn20ksvnXG78o/xyZMnu8cdO3bM4uLiLCIiwnJycszM7L333jNJtnz5co/lW7du7XHMemP16tUWGBhoV199tUe78526r1977TX3Jep84vIlznOcKYMjhYeHF/sUZo0aNSRJS5cuLfFN8SEhIUpOTvZ6/gEDBqhq1aruz7fccovq1aund999t0Tr99a7776rwMBADR8+3GP8fffdJzPT8uXLPcYnJibqwgsvdH9u3bq1qlWrpq1bt55xPVFRUR6XxSpVqqThw4fr8OHD+uijj3xu+6FDhyTJY7+dydy5cxUZGek+c+lyudSrVy8tWLCgwNkcf+vbt69Wr16tjIwMrVq1ShkZGWe8dDl+/HitXLmywFCrVi2v1xseHi5JHsd8//79tXPnTn388cfucfPmzVNwcLD+/Oc/e1U3KChId955p/tzcHCw7rzzTu3Zs0dpaWmSTh4v0dHRHpeI//3vf2vjxo3q37+/19uwb98+9e3bV7Vr19brr7+ugIDi/7z069ePs2XAaQhlcKTDhw8X+4e8V69euvzyyzV48GBFRkaqd+/eWrRokU8BrX79+goODvZ6/qZNm3p8drlcatKkSZH3U/nLzz//rOjo6AL7o0WLFu7ppyrsslbNmjV14MCBM66nadOmBf6YFrUeb1SrVk2SvO7mJDc3VwsWLFDXrl21bds2bdmyRVu2bFF8fLwyMzOVmprqcxt8cf3116tq1apauHCh5s6dqw4dOrif+ixKq1atlJiYWGDw5djKf5r01J9x7969FRgY6L6EefToUb311lvq1q1bgfu0ihIdHa
0qVap4jGvWrJkkuY/bgIAA9evXT0uWLNGRI0ck/e+JU2/Dn5lpwIAB2r17t/75z38qKirqjMsEBgbqkUce0YYNG7RkyRKv1gNUdIQyOM7OnTt18ODBYv8YhoWF6eOPP9YHH3yg2267TRs3blSvXr10zTXXeH02JSwszF9Ndiuqg9vSPsNzqsDAwELHl8fZiObNm0s62QWCN1atWqXdu3drwYIFatq0qXvI76aitG/4DwkJ0U033aTXXntNb731Vqnd4H+6f//735LkccxHRETommuu0b/+9S8dP35c77zzjrKysny+x8sbAwYM0OHDh7VkyRKZmebNm6c//vGPql69ulfLP/vss1q+fLnuv/9+JSUleb3efv36qUmTJpwtA/6LUAbHmTNnjiSd8cs9ICBAV199taZMmaLvv/9eTz75pFatWqUPP/xQUtEBqaQ2b97s8dnMtGXLFo8nJWvWrOnx9F6+088y+dK2mJgY7dq1q8DZpk2bNrmn+0NMTIw2b95c4Gzj2aynW7duCgwM9HiStjhz585VRESEFi9eXGDo06eP3nrrLZ8frPBV37599fXXXysrK0u9e/cu1XVJJ8+SvfXWW2rYsKH7rGS+fv36af/+/Vq+fLnmzZunatWqqUePHl7X3rVrl7Kzsz3G/fjjj5Lkcdxecsklatu2rebOnatPPvlE27dv9/rhjC+//FIPP/yw4uPj9eSTT3rdNsnzbNnSpUt9WhaoiAhlcJRVq1bp8ccfV+PGjYs9I7B///4C4/I7YT127JgkuS/bFBaSSuKf//ynRzB64403tHv3bnXr1s097sILL9QXX3yhnJwc97j/+7//K9B1hi9tu/7665Wbm6sXXnjBY/xzzz0nl8vlsf6zcf311ysjI0MLFy50jztx4oSef/55hYeHq3Pnzj7XbNiwoYYMGaL3339fzz//fIHpeXl5mjx5snbu3Knff/9db775pv74xz/qlltuKTAMGzZMWVlZevvtt89qO8+ka9euevzxx/XCCy94dRnubPz++++67bbbtH//fncv/afq2bOnKleurBkzZmj58uW66aabFBoa6nX9EydO6KWXXnJ/zsnJ0UsvvaS6deuqXbt2HvPedtttev/99zV16lTVrl3bq+Pqt99+U+/evVW5cmXNnz9flSpV8rpt+fr3768mTZro0Ucf9XlZoKKhR3+Um+XLl2vTpk06ceKEMjMztWrVKq1cuVIxMTF6++23i/3j89hjj+njjz9W9+7dFRMToz179mjGjBlq0KCBrrjiCkknA1KNGjU0c+ZMVa1aVVWqVFF8fLwaN25covbWqlVLV1xxhZKTk5WZmampU6eqSZMmGjJkiHuewYMH64033tB1112nW2+9VT/99JNef/11jxvvfW1bjx491LVrVz388MNKT09XmzZt9P7772vp0qUaMWJEgdoldccdd+ill17SoEGDlJaWptjYWL3xxhv67LPPNHXqVJ9u1j/V5MmT9dNPP2n48OHu0FWzZk1t375dixcv1qZNm9S7d2+9/fbbysrK0g033FBoncsuu0x169bV3Llz1atXr7PZ1GLl96PlrU8++aTQ3v5bt27t0eP+L7/84j5jePjwYX3//fdavHixMjIydN9993nckJ8vPDxcPXv2dN9X5uuly+joaD399NNKT09Xs2bNtHDhQm3YsEEvv/xygQDVt29fPfjgg3rrrbc0dOhQrwLWXXfdpfT0dPXq1UufffaZu0ua0xX3wEBgYKAefvhhnx66ASqscnzyE+ep/C4x8ofg4GCLioqya665xqZNm+bR9UK+07vESE1NtRtvvNGio6MtODjYoqOjrU+fPvbjjz96LLd06VJr2bKlBQUFeXRBUVxv50V1iTF//nwbM2aMRUREWFhYmHXv3t1+/vnnAstPnjzZ6tevbyEhIXb55ZfbunXrCtQsrm2nd4lhZpaVlWUjR4606Ohoq1SpkjVt2tSeeeYZr3tEL6qrjtNlZmZacn
Ky1alTx4KDg61Vq1aFdtvhbZcY+U6cOGF///vf7corr7Tq1atbpUqVLCYmxpKTk93dZfTo0cNCQ0OLfLOCmdmgQYOsUqVKtm/fPvc4f3aJUZSSdImRkpLinjcmJsY93uVyWbVq1eziiy+2IUOG2JdfflnsupctW2aSrF69eoV2M1GU/GN83bp1lpCQYKGhoRYTE2MvvPBCkctcf/31Jsk+//xzr9Zx6nYVN+Qral8fP37cLrzwQrrEwHnPZcbdlQAA6U9/+pO+/fbbYl+GDqD0cE8ZAEC7d+/WsmXLfH77AgD/4Z4yABXS/v37PR64OF1gYGCRr4c6n2zbtk2fffaZ/v73v6tSpUqF3tsGoGwQygBUSDfddFOxbyGIiYkp9Y5/zwUfffSRkpOT1ahRI7322mul/sQpgKJxTxmACiktLa3YtxiEhYXp8ssvL8MWAUDxCGUAAAAOwI3+AAAADnBO3FOWl5enXbt2qWrVqn5/dQ4AACgdZqasrCxFR0crIIDzQGdyToSyXbt2qWHDhuXdDAAAUAI7duxQgwYNyrsZjndOhLL817vs2LFD1apVK+fWAAAAbxw6dEgNGzYs8WvazjfnRCjLv2RZrVo1QhkAAOcYbj3yDhd4AQAAHIBQBgAA4ACEMgAAAAcglAEAADgAoQwAAMABCGUAAAAOQCgDAABwAEIZAACAAxDKAAAAHIBQBgAA4AA+h7KPP/5YPXr0UHR0tFwul5YsWXLGZVavXq1LL71UISEhatKkiV599dUSNBUAAKDi8jmUZWdnq02bNpo+fbpX82/btk3du3dX165dtWHDBo0YMUKDBw/We++953NjAQAAKiqfX0jerVs3devWzev5Z86cqcaNG2vy5MmSpBYtWujTTz/Vc889p6SkJF9XDwAAUCGV+j1la9asUWJiose4pKQkrVmzpshljh07pkOHDnkMAAAAFZnPZ8p8lZGRocjISI9xkZGROnTokH7//XeFhYUVWGbixIl69NFHiy88obp3DZhw0KvZYkcvO+M86ZO6e7lOL9pGu05ZZ9m2S/KubU6t5ddj/3yo5W29c/zYZxt9rOfHWvx++1gLRXLk05djxozRwYMH3cOOHTvKu0kAAAClqtTPlEVFRSkzM9NjXGZmpqpVq1boWTJJCgkJUUhISGk3DQAAwDFK/UxZQkKCUlNTPcatXLlSCQkJpb1qAACAc4bPoezw4cPasGGDNmzYIOlklxcbNmzQ9u3bJZ289DhgwAD3/HfddZe2bt2qBx98UJs2bdKMGTO0aNEijRw50j9bAAAAUAH4HMrWrVuntm3bqm3btpKkUaNGqW3btho/frwkaffu3e6AJkmNGzfWsmXLtHLlSrVp00aTJ0/W3//+d7rDAAAAOIXP95R16dJFZlbk9MJ66+/SpYu+/vprX1cFAABw3nDk05cAAADnG0IZAACAAxDKAAAAHIBQBgAA4ACEMgAAAAcglAEAADgAoQwAAMABCGUAAAAOQCgDAABwAEIZAACAAxDKAAAAHIBQBgAA4ACEMgAAAAcglAEAADgAoQwAAMABCGUAAAAOQCgDAABwAEIZAACAAxDKAAAAHIBQBgAA4ACEMgAAAAcglAEAADgAoQwAAMABCGUAAAAOEFTeDSip2KPzvJovvXSbAQAA4BecKQMAAHAAQhkAAIADEMoAAAAc4Jy9pwwVhzf3B6aXfjMAAChXnCkDAABwAEIZAACAAxDKAAAAHIBQBgAA4ACEMgAAAAcglAEAADgAoQwAAMABCGUAAAAOQCgDAABwAEIZAACAAxDKAAAAHIBQBgAA4ACEMgAAAAcglAEAADgAoQwAAMABCGUAAAAOQCgDAABwAEIZAACAAxDKAAAAHIBQBgAA4ACEMgAAAAcglAEAADgAoQwAAMABCGUAAAAOQCgDAABwAEIZAACAAxDKAAAAHIBQBgAA4ACEMgAAAAcglAEAADgAoQwAAMABShTKpk+frtjYWIWGhio+Pl5r16
4tdv6pU6fqoosuUlhYmBo2bKiRI0fq6NGjJWowAABAReRzKFu4cKFGjRqllJQUrV+/Xm3atFFSUpL27NlT6Pzz5s3T6NGjlZKSoh9++EH/+Mc/tHDhQo0dO/asGw8AAFBR+BzKpkyZoiFDhig5OVktW7bUzJkzVblyZc2ePbvQ+T///HNdfvnl6tu3r2JjY3XttdeqT58+Zzy7BgAAcD7xKZTl5OQoLS1NiYmJ/ysQEKDExEStWbOm0GU6deqktLQ0dwjbunWr3n33XV1//fVFrufYsWM6dOiQxwAAAFCRBfky8759+5Sbm6vIyEiP8ZGRkdq0aVOhy/Tt21f79u3TFVdcITPTiRMndNdddxV7+XLixIl69NFHfWkaAADAOa3Un75cvXq1nnrqKc2YMUPr16/Xm2++qWXLlunxxx8vcpkxY8bo4MGD7mHHjh2l3UwAAIBy5dOZsjp16igwMFCZmZke4zMzMxUVFVXoMuPGjdNtt92mwYMHS5JatWql7Oxs3XHHHXr44YcVEFAwF4aEhCgkJMSXpgEAAJzTfDpTFhwcrHbt2ik1NdU9Li8vT6mpqUpISCh0mSNHjhQIXoGBgZIkM/O1vQAAABWST2fKJGnUqFEaOHCg2rdvr44dO2rq1KnKzs5WcnKyJGnAgAGqX7++Jk6cKEnq0aOHpkyZorZt2yo+Pl5btmzRuHHj1KNHD3c4AwAAON/5HMp69eqlvXv3avz48crIyFBcXJxWrFjhvvl/+/btHmfGHnnkEblcLj3yyCP65ZdfVLduXfXo0UNPPvmk/7YCAADgHOdzKJOkYcOGadiwYYVOW716tecKgoKUkpKilJSUkqwKAADgvMC7LwEAAByAUAYAAOAAhDIAAAAHIJQBAAA4AKEMAADAAQhlAAAADkAoAwAAcABCGQAAgAMQygAAAByAUAYAAOAAJXrNEgDAWWKPzjvjPOml3wwAZ4EzZQAAAA5AKAMAAHAAQhkAAIADEMoAAAAcgFAGAADgAIQyAAAAByCUAQAAOAChDAAAwAEIZQAAAA5AKAMAAHAAQhkAAIADEMoAAAAcgFAGAADgAIQyAAAAByCUAQAAOAChDAAAwAEIZQAAAA5AKAMAAHAAQhkAAIADEMoAAAAcgFAGAADgAIQyAAAAByCUAQAAOAChDAAAwAEIZQAAAA5AKAMAAHAAQhkAAIADEMoAAAAcgFAGAADgAEHl3QAA5Sf26Dyv5ksv41rnC2/2WXrpN+OcwTGGio4zZQAAAA5AKAMAAHAAQhkAAIADEMoAAAAcgFAGAADgAIQyAAAAByCUAQAAOAChDAAAwAEIZQAAAA5AKAMAAHAAQhkAAIADEMoAAAAcgFAGAADgAIQyAAAAByCUAQAAOAChDAAAwAEIZQAAAA5AKAMAAHCAEoWy6dOnKzY2VqGhoYqPj9fatWuLnf+3337T3XffrXr16ikkJETNmjXTu+++W6IGAwAAVERBvi6wcOFCjRo1SjNnzlR8fLymTp2qpKQk/ec//1FERESB+XNycnTNNdcoIiJCb7zxhurXr6+ff/5ZNWrU8Ef7AQAAKgSfQ9mUKVM0ZMgQJScnS5JmzpypZcuWafbs2Ro9enSB+WfPnq39+/fr888/V6VKlSRJsbGxZ9dqAACACsany5c5OTlKS0tTYmLi/woEBCgxMVFr1qwpdJm3335bCQkJuvvuuxUZGalLLrlETz31lHJzc4tcz7Fjx3To0CGPAQAAoCLzKZTt27dPubm5ioyM9BgfGRmpjIyMQpfZunWr3njjDeXm5urdd9/VuHHjNHnyZD3xxBNFrmfixImqXr26e2jYsKEvzQQAADjnlPrTl3l5eYqIiNDLL7+sdu3aqVevXnr44Yc1c+bMIpcZM2aMDh486B527NhR2s0EAAAoVz7dU1anTh0FBgYqMzPTY3xmZqaioqIKXaZevXqqVKmSAgMD3eNatGihjIwM5eTkKDg4uMAyISEhCgkJ8aVpAADgHJ
SXl6ecnJzybkapOT0DFcenUBYcHKx27dopNTVVPXv2lHRyZ6ampmrYsGGFLnP55Zdr3rx5ysvLU0DAyRNzP/74o+rVq1doIAMAAOeHnJwcbdu2TXl5eeXdlFJVo0YNRUVFyeVyFTufz09fjho1SgMHDlT79u3VsWNHTZ06VdnZ2e6nMQcMGKD69etr4sSJkqShQ4fqhRde0L333qt77rlHmzdv1lNPPaXhw4eXYLMAAEBFYGbavXu3AgMD1bBhQ/eJm4rEzHTkyBHt2bNH0smrh8XxOZT16tVLe/fu1fjx45WRkaG4uDitWLHCffP/9u3bPXZsw4YN9d5772nkyJFq3bq16tevr3vvvVcPPfSQr6sGAAAVxIkTJ3TkyBFFR0ercuXK5d2cUhMWFiZJ2rNnjyIiIoq9lOlzKJOkYcOGFXm5cvXq1QXGJSQk6IsvvijJqgAAQAWU3zXW+XArU37oPH78eLGhrOKdKwQAAOeMM91nVRF4u42EMgAAAAcglAEAADhAie4pAwAAKA2xo5eV6frSJ3Uv0/UVhzNlAAAAPpo+fbpiY2MVGhqq+Ph4rV279qxrEsoAAAB8sHDhQo0aNUopKSlav3692rRpo6SkJHd/ZCVFKAMAAPDBlClTNGTIECUnJ6tly5aaOXOmKleurNmzZ59VXUIZAACAl3JycpSWlqbExET3uICAACUmJmrNmjVnVZtQBgAA4KV9+/YpNzfX/SajfJGRkcrIyDir2oQyAAAAByCUAQAAeKlOnToKDAxUZmamx/jMzExFRUWdVW1CGQAAgJeCg4PVrl07paamusfl5eUpNTVVCQkJZ1WbzmMBAAB8MGrUKA0cOFDt27dXx44dNXXqVGVnZys5Ofms6hLKAACAYziph/2i9OrVS3v37tX48eOVkZGhuLg4rVixosDN/74ilAEAAPho2LBhGjZsmF9rck8ZAACAAxDKAAAAHIBQBgAA4ACEMgAAAAcglAEAADgAoQwAAMABCGUAAAAOQCgDAABwAEIZAACAA9CjPwAAcI4J1ct4fQfLdn3F4EwZAACADz7++GP16NFD0dHRcrlcWrJkiV/qEsoAAAB8kJ2drTZt2mj69Ol+rcvlSwAAAB9069ZN3bp183tdQhlwjok9Os+r+dJLtxnAOY3fIzgRly8BAAAcgFAGAADgAIQyAAAAByCUAQAAOAA3+gMAAPjg8OHD2rJli/vztm3btGHDBtWqVUuNGjUqcV1CGQAAcA4H9bBflHXr1qlr167uz6NGjZIkDRw4UK+++mqJ6xLKAAAAfNClSxeZmd/rck8ZAACAAxDKAAAAHIBQBgAA4ACEMgAAAAcglAEAgHJTGjfMO01eXp5X8/H0JQAAKHOVKlWSy+XS3r17VbduXblcrvJukt+ZmXJycrR3714FBAQoODi42PkJZQAAoMwFBgaqQYMG2rlzp9LT08u7OaWqcuXKatSokQICir9ASSgDAADlIjw8XE2bNtXx48fLuymlJjAwUEFBQV6dCSSUAQCAchMYGKjAwMDyboYjcKM/AACAAxDKAAAAHIBQBgAA4ACEMgAAAAcglAEAADgAoQwAAMABCGUAAAAOQCgDAABwADqPBeA4sUfneTVfeuk2AwDKFGfKAAAAHIBQBgAA4ACEMgAAAAcglAEAADgAoQwAAMABCGUAAAAOUKJQNn36dMXGxio0NFTx8fFau3atV8stWLBALpdLPXv2LMlqAQAAKiyfQ9nChQs1atQopaSkaP369WrTpo2SkpK0Z8+eYpdLT0/X/fffryuvvLLEjQUAAKiofA5lU6ZM0ZAhQ5ScnKyWLVtq5syZqly5smbPnl3kMrm5uerXr58effRRXXDBBWdcx7Fjx3To0CGPAQAAoCLzKZTl5OQoLS1NiYmJ/ysQEKDExEStWbOmyOUee+wxRURE6Pbbb/dqPRMnTlT16tXdQ8OGDX1pJgAAwDnHp1C2b98+5ebmKjIy0mN8ZGSkMjIyCl3m00
8/1T/+8Q/NmjXL6/WMGTNGBw8edA87duzwpZkAAADnnFJ992VWVpZuu+02zZo1S3Xq1PF6uZCQEIWEhJRiywAAAJzFp1BWp04dBQYGKjMz02N8ZmamoqKiCsz/008/KT09XT169HCPy8vLO7nioCD95z//0YUXXliSdgMAAFQoPl2+DA4OVrt27ZSamuoel5eXp9TUVCUkJBSYv3nz5vr222+1YcMG93DDDTeoa9eu2rBhA/eKAQAA/JfPly9HjRqlgQMHqn379urYsaOmTp2q7OxsJScnS5IGDBig+vXra+LEiQoNDdUll1zisXyNGjUkqcB4AACA85nPoaxXr17au3evxo8fr4yMDMXFxWnFihXum/+3b9+ugABeFAAAAOCLEt3oP2zYMA0bNqzQaatXry522VdffbUkqwQAAKjQOKUFAADgAIQyAAAAByCUAQAAOAChDAAAwAEIZQAAAA5AKAMAAHAAQhkAAIADEMoAAAAcgFAGAADgAIQyAAAAByCUAQAAOAChDAAAwAEIZQAAAA5AKAMAAHAAQhkAAIADEMoAAAAcIKi8GwAAqLhij8474zzppd8M4JzAmTIAAAAHIJQBAAA4AKEMAADAAQhlAAAADkAoAwAAcABCGQAAgAMQygAAAByAUAYAAOAAhDIAAAAHIJQBAAA4AKEMAADAAQhlAAAADkAoAwAAcABCGQAAgAMQygAAAByAUAYAAOAAhDIAAAAHIJQBAAA4AKEMAADAAQhlAAAADkAoAwAAcABCGQAAgAMQygAAAByAUAYAAOAAhDIAAAAHIJQBAAA4AKEMAADAAQhlAAAADkAoAwAAcABCGQAAgAMQygAAAByAUAYAAOAAQeXdAJSd2KPzzjhPeuk3AwAAFIIzZQAAAA5AKAMAAHAAQhkAAIADEMoAAAAcgFAGAADgAIQyAAAAByCUAQAAOAChDAAAwAEIZQAAAA5QolA2ffp0xcbGKjQ0VPHx8Vq7dm2R886aNUtXXnmlatasqZo1ayoxMbHY+QEAAM5HPoeyhQsXatSoUUpJSdH69evVpk0bJSUlac+ePYXOv3r1avXp00cffvih1qxZo4YNG+raa6/VL7/8ctaNBwAAqCh8DmVTpkzRkCFDlJycrJYtW2rmzJmqXLmyZs+eXej8c+fO1V/+8hfFxcWpefPm+vvf/668vDylpqaedeMBAAAqCp9CWU5OjtLS0pSYmPi/AgEBSkxM1Jo1a7yqceTIER0/fly1atUqcp5jx47p0KFDHgMAAEBF5lMo27dvn3JzcxUZGekxPjIyUhkZGV7VeOihhxQdHe0R7E43ceJEVa9e3T00bNjQl2YCAACcc8r06ctJkyZpwYIFeuuttxQaGlrkfGPGjNHBgwfdw44dO8qwlQAAAGUvyJeZ69Spo8DAQGVmZnqMz8zMVFRUVLHLPvvss5o0aZI++OADtW7duth5Q0JCFBIS4kvTAAAAzmk+nSkLDg5Wu3btPG7Sz79pPyEhocjl/vrXv+rxxx/XihUr1L59+5K3FgAAoILy6UyZJI0aNUoDBw5U+/bt1bFjR02dOlXZ2dlKTk6WJA0YMED169fXxIkTJUlPP/20xo8fr3nz5ik2NtZ971l4eLjCw8P9uCkAAADnLp9DWa9evbR3716NHz9eGRkZiouL04oVK9w3/2/fvl0BAf87Affiiy8qJydHt9xyi0edlJQUTZgw4exaDwAAUEH4HMokadiwYRo2bFih01avXu3xOT09vSSrAAAAOK/w7ksAAAAHIJQBAAA4AKEMAADAAQhlAAAADkAoAwAAcIASPX0JAOeS2KPzzjhPeuk3AwCKxZkyAAAAByCUAQAAOAChDAAAwAEIZQAAAA5AKAMAAHAAQhkAAIADEMoAAAAcgFAGAADgAIQyAAAAByCUAQAAOAChDAAAwAEIZQAAAA5AKAMAAHAAQhkAAIADEMoAAAAcgFAGAADgAIQyAAAAByCUAQAAOAChDAAAwAEIZQ
AAAA5AKAMAAHAAQhkAAIADEMoAAAAcgFAGAADgAIQyAAAAByCUAQAAOAChDAAAwAEIZQAAAA5AKAMAAHAAQhkAAIADEMoAAAAcgFAGAADgAIQyAAAAByCUAQAAOAChDAAAwAEIZQAAAA5AKAMAAHAAQhkAAIADEMoAAAAcgFAGAADgAIQyAAAAByCUAQAAOAChDAAAwAEIZQAAAA5AKAMAAHAAQhkAAIADEMoAAAAcgFAGAADgAIQyAAAAByCUAQAAOAChDAAAwAEIZQAAAA5AKAMAAHCAEoWy6dOnKzY2VqGhoYqPj9fatWuLnX/x4sVq3ry5QkND1apVK7377rslaiwAAEBF5XMoW7hwoUaNGqWUlBStX79ebdq0UVJSkvbs2VPo/J9//rn69Omj22+/XV9//bV69uypnj176t///vdZNx4AAKCi8DmUTZkyRUOGDFFycrJatmypmTNnqnLlypo9e3ah80+bNk3XXXedHnjgAbVo0UKPP/64Lr30Ur3wwgtn3XgAAICKIsiXmXNycpSWlqYxY8a4xwUEBCgxMVFr1qwpdJk1a9Zo1KhRHuOSkpK0ZMmSItdz7NgxHTt2zP354MGDkqRDhw65x+UdO+JVm09dpjje1PO2lo7Zmecph3ZRy7d61KoYtbytR62KUcvbek6t5dXfj5PFzola+dts5uXy5zvzwS+//GKS7PPPP/cY/8ADD1jHjh0LXaZSpUo2b948j3HTp0+3iIiIIteTkpJikhgYGBgYGBgqwLBjxw5f4sZ5y6czZWVlzJgxHmfX8vLytH//ftWuXVsul6vQZQ4dOqSGDRtqx44dqlat2lmt/3yo5eS2UYta1OL3m1oVo5aZKSsrS9HR0We1vvOFT6GsTp06CgwMVGZmpsf4zMxMRUVFFbpMVFSUT/NLUkhIiEJCQjzG1ahRw6s2VqtWzS9faOdLLX/Xoxa1qOWcWv6uRy1qlaRW9erV/bKu84FPN/oHBwerXbt2Sk1NdY/Ly8tTamqqEhISCl0mISHBY35JWrlyZZHzAwAAnI98vnw5atQoDRw4UO3bt1fHjh01depUZWdnKzk5WZI0YMAA1a9fXxMnTpQk3XvvvercubMmT56s7t27a8GCBVq3bp1efvll/24JAADAOcznUNarVy/t3btX48ePV0ZGhuLi4rRixQpFRkZKkrZv366AgP+dgOvUqZPmzZunRx55RGPHjlXTpk21ZMkSXXLJJf7bCp285JmSklLgsie1yqYetahFLefU8nc9alGrNGvhf1xmPKcKAABQ3nj3JQAAgAMQygAAAByAUAYAAOAAhDIAAAAHIJQBAAA4AKEMAADAARz57kuUjpycHC1ZskRr1qxRRkaGpJOvwerUqZNuvPFGBQcHe11r3759mj17dqG1Bg0apLp165ZLu5y6jfCNU48JJzsfjtfS2sbs7GwtWrRIW7ZsUb169dSnTx/Vrl27XNrl1FooG+dsP2Xff/+9XnjhhQIHW0JCgoYNG6aWLVuWuPbZ/oKWVq2zsWXLFiUlJWnXrl2Kj493d/abmZmpL7/8Ug0aNNDy5cvVpEmTM9b66quvlJSUpMqVKysxMdGjVmpqqo4cOaL33ntP7du3L9N2OXUbJeeGDCfWcuoxcToz0+rVq92/20lJSapUqZJPNfy1z86H49Wf29iyZUt9+umnqlWrlnbs2KGrrrpKBw4cULNmzfTTTz8pKChIX3zxhRo3blym7XJqrVOtXbu20L+7HTt29KkOimDnoHfffdeCg4Ptsssus5SUFJsxY4bNmDHDUlJSrFOnThYSEmIrVqzwul6LFi3s119/NTOz7du3W2xsrFWvXt06dOhgtWrVsoiICNu6dWuZ1zIz++6772zo0KEWFxdnUVFRFhUVZXFxcTZ06FD77rvvvK6TmJhoN954ox08eLDAtIMHD9qNN95o1157rVe14uPj7Y477rC8vLwC0/Ly8uyOO+6wyy67rMzb5dRt3Lx5s11wwQ
UWGhpqnTt3tltvvdVuvfVW69y5s4WGhlqTJk1s8+bN1Povpx4T3bp1s99++83MzH799VeLj483l8tldevWtYCAAGvevLnt2bPHq1pm/t1n58Px6s9tdLlclpmZaWZm/fr1s06dOrl/tllZWZaYmGh9+vQp83Y5tZaZWWZmpl1xxRXmcrksJibGOnbsaB07drSYmBhzuVx2xRVXuPcpSu6cDGWtW7e2cePGFTk9JSXFWrVq5XU9f/6C+rOWP8NnWFiYffvtt0VO37hxo4WFhXlVKzQ01H744Ycip//www8WGhpa5u1y6jY6NWQ4tZZTj4lTf7eHDh1qLVu2dP+DtWPHDmvXrp3dddddXtUy8+8+Ox+OV39u46k/ywsuuMDef/99j+mfffaZNWzYsMzb5dRaZmY333yzJSQk2KZNmwpM27Rpk3Xq1MluueUWr+uhcOdkKAsNDS30wMi3adMmnw42f/6C+rOWP8NnvXr17J133ily+ttvv2316tXzqlZsbKy99tprRU5/7bXXLCYmpszb5dRtdGrIcGotpx4Tp/5uX3TRRbZ06VKP6R988IE1btzYq1pm/t1n58Px6s9tdLlc7rOa0dHRBdqYnp7u9d8Qf7bLqbXMzMLDw239+vVFTl+3bp2Fh4d7XQ+FOydv9I+NjdWyZct00UUXFTp92bJliomJ8ammy+WSJB09elT16tXzmFa/fn3t3bu3zGv9+OOP6tevX5HT+/Tpo6efftqrWoMHD9aAAQM0btw4XX311QXuL3jiiSd0zz33eFXr/vvv1x133KG0tLRCa82aNUvPPvtsmbfLqdtYo0YNpaen65JLLil0enp6umrUqEGt/3LqMSH973f7wIEDuvDCCz2mNWnSRLt27fK6lj/32flwvPpzGyXp6quvVlBQkA4dOqT//Oc/Hm38+eefvb7315/tcmot6eQLyA8dOlTk9KysLF5O7g/lnQpLYtGiRRYUFGQ9evSwadOm2YIFC2zBggU2bdo0u+GGGyw4ONjeeOMNr+u5XC5r1aqVtW3b1sLDwwss+9FHH1n9+vXLvFbz5s1t8uTJRU6fPHmyXXTRRV7VMjObNGmS1atXz1wulwUEBFhAQIC5XC6rV6+ePf30017XMTNbsGCBxcfHW1BQkLlcLnO5XBYUFGTx8fG2cOFCn2r5s11O3MZx48ZZzZo1bcqUKfbNN99YRkaGZWRk2DfffGNTpkyxWrVqWUpKCrVO4cRjwuVy2fXXX29/+tOfrGbNmgXOwH3xxRcWGRnpdT1/77OKfrz6cxsnTJjgMZx+G8j9999vvXv3LvN2ObnWX/7yF4uJibE333zT43L0wYMH7c0337TY2FgbNmyYTzVR0Dn79OXnn3+uv/3tb4U+BXLvvfcqISHB61qPPvqox+fLLrtMSUlJ7s8PPPCAdu7cqfnz55dprcWLF6tv377q1q1boU/PrFixQvPmzdPNN998xlqn2rZtm8c+8+YJo6IcP35c+/btkyTVqVPH56fPSqtdTtvGp59+WtOmTVNGRob7bIuZKSoqSiNGjNCDDz5IrUI46ZhITk72+NytWzfdeuut7s8PPvigNm7cqBUrVnhdszT2WUU+XvP583vHn/zZLqfVOnbsmEaMGKHZs2frxIkT7qdmc3JyFBQUpNtvv13PPfccZ8vO0jkbys4X/gyfKH9OChnnQq1zSXZ2tgIDAxUaGurzsk7dZxwXON2hQ4eUlpbm8bNs166dqlWrVs4tqxgIZZAkLV26VAcPHtSAAQPOutaMGTO0b98+jR8/3lHtcuo2wjdOPSac7Hw4Xv25jdRCeamQoWzs2LHKyMjQ7Nmz/VLvfPhFaN68uTZv3qzc3NyzrnX11Vdr27Zt2rp1q6Pa5dRtdGrIcGotpx4T/g54/qx3Phyv/txGavlu3bp1OnLkiK666iq/1DtfVchQNmDAAO3cuVOrVq3ySz2n/iL4O3yifDg1ZDi1llP5exudus84LlCYFi1a6Mcff+
RneZYqZCg7XwwcOFA7duzwW/gEAKAkdu3apePHj/vcHRU8nbOhjBetlkxZvLfswIEDeuedd3y6JJGXl6eAgIBCx+/cuVONGjUqUVu2bdvmfjdhUX0lFeWbb75RWlqaunTpogsuuEDfffedpk+frry8PP3pT3/yeKoW/ldax+rZHBNOdj68eN3f27hz507VqFFD4eHhHuOPHz+uNWvWlOhSnJ3lO1FL6+d4tu1CGSnD7jf8Zu3atVazZk2rX7++DRw40B588EF78MEHbeDAgdagQQOrVauWffXVVz7X3bFjh2VlZRUYn5OTYx999JFXNZ599llLT0/3ed2lrSzfW7ZhwwYLCAjwat6DBw/an//8ZwsNDbWIiAgbN26cnThxwj09IyPD61pDhw51//yOHDliN998s7tPqoCAAOvatWuhP9/C/Otf/7LAwECrXbu2hYeH28qVK61GjRqWmJhoSUlJFhgYaHPnzvWqVr4vv/zSpk6daqNHj7bRo0fb1KlT7csvv/SphtnJ99Zt3brVjh8/bmZmx44dswULFthrr71me/fu9bne6bp27eqXY3jr1q32/vvvF9uje2H8eaz685jIl5ubW+T4n3/+2adapyvpPvPnOybz+et4PZ0TtnHXrl3WoUMHCwgIsMDAQLvttts8jgNfvnf8+U5Uf26jv9/Vmm/37t22ZMkSmzlzps2cOdOWLFliu3fv9rkOCndOhjJ/v2jVn7+gLpfLAgMDLTEx0RYsWGDHjh3zuh2Feeedd2zcuHH26aefmplZamqqdevWzZKSkuyll17yuo4/31t28ODBYodPPvnE6/01fPhwa9asmS1evNhmzZplMTEx1r17d/d+y8jIMJfL5VWtgIAA9x/rMWPGWIMGDWzVqlWWnZ1tn376qV144YU2evRor2pdeuml9sQTT5iZ2fz5861GjRr22GOPuac/++yzFhcX51Utf4aMTZs2WUxMjAUEBFiTJk1s69at1q5dO6tSpYpVrlzZ6tSpYz/++KNXtZYuXVroEBgYaC+88IL7szf8GX78eaz685jw5z8QZv7dZ/58x6RTQ7E/t3HAgAEWHx9vX331la1cudLatWtn7du3t/3795uZb987/nwnqj+30d/vaj18+LD169fPAgMDLSgoyCIiIiwiIsKCgoIsMDDQ+vfvb9nZ2V7XQ+HOyVDm7xet+vsX9JVXXrEbb7zRKlWqZLVr17Z7773X5/8KzcxmzpxpQUFB1q5dO6tWrZrNmTPHqlataoMHD7Y777zTwsLCbOrUqV7V8ud7y07tGb2wIX+6Nxo1amQffvih+/PevXutY8eOdu2119rRo0d9DsT5X0KXXHKJzZs3z2P60qVLrVmzZl7VqlKlim3bts3MTgb9SpUq2caNG93Tf/rpJ6/3lz9Dxo033mg33HCDbdy40UaMGGEtWrSwG2+80XJycuzo0aPWo0cP69+/v1e18n9O+T19FzZ4u+/9GX78faz665jw5z8QZv7dZ/58x6RTQ7E/tzE6OtrjrF/+705cXJz9+uuvJf7eOdt3ovpzG/39rtbbb7/dmjZtaitWrPD4Z+TEiRP23nvvWbNmzWzw4MFe10PhzslQ5u8XrZbWL2hmZqY9/fTT1rx5cwsICLAOHTrYyy+/bIcOHfKqVsuWLe3ll182M7NVq1ZZaGioTZ8+3T39lVdesRYtWnhVq3bt2rZ69eoip3/44YdWu3Ztr2pVq1bNnn76aVu9enWhw6xZs7zeX2FhYe7/3vIdOnTIEhIS7A9/+INt3brVp32ffzq+Tp069u9//9tjenp6utdfaFFRUbZu3TozM9u/f7+5XC6P8Lh27VqLioryqpY/Q0bdunXt66+/NrOT/7m6XC775JNP3NM/++wza9SokVe1rrvuOuvevXuBsx5BQUH23XffeVUjnz/Djz+PVX8eE/78ByK/bf7aZ/588bpTQ7E/t7FKlSoFzigfP37cevbsaa1bt7aNGzeW6HsnIiKi0G
MsJCTEq1r+3EZ/tsvMrEaNGvbZZ58VOf3TTz+1GjVqeF0PhTsnQ9kLL7xgISEhNnz4cFu6dKl98cUX9sUXX9jSpUtt+PDhFhYW5hFezsTfv6CFndr/+OOPbeDAgValShWrUqWKV7XCwsI87lGpVKmSx39R27Zts8qVK3tVy5/vLevSpUux7wzcsGGD12cMLrroIlu2bFmB8VlZWZaQkGBt2rTxad/feeedNnLkSIuIiLD333/fY3paWprVqVPHq1r9+/e3+Ph4e/31161Hjx6WlJRkl112mf3www+2adMm69y5s9dnC/wZMk4/JsLDw23Lli3uz9u3b/fpi3bKlCnWsGFDjz8EJQ1l/go//jxW/XlM+PMfiPy2+Wuf+fMdk04Nxf7cxlatWhX6fuT87/1GjRr59L3jr3ei+nMb/f2u1mrVqhV7r/batWutWrVqXtdD4c7JUGbm3xet+vMX9NTT9YU5ePCg++zXmTRo0MA+/vhjMzP75ZdfzOVyeQSY1atXW4MGDbyqdfToUbvrrrssODjYAgICLDQ01EJDQy0gIMCCg4Nt6NChdvToUa9qvfzyyzZt2rQip2dkZNiECRO8qnXPPfcUGW4OHTpk8fHxXu/7zp07W5cuXdzDrFmzPKY//vjj1rlzZ69qZWRk2DXXXGPh4eGWlJRkv/32mw0bNsx9Sa9p06YeYag4/gwZF154oceZsRkzZniceU1LS/P6DF6+r7/+2lq2bGl33HGHZWdnlziU+Sv8FHWsulwun49Vfx4T/vwHwsy/+8zMfy9ed2ooNvPfNj744INF3pt1/Phxu+GGG7z+WQ4aNMhjOP3vzwMPPGBJSUlet81f2+jvdvXt29fatm1b6FnU9evXW7t27axfv35e10PhztkuMfL540WrDz30kDZs2KD33nuvwLQTJ07o5ptv1jvvvKO8vLwz1goICFBGRoYiIiJ8bsfphg0bpvfff18DBw7U22+/rRYtWujLL7/Uc889J5fLpQceeEAdOnTQP/7xD69rOu29ZQcOHNCuXbt08cUXFzo9KytL69evV+fOnc96XVu3blVwcLAaNGhwVjWOHDmi5s2bKygoyKtl/Pki37vuukvt27fX4MGDC50+adIkffLJJ1q2bJn3GyXp999/18iRI7Vq1Spt3bpVGzduVMuWLb1evkuXLu6XTUtSv379PNr4xBNP6IMPPtDq1au9rnno0CGtW7dOmZmZkqTIyEi1b9/eL8eqmcnlcvl0TAwfPly7d+/W4sWLC0zLysrSNddco6+++srrzjNLY59JZ/+OSX8er07dxhMnTujIkSNFHksnTpzQL7/84pc+t0r6TtTSfleor+06cOCA+vbtq/fee081a9Z0/43bs2ePfvvtNyUlJWnevHmqUaOGX9t5vjnnQ5k/lOUvqC+ys7M1cuRIrVmzRp06ddLzzz+vv/3tb3r44Yd1/Phxde7cWQsXLvRLAETpK4tAvG3bNoWGhqpevXolWv7tt9/Whx9+qDFjxvj1uPJHIA4ODtY333yjFi1anHV7SlKrLP+BkPyzz87G6aG4NI7X8t5GJ9q9e7defPFFffrpp9q9e7cCAgJ0wQUXqGfPnho0aJACAwPLpVa+H374QV988UWB/gObN2/ucy0URCjzwo4dO5SSkuKX1xn5o9bRo0d1/PhxVa1a1aflfv/9d6WlpalWrVoFzoIcPXpUixYt8rrDV2r5Viv/iyz/y2vTpk2aNm2ajh07pv79++sPf/iDV3XKotbUqVOVk5NT4lqdOnXSRRddVOJ2jRo1qtDx06ZNU//+/VW7dm1J0pQpU8q01umys7O1aNEid2ecffr0cdcribOpt379etWsWdN9NmXOnDmaOXOmtm/frpiYGA0bNky9e/f2qtY999yjW2+9VVdeeWWJt6U0aknSCy+8oLVr1+r6669X7969NWfOHE2cOFF5eXm66aab9Nhjj3l9BtuJtdatW6fExEQ1adJEYWFhWrNmjfr27a
ucnBy99957atmypVasWOHVd78/a6EMleOl03OGL52hlmWt7du3W3Jyslfz/uc//3H3MxQQEGBXXXWV/fLLL+7pvjw5VlitXbt2UasIy5cvt+DgYKtVq5aFhoba8uXLrW7dupaYmGh/+MMfLDAw0FJTU6n1Xy6Xy+Li4jzuBevSpYu5XC7r0KGDdenSxbp27VrmtVq0aGG//vqrmZ383YuNjbXq1atbhw4drFatWhYREVHgQQBf6sXExJS4XuvWrW3lypVmZjZr1iwLCwuz4cOH24svvmgjRoyw8PBw+8c//uFVrVPvm5w0adJZdQzqz1qPP/64Va1a1W6++WaLioqySZMmWe3ate2JJ56wp556yurWrWvjx48/p2tdfvnlHvfjzpkzx+Lj483s5FPgcXFxNnz48DKvle/YsWO2cOFCGzFihPXu3dt69+5tI0aMsEWLFp11n5w4iVBmRXeimT8899xzXv8B9metM/El4PXs2dO6d+9ue/futc2bN1v37t2tcePG7if5fAkZ1PKtVkJCgj388MNmdrIj2po1a9rYsWPd00ePHm3XXHMNtf5r4sSJ1rhx4wIhriQPIPiz1qlPVvfr1886derk7jE9KyvLEhMTrU+fPuVSLywszP0WhrZt2xZ4mGju3LnWsmVLr9v1wQcf2L333mt16tSxSpUq2Q033GDvvPNOkW8zKItaF154of3rX/8ys5PffYGBgfb666+7p7/55pvWpEmTc7pWWFiY/fTTT+7Pubm5VqlSJcvIyDAzs/fff9+io6PLvJZZ6bw1AgURysy/nWj6s5Y/A15ERIRH56d5eXl21113WaNGjeynn37yKWRQy7da1apVc39Z5ebmWlBQkMcTTN9++63Xj6afD7XMTj5e36xZM7vvvvssJyfHzEoWpPxZ69QQdcEFFxR4kvCzzz6zhg0blku92rVru/vVi4iIsA0bNnhM37JlS4k6Hc3JybGFCxe6Xy0WHR1tY8eO9fqPrz9rFdZF0KldbKSnp3vdRZBTa8XExLjf3mJ28m0zLpfLjhw5YmYnu0HytmN0f9Yy8+/bBlA0Qpmd7Dx2yZIlRU7/+uuvvf4D7M9a/gx4VatWte+//77A+Lvvvtvd9Qa1SqdWtWrVPLrPCA8P9/gPNj093esvx/OhVr6srCwbMGCAtW7d2r799lurVKlSiUKZv2qd2udWdHR0gZ7Xfd1Gf9br37+/3X777WZm9uc//9keeeQRj+lPPfWUtWrVyut2Fdatz88//2wpKSnu13yVda3GjRvb8uXLzczsxx9/tICAAFu0aJF7+rJlyyw2NvacrnXvvffaJZdcYsuXL7dVq1ZZ165drUuXLu7pK1assAsvvLDMa5n5920DKBqhzMx69Ohh48aNK3K6L52h+rOWPwNehw4d7J///Geh0+6++26rUaMGtUqpVuvWrd1f2mYnzxrlv0zc7GTHwt6+7uR8qHW6+fPnW2RkpAUEBJQ4lPmjlsvlslatWlnbtm0tPDy8QN+GH330kdWvX79c6v3yyy8WGxtrV111lY0aNcrCwsLsiiuusCFDhthVV11lwcHBhfaxVlS7iutrMS8vr8BZvbKo9cgjj1jdunVt8ODB1rhxYxs9erQ1atTIXnzxRZs5c6Y1bNjQRo4ceU7XysrKsltvvdXd/2anTp087it87733PAJfWdUy8+/bBlA0Qpmd/INx6h+U0x0+fLjYHq5Lq5Y/A95TTz1l3bp1K3L60KFDqVVKtV588UX7v//7vyKnjxkzxn2Wg1qF27Fjhy1ZssQOHz5c4hpnW2vChAkew4oVKzym33///da7d+9yq3fgwAF76KGHrGXLlhYaGmrBwcEWExNjffv2LbYn9tPFxsbavn37vJ6/rGrl5ubak08+aX/84x/tqaeesry8PJs/f741bNjQateubYMGDfL6Z+rUWvl+//13r1/UXla1/Pm2ARSNLjEc7JNPPlF2drauu+66QqdnZ2dr3bp1fusXCQ
CAojz99NOaNm2aMjIy3J0Cm5mioqI0YsQIPfjgg+XcwnMfoQwAAHittN82cD4jlAEAgLPiz07Wz2eEMgAAcFa++eYbXXrppV6/+xWF8+4dEgAA4Lz19ttvFzt969atZdSSio0zZQAAoFgBAQFyuVwqLjK4XC7OlJ2lgPJuAAAAcLZ69erpzTffVF5eXqHD+vXry7uJFQKhDAAAFKtdu3ZKS0srcvqZzqLBO9xTBgAAivXAAw8oOzu7yOlNmjTRhx9+WIYtqpi4pwwAAMABuHwJAADgAIQyAAAAByCUAQAAOAChDAAAwAEIZQAAAA5AKANQalavXi2Xy1Xk0LVrV6Wnp8vlcikiIkJZWVkey8fFxWnChAnl03gAKGOEMgClplOnTtq9e3eB4aWXXpLL5dJf/vIX97xZWVl69tlny7G1AFC+CGUASk1wcLCioqI8hgMHDuj+++/X2LFj9ec//9k97z333KMpU6Zoz5495dhiACg/hDIAZea3337TjTfeqC5duujxxx/3mNanTx81adJEjz32WDm1DgDKF6EMQJnIy8tT3759FRQUpLlz58rlcnlMd7lcmjRpkl5++WX99NNP5dRKACg/hDIAZWLs2LFas2aNli5dqqpVqxY6T1JSkq644gqNGzeujFsHAOWPUAag1C1YsEDPPvusFixYoKZNmxY776RJk7Rw4UJ9/fXXZdQ6AHAGQhmAUrVhwwbdfvvtmjRpkpKSks44f8eOHXXTTTdp9OjRZdA6AHCOoPJuAICKa9++ferZs6e6dOmi/v37KyMjw2N6YGBgocs9+eSTuvjiixUUxFcUgPMH33gASs2yZcv0888/6+eff1a9evUKTI+JidHq1asLjG/WrJn+3//7f3r55ZfLoJUA4AwuM7PybgQAAMD5jnvKAAAAHIBQBgAA4ACEMgAAAAcglAEAADgAoQwAAMABCGUAAAAOQCgDAABwAEIZAACAAxDKAAAAHIBQBgAA4ACEMgAAAAf4/xlcop/mN7MoAAAAAElFTkSuQmCC",
|
|
"text/plain": [
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"## Stacked Bar Charts\n",
|
|
"# use crosstab to create a cross-tabulation of two variables\n",
|
|
"tbl = pd.crosstab(bostonHousing_df.CAT_MEDV, bostonHousing_df.ZN)\n",
|
|
"\n",
|

"# convert counts to proportions within each ZN column (each column sums to 1)\n",
|
|
"propTbl = tbl / tbl.sum()\n",
|
|
"propTbl.round(2)\n",
|
|
"\n",
|

"# plot one stacked bar per ZN value (the transpose puts ZN on the x-axis)\n",
|
|
"ax = propTbl.transpose().plot(kind='bar', stacked=True)\n",
|
|
"plt.title('Distribution of CAT_MEDV by ZN')\n",
|
|
"plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))\n",
|
|
"plt.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 53,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>PC1</th>\n",
|
|
" <th>PC2</th>\n",
|
|
" <th>PC3</th>\n",
|
|
" <th>PC4</th>\n",
|
|
" <th>PC5</th>\n",
|
|
" <th>PC6</th>\n",
|
|
" <th>PC7</th>\n",
|
|
" <th>PC8</th>\n",
|
|
" <th>PC9</th>\n",
|
|
" <th>PC10</th>\n",
|
|
" <th>PC11</th>\n",
|
|
" <th>PC12</th>\n",
|
|
" <th>PC13</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>Standard deviation</th>\n",
|
|
" <td>83.7641</td>\n",
|
|
" <td>70.9143</td>\n",
|
|
" <td>22.6437</td>\n",
|
|
" <td>19.1815</td>\n",
|
|
" <td>8.4232</td>\n",
|
|
" <td>2.0917</td>\n",
|
|
" <td>1.6994</td>\n",
|
|
" <td>0.7796</td>\n",
|
|
" <td>0.6578</td>\n",
|
|
" <td>0.3704</td>\n",
|
|
" <td>0.1864</td>\n",
|
|
" <td>0.063</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Proportion of variance</th>\n",
|
|
" <td>0.5395</td>\n",
|
|
" <td>0.3867</td>\n",
|
|
" <td>0.0394</td>\n",
|
|
" <td>0.0283</td>\n",
|
|
" <td>0.0055</td>\n",
|
|
" <td>0.0003</td>\n",
|
|
" <td>0.0002</td>\n",
|
|
" <td>0.0000</td>\n",
|
|
" <td>0.0000</td>\n",
|
|
" <td>0.0000</td>\n",
|
|
" <td>0.0000</td>\n",
|
|
" <td>0.000</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>Cumulative proportion</th>\n",
|
|
" <td>0.5395</td>\n",
|
|
" <td>0.9262</td>\n",
|
|
" <td>0.9656</td>\n",
|
|
" <td>0.9939</td>\n",
|
|
" <td>0.9993</td>\n",
|
|
" <td>0.9997</td>\n",
|
|
" <td>0.9999</td>\n",
|
|
" <td>1.0000</td>\n",
|
|
" <td>1.0000</td>\n",
|
|
" <td>1.0000</td>\n",
|
|
" <td>1.0000</td>\n",
|
|
" <td>1.000</td>\n",
|
|
" <td>1.0</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" PC1 PC2 PC3 PC4 PC5 PC6 \\\n",
|
|
"Standard deviation 83.7641 70.9143 22.6437 19.1815 8.4232 2.0917 \n",
|
|
"Proportion of variance 0.5395 0.3867 0.0394 0.0283 0.0055 0.0003 \n",
|
|
"Cumulative proportion 0.5395 0.9262 0.9656 0.9939 0.9993 0.9997 \n",
|
|
"\n",
|
|
" PC7 PC8 PC9 PC10 PC11 PC12 PC13 \n",
|
|
"Standard deviation 1.6994 0.7796 0.6578 0.3704 0.1864 0.063 0.0 \n",
|
|
"Proportion of variance 0.0002 0.0000 0.0000 0.0000 0.0000 0.000 0.0 \n",
|
|
"Cumulative proportion 0.9999 1.0000 1.0000 1.0000 1.0000 1.000 1.0 "
|
|
]
|
|
},
|
|
"execution_count": 53,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
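|

"## PCA on the cereals data -- NOTE(review): the 2-component calories/rating fit is discarded when pcs is reassigned below\n",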
|
|
"cereals_df = pd.read_csv('Cereals.csv')\n",
|
|
"\n",
|
|
"pcs = PCA(n_components=2)\n",
|
|
"pcs.fit(cereals_df[['calories', 'rating']])\n",
|
|
"\n",
|
|
"pcs = PCA()\n",
|
|
"pcs.fit(cereals_df.iloc[:, 3:].dropna(axis=0))\n",
|
|
"pcsSummary_df = pd.DataFrame({'Standard deviation': \n",
|
|
" np.sqrt(pcs.explained_variance_), 'Proportion of variance': \n",
|
|
" pcs.explained_variance_ratio_,'Cumulative proportion': \n",
|
|
" np.cumsum(pcs.explained_variance_ratio_)})\n",
|
|
"pcsSummary_df = pcsSummary_df.transpose()\n",
|
|
"pcsSummary_df.columns = ['PC{}'.format(i) for i in range(1, \n",
|
|
" len(pcsSummary_df.columns) + 1)]\n",
|
|
"pcsSummary_df.round(4)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 57,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"array([[ 379.63089542, -188.68156228],\n",
|
|
" [-188.68156228, 197.32632105]])"
|
|
]
|
|
},
|
|
"execution_count": 57,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"np.cov(cereals_df.calories, cereals_df.rating)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"orig_nbformat": 4
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|