PCA of cereal dataset

This commit is contained in:
Noah L. Schrick 2023-01-24 18:45:15 -06:00
parent ce06f9e427
commit 09a5f43d9c
2 changed files with 206 additions and 4 deletions

78
Cereals.csv Normal file
View File

@ -0,0 +1,78 @@
name,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
100%_Bran,N,C,70,4,1,130,10,5,6,280,25,3,1,0.33,68.402973
100%_Natural_Bran,Q,C,120,3,5,15,2,8,8,135,0,3,1,1,33.983679
All-Bran,K,C,70,4,1,260,9,7,5,320,25,3,1,0.33,59.425505
All-Bran_with_Extra_Fiber,K,C,50,4,0,140,14,8,0,330,25,3,1,0.5,93.704912
Almond_Delight,R,C,110,2,2,200,1,14,8,,25,3,1,0.75,34.384843
Apple_Cinnamon_Cheerios,G,C,110,2,2,180,1.5,10.5,10,70,25,1,1,0.75,29.509541
Apple_Jacks,K,C,110,2,0,125,1,11,14,30,25,2,1,1,33.174094
Basic_4,G,C,130,3,2,210,2,18,8,100,25,3,1.33,0.75,37.038562
Bran_Chex,R,C,90,2,1,200,4,15,6,125,25,1,1,0.67,49.120253
Bran_Flakes,P,C,90,3,0,210,5,13,5,190,25,3,1,0.67,53.313813
Cap'n'Crunch,Q,C,120,1,2,220,0,12,12,35,25,2,1,0.75,18.042851
Cheerios,G,C,110,6,2,290,2,17,1,105,25,1,1,1.25,50.764999
Cinnamon_Toast_Crunch,G,C,120,1,3,210,0,13,9,45,25,2,1,0.75,19.823573
Clusters,G,C,110,3,2,140,2,13,7,105,25,3,1,0.5,40.400208
Cocoa_Puffs,G,C,110,1,1,180,0,12,13,55,25,2,1,1,22.736446
Corn_Chex,R,C,110,2,0,280,0,22,3,25,25,1,1,1,41.445019
Corn_Flakes,K,C,100,2,0,290,1,21,2,35,25,1,1,1,45.863324
Corn_Pops,K,C,110,1,0,90,1,13,12,20,25,2,1,1,35.782791
Count_Chocula,G,C,110,1,1,180,0,12,13,65,25,2,1,1,22.396513
Cracklin'_Oat_Bran,K,C,110,3,3,140,4,10,7,160,25,3,1,0.5,40.448772
Cream_of_Wheat_(Quick),N,H,100,3,0,80,1,21,0,,0,2,1,1,64.533816
Crispix,K,C,110,2,0,220,1,21,3,30,25,3,1,1,46.895644
Crispy_Wheat_&_Raisins,G,C,100,2,1,140,2,11,10,120,25,3,1,0.75,36.176196
Double_Chex,R,C,100,2,0,190,1,18,5,80,25,3,1,0.75,44.330856
Froot_Loops,K,C,110,2,1,125,1,11,13,30,25,2,1,1,32.207582
Frosted_Flakes,K,C,110,1,0,200,1,14,11,25,25,1,1,0.75,31.435973
Frosted_Mini-Wheats,K,C,100,3,0,0,3,14,7,100,25,2,1,0.8,58.345141
"Fruit_&_Fibre_Dates,_Walnuts,_and_Oats",P,C,120,3,2,160,5,12,10,200,25,3,1.25,0.67,40.917047
Fruitful_Bran,K,C,120,3,0,240,5,14,12,190,25,3,1.33,0.67,41.015492
Fruity_Pebbles,P,C,110,1,1,135,0,13,12,25,25,2,1,0.75,28.025765
Golden_Crisp,P,C,100,2,0,45,0,11,15,40,25,1,1,0.88,35.252444
Golden_Grahams,G,C,110,1,1,280,0,15,9,45,25,2,1,0.75,23.804043
Grape_Nuts_Flakes,P,C,100,3,1,140,3,15,5,85,25,3,1,0.88,52.076897
Grape-Nuts,P,C,110,3,0,170,3,17,3,90,25,3,1,0.25,53.371007
Great_Grains_Pecan,P,C,120,3,3,75,3,13,4,100,25,3,1,0.33,45.811716
Honey_Graham_Ohs,Q,C,120,1,2,220,1,12,11,45,25,2,1,1,21.871292
Honey_Nut_Cheerios,G,C,110,3,1,250,1.5,11.5,10,90,25,1,1,0.75,31.072217
Honey-comb,P,C,110,1,0,180,0,14,11,35,25,1,1,1.33,28.742414
Just_Right_Crunchy__Nuggets,K,C,110,2,1,170,1,17,6,60,100,3,1,1,36.523683
Just_Right_Fruit_&_Nut,K,C,140,3,1,170,2,20,9,95,100,3,1.3,0.75,36.471512
Kix,G,C,110,2,1,260,0,21,3,40,25,2,1,1.5,39.241114
Life,Q,C,100,4,2,150,2,12,6,95,25,2,1,0.67,45.328074
Lucky_Charms,G,C,110,2,1,180,0,12,12,55,25,2,1,1,26.734515
Maypo,A,H,100,4,1,0,0,16,3,95,25,2,1,1,54.850917
"Muesli_Raisins,_Dates,_&_Almonds",R,C,150,4,3,95,3,16,11,170,25,3,1,1,37.136863
"Muesli_Raisins,_Peaches,_&_Pecans",R,C,150,4,3,150,3,16,11,170,25,3,1,1,34.139765
Mueslix_Crispy_Blend,K,C,160,3,2,150,3,17,13,160,25,3,1.5,0.67,30.313351
Multi-Grain_Cheerios,G,C,100,2,1,220,2,15,6,90,25,1,1,1,40.105965
Nut&Honey_Crunch,K,C,120,2,1,190,0,15,9,40,25,2,1,0.67,29.924285
Nutri-Grain_Almond-Raisin,K,C,140,3,2,220,3,21,7,130,25,3,1.33,0.67,40.69232
Nutri-grain_Wheat,K,C,90,3,0,170,3,18,2,90,25,3,1,1,59.642837
Oatmeal_Raisin_Crisp,G,C,130,3,2,170,1.5,13.5,10,120,25,3,1.25,0.5,30.450843
Post_Nat._Raisin_Bran,P,C,120,3,1,200,6,11,14,260,25,3,1.33,0.67,37.840594
Product_19,K,C,100,3,0,320,1,20,3,45,100,3,1,1,41.50354
Puffed_Rice,Q,C,50,1,0,0,0,13,0,15,0,3,0.5,1,60.756112
Puffed_Wheat,Q,C,50,2,0,0,1,10,0,50,0,3,0.5,1,63.005645
Quaker_Oat_Squares,Q,C,100,4,1,135,2,14,6,110,25,3,1,0.5,49.511874
Quaker_Oatmeal,Q,H,100,5,2,0,2.7,,,110,0,1,1,0.67,50.828392
Raisin_Bran,K,C,120,3,1,210,5,14,12,240,25,2,1.33,0.75,39.259197
Raisin_Nut_Bran,G,C,100,3,2,140,2.5,10.5,8,140,25,3,1,0.5,39.7034
Raisin_Squares,K,C,90,2,0,0,2,15,6,110,25,3,1,0.5,55.333142
Rice_Chex,R,C,110,1,0,240,0,23,2,30,25,1,1,1.13,41.998933
Rice_Krispies,K,C,110,2,0,290,0,22,3,35,25,1,1,1,40.560159
Shredded_Wheat,N,C,80,2,0,0,3,16,0,95,0,1,0.83,1,68.235885
Shredded_Wheat_'n'Bran,N,C,90,3,0,0,4,19,0,140,0,1,1,0.67,74.472949
Shredded_Wheat_spoon_size,N,C,90,3,0,0,3,20,0,120,0,1,1,0.67,72.801787
Smacks,K,C,110,2,1,70,1,9,15,40,25,2,1,0.75,31.230054
Special_K,K,C,110,6,0,230,1,16,3,55,25,1,1,1,53.131324
Strawberry_Fruit_Wheats,N,C,90,2,0,15,3,15,5,90,25,2,1,1,59.363993
Total_Corn_Flakes,G,C,110,2,1,200,0,21,3,35,100,3,1,1,38.839746
Total_Raisin_Bran,G,C,140,3,1,190,4,15,14,230,100,3,1.5,1,28.592785
Total_Whole_Grain,G,C,100,3,1,200,3,16,3,110,100,3,1,1,46.658844
Triples,G,C,110,2,1,250,0,21,3,60,25,3,1,0.75,39.106174
Trix,G,C,110,1,1,140,0,13,12,25,25,2,1,1,27.753301
Wheat_Chex,R,C,100,3,1,230,3,17,3,115,25,1,1,0.67,49.787445
Wheaties,G,C,100,3,1,200,3,17,3,110,25,1,1,1,51.592193
Wheaties_Honey_Gold,G,C,110,2,1,200,1,16,8,60,25,1,1,0.75,36.187559
1 name mfr type calories protein fat sodium fiber carbo sugars potass vitamins shelf weight cups rating
2 100%_Bran N C 70 4 1 130 10 5 6 280 25 3 1 0.33 68.402973
3 100%_Natural_Bran Q C 120 3 5 15 2 8 8 135 0 3 1 1 33.983679
4 All-Bran K C 70 4 1 260 9 7 5 320 25 3 1 0.33 59.425505
5 All-Bran_with_Extra_Fiber K C 50 4 0 140 14 8 0 330 25 3 1 0.5 93.704912
6 Almond_Delight R C 110 2 2 200 1 14 8 25 3 1 0.75 34.384843
7 Apple_Cinnamon_Cheerios G C 110 2 2 180 1.5 10.5 10 70 25 1 1 0.75 29.509541
8 Apple_Jacks K C 110 2 0 125 1 11 14 30 25 2 1 1 33.174094
9 Basic_4 G C 130 3 2 210 2 18 8 100 25 3 1.33 0.75 37.038562
10 Bran_Chex R C 90 2 1 200 4 15 6 125 25 1 1 0.67 49.120253
11 Bran_Flakes P C 90 3 0 210 5 13 5 190 25 3 1 0.67 53.313813
12 Cap'n'Crunch Q C 120 1 2 220 0 12 12 35 25 2 1 0.75 18.042851
13 Cheerios G C 110 6 2 290 2 17 1 105 25 1 1 1.25 50.764999
14 Cinnamon_Toast_Crunch G C 120 1 3 210 0 13 9 45 25 2 1 0.75 19.823573
15 Clusters G C 110 3 2 140 2 13 7 105 25 3 1 0.5 40.400208
16 Cocoa_Puffs G C 110 1 1 180 0 12 13 55 25 2 1 1 22.736446
17 Corn_Chex R C 110 2 0 280 0 22 3 25 25 1 1 1 41.445019
18 Corn_Flakes K C 100 2 0 290 1 21 2 35 25 1 1 1 45.863324
19 Corn_Pops K C 110 1 0 90 1 13 12 20 25 2 1 1 35.782791
20 Count_Chocula G C 110 1 1 180 0 12 13 65 25 2 1 1 22.396513
21 Cracklin'_Oat_Bran K C 110 3 3 140 4 10 7 160 25 3 1 0.5 40.448772
22 Cream_of_Wheat_(Quick) N H 100 3 0 80 1 21 0 0 2 1 1 64.533816
23 Crispix K C 110 2 0 220 1 21 3 30 25 3 1 1 46.895644
24 Crispy_Wheat_&_Raisins G C 100 2 1 140 2 11 10 120 25 3 1 0.75 36.176196
25 Double_Chex R C 100 2 0 190 1 18 5 80 25 3 1 0.75 44.330856
26 Froot_Loops K C 110 2 1 125 1 11 13 30 25 2 1 1 32.207582
27 Frosted_Flakes K C 110 1 0 200 1 14 11 25 25 1 1 0.75 31.435973
28 Frosted_Mini-Wheats K C 100 3 0 0 3 14 7 100 25 2 1 0.8 58.345141
29 Fruit_&_Fibre_Dates,_Walnuts,_and_Oats P C 120 3 2 160 5 12 10 200 25 3 1.25 0.67 40.917047
30 Fruitful_Bran K C 120 3 0 240 5 14 12 190 25 3 1.33 0.67 41.015492
31 Fruity_Pebbles P C 110 1 1 135 0 13 12 25 25 2 1 0.75 28.025765
32 Golden_Crisp P C 100 2 0 45 0 11 15 40 25 1 1 0.88 35.252444
33 Golden_Grahams G C 110 1 1 280 0 15 9 45 25 2 1 0.75 23.804043
34 Grape_Nuts_Flakes P C 100 3 1 140 3 15 5 85 25 3 1 0.88 52.076897
35 Grape-Nuts P C 110 3 0 170 3 17 3 90 25 3 1 0.25 53.371007
36 Great_Grains_Pecan P C 120 3 3 75 3 13 4 100 25 3 1 0.33 45.811716
37 Honey_Graham_Ohs Q C 120 1 2 220 1 12 11 45 25 2 1 1 21.871292
38 Honey_Nut_Cheerios G C 110 3 1 250 1.5 11.5 10 90 25 1 1 0.75 31.072217
39 Honey-comb P C 110 1 0 180 0 14 11 35 25 1 1 1.33 28.742414
40 Just_Right_Crunchy__Nuggets K C 110 2 1 170 1 17 6 60 100 3 1 1 36.523683
41 Just_Right_Fruit_&_Nut K C 140 3 1 170 2 20 9 95 100 3 1.3 0.75 36.471512
42 Kix G C 110 2 1 260 0 21 3 40 25 2 1 1.5 39.241114
43 Life Q C 100 4 2 150 2 12 6 95 25 2 1 0.67 45.328074
44 Lucky_Charms G C 110 2 1 180 0 12 12 55 25 2 1 1 26.734515
45 Maypo A H 100 4 1 0 0 16 3 95 25 2 1 1 54.850917
46 Muesli_Raisins,_Dates,_&_Almonds R C 150 4 3 95 3 16 11 170 25 3 1 1 37.136863
47 Muesli_Raisins,_Peaches,_&_Pecans R C 150 4 3 150 3 16 11 170 25 3 1 1 34.139765
48 Mueslix_Crispy_Blend K C 160 3 2 150 3 17 13 160 25 3 1.5 0.67 30.313351
49 Multi-Grain_Cheerios G C 100 2 1 220 2 15 6 90 25 1 1 1 40.105965
50 Nut&Honey_Crunch K C 120 2 1 190 0 15 9 40 25 2 1 0.67 29.924285
51 Nutri-Grain_Almond-Raisin K C 140 3 2 220 3 21 7 130 25 3 1.33 0.67 40.69232
52 Nutri-grain_Wheat K C 90 3 0 170 3 18 2 90 25 3 1 1 59.642837
53 Oatmeal_Raisin_Crisp G C 130 3 2 170 1.5 13.5 10 120 25 3 1.25 0.5 30.450843
54 Post_Nat._Raisin_Bran P C 120 3 1 200 6 11 14 260 25 3 1.33 0.67 37.840594
55 Product_19 K C 100 3 0 320 1 20 3 45 100 3 1 1 41.50354
56 Puffed_Rice Q C 50 1 0 0 0 13 0 15 0 3 0.5 1 60.756112
57 Puffed_Wheat Q C 50 2 0 0 1 10 0 50 0 3 0.5 1 63.005645
58 Quaker_Oat_Squares Q C 100 4 1 135 2 14 6 110 25 3 1 0.5 49.511874
59 Quaker_Oatmeal Q H 100 5 2 0 2.7 110 0 1 1 0.67 50.828392
60 Raisin_Bran K C 120 3 1 210 5 14 12 240 25 2 1.33 0.75 39.259197
61 Raisin_Nut_Bran G C 100 3 2 140 2.5 10.5 8 140 25 3 1 0.5 39.7034
62 Raisin_Squares K C 90 2 0 0 2 15 6 110 25 3 1 0.5 55.333142
63 Rice_Chex R C 110 1 0 240 0 23 2 30 25 1 1 1.13 41.998933
64 Rice_Krispies K C 110 2 0 290 0 22 3 35 25 1 1 1 40.560159
65 Shredded_Wheat N C 80 2 0 0 3 16 0 95 0 1 0.83 1 68.235885
66 Shredded_Wheat_'n'Bran N C 90 3 0 0 4 19 0 140 0 1 1 0.67 74.472949
67 Shredded_Wheat_spoon_size N C 90 3 0 0 3 20 0 120 0 1 1 0.67 72.801787
68 Smacks K C 110 2 1 70 1 9 15 40 25 2 1 0.75 31.230054
69 Special_K K C 110 6 0 230 1 16 3 55 25 1 1 1 53.131324
70 Strawberry_Fruit_Wheats N C 90 2 0 15 3 15 5 90 25 2 1 1 59.363993
71 Total_Corn_Flakes G C 110 2 1 200 0 21 3 35 100 3 1 1 38.839746
72 Total_Raisin_Bran G C 140 3 1 190 4 15 14 230 100 3 1.5 1 28.592785
73 Total_Whole_Grain G C 100 3 1 200 3 16 3 110 100 3 1 1 46.658844
74 Triples G C 110 2 1 250 0 21 3 60 25 3 1 0.75 39.106174
75 Trix G C 110 1 1 140 0 13 12 25 25 2 1 1 27.753301
76 Wheat_Chex R C 100 3 1 230 3 17 3 115 25 1 1 0.67 49.787445
77 Wheaties G C 100 3 1 200 3 17 3 110 25 1 1 1 51.592193
78 Wheaties_Honey_Gold G C 110 2 1 200 1 16 8 60 25 1 1 0.75 36.187559

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
@ -13,6 +13,7 @@
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.decomposition import PCA\n",
"import matplotlib.pyplot as plt"
]
},
@ -762,10 +763,133 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PC1</th>\n",
" <th>PC2</th>\n",
" <th>PC3</th>\n",
" <th>PC4</th>\n",
" <th>PC5</th>\n",
" <th>PC6</th>\n",
" <th>PC7</th>\n",
" <th>PC8</th>\n",
" <th>PC9</th>\n",
" <th>PC10</th>\n",
" <th>PC11</th>\n",
" <th>PC12</th>\n",
" <th>PC13</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Standard deviation</th>\n",
" <td>83.7641</td>\n",
" <td>70.9143</td>\n",
" <td>22.6437</td>\n",
" <td>19.1815</td>\n",
" <td>8.4232</td>\n",
" <td>2.0917</td>\n",
" <td>1.6994</td>\n",
" <td>0.7796</td>\n",
" <td>0.6578</td>\n",
" <td>0.3704</td>\n",
" <td>0.1864</td>\n",
" <td>0.063</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Proportion of variance</th>\n",
" <td>0.5395</td>\n",
" <td>0.3867</td>\n",
" <td>0.0394</td>\n",
" <td>0.0283</td>\n",
" <td>0.0055</td>\n",
" <td>0.0003</td>\n",
" <td>0.0002</td>\n",
" <td>0.0000</td>\n",
" <td>0.0000</td>\n",
" <td>0.0000</td>\n",
" <td>0.0000</td>\n",
" <td>0.000</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Cumulative proportion</th>\n",
" <td>0.5395</td>\n",
" <td>0.9262</td>\n",
" <td>0.9656</td>\n",
" <td>0.9939</td>\n",
" <td>0.9993</td>\n",
" <td>0.9997</td>\n",
" <td>0.9999</td>\n",
" <td>1.0000</td>\n",
" <td>1.0000</td>\n",
" <td>1.0000</td>\n",
" <td>1.0000</td>\n",
" <td>1.000</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PC1 PC2 PC3 PC4 PC5 PC6 \\\n",
"Standard deviation 83.7641 70.9143 22.6437 19.1815 8.4232 2.0917 \n",
"Proportion of variance 0.5395 0.3867 0.0394 0.0283 0.0055 0.0003 \n",
"Cumulative proportion 0.5395 0.9262 0.9656 0.9939 0.9993 0.9997 \n",
"\n",
" PC7 PC8 PC9 PC10 PC11 PC12 PC13 \n",
"Standard deviation 1.6994 0.7796 0.6578 0.3704 0.1864 0.063 0.0 \n",
"Proportion of variance 0.0002 0.0000 0.0000 0.0000 0.0000 0.000 0.0 \n",
"Cumulative proportion 0.9999 1.0000 1.0000 1.0000 1.0000 1.000 1.0 "
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"## PCA\n",
"# Summarise a principal component analysis of the cereal measurements.\n",
"cereals_df = pd.read_csv('Cereals.csv')\n",
"\n",
"# Skip the first three columns (name, mfr, type) and drop every row that\n",
"# has a missing value before fitting.\n",
"# NOTE(review): the columns are fit on their raw scales (no standardisation),\n",
"# so large-valued columns dominate the leading components - confirm intended.\n",
"pcs = PCA()\n",
"pcs.fit(cereals_df.iloc[:, 3:].dropna(axis=0))\n",
"\n",
"# Build the summary with one column per statistic, then transpose so that\n",
"# each statistic is a row and each principal component is a column.\n",
"pcsSummary_df = pd.DataFrame({\n",
"    'Standard deviation': np.sqrt(pcs.explained_variance_),\n",
"    'Proportion of variance': pcs.explained_variance_ratio_,\n",
"    'Cumulative proportion': np.cumsum(pcs.explained_variance_ratio_),\n",
"})\n",
"pcsSummary_df = pcsSummary_df.transpose()\n",
"pcsSummary_df.columns = [f'PC{i}' for i in range(1, len(pcsSummary_df.columns) + 1)]\n",
"pcsSummary_df.round(4)"
]
}
],
"metadata": {