From 09a5f43d9cab6d961bb443e8218670f72afb79b4 Mon Sep 17 00:00:00 2001 From: noah Date: Tue, 24 Jan 2023 18:45:15 -0600 Subject: [PATCH] PCA of cereal dataset --- Cereals.csv | 78 +++++++++++++++++++ Schrick-Noah_Lecture-Work.ipynb | 132 +++++++++++++++++++++++++++++++- 2 files changed, 206 insertions(+), 4 deletions(-) create mode 100644 Cereals.csv diff --git a/Cereals.csv b/Cereals.csv new file mode 100644 index 0000000..7d27003 --- /dev/null +++ b/Cereals.csv @@ -0,0 +1,78 @@ +name,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating +100%_Bran,N,C,70,4,1,130,10,5,6,280,25,3,1,0.33,68.402973 +100%_Natural_Bran,Q,C,120,3,5,15,2,8,8,135,0,3,1,1,33.983679 +All-Bran,K,C,70,4,1,260,9,7,5,320,25,3,1,0.33,59.425505 +All-Bran_with_Extra_Fiber,K,C,50,4,0,140,14,8,0,330,25,3,1,0.5,93.704912 +Almond_Delight,R,C,110,2,2,200,1,14,8,,25,3,1,0.75,34.384843 +Apple_Cinnamon_Cheerios,G,C,110,2,2,180,1.5,10.5,10,70,25,1,1,0.75,29.509541 +Apple_Jacks,K,C,110,2,0,125,1,11,14,30,25,2,1,1,33.174094 +Basic_4,G,C,130,3,2,210,2,18,8,100,25,3,1.33,0.75,37.038562 +Bran_Chex,R,C,90,2,1,200,4,15,6,125,25,1,1,0.67,49.120253 +Bran_Flakes,P,C,90,3,0,210,5,13,5,190,25,3,1,0.67,53.313813 +Cap'n'Crunch,Q,C,120,1,2,220,0,12,12,35,25,2,1,0.75,18.042851 +Cheerios,G,C,110,6,2,290,2,17,1,105,25,1,1,1.25,50.764999 +Cinnamon_Toast_Crunch,G,C,120,1,3,210,0,13,9,45,25,2,1,0.75,19.823573 +Clusters,G,C,110,3,2,140,2,13,7,105,25,3,1,0.5,40.400208 +Cocoa_Puffs,G,C,110,1,1,180,0,12,13,55,25,2,1,1,22.736446 +Corn_Chex,R,C,110,2,0,280,0,22,3,25,25,1,1,1,41.445019 +Corn_Flakes,K,C,100,2,0,290,1,21,2,35,25,1,1,1,45.863324 +Corn_Pops,K,C,110,1,0,90,1,13,12,20,25,2,1,1,35.782791 +Count_Chocula,G,C,110,1,1,180,0,12,13,65,25,2,1,1,22.396513 +Cracklin'_Oat_Bran,K,C,110,3,3,140,4,10,7,160,25,3,1,0.5,40.448772 +Cream_of_Wheat_(Quick),N,H,100,3,0,80,1,21,0,,0,2,1,1,64.533816 +Crispix,K,C,110,2,0,220,1,21,3,30,25,3,1,1,46.895644 +Crispy_Wheat_&_Raisins,G,C,100,2,1,140,2,11,10,120,25,3,1,0.75,36.176196 +Double_Chex,R,C,100,2,0,190,1,18,5,80,25,3,1,0.75,44.330856 +Froot_Loops,K,C,110,2,1,125,1,11,13,30,25,2,1,1,32.207582 +Frosted_Flakes,K,C,110,1,0,200,1,14,11,25,25,1,1,0.75,31.435973 +Frosted_Mini-Wheats,K,C,100,3,0,0,3,14,7,100,25,2,1,0.8,58.345141 +"Fruit_&_Fibre_Dates,_Walnuts,_and_Oats",P,C,120,3,2,160,5,12,10,200,25,3,1.25,0.67,40.917047 +Fruitful_Bran,K,C,120,3,0,240,5,14,12,190,25,3,1.33,0.67,41.015492 +Fruity_Pebbles,P,C,110,1,1,135,0,13,12,25,25,2,1,0.75,28.025765 +Golden_Crisp,P,C,100,2,0,45,0,11,15,40,25,1,1,0.88,35.252444 +Golden_Grahams,G,C,110,1,1,280,0,15,9,45,25,2,1,0.75,23.804043 +Grape_Nuts_Flakes,P,C,100,3,1,140,3,15,5,85,25,3,1,0.88,52.076897 +Grape-Nuts,P,C,110,3,0,170,3,17,3,90,25,3,1,0.25,53.371007 +Great_Grains_Pecan,P,C,120,3,3,75,3,13,4,100,25,3,1,0.33,45.811716 +Honey_Graham_Ohs,Q,C,120,1,2,220,1,12,11,45,25,2,1,1,21.871292 +Honey_Nut_Cheerios,G,C,110,3,1,250,1.5,11.5,10,90,25,1,1,0.75,31.072217 +Honey-comb,P,C,110,1,0,180,0,14,11,35,25,1,1,1.33,28.742414 +Just_Right_Crunchy__Nuggets,K,C,110,2,1,170,1,17,6,60,100,3,1,1,36.523683 +Just_Right_Fruit_&_Nut,K,C,140,3,1,170,2,20,9,95,100,3,1.3,0.75,36.471512 +Kix,G,C,110,2,1,260,0,21,3,40,25,2,1,1.5,39.241114 +Life,Q,C,100,4,2,150,2,12,6,95,25,2,1,0.67,45.328074 +Lucky_Charms,G,C,110,2,1,180,0,12,12,55,25,2,1,1,26.734515 +Maypo,A,H,100,4,1,0,0,16,3,95,25,2,1,1,54.850917 +"Muesli_Raisins,_Dates,_&_Almonds",R,C,150,4,3,95,3,16,11,170,25,3,1,1,37.136863 +"Muesli_Raisins,_Peaches,_&_Pecans",R,C,150,4,3,150,3,16,11,170,25,3,1,1,34.139765 +Mueslix_Crispy_Blend,K,C,160,3,2,150,3,17,13,160,25,3,1.5,0.67,30.313351 +Multi-Grain_Cheerios,G,C,100,2,1,220,2,15,6,90,25,1,1,1,40.105965 +Nut&Honey_Crunch,K,C,120,2,1,190,0,15,9,40,25,2,1,0.67,29.924285 +Nutri-Grain_Almond-Raisin,K,C,140,3,2,220,3,21,7,130,25,3,1.33,0.67,40.69232 +Nutri-grain_Wheat,K,C,90,3,0,170,3,18,2,90,25,3,1,1,59.642837 +Oatmeal_Raisin_Crisp,G,C,130,3,2,170,1.5,13.5,10,120,25,3,1.25,0.5,30.450843 +Post_Nat._Raisin_Bran,P,C,120,3,1,200,6,11,14,260,25,3,1.33,0.67,37.840594 +Product_19,K,C,100,3,0,320,1,20,3,45,100,3,1,1,41.50354 +Puffed_Rice,Q,C,50,1,0,0,0,13,0,15,0,3,0.5,1,60.756112 +Puffed_Wheat,Q,C,50,2,0,0,1,10,0,50,0,3,0.5,1,63.005645 +Quaker_Oat_Squares,Q,C,100,4,1,135,2,14,6,110,25,3,1,0.5,49.511874 +Quaker_Oatmeal,Q,H,100,5,2,0,2.7,,,110,0,1,1,0.67,50.828392 +Raisin_Bran,K,C,120,3,1,210,5,14,12,240,25,2,1.33,0.75,39.259197 +Raisin_Nut_Bran,G,C,100,3,2,140,2.5,10.5,8,140,25,3,1,0.5,39.7034 +Raisin_Squares,K,C,90,2,0,0,2,15,6,110,25,3,1,0.5,55.333142 +Rice_Chex,R,C,110,1,0,240,0,23,2,30,25,1,1,1.13,41.998933 +Rice_Krispies,K,C,110,2,0,290,0,22,3,35,25,1,1,1,40.560159 +Shredded_Wheat,N,C,80,2,0,0,3,16,0,95,0,1,0.83,1,68.235885 +Shredded_Wheat_'n'Bran,N,C,90,3,0,0,4,19,0,140,0,1,1,0.67,74.472949 +Shredded_Wheat_spoon_size,N,C,90,3,0,0,3,20,0,120,0,1,1,0.67,72.801787 +Smacks,K,C,110,2,1,70,1,9,15,40,25,2,1,0.75,31.230054 +Special_K,K,C,110,6,0,230,1,16,3,55,25,1,1,1,53.131324 +Strawberry_Fruit_Wheats,N,C,90,2,0,15,3,15,5,90,25,2,1,1,59.363993 +Total_Corn_Flakes,G,C,110,2,1,200,0,21,3,35,100,3,1,1,38.839746 +Total_Raisin_Bran,G,C,140,3,1,190,4,15,14,230,100,3,1.5,1,28.592785 +Total_Whole_Grain,G,C,100,3,1,200,3,16,3,110,100,3,1,1,46.658844 +Triples,G,C,110,2,1,250,0,21,3,60,25,3,1,0.75,39.106174 +Trix,G,C,110,1,1,140,0,13,12,25,25,2,1,1,27.753301 +Wheat_Chex,R,C,100,3,1,230,3,17,3,115,25,1,1,0.67,49.787445 +Wheaties,G,C,100,3,1,200,3,17,3,110,25,1,1,1,51.592193 +Wheaties_Honey_Gold,G,C,110,2,1,200,1,16,8,60,25,1,1,0.75,36.187559 diff --git a/Schrick-Noah_Lecture-Work.ipynb b/Schrick-Noah_Lecture-Work.ipynb index d9c657f..69b4812 100644 --- a/Schrick-Noah_Lecture-Work.ipynb +++ b/Schrick-Noah_Lecture-Work.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 11, + "execution_count": 52, "metadata": {}, "outputs": [], "source": [ @@ -13,6 +13,7 @@ "\n", "import pandas as pd\n", "import numpy as np\n", + "from sklearn.decomposition import PCA\n", "import matplotlib.pyplot as plt" ] }, @@ -762,10 +763,133 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 53, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PC1PC2PC3PC4PC5PC6PC7PC8PC9PC10PC11PC12PC13
Standard deviation83.764170.914322.643719.18158.42322.09171.69940.77960.65780.37040.18640.0630.0
Proportion of variance0.53950.38670.03940.02830.00550.00030.00020.00000.00000.00000.00000.0000.0
Cumulative proportion0.53950.92620.96560.99390.99930.99970.99991.00001.00001.00001.00001.0001.0
\n", + "
" + ], + "text/plain": [ + " PC1 PC2 PC3 PC4 PC5 PC6 \\\n", + "Standard deviation 83.7641 70.9143 22.6437 19.1815 8.4232 2.0917 \n", + "Proportion of variance 0.5395 0.3867 0.0394 0.0283 0.0055 0.0003 \n", + "Cumulative proportion 0.5395 0.9262 0.9656 0.9939 0.9993 0.9997 \n", + "\n", + " PC7 PC8 PC9 PC10 PC11 PC12 PC13 \n", + "Standard deviation 1.6994 0.7796 0.6578 0.3704 0.1864 0.063 0.0 \n", + "Proportion of variance 0.0002 0.0000 0.0000 0.0000 0.0000 0.000 0.0 \n", + "Cumulative proportion 0.9999 1.0000 1.0000 1.0000 1.0000 1.000 1.0 " + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "## PCA\n", + "cereals_df = pd.read_csv('Cereals.csv')\n", + "\n", + "pcs = PCA(n_components=2)\n", + "pcs.fit(cereals_df[['calories', 'rating']])\n", + "\n", + "pcs = PCA()\n", + "pcs.fit(cereals_df.iloc[:, 3:].dropna(axis=0))\n", + "pcsSummary_df = pd.DataFrame({'Standard deviation': \n", + " np.sqrt(pcs.explained_variance_), 'Proportion of variance': \n", + " pcs.explained_variance_ratio_,'Cumulative proportion': \n", + " np.cumsum(pcs.explained_variance_ratio_)})\n", + "pcsSummary_df = pcsSummary_df.transpose()\n", + "pcsSummary_df.columns = ['PC{}'.format(i) for i in range(1, \n", + " len(pcsSummary_df.columns) + 1)]\n", + "pcsSummary_df.round(4)" + ] } ], "metadata": {