Adding random forest and glmnet functions

2023-04-10 11:03:50 -05:00 · 2023-04-10 11:03:50 -05:00 · 150d91e110
commit 150d91e110
parent 92b51580dc
1 changed files with 56 additions and 11 deletions
--- a/Schrick-Noah_Homework-6.R
+++ b/Schrick-Noah_Homework-6.R
@ -6,33 +6,78 @@
 # 1. Penalized Regression and Classification
 ## a. Modified Ridge classification for LASSO penalties

-## Add cross-validation to tune penalty param
+### Add cross-validation to tune penalty param

-## Use npdro simulated data to test
+### Use npdro simulated data to test

-## Compare with Ridge
+### Compare with Ridge

-## Compare with Random Forest
+### Compare with Random Forest
+if (!require("randomForest")) install.packages("randomForest")
+library(randomForest)
+if (!require("ranger")) install.packages("ranger")
+library(ranger)

-## Compare with glmnet
+# Fit randomForest and ranger classifiers on `train` (response column `class`)
+# and report the top-20 permutation importances; returns the ranger table.
+rf_comp <- function(train){
+  # Factor the response in a local copy so `class ~ .` keeps it off the RHS;
+  # `as.factor(train$class) ~ .` left `class` itself among the predictors.
+  train$class <- as.factor(train$class)
+  rf <- randomForest(class ~ ., data=train, ntree=5000, importance=TRUE)
+  print(rf)  # reports the OOB error estimate
+  rf_imp <- data.frame(rf_score=importance(rf, type=1))
+  # Printed explicitly: a bare call mid-function is silently discarded.
+  print(dplyr::slice_max(rf_imp, order_by=MeanDecreaseAccuracy, n=20))
+
+  rf2 <- ranger(class ~ ., data=train, num.trees=5000,
+                importance="permutation")
+  print(rf2)  # reports the OOB prediction error
+  rf2_imp <- data.frame(rf_score=rf2$variable.importance)
+  dplyr::slice_max(rf2_imp, order_by=rf_score, n=20)
+}

-### Alpha = 0
+rf_comp(train)  # NOTE(review): `train` is not defined in the visible code; confirm it is built earlier

-### Alpha = 1
+### Compare with glmnet
+if (!require("glmnet")) install.packages("glmnet")
+library(glmnet)
+
+# Cross-validated binomial glmnet for mixing `alpha_p` (0 ridge, 1 lasso);
+# returns the 21 largest |coefficients| at lambda.1se, intercept row included.
+glm_fcn <- function(train.X, train.y, alpha_p){
+  glmnet.class.model <- cv.glmnet(as.matrix(train.X), train.y, alpha=alpha_p,
+                                  family="binomial", type.measure="class")
+  # Print explicitly; bare expressions inside a function are discarded.
+  print(glmnet.class.model$lambda.1se)
+  print(glmnet.class.model$lambda.min)
+  plot(glmnet.class.model)
+  glmnet.class.coeffs <- predict(glmnet.class.model, type="coefficients")
+  glmnet.df <- data.frame(as.matrix(glmnet.class.coeffs))
+  # Use position: the coefficient column's name varies by glmnet version.
+  glmnet.df$abs_scores <- abs(glmnet.df[[1]])
+  dplyr::slice_max(glmnet.df, order_by=abs_scores, n=21)
+}
+
+#### Alpha = 0
+glm_fcn(train.X, train.y, 0)  # ridge penalty; train.X/train.y come from outside this view (confirm)
+
+#### Alpha = 1
+glm_fcn(train.X, train.y, 1)  # lasso penalty; same inputs as the ridge run above

 ## b. Repeat comparison using a graph with clusters

 ## c. Use npdro and igraph to create knn

-## Plot network
+### Plot network

 ## d. Add Laplace graph penalty

-## Find resulting beta coeffs
+### Find resulting beta coeffs

-## Optimize or choose value for lambda2
+### Optimize or choose value for lambda2

-## Compare to a) and b)
+### Compare to a) and b)

 # 2. Gradient Descent
 ## Write fn with learning param