Adding random forest and glmnet functions

2023-04-10 11:03:50 -05:00 · 2023-04-10 11:03:50 -05:00 · 150d91e110
commit 150d91e110
parent 92b51580dc
1 changed files with 56 additions and 11 deletions
--- a/Schrick-Noah_Homework-6.R
+++ b/Schrick-Noah_Homework-6.R
@ -6,33 +6,78 @@
 # 1. Penalized Regression and Classification
 ## a. Modified Ridge classification for LASSO penalties
-## Add cross-validation to tune penalty param
+### Add cross-validation to tune penalty param
-## Use npdro simulated data to test
+### Use npdro simulated data to test
-## Compare with Ridge
+### Compare with Ridge
-## Compare with Random Forest
+### Compare with Random Forest
 if (!require("randomForest")) install.packages("randomForest")
 library(randomForest)
 if (!require("ranger")) install.packages("ranger")
 library(ranger)
-## Compare with glmnet
+rf_comp <- function(train){
  # Fit two random-forest classifiers on `train` (randomForest, then ranger),
  # print each fitted model, and rank the variables by importance score.
  # The final slice_max() call on the ranger scores is the last expression
  # evaluated, so its value is what the function returns.
  # NOTE(review): the response is written as `as.factor(train$class)` while the
  # predictors are `.` with data=train; presumably `.` still expands to include
  # the `class` column itself (label leakage) -- confirm and exclude it if so.
  # NOTE(review): `T` is an ordinary variable that can be reassigned; prefer TRUE.
  rf<-randomForest(as.factor(train$class) ~ .,data=train, ntree=5000,
                   importance=T) 
  print(rf)  # error
  # Importance scores requested with type=1; the resulting column is referred
  # to below by the name MeanDecreaseAccuracy.
  rf_imp<-data.frame(rf_score=importance(rf, type=1))
  #dplyr::arrange(rf_imp,-MeanDecreaseAccuracy) 
  # NOTE(review): this top-20 table is neither assigned nor printed, so inside
  # the function it appears to be discarded -- confirm whether that is intended.
  dplyr::slice_max(rf_imp,order_by=MeanDecreaseAccuracy, n=20)
-### Alpha = 0
+  rf2<-ranger(as.factor(train$class) ~ ., data=train, num.trees=5000,
              importance="permutation") 
  print(rf2)  # error
  # ranger stores its scores in `variable.importance`; keep them in a
  # one-column data frame named rf_score.
  rf2_imp<-data.frame(rf_score=rf2$variable.importance)
  #dplyr::arrange(rf_imp,-MeanDecreaseAccuracy) 
  # Top 20 variables by ranger importance (the function's return value).
  dplyr::slice_max(rf2_imp,order_by=rf_score, n=20)
-### Alpha = 1
+  #rftest <- predict(rf, newdata=test, type="class")
  #confusionMatrix(table(rftest,test$class))  
 }
 # Run the random-forest comparison. `train` is not defined in the visible part
 # of the script -- presumably it is created earlier; confirm before running.
 rf_comp(train)
 ### Compare with glmnet
 if (!require("glmnet")) install.packages("glmnet")
 library(glmnet)
 # Fit a cross-validated binomial glmnet classifier and rank its coefficients.
 #
 # Arguments:
 #   train.X  predictors; coerced with as.matrix() before fitting.
 #   train.y  response handed to cv.glmnet() with family = "binomial".
 #   alpha_p  value forwarded to cv.glmnet()'s `alpha` argument.
 #
 # Side effects: prints the two CV-selected penalties (lambda.1se, lambda.min)
 # and draws the cross-validation curve.
 #
 # Returns: a data frame with columns `lambda.1se` (coefficients at lambda.1se)
 # and `abs_scores` (their absolute values), reduced to the 21 rows with the
 # largest abs_scores. The intercept row is a candidate like any other row,
 # which is why 21 rather than 20 rows are requested.
 glm_fcn <- function(train.X, train.y, alpha_p) {
   cv_fit <- cv.glmnet(as.matrix(train.X), train.y, alpha = alpha_p,
                       family = "binomial", type.measure = "class")

   # A bare `cv_fit$lambda.1se` is not auto-printed inside a function, so the
   # selected penalties are printed explicitly.
   print(c(lambda.1se = cv_fit$lambda.1se, lambda.min = cv_fit$lambda.min))
   plot(cv_fit)

   # Request the lambda.1se solution explicitly and take the single coefficient
   # column by position, so the result does not depend on how predict() happens
   # to label that column.
   coef_1se <- as.matrix(
     predict(cv_fit, type = "coefficients", s = "lambda.1se")
   )
   glmnet.df <- data.frame(lambda.1se = coef_1se[, 1])
   glmnet.df$abs_scores <- abs(glmnet.df$lambda.1se)
   dplyr::slice_max(glmnet.df, order_by = abs_scores, n = 21)
 }
 #### Alpha = 0
 # alpha = 0 should correspond to the ridge penalty per glmnet's `alpha`
 # documentation -- confirm. `train.X` / `train.y` are not defined in the
 # visible part of the script.
 glm_fcn(train.X, train.y, 0)
 #### Alpha = 1
 # alpha = 1 should correspond to the LASSO penalty per glmnet's `alpha`
 # documentation -- confirm.
 glm_fcn(train.X, train.y, 1)
 ## b. Repeat comparison using a graph with clusters
 ## c. Use npdro and igraph to create knn
-## Plot network
+### Plot network
 ## d. Add Laplace graph penalty
-## Find resulting beta coeffs
+### Find resulting beta coeffs
-## Optimize or choose value for lambda2
+### Optimize or choose value for lambda2
-## Compare to a) and b)
+### Compare to a) and b)
 # 2. Gradient Descent
 ## Write fn with learning param