diff --git a/Schrick-Noah_Homework-6.R b/Schrick-Noah_Homework-6.R
index 026ab47..238382c 100644
--- a/Schrick-Noah_Homework-6.R
+++ b/Schrick-Noah_Homework-6.R
@@ -6,33 +6,78 @@
 # 1. Penalized Regression and Classification
 ## a. Modified Ridge classification for LASSO penalties
-## Add cross-validation to tune penalty param
+### Add cross-validation to tune penalty param
 
-## Use npdro simulated data to test
+### Use npdro simulated data to test
 
-## Compare with Ridge
+### Compare with Ridge
 
-## Compare with Random Forest
+### Compare with Random Forest
+if (!require("randomForest")) install.packages("randomForest")
+library(randomForest)
+if (!require("ranger")) install.packages("ranger")
+library(ranger)
+if (!require("dplyr")) install.packages("dplyr")
 
-## Compare with glmnet
+rf_comp <- function(train){
+  # Permutation importance from randomForest (requires importance=TRUE)
+  rf <- randomForest(as.factor(train$class) ~ ., data=train, ntree=5000,
+                     importance=TRUE)
+  print(rf)  # shows the OOB error estimate and confusion matrix
+  rf_imp <- data.frame(rf_score=importance(rf, type=1))
+  print(dplyr::slice_max(rf_imp, order_by=rf_score, n=20))
+
+  # Same comparison with ranger's permutation importance
+  rf2 <- ranger(as.factor(train$class) ~ ., data=train, num.trees=5000,
+                importance="permutation")
+  print(rf2)  # shows the OOB prediction error
+  rf2_imp <- data.frame(rf_score=rf2$variable.importance)
+  dplyr::slice_max(rf2_imp, order_by=rf_score, n=20)
+
+  #rftest <- predict(rf, newdata=test, type="class")
+  #confusionMatrix(table(rftest, test$class))
+}
 
-### Alpha = 0
+rf_comp(train)
 
-### Alpha = 1
+### Compare with glmnet
+if (!require("glmnet")) install.packages("glmnet")
+library(glmnet)
+
+glm_fcn <- function(train.X, train.y, alpha_p){
+  glmnet.class.model <- cv.glmnet(as.matrix(train.X), train.y, alpha=alpha_p,
+                                  family="binomial", type.measure="class")
+  print(glmnet.class.model$lambda.1se)
+  print(glmnet.class.model$lambda.min)
+  plot(glmnet.class.model)
+  # Coefficients at lambda.1se, the predict.cv.glmnet default
+  glmnet.class.coeffs <- predict(glmnet.class.model, type="coefficients")
+  #glmnet.class.coeffs # maybe var 3, excess kurtosis, is most important
+  model.class.terms <- colnames(train.X)  # glmnet adds an intercept, which we ignore
+  #nonzero.glmnet.qtrait.coeffs <- model.qtrait.terms[glmnet.qtrait.coeffs@i[which(glmnet.qtrait.coeffs@i!=0)]] # skip intercept if there, 0-based counting
+
+  glmnet.df <- data.frame(as.matrix(glmnet.class.coeffs))
+  glmnet.df$abs_scores <- abs(glmnet.df[[1]])  # first column holds the coefficients
+  dplyr::slice_max(glmnet.df, order_by=abs_scores, n=21)  # top 20 plus the intercept
+}
+
+#### Alpha = 0 (Ridge)
+glm_fcn(train.X, train.y, 0)
+
+#### Alpha = 1 (LASSO)
+glm_fcn(train.X, train.y, 1)
 
 ## b. Repeat comparison using a graph with clusters
 
 ## c. Use npdro and igraph to create knn
-## Plot network
+### Plot network
 
 ## d. Add Laplace graph penalty
-## Find resulting beta coeffs
+### Find resulting beta coeffs
 
-## Optimize or choose value for lambda2
+### Optimize or choose value for lambda2
 
-## Compare to a) and b)
+### Compare to a) and b)
 
 # 2. Gradient Descent
 ## Write fn with learning param
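
Review note on part (a): the modified-Ridge-to-LASSO classifier itself isn't in this diff, so below is a minimal sketch of one way to do it, assuming a logistic model fit by proximal gradient descent (ISTA) with the L1 soft-thresholding step, plus a small k-fold CV loop to tune the penalty. `soft`, `lasso_logistic`, and `cv_lambda` are hypothetical helper names, the sketch assumes roughly standardized predictors (shrink `eta` if it diverges), and it is not the course's ridge code or npdro's API.

# Soft-thresholding: the proximal operator of the L1 penalty
soft <- function(z, t) sign(z) * pmax(abs(z) - t, 0)

# LASSO-penalized logistic regression via proximal gradient descent (ISTA).
# X: numeric matrix; y: 0/1 vector; lam: L1 penalty; eta: step size.
lasso_logistic <- function(X, y, lam, eta = 0.1, n_iter = 2000) {
  beta <- rep(0, ncol(X))
  for (i in 1:n_iter) {
    p <- 1 / (1 + exp(-X %*% beta))              # predicted probabilities
    grad <- crossprod(X, p - y) / nrow(X)        # gradient of the logistic loss
    beta <- soft(beta - eta * grad, eta * lam)   # gradient step, then prox
  }
  beta
}

# Tune lam by k-fold CV on misclassification rate
cv_lambda <- function(X, y, lambdas, k = 5) {
  folds <- sample(rep(1:k, length.out = nrow(X)))
  cv_err <- sapply(lambdas, function(lam) {
    mean(sapply(1:k, function(f) {
      b <- lasso_logistic(X[folds != f, ], y[folds != f], lam)
      pred <- as.numeric(X[folds == f, ] %*% b > 0)
      mean(pred != y[folds == f])
    }))
  })
  lambdas[which.min(cv_err)]
}

# Hypothetical usage with the simulated data from the diff:
# X <- scale(as.matrix(train[, setdiff(names(train), "class")]))
# y <- as.numeric(as.factor(train$class)) - 1
# cv_lambda(X, y, lambdas = 10^seq(-3, 0, length.out = 20))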
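
On part (c): the npdro side isn't shown here, so this sketch builds the kNN network directly with base R distances and igraph; `knn_graph` is a hypothetical helper, and `train.X` is the predictor matrix already used in the diff. Symmetrizing the adjacency keeps the graph undirected, which also suits the Laplacian in part (d).

if (!require("igraph")) install.packages("igraph")
library(igraph)

# Symmetrized kNN graph from a data matrix (one node per row)
knn_graph <- function(X, k = 5) {
  d <- as.matrix(dist(X))
  n <- nrow(d)
  adj <- matrix(0, n, n)
  for (i in 1:n) {
    nbrs <- order(d[i, ])[2:(k + 1)]  # k nearest neighbors, skipping self
    adj[i, nbrs] <- 1
  }
  adj <- pmax(adj, t(adj))  # symmetrize: keep an edge if either end chose it
  graph_from_adjacency_matrix(adj, mode = "undirected")
}

g <- knn_graph(as.matrix(train.X), k = 5)
plot(g, vertex.size = 4, vertex.label = NA)  # plot the network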
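
On part (d): for a squared-error loss, the Laplace graph penalty lambda2 * t(b) %*% L %*% b admits a closed form, which yields the requested beta coefficients directly. This is a simplified regression sketch, not the classification version: `graph_penalized_beta` is a hypothetical name, the graph `g` must be over the p predictors (not the samples) so that L is p x p, and `laplacian_matrix()` is igraph's Laplacian. lambda2 can then be tuned the same way as lambda above, by a grid search with k-fold CV on held-out error.

# Graph-penalized least squares:
#   beta_hat = argmin ||y - X b||^2 + lambda2 * t(b) %*% L %*% b
# has the closed form beta_hat = (X'X + lambda2 * L)^{-1} X'y.
graph_penalized_beta <- function(X, y, g, lambda2) {
  L <- as.matrix(laplacian_matrix(g))  # p x p graph Laplacian, L = D - A
  stopifnot(ncol(X) == nrow(L))        # graph must be over the predictors
  # small ridge term keeps the system solvable if X'X + lambda2*L is singular
  solve(crossprod(X) + lambda2 * L + diag(1e-8, ncol(X)), crossprod(X, y))
}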
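
On part 2: a minimal sketch of the requested function with a learning-rate parameter; `grad_descent` is a hypothetical name and the quadratic example is illustrative only.

# Generic gradient descent; lr is the learning parameter
grad_descent <- function(grad_fn, x0, lr = 0.1, n_iter = 1000, tol = 1e-8) {
  x <- x0
  for (i in 1:n_iter) {
    step <- lr * grad_fn(x)
    x <- x - step
    if (sum(abs(step)) < tol) break  # stop once updates are negligible
  }
  x
}

# Example: minimize f(x) = (x - 3)^2, gradient 2 * (x - 3); converges to 3
grad_descent(function(x) 2 * (x - 3), x0 = 0, lr = 0.1)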