# Project 6 for the University of Tulsa's CS-7863 Sci-Stat Course
# Penalized Machine Learning
# Professor: Dr. McKinney, Spring 2023
# Noah L. Schrick - 1492657

# NOTE(review): `train`, `train.X` and `train.y` are used below but are not
# created anywhere in this script. Presumably they come from the npdro
# simulated data mentioned in the outline -- TODO confirm and add the
# simulation step before the comparison calls.

# 1. Penalized Regression and Classification
## a. Modified Ridge classification for LASSO penalties
### Add cross-validation to tune penalty param
### Use npdro simulated data to test
### Compare with Ridge

### Compare with Random Forest
if (!requireNamespace("randomForest", quietly = TRUE)) {
  install.packages("randomForest")
}
library(randomForest)
if (!requireNamespace("ranger", quietly = TRUE)) {
  install.packages("ranger")
}
library(ranger)

# Fit two random-forest classifiers (randomForest and ranger) on `train`
# and report permutation-based variable importance for each.
#
# Args:
#   train: data frame holding the predictors plus a `class` column, which is
#          converted to a factor and used as the response.
#
# Prints both fitted models and the randomForest top-20 importance table.
# Returns the ranger top-20 importance table: a data frame with a single
# `rf_score` column and the variable names as row names.
rf_comp <- function(train) {
  rf <- randomForest(as.factor(train$class) ~ ., data = train,
                     ntree = 5000, importance = TRUE)
  print(rf) # printed summary includes the error estimate

  # type = 1 selects the mean-decrease-in-accuracy measure. Taking column 1
  # as a named vector gives a predictable `rf_score` column, matching the
  # ranger table below.
  rf_imp <- data.frame(rf_score = importance(rf, type = 1)[, 1])
  # A bare expression inside a function is discarded, so print explicitly.
  print(dplyr::slice_max(rf_imp, order_by = rf_score, n = 20))

  rf2 <- ranger(as.factor(train$class) ~ ., data = train,
                num.trees = 5000, importance = "permutation")
  print(rf2) # printed summary includes the error estimate

  rf2_imp <- data.frame(rf_score = rf2$variable.importance)

  # Held-out evaluation, not enabled (needs a `test` set):
  # rftest <- predict(rf, newdata = test, type = "class")
  # confusionMatrix(table(rftest, test$class))

  dplyr::slice_max(rf2_imp, order_by = rf_score, n = 20)
}

rf_comp(train)

### Compare with glmnet
if (!requireNamespace("glmnet", quietly = TRUE)) {
  install.packages("glmnet")
}
library(glmnet)

# Cross-validated penalized logistic regression with glmnet.
#
# Args:
#   train.X: predictors (coerced with as.matrix()).
#   train.y: binary response passed to cv.glmnet().
#   alpha_p: elastic-net mixing parameter (0 = ridge, 1 = lasso).
#
# Prints the selected lambda values and plots the cross-validation curve.
# Returns a data frame of the coefficients with the largest absolute value:
# the coefficient column plus `abs_scores`, top 21 rows.
glm_fcn <- function(train.X, train.y, alpha_p) {
  cv_fit <- cv.glmnet(as.matrix(train.X), train.y, alpha = alpha_p,
                      family = "binomial", type.measure = "class")
  cat("lambda.1se:", cv_fit$lambda.1se, "\n")
  cat("lambda.min:", cv_fit$lambda.min, "\n")
  plot(cv_fit)

  # predict() on a cv.glmnet fit uses s = "lambda.1se" unless told otherwise.
  coeffs <- predict(cv_fit, type = "coefficients")
  # Alternative kept from the original (names of the non-zero terms only):
  # nonzero <- terms[coeffs@i[which(coeffs@i != 0)]] # skip intercept, 0-based

  glmnet.df <- data.frame(as.matrix(coeffs))
  # Select the single coefficient column by position: its name has differed
  # between glmnet versions, and `$lambda.1se` fails when it is absent.
  glmnet.df$abs_scores <- abs(glmnet.df[[1L]])

  # The intercept row is not removed; n = 21 presumably leaves room for it
  # next to 20 predictors -- TODO confirm.
  dplyr::slice_max(glmnet.df, order_by = abs_scores, n = 21)
}

#### Alpha = 0
glm_fcn(train.X, train.y, 0)

#### Alpha = 1
glm_fcn(train.X, train.y, 1)

## b. Repeat comparison using a graph with clusters

## c. Use npdro and igraph to create knn
### Plot network

## d. Add Laplace graph penalty
### Find resulting beta coeffs
### Optimize or choose value for lambda2
### Compare to a) and b)

# 2. Gradient Descent
## Write fn with learning param
## Solve Rosenbrock function minimum
## Add momentum term