From f755ec2edc7c11963b5a047bbf67133104806d8a Mon Sep 17 00:00:00 2001
From: noah
Date: Wed, 12 Apr 2023 20:54:33 -0500
Subject: [PATCH] GLMNet and Random Forest

---
 Schrick-Noah_Homework-6.R | 105 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 101 insertions(+), 4 deletions(-)

diff --git a/Schrick-Noah_Homework-6.R b/Schrick-Noah_Homework-6.R
index a6e2f12..06fb51d 100644
--- a/Schrick-Noah_Homework-6.R
+++ b/Schrick-Noah_Homework-6.R
@@ -5,7 +5,47 @@
 
 # 1. Penalized Regression and Classification
 ## a. Modified Ridge classification for LASSO penalties
-# gradient descent to optimize beta's
+penalized_loss <- function(X, y, beta, lam, alpha=0){
+  # y needs to be 0/1
+  # beta: regression coefficients
+  # lam: penalty, lam=0 un-penalized logistic regression
+  # alpha = 0 ridge penalty, alpha = 1 lasso penalty
+  m <- nrow(X)
+  Xtilde <- as.matrix(cbind(intercept=rep(1,m), X))
+  cnames <- colnames(Xtilde)
+  z <- Xtilde %*% beta # column vector
+  yhat <- 1/(1+exp(-z))
+  yclass <- as.numeric(y)
+  # 1. logistic unpenalized loss
+  penal.loss <- sum(-yclass*log(yhat) - (1-yclass)*log(1-yhat))/m +
+    # 2. penalty, lam=0 removes penalty
+    lam*((1-alpha)*sum(beta*beta)/2 + # ridge
+           alpha*sum(abs(beta)))      # lasso
+  return(penal.loss)
+}
+
+ridge_grad <- function(X, y, beta, lam){
+  # y needs to be 0/1
+  # also works for non-penalized logistic regression if lam=0
+  m <- nrow(X)
+  p <- ncol(X)
+  Xtilde <- as.matrix(cbind(intercept=rep(1,m), X))
+  cnames <- colnames(Xtilde)
+  z <- Xtilde %*% beta # column vector
+  yhat <- 1/(1+exp(-z))
+  yclass <- as.numeric(y)
+  grad <- rep(0,p+1)
+  for (a in seq(1,p+1)){
+    beta_a <- beta[a] # input beta from previous descent step
+    Loss.grad <- sum(-yclass*(1-yhat)*Xtilde[,a] +
+                       (1-yclass)*yhat*Xtilde[,a])
+    grad[a] <- Loss.grad + lam*beta_a
+  } # end for loop
+  grad <- grad/m
+  return(grad)
+}
+
+### gradient descent to optimize beta's
 ridge_betas <- function(X,y,beta_init=NULL,lam, alpha=0, method="BFGS"){
   if (is.null(beta_init)){beta_init <- rep(.1, ncol(X)+1)}
   # method: BFGS, CG, Nelder-Mead
@@ -55,6 +95,30 @@ validation <- dataset$validation
 dataset$signal.names
 colnames(train)
 
+# separate the class vector from the predictor data matrix
+train.X <- train[, -which(colnames(train) == "class")]
+train.y <- train[, "class"]
+train.y.01 <- as.numeric(train.y)-1
+
+lambda <- 0
+unpen_beta <- lasso_betas(train.X, train.y)
+for(i in seq_along(unpen_beta$betas)){
+  if(abs(unpen_beta$betas[i]) <= lambda){
+    unpen_beta$betas[i] <- 0
+  }
+  else if (unpen_beta$betas[i] > lambda){
+    unpen_beta$betas[i] <- unpen_beta$betas[i]-lambda
+  }
+  else{
+    unpen_beta$betas[i] <- unpen_beta$betas[i]+lambda
+  }
+}
+
+lasso.df <- data.frame(att=c("intercept", colnames(train.X)),
+                       scores=unpen_beta$betas,
+                       abs_scores=abs(unpen_beta$betas))
+dplyr::slice_max(lasso.df,order_by=abs_scores,n=20)
+
 ### Compare with Ridge
 
 ### Compare with Random Forest
@@ -67,16 +131,18 @@ rf_comp <- function(train){
   rf<-randomForest(as.factor(train$class) ~ .,data=train, ntree=5000,
                    importance=T)
   print(rf) # error
-  rf_imp<-data.frame(rf_score=importance(rf, type=1))
+  detach("package:ranger", unload=TRUE)
+  rf_imp<-data.frame(rf_score=importance(rf, type=1)) # Cannot do if ranger is loaded
   #dplyr::arrange(rf_imp,-MeanDecreaseAccuracy)
-  dplyr::slice_max(rf_imp,order_by=MeanDecreaseAccuracy, n=20)
+  print(dplyr::slice_max(rf_imp,order_by=MeanDecreaseAccuracy, n=20))
 
+  library(ranger)
   rf2<-ranger(as.factor(train$class) ~ ., data=train, num.trees=5000,
              importance="permutation")
   print(rf2) # error
   rf2_imp<-data.frame(rf_score=rf2$variable.importance)
   #dplyr::arrange(rf_imp,-MeanDecreaseAccuracy)
-  dplyr::slice_max(rf2_imp,order_by=rf_score, n=20)
+  print(dplyr::slice_max(rf2_imp,order_by=rf_score, n=20))
 
   #rftest <- predict(rf, newdata=test, type="class")
   #confusionMatrix(table(rftest,test$class))
@@ -148,6 +214,37 @@ node_colors <- c(rep("red",npc), rep("green",npc), rep("blue",npc), rep("orange"
 g1 <- graph_from_adjacency_matrix(mat_clust, mode="undirected", diag=F)
 plot(g1, vertex.color=node_colors)
 
+### Dataset with g1
+dataset.graph <- npdro::createSimulation2(num.samples=num.samples,
+                                          num.variables=num.variables,
+                                          pct.imbalance=0.5,
+                                          pct.signals=0.2,
+                                          main.bias=0.5,
+                                          interaction.bias=1,
+                                          hi.cor=0.95,
+                                          lo.cor=0.2,
+                                          mix.type="main-interactionScalefree",
+                                          label="class",
+                                          sim.type="mixed",
+                                          pct.mixed=0.5,
+                                          pct.train=0.5,
+                                          pct.holdout=0.5,
+                                          pct.validation=0,
+                                          plot.graph=F,
+                                          graph.structure = g1,
+                                          verbose=T)
+
+train.graph <- dataset.graph$train #150x101
+test.graph <- dataset.graph$holdout
+validation.graph <- dataset.graph$validation
+dataset.graph$signal.names
+colnames(train.graph)
+
+# separate the class vector from the predictor data matrix
+train.graph.X <- train.graph[, -which(colnames(train.graph) == "class")]
+train.graph.y <- train.graph[, "class"]
+train.graph.y.01 <- as.numeric(train.graph.y)-1
+
 ## c. Use npdro and igraph to create knn
 my.k <- 3 # larger k, fewer clusters
 npdr.nbpairs.idx <- npdro::nearestNeighbors(t(train.X),
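The patch subject mentions GLMNet, so a natural sanity check on the hand-rolled LASSO ranking built in lasso.df above is to fit the same binomial LASSO with the glmnet package and compare the largest-magnitude coefficients. The sketch below is editorial and not part of the patch: it assumes train.X and train.y exist as created in the diff, that the predictors are numeric, and that glmnet is installed; cv.glmnet is used only to pick an illustrative penalty.

# Not part of the patch: minimal glmnet cross-check of the hand-coded LASSO ranking.
# Assumes train.X / train.y from the diff above; lambda chosen by cross-validation.
library(glmnet)
X <- as.matrix(train.X)        # glmnet requires a numeric predictor matrix
y <- as.numeric(train.y) - 1   # 0/1 response, same coding as train.y.01
cvfit <- cv.glmnet(X, y, family = "binomial", alpha = 1)  # alpha = 1 -> LASSO penalty
glmnet.betas <- as.numeric(as.matrix(coef(cvfit, s = "lambda.min")))  # intercept first
glmnet.df <- data.frame(att = c("intercept", colnames(X)),
                        scores = glmnet.betas,
                        abs_scores = abs(glmnet.betas))
dplyr::slice_max(glmnet.df, order_by = abs_scores, n = 20)  # compare with lasso.df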