From 2e81ded14ff929fc041fdd2d9cd477a8450a739c Mon Sep 17 00:00:00 2001 From: noah Date: Thu, 13 Apr 2023 01:30:08 -0500 Subject: [PATCH] Adding loop for lasso convergence --- Schrick-Noah_Homework-6.R | 28 +++++++++++++++--- Schrick-Noah_Ridge-LASSO-Regression.R | 41 ++++++++++++++++++--------- 2 files changed, 52 insertions(+), 17 deletions(-) diff --git a/Schrick-Noah_Homework-6.R b/Schrick-Noah_Homework-6.R index 2421496..b5e1dd7 100644 --- a/Schrick-Noah_Homework-6.R +++ b/Schrick-Noah_Homework-6.R @@ -16,7 +16,7 @@ bundled_data <- create_data() run_comparison <- function(bundled_data){ ### LASSO - unpen_beta <- unpen_coeff(bundled_data$train.X, bundled_data$train.y) + unpen_beta <- lasso_coeff(bundled_data$train.X, bundled_data$train.y) lasso.df <- data.frame(att=c("intercept", colnames(bundled_data$train.X)), scores=unpen_beta$betas, abs_scores=abs(unpen_beta$betas)) @@ -101,11 +101,32 @@ run_comparison(bundled_data) ## b. Repeat comparison using a graph with clusters source("Schrick-Noah_graphs.R") bundled_graph_data <- sim_graph_data() + run_comparison(bundled_graph_data) ## c. Use npdro and igraph to create knn +### Bundled Graph Data my.k <- 3 # larger k, fewer clusters -npdr.nbpairs.idx <- npdro::nearestNeighbors(t(train.X), +npdr.nbpairs.idx.g <- npdro::nearestNeighbors(t(bundled_graph_data$train.X), + # transpose does dist between predictors + # without transpose does dist between samples + #nbd.method="multisurf", k=0, + nbd.method = "relieff", + nbd.metric="manhattan", + k=my.k) +knn.graph.g <- graph_from_edgelist(as.matrix(npdr.nbpairs.idx.g), + directed=F) +knn.graph.g <- simplify(knn.graph.g) + +### Plot network +plot.igraph(knn.graph.g,layout=layout_with_fr(knn.graph.g), + vertex.color="red", + vertex.size=3,vertex.label=NA, + main="Manhattan, knn-graph from simulated data + with erdos-renyi graph structure") + +### Bundled Data +npdr.nbpairs.idx <- npdro::nearestNeighbors(t(bundled_data$train.X), # transpose does dist between predictors # without transpose does dist between samples #nbd.method="multisurf", k=0, @@ -120,8 +141,7 @@ knn.graph <- simplify(knn.graph) plot.igraph(knn.graph,layout=layout_with_fr(knn.graph), vertex.color="red", vertex.size=3,vertex.label=NA, - main="Manhattan, knn-graph") - + main="Manhattan, knn-graph from simulated data") ## d. Add Laplace graph penalty diff --git a/Schrick-Noah_Ridge-LASSO-Regression.R b/Schrick-Noah_Ridge-LASSO-Regression.R index b1c7f37..2c1ba7b 100644 --- a/Schrick-Noah_Ridge-LASSO-Regression.R +++ b/Schrick-Noah_Ridge-LASSO-Regression.R @@ -50,24 +50,39 @@ ridge_betas <- function(X,y,beta_init=NULL,lam, alpha=0, method="BFGS"){ } # Regression coeffs for LASSO -lasso_betas <- function(X,y){ - ridge_betas(X,y,beta_init=NULL,lam=0,alpha=0,method="BFGS") +lasso_betas <- function(X,y,beta_init=NULL){ + ridge_betas(X,y,beta_init=beta_init,lam=0,alpha=1,method="BFGS") } # Adjust betas -unpen_coeff <- function(X, y, lambda=0){ - unpen_beta <- lasso_betas(X, y) - for(beta in unpen_beta$betas){ - if(abs(beta) <= lambda){ - beta <- 0 - } - else if (beta > lambda){ - beta <- beta-lambda - } - else{ - beta <- beta+lambda +lasso_coeff <- function(X, y, lambda=0, tol=1e-8){ + unpen_beta <- lasso_betas(X, y, beta_init=numeric(101)) + old_loss <- unpen_beta$loss + lasso_converged <- FALSE + loop_count <- 0 + while (!lasso_converged){ + beta_LS <- optim(unpen_beta$betas, # guess + fn=function(beta){penalized_loss(X, y, beta, lam=0, alpha=1)}, # objective + gr=function(beta){ridge_grad(X, y, beta, lam=0)}, # gradient + method = "BFGS") #, control= list(trace = 2)) + + for(i in 1:length(beta_LS$par)){ + if(abs(beta_LS$par[i]) <= lambda){ #lambda is 0, so alpha?){ + beta_LS$par[i] <- 0 + } + else if (beta_LS$par[i] > lambda){ + beta_LS$par[i] <- beta_LS$par[i]-lambda + } + else{ + beta_LS$par[i] <- beta_LS$par[i]+lambda + } } + unpen_beta <- lasso_betas(X,y,beta_init=beta_LS$par) + lasso_converged <- abs(unpen_beta$loss - old_loss) < tol + old_loss <- unpen_beta$loss + loop_count <- loop_count + 1 } + print(loop_count) return(unpen_beta) }