Plotting comparison of feature selection

This commit is contained in:
Noah L. Schrick 2023-04-12 23:40:58 -05:00
parent 93eb5d2f23
commit c8cac4a638

View File

@ -66,6 +66,36 @@ glm.df.1 <- data.frame(att=c("intercept", colnames(bundled_data$train.X)),
# Keep the rows of glm.df.1 with the 20 largest abs_scores values, then
# convert that subset to a data.table.
glm.df.1.res <- dplyr::slice_max(glm.df.1, order_by = abs_scores, n = 20)
glm.1.table <- as.data.table(glm.df.1.res)
### Plot
# Draw the scaled importance scores of every model on one line chart, with
# one x position per column of the training data.
plot_feature_names <- colnames(bundled_data$train)

#### Convert names to indices
# match() replaces each attribute name with its column position in the
# training data. Names that are not columns there (presumably "intercept" in
# the glmnet frames -- verify) become NA.
lasso.df$att <- match(lasso.df$att, plot_feature_names)
ridge.df$att <- match(ridge.df$att, plot_feature_names)
rf.df$att <- match(rf.df$att, plot_feature_names)
glm.df.0$att <- match(glm.df.0$att, plot_feature_names)
glm.df.1$att <- match(glm.df.1$att, plot_feature_names)

#### Scale
# Centre and scale each model's scores so they share one y axis. scale()
# returns a one-column matrix; as.numeric() keeps each column a plain
# numeric vector instead of a matrix column inside the data frame.
lasso.df$abs_scores <- as.numeric(scale(lasso.df$abs_scores))
ridge.df$abs_scores <- as.numeric(scale(ridge.df$abs_scores))
rf.df$scores <- as.numeric(scale(rf.df$scores))
glm.df.0$abs_scores <- as.numeric(scale(glm.df.0$abs_scores))
glm.df.1$abs_scores <- as.numeric(scale(glm.df.1$abs_scores))

# NOTE(review): ylim is fixed at c(-1, 3); scaled scores outside that range
# are not visible -- confirm this is intended.
# NOTE(review): type = "l" joins points in row order, so this assumes each
# data frame is ordered by att -- TODO confirm.
plot(x = lasso.df$att, y = lasso.df$abs_scores, type = "l", xlab = "Vars",
     ylab = "Coefficients (Abs Scores)", xaxt = "n", col = "blue",
     ylim = c(-1, 3),
     main = "Scaled scores for simulated data feature selection")

# One tick per training-data column. seq_along() keeps `at` and `labels` the
# same length for any number of columns (previously hard-coded as 1:101).
axis(1, at = seq_along(plot_feature_names), labels = plot_feature_names,
     cex.axis = 0.5)

# Overlay the remaining models on the LASSO axes.
lines(x = ridge.df$att, y = ridge.df$abs_scores, col = "red")
lines(x = rf.df$att, y = rf.df$scores, col = "green")
lines(x = glm.df.0$att, y = glm.df.0$abs_scores, col = "bisque4")
lines(x = glm.df.1$att, y = glm.df.1$abs_scores, col = "purple")

# Legend entries are listed in the same order as the colours drawn above.
legend(x = "topleft",
       legend = c("LASSO", "Ridge", "Random Forest", "glmnet (alpha=0)",
                  "glmnet (alpha=1)"),
       lty = c(1, 1, 1, 1, 1),
       col = c("blue", "red", "green", "bisque4", "purple"),
       cex = 1)
## b. Repeat comparison using a graph with clusters
# Install igraph only when it is not already available, then attach it.
# requireNamespace() checks availability without attaching the package;
# library() below does the attaching and errors if the install failed.
if (!requireNamespace("igraph", quietly = TRUE)) install.packages("igraph")
library(igraph)