q() source("self_newman_mod.R") getwd()_ getwd() setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) setwd(getSrcDirectory()[1]) this.dir <- dirname(parent.frame(2)$ofile) setwd(getSrcDirectory()[1]) setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) source("self_newman_mod.R") # Homework 3 for the University of Tulsa' s CS-7863 Network Theory Course # Network Clustering # Professor: Dr. McKinney, Spring 2022 # Noah Schrick - 1492657 # Imports library(igraph) library(igraphdata) library(WGCNA) # Set working directory to this source file location (Requires RStudio) setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) source("self_newman_mod.R") data(karate) data(yeast) g1 <- karate g1.netname <- "Karate" g2 <- yeast g2.netname <- "Yeast" ##################### Part 1: Laplace Spectral Clustering ##################### g1.adj <- get.adjacency(g1) # get adjacency g2.adj <- get.adjacency(g2) g1.deg <- rowSums(as.matrix(g1.adj)) # get degrees g2.deg <- rowSums(as.matrix(g2.adj)) g1.Lap <- diag(g1.deg) - g1.adj # L = D-A g2.Lap <- diag(g2.deg) - g2.adj n1 <- length(V(g1)) # number of nodes n2 <- length(V(g2)) # get eigvals and vecs x1 <- eigen(g1.Lap)$vectors[,n1-1] x2 <- eigen(g2.Lap)$vectors[,n2-1] x1_val <- eigen(g1.Lap)$values[n1-1] x2_val <- eigen(g2.Lap)$values[n2-1] names(x1) <- names(V(g1)) names(x2) <- names(V(g2)) x1 x2 x1_clusters <- ifelse(x1>0,1,-1) x2_clusters <- ifelse(x2>0,1,-1) # Plotting V(g1)$color <- ifelse(x1_clusters>0,"green","yellow") #V(g1)$size <- 80*abs(x1) V(g2)$color <- ifelse(x2_clusters>0,"green","yellow") V(g2)$size <- 10 plot(g1, main=paste(g1.netname, " Laplace Spectral Clustering")) plot(g2, main=paste(g2.netname, " Laplace Spectral Clustering"), vertex.label=NA) ########################## Part 2: Newman Modularity ########################## list(Q=Q,max.lam=max.lam,weights=weights,clusters=clusters) <- newman_mod(g1) list(Q=Q,max.lam=max.lam,weights=weights,clusters=clusters) <- newman_mod(g2) ##################### Part 3: Recursive Newman Modularity ##################### # Using igraph karate.modularity <- fastgreedy.community(karate,merges=TRUE, modularity=TRUE, membership=TRUE) #memberships <-community.to.membership(karate, karate.modularity$merges, # steps=which.max(fgreedy$modularity)-1) karate.modularity$membership karate.modularity$merges membership.ids <- unique(karate.modularity$membership) membership.ids cat(paste('Number of detected communities =',length(membership.ids))) cat("community sizes: ") sapply(membership.ids,function(x) {sum(x==karate.modularity$membership)}) cat("modularity: ") max(karate.modularity$modularity) #karate.modularity$modularity V(karate)$color[karate.modularity$membership==1] <- "green" V(karate)$color[karate.modularity$membership==2] <- "red" V(karate)$color[karate.modularity$membership==3] <- "blue" plot(karate,vertex.size=10, vertex.label=V(karate)$label,vertex.color=V(karate)$color, main=paste("Karate Recursive Newman Modularity")) ###################### Part 4: TOM and Dynamic Tree Cut ###################### # Next smallest eig y1 <- eigen(g1.Lap)$vectors[,n1-2] # get n-2 column names(y1) <- names(V(g1)) y1_val <- eigen(g1.Lap)$values[n1-2] y2 <- eigen(g2.Lap)$vectors[,n2-2] # get n-2 column names(y2) <- names(V(g2)) y2_val <- eigen(g2.Lap)$values[n2-2] # Distance in the 2D Laplacian Eig-Space lap_dist1 <- dist(cbind(x1,y1)) lap_dist2 <- dist(cbind(x2,y2)) # Use hierarchical clustering of distance matrix lap_tree1 <- hclust(lap_dist1) lap_tree2 <- hclust(lap_dist2) # Use WGCNA to dynamically decide how many clusters cutreeDynamicTree(lap_tree1) cutreeDynamicTree(lap_tree2) # TOM g1.TOM <- TOMsimilarity(as.matrix(g1.adj)) g2.TOM <- TOMsimilarity(as.matrix(g2.adj)) g1.TOM g2.TOM TOMplot(g1.TOM, lap_tree1, main=paste(g1.netname, " Heatmap Plot")) TOMplot(g2.TOM, lap_tree2, main=paste(g2.netname, " Heatmap Plot")) # dynamic tree cut minModuleSize = 2 # degree = 1 dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = lap_dist1, deepSplit = 2, pamRespectsDendro = FALSE, minClusterSize = minModuleSize) init_mod_sizes <- table(dynamicMods) # you will have to work through finding and installing labels2colors. dynamicColors = labels2colors(dynamicMods1) table(dynamicColors) plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05, main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1")) # degree = 1 dynamicMods2 = cutreeDynamic(dendro = lap_tree2, distM = lap_dist2, deepSplit = 2, pamRespectsDendro = FALSE, minClusterSize = minModuleSize) init_mod_sizes <- table(dynamicMods) # you will have to work through finding and installing labels2colors. dynamicColors = labels2colors(dynamicMods2) table(dynamicColors) plotDendroAndColors(lap_tree2, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05, main = paste(g2.netname, "dendogram and dynamic cut modules, degree = 1")) ################################ Part 5: UMAP ################################ g1.dist2 <- 1- as.matrix(g1.adj) g2.dist2 <- 1- as.matrix(g2.adj) # first cluster with hierarchical clustering g1.tree2 <- hclust(as.dist(g1.dist2), method = "average") g2.tree2 <- hclust(as.dist(g2.dist2), method = "average") g1.tree2.2clust <- cutree(g1.tree2,k=2) # predicted 2-clusters g2.tree2.2clust <- cutree(g2.tree2,k=2) # predicted 2-clusters #g1.tree2$labels <- V(g1)[Faction] # true clusters #g2.tree2$labels <- V(g2)[Faction] # true clusters g1.tree2$labels <- g1.tree2.2clust g2.tree2$labels <- g2.tree2.2clust # if you want to label tree with predicted clusters plot(g1.tree2, main = paste(g1.netname, " HClust")) plot(g2.tree2, main = paste(g2.netname, " HClust")) # Using igraph g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE) #memberships <-community.to.membership(karate, karate.modularity$merges, # steps=which.max(fgreedy$modularity)-1) g2.modularity$membership g2.modularity$merges g2.membership.ids <- unique(g1.modularity$membership) g2.membership.ids cat(paste('Number of detected communities =',length(g2.membership.ids))) cat("community sizes: ") sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)}) cat("modularity: ") max(g2.modularity$modularity) #karate.modularity$modularity # Using igraph g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE) #memberships <-community.to.membership(karate, karate.modularity$merges, # steps=which.max(fgreedy$modularity)-1) g2.modularity$membership g2.modularity$merges g2.membership.ids <- unique(g2.modularity$membership) g2.membership.ids cat(paste('Number of detected communities =',length(g2.membership.ids))) cat("community sizes: ") sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)}) cat("modularity: ") max(g2.modularity$modularity) #karate.modularity$modularity # Using igraph g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE) #memberships <-community.to.membership(karate, karate.modularity$merges, # steps=which.max(fgreedy$modularity)-1) g2.modularity$membership g2.modularity$merges g2.membership.ids <- unique(g2.modularity$membership) cat(paste('Number of detected communities =',length(g2.membership.ids))) V(g2)$color=g2.modularity$membership plot(g2,vertex.size=10, vertex.label=V(g2)$label,vertex.color=V(g2)$color, main=paste(g2.netname, " Recursive Newman Modularity")) plot(g2,vertex.size=10, vertex.label=V(g2)$label,vertex.color=V(g2)$color, main=paste(g2.netname, " Recursive Newman Modularity")) tmp <- simplify(g2) g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE) #memberships <-community.to.membership(karate, karate.modularity$merges, # steps=which.max(fgreedy$modularity)-1) g2.modularity$membership g2.modularity$merges g2.membership.ids <- unique(g2.modularity$membership) g2.membership.ids cat(paste('Number of detected communities =',length(g2.membership.ids))) cat("community sizes: ") sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)}) cat("modularity: ") max(g2.modularity$modularity) #karate.modularity$modularity V(g2)$color=g2.modularity$membership plot(g2,vertex.size=10, vertex.label=V(g2)$label,vertex.color=V(g2)$color, main=paste(g2.netname, " Recursive Newman Modularity")) plot(g2,vertex.size=10, vertex.label=NA,vertex.color=V(g2)$color, main=paste(g2.netname, " Recursive Newman Modularity")) plot(g2,vertex.size=10, vertex.label=NA,vertex.color=V(g2)$color, main=paste(g2.netname, " Recursive Newman Modularity")) g2 <- simplify(g2) # Using igraph g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE) #memberships <-community.to.membership(karate, karate.modularity$merges, # steps=which.max(fgreedy$modularity)-1) g2.modularity$membership g2.modularity$merges g2.membership.ids <- unique(g2.modularity$membership) g2.membership.ids cat(paste('Number of detected communities =',length(g2.membership.ids))) cat("community sizes: ") sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)}) cat("modularity: ") max(g2.modularity$modularity) #karate.modularity$modularity V(g2)$color=g2.modularity$membership plot(g2,vertex.size=10, vertex.label=NA,vertex.color=V(g2)$color, main=paste(g2.netname, " Recursive Newman Modularity")) # dynamic tree cut minModuleSize = 2 # degree = 1 dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = lap_dist1, deepSplit = 2, pamRespectsDendro = FALSE, minClusterSize = minModuleSize) init_mod_sizes <- table(dynamicMods) # you will have to work through finding and installing labels2colors. dynamicColors = labels2colors(dynamicMods1) table(dynamicColors) plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05, main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1")) install.packages("labels2colors") # dynamic tree cut minModuleSize = 2 # degree = 1 dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = lap_dist1, deepSplit = 2, pamRespectsDendro = FALSE, minClusterSize = minModuleSize) g1.mat <- as.matrix(get.adjacency(g1)) g2.mat <- as.matrix(get.adjacency(g2)) g1.dist <- GTOMdist(g1.mat, degree = 1) g2.dist <- GTOMdist(g2.mat, degree = 1) # dynamic tree cut minModuleSize = 2 # degree = 1 dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = g1.dist, deepSplit = 2, pamRespectsDendro = FALSE, minClusterSize = minModuleSize) init_mod_sizes <- table(dynamicMods) init_mod_sizes <- table(dynamicMods1) # you will have to work through finding and installing labels2colors. dynamicColors = labels2colors(dynamicMods1) table(dynamicColors) plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05, main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1")) # degree = 1 dynamicMods2 = cutreeDynamic(dendro = lap_tree2, distM = g2.dist, deepSplit = 2, pamRespectsDendro = FALSE, minClusterSize = minModuleSize) init_mod_sizes <- table(dynamicMods2) # you will have to work through finding and installing labels2colors. dynamicColors = labels2colors(dynamicMods2) table(dynamicColors) plotDendroAndColors(lap_tree2, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05, main = paste(g2.netname, "dendogram and dynamic cut modules, degree = 1")) ############################### Part 5: UMAP ################################ g1.dist2 <- 1- as.matrix(g1.adj) g2.dist2 <- 1- as.matrix(g2.adj) g1.tree2 <- hclust(as.dist(g1.dist2), method = "average") g2.tree2 <- hclust(as.dist(g2.dist2), method = "average") g1.tree2.2clust <- cutree(g1.tree2,k=2) # predicted 2-clusters g2.tree2.2clust <- cutree(g2.tree2,k=2) # predicted 2-clusters #g1.tree2$labels <- V(g1)[Faction] # true clusters #g2.tree2$labels <- V(g2)[Faction] # true clusters g1.tree2$labels <- g1.tree2.2clust g2.tree2$labels <- g2.tree2.2clust # if you want to label tree with predicted clusters plot(g1.tree2, main = paste(g1.netname, " HClust")) plot(g2.tree2, main = paste(g2.netname, " HClust")) plot(g2.tree2, main = paste(g2.netname, " HClust")) g <- g1 A <- get.adjacency(g) # adj m <- ecount(g) n <- vcount(g) if (is.null(weights)){ weights <- rep(1,n) } # Obtain the modularity matrix B.node.i <- function(i){degree(g)[i]*degree(g)} B.node.all <- sapply(1:n, B.node.i) B <- A - (B.node.all/(2*m)) # NOTE: This is identical to: modularity_matrix(g) ! Can verify with: # modularity_matrix(g) == B B.eigs <- eigen(B) max.lam <- B.eigs$values[1] s <- ifelse(B.eigs$vectors[,1]>0,1,-1) weights <- B.eigs$vectors[n]/B.eigs$vectors[,1] # Plotting #V(g)$color <- ifelse(B[1,]>0,"green","yellow") V(g)$color <- ifelse(B.eigs$vectors[,1]>0,"green","yellow") V(g)$size <- 10 plot(g, main=paste(g.netname, " Newman Modularity")) clust1 = list() clust2 = list() clusters = list() # Make list of clusters for(i in 1:n){ ifelse(V(g)[i]$color=="green", clust1 <- append(clust1, V(g)[i]$name), clust2 <- append(clust2, V(g)[i]$name))} clusters <- list(clust1, clust2) Q.node.i <- function(i){sum( (((B.eigs$vectors[i])*weights[i]*s)^2)*B.eigs[i]$values)} Q <- (1/(4*m))*sapply(1:n, Q.node.i) plot(g, main=paste(g.netname, " Newman Modularity")) g g.netname g.netname <- "Karate" plot(g, main=paste(g.netname, " Newman Modularity")) g <- g2 A <- get.adjacency(g) # adj m <- ecount(g) n <- vcount(g) if (is.null(weights)){ weights <- rep(1,n) } # Obtain the modularity matrix B.node.i <- function(i){degree(g)[i]*degree(g)} B.node.all <- sapply(1:n, B.node.i) B <- A - (B.node.all/(2*m)) # NOTE: This is identical to: modularity_matrix(g) ! Can verify with: # modularity_matrix(g) == B B.eigs <- eigen(B) max.lam <- B.eigs$values[1] s <- ifelse(B.eigs$vectors[,1]>0,1,-1) weights <- B.eigs$vectors[n]/B.eigs$vectors[,1] # Plotting #V(g)$color <- ifelse(B[1,]>0,"green","yellow") V(g)$color <- ifelse(B.eigs$vectors[,1]>0,"green","yellow") V(g)$size <- 10 plot(g, main=paste(g.netname, " Newman Modularity")) clust1 = list() clust2 = list() clusters = list() # Make list of clusters for(i in 1:n){ ifelse(V(g)[i]$color=="green", clust1 <- append(clust1, V(g)[i]$name), clust2 <- append(clust2, V(g)[i]$name))} clusters <- list(clust1, clust2) Q.node.i <- function(i){sum( (((B.eigs$vectors[i])*weights[i]*s)^2)*B.eigs[i]$values)} Q <- (1/(4*m))*sapply(1:n, Q.node.i) plot(g, vertex.label=NA, main=paste(g.netname, " Newman Modularity")) plot(g, vertex.label=NA, main=paste(g.netname, " Newman Modularity")) g.netname <- Yeast g.netname <- "Yeast" g <- g2 A <- get.adjacency(g) # adj m <- ecount(g) n <- vcount(g) if (is.null(weights)){ weights <- rep(1,n) } # Obtain the modularity matrix B.node.i <- function(i){degree(g)[i]*degree(g)} B.node.all <- sapply(1:n, B.node.i) B <- A - (B.node.all/(2*m)) # NOTE: This is identical to: modularity_matrix(g) ! Can verify with: # modularity_matrix(g) == B B.eigs <- eigen(B) max.lam <- B.eigs$values[1] s <- ifelse(B.eigs$vectors[,1]>0,1,-1) weights <- B.eigs$vectors[n]/B.eigs$vectors[,1] # Plotting #V(g)$color <- ifelse(B[1,]>0,"green","yellow") V(g)$color <- ifelse(B.eigs$vectors[,1]>0,"green","yellow") V(g)$size <- 10 plot(g, vertex.label=NA, main=paste(g.netname, " Newman Modularity")) g1 g1[3] normalizedMI(x1_clusters,x2_clusters) gc() source("mutual_information.R") normalizedMI(x1_clusters,x2_clusters) x1_clusters ##################### Part 1: Laplace Spectral Clustering ##################### g1.adj <- get.adjacency(g1) # get adjacency g1.deg <- rowSums(as.matrix(g1.adj)) # get degrees g1.Lap <- diag(g1.deg) - g1.adj # L = D-A n1 <- length(V(g1)) # number of nodes # get eigvals and vecs x1 <- eigen(g1.Lap)$vectors[,n1-1] x1_val <- eigen(g1.Lap)$values[n1-1] names(x1) <- names(V(g1)) x1 x1_clusters <- ifelse(x1>0,1,-1) normalizedMI(x1_clusters,x1_clusters) x1_clusters normalizedMI(x1_clusters) vcount(g2) cat(paste('Number of detected communities =',length(g2.membership.ids))) cat("community sizes: ") sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)}) sapply(membership.ids,function(x) {sum(x==g2.modularity$membership)}) # Using igraph g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE) #memberships <-community.to.membership(karate, karate.modularity$merges, # steps=which.max(fgreedy$modularity)-1) g2.modularity$membership g2.modularity$merges g2.membership.ids <- unique(g2.modularity$membership) g2.membership.ids cat(paste('Number of detected communities =',length(g2.membership.ids))) cat("community sizes: ") sapply(membership.ids,function(x) {sum(x==g2.modularity$membership)}) cat("modularity: ") max(g2.modularity$modularity)