2022-03-24 03:04:51 -05:00

425 lines
16 KiB
R

q()
source("self_newman_mod.R")
getwd()_
getwd()
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
setwd(getSrcDirectory()[1])
this.dir <- dirname(parent.frame(2)$ofile)
setwd(getSrcDirectory()[1])
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
source("self_newman_mod.R")
# Homework 3 for the University of Tulsa' s CS-7863 Network Theory Course
# Network Clustering
# Professor: Dr. McKinney, Spring 2022
# Noah Schrick - 1492657
# Imports
library(igraph)
library(igraphdata)
library(WGCNA)
# Set working directory to this source file location (Requires RStudio)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
source("self_newman_mod.R")
data(karate)
data(yeast)
g1 <- karate
g1.netname <- "Karate"
g2 <- yeast
g2.netname <- "Yeast"
##################### Part 1: Laplace Spectral Clustering #####################
g1.adj <- get.adjacency(g1) # get adjacency
g2.adj <- get.adjacency(g2)
g1.deg <- rowSums(as.matrix(g1.adj)) # get degrees
g2.deg <- rowSums(as.matrix(g2.adj))
g1.Lap <- diag(g1.deg) - g1.adj # L = D-A
g2.Lap <- diag(g2.deg) - g2.adj
n1 <- length(V(g1)) # number of nodes
n2 <- length(V(g2))
# get eigvals and vecs
x1 <- eigen(g1.Lap)$vectors[,n1-1]
x2 <- eigen(g2.Lap)$vectors[,n2-1]
x1_val <- eigen(g1.Lap)$values[n1-1]
x2_val <- eigen(g2.Lap)$values[n2-1]
names(x1) <- names(V(g1))
names(x2) <- names(V(g2))
x1
x2
x1_clusters <- ifelse(x1>0,1,-1)
x2_clusters <- ifelse(x2>0,1,-1)
# Plotting
V(g1)$color <- ifelse(x1_clusters>0,"green","yellow")
#V(g1)$size <- 80*abs(x1)
V(g2)$color <- ifelse(x2_clusters>0,"green","yellow")
V(g2)$size <- 10
plot(g1, main=paste(g1.netname, " Laplace Spectral Clustering"))
plot(g2, main=paste(g2.netname, " Laplace Spectral Clustering"),
vertex.label=NA)
########################## Part 2: Newman Modularity ##########################
list(Q=Q,max.lam=max.lam,weights=weights,clusters=clusters) <- newman_mod(g1)
list(Q=Q,max.lam=max.lam,weights=weights,clusters=clusters) <- newman_mod(g2)
##################### Part 3: Recursive Newman Modularity #####################
# Using igraph
karate.modularity <- fastgreedy.community(karate,merges=TRUE, modularity=TRUE, membership=TRUE)
#memberships <-community.to.membership(karate, karate.modularity$merges,
# steps=which.max(fgreedy$modularity)-1)
karate.modularity$membership
karate.modularity$merges
membership.ids <- unique(karate.modularity$membership)
membership.ids
cat(paste('Number of detected communities =',length(membership.ids)))
cat("community sizes: ")
sapply(membership.ids,function(x) {sum(x==karate.modularity$membership)})
cat("modularity: ")
max(karate.modularity$modularity)
#karate.modularity$modularity
V(karate)$color[karate.modularity$membership==1] <- "green"
V(karate)$color[karate.modularity$membership==2] <- "red"
V(karate)$color[karate.modularity$membership==3] <- "blue"
plot(karate,vertex.size=10,
vertex.label=V(karate)$label,vertex.color=V(karate)$color,
main=paste("Karate Recursive Newman Modularity"))
###################### Part 4: TOM and Dynamic Tree Cut ######################
# Next smallest eig
y1 <- eigen(g1.Lap)$vectors[,n1-2] # get n-2 column
names(y1) <- names(V(g1))
y1_val <- eigen(g1.Lap)$values[n1-2]
y2 <- eigen(g2.Lap)$vectors[,n2-2] # get n-2 column
names(y2) <- names(V(g2))
y2_val <- eigen(g2.Lap)$values[n2-2]
# Distance in the 2D Laplacian Eig-Space
lap_dist1 <- dist(cbind(x1,y1))
lap_dist2 <- dist(cbind(x2,y2))
# Use hierarchical clustering of distance matrix
lap_tree1 <- hclust(lap_dist1)
lap_tree2 <- hclust(lap_dist2)
# Use WGCNA to dynamically decide how many clusters
cutreeDynamicTree(lap_tree1)
cutreeDynamicTree(lap_tree2)
# TOM
g1.TOM <- TOMsimilarity(as.matrix(g1.adj))
g2.TOM <- TOMsimilarity(as.matrix(g2.adj))
g1.TOM
g2.TOM
TOMplot(g1.TOM, lap_tree1, main=paste(g1.netname, " Heatmap Plot"))
TOMplot(g2.TOM, lap_tree2, main=paste(g2.netname, " Heatmap Plot"))
# dynamic tree cut
minModuleSize = 2
# degree = 1
dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = lap_dist1, deepSplit = 2,
pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
init_mod_sizes <- table(dynamicMods)
# you will have to work through finding and installing labels2colors.
dynamicColors = labels2colors(dynamicMods1)
table(dynamicColors)
plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE,
hang = 0.03, addGuide = TRUE, guideHang = 0.05,
main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1"))
# degree = 1
dynamicMods2 = cutreeDynamic(dendro = lap_tree2, distM = lap_dist2, deepSplit = 2,
pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
init_mod_sizes <- table(dynamicMods)
# you will have to work through finding and installing labels2colors.
dynamicColors = labels2colors(dynamicMods2)
table(dynamicColors)
plotDendroAndColors(lap_tree2, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE,
hang = 0.03, addGuide = TRUE, guideHang = 0.05,
main = paste(g2.netname, "dendogram and dynamic cut modules, degree = 1"))
################################ Part 5: UMAP ################################
g1.dist2 <- 1- as.matrix(g1.adj)
g2.dist2 <- 1- as.matrix(g2.adj)
# first cluster with hierarchical clustering
g1.tree2 <- hclust(as.dist(g1.dist2), method = "average")
g2.tree2 <- hclust(as.dist(g2.dist2), method = "average")
g1.tree2.2clust <- cutree(g1.tree2,k=2) # predicted 2-clusters
g2.tree2.2clust <- cutree(g2.tree2,k=2) # predicted 2-clusters
#g1.tree2$labels <- V(g1)[Faction] # true clusters
#g2.tree2$labels <- V(g2)[Faction] # true clusters
g1.tree2$labels <- g1.tree2.2clust
g2.tree2$labels <- g2.tree2.2clust
# if you want to label tree with predicted clusters
plot(g1.tree2, main = paste(g1.netname, " HClust"))
plot(g2.tree2, main = paste(g2.netname, " HClust"))
# Using igraph
g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
#memberships <-community.to.membership(karate, karate.modularity$merges,
# steps=which.max(fgreedy$modularity)-1)
g2.modularity$membership
g2.modularity$merges
g2.membership.ids <- unique(g1.modularity$membership)
g2.membership.ids
cat(paste('Number of detected communities =',length(g2.membership.ids)))
cat("community sizes: ")
sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)})
cat("modularity: ")
max(g2.modularity$modularity)
#karate.modularity$modularity
# Using igraph
g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
#memberships <-community.to.membership(karate, karate.modularity$merges,
# steps=which.max(fgreedy$modularity)-1)
g2.modularity$membership
g2.modularity$merges
g2.membership.ids <- unique(g2.modularity$membership)
g2.membership.ids
cat(paste('Number of detected communities =',length(g2.membership.ids)))
cat("community sizes: ")
sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)})
cat("modularity: ")
max(g2.modularity$modularity)
#karate.modularity$modularity
# Using igraph
g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
#memberships <-community.to.membership(karate, karate.modularity$merges,
# steps=which.max(fgreedy$modularity)-1)
g2.modularity$membership
g2.modularity$merges
g2.membership.ids <- unique(g2.modularity$membership)
cat(paste('Number of detected communities =',length(g2.membership.ids)))
V(g2)$color=g2.modularity$membership
plot(g2,vertex.size=10,
vertex.label=V(g2)$label,vertex.color=V(g2)$color,
main=paste(g2.netname, " Recursive Newman Modularity"))
plot(g2,vertex.size=10,
vertex.label=V(g2)$label,vertex.color=V(g2)$color,
main=paste(g2.netname, " Recursive Newman Modularity"))
tmp <- simplify(g2)
g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
#memberships <-community.to.membership(karate, karate.modularity$merges,
# steps=which.max(fgreedy$modularity)-1)
g2.modularity$membership
g2.modularity$merges
g2.membership.ids <- unique(g2.modularity$membership)
g2.membership.ids
cat(paste('Number of detected communities =',length(g2.membership.ids)))
cat("community sizes: ")
sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)})
cat("modularity: ")
max(g2.modularity$modularity)
#karate.modularity$modularity
V(g2)$color=g2.modularity$membership
plot(g2,vertex.size=10,
vertex.label=V(g2)$label,vertex.color=V(g2)$color,
main=paste(g2.netname, " Recursive Newman Modularity"))
plot(g2,vertex.size=10,
vertex.label=NA,vertex.color=V(g2)$color,
main=paste(g2.netname, " Recursive Newman Modularity"))
plot(g2,vertex.size=10,
vertex.label=NA,vertex.color=V(g2)$color,
main=paste(g2.netname, " Recursive Newman Modularity"))
g2 <- simplify(g2)
# Using igraph
g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
#memberships <-community.to.membership(karate, karate.modularity$merges,
# steps=which.max(fgreedy$modularity)-1)
g2.modularity$membership
g2.modularity$merges
g2.membership.ids <- unique(g2.modularity$membership)
g2.membership.ids
cat(paste('Number of detected communities =',length(g2.membership.ids)))
cat("community sizes: ")
sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)})
cat("modularity: ")
max(g2.modularity$modularity)
#karate.modularity$modularity
V(g2)$color=g2.modularity$membership
plot(g2,vertex.size=10,
vertex.label=NA,vertex.color=V(g2)$color,
main=paste(g2.netname, " Recursive Newman Modularity"))
# dynamic tree cut
minModuleSize = 2
# degree = 1
dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = lap_dist1, deepSplit = 2,
pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
init_mod_sizes <- table(dynamicMods)
# you will have to work through finding and installing labels2colors.
dynamicColors = labels2colors(dynamicMods1)
table(dynamicColors)
plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE,
hang = 0.03, addGuide = TRUE, guideHang = 0.05,
main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1"))
install.packages("labels2colors")
# dynamic tree cut
minModuleSize = 2
# degree = 1
dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = lap_dist1, deepSplit = 2,
pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
g1.mat <- as.matrix(get.adjacency(g1))
g2.mat <- as.matrix(get.adjacency(g2))
g1.dist <- GTOMdist(g1.mat, degree = 1)
g2.dist <- GTOMdist(g2.mat, degree = 1)
# dynamic tree cut
minModuleSize = 2
# degree = 1
dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = g1.dist, deepSplit = 2,
pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
init_mod_sizes <- table(dynamicMods)
init_mod_sizes <- table(dynamicMods1)
# you will have to work through finding and installing labels2colors.
dynamicColors = labels2colors(dynamicMods1)
table(dynamicColors)
plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE,
hang = 0.03, addGuide = TRUE, guideHang = 0.05,
main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1"))
# degree = 1
dynamicMods2 = cutreeDynamic(dendro = lap_tree2, distM = g2.dist, deepSplit = 2,
pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
init_mod_sizes <- table(dynamicMods2)
# you will have to work through finding and installing labels2colors.
dynamicColors = labels2colors(dynamicMods2)
table(dynamicColors)
plotDendroAndColors(lap_tree2, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE,
hang = 0.03, addGuide = TRUE, guideHang = 0.05,
main = paste(g2.netname, "dendogram and dynamic cut modules, degree = 1"))
############################### Part 5: UMAP ################################
g1.dist2 <- 1- as.matrix(g1.adj)
g2.dist2 <- 1- as.matrix(g2.adj)
g1.tree2 <- hclust(as.dist(g1.dist2), method = "average")
g2.tree2 <- hclust(as.dist(g2.dist2), method = "average")
g1.tree2.2clust <- cutree(g1.tree2,k=2) # predicted 2-clusters
g2.tree2.2clust <- cutree(g2.tree2,k=2) # predicted 2-clusters
#g1.tree2$labels <- V(g1)[Faction] # true clusters
#g2.tree2$labels <- V(g2)[Faction] # true clusters
g1.tree2$labels <- g1.tree2.2clust
g2.tree2$labels <- g2.tree2.2clust
# if you want to label tree with predicted clusters
plot(g1.tree2, main = paste(g1.netname, " HClust"))
plot(g2.tree2, main = paste(g2.netname, " HClust"))
plot(g2.tree2, main = paste(g2.netname, " HClust"))
g <- g1
A <- get.adjacency(g) # adj
m <- ecount(g)
n <- vcount(g)
if (is.null(weights)){
weights <- rep(1,n)
}
# Obtain the modularity matrix
B.node.i <- function(i){degree(g)[i]*degree(g)}
B.node.all <- sapply(1:n, B.node.i)
B <- A - (B.node.all/(2*m))
# NOTE: This is identical to: modularity_matrix(g) ! Can verify with:
# modularity_matrix(g) == B
B.eigs <- eigen(B)
max.lam <- B.eigs$values[1]
s <- ifelse(B.eigs$vectors[,1]>0,1,-1)
weights <- B.eigs$vectors[n]/B.eigs$vectors[,1]
# Plotting
#V(g)$color <- ifelse(B[1,]>0,"green","yellow")
V(g)$color <- ifelse(B.eigs$vectors[,1]>0,"green","yellow")
V(g)$size <- 10
plot(g, main=paste(g.netname, " Newman Modularity"))
clust1 = list()
clust2 = list()
clusters = list()
# Make list of clusters
for(i in 1:n){
ifelse(V(g)[i]$color=="green",
clust1 <- append(clust1, V(g)[i]$name),
clust2 <- append(clust2, V(g)[i]$name))}
clusters <- list(clust1, clust2)
Q.node.i <- function(i){sum(
(((B.eigs$vectors[i])*weights[i]*s)^2)*B.eigs[i]$values)}
Q <- (1/(4*m))*sapply(1:n, Q.node.i)
plot(g, main=paste(g.netname, " Newman Modularity"))
g
g.netname
g.netname <- "Karate"
plot(g, main=paste(g.netname, " Newman Modularity"))
g <- g2
A <- get.adjacency(g) # adj
m <- ecount(g)
n <- vcount(g)
if (is.null(weights)){
weights <- rep(1,n)
}
# Obtain the modularity matrix
B.node.i <- function(i){degree(g)[i]*degree(g)}
B.node.all <- sapply(1:n, B.node.i)
B <- A - (B.node.all/(2*m))
# NOTE: This is identical to: modularity_matrix(g) ! Can verify with:
# modularity_matrix(g) == B
B.eigs <- eigen(B)
max.lam <- B.eigs$values[1]
s <- ifelse(B.eigs$vectors[,1]>0,1,-1)
weights <- B.eigs$vectors[n]/B.eigs$vectors[,1]
# Plotting
#V(g)$color <- ifelse(B[1,]>0,"green","yellow")
V(g)$color <- ifelse(B.eigs$vectors[,1]>0,"green","yellow")
V(g)$size <- 10
plot(g, main=paste(g.netname, " Newman Modularity"))
clust1 = list()
clust2 = list()
clusters = list()
# Make list of clusters
for(i in 1:n){
ifelse(V(g)[i]$color=="green",
clust1 <- append(clust1, V(g)[i]$name),
clust2 <- append(clust2, V(g)[i]$name))}
clusters <- list(clust1, clust2)
Q.node.i <- function(i){sum(
(((B.eigs$vectors[i])*weights[i]*s)^2)*B.eigs[i]$values)}
Q <- (1/(4*m))*sapply(1:n, Q.node.i)
plot(g, vertex.label=NA, main=paste(g.netname, " Newman Modularity"))
plot(g, vertex.label=NA, main=paste(g.netname, " Newman Modularity"))
g.netname <- Yeast
g.netname <- "Yeast"
g <- g2
A <- get.adjacency(g) # adj
m <- ecount(g)
n <- vcount(g)
if (is.null(weights)){
weights <- rep(1,n)
}
# Obtain the modularity matrix
B.node.i <- function(i){degree(g)[i]*degree(g)}
B.node.all <- sapply(1:n, B.node.i)
B <- A - (B.node.all/(2*m))
# NOTE: This is identical to: modularity_matrix(g) ! Can verify with:
# modularity_matrix(g) == B
B.eigs <- eigen(B)
max.lam <- B.eigs$values[1]
s <- ifelse(B.eigs$vectors[,1]>0,1,-1)
weights <- B.eigs$vectors[n]/B.eigs$vectors[,1]
# Plotting
#V(g)$color <- ifelse(B[1,]>0,"green","yellow")
V(g)$color <- ifelse(B.eigs$vectors[,1]>0,"green","yellow")
V(g)$size <- 10
plot(g, vertex.label=NA, main=paste(g.netname, " Newman Modularity"))
g1
g1[3]
normalizedMI(x1_clusters,x2_clusters)
gc()
source("mutual_information.R")
normalizedMI(x1_clusters,x2_clusters)
x1_clusters
##################### Part 1: Laplace Spectral Clustering #####################
g1.adj <- get.adjacency(g1) # get adjacency
g1.deg <- rowSums(as.matrix(g1.adj)) # get degrees
g1.Lap <- diag(g1.deg) - g1.adj # L = D-A
n1 <- length(V(g1)) # number of nodes
# get eigvals and vecs
x1 <- eigen(g1.Lap)$vectors[,n1-1]
x1_val <- eigen(g1.Lap)$values[n1-1]
names(x1) <- names(V(g1))
x1
x1_clusters <- ifelse(x1>0,1,-1)
normalizedMI(x1_clusters,x1_clusters)
x1_clusters
normalizedMI(x1_clusters)
vcount(g2)
cat(paste('Number of detected communities =',length(g2.membership.ids)))
cat("community sizes: ")
sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)})
sapply(membership.ids,function(x) {sum(x==g2.modularity$membership)})
# Using igraph
g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
#memberships <-community.to.membership(karate, karate.modularity$merges,
# steps=which.max(fgreedy$modularity)-1)
g2.modularity$membership
g2.modularity$merges
g2.membership.ids <- unique(g2.modularity$membership)
g2.membership.ids
cat(paste('Number of detected communities =',length(g2.membership.ids)))
cat("community sizes: ")
sapply(membership.ids,function(x) {sum(x==g2.modularity$membership)})
cat("modularity: ")
max(g2.modularity$modularity)