191 lines
6.5 KiB
R
191 lines
6.5 KiB
R
# Homework 3 for the University of Tulsa' s CS-7863 Network Theory Course
|
|
# Network Clustering
|
|
# Professor: Dr. McKinney, Spring 2022
|
|
# Noah Schrick - 1492657
|
|
|
|
# Imports
|
|
library(igraph)
|
|
library(igraphdata)
|
|
library(WGCNA)
|
|
# Set working directory to this source file location (Requires RStudio)
|
|
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
|
|
source("self_newman_mod.R")
|
|
source("mutual_information.R")
|
|
|
|
data(karate)
|
|
data(yeast)
|
|
|
|
g1 <- karate
|
|
g1.netname <- "Karate"
|
|
g2 <- yeast
|
|
g2 <- simplify(g2)
|
|
g2.netname <- "Yeast"
|
|
|
|
|
|
##################### Part 1: Laplace Spectral Clustering #####################
|
|
g1.adj <- get.adjacency(g1) # get adjacency
|
|
g2.adj <- get.adjacency(g2)
|
|
g1.deg <- rowSums(as.matrix(g1.adj)) # get degrees
|
|
g2.deg <- rowSums(as.matrix(g2.adj))
|
|
g1.Lap <- diag(g1.deg) - g1.adj # L = D-A
|
|
g2.Lap <- diag(g2.deg) - g2.adj
|
|
n1 <- length(V(g1)) # number of nodes
|
|
n2 <- length(V(g2))
|
|
|
|
# get eigvals and vecs
|
|
x1 <- eigen(g1.Lap)$vectors[,n1-1]
|
|
x2 <- eigen(g2.Lap)$vectors[,n2-1]
|
|
x1_val <- eigen(g1.Lap)$values[n1-1]
|
|
x2_val <- eigen(g2.Lap)$values[n2-1]
|
|
names(x1) <- names(V(g1))
|
|
names(x2) <- names(V(g2))
|
|
x1
|
|
x2
|
|
x1_clusters <- ifelse(x1>0,1,-1)
|
|
x2_clusters <- ifelse(x2>0,1,-1)
|
|
#normalizedMI(x1_clusters)
|
|
|
|
# Plotting
|
|
V(g1)$color <- ifelse(x1_clusters>0,"green","yellow")
|
|
#V(g1)$size <- 80*abs(x1)
|
|
V(g2)$color <- ifelse(x2_clusters>0,"green","yellow")
|
|
V(g2)$size <- 10
|
|
plot(g1, main=paste(g1.netname, " Laplace Spectral Clustering"))
|
|
plot(g2, main=paste(g2.netname, " Laplace Spectral Clustering"),
|
|
vertex.label=NA)
|
|
|
|
|
|
########################## Part 2: Newman Modularity ##########################
|
|
list(Q=Q,max.lam=max.lam,weights=weights,clusters=clusters) <- newman_mod(g1)
|
|
list(Q=Q,max.lam=max.lam,weights=weights,clusters=clusters) <- newman_mod(g2)
|
|
|
|
##################### Part 3: Recursive Newman Modularity #####################
|
|
# Using igraph
|
|
g1.modularity <- fastgreedy.community(g1,merges=TRUE, modularity=TRUE, membership=TRUE)
|
|
#memberships <-community.to.membership(karate, karate.modularity$merges,
|
|
# steps=which.max(fgreedy$modularity)-1)
|
|
g1.modularity$membership
|
|
g1.modularity$merges
|
|
g1.membership.ids <- unique(g1.modularity$membership)
|
|
g1.membership.ids
|
|
cat(paste('Number of detected communities =',length(g1.membership.ids)))
|
|
cat("community sizes: ")
|
|
sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)})
|
|
cat("modularity: ")
|
|
max(g1.modularity$modularity)
|
|
#karate.modularity$modularity
|
|
|
|
V(g1)$color[karate.modularity$membership==1] <- "green"
|
|
V(g1)$color[karate.modularity$membership==2] <- "red"
|
|
V(g1)$color[karate.modularity$membership==3] <- "blue"
|
|
|
|
plot(g1,vertex.size=10,
|
|
vertex.label=V(karate)$label,vertex.color=V(g1)$color,
|
|
main=paste(g1.netname, " Recursive Newman Modularity"))
|
|
|
|
# Using igraph
|
|
g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
|
|
#memberships <-community.to.membership(karate, karate.modularity$merges,
|
|
# steps=which.max(fgreedy$modularity)-1)
|
|
g2.modularity$membership
|
|
g2.modularity$merges
|
|
g2.membership.ids <- unique(g2.modularity$membership)
|
|
g2.membership.ids
|
|
cat(paste('Number of detected communities =',length(g2.membership.ids)))
|
|
cat("community sizes: ")
|
|
sapply(membership.ids,function(x) {sum(x==g2.modularity$membership)})
|
|
cat("modularity: ")
|
|
max(g2.modularity$modularity)
|
|
#karate.modularity$modularity
|
|
|
|
V(g2)$color=g2.modularity$membership
|
|
|
|
|
|
plot(g2,vertex.size=10,
|
|
vertex.label=NA,vertex.color=V(g2)$color,
|
|
main=paste(g2.netname, " Recursive Newman Modularity"))
|
|
|
|
###################### Part 4: TOM and Dynamic Tree Cut ######################
|
|
# Next smallest eig
|
|
y1 <- eigen(g1.Lap)$vectors[,n1-2] # get n-2 column
|
|
names(y1) <- names(V(g1))
|
|
y1_val <- eigen(g1.Lap)$values[n1-2]
|
|
y2 <- eigen(g2.Lap)$vectors[,n2-2] # get n-2 column
|
|
names(y2) <- names(V(g2))
|
|
y2_val <- eigen(g2.Lap)$values[n2-2]
|
|
|
|
# Distance in the 2D Laplacian Eig-Space
|
|
lap_dist1 <- dist(cbind(x1,y1))
|
|
lap_dist2 <- dist(cbind(x2,y2))
|
|
|
|
# Matrix dists from TOM
|
|
g1.mat <- as.matrix(get.adjacency(g1))
|
|
g2.mat <- as.matrix(get.adjacency(g2))
|
|
g1.dist <- GTOMdist(g1.mat, degree = 1)
|
|
g2.dist <- GTOMdist(g2.mat, degree = 1)
|
|
|
|
# Use hierarchical clustering of distance matrix
|
|
lap_tree1 <- hclust(lap_dist1)
|
|
lap_tree2 <- hclust(lap_dist2)
|
|
|
|
# Use WGCNA to dynamically decide how many clusters
|
|
cutreeDynamicTree(lap_tree1)
|
|
cutreeDynamicTree(lap_tree2)
|
|
|
|
# TOM
|
|
g1.TOM <- TOMsimilarity(as.matrix(g1.adj))
|
|
g2.TOM <- TOMsimilarity(as.matrix(g2.adj))
|
|
g1.TOM
|
|
g2.TOM
|
|
TOMplot(g1.TOM, lap_tree1, main=paste(g1.netname, " Heatmap Plot"))
|
|
TOMplot(g2.TOM, lap_tree2, main=paste(g2.netname, " Heatmap Plot"))
|
|
|
|
# dynamic tree cut
|
|
minModuleSize = 2
|
|
|
|
# degree = 1
|
|
dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = g1.dist, deepSplit = 2,
|
|
pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
|
|
init_mod_sizes <- table(dynamicMods1)
|
|
|
|
# you will have to work through finding and installing labels2colors.
|
|
dynamicColors = labels2colors(dynamicMods1)
|
|
table(dynamicColors)
|
|
plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE,
|
|
hang = 0.03, addGuide = TRUE, guideHang = 0.05,
|
|
main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1"))
|
|
|
|
# degree = 1
|
|
dynamicMods2 = cutreeDynamic(dendro = lap_tree2, distM = g2.dist, deepSplit = 2,
|
|
pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
|
|
init_mod_sizes <- table(dynamicMods2)
|
|
|
|
# you will have to work through finding and installing labels2colors.
|
|
dynamicColors = labels2colors(dynamicMods2)
|
|
table(dynamicColors)
|
|
plotDendroAndColors(lap_tree2, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE,
|
|
hang = 0.03, addGuide = TRUE, guideHang = 0.05,
|
|
main = paste(g2.netname, "dendogram and dynamic cut modules, degree = 1"))
|
|
|
|
############################### Part 5: UMAP ################################
|
|
g1.dist2 <- 1- as.matrix(g1.adj)
|
|
g2.dist2 <- 1- as.matrix(g2.adj)
|
|
|
|
# first cluster with hierarchical clustering
|
|
|
|
g1.tree2 <- hclust(as.dist(g1.dist2), method = "average")
|
|
g2.tree2 <- hclust(as.dist(g2.dist2), method = "average")
|
|
|
|
g1.tree2.2clust <- cutree(g1.tree2,k=2) # predicted 2-clusters
|
|
g2.tree2.2clust <- cutree(g2.tree2,k=2) # predicted 2-clusters
|
|
|
|
#g1.tree2$labels <- V(g1)[Faction] # true clusters
|
|
#g2.tree2$labels <- V(g2)[Faction] # true clusters
|
|
g1.tree2$labels <- g1.tree2.2clust
|
|
g2.tree2$labels <- g2.tree2.2clust
|
|
|
|
# if you want to label tree with predicted clusters
|
|
plot(g1.tree2, main = paste(g1.netname, " HClust"))
|
|
plot(g2.tree2, main = paste(g2.netname, " HClust"))
|
|
|