129 lines
4.0 KiB
R
129 lines
4.0 KiB
R
# Homework 3 for the University of Tulsa' s CS-7863 Network Theory Course
|
|
# Network Clustering
|
|
# Professor: Dr. McKinney, Spring 2022
|
|
# Noah Schrick - 1492657
|
|
|
|
# Imports
|
|
library(igraph)
|
|
library(igraphdata)
|
|
library(WGCNA)
|
|
|
|
data(karate)
|
|
data(yeast)
|
|
|
|
g1 <- karate
|
|
g1.netname <- "Karate"
|
|
g2 <- yeast
|
|
g2.netname <- "Yeast"
|
|
|
|
##################### Part 1: Laplace Spectral Clustering #####################
|
|
g1.adj <- get.adjacency(g1) # get adjacency
|
|
g2.adj <- get.adjacency(g2)
|
|
g1.deg <- rowSums(as.matrix(g1.adj)) # get degrees
|
|
g2.deg <- rowSums(as.matrix(g2.adj))
|
|
g1.Lap <- diag(g1.deg) - g1.adj # L = D-A
|
|
g2.Lap <- diag(g2.deg) - g2.adj
|
|
n1 <- length(V(g1)) # number of nodes
|
|
n2 <- length(V(g2))
|
|
|
|
# get eigvals and vecs
|
|
x1 <- eigen(g1.Lap)$vectors[,n1-1]
|
|
x2 <- eigen(g2.Lap)$vectors[,n2-1]
|
|
x1_val <- eigen(g1.Lap)$values[n1-1]
|
|
x2_val <- eigen(g2.Lap)$values[n2-1]
|
|
names(x1) <- names(V(g1))
|
|
names(x2) <- names(V(g2))
|
|
x1
|
|
x2
|
|
x1_clusters <- ifelse(x1>0,1,-1)
|
|
x2_clusters <- ifelse(x2>0,1,-1)
|
|
|
|
# Plotting
|
|
V(g1)$color <- ifelse(x1_clusters>0,"green","yellow")
|
|
#V(g1)$size <- 80*abs(x1)
|
|
V(g2)$color <- ifelse(x2_clusters>0,"green","yellow")
|
|
V(g2)$size <- 10
|
|
plot(g1, main=paste(g1.netname, " Laplace Spectral Clustering"))
|
|
plot(g2, main=paste(g2.netname, " Laplace Spectral Clustering"),
|
|
vertex.label=NA)
|
|
|
|
|
|
########################## Part 2: Newman Modularity ##########################
|
|
|
|
|
|
##################### Part 3: Recursive Newman Modularity #####################
|
|
# Using igraph
|
|
karate.modularity <- fastgreedy.community(karate,merges=TRUE, modularity=TRUE, membership=TRUE)
|
|
#memberships <-community.to.membership(karate, karate.modularity$merges,
|
|
# steps=which.max(fgreedy$modularity)-1)
|
|
karate.modularity$membership
|
|
karate.modularity$merges
|
|
membership.ids <- unique(karate.modularity$membership)
|
|
membership.ids
|
|
cat(paste('Number of detected communities =',length(membership.ids)))
|
|
cat("community sizes: ")
|
|
sapply(membership.ids,function(x) {sum(x==karate.modularity$membership)})
|
|
cat("modularity: ")
|
|
max(karate.modularity$modularity)
|
|
#karate.modularity$modularity
|
|
|
|
V(karate)$color[karate.modularity$membership==1] <- "green"
|
|
V(karate)$color[karate.modularity$membership==2] <- "red"
|
|
V(karate)$color[karate.modularity$membership==3] <- "blue"
|
|
|
|
plot(karate,vertex.size=10,
|
|
vertex.label=V(karate)$label,vertex.color=V(karate)$color,
|
|
main=paste("Karate Recursive Newman Modularity"))
|
|
|
|
|
|
###################### Part 4: TOM and Dynamic Tree Cut ######################
|
|
# Next smallest eig
|
|
y1 <- eigen(g1.Lap)$vectors[,n1-2] # get n-2 column
|
|
names(y1) <- names(V(g1))
|
|
y1_val <- eigen(g1.Lap)$values[n1-2]
|
|
y2 <- eigen(g2.Lap)$vectors[,n2-2] # get n-2 column
|
|
names(y2) <- names(V(g2))
|
|
y2_val <- eigen(g2.Lap)$values[n2-2]
|
|
|
|
# Distance in the 2D Laplacian Eig-Space
|
|
lap_dist1 <- dist(cbind(x1,y1))
|
|
lap_dist2 <- dist(cbind(x2,y2))
|
|
|
|
# Use hierachical clustering of distance matrix
|
|
lap_tree1 <- hclust(lap_dist1)
|
|
lap_tree2 <- hclust(lap_dist2)
|
|
|
|
# Use WGCNA to dynamically decide how many clusters
|
|
cutreeDynamicTree(lap_tree1)
|
|
cutreeDynamicTree(lap_tree2)
|
|
|
|
# TOM
|
|
g1.TOM <- TOMsimilarity(as.matrix(g1.adj))
|
|
g2.TOM <- TOMsimilarity(as.matrix(g2.adj))
|
|
g1.TOM
|
|
g2.TOM
|
|
TOMplot(g1.TOM, lap_tree1, main=paste(g1.netname, " Heatmap Plot"))
|
|
TOMplot(g2.TOM, lap_tree2, main=paste(g2.netname, " Heatmap Plot"))
|
|
|
|
################################ Part 5: UMAP ################################
|
|
g1.dist2 <- 1- as.matrix(g1.adj)
|
|
g2.dist2 <- 1- as.matrix(g2.adj)
|
|
|
|
# first cluster with hierarchical clustering
|
|
|
|
g1.tree2 <- hclust(as.dist(g1.dist2), method = "average")
|
|
g2.tree2 <- hclust(as.dist(g2.dist2), method = "average")
|
|
|
|
g1.tree2.2clust <- cutree(g1.tree2,k=2) # predicted 2-clusters
|
|
g2.tree2.2clust <- cutree(g2.tree2,k=2) # predicted 2-clusters
|
|
|
|
#g1.tree2$labels <- V(g1)[Faction] # true clusters
|
|
#g2.tree2$labels <- V(g2)[Faction] # true clusters
|
|
g1.tree2$labels <- g1.tree2.2clust
|
|
g2.tree2$labels <- g2.tree2.2clust
|
|
|
|
# if you want to label tree with predicted clusters
|
|
plot(g1.tree2, main = paste(g1.netname, " HClust"))
|
|
plot(g2.tree2, main = paste(g2.netname, " HClust"))
|
|
|