Final edits and report
This commit is contained in:
parent
d9c497c154
commit
777cbde4fc
424
.Rhistory
Normal file
424
.Rhistory
Normal file
@ -0,0 +1,424 @@
|
||||
q()
|
||||
source("self_newman_mod.R")
|
||||
getwd()_
|
||||
getwd()
|
||||
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
|
||||
setwd(getSrcDirectory()[1])
|
||||
this.dir <- dirname(parent.frame(2)$ofile)
|
||||
setwd(getSrcDirectory()[1])
|
||||
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
|
||||
source("self_newman_mod.R")
|
||||
# Homework 3 for the University of Tulsa' s CS-7863 Network Theory Course
|
||||
# Network Clustering
|
||||
# Professor: Dr. McKinney, Spring 2022
|
||||
# Noah Schrick - 1492657
|
||||
# Imports
|
||||
library(igraph)
|
||||
library(igraphdata)
|
||||
library(WGCNA)
|
||||
# Set working directory to this source file location (Requires RStudio)
|
||||
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
|
||||
source("self_newman_mod.R")
|
||||
data(karate)
|
||||
data(yeast)
|
||||
g1 <- karate
|
||||
g1.netname <- "Karate"
|
||||
g2 <- yeast
|
||||
g2.netname <- "Yeast"
|
||||
##################### Part 1: Laplace Spectral Clustering #####################
|
||||
g1.adj <- get.adjacency(g1) # get adjacency
|
||||
g2.adj <- get.adjacency(g2)
|
||||
g1.deg <- rowSums(as.matrix(g1.adj)) # get degrees
|
||||
g2.deg <- rowSums(as.matrix(g2.adj))
|
||||
g1.Lap <- diag(g1.deg) - g1.adj # L = D-A
|
||||
g2.Lap <- diag(g2.deg) - g2.adj
|
||||
n1 <- length(V(g1)) # number of nodes
|
||||
n2 <- length(V(g2))
|
||||
# get eigvals and vecs
|
||||
x1 <- eigen(g1.Lap)$vectors[,n1-1]
|
||||
x2 <- eigen(g2.Lap)$vectors[,n2-1]
|
||||
x1_val <- eigen(g1.Lap)$values[n1-1]
|
||||
x2_val <- eigen(g2.Lap)$values[n2-1]
|
||||
names(x1) <- names(V(g1))
|
||||
names(x2) <- names(V(g2))
|
||||
x1
|
||||
x2
|
||||
x1_clusters <- ifelse(x1>0,1,-1)
|
||||
x2_clusters <- ifelse(x2>0,1,-1)
|
||||
# Plotting
|
||||
V(g1)$color <- ifelse(x1_clusters>0,"green","yellow")
|
||||
#V(g1)$size <- 80*abs(x1)
|
||||
V(g2)$color <- ifelse(x2_clusters>0,"green","yellow")
|
||||
V(g2)$size <- 10
|
||||
plot(g1, main=paste(g1.netname, " Laplace Spectral Clustering"))
|
||||
plot(g2, main=paste(g2.netname, " Laplace Spectral Clustering"),
|
||||
vertex.label=NA)
|
||||
########################## Part 2: Newman Modularity ##########################
|
||||
list(Q=Q,max.lam=max.lam,weights=weights,clusters=clusters) <- newman_mod(g1)
|
||||
list(Q=Q,max.lam=max.lam,weights=weights,clusters=clusters) <- newman_mod(g2)
|
||||
##################### Part 3: Recursive Newman Modularity #####################
|
||||
# Using igraph
|
||||
karate.modularity <- fastgreedy.community(karate,merges=TRUE, modularity=TRUE, membership=TRUE)
|
||||
#memberships <-community.to.membership(karate, karate.modularity$merges,
|
||||
# steps=which.max(fgreedy$modularity)-1)
|
||||
karate.modularity$membership
|
||||
karate.modularity$merges
|
||||
membership.ids <- unique(karate.modularity$membership)
|
||||
membership.ids
|
||||
cat(paste('Number of detected communities =',length(membership.ids)))
|
||||
cat("community sizes: ")
|
||||
sapply(membership.ids,function(x) {sum(x==karate.modularity$membership)})
|
||||
cat("modularity: ")
|
||||
max(karate.modularity$modularity)
|
||||
#karate.modularity$modularity
|
||||
V(karate)$color[karate.modularity$membership==1] <- "green"
|
||||
V(karate)$color[karate.modularity$membership==2] <- "red"
|
||||
V(karate)$color[karate.modularity$membership==3] <- "blue"
|
||||
plot(karate,vertex.size=10,
|
||||
vertex.label=V(karate)$label,vertex.color=V(karate)$color,
|
||||
main=paste("Karate Recursive Newman Modularity"))
|
||||
###################### Part 4: TOM and Dynamic Tree Cut ######################
|
||||
# Next smallest eig
|
||||
y1 <- eigen(g1.Lap)$vectors[,n1-2] # get n-2 column
|
||||
names(y1) <- names(V(g1))
|
||||
y1_val <- eigen(g1.Lap)$values[n1-2]
|
||||
y2 <- eigen(g2.Lap)$vectors[,n2-2] # get n-2 column
|
||||
names(y2) <- names(V(g2))
|
||||
y2_val <- eigen(g2.Lap)$values[n2-2]
|
||||
# Distance in the 2D Laplacian Eig-Space
|
||||
lap_dist1 <- dist(cbind(x1,y1))
|
||||
lap_dist2 <- dist(cbind(x2,y2))
|
||||
# Use hierarchical clustering of distance matrix
|
||||
lap_tree1 <- hclust(lap_dist1)
|
||||
lap_tree2 <- hclust(lap_dist2)
|
||||
# Use WGCNA to dynamically decide how many clusters
|
||||
cutreeDynamicTree(lap_tree1)
|
||||
cutreeDynamicTree(lap_tree2)
|
||||
# TOM
|
||||
g1.TOM <- TOMsimilarity(as.matrix(g1.adj))
|
||||
g2.TOM <- TOMsimilarity(as.matrix(g2.adj))
|
||||
g1.TOM
|
||||
g2.TOM
|
||||
TOMplot(g1.TOM, lap_tree1, main=paste(g1.netname, " Heatmap Plot"))
|
||||
TOMplot(g2.TOM, lap_tree2, main=paste(g2.netname, " Heatmap Plot"))
|
||||
# dynamic tree cut
|
||||
minModuleSize = 2
|
||||
# degree = 1
|
||||
dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = lap_dist1, deepSplit = 2,
|
||||
pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
|
||||
init_mod_sizes <- table(dynamicMods)
|
||||
# you will have to work through finding and installing labels2colors.
|
||||
dynamicColors = labels2colors(dynamicMods1)
|
||||
table(dynamicColors)
|
||||
plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE,
|
||||
hang = 0.03, addGuide = TRUE, guideHang = 0.05,
|
||||
main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1"))
|
||||
# degree = 1
|
||||
dynamicMods2 = cutreeDynamic(dendro = lap_tree2, distM = lap_dist2, deepSplit = 2,
|
||||
pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
|
||||
init_mod_sizes <- table(dynamicMods)
|
||||
# you will have to work through finding and installing labels2colors.
|
||||
dynamicColors = labels2colors(dynamicMods2)
|
||||
table(dynamicColors)
|
||||
plotDendroAndColors(lap_tree2, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE,
|
||||
hang = 0.03, addGuide = TRUE, guideHang = 0.05,
|
||||
main = paste(g2.netname, "dendogram and dynamic cut modules, degree = 1"))
|
||||
################################ Part 5: UMAP ################################
|
||||
g1.dist2 <- 1- as.matrix(g1.adj)
|
||||
g2.dist2 <- 1- as.matrix(g2.adj)
|
||||
# first cluster with hierarchical clustering
|
||||
g1.tree2 <- hclust(as.dist(g1.dist2), method = "average")
|
||||
g2.tree2 <- hclust(as.dist(g2.dist2), method = "average")
|
||||
g1.tree2.2clust <- cutree(g1.tree2,k=2) # predicted 2-clusters
|
||||
g2.tree2.2clust <- cutree(g2.tree2,k=2) # predicted 2-clusters
|
||||
#g1.tree2$labels <- V(g1)[Faction] # true clusters
|
||||
#g2.tree2$labels <- V(g2)[Faction] # true clusters
|
||||
g1.tree2$labels <- g1.tree2.2clust
|
||||
g2.tree2$labels <- g2.tree2.2clust
|
||||
# if you want to label tree with predicted clusters
|
||||
plot(g1.tree2, main = paste(g1.netname, " HClust"))
|
||||
plot(g2.tree2, main = paste(g2.netname, " HClust"))
|
||||
# Using igraph
|
||||
g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
|
||||
#memberships <-community.to.membership(karate, karate.modularity$merges,
|
||||
# steps=which.max(fgreedy$modularity)-1)
|
||||
g2.modularity$membership
|
||||
g2.modularity$merges
|
||||
g2.membership.ids <- unique(g1.modularity$membership)
|
||||
g2.membership.ids
|
||||
cat(paste('Number of detected communities =',length(g2.membership.ids)))
|
||||
cat("community sizes: ")
|
||||
sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)})
|
||||
cat("modularity: ")
|
||||
max(g2.modularity$modularity)
|
||||
#karate.modularity$modularity
|
||||
# Using igraph
|
||||
g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
|
||||
#memberships <-community.to.membership(karate, karate.modularity$merges,
|
||||
# steps=which.max(fgreedy$modularity)-1)
|
||||
g2.modularity$membership
|
||||
g2.modularity$merges
|
||||
g2.membership.ids <- unique(g2.modularity$membership)
|
||||
g2.membership.ids
|
||||
cat(paste('Number of detected communities =',length(g2.membership.ids)))
|
||||
cat("community sizes: ")
|
||||
sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)})
|
||||
cat("modularity: ")
|
||||
max(g2.modularity$modularity)
|
||||
#karate.modularity$modularity
|
||||
# Using igraph
|
||||
g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
|
||||
#memberships <-community.to.membership(karate, karate.modularity$merges,
|
||||
# steps=which.max(fgreedy$modularity)-1)
|
||||
g2.modularity$membership
|
||||
g2.modularity$merges
|
||||
g2.membership.ids <- unique(g2.modularity$membership)
|
||||
cat(paste('Number of detected communities =',length(g2.membership.ids)))
|
||||
V(g2)$color=g2.modularity$membership
|
||||
plot(g2,vertex.size=10,
|
||||
vertex.label=V(g2)$label,vertex.color=V(g2)$color,
|
||||
main=paste(g2.netname, " Recursive Newman Modularity"))
|
||||
plot(g2,vertex.size=10,
|
||||
vertex.label=V(g2)$label,vertex.color=V(g2)$color,
|
||||
main=paste(g2.netname, " Recursive Newman Modularity"))
|
||||
tmp <- simplify(g2)
|
||||
g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
|
||||
#memberships <-community.to.membership(karate, karate.modularity$merges,
|
||||
# steps=which.max(fgreedy$modularity)-1)
|
||||
g2.modularity$membership
|
||||
g2.modularity$merges
|
||||
g2.membership.ids <- unique(g2.modularity$membership)
|
||||
g2.membership.ids
|
||||
cat(paste('Number of detected communities =',length(g2.membership.ids)))
|
||||
cat("community sizes: ")
|
||||
sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)})
|
||||
cat("modularity: ")
|
||||
max(g2.modularity$modularity)
|
||||
#karate.modularity$modularity
|
||||
V(g2)$color=g2.modularity$membership
|
||||
plot(g2,vertex.size=10,
|
||||
vertex.label=V(g2)$label,vertex.color=V(g2)$color,
|
||||
main=paste(g2.netname, " Recursive Newman Modularity"))
|
||||
plot(g2,vertex.size=10,
|
||||
vertex.label=NA,vertex.color=V(g2)$color,
|
||||
main=paste(g2.netname, " Recursive Newman Modularity"))
|
||||
plot(g2,vertex.size=10,
|
||||
vertex.label=NA,vertex.color=V(g2)$color,
|
||||
main=paste(g2.netname, " Recursive Newman Modularity"))
|
||||
g2 <- simplify(g2)
|
||||
# Using igraph
|
||||
g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
|
||||
#memberships <-community.to.membership(karate, karate.modularity$merges,
|
||||
# steps=which.max(fgreedy$modularity)-1)
|
||||
g2.modularity$membership
|
||||
g2.modularity$merges
|
||||
g2.membership.ids <- unique(g2.modularity$membership)
|
||||
g2.membership.ids
|
||||
cat(paste('Number of detected communities =',length(g2.membership.ids)))
|
||||
cat("community sizes: ")
|
||||
sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)})
|
||||
cat("modularity: ")
|
||||
max(g2.modularity$modularity)
|
||||
#karate.modularity$modularity
|
||||
V(g2)$color=g2.modularity$membership
|
||||
plot(g2,vertex.size=10,
|
||||
vertex.label=NA,vertex.color=V(g2)$color,
|
||||
main=paste(g2.netname, " Recursive Newman Modularity"))
|
||||
# dynamic tree cut
|
||||
minModuleSize = 2
|
||||
# degree = 1
|
||||
dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = lap_dist1, deepSplit = 2,
|
||||
pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
|
||||
init_mod_sizes <- table(dynamicMods)
|
||||
# you will have to work through finding and installing labels2colors.
|
||||
dynamicColors = labels2colors(dynamicMods1)
|
||||
table(dynamicColors)
|
||||
plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE,
|
||||
hang = 0.03, addGuide = TRUE, guideHang = 0.05,
|
||||
main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1"))
|
||||
install.packages("labels2colors")
|
||||
# dynamic tree cut
|
||||
minModuleSize = 2
|
||||
# degree = 1
|
||||
dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = lap_dist1, deepSplit = 2,
|
||||
pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
|
||||
g1.mat <- as.matrix(get.adjacency(g1))
|
||||
g2.mat <- as.matrix(get.adjacency(g2))
|
||||
g1.dist <- GTOMdist(g1.mat, degree = 1)
|
||||
g2.dist <- GTOMdist(g2.mat, degree = 1)
|
||||
# dynamic tree cut
|
||||
minModuleSize = 2
|
||||
# degree = 1
|
||||
dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = g1.dist, deepSplit = 2,
|
||||
pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
|
||||
init_mod_sizes <- table(dynamicMods)
|
||||
init_mod_sizes <- table(dynamicMods1)
|
||||
# you will have to work through finding and installing labels2colors.
|
||||
dynamicColors = labels2colors(dynamicMods1)
|
||||
table(dynamicColors)
|
||||
plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE,
|
||||
hang = 0.03, addGuide = TRUE, guideHang = 0.05,
|
||||
main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1"))
|
||||
# degree = 1
|
||||
dynamicMods2 = cutreeDynamic(dendro = lap_tree2, distM = g2.dist, deepSplit = 2,
|
||||
pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
|
||||
init_mod_sizes <- table(dynamicMods2)
|
||||
# you will have to work through finding and installing labels2colors.
|
||||
dynamicColors = labels2colors(dynamicMods2)
|
||||
table(dynamicColors)
|
||||
plotDendroAndColors(lap_tree2, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE,
|
||||
hang = 0.03, addGuide = TRUE, guideHang = 0.05,
|
||||
main = paste(g2.netname, "dendogram and dynamic cut modules, degree = 1"))
|
||||
############################### Part 5: UMAP ################################
|
||||
g1.dist2 <- 1- as.matrix(g1.adj)
|
||||
g2.dist2 <- 1- as.matrix(g2.adj)
|
||||
g1.tree2 <- hclust(as.dist(g1.dist2), method = "average")
|
||||
g2.tree2 <- hclust(as.dist(g2.dist2), method = "average")
|
||||
g1.tree2.2clust <- cutree(g1.tree2,k=2) # predicted 2-clusters
|
||||
g2.tree2.2clust <- cutree(g2.tree2,k=2) # predicted 2-clusters
|
||||
#g1.tree2$labels <- V(g1)[Faction] # true clusters
|
||||
#g2.tree2$labels <- V(g2)[Faction] # true clusters
|
||||
g1.tree2$labels <- g1.tree2.2clust
|
||||
g2.tree2$labels <- g2.tree2.2clust
|
||||
# if you want to label tree with predicted clusters
|
||||
plot(g1.tree2, main = paste(g1.netname, " HClust"))
|
||||
plot(g2.tree2, main = paste(g2.netname, " HClust"))
|
||||
plot(g2.tree2, main = paste(g2.netname, " HClust"))
|
||||
g <- g1
|
||||
A <- get.adjacency(g) # adj
|
||||
m <- ecount(g)
|
||||
n <- vcount(g)
|
||||
if (is.null(weights)){
|
||||
weights <- rep(1,n)
|
||||
}
|
||||
# Obtain the modularity matrix
|
||||
B.node.i <- function(i){degree(g)[i]*degree(g)}
|
||||
B.node.all <- sapply(1:n, B.node.i)
|
||||
B <- A - (B.node.all/(2*m))
|
||||
# NOTE: This is identical to: modularity_matrix(g) ! Can verify with:
|
||||
# modularity_matrix(g) == B
|
||||
B.eigs <- eigen(B)
|
||||
max.lam <- B.eigs$values[1]
|
||||
s <- ifelse(B.eigs$vectors[,1]>0,1,-1)
|
||||
weights <- B.eigs$vectors[n]/B.eigs$vectors[,1]
|
||||
# Plotting
|
||||
#V(g)$color <- ifelse(B[1,]>0,"green","yellow")
|
||||
V(g)$color <- ifelse(B.eigs$vectors[,1]>0,"green","yellow")
|
||||
V(g)$size <- 10
|
||||
plot(g, main=paste(g.netname, " Newman Modularity"))
|
||||
clust1 = list()
|
||||
clust2 = list()
|
||||
clusters = list()
|
||||
# Make list of clusters
|
||||
for(i in 1:n){
|
||||
ifelse(V(g)[i]$color=="green",
|
||||
clust1 <- append(clust1, V(g)[i]$name),
|
||||
clust2 <- append(clust2, V(g)[i]$name))}
|
||||
clusters <- list(clust1, clust2)
|
||||
Q.node.i <- function(i){sum(
|
||||
(((B.eigs$vectors[i])*weights[i]*s)^2)*B.eigs[i]$values)}
|
||||
Q <- (1/(4*m))*sapply(1:n, Q.node.i)
|
||||
plot(g, main=paste(g.netname, " Newman Modularity"))
|
||||
g
|
||||
g.netname
|
||||
g.netname <- "Karate"
|
||||
plot(g, main=paste(g.netname, " Newman Modularity"))
|
||||
g <- g2
|
||||
A <- get.adjacency(g) # adj
|
||||
m <- ecount(g)
|
||||
n <- vcount(g)
|
||||
if (is.null(weights)){
|
||||
weights <- rep(1,n)
|
||||
}
|
||||
# Obtain the modularity matrix
|
||||
B.node.i <- function(i){degree(g)[i]*degree(g)}
|
||||
B.node.all <- sapply(1:n, B.node.i)
|
||||
B <- A - (B.node.all/(2*m))
|
||||
# NOTE: This is identical to: modularity_matrix(g) ! Can verify with:
|
||||
# modularity_matrix(g) == B
|
||||
B.eigs <- eigen(B)
|
||||
max.lam <- B.eigs$values[1]
|
||||
s <- ifelse(B.eigs$vectors[,1]>0,1,-1)
|
||||
weights <- B.eigs$vectors[n]/B.eigs$vectors[,1]
|
||||
# Plotting
|
||||
#V(g)$color <- ifelse(B[1,]>0,"green","yellow")
|
||||
V(g)$color <- ifelse(B.eigs$vectors[,1]>0,"green","yellow")
|
||||
V(g)$size <- 10
|
||||
plot(g, main=paste(g.netname, " Newman Modularity"))
|
||||
clust1 = list()
|
||||
clust2 = list()
|
||||
clusters = list()
|
||||
# Make list of clusters
|
||||
for(i in 1:n){
|
||||
ifelse(V(g)[i]$color=="green",
|
||||
clust1 <- append(clust1, V(g)[i]$name),
|
||||
clust2 <- append(clust2, V(g)[i]$name))}
|
||||
clusters <- list(clust1, clust2)
|
||||
Q.node.i <- function(i){sum(
|
||||
(((B.eigs$vectors[i])*weights[i]*s)^2)*B.eigs[i]$values)}
|
||||
Q <- (1/(4*m))*sapply(1:n, Q.node.i)
|
||||
plot(g, vertex.label=NA, main=paste(g.netname, " Newman Modularity"))
|
||||
plot(g, vertex.label=NA, main=paste(g.netname, " Newman Modularity"))
|
||||
g.netname <- Yeast
|
||||
g.netname <- "Yeast"
|
||||
g <- g2
|
||||
A <- get.adjacency(g) # adj
|
||||
m <- ecount(g)
|
||||
n <- vcount(g)
|
||||
if (is.null(weights)){
|
||||
weights <- rep(1,n)
|
||||
}
|
||||
# Obtain the modularity matrix
|
||||
B.node.i <- function(i){degree(g)[i]*degree(g)}
|
||||
B.node.all <- sapply(1:n, B.node.i)
|
||||
B <- A - (B.node.all/(2*m))
|
||||
# NOTE: This is identical to: modularity_matrix(g) ! Can verify with:
|
||||
# modularity_matrix(g) == B
|
||||
B.eigs <- eigen(B)
|
||||
max.lam <- B.eigs$values[1]
|
||||
s <- ifelse(B.eigs$vectors[,1]>0,1,-1)
|
||||
weights <- B.eigs$vectors[n]/B.eigs$vectors[,1]
|
||||
# Plotting
|
||||
#V(g)$color <- ifelse(B[1,]>0,"green","yellow")
|
||||
V(g)$color <- ifelse(B.eigs$vectors[,1]>0,"green","yellow")
|
||||
V(g)$size <- 10
|
||||
plot(g, vertex.label=NA, main=paste(g.netname, " Newman Modularity"))
|
||||
g1
|
||||
g1[3]
|
||||
normalizedMI(x1_clusters,x2_clusters)
|
||||
gc()
|
||||
source("mutual_information.R")
|
||||
normalizedMI(x1_clusters,x2_clusters)
|
||||
x1_clusters
|
||||
##################### Part 1: Laplace Spectral Clustering #####################
|
||||
g1.adj <- get.adjacency(g1) # get adjacency
|
||||
g1.deg <- rowSums(as.matrix(g1.adj)) # get degrees
|
||||
g1.Lap <- diag(g1.deg) - g1.adj # L = D-A
|
||||
n1 <- length(V(g1)) # number of nodes
|
||||
# get eigvals and vecs
|
||||
x1 <- eigen(g1.Lap)$vectors[,n1-1]
|
||||
x1_val <- eigen(g1.Lap)$values[n1-1]
|
||||
names(x1) <- names(V(g1))
|
||||
x1
|
||||
x1_clusters <- ifelse(x1>0,1,-1)
|
||||
normalizedMI(x1_clusters,x1_clusters)
|
||||
x1_clusters
|
||||
normalizedMI(x1_clusters)
|
||||
vcount(g2)
|
||||
cat(paste('Number of detected communities =',length(g2.membership.ids)))
|
||||
cat("community sizes: ")
|
||||
sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)})
|
||||
sapply(membership.ids,function(x) {sum(x==g2.modularity$membership)})
|
||||
# Using igraph
|
||||
g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
|
||||
#memberships <-community.to.membership(karate, karate.modularity$merges,
|
||||
# steps=which.max(fgreedy$modularity)-1)
|
||||
g2.modularity$membership
|
||||
g2.modularity$merges
|
||||
g2.membership.ids <- unique(g2.modularity$membership)
|
||||
g2.membership.ids
|
||||
cat(paste('Number of detected communities =',length(g2.membership.ids)))
|
||||
cat("community sizes: ")
|
||||
sapply(membership.ids,function(x) {sum(x==g2.modularity$membership)})
|
||||
cat("modularity: ")
|
||||
max(g2.modularity$modularity)
|
||||
1
.~lock.Schrick-Noah_CS-7863_Homework3_Writeup.odt#
Normal file
1
.~lock.Schrick-Noah_CS-7863_Homework3_Writeup.odt#
Normal file
@ -0,0 +1 @@
|
||||
,noah,NovaArchSys,24.03.2022 03:03,file:///home/noah/.config/libreoffice/4;
|
||||
@ -10,6 +10,7 @@ library(WGCNA)
|
||||
# Set working directory to this source file location (Requires RStudio)
|
||||
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
|
||||
source("self_newman_mod.R")
|
||||
source("mutual_information.R")
|
||||
|
||||
data(karate)
|
||||
data(yeast)
|
||||
@ -17,8 +18,10 @@ data(yeast)
|
||||
g1 <- karate
|
||||
g1.netname <- "Karate"
|
||||
g2 <- yeast
|
||||
g2 <- simplify(g2)
|
||||
g2.netname <- "Yeast"
|
||||
|
||||
|
||||
##################### Part 1: Laplace Spectral Clustering #####################
|
||||
g1.adj <- get.adjacency(g1) # get adjacency
|
||||
g2.adj <- get.adjacency(g2)
|
||||
@ -40,6 +43,7 @@ x1
|
||||
x2
|
||||
x1_clusters <- ifelse(x1>0,1,-1)
|
||||
x2_clusters <- ifelse(x2>0,1,-1)
|
||||
#normalizedMI(x1_clusters)
|
||||
|
||||
# Plotting
|
||||
V(g1)$color <- ifelse(x1_clusters>0,"green","yellow")
|
||||
@ -57,28 +61,49 @@ list(Q=Q,max.lam=max.lam,weights=weights,clusters=clusters) <- newman_mod(g2)
|
||||
|
||||
##################### Part 3: Recursive Newman Modularity #####################
|
||||
# Using igraph
|
||||
karate.modularity <- fastgreedy.community(karate,merges=TRUE, modularity=TRUE, membership=TRUE)
|
||||
g1.modularity <- fastgreedy.community(g1,merges=TRUE, modularity=TRUE, membership=TRUE)
|
||||
#memberships <-community.to.membership(karate, karate.modularity$merges,
|
||||
# steps=which.max(fgreedy$modularity)-1)
|
||||
karate.modularity$membership
|
||||
karate.modularity$merges
|
||||
membership.ids <- unique(karate.modularity$membership)
|
||||
membership.ids
|
||||
cat(paste('Number of detected communities =',length(membership.ids)))
|
||||
g1.modularity$membership
|
||||
g1.modularity$merges
|
||||
g1.membership.ids <- unique(g1.modularity$membership)
|
||||
g1.membership.ids
|
||||
cat(paste('Number of detected communities =',length(g1.membership.ids)))
|
||||
cat("community sizes: ")
|
||||
sapply(membership.ids,function(x) {sum(x==karate.modularity$membership)})
|
||||
sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)})
|
||||
cat("modularity: ")
|
||||
max(karate.modularity$modularity)
|
||||
max(g1.modularity$modularity)
|
||||
#karate.modularity$modularity
|
||||
|
||||
V(karate)$color[karate.modularity$membership==1] <- "green"
|
||||
V(karate)$color[karate.modularity$membership==2] <- "red"
|
||||
V(karate)$color[karate.modularity$membership==3] <- "blue"
|
||||
V(g1)$color[karate.modularity$membership==1] <- "green"
|
||||
V(g1)$color[karate.modularity$membership==2] <- "red"
|
||||
V(g1)$color[karate.modularity$membership==3] <- "blue"
|
||||
|
||||
plot(karate,vertex.size=10,
|
||||
vertex.label=V(karate)$label,vertex.color=V(karate)$color,
|
||||
main=paste("Karate Recursive Newman Modularity"))
|
||||
plot(g1,vertex.size=10,
|
||||
vertex.label=V(karate)$label,vertex.color=V(g1)$color,
|
||||
main=paste(g1.netname, " Recursive Newman Modularity"))
|
||||
|
||||
# Using igraph
|
||||
g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
|
||||
#memberships <-community.to.membership(karate, karate.modularity$merges,
|
||||
# steps=which.max(fgreedy$modularity)-1)
|
||||
g2.modularity$membership
|
||||
g2.modularity$merges
|
||||
g2.membership.ids <- unique(g2.modularity$membership)
|
||||
g2.membership.ids
|
||||
cat(paste('Number of detected communities =',length(g2.membership.ids)))
|
||||
cat("community sizes: ")
|
||||
sapply(membership.ids,function(x) {sum(x==g2.modularity$membership)})
|
||||
cat("modularity: ")
|
||||
max(g2.modularity$modularity)
|
||||
#karate.modularity$modularity
|
||||
|
||||
V(g2)$color=g2.modularity$membership
|
||||
|
||||
|
||||
plot(g2,vertex.size=10,
|
||||
vertex.label=NA,vertex.color=V(g2)$color,
|
||||
main=paste(g2.netname, " Recursive Newman Modularity"))
|
||||
|
||||
###################### Part 4: TOM and Dynamic Tree Cut ######################
|
||||
# Next smallest eig
|
||||
@ -93,6 +118,12 @@ y2_val <- eigen(g2.Lap)$values[n2-2]
|
||||
lap_dist1 <- dist(cbind(x1,y1))
|
||||
lap_dist2 <- dist(cbind(x2,y2))
|
||||
|
||||
# Matrix dists from TOM
|
||||
g1.mat <- as.matrix(get.adjacency(g1))
|
||||
g2.mat <- as.matrix(get.adjacency(g2))
|
||||
g1.dist <- GTOMdist(g1.mat, degree = 1)
|
||||
g2.dist <- GTOMdist(g2.mat, degree = 1)
|
||||
|
||||
# Use hierarchical clustering of distance matrix
|
||||
lap_tree1 <- hclust(lap_dist1)
|
||||
lap_tree2 <- hclust(lap_dist2)
|
||||
@ -113,9 +144,9 @@ TOMplot(g2.TOM, lap_tree2, main=paste(g2.netname, " Heatmap Plot"))
|
||||
minModuleSize = 2
|
||||
|
||||
# degree = 1
|
||||
dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = lap_dist1, deepSplit = 2,
|
||||
dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = g1.dist, deepSplit = 2,
|
||||
pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
|
||||
init_mod_sizes <- table(dynamicMods)
|
||||
init_mod_sizes <- table(dynamicMods1)
|
||||
|
||||
# you will have to work through finding and installing labels2colors.
|
||||
dynamicColors = labels2colors(dynamicMods1)
|
||||
@ -125,9 +156,9 @@ plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels =
|
||||
main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1"))
|
||||
|
||||
# degree = 1
|
||||
dynamicMods2 = cutreeDynamic(dendro = lap_tree2, distM = lap_dist2, deepSplit = 2,
|
||||
dynamicMods2 = cutreeDynamic(dendro = lap_tree2, distM = g2.dist, deepSplit = 2,
|
||||
pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
|
||||
init_mod_sizes <- table(dynamicMods)
|
||||
init_mod_sizes <- table(dynamicMods2)
|
||||
|
||||
# you will have to work through finding and installing labels2colors.
|
||||
dynamicColors = labels2colors(dynamicMods2)
|
||||
@ -136,7 +167,7 @@ plotDendroAndColors(lap_tree2, dynamicColors, "Dynamic Tree Cut", dendroLabels =
|
||||
hang = 0.03, addGuide = TRUE, guideHang = 0.05,
|
||||
main = paste(g2.netname, "dendogram and dynamic cut modules, degree = 1"))
|
||||
|
||||
################################ Part 5: UMAP ################################
|
||||
############################### Part 5: UMAP ################################
|
||||
g1.dist2 <- 1- as.matrix(g1.adj)
|
||||
g2.dist2 <- 1- as.matrix(g2.adj)
|
||||
|
||||
|
||||
BIN
Schrick-Noah_CS-7863_Homework3_Writeup.odt
Normal file
BIN
Schrick-Noah_CS-7863_Homework3_Writeup.odt
Normal file
Binary file not shown.
BIN
Schrick-Noah_CS-7863_Homework3_Writeup.pdf
Normal file
BIN
Schrick-Noah_CS-7863_Homework3_Writeup.pdf
Normal file
Binary file not shown.
77
mutual_information.R
Normal file
77
mutual_information.R
Normal file
@ -0,0 +1,77 @@
|
||||
entropy <- function(x){
|
||||
n <- length(x)
|
||||
unique_states <- unique(x)
|
||||
# initialize the prob of each state to 0
|
||||
# two elements for p(x=0) and p(x=1)
|
||||
p <- rep(0,length(unique_states))
|
||||
# update two elements of p
|
||||
# curr_state = 0 or 1
|
||||
i <- 1 # R starts counting arrays at 1!
|
||||
for (curr_state in unique_states){
|
||||
p[i] = sum(x==curr_state)/n # prob of a unique state
|
||||
i <- i + 1
|
||||
}
|
||||
H = sum(-p*log2(p))
|
||||
return(H)
|
||||
}
|
||||
|
||||
mutualInformation <- function(X,Y){
|
||||
# X and Y are vectors of states
|
||||
n <- length(X)
|
||||
X.unique <- unique(X) # unique states in X
|
||||
Y.unique <- unique(Y)
|
||||
# creates joint states between X and Y states
|
||||
# This needs to be for all pairs in the X and Y vecs
|
||||
# this is where I messed up
|
||||
# xy.joint <- interaction(X.states,Y.states,sep=":")
|
||||
xy.joint <- interaction(X,Y,sep=":")
|
||||
# initialize mu=0
|
||||
mu <- 0
|
||||
for (x in X.unique){
|
||||
for (y in Y.unique){
|
||||
# sum over all pairs of unique x and y states
|
||||
# for example, 4 pairs
|
||||
px <- sum(x==X)/n # marg prob of state x
|
||||
py <- sum(y==Y)/n # marg prob
|
||||
xy <- paste(x,y,sep=":") # create observed pair val
|
||||
pxy <- sum(xy==xy.joint)/n # joint prob of pair
|
||||
if (pxy>0){
|
||||
mu <- mu + pxy*log2(pxy/(px*py))
|
||||
}
|
||||
}
|
||||
}
|
||||
return(mu)
|
||||
}
|
||||
|
||||
normalizedMI <- function(X,Y){
|
||||
mu <- mutualInformation(X,Y)
|
||||
Hx <- entropy(X)
|
||||
Hy <- entropy(Y)
|
||||
nmi <- 2*mu/(Hx+Hy)
|
||||
return(nmi)
|
||||
}
|
||||
|
||||
# Example X and Y vectors
|
||||
n <- 10 # num of obsvs
|
||||
p0 <- .5 # prob of first state
|
||||
# Roughly 50:50 1's and 0's
|
||||
x1 <- sample(c(0,1), size=n,
|
||||
replace=T, prob=c(p0,1-p0))
|
||||
# c(0,1) -- possible state values could be c("a","b")
|
||||
# size=n -- number of random samples of the state levels
|
||||
# number of observations or length of output
|
||||
# replace = T -- sample with replacement
|
||||
# so don't run out of states when sampling
|
||||
# prob=c(p0,1-p0) -- probability of sampling each state
|
||||
|
||||
y1 <- rep(1,n) # all 1's
|
||||
interaction(x1,y1,sep=":")
|
||||
unique(y1)
|
||||
entropy(x1)
|
||||
|
||||
mutualInformation(x1,y1)
|
||||
mutualInformation(x1,x1)
|
||||
normalizedMI(x1,y1) # close to 0 or 0
|
||||
normalizedMI(x1,x1) # should be 1 because normalized
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user