Final edits and report

2022-03-24 03:04:51 -05:00 · 2022-03-24 03:04:51 -05:00 · 777cbde4fc
commit 777cbde4fc
parent d9c497c154
7 changed files with 552 additions and 19 deletions
--- a/.RData
+++ b/.RData
--- a/.Rhistory
+++ b/.Rhistory
@ -0,0 +1,424 @@
+q()
+source("self_newman_mod.R")
+getwd()_
+getwd()
+setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
+setwd(getSrcDirectory()[1])
+this.dir <- dirname(parent.frame(2)$ofile)
+setwd(getSrcDirectory()[1])
+setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
+source("self_newman_mod.R")
+# Homework 3 for the University of Tulsa' s CS-7863 Network Theory Course
+# Network Clustering
+# Professor: Dr. McKinney, Spring 2022
+# Noah Schrick - 1492657
+# Imports
+library(igraph)
+library(igraphdata)
+library(WGCNA)
+# Set working directory to this source file location (Requires RStudio)
+setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
+source("self_newman_mod.R")
+data(karate)
+data(yeast)
+g1 <- karate
+g1.netname <- "Karate"
+g2 <- yeast
+g2.netname <- "Yeast"
+##################### Part 1: Laplace Spectral Clustering #####################
+g1.adj <- get.adjacency(g1) # get adjacency
+g2.adj <- get.adjacency(g2)
+g1.deg <- rowSums(as.matrix(g1.adj)) # get degrees
+g2.deg <- rowSums(as.matrix(g2.adj))
+g1.Lap <- diag(g1.deg) - g1.adj # L = D-A
+g2.Lap <- diag(g2.deg) - g2.adj
+n1 <- length(V(g1)) # number of nodes
+n2 <- length(V(g2))
+# get eigvals and vecs
+x1 <- eigen(g1.Lap)$vectors[,n1-1]
+x2 <- eigen(g2.Lap)$vectors[,n2-1]
+x1_val <- eigen(g1.Lap)$values[n1-1]
+x2_val <- eigen(g2.Lap)$values[n2-1]
+names(x1) <- names(V(g1))
+names(x2) <- names(V(g2))
+x1
+x2
+x1_clusters <- ifelse(x1>0,1,-1)
+x2_clusters <- ifelse(x2>0,1,-1)
+# Plotting
+V(g1)$color <- ifelse(x1_clusters>0,"green","yellow")
+#V(g1)$size <- 80*abs(x1)
+V(g2)$color <- ifelse(x2_clusters>0,"green","yellow")
+V(g2)$size <- 10
+plot(g1, main=paste(g1.netname, " Laplace Spectral Clustering"))
+plot(g2, main=paste(g2.netname, " Laplace Spectral Clustering"),
+vertex.label=NA)
+########################## Part 2: Newman Modularity ##########################
+list(Q=Q,max.lam=max.lam,weights=weights,clusters=clusters) <- newman_mod(g1)
+list(Q=Q,max.lam=max.lam,weights=weights,clusters=clusters) <- newman_mod(g2)
+##################### Part 3: Recursive Newman Modularity #####################
+# Using igraph
+karate.modularity <- fastgreedy.community(karate,merges=TRUE, modularity=TRUE, membership=TRUE)
+#memberships <-community.to.membership(karate, karate.modularity$merges,
+# steps=which.max(fgreedy$modularity)-1)
+karate.modularity$membership
+karate.modularity$merges
+membership.ids <- unique(karate.modularity$membership)
+membership.ids
+cat(paste('Number of detected communities =',length(membership.ids)))
+cat("community sizes: ")
+sapply(membership.ids,function(x) {sum(x==karate.modularity$membership)})
+cat("modularity: ")
+max(karate.modularity$modularity)
+#karate.modularity$modularity
+V(karate)$color[karate.modularity$membership==1] <- "green"
+V(karate)$color[karate.modularity$membership==2] <- "red"
+V(karate)$color[karate.modularity$membership==3] <- "blue"
+plot(karate,vertex.size=10,
+vertex.label=V(karate)$label,vertex.color=V(karate)$color,
+main=paste("Karate Recursive Newman Modularity"))
+###################### Part 4: TOM and Dynamic Tree Cut ######################
+# Next smallest eig
+y1 <- eigen(g1.Lap)$vectors[,n1-2] # get n-2 column
+names(y1) <- names(V(g1))
+y1_val <- eigen(g1.Lap)$values[n1-2]
+y2 <- eigen(g2.Lap)$vectors[,n2-2] # get n-2 column
+names(y2) <- names(V(g2))
+y2_val <- eigen(g2.Lap)$values[n2-2]
+# Distance in the 2D Laplacian Eig-Space
+lap_dist1 <- dist(cbind(x1,y1))
+lap_dist2 <- dist(cbind(x2,y2))
+# Use hierarchical clustering of distance matrix
+lap_tree1 <- hclust(lap_dist1)
+lap_tree2 <- hclust(lap_dist2)
+# Use WGCNA to dynamically decide how many clusters
+cutreeDynamicTree(lap_tree1)
+cutreeDynamicTree(lap_tree2)
+# TOM
+g1.TOM <- TOMsimilarity(as.matrix(g1.adj))
+g2.TOM <- TOMsimilarity(as.matrix(g2.adj))
+g1.TOM
+g2.TOM
+TOMplot(g1.TOM, lap_tree1, main=paste(g1.netname, " Heatmap Plot"))
+TOMplot(g2.TOM, lap_tree2, main=paste(g2.netname, " Heatmap Plot"))
+# dynamic tree cut
+minModuleSize = 2
+# degree = 1
+dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = lap_dist1, deepSplit = 2,
+pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
+init_mod_sizes <- table(dynamicMods)
+# you will have to work through finding and installing labels2colors.
+dynamicColors = labels2colors(dynamicMods1)
+table(dynamicColors)
+plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE,
+hang = 0.03, addGuide = TRUE, guideHang = 0.05,
+main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1"))
+# degree = 1
+dynamicMods2 = cutreeDynamic(dendro = lap_tree2, distM = lap_dist2, deepSplit = 2,
+pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
+init_mod_sizes <- table(dynamicMods)
+# you will have to work through finding and installing labels2colors.
+dynamicColors = labels2colors(dynamicMods2)
+table(dynamicColors)
+plotDendroAndColors(lap_tree2, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE,
+hang = 0.03, addGuide = TRUE, guideHang = 0.05,
+main = paste(g2.netname, "dendogram and dynamic cut modules, degree = 1"))
+################################ Part 5: UMAP ################################
+g1.dist2 <- 1- as.matrix(g1.adj)
+g2.dist2 <- 1- as.matrix(g2.adj)
+# first cluster with hierarchical clustering
+g1.tree2 <- hclust(as.dist(g1.dist2), method = "average")
+g2.tree2 <- hclust(as.dist(g2.dist2), method = "average")
+g1.tree2.2clust <- cutree(g1.tree2,k=2)  # predicted 2-clusters
+g2.tree2.2clust <- cutree(g2.tree2,k=2)  # predicted 2-clusters
+#g1.tree2$labels <- V(g1)[Faction] # true clusters
+#g2.tree2$labels <- V(g2)[Faction] # true clusters
+g1.tree2$labels <- g1.tree2.2clust
+g2.tree2$labels <- g2.tree2.2clust
+# if you want to label tree with predicted clusters
+plot(g1.tree2, main = paste(g1.netname, " HClust"))
+plot(g2.tree2, main = paste(g2.netname, " HClust"))
+# Using igraph
+g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
+#memberships <-community.to.membership(karate, karate.modularity$merges,
+# steps=which.max(fgreedy$modularity)-1)
+g2.modularity$membership
+g2.modularity$merges
+g2.membership.ids <- unique(g1.modularity$membership)
+g2.membership.ids
+cat(paste('Number of detected communities =',length(g2.membership.ids)))
+cat("community sizes: ")
+sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)})
+cat("modularity: ")
+max(g2.modularity$modularity)
+#karate.modularity$modularity
+# Using igraph
+g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
+#memberships <-community.to.membership(karate, karate.modularity$merges,
+# steps=which.max(fgreedy$modularity)-1)
+g2.modularity$membership
+g2.modularity$merges
+g2.membership.ids <- unique(g2.modularity$membership)
+g2.membership.ids
+cat(paste('Number of detected communities =',length(g2.membership.ids)))
+cat("community sizes: ")
+sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)})
+cat("modularity: ")
+max(g2.modularity$modularity)
+#karate.modularity$modularity
+# Using igraph
+g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
+#memberships <-community.to.membership(karate, karate.modularity$merges,
+# steps=which.max(fgreedy$modularity)-1)
+g2.modularity$membership
+g2.modularity$merges
+g2.membership.ids <- unique(g2.modularity$membership)
+cat(paste('Number of detected communities =',length(g2.membership.ids)))
+V(g2)$color=g2.modularity$membership
+plot(g2,vertex.size=10,
+vertex.label=V(g2)$label,vertex.color=V(g2)$color,
+main=paste(g2.netname, " Recursive Newman Modularity"))
+plot(g2,vertex.size=10,
+vertex.label=V(g2)$label,vertex.color=V(g2)$color,
+main=paste(g2.netname, " Recursive Newman Modularity"))
+tmp <- simplify(g2)
+g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
+#memberships <-community.to.membership(karate, karate.modularity$merges,
+# steps=which.max(fgreedy$modularity)-1)
+g2.modularity$membership
+g2.modularity$merges
+g2.membership.ids <- unique(g2.modularity$membership)
+g2.membership.ids
+cat(paste('Number of detected communities =',length(g2.membership.ids)))
+cat("community sizes: ")
+sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)})
+cat("modularity: ")
+max(g2.modularity$modularity)
+#karate.modularity$modularity
+V(g2)$color=g2.modularity$membership
+plot(g2,vertex.size=10,
+vertex.label=V(g2)$label,vertex.color=V(g2)$color,
+main=paste(g2.netname, " Recursive Newman Modularity"))
+plot(g2,vertex.size=10,
+vertex.label=NA,vertex.color=V(g2)$color,
+main=paste(g2.netname, " Recursive Newman Modularity"))
+plot(g2,vertex.size=10,
+vertex.label=NA,vertex.color=V(g2)$color,
+main=paste(g2.netname, " Recursive Newman Modularity"))
+g2 <- simplify(g2)
+# Using igraph
+g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
+#memberships <-community.to.membership(karate, karate.modularity$merges,
+# steps=which.max(fgreedy$modularity)-1)
+g2.modularity$membership
+g2.modularity$merges
+g2.membership.ids <- unique(g2.modularity$membership)
+g2.membership.ids
+cat(paste('Number of detected communities =',length(g2.membership.ids)))
+cat("community sizes: ")
+sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)})
+cat("modularity: ")
+max(g2.modularity$modularity)
+#karate.modularity$modularity
+V(g2)$color=g2.modularity$membership
+plot(g2,vertex.size=10,
+vertex.label=NA,vertex.color=V(g2)$color,
+main=paste(g2.netname, " Recursive Newman Modularity"))
+# dynamic tree cut
+minModuleSize = 2
+# degree = 1
+dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = lap_dist1, deepSplit = 2,
+pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
+init_mod_sizes <- table(dynamicMods)
+# you will have to work through finding and installing labels2colors.
+dynamicColors = labels2colors(dynamicMods1)
+table(dynamicColors)
+plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE,
+hang = 0.03, addGuide = TRUE, guideHang = 0.05,
+main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1"))
+install.packages("labels2colors")
+# dynamic tree cut
+minModuleSize = 2
+# degree = 1
+dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = lap_dist1, deepSplit = 2,
+pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
+g1.mat <- as.matrix(get.adjacency(g1))
+g2.mat <- as.matrix(get.adjacency(g2))
+g1.dist <- GTOMdist(g1.mat, degree = 1)
+g2.dist <- GTOMdist(g2.mat, degree = 1)
+# dynamic tree cut
+minModuleSize = 2
+# degree = 1
+dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = g1.dist, deepSplit = 2,
+pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
+init_mod_sizes <- table(dynamicMods)
+init_mod_sizes <- table(dynamicMods1)
+# you will have to work through finding and installing labels2colors.
+dynamicColors = labels2colors(dynamicMods1)
+table(dynamicColors)
+plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE,
+hang = 0.03, addGuide = TRUE, guideHang = 0.05,
+main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1"))
+# degree = 1
+dynamicMods2 = cutreeDynamic(dendro = lap_tree2, distM = g2.dist, deepSplit = 2,
+pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
+init_mod_sizes <- table(dynamicMods2)
+# you will have to work through finding and installing labels2colors.
+dynamicColors = labels2colors(dynamicMods2)
+table(dynamicColors)
+plotDendroAndColors(lap_tree2, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE,
+hang = 0.03, addGuide = TRUE, guideHang = 0.05,
+main = paste(g2.netname, "dendogram and dynamic cut modules, degree = 1"))
+############################### Part 5: UMAP ################################
+g1.dist2 <- 1- as.matrix(g1.adj)
+g2.dist2 <- 1- as.matrix(g2.adj)
+g1.tree2 <- hclust(as.dist(g1.dist2), method = "average")
+g2.tree2 <- hclust(as.dist(g2.dist2), method = "average")
+g1.tree2.2clust <- cutree(g1.tree2,k=2)  # predicted 2-clusters
+g2.tree2.2clust <- cutree(g2.tree2,k=2)  # predicted 2-clusters
+#g1.tree2$labels <- V(g1)[Faction] # true clusters
+#g2.tree2$labels <- V(g2)[Faction] # true clusters
+g1.tree2$labels <- g1.tree2.2clust
+g2.tree2$labels <- g2.tree2.2clust
+# if you want to label tree with predicted clusters
+plot(g1.tree2, main = paste(g1.netname, " HClust"))
+plot(g2.tree2, main = paste(g2.netname, " HClust"))
+plot(g2.tree2, main = paste(g2.netname, " HClust"))
+g <- g1
+A <- get.adjacency(g) # adj
+m <- ecount(g)
+n <- vcount(g)
+if (is.null(weights)){
+weights <- rep(1,n)
+}
+# Obtain the modularity matrix
+B.node.i <- function(i){degree(g)[i]*degree(g)}
+B.node.all <- sapply(1:n, B.node.i)
+B <- A - (B.node.all/(2*m))
+# NOTE: This is identical to: modularity_matrix(g) ! Can verify with:
+# modularity_matrix(g) == B
+B.eigs <- eigen(B)
+max.lam <- B.eigs$values[1]
+s <- ifelse(B.eigs$vectors[,1]>0,1,-1)
+weights <- B.eigs$vectors[n]/B.eigs$vectors[,1]
+# Plotting
+#V(g)$color <- ifelse(B[1,]>0,"green","yellow")
+V(g)$color <- ifelse(B.eigs$vectors[,1]>0,"green","yellow")
+V(g)$size <- 10
+plot(g, main=paste(g.netname, " Newman Modularity"))
+clust1 = list()
+clust2 = list()
+clusters = list()
+# Make list of clusters
+for(i in 1:n){
+ifelse(V(g)[i]$color=="green",
+clust1 <- append(clust1, V(g)[i]$name),
+clust2 <- append(clust2, V(g)[i]$name))}
+clusters <- list(clust1, clust2)
+Q.node.i <- function(i){sum(
+(((B.eigs$vectors[i])*weights[i]*s)^2)*B.eigs[i]$values)}
+Q <- (1/(4*m))*sapply(1:n, Q.node.i)
+plot(g, main=paste(g.netname, " Newman Modularity"))
+g
+g.netname
+g.netname <- "Karate"
+plot(g, main=paste(g.netname, " Newman Modularity"))
+g <- g2
+A <- get.adjacency(g) # adj
+m <- ecount(g)
+n <- vcount(g)
+if (is.null(weights)){
+weights <- rep(1,n)
+}
+# Obtain the modularity matrix
+B.node.i <- function(i){degree(g)[i]*degree(g)}
+B.node.all <- sapply(1:n, B.node.i)
+B <- A - (B.node.all/(2*m))
+# NOTE: This is identical to: modularity_matrix(g) ! Can verify with:
+# modularity_matrix(g) == B
+B.eigs <- eigen(B)
+max.lam <- B.eigs$values[1]
+s <- ifelse(B.eigs$vectors[,1]>0,1,-1)
+weights <- B.eigs$vectors[n]/B.eigs$vectors[,1]
+# Plotting
+#V(g)$color <- ifelse(B[1,]>0,"green","yellow")
+V(g)$color <- ifelse(B.eigs$vectors[,1]>0,"green","yellow")
+V(g)$size <- 10
+plot(g, main=paste(g.netname, " Newman Modularity"))
+clust1 = list()
+clust2 = list()
+clusters = list()
+# Make list of clusters
+for(i in 1:n){
+ifelse(V(g)[i]$color=="green",
+clust1 <- append(clust1, V(g)[i]$name),
+clust2 <- append(clust2, V(g)[i]$name))}
+clusters <- list(clust1, clust2)
+Q.node.i <- function(i){sum(
+(((B.eigs$vectors[i])*weights[i]*s)^2)*B.eigs[i]$values)}
+Q <- (1/(4*m))*sapply(1:n, Q.node.i)
+plot(g, vertex.label=NA, main=paste(g.netname, " Newman Modularity"))
+plot(g, vertex.label=NA, main=paste(g.netname, " Newman Modularity"))
+g.netname <- Yeast
+g.netname <- "Yeast"
+g <- g2
+A <- get.adjacency(g) # adj
+m <- ecount(g)
+n <- vcount(g)
+if (is.null(weights)){
+weights <- rep(1,n)
+}
+# Obtain the modularity matrix
+B.node.i <- function(i){degree(g)[i]*degree(g)}
+B.node.all <- sapply(1:n, B.node.i)
+B <- A - (B.node.all/(2*m))
+# NOTE: This is identical to: modularity_matrix(g) ! Can verify with:
+# modularity_matrix(g) == B
+B.eigs <- eigen(B)
+max.lam <- B.eigs$values[1]
+s <- ifelse(B.eigs$vectors[,1]>0,1,-1)
+weights <- B.eigs$vectors[n]/B.eigs$vectors[,1]
+# Plotting
+#V(g)$color <- ifelse(B[1,]>0,"green","yellow")
+V(g)$color <- ifelse(B.eigs$vectors[,1]>0,"green","yellow")
+V(g)$size <- 10
+plot(g, vertex.label=NA, main=paste(g.netname, " Newman Modularity"))
+g1
+g1[3]
+normalizedMI(x1_clusters,x2_clusters)
+gc()
+source("mutual_information.R")
+normalizedMI(x1_clusters,x2_clusters)
+x1_clusters
+##################### Part 1: Laplace Spectral Clustering #####################
+g1.adj <- get.adjacency(g1) # get adjacency
+g1.deg <- rowSums(as.matrix(g1.adj)) # get degrees
+g1.Lap <- diag(g1.deg) - g1.adj # L = D-A
+n1 <- length(V(g1)) # number of nodes
+# get eigvals and vecs
+x1 <- eigen(g1.Lap)$vectors[,n1-1]
+x1_val <- eigen(g1.Lap)$values[n1-1]
+names(x1) <- names(V(g1))
+x1
+x1_clusters <- ifelse(x1>0,1,-1)
+normalizedMI(x1_clusters,x1_clusters)
+x1_clusters
+normalizedMI(x1_clusters)
+vcount(g2)
+cat(paste('Number of detected communities =',length(g2.membership.ids)))
+cat("community sizes: ")
+sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)})
+sapply(membership.ids,function(x) {sum(x==g2.modularity$membership)})
+# Using igraph
+g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
+#memberships <-community.to.membership(karate, karate.modularity$merges,
+# steps=which.max(fgreedy$modularity)-1)
+g2.modularity$membership
+g2.modularity$merges
+g2.membership.ids <- unique(g2.modularity$membership)
+g2.membership.ids
+cat(paste('Number of detected communities =',length(g2.membership.ids)))
+cat("community sizes: ")
+sapply(membership.ids,function(x) {sum(x==g2.modularity$membership)})
+cat("modularity: ")
+max(g2.modularity$modularity)
--- a/.~lock.Schrick-Noah_CS-7863_Homework3_Writeup.odt#
+++ b/.~lock.Schrick-Noah_CS-7863_Homework3_Writeup.odt#
@ -0,0 +1 @@
+,noah,NovaArchSys,24.03.2022 03:03,file:///home/noah/.config/libreoffice/4;
--- a/Schrick-Noah_CS-7863_Homework-3.R
+++ b/Schrick-Noah_CS-7863_Homework-3.R
@ -10,6 +10,7 @@ library(WGCNA)
 # Set working directory to this source file location (Requires RStudio)
 setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
 source("self_newman_mod.R")
+source("mutual_information.R")

 data(karate)
 data(yeast)
@ -17,8 +18,10 @@ data(yeast)
 g1 <- karate
 g1.netname <- "Karate"
 g2 <- yeast
+g2 <- simplify(g2)
 g2.netname <- "Yeast"

+
 ##################### Part 1: Laplace Spectral Clustering #####################
 g1.adj <- get.adjacency(g1) # get adjacency
 g2.adj <- get.adjacency(g2)
@ -40,6 +43,7 @@ x1
 x2
 x1_clusters <- ifelse(x1>0,1,-1)
 x2_clusters <- ifelse(x2>0,1,-1)
+#normalizedMI(x1_clusters)

 # Plotting
 V(g1)$color <- ifelse(x1_clusters>0,"green","yellow")
@ -57,28 +61,49 @@ list(Q=Q,max.lam=max.lam,weights=weights,clusters=clusters) <- newman_mod(g2)

 ##################### Part 3: Recursive Newman Modularity #####################
 # Using igraph
-karate.modularity <- fastgreedy.community(karate,merges=TRUE, modularity=TRUE, membership=TRUE)
+g1.modularity <- fastgreedy.community(g1,merges=TRUE, modularity=TRUE, membership=TRUE)
 #memberships <-community.to.membership(karate, karate.modularity$merges, 
  # steps=which.max(fgreedy$modularity)-1)
-karate.modularity$membership
-karate.modularity$merges
-membership.ids <- unique(karate.modularity$membership)
-membership.ids
-cat(paste('Number of detected communities =',length(membership.ids)))
+g1.modularity$membership
+g1.modularity$merges
+g1.membership.ids <- unique(g1.modularity$membership)
+g1.membership.ids
+cat(paste('Number of detected communities =',length(g1.membership.ids)))
 cat("community sizes: ")
-sapply(membership.ids,function(x) {sum(x==karate.modularity$membership)})
+sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)})
 cat("modularity: ")
-max(karate.modularity$modularity)
+max(g1.modularity$modularity)
 #karate.modularity$modularity

-V(karate)$color[karate.modularity$membership==1] <- "green"
-V(karate)$color[karate.modularity$membership==2] <- "red"
-V(karate)$color[karate.modularity$membership==3] <- "blue"
+V(g1)$color[karate.modularity$membership==1] <- "green"
+V(g1)$color[karate.modularity$membership==2] <- "red"
+V(g1)$color[karate.modularity$membership==3] <- "blue"

-plot(karate,vertex.size=10,
-     vertex.label=V(karate)$label,vertex.color=V(karate)$color,
-     main=paste("Karate Recursive Newman Modularity"))
+plot(g1,vertex.size=10,
+     vertex.label=V(karate)$label,vertex.color=V(g1)$color,
+     main=paste(g1.netname, " Recursive Newman Modularity"))

+# Using igraph
+g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE)
+#memberships <-community.to.membership(karate, karate.modularity$merges, 
+# steps=which.max(fgreedy$modularity)-1)
+g2.modularity$membership
+g2.modularity$merges
+g2.membership.ids <- unique(g2.modularity$membership)
+g2.membership.ids
+cat(paste('Number of detected communities =',length(g2.membership.ids)))
+cat("community sizes: ")
+sapply(membership.ids,function(x) {sum(x==g2.modularity$membership)})
+cat("modularity: ")
+max(g2.modularity$modularity)
+#karate.modularity$modularity
+
+V(g2)$color=g2.modularity$membership
+
+
+plot(g2,vertex.size=10,
+     vertex.label=NA,vertex.color=V(g2)$color,
+     main=paste(g2.netname, " Recursive Newman Modularity"))

 ###################### Part 4: TOM and Dynamic Tree Cut ######################
 # Next smallest eig
@ -93,6 +118,12 @@ y2_val <- eigen(g2.Lap)$values[n2-2]
 lap_dist1 <- dist(cbind(x1,y1))
 lap_dist2 <- dist(cbind(x2,y2))

+# Matrix dists from TOM
+g1.mat <- as.matrix(get.adjacency(g1))
+g2.mat <- as.matrix(get.adjacency(g2))
+g1.dist <- GTOMdist(g1.mat, degree = 1)
+g2.dist <- GTOMdist(g2.mat, degree = 1)
+
 # Use hierarchical clustering of distance matrix
 lap_tree1 <- hclust(lap_dist1)
 lap_tree2 <- hclust(lap_dist2)
@ -113,9 +144,9 @@ TOMplot(g2.TOM, lap_tree2, main=paste(g2.netname, " Heatmap Plot"))
 minModuleSize = 2

 # degree = 1
-dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = lap_dist1, deepSplit = 2,
+dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = g1.dist, deepSplit = 2,
                            pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
-init_mod_sizes <- table(dynamicMods)
+init_mod_sizes <- table(dynamicMods1)

 # you will have to work through finding and installing labels2colors.
 dynamicColors = labels2colors(dynamicMods1)
@ -125,9 +156,9 @@ plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels =
                    main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1"))

 # degree = 1
-dynamicMods2 = cutreeDynamic(dendro = lap_tree2, distM = lap_dist2, deepSplit = 2,
+dynamicMods2 = cutreeDynamic(dendro = lap_tree2, distM = g2.dist, deepSplit = 2,
                             pamRespectsDendro = FALSE, minClusterSize = minModuleSize)
-init_mod_sizes <- table(dynamicMods)
+init_mod_sizes <- table(dynamicMods2)

 # you will have to work through finding and installing labels2colors.
 dynamicColors = labels2colors(dynamicMods2)
@ -136,7 +167,7 @@ plotDendroAndColors(lap_tree2, dynamicColors, "Dynamic Tree Cut", dendroLabels =
                    hang = 0.03, addGuide = TRUE, guideHang = 0.05,
                    main = paste(g2.netname, "dendogram and dynamic cut modules, degree = 1"))

-################################ Part 5: UMAP ################################
+############################### Part 5: UMAP ################################
 g1.dist2 <- 1- as.matrix(g1.adj)
 g2.dist2 <- 1- as.matrix(g2.adj)

--- a/Schrick-Noah_CS-7863_Homework3_Writeup.odt
+++ b/Schrick-Noah_CS-7863_Homework3_Writeup.odt
--- a/Schrick-Noah_CS-7863_Homework3_Writeup.pdf
+++ b/Schrick-Noah_CS-7863_Homework3_Writeup.pdf
--- a/mutual_information.R
+++ b/mutual_information.R
@ -0,0 +1,77 @@
+entropy <- function(x){
+  n <- length(x)
+  unique_states <- unique(x)
+  # initialize the prob of each state to 0
+  # two elements for p(x=0) and p(x=1)
+  p <- rep(0,length(unique_states))
+  # update two elements of p
+  # curr_state = 0 or 1
+  i <- 1 # R starts counting arrays at 1!
+  for (curr_state in unique_states){ 
+    p[i] = sum(x==curr_state)/n # prob of a unique state
+    i <- i + 1
+  }
+  H = sum(-p*log2(p))
+  return(H)
+}
+
+mutualInformation <- function(X,Y){
+  # X and Y are vectors of states
+  n <- length(X)
+  X.unique <- unique(X)  # unique states in X
+  Y.unique <- unique(Y)
+  # creates joint states between X and Y states
+  # This needs to be for all pairs in the X and Y vecs
+  #             this is where I messed up
+  # xy.joint <- interaction(X.states,Y.states,sep=":")
+  xy.joint <- interaction(X,Y,sep=":")
+  # initialize mu=0
+  mu <- 0
+  for (x in X.unique){
+    for (y in Y.unique){
+      # sum over all pairs of unique x and y states
+      # for example, 4 pairs
+      px <- sum(x==X)/n  # marg prob of state x
+      py <- sum(y==Y)/n  # marg prob
+      xy <- paste(x,y,sep=":") # create observed pair val
+      pxy <- sum(xy==xy.joint)/n # joint prob of pair
+      if (pxy>0){
+        mu <- mu + pxy*log2(pxy/(px*py))
+      }
+    }
+  }
+  return(mu)
+}
+  
+normalizedMI <- function(X,Y){
+  mu <- mutualInformation(X,Y)
+  Hx <- entropy(X)
+  Hy <- entropy(Y)
+  nmi <- 2*mu/(Hx+Hy)
+  return(nmi)
+}
+
+# Example X and Y vectors
+n <- 10  # num of obsvs
+p0 <- .5 # prob of first state
+# Roughly 50:50 1's and 0's
+x1 <- sample(c(0,1), size=n,  
+             replace=T, prob=c(p0,1-p0))
+# c(0,1) -- possible state values could be c("a","b")
+# size=n -- number of random samples of the state levels
+#           number of observations or length of output
+# replace = T -- sample with replacement
+#                so don't run out of states when sampling
+# prob=c(p0,1-p0) -- probability of sampling each state
+      
+y1 <- rep(1,n)  # all 1's
+interaction(x1,y1,sep=":")
+unique(y1)
+entropy(x1)
+
+mutualInformation(x1,y1)
+mutualInformation(x1,x1)
+normalizedMI(x1,y1)  # close to 0 or 0
+normalizedMI(x1,x1)  # should be 1 because normalized
+
+
				`@ -0,0 +1 @@`
				`,noah,NovaArchSys,24.03.2022 03:03,file:///home/noah/.config/libreoffice/4;`