diff --git a/.RData b/.RData new file mode 100644 index 0000000..36336c6 Binary files /dev/null and b/.RData differ diff --git a/.Rhistory b/.Rhistory new file mode 100644 index 0000000..6e928f6 --- /dev/null +++ b/.Rhistory @@ -0,0 +1,424 @@ +q() +source("self_newman_mod.R") +getwd()_ +getwd() +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) +setwd(getSrcDirectory()[1]) +this.dir <- dirname(parent.frame(2)$ofile) +setwd(getSrcDirectory()[1]) +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) +source("self_newman_mod.R") +# Homework 3 for the University of Tulsa' s CS-7863 Network Theory Course +# Network Clustering +# Professor: Dr. McKinney, Spring 2022 +# Noah Schrick - 1492657 +# Imports +library(igraph) +library(igraphdata) +library(WGCNA) +# Set working directory to this source file location (Requires RStudio) +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) +source("self_newman_mod.R") +data(karate) +data(yeast) +g1 <- karate +g1.netname <- "Karate" +g2 <- yeast +g2.netname <- "Yeast" +##################### Part 1: Laplace Spectral Clustering ##################### +g1.adj <- get.adjacency(g1) # get adjacency +g2.adj <- get.adjacency(g2) +g1.deg <- rowSums(as.matrix(g1.adj)) # get degrees +g2.deg <- rowSums(as.matrix(g2.adj)) +g1.Lap <- diag(g1.deg) - g1.adj # L = D-A +g2.Lap <- diag(g2.deg) - g2.adj +n1 <- length(V(g1)) # number of nodes +n2 <- length(V(g2)) +# get eigvals and vecs +x1 <- eigen(g1.Lap)$vectors[,n1-1] +x2 <- eigen(g2.Lap)$vectors[,n2-1] +x1_val <- eigen(g1.Lap)$values[n1-1] +x2_val <- eigen(g2.Lap)$values[n2-1] +names(x1) <- names(V(g1)) +names(x2) <- names(V(g2)) +x1 +x2 +x1_clusters <- ifelse(x1>0,1,-1) +x2_clusters <- ifelse(x2>0,1,-1) +# Plotting +V(g1)$color <- ifelse(x1_clusters>0,"green","yellow") +#V(g1)$size <- 80*abs(x1) +V(g2)$color <- ifelse(x2_clusters>0,"green","yellow") +V(g2)$size <- 10 +plot(g1, main=paste(g1.netname, " Laplace Spectral Clustering")) +plot(g2, main=paste(g2.netname, " Laplace Spectral Clustering"), +vertex.label=NA) +########################## Part 2: Newman Modularity ########################## +list(Q=Q,max.lam=max.lam,weights=weights,clusters=clusters) <- newman_mod(g1) +list(Q=Q,max.lam=max.lam,weights=weights,clusters=clusters) <- newman_mod(g2) +##################### Part 3: Recursive Newman Modularity ##################### +# Using igraph +karate.modularity <- fastgreedy.community(karate,merges=TRUE, modularity=TRUE, membership=TRUE) +#memberships <-community.to.membership(karate, karate.modularity$merges, +# steps=which.max(fgreedy$modularity)-1) +karate.modularity$membership +karate.modularity$merges +membership.ids <- unique(karate.modularity$membership) +membership.ids +cat(paste('Number of detected communities =',length(membership.ids))) +cat("community sizes: ") +sapply(membership.ids,function(x) {sum(x==karate.modularity$membership)}) +cat("modularity: ") +max(karate.modularity$modularity) +#karate.modularity$modularity +V(karate)$color[karate.modularity$membership==1] <- "green" +V(karate)$color[karate.modularity$membership==2] <- "red" +V(karate)$color[karate.modularity$membership==3] <- "blue" +plot(karate,vertex.size=10, +vertex.label=V(karate)$label,vertex.color=V(karate)$color, +main=paste("Karate Recursive Newman Modularity")) +###################### Part 4: TOM and Dynamic Tree Cut ###################### +# Next smallest eig +y1 <- eigen(g1.Lap)$vectors[,n1-2] # get n-2 column +names(y1) <- names(V(g1)) +y1_val <- eigen(g1.Lap)$values[n1-2] +y2 <- eigen(g2.Lap)$vectors[,n2-2] # get n-2 column +names(y2) <- names(V(g2)) +y2_val <- eigen(g2.Lap)$values[n2-2] +# Distance in the 2D Laplacian Eig-Space +lap_dist1 <- dist(cbind(x1,y1)) +lap_dist2 <- dist(cbind(x2,y2)) +# Use hierarchical clustering of distance matrix +lap_tree1 <- hclust(lap_dist1) +lap_tree2 <- hclust(lap_dist2) +# Use WGCNA to dynamically decide how many clusters +cutreeDynamicTree(lap_tree1) +cutreeDynamicTree(lap_tree2) +# TOM +g1.TOM <- TOMsimilarity(as.matrix(g1.adj)) +g2.TOM <- TOMsimilarity(as.matrix(g2.adj)) +g1.TOM +g2.TOM +TOMplot(g1.TOM, lap_tree1, main=paste(g1.netname, " Heatmap Plot")) +TOMplot(g2.TOM, lap_tree2, main=paste(g2.netname, " Heatmap Plot")) +# dynamic tree cut +minModuleSize = 2 +# degree = 1 +dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = lap_dist1, deepSplit = 2, +pamRespectsDendro = FALSE, minClusterSize = minModuleSize) +init_mod_sizes <- table(dynamicMods) +# you will have to work through finding and installing labels2colors. +dynamicColors = labels2colors(dynamicMods1) +table(dynamicColors) +plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE, +hang = 0.03, addGuide = TRUE, guideHang = 0.05, +main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1")) +# degree = 1 +dynamicMods2 = cutreeDynamic(dendro = lap_tree2, distM = lap_dist2, deepSplit = 2, +pamRespectsDendro = FALSE, minClusterSize = minModuleSize) +init_mod_sizes <- table(dynamicMods) +# you will have to work through finding and installing labels2colors. +dynamicColors = labels2colors(dynamicMods2) +table(dynamicColors) +plotDendroAndColors(lap_tree2, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE, +hang = 0.03, addGuide = TRUE, guideHang = 0.05, +main = paste(g2.netname, "dendogram and dynamic cut modules, degree = 1")) +################################ Part 5: UMAP ################################ +g1.dist2 <- 1- as.matrix(g1.adj) +g2.dist2 <- 1- as.matrix(g2.adj) +# first cluster with hierarchical clustering +g1.tree2 <- hclust(as.dist(g1.dist2), method = "average") +g2.tree2 <- hclust(as.dist(g2.dist2), method = "average") +g1.tree2.2clust <- cutree(g1.tree2,k=2) # predicted 2-clusters +g2.tree2.2clust <- cutree(g2.tree2,k=2) # predicted 2-clusters +#g1.tree2$labels <- V(g1)[Faction] # true clusters +#g2.tree2$labels <- V(g2)[Faction] # true clusters +g1.tree2$labels <- g1.tree2.2clust +g2.tree2$labels <- g2.tree2.2clust +# if you want to label tree with predicted clusters +plot(g1.tree2, main = paste(g1.netname, " HClust")) +plot(g2.tree2, main = paste(g2.netname, " HClust")) +# Using igraph +g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE) +#memberships <-community.to.membership(karate, karate.modularity$merges, +# steps=which.max(fgreedy$modularity)-1) +g2.modularity$membership +g2.modularity$merges +g2.membership.ids <- unique(g1.modularity$membership) +g2.membership.ids +cat(paste('Number of detected communities =',length(g2.membership.ids))) +cat("community sizes: ") +sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)}) +cat("modularity: ") +max(g2.modularity$modularity) +#karate.modularity$modularity +# Using igraph +g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE) +#memberships <-community.to.membership(karate, karate.modularity$merges, +# steps=which.max(fgreedy$modularity)-1) +g2.modularity$membership +g2.modularity$merges +g2.membership.ids <- unique(g2.modularity$membership) +g2.membership.ids +cat(paste('Number of detected communities =',length(g2.membership.ids))) +cat("community sizes: ") +sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)}) +cat("modularity: ") +max(g2.modularity$modularity) +#karate.modularity$modularity +# Using igraph +g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE) +#memberships <-community.to.membership(karate, karate.modularity$merges, +# steps=which.max(fgreedy$modularity)-1) +g2.modularity$membership +g2.modularity$merges +g2.membership.ids <- unique(g2.modularity$membership) +cat(paste('Number of detected communities =',length(g2.membership.ids))) +V(g2)$color=g2.modularity$membership +plot(g2,vertex.size=10, +vertex.label=V(g2)$label,vertex.color=V(g2)$color, +main=paste(g2.netname, " Recursive Newman Modularity")) +plot(g2,vertex.size=10, +vertex.label=V(g2)$label,vertex.color=V(g2)$color, +main=paste(g2.netname, " Recursive Newman Modularity")) +tmp <- simplify(g2) +g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE) +#memberships <-community.to.membership(karate, karate.modularity$merges, +# steps=which.max(fgreedy$modularity)-1) +g2.modularity$membership +g2.modularity$merges +g2.membership.ids <- unique(g2.modularity$membership) +g2.membership.ids +cat(paste('Number of detected communities =',length(g2.membership.ids))) +cat("community sizes: ") +sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)}) +cat("modularity: ") +max(g2.modularity$modularity) +#karate.modularity$modularity +V(g2)$color=g2.modularity$membership +plot(g2,vertex.size=10, +vertex.label=V(g2)$label,vertex.color=V(g2)$color, +main=paste(g2.netname, " Recursive Newman Modularity")) +plot(g2,vertex.size=10, +vertex.label=NA,vertex.color=V(g2)$color, +main=paste(g2.netname, " Recursive Newman Modularity")) +plot(g2,vertex.size=10, +vertex.label=NA,vertex.color=V(g2)$color, +main=paste(g2.netname, " Recursive Newman Modularity")) +g2 <- simplify(g2) +# Using igraph +g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE) +#memberships <-community.to.membership(karate, karate.modularity$merges, +# steps=which.max(fgreedy$modularity)-1) +g2.modularity$membership +g2.modularity$merges +g2.membership.ids <- unique(g2.modularity$membership) +g2.membership.ids +cat(paste('Number of detected communities =',length(g2.membership.ids))) +cat("community sizes: ") +sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)}) +cat("modularity: ") +max(g2.modularity$modularity) +#karate.modularity$modularity +V(g2)$color=g2.modularity$membership +plot(g2,vertex.size=10, +vertex.label=NA,vertex.color=V(g2)$color, +main=paste(g2.netname, " Recursive Newman Modularity")) +# dynamic tree cut +minModuleSize = 2 +# degree = 1 +dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = lap_dist1, deepSplit = 2, +pamRespectsDendro = FALSE, minClusterSize = minModuleSize) +init_mod_sizes <- table(dynamicMods) +# you will have to work through finding and installing labels2colors. +dynamicColors = labels2colors(dynamicMods1) +table(dynamicColors) +plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE, +hang = 0.03, addGuide = TRUE, guideHang = 0.05, +main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1")) +install.packages("labels2colors") +# dynamic tree cut +minModuleSize = 2 +# degree = 1 +dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = lap_dist1, deepSplit = 2, +pamRespectsDendro = FALSE, minClusterSize = minModuleSize) +g1.mat <- as.matrix(get.adjacency(g1)) +g2.mat <- as.matrix(get.adjacency(g2)) +g1.dist <- GTOMdist(g1.mat, degree = 1) +g2.dist <- GTOMdist(g2.mat, degree = 1) +# dynamic tree cut +minModuleSize = 2 +# degree = 1 +dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = g1.dist, deepSplit = 2, +pamRespectsDendro = FALSE, minClusterSize = minModuleSize) +init_mod_sizes <- table(dynamicMods) +init_mod_sizes <- table(dynamicMods1) +# you will have to work through finding and installing labels2colors. +dynamicColors = labels2colors(dynamicMods1) +table(dynamicColors) +plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE, +hang = 0.03, addGuide = TRUE, guideHang = 0.05, +main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1")) +# degree = 1 +dynamicMods2 = cutreeDynamic(dendro = lap_tree2, distM = g2.dist, deepSplit = 2, +pamRespectsDendro = FALSE, minClusterSize = minModuleSize) +init_mod_sizes <- table(dynamicMods2) +# you will have to work through finding and installing labels2colors. +dynamicColors = labels2colors(dynamicMods2) +table(dynamicColors) +plotDendroAndColors(lap_tree2, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE, +hang = 0.03, addGuide = TRUE, guideHang = 0.05, +main = paste(g2.netname, "dendogram and dynamic cut modules, degree = 1")) +############################### Part 5: UMAP ################################ +g1.dist2 <- 1- as.matrix(g1.adj) +g2.dist2 <- 1- as.matrix(g2.adj) +g1.tree2 <- hclust(as.dist(g1.dist2), method = "average") +g2.tree2 <- hclust(as.dist(g2.dist2), method = "average") +g1.tree2.2clust <- cutree(g1.tree2,k=2) # predicted 2-clusters +g2.tree2.2clust <- cutree(g2.tree2,k=2) # predicted 2-clusters +#g1.tree2$labels <- V(g1)[Faction] # true clusters +#g2.tree2$labels <- V(g2)[Faction] # true clusters +g1.tree2$labels <- g1.tree2.2clust +g2.tree2$labels <- g2.tree2.2clust +# if you want to label tree with predicted clusters +plot(g1.tree2, main = paste(g1.netname, " HClust")) +plot(g2.tree2, main = paste(g2.netname, " HClust")) +plot(g2.tree2, main = paste(g2.netname, " HClust")) +g <- g1 +A <- get.adjacency(g) # adj +m <- ecount(g) +n <- vcount(g) +if (is.null(weights)){ +weights <- rep(1,n) +} +# Obtain the modularity matrix +B.node.i <- function(i){degree(g)[i]*degree(g)} +B.node.all <- sapply(1:n, B.node.i) +B <- A - (B.node.all/(2*m)) +# NOTE: This is identical to: modularity_matrix(g) ! Can verify with: +# modularity_matrix(g) == B +B.eigs <- eigen(B) +max.lam <- B.eigs$values[1] +s <- ifelse(B.eigs$vectors[,1]>0,1,-1) +weights <- B.eigs$vectors[n]/B.eigs$vectors[,1] +# Plotting +#V(g)$color <- ifelse(B[1,]>0,"green","yellow") +V(g)$color <- ifelse(B.eigs$vectors[,1]>0,"green","yellow") +V(g)$size <- 10 +plot(g, main=paste(g.netname, " Newman Modularity")) +clust1 = list() +clust2 = list() +clusters = list() +# Make list of clusters +for(i in 1:n){ +ifelse(V(g)[i]$color=="green", +clust1 <- append(clust1, V(g)[i]$name), +clust2 <- append(clust2, V(g)[i]$name))} +clusters <- list(clust1, clust2) +Q.node.i <- function(i){sum( +(((B.eigs$vectors[i])*weights[i]*s)^2)*B.eigs[i]$values)} +Q <- (1/(4*m))*sapply(1:n, Q.node.i) +plot(g, main=paste(g.netname, " Newman Modularity")) +g +g.netname +g.netname <- "Karate" +plot(g, main=paste(g.netname, " Newman Modularity")) +g <- g2 +A <- get.adjacency(g) # adj +m <- ecount(g) +n <- vcount(g) +if (is.null(weights)){ +weights <- rep(1,n) +} +# Obtain the modularity matrix +B.node.i <- function(i){degree(g)[i]*degree(g)} +B.node.all <- sapply(1:n, B.node.i) +B <- A - (B.node.all/(2*m)) +# NOTE: This is identical to: modularity_matrix(g) ! Can verify with: +# modularity_matrix(g) == B +B.eigs <- eigen(B) +max.lam <- B.eigs$values[1] +s <- ifelse(B.eigs$vectors[,1]>0,1,-1) +weights <- B.eigs$vectors[n]/B.eigs$vectors[,1] +# Plotting +#V(g)$color <- ifelse(B[1,]>0,"green","yellow") +V(g)$color <- ifelse(B.eigs$vectors[,1]>0,"green","yellow") +V(g)$size <- 10 +plot(g, main=paste(g.netname, " Newman Modularity")) +clust1 = list() +clust2 = list() +clusters = list() +# Make list of clusters +for(i in 1:n){ +ifelse(V(g)[i]$color=="green", +clust1 <- append(clust1, V(g)[i]$name), +clust2 <- append(clust2, V(g)[i]$name))} +clusters <- list(clust1, clust2) +Q.node.i <- function(i){sum( +(((B.eigs$vectors[i])*weights[i]*s)^2)*B.eigs[i]$values)} +Q <- (1/(4*m))*sapply(1:n, Q.node.i) +plot(g, vertex.label=NA, main=paste(g.netname, " Newman Modularity")) +plot(g, vertex.label=NA, main=paste(g.netname, " Newman Modularity")) +g.netname <- Yeast +g.netname <- "Yeast" +g <- g2 +A <- get.adjacency(g) # adj +m <- ecount(g) +n <- vcount(g) +if (is.null(weights)){ +weights <- rep(1,n) +} +# Obtain the modularity matrix +B.node.i <- function(i){degree(g)[i]*degree(g)} +B.node.all <- sapply(1:n, B.node.i) +B <- A - (B.node.all/(2*m)) +# NOTE: This is identical to: modularity_matrix(g) ! Can verify with: +# modularity_matrix(g) == B +B.eigs <- eigen(B) +max.lam <- B.eigs$values[1] +s <- ifelse(B.eigs$vectors[,1]>0,1,-1) +weights <- B.eigs$vectors[n]/B.eigs$vectors[,1] +# Plotting +#V(g)$color <- ifelse(B[1,]>0,"green","yellow") +V(g)$color <- ifelse(B.eigs$vectors[,1]>0,"green","yellow") +V(g)$size <- 10 +plot(g, vertex.label=NA, main=paste(g.netname, " Newman Modularity")) +g1 +g1[3] +normalizedMI(x1_clusters,x2_clusters) +gc() +source("mutual_information.R") +normalizedMI(x1_clusters,x2_clusters) +x1_clusters +##################### Part 1: Laplace Spectral Clustering ##################### +g1.adj <- get.adjacency(g1) # get adjacency +g1.deg <- rowSums(as.matrix(g1.adj)) # get degrees +g1.Lap <- diag(g1.deg) - g1.adj # L = D-A +n1 <- length(V(g1)) # number of nodes +# get eigvals and vecs +x1 <- eigen(g1.Lap)$vectors[,n1-1] +x1_val <- eigen(g1.Lap)$values[n1-1] +names(x1) <- names(V(g1)) +x1 +x1_clusters <- ifelse(x1>0,1,-1) +normalizedMI(x1_clusters,x1_clusters) +x1_clusters +normalizedMI(x1_clusters) +vcount(g2) +cat(paste('Number of detected communities =',length(g2.membership.ids))) +cat("community sizes: ") +sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)}) +sapply(membership.ids,function(x) {sum(x==g2.modularity$membership)}) +# Using igraph +g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE) +#memberships <-community.to.membership(karate, karate.modularity$merges, +# steps=which.max(fgreedy$modularity)-1) +g2.modularity$membership +g2.modularity$merges +g2.membership.ids <- unique(g2.modularity$membership) +g2.membership.ids +cat(paste('Number of detected communities =',length(g2.membership.ids))) +cat("community sizes: ") +sapply(membership.ids,function(x) {sum(x==g2.modularity$membership)}) +cat("modularity: ") +max(g2.modularity$modularity) diff --git a/.~lock.Schrick-Noah_CS-7863_Homework3_Writeup.odt# b/.~lock.Schrick-Noah_CS-7863_Homework3_Writeup.odt# new file mode 100644 index 0000000..b0224b1 --- /dev/null +++ b/.~lock.Schrick-Noah_CS-7863_Homework3_Writeup.odt# @@ -0,0 +1 @@ +,noah,NovaArchSys,24.03.2022 03:03,file:///home/noah/.config/libreoffice/4; \ No newline at end of file diff --git a/Schrick-Noah_CS-7863_Homework-3.R b/Schrick-Noah_CS-7863_Homework-3.R index 73f79ec..fc4a9b3 100644 --- a/Schrick-Noah_CS-7863_Homework-3.R +++ b/Schrick-Noah_CS-7863_Homework-3.R @@ -10,6 +10,7 @@ library(WGCNA) # Set working directory to this source file location (Requires RStudio) setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) source("self_newman_mod.R") +source("mutual_information.R") data(karate) data(yeast) @@ -17,8 +18,10 @@ data(yeast) g1 <- karate g1.netname <- "Karate" g2 <- yeast +g2 <- simplify(g2) g2.netname <- "Yeast" + ##################### Part 1: Laplace Spectral Clustering ##################### g1.adj <- get.adjacency(g1) # get adjacency g2.adj <- get.adjacency(g2) @@ -40,6 +43,7 @@ x1 x2 x1_clusters <- ifelse(x1>0,1,-1) x2_clusters <- ifelse(x2>0,1,-1) +#normalizedMI(x1_clusters) # Plotting V(g1)$color <- ifelse(x1_clusters>0,"green","yellow") @@ -57,28 +61,49 @@ list(Q=Q,max.lam=max.lam,weights=weights,clusters=clusters) <- newman_mod(g2) ##################### Part 3: Recursive Newman Modularity ##################### # Using igraph -karate.modularity <- fastgreedy.community(karate,merges=TRUE, modularity=TRUE, membership=TRUE) +g1.modularity <- fastgreedy.community(g1,merges=TRUE, modularity=TRUE, membership=TRUE) #memberships <-community.to.membership(karate, karate.modularity$merges, # steps=which.max(fgreedy$modularity)-1) -karate.modularity$membership -karate.modularity$merges -membership.ids <- unique(karate.modularity$membership) -membership.ids -cat(paste('Number of detected communities =',length(membership.ids))) +g1.modularity$membership +g1.modularity$merges +g1.membership.ids <- unique(g1.modularity$membership) +g1.membership.ids +cat(paste('Number of detected communities =',length(g1.membership.ids))) cat("community sizes: ") -sapply(membership.ids,function(x) {sum(x==karate.modularity$membership)}) +sapply(membership.ids,function(x) {sum(x==g1.modularity$membership)}) cat("modularity: ") -max(karate.modularity$modularity) +max(g1.modularity$modularity) #karate.modularity$modularity -V(karate)$color[karate.modularity$membership==1] <- "green" -V(karate)$color[karate.modularity$membership==2] <- "red" -V(karate)$color[karate.modularity$membership==3] <- "blue" +V(g1)$color[karate.modularity$membership==1] <- "green" +V(g1)$color[karate.modularity$membership==2] <- "red" +V(g1)$color[karate.modularity$membership==3] <- "blue" -plot(karate,vertex.size=10, - vertex.label=V(karate)$label,vertex.color=V(karate)$color, - main=paste("Karate Recursive Newman Modularity")) +plot(g1,vertex.size=10, + vertex.label=V(karate)$label,vertex.color=V(g1)$color, + main=paste(g1.netname, " Recursive Newman Modularity")) +# Using igraph +g2.modularity <- fastgreedy.community(g2,merges=TRUE, modularity=TRUE, membership=TRUE) +#memberships <-community.to.membership(karate, karate.modularity$merges, +# steps=which.max(fgreedy$modularity)-1) +g2.modularity$membership +g2.modularity$merges +g2.membership.ids <- unique(g2.modularity$membership) +g2.membership.ids +cat(paste('Number of detected communities =',length(g2.membership.ids))) +cat("community sizes: ") +sapply(membership.ids,function(x) {sum(x==g2.modularity$membership)}) +cat("modularity: ") +max(g2.modularity$modularity) +#karate.modularity$modularity + +V(g2)$color=g2.modularity$membership + + +plot(g2,vertex.size=10, + vertex.label=NA,vertex.color=V(g2)$color, + main=paste(g2.netname, " Recursive Newman Modularity")) ###################### Part 4: TOM and Dynamic Tree Cut ###################### # Next smallest eig @@ -93,6 +118,12 @@ y2_val <- eigen(g2.Lap)$values[n2-2] lap_dist1 <- dist(cbind(x1,y1)) lap_dist2 <- dist(cbind(x2,y2)) +# Matrix dists from TOM +g1.mat <- as.matrix(get.adjacency(g1)) +g2.mat <- as.matrix(get.adjacency(g2)) +g1.dist <- GTOMdist(g1.mat, degree = 1) +g2.dist <- GTOMdist(g2.mat, degree = 1) + # Use hierarchical clustering of distance matrix lap_tree1 <- hclust(lap_dist1) lap_tree2 <- hclust(lap_dist2) @@ -113,9 +144,9 @@ TOMplot(g2.TOM, lap_tree2, main=paste(g2.netname, " Heatmap Plot")) minModuleSize = 2 # degree = 1 -dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = lap_dist1, deepSplit = 2, +dynamicMods1 = cutreeDynamic(dendro = lap_tree1, distM = g1.dist, deepSplit = 2, pamRespectsDendro = FALSE, minClusterSize = minModuleSize) -init_mod_sizes <- table(dynamicMods) +init_mod_sizes <- table(dynamicMods1) # you will have to work through finding and installing labels2colors. dynamicColors = labels2colors(dynamicMods1) @@ -125,9 +156,9 @@ plotDendroAndColors(lap_tree1, dynamicColors, "Dynamic Tree Cut", dendroLabels = main = paste(g1.netname, "dendogram and dynamic cut modules, degree = 1")) # degree = 1 -dynamicMods2 = cutreeDynamic(dendro = lap_tree2, distM = lap_dist2, deepSplit = 2, +dynamicMods2 = cutreeDynamic(dendro = lap_tree2, distM = g2.dist, deepSplit = 2, pamRespectsDendro = FALSE, minClusterSize = minModuleSize) -init_mod_sizes <- table(dynamicMods) +init_mod_sizes <- table(dynamicMods2) # you will have to work through finding and installing labels2colors. dynamicColors = labels2colors(dynamicMods2) @@ -136,7 +167,7 @@ plotDendroAndColors(lap_tree2, dynamicColors, "Dynamic Tree Cut", dendroLabels = hang = 0.03, addGuide = TRUE, guideHang = 0.05, main = paste(g2.netname, "dendogram and dynamic cut modules, degree = 1")) -################################ Part 5: UMAP ################################ +############################### Part 5: UMAP ################################ g1.dist2 <- 1- as.matrix(g1.adj) g2.dist2 <- 1- as.matrix(g2.adj) diff --git a/Schrick-Noah_CS-7863_Homework3_Writeup.odt b/Schrick-Noah_CS-7863_Homework3_Writeup.odt new file mode 100644 index 0000000..2a561e3 Binary files /dev/null and b/Schrick-Noah_CS-7863_Homework3_Writeup.odt differ diff --git a/Schrick-Noah_CS-7863_Homework3_Writeup.pdf b/Schrick-Noah_CS-7863_Homework3_Writeup.pdf new file mode 100644 index 0000000..0570074 Binary files /dev/null and b/Schrick-Noah_CS-7863_Homework3_Writeup.pdf differ diff --git a/mutual_information.R b/mutual_information.R new file mode 100644 index 0000000..b377cca --- /dev/null +++ b/mutual_information.R @@ -0,0 +1,77 @@ +entropy <- function(x){ + n <- length(x) + unique_states <- unique(x) + # initialize the prob of each state to 0 + # two elements for p(x=0) and p(x=1) + p <- rep(0,length(unique_states)) + # update two elements of p + # curr_state = 0 or 1 + i <- 1 # R starts counting arrays at 1! + for (curr_state in unique_states){ + p[i] = sum(x==curr_state)/n # prob of a unique state + i <- i + 1 + } + H = sum(-p*log2(p)) + return(H) +} + +mutualInformation <- function(X,Y){ + # X and Y are vectors of states + n <- length(X) + X.unique <- unique(X) # unique states in X + Y.unique <- unique(Y) + # creates joint states between X and Y states + # This needs to be for all pairs in the X and Y vecs + # this is where I messed up + # xy.joint <- interaction(X.states,Y.states,sep=":") + xy.joint <- interaction(X,Y,sep=":") + # initialize mu=0 + mu <- 0 + for (x in X.unique){ + for (y in Y.unique){ + # sum over all pairs of unique x and y states + # for example, 4 pairs + px <- sum(x==X)/n # marg prob of state x + py <- sum(y==Y)/n # marg prob + xy <- paste(x,y,sep=":") # create observed pair val + pxy <- sum(xy==xy.joint)/n # joint prob of pair + if (pxy>0){ + mu <- mu + pxy*log2(pxy/(px*py)) + } + } + } + return(mu) +} + +normalizedMI <- function(X,Y){ + mu <- mutualInformation(X,Y) + Hx <- entropy(X) + Hy <- entropy(Y) + nmi <- 2*mu/(Hx+Hy) + return(nmi) +} + +# Example X and Y vectors +n <- 10 # num of obsvs +p0 <- .5 # prob of first state +# Roughly 50:50 1's and 0's +x1 <- sample(c(0,1), size=n, + replace=T, prob=c(p0,1-p0)) +# c(0,1) -- possible state values could be c("a","b") +# size=n -- number of random samples of the state levels +# number of observations or length of output +# replace = T -- sample with replacement +# so don't run out of states when sampling +# prob=c(p0,1-p0) -- probability of sampling each state + +y1 <- rep(1,n) # all 1's +interaction(x1,y1,sep=":") +unique(y1) +entropy(x1) + +mutualInformation(x1,y1) +mutualInformation(x1,x1) +normalizedMI(x1,y1) # close to 0 or 0 +normalizedMI(x1,x1) # should be 1 because normalized + +