# Final Project for the University of Tulsa's CS-7863 Network Theory Course # Compliance Graph Analysis # Professor: Dr. McKinney, Spring 2022 # Noah L. Schrick - 1492657 library(igraph) library(centiserve) library(RBGL) library(DirectedClustering) ################## Read in the previously generated networks ################## setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) source("./CG_Files/manual_import.R") car <- import_networks(1) car.netname <- "Vehicle Maintenance" hipaa <- import_networks(2) hipaa.netname <- "HIPAA Compliance" pci <- import_networks(3) pci.netname <- "PCI Compliance" # Get basic network attributes car.adj <- get.adjacency(car) car.deg <- rowSums(as.matrix(car.adj)) # degree car.n <- length(V(car)) hipaa.adj <- get.adjacency(hipaa) hipaa.deg <- rowSums(as.matrix(hipaa.adj)) # degree hipaa.n <- length(V(hipaa)) pci.adj <- get.adjacency(pci) pci.deg <- rowSums(as.matrix(pci.adj)) # degree pci.n <- length(V(pci)) ############################# Base Centralities ############################# source("centralities.R") base_centralities <- matrix(list(), nrow=3, ncol=5) rownames(base_centralities) <- c(car.netname, hipaa.netname, pci.netname) colnames(base_centralities) <- c("Degree", "Katz", "Page Rank", "K-path", "Betweenness") ### Degree base_centralities[[1,1]] <- car.deg %>% sort(decreasing = T) base_centralities[[2,1]] <- hipaa.deg %>% sort(decreasing = T) base_centralities[[3,1]] <- pci.deg %>% sort(decreasing = T) #### Katz car.katz <- katz.cent(car) nodes <- car.katz %>% order(decreasing=T) nodes <- head(nodes, 15)-1 vals <- car.katz %>% sort(decreasing=T) vals <- head(vals, 15) base_centralities[[1,2]] <- car.katz[rowSums(apply(car.katz,2,is.nan))==0,] %>% sort(decreasing = T) base_centralities[[2,2]] <- katz.cent(hipaa) %>% sort(decreasing = T) hipaa.katz <- katz.cent(hipaa) nodes <- hipaa.katz %>% order(decreasing=T) nodes <- head(nodes, 15)-1 vals <- hipaa.katz %>% sort(decreasing=T) vals <- head(vals, 15) base_centralities[[3,2]] <- katz.cent(pci) %>% sort(decreasing = T) pci.katz <- katz.cent(pci) nodes <- pci.katz %>% order(decreasing=T) nodes <- head(nodes, 15)-1 vals <- pci.katz %>% sort(decreasing=T) vals <- head(vals, 15) ### Page Rank base_centralities[[1,3]] <- page.rank(car)$vector %>% sort(decreasing = T) base_centralities[[2,3]] <- page.rank(hipaa)$vector %>% sort(decreasing = T) base_centralities[[3,3]] <- page.rank(pci)$vector %>% sort(decreasing = T) ### K-path base_centralities[[1,4]] <- geokpath(car, V(car), "out") %>% sort(decreasing = T) base_centralities[[2,4]] <- geokpath(hipaa, V(hipaa), "out") %>% sort(decreasing = T) base_centralities[[3,4]] <- geokpath(pci, V(pci), "out") %>% sort(decreasing = T) ### Betweenness base_centralities[[1,5]] <- betweenness(car, TRUE) %>% sort(decreasing = T) base_centralities[[2,5]] <- betweenness(hipaa, TRUE) %>% sort(decreasing = T) base_centralities[[3,5]] <- betweenness(pci, TRUE) %>% sort(decreasing = T) ############################### Base Clustering ############################### source("self_newman_mod.R") base_clusters <- matrix(list(), nrow=3, ncol=2) rownames(base_centralities) <- c(car.netname, hipaa.netname, pci.netname) colnames(base_centralities) <- c("Laplace", "CG") ### Laplacian car.Lap <- diag(car.deg) - car.adj # L = D-A hipaa.Lap <- diag(hipaa.deg) - hipaa.adj pci.Lap <- diag(pci.deg) - pci.adj # get eigvals and vecs car.eigs <- Re(eigen(car.Lap)$vectors[,car.n-1]) car.eig_val <- eigen(car.Lap)$values[car.n-1] names(car.eigs) <- names(V(car)) car.l_clusters <- ifelse(car.eigs>0,1,-1) base_clusters[[1,1]] <- car.l_clusters V(car)$color <- ifelse(car.l_clusters>0, "green", "yellow") plot(car, main=paste(car.netname, "Laplace Spectral Clustering"), vertex.label=NA) hipaa.eigs <- Re(eigen(hipaa.Lap)$vectors[,hipaa.n-1]) hipaa.eig_val <- eigen(hipaa.Lap)$values[hipaa.n-1] names(hipaa.eigs) <- names(V(hipaa)) hipaa.l_clusters <- ifelse(hipaa.eigs>0,1,-1) base_clusters[[2,1]] <- hipaa.l_clusters V(hipaa)$color <- ifelse(hipaa.l_clusters>0, "green", "yellow") plot(hipaa, main=paste(hipaa.netname, "Laplace Spectral Clustering"), vertex.label=NA) pci.eigs <- Re(eigen(pci.Lap)$vectors[,pci.n-1]) pci.eig_val <- eigen(pci.Lap)$values[pci.n-1] names(pci.eigs) <- names(V(pci)) pci.l_clusters <- ifelse(pci.eigs>0,1,-1) base_clusters[[3,1]] <- pci.l_clusters V(pci)$color <- ifelse(pci.l_clusters>0, "green", "yellow") plot(pci, main=paste(pci.netname, "Laplace Spectral Clustering"), vertex.label=NA) ### Clemente and Grassi base_clusters[[1,2]] <- ClustBCG(as.matrix(car.adj), "directed")$totalCC base_clusters[[2,2]] <- ClustBCG(as.matrix(hipaa.adj), "directed")$totalCC base_clusters[[3,2]] <- ClustBCG(as.matrix(pci.adj), "directed")$totalCC ################################ Misc Analysis ################################ min_cut(car,"0", "2490") min_cut(hipaa,"0","2320") min_cut(pci,"0","60") max_flow(car, "0", "2490") max_flow(hipaa,"0","2320") max_flow(pci,"0","60") ### Transitive closure car.graph <- as_graphnel(car) hipaa.graph <- as_graphnel(hipaa) pci.graph <- as_graphnel(pci) car.tc <- graph_from_graphnel(transitive.closure(car.graph)) hipaa.tc <- graph_from_graphnel(transitive.closure(hipaa.graph)) pci.tc <- graph_from_graphnel(transitive.closure(pci.graph)) ### Edge connectivty edge_connectivity(car, "0", "2490") edge_connectivity(hipaa,"0","2320") edge_connectivity(pci,"0","60") # Dominator Tree car.dtree <- dominator_tree(car, "0", "out")$domtree hipaa.dtree <- dominator_tree(hipaa, "0", "out")$domtree pci.dtree <- dominator_tree(pci, "0", "out")$domtree ###################### Transitive Closure Centralities ###################### tc_centralities <- matrix(list(), nrow=3, ncol=5) rownames(tc_centralities) <- c(car.netname, hipaa.netname, pci.netname) colnames(tc_centralities) <- c("Degree", "Katz", "Page Rank", "K-path", "Betweenness") # Get basic network attributes car.tc.adj <- get.adjacency(car.tc) car.tc.deg <- rowSums(as.matrix(car.tc.adj)) # degree car.tc.n <- length(V(car.tc)) hipaa.tc.adj <- get.adjacency(hipaa.tc) hipaa.tc.deg <- rowSums(as.matrix(hipaa.tc.adj)) # degree hipaa.tc.n <- length(V(hipaa.tc)) pci.tc.adj <- get.adjacency(pci.tc) pci.tc.deg <- rowSums(as.matrix(pci.tc.adj)) # degree pci.tc.n <- length(V(pci.tc)) ### Degree tc_centralities[[1,1]] <- car.tc.deg %>% sort(decreasing = T) tc_centralities[[2,1]] <- hipaa.tc.deg %>% sort(decreasing = T) tc_centralities[[3,1]] <- pci.tc.deg %>% sort(decreasing = T) #### Katz car.tc.katz <- katz.cent(car.tc) tc_centralities[[1,2]] <- car.tc.katz[rowSums(apply(car.tc.katz,2,is.nan))==0,] %>% sort(decreasing = T) car.tc.katz <- katz.cent(car.tc) nodes <- car.tc.katz %>% order(decreasing=T) nodes <- head(nodes, 15)-1 vals <- car.tc.katz %>% sort(decreasing=T) vals <- head(vals, 15) tc_centralities[[2,2]] <- katz.cent(hipaa.tc) %>% sort(decreasing = T) hipaa.tc.katz <- katz.cent(hipaa.tc) nodes <- hipaa.tc.katz %>% order(decreasing=T) nodes <- head(nodes, 15)-1 vals <- hipaa.tc.katz %>% sort(decreasing=T) vals <- head(vals, 15) tc_centralities[[3,2]] <- katz.cent(pci.tc) %>% sort(decreasing = T) pci.tc.katz <- katz.cent(pci.tc) nodes <- pci.tc.katz %>% order(decreasing=T) nodes <- head(nodes, 15)-1 vals <- pci.tc.katz %>% sort(decreasing=T) vals <- head(vals, 15) ### Page Rank tc_centralities[[1,3]] <- page.rank(car.tc)$vector %>% sort(decreasing = T) tc_centralities[[2,3]] <- page.rank(hipaa.tc)$vector %>% sort(decreasing = T) tc_centralities[[3,3]] <- page.rank(pci.tc)$vector %>% sort(decreasing = T) ### K-path tc_centralities[[1,4]] <- geokpath(car.tc, V(car.tc), "out") %>% sort(decreasing = T) tc_centralities[[2,4]] <- geokpath(hipaa.tc, V(hipaa.tc), "out") %>% sort(decreasing = T) tc_centralities[[3,4]] <- geokpath(pci.tc, V(pci.tc), "out") %>% sort(decreasing = T) ### Betweenness tc_centralities[[1,5]] <- betweenness(car.tc, TRUE) %>% sort(decreasing = T) tc_centralities[[2,5]] <- betweenness(hipaa.tc, TRUE) %>% sort(decreasing = T) tc_centralities[[3,5]] <- betweenness(pci.tc, TRUE) %>% sort(decreasing = T) ######################## Transitive Closure Clustering ######################## source("self_newman_mod.R") tc_clusters <- matrix(list(), nrow=3, ncol=2) rownames(tc_centralities) <- c(car.netname, hipaa.netname, pci.netname) colnames(tc_centralities) <- c("Laplace", "CG") ### Laplacian car.tc.Lap <- diag(car.tc.deg) - car.tc.adj # L = D-A hipaa.tc.Lap <- diag(hipaa.tc.deg) - hipaa.tc.adj pci.tc.Lap <- diag(pci.tc.deg) - pci.tc.adj # get eigvals and vecs car.tc.eigs <- Re(eigen(car.tc.Lap)$vectors[,car.tc.n-1]) car.tc.eig_val <- eigen(car.tc.Lap)$values[car.tc.n-1] names(car.tc.eigs) <- names(V(car.tc)) car.tc.l_clusters <- ifelse(car.tc.eigs>0,1,-1) tc_clusters[[1,1]] <- car.tc.l_clusters V(car.tc)$color <- ifelse(car.tc.l_clusters>0, "green", "yellow") plot(car.tc, main=paste(car.tc.netname, "Laplace Spectral Clustering"), vertex.label=NA) hipaa.tc.eigs <- Re(eigen(hipaa.tc.Lap)$vectors[,hipaa.tc.n-1]) hipaa.tc.eig_val <- eigen(hipaa.tc.Lap)$values[hipaa.tc.n-1] names(hipaa.tc.eigs) <- names(V(hipaa.tc)) hipaa.tc.l_clusters <- ifelse(hipaa.tc.eigs>0,1,-1) tc_clusters[[2,1]] <- hipaa.tc.l_clusters V(hipaa.tc)$color <- ifelse(hipaa.tc.l_clusters>0, "green", "yellow") plot(hipaa.tc, main=paste(hipaa.tc.netname, "Laplace Spectral Clustering"), vertex.label=NA) pci.tc.eigs <- Re(eigen(pci.tc.Lap)$vectors[,pci.tc.n-1]) pci.tc.eig_val <- eigen(pci.tc.Lap)$values[pci.tc.n-1] names(pci.tc.eigs) <- names(V(pci.tc)) pci.tc.l_clusters <- ifelse(pci.tc.eigs>0,1,-1) tc_clusters[[3,1]] <- pci.tc.l_clusters V(pci.tc)$color <- ifelse(pci.tc.l_clusters>0, "green", "yellow") plot(pci.tc, main=paste(pci.tc.netname, "Laplace Spectral Clustering"), vertex.label=NA) ### Clemente and Grassi tc_clusters[[1,2]] <- ClustBCG(as.matrix(car.tc.adj), "directed")$totalCC tc_clusters[[2,2]] <- ClustBCG(as.matrix(hipaa.tc.adj), "directed")$totalCC tc_clusters[[3,2]] <- ClustBCG(as.matrix(pci.tc.adj), "directed")$totalCC ######################### Dominant Tree Centralities ######################### dtree_centralities <- matrix(list(), nrow=3, ncol=5) rownames(dtree_centralities) <- c(car.netname, hipaa.netname, pci.netname) colnames(dtree_centralities) <- c("Degree", "Katz", "Page Rank", "K-path", "Betweenness") # Get basic network attributes car.dtree.adj <- get.adjacency(car.dtree) car.dtree.deg <- rowSums(as.matrix(car.dtree.adj)) # degree car.dtree.n <- length(V(car.dtree)) hipaa.dtree.adj <- get.adjacency(hipaa.dtree) hipaa.dtree.deg <- rowSums(as.matrix(hipaa.dtree.adj)) # degree hipaa.dtree.n <- length(V(hipaa.dtree)) pci.dtree.adj <- get.adjacency(pci.dtree) pci.dtree.deg <- rowSums(as.matrix(pci.dtree.adj)) # degree pci.dtree.n <- length(V(pci.dtree)) ### Degree dtree_centralities[[1,1]] <- car.dtree.deg %>% sort(decreasing = T) dtree_centralities[[2,1]] <- hipaa.dtree.deg %>% sort(decreasing = T) dtree_centralities[[3,1]] <- pci.dtree.deg %>% sort(decreasing = T) #### Katz car.dtree.katz <- katz.cent(car.dtree) dtree_centralities[[1,2]] <- car.dtree.katz[rowSums(apply(car.dtree.katz,2,is.nan))==0,] %>% sort(decreasing = T) car.dtree.katz <- katz.cent(car.dtree) nodes <- car.dtree.katz %>% order(decreasing=T) nodes <- head(nodes, 15)-1 vals <- car.dtree.katz %>% sort(decreasing=T) vals <- head(vals, 15) dtree_centralities[[2,2]] <- katz.cent(hipaa.dtree) %>% sort(decreasing = T) hipaa.dtree.katz <- katz.cent(hipaa.dtree) nodes <- hipaa.dtree.katz %>% order(decreasing=T) nodes <- head(nodes, 15)-1 vals <- hipaa.dtree.katz %>% sort(decreasing=T) vals <- head(vals, 15) dtree_centralities[[3,2]] <- katz.cent(pci.dtree) %>% sort(decreasing = T) pci.dtree.katz <- katz.cent(pci.dtree) nodes <- pci.dtree.katz %>% order(decreasing=T) nodes <- head(nodes, 15)-1 vals <- pci.dtree.katz %>% sort(decreasing=T) vals <- head(vals, 15) ### Page Rank dtree_centralities[[1,3]] <- page.rank(car.dtree)$vector %>% sort(decreasing = T) dtree_centralities[[2,3]] <- page.rank(hipaa.dtree)$vector %>% sort(decreasing = T) dtree_centralities[[3,3]] <- page.rank(pci.dtree)$vector %>% sort(decreasing = T) ### K-path dtree_centralities[[1,4]] <- geokpath(car.dtree, V(car.dtree), "out") %>% sort(decreasing = T) dtree_centralities[[2,4]] <- geokpath(hipaa.dtree, V(hipaa.dtree), "out") %>% sort(decreasing = T) dtree_centralities[[3,4]] <- geokpath(pci.dtree, V(pci.dtree), "out") %>% sort(decreasing = T) ### Betweenness dtree_centralities[[1,5]] <- betweenness(car.dtree, TRUE) %>% sort(decreasing = T) dtree_centralities[[2,5]] <- betweenness(hipaa.dtree, TRUE) %>% sort(decreasing = T) dtree_centralities[[3,5]] <- betweenness(pci.dtree, TRUE) %>% sort(decreasing = T) ########################## Dominant Tree Clustering ########################## source("self_newman_mod.R") dtree_clusters <- matrix(list(), nrow=3, ncol=2) rownames(dtree_centralities) <- c(car.netname, hipaa.netname, pci.netname) colnames(dtree_centralities) <- c("Laplace", "CG") ### Laplacian car.dtree.Lap <- diag(car.dtree.deg) - car.dtree.adj # L = D-A hipaa.dtree.Lap <- diag(hipaa.dtree.deg) - hipaa.dtree.adj pci.dtree.Lap <- diag(pci.dtree.deg) - pci.dtree.adj # get eigvals and vecs car.dtree.eigs <- Re(eigen(car.dtree.Lap)$vectors[,car.dtree.n-1]) car.dtree.eig_val <- eigen(car.dtree.Lap)$values[car.dtree.n-1] names(car.dtree.eigs) <- names(V(car.dtree)) car.dtree.l_clusters <- ifelse(car.dtree.eigs>0,1,-1) dtree_clusters[[1,1]] <- car.dtree.l_clusters V(car.dtree)$color <- ifelse(car.dtree.l_clusters>0, "green", "yellow") plot(car.dtree, main=paste(car.dtree.netname, "Laplace Spectral Clustering"), vertex.label=NA) hipaa.dtree.eigs <- Re(eigen(hipaa.dtree.Lap)$vectors[,hipaa.dtree.n-1]) hipaa.dtree.eig_val <- eigen(hipaa.dtree.Lap)$values[hipaa.dtree.n-1] names(hipaa.dtree.eigs) <- names(V(hipaa.dtree)) hipaa.dtree.l_clusters <- ifelse(hipaa.dtree.eigs>0,1,-1) dtree_clusters[[2,1]] <- hipaa.dtree.l_clusters V(hipaa.dtree)$color <- ifelse(hipaa.dtree.l_clusters>0, "green", "yellow") plot(hipaa.dtree, main=paste(hipaa.dtree.netname, "Laplace Spectral Clustering"), vertex.label=NA) pci.dtree.eigs <- Re(eigen(pci.dtree.Lap)$vectors[,pci.dtree.n-1]) pci.dtree.eig_val <- eigen(pci.dtree.Lap)$values[pci.dtree.n-1] names(pci.dtree.eigs) <- names(V(pci.dtree)) pci.dtree.l_clusters <- ifelse(pci.dtree.eigs>0,1,-1) dtree_clusters[[3,1]] <- pci.dtree.l_clusters V(pci.dtree)$color <- ifelse(pci.dtree.l_clusters>0, "green", "yellow") plot(pci.dtree, main=paste(pci.dtree.netname, "Laplace Spectral Clustering"), vertex.label=NA) ### Clemente and Grassi dtree_clusters[[1,2]] <- ClustBCG(as.matrix(car.dtree.adj), "directed")$totalCC dtree_clusters[[2,2]] <- ClustBCG(as.matrix(hipaa.dtree.adj), "directed")$totalCC dtree_clusters[[3,2]] <- ClustBCG(as.matrix(pci.dtree.adj), "directed")$totalCC ############################# Write Final Results ############################# write.table(base_centralities, file='results.csv') write.table(tc_centralities, file='results.csv') write.table(dtree_centralities, file='results.csv') ### Degree: head(base_centralities[[1,1]], 15) #Car head(tc_centralities[[1,1]],15) head(dtree_centralities[[1,1]],15) head(base_centralities[[2,1]], 15) #HIPAA head(tc_centralities[[2,1]],15) head(dtree_centralities[[2,1]],15) head(base_centralities[[3,1]], 15) #PCI head(tc_centralities[[3,1]],15) head(dtree_centralities[[3,1]],15) ### Katz: head(base_centralities[[1,2]], 15) #Car head(tc_centralities[[1,2]],15) head(dtree_centralities[[1,2]],15) head(base_centralities[[2,2]], 15) #HIPAA head(tc_centralities[[2,2]],15) head(dtree_centralities[[2,2]],15) head(base_centralities[[3,2]], 15) #PCI head(tc_centralities[[3,2]],15) head(dtree_centralities[[3,2]],15) ### Page Rank: head(base_centralities[[1,3]], 15) #Car head(tc_centralities[[1,3]],15) head(dtree_centralities[[1,3]],15) head(base_centralities[[2,3]], 15) #HIPAA head(tc_centralities[[2,3]],15) head(dtree_centralities[[2,3]],15) head(base_centralities[[3,3]], 15) #PCI head(tc_centralities[[3,3]],15) head(dtree_centralities[[3,3]],15) ### K-Path: head(base_centralities[[1,4]], 15) #Car head(tc_centralities[[1,4]],15) head(dtree_centralities[[1,4]],15) head(base_centralities[[2,4]], 15) #HIPAA head(tc_centralities[[2,4]],15) head(dtree_centralities[[2,4]],15) head(base_centralities[[3,4]], 15) #PCI head(tc_centralities[[3,4]],15) head(dtree_centralities[[3,4]],15) ### Betweenness: head(base_centralities[[1,5]], 15) #Car head(tc_centralities[[1,5]],15) head(dtree_centralities[[1,5]],15) head(base_centralities[[2,5]], 15) #HIPAA head(tc_centralities[[2,5]],15) head(dtree_centralities[[2,5]],15) head(base_centralities[[3,5]], 15) #PCI head(tc_centralities[[3,5]],15) head(dtree_centralities[[3,5]],15) ### Laplacian: head(base_clusters[[1,1]], 15) #Car head(tc_centralities[[1,1]],15) head(dtree_centralities[[1,1]],15) head(base_clusters[[2,1]], 15) #HIPAA head(tc_centralities[[2,1]],15) head(dtree_centralities[[2,1]],15) head(base_clusters[[3,1]], 15) #PCI head(tc_centralities[[3,1]],15) head(dtree_centralities[[3,1]],15) ### CG: head(base_clusters[[1,2]], 15) #Car head(tc_centralities[[1,2]],15) head(dtree_centralities[[1,2]],15) head(base_clusters[[2,2]], 15) #HIPAA head(tc_centralities[[2,2]],15) head(dtree_centralities[[2,2]],15) head(base_clusters[[3,2]], 15) #PCI head(tc_centralities[[3,2]],15) head(dtree_centralities[[3,2]],15)