From cf3cef21b66646c38ad255599403c6dd25ecce8a Mon Sep 17 00:00:00 2001 From: noah Date: Fri, 14 Apr 2023 17:16:46 -0500 Subject: [PATCH] Adding WIP analysis code --- code/Schrick-Noah_CG-Analysis.R | 626 ++++++++++++++++++++++++++++++++ 1 file changed, 626 insertions(+) create mode 100644 code/Schrick-Noah_CG-Analysis.R diff --git a/code/Schrick-Noah_CG-Analysis.R b/code/Schrick-Noah_CG-Analysis.R new file mode 100644 index 0000000..e927ad0 --- /dev/null +++ b/code/Schrick-Noah_CG-Analysis.R @@ -0,0 +1,626 @@ +# Final Project for the University of Tulsa's CS-7863 Network Theory Course +# Compliance Graph Analysis +# Professor: Dr. McKinney, Spring 2022 +# Noah L. Schrick - 1492657 + +library(igraph) +library(centiserve) +if (!require("BiocManager", quietly = TRUE)) + install.packages("BiocManager") +BiocManager::install("RBGL") +library(RBGL) +library(DirectedClustering) + +################## Read in the previously generated networks ################## +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) +source("./CG_Files/manual_import.R") + +car <- import_networks(1) +car.netname <- "Vehicle Maintenance" +hipaa <- import_networks(2) +hipaa.netname <- "HIPAA Compliance" +pci <- import_networks(3) +pci.netname <- "PCI Compliance" + +# Get basic network attributes +car.adj <- get.adjacency(car) +car.deg <- rowSums(as.matrix(car.adj)) # degree +car.n <- length(V(car)) + +hipaa.adj <- get.adjacency(hipaa) +hipaa.deg <- rowSums(as.matrix(hipaa.adj)) # degree +hipaa.n <- length(V(hipaa)) + +pci.adj <- get.adjacency(pci) +pci.deg <- rowSums(as.matrix(pci.adj)) # degree +pci.n <- length(V(pci)) + +car_plots <- matrix(list(), nrow=3, ncol=5) +rownames(car_plots) <- c("Base", "Transitive Closure", "Dominant Tree") +colnames(car_plots) <- c("Degree", "Katz", "Page Rank", "K-path", "Betweenness") + +############################# Base Centralities ############################# +source("centralities.R") +base_centralities <- matrix(list(), nrow=3, ncol=5) +rownames(base_centralities) <- c(car.netname, hipaa.netname, pci.netname) +colnames(base_centralities) <- c("Degree", "Katz", "Page Rank", "K-path", "Betweenness") + +### Degree +base_centralities[[1,1]] <- car.deg %>% sort(decreasing = T) +base_centralities[[2,1]] <- hipaa.deg %>% sort(decreasing = T) +base_centralities[[3,1]] <- pci.deg %>% sort(decreasing = T) + +car_deg_Fn <- ( ecdf( car.deg )) +car_plots[[1,1]] <- car_deg_Fn(car.deg)*100 + +#### Katz +car.katz <- katz.cent(car) +nodes <- car.katz %>% order(decreasing=T) +nodes <- head(nodes, 15)-1 +vals <- car.katz %>% sort(decreasing=T) +vals <- head(vals, 15) +base_centralities[[1,2]] <- car.katz[rowSums(apply(car.katz,2,is.nan))==0,] %>% sort(decreasing = T) + +car_katz_Fn <- ( ecdf(as.numeric(car.katz ))) +car_plots[[1,2]] <- car_katz_Fn(as.numeric(car.katz))*100 + +base_centralities[[2,2]] <- katz.cent(hipaa) %>% sort(decreasing = T) +hipaa.katz <- katz.cent(hipaa) +nodes <- hipaa.katz %>% order(decreasing=T) +nodes <- head(nodes, 15)-1 +vals <- hipaa.katz %>% sort(decreasing=T) +vals <- head(vals, 15) + +base_centralities[[3,2]] <- katz.cent(pci) %>% sort(decreasing = T) +pci.katz <- katz.cent(pci) +nodes <- pci.katz %>% order(decreasing=T) +nodes <- head(nodes, 15)-1 +vals <- pci.katz %>% sort(decreasing=T) +vals <- head(vals, 15) + +### Page Rank +base_centralities[[1,3]] <- page.rank(car)$vector %>% sort(decreasing = T) +base_centralities[[2,3]] <- page.rank(hipaa)$vector %>% sort(decreasing = T) +base_centralities[[3,3]] <- page.rank(pci)$vector %>% sort(decreasing = T) + +car_pr_Fn <- ( ecdf( page.rank(car)$vector )) +car_plots[[1,3]] <- car_pr_Fn(page.rank(car)$vector)*100 + +### K-path +base_centralities[[1,4]] <- geokpath(car, V(car), "out") %>% sort(decreasing = T) +base_centralities[[2,4]] <- geokpath(hipaa, V(hipaa), "out") %>% sort(decreasing = T) +base_centralities[[3,4]] <- geokpath(pci, V(pci), "out") %>% sort(decreasing = T) + +car_kp_Fn <- (ecdf(geokpath(car, V(car), "out"))) +car_plots[[1,4]] <- car_kp_Fn(geokpath(car, V(car), "out"))*100 + +### Betweenness +base_centralities[[1,5]] <- betweenness(car, TRUE) %>% sort(decreasing = T) +base_centralities[[2,5]] <- betweenness(hipaa, TRUE) %>% sort(decreasing = T) +base_centralities[[3,5]] <- betweenness(pci, TRUE) %>% sort(decreasing = T) + +car_btw_Fn <- (ecdf(betweenness(car, TRUE))) +car_plots[[1,5]] <- car_btw_Fn(betweenness(car, TRUE))*100 + +############################### Base Clustering ############################### +#source("self_newman_mod.R") +#base_clusters <- matrix(list(), nrow=3, ncol=2) +#rownames(base_centralities) <- c(car.netname, hipaa.netname, pci.netname) +#colnames(base_centralities) <- c("Laplace", "CG") + +### Laplacian +#car.Lap <- diag(car.deg) - car.adj # L = D-A +#hipaa.Lap <- diag(hipaa.deg) - hipaa.adj +#pci.Lap <- diag(pci.deg) - pci.adj + +# get eigvals and vecs +#car.eigs <- Re(eigen(car.Lap)$vectors[,car.n-1]) +#car.eig_val <- eigen(car.Lap)$values[car.n-1] +#names(car.eigs) <- names(V(car)) +#car.l_clusters <- ifelse(car.eigs>0,1,-1) +#base_clusters[[1,1]] <- car.l_clusters +#V(car)$color <- ifelse(car.l_clusters>0, "green", "yellow") +#plot(car, main=paste(car.netname, "Laplace Spectral Clustering"), vertex.label=NA) + +# hipaa.eigs <- Re(eigen(hipaa.Lap)$vectors[,hipaa.n-1]) +# hipaa.eig_val <- eigen(hipaa.Lap)$values[hipaa.n-1] +# names(hipaa.eigs) <- names(V(hipaa)) +# hipaa.l_clusters <- ifelse(hipaa.eigs>0,1,-1) +# base_clusters[[2,1]] <- hipaa.l_clusters +# V(hipaa)$color <- ifelse(hipaa.l_clusters>0, "green", "yellow") +# plot(hipaa, main=paste(hipaa.netname, "Laplace Spectral Clustering"), vertex.label=NA) +# +# pci.eigs <- Re(eigen(pci.Lap)$vectors[,pci.n-1]) +# pci.eig_val <- eigen(pci.Lap)$values[pci.n-1] +# names(pci.eigs) <- names(V(pci)) +# pci.l_clusters <- ifelse(pci.eigs>0,1,-1) +# base_clusters[[3,1]] <- pci.l_clusters +# V(pci)$color <- ifelse(pci.l_clusters>0, "green", "yellow") +# plot(pci, main=paste(pci.netname, "Laplace Spectral Clustering"), vertex.label=NA) +# +# ### Clemente and Grassi +# base_clusters[[1,2]] <- ClustBCG(as.matrix(car.adj), "directed")$totalCC +# base_clusters[[2,2]] <- ClustBCG(as.matrix(hipaa.adj), "directed")$totalCC +# base_clusters[[3,2]] <- ClustBCG(as.matrix(pci.adj), "directed")$totalCC + +################################ Misc Analysis ################################ +# min_cut(car,"0", "2490") +# min_cut(hipaa,"0","2320") +# min_cut(pci,"0","60") +# +# max_flow(car, "0", "2490") +# max_flow(hipaa,"0","2320") +# max_flow(pci,"0","60") + +### Transitive closure +car.graph <- as_graphnel(car) +hipaa.graph <- as_graphnel(hipaa) +pci.graph <- as_graphnel(pci) + +car.tc <- graph_from_graphnel(transitive.closure(car.graph)) +hipaa.tc <- graph_from_graphnel(transitive.closure(hipaa.graph)) +pci.tc <- graph_from_graphnel(transitive.closure(pci.graph)) + +### Edge connectivty +edge_connectivity(car, "0", "2490") +edge_connectivity(hipaa,"0","2320") +edge_connectivity(pci,"0","60") + +# Dominator Tree +car.dtree <- dominator_tree(car, "0", "out")$domtree +hipaa.dtree <- dominator_tree(hipaa, "0", "out")$domtree +pci.dtree <- dominator_tree(pci, "0", "out")$domtree + +###################### Transitive Closure Centralities ###################### +tc_centralities <- matrix(list(), nrow=3, ncol=5) +rownames(tc_centralities) <- c(car.netname, hipaa.netname, pci.netname) +colnames(tc_centralities) <- c("Degree", "Katz", "Page Rank", "K-path", "Betweenness") + +# Get basic network attributes +car.tc.adj <- get.adjacency(car.tc) +car.tc.deg <- rowSums(as.matrix(car.tc.adj)) # degree +car.tc.n <- length(V(car.tc)) + +hipaa.tc.adj <- get.adjacency(hipaa.tc) +hipaa.tc.deg <- rowSums(as.matrix(hipaa.tc.adj)) # degree +hipaa.tc.n <- length(V(hipaa.tc)) + +pci.tc.adj <- get.adjacency(pci.tc) +pci.tc.deg <- rowSums(as.matrix(pci.tc.adj)) # degree +pci.tc.n <- length(V(pci.tc)) + +### Degree +tc_centralities[[1,1]] <- car.tc.deg %>% sort(decreasing = T) +tc_centralities[[2,1]] <- hipaa.tc.deg %>% sort(decreasing = T) +tc_centralities[[3,1]] <- pci.tc.deg %>% sort(decreasing = T) + +car_tc_deg_Fn <- ( ecdf( car.tc.deg )) +car_plots[[2,1]] <- car_tc_deg_Fn(car.tc.deg)*100 + +#### Katz +car.tc.katz <- katz.cent(car.tc) +tc_centralities[[1,2]] <- car.tc.katz[rowSums(apply(car.tc.katz,2,is.nan))==0,] %>% sort(decreasing = T) +car.tc.katz <- katz.cent(car.tc) +nodes <- car.tc.katz %>% order(decreasing=T) +nodes <- head(nodes, 15)-1 +vals <- car.tc.katz %>% sort(decreasing=T) +vals <- head(vals, 15) + +tc_centralities[[2,2]] <- katz.cent(hipaa.tc) %>% sort(decreasing = T) +hipaa.tc.katz <- katz.cent(hipaa.tc) +nodes <- hipaa.tc.katz %>% order(decreasing=T) +nodes <- head(nodes, 15)-1 +vals <- hipaa.tc.katz %>% sort(decreasing=T) +vals <- head(vals, 15) + +car_tc_katz_Fn <- ( ecdf(as.numeric(car.tc.katz ))) +car_plots[[2,2]] <- car_tc_katz_Fn(as.numeric(car.tc.katz))*100 + +tc_centralities[[3,2]] <- katz.cent(pci.tc) %>% sort(decreasing = T) +pci.tc.katz <- katz.cent(pci.tc) +nodes <- pci.tc.katz %>% order(decreasing=T) +nodes <- head(nodes, 15)-1 +vals <- pci.tc.katz %>% sort(decreasing=T) +vals <- head(vals, 15) + +### Page Rank +tc_centralities[[1,3]] <- page.rank(car.tc)$vector %>% sort(decreasing = T) +tc_centralities[[2,3]] <- page.rank(hipaa.tc)$vector %>% sort(decreasing = T) +tc_centralities[[3,3]] <- page.rank(pci.tc)$vector %>% sort(decreasing = T) + +car_tc_pr_Fn <- ( ecdf( page.rank(car.tc)$vector )) +car_plots[[2,3]] <- car_tc_pr_Fn(page.rank(car.tc)$vector)*100 + +### K-path +tc_centralities[[1,4]] <- geokpath(car.tc, V(car.tc), "out") %>% sort(decreasing = T) +tc_centralities[[2,4]] <- geokpath(hipaa.tc, V(hipaa.tc), "out") %>% sort(decreasing = T) +tc_centralities[[3,4]] <- geokpath(pci.tc, V(pci.tc), "out") %>% sort(decreasing = T) + +car_tc_kp_Fn <- (ecdf(geokpath(car.tc, V(car.tc), "out"))) +car_plots[[2,4]] <- car_tc_kp_Fn(geokpath(car.tc, V(car.tc), "out"))*100 + +### Betweenness +tc_centralities[[1,5]] <- betweenness(car.tc, TRUE) %>% sort(decreasing = T) +tc_centralities[[2,5]] <- betweenness(hipaa.tc, TRUE) %>% sort(decreasing = T) +tc_centralities[[3,5]] <- betweenness(pci.tc, TRUE) %>% sort(decreasing = T) + +car_tc_btw_Fn <- (ecdf(betweenness(car.tc, TRUE))) +car_plots[[2,5]] <- car_tc_btw_Fn(betweenness(car.tc, TRUE))*100 + +######################## Transitive Closure Clustering ######################## +# source("self_newman_mod.R") +# tc_clusters <- matrix(list(), nrow=3, ncol=2) +# rownames(tc_centralities) <- c(car.netname, hipaa.netname, pci.netname) +# colnames(tc_centralities) <- c("Laplace", "CG") +# +# ### Laplacian +# car.tc.Lap <- diag(car.tc.deg) - car.tc.adj # L = D-A +# hipaa.tc.Lap <- diag(hipaa.tc.deg) - hipaa.tc.adj +# pci.tc.Lap <- diag(pci.tc.deg) - pci.tc.adj +# +# # get eigvals and vecs +# car.tc.eigs <- Re(eigen(car.tc.Lap)$vectors[,car.tc.n-1]) +# car.tc.eig_val <- eigen(car.tc.Lap)$values[car.tc.n-1] +# names(car.tc.eigs) <- names(V(car.tc)) +# car.tc.l_clusters <- ifelse(car.tc.eigs>0,1,-1) +# tc_clusters[[1,1]] <- car.tc.l_clusters +# V(car.tc)$color <- ifelse(car.tc.l_clusters>0, "green", "yellow") +# plot(car.tc, main=paste(car.tc.netname, "Laplace Spectral Clustering"), vertex.label=NA) +# +# hipaa.tc.eigs <- Re(eigen(hipaa.tc.Lap)$vectors[,hipaa.tc.n-1]) +# hipaa.tc.eig_val <- eigen(hipaa.tc.Lap)$values[hipaa.tc.n-1] +# names(hipaa.tc.eigs) <- names(V(hipaa.tc)) +# hipaa.tc.l_clusters <- ifelse(hipaa.tc.eigs>0,1,-1) +# tc_clusters[[2,1]] <- hipaa.tc.l_clusters +# V(hipaa.tc)$color <- ifelse(hipaa.tc.l_clusters>0, "green", "yellow") +# plot(hipaa.tc, main=paste(hipaa.tc.netname, "Laplace Spectral Clustering"), vertex.label=NA) +# +# pci.tc.eigs <- Re(eigen(pci.tc.Lap)$vectors[,pci.tc.n-1]) +# pci.tc.eig_val <- eigen(pci.tc.Lap)$values[pci.tc.n-1] +# names(pci.tc.eigs) <- names(V(pci.tc)) +# pci.tc.l_clusters <- ifelse(pci.tc.eigs>0,1,-1) +# tc_clusters[[3,1]] <- pci.tc.l_clusters +# V(pci.tc)$color <- ifelse(pci.tc.l_clusters>0, "green", "yellow") +# plot(pci.tc, main=paste(pci.tc.netname, "Laplace Spectral Clustering"), vertex.label=NA) +# +# ### Clemente and Grassi +# tc_clusters[[1,2]] <- ClustBCG(as.matrix(car.tc.adj), "directed")$totalCC +# tc_clusters[[2,2]] <- ClustBCG(as.matrix(hipaa.tc.adj), "directed")$totalCC +# tc_clusters[[3,2]] <- ClustBCG(as.matrix(pci.tc.adj), "directed")$totalCC + +######################### Dominant Tree Centralities ######################### +dtree_centralities <- matrix(list(), nrow=3, ncol=5) +rownames(dtree_centralities) <- c(car.netname, hipaa.netname, pci.netname) +colnames(dtree_centralities) <- c("Degree", "Katz", "Page Rank", "K-path", "Betweenness") + +# Get basic network attributes +car.dtree.adj <- get.adjacency(car.dtree) +car.dtree.deg <- rowSums(as.matrix(car.dtree.adj)) # degree +car.dtree.n <- length(V(car.dtree)) + +hipaa.dtree.adj <- get.adjacency(hipaa.dtree) +hipaa.dtree.deg <- rowSums(as.matrix(hipaa.dtree.adj)) # degree +hipaa.dtree.n <- length(V(hipaa.dtree)) + +pci.dtree.adj <- get.adjacency(pci.dtree) +pci.dtree.deg <- rowSums(as.matrix(pci.dtree.adj)) # degree +pci.dtree.n <- length(V(pci.dtree)) + +### Degree +dtree_centralities[[1,1]] <- car.dtree.deg %>% sort(decreasing = T) +dtree_centralities[[2,1]] <- hipaa.dtree.deg %>% sort(decreasing = T) +dtree_centralities[[3,1]] <- pci.dtree.deg %>% sort(decreasing = T) + +car_dt_deg_Fn <- ( ecdf( car.dtree.deg )) +car_plots[[3,1]] <- car_dt_deg_Fn(car.dtree.deg)*100 + +#### Katz +car.dtree.katz <- katz.cent(car.dtree) +dtree_centralities[[1,2]] <- car.dtree.katz[rowSums(apply(car.dtree.katz,2,is.nan))==0,] %>% sort(decreasing = T) +car.dtree.katz <- katz.cent(car.dtree) +nodes <- car.dtree.katz %>% order(decreasing=T) +nodes <- head(nodes, 15)-1 +vals <- car.dtree.katz %>% sort(decreasing=T) +vals <- head(vals, 15) + +dtree_centralities[[2,2]] <- katz.cent(hipaa.dtree) %>% sort(decreasing = T) +hipaa.dtree.katz <- katz.cent(hipaa.dtree) +nodes <- hipaa.dtree.katz %>% order(decreasing=T) +nodes <- head(nodes, 15)-1 +vals <- hipaa.dtree.katz %>% sort(decreasing=T) +vals <- head(vals, 15) + + +dtree_centralities[[3,2]] <- katz.cent(pci.dtree) %>% sort(decreasing = T) +pci.dtree.katz <- katz.cent(pci.dtree) +nodes <- pci.dtree.katz %>% order(decreasing=T) +nodes <- head(nodes, 15)-1 +vals <- pci.dtree.katz %>% sort(decreasing=T) +vals <- head(vals, 15) + +car_dt_katz_Fn <- ( ecdf(as.numeric(car.dtree.katz ))) +car_plots[[3,2]] <- car_dt_katz_Fn(as.numeric(car.dtree.katz))*100 + +### Page Rank +dtree_centralities[[1,3]] <- page.rank(car.dtree)$vector %>% sort(decreasing = T) +dtree_centralities[[2,3]] <- page.rank(hipaa.dtree)$vector %>% sort(decreasing = T) +dtree_centralities[[3,3]] <- page.rank(pci.dtree)$vector %>% sort(decreasing = T) + +car_dt_pr_Fn <- ( ecdf( page.rank(car.dtree)$vector )) +car_plots[[3,3]] <- car_dt_pr_Fn(page.rank(car.dtree)$vector)*100 + +### K-path +dtree_centralities[[1,4]] <- geokpath(car.dtree, V(car.dtree), "out") %>% sort(decreasing = T) +dtree_centralities[[2,4]] <- geokpath(hipaa.dtree, V(hipaa.dtree), "out") %>% sort(decreasing = T) +dtree_centralities[[3,4]] <- geokpath(pci.dtree, V(pci.dtree), "out") %>% sort(decreasing = T) + +car_dt_kp_Fn <- (ecdf(geokpath(car.dtree, V(car.dtree), "out"))) +car_plots[[3,4]] <- car_tc_kp_Fn(geokpath(car.dtree, V(car.dtree), "out"))*100 + +### Betweenness +dtree_centralities[[1,5]] <- betweenness(car.dtree, TRUE) %>% sort(decreasing = T) +dtree_centralities[[2,5]] <- betweenness(hipaa.dtree, TRUE) %>% sort(decreasing = T) +dtree_centralities[[3,5]] <- betweenness(pci.dtree, TRUE) %>% sort(decreasing = T) + +car_dt_btw_Fn <- (ecdf(betweenness(car.dtree, TRUE))) +car_plots[[3,5]] <- car_dt_btw_Fn(betweenness(car.dtree, TRUE))*100 + +########################## Dominant Tree Clustering ########################## +# source("self_newman_mod.R") +# dtree_clusters <- matrix(list(), nrow=3, ncol=2) +# rownames(dtree_centralities) <- c(car.netname, hipaa.netname, pci.netname) +# colnames(dtree_centralities) <- c("Laplace", "CG") +# +# ### Laplacian +# car.dtree.Lap <- diag(car.dtree.deg) - car.dtree.adj # L = D-A +# hipaa.dtree.Lap <- diag(hipaa.dtree.deg) - hipaa.dtree.adj +# pci.dtree.Lap <- diag(pci.dtree.deg) - pci.dtree.adj +# +# # get eigvals and vecs +# car.dtree.eigs <- Re(eigen(car.dtree.Lap)$vectors[,car.dtree.n-1]) +# car.dtree.eig_val <- eigen(car.dtree.Lap)$values[car.dtree.n-1] +# names(car.dtree.eigs) <- names(V(car.dtree)) +# car.dtree.l_clusters <- ifelse(car.dtree.eigs>0,1,-1) +# dtree_clusters[[1,1]] <- car.dtree.l_clusters +# V(car.dtree)$color <- ifelse(car.dtree.l_clusters>0, "green", "yellow") +# plot(car.dtree, main=paste(car.dtree.netname, "Laplace Spectral Clustering"), vertex.label=NA) +# +# hipaa.dtree.eigs <- Re(eigen(hipaa.dtree.Lap)$vectors[,hipaa.dtree.n-1]) +# hipaa.dtree.eig_val <- eigen(hipaa.dtree.Lap)$values[hipaa.dtree.n-1] +# names(hipaa.dtree.eigs) <- names(V(hipaa.dtree)) +# hipaa.dtree.l_clusters <- ifelse(hipaa.dtree.eigs>0,1,-1) +# dtree_clusters[[2,1]] <- hipaa.dtree.l_clusters +# V(hipaa.dtree)$color <- ifelse(hipaa.dtree.l_clusters>0, "green", "yellow") +# plot(hipaa.dtree, main=paste(hipaa.dtree.netname, "Laplace Spectral Clustering"), vertex.label=NA) +# +# pci.dtree.eigs <- Re(eigen(pci.dtree.Lap)$vectors[,pci.dtree.n-1]) +# pci.dtree.eig_val <- eigen(pci.dtree.Lap)$values[pci.dtree.n-1] +# names(pci.dtree.eigs) <- names(V(pci.dtree)) +# pci.dtree.l_clusters <- ifelse(pci.dtree.eigs>0,1,-1) +# dtree_clusters[[3,1]] <- pci.dtree.l_clusters +# V(pci.dtree)$color <- ifelse(pci.dtree.l_clusters>0, "green", "yellow") +# plot(pci.dtree, main=paste(pci.dtree.netname, "Laplace Spectral Clustering"), vertex.label=NA) +# +# ### Clemente and Grassi +# dtree_clusters[[1,2]] <- ClustBCG(as.matrix(car.dtree.adj), "directed")$totalCC +# dtree_clusters[[2,2]] <- ClustBCG(as.matrix(hipaa.dtree.adj), "directed")$totalCC +# dtree_clusters[[3,2]] <- ClustBCG(as.matrix(pci.dtree.adj), "directed")$totalCC + + + +############################# Write Final Results ############################# +write.table(base_centralities, file='results.csv') +write.table(tc_centralities, file='results.csv') +write.table(dtree_centralities, file='results.csv') + +### Degree: +head(base_centralities[[1,1]], 15)/sum(base_centralities[[1,1]])*100 #Car +head(tc_centralities[[1,1]],15)/sum(tc_centralities[[1,1]])*100 +head(dtree_centralities[[1,1]],15)/sum(dtree_centralities[[1,1]])*100 + +head(base_centralities[[2,1]], 15)/sum(base_centralities[[2,1]])*100 #HIPAA +head(tc_centralities[[2,1]],15)/sum(tc_centralities[[2,1]])*100 +head(dtree_centralities[[2,1]],15)/sum(dtree_centralities[[2,1]])*100 + +head(base_centralities[[3,1]], 15)/sum(base_centralities[[3,1]])*100 #PCI +head(tc_centralities[[3,1]],15)/sum(tc_centralities[[3,1]])*100 +head(dtree_centralities[[3,1]],15)/sum(dtree_centralities[[3,1]])*100 + +### Katz: +head(base_centralities[[1,2]], 15)/sum(base_centralities[[1,2]])*100 #Car +head(tc_centralities[[1,2]],15)/sum(tc_centralities[[1,2]])*100 +head(dtree_centralities[[1,2]],15)/sum(dtree_centralities[[1,2]])*100 + +head(base_centralities[[2,2]], 15)/sum(base_centralities[[2,2]])*100 #HIPAA +head(tc_centralities[[2,2]],15)/sum(tc_centralities[[2,2]])*100 +head(dtree_centralities[[2,2]],15)/sum(dtree_centralities[[2,2]])*100 + +head(base_centralities[[3,2]], 15)/sum(base_centralities[[3,2]])*100 #PCI +head(tc_centralities[[3,2]],15)/sum(tc_centralities[[3,2]])*100 +head(dtree_centralities[[3,2]],15)/sum(dtree_centralities[[3,2]])*100 + +### Page Rank: +head(base_centralities[[1,3]], 15)/sum(base_centralities[[1,3]])*100 #Car +head(tc_centralities[[1,3]],15)/sum(tc_centralities[[1,3]])*100 +head(dtree_centralities[[1,3]],15)/sum(dtree_centralities[[1,3]])*100 + +head(base_centralities[[2,3]], 15)/sum(base_centralities[[2,3]])*100 #HIPAA +head(tc_centralities[[2,3]],15)/sum(tc_centralities[[2,3]])*100 +head(dtree_centralities[[2,3]],15)/sum(dtree_centralities[[2,3]])*100 + +head(base_centralities[[3,3]], 15)/sum(base_centralities[[3,3]])*100 #PCI +head(tc_centralities[[3,3]],15)/sum(tc_centralities[[3,3]])*100 +head(dtree_centralities[[3,3]],15)/sum(dtree_centralities[[3,3]])*100 + +### K-Path: +head(base_centralities[[1,4]], 15)/sum(base_centralities[[1,4]])*100 #Car +head(tc_centralities[[1,4]],15)/sum(tc_centralities[[1,4]])*100 +head(dtree_centralities[[1,4]],15)/sum(dtree_centralities[[1,4]])*100 + +head(base_centralities[[2,4]], 15)/sum(base_centralities[[2,4]])*100 #HIPAA +head(tc_centralities[[2,4]],15)/sum(tc_centralities[[2,4]])*100 +head(dtree_centralities[[2,4]],15)/sum(dtree_centralities[[2,4]])*100 + +head(base_centralities[[3,4]], 15)/sum(base_centralities[[3,4]])*100 #PCI +head(tc_centralities[[3,4]],15)/sum(tc_centralities[[3,4]])*100 +head(dtree_centralities[[3,4]],15)/sum(dtree_centralities[[3,4]])*100 + +### Betweenness: +head(base_centralities[[1,5]], 15)/sum(base_centralities[[1,5]])*100 #Car +head(tc_centralities[[1,5]],15)/sum(tc_centralities[[1,5]])*100 +head(dtree_centralities[[1,5]],15)/sum(dtree_centralities[[1,5]])*100 + +head(base_centralities[[2,5]], 15)/sum(base_centralities[[2,5]])*100 #HIPAA +head(tc_centralities[[2,5]],15)/sum(tc_centralities[[2,5]])*100 +head(dtree_centralities[[2,5]],15)/sum(dtree_centralities[[2,5]])*100 + +head(base_centralities[[3,5]], 15)/sum(base_centralities[[3,5]])*100 #PCI +head(tc_centralities[[3,5]],15)/sum(tc_centralities[[3,5]])*100 +head(dtree_centralities[[3,5]],15)/sum(dtree_centralities[[3,5]])*100 + + +# ### Laplacian: +# head(base_clusters[[1,1]], 15) #Car +# head(tc_centralities[[1,1]],15) +# head(dtree_centralities[[1,1]],15) +# +# head(base_clusters[[2,1]], 15) #HIPAA +# head(tc_centralities[[2,1]],15) +# head(dtree_centralities[[2,1]],15) +# +# head(base_clusters[[3,1]], 15) #PCI +# head(tc_centralities[[3,1]],15) +# head(dtree_centralities[[3,1]],15) +# +# ### CG: +# head(base_clusters[[1,2]], 15) #Car +# head(tc_centralities[[1,2]],15) +# head(dtree_centralities[[1,2]],15) +# +# head(base_clusters[[2,2]], 15) #HIPAA +# head(tc_centralities[[2,2]],15) +# head(dtree_centralities[[2,2]],15) +# +# head(base_clusters[[3,2]], 15) #PCI +# head(tc_centralities[[3,2]],15) +# head(dtree_centralities[[3,2]],15) + +#Deg +sum(base_centralities[[1,1]]) +sum(base_centralities[[2,1]]) +sum(base_centralities[[3,1]]) + +#Katz +sum(base_centralities[[1,2]]) +sum(base_centralities[[2,2]]) +sum(base_centralities[[3,2]]) + +#PR +sum(base_centralities[[1,3]]) +sum(base_centralities[[2,3]]) +sum(base_centralities[[3,3]]) + +#KPath +sum(base_centralities[[1,4]]) +sum(base_centralities[[2,4]]) +sum(base_centralities[[3,4]]) + +#Betweenness +sum(base_centralities[[1,5]]) +sum(base_centralities[[2,5]]) +sum(base_centralities[[3,5]]) + +### TC + +#Deg +sum(tc_centralities[[1,1]]) +sum(tc_centralities[[2,1]]) +sum(tc_centralities[[3,1]]) + +#Katz +sum(tc_centralities[[1,2]]) +sum(tc_centralities[[2,2]]) +sum(tc_centralities[[3,2]]) + +#PR +sum(tc_centralities[[1,3]]) +sum(tc_centralities[[2,3]]) +sum(tc_centralities[[3,3]]) + +#KPath +sum(tc_centralities[[1,4]]) +sum(tc_centralities[[2,4]]) +sum(tc_centralities[[3,4]]) + +#Betweenness +sum(tc_centralities[[1,5]]) +sum(tc_centralities[[2,5]]) +sum(tc_centralities[[3,5]]) + + +### dtree + +#Deg +sum(dtree_centralities[[1,1]]) +sum(dtree_centralities[[2,1]]) +sum(dtree_centralities[[3,1]]) + +#Katz +sum(dtree_centralities[[1,2]]) +sum(dtree_centralities[[2,2]]) +sum(dtree_centralities[[3,2]]) + +#PR +sum(dtree_centralities[[1,3]]) +sum(dtree_centralities[[2,3]]) +sum(dtree_centralities[[3,3]]) + +#KPath +sum(dtree_centralities[[1,4]]) +sum(dtree_centralities[[2,4]]) +sum(dtree_centralities[[3,4]]) + +#Betweenness +sum(dtree_centralities[[1,5]]) +sum(dtree_centralities[[2,5]]) +sum(dtree_centralities[[3,5]]) + + +####### Plots +if (!require("plotly", quietly = TRUE)) + install.packages("plotly") + +library(plotly) +car_base_df <- as.data.frame(car_plots[[1,1]]) +car_base_df$Node <- 1:nrow(as.data.frame(car_plots[[1,1]])) +colnames(car_base_df) <- c("Centrality", "Node") + +car_tc_df <- as.data.frame(car_plots[[2,1]]) +car_tc_df$Node <- 1:nrow(as.data.frame(car_plots[[2,1]])) +colnames(car_tc_df) <- c("Centrality", "Node") + +car_dtree_df <- as.data.frame(car_plots[[3,1]]) +car_dtree_df$Node <- 1:nrow(as.data.frame(car_plots[[3,1]])) +colnames(car_dtree_df) <- c("Centrality", "Node") + +fig1 <- plot_ly(x=car_base_df$Node, y=car_base_df$Centrality, type="bar") +fig2 <- plot_ly(x=car_base_df$Node, y=car_tc_df$Centrality, type="bar") +fig3 <- plot_ly(x=car_base_df$Node, y=car_dtree_df$Centrality, type="bar") +fig <- subplot(fig1, fig2, fig3, nrows = 3) %>% + layout(title = list(text = "Importance Distribution for Nodes + in the Automobile Maintenance Network"), + plot_bgcolor='#e5ecf6', + xaxis = list( + zerolinecolor = '#ffff', + zerolinewidth = 2, + gridcolor = 'ffff'), + yaxis = list( + zerolinecolor = '#ffff', + zerolinewidth = 2, + gridcolor = 'ffff')) +fig + +plot(car_plots[[1,1]], type="h", xlab="Node", + ylab="Percentile (Importance Ranking)", + main="Importance Distribution for Nodes + in the Automobile Maintenance Network")