AG-CG-Network-Science/Old_Version/code/Schrick-Noah_CG-Analysis.R
2024-04-18 15:17:30 -05:00

645 lines
24 KiB
R

# Final Project for the University of Tulsa's CS-7863 Network Theory Course
# Compliance Graph Analysis
# Professor: Dr. McKinney, Spring 2022
# Noah L. Schrick - 1492657
library(igraph)
library(centiserve)
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("RBGL")
library(RBGL)
library(DirectedClustering)
################## Read in the previously generated networks ##################
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
source("./CG_Files/manual_import.R")
car <- import_networks(1)
car.netname <- "Vehicle Maintenance"
hipaa <- import_networks(2)
hipaa.netname <- "HIPAA Compliance"
pci <- import_networks(3)
pci.netname <- "PCI Compliance"
# Get basic network attributes
car.adj <- get.adjacency(car)
car.deg <- rowSums(as.matrix(car.adj)) # degree
car.n <- length(V(car))
hipaa.adj <- get.adjacency(hipaa)
hipaa.deg <- rowSums(as.matrix(hipaa.adj)) # degree
hipaa.n <- length(V(hipaa))
pci.adj <- get.adjacency(pci)
pci.deg <- rowSums(as.matrix(pci.adj)) # degree
pci.n <- length(V(pci))
car_plots <- matrix(list(), nrow=3, ncol=5)
rownames(car_plots) <- c("Base", "Transitive Closure", "Dominant Tree")
colnames(car_plots) <- c("Degree", "Katz", "Page Rank", "K-path", "Betweenness")
############################# Base Centralities #############################
source("centralities.R")
base_centralities <- matrix(list(), nrow=3, ncol=5)
rownames(base_centralities) <- c(car.netname, hipaa.netname, pci.netname)
colnames(base_centralities) <- c("Degree", "Katz", "Page Rank", "K-path", "Betweenness")
### Degree
base_centralities[[1,1]] <- car.deg %>% sort(decreasing = T)
base_centralities[[2,1]] <- hipaa.deg %>% sort(decreasing = T)
base_centralities[[3,1]] <- pci.deg %>% sort(decreasing = T)
car_deg_Fn <- ( ecdf( car.deg ))
#car_plots[[1,1]] <- car_deg_Fn(car.deg)*100
car_plots[[1,1]] <- car.deg/max(car.deg)
#### Katz
car.katz <- katz.cent(car)
nodes <- car.katz %>% order(decreasing=T)
nodes <- head(nodes, 15)-1
vals <- car.katz %>% sort(decreasing=T)
vals <- head(vals, 15)
base_centralities[[1,2]] <- car.katz[rowSums(apply(car.katz,2,is.nan))==0,] %>% sort(decreasing = T)
car_katz_Fn <- ( ecdf(as.numeric(car.katz )))
car_plots[[1,2]] <- car_katz_Fn(as.numeric(car.katz))*100
base_centralities[[2,2]] <- katz.cent(hipaa) %>% sort(decreasing = T)
hipaa.katz <- katz.cent(hipaa)
nodes <- hipaa.katz %>% order(decreasing=T)
nodes <- head(nodes, 15)-1
vals <- hipaa.katz %>% sort(decreasing=T)
vals <- head(vals, 15)
base_centralities[[3,2]] <- katz.cent(pci) %>% sort(decreasing = T)
pci.katz <- katz.cent(pci)
nodes <- pci.katz %>% order(decreasing=T)
nodes <- head(nodes, 15)-1
vals <- pci.katz %>% sort(decreasing=T)
vals <- head(vals, 15)
### Page Rank
base_centralities[[1,3]] <- page.rank(car)$vector %>% sort(decreasing = T)
base_centralities[[2,3]] <- page.rank(hipaa)$vector %>% sort(decreasing = T)
base_centralities[[3,3]] <- page.rank(pci)$vector %>% sort(decreasing = T)
car_pr_Fn <- ( ecdf( page.rank(car)$vector ))
car_plots[[1,3]] <- car_pr_Fn(page.rank(car)$vector)*100
### K-path
base_centralities[[1,4]] <- geokpath(car, V(car), "out") %>% sort(decreasing = T)
base_centralities[[2,4]] <- geokpath(hipaa, V(hipaa), "out") %>% sort(decreasing = T)
base_centralities[[3,4]] <- geokpath(pci, V(pci), "out") %>% sort(decreasing = T)
car_kp_Fn <- (ecdf(geokpath(car, V(car), "out")))
car_plots[[1,4]] <- car_kp_Fn(geokpath(car, V(car), "out"))*100
### Betweenness
base_centralities[[1,5]] <- betweenness(car, TRUE) %>% sort(decreasing = T)
base_centralities[[2,5]] <- betweenness(hipaa, TRUE) %>% sort(decreasing = T)
base_centralities[[3,5]] <- betweenness(pci, TRUE) %>% sort(decreasing = T)
car_btw_Fn <- (ecdf(betweenness(car, TRUE)))
car_plots[[1,5]] <- car_btw_Fn(betweenness(car, TRUE))*100
############################### Base Clustering ###############################
#source("self_newman_mod.R")
#base_clusters <- matrix(list(), nrow=3, ncol=2)
#rownames(base_centralities) <- c(car.netname, hipaa.netname, pci.netname)
#colnames(base_centralities) <- c("Laplace", "CG")
### Laplacian
#car.Lap <- diag(car.deg) - car.adj # L = D-A
#hipaa.Lap <- diag(hipaa.deg) - hipaa.adj
#pci.Lap <- diag(pci.deg) - pci.adj
# get eigvals and vecs
#car.eigs <- Re(eigen(car.Lap)$vectors[,car.n-1])
#car.eig_val <- eigen(car.Lap)$values[car.n-1]
#names(car.eigs) <- names(V(car))
#car.l_clusters <- ifelse(car.eigs>0,1,-1)
#base_clusters[[1,1]] <- car.l_clusters
#V(car)$color <- ifelse(car.l_clusters>0, "green", "yellow")
#plot(car, main=paste(car.netname, "Laplace Spectral Clustering"), vertex.label=NA)
# hipaa.eigs <- Re(eigen(hipaa.Lap)$vectors[,hipaa.n-1])
# hipaa.eig_val <- eigen(hipaa.Lap)$values[hipaa.n-1]
# names(hipaa.eigs) <- names(V(hipaa))
# hipaa.l_clusters <- ifelse(hipaa.eigs>0,1,-1)
# base_clusters[[2,1]] <- hipaa.l_clusters
# V(hipaa)$color <- ifelse(hipaa.l_clusters>0, "green", "yellow")
# plot(hipaa, main=paste(hipaa.netname, "Laplace Spectral Clustering"), vertex.label=NA)
#
# pci.eigs <- Re(eigen(pci.Lap)$vectors[,pci.n-1])
# pci.eig_val <- eigen(pci.Lap)$values[pci.n-1]
# names(pci.eigs) <- names(V(pci))
# pci.l_clusters <- ifelse(pci.eigs>0,1,-1)
# base_clusters[[3,1]] <- pci.l_clusters
# V(pci)$color <- ifelse(pci.l_clusters>0, "green", "yellow")
# plot(pci, main=paste(pci.netname, "Laplace Spectral Clustering"), vertex.label=NA)
#
# ### Clemente and Grassi
# base_clusters[[1,2]] <- ClustBCG(as.matrix(car.adj), "directed")$totalCC
# base_clusters[[2,2]] <- ClustBCG(as.matrix(hipaa.adj), "directed")$totalCC
# base_clusters[[3,2]] <- ClustBCG(as.matrix(pci.adj), "directed")$totalCC
################################ Misc Analysis ################################
# min_cut(car,"0", "2490")
# min_cut(hipaa,"0","2320")
# min_cut(pci,"0","60")
#
# max_flow(car, "0", "2490")
# max_flow(hipaa,"0","2320")
# max_flow(pci,"0","60")
### Transitive closure
car.graph <- as_graphnel(car)
hipaa.graph <- as_graphnel(hipaa)
pci.graph <- as_graphnel(pci)
car.tc <- graph_from_graphnel(transitive.closure(car.graph))
hipaa.tc <- graph_from_graphnel(transitive.closure(hipaa.graph))
pci.tc <- graph_from_graphnel(transitive.closure(pci.graph))
### Edge connectivty
edge_connectivity(car, "0", "2490")
edge_connectivity(hipaa,"0","2320")
edge_connectivity(pci,"0","60")
# Dominator Tree
car.dtree <- dominator_tree(car, "0", "out")$domtree
hipaa.dtree <- dominator_tree(hipaa, "0", "out")$domtree
pci.dtree <- dominator_tree(pci, "0", "out")$domtree
###################### Transitive Closure Centralities ######################
tc_centralities <- matrix(list(), nrow=3, ncol=5)
rownames(tc_centralities) <- c(car.netname, hipaa.netname, pci.netname)
colnames(tc_centralities) <- c("Degree", "Katz", "Page Rank", "K-path", "Betweenness")
# Get basic network attributes
car.tc.adj <- get.adjacency(car.tc)
car.tc.deg <- rowSums(as.matrix(car.tc.adj)) # degree
car.tc.n <- length(V(car.tc))
hipaa.tc.adj <- get.adjacency(hipaa.tc)
hipaa.tc.deg <- rowSums(as.matrix(hipaa.tc.adj)) # degree
hipaa.tc.n <- length(V(hipaa.tc))
pci.tc.adj <- get.adjacency(pci.tc)
pci.tc.deg <- rowSums(as.matrix(pci.tc.adj)) # degree
pci.tc.n <- length(V(pci.tc))
### Degree
tc_centralities[[1,1]] <- car.tc.deg %>% sort(decreasing = T)
tc_centralities[[2,1]] <- hipaa.tc.deg %>% sort(decreasing = T)
tc_centralities[[3,1]] <- pci.tc.deg %>% sort(decreasing = T)
car_tc_deg_Fn <- ( ecdf( car.tc.deg ))
#car_plots[[2,1]] <- car_tc_deg_Fn(car.tc.deg)*100
car_plots[[2,1]] <- car.tc.deg/max(car.tc.deg)
#### Katz
car.tc.katz <- katz.cent(car.tc)
tc_centralities[[1,2]] <- car.tc.katz[rowSums(apply(car.tc.katz,2,is.nan))==0,] %>% sort(decreasing = T)
car.tc.katz <- katz.cent(car.tc)
nodes <- car.tc.katz %>% order(decreasing=T)
nodes <- head(nodes, 15)-1
vals <- car.tc.katz %>% sort(decreasing=T)
vals <- head(vals, 15)
tc_centralities[[2,2]] <- katz.cent(hipaa.tc) %>% sort(decreasing = T)
hipaa.tc.katz <- katz.cent(hipaa.tc)
nodes <- hipaa.tc.katz %>% order(decreasing=T)
nodes <- head(nodes, 15)-1
vals <- hipaa.tc.katz %>% sort(decreasing=T)
vals <- head(vals, 15)
car_tc_katz_Fn <- ( ecdf(as.numeric(car.tc.katz )))
car_plots[[2,2]] <- car_tc_katz_Fn(as.numeric(car.tc.katz))*100
tc_centralities[[3,2]] <- katz.cent(pci.tc) %>% sort(decreasing = T)
pci.tc.katz <- katz.cent(pci.tc)
nodes <- pci.tc.katz %>% order(decreasing=T)
nodes <- head(nodes, 15)-1
vals <- pci.tc.katz %>% sort(decreasing=T)
vals <- head(vals, 15)
### Page Rank
tc_centralities[[1,3]] <- page.rank(car.tc)$vector %>% sort(decreasing = T)
tc_centralities[[2,3]] <- page.rank(hipaa.tc)$vector %>% sort(decreasing = T)
tc_centralities[[3,3]] <- page.rank(pci.tc)$vector %>% sort(decreasing = T)
car_tc_pr_Fn <- ( ecdf( page.rank(car.tc)$vector ))
car_plots[[2,3]] <- car_tc_pr_Fn(page.rank(car.tc)$vector)*100
### K-path
tc_centralities[[1,4]] <- geokpath(car.tc, V(car.tc), "out") %>% sort(decreasing = T)
tc_centralities[[2,4]] <- geokpath(hipaa.tc, V(hipaa.tc), "out") %>% sort(decreasing = T)
tc_centralities[[3,4]] <- geokpath(pci.tc, V(pci.tc), "out") %>% sort(decreasing = T)
car_tc_kp_Fn <- (ecdf(geokpath(car.tc, V(car.tc), "out")))
car_plots[[2,4]] <- car_tc_kp_Fn(geokpath(car.tc, V(car.tc), "out"))*100
### Betweenness
tc_centralities[[1,5]] <- betweenness(car.tc, TRUE) %>% sort(decreasing = T)
tc_centralities[[2,5]] <- betweenness(hipaa.tc, TRUE) %>% sort(decreasing = T)
tc_centralities[[3,5]] <- betweenness(pci.tc, TRUE) %>% sort(decreasing = T)
car_tc_btw_Fn <- (ecdf(betweenness(car.tc, TRUE)))
car_plots[[2,5]] <- car_tc_btw_Fn(betweenness(car.tc, TRUE))*100
######################## Transitive Closure Clustering ########################
# source("self_newman_mod.R")
# tc_clusters <- matrix(list(), nrow=3, ncol=2)
# rownames(tc_centralities) <- c(car.netname, hipaa.netname, pci.netname)
# colnames(tc_centralities) <- c("Laplace", "CG")
#
# ### Laplacian
# car.tc.Lap <- diag(car.tc.deg) - car.tc.adj # L = D-A
# hipaa.tc.Lap <- diag(hipaa.tc.deg) - hipaa.tc.adj
# pci.tc.Lap <- diag(pci.tc.deg) - pci.tc.adj
#
# # get eigvals and vecs
# car.tc.eigs <- Re(eigen(car.tc.Lap)$vectors[,car.tc.n-1])
# car.tc.eig_val <- eigen(car.tc.Lap)$values[car.tc.n-1]
# names(car.tc.eigs) <- names(V(car.tc))
# car.tc.l_clusters <- ifelse(car.tc.eigs>0,1,-1)
# tc_clusters[[1,1]] <- car.tc.l_clusters
# V(car.tc)$color <- ifelse(car.tc.l_clusters>0, "green", "yellow")
# plot(car.tc, main=paste(car.tc.netname, "Laplace Spectral Clustering"), vertex.label=NA)
#
# hipaa.tc.eigs <- Re(eigen(hipaa.tc.Lap)$vectors[,hipaa.tc.n-1])
# hipaa.tc.eig_val <- eigen(hipaa.tc.Lap)$values[hipaa.tc.n-1]
# names(hipaa.tc.eigs) <- names(V(hipaa.tc))
# hipaa.tc.l_clusters <- ifelse(hipaa.tc.eigs>0,1,-1)
# tc_clusters[[2,1]] <- hipaa.tc.l_clusters
# V(hipaa.tc)$color <- ifelse(hipaa.tc.l_clusters>0, "green", "yellow")
# plot(hipaa.tc, main=paste(hipaa.tc.netname, "Laplace Spectral Clustering"), vertex.label=NA)
#
# pci.tc.eigs <- Re(eigen(pci.tc.Lap)$vectors[,pci.tc.n-1])
# pci.tc.eig_val <- eigen(pci.tc.Lap)$values[pci.tc.n-1]
# names(pci.tc.eigs) <- names(V(pci.tc))
# pci.tc.l_clusters <- ifelse(pci.tc.eigs>0,1,-1)
# tc_clusters[[3,1]] <- pci.tc.l_clusters
# V(pci.tc)$color <- ifelse(pci.tc.l_clusters>0, "green", "yellow")
# plot(pci.tc, main=paste(pci.tc.netname, "Laplace Spectral Clustering"), vertex.label=NA)
#
# ### Clemente and Grassi
# tc_clusters[[1,2]] <- ClustBCG(as.matrix(car.tc.adj), "directed")$totalCC
# tc_clusters[[2,2]] <- ClustBCG(as.matrix(hipaa.tc.adj), "directed")$totalCC
# tc_clusters[[3,2]] <- ClustBCG(as.matrix(pci.tc.adj), "directed")$totalCC
######################### Dominant Tree Centralities #########################
dtree_centralities <- matrix(list(), nrow=3, ncol=5)
rownames(dtree_centralities) <- c(car.netname, hipaa.netname, pci.netname)
colnames(dtree_centralities) <- c("Degree", "Katz", "Page Rank", "K-path", "Betweenness")
# Get basic network attributes
car.dtree.adj <- get.adjacency(car.dtree)
car.dtree.deg <- rowSums(as.matrix(car.dtree.adj)) # degree
car.dtree.n <- length(V(car.dtree))
hipaa.dtree.adj <- get.adjacency(hipaa.dtree)
hipaa.dtree.deg <- rowSums(as.matrix(hipaa.dtree.adj)) # degree
hipaa.dtree.n <- length(V(hipaa.dtree))
pci.dtree.adj <- get.adjacency(pci.dtree)
pci.dtree.deg <- rowSums(as.matrix(pci.dtree.adj)) # degree
pci.dtree.n <- length(V(pci.dtree))
### Degree
dtree_centralities[[1,1]] <- car.dtree.deg %>% sort(decreasing = T)
dtree_centralities[[2,1]] <- hipaa.dtree.deg %>% sort(decreasing = T)
dtree_centralities[[3,1]] <- pci.dtree.deg %>% sort(decreasing = T)
car_dt_deg_Fn <- ( ecdf( car.dtree.deg ))
#car_plots[[3,1]] <- car_dt_deg_Fn(car.dtree.deg)*100
car_plots[[3,1]] <-car.dtree.deg/max(car.dtree.deg)
#### Katz
car.dtree.katz <- katz.cent(car.dtree)
dtree_centralities[[1,2]] <- car.dtree.katz[rowSums(apply(car.dtree.katz,2,is.nan))==0,] %>% sort(decreasing = T)
car.dtree.katz <- katz.cent(car.dtree)
nodes <- car.dtree.katz %>% order(decreasing=T)
nodes <- head(nodes, 15)-1
vals <- car.dtree.katz %>% sort(decreasing=T)
vals <- head(vals, 15)
dtree_centralities[[2,2]] <- katz.cent(hipaa.dtree) %>% sort(decreasing = T)
hipaa.dtree.katz <- katz.cent(hipaa.dtree)
nodes <- hipaa.dtree.katz %>% order(decreasing=T)
nodes <- head(nodes, 15)-1
vals <- hipaa.dtree.katz %>% sort(decreasing=T)
vals <- head(vals, 15)
dtree_centralities[[3,2]] <- katz.cent(pci.dtree) %>% sort(decreasing = T)
pci.dtree.katz <- katz.cent(pci.dtree)
nodes <- pci.dtree.katz %>% order(decreasing=T)
nodes <- head(nodes, 15)-1
vals <- pci.dtree.katz %>% sort(decreasing=T)
vals <- head(vals, 15)
car_dt_katz_Fn <- ( ecdf(as.numeric(car.dtree.katz )))
car_plots[[3,2]] <- car_dt_katz_Fn(as.numeric(car.dtree.katz))*100
### Page Rank
dtree_centralities[[1,3]] <- page.rank(car.dtree)$vector %>% sort(decreasing = T)
dtree_centralities[[2,3]] <- page.rank(hipaa.dtree)$vector %>% sort(decreasing = T)
dtree_centralities[[3,3]] <- page.rank(pci.dtree)$vector %>% sort(decreasing = T)
car_dt_pr_Fn <- ( ecdf( page.rank(car.dtree)$vector ))
car_plots[[3,3]] <- car_dt_pr_Fn(page.rank(car.dtree)$vector)*100
### K-path
dtree_centralities[[1,4]] <- geokpath(car.dtree, V(car.dtree), "out") %>% sort(decreasing = T)
dtree_centralities[[2,4]] <- geokpath(hipaa.dtree, V(hipaa.dtree), "out") %>% sort(decreasing = T)
dtree_centralities[[3,4]] <- geokpath(pci.dtree, V(pci.dtree), "out") %>% sort(decreasing = T)
car_dt_kp_Fn <- (ecdf(geokpath(car.dtree, V(car.dtree), "out")))
car_plots[[3,4]] <- car_tc_kp_Fn(geokpath(car.dtree, V(car.dtree), "out"))*100
### Betweenness
dtree_centralities[[1,5]] <- betweenness(car.dtree, TRUE) %>% sort(decreasing = T)
dtree_centralities[[2,5]] <- betweenness(hipaa.dtree, TRUE) %>% sort(decreasing = T)
dtree_centralities[[3,5]] <- betweenness(pci.dtree, TRUE) %>% sort(decreasing = T)
car_dt_btw_Fn <- (ecdf(betweenness(car.dtree, TRUE)))
car_plots[[3,5]] <- car_dt_btw_Fn(betweenness(car.dtree, TRUE))*100
########################## Dominant Tree Clustering ##########################
# source("self_newman_mod.R")
# dtree_clusters <- matrix(list(), nrow=3, ncol=2)
# rownames(dtree_centralities) <- c(car.netname, hipaa.netname, pci.netname)
# colnames(dtree_centralities) <- c("Laplace", "CG")
#
# ### Laplacian
# car.dtree.Lap <- diag(car.dtree.deg) - car.dtree.adj # L = D-A
# hipaa.dtree.Lap <- diag(hipaa.dtree.deg) - hipaa.dtree.adj
# pci.dtree.Lap <- diag(pci.dtree.deg) - pci.dtree.adj
#
# # get eigvals and vecs
# car.dtree.eigs <- Re(eigen(car.dtree.Lap)$vectors[,car.dtree.n-1])
# car.dtree.eig_val <- eigen(car.dtree.Lap)$values[car.dtree.n-1]
# names(car.dtree.eigs) <- names(V(car.dtree))
# car.dtree.l_clusters <- ifelse(car.dtree.eigs>0,1,-1)
# dtree_clusters[[1,1]] <- car.dtree.l_clusters
# V(car.dtree)$color <- ifelse(car.dtree.l_clusters>0, "green", "yellow")
# plot(car.dtree, main=paste(car.dtree.netname, "Laplace Spectral Clustering"), vertex.label=NA)
#
# hipaa.dtree.eigs <- Re(eigen(hipaa.dtree.Lap)$vectors[,hipaa.dtree.n-1])
# hipaa.dtree.eig_val <- eigen(hipaa.dtree.Lap)$values[hipaa.dtree.n-1]
# names(hipaa.dtree.eigs) <- names(V(hipaa.dtree))
# hipaa.dtree.l_clusters <- ifelse(hipaa.dtree.eigs>0,1,-1)
# dtree_clusters[[2,1]] <- hipaa.dtree.l_clusters
# V(hipaa.dtree)$color <- ifelse(hipaa.dtree.l_clusters>0, "green", "yellow")
# plot(hipaa.dtree, main=paste(hipaa.dtree.netname, "Laplace Spectral Clustering"), vertex.label=NA)
#
# pci.dtree.eigs <- Re(eigen(pci.dtree.Lap)$vectors[,pci.dtree.n-1])
# pci.dtree.eig_val <- eigen(pci.dtree.Lap)$values[pci.dtree.n-1]
# names(pci.dtree.eigs) <- names(V(pci.dtree))
# pci.dtree.l_clusters <- ifelse(pci.dtree.eigs>0,1,-1)
# dtree_clusters[[3,1]] <- pci.dtree.l_clusters
# V(pci.dtree)$color <- ifelse(pci.dtree.l_clusters>0, "green", "yellow")
# plot(pci.dtree, main=paste(pci.dtree.netname, "Laplace Spectral Clustering"), vertex.label=NA)
#
# ### Clemente and Grassi
# dtree_clusters[[1,2]] <- ClustBCG(as.matrix(car.dtree.adj), "directed")$totalCC
# dtree_clusters[[2,2]] <- ClustBCG(as.matrix(hipaa.dtree.adj), "directed")$totalCC
# dtree_clusters[[3,2]] <- ClustBCG(as.matrix(pci.dtree.adj), "directed")$totalCC
############################# Write Final Results #############################
write.table(base_centralities, file='results.csv')
write.table(tc_centralities, file='results.csv')
write.table(dtree_centralities, file='results.csv')
### Degree:
head(base_centralities[[1,1]], 15)/sum(base_centralities[[1,1]])*100 #Car
head(tc_centralities[[1,1]],15)/sum(tc_centralities[[1,1]])*100
head(dtree_centralities[[1,1]],15)/sum(dtree_centralities[[1,1]])*100
head(base_centralities[[2,1]], 15)/sum(base_centralities[[2,1]])*100 #HIPAA
head(tc_centralities[[2,1]],15)/sum(tc_centralities[[2,1]])*100
head(dtree_centralities[[2,1]],15)/sum(dtree_centralities[[2,1]])*100
head(base_centralities[[3,1]], 15)/sum(base_centralities[[3,1]])*100 #PCI
head(tc_centralities[[3,1]],15)/sum(tc_centralities[[3,1]])*100
head(dtree_centralities[[3,1]],15)/sum(dtree_centralities[[3,1]])*100
### Katz:
head(base_centralities[[1,2]], 15)/sum(base_centralities[[1,2]])*100 #Car
head(tc_centralities[[1,2]],15)/sum(tc_centralities[[1,2]])*100
head(dtree_centralities[[1,2]],15)/sum(dtree_centralities[[1,2]])*100
head(base_centralities[[2,2]], 15)/sum(base_centralities[[2,2]])*100 #HIPAA
head(tc_centralities[[2,2]],15)/sum(tc_centralities[[2,2]])*100
head(dtree_centralities[[2,2]],15)/sum(dtree_centralities[[2,2]])*100
head(base_centralities[[3,2]], 15)/sum(base_centralities[[3,2]])*100 #PCI
head(tc_centralities[[3,2]],15)/sum(tc_centralities[[3,2]])*100
head(dtree_centralities[[3,2]],15)/sum(dtree_centralities[[3,2]])*100
### Page Rank:
head(base_centralities[[1,3]], 15)/sum(base_centralities[[1,3]])*100 #Car
head(tc_centralities[[1,3]],15)/sum(tc_centralities[[1,3]])*100
head(dtree_centralities[[1,3]],15)/sum(dtree_centralities[[1,3]])*100
head(base_centralities[[2,3]], 15)/sum(base_centralities[[2,3]])*100 #HIPAA
head(tc_centralities[[2,3]],15)/sum(tc_centralities[[2,3]])*100
head(dtree_centralities[[2,3]],15)/sum(dtree_centralities[[2,3]])*100
head(base_centralities[[3,3]], 15)/sum(base_centralities[[3,3]])*100 #PCI
head(tc_centralities[[3,3]],15)/sum(tc_centralities[[3,3]])*100
head(dtree_centralities[[3,3]],15)/sum(dtree_centralities[[3,3]])*100
### K-Path:
head(base_centralities[[1,4]], 15)/sum(base_centralities[[1,4]])*100 #Car
head(tc_centralities[[1,4]],15)/sum(tc_centralities[[1,4]])*100
head(dtree_centralities[[1,4]],15)/sum(dtree_centralities[[1,4]])*100
head(base_centralities[[2,4]], 15)/sum(base_centralities[[2,4]])*100 #HIPAA
head(tc_centralities[[2,4]],15)/sum(tc_centralities[[2,4]])*100
head(dtree_centralities[[2,4]],15)/sum(dtree_centralities[[2,4]])*100
head(base_centralities[[3,4]], 15)/sum(base_centralities[[3,4]])*100 #PCI
head(tc_centralities[[3,4]],15)/sum(tc_centralities[[3,4]])*100
head(dtree_centralities[[3,4]],15)/sum(dtree_centralities[[3,4]])*100
### Betweenness:
head(base_centralities[[1,5]], 15)/sum(base_centralities[[1,5]])*100 #Car
head(tc_centralities[[1,5]],15)/sum(tc_centralities[[1,5]])*100
head(dtree_centralities[[1,5]],15)/sum(dtree_centralities[[1,5]])*100
head(base_centralities[[2,5]], 15)/sum(base_centralities[[2,5]])*100 #HIPAA
head(tc_centralities[[2,5]],15)/sum(tc_centralities[[2,5]])*100
head(dtree_centralities[[2,5]],15)/sum(dtree_centralities[[2,5]])*100
head(base_centralities[[3,5]], 15)/sum(base_centralities[[3,5]])*100 #PCI
head(tc_centralities[[3,5]],15)/sum(tc_centralities[[3,5]])*100
head(dtree_centralities[[3,5]],15)/sum(dtree_centralities[[3,5]])*100
# ### Laplacian:
# head(base_clusters[[1,1]], 15) #Car
# head(tc_centralities[[1,1]],15)
# head(dtree_centralities[[1,1]],15)
#
# head(base_clusters[[2,1]], 15) #HIPAA
# head(tc_centralities[[2,1]],15)
# head(dtree_centralities[[2,1]],15)
#
# head(base_clusters[[3,1]], 15) #PCI
# head(tc_centralities[[3,1]],15)
# head(dtree_centralities[[3,1]],15)
#
# ### CG:
# head(base_clusters[[1,2]], 15) #Car
# head(tc_centralities[[1,2]],15)
# head(dtree_centralities[[1,2]],15)
#
# head(base_clusters[[2,2]], 15) #HIPAA
# head(tc_centralities[[2,2]],15)
# head(dtree_centralities[[2,2]],15)
#
# head(base_clusters[[3,2]], 15) #PCI
# head(tc_centralities[[3,2]],15)
# head(dtree_centralities[[3,2]],15)
#Deg
sum(base_centralities[[1,1]])
sum(base_centralities[[2,1]])
sum(base_centralities[[3,1]])
#Katz
sum(base_centralities[[1,2]])
sum(base_centralities[[2,2]])
sum(base_centralities[[3,2]])
#PR
sum(base_centralities[[1,3]])
sum(base_centralities[[2,3]])
sum(base_centralities[[3,3]])
#KPath
sum(base_centralities[[1,4]])
sum(base_centralities[[2,4]])
sum(base_centralities[[3,4]])
#Betweenness
sum(base_centralities[[1,5]])
sum(base_centralities[[2,5]])
sum(base_centralities[[3,5]])
### TC
#Deg
sum(tc_centralities[[1,1]])
sum(tc_centralities[[2,1]])
sum(tc_centralities[[3,1]])
#Katz
sum(tc_centralities[[1,2]])
sum(tc_centralities[[2,2]])
sum(tc_centralities[[3,2]])
#PR
sum(tc_centralities[[1,3]])
sum(tc_centralities[[2,3]])
sum(tc_centralities[[3,3]])
#KPath
sum(tc_centralities[[1,4]])
sum(tc_centralities[[2,4]])
sum(tc_centralities[[3,4]])
#Betweenness
sum(tc_centralities[[1,5]])
sum(tc_centralities[[2,5]])
sum(tc_centralities[[3,5]])
### dtree
#Deg
sum(dtree_centralities[[1,1]])
sum(dtree_centralities[[2,1]])
sum(dtree_centralities[[3,1]])
#Katz
sum(dtree_centralities[[1,2]])
sum(dtree_centralities[[2,2]])
sum(dtree_centralities[[3,2]])
#PR
sum(dtree_centralities[[1,3]])
sum(dtree_centralities[[2,3]])
sum(dtree_centralities[[3,3]])
#KPath
sum(dtree_centralities[[1,4]])
sum(dtree_centralities[[2,4]])
sum(dtree_centralities[[3,4]])
#Betweenness
sum(dtree_centralities[[1,5]])
sum(dtree_centralities[[2,5]])
sum(dtree_centralities[[3,5]])
####### Plots
if (!require("plotly", quietly = TRUE))
install.packages("plotly")
library(plotly)
car_base_df <- as.data.frame(car_plots[[1,1]])
car_base_df$Node <- 1:nrow(as.data.frame(car_plots[[1,1]]))
colnames(car_base_df) <- c("Centrality", "Node")
car_tc_df <- as.data.frame(car_plots[[2,1]])
car_tc_df$Node <- 1:nrow(as.data.frame(car_plots[[2,1]]))
colnames(car_tc_df) <- c("Centrality", "Node")
car_dtree_df <- as.data.frame(car_plots[[3,1]])
car_dtree_df$Node <- 1:nrow(as.data.frame(car_plots[[3,1]]))
colnames(car_dtree_df) <- c("Centrality", "Node")
fig1 <- plot_ly(x=car_base_df$Node, y=car_base_df$Centrality, type="bar", name="Base Network") %>%
layout(xaxis = list(title = 'Node'))#, yaxis = list(title = 'Importance Percentile'))
# title=list(text="Base Network"))
fig2 <- plot_ly(x=car_base_df$Node, y=car_tc_df$Centrality, type="bar", name="Transitive Closure Network") %>%
layout(xaxis = list(title = 'Node'))#, yaxis = list(title = 'Importance Percentile'))
# title=list(text="Transitive Closure Network"))
fig3 <- plot_ly(x=car_base_df$Node, y=car_dtree_df$Centrality, type="bar", name="Dominant Tree Network")%>%
layout(xaxis = list(title = 'Node'))#, yaxis = list(title = 'Importance Percentile'))
# title=list(text="Dominant Tree Network"))
fig <- subplot(fig1, fig2, fig3, nrows = 3, shareX = TRUE, shareY = TRUE) %>%
layout(title = list(text = "Degree Centrality Importance Distribution for
Nodes in the Automobile Maintenance Network"),
plot_bgcolor='#e5ecf6',
xaxis = list(
zerolinecolor = '#ffff',
zerolinewidth = 2,
gridcolor = 'ffff'),
yaxis = list(
zerolinecolor = '#ffff',
zerolinewidth = 2,
gridcolor = 'ffff'),
annotations = list(
list(x = -0.1 , y = 0.5, text = "Node Importance/max(Importance)",
font = list(color = "black",size = 18),
textangle = 270,
showarrow = F, xref='paper', yref='paper', size=48)),
showlegend=T
)
fig
plot(car_plots[[1,1]], type="h", xlab="Node",
ylab="Percentile (Importance Ranking)",
main="Importance Distribution for Nodes
in the Automobile Maintenance Network")