627 lines
24 KiB
R
627 lines
24 KiB
R
# Final Project for the University of Tulsa's CS-7863 Network Theory Course
|
|
# Compliance Graph Analysis
|
|
# Professor: Dr. McKinney, Spring 2022
|
|
# Noah L. Schrick - 1492657
|
|
|
|
library(igraph)
|
|
library(centiserve)
|
|
if (!require("BiocManager", quietly = TRUE))
|
|
install.packages("BiocManager")
|
|
BiocManager::install("RBGL")
|
|
library(RBGL)
|
|
library(DirectedClustering)
|
|
|
|
################## Read in the previously generated networks ##################
|
|
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
|
|
source("./CG_Files/manual_import.R")
|
|
|
|
car <- import_networks(1)
|
|
car.netname <- "Vehicle Maintenance"
|
|
hipaa <- import_networks(2)
|
|
hipaa.netname <- "HIPAA Compliance"
|
|
pci <- import_networks(3)
|
|
pci.netname <- "PCI Compliance"
|
|
|
|
# Get basic network attributes
|
|
car.adj <- get.adjacency(car)
|
|
car.deg <- rowSums(as.matrix(car.adj)) # degree
|
|
car.n <- length(V(car))
|
|
|
|
hipaa.adj <- get.adjacency(hipaa)
|
|
hipaa.deg <- rowSums(as.matrix(hipaa.adj)) # degree
|
|
hipaa.n <- length(V(hipaa))
|
|
|
|
pci.adj <- get.adjacency(pci)
|
|
pci.deg <- rowSums(as.matrix(pci.adj)) # degree
|
|
pci.n <- length(V(pci))
|
|
|
|
car_plots <- matrix(list(), nrow=3, ncol=5)
|
|
rownames(car_plots) <- c("Base", "Transitive Closure", "Dominant Tree")
|
|
colnames(car_plots) <- c("Degree", "Katz", "Page Rank", "K-path", "Betweenness")
|
|
|
|
############################# Base Centralities #############################
|
|
source("centralities.R")
|
|
base_centralities <- matrix(list(), nrow=3, ncol=5)
|
|
rownames(base_centralities) <- c(car.netname, hipaa.netname, pci.netname)
|
|
colnames(base_centralities) <- c("Degree", "Katz", "Page Rank", "K-path", "Betweenness")
|
|
|
|
### Degree
|
|
base_centralities[[1,1]] <- car.deg %>% sort(decreasing = T)
|
|
base_centralities[[2,1]] <- hipaa.deg %>% sort(decreasing = T)
|
|
base_centralities[[3,1]] <- pci.deg %>% sort(decreasing = T)
|
|
|
|
car_deg_Fn <- ( ecdf( car.deg ))
|
|
car_plots[[1,1]] <- car_deg_Fn(car.deg)*100
|
|
|
|
#### Katz
|
|
car.katz <- katz.cent(car)
|
|
nodes <- car.katz %>% order(decreasing=T)
|
|
nodes <- head(nodes, 15)-1
|
|
vals <- car.katz %>% sort(decreasing=T)
|
|
vals <- head(vals, 15)
|
|
base_centralities[[1,2]] <- car.katz[rowSums(apply(car.katz,2,is.nan))==0,] %>% sort(decreasing = T)
|
|
|
|
car_katz_Fn <- ( ecdf(as.numeric(car.katz )))
|
|
car_plots[[1,2]] <- car_katz_Fn(as.numeric(car.katz))*100
|
|
|
|
base_centralities[[2,2]] <- katz.cent(hipaa) %>% sort(decreasing = T)
|
|
hipaa.katz <- katz.cent(hipaa)
|
|
nodes <- hipaa.katz %>% order(decreasing=T)
|
|
nodes <- head(nodes, 15)-1
|
|
vals <- hipaa.katz %>% sort(decreasing=T)
|
|
vals <- head(vals, 15)
|
|
|
|
base_centralities[[3,2]] <- katz.cent(pci) %>% sort(decreasing = T)
|
|
pci.katz <- katz.cent(pci)
|
|
nodes <- pci.katz %>% order(decreasing=T)
|
|
nodes <- head(nodes, 15)-1
|
|
vals <- pci.katz %>% sort(decreasing=T)
|
|
vals <- head(vals, 15)
|
|
|
|
### Page Rank
|
|
base_centralities[[1,3]] <- page.rank(car)$vector %>% sort(decreasing = T)
|
|
base_centralities[[2,3]] <- page.rank(hipaa)$vector %>% sort(decreasing = T)
|
|
base_centralities[[3,3]] <- page.rank(pci)$vector %>% sort(decreasing = T)
|
|
|
|
car_pr_Fn <- ( ecdf( page.rank(car)$vector ))
|
|
car_plots[[1,3]] <- car_pr_Fn(page.rank(car)$vector)*100
|
|
|
|
### K-path
|
|
base_centralities[[1,4]] <- geokpath(car, V(car), "out") %>% sort(decreasing = T)
|
|
base_centralities[[2,4]] <- geokpath(hipaa, V(hipaa), "out") %>% sort(decreasing = T)
|
|
base_centralities[[3,4]] <- geokpath(pci, V(pci), "out") %>% sort(decreasing = T)
|
|
|
|
car_kp_Fn <- (ecdf(geokpath(car, V(car), "out")))
|
|
car_plots[[1,4]] <- car_kp_Fn(geokpath(car, V(car), "out"))*100
|
|
|
|
### Betweenness
|
|
base_centralities[[1,5]] <- betweenness(car, TRUE) %>% sort(decreasing = T)
|
|
base_centralities[[2,5]] <- betweenness(hipaa, TRUE) %>% sort(decreasing = T)
|
|
base_centralities[[3,5]] <- betweenness(pci, TRUE) %>% sort(decreasing = T)
|
|
|
|
car_btw_Fn <- (ecdf(betweenness(car, TRUE)))
|
|
car_plots[[1,5]] <- car_btw_Fn(betweenness(car, TRUE))*100
|
|
|
|
############################### Base Clustering ###############################
|
|
#source("self_newman_mod.R")
|
|
#base_clusters <- matrix(list(), nrow=3, ncol=2)
|
|
#rownames(base_centralities) <- c(car.netname, hipaa.netname, pci.netname)
|
|
#colnames(base_centralities) <- c("Laplace", "CG")
|
|
|
|
### Laplacian
|
|
#car.Lap <- diag(car.deg) - car.adj # L = D-A
|
|
#hipaa.Lap <- diag(hipaa.deg) - hipaa.adj
|
|
#pci.Lap <- diag(pci.deg) - pci.adj
|
|
|
|
# get eigvals and vecs
|
|
#car.eigs <- Re(eigen(car.Lap)$vectors[,car.n-1])
|
|
#car.eig_val <- eigen(car.Lap)$values[car.n-1]
|
|
#names(car.eigs) <- names(V(car))
|
|
#car.l_clusters <- ifelse(car.eigs>0,1,-1)
|
|
#base_clusters[[1,1]] <- car.l_clusters
|
|
#V(car)$color <- ifelse(car.l_clusters>0, "green", "yellow")
|
|
#plot(car, main=paste(car.netname, "Laplace Spectral Clustering"), vertex.label=NA)
|
|
|
|
# hipaa.eigs <- Re(eigen(hipaa.Lap)$vectors[,hipaa.n-1])
|
|
# hipaa.eig_val <- eigen(hipaa.Lap)$values[hipaa.n-1]
|
|
# names(hipaa.eigs) <- names(V(hipaa))
|
|
# hipaa.l_clusters <- ifelse(hipaa.eigs>0,1,-1)
|
|
# base_clusters[[2,1]] <- hipaa.l_clusters
|
|
# V(hipaa)$color <- ifelse(hipaa.l_clusters>0, "green", "yellow")
|
|
# plot(hipaa, main=paste(hipaa.netname, "Laplace Spectral Clustering"), vertex.label=NA)
|
|
#
|
|
# pci.eigs <- Re(eigen(pci.Lap)$vectors[,pci.n-1])
|
|
# pci.eig_val <- eigen(pci.Lap)$values[pci.n-1]
|
|
# names(pci.eigs) <- names(V(pci))
|
|
# pci.l_clusters <- ifelse(pci.eigs>0,1,-1)
|
|
# base_clusters[[3,1]] <- pci.l_clusters
|
|
# V(pci)$color <- ifelse(pci.l_clusters>0, "green", "yellow")
|
|
# plot(pci, main=paste(pci.netname, "Laplace Spectral Clustering"), vertex.label=NA)
|
|
#
|
|
# ### Clemente and Grassi
|
|
# base_clusters[[1,2]] <- ClustBCG(as.matrix(car.adj), "directed")$totalCC
|
|
# base_clusters[[2,2]] <- ClustBCG(as.matrix(hipaa.adj), "directed")$totalCC
|
|
# base_clusters[[3,2]] <- ClustBCG(as.matrix(pci.adj), "directed")$totalCC
|
|
|
|
################################ Misc Analysis ################################
|
|
# min_cut(car,"0", "2490")
|
|
# min_cut(hipaa,"0","2320")
|
|
# min_cut(pci,"0","60")
|
|
#
|
|
# max_flow(car, "0", "2490")
|
|
# max_flow(hipaa,"0","2320")
|
|
# max_flow(pci,"0","60")
|
|
|
|
### Transitive closure
|
|
car.graph <- as_graphnel(car)
|
|
hipaa.graph <- as_graphnel(hipaa)
|
|
pci.graph <- as_graphnel(pci)
|
|
|
|
car.tc <- graph_from_graphnel(transitive.closure(car.graph))
|
|
hipaa.tc <- graph_from_graphnel(transitive.closure(hipaa.graph))
|
|
pci.tc <- graph_from_graphnel(transitive.closure(pci.graph))
|
|
|
|
### Edge connectivty
|
|
edge_connectivity(car, "0", "2490")
|
|
edge_connectivity(hipaa,"0","2320")
|
|
edge_connectivity(pci,"0","60")
|
|
|
|
# Dominator Tree
|
|
car.dtree <- dominator_tree(car, "0", "out")$domtree
|
|
hipaa.dtree <- dominator_tree(hipaa, "0", "out")$domtree
|
|
pci.dtree <- dominator_tree(pci, "0", "out")$domtree
|
|
|
|
###################### Transitive Closure Centralities ######################
|
|
tc_centralities <- matrix(list(), nrow=3, ncol=5)
|
|
rownames(tc_centralities) <- c(car.netname, hipaa.netname, pci.netname)
|
|
colnames(tc_centralities) <- c("Degree", "Katz", "Page Rank", "K-path", "Betweenness")
|
|
|
|
# Get basic network attributes
|
|
car.tc.adj <- get.adjacency(car.tc)
|
|
car.tc.deg <- rowSums(as.matrix(car.tc.adj)) # degree
|
|
car.tc.n <- length(V(car.tc))
|
|
|
|
hipaa.tc.adj <- get.adjacency(hipaa.tc)
|
|
hipaa.tc.deg <- rowSums(as.matrix(hipaa.tc.adj)) # degree
|
|
hipaa.tc.n <- length(V(hipaa.tc))
|
|
|
|
pci.tc.adj <- get.adjacency(pci.tc)
|
|
pci.tc.deg <- rowSums(as.matrix(pci.tc.adj)) # degree
|
|
pci.tc.n <- length(V(pci.tc))
|
|
|
|
### Degree
|
|
tc_centralities[[1,1]] <- car.tc.deg %>% sort(decreasing = T)
|
|
tc_centralities[[2,1]] <- hipaa.tc.deg %>% sort(decreasing = T)
|
|
tc_centralities[[3,1]] <- pci.tc.deg %>% sort(decreasing = T)
|
|
|
|
car_tc_deg_Fn <- ( ecdf( car.tc.deg ))
|
|
car_plots[[2,1]] <- car_tc_deg_Fn(car.tc.deg)*100
|
|
|
|
#### Katz
|
|
car.tc.katz <- katz.cent(car.tc)
|
|
tc_centralities[[1,2]] <- car.tc.katz[rowSums(apply(car.tc.katz,2,is.nan))==0,] %>% sort(decreasing = T)
|
|
car.tc.katz <- katz.cent(car.tc)
|
|
nodes <- car.tc.katz %>% order(decreasing=T)
|
|
nodes <- head(nodes, 15)-1
|
|
vals <- car.tc.katz %>% sort(decreasing=T)
|
|
vals <- head(vals, 15)
|
|
|
|
tc_centralities[[2,2]] <- katz.cent(hipaa.tc) %>% sort(decreasing = T)
|
|
hipaa.tc.katz <- katz.cent(hipaa.tc)
|
|
nodes <- hipaa.tc.katz %>% order(decreasing=T)
|
|
nodes <- head(nodes, 15)-1
|
|
vals <- hipaa.tc.katz %>% sort(decreasing=T)
|
|
vals <- head(vals, 15)
|
|
|
|
car_tc_katz_Fn <- ( ecdf(as.numeric(car.tc.katz )))
|
|
car_plots[[2,2]] <- car_tc_katz_Fn(as.numeric(car.tc.katz))*100
|
|
|
|
tc_centralities[[3,2]] <- katz.cent(pci.tc) %>% sort(decreasing = T)
|
|
pci.tc.katz <- katz.cent(pci.tc)
|
|
nodes <- pci.tc.katz %>% order(decreasing=T)
|
|
nodes <- head(nodes, 15)-1
|
|
vals <- pci.tc.katz %>% sort(decreasing=T)
|
|
vals <- head(vals, 15)
|
|
|
|
### Page Rank
|
|
tc_centralities[[1,3]] <- page.rank(car.tc)$vector %>% sort(decreasing = T)
|
|
tc_centralities[[2,3]] <- page.rank(hipaa.tc)$vector %>% sort(decreasing = T)
|
|
tc_centralities[[3,3]] <- page.rank(pci.tc)$vector %>% sort(decreasing = T)
|
|
|
|
car_tc_pr_Fn <- ( ecdf( page.rank(car.tc)$vector ))
|
|
car_plots[[2,3]] <- car_tc_pr_Fn(page.rank(car.tc)$vector)*100
|
|
|
|
### K-path
|
|
tc_centralities[[1,4]] <- geokpath(car.tc, V(car.tc), "out") %>% sort(decreasing = T)
|
|
tc_centralities[[2,4]] <- geokpath(hipaa.tc, V(hipaa.tc), "out") %>% sort(decreasing = T)
|
|
tc_centralities[[3,4]] <- geokpath(pci.tc, V(pci.tc), "out") %>% sort(decreasing = T)
|
|
|
|
car_tc_kp_Fn <- (ecdf(geokpath(car.tc, V(car.tc), "out")))
|
|
car_plots[[2,4]] <- car_tc_kp_Fn(geokpath(car.tc, V(car.tc), "out"))*100
|
|
|
|
### Betweenness
|
|
tc_centralities[[1,5]] <- betweenness(car.tc, TRUE) %>% sort(decreasing = T)
|
|
tc_centralities[[2,5]] <- betweenness(hipaa.tc, TRUE) %>% sort(decreasing = T)
|
|
tc_centralities[[3,5]] <- betweenness(pci.tc, TRUE) %>% sort(decreasing = T)
|
|
|
|
car_tc_btw_Fn <- (ecdf(betweenness(car.tc, TRUE)))
|
|
car_plots[[2,5]] <- car_tc_btw_Fn(betweenness(car.tc, TRUE))*100
|
|
|
|
######################## Transitive Closure Clustering ########################
|
|
# source("self_newman_mod.R")
|
|
# tc_clusters <- matrix(list(), nrow=3, ncol=2)
|
|
# rownames(tc_centralities) <- c(car.netname, hipaa.netname, pci.netname)
|
|
# colnames(tc_centralities) <- c("Laplace", "CG")
|
|
#
|
|
# ### Laplacian
|
|
# car.tc.Lap <- diag(car.tc.deg) - car.tc.adj # L = D-A
|
|
# hipaa.tc.Lap <- diag(hipaa.tc.deg) - hipaa.tc.adj
|
|
# pci.tc.Lap <- diag(pci.tc.deg) - pci.tc.adj
|
|
#
|
|
# # get eigvals and vecs
|
|
# car.tc.eigs <- Re(eigen(car.tc.Lap)$vectors[,car.tc.n-1])
|
|
# car.tc.eig_val <- eigen(car.tc.Lap)$values[car.tc.n-1]
|
|
# names(car.tc.eigs) <- names(V(car.tc))
|
|
# car.tc.l_clusters <- ifelse(car.tc.eigs>0,1,-1)
|
|
# tc_clusters[[1,1]] <- car.tc.l_clusters
|
|
# V(car.tc)$color <- ifelse(car.tc.l_clusters>0, "green", "yellow")
|
|
# plot(car.tc, main=paste(car.tc.netname, "Laplace Spectral Clustering"), vertex.label=NA)
|
|
#
|
|
# hipaa.tc.eigs <- Re(eigen(hipaa.tc.Lap)$vectors[,hipaa.tc.n-1])
|
|
# hipaa.tc.eig_val <- eigen(hipaa.tc.Lap)$values[hipaa.tc.n-1]
|
|
# names(hipaa.tc.eigs) <- names(V(hipaa.tc))
|
|
# hipaa.tc.l_clusters <- ifelse(hipaa.tc.eigs>0,1,-1)
|
|
# tc_clusters[[2,1]] <- hipaa.tc.l_clusters
|
|
# V(hipaa.tc)$color <- ifelse(hipaa.tc.l_clusters>0, "green", "yellow")
|
|
# plot(hipaa.tc, main=paste(hipaa.tc.netname, "Laplace Spectral Clustering"), vertex.label=NA)
|
|
#
|
|
# pci.tc.eigs <- Re(eigen(pci.tc.Lap)$vectors[,pci.tc.n-1])
|
|
# pci.tc.eig_val <- eigen(pci.tc.Lap)$values[pci.tc.n-1]
|
|
# names(pci.tc.eigs) <- names(V(pci.tc))
|
|
# pci.tc.l_clusters <- ifelse(pci.tc.eigs>0,1,-1)
|
|
# tc_clusters[[3,1]] <- pci.tc.l_clusters
|
|
# V(pci.tc)$color <- ifelse(pci.tc.l_clusters>0, "green", "yellow")
|
|
# plot(pci.tc, main=paste(pci.tc.netname, "Laplace Spectral Clustering"), vertex.label=NA)
|
|
#
|
|
# ### Clemente and Grassi
|
|
# tc_clusters[[1,2]] <- ClustBCG(as.matrix(car.tc.adj), "directed")$totalCC
|
|
# tc_clusters[[2,2]] <- ClustBCG(as.matrix(hipaa.tc.adj), "directed")$totalCC
|
|
# tc_clusters[[3,2]] <- ClustBCG(as.matrix(pci.tc.adj), "directed")$totalCC
|
|
|
|
######################### Dominant Tree Centralities #########################
|
|
dtree_centralities <- matrix(list(), nrow=3, ncol=5)
|
|
rownames(dtree_centralities) <- c(car.netname, hipaa.netname, pci.netname)
|
|
colnames(dtree_centralities) <- c("Degree", "Katz", "Page Rank", "K-path", "Betweenness")
|
|
|
|
# Get basic network attributes
|
|
car.dtree.adj <- get.adjacency(car.dtree)
|
|
car.dtree.deg <- rowSums(as.matrix(car.dtree.adj)) # degree
|
|
car.dtree.n <- length(V(car.dtree))
|
|
|
|
hipaa.dtree.adj <- get.adjacency(hipaa.dtree)
|
|
hipaa.dtree.deg <- rowSums(as.matrix(hipaa.dtree.adj)) # degree
|
|
hipaa.dtree.n <- length(V(hipaa.dtree))
|
|
|
|
pci.dtree.adj <- get.adjacency(pci.dtree)
|
|
pci.dtree.deg <- rowSums(as.matrix(pci.dtree.adj)) # degree
|
|
pci.dtree.n <- length(V(pci.dtree))
|
|
|
|
### Degree
|
|
dtree_centralities[[1,1]] <- car.dtree.deg %>% sort(decreasing = T)
|
|
dtree_centralities[[2,1]] <- hipaa.dtree.deg %>% sort(decreasing = T)
|
|
dtree_centralities[[3,1]] <- pci.dtree.deg %>% sort(decreasing = T)
|
|
|
|
car_dt_deg_Fn <- ( ecdf( car.dtree.deg ))
|
|
car_plots[[3,1]] <- car_dt_deg_Fn(car.dtree.deg)*100
|
|
|
|
#### Katz
|
|
car.dtree.katz <- katz.cent(car.dtree)
|
|
dtree_centralities[[1,2]] <- car.dtree.katz[rowSums(apply(car.dtree.katz,2,is.nan))==0,] %>% sort(decreasing = T)
|
|
car.dtree.katz <- katz.cent(car.dtree)
|
|
nodes <- car.dtree.katz %>% order(decreasing=T)
|
|
nodes <- head(nodes, 15)-1
|
|
vals <- car.dtree.katz %>% sort(decreasing=T)
|
|
vals <- head(vals, 15)
|
|
|
|
dtree_centralities[[2,2]] <- katz.cent(hipaa.dtree) %>% sort(decreasing = T)
|
|
hipaa.dtree.katz <- katz.cent(hipaa.dtree)
|
|
nodes <- hipaa.dtree.katz %>% order(decreasing=T)
|
|
nodes <- head(nodes, 15)-1
|
|
vals <- hipaa.dtree.katz %>% sort(decreasing=T)
|
|
vals <- head(vals, 15)
|
|
|
|
|
|
dtree_centralities[[3,2]] <- katz.cent(pci.dtree) %>% sort(decreasing = T)
|
|
pci.dtree.katz <- katz.cent(pci.dtree)
|
|
nodes <- pci.dtree.katz %>% order(decreasing=T)
|
|
nodes <- head(nodes, 15)-1
|
|
vals <- pci.dtree.katz %>% sort(decreasing=T)
|
|
vals <- head(vals, 15)
|
|
|
|
car_dt_katz_Fn <- ( ecdf(as.numeric(car.dtree.katz )))
|
|
car_plots[[3,2]] <- car_dt_katz_Fn(as.numeric(car.dtree.katz))*100
|
|
|
|
### Page Rank
|
|
dtree_centralities[[1,3]] <- page.rank(car.dtree)$vector %>% sort(decreasing = T)
|
|
dtree_centralities[[2,3]] <- page.rank(hipaa.dtree)$vector %>% sort(decreasing = T)
|
|
dtree_centralities[[3,3]] <- page.rank(pci.dtree)$vector %>% sort(decreasing = T)
|
|
|
|
car_dt_pr_Fn <- ( ecdf( page.rank(car.dtree)$vector ))
|
|
car_plots[[3,3]] <- car_dt_pr_Fn(page.rank(car.dtree)$vector)*100
|
|
|
|
### K-path
|
|
dtree_centralities[[1,4]] <- geokpath(car.dtree, V(car.dtree), "out") %>% sort(decreasing = T)
|
|
dtree_centralities[[2,4]] <- geokpath(hipaa.dtree, V(hipaa.dtree), "out") %>% sort(decreasing = T)
|
|
dtree_centralities[[3,4]] <- geokpath(pci.dtree, V(pci.dtree), "out") %>% sort(decreasing = T)
|
|
|
|
car_dt_kp_Fn <- (ecdf(geokpath(car.dtree, V(car.dtree), "out")))
|
|
car_plots[[3,4]] <- car_tc_kp_Fn(geokpath(car.dtree, V(car.dtree), "out"))*100
|
|
|
|
### Betweenness
|
|
dtree_centralities[[1,5]] <- betweenness(car.dtree, TRUE) %>% sort(decreasing = T)
|
|
dtree_centralities[[2,5]] <- betweenness(hipaa.dtree, TRUE) %>% sort(decreasing = T)
|
|
dtree_centralities[[3,5]] <- betweenness(pci.dtree, TRUE) %>% sort(decreasing = T)
|
|
|
|
car_dt_btw_Fn <- (ecdf(betweenness(car.dtree, TRUE)))
|
|
car_plots[[3,5]] <- car_dt_btw_Fn(betweenness(car.dtree, TRUE))*100
|
|
|
|
########################## Dominant Tree Clustering ##########################
|
|
# source("self_newman_mod.R")
|
|
# dtree_clusters <- matrix(list(), nrow=3, ncol=2)
|
|
# rownames(dtree_centralities) <- c(car.netname, hipaa.netname, pci.netname)
|
|
# colnames(dtree_centralities) <- c("Laplace", "CG")
|
|
#
|
|
# ### Laplacian
|
|
# car.dtree.Lap <- diag(car.dtree.deg) - car.dtree.adj # L = D-A
|
|
# hipaa.dtree.Lap <- diag(hipaa.dtree.deg) - hipaa.dtree.adj
|
|
# pci.dtree.Lap <- diag(pci.dtree.deg) - pci.dtree.adj
|
|
#
|
|
# # get eigvals and vecs
|
|
# car.dtree.eigs <- Re(eigen(car.dtree.Lap)$vectors[,car.dtree.n-1])
|
|
# car.dtree.eig_val <- eigen(car.dtree.Lap)$values[car.dtree.n-1]
|
|
# names(car.dtree.eigs) <- names(V(car.dtree))
|
|
# car.dtree.l_clusters <- ifelse(car.dtree.eigs>0,1,-1)
|
|
# dtree_clusters[[1,1]] <- car.dtree.l_clusters
|
|
# V(car.dtree)$color <- ifelse(car.dtree.l_clusters>0, "green", "yellow")
|
|
# plot(car.dtree, main=paste(car.dtree.netname, "Laplace Spectral Clustering"), vertex.label=NA)
|
|
#
|
|
# hipaa.dtree.eigs <- Re(eigen(hipaa.dtree.Lap)$vectors[,hipaa.dtree.n-1])
|
|
# hipaa.dtree.eig_val <- eigen(hipaa.dtree.Lap)$values[hipaa.dtree.n-1]
|
|
# names(hipaa.dtree.eigs) <- names(V(hipaa.dtree))
|
|
# hipaa.dtree.l_clusters <- ifelse(hipaa.dtree.eigs>0,1,-1)
|
|
# dtree_clusters[[2,1]] <- hipaa.dtree.l_clusters
|
|
# V(hipaa.dtree)$color <- ifelse(hipaa.dtree.l_clusters>0, "green", "yellow")
|
|
# plot(hipaa.dtree, main=paste(hipaa.dtree.netname, "Laplace Spectral Clustering"), vertex.label=NA)
|
|
#
|
|
# pci.dtree.eigs <- Re(eigen(pci.dtree.Lap)$vectors[,pci.dtree.n-1])
|
|
# pci.dtree.eig_val <- eigen(pci.dtree.Lap)$values[pci.dtree.n-1]
|
|
# names(pci.dtree.eigs) <- names(V(pci.dtree))
|
|
# pci.dtree.l_clusters <- ifelse(pci.dtree.eigs>0,1,-1)
|
|
# dtree_clusters[[3,1]] <- pci.dtree.l_clusters
|
|
# V(pci.dtree)$color <- ifelse(pci.dtree.l_clusters>0, "green", "yellow")
|
|
# plot(pci.dtree, main=paste(pci.dtree.netname, "Laplace Spectral Clustering"), vertex.label=NA)
|
|
#
|
|
# ### Clemente and Grassi
|
|
# dtree_clusters[[1,2]] <- ClustBCG(as.matrix(car.dtree.adj), "directed")$totalCC
|
|
# dtree_clusters[[2,2]] <- ClustBCG(as.matrix(hipaa.dtree.adj), "directed")$totalCC
|
|
# dtree_clusters[[3,2]] <- ClustBCG(as.matrix(pci.dtree.adj), "directed")$totalCC
|
|
|
|
|
|
|
|
############################# Write Final Results #############################
|
|
write.table(base_centralities, file='results.csv')
|
|
write.table(tc_centralities, file='results.csv')
|
|
write.table(dtree_centralities, file='results.csv')
|
|
|
|
### Degree:
|
|
head(base_centralities[[1,1]], 15)/sum(base_centralities[[1,1]])*100 #Car
|
|
head(tc_centralities[[1,1]],15)/sum(tc_centralities[[1,1]])*100
|
|
head(dtree_centralities[[1,1]],15)/sum(dtree_centralities[[1,1]])*100
|
|
|
|
head(base_centralities[[2,1]], 15)/sum(base_centralities[[2,1]])*100 #HIPAA
|
|
head(tc_centralities[[2,1]],15)/sum(tc_centralities[[2,1]])*100
|
|
head(dtree_centralities[[2,1]],15)/sum(dtree_centralities[[2,1]])*100
|
|
|
|
head(base_centralities[[3,1]], 15)/sum(base_centralities[[3,1]])*100 #PCI
|
|
head(tc_centralities[[3,1]],15)/sum(tc_centralities[[3,1]])*100
|
|
head(dtree_centralities[[3,1]],15)/sum(dtree_centralities[[3,1]])*100
|
|
|
|
### Katz:
|
|
head(base_centralities[[1,2]], 15)/sum(base_centralities[[1,2]])*100 #Car
|
|
head(tc_centralities[[1,2]],15)/sum(tc_centralities[[1,2]])*100
|
|
head(dtree_centralities[[1,2]],15)/sum(dtree_centralities[[1,2]])*100
|
|
|
|
head(base_centralities[[2,2]], 15)/sum(base_centralities[[2,2]])*100 #HIPAA
|
|
head(tc_centralities[[2,2]],15)/sum(tc_centralities[[2,2]])*100
|
|
head(dtree_centralities[[2,2]],15)/sum(dtree_centralities[[2,2]])*100
|
|
|
|
head(base_centralities[[3,2]], 15)/sum(base_centralities[[3,2]])*100 #PCI
|
|
head(tc_centralities[[3,2]],15)/sum(tc_centralities[[3,2]])*100
|
|
head(dtree_centralities[[3,2]],15)/sum(dtree_centralities[[3,2]])*100
|
|
|
|
### Page Rank:
|
|
head(base_centralities[[1,3]], 15)/sum(base_centralities[[1,3]])*100 #Car
|
|
head(tc_centralities[[1,3]],15)/sum(tc_centralities[[1,3]])*100
|
|
head(dtree_centralities[[1,3]],15)/sum(dtree_centralities[[1,3]])*100
|
|
|
|
head(base_centralities[[2,3]], 15)/sum(base_centralities[[2,3]])*100 #HIPAA
|
|
head(tc_centralities[[2,3]],15)/sum(tc_centralities[[2,3]])*100
|
|
head(dtree_centralities[[2,3]],15)/sum(dtree_centralities[[2,3]])*100
|
|
|
|
head(base_centralities[[3,3]], 15)/sum(base_centralities[[3,3]])*100 #PCI
|
|
head(tc_centralities[[3,3]],15)/sum(tc_centralities[[3,3]])*100
|
|
head(dtree_centralities[[3,3]],15)/sum(dtree_centralities[[3,3]])*100
|
|
|
|
### K-Path:
|
|
head(base_centralities[[1,4]], 15)/sum(base_centralities[[1,4]])*100 #Car
|
|
head(tc_centralities[[1,4]],15)/sum(tc_centralities[[1,4]])*100
|
|
head(dtree_centralities[[1,4]],15)/sum(dtree_centralities[[1,4]])*100
|
|
|
|
head(base_centralities[[2,4]], 15)/sum(base_centralities[[2,4]])*100 #HIPAA
|
|
head(tc_centralities[[2,4]],15)/sum(tc_centralities[[2,4]])*100
|
|
head(dtree_centralities[[2,4]],15)/sum(dtree_centralities[[2,4]])*100
|
|
|
|
head(base_centralities[[3,4]], 15)/sum(base_centralities[[3,4]])*100 #PCI
|
|
head(tc_centralities[[3,4]],15)/sum(tc_centralities[[3,4]])*100
|
|
head(dtree_centralities[[3,4]],15)/sum(dtree_centralities[[3,4]])*100
|
|
|
|
### Betweenness:
|
|
head(base_centralities[[1,5]], 15)/sum(base_centralities[[1,5]])*100 #Car
|
|
head(tc_centralities[[1,5]],15)/sum(tc_centralities[[1,5]])*100
|
|
head(dtree_centralities[[1,5]],15)/sum(dtree_centralities[[1,5]])*100
|
|
|
|
head(base_centralities[[2,5]], 15)/sum(base_centralities[[2,5]])*100 #HIPAA
|
|
head(tc_centralities[[2,5]],15)/sum(tc_centralities[[2,5]])*100
|
|
head(dtree_centralities[[2,5]],15)/sum(dtree_centralities[[2,5]])*100
|
|
|
|
head(base_centralities[[3,5]], 15)/sum(base_centralities[[3,5]])*100 #PCI
|
|
head(tc_centralities[[3,5]],15)/sum(tc_centralities[[3,5]])*100
|
|
head(dtree_centralities[[3,5]],15)/sum(dtree_centralities[[3,5]])*100
|
|
|
|
|
|
# ### Laplacian:
|
|
# head(base_clusters[[1,1]], 15) #Car
|
|
# head(tc_centralities[[1,1]],15)
|
|
# head(dtree_centralities[[1,1]],15)
|
|
#
|
|
# head(base_clusters[[2,1]], 15) #HIPAA
|
|
# head(tc_centralities[[2,1]],15)
|
|
# head(dtree_centralities[[2,1]],15)
|
|
#
|
|
# head(base_clusters[[3,1]], 15) #PCI
|
|
# head(tc_centralities[[3,1]],15)
|
|
# head(dtree_centralities[[3,1]],15)
|
|
#
|
|
# ### CG:
|
|
# head(base_clusters[[1,2]], 15) #Car
|
|
# head(tc_centralities[[1,2]],15)
|
|
# head(dtree_centralities[[1,2]],15)
|
|
#
|
|
# head(base_clusters[[2,2]], 15) #HIPAA
|
|
# head(tc_centralities[[2,2]],15)
|
|
# head(dtree_centralities[[2,2]],15)
|
|
#
|
|
# head(base_clusters[[3,2]], 15) #PCI
|
|
# head(tc_centralities[[3,2]],15)
|
|
# head(dtree_centralities[[3,2]],15)
|
|
|
|
#Deg
|
|
sum(base_centralities[[1,1]])
|
|
sum(base_centralities[[2,1]])
|
|
sum(base_centralities[[3,1]])
|
|
|
|
#Katz
|
|
sum(base_centralities[[1,2]])
|
|
sum(base_centralities[[2,2]])
|
|
sum(base_centralities[[3,2]])
|
|
|
|
#PR
|
|
sum(base_centralities[[1,3]])
|
|
sum(base_centralities[[2,3]])
|
|
sum(base_centralities[[3,3]])
|
|
|
|
#KPath
|
|
sum(base_centralities[[1,4]])
|
|
sum(base_centralities[[2,4]])
|
|
sum(base_centralities[[3,4]])
|
|
|
|
#Betweenness
|
|
sum(base_centralities[[1,5]])
|
|
sum(base_centralities[[2,5]])
|
|
sum(base_centralities[[3,5]])
|
|
|
|
### TC
|
|
|
|
#Deg
|
|
sum(tc_centralities[[1,1]])
|
|
sum(tc_centralities[[2,1]])
|
|
sum(tc_centralities[[3,1]])
|
|
|
|
#Katz
|
|
sum(tc_centralities[[1,2]])
|
|
sum(tc_centralities[[2,2]])
|
|
sum(tc_centralities[[3,2]])
|
|
|
|
#PR
|
|
sum(tc_centralities[[1,3]])
|
|
sum(tc_centralities[[2,3]])
|
|
sum(tc_centralities[[3,3]])
|
|
|
|
#KPath
|
|
sum(tc_centralities[[1,4]])
|
|
sum(tc_centralities[[2,4]])
|
|
sum(tc_centralities[[3,4]])
|
|
|
|
#Betweenness
|
|
sum(tc_centralities[[1,5]])
|
|
sum(tc_centralities[[2,5]])
|
|
sum(tc_centralities[[3,5]])
|
|
|
|
|
|
### dtree
|
|
|
|
#Deg
|
|
sum(dtree_centralities[[1,1]])
|
|
sum(dtree_centralities[[2,1]])
|
|
sum(dtree_centralities[[3,1]])
|
|
|
|
#Katz
|
|
sum(dtree_centralities[[1,2]])
|
|
sum(dtree_centralities[[2,2]])
|
|
sum(dtree_centralities[[3,2]])
|
|
|
|
#PR
|
|
sum(dtree_centralities[[1,3]])
|
|
sum(dtree_centralities[[2,3]])
|
|
sum(dtree_centralities[[3,3]])
|
|
|
|
#KPath
|
|
sum(dtree_centralities[[1,4]])
|
|
sum(dtree_centralities[[2,4]])
|
|
sum(dtree_centralities[[3,4]])
|
|
|
|
#Betweenness
|
|
sum(dtree_centralities[[1,5]])
|
|
sum(dtree_centralities[[2,5]])
|
|
sum(dtree_centralities[[3,5]])
|
|
|
|
|
|
####### Plots
|
|
if (!require("plotly", quietly = TRUE))
|
|
install.packages("plotly")
|
|
|
|
library(plotly)
|
|
car_base_df <- as.data.frame(car_plots[[1,1]])
|
|
car_base_df$Node <- 1:nrow(as.data.frame(car_plots[[1,1]]))
|
|
colnames(car_base_df) <- c("Centrality", "Node")
|
|
|
|
car_tc_df <- as.data.frame(car_plots[[2,1]])
|
|
car_tc_df$Node <- 1:nrow(as.data.frame(car_plots[[2,1]]))
|
|
colnames(car_tc_df) <- c("Centrality", "Node")
|
|
|
|
car_dtree_df <- as.data.frame(car_plots[[3,1]])
|
|
car_dtree_df$Node <- 1:nrow(as.data.frame(car_plots[[3,1]]))
|
|
colnames(car_dtree_df) <- c("Centrality", "Node")
|
|
|
|
fig1 <- plot_ly(x=car_base_df$Node, y=car_base_df$Centrality, type="bar")
|
|
fig2 <- plot_ly(x=car_base_df$Node, y=car_tc_df$Centrality, type="bar")
|
|
fig3 <- plot_ly(x=car_base_df$Node, y=car_dtree_df$Centrality, type="bar")
|
|
fig <- subplot(fig1, fig2, fig3, nrows = 3) %>%
|
|
layout(title = list(text = "Importance Distribution for Nodes
|
|
in the Automobile Maintenance Network"),
|
|
plot_bgcolor='#e5ecf6',
|
|
xaxis = list(
|
|
zerolinecolor = '#ffff',
|
|
zerolinewidth = 2,
|
|
gridcolor = 'ffff'),
|
|
yaxis = list(
|
|
zerolinecolor = '#ffff',
|
|
zerolinewidth = 2,
|
|
gridcolor = 'ffff'))
|
|
fig
|
|
|
|
plot(car_plots[[1,1]], type="h", xlab="Node",
|
|
ylab="Percentile (Importance Ranking)",
|
|
main="Importance Distribution for Nodes
|
|
in the Automobile Maintenance Network")
|