Degree, Betweenness, and beginning work of Katz Centrality

This commit is contained in:
Noah L. Schrick 2022-05-02 19:02:57 -05:00
parent 787f35b434
commit 9eacf9eb20
12 changed files with 139 additions and 46 deletions

View File

@ -1 +1 @@
,noah,NovaArchSys,02.05.2022 16:43,file:///home/noah/.config/libreoffice/4;
,noah,NovaArchSys,02.05.2022 17:47,file:///home/noah/.config/libreoffice/4;

Binary file not shown.

View File

@ -1333,4 +1333,27 @@ of 27},
Pages = {80},
Abstract = {<h4>Background</h4>Numerous centrality measures have been introduced to identify "central" nodes in large networks. The availability of a wide range of measures for ranking influential nodes leaves the user to decide which measure may best suit the analysis of a given network. The choice of a suitable measure is furthermore complicated by the impact of the network topology on ranking influential nodes by centrality measures. To approach this problem systematically, we examined the centrality profile of nodes of yeast protein-protein interaction networks (PPINs) in order to detect which centrality measure is succeeding in predicting influential proteins. We studied how different topological network features are reflected in a large set of commonly used centrality measures.<h4>Results</h4>We used yeast PPINs to compare 27 common of centrality measures. The measures characterize and assort influential nodes of the networks. We applied principal component analysis (PCA) and hierarchical clustering and found that the most informative measures depend on the network's topology. Interestingly, some measures had a high level of contribution in comparison to others in all PPINs, namely Latora closeness, Decay, Lin, Freeman closeness, Diffusion, Residual closeness and Average distance centralities.<h4>Conclusions</h4>The choice of a suitable set of centrality measures is crucial for inferring important functional properties of a network. We concluded that undertaking data reduction using unsupervised machine learning methods helps to choose appropriate variables (centrality measures). Hence, we proposed identifying the contribution proportions of the centrality measures with PCA as a prerequisite step of network analysis before inferring functional consequences, e.g., essentiality of a node.},
URL = {https://europepmc.org/articles/PMC6069823},
}
@Article{Katz,
author={Leo Katz},
title={{A new status index derived from sociometric analysis}},
journal={Psychometrika},
year=1953,
volume={18},
number={1},
pages={39-43},
month={March},
keywords={},
doi={10.1007/BF02289026},
abstract={No abstract is available for this item.},
url={https://ideas.repec.org/a/spr/psycho/v18y1953i1p39-43.html}
}
@article{ModKatz,
title={Katz centrality of Markovian temporal networks: Analysis and optimization},
author={Masaki Ogura and Victor M. Preciado},
journal={2017 American Control Conference (ACC)},
year={2017},
pages={5001-5006}
}

View File

@ -27,9 +27,12 @@
\@writefile{toc}{\contentsline {section}{\numberline {3}Experimental Networks}{4}{}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {4}Centralities}{4}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Introduction}{4}{}\protected@file@percent }
\citation{Katz}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Degree}{5}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Betweenness}{6}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Betweenness}{5}{}\protected@file@percent }
\newlabel{eq:between}{{1}{5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Katz}{6}{}\protected@file@percent }
\newlabel{eq:Katz}{{2}{6}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.5}K-Path Edge}{6}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.6}Adapted Page Rank}{6}{}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {5}Transitive Closure}{6}{}\protected@file@percent }
@ -53,6 +56,7 @@
\bibcite{noauthor_health_1996}{10}
\bibcite{PCI}{11}
\bibcite{PMID:30064421}{12}
\bibcite{Katz}{13}
\bibstyle{ieeetr}
\@writefile{toc}{\contentsline {section}{Bibliography}{7}{}\protected@file@percent }
\gdef \@abspage@last{7}

View File

@ -58,4 +58,8 @@ M.~Ashtiani, A.~Salehzadeh-Yazdi, Z.~Razaghi-Moghadam, H.~Hennig,
centrality measures for protein-protein interaction networks,'' {\em BMC
systems biology}, vol.~12, p.~80, July 2018.
\bibitem{Katz}
L.~Katz, ``{A new status index derived from sociometric analysis},'' {\em
Psychometrika}, vol.~18, pp.~39--43, March 1953.
\end{thebibliography}

View File

@ -4,45 +4,45 @@ The top-level auxiliary file: Schrick-Noah_CS-7863_Final-Report.aux
The style file: ieeetr.bst
Database file #1: Bibliography.bib
Warning--empty booktitle in Mieghem2018DirectedGA
You've used 12 entries,
You've used 13 entries,
1876 wiz_defined-function locations,
549 strings with 5647 characters,
and the built_in function-call counts, 2222 in all, are:
= -- 211
> -- 83
558 strings with 5746 characters,
and the built_in function-call counts, 2480 in all, are:
= -- 239
> -- 87
< -- 0
+ -- 34
- -- 22
* -- 138
:= -- 337
add.period$ -- 16
call.type$ -- 12
change.case$ -- 11
+ -- 36
- -- 23
* -- 156
:= -- 372
add.period$ -- 17
call.type$ -- 13
change.case$ -- 12
chr.to.int$ -- 0
cite$ -- 13
duplicate$ -- 117
empty$ -- 241
format.name$ -- 22
if$ -- 541
cite$ -- 14
duplicate$ -- 131
empty$ -- 266
format.name$ -- 23
if$ -- 607
int.to.chr$ -- 0
int.to.str$ -- 12
missing$ -- 8
newline$ -- 45
num.names$ -- 11
pop$ -- 54
int.to.str$ -- 13
missing$ -- 9
newline$ -- 48
num.names$ -- 12
pop$ -- 55
preamble$ -- 1
purify$ -- 0
quote$ -- 0
skip$ -- 55
skip$ -- 67
stack$ -- 0
substring$ -- 64
swap$ -- 28
substring$ -- 84
swap$ -- 35
text.length$ -- 0
text.prefix$ -- 0
top$ -- 0
type$ -- 0
warning$ -- 1
while$ -- 17
width$ -- 14
write$ -- 114
while$ -- 20
width$ -- 15
write$ -- 124
(There was 1 warning)

View File

@ -1,4 +1,4 @@
This is pdfTeX, Version 3.141592653-2.6-1.40.24 (TeX Live 2022/Arch Linux) (preloaded format=pdflatex 2022.4.29) 2 MAY 2022 16:54
This is pdfTeX, Version 3.141592653-2.6-1.40.24 (TeX Live 2022/Arch Linux) (preloaded format=pdflatex 2022.4.29) 2 MAY 2022 19:02
entering extended mode
restricted \write18 enabled.
%&-line parsing enabled.
@ -364,28 +364,38 @@ Underfull \hbox (badness 10000) in paragraph at lines 50--54
\OT1/cmr/m/n/10 Data Security Standard,'' May 2018. Available:
[]
Underfull \hbox (badness 1571) in paragraph at lines 62--64
[]\OT1/cmr/m/n/10 L. Katz, ``A new status index derived from sociometric analys
is,''
[]
) [7
] (./Schrick-Noah_CS-7863_Final-Report.aux) )
Here is how much of TeX's memory you used:
5373 strings out of 478238
96520 string characters out of 5850456
398294 words of memory out of 5000000
23550 multiletter control sequences out of 15000+600000
5376 strings out of 478238
96547 string characters out of 5850456
398315 words of memory out of 5000000
23553 multiletter control sequences out of 15000+600000
473860 words of font info for 43 fonts, out of 8000000 for 9000
1141 hyphenation exceptions out of 8191
67i,8n,77p,1807b,289s stack positions out of 5000i,500n,10000p,200000b,80000s
</usr/share/texmf-dist/fonts/type1
/public/amsfonts/cm/cmbx10.pfb></usr/share/texmf-dist/fonts/type1/public/amsfon
ts/cm/cmbx12.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmr10.pf
b></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmr12.pfb></usr/share/t
exmf-dist/fonts/type1/public/amsfonts/cm/cmr17.pfb></usr/share/texmf-dist/fonts
/type1/public/amsfonts/cm/cmti10.pfb>
Output written on Schrick-Noah_CS-7863_Final-Report.pdf (7 pages, 116347 bytes)
ts/cm/cmbx12.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmex10.p
fb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi10.pfb></usr/share
/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi7.pfb></usr/share/texmf-dist/fon
ts/type1/public/amsfonts/cm/cmr10.pfb></usr/share/texmf-dist/fonts/type1/public
/amsfonts/cm/cmr12.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cm
r17.pfb></usr/share/texmf-dist/fonts/type1/public/amsfonts/cm/cmr7.pfb></usr/sh
are/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy7.pfb></usr/share/texmf-dist/
fonts/type1/public/amsfonts/cm/cmti10.pfb>
Output written on Schrick-Noah_CS-7863_Final-Report.pdf (7 pages, 161981 bytes)
.
PDF statistics:
58 PDF objects out of 1000 (max. 8388607)
36 compressed objects within 1 object stream
83 PDF objects out of 1000 (max. 8388607)
51 compressed objects within 1 object stream
0 named destinations out of 1000 (max. 500000)
1 words of extra memory for PDF output out of 10000 (max. 10000000)

View File

@ -65,9 +65,25 @@ The work conducted in this approach utilized three compliance graphs, with their
\subsection{Introduction}
The authors of \cite{PMID:30064421} provide a survey of centrality measures, and discuss how various centrality measures have been introduced and implemented in order to determine node importance in networks. By determining the importance of nodes, various conclusions can be drawn regarding the network. In the case of compliance graphs, conclusions can be drawn regarding the prioritization of patching or correction schemes. If one node is known to lead to the creation of many other nodes, it may be said that a patch is imperative to prevent further opportunities for compliance violation. This work examines five centrality measures and their application to compliance graphs.
\subsection{Degree}
Degree centrality is a trivial, localized measure of node importance based on the number of edges that a node has. In an undirected graph, the degree centrality is predicated solely on the number of edges. However, in the case of a directed graph, a distinction is drawn with a degree centrality oriented on the number of edges coming into a node, and another measure focused on the number of edges leaving a node. Both of these cases provide useful information for compliance graphs. When a node has a large number of other nodes it points to, this node may be prioritized since it creates further room for violation. When a node has a large number of edges pointing to it, this node may be prioritized since the probability that systems may enter this state is higher due to the increased number of ways that a system could lead to this state.
Degree centrality is a trivial, localized measure of node importance based on the number of edges that a node has. In an undirected graph, the degree centrality is predicated solely on the number of edges. However, in the case of a directed graph, a distinction is drawn with a degree centrality oriented on the number of edges coming into a node, and another measure focused on the number of edges leaving a node. Both of these cases provide useful information for compliance graphs. When a node has a large number of other nodes it points to, this node may be prioritized since it creates further opportunity for violation. When a node has a large number of edges pointing to it, this node may be prioritized since the probability that systems may enter this state is higher due to the increased number of ways that a system could lead to this state.
\subsection{Betweenness}
Betweenness centrality ranks the importance of a node based on its ability to transfer information flow in a network. For every pair of nodes in the network, the shortest paths between them are determined, and each node that lies on such a path is considered to have importance. The total betweenness centrality of a node is based on the number of shortest paths that pass through it. For compliance graphs, the shortest paths are useful for identifying the quickest way that systems may fall out of compliance. By prioritizing the nodes that lie on the highest number of shortest paths, correction schemes can be employed to delay or prevent systems from falling out of compliance.
Betweenness centrality is given in Equation~\ref{eq:between}, where \textit{i} and \textit{j} are two distinct nodes in the network, $\sigma_{ij}$ is the total number of shortest paths from \textit{i} to \textit{j}, and $\sigma_{ij}(v)$ is the number of those shortest paths that pass through node \textit{v}.
\begin{equation}
C_{\mathrm{Betw}}(v)=\sum_{i \neq v \neq j} \frac{\sigma_{ij}(v)}{\sigma_{ij}}
\label{eq:between}
\end{equation}
\subsection{Katz}
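Katz centrality was first introduced by the author of \cite{Katz}. It measures the importance of a node through all paths in a network, rather than being limited to solely the shortest path between any two given nodes. The original work defines Katz centrality as seen in Equation~\ref{eq:Katz}, where $A$ is the adjacency matrix of the network, $n$ is the number of nodes, $(A^{k})_{ji}$ is the number of walks of length $k$ from node \textit{j} to node \textit{i}, and $\alpha$ is an attenuation factor that must be smaller than the reciprocal of the largest eigenvalue of $A$ for the sum to converge.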
\begin{equation}
C_{\mathrm {Katz} }(i)=\sum _{k=1}^{\infty }\sum _{j=1}^{n}\alpha ^{k}(A^{k})_{ji}
\label{eq:Katz}
\end{equation}
\subsection{K-Path Edge}
\subsection{Adapted Page Rank}

View File

@ -8,7 +8,7 @@
\contentsline {section}{\numberline {4}Centralities}{4}{}%
\contentsline {subsection}{\numberline {4.1}Introduction}{4}{}%
\contentsline {subsection}{\numberline {4.2}Degree}{5}{}%
\contentsline {subsection}{\numberline {4.3}Betweenness}{6}{}%
\contentsline {subsection}{\numberline {4.3}Betweenness}{5}{}%
\contentsline {subsection}{\numberline {4.4}Katz}{6}{}%
\contentsline {subsection}{\numberline {4.5}K-Path Edge}{6}{}%
\contentsline {subsection}{\numberline {4.6}Adapted Page Rank}{6}{}%

View File

@ -45,9 +45,25 @@ base_centralities[[3,1]] <- pci.deg %>% sort(decreasing = T)
#### Katz
car.katz <- katz.cent(car)
nodes <- car.katz %>% order(decreasing=T)
nodes <- head(nodes, 15)-1
vals <- car.katz %>% sort(decreasing=T)
vals <- head(vals, 15)
base_centralities[[1,2]] <- car.katz[rowSums(apply(car.katz,2,is.nan))==0,] %>% sort(decreasing = T)
base_centralities[[2,2]] <- katz.cent(hipaa) %>% sort(decreasing = T)
hipaa.katz <- katz.cent(hipaa)
nodes <- hipaa.katz %>% order(decreasing=T)
nodes <- head(nodes, 15)-1
vals <- hipaa.katz %>% sort(decreasing=T)
vals <- head(vals, 15)
base_centralities[[3,2]] <- katz.cent(pci) %>% sort(decreasing = T)
pci.katz <- katz.cent(pci)
nodes <- pci.katz %>% order(decreasing=T)
nodes <- head(nodes, 15)-1
vals <- pci.katz %>% sort(decreasing=T)
vals <- head(vals, 15)
### Page Rank
base_centralities[[1,3]] <- page.rank(car)$vector %>% sort(decreasing = T)
@ -159,8 +175,25 @@ tc_centralities[[3,1]] <- pci.tc.deg %>% sort(decreasing = T)
#### Katz
car.tc.katz <- katz.cent(car.tc)
tc_centralities[[1,2]] <- car.tc.katz[rowSums(apply(car.tc.katz,2,is.nan))==0,] %>% sort(decreasing = T)
car.tc.katz <- katz.cent(car.tc)
nodes <- car.tc.katz %>% order(decreasing=T)
nodes <- head(nodes, 15)-1
vals <- car.tc.katz %>% sort(decreasing=T)
vals <- head(vals, 15)
tc_centralities[[2,2]] <- katz.cent(hipaa.tc) %>% sort(decreasing = T)
hipaa.tc.katz <- katz.cent(hipaa.tc)
nodes <- hipaa.tc.katz %>% order(decreasing=T)
nodes <- head(nodes, 15)-1
vals <- hipaa.tc.katz %>% sort(decreasing=T)
vals <- head(vals, 15)
tc_centralities[[3,2]] <- katz.cent(pci.tc) %>% sort(decreasing = T)
pci.tc.katz <- katz.cent(pci.tc)
nodes <- pci.tc.katz %>% order(decreasing=T)
nodes <- head(nodes, 15)-1
vals <- pci.tc.katz %>% sort(decreasing=T)
vals <- head(vals, 15)
### Page Rank
tc_centralities[[1,3]] <- page.rank(car.tc)$vector %>% sort(decreasing = T)

View File

@ -6,7 +6,10 @@ katz.cent <- function(A, alpha=NULL, beta=NULL){ #NULL sets the default value
lam.dom <- eigen(A)$values[1] #dom eigenvec
if (is.null(alpha)){
alpha <- 0.9 * (1/lam.dom) #Set alpha to 90% of max allowed
if(lam.dom == 0)
alpha = 0.1
else
alpha <- 0.9 * (1/lam.dom) #Set alpha to 90% of max allowed
if (is.complex(alpha)){
alpha <- Re(alpha)
}
@ -19,6 +22,6 @@ katz.cent <- function(A, alpha=NULL, beta=NULL){ #NULL sets the default value
#Katz scores
scores <- solve(diag(n) - alpha*A,beta)
return(scores)
}