AG-CG-MPI-Tasking_Paper/TPDS_Submission/Bibliography.bib


% ParMETIS project web page. The author is an organisation, so it is
% double-braced to be treated as one indivisible name.
@misc{noauthor_parmetis_nodate,
  author = {{Karypis Lab}},
  title  = {{ParMETIS} - Parallel Graph Partitioning and Fill-reducing Matrix Ordering},
  note   = {http://glaros.dtc.umn.edu/gkhome/metis/parmetis/overview},
}

% Journal article (IEEE TDSC) on distributed attack graph generation.
@article{7087377,
  author  = {Kaynar, Kerem and Sivrikaya, Fikret},
  title   = {Distributed Attack Graph Generation},
  journal = {IEEE Transactions on Dependable and Secure Computing},
  year    = {2016},
  volume  = {13},
  number  = {5},
  pages   = {519--532},
  doi     = {10.1109/TDSC.2015.2423682},
}

% Boost Graph Library documentation page. Underscores in the URL are
% escaped as text-mode \_ because the URL is printed via the note field.
@misc{noauthor_boost_nodate,
  author = {Siek, Jeremy and Lee, Lie-Quan and Lumsdaine, Andrew},
  title  = {The {Boost} {Graph} {Library}, vers. 1.75.0},
  note   = {https://www.boost.org/doc/libs/1\_75\_0/libs/graph/doc/index.html},
}

% Conference paper (proceedings title kept as exported).
% month comes from the conference dates ("01-03-June") that the exporter
% had placed in the volume field; the 13-digit identifier is an ISBN.
@inproceedings{ainsworth_graph_2016,
  author    = {Ainsworth, Sam and Jones, Timothy M.},
  title     = {Graph prefetching using data structure knowledge},
  booktitle = {Proceedings of the International Conference on Supercomputing},
  year      = {2016},
  month     = jun,
  isbn      = {9781450343619},
  doi       = {10.1145/2925426.2926254},
  keywords  = {Graphs, Prefetching},
  abstract  = {Searches on large graphs are heavily memory latency bound, as a result of many high latency DRAM accesses. Due to the highly irregular nature of the access patterns involved, caches and prefetchers, both hardware and software, perform poorly on graph workloads. This leads to CPU stalling for the majority of the time. However, in many cases the data access pattern is well defined and predictable in advance, many falling into a small set of simple patterns. Although existing implicit prefetchers cannot bring significant benefit, a prefetcher armed with knowledge of the data structures and access patterns could accurately anticipate applications' traversals to bring in the appropriate data. This paper presents a design of an explicitly configured prefetcher to improve performance for breadth-first searches and sequential iteration on the efficient and commonly-used compressed sparse row graph format. By snooping L1 cache accesses from the core and reacting to data returned from its own prefetches, the prefetcher can schedule timely loads of data in advance of the application needing it. For a range of applications and graph sizes, our prefetcher achieves average speedups of 2.3$\times$, and up to 3.3$\times$, with little impact on memory bandwidth requirements.},
  file      = {Graph Prefetching Using Data Structure Knowledge:/home/noah/Zotero/storage/UUVEP42L/Graph Prefetching Using Data Structure Knowledge.pdf:application/pdf},
}

% Master's thesis describing the RAGE attack graph engine.
@mastersthesis{cook_rage_2018,
  author = {Cook, Kyle},
  title  = {{RAGE}: {The} {Rage} {Attack} {Graph} {Engine}},
  school = {The {University} of {Tulsa}},
  year   = {2018},
  file   = {Kyle Cook Thesis:/home/noah/Zotero/storage/2SR28HM2/Kyle Cook Thesis.pdf:application/pdf},
}

% Conference paper (CISRC 2016); the 13-digit identifier is an ISBN.
@inproceedings{cook_scalable_2016,
  author    = {Cook, Kyle and Shaw, Thomas and Hale, John and Hawrylak, Peter},
  title     = {Scalable attack graph generation},
  booktitle = {Proceedings of the 11th Annual Cyber and Information Security Research Conference, CISRC 2016},
  year      = {2016},
  isbn      = {9781450337526},
  doi       = {10.1145/2897795.2897821},
  keywords  = {Attack graphs, Attack modeling, Vulnerability analysis},
  abstract  = {Attack graphs are a powerful modeling technique with which to explore the attack surface of a system. However, they can be difficult to generate due to the exponential growth of the state space, often times making exhaustive search impractical. This paper discusses an approach for generating large attack graphs with an emphasis on scalable generation over a distributed system. First, a serial algorithm is presented, highlighting bottlenecks and opportunities to exploit inherent concurrency in the generation process. Then a strategy to parallelize this process is presented. Finally, we discuss plans for future work to implement the parallel algorithm using a hybrid distributed/shared memory programming model on a heterogeneous compute node cluster.},
  file      = {Attachment:/home/noah/Zotero/storage/2YNSLTQH/Scalable Attack Graph Generation:application/pdf},
}

% Conference paper (FPGA 2016); the 13-digit identifier is an ISBN.
@inproceedings{dai_fpgp_2016,
  author    = {Dai, Guohao and Chi, Yuze and Wang, Yu and Yang, Huazhong},
  title     = {{FPGP}: {Graph} processing framework on {FPGA}: {A} case study of breadth-first search},
  booktitle = {FPGA 2016 - Proceedings of the 2016 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  year      = {2016},
  pages     = {105--110},
  isbn      = {9781450338561},
  doi       = {10.1145/2847263.2847339},
  keywords  = {FPGA framework, Large scale graph processing},
  abstract  = {Large-scale graph processing is gaining increasing attentions in many domains. Meanwhile, FPGA provides a power-efficient and highly parallel platform for many applications, and has been applied to custom computing in many domains. In this paper, we describe FPGP (FPGA Graph Processing), a streamlined vertex-centric graph processing framework on FPGA, based on the interval-shard structure. FPGP is adaptable to different graph algorithms and users do not need to change the whole implementation on the FPGA. In our implementation, an on-chip parallel graph processor is proposed to both maximize the off-chip bandwidth of graph data and fully utilize the parallelism of graph processing. Meanwhile, we analyze the performance of FPGP and show the scalability of FPGP when the bandwidth of data path increases. FPGP is more power-efficient than single machine systems and scalable to larger graphs compared with other FPGA-based graph systems.},
  file      = {FPGP\: Graph Processing Framework on FPGA:/home/noah/Zotero/storage/QJUQ3SDZ/FPGP Graph Processing Framework on FPGA.pdf:application/pdf},
}

% U.S. patent; the issue date is carried in the note field.
% All inventors use the "Last, First" form so they sort and format alike.
@misc{j_hale_compliance_nodate,
  author = {Hale, J. and Hawrylak, P. and Papa, M.},
  title  = {Compliance {Method} for a {Cyber}-{Physical} {System}},
  note   = {U.S. Patent Number 9,471,789, Oct. 18, 2016.},
  number = {9471789},
  file   = {Complaince_Graph_US_Patent_9471789:/home/noah/Zotero/storage/55BZN4U7/Complaince_Graph_US_Patent_9471789.pdf:application/pdf},
}

% Conference paper; the 13-digit identifier is an ISBN.
% NOTE(review): booktitle holds the series name as exported; the actual
% proceedings title is not recorded here - confirm via the DOI.
@inproceedings{li_combining_2019,
  author    = {Li, Ming and Hawrylak, Peter and Hale, John},
  title     = {Combining {OpenCL} and {MPI} to support heterogeneous computing on a cluster},
  booktitle = {ACM International Conference Proceeding Series},
  year      = {2019},
  isbn      = {9781450372275},
  doi       = {10.1145/3332186.3333059},
  keywords  = {Heterogeneous computing, HPC, MDP, MPI, OpenCL, Parallelism},
  abstract  = {This paper presents an implementation of a heterogeneous programming model which combines Open Computing Language (OpenCL) and Message Passing Interface (MPI). The model is applied to solving a Markov decision process (MDP) with value iteration method. The performance test is conducted on a high performance computing cluster. At peak performance, the model is able to achieve a 57X speedup over a serial implementation. For an extremely large input MDP, which has 1,000,000 states, the obtained speedup is still over 12X, showing that this heterogeneous programming model can solve MDPs more efficiently than the serial solver does.},
  file      = {Combining OpenCL and MPI to Support Heterogeneous Computing on a Cluster:/home/noah/Zotero/storage/TXHCQ5S8/Combining OpenCL and MPI to Support Heterogeneous Computing on a Cluster.pdf:application/pdf},
}

% Conference paper (ICDIS 2019); the 13-digit identifier is an ISBN.
@inproceedings{li_concurrency_2019,
  author    = {Li, Ming and Hawrylak, Peter and Hale, John},
  title     = {Concurrency {Strategies} for {Attack} {Graph} {Generation}},
  booktitle = {Proceedings - 2019 2nd International Conference on Data Intelligence and Security, ICDIS 2019},
  year      = {2019},
  pages     = {174--179},
  isbn      = {9781728120805},
  doi       = {10.1109/ICDIS.2019.00033},
  keywords  = {Attack Graph, Multi-threaded Programming, Network Security, OpenMP},
  abstract  = {The network attack graph is a powerful tool for analyzing network security, but the generation of a large-scale graph is non-trivial. The main challenge is from the explosion of network state space, which greatly increases time and storage costs. In this paper, three parallel algorithms are proposed to generate scalable attack graphs. An OpenMP-based programming implementation is used to test their performance. Compared with the serial algorithm, the best performance from the proposed algorithms provides a 10X speedup.},
  file      = {Ming_LI_Thesis:/home/noah/Zotero/storage/CLSLS335/Ming_LI_Thesis.pdf:application/pdf},
}

% Conference paper (CCS '06); the 10-digit identifier is an ISBN.
% NOTE(review): doi is derived from the attachment name
% (1180405.1180446) under the ACM prefix - confirm it resolves.
@inproceedings{ou_scalable_2006,
  author    = {Ou, Xinming and Boyer, Wayne F. and McQueen, Miles A.},
  title     = {A {Scalable} {Approach} to {Attack} {Graph} {Generation}},
  booktitle = {CCS '06: Proceedings of the 13th ACM conference on Computer and communications security},
  year      = {2006},
  pages     = {336--345},
  isbn      = {1595935185},
  doi       = {10.1145/1180405.1180446},
  keywords  = {attack graphs, enterprise network security, logic-programming},
  file      = {1180405.1180446:/home/noah/Zotero/storage/TJKHVC4R/1180405.1180446.pdf:application/pdf},
}

% Conference paper (PACT); the 13-digit identifier is an ISBN.
@inproceedings{yao_efficient_2018,
  author    = {Yao, Pengcheng and Zheng, Long and Liao, Xiaofei and Jin, Hai and He, Bingsheng},
  title     = {An efficient graph accelerator with parallel data conflict management},
  booktitle = {Parallel Architectures and Compilation Techniques - Conference Proceedings, PACT},
  year      = {2018},
  isbn      = {9781450359863},
  doi       = {10.1145/3243176.3243201},
  abstract  = {Graph-specific computing with the support of dedicated accelerator has greatly boosted the graph processing in both efficiency and energy. Nevertheless, their data conflict management is still sequential when certain vertex needs a large number of conflicting updates at the same time, leading to prohibitive performance degradation. This is particularly true and serious for processing natural graphs. In this paper, we have the insight that the atomic operations for the vertex updating of many graph algorithms (e.g., BFS, PageRank, and WCC) are typically incremental and simplex. This hence allows us to parallelize the conflicting vertex updates in an accumulative manner. We architect AccuGraph, a novel graph-specific accelerator that can simultaneously process atomic vertex updates for massive parallelism while ensuring the correctness. A parallel accumulator is designed to remove the serialization in atomic protections for conflicting vertex updates through merging their results in parallel. Our implementation on Xilinx FPGA with a wide variety of typical graph algorithms shows that our accelerator achieves an average throughput by 2.36 GTEPS as well as up to 3.14x performance speedup in comparison with state-of-the-art ForeGraph (with its single-chip version).},
  file      = {An efficient graph accelerator with parallel data conflict management:/home/noah/Zotero/storage/NMA7DQ5B/An efficient graph accelerator with parallel data conflict management.pdf:application/pdf},
}

% Conference paper (FPGA 2017); the 13-digit identifier is an ISBN.
@inproceedings{zhang_boosting_2017,
  author    = {Zhang, Jialiang and Khoram, Soroosh and Li, Jing},
  title     = {Boosting the performance of {FPGA}-based graph processor using hybrid memory cube: {A} case for breadth first search},
  booktitle = {FPGA 2017 - Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  year      = {2017},
  pages     = {207--216},
  isbn      = {9781450343541},
  doi       = {10.1145/3020078.3021737},
  abstract  = {Large graph processing has gained great attention in recent years due to its broad applicability from machine learning to social science. Large real-world graphs, however, are inherently difficult to process efficiently, not only due to their large memory footprint, but also that most graph algorithms entail memory access patterns with poor locality and a low compute-to-memory access ratio. In this work, we leverage the exceptional random access performance of emerging Hybrid Memory Cube (HMC) technology that stacks multiple DRAM dies on top of a logic layer, combined with the flexibility and efficiency of FPGA to address these challenges. To our best knowledge, this is the first work that implements a graph processing system on a FPGA-HMC platform based on software/hardware co-design and co-optimization. We first present the modifications of algorithm and a platform-aware graph processing architecture to perform level-synchronized breadth first search (BFS) on FPGA-HMC platform. To gain better insights into the potential bottlenecks of proposed implementation, we develop an analytical performance model to quantitatively evaluate the HMC access latency and corresponding BFS performance. Based on the analysis, we propose a two-level bitmap scheme to further reduce memory access and perform optimization on key design parameters (e.g. memory access granularity). Finally, we evaluate the performance of our BFS implementation using the AC-510 development kit from Micron. We achieved 166 million edges traversed per second (MTEPS) using GRAPH500 benchmark on a random graph with a scale of 25 and an edge factor of 16, which significantly outperforms CPU and other FPGA-based large graph processors.},
  file      = {Boosting the Performance of FPGA-based Graph Processor using Hybrdi Memory Cube:/home/noah/Zotero/storage/CDKPUXYF/Boosting the Performance of FPGA-based Graph Processor using Hybrdi Memory Cube.pdf:application/pdf},
}

% Textbook on parallel programming.
% NOTE(review): edition was exported as "Print" (a medium, not an ordinal);
% set to First on the assumption that the 2011 printing is the first edition.
@book{pacheco_introduction_2011,
  author    = {Pacheco, Peter},
  title     = {An {Introduction} to {Parallel} {Programming}},
  edition   = {First},
  publisher = {Morgan Kaufmann},
  year      = {2011},
  isbn      = {978-0-12-374260-5},
}

% Journal article (PVLDB) evaluating distributed graph processing systems.
% NOTE(review): second author restored to the published form with
% diacritic and leading initial - confirm against the DOI record.
@article{ammar_experimental_2018,
  author   = {Ammar, Khaled and {\"O}zsu, M. Tamer},
  title    = {Experimental {Analysis} of {Distributed} {Graph} {Systems}},
  journal  = {Proceedings of the VLDB Endowment},
  year     = {2018},
  month    = jun,
  volume   = {11},
  number   = {10},
  doi      = {10.14778/3231751.3231764},
  urldate  = {2021-04-02},
  keywords = {Computer Science - Distributed, Parallel, and Cluster Computing},
  abstract = {This paper evaluates eight parallel graph processing systems: Hadoop, HaLoop, Vertica, Giraph, GraphLab (PowerGraph), Blogel, Flink Gelly, and GraphX (SPARK) over four very large datasets (Twitter, World Road Network, UK 200705, and ClueWeb) using four workloads (PageRank, WCC, SSSP and K-hop). The main objective is to perform an independent scale-out study by experimentally analyzing the performance, usability, and scalability (using up to 128 machines) of these systems. In addition to performance results, we discuss our experiences in using these systems and suggest some system tuning heuristics that lead to better performance.},
  annote   = {Comment: Volume 11 of Proc. VLDB Endowment},
  file     = {arXiv Fulltext PDF:/home/noah/Zotero/storage/QJA73MYR/Ammar and Ozsu - 2018 - Experimental Analysis of Distributed Graph Systems.pdf:application/pdf;arXiv.org Snapshot:/home/noah/Zotero/storage/TTUFSAHW/1806.html:text/html},
}

% Survey article on vertex-centric distributed graph processing frameworks.
@article{mccune_thinking_2015,
  author  = {McCune, Robert and Weninger, Tim and Madey, Greg},
  title   = {Thinking {Like} a {Vertex}: {A} {Survey} of {Vertex}-{Centric} {Frameworks} for {Large}-{Scale} {Distributed} {Graph} {Processing}},
  journal = {ACM Computing Surveys},
  year    = {2015},
  volume  = {48},
  number  = {2},
  doi     = {10.1145/2818185},
}

% Conference paper on pass-the-hash credential theft and reuse.
@inproceedings{dimov_pass--hash_2017,
  author    = {Dimov, Dimo and Tzonev, Yulian},
  title     = {Pass-the-{Hash}: {One} of the {Most} {Prevalent} {Yet} {Underrated} {Attacks} for {Credentials} {Theft} and {Reuse}},
  booktitle = {18th {International} {Conference} on {Computer} {Systems} and {Technologies}},
  year      = {2017},
  pages     = {149--154},
  doi       = {10.1145/3134302.3134338},
}

% Conference paper (SAICSIT '19) on data privacy compliance.
% The accented surname uses a LaTeX escape so classic 8-bit BibTeX
% sorts and labels it correctly.
@inproceedings{baloyi_guidelines_2019,
  author    = {Baloyi, Ntsako and Kotz{\'e}, Paula},
  title     = {Guidelines for {Data} {Privacy} {Compliance}: {A} {Focus} on {Cyberphysical} {Systems} and {Internet} of {Things}},
  booktitle = {{SAICSIT} '19: {Proceedings} of the {South} {African} {Institute} of {Computer} {Scientists} and {Information} {Technologists} 2019},
  publisher = {Association for Computing Machinery},
  address   = {Skukuza, South Africa},
  year      = {2019},
  doi       = {10.1145/3351108.3351143},
}

% Magazine article (ACM Queue). The title carries no terminal period;
% the bibliography style supplies the closing punctuation.
@article{allman_complying_2006,
  author  = {Allman, Eric},
  title   = {Complying with {Compliance}: {Blowing} it off is not an option},
  journal = {ACM Queue},
  year    = {2006},
  volume  = {4},
  number  = {7},
}

% Conference paper (HPC '15) on edge-list to adjacency-list conversion.
@inproceedings{arifuzzaman_fast_2015,
  author    = {Arifuzzaman, Shaikh and Khan, Maleq},
  title     = {Fast parallel conversion of edge list to adjacency list for large-scale graphs},
  booktitle = {{HPC} '15: {Proceedings} of the {Symposium} on {High} {Performance} {Computing}},
  year      = {2015},
  month     = apr,
  pages     = {17--24},
}

% Conference paper (HPCCT 2018) on building large graph data structures.
@inproceedings{yu_construction_2018,
  author    = {Yu, Xinjie and Chen, Wentao and Miao, Jiajia and Chen, Jian and Mao, Handong and Luo, Qiong and Gu, Lin},
  title     = {The {Construction} of {Large} {Graph} {Data} {Structures} in a {Scalable} {Distributed} {Message} {System}},
  booktitle = {{HPCCT} 2018: {Proceedings} of the 2018 2nd {High} {Performance} {Computing} and {Cluster} {Technologies} {Conference}},
  year      = {2018},
  month     = jun,
  pages     = {6--10},
  doi       = {10.1145/3234664.3234682},
}

% Conference paper (CIKM '16) on compressed distributed graph processing.
@inproceedings{liakos_memory-optimized_2016,
  author    = {Liakos, Panagiotis and Papakonstantinopoulou, Katia and Delis, Alex},
  title     = {Memory-{Optimized} {Distributed} {Graph} {Processing} through {Novel} {Compression} {Techniques}},
  booktitle = {{CIKM} '16: {Proceedings} of the 25th {ACM} {International} {Conference} on {Information} and {Knowledge} {Management}},
  year      = {2016},
  month     = oct,
  pages     = {2317--2322},
  doi       = {10.1145/2983323.2983687},
}

% Parallel Boost Graph Library documentation page (version 1.73.0).
% Authors are separated with "and"; BibTeX does not accept commas
% between names.
@software{noauthor_parallel_nodate-1,
  author  = {Edmonds, Nick and Gregor, Douglas and Lumsdaine, Andrew},
  title   = {Parallel {BGL} {Distributed} {Adjacency} {List}},
  version = {1.73.0},
  url     = {https://www.boost.org/doc/libs/1_73_0/libs/graph_parallel/doc/html/distributed_adjacency_list.html},
  urldate = {2021-04-11},
}

% Slurm workload manager overview page, version 23.02.
@misc{Slurm,
  author       = {SchedMD},
  title        = {Slurm {Workload} {Manager}},
  howpublished = {https://slurm.schedmd.com/overview.html},
  year         = {2023},
  month        = apr,
  note         = {Version 23.02},
}

% Workshop paper (HPGP '16) on graph topology abstraction for path queries.
@inproceedings{balaji_graph_2016,
  author    = {Balaji, Janani and Sunderraman, Rajshekhar},
  title     = {Graph {Topology} {Abstraction} for {Distributed} {Path} {Queries}},
  booktitle = {{HPGP} '16: {Proceedings} of the {ACM} {Workshop} on {High} {Performance} {Graph} {Processing}},
  year      = {2016},
  month     = may,
  pages     = {27--34},
  doi       = {10.1145/2915516.2915520},
}


% Workshop paper (IPDPSW 2020) on a CUDA attack graph generator.
@inproceedings{9150145,
  author    = {Li, Ming and Hawrylak, Peter J. and Hale, John},
  title     = {Implementing an Attack Graph Generator in {CUDA}},
  booktitle = {2020 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)},
  year      = {2020},
  pages     = {730--738},
  doi       = {10.1109/IPDPSW50202.2020.00128},
}


% Conference paper (ATIT 2021) modelling virus spread in computer networks.
@inproceedings{9678822,
  author    = {Dakhno, Natalia and Leshchenko, Olga and Kravchenko, Yurii and Dudnik, Andriy and Trush, Olexandr and Khankishiev, Victor},
  title     = {Dynamic Model of the Spread of Viruses in a Computer Network Using Differential Equations},
  booktitle = {2021 IEEE 3rd International Conference on Advanced Trends in Information Theory (ATIT)},
  year      = {2021},
  pages     = {111--115},
  doi       = {10.1109/ATIT54053.2021.9678822},
}

% Conference paper (ICUFN 2017) on an IPTV network architecture.
@inproceedings{7993827,
  author    = {Kwon, Minhae and Kwon, Jungmin and Park, Byungchul and Park, Hyunggon},
  title     = {An architecture of {IPTV} networks based on network coding},
  booktitle = {2017 Ninth International Conference on Ubiquitous and Future Networks (ICUFN)},
  year      = {2017},
  pages     = {462--464},
  doi       = {10.1109/ICUFN.2017.7993827},
}

% Conference paper (ICSP 2018) on routing for large-scale computer networks.
@inproceedings{8652334,
  author    = {Bai, Xiaodan and Liang, Mangui and Zhu, Senpeng},
  title     = {A New Routing Scheme for Large-scale Computer Network},
  booktitle = {2018 14th IEEE International Conference on Signal Processing (ICSP)},
  year      = {2018},
  pages     = {1019--1023},
  doi       = {10.1109/ICSP.2018.8652334},
}

% Workshop paper (Mobile IoT SSP'18) presenting the A2G2V tool.
% The tool name is braced so sentence-casing styles keep its capitals.
@inproceedings{CPSIOT,
  author    = {Al Ghazo, Alaa T. and Ibrahim, Mariam and Ren, Hao and Kumar, Ratnesh},
  title     = {{A2G2V}: Automated Attack Graph Generator and Visualizer},
  booktitle = {Proceedings of the 1st ACM MobiHoc Workshop on Mobile IoT Sensing, Security, and Privacy},
  series    = {Mobile IoT SSP'18},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Los Angeles, CA, USA},
  year      = {2018},
  articleno = {3},
  numpages  = {6},
  isbn      = {9781450358606},
  doi       = {10.1145/3215466.3215468},
  url       = {https://doi.org/10.1145/3215466.3215468},
  keywords  = {Model Checking, Security, Enumerating Counterexamples, Internet of Things, Attack Graph, Cyber-Physical Systems},
}

% Journal article (Digital Threats) on hybrid attack graph generation.
% month uses the bare three-letter macro so the style can expand it.
@article{ming_jo,
  author    = {Li, Ming and Hawrylak, Peter and Hale, John},
  title     = {Strategies for Practical Hybrid Attack Graph Generation and Analysis},
  journal   = {Digital Threats},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = {2021},
  month     = oct,
  issn      = {2692-1626},
  doi       = {10.1145/3491257},
  url       = {https://doi.org/10.1145/3491257},
  keywords  = {cyber-physical system, high performance computing, attack graph, breadth-first search},
  abstract  = {As an analytical tool in cyber-security, an attack graph (AG) is capable of discovering multi-stage attack vectors on target computer networks. Cyber-physical systems (CPSs) comprise a special type of network that not only contains computing devices but also integrates components that operate in the continuous domain, such as sensors and actuators. Using AGs on CPSs requires that the system models and exploit patterns capture both token- and real-valued information. In this paper, we describe a hybrid AG model for security analysis of CPSs and computer networks. Specifically, we focus on two issues related to applying the model in practice: efficient hybrid AG generation and techniques for information extraction from them. To address the first issue, we present an accelerated hybrid AG generator that employs parallel programming and high performance computing (HPC). We conduct performance tests on CPU and GPU platforms to characterize the efficiency of our parallel algorithms. To address the second issue, we introduce an analytical regimen based on centrality analysis and apply it to a hybrid AG generated for a target CPS system to discover effective vulnerability remediation solutions.},
}

% Journal article (Commun. ACM) re-examining Amdahl's law.
% Page range uses the ASCII double hyphen; month is the bare macro.
@article{Gust,
  author     = {Gustafson, John L.},
  title      = {Reevaluating {Amdahl's} Law},
  journal    = {Commun. ACM},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  year       = {1988},
  month      = may,
  issue_date = {May 1988},
  volume     = {31},
  number     = {5},
  pages      = {532--533},
  numpages   = {2},
  issn       = {0001-0782},
  doi        = {10.1145/42411.42415},
  url        = {https://doi.org/10.1145/42411.42415},
}

% Conference paper (Supercomputing 1990) on parallel speedup.
@inproceedings{sun,
  author    = {Sun, Xian-He and Ni, Lionel M},
  title     = {Another view on parallel speedup},
  booktitle = {Proceedings of the 1990 ACM/IEEE conference on Supercomputing},
  year      = {1990},
  pages     = {324--333},
}

% Conference paper (AFIPS '67 Spring) by Amdahl.
% Page range uses the ASCII double hyphen rather than a Unicode en-dash.
@inproceedings{Amdahl,
  author    = {Amdahl, Gene M.},
  title     = {Validity of the Single Processor Approach to Achieving Large Scale Computing Capabilities},
  booktitle = {Proceedings of the April 18-20, 1967, Spring Joint Computer Conference},
  series    = {AFIPS '67 (Spring)},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Atlantic City, New Jersey},
  year      = {1967},
  pages     = {483--485},
  numpages  = {3},
  isbn      = {9781450378956},
  doi       = {10.1145/1465482.1465560},
  url       = {https://doi.org/10.1145/1465482.1465560},
}

% Journal article (IEEE OJ-CS) on synchronous exploit firing.
% No issue number is recorded for this entry.
@article{10124989,
  author  = {Schrick, Noah L. and Hawrylak, Peter J.},
  title   = {State Space Explosion Mitigation for Large-Scale Attack and Compliance Graphs Using Synchronous Exploit Firing},
  journal = {IEEE Open Journal of the Computer Society},
  year    = {2023},
  volume  = {4},
  pages   = {147--157},
  doi     = {10.1109/OJCS.2023.3276370},
}