diff --git a/Schrick-Noah_AG-CG-CR.aux b/Schrick-Noah_AG-CG-CR.aux index 167d6a0..159de4c 100644 --- a/Schrick-Noah_AG-CG-CR.aux +++ b/Schrick-Noah_AG-CG-CR.aux @@ -44,9 +44,13 @@ \citation{li_concurrency_2019} \citation{li_combining_2019} \citation{zhang_boosting_2017} -\@writefile{toc}{\contentsline {section}{\numberline {III}Implementation}{2}{section.3}\protected@file@percent } -\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-A}}Memory Constraint Difficulties}{2}{subsection.3.1}\protected@file@percent } -\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-B}}Checkpointing}{2}{subsection.3.2}\protected@file@percent } +\@writefile{toc}{\contentsline {section}{\numberline {III}Methodology}{2}{section.3}\protected@file@percent } +\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-A}}Checkpointing}{2}{subsection.3.1}\protected@file@percent } +\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces An Attack or Compliance Graph Undergoing Generation}}{2}{figure.1}\protected@file@percent } +\newlabel{fig:cr}{{1}{2}{An Attack or Compliance Graph Undergoing Generation}{figure.1}{}} +\newlabel{sec:mem-constraint}{{\mbox {III-A}1}{2}{Memory Constraint Difficulties}{subsubsection.3.1.1}{}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {\mbox {III-A}1}Memory Constraint Difficulties}{2}{subsubsection.3.1.1}\protected@file@percent } +\@writefile{toc}{\contentsline {subsubsection}{\numberline {\mbox {III-A}2}Implementation}{2}{subsubsection.3.1.2}\protected@file@percent } \bibdata{Bibliography} \bibcite{schneier_modeling_1999}{1} \bibcite{j_hale_compliance_nodate}{2} @@ -59,14 +63,14 @@ \bibcite{hursey2010coordinated}{9} \bibcite{SCR}{10} \bibcite{dmtcp}{11} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {\mbox {III-A}3}Portability}{3}{subsubsection.3.1.3}\protected@file@percent } +\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox 
{III-B}}Restarting}{3}{subsection.3.2}\protected@file@percent } +\@writefile{toc}{\contentsline {section}{\numberline {IV}Results}{3}{section.4}\protected@file@percent } +\@writefile{toc}{\contentsline {section}{\numberline {V}Conclusions and Future Work}{3}{section.5}\protected@file@percent } +\@writefile{toc}{\contentsline {section}{References}{3}{section*.1}\protected@file@percent } \bibcite{BLCR}{12} \bibcite{cook_scalable_2016}{13} \bibcite{li_concurrency_2019}{14} \bibcite{li_combining_2019}{15} \bibstyle{ieeetr} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {\mbox {III-B}1}Portability}{3}{subsubsection.3.2.1}\protected@file@percent } -\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-C}}Restarting}{3}{subsection.3.3}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{\numberline {IV}Results}{3}{section.4}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{\numberline {V}Conclusions and Future Work}{3}{section.5}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{References}{3}{section*.1}\protected@file@percent } -\gdef \@abspage@last{3} +\gdef \@abspage@last{4} diff --git a/Schrick-Noah_AG-CG-CR.log b/Schrick-Noah_AG-CG-CR.log index e3bf1cc..175d3ad 100644 --- a/Schrick-Noah_AG-CG-CR.log +++ b/Schrick-Noah_AG-CG-CR.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023/Arch Linux) (preloaded format=pdflatex 2023.4.3) 23 APR 2023 16:27 +This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023/Arch Linux) (preloaded format=pdflatex 2023.4.3) 23 APR 2023 17:00 entering extended mode restricted \write18 enabled. %&-line parsing enabled. 
@@ -513,9 +513,26 @@ Underfull \hbox (badness 1960) in paragraph at lines 70--75 \OT1/ptm/m/n/10 scalability to mitigate state space explosion or lengthy [] -[2] -Underfull \hbox (badness 4660) in paragraph at lines 117--122 - \OT1/ptm/m/it/10 1) Portability: [][][] \OT1/ptm/m/n/10 The checkpointing proc +<./images/checkpoint.png, id=91, 755.82375pt x 402.50375pt> +File: ./images/checkpoint.png Graphic file (type png) + +Package pdftex.def Info: ./images/checkpoint.png used on input line 83. +(pdftex.def) Requested size: 252.0pt x 134.19624pt. + +Underfull \hbox (badness 3158) in paragraph at lines 89--94 + \OT1/ptm/m/it/10 1) Memory Constraint Difficulties: [][][] \OT1/ptm/m/n/10 Whi +le the design + [] + + +Underfull \hbox (badness 10000) in paragraph at lines 102--108 + \OT1/ptm/m/it/10 2) Implementation: [][][] \OT1/ptm/m/n/10 Rather than only a +static + [] + +[2 <./images/checkpoint.png>] +Underfull \hbox (badness 4660) in paragraph at lines 125--130 + \OT1/ptm/m/it/10 3) Portability: [][][] \OT1/ptm/m/n/10 The checkpointing proc ess is greatly [] @@ -535,7 +552,7 @@ Underfull \hbox (badness 5091) in paragraph at lines 54--56 []\OT1/ptm/m/n/8 J. Ansel, K. Arya, and G. Cooperman, ``Dmtcp: Transparent [] -) +[3]) ** Conference Paper ** Before submitting the final camera ready copy, remember to: @@ -547,18 +564,20 @@ Before submitting the final camera ready copy, remember to: uses only Type 1 fonts and that every step in the generation process uses the appropriate paper size. -[3] (./Schrick-Noah_AG-CG-CR.aux) +[4 + +] (./Schrick-Noah_AG-CG-CR.aux) Package rerunfilecheck Info: File `Schrick-Noah_AG-CG-CR.out' has not changed. -(rerunfilecheck) Checksum: 6E2DC49B6AC85A528B419E5F14917A57;1246. +(rerunfilecheck) Checksum: CC85FF3DB94FE8393E2ED734D36908F3;1379. 
) Here is how much of TeX's memory you used: - 12024 strings out of 476025 - 190058 string characters out of 5796533 + 12042 strings out of 476025 + 190409 string characters out of 5796533 1871388 words of memory out of 5000000 - 32293 multiletter control sequences out of 15000+600000 + 32305 multiletter control sequences out of 15000+600000 544489 words of font info for 89 fonts, out of 8000000 for 9000 1141 hyphenation exceptions out of 8191 - 75i,8n,76p,1314b,452s stack positions out of 5000i,500n,10000p,200000b,80000s + 75i,8n,76p,1314b,588s stack positions out of 5000i,500n,10000p,200000b,80000s < /usr/share/texmf-dist/fonts/type1/urw/times/utmbi8a.pfb> -Output written on Schrick-Noah_AG-CG-CR.pdf (3 pages, 110939 bytes). +Output written on Schrick-Noah_AG-CG-CR.pdf (4 pages, 133124 bytes). PDF statistics: - 148 PDF objects out of 1000 (max. 8388607) - 125 compressed objects within 2 object streams - 29 named destinations out of 1000 (max. 500000) - 81 words of extra memory for PDF output out of 10000 (max. 10000000) + 163 PDF objects out of 1000 (max. 8388607) + 137 compressed objects within 2 object streams + 32 named destinations out of 1000 (max. 500000) + 94 words of extra memory for PDF output out of 10000 (max. 
10000000) diff --git a/Schrick-Noah_AG-CG-CR.out b/Schrick-Noah_AG-CG-CR.out index 7bcba8e..7c57e40 100644 --- a/Schrick-Noah_AG-CG-CR.out +++ b/Schrick-Noah_AG-CG-CR.out @@ -1,10 +1,11 @@ \BOOKMARK [1][-]{section.1}{\376\377\000I\000n\000t\000r\000o\000d\000u\000c\000t\000i\000o\000n}{}% 1 \BOOKMARK [1][-]{section.2}{\376\377\000R\000e\000l\000a\000t\000e\000d\000\040\000W\000o\000r\000k}{}% 2 -\BOOKMARK [1][-]{section.3}{\376\377\000I\000m\000p\000l\000e\000m\000e\000n\000t\000a\000t\000i\000o\000n}{}% 3 -\BOOKMARK [2][-]{subsection.3.1}{\376\377\000M\000e\000m\000o\000r\000y\000\040\000C\000o\000n\000s\000t\000r\000a\000i\000n\000t\000\040\000D\000i\000f\000f\000i\000c\000u\000l\000t\000i\000e\000s}{section.3}% 4 -\BOOKMARK [2][-]{subsection.3.2}{\376\377\000C\000h\000e\000c\000k\000p\000o\000i\000n\000t\000i\000n\000g}{section.3}% 5 -\BOOKMARK [3][-]{subsubsection.3.2.1}{\376\377\000P\000o\000r\000t\000a\000b\000i\000l\000i\000t\000y}{subsection.3.2}% 6 -\BOOKMARK [2][-]{subsection.3.3}{\376\377\000R\000e\000s\000t\000a\000r\000t\000i\000n\000g}{section.3}% 7 -\BOOKMARK [1][-]{section.4}{\376\377\000R\000e\000s\000u\000l\000t\000s}{}% 8 -\BOOKMARK [1][-]{section.5}{\376\377\000C\000o\000n\000c\000l\000u\000s\000i\000o\000n\000s\000\040\000a\000n\000d\000\040\000F\000u\000t\000u\000r\000e\000\040\000W\000o\000r\000k}{}% 9 -\BOOKMARK [1][-]{section*.1}{\376\377\000R\000e\000f\000e\000r\000e\000n\000c\000e\000s}{}% 10 +\BOOKMARK [1][-]{section.3}{\376\377\000M\000e\000t\000h\000o\000d\000o\000l\000o\000g\000y}{}% 3 +\BOOKMARK [2][-]{subsection.3.1}{\376\377\000C\000h\000e\000c\000k\000p\000o\000i\000n\000t\000i\000n\000g}{section.3}% 4 +\BOOKMARK [3][-]{subsubsection.3.1.1}{\376\377\000M\000e\000m\000o\000r\000y\000\040\000C\000o\000n\000s\000t\000r\000a\000i\000n\000t\000\040\000D\000i\000f\000f\000i\000c\000u\000l\000t\000i\000e\000s}{subsection.3.1}% 5 +\BOOKMARK 
[3][-]{subsubsection.3.1.2}{\376\377\000I\000m\000p\000l\000e\000m\000e\000n\000t\000a\000t\000i\000o\000n}{subsection.3.1}% 6 +\BOOKMARK [3][-]{subsubsection.3.1.3}{\376\377\000P\000o\000r\000t\000a\000b\000i\000l\000i\000t\000y}{subsection.3.1}% 7 +\BOOKMARK [2][-]{subsection.3.2}{\376\377\000R\000e\000s\000t\000a\000r\000t\000i\000n\000g}{section.3}% 8 +\BOOKMARK [1][-]{section.4}{\376\377\000R\000e\000s\000u\000l\000t\000s}{}% 9 +\BOOKMARK [1][-]{section.5}{\376\377\000C\000o\000n\000c\000l\000u\000s\000i\000o\000n\000s\000\040\000a\000n\000d\000\040\000F\000u\000t\000u\000r\000e\000\040\000W\000o\000r\000k}{}% 10 +\BOOKMARK [1][-]{section*.1}{\376\377\000R\000e\000f\000e\000r\000e\000n\000c\000e\000s}{}% 11 diff --git a/Schrick-Noah_AG-CG-CR.pdf b/Schrick-Noah_AG-CG-CR.pdf index 1dbcd09..3ef74ba 100644 Binary files a/Schrick-Noah_AG-CG-CR.pdf and b/Schrick-Noah_AG-CG-CR.pdf differ diff --git a/Schrick-Noah_AG-CG-CR.tex b/Schrick-Noah_AG-CG-CR.tex index e163d6c..10002da 100644 --- a/Schrick-Noah_AG-CG-CR.tex +++ b/Schrick-Noah_AG-CG-CR.tex @@ -73,12 +73,20 @@ to quadratic time and reduced the number of nodes in the resulting graph to $\ma presented by the authors of \cite{cook_scalable_2016} represents a description of systems and their qualities and topologies as a state, with a queue of unexplored states. This work was continued by the authors of \cite{li_concurrency_2019} by implementing a hash table among other features. Each of these works demonstrates an improvement in scalability through refining the desirable information output. -\section{Implementation} +\section{Methodology} +\subsection{Checkpointing} Previous works with RAGE have been designed around maximizing performance to limit the longer runtime caused by the state space explosion, such as the works seen by the authors of \cite{cook_rage_2018}, -\cite{li_concurrency_2019}, and \cite{li_combining_2019}. 
To this end, the output graph is contained in memory during the generation process to minimize disk writing and reading. RAGE does incorporate PostgreSQL as an initial and final storage mechanism to write the starting and resulting graph information, but no intermediate storage is otherwise conducted. Based on the inclusion of PostgreSQL in RAGE, the C/R approach was based around this dependency. +\cite{li_concurrency_2019}, and \cite{li_combining_2019}. To this end, the output graph is contained in memory during the generation process to minimize disk writing and reading. RAGE does incorporate PostgreSQL as an initial and final storage mechanism to write the starting and resulting graph information, but no intermediate storage is otherwise conducted. Because PostgreSQL is already included in RAGE, the C/R approach was designed around this dependency. Figure~\ref{fig:cr} shows an image of an attack or compliance graph that is undergoing the generation process. All nodes and edges within the ``instance'' box have been fully explored, and all information is stored in memory. All nodes within the ``frontier'' box have their information stored in memory, but they have not yet undergone exploration. To checkpoint at this point in time, both the instance and the frontier need to be saved. Additionally, since the instance will no longer be used, it can be fully removed from memory. Section~\ref{sec:mem-constraint} highlights the advantages and necessities of this removal. -\subsection{Memory Constraint Difficulties} - While the design decision to not use intermediate storage maximizes performance for graph generation, it introduces a few complications.
When generating large graphs, the system runs the risk +\begin{figure}[htp] + \centering + \includegraphics[width=\linewidth]{"./images/checkpoint.png"} + \caption{An Attack or Compliance Graph Undergoing Generation} + \label{fig:cr} +\end{figure} + +\subsubsection{Memory Constraint Difficulties} \label{sec:mem-constraint} + While the design decision to store all graph generation information in memory maximizes performance, it introduces a few complications. When generating large graphs, the system runs the risk of running out of memory. This typically does not occur when generation is conducted on small graphs, and is especially true when relatively small graphs are generated on an HPC system with substantial amounts of memory. However, when running on local systems or when the graph is large, memory can quickly be depleted due to state space explosion. The memory depletion is due to two primary memory consumption points: the frontier which contains all of the states that still need to be explored, and the graph instance which holds all of the states and their information, @@ -90,7 +98,7 @@ Previous works with RAGE have been designed around maximizing performance to lim qualities, the size of each state becomes noticeably larger. With some graphs containing millions of nodes and billions of edges like those mentioned by the authors of \cite{zhang_boosting_2017}, it becomes increasingly unlikely that the graph can be fully contained within system memory. Checkpointing provides an additional benefit to the generation process to relieve its memory strain. -\subsection{Checkpointing} + \subsubsection{Implementation} Rather than only a static implementation of storing to the database on disk at a set interval or a set size, the goal was to also allow for dynamically storing to the database only when necessary. 
This would allow for proper utilization of systems with greater memory, and would reduce fine-tuning of a maximum size variable before database writes on different systems. Since there is an associated cost with preparing the writes to disk, the communication cost across nodes, the writing to disk itself, and a cost for retrieving items from disk, it may be desirable to store as much in memory for as long as possible and only checkpoint when necessary. When running RAGE, a new argument can be passed \textit{(-a $<$double$>$)} to specify the amount of memory the tool should use before writing to disk. This argument is a value between 0 and 0.99 to specify a percentage. Alternatively, an integer greater than or equal to 1 can be passed, which allows for a discrete number of states to be held in memory before checkpointing. @@ -120,7 +128,6 @@ Previous works with RAGE have been designed around maximizing performance to lim by using a job scheduler argument such as Slurm's ``--exclusive" option, but this may not be desirable. Instead, a user could pass in the amount of total memory to use (and can be reused from a job scheduler's memory allocation request option), and the checkpointing process would function in the same fashion. Since PostgreSQL is used for the checkpointing, no file system dependencies are necessary for the cluster. - \subsection{Restarting} diff --git a/images/checkpoint b/images/checkpoint new file mode 100644 index 0000000..6c73fdd --- /dev/null +++ b/images/checkpoint @@ -0,0 +1,145 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/images/checkpoint.png b/images/checkpoint.png new file mode 100644 index 0000000..b4837e5 Binary files /dev/null and b/images/checkpoint.png differ