diff --git a/Schrick-Noah_MPI-Tasking.aux b/Schrick-Noah_MPI-Tasking.aux index 09a3d64..19b09b7 100644 --- a/Schrick-Noah_MPI-Tasking.aux +++ b/Schrick-Noah_MPI-Tasking.aux @@ -91,14 +91,14 @@ \newlabel{sec:test-platform}{{\mbox {VI-D}}{7}{Testing Platform}{subsection.6.4}{}} \@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {VI-E}}Testing Process}{7}{subsection.6.5}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {VII}Analysis and Results}{7}{section.7}\protected@file@percent } -\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Number of Nodes and Number of Exploits (Averaged) vs. Runtime (ms)}}{8}{figure.9}\protected@file@percent } -\newlabel{fig:nodes-exp}{{9}{8}{Number of Nodes and Number of Exploits (Averaged) vs. Runtime (ms)}{figure.9}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces Applicability of Exploits (\%) and Database Load (\%) (Averaged) vs. Runtime (ms)}}{8}{figure.10}\protected@file@percent } -\newlabel{fig:appl-load}{{10}{8}{Applicability of Exploits (\%) and Database Load (\%) (Averaged) vs. Runtime (ms)}{figure.10}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces Minimum, Maximum, and Mean Speedup of MPI Tasking Across All Problem Sizes}}{9}{figure.11}\protected@file@percent } -\newlabel{fig:overall-speedup}{{11}{9}{Minimum, Maximum, and Mean Speedup of MPI Tasking Across All Problem Sizes}{figure.11}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces Minimum, Maximum, and Mean Efficiency of MPI Tasking Across All Problem Sizes}}{9}{figure.12}\protected@file@percent } -\newlabel{fig:overall-efficiency}{{12}{9}{Minimum, Maximum, and Mean Efficiency of MPI Tasking Across All Problem Sizes}{figure.12}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Number of Nodes and Number of Exploits (Averaged) vs. 
Runtime (ms), Combining and Averaging Across All Other Parameters}}{8}{figure.9}\protected@file@percent } +\newlabel{fig:nodes-exp}{{9}{8}{Number of Nodes and Number of Exploits (Averaged) vs. Runtime (ms), Combining and Averaging Across All Other Parameters}{figure.9}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces Applicability of Exploits (\%) and Database Load (\%) (Averaged) vs. Runtime (ms), Combining and Averaging Across All Other Parameters}}{8}{figure.10}\protected@file@percent } +\newlabel{fig:appl-load}{{10}{8}{Applicability of Exploits (\%) and Database Load (\%) (Averaged) vs. Runtime (ms), Combining and Averaging Across All Other Parameters}{figure.10}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces Minimum, Maximum, and Mean Speedup of MPI Tasking Across All Problem Sizes, Combining and Averaging Across All Parameters}}{9}{figure.11}\protected@file@percent } +\newlabel{fig:overall-speedup}{{11}{9}{Minimum, Maximum, and Mean Speedup of MPI Tasking Across All Problem Sizes, Combining and Averaging Across All Parameters}{figure.11}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces Minimum, Maximum, and Mean Efficiency of MPI Tasking Across All Problem Sizes, Combining and Averaging Across All Parameters}}{9}{figure.12}\protected@file@percent } +\newlabel{fig:overall-efficiency}{{12}{9}{Minimum, Maximum, and Mean Efficiency of MPI Tasking Across All Problem Sizes, Combining and Averaging Across All Parameters}{figure.12}{}} \@writefile{toc}{\contentsline {section}{\numberline {VIII}Conclusion and Future Work}{9}{section.8}\protected@file@percent } \newlabel{sec:FW}{{VIII}{9}{Conclusion and Future Work}{section.8}{}} \citation{Amdahl} @@ -107,15 +107,15 @@ \bibdata{Bibliography} \bibcite{9678822}{1} \bibcite{7993827}{2} +\@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces Mean Speedup and Efficiency for the Exploit Parameter Across the Number of Compute 
Nodes, Combining and Averaging Across All Other Parameters}}{10}{figure.13}\protected@file@percent } +\newlabel{fig:param-exploit}{{13}{10}{Mean Speedup and Efficiency for the Exploit Parameter Across the Number of Compute Nodes, Combining and Averaging Across All Other Parameters}{figure.13}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces Mean Speedup and Efficiency for the Applicability of Exploit Parameter Across the Number of Compute Nodes, Combining and Averaging Across All Other Parameters}}{10}{figure.14}\protected@file@percent } +\newlabel{fig:param-appl}{{14}{10}{Mean Speedup and Efficiency for the Applicability of Exploit Parameter Across the Number of Compute Nodes, Combining and Averaging Across All Other Parameters}{figure.14}{}} +\@writefile{toc}{\contentsline {section}{References}{10}{section*.1}\protected@file@percent } \bibcite{8652334}{3} \bibcite{baloyi_guidelines_2019}{4} \bibcite{allman_complying_2006}{5} \bibcite{j_hale_compliance_nodate}{6} -\@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces Mean Speedup and Efficiency for the Exploit Parameter Across the Number of Compute Nodes}}{10}{figure.13}\protected@file@percent } -\newlabel{fig:param-exploit}{{13}{10}{Mean Speedup and Efficiency for the Exploit Parameter Across the Number of Compute Nodes}{figure.13}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces Mean Speedup and Efficiency for the Applicability of Exploit Parameter Across the Number of Compute Nodes}}{10}{figure.14}\protected@file@percent } -\newlabel{fig:param-appl}{{14}{10}{Mean Speedup and Efficiency for the Applicability of Exploit Parameter Across the Number of Compute Nodes}{figure.14}{}} -\@writefile{toc}{\contentsline {section}{References}{10}{section*.1}\protected@file@percent } \bibcite{ou_scalable_2006}{7} \bibcite{CPSIOT}{8} \bibcite{ming_jo}{9} diff --git a/Schrick-Noah_MPI-Tasking.log b/Schrick-Noah_MPI-Tasking.log index d7223e5..08025e0 100644 
--- a/Schrick-Noah_MPI-Tasking.log +++ b/Schrick-Noah_MPI-Tasking.log @@ -1,8 +1,8 @@ -This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024/Arch Linux) (preloaded format=pdflatex 2024.4.16) 19 APR 2024 09:17 +This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024/Arch Linux) (preloaded format=pdflatex 2024.4.16) 19 APR 2024 11:40 entering extended mode restricted \write18 enabled. %&-line parsing enabled. -**Schrick-Noah_MPI-Tasking +**Schrick-Noah_MPI-Tasking.tex (./Schrick-Noah_MPI-Tasking.tex LaTeX2e <2023-11-01> patch level 1 L3 programming layer <2024-02-20> @@ -974,10 +974,15 @@ File: ./images/dbload-runtime.png Graphic file (type png) Package pdftex.def Info: ./images/dbload-runtime.png used on input line 311. (pdftex.def) Requested size: 252.0pt x 197.13081pt. -Underfull \vbox (badness 1502) has occurred while \output is active [] +Underfull \vbox (badness 1371) has occurred while \output is active [] -Underfull \vbox (badness 3815) has occurred while \output is active [] +Underfull \hbox (badness 1902) in paragraph at lines 318--319 +\OT1/ptm/m/n/10 drops in outcome variables. 
This effect is made more + [] + + +Underfull \vbox (badness 3503) has occurred while \output is active [] [8 <./images/nodes-runtime.png> <./images/exploits-runtime.png> <./images/appl icability-runtime.png> <./images/dbload-runtime.png>] @@ -1042,13 +1047,13 @@ Underfull \hbox (badness 4779) in paragraph at lines 365--366 []\OT1/ptm/m/n/10 The analysis portion of this work also has room [] -(./Schrick-Noah_MPI-Tasking.bbl +(./Schrick-Noah_MPI-Tasking.bbl [10 <./images/exploit-speedup.png> <./images/ex +ploit-eff.png> <./images/appl-speedup.png> <./images/appl-eff.png>] Underfull \hbox (badness 1442) in paragraph at lines 21--26 \OT1/ptm/m/n/8 A Focus on Cyberphysical Systems and Internet of Things,'' in [] -[10 <./images/exploit-speedup.png> <./images/exploit-eff.png> <./images/appl-sp -eedup.png> <./images/appl-eff.png>] + Underfull \hbox (badness 1527) in paragraph at lines 56--58 []\OT1/ptm/m/n/8 P. Pacheco, \OT1/ptm/m/it/8 An Introduction to Parallel Progra mming\OT1/ptm/m/n/8 . Morgan @@ -1080,7 +1085,7 @@ Underfull \hbox (badness 10000) in paragraph at lines 143--146 [] ) -<./images/Schrick-Noah_Author-Photo.png, id=325, 1865.46938pt x 2031.84094pt> +<./images/Schrick-Noah_Author-Photo.png, id=324, 1865.46938pt x 2031.84094pt> File: ./images/Schrick-Noah_Author-Photo.png Graphic file (type png) Package pdftex.def Info: ./images/Schrick-Noah_Author-Photo.png used on input @@ -1096,7 +1101,7 @@ Underfull \hbox (badness 2790) in paragraph at lines 376--378 []\OT1/ptm/m/n/8 His research focus is on cybersecurity and [] -<./images/Hawrylak-Peter_Author-Photo.jpg, id=328, 90.3375pt x 90.3375pt> +<./images/Hawrylak-Peter_Author-Photo.jpg, id=327, 90.3375pt x 90.3375pt> File: ./images/Hawrylak-Peter_Author-Photo.jpg Graphic file (type jpg) Package pdftex.def Info: ./images/Hawrylak-Peter_Author-Photo.jpg used on inpu @@ -1125,11 +1130,11 @@ d. 
Here is how much of TeX's memory you used: 24098 strings out of 476076 451701 string characters out of 5793775 - 1950187 words of memory out of 5000000 + 1953187 words of memory out of 5000000 45721 multiletter control sequences out of 15000+600000 598923 words of font info for 111 fonts, out of 8000000 for 9000 18 hyphenation exceptions out of 8191 - 99i,14n,101p,2031b,817s stack positions out of 10000i,1000n,20000p,200000b,200000s + 99i,14n,101p,2035b,817s stack positions out of 10000i,1000n,20000p,200000b,200000s -Output written on Schrick-Noah_MPI-Tasking.pdf (11 pages, 10269741 bytes). +Output written on Schrick-Noah_MPI-Tasking.pdf (11 pages, 10270212 bytes). PDF statistics: 399 PDF objects out of 1000 (max. 8388607) 319 compressed objects within 4 object streams diff --git a/Schrick-Noah_MPI-Tasking.pdf b/Schrick-Noah_MPI-Tasking.pdf index 2cf5a0b..eb587b7 100644 Binary files a/Schrick-Noah_MPI-Tasking.pdf and b/Schrick-Noah_MPI-Tasking.pdf differ diff --git a/Schrick-Noah_MPI-Tasking.tex b/Schrick-Noah_MPI-Tasking.tex index d601996..3939df4 100644 --- a/Schrick-Noah_MPI-Tasking.tex +++ b/Schrick-Noah_MPI-Tasking.tex @@ -23,7 +23,7 @@ \begin{document} -\title{Parallelization of Large-Scale Attack and Compliance Graph Generation Using Message-Passing Interface +\title{An Algorithm for the Parallelization of Large-Scale Attack and Compliance Graph Generation Using Message-Passing Interface } \author{NOAH L. SCHRICK\,\orcidlink{0000-0003-0875-8927}~\IEEEmembership{Member,~IEEE,}, AND PETER J. HAWRYLAK\,\orcidlink{0000-0003-3268-7452},~\IEEEmembership{Senior Member,~IEEE,} @@ -301,7 +301,7 @@ Exploratory data analysis was performed on the resulting data using Python to as \centering \includegraphics[width=\linewidth]{"./images/nodes-runtime.png"} \includegraphics[width=\linewidth]{"./images/exploits-runtime.png"} - \caption{Number of Nodes and Number of Exploits (Averaged) vs. Runtime (ms)} + \caption{Number of Nodes and Number of Exploits (Averaged) vs. 
Runtime (ms), Combining and Averaging Across All Other Parameters} \label{fig:nodes-exp} \end{figure} @@ -309,30 +309,30 @@ Exploratory data analysis was performed on the resulting data using Python to as \centering \includegraphics[width=\linewidth]{"./images/applicability-runtime.png"} \includegraphics[width=\linewidth]{"./images/dbload-runtime.png"} - \caption{Applicability of Exploits (\%) and Database Load (\%) (Averaged) vs. Runtime (ms)} + \caption{Applicability of Exploits (\%) and Database Load (\%) (Averaged) vs. Runtime (ms), Combining and Averaging Across All Other Parameters} \label{fig:appl-load} \end{figure} In terms of speedup, when the number of entries in the exploit list is small, the serial approach has better performance. As discussed in Section \ref{sec:Task-perf-expec}, this is expected due to the time elapsed for the communication cost exceeding the time taken to generate a state. However, as the number of items in the exploit list increase, the Tasking Approach quickly begins to outperform the serial approach. It is notable that even when the tasking pipeline is not fully saturated (when there are less compute nodes assigned than tasks), the performance is still approximately equal to that of the serial approach. The other noticeable feature is that as more compute nodes are assigned, the speedup continues to increase. -Figure \ref{fig:overall-speedup} displays the overall minimum, maximum, and mean of speedup across all problem sizes. It is observable through the mean and maximum bars that as other problem size parameters increase, the speedup of the Tasking Approach also increases. Since database load, applicability of exploits, and number of exploits all affect the runtime, increasing the problem size through any of these parameters showcases the viability of the parallelized approach. At the same time, it is worth noting that the parallelized approach is not strictly better. 
The minimum speedups shown in Figure \ref{fig:overall-speedup} demonstrate that for small problem sizes, the serial approach performs better due to the communication costs. +Figure \ref{fig:overall-speedup} displays the overall minimum, maximum, and mean of speedup across all problem sizes. All parameters are combined and averaged, which leads to the high-magnitude drops in outcome variables. This effect is made more noticeable since the minimum-bound data was collected, where the large majority of data was collected using only a few nodes. It is observable through the mean and maximum bars that as other problem size parameters increase, the speedup of the Tasking Approach also increases. Since database load, applicability of exploits, and number of exploits all affect the runtime, increasing the problem size through any of these parameters showcases the viability of the parallelized approach. At the same time, it is worth noting that the parallelized approach is not strictly better. The minimum speedups shown in Figure \ref{fig:overall-speedup} demonstrate that for small problem sizes, the serial approach performs better due to the communication costs. \begin{figure}[htp] \centering \includegraphics[width=\linewidth]{"./images/overall-speedup.png"} \vspace{.2truein} \centerline{} - \caption{Minimum, Maximum, and Mean Speedup of MPI Tasking Across All Problem Sizes} - \label{fig:overall-speedup} + \caption{Minimum, Maximum, and Mean Speedup of MPI Tasking Across All Problem Sizes, Combining and Averaging Across All Parameters} + \label{fig:overall-speedup} \end{figure} -Figure \ref{fig:overall-efficiency} displays the overall minimum, maximum, and mean of efficiency across all problem sizes. In terms of efficiency, 2 compute nodes offer the greatest value. While the 2 compute node configuration does offer the greatest efficiency, it does not provide a speedup greater than 1.0 on any of the testing cases conducted. 
The results also demonstrate that an odd number of compute nodes in a fully saturated pipeline has better efficiency that an even number of compute nodes. When referring to Figure \ref{fig:node-alloc}, when there is an odd number number of compute nodes, Task 1 is allocated more nodes than Task 2. Task 1 was responsible for iterating through an increased size of the exploit list, so more nodes is advantageous in distributing the workload. However, when many exploits were not applicable, Task 2 had a lower workload. Some test cases only had 6 applicable exploits, which is a substantially lower workload for Task 2 compared to cases where Task 1 had upwards of 49,000 exploits. As the applicability of exploits increases, the disparity in efficiency for odd and even number of nodes is not present. +Figure \ref{fig:overall-efficiency} displays the overall minimum, maximum, and mean of efficiency across all problem sizes. All parameters are combined and averaged, which leads to the high-magnitude drop in outcome variables. This effect is made more noticeable since the minimum-bound data was collected, where the large majority of data was collected using only a few nodes. In terms of efficiency, 2 compute nodes offer the greatest value. While the 2 compute node configuration does offer the greatest efficiency, it does not provide a speedup greater than 1.0 on any of the testing cases conducted. The results also demonstrate that an odd number of compute nodes in a fully saturated pipeline has better efficiency than an even number of compute nodes. As shown in Figure \ref{fig:node-alloc}, when there is an odd number of compute nodes, Task 1 is allocated more nodes than Task 2. Task 1 was responsible for iterating through an increased size of the exploit list, so more nodes are advantageous in distributing the workload. However, when many exploits were not applicable, Task 2 had a lower workload. 
Some test cases only had 6 applicable exploits, which is a substantially lower workload for Task 2 compared to cases where Task 1 had upwards of 49,000 exploits. As the applicability of exploits increases, the disparity in efficiency between odd and even numbers of nodes is no longer present. \begin{figure}[htp] \centering \includegraphics[width=\linewidth]{"./images/overall-efficiency.png"} \vspace{.2truein} \centerline{} - \caption{Minimum, Maximum, and Mean Efficiency of MPI Tasking Across All Problem Sizes} - \label{fig:overall-efficiency} + \caption{Minimum, Maximum, and Mean Efficiency of MPI Tasking Across All Problem Sizes, Combining and Averaging Across All Parameters} + \label{fig:overall-efficiency} \end{figure} Speedups and efficiencies were also computed across each parameter. Using pivot tables, mean speedups and mean efficiencies were computed for a parameter across all node configurations. Figures \ref{fig:param-exploit} and \ref{fig:param-appl} display the speedups and efficiencies of the exploit parameter and the applicability of exploits parameter, respectively. The number of nodes has the largest impact on the exploit parameter, and Figure \ref{fig:param-exploit} illustrates that even when fewer nodes are used, speedup can still be obtained as the exploit list grows in size. Figure \ref{fig:param-appl} demonstrates that though Task 2 has less of an impact on overall runtime and contribution to speedup, speedup is still achievable as more compute nodes are added and as the applicability of exploits increase. Though database load was not a parameter to easily include in preliminary testing, speedup is expected as this parameter changes. By dedicating nodes to solely handle database operations, the tasking pipeline is able to move to new state generation without the need to wait for all preceding database operations to complete. @@ -341,7 +341,7 @@ Speedups and efficiencies were also computed across each parameter. 
Using pivot \centering \includegraphics[width=\linewidth]{"./images/exploit-speedup.png"} \includegraphics[width=\linewidth]{"./images/exploit-eff.png"} - \caption{Mean Speedup and Efficiency for the Exploit Parameter Across the Number of Compute Nodes} + \caption{Mean Speedup and Efficiency for the Exploit Parameter Across the Number of Compute Nodes, Combining and Averaging Across All Other Parameters} \label{fig:param-exploit} \end{figure} @@ -349,7 +349,7 @@ Speedups and efficiencies were also computed across each parameter. Using pivot \centering \includegraphics[width=\linewidth]{"./images/appl-speedup.png"} \includegraphics[width=\linewidth]{"./images/appl-eff.png"} - \caption{Mean Speedup and Efficiency for the Applicability of Exploit Parameter Across the Number of Compute Nodes} + \caption{Mean Speedup and Efficiency for the Applicability of Exploit Parameter Across the Number of Compute Nodes, Combining and Averaging Across All Other Parameters} \label{fig:param-appl} \end{figure}