From 09269addca0de9acd72a17035bf1b17e3f78b8fb Mon Sep 17 00:00:00 2001
From: hammer
Date: Sat, 5 Feb 2022 01:36:26 -0600
Subject: [PATCH] Run scripts

---
Review notes (commentary after the three-dash line; not part of the commit):
- The line structure of this patch was restored from a whitespace-collapsed
  copy. Indentation inside the added script is normalized to two spaces per
  nesting level; every other token is unchanged.
- build/.nfs000000000041e67300002bf1 has the shape of an NFS client
  placeholder name, and its opening lines match the hunk context of
  build/sync_data_col.sh. Presumably it was staged by accident; confirm
  before applying.
- In the added script both the sync and the non-sync section are guarded by
  `"$SKIPSYNC" == false`, so folder=serialnosync / folder=ompnosync dispatch
  no jobs and the serialnosync/ompnosync branches inside both sections are
  never reached. Confirm whether the second guard was meant to differ.
- QSIZE, pwd and tmp are assigned but never read in the added script.

 build/.nfs000000000041e67300002bf1 | 242 +++++++++++++++++++++++++++++
 build/sync_data_col.sh             |   8 +-
 2 files changed, 249 insertions(+), 1 deletion(-)
 create mode 100755 build/.nfs000000000041e67300002bf1

diff --git a/build/.nfs000000000041e67300002bf1 b/build/.nfs000000000041e67300002bf1
new file mode 100755
index 0000000..6305da4
--- /dev/null
+++ b/build/.nfs000000000041e67300002bf1
@@ -0,0 +1,242 @@
+#!/usr/bin/env bash
+
+#Choose folder
+#folder=omp
+#folder=mpi
+folder=serial
+#folder=serialnosync
+#folder=ompnosync
+TIMESTR=`date +"%s"`
+DBNAME="${folder}_${TIMESTR}"
+nodelist=compute01,compute04,compute07,compute10,compute11,compute12
+NODES=(2 3 4 5 6 7 8 9 10 11 12)
+
+pwd=`pwd`
+#one=1
+#4,8,16,32 thr
+thrds=(4 8 16 32)
+
+if [[ "$folder" == serial || "$folder" == serialnosync || "$folder" == mpi ]]; then
+  thrds=(1)
+else
+  thrds=(4 8 16 32)
+fi
+
+if [[ "$folder" != mpi ]]; then
+  NODES=(1)
+fi
+
+QSIZE=1
+SKIPSYNC=false
+
+if [[ "$folder" == serialnosync ]]; then
+  SKIPSYNC=true
+fi
+
+if [[ "$folder" == ompnosync ]]; then
+  SKIPSYNC=true
+fi
+
+if [[ "$SKIPSYNC" == false ]]; then
+
+  for thrd in "${thrds[@]}"
+  do
+    if [[ "$folder" == serial ]]; then
+      thrd=1
+      nodelist=compute05
+    fi
+
+    if [[ "$folder" == serialnosync ]]; then
+      thrd=1
+      nodelist=compute07
+    fi
+
+    if [[ "$folder" == ompnosync ]]; then
+      nodelist=compute08
+    fi
+
+    for qmult in {1..1}
+    do
+      if [[ "$qmult" -eq 3 ]]; then
+        QSIZE=$((thrd*4))
+      else
+        QSIZE=$((thrd*qmult))
+      fi
+
+      if [[ "$folder" == serial ]]; then
+        QSIZE=1
+      fi
+
+      #1..6 Cars
+      for cars in {2..6}
+      do
+
+        echo "----------------------------------- "$cars" cars -----------------------------------" >> ./slurm_reports/"$folder"/sync_data.txt
+        #1..4 (or 1..10)Services
+        for i in {1..10}
+        do
+          for node in "${NODES[@]}"
+          do
+            SKIP_FLAG=0
+            #Dispatch the job and get the output ("Submitted batch job
+            JOB=`sbatch --nodelist="$nodelist" ag_run.sh "$i" "sync" "$cars" "$DBNAME" "$thrd" "$node"`
+            #Pull the jobid
+            JOB=`echo "$JOB" | grep -oP '(?<=job )[^ ]*'`
+
+            #Output will be stored as this format
+            FILE="./slurm_reports/job.${JOB}.out"
+            ERR="./slurm_reports/job.${JOB}.err"
+
+            NOW=`date +"%H:%M"`
+
+            echo "Synchronous test with "$i" services "$thrd" omp threads "$node" nodes and "$cars" cars dispatched at "$NOW" with jobid "$JOB"" >> ./slurm_reports/"$folder"/sync_data.txt
+
+            #Wait until job is finished
+            while squeue | grep "$JOB" > /dev/null 2>&1; do
+              #Slurm is having an issue where the prog finishes but doesn't leave slurm queue
+              #Check if program finished by checking the output file, delay by 5 seconds for safety, then cancel the job
+              if test -f "$FILE" && (cat "$FILE" | grep "total run time" > /dev/null 2>&1) ; then
+                #if test -f "$FILE"; then
+                for j in {1..5}
+                do
+                  sleep 1
+                done
+                SKIP_FLAG=1
+                echo "Job done, but slurm hung." >> ./slurm_reports/"$folder"/sync_data.txt
+                tmp=`scancel "$JOB"`
+                sleep 3
+                break
+              fi
+              #while ! test -f "$FILE"; do
+              sleep 1
+              #done
+            done
+
+            #Wait for the delay between job finish and file write
+            while ! test -f "$FILE"; do
+              sleep 1
+            done
+
+            #Get runtime and states from the output
+            RUNTIME=`cat "$FILE" | grep -oP '(?<=total run time is )[^ ]*'`
+            STATES=`cat "$FILE" | grep -oP '(?<=Total States: )[^ ]*'`
+
+            if ! test -s "$ERR" || test "$SKIP_FLAG" -eq 1 ; then
+              printf "${i} Services Synchronized: ${STATES} states and ${RUNTIME} runtime \n\n" >> ./slurm_reports/"$folder"/sync_data.txt
+              str="$i","$STATES","$RUNTIME"
+              echo $str >> ./slurm_reports/"$folder"/sync_data.csv
+            else
+              printf "Errors occurred. Please see err file for more details. \n\n" >> ./slurm_reports/"$folder"/sync_data.txt
+
+            fi
+          done
+          SKIP_FLAG=0
+        done
+      done
+
+      printf "\n\n" >> ./slurm_reports/"$folder"/sync_data.txt
+    done
+  done
+fi
+
+if [[ "$SKIPSYNC" == false ]]; then
+
+  for thrd in "${thrds[@]}"
+  do
+    if [[ "$folder" == serial ]]; then
+      thrd=1
+      nodelist=compute05
+    fi
+
+    if [[ "$folder" == serialnosync ]]; then
+      thrd=1
+      nodelist=compute07
+    fi
+
+    if [[ "$folder" == ompnosync ]]; then
+      nodelist=compute08
+    fi
+
+    for qmult in {1..1}
+    do
+      if [[ "$qmult" -eq 3 ]]; then
+        QSIZE=$((thrd*4))
+      else
+        QSIZE=$((thrd*qmult))
+      fi
+
+      if [[ "$folder" == serial ]]; then
+        QSIZE=1
+      fi
+
+      #1..6 Cars
+      for cars in {2..6}
+      do
+
+        echo "----------------------------------- "$cars" cars -----------------------------------" >> ./slurm_reports/"$folder"/non-sync_data.txt
+        #1..4 (or 1..10)Services
+        for i in {1..10}
+        do
+          for node in "${NODES[@]}"
+          do
+            SKIP_FLAG=0
+            #Dispatch the job and get the output ("Submitted batch job
+            JOB=`sbatch --nodelist="$nodelist" ag_run.sh "$i" "non-sync" "$cars" "$DBNAME" "$thrd" "$node"`
+            #Pull the jobid
+            JOB=`echo "$JOB" | grep -oP '(?<=job )[^ ]*'`
+
+            #Output will be stored as this format
+            FILE="./slurm_reports/job.${JOB}.out"
+            ERR="./slurm_reports/job.${JOB}.err"
+
+            NOW=`date +"%H:%M"`
+
+            echo "Non-Synchronous test with "$i" services "$thrd" omp threads "$node" nodes and "$cars" cars dispatched at "$NOW" with jobid "$JOB"" >> ./slurm_reports/"$folder"/non-sync_data.txt
+
+            #Wait until job is finished
+            while squeue | grep "$JOB" > /dev/null 2>&1; do
+              #Slurm is having an issue where the prog finishes but doesn't leave slurm queue
+              #Check if program finished by checking the output file, delay by 5 seconds for safety, then cancel the job
+              if test -f "$FILE" && (cat "$FILE" | grep "total run time" > /dev/null 2>&1) ; then
+                #if test -f "$FILE"; then
+                for j in {1..5}
+                do
+                  sleep 1
+                done
+                SKIP_FLAG=1
+                echo "Job done, but slurm hung." >> ./slurm_reports/"$folder"/non-sync_data.txt
+                tmp=`scancel "$JOB"`
+                sleep 3
+                break
+              fi
+              #while ! test -f "$FILE"; do
+              sleep 1
+              #done
+            done
+
+            #Wait for the delay between job finish and file write
+            while ! test -f "$FILE"; do
+              sleep 1
+            done
+
+            #Get runtime and states from the output
+            RUNTIME=`cat "$FILE" | grep -oP '(?<=total run time is )[^ ]*'`
+            STATES=`cat "$FILE" | grep -oP '(?<=Total States: )[^ ]*'`
+
+            if ! test -s "$ERR" || test "$SKIP_FLAG" -eq 1 ; then
+              printf "${i} Services Non-Synchronized: ${STATES} states and ${RUNTIME} runtime \n\n" >> ./slurm_reports/"$folder"/non-sync_data.txt
+              str="$i","$STATES","$RUNTIME"
+              echo $str >> ./slurm_reports/"$folder"/non-sync_data.csv
+            else
+              printf "Errors occurred. Please see err file for more details. \n\n" >> ./slurm_reports/"$folder"/non-sync_data.txt
+
+            fi
+          done
+          SKIP_FLAG=0
+        done
+      done
+
+      printf "\n\n" >> ./slurm_reports/"$folder"/non-sync_data.txt
+    done
+  done
+fi
diff --git a/build/sync_data_col.sh b/build/sync_data_col.sh
index 591a81c..1d07163 100755
--- a/build/sync_data_col.sh
+++ b/build/sync_data_col.sh
@@ -2,13 +2,19 @@
 
 #Choose folder
 #folder=omp
+#nodelist=compute08
+
 folder=mpi
+nodelist=compute01,compute04,compute07,compute10,compute11,compute12
+
 #folder=serial
+#nodelist=compute05
+
 #folder=serialnosync
 #folder=ompnosync
 TIMESTR=`date +"%s"`
 DBNAME="${folder}_${TIMESTR}"
-nodelist=compute01,compute04,compute07,compute10,compute11,compute12
+#nodelist=compute01,compute04,compute07,compute10,compute11,compute12
 NODES=(2 3 4 5 6 7 8 9 10 11 12)
 
 pwd=`pwd`