From 4196aa74fa828ba8655c941d8e2919417296f08e Mon Sep 17 00:00:00 2001 From: noah Date: Sat, 5 Feb 2022 00:47:02 -0600 Subject: [PATCH] Run script adjustments --- build/ag_run.sh | 5 +- build/run.sh | 9 +- build/sync_data_col.sh | 271 ++++++++++++++++++++++------------------- 3 files changed, 151 insertions(+), 134 deletions(-) diff --git a/build/ag_run.sh b/build/ag_run.sh index f670ff4..46083e0 100755 --- a/build/ag_run.sh +++ b/build/ag_run.sh @@ -17,7 +17,8 @@ NUM_SERV=${1:-1} TYPE=${2:-sync} CARS=${3:-2} DBNAME=${4:-tmp} -NODES=${5:-4} +NUM_THREADS=${5:-1} +NODES=${6:-4} module load graphviz module load gcc @@ -27,4 +28,4 @@ module load cmake module load postgresql #./run.sh 1 sync 2 "$TESTING" 2 -./run.sh "$NUM_SERV" "$TYPE" "$CARS" "$DBNAME" "$NODES" +./run.sh "$NUM_SERV" "$TYPE" "$CARS" "$DBNAME" "$NUM_THREADS" "$NODES" diff --git a/build/run.sh b/build/run.sh index f6794bf..1810eda 100755 --- a/build/run.sh +++ b/build/run.sh @@ -5,7 +5,8 @@ NUM_SERV=${1:-1} TYPE=${2:-sync} CARS=${3:-2} DBNAME=${4:-tmp} -NODES=${5:-2} +NUM_THREADS=${5:-1} +NODES=${6:-2} ITFC="eth0" @@ -54,7 +55,7 @@ if [ "$TYPE" == "$strval1" ]; then #Old #./ag_gen -n ../Feb_2021/generic_timeline_maintenance.nm -x ../Feb_2021/Sync/"$NUM_SERV"_Serv/sync_timeline_maintenance.xp -g DOTFILE.dot -t 1 -q 1 -p -a 0.9 - mpiexec --mca btl_openib_allow_ib 1 --mca btl openib,self,vader --mca opal_warn_on_missing_libcuda 0 --bind-to numa --map-by numa -np "$NODES" --timeout 129600 ./ag_gen -n ../Oct_2021/nm_files/"$CARS"_car_timeline_maintenance.nm -x ../Oct_2021/Sync/10_Exploits/"$NUM_SERV"_Serv/sync_timeline_maintenance.xp -t 1 -q 1 -p -a 0.6 -z "$DBNAME" + mpiexec --mca btl_openib_allow_ib 1 --mca btl openib,self,vader --mca opal_warn_on_missing_libcuda 0 --bind-to numa --map-by numa -np "$NODES" --timeout 129600 ./ag_gen -n ../Oct_2021/nm_files/"$CARS"_car_timeline_maintenance.nm -x ../Oct_2021/Sync/10_Exploits/"$NUM_SERV"_Serv/sync_timeline_maintenance.xp -t "$NUM_THREADS" -q 1 -p -a 0.6 -z "$DBNAME" # 4 Exploit #mpiexec -np "$NODES" --bind-to numa --map-by numa ./ag_gen -n ../Oct_2021/nm_files/"$CARS"_car_timeline_maintenance.nm -x ../Oct_2021/Sync/4_Exploits/"$NUM_SERV"_Serv/sync_timeline_maintenance.xp -t 1 -q 1 -p -a 0.6 -z "$DBNAME" @@ -63,11 +64,11 @@ if [ "$TYPE" == "$strval1" ]; then elif [ "$TYPE" == "$strval2" ]; then #./ag_gen -n ../Feb_2021/generic_timeline_maintenance.nm -x ../Feb_2021/Non_Sync/"$NUM_SERV"_Serv/generic_timeline_maintenance.xp -g DOTFILE.dot -t 1 -q 1 -p -a 0.9 - mpiexec --mca btl_openib_allow_ib 1 --mca btl openib,self,vader --mca opal_warn_on_missing_libcuda 0 --bind-to numa --map-by numa -np "$NODES" --timeout 129600 ./ag_gen -n ../Oct_2021/nm_files/"$CARS"_car_timeline_maintenance.nm -x ../Oct_2021/Non_Sync/10_Exploits/"$NUM_SERV"_Serv/generic_timeline_maintenance.xp -t 1 -q 1 -p -a 0.6 -z "$DBNAME" + mpiexec --mca btl_openib_allow_ib 1 --mca btl openib,self,vader --mca opal_warn_on_missing_libcuda 0 --bind-to numa --map-by numa -np "$NODES" --timeout 129600 ./ag_gen -n ../Oct_2021/nm_files/"$CARS"_car_timeline_maintenance.nm -x ../Oct_2021/Non_Sync/10_Exploits/"$NUM_SERV"_Serv/generic_timeline_maintenance.xp -t "$NUM_THREADS" -q 1 -p -a 0.6 -z "$DBNAME" else echo "Running default." - mpiexec --mca btl_openib_allow_ib 1 --mca btl openib,self,vader --mca opal_warn_on_missing_libcuda 0 --bind-to numa --map-by numa --timeout 129600 ./ag_gen -n ../Oct_2021/nm_files/1_car_timeline_maintenance.nm -x ../Oct_2021/Sync/10_Exploits/1_Serv/sync_timeline_maintenance.xp -t 1 -q 1 -p -a 0.6 -z "$DBNAME" + mpiexec --mca btl_openib_allow_ib 1 --mca btl openib,self,vader --mca opal_warn_on_missing_libcuda 0 --bind-to numa --map-by numa --timeout 129600 ./ag_gen -n ../Oct_2021/nm_files/1_car_timeline_maintenance.nm -x ../Oct_2021/Sync/10_Exploits/1_Serv/sync_timeline_maintenance.xp -t "$NUM_THREADS" -q 1 -p -a 0.6 -z "$DBNAME" fi diff --git a/build/sync_data_col.sh b/build/sync_data_col.sh index 0cff385..591a81c 100755 --- a/build/sync_data_col.sh +++ b/build/sync_data_col.sh @@ -38,6 +38,7 @@ if [[ "$folder" == ompnosync ]]; then fi if [[ "$SKIPSYNC" == false ]]; then + for thrd in "${thrds[@]}" do if [[ "$folder" == serial ]]; then @@ -72,60 +73,63 @@ if [[ "$SKIPSYNC" == false ]]; then echo "----------------------------------- "$cars" cars -----------------------------------" >> ./slurm_reports/"$folder"/sync_data.txt #1..4 (or 1..10)Services - for i in {1..4} + for i in {1..10} do - SKIP_FLAG=0 - #Dispatch the job and get the output ("Submitted batch job - JOB=`sbatch --nodelist="$nodelist" ag_run.sh "$i" "sync" "$cars" "$DBNAME" "$thrd" "$QSIZE"` - #Pull the jobid - JOB=`echo "$JOB" | grep -oP '(?<=job )[^ ]*'` - - #Output will be stored as this format - FILE="./slurm_reports/job.${JOB}.out" - ERR="./slurm_reports/job.${JOB}.err" - - NOW=`date +"%H:%M"` - - echo "Synchronous test with "$i" services "$thrd" omp threads "$QSIZE" Qsize and "$cars" cars dispatched at "$NOW" with jobid "$JOB"" >> ./slurm_reports/"$folder"/sync_data.txt + for node in "${NODES[@]}" + do + SKIP_FLAG=0 + #Dispatch the job and get the output ("Submitted batch job + JOB=`sbatch --nodelist="$nodelist" ag_run.sh "$i" "sync" "$cars" "$DBNAME" "$thrd" "$node"` + #Pull the jobid + JOB=`echo "$JOB" | grep -oP '(?<=job )[^ ]*'` - #Wait until job is finished - while squeue | grep "$JOB" > /dev/null 2>&1; do - #Slurm is having an issue where the prog finishes but doesn't leave slurm queue - #Check if program finished by checking the output file, delay by 5 seconds for safety, then cancel the job - if test -f "$FILE" && (cat "$FILE" | grep "total run time" > /dev/null 2>&1) ; then - #if test -f "$FILE"; then - for j in {1..5} - do - sleep 1 - done - SKIP_FLAG=1 - echo "Job done, but slurm hung." >> ./slurm_reports/"$folder"/sync_data.txt - tmp=`scancel "$JOB"` - sleep 3 - break + #Output will be stored as this format + FILE="./slurm_reports/job.${JOB}.out" + ERR="./slurm_reports/job.${JOB}.err" + + NOW=`date +"%H:%M"` + + echo "Synchronous test with "$i" services "$thrd" omp threads "$node" nodes and "$cars" cars dispatched at "$NOW" with jobid "$JOB"" >> ./slurm_reports/"$folder"/sync_data.txt + + #Wait until job is finished + while squeue | grep "$JOB" > /dev/null 2>&1; do + #Slurm is having an issue where the prog finishes but doesn't leave slurm queue + #Check if program finished by checking the output file, delay by 5 seconds for safety, then cancel the job + if test -f "$FILE" && (cat "$FILE" | grep "total run time" > /dev/null 2>&1) ; then + #if test -f "$FILE"; then + for j in {1..5} + do + sleep 1 + done + SKIP_FLAG=1 + echo "Job done, but slurm hung." >> ./slurm_reports/"$folder"/sync_data.txt + tmp=`scancel "$JOB"` + sleep 3 + break + fi + #while ! test -f "$FILE"; do + sleep 1 + #done + done + + #Wait for the delay between job finish and file write + while ! test -f "$FILE"; do + sleep 1 + done + + #Get runtime and states from the output + RUNTIME=`cat "$FILE" | grep -oP '(?<=total run time is )[^ ]*'` + STATES=`cat "$FILE" | grep -oP '(?<=Total States: )[^ ]*'` + + if ! test -s "$ERR" || test "$SKIP_FLAG" -eq 1 ; then + printf "${i} Services Synchronized: ${STATES} states and ${RUNTIME} runtime \n\n" >> ./slurm_reports/"$folder"/sync_data.txt + str="$i","$STATES","$RUNTIME" + echo $str >> ./slurm_reports/"$folder"/sync_data.csv + else + printf "Errors occurred. Please see err file for more details. \n\n" >> ./slurm_reports/"$folder"/sync_data.txt + fi - #while ! test -f "$FILE"; do - sleep 1 - #done done - - #Wait for the delay between job finish and file write - while ! test -f "$FILE"; do - sleep 1 - done - - #Get runtime and states from the output - RUNTIME=`cat "$FILE" | grep -oP '(?<=total run time is )[^ ]*'` - STATES=`cat "$FILE" | grep -oP '(?<=Total States: )[^ ]*'` - - if ! test -s "$ERR" || test "$SKIP_FLAG" -eq 1 ; then - printf "${i} Services Synchronized: ${STATES} states and ${RUNTIME} runtime \n\n" >> ./slurm_reports/"$folder"/sync_data.txt - str="$i","$STATES","$RUNTIME" - echo $str >> ./slurm_reports/"$folder"/sync_data.csv - else - printf "Errors occurred. Please see err file for more details. \n\n" >> ./slurm_reports/"$folder"/sync_data.txt - - fi SKIP_FLAG=0 done done @@ -135,93 +139,104 @@ if [[ "$SKIPSYNC" == false ]]; then done fi +if [[ "$SKIPSYNC" == false ]]; then + for thrd in "${thrds[@]}" + do + if [[ "$folder" == serial ]]; then + thrd=1 + nodelist=compute05 + fi -#thrds=(4 8 16 32) + if [[ "$folder" == serialnosync ]]; then + thrd=1 + nodelist=compute07 + fi -for thrd in "${thrds[@]}" -do - if [[ "$folder" == serial ]]; then - thrd=1 - nodelist=compute05 - fi + if [[ "$folder" == ompnosync ]]; then + nodelist=compute08 + fi - if [[ "$folder" == serialnosync ]]; then - thrd=1 - nodelist=compute07 - fi - - if [[ "$folder" == ompnosync ]]; then - nodelist=compute08 - fi - - for qmult in {1..1} - do - if [[ "$qmult" -eq 3 ]]; then - QSIZE=$((thrd*4)) - else - QSIZE=$((thrd*qmult)) - fi - - if [[ "$folder" == serial ]]; then - QSIZE=1 - fi - - for cars in {2..6} - do - echo "----------------------------------- "$cars" cars -----------------------------------" >> ./slurm_reports/"$folder"/sync_data.txt - - #1..4, 1..10 - for i in {1..4} - do - SKIP_FLAG=0 - JOB=`sbatch --nodelist="$nodelist" ag_run.sh "$i" "non-sync" "$cars" "$folder" "$thrd" "$QSIZE"` - JOB=`echo "$JOB" | grep -oP '(?<=job )[^ ]*'` - - FILE="./slurm_reports/job.${JOB}.out" - ERR="./slurm_reports/job.${JOB}.err" + for qmult in {1..1} + do + if [[ "$qmult" -eq 3 ]]; then + QSIZE=$((thrd*4)) + else + QSIZE=$((thrd*qmult)) + fi - NOW=`date +"%H:%M"` + if [[ "$folder" == serial ]]; then + QSIZE=1 + fi - echo "Non-Synchronous test with "$i" services "$thrd" threads "$QSIZE" Qsize and "$cars" cars dispatched at "$NOW" with jobid "$JOB"" >> ./slurm_reports/"$folder"/sync_data.txt + #1..6 Cars + for cars in {2..6} + do - while squeue | grep "$JOB" > /dev/null 2>&1; do - #while ! test -f "$FILE"; do - if test -f "$FILE" && (cat "$FILE" | grep "total run time" > /dev/null 2>&1) ; then - #if test -f "$FILE"; then - for j in {1..5} - do - sleep 1 - done - SKIP_FLAG=1 - echo "Job done, but slurm hung." >> ./slurm_reports/"$folder"/sync_data.txt - tmp=`scancel "$JOB"` - break + echo "----------------------------------- "$cars" cars -----------------------------------" >> ./slurm_reports/"$folder"/non-sync_data.txt + #1..4 (or 1..10)Services + for i in {1..10} + do + for node in "${NODES[@]}" + do + SKIP_FLAG=0 + #Dispatch the job and get the output ("Submitted batch job + JOB=`sbatch --nodelist="$nodelist" ag_run.sh "$i" "non-sync" "$cars" "$DBNAME" "$thrd" "$node"` + #Pull the jobid + JOB=`echo "$JOB" | grep -oP '(?<=job )[^ ]*'` + + #Output will be stored as this format + FILE="./slurm_reports/job.${JOB}.out" + ERR="./slurm_reports/job.${JOB}.err" - fi + NOW=`date +"%H:%M"` + + echo "Non-Synchronous test with "$i" services "$thrd" omp threads "$node" nodes and "$cars" cars dispatched at "$NOW" with jobid "$JOB"" >> ./slurm_reports/"$folder"/non-sync_data.txt + + #Wait until job is finished + while squeue | grep "$JOB" > /dev/null 2>&1; do + #Slurm is having an issue where the prog finishes but doesn't leave slurm queue + #Check if program finished by checking the output file, delay by 5 seconds for safety, then cancel the job + if test -f "$FILE" && (cat "$FILE" | grep "total run time" > /dev/null 2>&1) ; then + #if test -f "$FILE"; then + for j in {1..5} + do + sleep 1 + done + SKIP_FLAG=1 + echo "Job done, but slurm hung." >> ./slurm_reports/"$folder"/non-sync_data.txt + tmp=`scancel "$JOB"` + sleep 3 + break + fi + #while ! test -f "$FILE"; do + sleep 1 + #done + done - sleep 1 - #done + #Wait for the delay between job finish and file write + while ! test -f "$FILE"; do + sleep 1 + done + + #Get runtime and states from the output + RUNTIME=`cat "$FILE" | grep -oP '(?<=total run time is )[^ ]*'` + STATES=`cat "$FILE" | grep -oP '(?<=Total States: )[^ ]*'` + + if ! test -s "$ERR" || test "$SKIP_FLAG" -eq 1 ; then + printf "${i} Services Non-Synchronized: ${STATES} states and ${RUNTIME} runtime \n\n" >> ./slurm_reports/"$folder"/non-sync_data.txt + str="$i","$STATES","$RUNTIME" + echo $str >> ./slurm_reports/"$folder"/non-sync_data.csv + else + printf "Errors occurred. Please see err file for more details. \n\n" >> ./slurm_reports/"$folder"/non-sync_data.txt + + fi + done + SKIP_FLAG=0 done - - while ! test -f "$FILE"; do - sleep 1 - done - - RUNTIME=`cat "$FILE" | grep -oP '(?<=total run time is )[^ ]*'` - STATES=`cat "$FILE" | grep -oP '(?<=Total States: )[^ ]*'` - - if ! test -s "$ERR" || test "$SKIP_FLAG" -eq 1 ; then - printf "${i} Services Non-Synchronized: ${STATES} states and ${RUNTIME} runtime \n\n" >> ./slurm_reports/"$folder"/sync_data.txt - str="$i","$STATES","$RUNTIME" - echo $str >> ./slurm_reports/"$folder"/sync_data.csv - else - printf "Errors occurred. Please see err file for more details. \n\n" >> ./slurm_reports/"$folder"/sync_data.txt - - fi - - SKIP_FLAG=0 done + + printf "\n\n" >> ./slurm_reports/"$folder"/non-sync_data.txt + done done - done -done +fi