#!/usr/bin/env bash
#
# Benchmark driver for ag_run.sh on a Slurm cluster.
#
# For every combination of thread count, car count (2..6), service count
# (1..10) and node count, one job is submitted with sbatch. The script waits
# for that job to finish, then pulls the "Total States: " and
# "total run time is " values out of ./slurm_reports/job.<id>.out and appends
# them to a text report and a CSV file in ./slurm_reports/<folder>/.
#
# Two suites are run: "sync" and "non-sync" (the mode string is handed to
# ag_run.sh as its second argument). The *nosync folders skip the sync suite.
#
# Configuration: edit `folder` below. Recognised values:
#   omp, mpi, serial, serialnosync, ompnosync
#
# Environment:
#   OUTPUT_WAIT_SECS  how long to wait for a job's output file to appear once
#                     the job has left the queue (default: 120)

set -u

#Choose folder
folder=serial

# Fourth argument to ag_run.sh: folder name plus the epoch second of this run.
DBNAME="${folder}_$(date +"%s")"

REPORT_DIR="./slurm_reports/${folder}"
OUTPUT_WAIT_SECS=${OUTPUT_WAIT_SECS:-120}

# Thread counts to sweep: only the omp variants use more than one thread.
case "$folder" in
  serial | serialnosync | mpi) thrds=(1) ;;
  *) thrds=(4 8 16 32) ;;
esac

# Node counts to sweep: only mpi runs on more than one node.
if [[ "$folder" == mpi ]]; then
  NODES=(2 3 4 5 6 7 8 9 10 11 12)
else
  NODES=(1)
fi

# Nodes the jobs are pinned to (sbatch --nodelist).
case "$folder" in
  serial) nodelist=compute05 ;;
  serialnosync) nodelist=compute07 ;;
  ompnosync) nodelist=compute08 ;;
  *) nodelist=compute01,compute04,compute07,compute10,compute11,compute12 ;;
esac

SKIPSYNC=false
if [[ "$folder" == serialnosync || "$folder" == ompnosync ]]; then
  SKIPSYNC=true
fi

#######################################
# Tell whether a job is still listed by squeue.
# The id column is compared exactly (also accepting "<id>_<n>" array tasks and
# "<id>+<n>" heterogeneous components), so job 123 is not mistaken for 1234.
# Arguments: $1 - numeric Slurm job id
# Returns:   0 if the job is listed, non-zero otherwise
#######################################
job_in_queue() {
  squeue -h -o '%i' | awk -v id="$1" '
    $1 == id || index($1, id "_") == 1 || index($1, id "+") == 1 { found = 1 }
    END { exit !found }'
}

#######################################
# Tell whether the program has already printed its final timing line.
# Arguments: $1 - path to the job's output file
# Returns:   0 if the file exists and contains "total run time"
#######################################
output_complete() {
  [[ -f "$1" ]] && grep -q "total run time" -- "$1"
}

#######################################
# Submit one job, wait for it, and record its result.
# Globals:   nodelist, DBNAME, OUTPUT_WAIT_SECS (read)
# Arguments: $1 - mode passed to ag_run.sh ("sync" or "non-sync")
#            $2 - label used in the dispatch line (e.g. "Synchronous")
#            $3 - label used in the result line (e.g. "Synchronized")
#            $4 - text report to append to
#            $5 - CSV file to append to
#            $6 - number of services
#            $7 - number of cars
#            $8 - number of threads
#            $9 - number of nodes
# Returns:   0 when the job was processed (even if it reported errors),
#            1 when it could not be submitted or produced no output file
#######################################
run_job() {
  local mode=$1 title=$2 summary=$3 report=$4 csv=$5
  local services=$6 cars=$7 thrd=$8 node=$9
  local -r id_re='job ([0-9]+)'
  local submit_out job out_file err_file now runtime states
  local hung=0 waited=0

  #Dispatch the job and get the output ("Submitted batch job <id>")
  submit_out=$(sbatch --nodelist="$nodelist" ag_run.sh \
    "$services" "$mode" "$cars" "$DBNAME" "$thrd" "$node") || true

  #Pull the jobid; without one there is nothing to wait for
  if [[ ! "$submit_out" =~ $id_re ]]; then
    printf 'sbatch returned no job id (%s, %s services, %s cars): %s\n' \
      "$mode" "$services" "$cars" "$submit_out" >&2
    printf 'Errors occurred. Job submission failed. \n\n' >> "$report"
    return 1
  fi
  job=${BASH_REMATCH[1]}

  #Output will be stored as this format
  out_file="./slurm_reports/job.${job}.out"
  err_file="./slurm_reports/job.${job}.err"

  now=$(date +"%H:%M")
  printf '%s\n' "${title} test with ${services} services ${thrd} omp threads ${node} nodes and ${cars} cars dispatched at ${now} with jobid ${job}" >> "$report"

  #Wait until job is finished
  while job_in_queue "$job"; do
    #Slurm is having an issue where the prog finishes but doesn't leave the
    #queue. If the output already holds the final timing line, delay by 5
    #seconds for safety, then cancel the job.
    if output_complete "$out_file"; then
      sleep 5
      hung=1
      printf '%s\n' "Job done, but slurm hung." >> "$report"
      scancel "$job" > /dev/null
      sleep 3
      break
    fi
    sleep 1
  done

  #Wait for the delay between job finish and file write, but not forever:
  #a job that died before writing anything would otherwise stall the sweep.
  while [[ ! -f "$out_file" ]]; do
    if (( waited >= OUTPUT_WAIT_SECS )); then
      printf 'job %s left the queue but %s never appeared\n' \
        "$job" "$out_file" >&2
      printf 'Errors occurred. No output file was produced for job %s. \n\n' \
        "$job" >> "$report"
      return 1
    fi
    sleep 1
    waited=$((waited + 1))
  done

  #Get runtime and states from the output
  runtime=$(grep -oP '(?<=total run time is )[^ ]*' -- "$out_file")
  states=$(grep -oP '(?<=Total States: )[^ ]*' -- "$out_file")

  # A job that had to be cancelled because Slurm hung may have a non-empty
  # err file even though the run itself completed, so it still counts.
  if [[ ! -s "$err_file" ]] || (( hung )); then
    printf '%s Services %s: %s states and %s runtime \n\n' \
      "$services" "$summary" "$states" "$runtime" >> "$report"
    # Keep each CSV record on one line even if a value matched several times.
    printf '%s,%s,%s\n' \
      "$services" "${states//$'\n'/ }" "${runtime//$'\n'/ }" >> "$csv"
  else
    printf 'Errors occurred. Please see err file for more details. \n\n' >> "$report"
  fi
}

#######################################
# Run the full parameter sweep for one mode.
# Globals:   thrds, NODES, REPORT_DIR (read)
# Arguments: $1 - mode ("sync" or "non-sync"); also the report file prefix
#            $2 - label used in dispatch lines
#            $3 - label used in result lines
#######################################
run_suite() {
  local mode=$1 title=$2 summary=$3
  local report="${REPORT_DIR}/${mode}_data.txt"
  local csv="${REPORT_DIR}/${mode}_data.csv"
  local thrd cars services node

  for thrd in "${thrds[@]}"; do
    for cars in {2..6}; do
      printf '%s\n' "----------------------------------- ${cars} cars -----------------------------------" >> "$report"
      for services in {1..10}; do
        for node in "${NODES[@]}"; do
          run_job "$mode" "$title" "$summary" "$report" "$csv" \
            "$services" "$cars" "$thrd" "$node"
        done
      done
    done
    printf '\n\n' >> "$report"
  done
}

if ! command -v sbatch > /dev/null; then
  printf 'sbatch not found in PATH; this script must run on a Slurm host\n' >&2
  exit 1
fi

if ! mkdir -p -- "$REPORT_DIR"; then
  printf 'cannot create report directory %s\n' "$REPORT_DIR" >&2
  exit 1
fi

if [[ "$SKIPSYNC" == false ]]; then
  run_suite "sync" "Synchronous" "Synchronized"
fi

# The non-synchronous suite runs for every folder. SKIPSYNC only suppresses
# the synchronous suite above; guarding this one with it as well would leave
# the *nosync folders with nothing to run.
run_suite "non-sync" "Non-Synchronous" "Non-Synchronized"