ag_gen/build/.nfs000000000041e67300002bf1
2022-02-05 01:36:26 -06:00

243 lines
6.4 KiB
Bash
Executable File

#!/usr/bin/env bash
# Configuration for the Slurm benchmark sweep performed by the rest of this
# script. Everything assigned here is a plain global read by the dispatch
# loops further down.

# Variant to benchmark: uncomment exactly one line.
#folder=omp
#folder=mpi
folder=serial
#folder=serialnosync
#folder=ompnosync

# Epoch seconds at start-up; makes DBNAME unique per invocation.
TIMESTR=$(date +"%s")
DBNAME="${folder}_${TIMESTR}"

# Default --nodelist value for sbatch (the dispatch loops override it for
# some variants).
nodelist=compute01,compute04,compute07,compute10,compute11,compute12

# Directory the script was started from.
pwd=$(pwd)

# Thread counts to sweep: the serial and mpi variants use a single thread,
# every other variant sweeps 4, 8, 16 and 32.
case "$folder" in
  serial | serialnosync | mpi)
    thrds=(1)
    ;;
  *)
    thrds=(4 8 16 32)
    ;;
esac

# Node counts to sweep: only the mpi variant uses more than one node.
case "$folder" in
  mpi)
    NODES=(2 3 4 5 6 7 8 9 10 11 12)
    ;;
  *)
    NODES=(1)
    ;;
esac

QSIZE=1

# The *nosync variants skip the synchronous test runs.
case "$folder" in
  serialnosync | ompnosync)
    SKIPSYNC=true
    ;;
  *)
    SKIPSYNC=false
    ;;
esac
# ---------------------------------------------------------------------------
# Job dispatch
#
# Sweeps thread count x car count (2..6) x service count (1..10) x node count.
# Each combination is submitted to Slurm through ag_run.sh, one job at a time;
# the script blocks until that job has finished and then appends the
# "Total States" / "total run time" figures from the job's output file to
#   ./slurm_reports/<folder>/<mode>_data.txt   (human-readable log)
#   ./slurm_reports/<folder>/<mode>_data.csv   (services,states,runtime)
# where <mode> is "sync" or "non-sync".
#
# Globals read: folder, thrds, NODES, nodelist, DBNAME, SKIPSYNC
# Optional:     OUTPUT_WAIT_LIMIT - seconds to wait for a finished job's
#               output file to appear (default 300)
# ---------------------------------------------------------------------------

#######################################
# Extract the job id from sbatch's "Submitted batch job <jobid>" message.
# Arguments: $1 - text printed by sbatch
# Outputs:   the job id on stdout
# Returns:   0 if an id was found, 1 otherwise
#######################################
extract_job_id() {
  local re='job ([^[:space:]]+)'
  if [[ "$1" =~ $re ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
    return 0
  fi
  return 1
}

#######################################
# Print the space-delimited token that follows a literal marker in a file.
# If several lines contain the marker the tokens are joined with spaces, so
# the result is always a single line.
# Arguments: $1 - literal marker text, $2 - file to search
# Outputs:   the token(s) on stdout, without a trailing newline
#######################################
extract_value() {
  local marker=$1 file=$2 line rest
  local -a found=()
  while IFS= read -r line; do
    rest=${line#*"$marker"}
    found+=("${rest%% *}")
  done < <(grep -F -- "$marker" "$file" 2>/dev/null)
  printf '%s' "${found[*]}"
}

#######################################
# Tell whether squeue still lists a job.
# Only this exact job is queried, so other jobs whose ids merely contain the
# same digits cannot keep the caller waiting. squeue complains on stderr once
# the id is no longer known; that message is expected and discarded.
# Arguments: $1 - job id
# Returns:   0 if squeue prints an entry for the job, non-zero otherwise
#######################################
job_in_queue() {
  squeue -h -j "$1" 2>/dev/null | grep -q .
}

#######################################
# Submit one benchmark job, wait for it to finish and record its result.
# Globals:   folder, nodelist, DBNAME (read), OUTPUT_WAIT_LIMIT (read)
# Arguments: $1 - mode handed to ag_run.sh ("sync" or "non-sync"); also the
#                 report file prefix
#            $2 - label prefix for the report text ("" or "Non-")
#            $3 - number of services
#            $4 - number of cars
#            $5 - number of omp threads
#            $6 - number of nodes
# Returns:   0 once a result or an "Errors occurred" note has been logged,
#            1 if the job could not be submitted or never produced output
#######################################
run_one_job() {
  local mode=$1 prefix=$2 services=$3 cars=$4 threads=$5 nodes=$6
  local report_txt="./slurm_reports/${folder}/${mode}_data.txt"
  local report_csv="./slurm_reports/${folder}/${mode}_data.csv"
  local wait_limit=${OUTPUT_WAIT_LIMIT:-300}
  local submit_msg job_id out_file err_file now runtime states
  local hung=0 waited=0

  # Dispatch the job; sbatch answers "Submitted batch job <jobid>".
  submit_msg=$(sbatch --nodelist="$nodelist" ag_run.sh \
    "$services" "$mode" "$cars" "$DBNAME" "$threads" "$nodes")
  if ! job_id=$(extract_job_id "$submit_msg"); then
    # Without a job id there is nothing to wait for; polling would never end.
    printf 'error: sbatch returned no job id (%s, %s services, %s cars)\n' \
      "$mode" "$services" "$cars" >&2
    printf 'Job submission failed. \n\n' >>"$report_txt"
    return 1
  fi

  # Output will be stored in this format.
  out_file="./slurm_reports/job.${job_id}.out"
  err_file="./slurm_reports/job.${job_id}.err"
  now=$(date +"%H:%M")
  printf '%sSynchronous test with %s services %s omp threads %s nodes and %s cars dispatched at %s with jobid %s\n' \
    "$prefix" "$services" "$threads" "$nodes" "$cars" "$now" "$job_id" \
    >>"$report_txt"

  # Wait until the job has left the queue.
  while job_in_queue "$job_id"; do
    # Slurm sometimes keeps a job listed after the program has finished.
    # If the output file already carries the final timing line, give it five
    # more seconds for safety, cancel the job and stop polling.
    if [[ -f "$out_file" ]] && grep -q "total run time" "$out_file" 2>/dev/null; then
      sleep 5
      hung=1
      printf '%s\n' "Job done, but slurm hung." >>"$report_txt"
      scancel "$job_id" >/dev/null
      sleep 3
      break
    fi
    sleep 1
  done

  # The output file can show up slightly after the job leaves the queue.
  # Give up after wait_limit seconds instead of blocking the sweep forever.
  while [[ ! -f "$out_file" ]]; do
    if ((waited >= wait_limit)); then
      printf 'error: %s did not appear within %s seconds\n' \
        "$out_file" "$wait_limit" >&2
      printf 'No output file was produced. \n\n' >>"$report_txt"
      return 1
    fi
    sleep 1
    waited=$((waited + 1))
  done

  # Get runtime and states from the output.
  runtime=$(extract_value 'total run time is ' "$out_file")
  states=$(extract_value 'Total States: ' "$out_file")

  # A non-empty .err file marks a failed run, unless the job had to be
  # cancelled by the hung-queue workaround above.
  if [[ ! -s "$err_file" ]] || ((hung)); then
    printf '%s Services %sSynchronized: %s states and %s runtime \n\n' \
      "$services" "$prefix" "$states" "$runtime" >>"$report_txt"
    printf '%s,%s,%s\n' "$services" "$states" "$runtime" >>"$report_csv"
  else
    printf 'Errors occurred. Please see err file for more details. \n\n' \
      >>"$report_txt"
  fi
}

#######################################
# Run the full parameter sweep for one mode.
# Globals:   folder, thrds, NODES (read); nodelist, QSIZE (written)
# Arguments: $1 - mode ("sync" or "non-sync")
#            $2 - label prefix for the report text ("" or "Non-")
#######################################
run_suite() {
  local mode=$1 prefix=$2
  local report_txt="./slurm_reports/${folder}/${mode}_data.txt"
  local rule='-----------------------------------'
  local thrd qmult cars services node

  for thrd in "${thrds[@]}"; do
    # Some variants are pinned to a dedicated node; the serial ones also
    # force a single thread.
    case "$folder" in
      serial)
        thrd=1
        nodelist=compute05
        ;;
      serialnosync)
        thrd=1
        nodelist=compute07
        ;;
      ompnosync)
        nodelist=compute08
        ;;
    esac

    for qmult in {1..1}; do
      # NOTE(review): QSIZE is computed as before but nothing visible in
      # this script reads it - confirm whether it is still needed.
      if ((qmult == 3)); then
        QSIZE=$((thrd * 4))
      else
        QSIZE=$((thrd * qmult))
      fi
      if [[ "$folder" == serial ]]; then
        QSIZE=1
      fi

      # 2..6 cars
      for cars in {2..6}; do
        printf '%s %s cars %s\n' "$rule" "$cars" "$rule" >>"$report_txt"
        # 1..10 services
        for services in {1..10}; do
          for node in "${NODES[@]}"; do
            # A failed job is logged by run_one_job; keep sweeping.
            run_one_job "$mode" "$prefix" "$services" "$cars" "$thrd" "$node"
          done
        done
      done
      printf '\n\n' >>"$report_txt"
    done
  done
}

#######################################
# Entry point: run the synchronous sweep unless SKIPSYNC is true, then the
# non-synchronous sweep.
# The non-synchronous sweep used to sit behind the same SKIPSYNC guard as the
# synchronous one, so the *nosync folders dispatched no jobs at all.
# Returns: 1 if the configuration is missing, sbatch is unavailable or the
#          report directory cannot be created
#######################################
main() {
  if [[ -z "${folder:-}" ]]; then
    printf 'error: folder is not set\n' >&2
    return 1
  fi
  if ! command -v sbatch >/dev/null 2>&1; then
    printf 'error: sbatch not found; cannot dispatch jobs\n' >&2
    return 1
  fi
  mkdir -p "./slurm_reports/${folder}" || return 1

  if [[ "${SKIPSYNC:-false}" == false ]]; then
    run_suite sync ""
  fi
  run_suite non-sync "Non-"
}

main