Run script adjustments

This commit is contained in:
Noah L. Schrick 2022-02-05 00:47:02 -06:00
parent 34581517c6
commit 4196aa74fa
3 changed files with 151 additions and 134 deletions

View File

@ -17,7 +17,8 @@ NUM_SERV=${1:-1}
TYPE=${2:-sync}
CARS=${3:-2}
DBNAME=${4:-tmp}
NODES=${5:-4}
NUM_THREADS=${5:-1}
NODES=${6:-4}
module load graphviz
module load gcc
@ -27,4 +28,4 @@ module load cmake
module load postgresql
#./run.sh 1 sync 2 "$TESTING" 2
./run.sh "$NUM_SERV" "$TYPE" "$CARS" "$DBNAME" "$NODES"
./run.sh "$NUM_SERV" "$TYPE" "$CARS" "$DBNAME" "$NUM_THREADS" "$NODES"

View File

@ -5,7 +5,8 @@ NUM_SERV=${1:-1}
TYPE=${2:-sync}
CARS=${3:-2}
DBNAME=${4:-tmp}
NODES=${5:-2}
NUM_THREADS=${5:-1}
NODES=${6:-2}
ITFC="eth0"
@ -54,7 +55,7 @@ if [ "$TYPE" == "$strval1" ]; then
#Old
#./ag_gen -n ../Feb_2021/generic_timeline_maintenance.nm -x ../Feb_2021/Sync/"$NUM_SERV"_Serv/sync_timeline_maintenance.xp -g DOTFILE.dot -t 1 -q 1 -p -a 0.9
mpiexec --mca btl_openib_allow_ib 1 --mca btl openib,self,vader --mca opal_warn_on_missing_libcuda 0 --bind-to numa --map-by numa -np "$NODES" --timeout 129600 ./ag_gen -n ../Oct_2021/nm_files/"$CARS"_car_timeline_maintenance.nm -x ../Oct_2021/Sync/10_Exploits/"$NUM_SERV"_Serv/sync_timeline_maintenance.xp -t 1 -q 1 -p -a 0.6 -z "$DBNAME"
mpiexec --mca btl_openib_allow_ib 1 --mca btl openib,self,vader --mca opal_warn_on_missing_libcuda 0 --bind-to numa --map-by numa -np "$NODES" --timeout 129600 ./ag_gen -n ../Oct_2021/nm_files/"$CARS"_car_timeline_maintenance.nm -x ../Oct_2021/Sync/10_Exploits/"$NUM_SERV"_Serv/sync_timeline_maintenance.xp -t "$NUM_THREADS" -q 1 -p -a 0.6 -z "$DBNAME"
# 4 Exploit
#mpiexec -np "$NODES" --bind-to numa --map-by numa ./ag_gen -n ../Oct_2021/nm_files/"$CARS"_car_timeline_maintenance.nm -x ../Oct_2021/Sync/4_Exploits/"$NUM_SERV"_Serv/sync_timeline_maintenance.xp -t 1 -q 1 -p -a 0.6 -z "$DBNAME"
@ -63,11 +64,11 @@ if [ "$TYPE" == "$strval1" ]; then
elif [ "$TYPE" == "$strval2" ]; then
#./ag_gen -n ../Feb_2021/generic_timeline_maintenance.nm -x ../Feb_2021/Non_Sync/"$NUM_SERV"_Serv/generic_timeline_maintenance.xp -g DOTFILE.dot -t 1 -q 1 -p -a 0.9
mpiexec --mca btl_openib_allow_ib 1 --mca btl openib,self,vader --mca opal_warn_on_missing_libcuda 0 --bind-to numa --map-by numa -np "$NODES" --timeout 129600 ./ag_gen -n ../Oct_2021/nm_files/"$CARS"_car_timeline_maintenance.nm -x ../Oct_2021/Non_Sync/10_Exploits/"$NUM_SERV"_Serv/generic_timeline_maintenance.xp -t 1 -q 1 -p -a 0.6 -z "$DBNAME"
mpiexec --mca btl_openib_allow_ib 1 --mca btl openib,self,vader --mca opal_warn_on_missing_libcuda 0 --bind-to numa --map-by numa -np "$NODES" --timeout 129600 ./ag_gen -n ../Oct_2021/nm_files/"$CARS"_car_timeline_maintenance.nm -x ../Oct_2021/Non_Sync/10_Exploits/"$NUM_SERV"_Serv/generic_timeline_maintenance.xp -t "$NUM_THREADS" -q 1 -p -a 0.6 -z "$DBNAME"
else
echo "Running default."
mpiexec --mca btl_openib_allow_ib 1 --mca btl openib,self,vader --mca opal_warn_on_missing_libcuda 0 --bind-to numa --map-by numa --timeout 129600 ./ag_gen -n ../Oct_2021/nm_files/1_car_timeline_maintenance.nm -x ../Oct_2021/Sync/10_Exploits/1_Serv/sync_timeline_maintenance.xp -t 1 -q 1 -p -a 0.6 -z "$DBNAME"
mpiexec --mca btl_openib_allow_ib 1 --mca btl openib,self,vader --mca opal_warn_on_missing_libcuda 0 --bind-to numa --map-by numa --timeout 129600 ./ag_gen -n ../Oct_2021/nm_files/1_car_timeline_maintenance.nm -x ../Oct_2021/Sync/10_Exploits/1_Serv/sync_timeline_maintenance.xp -t "$NUM_THREADS" -q 1 -p -a 0.6 -z "$DBNAME"
fi

View File

@ -38,6 +38,7 @@ if [[ "$folder" == ompnosync ]]; then
fi
if [[ "$SKIPSYNC" == false ]]; then
for thrd in "${thrds[@]}"
do
if [[ "$folder" == serial ]]; then
@ -72,11 +73,13 @@ if [[ "$SKIPSYNC" == false ]]; then
echo "----------------------------------- "$cars" cars -----------------------------------" >> ./slurm_reports/"$folder"/sync_data.txt
#1..4 (or 1..10) Services
for i in {1..4}
for i in {1..10}
do
for node in "${NODES[@]}"
do
SKIP_FLAG=0
#Dispatch the job and get the output ("Submitted batch job <jobid>")
JOB=`sbatch --nodelist="$nodelist" ag_run.sh "$i" "sync" "$cars" "$DBNAME" "$thrd" "$QSIZE"`
JOB=`sbatch --nodelist="$nodelist" ag_run.sh "$i" "sync" "$cars" "$DBNAME" "$thrd" "$node"`
#Pull the jobid
JOB=`echo "$JOB" | grep -oP '(?<=job )[^ ]*'`
@ -86,7 +89,7 @@ if [[ "$SKIPSYNC" == false ]]; then
NOW=`date +"%H:%M"`
echo "Synchronous test with "$i" services "$thrd" omp threads "$QSIZE" Qsize and "$cars" cars dispatched at "$NOW" with jobid "$JOB"" >> ./slurm_reports/"$folder"/sync_data.txt
echo "Synchronous test with "$i" services "$thrd" omp threads "$node" nodes and "$cars" cars dispatched at "$NOW" with jobid "$JOB"" >> ./slurm_reports/"$folder"/sync_data.txt
#Wait until job is finished
while squeue | grep "$JOB" > /dev/null 2>&1; do
@ -126,6 +129,7 @@ if [[ "$SKIPSYNC" == false ]]; then
printf "Errors occurred. Please see err file for more details. \n\n" >> ./slurm_reports/"$folder"/sync_data.txt
fi
done
SKIP_FLAG=0
done
done
@ -135,9 +139,7 @@ if [[ "$SKIPSYNC" == false ]]; then
done
fi
#thrds=(4 8 16 32)
if [[ "$SKIPSYNC" == false ]]; then
for thrd in "${thrds[@]}"
do
@ -167,26 +169,34 @@ do
QSIZE=1
fi
#2..6 Cars
for cars in {2..6}
do
echo "----------------------------------- "$cars" cars -----------------------------------" >> ./slurm_reports/"$folder"/sync_data.txt
#1..4, 1..10
for i in {1..4}
echo "----------------------------------- "$cars" cars -----------------------------------" >> ./slurm_reports/"$folder"/non-sync_data.txt
#1..4 (or 1..10) Services
for i in {1..10}
do
for node in "${NODES[@]}"
do
SKIP_FLAG=0
JOB=`sbatch --nodelist="$nodelist" ag_run.sh "$i" "non-sync" "$cars" "$folder" "$thrd" "$QSIZE"`
#Dispatch the job and get the output ("Submitted batch job <jobid>")
JOB=`sbatch --nodelist="$nodelist" ag_run.sh "$i" "non-sync" "$cars" "$DBNAME" "$thrd" "$node"`
#Pull the jobid
JOB=`echo "$JOB" | grep -oP '(?<=job )[^ ]*'`
#Output will be stored as this format
FILE="./slurm_reports/job.${JOB}.out"
ERR="./slurm_reports/job.${JOB}.err"
NOW=`date +"%H:%M"`
echo "Non-Synchronous test with "$i" services "$thrd" threads "$QSIZE" Qsize and "$cars" cars dispatched at "$NOW" with jobid "$JOB"" >> ./slurm_reports/"$folder"/sync_data.txt
echo "Non-Synchronous test with "$i" services "$thrd" omp threads "$node" nodes and "$cars" cars dispatched at "$NOW" with jobid "$JOB"" >> ./slurm_reports/"$folder"/non-sync_data.txt
#Wait until job is finished
while squeue | grep "$JOB" > /dev/null 2>&1; do
#while ! test -f "$FILE"; do
#Slurm is having an issue where the prog finishes but doesn't leave slurm queue
#Check if program finished by checking the output file, delay by 5 seconds for safety, then cancel the job
if test -f "$FILE" && (cat "$FILE" | grep "total run time" > /dev/null 2>&1) ; then
#if test -f "$FILE"; then
for j in {1..5}
@ -194,34 +204,39 @@ do
sleep 1
done
SKIP_FLAG=1
echo "Job done, but slurm hung." >> ./slurm_reports/"$folder"/sync_data.txt
echo "Job done, but slurm hung." >> ./slurm_reports/"$folder"/non-sync_data.txt
tmp=`scancel "$JOB"`
sleep 3
break
fi
#while ! test -f "$FILE"; do
sleep 1
#done
done
#Wait for the delay between job finish and file write
while ! test -f "$FILE"; do
sleep 1
done
#Get runtime and states from the output
RUNTIME=`cat "$FILE" | grep -oP '(?<=total run time is )[^ ]*'`
STATES=`cat "$FILE" | grep -oP '(?<=Total States: )[^ ]*'`
if ! test -s "$ERR" || test "$SKIP_FLAG" -eq 1 ; then
printf "${i} Services Non-Synchronized: ${STATES} states and ${RUNTIME} runtime \n\n" >> ./slurm_reports/"$folder"/sync_data.txt
printf "${i} Services Non-Synchronized: ${STATES} states and ${RUNTIME} runtime \n\n" >> ./slurm_reports/"$folder"/non-sync_data.txt
str="$i","$STATES","$RUNTIME"
echo $str >> ./slurm_reports/"$folder"/sync_data.csv
echo $str >> ./slurm_reports/"$folder"/non-sync_data.csv
else
printf "Errors occurred. Please see err file for more details. \n\n" >> ./slurm_reports/"$folder"/sync_data.txt
printf "Errors occurred. Please see err file for more details. \n\n" >> ./slurm_reports/"$folder"/non-sync_data.txt
fi
done
SKIP_FLAG=0
done
done
printf "\n\n" >> ./slurm_reports/"$folder"/non-sync_data.txt
done
done
fi