Run script adjustments
This commit is contained in:
parent
34581517c6
commit
4196aa74fa
@ -17,7 +17,8 @@ NUM_SERV=${1:-1}
|
|||||||
TYPE=${2:-sync}
|
TYPE=${2:-sync}
|
||||||
CARS=${3:-2}
|
CARS=${3:-2}
|
||||||
DBNAME=${4:-tmp}
|
DBNAME=${4:-tmp}
|
||||||
NODES=${5:-4}
|
NUM_THREADS=${5:-1}
|
||||||
|
NODES=${6:-4}
|
||||||
|
|
||||||
module load graphviz
|
module load graphviz
|
||||||
module load gcc
|
module load gcc
|
||||||
@ -27,4 +28,4 @@ module load cmake
|
|||||||
module load postgresql
|
module load postgresql
|
||||||
|
|
||||||
#./run.sh 1 sync 2 "$TESTING" 2
|
#./run.sh 1 sync 2 "$TESTING" 2
|
||||||
./run.sh "$NUM_SERV" "$TYPE" "$CARS" "$DBNAME" "$NODES"
|
./run.sh "$NUM_SERV" "$TYPE" "$CARS" "$DBNAME" "$NUM_THREADS" "$NODES"
|
||||||
|
|||||||
@ -5,7 +5,8 @@ NUM_SERV=${1:-1}
|
|||||||
TYPE=${2:-sync}
|
TYPE=${2:-sync}
|
||||||
CARS=${3:-2}
|
CARS=${3:-2}
|
||||||
DBNAME=${4:-tmp}
|
DBNAME=${4:-tmp}
|
||||||
NODES=${5:-2}
|
NUM_THREADS=${5:-1}
|
||||||
|
NODES=${6:-2}
|
||||||
|
|
||||||
ITFC="eth0"
|
ITFC="eth0"
|
||||||
|
|
||||||
@ -54,7 +55,7 @@ if [ "$TYPE" == "$strval1" ]; then
|
|||||||
#Old
|
#Old
|
||||||
#./ag_gen -n ../Feb_2021/generic_timeline_maintenance.nm -x ../Feb_2021/Sync/"$NUM_SERV"_Serv/sync_timeline_maintenance.xp -g DOTFILE.dot -t 1 -q 1 -p -a 0.9
|
#./ag_gen -n ../Feb_2021/generic_timeline_maintenance.nm -x ../Feb_2021/Sync/"$NUM_SERV"_Serv/sync_timeline_maintenance.xp -g DOTFILE.dot -t 1 -q 1 -p -a 0.9
|
||||||
|
|
||||||
mpiexec --mca btl_openib_allow_ib 1 --mca btl openib,self,vader --mca opal_warn_on_missing_libcuda 0 --bind-to numa --map-by numa -np "$NODES" --timeout 129600 ./ag_gen -n ../Oct_2021/nm_files/"$CARS"_car_timeline_maintenance.nm -x ../Oct_2021/Sync/10_Exploits/"$NUM_SERV"_Serv/sync_timeline_maintenance.xp -t 1 -q 1 -p -a 0.6 -z "$DBNAME"
|
mpiexec --mca btl_openib_allow_ib 1 --mca btl openib,self,vader --mca opal_warn_on_missing_libcuda 0 --bind-to numa --map-by numa -np "$NODES" --timeout 129600 ./ag_gen -n ../Oct_2021/nm_files/"$CARS"_car_timeline_maintenance.nm -x ../Oct_2021/Sync/10_Exploits/"$NUM_SERV"_Serv/sync_timeline_maintenance.xp -t "$NUM_THREADS" -q 1 -p -a 0.6 -z "$DBNAME"
|
||||||
|
|
||||||
# 4 Exploit
|
# 4 Exploit
|
||||||
#mpiexec -np "$NODES" --bind-to numa --map-by numa ./ag_gen -n ../Oct_2021/nm_files/"$CARS"_car_timeline_maintenance.nm -x ../Oct_2021/Sync/4_Exploits/"$NUM_SERV"_Serv/sync_timeline_maintenance.xp -t 1 -q 1 -p -a 0.6 -z "$DBNAME"
|
#mpiexec -np "$NODES" --bind-to numa --map-by numa ./ag_gen -n ../Oct_2021/nm_files/"$CARS"_car_timeline_maintenance.nm -x ../Oct_2021/Sync/4_Exploits/"$NUM_SERV"_Serv/sync_timeline_maintenance.xp -t 1 -q 1 -p -a 0.6 -z "$DBNAME"
|
||||||
@ -63,11 +64,11 @@ if [ "$TYPE" == "$strval1" ]; then
|
|||||||
elif [ "$TYPE" == "$strval2" ]; then
|
elif [ "$TYPE" == "$strval2" ]; then
|
||||||
#./ag_gen -n ../Feb_2021/generic_timeline_maintenance.nm -x ../Feb_2021/Non_Sync/"$NUM_SERV"_Serv/generic_timeline_maintenance.xp -g DOTFILE.dot -t 1 -q 1 -p -a 0.9
|
#./ag_gen -n ../Feb_2021/generic_timeline_maintenance.nm -x ../Feb_2021/Non_Sync/"$NUM_SERV"_Serv/generic_timeline_maintenance.xp -g DOTFILE.dot -t 1 -q 1 -p -a 0.9
|
||||||
|
|
||||||
mpiexec --mca btl_openib_allow_ib 1 --mca btl openib,self,vader --mca opal_warn_on_missing_libcuda 0 --bind-to numa --map-by numa -np "$NODES" --timeout 129600 ./ag_gen -n ../Oct_2021/nm_files/"$CARS"_car_timeline_maintenance.nm -x ../Oct_2021/Non_Sync/10_Exploits/"$NUM_SERV"_Serv/generic_timeline_maintenance.xp -t 1 -q 1 -p -a 0.6 -z "$DBNAME"
|
mpiexec --mca btl_openib_allow_ib 1 --mca btl openib,self,vader --mca opal_warn_on_missing_libcuda 0 --bind-to numa --map-by numa -np "$NODES" --timeout 129600 ./ag_gen -n ../Oct_2021/nm_files/"$CARS"_car_timeline_maintenance.nm -x ../Oct_2021/Non_Sync/10_Exploits/"$NUM_SERV"_Serv/generic_timeline_maintenance.xp -t "$NUM_THREADS" -q 1 -p -a 0.6 -z "$DBNAME"
|
||||||
|
|
||||||
else
|
else
|
||||||
echo "Running default."
|
echo "Running default."
|
||||||
mpiexec --mca btl_openib_allow_ib 1 --mca btl openib,self,vader --mca opal_warn_on_missing_libcuda 0 --bind-to numa --map-by numa --timeout 129600 ./ag_gen -n ../Oct_2021/nm_files/1_car_timeline_maintenance.nm -x ../Oct_2021/Sync/10_Exploits/1_Serv/sync_timeline_maintenance.xp -t 1 -q 1 -p -a 0.6 -z "$DBNAME"
|
mpiexec --mca btl_openib_allow_ib 1 --mca btl openib,self,vader --mca opal_warn_on_missing_libcuda 0 --bind-to numa --map-by numa --timeout 129600 ./ag_gen -n ../Oct_2021/nm_files/1_car_timeline_maintenance.nm -x ../Oct_2021/Sync/10_Exploits/1_Serv/sync_timeline_maintenance.xp -t "$NUM_THREADS" -q 1 -p -a 0.6 -z "$DBNAME"
|
||||||
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
@ -38,6 +38,7 @@ if [[ "$folder" == ompnosync ]]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ "$SKIPSYNC" == false ]]; then
|
if [[ "$SKIPSYNC" == false ]]; then
|
||||||
|
|
||||||
for thrd in "${thrds[@]}"
|
for thrd in "${thrds[@]}"
|
||||||
do
|
do
|
||||||
if [[ "$folder" == serial ]]; then
|
if [[ "$folder" == serial ]]; then
|
||||||
@ -72,11 +73,13 @@ if [[ "$SKIPSYNC" == false ]]; then
|
|||||||
|
|
||||||
echo "----------------------------------- "$cars" cars -----------------------------------" >> ./slurm_reports/"$folder"/sync_data.txt
|
echo "----------------------------------- "$cars" cars -----------------------------------" >> ./slurm_reports/"$folder"/sync_data.txt
|
||||||
#1..4 (or 1..10)Services
|
#1..4 (or 1..10)Services
|
||||||
for i in {1..4}
|
for i in {1..10}
|
||||||
|
do
|
||||||
|
for node in "${NODES[@]}"
|
||||||
do
|
do
|
||||||
SKIP_FLAG=0
|
SKIP_FLAG=0
|
||||||
#Dispatch the job and get the output ("Submitted batch job <jobid>
|
#Dispatch the job and get the output ("Submitted batch job <jobid>
|
||||||
JOB=`sbatch --nodelist="$nodelist" ag_run.sh "$i" "sync" "$cars" "$DBNAME" "$thrd" "$QSIZE"`
|
JOB=`sbatch --nodelist="$nodelist" ag_run.sh "$i" "sync" "$cars" "$DBNAME" "$thrd" "$node"`
|
||||||
#Pull the jobid
|
#Pull the jobid
|
||||||
JOB=`echo "$JOB" | grep -oP '(?<=job )[^ ]*'`
|
JOB=`echo "$JOB" | grep -oP '(?<=job )[^ ]*'`
|
||||||
|
|
||||||
@ -86,7 +89,7 @@ if [[ "$SKIPSYNC" == false ]]; then
|
|||||||
|
|
||||||
NOW=`date +"%H:%M"`
|
NOW=`date +"%H:%M"`
|
||||||
|
|
||||||
echo "Synchronous test with "$i" services "$thrd" omp threads "$QSIZE" Qsize and "$cars" cars dispatched at "$NOW" with jobid "$JOB"" >> ./slurm_reports/"$folder"/sync_data.txt
|
echo "Synchronous test with "$i" services "$thrd" omp threads "$node" nodes and "$cars" cars dispatched at "$NOW" with jobid "$JOB"" >> ./slurm_reports/"$folder"/sync_data.txt
|
||||||
|
|
||||||
#Wait until job is finished
|
#Wait until job is finished
|
||||||
while squeue | grep "$JOB" > /dev/null 2>&1; do
|
while squeue | grep "$JOB" > /dev/null 2>&1; do
|
||||||
@ -126,6 +129,7 @@ if [[ "$SKIPSYNC" == false ]]; then
|
|||||||
printf "Errors occurred. Please see err file for more details. \n\n" >> ./slurm_reports/"$folder"/sync_data.txt
|
printf "Errors occurred. Please see err file for more details. \n\n" >> ./slurm_reports/"$folder"/sync_data.txt
|
||||||
|
|
||||||
fi
|
fi
|
||||||
|
done
|
||||||
SKIP_FLAG=0
|
SKIP_FLAG=0
|
||||||
done
|
done
|
||||||
done
|
done
|
||||||
@ -135,9 +139,7 @@ if [[ "$SKIPSYNC" == false ]]; then
|
|||||||
done
|
done
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [[ "$SKIPSYNC" == false ]]; then
|
||||||
|
|
||||||
#thrds=(4 8 16 32)
|
|
||||||
|
|
||||||
for thrd in "${thrds[@]}"
|
for thrd in "${thrds[@]}"
|
||||||
do
|
do
|
||||||
@ -167,26 +169,34 @@ do
|
|||||||
QSIZE=1
|
QSIZE=1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
#1..6 Cars
|
||||||
for cars in {2..6}
|
for cars in {2..6}
|
||||||
do
|
do
|
||||||
echo "----------------------------------- "$cars" cars -----------------------------------" >> ./slurm_reports/"$folder"/sync_data.txt
|
|
||||||
|
|
||||||
#1..4, 1..10
|
echo "----------------------------------- "$cars" cars -----------------------------------" >> ./slurm_reports/"$folder"/non-sync_data.txt
|
||||||
for i in {1..4}
|
#1..4 (or 1..10)Services
|
||||||
|
for i in {1..10}
|
||||||
|
do
|
||||||
|
for node in "${NODES[@]}"
|
||||||
do
|
do
|
||||||
SKIP_FLAG=0
|
SKIP_FLAG=0
|
||||||
JOB=`sbatch --nodelist="$nodelist" ag_run.sh "$i" "non-sync" "$cars" "$folder" "$thrd" "$QSIZE"`
|
#Dispatch the job and get the output ("Submitted batch job <jobid>
|
||||||
|
JOB=`sbatch --nodelist="$nodelist" ag_run.sh "$i" "non-sync" "$cars" "$DBNAME" "$thrd" "$node"`
|
||||||
|
#Pull the jobid
|
||||||
JOB=`echo "$JOB" | grep -oP '(?<=job )[^ ]*'`
|
JOB=`echo "$JOB" | grep -oP '(?<=job )[^ ]*'`
|
||||||
|
|
||||||
|
#Output will be stored as this format
|
||||||
FILE="./slurm_reports/job.${JOB}.out"
|
FILE="./slurm_reports/job.${JOB}.out"
|
||||||
ERR="./slurm_reports/job.${JOB}.err"
|
ERR="./slurm_reports/job.${JOB}.err"
|
||||||
|
|
||||||
NOW=`date +"%H:%M"`
|
NOW=`date +"%H:%M"`
|
||||||
|
|
||||||
echo "Non-Synchronous test with "$i" services "$thrd" threads "$QSIZE" Qsize and "$cars" cars dispatched at "$NOW" with jobid "$JOB"" >> ./slurm_reports/"$folder"/sync_data.txt
|
echo "Non-Synchronous test with "$i" services "$thrd" omp threads "$node" nodes and "$cars" cars dispatched at "$NOW" with jobid "$JOB"" >> ./slurm_reports/"$folder"/non-sync_data.txt
|
||||||
|
|
||||||
|
#Wait until job is finished
|
||||||
while squeue | grep "$JOB" > /dev/null 2>&1; do
|
while squeue | grep "$JOB" > /dev/null 2>&1; do
|
||||||
#while ! test -f "$FILE"; do
|
#Slurm is having an issue where the prog finishes but doesn't leave slurm queue
|
||||||
|
#Check if program finished by checking the output file, delay by 5 seconds for safety, then cancel the job
|
||||||
if test -f "$FILE" && (cat "$FILE" | grep "total run time" > /dev/null 2>&1) ; then
|
if test -f "$FILE" && (cat "$FILE" | grep "total run time" > /dev/null 2>&1) ; then
|
||||||
#if test -f "$FILE"; then
|
#if test -f "$FILE"; then
|
||||||
for j in {1..5}
|
for j in {1..5}
|
||||||
@ -194,34 +204,39 @@ do
|
|||||||
sleep 1
|
sleep 1
|
||||||
done
|
done
|
||||||
SKIP_FLAG=1
|
SKIP_FLAG=1
|
||||||
echo "Job done, but slurm hung." >> ./slurm_reports/"$folder"/sync_data.txt
|
echo "Job done, but slurm hung." >> ./slurm_reports/"$folder"/non-sync_data.txt
|
||||||
tmp=`scancel "$JOB"`
|
tmp=`scancel "$JOB"`
|
||||||
|
sleep 3
|
||||||
break
|
break
|
||||||
|
|
||||||
fi
|
fi
|
||||||
|
#while ! test -f "$FILE"; do
|
||||||
sleep 1
|
sleep 1
|
||||||
#done
|
#done
|
||||||
done
|
done
|
||||||
|
|
||||||
|
#Wait for the delay between job finish and file write
|
||||||
while ! test -f "$FILE"; do
|
while ! test -f "$FILE"; do
|
||||||
sleep 1
|
sleep 1
|
||||||
done
|
done
|
||||||
|
|
||||||
|
#Get runtime and states from the output
|
||||||
RUNTIME=`cat "$FILE" | grep -oP '(?<=total run time is )[^ ]*'`
|
RUNTIME=`cat "$FILE" | grep -oP '(?<=total run time is )[^ ]*'`
|
||||||
STATES=`cat "$FILE" | grep -oP '(?<=Total States: )[^ ]*'`
|
STATES=`cat "$FILE" | grep -oP '(?<=Total States: )[^ ]*'`
|
||||||
|
|
||||||
if ! test -s "$ERR" || test "$SKIP_FLAG" -eq 1 ; then
|
if ! test -s "$ERR" || test "$SKIP_FLAG" -eq 1 ; then
|
||||||
printf "${i} Services Non-Synchronized: ${STATES} states and ${RUNTIME} runtime \n\n" >> ./slurm_reports/"$folder"/sync_data.txt
|
printf "${i} Services Non-Synchronized: ${STATES} states and ${RUNTIME} runtime \n\n" >> ./slurm_reports/"$folder"/non-sync_data.txt
|
||||||
str="$i","$STATES","$RUNTIME"
|
str="$i","$STATES","$RUNTIME"
|
||||||
echo $str >> ./slurm_reports/"$folder"/sync_data.csv
|
echo $str >> ./slurm_reports/"$folder"/non-sync_data.csv
|
||||||
else
|
else
|
||||||
printf "Errors occurred. Please see err file for more details. \n\n" >> ./slurm_reports/"$folder"/sync_data.txt
|
printf "Errors occurred. Please see err file for more details. \n\n" >> ./slurm_reports/"$folder"/non-sync_data.txt
|
||||||
|
|
||||||
fi
|
fi
|
||||||
|
done
|
||||||
SKIP_FLAG=0
|
SKIP_FLAG=0
|
||||||
done
|
done
|
||||||
done
|
done
|
||||||
|
|
||||||
|
printf "\n\n" >> ./slurm_reports/"$folder"/non-sync_data.txt
|
||||||
done
|
done
|
||||||
done
|
done
|
||||||
|
fi
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user