From 0e0f53b07d0f02d88d194297b70f41dabc22c829 Mon Sep 17 00:00:00 2001 From: hammer Date: Sat, 29 Jan 2022 15:09:06 -0600 Subject: [PATCH] Script changes --- build/ag_run.sh | 2 +- build/config.ini | 4 ++-- build/run.sh | 2 +- build/slurm_reports/job.29973.err | 13 +++++++++++++ config.ini | 4 ++-- 5 files changed, 19 insertions(+), 6 deletions(-) create mode 100644 build/slurm_reports/job.29973.err diff --git a/build/ag_run.sh b/build/ag_run.sh index dca2c19..7fe6721 100755 --- a/build/ag_run.sh +++ b/build/ag_run.sh @@ -15,7 +15,7 @@ NUM_SERV=${1:-1} TYPE=${2:-sync} CARS=${3:-2} DBNAME=${4:-tmp} -NODES=${5:-2} +NODES=${5:-3} module load graphviz module load gcc diff --git a/build/config.ini b/build/config.ini index 9ff73dd..f77efc8 100755 --- a/build/config.ini +++ b/build/config.ini @@ -1,6 +1,6 @@ [database] name = ag_gen -host = 127.0.0.1 -port = 5432 +host = compute03 +port = 5240 username = ag_gen password = 8PZQc79NUZ3FjqSB diff --git a/build/run.sh b/build/run.sh index eeae461..1e67ae3 100755 --- a/build/run.sh +++ b/build/run.sh @@ -15,7 +15,7 @@ strval2="non-sync" #Hammer if [ "$(dnsdomainname)" = "hammer.esg.utulsa.edu" ]; then #Database stored on node with port - if psql -p 5240 -h login ${DBNAME} -c '\q' 2>&1; then + if psql -p 5240 -h compute03 ${DBNAME} -c '\q' 2>&1; then echo "Database ${DBNAME} exists. Dropping and recreating from dump." psql -p 5240 -h compute03 postgres -c "DROP DATABASE "$DBNAME"" diff --git a/build/slurm_reports/job.29973.err b/build/slurm_reports/job.29973.err new file mode 100644 index 0000000..46ed1d7 --- /dev/null +++ b/build/slurm_reports/job.29973.err @@ -0,0 +1,13 @@ +mpirun: Forwarding signal 18 to job +-------------------------------------------------------------------------- +ORTE has lost communication with a remote daemon. + + HNP daemon : [[44229,0],0] on node compute03 + Remote daemon: [[44229,0],3] on node compute06 + +This is usually due to either a failure of the TCP network +connection to the node, or possibly an internal failure of +the daemon itself. We cannot recover from this failure, and +therefore will terminate the job. +-------------------------------------------------------------------------- +slurmstepd: *** JOB 29973 ON compute03 CANCELLED AT 2022-01-29T14:55:37 *** diff --git a/config.ini b/config.ini index 9ff73dd..f77efc8 100755 --- a/config.ini +++ b/config.ini @@ -1,6 +1,6 @@ [database] name = ag_gen -host = 127.0.0.1 -port = 5432 +host = compute03 +port = 5240 username = ag_gen password = 8PZQc79NUZ3FjqSB