
/usr/share/arc/submit-SLURM-job is in nordugrid-arc-arex 5.3.0~rc1-1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.
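These details can be verified on an installed system; a minimal check, assuming a Debian-based host with dpkg and GNU coreutils:

  dpkg -S /usr/share/arc/submit-SLURM-job               # expected: nordugrid-arc-arex: /usr/share/arc/submit-SLURM-job
  stat -c '%U:%G %a' /usr/share/arc/submit-SLURM-job    # expected: root:root 755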

#!/bin/sh
# set -x
#
#  Based on the Globus submission script for PBS
#
#  Submits job to SLURM.
#  Input: path to grami file (same as Globus).
#
# The temporary job script is created for the submission and then removed 
# at the end of this script. 
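#
# Typical invocation (illustrative only; the exact grami path depends on the
# configured control directory):
#   submit-SLURM-job --config /etc/arc.conf <controldir>/job.<gridid>.grami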

echo "----- starting submit_slurm_job -----" 1>&2
joboption_lrms=SLURM

# ARC1 passes the config file first.
if [ "$1" = "--config" ]; then shift; ARC_CONFIG=$1; shift; fi

basedir=`dirname $0`
basedir=`cd $basedir > /dev/null && pwd` || exit $?

pkgdatadir="$basedir"

. ${pkgdatadir}/configure-SLURM-env.sh || exit $?
. ${pkgdatadir}/submit_common.sh || exit $?

perflogfilesub="${perflogdir}/submission.perflog"


if [ ! -z "$perflogdir" ]; then
   start_ts=`date +%s.%N`
fi

##############################################################
# Parse grami file, read arc config
##############################################################

init $1

read_arc_conf

failures_file="$joboption_controldir/job.$joboption_gridid.failed"

if [ -z "${RUNTIME_NODE_SEES_FRONTEND}" ] ; then
  if [ -z "${RUNTIME_LOCAL_SCRATCH_DIR}" ] ; then
    echo "Need to know at which directory to run job: RUNTIME_LOCAL_SCRATCH_DIR must be set if RUNTIME_NODE_SEES_FRONTEND is empty" 1>&2
    echo "Submission: Configuration error.">>"$failures_file"
    exit 1
  fi
fi

##############################################################
# Zero stage of runtime environments
##############################################################
RTE_stage0

##############################################################
# create job script
##############################################################
mktempscript

is_cluster=true
##############################################################
# Start job script
##############################################################
echo "#!/bin/bash -l" > $LRMS_JOB_SCRIPT
echo "# SLURM batch job script built by grid-manager" >> $LRMS_JOB_SCRIPT

# Rerun is handled by the GM; do not let SLURM requeue jobs itself.
echo "#SBATCH --no-requeue" >> $LRMS_JOB_SCRIPT

# write SLURM output to 'comment' file
echo "#SBATCH -e ${joboption_directory}.comment">> $LRMS_JOB_SCRIPT
echo "#SBATCH -o ${joboption_directory}.comment">> $LRMS_JOB_SCRIPT
echo "" >> $LRMS_JOB_SCRIPT
# choose queue
if [ ! -z "${joboption_queue}" ] ; then
  echo "#SBATCH -p $joboption_queue" >> $LRMS_JOB_SCRIPT
fi

##############################################################
# priority
##############################################################
if [ ! -z "$joboption_priority" ]; then
  #The SLURM nice value ranges from -10000 to 10000; lower is better.
  #The default is 0, and only superusers can assign values
  #less than 0.
  #We set the nice value to 100 - ARC priority.
  #This has the desired effect for all grid jobs.
  #Local jobs will unfortunately get the default, which corresponds
  #to ARC priority 100, but there is no way around that.
  priority=$((100-joboption_priority))
  echo "#SBATCH --nice=${priority}" >> $LRMS_JOB_SCRIPT
else
  #If priority is not set, use 50 to match
  #the default in the documentation.
  priority=50
  echo "#SBATCH --nice=${priority}" >> $LRMS_JOB_SCRIPT
fi

# project name for accounting
if [ ! -z "${joboption_rsl_project}" ] ; then
  echo "#SBATCH -U $joboption_rsl_project" >> $LRMS_JOB_SCRIPT
fi
# job name for convenience
if [ ! -z "${joboption_jobname}" ] ; then
    #TODO is this necessary? do parts of the infosys need these limitations?
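  # Sanitize the name: prefix with 'N' if it does not start with a letter,
  # replace non-alphanumeric characters with '_', and truncate to 15 characters.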
  jobname=`echo "$joboption_jobname" | \
           sed 's/^\([^[:alpha:]]\)/N\1/' | \
           sed 's/[^[:alnum:]]/_/g' | \
	   sed 's/\(...............\).*/\1/'`
  echo "#SBATCH -J '$jobname'" >> $LRMS_JOB_SCRIPT
else
    jobname="gridjob"
    echo "#SBATCH -J '$jobname'" >> $LRMS_JOB_SCRIPT
fi
echo "SLURM jobname: $jobname" 1>&2
# Set up the user's environment on the compute node where the script
# is executed.
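# --get-user-env=10L: allow up to 10 seconds for SLURM to recreate the
# user's environment from a login ('L') shell.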
echo "#SBATCH --get-user-env=10L" >> $LRMS_JOB_SCRIPT

##############################################################
# (non-)parallel jobs
##############################################################

set_count

nodes_string="#SBATCH -n ${joboption_count}"
echo "$nodes_string" >> $LRMS_JOB_SCRIPT

if [ ! -z "$joboption_countpernode" ] && [ "$joboption_countpernode" -gt 0 ] ; then
  echo "#SBATCH --ntasks-per-node $joboption_countpernode" >> $LRMS_JOB_SCRIPT
fi


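# Append any node property strings (joboption_nodeproperty_0,
# joboption_nodeproperty_1, ...) verbatim to a single #SBATCH line.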
nodes_string="#SBATCH "
i=0
eval "var_is_set=\${joboption_nodeproperty_$i+yes}"
while [ ! -z "${var_is_set}" ] ; do
  eval "var_value=\${joboption_nodeproperty_$i}"
  nodes_string="${nodes_string} ${var_value}"
  i=$(( $i + 1 ))
  eval "var_is_set=\${joboption_nodeproperty_$i+yes}"
done
echo "$nodes_string" >> $LRMS_JOB_SCRIPT


if [ "$joboption_exclusivenode" = "true" ]; then
  echo "#SBATCH --exclusive" >> $LRMS_JOB_SCRIPT
fi


##############################################################
# Execution times (minutes)
##############################################################
if [ ! -z "$joboption_cputime" ] ; then
  if [ $joboption_cputime -lt 0 ] ; then
    echo "WARNING: Less than 0 cpu time requested: $joboption_cputime" 1>&2
    joboption_cputime=0
    echo 'WARNING: cpu time set to 0' 1>&2
  fi
  # This is actually walltime derived from the cputime request.
  maxcputime=$(( $joboption_cputime / $joboption_count ))
  cputime_min=$(( $maxcputime / 60 ))
  cputime_sec=$(( $maxcputime - $cputime_min * 60 ))
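  # SLURM's -t option accepts a minutes:seconds time specification.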
  echo "#SBATCH -t ${cputime_min}:${cputime_sec}" >> $LRMS_JOB_SCRIPT
fi  
  
if [ -z "$joboption_walltime" ] ; then
  if [ ! -z "$joboption_cputime" ] ; then
    # Set walltime for backward compatibility or incomplete requests
    joboption_walltime=$(( $maxcputime * $walltime_ratio ))
  fi
fi

if [ ! -z "$joboption_walltime" ] ; then
  if [ $joboption_walltime -lt 0 ] ; then
    echo "WARNING: Less than 0 wall time requested: $joboption_walltime" 1>&2
    joboption_walltime=0
    echo 'WARNING: wall time set to 0' 1>&2
  fi
  maxwalltime="$joboption_walltime"
  walltime_min=$(( $maxwalltime / 60 ))
  walltime_sec=$(( $maxwalltime - $walltime_min * 60 ))
  echo "#SBATCH -t ${walltime_min}:${walltime_sec}" >> $LRMS_JOB_SCRIPT
fi

##############################################################
# Requested memory (mb)
##############################################################

set_req_mem

if [ ! -z "$joboption_memory" ] ; then
  echo "#SBATCH --mem-per-cpu=${joboption_memory}" >> $LRMS_JOB_SCRIPT
fi

echo "" >> $LRMS_JOB_SCRIPT
echo "# Overide umask of execution node (sometime values are really strange)" >> $LRMS_JOB_SCRIPT
echo "umask 077" >> $LRMS_JOB_SCRIPT
echo " " >> $LRMS_JOB_SCRIPT

sourcewithargs_jobscript

##############################################################
# Add environment variables
##############################################################
add_user_env

##############################################################
# Check for existence of the executable; there is no point in
# checking for it if files are downloaded directly to the
# computing node
##############################################################
if [ -z "${joboption_arg_0}" ] ; then
  echo 'Executable is not specified' 1>&2
  rm -f "$LRMS_JOB_SCRIPT" "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
  echo "Submission: Job description error.">>"$failures_file"
  exit 1
fi

#######################################################################
# copy information useful for transferring files to/from the node directly
#######################################################################
if [ "$joboption_localtransfer" = 'yes' ] ; then
  setup_local_transfer
fi

######################################################################
# Adjust working directory for tweaky nodes
# RUNTIME_GRIDAREA_DIR should be defined by external means on nodes
######################################################################
if [ ! -z "${RUNTIME_NODE_SEES_FRONTEND}" ] ; then
  setup_runtime_env
else
  echo "RUNTIME_JOB_DIR=$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid" >> $LRMS_JOB_SCRIPT
  echo "RUNTIME_JOB_DIAG=$RUNTIME_LOCAL_SCRATCH_DIR/${joboption_gridid}.diag" >> $LRMS_JOB_SCRIPT
  RUNTIME_STDIN_REL=`echo "${joboption_stdin}" | sed "s#^${joboption_directory}/*##"`
  RUNTIME_STDOUT_REL=`echo "${joboption_stdout}" | sed "s#^${joboption_directory}/*##"`
  RUNTIME_STDERR_REL=`echo "${joboption_stderr}" | sed "s#^${joboption_directory}/*##"`
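  # If stdin/stdout/stderr do not live under the session directory, keep the
  # original paths; otherwise point them into the per-job scratch directory.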
  if [ "$RUNTIME_STDIN_REL" = "${joboption_stdin}" ] ; then
    echo "RUNTIME_JOB_STDIN=\"${joboption_stdin}\"" >> $LRMS_JOB_SCRIPT
  else
    echo "RUNTIME_JOB_STDIN=\"$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid/$RUNTIME_STDIN_REL\"" >> $LRMS_JOB_SCRIPT
  fi
  if [ "$RUNTIME_STDOUT_REL" = "${joboption_stdout}" ] ; then
    echo "RUNTIME_JOB_STDOUT=\"${joboption_stdout}\"" >> $LRMS_JOB_SCRIPT
  else
    echo "RUNTIME_JOB_STDOUT=\"$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid/$RUNTIME_STDOUT_REL\"" >> $LRMS_JOB_SCRIPT
  fi
  if [ "$RUNTIME_STDERR_REL" = "${joboption_stderr}" ] ; then
    echo "RUNTIME_JOB_STDERR=\"${joboption_stderr}\"" >> $LRMS_JOB_SCRIPT
  else
    echo "RUNTIME_JOB_STDERR=\"$RUNTIME_LOCAL_SCRATCH_DIR/$joboption_gridid/$RUNTIME_STDERR_REL\"" >> $LRMS_JOB_SCRIPT
  fi
fi

##############################################################
# Add std... to job arguments
##############################################################
include_std_streams

##############################################################
#  Move files to local working directory (job is done on node only)
#  RUNTIME_JOB_DIR -> RUNTIME_LOCAL_SCRATCH_DIR/job_id
##############################################################
move_files_to_node

echo "" >> $LRMS_JOB_SCRIPT
echo "RESULT=0" >> $LRMS_JOB_SCRIPT
echo "" >> $LRMS_JOB_SCRIPT


#####################################################
#  Download input files
####################################################
download_input_files

##############################################################
#  Skip execution if something already failed
##############################################################
echo "if [ \"\$RESULT\" = '0' ] ; then" >> $LRMS_JOB_SCRIPT

##############################################################
#  Runtime configuration at computing node
##############################################################
RTE_stage1

##############################################################
#  Diagnostics
##############################################################
echo "echo \"runtimeenvironments=\$runtimeenvironments\" >> \"\$RUNTIME_JOB_DIAG\"" >> $LRMS_JOB_SCRIPT
cat >> $LRMS_JOB_SCRIPT <<'EOSCR'
if [ ! "X$SLURM_NODEFILE" = 'X' ] ; then
  if [ -r "$SLURM_NODEFILE" ] ; then
    cat "$SLURM_NODEFILE" | sed 's/\(.*\)/nodename=\1/' >> "$RUNTIME_JOB_DIAG"
    NODENAME_WRITTEN="1"
  else
    SLURM_NODEFILE=
  fi
fi
EOSCR

##############################################################
#  Check intermediate result again
##############################################################
echo "if [ \"\$RESULT\" = '0' ] ; then" >> $LRMS_JOB_SCRIPT

##############################################################
#  Execution
##############################################################
cd_and_run

##############################################################
#  End of RESULT checks
##############################################################
echo "fi" >> $LRMS_JOB_SCRIPT
echo "fi" >> $LRMS_JOB_SCRIPT

##############################################################
#  Runtime (post)configuration at computing node
##############################################################
configure_runtime

#####################################################
#  Upload output files
####################################################
if [ "$joboption_localtransfer" = 'yes' ] ; then
  upload_output_files
else
# There is no point in keeping trash until the GM runs the uploader
  echo 'if [ ! -z  "$RUNTIME_LOCAL_SCRATCH_DIR" ] ; then' >> $LRMS_JOB_SCRIPT
# Delete all files except those listed in job.#.output
  echo '  find ./ -type l -exec rm -f "{}" ";"' >> $LRMS_JOB_SCRIPT
  echo '  find ./ -type f -exec chmod u+w "{}" ";"' >> $LRMS_JOB_SCRIPT

  if [ -f "$joboption_controldir/job.$joboption_gridid.output" ] ; then
    cat "$joboption_controldir/job.$joboption_gridid.output" | \
    # remove leading slashes, if any
    sed 's/^\/*//' | \
    # backslashes and spaces are escaped with a backslash in job.*.output. The
    # shell built-in read undoes this escaping.
    while read name rest; do

      # make it safe for shell by replacing single quotes with '\''
      name=`printf "%s" "$name"|sed "s/'/'\\\\\\''/g"`;

      # protect output files, including those in a dynamic list, from being deleted
      if [ "${name#@}" != "$name" ]; then     # Does $name start with a @ ?

        dynlist=${name#@}
        echo "  dynlist='$dynlist'" >> $LRMS_JOB_SCRIPT
        cat >> $LRMS_JOB_SCRIPT <<'EOSCR'
  chmod -R u-w "./$dynlist" 2>/dev/null
  cat "./$dynlist" | while read name rest; do
    chmod -R u-w "./$name" 2>/dev/null
  done
EOSCR
      else

        echo "  chmod -R u-w \"\$RUNTIME_JOB_DIR\"/'$name' 2>/dev/null" >> $LRMS_JOB_SCRIPT
      fi
    done
  fi

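  # Anything still owner-writable was not write-protected above and is thus
  # not an output file: delete it, then restore write permission on the rest.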
  echo '  find ./ -type f -perm /200 -exec rm -f "{}" ";"' >> $LRMS_JOB_SCRIPT
  echo '  find ./ -type f -exec chmod u+w "{}" ";"' >> $LRMS_JOB_SCRIPT
  echo 'fi' >> $LRMS_JOB_SCRIPT
fi
echo "" >> $LRMS_JOB_SCRIPT

##############################################################
#  Move files back to session directory (job is done on node only)
#  RUNTIME_LOCAL_SCRATCH_DIR/job_id -> RUNTIME_JOB_DIR
# !!!!!!!!!!!!!!!!!!! would be better to know the names of files !!!!!!!!!!!
##############################################################
move_files_to_frontend

#######################################
#  Submit the job
#######################################
echo "SLURM job script built" 1>&2
# Execute sbatch command
cd "$joboption_directory"
echo "SLURM script follows:" 1>&2
echo "-------------------------------------------------------------------" 1>&2
cat "$LRMS_JOB_SCRIPT" 1>&2
echo "-------------------------------------------------------------------" 1>&2
echo "" 1>&2
SLURM_RESULT=1
SLURM_TRIES=0

#job creation finished
if [ ! -z "$perflogdir" ]; then
   stop_ts=`date +%s.%N`
   t=`awk "BEGIN { printf \"%.3f\", ${stop_ts}-${start_ts} }"`
   echo "[`date +%Y-%m-%d\ %T`] submit-slurm-job, JobScriptCreation: $t" >> $perflogfilesub
fi

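# Try sbatch up to 10 times. When the queue appears full the try counter is
# reset and the loop keeps waiting (see the error checks below).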
while [ "$SLURM_TRIES" -lt '10' ] ; do

    # Unset all environment variables before calling sbatch. Otherwise
    # SLURM will forward them to the job and leak information about
    # the grid-manager.
    # Only variables that appear as assignments in the env output are unset.
    # This risks unsetting variables used in sub-assignments, but that is likely harmless.
    # TODO: Maybe we should only unset $ARC_*, $CONFIG_*, $GLOBUS_* etc?
  if [ ! -z "$perflogdir" ]; then
     start_ts=`date +%s.%N`
  fi
  (for i in $(env|grep '^[A-Za-z][A-Za-z0-9]*='|grep -v "LRMS_JOB_SCRIPT"|cut -d= -f1);do unset $i;done; \
       ${sbatch} $LRMS_JOB_SCRIPT) 1>$LRMS_JOB_OUT 2>$LRMS_JOB_ERR
  SLURM_RESULT="$?"
  if [ ! -z "$perflogdir" ]; then
    stop_ts=`date +%s.%N`
    t=`awk "BEGIN { printf \"%.3f\", ${stop_ts}-${start_ts} }"`
    echo "[`date +%Y-%m-%d\ %T`] submit-slurm-job, JobSubmission: $t" >> $perflogfilesub
  fi
  if [ "$SLURM_RESULT" -eq '0' ] ; then break ; fi 
  if [ "$SLURM_RESULT" -eq '198' ] ; then 
    echo "Waiting for queue to decrease" 1>&2
    sleep 60
    SLURM_TRIES=0
    continue
  fi
  grep 'maximum number of jobs' "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
  if [ $? -eq '0' ] ; then 
    echo "Waiting for queue to decrease" 1>&2
    sleep 60
    SLURM_TRIES=0
    continue
  fi
  # A rare SLURM error, but may cause chaos in the information/accounting system
  grep 'unable to accept job' "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
  if [ $? -eq '0' ] ; then 
    echo "Waiting for queue to decrease" 1>&2
    sleep 60
    SLURM_TRIES=0
    continue
  fi 
  SLURM_TRIES=$(( $SLURM_TRIES + 1 ))
  sleep 2
done
if [ $SLURM_RESULT -eq '0' ] ; then

#TODO test what happens when the job queue is full or when slurmctld is not responding
   # SLURM 1.x and 2.2.x output the job id to STDERR and STDOUT respectively. Concatenate them
   # and let sed sort it out. From the exit code we know that the job was submitted, so this
   # is safe. Ulf Tigerstedt <tigerste@csc.fi> 1.5.2011
   # This is unfortunately not safe. Cray's SLURM sbatch returns 0 when it fails to submit a job. <soettrup@nbi.dk> 22.1.2015

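   # The sed expression strips an optional "sbatch: " prefix and keeps only
   # the numeric job id from the "Submitted batch job" line.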
   job_id=`cat $LRMS_JOB_OUT $LRMS_JOB_ERR |sed -e 's/^\(sbatch: \)\{0,1\}Submitted batch job \([0-9]*\)$/\2/'`
   if expr match "${job_id}" '[0-9][0-9]*' >/dev/null; then
      echo "joboption_jobid=$job_id" >> $arg_file
      echo "job submitted successfully!" 1>&2
      echo "local job id: $job_id" 1>&2
      # Remove temporary job script file
      rm -f $LRMS_JOB_SCRIPT $LRMS_JOB_OUT $LRMS_JOB_ERR
      echo "----- exiting submit_slurm_job -----" 1>&2
      echo "" 1>&2
      exit 0
   else
      echo "job *NOT* submitted successfully!" 1>&2
      echo "failed getting the slurm jobid for the job!" 1>&2
      echo "Instead got: ${job_id}" 1>&2
      echo "Submission: Local submission client behaved unexpectedly.">>"$failures_file"
   fi
else
  echo "job *NOT* submitted successfully!" 1>&2
  echo "got error code from sbatch: $SLURM_RESULT !" 1>&2
  echo "Submission: Local submission client failed.">>"$failures_file"
fi
echo "Output is:" 1>&2
cat $LRMS_JOB_OUT 1>&2
echo "Error output is:"
cat $LRMS_JOB_ERR 1>&2
rm -f "$LRMS_JOB_SCRIPT" "$LRMS_JOB_OUT" "$LRMS_JOB_ERR"
echo "----- exiting submit_slurm_job -----" 1>&2
echo "" 1>&2
exit 1