/usr/share/arc/scan-boinc-job is in nordugrid-arc-arex 5.4.2-1build1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/bin/sh
#
# Periodically monitor for jobs which have finished or failed but have not
# reported an exit code
#
#set -x
# Numeric user id this scanner runs under, as reported by id(1).
id=$(id -u)
#debug=:
#
# Writes one timestamped diagnostic line to stderr.
# Arguments: the message; several arguments are joined with single spaces.
# Outputs:   "<output of date> <message>" followed by a newline, on stderr.
#            Nothing is written to stdout.
#
debug () {
    # A single printf with quoted expansions: the message is emitted verbatim
    # (no pathname expansion of characters such as '*', no re-splitting) and
    # the line is produced by one write instead of three separate echo calls
    # that depend on the non-portable 'echo -n'.
    printf '%s %s\n' "$(date)" "$*" 1>&2
}
debug "starting"
# "$*" keeps the whole option list inside one word; "$@" embedded in a longer
# string would split the message into several arguments.
debug "options = $*"
# ARC1 passes first the config file.
if [ "$1" = "--config" ]; then shift; ARC_CONFIG=$1; shift; fi
# Resolve the directory this script lives in to an absolute path. The
# expansions are quoted so an install path containing spaces still works.
basedir=$(dirname -- "$0")
basedir=$(cd -- "$basedir" > /dev/null && pwd) || exit $?
pkgdatadir="$basedir"
# Load the BOINC back-end settings; the CONFIG_boinc_* variables used further
# down are expected to come from this file.
. "${pkgdatadir}/configure-boinc-env.sh" || exit $?
# At least one control directory must remain on the command line.
if [ -z "$1" ] ; then echo "Argument missing" 1>&2 ; exit 1 ; fi
#
# Prints the numeric uid of the owner of the file named by the first argument.
# Nothing is printed when the file cannot be stat'ed or when the owner is
# uid 0, because the Perl expression falls back to the empty string for any
# false value.
# Perl is used because it is more portable than the stat command.
#
printuid () {
    code='my @s = stat($ARGV[0]); print($s[4] || "")'
    /usr/bin/perl -w -e "${code}" "${1}"
}
#
# Runs a command line in /bin/sh, first trying to switch to another uid.
#
# Arguments: $1     - numeric uid to switch to
#            $2     - command string handed to "/bin/sh -c"
#            $3...  - further arguments, passed on to that shell
# Returns:   1 if fewer than two arguments are given; otherwise 0 when the
#            command succeeded, its exit code when it failed, or 128 plus the
#            signal number when it was killed by a signal.
#
# The switch is only attempted when the caller is root and the requested uid
# is non-empty and non-zero; in every other case the command silently runs
# under the current uid. A failed switch is reported on stderr and the command
# is run anyway.
#
do_as_uid () {
    if [ $# -lt 2 ]; then
        # Reported directly on stderr: no "log" helper is defined in this file.
        printf '%s\n' "do_as_uid requires 2 arguments" 1>&2
        return 1
    fi
    # The "exit 0" line is required: without it a successful command would
    # fall through to the last expression, which yields 128 when $? is 0.
    script='use English;
my ($uid, @args) = @ARGV;
if ( $UID == 0 && $uid ) {
    eval { $UID = $uid };
    print STDERR "Cannot switch to uid($UID): $@\n" if $UID != $uid;
}
system("/bin/sh","-c",@args);
exit 0 if $? == 0;
exit ($?>>8||128+($?&127));
'
    /usr/bin/perl -we "$script" "$@"
}
#
# Appends the job wrapper output (the .comment file, holding STDOUT & STDERR
# of the job wrapper) to the job's .errors file, framed by two marker lines.
#
# Arguments: $1 - uid of the job owner (stored but currently unused, see the
#                 commented-out do_as_uid call below)
#            $2 - path of the .comment file; a missing or unreadable file is
#                 tolerated and simply contributes no lines
#            $3 - path of the .errors file to append to
#
save_commentfile () {
    uid=$1
    commentfile=$2
    errorsfile=$3
    # Both paths are quoted so names containing spaces work, and so that an
    # empty comment path makes cat fail quietly instead of reading stdin.
    {
        echo '---------- Output of the job wrapper script -----------'
        cat -- "$commentfile" 2> /dev/null
        echo '------------------------- End of output -------------------------'
    } >> "$errorsfile"
    #do_as_uid "$uid" "$action"
}
#
# Succeeds when the BOINC workunit name $1 occurs as a whole word in $2,
# a space separated list of names. An empty name never matches.
#
boinc_job_is_finished () {
    [ -n "$1" ] || return 1
    case " $2 " in
        *" $1 "*) return 0 ;;
    esac
    return 1
}

#
# Scans one control directory and writes a job.<id>.lrms_done file for every
# job that is in state INLRMS or CANCELING and whose BOINC workunit the
# database reports with assimilate_state=2.
#
# Arguments: $1 - control directory
# Globals:   CONFIG_boinc_db_host, CONFIG_boinc_db_port, CONFIG_boinc_db_user,
#            CONFIG_boinc_db_pass, CONFIG_boinc_db_name, CONFIG_boinc_app_id
#            (read); joboption_* (set by sourcing each job's grami file)
# Returns:   1 when $1 is not a directory, otherwise the status of the
#            scanning pipeline.
#
# Only POSIX shell constructs are used, so the function also works when
# /bin/sh is not bash.
#
scan_control_dir () {
    control_dir=$1
    if [ ! -d "${control_dir}" ]; then
        echo "No control dir $control_dir" 1>&2
        return 1
    fi

    appidclause=""
    if [ -n "$CONFIG_boinc_app_id" ]; then
        appidclause="and appid=$CONFIG_boinc_app_id"
    fi
    finished=$(mysql -h "$CONFIG_boinc_db_host" -P "$CONFIG_boinc_db_port" \
        -u "$CONFIG_boinc_db_user" --password="$CONFIG_boinc_db_pass" \
        "$CONFIG_boinc_db_name" \
        -e "select name from workunit where assimilate_state=2 $appidclause")
    # Flatten the query output into one space separated list of names.
    # NOTE(review): any header line printed by mysql ends up in the list too;
    # that is harmless unless a workunit carries that very name.
    finished_jobs=$(printf '%s' "$finished" | tr '\t\r\n' '   ')

    # iterate over all jobs known in the control directory
    find "${control_dir}/processing" -name 'job.*.status' \
            -exec grep -El 'INLRMS|CANCELING' {} + \
        | sed -e 's/.*job\.//' -e 's/\.status$//' \
        | while IFS= read -r job; do
        #debug "scanning job = $job"
        # Forget everything learned from the previous job's grami file,
        # including the expected exit code, so nothing leaks between jobs.
        unset joboption_jobid joboption_directory joboption_arg_code

        # this job was already completed, nothing remains to be done
        [ -f "${control_dir}/job.${job}.lrms_done" ] && continue

        # a grami file exists for all jobs that GM thinks are running.
        # proceed to next job if this file is missing.
        [ -f "${control_dir}/job.${job}.grami" ] || continue

        # extract process IDs of the grami file
        . "${control_dir}/job.${job}.grami"

        # process IDs could not be learned, proceeding to next
        [ -z "$joboption_jobid" ] && continue
        #debug "local jobid = $joboption_jobid"

        # checking if process is still running
        if ! boinc_job_is_finished "$joboption_jobid" "$finished_jobs"; then
            #debug "$joboption_jobid is still running, Continueing to next"
            continue
        fi
        debug "$joboption_jobid is finished"

        uid=$(printuid "${control_dir}/job.${job}.local")
        debug "local user id = $uid"
        diagfile=${joboption_directory}.diag
        debug "checking $diagfile"
        exitcode=$(do_as_uid "$uid" "cat '$diagfile'" | sed -n 's/^exitcode=\([0-9]*\).*/\1/p')
        debug "exitcode = [$exitcode] extracted from $diagfile"
        # NOTE(review): the value read from the diag file is only logged; the
        # exit code is then forced to 0, which makes the "died unexpectedly"
        # branch below unreachable. Kept as found - confirm whether this
        # override is intentional.
        exitcode=0
        comment=""
        if [ -z "$joboption_arg_code" ] ; then joboption_arg_code='0' ; fi
        if [ -z "$exitcode" ]; then
            echo "Job $job with PID $joboption_jobid died unexpectedly" 1>&2
            comment="Job died unexpectedly"
            exitcode=-1
        elif [ "$exitcode" -ne "$joboption_arg_code" ]; then
            comment="Job finished with wrong exit code - $exitcode != $joboption_arg_code"
        fi
        debug "got exitcode=$exitcode"
        save_commentfile "$uid" "${joboption_directory}.comment" "${control_dir}/job.${job}.errors"
        printf '%s %s\n' "$exitcode" "$comment" > "${control_dir}/job.${job}.lrms_done"
    done
}

# Scan every control directory given on the command line.
for control_dir in "$@" ; do
    scan_control_dir "$control_dir"
done
# Every control directory given on the command line has been processed.
debug "done, going to sleep"
# Wait two minutes before terminating.
# NOTE(review): presumably this throttles how often the caller re-runs the
# scanner - confirm against whatever launches this script.
sleep 120
# Always report success, whatever happened to individual jobs above.
exit 0
|