This file is indexed.

/usr/share/arc/scan-DGBridge-job is in nordugrid-arc-arex 1.1.1-1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
#!/bin/bash
#
# Periodically check state of grid jobs in DGBridge, and put mark files
# for finished jobs.
#
# usage: scan-DGBridge-job [--config arc_config_file] control_dir ...

# Load arc.conf and set up environment
joboption_lrms=DGBridge

# ARC1 passes the config file first: consume "--config <file>" here so that
# only the control directories remain in the positional parameters.
if [ "$1" = "--config" ]; then
    shift
    ARC_CONFIG=$1
    shift
fi

# Absolute directory this script lives in; the helper scripts sourced below
# are looked up there.  Expansions are quoted so that an installation path
# containing whitespace does not get split.
basedir=$(dirname -- "$0")
basedir=$(cd -- "$basedir" > /dev/null && pwd) || exit $?
basewebdir="/var/www/3GBridge/"

# gm-kick is run from libexecdir at the end of the scan.
libexecdir="${ARC_LOCATION:-/usr}/lib/arc/"
pkgdatadir="$basedir"

# Both helper scripts are mandatory: abort with their status if sourcing fails.
. "${pkgdatadir}/configure-${joboption_lrms}-env.sh" || exit $?

. "${pkgdatadir}/scan_common.sh" || exit $?

# Prevent multiple instances of scan job to run concurrently
lockfile="${TMPDIR:-/tmp}/scan-DGBridge-job.lock"
# Create the lockfile and record our PID in a single redirection.  With
# noclobber (set -C) the redirection fails when the file already exists, so
# only one instance can create it.  "$$" still expands to the PID of the main
# shell inside the subshell.
if ! (set -C; echo "$$" > "$lockfile") 2> /dev/null; then
    if ps -p "$(< "$lockfile")" > /dev/null 2>&1; then
        echo "lockfile exists and PID $(< "$lockfile") is running"
        exit 1
    fi
    echo "old lockfile found, was scan-DGBridge-job killed?"

    # sleep, and if no other have removed and recreated the lockfile we remove it.
    # there are still races possible, but this will have to do
    sleep $((RANDOM % 30 + 10))
    if ps -p "$(< "$lockfile")" > /dev/null 2>&1; then
        echo "lockfile exists and $(< "$lockfile") is running"
        exit 1
    else
        # The stale lock is removed but this run still gives up; the next
        # invocation will be able to take the lock.
        echo "still not running, removing lockfile"
        rm -f -- "$lockfile"
        exit 1
    fi
fi
# Remove the lockfile on every exit path.  A TERM handler that only ran rm
# would let the script continue without holding its lock, so TERM is turned
# into an exit and the EXIT trap does the cleanup.  SIGKILL cannot be trapped.
trap 'rm -f -- "$lockfile"' EXIT
trap 'exit 143' TERM
#Default sleep-time is 30 seconds
sleep "${CONFIG_scan_wakeupperiod:-30}"


## There is no shared file system possible in the DGBridge, instead we must copy output files from upload to session dir


#Validate control directories supplied on command-line
# At least one argument is required, and every argument must name an
# existing directory; anything else aborts the scan with status 1.
[ -n "$1" ] || { echo "no control_dir specified" 1>&2; exit 1; }
for ctr_dir in "$@"; do
    [ -d "$ctr_dir" ] && continue
    echo "called with erronous control dir: $ctr_dir" 1>&2
    exit 1
done


# The arrays below are parallel: the same index refers to the same job in
# each of them.

# List of DGBridge jobids for the grid-jobs found in the control directories
declare -a localids
# Array with basenames of grid-job files in ctrl_dir
# example /some/path/job.XXXXX /some/other/path/job.YYYYY
declare -a basenames
# ARC job ids (the XXXXX part of job.XXXXX)
declare -a gridids
# Bridge web-service endpoint each job was submitted to
declare -a endpoints
# Array with states of the jobs in DGBridge
declare -a jobstates
# Array to store indexes of jobs that are determined to have finished, which are sent to gm-kick
declare -a kicklist

# All control directories joined into one string; only meaningful as a path
# when exactly one control directory was given.
option_ctrdir=$@

# Append every job found under <control_dir>/processing to the job arrays.
# Arguments: one or more control directories.
# Globals written: localids, endpoints, gridids, basenames.
# For each processing/job.<id>.status file the bridge id is read from the
# "localid=<endpoint>|<bridge id>" line of <control_dir>/job.<id>.local.
# NOTE(review): the content of the status file is not inspected, so jobs in
# any processing state are listed, not only INLRMS ones - confirm intended.
dgbridge_collect_jobs()
{
    local ctrdir statusfile jobname basename localid endpoint bridgeid rest ind
    for ctrdir in "$@"; do
        for statusfile in "$ctrdir"/processing/job.*.status; do
            # An unmatched glob is left as the literal pattern; skip it.
            [ -e "$statusfile" ] || continue
            jobname=${statusfile##*/}
            jobname=${jobname%.status}
            basename="$ctrdir/$jobname"
            # -f2- keeps everything after the first '=' so that endpoints
            # containing '=' are not truncated.
            localid=$(grep '^localid=' "${basename}.local" | cut -d= -f2-)
            IFS='|' read -r endpoint bridgeid rest <<< "$localid"
            ind=${#localids[@]}
            localids[$ind]=$bridgeid
            endpoints[$ind]=$endpoint
            gridids[$ind]=${jobname#job.}
            basenames[$ind]=$basename
        done
    done
}

# Find the grid jobs currently being processed, store localid and
# basename for those jobs
dgbridge_collect_jobs "$@"

# No need to continue further if no jobs have status INLRMS
# (localids is empty when the scan of the control directory found nothing).
(( ${#localids[@]} > 0 )) || exit 0


# Get JobStates from wsclient

# Print the state of one bridge job on stdout.
# Arguments: $1 - web-service endpoint, $2 - bridge job id.
# wsclient is expected to answer "<jobid> <state>"; the "<jobid> " prefix is
# stripped.  If wsclient fails or the remainder is not one of the known
# states, a diagnostic goes to stderr and "WSError" is printed instead.
dgbridge_query_jobstate()
{
    local wsendpoint=$1 jobid=$2
    # Kept in a variable and expanded unquoted: a quoted right-hand side of
    # =~ is matched as a literal string by bash >= 3.2, which would make
    # every reply look invalid.
    local known_states='^(Init|Running|Finished|Unknown|Error|TempFailed)$'
    local reply state
    if reply=$(wsclient -e "$wsendpoint" -m status -j "$jobid" 2>&1); then
        state=${reply#"$jobid "}
        if [[ $state =~ $known_states ]]; then
            printf '%s\n' "$state"
            return 0
        fi
    fi
    echo "Failed to get job status from web service: $reply" 1>&2
    echo "WSError"
}

#performance: change this to use the file - switch to read multiple jids from stdin
for ind in "${!localids[@]}"; do
   jobstates[$ind]=$(dgbridge_query_jobstate "${endpoints[$ind]}" "${localids[$ind]}")
done

# Ask the bridge to delete a job, retrying in the background.
# Arguments:
#   $1 - web-service endpoint of the bridge
#   $2 - bridge (3G) job id
#   $3 - ARC grid job id, only used in the progress message
# Globals read:
#   EDGES_3G_RETRIES - number of extra attempts after the first one
#   EDGES_3G_TIMEOUT - total time budget; the pause between attempts is
#                      EDGES_3G_TIMEOUT / EDGES_3G_RETRIES seconds
#   (presumably set by the sourced configure script - TODO confirm; both
#   default to 0 here so that an unset value still yields one attempt
#   instead of an arithmetic syntax error in the loop header)
# The retry loop runs as a background job, so the function returns at once
# and never reports the outcome of the deletion to its caller.
function cleanbridge()
{
   local wsep=$1
   local lid=$2
   local gridid=$3
   local retries=${EDGES_3G_RETRIES:-0}
   local timeout=${EDGES_3G_TIMEOUT:-0}
   local i output
   # clean local input storage
   echo "cleaning job: $gridid"
   # extract gridid
   # if [ ! "$gridid" = "" ]; then
   #    rm -rf $basewebdir/$gridid/
   # fi
   for ((i=0; i<=retries; i++)); do
      output=$(wsclient -e "$wsep" -m delete -j "$lid" 2>&1) && break
      if (( i < retries )); then
         sleep "$((timeout / retries))"
      else
         # Last attempt failed: report it instead of dropping it silently.
         echo "Failed to delete job $lid from $wsep: $output" 1>&2
      fi
   done &
}

#setup edgi monitoring logs
# One monitoring log file per day.  It is kept under the first control
# directory; with a single control directory this is the same path as
# $option_ctrdir, and with several it avoids a path built from all of them.
dato=$(date +%Y-%m-%d)
edgilog=${1:-$option_ctrdir}/3gbridge_logs/$dato
# From here on $dato is the timestamp written into every log record.
dato=$(date +%Y-%m-%d_%H:%M:%S)

# Append one job_status record to the monitoring log.
# Arguments: $1 - ARC grid job id, $2 - status text for the record.
# Globals read: dato, edgilog.
# The log file is opened for appending on descriptor 9 and locked through
# that descriptor (waiting at most 2 seconds).  Any failure - the file cannot
# be opened, the lock times out, the write fails - is reported on stderr and
# otherwise ignored.
dgbridge_log_status()
{
    local msg="dt=$dato event=job_status job_id=$1 status=$2"
    if ! (
        flock -w 2 9 || exit 1
        echo "$msg" >&9
    ) 9>>"$edgilog"; then
        echo "Failed to log monitor data to: $edgilog" 1>&2
    fi
}

# Act on the bridge state of one job.
# Arguments: $1 - index into the parallel job arrays.
# Globals read: endpoints, localids, gridids, basenames, jobstates.
# Globals written: kicklist (the index is appended when the job is done).
# For a job that has ended (successfully or not) this writes
# <basename>.lrms_done, asks the bridge to delete the job and logs the
# result.  Running jobs are only logged; Init, WSError and unrecognised
# states are left for a later scan.
dgbridge_process_job()
{
    local ind=$1
    local wsendpoint=${endpoints[$ind]}
    local localid=${localids[$ind]}
    local gridid=${gridids[$ind]}
    local donefile="${basenames[$ind]}.lrms_done"
    # NOTE(review): the session directory root is hard-coded - confirm it
    # matches the configured session directory.
    local sessiondir="/var/spool/nordugrid/session/$gridid/"
    local listing urls url

    case "${jobstates[$ind]}" in
        Init)
            ;;
        Running)
            #Job is running, nothing to do.
            #performance: delete input files in running state, only possible if remote 3gbridge does not use passthrough of data
            dgbridge_log_status "$gridid" Running
            ;;
        Unknown)
            #bridge doesn't know job, maybe cancelled
            echo "-1 Job was cancelled" > "$donefile"
            kicklist+=("$ind")
            cleanbridge "$wsendpoint" "$localid" "$gridid"
            dgbridge_log_status "$gridid" Failed
            ;;
        Finished)
            #fetch outputfiles. Maybe this will take too long.
            #first get list
            if ! listing=$(wsclient -e "$wsendpoint" -m output -j "$localid" 2>&1); then
                echo "-1 Job could not get output" > "$donefile"
                kicklist+=("$ind")
                cleanbridge "$wsendpoint" "$localid" "$gridid"
                dgbridge_log_status "$gridid" Failed
                return
            fi
            # The URL is the second field of every listing line that
            # mentions http; download each one into the session directory.
            urls=$(printf '%s\n' "$listing" | awk '/http/ {print $2}')
            while read -r url; do
                [ -n "$url" ] || continue
                wget -P "$sessiondir" "$url" < /dev/null \
                    || echo "Failed to fetch $url for job $gridid" 1>&2
            done <<< "$urls"
            chown -R "$(id -u):$(id -g)" "$sessiondir"
            #clean 3Gbridge
            cleanbridge "$wsendpoint" "$localid" "$gridid"

            #trigger done
            echo "0 Job Finished" > "$donefile"
            # A successfully finished job is kicked like the failed ones.
            kicklist+=("$ind")

            #monitor
            dgbridge_log_status "$gridid" Finished
            ;;
        Error|TempFailed)
            #job failed
            echo "-1 Job Failed" > "$donefile"
            kicklist+=("$ind")
            #clean
            cleanbridge "$wsendpoint" "$localid" "$gridid"
            #monitor
            dgbridge_log_status "$gridid" Failed
            ;;
        WSError)
            #webservice failed - perhaps have a count and then fail job?
            ;;
        *)
            ;;
    esac
}

# Look at the list of jobstates and determine which jobs that have
# finished. Write job.XXXX.lrms_done according to this
for ind in "${!localids[@]}"; do
    dgbridge_process_job "$ind"
done

# Kick the GM
# gm-kick receives the .status path of every job recorded in kicklist.  The
# paths are collected in an array so each one is passed as exactly one
# argument, whatever characters the control directory path contains.
if [ "${#kicklist[@]}" -gt 0 ]; then
    kickfiles=()
    for ind in "${kicklist[@]}"; do
        kickfiles+=("${basenames[$ind]}.status")
    done
    "${libexecdir}/gm-kick" "${kickfiles[@]}"
fi

exit 0