4 # This job assumes it's being passed:
9 if [ "x$ART_SAM_DEBUG" = "xtrue" ]
35 self_destruct_timeout=""
41 datadir=$TMPDIR/ifdh_$$
45 # parse options we know, collect rest in $args
50 $0 [Options] [cmd_options]
52 find ifdh_art and dependencies in CVMFS or in /nusoft/app/externals,
53 register a consumer process, and run an ART executable,
54 fetching input from a SAM Project specified by $SAM_PROJECT
60 Print this message and exit
64 Set qualifiers and version of ifdh_art to setup if
65 it isn't setup by any --source parameters.
69 executable (defaults to experiment name) and config file to
70 run as executable -c config [cmd_options]
73 specify destination path or url for copying back output
74 default is to not copy back files
77 copy out put to a hashed directory structure. This is used
78 for production copy backs to prevent to many files in one fts
84 call "ifdh rename" to rename output files
85 ...possibly 2 or three times
88 get the config file as an input file from SAM
89 (i.e. for MonteCarlo simulation)
90 conflicts with --conf.
93 Fetches multiple files per job and runs the executable
95 conflicts with --getconfig
98 for --getconfig, prepend this file to the fetched
99 config file before running the executable
102 Run the executable under the debugger, and print a
103 stack trace if it dies
106 Pass a number of files limit to establishProcess.
109 Copy this extra input file into the job area before
110 running the executable
113 call "ifdh addOutputFile" with files that match this
114 glob pattern (i.e. --addoutput *out.root)
117 export the following VAR=value before running the
120 --self-destruct-timer seconds
121 suicide if the executable runs more than seconds seconds;
122 usually only use this if you have jobs that hang and you
125 --source file:arg:arg:...
126 --prescript file:arg:arg:...
127 --postscript file:arg:arg:...
128 source/execute the file before/after the main executable
135 echo "debug: \$1 '$1' \$2 '$2'"
137 x-h|x--help) usage; exit 0;;
138 x-q|x--quals) quals="$2"; shift; shift; continue;;
139 x-c|x--config) conf="$2"; shift; shift; continue;;
140 x-D|x--dest) dest="$2"; shift; shift; continue;;
141 x-H|x--hash) hash=true; shift; continue;;
142 x--prename) prename=true; shift; continue;;
143 x-R|x--rename) renam="$2"; shift; shift; continue;;
144 x--rename2) renam2="$2"; shift; shift; continue;;
145 x--rename3) renam3="$2"; shift; shift; continue;;
146 x-X|x--exe) cmd="$2"; shift; shift; continue;;
147 x-v|x--vers) vers="$2"; shift; shift; continue;;
148 x-g|x--getconfig)getconfig=true; shift; continue;;
149 x--multifile) multifile=true; shift; continue;;
150 x-G|x--with-gdb)use_gdb=true; shift; continue;;
151 x-L|x--limit) limit="$2"; shift; shift; continue;;
152 x--inputfile) input_files="$input_files $2"; shift; shift; continue;;
153 x--addoutput) addoutput="$2"; shift; shift; continue;;
154 x--confbase) confbase="$2"; shift; shift; continue;;
155 x--export) exports="$exports \"$2\""; shift; shift; continue;; # ';' is required: without it the assignment is only a temporary prefix to 'shift' and is lost in bash
156 x--source) sources="$sources \"$2\""; shift; shift; continue;; # same fix: terminate the assignment before 'shift'
157 x--self-destruct-timer) self_destruct_timeout=$2; shift; shift; continue;;
158 x--prescript) prescripts="$prescripts \"$2\"":; shift; shift; continue;;
159 x--postscript) postscripts="$postscripts \"$2\"":; shift; shift; continue;;
160 *) args="$args \"$1\""; shift; continue;;
168 # use our slf6 stuff for systems with 3.x kernels (i.e. MTW2)
171 3.*) export UPS_OVERRIDE="-H Linux64bit+2.6-2.12";;
172 4.*) export UPS_OVERRIDE="-H Linux64bit+2.6-2.12";;
175 for path in /cvmfs/${EXPERIMENT}.opensciencegrid.org/products /cvmfs/${EXPERIMENT}.opensciencegrid.org/externals /cvmfs/oasis.opensciencegrid.org/${EXPERIMENT}/externals /cvmfs/${EXPERIMENT}cfs.fnal.gov/externals /nusoft/app/externals /grid/fermiapp/products/${EXPERIMENT}
177 if [ -r $path/setup ]
186 set : `df -P . | tail -1`
188 if [ $avail_blocks -lt 1024 ]
190 echo "Not enough space (only ${avail_blocks}k) on this node in `pwd`."
197 kill_proc_kids_after_n() {
204 echo "Starting self-destruct timer of $after_secs at $start"
206 while kill -0 $watchpid 2> /dev/null && [ $sofar -lt $after_secs ]
210 sofar=$((now - start))
217 pslist=`ps -ef | grep " $watchpid " | grep -v grep`
218 printf "Timed out after $sofar seconds...\n"
222 while read uid pid ppid rest
224 if [ $ppid = $watchpid ]
226 echo "killing -$signal $uid $pid $ppid $rest"
230 echo "killing -$signal $watchpid"
231 kill -$signal $watchpid
236 if [ x"$self_destruct_timeout" != x ]
238 kill_proc_kids_after_n $$ $self_destruct_timeout &
242 # if we don't have ups or enough space, try again for a bit
248 until find_ups && check_space
253 echo "Timed out waiting for space and/or cvmfs ups area"
259 # not sure we need this
260 if [ "x$IFDH_BASE_URI" = "x" ]
262 export IFDH_BASE_URI=http://samweb.fnal.gov:8480/sam/$EXPERIMENT/api
266 # treat colons as blanks when eval-ing sources below
267 # (need a better char? what about PATH changes?)
268 # because blanks get split by jobsub no matter what you do...
272 echo "doing: export $blat"
278 base=`echo $blat | sed -e 's/:.*//'`
279 blat=`echo $blat | sed -e 's/:/ /g'`
280 [ -x "$base" ] || chmod +x $base # test the script named by $base (was the literal word 'base'); chmod only when it is not already executable
282 echo "doing: source $blat"
286 for blat in $prescripts
288 base=`echo $blat | sed -e 's/:.*//'`
289 blat=`echo $blat | sed -e 's/:/ /g'`
290 [ -x "$base" ] || chmod +x $base # test the script named by $base (was the literal word 'base'); chmod only when it is not already executable
296 eval "confbase=$confbase"
299 # make sure we have ifdh_art
301 if [ x$IFDH_ART_DIR = x ]
303 . `ups setup ifdh_art $vers -q $quals:`
306 # should not need this, but seem to for older releases -- SL5 setup on SL6 bug
307 #PATH=/bin:/usr/bin:`echo $IFDHC_DIR/Linux*/bin`:$PATH
308 PATH=`echo $IFDHC_DIR/Linux*/bin`:$PATH:/bin:/usr/bin
309 LD_LIBRARY_PATH=`echo $IFDHC_DIR/Linux*/lib`:`echo $IFDH_ART_DIR/Linux*/lib`:$LD_LIBRARY_PATH
311 if [ -n "${JOBSUBJOBID}" ]
313 description="${JOBSUBJOBID}"
314 elif [ -n "${CLUSTER}" ] # the closing ']' must be its own word, otherwise '[' fails with "missing ]" and this branch is never taken
316 description="${CLUSTER}.${PROCESS}"
321 appname=$(basename $cmd)
323 hostname=`hostname --fqdn`
324 projurl=`ifdh findProject $SAM_PROJECT_NAME ${SAM_STATION:-$EXPERIMENT}`
327 consumer_id=`IFDH_DEBUG= ifdh establishProcess "$projurl" "$appname" "$ART_VERSION" "$hostname" "$GRID_USER" "art" "$description" "$limit"`
328 if [ "$consumer_id" = '' ]
330 echo "Unable to establish consumer id!"
331 echo "Unable to establish consumer id!" >&2
335 echo project url: $projurl
336 echo consumer id: $consumer_id
339 # override flags for grid copies..
340 # this should be in ifdh_cp, but until it is...
342 export IFDH_GRIDFTP_EXTRA="-p 0 -dp"
345 # Joe says not to do this...
350 echo "Active ups products:"
353 if [ -n "$input_files" ]
355 ifdh cp -D $input_files .
358 if [ ! -z "${TARFILE}" ] ; then
359 if [ ! -f "${TARFILE}" ] ; then
360 echo "ERROR Tar file ${TARFILE} doesn't exist"
371 echo "Getconfig case:"
376 uri=`IFDH_DEBUG= ifdh getNextFile $projurl $consumer_id | tail -1`
382 fname=`IFDH_DEBUG= ifdh fetchInput "$uri" | tail -1 `
385 echo "Error: unable to fetch input file $uri" >&2
389 if [ x$confbase != x ]
391 cat $confbase $fname > $fname.new
395 echo "config is now:"
396 echo "=============="
398 echo "=============="
400 datestamp=`date +%F-%H-%M-%S`
402 ifdh updateFileStatus $projurl $consumer_id $fname transferred
404 #out=`basename $fname | sed -e "s/.fcl$/$datestamp.root/"`
405 #command="\"${cmd}\" -c \"$conf\" $args -o $out --process-name test"
406 command="\"${cmd}\" -c \"$conf\" $args "
408 echo "Running: $command"
412 ifdh updateFileStatus $projurl $consumer_id $fname consumed
415 ifdh updateFileStatus $projurl $consumer_id $fname skipped
417 #uri=`ifdh getNextFile $projurl $consumer_id`
421 echo "Multi-file case:"
424 echo "--------------------------------------------------------"
428 echo "Python environment variables:"
431 echo "Path to python executable"
433 echo "--------------------------------------------------------"
439 uri=`IFDH_DEBUG= ifdh getNextFile $projurl $consumer_id | tail -1`
444 fname=`IFDH_DEBUG= ifdh fetchInput "$uri" | tail -1 `
445 echo "got file: $fname"
446 ifdh updateFileStatus $projurl $consumer_id $fname transferred
448 command="\"${cmd}\" -c \"$conf\" $args $fname"
450 echo "Running: $command"
453 ifdh updateFileStatus $projurl $consumer_id $fname consumed
456 ifdh updateFileStatus $projurl $consumer_id $fname skipped
460 echo "Not Getconfig case:"
469 cp $conf ${TMPDIR:=/var/tmp}/conf.$$
472 services.user.IFDH: {}
473 services.user.IFDH.debug: "1"
474 services.user.CatalogInterface.service_provider: "IFCatalogInterface"
475 services.user.CatalogInterface.webURI: "$projurl"
476 services.user.FileTransfer.service_provider: "IFFileTransfer"
477 source.fileNames: [ "$consumer_id" ]
481 args="$args \"--sam-web-uri=$projurl\" \"--sam-process-id=$consumer_id\""
490 command="\"${cmd}\" -c \"$conf\" $args"
494 printf 'run\nwhere\nquit\n' > gdbcmds
495 command="gdb -x gdbcmds --args $command"
498 echo "Running: $command"
509 for blat in $postscripts
511 base=`echo $blat | sed -e 's/:.*//'`
512 blat=`echo $blat | sed -e 's/:/ /g'`
513 [ -x "$base" ] || chmod +x $base # test the script named by $base (was the literal word 'base'); chmod only when it is not already executable
519 if [ "$prename" = "true" -a "$res" = "0" ]
523 newname=`$SRT_PUBLIC_CONTEXT/Metadata/samUtils/get_new_file_name $f`
525 mv $f $fpath/$newname
529 if [ "$hash" = "true" -a "$res" = "0" ]
533 hashdir=`python -c 'import hashlib, sys; name = sys.argv[1]; data = name if isinstance(name, bytes) else name.encode(); print("/".join(hashlib.md5(data).hexdigest()[:3]))' $f` # first three md5 hex digits of the file name joined by '/'; written to run on both Python 2 and Python 3
537 if [ "x$addoutput" != "x" -a "$res" = "0" ]
541 ifdh addOutputFile $f
545 if [ "x$renam" != "x" -a "$res" = "0" ]
547 ifdh renameOutput $renam
550 if [ "x$renam2" != "x" -a "$res" = "0" ]
552 ifdh renameOutput $renam2
555 if [ "x$renam3" != "x" -a "$res" = "0" ]
557 ifdh renameOutput $renam3
561 *.smu.edu) export IFDH_STAGE_VIA='srm://smuosgse.hpc.smu.edu:8443/srm/v2/server?SFN=/data/srm'
562 echo "turning on staging and for SMU..."
566 if [ "x$dest" != "x" -a "$res" = "0" ]
568 # workaround for srmls hangs
569 export SRM_JAVA_OPTIONS=-Xmx2048m
573 ifdh copyBackOutput "$dest/$hashdir/"
579 ifdh setStatus "$projurl" "$consumer_id" completed
581 ifdh setStatus "$projurl" "$consumer_id" bad
584 ifdh endProcess "$projurl" "$consumer_id"
588 # cleanup temporary script dir
591 # clean up usual detritus
592 rm -f *.fcl *.raw *.root t_* stage_*