condor_lar.sh
Go to the documentation of this file.
1 #! /bin/bash
2 #------------------------------------------------------------------
3 #
4 # Purpose: A general purpose larsoft batch worker script.
5 #
6 # Adapted from condor_lBdetMC.sh by E. Church.
7 #
8 # Usage:
9 #
10 # condor_lar.sh [options]
11 #
12 # Lar options:
13 #
14 # -c, --config <arg> - Configuration (fcl) file (required).
15 # -s, --source <arg> - Input file (full path).
16 # -S, --source-list <arg> - Input file list (full path, one per line).
17 # -o, --output <arg> - Output file name.
18 # -T, --TFileName <arg> - TFile output file name
19 # -n, --nevts <arg> - Number of events to process.
20 # --nskip <arg> - Number of events to skip.
21 # --nfile <arg> - Number of files to process per worker.
22 # --nfile_skip <arg> - Number of files to skip (use with option -S).
23 # --inputmode <arg> - Input mode ('textfile' or '', default '')
24 # --args <args...> - Arguments for lar command line (place at end).
25 #
26 # Sam and parallel project options.
27 #
28 # --sam_user <arg> - Specify sam user (default $GRID_USER).
29 # --sam_group <arg> - Specify sam group (default --group option).
30 # --sam_station <arg> - Specify sam station (default --group option).
31 # --sam_defname <arg> - Sam dataset definition name.
32 # --sam_project <arg> - Sam project name.
33 # --sam_start - Specify that this worker should be responsible for
34 # starting and stopping the sam project.
35 # --recur - Recursive input dataset (force snapshot).
36 # --sam_schema <arg> - Use this option with argument "root" to stream files using
37 # xrootd. Leave this option out for standard file copy.
38 # --os <arg> - A copy of the os argument passed to jobsub. May be used
39 # to affect definition of UPS_OVERRIDE.
40 # --njobs <arg> - Parallel project with specified number of jobs (default one).
41 # --data_file_type <arg> - Specify data file type (default "root", repeatable).
42 #
43 # Mix input options (second input stream).
44 #
45 # --mix_defname <arg> - Specify mix input sam dataset definition.
46 # --mix_project <arg> - Specify mix input sam project.
47 #
48 # Validation options.
49 #
50 # --declare - Do sam declaration.
51 # --validate - Do validation checks.
52 # --copy - Copy output files directly to FTS dropbox instead of
53 # output directory.
54 # --maintain_parentage - Recalculate sam parentage metadata for multistage jobs.
55 # (Use with --validate and --declare).
56 #
57 # Larsoft options.
58 #
59 # --ups <arg> - Comma-separated list of top level run-time ups products.
60 # -r, --release <arg> - Release tag.
61 # -q, -b, --build <arg> - Release build qualifier (default "prof:e9").
62 # --localdir <arg> - Larsoft local test release directory (default none).
63 # --localtar <arg> - Tarball of local test release.
64 # --mrb - Ignored (for compatibility).
65 # --srt - Exit with error status (SRT run time no longer supported).
66 #
67 # Other options.
68 #
69 # -h, --help - Print help.
70 # -i, --interactive - For interactive use.
71 # -g, --grid - No effect (allowed for compatibility).
72 # --group <arg> - Group or experiment (required).
73 # --workdir <arg> - No effect (allowed for compatibility).
74 # --outdir <arg> - Output directory (required).
75 # --logdir <arg> - Log directory (required).
76 # --dirsize <n> - Maximum directory size.
77 # --dirlevels <n> - Number of extra directory levels.
78 # --scratch <arg> - Scratch directory (only for interactive).
79 # --cluster <arg> - Job cluster (override $CLUSTER)
80 # --process <arg> - Process within cluster (override $PROCESS).
81 # --procmap <arg> - Name of process map file (override $PROCESS).
82 # --init-script <arg> - User initialization script execute.
83 # --init-source <arg> - User initialization script to source (bash).
84 # --end-script <arg> - User end-of-job script to execute.
85 # --mid-source <arg> - User midstage initialization script to source.
86 # --mid-script <arg> - User midstage finalization script to execute.
87 # --exe <arg> - Specify art-like executable (default "lar").
88 # --init <path> - Absolute path of environment initialization script.
89 #
90 # End options.
91 #
92 # Run time environment setup.
93 #
94 # MRB run-time environmental setup is controlled by four options:
95 # --release (-r), --build (-b, -q), --localdir, and --localtar.
96 #
97 # a) Use option --release or -r to specify version of top-level product(s).
98 # b) Use option --build or -b to specify build full qualifiers (e.g.
99 # "debug:e5" or "e5:prof").
100 # c) Options --localdir or --localtar are used to specify your local
101 # test release. Use one or the other (not both).
102 #
103 # Use --localdir to specify the location of your local install
104 # directory ($MRB_INSTALL).
105 #
106 # Use --localtar to specify the location of a tarball of your
107 # install directory (made relative to $MRB_INSTALL).
108 #
109 # Note that --localdir is not grid-friendly.
110 #
111 # Notes.
112 #
113 # 1. Each batch worker is uniquely identified by two numbers stored
114 # in environment variables $CLUSTER and $PROCESS (the latter is
115 # a small integer that starts from zero and varies for different
116 # jobs in a parallel job group). These environment variables are
117 # normally set by the batch system, but can be overridden by options
118 # --cluster, --process, and --procmap (e.g. to rerun failed jobs).
119 #
120 # 2. The work directory must be set to an existing directory owned
121 # by the submitter and readable by the batch worker. Files from the
122 # work directory are copied to the batch worker scratch directory at
123 # the start of the job.
124 #
125 # 3. A local test release may be specified as an absolute path using
126 # --localdir, or a tarball using --localtar. The location of the tarball
127 # may be specified as an absolute path visible on the worker, or a
128 # relative path relative to the work directory.
129 #
130 # 4. The output directory must exist and be writable by the batch
131 # worker (i.e. be group-writable for grid jobs). The worker
132 # makes a new subdirectory called ${CLUSTER}_${PROCESS} in the output
133 # directory and copies all files in the batch scratch directory there
134 # at the end of the job. If the output directory is not specified, the
135 # default is /grid/data/<group>/outstage/<user> (user is defined as
136 # owner of work directory).
137 #
138 # 5. Parallel projects are specified whenever --njobs is specified to
139 # be greater than one. Parallel projects are supported for single file,
140 # file list, and sam project input.
141 #
142 # In all cases, each worker processes some number of complete files.
143 # If the number of jobs is greater than the number of input files, some
144 # workers will not have any input files to process.
145 #
146 # In any case, options --nfile and --nevts can be used to limit the
147 # number of files or events that are processed by a single worker,
148 # regardless of the way files are divided among the workers.
149 #
150 # Option --njobs is incompatible with options --nskip, and --nfile_skip.
151 #
152 # a) Non-sam (single file or file list) input.
153 #
154 # In this case, input files are preassigned to workers such that all input
155 # files are approximately evenly divided among the workers. All files
156 # preassigned to this worker are copied to the scratch directory at the
157 # start of the job.
158 #
159 # b) Sam project input.
160 #
161 # In this case, files are assigned to workers in a non-deterministic
162 # manner by the sam system. The sam system fetches input files to the
163 # scratch directory and deletes processed input files during job execution.
164 #
165 #
166 # 6. Using option -n or --nevts to limit number of events processed:
167 #
168 # a) If no input files are specified (e.g. mc generation), --nevts
169 # specifies total number of events among all workers.
170 #
171 # b) If input files are specified, --nevts specifies total number of
172 # events processed by each worker or from each input file, whichever
173 # is less.
174 #
175 # 7. The interactive option (-i or --interactive) allows this script
176 # to be run interactively by overriding some settings that are normally
177 # obtained from the batch system, including $CLUSTER, $PROCESS, and
178 # the scratch directory. Interactive jobs always set PROCESS=0 (unless
179 # overridden by --process).
180 #
181 # 8. Mix options (--mix_defname, --mix_project) are only partially handled
182 # in this script. These options are parsed and their values are stored
183 # in shell variables. It is assumed that the sam project specified
184 # by --mix_project has been started externally, unless --sam_start is
185 # also specified, in which case this script will start the project.
186 # This script does not include any provision for joining the project.
187 # Further processing of these options (joining sam project, generating
188 # command line options or fcl wrappers) should be handled by user
189 # provided initialization scripts (--init-script, --init-source).
190 #
191 # 9. Option --init <path> is optional. If specified, it should point to
192 # the absolute path of the experiment environment initialization script,
193 # which path must be visible from the batch worker (e.g. /cvmfs/...).
194 # If this option is not specified, this script will look for and source
195 # a script with hardwired name "setup_experiment.sh" in directory
196 # ${CONDOR_DIR_INPUT}.
197 #
198 #
199 # Created: H. Greenlee, 29-Aug-2012
200 #
201 #------------------------------------------------------------------
202 
# Parse arguments.
#
# All option variables are first reset to their defaults, then the command
# line is consumed from left to right.

# Lar options.
FCL=""
INFILE=""
INLIST=""
INMODE=""
OUTFILE=""
TFILE=""
NEVT=0
NSKIP=0
SUBRUN=1
NFILE=0
NFILE_SKIP=0
NJOBS=1
ARGS=""

# Larsoft options.
UPS_PRDS=""
REL=""
QUAL=""
LOCALDIR=""
LOCALTAR=""

# Other options.
INTERACTIVE=0
GRP=""
OUTDIR=""
LOGDIR=""
DIRSIZE=0
DIRLEVELS=0
SCRATCH=""
CLUS=""
PROC=""
PROCMAP=""
INITSCRIPT=""
INITSOURCE=""
ENDSCRIPT=""
MIDSOURCE=""
MIDSCRIPT=""

# Sam and parallel project options.
SAM_USER=$GRID_USER
SAM_GROUP=""
SAM_STATION=""
SAM_DEFNAME=""
SAM_PROJECT=""
SAM_START=0
RECUR=0
SAM_SCHEMA=""
OS=""
USE_SAM=0

# Mix input options.
MIX_DEFNAME=""
MIX_PROJECT=""
MIX_SAM=0

# Validation options and miscellany.
IFDH_OPT=""
DECLARE_IN_JOB=0
VALIDATE_IN_JOB=0
COPY_TO_FTS=0
MAINTAIN_PARENTAGE=0
EXE="lar"
INIT=""
declare -a DATAFILETYPES

while [ $# -gt 0 ]; do

  # For a plain "--option <arg>" the case arm only names the variable that
  # receives the value; the shared code after "esac" does the assignment.
  # Flags and special options leave this empty and act inline.
  optvar=""

  case "$1" in

    # Help: print the usage part of this file's header comment.
    -h|--help )
      awk '/^# Usage:/,/^# End options/{print $0}' $0 | cut -c3- | head -n -2
      exit
      ;;

    # Lar options.
    -c|--config )       optvar=FCL ;;
    -s|--source )       optvar=INFILE ;;
    -S|--source-list )  optvar=INLIST ;;
    --inputmode )       optvar=INMODE ;;
    -o|--output )       optvar=OUTFILE ;;
    -T|--TFileName )    optvar=TFILE ;;
    -n|--nevts )        optvar=NEVT ;;
    --nskip )           optvar=NSKIP ;;
    --nfile )           optvar=NFILE ;;
    --nfile_skip )      optvar=NFILE_SKIP ;;
    --njobs )           optvar=NJOBS ;;

    # Data file types (repeatable): append to the array.
    --data_file_type )
      if [ $# -gt 1 ]; then
        ntype=${#DATAFILETYPES[@]}
        DATAFILETYPES[$ntype]=$2
        shift
      fi
      ;;

    # Sam options.  A dataset definition or project name switches on sam input.
    --sam_user )        optvar=SAM_USER ;;
    --sam_group )       optvar=SAM_GROUP ;;
    --sam_station )     optvar=SAM_STATION ;;
    --sam_defname )
      optvar=SAM_DEFNAME
      if [ $# -gt 1 ]; then USE_SAM=1; fi
      ;;
    --sam_project )
      optvar=SAM_PROJECT
      if [ $# -gt 1 ]; then USE_SAM=1; fi
      ;;
    --sam_start )       SAM_START=1 ;;
    --recur )           RECUR=1 ;;
    --sam_schema )      optvar=SAM_SCHEMA ;;
    --os )              optvar=OS ;;

    # Everything after --args belongs to the lar command line; stop parsing.
    --args )
      if [ $# -gt 1 ]; then
        shift
        ARGS=$@
        break
      fi
      ;;

    # Larsoft options.
    --ups )             optvar=UPS_PRDS ;;
    -r|--release )      optvar=REL ;;
    -q|-b|--build )     optvar=QUAL ;;
    --localdir )        optvar=LOCALDIR ;;
    --localtar )        optvar=LOCALTAR ;;

    # MRB flag (ignored).
    --mrb ) ;;

    # SRT flag (no longer supported).
    --srt )
      echo "SRT run time environment is no longer supported."
      exit 1
      ;;

    # Other options.
    -i|--interactive )  INTERACTIVE=1 ;;

    # Grid flag (no effect).
    -g|--grid ) ;;

    --group )           optvar=GRP ;;

    # Work directory: the value is accepted and discarded.
    --workdir )
      if [ $# -gt 1 ]; then
        shift
      fi
      ;;

    --outdir )          optvar=OUTDIR ;;
    --logdir )          optvar=LOGDIR ;;
    --dirsize )         optvar=DIRSIZE ;;
    --dirlevels )       optvar=DIRLEVELS ;;
    --scratch )         optvar=SCRATCH ;;
    --cluster )         optvar=CLUS ;;
    --process )         optvar=PROC ;;
    --procmap )         optvar=PROCMAP ;;
    --init-script )     optvar=INITSCRIPT ;;
    --init-source )     optvar=INITSOURCE ;;
    --end-script )      optvar=ENDSCRIPT ;;
    --mid-source )      optvar=MIDSOURCE ;;
    --mid-script )      optvar=MIDSCRIPT ;;

    # Validation flags.
    --declare )         DECLARE_IN_JOB=1 ;;
    --validate )        VALIDATE_IN_JOB=1 ;;
    --copy )            COPY_TO_FTS=1 ;;

    # Mix input (second input stream).
    --mix_defname )
      optvar=MIX_DEFNAME
      if [ $# -gt 1 ]; then MIX_SAM=1; fi
      ;;
    --mix_project )
      optvar=MIX_PROJECT
      if [ $# -gt 1 ]; then MIX_SAM=1; fi
      ;;

    # Recalculate the output file's parentage from the input list or sam process.
    --maintain_parentage ) MAINTAIN_PARENTAGE=1 ;;

    # Alternate art-like executable and environment initialization script.
    --exe )             optvar=EXE ;;
    --init )            optvar=INIT ;;

    # Other.
    * )
      echo "Unknown option $1"
      exit 1
      ;;
  esac

  # Store the value of a "--option <arg>" pair.  An option that appears as
  # the very last word, without a value, is silently ignored.
  if [ -n "$optvar" ] && [ $# -gt 1 ]; then
    printf -v "$optvar" '%s' "$2"
    shift
  fi
  shift
done
unset optvar
682 
683 #echo "FCL=$FCL"
684 #echo "INFILE=$INFILE"
685 #echo "INLIST=$INLIST"
686 #echo "OUTFILE=$OUTFILE"
687 #echo "TFILE=$TFILE"
688 #echo "NEVT=$NEVT"
689 #echo "NSKIP=$NSKIP"
690 #echo "NFILE=$NFILE"
691 #echo "NFILE_SKIP=$NFILE_SKIP"
692 #echo "NJOBS=$NJOBS"
693 #echo "ARGS=$ARGS"
694 #echo "REL=$REL"
695 #echo "QUAL=$QUAL"
696 #echo "LOCALDIR=$LOCALDIR"
697 #echo "LOCALTAR=$LOCALTAR"
698 #echo "INTERACTIVE=$INTERACTIVE"
699 #echo "GRP=$GRP"
700 #echo "OUTDIR=$OUTDIR"
701 #echo "LOGDIR=$LOGDIR"
702 #echo "SCRATCH=$SCRATCH"
703 #echo "CLUS=$CLUS"
704 #echo "PROC=$PROC"
705 #echo "INITSCRIPT=$INITSCRIPT"
706 #echo "INITSOURCE=$INITSOURCE"
707 #echo "ENDSCRIPT=$ENDSCRIPT"
708 #echo "MIDSOURCE=$MIDSOURCE"
709 #echo "MIDSCRIPT=$MIDSCRIPT"
710 #echo "VALIDATE_IN_JOB=$VALIDATE_IN_JOB"
711 
# Use the default data file type ("root") if --data_file_type was never given.

[ ${#DATAFILETYPES[@]} -ne 0 ] || DATAFILETYPES[0]=root

# Done with arguments.  Log the identity and load of this worker node.

echo "Nodename: $(hostname -f)"
id
echo "Load average:"
cat /proc/loadavg

# Set defaults: build qualifier, and sam group/station taken from --group.

[ x$QUAL != x ] || QUAL="prof:e9"
[ x$SAM_GROUP != x ] || SAM_GROUP=$GRP
[ x$SAM_STATION != x ] || SAM_STATION=$GRP

# Standardize sam_schema (xrootd -> root, xroot -> root).

case x$SAM_SCHEMA in
  xxrootd | xxroot )
    SAM_SCHEMA=root
    ;;
esac

# Fix for sites with newer linux kernels (currently disabled):
# Do this only if OS is exclusively requested as SL6.

#if [ x$OS = xSL6 ]; then
#  case `uname -r` in
#    3.*) export UPS_OVERRIDE="-H Linux64bit+2.6-2.12";;
#    4.*) export UPS_OVERRIDE="-H Linux64bit+2.6-2.12";;
#  esac
#fi
echo "uname -r: $(uname -r)"
echo "UPS_OVERRIDE: $UPS_OVERRIDE"

echo "Condor dir input: $CONDOR_DIR_INPUT"
761 
# Initialize experiment ups products and mrb.

echo "Initializing ups and mrb."

if [ x$INIT = x ]; then

  # No --init option: use the script delivered with the job input files.

  echo "Sourcing setup_experiment.sh"
  source ${CONDOR_DIR_INPUT}/setup_experiment.sh
elif [ -f $INIT ]; then
  echo "Sourcing $INIT"
  source $INIT
else
  echo "Environment initialization script $INIT not found."
  exit 1
fi

echo PRODUCTS=$PRODUCTS
echo "ups flavor: $(ups flavor)"

# Set GROUP environment variable from --group (required).

unset GROUP
if [ x$GRP = x ]; then
  echo "GROUP not specified."
  exit 1
fi
GROUP=$GRP
export GROUP
echo "Group: $GROUP"

# Report options for ifdh.

echo "X509_USER_PROXY = $X509_USER_PROXY"
echo "IFDH_OPT=$IFDH_OPT"

# Make sure fcl file argument was specified.

[ x$FCL != x ] || {
  echo "No configuration option (-c|--config) was specified."
  exit 1
}

# Make sure an output directory was specified.

[ x$OUTDIR != x ] || {
  echo "Output directory not specified."
  exit 1
}
echo "Output directory: $OUTDIR"

# Make sure a log directory was specified.

[ x$LOGDIR != x ] || {
  echo "Log directory not specified."
  exit 1
}
echo "Log directory: $LOGDIR"
820 
# Make sure scratch directory is defined.
# For batch, the scratch directory is always $_CONDOR_SCRATCH_DIR
# For interactive, the scratch directory is specified by option
# --scratch or --outdir.

if [ "$INTERACTIVE" -eq 0 ]; then
  SCRATCH=$_CONDOR_SCRATCH_DIR
elif [ -z "$SCRATCH" ]; then
  SCRATCH=$OUTDIR
fi

# Each condition is a separate, quoted test.  The former single test using
# "-o" with an unquoted $SCRATCH failed with a syntax error (which counts as
# "false") for values containing whitespace, letting a bad directory through.

if [ -z "$SCRATCH" ] || [ ! -d "$SCRATCH" ] || [ ! -w "$SCRATCH" ]; then
  echo "Local scratch directory not defined or not writable."
  exit 1
fi

# Create the scratch directory in the condor scratch directory.
# Copied from condor_lBdetMC.sh.
# Scratch directory path is stored in $TMP.
# Scratch directory is automatically deleted when shell exits.

# Do not change this section.
# It creates a temporary working directory that automatically cleans up all
# leftover files at the end.
TMP=`mktemp -d ${SCRATCH}/working_dir.XXXXXXXXXX`
TMP=${TMP:-${SCRATCH}/working_dir.$$}

{ [[ -n "$TMP" ]] && mkdir -p "$TMP"; } || \
  { echo "ERROR: unable to create temporary directory!" 1>&2; exit 1; }
trap "[[ -n \"$TMP\" ]] && { rm -rf \"$TMP\"; }" 0
chmod 755 $TMP
cd $TMP
# End of the section you should not change.

echo "Scratch directory: $TMP"
857 
# Populate the local work directory from the condor input directory,
# unpack any tar files found there and make scripts executable.

echo "No longer fetching files from work directory."
echo "that's now done with using jobsub -f commands"
mkdir work
cp ${CONDOR_DIR_INPUT}/* work/
cd work

# Unpack first, so that scripts delivered inside tar files are also made
# executable by the second pass.

find . -name '*.tar' -exec tar xf {} \;
find . \( -name '*.py' -o -name '*.sh' \) -exec chmod +x {} +

echo "Local working directoroy:"
pwd
ls
echo
872 
# Save the hostname and condor job id.
# This is written before any of the overrides below are applied.

hostname > hostname.txt
echo ${CLUSTER}.${PROCESS} > jobid.txt

# Set default CLUSTER and PROCESS environment variables for interactive jobs:
# CLUSTER from the current time stamp, PROCESS zero.

if [ $INTERACTIVE -ne 0 ]; then
  CLUSTER=$(date +%s)
  PROCESS=0
fi

# Override CLUSTER and PROCESS from command line options.

[ x$CLUS = x ] || CLUSTER=$CLUS
[ x$PROC = x ] || PROCESS=$PROC

# With a process map, PROCESS is replaced by line number PROCESS+1 of the map.

if [ x$PROCMAP != x ]; then
  if [ ! -f $PROCMAP ]; then
    echo "Process map file $PROCMAP not found."
    exit 1
  fi
  PROCESS=$(sed -n $(( $PROCESS + 1 ))p $PROCMAP)
fi

# Both identifiers must be known from here on.

[ x$CLUSTER != x ] || {
  echo "CLUSTER not specified."
  exit 1
}
[ x$PROCESS != x ] || {
  echo "PROCESS not specified."
  exit 1
}
echo "Procmap: $PROCMAP"
echo "Cluster: $CLUSTER"
echo "Process: $PROCESS"
912 
# Construct name of output subdirectory.
#
# Each extra directory level contributes one path component: the successive
# base-$DIRSIZE digits of $PROCESS, least significant digit innermost.
# DIRLEVELS is counted down in place.

parentdir=''
ndir=$PROCESS
while [ "$DIRLEVELS" -gt 0 ] && [ "$DIRSIZE" -gt 0 ]; do
  digit=$(( $ndir % $DIRSIZE ))
  parentdir="${digit}/${parentdir}"
  ndir=$(( $ndir / $DIRSIZE ))
  DIRLEVELS=$(( $DIRLEVELS - 1 ))
done
unset digit
OUTPUT_SUBDIR="${parentdir}${CLUSTER}_${PROCESS}"
echo "Output subdirectory: $OUTPUT_SUBDIR"
924 
# Make sure fcl file exists.
#
# $FCL is quoted on purpose: unquoted, an empty value reduces the test to
# "[ ! -f ]", which is false, so the check would be skipped without a message.

if [ ! -f "$FCL" ]; then
  echo "Configuration file $FCL does not exist."
  exit 1
fi
931 
# Make sure the optional user scripts exist (and, for the ones that are
# executed rather than sourced, are executable).

#######################################
# Check one optional user script.
# Arguments:
#   $1 - description used as the start of the error message
#   $2 - script path; empty means the option was not given (nothing to do)
#   $3 - "exec" if the script is going to be executed and therefore has to
#        be made executable; omit for scripts that are only sourced
# Outputs:
#   Error message on stdout if the script does not exist.
# Returns:
#   0 if the script is unspecified or exists; exits the shell with status 1
#   if it is specified but is not a regular file.
#######################################
_condor_lar_check_script() {
  [ -n "$2" ] || return 0
  if [ ! -f "$2" ]; then
    echo "$1 $2 does not exist."
    exit 1
  fi
  if [ "$3" = exec ]; then
    chmod +x "$2"
  fi
  return 0
}

# Init script (executed) and init source script (sourced).

_condor_lar_check_script "Initialization script" "$INITSCRIPT" exec
_condor_lar_check_script "Initialization source script" "$INITSOURCE"

# End-of-job script (executed).

_condor_lar_check_script "Finalization script" "$ENDSCRIPT" exec

# Midstage init source script (sourced) and finalization script (executed).

_condor_lar_check_script "Midstage initialization source script" "$MIDSOURCE"
_condor_lar_check_script "Midstage finalization script" "$MIDSCRIPT" exec
978 
# MRB run time environment setup goes here.

#######################################
# Make sure ifdhc is set up before something is fetched with "ifdh cp".
# Globals:   IFDHC_DIR (read; expected to be set by "setup ifdhc")
# Arguments: $1 - description of what is about to be fetched (for the log)
#######################################
_condor_lar_require_ifdhc() {
  if [ -z "$IFDHC_DIR" ]; then
    echo "Setting up ifdhc before fetching $1."
    setup ifdhc
  fi
  echo "IFDHC_DIR=$IFDHC_DIR"
}

#######################################
# Generate $TMP/local/setup.local from $TMP/local/setup, with the
# "setenv MRB_INSTALL" and "setenv MRB_TOP" lines pointing at the copy of the
# test release under $TMP, then select the matching version of mrb.
# Globals:   TMP (read)
# Outputs:   writes $TMP/local/setup.local
#######################################
_condor_lar_prepare_local_setup() {
  sed "s@setenv MRB_INSTALL.*@setenv MRB_INSTALL ${TMP}/local@" "$TMP/local/setup" | \
    sed "s@setenv MRB_TOP.*@setenv MRB_TOP ${TMP}@" > "$TMP/local/setup.local"

  # Make sure we have the correct version of mrb setup.
  # A setup script that refers to bin/shell_independence is an old style
  # working area, which needs the old version of mrb.

  if grep -q bin/shell_independence "$TMP/local/setup.local"; then
    echo "Setting up old version of mrb."
    unsetup mrb
    setup mrb -o
  fi
}

# Setup local test release, if any.

if [ -n "$LOCALDIR" ]; then
  mkdir "$TMP/local"
  cd "$TMP/local"

  # Copy test release directory recursively.

  echo "Copying local test release from directory ${LOCALDIR}."
  _condor_lar_require_ifdhc "local directory"

  # $IFDH_OPT is deliberately unquoted: it may be empty or hold several words.

  ifdh cp -r $IFDH_OPT "$LOCALDIR" .
  stat=$?
  if [ $stat -ne 0 ]; then
    echo "ifdh cp failed with status ${stat}."
    exit $stat
  fi
  find . -name \*.py -exec chmod +x {} \;
  find . -name \*.sh -exec chmod +x {} \;

  # Setup the environment.

  cd "$TMP/work"
  echo "Initializing localProducts from ${LOCALDIR}."
  if [ ! -f "$TMP/local/setup" ]; then
    echo "Local test release directory $LOCALDIR does not contain a setup script."
    exit 1
  fi
  _condor_lar_prepare_local_setup

  # Do local setup (sourced at top level, not inside a function).

  . "$TMP/local/setup.local"
  #echo "MRB_INSTALL=${MRB_INSTALL}."
  #echo "MRB_QUALS=${MRB_QUALS}."
  #echo "Setting up all localProducts."
  #if [ x$IFDHC_DIR != x ]; then
  #  unsetup ifdhc
  #fi
  #mrbslp
fi
cd "$TMP/work"

# Setup local larsoft test release from tarball.

if [ -n "$LOCALTAR" ]; then
  mkdir "$TMP/local"
  cd "$TMP/local"

  # Fetch the tarball.

  echo "Fetching test release tarball ${LOCALTAR}."
  _condor_lar_require_ifdhc "tarball"
  ifdh cp "$LOCALTAR" local.tar
  stat=$?
  if [ $stat -ne 0 ]; then
    echo "ifdh cp failed with status ${stat}."
    exit $stat
  fi

  # Extract the tarball.

  tar -xf local.tar

  # Setup the environment.

  cd "$TMP/work"
  echo "Initializing localProducts from tarball ${LOCALTAR}."

  # Same sanity check as for --localdir: without it, a tarball that did not
  # unpack a setup script would lead to an empty setup.local being sourced.

  if [ ! -f "$TMP/local/setup" ]; then
    echo "Local test release tarball $LOCALTAR does not contain a setup script."
    exit 1
  fi
  _condor_lar_prepare_local_setup

  # Do local setup (sourced at top level, not inside a function).

  . "$TMP/local/setup.local"
  #echo "MRB_INSTALL=${MRB_INSTALL}."
  #echo "MRB_QUALS=${MRB_QUALS}."
  #echo "Setting up all localProducts."
  #if [ x$IFDHC_DIR != x ]; then
  #  unsetup ifdhc
  #fi
  #mrbslp
fi
1101 
# Setup specified version of top level run time products
# (if specified, and if local test release did not set them up).
# $UPS_PRDS is a comma-separated list; the expansion below is left unquoted
# so that it splits into one word per product.

for prd in ${UPS_PRDS//,/ }
do

  # Skip products that already show up in the list of active products.

  if ups active | grep -q $prd; then
    continue
  fi

  echo "Setting up $prd $REL -q ${QUAL}."
  if [ x$IFDHC_DIR != x ] && [ x$IFBEAM_DIR = x ]; then
    unsetup ifdhc
  fi
  setup $prd $REL -q $QUAL
done

ups active

cd $TMP/work

# In case mrb setup didn't setup a version of ifdhc, set up ifdhc again.

[ x$IFDHC_DIR != x ] || {
  echo "Setting up ifdhc again, because larsoft did not set it up."
  setup ifdhc
}
echo "IFDH_ART_DIR=$IFDH_ART_DIR"
echo "IFDHC_DIR=$IFDHC_DIR"
1128 
# Run/source optional initialization scripts.
# A nonzero status from either one ends the job with that status.

if [ x$INITSCRIPT != x ]; then
  echo "Running initialization script ${INITSCRIPT}."
  ./${INITSCRIPT}
  status=$?
  [ $status -eq 0 ] || exit $status
fi

if [ x$INITSOURCE != x ]; then
  echo "Sourcing initialization source script ${INITSOURCE}."
  . $INITSOURCE
  status=$?
  [ $status -eq 0 ] || exit $status
fi

# Save a copy of the environment, which can be helpful for debugging.

env > env.txt
1152 
1153 # Get input files to process, either single file, file list, or sam.
1154 #
1155 # For non-sam non-xrootd input, copy all files local using ifdh cp, and make a
1156 # local file list called condor_lar_input.list. Save the remote file names (uri's)
1157 # in another file called transferred_uris.list
1158 #
1159 # For non-sam xrootd input ("--sam_schema root") convert input list to xrootd uri's,
1160 # if possible.
1161 
# Start from a clean slate: no list files left over, no input files counted.
rm -f condor_lar_input.list transferred_uris.list
NFILE_TOTAL=0
parent_files=()

# For data overlay, the data files being brought in are the output's aunts.
aunt_files=()
1167 
1168 if [ $USE_SAM -eq 0 -a x$INFILE != x ]; then
1169 
1170  # Single file case.
1171 
1172  # Don't allow any list-related options in single file case:
1173  # -S, --source-list, --nfile, --nfile_skip
1174 
1175  if [ x$INLIST != x -o $NFILE -ne 0 -o $NFILE_SKIP -ne 0 ]; then
1176  echo "File list options specified with single input file."
1177  exit 1
1178  fi
1179 
1180  #set the parent file to be the input file
1181  parent_files=("${parent_files[@]}" $INFILE)
1182 
1183  # Copy input file to scratch directory or convert to xrootd url.
1184 
1185  NFILE_TOTAL=1
1186  XROOTD_URI=$INFILE
1187  if [ x$SAM_SCHEMA = xroot ]; then
1188  XROOTD_URI=`file_to_url.sh $INFILE`
1189  fi
1190  if [ $XROOTD_URI != $INFILE ]; then
1191  echo $INFILE > transferred_uris.list
1192  echo $XROOTD_URI > condor_lar_input.list
1193  echo "Input xrootd uri: $XROOTD_URI"
1194  else
1195  LOCAL_INFILE=`basename $INFILE`
1196  echo "Copying $INFILE"
1197  ifdh cp $INFILE $LOCAL_INFILE
1198  stat=$?
1199  if [ $stat -ne 0 ]; then
1200  echo "ifdh cp failed with status ${stat}."
1201  exit $stat
1202  fi
1203  if [ -f $LOCAL_INFILE -a $stat -eq 0 ]; then
1204  echo $INFILE > transferred_uris.list
1205  echo $LOCAL_INFILE > condor_lar_input.list
1206  else
1207  echo "Error fetching input file ${INFILE}."
1208  exit 1
1209  fi
1210  fi
1211 
1212 elif [ $USE_SAM -eq 0 -a x$INLIST != x ]; then
1213 
1214  # Input list case.
1215 
1216  # Make sure input file list exists.
1217 
1218  if [ ! -f $INLIST ]; then
1219  echo "Input file list $INLIST does not exist."
1220  exit 1
1221  fi
1222 
1223  # Remember how many files are in the input file list.
1224 
1225  NFILE_TOTAL=`cat $INLIST | wc -l`
1226  echo "Input file list contains $NFILE_TOTAL total files."
1227 
1228  # Clamp the total number of files to be a maximum of NFILE * NJOBS, where
1229  # NFILE and NJOBS are specified via command line options. In project.py
1230  # terms, NFILE is <maxfilesperjob> and NOJBS is <numjobs>.
1231 
1232  MAX_TOTAL=$(( $NFILE * $NJOBS ))
1233  if [ $MAX_TOTAL -gt 0 -a $NFILE_TOTAL -gt $MAX_TOTAL ]; then
1234  NFILE_TOTAL=$MAX_TOTAL
1235  echo "Number of files to be processed will be limited to ${NFILE_TOTAL}."
1236  fi
1237 
1238  # If --njobs was specified, calculate how many files
1239  # to skip and process in this worker.
1240 
1241  if [ $NJOBS -ne 0 ]; then
1242 
1243  # Don't allow option --nfile_skip in this case.
1244 
1245  if [ $NFILE_SKIP -ne 0 ]; then
1246  echo "Illegal options specified with --njobs."
1247  exit 1
1248  fi
1249 
1250  # Clamp NJOBS to be a maximum of $NFILE_TOTAL.
1251  # This means that workers with $PROCESS >= $NFILE_TOTAL will not have
1252  # any input files to process.
1253 
1254  MYNJOBS=$NJOBS
1255  if [ $MYNJOBS -gt $NFILE_TOTAL ]; then
1256  MYNJOBS=$NFILE_TOTAL
1257  fi
1258 
1259  # Calculate number of files to skip and number of files to process.
1260 
1261  NFILE_SKIP=$(( $PROCESS * $NFILE_TOTAL / $MYNJOBS ))
1262  MYNFILE=$(( ( $PROCESS + 1 ) * $NFILE_TOTAL / $MYNJOBS - $NFILE_SKIP ))
1263  if [ $MYNFILE -eq 0 -o $NFILE_SKIP -ge $NFILE_TOTAL ]; then
1264  echo "This worker did not get any input files."
1265  exit 1
1266  fi
1267  if [ $MYNFILE -lt $NFILE -o $NFILE -eq 0 ]; then
1268  NFILE=$MYNFILE
1269  fi
1270  fi
1271 
1272  # Report number of files to skip and process.
1273 
1274  echo "Skipping $NFILE_SKIP files."
1275  if [ $NFILE -eq 0 ]; then
1276  echo "Processing all remaining files."
1277  else
1278  echo "Processing $NFILE files."
1279  fi
1280 
1281  # Copy input files and construct local input file list.
1282 
1283  nfile=0
1284  nfskip=$NFILE_SKIP
1285  nmax=$NFILE
1286  while read infile; do
1287  if [ $nfskip -gt 0 ]; then
1288  nfskip=$(( $nfskip - 1 ))
1289  else
1290 
1291  # Retain the original file name as the local file name, if possible.
1292  # Otherwise, generate a new (hopefully) unique name.
1293 
1294  if [ ! -f condor_lar_input.list ]; then
1295  touch condor_lar_input.list
1296  fi
1297 
1298  XROOTD_URI=$infile
1299  if [ x$SAM_SCHEMA = xroot ]; then
1300  XROOTD_URI=`file_to_url.sh $infile`
1301  fi
1302  if [ $XROOTD_URI != $infile ]; then
1303  echo $infile >> transferred_uris.list
1304  echo $XROOTD_URI >> condor_lar_input.list
1305  echo "Input xrootd uri: $XROOTD_URI"
1306  else
1307  LOCAL_INFILE=`basename $infile`
1308  if grep -q $LOCAL_INFILE condor_lar_input.list; then
1309  LOCAL_INFILE=input${nfile}.root
1310  if [ "$INMODE" = 'textfile' ]; then
1311  LOCAL_INFILE=input${nfile}.txt
1312  fi
1313  fi
1314  echo "Copying $infile"
1315  ifdh cp $infile $LOCAL_INFILE
1316  stat=$?
1317  if [ $stat -ne 0 ]; then
1318  echo "ifdh cp failed with status ${stat}."
1319  exit $stat
1320  fi
1321  if [ -f $LOCAL_INFILE -a $stat -eq 0 ]; then
1322  echo $infile >> transferred_uris.list
1323  echo $LOCAL_INFILE >> condor_lar_input.list
1324  parent_files=("${parent_files[@]}" $LOCAL_INFILE)
1325  else
1326  echo "Error fetching input file ${infile}."
1327  exit 1
1328  fi
1329  fi
1330  nmax=$(( $nmax - 1 ))
1331  if [ $nmax -eq 0 ]; then
1332  break
1333  fi
1334  fi
1335  nfile=$(( $nfile + 1 ))
1336  done < $INLIST
1337 fi
1338 
# Count the locally staged input files (NFILE_LOCAL).  Only meaningful when
# input was copied, i.e. neither sam nor xrootd streaming is in use.

NFILE_LOCAL=0
if [ "$USE_SAM" -eq 0 ] && [ "x$SAM_SCHEMA" != xroot ]; then
  if [ ! -f condor_lar_input.list ]; then
    echo "No local input files."
  else

    # Sort input list by decreasing size so we don't get a file with
    # zero events as the first file.

    xargs ls -s1 < condor_lar_input.list \
      | sort -nr \
      | awk '{print $2}' > newcondor_lar_input.list
    mv -f newcondor_lar_input.list condor_lar_input.list

    echo "Local input file list:"
    cat condor_lar_input.list
    NFILE_LOCAL=$(wc -l < condor_lar_input.list)
  fi
  echo "Local input list has $NFILE_LOCAL files."
fi
1357 
# Break the master wrapper fcl into one fcl file per stage.
#
# The wrapper consists of sections delimited by marker lines:
#
#   #---STAGE <n>     opens the section that is stored in Stage<n>.fcl
#   #---END_STAGE     closes the current section
#
# split_stage_fcls <wrapper-fcl>
#   Appends the body of each section to its Stage<n>.fcl file in the current
#   directory and sets the global nfcls to the number of sections seen.
#   Lines ahead of the first stage marker belong to no stage and are dropped.
#   Returns non-zero, with nfcls left at 0, if the wrapper can not be read.

split_stage_fcls() {
  local wrapper=$1
  local line key value stage_fcl=''

  nfcls=0
  if [ ! -r "$wrapper" ]; then
    echo "Unable to read wrapper fcl file '${wrapper}'." >&2
    return 1
  fi

  # The "|| [ -n ... ]" keeps a final line that lacks a trailing newline.
  while read -r line || [ -n "$line" ]; do

    # First two whitespace-separated words of the line (if any).
    read -r key value _ <<< "$line"

    if [ "$key" = "#---STAGE" ]; then
      stage_fcl="Stage${value}.fcl"
      nfcls=$(( nfcls + 1 ))
      continue
    fi

    if [ "$line" = "#---END_STAGE" ]; then
      continue
    fi

    if [ -z "$stage_fcl" ]; then
      continue
    fi

    # Write the line verbatim: quoting prevents glob expansion and keeps
    # embedded whitespace (e.g. inside fcl string values) intact.
    printf '%s\n' "$line" >> "$stage_fcl"
  done < "$wrapper"
}

nfcls=0
split_stage_fcls "$FCL"

# We now have nfcls stage fcl files, each of which needs to be run serially.
stage=0

echo "Start loop over stages"
1382 while [ $stage -lt $nfcls ]; do
# (Runs inside the per-stage loop.)  Select this stage's fcl file.

FCL="Stage${stage}.fcl"

# In case no input files were specified, and we are not getting input
# from sam (i.e. mc generation), recalculate the subrun number, and the
# number of events to generate in this worker.
# This also applies to the textfile inputmode.
# Note this only applies to the first stage by definition.

if [ "$stage" -eq 0 ] && [ "$USE_SAM" -eq 0 ] \
   && { [ "$NFILE_TOTAL" -eq 0 ] || [ "$INMODE" = 'textfile' ]; }; then

  # Don't allow --nskip.

  if [ "$NSKIP" -gt 0 ]; then
    echo "Illegal option --nskip specified with no input."
    exit 1
  fi

  # This worker's share of the events: everything between its own starting
  # offset and the next worker's.  Nothing is actually skipped.

  first_event=$(( PROCESS * NEVT / NJOBS ))
  NEV=$(( ( PROCESS + 1 ) * NEVT / NJOBS - first_event ))
  NSKIP=0
  NEVT=$NEV

  # Set subrun=$PROCESS+1 in a wrapper fcl file.

  SUBRUN=$(( PROCESS + 1 ))
  {
    printf '#include "%s"\n\n' "$FCL"
    printf 'source.firstSubRun: %s\n\n' "$SUBRUN"
  } > subrun_wrapper.fcl

  if [ "$INMODE" = 'textfile' ]; then

    # Text file mode feeds exactly one staged file to the generator.

    if [ "$NFILE_LOCAL" -ne 1 ]; then
      echo "Text file input mode specified with wrong number of input files."
      exit 1
    fi
    printf 'physics.producers.generator.InputFileName: "%s"\n' \
      "$(cat condor_lar_input.list)" >> subrun_wrapper.fcl
  fi

  FCL=subrun_wrapper.fcl

  echo "MC subrun: $SUBRUN"
  echo "Number of MC events: $NEVT"

fi
1432 
# Sam stuff for main input.  (Runs inside the per-stage loop, but only acts
# on the first stage.)
#
# Optionally starts the sam project, then looks up the project url (PURL)
# and establishes a consumer process (CPID).  Both end up on the lar command
# line, so any failure here ends the job with exit status 1.
#
# The ifdh argument lists are deliberately left unquoted: empty values
# (e.g. SAM_SCHEMA) must vanish instead of being passed as empty arguments.

PURL=''
CPID=''
if [ "$USE_SAM" -ne 0 ] && [ "$stage" -eq 0 ]; then
  echo "In SAM if"

  # Make sure a project name has been specified.

  if [ -z "$SAM_PROJECT" ]; then
    echo "No sam project was specified."
    exit 1
  fi
  echo "Sam project: $SAM_PROJECT"

  # Start project (if requested).

  if [ "$SAM_START" -ne 0 ]; then

    if [ -z "$SAM_DEFNAME" ]; then
      echo "Start project requested, but no definition was specified."
      exit 1
    fi

    # Do some preliminary tests on the input dataset definition.
    # If dataset definition returns zero files at this point, abort the job.
    # If dataset definition returns too many files compared to --nfile, create
    # a new dataset definition by adding a "with limit" clause.

    nf=$(ifdh translateConstraints "defname: $SAM_DEFNAME" | wc -l)
    if [ "$nf" -eq 0 ]; then
      echo "Input dataset $SAM_DEFNAME is empty."
      exit 1
    fi
    if [ "$NFILE" -ne 0 ] && [ "$nf" -gt "$NFILE" ]; then
      limitdef=${SAM_PROJECT}_limit_$NFILE

      # Check whether limit def already exists.
      # Have to parse command output because ifdh returns wrong status.

      existdef=$(ifdh describeDefinition $limitdef 2>/dev/null | grep -c 'Definition Name:')
      if [ "$existdef" -gt 0 ]; then
        echo "Using already created limited dataset definition ${limitdef}."
      else
        ifdh createDefinition $limitdef "defname: $SAM_DEFNAME with limit $NFILE" $SAM_USER $SAM_GROUP

        # Assume command worked, because it returns the wrong status.

        echo "Created limited dataset definition ${limitdef}."
      fi

      # If we get to here, we know that we want to use $limitdef instead of
      # $SAM_DEFNAME as the input sam dataset definition.

      SAM_DEFNAME=$limitdef
    fi

    # If recursive flag, take snapshot of input dataset.

    if [ "$RECUR" -ne 0 ]; then
      echo "Forcing snapshot"
      SAM_DEFNAME=${SAM_DEFNAME}:force
    fi

    # Start the project.

    echo "Starting project $SAM_PROJECT using sam dataset definition $SAM_DEFNAME"
    if ifdh startProject $SAM_PROJECT $SAM_STATION $SAM_DEFNAME $SAM_USER $SAM_GROUP; then
      echo "Start project succeeded."
    else
      echo "Start projet failed."
      exit 1
    fi
  fi

  # Get the project url of a running project (maybe the one we just started,
  # or maybe started externally).  This command has to succeed, or we can't
  # continue.

  PURL=$(ifdh findProject $SAM_PROJECT $SAM_STATION)
  if [ -z "$PURL" ]; then
    echo "Unable to find url for project ${SAM_PROJECT}."
    exit 1
  fi
  echo "Project url: $PURL"

  # Start the consumer process.  This command also has to succeed.

  NODE=$(hostname)
  APPFAMILY=art

  # Parse fcl file to extract process_name, and use that
  # as the application name for starting the consumer process.
  #
  # fhicl-dump is run on its own so that its exit status is what gets
  # tested; at the end of a pipeline only the last command's status would
  # be visible and a fhicl-dump failure would go unnoticed.

  fcl_dump=$(fhicl-dump "$FCL")
  if [ $? -ne 0 ]; then
    echo "fhicl-dump $FCL failed to run. May be missing a ups product, library, or fcl file."
    exit 1
  fi
  APPNAME=$(printf '%s\n' "$fcl_dump" | grep process_name: | head -1 | tr -d '"' | awk '{print $2}')
  if [ -z "$APPNAME" ]; then
    echo "Trouble determining application name."
    echo "cat $FCL"
    cat "$FCL"
    exit 1
  fi

  # Make sure release version is not empty, or ifdh command line will be
  # messed up.

  REL=${REL:-1}

  # Make description, which is conventionally the jobsub job id.
  # This can not be empty, so fall back to the fcl file name.

  DESC=${JOBSUBJOBID:-$FCL}

  echo "Starting consumer process."
  echo "ifdh establishProcess $PURL $APPNAME $REL $NODE $SAM_USER $APPFAMILY $DESC $NFILE $SAM_SCHEMA"
  CPID=$(ifdh establishProcess $PURL $APPNAME $REL $NODE $SAM_USER $APPFAMILY $DESC $NFILE $SAM_SCHEMA)
  if [ -z "$CPID" ]; then
    echo "Unable to start consumer process for project url ${PURL}."
    exit 1
  fi
  echo "Consumer process id $CPID"

  # Stash away the project name and consumer process id in case we need them
  # later for bookkeeping.

  echo "$SAM_PROJECT" > sam_project.txt
  echo "$CPID" > cpid.txt

fi
1577 
# Sam stuff for secondary (mix) input.  (Runs inside the per-stage loop.)
# NOTE(review): unlike the main-input setup, nothing here is restricted to
# the first stage -- confirm that this is intended for multi-stage jobs.

if [ "$MIX_SAM" -ne 0 ]; then
  echo "In Mix SAM if"

  # A project name is mandatory.

  if [ -z "$MIX_PROJECT" ]; then
    echo "No mix sam project was specified."
    exit 1
  fi
  echo "Mix project: $MIX_PROJECT"

  # Start mix project (if requested).  That needs a dataset definition.

  if [ "$SAM_START" -ne 0 ]; then

    if [ -z "$MIX_DEFNAME" ]; then
      echo "Start project requested, but no mix definition was specified."
      exit 1
    fi

    echo "Starting project $MIX_PROJECT using sam dataset definition $MIX_DEFNAME"
    if ifdh startProject $MIX_PROJECT $SAM_STATION $MIX_DEFNAME $SAM_USER $SAM_GROUP; then
      echo "Start project succeeded."
    else
      echo "Start projet failed."
      exit 1
    fi
  fi
fi
1613 
# Construct options for the lar command line (LAROPT) for this stage.
# (Runs inside the per-stage loop.)

LAROPT="-c $FCL --rethrow-default"
echo "Laropt: $LAROPT"

# Only the first stage reads the staged input list (art root files).  In
# textfile mode no -S option is added.

if [ -f condor_lar_input.list ] && [ "$stage" -eq 0 ] \
   && [ "$INMODE" != 'textfile' ]; then
  LAROPT="$LAROPT -S condor_lar_input.list"
fi

# Extract output file name for this stage.  OUTFILE is either one name used
# for every stage, or a colon-separated list with one field per stage.

case $OUTFILE in
  *:*) outfile='' ;;
  *)   outfile=$OUTFILE ;;
esac
field=$(( stage + 1 ))
outfile_stage=$(cut -d: -f"$field" <<< "$OUTFILE")
if [ -n "$outfile_stage" ]; then
  outfile=$outfile_stage
fi
if [ -n "$outfile" ]; then
  # The stage number is spliced in ahead of the .root extension.
  LAROPT="$LAROPT -o $(basename "$outfile" .root)$stage.root"
  outstem=$(basename "$OUTFILE" .root)
fi

# Optional pieces, each added only when it carries a value.

if [ -n "$TFILE" ]; then
  LAROPT="$LAROPT -T $TFILE"
fi

if [ "$NEVT" -ne 0 ]; then
  LAROPT="$LAROPT -n $NEVT"
fi

if [ "$NSKIP" -ne 0 ]; then
  LAROPT="$LAROPT --nskip $NSKIP"
fi

# Sam project url and consumer process id only go to the first stage.

if [ -n "$PURL" ] && [ "$stage" -eq 0 ]; then
  LAROPT="$LAROPT --sam-web-uri $PURL"
fi

if [ -n "$CPID" ] && [ "$stage" -eq 0 ]; then
  LAROPT="$LAROPT --sam-process-id $CPID"
fi

# Extra user-supplied arguments come last.

if [ -n "$ARGS" ]; then
  LAROPT="$LAROPT $ARGS"
fi
1669 
# Source the optional midstage initialization script.  (Runs inside the
# per-stage loop.)  It is sourced, not executed, so it runs in this shell;
# a non-zero status from it ends the job with that status.

if [ -n "$MIDSOURCE" ]; then
  echo "Sourcing midstage initialization source script ${MIDSOURCE}."
  . $MIDSOURCE
  status=$?
  if [ "$status" -ne 0 ]; then
    exit "$status"
  fi
fi

# Every stage after the first reads the file chosen at the end of the
# previous stage.

if [ "$stage" -ne 0 ]; then
  LAROPT="$LAROPT -s $next_stage_input"
fi
1684 
# Run this stage's executable.  (Runs inside the per-stage loop.)
#
# Per-stage files written to the working directory:
#   env<stage>.txt           environment snapshot
#   cfgStage<stage>.fcl      canonicalized fcl configuration
#   commandStage<stage>.txt  the exact command line
#   larStage<stage>.out/.err stdout / stderr of the executable
#   larStage<stage>.stat     its exit status (also left in $stat)

# Save a copy of the environment, which can be helpful for debugging.

env > "env${stage}.txt"

# Save a canonicalized version of the fcl configuration.

fhicl-dump $FCL > "cfgStage${stage}.fcl"

# Dump proxy information.

printf '\nProxy:\n\n'
voms-proxy-info -all

pwd

# Extract this stage's executable.  EXE is either one name used for every
# stage, or a colon-separated list with one field per stage; a list that
# has no entry for this stage falls back to 'lar'.

case $EXE in
  *:*) exe='lar' ;;
  *)   exe=$EXE ;;
esac
field=$(( stage + 1 ))
exe_stage=$(cut -d: -f"$field" <<< "$EXE")
if [ -n "$exe_stage" ]; then
  exe=$exe_stage
fi

# Log the command line, then run it.  $exe and $LAROPT are left unquoted on
# purpose: LAROPT is a flat string that has to be split into arguments.

cmdline="$exe $LAROPT"
echo "$cmdline"
echo "$cmdline" > "commandStage${stage}.txt"
$exe $LAROPT > "larStage${stage}.out" 2> "larStage${stage}.err"
stat=$?
echo $stat > "larStage${stage}.stat"
echo "$exe completed with exit status ${stat}."

# On failure, dump the proxy again plus the tail of both output streams.

if [ "$stat" -ne 0 ]; then
  printf '\nProxy:\n\n'
  voms-proxy-info -all
  echo
  for stream in out err; do
    echo "tail -1000 larStage$stage.$stream"
    echo
    tail -1000 "larStage${stage}.${stream}"
    echo
  done
fi
1736 
# Sam cleanups for the main input.  (Runs inside the per-stage loop, after
# the first stage only.)

if [ "$USE_SAM" -ne 0 ] && [ "$stage" -eq 0 ]; then

  # Get list of consumed files.  Recover the consumer process id from the
  # stash file if it is not at hand.

  if [ -z "$CPID" ] && [ -f cpid.txt ]; then
    CPID=$(cat cpid.txt)
  fi
  ifdh translateConstraints "consumer_process_id $CPID and consumed_status consumed" > consumed_files.list

  # End consumer process.

  ifdh endProcess $PURL $CPID

  # Stop project (if appropriate): either this worker owns the project, or
  # every file of a non-empty project snapshot has been consumed.

  nprj=$(ifdh translateConstraints "snapshot_for_project_name $SAM_PROJECT" | wc -l)
  nconsumed=$(ifdh translateConstraints "project_name $SAM_PROJECT and consumed_status consumed" | wc -l)
  echo "$nprj files in project, $nconsumed files consumed so far."

  if [ "$SAM_START" -ne 0 ] \
     || { [ "$nprj" -gt 0 ] && [ "$nconsumed" -eq "$nprj" ]; }; then
    echo "Stopping project."
    ifdh endProject $PURL
  fi
fi
1763 
# (Runs inside the per-stage loop.)  If the executable returned a status
# other than 0, do not move on to other stages.

if [ "$stat" -ne 0 ]; then
  break
fi

# Run optional midstage finalization script, passing it the stage number.
# A non-zero status from it ends the job with that status.

if [ -n "$MIDSCRIPT" ]; then
  echo "Running midstage finalization script ${MIDSCRIPT}."
  ./${MIDSCRIPT} $stage
  status=$?
  if [ "$status" -ne 0 ]; then
    exit "$status"
  fi
fi

# Delete temporary input file (the previous stage's output), which only
# exists for stages after the first.

if [ "$stage" -ne 0 ]; then
  rm -rf $next_stage_input
fi
1785 
1786  #echo `ls -t1 *.root | egrep -v 'hist|larlite|larcv' | head -n1`
1787 
1788  #echo "Outfile is $OUTFILE"
1789 
1790 
# (Runs inside the per-stage loop.)  The input of the next stage is the most
# recently modified root file whose name does not match any of the excluded
# patterns.

next_stage_input=$(ls -t1 *.root \
  | grep -Ev 'celltree|hist|larlite|larcv|Supplemental|TGraphs' \
  | head -n1)

# Don't let file name get too long.  nc counts the characters of the name
# plus one (for the newline a "wc -c" of the name would include).

nc=$(( ${#next_stage_input} + 1 ))
if [ "$nc" -ge 200 ]; then
  base=$(basename $next_stage_input)
  ext=${base##*.}
  stem=${base%.*}
  newstem=${stem:0:150}_$(uuidgen)
  echo "mv $next_stage_input ${newstem}.${ext}"
  mv $next_stage_input ${newstem}.${ext}
  next_stage_input=${newstem}.${ext}
fi

# Collect the distinct "mixparent" values reported in the file's sam
# metadata; they are recorded as aunts of the output.

mixed_files=$(sam_metadata_dumper $next_stage_input \
  | grep mixparent \
  | awk -F ":" '{gsub("\"" ,""); gsub(",",""); gsub(" ",""); print $2}' \
  | sort -u)

if [ -n "$mixed_files" ]; then
  # Unquoted on purpose: one array element per listed file.
  aunt_files+=($mixed_files)
fi

stage=$(( stage + 1 ))

# Rename the mem and time profile DBs by stage.  Note that the stage counter
# has already been advanced at this point.

for profile in time mem; do
  if [ -f "${profile}.db" ]; then
    mv "${profile}.db" "${profile}${stage}.db"
  fi
done
1822 
1823 done
1824 
1825 # Done looping over stages.
1826 
# Secondary sam cleanups: stop the mix project if this worker was asked to
# start projects.

if [ "$MIX_SAM" -ne 0 ] && [ "$SAM_START" -ne 0 ]; then
  echo "Stopping project."
  MURL=$(ifdh findProject $MIX_PROJECT $SAM_STATION)
  ifdh endProject $MURL
fi

# Delete input files.  Only applies when input was copied to the scratch
# directory (neither sam nor xrootd streaming).

if [ "$USE_SAM" -eq 0 ] && [ "x$SAM_SCHEMA" != xroot ] \
   && [ -f condor_lar_input.list ]; then
  while read staged_file; do
    rm -f $staged_file
  done < condor_lar_input.list
fi

# Run optional end-of-job script.  A non-zero status from it ends the job
# with that status.

if [ -n "$ENDSCRIPT" ]; then
  echo "Running end-of-job script ${ENDSCRIPT}."
  ./${ENDSCRIPT}
  status=$?
  if [ "$status" -ne 0 ]; then
    exit "$status"
  fi
fi
1858 
# Do root file checks.

# A data file gets a randomized (uuid-suffixed, stem cut to 150 characters)
# name when any of these hold:
#   - it has a corresponding json file (normally histogram files; art files
#     do not have external json metadata at this point),
#   - no input was specified for this job (i.e. generator jobs),
#   - its name is 200 characters or longer, counting the trailing newline.

ran=0
if [ "$USE_SAM" -eq 0 ] && [ -z "$INFILE" ] && [ -z "$INLIST" ]; then
  ran=1
fi

for ftype in "${DATAFILETYPES[@]}"; do
  for datafile in *.${ftype}; do
    [ -f "$datafile" ] || continue

    nc=$(( ${#datafile} + 1 ))
    if [ -f "${datafile}.json" ] || [ "$ran" != 0 ] || [ "$nc" -ge 200 ]; then
      base=$(basename "$datafile")
      ext=${base##*.}
      stem=${base%.*}
      newstem=${stem:0:150}_$(uuidgen)
      echo "mv $datafile ${newstem}.${ext}"
      mv "$datafile" "${newstem}.${ext}"
      # Keep the json file paired with the renamed data file.
      if [ -f "${datafile}.json" ]; then
        mv "${datafile}.json" "${newstem}.${ext}.json"
      fi
    fi
  done
done

# Calculate root metadata for all data files and save as json file.
# If json metadata already exists, merge with newly generated root metadata.

for ftype in "${DATAFILETYPES[@]}"; do
  for datafile in *.${ftype}; do
    [ -f "$datafile" ] || continue

    json=${datafile}.json
    if [ ! -f "$json" ]; then
      ./root_metadata.py --output="$json" "$datafile" >& /dev/null
    else
      # Generate into a scratch file, merge, then replace the original.
      ./root_metadata.py --output="${json}2" "$datafile" >& /dev/null
      ./merge_json.py "$json" "${json}2" > "${json}3"
      mv -f "${json}3" "$json"
      rm "${json}2"
    fi
  done
done
1913 
# Create a master lar.stat file which contains the overall exit code of all
# stages (the sum of the per-stage larStage<n>.stat values).  The same value
# is the initial validation status (valstat).

overallStat=0
for (( stageStat = 0; stageStat < nfcls; stageStat++ )); do
  stat=$(cat "larStage${stageStat}.stat")

  if [[ "$stat" = 65 && $ART_VERSION < v2_01 ]]; then
    # Workaround TimeTracker crash bug for input files with zero events:
    # forgive status 65 if any json file reports zero events.
    if grep -q '"events": *"0"' *.json; then
      stat=0
    fi
  fi

  overallStat=$(( stat + overallStat ))
done

echo "$overallStat" > lar.stat
valstat=$overallStat
1935 
# Make local output directories for files that we have to save, and stash
# everything in them:
#   out/  data files
#   log/  their .json companions, plus every other regular file left in the
#         working directory

mkdir out log

# First move data files and corresponding .json files.

for ftype in "${DATAFILETYPES[@]}"; do
  for datafile in *.${ftype}; do
    [ -f "$datafile" ] || continue
    mv "$datafile" out
    if [ -f "${datafile}.json" ]; then
      mv "${datafile}.json" log
    fi
  done
done

# Move any remaining files into the log subdirectory.  Directories
# (including out and log themselves) are left alone.

for leftover in *; do
  if [ -f "$leftover" ]; then
    mv "$leftover" log
  fi
done
1963 
# Do validation (if requested).  Leaves the result in valstat.

if [ "$VALIDATE_IN_JOB" -eq 1 ]; then

  # If SAM was used, get the parent files based on the consumer process id.
  # A failure is reported but does not stop the job.

  if [ "$USE_SAM" -ne 0 ]; then
    id=$(cat log/cpid.txt)
    parent_files=($(ifdh translateConstraints "consumer_process_id=$id and consumed_status consumed"))
    stat=$?
    if [ "$stat" -ne 0 ]; then
      echo "Failed to determine parentage."
    fi
  fi

  echo "The file's parents are: "
  for parent in ${parent_files[*]}; do
    echo $parent
  done

  echo "The file's aunts are: "
  for aunt in ${aunt_files[*]}; do
    echo $aunt
  done

  # If we are to maintain the output's parentage, flatten the parents and
  # aunts into space-separated strings and export them as JOBS_PARENTS and
  # JOBS_AUNTS.

  if [ "$MAINTAIN_PARENTAGE" -eq 1 ]; then
    JOBS_PARENTS=$(echo ${parent_files[*]})
    JOBS_AUNTS=$(echo ${aunt_files[*]})
    export JOBS_PARENTS JOBS_AUNTS
  fi

  # Do validation function for the whole job, but only if every stage
  # succeeded.  The script is run from the log directory.

  valstat=$overallStat
  if [ "$valstat" -eq 0 ]; then
    curdir=$(pwd)
    cd $curdir/log

    dataopt=''
    for ftype in ${DATAFILETYPES[*]}; do
      dataopt="$dataopt --data_file_type $ftype"
    done

    # Left unquoted when run so that it splits into separate arguments.
    valcmd="./validate_in_job.py --dir $curdir/out --logfiledir $curdir/log --outdir $OUTDIR/$OUTPUT_SUBDIR --declare $DECLARE_IN_JOB --copy $COPY_TO_FTS --maintain_parentage $MAINTAIN_PARENTAGE $dataopt"
    echo "$valcmd"
    $valcmd
    valstat=$?

    cd $curdir
  fi

fi
2015 
# Make a tarball of the log directory contents, and save the tarball in the
# log directory.  It is built outside of log first and moved in afterwards.

rm -f log.tar
tar -cjf log.tar -C log .
mv log.tar log

# For copy back, set up the current version of ifdhc.
# May be different than version set up by larsoft.

echo "Setting up current version of ifdhc."
if [ -n "$IFDHC_DIR" ]; then
  # Some version is already set up; drop it first.
  unsetup ifdhc
fi
setup ifdhc
echo "IFDHC_DIR=$IFDHC_DIR"
2031 
# Create remote output and log directories.

export IFDH_CP_MAXRETRIES=5

# make_remote_dir <base> <subdir>
#   Creates <base>/<subdir> with "ifdh mkdir", one path component of
#   <subdir> at a time, starting from the top.  Each ifdh command is echoed
#   before it is run, and the whole operation is bracketed by progress
#   messages and timestamps.  The status of ifdh mkdir is not checked.
#   $IFDH_OPT is expanded unquoted so that an empty value adds no argument.

make_remote_dir() {
  local base=$1
  local subdir=$2
  local dir=$base
  local rest=$subdir

  echo "Make directory ${base}/${subdir}."
  date

  # Intermediate components: everything up to the last "/" of <subdir>.
  while [[ $rest == */* ]]; do
    dir=${dir}/${rest%%/*}
    rest=${rest#*/}
    echo "ifdh mkdir $IFDH_OPT $dir"
    ifdh mkdir $IFDH_OPT "$dir"
  done

  # The full path itself.
  echo "ifdh mkdir $IFDH_OPT ${base}/$subdir"
  ifdh mkdir $IFDH_OPT "${base}/$subdir"

  echo "Done making directory ${base}/${subdir}."
  date
}

make_remote_dir "$LOGDIR" "$OUTPUT_SUBDIR"

# The output directory only needs separate treatment if it differs from the
# log directory.  Quoted so that an empty value can not break the test.

if [ "$OUTDIR" != "$LOGDIR" ]; then
  make_remote_dir "$OUTDIR" "$OUTPUT_SUBDIR"
fi
2067 
# Copy results back and exit.
#
# Final exit status, in order of precedence:
#   1                      if any copy-back failed
#   contents of lar.stat   if that is non-zero
#   $valstat               otherwise

# Transfer tarball in log subdirectory.

statout=0
echo "ls log"
ls log
echo "ifdh cp -D $IFDH_OPT log/log.tar ${LOGDIR}/$OUTPUT_SUBDIR"
ifdh cp -D $IFDH_OPT log/log.tar ${LOGDIR}/$OUTPUT_SUBDIR
# Capture the copy status immediately; any command run in between (such as
# the timestamp below) would overwrite $?.
stat=$?
date
if [ "$stat" -ne 0 ]; then
  statout=1
  echo "ifdh cp failed with status ${stat}."
fi

# Transfer data files in out subdirectory, unless they are handed to FTS
# instead.  Nothing is copied if out is empty.

if [ "$COPY_TO_FTS" -eq 0 ]; then

  if [ "$( ls -A out )" ]; then
    echo "ifdh cp -D $IFDH_OPT out/* ${OUTDIR}/$OUTPUT_SUBDIR"
    ifdh cp -D $IFDH_OPT out/* ${OUTDIR}/$OUTPUT_SUBDIR
    stat=$?
    if [ "$stat" -ne 0 ]; then
      statout=1
      echo "ifdh cp failed with status ${stat}."
    fi
  fi

fi

# With all copies good, report the overall stage status ...

if [ "$statout" -eq 0 ] && [ -f log/lar.stat ]; then
  statout=$(cat log/lar.stat)
fi

# ... and if that is good too, the validation status.

if [ "$statout" -eq 0 ]; then
  statout=$valstat
fi

exit $statout