condor_lar_pubs.sh
1 #! /bin/bash
2 #------------------------------------------------------------------
3 #
4 # Purpose: A general purpose larsoft batch worker script.
5 #
6 # Adapted from condor_lBdetMC.sh by E. Church.
7 #
8 # Usage:
9 #
10 # condor_lar.sh [options]
11 #
12 # Lar options:
13 #
14 # -c, --config <arg> - Configuration (fcl) file (required).
15 # -s, --source <arg> - Input file (full path).
16 # -S, --source-list <arg> - Input file list (full path, one per line).
17 # -o, --output <arg> - Output file name.
18 # -T, --TFileName <arg> - TFile output file name
19 # -n, --nevts <arg> - Number of events to process.
20 # --nskip <arg> - Number of events to skip.
21 # --nfile <arg> - Number of files to process per worker.
22 # --nfile_skip <arg> - Number of files to skip (use with option -S).
23 # --inputmode <arg> - Input mode ('textfile' or '', default '')
24 # --args <args...> - Arguments for lar command line (place at end).
25 #
26 # Sam and parallel project options.
27 #
28 # --sam_user <arg> - Specify sam user (default $GRID_USER).
29 # --sam_group <arg> - Specify sam group (default --group option).
30 # --sam_station <arg> - Specify sam station (default --group option).
31 # --sam_defname <arg> - Sam dataset definition name.
32 # --sam_project <arg> - Sam project name.
33 # --sam_start - Specify that this worker should be responsible for
34 # starting and stopping the sam project.
35 # --recur - Recursive input dataset (force snapshot).
36 # --sam_schema <arg> - Use this option with argument "root" to stream files using
37 # xrootd. Leave this option out for standard file copy.
38 # --njobs <arg> - Parallel project with specified number of jobs (default one).
39 # --single - Specify that the output and log directories will be emptied
40 # by the batch worker, and therefore the output and log
41 # directories will only ever contain output from a single
42 # worker.
43 #
44 # Mix input options (second input stream).
45 #
46 # --mix_defname <arg> - Specify mix input sam dataset definition.
47 # --mix_project <arg> - Specify mix input sam project.
48 #
49 # Larsoft options.
50 #
51 # --ups <arg> - Comma-separated list of top level run-time ups products.
52 # -r, --release <arg> - Release tag.
53 # -q, -b, --build <arg> - Release build qualifier (default "debug" or "prof").
54 # --localdir <arg> - Larsoft local test release directory (default none).
55 # --localtar <arg> - Tarball of local test release.
56 # --mrb - Ignored (for compatibility).
57 # --srt - Exit with error status (SRT run time no longer supported).
58 #
59 # Other options.
60 #
61 # -h, --help - Print help.
62 # -i, --interactive - For interactive use.
63 # -g, --grid - Be grid-friendly.
64 # --group <arg> - Group or experiment (required).
65 # --workdir <arg> - Work directory (required).
66 # --outdir <arg> - Output directory (required).
67 # --logdir <arg> - Log directory (required).
68 # --scratch <arg> - Scratch directory (only for interactive).
69 # --cluster <arg> - Job cluster (override $CLUSTER)
70 # --process <arg> - Process within cluster (override $PROCESS).
71 # --procmap <arg> - Name of process map file (override $PROCESS).
72 # --init-script <arg> - User initialization script to execute.
73 # --init-source <arg> - User initialization script to source (bash).
74 # --end-script <arg> - User end-of-job script to execute.
75 # --exe <arg> - Specify art-like executable (default "lar").
76 # --init <path> - Absolute path of environment initialization script.
77 #
78 # End options.
79 #
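# Example: a typical grid invocation might look like the following
# (hypothetical fcl, release, experiment, and directory names):
#
#   condor_lar.sh -c prodgenie.fcl -r v06_26_01 -b e10:prof \
#     -n 100 --njobs 10 -g --group uboone \
#     --workdir /pnfs/uboone/scratch/users/$USER/work \
#     --outdir /pnfs/uboone/scratch/users/$USER/out \
#     --logdir /pnfs/uboone/scratch/users/$USER/log
#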
80 # Run time environment setup.
81 #
82 # MRB run-time environment setup is controlled by four options:
83 # --release (-r), --build (-b, -q), --localdir, and --localtar.
84 #
85 # a) Use option --release or -r to specify version of top-level product(s).
86 # b) Use option --build or -b to specify build full qualifiers (e.g.
87 # "debug:e5" or "e5:prof").
88 # c) Options --localdir or --localtar are used to specify your local
89 # test release. Use one or the other (not both).
90 #
91 # Use --localdir to specify the location of your local install
92 # directory ($MRB_INSTALL).
93 #
94 # Use --localtar to specify the location of a tarball of your
95 # install directory (made relative to $MRB_INSTALL).
96 #
97 # Note that --localdir is not grid-friendly.
98 #
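# For example, a tarball suitable for --localtar can be made from an
# existing local test release like this (assuming the standard mrb layout,
# so that the "setup" script sits at the top level of the tarball):
#
#   cd $MRB_INSTALL && tar -cf local.tar .
#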
99 # Notes.
100 #
101 # 1. Each batch worker is uniquely identified by two numbers stored
102 # in environment variables $CLUSTER and $PROCESS (the latter is
103 # a small integer that starts from zero and varies for different
104 # jobs in a parallel job group). These environment variables are
105 # normally set by the batch system, but can be overridden by options
106 # --cluster, --process, and --procmap (e.g. to rerun failed jobs).
107 #
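# For example, to rerun failed worker 7 of cluster 123456 interactively
# (hypothetical numbers, other required options omitted):
#
#   condor_lar.sh [...] -i --cluster 123456 --process 7
#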
108 # 2. The work directory must be set to an existing directory owned
109 # by the submitter and readable by the batch worker. Files from the
110 # work directory are copied to the batch worker scratch directory at
111 # the start of the job.
112 #
113 # 3. The job configuration file (-c option), initialization and end-of-job
114 # scripts (options --init-script, --init-source, --end-script) may
115 # be stored in the work directory specified by option --workdir, or they
116 # may be specified as absolute paths visible on the worker node.
117 #
118 # 4. A local test release may be specified as an absolute path using
119 # --localdir, or a tarball using --localtar. The location of the tarball
120 # may be specified as an absolute path visible on the worker, or a
121 # relative path relative to the work directory.
122 #
123 # 5. The output directory must exist and be writable by the batch
124 # worker (i.e. be group-writable for grid jobs). The worker
125 # makes a new subdirectory called ${CLUSTER}_${PROCESS} in the output
126 # directory and copies all files in the batch scratch directory there
127 # at the end of the job. If the output directory is not specified, the
128 # default is /grid/data/<group>/outstage/<user> (user is defined as
129 # owner of work directory).
130 #
131 # 6. Parallel projects are specified whenever --njobs is specified to
132 # be greater than one. Parallel projects are supported for single file,
133 # file list, and sam project input.
134 #
135 # In all cases, each worker processes some number of complete files.
136 # If the number of jobs is greater than the number of input files, some
137 # workers will not have any input files to process.
138 #
139 # In any case, options --nfile and --nevts can be used to limit the
140 # number of files or events that are processed by a single worker,
141 # regardless of the way files are divided among the workers.
142 #
143 # Option --njobs is incompatible with options --nskip and --nfile_skip.
144 #
145 # a) Non-sam (single file or file list) input.
146 #
147 # In this case, input files are preassigned to workers such that all input
148 # files are approximately evenly divided among the workers. All files
149 # preassigned to this worker are copied to the scratch directory at the
150 # start of the job.
151 #
152 # b) Sam project input.
153 #
154 # In this case, files are assigned to workers in a non-deterministic
155 # manner by the sam system. The sam system fetches input files to the
156 # scratch directory and deletes processed input files during job execution.
157 #
158 #
159 # 7. Using option -n or --nevts to limit number of events processed:
160 #
161 # a) If no input files are specified (e.g. mc generation), --nevts
162 # specifies total number of events among all workers.
163 #
164 # b) If input files are specified, --nevts specifies total number of
165 # events processed by each worker or from each input file, whichever
166 # is less.
167 #
168 # 8. The interactive option (-i or --interactive) allows this script
169 # to be run interactively by overriding some settings that are normally
170 # obtained from the batch system, including $CLUSTER, $PROCESS, and
171 # the scratch directory. Interactive jobs always set PROCESS=0 (unless
172 # overridden by --process).
173 #
174 # 9. The grid option (-g or --grid) instructs this script to use grid-
175 # friendly tools. This means that there must be no direct access to
176 # bluearc disks. File transfers are done using gridftp or other
177 # grid-friendly protocol. Local test releases are not allowed to
178 # be specified as directories (--localdir), but may be specified as
179 # tarballs (--localtar).
180 #
181 # 10. Mix options (--mix_defname, --mix_project) are only partially handled
182 # in this script. These options are parsed and their values are stored
183 # in shell variables. It is assumed that the sam project specified
184 # by --mix_project has been started externally, unless --sam_start is
185 # also specified, in which case this script will start the project.
186 # This script does not include any provision for joining the project.
187 # Further processing of these options (joining sam project, generating
188 # command line options or fcl wrappers) should be handled by user
189 # provided initialization scripts (--init-script, --init-source).
190 #
191 # 11. Option --init <path> is optional. If specified, it should point to
192 # the absolute path of the experiment environment initialization script,
193 # which must be visible from the batch worker (e.g. /cvmfs/...).
194 # If this option is not specified, this script will look for and source
195 # a script with hardwired name "setup_experiment.sh" in directory
196 # ${CONDOR_DIR_INPUT}.
197 #
198 #
199 # Created: H. Greenlee, 29-Aug-2012
200 #
201 #------------------------------------------------------------------
202 
203 # Parse arguments.
204 
205 FCL=""
206 INFILE=""
207 INLIST=""
208 INMODE=""
209 OUTFILE=""
210 TFILE=""
211 NEVT=0
212 NSKIP=0
213 FIRST_EVENT=0
214 SUBRUN=1
215 NFILE=0
216 NFILE_SKIP=0
217 NJOBS=1
218 SINGLE=0
219 ARGS=""
220 UPS_PRDS=""
221 REL=""
222 QUAL=""
223 LOCALDIR=""
224 LOCALTAR=""
225 INTERACTIVE=0
226 GRP=""
227 WORKDIR=""
228 OUTDIR=""
229 LOGDIR=""
230 SCRATCH=""
231 CLUS=""
232 PROC=""
233 PROCMAP=""
234 INITSCRIPT=""
235 INITSOURCE=""
236 ENDSCRIPT=""
237 SAM_USER=$GRID_USER
238 SAM_GROUP=""
239 SAM_STATION=""
240 SAM_DEFNAME=""
241 SAM_PROJECT=""
242 SAM_START=0
243 RECUR=0
244 SAM_SCHEMA=""
245 USE_SAM=0
246 MIX_DEFNAME=""
247 MIX_PROJECT=""
248 MIX_SAM=0
249 GRID=0
250 IFDH_OPT=""
251 DECLARE_IN_JOB=0
252 VALIDATE_IN_JOB=0
253 COPY_TO_FTS=0
254 MAINTAIN_PARENTAGE=0
255 EXE="lar"
256 INIT=""
257 
258 while [ $# -gt 0 ]; do
259  case "$1" in
260 
261  # Help.
262  -h|--help )
263  awk '/^# Usage:/,/^# End options/{print $0}' $0 | cut -c3- | head -n -2
264  exit
265  ;;
266 
267  # Config file.
268  -c|--config )
269  if [ $# -gt 1 ]; then
270  FCL=$2
271  shift
272  fi
273  ;;
274 
275  # Input file.
276  -s|--source )
277  if [ $# -gt 1 ]; then
278  INFILE=$2
279  shift
280  fi
281  ;;
282 
283  # Input file list.
284  -S|--source-list )
285  if [ $# -gt 1 ]; then
286  INLIST=$2
287  shift
288  fi
289  ;;
290 
291  # Input file mode.
292  --inputmode )
293  if [ $# -gt 1 ]; then
294  INMODE=$2
295  shift
296  fi
297  ;;
298 
299  # Output file.
300  -o|--output )
301  if [ $# -gt 1 ]; then
302  OUTFILE=$2
303  shift
304  fi
305  ;;
306 
307  # Output TFile.
308  -T|--TFileName )
309  if [ $# -gt 1 ]; then
310  TFILE=$2
311  shift
312  fi
313  ;;
314 
315  # Number of events.
316  -n|--nevts )
317  if [ $# -gt 1 ]; then
318  NEVT=$2
319  shift
320  fi
321  ;;
322 
323  # Number of events to skip.
324  --nskip )
325  if [ $# -gt 1 ]; then
326  NSKIP=$2
327  shift
328  fi
329  ;;
330 
331  # Number of files to process.
332  --nfile )
333  if [ $# -gt 1 ]; then
334  NFILE=$2
335  shift
336  fi
337  ;;
338 
339  # Number of files to skip.
340  --nfile_skip )
341  if [ $# -gt 1 ]; then
342  NFILE_SKIP=$2
343  shift
344  fi
345  ;;
346 
347  # Number of parallel jobs.
348  --njobs )
349  if [ $# -gt 1 ]; then
350  NJOBS=$2
351  shift
352  fi
353  ;;
354 
355  # Single worker mode.
356  --single )
357  SINGLE=1
358  ;;
359 
360  # Sam user.
361  --sam_user )
362  if [ $# -gt 1 ]; then
363  SAM_USER=$2
364  shift
365  fi
366  ;;
367 
368  # Sam group.
369  --sam_group )
370  if [ $# -gt 1 ]; then
371  SAM_GROUP=$2
372  shift
373  fi
374  ;;
375 
376  # Sam station.
377  --sam_station )
378  if [ $# -gt 1 ]; then
379  SAM_STATION=$2
380  shift
381  fi
382  ;;
383 
384  # Sam dataset definition name.
385  --sam_defname )
386  if [ $# -gt 1 ]; then
387  SAM_DEFNAME=$2
388  USE_SAM=1
389  shift
390  fi
391  ;;
392 
393  # Sam project name.
394  --sam_project )
395  if [ $# -gt 1 ]; then
396  SAM_PROJECT=$2
397  USE_SAM=1
398  shift
399  fi
400  ;;
401 
402  # Sam start/stop project flag.
403  --sam_start )
404  SAM_START=1
405  ;;
406 
407  # Recursive flag.
408  --recur )
409  RECUR=1
410  ;;
411 
412  # Sam schema.
413  --sam_schema )
414  if [ $# -gt 1 ]; then
415  SAM_SCHEMA=$2
416  shift
417  fi
418  ;;
419 
420  # General arguments for lar command line.
421  --args )
422  if [ $# -gt 1 ]; then
423  shift
424  ARGS=$@
425  break
426  fi
427  ;;
428 
429  # Top level ups products (comma-separated list).
430  --ups )
431  if [ $# -gt 1 ]; then
432  UPS_PRDS=$2
433  shift
434  fi
435  ;;
436 
437  # Release tag.
438  -r|--release )
439  if [ $# -gt 1 ]; then
440  REL=$2
441  shift
442  fi
443  ;;
444 
445  # Release build qualifier.
446  -q|-b|--build )
447  if [ $# -gt 1 ]; then
448  QUAL=$2
449  shift
450  fi
451  ;;
452 
453  # Local test release directory.
454  --localdir )
455  if [ $# -gt 1 ]; then
456  LOCALDIR=$2
457  shift
458  fi
459  ;;
460 
461  # Local test release tarball.
462  --localtar )
463  if [ $# -gt 1 ]; then
464  LOCALTAR=$2
465  shift
466  fi
467  ;;
468 
469  # MRB flag.
470  --mrb )
471  ;;
472 
473  # SRT flag.
474  --srt )
475  echo "SRT run time environment is no longer supported."
476  exit 1
477  ;;
478 
479  # Interactive flag.
480  -i|--interactive )
481  INTERACTIVE=1
482  ;;
483 
484  # Grid flag.
485  -g|--grid )
486  GRID=1
487  ;;
488 
489  # Group.
490  --group )
491  if [ $# -gt 1 ]; then
492  GRP=$2
493  shift
494  fi
495  ;;
496 
497  # Work directory.
498  --workdir )
499  if [ $# -gt 1 ]; then
500  WORKDIR=$2
501  shift
502  fi
503  ;;
504 
505  # Output directory.
506  --outdir )
507  if [ $# -gt 1 ]; then
508  OUTDIR=$2
509  shift
510  fi
511  ;;
512 
513  # Log directory.
514  --logdir )
515  if [ $# -gt 1 ]; then
516  LOGDIR=$2
517  shift
518  fi
519  ;;
520 
521  # Scratch directory.
522  --scratch )
523  if [ $# -gt 1 ]; then
524  SCRATCH=$2
525  shift
526  fi
527  ;;
528 
529  # Job cluster.
530  --cluster )
531  if [ $# -gt 1 ]; then
532  CLUS=$2
533  shift
534  fi
535  ;;
536 
537  # Process within cluster.
538  --process )
539  if [ $# -gt 1 ]; then
540  PROC=$2
541  shift
542  fi
543  ;;
544 
545  # Process map.
546  --procmap )
547  if [ $# -gt 1 ]; then
548  PROCMAP=$2
549  shift
550  fi
551  ;;
552 
553  # User initialization script.
554  --init-script )
555  if [ $# -gt 1 ]; then
556  INITSCRIPT=$2
557  shift
558  fi
559  ;;
560 
561  # User source initialization script.
562  --init-source )
563  if [ $# -gt 1 ]; then
564  INITSOURCE=$2
565  shift
566  fi
567  ;;
568 
569  # User end-of-job script.
570  --end-script )
571  if [ $# -gt 1 ]; then
572  ENDSCRIPT=$2
573  shift
574  fi
575  ;;
576 
577  # Declare good output root files to SAM.
578  --declare )
579  DECLARE_IN_JOB=1
580  ;;
581 
582  # Run validation steps in project.py on root outputs directly in the job.
583  --validate )
584  VALIDATE_IN_JOB=1
585  ;;
586 
587  # Copy Output to FTS.
588  --copy )
589  COPY_TO_FTS=1
590  ;;
591 
592  # Mix input sam dataset.
593  --mix_defname )
594  if [ $# -gt 1 ]; then
595  MIX_DEFNAME=$2
596  MIX_SAM=1
597  shift
598  fi
599  ;;
600 
601  # Mix input sam project.
602  --mix_project )
603  if [ $# -gt 1 ]; then
604  MIX_PROJECT=$2
605  MIX_SAM=1
606  shift
607  fi
608  ;;
609 
610  # Alter the output file's parentage such that its parent(s) are from the input list OR sam process
611  --maintain_parentage )
612  MAINTAIN_PARENTAGE=1
613  ;;
614 
615  # Specify alternate art-like executable.
616  --exe )
617  if [ $# -gt 1 ]; then
618  EXE=$2
619  shift
620  fi
621  ;;
622 
623  # Specify environment initialization script path.
624  --init )
625  if [ $# -gt 1 ]; then
626  INIT=$2
627  shift
628  fi
629  ;;
630 
631  # Other.
632  * )
633  echo "Unknown option $1"
634  exit 1
635  esac
636  shift
637 done
638 
639 #echo "FCL=$FCL"
640 #echo "INFILE=$INFILE"
641 #echo "INLIST=$INLIST"
642 #echo "OUTFILE=$OUTFILE"
643 #echo "TFILE=$TFILE"
644 #echo "NEVT=$NEVT"
645 #echo "NSKIP=$NSKIP"
646 #echo "NFILE=$NFILE"
647 #echo "NFILE_SKIP=$NFILE_SKIP"
648 #echo "NJOBS=$NJOBS"
649 #echo "ARGS=$ARGS"
650 #echo "REL=$REL"
651 #echo "QUAL=$QUAL"
652 #echo "LOCALDIR=$LOCALDIR"
653 #echo "LOCALTAR=$LOCALTAR"
654 #echo "INTERACTIVE=$INTERACTIVE"
655 #echo "GRP=$GRP"
656 #echo "WORKDIR=$WORKDIR"
657 #echo "OUTDIR=$OUTDIR"
658 #echo "LOGDIR=$LOGDIR"
659 #echo "SCRATCH=$SCRATCH"
660 #echo "CLUS=$CLUS"
661 #echo "PROC=$PROC"
662 #echo "INITSCRIPT=$INITSCRIPT"
663 #echo "INITSOURCE=$INITSOURCE"
664 #echo "ENDSCRIPT=$ENDSCRIPT"
665 #echo "VALIDATE_IN_JOB=$VALIDATE_IN_JOB"
666 
667 # Done with arguments.
668 
669 echo "Nodename: `hostname -f`"
670 id
671 echo "Load average:"
672 cat /proc/loadavg
673 
674 # Set defaults.
675 
676 if [ x$QUAL = x ]; then
677  QUAL="prof:e9"
678 fi
679 
680 if [ x$SAM_GROUP = x ]; then
681  SAM_GROUP=$GRP
682 fi
683 
684 if [ x$SAM_STATION = x ]; then
685  SAM_STATION=$GRP
686 fi
687 
688 # Standardize sam_schema (xrootd -> root).
689 
690 if [ x$SAM_SCHEMA = xxrootd ]; then
691  SAM_SCHEMA=root
692 fi
693 
694 # Fix for sites with newer linux kernels:
695 
696 case `uname -r` in
697  3.*) export UPS_OVERRIDE="-H Linux64bit+2.6-2.12";;
698  4.*) export UPS_OVERRIDE="-H Linux64bit+2.6-2.12";;
699 esac
700 echo "uname -r: `uname -r`"
701 echo "UPS_OVERRIDE: $UPS_OVERRIDE"
702 
703 # Make sure work directory is defined and exists.
704 
705 if [ x$WORKDIR = x ]; then
706  echo "Work directory not specified."
707  exit 1
708 fi
709 if [ $GRID -eq 0 -a ! -d $WORKDIR ]; then
710  echo "Work directory $WORKDIR does not exist."
711  exit 1
712 fi
713 echo "Work directory: $WORKDIR"
714 
715 
716 
717 echo "Condor dir input: $CONDOR_DIR_INPUT"
718 
719 # Initialize experiment ups products and mrb.
720 
721 echo "Initializing ups and mrb."
722 
723 if [ x$INIT != x ]; then
724  if [ ! -f $INIT ]; then
725  echo "Environment initialization script $INIT not found."
726  exit 1
727  fi
728  echo "Sourcing $INIT"
729  source $INIT
730 else
731  echo "Sourcing setup_experiment.sh"
732  source ${CONDOR_DIR_INPUT}/setup_experiment.sh
733 fi
734 
735 echo PRODUCTS=$PRODUCTS
736 
737 # Ifdh may already be setup by jobsub wrapper.
738 # If not, set it up here.
739 
740 echo "IFDHC_DIR=$IFDHC_DIR"
741 if [ x$IFDHC_DIR = x ]; then
742  echo "Setting up ifdhc, because jobsub did not set it up."
743  setup ifdhc
744 fi
745 echo "IFDHC_DIR=$IFDHC_DIR"
746 
747 # Set GROUP environment variable.
748 
749 unset GROUP
750 if [ x$GRP != x ]; then
751  GROUP=$GRP
752 else
753  echo "GROUP not specified."
754  exit 1
755 fi
756 export GROUP
757 echo "Group: $GROUP"
758 
759 # Set options for ifdh.
760 
761 if [ $GRID -ne 0 ]; then
762 
763  # Figure out if this is a production job.
764  # This option is only used when copying back output.
765  # It affects the ownership of copied back files.
766 
767  echo "X509_USER_PROXY = $X509_USER_PROXY"
768  #if ! echo $X509_USER_PROXY | grep -q Production; then
769  # FORCE=expgridftp
770  # IFDH_OPT="--force=$FORCE"
771  #else
772  # FORCE=gridftp
773  # IFDH_OPT="--force=$FORCE"
774  #fi
775 fi
776 echo "IFDH_OPT=$IFDH_OPT"
777 
778 # Make sure fcl file argument was specified.
779 
780 if [ x$FCL = x ]; then
781  echo "No configuration option (-c|--config) was specified."
782  exit 1
783 fi
784 
785 # Make sure output directory exists and is writable.
786 
787 if [ x$OUTDIR = x ]; then
788  echo "Output directory not specified."
789  exit 1
790 fi
791 if [ $GRID -eq 0 -a \( ! -d $OUTDIR -o ! -w $OUTDIR \) ]; then
792  echo "Output directory $OUTDIR does not exist or is not writable."
793  exit 1
794 fi
795 echo "Output directory: $OUTDIR"
796 
797 # Make sure log directory exists and is writable.
798 
799 if [ x$LOGDIR = x ]; then
800  echo "Log directory not specified."
801  exit 1
802 fi
803 if [ $GRID -eq 0 -a \( ! -d $LOGDIR -o ! -w $LOGDIR \) ]; then
804  echo "Log directory $LOGDIR does not exist or is not writable."
805  exit 1
806 fi
807 echo "Log directory: $LOGDIR"
808 
809 # See if we need to set umask for group write.
810 
811 if [ $GRID -eq 0 ]; then
812  OUTUSER=`stat -c %U $OUTDIR`
813  LOGUSER=`stat -c %U $LOGDIR`
814  CURUSER=`whoami`
815  if [ $OUTUSER != $CURUSER -o $LOGUSER != $CURUSER ]; then
816  echo "Setting umask for group write."
817  umask 002
818  fi
819 fi
820 
821 # Make sure scratch directory is defined.
822 # For batch, the scratch directory is always $_CONDOR_SCRATCH_DIR
823 # For interactive, the scratch directory is specified by option
824 # --scratch or --outdir.
825 
826 if [ $INTERACTIVE -eq 0 ]; then
827  SCRATCH=$_CONDOR_SCRATCH_DIR
828 else
829  if [ x$SCRATCH = x ]; then
830  SCRATCH=$OUTDIR
831  fi
832 fi
833 if [ x$SCRATCH = x -o ! -d "$SCRATCH" -o ! -w "$SCRATCH" ]; then
834  echo "Local scratch directory not defined or not writable."
835  exit 1
836 fi
837 
838 # Create the scratch directory in the condor scratch directory.
839 # Copied from condor_lBdetMC.sh.
840 # Scratch directory path is stored in $TMP.
841 # Scratch directory is automatically deleted when shell exits.
842 
843 # Do not change this section.
844 # It creates a temporary working directory that automatically cleans up all
845 # leftover files at the end.
846 TMP=`mktemp -d ${SCRATCH}/working_dir.XXXXXXXXXX`
847 TMP=${TMP:-${SCRATCH}/working_dir.$$}
848 
849 { [[ -n "$TMP" ]] && mkdir -p "$TMP"; } || \
850  { echo "ERROR: unable to create temporary directory!" 1>&2; exit 1; }
851 trap "[[ -n \"$TMP\" ]] && { rm -rf \"$TMP\"; }" 0
852 chmod 755 $TMP
853 cd $TMP
854 # End of the section you should not change.
855 
856 echo "Scratch directory: $TMP"
857 
858 # Copy files from work directory to scratch directory.
859 
860 echo "No longer fetching files from work directory."
861 echo "That is now done using jobsub -f commands."
862 mkdir work
863 cp ${CONDOR_DIR_INPUT}/* ./work/
864 cd work
865 find . -name \*.tar -exec tar xf {} \;
866 find . -name \*.py -exec chmod +x {} \;
867 find . -name \*.sh -exec chmod +x {} \;
868 echo "Local working directory:"
869 pwd
870 ls
871 echo
872 
873 # Save the hostname and condor job id.
874 
875 hostname > hostname.txt
876 echo ${CLUSTER}.${PROCESS} > jobid.txt
877 
878 # Set default CLUSTER and PROCESS environment variables for interactive jobs.
879 
880 if [ $INTERACTIVE -ne 0 ]; then
881  CLUSTER=`date +%s` # From time stamp.
882  PROCESS=0 # Default zero for interactive.
883 fi
884 
885 # Override CLUSTER and PROCESS from command line options.
886 
887 if [ x$CLUS != x ]; then
888  CLUSTER=$CLUS
889 fi
890 if [ x$PROC != x ]; then
891  PROCESS=$PROC
892 fi
893 if [ x$PROCMAP != x ]; then
894  if [ -f $PROCMAP ]; then
895  PROCESS=`sed -n $(( $PROCESS + 1 ))p $PROCMAP`
896  else
897  echo "Process map file $PROCMAP not found."
898  exit 1
899  fi
900 fi
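# Example: if the procmap file contains the lines "3", "7" and "11", a worker
# submitted with $PROCESS=1 is remapped to PROCESS=7, i.e. the sed command
# above selects line $PROCESS+1 of the file.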
901 if [ x$CLUSTER = x ]; then
902  echo "CLUSTER not specified."
903  exit 1
904 fi
905 if [ x$PROCESS = x ]; then
906  echo "PROCESS not specified."
907  exit 1
908 fi
909 echo "Procmap: $PROCMAP"
910 echo "Cluster: $CLUSTER"
911 echo "Process: $PROCESS"
912 
913 # Construct name of output subdirectory.
914 
915 OUTPUT_SUBDIR=${CLUSTER}_${PROCESS}
916 echo "Output subdirectory: $OUTPUT_SUBDIR"
917 
918 # Make sure fcl file exists.
919 
920 if [ ! -f $FCL ]; then
921  echo "Configuration file $FCL does not exist."
922  exit 1
923 fi
924 
925 # Make sure init script exists and is executable (if specified).
926 
927 if [ x$INITSCRIPT != x ]; then
928  if [ -f "$INITSCRIPT" ]; then
929  chmod +x $INITSCRIPT
930  else
931  echo "Initialization script $INITSCRIPT does not exist."
932  exit 1
933  fi
934 fi
935 
936 # Make sure init source script exists (if specified).
937 
938 if [ x$INITSOURCE != x -a ! -f "$INITSOURCE" ]; then
939  echo "Initialization source script $INITSOURCE does not exist."
940  exit 1
941 fi
942 
943 # Make sure end-of-job script exists and is executable (if specified).
944 
945 if [ x$ENDSCRIPT != x ]; then
946  if [ -f "$ENDSCRIPT" ]; then
947  chmod +x $ENDSCRIPT
948  else
949  echo "End-of-job script $ENDSCRIPT does not exist."
950  exit 1
951  fi
952 fi
953 
954 # MRB run time environment setup goes here.
955 
956 # Setup local test release, if any.
957 
958 if [ x$LOCALDIR != x ]; then
959  mkdir $TMP/local
960  cd $TMP/local
961 
962  # Copy test release directory recursively.
963 
964  echo "Copying local test release from directory ${LOCALDIR}."
965 
966  # Make sure ifdhc is setup.
967 
968  if [ x$IFDHC_DIR = x ]; then
969  echo "Setting up ifdhc before fetching local directory."
970  setup ifdhc
971  fi
972  echo "IFDHC_DIR=$IFDHC_DIR"
973  ifdh cp -r $IFDH_OPT $LOCALDIR .
974  stat=$?
975  if [ $stat -ne 0 ]; then
976  echo "ifdh cp failed with status ${stat}."
977  exit $stat
978  fi
979  find . -name \*.py -exec chmod +x {} \;
980  find . -name \*.sh -exec chmod +x {} \;
981 
982  # Setup the environment.
983 
984  cd $TMP/work
985  echo "Initializing localProducts from ${LOCALDIR}."
986  if [ ! -f $TMP/local/setup ]; then
987  echo "Local test release directory $LOCALDIR does not contain a setup script."
988  exit 1
989  fi
990  sed "s@setenv MRB_INSTALL.*@setenv MRB_INSTALL ${TMP}/local@" $TMP/local/setup | \
991  sed "s@setenv MRB_TOP.*@setenv MRB_TOP ${TMP}@" > $TMP/local/setup.local
992  . $TMP/local/setup.local
993  #echo "MRB_INSTALL=${MRB_INSTALL}."
994  #echo "MRB_QUALS=${MRB_QUALS}."
995  echo "Setting up all localProducts."
996  if [ x$IFDHC_DIR != x ]; then
997  unsetup ifdhc
998  fi
999  mrbslp
1000 fi
1001 cd $TMP/work
1002 
1003 # Setup local larsoft test release from tarball.
1004 
1005 if [ x$LOCALTAR != x ]; then
1006  mkdir $TMP/local
1007  cd $TMP/local
1008 
1009  # Fetch the tarball.
1010 
1011  echo "Fetching test release tarball ${LOCALTAR}."
1012 
1013  # Make sure ifdhc is setup.
1014 
1015  if [ x$IFDHC_DIR = x ]; then
1016  echo "Setting up ifdhc before fetching tarball."
1017  setup ifdhc
1018  fi
1019  echo "IFDHC_DIR=$IFDHC_DIR"
1020  ifdh cp $LOCALTAR local.tar
1021  stat=$?
1022  if [ $stat -ne 0 ]; then
1023  echo "ifdh cp failed with status ${stat}."
1024  exit $stat
1025  fi
1026 
1027  # Extract the tarball.
1028 
1029  tar -xf local.tar
1030 
1031  # Setup the environment.
1032 
1033  cd $TMP/work
1034  echo "Initializing localProducts from tarball ${LOCALTAR}."
1035  sed "s@setenv MRB_INSTALL.*@setenv MRB_INSTALL ${TMP}/local@" $TMP/local/setup | \
1036  sed "s@setenv MRB_TOP.*@setenv MRB_TOP ${TMP}@" > $TMP/local/setup.local
1037  . $TMP/local/setup.local
1038  #echo "MRB_INSTALL=${MRB_INSTALL}."
1039  #echo "MRB_QUALS=${MRB_QUALS}."
1040  echo "Setting up all localProducts."
1041  if [ x$IFDHC_DIR != x ]; then
1042  unsetup ifdhc
1043  fi
1044  mrbslp
1045 fi
1046 
1047 # Setup specified version of top level run time products
1048 # (if specified, and if local test release did not set them up).
1049 
1050 for prd in `echo $UPS_PRDS | tr , ' '`
1051 do
1052  if ! ups active | grep -q $prd; then
1053  echo "Setting up $prd $REL -q ${QUAL}."
1054  if [ x$IFDHC_DIR != x -a x$IFBEAM_DIR = x ]; then
1055  unsetup ifdhc
1056  fi
1057  setup $prd $REL -q $QUAL
1058  fi
1059 done
1060 
1061 ups active
1062 
1063 cd $TMP/work
1064 
1065 # In case mrb setup didn't set up a version of ifdhc, set up ifdhc again.
1066 
1067 if [ x$IFDHC_DIR = x ]; then
1068  echo "Setting up ifdhc again, because larsoft did not set it up."
1069  setup ifdhc
1070 fi
1071 echo "IFDH_ART_DIR=$IFDH_ART_DIR"
1072 echo "IFDHC_DIR=$IFDHC_DIR"
1073 
1074 # Get input files to process, either single file, file list, or sam.
1075 #
1076 # For non-sam non-xrootd input, copy all files local using ifdh cp, and make a
1077 # local file list called condor_lar_input.list. Save the remote file names (uri's)
1078 # in another file called transferred_uris.list
1079 #
1080 # For non-sam xrootd input ("--sam_schema root") convert input list to xrootd uri's,
1081 # if possible.
1082 
1083 rm -f condor_lar_input.list
1084 rm -f transferred_uris.list
1085 NFILE_TOTAL=0
1086 parent_files=()
1087 aunt_files=() # For data overlay, the data files being brought in are the output's aunts.
1088 
1089 if [ $USE_SAM -eq 0 -a x$INFILE != x ]; then
1090 
1091  # Single file case.
1092 
1093  # Don't allow any list-related options in single file case:
1094  # -S, --source-list, --nfile, --nfile_skip
1095 
1096  if [ x$INLIST != x -o $NFILE -ne 0 -o $NFILE_SKIP -ne 0 ]; then
1097  echo "File list options specified with single input file."
1098  exit 1
1099  fi
1100 
1101  #set the parent file to be the input file
1102  parent_files=("${parent_files[@]}" $INFILE)
1103 
1104  # Copy input file to scratch directory or convert to xrootd url.
1105 
1106  NFILE_TOTAL=1
1107  XROOTD_URI=$INFILE
1108  if [ x$SAM_SCHEMA = xroot ]; then
1109  XROOTD_URI=`file_to_url.sh $INFILE`
1110  fi
1111  if [ $XROOTD_URI != $INFILE ]; then
1112  echo $INFILE > transferred_uris.list
1113  echo $XROOTD_URI > condor_lar_input.list
1114  echo "Input xrootd uri: $XROOTD_URI"
1115  else
1116  LOCAL_INFILE=`basename $INFILE`
1117  echo "Copying $INFILE"
1118  ifdh cp $INFILE $LOCAL_INFILE
1119  stat=$?
1120  if [ $stat -ne 0 ]; then
1121  echo "ifdh cp failed with status ${stat}."
1122  exit $stat
1123  fi
1124  if [ -f $LOCAL_INFILE -a $stat -eq 0 ]; then
1125  echo $INFILE > transferred_uris.list
1126  echo $LOCAL_INFILE > condor_lar_input.list
1127  else
1128  echo "Error fetching input file ${INFILE}."
1129  exit 1
1130  fi
1131  fi
1132 
1133 elif [ $USE_SAM -eq 0 -a x$INLIST != x ]; then
1134 
1135  # Input list case.
1136 
1137  # Make sure input file list exists.
1138 
1139  if [ ! -f $INLIST ]; then
1140  echo "Input file list $INLIST does not exist."
1141  exit 1
1142  fi
1143 
1144  # Remember how many files are in the input file list.
1145 
1146  NFILE_TOTAL=`cat $INLIST | wc -l`
1147  echo "Input file list contains $NFILE_TOTAL total files."
1148 
1149  # Clamp the total number of files to be a maximum of NFILE * NJOBS, where
1150  # NFILE and NJOBS are specified via command line options. In project.py
1151 # terms, NFILE is <maxfilesperjob> and NJOBS is <numjobs>.
1152 
1153  MAX_TOTAL=$(( $NFILE * $NJOBS ))
1154  if [ $MAX_TOTAL -gt 0 -a $NFILE_TOTAL -gt $MAX_TOTAL ]; then
1155  NFILE_TOTAL=$MAX_TOTAL
1156  echo "Number of files to be processed will be limited to ${NFILE_TOTAL}."
1157  fi
1158 
1159  # If --njobs was specified, calculate how many files
1160  # to skip and process in this worker.
1161 
1162  if [ $NJOBS -ne 0 ]; then
1163 
1164  # Don't allow option --nfile_skip in this case.
1165 
1166  if [ $NFILE_SKIP -ne 0 ]; then
1167  echo "Illegal options specified with --njobs."
1168  exit 1
1169  fi
1170 
1171  # Clamp NJOBS to be a maximum of $NFILE_TOTAL.
1172  # This means that workers with $PROCESS >= $NFILE_TOTAL will not have
1173  # any input files to process.
1174 
1175  MYNJOBS=$NJOBS
1176  if [ $MYNJOBS -gt $NFILE_TOTAL ]; then
1177  MYNJOBS=$NFILE_TOTAL
1178  fi
1179 
1180  # Calculate number of files to skip and number of files to process.
1181 
1182  NFILE_SKIP=$(( $PROCESS * $NFILE_TOTAL / $MYNJOBS ))
1183  MYNFILE=$(( ( $PROCESS + 1 ) * $NFILE_TOTAL / $MYNJOBS - $NFILE_SKIP ))
1184  if [ $MYNFILE -eq 0 -o $NFILE_SKIP -ge $NFILE_TOTAL ]; then
1185  echo "This worker did not get any input files."
1186  exit 1
1187  fi
1188  if [ $MYNFILE -lt $NFILE -o $NFILE -eq 0 ]; then
1189  NFILE=$MYNFILE
1190  fi
1191  fi
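 # Worked example: with NFILE_TOTAL=10 and NJOBS=4, workers 0-3 compute
 # NFILE_SKIP/MYNFILE = 0/2, 2/3, 5/2 and 7/3 respectively, so the ten
 # files are divided approximately evenly (2,3,2,3) among the workers.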
1192 
1193  # Report number of files to skip and process.
1194 
1195  echo "Skipping $NFILE_SKIP files."
1196  if [ $NFILE -eq 0 ]; then
1197  echo "Processing all remaining files."
1198  else
1199  echo "Processing $NFILE files."
1200  fi
1201 
1202  # Copy input files and construct local input file list.
1203 
1204  nfile=0
1205  nfskip=$NFILE_SKIP
1206  nmax=$NFILE
1207  while read infile; do
1208  if [ $nfskip -gt 0 ]; then
1209  nfskip=$(( $nfskip - 1 ))
1210  else
1211 
1212  # Retain the original file name as the local file name, if possible.
1213  # Otherwise, generate a new (hopefully) unique name.
1214 
1215  if [ ! -f condor_lar_input.list ]; then
1216  touch condor_lar_input.list
1217  fi
1218 
1219  XROOTD_URI=$infile
1220  if [ x$SAM_SCHEMA = xroot ]; then
1221  XROOTD_URI=`file_to_url.sh $infile`
1222  fi
1223  if [ $XROOTD_URI != $infile ]; then
1224  echo $infile >> transferred_uris.list
1225  echo $XROOTD_URI >> condor_lar_input.list
1226  echo "Input xrootd uri: $XROOTD_URI"
1227  else
1228  LOCAL_INFILE=`basename $infile`
1229  if grep -q $LOCAL_INFILE condor_lar_input.list; then
1230  LOCAL_INFILE=input${nfile}.root
1231  if [ "$INMODE" = 'textfile' ]; then
1232  LOCAL_INFILE=input${nfile}.txt
1233  fi
1234  fi
1235  echo "Copying $infile"
1236  ifdh cp $infile $LOCAL_INFILE
1237  stat=$?
1238  if [ $stat -ne 0 ]; then
1239  echo "ifdh cp failed with status ${stat}."
1240  exit $stat
1241  fi
1242  if [ -f $LOCAL_INFILE -a $stat -eq 0 ]; then
1243  echo $infile >> transferred_uris.list
1244  echo $LOCAL_INFILE >> condor_lar_input.list
1245  parent_files=("${parent_files[@]}" $LOCAL_INFILE)
1246  else
1247  echo "Error fetching input file ${infile}."
1248  exit 1
1249  fi
1250  fi
1251  nmax=$(( $nmax - 1 ))
1252  if [ $nmax -eq 0 ]; then
1253  break
1254  fi
1255  fi
1256  nfile=$(( $nfile + 1 ))
1257  done < $INLIST
1258 fi
1259 
1260 NFILE_LOCAL=0
1261 if [ $USE_SAM -eq 0 -a x$SAM_SCHEMA != xroot ]; then
1262  if [ -f condor_lar_input.list ]; then
1263 
1264  # Sort input list by decreasing size so we don't get a file with
1265  # zero events as the first file.
1266 
1267  #ls -S1 `cat condor_lar_input.list` > condor_lar_input.list
1268  xargs ls -s1 < condor_lar_input.list | sort -nr | awk '{print $2}' > newcondor_lar_input.list
1269  mv -f newcondor_lar_input.list condor_lar_input.list
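 # For reference: "xargs ls -s1" prints "<size> <name>" for each listed file,
 # "sort -nr" orders those lines by decreasing size, and awk keeps only the
 # file name, yielding the list sorted largest-first.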
1270  echo "Local input file list:"
1271  cat condor_lar_input.list
1272  NFILE_LOCAL=`cat condor_lar_input.list | wc -l`
1273  else
1274  echo "No local input files."
1275  fi
1276  echo "Local input list has $NFILE_LOCAL files."
1277 fi
1278 
1279 # Break the master wrapper fcl into one fcl file per stage.
1280 nfcls=0
1281 
1282 while read -r line
1283 do
1284 
1285  if [ "$(echo $line | awk '{print $1}')" = "#---STAGE" ]; then
1286  stage="$(echo $line | awk '{print $2}')"
1287  stage_fcl="Stage$stage.fcl"
1288  nfcls=$(( $nfcls + 1 ))
1289  continue
1290  fi
1291 
1292  if [ "$line" = "#---END_STAGE" ]; then
1293  #cat EOF >> $fcl
1294  continue
1295  fi
1296  echo $line >> $stage_fcl
1297 done < $FCL
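# Example of the master wrapper fcl format parsed by the loop above
# (hypothetical stage contents):
#
#   #---STAGE 0
#   #include "prod_stage0.fcl"
#   #---END_STAGE
#   #---STAGE 1
#   #include "prod_stage1.fcl"
#   #---END_STAGE
#
# This produces Stage0.fcl and Stage1.fcl, which are run serially below.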
1298 
1299 # We now have $nfcls stage fcl files, each of which needs to be run serially.
1300 stage=0
1301 
1302 echo "Start loop over stages"
1303 while [ $stage -lt $nfcls ]; do
1304  FCL="Stage$stage.fcl"
1305 
1306  # In case no input files were specified, and we are not getting input
1307  # from sam (i.e. mc generation), recalculate the first event number,
1308  # the subrun number, and the number of events to generate in this worker.
1309  # This also applies to the textfile inputmode.
1310  # Note this only applies to the first stage by definition
1311 
1312  if [ $stage -eq 0 -a $USE_SAM -eq 0 ] && [ $NFILE_TOTAL -eq 0 -o "$INMODE" = 'textfile' ]; then
1313 
1314 
1315  # Don't allow --nskip.
1316 
1317  if [ $NSKIP -gt 0 ]; then
1318  echo "Illegal option --nskip specified with no input."
1319  exit 1
1320  fi
1321 
1322  # Do calculation.
1323 
1324  NSKIP=$(( $PROCESS * $NEVT / $NJOBS ))
1325  NEV=$(( ( $PROCESS + 1 ) * $NEVT / $NJOBS - $NSKIP ))
1326  FIRST_EVENT=$(( $NSKIP + 1 ))
1327  NSKIP=0
1328  NEVT=$NEV
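 # Worked example: with --nevts 100 and --njobs 4, worker $PROCESS=2 computes
 # NSKIP = 2*100/4 = 50 and NEV = 3*100/4 - 50 = 25, so FIRST_EVENT = 51 and
 # this worker generates events 51-75.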
1329 
1330  # Set subrun=$PROCESS+1 in a wrapper fcl file.
1331 
1332  SUBRUN=$(( $PROCESS + 1))
1333  cat <<EOF > subrun_wrapper.fcl
1334 #include "$FCL"
1335 
1336 source.firstSubRun: $SUBRUN
1337 
1338 EOF
1339  if [ "$INMODE" = 'textfile' ]; then
1340 
1341  if [ $NFILE_LOCAL -ne 1 ]; then
1342  echo "Text file input mode specified with wrong number of input files."
1343  exit 1
1344  fi
1345  echo "physics.producers.generator.InputFileName: \"`cat condor_lar_input.list`\"" >> subrun_wrapper.fcl
1346  fi
1347 
1348  FCL=subrun_wrapper.fcl
1349 
1350  echo "First MC event: $FIRST_EVENT"
1351  echo "MC subrun: $SUBRUN"
1352  echo "Number of MC events: $NEVT"
1353 
1354  fi
1355 
1356  # Sam stuff for main input.
1357 
1358  PURL=''
1359  CPID=''
1360  if [ $USE_SAM -ne 0 -a $stage -eq 0 ]; then
1361  echo "Configuring sam input."
1362 
1363  # Make sure a project name has been specified.
1364 
1365  if [ x$SAM_PROJECT = x ]; then
1366  echo "No sam project was specified."
1367  exit 1
1368  fi
1369  echo "Sam project: $SAM_PROJECT"
1370 
1371  # Start project (if requested).
1372 
1373  if [ $SAM_START -ne 0 ]; then
1374  if [ x$SAM_DEFNAME != x ]; then
1375 
1376  # Do some preliminary tests on the input dataset definition.
1377  # If dataset definition returns zero files at this point, abort the job.
1378  # If dataset definition returns too many files compared to --nfile, create
1379  # a new dataset definition by adding a "with limit" clause.
1380 
1381  nf=`ifdh translateConstraints "defname: $SAM_DEFNAME" | wc -l`
1382  if [ $nf -eq 0 ]; then
1383  echo "Input dataset $SAM_DEFNAME is empty."
1384  exit 1
1385  fi
1386  if [ $NFILE -ne 0 -a $nf -gt $NFILE ]; then
1387  limitdef=${SAM_DEFNAME}_limit_$NFILE
1388 
1389  # Check whether limit def already exists.
1390  # Have to parse command output because ifdh returns the wrong status.
1391 
1392  existdef=`ifdh describeDefinition $limitdef 2>/dev/null | grep 'Definition Name:' | wc -l`
1393  if [ $existdef -gt 0 ]; then
1394  echo "Using already created limited dataset definition ${limitdef}."
1395  else
1396  ifdh createDefinition $limitdef "defname: $SAM_DEFNAME with limit $NFILE" $SAM_USER $SAM_GROUP
1397 
1398  # Assume command worked, because it returns the wrong status.
1399 
1400  echo "Created limited dataset definition ${limitdef}."
1401  fi
1402 
1403  # If we get to here, we know that we want to use $limitdef instead of $SAM_DEFNAME
1404  # as the input sam dataset definition.
1405 
1406  SAM_DEFNAME=$limitdef
1407  fi
1408 
1409  # If recursive flag, take snapshot of input dataset.
1410 
1411  if [ $RECUR -ne 0 ]; then
1412  echo "Forcing snapshot"
1413  SAM_DEFNAME=${SAM_DEFNAME}:force
1414  fi
1415 
1416  # Start the project.
1417 
1418  echo "Starting project $SAM_PROJECT using sam dataset definition $SAM_DEFNAME"
1419  ifdh startProject $SAM_PROJECT $SAM_STATION $SAM_DEFNAME $SAM_USER $SAM_GROUP
1420  if [ $? -eq 0 ]; then
1421  echo "Start project succeeded."
1422  else
1423  echo "Start project failed."
1424  exit 1
1425  fi
1426  fi
1427 
1428  if [ x$SAM_DEFNAME = x ]; then
1429 
1430  echo "Start project requested, but no definition was specified."
1431  exit 1
1432  fi
1433 
1434  fi
1435 
1436 
1437  # Get the project url of a running project (maybe the one we just started,
1438  # or maybe started externally). This command has to succeed, or we can't
1439  # continue.
1440 
1441  PURL=`ifdh findProject $SAM_PROJECT $SAM_STATION`
1442  if [ x$PURL = x ]; then
1443  echo "Unable to find url for project ${SAM_PROJECT}."
1444  exit 1
1445  else
1446  echo "Project url: $PURL"
1447  fi
1448 
1449  # Start the consumer process. This command also has to succeed.
1450 
1451  NODE=`hostname`
1452  APPFAMILY=art
1453 
1454  # Parse fcl file to extract process_name, and use that
1455  # as the application name for starting the consumer process.
1456 
1457  APPNAME=`fhicl-dump $FCL | grep process_name: | tr -d '"' | awk '{print $2}'`
1458  if [ $? -ne 0 ]; then
1459  echo "fhicl-dump $FCL failed to run. May be missing a ups product, library, or fcl file."
1460  exit 1
1461  fi
1462  if [ x$APPNAME = x ]; then
1463  echo "Trouble determining application name."
1464  echo "cat $FCL"
1465  cat $FCL
1466  exit 1
1467  fi
1468 
1469  echo "Starting consumer process."
1470  echo "ifdh establishProcess $PURL $APPNAME $REL $NODE $SAM_USER $APPFAMILY $FCL $NFILE $SAM_SCHEMA"
1471  CPID=`ifdh establishProcess $PURL $APPNAME $REL $NODE $SAM_USER $APPFAMILY $FCL $NFILE $SAM_SCHEMA`
1472  if [ x$CPID = x ]; then
1473  echo "Unable to start consumer process for project url ${PURL}."
1474  exit 1
1475  else
1476  echo "Consumer process id $CPID"
1477  fi
1478 
1479  # Stash away the project name and consumer process id in case we need them
1480  # later for bookkeeping.
1481 
1482  echo $SAM_PROJECT > sam_project.txt
1483  echo $CPID > cpid.txt
1484 
1485  fi
1486 
1487  # Sam stuff for secondary input.
1488 
1489  if [ $MIX_SAM -ne 0 ]; then
1490  echo "Configuring mix sam input."
1491 
1492  # Make sure a project name has been specified.
1493 
1494  if [ x$MIX_PROJECT = x ]; then
1495  echo "No mix sam project was specified."
1496  exit 1
1497  fi
1498  echo "Mix project: $MIX_PROJECT"
1499 
1500  # Start mix project (if requested).
1501 
1502  if [ $SAM_START -ne 0 ]; then
1503  if [ x$MIX_DEFNAME != x ]; then
1504 
1505  echo "Starting project $MIX_PROJECT using sam dataset definition $MIX_DEFNAME"
1506  ifdh startProject $MIX_PROJECT $SAM_STATION $MIX_DEFNAME $SAM_USER $SAM_GROUP
1507  if [ $? -eq 0 ]; then
1508  echo "Start project succeeded."
1509  else
1510  echo "Start project failed."
1511  exit 1
1512  fi
1513  fi
1514 
1515  if [ x$MIX_DEFNAME = x ]; then
1516 
1517  echo "Start project requested, but no mix definition was specified."
1518  exit 1
1519  fi
1520  fi
1521  fi
1522 
1523  # Figure out output file names.
1524  # If OUTFILE is not defined and we are inputting a single file or file list, follow our
1525  # convention that the output file should be %inputfilename_%systemtime_stage.root.
1526 
1527  # Construct options for lar command line.
1528 
1529  LAROPT="-c $FCL --rethrow-default"
1530  echo "Laropt: $LAROPT"
1531  if [ -f condor_lar_input.list -a $stage -eq 0 ]; then
1532  if [ "$INMODE" != 'textfile' ]; then
1533  LAROPT="$LAROPT -S condor_lar_input.list" #artroot files to read in
1534  #AOUTFILE=`cat condor_lar_input.list`
1535  fi
1536  fi
1537 
1538  if [ x$OUTFILE != x ]; then
1539  LAROPT="$LAROPT -o `basename $OUTFILE .root`$stage.root"
1540  outstem=`basename $OUTFILE .root`
1541  fi
1542 
1543  if [ x$TFILE != x ]; then
1544  LAROPT="$LAROPT -T $TFILE"
1545  fi
1546 
1547  if [ $NEVT -ne 0 ]; then
1548  LAROPT="$LAROPT -n $NEVT"
1549  fi
1550 
1551  if [ $NSKIP -ne 0 ]; then
1552  LAROPT="$LAROPT --nskip $NSKIP"
1553  fi
1554 
1555  if [ $FIRST_EVENT -ne 0 ]; then
1556  LAROPT="$LAROPT -e $FIRST_EVENT"
1557  fi
1558 
1559  if [ x$PURL != x -a $stage -eq 0 ]; then
1560  LAROPT="$LAROPT --sam-web-uri $PURL"
1561  fi
1562 
1563  if [ x$CPID != x -a $stage -eq 0 ]; then
1564  LAROPT="$LAROPT --sam-process-id $CPID"
1565  fi
1566 
1567  if [ -n "$ARGS" ]; then
1568  LAROPT="$LAROPT $ARGS"
1569  fi
1570 
1571  if [ $stage -ne 0 ]; then
1572  LAROPT="$LAROPT -s $next_stage_input"
1573  fi
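 # Example of a resulting stage-0 command line (hypothetical values, with
 # an input list, --output out.root and --nevts 100):
 #
 #   lar -c Stage0.fcl --rethrow-default -S condor_lar_input.list -o out0.root -n 100
 #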
1574 
1575  # Run/source optional initialization scripts.
1576 
1577  if [ x$INITSCRIPT != x ]; then
1578  echo "Running initialization script ${INITSCRIPT}."
1579  if ! ./${INITSCRIPT}; then
1580  exit $?
1581  fi
1582  fi
1583 
1584  if [ x$INITSOURCE != x -a $stage -eq 0 ]; then
1585  echo "Sourcing initialization source script ${INITSOURCE}."
1586  . $INITSOURCE
1587  status=$?
1588  if [ $status -ne 0 ]; then
1589  exit $status
1590  fi
1591  fi
1592 
1593  # Save a copy of the environment, which can be helpful for debugging.
1594 
1595  env > env.txt
1596 
1597  # Save a canonicalized version of the fcl configuration.
1598 
1599  fhicl-dump $FCL > cfgStage$stage.fcl
1600 
1601  # Run lar.
1602  pwd
1603  echo "$EXE $LAROPT"
1604  echo "$EXE $LAROPT" > commandStage$stage.txt
1605  $EXE $LAROPT > larStage$stage.out 2> larStage$stage.err
1606  stat=$?
1607  echo $stat > larStage$stage.stat
1608  echo "$EXE completed with exit status ${stat}."
1609  if [ $stat -ne 0 ]; then
1610  echo
1611  echo "tail -100 larStage$stage.out"
1612  echo
1613  tail -100 larStage$stage.out
1614  echo
1615  echo "tail -100 larStage$stage.err"
1616  echo
1617  tail -100 larStage$stage.err
1618  echo
1619  fi
1620 
1621  # Sam cleanups.
1622 
1623  if [ $USE_SAM -ne 0 -a $stage -eq 0 ]; then
1624 
1625  # Get list of consumed files.
1626 
1627  if [ x$CPID = x -a -f cpid.txt ]; then
1628  CPID=`cat cpid.txt`
1629  fi
1630  ifdh translateConstraints "consumer_process_id $CPID and consumed_status consumed" > consumed_files.list
1631 
1632  # End consumer process.
1633 
1634  ifdh endProcess $PURL $CPID
1635 
1636  # Stop project (if appropriate).
1637 
1638  if [ $SAM_START -ne 0 ]; then
1639  echo "Stopping project."
1640  ifdh endProject $PURL
1641  fi
1642  fi
1643 
1644  #If lar returns a status other than 0, do not move on to other stages
1645  if [ $stat -ne 0 ]; then
1646  break
1647  fi
1648 
1649  if [ $stage -ne 0 ]; then
1650  rm -rf $next_stage_input
1651  fi
1652 
1653  #echo `ls -t1 *.root | egrep -v 'hist|larlite|larcv' | head -n1`
1654 
1655  #echo "Outfile is $OUTFILE"
1656 
1657 
1658  next_stage_input=`ls -t1 *.root | egrep -v 'hist|larlite|larcv|TGraphs' | head -n1`
1659 
1660  mixed_file=`sam_metadata_dumper $next_stage_input | grep mixparent | awk -F ":" '{gsub("\"" ,""); gsub(",",""); gsub(" ",""); print $2}'`
1661 
1662  if [ x$mixed_file != x ]; then
1663  aunt_files=("${aunt_files[@]}" $mixed_file)
1664  fi
1665 
1666  stage=$(( $stage + 1 ))
1667  FIRST_EVENT=0 # Reset so stages after the first do not get the -e option.
1668 
1669  #rename the mem and time profile DBs by stage
1670 
1671  if [ -f time.db ]; then
1672  mv time.db time$stage.db
1673  fi
1674  if [ -f mem.db ]; then
1675  mv mem.db mem$stage.db
1676  fi
1677 
1678 done
1679 
1680 
1681 
1682 # Set up current version of ifdhc (may be different from the version set up by larsoft).
1683 
1684 #echo "Setting up current version of ifdhc."
1685 #if [ x$IFDHC_DIR != x ]; then
1686 # unsetup ifdhc
1687 #fi
1688 #setup ifdhc v1_3_2
1689 echo "IFDHC_DIR=$IFDHC_DIR"
1690 
1691 # Secondary sam cleanups.
1692 
1693 if [ $MIX_SAM -ne 0 ]; then
1694 
1695  # Stop project (if appropriate).
1696 
1697  if [ $SAM_START -ne 0 ]; then
1698  echo "Stopping project."
1699  MURL=`ifdh findProject $MIX_PROJECT $SAM_STATION`
1700  ifdh endProject $MURL
1701  fi
1702 fi
1703 
1704 # Delete input files.
1705 
1706 if [ $USE_SAM -eq 0 -a x$SAM_SCHEMA != xroot -a -f condor_lar_input.list ]; then
1707  while read file; do
1708  rm -f $file
1709  done < condor_lar_input.list
1710 fi
1711 
1712 # Run optional end-of-job script.
1713 
1714 if [ x$ENDSCRIPT != x ]; then
1715  echo "Running end-of-job script ${ENDSCRIPT}."
1716  if ! ./${ENDSCRIPT}; then
1717  exit $?
1718  fi
1719 fi
1720 
1721 # Do root file checks.
1722 
1723 # Randomize names of root files that have a corresponding json file.
1724 # These are normally histogram files. Art files do not have external
1725 # json metadata at this point.
1726 
1727 # Also randomize the names of root files if there is no input specified
1728 # for this job (i.e. generator jobs).
1729 
1730 # Also randomize and shorten names of root files that are longer than
1731 # 200 characters.
1732 
1733 ran=0
1734 if [ $USE_SAM -eq 0 -a x$INFILE = x -a x$INLIST = x ]; then
1735  ran=1
1736 fi
1737 
1738 for root in *.root; do
1739  if [ -f $root ]; then
1740  nc=`echo $root | wc -c`
1741  if [ -f ${root}.json -o $ran != 0 -o $nc -ge 200 ]; then
1742  echo "Move file 1 $root"
1743  base=`basename $root .root | cut -c1-150`_`uuidgen`
1744  echo "Move file 2 $base"
1745  mv $root ${base}.root
1746  if [ -f ${root}.json ]; then
1747  mv ${root}.json ${base}.root.json
1748  fi
1749  fi
1750  fi
1751 done
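# Example: a histogram file "hist.root" with sidecar "hist.root.json" might
# be renamed "hist_<uuid>.root" and "hist_<uuid>.root.json", where <uuid>
# comes from uuidgen.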
1752 
1753 # Calculate root metadata for all root files and save as json file.
1754 # If json metadata already exists, merge with newly generated root metadata.
1755 # Extract a subrun number, if one exists. Make remote (not necessarily unique)
1756 # and local directories for root files with identifiable subrun numbers.
1757 
1758 subrun=''
1759 declare -a outdirs
1760 declare -a logdirs
1761 declare -a subruns
1762 for root in *.root; do
1763  if [ -f $root ]; then
1764  json=${root}.json
1765  if [ -f $json ]; then
1766  ./root_metadata.py --output="${json}2" "$root" >& /dev/null
1767  ./merge_json.py $json ${json}2 > ${json}3
1768  mv -f ${json}3 $json
1769  rm ${json}2
1770  else
1771  ./root_metadata.py --output="${json}" "$root" >& /dev/null
1772  fi
1773  subrun=`./subruns.py $root | awk 'NR==1{print $2}'`
1774  if [ x$subrun = x ]; then
1775  subrun=0
1776  fi
1777  subruns[$subrun]=$subrun
1778  outdirs[$subrun]=`echo $OUTDIR | sed "s/@s/$subrun/"`
1779  echo "Output directory for subrun $subrun is ${outdirs[$subrun]}"
1780  mkdir out$subrun
1781  logdirs[$subrun]=`echo $LOGDIR | sed "s/@s/$subrun/"`
1782  echo "Log directory for subrun $subrun is ${logdirs[$subrun]}"
1783  mkdir log$subrun
1784  fi
1785 done
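# Example: if this script was invoked with --outdir /pnfs/exp/out_@s
# (hypothetical path), a root file from subrun 3 is destined for remote
# directory /pnfs/exp/out_3, and matching local directories out3 and log3
# are created above.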
1786 
1787 # Create a master lar.stat file which contains the overall exit code of all stages.
1788 stageStat=0
1789 overallStat=0
1790 while [ $stageStat -lt $nfcls ]; do
1791  stat=`cat larStage$stageStat.stat`
1792  if [[ "$stat" = 65 && $ART_VERSION < v2_01 ]]; then
1793  # Workaround TimeTracker crash bug for input files with zero events.
1794  for json in *.json; do
1795  if grep -q '"events": *"0"' $json; then
1796  stat=0
1797  fi
1798  done
1799  fi
1800  overallStat=$(( $stat + $overallStat ))
1801 
1802  #do some cleanup of intermediate files
1803  #rm Stage$stageStat.fcl
1804  stageStat=$(( $stageStat + 1 ))
1805 done
1806 echo $overallStat > lar.stat
1807 valstat=0
1808 
1809 # Make local output directories for files that don't have a subrun.
1810 
1811 mkdir out
1812 mkdir log
1813 
1814 # Make local files group write, if appropriate.
1815 
1816 if [ $GRID -eq 0 -a $OUTUSER != $CURUSER ]; then
1817  chmod -R g+rw .
1818 fi
1819 
1820 
1821 
1822 # Stash all of the files we want to save in a local
1823 # directories with a unique name. Then copy these directories
1824 # and their contents recursively.
1825 
1826 # First move .root and corresponding .json files into one subdirectory.
1827 # Note that .root files never get replicated.
1828 
1829 for root in *.root; do
1830  if [ -f $root ]; then
1831  subrun=`./subruns.py $root | awk 'NR==1{print $2}'`
1832 
1833  if [ x$subrun = x ]; then
1834  subrun=0
1835  fi
1836 
1837  mv $root out$subrun
1838  mv ${root}.json log$subrun
1839  fi
1840 done
1841 
1842 # Copy any remaining files into all log subdirectories.
1843 # These small files may get replicated.
1844 
1845 for outfile in *; do
1846  if [ -f $outfile ]; then
1847  cp $outfile log
1848  for subrun in ${subruns[*]}
1849  do
1850  cp $outfile log$subrun
1851  done
1852  fi
1853 done
1854 
1855 # Do validation (if requested).
1856 
1857 if [ $VALIDATE_IN_JOB -eq 1 ]; then
1858  #If SAM was used, get the parent files based on the cpid
1859  if [ $USE_SAM -ne 0 ]; then
1860  id=`cat cpid.txt`
1861  parent_files=($(ifdh translateConstraints "consumer_process_id=$id and consumed_status consumed"))
1862  stat=$?
1863  if [ $stat -ne 0 ]; then
1864  echo "Failed to determine parentage."
1865  exit 1
1866  fi
1867  fi
1868 
1869  echo "The file's parents are: "
1870 
1871  for elt in ${parent_files[*]};
1872  do
1873  echo $elt
1874  done
1875 
1876  echo "The file's aunts are: "
1877  for elt in ${aunt_files[*]};
1878  do
1879  echo $elt
1880  done
1881 
1882  # If we are maintaining the output's parentage, combine the file's parents and aunts into flat strings.
1883  # These strings will be interpreted by validate_in_job.py. If they are left empty, validate_in_job.py will not change the file's parentage.
1884  if [ $MAINTAIN_PARENTAGE -eq 1 ]; then
1885  export JOBS_PARENTS=`echo ${parent_files[*]}`
1886  export JOBS_AUNTS=`echo ${aunt_files[*]}`
1887 
1888 
1889  fi
1890 
1891  # Do validation function for the whole job.
1892 
1893  valstat=0
1894  curdir=`pwd`
1895  #cd $curdir/log
1896  #./validate_in_job.py --dir $curdir/out --logfiledir $curdir/log --outdir $OUTDIR/$OUTPUT_SUBDIR --declare $DECLARE_IN_JOB --copy $COPY_TO_FTS
1897  #valstat=$?
1898  #cd $curdir
1899 
1900  # Do validation for each subrun.
1901 
1902  for subrun in ${subruns[*]}
1903  do
1904  cd $curdir/log$subrun
1905 
1906  ./validate_in_job.py --dir $curdir/out$subrun --logfiledir $curdir/log$subrun --outdir ${outdirs[$subrun]}/$OUTPUT_SUBDIR --declare $DECLARE_IN_JOB --copy $COPY_TO_FTS
1907  subvalstat=$?
1908  valstat=$(( $valstat + $subvalstat ))
1909  done
1910  cd $curdir
1911 
1912 fi
1913 
1914 # Remove duplicate files in log subdirectories, because they will cause ifdh to hang.
1915 
1916 for outfile in log/*; do
1917  for subrun in ${subruns[*]}
1918  do
1919  if [ ${logdirs[$subrun]} = $LOGDIR ]; then
1920  dupfile=log$subrun/`basename $outfile`
1921  if [ -f $dupfile ]; then
1922  echo "Removing duplicate file ${dupfile}."
1923  rm -f $dupfile
1924  fi
1925  fi
1926  done
1927 done
1928 
1929 # Make a tarball of each log directory, and save the tarball in its own log directory.
1930 
1931 #rm -f log0.tar
1932 #tar -cjf log0.tar -C log .
1933 #mv log0.tar log
1934 for subrun in ${subruns[*]}
1935 do
1936  rm -f log.tar
1937  tar -cf log.tar -C log$subrun .
1938  tar -rf log.tar -C log .
1939  mv log.tar log$subrun/log_s${subrun}.tar
1940 done
1941 
1942 # Clean remote output and log directories.
1943 
1944 #if [ 1 -eq 0 ]; then
1945 # export IFDH_FORCE=$FORCE #this isn't set when running interactive, causing problems...
1946 #fi
1947 
1948 for dir in ${LOGDIR} ${OUTDIR}
1949 do
1950  #echo $dir
1951  echo "Make sure directory0 ${dir}/$OUTPUT_SUBDIR exists."
1952 
1953  #mkdir ${dir}/$OUTPUT_SUBDIR
1954  date
1955  ./mkdir.py -v ${dir}/$OUTPUT_SUBDIR
1956  echo "Make sure directory0 ${dir}/$OUTPUT_SUBDIR is empty."
1957  date
1958  ./emptydir.py -v ${dir}/$OUTPUT_SUBDIR
1959  date
1960  ./mkdir.py -v ${dir}/$OUTPUT_SUBDIR
1961  echo "Directory0 ${dir}/$OUTPUT_SUBDIR clean ok."
1962  date
1963 done
1964 
1965 if [ $SINGLE != 0 ]; then
1966  for dir in ${logdirs[*]} ${outdirs[*]}
1967  do
1968  echo "Make sure directory1 $dir exists."
1969  date
1970  ./mkdir.py -v $dir
1971  echo "Make sure directory1 $dir is empty."
1972  date
1973  ./emptydir.py -v $dir
1974  date
1975  ./mkdir.py -v $dir/$OUTPUT_SUBDIR
1976  echo "Directory1 $dir/$OUTPUT_SUBDIR clean ok."
1977  date
1978  done
1979 else
1980  for dir in ${logdirs[*]} ${outdirs[*]}
1981  do
1982  echo "Make sure directory2 ${dir}/$OUTPUT_SUBDIR exists."
1983  date
1984  ./mkdir.py -v ${dir}/$OUTPUT_SUBDIR
1985  echo "Make sure directory2 ${dir}/$OUTPUT_SUBDIR is empty."
1986  date
1987  ./emptydir.py -v ${dir}/$OUTPUT_SUBDIR
1988  date
1989  ./mkdir.py -v ${dir}/$OUTPUT_SUBDIR
1990  echo "Directory2 ${dir}/$OUTPUT_SUBDIR clean ok."
1991  date
1992  done
1993 fi
1994 
1995 statout=0
1996 export IFDH_CP_MAXRETRIES=5
1997 echo "ls log"
1998 ls log
1999 #echo "ifdh cp -D $IFDH_OPT log/* ${LOGDIR}/$OUTPUT_SUBDIR"
2000 echo "ifdh cp -D $IFDH_OPT log/log*.tar ${LOGDIR}/$OUTPUT_SUBDIR"
2001 if [ "$( ls -A log )" ]; then
2002  if [ -f log/log*.tar ]; then
2003  date
2004  #echo "ifdh cp -D $IFDH_OPT log/* ${LOGDIR}/$OUTPUT_SUBDIR"
2005  echo "ifdh cp -D $IFDH_OPT log/log*.tar ${LOGDIR}/$OUTPUT_SUBDIR"
2006  #ifdh cp -D $IFDH_OPT log/* ${LOGDIR}/$OUTPUT_SUBDIR
2007  ifdh cp -D $IFDH_OPT log/log*.tar ${LOGDIR}/$OUTPUT_SUBDIR
2008  date
2009  stat=$?
2010  if [ $stat -ne 0 ]; then
2011  echo "ifdh cp failed with status ${stat}."
2012  fi
2013  fi
2014 fi
2015 
2016 for subrun in ${subruns[*]}
2017 do
2018  echo "ls log$subrun"
2019  ls log$subrun
2020  date
2021  #echo "ifdh cp -D $IFDH_OPT log${subrun}/* ${logdirs[$subrun]}/$OUTPUT_SUBDIR"
2022  echo "ifdh cp -D $IFDH_OPT log${subrun}/log*.tar ${logdirs[$subrun]}/$OUTPUT_SUBDIR"
2023  #ifdh cp -D $IFDH_OPT log${subrun}/* ${logdirs[$subrun]}/$OUTPUT_SUBDIR
2024  ifdh cp -D $IFDH_OPT log${subrun}/log*.tar ${logdirs[$subrun]}/$OUTPUT_SUBDIR
2025  date
2026  stat=$?
2027  if [ $stat -ne 0 ]; then
2028  echo "ifdh cp failed with status ${stat}."
2029  statout=$stat
2030  fi
2031 done
2032 
2033 if [ $COPY_TO_FTS -eq 0 ]; then
2034 
2035  if [ "$( ls -A out )" ]; then
2036  echo "ifdh cp -D $IFDH_OPT out/* ${OUTDIR}/$OUTPUT_SUBDIR"
2037  ifdh cp -D $IFDH_OPT out/* ${OUTDIR}/$OUTPUT_SUBDIR
2038  stat=$?
2039  if [ $stat -ne 0 ]; then
2040  echo "ifdh cp failed with status ${stat}."
2041  fi
2042  fi
2043 
2044  for subrun in ${subruns[*]}
2045  do
2046  echo "ifdh cp -D $IFDH_OPT out${subrun}/* ${outdirs[$subrun]}/$OUTPUT_SUBDIR"
2047  ifdh cp -D $IFDH_OPT out${subrun}/* ${outdirs[$subrun]}/$OUTPUT_SUBDIR
2048  stat=$?
2049  if [ $stat -ne 0 ]; then
2050  echo "ifdh cp failed with status ${stat}."
2051  fi
2052  statout=$stat
2053 
2054  done
2055 fi
2056 
2057 if [ $statout -eq 0 ]; then
2058  statout=`cat lar.stat`
2059 fi
2060 
2061 if [ $statout -eq 0 ]; then
2062  statout=$valstat
2063 fi
2064 
2065 exit $statout