condor_hadd_sam.sh
Go to the documentation of this file.
1 #! /bin/bash
2 #------------------------------------------------------------------
3 #
4 # Purpose: A batch script to fetch root files from sam and combine
5 # them using hadd.
6 #
7 # Adapted from condor_lBdetMC.sh by E. Church.
8 #
9 # Usage:
10 #
11 # condor_hadd_sam.sh [options]
12 #
13 # Options:
14 #
15 # -c, --config - For compatibility (ignored).
16 # -T, --TFileName <arg> - TFile output file name
17 # --nfile <arg> - Number of files to process per worker.
18 #
19 # Sam and parallel project options.
20 #
21 # --sam_user <arg> - Specify sam user (default $GRID_USER).
22 # --sam_group <arg> - Specify sam group (default --group option).
23 # --sam_station <arg> - Specify sam station (default --group option).
24 # --sam_defname <arg> - Sam dataset definition name.
25 # --sam_project <arg> - Sam project name.
26 # --sam_start - Specify that this worker should be responsible for
27 # starting and stopping the sam project.
28 # --recur - Recursive input dataset (force snapshot).
29 # --sam_schema <arg> - Use this option with argument "root" to stream files using xrootd.
30 # --os <arg> - A copy of the os argument passed to jobsub. May be used
31 # to affect definition of UPS_OVERRIDE.
32 # --data_file_type - Specify data file type (default "root," repeatable).
33 #
34 # Larsoft options.
35 #
36 # --ups <arg> - Comma-separated list of top level run-time ups products.
37 # -r, --release <arg> - Release tag.
38 # -q, -b, --build <arg> - Release build qualifier (default "debug", or "prof").
39 # --localdir <arg> - Larsoft local test release directory (default none).
40 # --localtar <arg> - Tarball of local test release.
41 # --mrb - Ignored (for compatibility).
42 # --srt - Exit with error status (SRT run time no longer supported).
43 #
44 # Other options.
45 #
46 # -h, --help - Print help.
47 # -i, --interactive - For interactive use.
48 # -g, --grid - Be grid-friendly.
49 # --group <arg> - Group or experiment (required).
50 # --workdir <arg> - Work directory (required).
51 # --outdir <arg> - Output directory (required).
52 # --logdir <arg> - Log directory (required).
53 # --scratch <arg> - Scratch directory (only for interactive).
54 # --cluster <arg> - Job cluster (override $CLUSTER)
55 # --process <arg> - Process within cluster (override $PROCESS).
56 # --procmap <arg> - Name of process map file (override $PROCESS).
57 # --init-script <arg> - User initialization script execute.
58 # --init-source <arg> - User initialization script to source (bash).
59 # --end-script <arg> - User end-of-job script to execute.
60 # --init <path> - Absolute path of environment initialization script.
61 #
62 # End options.
63 #
64 # Run time environment setup.
65 #
66 # MRB run-time environmental setup is controlled by four options:
67 # --release (-r), --build (-b, -q), --localdir, and --localtar.
68 #
69 # a) Use option --release or -r to specify version of top-level product(s).
70 # b) Use option --build or -b to specify build full qualifiers (e.g.
71 # "debug:e5" or "e5:prof").
72 # c) Options --localdir or --localtar are used to specify your local
73 # test release. Use one or the other (not both).
74 #
75 # Use --localdir to specify the location of your local install
76 # directory ($MRB_INSTALL).
77 #
78 # Use --localtar to specify the location of a tarball of your
79 # install directory (made relative to $MRB_INSTALL).
80 #
81 # Note that --localdir is not grid-friendly.
82 #
83 # Notes.
84 #
85 # 1. Each batch worker is uniquely identified by two numbers stored
86 # in environment variables $CLUSTER and $PROCESS (the latter is
87 # a small integer that starts from zero and varies for different
88 # jobs in a parallel job group). These environment variables are
89 # normally set by the batch system, but can be overridden by options
90 # --cluster, --process, and --procmap (e.g. to rerun failed jobs).
91 #
92 # 2. The work directory must be set to an existing directory owned
93 # by the submitter and readable by the batch worker. Files from the
94 # work directory are copied to the batch worker scratch directory at
95 # the start of the job.
96 #
97 # 3. The initialization and end-of-job
98 # scripts (options --init-script, --init-source, --end-script) may
99 # be stored in the work directory specified by option --workdir, or they
100 # may be specified as absolute paths visible on the worker node.
101 #
102 # 4. A local test release may be specified as an absolute path using
103 # --localdir, or a tarball using --localtar. The location of the tarball
104 # may be specified as an absolute path visible on the worker, or a
105 # relative path relative to the work directory.
106 #
107 # 5. The output directory must exist and be writable by the batch
108 # worker (i.e. be group-writable for grid jobs). The worker
109 # makes a new subdirectory called ${CLUSTER}_${PROCESS} in the output
110 # directory and copies all files in the batch scratch directory there
111 # at the end of the job. If the output directory is not specified, the
112 # default is /grid/data/<group>/outstage/<user> (user is defined as
113 # owner of work directory).
114 #
115 # 6. This script reads input files from sam using the standard sam project api.
116 # All files are fetched from sam, then they are combined by a single
117 # invocation of hadd. This way of working implies an upper limit on
118 # the number of files that can be combined in a single worker.
119 #
120 #
121 # Created: H. Greenlee, 29-Aug-2012
122 #
123 #------------------------------------------------------------------
124 
# Parse arguments.

TFILE=""              # Output TFile name (-T/--TFileName).
NFILE=10000           # Max number of files to fetch per worker (--nfile).
ARGS=""               # Extra arguments passed verbatim to hadd (--args).
UPS_PRDS=""           # Comma-separated top-level ups products (--ups).
REL=""                # Release tag (-r/--release).
QUAL=""               # Release build qualifier (-q/-b/--build).
LOCALDIR=""           # Local test release directory (--localdir).
LOCALTAR=""           # Local test release tarball (--localtar).
INTERACTIVE=0         # Interactive-mode flag (-i/--interactive).
GRP=""                # Group or experiment (--group, required).
WORKDIR=""            # Work directory (--workdir, required).
OUTDIR=""             # Output directory (--outdir, required).
LOGDIR=""             # Log directory (--logdir, required).
SCRATCH=""            # Scratch directory (--scratch, interactive only).
CLUS=""               # Job cluster override (--cluster).
PROC=""               # Process-within-cluster override (--process).
PROCMAP=""            # Process map file (--procmap).
INITSCRIPT=""         # User initialization script to execute (--init-script).
INITSOURCE=""         # User initialization script to source (--init-source).
ENDSCRIPT=""          # User end-of-job script (--end-script).
SAM_USER=$GRID_USER   # Sam user (--sam_user, defaults to $GRID_USER).
SAM_GROUP=""          # Sam group (--sam_group, defaults to --group).
SAM_STATION=""        # Sam station (--sam_station, defaults to --group).
SAM_DEFNAME=""        # Sam dataset definition name (--sam_defname).
SAM_PROJECT=""        # Sam project name (--sam_project).
SAM_START=0           # Whether this worker starts/stops the project (--sam_start).
RECUR=0               # Recursive input dataset, forces snapshot (--recur).
SAM_SCHEMA=""         # Sam schema; "root" streams files via xrootd (--sam_schema).
OS=""                 # Copy of the jobsub --os argument (--os).
IFDH_OPT=""           # Extra options passed to ifdh commands.
INIT=""               # Environment initialization script path (--init).
declare -a DATAFILETYPES   # Data file types (--data_file_type, default "root").
159 
# Scan the command line.  Options with a value consume the following
# argument; options are processed left to right.
while [ $# -gt 0 ]; do
  case "$1" in

    # Help: print the comment header between "Usage:" and "End options".
    -h|--help )
      awk '/^# Usage:/,/^# End options/{print $0}' $0 | cut -c3- | head -n -2
      exit
      ;;

    # Ignored for compatibility; both take (and discard) an argument.
    -c|--config|-n|--nevts )
      if [ $# -gt 1 ]; then shift; fi
      ;;

    # Output TFile.
    -T|--TFileName )
      if [ $# -gt 1 ]; then TFILE=$2; shift; fi
      ;;

    # Number of files to process.
    --nfile )
      if [ $# -gt 1 ]; then NFILE=$2; shift; fi
      ;;

    # Specify data file types (repeatable).
    --data_file_type )
      if [ $# -gt 1 ]; then DATAFILETYPES+=("$2"); shift; fi
      ;;

    # Sam options.
    --sam_user )
      if [ $# -gt 1 ]; then SAM_USER=$2; shift; fi
      ;;
    --sam_group )
      if [ $# -gt 1 ]; then SAM_GROUP=$2; shift; fi
      ;;
    --sam_station )
      if [ $# -gt 1 ]; then SAM_STATION=$2; shift; fi
      ;;
    --sam_defname )
      if [ $# -gt 1 ]; then SAM_DEFNAME=$2; shift; fi
      ;;
    --sam_project )
      if [ $# -gt 1 ]; then SAM_PROJECT=$2; shift; fi
      ;;
    --sam_start )
      SAM_START=1
      ;;
    --recur )
      RECUR=1
      ;;
    --sam_schema )
      if [ $# -gt 1 ]; then SAM_SCHEMA=$2; shift; fi
      ;;
    --os )
      if [ $# -gt 1 ]; then OS=$2; shift; fi
      ;;

    # Everything after --args goes verbatim to the hadd command line.
    --args )
      if [ $# -gt 1 ]; then
        shift
        ARGS=$@
        break
      fi
      ;;

    # Larsoft options.
    --ups )
      if [ $# -gt 1 ]; then UPS_PRDS=$2; shift; fi
      ;;
    -r|--release )
      if [ $# -gt 1 ]; then REL=$2; shift; fi
      ;;
    -q|-b|--build )
      if [ $# -gt 1 ]; then QUAL=$2; shift; fi
      ;;
    --localdir )
      if [ $# -gt 1 ]; then LOCALDIR=$2; shift; fi
      ;;
    --localtar )
      if [ $# -gt 1 ]; then LOCALTAR=$2; shift; fi
      ;;

    # MRB flag (ignored for compatibility).
    --mrb )
      ;;

    # SRT run time is no longer supported.
    --srt )
      echo "SRT run time environment is no longer supported."
      exit 1
      ;;

    # Interactive flag.
    -i|--interactive )
      INTERACTIVE=1
      ;;

    # Grid flag (no effect).
    -g|--grid )
      ;;

    # Directories and job identification.
    --group )
      if [ $# -gt 1 ]; then GRP=$2; shift; fi
      ;;
    --workdir )
      if [ $# -gt 1 ]; then WORKDIR=$2; shift; fi
      ;;
    --outdir )
      if [ $# -gt 1 ]; then OUTDIR=$2; shift; fi
      ;;
    --logdir )
      if [ $# -gt 1 ]; then LOGDIR=$2; shift; fi
      ;;
    --scratch )
      if [ $# -gt 1 ]; then SCRATCH=$2; shift; fi
      ;;
    --cluster )
      if [ $# -gt 1 ]; then CLUS=$2; shift; fi
      ;;
    --process )
      if [ $# -gt 1 ]; then PROC=$2; shift; fi
      ;;
    --procmap )
      if [ $# -gt 1 ]; then PROCMAP=$2; shift; fi
      ;;

    # User hook scripts.
    --init-script )
      if [ $# -gt 1 ]; then INITSCRIPT=$2; shift; fi
      ;;
    --init-source )
      if [ $# -gt 1 ]; then INITSOURCE=$2; shift; fi
      ;;
    --end-script )
      if [ $# -gt 1 ]; then ENDSCRIPT=$2; shift; fi
      ;;

    # Environment initialization script path.
    --init )
      if [ $# -gt 1 ]; then INIT=$2; shift; fi
      ;;

    # Anything else is an error.
    * )
      echo "Unknown option $1"
      exit 1
  esac
  shift
done
445 
# Debug dump of parsed options (normally commented out).
#echo "TFILE=$TFILE"
#echo "NFILE=$NFILE"
#echo "ARGS=$ARGS"
#echo "REL=$REL"
#echo "QUAL=$QUAL"
#echo "LOCALDIR=$LOCALDIR"
#echo "LOCALTAR=$LOCALTAR"
#echo "INTERACTIVE=$INTERACTIVE"
#echo "GRP=$GRP"
#echo "WORKDIR=$WORKDIR"
#echo "OUTDIR=$OUTDIR"
#echo "LOGDIR=$LOGDIR"
#echo "SCRATCH=$SCRATCH"
#echo "CLUS=$CLUS"
#echo "PROC=$PROC"
#echo "INITSCRIPT=$INITSCRIPT"
#echo "INITSOURCE=$INITSOURCE"
#echo "ENDSCRIPT=$ENDSCRIPT"

# Set default data file types ("root").

if [ ${#DATAFILETYPES[@]} -eq 0 ]; then
  DATAFILETYPES[0]=root
fi

# Done with arguments.
# Log some information about the worker node.

echo "Nodename: `hostname -f`"
id
echo "Load average:"
cat /proc/loadavg
477 
# Set defaults.

if [ x$QUAL = x ]; then
  QUAL="prof:e9"
fi

# Sam group and station both default to the experiment group.

if [ x$SAM_GROUP = x ]; then
  SAM_GROUP=$GRP
fi

if [ x$SAM_STATION = x ]; then
  SAM_STATION=$GRP
fi

# Standardize sam_schema (xrootd -> root, xroot -> root).

if [ x$SAM_SCHEMA = xxrootd -o x$SAM_SCHEMA = xxroot ]; then
  SAM_SCHEMA=root
fi
500 
# Make sure work directory is defined and exists.

if [ x$WORKDIR = x ]; then
  echo "Work directory not specified."
  exit 1
fi
echo "Work directory: $WORKDIR"
508 
# Initialize experiment ups products and mrb.

echo "Initializing ups and mrb."

if [ x$INIT != x ]; then
  # A custom environment initialization script was specified.
  if [ ! -f "$INIT" ]; then
    echo "Environment initialization script $INIT not found."
    exit 1
  fi
  echo "Sourcing $INIT"
  source "$INIT"
else
  # Default: setup_experiment.sh is delivered via jobsub -f.
  echo "Sourcing setup_experiment.sh"
  source "${CONDOR_DIR_INPUT}/setup_experiment.sh"
fi

echo PRODUCTS=$PRODUCTS

# Ifdh may already be setup by jobsub wrapper.
# If not, set it up here.

echo "IFDHC_DIR=$IFDHC_DIR"
if [ x$IFDHC_DIR = x ]; then
  echo "Setting up ifdhc, because jobsub did not set it up."
  setup ifdhc
fi
echo "IFDHC_DIR=$IFDHC_DIR"

# Set GROUP environment variable (required by downstream tools).

unset GROUP
if [ x$GRP != x ]; then
  GROUP=$GRP
else
  echo "GROUP not specified."
  exit 1
fi
export GROUP
echo "Group: $GROUP"

echo "IFDH_OPT=$IFDH_OPT"
# Make sure output directory exists and is writable.

if [ x$OUTDIR = x ]; then
  echo "Output directory not specified."
  exit 1
fi
echo "Output directory: $OUTDIR"

# Make sure log directory exists and is writable.

if [ x$LOGDIR = x ]; then
  echo "Log directory not specified."
  exit 1
fi
echo "Log directory: $LOGDIR"

# Make sure scratch directory is defined.
# For batch, the scratch directory is always $_CONDOR_SCRATCH_DIR.
# For interactive, the scratch directory is specified by option
# --scratch or --outdir.

if [ $INTERACTIVE -eq 0 ]; then
  SCRATCH=$_CONDOR_SCRATCH_DIR
else
  if [ x$SCRATCH = x ]; then
    SCRATCH=$OUTDIR
  fi
fi
if [ x$SCRATCH = x -o ! -d "$SCRATCH" -o ! -w "$SCRATCH" ]; then
  echo "Local scratch directory not defined or not writable."
  exit 1
fi
583 
# Create the scratch directory in the condor scratch directory.
# Copied from condor_lBdetMC.sh.
# Scratch directory path is stored in $TMP.
# Scratch directory is automatically deleted when shell exits.

# Do not change this section.
# It creates a temporary working directory that automatically cleans up all
# leftover files at the end.
TMP=`mktemp -d ${SCRATCH}/working_dir.XXXXXXXXXX`
TMP=${TMP:-${SCRATCH}/working_dir.$$}

{ [[ -n "$TMP" ]] && mkdir -p "$TMP"; } || \
  { echo "ERROR: unable to create temporary directory!" 1>&2; exit 1; }
trap "[[ -n \"$TMP\" ]] && { rm -rf \"$TMP\"; }" 0
cd $TMP
# End of the section you should not change.

echo "Scratch directory: $TMP"

# Copy files from work directory to scratch directory.

echo "No longer fetching files from work directory."
echo "that's now done with using jobsub -f commands"
mkdir work
cp ${CONDOR_DIR_INPUT}/* ./work/
cd work
echo "Local working directory:"
pwd
ls
echo
614 
# Save the hostname and condor job id.

hostname > hostname.txt
echo ${CLUSTER}.${PROCESS} > jobid.txt

# Set default CLUSTER and PROCESS environment variables for interactive jobs.

if [ $INTERACTIVE -ne 0 ]; then
  CLUSTER=`date +%s`   # Fake cluster id from unix time stamp.
  PROCESS=0            # Default zero for interactive.
fi

# Override CLUSTER and PROCESS from command line options.

if [ x$CLUS != x ]; then
  CLUSTER=$CLUS
fi
if [ x$PROC != x ]; then
  PROCESS=$PROC
fi
if [ x$PROCMAP != x ]; then
  if [ -f "$PROCMAP" ]; then
    # Remap: the real process number is on line $PROCESS+1 of the map file.
    PROCESS=`sed -n $(( $PROCESS + 1 ))p $PROCMAP`
  else
    echo "Process map file $PROCMAP not found."
    exit 1
  fi
fi
if [ x$CLUSTER = x ]; then
  echo "CLUSTER not specified."
  exit 1
fi
if [ x$PROCESS = x ]; then
  echo "PROCESS not specified."
  exit 1
fi
echo "Procmap: $PROCMAP"
echo "Cluster: $CLUSTER"
echo "Process: $PROCESS"
654 
# Construct name of output subdirectory.

OUTPUT_SUBDIR=${CLUSTER}_${PROCESS}
echo "Output subdirectory: $OUTPUT_SUBDIR"

# Make sure init script exists and is executable (if specified).

if [ x$INITSCRIPT != x ]; then
  if [ -f "$INITSCRIPT" ]; then
    chmod +x $INITSCRIPT
  else
    echo "Initialization script $INITSCRIPT does not exist."
    exit 1
  fi
fi

# Make sure init source script exists (if specified).

if [ x$INITSOURCE != x -a ! -f "$INITSOURCE" ]; then
  echo "Initialization source script $INITSOURCE does not exist."
  exit 1
fi

# Make sure end-of-job script exists and is executable (if specified).

if [ x$ENDSCRIPT != x ]; then
  if [ -f "$ENDSCRIPT" ]; then
    chmod +x $ENDSCRIPT
  else
    echo "End-of-job script $ENDSCRIPT does not exist."
    exit 1
  fi
fi
688 
# MRB run time environment setup goes here.

# Setup local test release, if any.

if [ x$LOCALDIR != x ]; then
  mkdir $TMP/local
  cd $TMP/local

  # Copy test release directory recursively.

  echo "Copying local test release from directory ${LOCALDIR}."

  # Make sure ifdhc is setup.

  if [ x$IFDHC_DIR = x ]; then
    echo "Setting up ifdhc before fetching local directory."
    setup ifdhc
  fi
  echo "IFDHC_DIR=$IFDHC_DIR"
  ifdh cp -r $IFDH_OPT $LOCALDIR .
  stat=$?
  if [ $stat -ne 0 ]; then
    echo "ifdh cp failed with status ${stat}."
    exit $stat
  fi
  # Restore execute permission, which ifdh cp does not preserve.
  find . -name \*.py -exec chmod +x {} \;
  find . -name \*.sh -exec chmod +x {} \;

  # Setup the environment.

  cd $TMP/work
  echo "Initializing localProducts from ${LOCALDIR}."
  if [ ! -f $TMP/local/setup ]; then
    echo "Local test release directory $LOCALDIR does not contain a setup script."
    exit 1
  fi
  # Rewrite MRB_INSTALL and MRB_TOP to point at the scratch copy.
  sed "s@setenv MRB_INSTALL.*@setenv MRB_INSTALL ${TMP}/local@" $TMP/local/setup | \
    sed "s@setenv MRB_TOP.*@setenv MRB_TOP ${TMP}@" > $TMP/local/setup.local
  . $TMP/local/setup.local
  #echo "MRB_INSTALL=${MRB_INSTALL}."
  #echo "MRB_QUALS=${MRB_QUALS}."
  echo "Setting up all localProducts."
  # Unsetup ifdhc first so mrbslp can pick its own version.
  if [ x$IFDHC_DIR != x ]; then
    unsetup ifdhc
  fi
  mrbslp
fi
cd $TMP/work
737 
# Setup local larsoft test release from tarball.

if [ x$LOCALTAR != x ]; then
  mkdir $TMP/local
  cd $TMP/local

  # Fetch the tarball.

  echo "Fetching test release tarball ${LOCALTAR}."

  # Make sure ifdhc is setup.

  if [ x$IFDHC_DIR = x ]; then
    echo "Setting up ifdhc before fetching tarball."
    setup ifdhc
  fi
  echo "IFDHC_DIR=$IFDHC_DIR"
  ifdh cp $LOCALTAR local.tar
  stat=$?
  if [ $stat -ne 0 ]; then
    echo "ifdh cp failed with status ${stat}."
    exit $stat
  fi

  # Extract the tarball.

  tar -xf local.tar

  # Setup the environment.

  cd $TMP/work
  echo "Initializing localProducts from tarball ${LOCALTAR}."
  # Rewrite MRB_INSTALL and MRB_TOP to point at the scratch copy.
  sed "s@setenv MRB_INSTALL.*@setenv MRB_INSTALL ${TMP}/local@" $TMP/local/setup | \
    sed "s@setenv MRB_TOP.*@setenv MRB_TOP ${TMP}@" > $TMP/local/setup.local
  . $TMP/local/setup.local
  #echo "MRB_INSTALL=${MRB_INSTALL}."
  #echo "MRB_QUALS=${MRB_QUALS}."
  echo "Setting up all localProducts."
  # Unsetup ifdhc first so mrbslp can pick its own version.
  if [ x$IFDHC_DIR != x ]; then
    unsetup ifdhc
  fi
  mrbslp
fi
781 
# Setup specified version of top level run time products
# (if specified, and if local test release did not set them up).

if [ x$IFDHC_DIR != x ]; then
  unsetup ifdhc
fi

# Set up each requested product unless the local release already did.
for prd in `echo $UPS_PRDS | tr , ' '`
do
  if ! ups active | grep -q $prd; then
    echo "Setting up $prd $REL -q ${QUAL}."
    setup $prd $REL -q $QUAL
  fi
done

ups active

cd $TMP/work

# In case mrb setup didn't setup a version of ifdhc, set up ifdhc again.

if [ x$IFDHC_DIR = x ]; then
  echo "Setting up ifdhc again, because larsoft did not set it up."
  setup ifdhc
fi
echo "IFDH_ART_DIR=$IFDH_ART_DIR"
echo "IFDHC_DIR=$IFDHC_DIR"
809 
# Start project (if necessary), and consumer process.

PURL=''
CPID=''

# Make sure a project name has been specified.

if [ x$SAM_PROJECT = x ]; then
  echo "No sam project was specified."
  exit 1
fi
echo "Sam project: $SAM_PROJECT"

# Start project (if requested).

if [ $SAM_START -ne 0 ]; then

  # If recursive flag, take snapshot of input dataset.

  if [ $RECUR -ne 0 ]; then
    echo "Forcing snapshot"
    SAM_DEFNAME=${SAM_DEFNAME}:force
  fi

  # Start the project.

  if [ x$SAM_DEFNAME != x ]; then

    echo "Starting project $SAM_PROJECT using sam dataset definition $SAM_DEFNAME"
    ifdh startProject $SAM_PROJECT $SAM_STATION $SAM_DEFNAME $SAM_USER $SAM_GROUP
    if [ $? -eq 0 ]; then
      echo "Start project succeeded."
    else
      echo "Start project failed."
      exit 1
    fi
  else
    echo "Start project requested, but no definition was specified."
    exit 1
  fi
fi

# Get the project url of a running project (maybe the one we just started,
# or maybe started externally). This command has to succeed, or we can't
# continue.

PURL=`ifdh findProject $SAM_PROJECT $SAM_STATION`
if [ x$PURL = x ]; then
  echo "Unable to find url for project ${SAM_PROJECT}."
  exit 1
else
  echo "Project url: $PURL"
fi

# Start the consumer process. This command also has to succeed.

NODE=`hostname`
APPFAMILY=root
APPNAME=hadd

echo "Starting consumer process."
echo "ifdh establishProcess $PURL $APPNAME $REL $NODE $SAM_USER $APPFAMILY hadd $NFILE $SAM_SCHEMA"
CPID=`ifdh establishProcess $PURL $APPNAME $REL $NODE $SAM_USER $APPFAMILY hadd $NFILE $SAM_SCHEMA`
if [ x$CPID = x ]; then
  echo "Unable to start consumer process for project url ${PURL}."
  exit 1
else
  echo "Consumer process id $CPID"
fi

# Stash away the project name and consumer process id in case we need them
# later for bookkeeping.

echo $SAM_PROJECT > sam_project.txt
echo $CPID > cpid.txt
885 
# Run/source optional initialization scripts.

if [ x$INITSCRIPT != x ]; then
  echo "Running initialization script ${INITSCRIPT}."
  # Capture the script's exit status explicitly.  (The previous
  # "if ! cmd; then exit $?" form exited 0 on failure, because $?
  # reflects the negated status of the "!" pipeline.)
  ./${INITSCRIPT}
  status=$?
  if [ $status -ne 0 ]; then
    exit $status
  fi
fi
if [ x$INITSOURCE != x ]; then
  echo "Sourcing initialization source script ${INITSOURCE}."
  . $INITSOURCE
  status=$?
  if [ $status -ne 0 ]; then
    exit $status
  fi
fi

# Save a copy of the environment, which can be helpful for debugging.

env > env.txt
906 
# Fetch files and construct local input list.
# Keep going until we have fetched $NFILE files or no more files are available.

rm -f condor_hadd_input.list
rm -f transferred_uris.list
touch condor_hadd_input.list
touch transferred_uris.list

while [ $NFILE -gt 0 ]
do
  NFILE=$(( $NFILE - 1 ))

  # Get uri of the next file.

  fileuri=`ifdh getNextFile $PURL $CPID`
  stat=$?
  if [ $stat != 0 ]; then
    echo "ifdh getNextFile returned status $stat"
    break
  fi
  if [ x$fileuri = x ]; then
    echo "ifdh getNextFile did not return anything."
    break
  fi

  # Break out of the loop if the same uri is returned twice.
  # (Fixed: message formerly referenced $filename, which is not yet set here.)

  if grep -q $fileuri transferred_uris.list; then
    echo "File $fileuri was returned twice by sam."
    break
  fi

  # Find the local path to which this uri will be fetched.
  # Files streamed via xrootd (root: uris) are used in place, not copied.

  filepath=$fileuri
  if [[ ! $fileuri =~ ^root: ]]; then
    filepath=`ifdh localPath $fileuri`
    stat=$?
    if [ $stat != 0 ]; then
      echo "ifdh localPath returned status $stat"
      break
    fi
    if [ x$filepath = x ]; then
      echo "ifdh localPath did not return anything."
      break
    fi

    # Transfer the file.

    ifdh fetchInput $fileuri
    stat=$?
    if [ $stat != 0 ]; then
      echo "ifdh fetchInput returned status $stat"
      break
    fi
    if [ ! -f $filepath ]; then
      echo "Transferred file $fileuri not found."
      break
    fi
  fi

  # If we get to here, file has been transferred successfully.
  # Update the file status to consumed.

  filename=`basename $filepath`
  ifdh updateFileStatus $PURL $CPID $filename consumed

  # Update file lists.

  echo $fileuri >> transferred_uris.list
  echo $filepath >> condor_hadd_input.list

done
980 
# Run hadd.  The "@file" form makes hadd read the input file names
# from the list we just built.

hadd $TFILE @condor_hadd_input.list
stat=$?
echo $stat > hadd.stat
echo $stat > lar.stat   # Duplicate under the name downstream bookkeeping expects.
echo "hadd completed with exit status ${stat}."

# Setup up current version of ifdhc (may be different than version setup by larsoft).

#echo "Setting up current version of ifdhc."
#if [ x$IFDHC_DIR != x ]; then
#  unsetup ifdhc
#fi
#setup ifdhc v1_3_2
echo "IFDHC_DIR=$IFDHC_DIR"
997 
# Sam cleanups.
# Get list of consumed files.

ifdh translateConstraints "consumer_process_id $CPID and consumed_status consumed" > consumed_files.list

# End consumer process.

ifdh endProcess $PURL $CPID

# Stop project (only if this worker started it).

if [ $SAM_START -ne 0 ]; then
  echo "Stopping project."
  ifdh endProject $PURL
fi

# Delete input files (skip for streamed xrootd input, which was never copied).

if [ -f condor_hadd_input.list -a x$SAM_SCHEMA != xroot ]; then
  while read file; do
    rm -f $file
  done < condor_hadd_input.list
fi
1021 
# Run optional end-of-job script.

if [ x$ENDSCRIPT != x ]; then
  echo "Running end-of-job script ${ENDSCRIPT}."
  # Capture the script's exit status explicitly.  (The previous
  # "if ! cmd; then exit $?" form exited 0 on failure, because $?
  # reflects the negated status of the "!" pipeline.)
  ./${ENDSCRIPT}
  status=$?
  if [ $status -ne 0 ]; then
    exit $status
  fi
fi
1030 
# Do root file checks.

# Randomize the names of the output root files (avoids name clashes
# when many workers write into the same output directory).
for root in *.root; do
  # Skip the literal glob pattern when no .root files exist.
  [ -f $root ] || continue
  base=`basename $root .root`_`uuidgen`
  mv $root ${base}.root
  if [ -f ${root}.json ]; then
    mv ${root}.json ${base}.root.json
  fi
done

# Calculate root metadata for all root files and save as json file.
# If json metadata already exists, merge with newly generated root metadata.

for root in *.root; do
  if [ -f $root ]; then
    json=${root}.json
    if [ -f $json ]; then
      ./root_metadata.py --output="${json}2" $root >& /dev/null
      ./merge_json.py $json ${json}2 > ${json}3
      mv -f ${json}3 $json
      rm ${json}2
    else
      ./root_metadata.py --output="$json" $root >& /dev/null
    fi
  fi
done
1058 
# Make local output directories for files that don't have a subrun.

mkdir out
mkdir log

# Stash all of the files we want to save in the local directories we just created.

# First move .root and corresponding .json files into the out and log subdirectories.

for root in *.root; do
  if [ -f $root ]; then
    mv $root out
    if [ -f ${root}.json ]; then
      mv ${root}.json log
    fi
  fi
done

# Copy any remaining files into the log subdirectory.
# These small files get replicated.

for outfile in *; do
  if [ -f $outfile ]; then
    mv $outfile log
  fi
done

# Make a tarball of the log directory contents, and save the tarball in the log directory.

rm -f log.tar
tar -cjf log.tar -C log .
mv log.tar log
1091 
# Create remote output and log directories.

export IFDH_CP_MAXRETRIES=5

echo "Make directory ${LOGDIR}/${OUTPUT_SUBDIR}."
date
ifdh mkdir $IFDH_OPT ${LOGDIR}/$OUTPUT_SUBDIR
echo "Done making directory ${LOGDIR}/${OUTPUT_SUBDIR}."
date

# Only make the output directory separately when it differs from the log directory.
if [ ${OUTDIR} != ${LOGDIR} ]; then
  echo "Make directory ${OUTDIR}/${OUTPUT_SUBDIR}."
  date
  ifdh mkdir $IFDH_OPT ${OUTDIR}/$OUTPUT_SUBDIR
  echo "Done making directory ${OUTDIR}/${OUTPUT_SUBDIR}."
  date
fi

# Transfer tarball in log subdirectory.

statout=0
echo "ls log"
ls log
echo "ifdh cp -D $IFDH_OPT log/log.tar ${LOGDIR}/$OUTPUT_SUBDIR"
ifdh cp -D $IFDH_OPT log/log.tar ${LOGDIR}/$OUTPUT_SUBDIR
date
stat=$?
if [ $stat -ne 0 ]; then
  statout=1
  echo "ifdh cp failed with status ${stat}."
fi

# Transfer root files in out subdirectory (if any).

if [ "$( ls -A out )" ]; then
  echo "ifdh cp -D $IFDH_OPT out/* ${OUTDIR}/$OUTPUT_SUBDIR"
  ifdh cp -D $IFDH_OPT out/* ${OUTDIR}/$OUTPUT_SUBDIR
  stat=$?
  if [ $stat -ne 0 ]; then
    statout=1
    echo "ifdh cp failed with status ${stat}."
  fi
fi

# If all transfers succeeded, propagate the hadd exit status as our own.
if [ $statout -eq 0 -a -f log/hadd.stat ]; then
  statout=`cat log/hadd.stat`
fi

exit $statout