SubmitCommand.sh
Go to the documentation of this file.
#!/bin/bash
############################################################################
############################################################################
############################################################################
############################################################################
############################################################################
## SubmitCommand.sh is used for submitting DUNE Photon Library Generation ##
## using the FNAL Grid                                                    ##
## Author: Alex Himmel (ahimmel@fnal.gov)                                 ##
## Updated by Jason Stock (jason.stock@mines.sdsmt.edu) 2017-09-11        ##
############################################################################
############################################################################
############################################################################
############################################################################
############################################################################
############################################################################
# NOTE: the shebang has to be the very first line of the file to be honoured;
# this script relies on bash-only features (e.g. here-strings).

# Default settings. Each one can be replaced by a command-line flag.
tarfile=                  # tar file of a larsoft installation; empty means none given
checkVar=0                # non-zero: print the jobsub command without submitting
testVar=0                 # non-zero: send a single short test job
offsiteVar=0              # non-zero: allow jobs to go offsite
memory=2500MB             # memory requested for each job
expectedlifetime=8h       # expected run time of each job
makeupJobs=0              # non-zero: number of makeup jobs to submit
scriptIn=OpticalLibraryBuild_Grid_dune.sh
outdir=/pnfs/dune/scratch/users/${USER}/OpticalLibraries/OpticalLib_dune10kt_v2_1x2x6
fclIn=dune10kt_v2_1x2x6_buildopticallibrary_grid.fcl
USER=${USER} #Set the user to the default USER from the environment unless overridden
HELPFILE=SubmitCommand.hlp  # file printed by --help; looked up relative to the current directory
31 
##This block handles flags given to the program.
# Allowed flags are:
#  -h | --help     : Print the contents of $HELPFILE and exit.
#  -t | --tar      : Pass a tarfile of a larsoft installation to be setup on the cluster.
#                    Use full path to file.
#  -u | --user     : Override the user directory to write to on dCache *NOT RECOMMENDED
#     | --test     : run the test fcl file and a single job with short run time instead of building a new library
#  -c | --check    : Perform a dry run, returning the jobsub command, but not actually running any grid jobs.
#  -l | --lifetime : The amount of time a job should be expected to run on the cluster.
#  -s | --script   : The script to run on the grid for each job (By default OpticalLibraryBuild_Grid_dune.sh)
#  -f | --fcl      : The fcl file template to be given to $script. This will be used to build the individual fcl files for each grid job.
#  -o | --outdir   : The output directory for the results from the simulation. The default is the scratch space of the user who submitted the jobs.
#  -m | --memory   : the amount of memory to request from each node on the cluster. *NOT RECOMMENDED
#                    Allowed units are MB and GB
#  -n | --makeup   : The number of makeup jobs to process.
#     | --offsite  : Allow jobs to go offsite (e.g. the OSG).
#     | --debugSubmitCommand : Trace the script and pause before every command.
#
# Flags that take a value accept both "--flag value" and "--flag=value".
# A value-taking flag given without a value, or an unknown flag, ends the
# script with exit status 10. Parsing stops at "--" or at the first argument
# that is not a flag.

# Prints (on stderr) the error for a value-taking flag that got no value.
#   $1 - key of the flag (script, fcl, outdir, memory, makeup, tar, lifetime, user)
report_missing_value() {
  case $1 in
    script)
      printf 'ERROR: "--script" requires an input parameter to give the script to be executed on the grid nodes.\n'
      ;;
    fcl)
      printf 'ERROR: "--fcl" requires and input with the full path of the fcl file to be passed as the template for each grid job.\n'
      ;;
    outdir)
      printf 'ERROR: "--outdir" requires an input with the full path of the directory where library generation results should be written.\n'
      ;;
    memory)
      printf 'ERROR: "--memory" requires an input value to pass on to the cluster.\n'
      ;;
    makeup)
      printf 'ERROR: "--makeup" requires the number of makeup jobs to process.\n'
      ;;
    tar)
      printf 'ERROR: "--tar" requires a path to a tar file of a larsoft installation.\n'
      ;;
    lifetime)
      printf 'ERROR: "--lifetime" requires a parameter telling the cluster what the upper bound of each jobs runtime should be.\n'
      ;;
    user)
      printf 'ERROR: "--user" requires a username to use for the dCache directory.\n'
      ;;
  esac >&2
}

# Stores a flag value in the matching global setting and reports it.
#   $1 - key of the flag (see report_missing_value)
#   $2 - value given by the user
# Values are passed to printf as arguments, never as part of the format, so
# '%' or '\' inside a path cannot corrupt the message.
apply_submit_option() {
  case $1 in
    script)
      scriptIn=$2
      printf '\nscriptIn set by user.\nscriptIn=%s\n' "$scriptIn"
      ;;
    fcl)
      fclIn=$2
      printf '\nInput fcl file set by user.\nfclIn=%s\n' "$fclIn"
      ;;
    outdir)
      # Drop one trailing slash (and any spaces following it).
      outdir=$(sed 's/\/ *$//' <<<"$2")
      printf '\noutput directory set by user.\noutput directory will be %s\n' "$outdir"
      ;;
    memory)
      memory=$2
      printf '\nCluster memory requirement set by user.\nmemory request will be %s\n' "$memory"
      ;;
    makeup)
      makeupJobs=$2
      printf "\nNumber Of Jobs required for Makeup Jobs set. Your OpticalLibraryBuild_Grid_dune.sh.\n If your OpticalLibraryBuild_Grid_dune.sh does not contain the correct makeup list, this step will not behave as expecte.\n"
      ;;
    tar)
      tarfile=$2
      printf '\nInput tar file specified.\nLArSoft/dunetpc will be setup from %s\n' "$tarfile"
      ;;
    lifetime)
      expectedlifetime=$2
      printf '\nCluster upper limit on runtime set by user\nThe requested runtime will be %s\n' "$expectedlifetime"
      ;;
    user)
      USER=$2
      printf '\nUSER for outputs over-ridden by user. CAUTION: This will likely not work. Only do this if you know exaclty why you are doing so.\nUser is set to %s\n' "$USER"
      ;;
  esac
}

# Parses the command-line flags passed as arguments and updates the global
# settings (scriptIn, fclIn, outdir, memory, makeupJobs, tarfile,
# expectedlifetime, USER, checkVar, testVar, offsiteVar).
# Exits with status 10 on a missing value or an unknown flag; --help exits
# with 0, or 1 when the help file is missing.
parse_submit_options() {
  local envUser=$USER   # user name before any --user override
  local outdirGiven=0   # 1 once --outdir was passed explicitly
  local key value oldPrefix newPrefix

  while :; do
    key=
    case $1 in
      --debugSubmitCommand)
        printf "\nSetting script debugger. This is not normal run mode.\n"
        set -x
        set -v
        # Wait for <Enter> before each command is executed.
        trap read debug
        ;;
      --help|-h)
        if [ -f "$HELPFILE" ]; then
          cat "$HELPFILE"
        else
          printf "Help File not found.\n" >&2
          exit 1
        fi
        exit 0
        ;;
      --check|-c)
        checkVar=1
        printf "\nSetting check mode ON.\n"
        ;;
      --test)
        testVar=1
        printf "\nSetting test mode ON.\n"
        ;;
      --offsite)
        offsiteVar=1
        printf "\nAllow jobs to go offsite (e.g. the OSG).\n"
        ;;
      --script|-s|--script=?*)     key=script ;;
      --fcl|-f|--fcl=?*)           key=fcl ;;
      --outdir|-o|--outdir=?*)     key=outdir ;;
      --memory|-m|--memory=?*)     key=memory ;;
      --makeup|-n|--makeup=?*)     key=makeup ;;
      --tar|-t|--tar=?*)           key=tar ;;
      --lifetime|-l|--lifetime=?*) key=lifetime ;;
      --user|-u|--user=?*)         key=user ;;
      --)
        shift
        break
        ;;
      -?*)
        printf 'ERROR: Unknown option: %s\n' "$1" >&2
        exit 10
        ;;
      *)
        break
        ;;
    esac

    if [ -n "$key" ]; then
      case $1 in
        --*=*)
          # "--flag=value" form: the value is everything after the first '='.
          value=${1#*=}
          ;;
        *)
          # "--flag value" form: the value is the next argument.
          if [ -z "$2" ]; then
            report_missing_value "$key"
            exit 10
          fi
          value=$2
          shift
          ;;
      esac
      if [ "$key" = outdir ]; then
        outdirGiven=1
      fi
      apply_submit_option "$key" "$value"
    fi
    shift
  done

  # The default output directory was expanded with the original user name
  # before the flags were read. When --user changed the user and no explicit
  # --outdir was given, point the directory at the new user's area so that
  # the override takes effect regardless of flag order.
  if [ "$USER" != "$envUser" ] && [ "$outdirGiven" -eq 0 ]; then
    oldPrefix="/users/${envUser}/"
    newPrefix="/users/${USER}/"
    outdir=${outdir/"$oldPrefix"/"$newPrefix"}
    printf '\noutput directory follows the overridden user.\noutput directory will be %s\n' "$outdir"
  fi
}

printf '////////////////////////////////////////////////////////////////////\n'
printf '//////// DUNE PhotonLibrary Build System /////////////////////\n'
parse_submit_options "$@"
199 
200 
201 
# Create the three output areas below $outdir (root, fcl, log).
# "mkdir -p" leaves existing directories alone, so all three are always
# requested; a missing fcl/ or log/ next to an existing root/ is repaired too.
: "${outdir:?output directory is not set}"   # never fall back to /root, /fcl, /log
if ! mkdir -p -- "$outdir/root" "$outdir/fcl" "$outdir/log"; then
  printf '\nExiting with error. Could not create the output directories below\n%s\n' "$outdir" >&2
  exit 10
fi
207 
# Copies one input file into the output directory so it can be sent to the grid.
#   $1 - name used in the progress messages ("fcl" or "script")
#   $2 - name used in the "Source file for ... not found" message
#   $3 - source file
#   $4 - destination file
# Exits with status 10 when the source does not exist or the copy fails.
stage_for_grid() {
  local what=$1 label=$2 src=$3 dst=$4

  # Check the source first: nothing is removed unless there is something to
  # replace it with.
  if [ ! -e "$src" ]; then
    printf '\nExiting with error. Source file for %s not found. \nPlease make sure the %s \n%s \nexists.\n' \
      "$label" "$what" "$src" >&2
    exit 10
  fi

  # Source and destination can be the very same file (e.g. the input already
  # lives in the output directory). Removing the destination would then
  # delete the only copy, so leave it untouched.
  if [ "$src" -ef "$dst" ]; then
    printf '\n%s is already in place. Nothing to copy.\n' "$dst"
    return 0
  fi

  if [ -e "$dst" ]; then
    printf '\n%s already exists. Removing old file and replacing with new.\n' "$dst"
    rm -f -- "$dst"
  fi

  printf '\nPreparing %s for transfer to the grid.\ncp %s %s\n' "$what" "$src" "$dst"
  if ! cp -- "$src" "$dst"; then
    printf '\nExiting with error. Could not copy\n%s\nto\n%s\n' "$src" "$dst" >&2
    exit 10
  fi
}

# The staged copies keep the base name of the inputs and live directly in $outdir.
printf '\nfclIn=%s\n' "$fclIn"
printf 'basename fclIn=%s\n' "$(basename "$fclIn")"

fcl="$outdir/$(basename "$fclIn")"

printf 'fcl set.\nfcl=%s\n' "$fcl"

printf '\nscriptIn=%s\n' "$scriptIn"
printf 'basename scriptIn=%s\n' "$(basename "$scriptIn")"

script="$outdir/$(basename "$scriptIn")"

printf 'script set.\nscript=%s\n' "$script"

stage_for_grid fcl fcl "$fclIn" "$fcl"
stage_for_grid script Script "$scriptIn" "$script"
245 
# Build the argument strings that are later handed to jobsub_submit.
#   environmentVars - environment settings passed with every submission
#   clientargs      - resource, group, role and memory options plus the fcl file
#   larsoft         - mrb environment settings; empty when a tar file is used
#   fileargs        - the ROOT/FCL/LOG output directories
environmentVars="-e IFDH_CP_MAXRETRIES=5"
usage="DEDICATED,OPPORTUNISTIC"
if [ "${offsiteVar:-0}" -ne 0 ]; then
  usage="DEDICATED,OPPORTUNISTIC,OFFSITE"
fi
clientargs="--resource-provides=usage_model=$usage --OS=SL6 --group=dune -f $fcl --role=Analysis --memory=$memory "
if [ -n "$tarfile" ]; then
  printf "\nUsing tarball. Not setting LArSoft environment variables\n"
  larsoft=
  clientargs="${clientargs} --tar_file_name=dropbox://${tarfile} "
else
  # $environmentVars is deliberately not repeated here: the submit command
  # already passes it next to $larsoft, and including it in both places sent
  # "-e IFDH_CP_MAXRETRIES=5" twice.
  larsoft="-e mrb_top=$MRB_TOP -e mrb_project=dunetpc -e mrb_version=$MRB_PROJECT_VERSION -e mrb_quals=$MRB_QUALS "
fi

# NOTE(review): toolsargs is not used by any of the submit commands in this
# script; kept in case something outside this file reads it.
toolsargs="-q -g --opportunistic --OS=SL6 "
fileargs="-d ROOT $outdir/root -d FCL $outdir/fcl -d LOG $outdir/log "
#Test job vs real job
# Sets njobs, nphotons and thisjob (the job part of the jobsub command line)
# and appends the expected lifetime to clientargs.

# Both kinds of job request the same expected lifetime.
clientargs="$clientargs --expected-lifetime=$expectedlifetime "

# Only the file name of the staged fcl is handed to the grid script.
fclName=$(basename "$fcl")

if [ "${testVar:-0}" -ne 0 ]; then #TEST VAR IS SET. Run the test job
  printf "\n!!TEST JOB SET!!\nOnly a single job is being sent to the grid. This is for testing and debugging purposes, and will not build a complete library.\n"
  #Test job 1 - jobsub_client
  njobs=300000 #This is picked to select 10 voxels for 100x100x300 bins with 10 photons each.
  nphotons=10
  thisjob="-Q -N 1 file://$script $njobs $nphotons $fclName"
else
  printf "Building Library\n"
  #Real job - jobsub_client
  njobs=6000
  nphotons=50000
  if [ "${makeupJobs:-0}" -ne 0 ]; then
    # Makeup run: submit only $makeupJobs jobs and pass an extra "true"
    # argument to the grid script.
    # NOTE(review): presumably "true" switches the script to its makeup
    # list - confirm against the grid script.
    thisjob="-N $makeupJobs file://$script $njobs $nphotons $fclName true"
    printf 'thisjob="%s"\n' "$thisjob"
  else
    thisjob="-N $njobs file://$script $njobs $nphotons $fclName"
  fi
fi
285 
# Print the jobsub_submit command and, unless check mode is on, run it.
# The script ends here with the exit status of jobsub_submit (0 in check mode).
#
# With a tar file the LArSoft environment arguments are left out. The blank
# lines printed around the command and around the check-mode notice differ
# between the two cases; that framing is kept as it was.
if [ -n "$tarfile" ]; then
  submitArgs="$environmentVars $clientargs $fileargs $thisjob"
  commandFormat='\n\njobsub_submit %s \n\n\n'
  checkFormat='%s\n'
else
  submitArgs="$environmentVars $larsoft $clientargs $fileargs $thisjob"
  commandFormat='jobsub_submit %s\n'
  checkFormat='\n\n%s\n\n\n'
fi

# The formats are the constants chosen above; the data is passed as an argument.
# shellcheck disable=SC2059
printf "$commandFormat" "$submitArgs"

ret=0
if [ "${checkVar:-0}" -ne 0 ]; then
  # The hint used to name "-s" as the test flag, but -s selects the script.
  # shellcheck disable=SC2059
  printf "$checkFormat" 'CHECK Mode is set. The jobsub command will be printed, but will not be executed. Please check the command and run again without check mode. If you are trying to submit test jobs instead, the correct flag is --test.'
else
  # Word splitting is intended: submitArgs holds a whole argument list.
  # shellcheck disable=SC2086
  jobsub_submit $submitArgs
  ret=$?
fi

printf '\nExiting with status %s\n' "$ret"
exit "$ret"
307 
308