4 from subprocess
import Popen, PIPE
29 Purpose: To extract metadata from output file on worker node, generate JSON file 34 """Base class to hold / interpret general metadata""" 35 __metaclass__ = abc.ABCMeta
42 """Extract metadata from inputfile into a pipe for further processing.""" 45 proc = Popen([
"sam_metadata_dumper", local], stdout=PIPE,
49 proc = Popen([
"sam_metadata_dumper", url], stdout=PIPE,
51 if len(local) > 0
and local != self.
inputfile:
56 """Run the proc in a 7200-sec timeout queue, return stdout, stderr""" 60 thread.join(timeout=7200)
62 print(
'Terminating subprocess because of timeout.')
69 raise RuntimeError(
'sam_metadata_dumper returned nonzero exit status {}.'.
format(rc))
74 """Run jobinfo, put the return code, stdout, and stderr into a queue""" 75 jobout, joberr = jobinfo.communicate()
77 for item
in (rc, jobout, joberr):
83 """Take Jobout and Joberr (in jobtuple) and return mdart object from that""" 85 mdtext =
''.join(line.replace(
", ,",
",")
for line
in jobtuple[0].
decode().
split(
'\n')
if line[-3:-1] !=
' ,')
86 mdtop = json.JSONDecoder().
decode(mdtext)
87 if len(list(mdtop.keys())) == 0:
88 print(
'No top-level key in extracted metadata.')
90 file_name = list(mdtop.keys())[0]
91 return mdtop[file_name]
95 """If there's no application key in md dict, create the key with a blank dictionary. 96 Then return md['application'], along with mdval""" 97 if 'application' not in md:
98 md[
'application'] = {}
99 return md[
'application']
109 return [self.
expname + elt
for elt
in (
'lbneMCGenerators',
'lbneMCName',
'lbneMCDetectorType',
'StageName')]
112 if key ==
'lbneMCDetectorType':
113 return 'lbne_MC.detector_type' 114 elif key ==
'StageName':
115 return 'lbne_MC.miscellaneous' 119 projNoun = stem.split(
"MC")
120 return prefix +
"_MC." + projNoun[1]
125 """Class to hold/interpret experiment-specific metadata""" 127 MetaData.__init__(self, inputfile)
138 self.metadataList, self.
translateKeyf = metaDataModule.metadataList(), metaDataModule.translateKey
141 """Returns the output of the imported translateKey function (as translateKeyf) called on key""" 145 """Loop through art metadata, generate metadata dictionary""" 152 for mdkey
in list(mdart.keys()):
156 if mdkey ==
'file_format_version':
158 elif mdkey ==
'file_format_era':
164 elif mdkey ==
'run_type':
166 elif mdkey ==
'application.version':
168 elif mdkey ==
'application.family':
170 elif mdkey ==
'application.name':
175 elif mdkey ==
'data_stream':
176 if 'dunemeta.data_stream' not in list(mdart.keys()):
177 md[
'data_stream'] = mdval
180 elif mdkey ==
'process_name':
185 elif mdkey ==
'applicationFamily':
186 if not md.has_key(
'application'):
187 md[
'application'] = {}
188 md[
'application'][
'family'] = mdval
189 elif mdkey ==
'StageName' or mdkey ==
'applicationName':
190 if not md.has_key(
'application'):
191 md[
'application'] = {}
192 md[
'application'][
'name'] = mdval
193 elif mdkey ==
'applicationVersion':
194 if not md.has_key(
'application'):
195 md[
'application'] = {}
196 md[
'application'][
'version'] = mdval
200 elif mdkey ==
'parents':
202 if not args.strip_parents:
204 parent_dict = {
'file_name': parent}
205 mdparents.append(parent_dict)
206 md[
'parents'] = mdparents
210 elif mdkey ==
'art.first_event':
212 elif mdkey ==
'art.last_event':
214 elif mdkey ==
'first_event':
216 elif mdkey ==
'last_event':
218 elif mdkey ==
'lbneMCGenerators':
219 md[
'lbne_MC.generators'] = mdval
220 elif mdkey ==
'lbneMCOscillationP':
221 md[
'lbne_MC.oscillationP'] = mdval
222 elif mdkey ==
'lbneMCTriggerListVersion':
223 md[
'lbne_MC.trigger-list-version'] = mdval
224 elif mdkey ==
'lbneMCBeamEnergy':
225 md[
'lbne_MC.beam_energy'] = mdval
226 elif mdkey ==
'lbneMCBeamFluxID':
227 md[
'lbne_MC.beam_flux_ID'] = mdval
228 elif mdkey ==
'lbneMCName':
229 md[
'lbne_MC.name'] = mdval
230 elif mdkey ==
'lbneMCDetectorType':
231 md[
'lbne_MC.detector_type'] = mdval
232 elif mdkey ==
'lbneMCNeutrinoFlavors':
233 md[
'lbne_MC.neutrino_flavors'] = mdval
234 elif mdkey ==
'lbneMCMassHierarchy':
235 md[
'lbne_MC.mass_hierarchy'] = mdval
236 elif mdkey ==
'lbneMCMiscellaneous':
237 md[
'lbne_MC.miscellaneous'] = mdval
238 elif mdkey ==
'lbneMCGeometryVersion':
239 md[
'lbne_MC.geometry_version'] = mdval
240 elif mdkey ==
'lbneMCOverlay':
241 md[
'lbne_MC.overlay'] = mdval
242 elif mdkey ==
'lbneDataRunMode':
243 md[
'lbne_data.run_mode'] = mdval
244 elif mdkey ==
'lbneDataDetectorType':
245 md[
'lbne_data.detector_type'] = mdval
246 elif mdkey ==
'lbneDataName':
247 md[
'lbne_data.name'] = mdval
248 elif mdkey ==
'detector.hv_status':
250 elif mdkey ==
'detector.hv_value':
252 elif mdkey ==
'detector.tpc_status':
254 elif mdkey ==
'detector.tpc_apa_status':
256 elif mdkey ==
'detector.tpc_apas':
258 elif mdkey ==
'detector.tpc_apa_1':
260 elif mdkey ==
'detector.tpc_apa_2':
262 elif mdkey ==
'detector.tpc_apa_3':
264 elif mdkey ==
'detector.tpc_apa_4':
266 elif mdkey ==
'detector.tpc_apa_5':
268 elif mdkey ==
'detector.tpc_apa_6':
270 elif mdkey ==
'detector.pd_status':
272 elif mdkey ==
'detector.crt_status':
274 elif mdkey ==
'daq.readout':
276 elif mdkey ==
'daq.felix_status':
278 elif mdkey ==
'beam.polarity':
280 elif mdkey ==
'beam.momentum':
282 elif mdkey ==
'dunemeta.data_stream':
283 md[
'data_stream'] = mdval
284 elif mdkey ==
'??.data_type':
286 elif mdkey ==
'data_quality.level':
288 elif mdkey ==
'data_quality.is_junk':
290 elif mdkey ==
'data_quality.do_not_process':
292 elif mdkey ==
'data_quality.online_good_run_list':
294 elif mdkey ==
'dunemeta.dune_data.accouple':
295 md[
'DUNE_data.accouple'] =
int(mdval)
296 elif mdkey ==
'dunemeta.dune_data.calibpulsemode':
297 md[
'DUNE_data.calibpulsemode'] =
int(mdval)
298 elif mdkey ==
'dunemeta.dune_data.daqconfigname':
299 md[
'DUNE_data.DAQConfigName'] = mdval
300 elif mdkey ==
'dunemeta.dune_data.detector_config':
301 md[
'DUNE_data.detector_config'] = mdval
302 elif mdkey ==
'dunemeta.dune_data.febaselinehigh':
303 md[
'DUNE_data.febaselinehigh'] =
int(mdval)
304 elif mdkey ==
'dunemeta.dune_data.fegain':
305 md[
'DUNE_data.fegain'] =
int(mdval)
306 elif mdkey ==
'dunemeta.dune_data.feleak10x':
307 md[
'DUNE_data.feleak10x'] =
int(mdval)
308 elif mdkey ==
'dunemeta.dune_data.feleakhigh':
309 md[
'DUNE_data.feleakhigh'] =
int(mdval)
310 elif mdkey ==
'dunemeta.dune_data.feshapingtime':
311 md[
'DUNE_data.feshapingtime'] =
int(mdval)
312 elif mdkey ==
'dunemeta.dune_data.inconsistent_hw_config':
313 md[
'DUNE_data.inconsistent_hw_config'] =
int(mdval)
314 elif mdkey ==
'dunemeta.dune_data.is_fake_data':
315 md[
'DUNE_data.is_fake_data'] =
int(mdval)
316 elif mdkey ==
'dunemeta.dune_data.readout_window':
317 md[
'DUNE_data.readout_window'] =
float(mdval)
322 md[mdkey] = mdart[mdkey]
326 md[
'file_name'] = self.inputfile.split(
"/")[-1]
327 if 'file_size' in md0:
328 md[
'file_size'] = md0[
'file_size']
330 md[
'file_size'] = os.path.getsize(self.
inputfile)
331 if 'crc' in md0
and not args.no_crc:
332 md[
'crc'] = md0[
'crc']
333 elif not args.no_crc:
334 md[
'crc'] = root_metadata.fileEnstoreChecksum(self.
inputfile)
341 """ Get metadata from input file and return as python dictionary. 342 Calls other methods in class and returns metadata dictionary""" 346 return self.
md_gen(mdart, md0)
350 argparser = argparse.ArgumentParser(
'Parse arguments')
351 argparser.add_argument(
'--infile',help=
'path to input file',required=
True,type=str)
352 argparser.add_argument(
'--declare',help=
'validate and declare the metadata for the file specified in --infile to SAM',action=
'store_true')
353 argparser.add_argument(
'--appname',help=
'application name for SAM metadata',type=str)
354 argparser.add_argument(
'--appversion',help=
'application version for SAM metadata',type=str)
355 argparser.add_argument(
'--appfamily',help=
'application family for SAM metadata',type=str)
356 argparser.add_argument(
'--campaign',help=
'Value for DUNE.campaign for SAM metadata',type=str)
357 argparser.add_argument(
'--data_stream',help=
'Value for data_stream for SAM metadata',type=str)
358 argparser.add_argument(
'--requestid',help=
'Value for DUNE.requestid for SAM metadata',type=str)
359 argparser.add_argument(
'--set_processed',help=
'Set for parent file as processed in SAM metadata',action=
"store_true")
360 argparser.add_argument(
'--strip_parents',help=
'Do not include the file\'s parents in SAM metadata for declaration',action=
"store_true")
361 argparser.add_argument(
'--no_crc',help=
'Leave the crc out of the generated json',action=
"store_true")
362 argparser.add_argument(
'--skip_dumper',help=
'Skip running sam_metadata_dumper on the input file',action=
"store_true")
363 argparser.add_argument(
'--input_json',help=
'Input json file containing metadata to be added to output (can contain ANY valid SAM metadata parameters)',type=str)
366 args = argparser.parse_args()
372 expSpecificMetadata =
expMetaData(os.environ[
'SAM_EXPERIMENT'], args.infile)
373 mddict = expSpecificMetadata.getmetadata()
375 if args.input_json !=
None:
376 if os.path.exists(args.input_json):
378 injson=
open(args.input_json)
379 arbjson = json.load(injson)
380 for key
in list(arbjson.keys()):
381 mddict[key] = arbjson[key]
383 print(
'Error loading input json file.')
386 print(
'Error, specified input file does not exist.')
389 if 'application' in mddict
and 'name' not in list(mddict[
'application'].
keys())
and args.appname !=
None:
390 mddict[
'application'][
'name'] = args.appname
391 if 'application' in mddict
and 'version' not in list(mddict[
'application'].
keys())
and args.appversion !=
None:
392 mddict[
'application'][
'version'] = args.appversion
393 if 'application' in mddict
and 'family' not in list(mddict[
'application'].
keys())
and args.appfamily !=
None:
394 mddict[
'application'][
'family'] = args.appfamily
395 if args.appfamily !=
None and args.appname !=
None and args.appversion !=
None:
396 mddict[
'application'] = {}
397 mddict[
'application'][
'family'] = args.appfamily
398 mddict[
'application'][
'name'] = args.appname
399 mddict[
'application'][
'version'] = args.appversion
400 if 'DUNE.campaign' not in list(mddict.keys())
and args.campaign !=
None:
401 mddict[
'DUNE.campaign'] = args.campaign
402 if args.data_stream !=
None:
403 mddict[
'data_stream'] = args.data_stream
404 if args.requestid !=
None:
405 mddict[
'DUNE.requestid'] = args.requestid
408 print(
'You have not implemented a defineMetaData function by providing an experiment.')
409 print(
'No metadata keys will be saved')
412 mdtext = json.dumps(mddict, indent=2, sort_keys=
True)
415 ih.declareFile(mdtext)
417 if args.set_processed:
418 swc = samweb_client.SAMWebClient(
'dune')
419 moddict = {
"DUNE.production_status" :
"processed" }
420 for parent
in moddict[
'parents']:
421 fname = moddict[
'parents'][parent][
'file_name']
423 swc.modifyFileMetadata(fname, moddict)
425 print(
'Error modidying metadata for %s' % fname)
433 if __name__ ==
"__main__":
int open(const char *, int)
Opens a file descriptor.
static bool format(QChar::Decomposition tag, QString &str, int index, int len)
void decode(std::any const &a, Hep2Vector &result)
void split(std::string const &s, char c, OutIter dest)