root_metadata.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 from __future__ import absolute_import
4 from __future__ import print_function
5 
6 # Import stuff.
7 
8 import sys, os, subprocess, json, stream
9 import larbatch_posix
10 import larbatch_utilities
11 from larbatch_utilities import convert_str
12 import project_utilities
13 
# Import ROOT with the real command line hidden from it.
#
# While ROOT is being imported, sys.argv holds only the program name plus
# '-n'; the caller's argument list is put back right afterwards.

myargv = sys.argv
sys.argv = myargv[0:1] + ['-n']

# Prevent root from printing garbage on initialization.
os.environ.pop('TERM', None)

import ROOT
ROOT.gErrorIgnoreLevel = ROOT.kError
sys.argv = myargv

# Silence the "creating converter" RuntimeWarning.

import warnings
warnings.filterwarnings('ignore',
                        message='creating converter.*',
                        category=RuntimeWarning)
30 
31 # Convert adler32-1 (used by dcache) to adler32-0 (used by sam).
32 
def convert_1_adler32_to_0_adler32(crc, filesize):
    """Convert an adler32-1 checksum (dcache) to adler32-0 (sam).

    Args:
        crc: Adler32 checksum computed with start value 1 (anything
            accepted by int()).
        filesize: Size of the checksummed data in bytes (anything accepted
            by int()).

    Returns:
        The corresponding checksum for start value 0, as an integer.
    """
    modulus = 65521
    checksum = int(crc)
    nbytes = int(filesize)

    # Split the checksum into its low and high 16-bit halves.
    low = checksum & 0xffff
    high = (checksum >> 16) & 0xffff

    # Moving the start value from 1 to 0 lowers the low half by one and the
    # high half by the data length, both modulo 65521.
    low = (low + modulus - 1) % modulus
    high = (high + modulus - int(nbytes % modulus)) % modulus

    return (high << 16) + low
42 
43 
44 # Checksum utilities copied from sam_web_client
45 
def enstoreChecksum(fileobj):
    """Calculate the adler32 checksum (start value 0) of an open file.

    The file object is read to exhaustion in blocks of 1 MiB.

    Args:
        fileobj: Object with a read(size) method returning bytes.

    Returns:
        Dictionary with "crc_value" (the unsigned 32-bit checksum as a
        decimal string) and "crc_type" ("adler 32 crc type").

    Raises:
        IOError: If reading from fileobj fails with IOError or OSError.
    """
    import zlib
    readblocksize = 1024*1024
    crc = 0
    while True:
        try:
            s = fileobj.read(readblocksize)
        except (OSError, IOError) as ex:
            # This used to raise "Error", a name that is not defined in this
            # module, so a read failure surfaced as a NameError.
            raise IOError(str(ex))
        if not s:
            break
        crc = zlib.adler32(s, crc)
    crc = int(crc)
    if crc < 0:
        # Return 32 bit unsigned value
        crc = (crc & 0x7FFFFFFF) | 0x80000000
    return {"crc_value": str(crc), "crc_type": "adler 32 crc type"}
62 
def fileEnstoreChecksum(path):
    """Calculate enstore compatible CRC value.

    The path is first mapped with project_utilities.path_to_srm_url.  If the
    mapping leaves the path unchanged, the file is opened through
    larbatch_posix and checksummed locally.  Otherwise the checksum is taken
    from the output of "srmls -2 -l" and converted from adler32-1 to
    adler32-0.  If that fails, or the output carries no checksum, the file is
    streamed through "ifdh cp" and checksummed locally instead.

    Args:
        path: Path of the file to checksum.

    Returns:
        Dictionary with "crc_value" (decimal string) and "crc_type".

    Raises:
        IOError: If the file cannot be opened in the local case.
    """

    srm_url = project_utilities.path_to_srm_url(path)

    if srm_url == path:

        # Open outside of the try/finally that closes the file, so that a
        # failed open is reported as such instead of failing again on an
        # unbound file object during cleanup.
        try:
            f = larbatch_posix.open(path, 'rb')
        except (IOError, OSError) as ex:
            raise IOError(str(ex))
        try:
            return enstoreChecksum(f)
        finally:
            f.close()

    crc = None
    try:
        # New (clever, efficient, obscure...) way of accessing dCache
        # stored checksum using srm.
        project_utilities.test_proxy()
        cmd = ['srmls', '-2', '-l', srm_url]
        srmout = convert_str(subprocess.check_output(cmd))
        lines = srmout.split('\n')

        # The file size is parsed from the first output line: the text
        # between the first two characters and the character before the
        # first '/'.  NOTE(review): layout inferred from the slicing only.
        header = lines[0]
        size = int(header[2:header.find('/')-1])

        for line in lines[1:]:
            if line.find("Checksum value:") > 0:
                ssum = line[line.find(':') + 2:]
                crc1 = int(ssum, base=16)
                crc0 = convert_1_adler32_to_0_adler32(crc1, size)
                crc = {"crc_value": str(crc0), "crc_type": "adler 32 crc type"}
                break

    except Exception:
        # Any failure of the srm method falls through to the old method.
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        crc = None

    if crc is None:

        # Old method: transfer the entire file over the network and
        # checksum the stream.  Also used when the srm output carried no
        # checksum line, instead of reporting a checksum of zero.
        cmd = ['ifdh', 'cp', path, '/dev/fd/1']
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        try:
            crc = enstoreChecksum(p.stdout)
        finally:
            # Release the pipe and reap the child process.
            p.stdout.close()
            p.wait()

    return crc
115 
def get_external_metadata(inputfile):
    """Extract metadata for a file into a dictionary.

    Args:
        inputfile: Path of the file to examine.

    Returns:
        Dictionary of metadata.  It is empty if os.path.exists reports that
        the file does not exist.  Otherwise it holds "file_name",
        "file_size" (as a string) and "crc".  For files ending in ".root"
        that ROOT can open it may additionally hold "events" (entry count of
        the "Events" tree, as a string), "subruns" (list of unique
        (run, subrun) tuples from the "SubRuns" tree) and "data_stream".
    """

    # define an empty python dictionary
    md = {}

    # Check whether this file exists.
    if not os.path.exists(inputfile):
        return md

    # Get the other meta data field parameters
    md['file_name'] = os.path.basename(inputfile)
    md['file_size'] = str(os.path.getsize(inputfile))
    md['crc'] = fileEnstoreChecksum(inputfile)

    # Quit here if file type is not ".root"

    if not inputfile.endswith('.root'):
        return md

    # Root checks.

    ROOT.gEnv.SetValue('RooFit.Banner', '0')
    root_file = ROOT.TFile.Open(larbatch_posix.root_stream(inputfile))
    if root_file and root_file.IsOpen() and not root_file.IsZombie():

        # Root file opened successfully.

        try:

            # Get number of events.

            obj = root_file.Get('Events')
            if obj and obj.InheritsFrom('TTree'):

                # This has a TTree named Events.

                nev = obj.GetEntriesFast()
                md['events'] = str(nev)

            # Get runs and subruns from SubRuns tree.

            subrun_tree = root_file.Get('SubRuns')
            if subrun_tree and subrun_tree.InheritsFrom('TTree'):
                md['subruns'] = []
                nsubruns = subrun_tree.GetEntriesFast()
                tfr = ROOT.TTreeFormula('subruns',
                                        'SubRunAuxiliary.id_.run_.run_',
                                        subrun_tree)
                tfs = ROOT.TTreeFormula('subruns',
                                        'SubRunAuxiliary.id_.subRun_',
                                        subrun_tree)

                # Track already recorded pairs in a set so that each
                # membership test is constant time; the list keeps the
                # order of first appearance.
                seen = set()
                for entry in range(nsubruns):
                    subrun_tree.GetEntry(entry)
                    run = tfr.EvalInstance64()
                    subrun = tfs.EvalInstance64()
                    run_subrun = (run, subrun)
                    if run_subrun not in seen:
                        seen.add(run_subrun)
                        md['subruns'].append(run_subrun)

            # Get stream name (best effort: left out if it cannot be
            # determined).

            try:
                stream_name = stream.get_stream(inputfile)
                md['data_stream'] = stream_name
            except Exception:
                pass

        finally:

            # Do not leave the ROOT file open after the metadata is read.
            root_file.Close()

    return md
181 
if __name__ == "__main__":

    # Command line entry point: extract the metadata of one file and write
    # it as JSON either to the requested output file or to the screen.

    import argparse

    Parser = argparse.ArgumentParser \
        (description="Extracts metadata for a ROOT file.")

    Parser.add_argument("InputFile", help="ROOT file to extract metadata about")
    Parser.add_argument("--output", "-o", dest="OutputFile", default=None,
                        help="JSON file to write the output to [default: screen]"
                        )

    args = Parser.parse_args()

    md = get_external_metadata(args.InputFile)
    mdtext = json.dumps(md, indent=2, sort_keys=True)

    if args.OutputFile:
        # Close the output file explicitly instead of leaving the handle
        # open until interpreter shutdown.
        with open(args.OutputFile, 'w') as outputFile:
            print(mdtext, file=outputFile)
    else:
        print(mdtext, file=sys.stdout)

    sys.exit(0)
int open(const char *, int)
Opens a file descriptor.
def convert_1_adler32_to_0_adler32(crc, filesize)
def get_external_metadata(inputfile)
nvidia::inferenceserver::client::Error Error
Definition: triton_utils.h:15
def fileEnstoreChecksum(path)
def enstoreChecksum(fileobj)
static QCString str