rootstat.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 ######################################################################
3 #
4 # Name: rootstat.py
5 #
6 # Purpose: Analyze art root file and dump object statistics.
7 #
8 # Created: 27-Nov-2012 Herbert Greenlee
9 #
10 # Usage:
11 #
12 # stat.py <options> [@filelist] [file1 file2 ...]
13 #
14 # Options:
15 #
16 # [-h|--help] - Print help message.
17 # --level n - Branch level (default 1). Use --level 1 to see top
18 # branches only. Use --level 2 to also see subbranches.
19 # --nfile n - Number of files to analyze (default all).
20 # --all - Print analysis of each file (default is only summary).
21 #
22 # Arguments:
23 #
24 # @filelist - File list containing one input file per line.
25 # file1 file2 ... - Input files.
26 #
27 ######################################################################
28 
29 from __future__ import absolute_import
30 from __future__ import print_function
31 import sys, os
32 import project_utilities
33 import larbatch_posix
34 
# Import the ROOT module.
#
# The import is bracketed so that:
#   - root does not see our command line options (only the program name
#     is left in sys.argv while the import runs),
#   - TERM is removed from the environment before root is loaded,
#   - root warnings are turned off globally afterwards.

myargv = sys.argv
sys.argv = myargv[:1]
os.environ.pop('TERM', None)
import ROOT
ROOT.gErrorIgnoreLevel = ROOT.kError
sys.argv = myargv
46 
47 # Print help.
48 
def help():
    """Print the usage section of this script's header comment.

    The script file named by sys.argv[0] is read line by line.  Printing
    starts at the first line whose characters 2..8 are 'stat.py' (the usage
    line of the header comment) and stops at the next line that begins with
    '######'.  The first two characters of each printed line (the '# '
    comment prefix) are stripped; lines of two characters or fewer are
    printed as empty lines.

    Returns None.
    """

    script_path = sys.argv[0]
    printing = False

    # A context manager guarantees the file handle is released.
    with open(script_path) as script:
        for line in script:
            if line[2:9] == 'stat.py':
                printing = True
            elif line.startswith('######') and printing:
                printing = False
            if printing:
                if len(line) > 2:
                    # The line already carries its own newline, so nothing
                    # is appended (a trailing ' ' would indent the next line).
                    print(line[2:], end='')
                else:
                    print()
66 
67 # Analyze root file.
68 
# Column layout shared by the per-file branch table header and its rows.
_BRANCH_ROW_FORMAT = '%14d%14d%8.2f %s'
_BRANCH_HEADER_FORMAT = '%14s%14s%8s %s'


def _compression_ratio(ntot, nzip):
    """Return total/zipped bytes as a float, or 0. when nothing is zipped."""
    if nzip != 0:
        return float(ntot) / float(nzip)
    return 0.


def _print_branch_row(ntot, nzip, name):
    """Print one row (bytes, zipped bytes, compression, name) of the table."""
    print(_BRANCH_ROW_FORMAT % (ntot, nzip, _compression_ratio(ntot, nzip), name))


def _accumulate_branch(gbranches, name, ntot, nzip):
    """Add (ntot, nzip) to the running [total, zipped] entry for name."""
    if name in gbranches:
        gbranches[name][0] = gbranches[name][0] + ntot
        gbranches[name][1] = gbranches[name][1] + nzip
    else:
        gbranches[name] = [ntot, nzip]


def analyze(root, level, gtrees, gbranches, doprint):
    """Collect tree and branch statistics from one open root file.

    Arguments:

    root      - Open root file; must provide GetListOfKeys() and Get(name).
    level     - Branch level.  0 = trees only (branches are not analyzed),
                1 = '.obj' subbranches of art::Wrapper<T> branches of the
                Events tree, >1 = also their subsubbranches.
    gtrees    - Dictionary updated in place: tree name -> number of entries,
                accumulated over all calls.
    gbranches - Dictionary updated in place: branch name -> [total bytes,
                zipped bytes], accumulated over all calls.  The summed sizes
                of all '.obj' subbranches are stored under 'All branches'.
    doprint   - If true, print the per-file analysis.

    Returns None.
    """

    # Find all trees in the file.  A name is only fetched once, even if the
    # list of keys contains it several times.

    trees = {}
    events = None
    for key in root.GetListOfKeys():
        objname = key.GetName()
        if objname not in trees:
            obj = root.Get(objname)
            if obj and obj.InheritsFrom('TTree'):
                trees[objname] = obj
                if objname == 'Events':
                    events = obj

    # Print summary of trees and remember their entry counts.

    if doprint:
        print('\nTrees:\n')
    for treename in sorted(trees):
        nentry = trees[treename].GetEntriesFast()
        if doprint:
            print('%s has %d entries.' % (treename, nentry))
        gtrees[treename] = gtrees.get(treename, 0) + nentry

    # If level is zero, we are done (don't analyze branches).  This check
    # comes before the branch header so that no dangling header is printed.

    if level == 0:
        return

    # Print summary of branches in Events tree.

    if doprint:
        print('\nBranches of Events tree:\n')

    if events:

        if doprint:
            # Header is built from the same column widths as the rows.
            print(_BRANCH_HEADER_FORMAT %
                  ('Total bytes', 'Zipped bytes', 'Comp.', 'Branch name'))
            print(_BRANCH_HEADER_FORMAT %
                  ('-----------', '------------', '-----', '-----------'))

        ntotall = 0
        nzipall = 0

        # Loop over branches of Events tree.

        for branch in events.GetListOfBranches():

            # Only look at data products (class art::Wrapper<T>).

            branch_class = branch.GetClass().GetName()
            if not branch_class.startswith('art::Wrapper<'):
                continue

            # Loop over subbranches.

            for subbranch in branch.GetListOfBranches():

                # Only look at '.obj' subbranch (wrapped object).

                name = subbranch.GetName()
                if not name.endswith('.obj'):
                    continue

                ntot = subbranch.GetTotBytes("*")
                nzip = subbranch.GetZipBytes("*")
                ntotall = ntotall + ntot
                nzipall = nzipall + nzip
                if doprint:
                    _print_branch_row(ntot, nzip, name)
                _accumulate_branch(gbranches, name, ntot, nzip)

                # Loop over subsubbranches (attributes of wrapped object).
                # These are not added to the 'All branches' totals.

                if level > 1:
                    for subsubbranch in subbranch.GetListOfBranches():
                        subname = subsubbranch.GetName()
                        subtot = subsubbranch.GetTotBytes("*")
                        subzip = subsubbranch.GetZipBytes("*")
                        if doprint:
                            _print_branch_row(subtot, subzip, subname)
                        _accumulate_branch(gbranches, subname, subtot, subzip)

        # Do summary of all branches.

        name = 'All branches'
        if doprint:
            _print_branch_row(ntotall, nzipall, name)

            # Print average event size.
            # NOTE(review): the nesting of this section under 'doprint' was
            # reconstructed from a flattened listing; it matches the
            # documented behavior of --all (per-file output only on request).

            nev = events.GetEntriesFast()
            if nev != 0:
                nevtot = 1.e-6 * float(ntotall) / float(nev)
                nevzip = 1.e-6 * float(nzipall) / float(nev)
            else:
                nevtot = 0.
                nevzip = 0.
            print()
            print('%10d events.' % nev)
            print('%7.2f Mb average size per event.' % nevtot)
            print('%7.2f Mb average zipped size per event.' % nevzip)

        _accumulate_branch(gbranches, name, ntotall, nzipall)

    # Done.

    return
215 
216 # Main program.
217 
def main(argv):
    """Parse the command line, analyze the input files and print a summary.

    Arguments:

    argv - Full argument list including the program name in argv[0].
           Recognized options are -h/--help, --level n, --nfile n and --all;
           '@filelist' names a file containing one input file per line;
           anything else is taken as an input file.

    Returns 0 on success (or after printing help) and 1 on any error
    (unknown option, bad option value, missing file list, missing or
    unreadable input file).
    """

    # Parse arguments.

    input_files = []
    level = 1
    nfilemax = 0
    print_all = 0

    args = argv[1:]
    while len(args) > 0:
        arg = args[0]

        if arg == '-h' or arg == '--help':

            # Help.

            help()
            return 0

        elif arg in ('--level', '--nfile') and len(args) > 1:

            # Integer valued options (analyze level, number of files).

            try:
                value = int(args[1])
            except ValueError:
                print('Invalid value %s for option %s' % (args[1], arg))
                return 1
            if arg == '--level':
                level = value
            else:
                nfilemax = value
            del args[0:2]

        elif arg == '--all':

            # All files flag.

            print_all = 1
            del args[0]

        elif arg.startswith('-'):

            # Unknown option (also reached by --level/--nfile without value).

            print('Unknown option %s' % arg)
            return 1

        elif arg.startswith('@'):

            # Read in file list to input files, ignoring blank lines.

            filelistname = arg[1:]
            if larbatch_posix.exists(filelistname):
                for filename in larbatch_posix.readlines(filelistname):
                    filename = filename.strip()
                    if filename:
                        input_files.append(filename)
            else:
                print('File list %s does not exist.' % filelistname)
                return 1
            del args[0]

        else:

            # Add single file to input files.

            input_files.append(arg)
            del args[0]

    # Loop over input files.

    gtrees = {}
    gbranches = {}
    nfile = 0

    for input_file in input_files:

        if nfilemax > 0 and nfile >= nfilemax:
            break
        nfile = nfile + 1

        if not larbatch_posix.exists(input_file):
            print('Input file %s does not exist.' % input_file)
            return 1

        print('\nOpening %s' % input_file)
        root = ROOT.TFile.Open(input_file)

        # TFile.Open can hand back a null object on failure, so test the
        # object itself before calling any method on it.

        if not root or not root.IsOpen() or root.IsZombie():
            print('Failed to open %s' % input_file)
            return 1

        # Analyze this file, then release it so that open files do not pile
        # up when many inputs are processed.

        try:
            analyze(root, level, gtrees, gbranches, print_all)
        finally:
            root.Close()

    print('\n%d files analyzed.' % nfile)

    # Print summary of trees.

    print('\nTrees from all files:\n')
    for key in sorted(gtrees.keys()):
        print('%s has %d total entries.' % (key, gtrees[key]))

    # Print summary of branches.

    def print_row(ntot, nzip, name):
        # One table row; compression is reported as 0. when nothing is zipped.
        if nzip != 0:
            comp = float(ntot) / float(nzip)
        else:
            comp = 0.
        print('%14d%14d%8.2f %s' % (ntot, nzip, comp, name))

    allname = 'All branches'
    ntotall = 0
    nzipall = 0

    if level > 0:
        print('\nBranches of Events tree from all files:\n')

        # Header is built from the same column widths as the rows.

        print('%14s%14s%8s %s' %
              ('Total bytes', 'Zipped bytes', 'Comp.', 'Branch name'))
        print('%14s%14s%8s %s' %
              ('-----------', '------------', '-----', '-----------'))
        for key in sorted(gbranches.keys()):
            if key != allname:
                print_row(gbranches[key][0], gbranches[key][1], key)
        if allname in gbranches:
            ntotall = gbranches[allname][0]
            nzipall = gbranches[allname][1]
            print_row(ntotall, nzipall, allname)

    # Print average event size, always based on the all-branches totals.

    if 'Events' in gtrees:
        nev = gtrees['Events']
        if nev != 0:
            nevtot = 1.e-6 * float(ntotall) / float(nev)
            nevzip = 1.e-6 * float(nzipall) / float(nev)
        else:
            nevtot = 0.
            nevzip = 0.
        print()
        print('%10d events.' % nev)
        if level > 0:
            print('%7.2f Mb average size per event.' % nevtot)
            print('%7.2f Mb average zipped size per event.' % nevzip)

    # Done.

    return 0
365 
366 # Invoke main program.
367 
if __name__ == '__main__':
    # Run the main program and hand its return value to the shell as the
    # process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)
def analyze(root, level, gtrees, gbranches, doprint)
Definition: rootstat.py:69
def help()
Definition: rootstat.py:49
int open(const char *, int)
Opens a file descriptor.
def main(argv)
Definition: rootstat.py:218