RemoveMathFromGDML.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #
3 # Run with '--help' for usage instructions
4 #
5 # Author: Gianluca Petrillo (petrillo@fnal.gov)
6 # Date: April 1, 2016
7 #
8 # Change log:
9 # 20160401 [v1.0] (petrillo@fnal.gov)
10 # original version
11 # 20160401 [v1.1] (petrillo@fnal.gov)
12 # switched default to direct parser;
13 # survive with warnings if XML is not available
14 # changed the default mode to backup the input and then replace the file;
15 # added a single output mode
16 # 20160405 [v1.2] (petrillo@fnal.gov)
17 # added support for power operator "^" and mathematical functions in expressions
18 # added dry-run option
19 # 20160405 [v1.3] (petrillo@fnal.gov)
20 # using ROOT as expression evaluator (as default)
21 # 20160406 [v1.4] (petrillo@fnal.gov)
22 # removed the option to use python as expression evaluator,
23 # since it may give wrong answers (example: "1/8*2.54" => 0)
24 # 20160407 [v1.5] (petrillo@fnal.gov)
25 # bug fix: expression option was swapped
26 # 20160408 [v1.6] (petrillo@fnal.gov)
27 # require explicit argument to read input from stdin;
28 # check that ROOT version is proper for the task;
29 # configuration-related exceptions do not print backtrace;
30 # option to warn when an expression evaluates to 0
31 #
32 
33 __doc__ = """Evaluates and replaces mathematical expressions from a GDML file.
34 
35 By default, each of the input files is renamed into a .bak file, and the output
36 replaces the old file. If the '--stdin' option is specified, the file is read from
37 standard input and output to standard output, or to the value of the '--output' option.
38 The output option can also be specified to set the output file name, in which case
39 the input file is not renamed. If empty, output will be to standard output.
40 
41 This script supports two modes:
42 - direct parser: a simple parser that looks for patterns '="xxx"' in a line
43  and replaces xxx with its value; it preserves the form of the input,
44  but it might silently fail to parse good GDML
45 - XML parser: a python XML parser evaluates the GDML file completely, then it
46  writes it anew; it will parse any XML, but it loses comments and can change
47  the order of the attributes
48 The XML parser is easy to extend to include "define" GDML lines, that are not
49 currently supported.
50 
51 Expressions are evaluated by ROOT TFormula.
52 """
53 __version__ = "1.6"
54 
55 import sys, os
56 import logging
57 import math
58 
59 try:
60  import xml.dom.minidom
61  hasXML = True
62 except ImportError: hasXML = False
63 
64 
class ConfigurationError(RuntimeError):
    """Error in the way the script was configured or invoked.

    It carries the same arguments as a plain RuntimeError; having its own
    type lets the callers tell it apart from other run-time failures.
    """

    def __init__(self, *args, **kargs):
        super(ConfigurationError, self).__init__(*args, **kargs)
# class ConfigurationError
69 
70 
71 ###############################################################################
72 ### Expression fixer
73 ###
class GDMLexpressionRemover:
    """Evaluates mathematical expressions into their numerical value.

    The evaluator is chosen at construction time and exposed as `purify()`:
    ROOT TFormula by default, python `eval()` if `options.NoROOTformula` is set.
    `purify()` takes the expression as a string and returns a string: the
    value of the expression, or the expression itself if it can't be evaluated.
    """

    def __init__(self, options = None):
        """Selects and sets up the expression evaluator.

        `options` is any object that may carry a `NoROOTformula` attribute;
        when the attribute is missing (or `options` is None) ROOT is used.
        Raises ConfigurationError if ROOT is required but not usable.
        """
        self.constants = {}
        # work on a copy: eval() inserts '__builtins__' into the globals it is
        # given, and that must not end up in the `math` module namespace
        self.environment = dict(vars(math))
        self.options = options
        if not getattr(options, "NoROOTformula", False):
            self.initROOT()
            self.formula = self.ROOT.TFormula("GDMLexpressionRemoverFormula", "0")
            self.purify = self.purify_ROOT
        else:
            self.purify = self.purify_native
    # __init__()

    @staticmethod
    def sanitize(s):
        """Translates the power operator "^" into the python one ("**")."""
        return s.replace("^", "**")

    def initROOT(self):
        """Loads ROOT, checks that its TFormula is adequate and stores the module.

        Raises ConfigurationError if ROOT can't be imported or if TFormula
        evaluates the test expression "1/2*2" to anything but 1.
        """
        try:
            import ROOT
        except ImportError:
            raise ConfigurationError \
              ("Can't load ROOT module: I can't use ROOT to evaluate formulas.")
        ROOT.gErrorIgnoreLevel = ROOT.kFatal # do not even print errors
        f = ROOT.TFormula("FTest", "1/2*2")
        if f.Eval(0.) != 1.:
            raise ConfigurationError(
              """This script won't work with ROOT version (%s);\n"""
              """Please set upa recent version 5.\n"""
              """(quick test: 'TFormula("F", "1/2*2").Eval(0.)' should return 1)"""
              % ROOT.gROOT.GetVersion()
              )
        self.ROOT = ROOT # store the module for local use
    # initROOT()

    def pass_floats(self, expression):
        """Returns the expression unchanged if python can convert it to float.

        Raises ValueError otherwise.
        """
        float(expression) # just throw an exception if not a float
        return expression
    # pass_floats()

    def purify_native(self, expression):
        """Evaluates the expression with python `eval()`.

        Plain numbers are returned untouched; anything that fails to evaluate
        is returned unchanged.
        NOTE: eval() executes arbitrary python code, so this mode must be used
        only on trusted input; according to the change log it may also give
        wrong answers (example: "1/8*2.54" => 0).
        """
        try: return self.pass_floats(expression)
        except ValueError: pass

        # is it a valid expression?
        try:
            sanitized = self.sanitize(expression)
            return str(eval(sanitized, self.environment, self.constants))
        except Exception:
            # not something we can evaluate: leave it as it is
            return expression
    # purify_native()

    def purify_ROOT(self, expression):
        """Evaluates the expression with the ROOT TFormula set up by __init__().

        Plain numbers are returned untouched; if TFormula can't compile the
        expression, the expression is returned unchanged.
        """
        try: return self.pass_floats(expression)
        except ValueError: pass

        # is it a valid expression? (Compile() returning 0 is taken as success)
        if self.formula.Compile(expression) == 0:
            return str(self.formula.Eval(0.))
        else:
            return expression
    # purify_ROOT()

# class GDMLexpressionRemover
138 
139 
140 
141 ###############################################################################
142 ### Direct text parsing approach
143 ###
class GDMLpurifier(GDMLexpressionRemover):
    """Direct text parser: evaluates the '="xxx"' patterns found in a string."""

    def __init__(self, *args, **kargs):
        GDMLexpressionRemover.__init__(self, *args, **kargs)

    @staticmethod
    def findStrings(token):
        """Returns a list of pairs: (prefix, double quoted string)

        The prefix is the text preceding the string, including the equal sign
        that introduces it; the string is reported without its quotes.
        The string is None for the text not followed by any '="..."' pattern
        (typically the tail of the line). Joining each prefix with its
        re-quoted string gives back the original token: equal signs not
        followed by a quote and quotes that are never closed are kept verbatim
        in the prefix.
        """
        mode = 'p' # 'p': prefix; 'w': word; 'e': equal sign
        tokens = []
        prefix = ""
        word = None
        for c in token:
            if mode == 'w':
                if c == '"': # (w) => (p) on '"'
                    tokens.append((prefix, word))
                    prefix = ""
                    word = None
                    mode = 'p'
                else:
                    word += c
                continue
            # if in word

            if mode == 'e':
                # the pending equal sign belongs to the prefix in all cases
                prefix += "="
                if c == '"': # (e) => (w) on '"'
                    word = ""
                    mode = 'w'
                elif c != '=': # (e) => (p) on anything else
                    prefix += c
                    mode = 'p'
                # on another '=' we stay in (e): the new one is now pending
                continue
            # if after equal sign

            # here mode is 'p'
            if c == '=': # (p) => (e) on '='
                mode = 'e'
            else:
                prefix += c
        # for

        if mode == 'e':
            # the token ends with an equal sign: do not lose it
            prefix += "="
        elif mode == 'w':
            # the quote was never closed: keep the text exactly as it was
            prefix += '"' + word
            word = None
        if prefix or (word is not None):
            tokens.append((prefix, word))
        return tokens
    # findStrings()

    @staticmethod
    def _isZero(value):
        """Returns whether the string represents a number equal to zero."""
        try: return float(value) == 0.
        except ValueError: return False

    def apply(self, token, iLine = None):
        """Purifies the token

        Each double quoted string following an equal sign is replaced by the
        result of `purify()`; everything else is copied as is.
        `iLine` is the 0-based number of the line the token comes from, used
        only in messages. Changes are reported via `options.LogMsg` if present
        (debug messages otherwise); a warning is emitted for expressions
        evaluating to 0 if `options.WarnZero` is set.
        """
        report = getattr(self.options, "LogMsg", None) or logging.debug
        warnZero = getattr(self.options, "WarnZero", False)

        elements = []
        for prefix, s in self.findStrings(token):
            element = prefix if prefix is not None else ""
            if s is not None:
                purified = self.purify(s)
                if s != purified:
                    isZero = warnZero and self._isZero(purified)
                    if iLine is not None:
                        report(
                          "Evaluated '%s' into '%s' on line %d",
                          s, purified, iLine + 1
                          )
                        if isZero:
                            logging.warning("On line %d: expression '%s' evaluated to 0",
                              iLine + 1, s)
                    else:
                        report("Evaluated '%s' into '%s'", s, purified)
                        if isZero:
                            logging.warning("Expression '%s' evaluated to 0", s)
                # if purified
                # the quotes were stripped by findStrings(): put them back
                element += '"' + str(purified) + '"'
            # if s
            elements.append(element)
        # for
        return "".join(elements)
    # apply()

# class GDMLpurifier
221 
222 
def RemoveMathFromGDMLfile(InputFileName, OutputFileName = None, options = None):
    """Runs the direct parser on a file, line by line.

    If InputFileName is empty, standard input is read; if OutputFileName is
    empty, standard output is written. In dry-run mode (`options.Fake`) the
    input is still parsed, but no output is opened or written.
    Leading whitespace of each line is preserved, trailing whitespace is lost.
    Raises ConfigurationError if input and output are the same file.
    """
    fake = bool(getattr(options, "Fake", False))

    if not fake and OutputFileName and (InputFileName == OutputFileName):
        raise ConfigurationError \
          ("With the direct parser the input and output file must be different.")

    # if InputFileName is empty, use standard input
    InputFile = open(InputFileName, 'r') if InputFileName else sys.stdin
    OutputFile = None
    try:
        if fake:
            logging.info("Output will not be written in dry-run mode.")
        else:
            # if OutputFileName is empty, use standard output; otherwise, overwrite
            OutputFile = open(OutputFileName, 'w') if OutputFileName else sys.stdout

        RemoveGDMLexpression = GDMLpurifier(options=options)

        for iLine, line in enumerate(InputFile):

            # remove stuff at the end of line (will be lost), then split the
            # line into indentation and content
            stripped = line.rstrip()
            beef = stripped.lstrip()
            indent = stripped[:len(stripped) - len(beef)]

            purified = RemoveGDMLexpression.apply(beef, iLine)

            if OutputFile is not None:
                OutputFile.write(indent + purified + "\n")
        # for
    finally:
        # close only what was opened here, never the standard streams
        if InputFile is not sys.stdin:
            InputFile.close()
        if (OutputFile is not None) and (OutputFile is not sys.stdout):
            OutputFile.close()
    # try ... finally

    if OutputFileName and (OutputFile is not None):
        logging.debug("GDML written to file '%s'", OutputFileName)

# RemoveMathFromGDMLfile()
262 
263 
264 ###############################################################################
265 ### XML parsing approach
266 ###
class XMLpurifier(GDMLexpressionRemover):
    """XML parser: evaluates the attributes of DOM nodes."""

    def __init__(self, *args, **kargs):
        GDMLexpressionRemover.__init__(self, *args, **kargs)

    @staticmethod
    def _isZero(value):
        """Returns whether the string represents a number equal to zero."""
        try: return float(value) == 0.
        except ValueError: return False

    def purifyNode(self, node, level = None):
        """Purifies the attributes in the DOM node

        The value of each attribute is replaced by the result of `purify()`.
        Nodes with no attributes are left alone; `level` is not used.
        As in the direct parser, changes are reported via `options.LogMsg` if
        present (debug messages otherwise), and expressions evaluating to 0
        are reported with a warning if `options.WarnZero` is set.
        """
        attributes = node.attributes
        if not attributes: return

        report = getattr(self.options, "LogMsg", None) or logging.debug
        warnZero = getattr(self.options, "WarnZero", False)

        for name, value in attributes.items():
            purified = self.purify(value)
            if value != purified:
                report("Evaluated '%s' into '%s'", value, purified)
                if warnZero and self._isZero(purified):
                    logging.warning("Expression '%s' evaluated to 0", value)
                attributes[name] = str(purified)
            # if
        # for attributes
    # purifyNode()

# class XMLpurifier
286 
287 
def ApplyToNodes(node, level, func, *args, **kargs):
    """Calls `func` on all the descendants of `node`, then on `node` itself.

    Each call is `func(n, depth, *args, **kargs)`, where `depth` is `level`
    for `node` and grows by one at each generation of children.
    """
    for descendant in (node.childNodes or ()):
        ApplyToNodes(descendant, level + 1, func, *args, **kargs)
    func(node, level, *args, **kargs)

# ApplyToNodes()
296 
297 
def ApplyToDocument(document, func, *args, **kargs):
    """Applies `func` to `document` and all its descendants, from level 0."""
    TopLevel = 0
    ApplyToNodes(document, TopLevel, func, *args, **kargs)
300 
301 
def RemoveMathFromXMLfile(InputFileName, OutputFileName = None, options = None):
    """Runs the XML parser on a file.

    The input is parsed completely by `xml.dom.minidom`, the attributes of
    all its nodes are evaluated, and the document element is then written
    anew. If InputFileName is empty, standard input is read; if OutputFileName
    is empty, standard output is written. In dry-run mode (`options.Fake`)
    nothing is written.
    """
    fake = bool(getattr(options, "Fake", False))

    # if InputFileName is empty, use standard input
    InputFile = open(InputFileName, 'r') if InputFileName else sys.stdin

    # parse GDML document using minidom parser
    try:
        DOMTree = xml.dom.minidom.parse(InputFile)
    finally:
        if InputFile is not sys.stdin:
            InputFile.close()
    GDML = DOMTree.documentElement

    # the purifier needs the options to pick the expression evaluator
    RemoveGDMLexpression = XMLpurifier(options=options)

    ApplyToDocument(GDML, RemoveGDMLexpression.purifyNode)

    if fake:
        logging.info("Output will not be written in dry-run mode.")
        return

    # if OutputFileName is empty, use standard output; otherwise, overwrite
    OutputFile = open(OutputFileName, 'w') if OutputFileName else sys.stdout
    try:
        OutputFile.write(GDML.toxml())
        OutputFile.write("\n")
    finally:
        if OutputFile is not sys.stdout:
            OutputFile.close()

    if OutputFileName:
        logging.debug("GDML written to file '%s'", OutputFileName)

# RemoveMathFromXMLfile()
330 
331 
332 ################################################################################
def LoggingSetup(LoggingLevel = logging.INFO):
    """Configures the root logger: 'LEVEL: message' lines on standard error."""
    settings = dict(
      level=LoggingLevel,
      format="%(levelname)s: %(message)s",
      stream=sys.stderr, # do not pollute standard output
      )
    logging.basicConfig(**settings)

# def LoggingSetup()
342 
343 
def RunParserOn(parser, InputFileName, options = None):
    """Renames the input file into '.bak', then runs the parser

    The parser is called as `parser(backup, original, options=options)`, so
    that its output takes the place of the original file.
    In dry-run mode (`options.Fake`) nothing is renamed and the parser gets
    the original file name as both input and output.
    If the parser fails, any partial output is removed, the original file is
    put back in place, and the exception is propagated.
    Raises ConfigurationError if the backup file already exists.
    """
    fake = bool(getattr(options, "Fake", False))

    OldInputFileName = InputFileName
    OutputFileName = OldInputFileName

    if not fake:
        InputFileName += ".bak"

        # rename the input file
        if os.path.exists(InputFileName):
            raise ConfigurationError(
              "Backup file '%s' is on the way. Please remove it first."
              % InputFileName
              )
        # if exists
        logging.debug("Renaming the input file into '%s'", InputFileName)
        os.rename(OldInputFileName, InputFileName)
    # if not dry run

    # run the parser
    try:
        parser(InputFileName, OutputFileName, options=options)
    except Exception:
        if not fake:
            logging.debug("Restoring the input file name after a fatal error.")
            # the original was renamed away above, so whatever sits at its
            # place now is incomplete output from the failed parser
            if os.path.exists(OutputFileName):
                os.remove(OutputFileName)
            os.rename(InputFileName, OldInputFileName)
        # if
        raise # same exception, original traceback
    # try ... except

    if not fake:
        logging.info("File '%s' rewritten (old file in '%s')",
          OutputFileName, InputFileName
          )
    # if

# RunParserOn()
384 
385 
386 ################################################################################
def RemoveMathFromGDML():
    """Command line interface: parses the arguments and processes the input.

    Returns 0 on success. Raises ConfigurationError on inconsistent requests
    (for example both '--stdin' and input files, or neither of them).
    """
    import argparse

    LoggingSetup(logging.WARN)

    ###
    ### argument parsing
    ###
    # the first parser is the default one
    SupportedParsers = [ 'direct', 'xml', 'list' ]
    if not hasXML:
        logging.warning("XML parser is not supported (cam't find python XML module)")
        SupportedParsers.remove('xml')
    # if

    parser = argparse.ArgumentParser(description=__doc__)
    parser.set_defaults(NoROOTformula=False, Parser=SupportedParsers[0])

    parser.add_argument('--stdin', dest="FromSTDIN", action='store_true',
      help="read input from stdin")

    parser.add_argument("InputFiles", nargs="*", default=None,
      help="input GDML files [default: stdin]")

    parser.add_argument("--parser", choices=SupportedParsers, dest="Parser",
      help="choose which parser to use ('list' for a list) [%(default)s]")

    parser.add_argument("--direct", action="store_const", const="direct",
      dest="Parser", help="use simple internal parser [%(default)s]")

    parser.add_argument("--xml", action="store_const", const="xml",
      dest="Parser", help="use complete XML parser [%(default)s]")

    # mode disabled because it may give wrong answers;
    # the implementation is still available; to enable it, you have to change
    # the hard-coded value of arguments.NoROOTformula.
    # parser.add_argument("--noroot", action="store_true",
    #   dest="NoROOTformula",
    #   help="use python instead of ROOT TFormula to evaluate expressions [%(default)s]"
    #   )

    parser.add_argument("--output", "-o", dest="OutputFile", default=None,
      help="for a single input, use this as output file")

    parser.add_argument('--warnzero', '-z', dest="WarnZero", action='store_true',
      help="emit a warning each time an expression evaluates to 0 [%(default)s]")
    parser.add_argument('--dryrun', '--fake', '-n', dest="Fake", action='store_true',
      help="do not write output [%(default)s]")
    parser.add_argument('--verbose', '-v', dest="DoVerbose", action='store_true',
      help="shows all the changes on screen [%(default)s]")
    parser.add_argument('--debug', dest="DoDebug", action='store_true',
      help="enables debug messages on screen")

    parser.add_argument('--version', action='version',
      version='%(prog)s ' + __version__)

    arguments = parser.parse_args()

    ###
    ### set up and parameter check
    ###
    # set up the logging system
    logging.getLogger().setLevel \
      (logging.DEBUG if arguments.DoDebug else logging.INFO)

    # function for reporting each change: visible only on request
    arguments.LogMsg = logging.info if arguments.DoVerbose else logging.debug

    if arguments.Parser == 'list':
        SupportedParsers.remove('list')
        logging.info("Supported parsers: '%s'.", "', '".join(SupportedParsers))
        return 0
    # if list parsers

    if arguments.Parser == 'direct':
        Parser = RemoveMathFromGDMLfile
    elif arguments.Parser == 'xml':
        Parser = RemoveMathFromXMLfile
    else:
        raise ConfigurationError("Unexpected parser '%s' requested" % arguments.Parser)

    # exactly one input source is required: either stdin or some files
    if bool(arguments.FromSTDIN) == bool(arguments.InputFiles):
        raise ConfigurationError \
          ("Please either specify option --stdin OR some input files.")
    #

    ###
    ### run
    ###
    if arguments.FromSTDIN:
        # output goes to the file from '--output', or to stdout if not specified
        Parser(None, arguments.OutputFile, options=arguments)
    elif arguments.OutputFile is not None:
        if len(arguments.InputFiles) > 1:
            raise ConfigurationError \
              ("Named output is supported only when a single input file is specified.")
        # if
        Parser(arguments.InputFiles[0], arguments.OutputFile, options=arguments)
    else:
        # default mode: back up each input file and replace it with the output
        for InputFileName in arguments.InputFiles:
            RunParserOn(Parser, InputFileName, options=arguments)
    # if ... else

    ###
    ### done
    ###
    return 0
# RemoveMathFromGDML()
493 
494 
495 ################################################################################
if __name__ == "__main__":
    try:
        sys.exit(RemoveMathFromGDML())
    except ConfigurationError as e:
        # configuration errors are reported as plain messages, with no backtrace
        logging.error("%s" % str(e))
        sys.exit(1)
# main
def ApplyToNodes(node, level, func, args, kargs)
int open(const char *, int)
Opens a file descriptor.
def apply(self, token, iLine=None)
auto enumerate(Iterables &&...iterables)
Range-for loop helper tracking the number of iteration.
Definition: enumerate.h:69
def ApplyToDocument(document, func, args, kargs)
def purifyNode(self, node, level=None)
def RemoveMathFromXMLfile(InputFileName, OutputFileName=None, options=None)
def RunParserOn(parser, InputFileName, options=None)
def LoggingSetup(LoggingLevel=logging.INFO)
def RemoveMathFromGDMLfile(InputFileName, OutputFileName=None, options=None)
def func()
Definition: docstring.py:7
static QCString str
def __init__(self, args, kargs)