Version 1.0
Copyright © 2022 Lowell D. Thomas
Python APG
 … an ABNF Parser Generator
parser.py
Go to the documentation of this file.
1 ''' @file apg_py/lib/parser.py @brief The APG parser.'''
2 
3 # from pprint import pprint
4 from apg_py.lib import identifiers as id
5 from apg_py.lib.backreferences import BackrefenceStack
6 
7 
class ParserResult:
    '''A convenience class for the parser's results.

    A snapshot of the parser's final state, taken when the object is
    constructed. Printing the object displays one labelled line per value.
    '''

    def __init__(self, parser):
        '''Initialize this result object from the parser object.
        @param parser The parser object whose current state is recorded.
        '''
        # For the parser to be successful, the state must be
        # MATCH or EMPTY and the parser must match the full (sub)input string.
        self.success = (parser.state != id.NOMATCH
                        and parser.phrase_index == parser.sub_end)
        # the state identifier
        self.state = parser.state
        # the state as human readable text
        self.STATE = id.dict.get(parser.state)
        # the number of input characters (phrase integers) matched
        self.phrase_length = parser.phrase_index - parser.sub_begin
        # the total number of input characters
        self.input_length = len(parser.input)
        # the beginning character index of the substring to parse
        self.sub_begin = parser.sub_begin
        # the ending character index of the substring to parse
        self.sub_end = parser.sub_end
        # the total length (number of characters) in the substring to parse
        self.sub_length = parser.sub_end - parser.sub_begin
        # the number of parse tree nodes processed
        self.node_hits = parser.node_hits
        # the maximum parse tree depth reached
        self.max_tree_depth = parser.max_tree_depth
        # the maximum phrase length reached by the parser -
        # likely to be exactly or close to the point of failure
        # if the state is NOMATCH
        self.max_phrase_length = parser.max_phrase_length

    def __str__(self):
        '''Generate a string representation of the parser state.
        The string will be displayed by print() function.
        @returns Returns the string representation to display.
        '''
        # (label, value) pairs in display order; labels are right-aligned
        # in a 19-character column
        rows = (
            ('success', self.success),
            ('state', self.state),
            ('STATE', self.STATE),
            ('input_length', self.input_length),
            ('sub_begin', self.sub_begin),
            ('sub_end', self.sub_end),
            ('sub_length', self.sub_length),
            ('phrase_length', self.phrase_length),
            ('max_phrase_length', self.max_phrase_length),
            ('node_hits', self.node_hits),
            ('max_tree_depth', self.max_tree_depth),
        )
        return ''.join('%19s: %s\n' % (label, str(value))
                       for label, value in rows)
58 
59 
class Parser:
    '''The Parser class for parsing an APG grammar.'''

    def __init__(self, grammar):
        '''The Parser class constructor.
        @param grammar The grammar object generated from an SABNF grammar
        by the API (see @ref api.py).'''

        self.rules = grammar.rules
        self.udts = grammar.udts
        self.rule_count = len(self.rules)
        self.udt_count = len(self.udts)
        # optional helper objects - used by the parser only when
        # they have been set to a truthy value
        self.trace = None
        self.ast = None
        self.stats = None
        self.tree_depth_limit = id.MAX_INT
        self.node_hits_limit = id.MAX_INT
        self.max_tree_depth = 0
        self.tree_depth = 0
        self.node_hits = 0
        self.max_phrase_length = 0
        # operator dispatch table for normal and look ahead parsing
        self.opSelect = {
            id.ALT: self.opALT,
            id.CAT: self.opCAT,
            id.REP: self.opREP,
            id.RNM: self.opRNM,
            id.TLS: self.opTLS,
            id.TBS: self.opTBS,
            id.TRG: self.opTRG,
            id.UDT: self.opUDT,
            id.AND: self.opAND,
            id.NOT: self.opNOT,
            id.BKR: self.opBKR,
            id.BKA: self.opBKA,
            id.BKN: self.opBKN,
            id.ABG: self.opABG,
            id.AEN: self.opAEN,
        }
        # operator dispatch table used while in look behind mode
        self.opSelectBehind = {
            id.ALT: self.opALTbehind,
            id.CAT: self.opCATbehind,
            id.REP: self.opREP,
            id.RNM: self.opRNM,
            id.TLS: self.opTLSbehind,
            id.TBS: self.opTBSbehind,
            id.TRG: self.opTRGbehind,
            id.UDT: self.opUDTbehind,
            id.AND: self.opAND,
            id.NOT: self.opNOT,
            id.BKR: self.opBKRbehind,
            id.BKA: self.opBKA,
            id.BKN: self.opBKN,
            id.ABG: self.opABG,
            id.AEN: self.opAEN,
        }
        # initialize rule and UDT callback functions
        self.rule_callbacks = [None] * self.rule_count
        self.udt_callbacks = [None] * self.udt_count
        # rule indexes for quick look up
        # list of rule + UDT names for back referencing
        bkru_names = []
        bkrr_names = []
        self.rule_indexes = {}
        for rule in self.rules:
            self.rule_indexes[rule['lower']] = rule['index']
            if rule['is_bkru']:
                bkru_names.append(rule['lower'])
            if rule['is_bkrr']:
                bkrr_names.append(rule['lower'])
        self.udt_indexes = {}
        for udt in self.udts:
            self.udt_indexes[udt['lower']] = udt['index']
            if udt['is_bkru']:
                bkru_names.append(udt['lower'])
            if udt['is_bkrr']:
                bkrr_names.append(udt['lower'])
        # set up for back referencing
        self.bkru_stack = None
        self.bkrr_stack = None
        if grammar.has_bkru:
            self.bkru_stack = BackrefenceStack(bkru_names)
        if grammar.has_bkrr:
            self.bkrr_stack = BackrefenceStack(bkrr_names)

    def add_callbacks(self, callbacks):
        '''Add callback functions to the rule name (RNM) nodes.
        Multiple calls to this function can be used to add multiple callbacks.
        @param callbacks A dictionary of named callback functions of the form
        {'rule1': func, 'rule2': func2, 'udt1': func3}. The functions should
        have the prototype
        <pre>func(callback_data)</pre>
        where callback_data is a dictionary of the form
            - 'state': ACTIVE, MATCH, EMPTY or NOMATCH
            (see @ref identifiers.py).
            Note: UDT callback function must set state to MATCH, EMPTY
            or NOMATCH on return. If UDT name begins with "u_" an EMPTY
            return will raise an Exception.
            - 'sub_begin': The index of the first character of the sub-string
            of the input string that is being parsed.
            - 'sub_end': The index of the last character of the sub-string
            of the input string that is being parsed.
            - 'phrase_index': The offset to the first character
            of the matched phrase.
            - 'phrase_length': The number of characters in the matched phrase.
            Note: UDT callback functions must set phrase_length on return.
            - 'max_phrase_length': The maximum number of matched characters.
            (Used mainly in the syntax (@ref syntax_callbacks.py) phase
            for error reporting.)
            - 'user_data': The data object passed to the parser by the user
            in @ref parser().
        Names are matched case-insensitively; rule names are tried first,
        then UDT names. An Exception is raised for any name that is neither.
        '''
        for name, func in callbacks.items():
            lower = name.lower()
            index = self.rule_indexes.get(lower)
            if index is not None:
                # it's a rule name
                self.rule_callbacks[index] = func
                continue
            # not a rule name, try UDTs
            index = self.udt_indexes.get(lower)
            if index is None:
                raise Exception(
                    'callback name is not a rule name or UDT name',
                    name)
            # it's a UDT name
            self.udt_callbacks[index] = func

    def set_tree_depth_limit(self, maxt):
        '''Set a maximum tree depth.
        The parser will raise an Exception if the parse
        tree depth exceeds the specified maximum.
        Negative values are clamped to 0.
        @param maxt the maximum allowed parse tree depth'''

        self.tree_depth_limit = max(0, maxt)

    def set_node_hit_limit(self, maxt):
        '''Set a maximum number of node hits.
        The parser will raise an Exception if the number
        of node hits exceeds the specified maximum.
        Negative values are clamped to 0.
        @param maxt the maximum allowed number of node hits'''
        self.node_hits_limit = max(0, maxt)

    def parse(
            self,
            input,
            start_rule=None,
            sub_begin=0,
            sub_length=0,
            user_data=None):
        '''Parses an input string.
        @param input A tuple of positive integers representing
        the input string.
        @param start_rule Name of the grammar's start rule
        (defaults to first rule of the SABNF grammar.)
        @param sub_begin The index of the first integer of the substring
        of the input to parse.
        @param sub_length The length of the substring to parse
        (<=0 indicates end of input string.)
        @param user_data Data which will be passed to the callback functions
        strictly for user's use.
        @returns A ParserResult object describing the final parser state.
        Raises an Exception if start_rule is not a rule name, if sub_begin
        is outside of the input, or if a UDT has no callback function.
        '''

        # initialize
        if start_rule:
            # search for rule name (case-insensitive)
            self.start_rule = self.rule_indexes.get(start_rule.lower())
            if self.start_rule is None:
                raise Exception('start rule not a valid rule name', start_rule)
        else:
            # use the first rule
            self.start_rule = 0
        input_len = len(input)
        if sub_begin < 0 or sub_begin > input_len:
            # a negative index would silently address the input from its end
            raise Exception(
                'sub_begin is outside of the input string', sub_begin)
        self.input = input
        self.sub_begin = sub_begin
        if sub_length > 0:
            # the substring may not extend past the end of the input
            self.sub_end = min(self.sub_begin + sub_length, input_len)
        else:
            self.sub_end = input_len
        # verify that all UDT callbacks are set, if any
        for udt in self.udts:
            if self.udt_callbacks[udt['index']] is None:
                raise Exception(
                    'All UDTs require a callback function. None for '
                    + udt['name'])
        if self.ast:
            # initialize the AST
            self.ast.input = input
            self.ast.indexStack.clear()
            self.ast.records.clear()
        self.max_phrase_length = 0
        self.node_hits = 0
        self.tree_depth = 0
        self.max_tree_depth = 0
        self.state = id.ACTIVE
        self.phrase_index = self.sub_begin
        self.lookaround = 0
        self.current_look_direction = id.LOOKAROUND_NONE
        # dummy opcode for start rule
        self.opcodes = ({'type': id.RNM, 'index': self.start_rule},)
        self.cbData = {'state': id.ACTIVE,
                       'input': self.input,
                       'sub_begin': self.sub_begin,
                       'sub_end': self.sub_end,
                       'phrase_index': self.sub_begin,
                       'phrase_length': 0,
                       'max_phrase_length': 0,
                       'user_data': user_data}
        self.opExecute(0)
        return ParserResult(self)

    def opALT(self, op_index):
        '''Alternation: succeeds with the first child that succeeds.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        op = self.opcodes[op_index]
        index = self.phrase_index
        state = id.NOMATCH
        for childOp in op['children']:
            self.state = id.ACTIVE
            if self.bkru_stack:
                saveu = self.bkru_stack.save_state()
            if self.bkrr_stack:
                saver = self.bkrr_stack.save_state()
            self.opExecute(childOp)
            if self.state == id.NOMATCH:
                # reset phrase index on failure
                self.phrase_index = index
                if self.bkru_stack:
                    self.bkru_stack.restore_state(saveu)
                if self.bkrr_stack:
                    self.bkrr_stack.restore_state(saver)
            else:
                # ALT succeeds when first child succeeds
                state = id.MATCH if self.phrase_index > index else id.EMPTY
                break
        self.state = state

    def opALTbehind(self, op_index):
        '''Alternation in look behind mode: children are tried in
        reverse order.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        op = self.opcodes[op_index]
        index = self.phrase_index
        state = id.NOMATCH
        for childOp in reversed(op['children']):
            self.state = id.ACTIVE
            self.opExecute(childOp)
            if self.state == id.NOMATCH:
                # reset phrase index on failure
                self.phrase_index = index
            else:
                # ALT succeeds when first child succeeds;
                # report EMPTY when the phrase index did not move so that
                # an enclosing REP terminates
                state = id.EMPTY if self.phrase_index == index else id.MATCH
                break
        self.state = state

    def opCAT(self, op_index):
        '''Concatenation: succeeds only if every child succeeds, in order.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        op = self.opcodes[op_index]
        if self.ast and self.lookaround == 0:
            savedAstState = self.ast.save_state()
        index = self.phrase_index
        state = id.MATCH
        if self.bkru_stack:
            saveu = self.bkru_stack.save_state()
        if self.bkrr_stack:
            saver = self.bkrr_stack.save_state()
        for childOp in op['children']:
            self.state = id.ACTIVE
            self.opExecute(childOp)
            if self.state == id.NOMATCH:
                # CAT fails if any child fails
                self.phrase_index = index
                state = id.NOMATCH
                if self.bkru_stack:
                    self.bkru_stack.restore_state(saveu)
                if self.bkrr_stack:
                    self.bkrr_stack.restore_state(saver)
                if self.ast and self.lookaround == 0:
                    self.ast.restore_state(savedAstState)
                break
        if state == id.MATCH and self.phrase_index == index:
            # every child matched an empty phrase - report EMPTY, otherwise
            # an enclosing REP would repeat forever without progress
            state = id.EMPTY
        self.state = state

    def opCATbehind(self, op_index):
        '''Concatenation in look behind mode: children are executed
        in reverse order.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        op = self.opcodes[op_index]
        if self.ast and self.lookaround == 0:
            savedAstState = self.ast.save_state()
        index = self.phrase_index
        state = id.MATCH
        for childOp in reversed(op['children']):
            self.state = id.ACTIVE
            self.opExecute(childOp)
            if self.state == id.NOMATCH:
                # CAT fails if any child fails
                self.phrase_index = index
                state = id.NOMATCH
                if self.ast and self.lookaround == 0:
                    self.ast.restore_state(savedAstState)
                break
        if state == id.MATCH and self.phrase_index == index:
            # all children were empty, see opCAT
            state = id.EMPTY
        self.state = state

    def opREP(self, op_index):
        '''Repetition: executes the child node (the next opcode) until it
        fails, matches nothing, the input is exhausted or op['max']
        repetitions have been made.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        op = self.opcodes[op_index]
        repCount = 0
        index = self.phrase_index
        while True:
            if self.phrase_index >= self.sub_end:
                # exit on end of string
                break
            # execute the child node
            self.state = id.ACTIVE
            iterationIndex = self.phrase_index
            if self.bkru_stack:
                saveu = self.bkru_stack.save_state()
            if self.bkrr_stack:
                saver = self.bkrr_stack.save_state()
            if self.ast and self.lookaround == 0:
                savedAstState = self.ast.save_state()
            self.opExecute(op_index + 1)
            if (self.state == id.MATCH
                    and self.phrase_index == iterationIndex):
                # a "match" that did not move the phrase index
                # is an empty match
                self.state = id.EMPTY
            if self.state == id.EMPTY:
                # end if child node return EMPTY (prevents infinite loop)
                break
            if self.state == id.NOMATCH:
                # end if the child node fails
                if self.bkru_stack:
                    self.bkru_stack.restore_state(saveu)
                if self.bkrr_stack:
                    self.bkrr_stack.restore_state(saver)
                if self.ast and self.lookaround == 0:
                    self.ast.restore_state(savedAstState)
                break
            repCount += 1
            if repCount == op['max']:
                # end when the repetition count has maxed out
                break
        # done with repetitions, evaluate the match count
        # abs() keeps the phrase length positive in look behind mode
        # - in this case the phrase index is moving backwards
        repPhraseLength = abs(self.phrase_index - index)
        # REP always succeeds when child node returns EMPTY
        # this may not seem obvious, but that's the way it works out
        if self.state == id.EMPTY or repCount >= op['min']:
            self.state = id.EMPTY if repPhraseLength == 0 else id.MATCH
        else:
            self.state = id.NOMATCH

    def opRNM(self, op_index):
        '''Rule name: executes the opcodes of the named rule, calling the
        rule's callback function (if any) before and after, and records
        the matched phrase in the AST and back reference stacks.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        parentOps = self.opcodes
        op = self.opcodes[op_index]
        rule = self.rules[op['index']]
        lower = rule['lower']
        callback = self.rule_callbacks[op['index']]
        self.opcodes = rule['opcodes']
        if rule['has_bkrr']:
            saver = self.bkrr_stack.save_state()
        self.state = id.ACTIVE
        phrase_index = self.phrase_index
        phrase_length = 0
        if callback:
            # handle rule callback function (down)
            self.cbData['state'] = id.ACTIVE
            self.cbData['phrase_index'] = phrase_index
            self.cbData['phrase_length'] = 0
            self.cbData['max_phrase_length'] = self.max_phrase_length
            callback(self.cbData)
        if self.ast and self.lookaround == 0:
            savedAstState = self.ast.save_state()
            self.ast.down(lower)
        self.opExecute(0)
        if self.current_look_direction == id.LOOKAROUND_BEHIND:
            # phrase index is moving backwards here
            phrase_length = phrase_index - self.phrase_index
            phrase_index = self.phrase_index
        else:
            phrase_length = self.phrase_index - phrase_index
        if callback:
            # handle rule callback function (up)
            self.cbData['state'] = self.state
            self.cbData['phrase_length'] = phrase_length
            # phrase_index is the beginning of the phrase
            # in either direction
            self.cbData['phrase_index'] = phrase_index
            callback(self.cbData)
        # handle back referencing, if any
        if self.state == id.NOMATCH:
            if self.ast and self.lookaround == 0:
                self.ast.restore_state(savedAstState)
        else:
            if self.ast and self.lookaround == 0:
                self.ast.up(lower, phrase_index, phrase_length)
            # save the phrase for later back referencing
            if rule['is_bkru']:
                self.bkru_stack.save_phrase(
                    lower, phrase_index, phrase_length)
            if rule['is_bkrr']:
                self.bkrr_stack.save_phrase(
                    lower, phrase_index, phrase_length)
        if rule['has_bkrr']:
            # pop the recursive back referencing stack
            # Note: there is a conflict here if a rule is both
            # recursive and recursively back referenced.
            # The recursive back reference will always fail because
            # it is both saved and removed on restore.
            self.bkrr_stack.restore_state(saver)
        self.opcodes = parentOps

    def _stringMatches(self, string, index, fold_case):
        '''Compare string to the input characters beginning at index.
        @param string The sequence of character codes to match.
        @param index Input index of the first character to compare.
        @param fold_case If True, input characters 65-90 (ASCII A-Z)
        are lowered by adding 32 before the comparison.
        @returns True if every character matches, False otherwise.
        '''
        for offset, char in enumerate(string):
            ichar = self.input[index + offset]
            if fold_case and 65 <= ichar <= 90:
                ichar += 32
            if char != ichar:
                return False
        return True

    def opTLS(self, op_index):
        '''Literal string with upper case ASCII input folded to lower case.
        NOTE(review): presumably op['string'] is already lower case -
        it is compared without folding; confirm against the generator.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        op = self.opcodes[op_index]
        index = self.phrase_index
        length = len(op['string'])
        if length == 0:
            # EMPTY match allowed, only in TLS
            self.state = id.EMPTY
            return
        state = id.NOMATCH
        if (index + length <= self.sub_end
                and self._stringMatches(op['string'], index, True)):
            state = id.MATCH
            self.phrase_index += length
        self.state = state

    def opTLSbehind(self, op_index):
        '''Look behind version of opTLS: matches the characters
        immediately preceding the phrase index and moves it backwards.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        op = self.opcodes[op_index]
        index = self.phrase_index
        length = len(op['string'])
        if length == 0:
            # EMPTY match allowed, only in TLS
            self.state = id.EMPTY
            return
        state = id.NOMATCH
        if (index - length >= 0
                and self._stringMatches(op['string'], index - length, True)):
            state = id.MATCH
            self.phrase_index -= length
        self.state = state

    def opTBS(self, op_index):
        '''Binary string: matches op['string'] exactly.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        op = self.opcodes[op_index]
        index = self.phrase_index
        length = len(op['string'])
        state = id.NOMATCH
        if (index + length <= self.sub_end
                and self._stringMatches(op['string'], index, False)):
            state = id.MATCH
            self.phrase_index += length
        self.state = state

    def opTBSbehind(self, op_index):
        '''Look behind version of opTBS: matches the characters
        immediately preceding the phrase index and moves it backwards.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        op = self.opcodes[op_index]
        index = self.phrase_index
        length = len(op['string'])
        state = id.NOMATCH
        if (index - length >= 0
                and self._stringMatches(op['string'], index - length, False)):
            state = id.MATCH
            self.phrase_index -= length
        self.state = state

    def opTRG(self, op_index):
        '''Terminal range: matches one character in the inclusive range
        op['min'] to op['max'].
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        op = self.opcodes[op_index]
        index = self.phrase_index
        state = id.NOMATCH
        if index < self.sub_end:
            char = self.input[index]
            if op['min'] <= char <= op['max']:
                state = id.MATCH
                self.phrase_index += 1
        self.state = state

    def opTRGbehind(self, op_index):
        '''Look behind version of opTRG: matches the one character
        immediately preceding the phrase index and moves it backwards.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        op = self.opcodes[op_index]
        state = id.NOMATCH
        # there is a preceding character whenever the phrase index is > 0,
        # including the very first character of the input (index 0)
        if self.phrase_index > 0:
            char = self.input[self.phrase_index - 1]
            if op['min'] <= char <= op['max']:
                state = id.MATCH
                self.phrase_index -= 1
        self.state = state

    def UDTValidate(
            self,
            state,
            phrase_index,
            phrase_length,
            last_index,
            name,
            empty):
        '''Validate the state and phrase length returned by
        a UDT callback function. Raises an Exception if they are invalid.
        Only called internally by the parser,
        never called explicitly by the user.
        @param state The state set by the callback function.
        @param phrase_index Index of the first character of the phrase.
        @param phrase_length The phrase length set by the callback function.
        @param last_index The phrase may not extend past this index.
        @param name The UDT name, used in the error messages.
        @param empty True if the UDT is allowed to return EMPTY.
        '''
        def raiseEx(msg):
            raise Exception('UDT ' + name + ' ' + msg)
        if state == id.MATCH:
            if phrase_length <= 0:
                raiseEx(
                    'matched phrase length must be > 0: ' +
                    str(phrase_length))
            if phrase_index + phrase_length > last_index:
                raiseEx('phrase_length cannot extend past end of substring')
            return
        if state == id.EMPTY:
            if empty is not True:
                raiseEx('not allowed to return EMPTY state')
            if phrase_length:
                raiseEx(
                    'EMPTY state must have 0 phrase length: ' +
                    str(phrase_length))
            return
        if state == id.NOMATCH:
            return
        if state == id.ACTIVE:
            raiseEx('must return EMPTY, MATCH or NOMATCH')
        raiseEx('returned unrecognized state: ' + str(state))

    def opUDT(self, op_index):
        '''User-defined terminal: the phrase is matched by the user's
        callback function, which reports the result in the callback data.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        op = self.opcodes[op_index]
        udt = self.udts[op['index']]
        lower = udt['lower']
        self.cbData['state'] = id.ACTIVE
        self.cbData['phrase_index'] = self.phrase_index
        self.cbData['phrase_length'] = 0
        self.cbData['max_phrase_length'] = self.max_phrase_length
        self.udt_callbacks[op['index']](self.cbData)
        self.UDTValidate(
            self.cbData['state'],
            self.phrase_index,
            self.cbData['phrase_length'],
            self.sub_end,
            udt['name'],
            udt['empty'])
        self.state = self.cbData['state']
        if self.state != id.NOMATCH:
            if self.ast and self.lookaround == 0:
                self.ast.down(lower)
                self.ast.up(
                    lower,
                    self.cbData['phrase_index'],
                    self.cbData['phrase_length'])
            # save the phrase for later back referencing
            if udt['is_bkru']:
                self.bkru_stack.save_phrase(
                    udt['lower'],
                    self.cbData['phrase_index'],
                    self.cbData['phrase_length'])
            if udt['is_bkrr']:
                self.bkrr_stack.save_phrase(
                    udt['lower'],
                    self.cbData['phrase_index'],
                    self.cbData['phrase_length'])
            # advance only on success - the phrase length of a NOMATCH
            # return is not validated
            self.phrase_index += self.cbData['phrase_length']

    def opUDTbehind(self, op_index):
        '''UDT operator not allowed in look behind mode.
        Always raises an Exception naming the UDT.'''
        op = self.opcodes[op_index]
        udt = self.udts[op['index']]
        msg = 'UDT('
        msg += udt['name']
        msg += ') called. '
        msg += 'UDTs not allowed in look behind mode (operators && and !!).'
        raise Exception(msg)

    def _lookAround(self, op_index, direction, negate):
        '''Common implementation of the look around operators.
        Executes the child node (the next opcode) in the given look
        direction, then restores the phrase index, the back reference
        stacks and the previous look direction.
        @param op_index Index of the look around opcode.
        @param direction id.LOOKAROUND_AHEAD or id.LOOKAROUND_BEHIND.
        @param negate If False the operator succeeds (EMPTY) when the child
        returns EMPTY or MATCH. If True it succeeds (EMPTY) when the child
        returns NOMATCH. In all other cases the state is NOMATCH.
        '''
        index = self.phrase_index
        self.state = id.ACTIVE
        self.lookaround += 1
        saveDir = self.current_look_direction
        self.current_look_direction = direction
        if self.bkrr_stack:
            saver = self.bkrr_stack.save_state()
        if self.bkru_stack:
            saveu = self.bkru_stack.save_state()
        #
        self.opExecute(op_index + 1)
        #
        if self.bkrr_stack:
            self.bkrr_stack.restore_state(saver)
        if self.bkru_stack:
            self.bkru_stack.restore_state(saveu)
        if negate:
            success = self.state == id.NOMATCH
        else:
            success = self.state == id.EMPTY or self.state == id.MATCH
        self.state = id.EMPTY if success else id.NOMATCH
        # look around operators never consume input
        self.phrase_index = index
        self.lookaround -= 1
        self.current_look_direction = saveDir

    def opAND(self, op_index):
        '''Positive look ahead: AND succeeds if child succeeds.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        self._lookAround(op_index, id.LOOKAROUND_AHEAD, False)

    def opNOT(self, op_index):
        '''Negative look ahead: NOT succeeds if child fails.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        self._lookAround(op_index, id.LOOKAROUND_AHEAD, True)

    def opBKA(self, op_index):
        '''Positive look behind: BKA succeeds if child succeeds.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        self._lookAround(op_index, id.LOOKAROUND_BEHIND, False)

    def opBKN(self, op_index):
        '''Negative look behind: BKN succeeds if child fails.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        self._lookAround(op_index, id.LOOKAROUND_BEHIND, True)

    def opBKR(self, op_index):
        '''Back reference: matches the phrase most recently saved for the
        referenced rule or UDT, case-sensitive or case-insensitive
        according to op['bkr_case'].
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        op = self.opcodes[op_index]
        if op['bkr_mode'] == id.BKR_MODE_UM:
            phrase = self.bkru_stack.get_phrase(op['lower'])
        elif op['bkr_mode'] == id.BKR_MODE_RM:
            phrase = self.bkrr_stack.get_phrase(op['lower'])
        else:
            raise Exception('BKR mode not recognized')
        bkrIndex = phrase[0]
        bkrLength = phrase[1]
        if bkrLength == 0:
            self.state = id.EMPTY
            return
        state = id.NOMATCH
        if self.phrase_index + bkrLength <= self.sub_end:
            state = id.MATCH
            if op['bkr_case'] == id.BKR_MODE_CS:
                for i in range(bkrLength):
                    bkrChar = self.input[bkrIndex + i]
                    inputChar = self.input[self.phrase_index + i]
                    if bkrChar != inputChar:
                        state = id.NOMATCH
                        break
            elif op['bkr_case'] == id.BKR_MODE_CI:
                for i in range(bkrLength):
                    bkrChar = self.input[bkrIndex + i]
                    inputChar = self.input[self.phrase_index + i]
                    # fold ASCII A-Z to a-z on both sides
                    if 65 <= bkrChar <= 90:
                        bkrChar += 32
                    if 65 <= inputChar <= 90:
                        inputChar += 32
                    if bkrChar != inputChar:
                        state = id.NOMATCH
                        break
            else:
                raise Exception('BKR case not recognized')

        self.state = state
        if state == id.MATCH:
            self.phrase_index += bkrLength

    def opBKRbehind(self, op_index):
        '''Back references not allowed in look behind mode.
        Always raises an Exception naming the back reference.'''
        op = self.opcodes[op_index]
        msg = 'BKR('
        msg += op['name']
        msg += ') called. '
        msg += 'Back referencing not allowed in look behind mode '
        msg += '(operators && and !!).'
        raise Exception(msg)

    def opABG(self, op_index):
        '''Beginning of string anchor.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        if self.phrase_index == 0:
            # ABG, beginning of string anchor succeeds
            # if index is first character of the input string
            # (not first character of substring, if any)
            self.state = id.EMPTY
        else:
            self.state = id.NOMATCH

    def opAEN(self, op_index):
        '''End of string anchor.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        if self.phrase_index == len(self.input):
            # AEN, end of string anchor succeeds
            # if index is just past the last character of the input string
            # (not last character of substring, if any)
            self.state = id.EMPTY
        else:
            self.state = id.NOMATCH

    def opExecute(self, op_index):
        '''Execute one opcode: select the operator function for the
        current look direction and call it between execDown and execUp.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        op = self.opcodes[op_index]
        if self.current_look_direction == id.LOOKAROUND_BEHIND:
            opSelect = self.opSelectBehind
        else:
            opSelect = self.opSelect
        opFunc = opSelect.get(op['type'], None)
        if opFunc is None:
            raise Exception('unrecognized opcode type', op['type'])
        index = self.phrase_index
        self.execDown(op)
        opFunc(op_index)
        self.execUp(op, index)

    def execDown(self, op):
        '''Bookkeeping done before an operator is executed: tracing,
        tree depth and node hit counting, and enforcing their limits.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        if self.trace:
            self.trace.down(op)
        self.tree_depth += 1
        self.node_hits += 1
        # NOTE(review): the limits are tested with >=, so the Exception is
        # raised when a count reaches its limit, not only when it exceeds it.
        if self.tree_depth >= self.tree_depth_limit:
            raise Exception(
                'parse tree depth limit exceeded, limit = %d' %
                self.tree_depth_limit)
        if self.node_hits >= self.node_hits_limit:
            raise Exception(
                'node hits limit exceeded, limit = %d' %
                self.node_hits_limit)
        if self.tree_depth > self.max_tree_depth:
            self.max_tree_depth = self.tree_depth

    def execUp(self, op, begin_index):
        '''Bookkeeping done after an operator has been executed: tree
        depth, maximum phrase length, tracing and statistics.
        Only called internally by the parser,
        never called explicitly by the user.
        '''
        self.tree_depth -= 1
        if self.lookaround == 0:
            # phrases matched inside look around operators are not counted
            totalLength = self.phrase_index - self.sub_begin
            if totalLength > self.max_phrase_length:
                self.max_phrase_length = totalLength
        if self.trace:
            self.trace.up(op, begin_index)
        if self.stats:
            self.stats.collect(op)
A convenience class for the parser's results.
Definition: parser.py:8
def __init__(cls, parser)
Initialize this result object from the parser object.
Definition: parser.py:11
def __str__(cls)
Generate a string representation of the parser state.
Definition: parser.py:40
The Parser class for parsing an APG grammar.
Definition: parser.py:60
def opTLS(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:504
def set_tree_depth_limit(self, maxt)
Set a maximum tree depth.
Definition: parser.py:198
def UDTValidate(self, state, phrase_index, phrase_length, last_index, name, empty)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:631
def opBKRbehind(self, op_index)
Back references not allowed in look behind mode.
Definition: parser.py:877
def opNOT(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:741
def parse(self, input, start_rule=None, sub_begin=0, sub_length=0, user_data=None)
Parses an input string.
Definition: parser.py:219
def opCAT(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:338
def opTRGbehind(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:610
def execUp(self, op, begin_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:944
def opBKA(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:771
def opExecute(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:911
def opRNM(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:441
def opBKR(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:832
def __init__(self, grammar)
The Parser class constructor.
Definition: parser.py:63
def set_node_hit_limit(self, maxt)
Set a maximum number of node hits.
Definition: parser.py:206
def opREP(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:388
def opCATbehind(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:367
def opUDTbehind(self, op_index)
UDT operator not allowed in look behind mode.
Definition: parser.py:700
def opTRG(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:596
def opUDT(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:659
def opALT(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:292
def opAEN(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:899
def opTBS(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:557
def add_callbacks(self, callbacks)
Add callback functions to the rule name (RNM) nodes.
Definition: parser.py:148
def opBKN(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:802
def opAND(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:710
def opALTbehind(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:319
def opTBSbehind(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:576
def opTLSbehind(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:530
def execDown(self, op)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:925
def opABG(self, op_index)
Only called internally by the parser, never called explicitly by the user.
Definition: parser.py:887
Python APG, Version 1.0, is licensed under the 2-Clause BSD License,
an Open Source Initiative Approved License.