1 ''' @file apg_py/lib/parser.py @brief The APG parser.'''
9 '''A convenience class for the parser's results.'''
12 '''Initialize this result object from the parser object.'''
15 cls.
successsuccess = (parser.state != id.NOMATCH
16 and parser.phrase_index == parser.sub_end)
20 cls.
STATESTATE = id.dict.get(parser.state)
30 cls.
sub_lengthsub_length = parser.sub_end - parser.sub_begin
41 '''Generate a string representation of the parser state.
42 The string will be displayed by the print() function.
43 @returns Returns the string representation to display.
45 display =
'%19s: %s\n' % (
'success', str(cls.
successsuccess))
46 display +=
'%19s: %s\n' % (
'state', str(cls.
statestate))
47 display +=
'%19s: %s\n' % (
'STATE', str(cls.
STATESTATE))
48 display +=
'%19s: %s\n' % (
'input_length', str(cls.
input_lengthinput_length))
49 display +=
'%19s: %s\n' % (
'sub_begin', str(cls.
sub_beginsub_begin))
50 display +=
'%19s: %s\n' % (
'sub_end', str(cls.
sub_endsub_end))
51 display +=
'%19s: %s\n' % (
'sub_length', str(cls.
sub_lengthsub_length))
52 display +=
'%19s: %s\n' % (
'phrase_length', str(cls.
phrase_lengthphrase_length))
53 display +=
'%19s: %s\n' % (
'max_phrase_length',
55 display +=
'%19s: %s\n' % (
'node_hits', str(cls.
node_hitsnode_hits))
56 display +=
'%19s: %s\n' % (
'max_tree_depth', str(cls.
max_tree_depthmax_tree_depth))
61 '''The Parser class for parsing an APG grammar.'''
64 '''The Parser class constructor.
65 @param grammar The grammar object generated from an SABNF grammar
66 by the API (see @ref api.py).'''
69 self.
udtsudts = grammar.udts
82 id.ALT: self.
opALTopALT,
83 id.CAT: self.
opCATopCAT,
84 id.REP: self.
opREPopREP,
85 id.RNM: self.
opRNMopRNM,
86 id.TLS: self.
opTLSopTLS,
87 id.TBS: self.
opTBSopTBS,
88 id.TRG: self.
opTRGopTRG,
89 id.UDT: self.
opUDTopUDT,
90 id.AND: self.
opANDopAND,
91 id.NOT: self.
opNOTopNOT,
92 id.BKR: self.
opBKRopBKR,
93 id.BKA: self.
opBKAopBKA,
94 id.BKN: self.
opBKNopBKN,
95 id.ABG: self.
opABGopABG,
96 id.AEN: self.
opAENopAEN,
101 id.REP: self.
opREPopREP,
102 id.RNM: self.
opRNMopRNM,
107 id.AND: self.
opANDopAND,
108 id.NOT: self.
opNOTopNOT,
110 id.BKA: self.
opBKAopBKA,
111 id.BKN: self.
opBKNopBKN,
112 id.ABG: self.
opABGopABG,
113 id.AEN: self.
opAENopAEN,
126 for rule
in self.
rulesrules:
127 self.
rule_indexesrule_indexes[rule[
'lower']] = rule[
'index']
129 bkru_names.append(rule[
'lower'])
131 bkrr_names.append(rule[
'lower'])
134 for udt
in self.
udtsudts:
135 self.
udt_indexesudt_indexes[udt[
'lower']] = udt[
'index']
137 bkru_names.append(udt[
'lower'])
139 bkrr_names.append(udt[
'lower'])
143 if(grammar.has_bkru):
145 if(grammar.has_bkrr):
149 '''Add callback functions to the rule name (RNM) nodes.
150 Multiple calls to this function can be used to add multiple callbacks.
151 @param callbacks A dictionary of named callback functions of the form
152 {'rule1': func, 'rule2': func2, 'udt1': func3}. The functions should
154 <pre>func(callback_data)</pre>
155 where callback_data is a dictionary of the form
156 - 'state': ACTIVE, MATCH, EMPTY or NOMATCH
157 (see @ref identifiers.py).
158 Note: UDT callback function must set state to MATCH, EMPTY
159 or NOMATCH on return. If UDT name begins with "u_" an EMPTY
160 return will raise an Exception.
161 - 'sub_begin': The index of the first character of the sub-string
162 of the input string that is being parsed.
163 - 'sub_end': The index one past the last character of the sub-string
164 of the input string that is being parsed.
165 - 'phrase_index': The offset to the first character
166 of the matched phrase.
167 - 'phrase_length': The number of characters in the matched phrase.
168 Note: UDT callback functions must set phrase_length on return.
169 - 'max_phrase_length': The maximum number of matched characters.
170 (Used mainly in the syntax (@ref syntax_callbacks.py) phase
171 for error reporting.)
172 - 'user_data': The data object passed to the parser by the user
175 items = callbacks.items()
177 name = item[0].lower()
182 index = self.
udt_indexesudt_indexes.get(name,
None)
185 'callback name is not a rule name or UDT name',
192 'callback name is not a rule name or UDT name',
199 '''Set a maximum tree depth.
200 The parser will raise an Exception if the parse
201 tree depth exceeds the specified maximum.
202 @param maxt the maximum allowed parse tree depth'''
207 '''Set a maximum number of node hits.
208 The parser will raise an Exception if the number
209 of node hits exceeds the specified maximum.
210 @param maxt the maximum allowed number of node hits'''
220 '''Parses an input string.
221 @param input A tuple of positive integers representing
223 @param start_rule Name of the grammar's start rule
224 (defaults to first rule of the SABNF grammar.)
225 @param sub_begin The index of the first integer of the substring
226 of the input to parse.
227 @param sub_length The length of the substring to parse
228 (<=0 indicates end of input string.)
229 @param user_data Data which will be passed to the callback functions
230 strictly for user's use.
236 lower = start_rule.lower()
238 for rule
in self.
rulesrules:
239 if(rule[
'lower'] == lower):
243 raise Exception(
'start rule not a valid rule name', start_rule)
249 input_len =
len(input)
252 if(self.
sub_endsub_end > input_len):
253 self.
sub_endsub_end = input_len
255 self.
sub_endsub_end = input_len
258 for udt
in self.
udtsudts:
264 'All UDTs require a callback function. None for '
268 self.
astast.input = input
269 self.
astast.indexStack.clear()
270 self.
astast.records.clear()
282 'input': self.
inputinput,
284 'sub_end': self.
sub_endsub_end,
287 'max_phrase_length': 0,
288 'user_data': user_data}
293 '''Only called internally by the parser,
294 never called explicitly by the user.
296 op = self.
opcodesopcodes[op_index]
299 for childOp
in op[
'children']:
300 self.
statestate = id.ACTIVE
302 saveu = self.
bkru_stackbkru_stack.save_state()
304 saver = self.
bkrr_stackbkrr_stack.save_state()
306 if(self.
statestate == id.NOMATCH):
310 self.
bkru_stackbkru_stack.restore_state(saveu)
312 self.
bkrr_stackbkrr_stack.restore_state(saver)
315 state = id.MATCH if(self.
phrase_indexphrase_index > index)
else id.EMPTY
317 self.
statestate = state
320 '''Only called internally by the parser,
321 never called explicitly by the user.
323 op = self.
opcodesopcodes[op_index]
326 for childOp
in reversed(op[
'children']):
327 self.
statestate = id.ACTIVE
329 if(self.
statestate == id.NOMATCH):
336 self.
statestate = state
339 '''Only called internally by the parser,
340 never called explicitly by the user.
342 op = self.
opcodesopcodes[op_index]
344 savedAstState = self.
astast.save_state()
348 saveu = self.
bkru_stackbkru_stack.save_state()
350 saver = self.
bkrr_stackbkrr_stack.save_state()
351 for childOp
in op[
'children']:
352 self.
statestate = id.ACTIVE
354 if(self.
statestate == id.NOMATCH):
359 self.
bkru_stackbkru_stack.restore_state(saveu)
361 self.
bkrr_stackbkrr_stack.restore_state(saver)
363 self.
astast.restore_state(savedAstState)
365 self.
statestate = state
368 '''Only called internally by the parser,
369 never called explicitly by the user.
371 op = self.
opcodesopcodes[op_index]
373 savedAstState = self.
astast.save_state()
376 for childOp
in reversed(op[
'children']):
377 self.
statestate = id.ACTIVE
379 if(self.
statestate == id.NOMATCH):
384 self.
astast.restore_state(savedAstState)
386 self.
statestate = state
389 '''Only called internally by the parser,
390 never called explicitly by the user.
392 op = self.
opcodesopcodes[op_index]
400 self.
statestate = id.ACTIVE
402 saveu = self.
bkru_stackbkru_stack.save_state()
404 saver = self.
bkrr_stackbkrr_stack.save_state()
406 savedAstState = self.
astast.save_state()
408 if(self.
statestate == id.MATCH):
410 if(self.
statestate == id.EMPTY):
413 if(self.
statestate == id.NOMATCH):
416 self.
bkru_stackbkru_stack.restore_state(saveu)
418 self.
bkrr_stackbkrr_stack.restore_state(saver)
420 self.
astast.restore_state(savedAstState)
423 if(repCount == op[
'max']):
429 repPhraseLength = abs(self.
phrase_indexphrase_index - index)
430 if(self.
statestate == id.EMPTY):
433 self.
statestate = id.EMPTY if(
434 repPhraseLength == 0)
else id.MATCH
435 elif(repCount >= op[
'min']):
436 self.
statestate = id.EMPTY if(
437 repPhraseLength == 0)
else id.MATCH
439 self.
statestate = id.NOMATCH
442 '''Only called internally by the parser,
443 never called explicitly by the user.
445 parentOps = self.
opcodesopcodes
446 op = self.
opcodesopcodes[op_index]
447 rule = self.
rulesrules[op[
'index']]
448 lower = rule[
'lower']
449 self.
opcodesopcodes = rule[
'opcodes']
450 if(rule[
'has_bkrr']):
451 saver = self.
bkrr_stackbkrr_stack.save_state()
452 self.
statestate = id.ACTIVE
457 self.
cbDatacbData[
'state'] = id.ACTIVE
458 self.
cbDatacbData[
'phrase_index'] = phrase_index
459 self.
cbDatacbData[
'phrase_length'] = 0
463 savedAstState = self.
astast.save_state()
464 self.
astast.down(lower)
468 phrase_length = phrase_index - self.
phrase_indexphrase_index
471 phrase_length = self.
phrase_indexphrase_index - phrase_index
475 self.
cbDatacbData[
'phrase_length'] = phrase_length
479 self.
cbDatacbData[
'phrase_index'] = phrase_index
482 if(self.
statestate == id.NOMATCH):
484 self.
astast.restore_state(savedAstState)
487 self.
astast.up(lower, phrase_index, phrase_length)
491 lower, phrase_index, phrase_length)
494 lower, phrase_index, phrase_length)
495 if(rule[
'has_bkrr']):
501 self.
bkrr_stackbkrr_stack.restore_state(saver)
502 self.
opcodesopcodes = parentOps
505 '''Only called internally by the parser,
506 never called explicitly by the user.
508 op = self.
opcodesopcodes[op_index]
510 length =
len(op[
'string'])
513 self.
statestate = id.EMPTY
516 if(index + length <= self.
sub_endsub_end):
518 for char
in op[
'string']:
519 ichar = self.
inputinput[index]
520 if(ichar >= 65
and ichar <= 90):
526 self.
statestate = state
527 if(state == id.MATCH):
531 '''Only called internally by the parser,
532 never called explicitly by the user.
534 op = self.
opcodesopcodes[op_index]
536 length =
len(op[
'string'])
539 self.
statestate = id.EMPTY
542 if(index - length >= 0):
545 for char
in op[
'string']:
546 ichar = self.
inputinput[index]
547 if(ichar >= 65
and ichar <= 90):
553 self.
statestate = state
554 if(state == id.MATCH):
558 '''Only called internally by the parser,
559 never called explicitly by the user.
561 op = self.
opcodesopcodes[op_index]
563 length =
len(op[
'string'])
565 if(index + length <= self.
sub_endsub_end):
567 for char
in op[
'string']:
568 if(char != self.
inputinput[index]):
572 self.
statestate = state
573 if(state == id.MATCH):
577 '''Only called internally by the parser,
578 never called explicitly by the user.
580 op = self.
opcodesopcodes[op_index]
582 length =
len(op[
'string'])
584 if(index - length >= 0):
587 for char
in op[
'string']:
588 if(char != self.
inputinput[index]):
592 self.
statestate = state
593 if(state == id.MATCH):
597 '''Only called internally by the parser,
598 never called explicitly by the user.
600 op = self.
opcodesopcodes[op_index]
603 if(index < self.
sub_endsub_end):
604 char = self.
inputinput[index]
605 if(char >= op[
'min']
and char <= op[
'max']):
608 self.
statestate = state
611 '''Only called internally by the parser,
612 never called explicitly by the user.
614 op = self.
opcodesopcodes[op_index]
618 char = self.
inputinput[index]
619 if(char >= op[
'min']
and char <= op[
'max']):
622 self.
statestate = state
632 '''Only called internally by the parser,
633 never called explicitly by the user.
636 raise Exception(
'UDT ' + name +
' ' + msg)
637 if(state == id.MATCH):
638 if(phrase_length <= 0):
640 'matched phrase length must be > 0: ' +
642 if(phrase_index + phrase_length > last_index):
643 raiseEx(
'phrase_length cannot extend past end of substring')
645 if(state == id.EMPTY):
646 if(empty
is not True):
647 raiseEx(
'not allowed to return EMPTY state')
650 'EMPTY state must have 0 phrase length: ' +
653 if(state == id.NOMATCH):
655 if(state == id.ACTIVE):
656 raiseEx(
'must return EMPTY, MATCH or NOMATCH')
657 raiseEx(
'returned unrecognized state: ' + str(state))
660 '''Only called internally by the parser,
661 never called explicitly by the user.
663 op = self.
opcodesopcodes[op_index]
664 udt = self.
udtsudts[op[
'index']]
666 self.
cbDatacbData[
'state'] = id.ACTIVE
668 self.
cbDatacbData[
'phrase_length'] = 0
672 self.
cbDatacbData[
'state'],
674 self.
cbDatacbData[
'phrase_length'],
680 if(self.
statestate != id.NOMATCH):
682 self.
astast.down(lower)
685 self.
cbDatacbData[
'phrase_index'],
686 self.
cbDatacbData[
'phrase_length'])
691 self.
cbDatacbData[
'phrase_index'],
692 self.
cbDatacbData[
'phrase_length'])
696 self.
cbDatacbData[
'phrase_index'],
697 self.
cbDatacbData[
'phrase_length'])
701 '''UDT operator not allowed in look behind mode.'''
702 op = self.
opcodesopcodes[op_index]
703 udt = self.
udtsudts[op[
'index']]
707 msg +=
'UDTs not allowed in look behind mode (operators && and !!).'
711 '''Only called internally by the parser,
712 never called explicitly by the user.
715 self.
statestate = id.ACTIVE
720 saver = self.
bkrr_stackbkrr_stack.save_state()
722 saveu = self.
bkru_stackbkru_stack.save_state()
727 self.
bkrr_stackbkrr_stack.restore_state(saver)
729 self.
bkru_stackbkru_stack.restore_state(saveu)
730 if(self.
statestate == id.EMPTY
731 or self.
statestate == id.MATCH):
733 self.
statestate = id.EMPTY
736 self.
statestate = id.NOMATCH
742 '''Only called internally by the parser,
743 never called explicitly by the user.
746 self.
statestate = id.ACTIVE
751 saver = self.
bkrr_stackbkrr_stack.save_state()
753 saveu = self.
bkru_stackbkru_stack.save_state()
758 self.
bkrr_stackbkrr_stack.restore_state(saver)
760 self.
bkru_stackbkru_stack.restore_state(saveu)
761 if(self.
statestate == id.NOMATCH):
763 self.
statestate = id.EMPTY
766 self.
statestate = id.NOMATCH
772 '''Only called internally by the parser,
773 never called explicitly by the user.
776 self.
statestate = id.ACTIVE
781 saver = self.
bkrr_stackbkrr_stack.save_state()
783 saveu = self.
bkru_stackbkru_stack.save_state()
788 self.
bkrr_stackbkrr_stack.restore_state(saver)
790 self.
bkru_stackbkru_stack.restore_state(saveu)
791 if(self.
statestate == id.EMPTY
792 or self.
statestate == id.MATCH):
794 self.
statestate = id.EMPTY
797 self.
statestate = id.NOMATCH
803 '''Only called internally by the parser,
804 never called explicitly by the user.
807 self.
statestate = id.ACTIVE
812 saver = self.
bkrr_stackbkrr_stack.save_state()
814 saveu = self.
bkru_stackbkru_stack.save_state()
819 self.
bkrr_stackbkrr_stack.restore_state(saver)
821 self.
bkru_stackbkru_stack.restore_state(saveu)
822 if(self.
statestate == id.NOMATCH):
824 self.
statestate = id.EMPTY
827 self.
statestate = id.NOMATCH
833 '''Only called internally by the parser,
834 never called explicitly by the user.
836 op = self.
opcodesopcodes[op_index]
837 if(op[
'bkr_mode'] == id.BKR_MODE_UM):
838 phrase = self.
bkru_stackbkru_stack.get_phrase(op[
'lower'])
839 elif(op[
'bkr_mode'] == id.BKR_MODE_RM):
840 phrase = self.
bkrr_stackbkrr_stack.get_phrase(op[
'lower'])
842 raise Exception(
'BKR mode not recognized')
844 bkrLength = phrase[1]
846 self.
statestate = id.EMPTY
851 if(op[
'bkr_case'] == id.BKR_MODE_CS):
852 for i
in range(bkrLength):
853 bkrChar = self.
inputinput[bkrIndex + i]
855 if(bkrChar != inputChar):
858 elif(op[
'bkr_case'] == id.BKR_MODE_CI):
859 for i
in range(bkrLength):
860 bkrChar = self.
inputinput[bkrIndex + i]
862 if(bkrChar >= 65
and bkrChar <= 90):
864 if(inputChar >= 65
and inputChar <= 90):
866 if(bkrChar != inputChar):
870 raise Exception(
'BKR case not recognized')
872 self.
statestate = state
873 if(state == id.MATCH):
878 '''Back references not allowed in look behind mode.'''
879 op = self.
opcodesopcodes[op_index]
883 msg +=
'Back referencing not allowed in look behind mode '
884 msg +=
'(operators && and !!).'
888 '''Only called internally by the parser,
889 never called explicitly by the user.
895 self.
statestate = id.EMPTY
897 self.
statestate = id.NOMATCH
900 '''Only called internally by the parser,
901 never called explicitly by the user.
907 self.
statestate = id.EMPTY
909 self.
statestate = id.NOMATCH
912 '''Only called internally by the parser,
913 never called explicitly by the user.
915 op = self.
opcodesopcodes[op_index]
918 == id.LOOKAROUND_BEHIND)
else self.
opSelectopSelect
919 opFunc = opSelect.get(op[
'type'],
None)
923 self.
execUpexecUp(op, index)
926 '''Only called internally by the parser,
927 never called explicitly by the user.
930 self.
tracetrace.down(op)
935 'parse tree depth limit exceeded, limit = %d' %
939 'node hits limit exceeded, limit = %d' %
945 '''Only called internally by the parser,
946 never called explicitly by the user.
954 self.
tracetrace.up(op, begin_index)
956 self.
statsstats.collect(op)
A convenience class for the parser's results.
def __init__(cls, parser)
Initialize this result object from the parser object.
def __str__(cls)
Generate a string representation of the parser state.
The Parser class for parsing an APG grammar.
def opTLS(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def set_tree_depth_limit(self, maxt)
Set a maximum tree depth.
def UDTValidate(self, state, phrase_index, phrase_length, last_index, name, empty)
Only called internally by the parser, never called explicitly by the user.
def opBKRbehind(self, op_index)
Back references not allowed in look behind mode.
def opNOT(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def parse(self, input, start_rule=None, sub_begin=0, sub_length=0, user_data=None)
Parses an input string.
def opCAT(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def opTRGbehind(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def execUp(self, op, begin_index)
Only called internally by the parser, never called explicitly by the user.
def opBKA(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def opExecute(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def opRNM(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def opBKR(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def __init__(self, grammar)
The Parser class constructor.
def set_node_hit_limit(self, maxt)
Set a maximum number of node hits.
def opREP(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def opCATbehind(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def opUDTbehind(self, op_index)
UDT operator not allowed in look behind mode.
def opTRG(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def opUDT(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def opALT(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def opAEN(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def opTBS(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def add_callbacks(self, callbacks)
Add callback functions to the rule name (RNM) nodes.
def opBKN(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def opAND(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def opALTbehind(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def opTBSbehind(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def opTLSbehind(self, op_index)
Only called internally by the parser, never called explicitly by the user.
def execDown(self, op)
Only called internally by the parser, never called explicitly by the user.
def opABG(self, op_index)
Only called internally by the parser, never called explicitly by the user.