1 ''' @file apg_py/exp/exp.py
2 @brief ApgExp - a RegExp-like pattern matching engine.
16 '''A class for returning the results of a pattern match.
30 '''The Result class constructor. Only called internally by ApgExp'''
31 ending = begin + length
35 self.
matchmatch = source[begin:ending]
41 self.
astast = ast.copy() if(ast)
else None
46 '''print(Result) will call this function for a display of the object.
47 @returns Returns the display string.'''
48 string =
' match: ' + str(self.
matchmatch)
49 string +=
'\n index: ' + str(self.
indexindex)
50 string +=
'\n indices: ' + str(self.
indicesindices)
51 string +=
'\n left_context: ' + str(self.
left_contextleft_context)
52 string +=
'\n right_context: ' + str(self.
right_contextright_context)
53 string +=
'\n node hits: ' + str(self.
node_hitsnode_hits)
54 string +=
'\nmax tree depth: ' + str(self.
max_tree_depthmax_tree_depth)
56 string +=
'\n\nrules: ' + str(
len(self.
rulesrules))
57 for key, value
in self.
rulesrules.items():
59 string +=
'\n' + self.
namesnames[key]
60 string +=
'[0]: <undefined>'
61 for i
in range(
len(value)):
65 string +=
'\n' + self.
namesnames[key]
66 string +=
'[' + str(i) +
']: '
70 string += str(val) +
','
74 string += utils.tuple_to_string(value[i])
80 '''The ApgExp class provides a pattern-matching engine similar
81 to JavaScript's [RegExp](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp)'''
84 '''The ApgExp constructor.
85 @param pattern The SABNF pattern to match.
86 @param flags A string of characters specifying
87 the operation characteristics. May be any of the following:
88 - c display results as lists of integer "character" codes
89 - g global matching mode - last_index follows
90 the previous matched phrase
91 - t trace a trace of each match attempt is displayed
92 - y sticky similar to global except that the next match
93 must be at exactly last_index
96 - letters may be in any order
97 - multiple occurrences of letters allowed, the last occurrence wins
98 - the global, g, and sticky, y, flags are mutually exclusive
99 - any letter not in this list will raise an exception
117 raise Exception(
'flag ' + ll +
' not recognized')
120 self.
flagsflags +=
'c'
122 self.
flagsflags +=
'g'
124 self.
flagsflags +=
't'
126 self.
flagsflags +=
'y'
129 api.generate(pattern)
131 raise Exception(
'Pattern syntax error: \n' + api.display_errors())
135 self.
__ast__ast =
None
145 def __callback_factory(self, name):
146 def fn(state, source, index, length, data):
147 if(state == id.SEM_POST):
148 self.
rulesrules[name.lower()].append(source[index:index + length])
151 def __get_input(self, input):
153 if(isinstance(input, tuple)):
157 msg =
'"c" flag is set - '
158 msg +=
'input must be a tuple of integers'
161 if(isinstance(input, str)):
162 input_tuple = utils.string_to_tuple(input)
165 msg =
'"c" flag is not set - '
166 msg +=
'input must be a string'
171 '''Limit the maximum tree depth that the parser may make.
172 @param depth The maximum allowed tree node depth.
173 If the parser exceeds this limit an exception is raised.'''
178 '''Limit the maximum number of parse tree nodes that the parser may visit.
179 @param hits The maximum allowed number of node hits the parser can make.
180 If the parser exceeds this limit an exception is raised.'''
185 '''UDTs are user-written callback functions for specialized pattern matching.
186 Callback functions must be defined for all UDTs in the SABNF grammar syntax.
187 @param callbacks A dictionary defining one or more callbacks.
188 Multiple calls may be made until all UDT callbacks are defined.
189 callbacks = {'udt1': func[[, 'udt2': func2], etc.]}
192 items = callbacks.items()
194 name = item[0].lower()
196 for udt
in self.
parserparser.udts:
197 if(udt[
'lower'] == name):
201 raise(Exception(
'UDT name ' + name +
' not found'))
203 self.
parserparser.add_callbacks(callbacks)
205 raise Exception(
'pattern has no UDTs')
208 '''Define the list of rule/UDT name phrases to be included
209 in the matched results.
210 @param names A list of rule/UDT names.
211 An empty list will include ALL rules and UDTs.
218 for rule
in self.
parserparser.rules:
219 self.
namesnames[rule[
'lower']] = rule[
'name']
220 self.
rulesrules[rule[
'lower']] = []
221 self.
__ast__ast.add_callback(
224 for udt
in self.
parserparser.udts:
225 self.
namesnames[udt[
'lower']] = udt[
'name']
226 self.
rulesrules[udt[
'lower']] = []
227 self.
__ast__ast.add_callback(
235 name_lower = name.lower()
236 for rule
in self.
parserparser.rules:
237 if(rule[
'lower'] == name_lower):
238 self.
namesnames[rule[
'lower']] = rule[
'name']
239 self.
rulesrules[rule[
'lower']] = []
240 self.
__ast__ast.add_callback(
245 if(
not name_found
and len(self.
parserparser.udts)):
246 for udt
in self.
parserparser.udts:
247 if(udt[
'lower'] == name_lower):
248 self.
namesnames[udt[
'lower']] = udt[
'name']
249 self.
rulesrules[udt[
'lower']] = []
250 self.
__ast__ast.add_callback(
256 raise Exception(name +
' not a rule or UDT name')
259 '''Define the list of rule/UDT name phrases to be excluded
260 from the matched results.
261 @param names A list of rule/UDT names.
262 An empty list will include ALL rules and UDTs.
274 names_lower.append(name.lower())
275 for rule
in self.
parserparser.rules:
276 if(rule[
'lower']
not in names_lower):
277 self.
namesnames[rule[
'lower']] = rule[
'name']
278 self.
rulesrules[rule[
'name']] = []
279 self.
__ast__ast.add_callback(
282 for udt
in self.
parserparser.udts:
283 if(udt[
'lower']
not in names_lower):
284 self.
namesnames[udt[
'lower']] = udt[
'name']
285 self.
rulesrules[udt[
'name']] = []
286 self.
__ast__ast.add_callback(
291 '''Execute the pattern match.
292 Search for a match begins at last_index.
293 (Note: last_index can be set prior to calling exec()
294 with ApgExp.last_index = value.)
295 If the g or y flag is set, last_index is set to the
296 next character beyond the matched pattern
297 or incremented by one if the matched pattern is empty.
298 If the pattern is not matched, last_index is always set to 0.
299 @param input The input as a string or tuple of character codes
300 if the "c" flag is set.
301 @returns Returns the result object if pattern is matched.
306 sub_end =
len(input_tuple)
307 if(sub_beg >= sub_end):
314 print(
'trace beginning at sticky character ' + str(sub_beg))
315 parser_result = self.
parserparser.parse(input_tuple, sub_begin=sub_beg)
316 if(parser_result.state == id.MATCH
317 or parser_result.state == id.EMPTY):
320 max(1, parser_result.phrase_length)
323 self.
__ast__ast.translate(data)
326 parser_result.phrase_length,
327 parser_result.node_hits,
328 parser_result.max_tree_depth,
331 while sub_beg < sub_end:
334 print(
'trace beginning at character ' + str(sub_beg))
335 parser_result = self.
parserparser.parse(
336 input_tuple, sub_begin=sub_beg)
338 if(parser_result.state == id.MATCH
339 or parser_result.state == id.EMPTY):
343 max(1, parser_result.phrase_length)
347 self.
rulesrules[key] = []
349 self.
__ast__ast.translate(data)
352 parser_result.phrase_length,
353 parser_result.node_hits,
354 parser_result.max_tree_depth,
362 '''Same as @ref exec() except for the return.
363 @returns Returns True if a pattern match is found, False otherwise.'''
366 sub_end =
len(input_tuple)
367 if(sub_beg >= sub_end):
374 print(
'trace beginning at sticky character ' + str(sub_beg))
375 parser_result = self.
parserparser.parse(input_tuple, sub_begin=sub_beg)
376 if(parser_result.state == id.MATCH
377 or parser_result.state == id.EMPTY):
379 max(1, parser_result.phrase_length)
384 while sub_beg < sub_end:
387 print(
'trace beginning at character ' + str(sub_beg))
388 parser_result = self.
parserparser.parse(
389 input_tuple, sub_begin=sub_beg)
391 if(parser_result.state == id.MATCH
392 or parser_result.state == id.EMPTY):
396 max(1, parser_result.phrase_length)
403 '''Split the input string on the matched delimiters.
404 The ApgExp pattern defines the delimiters.
406 [JavaScript String.split()](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/split)
408 All flags except the character code flag "c" are ignored.
409 If the "c" flag is set, substitute "tuple of character codes" for string.
410 - if the input string is empty, the output list contains
411 a single empty string
412 - if the pattern matches the entire string, the output list contains
413 a single empty string.
414 - if no pattern matches are found in the input string,
415 the output list contains a single string which
416 is a copy of the input string.
417 - if the pattern finds multiple matches, the output list contains
418 each of the strings between the matches
419 - if the pattern matches the empty string, the output will be a list
420 of the single characters.
421 @param input The input string or tuple of character codes.
422 @param limit If limit > 0 only limit delimiters are matched.
423 The trailing string suffix, if any, is ignored.
424 @returns Returns a list of strings or character code tuples.
426 def gen_output(intervals):
429 if(
len(intervals) == 0):
434 for interval
in intervals:
435 tup = input_tuple[interval[0]:interval[1]]
439 gen.append(utils.tuple_to_string(tup))
443 if(
len(input_tuple) == 0):
445 return gen_output([])
449 sub_end =
len(input_tuple)
451 while(sub_beg < sub_end
and limit > 0):
452 parser_result = self.
parserparser.parse(
453 input_tuple, sub_begin=sub_beg)
454 if(parser_result.state == id.MATCH):
457 [sub_beg, sub_beg + parser_result.phrase_length])
458 sub_beg += parser_result.phrase_length
459 elif(parser_result.state == id.EMPTY):
461 intervals.append([sub_beg, sub_beg])
465 len_intervals =
len(intervals)
466 if(len_intervals == 0):
468 return gen_output([[0, sub_end]])
471 for interval
in intervals:
472 if(beg < interval[0]):
473 out_put.append([beg, interval[0]])
475 intervals_end = intervals[len_intervals - 1][1]
478 if(intervals_end < sub_end):
479 out_put.append([intervals_end, sub_end])
480 return gen_output(out_put)
483 '''Replace matched patterns. If a pattern match is found in "input"
484 it will be replaced with "replacement".
486 [JavaScript String.replace()](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace)
488 If the "g" or "y" flags are set, all matched patterns are replaced.
489 Otherwise, only the first match is replaced.
490 @param input The string to look for pattern matches in.
491 If the "c" flag is set, input must be a tuple of integers.
492 Otherwise, it is a string.
493 @param replacement This may be a simple replacement string,
494 a complex replacement string with special characters or
495 a function that returns the replacement string.
496 If the "c" flag is not set, replacement must be a string,
497 possibly with special characters or
498 a function that returns a string. Special string characters are:
500 - $` substitute the matched left context
501 - $& substitute the matched pattern itself
502 - $' substitute the matched right context
503 - ${name} substitute the phrase matched by the rule/UDT name (case insensitive),
504 note that if this rule has no match an empty string will be used.
507 The function must have the prototype
508 - fn(input, result) where input is the original input string
509 and result is the pattern matching result object.
510 The function must return a string
513 If the "c" flag is set, replacement must be a tuple of integers
514 or a function that returns a tuple of integers.
515 In this case there are no special characters comparable to the string
516 special characters. However, since the function gets the result
517 as an argument, it can be used for the same purpose.
518 The function must have the prototype:
519 - fn(input, result) where input is the original input tuple
520 and result is the pattern matching result object.
521 The function must return a tuple of integers.
522 @returns Returns the input string/tuple "input" with one or all
523 matched patterns replaced with the replacement string,
528 res = self.
execexec(input)
531 return copy.copy(input)
533 results.append(copy.deepcopy(res))
536 res = self.
execexec(input)
538 results.append(copy.deepcopy(res))
540 if(isinstance(replacement, types.FunctionType)):
542 output = copy.copy(input)
544 for result
in results:
545 pref = output[:diff + result.indices[0]]
546 suff = output[diff + result.indices[1]:]
547 repl = replacement(input, result)
549 if(
not isinstance(repl, tuple)):
550 msg =
'replacement function must return'
551 msg +=
' a tuple of integers'
554 if(
not isinstance(repl, str)):
555 msg =
'replacement function must return'
558 diff +=
len(repl) + result.indices[0] - result.indices[1]
559 output = pref + repl + suff
561 if(
not isinstance(replacement, tuple)):
563 'replace(): "c" flag set - input must be a tuple')
565 output = copy.copy(input)
567 for result
in results:
568 pref = output[:diff + result.indices[0]]
569 suff = output[diff + result.indices[1]:]
570 diff +=
len(replacement) + \
571 result.indices[0] - result.indices[1]
572 output = pref + replacement + suff
574 if(
not isinstance(replacement, str)):
576 'replace(): "c" flag not set - input must be a string')
578 output = copy.copy(input)
580 for result
in results:
581 pref = output[:diff + result.indices[0]]
582 suff = output[diff + result.indices[1]:]
584 diff +=
len(repl) + result.indices[0] - result.indices[1]
585 output = pref + repl + suff
591 special = copy.copy(replacement)
594 while(special_found):
595 special_found =
False
596 for i
in range(start,
len(special)):
597 if(special[i] ==
'$'):
599 suf = special[i + 2:]
602 if(special[i + 1] ==
'$'):
603 special = pref +
'$' + suf
605 if(special[i + 1] ==
'&'):
606 special = pref + result.match + suf
608 if(special[i + 1] ==
'`'):
609 special = pref + result.left_context + suf
611 if(special[i + 1] ==
"'"):
612 special = pref + result.right_context + suf
614 if(special[i + 1] ==
"{"):
616 for j
in range(i + 2,
len(special)):
617 if(special[j] ==
'}'):
618 name = special[i + 2:j]
619 suf = special[j + 1:]
623 msg =
'replace(): ${name}, name or closing bracket '
627 if(result.rules.get(lower)):
628 last_match =
len(result.rules[lower]) - 1
629 name_string = utils.tuple_to_string(
630 result.rules[lower][last_match])
634 special = pref + name_string + suf
635 start =
len(pref) +
len(name_string)
The ApgExp class provides a pattern-matching engine similar to JavaScript's RegExp
def __get_input(self, input)
def __callback_factory(self, name)
def define_udts(self, callbacks)
UDTs are user-written callback functions for specialized pattern matching.
def exec(self, input)
Execute the pattern match.
def include(self, names=[])
Define the list of rule/UDT name phrases to be included in the matched results.
def set_tree_depth(self, depth)
Limit the maximum tree depth that the parser may make.
def replace(self, input, replacement)
Replace matched patterns.
def set_node_hits(self, hits)
Limit the maximum number of parse tree nodes that the parser may visit.
def exclude(self, names=[])
Define the list of rule/UDT name phrases to be excluded from the matched results.
def split(self, input, limit=0)
Split the input string on the matched delimiters.
def test(self, input)
Same as exec() except for the return.
def __init__(self, pattern, flags='')
The ApgExp constructor.
A class for returning the results of a pattern match.
def __init__(self, source, begin, length, node_hits, tree_depth, rules, names, ast, codes=False)
The Result class constructor.
def __str__(self)
print(Result) will call this function for a display of the object.
A class for capturing the AST as the parser traverses the parse tree.
The Parser class for parsing an APG grammar.
Class for tracing and displaying the progress of the parser through the parse tree.
def replace_special_chars(replacement, result)