1 ''' @file apg_py/api/api.py
2 @brief An API for generating grammar objects from SABNF grammars.
3 @dir apg_py All of the APG library, generator and pattern-matching files.
4 @dir apg_py/api The parser generator files.
5 @dir apg_py/exp The pattern-matching files.
6 @dir apg_py/lib The basic APG parsing library.
7 @dir docs Documentation helper files for [doxygen](https://www.doxygen.nl/).
24 from pprint
import pprint
29 '''Creates a grammar object which can be used by the APG library.
33 '''Grammar constructor.
34 @param rules The generated rules list from Api.generate().
35 @param udts The generated UDT list from Api.generate().
36 @param source The original SABNF syntax source.
41 for op
in rule[
'opcodes']:
42 if(op[
'type'] == id.ALT
or op[
'type'] == id.CAT):
43 op[
'children'] = tuple(op[
'children'])
44 if(op[
'type'] == id.TBS
or op[
'type'] == id.TLS):
45 op[
'string'] = tuple(op[
'string'])
46 rule[
'opcodes'] = tuple(rule[
'opcodes'])
57 self.
udtsudts = tuple(udts)
62 '''The API class. Houses all of the facilities needed to
63 process an SABNF grammar syntax
64 into a grammar object that can be used by an APG parser.'''
67 '''A helper class to keep track of rule and UDT names.
68 Maintains a list of rule/UDT names, the lower case for easy
69 comparisons and the rule/UDT index for the name.
76 '''Add a name to the list.
77 @param name The name to add.
78 @returns Returns the saved dictionary with the name,
79 lower case name and the matching index.
80 Returns -1 if the name already exists in the list.
83 find = self.
getget(name)
88 'lower': name.lower(),
91 self.
namesnames.append(ret)
96 '''Retrieve a name from the list.
97 @param name The name to retrieve.
98 @returns Returns the saved dictionary if the name is in the list.
99 Returns -1 if the name does not exist in the list.
103 for n
in self.
namesnames:
104 if(n[
'lower'] == lower):
111 self.errors is a list of errors. Each item, error, contains:
112 - error['line'] - The line number (zero-based) where
114 - error['index'] - The character index where the error occurred.
115 - error['msg'] - A text message describing the error.
117 self.lines is a list of the text lines in the input grammar.
118 Each item, line, contains:
119 - line['line_no'] - The zero-based line number.
120 - line['index'] - The offset to the first character of the line.
121 - line['length'] - The number of characters in the line,
122 including the line end characters, if any.
123 - line['text_length'] - the number of characters in the line,
124 not including the line end characters.
125 - line['end'] - Designates the line end characters.
126 - 'CRLF' - Carriage return, line feed pair (\\r\\n or 0x0D0A)
127 - 'CR' - Carriage return only (\\r or 0x0D)
128 - 'LF' - Line feed only (\\n or 0x0A)
129 - '' - Empty string means no line end at all
130 (possible for last line.)
148 def generate(self, source, strict=False, phase='all'):
149 '''Generate a grammar object from an SABNF grammar syntax.
150 Works its way through multiple steps.
151 - scan source for invalid characters, catalog lines
152 - parse the source, syntax check
153 - translate the parsed AST, semantic check
154 - attributes (left recursion, etc.) and rule dependencies
155 - construct the grammar object
156 @param source An SABNF grammar syntax as a Python string.
157 @param strict If True, source must be constrained to strictly follow
158 the ABNF conventions of RFCs 5234 & 7405. If False, SABNF operators
159 and line ending conventions are followed.
160 @param phase Used primarily for debugging.
161 - 'scanner' - generation halts after the scanner phase
162 - 'syntax' - generation halts after the syntax phase
163 - 'semantic' - generation halts after the semantic phase
164 - 'attributes' - generation halts after the attributes phase
165 - 'all' - (default) a grammar object is generated if no errors
167 @return If successful, returns the grammar object.
168 Otherwise, returns None. Use, for example<br>
170 grammar = api.generate(...)
173 print(api.display_errors())
174 raise Exception('api.generate() failed')
175 # use the generated grammar
179 def discover_has_bkrr(rules, udts, rule_deps):
180 '''Discover which rules reference rules which are
181 recursive backreferenced.'''
182 rule_range = range(
len(rules))
183 udt_range = range(
len(udts))
186 rules[i][
'has_bkrr'] =
False
187 if(rdi[
'recursive_type'] != id.ATTR_N):
189 if(rdi[
'refers_to'][j]):
190 if(rules[j][
'is_bkrr']):
191 rules[i][
'has_bkrr'] =
True
193 if(rdi[
'refers_to_udt'][j]):
194 if(udts[j][
'is_bkrr']):
195 rules[i][
'has_bkrr'] =
True
198 self.
sourcesource = source
201 self.
errorserrors = result[
'errors']
202 self.
lineslines = result[
'lines']
205 if(phase ==
'scanner'):
212 if(phase ==
'syntax'):
217 self.
errorserrors = result[
'errors']
220 self.
rulesrules = result[
'rules']
221 self.
udtsudts = result[
'udts']
222 self.
rule_namesrule_names = result[
'rule_names']
223 self.
udt_namesudt_names = result[
'udt_names']
224 if(phase ==
'semantic'):
239 self.
errorserrors = result[
'errors']
243 if(phase ==
'attributes'):
251 '''Write the APG grammar to a file in a format suitable for later use by a parser.
252 @param fname the file name to write the grammar to
254 def grammar_copyright():
256 display +=
'# Copyright (c) 2022 Lowell D. Thomas, '
257 display +=
'all rights reserved\n'
258 display +=
'# BSD-2-Clause '
259 display +=
'(https://opensource.org/licenses/BSD-2-Clause)\n'
263 def grammar_summary(rules, udts):
266 info[
'op_counts'][op_id] += 1
267 if(op_id == id.TLS
or op_id == id.TBS):
268 for ch
in op[
'string']:
269 if(ch < info[
'char_min']):
270 info[
'char_min'] = ch
271 if(ch > info[
'char_max']):
272 info[
'char_max'] = ch
274 if(op[
'min'] < info[
'char_min']):
275 info[
'char_min'] = op[
'min']
276 if(op[
'max'] > info[
'char_max']):
277 info[
'char_max'] = op[
'max']
279 info = {
'op_counts': [0] * (id.AEN + 1),
281 'char_min': sys.maxsize,
283 rule_count =
len(rules)
284 udt_count =
len(udts)
287 info[
'opcodes'] +=
len(rule[
'opcodes'])
288 for op
in rule[
'opcodes']:
291 display =
'# SUMMARY'
292 display +=
'\n# rules = ' + str(rule_count)
293 display +=
'\n# udts = ' + str(udt_count)
294 display +=
'\n# opcodes = ' + str(info[
'opcodes'])
295 display +=
'\n# --- ABNF original opcodes'
296 display +=
'\n# ALT = ' + str(info[
'op_counts'][id.ALT])
297 display +=
'\n# CAT = ' + str(info[
'op_counts'][id.CAT])
298 display +=
'\n# REP = ' + str(info[
'op_counts'][id.REP])
299 display +=
'\n# RNM = ' + str(info[
'op_counts'][id.RNM])
300 display +=
'\n# TLS = ' + str(info[
'op_counts'][id.TLS])
301 display +=
'\n# TBS = ' + str(info[
'op_counts'][id.TBS])
302 display +=
'\n# TRG = ' + str(info[
'op_counts'][id.TRG])
303 display +=
'\n# --- SABNF super set opcodes'
304 display +=
'\n# UDT = ' + str(info[
'op_counts'][id.UDT])
305 display +=
'\n# AND = ' + str(info[
'op_counts'][id.AND])
306 display +=
'\n# NOT = ' + str(info[
'op_counts'][id.NOT])
307 display +=
'\n# BKA = ' + str(info[
'op_counts'][id.BKA])
308 display +=
'\n# BKN = ' + str(info[
'op_counts'][id.BKN])
309 display +=
'\n# BKR = ' + str(info[
'op_counts'][id.BKR])
310 display +=
'\n# ABG = ' + str(info[
'op_counts'][id.ABG])
311 display +=
'\n# AEN = ' + str(info[
'op_counts'][id.AEN])
312 display +=
'\n# characters = ['
313 display += str(info[
'char_min'])
315 display += str(info[
'char_max'])
318 display +=
' + user defined'
322 def grammar_to_string(lines, source):
323 display =
'def to_string():\n'
324 display +=
" '''Displays the original SABNF syntax.'''\n"
325 display +=
' sabnf = ""\n'
327 display +=
' sabnf += "'
328 for i
in range(line[
'index'], line[
'index'] + line[
'length']):
343 display +=
' return sabnf\n'
347 raise Exception(
'no grammar has been generated')
348 stdout_save = sys.stdout
350 sys.stdout = open(fname,
'w')
351 print(grammar_copyright())
352 print(grammar_summary(self.
grammargrammar.rules, self.
grammargrammar.udts))
354 print(
'rules = ', end=
'')
355 pprint(self.
grammargrammar.rules, sort_dicts=
False)
358 print(
'udts = ', end=
'')
359 pprint(self.
grammargrammar.udts, sort_dicts=
False)
361 print(
'has_bkru = ', end=
'')
362 print(self.
grammargrammar.has_bkru)
363 print(
'has_bkrr = ', end=
'')
364 print(self.
grammargrammar.has_bkrr)
367 print(grammar_to_string(self.
lineslines, self.
grammargrammar.source))
370 sys.stdout = stdout_save
373 '''Display the rule attributes.
374 @param sort Determines the order of rule display.
375 - 'index' (default) rules are listed in the order they appear
377 - 'type' rules are listed by recursive type
378 @returns Returns a string with the displayed rule attributes.
384 return 'rule attributes not available'
387 '''Display the rule dependencies. For each rule R, list both
388 the list of rules that R refers to (both directly and indirectly)
389 and the list of rules that refer to R (both directly and indirectly).
390 @param sort Determines the order of display of the rules, R.
391 - 'index' (default) rules are listed in the order they appear
393 - 'alpha' (actually anything but 'index') rules are listed
395 @returns Returns a string with the displayed rule dependencies.
397 alpha = sort ==
'index'
404 return 'rule dependencies not available'
407 '''Find the line number of the line in which the given
409 @param index The (zero-based) index of the character in the source.
410 @returns Returns the line number of the specified character.
411 If index is out of range, returns the last line.'''
414 'find_line: No lines - no input (source grammar) defined.')
420 for line
in self.
lineslines:
422 if(index >= line[
'index']
and
423 index < line[
'index'] + line[
'length']):
425 raise Exception(
'find_line: Should never reach here - internal error.')
428 '''Display the list of SABNF errors, if any.
429 For each error, lists the line number and relative character offset
430 where the error occurred and a descriptive message.
431 @returns Returns a string of the displayed error messages.'''
435 for error
in self.
errorserrors:
436 offset = error[
'index'] - self.
lineslines[error[
'line']][
'index']
441 display += (
'line: %d: offset: %d: %s' %
442 (error[
'line'], offset, error[
'msg']))
447 '''Displays an annotated version of the SABNF grammar syntax.
448 Each line is preceded by the line number and character offset
450 @returns Returns a string of the display text.'''
454 for line
in self.
lineslines:
455 last_index = line[
'index'] + line[
'length']
457 display += (
'%03d: %03d: %s' % (no, line[
'index'], text))
463 '''Displays a line with special characters accounted for.
464 @param line_no The line number to display
465 @returns Returns a string of the displayed line.'''
466 if(line_no >= 0
and line_no <
len(self.
lineslines)):
467 line = self.
lineslines[line_no]
468 last_index = line[
'index'] + line[
'length']
470 return 'line_no ' + str(line_no) +
' out of range'
473 '''Displays a syntax line, underlined with carets highlighting
475 @param line_no The line number to display
476 @param index The index of the character to highlight
477 @returns Returns a string of the displayed line.
479 if(line_no >= 0
and line_no <
len(self.
lineslines)):
481 line = self.
lineslines[line_no]
482 last_index = line[
'index'] + line[
'length']
483 line_segment = self.
inputinput[line[
'index']:last_index]
488 return 'line_no ' + str(line_no) +
' out of range'
491 '''Display the syntax rules and UDTs, if available.
493 - 'index' (default) - display rules in the order
494 in which they are defined
495 - 'alpha' - display rules alphabetically
497 def sort_rules_alpha(val):
498 return self.
rulesrules[val][
'lower']
500 def sort_udts_alpha(val):
501 return self.
udtsudts[val][
'lower']
506 rule_range = range(rule_count)
508 udt_range = range(udt_count)
509 irules = [0] * rule_count
510 iudts = [0] * udt_count
516 irules.sort(key=sort_rules_alpha)
518 iudts.sort(key=sort_udts_alpha)
520 display +=
'%03d: %s\n' % (
521 self.
rulesrules[i][
'index'], self.
rulesrules[i][
'name'])
523 display +=
'\nUDTS\n'
525 display +=
'%03d: %s\n' % (
526 self.
udtsudts[i][
'index'], self.
udtsudts[i][
'name'])
A helper class to keep track of rule and UDT names.
def get(self, name)
Retrieve a name from the list.
def add(self, name)
Add a name to the list.
def display_rules(self, sort='index')
Display the syntax rules and UDTs, if available.
def __init__(self)
API constructor.
def display_errors(self)
Display the list of SABNF errors, if any.
def write_grammar(self, fname)
Write the APG grammar to a file in a format suitable for later use by a parser.
def display_rule_attributes(self, sort='index')
Display the rule attributes.
def find_line(self, index)
Find the line number of the line in which the given character occurs.
def display_underline(self, line_no, index)
Displays a syntax line, underlined with carets highlighting error locations.
def display_grammar(self)
Displays an annotated version of the SABNF grammar syntax.
def display_rule_dependencies(self, sort='index')
Display the rule dependencies.
def display_line(self, line_no)
Displays a line with special characters accounted for.
def generate(self, source, strict=False, phase='all')
Generate a grammar object from an SABNF grammar syntax.
Creates a grammar object which can be used by the APG library.
def __init__(self, rules, udts, source)
Grammar constructor.
A class for capturing the AST as the parser traverses the parse tree.
The Parser class for parsing an APG grammar.
def rule_attributes(rules, rule_deps)
Compute the recursive and non-recursive attributes for each rule.
def rule_dependencies(rules, udts, rule_names, udt_names)
Determine the rule dependencies and recursive types for each rule.
def display_deps(rule_deps, rules, udts, alpha=True)
Display the rule dependencies.
def scanner(input, strict)
Scan the input for invalid characters and construct a line catalog for looking up line numbers from c...
def add_ast_callbacks(ast)
def semantic(api)
Translate the AST, generating a list of rule objects, and UDT objects, if any.
def add_syntax_callbacks(parser)
def tuple_to_ascii(input, map=None)
Converts a tuple of Unicode values to an ASCII string.
def tuple_to_ascii_underline(map, index)
Uses the (optional) map generated by tuple_to_ascii() to generate a mapping of the display characters...
def string_to_tuple(string)
Converts a string to a tuple of the Unicode values of the string characters.