Version 1.0
Copyright © 2022 Lowell D. Thomas
Python APG
 … an ABNF Parser Generator
api.py
Go to the documentation of this file.
1 ''' @file apg_py/api/api.py
2 @brief An API for generating grammar objects from SABNF grammars.
3 @dir apg_py All of the APG library, generator and pattern-matching files.
4 @dir apg_py/api The parser generator files.
5 @dir apg_py/exp The pattern-matching files.
6 @dir apg_py/lib The basic APG parsing library.
7 @dir docs Documentation helper files for [doxygen](https://www.doxygen.nl/).
8 '''
9 from apg_py.api import sabnf_grammar
10 from apg_py.lib.parser import Parser
11 from apg_py.lib.ast import Ast
12 from apg_py.lib import identifiers as id
13 from apg_py.api.scanner import scanner
14 from apg_py.api.syntax import syntax
15 from apg_py.api.semantic import semantic
16 # from apg.api.attributes import attributes
17 from apg_py.api.rule_dependencies import display_deps, rule_dependencies
18 from apg_py.api.rule_attributes import display_rule_attributes, rule_attributes
19 from apg_py.api.syntax_callbacks import add_syntax_callbacks
20 from apg_py.api.semantic_callbacks import add_ast_callbacks
21 from apg_py.lib.utilities import tuple_to_ascii
22 from apg_py.lib.utilities import tuple_to_ascii_underline
23 from apg_py.lib.utilities import string_to_tuple
24 from pprint import pprint
25 import sys
26 
27 
28 class Grammar():
29  '''Creates a grammar object which can be used by the APG library.
30  '''
31 
32  def __init__(self, rules, udts, source):
33  '''Grammar constructor.
34  @param rules The generated rules list from Api.generate().
35  @param udts The generated UDT list from Api.generate().
36  @param source The original SABNF syntax source.
37  '''
38  self.has_bkruhas_bkru = False
39  self.has_bkrrhas_bkrr = False
40  for rule in rules:
41  for op in rule['opcodes']:
42  if(op['type'] == id.ALT or op['type'] == id.CAT):
43  op['children'] = tuple(op['children'])
44  if(op['type'] == id.TBS or op['type'] == id.TLS):
45  op['string'] = tuple(op['string'])
46  rule['opcodes'] = tuple(rule['opcodes'])
47  if(rule['is_bkru']):
48  self.has_bkruhas_bkru = True
49  if(rule['is_bkrr']):
50  self.has_bkrrhas_bkrr = True
51  for udt in udts:
52  if(udt['is_bkru']):
53  self.has_bkruhas_bkru = True
54  if(udt['is_bkrr']):
55  self.has_bkrrhas_bkrr = True
56  self.rulesrules = tuple(rules)
57  self.udtsudts = tuple(udts)
58  self.sourcesource = source
59 
60 
61 class Api():
62  '''The API class. Houses all of the facilities needed to
63  process an SABNF grammar syntax
64  into a grammar object that can be used by an APG parser.'''
65 
66  class NameList():
67  '''A helper class to keep track of rule and UDT names.
68  Maintains a list of rule/UDT names, the lower case for easy
69  comparisons and the rule/UDT index for the name.
70  '''
71 
72  def __init__(self):
73  self.namesnames = []
74 
75  def add(self, name):
76  '''Add a name to the list.
77  @param name The name to add.
78  @returns Returns the saved dictionary with the name,
79  lower case name and the matching index.
80  Returns -1 if the name already exists in the list.
81  '''
82  ret = -1
83  find = self.getget(name)
84  if(find == -1):
85  # name does not exist in list so add it
86  ret = {
87  'name': name,
88  'lower': name.lower(),
89  'index': len(
90  self.namesnames)}
91  self.namesnames.append(ret)
92  # else: name already exists, -1 return indicates duplicate names
93  return ret
94 
95  def get(self, name):
96  '''Retrieve a name from the list.
97  @param name The name to retrieve.
98  @returns Returns the saved dictionary if the name is in the list.
99  Returns -1 if the name does not exist in the list.
100  '''
101  ret = -1
102  lower = name.lower()
103  for n in self.namesnames:
104  if(n['lower'] == lower):
105  ret = n
106  break
107  return ret
108 
109  def __init__(self):
110  '''API constructor.
111  self.errors is a list of errors. Each item, error, contains:
112  - error['line'] - The line number(zero-based) where
113  the error occurred.
114  - error['index'] - The character index where the error occurred.
115  - error['msg'] - A text message describing the error.
116 
117  self.lines is a list of the text lines in the input grammar.
118  Each item, line, contains:
119  - line['line_no'] - The zero-based line number.
120  - line['index'] - The offset to the first character of the line.
121  - line['length'] - The number of characters in the line,
122  including the line end characters, if any.
123  - line['text_length'] - the number of characters in the line,
124  not including the line end characters.
125  - line['end'] - Designates the line end characters.
126  - 'CRLF' - Carriage return, line feed pair (\\r\\n or 0x0D0A)
127  - 'CR' - Carriage return only (\\r or 0x0D)
128  - 'LF' - Line feed only (\\n or 0x0A)
129  - '' - Empty string means no line end at all
130  (possible for last line.)
131  '''
132  self.parserparser = Parser(sabnf_grammar)
133  self.astast = Ast(self.parserparser)
134  add_syntax_callbacks(self.parserparser)
135  add_ast_callbacks(self.astast)
136  # place holders
137  self.errorserrors = None
138  self.lineslines = None
139  self.grammargrammar = None
140  self.sourcesource = None
141  self.inputinput = None
142  self.rulesrules = None
143  self.udtsudts = None
144  self.rule_namesrule_names = None
145  self.udt_namesudt_names = None
146  self.rule_depsrule_deps = None
147 
148  def generate(self, source, strict=False, phase='all'):
149  '''Generate a grammar object from an SABNF grammar syntax.
150  Works its way through multiple steps.
151  - scan source for invalid characters, catalog lines
152  - parse the source, syntax check
153  - translate the parsed AST, semantic check
154  - attributes (left recursion, etc.) and rule dependencies
155  - construct the grammar object
156  @param source An SABNF grammar syntax as a Python string.
157  @param strict If True, source must be constrained to strictly follow
158  the ABNF conventions of RFCs 5234 & 7405. If False, SABNF operators
159  and line ending conventions are followed.
160  @param phase Used primarily for debugging.
161  - 'scanner' - generation halts after the scanner phase
162  - 'syntax' - generation halts after the syntax phase
163  - 'semantic' - generation halts after the semantic phase
164  - 'attributes' - generation halts after the attributes phase
165  - 'all' - (default) a grammar object is generated if no errors
166  in any phases
167  @return If successful, returns the grammar object.
168  Otherwise, returns None. Use, for example<br>
169  <pre>
170  grammar = api.generator(...)
171  if(not grammar):
172  if(api.errors):
173  print(api.display_errors())
174  raise Exception('api.generate() failed')
175  # use the generated grammar
176  </pre>
177  '''
178 
179  def discover_has_bkrr(rules, udts, rule_deps):
180  '''Discover which rules reference rules which are
181  recursive backreferenced.'''
182  rule_range = range(len(rules))
183  udt_range = range(len(udts))
184  for i in rule_range:
185  rdi = rule_deps[i]
186  rules[i]['has_bkrr'] = False
187  if(rdi['recursive_type'] != id.ATTR_N):
188  for j in rule_range:
189  if(rdi['refers_to'][j]):
190  if(rules[j]['is_bkrr']):
191  rules[i]['has_bkrr'] = True
192  for j in udt_range:
193  if(rdi['refers_to_udt'][j]):
194  if(udts[j]['is_bkrr']):
195  rules[i]['has_bkrr'] = True
196 
197  # scan for bad characters and generate a line catalog
198  self.sourcesource = source
199  self.inputinput = string_to_tuple(self.sourcesource)
200  result = scanner(self.inputinput, strict)
201  self.errorserrors = result['errors']
202  self.lineslines = result['lines']
203  if(len(self.errorserrors)):
204  return
205  if(phase == 'scanner'):
206  return
207 
208  # syntax - check for SABNF syntax errors
209  self.errorserrors = syntax(self, strict)
210  if(len(self.errorserrors)):
211  return
212  if(phase == 'syntax'):
213  return
214 
215  # semantic - check for SABNF semantic errors
216  result = semantic(self)
217  self.errorserrors = result['errors']
218  if(len(self.errorserrors)):
219  return
220  self.rulesrules = result['rules']
221  self.udtsudts = result['udts']
222  self.rule_namesrule_names = result['rule_names']
223  self.udt_namesudt_names = result['udt_names']
224  if(phase == 'semantic'):
225  return
226 
227  # discover all rule dependencies, including mutually-recursive groups
228  self.rule_depsrule_deps = rule_dependencies(
229  self.rulesrules,
230  self.udtsudts,
231  self.rule_namesrule_names,
232  self.udt_namesudt_names)
233 
234  # discover which recursive rules refer to back referenced rules
235  discover_has_bkrr(self.rulesrules, self.udtsudts, self.rule_depsrule_deps)
236 
237  # discover all rule attributes, reporting attribute errors
238  result = rule_attributes(self.rulesrules, self.rule_depsrule_deps)
239  self.errorserrors = result['errors']
240  self.attributesattributes = result['attributes']
241  if(len(self.errorserrors)):
242  return
243  if(phase == 'attributes'):
244  return
245 
246  # return the grammar object
247  self.grammargrammar = Grammar(self.rulesrules, self.udtsudts, self.sourcesource)
248  return Grammar(self.rulesrules, self.udtsudts, self.sourcesource)
249 
250  def write_grammar(self, fname):
251  '''Write the APG grammar to a file in format for later use by a parser.
252  @param fname the file name to write the grammar to
253  '''
254  def grammar_copyright():
255  display = ''
256  display += '# Copyright (c) 2022 Lowell D. Thomas, '
257  display += 'all rights reserved\n'
258  display += '# BSD-2-Clause '
259  display += '(https://opensource.org/licenses/BSD-2-Clause)\n'
260  display += '#\n'
261  return display
262 
263  def grammar_summary(rules, udts):
264  def inc_op(op):
265  op_id = op['type']
266  info['op_counts'][op_id] += 1
267  if(op_id == id.TLS or op_id == id.TBS):
268  for ch in op['string']:
269  if(ch < info['char_min']):
270  info['char_min'] = ch
271  if(ch > info['char_max']):
272  info['char_max'] = ch
273  if(op_id == id.TRG):
274  if(op['min'] < info['char_min']):
275  info['char_min'] = op['min']
276  if(op['max'] > info['char_max']):
277  info['char_max'] = op['max']
278 
279  info = {'op_counts': [0] * (id.AEN + 1),
280  'opcodes': 0,
281  'char_min': sys.maxsize,
282  'char_max': 0}
283  rule_count = len(rules)
284  udt_count = len(udts)
285 
286  for rule in rules:
287  info['opcodes'] += len(rule['opcodes'])
288  for op in rule['opcodes']:
289  inc_op(op)
290 
291  display = '# SUMMARY'
292  display += '\n# rules = ' + str(rule_count)
293  display += '\n# udts = ' + str(udt_count)
294  display += '\n# opcodes = ' + str(info['opcodes'])
295  display += '\n# --- ABNF original opcodes'
296  display += '\n# ALT = ' + str(info['op_counts'][id.ALT])
297  display += '\n# CAT = ' + str(info['op_counts'][id.CAT])
298  display += '\n# REP = ' + str(info['op_counts'][id.REP])
299  display += '\n# RNM = ' + str(info['op_counts'][id.RNM])
300  display += '\n# TLS = ' + str(info['op_counts'][id.TLS])
301  display += '\n# TBS = ' + str(info['op_counts'][id.TBS])
302  display += '\n# TRG = ' + str(info['op_counts'][id.TRG])
303  display += '\n# --- SABNF super set opcodes'
304  display += '\n# UDT = ' + str(info['op_counts'][id.UDT])
305  display += '\n# AND = ' + str(info['op_counts'][id.AND])
306  display += '\n# NOT = ' + str(info['op_counts'][id.NOT])
307  display += '\n# BKA = ' + str(info['op_counts'][id.BKA])
308  display += '\n# BKN = ' + str(info['op_counts'][id.BKN])
309  display += '\n# BKR = ' + str(info['op_counts'][id.BKR])
310  display += '\n# ABG = ' + str(info['op_counts'][id.ABG])
311  display += '\n# AEN = ' + str(info['op_counts'][id.AEN])
312  display += '\n# characters = ['
313  display += str(info['char_min'])
314  display += ' - '
315  display += str(info['char_max'])
316  display += ']'
317  if(udt_count):
318  display += ' + user defined'
319  display += '\n#\n'
320  return display
321 
322  def grammar_to_string(lines, source):
323  display = 'def to_string():\n'
324  display += " '''Displays the original SABNF syntax.'''\n"
325  display += ' sabnf = ""\n'
326  for line in lines:
327  display += ' sabnf += "'
328  for i in range(line['index'], line['index'] + line['length']):
329  ch = ord(source[i])
330  if(ch == 9):
331  display += " "
332  elif(ch == 10):
333  display += "\\n"
334  elif(ch == 13):
335  display += "\\r"
336  elif(ch == 34):
337  display += '\\"'
338  elif(ch == 92):
339  display += "\\\\"
340  else:
341  display += source[i]
342  display += '"\n'
343  display += ' return sabnf\n'
344  return display
345 
346  if(not self.grammargrammar):
347  raise Exception('no grammar has been generated')
348  stdout_save = sys.stdout
349  try:
350  sys.stdout = open(fname, 'w')
351  print(grammar_copyright())
352  print(grammar_summary(self.grammargrammar.rules, self.grammargrammar.udts))
353  print('# RULES')
354  print('rules = ', end='')
355  pprint(self.grammargrammar.rules, sort_dicts=False)
356  print()
357  print('# UDTS')
358  print('udts = ', end='')
359  pprint(self.grammargrammar.udts, sort_dicts=False)
360  print()
361  print('has_bkru = ', end='')
362  print(self.grammargrammar.has_bkru)
363  print('has_bkrr = ', end='')
364  print(self.grammargrammar.has_bkrr)
365  print()
366  print()
367  print(grammar_to_string(self.lineslines, self.grammargrammar.source))
368  sys.stdout.close()
369  finally:
370  sys.stdout = stdout_save
371 
372  def display_rule_attributes(self, sort='index'):
373  '''Display the rule attributes.
374  @param sort Determines the order of rule display.
375  - 'index' (default) rules are listed in the order they appear
376  in the grammar
377  - 'type' rules are listed by recursive type
378  @returns Returns a string with the displayed rule attributes.
379  '''
380  if(sort != 'index'):
381  sort = 'type'
382  if(self.attributesattributes):
383  return display_rule_attributes(self.attributesattributes, sort)
384  return 'rule attributes not available'
385 
386  def display_rule_dependencies(self, sort='index'):
387  '''Display the rule dependencies. For each rule R, list both
388  the list of rules that R refers to (both directly and indirectly)
389  and the list of rules that refer to R (both directly and indirectly).
390  @param sort Determines the order of display of the rules, R.
391  - 'index' (default) rules are listed in the order they appear
392  in the grammar
393  - 'alpha' (actually anything but 'index') rules are listed
394  alphabetically
395  @returns Returns a string with the displayed rule dependencies.
396  '''
397  alpha = sort == 'index'
398  if(self.rule_depsrule_deps):
399  return display_deps(
400  self.rule_depsrule_deps,
401  self.rulesrules,
402  self.udtsudts,
403  alpha)
404  return 'rule dependencies not available'
405 
406  def find_line(self, index):
407  '''Find the line number of the line in which the given
408  character occurs.
409  @param index The (zero-based) index of the character in the source.
410  @returns Returns the line number of the specified character.
411  If index is out of range, returns the last line.'''
412  if(len(self.lineslines) == 0):
413  raise Exception(
414  'find_line: No lines - no input (source grammar) defined.')
415  if(index <= 0):
416  return 0
417  if(index >= len(self.inputinput)):
418  return len(self.lineslines) - 1
419  line_no = -1
420  for line in self.lineslines:
421  line_no += 1
422  if(index >= line['index'] and
423  index < line['index'] + line['length']):
424  return line_no
425  raise Exception('find_line: Should never reach here - internal error.')
426 
427  def display_errors(self):
428  '''Display the list of SABNF errors, if any.
429  For each error, lists the line number and relative character offset
430  where the error occurred and a descriptive message.
431  @returns Returns a string of the displayed error messages.'''
432  if(len(self.errorserrors) == 0):
433  return 'no errors'
434  display = ''
435  for error in self.errorserrors:
436  offset = error['index'] - self.lineslines[error['line']]['index']
437  if(offset < 0):
438  offset = 0
439  display += self.display_underlinedisplay_underline(error['line'], offset)
440  display += '\n'
441  display += ('line: %d: offset: %d: %s' %
442  (error['line'], offset, error['msg']))
443  display += '\n'
444  return display
445 
446  def display_grammar(self):
447  '''Displays an annotated version of the SABNF grammar syntax.
448  Each line is preceeded by the line number and character offset
449  of the line.
450  @returns Returns a string of the display text.'''
451  display = ''
452  no = 0
453  source = string_to_tuple(self.sourcesource)
454  for line in self.lineslines:
455  last_index = line['index'] + line['length']
456  text = tuple_to_ascii(source[line['index']: last_index])
457  display += ('%03d: %03d: %s' % (no, line['index'], text))
458  display += '\n'
459  no += 1
460  return display
461 
462  def display_line(self, line_no):
463  '''Displays a line with special characters accounted for.
464  @param line_no The line number to display
465  @returns Returns a string of the displayed line.'''
466  if(line_no >= 0 and line_no < len(self.lineslines)):
467  line = self.lineslines[line_no]
468  last_index = line['index'] + line['length']
469  return tuple_to_ascii(self.inputinput[line['index']:last_index])
470  return 'line_no ' + str(line_no) + ' out of range'
471 
472  def display_underline(self, line_no, index):
473  '''Displays a syntax line, underlined with carrets highlightling
474  error locations.
475  @param line_no The line number to display
476  @param index The index of the character to highlight
477  @returns Returns a string of the displayed line.
478  '''
479  if(line_no >= 0 and line_no < len(self.lineslines)):
480  map = []
481  line = self.lineslines[line_no]
482  last_index = line['index'] + line['length']
483  line_segment = self.inputinput[line['index']:last_index]
484  text = tuple_to_ascii(line_segment, map)
485  text += '\n'
486  text += tuple_to_ascii_underline(map, index)
487  return text
488  return 'line_no ' + str(line_no) + ' out of range'
489 
490  def display_rules(self, sort='index'):
491  '''Display the syntax rules and UDTs, if available.
492  @param sort
493  - 'index'(default) - display rules in the order
494  in which they are defined
495  - 'alpha' - display rules alphabetically
496  '''
497  def sort_rules_alpha(val):
498  return self.rulesrules[val]['lower']
499 
500  def sort_udts_alpha(val):
501  return self.udtsudts[val]['lower']
502 
503  display = ''
504  if(self.rulesrules):
505  rule_count = len(self.rulesrules)
506  rule_range = range(rule_count)
507  udt_count = len(self.udtsudts)
508  udt_range = range(udt_count)
509  irules = [0] * rule_count
510  iudts = [0] * udt_count
511  for i in rule_range:
512  irules[i] = i
513  for i in udt_range:
514  iudts[i] = i
515  if(sort == 'alpha'):
516  irules.sort(key=sort_rules_alpha)
517  if(udt_count):
518  iudts.sort(key=sort_udts_alpha)
519  for i in irules:
520  display += '%03d: %s\n' % (
521  self.rulesrules[i]['index'], self.rulesrules[i]['name'])
522  if(udt_count):
523  display += '\nUDTS\n'
524  for i in iudts:
525  display += '%03d: %s\n' % (
526  self.udtsudts[i]['index'], self.udtsudts[i]['name'])
527  return display
A helper class to keep track of rule and UDT names.
Definition: api.py:66
def __init__(self)
Definition: api.py:72
def get(self, name)
Retrieve a name from the list.
Definition: api.py:95
def add(self, name)
Add a name to the list.
Definition: api.py:75
The API class.
Definition: api.py:61
def display_rules(self, sort='index')
Display the syntax rules and UDTs, if available.
Definition: api.py:490
def __init__(self)
API constructor.
Definition: api.py:109
def display_errors(self)
Display the list of SABNF errors, if any.
Definition: api.py:427
def write_grammar(self, fname)
Write the APG grammar to a file in format for later use by a parser.
Definition: api.py:250
def display_rule_attributes(self, sort='index')
Display the rule attributes.
Definition: api.py:372
def find_line(self, index)
Find the line number of the line in which the given character occurs.
Definition: api.py:406
def display_underline(self, line_no, index)
Displays a syntax line, underlined with carrets highlightling error locations.
Definition: api.py:472
def display_grammar(self)
Displays an annotated version of the SABNF grammar syntax.
Definition: api.py:446
def display_rule_dependencies(self, sort='index')
Display the rule dependencies.
Definition: api.py:386
def display_line(self, line_no)
Displays a line with special characters accounted for.
Definition: api.py:462
def generate(self, source, strict=False, phase='all')
Generate a grammar object from an SABNF grammar syntax.
Definition: api.py:148
Creates a grammar object which can be used by the APG library.
Definition: api.py:28
def __init__(self, rules, udts, source)
Grammar constructor.
Definition: api.py:32
A class for capturing the AST as the parser traverses the parse tree.
Definition: ast.py:69
The Parser class for parsing an APG grammar.
Definition: parser.py:60
def rule_attributes(rules, rule_deps)
Compute the recursive and non-recursive attributes for each rule.
def rule_dependencies(rules, udts, rule_names, udt_names)
Determine the rule dependencies and recursive types for each rule.
def display_deps(rule_deps, rules, udts, alpha=True)
Display the rule dependencies.
def scanner(input, strict)
Scan the input for invalid characters and construct a line catalog for looking up line numbers from c...
Definition: scanner.py:16
def semantic(api)
Translate the AST, generating a list of rule objects, and UDT objects, if any.
Definition: semantic.py:10
def tuple_to_ascii(input, map=None)
Converts a tuple of Unicode values to an ASCII string.
Definition: utilities.py:36
def tuple_to_ascii_underline(map, index)
Uses the (optional) map generated by tuple_to_ascii() to generate a mapping of the display characters...
Definition: utilities.py:85
def string_to_tuple(string)
Converts a string to a tuple of the Unicode values of the string characters.
Definition: utilities.py:8
Python APG, Version 1.0, is licensed under the 2-Clause BSD License,
an Open Source Initiative Approved License.