Version 1.0
Copyright © 2022 Lowell D. Thomas
Python APG
 … an ABNF Parser Generator
csv.py
Go to the documentation of this file.
1 ''' @file examples/exp/csv.py
2 @brief Demonstrates parsing comma separated values.
3 '''
4 import sys
5 import os
6 # add the current working directory to the path
7 # DO NOT MOVE THE FOLLOWING STATEMENT
8 # if using autopep8 formatter, for example, set argument '--ignore=E402'
9 sys.path.append(os.getcwd())
10 from apg_py.exp.exp import ApgExp
11 from apg_py.lib import utilities as utils
12 
13 title = '''This example will demonstrate how to parse the
14 Microsoft format for comma separated values.
15 Two approaches are compared. One is to parse out one value
16 at a time with repeated searches using the global flag.
17 The other is to parse all values out of a line in one fell swoop.
18 Note that some translation is required to get the values
19 from quoted fields.
20 This problem is addressed in Jeffrey Friedl's book
21 "Mastering Regular Expressions", pg. 213
22 You can compare this solution to his discussion there.
23 '''
24 print()
25 print(title)
26 
# Rules common to both grammars. A field is either a quoted string
# (%d34 is the double quote; two consecutive quotes stand for one embedded
# quote) or a possibly empty run of printable characters other than the
# comma (%d44).
_field_rules = '''field = quoted / text
quoted = %d34 quoted-text %d34
quoted-text = *(any-but-quote / double-quote)
double-quote = 2%d34
any-but-quote = %d32-33 / %d35-126
text = *any-but-comma
any-but-comma = %d32-43 / %d45-126
'''

# Grammar matching a single value. The field is bracketed by begin-anchor
# and end-anchor.
# NOTE(review): `&&`/`&` and `%^`/`%$` look like the SABNF look-behind /
# look-ahead and begin / end of string operators - confirm against the
# APG syntax documentation.
csv_value = '''value = begin-anchor field end-anchor
begin-anchor = &&%d44 / %^
end-anchor = &%d44 / %$
''' + _field_rules

# Grammar matching a whole line: one field followed by zero or more
# comma-prefixed fields.
csv_line = '''csv = field *(%d44 field)
''' + _field_rules

# Sample line to parse.
# NOTE: this name shadows the builtin input(); it is kept because the
# sections further down the script read it under this name.
input = '''Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K'''

# Labels printed above the grammar and above the parsed values.
pat = 'PATTERN'
header = 'RESULT'

# Running counter used to number the demonstrations.
testno = 0
51 
# Demonstration: one value at a time.
# The pattern is built with the 'g' flag and exec() is called repeatedly on
# the same input until it returns a falsy result.
# NOTE(review): presumably each call resumes after the previous match, as
# with a global JavaScript RegExp - confirm against the ApgExp docs.
exp = ApgExp(csv_value, 'g')
testno += 1
print('\n{}) One value at a time.'.format(testno))
for banner_line in (pat, csv_value, 'input string: ' + input, header):
    print(banner_line)
while True:
    result = exp.exec(input)
    if not result:
        break
    value = result.match
    # a one-character slice is safe on an empty match, unlike value[0]
    if value[:1] == '"':
        # quoted field: drop the enclosing quotes, then turn "" back into "
        value = value[1:-1].replace('""', '"')
    print(value)
71 
# Demonstration: all values at once.
# A single exec() over the whole-line grammar; the individual values are
# then read from the phrases recorded for the 'field' rule.
def _unquote(field):
    '''Return the value of a parsed field.

    A field that starts with a double quote has its first and last
    characters removed and every doubled quote replaced by a single one.
    Any other field, including an empty one, is returned unchanged.
    '''
    if field[:1] != '"':
        return field
    return field[1:-1].replace('""', '"')


exp = ApgExp(csv_line)
# NOTE(review): presumably this makes the result retain the phrases
# matched by 'field', which are read from result.rules below - confirm.
exp.include(['field'])
testno += 1
print('\n{}) All values at once.'.format(testno))
for banner_line in (pat, csv_line, 'input string: ' + input, header):
    print(banner_line)
result = exp.exec(input)
for phrase in result.rules['field']:
    # each entry is converted to a string before it is unquoted
    print(_unquote(utils.tuple_to_string(phrase)))
The ApgExp class provides a pattern-matching engine similar to JavaScript's RegExp
Definition: exp.py:79
Python APG, Version 1.0, is licensed under the 2-Clause BSD License,
an Open Source Initiative Approved License.