Version 7.0
Copyright © 2021 Lowell D. Thomas
APG
… an ABNF Parser Generator
pppt.c
Go to the documentation of this file.
1 /* *************************************************************************************
2  Copyright (c) 2021, Lowell D. Thomas
3  All rights reserved.
4 
5  This file is part of APG Version 7.0.
6  APG Version 7.0 may be used under the terms of the BSD 2-Clause License.
7 
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions are met:
10 
11  1. Redistributions of source code must retain the above copyright notice, this
12  list of conditions and the following disclaimer.
13 
14  2. Redistributions in binary form must reproduce the above copyright notice,
15  this list of conditions and the following disclaimer in the documentation
16  and/or other materials provided with the distribution.
17 
18  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 * *************************************************************************************/
42 // sanity check assertion
43 //#define ASSERT_PPPT
44 #ifdef ASSERT_PPPT
45 #include <assert.h>
46 #endif /* ASSERT_PPPT */
47 
48 #include "../api/api.h"
49 #include "../api/apip.h"
50 #include "../api/attributes.h"
51 
52 //#define TRACE_PPPT 1
53 #ifdef TRACE_PPPT
54 #include "../library/parserp.h"
55 #include "../utilities/utilities.h"
56 static aint s_uiTreeDepth = 0;
57 static void vRuleOpen(api* spApi, aint uiRuleIndex);
58 static void vRuleLeaf(api* spApi, aint uiRuleIndex, uint8_t* ucpMap);
59 static void vRuleClose(api* spApi, aint uiRuleIndex);
60 static void vOpcodeOpen(api* spApi, api_op* spOp);
61 static void vOpcodeClose(api* spApi, api_op* spOp);
62 static void vIndent(aint uiIndent);
63 static void vPrintMap(api* spApi, uint8_t* ucpMap);
64 #define TRACE_RULE_OPEN(n) vRuleOpen(spApi, (n))
65 #define TRACE_RULE_LEAF(n, m) vRuleLeaf(spApi, (n), (m))
66 #define TRACE_RULE_CLOSE(n) vRuleClose(spApi, (n))
67 #define TRACE_OPCODE_OPEN(o) vOpcodeOpen(spApi, (o))
68 #define TRACE_OPCODE_CLOSE(o) vOpcodeClose(spApi, (o))
69 #else
70 #define TRACE_RULE_OPEN(n)
71 #define TRACE_RULE_LEAF(n, m)
72 #define TRACE_RULE_CLOSE(n)
73 #define TRACE_OPCODE_OPEN(o)
74 #define TRACE_OPCODE_CLOSE(o)
75 #endif /* TRACE_PPPT */
76 
77 static void vSetMapValGen(uint8_t* ucpMap, luint luiOffset, luint luiChar, uint8_t ucVal);
78 static uint8_t ucGetMapValGen(api* spApi, uint8_t* ucpMap, luint luiOffset, luint luiChar);
79 static void vGetMaps(api* spApi);
80 static int iCompOps(const void* vpL, const void* vpR);
81 static int iCompName(const void* vpL, const void* vpR);
82 static int iNameInsensitiveCompare(char* cpL, char* cpR);
83 static int iMatchRule(api_rule* spRule, aint uiRuleCount, char* cpName);
84 static void vCopyMap(uint8_t* ucpDst, uint8_t* ucpSrc, aint uiLen);
85 static void vClearMap(uint8_t* ucpMap, aint uiLen);
86 static void vRuleMap(api* spApi, aint uiRuleIndex, uint8_t* ucpMap);
87 static void vAltMap(api* spApi, api_op* spOp, uint8_t* ucpMap);
88 static void vCatMap(api* spApi, api_op* spOp, uint8_t* ucpMap);
89 static void vRepMap(api* spApi, api_op* spOp, uint8_t* ucpMap);
90 static void vOpcodeMap(api* spApi, api_op* spOp, uint8_t* ucpMap);
91 
101 void vApiPppt(void* vpCtx, char** cppProtectedRules, aint uiProtectedRules){
102  if(!bApiValidate(vpCtx)){
103  vExContext();
104  }
105  api* spApi = (api*) vpCtx;
106  aint ui;
107  api_op* spOp = NULL;
108  if (!spApi->bSemanticsValid) {
109  XTHROW(spApi->spException,
110  "attempted PPPT construction but opcodes (vApiOpcodes()) have not been constructed");
111  }
112 
113  // PPPT sizes computed in semantics - vApiOpcodes()
114  // test to see if the maps are impossibly large
115  if(spApi->luiAcharMax == (luint)-1){
116  XTHROW(spApi->spException, "Partially-Predictive Parsing Tables cannot be used for this grammar. The maximum character is too large - 0xFFFFFFFFFFFFFFFF");
117  }
118  if(cppProtectedRules && uiProtectedRules){
119  // protect all rules in protection list
120  api_rule saRules[spApi->uiRuleCount];
121  memcpy((void*)saRules, (void*)spApi->spRules, sizeof(saRules));
122  qsort((void*)saRules, (size_t)spApi->uiRuleCount, sizeof(api_rule), iCompName);
123  for(ui = 0; ui < uiProtectedRules; ui++){
124  int iIndex = iMatchRule(saRules, spApi->uiRuleCount, cppProtectedRules[ui]);
125  if(iIndex == -1){
126  char caBuf[256];
127  snprintf(caBuf, 256, "PPPT protected rules: %s is not a valid rule name", cppProtectedRules[ui]);
128  XTHROW(spApi->spException, caBuf);
129  }else{
130  spApi->spRules[saRules[iIndex].uiIndex].bProtected = APG_TRUE;
131  }
132  }
133  if(uiMsgsCount(spApi->vpLog)){
134  XTHROW(spApi->spException, "PPPT protected rules have invalid rule names");
135  }
136  }
137 
138  // allocate and create the character, empty and undecided maps
139  spApi->ucpPpptUndecidedMap = (uint8_t*)vpMemAlloc(spApi->vpMem, spApi->luiPpptMapSize * sizeof(uint8_t));
140  memset((void*)spApi->ucpPpptUndecidedMap, ID_PPPT_ACTIVE, spApi->luiPpptMapSize * sizeof(uint8_t));
141  spApi->ucpPpptEmptyMap = (uint8_t*)vpMemAlloc(spApi->vpMem, spApi->luiPpptMapSize * sizeof(uint8_t));
142  memset((void*)spApi->ucpPpptEmptyMap, 0, spApi->luiPpptMapSize * sizeof(uint8_t));
143 
144  luint lu;
145  spOp = spApi->spOpcodes;
146  for(ui = 0; ui < spApi->uiOpcodeCount; ui++, spOp++){
147  if(spOp->uiId == ID_TRG) {
148  lu = spOp->luiMin;
149  for(; lu <= spOp->luiMax; lu++){
150  vSetMapValGen(spApi->ucpPpptEmptyMap, spApi->luiAcharMin, lu, ID_PPPT_EMPTY);
151  }
152  }else if(spOp->uiId == ID_TBS){
153  vSetMapValGen(spApi->ucpPpptEmptyMap, spApi->luiAcharMin, *spOp->luipAchar, ID_PPPT_EMPTY);
154  }else if(spOp->uiId == ID_TLS){
155  if(spOp->uiAcharLength){
156  vSetMapValGen(spApi->ucpPpptEmptyMap, spApi->luiAcharMin, *spOp->luipAchar, ID_PPPT_EMPTY);
157  if(*spOp->luipAchar >= 97 && *spOp->luipAchar <= 122){
158  vSetMapValGen(spApi->ucpPpptEmptyMap, spApi->luiAcharMin, (*spOp->luipAchar - 32), ID_PPPT_EMPTY);
159  }
160  }
161  }
162  }
163  vSetMapValGen(spApi->ucpPpptEmptyMap, spApi->luiAcharMin, spApi->luiAcharEos, ID_PPPT_EMPTY);
164 
165  // !!!! DEBUG
166 #ifdef TRACE_PPPT
167  printf("\n");
168  printf("CHARACTER MAP\n");
169  printf("character | char val | undec val\n");
170  printf("----------|----------|----------\n");
171  uint8_t uc;
172  for(lu = spApi->luiAcharMin; lu <= spApi->luiAcharMax; lu++){
173  uc =0;
174  printf("%9"PRIuMAX" | %8"PRIuMAX" | ", (luint)lu, (luint)uc);
175  uc = ucGetMapValGen(spApi, spApi->ucpPpptUndecidedMap, spApi->luiAcharMin, lu);
176  printf("%9"PRIuMAX"\n", (luint)uc);
177  }
178  uc =0;
179  printf("%9s | %8"PRIuMAX" | ", "EOS", (luint)uc);
180  uc = ucGetMapValGen(spApi, spApi->ucpPpptUndecidedMap, spApi->luiAcharMin, spApi->luiAcharEos);
181  printf("%9"PRIuMAX"\n", (luint)uc);
182 #endif /* TRACE_PPPT */
183  // !!!! DEBUG
184 
185 
186  // set the opcode map indexes
187  aint uiIndex = 0;
188  aint uiMapSize = spApi->luiPpptMapSize;
189  for(ui = 0; ui < spApi->uiRuleCount; ui++){
190  spApi->spRules[ui].uiPpptIndex = uiIndex++ * uiMapSize;
191  }
192  spOp = spApi->spOpcodes;
193  for(ui = 0; ui < spApi->uiOpcodeCount; ui++, spOp++){
194  switch (spOp->uiId) {
195  case ID_RNM:
196  spOp->uiPpptIndex = spApi->spRules[spOp->uiIndex].uiPpptIndex;
197  break;
198  case ID_ALT:
199  case ID_CAT:
200  case ID_REP:
201  case ID_TRG:
202  case ID_TLS:
203  case ID_TBS:
204  case ID_AND:
205  case ID_NOT:
206  spOp->uiPpptIndex = uiIndex++ * uiMapSize;
207  break;
208  // these opcodes have no PPPT map
209  // we can't predict what the user will do in a UDT
210  case ID_UDT:
211  // case insensitivity BKR it is possible for BKR to accept characters outside of AcharMin and AcharMax
212  case ID_BKR:
213  // look behind is iterative and impossible (or extremely difficult) to determine a PPPT map
214  case ID_BKA:
215  case ID_BKN:
216  // anchor opcodes only examine the character position, not the character value
217  case ID_ABG:
218  case ID_AEN:
219  break;
220  default:
221  XTHROW(spApi->spException, "unrecognized operator ID");
222  break;
223  }
224  }
225 
226  // allocate the PPPT table
227  spApi->ucpPpptTable = (uint8_t*)vpMemAlloc(spApi->vpMem, spApi->luiPpptTableLength);
228  memset((void*)spApi->ucpPpptTable, 0, spApi->luiPpptTableLength);
229 
230  // compute all maps in the PPPT table
231  vGetMaps(spApi);
232 
233  // success
234  spApi->bUsePppt = APG_TRUE;
235 }
236 
246 void vApiPpptSize(void *vpCtx, pppt_size* spSize){
247  if(!bApiValidate(vpCtx)){
248  vExContext();
249  }
250  api* spApi = (api*) vpCtx;
251  if (!spApi->bSemanticsValid) {
252  XTHROW(spApi->spException, "this function may not be called prior to vApiOpcodes()");
253  }
254  if(!spSize){
255  XTHROW(spApi->spException, "size pointer, spSize, may not be NULL");
256  }
257  spSize->luiAcharMax = spApi->luiAcharMax;
258  spSize->luiAcharMin = spApi->luiAcharMin;
259  spSize->luiMapSize = spApi->luiPpptMapSize;
260  spSize->luiMaps = spApi->luiPpptMapCount;
261  spSize->luiTableSize = spApi->luiPpptTableLength;
262 }
263 
264 static void vSetMapValGen(uint8_t* ucpMap, luint luiOffset, luint luiChar, uint8_t ucVal){
265  ucpMap[luiChar - luiOffset] = ucVal;
266 // luint luiRelChar = luiChar - luiOffset;
267 // luint luiMapIndex = luiRelChar >> 2;
268 // luint luiValShift = (3 - (luiRelChar - (luiMapIndex << 2))) << 1;
269 // ucpMap[luiMapIndex] &= ~(3 << luiValShift);
270 // ucpMap[luiMapIndex] |= ucVal << luiValShift;
271 }
272 //static uint8_t s_ucGetMask[4] = {0xC0, 0x30, 0xC,0x3};
273 //static uint8_t s_ucGetShift[4] = {6,4,2,0};
274 static uint8_t ucGetMapValGen(api* spApi, uint8_t* ucpMap, luint luiOffset, luint luiChar){
275  // sanity check
276  if((luiChar < luiOffset) || ((luiChar - luiOffset) >= (luint)spApi->luiPpptMapSize)){
277  XTHROW(spApi->spException, "bad character value");
278  }
279  return ucpMap[luiChar - luiOffset];
280 // luint luiRelChar = luiChar - luiOffset;
281 // luint luiMapIndex = luiRelChar >> 2;
282 // uint8_t ucByteIndex = (uint8_t)(luiRelChar - (luiMapIndex << 2));
283 // return (ucpMap[luiMapIndex] & s_ucGetMask[ucByteIndex]) >> s_ucGetShift[ucByteIndex];
284 }
285 
286 static void vCopyMap(uint8_t* ucpDst, uint8_t* ucpSrc, aint uiLen){
287  uint8_t* ucpEnd = ucpDst + uiLen;
288  while(ucpDst < ucpEnd){
289  *ucpDst++ = *ucpSrc++;
290  }
291 }
292 static void vClearMap(uint8_t* ucpMap, aint uiLen){
293  memset((void*)ucpMap, 0, sizeof(uint8_t) * uiLen);
294 }
295 
296 static void vGetMaps(api* spApi){
297  aint ui;
298  uint8_t ucaMap[spApi->luiPpptMapSize];
299  api_rule saRules[spApi->uiRuleCount];
300  memcpy((void*)saRules, (void*)spApi->spRules, sizeof(saRules));
301 
302  // order the rules ascending on their opcode count - want to process the smallest rules first.
303  qsort((void*)saRules, (size_t)spApi->uiRuleCount, sizeof(api_rule), iCompName);
304  qsort((void*)saRules, (size_t)spApi->uiRuleCount, sizeof(api_rule), iCompOps);
305 
306 #ifdef TRACE_PPPT
307  api_rule* spRule = saRules;
308  printf("opcode count | name\n");
309  printf("------------ | ----\n");
310  for(ui = 0; ui < spApi->uiRuleCount; ui++, spRule++){
311  printf("%12lu | %s\n", (luint)spRule->uiOpCount, spRule->cpName);
312  }
313 #endif /* TRACE_PPPT */
314 
315  // compute PPPTs for all opcodes of all rules
316  for(ui = 0; ui < spApi->uiRuleCount; ui++){
317  vRuleMap(spApi, saRules[ui].uiIndex, ucaMap);
318  }
319 }
320 
321 static void vRuleMap(api* spApi, aint uiRuleIndex, uint8_t* ucpMap){
322  TRACE_RULE_OPEN(uiRuleIndex);
323  uint8_t* ucpRuleMap = spApi->ucpPpptTable + spApi->spRules[uiRuleIndex].uiPpptIndex;
324  if(spApi->spRules[uiRuleIndex].bIsComplete){
325  vCopyMap(ucpMap, ucpRuleMap, spApi->luiPpptMapSize);
326  TRACE_RULE_CLOSE(uiRuleIndex);
327  }else if(spApi->spRules[uiRuleIndex].bIsOpen){
328  memcpy((void*)ucpMap, (void*)spApi->ucpPpptUndecidedMap, (sizeof(uint8_t) * spApi->luiPpptMapSize));
329  TRACE_RULE_LEAF(uiRuleIndex, ucpMap);
330  }else{
331  spApi->spRules[uiRuleIndex].bIsOpen = APG_TRUE;
332  api_op* spOp = &spApi->spOpcodes[spApi->spRules[uiRuleIndex].uiOpOffset];
333  vOpcodeMap(spApi, spOp, ucpMap);
334  if(spApi->spRules[uiRuleIndex].bProtected){
335  memcpy((void*)ucpRuleMap, (void*)spApi->ucpPpptUndecidedMap, (sizeof(uint8_t) * spApi->luiPpptMapSize));
336  }else{
337  memcpy((void*)ucpRuleMap, (void*)ucpMap, (sizeof(uint8_t) * spApi->luiPpptMapSize));
338  }
339  vCopyMap(ucpMap, ucpRuleMap, spApi->luiPpptMapSize);
340  spApi->spRules[uiRuleIndex].bIsComplete = APG_TRUE;
341  spApi->spRules[uiRuleIndex].bIsOpen = APG_FALSE;
342  TRACE_RULE_CLOSE(uiRuleIndex);
343  }
344 }
345 
351 static void vAltMap(api* spApi, api_op* spOp, uint8_t* ucpMap){
352  aint ui;
353  luint lu;
354  uint8_t ucChildVal;
355  aint uiCount = spOp->uiChildCount;
356  api_op* spChildOp;
357  uint8_t ucaChildren[spApi->luiPpptMapSize * spOp->uiChildCount];
358  uint8_t* ucpChild = ucaChildren;
359  for (ui = 0; ui < uiCount; ui++, ucpChild += spApi->luiPpptMapSize) {
360  spChildOp = &spApi->spOpcodes[spOp->uipChildIndex[ui]];
361  vOpcodeMap(spApi, spChildOp, ucpChild);
362  }
363  for(lu = spApi->luiAcharMin; lu <= spApi->luiAcharEos; lu++){
364  ucpChild = ucaChildren;
365  for (ui = 0; ui < uiCount; ui++, ucpChild += spApi->luiPpptMapSize) {
366  ucChildVal = ucGetMapValGen(spApi, ucpChild, spApi->luiAcharMin, lu);
367  if(ucChildVal != ID_PPPT_NOMATCH){
368  vSetMapValGen(ucpMap, spApi->luiAcharMin, lu, ucChildVal);
369  break;
370  }
371  }
372  }
373 }
374 
380 static void vCatMap(api* spApi, api_op* spOp, uint8_t* ucpMap){
381  luint lu;
382  aint ui;
383  api_op* spChildOp;
384  uint8_t ucChildVal;
385  aint uiCount = spOp->uiChildCount;
386  uint8_t ucaChildren[spApi->luiPpptMapSize * spOp->uiChildCount];
387  uint8_t* ucpChild = ucaChildren;
388  // get the maps for all children
389  for (ui = 0; ui < uiCount; ui++, ucpChild += spApi->luiPpptMapSize) {
390  spChildOp = &spApi->spOpcodes[spOp->uipChildIndex[ui]];
391  vOpcodeMap(spApi, spChildOp, ucpChild);
392  }
393  for(lu = spApi->luiAcharMin; lu <= spApi->luiAcharEos; lu++){
394  // only use the first child to evaluate CAT map
395  ucChildVal = ucGetMapValGen(spApi, ucaChildren, spApi->luiAcharMin, lu);
396  if(ucChildVal != ID_PPPT_NOMATCH){
397  vSetMapValGen(ucpMap, spApi->luiAcharMin, lu, ID_PPPT_ACTIVE);
398  }
399  }
400 }
401 static void vRepMap(api* spApi, api_op* spOp, uint8_t* ucpMap){
402  luint lu;
403  uint8_t ucChildVal;
404  uint8_t ucaChildMap[spApi->luiPpptMapSize];
405  vOpcodeMap(spApi, (spOp + 1), ucaChildMap);
406  for(lu = spApi->luiAcharMin; lu <= spApi->luiAcharEos; lu++){
407  ucChildVal = ucGetMapValGen(spApi, ucaChildMap, spApi->luiAcharMin, lu);
408  if(ucChildVal == ID_PPPT_EMPTY){
409  vSetMapValGen(ucpMap, spApi->luiAcharMin, lu, ID_PPPT_EMPTY);
410  }else if(ucChildVal == ID_PPPT_NOMATCH){
411  if(spOp->luiMin == 0){
412  vSetMapValGen(ucpMap, spApi->luiAcharMin, lu, ID_PPPT_EMPTY);
413  }else{
414  vSetMapValGen(ucpMap, spApi->luiAcharMin, lu, ID_PPPT_NOMATCH);
415  }
416  }else{
417  vSetMapValGen(ucpMap, spApi->luiAcharMin, lu, ID_PPPT_ACTIVE);
418  }
419  }
420 }
421 static void vOpcodeMap(api* spApi, api_op* spOp, uint8_t* ucpMap){
422  TRACE_OPCODE_OPEN(spOp);
423  luint lu;
424  vClearMap(ucpMap, spApi->luiPpptMapSize);
425  switch (spOp->uiId) {
426  case ID_ALT:
427  vAltMap(spApi, spOp, ucpMap);
428  break;
429  case ID_CAT:
430  vCatMap(spApi, spOp, ucpMap);
431  break;
432  case ID_REP:
433  vRepMap(spApi, spOp, ucpMap);
434  break;
435  case ID_RNM:
436  vRuleMap(spApi, spOp->uiIndex, ucpMap);
437  break;
438  case ID_AND:
439  vOpcodeMap(spApi, (spOp + 1), ucpMap);
440  for(lu = spApi->luiAcharMin; lu <= spApi->luiAcharEos; lu++){
441  uint8_t ucVal = ucGetMapValGen(spApi, ucpMap, spApi->luiAcharMin, lu);
442  if(ucVal == ID_PPPT_MATCH){
443  vSetMapValGen(ucpMap, spApi->luiAcharMin, lu, ID_PPPT_EMPTY);
444  }
445  }
446  break;
447  case ID_NOT:
448  vOpcodeMap(spApi, (spOp + 1), ucpMap);
449  // reverse NOMATCH/MATCH
450  for(lu = spApi->luiAcharMin; lu <= spApi->luiAcharEos; lu++){
451  uint8_t ucVal = ucGetMapValGen(spApi, ucpMap, spApi->luiAcharMin, lu);
452  if(ucVal == ID_PPPT_MATCH){
453  vSetMapValGen(ucpMap, spApi->luiAcharMin, lu, ID_PPPT_NOMATCH);
454  }else if(ucVal == ID_PPPT_NOMATCH){
455  vSetMapValGen(ucpMap, spApi->luiAcharMin, lu, ID_PPPT_EMPTY);
456  }
457  }
458  break;
459  case ID_TLS:
460 #ifdef ASSERT_PPPT
461  if(*spOp->luipAchar >= 65 && *spOp->luipAchar <= 90){
462  assert(APG_FALSE);
463  }
464 #endif /* ASSERT_PPPT */
465  if(spOp->uiAcharLength > 1){
466  vSetMapValGen(ucpMap, spApi->luiAcharMin, *spOp->luipAchar, ID_PPPT_ACTIVE);
467  if(*spOp->luipAchar >= 97 && *spOp->luipAchar <= 122){
468  vSetMapValGen(ucpMap, spApi->luiAcharMin, (*spOp->luipAchar - 32), ID_PPPT_ACTIVE);
469  }
470  }else if(spOp->uiAcharLength == 1){
471  vSetMapValGen(ucpMap, spApi->luiAcharMin, *spOp->luipAchar, ID_PPPT_MATCH);
472  if(*spOp->luipAchar >= 97 && *spOp->luipAchar <= 122){
473  vSetMapValGen(ucpMap, spApi->luiAcharMin, (*spOp->luipAchar - 32), ID_PPPT_MATCH);
474  }
475  }else {
476  vCopyMap(ucpMap, spApi->ucpPpptEmptyMap, spApi->luiPpptMapSize);
477  }
478  break;
479  case ID_TBS:
480  if(spOp->uiAcharLength > 1){
481  vSetMapValGen(ucpMap, spApi->luiAcharMin, *spOp->luipAchar, ID_PPPT_ACTIVE);
482  }else{
483  vSetMapValGen(ucpMap, spApi->luiAcharMin, *spOp->luipAchar, ID_PPPT_MATCH);
484  }
485  break;
486  case ID_TRG:
487  for(lu = spOp->luiMin; lu <= spOp->luiMax; lu++){
488  vSetMapValGen(ucpMap, spApi->luiAcharMin, lu, ID_PPPT_MATCH);
489  }
490  break;
491  // these opcodes have no PPPT map, so pass undecided (ACTIVE) up to the parent
492  case ID_ABG:
493  case ID_AEN:
494  case ID_BKR:
495  case ID_BKA:
496  case ID_UDT:
497  case ID_BKN:
498  vCopyMap(ucpMap, spApi->ucpPpptUndecidedMap, spApi->luiPpptMapSize);
499  break;
500  default:
501  XTHROW(spApi->spException, "unrecognized operator ID");
502  break;
503  }
504  switch (spOp->uiId) {
505  case ID_ALT:
506  case ID_CAT:
507  case ID_REP:
508  case ID_AND:
509  case ID_NOT:
510  case ID_TLS:
511  case ID_TBS:
512  case ID_TRG:
513  // copy this result to the proper opcode index in the PPPT table
514  vCopyMap((spApi->ucpPpptTable + spOp->uiPpptIndex), ucpMap, spApi->luiPpptMapSize);
515 #ifdef ASSERT_PPPT
516  assert(((luint)spOp->uiPpptIndex + spApi->luiPpptMapSize) <= spApi->luiPpptTableLength);
517 #endif /* ASSERT_PPPT */
518  break;
519  case ID_RNM: // this map is saved in vRuleMap()
520  case ID_UDT: // no way to predict what characters the user will accept in a UDT
521  case ID_BKR: // case insensitivity makes it possible for back reference to accept characters outside of min and max
522  case ID_BKA: // the look-behind algorithm is iterative - no way to predict its behavior - always ACTIVE
523  case ID_BKN: // the look-behind algorithm is iterative - no way to predict its behavior - always ACTIVE
524  case ID_ABG: // looks only at the character position, not the character itself
525  case ID_AEN: // looks only at the character position, not the character itself
526  // no map for these opcodes
527  break;
528  }
529  TRACE_OPCODE_CLOSE(spOp);
530 }
531 static int iCompOps(const void* vpL, const void* vpR){
532  api_rule* spRuleL = (api_rule*)vpL;
533  api_rule* spRuleR = (api_rule*)vpR;
534  if(spRuleL->uiOpCount < spRuleR->uiOpCount){
535  return -1;
536  }
537  if(spRuleL->uiOpCount > spRuleR->uiOpCount){
538  return 1;
539  }
540  return 0;
541 }
542 static int iNameInsensitiveCompare(char* cpL, char* cpR){
543  aint uiLenL = strlen(cpL);
544  aint uiLenR = strlen(cpR);
545  char l, r;
546  aint uiLesser = uiLenL < uiLenR ? uiLenL : uiLenR;
547  while (uiLesser--) {
548  l = *cpL;
549  if (l >= 65 && l <= 90) {
550  l += 32;
551  }
552  r = *cpR;
553  if (r >= 65 && r <= 90) {
554  r += 32;
555  }
556  if (l < r) {
557  return -1;
558  }
559  if (l > r) {
560  return 1;
561  }
562  cpL++;
563  cpR++;
564  }
565  if (uiLenL < uiLenR) {
566  return -1;
567  }
568  if (uiLenL > uiLenR) {
569  return 1;
570  }
571  return 0;
572 }
573 static int iCompName(const void* vpL, const void* vpR) {
574  api_rule* spL = (api_rule*) vpL;
575  api_rule* spR = (api_rule*) vpR;
576  return iNameInsensitiveCompare(spL->cpName, spR->cpName);
577 }
578 
579 static int iMatchRule(api_rule* spRule, aint uiRuleCount, char* cpName){
580  // Bottenbruch binary search: https://en.wikipedia.org/wiki/Binary_search_algorithm
581  int iL, iR, iM;
582  int iComp;
583  iL = 0;
584  iR = uiRuleCount - 1;
585  while(iL <= iR){
586  iM = ((iR - iL) / 2 + iL);
587  iComp = iNameInsensitiveCompare(spRule[iM].cpName, cpName);
588  if(iComp > 0){
589  iR = iM - 1;
590  }else if(iComp < 0){
591  iL = iM + 1;
592  }else{
593  return iM;
594  }
595  }
596  return -1;
597 }
598 
599 #ifdef TRACE_PPPT
600 static const char* cpMapVal(uint8_t ucVal){
601  static char* caVal[5] = {"N", "M", "E", "A", "U"};
602 // static char* caVal[5] = {"n", "m", "e", "a", "U"};
603  if(ucVal < 4){
604  return caVal[ucVal];
605  }
606  return caVal[4];
607 }
608 static void vPrintMap(api* spApi, uint8_t* ucpMap){
609  luint lu;
610  uint8_t ucVal;
611  for(lu = spApi->luiAcharMin; lu <= spApi->luiAcharEos; lu++){
612  ucVal = ucGetMapValGen(spApi, ucpMap, spApi->luiAcharMin, lu);
613  printf(" %"PRIuMAX"%s", lu, cpMapVal(ucVal));
614 // if(ucVal == ID_PPPT_NOMATCH || ucVal == ID_PPPT_MATCH){
615 // printf(" %"PRIuMAX"%s", lu, cpMapVal(ucVal));
616 // }
617  }
618  printf("\n");
619 }
620 static void vRuleOpen(api* spApi, aint uiRuleIndex) {
621  vIndent(s_uiTreeDepth);
622  printf("%s: open\n", spApi->spRules[uiRuleIndex].cpName);
623  s_uiTreeDepth++;
624 }
625 static void vRuleLeaf(api* spApi, aint uiRuleIndex, uint8_t* ucpMap) {
626  s_uiTreeDepth--;
627  vIndent(s_uiTreeDepth);
628  printf("%s: leaf", spApi->spRules[uiRuleIndex].cpName);
629  vPrintMap(spApi, ucpMap);
630 }
631 static void vRuleClose(api* spApi, aint uiRuleIndex) {
632  s_uiTreeDepth--;
633  vIndent(s_uiTreeDepth);
634  printf("%s:", spApi->spRules[uiRuleIndex].cpName);
635  uint8_t* ucpMap = spApi->ucpPpptTable + spApi->spRules[uiRuleIndex].uiPpptIndex;
636  vPrintMap(spApi, ucpMap);
637 }
638 static void vOpcodeOpen(api* spApi, api_op* spOp) {
639  vIndent(s_uiTreeDepth);
640  printf("%s: open\n", cpUtilOpName(spOp->uiId));
641  s_uiTreeDepth++;
642 }
643 static void vOpcodeClose(api* spApi, api_op* spOp) {
644  s_uiTreeDepth--;
645  vIndent(s_uiTreeDepth);
646  printf("%s:", cpUtilOpName(spOp->uiId));
647  uint8_t* ucpMap = spApi->ucpPpptTable + spOp->uiPpptIndex;
648  vPrintMap(spApi, ucpMap);
649 }
650 static void vIndent(aint uiIndent) {
651  while (uiIndent--) {
652  printf(".");
653  }
654 }
655 #endif /* TRACE_PPPT */
656 
api::ucpPpptEmptyMap
uint8_t * ucpPpptEmptyMap
Common PPPT character map for an operator that is an empty match on the next alphabet character.
Definition: apip.h:169
bApiValidate
abool bApiValidate(void *vpCtx)
Validates an API context pointer.
Definition: api.c:104
api_op::uiPpptIndex
aint uiPpptIndex
Index to the PPPT map for this opcode.
Definition: apip.h:91
api::ucpPpptUndecidedMap
uint8_t * ucpPpptUndecidedMap
Common PPPT character map for an operator that is indeterminate on the next alphabet character.
Definition: apip.h:168
api::spRules
api_rule * spRules
Points to an array of rule structures.
Definition: apip.h:145
vApiPpptSize
void vApiPpptSize(void *vpCtx, pppt_size *spSize)
Compute the size of the PPPT maps and the number of bytes for the entire table.
Definition: pppt.c:246
api_op::luiMax
luint luiMax
maximum value for REP and TRG opcodes
Definition: apip.h:85
ID_RNM
#define ID_RNM
rule name
Definition: parser.h:46
api::uiOpcodeCount
aint uiOpcodeCount
Number of opcodes.
Definition: apip.h:163
api_rule::uiPpptIndex
aint uiPpptIndex
Index to the PPPT map for this opcode.
Definition: apip.h:62
api::spOpcodes
api_op * spOpcodes
Pointer to the array of opcodes for the SANF grammar.
Definition: apip.h:162
api_op::uiAcharLength
aint uiAcharLength
number of characters in TLS/TBS strings
Definition: apip.h:87
api::luiAcharEos
luint luiAcharEos
The special End-Of-String character. In practice, luiAcharMax + 1.
Definition: apip.h:176
api::vpLog
void * vpLog
A msglog context for error reporting.
Definition: apip.h:179
uiMsgsCount
aint uiMsgsCount(void *vpCtx)
Get the number of logged messages.
Definition: msglog.c:213
ID_ALT
#define ID_ALT
alternation
Definition: parser.h:43
cpUtilOpName
const char * cpUtilOpName(aint uiId)
Convert an opcode identifier to a human-readable opcode name.
Definition: utilities.c:659
ID_BKR
#define ID_BKR
back reference to a previously matched rule or UDT name
Definition: parser.h:58
ID_UDT
#define ID_UDT
user-defined terminal
Definition: parser.h:55
api_rule
API information about each rule.
Definition: apip.h:55
vExContext
void vExContext()
Handles bad context pointers.
Definition: exception.c:126
api_op::uiChildCount
aint uiChildCount
number of children for this ALT or CAT operator
Definition: apip.h:83
pppt_size::luiMapSize
luint luiMapSize
The size, in bytes, of a single PPPT table entry (map).
Definition: api.h:86
TRACE_RULE_LEAF
#define TRACE_RULE_LEAF(n, m)
Definition: pppt.c:71
ID_PPPT_NOMATCH
#define ID_PPPT_NOMATCH
deterministic NOMATCH – there is no chance of a phrase match with this leading character
Definition: parser.h:81
pppt_size::luiAcharMax
luint luiAcharMax
The maximum character size in the grammar alphabet.
Definition: api.h:85
ID_NOT
#define ID_NOT
negative look ahead
Definition: parser.h:57
ID_AND
#define ID_AND
positive look ahead
Definition: parser.h:56
api_op::luipAchar
luint * luipAchar
pointer to the first character in the achar table for this TLS/TBS operator
Definition: apip.h:86
XTHROW
#define XTHROW(ctx, msg)
Exception throw macro.
Definition: exception.h:67
api_op::luiMin
luint luiMin
minimum value for REP and TRG opcodes
Definition: apip.h:84
api::uiRuleCount
aint uiRuleCount
The number of rules in the SABNF grammar and in the array.
Definition: apip.h:146
aint
uint_fast32_t aint
The APG parser's unsigned integer type.
Definition: apg.h:79
api_rule::bProtected
abool bProtected
if true, this rule will be protected from being hidden under a fully-predictive node in the parse tre...
Definition: apip.h:63
pppt_size::luiMaps
luint luiMaps
The number of maps needed.
Definition: api.h:87
ID_CAT
#define ID_CAT
concatenation
Definition: parser.h:44
vpMemAlloc
void * vpMemAlloc(void *vpCtx, aint uiBytes)
Allocates memory.
Definition: memory.c:196
ID_PPPT_MATCH
#define ID_PPPT_MATCH
deterministic MATCH – this character constitutes a single character phrase match of length 1
Definition: parser.h:82
ID_TRG
#define ID_TRG
terminal range
Definition: parser.h:47
ID_REP
#define ID_REP
repetition
Definition: parser.h:45
TRACE_OPCODE_OPEN
#define TRACE_OPCODE_OPEN(o)
Definition: pppt.c:73
api::luiPpptMapCount
luint luiPpptMapCount
The number of operator maps in the table.
Definition: apip.h:172
TRACE_RULE_CLOSE
#define TRACE_RULE_CLOSE(n)
Definition: pppt.c:72
pppt_size::luiTableSize
luint luiTableSize
The memory requirement, in bytes, of the full table.
Definition: api.h:88
TRACE_OPCODE_CLOSE
#define TRACE_OPCODE_CLOSE(o)
Definition: pppt.c:74
api_op::uiIndex
aint uiIndex
index of this referenced rule or UDT
Definition: apip.h:80
api
The API context.
Definition: apip.h:123
luint
uintmax_t luint
luint is used to cast integers suitable for the %"PRIuMAX" printf format.
Definition: apg.h:133
api_rule::bIsOpen
abool bIsOpen
used for walking the SEST, set to true at the root it will tell when a recursive instance of the rule...
Definition: apip.h:60
api_op
API information about each opcode.
Definition: apip.h:78
ID_BKA
#define ID_BKA
positive look behind
Definition: parser.h:59
pppt_size
Size information for the **P**artially-**P**redictive **P**arsing **T**ables (PPPT) data.
Definition: api.h:83
vApiPppt
void vApiPppt(void *vpCtx, char **cppProtectedRules, aint uiProtectedRules)
Compute the Partially-Predictive Parsing Tables.
Definition: pppt.c:101
api::luiPpptMapSize
luint luiPpptMapSize
The size, in bytes, of a single operator map.
Definition: apip.h:173
api::ucpPpptTable
uint8_t * ucpPpptTable
Pointer to the PPPT table of operator maps.
Definition: apip.h:170
api_op::uiId
aint uiId
type of opcode, ID_ALT, etc.
Definition: apip.h:79
ID_TLS
#define ID_TLS
terminal literal string
Definition: parser.h:49
pppt_size::luiAcharMin
luint luiAcharMin
The minimum character size in the grammar alphabet.
Definition: api.h:84
APG_TRUE
#define APG_TRUE
Definition: apg.h:291
api::bUsePppt
abool bUsePppt
True of PPPT are being used.
Definition: apip.h:167
ID_PPPT_EMPTY
#define ID_PPPT_EMPTY
deterministic EMTPY – this is an empty string match, the parse succeeds but the phrase length is 0
Definition: parser.h:83
ID_BKN
#define ID_BKN
negative look behind
Definition: parser.h:60
api_rule::cpName
char * cpName
pointer to null-terminated string in the string table
Definition: apip.h:56
api::luiAcharMin
luint luiAcharMin
The minimum alphabet character referenced by the terminal nodes, TLS, TBL & TRG.
Definition: apip.h:174
api::vpMem
void * vpMem
Pointer to the memory context used for all memory allocations and exceptions thrown.
Definition: apip.h:126
api::luiAcharMax
luint luiAcharMax
The maximum alphabet character referenced by the terminal nodes, TLS, TBL & TRG.
Definition: apip.h:175
TRACE_RULE_OPEN
#define TRACE_RULE_OPEN(n)
Definition: pppt.c:70
api_op::uipChildIndex
aint * uipChildIndex
pointer to the first child index of this ALT or CAT operator
Definition: apip.h:82
ID_ABG
#define ID_ABG
anchor - beginning of string
Definition: parser.h:61
ID_PPPT_ACTIVE
#define ID_PPPT_ACTIVE
non-deterministic – it is not possible to determine a match or not based on this character – the pars...
Definition: parser.h:84
api::spException
exception * spException
Definition: apip.h:125
api::bSemanticsValid
abool bSemanticsValid
APG_TRUE if the the input semantics are valid. That is, the opcodes for the parser have been generate...
Definition: apip.h:184
api_rule::uiOpOffset
aint uiOpOffset
offset into the opcode table to the first opcode of this rule
Definition: apip.h:58
ID_TBS
#define ID_TBS
terminal binary string
Definition: parser.h:48
api_rule::bIsComplete
abool bIsComplete
used when processing rules recursively. If the rule is already complete it need not be recursed again...
Definition: apip.h:61
api::luiPpptTableLength
luint luiPpptTableLength
The PPPT length.
Definition: apip.h:171
api_rule::uiOpCount
aint uiOpCount
the number of opcodes in this rule
Definition: apip.h:59
ID_AEN
#define ID_AEN
anchor - end of string
Definition: parser.h:62
APG_FALSE
#define APG_FALSE
Definition: apg.h:292
APG Version 7.0 is licensed under the 2-Clause BSD License,
an Open Source Initiative Approved License.