Version 7.0
Copyright © 2021 Lowell D. Thomas
APG
… an ABNF Parser Generator
|
Go to the documentation of this file.
50 static const void* s_vpMagicNumber = (
void*)
"parser";
55 #include "../utilities/utilities.h"
96 luint* luipParserInit = NULL;
97 aint* uipChildList = NULL;
98 achar* acpAcharTable = NULL;
102 XTHROW(spException,
"sizeof(achar) is too small for this parser's grammar");
108 XTHROW(spException,
"sizeof(aint) is too small for this parser's maximum integer");
113 memset((
void*) spCtx, 0,
sizeof(parser));
114 spCtx->vpMem = vpMem;
115 spCtx->spException = spException;
121 char* cpStr = (
char*)
vpMemAlloc(vpMem, uiAlloc);
123 spCtx->cpStringTable = (
const char*)cpStr;
125 spCtx->ucpMaps = NULL;
128 uint8_t* ucpStr = (uint8_t*)
vpMemAlloc(vpMem, uiAlloc);
130 spCtx->ucpMaps = (
const uint8_t*)ucpStr;
144 XTHROW(spCtx->vpMem,
"invalid achar table data");
146 spCtx->acpAcharTable = acpAcharTable;
152 XTHROW(spCtx->vpMem,
"invalid parser initialization data");
154 spInitHdr = (
init_hdr*) luipParserInit;
164 spCtx->uipChildList = (
const aint*) uipChildList;
169 memset((
void*)spCtx->spRules, 0, (
sizeof(
rule) * spCtx->uiRuleCount));
173 if (spCtx->uiUdtCount) {
175 memset((
void*)spCtx->spUdts, 0, (
sizeof(
udt) * spCtx->uiUdtCount));
180 spCtx->spOpcodes = (opcode*)
vpMemAlloc(vpMem, (
aint) (
sizeof(opcode) * spCtx->uiOpcodeCount));
181 memset((
void*)spCtx->spOpcodes, 0, (
sizeof(opcode) * spCtx->uiOpcodeCount));
203 spCtx->pfnOpFunc[
ID_BKR] = NULL;
205 #ifdef APG_STRICT_ABNF
206 spCtx->pfnOpFunc[
ID_UDT] = NULL;
207 spCtx->pfnOpFunc[
ID_AND] = NULL;
208 spCtx->pfnOpFunc[
ID_NOT] = NULL;
209 spCtx->pfnOpFunc[
ID_BKA] = NULL;
210 spCtx->pfnOpFunc[
ID_BKN] = NULL;
211 spCtx->pfnOpFunc[
ID_ABG] = NULL;
212 spCtx->pfnOpFunc[
ID_AEN] = NULL;
228 vPrintRules(spCtx, NULL);
230 vPrintUdts(spCtx, NULL);
232 vPrintOpcodes(spCtx, NULL);
236 spCtx->vpValidate = s_vpMagicNumber;
237 return (
void*) spCtx;
246 parser* spCtx = (parser*) vpCtx;
248 if (spCtx->vpValidate == s_vpMagicNumber) {
249 void* vpMem = spCtx->vpMem;
251 memset((
void*)spCtx, 0,
sizeof(*spCtx));
269 parser* spCtx = (parser*) vpCtx;
271 if(!vpCtx || (spCtx->vpValidate != s_vpMagicNumber)){
276 XTHROW(spCtx->vpMem,
"parser configuration pointer cannot be NULL");
280 XTHROW(spCtx->vpMem,
"parser state pointer cannot be NULL");
285 XTHROW(spCtx->vpMem,
"input string is NULL");
288 XTHROW(spCtx->vpMem,
"start rule is out of range");
290 memset(spState, 0,
sizeof(*spState));
293 if (spCtx->uiUdtCount) {
294 for (ui = 0; ui < spCtx->uiUdtCount; ui += 1) {
295 if (spCtx->spUdts[ui].pfnCallback == NULL) {
296 XTHROW(spCtx->spException,
297 "NULL UDT callback function pointers - all UDT callback functions must be set");
309 XTHROW(spCtx->vpMem,
"sub string beginning is beyond the end of the input string");
313 spCtx->uiSubStringEnd = spCtx->uiInputStringLength;
322 spCtx->uiSubStringBeg = 0;
325 spCtx->uiSubStringLength = spCtx->uiSubStringEnd - spCtx->uiSubStringBeg;
326 spCtx->uiOffset = spCtx->uiSubStringBeg;
330 spCtx->uiLookBehindLength = spCtx->uiInputStringLength;
332 spCtx->uiLookBehindLength = spCtx->uiInputStringLength < spCtx->uiLookBehindLength ?
333 spCtx->uiInputStringLength : spCtx->uiLookBehindLength;
337 spCtx->sCBData.vpCtx = (
void*) spCtx;
338 spCtx->sCBData.vpMem = spCtx->vpMem;
339 spCtx->sCBData.spException = spCtx->spException;
340 spCtx->sCBData.acpString = &spCtx->acpInputString[spCtx->uiSubStringBeg];
341 spCtx->sCBData.uiStringLength = spCtx->uiSubStringLength;
342 spCtx->sCBData.uiParserOffset = 0;
343 spCtx->sCBData.uiParserState =
ID_ACTIVE;
344 spCtx->sCBData.uiParserPhraseLength = 0;
345 spCtx->sCBData.vpUserData = spConfig->
vpUserData;
346 spCtx->sCBData.uiCallbackPhraseLength = 0;
347 spCtx->sCBData.uiCallbackState =
ID_ACTIVE;
354 memset((
void*)&spCtx->sState, 0,
sizeof(spCtx->sState));
355 spCtx->uiTreeDepth = 0;
356 spCtx->sStartOp.sRnm.spRule = &spCtx->spRules[spCtx->uiStartRule];
357 spCtx->sStartOp.sRnm.uiId =
ID_RNM;
358 spCtx->sStartOp.sRnm.ucpPpptMap = spCtx->spRules[spCtx->uiStartRule].ucpPpptMap;
359 spCtx->pfnOpFunc[
ID_RNM](spCtx, &spCtx->sStartOp);
365 spCtx->sState.uiState = spCtx->uiOpState;
366 spCtx->sState.uiPhraseLength =
367 spCtx->uiOffset > spCtx->uiSubStringBeg ? spCtx->uiOffset - spCtx->uiSubStringBeg : 0;
368 spCtx->sState.uiStringLength = spCtx->uiSubStringLength;
371 }
else if (spCtx->sState.uiPhraseLength == spCtx->sState.uiStringLength) {
376 memcpy((
void*) spState, (
void*) &spCtx->sState,
sizeof(*spState));
387 parser* spCtx = (parser*) vpCtx;
388 if (vpCtx && (spCtx->vpValidate == s_vpMagicNumber)) {
389 if (uiRuleId < spCtx->uiRuleCount) {
390 spCtx->spRules[uiRuleId].pfnCallback = pfnCallback;
408 parser* spCtx = (parser*) vpCtx;
409 if (vpCtx && (spCtx->vpValidate == s_vpMagicNumber)) {
410 if (uiUdtId < spCtx->uiUdtCount) {
411 spCtx->spUdts[uiUdtId].pfnCallback = pfnCallback;
423 parser* spCtx = (parser*) vpCtx;
424 if (vpCtx && (spCtx->vpValidate == s_vpMagicNumber)) {
442 parser* spCtx = (parser*) vpCtx;
443 if (!(vpCtx && (spCtx->vpValidate == s_vpMagicNumber))) {
448 rule* spRule = spCtx->spRules;
450 for(; ui < spCtx->uiRuleCount; ui++, spRule++){
470 parser* spCtx = (parser*) vpCtx;
471 if (!(vpCtx && (spCtx->vpValidate == s_vpMagicNumber))) {
475 if(uiRuleIndex < spCtx->uiRuleCount){
476 return spCtx->spRules[uiRuleIndex].cpRuleName;
493 parser* spCtx = (parser*) vpCtx;
494 if (!(vpCtx && (spCtx->vpValidate == s_vpMagicNumber))) {
498 if(uiUdtIndex < spCtx->uiUdtCount){
499 return spCtx->spUdts[uiUdtIndex].cpUdtName;
505 #ifdef PARSER_EVAL_DEBUG
507 #include "../utilities/utilities.h"
508 static const char* cpMapVal(uint8_t ucVal){
509 static char* caVal[5] = {
"N",
"M",
"E",
"A",
"U"};
515 static void vPrintMap(
achar acMin,
achar acMax,
const uint8_t* ucpMap){
518 for(ui = acMin; ui <= acMax; ui++){
521 printf(
" %"PRIuMAX
"%s", (
luint)ui, cpMapVal(ucVal));
535 aint uiPpptState(parser* spCtx,
const opcode* spOp,
aint uiOffset){
541 switch(spOp->sGen.uiId){
551 if(uiOffset >= spCtx->uiSubStringEnd){
552 if(spCtx->uiSubStringEnd == 0){
557 ucVal = spOp->sGen.ucpPpptMap[spCtx->acAcharMax + 1 - spCtx->acAcharMin];
559 acChar = spCtx->acpInputString[uiOffset];
560 if(acChar < spCtx->acAcharMin || acChar > spCtx->acAcharMax){
563 ucVal = spOp->sGen.ucpPpptMap[acChar - spCtx->acAcharMin];
565 #ifdef PARSER_EVAL_DEBUG
566 printf(
"%s: ", cpOpName(spOp->sGen.uiId));
567 printf(
" %"PRIuMAX
"%s: ", (
luint)acChar, cpMapVal(ucVal));
568 vPrintMap(spCtx->acAcharMin, spCtx->acAcharMax, spOp->sGen.ucpPpptMap);
590 abool bPpptEval(parser* spCtx,
const opcode* spOp,
aint uiOffset){
591 aint uiState = uiPpptState(spCtx, spOp, uiOffset);
595 spCtx->uiPhraseLength = 0;
599 spCtx->uiPhraseLength = 0;
603 spCtx->uiPhraseLength = 1;
631 return ucpMap[luiChar - luiOffset];
This header "#include"s all public lib headers and other standard headers needed by most objects.
The parent-mode back reference object.
void vCat(parser *spCtx, const opcode *spOp)
abool bParseSubString
If true (non-zero), only parse the defined sub-string of the input string.
void * vpParserCtor(exception *spException, void *vpParserInit)
The parser's constructor for file initialization data.
aint uiSubStringBeg
The first character of the sub-string to parse. Must be < uiInputLength or exception is thrown.
void vMemDtor(void *vpCtx)
Destroys a Memory component. Frees all memory allocated.
void vBka(parser *spCtx, const opcode *spOp)
uint32_t uiSizeofUint
Minimum size, in bytes, required for the basic parser unsigned integer, aint.
void vRnm(parser *spCtx, const opcode *spOp)
Private header for the SABNF parser.
void vTranslateRules(parser *spCtx, rule *spRules, opcode *spOpcodes, luint *luipData)
Translate the initialization data for the rules into the internal rules format.
luint uiOpcodesOffset
Offset from the beginning of the initialization data to the list of opcodes.
const char * cpStringTable
Pointer to the string table.
luint uiUdtCount
The number of UDTs in the grammar.
const achar * acpInput
Pointer to the input string.
#define ID_ALT
alternation
#define ID_BKR
back reference to a previously matched rule or UDT name
luint uiRuleCount
The number of rules in the grammar.
#define ID_UDT
user-defined terminal
uint32_t uiAcharTableLength
Length of this data's alphabet character table.
aint uiParserRuleLookup(void *vpCtx, const char *cpRuleName)
Find the rule index corresponding to a rule name.
void vExContext()
Handles bad context pointers.
void vTranslateOpcodes(parser *spCtx, rule *spRules, udt *spUdts, opcode *spOpcodes, luint *luipData)
Translate the initialization data for the opcodes into the internal opcode format.
void vAbg(parser *spCtx, const opcode *spOp)
void * vpUserData
Pointer to user data, if any. Not examined or used by the parser in any way. Presented to the user's ...
uint_fast8_t achar
achar is the type for the parser's alphabet characters.
void vTranslateUdts(parser *spCtx, udt *spUdts, luint *luipData)
Translate the initialization data for the UDTs into the internal UDT format.
void vParserSetRuleCallback(void *vpCtx, aint uiRuleId, parser_callback pfnCallback)
Set a callback function for a specific rule.
#define ID_EMPTY
indicates a matched empty phrase parser state on return from parse tree below this node
const void * vpParserInit
#define ID_PPPT_NOMATCH
deterministic NOMATCH – there is no chance of a phrase match with this leading character
luint uiOpcodeCount
The number of opcodes in the grammar.
#define ID_NOT
negative look ahead
void vGetChildListTable(init_hdr *spInitHdr, aint *uipList)
Extract the child index list from the initialization data.
#define ID_AND
positive look ahead
aint uiGetAcharTable(parser_init *spParserInit, achar *acpAcharTable)
Extract the alphabet character table from the initialization data.
abool bGetParserInitData(parser_init *spParserInit, luint *luipParserInit)
Re-size the initialization data to the required integer size.
void vAen(parser *spCtx, const opcode *spOp)
uint32_t uiSizeofAchar
Minimum size, in bytes, required for the alphabet characters, achar.
uint32_t uiParserInitLength
Length of the parser initialization data.
uint32_t uiStringTableLength
Length of this data's string table.
#define XTHROW(ctx, msg)
Exception throw macro.
The initialization information generated by APG.
void(* parser_callback)(callback_data *spData)
User-written callback function prototype.
aint uiRuleIndex
The rule index - zero-based order in which the rule appears in the SABNF grammar.
void vBkn(parser *spCtx, const opcode *spOp)
void vParserSetUdtCallback(void *vpCtx, aint uiUdtId, parser_callback pfnCallback)
Set a callback function for a specific UDT.
uint_fast32_t aint
The APG parser's unsigned integer type.
void vTbs(parser *spCtx, const opcode *spOp)
Private declarations common to both universal and parent modes.
aint uiInputLength
Number of input string alphabet characters.
void vTrg(parser *spCtx, const opcode *spOp)
Defines the input string and other configuration parameters for the parser.
#define ID_CAT
concatenation
luint uiAcharMin
The minimum value of all of the alphabet characters (achar) present in the grammar.
void * vpMemAlloc(void *vpCtx, aint uiBytes)
Allocates memory.
luint uiUdtsOffset
Offset from the beginning of the initialization data to the list of UDTs.
#define ID_PPPT_MATCH
deterministic MATCH – this character constitutes a single character phrase match of length 1
void * vpVecCtor(void *vpMem, aint uiElementSize, aint uiInitialAlloc)
The vector object constructor.
void(* pfn_op)(struct parser_tag *spCtx, const union opcode_tag *spOp)
Prototype for the node operation functions.
#define ID_TRG
terminal range
The parser's final state.
A structure to describe the type and location of a caught exception.
void vMemFree(void *vpCtx, const void *vpData)
Free memory previously allocated with vpMemAlloc().
#define ID_ACTIVE
indicates active parser state, parser has just entered the node and is moving down the parse tree
void vUdt(parser *spCtx, const opcode *spOp)
#define ID_MATCH
indicates a matched phrase parser state on return from parse tree below this node
uintmax_t luint
luint is used to cast integers suitable for the %"PRIuMAX" printf format.
aint uiLookBehindLength
The maximum length to look behind for a match. Use 0 or APG_INFINITE for infinite look behind.
const uint8_t * ucpPpptTable
Pointer to the PPPT or NULL if none.
void vTls(parser *spCtx, const opcode *spOp)
abool bExValidate(exception *spException)
Test an exception structure for validity.
void * vpVecPushn(void *vpCtx, void *vpElement, aint uiCount)
Adds one or more elements to the end of the array.
#define ID_BKA
positive look behind
void * vpMemCtor(exception *spException)
Construct a memory component.
#define ID_GEN
general opcode (not SABNF). Serves to locate the ID in any opcode structure and must be larger than a...
void vRep(parser *spCtx, const opcode *spOp)
Private header file for the trace functions.
#define ID_TLS
terminal literal string
void vBkr(parser *spCtx, const opcode *spOp)
Data structure for a single rule.
aint uiSubStringLength
The number of characters in the sub-string. If 0, then the remainder of the string from uiSubStringBe...
luint uiRulesOffset
Offset from the beginning of the initialization data to the list of rules.
luint uiAcharMax
The maximum value of all of the alphabet characters (achar) present in the grammar.
void vParserDtor(void *vpCtx)
Clears the parser component's context and frees all heap memory associated with this parser.
void vAnd(parser *spCtx, const opcode *spOp)
#define ID_PPPT_EMPTY
deterministic EMPTY – this is an empty string match, the parse succeeds but the phrase length is 0
uint8_t ucGetMapVal(const uint8_t *ucpMap, luint luiOffset, luint luiChar)
#define ID_BKN
negative look behind
luint uiChildListLength
The number of integers in the child index list.
uint8_t abool
abool is the APG bool type.
void vAlt(parser *spCtx, const opcode *spOp)
void vNot(parser *spCtx, const opcode *spOp)
aint uiStartRule
Index of the start rule. Any rule in the SABNF grammar may be used as the start rule.
#define ID_NOMATCH
indicates that no phrase was matched on return from parse tree below this node
uint32_t uiPpptTableLength
Length of this data's PPPT.
luint uiMapSize
The number of bytes in one PPPT map.
void * vpParserAllocCtor(exception *spException, void *vpParserInit, abool bAllocateTables)
The parser constructor.
#define ID_ABG
anchor - beginning of string
luint uiMapCount
The number of rule, UDT, and opcode PPPT maps.
The universal-mode back reference object.
Header for the parser initialization data.
const char * cpParserUdtName(void *vpCtx, aint uiUdtIndex)
Find the UDT name corresponding to a UDT index.
Header file for the suite of the parser's operator functions.
#define ID_TBS
terminal binary string
abool bParserValidate(void *vpCtx)
Validate the context pointer of a parser.
Private header file for the statistics gathering functions.
Data structure for a single UDT.
void vVecClear(void *vpCtx)
Clears all used elements in a vector component.
const char * cpParserRuleName(void *vpCtx, aint uiRuleIndex)
Find the rule name from the corresponding index.
const char * cpRuleName
Pointer to the (null-terminated) ASCII rule name.
#define ID_AEN
anchor - end of string
void vParserParse(void *vpCtx, parser_config *spConfig, parser_state *spState)
Parse an input string of alphabet characters.
APG Version 7.0 is licensed under the
2-Clause BSD License,
an Open Source Initiative Approved License.