Version 7.0
Copyright © 2021 Lowell D. Thomas
APG
… an ABNF Parser Generator
|
Go to the documentation of this file.
39 static const size_t s_uiBufSize = (PATH_MAX + 128);
40 static char s_cPeriod = 46;
41 static char* s_cpBinaryVal[16] = {
42 "00 00",
"00 01",
"00 10",
"00 11",
43 "01 00",
"01 01",
"01 10",
"01 11",
44 "10 00",
"10 01",
"10 10",
"10 11",
45 "11 00",
"11 01",
"11 10",
"11 11",
47 static char* s_cpDecimalVal[16] = {
48 "0 0",
"0 1",
"0 2",
"0 3",
49 "1 0",
"1 1",
"1 2",
"1 3",
50 "2 0",
"2 1",
"2 2",
"2 3",
51 "3 0",
"3 1",
"3 2",
"3 3",
54 static void vDisplayBinary(uint8_t ucChar);
55 static void vDisplayDecimal(uint8_t ucChar);
61 static char* cpDefined =
"defined";
62 static char* cpUndefined =
"undefined";
66 printf(
"TYPES & SIZES\n");
67 printf(
"sizeof(achar) : %"PRIuMAX
" : the APG alphabet character\n", (
luint)
sizeof(
achar));
68 printf(
"sizeof(aint) : %"PRIuMAX
" : the APG unsigned int\n", (
luint)
sizeof(
aint));
69 printf(
"sizeof(abool) : %"PRIuMAX
" : the APG true/false boolean\n", (
luint)
sizeof(
abool));
70 printf(
"sizeof(luint) : %"PRIuMAX
" : longest unsigned int,\n", (
luint)
sizeof(
luint));
71 printf(
" used primarily for printing integers of unknown length(e.g. printf(\"%%\"PRIuMAX\"\", (luint)var)\n");
78 printf(
"APG_ACHAR : %u : controls the size of the parser's alphabet character(achar)\n", (
unsigned)APG_ACHAR);
80 printf(
"APG_ACHAR : %s : controls the size of the parser's alphabet character(achar)\n", cpUndefined);
83 printf(
"APG_AINT : %u : controls the size of the parser's unsigned integer(aint)\n", (
unsigned)APG_AINT);
85 printf(
"APG_AINT : %s : controls the size of the parser's unsigned integer(aint)\n", cpUndefined);
87 printf(
"APG_TRUE : %u : the APG \"true\" value\n", (
unsigned)
APG_TRUE);
88 printf(
"APG_FALSE : %u : the APG \"false\" value\n", (
unsigned)
APG_FALSE);
89 printf(
"APG_SUCCESS : %u : function return value indicating success\n", (
unsigned)
APG_SUCCESS);
90 printf(
"APG_FAILURE : %u : function return value indicating failure\n", (
unsigned)
APG_FAILURE);
91 printf(
"APG_UNDEFINED : %"PRIuMAX
" : used to indicate an undefined unsigned integer\n", (
luint)
APG_UNDEFINED);
92 printf(
"APG_INFINITE : %"PRIuMAX
" : used to indicate an infinite unsigned integer\n", (
luint)
APG_INFINITE);
93 printf(
"APG_MAX_AINT : %"PRIuMAX
" : maximum allowed unsigned integer value \n", (
luint)
APG_MAX_AINT);
94 printf(
"APG_MAX_ACHAR : %"PRIuMAX
" : maximum allowed alphabet character\n", (
luint)
APG_MAX_ACHAR);
100 printf(
"APG_DEBUG : %9s : if defined, defines APG_TRACE, APG_STATS, APG_MEM_STATS, APG_VEC_STATS, APG_AST & APG_BKR\n", cpDef);
106 printf(
"APG_TRACE : %9s : if defined, allow parser tracing (includes stdio.h)\n", cpDef);
112 printf(
"APG_STATS : %9s : if defined, allows parser to collect parsing statistics (includes stdio.h)\n", cpDef);
118 printf(
"APG_MEM_STATS : %9s : if defined, collect all memory object statistics\n", cpDef);
124 printf(
"APG_VEC_STATS : %9s : if defined, collect all vector object statistics\n", cpDef);
130 printf(
"APG_AST : %9s : if defined, allow creation of the Absract Syntax Tree (AST)\n", cpDef);
136 printf(
"APG_BKR : %9s : if defined, allow back reference operators, e.g. %%urulename\n", cpDef);
139 #ifdef APG_STRICT_ABNF
142 printf(
"APG_STRICT_ABNF : %9s : if defined, allow only grammars with ABNF as defined in RFCs 5234 & 7405\n", cpDef);
148 printf(
"APG_NO_PPPT : %9s : if defined, no Partially-Predictive Parsing Tables (PPPT) will be generated\n", cpDef);
154 printf(
"APG TYPES & SIZES\n");
155 printf(
"sizeof(achar) %"PRIuMAX
" : the APG alphabet character\n", (
luint)
sizeof(
achar));
156 printf(
"sizeof(aint) %"PRIuMAX
" : the APG unsigned int\n", (
luint)
sizeof(
aint));
157 printf(
"sizeof(abool) %"PRIuMAX
" : the APG true/false boolean\n", (
luint)
sizeof(
abool));
158 printf(
"sizeof(luint) %"PRIuMAX
" : for printing ints of unknown length (e.g. printf(\"%%\"PRIuMAX\"\", (luint)uiVar)\n", (
luint)
sizeof(
luint));
159 printf(
"\nAPG MAXIMUM VALUES\n");
160 printf(
"achar %"PRIuMAX
"\n", (
luint)((
achar)-1));
161 printf(
"aint %"PRIuMAX
"\n", (
luint)((
aint)-1));
162 printf(
"abool %"PRIuMAX
"\n", (
luint)((
abool)-1));
163 printf(
"luint %"PRIuMAX
"\n", (
luint)-1);
164 printf(
"\nSYSTEM TYPES & SIZES\n");
165 printf(
"sizeof(unsigned char) %"PRIuMAX
"\n", (
luint)
sizeof(
unsigned char));
166 printf(
"sizeof(unsigned short int) %"PRIuMAX
"\n", (
luint)
sizeof(
unsigned short int));
167 printf(
"sizeof(unsigned int) %"PRIuMAX
"\n", (
luint)
sizeof(
unsigned int));
168 printf(
"sizeof(unsigned long int) %"PRIuMAX
"\n", (
luint)
sizeof(
unsigned long int));
169 printf(
"sizeof(unsigned long long int) %"PRIuMAX
"\n", (
luint)
sizeof(
unsigned long long int));
170 printf(
"sizeof(uintmax_t) %"PRIuMAX
"\n", (
luint)
sizeof(uintmax_t));
171 printf(
"sizeof(uint8_t) %"PRIuMAX
"\n", (
luint)
sizeof(uint8_t));
172 printf(
"sizeof(uint16_t) %"PRIuMAX
"\n", (
luint)
sizeof(uint16_t));
173 printf(
"sizeof(uint32_t) %"PRIuMAX
"\n", (
luint)
sizeof(uint32_t));
174 printf(
"sizeof(uint64_t) %"PRIuMAX
"\n", (
luint)
sizeof(uint64_t));
175 printf(
"sizeof(uint_least8_t) %"PRIuMAX
"\n", (
luint)
sizeof(uint_least8_t));
176 printf(
"sizeof(uint_least16_t) %"PRIuMAX
"\n", (
luint)
sizeof(uint_least16_t));
177 printf(
"sizeof(uint_least32_t) %"PRIuMAX
"\n", (
luint)
sizeof(uint_least32_t));
178 printf(
"sizeof(uint_least64_t) %"PRIuMAX
"\n", (
luint)
sizeof(uint_least64_t));
179 printf(
"sizeof(uint_fast8_t) %"PRIuMAX
"\n", (
luint)
sizeof(uint_fast8_t));
180 printf(
"sizeof(uint_fast16_t) %"PRIuMAX
"\n", (
luint)
sizeof(uint_fast16_t));
181 printf(
"sizeof(uint_fast32_t) %"PRIuMAX
"\n", (
luint)
sizeof(uint_fast32_t));
182 printf(
"sizeof(uint_fast64_t) %"PRIuMAX
"\n", (
luint)
sizeof(uint_fast64_t));
183 printf(
"\nSYSTEM MAXIMUM VALUES\n");
184 printf(
"uint8_t %"PRIuMAX
"\n", (
luint)((uint8_t)-1));
185 printf(
"uint16_t %"PRIuMAX
"\n", (
luint)((uint16_t)-1));
186 printf(
"uint32_t %"PRIuMAX
"\n", (
luint)((uint32_t)-1));
187 printf(
"uint64_t %"PRIuMAX
"\n", (
luint)((uint64_t)-1));
192 printf(
"CURRENT WORKING DIRECTORY\n");
194 if (getcwd(cwd,
sizeof(cwd)) != NULL) {
197 printf(
"getcwd() error\n");
214 char caBuf[s_uiBufSize];
216 if(!cpFileName || cpFileName[0] == 0){
217 XTHROW(spEx,
"file name cannot be NULL or empty");
219 if(!ucpData || !uiLen){
220 XTHROW(spEx,
"data cannot be NULL or empty");
223 FILE* spFile = fopen(cpFileName,
"wb");
225 snprintf(caBuf, s_uiBufSize,
"can't open file \"%s\" for write", cpFileName);
228 size_t uiWrite = fwrite((
void*)ucpData,
sizeof(uint8_t), uiLen, spFile);
230 if((
aint)uiWrite != uiLen){
231 snprintf(caBuf, s_uiBufSize,
"file write error: file name: %s: bytes to write: %"PRIuMAX
": bytes written: %"PRIuMAX
"",
256 char caBuf[s_uiBufSize];
258 if(!cpFileName || cpFileName[0] == 0){
259 XTHROW(spEx,
"file name cannot be NULL or empty");
262 XTHROW(spEx,
"data length pointer cannot be NULL");
265 FILE* spFile = fopen(cpFileName,
"rb");
267 snprintf(caBuf, s_uiBufSize,
"can't open file \"%s\" for read", cpFileName);
274 uiBufferLen = *uipLen;
276 size_t uiBytesRead = 0;
277 size_t uiBytesCopied = 0;
278 size_t uiBytesToCopy = uiBufferLen;
279 uint8_t caReadBuf[4096];
280 size_t uiBlockBytes = fread(caReadBuf, 1,
sizeof(caReadBuf), spFile);
281 while(uiBlockBytes != 0){
282 uiBytesRead += uiBlockBytes;
283 if(uiBytesCopied < uiBufferLen){
284 uiBytesToCopy = uiBufferLen - uiBytesCopied;
285 if(uiBytesToCopy > uiBlockBytes){
286 uiBytesToCopy = uiBlockBytes;
288 memcpy((
void*)&ucpData[uiBytesCopied], (
void*)&caReadBuf[0], uiBytesToCopy);
289 uiBytesCopied += uiBytesToCopy;
291 uiBlockBytes = fread(caReadBuf, 1,
sizeof(caReadBuf), spFile);
295 XTHROW(spEx,
"sizeof(aint) too small");
297 *uipLen = (
aint)(uiBytesRead);
307 FILE* spL = NULL, *spR = NULL;
310 spL = fopen(cpFileL,
"rb");
314 spR = fopen(cpFileR,
"rb");
355 if(!cpFileL || !cpFileR){
356 XTHROW(spEx,
"file names cannot be NULL");
362 void* vpLinesL = NULL;
363 void* vpLinesR = NULL;
364 uint8_t* ucpLeft = NULL;
365 uint8_t* ucpRight = NULL;
367 aint uiSizeL = uiSize;
368 aint uiSizeR = uiSize;
369 ucpLeft = (uint8_t*)
vpMemAlloc(vpMem, uiSizeL);
371 if(uiSizeL > uiSize){
373 ucpLeft = (uint8_t*)
vpMemAlloc(vpMem, uiSizeL);
376 ucpRight = (uint8_t*)
vpMemAlloc(vpMem, uiSizeR);
378 if(uiSizeR > uiSize){
380 ucpRight = (uint8_t*)
vpMemAlloc(vpMem, uiSizeR);
383 vpLinesL =
vpLinesCtor(spEx, (
char*)ucpLeft, uiSizeL);
384 vpLinesR =
vpLinesCtor(spEx, (
char*)ucpRight, uiSizeR);
417 printf(
"%s:%s(%u):\n%s\n",
420 printf(
"vUtilPrintException: not a valid exception pointer\n");
430 printf(
"frees: %"PRIuMAX
"\n", (
luint) (spStats->
uiFrees));
431 printf(
"current cells: %"PRIuMAX
"\n", (
luint) (spStats->
uiCells));
441 printf(
"ORIGINAL:\n");
445 printf(
"CURRENT:\n");
446 printf(
" reserved elements: %"PRIuMAX
"\n", (
luint) (spStats->
uiReserved));
448 printf(
" used elements: %"PRIuMAX
"\n", (
luint) (spStats->
uiUsed));
451 printf(
" max elements: %"PRIuMAX
"\n", (
luint) (spStats->
uiMaxUsed));
454 printf(
" pushed elements: %"PRIuMAX
"\n", (
luint) (spStats->
uiPushed));
455 printf(
" popped elements: %"PRIuMAX
"\n", (
luint) (spStats->
uiPopped));
467 printf(
"line index: %d\n", (
int)spLine->
uiLineIndex);
468 printf(
"char index: %d\n", (
int)spLine->
uiCharIndex);
476 printf(
"print lines: NULL input\n");
486 printf(
"line index: %d\n", (
int)spLine->
uiLineIndex);
487 printf(
"char index: %d\n", (
int)spLine->
uiCharIndex);
495 printf(
"print lines: NULL input\n");
508 sprintf(cpBuf,
"\\t");
512 sprintf(cpBuf,
"\\n");
516 sprintf(cpBuf,
"\\r");
520 sprintf(cpBuf,
"sp");
523 if(cChar >= 33 && cChar <= 126){
524 sprintf(cpBuf,
"%c", cChar);
527 sprintf(cpBuf,
"0x%02X", (
unsigned char)cChar);
543 sprintf(cpBuf,
"TAB");
547 sprintf(cpBuf,
"LF");
551 sprintf(cpBuf,
"VT");
555 sprintf(cpBuf,
"FF");
559 sprintf(cpBuf,
"CR");
563 sprintf(cpBuf,
"NEL");
566 if(uiChar == 0x2028){
567 sprintf(cpBuf,
"LS");
570 if(uiChar == 0x2029){
571 sprintf(cpBuf,
"PS");
575 sprintf(cpBuf,
"sp");
578 if(uiChar >= 33 && uiChar <= 126){
579 sprintf(cpBuf,
"%c", uiChar);
583 sprintf(cpBuf,
"0x%02X", uiChar);
585 sprintf(cpBuf,
"0x%04X", uiChar);
613 cpReturn = bBase64 ?
"(BINARY | BASE64)" :
"BINARY";
616 cpReturn = bBase64 ?
"(UTF-8 | BASE64)" :
"UTF-8";
619 cpReturn = bBase64 ?
"(UTF-16 | BASE64)" :
"UTF-16";
622 cpReturn = bBase64 ?
"(UTF-16BE | BASE64)" :
"UTF-16BE";
625 cpReturn = bBase64 ?
"(UTF-16LE | BASE64)" :
"UTF-16LE";
628 cpReturn = bBase64 ?
"(UTF-32 | BASE64)" :
"UTF-32";
631 cpReturn = bBase64 ?
"(UTF-32BE | BASE64)" :
"UTF-32BE";
634 cpReturn = bBase64 ?
"(UTF-32LE | BASE64)" :
"UTF-32LE";
637 cpReturn =
"UNKNOWN";
647 static char* cpTrue =
"TRUE";
648 static char* cpFalse =
"FALSE";
705 static char* cpUtilParserState(
aint uiState){
706 char* cpReturn =
"UNKNOWN";
715 cpReturn =
"NOMATCH";
729 printf(
" PARSER STATE:\n");
735 printf(
" state: %s\n", cpUtilParserState(uiState));
753 printf(
"%s\n", cpMsg);
759 void (*pfnDisplay)(uint8_t) = vDisplayBinary;
760 if(cpMode && (cpMode[0] ==
'd' || cpMode[0] ==
'D')){
761 pfnDisplay = vDisplayDecimal;
764 for(ui = 0; ui < uiLength; ui++){
765 pfnDisplay(ucpMap[ui + uiBegin]);
780 fprintf(spFile,
" ");
793 for (ui = 0; ui < uiLength; ui += 1) {
794 achar aChar = acpChars[ui];
797 fprintf(spFile,
"&");
798 }
else if(aChar == 60){
800 fprintf(spFile,
"<");
801 }
else if (aChar >= 32 && aChar <= 126) {
802 fprintf(spFile,
"%c", (
char) aChar);
804 fprintf(spFile,
"&%"PRIuMAX
";", (
luint) aChar);
831 aint uiStrLen = (
aint)strlen(cpStr);
834 for(; ui < uiStrLen; ui++){
835 acpBuf[ui] = (
achar)((uint8_t)cpStr[ui]);
864 char* cpStr = (
char*)
vpMemAlloc(vpMem, ((
aint)
sizeof(
char) * (uiLen + 1)));
867 for(; ui < uiLen; ui++){
868 acChar = acpAchar[ui];
869 if(acChar == 9 || acChar == 10 || acChar == 13 || (acChar >= 32 && acChar <= 126)){
870 cpStr[ui] = (char)acChar;
872 cpStr[ui] = s_cPeriod;
899 size_t uiStrLen = strlen(cpStr);
903 for(; ui < uiStrLen; ui++){
904 acpBuf[ui] = (
achar)((uint8_t)cpStr[ui]);
936 for(; ui < spPhrase->
uiLength; ui++){
938 if(acChar == 9 || acChar == 10 || acChar == 13 || (acChar >= 32 && acChar <= 126)){
939 cpStr[ui] = (char)acChar;
941 cpStr[ui] = s_cPeriod;
970 aint uiStrLen = (
aint)strlen(cpStr);
971 uint32_t* uipBuf = (uint32_t*)
vpMemAlloc(vpMem, ((
aint)
sizeof(uint32_t) * uiStrLen));
973 for(; ui < uiStrLen; ui++){
974 uipBuf[ui] = (uint32_t)((uint8_t)cpStr[ui]);
1004 char* cpStr = (
char*)
vpMemAlloc(vpMem, ((
aint)
sizeof(
char) * (uiLen + 1)));
1007 for(; ui < uiLen; ui++){
1008 uiChar = uipUint[ui];
1009 if(uiChar == 9 || uiChar == 10 || uiChar == 13 || (uiChar >= 32 && uiChar <= 126)){
1010 cpStr[ui] = (char)uiChar;
1012 cpStr[ui] = s_cPeriod;
1015 cpStr[ui] = (char)0;
1039 size_t uiStrLen = strlen(cpStr);
1041 uint32_t* acpBuf = (uint32_t*)&spPhrase[1];
1043 for(; ui < uiStrLen; ui++){
1044 acpBuf[ui] = (uint32_t)((uint8_t)cpStr[ui]);
1076 for(; ui < spPhrase->
uiLength; ui++){
1078 if(uiChar == 9 || uiChar == 10 || uiChar == 13 || (uiChar >= 32 && uiChar <= 126)){
1079 cpStr[ui] = (char)uiChar;
1081 cpStr[ui] = s_cPeriod;
1084 cpStr[ui] = (char)0;
1110 static char* s_cpStringNode =
"_string_";
1111 static abool bAstValidXmlChar(
luint luiChar);
1112 static void vAstDecimalString(FILE* spOut,
abool bHex,
const achar* acpString,
aint uiLen);
1113 static void vAstUnicodeString(
void* vpMem, FILE* spOut,
const achar* acpString,
aint uiLen);
1114 static void vAstMaxChar(
const achar* acpString,
aint uiLen,
luint* uipMaxChar,
aint* uipSizeof);
1130 static FILE* spOut = NULL;
1131 char caBuf[s_uiBufSize];
1132 char* cpRoot =
"_root_";
1138 spOut = fopen(cpFileName,
"wb");
1140 snprintf(caBuf, s_uiBufSize,
"can't open file %s for writing", cpFileName);
1151 fprintf(spOut,
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
1155 fprintf(spOut,
"<%s>\n\n", cpRoot);
1156 fprintf(spOut,
"<!-- The <%s> node contains the character codes of the full input string. Its attributes are:\n", s_cpStringNode);
1157 fprintf(spOut,
" length - the number of characters in the string\n");
1158 fprintf(spOut,
" max-char - the maximum character size\n");
1159 fprintf(spOut,
" sizeof - the number of bytes in the maximum character\n");
1160 fprintf(spOut,
" -->\n");
1172 fprintf(spOut,
"\n<!-- The <rule> node attributes define each rule/UDT the corresponding matched substring phrase.\n");
1173 fprintf(spOut,
" name - the name of the rule or UDT\n");
1174 fprintf(spOut,
" index - the grammar index of the rule or UDT\n");
1175 fprintf(spOut,
" udt - (optional), if \"true\", name refers to a UDT, if \"false\" or absent, name refers to a rule\n");
1176 fprintf(spOut,
" offset - the offset to the first character in the input string of the matched phrase\n");
1177 fprintf(spOut,
" length - the number of characters in the matched phrase (may be \"0\" for a matched EMPTY phrase)\n");
1178 fprintf(spOut,
" -->\n");
1182 for(; spRec < spEnd; spRec++){
1186 fprintf(spOut,
"<rule name=\"%s\" index=\"%"PRIuMAX
"\"", spRec->
cpName, (
luint)spRec->
uiIndex);
1188 fprintf(spOut,
" udt=\"true\"");
1190 fprintf(spOut,
" offset=\"%"PRIuMAX
"\" length=\"%"PRIuMAX
"\">\n",
1194 fprintf(spOut,
"</rule>\n");
1200 fprintf(spOut,
"</%s>\n", cpRoot);
1205 if(spOut && (spOut != stdout)){
1234 FILE* spFile = stdout;
1235 void* vpLines = NULL;
1237 XTHROW(spEx,
"input string cannot be NULL");
1239 aint uiLen = (
aint)strlen(cpString);
1241 XTHROW(spEx,
"input string cannot be empty");
1244 spFile = fopen(cpFileName,
"wb");
1246 char caBuf[s_uiBufSize];
1247 snprintf(caBuf, s_uiBufSize,
"unable to open file name '%s' for writing", cpFileName);
1252 uiEndLen = (
aint) strlen(cpEnd);
1259 fwrite(cpEnd,
sizeof(
char), uiEndLen, spFile);
1264 if (spFile && (spFile != stdout)) {
1272 static void vAstDecimalString(FILE* spOut,
abool bHex,
const achar* acpString,
aint uiLen){
1273 aint uiCharsPerLine = 10;
1278 aint uiEnd = uiLen - 1;
1281 cpFmt =
"0x%"PRIXMAX
"";
1282 fprintf(spOut,
"<!-- The character codes are represented as comma- and white space-delimited hexadecimal integers. -->\n");
1284 cpFmt =
"%"PRIuMAX
"";
1285 fprintf(spOut,
"<!-- The character codes are represented as comma- and white space-delimited decimal integers. -->\n");
1288 vAstMaxChar(acpString, uiLen, &luiMaxChar, &uiSizeof);
1289 fprintf(spOut,
"<%s length=\"%"PRIuMAX
"\" max-char=\"%"PRIuMAX
"\" sizeof=\"%"PRIuMAX
"\">",
1290 s_cpStringNode, (
luint)uiLen, luiMaxChar, (
luint)uiSizeof);
1291 for(; ui < uiLen; ui++){
1292 fprintf(spOut, cpFmt, (
luint)acpString[ui]);
1294 fprintf(spOut,
",");
1297 if(uiChar == uiCharsPerLine){
1299 fprintf(spOut,
"\n");
1303 fprintf(spOut,
"\n");
1305 fprintf(spOut,
"</%s>\n", s_cpStringNode);
1307 static void vAstUnicodeString(
void* vpMem, FILE* spOut,
const achar* acpString,
aint uiLen){
1308 char caBuf[s_uiBufSize];
1309 fprintf(spOut,
"<!-- The character codes are represented as a UTF-8-encoded XML Unicode string.\n");
1310 fprintf(spOut,
" Note that XML Unicode does not allow the following characters:\n");
1311 fprintf(spOut,
" - control except TAB(0x09), LF(0x0A), CR(0x0D) and DEL(0x7F)\n");
1312 fprintf(spOut,
" - 0xFFFE and 0xFFFF\n");
1313 fprintf(spOut,
" - surrogate-pair range 0xD800 - 0xDFFF\n");
1314 fprintf(spOut,
" - beyond Unicode range > 0x10FFFF\n");
1315 fprintf(spOut,
" -->\n");
1318 vAstMaxChar(acpString, uiLen, &luiMaxChar, &uiSizeof);
1319 fprintf(spOut,
"<%s length=\"%"PRIuMAX
"\" max-char=\"%"PRIuMAX
"\" sizeof=\"%"PRIuMAX
"\">",
1320 s_cpStringNode, (
luint)uiLen, luiMaxChar, (
luint)uiSizeof);
1321 uint32_t* uipIn32 = (uint32_t*)
vpMemAlloc(vpMem, (uiLen *
sizeof(uint32_t)));
1323 for(; ui < uiLen; ui++){
1324 if(!bAstValidXmlChar((
luint)acpString[ui])){
1325 snprintf(caBuf, s_uiBufSize,
"input string has invalid XML character: offset = %"PRIuMAX
": character = %"PRIuMAX
"",
1330 uipIn32[ui] = (uint32_t)acpString[ui];
1339 fprintf(spOut,
"</%s>\n", s_cpStringNode);
1347 static abool bAstValidXmlChar(
luint luiChar){
1350 if(luiChar >= 0 && luiChar < 9){
1353 if(luiChar >= 11 && luiChar < 13){
1356 if(luiChar >= 14 && luiChar < 32){
1359 if(luiChar >= 0xD800 && luiChar < 0xE000){
1362 if(luiChar > 0x10FFFF){
1370 static void vAstMaxChar(
const achar* acpString,
aint uiLen,
luint* uipMaxChar,
aint* uipSizeof){
1373 for(; ui < uiLen; ui++){
1374 if((
luint)acpString[ui] > luiChar){
1375 luiChar = (
luint)acpString[ui];
1378 *uipMaxChar = luiChar;
1379 if(luiChar <= 0xFF){
1383 if(luiChar <= 0xFFFF){
1387 if(luiChar <= 0xFFFFFFFF){
1391 if(luiChar <= 0xFFFFFFFFFFFFFFFF){
1398 static void vDisplayBinary(uint8_t ucChar){
1400 ucR = ucChar & 0x0F;
1401 ucL = (ucChar & 0xF0) >> 4;
1402 printf(
"%s %s ", s_cpBinaryVal[ucL], s_cpBinaryVal[ucR]);
1404 static void vDisplayDecimal(uint8_t ucChar){
1406 ucR = ucChar & 0x0F;
1407 ucL = (ucChar & 0xF0) >> 4;
1408 printf(
"%s %s ", s_cpDecimalVal[ucL], s_cpDecimalVal[ucR]);
aint uiLineIndex
zero-based line number
aint uiReserved
The current number of elements reserved.
aint uiLineLength
The number of characters in the line, including the line end characters.
#define UTF_16
Data type macro for UTF-16 encoding/decoding.
#define XCTOR(e)
This macro will initialize an exception structure and prepare entry to the "try" block.
const char * cpUtilPhraseToStr(void *vpMem, apg_phrase *spPhrase)
Convert an apg_phrase to a null-terminated ASCII string.
void vUtilFileRead(void *vpMem, const char *cpFileName, uint8_t *ucpData, aint *uipLen)
Read a file into the caller's data area.
abool bUtilCompareFiles(const char *cpFileL, const char *cpFileR)
Compare two files, byte for byte.
char caFunc[64]
The source code function name where the error occurred. "__func__".
void * vpLinesCtor(exception *spEx, const char *cpInput, aint uiLength)
The lines object constructor.
aint uiSuccess
True (>0) if the input string was matched in its entirety, false (0) otherwise.
void vMemDtor(void *vpCtx)
Destroys a Memory component. Frees all memory allocated.
abool try
True for the try block, false for the catch block.
aint uiGrownCount
The number of times the vector was automatically extended.
const achar * acpString
The parsed input string.
aint uiStringLength
The number of characters in the input string.
Carries detailed information about the characters and line endings. One for each line in the input gr...
uint32_t uiLength
The number of integers in the array.
aint uiPhraseLength
The number of characters in the matched phrase.
aint uiGrownBytes
The number of bytes automatically added to the vector.
const char * cpUtilPhrase32ToStr(void *vpMem, u32_phrase *spPhrase)
Convert an u32_phrase to a null-terminated ASCII string.
unsigned int uiLine
The source code line number where the error occurred. "__LINE__".
#define ID_ALT
alternation
const char * cpUtilOpName(aint uiId)
Convert an opcode identifier to a human-readable opcode name.
void vUtilPrintMemStats(const mem_stats *spStats)
Display the memory object's statistics.
aint uiOriginalBytes
The initial number of bytes allocated to the vector.
#define ID_BKR
back reference to a previously matched rule or UDT name
#define ID_UDT
user-defined terminal
void vUtilPrintMsgs(void *vpMsgs)
Display the list of messages in a message object to stdout.
aint uiTextLength
number of Unicode text characters in the line, excluding line end characters
void vExContext()
Handles bad context pointers.
#define ID_AST_PRE
indicates pre-node-traversal AST callback state (down the tree)
char * cpUtilPrintUChar(uint32_t uiChar, char *cpBuf)
Generates a string representation for a single Unicode character.
uint_fast8_t achar
achar is the type for the parser's alphabet characters.
aint uiStringLength
Length of the input string.
char * cpUtilPrintChar(char cChar, char *cpBuf)
Generates a string representation for a single character.
void * vpConvCtor(exception *spEx)
The data conversion object constructor.
aint uiUsed
The current number of elements used.
const char * cpName
Name of the rule or UDT of this record.
void vPrintPPPTMap(uint8_t *ucpMap, aint uiBegin, aint uiLength, const char *cpMode)
const char * cpUtilTrueFalse(luint luiTrue)
Return a human-readable string version of the given value in its true/false sense.
#define ID_EMPTY
indicates a matched empty phrase parser state on return from parse tree below this node
Available to the user for display of memory statistics.
#define ID_NOT
negative look ahead
#define ID_AND
positive look ahead
void vAstInfo(void *vpCtx, ast_info *spInfo)
Retrieve basic information about the AST object.
abool bUtilCompareFileLines(void *vpMem, const char *cpFileL, const char *cpFileR)
Compare two text files, line by line, without regard for the line ending characters.
const uint32_t * uipPhrase
Pointer to an array of 32-bit unsigned integers.
#define UTF_32
Data type macro for UTF-32 encoding/decoding.
char caFile[256]
The source code file name where the error occurred. "__FILE__".
#define XTHROW(ctx, msg)
Exception throw macro.
const char * cpType(aint uiId)
Convert an attribute type ID to an ASCII string.
aint uiPushed
The total number of elements pushed onto (added to) the vector.
abool bUtilAstToXml(void *vpAst, char *cpType, const char *cpFileName)
Convert the AST records to XML format.
#define UTF_16BE
Data type macro for UTF-16BE encoding/decoding.
uint_fast32_t aint
The APG parser's unsigned integer type.
aint uiOriginalElements
The initial number of elements allocated to the vector.
aint uiUsedBytes
The current number of bytes in use.
exception * spMemException(void *vpCtx)
Get a pointer to this memory object's exception handler.
abool bBOM
[in] If true(BOM) prepend a Byte Order Mark, if false(NOBOM) no Byte Order Mark prepended.
#define ID_CAT
concatenation
const char * cpUtilUint32ToStr(void *vpMem, const uint32_t *uipUint, aint uiLen)
Convert an array of 32-bit unsigned integers to a null-terminated ASCII string.
aint uiDataType
[in] One of the encoding type identifiers, UTF_8, etc.
void * vpMemAlloc(void *vpCtx, aint uiBytes)
Allocates memory.
Defines the output data type, location, length and whether or not to preface with a Byte Order Mark (...
#define ID_TRG
terminal range
abool bIsUdt
True if this record is for a UDT.
aint uiReservedBytes
The current number of bytes reserved.
#define UTF_32LE
Data type macro for UTF-32LE encoding/decoding.
The parser's final state.
All the information a user needs to write a custom AST translator.
const char * cpUtilUtfTypeName(aint uiType)
Convert a conversion type identifier to a readable, printable ASCII string. Conversion type identifie...
A structure to describe the type and location of a caught exception.
Defines a pointer to an achar array plus its length. That is, a phrase as is often used by APG.
void vMemFree(void *vpCtx, const void *vpData)
Free memory previously allocated with vpMemAlloc().
#define ID_ACTIVE
indicates active parser state, parser has just entered the node and is moving down the parse tree
aint uiRecordCount
The number of records (two for each node of the AST, one down traversal, one up.)
#define ID_MATCH
indicates a matched phrase parser state on return from parse tree below this node
uintmax_t luint
luint is used to cast integers suitable for the %"PRIuMAX" printf format.
aint uiState
ID_AST_PRE if the current record opens the rule, ID_AST_POST if the current record closes the rule.
void vUtilPrintLine(line *spLine)
Display one line from a line object.
const char * cpMsgsFirst(void *vpCtx)
Get a pointer to the first logged message, if any.
void vUtilIndent(FILE *spFile, aint uiIndent)
Indent by adding the given number of spaces to the output file.
const char * cpMsgsNext(void *vpCtx)
Get a pointer to the next logged message, if any.
aint uiLineIndex
The zero-based line index.
abool bExValidate(exception *spException)
Test an exception structure for validity.
#define UTF_16LE
Data type macro for UTF-16LE encoding/decoding.
Miscellaneous utility functions.
aint uiTextLength
The number of characters in the line, excluding the line end characters.
#define ID_BKA
positive look behind
Defines the characteristics of a single line.
void * vpMemCtor(exception *spException)
Construct a memory component.
aint uiState
One of ID_EMPTY, ID_MATCH or ID_NOMATCH. Note that it is possible for the parser to match a phrase wi...
void vUtilSizes(void)
Display the APG type sizes, the compiler's C-language type sizes and a few max values.
const char * cpUtilAcharToStr(void *vpMem, achar *acpAchar, aint uiLen)
Convert an array of achar characters to a null-terminated ASCII string.
line * spLinesNext(void *vpCtx)
Returns the next line of text from the iterator.
aint uiPopped
The total number of elements popped from (removed from) the vector.
void vConvEncode(void *vpCtx, conv_dst *spDst)
Encode the 32-bit Unicode code points to a byte stream.
#define BASE64_MASK
The base64 mask. And (&) with the type to extract the base64 bit, if any.
void vUtilCharsToAscii(FILE *spFile, const achar *acpChars, aint uiLength)
Convert a string of alphabet characters to printable ASCII.
void vUtilPrintLineu(line_u *spLine)
Display one line from a line_u object.
ast_record * spRecords
The list of records in the order of a depth-first traversal of the AST.
#define ID_TLS
terminal literal string
#define UTF_32BE
Data type macro for UTF-32BE encoding/decoding.
void vUtilPrintVecStats(const vec_stats *spStats)
Display the vector object's statistics.
aint uiGrownElements
The number of new elements automatically added to the vector.
line * spLinesFirst(void *vpCtx)
Initialize an iterator over the lines.
#define TYPE_MASK
The type mask. And (&) with the type to extract the type, without the base64 bit.
abool bMemValidate(void *vpCtx)
Validates a memory context.
aint uiLength
The number of characters in the array.
aint uiMaxTreeDepth
The maximum tree depth reached during the parse.
void vLinesDtor(void *vpCtx)
The lines object destructor.
#define APG_MAX_AINT
Since the maximum unsigned integer value is used to indicate Infinite and Undefined values,...
#define ID_BKN
negative look behind
#define UTF_8
Data type macro for UTF-8 encoding/decoding.
aint uiHitCount
The number of nodes visited during the traversal of the parse tree.
aint uiDataLen
[out] Number of bytes in the byte stream.
void vUtilFileWrite(void *vpMem, const char *cpFileName, uint8_t *ucpData, aint uiLen)
Write from the caller's data area to the given file name.
void vUtilApgInfo(void)
Display the current state of apg.h.
uint8_t abool
abool is the APG bool type.
aint uiPhraseLength
Length of the matched phrase.
uint32_t * uipUtilStrToUint32(void *vpMem, const char *cpStr, aint *uipLen)
Convert a null-terminated ASCII string to an array of 32-bit unsigned integers.
#define ID_NOMATCH
indicates that no phrase was matched on return from parse tree below this node
char caMsg[256]
The caller's error message.
uint32_t uiaLineEnd[3]
the actual string of line ending character(s), if any
char caLineEnd[3]
The actual, null-terminated string of line ending character(s), if any.
achar * acpUtilStrToAchar(void *vpMem, const char *cpStr, aint *uipLen)
Convert a null-terminated ASCII string to an array of achar characters.
aint uiPhraseOffset
The offset into the input string to the first character of the matched phrase.
#define ID_ABG
anchor - beginning of string
aint uiIndex
Index of the rule or UDT of this record.
Defines a pointer to an array of 32-bit unsigned integers plus its length. Typically needed by Unicod...
void vConvDtor(void *vpCtx)
Conversion object destructor.
void vUtilConvertLineEnds(exception *spEx, const char *cpString, const char *cpEnd, const char *cpFileName)
Convert all line ending characters.
apg_phrase * spUtilStrToPhrase(void *vpMem, const char *cpStr)
Convert a null-terminated ASCII string to an apg_phrase.
void vUtilPrintException(exception *spEx)
Prints exception information from an exception structure.
#define ID_TBS
terminal binary string
uint8_t * ucpData
[out] Pointer to the output byte stream. Valid until another function call on the context handle.
void vConvUseCodePoints(void *vpCtx, uint32_t *uipSrc, aint uiSrcLen)
Insert a stream of 32-bit Unicode code points as the intermediate data.
void vUtilCurrentWorkingDirectory(void)
Display the current working directory.
aint uiMaxUsedBytes
The maximum number of bytes used over the lifetime of the vector.
aint uiLineLength
number of Unicode characters in the line, including line end characters
#define BINARY
Alias for ISO_8859_1.
u32_phrase * spUtilStrToPhrase32(void *vpMem, const char *cpStr)
Convert a null-terminated ASCII string to a 32-bit phrase.
void vUtilPrintParserState(parser_state *spState)
Display the parser state in human-readable format to stdout.
const achar * acpPhrase
Pointer to an array of type achar APG alphabet characters.
aint uiCharIndex
The zero-based index of the first character of the line.
abool bMsgsValidate(void *vpCtx)
Validate a msglog context pointer.
aint uiElementSize
The number of bytes in one element.
#define ID_AEN
anchor - end of string
aint uiCharIndex
zero-based index of the first Unicode character of the line
aint uiMaxUsed
The maximum number of elements used during the vector's lifetime.
aint uiLinesCount(void *vpCtx)
Returns the number of lines of text.
APG Version 7.0 is licensed under the
2-Clause BSD License,
an Open Source Initiative Approved License.