Version 7.0
Copyright © 2021 Lowell D. Thomas
APG
… an ABNF Parser Generator
|
Go to the documentation of this file.
78 #include "../../apgex/apgex.h"
82 static const char* cpMakeFileName(
char* cpBuffer,
const char* cpBase,
const char* cpDivider,
const char* cpName){
83 strcpy(cpBuffer, cpBase);
84 strcat(cpBuffer, cpDivider);
85 strcat(cpBuffer, cpName);
89 static char* s_cpDescription =
90 "Illustrate the construction and use of the apgex pattern-matching object.";
92 static char* s_cppCases[] = {
93 "Display application information.",
94 "Illustrate the basic use of the default mode, verifying and matching the parts of an email address.",
95 "Illustrate all the details of an email pattern match.",
96 "Illustrate the apgex object's properties before and after a successful match",
97 "Illustrate pattern-matching in global mode.",
98 "Illustrate pattern-matching in sticky mode.",
99 "Illustrate the trace mode, with and without PPPT, ASCII and HTML displays.",
100 "Illustrate patterns with User-Defined Terminals (UDTs).",
101 "Illustrate simple testing for a matched pattern without detailed results.",
102 "Illustrate using the AST for a complex translation of a recursive pattern.",
103 "Illustrate the replacement of matched phrases with simple phrases and compound phrases.",
104 "Illustrate using matched phrases as delimiters to split a phrase into an array of sub-phrases.",
105 "Illustrate defining word and line boundaries to find words and lines.",
106 "Illustrate extracting quoted and unquoted fields from Comma Separated Value (CSV) records.",
107 "Illustrate the use of patterns with wide characters.",
108 "Illustrate back references, universal and parent modes.",
110 static long int s_iCaseCount = (
long int)(
sizeof(s_cppCases) /
sizeof(s_cppCases[0]));
112 static int iHelp(
void){
115 printf(
"description: %s\n", s_cpDescription);
116 printf(
" usage: ex-api arg\n");
117 printf(
" arg = n, 1 <= n <= %ld\n", s_iCaseCount);
118 printf(
" execute case number n\n");
119 printf(
" arg = anthing else\n");
120 printf(
" print this help screen\n");
122 for(; i < s_iCaseCount; i++){
123 printf(
"case %ld %s\n", (i + 1), s_cppCases[i]);
138 static int iPatterns() {
139 int iReturn = EXIT_SUCCESS;
140 static void* vpMem = NULL;
141 static void* vpApgex = NULL;
142 static void* vpApi = NULL;
143 static void* vpParser = NULL;
144 char* cpEmailGrammar =
145 "email-address = %^ local \"@\" domain %$\n"
146 "local = local-word *(\".\" local-word)\n"
147 "domain = 1*(sub-domain \".\") top-domain\n"
148 "local-word = 1*local-char\n"
149 "local-char = alpha / num / special\n"
150 "sub-domain = 1*sub-domain-char\n"
151 "top-domain = 2*6top-domain-char\n"
152 "sub-domain-char = alpha / num / \"-\"\n"
153 "top-domain-char = alpha\n"
154 "alpha = %d65-90 / %d97-122\n"
156 "special = %d33 / %d35 / %d36-39 / %d42-43 / %d45 / %d47\n"
157 " / %d61 / %d63 / %d94-96 / %d123-126\n";
158 char* cpEmail =
"just.me@my.email.domain.com";
159 char* cpGrammarName =
"email.abnf";
160 char caGrammarBuf[PATH_MAX];
161 const char* cpGrammarFile;
172 cpGrammarFile = cpMakeFileName(caGrammarBuf, SOURCE_DIR,
"/../input/", cpGrammarName);
176 "This example case illustrates the three methods of defining the pattern for\n"
177 "verification of an email address.\n";
178 printf(
"\n%s", cpHeader);
182 printf(
"\nThe email pattern:\n%s", cpEmailGrammar);
183 printf(
"\nThe email to match: %s\n", cpEmail);
186 printf(
"\nvApgexPattern: use an application-defined string to define the pattern.\n");
192 printf(
"\nvApgexPatternFile: use a file to define the pattern.\n");
198 printf(
"\nvApgexPatternParser: use a pre-constructed parser to define the pattern.\n");
209 iReturn = EXIT_FAILURE;
220 static int iResults() {
221 int iReturn = EXIT_SUCCESS;
222 static void* vpMem = NULL;
223 static void* vpApgex = NULL;
224 static void* vpApi = NULL;
225 char* cpEmail =
"just.me@my.email.domain.com";
226 char* cpEmail2 =
"This email address is a fake just.me@my.email.domain.com so don't share it with anyone.";
227 char* cpGrammarName =
"email.abnf";
228 char caGrammarBuf[PATH_MAX];
229 const char* cpGrammarFile;
240 cpGrammarFile = cpMakeFileName(caGrammarBuf, SOURCE_DIR,
"/../input/", cpGrammarName);
244 "This example case illustrates details of the pattern-matching result.\n"
245 "Three cases show minimal, partial and full rule results.\n";
246 printf(
"\n%s", cpHeader);
251 printf(
"\nThe email pattern:\n%s",
cpApiInFile(vpApi, cpGrammarFile));
252 printf(
"\nThe email to match: %s\n", cpEmail);
255 printf(
"\nMinimal result: By default the result only shows the full pattern match.\n");
261 printf(
"\nIntermediate result: Display the sub-phrases for a few select rules.\n");
267 printf(
"\nFull result: Display the sub-phrases for all rules.\n");
274 printf(
"\n Embedded phrase: Find the pattern in a longer string. Display left and right context\n");
275 printf(
"The embedded phrase: %s\n", cpEmail2);
285 iReturn = EXIT_FAILURE;
295 static int iProperties() {
296 int iReturn = EXIT_SUCCESS;
297 static void* vpMem = NULL;
298 static void* vpApgex = NULL;
299 static void* vpApi = NULL;
300 char* cpEmailGood =
"This, just.me@my.email.domain.com, is an email address.";
301 char* cpEmailBad =
"Not an email address.";
302 char* cpGrammarName =
"email.abnf";
303 char caGrammarBuf[PATH_MAX];
304 const char* cpGrammarFile;
316 cpGrammarFile = cpMakeFileName(caGrammarBuf, SOURCE_DIR,
"/../input/", cpGrammarName);
320 "This example case illustrates the pattern-matching properties.\n"
321 "Properties are illustrated before a pattern match, after a successful match\n"
322 "and after an unsuccessful match.\n";
323 printf(
"\n%s", cpHeader);
326 printf(
"\nProperties before the pattern match:\n");
332 printf(
"\nProperties after successful pattern match:\n");
340 printf(
"\nProperties after an unsuccessful pattern match:\n");
354 iReturn = EXIT_FAILURE;
364 static int iGlobalMode() {
365 int iReturn = EXIT_SUCCESS;
366 static void* vpMem = NULL;
367 static void* vpApgex = NULL;
368 char* cpGrammar =
"word = %s\"abc\"\n";
369 char* cpStr =
"Learn your abcs by repeating, abc, abc, abc over an over again.";
381 "This example case illustrates pattern-matching in global mode.\n"
382 "Setting the global flag \"g\" facilitates finding all occurrences of a phrase in a sting.\n";
383 printf(
"\n%s", cpHeader);
384 printf(
"\nThe Grammar\n");
385 printf(
"%s\n", cpGrammar);
386 printf(
"The Input String\n");
387 printf(
"%s\n", cpStr);
390 printf(
"\nFind all occurrences of the grammar phrase in the input string.\n");
403 iReturn = EXIT_FAILURE;
412 static int iStickyMode() {
413 int iReturn = EXIT_SUCCESS;
414 static void* vpMem = NULL;
415 static void* vpApgex = NULL;
416 char* cpGrammar =
"word = %s\"abc\"\n";
417 char* cpStr =
"Learn your abcs by repeating, abcabcabc over an over again.";
429 "This example case illustrates pattern-matching in sticky mode.\n"
430 "Setting the sticky flag \"y\" facilitates finding only occurrences\n"
431 "at a fixed position in the input string. Additionally, it can find consecutive\n"
432 "occurrences as long as there are no characters in between.\n";
433 printf(
"\n%s", cpHeader);
434 printf(
"\nThe Grammar\n");
435 printf(
"%s\n", cpGrammar);
436 printf(
"The Input String\n");
437 printf(
"%s\n", cpStr);
440 printf(
"\nAttempt finding a match at the beginning of the string.\n");
447 printf(
"\nFind a match at a set position.\n");
453 printf(
"\nFind consecutive phrases.\n");
465 iReturn = EXIT_FAILURE;
474 static int iTraceMode() {
475 int iReturn = EXIT_SUCCESS;
476 static void* vpMem = NULL;
477 static void* vpApgex = NULL;
478 char* cpGrammar =
"word = \"abc\" / \"xyz\"\n";
479 char* cpStr =
"---xyz---";
480 const char* cpOutput;
481 char caOutputBuf[PATH_MAX];
490 cpOutput = cpMakeFileName(caOutputBuf, SOURCE_DIR,
"/../output/",
"trace.html");
494 "This example case illustrates the trace mode.\n"
495 "Setting the trace flag \"t\" will generate a trace of each phrase-matching attempt.\n"
496 "By default, the display will be in ASCII mode and displayed to stdout.\n"
497 "With the PPPT flag \"p\" set the PPPT trace can be compared to the previous without.\n"
498 "Finally, with the \"th\" flags set, the trace will be generated in HTML format.\n";
499 printf(
"\n%s", cpHeader);
500 printf(
"\nThe Grammar\n");
501 printf(
"%s\n", cpGrammar);
502 printf(
"The Input String\n");
503 printf(
"%s\n", cpStr);
506 printf(
"\nTrace of all phrase-matching attempts.\n");
511 printf(
"\nCompare trace with PPPT to previous without PPPT.\n");
515 printf(
"\nDisplay trace in HTML format.\n");
516 printf(
"Display %s in any browser.\n",cpOutput);
520 XTHROW(&e,
"should have a trace context pointer here");
529 iReturn = EXIT_FAILURE;
545 achar acPrevChar1 = 0;
546 achar acPrevChar2 = 0;
547 achar acPrevChar3 = 0;
552 if((acpChar +3) < acpEnd){
553 if(*acpChar++ !=
LT){
556 if(*acpChar++ !=
BANG){
566 while(acpChar < acpEnd){
579 acPrevChar3 = acPrevChar2;
580 acPrevChar2 = acPrevChar1;
581 acPrevChar1 = acChar;
590 int iReturn = EXIT_SUCCESS;
591 static void* vpMem = NULL;
592 static void* vpApgex = NULL;
594 "tags = start-tag text end-tag\n"
597 "start-tag = %d60 name %d62\n"
598 "end-tag = %d60.47 name %d62\n"
599 "empty-tag = %d60 name %d47.62\n"
600 "name = %d97-122 *(%d97-122 / %d48-57)\n"
601 "text = *%d97-122\n";
602 char* cpStr =
"<start>text</start> <!-- comment --> <empty/>";
614 "This example case illustrates the patterns with User-Defined Terminals, UDTs.\n"
615 "A simple mockup of XML tags is used with a UDT for the comment tag.\n";
616 printf(
"\n%s", cpHeader);
617 printf(
"\nThe Grammar\n");
618 printf(
"%s\n", cpGrammar);
619 printf(
"The Input String\n");
620 printf(
"%s\n", cpStr);
623 printf(
"\nFind all tags.\n");
638 iReturn = EXIT_FAILURE;
647 int iReturn = EXIT_SUCCESS;
648 static void* vpMem = NULL;
649 static void* vpApgex = NULL;
650 char* cpGrammar =
"word = %s\"abc\"\n";
651 char* cpStr =
"Learn your abcs by repeating, abcabcabc over an over again.";
665 "This example case illustrates testing for a match.\n"
666 "The modes are treated exactly the same as for executing a phrase match.\n"
667 "The difference is that testing only gives a yes or no answer. The matched phrase is not captured.\n";
668 printf(
"\n%s", cpHeader);
669 printf(
"\nThe Grammar\n");
670 printf(
"%s\n", cpGrammar);
671 printf(
"The Input String\n");
672 printf(
"%s\n", cpStr);
676 printf(
"\nTest default mode at the beginning of the string.\n");
679 printf(
"test = %s\n", (bTest ? cpYes : cpNo));
682 printf(
"\nTest global mode for all successes.\n");
685 printf(
"test = %s\n", (bTest ? cpYes : cpNo));
688 printf(
"test = %s\n", (bTest ? cpYes : cpNo));
692 printf(
"\nTest sticky mode at the beginning of the string.\n");
695 printf(
"test = %s\n", (bTest ? cpYes : cpNo));
698 printf(
"\nTest sticky mode at the beginning of a pattern.\n");
701 printf(
"test = %s\n", (bTest ? cpYes : cpNo));
704 printf(
"\nTest sticky mode for consecutive patterns.\n");
705 printf(
"\nFind consecutive phrases.\n");
708 printf(
"test = %s\n", (bTest ? cpYes : cpNo));
711 printf(
"test = %s\n", (bTest ? cpYes : cpNo));
718 iReturn = EXIT_FAILURE;
739 printf(
"HTML translating...\n");
759 if(cChar >= 65 && cChar <= 90){
776 for(; ui < spPhrase->
uiLength; ui++){
777 char cChar = (char)spPhrase->
acpPhrase[ui];
778 if(cChar >= 65 && cChar <= 90){
797 int iReturn = EXIT_SUCCESS;
798 static void* vpMem = NULL;
799 static void* vpAst = NULL;
800 static void* vpParser = NULL;
801 static void* vpApgex = NULL;
803 "html = (open html close) / (open close)\n"
804 "open = %d60 name %d62\n"
805 "close = %d60.47 name %d62\n"
806 "name = alpha *alphanum\n"
807 "alpha = %d97-122 / %d65-90\n"
808 "alphanum = alpha / %d48-57\n";
809 char* cpStr =
"<h1><P></Q></H2>";
822 "This example case illustrates the use of the AST for a complex translation of the matched phrase.\n"
823 "The pattern matches HTML-like tags. The translation will normalize the tag names.\n"
824 "Convert to lower case and match closing names to opening names.\n"
825 "Incidentally, this also illustrates matching of recursive patterns.\n";
826 printf(
"\n%s", cpHeader);
827 printf(
"\nThe Grammar\n");
828 printf(
"%s\n", cpGrammar);
829 printf(
"The Input String\n");
830 printf(
"%s\n", cpStr);
834 printf(
"\nTranslate the matched phrase\n");
838 XTHROW(&e,
"pattern match failed");
856 iReturn = EXIT_FAILURE;
870 size_t uiBufSize = 1024;
872 n += snprintf(&caBuf[n], (uiBufSize - n),
"\nReplaced matched phrase with custom stuff. ");
873 n += snprintf(&caBuf[n], (uiBufSize - n),
"\nI have access to the results (node hits = %"PRIuMAX
") and ",
875 n += snprintf(&caBuf[n], (uiBufSize - n),
"the properties (default mode = %s.)\n", (spProperties->
bDefaultMode ? cpYes : cpNo));
879 static int iReplace() {
880 int iReturn = EXIT_SUCCESS;
881 static void* vpMem = NULL;
882 static void* vpApgex = NULL;
887 char* cpStr =
"-abc-xyz-";
888 char* cpSimple =
"555";
889 char* cpEscape =
"$$";
890 char* cpLeft =
"($`)";
891 char* cpRight =
"($')";
892 char* cpSelf =
"($&)";
893 char* cpRulea =
"($<abc>)";
894 char* cpRulex =
"($<xyz>)";
907 "This example case illustrates the replacement of matched phrases with simple phrases and compound phrases.\n"
908 "Simple replacement simply replaces the matched phrase with a specified phrase.\n"
909 "Compound replacement uses various parts of the result for the replacement phrase.\n";
910 printf(
"\n%s", cpHeader);
911 printf(
"\nThe Grammar\n");
912 printf(
"%s\n", cpGrammar);
913 printf(
"The Input String\n");
914 printf(
"%s\n", cpStr);
918 printf(
"\nSimple replacement in default mode\n");
923 XTHROW(&e,
"replacement failed");
925 printf(
"Replace matched phrases with %s\n", cpSimple);
926 printf(
"The matched phrases with replacements:\n%s\n",
cpUtilPhraseToStr(vpMem, &sReplaced));
928 printf(
"\nSimple replacement in global mode\n");
933 XTHROW(&e,
"replacement failed");
935 printf(
"Replace matched phrases with %s\n", cpSimple);
936 printf(
"The matched phrases with replacements:\n%s\n",
cpUtilPhraseToStr(vpMem, &sReplaced));
944 XTHROW(&e,
"replacement failed");
946 printf(
"Replace matched phrases with %s - escape character\n", cpEscape);
947 printf(
"The matched phrases with replacements:\n%s\n",
cpUtilPhraseToStr(vpMem, &sReplaced));
952 XTHROW(&e,
"replacement failed");
954 printf(
"Replace matched phrases with %s - left context\n", cpLeft);
955 printf(
"The matched phrases with replacements:\n%s\n",
cpUtilPhraseToStr(vpMem, &sReplaced));
960 XTHROW(&e,
"replacement failed");
962 printf(
"Replace matched phrases with %s - self, the matched phrase\n", cpSelf);
963 printf(
"The matched phrases with replacements:\n%s\n",
cpUtilPhraseToStr(vpMem, &sReplaced));
968 XTHROW(&e,
"replacement failed");
970 printf(
"Replace matched phrases with %s - the right context\n", cpRight);
971 printf(
"The matched phrases with replacements:\n%s\n",
cpUtilPhraseToStr(vpMem, &sReplaced));
976 XTHROW(&e,
"replacement failed");
978 printf(
"Replace matched phrases with %s - the rule 'abc'\n", cpRulea);
979 printf(
"The matched phrases with replacements:\n%s\n",
cpUtilPhraseToStr(vpMem, &sReplaced));
984 XTHROW(&e,
"replacement failed");
986 printf(
"Replace matched phrases with %s - the rule 'xyz'\n", cpRulex);
987 printf(
"The matched phrases with replacements:\n%s\n",
cpUtilPhraseToStr(vpMem, &sReplaced));
991 XTHROW(&e,
"replacement failed");
993 printf(
"Replace matched phrases with custom replacement function.\n");
994 printf(
"The matched phrases with replacements:\n%s\n",
cpUtilPhraseToStr(vpMem, &sReplaced));
1000 iReturn = EXIT_FAILURE;
1009 static int iSplit() {
1010 int iReturn = EXIT_SUCCESS;
1011 static void* vpMem = NULL;
1012 static void* vpApgex = NULL;
1013 char* cpPatternSep =
"sep = *%d32 \";\" *%d32\n";
1014 char* cpPatternEmpty =
"sep = \"\"\n";
1015 char* cpPatternLetters =
"letters = 1*%d97-122\n";
1017 char caStrBuf[1024];
1031 "This example case illustrates the using matched phrases as separators to split a phrase into an array of sub-phrases.\n";
1032 printf(
"\n%s", cpHeader);
1035 printf(
"\nThe Separator Pattern\n");
1036 printf(
"%s\n", cpPatternSep);
1037 printf(
"The Input Phrase\n");
1038 cpStr =
"one ; two;three";
1039 printf(
"'%s'\n", cpStr);
1042 spArray =
spApgexSplit(vpApgex, spPhrase, 0, &uiCount);
1044 XTHROW(&e,
"split failed");
1046 printf(
"\nArray of sub-phrases - split at separators\n");
1047 for(ui = 0; ui < uiCount; ui++){
1049 printf(
"index: %d: phrase: %s\n", (
int)ui, cpSub);
1053 printf(
"\nThe Separator Pattern\n");
1054 printf(
"%s\n", cpPatternSep);
1055 printf(
"The Input Phrase - separator is entire string, array is empty\n");
1057 printf(
"'%s'\n", cpStr);
1060 spArray =
spApgexSplit(vpApgex, spPhrase, 0, &uiCount);
1062 XTHROW(&e,
"split failed");
1064 printf(
"\nArray of sub-phrases - separator is entire string, array is empty\n");
1067 printf(
"\nThe Separator Pattern\n");
1068 printf(
"%s\n", cpPatternSep);
1069 printf(
"The Input Phrase\n");
1071 printf(
"'%s'\n", cpStr);
1074 spArray =
spApgexSplit(vpApgex, spPhrase, 0, &uiCount);
1076 XTHROW(&e,
"split failed");
1078 printf(
"\nArray of sub-phrases - no separators, array is the whole string\n");
1079 for(ui = 0; ui < uiCount; ui++){
1081 printf(
"index: %d: phrase: %s\n", (
int)ui, cpSub);
1085 printf(
"\nThe Separator Pattern\n");
1086 printf(
"%s\n", cpPatternEmpty);
1087 printf(
"The Input Phrase\n");
1089 printf(
"'%s'\n", cpStr);
1092 spArray =
spApgexSplit(vpApgex, spPhrase, 0, &uiCount);
1094 XTHROW(&e,
"split failed");
1096 printf(
"\nArray of sub-phrases - separate into individual characters\n");
1097 for(ui = 0; ui < uiCount; ui++){
1099 printf(
"index: %d: phrase: %s\n", (
int)ui, cpSub);
1103 printf(
"\nThe Separator Pattern\n");
1104 printf(
"%s\n", cpPatternLetters);
1105 printf(
"The Input Phrase\n");
1106 cpStr =
"123abc4d56e";
1107 printf(
"'%s'\n", cpStr);
1110 spArray =
spApgexSplit(vpApgex, spPhrase, 0, &uiCount);
1112 XTHROW(&e,
"split failed");
1114 printf(
"\nArray of sub-phrases - letters as separators\n");
1115 for(ui = 0; ui < uiCount; ui++){
1117 printf(
"index: %d: phrase: %s\n", (
int)ui, cpSub);
1125 iReturn = EXIT_FAILURE;
1134 static int iBoundaries() {
1135 int iReturn = EXIT_SUCCESS;
1136 static void* vpMem = NULL;
1137 static void* vpApgex = NULL;
1138 char* cpWordPattern =
1139 "word-to-find = abw \"cat\" aew\n"
1140 "word-char = %d65-90/%d97-122\n"
1141 "abw = (!!word-char / %^) ; define word beginning\n"
1142 "aew = (!word-char / %$) ; define word end\n";
1143 char* cpLinePattern =
1144 "phrase-to-find = abl \"The \" animal \" in the hat.\" ael\n"
1145 "animal = \"cat\" / \"dog\" / \"bird\" / \"mouse\"\n"
1146 "line-end = %d13.10 / %d10 / %d13\n"
1147 "abl = (&&line-end / %^) ; define line beginning\n"
1148 "ael = (&line-end / %$) ; define line end\n";
1149 char* cpWordStr =
"Cat - a Bobcat is a cat but a caterpillar is not a cat.";
1151 "The cat in the hat.\n"
1152 "The dog in the hat.\r\n"
1153 "The bird in the hat.\r"
1154 "The dog is not in the hat.\n"
1155 "The cat in the hat is black.\n"
1156 "The mouse in the hat.";
1168 "This example case illustrates the definition and use of word and line boundaries.\n"
1169 "Unlike most \"regex\" engines, apgex makes no assumptions about what constitutes\n"
1170 "a word or line boundary. Nonetheless it is very easy, using look around and anchors,\n"
1171 "to define word and line boundaries according to the needs of the problem at hand.\n";
1172 printf(
"\n%s", cpHeader);
1175 printf(
"\nFind Words\n");
1176 printf(
"%s\n", cpWordPattern);
1177 printf(
"The Input Phrase\n");
1178 printf(
"'%s'\n", cpWordStr);
1188 printf(
"\nFind Lines\n");
1189 printf(
"%s\n", cpLinePattern);
1190 printf(
"The Input Phrase\n");
1191 printf(
"'%s'\n", cpLineStr);
1204 iReturn = EXIT_FAILURE;
1214 static char s_cZero = 0;
1229 while(acpChar < acpEnd){
1230 cChar = (char)*acpChar++;
1235 printf(
"[%-15s]", cpStr);
1247 printf(
"[%-15s]", cpStr);
1256 char cChar = (char)*acpChar;
1272 int iReturn = EXIT_SUCCESS;
1273 static void* vpMem = NULL;
1274 static void* vpVec = NULL;
1275 static void* vpAst = NULL;
1276 static void* vpParser = NULL;
1277 static void* vpApgex = NULL;
1278 char* cpCSVPattern =
1279 "; the record and field formats from RFC4180\n"
1280 "; slightly modified for easier phrase capture and replacement\n"
1281 "record = field *(COMMA field) [CRLF]\n"
1282 "field = (escaped / non-escaped)\n"
1283 "escaped = LQUOTE *(text / DDQUOTE) RQUOTE\n"
1284 "text = TEXTDATA / COMMA / CR / LF\n"
1286 "non-escaped = *TEXTDATA\n"
1292 "CRLF = CR LF / LF / CR ; modified from RFC4180 to include all forms of line ends\n"
1293 "TEXTDATA = %x20-21 / %x23-2B / %x2D-7E\n";
1295 "ITEM,DESCRIPTION,VALUE\n"
1296 "Cup,\"coffee,tea,etc\",$10.00\n"
1297 "Camero,Sedan,\"$25,000\"\n"
1298 "Empty Desc.,,\"$0,000\"\n"
1299 "Junker,empty price,\n"
1300 "Aston Martin,\"$316,300\",\"He said, \"\"That's way too much moola, man.\"\"\"\n";
1303 aint uiBufSize = 1024;
1305 const char* cpaStrings[10];
1319 "This example case illustrates the use of apgex for extracting the values from\n"
1320 "Comma Separated Value (CSV) formatted data. There seems to be no standard format\n"
1321 "but the field format used here is from RFC 4180. For comparison to \"regex\"\n"
1322 "see the solution for the similar Microsoft format described in Jeffrey Friedl's\n"
1323 "book \"Mastering Regular Expressions\", O'Reilly, 2006, pg. 213.\n";
1324 printf(
"\n%s", cpHeader);
1326 printf(
"\nThe Pattern\n");
1327 printf(
"%s\n", cpCSVPattern);
1328 printf(
"\nThe CSV File\n");
1329 printf(
"%s\n", cpFileStr);
1332 printf(
"\nDisplay the raw fields in each record.\n");
1342 printf(
"[%-15s]", cpaStrings[ui]);
1349 printf(
"\nExtract unquoted fields.\n");
1350 printf(
"Brute Force\n");
1363 if(uiLen >= uiBufSize){
1364 XTHROW(&e,
"buffer size too small for field conversion");
1367 caBuf[uiNewLen] = 0;
1369 for(uj = 1; uj < (uiLen - 1); uj++){
1371 caBuf[uiNewLen++] =
DQUOTE;
1374 caBuf[uiNewLen++] = cpStr[uj];
1377 caBuf[uiNewLen++] = 0;
1379 printf(
"[%-15s]", caBuf);
1381 printf(
"[%-15s]", cpStr);
1390 printf(
"\nExtract unquoted fields.\n");
1391 printf(
"AST Translation\n");
1392 vpVec =
vpVecCtor(vpMem,
sizeof(
char), 1024);
1414 iReturn = EXIT_FAILURE;
1423 static int iWide() {
1424 int iReturn = EXIT_SUCCESS;
1425 static void* vpMem = NULL;
1426 static void* vpFmt = NULL;
1427 static void* vpApgex = NULL;
1429 "word = 1*%x13A0-13F4\n";
1432 const char* cpInput;
1433 char caInputBuf[PATH_MAX];
1435 uint8_t ucaBuf[1024];
1436 aint uiBufSize = 1024;
1449 "This example case illustrates patterns with Unicode UTF-32 characters.\n"
1450 "The pattern will match Cherokee words in 32-bit UTF-32 format.\n";
1451 printf(
"\n%s", cpHeader);
1454 if(
sizeof(
achar) != 4){
1455 XTHROW(&e,
"recompile with APG_ACHAR=32, sizeof(achar) must = 4");
1459 cpInput = cpMakeFileName(caInputBuf, SOURCE_DIR,
"/../input/",
"cherokee.utf32be");
1461 cpInput = cpMakeFileName(caInputBuf, SOURCE_DIR,
"/../input/",
"cherokee.utf32le");
1465 if(uiSize > uiBufSize){
1466 XTHROW(&e,
"buffer size too small for input file");
1470 printf(
"\nThe Cherokee Word Pattern\n");
1471 printf(
"%s\n", cpCherokee);
1473 printf(
"The Input Phrase\n");
1474 cpLine =
cpFmtFirstUnicode(vpFmt, (uint32_t*)ucaBuf, (uint64_t)(uiSize / 4), 0, 0);
1476 printf(
"%s", cpLine);
1480 printf(
"\nThe Cherokee Words\n");
1489 printf(
"%s", cpLine);
1499 iReturn = EXIT_FAILURE;
1509 static int iBackReference() {
1510 int iReturn = EXIT_SUCCESS;
1511 static void* vpMem = NULL;
1512 static void* vpApgex = NULL;
1514 "pattern = %^ tag %$\n"
1515 "tag = (open tag close) / (open close)\n"
1517 "alpha = %d97-122 / %d65-90\n"
1518 "open = %d60 name %d62\n"
1519 "close = %d60.47 \\name %d62\n";
1521 "pattern = %^ tag %$\n"
1522 "tag = (open tag close) / (open close)\n"
1524 "alpha = %d97-122 / %d65-90\n"
1525 "open = %d60 name %d62\n"
1526 "close = %d60.47 \\%s%uname %d62\n";
1528 "pattern = %^ tag %$\n"
1529 "tag = (open tag close) / (open close)\n"
1531 "alpha = %d97-122 / %d65-90\n"
1532 "open = %d60 name %d62\n"
1533 "close = %d60.47 \\%pname %d62\n";
1535 "pattern = %^ tag %$\n"
1536 "tag = (open tag close) / (open close)\n"
1538 "alpha = %d97-122 / %d65-90\n"
1539 "open = %d60 name %d62\n"
1540 "close = %d60.47 \\%p%sname %d62\n";
1541 char* cpTagsUI =
"<div><span></SPAN></SPAN>";
1542 char* cpTagsUS =
"<div><span></span></span>";
1543 char* cpTagsPI =
"<div><span></SPAN></DIV>";
1544 char* cpTagsPS =
"<div><span></span></div>";
1545 apg_phrase* spPhraseUI, *spPhraseUS, *spPhrasePI, *spPhrasePS;
1556 "This example case illustrates back references in both \"universal\" and \"parent\" modes.\n"
1557 "The patterns match XML-like tags. The strings have both matching and non-matching node names.\n"
1558 "The opening and closing tags have both case-sensitive and case-insensitive corresponding names.\n"
1559 "Phrase-matching results are shown for all possible combinations, illustrating the differences\n"
1560 "between the different modes and case sensitivities.\n"
1561 "Note that, due to the begin-or-string and end-of-string anchors,\n"
1562 "the patterns require that the entire source phrase must be matched.\n";
1563 printf(
"\n%s", cpHeader);
1569 printf(
"\nUniversal I: universal mode, case insenstive pattern\n");
1570 printf(
"%s", cpPatternUI);
1571 printf(
"\nUniversal S: universal mode, case senstive pattern\n");
1572 printf(
"%s", cpPatternUS);
1573 printf(
"\nParent I: parent mode, case insenstive pattern\n");
1574 printf(
"%s", cpPatternPI);
1575 printf(
"\nParent S: parent mode, case senstive pattern\n");
1576 printf(
"%s", cpPatternPS);
1579 printf(
"\n%-12s %-26s %-10s\n",
"grammar",
"source",
"result");
1582 printf(
"%-12s %-26s %-10s\n",
"Universal I", cpTagsUI,
cpUtilTrueFalse(bResult));
1584 printf(
"%-12s %-26s %-10s\n",
"Universal I", cpTagsUS,
cpUtilTrueFalse(bResult));
1586 printf(
"%-12s %-26s %-10s\n",
"Universal I", cpTagsPI,
cpUtilTrueFalse(bResult));
1588 printf(
"%-12s %-26s %-10s\n",
"Universal I", cpTagsPS,
cpUtilTrueFalse(bResult));
1594 printf(
"%-12s %-26s %-10s\n",
"Universal S", cpTagsUI,
cpUtilTrueFalse(bResult));
1596 printf(
"%-12s %-26s %-10s\n",
"Universal S", cpTagsUS,
cpUtilTrueFalse(bResult));
1598 printf(
"%-12s %-26s %-10s\n",
"Universal S", cpTagsPI,
cpUtilTrueFalse(bResult));
1600 printf(
"%-12s %-26s %-10s\n",
"Universal S", cpTagsPS,
cpUtilTrueFalse(bResult));
1606 printf(
"%-12s %-26s %-10s\n",
"Parent I", cpTagsUI,
cpUtilTrueFalse(bResult));
1608 printf(
"%-12s %-26s %-10s\n",
"Parent I", cpTagsUS,
cpUtilTrueFalse(bResult));
1610 printf(
"%-12s %-26s %-10s\n",
"Parent I", cpTagsPI,
cpUtilTrueFalse(bResult));
1612 printf(
"%-12s %-26s %-10s\n",
"Parent I", cpTagsPS,
cpUtilTrueFalse(bResult));
1618 printf(
"%-12s %-26s %-10s\n",
"Parent S", cpTagsUI,
cpUtilTrueFalse(bResult));
1620 printf(
"%-12s %-26s %-10s\n",
"Parent S", cpTagsUS,
cpUtilTrueFalse(bResult));
1622 printf(
"%-12s %-26s %-10s\n",
"Parent S", cpTagsPI,
cpUtilTrueFalse(bResult));
1624 printf(
"%-12s %-26s %-10s\n",
"Parent S", cpTagsPS,
cpUtilTrueFalse(bResult));
1631 iReturn = EXIT_FAILURE;
1650 iCase = atol(argv[1]);
1652 if((iCase > 0) && (iCase <= s_iCaseCount)){
1653 printf(
"%s\n", s_cppCases[iCase -1]);
1663 return iProperties();
1665 return iGlobalMode();
1667 return iStickyMode();
1669 return iTraceMode();
1681 return iBoundaries();
1687 return iBackReference();
void vTraceSetOutput(void *vpCtx, const char *cpFileName)
aint uiNodeHits
The number of parser node hits.
#define XCTOR(e)
This macro will initialize an exception structure and prepare entry to the "try" block.
apgex_phrase * spPhrases
The list of matched phrases. Any given rule or UDT may have multiple matched sub-phrases.
const char * cpUtilPhraseToStr(void *vpMem, apg_phrase *spPhrase)
Convert an apg_phrase to a null-terminated ASCII string.
void vUtilFileRead(void *vpMem, const char *cpFileName, uint8_t *ucpData, aint *uipLen)
Read a file into the caller's data area.
aint uiPhraseCount
The number of matched sub-phrases for this rule/UDT.
apg_phrase sApgexReplace(void *vpCtx, apg_phrase *spSource, apg_phrase *spReplacement)
Replace the matched phrase with a specified phrase.
void vMemDtor(void *vpCtx)
Destroys a Memory component. Frees all memory allocated.
abool try
True for the try block, false for the catch block.
void vApgexPatternFile(void *vpCtx, const char *cpFileName, const char *cpFlags)
Reads the SABNF grammar defining the pattern from a file.
Detailed information about the apgex object after vApgexPattern() has been called.
void vApiInClear(void *vpCtx)
Clears the input and related memory.
void vAstSetRuleCallback(void *vpCtx, aint uiRuleIndex, ast_callback pfnCallback)
Define a callback function for a single rule on the AST.
void vApgexDefineUDT(void *vpCtx, const char *cpName, parser_callback pfnUdt)
Define the callback function for a User-Defined Terminal (UDT).
aint uiParserOffset
[read only] Offset from acpString to the first character to match
apgex_result sApgexExec(void *vpCtx, apg_phrase *spSource)
Attempt a pattern match on the source array of APG alphabet characters.
const achar * acpString
[read only] Pointer to the input sub-string,
#define ID_AST_POST
indicates post-node-traversal AST callback state (up the tree)
void * vpApiCtor(exception *spEx)
Construct an API component context (object).
aint uiParserState
[read only] ID_ACTIVE if the parser is going down the tree. ID_MATCH or ID_NOMATCH if coming up the tree.
aint uiParserRuleLookup(void *vpCtx, const char *cpRuleName)
Find the rule index corresponding to a rule name.
#define ID_AST_OK
normal AST callback function return
#define ID_AST_PRE
indicates pre-node-traversal AST callback state (down the tree)
Input data to the AST callback functions.
uint_fast8_t achar
achar is the type for the parser's alphabet characters.
void vApgexPattern(void *vpCtx, const char *cpPattern, const char *cpFlags)
Prepare a phrase-matching parser for the given pattern.
The phrase matching results.
void vAstTranslate(void *vpCtx, void *vpUserData)
Do a depth-first traversal of the AST with user-defined callback functions to translate the AST records.
void vApiDtor(void *vpCtx)
The API component destructor.
const char * cpUtilTrueFalse(luint luiTrue)
Return a human-readable string version of the given value in its true/false sense.
abool bDefaultMode
True if the cpFlags parameter in vApgexPattern() is NULL or empty.
#define XTHROW(ctx, msg)
Exception throw macro.
void * vpVecPop(void *vpCtx)
Pops one element from the end of the array.
uint_fast32_t aint
The APG parser's unsigned integer type.
apgex_phrase * spResult
The matched phrase. NULL if no match.
aint uiLastIndex
The index of the character in the input string where the attempted pattern match begins....
void vApiFile(void *vpCtx, const char *cpFileName, abool bStrict, abool bPppt)
Quick way to generate a parser from a grammar file.
int main(int argc, char **argv)
The executable from this main function is the ABNF Parser Generator application, APG.
aint uiCallbackState
[input/output] Rule name (RNM) callback functions: If ID_ACTIVE, the parser takes no action....
void * vpVecCtor(void *vpMem, aint uiElementSize, aint uiInitialAlloc)
The vector object constructor.
void * vpApgexCtor(exception *spEx)
The phrase-matching engine object constructor.
A structure to describe the type and location of a caught exception.
aint uiLastIndex
The last index following the last pattern match attempt.
Defines a pointer to an achar array plus its length. That is, a phrase as is often used by APG.
void vMemFree(void *vpCtx, const void *vpData)
Free memory previously allocated with vpMemAlloc().
#define ID_ACTIVE
indicates active parser state, parser has just entered the node and is moving down the parse tree
void * vpApgexGetParser(void *vpCtx)
Get a pointer to the parser object's context.
aint uiCallbackPhraseLength
[input/output] The phrase length of the matched phrase if the callback function returns ID_MATCH.
#define ID_MATCH
indicates a matched phrase parser state on return from parse tree below this node
uintmax_t luint
luint is used to cast integers suitable for the %"PRIuMAX" printf format.
void * vpApiOutputParser(void *vpCtx)
Generate a parser object directly from the specified SABNF grammar.
void * vpApgexGetTrace(void *vpCtx)
Get a pointer to the trace object's context.
The data struct passed to each callback function.
void vApgexDisplayResult(void *vpCtx, apgex_result *spResult, const char *cpFileName)
Display the complete results from a pattern match.
void vApgexDisplayProperties(void *vpCtx, apgex_properties *spProperties, const char *cpFileName)
Display the object's properties.
void * vpVecFirst(void *vpCtx)
Get the first element of the vector. The vector is not altered.
apg_phrase * spApgexSplit(void *vpCtx, apg_phrase *spSource, aint uiLimit, aint *uipCount)
Split a phrase into an array of sub-phrases.
void * vpMemCtor(exception *spException)
Construct a memory component.
const char * cpApiInFile(void *vpCtx, const char *cpFileName)
Reads an SABNF grammar byte stream from a file.
aint uiLength
The number of characters in the array.
void vApgexSetLastIndex(void *vpCtx, aint uiLastIndex)
Sets the index of the character in the source where the pattern-match search is to begin.
void vParserDtor(void *vpCtx)
Clears the parser component's context and frees all heap memory associated with this parser.
void vApgexDisplayPatternErrors(void *vpCtx, const char *cpFileName)
void vUtilApgInfo(void)
Display the current state of apg.h.
uint8_t abool
abool is the APG bool type.
void vApgexDtor(void *vpCtx)
The phrase-matching engine object destructor.
#define ID_NOMATCH
indicates that no phrase was matched on return from parse tree below this node
Information about each rule or UDT in the SABNF pattern.
void vApgexEnableRules(void *vpCtx, const char *cpNames, abool bEnable)
Enable or disable specified rule and/or UDT names for phrase capture.
void vApgexPatternParser(void *vpCtx, void *vpParser, const char *cpFlags)
Define the SABNF pattern with a user-created parser.
apgex_properties sApgexProperties(void *vpCtx)
Get a copy of the object's properties.
apg_phrase sApgexReplaceFunc(void *vpCtx, apg_phrase *spSource, pfn_replace pfnFunc, void *vpUser)
Replace the matched phrase with a user-generated phrase.
apg_phrase sPhrase
The matched phrase.
abool bApgexTest(void *vpCtx, apg_phrase *spSource)
Report only success or failure on a pattern match.
void * vpApgexGetAst(void *vpCtx)
Get a pointer to the AST object's context.
aint uiStringLength
[read only] The input string length.
apg_phrase * spUtilStrToPhrase(void *vpMem, const char *cpStr)
Convert a null-terminated ASCII string to an apg_phrase.
void vUtilPrintException(exception *spEx)
Prints exception information from an exception structure.
void * vpVecPush(void *vpCtx, void *vpElement)
Adds one element to the end of the array.
apgex_rule * spRules
The phrases matched by all enabled rules and/or UDTs. NULL if no match.
void vUtilCurrentWorkingDirectory(void)
Display the current working directory.
void vVecClear(void *vpCtx)
Clears all used elements in a vector component.
const achar * acpPhrase
Pointer to an array of type achar APG alphabet characters.
APG Version 7.0 is licensed under the
2-Clause BSD License,
an Open Source Initiative Approved License.