Version 7.0
Copyright © 2021 Lowell D. Thomas
APG
… an ABNF Parser Generator
xml.c
Go to the documentation of this file.
1 /* *************************************************************************************
2  Copyright (c) 2021, Lowell D. Thomas
3  All rights reserved.
4 
5  This file is part of APG Version 7.0.
6  APG Version 7.0 may be used under the terms of the BSD 2-Clause License.
7 
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions are met:
10 
11  1. Redistributions of source code must retain the above copyright notice, this
12  list of conditions and the following disclaimer.
13 
14  2. Redistributions in binary form must reproduce the above copyright notice,
15  this list of conditions and the following disclaimer in the documentation
16  and/or other materials provided with the distribution.
17 
18  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 * *************************************************************************************/
87 #include "xml.h"
88 
89 //#include "../utilities/format.h"
90 #include "xmlgrammar.h"
91 #include "xmlp.h"
92 
97 #define TAB 9
98 #define LF 10
99 #define CR 13
100 #define LINE_LEN 16
101 #define LINE_LEN4 4
102 #define LINE_LEN8 8
103 #define LINE_LEN12 12
104 #define LT 0x3C
105 #define CHARS_LINE_LEN 8
106 #define CHAR_BUF_LEN 256
107 
112 typedef struct{
116 } input_info;
117 
118 static const void* s_vpMagicNumber = (const void*)"xml";
119 
120 static void vClear(xml* spXml);
121 //static void vDisplayParserState(xml* spXml, parser_state* spState);
122 static void vDisplayXml(xml* spXml, abool bShowLines, void* vpVecChars);
123 static abool bIsUtf8(uint8_t* ucpData);
124 static abool bIsUtf16be(uint8_t* ucpData);
125 static abool bIsUtf16le(uint8_t* ucpData);
126 static abool bUtfType(uint8_t* ucpData, aint* uipStartByte, aint* uipTrueType, input_info* spInfo);
127 static void vEmptyTagDisplay(u32_phrase* spName, u32_phrase* spAttrNames, u32_phrase* spAttrValues,
128  uint32_t uiAttrCount, void* vpUserData);
129 static void vStartTagDisplay(u32_phrase* spName, u32_phrase* spAttrNames, u32_phrase* spAttrValues,
130  uint32_t uiAttrCount, void* vpUserData);
131 static void vEndTagDisplay(u32_phrase* spName, u32_phrase* spContent, void* vpUserData);
132 static void vPIDisplay(u32_phrase* spTarget, u32_phrase* spInfo, void* vpUserData);
133 static void vXmlDeclDisplay(xmldecl_info* spInfo, void* vpUserData);
134 static void vDTDDisplay(dtd_info* spInfo, void* vpUserData);
135 static void vCommentDisplay(u32_phrase* spComment, void* vpUserData);
136 static void vDisplayUnicode(void* vpFmt, const uint32_t* uipChars, uint32_t uiLength);
137 static void vGetData(xml* spXml, uint8_t* ucpData, aint uiDataLen, char* cpBuf, size_t uiBufBeg, size_t uiBufLen);
138 static void vDisplayCData(char* cpName, u32_phrase* spData, void* vpFmt);
139 
159 void* vpXmlCtor(exception* spEx){
160  xml* spXml = NULL;
161  if(!bExValidate(spEx)){
162  vExContext();
163  }
164  if(CHAR_BIT != 8){
165  // make sure a char is 8 bits for this processor and compiler
166  char caBuf[CHAR_BUF_LEN];
167  snprintf(caBuf, CHAR_BUF_LEN,
168  "On this system, length of char is %d bits. This XML processor requires length of char to be 8 bits.",
169  (int)CHAR_BIT);
170  XTHROW(spEx, caBuf);
171  }
172  void* vpMem = vpMemCtor(spEx);
173  spXml = (xml*)vpMemAlloc(vpMem, sizeof(xml));
174  memset((void*)spXml, 0, sizeof(xml));
175  spXml->vpMem = vpMem;
176  spXml->spException = spEx;
177  spXml->vpConv = vpConvCtor(spEx);
178  spXml->vpFmt = vpFmtCtor(spEx);
179  spXml->vpMsgs = vpMsgsCtor(spEx);
180  spXml->vpVecChars = vpVecCtor(vpMem, sizeof(uint8_t), 4096);
181  spXml->vpVec32 = vpVecCtor(vpMem, sizeof(uint32_t), 4096);
182  spXml->vpVec8 = vpVecCtor(vpMem, sizeof(uint8_t), 4096);
183  spXml->vpVecString = vpVecCtor(vpMem, sizeof(char), 4096);
184  spXml->vpVecName = vpVecCtor(vpMem, sizeof(uint32_t), 4096);
185  spXml->vpVecCData = vpVecCtor(vpMem, sizeof(u32_phrase), 512);
186  spXml->vpVecGEDefs= vpVecCtor(vpMem, sizeof(entity_decl), 64);
187  spXml->vpVecEntityFrames = vpVecCtor(vpMem, sizeof(entity_frame), 512);
188  spXml->vpVecNotationDecls= vpVecCtor(vpMem, sizeof(entity_decl), 64);
189  spXml->vpVecAttDecls = vpVecCtor(vpMem, sizeof(att_decl), 64);
190  spXml->vpVecAttWork = vpVecCtor(vpMem, sizeof(uint32_t), 4096);
191  spXml->vpVecAttList = vpVecCtor(vpMem, sizeof(named_value), 64);
192  spXml->vpVecFrame = vpVecCtor(vpMem, sizeof(element_frame), 4096);
193  spXml->vpValidate = s_vpMagicNumber;
194  return (void*)spXml;
195 }
196 
197 static void vClear(xml* spXml){
198  vMsgsClear(spXml->vpMsgs);
199  vLinesDtor(spXml->vpLines);
200  spXml->vpLines = NULL;
201  vParserDtor(spXml->vpParser);
202  spXml->vpParser = NULL;
203  if(spXml->ucpData){
204  vMemFree(spXml->vpMem, spXml->ucpData);
205  spXml->ucpData = NULL;
206  }
207  if(spXml->acpChars){
208  vMemFree(spXml->vpMem, spXml->acpChars);
209  spXml->acpChars = NULL;
210  }
211  spXml->bStandalone = APG_FALSE;
212  spXml->bExtSubset = APG_FALSE;
213  spXml->uiExternalIds = 0;
214  spXml->uiPEDecls = 0;
215  spXml->uiPERefs = 0;
216  spXml->uiGEDeclsNotProcessed = 0;
217  spXml->uiAttListsNotProcessed = 0;
218  vVecClear(spXml->vpVecChars);
219  vVecClear(spXml->vpVecName);
220  vVecClear(spXml->vpVec32);
221  vVecClear(spXml->vpVec8);
222  vVecClear(spXml->vpVecString);
223  vVecClear(spXml->vpVecCData);
224  vVecClear(spXml->vpVecGEDefs);
225  vVecClear(spXml->vpVecEntityFrames);
226  vVecClear(spXml->vpVecNotationDecls);
227  vVecClear(spXml->vpVecAttDecls);
228  vVecClear(spXml->vpVecAttWork);
229  vVecClear(spXml->vpVecFrame);
230  vVecClear(spXml->vpVecAttList);
231 }
232 
233 
242 void vXmlDtor(void* vpCtx){
243  xml* spXml = (xml*)vpCtx;
244  if(vpCtx && (spXml->vpValidate == s_vpMagicNumber)){
245  void* vpMem;
246  vConvDtor(spXml->vpConv);
247  vFmtDtor(spXml->vpFmt);
248  vMsgsDtor(spXml->vpMsgs);
249  vLinesDtor(spXml->vpLines);
250  vParserDtor(spXml->vpParser);
251  vpMem = spXml->vpMem;
252  memset((void*)spXml, 0, sizeof(xml));
253  vMemDtor(vpMem);
254  }
255 }
256 
261 abool bXmlValidate(void* vpCtx){
262  xml* spXml = (xml*)vpCtx;
263  if(vpCtx && (spXml->vpValidate == s_vpMagicNumber)){
264  return APG_TRUE;
265  }
266  return APG_FALSE;
267 }
268 
277 void vXmlGetFile(void* vpCtx, const char* cpFileName){
278  xml* spXml = (xml*)vpCtx;
279  if(vpCtx && (spXml->vpValidate == s_vpMagicNumber)){
280  aint uiLen = 0;
281  size_t uiBufSize = PATH_MAX + CHAR_BUF_LEN;
282  size_t uiBufLen, uiBufBeg;
283  char caBuf[PATH_MAX + CHAR_BUF_LEN];
284  vUtilFileRead(spXml->vpMem, cpFileName, NULL, &uiLen);
285  vClear(spXml);
286  spXml->ucpData = (uint8_t*)vpMemAlloc(spXml->vpMem, uiLen);
287  vUtilFileRead(spXml->vpMem, cpFileName, spXml->ucpData, &uiLen);
288  uiBufBeg = (size_t)snprintf(caBuf, uiBufSize, "file: %s: ", cpFileName);
289  if(uiBufBeg >= uiBufSize){
290  // should never get here
291  snprintf(caBuf, uiBufSize, "file name \"%s\" too long", cpFileName);
292  XTHROW(spXml->spException, caBuf);
293  }
294  uiBufLen = uiBufSize - uiBufBeg;
295  vGetData(spXml, spXml->ucpData, uiLen, caBuf, uiBufBeg, uiBufLen);
296  vMemFree(spXml->vpMem, spXml->ucpData);
297  spXml->ucpData = NULL;
298  }else{
299  vExContext();
300  }
301 }
302 
303 static void vGetData(xml* spXml, uint8_t* ucpData, aint uiDataLen, char* cpBuf, size_t uiBufBeg, size_t uiBufLen){
304  aint uiStartByte;
305  aint uiTrueType;
306  input_info sInfo;
307  vVecClear(spXml->vpVecChars);
308  vLinesDtor(spXml->vpLines);
309  spXml->vpLines = NULL;
310  if(uiDataLen < 3){
311  snprintf(cpBuf, uiBufLen, "input error: data has too few bytes (< 3)");
312  XTHROW(spXml->spException, cpBuf);
313  }
314 
315  // determine the actual data type
316  if(!bUtfType(ucpData, &uiStartByte, &uiTrueType, &sInfo)){
317  if(sInfo.bBom){
318  const char* cpType = cpUtilUtfTypeName(sInfo.uiType);
319  snprintf(&cpBuf[uiBufBeg], uiBufLen,
320  "input error: data begins with %s encoding type BOM but invalid XML characters follows", cpType);
321  XTHROW(spXml->spException, cpBuf);
322  }
323  switch(sInfo.uiType){
324  case UTF_8:
325  // should never get here, but just in case
326  snprintf(&cpBuf[uiBufBeg], uiBufLen,
327  "unexpected input error: type is UTF-8 an no errors are expected at this stage");
328  break;
329  case UTF_16BE:
330  snprintf(&cpBuf[uiBufBeg], uiBufLen,
331  "input error: encoding type appears to be UTF-16BE but required BOM not present");
332  break;
333  case UTF_16LE:
334  snprintf(&cpBuf[uiBufBeg], uiBufLen,
335  "input error: encoding type appears to be UTF-16LE but required BOM not present");
336  break;
337  default:
338  snprintf(&cpBuf[uiBufBeg], uiBufLen,
339  "input error: unrecognized encoding type - invalid XML document");
340  break;
341  }
342  if(cpBuf[uiBufBeg]){
343  XTHROW(spXml->spException, cpBuf);
344  }
345  }
346  spXml->uiTrueType = uiTrueType;
347  ucpData += uiStartByte;
348  uiDataLen -= uiStartByte;
349  conv_src sSrc = {uiTrueType, ucpData, uiDataLen};
350 
351  // translate the input to 32-bit code points
352  // NOTE: this process validates input characters are valid Unicode code points
353  vConvDecode(spXml->vpConv, &sSrc);
354 
355  // validate control characters and convert line ends
356  uint32_t uiCodeLen = 0;
357  uint32_t* uipCode, *uipTrans;
358  vConvGetCodePoints(spXml->vpConv, NULL, &uiCodeLen);
359  vVecClear(spXml->vpVec32);
360  uipCode = (uint32_t*)vpVecPushn(spXml->vpVec32, NULL, (aint)((2 * uiCodeLen) + 2));
361  uipTrans = uipCode + uiCodeLen + 2;
362  vConvGetCodePoints(spXml->vpConv, uipCode, &uiCodeLen);
363  uipCode[uiCodeLen] = 0; // this insures the test for CRLF doesn't overrun & fail at the end of data
364  uint32_t ui = 0;
365  uint32_t uiCount = 0;
366  uint32_t uiPoint;
367  for(; ui < uiCodeLen; ui++){
368  uiPoint = uipCode[ui];
369  // check for invalid control characters
370  if((uiPoint < 9) || (uiPoint == 11) || (uiPoint ==12) || ((uiPoint > 13) && (uiPoint < 32))){
371  snprintf(&cpBuf[uiBufBeg], uiBufLen,
372  "code point 0x%02X at offset %"PRIuMAX" is disallowed control character",
373  uiPoint, (luint)ui);
374  XTHROW(spXml->spException, cpBuf);
375  }
376  if((uiPoint == 0xFFFE) || (uiPoint == 0xFFFF)){
377  snprintf(&cpBuf[uiBufBeg], uiBufLen,
378  "code point 0x%X at offset %"PRIuMAX" is disallowed (characters 0xFFFE & 0xFFFF are forbidden)",
379  uiPoint, (luint)ui);
380  XTHROW(spXml->spException, cpBuf);
381  }
382  // convert line ends
383  if(uiPoint == 13){
384  uipTrans[uiCount++] = 10;
385  if(uipCode[ui + 1] == 10){
386  ui++;
387  }
388  }else{
389  uipTrans[uiCount++] = uiPoint;
390  }
391  }
392 
393  // convert to UTF-8
394  vConvUseCodePoints(spXml->vpConv, uipTrans, uiCount);
395  conv_dst sDst = {UTF_8, NOBOM, NULL, 0};
396  vConvEncode(spXml->vpConv, &sDst);
397  vVecClear(spXml->vpVec32);
398  vpVecPushn(spXml->vpVecChars, sDst.ucpData, sDst.uiDataLen);
399  spXml->vpLines = vpLinesCtor(spXml->spException, vpVecFirst(spXml->vpVecChars), uiVecLen(spXml->vpVecChars));
400 }
401 
418 void vXmlGetArray(void* vpCtx, uint8_t* ucpData, aint uiDataLen){
419  xml* spXml = (xml*)vpCtx;
420  if(!vpCtx || (spXml->vpValidate != s_vpMagicNumber)){
421  vExContext();
422  }
423  char caBuf[CHAR_BUF_LEN];
424  caBuf[0] = 0;
425  vClear(spXml);
426  vGetData(spXml, ucpData, uiDataLen, caBuf, 0, CHAR_BUF_LEN);
427 }
428 
429 // Add the pre-defined entity definitions to the entity list
430 static void vPreDefinedEntities(xml* spXml){
431  static uint32_t uiAmp = 38;
432  static uint32_t uiApos = 39;
433  static uint32_t uiGt = 62;
434  static uint32_t uiLt = 60;
435  static uint32_t uiQuot = 34;
436  static uint32_t uiAmpName[] = {97,109,112};
437  static uint32_t uiAposName[] = {97,112,111,115};
438  static uint32_t uiGtName[] = {103,116};
439  static uint32_t uiLtName[] = {108,116};
440  static uint32_t uiQuotName[] = {113,117,111,116};
441  entity_decl* spValue;
442  // amp
443  spValue = (entity_decl*)vpVecPush(spXml->vpVecGEDefs, NULL);
444  memset(spValue, 0, sizeof(entity_decl));
445  spValue->spXml = spXml;
446  spValue->sName.uiOffset = (uint32_t)uiVecLen(spXml->vpVec32);
447  spValue->sName.uiLength = 3;
448  vpVecPushn(spXml->vpVec32, uiAmpName, 3);
449  spValue->sValue.uiOffset = (uint32_t)uiVecLen(spXml->vpVec32);
450  spValue->sValue.uiLength = 1;
451  vpVecPush(spXml->vpVec32, &uiAmp);
452  // apos
453  spValue = (entity_decl*)vpVecPush(spXml->vpVecGEDefs, NULL);
454  memset(spValue, 0, sizeof(entity_decl));
455  spValue->spXml = spXml;
456  spValue->sName.uiOffset = (uint32_t)uiVecLen(spXml->vpVec32);
457  spValue->sName.uiLength = 4;
458  vpVecPushn(spXml->vpVec32, uiAposName, 4);
459  spValue->sValue.uiOffset = (uint32_t)uiVecLen(spXml->vpVec32);
460  spValue->sValue.uiLength = 1;
461  vpVecPush(spXml->vpVec32, &uiApos);
462  // gt
463  spValue = (entity_decl*)vpVecPush(spXml->vpVecGEDefs, NULL);
464  memset(spValue, 0, sizeof(entity_decl));
465  spValue->spXml = spXml;
466  spValue->sName.uiOffset = (uint32_t)uiVecLen(spXml->vpVec32);
467  spValue->sName.uiLength = 2;
468  vpVecPushn(spXml->vpVec32, uiGtName, 2);
469  spValue->sValue.uiOffset = (uint32_t)uiVecLen(spXml->vpVec32);
470  spValue->sValue.uiLength = 1;
471  vpVecPush(spXml->vpVec32, &uiGt);
472  // lt
473  spValue = (entity_decl*)vpVecPush(spXml->vpVecGEDefs, NULL);
474  memset(spValue, 0, sizeof(entity_decl));
475  spValue->spXml = spXml;
476  spValue->sName.uiOffset = (uint32_t)uiVecLen(spXml->vpVec32);
477  spValue->sName.uiLength = 2;
478  vpVecPushn(spXml->vpVec32, uiLtName, 2);
479  spValue->sValue.uiOffset = (uint32_t)uiVecLen(spXml->vpVec32);
480  spValue->sValue.uiLength = 1;
481  vpVecPush(spXml->vpVec32, &uiLt);
482  // quot
483  spValue = (entity_decl*)vpVecPush(spXml->vpVecGEDefs, NULL);
484  memset(spValue, 0, sizeof(entity_decl));
485  spValue->spXml = spXml;
486  spValue->sName.uiOffset = (uint32_t)uiVecLen(spXml->vpVec32);
487  spValue->sName.uiLength = 4;
488  vpVecPushn(spXml->vpVec32, uiQuotName, 4);
489  spValue->sValue.uiOffset = (uint32_t)uiVecLen(spXml->vpVec32);
490  spValue->sValue.uiLength = 1;
491  vpVecPush(spXml->vpVec32, &uiQuot);
492  spXml->uiGEDeclsTotal = 5;
493 }
494 
500 void vXmlParse(void* vpCtx){
501  xml* spXml = (xml*)vpCtx;
502  if(!vpCtx || (spXml->vpValidate != s_vpMagicNumber)){
503  vExContext();
504  }
505  parser_state sState;
506  parser_config sInput;
507  aint ui;
508  aint uiCharCount = uiVecLen(spXml->vpVecChars);
509  uint8_t* uipInputChars = (uint8_t*)vpVecFirst(spXml->vpVecChars);
510  if(!uipInputChars || !uiCharCount){
511  XTHROW(spXml->spException, "no XML input");
512  }
513 
514  // initialize the pre-defined general entities
515  vPreDefinedEntities(spXml);
516 
517  // construct the parser
518  spXml->vpParser = vpParserCtor(spXml->spException, vpXmlgrammarInit);
519 
520  // set the rule callback functions
521  vXmlgrammarRuleCallbacks(spXml->vpParser);
522 
523  // if achar is not 8-bit character, convert the input characters
524  achar* acpInput = (achar*)uipInputChars;
525  if(sizeof(achar) != sizeof(uint8_t)){
526  if(spXml->acpChars){
527  vMemFree(spXml->vpMem, spXml->acpChars);
528  }
529  spXml->acpChars = (achar*)vpMemAlloc(spXml->vpMem, (sizeof(achar) * uiCharCount));
530  for(ui = 0; ui < uiCharCount; ui++){
531  spXml->acpChars[ui] = (achar)uipInputChars[ui];
532  }
533  acpInput = spXml->acpChars;
534  }
535 
536  // configure the parser
537  memset((void*)&sInput, 0, sizeof(sInput));
538  sInput.acpInput = acpInput;
539  sInput.uiInputLength = uiCharCount;
540  sInput.uiStartRule = XMLGRAMMAR_DOCUMENT;
541  sInput.vpUserData = (void*)spXml;
542  sInput.bParseSubString = APG_FALSE;
543 
544  // parse the input
545  vParserParse(spXml->vpParser, &sInput, &sState);
546 
547  if(!sState.uiSuccess){
548  XTHROW(spXml->spException, "XML parser failed: invalid XML input");
549  }
550  if(spXml->acpChars){
551  vMemFree(spXml->vpMem, spXml->acpChars);
552  spXml->acpChars = NULL;
553  }
554  vClear(spXml);
555 }
556 
565 void vXmlDisplayMsgs(void* vpCtx){
566  xml* spXml = (xml*)vpCtx;
567  if(!vpCtx || (spXml->vpValidate != s_vpMagicNumber)){
568  vExContext();
569  }
570  if(uiMsgsCount(spXml->vpMsgs)){
571  vUtilPrintMsgs(spXml->vpMsgs);
572  }
573 }
574 
579 void* vpXmlGetMsgs(void* vpCtx){
580  xml* spXml = (xml*)vpCtx;
581  if(!vpCtx || (spXml->vpValidate != s_vpMagicNumber)){
582  vExContext();
583  }
584  return spXml->vpMsgs;
585 }
586 
600 void vXmlSetStartTagCallback(void* vpCtx, pfnStartTagCallback pfnCallback, void* vpUserData){
601  xml* spXml = (xml*)vpCtx;
602  if(vpCtx && (spXml->vpValidate == s_vpMagicNumber)){
603  if(pfnCallback == DEFAULT_CALLBACK){
604  spXml->pfnStartTagCallback = vStartTagDisplay;
605  spXml->vpStartTagData = (void*)spXml;
606  }else{
607  spXml->pfnStartTagCallback = pfnCallback;
608  spXml->vpStartTagData = vpUserData;
609  }
610  }else{
611  vExContext();
612  }
613 }
614 
632 void vXmlSetEmptyTagCallback(void* vpCtx, pfnEmptyTagCallback pfnCallback, void* vpUserData){
633  xml* spXml = (xml*)vpCtx;
634  if(vpCtx && (spXml->vpValidate == s_vpMagicNumber)){
635  if(pfnCallback == DEFAULT_CALLBACK){
636  spXml->pfnEmptyTagCallback = vEmptyTagDisplay;
637  spXml->vpEmptyTagData = (void*)spXml;
638  }else{
639  spXml->pfnEmptyTagCallback = pfnCallback;
640  spXml->vpEmptyTagData = vpUserData;
641  }
642  }
643 }
656 void vXmlSetEndTagCallback(void* vpCtx, pfnEndTagCallback pfnCallback, void* vpUserData){
657  xml* spXml = (xml*)vpCtx;
658  if(vpCtx && (spXml->vpValidate == s_vpMagicNumber)){
659  if(pfnCallback == DEFAULT_CALLBACK){
660  spXml->pfnEndTagCallback = vEndTagDisplay;
661  spXml->vpEndTagData = (void*)spXml;
662  }else{
663  spXml->pfnEndTagCallback = pfnCallback;
664  spXml->vpEndTagData = vpUserData;
665  }
666  }
667 }
668 
681 void vXmlSetPICallback(void* vpCtx, pfnPICallback pfnCallback, void* vpUserData){
682  xml* spXml = (xml*)vpCtx;
683  if(vpCtx && (spXml->vpValidate == s_vpMagicNumber)){
684  if(pfnCallback == DEFAULT_CALLBACK){
685  spXml->pfnPICallback = vPIDisplay;
686  spXml->vpPIData = (void*)spXml;
687  }else{
688  spXml->pfnPICallback = pfnCallback;
689  spXml->vpPIData = vpUserData;
690  }
691  }
692 }
693 
706 void vXmlSetXmlDeclCallback(void* vpCtx, pfnXmlDeclCallback pfnCallback, void* vpUserData){
707  xml* spXml = (xml*)vpCtx;
708  if(vpCtx && (spXml->vpValidate == s_vpMagicNumber)){
709  if(pfnCallback == DEFAULT_CALLBACK){
710  spXml->pfnXmlDeclCallback = vXmlDeclDisplay;
711  spXml->vpXmlDeclData = (void*)spXml;
712  }else{
713  spXml->pfnXmlDeclCallback = pfnCallback;
714  spXml->vpXmlDeclData = vpUserData;
715  }
716  }
717 }
718 
731 void vXmlSetDTDCallback(void* vpCtx, pfnDTDCallback pfnCallback, void* vpUserData){
732  xml* spXml = (xml*)vpCtx;
733  if(vpCtx && (spXml->vpValidate == s_vpMagicNumber)){
734  if(pfnCallback == DEFAULT_CALLBACK){
735  spXml->pfnDTDCallback = vDTDDisplay;
736  spXml->vpDTDData = (void*)spXml;
737  }else{
738  spXml->pfnDTDCallback = pfnCallback;
739  spXml->vpDTDData = vpUserData;
740  }
741  }
742 }
743 
756 void vXmlSetCommentCallback(void* vpCtx, pfnCommentCallback pfnCallback, void* vpUserData){
757  xml* spXml = (xml*)vpCtx;
758  if(vpCtx && (spXml->vpValidate == s_vpMagicNumber)){
759  if(pfnCallback == DEFAULT_CALLBACK){
760  spXml->pfnCommentCallback = vCommentDisplay;
761  spXml->vpCommentData = (void*)spXml;
762  }else{
763  spXml->pfnCommentCallback = pfnCallback;
764  spXml->vpCommentData = vpUserData;
765  }
766  }
767 }
768 
781 void vXmlDisplayInput(void* vpCtx, abool bShowLines){
782  xml* spXml = (xml*)vpCtx;
783  if(vpCtx && (spXml->vpValidate == s_vpMagicNumber)){
784  vDisplayXml(spXml, bShowLines, spXml->vpVecChars);
785  }
786 }
787 
788 /****************************************************************
789  * STATIC FUNCTIONS
790  ****************************************************************/
791 static void vDisplayCData(char* cpName, u32_phrase* spData, void* vpUserData){
792  xml* spXml = (xml*)vpUserData;
793  if(bIsPhrase32Ascii(spData)){
794  vVecClear(spXml->vpVecString);
795  char* cpStr = (char*)vpVecPushn(spXml->vpVecString, NULL, (spData->uiLength + 1));
796  cpPhrase32ToStr(spData, cpStr);
797  printf("%10s: '%s'\n", cpName, cpStr);
798  vVecClear(spXml->vpVecString);
799  }else{
800  printf("%10s: (some or all characters non-ASCII)\n", cpName);
801  vDisplayUnicode(spXml->vpFmt, spData->uipPhrase, spData->uiLength);
802  }
803 }
804 
// Print an array of 32-bit code points, one formatted line at a time,
// using the format object's first/next iterator.
static void vDisplayUnicode(void* vpFmt, const uint32_t* uipChars, uint32_t uiLength){
    const char* cpLine;
    for(cpLine = cpFmtFirstUnicode(vpFmt, uipChars, uiLength, 0, 0); cpLine; cpLine = cpFmtNext(vpFmt)){
        printf("%s", cpLine);
    }
}
825 static void vStartTagDisplay(u32_phrase* spName, u32_phrase* spAttNames, u32_phrase* spAttValues,
826  uint32_t uiAttCount, void* vpUserData){
827  printf("Start Tag\n");
828  vDisplayCData("name", spName, vpUserData);
829  printf("Attributes (%u)\n", uiAttCount);
830  uint32_t ui = 0;
831  for(; ui < uiAttCount; ui++){
832  vDisplayCData("name", &spAttNames[ui], vpUserData);
833  vDisplayCData("value", &spAttValues[ui], vpUserData);
834  }
835  printf("\n");
836 }
837 
848 static void vEndTagDisplay(u32_phrase* spName, u32_phrase* spContent, void* vpUserData){
849  printf("End Tag\n");
850  vDisplayCData("name", spName, vpUserData);
851  vDisplayCData("content", spContent, vpUserData);
852  printf("\n");
853 }
854 
866 static void vPIDisplay(u32_phrase* spTarget, u32_phrase* spInfo, void* vpUserData){
867  printf("Processing Instruction\n");
868  vDisplayCData("target", spTarget, vpUserData);
869  vDisplayCData("info", spInfo, vpUserData);
870  printf("\n");
871 }
872 
873 static void vXmlDeclDisplay(xmldecl_info* spInfo, void* vpUserData){
874  printf("INFORMATION: XML DECLARATION\n");
875  static char* cpFormat = ""
876  "exists = %s\n"
877  "version = %s\n"
878  "encoding = %s\n"
879  "standalone = %s\n";
880  printf(cpFormat, spInfo->cpExists, spInfo->cpVersion, spInfo->cpEncoding, spInfo->cpStandalone);
881  printf("\n");
882 }
883 static void vCommentDisplay(u32_phrase* spComment, void* vpUserData){
884  vDisplayCData("comment", spComment, vpUserData);
885  printf("\n");
886 }
887 
888 static void vDTDDisplay(dtd_info* spInfo, void* vpUserData){
889  static char* cpYes = "yes";
890  static char* cpNo = "no";
891  printf("INFORMATION: DOCUMENT TYPE DECLARATION (DTD)\n");
892  if(spInfo->bExists){
893  printf("%3s: %s\n", cpYes, "DTD exists");
894  printf("%3s: %s\n", (spInfo->bStandalone ? cpYes : cpNo), "Document is standalone");
895  printf("%3s: %s\n", (spInfo->bExtSubset ? cpYes : cpNo), "DTD has external subset");
896  printf("%3d: %s\n", (int)spInfo->uiExternalIds, "external ids");
897  printf("%3d: %s\n", (int)spInfo->uiPEDecls, "Parameter Entity declarations");
898  printf("%3d: %s\n", (int)spInfo->uiPERefs, "Parameter Entity references");
899  printf("%3d: %s\n", (int)spInfo->uiGEDeclsDeclared,
900  "General Entity declarations: all declarations (includes pre-defined & not processed)");
901  printf("%3d: %s\n", (int)spInfo->uiGEDeclsNotProcessed, "General Entity declarations: not processed");
902  printf("%3d: %s\n", (int)spInfo->uiGEDeclsUnique, "General Entity declarations: unique processed (includes pre-defined)");
903  printf("%3d: %s\n", (int)spInfo->uiAttListsDeclared, "Attribute List declarations: all declarations");
904  printf("%3d: %s\n", (int)spInfo->uiAttListsUnique, "Attribute List declarations: unique element/attribute name combinations");
905  printf("%3d: %s\n", (int)spInfo->uiAttListsNotProcessed, "Attribute List declarations: not processed");
906  printf("%3d: %s\n", (int)spInfo->uiElementDecls, "Element declarations");
907  printf("%3d: %s\n", (int)spInfo->uiNotationDecls, "Notation declarations");
908 
909  printf("\n");
910  vDisplayCData("document name", spInfo->spName, vpUserData);
911  if(spInfo->uiGEDeclsUnique){
912  printf("\n");
913  printf("General Entity names and values\n");
914  aint ui = 0;
915  for(; ui < spInfo->uiGEDeclsUnique; ui++){
916  vDisplayCData("entity name", &spInfo->spGENames[ui], vpUserData);
917  vDisplayCData("entity value", &spInfo->spGEValues[ui], vpUserData);
918  }
919  }
920  if(spInfo->uiAttListsUnique){
921  printf("\n");
922  printf("Attribute List element names, attribute names and attribute values\n");
923  aint ui = 0;
924  for(; ui < spInfo->uiAttListsUnique; ui++){
925  vDisplayCData("element name", &spInfo->spAttElementNames[ui], vpUserData);
926  vDisplayCData("attribute name", &spInfo->spAttNames[ui], vpUserData);
927  vDisplayCData("attribute type", &spInfo->spAttTypes[ui], vpUserData);
928  vDisplayCData("attribute value", &spInfo->spAttValues[ui], vpUserData);
929  }
930  }
931  if(spInfo->uiNotationDecls){
932  printf("\n");
933  printf("Notation names and values\n");
934  aint ui = 0;
935  for(; ui < spInfo->uiNotationDecls; ui++){
936  vDisplayCData("notation name", &spInfo->spNotationNames[ui], vpUserData);
937  vDisplayCData("notation value", &spInfo->spNotationValues[ui], vpUserData);
938  }
939  }
940  }else{
941  printf("%3s: %s\n", cpNo, "DTD exists");
942  }
943  printf("\n");
944 }
957 static void vEmptyTagDisplay(u32_phrase* spName, u32_phrase* spAttNames, u32_phrase* spAttValues,
958  uint32_t uiAttCount, void* vpUserData){
959  printf("Empty Tag\n");
960  vDisplayCData("name", spName, vpUserData);
961  printf("Attributes (%u)\n", uiAttCount);
962  uint32_t ui = 0;
963  for(; ui < uiAttCount; ui++){
964  vDisplayCData("name", &spAttNames[ui], vpUserData);
965  vDisplayCData("value", &spAttValues[ui], vpUserData);
966  }
967  printf("\n");
968 }
969 
970 // display the parser's state on the current output stream
971 //static void vDisplayParserState(xml* spXml, parser_state* spState) {
972 // aint uiState;
973 // printf("PARSER STATE:\n");
974 // printf(" success: ");
975 // printf("%s", cpUtilTrueFalse(spState->uiSuccess));
976 // printf("\n");
977 // printf(" state: ");
978 // uiState = spState->uiState;
979 // if ((uiState == ID_MATCH) && (spState->uiPhraseLength == 0)) {
980 // uiState = ID_EMPTY;
981 // }
982 // if(uiState == ID_MATCH){
983 // printf("MATCH\n");
984 // }else if(uiState == ID_NOMATCH){
985 // printf("NO MATCH\n");
986 // }else if(uiState == ID_EMPTY){
987 // printf("EMPTY\n");
988 // }
989 // printf(" phrase length: %"PRIuMAX"\n", (luint) spState->uiPhraseLength);
990 // printf(" input length: %"PRIuMAX"\n", (luint) spState->uiStringLength);
991 // printf("max tree depth: %"PRIuMAX"\n", (luint) spState->uiMaxTreeDepth);
992 // printf(" hit count: %"PRIuMAX"\n", (luint) spState->uiHitCount);
993 //}
994 
995 static aint uiCountDigits(aint uiValue){
996  aint uiReturn = 1;
997  uiValue = uiValue/10;
998  while(uiValue){
999  uiReturn++;
1000  uiValue = uiValue/10;
1001  }
1002  return uiReturn;
1003 }
1004 static void vDisplayXml(xml* spXml, abool bShowLines, void* vpVecChars){
1005  const char* cpNext;
1006  uint8_t* ucpChars = (uint8_t*)vpVecFirst(vpVecChars);
1007  while(APG_TRUE){
1008  static char* cpFmt1 = "%d: %s";
1009  static char* cpFmt2 = "%02d: %s";
1010  static char* cpFmt3 = "%03d: %s";
1011  static char* cpFmt4 = "%04d: %s";
1012  static char* cpAlt1 = " : %s";
1013  static char* cpAlt2 = " : %s";;
1014  static char* cpAlt3 = " : %s";;
1015  static char* cpAlt4 = " : %s";;
1016  char* cpFmt, *cpAlt;
1017  printf(" true type: %s\n", cpUtilUtfTypeName(spXml->uiTrueType));
1018  printf("display type: %s\n", cpUtilUtfTypeName(UTF_8));
1019  if(!ucpChars){
1020  printf("00000000 no data\n");
1021  break;
1022  }
1023  if(bShowLines){
1024  line* spLine;
1025  int iLineNo, iPartial;
1026  if(!uiLinesCount(spXml->vpLines)){
1027  printf("00000000 no lines\n");
1028  break;
1029  }
1030  // count the lines
1031  spLine = spLinesFirst(spXml->vpLines);
1032  iLineNo = 0;
1033  while(spLine) {
1034  spLine = spLinesNext(spXml->vpLines);
1035  iLineNo++;
1036  }
1037  // select the format
1038  aint uiDigits = uiCountDigits((aint)iLineNo);
1039  switch(uiDigits){
1040  case 1:
1041  cpFmt = cpFmt1;
1042  cpAlt = cpAlt1;
1043  break;
1044  case 2:
1045  cpFmt = cpFmt2;
1046  cpAlt = cpAlt2;
1047  break;
1048  case 3:
1049  cpFmt = cpFmt3;
1050  cpAlt = cpAlt3;
1051  break;
1052  default:
1053  cpFmt = cpFmt4;
1054  cpAlt = cpAlt4;
1055  break;
1056  }
1057 
1058  spLine = spLinesFirst(spXml->vpLines);
1059  iLineNo = 0;
1060  while(spLine) {
1061  cpNext = cpFmtFirstBytes(spXml->vpFmt, &ucpChars[spLine->uiCharIndex], spLine->uiLineLength,
1062  FMT_CANONICAL, 0, 0);
1063  iPartial = 0;
1064  while(cpNext){
1065  if(iPartial == 0){
1066  printf(cpFmt, iLineNo, cpNext);
1067  }else{
1068  printf(cpAlt, cpNext);
1069  }
1070  cpNext = cpFmtNext(spXml->vpFmt);
1071  iPartial++;
1072  }
1073  spLine = spLinesNext(spXml->vpLines);
1074  iLineNo++;
1075  }
1076  }else{
1077  cpNext = cpFmtFirstBytes(spXml->vpFmt, ucpChars, uiVecLen(vpVecChars), FMT_CANONICAL, 0, 0);
1078  while(cpNext){
1079  printf("%s", cpNext);
1080  cpNext = cpFmtNext(spXml->vpFmt);
1081  }
1082  }
1083 
1084  // success
1085  break;
1086  }
1087 }
1088 static abool bIsUtf8(uint8_t* ucpData){
1089  abool bReturn = APG_TRUE;
1090  while(APG_TRUE){
1091  if(ucpData[0] == 0x3C &&
1092  ucpData[1] == 0x3F &&
1093  ucpData[1] == 0x78 &&
1094  ucpData[1] == 0x6D &&
1095  ucpData[1] == 0x6C){
1096  // begins with UTF-8 declaration "<?xml" (no white space allowed)
1097  break;
1098  }
1099  if((ucpData[0] == 0x3C) && (ucpData[1] != 0)){
1100  // begins with tag character "<"
1101  break;
1102  }
1103  if((ucpData[0] == 0x20) && (ucpData[1] != 0)){
1104  // begins with UTF-8 white space (OK when no declaration is present)
1105  break;
1106  }
1107  if((ucpData[0] == 0x09) && (ucpData[1] != 0)){
1108  // begins with UTF-8 white space (OK when no declaration is present)
1109  break;
1110  }
1111  if((ucpData[0] == 0x0A) && (ucpData[1] != 0)){
1112  // begins with UTF-8 white space (OK when no declaration is present)
1113  break;
1114  }
1115  if((ucpData[0] == 0x0D) && (ucpData[1] != 0)){
1116  // begins with UTF-8 white space (OK when no declaration is present)
1117  break;
1118  }
1119  bReturn = APG_FALSE;
1120  break;
1121  }
1122  return bReturn;
1123 }
1124 static abool bIsUtf16be(uint8_t* ucpData){
1125  abool bReturn = APG_TRUE;
1126  while(APG_TRUE){
1127  if(ucpData[0] == 0x00 && ucpData[1] == 0x3C &&
1128  ucpData[2] == 0x00 && ucpData[3] == 0x3F &&
1129  ucpData[4] == 0x00 && ucpData[5] == 0x78 &&
1130  ucpData[6] == 0x00 && ucpData[7] == 0x6D &&
1131  ucpData[8] == 0x00 && ucpData[9] == 0x6C){
1132  // begins with UTF-16BE declaration "<?xml" (no white space allowed)
1133  break;
1134  }
1135  if(ucpData[0] == 0x00 && ucpData[1] == 0x3C){
1136  // begins with UTF-16BE tag character "<"
1137  break;
1138  }
1139  if(ucpData[0] == 0x00 && ucpData[1] == 0x20){
1140  // begins with UTF-16BE white space (OK when no declaration)
1141  break;
1142  }
1143  if(ucpData[0] == 0x00 && ucpData[1] == 0x09){
1144  // begins with UTF-16BE white space (OK when no declaration)
1145  break;
1146  }
1147  if(ucpData[0] == 0x00 && ucpData[1] == 0x0A){
1148  // begins with UTF-16BE white space (OK when no declaration)
1149  break;
1150  }
1151  if(ucpData[0] == 0x00 && ucpData[1] == 0x0D){
1152  // begins with UTF-16BE white space (OK when no declaration)
1153  break;
1154  }
1155  bReturn = APG_FALSE;
1156  break;
1157  }
1158  return bReturn;
1159 }
1160 static abool bIsUtf16le(uint8_t* ucpData){
1161  abool bReturn = APG_TRUE;
1162  while(APG_TRUE){
1163  if(ucpData[0] == 0x3C && ucpData[1] == 0x00 &&
1164  ucpData[2] == 0x3F && ucpData[3] == 0x00 &&
1165  ucpData[4] == 0x78 && ucpData[5] == 0x00 &&
1166  ucpData[6] == 0x6D && ucpData[7] == 0x00 &&
1167  ucpData[8] == 0x6C && ucpData[9] == 0x00){
1168  // begins with UTF-16LE declaration "<?xml" (no white space allowed)
1169  break;
1170  }
1171  if(ucpData[0] == 0x3C && ucpData[1] == 0x00){
1172  // begins with UTF-16LE tag character "<"
1173  break;
1174  }
1175  if(ucpData[0] == 0x20 && ucpData[1] == 0x00){
1176  // begins with UTF-16LE white space (OK when no declaration)
1177  break;
1178  }
1179  if(ucpData[0] == 0x09 && ucpData[1] == 0x00){
1180  // begins with UTF-16LE white space (OK when no declaration)
1181  break;
1182  }
1183  if(ucpData[0] == 0x0A && ucpData[1] == 0x00){
1184  // begins with UTF-16LE white space (OK when no declaration)
1185  break;
1186  }
1187  if(ucpData[0] == 0x0D && ucpData[1] == 0x00){
1188  // begins with UTF-16LE white space (OK when no declaration)
1189  break;
1190  }
1191  bReturn = APG_FALSE;
1192  break;
1193  }
1194  return bReturn;
1195 }
1196 static abool bUtfType(uint8_t* ucpData, aint* uipStartByte, aint* uipTrueType, input_info* spInfo){
1197  spInfo->bValid = APG_FALSE;
1198  *uipStartByte = 0;
1199  *uipTrueType = BINARY;
1200  while(APG_TRUE){
1201  if(ucpData[0] == 0xEF && ucpData[1] == 0xBB && ucpData[2] == 0xBF){
1202  // UTF-8 with BOM
1203  *uipStartByte = 3;
1204  *uipTrueType = UTF_8;
1205  spInfo->uiType = UTF_8;
1206  spInfo->bBom = APG_TRUE;
1207  spInfo->bValid = bIsUtf8(&ucpData[3]);
1208  break;
1209  }
1210  if((ucpData[0] == 0xFE && ucpData[1] == 0xFF) && !(ucpData[2] == 0 && ucpData[3] == 0)){
1211  // UTF-16BE with BOM
1212  *uipStartByte = 2;
1213  *uipTrueType = UTF_16BE;
1214  spInfo->uiType = UTF_16BE;
1215  spInfo->bBom = APG_TRUE;
1216  spInfo->bValid = bIsUtf16be(&ucpData[2]);
1217  break;
1218  }
1219  if((ucpData[0] == 0xFF && ucpData[1] == 0xFE) && !(ucpData[2] == 0 && ucpData[3] == 0)){
1220  // UTF-16LE with BOM
1221  *uipStartByte = 2;
1222  *uipTrueType = UTF_16LE;
1223  spInfo->uiType = UTF_16LE;
1224  spInfo->bBom = APG_TRUE;
1225  spInfo->bValid = bIsUtf16le(&ucpData[2]);
1226  break;
1227  }
1228  // test for encodings without BOM
1229  // Note: must begin with "<?xml" or
1230  // first byte must be '<' or white space (allowed if no XML declaration is present)
1231  if(bIsUtf8(ucpData)){
1232  // UTF-8 without BOM
1233  *uipTrueType = UTF_8;
1234  spInfo->uiType = UTF_8;
1235  spInfo->bBom = APG_FALSE;
1236  spInfo->bValid = APG_TRUE;
1237  break;
1238  }
1239  if(bIsUtf16be(ucpData)){
1240  // UTF-16BE without BOM
1241  *uipTrueType = UTF_16BE;
1242  spInfo->uiType = UTF_16BE;
1243  spInfo->bBom = APG_FALSE;
1244  spInfo->bValid = APG_FALSE;
1245  break;
1246  }
1247  if(bIsUtf16le(ucpData)){
1248  // UTF-16LE without BOM
1249  *uipTrueType = UTF_16LE;
1250  spInfo->uiType = UTF_16LE;
1251  spInfo->bBom = APG_FALSE;
1252  spInfo->bValid = APG_FALSE;
1253  break;
1254  }
1255 
1256  // unrecognized type
1257  spInfo->uiType = UTF_UNKNOWN;
1258  spInfo->bBom = APG_FALSE;
1259  spInfo->bValid = APG_FALSE;
1260  break;
1261  }
1262  return spInfo->bValid;
1263 }
1264 
line::uiLineLength
aint uiLineLength
The number of characters in the line, including the line end characters.
Definition: lines.h:43
vXmlParse
void vXmlParse(void *vpCtx)
Parse the XML data from vXmlGetFile or vXmlGetArray.
Definition: xml.c:500
dtd_info::spNotationValues
u32_phrase * spNotationValues
A list of the Notation values, if any.
Definition: xml.h:100
dtd_info::spGEValues
u32_phrase * spGEValues
A list of (uiGEDeclsUnique) declared General Entity Declaration values, if any.
Definition: xml.h:94
dtd_info::spAttNames
u32_phrase * spAttNames
A list of (uiAttListsUnique) names of declared attribute defaults.
Definition: xml.h:96
pfnEmptyTagCallback
void(* pfnEmptyTagCallback)(u32_phrase *spName, u32_phrase *spAttrNames, u32_phrase *spAttrValues, uint32_t uiAttrCount, void *vpUserData)
Defines the function type that is called after an empty tag has been found.
Definition: xml.h:159
vpParserCtor
void * vpParserCtor(exception *spException, void *vpParserInit)
The parser's constructor for file initialization data.
Definition: parser.c:67
vUtilFileRead
void vUtilFileRead(void *vpMem, const char *cpFileName, uint8_t *ucpData, aint *uipLen)
Read a file into the caller's data area.
Definition: utilities.c:252
entity_frame
Definition: xmlp.h:107
vpXmlCtor
void * vpXmlCtor(exception *spEx)
The XML Parser constructor.
Definition: xml.c:159
vXmlDisplayInput
void vXmlDisplayInput(void *vpCtx, abool bShowLines)
Display input file.
Definition: xml.c:781
vpLinesCtor
void * vpLinesCtor(exception *spEx, const char *cpInput, aint uiLength)
The lines object constructor.
Definition: lines.c:85
parser_state::uiSuccess
aint uiSuccess
True (>0) if the input string was matched in its entirety, false (0) otherwise.
Definition: parser.h:184
vMemDtor
void vMemDtor(void *vpCtx)
Destroys a Memory component. Frees all memory allocated.
Definition: memory.c:141
dtd_info::spName
u32_phrase * spName
The DTD name (name of the root element).
Definition: xml.h:92
cdata_id::uiLength
uint32_t uiLength
The number of 32-bit data characters.
Definition: xmlp.h:44
pfnEndTagCallback
void(* pfnEndTagCallback)(u32_phrase *spName, u32_phrase *spContent, void *vpUserData)
Defines the function type that is called after an element's end tag has been found.
Definition: xml.h:172
input_info::bValid
abool bValid
True if this is a valid XML file, false otherwise.
Definition: xml.c:115
DEFAULT_CALLBACK
#define DEFAULT_CALLBACK
Indicator for a pre-defined, default callback function.
Definition: xml.h:51
uiMsgsCount
aint uiMsgsCount(void *vpCtx)
Get the number of logged messages.
Definition: msglog.c:213
vXmlSetEmptyTagCallback
void vXmlSetEmptyTagCallback(void *vpCtx, pfnEmptyTagCallback pfnCallback, void *vpUserData)
Set the user's callback function for the empty tags (<name attr="10"/>).
Definition: xml.c:632
element_frame
A stack is used to track which element is currently being parsed. This frame struct contains all of t...
Definition: xmlp.h:95
u32_phrase::uiLength
uint32_t uiLength
The number of integers in the array.
Definition: lib.h:75
xml
This is the encapsulated data for the xml component. The component context or handle is an opaque poi...
cpFmtFirstBytes
const char * cpFmtFirstBytes(void *vpCtx, const uint8_t *ucpBytes, uint64_t uiLength, int iStyle, uint64_t uiOffset, uint64_t uiLimit)
Initiate the iterator over an array of 8-bit byte data.
Definition: format.c:210
dtd_info::uiPERefs
aint uiPERefs
The number of Parameter Entity references.
Definition: xml.h:81
input_info
Information about the input data.
Definition: xml.c:112
cpPhrase32ToStr
char * cpPhrase32ToStr(u32_phrase *spPhrase, char *cpStr)
Convert a 32-bit phrase to a null-terminated ASCII string.
Definition: tools.c:382
vpXmlgrammarInit
void * vpXmlgrammarInit
Definition: xmlgrammar.c:5741
vUtilPrintMsgs
void vUtilPrintMsgs(void *vpMsgs)
Display the list of messages in a message object to stdout.
Definition: utilities.c:747
vExContext
void vExContext()
Handles bad context pointers.
Definition: exception.c:126
bIsPhrase32Ascii
abool bIsPhrase32Ascii(u32_phrase *spPhrase)
Determine if a 32-bit phrase consists entirely of printable ASCII characters.
Definition: tools.c:405
vXmlSetCommentCallback
void vXmlSetCommentCallback(void *vpCtx, pfnCommentCallback pfnCallback, void *vpUserData)
Set the user's callback function for comments.
Definition: xml.c:756
achar
uint_fast8_t achar
achar is the type for the parser's alphabet characters.
Definition: apg.h:91
CHAR_BUF_LEN
#define CHAR_BUF_LEN
Definition: xml.c:106
vpConvCtor
void * vpConvCtor(exception *spEx)
The data conversion object constructor.
Definition: conv.c:134
FMT_CANONICAL
#define FMT_CANONICAL
Display lines with both FMT_HEX and FMT_ASCII formats.
Definition: format.h:55
input_info::bBom
abool bBom
Specifies whether a BOM is present (required for UTF-16).
Definition: xml.c:114
bXmlValidate
abool bXmlValidate(void *vpCtx)
Validate an XML context pointer.
Definition: xml.c:261
cpFmtNext
const char * cpFmtNext(void *vpCtx)
Formats the next line of data.
Definition: format.c:386
entity_decl
Provides the offset into the general 32-bit vector and length of a name and value pair.
Definition: xmlp.h:80
dtd_info::bExtSubset
abool bExtSubset
True if an external subset is declared.
Definition: xml.h:78
dtd_info::uiExternalIds
aint uiExternalIds
The number of external IDs declared.
Definition: xml.h:79
dtd_info::uiAttListsDeclared
aint uiAttListsDeclared
The number of ALL Attribute declarations.
Definition: xml.h:87
dtd_info::spAttElementNames
u32_phrase * spAttElementNames
A list of (uiAttListsUnique) element names of declared attribute defaults.
Definition: xml.h:95
xml.h
Public header file for the APG XML parser API.
cpFmtFirstUnicode
const char * cpFmtFirstUnicode(void *vpCtx, const uint32_t *uipChars, uint64_t uiLength, uint64_t uiOffset, uint64_t uiLimit)
Initiate the iterator over an array of 32-bit Unicode code points.
Definition: format.c:341
xmldecl_info::cpExists
const char * cpExists
"yes" if the XML declaration exists, "no" otherwise.
Definition: xml.h:59
xmldecl_info::cpStandalone
const char * cpStandalone
The value of the standalone declaration.
Definition: xml.h:65
u32_phrase::uipPhrase
const uint32_t * uipPhrase
Pointer to an array of 32-bit unsigned integers.
Definition: lib.h:74
XTHROW
#define XTHROW(ctx, msg)
Exception throw macro.
Definition: exception.h:67
cpType
const char * cpType(aint uiId)
Convert an attribute type ID to an ASCII string.
Definition: attributes.c:484
pfnStartTagCallback
void(* pfnStartTagCallback)(u32_phrase *spName, u32_phrase *spAttNames, u32_phrase *spAttValues, uint32_t uiAttCount, void *vpUserData)
Defines the function type that is called after an element's start tag has been found.
Definition: xml.h:141
vXmlGetArray
void vXmlGetArray(void *vpCtx, uint8_t *ucpData, aint uiDataLen)
Gets the XML byte stream from a byte array.
Definition: xml.c:418
pfnCommentCallback
void(* pfnCommentCallback)(u32_phrase *spComment, void *vpUserData)
Defines the function type that is called after a comment is found.
Definition: xml.h:195
dtd_info::uiGEDeclsUnique
aint uiGEDeclsUnique
A count of the unique and valid General Entities declared.
Definition: xml.h:83
vXmlgrammarRuleCallbacks
void vXmlgrammarRuleCallbacks(void *vpParserCtx)
Definition: basics.c:993
UTF_16BE
#define UTF_16BE
Data type macro for UTF-16BE encoding/decoding.
Definition: conv.h:81
aint
uint_fast32_t aint
The APG parser's unsigned integer type.
Definition: apg.h:79
vConvGetCodePoints
void vConvGetCodePoints(void *vpCtx, uint32_t *uipData, uint32_t *uipDataLen)
Access the intermediate 32-bit data following a call to vConvDecode() or vConvUseCodePoints().
Definition: conv.c:361
vpXmlGetMsgs
void * vpXmlGetMsgs(void *vpCtx)
Give the user a handle to the message log.
Definition: xml.c:579
pfnXmlDeclCallback
void(* pfnXmlDeclCallback)(xmldecl_info *spInfo, void *vpUserData)
Defines the function type that is called after parsing the XML declaration.
Definition: xml.h:115
entity_decl::sValue
cdata_id sValue
The offset (into vpVec32) and length of the value.
Definition: xmlp.h:83
dtd_info::spAttValues
u32_phrase * spAttValues
A list of (uiAttListsUnique) normalized values of declared attribute defaults.
Definition: xml.h:98
vXmlGetFile
void vXmlGetFile(void *vpCtx, const char *cpFileName)
Gets the XML byte stream from a file.
Definition: xml.c:277
parser_config
Defines the input string and other configuration parameters for the parser.
Definition: parser.h:198
cdata_id::uiOffset
uint32_t uiOffset
The offset into vpVec32 array for the start of the data.
Definition: xmlp.h:43
vpMsgsCtor
void * vpMsgsCtor(exception *spEx)
The Message Log constructor.
Definition: msglog.c:68
vMsgsClear
void vMsgsClear(void *vpCtx)
Clears the object of all messages.
Definition: msglog.c:124
xmlp.h
Private header for the APG XML parser's component context. Not needed by application programs.
vpMemAlloc
void * vpMemAlloc(void *vpCtx, aint uiBytes)
Allocates memory.
Definition: memory.c:196
uiVecLen
aint uiVecLen(void *vpCtx)
Get the vector length. That is, the number of elements on the vector.
Definition: vector.c:385
named_value
Provides offsets and lengths in the vpVec32 array for a name and value pair.
Definition: xmlp.h:50
vpVecCtor
void * vpVecCtor(void *vpMem, aint uiElementSize, aint uiInitialAlloc)
The vector object constructor.
Definition: vector.c:118
conv_dst
Defines the output data type, location, length and whether or not to preface with a Byte Order Mark (...
Definition: conv.h:120
dtd_info::spNotationNames
u32_phrase * spNotationNames
A list of the Notation names, if any.
Definition: xml.h:99
dtd_info::uiGEDeclsNotProcessed
aint uiGEDeclsNotProcessed
The number of General Entity declarations not processed because of condition:
Definition: xml.h:85
parser_state
The parser's final state.
Definition: parser.h:183
dtd_info
Information about the Document Type Declaration.
Definition: xml.h:75
cpUtilUtfTypeName
const char * cpUtilUtfTypeName(aint uiType)
Convert a conversion type identifier to a readable, printable ASCII string. Conversion type identifie...
Definition: utilities.c:607
entity_decl::sName
cdata_id sName
The offset (into vpVec32) and length of the name.
Definition: xmlp.h:82
exception
A structure to describe the type and location of a caught exception.
Definition: exception.h:47
vMemFree
void vMemFree(void *vpCtx, const void *vpData)
Free memory previously allocated with vpMemAlloc().
Definition: memory.c:226
vXmlSetStartTagCallback
void vXmlSetStartTagCallback(void *vpCtx, pfnStartTagCallback pfnCallback, void *vpUserData)
Set the user's callback function for the start tags (<name attr="10">).
Definition: xml.c:600
vMsgsDtor
void vMsgsDtor(void *vpCtx)
The object destructor.
Definition: msglog.c:92
luint
uintmax_t luint
luint is used to cast integers suitable for the %"PRIuMAX" printf format.
Definition: apg.h:133
xmldecl_info
Information about the XML declaration.
Definition: xml.h:58
bExValidate
abool bExValidate(exception *spException)
Test an exception structure for validity.
Definition: exception.c:70
UTF_16LE
#define UTF_16LE
Data type macro for UTF-16LE encoding/decoding.
Definition: conv.h:83
vXmlSetDTDCallback
void vXmlSetDTDCallback(void *vpCtx, pfnDTDCallback pfnCallback, void *vpUserData)
Set the user's callback function for the Document Type Declaration (DTD).
Definition: xml.c:731
vConvDecode
void vConvDecode(void *vpCtx, conv_src *spSrc)
Decode a source byte stream to 32-bit Unicode code points.
Definition: conv.c:205
dtd_info::bExists
abool bExists
True if the DTD exists, false otherwise.
Definition: xml.h:76
vpVecFirst
void * vpVecFirst(void *vpCtx)
Get the first element on the vector. The vector is not altered.
Definition: vector.c:326
vpVecPushn
void * vpVecPushn(void *vpCtx, void *vpElement, aint uiCount)
Adds one or more elements to the end of the array.
Definition: vector.c:221
dtd_info::uiGEDeclsDeclared
aint uiGEDeclsDeclared
A count of ALL General Entities declared.
Definition: xml.h:82
line
Defines the characteristics of a single line.
Definition: lines.h:40
vpMemCtor
void * vpMemCtor(exception *spException)
Construct a memory component.
Definition: memory.c:121
vXmlSetEndTagCallback
void vXmlSetEndTagCallback(void *vpCtx, pfnEndTagCallback pfnCallback, void *vpUserData)
Set the user's callback function for the end tags (</name>).
Definition: xml.c:656
spLinesNext
line * spLinesNext(void *vpCtx)
Returns the next line of text from the iterator.
Definition: lines.c:185
xmlgrammar.h
input_info::uiType
aint uiType
The data type as determined from an examination of the first few bytes.
Definition: xml.c:113
vConvEncode
void vConvEncode(void *vpCtx, conv_dst *spDst)
Encode the 32-bit Unicode code points to a byte stream.
Definition: conv.c:302
pfnDTDCallback
void(* pfnDTDCallback)(dtd_info *spInfo, void *vpUserData)
Defines the function type that is called after parsing the Document Type Declaration (DTD).
Definition: xml.h:127
vXmlDisplayMsgs
void vXmlDisplayMsgs(void *vpCtx)
Display the parser's messages on stdout, if any.
Definition: xml.c:565
UTF_UNKNOWN
#define UTF_UNKNOWN
Data type macro for unknown encoding type.
Definition: conv.h:95
xmldecl_info::cpVersion
const char * cpVersion
The value of version="1.ddd". Default is 1.0. Any other value is a fatal error.
Definition: xml.h:60
APG_TRUE
#define APG_TRUE
Definition: apg.h:291
spLinesFirst
line * spLinesFirst(void *vpCtx)
Initialize an iterator over the lines.
Definition: lines.c:170
dtd_info::spAttTypes
u32_phrase * spAttTypes
A list of (uiAttListsUnique) types of declared attribute defaults.
Definition: xml.h:97
vLinesDtor
void vLinesDtor(void *vpCtx)
The lines object destructor.
Definition: lines.c:123
vParserDtor
void vParserDtor(void *vpCtx)
Clears the parser component's context and frees all heap memory associated with this parser.
Definition: parser.c:245
vXmlSetXmlDeclCallback
void vXmlSetXmlDeclCallback(void *vpCtx, pfnXmlDeclCallback pfnCallback, void *vpUserData)
Set the user's callback function for the XML declaration.
Definition: xml.c:706
dtd_info::uiAttListsUnique
aint uiAttListsUnique
The number of unique and valid Attribute declarations.
Definition: xml.h:88
UTF_8
#define UTF_8
Data type macro for UTF-8 encoding/decoding.
Definition: conv.h:77
conv_dst::uiDataLen
aint uiDataLen
[out] Number of bytes in the byte stream.
Definition: conv.h:124
abool
uint8_t abool
abool is the APG bool type.
Definition: apg.h:140
dtd_info::uiElementDecls
aint uiElementDecls
The number of element declarations found.
Definition: xml.h:102
conv_src
Defines the input data type, location and length.
Definition: conv.h:109
xmldecl_info::cpEncoding
const char * cpEncoding
If present must be UTF-8 or UTF-16. It is a fatal error if the data is not in the specified format....
Definition: xml.h:62
XMLGRAMMAR_DOCUMENT
#define XMLGRAMMAR_DOCUMENT
Definition: xmlgrammar.h:92
dtd_info::uiAttListsNotProcessed
aint uiAttListsNotProcessed
The number of Attribute List declarations not processed because of above condition:
Definition: xml.h:90
dtd_info::bStandalone
abool bStandalone
True if standalone = "yes", false if standalone = "no".
Definition: xml.h:77
vFmtDtor
void vFmtDtor(void *vpCtx)
The object destructor.
Definition: format.c:146
vXmlSetPICallback
void vXmlSetPICallback(void *vpCtx, pfnPICallback pfnCallback, void *vpUserData)
Set the user's callback function for the Processing Instruction tags(<?target instructions?...
Definition: xml.c:681
u32_phrase
Defines a pointer to an array of 32-bit unsigned integers plus its length. Typically needed by Unicod...
Definition: lib.h:73
vXmlDtor
void vXmlDtor(void *vpCtx)
The XML Parser component destructor.
Definition: xml.c:242
vConvDtor
void vConvDtor(void *vpCtx)
Conversion object destructor.
Definition: conv.c:161
att_decl
Identifies the element name, attribute name and default attribute value of attribute list declaration...
Definition: xmlp.h:60
dtd_info::uiPEDecls
aint uiPEDecls
The number of Parameter Entities declared.
Definition: xml.h:80
conv_dst::ucpData
uint8_t * ucpData
[out] Pointer to the output byte stream. Valid until another function call on the context handle.
Definition: conv.h:123
vpFmtCtor
void * vpFmtCtor(exception *spEx)
The object constructor.
Definition: format.c:118
vpVecPush
void * vpVecPush(void *vpCtx, void *vpElement)
Adds one element to the end of the array.
Definition: vector.c:193
pfnPICallback
void(* pfnPICallback)(u32_phrase *spTarget, u32_phrase *spInfo, void *vpUserData)
Defines the function type that is called after a Processing Instruction has been found.
Definition: xml.h:184
dtd_info::uiNotationDecls
aint uiNotationDecls
The number of notation declarations found.
Definition: xml.h:101
vConvUseCodePoints
void vConvUseCodePoints(void *vpCtx, uint32_t *uipSrc, aint uiSrcLen)
Insert a stream of 32-bit Unicode code points as the intermediate data.
Definition: conv.c:395
entity_decl::spXml
struct xml_tag * spXml
A copy of the XML object context pointer. Needed for quick sort and quick look up.
Definition: xmlp.h:81
BINARY
#define BINARY
Alias for ISO_8895_1.
Definition: conv.h:64
vVecClear
void vVecClear(void *vpCtx)
Clears all used elements in a vector component.
Definition: vector.c:420
dtd_info::spGENames
u32_phrase * spGENames
A list of (uiGEDeclsUnique) declared General Entity names, if any.
Definition: xml.h:93
line::uiCharIndex
aint uiCharIndex
The zero-based index of the first character of the line.
Definition: lines.h:42
NOBOM
#define NOBOM
The "false" macro for destination BOM flag.
Definition: conv.h:99
APG_FALSE
#define APG_FALSE
Definition: apg.h:292
vParserParse
void vParserParse(void *vpCtx, parser_config *spConfig, parser_state *spState)
Parse an input string of alphabet characters.
Definition: parser.c:268
uiLinesCount
aint uiLinesCount(void *vpCtx)
Returns the number of lines of text.
Definition: lines.c:202
APG Version 7.0 is licensed under the 2-Clause BSD License,
an Open Source Initiative Approved License.