/* Code point 62, ASCII '>'. */
36 #define CLOSE_BRACKET 62
/* Code point 38, ASCII '&'. bHasEntity() records the index of a character equal
 * to this value as the entity offset. */
39 static const uint32_t s_uiAmp = 38;
/* Code point 59, ASCII ';'. bHasEntity() also tests characters against this value. */
40 static const uint32_t s_uiSemi = 59;
/* printf-style layout for a message whose source line could be located:
 * a line number, an offset printed first in decimal and then in upper-case hex,
 * and a trailing string. The three numeric conversions are built from the
 * <inttypes.h> PRIuMAX / PRIXMAX macros, so the matching arguments must have
 * type uintmax_t (the visible caller casts to luint - presumably a uintmax_t
 * alias; confirm). */
41 static const char* s_cpLineFoundFmt =
"line: %2"PRIuMAX
" offset: %2"PRIuMAX
"(0x%02"PRIXMAX
"): %s";
/* Same layout as s_cpLineFoundFmt with an extra "(EOF)" marker before the
 * message string. vThrowError() selects this format when bLinesFindLine()
 * returns false for the offset. Numeric arguments must again be uintmax_t to
 * match the PRIuMAX / PRIXMAX conversions. */
42 static const char* s_cpLineNotFoundFmt =
"line: %2"PRIuMAX
" offset: %2"PRIuMAX
"(0x%02"PRIXMAX
")(EOF): %s";
/* Format a location-prefixed error message and throw it through the parser's
 * exception object.
 *
 * spXml    - parser context; supplies vpLines for the line lookup and
 *            spException for the throw.
 * cpMsg    - caller's message text (presumably the %s argument of the format;
 *            the snprintf argument lists are not fully visible here).
 * uiOffset - input offset handed to bLinesFindLine().
 * uiLine, cpFile, cpFunc - source location of the call site, forwarded
 *            unchanged to vExThrow().
 *
 * NOTE: only part of the body is visible in this listing (the caBuf
 * declaration, the snprintf arguments and the closing braces are missing). */
47 void vThrowError(
xml* spXml,
const char* cpMsg,
aint uiOffset,
unsigned int uiLine,
const char* cpFile,
const char* cpFunc){
48 aint uiXmlLine, uiRelOffset;
/* Ask the line table for the line containing uiOffset; fills uiXmlLine and
 * uiRelOffset. A false result selects the "(EOF)" message format. */
50 if(!
bLinesFindLine(spXml->vpLines, uiOffset, &uiXmlLine, & uiRelOffset)){
53 snprintf(caBuf,
CABUF_LEN, s_cpLineNotFoundFmt,
/* Presumably the else branch (intervening lines not shown): line was found. */
56 snprintf(caBuf,
CABUF_LEN, s_cpLineFoundFmt,
/* Throw the formatted text, tagged with the caller-supplied source location. */
59 vExThrow(spXml->spException, caBuf, uiLine, cpFile, cpFunc);
/* Fragment - the enclosing function header is not visible. It formats a
 * located message using cpTitle as the text; presumably this is vLogMsg(),
 * which this file calls with (spXml, uiOffset, message) - confirm. */
63 aint uiXmlLine, uiRelOffset;
/* Line found: line number, then the relative offset in decimal and hex. */
66 if(
bLinesFindLine(spXml->vpLines, uiOffset, &uiXmlLine, &uiRelOffset)){
67 snprintf(caBuf, uiSize, s_cpLineFoundFmt, (
luint)uiXmlLine, (
luint)uiRelOffset, (
luint)uiRelOffset, cpTitle);
/* NOTE(review): this branch bounds the write with CABUF_LEN while the branch
 * above uses uiSize. Confirm both denote the capacity of caBuf; if uiSize can
 * be smaller than CABUF_LEN this call could write past the buffer. */
71 snprintf(caBuf,
CABUF_LEN, s_cpLineNotFoundFmt,
/* Fragment - enclosing function header not visible. Resets the current element
 * frame: the frame is zero-filled, then the current lengths of vpVec32 and
 * vpVecAttList are stored in it as uiBase32 and uiBaseAtt. */
80 memset((
void*)spXml->spCurrentFrame, 0,
sizeof(
element_frame));
81 spXml->spCurrentFrame->uiBase32 =
uiVecLen(spXml->vpVec32);
82 spXml->spCurrentFrame->uiBaseAtt =
uiVecLen(spXml->vpVecAttList);
/* Fragment - enclosing function header not visible. When a current frame
 * exists, its saved uiBase32 / uiBaseAtt values are passed to vpVecPopi() for
 * vpVec32 and vpVecAttList - presumably cutting each vector back to the length
 * recorded when the frame was set up; confirm against vpVecPopi's contract. */
88 if(spXml->spCurrentFrame){
89 vpVecPopi(spXml->vpVec32, spXml->spCurrentFrame->uiBase32);
90 vpVecPopi(spXml->vpVecAttList, spXml->spCurrentFrame->uiBaseAtt);
/* Consistency check: spFrame (obtained in lines not shown) must be the frame
 * that was current; otherwise the parser state is corrupt and we throw. */
93 if(spFrame != spXml->spCurrentFrame){
94 XML_THROW(
"popped frame not same as current frame");
/* Fragments of the multi-unit decoders (headers and any validation of the
 * input units are not visible). The masks and shifts follow the UTF-8 payload
 * layout for 2-, 3- and 4-byte sequences. */
/* Two units: low 5 bits of unit 0 -> bits 6-10, low 6 bits of unit 1 -> bits 0-5. */
103 uiChar = ((uint32_t)acpBytes[0] & 0x1f) << 6;
104 uiChar += (uint32_t)acpBytes[1] & 0x3f;
/* Three units: 4 + 6 + 6 payload bits. */
110 uiChar = ((uint32_t)acpBytes[0] & 0xf) << 12;
111 uiChar += ((uint32_t)acpBytes[1] & 0x3f) << 6;
112 uiChar += (uint32_t)acpBytes[2] & 0x3f;
/* Four units: 3 + 6 + 6 + 6 payload bits. */
118 uiChar = ((uint32_t)acpBytes[0] & 0x7) << 18;
119 uiChar += ((uint32_t)acpBytes[1] & 0x3f) << 12;
120 uiChar += ((uint32_t)acpBytes[2] & 0x3f) << 6;
121 uiChar += (uint32_t)acpBytes[3] & 0x3f;
/* Fragment - header and branch bodies not visible. The tested ranges are
 * exactly the code points excluded by the XML 1.0 "Char" production:
 * 0-8, 11-12, 14-31, the surrogate block, 0xFFFE/0xFFFF and anything above
 * 0x10FFFF. What each branch does with a match is not shown. */
/* NOTE(review): if uiChar is an unsigned type (the decoders in this file assign
 * it from uint32_t expressions), "uiChar >= 0" is always true and may trigger
 * a compiler warning; "uiChar < 9" alone would be equivalent. Confirm the type. */
127 if(uiChar >= 0 && uiChar < 9){
/* 11 and 12 (vertical tab, form feed). */
130 if(uiChar > 10 && uiChar < 13){
/* 14 through 31. */
133 if(uiChar > 13 && uiChar < 32){
/* UTF-16 surrogate range 0xD800-0xDFFF. */
136 if(uiChar >= 0xD800 && uiChar < 0xE000){
139 if(uiChar == 0xFFFE || uiChar == 0xFFFF){
/* Beyond the last Unicode code point. */
143 if(uiChar > 0x10FFFF){
/* Fragment - enclosing function and the guarding condition are not visible.
 * Reports an out-of-range vector index at input offset uiOffset, tagging the
 * throw with this source location. */
162 vThrowError(spXml,
"vector index unexpectedly out of range",
163 uiOffset, __LINE__, __FILE__, __func__);
/* Two fragments with the same shape (enclosing functions not visible): reserve
 * a block on vpVec8 by calling vpVecPushn() with a NULL data pointer, then fill
 * the returned block element by element, narrowing each input unit to uint8_t. */
/* Copy of uiLen units from acpInput. */
170 uint8_t* ucpData = (uint8_t*)
vpVecPushn(spXml->vpVec8, NULL, uiLen);
174 for(ui = 0; ui < uiLen; ui++){
175 ucpData[ui] = (uint8_t)acpInput[ui];
/* Copy of uiPhraseLength units from acpPhrase. */
198 uint8_t* ucpBuf = (uint8_t*)
vpVecPushn(spXml->vpVec8, NULL, uiPhraseLength);
204 for(ui = 0; ui < uiPhraseLength; ui++){
205 ucpBuf[ui] = (uint8_t)acpPhrase[ui];
/* Fragment - enclosing function header not visible. Converts acpData
 * (uiDataLen units) into 32-bit code points stored on vpVec32, reporting where
 * they start through *uipOffset. */
/* NULL or empty input: the offset reported is the current end of vpVec32
 * (the rest of this branch is not shown). */
235 if(!acpData || !uiDataLen){
236 *uipOffset =
uiVecLen(spXml->vpVec32);
242 uint32_t* uipCodePoints;
/* By default the input is reinterpreted in place as bytes. */
243 uint8_t* ucpData = (uint8_t*)acpData;
/* The sizeof comparison is a compile-time constant: only when achar differs in
 * size from uint8_t is the input copied, one narrowed unit at a time, into a
 * block reserved on vpVec8. */
244 if(
sizeof(
achar) !=
sizeof(uint8_t)){
246 ucpData = (uint8_t*)
vpVecPushn(spXml->vpVec8, NULL, uiDataLen);
247 for(ui = 0; ui < uiDataLen; ui++){
248 ucpData[ui] = (uint8_t)acpData[ui];
/* Output begins at the current end of vpVec32; room for *uipLength code points
 * (computed in lines not shown) is reserved there. */
256 *uipOffset =
uiVecLen(spXml->vpVec32);
257 uipCodePoints = (uint32_t*)
vpVecPushn(spXml->vpVec32, NULL, *uipLength);
/* Fragments of the comment handling code (enclosing function headers are not
 * visible). */
/* Rejects a comment containing a double hyphen; the triggering condition is in
 * lines not shown. */
270 XML_THROW(
"double hyphens ('--' or '--->') not allowed in comments");
/* The comment text is only prepared when the user registered a callback. */
276 if(spXml->pfnCommentCallback){
/* Compile-time check: a narrowing copy is needed only when achar is not
 * byte-sized. */
278 if(
sizeof(
achar) !=
sizeof(uint8_t)){
285 ucpComment[ui] = (uint8_t)spData->
acpString[uj];
/* Reserve uiComLen 32-bit slots on vpVec32 for the comment's code points. */
298 uint32_t* uipCom = (uint32_t*)
vpVecPushn(spXml->vpVec32, NULL, uiComLen);
/* Hand the comment data and the user's context pointer to the callback. */
305 spXml->pfnCommentCallback(&sComData, spXml->vpCommentData);
/* Fragments of the processing-instruction (PI) handling code; several
 * functions are involved and none of their headers is visible. */
/* Invoke the user's PI callback, if one is registered, with the target and
 * info descriptors and the user's context pointer. */
335 if(spXml->pfnPICallback){
339 spXml->pfnPICallback(&sTarget, &sInfo, spXml->vpPIData);
343 XML_THROW(
"expected close of processing instruction not found");
/* Copy the name accumulated in vpVecName (uiLen code points) to the end of
 * vpVec32 and record its offset and length in the current frame's sSName. */
351 uint32_t* uipName = (uint32_t*)
vpVecFirst(spXml->vpVecName);
352 spXml->spCurrentFrame->sSName.uiOffset =
uiVecLen(spXml->vpVec32);
353 spXml->spCurrentFrame->sSName.uiLength = uiLen;
354 vpVecPushn(spXml->vpVec32, (
void*)uipName, uiLen);
357 XML_THROW(
"processing instruction target is invalid");
/* Start an empty sEName string at the current end of vpVec32. */
363 spXml->spCurrentFrame->sEName.uiOffset =
uiVecLen(spXml->vpVec32);
364 spXml->spCurrentFrame->sEName.uiLength = 0;
/* 63 is '?': a '?' followed by the current character accounts for two code
 * points (the push itself is on a line not shown). */
370 uint32_t uiChars[2] = {63, spXml->uiChar};
372 spXml->spCurrentFrame->sEName.uiLength += 2;
/* Ordinary case: append the current character and grow the string by one. */
379 vpVecPush(spXml->vpVec32, &spXml->uiChar);
380 spXml->spCurrentFrame->sEName.uiLength += 1;
/* Reserved-name checks; the comparisons that trigger them are not shown. */
386 XML_THROW(
"Processing Instruction name \"xml\" is forbidden - see https://www.w3.org/XML/xml-V10-5e-errata");
392 XML_THROW(
"Processing Instruction names beginning with \"xml-\" are reserved - see https://www.w3.org/XML/xml-V10-5e-errata");
/* Fragment - header and branch bodies not visible. Classifies the current
 * character against the ranges of the XML 1.0 NameStartChar production:
 * A-Z, a-z, ':' (58), '_' (95) and the listed non-ASCII ranges. What each
 * branch does on a match is not shown. */
445 uint32_t ui = spXml->uiChar;
/* 'A'-'Z' */
447 if(ui >= 65 && ui <= 90){
/* 'a'-'z' */
450 if(ui >= 97 && ui <= 122){
/* ':' or '_' */
453 if(ui == 58 || ui == 95){
456 if(ui >= 0xC0 && ui <= 0xD6){
459 if(ui >= 0xD8 && ui <= 0xF6){
462 if(ui >= 0xF8 && ui <= 0x2FF){
465 if(ui >= 0x370 && ui <= 0x37D){
468 if(ui >= 0x37F && ui <= 0x1FFF){
471 if(ui >= 0x200C && ui <= 0x200D){
474 if(ui >= 0x2070 && ui <= 0x218F){
477 if(ui >= 0x2C00 && ui <= 0x2FEF){
480 if(ui >= 0x3001 && ui <= 0xD7FF){
483 if(ui >= 0xF900 && ui <= 0xFDCF){
486 if(ui >= 0xFDF0 && ui <= 0xFFFD){
489 if(ui >= 0x10000 && ui <= 0xEFFFF){
/* Append the current character to vpVecName. */
496 vpVecPush(spXml->vpVecName, &spXml->uiChar);
/* Fragment - header and branch bodies not visible. Tests the current character
 * against ranges that the XML 1.0 NameChar production adds on top of
 * NameStartChar: digits, '-' and '.', combining marks 0x300-0x36F and
 * 0x203F-0x2040. Other tests may exist on lines not shown. */
514 uint32_t ui = spXml->uiChar;
/* '0'-'9' */
516 if(ui >= 48 && ui <= 57){
/* '-' or '.' */
519 if(ui == 45 || ui == 46){
526 if(ui >= 0x300 && ui <= 0x36F){
529 if(ui >= 0x203F && ui <= 0x2040){
/* Append the current character to vpVecName. */
536 vpVecPush(spXml->vpVecName, &spXml->uiChar);
/* Fragment - header not visible. Converts uiCount code points in uipChars from
 * hexadecimal digits into a 32-bit sum; presumably the body of uiHexValue32(),
 * which the attribute-value code in this file calls with matching arguments -
 * confirm. */
547 uint32_t uiDigit = 0;
549 for(ui = 0; ui < uiCount; ui++){
/* Digit value by class: '0'-'9' (48-57) minus 48, 'A'-'F' (65-70) minus 55,
 * 'a'-'f' (97-102) minus 87. */
550 if(uipChars[ui] >= 48 && uipChars[ui] <= 57){
551 uiDigit = uipChars[ui] - 48;
552 }
else if(uipChars[ui] >= 65 && uipChars[ui] <= 70){
553 uiDigit = uipChars[ui] - 55;
554 }
else if(uipChars[ui] >= 97 && uipChars[ui] <= 102){
555 uiDigit = uipChars[ui] - 87;
/* Presumably the final else branch: any other code point is reported.
 * NOTE(review): "%c" with a (char) cast prints only the low byte of the 32-bit
 * code point, so a non-ASCII offender shows up as a different character. */
557 snprintf(caBuf,
CABUF_LEN,
"illegal hex digit in Reference: %c", (
char)uipChars[ui]);
559 uiOffset, __LINE__, __FILE__, __func__);
/* NOTE(review): both overflow messages on this hexadecimal path say "decimal
 * value" - looks carried over from the decimal routine; consider rewording.
 * The condition guarding this first throw is on a line not shown. */
562 vThrowError(spXml,
"decimal value in Reference is too large: causes uint32_t overflow",
563 uiOffset, __LINE__, __FILE__, __func__);
/* A false return from bSum32() is treated as overflow of the running sum. */
565 if(!
bSum32(uiSum, uiDigit, &uiSum)){
566 vThrowError(spXml,
"decimal value in Reference is too large: causes uint32_t overflow",
567 uiOffset, __LINE__, __FILE__, __func__);
/* Message for a value that is not accepted as a character; the test that
 * leads here is not shown. */
572 snprintf(caBuf,
CABUF_LEN,
"Well-formedness Constraint: Legal Character\n"
573 "Characters referred to using character references MUST match the production for Char\n"
574 "https://www.w3.org/TR/REC-xml/#sec-references\n"
575 "hex character: 0x%X", uiSum);
577 uiOffset, __LINE__, __FILE__, __func__);
/* Fragment - header not visible. Converts uiCount code points in uipChars from
 * decimal digits into a 32-bit sum; presumably the body of uiDecValue32(),
 * which the attribute-value code in this file calls with matching arguments -
 * confirm. uiSum has no initializer here; it is presumably set on a line not
 * shown before the loop. */
582 uint32_t ui, uiSum, uiDigit;
584 for(ui = 0; ui < uiCount; ui++){
/* NOTE(review): no range check on the digit is visible; a code point below 48
 * wraps to a large unsigned value. Presumably the grammar guarantees '0'-'9'
 * before this is called - confirm. */
585 uiDigit = uipChars[ui] - 48;
/* First overflow report; its guarding condition is on a line not shown. */
587 vThrowError(spXml,
"decimal value in Reference is too large: causes uint32_t overflow",
588 uiOffset, __LINE__, __FILE__, __func__);
/* A false return from bSum32() is treated as overflow of the running sum. */
590 if(!
bSum32(uiSum, uiDigit, &uiSum)){
591 vThrowError(spXml,
"decimal value in Reference is too large: causes uint32_t overflow",
592 uiOffset, __LINE__, __FILE__, __func__);
/* Message for a value that is not accepted as a character; the test that
 * leads here is not shown. */
597 snprintf(caBuf,
CABUF_LEN,
"Well-formedness Constraint: Legal Character\n"
598 "Characters referred to using character references MUST match the production for Char\n"
599 "https://www.w3.org/TR/REC-xml/#sec-references\n"
600 "decimal character: %u", uiSum);
602 uiOffset, __LINE__, __FILE__, __func__);
/* Two look-up fragments with the same shape (enclosing function headers and
 * loop control are not visible). Each probes the middle entry of spNamed and
 * compares its stored name with (uipName, uiNameLen) via iCompNames() - this
 * looks like a binary search over a sorted table; confirm. Names are stored
 * as offset/length pairs into vpVec32. */
/* First variant: entries keyed by sElementName. Skipped entirely when the
 * table pointer is NULL or the count is zero. */
613 if(spNamed && uiCount){
614 uint32_t* uipChar32 = (uint32_t*)
vpVecFirst(spXml->vpVec32);
/* Midpoint written as L + (R - L)/2, which cannot overflow the way
 * (L + R)/2 can. */
620 uiM = uiL + (uiR - uiL)/2;
621 spAm = &spNamed[uiM];
622 i =
iCompNames(&uipChar32[spAm->sElementName.uiOffset], spAm->sElementName.uiLength, uipName, uiNameLen);
630 spAm = &spNamed[uiL];
/* Second variant: entries keyed by sName. */
652 if(spNamed && uiCount){
653 uint32_t* uipChar32 = (uint32_t*)
vpVecFirst(spXml->vpVec32);
657 uiM = uiL + (uiR - uiL)/2;
658 spAm = &spNamed[uiM];
659 i =
iCompNames(&uipChar32[spAm->sName.uiOffset], spAm->sName.uiLength, uipName, uiNameLen);
667 spAm = &spNamed[uiL];
/* Equality test for two names given as arrays of 32-bit code points with
 * explicit lengths. Delegates to iCompNames() and checks for a zero result;
 * the return statements are on lines not shown here.
 *
 * uipLName, uiLLen - left name and its length in code points.
 * uipRName, uiRLen - right name and its length in code points. */
676 abool bCompNames(
const uint32_t* uipLName, uint32_t uiLLen,
const uint32_t* uipRName, uint32_t uiRLen){
677 if(0 ==
iCompNames(uipLName, uiLLen, uipRName, uiRLen)){
/* Fragment - enclosing function header not visible. Walks a raw attribute
 * value (uipAttValue, uiLength code points), resolving character and entity
 * references, then checks the accumulated result in vpVecAttWork. */
/* The doubled ';' below is a stray empty declaration - harmless. */
684 uint32_t* uipData, *uipDataEnd;;
686 uint32_t ui, uiInc, uiChar, uiLastChar, uiRef;
690 while(ui < uiLength){
/* ATT_AMP marks the start of a reference. */
691 if(uipAttValue[ui] ==
ATT_AMP){
/* Look ahead from the character after it for the terminator (the test
 * inside this loop is not shown). */
692 for(uiInc = ui + 1; uiInc < uiLength; uiInc++){
696 vThrowError(spXml,
"attribute value has & (begins character or entity reference) with no closing ;",
697 uiOffset, __LINE__, __FILE__, __func__);
/* NOTE(review): reads index ui+2; confirm the lines not shown guarantee
 * ui+2 < uiLength at this point. ATT_X selects the hexadecimal conversion,
 * otherwise (presumably) the decimal one is used. */
700 if(uipAttValue[ui+2] ==
ATT_X){
/* Convert the digits between uiRef and uiInc. */
703 uiChar =
uiHexValue32(spXml, uiOffset, &uipAttValue[uiRef], uiInc-uiRef);
707 uiChar =
uiDecValue32(spXml, uiOffset, &uipAttValue[uiRef], uiInc-uiRef);
715 vThrowError(spXml,
"undeclared entity name in attribute list value",
716 uiOffset, __LINE__, __FILE__, __func__);
/* Tab (9), line feed (10) and carriage return (13) are singled out in both
 * tests below; the action taken is not shown (presumably replacement by a
 * space, as XML attribute-value normalization requires - confirm). */
724 for(; uipData < uipDataEnd; uipData++){
725 if(*uipData == 9 || *uipData == 10 || *uipData == 13){
733 uiChar = uipAttValue[ui++];
734 if(uiChar == 9 || uiChar == 10 || uiChar == 13){
/* Scan the work vector for 60 ('<') and log the well-formedness violation. */
743 uiInc = (uint32_t)
uiVecLen(spXml->vpVecAttWork);
744 for(ui = 0; ui < uiInc; ui ++){
745 if(uipData[ui] == 60){
746 vLogMsg(spXml, uiOffset,
"Well-formedness constraint: No \"<\" in Attribute Values\n"
747 "The replacement text of any entity referred to directly or indirectly in an attribute value MUST NOT contain a <.");
/* Fragments of attribute-value normalization passes over vpVecAttWork
 * (enclosing function headers and loop bodies are not visible). */
/* Pointer-based pass from the first to one past the last work character. */
761 uint32_t* uipChar = (uint32_t*)
vpVecFirst(spXml->vpVecAttWork);
762 uint32_t* uipEnd = uipChar + (uint32_t)
uiVecLen(spXml->vpVecAttWork);
764 for(; uipChar < uipEnd; uipChar++){
/* Defensive throw for a state the author considers unreachable. */
783 vThrowError(spXml,
"attribute value normalization: should never get here",
784 uiOffset, __LINE__, __FILE__, __func__);
/* Index-based pass; uiLastChar is compared with 32 (space), presumably to
 * collapse runs of spaces - the surrounding logic is not shown. */
789 uipChar = (uint32_t*)
vpVecFirst(spXml->vpVecAttWork);
790 uiLength = (uint32_t)
uiVecLen(spXml->vpVecAttWork);
794 for(ui = 0; ui < uiLength; ui++){
795 uiChar = uipChar[ui];
802 if(uiLastChar == 32){
/* Two comparison-loop fragments with identical visible shape (enclosing
 * functions not visible): element-wise comparison of uipLeft and uipRight over
 * uiLen entries, with separate branches for "less" and "greater". The values
 * returned by each branch are not shown. */
816 for(; ui < uiLen; ui++){
817 if(uipLeft[ui] < uipRight[ui]){
820 if(uipLeft[ui] > uipRight[ui]){
/* Second instance of the same loop. */
839 for(; ui < uiLen; ui++){
840 if(uipLeft[ui] < uipRight[ui]){
843 if(uipLeft[ui] > uipRight[ui]){
/* Fragment - enclosing function header not visible. Expands general entity
 * references found in a run of characters stored in vpVec32, appending the
 * result to the end of the same vector.
 *
 * Pattern used throughout: after every vpVecPushn() on vpVec32 the base
 * pointer uipChars32 is fetched again with vpVecFirst() before it is indexed -
 * presumably because a push may relocate the vector's storage; confirm. */
857 uint32_t* uipChars32, *uipTemp;
859 uint32_t uiEntityOffset, uiEntityLength;
860 uint32_t uiFromOffset, uiRemainingChars, uiCopyChars;
862 aint uiFrames, uiReplacementBegin;
866 uipChars32 = (uint32_t*)
vpVecFirst(spXml->vpVec32);
/* Keep going while the unprocessed tail still contains a reference. The name
 * passed on is the span between the delimiters: offset + 1, length - 2. */
867 while(
bHasEntity(&uipChars32[uiFromOffset], uiRemainingChars, &uiEntityOffset, &uiEntityLength)){
870 &uipChars32[uiFromOffset + uiEntityOffset + 1], (uiEntityLength - 2));
872 vThrowError(spXml,
"General Entity refers to undeclared entity",
/* Walk the entity frames recorded so far to detect a reference cycle (the
 * comparison itself is not shown). */
876 uiFrames =
uiVecLen(spXml->vpVecEntityFrames);
880 for(; spParent < spEnd; spParent++){
882 vThrowError(spXml,
"General Entity refers to itself indirectly",
/* The replacement text starts at the current end of vpVec32. */
891 uiReplacementBegin =
uiVecLen(spXml->vpVec32);
/* Copy the uiEntityOffset characters that precede the reference. */
895 uipTemp = (uint32_t*)
vpVecPushn(spXml->vpVec32, NULL, uiEntityOffset);
896 uipChars32 = (uint32_t*)
vpVecFirst(spXml->vpVec32);
897 for(ui = 0; ui < uiEntityOffset; ui++){
898 uipTemp[ui] = uipChars32[uiFromOffset + ui];
/* Advance past the prefix and the reference; the remaining count is clamped
 * at zero rather than allowed to wrap. */
903 uiCopyChars = uiEntityOffset + uiEntityLength;
904 uiFromOffset += uiCopyChars;
905 uiRemainingChars = (uiCopyChars < uiRemainingChars) ? (uiRemainingChars - uiCopyChars) : 0;
909 uipTemp = (uint32_t*)
vpVecPushn(spXml->vpVec32, NULL, uiCopyChars);
910 uipChars32 = (uint32_t*)
vpVecFirst(spXml->vpVec32);
911 for(ui = 0; ui < uiCopyChars; ui++){
915 spEntityFound->
sValue.
uiLength, &uiEntityOffset, &uiEntityLength)){
/* Append uiEntityLength characters starting at uiEntityOffset. */
923 uipTemp = (uint32_t*)
vpVecPushn(spXml->vpVec32, NULL, uiEntityLength);
924 uipChars32 = (uint32_t*)
vpVecFirst(spXml->vpVec32);
925 for(ui = 0; ui < uiEntityLength; ui++){
926 uipTemp[ui] = uipChars32[uiEntityOffset + ui];
/* Append whatever follows the last reference. */
932 if(uiRemainingChars){
933 uipTemp = (uint32_t*)
vpVecPushn(spXml->vpVec32, NULL, uiRemainingChars);
934 uipChars32 = (uint32_t*)
vpVecFirst(spXml->vpVec32);
935 for(ui = 0; ui < uiRemainingChars; ui++){
936 uipTemp[ui] = uipChars32[uiFromOffset + ui];
/* Scan uipChars[0 .. uiLen) for an entity reference.
 *
 * uipChars        - code points to scan.
 * uiLen           - number of code points available.
 * uipEntityOffset - out: cleared to 0 on entry, then set to the index of a
 *                   character equal to s_uiAmp ('&').
 * uipEntityLen    - out: receives uiFoundLen.
 *
 * Only part of the body is visible: the declaration of ui, the code that
 * maintains uiFoundLen and the return statements are on lines not shown. The
 * caller in this file uses offset + 1 and length - 2 to isolate the name, so
 * the reported length presumably spans both the '&' and the ';' - confirm. */
946 abool bHasEntity(uint32_t* uipChars, uint32_t uiLen, uint32_t* uipEntityOffset, uint32_t* uipEntityLen){
949 uint32_t uiFoundLen = 0;
950 *uipEntityOffset = 0;
951 for(; ui < uiLen; ui++){
/* Remember where a candidate reference begins. */
952 if(uipChars[ui] == s_uiAmp){
953 *uipEntityOffset = ui;
/* A ';' is a candidate terminator; what happens next is not shown. */
956 if(uipChars[ui] == s_uiSemi){
965 *uipEntityLen = uiFoundLen;
/* Compare two names given as arrays of 32-bit code points with explicit
 * lengths, returning an int. bCompNames() in this file treats a result of 0
 * as "equal".
 *
 * Visible behavior: the arrays are compared element by element over the
 * shorter of the two lengths, with separate branches for "left smaller" and
 * "left greater". The values returned by those branches and the handling of a
 * common prefix with unequal lengths are on lines not shown. */
969 int iCompNames(
const uint32_t* uipLName, uint32_t uiLLen,
const uint32_t* uipRName, uint32_t uiRLen){
/* Only the common prefix can be compared index by index. */
970 uint32_t uiLen = (uiLLen < uiRLen) ? uiLLen : uiRLen;
972 for(; ui < uiLen; ui++){
973 if(uipLName[ui] < uipRName[ui]){
976 if(uipLName[ui] > uipRName[ui]){
/* Fragment - enclosing function not visible. Zero-fills cb.
 * NOTE(review): sizeof(cb) is the full object size only if cb is an array in
 * this scope; if cb is a pointer, only sizeof(pointer) bytes are cleared. The
 * declaration of cb is not shown - confirm. */
996 memset((
void*)cb, 0,
sizeof(cb));