Version 7.0
Copyright © 2021 Lowell D. Thomas
APG
… an ABNF Parser Generator
conv.c
Go to the documentation of this file.
1 /* *************************************************************************************
2  Copyright (c) 2021, Lowell D. Thomas
3  All rights reserved.
4 
5  This file is part of APG Version 7.0.
6  APG Version 7.0 may be used under the terms of the BSD 2-Clause License.
7 
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions are met:
10 
11  1. Redistributions of source code must retain the above copyright notice, this
12  list of conditions and the following disclaimer.
13 
14  2. Redistributions in binary form must reproduce the above copyright notice,
15  this list of conditions and the following disclaimer in the documentation
16  and/or other materials provided with the distribution.
17 
18  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 * *************************************************************************************/
51 #include "../library/lib.h"
52 #include "./conv.h"
53 
// Base64 line-length constant. It is not referenced in the visible part of this file;
// presumably the conventional characters-per-line value - TODO confirm where it is used.
55 #define BASE64_LINE_LEN 76
// ASCII code of '=', the base64 padding ("tail") character tested in vBase64Validate().
57 #define TAIL_CHAR 61
// Selects every bit above the low byte; a non-zero result means the value does not fit in 8 bits.
58 #define NON_BYTE_MASK 0xFFFFFF00
// Selects the low 8 bits of a word.
59 #define BYTE_MASK 0xFF
60 
// The address of this literal is stored in conv.vpValidate by the constructor and
// compared by every public function to recognize a valid context.
62 static const void* s_vpMagicNumber = (void*)"conv";
// Byte order marks, one table per encoding. The bIsBOMxx() helpers compare the
// leading input bytes against these and the encoders emit them when a BOM is requested.
63 static uint8_t s_caBOM8[] = {0xEF, 0xBB, 0xBF};
64 static uint8_t s_caBOM16BE[] = {0xFE, 0xFF};
65 static uint8_t s_caBOM16LE[] = {0xFF, 0xFE};
66 static uint8_t s_caBOM32BE[] = {0, 0, 0xFE, 0xFF};
67 static uint8_t s_caBOM32LE[] = {0xFF, 0xFE, 0, 0};
// Carriage return (13) and line feed (10). vBase64Encode() pushes both bytes for
// CRLF line ends or only the byte at index 1 for LF line ends.
68 static uint8_t s_ucaCRLF[] = {13,10};
69 
// Description of a conversion error.
// NOTE(review): the code in this file also accesses sError.bHasError, which is not
// present in this listing - the listing appears to omit at least one member.
73 typedef struct {
// presumably the offending value passed to vSetError() - confirm against its definition
74  uint32_t uiValue;
// presumably the offset at which the offending value was found - confirm against vSetError()
75  uint32_t uiOffset;
// error message; read back by callers and handed to XTHROW after vSetError() is called
76  const char* cpMsg;
78 } conv_error;
79 
// The conversion context handed out (as void*) by vpConvCtor().
// NOTE(review): functions in this file also use spException, sError, uiTail,
// uiBase64LineLen and uiBase64LineEnd, which are not present in this listing -
// the listing appears to omit several members.
83 typedef struct {
// set to s_vpMagicNumber by the constructor; checked to validate a context pointer
84  const void* vpValidate;
// memory context created with vpMemCtor(); destroying it releases the whole object
86  void* vpMem;
// byte vector holding the (possibly base64-decoded) input stream
88  void* vpVecInput;
// byte vector holding the encoded output stream
89  void* vpVecOutput;
// vector of uint32_t values: the decoded data that the encoders read from
90  void* vpVec32bit;
95 } conv;
96 
97 static abool bIsBOM8(uint8_t* ucpStream, aint uiLen);
98 static abool bIsBOM16BE(uint8_t* ucpStream, aint uiLen);
99 static abool bIsBOM16LE(uint8_t* ucpStream, aint uiLen);
100 static abool bIsBOM32BE(uint8_t* ucpStream, aint uiLen);
101 static abool bIsBOM32LE(uint8_t* ucpStream, aint uiLen);
102 static void vBase64Encode(conv* spConv, conv_dst* spDst);
103 static void vBase64Decode(conv* spConv, uint8_t* ucpSrc, aint uiSrcLen);
104 static void vBase64Validate(conv* spConv, uint8_t* ucpSrc, aint uiSrcLen);
105 static void vBinaryEncode(conv* spConv);
106 static void vBinaryDecode(conv* spConv);
107 static void vUtf32BEDecode(conv* spConv, uint8_t* ucpData, aint uiDataLen);
108 static void vUtf32LEDecode(conv* spConv, uint8_t* ucpData, aint uiDataLen);
109 static void vUtf32BEEncode(conv* spConv, conv_dst* spDst);
110 static void vUtf32LEEncode(conv* spConv, conv_dst* spDst);
111 static void vUtf16BEDecode(conv* spConv, uint8_t* ucpData, aint uiDataLen);
112 static void vUtf16LEDecode(conv* spConv, uint8_t* ucpData, aint uiDataLen);
113 static void vUtf16BEEncode(conv* spConv, conv_dst* spDst);
114 static void vUtf16LEEncode(conv* spConv, conv_dst* spDst);
115 static void vUtf8Encode(conv* spConv, conv_dst* spDst);
116 static void vUtf8Decode(conv* spConv, uint8_t* ucpData, aint uiDataLen);
117 static void vSetError(conv* spConv, uint32_t uiValue, uint32_t uiOffset, const char* cpMsg);
118 
// Base64 alphabet as ASCII codes: indexes 0-25 are 'A'-'Z', 26-51 are 'a'-'z',
// 52-61 are '0'-'9', 62 is '+', 63 is '/', and index 64 is the padding character '='.
119 static uint8_t ucaBase64Chars[] = {
120  65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
121  97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,
122  48,49,50,51,52,53,54,55,56,57,
123  43,47,61
124 };
// Isolates one 6-bit group when encoding.
125 static uint32_t uiEncode64Mask = 0x0000003F;
// Isolates one 8-bit byte when decoding.
126 static uint32_t uiDecode64Mask = 0x000000FF;
127 
134 void* vpConvCtor(exception* spEx){
135  conv* spConv = NULL;
136  if(bExValidate(spEx)){
137  void* vpMem = vpMemCtor(spEx);
138  spConv = (conv*)vpMemAlloc(vpMem, sizeof(conv));
139  memset(spConv, 0, sizeof(conv));
140  spConv->vpMem = vpMem;
141  spConv->spException = spEx;
142  aint uiBufSize = 128 * 1024;
143  spConv->vpVecInput = (uint8_t*)vpVecCtor(vpMem, sizeof(uint8_t), sizeof(uint8_t) * uiBufSize);
144  spConv->vpVecOutput = (uint8_t*)vpVecCtor(vpMem, sizeof(uint8_t), sizeof(uint8_t) * uiBufSize);
145  spConv->vpVec32bit = (uint32_t*)vpVecCtor(vpMem, sizeof(uint32_t), sizeof(uint32_t) * uiBufSize);
147  spConv->uiBase64LineEnd = BASE64_LF;
148  spConv->vpValidate = s_vpMagicNumber;
149  return (void*)spConv;
150  }
151  vExContext();
152  return NULL;
153 }
154 
161 void vConvDtor(void* vpCtx){
162  conv* spCtx = (conv*)vpCtx;
163  if(vpCtx){
164  if(spCtx->vpValidate == s_vpMagicNumber){
165  void* vpMem = spCtx->vpMem;
166  memset(vpCtx, 0, sizeof(conv));
167  vMemDtor(vpMem);
168  }else{
169  vExContext();
170  }
171  }
172 }
173 
186 void vConvConfigureBase64(void* vpCtx, aint uiLineLen, aint uiLineEnd){
187  conv* spConv = (conv*)vpCtx;
188  if(spConv && spConv->vpValidate == s_vpMagicNumber){
189  if(uiLineEnd == BASE64_LF || uiLineEnd == BASE64_CRLF){
190  spConv->uiBase64LineEnd = uiLineEnd;
191  }else{
192  XTHROW(spConv->spException, "uiLineEnd must be one of BASE64_LF or BASE64_CRLF");
193  }
194  spConv->uiBase64LineLen = uiLineLen;
195  }else{
196  vExContext();
197  }
198 }
199 
205 void vConvDecode(void* vpCtx, conv_src* spSrc){
206  conv* spConv = (conv*)vpCtx;
207  if(spConv && spConv->vpValidate == s_vpMagicNumber){
208  vVecClear(spConv->vpVecInput);
209  vVecClear(spConv->vpVecOutput);
210  vVecClear(spConv->vpVec32bit);
211  spConv->sError.bHasError = APG_FALSE;
212  uint8_t* ucpData;
213  aint uiDataLen;
214  if(!spSrc || !spSrc->ucpData || !spSrc->uiDataLen){
215  XTHROW(spConv->spException, "source cannot be NULL or empty");
216  return;
217  }
218  if(spSrc->uiDataType & BASE64_MASK){
219  // preprocess with base64 conversion
220  vBase64Decode(spConv, spSrc->ucpData, spSrc->uiDataLen);
221  }else{
222  // make a permanent copy of user's input data
223  vpVecPushn(spConv->vpVecInput, spSrc->ucpData, spSrc->uiDataLen);
224  }
225  ucpData = (uint8_t*)vpVecFirst(spConv->vpVecInput);
226  uiDataLen = uiVecLen(spConv->vpVecInput);
227  if(!ucpData || !uiDataLen){
228  XTHROW(spConv->spException, "internal error processing input");
229  return;
230  }
231  switch(spSrc->uiDataType & TYPE_MASK){
232  case BINARY:
233  vBinaryDecode(spConv);
234  break;
235  case UTF_8:
236  if(bIsBOM8(ucpData, uiDataLen)){
237  vUtf8Decode(spConv, (ucpData + 3), (uiDataLen - 3));
238  }else{
239  vUtf8Decode(spConv, ucpData, uiDataLen);
240  }
241  break;
242  case UTF_16:
243  if(bIsBOM16BE(ucpData, uiDataLen)){
244  vUtf16BEDecode(spConv, (ucpData + 2), (uiDataLen - 2));
245  }else if(bIsBOM16LE(ucpData, uiDataLen)){
246  vUtf16LEDecode(spConv, (ucpData + 2), (uiDataLen - 2));
247  }else{
248  vUtf16BEDecode(spConv, ucpData, uiDataLen);
249  }
250  break;
251  case UTF_16BE:
252  if(bIsBOM16BE(ucpData, uiDataLen)){
253  vUtf16BEDecode(spConv, (ucpData + 2), (uiDataLen - 2));
254  }else{
255  vUtf16BEDecode(spConv, ucpData, uiDataLen);
256  }
257  break;
258  case UTF_16LE:
259  if(bIsBOM16LE(ucpData, uiDataLen)){
260  vUtf16LEDecode(spConv, (ucpData + 2), (uiDataLen - 2));
261  }else{
262  vUtf16LEDecode(spConv, ucpData, uiDataLen);
263  }
264  break;
265  case UTF_32:
266  if(bIsBOM32BE(ucpData, uiDataLen)){
267  vUtf32BEDecode(spConv, (ucpData + 4), (uiDataLen - 4));
268  }else if(bIsBOM32LE(ucpData, uiDataLen)){
269  vUtf32LEDecode(spConv, (ucpData + 4), (uiDataLen - 4));
270  }else{
271  vUtf32BEDecode(spConv, ucpData, uiDataLen);
272  }
273  break;
274  case UTF_32BE:
275  if(bIsBOM32BE(ucpData, uiDataLen)){
276  vUtf32BEDecode(spConv, (ucpData + 4), (uiDataLen - 4));
277  }else{
278  vUtf32BEDecode(spConv, ucpData, uiDataLen);
279  }
280  break;
281  case UTF_32LE:
282  if(bIsBOM32LE(ucpData, uiDataLen)){
283  vUtf32LEDecode(spConv, (ucpData + 4), (uiDataLen - 4));
284  }else{
285  vUtf32LEDecode(spConv, ucpData, uiDataLen);
286  }
287  break;
288  default:
289  XTHROW(spConv->spException, "unrecognized encoding type");
290  break;
291  }
292  }else{
293  vExContext();
294  }
295 }
302 void vConvEncode(void* vpCtx, conv_dst* spDst){
303  conv* spConv = (conv*)vpCtx;
304  if(spConv && spConv->vpValidate == s_vpMagicNumber){
305  vVecClear(spConv->vpVecInput);
306  vVecClear(spConv->vpVecOutput);
307  spConv->sError.bHasError = APG_FALSE;
308  if(!uiVecLen(spConv->vpVec32bit)){
309  XTHROW(spConv->spException, "no 32-bit data to encode");
310  return;
311  }
312  switch(spDst->uiDataType & TYPE_MASK){
313  case BINARY:
314  vBinaryEncode(spConv);
315  break;
316  case UTF_8:
317  vUtf8Encode(spConv, spDst);
318  break;
319  case UTF_16:
320  case UTF_16BE:
321  vUtf16BEEncode(spConv, spDst);
322  break;
323  case UTF_16LE:
324  vUtf16LEEncode(spConv, spDst);
325  break;
326  case UTF_32:
327  case UTF_32BE:
328  vUtf32BEEncode(spConv, spDst);
329  break;
330  case UTF_32LE:
331  vUtf32LEEncode(spConv, spDst);
332  break;
333  default:
334  XTHROW(spConv->spException, "unrecognized encoding type");
335  break;
336  }
337  if(spDst->uiDataType & BASE64_MASK){
338  // final base64 conversion of translated byte stream
339  vBase64Encode(spConv, spDst);
340  }
341  spDst->ucpData = (uint8_t*)vpVecFirst(spConv->vpVecOutput);
342  spDst->uiDataLen = uiVecLen(spConv->vpVecOutput);
343  }else{
344  vExContext();
345  }
346 }
361 void vConvGetCodePoints(void* vpCtx, uint32_t* uipData, uint32_t* uipDataLen){
362  conv* spConv = (conv*)vpCtx;
363  if(spConv && spConv->vpValidate == s_vpMagicNumber){
364  if(!uipDataLen){
365  XTHROW(spConv->spException, "data length pointer cannot be NULL");
366  return;
367  }
368  uint32_t uiOriginalLen = *uipDataLen;
369  *uipDataLen = uiVecLen(spConv->vpVec32bit);
370  uint32_t* uip32 = (uint32_t*)vpVecFirst(spConv->vpVec32bit);
371  if(!*uipDataLen || !uip32){
372  XTHROW(spConv->spException, "no 32-bit data to copy");
373  return;
374  }
375  if(uipData && uiOriginalLen){
376  aint ui = 0;
377  aint uiLen = *uipDataLen >= uiOriginalLen ? uiOriginalLen : *uipDataLen;
378  for(; ui < uiLen; ui++){
379  uipData[ui] = uip32[ui];
380  }
381  }
382  }else{
383  vExContext();
384  }
385 }
386 
395 void vConvUseCodePoints(void* vpCtx, uint32_t* uipSrc, aint uiSrcLen){
396  conv* spConv = (conv*)vpCtx;
397  if(spConv && spConv->vpValidate == s_vpMagicNumber){
398  aint ui;
399  uint32_t* uip32bit;
400  if(!uipSrc || !uiSrcLen){
401  XTHROW(spConv->spException, "source cannot be NULL or empty");
402  return;
403  }
404  vVecClear(spConv->vpVec32bit);
405  uip32bit = (uint32_t*)vpVecPushn(spConv->vpVec32bit, NULL, uiSrcLen);
406  for(ui = 0; ui < uiSrcLen; ui++){
407  uip32bit[ui] = uipSrc[ui];
408  }
409  }else{
410  vExContext();
411  }
412 }
413 
422 void vConvConvert(void* vpCtx, conv_src* spSrc, conv_dst* spDst){
423  conv* spConv = (conv*)vpCtx;
424  if(spConv && spConv->vpValidate == s_vpMagicNumber){
425  if(!spSrc || !spDst){
426  XTHROW(spConv->spException, "source and destination must be non-NULL");
427  return;
428  }
429  vConvDecode(vpCtx, spSrc);
430  vConvEncode(vpCtx, spDst);
431  }else{
432  vExContext();
433  }
434 }
435 
436 static void vBase64Encode(conv* spConv, conv_dst* spDst){
437  aint uiTail, uiUnits, u3, u4, uu;
438  uint8_t* ucpTrans = NULL;
439  uint32_t ui32;
440  uint8_t* ucpSrc;
441  aint uiSrcLen;
442  ucpSrc = (uint8_t*)vpVecFirst(spConv->vpVecOutput);
443  uiSrcLen = uiVecLen(spConv->vpVecOutput);
444  if(!ucpSrc){
445  XTHROW(spConv->spException, "internal error - vBase64Encode called with no source");
446  }
447  uiUnits = uiSrcLen / 3;
448  uiTail = 3 - uiSrcLen % 3;
449  if(uiTail == 3){
450  ucpTrans = (uint8_t*)vpMemAlloc(spConv->vpMem, (aint)sizeof(uint32_t) * uiUnits * 4);
451  }else{
452  ucpTrans = (uint8_t*)vpMemAlloc(spConv->vpMem, (aint)sizeof(uint32_t) * (uiUnits + 1) * 4);
453  }
454  u3 = 0;
455  u4 = 0;
456  uu = 0;
457  for(; uu < uiUnits; uu++){
458  ui32 = ucpSrc[u3++] << 16;
459  ui32 += ucpSrc[u3++] << 8;
460  ui32 += ucpSrc[u3++];
461  ucpTrans[u4++] = ucaBase64Chars[(ui32 >> 18) & uiEncode64Mask];
462  ucpTrans[u4++] = ucaBase64Chars[(ui32 >> 12) & uiEncode64Mask];
463  ucpTrans[u4++] = ucaBase64Chars[(ui32 >> 6) & uiEncode64Mask];
464  ucpTrans[u4++] = ucaBase64Chars[ui32 & uiEncode64Mask];
465  }
466  if(uiTail == 1){
467  ui32 = ucpSrc[u3++] << 16;
468  ui32 += ucpSrc[u3] << 8;
469  ucpTrans[u4++] = ucaBase64Chars[(ui32 >> 18) & uiEncode64Mask];
470  ucpTrans[u4++] = ucaBase64Chars[(ui32 >> 12) & uiEncode64Mask];
471  ucpTrans[u4++] = ucaBase64Chars[(ui32 >> 6) & uiEncode64Mask];
472  ucpTrans[u4++] = ucaBase64Chars[64];
473  }else if(uiTail == 2){
474  ui32 = ucpSrc[u3] << 16;
475  ucpTrans[u4++] = ucaBase64Chars[(ui32 >> 18) & uiEncode64Mask];
476  ucpTrans[u4++] = ucaBase64Chars[(ui32 >> 12) & uiEncode64Mask];
477  ucpTrans[u4++] = ucaBase64Chars[64];
478  ucpTrans[u4++] = ucaBase64Chars[64];
479  }
480  vVecClear(spConv->vpVecOutput);
481  if(spConv->uiBase64LineLen){
482  aint ui = 0;
483 // uint8_t ucaCRLF[] = {13,10};
484  for(; ui < u4; ui++){
485  if((ui % spConv->uiBase64LineLen == 0) && ui){
486  if(spConv->uiBase64LineEnd == BASE64_LF){
487  vpVecPush(spConv->vpVecOutput, &s_ucaCRLF[1]);
488  }else{
489  vpVecPushn(spConv->vpVecOutput, s_ucaCRLF, 2);
490  }
491  }
492  vpVecPush(spConv->vpVecOutput, &ucpTrans[ui]);
493  }
494  if(spConv->uiBase64LineEnd == BASE64_LF){
495  vpVecPush(spConv->vpVecOutput, &s_ucaCRLF[1]);
496  }else{
497  vpVecPushn(spConv->vpVecOutput, s_ucaCRLF, 2);
498  }
499  }else{
500  vpVecPushn(spConv->vpVecOutput, ucpTrans, u4);
501  }
502  vMemFree(spConv->vpMem, ucpTrans);
503 }
504 static void vBase64Validate(conv* spConv, uint8_t* ucpSrc, aint uiSrcLen){
505  uint8_t* ucpValues = NULL;
506  spConv->uiTail = 0;
507  uint8_t ucChar;
508  uint8_t* ucpChar = ucpSrc;
509  uint8_t* ucpEnd = ucpSrc + uiSrcLen - spConv->uiTail;
510  ucpValues = (uint8_t*)vpMemAlloc(spConv->vpMem, (uiSrcLen + 32));
511  aint uiValuesLen = 0;
512  aint uiOffset = 0;
513  for(; ucpChar < ucpEnd; ucpChar++, uiOffset++){
514  ucChar = *ucpChar;
515  while(APG_TRUE){
516  if(ucChar == 10 || ucChar == 13 || ucChar == 9 || ucChar == 32){
517  // ignore white space
518  break;
519  }
520  if(ucChar >= 65 && ucChar <= 90){
521  ucpValues[uiValuesLen++] = ucChar - 65;
522  break;
523  }
524  if(ucChar >= 97 && ucChar <= 122){
525  ucpValues[uiValuesLen++] = ucChar - 71;
526  break;
527  }
528  if(ucChar >= 48 && ucChar <= 57){
529  ucpValues[uiValuesLen++] = ucChar + 4;
530  break;
531  }
532  if(ucChar == 43){
533  ucpValues[uiValuesLen++] = 62;
534  break;
535  }
536  if(ucChar == 47){
537  ucpValues[uiValuesLen++] = 63;
538  break;
539  }
540  if(ucChar == TAIL_CHAR){
541  spConv->uiTail++;
542  ucpValues[uiValuesLen++] = 64;
543  break;
544  }
545  vSetError(spConv, (uint32_t)ucChar, (uint32_t)uiOffset, "invalid base64 character");
546  XTHROW(spConv->spException, spConv->sError.cpMsg);
547  return;
548  }
549  }
550  if(spConv->uiTail > 2){
551  XTHROW(spConv->spException, "too many base64 tail characters");
552  }
553  if(spConv->uiTail == 2){
554  if(!(ucpValues[uiValuesLen - 1] == 64 && ucpValues[uiValuesLen - 2] == 64)){
555  XTHROW(spConv->spException, "bad base64 tail characters");
556  }
557  }else if(spConv->uiTail == 1){
558  if(ucpValues[uiValuesLen - 1] != 64){
559  XTHROW(spConv->spException, "bad base64 tail characters");
560  }
561  }
562  if((uiValuesLen % 4) != 0){
563  XTHROW(spConv->spException, "number of base 64 characters not multiple of 4");
564  }
565  vpVecPushn(spConv->vpVecInput, ucpValues, uiValuesLen);
566  vMemFree(spConv->vpMem, ucpValues);
567 }
568 static void vBase64Decode(conv* spConv, uint8_t* ucpSrc, aint uiSrcLen){
569  uint8_t* ucpOut = NULL;
570  vBase64Validate(spConv, ucpSrc, uiSrcLen);
571  uint8_t* ucpBase = (uint8_t*)vpVecFirst(spConv->vpVecInput);
572  aint uiBaseLen = uiVecLen(spConv->vpVecInput);
573  aint uiUnits = uiBaseLen / 4;
574  ucpOut = (uint8_t*)vpMemAlloc(spConv->vpMem, (uiUnits * 3));
575  if(spConv->uiTail){
576  uiUnits--;
577  }
578  aint u3 = 0;
579  aint u4 = 0;
580  aint uu = 0;
581  uint32_t ui32bits = 0;
582  for(; uu < uiUnits; uu++){
583  ui32bits = ((uint32_t)ucpBase[u4++]) << 18;
584  ui32bits += ((uint32_t)ucpBase[u4++]) << 12;
585  ui32bits += ((uint32_t)ucpBase[u4++]) << 6;
586  ui32bits += (uint32_t)ucpBase[u4++];
587  ucpOut[u3++] = (uint8_t)((ui32bits >> 16) & uiDecode64Mask);
588  ucpOut[u3++] = (uint8_t)((ui32bits >> 8) & uiDecode64Mask);
589  ucpOut[u3++] = (uint8_t)(ui32bits & uiDecode64Mask);
590  }
591  if(spConv->uiTail == 1){
592  ui32bits = ((uint32_t)ucpBase[u4++]) << 18;
593  ui32bits += ((uint32_t)ucpBase[u4++]) << 12;
594  ui32bits += ((uint32_t)ucpBase[u4++]) << 6;
595  ucpOut[u3++] = (uint8_t)((ui32bits >> 16) & uiDecode64Mask);
596  ucpOut[u3++] = (uint8_t)((ui32bits >> 8) & uiDecode64Mask);
597  }else if(spConv->uiTail == 2){
598  ui32bits = ((uint32_t)ucpBase[u4++]) << 18;
599  ui32bits += ((uint32_t)ucpBase[u4++]) << 12;
600  ucpOut[u3++] = (uint8_t)((ui32bits >> 16) & uiDecode64Mask);
601  }
602  vVecClear(spConv->vpVecInput);
603  vpVecPushn(spConv->vpVecInput, ucpOut, u3);
604  vMemFree(spConv->vpMem, ucpOut);
605 }
606 
607 static void vBinaryDecode(conv* spConv){
608  uint32_t* uip32;
609  uint32_t* uipEnd;
610  uint8_t* ucpIn;
611  aint uiLen;
612  ucpIn = (uint8_t*)vpVecFirst(spConv->vpVecInput);
613  uiLen = uiVecLen(spConv->vpVecInput);
614  if(!ucpIn || !uiLen){
615  XTHROW(spConv->spException, "internal error - function called without necessary data");
616  return;
617  }
618  uip32 = (uint32_t*)vpVecPushn(spConv->vpVec32bit, NULL, uiLen);
619  uipEnd = uip32 + uiLen;
620  while(uip32 < uipEnd){
621  *uip32++ = (uint32_t)*ucpIn++;
622  }
623 }
624 static void vBinaryEncode(conv* spConv){
625  uint32_t* uip32;
626  uint32_t* uipEnd;
627  uint8_t* ucpOut;
628  aint uiLen;
629  uip32 = (uint32_t*)vpVecFirst(spConv->vpVec32bit);
630  uiLen = uiVecLen(spConv->vpVec32bit);
631  if(!uip32 || !uiLen){
632  XTHROW(spConv->spException, "internal error - function called without necessary data");
633  return;
634  }
635  ucpOut = (uint8_t*)vpVecPushn(spConv->vpVecOutput, NULL, uiLen);
636  uipEnd = uip32 + uiLen;
637  uint32_t uiOffset = 0;
638  while(uip32 < uipEnd){
639  if((*uip32 & NON_BYTE_MASK) != 0){
640  vSetError(spConv, *uip32, uiOffset, "can't binary encode values > 0xFF");
641  XTHROW(spConv->spException, spConv->sError.cpMsg);
642  return;
643  }
644  uiOffset++;
645  *ucpOut++ = (uint8_t)*uip32++;
646  }
647 }
648 
649 static void vUtf32BEDecode(conv* spConv, uint8_t* ucpData, aint uiDataLen){
650  uint32_t uc = 0;
651  uint32_t uiWord;
652  if(!ucpData || !uiDataLen){
653  XTHROW(spConv->spException, "internal error - function called without necessary data");
654  return;
655  }
656  if(uiDataLen %4 != 0){
657  XTHROW(spConv->spException, "UTF-32BE data cannot have an odd number of bytes");
658  return;
659  }
660  while(uc < uiDataLen){
661  uiWord = ((uint32_t)ucpData[uc++]) << 24;
662  uiWord += ((uint32_t)ucpData[uc++]) << 16;
663  uiWord += ((uint32_t)ucpData[uc++]) << 8;
664  uiWord += (uint32_t)ucpData[uc++];
665  if(uiWord >= 0xd800 && uiWord < 0xe000){
666  vSetError(spConv, uiWord, uc, "UTF-32BE value in surrogate pair range");
667  XTHROW(spConv->spException, spConv->sError.cpMsg);
668  break;
669  }
670  if(uiWord > 0x10ffff){
671  vSetError(spConv, uiWord, uc, "UTF-32BE value out of range (> 0x10FFFF)");
672  XTHROW(spConv->spException, spConv->sError.cpMsg);
673  break;
674  }
675  vpVecPush(spConv->vpVec32bit, &uiWord);
676  }
677 }
678 static void vUtf32LEDecode(conv* spConv, uint8_t* ucpData, aint uiDataLen){
679  uint32_t uc = 0;
680  uint32_t uiWord;
681  if(!ucpData || !uiDataLen){
682  XTHROW(spConv->spException, "internal error - function called without necessary data");
683  return;
684  }
685  if(uiDataLen %4 != 0){
686  XTHROW(spConv->spException, "UTF-32LE data cannot have odd number of bytes");
687  return;
688  }
689  while(uc < uiDataLen){
690  uiWord = (uint32_t)ucpData[uc++];
691  uiWord += ((uint32_t)ucpData[uc++]) << 8;
692  uiWord += ((uint32_t)ucpData[uc++]) << 16;
693  uiWord += ((uint32_t)ucpData[uc++]) << 24;
694  if(uiWord >= 0xd800 && uiWord < 0xe000){
695  vSetError(spConv, uiWord, uc, "UTF-32LE value in surrogate pair range");
696  XTHROW(spConv->spException, spConv->sError.cpMsg);
697  break;
698  }
699  if(uiWord > 0x10ffff){
700  vSetError(spConv, uiWord, uc, "UTF-32LE value out of range (> 0x10FFFF)");
701  XTHROW(spConv->spException, spConv->sError.cpMsg);
702  break;
703  }
704  vpVecPush(spConv->vpVec32bit, &uiWord);
705  }
706 }
707 static void vUtf32BEEncode(conv* spConv, conv_dst* spDst){
708  uint32_t ui;
709  uint8_t ucaBuf[4];
710  uint32_t uiWord;
711  uint32_t* uipWords = (uint32_t*)vpVecFirst(spConv->vpVec32bit);
712  aint uiWordCount = uiVecLen(spConv->vpVec32bit);
713  if(!uipWords || !uiWordCount){
714  XTHROW(spConv->spException, "internal error - function called without necessary data");
715  return;
716  }
717  vVecClear(spConv->vpVecOutput);
718  if(spDst->bBOM){
719  ucaBuf[0] = s_caBOM32BE[0];
720  ucaBuf[1] = s_caBOM32BE[1];
721  ucaBuf[2] = s_caBOM32BE[2];
722  ucaBuf[3] = s_caBOM32BE[3];
723  vpVecPushn(spConv->vpVecOutput, ucaBuf, 4);
724  }
725  for(ui = 0; ui < uiWordCount; ui++){
726  uiWord = uipWords[ui];
727  if(uiWord >= 0xd800 && uiWord < 0xe000){
728  vSetError(spConv, uiWord, ui, "UTF-32BE value in surrogate pair range");
729  XTHROW(spConv->spException, spConv->sError.cpMsg);
730  break;
731  }
732  if(uiWord > 0x10ffff){
733  vSetError(spConv, uiWord, ui, "UTF-32BE value out of range (> 0x10FFFF)");
734  XTHROW(spConv->spException, spConv->sError.cpMsg);
735  break;
736  }
737  ucaBuf[0] = (uint8_t)(uiWord >> 24);
738  ucaBuf[1] = (uint8_t)((uiWord >> 16) & BYTE_MASK);
739  ucaBuf[2] = (uint8_t)((uiWord >> 8) & BYTE_MASK);
740  ucaBuf[3] = (uint8_t)(uiWord & BYTE_MASK);
741  vpVecPushn(spConv->vpVecOutput, ucaBuf, 4);
742  }
743  spDst->ucpData = (uint8_t*)vpVecFirst(spConv->vpVecOutput);
744  spDst->uiDataLen = uiVecLen(spConv->vpVecOutput);
745 }
746 static void vUtf32LEEncode(conv* spConv, conv_dst* spDst){
747  uint32_t ui;
748  uint8_t ucaBuf[4];
749  uint32_t uiWord;
750  uint32_t* uipWords = (uint32_t*)vpVecFirst(spConv->vpVec32bit);
751  aint uiWordCount = uiVecLen(spConv->vpVec32bit);
752  if(!uipWords || !uiWordCount){
753  XTHROW(spConv->spException, "internal error - function called without necessary data");
754  return;
755  }
756  vVecClear(spConv->vpVecOutput);
757  if(spDst->bBOM){
758  ucaBuf[0] = s_caBOM32LE[0];
759  ucaBuf[1] = s_caBOM32LE[1];
760  ucaBuf[2] = s_caBOM32LE[2];
761  ucaBuf[3] = s_caBOM32LE[3];
762  vpVecPushn(spConv->vpVecOutput, ucaBuf, 4);
763  }
764  for(ui = 0; ui < uiWordCount; ui++){
765  uiWord = uipWords[ui];
766  if(uiWord >= 0xd800 && uiWord < 0xe000){
767  vSetError(spConv, uiWord, ui, "UTF-32LE value in surrogate pair range");
768  XTHROW(spConv->spException, spConv->sError.cpMsg);
769  break;
770  }
771  if(uiWord > 0x10ffff){
772  vSetError(spConv, uiWord, ui, "UTF-32LE value out of range (> 0x10FFFF)");
773  XTHROW(spConv->spException, spConv->sError.cpMsg);
774  break;
775  }
776  ucaBuf[3] = (uint8_t)(uiWord >> 24);
777  ucaBuf[2] = (uint8_t)((uiWord >> 16) & BYTE_MASK);
778  ucaBuf[1] = (uint8_t)((uiWord >> 8) & BYTE_MASK);
779  ucaBuf[0] = (uint8_t)(uiWord & BYTE_MASK);
780  vpVecPushn(spConv->vpVecOutput, ucaBuf, 4);
781  }
782  spDst->ucpData = (uint8_t*)vpVecFirst(spConv->vpVecOutput);
783  spDst->uiDataLen = uiVecLen(spConv->vpVecOutput);
784 }
785 static abool bIsBOM8(uint8_t* ucpStream, aint uiLen){
786  abool bReturn = APG_FALSE;
787  if(uiLen > 3){
788  if(ucpStream[0] == s_caBOM8[0] &&
789  ucpStream[1] == s_caBOM8[1] &&
790  ucpStream[2] == s_caBOM8[2]){
791  bReturn = APG_TRUE;
792  }
793  }
794  return bReturn;
795 }
796 static abool bIsBOM16BE(uint8_t* ucpStream, aint uiLen){
797  abool bReturn = APG_FALSE;
798  if(uiLen > 2){
799  if(ucpStream[0] == s_caBOM16BE[0] &&
800  ucpStream[1] == s_caBOM16BE[1]){
801  bReturn = APG_TRUE;
802  }
803  }
804  return bReturn;
805 }
806 static abool bIsBOM16LE(uint8_t* ucpStream, aint uiLen){
807  abool bReturn = APG_FALSE;
808  if(uiLen > 2){
809  if(ucpStream[0] == s_caBOM16LE[0] &&
810  ucpStream[1] == s_caBOM16LE[1]){
811  bReturn = APG_TRUE;
812  }
813  }
814  return bReturn;
815 }
816 static abool bIsBOM32BE(uint8_t* ucpStream, aint uiLen){
817  abool bReturn = APG_FALSE;
818  if(uiLen > 4){
819  if(ucpStream[0] == s_caBOM32BE[0] &&
820  ucpStream[1] == s_caBOM32BE[1] &&
821  ucpStream[2] == s_caBOM32BE[2] &&
822  ucpStream[3] == s_caBOM32BE[3] ){
823  bReturn = APG_TRUE;
824  }
825  }
826  return bReturn;
827 }
828 static abool bIsBOM32LE(uint8_t* ucpStream, aint uiLen){
829  abool bReturn = APG_FALSE;
830  if(uiLen > 4){
831  if(ucpStream[0] == s_caBOM32LE[0] &&
832  ucpStream[1] == s_caBOM32LE[1] &&
833  ucpStream[2] == s_caBOM32LE[2] &&
834  ucpStream[3] == s_caBOM32LE[3] ){
835  bReturn = APG_TRUE;
836  }
837  }
838  return bReturn;
839 }
840 static void vUtf8Encode(conv* spConv, conv_dst* spDst){
841  uint32_t ui;
842  uint8_t ucaBuf[4];
843  uint32_t uiWord;
844  uint32_t* uipWords = (uint32_t*)vpVecFirst(spConv->vpVec32bit);
845  aint uiWordCount = uiVecLen(spConv->vpVec32bit);
846  spDst->ucpData = NULL;
847  spDst->uiDataLen = 0;
848  if(!uipWords || !uiWordCount){
849  XTHROW(spConv->spException, "internal error - function called without necessary data");
850  return;
851  }
852  vVecClear(spConv->vpVecOutput);
853  if(spDst->bBOM){
854  ucaBuf[0] = s_caBOM8[0];
855  ucaBuf[1] = s_caBOM8[1];
856  ucaBuf[2] = s_caBOM8[2];
857  vpVecPushn(spConv->vpVecOutput, ucaBuf, 3);
858  }
859  for(ui = 0; ui < uiWordCount; ui++){
860  uiWord = uipWords[ui];
861  if(uiWord < 0x80){
862  // one byte
863  ucaBuf[0] = (uint8_t)uiWord;
864  vpVecPush(spConv->vpVecOutput, &ucaBuf[0]);
865  }else if(uiWord < 0x800){
866  // 2 bytes
867  ucaBuf[0] = 0xc0 + (uint8_t)((uiWord & 0x7c0) >> 6);
868  ucaBuf[1] = 0x80 + (uint8_t)(uiWord & 0x3f);
869  vpVecPushn(spConv->vpVecOutput, &ucaBuf[0], 2);
870  }else if(uiWord < 0xd800){
871  // 3 bytes
872  ucaBuf[0] = 0xe0 + (uint8_t)((uiWord & 0xf000) >> 12);
873  ucaBuf[1] = 0x80 + (uint8_t)((uiWord & 0xfc0) >> 6);
874  ucaBuf[2] = 0x80 + (uint8_t)(uiWord & 0x3f);
875  vpVecPushn(spConv->vpVecOutput, &ucaBuf[0], 3);
876  }else if(uiWord < 0xe000){
877  // 3-byte error, Unicode UTF-16 surrogate pairs
878  vSetError(spConv, uiWord, ui, "UTF-8 value in surrogate pair");
879  XTHROW(spConv->spException, spConv->sError.cpMsg);
880  break;
881  }else if(uiWord < 0x10000){
882  // 3 bytes
883  ucaBuf[0] = 0xe0 + (uint8_t)((uiWord & 0xf000) >> 12);
884  ucaBuf[1] = 0x80 + (uint8_t)((uiWord & 0xfc0) >> 6);
885  ucaBuf[2] = 0x80 + (uint8_t)(uiWord & 0x3f);
886  vpVecPushn(spConv->vpVecOutput, &ucaBuf[0], 3);
887  }else if(uiWord <= 0x10ffff){
888  // 4 bytes
889  ucaBuf[0] = 0xf0 + (uint8_t)((uiWord & 0x1c0000) >> 18);
890  ucaBuf[1] = 0x80 + (uint8_t)((uiWord & 0x3f000) >> 12);
891  ucaBuf[2] = 0x80 + (uint8_t)((uiWord & 0xfc0) >> 6);
892  ucaBuf[3] = 0x80 + (uint8_t)(uiWord & 0x3f);
893  vpVecPushn(spConv->vpVecOutput, &ucaBuf[0], 4);
894  }else{
895  // out of Unicode range
896  vSetError(spConv, uiWord, ui, "UTF-8 value out of range (> 0x10FFFF)");
897  XTHROW(spConv->spException, spConv->sError.cpMsg);
898  break;
899  }
900  }
901  spDst->ucpData = (uint8_t*)vpVecFirst(spConv->vpVecOutput);
902  spDst->uiDataLen = uiVecLen(spConv->vpVecOutput);
903 }
904 static void vUtf8Decode(conv* spConv, uint8_t* ucpData, aint uiDataLen){
905  if(!ucpData || !uiDataLen){
906  XTHROW(spConv->spException, "internal error - function called without necessary data");
907  return;
908  }
909  uint8_t ucChar;
910  uint32_t uiWord;
911  uint32_t uc = 0;
912  aint uiRemainder = uiDataLen - 1;
913  while(uc < uiDataLen){
914  ucChar = ucpData[uc];
915  if(ucChar == 0xc0 || ucChar == 0xc1 || ucChar >= 0xf5){
916  vSetError(spConv, (uint32_t)ucChar, uc, "invalid UTF-8 value");
917  XTHROW(spConv->spException, spConv->sError.cpMsg);
918  return;
919  }
920  if(ucChar < 0x80){
921  // one byte
922  uiWord = (uint32_t)ucChar;
923  uc++;
924  uiRemainder--;
925  }else if(ucChar < 0xe0){
926  // 2 byte (starts at 0x80)
927  if(uiRemainder < 1){
928  vSetError(spConv, (uint32_t)ucChar, uc, "UTF-8 data has too few trailing bytes");
929  XTHROW(spConv->spException, spConv->sError.cpMsg);
930  return;
931  }
932  uiWord = (uint32_t)(ucChar & 0x1f) << 6;
933  uiWord += (uint32_t)(ucpData[++uc] & 0x3f);
934  uc++;
935  uiRemainder -= 2;
936  }else if(ucChar < 0xf0){
937  // 3byte (starts at 0xe0)
938  if(uiRemainder < 2){
939  vSetError(spConv, (uint32_t)ucChar, uc, "UTF-8 data has too few trailing bytes");
940  XTHROW(spConv->spException, spConv->sError.cpMsg);
941  return;
942  }
943  uiWord = (uint32_t)(ucChar & 0xf) << 12;
944  uiWord += (uint32_t)(ucpData[++uc] & 0x3f) << 6;
945  uiWord += (uint32_t)(ucpData[++uc] & 0x3f);
946  uc++;
947  uiRemainder -= 3;
948  if(uiWord >= 0xd800 && uiWord <= 0xdfff){
949  vSetError(spConv, (uint32_t)ucChar, (uc - 3), "UTF-8 value in surrogate pair range (0xD800 - 0xDFFF)");
950  XTHROW(spConv->spException, spConv->sError.cpMsg);
951  return;
952  }
953  if(uiWord < 0x800){
954  vSetError(spConv, (uint32_t)ucChar, (uc - 3), "UTF-8 value has over-long encoding");
955  XTHROW(spConv->spException, spConv->sError.cpMsg);
956  return;
957  }
958  }else if(ucChar < 0xf5){
959  // 4 byte (starts with at 0xf0)
960  if(uiRemainder < 3){
961  vSetError(spConv, (uint32_t)ucChar, uc, "UTF-8 data has too few trailing bytes");
962  XTHROW(spConv->spException, spConv->sError.cpMsg);
963  return;
964  }
965  // 3byte
966  uiWord = (uint32_t)(ucChar & 0x7) << 18;
967  uiWord += (uint32_t)(ucpData[++uc] & 0x3f) << 12;
968  uiWord += (uint32_t)(ucpData[++uc] & 0x3f) << 6;
969  uiWord += (uint32_t)(ucpData[++uc] & 0x3f);
970  uc++;
971  uiRemainder -= 4;
972  if(uiWord < 0x10000){
973  vSetError(spConv, (uint32_t)ucChar, (uc - 4), "UTF-8 value has over-long encoding");
974  XTHROW(spConv->spException, spConv->sError.cpMsg);
975  return;
976  }
977  if(uiWord > 0x10ffff){
978  vSetError(spConv, (uint32_t)ucChar, (uc - 4), "UTF-8 value out of range (> 0x10FFFF)");
979  XTHROW(spConv->spException, spConv->sError.cpMsg);
980  return;
981  }
982  }
983  vpVecPush(spConv->vpVec32bit, &uiWord);
984  }
985 }
986 static void vUtf16BEDecode(conv* spConv, uint8_t* ucpData, aint uiDataLen){
987  if(!ucpData || !uiDataLen){
988  XTHROW(spConv->spException, "internal error - function called without necessary data");
989  return;
990  }
991  if(uiDataLen % 2 != 0){
992  XTHROW(spConv->spException, "supposed UTF-16BE data has odd number of byte");
993  return;
994  }
995  uint32_t uiLow, uiHigh;
996  uint32_t uc = 0;
997  aint uiRemainder = uiDataLen;
998  while(uc < uiDataLen){
999  uiHigh = (((uint32_t)ucpData[uc]) << 8);
1000  uc++;
1001  uiHigh += (((uint32_t)ucpData[uc]) & 0xff);
1002  uc++;
1003  uiRemainder -= 2;
1004  if(uiHigh >= 0xd800 && uiHigh < 0xdc00){
1005  // handle surrogate pairs
1006  if(uiRemainder == 0){
1007  vSetError(spConv, uiHigh, (uc - 2), "UTF-16BE data has missing low surrogate value");
1008  XTHROW(spConv->spException, spConv->sError.cpMsg);
1009  return;
1010  }
1011  uiLow = (((uint32_t)ucpData[uc]) << 8);
1012  uc++;
1013  uiLow += (((uint32_t)ucpData[uc]) & 0xff);
1014  uc++;
1015  uiRemainder -= 2;
1016  if(uiLow >= 0xdc00 && uiLow < 0xe000){
1017  uiHigh = (uiHigh - 0xd800) << 10;
1018  uiLow -= 0xdc00;
1019  uiHigh += uiLow + 0x10000;
1020  vpVecPush(spConv->vpVec32bit, &uiHigh);
1021  }else{
1022  vSetError(spConv, uiLow, (uc - 2), "UTF-16BE data has missing low surrogate value");
1023  XTHROW(spConv->spException, spConv->sError.cpMsg);
1024  return;
1025  }
1026  }else{
1027  if(uiHigh >= 0xdc00 && uiHigh < 0xe000){
1028  vSetError(spConv, uiHigh, (uc - 2), "UTF-16BE data has high surrogate out of order");
1029  XTHROW(spConv->spException, spConv->sError.cpMsg);
1030  return;
1031  }
1032  // high surrogate is complete word
1033  vpVecPush(spConv->vpVec32bit, &uiHigh);
1034  }
1035  }
1036 }
1037 static void vUtf16LEDecode(conv* spConv, uint8_t* ucpData, aint uiDataLen){
1038  if(!ucpData || !uiDataLen){
1039  XTHROW(spConv->spException, "internal error - function called without necessary data");
1040  return;
1041  }
1042  if(uiDataLen % 2 != 0){
1043  XTHROW(spConv->spException, "supposed UTF-16LE data has odd number of byte");
1044  return;
1045  }
1046  uint32_t uiLow, uiHigh;
1047  uint32_t uc = 0;
1048  aint uiRemainder = uiDataLen;
1049  while(uc < uiDataLen){
1050  uiHigh = (((uint32_t)ucpData[uc]) & 0xff);
1051  uc++;
1052  uiHigh += (((uint32_t)ucpData[uc]) << 8);
1053  uc++;
1054  uiRemainder -= 2;
1055  if(uiHigh >= 0xd800 && uiHigh < 0xdc00){
1056  // handle surrogate pairs
1057  if(uiRemainder == 0){
1058  vSetError(spConv, uiHigh, (uc - 2), "UTF-16LE data has missing low surrogate value");
1059  XTHROW(spConv->spException, spConv->sError.cpMsg);
1060  return;
1061  }
1062  uiLow = (((uint32_t)ucpData[uc]) & 0xff);
1063  uc++;
1064  uiLow += (((uint32_t)ucpData[uc]) << 8);
1065  uc++;
1066  uiRemainder -= 2;
1067  if(uiLow >= 0xdc00 && uiLow < 0xe000){
1068  uiHigh = (uiHigh - 0xd800) << 10;
1069  uiLow -= 0xdc00;
1070  uiHigh += uiLow + 0x10000;
1071  vpVecPush(spConv->vpVec32bit, &uiHigh);
1072  }else{
1073  vSetError(spConv, uiLow, (uc - 2), "UTF-16LE data has missing low surrogate value");
1074  XTHROW(spConv->spException, spConv->sError.cpMsg);
1075  return;
1076  }
1077  }else{
1078  if(uiHigh >= 0xdc00 && uiHigh < 0xe000){
1079  vSetError(spConv, uiHigh, (uc - 2), "UTF-16LE data has high surrogate out of order");
1080  XTHROW(spConv->spException, spConv->sError.cpMsg);
1081  return;
1082  }
1083  // high surrogate is complete word
1084  vpVecPush(spConv->vpVec32bit, &uiHigh);
1085  }
1086  }
1087 }
1088 static void vUtf16BEEncode(conv* spConv, conv_dst* spDst){
1089  uint32_t ui;
1090  uint8_t ucaBuf[4];
1091  uint32_t uiWord;
1092  uint32_t* uipWords = (uint32_t*)vpVecFirst(spConv->vpVec32bit);
1093  aint uiWordCount = uiVecLen(spConv->vpVec32bit);
1094  spDst->ucpData = NULL;
1095  spDst->uiDataLen = 0;
1096  if(!uipWords || !uiWordCount){
1097  XTHROW(spConv->spException, "internal error - function called without necessary data");
1098  return;
1099  }
1100  vVecClear(spConv->vpVecOutput);
1101  if(spDst->bBOM){
1102  ucaBuf[0] = s_caBOM16BE[0];
1103  ucaBuf[1] = s_caBOM16BE[1];
1104  vpVecPushn(spConv->vpVecOutput, ucaBuf, 2);
1105  }
1106  for(ui = 0; ui < uiWordCount; ui++){
1107  uiWord = uipWords[ui];
1108  if(uiWord < 0x10000){
1109  // 2 bytes
1110  if(uiWord >= 0xd800 && uiWord < 0xe000){
1111  vSetError(spConv, uiWord, ui, "UTF-16BE has value in surrogate pair range (0xD800-0xDFFF)");
1112  XTHROW(spConv->spException, spConv->sError.cpMsg);
1113  return;
1114  }
1115  ucaBuf[0] = (uint8_t)(uiWord >> 8);
1116  ucaBuf[1] = (uint8_t)(uiWord & 0xFF);
1117  vpVecPushn(spConv->vpVecOutput, &ucaBuf[0], 2);
1118  }else if(uiWord <= 0x10FFFF){
1119  // 4 bytes
1120  uiWord -= 0x10000;
1121  uint32_t uiHigh = 0xd800 + (uiWord >> 10);
1122  uint32_t uiLow = 0xdc00 + (uiWord & 0x3FF);
1123  ucaBuf[0] = (uint8_t)(uiHigh >> 8);
1124  ucaBuf[1] = (uint8_t)(uiHigh & 0xFF);
1125  ucaBuf[2] = (uint8_t)(uiLow >> 8);
1126  ucaBuf[3] = (uint8_t)(uiLow & 0xFF);
1127  vpVecPushn(spConv->vpVecOutput, &ucaBuf[0], 4);
1128  }else{
1129  // out of Unicode range
1130  vSetError(spConv, uiWord, ui, "UTF-16BE has value out of range (> 0x10FFFF)");
1131  XTHROW(spConv->spException, spConv->sError.cpMsg);
1132  return;
1133  }
1134  }
1135  spDst->ucpData = (uint8_t*)vpVecFirst(spConv->vpVecOutput);
1136  spDst->uiDataLen = uiVecLen(spConv->vpVecOutput);
1137 }
1138 static void vUtf16LEEncode(conv* spConv, conv_dst* spDst){
1139  uint32_t ui;
1140  uint8_t ucaBuf[4];
1141  uint32_t uiWord;
1142  uint32_t* uipWords = (uint32_t*)vpVecFirst(spConv->vpVec32bit);
1143  aint uiWordCount = uiVecLen(spConv->vpVec32bit);
1144  spDst->ucpData = NULL;
1145  spDst->uiDataLen = 0;
1146  if(!uipWords || !uiWordCount){
1147  XTHROW(spConv->spException, "internal error - function called without necessary data");
1148  return;
1149  }
1150  vVecClear(spConv->vpVecOutput);
1151  if(spDst->bBOM){
1152  ucaBuf[0] = s_caBOM16LE[0];
1153  ucaBuf[1] = s_caBOM16LE[1];
1154  vpVecPushn(spConv->vpVecOutput, ucaBuf, 2);
1155  }
1156  for(ui = 0; ui < uiWordCount; ui++){
1157  uiWord = uipWords[ui];
1158  if(uiWord < 0x10000){
1159  // 2 bytes
1160  if(uiWord >= 0xd800 && uiWord < 0xe000){
1161  vSetError(spConv, uiWord, ui, "UTF-16LE has value in surrogate pair range (0xD800-0xDFFF)");
1162  XTHROW(spConv->spException, spConv->sError.cpMsg);
1163  return;
1164  }
1165  ucaBuf[1] = (uint8_t)(uiWord >> 8);
1166  ucaBuf[0] = (uint8_t)(uiWord & 0xFF);
1167  vpVecPushn(spConv->vpVecOutput, &ucaBuf[0], 2);
1168  }else if(uiWord <= 0x10FFFF){
1169  // 4 bytes
1170  uiWord -= 0x10000;
1171  uint32_t uiHigh = 0xd800 + (uiWord >> 10);
1172  uint32_t uiLow = 0xdc00 + (uiWord & 0x3FF);
1173  ucaBuf[1] = (uint8_t)(uiHigh >> 8);
1174  ucaBuf[0] = (uint8_t)(uiHigh & 0xFF);
1175  ucaBuf[3] = (uint8_t)(uiLow >> 8);
1176  ucaBuf[2] = (uint8_t)(uiLow & 0xFF);
1177  vpVecPushn(spConv->vpVecOutput, &ucaBuf[0], 4);
1178  }else{
1179  // out of Unicode range
1180  vSetError(spConv, uiWord, ui, "UTF-16LE has value out of range (> 0x10FFFF)");
1181  XTHROW(spConv->spException, spConv->sError.cpMsg);
1182  return;
1183  }
1184  }
1185  spDst->ucpData = (uint8_t*)vpVecFirst(spConv->vpVecOutput);
1186  spDst->uiDataLen = uiVecLen(spConv->vpVecOutput);
1187 }
1188 static void vSetError(conv* spConv, uint32_t uiValue, uint32_t uiOffset, const char* cpMsg){
1189  spConv->sError.bHasError = APG_TRUE;
1190  spConv->sError.uiValue = uiValue;
1191  spConv->sError.uiOffset = uiOffset;
1192  spConv->sError.cpMsg = cpMsg;
1193 }
conv_src::uiDataType
aint uiDataType
One of the encoding type identifiers, UTF_8, etc. If or'ed (|) with BASE64, the source data stream is...
Definition: conv.h:110
UTF_16
#define UTF_16
Data type macro for UTF-16 encoding/decoding.
Definition: conv.h:79
conv
The conv object context.
Definition: conv.c:83
vMemDtor
void vMemDtor(void *vpCtx)
Destroys a Memory component. Frees all memory allocated.
Definition: memory.c:141
conv::uiBase64LineLen
aint uiBase64LineLen
Used for base64.
Definition: conv.c:92
conv.h
Header for the Unicode UTF encoding module.
vExContext
void vExContext()
Handles bad context pointers.
Definition: exception.c:126
vpConvCtor
void * vpConvCtor(exception *spEx)
The data conversion object constructor.
Definition: conv.c:134
UTF_32
#define UTF_32
Data type macro for UTF-32 encoding/decoding.
Definition: conv.h:85
conv_error::uiOffset
uint32_t uiOffset
Offset to the value that is in error.
Definition: conv.c:75
XTHROW
#define XTHROW(ctx, msg)
Exception throw macro.
Definition: exception.h:67
conv_src::ucpData
uint8_t * ucpData
Pointer to the byte stream to decode.
Definition: conv.h:113
conv::vpValidate
const void * vpValidate
Must be the "magic number" to be a valid context.
Definition: conv.c:84
vConvConfigureBase64
void vConvConfigureBase64(void *vpCtx, aint uiLineLen, aint uiLineEnd)
Configures base64 output format.
Definition: conv.c:186
UTF_16BE
#define UTF_16BE
Data type macro for UTF-16BE encoding/decoding.
Definition: conv.h:81
conv::vpVecOutput
void * vpVecOutput
Pointer to a vector which holds the output byte stream.
Definition: conv.c:89
aint
uint_fast32_t aint
The APG parser's unsigned integer type.
Definition: apg.h:79
vConvGetCodePoints
void vConvGetCodePoints(void *vpCtx, uint32_t *uipData, uint32_t *uipDataLen)
Access the intermediate 32-bit data following a call to vConvDecode() or vConvUseCodePoints().
Definition: conv.c:361
conv_error::cpMsg
const char * cpMsg
Pointer to the error message.
Definition: conv.c:76
conv_dst::bBOM
abool bBOM
[in] If true(BOM) prepend a Byte Order Mark, if false(NOBOM) no Byte Order Mark prepended.
Definition: conv.h:122
conv_dst::uiDataType
aint uiDataType
[in] One of the encoding type identifiers, UTF_8, etc.
Definition: conv.h:121
vpMemAlloc
void * vpMemAlloc(void *vpCtx, aint uiBytes)
Allocates memory.
Definition: memory.c:196
uiVecLen
aint uiVecLen(void *vpCtx)
Get the vector length. That is, the number of elements on the vector.
Definition: vector.c:385
vpVecCtor
void * vpVecCtor(void *vpMem, aint uiElementSize, aint uiInitialAlloc)
The vector object constructor.
Definition: vector.c:118
conv_dst
Defines the output data type, location, length and whether or not to preface with a Byte Order Mark (...
Definition: conv.h:120
conv::sError
conv_error sError
Definition: conv.c:94
UTF_32LE
#define UTF_32LE
Data type macro for UTF-32LE encoding/decoding.
Definition: conv.h:89
exception
A structure to describe the type and location of a caught exception.
Definition: exception.h:47
vMemFree
void vMemFree(void *vpCtx, const void *vpData)
Free memory previously allocated with vpMemAlloc().
Definition: memory.c:226
vConvConvert
void vConvConvert(void *vpCtx, conv_src *spSrc, conv_dst *spDst)
Decodes and encodes in a single function call.
Definition: conv.c:422
conv_src::uiDataLen
aint uiDataLen
Number of bytes in the byte stream.
Definition: conv.h:114
BASE64_LINE_LEN
#define BASE64_LINE_LEN
Definition: conv.c:56
conv::vpMem
void * vpMem
Pointer to a memory object used for all memory allocations.
Definition: conv.c:87
TAIL_CHAR
#define TAIL_CHAR
Definition: conv.c:57
bExValidate
abool bExValidate(exception *spException)
Test an exception structure for validity.
Definition: exception.c:70
UTF_16LE
#define UTF_16LE
Data type macro for UTF-16LE encoding/decoding.
Definition: conv.h:83
vConvDecode
void vConvDecode(void *vpCtx, conv_src *spSrc)
Decode a source byte stream to 32-bit Unicode code points.
Definition: conv.c:205
vpVecFirst
void * vpVecFirst(void *vpCtx)
Get the first element on the vector. The vector is not altered.
Definition: vector.c:326
vpVecPushn
void * vpVecPushn(void *vpCtx, void *vpElement, aint uiCount)
Adds one or more elements to the end of the array.
Definition: vector.c:221
vpMemCtor
void * vpMemCtor(exception *spException)
Construct a memory component.
Definition: memory.c:121
conv::vpVec32bit
void * vpVec32bit
Pointer to a vector which holds the 32-bit decoded data.
Definition: conv.c:90
vConvEncode
void vConvEncode(void *vpCtx, conv_dst *spDst)
Encode the 32-bit Unicode code points to a byte stream.
Definition: conv.c:302
BASE64_MASK
#define BASE64_MASK
The base64 mask. And (&) with the type to extract the base64 bit, if any.
Definition: conv.h:51
conv_error::bHasError
abool bHasError
True if the data represents a valid error. False if no error has been detected.
Definition: conv.c:77
BASE64_LF
#define BASE64_LF
For configuring base64 destinations, indicates line breaks as line feed (\n, 0x0A).
Definition: conv.h:101
APG_TRUE
#define APG_TRUE
Definition: apg.h:291
UTF_32BE
#define UTF_32BE
Data type macro for UTF-32BE encoding/decoding.
Definition: conv.h:87
conv::spException
exception * spException
Pointer to the exception structure for reporting errors to the application catch block.
Definition: conv.c:85
TYPE_MASK
#define TYPE_MASK
The type mask. And (&) with the type to extract the type, without the base64 bit.
Definition: conv.h:58
conv::vpVecInput
void * vpVecInput
Pointer to a vector which holds a copy of the input byte stream.
Definition: conv.c:88
UTF_8
#define UTF_8
Data type macro for UTF-8 encoding/decoding.
Definition: conv.h:77
conv_dst::uiDataLen
aint uiDataLen
[out] Number of bytes in the byte stream.
Definition: conv.h:124
abool
uint8_t abool
abool is the APG bool type.
Definition: apg.h:140
conv_error::uiValue
uint32_t uiValue
The value that is in error.
Definition: conv.c:74
conv_src
Defines the input data type, location and length.
Definition: conv.h:109
BASE64_CRLF
#define BASE64_CRLF
For configuring base64 destinations, indicates line breaks as carriage return + line feed (\r\n,...
Definition: conv.h:103
conv::uiTail
aint uiTail
Used for base64.
Definition: conv.c:91
BYTE_MASK
#define BYTE_MASK
Definition: conv.c:59
vConvDtor
void vConvDtor(void *vpCtx)
Conversion object destructor.
Definition: conv.c:161
NON_BYTE_MASK
#define NON_BYTE_MASK
Definition: conv.c:58
conv_error
Defines the value, location and error message when a value is in error and a fatal error is issued.
Definition: conv.c:73
conv::uiBase64LineEnd
aint uiBase64LineEnd
Used for base64.
Definition: conv.c:93
conv_dst::ucpData
uint8_t * ucpData
[out] Pointer to the output byte stream. Valid until another function call on the context handle.
Definition: conv.h:123
vpVecPush
void * vpVecPush(void *vpCtx, void *vpElement)
Adds one element to the end of the array.
Definition: vector.c:193
vConvUseCodePoints
void vConvUseCodePoints(void *vpCtx, uint32_t *uipSrc, aint uiSrcLen)
Insert a stream of 32-bit Unicode code points as the intermediate data.
Definition: conv.c:395
BINARY
#define BINARY
Alias for ISO_8895_1.
Definition: conv.h:64
vVecClear
void vVecClear(void *vpCtx)
Clears all used elements in a vector component.
Definition: vector.c:420
APG_FALSE
#define APG_FALSE
Definition: apg.h:292
APG Version 7.0 is licensed under the 2-Clause BSD License,
an Open Source Initiative Approved License.