Version 7.0
Copyright © 2021 Lowell D. Thomas
APG
… an ABNF Parser Generator
linesu.c
Go to the documentation of this file.
1 /* *************************************************************************************
2  Copyright (c) 2021, Lowell D. Thomas
3  All rights reserved.
4 
5  This file is part of APG Version 7.0.
6  APG Version 7.0 may be used under the terms of the BSD 2-Clause License.
7 
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions are met:
10 
11  1. Redistributions of source code must retain the above copyright notice, this
12  list of conditions and the following disclaimer.
13 
14  2. Redistributions in binary form must reproduce the above copyright notice,
15  this list of conditions and the following disclaimer in the documentation
16  and/or other materials provided with the distribution.
17 
18  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 * *************************************************************************************/
40 #include "../library/lib.h"
41 #include "./linesu.h"
42 
43 
/* Code points treated as line endings when the input is split into lines.
   CR immediately followed by LF is handled as a single two-character line end. */
/* Line Feed */
50 #define LF 0x0A
/* Vertical Tab */
52 #define VT 0x0B
/* Form Feed */
53 #define FF 0x0C
/* Carriage Return */
54 #define CR 0x0D
/* Next Line */
55 #define NEL 0x85
/* Line Separator */
56 #define LS 0x2028
/* Paragraph Separator */
57 #define PS 0x2029
58 
/* Sentinel address used to validate context pointers: the constructor stores it in
   the context's vpValidate member as its last step, and the other functions in this
   file compare vpValidate against it before touching the context. */
60 static const void* s_vpMagicNumber = (void*)"lines_u";
61 
65 typedef struct{
66  const void* vpValidate;
68  void* vpMem;
70  uint32_t* uipInput;
72  void* vpVecLines;
76 } lines_u;
77 
/* Forward declarations of the file-local helpers defined at the end of this file:
   vInputLines builds the line table for a context, bFindLine looks up the line
   containing a given character index. */
78 static void vInputLines(lines_u* spCtx);
79 static abool bFindLine(line_u* spLines, aint uiLineCount, aint uiCharIndex, aint* uipLine);
80 
92 void* vpLinesuCtor(exception* spEx, const uint32_t* uipInput, aint uiLength){
93  if(bExValidate(spEx)){
94  if(!uipInput || !uiLength){
95  XTHROW(spEx, "input is NULL or empty");
96  }
97  // get the context
98  void* vpMem = vpMemCtor(spEx);
99  lines_u* spCtx = (lines_u*)vpMemAlloc(vpMem, sizeof(lines_u));
100  memset((void*)spCtx, 0, sizeof(lines_u));
101  spCtx->uipInput = (uint32_t*)vpMemAlloc(vpMem, ((sizeof(uint32_t) * uiLength)));
102  memcpy((void*)spCtx->uipInput, (void*)uipInput, (sizeof(uint32_t) * uiLength));
103  spCtx->uiLength = uiLength;
104  spCtx->vpMem = vpMem;
105  spCtx->spException = spEx;
106  spCtx->vpVecLines = vpVecCtor(vpMem, sizeof(line_u), 512);
107 
108  // compute lines
109  vInputLines(spCtx);
110  spCtx->vpValidate = s_vpMagicNumber;
111  return (void*)spCtx;
112  }else{
113  vExContext();
114  }
115  return NULL;
116 }
117 
126 void vLinesuDtor(void* vpCtx){
127  lines_u* spCtx = (lines_u*)vpCtx;
128  if(vpCtx){
129  if(spCtx->vpValidate == s_vpMagicNumber){
130  // otherwise, just free what was allocated explicitly
131  void* vpMem = spCtx->vpMem;
132  memset(vpCtx, 0, sizeof(lines_u));
133  vMemDtor(vpMem);
134  }else{
135  vExContext();
136  }
137  }
138 }
139 
149 abool bLinesuFindLine(void* vpCtx, aint uiOffset, aint* uipLine, aint* uipRelOffset){
150  abool bReturn = APG_FAILURE;
151  lines_u* spCtx = (lines_u*)vpCtx;
152  if(vpCtx && (spCtx->vpValidate == s_vpMagicNumber)){
153  if(!uipLine || !uipRelOffset){
154  XTHROW(spCtx->spException, "line and relative offset pointers cannot be NULL");
155  }
156  if(bFindLine(spCtx->spLines, spCtx->uiLineCount, uiOffset, uipLine)){
157  *uipRelOffset = uiOffset - spCtx->spLines[*uipLine].uiCharIndex;
158  bReturn = APG_SUCCESS;
159  }
160  }else{
161  vExContext();
162  }
163  return bReturn;
164 }
165 
173 line_u* spLinesuFirst(void* vpCtx){
174  lines_u* spCtx = (lines_u*)vpCtx;
175  if(vpCtx && (spCtx->vpValidate == s_vpMagicNumber)){
176  spCtx->uiIterator = 1;
177  return spCtx->spLines;
178  }
179  vExContext();
180  return NULL;
181 }
187 line_u* spLinesuNext(void* vpCtx){
188  lines_u* spCtx = (lines_u*)vpCtx;
189  if(vpCtx && (spCtx->vpValidate == s_vpMagicNumber)){
190  if(spCtx->uiIterator < spCtx->uiLineCount){
191  return &spCtx->spLines[spCtx->uiIterator++];
192  }
193  return NULL;
194  }
195  vExContext();
196  return NULL;
197 }
198 
204 aint uiLinesuCount(void* vpCtx){
205  lines_u* spCtx = (lines_u*)vpCtx;
206  if(vpCtx && (spCtx->vpValidate == s_vpMagicNumber)){
207  return spCtx->uiLineCount;
208  }
209  vExContext();
210  return 0;
211 }
212 
220 aint uiLinesuLength(void* vpCtx){
221  lines_u* spCtx = (lines_u*)vpCtx;
222  if(vpCtx && (spCtx->vpValidate == s_vpMagicNumber)){
223  return spCtx->uiLength;
224  }
225  vExContext();
226  return 0;
227 }
228 
229 static abool bFindLine(line_u* spLines, aint uiLineCount, aint uiCharIndex, aint* uipLine) {
230  abool bReturn = APG_FAILURE;
231  if (!spLines || !uiLineCount) {
232  goto fail;
233  }
234  aint ui;
235  line_u* spThis;
236  if (uiLineCount < 5) {
237  // linear search
238  for (ui = 0; ui < uiLineCount; ui += 1) {
239  spThis = &spLines[ui];
240  if ((uiCharIndex >= spThis->uiCharIndex) && (uiCharIndex < (spThis->uiCharIndex + spThis->uiLineLength))) {
241  *uipLine = ui;
242  goto success;
243  }
244  }
245  goto fail;
246  } else {
247  // binary search (https://en.wikipedia.org/wiki/Binary_search_algorithm)
248  aint uiL = 0;
249  aint uiR = uiLineCount - 1;
250  aint uiM;
251  while (uiL < uiR) {
252  uiM = uiL + (uiR - uiL) / 2;
253  spThis = &spLines[uiM];
254  if (uiCharIndex >= (spThis->uiCharIndex + spThis->uiLineLength)) {
255  uiL = uiM + 1;
256  continue;
257  }
258  if (uiCharIndex < spThis->uiCharIndex) {
259  uiR = uiM - 1;
260  continue;
261  }
262  *uipLine = uiM;
263  goto success;
264  }
265  if (uiL == uiR) {
266  spThis = &spLines[uiL];
267  if ((uiCharIndex >= spThis->uiCharIndex) && (uiCharIndex < (spThis->uiCharIndex + spThis->uiLineLength))) {
268  *uipLine = uiL;
269  goto success;
270  }
271  }
272  goto fail;
273  }
274  success: bReturn = APG_SUCCESS;
275  fail: ;
276  return bReturn;
277 }
278 
283 static void vInputLines(lines_u* spCtx) {
284  aint uiLineIndex = 0;
285  aint uiCharIndex = 0;
286  aint uiTextLength = 0;
287  uint32_t uiChar;
288  line_u sLine;
289  void* vpVec = spCtx->vpVecLines;
290  uint32_t* uipInput = spCtx->uipInput;
291  aint uiLen = spCtx->uiLength;
292  vVecClear(vpVec);
293  while (uiCharIndex < uiLen) {
294  uiChar = uipInput[uiCharIndex];
295  if (uiChar == LF || uiChar == VT || uiChar == FF || uiChar == NEL || uiChar == LS || uiChar == PS ) {
296  // LF line ending
297  sLine.uiLineIndex = uiLineIndex;
298  sLine.uiCharIndex = uiCharIndex - uiTextLength;
299  sLine.uiTextLength = uiTextLength;
300  sLine.uiLineLength = uiTextLength + 1;
301  uiCharIndex += 1;
302  sLine.uiaLineEnd[0] = uiChar;
303  sLine.uiaLineEnd[1] = 0;
304  sLine.uiaLineEnd[2] = 0;
305  vpVecPush(vpVec, (void*) &sLine);
306  uiLineIndex += 1;
307  uiTextLength = 0;
308  } else if (uiChar == CR) {
309  sLine.uiLineIndex = uiLineIndex;
310  sLine.uiCharIndex = uiCharIndex - uiTextLength;
311  sLine.uiTextLength = uiTextLength;
312  if ((uiCharIndex < (uiLen - 1)) && (uipInput[uiCharIndex + 1] == LF)) {
313  // CRLF line ending
314  sLine.uiLineLength = uiTextLength + 2;
315  sLine.uiaLineEnd[0] = CR;
316  sLine.uiaLineEnd[1] = LF;
317  sLine.uiaLineEnd[2] = 0;
318  uiCharIndex += 2;
319  } else {
320  // CR line ending
321  sLine.uiLineLength = uiTextLength + 1;
322  sLine.uiaLineEnd[0] = CR;
323  sLine.uiaLineEnd[1] = 0;
324  sLine.uiaLineEnd[2] = 0;
325  uiCharIndex += 1;
326  }
327  vpVecPush(vpVec, (void*) &sLine);
328  uiLineIndex += 1;
329  uiTextLength = 0;
330  } else {
331  uiTextLength += 1;
332  uiCharIndex += 1;
333  }
334  }
335  if (uiTextLength > 0) {
336  // last line had no line ending
337  sLine.uiLineIndex = uiLineIndex;
338  sLine.uiCharIndex = uiCharIndex - uiTextLength;
339  sLine.uiTextLength = uiTextLength;
340  sLine.uiLineLength = uiTextLength;
341  sLine.uiaLineEnd[0] = 0;
342  sLine.uiaLineEnd[1] = 0;
343  sLine.uiaLineEnd[2] = 0;
344  vpVecPush(vpVec, (void*) &sLine);
345  }
346  spCtx->spLines = (line_u*) vpVecFirst(spCtx->vpVecLines);
347  spCtx->uiLineCount = uiVecLen(spCtx->vpVecLines);
348 }
FF
#define FF
Form Feed.
Definition: linesu.c:53
line_u::uiLineIndex
aint uiLineIndex
zero-based line number
Definition: linesu.h:41
lines_u::spException
exception * spException
Pointer to an exception structure to report fatal errors back to the application's catch block.
Definition: linesu.c:67
APG_FAILURE
#define APG_FAILURE
Definition: apg.h:308
vMemDtor
void vMemDtor(void *vpCtx)
Destroys a Memory component. Frees all memory allocated.
Definition: memory.c:141
vpLinesuCtor
void * vpLinesuCtor(exception *spEx, const uint32_t *uipInput, aint uiLength)
The linesu object constructor.
Definition: linesu.c:92
LS
#define LS
Line Separator.
Definition: linesu.c:56
lines_u::uiIterator
aint uiIterator
Used by the iterator.
Definition: linesu.c:75
spLinesuNext
line_u * spLinesuNext(void *vpCtx)
Returns the next line from the iterator.
Definition: linesu.c:187
line_u
Carries detailed information about the characters and line endings. One for each line in the input gr...
Definition: linesu.h:40
CR
#define CR
Carriage Return.
Definition: linesu.c:54
line_u::uiTextLength
aint uiTextLength
number of Unicode text characters in the line, excluding line end characters
Definition: linesu.h:44
vExContext
void vExContext()
Handles bad context pointers.
Definition: exception.c:126
NEL
#define NEL
Next Line.
Definition: linesu.c:55
lines_u::uiLength
aint uiLength
Number of integers in the array.
Definition: linesu.c:71
lines_u
The lines object context.
Definition: linesu.c:65
XTHROW
#define XTHROW(ctx, msg)
Exception throw macro.
Definition: exception.h:67
aint
uint_fast32_t aint
The APG parser's unsigned integer type.
Definition: apg.h:79
vpMemAlloc
void * vpMemAlloc(void *vpCtx, aint uiBytes)
Allocates memory.
Definition: memory.c:196
lines_u::uiLineCount
aint uiLineCount
Number of lines in the array.
Definition: linesu.c:74
LF
#define LF
Line Feed.
Definition: linesu.c:51
uiVecLen
aint uiVecLen(void *vpCtx)
Get the vector length. That is, the number of elements on the vector.
Definition: vector.c:385
vpVecCtor
void * vpVecCtor(void *vpMem, aint uiElementSize, aint uiInitialAlloc)
The vector object constructor.
Definition: vector.c:118
lines_u::uipInput
uint32_t * uipInput
Pointer to the 32-bit integer array.
Definition: linesu.c:70
bLinesuFindLine
abool bLinesuFindLine(void *vpCtx, aint uiOffset, aint *uipLine, aint *uipRelOffset)
Find the line that the given integer is in.
Definition: linesu.c:149
APG_SUCCESS
#define APG_SUCCESS
Definition: apg.h:307
exception
A structure to describe the type and location of a caught exception.
Definition: exception.h:47
lines_u::vpValidate
const void * vpValidate
A "magic number" to validate the context.
Definition: linesu.c:66
lines_u::spLines
line_u * spLines
Pointer to the first line.
Definition: linesu.c:73
bExValidate
abool bExValidate(exception *spException)
Test an exception structure for validity.
Definition: exception.c:70
spLinesuFirst
line_u * spLinesuFirst(void *vpCtx)
Initialize an iterator over the lines.
Definition: linesu.c:173
vpVecFirst
void * vpVecFirst(void *vpCtx)
Get the first element on the vector. The vector is not altered.
Definition: vector.c:326
vpMemCtor
void * vpMemCtor(exception *spException)
Construct a memory component.
Definition: memory.c:121
lines_u::vpVecLines
void * vpVecLines
Pointer to a vector of parsed lines.
Definition: linesu.c:72
abool
uint8_t abool
abool is the APG bool type.
Definition: apg.h:140
line_u::uiaLineEnd
uint32_t uiaLineEnd[3]
the actual string of line ending character(s), if any
Definition: linesu.h:45
VT
#define VT
Vertical Tab.
Definition: linesu.c:52
PS
#define PS
Paragraph Separator.
Definition: linesu.c:57
lines_u::vpMem
void * vpMem
Pointer to a memory object for allocating all memory associated with this object.
Definition: linesu.c:69
vLinesuDtor
void vLinesuDtor(void *vpCtx)
The linesu object destructor.
Definition: linesu.c:126
uiLinesuCount
aint uiLinesuCount(void *vpCtx)
Returns the number of lines.
Definition: linesu.c:204
linesu.h
Header file for the 32-bit integer version of the lines objects.
vpVecPush
void * vpVecPush(void *vpCtx, void *vpElement)
Adds one element to the end of the array.
Definition: vector.c:193
uiLinesuLength
aint uiLinesuLength(void *vpCtx)
Returns the number of integers in the 32-bit integer array.
Definition: linesu.c:220
line_u::uiLineLength
aint uiLineLength
number of Unicode characters in the line, including line end characters
Definition: linesu.h:43
vVecClear
void vVecClear(void *vpCtx)
Clears all used elements in a vector component.
Definition: vector.c:420
line_u::uiCharIndex
aint uiCharIndex
zero-based index of the first Unicode character of the line
Definition: linesu.h:42
APG Version 7.0 is licensed under the 2-Clause BSD License,
an Open Source Initiative Approved License.