Descent XML
An XML Parser Helper Library
Loading...
Searching...
No Matches
parse.h
Go to the documentation of this file.
1/*
2 * XMLTree - An XML Parser-Helper Library
3 * Copyright (C) 2025 Marcus Harrison
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <https://www.gnu.org/licenses/>.
17 */
18
19#ifndef DESCENT_XML_PARSE
20#define DESCENT_XML_PARSE
21
22#ifdef __cplusplus
23extern "C" {
24#endif
25
26#include <stdlib.h>
27#include <string.h>
28#include <stdbool.h>
29
30
31#include "lex.h"
32
33#include <libadt/lptr.h>
34#include <libadt/vector.h>
35
/**
 * \brief Type signature for a user-passed element parsing function.
 *        Used by descent_xml_parse().
 *
 * \param token The token that closed the element's opening tag; its type
 *	is descent_xml_classifier_element_empty or
 *	descent_xml_classifier_element_end.
 * \param element_name The value of the element-name token.
 * \param attributes A length-delimited array of struct libadt_const_lptr
 *	entries; each attribute name is followed by its value.
 * \param empty True when the tag ended with
 *	descent_xml_classifier_element_empty.
 * \param context The user pointer given to descent_xml_parse(), passed
 *	through unchanged.
 *
 * \returns The token the parser hands back to its caller.
 */
typedef struct descent_xml_lex descent_xml_parse_element_fn(
	struct descent_xml_lex token,
	struct libadt_const_lptr element_name,
	struct libadt_const_lptr attributes,
	bool empty,
	void *context
);
74
/**
 * \brief Type signature for a user-passed text node parsing function.
 *        Used by descent_xml_parse().
 *
 * \param text A length-delimited view of the text. For a CDATA section
 *	the leading "![CDATA[" and the trailing "]]" have been removed.
 * \param is_cdata True when the text came from a CDATA section, false
 *	for ordinary text.
 * \param context The user pointer given to descent_xml_parse(), passed
 *	through unchanged.
 */
typedef void descent_xml_parse_text_fn(
	struct libadt_const_lptr text,
	bool is_cdata,
	void *context
);
94
/**
 * \brief Type signature for a user-passed element parsing function.
 *        Used by descent_xml_parse_cstr().
 *
 * The strings are heap copies that the library frees as soon as the
 * handler returns, so the handler must copy anything it wants to keep.
 *
 * \param token The token that closed the element's opening tag.
 * \param element_name The element name as a NUL-terminated string.
 * \param attributes A NULL-terminated array of NUL-terminated strings;
 *	each attribute name is followed by its value.
 * \param empty True when the tag ended with
 *	descent_xml_classifier_element_empty.
 * \param context The user pointer given to descent_xml_parse_cstr(),
 *	passed through unchanged.
 *
 * \returns The token the parser hands back to its caller.
 */
typedef struct descent_xml_lex descent_xml_parse_element_cstr_fn(
	struct descent_xml_lex token,
	char *element_name,
	char **attributes,
	bool empty,
	void *context
);
121
/**
 * \brief Type signature for a user-passed text node parsing function.
 *        Used by descent_xml_parse_cstr().
 *
 * \param text The text as a NUL-terminated heap copy. The library frees
 *	it as soon as the handler returns, so the handler must copy
 *	anything it wants to keep.
 * \param is_cdata True when the text came from a CDATA section, false
 *	for ordinary text.
 * \param context The user pointer given to descent_xml_parse_cstr(),
 *	passed through unchanged.
 */
typedef void descent_xml_parse_text_cstr_fn(
	char *text,
	bool is_cdata,
	void *context
);
137
138inline bool _descent_xml_end_token(struct descent_xml_lex token)
139{
140 return token.type == descent_xml_classifier_eof
142}
143
144inline bool _descent_xml_is_attribute_value_type(struct descent_xml_lex token)
145{
146 return token.type == descent_xml_classifier_attribute_value_single_quote
147 || token.type == descent_xml_classifier_attribute_value_single_quote_entity_start
148 || token.type == descent_xml_classifier_attribute_value_single_quote_entity
149 || token.type == descent_xml_classifier_attribute_value_double_quote
150 || token.type == descent_xml_classifier_attribute_value_double_quote_entity_start
151 || token.type == descent_xml_classifier_attribute_value_double_quote_entity;
152
153}
154
155typedef struct {
156 struct libadt_const_lptr value;
157 struct descent_xml_lex token;
158} _descent_xml_value_t;
159
160inline _descent_xml_value_t _descent_xml_attribute_value(
161 struct descent_xml_lex token
162)
163{
164 if (!_descent_xml_is_attribute_value_type(token))
165 return (_descent_xml_value_t) {
166 libadt_const_lptr_truncate(token.value, 0),
167 token
168 };
169
170 struct libadt_const_lptr result = token.value;
171 struct descent_xml_lex next = descent_xml_lex_next_raw(token);
172 while (_descent_xml_is_attribute_value_type(next)) {
173 result.length += next.value.length;
174 next = descent_xml_lex_next_raw(next);
175 }
176 return (_descent_xml_value_t) { result, next };
177}
178
/**
 * Collects an element's attributes and passes the element to
 * element_handler.
 *
 * The value of the incoming token is kept as the element name.
 * Attribute names and values are appended to a vector of
 * struct libadt_const_lptr, each name followed by its value. When the
 * tag finishes with descent_xml_classifier_element_empty or
 * descent_xml_classifier_element_end, element_handler is called and its
 * return value becomes the returned token.
 *
 * NOTE(review): this listing skips source lines 188, 195, 204 and 227.
 * Each gap sits directly before a bare return/break/continue, so each
 * presumably held the `if` guarding that statement (likely a call to
 * _descent_xml_end_token(token)); confirm against the upstream file
 * before relying on the control flow shown here.
 */
179inline struct descent_xml_lex _descent_xml_handle_element(
180 struct descent_xml_lex token,
181 descent_xml_parse_element_fn *element_handler,
182 void *context
183)
184{
185 const struct libadt_const_lptr name = token.value;
186
187 token = descent_xml_lex_next_raw(token);
// NOTE(review): source line 188 is absent; presumably the condition for this return.
189 return token;
190
// NOTE(review): LIBADT_VECTOR_WITH is defined outside this file; presumably it
// scopes the lifetime of `attributes` to the following block. Confirm in libadt/vector.h.
191 LIBADT_VECTOR_WITH(attributes, sizeof(struct libadt_const_lptr), 0) {
192 while (token.type == descent_xml_classifier_element_space) {
193 token = descent_xml_lex_next_raw(token);
194
// NOTE(review): source line 195 is absent; presumably the condition for this break.
196 break;
197
198 if (token.type == descent_xml_classifier_attribute_name) {
// Store the attribute name.
199 attributes = libadt_vector_append(
200 attributes,
201 &token.value
202 );
203 token = descent_xml_lex_next_raw(token);
// NOTE(review): source line 204 is absent; presumably the condition for this break.
205 break;
// Step over the optional expect-assign, assign and opening-quote tokens.
206 if (token.type == descent_xml_classifier_attribute_expect_assign)
207 token = descent_xml_lex_next_raw(token);
208 if (token.type == descent_xml_classifier_attribute_assign)
209 token = descent_xml_lex_next_raw(token);
210 const bool quote =
211 token.type == descent_xml_classifier_attribute_value_single_quote_start
212 || token.type == descent_xml_classifier_attribute_value_double_quote_start;
213 if (quote)
214 token = descent_xml_lex_next_raw(token);
215
// Store the attribute value directly after its name.
216 _descent_xml_value_t attr
217 = _descent_xml_attribute_value(token);
218 attributes = libadt_vector_append(
219 attributes,
220 &attr.value
221 );
// attr.token is the first token after the value; it is stepped over here.
222 token = attr.token;
223 token = descent_xml_lex_next_raw(token);
224 }
225 }
226
// NOTE(review): source line 227 is absent; presumably the condition for this continue.
228 continue;
229
230 const bool is_empty
231 = token.type == descent_xml_classifier_element_empty;
232
233 if (is_empty || token.type == descent_xml_classifier_element_end) {
// Present the vector's storage to the handler as a const, length-delimited array.
234 struct libadt_const_lptr attribsptr = {
235 .buffer = attributes.buffer,
236 .size = sizeof(struct libadt_const_lptr),
237 .length = (ssize_t)attributes.length,
238 };
239
240 token = element_handler(
241 token,
242 name,
243 attribsptr,
244 is_empty,
245 context
246 );
247 }
248 }
249 return token;
250}
251
252inline bool _descent_xml_is_text_type(struct descent_xml_lex token)
253{
254 return token.type == descent_xml_classifier_text_space
255 || token.type == descent_xml_classifier_text
256 || token.type == descent_xml_classifier_text_entity_start
257 || token.type == descent_xml_classifier_text_entity;
258}
259
260inline _descent_xml_value_t _descent_xml_text_value(
261 struct descent_xml_lex token
262)
263{
264 struct libadt_const_lptr result = token.value;
265 struct descent_xml_lex next = descent_xml_lex_next_raw(token);
266 while (_descent_xml_is_text_type(next)) {
267 result.length += next.value.length;
268 token = next;
269 next = descent_xml_lex_next_raw(next);
270 }
271 return (_descent_xml_value_t) { result, token };
272}
273
274inline struct descent_xml_lex _descent_xml_handle_text(
275 struct descent_xml_lex token,
276 descent_xml_parse_text_fn *text_handler,
277 void *context
278)
279{
280 _descent_xml_value_t result = _descent_xml_text_value(token);
281 text_handler(result.value, false, context);
282 return result.token;
283}
284
320 struct descent_xml_lex xml,
321 descent_xml_parse_element_fn *element_handler,
322 descent_xml_parse_text_fn *text_handler,
323 void *context
324)
325{
326 xml = descent_xml_lex_next_raw(xml);
327
328 if (xml.type == descent_xml_classifier_element_name && element_handler) {
329 xml = _descent_xml_handle_element(
330 xml,
331 element_handler,
332 context
333 );
334 } else if (_descent_xml_is_text_type(xml) && text_handler) {
335 xml = _descent_xml_handle_text(
336 xml,
337 text_handler,
338 context
339 );
340 } else if (xml.type == descent_xml_lex_cdata && text_handler) {
341 // TODO: clean this up
342 struct libadt_const_lptr arg
343 = libadt_const_lptr_index(
344 xml.value,
345 sizeof("![CDATA[") - 1
346 );
347 arg = libadt_const_lptr_truncate(
348 arg,
349 arg.length - 2 /* ]] */
350 );
351 text_handler(arg, true, context);
352 }
353
354 return xml;
355}
356
357typedef struct {
358 descent_xml_parse_element_cstr_fn *const element_handler;
359 descent_xml_parse_text_cstr_fn *const text_handler;
360 void *const context;
361 int error;
362} _descent_xml_parse_cstr_context;
363
364extern descent_xml_classifier_void_fn *descent_xml_parse_error(wchar_t);
365
366inline struct descent_xml_lex _cstr_element_handler(
367 struct descent_xml_lex xml,
368 struct libadt_const_lptr element_name,
369 struct libadt_const_lptr attributes,
370 bool empty,
371 void *context
372)
373{
374 // this function is why I hate c-strings
375 const _descent_xml_parse_cstr_context *const cstr_context = context;
376 if (!cstr_context->element_handler)
377 return xml;
378
379 char *const cname = strndup(element_name.buffer, (size_t)element_name.length);
380 if (!cname)
381 goto error_return_xml;
382
383 char * *const cattr = calloc((size_t)(attributes.length + 1), sizeof(char*));
384 if (!cattr)
385 goto error_free_cname;
386
387 for (ssize_t i = 0; i < attributes.length; ++i) {
388 const struct libadt_const_lptr *const attarr = attributes.buffer;
389 const struct libadt_const_lptr *const attribute = &attarr[i];
390 cattr[i] = strndup(attribute->buffer, (size_t)attribute->length);
391 if (!cattr[i])
392 goto error_free_cattr;
393 }
394
395 xml = cstr_context->element_handler(
396 xml,
397 cname,
398 cattr,
399 empty,
400 cstr_context->context
401 );
402
403 for (char **attr = cattr; *attr; attr++) {
404 free(*attr);
405 }
406 free(cattr);
407 free(cname);
408
409 return xml;
410
411error_free_cattr:
412 for (char **attr = cattr; *attr; attr++) {
413 free(*attr);
414 }
415 free(cattr);
416error_free_cname:
417 free(cname);
418error_return_xml:
419 xml.type = descent_xml_parse_error;
420 return xml;
421}
422
423inline void _cstr_text_handler(
424 struct libadt_const_lptr text,
425 bool is_cdata,
426 void *context
427)
428{
429 _descent_xml_parse_cstr_context *const cstr_context = context;
430 if (!cstr_context->text_handler)
431 return;
432
433 char *const ctext = strndup(text.buffer, (size_t)text.length);
434 if (!ctext) {
435 cstr_context->error = 1;
436 return;
437 }
438
439 cstr_context->text_handler(
440 ctext,
441 is_cdata,
442 cstr_context->context
443 );
444
445 free(ctext);
446}
447
485 struct descent_xml_lex xml,
486 descent_xml_parse_element_cstr_fn *element_handler,
487 descent_xml_parse_text_cstr_fn *text_handler,
488 void *context
489)
490{
491 _descent_xml_parse_cstr_context cstr_context = {
492 .element_handler = element_handler,
493 .text_handler = text_handler,
494 .context = context,
495 };
496 return descent_xml_parse(
497 xml,
498 _cstr_element_handler,
499 _cstr_text_handler,
500 &cstr_context
501 );
502}
503
504#ifdef __cplusplus
505} // extern "C"
506#endif
507
508#endif // DESCENT_XML_PARSE
void descent_xml_classifier_void_fn(void)
Definition classifier.h:59
descent_xml_classifier_fn *const descent_xml_classifier_unexpected
descent_xml_classifier_fn *const descent_xml_classifier_eof
struct descent_xml_lex descent_xml_lex_next_raw(struct descent_xml_lex token)
Returns the next raw token in the script referred to by token.
Definition lex.h:608
void descent_xml_parse_text_cstr_fn(char *text, bool is_cdata, void *context)
Type signature for a user-passed text node parsing function. Used by descent_xml_parse_cstr().
Definition parse.h:132
struct descent_xml_lex descent_xml_parse_element_fn(struct descent_xml_lex token, struct libadt_const_lptr element_name, struct libadt_const_lptr attributes, bool empty, void *context)
Type signature for a user-passed element parsing function. Used by descent_xml_parse().
struct descent_xml_lex descent_xml_parse_element_cstr_fn(struct descent_xml_lex token, char *element_name, char **attributes, bool empty, void *context)
Type signature for a user-passed element parsing function. Used by descent_xml_parse_cstr().
struct descent_xml_lex descent_xml_parse_cstr(struct descent_xml_lex xml, descent_xml_parse_element_cstr_fn *element_handler, descent_xml_parse_text_cstr_fn *text_handler, void *context)
Function for parsing an XML document.
Definition parse.h:484
void descent_xml_parse_text_fn(struct libadt_const_lptr text, bool is_cdata, void *context)
Type signature for a user-passed text node parsing function. Used by descent_xml_parse().
Definition parse.h:89
struct descent_xml_lex descent_xml_parse(struct descent_xml_lex xml, descent_xml_parse_element_fn *element_handler, descent_xml_parse_text_fn *text_handler, void *context)
Function for parsing an XML document.
Definition parse.h:319
Represents a single token.
Definition lex.h:42
struct libadt_const_lptr value
A pointer to the classified value.
Definition lex.h:58
descent_xml_classifier_fn * type
Represents the type of token classified.
Definition lex.h:46