Descent XML
An XML Parser Helper Library
Loading...
Searching...
No Matches
validate.h
Go to the documentation of this file.
1/*
2 * XMLTree - An XML Parser-Helper Library
3 * Copyright (C) 2025 Marcus Harrison
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <https://www.gnu.org/licenses/>.
17 */
18
19#ifndef DESCENT_XML_VALIDATE
20#define DESCENT_XML_VALIDATE
21
22#ifdef __cplusplus
23extern "C" {
24#endif
25
26#include <stdbool.h>
27
28#include <libadt/lptr.h>
29#include <libadt/str.h>
30
31#include "parse.h"
32
37inline struct descent_xml_lex _descent_xml_validate_element_handler(
38 struct descent_xml_lex token,
39 struct libadt_const_lptr element_name,
40 struct libadt_const_lptr attributes,
41 bool empty,
42 void *context_p
43)
44{
45 (void)attributes;
46 struct {
47 bool valid;
48 int depth;
49 } *context = context_p;
50 if (context->depth == 0) {
51 context->valid = false;
52 return token;
53 }
54 context->depth--;
55
56 const struct libadt_const_lptr xmldecl = libadt_str_literal("?xml");
57
58 if (libadt_const_lptr_equal(element_name, xmldecl)) {
59 context->valid = false;
60 return token;
61 }
62
63 if (empty)
64 return token;
65
66 while (token.type != descent_xml_classifier_element_close_name) {
67 if (
69 || token.type == descent_xml_classifier_eof
70 || token.type == descent_xml_lex_xmldecl
71 || token.type == descent_xml_lex_doctype
72 || !context->valid
73 ) {
74 context->valid = false;
75 return token;
76 }
77 token = descent_xml_parse(
78 token,
79 _descent_xml_validate_element_handler,
80 NULL,
81 context
82 );
83 }
84
85 context->valid = libadt_const_lptr_equal(
86 token.value,
87 element_name
88 );
89 context->depth++;
90 token = descent_xml_parse(token, NULL, NULL, NULL);
91 return token;
92}
93
94inline bool descent_xml_validate_element_depth(
95 struct descent_xml_lex token,
96 int depth
97)
98{
99 struct {
100 bool valid;
101 int depth;
102 } context = { true, depth };
103 while (token.type != descent_xml_classifier_element) {
104 if (
107 )
108 return false;
109 token = descent_xml_lex_next_raw(token);
110 }
111 token = descent_xml_parse(
112 token,
113 _descent_xml_validate_element_handler,
114 NULL,
115 &context
116 );
117
118 // We have to check for unexpected/eof here in case the
119 // element handler never runs
120 return
121 context.valid
124}
125
126inline bool descent_xml_validate_element(struct descent_xml_lex token)
127{
128 return descent_xml_validate_element_depth(token, 10000);
129}
130
131inline bool _descent_xml_non_space_text(struct descent_xml_lex token)
132{
133 return token.type == descent_xml_classifier_text
134 || token.type == descent_xml_classifier_text_entity_start
135 || token.type == descent_xml_classifier_text_entity
136 || token.type == descent_xml_lex_cdata;
137}
138
139inline struct descent_xml_lex _descent_xml_validate_prolog_goto_element(
140 struct descent_xml_lex token
141)
142{
143 while (token.type != descent_xml_classifier_element) {
144 if (
147 || _descent_xml_non_space_text(token)
148 )
149 return token;
150 token = descent_xml_lex_next_raw(token);
151 }
152 return token;
153}
154
155inline struct descent_xml_lex _descent_xml_validate_prolog_comments(
156 struct descent_xml_lex token
157)
158{
159 if (token.type != descent_xml_classifier_element) {
161 return token;
162 }
163
164 struct descent_xml_lex next = descent_xml_lex_next_raw(token);
165 if (next.type == descent_xml_lex_comment) {
166 token = descent_xml_lex_then(
167 next,
168 _descent_xml_validate_prolog_goto_element
169 );
170 token = descent_xml_lex_optional(
171 token,
172 _descent_xml_validate_prolog_comments
173 );
174 }
175 return token;
176}
177
178inline struct descent_xml_lex _descent_xml_validate_xmldecl(
179 struct descent_xml_lex token
180)
181{
182 struct descent_xml_lex next = descent_xml_lex_next_raw(token);
183
184 if (next.type == descent_xml_lex_xmldecl) {
185 token = descent_xml_lex_then(
186 next,
187 _descent_xml_validate_prolog_goto_element
188 );
189 }
190
191 return token;
192}
193
194inline struct descent_xml_lex _descent_xml_validate_doctype(
195 struct descent_xml_lex token
196)
197{
198 struct descent_xml_lex next = descent_xml_lex_next_raw(token);
199
200 if (next.type == descent_xml_lex_doctype) {
201 token = descent_xml_lex_then(
202 next,
203 _descent_xml_validate_prolog_goto_element
204 );
205 }
206
207 return token;
208}
209
210inline struct descent_xml_lex _descent_xml_validate_parse_prolog(
211 struct descent_xml_lex token
212)
213{
214 token = descent_xml_lex_then(
215 token,
216 _descent_xml_validate_prolog_goto_element
217 );
218 token = descent_xml_lex_optional(
219 token,
220 _descent_xml_validate_xmldecl
221 );
222 token = descent_xml_lex_optional(
223 token,
224 _descent_xml_validate_prolog_comments
225 );
226 token = descent_xml_lex_optional(
227 token,
228 _descent_xml_validate_doctype
229 );
230 token = descent_xml_lex_optional(
231 token,
232 _descent_xml_validate_prolog_comments
233 );
234 return token;
235}
236
237inline bool descent_xml_validate_document_depth(
238 struct descent_xml_lex token,
239 int depth
240)
241{
242 struct {
243 bool valid;
244 int depth;
245 } context = {true, depth};
246
247 token = _descent_xml_validate_parse_prolog(token);
248 if (
251 )
252 return false;
253
254 token = descent_xml_parse(
255 token,
256 _descent_xml_validate_element_handler,
257 NULL,
258 &context
259 );
260
261 if (!context.valid)
262 return false;
263
264 // check that there's only one element node in the root
265 while (token.type != descent_xml_classifier_element) {
266 if (token.type == descent_xml_classifier_eof)
267 return true;
269 return false;
270 if (token.type == descent_xml_classifier_text)
271 return false;
272 token = descent_xml_lex_next_raw(token);
273 }
274
275 return false;
276}
277
278inline bool descent_xml_validate_document(
279 struct descent_xml_lex token
280)
281{
282 return descent_xml_validate_document_depth(token, 1000);
283}
284
285#ifdef __cplusplus
286} // extern "C"
287#endif
288
289#endif // DESCENT_XML_VALIDATE
descent_xml_classifier_fn *const descent_xml_classifier_unexpected
descent_xml_classifier_fn *const descent_xml_classifier_eof
struct descent_xml_lex descent_xml_lex_next_raw(struct descent_xml_lex token)
Returns the next, raw token in the script referred to by previous.
Definition lex.h:608
struct descent_xml_lex descent_xml_parse(struct descent_xml_lex xml, descent_xml_parse_element_fn *element_handler, descent_xml_parse_text_fn *text_handler, void *context)
Function for parsing an XML document.
Definition parse.h:319
Represents a single token.
Definition lex.h:42
descent_xml_classifier_fn * type
Represents the classified type of the token.
Definition lex.h:46