1use crate::error::ParseError;
4use crate::utf8_utils;
5use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event};
6use quick_xml::Reader;
7use std::io::BufRead;
8
/// Incremental validator fed with `quick_xml` events one at a time.
///
/// The validator keeps a stack of the elements that are currently open and
/// two switches that decide how much checking is performed on each event.
#[derive(Clone, Debug)]
pub struct XmlValidator {
    /// Open elements, outermost first; each entry is the decoded local name
    /// together with the depth it was pushed at (the outermost element is 1).
    element_stack: Vec<(String, usize)>,
    /// Depth bookkeeping written by the start/end handlers. `get_depth`
    /// reads the stack length rather than this field.
    current_depth: usize,
    /// Reader buffer position captured at the start of the latest
    /// `validate_event` call; it is attached to every error that is raised.
    current_position: usize,
    /// Enables element-name checks, start/end tag matching and the
    /// unclosed-tag check at end of document.
    strict_validation: bool,
    /// Enables the additional attribute, text and CDATA checks.
    extended_validation: bool,
}
23
24impl Default for XmlValidator {
25 fn default() -> Self {
26 Self::new(true, false)
27 }
28}
29
30impl XmlValidator {
31 pub fn new(strict: bool, extended: bool) -> Self {
33 Self {
34 element_stack: Vec::new(),
35 current_depth: 0,
36 current_position: 0,
37 strict_validation: strict,
38 extended_validation: extended,
39 }
40 }
41
42 pub fn strict() -> Self {
44 Self::new(true, true)
45 }
46
47 pub fn lenient() -> Self {
49 Self::new(false, false)
50 }
51
52 pub fn validate_event<R: BufRead>(
54 &mut self,
55 event: &Event,
56 reader: &Reader<R>,
57 ) -> Result<(), ParseError> {
58 self.current_position = reader.buffer_position() as usize;
60
61 match event {
62 Event::Start(ref element) => {
63 self.handle_start_element(element)?;
64 }
65 Event::End(ref element) => {
66 self.handle_end_element(element)?;
67 }
68 Event::Empty(ref element) => {
69 self.handle_empty_element(element)?;
70 }
71 Event::Text(ref text) => {
72 if self.extended_validation {
73 self.validate_text_content(text)?;
74 }
75 }
76 Event::CData(ref cdata) => {
77 if self.extended_validation {
78 self.validate_cdata_content(cdata)?;
79 }
80 }
81 Event::Comment(_) => {
82 }
84 Event::Decl(_) => {
85 }
87 Event::PI(_) => {
88 }
90 Event::DocType(_) => {
91 }
93 Event::Eof => {
94 self.validate_document_end()?;
95 }
96 }
97
98 Ok(())
99 }
100
101 fn handle_start_element(&mut self, element: &BytesStart) -> Result<(), ParseError> {
103 let element_name = utf8_utils::decode_utf8_at_position(
105 element.local_name().as_ref(),
106 self.current_position,
107 )?;
108
109 if self.strict_validation {
110 if element_name.is_empty() {
112 return Err(ParseError::MalformedXml {
113 message: "Empty element name".to_string(),
114 position: self.current_position,
115 });
116 }
117
118 if !is_valid_xml_name(&element_name) {
120 return Err(ParseError::MalformedXml {
121 message: format!("Invalid element name: '{}'", element_name),
122 position: self.current_position,
123 });
124 }
125 }
126
127 if self.extended_validation {
129 self.validate_attributes(element)?;
130 }
131
132 let element_depth = self.element_stack.len() + 1;
135
136 self.element_stack
138 .push((element_name.clone(), element_depth));
139
140 self.current_depth = element_depth;
142
143 if self.element_stack.len() <= 5 {
145 eprintln!(
146 "PUSH DEBUG: '{}' depth {} (stack size now: {})",
147 element_name,
148 self.current_depth,
149 self.element_stack.len()
150 );
151 }
152
153 Ok(())
154 }
155
156 fn handle_end_element(&mut self, element: &BytesEnd) -> Result<(), ParseError> {
158 let element_name = utf8_utils::decode_utf8_at_position(
160 element.local_name().as_ref(),
161 self.current_position,
162 )?;
163
164 if self.strict_validation {
165 if let Some((expected, depth)) = self.element_stack.pop() {
167 if expected != element_name {
168 eprintln!("TAG MISMATCH DEBUG:");
170 eprintln!(" Expected: '{}' at depth {}", expected, depth);
171 eprintln!(" Found: '{}'", element_name);
172 eprintln!(" Stack size: {}", self.element_stack.len() + 1); eprintln!(" Stack contents: {:?}", self.element_stack);
174 eprintln!(" Position: {}", self.current_position);
175
176 return Err(ParseError::MismatchedTags {
177 expected,
178 found: element_name,
179 position: self.current_position,
180 });
181 }
182 self.current_depth = self.element_stack.len();
185 } else {
186 return Err(ParseError::UnexpectedClosingTag {
187 tag: element_name,
188 position: self.current_position,
189 });
190 }
191 } else {
192 if let Some((_, _depth)) = self.element_stack.pop() {
194 self.current_depth = self.element_stack.len();
196 }
197 }
198
199 Ok(())
200 }
201
202 fn handle_empty_element(&mut self, element: &BytesStart) -> Result<(), ParseError> {
204 let element_name = utf8_utils::decode_utf8_at_position(
206 element.local_name().as_ref(),
207 self.current_position,
208 )?;
209
210 if self.strict_validation {
211 if element_name.is_empty() {
213 return Err(ParseError::MalformedXml {
214 message: "Empty element name".to_string(),
215 position: self.current_position,
216 });
217 }
218
219 if !is_valid_xml_name(&element_name) {
220 return Err(ParseError::MalformedXml {
221 message: format!("Invalid element name: '{}'", element_name),
222 position: self.current_position,
223 });
224 }
225 }
226
227 if self.extended_validation {
229 self.validate_attributes(element)?;
230 }
231
232 Ok(())
235 }
236
237 fn validate_text_content(&self, text: &BytesText) -> Result<(), ParseError> {
239 let _decoded = utf8_utils::handle_text_node(text, self.current_position)?;
241
242 Ok(())
246 }
247
248 fn validate_cdata_content(&self, cdata: &[u8]) -> Result<(), ParseError> {
250 let _decoded = utf8_utils::decode_utf8_at_position(cdata, self.current_position)?;
252
253 let cdata_str = std::str::from_utf8(cdata).map_err(|e| ParseError::InvalidUtf8 {
255 position: self.current_position + e.valid_up_to(),
256 error: e.to_string(),
257 })?;
258
259 if cdata_str.contains("]]>") && !cdata_str.ends_with("]]>") {
260 return Err(ParseError::MalformedXml {
261 message: "CDATA section contains ']]>' in the middle".to_string(),
262 position: self.current_position,
263 });
264 }
265
266 Ok(())
267 }
268
269 fn validate_attributes(&self, element: &BytesStart) -> Result<(), ParseError> {
271 let mut seen_attributes = std::collections::HashSet::new();
272
273 for attr_result in element.attributes() {
274 let attr = attr_result.map_err(|e| ParseError::MalformedXml {
275 message: format!("Malformed attribute: {}", e),
276 position: self.current_position,
277 })?;
278
279 let attr_name =
281 utf8_utils::decode_attribute_name(attr.key.as_ref(), self.current_position)?;
282 let attr_value =
283 utf8_utils::decode_attribute_value(&attr.value, self.current_position)?;
284
285 if attr_name.is_empty() {
287 return Err(ParseError::InvalidAttribute {
288 message: "Empty attribute name".to_string(),
289 position: self.current_position,
290 });
291 }
292
293 if !is_valid_xml_name(&attr_name) {
294 return Err(ParseError::InvalidAttribute {
295 message: format!("Invalid attribute name: '{}'", attr_name),
296 position: self.current_position,
297 });
298 }
299
300 if !seen_attributes.insert(attr_name.clone()) {
302 return Err(ParseError::InvalidAttribute {
303 message: format!("Duplicate attribute: '{}'", attr_name),
304 position: self.current_position,
305 });
306 }
307
308 if attr_value.contains('<') || attr_value.contains('&') && !attr_value.contains(';') {
310 return Err(ParseError::InvalidAttribute {
311 message: format!("Invalid character in attribute value: '{}'", attr_value),
312 position: self.current_position,
313 });
314 }
315 }
316
317 Ok(())
318 }
319
320 fn validate_document_end(&mut self) -> Result<(), ParseError> {
322 if self.strict_validation && !self.element_stack.is_empty() {
323 let unclosed_tags = self
324 .element_stack
325 .iter()
326 .map(|(name, _)| name.clone())
327 .collect();
328 return Err(ParseError::UnclosedTags {
329 tags: unclosed_tags,
330 position: self.current_position,
331 });
332 }
333
334 self.element_stack.clear();
336 self.current_depth = 0;
337 Ok(())
338 }
339
340 pub fn get_element_stack(&self) -> Vec<String> {
342 self.element_stack
343 .iter()
344 .map(|(name, _)| name.clone())
345 .collect()
346 }
347
348 pub fn is_in_element(&self) -> bool {
350 !self.element_stack.is_empty()
351 }
352
353 pub fn get_depth(&self) -> usize {
355 self.element_stack.len()
358 }
359}
360
361fn is_valid_xml_name(name: &str) -> bool {
364 if name.is_empty() {
365 return false;
366 }
367
368 let chars: Vec<char> = name.chars().collect();
369
370 if !is_name_start_char(chars[0]) {
372 return false;
373 }
374
375 for &ch in chars.iter().skip(1) {
377 if !is_name_char(ch) {
378 return false;
379 }
380 }
381
382 true
383}
384
/// Returns `true` when `ch` may begin an XML name.
///
/// Implements the `NameStartChar` production of XML 1.0: `:`, `_`, the
/// ASCII letters, and the listed Unicode ranges, including the
/// supplementary-plane range U+10000..=U+EFFFF.
fn is_name_start_char(ch: char) -> bool {
    matches!(
        ch,
        ':' | '_'
            | 'A'..='Z'
            | 'a'..='z'
            | '\u{C0}'..='\u{D6}'
            | '\u{D8}'..='\u{F6}'
            | '\u{F8}'..='\u{2FF}'
            | '\u{370}'..='\u{37D}'
            | '\u{37F}'..='\u{1FFF}'
            | '\u{200C}'..='\u{200D}'
            | '\u{2070}'..='\u{218F}'
            | '\u{2C00}'..='\u{2FEF}'
            | '\u{3001}'..='\u{D7FF}'
            | '\u{F900}'..='\u{FDCF}'
            | '\u{FDF0}'..='\u{FFFD}'
            | '\u{10000}'..='\u{EFFFF}'
    )
}
402
403fn is_name_char(ch: char) -> bool {
405 is_name_start_char(ch)
406 || ch.is_ascii_digit()
407 || ch == '-'
408 || ch == '.'
409 || ch == '\u{B7}'
410 || ('\u{0300}'..='\u{036F}').contains(&ch)
411 || ('\u{203F}'..='\u{2040}').contains(&ch)
412}
413
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Names that must be accepted by `is_valid_xml_name`.
    #[test]
    fn test_valid_xml_names() {
        for name in ["element", "_private", "ns:element", "element-1", "element.1"] {
            assert!(is_valid_xml_name(name));
        }
    }

    /// Names that must be rejected by `is_valid_xml_name`.
    #[test]
    fn test_invalid_xml_names() {
        for name in ["", "1element", "-element", ".element", "element with spaces"] {
            assert!(!is_valid_xml_name(name));
        }
    }

    /// A default validator starts outside any element.
    #[test]
    fn test_validator_creation() {
        let fresh = XmlValidator::default();
        assert!(!fresh.is_in_element());
        assert_eq!(fresh.get_depth(), 0);
    }

    /// Opening and then closing one element moves the depth 0 -> 1 -> 0.
    #[test]
    fn test_element_stack_tracking() {
        // The reader is only needed for its buffer position.
        let reader = Reader::from_reader(Cursor::new(b"test"));
        let mut validator = XmlValidator::strict();

        let opening = Event::Start(BytesStart::new("test"));
        validator.validate_event(&opening, &reader).unwrap();
        assert_eq!(validator.get_depth(), 1);
        assert!(validator.is_in_element());

        let closing = Event::End(BytesEnd::new("test"));
        validator.validate_event(&closing, &reader).unwrap();
        assert_eq!(validator.get_depth(), 0);
        assert!(!validator.is_in_element());
    }
}
466}