1use crate::error::ParseError;
4use crate::utf8_utils;
5use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event};
6use quick_xml::Reader;
7use std::io::BufRead;
8
/// Incremental checker for a stream of `quick_xml` events.
///
/// Events are fed one at a time through `validate_event`. The validator keeps
/// a stack of the elements that are currently open so that each closing tag
/// can be compared with the element it is supposed to close.
#[derive(Debug, Clone)]
pub struct XmlValidator {
    /// Open elements as `(name, depth)` pairs, innermost element last.
    /// The depth of the outermost element is 1.
    element_stack: Vec<(String, usize)>,
    /// Set to the length of `element_stack` whenever a start or end tag is
    /// accepted.
    current_depth: usize,
    /// Reader buffer position captured at the start of the most recent
    /// `validate_event` call; it is attached to the errors that are reported.
    current_position: usize,
    /// When set, element names are checked, closing tags must match the open
    /// element, and unclosed elements are reported at end of input.
    strict_validation: bool,
    /// When set, attributes, text nodes and CDATA sections are checked too.
    extended_validation: bool,
}
23
24impl Default for XmlValidator {
25 fn default() -> Self {
26 Self::new(true, false)
27 }
28}
29
30impl XmlValidator {
31 pub fn new(strict: bool, extended: bool) -> Self {
33 Self {
34 element_stack: Vec::new(),
35 current_depth: 0,
36 current_position: 0,
37 strict_validation: strict,
38 extended_validation: extended,
39 }
40 }
41
42 pub fn strict() -> Self {
44 Self::new(true, true)
45 }
46
47 pub fn lenient() -> Self {
49 Self::new(false, false)
50 }
51
52 pub fn validate_event<R: BufRead>(
54 &mut self,
55 event: &Event,
56 reader: &Reader<R>,
57 ) -> Result<(), ParseError> {
58 self.current_position = reader.buffer_position() as usize;
60
61 match event {
62 Event::Start(ref element) => {
63 self.handle_start_element(element)?;
64 }
65 Event::End(ref element) => {
66 self.handle_end_element(element)?;
67 }
68 Event::Empty(ref element) => {
69 self.handle_empty_element(element)?;
70 }
71 Event::Text(ref text) => {
72 if self.extended_validation {
73 self.validate_text_content(text)?;
74 }
75 }
76 Event::CData(ref cdata) => {
77 if self.extended_validation {
78 self.validate_cdata_content(cdata)?;
79 }
80 }
81 Event::Comment(_) => {
82 }
84 Event::Decl(_) => {
85 }
87 Event::PI(_) => {
88 }
90 Event::DocType(_) => {
91 }
93 Event::Eof => {
94 self.validate_document_end()?;
95 }
96 }
97
98 Ok(())
99 }
100
101 fn handle_start_element(&mut self, element: &BytesStart) -> Result<(), ParseError> {
103 let element_name = utf8_utils::decode_utf8_at_position(
105 element.local_name().as_ref(),
106 self.current_position,
107 )?;
108
109 if self.strict_validation {
110 if element_name.is_empty() {
112 return Err(ParseError::MalformedXml {
113 message: "Empty element name".to_string(),
114 position: self.current_position,
115 });
116 }
117
118 if !is_valid_xml_name(&element_name) {
120 return Err(ParseError::MalformedXml {
121 message: format!("Invalid element name: '{}'", element_name),
122 position: self.current_position,
123 });
124 }
125 }
126
127 if self.extended_validation {
129 self.validate_attributes(element)?;
130 }
131
132 let element_depth = self.element_stack.len() + 1;
135
136 self.element_stack
138 .push((element_name.clone(), element_depth));
139
140 self.current_depth = element_depth;
142
143 if self.element_stack.len() <= 5 {
145 eprintln!(
146 "PUSH DEBUG: '{}' depth {} (stack size now: {})",
147 element_name,
148 self.current_depth,
149 self.element_stack.len()
150 );
151 }
152
153 Ok(())
154 }
155
156 fn handle_end_element(&mut self, element: &BytesEnd) -> Result<(), ParseError> {
158 let element_name = utf8_utils::decode_utf8_at_position(
160 element.local_name().as_ref(),
161 self.current_position,
162 )?;
163
164 if self.strict_validation {
165 if let Some((expected, depth)) = self.element_stack.pop() {
167 if expected != element_name {
168 eprintln!("TAG MISMATCH DEBUG:");
170 eprintln!(" Expected: '{}' at depth {}", expected, depth);
171 eprintln!(" Found: '{}'", element_name);
172 eprintln!(" Stack size: {}", self.element_stack.len() + 1); eprintln!(" Stack contents: {:?}", self.element_stack);
174 eprintln!(" Position: {}", self.current_position);
175
176 return Err(ParseError::MismatchedTags {
177 expected,
178 found: element_name,
179 position: self.current_position,
180 });
181 }
182 self.current_depth = self.element_stack.len();
185 } else {
186 return Err(ParseError::UnexpectedClosingTag {
187 tag: element_name,
188 position: self.current_position,
189 });
190 }
191 } else {
192 if let Some((_, _depth)) = self.element_stack.pop() {
194 self.current_depth = self.element_stack.len();
196 }
197 }
198
199 Ok(())
200 }
201
202 fn handle_empty_element(&mut self, element: &BytesStart) -> Result<(), ParseError> {
204 let element_name = utf8_utils::decode_utf8_at_position(
206 element.local_name().as_ref(),
207 self.current_position,
208 )?;
209
210 if self.strict_validation {
211 if element_name.is_empty() {
213 return Err(ParseError::MalformedXml {
214 message: "Empty element name".to_string(),
215 position: self.current_position,
216 });
217 }
218
219 if !is_valid_xml_name(&element_name) {
220 return Err(ParseError::MalformedXml {
221 message: format!("Invalid element name: '{}'", element_name),
222 position: self.current_position,
223 });
224 }
225 }
226
227 if self.extended_validation {
229 self.validate_attributes(element)?;
230 }
231
232 Ok(())
235 }
236
237 fn validate_text_content(&self, text: &BytesText) -> Result<(), ParseError> {
239 let _decoded = utf8_utils::handle_text_node(text, self.current_position)?;
241
242 Ok(())
246 }
247
248 fn validate_cdata_content(&self, cdata: &[u8]) -> Result<(), ParseError> {
250 let _decoded = utf8_utils::decode_utf8_at_position(cdata, self.current_position)?;
252
253 let cdata_str = std::str::from_utf8(cdata).map_err(|e| ParseError::InvalidUtf8 {
255 message: format!("UTF-8 decoding error at position {}: {}", self.current_position + e.valid_up_to(), e),
256 })?;
257
258 if cdata_str.contains("]]>") && !cdata_str.ends_with("]]>") {
259 return Err(ParseError::MalformedXml {
260 message: "CDATA section contains ']]>' in the middle".to_string(),
261 position: self.current_position,
262 });
263 }
264
265 Ok(())
266 }
267
268 fn validate_attributes(&self, element: &BytesStart) -> Result<(), ParseError> {
270 let mut seen_attributes = std::collections::HashSet::new();
271
272 for attr_result in element.attributes() {
273 let attr = attr_result.map_err(|e| ParseError::MalformedXml {
274 message: format!("Malformed attribute: {}", e),
275 position: self.current_position,
276 })?;
277
278 let attr_name =
280 utf8_utils::decode_attribute_name(attr.key.as_ref(), self.current_position)?;
281 let attr_value =
282 utf8_utils::decode_attribute_value(&attr.value, self.current_position)?;
283
284 if attr_name.is_empty() {
286 return Err(ParseError::InvalidAttribute {
287 message: "Empty attribute name".to_string(),
288 position: self.current_position,
289 });
290 }
291
292 if !is_valid_xml_name(&attr_name) {
293 return Err(ParseError::InvalidAttribute {
294 message: format!("Invalid attribute name: '{}'", attr_name),
295 position: self.current_position,
296 });
297 }
298
299 if !seen_attributes.insert(attr_name.clone()) {
301 return Err(ParseError::InvalidAttribute {
302 message: format!("Duplicate attribute: '{}'", attr_name),
303 position: self.current_position,
304 });
305 }
306
307 if attr_value.contains('<') || attr_value.contains('&') && !attr_value.contains(';') {
309 return Err(ParseError::InvalidAttribute {
310 message: format!("Invalid character in attribute value: '{}'", attr_value),
311 position: self.current_position,
312 });
313 }
314 }
315
316 Ok(())
317 }
318
319 fn validate_document_end(&mut self) -> Result<(), ParseError> {
321 if self.strict_validation && !self.element_stack.is_empty() {
322 let unclosed_tags = self
323 .element_stack
324 .iter()
325 .map(|(name, _)| name.clone())
326 .collect();
327 return Err(ParseError::UnclosedTags {
328 tags: unclosed_tags,
329 position: self.current_position,
330 });
331 }
332
333 self.element_stack.clear();
335 self.current_depth = 0;
336 Ok(())
337 }
338
339 pub fn get_element_stack(&self) -> Vec<String> {
341 self.element_stack
342 .iter()
343 .map(|(name, _)| name.clone())
344 .collect()
345 }
346
347 pub fn is_in_element(&self) -> bool {
349 !self.element_stack.is_empty()
350 }
351
352 pub fn get_depth(&self) -> usize {
354 self.element_stack.len()
357 }
358}
359
360fn is_valid_xml_name(name: &str) -> bool {
363 if name.is_empty() {
364 return false;
365 }
366
367 let chars: Vec<char> = name.chars().collect();
368
369 if !is_name_start_char(chars[0]) {
371 return false;
372 }
373
374 for &ch in chars.iter().skip(1) {
376 if !is_name_char(ch) {
377 return false;
378 }
379 }
380
381 true
382}
383
/// Returns `true` if `ch` may be the first character of an XML name.
///
/// Implements the `NameStartChar` production of XML 1.0, including the
/// supplementary-plane range U+10000..=U+EFFFF.
fn is_name_start_char(ch: char) -> bool {
    matches!(
        ch,
        ':' | '_'
            | 'A'..='Z'
            | 'a'..='z'
            | '\u{C0}'..='\u{D6}'
            | '\u{D8}'..='\u{F6}'
            | '\u{F8}'..='\u{2FF}'
            | '\u{370}'..='\u{37D}'
            | '\u{37F}'..='\u{1FFF}'
            | '\u{200C}'..='\u{200D}'
            | '\u{2070}'..='\u{218F}'
            | '\u{2C00}'..='\u{2FEF}'
            | '\u{3001}'..='\u{D7FF}'
            | '\u{F900}'..='\u{FDCF}'
            | '\u{FDF0}'..='\u{FFFD}'
            | '\u{10000}'..='\u{EFFFF}'
    )
}
401
402fn is_name_char(ch: char) -> bool {
404 is_name_start_char(ch)
405 || ch.is_ascii_digit()
406 || ch == '-'
407 || ch == '.'
408 || ch == '\u{B7}'
409 || ('\u{0300}'..='\u{036F}').contains(&ch)
410 || ('\u{203F}'..='\u{2040}').contains(&ch)
411}
412
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Well-formed names, including prefixed and punctuated ones, are accepted.
    #[test]
    fn test_valid_xml_names() {
        let accepted = ["element", "_private", "ns:element", "element-1", "element.1"];
        for name in accepted.iter() {
            assert!(is_valid_xml_name(name));
        }
    }

    /// Empty names, bad leading characters and embedded spaces are rejected.
    #[test]
    fn test_invalid_xml_names() {
        let rejected = ["", "1element", "-element", ".element", "element with spaces"];
        for name in rejected.iter() {
            assert!(!is_valid_xml_name(name));
        }
    }

    /// A freshly created validator has no open elements.
    #[test]
    fn test_validator_creation() {
        let fresh = XmlValidator::default();
        assert_eq!(fresh.get_depth(), 0);
        assert!(!fresh.is_in_element());
    }

    /// A start tag opens one level and the matching end tag closes it again.
    #[test]
    fn test_element_stack_tracking() {
        let reader = Reader::from_reader(Cursor::new(b"test"));
        let mut validator = XmlValidator::strict();

        let open = Event::Start(BytesStart::new("test"));
        validator.validate_event(&open, &reader).unwrap();
        assert_eq!(validator.get_depth(), 1);
        assert!(validator.is_in_element());

        let close = Event::End(BytesEnd::new("test"));
        validator.validate_event(&close, &reader).unwrap();
        assert_eq!(validator.get_depth(), 0);
        assert!(!validator.is_in_element());
    }
}