1use std::collections::{BTreeMap, BTreeSet};
8use std::io::Read;
9use std::str;
10
11use quick_xml::events::{BytesStart, Event};
12use quick_xml::reader::Reader;
13use quick_xml::XmlVersion;
14
15use crate::core::{
16 validate_namespace_binding, Attribute, Document, ErrorKind, NamespaceDeclaration, QName, Span,
17 XmlError, XmlResult, XML_NAMESPACE_URI,
18};
19use crate::security::{EntityPolicy, ParserSecurityConfig, SecurityLimits};
20
21#[derive(Debug, Clone, PartialEq, Eq)]
23pub struct ParserConfig {
24 preserve_comments: bool,
25 preserve_cdata: bool,
26 security: ParserSecurityConfig,
27}
28
29impl ParserConfig {
30 pub fn new() -> Self {
31 Self::default()
32 }
33
34 pub fn with_preserve_comments(mut self, preserve: bool) -> Self {
35 self.preserve_comments = preserve;
36 self
37 }
38
39 pub fn with_preserve_cdata(mut self, preserve: bool) -> Self {
40 self.preserve_cdata = preserve;
41 self
42 }
43
44 pub fn with_max_document_bytes(mut self, limit: usize) -> Self {
45 let limits = self
46 .security
47 .limits()
48 .clone()
49 .with_max_document_bytes(limit);
50 self.security = self.security.with_limits(limits);
51 self
52 }
53
54 pub fn with_max_text_bytes(mut self, limit: usize) -> Self {
55 let limits = self.security.limits().clone().with_max_text_bytes(limit);
56 self.security = self.security.with_limits(limits);
57 self
58 }
59
60 pub fn with_max_depth(mut self, limit: usize) -> Self {
61 let limits = self.security.limits().clone().with_max_depth(limit);
62 self.security = self.security.with_limits(limits);
63 self
64 }
65
66 pub fn with_max_nodes(mut self, limit: usize) -> Self {
67 let limits = self.security.limits().clone().with_max_nodes(limit);
68 self.security = self.security.with_limits(limits);
69 self
70 }
71
72 pub fn with_security(mut self, security: ParserSecurityConfig) -> Self {
73 self.security = security;
74 self
75 }
76
77 pub fn preserve_comments(&self) -> bool {
78 self.preserve_comments
79 }
80
81 pub fn preserve_cdata(&self) -> bool {
82 self.preserve_cdata
83 }
84
85 pub fn security(&self) -> &ParserSecurityConfig {
86 &self.security
87 }
88
89 fn limits(&self) -> &SecurityLimits {
90 self.security.limits()
91 }
92}
93
94impl Default for ParserConfig {
95 fn default() -> Self {
96 Self {
97 preserve_comments: true,
98 preserve_cdata: true,
99 security: ParserSecurityConfig::default(),
100 }
101 }
102}
103
104pub fn parse_str(xml: &str) -> XmlResult<Document> {
106 parse_str_with_config(xml, &ParserConfig::default())
107}
108
109pub fn parse_str_with_config(xml: &str, config: &ParserConfig) -> XmlResult<Document> {
111 config.limits().check_document_size(xml.len())?;
112
113 let mut reader = Reader::from_str(xml);
114 reader.config_mut().trim_text(false);
115 reader.config_mut().expand_empty_elements = false;
116 reader.config_mut().check_end_names = true;
117
118 parse_events(xml, &mut reader, config)
119}
120
121pub fn parse_reader(reader: impl Read) -> XmlResult<Document> {
123 parse_reader_with_config(reader, &ParserConfig::default())
124}
125
126pub fn parse_reader_with_config(
128 mut reader: impl Read,
129 config: &ParserConfig,
130) -> XmlResult<Document> {
131 let mut bytes = Vec::new();
132 let limit = config.limits().max_document_bytes() as u64 + 1;
133 reader
134 .by_ref()
135 .take(limit)
136 .read_to_end(&mut bytes)
137 .map_err(|error| XmlError::new(ErrorKind::Io, error.to_string()))?;
138
139 config.limits().check_document_size(bytes.len())?;
140
141 let xml = String::from_utf8(bytes).map_err(|error| {
142 XmlError::new(
143 ErrorKind::Parse,
144 format!("XML input must be valid UTF-8: {error}"),
145 )
146 })?;
147
148 parse_str_with_config(&xml, config)
149}
150
151fn parse_events(
152 xml: &str,
153 reader: &mut Reader<&[u8]>,
154 config: &ParserConfig,
155) -> XmlResult<Document> {
156 let mut state = ParserState::new(config);
157
158 loop {
159 let event = reader.read_event().map_err(|error| {
160 parse_error_with_position(xml, reader.error_position() as usize, error.to_string())
161 })?;
162
163 match event {
164 Event::Start(start) => state.start_element(start, reader, xml)?,
165 Event::Empty(start) => {
166 state.start_element(start, reader, xml)?;
167 state.end_element();
168 }
169 Event::End(_) => state.end_element(),
170 Event::Text(text) => {
171 let value = text.xml10_content().map_err(|error| {
172 parse_error_with_position(
173 xml,
174 reader.error_position() as usize,
175 error.to_string(),
176 )
177 })?;
178 state.text(value.as_ref())?;
179 }
180 Event::CData(cdata) => {
181 let value = cdata.decode().map_err(|error| {
182 parse_error_with_position(
183 xml,
184 reader.error_position() as usize,
185 error.to_string(),
186 )
187 })?;
188 state.cdata(value.as_ref())?;
189 }
190 Event::Comment(comment) => {
191 if config.preserve_comments {
192 let value = comment.xml10_content().map_err(|error| {
193 parse_error_with_position(
194 xml,
195 reader.error_position() as usize,
196 error.to_string(),
197 )
198 })?;
199 state.comment(value.as_ref())?;
200 }
201 }
202 Event::PI(pi) => {
203 let content = str::from_utf8(pi.content()).map_err(|error| {
204 parse_error_with_position(
205 xml,
206 reader.error_position() as usize,
207 error.to_string(),
208 )
209 })?;
210 let target = str::from_utf8(pi.target()).map_err(|error| {
211 parse_error_with_position(
212 xml,
213 reader.error_position() as usize,
214 error.to_string(),
215 )
216 })?;
217 state.processing_instruction(target, processing_instruction_data(content))?;
218 }
219 Event::Decl(_) => {}
220 Event::DocType(_) => {
221 config
222 .security()
223 .entity_policy()
224 .reject_doctype()
225 .map_err(|error| {
226 error.with_span(span_for_byte(xml, reader.error_position() as usize))
227 })?;
228 }
229 Event::GeneralRef(reference) => {
230 if let Some(ch) = reference.resolve_char_ref().map_err(|error| {
231 parse_error_with_position(
232 xml,
233 reader.error_position() as usize,
234 error.to_string(),
235 )
236 })? {
237 state.text(&ch.to_string())?;
238 } else {
239 let name = reference.decode().map_err(|error| {
240 parse_error_with_position(
241 xml,
242 reader.error_position() as usize,
243 error.to_string(),
244 )
245 })?;
246 let value = match predefined_entity(name.as_ref()) {
247 Some(value) => value,
248 None => {
249 return Err(unresolved_entity_error(
250 config.security().entity_policy(),
251 name.as_ref(),
252 span_for_byte(xml, reader.error_position() as usize),
253 ));
254 }
255 };
256 state.text(value)?;
257 }
258 }
259 Event::Eof => break,
260 }
261 }
262
263 state.finish()
264}
265
266struct ParserState<'a> {
267 config: &'a ParserConfig,
268 document: Document,
269 stack: Vec<crate::core::NodeId>,
270 namespace_stack: Vec<NamespaceScope>,
271 node_count: usize,
272}
273
274impl<'a> ParserState<'a> {
275 fn new(config: &'a ParserConfig) -> Self {
276 Self {
277 config,
278 document: Document::new(),
279 stack: Vec::new(),
280 namespace_stack: vec![NamespaceScope::default()],
281 node_count: 0,
282 }
283 }
284
285 fn start_element(
286 &mut self,
287 start: BytesStart<'_>,
288 reader: &Reader<&[u8]>,
289 xml: &str,
290 ) -> XmlResult<()> {
291 let declarations = namespace_declarations(&start, reader, xml)?;
292 let scope = self
293 .namespace_stack
294 .last()
295 .expect("root namespace scope exists")
296 .with_declarations(&declarations);
297 let name = qname_from_raw(start.name().as_ref(), &scope, true)?;
298 let id = match self.stack.last().copied() {
299 Some(parent) => self.document.add_element(parent, name)?,
300 None => self.document.add_root_element(name)?,
301 };
302
303 self.count_node()?;
304 self.config.limits().check_depth(self.stack.len() + 1)?;
305
306 for declaration in declarations {
307 let core_declaration = match declaration.prefix {
308 Some(prefix) => NamespaceDeclaration::prefixed(prefix, declaration.uri)?,
309 None => NamespaceDeclaration::default(declaration.uri)?,
310 };
311 self.document
312 .add_namespace_declaration(id, core_declaration)?;
313 }
314
315 let mut attribute_names = BTreeSet::new();
316 for attribute in start.attributes() {
317 let attribute =
318 attribute.map_err(|error| parse_error_with_position(xml, 0, error.to_string()))?;
319 if is_namespace_declaration(attribute.key.as_ref()) {
320 continue;
321 }
322 let name = qname_from_raw(attribute.key.as_ref(), &scope, false)?;
323 if !attribute_names.insert(expanded_attribute_name(&name)) {
324 return Err(XmlError::new(
325 ErrorKind::Parse,
326 format!(
327 "duplicate attribute `{}` by expanded name",
328 name.lexical_name()
329 ),
330 ));
331 }
332 let value = attribute
333 .decoded_and_normalized_value(XmlVersion::Explicit1_0, reader.decoder())
334 .map_err(|error| parse_error_with_position(xml, 0, error.to_string()))?;
335 self.document
336 .add_attribute(id, Attribute::new(name, value.as_ref()))?;
337 }
338
339 self.stack.push(id);
340 self.namespace_stack.push(scope);
341 Ok(())
342 }
343
344 fn end_element(&mut self) {
345 self.stack.pop();
346 self.namespace_stack.pop();
347 }
348
349 fn text(&mut self, value: &str) -> XmlResult<()> {
350 if value.is_empty() {
351 return Ok(());
352 }
353 if self.stack.is_empty() {
354 if value.trim().is_empty() {
355 return Ok(());
356 }
357 return Err(XmlError::new(
358 ErrorKind::Parse,
359 "non-whitespace text outside the document root is not allowed",
360 ));
361 }
362 self.check_text_limit(value)?;
363 let parent = self.current_parent()?;
364 self.document.add_text(parent, value)?;
365 self.count_node()
366 }
367
368 fn cdata(&mut self, value: &str) -> XmlResult<()> {
369 if value.is_empty() {
370 return Ok(());
371 }
372 if self.stack.is_empty() {
373 return Err(XmlError::new(
374 ErrorKind::Parse,
375 "CDATA outside the document root is not allowed",
376 ));
377 }
378 self.check_text_limit(value)?;
379 let parent = self.current_parent()?;
380 if self.config.preserve_cdata {
381 self.document.add_cdata(parent, value)?;
382 } else {
383 self.document.add_text(parent, value)?;
384 }
385 self.count_node()
386 }
387
388 fn comment(&mut self, value: &str) -> XmlResult<()> {
389 if self.stack.is_empty() {
390 return Ok(());
391 }
392 self.check_text_limit(value)?;
393 let parent = self.current_parent()?;
394 self.document.add_comment(parent, value)?;
395 self.count_node()
396 }
397
398 fn processing_instruction(&mut self, target: &str, data: Option<&str>) -> XmlResult<()> {
399 if self.stack.is_empty() {
400 return Ok(());
401 }
402 let parent = self.current_parent()?;
403 self.document
404 .add_processing_instruction(parent, target, data)?;
405 self.count_node()
406 }
407
408 fn finish(self) -> XmlResult<Document> {
409 if !self.stack.is_empty() {
410 return Err(XmlError::new(
411 ErrorKind::Parse,
412 "XML document ended before closing all elements",
413 ));
414 }
415 if self.document.root().is_none() {
416 return Err(XmlError::new(
417 ErrorKind::Parse,
418 "XML document must contain one root element",
419 ));
420 }
421 Ok(self.document)
422 }
423
424 fn current_parent(&self) -> XmlResult<crate::core::NodeId> {
425 self.stack.last().copied().ok_or_else(|| {
426 XmlError::new(
427 ErrorKind::Parse,
428 "XML content outside the document root is not supported",
429 )
430 })
431 }
432
433 fn count_node(&mut self) -> XmlResult<()> {
434 self.node_count += 1;
435 self.config.limits().check_nodes(self.node_count)
436 }
437
438 fn check_text_limit(&self, value: &str) -> XmlResult<()> {
439 self.config.limits().check_text_size(value.len())
440 }
441}
442
/// Namespace bindings visible at one point of the element tree.
#[derive(Debug, Clone, Default)]
struct NamespaceScope {
    /// URI applied to unprefixed element names, if a default namespace is in
    /// effect.
    default_namespace: Option<String>,
    /// Prefix-to-URI bindings currently in scope.
    prefixed: BTreeMap<String, String>,
}

impl NamespaceScope {
    /// Returns a copy of this scope with `declarations` layered on top.
    ///
    /// Later declarations win over earlier ones and over inherited bindings.
    /// A default declaration with an empty URI (`xmlns=""`) removes the default
    /// namespace for the new scope, as Namespaces in XML requires, instead of
    /// binding unprefixed names to an empty URI.
    fn with_declarations(&self, declarations: &[ParsedNamespaceDeclaration]) -> Self {
        let mut next = self.clone();
        for declaration in declarations {
            match &declaration.prefix {
                Some(prefix) => {
                    next.prefixed
                        .insert(prefix.clone(), declaration.uri.clone());
                }
                None if declaration.uri.is_empty() => {
                    next.default_namespace = None;
                }
                None => {
                    next.default_namespace = Some(declaration.uri.clone());
                }
            }
        }
        next
    }
}

/// One `xmlns` / `xmlns:prefix` attribute as read from a start tag.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ParsedNamespaceDeclaration {
    /// Declared prefix, or `None` for a default-namespace declaration.
    prefix: Option<String>,
    /// Decoded attribute value, i.e. the namespace URI.
    uri: String,
}
472
473fn namespace_declarations(
474 start: &BytesStart<'_>,
475 reader: &Reader<&[u8]>,
476 xml: &str,
477) -> XmlResult<Vec<ParsedNamespaceDeclaration>> {
478 let mut declarations = Vec::new();
479 for attribute in start.attributes() {
480 let attribute =
481 attribute.map_err(|error| parse_error_with_position(xml, 0, error.to_string()))?;
482 let raw_name = attribute.key.as_ref();
483 if !is_namespace_declaration(raw_name) {
484 continue;
485 }
486 let uri = attribute
487 .decoded_and_normalized_value(XmlVersion::Explicit1_0, reader.decoder())
488 .map_err(|error| parse_error_with_position(xml, 0, error.to_string()))?;
489 let prefix = raw_name
490 .strip_prefix(b"xmlns:")
491 .map(bytes_to_string)
492 .transpose()?;
493 validate_namespace_binding(prefix.as_deref(), uri.as_ref())?;
494 declarations.push(ParsedNamespaceDeclaration {
495 prefix,
496 uri: uri.into_owned(),
497 });
498 }
499 Ok(declarations)
500}
501
502fn qname_from_raw(raw: &[u8], scope: &NamespaceScope, default_applies: bool) -> XmlResult<QName> {
503 let raw = bytes_to_string(raw)?;
504 match raw.split_once(':') {
505 Some((prefix, local)) => {
506 let uri = if prefix == "xml" {
507 XML_NAMESPACE_URI
508 } else {
509 scope.prefixed.get(prefix).ok_or_else(|| {
510 XmlError::new(
511 ErrorKind::UnknownNamespacePrefix,
512 format!("namespace prefix `{prefix}` is not declared"),
513 )
514 })?
515 };
516 QName::qualified(prefix, local, uri)
517 }
518 None if default_applies => match &scope.default_namespace {
519 Some(uri) => QName::namespaced(raw, uri),
520 None => QName::new(raw),
521 },
522 None => QName::new(raw),
523 }
524}
525
526fn expanded_attribute_name(name: &QName) -> (Option<String>, String) {
527 (
528 name.namespace_uri().map(|uri| uri.as_str().to_owned()),
529 name.local().to_owned(),
530 )
531}
532
/// Returns `true` when a raw attribute name is `xmlns` or starts with `xmlns:`.
fn is_namespace_declaration(raw_name: &[u8]) -> bool {
    // After removing `xmlns`, either nothing or a `:`-led remainder must be left.
    matches!(raw_name.strip_prefix(b"xmlns"), Some([] | [b':', ..]))
}
536
537fn bytes_to_string(bytes: &[u8]) -> XmlResult<String> {
538 str::from_utf8(bytes)
539 .map(str::to_owned)
540 .map_err(|error| XmlError::new(ErrorKind::Parse, error.to_string()))
541}
542
/// Maps the empty string to `None` and any other string to `Some(value)`.
fn empty_to_none(value: &str) -> Option<&str> {
    (!value.is_empty()).then_some(value)
}
550
551fn processing_instruction_data(value: &str) -> Option<&str> {
552 let value = value
553 .strip_prefix(' ')
554 .or_else(|| value.strip_prefix('\t'))
555 .or_else(|| value.strip_prefix('\r'))
556 .or_else(|| value.strip_prefix('\n'))
557 .unwrap_or(value);
558 empty_to_none(value)
559}
560
/// Returns the replacement text of one of the five predefined XML entities
/// (`lt`, `gt`, `amp`, `apos`, `quot`), or `None` for any other name.
///
/// Names are matched exactly and case-sensitively.
fn predefined_entity(name: &str) -> Option<&'static str> {
    const PREDEFINED: [(&str, &str); 5] = [
        ("lt", "<"),
        ("gt", ">"),
        ("amp", "&"),
        ("apos", "'"),
        ("quot", "\""),
    ];

    PREDEFINED
        .iter()
        .find(|(entity, _)| *entity == name)
        .map(|&(_, replacement)| replacement)
}
571
572fn unresolved_entity_error(policy: &EntityPolicy, name: &str, span: Span) -> XmlError {
573 match policy.reject_external_entity(name) {
574 Err(error) => error.with_span(span),
575 Ok(()) => XmlError::new(
576 ErrorKind::Parse,
577 format!("external entity resolution is not implemented for `&{name};`"),
578 )
579 .with_span(span),
580 }
581}
582
583fn parse_error_with_position(
584 xml: &str,
585 byte_position: usize,
586 message: impl Into<String>,
587) -> XmlError {
588 XmlError::new(ErrorKind::Parse, message).with_span(span_for_byte(xml, byte_position))
589}
590
591fn span_for_byte(xml: &str, byte_position: usize) -> Span {
592 let mut line = 1;
593 let mut column = 1;
594 for (index, ch) in xml.char_indices() {
595 if index >= byte_position {
596 break;
597 }
598 if ch == '\n' {
599 line += 1;
600 column = 1;
601 } else {
602 column += 1;
603 }
604 }
605 Span::new(line, column)
606}
607
/// Unit tests for the parser entry points, namespace handling, entity handling
/// and security limits.
#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use super::*;
    use crate::core::{Attribute, NamespaceDeclaration, NodeKind};
    use crate::writer::to_string_compact;

    #[test]
    fn parser_parse_str_reads_simple_xml() -> XmlResult<()> {
        let document = parse_str("<Root><Child>value</Child></Root>")?;
        let root = document.root().expect("root");
        let [child] = document.children(root)? else {
            panic!("expected one child");
        };
        let [text] = document.children(*child)? else {
            panic!("expected one text child");
        };

        assert!(matches!(document.node(*text)?.kind(), NodeKind::Text(value) if value == "value"));
        Ok(())
    }

    #[test]
    fn parser_parse_reader_reads_xml() -> XmlResult<()> {
        let document = parse_reader(Cursor::new("<Root><Child/></Root>"))?;

        assert_eq!(to_string_compact(&document)?, "<Root><Child/></Root>");
        Ok(())
    }

    #[test]
    fn parser_whitespace_around_root_is_allowed() -> XmlResult<()> {
        let document = parse_str(" \n\t<Root/> \n")?;

        assert_eq!(to_string_compact(&document)?, "<Root/>");
        Ok(())
    }

    #[test]
    fn parser_xml_declaration_and_boundary_misc_are_allowed() -> XmlResult<()> {
        let document = parse_str(
            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!-- before -->\n<?before ok?>\n<Root/>\n<!-- after -->\n<?after ok?>",
        )?;

        assert_eq!(to_string_compact(&document)?, "<Root/>");
        Ok(())
    }

    #[test]
    fn parser_processing_instruction_roundtrips_without_accumulating_separator_space(
    ) -> XmlResult<()> {
        let document = parse_str("<Root><?format keep?></Root>")?;

        assert_eq!(
            to_string_compact(&document)?,
            "<Root><?format keep?></Root>"
        );
        Ok(())
    }

    #[test]
    fn parser_rejects_non_whitespace_text_outside_root() {
        let before = parse_str("text<Root/>").expect_err("text before root must fail");
        let after = parse_str("<Root/>text").expect_err("text after root must fail");

        assert_eq!(before.kind(), &ErrorKind::Parse);
        assert!(before.message().contains("outside the document root"));
        assert_eq!(after.kind(), &ErrorKind::Parse);
        assert!(after.message().contains("outside the document root"));
    }

    #[test]
    fn parser_empty_document_requires_root() {
        let empty = parse_str("").expect_err("empty document must fail");
        let whitespace = parse_str(" \n\t ").expect_err("whitespace-only document must fail");
        let comment =
            parse_str("<!-- only comment -->").expect_err("comment-only document must fail");
        let pi = parse_str("<?xml-stylesheet href=\"style.xsl\"?>")
            .expect_err("PI-only document must fail");

        for error in [empty, whitespace, comment, pi] {
            assert_eq!(error.kind(), &ErrorKind::Parse);
            assert!(error.message().contains("root element"));
        }
    }

    #[test]
    fn parser_namespaces_preserves_qnames_and_attributes() -> XmlResult<()> {
        let document = parse_str(
            r#"<doc:Root xmlns="urn:default" xmlns:doc="urn:doc" doc:id="A1"><Child plain="yes"/></doc:Root>"#,
        )?;
        let root = document.root().expect("root");
        let root_node = document.node(root)?;
        let NodeKind::Element(root_element) = root_node.kind() else {
            panic!("expected root element");
        };

        assert_eq!(
            root_element.name().prefix().map(|prefix| prefix.as_str()),
            Some("doc")
        );
        assert_eq!(
            root_element.name().namespace_uri().map(|uri| uri.as_str()),
            Some("urn:doc")
        );
        assert_eq!(root_element.namespace_declarations().len(), 2);
        assert_eq!(root_element.attributes()[0].name().lexical_name(), "doc:id");

        let child = document.children(root)?[0];
        let NodeKind::Element(child_element) = document.node(child)?.kind() else {
            panic!("expected child element");
        };
        assert_eq!(
            child_element.name().namespace_uri().map(|uri| uri.as_str()),
            Some("urn:default")
        );
        assert_eq!(child_element.attributes()[0].name().namespace_uri(), None);
        Ok(())
    }

    #[test]
    fn parser_namespace_reserved_xml_prefix_is_implicit() -> XmlResult<()> {
        let document = parse_str(r#"<Root xml:lang="en" xml:space="preserve"/>"#)?;
        let root = document.root().expect("root");
        let NodeKind::Element(element) = document.node(root)?.kind() else {
            panic!("expected root element");
        };

        assert_eq!(element.attributes().len(), 2);
        assert_eq!(element.attributes()[0].name().lexical_name(), "xml:lang");
        assert_eq!(
            element.attributes()[0]
                .name()
                .namespace_uri()
                .map(|uri| uri.as_str()),
            Some(XML_NAMESPACE_URI)
        );
        assert_eq!(element.attributes()[1].name().lexical_name(), "xml:space");
        Ok(())
    }

    #[test]
    fn parser_namespace_rejects_reserved_declaration_misuse() {
        let cases = [
            r#"<Root xmlns:xml="urn:wrong"/>"#,
            r#"<Root xmlns:doc="http://www.w3.org/XML/1998/namespace"/>"#,
            r#"<Root xmlns:xmlns="urn:any"/>"#,
            r#"<Root xmlns="http://www.w3.org/2000/xmlns/"/>"#,
        ];

        for xml in cases {
            let error = parse_str(xml).expect_err("reserved namespace misuse must fail");
            assert_eq!(error.kind(), &ErrorKind::InvalidNamespace, "{xml}");
        }
    }

    #[test]
    fn parser_namespace_rejects_duplicate_attributes_by_expanded_name() {
        let direct = parse_str(r#"<Root id="1" id="2"/>"#)
            .expect_err("duplicate unqualified attributes must fail");
        let expanded = parse_str(r#"<Root xmlns:a="urn:x" xmlns:b="urn:x" a:id="1" b:id="2"/>"#)
            .expect_err("duplicate expanded attributes must fail");

        assert_eq!(direct.kind(), &ErrorKind::Parse);
        assert!(direct.message().contains("duplicate"));
        assert_eq!(expanded.kind(), &ErrorKind::Parse);
        assert!(expanded.message().contains("duplicate"));
    }

    #[test]
    fn parser_namespace_default_does_not_apply_to_attributes() -> XmlResult<()> {
        let document = parse_str(r#"<Root xmlns="urn:root" id="A1"/>"#)?;
        let root = document.root().expect("root");
        let NodeKind::Element(element) = document.node(root)?.kind() else {
            panic!("expected root element");
        };

        assert_eq!(
            element.name().namespace_uri().map(|uri| uri.as_str()),
            Some("urn:root")
        );
        assert_eq!(element.attributes()[0].name().namespace_uri(), None);
        Ok(())
    }

    #[test]
    fn parser_comments_can_be_preserved_or_discarded() -> XmlResult<()> {
        let preserved = parse_str("<Root><!-- note --><Child/></Root>")?;
        assert!(matches!(
            preserved.node(preserved.children(preserved.root().unwrap())?[0])?.kind(),
            NodeKind::Comment(comment) if comment == " note "
        ));

        let discarded = parse_str_with_config(
            "<Root><!-- note --><Child/></Root>",
            &ParserConfig::default().with_preserve_comments(false),
        )?;
        assert_eq!(discarded.children(discarded.root().unwrap())?.len(), 1);
        Ok(())
    }

    #[test]
    fn parser_preserves_cdata() -> XmlResult<()> {
        let document = parse_str("<Root><![CDATA[a < b]]></Root>")?;
        let root = document.root().expect("root");
        let child = document.children(root)?[0];

        assert!(matches!(document.node(child)?.kind(), NodeKind::CData(value) if value == "a < b"));
        Ok(())
    }

    #[test]
    fn parser_security_rejects_external_entities_by_default() {
        let error = parse_str(r#"<!DOCTYPE Root SYSTEM "file:///tmp/x"><Root/>"#)
            .expect_err("doctype must be blocked");

        assert_eq!(error.kind(), &ErrorKind::Parse);
        assert!(error.message().contains("DOCTYPE"));
    }

    #[test]
    fn parser_entity_predefined_references_are_resolved() -> XmlResult<()> {
        // All five predefined references go in; the expectation assumes the
        // writer re-escapes only `<`, `&` and `>` in text content.
        let document = parse_str("<Root>&lt;&amp;&gt;&apos;&quot;</Root>")?;

        assert_eq!(
            to_string_compact(&document)?,
            "<Root>&lt;&amp;&gt;'\"</Root>"
        );
        Ok(())
    }

    #[test]
    fn parser_entity_unknown_reference_is_rejected_by_default() {
        let error = parse_str("<Root>&xxe;</Root>").expect_err("unknown entity must fail");

        assert_eq!(error.kind(), &ErrorKind::Parse);
        assert!(error.message().contains("disabled by default"));
        assert!(error.span().is_some());
    }

    #[test]
    fn parser_entity_permissive_policy_still_rejects_unimplemented_resolution() {
        let security = ParserSecurityConfig::default()
            .with_entity_policy(EntityPolicy::secure().with_external_entities(true));
        let config = ParserConfig::default().with_security(security);

        let error = parse_str_with_config("<Root>&xxe;</Root>", &config)
            .expect_err("unknown entity must fail without panic");

        assert_eq!(error.kind(), &ErrorKind::Parse);
        assert!(error.message().contains("not implemented"));
        assert!(error.span().is_some());
    }

    #[test]
    fn parser_respects_max_depth() {
        let config = ParserConfig::default().with_max_depth(1);
        let error =
            parse_str_with_config("<Root><Child/></Root>", &config).expect_err("depth must fail");

        assert_eq!(error.kind(), &ErrorKind::Parse);
        assert!(error.message().contains("depth"));
    }

    #[test]
    fn parser_consumes_shared_security_config() {
        let security = ParserSecurityConfig::default()
            .with_limits(SecurityLimits::default().with_max_document_bytes(6));
        let config = ParserConfig::default().with_security(security);

        let error = parse_str_with_config("<Root/>", &config).expect_err("size must fail");

        assert_eq!(error.kind(), &ErrorKind::Parse);
        assert!(error.message().contains("maximum size"));
    }

    #[test]
    fn parser_reports_span_for_malformed_xml() {
        let error = parse_str("<Root>\n <Child></Root>").expect_err("malformed XML must fail");

        assert_eq!(error.kind(), &ErrorKind::Parse);
        assert!(error.span().is_some());
    }

    #[test]
    fn parser_roundtrip_reads_writer_output() -> XmlResult<()> {
        let mut document = Document::new();
        let root = document.add_root_element(QName::qualified("doc", "Root", "urn:doc")?)?;
        document
            .add_namespace_declaration(root, NamespaceDeclaration::prefixed("doc", "urn:doc")?)?;
        document.add_attribute(root, Attribute::new(QName::new("id")?, "A1"))?;
        document.add_text(root, "value")?;

        let xml = to_string_compact(&document)?;
        let parsed = parse_str(&xml)?;

        assert_eq!(to_string_compact(&parsed)?, xml);
        Ok(())
    }
}