1use std::collections::hash_map::DefaultHasher;
8use std::collections::HashMap;
9use std::hash::{Hash, Hasher};
10use std::io::BufRead;
11
12use bumpalo::Bump;
13use quick_xml::events::Event;
14use quick_xml::Reader;
15
16use crate::namespace::table::XML_NAMESPACE;
17use crate::namespace::NameTable;
18use crate::parser::location::SourceSpan;
19use crate::schema::SchemaSet;
20
21use super::buffer::BufferDocument;
22use super::error::BufferDocumentError;
23use super::{
24 BindingRemapTable, BufferDocumentOptions, DocumentKind, ElementIndex, NamespaceNode,
25 NamespacePageFactory, Node, NodePages, NodeSchemaBinding, NodeSourceSpans, NodeType, NsRef,
26 QNameAtom, QNameTable, StringStore, NULL,
27};
28
/// Bookkeeping record kept on the builder's element stack, one per open element.
///
/// An entry is pushed by `start_element` and popped by `end_element`.
// Neither field is read anywhere in the code visible in this file (they are
// only written), so the dead-code lint is silenced once for the whole struct.
#[allow(dead_code)]
#[derive(Clone, Copy, Debug)]
struct ElementBuildState {
    /// Node reference that was allocated for the element.
    node_ref: u32,
    /// Flipped to `true` by `attribute` once the element has received an attribute.
    has_attrs: bool,
}
39
/// Hashes `name` with the standard library's `DefaultHasher` and keeps the
/// low 32 bits of the 64-bit digest.
///
/// The result is used as the element-index key and as
/// `QNameAtom::local_name_hash` for elements.
fn hash_name(name: &str) -> u32 {
    let mut state = DefaultHasher::new();
    Hash::hash(name, &mut state);
    let digest: u64 = state.finish();
    // Truncation to the low 32 bits is intentional.
    digest as u32
}
48
49pub struct BufferDocumentBuilder<'a> {
53 doc: BufferDocument<'a>,
54 parent: u32,
55 last_sibling: u32,
56 last_attr: u32,
57 namespace_stack: Vec<(u32, NsRef)>,
58 text_buffer: String,
59 text_type: Option<NodeType>,
60 current_namespace: NsRef,
61 element_stack: Vec<ElementBuildState>,
62 pending_spans: Vec<(u32, usize)>,
63 #[allow(dead_code)]
64 options: BufferDocumentOptions,
65}
66
67impl<'a> BufferDocumentBuilder<'a> {
68 pub fn new(
74 arena: &'a Bump,
75 names: &'a NameTable,
76 schema_set: Option<&'a SchemaSet>,
77 options: BufferDocumentOptions,
78 ) -> Result<Self, BufferDocumentError> {
79 let effective_names = schema_set
80 .map(|ss| &ss.name_table as &'a NameTable)
81 .unwrap_or(names);
82
83 let mut namespace_pages = NamespacePageFactory::new(arena);
85 let xml_ns_ref = namespace_pages
86 .alloc()
87 .ok_or(BufferDocumentError::Overflow)?;
88 let xml_prefix_id = effective_names.add("xml");
89 let xml_uri_id = effective_names.add(XML_NAMESPACE);
90 namespace_pages.set(
91 xml_ns_ref,
92 NamespaceNode::new(xml_prefix_id, xml_uri_id, NsRef::NULL),
93 );
94
95 let mut nodes = NodePages::new(arena);
97 let root_ref = nodes.alloc()?;
98 let mut root_node = Node::default();
99 root_node.set_node_type(NodeType::Root);
100 root_node.parent = NULL;
101 root_node.next_sibling = NULL;
102 nodes.set(root_ref, root_node);
103
104 let doc = BufferDocument {
105 arena,
106 kind: options.kind,
107 names: effective_names,
108 nodes,
109 qname_table: QNameTable::new(),
110 strings: StringStore::new(arena),
111 binding_remap: BindingRemapTable::new(),
112 root: root_ref,
113 options,
114 namespace_pages,
115 xml_namespace: xml_ns_ref,
116 element_namespaces: HashMap::new(),
117 element_index: ElementIndex::new(),
118 source_spans: NodeSourceSpans::new(),
119 id_elements: HashMap::new(),
120 schema_set,
121 fragment_base_uri: None,
122 };
123
124 Ok(Self {
125 doc,
126 parent: root_ref,
127 last_sibling: NULL,
128 last_attr: NULL,
129 namespace_stack: Vec::new(),
130 text_buffer: String::new(),
131 text_type: None,
132 current_namespace: NsRef::NULL,
133 element_stack: Vec::new(),
134 pending_spans: Vec::new(),
135 options,
136 })
137 }
138
139 pub fn start_element(
146 &mut self,
147 local_name: &str,
148 ns_uri: &str,
149 prefix: &str,
150 ns_declarations: &[(&str, &str)],
151 ) -> Result<u32, BufferDocumentError> {
152 self.flush_text()?;
153
154 let prev_namespace = self.current_namespace;
156
157 for &(ns_prefix, ns_uri_decl) in ns_declarations {
159 self.handle_namespace_decl(ns_prefix, ns_uri_decl)?;
160 }
161
162 let local_id = self.doc.names.add(local_name);
164 let uri_id = self.doc.names.add(ns_uri);
165 let prefix_id = self.doc.names.add(prefix);
166 let local_hash = hash_name(local_name);
167
168 let qualified_name_idx = if prefix.is_empty() {
169 self.doc.strings.store(local_name)
170 } else {
171 self.doc.strings.store(&format!("{prefix}:{local_name}"))
172 };
173 let qname = QNameAtom {
174 local_name: local_id,
175 namespace_uri: uri_id,
176 prefix: prefix_id,
177 local_name_hash: local_hash,
178 qualified_name_idx,
179 };
180 let qname_idx = self.doc.qname_table.atomize(qname);
181
182 let elem_ref = self.doc.nodes.alloc()?;
184 let mut elem_node = Node::default();
185 elem_node.set_node_type(NodeType::Element);
186 elem_node.parent = self.parent;
187 elem_node.next_sibling = NULL;
188 elem_node.value = qname_idx;
189 self.doc.nodes.set(elem_ref, elem_node);
190
191 if self.last_sibling != NULL {
193 self.doc.nodes.update(self.last_sibling, |n| {
194 n.next_sibling = elem_ref;
195 });
196 }
197
198 self.doc
200 .nodes
201 .update(self.parent, |n| n.set_flag(Node::HAS_CHILDREN));
202
203 if self.doc.kind == DocumentKind::Full {
205 self.doc.element_index.add(local_hash, elem_ref);
206 }
207
208 if self.current_namespace != prev_namespace {
210 self.namespace_stack.push((elem_ref, prev_namespace));
211 self.doc.nodes.update(elem_ref, |n| {
212 n.set_flag(Node::HAS_NMSP_DECLS);
213 });
214 self.doc
215 .element_namespaces
216 .insert(elem_ref, self.current_namespace);
217 }
218
219 self.element_stack.push(ElementBuildState {
221 node_ref: elem_ref,
222 has_attrs: false,
223 });
224 self.parent = elem_ref;
225 self.last_sibling = NULL;
226 self.last_attr = NULL;
227
228 Ok(elem_ref)
229 }
230
231 pub fn attribute(
233 &mut self,
234 local_name: &str,
235 ns_uri: &str,
236 prefix: &str,
237 value: &str,
238 ) -> Result<u32, BufferDocumentError> {
239 let local_id = self.doc.names.add(local_name);
240 let uri_id = self.doc.names.add(ns_uri);
241 let prefix_id = self.doc.names.add(prefix);
242
243 let qualified_name_idx = if prefix.is_empty() {
244 self.doc.strings.store(local_name)
245 } else {
246 self.doc.strings.store(&format!("{prefix}:{local_name}"))
247 };
248 let qname = QNameAtom {
249 local_name: local_id,
250 namespace_uri: uri_id,
251 prefix: prefix_id,
252 local_name_hash: 0, qualified_name_idx,
254 };
255 let qname_idx = self.doc.qname_table.atomize(qname);
256
257 let attr_ref = self.doc.nodes.alloc()?;
259 let mut attr_node = Node::default();
260 attr_node.set_node_type(NodeType::Attribute);
261 attr_node.parent = self.parent;
262 attr_node.next_sibling = NULL;
263 attr_node.value = qname_idx;
264 self.doc.nodes.set(attr_ref, attr_node);
265
266 let val_idx = self.doc.strings.store(value);
268 let cv_ref = self.doc.nodes.alloc()?;
269 let mut cv_node = Node::default();
270 cv_node.set_node_type(NodeType::ChildValue);
271 cv_node.parent = attr_ref; cv_node.next_sibling = NULL;
273 cv_node.value = val_idx;
274 self.doc.nodes.set(cv_ref, cv_node);
275
276 if self.last_attr != NULL {
278 self.doc.nodes.update(self.last_attr, |n| {
279 n.next_sibling = attr_ref;
280 });
281 }
282 self.last_attr = attr_ref;
283
284 self.doc
286 .nodes
287 .update(self.parent, |n| n.set_flag(Node::HAS_ATTRIBUTE));
288
289 if let Some(state) = self.element_stack.last_mut() {
291 state.has_attrs = true;
292 }
293
294 Ok(attr_ref)
295 }
296
297 pub fn end_of_attributes(&mut self) {
299 self.last_sibling = NULL;
300 self.last_attr = NULL;
301 }
302
303 pub fn text(&mut self, value: &str) {
305 self.text_buffer.push_str(value);
306 if self.text_type.is_none() {
307 self.text_type = Some(NodeType::Text);
308 }
309 }
310
311 pub fn comment(&mut self, value: &str) -> Result<(), BufferDocumentError> {
313 self.flush_text()?;
314 self.add_content_node(NodeType::Comment, value)?;
315 Ok(())
316 }
317
318 pub fn processing_instruction(
320 &mut self,
321 target: &str,
322 data: &str,
323 ) -> Result<(), BufferDocumentError> {
324 self.flush_text()?;
325
326 let target_idx = self.doc.strings.store(target);
327 let pi_ref = self.doc.nodes.alloc()?;
328 let mut pi_node = Node::default();
329 pi_node.set_node_type(NodeType::ProcessingInstruction);
330 pi_node.parent = self.parent;
331 pi_node.next_sibling = NULL;
332 pi_node.value = target_idx;
333 self.doc.nodes.set(pi_ref, pi_node);
334
335 let data_idx = self.doc.strings.store(data);
336 let cv_ref = self.doc.nodes.alloc()?;
337 let mut cv_node = Node::default();
338 cv_node.set_node_type(NodeType::ChildValue);
339 cv_node.parent = pi_ref;
340 cv_node.next_sibling = NULL;
341 cv_node.value = data_idx;
342 self.doc.nodes.set(cv_ref, cv_node);
343
344 if self.last_sibling != NULL {
346 self.doc.nodes.update(self.last_sibling, |n| {
347 n.next_sibling = pi_ref;
348 });
349 }
350 self.last_sibling = pi_ref;
351
352 self.doc
354 .nodes
355 .update(self.parent, |n| n.set_flag(Node::HAS_CHILDREN));
356
357 Ok(())
358 }
359
360 pub fn end_element(&mut self) -> Result<(), BufferDocumentError> {
362 self.flush_text()?;
363
364 let _state = self
365 .element_stack
366 .pop()
367 .ok_or(BufferDocumentError::UnmatchedEndElement)?;
368
369 let elem_node = self.doc.nodes.get(self.parent);
371 if elem_node.has_flag(Node::HAS_NMSP_DECLS) {
372 if let Some((_elem_ref, prev_ns)) = self.namespace_stack.pop() {
373 self.current_namespace = prev_ns;
374 }
375 }
376
377 self.last_sibling = self.parent;
378 self.parent = elem_node.parent;
379 self.last_attr = NULL;
380
381 Ok(())
382 }
383
384 pub fn finalize(mut self) -> Result<BufferDocument<'a>, BufferDocumentError> {
386 self.flush_text()?;
387
388 let nul_ref = self.doc.nodes.alloc()?;
390 let nul_node = Node::default(); self.doc.nodes.set(nul_ref, nul_node);
392
393 Ok(self.doc)
394 }
395
396 pub fn set_node_binding(
400 &mut self,
401 node_ref: u32,
402 binding: NodeSchemaBinding,
403 ) -> Result<bool, BufferDocumentError> {
404 let idx = self.doc.binding_remap.register(binding)?;
405 let is_complex = matches!(binding.type_key, crate::ids::TypeKey::Complex(_));
406 self.doc.nodes.update(node_ref, |n| {
407 n.set_binding_index(idx);
408 if is_complex {
409 n.set_flag(Node::IS_COMPLEX_TYPE);
410 } else {
411 n.clear_flag(Node::IS_COMPLEX_TYPE);
412 }
413 });
414 Ok(is_complex)
415 }
416
417 pub fn set_nil(&mut self, node_ref: u32) {
419 self.doc.nodes.update(node_ref, |n| {
420 n.set_flag(Node::IS_NIL);
421 });
422 }
423
424 pub fn register_xml_id(&mut self, id: &str, elem_ref: u32) -> Result<(), BufferDocumentError> {
429 if self.doc.kind != DocumentKind::Full {
430 return Ok(());
431 }
432 let id_val: Box<str> = id.into();
433 if self.doc.id_elements.contains_key(&id_val) {
434 return Err(BufferDocumentError::DuplicateId(id_val.into_string()));
435 }
436 self.doc.id_elements.insert(id_val, elem_ref);
437 Ok(())
438 }
439
440 #[inline]
442 pub fn track_source_locations(&self) -> bool {
443 self.options.track_source_locations
444 }
445
446 pub fn set_source_span(&mut self, node_ref: u32, span: SourceSpan) {
448 self.doc.source_spans.set(node_ref, span);
449 }
450
451 fn flush_text(&mut self) -> Result<(), BufferDocumentError> {
455 if let Some(nt) = self.text_type.take() {
456 let value = std::mem::take(&mut self.text_buffer);
457 if !value.is_empty() {
458 self.add_content_node(nt, &value)?;
459 }
460 }
461 Ok(())
462 }
463
464 fn add_content_node(
466 &mut self,
467 node_type: NodeType,
468 value: &str,
469 ) -> Result<u32, BufferDocumentError> {
470 let str_idx = self.doc.strings.store(value);
471 let node_ref = self.doc.nodes.alloc()?;
472 let mut node = Node::default();
473 node.set_node_type(node_type);
474 node.parent = self.parent;
475 node.next_sibling = NULL;
476 node.value = str_idx;
477 self.doc.nodes.set(node_ref, node);
478
479 if self.last_sibling != NULL {
480 self.doc.nodes.update(self.last_sibling, |n| {
481 n.next_sibling = node_ref;
482 });
483 }
484 self.last_sibling = node_ref;
485
486 self.doc
487 .nodes
488 .update(self.parent, |n| n.set_flag(Node::HAS_CHILDREN));
489
490 Ok(node_ref)
491 }
492
493 fn handle_namespace_decl(
495 &mut self,
496 prefix: &str,
497 uri: &str,
498 ) -> Result<(), BufferDocumentError> {
499 let prefix_id = self.doc.names.add(prefix);
500 let uri_id = self.doc.names.add(uri);
501
502 let ns_ref = self
503 .doc
504 .namespace_pages
505 .alloc()
506 .ok_or(BufferDocumentError::Overflow)?;
507 self.doc.namespace_pages.set(
508 ns_ref,
509 NamespaceNode::new(prefix_id, uri_id, self.current_namespace),
510 );
511 self.current_namespace = ns_ref;
512
513 Ok(())
514 }
515
516 pub fn build<R: BufRead>(
520 mut self,
521 reader: R,
522 ) -> Result<BufferDocument<'a>, BufferDocumentError> {
523 let mut xml_reader = Reader::from_reader(reader);
524 xml_reader.trim_text(false);
525
526 let mut prefix_map: HashMap<Box<[u8]>, Vec<String>> = HashMap::new();
528 prefix_map
529 .entry(b"xml".to_vec().into_boxed_slice())
530 .or_default()
531 .push(XML_NAMESPACE.to_string());
532 prefix_map
533 .entry(b"".to_vec().into_boxed_slice())
534 .or_default()
535 .push(String::new());
536
537 let mut scope_decls: Vec<Vec<Box<[u8]>>> = Vec::new();
539
540 let track = self.options.track_source_locations;
541 let mut buf = Vec::with_capacity(1024);
542
543 loop {
544 let event_start = if track {
545 xml_reader.buffer_position()
546 } else {
547 0
548 };
549
550 match xml_reader.read_event_into(&mut buf) {
551 Ok(Event::Start(ref e)) => {
552 let elem_ref =
553 self.handle_start_or_empty(e, false, &mut prefix_map, &mut scope_decls)?;
554 if track {
555 self.pending_spans.push((elem_ref, event_start));
556 }
557 }
558 Ok(Event::Empty(ref e)) => {
559 let elem_ref =
560 self.handle_start_or_empty(e, true, &mut prefix_map, &mut scope_decls)?;
561 if track {
562 self.doc.source_spans.set(
563 elem_ref,
564 SourceSpan::new(event_start, xml_reader.buffer_position()),
565 );
566 }
567 }
568 Ok(Event::End(_)) => {
569 if track {
570 if let Some((elem_ref, start)) = self.pending_spans.pop() {
571 self.doc.source_spans.set(
572 elem_ref,
573 SourceSpan::new(start, xml_reader.buffer_position()),
574 );
575 }
576 }
577 if let Some(decls) = scope_decls.pop() {
579 for prefix_key in &decls {
580 if let Some(stack) = prefix_map.get_mut(prefix_key.as_ref()) {
581 stack.pop();
582 }
583 }
584 }
585 self.end_element()?;
586 }
587 Ok(Event::Text(ref e)) => {
588 if !self.element_stack.is_empty() {
589 let text = e.unescape()?;
590 self.text(&text);
591 }
592 }
593 Ok(Event::CData(ref e)) => {
594 if !self.element_stack.is_empty() {
595 let text = std::str::from_utf8(e)?;
596 self.text(text);
597 }
598 }
599 Ok(Event::Comment(ref e)) => {
600 let text = std::str::from_utf8(e)?;
601 self.comment(text)?;
602 }
603 Ok(Event::PI(ref e)) => {
604 let raw = std::str::from_utf8(e)?;
605 let (target, data) = parse_pi_content(raw);
606 self.processing_instruction(target, data)?;
607 }
608 Ok(Event::Decl(_) | Event::DocType(_)) => {}
609 Ok(Event::Eof) => break,
610 Err(e) => return Err(e.into()),
611 }
612 buf.clear();
613 }
614
615 self.finalize()
616 }
617
618 fn handle_start_or_empty(
620 &mut self,
621 e: &quick_xml::events::BytesStart<'_>,
622 is_empty: bool,
623 prefix_map: &mut HashMap<Box<[u8]>, Vec<String>>,
624 scope_decls: &mut Vec<Vec<Box<[u8]>>>,
625 ) -> Result<u32, BufferDocumentError> {
626 let mut local_decls: Vec<Box<[u8]>> = Vec::new();
627 let mut ns_decls_str: Vec<(String, String)> = Vec::new();
628
629 for attr_result in e.attributes() {
631 let attr = attr_result?;
632 let key = attr.key.as_ref();
633
634 if key == b"xmlns" {
635 let value = attr.unescape_value()?;
637 let uri = value.to_string();
638 let prefix_key: Box<[u8]> = b"".to_vec().into_boxed_slice();
639 prefix_map
640 .entry(prefix_key.clone())
641 .or_default()
642 .push(uri.clone());
643 local_decls.push(prefix_key);
644 ns_decls_str.push((String::new(), uri));
645 } else if key.starts_with(b"xmlns:") {
646 let prefix_bytes = &key[6..];
647 let value = attr.unescape_value()?;
648 let uri = value.to_string();
649 let prefix_key: Box<[u8]> = prefix_bytes.to_vec().into_boxed_slice();
650 prefix_map
651 .entry(prefix_key.clone())
652 .or_default()
653 .push(uri.clone());
654 local_decls.push(prefix_key);
655 let prefix_str =
656 std::str::from_utf8(prefix_bytes).map_err(BufferDocumentError::Utf8)?;
657 ns_decls_str.push((prefix_str.to_string(), uri));
658 }
659 }
660
661 scope_decls.push(local_decls);
662
663 let ns_decl_refs: Vec<(&str, &str)> = ns_decls_str
665 .iter()
666 .map(|(p, u)| (p.as_str(), u.as_str()))
667 .collect();
668
669 let full_name = e.name();
671 let full_name_bytes = full_name.as_ref();
672 let (elem_prefix_bytes, elem_local_bytes) = split_prefix_local(full_name_bytes);
673
674 let elem_local =
675 std::str::from_utf8(elem_local_bytes).map_err(BufferDocumentError::Utf8)?;
676 let elem_prefix_str =
677 std::str::from_utf8(elem_prefix_bytes).map_err(BufferDocumentError::Utf8)?;
678
679 let elem_ns_uri = match prefix_map.get(elem_prefix_bytes) {
681 Some(stack) if !stack.is_empty() => stack.last().unwrap().as_str().to_string(),
682 _ if elem_prefix_bytes.is_empty() => String::new(),
683 _ => {
684 return Err(BufferDocumentError::UnboundPrefix(
685 elem_prefix_str.to_string(),
686 ))
687 }
688 };
689
690 let elem_ref =
691 self.start_element(elem_local, &elem_ns_uri, elem_prefix_str, &ns_decl_refs)?;
692
693 for attr_result in e.attributes() {
695 let attr = attr_result?;
696 let key = attr.key.as_ref();
697
698 if key == b"xmlns" || key.starts_with(b"xmlns:") {
700 continue;
701 }
702
703 let (attr_prefix_bytes, attr_local_bytes) = split_prefix_local(key);
704 let attr_local =
705 std::str::from_utf8(attr_local_bytes).map_err(BufferDocumentError::Utf8)?;
706 let attr_prefix_str =
707 std::str::from_utf8(attr_prefix_bytes).map_err(BufferDocumentError::Utf8)?;
708
709 let attr_ns_uri = if attr_prefix_bytes.is_empty() {
711 String::new()
712 } else {
713 match prefix_map.get(attr_prefix_bytes) {
714 Some(stack) if !stack.is_empty() => stack.last().unwrap().as_str().to_string(),
715 _ => {
716 return Err(BufferDocumentError::UnboundPrefix(
717 attr_prefix_str.to_string(),
718 ))
719 }
720 }
721 };
722
723 let unescaped = attr.unescape_value()?;
724 self.attribute(attr_local, &attr_ns_uri, attr_prefix_str, &unescaped)?;
725
726 if self.doc.kind == DocumentKind::Full
728 && attr_local == "id"
729 && attr_ns_uri == XML_NAMESPACE
730 {
731 let id_val: Box<str> = unescaped.as_ref().into();
732 if self.doc.id_elements.contains_key(&id_val) {
733 return Err(BufferDocumentError::DuplicateId(id_val.into_string()));
734 }
735 self.doc.id_elements.insert(id_val, elem_ref);
736 }
737 }
738
739 self.end_of_attributes();
740
741 if is_empty {
742 if let Some(decls) = scope_decls.pop() {
744 for prefix_key in &decls {
745 if let Some(stack) = prefix_map.get_mut(prefix_key.as_ref()) {
746 stack.pop();
747 }
748 }
749 }
750 self.end_element()?;
751 }
752
753 Ok(elem_ref)
754 }
755}
756
/// Splits a raw qualified name at its first `:` into `(prefix, local)`.
///
/// A name without a colon yields an empty prefix and the whole name as the
/// local part. Only the first colon is significant: `a:b:c` becomes
/// `("a", "b:c")`.
pub(crate) fn split_prefix_local(name: &[u8]) -> (&[u8], &[u8]) {
    if let Some(colon) = name.iter().position(|&byte| byte == b':') {
        let (prefix, colon_and_local) = name.split_at(colon);
        // Skip the colon itself.
        (prefix, &colon_and_local[1..])
    } else {
        (b"", name)
    }
}
767
/// Splits the raw content of a processing instruction (the text between
/// `<?` and `?>`) into `(target, data)`.
///
/// The target is everything up to the first XML whitespace character; the
/// data is the remainder with the separating whitespace removed. Content
/// without whitespace yields empty data.
///
/// Only XML whitespace (space, tab, carriage return, line feed) is treated
/// as a separator, and whitespace at the *end* of the data is preserved:
/// it belongs to the instruction's data, so stripping it would lose content.
pub(crate) fn parse_pi_content(raw: &str) -> (&str, &str) {
    /// XML `S` production: space, tab, carriage return, line feed.
    fn is_xml_space(c: char) -> bool {
        matches!(c, ' ' | '\t' | '\r' | '\n')
    }

    // Tolerate whitespace before the target.
    let content = raw.trim_start_matches(is_xml_space);
    match content.find(is_xml_space) {
        Some(split) => (
            &content[..split],
            content[split..].trim_start_matches(is_xml_space),
        ),
        None => (content, ""),
    }
}
776
777impl From<quick_xml::events::attributes::AttrError> for BufferDocumentError {
779 fn from(e: quick_xml::events::attributes::AttrError) -> Self {
780 BufferDocumentError::Parse(quick_xml::Error::from(e))
781 }
782}
783
784#[cfg(test)]
787mod tests {
788 use super::*;
789 use crate::ids::TypeKey;
790 use crate::navigator::DomNavigator;
791
792 fn make_builder<'a>(arena: &'a Bump, names: &'a NameTable) -> BufferDocumentBuilder<'a> {
793 BufferDocumentBuilder::new(arena, names, None, BufferDocumentOptions::default()).unwrap()
794 }
795
796 fn make_builder_full<'a>(arena: &'a Bump, names: &'a NameTable) -> BufferDocumentBuilder<'a> {
797 BufferDocumentBuilder::new(arena, names, None, BufferDocumentOptions::full()).unwrap()
798 }
799
800 #[test]
803 fn test_empty_document() {
804 let arena = Bump::new();
805 let names = NameTable::new();
806 let builder = make_builder(&arena, &names);
807 let doc = builder.finalize().unwrap();
808
809 assert_eq!(doc.nodes.len(), 2);
811 assert_eq!(doc.nodes.get(0).node_type(), NodeType::Root);
812 assert_eq!(doc.nodes.get(1).node_type(), NodeType::Nul);
813 }
814
815 #[test]
816 fn test_single_element() {
817 let arena = Bump::new();
818 let names = NameTable::new();
819 let mut builder = make_builder(&arena, &names);
820
821 let elem = builder.start_element("root", "", "", &[]).unwrap();
822 builder.end_of_attributes();
823 builder.end_element().unwrap();
824
825 let doc = builder.finalize().unwrap();
826
827 let elem_node = doc.nodes.get(elem);
828 assert_eq!(elem_node.node_type(), NodeType::Element);
829 assert_eq!(elem_node.parent, 0); assert!(doc.nodes.get(0).has_flag(Node::HAS_CHILDREN));
831 }
832
833 #[test]
834 fn test_element_with_text() {
835 let arena = Bump::new();
836 let names = NameTable::new();
837 let mut builder = make_builder(&arena, &names);
838
839 builder.start_element("root", "", "", &[]).unwrap();
840 builder.end_of_attributes();
841 builder.text("hello world");
842 builder.end_element().unwrap();
843
844 let doc = builder.finalize().unwrap();
845
846 assert_eq!(doc.nodes.len(), 4);
848 let text_node = doc.nodes.get(2);
849 assert_eq!(text_node.node_type(), NodeType::Text);
850 assert_eq!(doc.strings.get(text_node.value), "hello world");
851 }
852
853 #[test]
854 fn test_text_coalescing() {
855 let arena = Bump::new();
856 let names = NameTable::new();
857 let mut builder = make_builder(&arena, &names);
858
859 builder.start_element("root", "", "", &[]).unwrap();
860 builder.end_of_attributes();
861 builder.text("hello ");
862 builder.text("world");
863 builder.end_element().unwrap();
864
865 let doc = builder.finalize().unwrap();
866
867 assert_eq!(doc.nodes.len(), 4);
869 let text_node = doc.nodes.get(2);
870 assert_eq!(text_node.node_type(), NodeType::Text);
871 assert_eq!(doc.strings.get(text_node.value), "hello world");
872 }
873
874 #[test]
875 fn test_element_with_attributes() {
876 let arena = Bump::new();
877 let names = NameTable::new();
878 let mut builder = make_builder(&arena, &names);
879
880 builder.start_element("root", "", "", &[]).unwrap();
881 let attr1 = builder.attribute("id", "", "", "123").unwrap();
882 let attr2 = builder.attribute("name", "", "", "test").unwrap();
883 builder.end_of_attributes();
884 builder.end_element().unwrap();
885
886 let doc = builder.finalize().unwrap();
887
888 assert_eq!(doc.nodes.len(), 7);
890
891 let a1 = doc.nodes.get(attr1);
892 assert_eq!(a1.node_type(), NodeType::Attribute);
893 assert_eq!(a1.parent, 1); assert_eq!(a1.next_sibling, attr2); let cv1 = doc.nodes.get(attr1 + 1);
897 assert_eq!(cv1.node_type(), NodeType::ChildValue);
898 assert_eq!(cv1.parent, attr1); assert_eq!(doc.strings.get(cv1.value), "123");
900
901 let a2 = doc.nodes.get(attr2);
902 assert_eq!(a2.node_type(), NodeType::Attribute);
903 assert_eq!(a2.next_sibling, NULL);
904
905 assert!(doc.nodes.get(1).has_flag(Node::HAS_ATTRIBUTE));
906 }
907
908 #[test]
909 fn test_nested_elements() {
910 let arena = Bump::new();
911 let names = NameTable::new();
912 let mut builder = make_builder(&arena, &names);
913
914 builder.start_element("a", "", "", &[]).unwrap();
915 builder.end_of_attributes();
916
917 let b = builder.start_element("b", "", "", &[]).unwrap();
918 builder.end_of_attributes();
919 builder.end_element().unwrap();
920
921 builder.end_element().unwrap();
922
923 let doc = builder.finalize().unwrap();
924
925 let b_node = doc.nodes.get(b);
926 assert_eq!(b_node.parent, 1); }
928
929 #[test]
930 fn test_sibling_elements() {
931 let arena = Bump::new();
932 let names = NameTable::new();
933 let mut builder = make_builder(&arena, &names);
934
935 builder.start_element("root", "", "", &[]).unwrap();
936 builder.end_of_attributes();
937
938 let a = builder.start_element("a", "", "", &[]).unwrap();
939 builder.end_of_attributes();
940 builder.end_element().unwrap();
941
942 let b = builder.start_element("b", "", "", &[]).unwrap();
943 builder.end_of_attributes();
944 builder.end_element().unwrap();
945
946 builder.end_element().unwrap();
947
948 let doc = builder.finalize().unwrap();
949
950 let a_node = doc.nodes.get(a);
951 assert_eq!(a_node.next_sibling, b);
952
953 let b_node = doc.nodes.get(b);
954 assert_eq!(b_node.next_sibling, NULL);
955 }
956
957 #[test]
958 fn test_comment_node() {
959 let arena = Bump::new();
960 let names = NameTable::new();
961 let mut builder = make_builder(&arena, &names);
962
963 builder.start_element("root", "", "", &[]).unwrap();
964 builder.end_of_attributes();
965 builder.comment("a comment").unwrap();
966 builder.end_element().unwrap();
967
968 let doc = builder.finalize().unwrap();
969
970 let comment = doc.nodes.get(2);
972 assert_eq!(comment.node_type(), NodeType::Comment);
973 assert_eq!(doc.strings.get(comment.value), "a comment");
974 }
975
976 #[test]
977 fn test_processing_instruction() {
978 let arena = Bump::new();
979 let names = NameTable::new();
980 let mut builder = make_builder(&arena, &names);
981
982 builder.start_element("root", "", "", &[]).unwrap();
983 builder.end_of_attributes();
984 builder
985 .processing_instruction("target", "data here")
986 .unwrap();
987 builder.end_element().unwrap();
988
989 let doc = builder.finalize().unwrap();
990
991 let pi = doc.nodes.get(2);
993 assert_eq!(pi.node_type(), NodeType::ProcessingInstruction);
994 assert_eq!(doc.strings.get(pi.value), "target");
995
996 let cv = doc.nodes.get(3);
997 assert_eq!(cv.node_type(), NodeType::ChildValue);
998 assert_eq!(cv.parent, 2); assert_eq!(doc.strings.get(cv.value), "data here");
1000 }
1001
1002 #[test]
1003 fn test_namespace_declarations() {
1004 let arena = Bump::new();
1005 let names = NameTable::new();
1006 let mut builder = make_builder(&arena, &names);
1007
1008 builder
1009 .start_element(
1010 "root",
1011 "http://example.com",
1012 "ex",
1013 &[("ex", "http://example.com")],
1014 )
1015 .unwrap();
1016 builder.end_of_attributes();
1017 builder.end_element().unwrap();
1018
1019 let doc = builder.finalize().unwrap();
1020
1021 let elem = doc.nodes.get(1);
1022 assert!(elem.has_flag(Node::HAS_NMSP_DECLS));
1023 assert!(doc.element_namespaces.contains_key(&1));
1024 }
1025
1026 #[test]
1027 fn test_namespace_scope_restore() {
1028 let arena = Bump::new();
1029 let names = NameTable::new();
1030 let mut builder = make_builder(&arena, &names);
1031
1032 builder
1034 .start_element(
1035 "outer",
1036 "http://outer.com",
1037 "o",
1038 &[("o", "http://outer.com")],
1039 )
1040 .unwrap();
1041 builder.end_of_attributes();
1042
1043 builder
1045 .start_element(
1046 "inner",
1047 "http://inner.com",
1048 "o",
1049 &[("o", "http://inner.com")],
1050 )
1051 .unwrap();
1052 builder.end_of_attributes();
1053 builder.end_element().unwrap();
1054
1055 builder.end_element().unwrap();
1056
1057 let doc = builder.finalize().unwrap();
1058
1059 assert!(doc.nodes.get(1).has_flag(Node::HAS_NMSP_DECLS));
1061 assert!(doc.nodes.get(2).has_flag(Node::HAS_NMSP_DECLS));
1063 }
1064
1065 #[test]
1066 fn test_element_index_full_mode() {
1067 let arena = Bump::new();
1068 let names = NameTable::new();
1069 let mut builder = make_builder_full(&arena, &names);
1070
1071 let elem = builder.start_element("item", "", "", &[]).unwrap();
1072 builder.end_of_attributes();
1073 builder.end_element().unwrap();
1074
1075 let doc = builder.finalize().unwrap();
1076
1077 let h = hash_name("item");
1078 let found = doc.element_index.find(h);
1079 assert_eq!(found, &[elem]);
1080 }
1081
1082 #[test]
1083 fn test_set_node_binding() {
1084 let arena = Bump::new();
1085 let names = NameTable::new();
1086 let mut builder = make_builder(&arena, &names);
1087
1088 let elem = builder.start_element("root", "", "", &[]).unwrap();
1089 builder.end_of_attributes();
1090
1091 use slotmap::SlotMap;
1093 let mut sm: SlotMap<crate::ids::ComplexTypeKey, ()> = SlotMap::with_key();
1094 let ck = sm.insert(());
1095
1096 let binding = NodeSchemaBinding {
1097 type_key: TypeKey::Complex(ck),
1098 element_decl: None,
1099 attribute_decl: None,
1100 content_type: None,
1101 };
1102
1103 let is_complex = builder.set_node_binding(elem, binding).unwrap();
1104 assert!(is_complex);
1105
1106 builder.end_element().unwrap();
1107 let doc = builder.finalize().unwrap();
1108
1109 let node = doc.nodes.get(elem);
1110 assert!(node.has_flag(Node::IS_COMPLEX_TYPE));
1111 assert!(node.binding_index() > 0);
1112 }
1113
1114 #[test]
1115 fn test_set_nil() {
1116 let arena = Bump::new();
1117 let names = NameTable::new();
1118 let mut builder = make_builder(&arena, &names);
1119
1120 let elem = builder.start_element("root", "", "", &[]).unwrap();
1121 builder.end_of_attributes();
1122
1123 builder.set_nil(elem);
1124
1125 builder.end_element().unwrap();
1126 let doc = builder.finalize().unwrap();
1127
1128 let node = doc.nodes.get(elem);
1129 assert!(node.has_flag(Node::IS_NIL));
1130 }
1131
1132 fn build_from_str(xml: &str) -> BufferDocument<'_> {
1135 let arena = Bump::new();
1136 let names = NameTable::new();
1137 let arena = Box::leak(Box::new(arena));
1140 let names = Box::leak(Box::new(names));
1141 let builder =
1142 BufferDocumentBuilder::new(arena, names, None, BufferDocumentOptions::default())
1143 .unwrap();
1144 builder.build(xml.as_bytes()).unwrap()
1145 }
1146
1147 fn build_from_str_full(xml: &str) -> BufferDocument<'_> {
1148 let arena = Box::leak(Box::new(Bump::new()));
1149 let names = Box::leak(Box::new(NameTable::new()));
1150 let builder =
1151 BufferDocumentBuilder::new(arena, names, None, BufferDocumentOptions::full()).unwrap();
1152 builder.build(xml.as_bytes()).unwrap()
1153 }
1154
1155 #[test]
1156 fn test_build_simple() {
1157 let doc = build_from_str("<root/>");
1158 assert_eq!(doc.nodes.len(), 3);
1160 assert_eq!(doc.nodes.get(1).node_type(), NodeType::Element);
1161 }
1162
1163 #[test]
1164 fn test_build_nested() {
1165 let doc = build_from_str("<a><b>text</b></a>");
1166 assert_eq!(doc.nodes.len(), 5);
1168 assert_eq!(doc.nodes.get(2).parent, 1); let text = doc.nodes.get(3);
1170 assert_eq!(text.node_type(), NodeType::Text);
1171 assert_eq!(doc.strings.get(text.value), "text");
1172 }
1173
1174 #[test]
1175 fn test_build_attributes() {
1176 let doc = build_from_str(r#"<root attr="val"/>"#);
1177 assert_eq!(doc.nodes.len(), 5);
1179 assert!(doc.nodes.get(1).has_flag(Node::HAS_ATTRIBUTE));
1180 let cv = doc.nodes.get(3);
1181 assert_eq!(doc.strings.get(cv.value), "val");
1182 }
1183
1184 #[test]
1185 fn test_build_namespace_prefixed() {
1186 let doc = build_from_str(r#"<ns:root xmlns:ns="http://example.com"/>"#);
1187 let elem = doc.nodes.get(1);
1188 assert_eq!(elem.node_type(), NodeType::Element);
1189 assert!(elem.has_flag(Node::HAS_NMSP_DECLS));
1190
1191 let qname = doc.qname_table.get(elem.value);
1192 assert_eq!(doc.names.resolve(qname.local_name), "root");
1193 assert_eq!(doc.names.resolve(qname.namespace_uri), "http://example.com");
1194 assert_eq!(doc.names.resolve(qname.prefix), "ns");
1195 }
1196
1197 #[test]
1198 fn test_build_default_namespace() {
1199 let doc = build_from_str(r#"<root xmlns="http://default.com"><child/></root>"#);
1200 let child = doc.nodes.get(2);
1202 let child_qname = doc.qname_table.get(child.value);
1203 assert_eq!(
1204 doc.names.resolve(child_qname.namespace_uri),
1205 "http://default.com"
1206 );
1207 }
1208
1209 #[test]
1210 fn test_build_namespace_override() {
1211 let doc = build_from_str(
1212 r#"<root xmlns="http://outer.com"><child xmlns="http://inner.com"/></root>"#,
1213 );
1214 let root = doc.nodes.get(1);
1215 let root_qname = doc.qname_table.get(root.value);
1216 assert_eq!(
1217 doc.names.resolve(root_qname.namespace_uri),
1218 "http://outer.com"
1219 );
1220
1221 let child = doc.nodes.get(2);
1222 let child_qname = doc.qname_table.get(child.value);
1223 assert_eq!(
1224 doc.names.resolve(child_qname.namespace_uri),
1225 "http://inner.com"
1226 );
1227 }
1228
/// Character data followed directly by a CDATA section ends up as a single
/// `Text` node holding the concatenated content.
#[test]
fn test_build_cdata_coalescing() {
    let parsed = build_from_str("<root>hello <![CDATA[world]]></root>");

    let merged = parsed.nodes.get(2);
    assert_eq!(merged.node_type(), NodeType::Text);

    let content = parsed.strings.get(merged.value);
    assert_eq!(content, "hello world");
}
1237
/// A comment inside an element becomes a `Comment` node whose value keeps the
/// surrounding spaces of the comment body.
#[test]
fn test_build_comment() {
    let parsed = build_from_str("<root><!-- a comment --></root>");

    let remark = parsed.nodes.get(2);
    assert_eq!(remark.node_type(), NodeType::Comment);

    let body = parsed.strings.get(remark.value);
    assert_eq!(body, " a comment ");
}
1245
/// A processing instruction is stored as a `ProcessingInstruction` node
/// holding the target, followed by a `ChildValue` node holding the data.
#[test]
fn test_build_pi() {
    let parsed = build_from_str("<root><?target data?></root>");

    // Slot 2: the instruction itself, valued with its target.
    let instruction = parsed.nodes.get(2);
    assert_eq!(instruction.node_type(), NodeType::ProcessingInstruction);
    let target = parsed.strings.get(instruction.value);
    assert_eq!(target, "target");

    // Slot 3: the instruction's data.
    let payload = parsed.nodes.get(3);
    assert_eq!(payload.node_type(), NodeType::ChildValue);
    let data = parsed.strings.get(payload.value);
    assert_eq!(data, "data");
}
1257
/// Mixed content keeps document order: text, comment, element, text.
#[test]
fn test_build_mixed_content() {
    let parsed = build_from_str("<root>text<!-- comment --><child/>more</root>");

    let leading = parsed.nodes.get(2);
    assert_eq!(leading.node_type(), NodeType::Text);
    assert_eq!(parsed.strings.get(leading.value), "text");

    let remark = parsed.nodes.get(3);
    assert_eq!(remark.node_type(), NodeType::Comment);

    let nested = parsed.nodes.get(4);
    assert_eq!(nested.node_type(), NodeType::Element);

    let trailing = parsed.nodes.get(5);
    assert_eq!(trailing.node_type(), NodeType::Text);
    assert_eq!(parsed.strings.get(trailing.value), "more");
}
1269
/// With the options used by `build_from_str_full`, both the outer and the
/// nested element have a recorded source span.
#[test]
fn test_build_source_spans() {
    let parsed = build_from_str_full("<root><child/></root>");

    // Slot 1 is checked first, then slot 2.
    for node_ref in [1, 2] {
        assert!(parsed.source_spans.get(node_ref).is_some());
    }
}
1277
/// With the options used by `build_from_str`, no source spans are recorded.
#[test]
fn test_build_no_source_spans_when_disabled() {
    let parsed = build_from_str("<root><child/></root>");

    let spans = &parsed.source_spans;
    assert!(spans.is_empty());
}
1283
/// An `xml:id` attribute makes its element discoverable through
/// `get_element_by_id`.
#[test]
fn test_build_xml_id() {
    let parsed = build_from_str_full(r#"<root xml:id="myid"/>"#);

    let found = parsed.get_element_by_id("myid");
    assert_eq!(found, Some(1));
}
1289
/// Two elements carrying the same `xml:id` value must make `build` fail with
/// `BufferDocumentError::DuplicateId`.
#[test]
fn test_build_xml_id_duplicate_error() {
    // The build result never leaves this function, so the arena and name
    // table can live on the stack instead of being leaked for `'static`.
    let arena = Bump::new();
    let names = NameTable::new();
    let builder =
        BufferDocumentBuilder::new(&arena, &names, None, BufferDocumentOptions::full()).unwrap();

    let result = builder.build(r#"<root><a xml:id="dup"/><b xml:id="dup"/></root>"#.as_bytes());

    assert!(matches!(result, Err(BufferDocumentError::DuplicateId(_))));
}
1299
/// An element using a prefix that was never declared must make `build` fail
/// with `BufferDocumentError::UnboundPrefix`.
#[test]
fn test_build_unbound_prefix_error() {
    // The build result never leaves this function, so the arena and name
    // table can live on the stack instead of being leaked for `'static`.
    let arena = Bump::new();
    let names = NameTable::new();
    let builder =
        BufferDocumentBuilder::new(&arena, &names, None, BufferDocumentOptions::default())
            .unwrap();

    let result = builder.build(r#"<ns:root/>"#.as_bytes());

    assert!(matches!(result, Err(BufferDocumentError::UnboundPrefix(_))));
}
1310
/// The final node slot of a built document is a `Nul` node.
#[test]
fn test_build_nul_sentinel() {
    let parsed = build_from_str("<root/>");

    let tail_index = parsed.nodes.len() - 1;
    let tail = parsed.nodes.get(tail_index);

    assert_eq!(tail.node_type(), NodeType::Nul);
}
1317
/// Newlines between top-level constructs add no nodes: only five slots exist,
/// and slots 1 through 4 are comment, element, comment and the `Nul` node.
#[test]
fn test_build_document_level_whitespace_ignored() {
    let parsed = build_from_str("<!-- prolog -->\n<root/>\n<!-- epilog -->");

    assert_eq!(parsed.nodes.len(), 5);

    let expected_layout = [
        (1, NodeType::Comment),
        (2, NodeType::Element),
        (3, NodeType::Comment),
        (4, NodeType::Nul),
    ];
    for (slot, expected_type) in expected_layout {
        assert_eq!(parsed.nodes.get(slot).node_type(), expected_type);
    }
}
1331
1332 fn make_builder_fragment<'a>(
1335 arena: &'a Bump,
1336 names: &'a NameTable,
1337 ) -> BufferDocumentBuilder<'a> {
1338 BufferDocumentBuilder::new(arena, names, None, BufferDocumentOptions::fragment()).unwrap()
1339 }
1340
/// A fragment built through the push API can be walked with a navigator:
/// the first child is the pushed element, and that element has a child too.
#[test]
fn fragment_build_navigate() {
    let bump = Bump::new();
    let name_table = NameTable::new();
    let mut frag = make_builder_fragment(&bump, &name_table);

    let item_ref = frag.start_element("item", "", "", &[]).unwrap();
    frag.end_of_attributes();
    frag.text("value");
    frag.end_element().unwrap();

    let built = frag.finalize().unwrap();
    let mut cursor = built.create_navigator();

    // First step down must land on the element returned by `start_element`.
    let entered_item = cursor.move_to_first_child();
    assert!(entered_item);
    assert_eq!(cursor.current_ref(), item_ref);

    // Second step down must succeed as well (content pushed via `text`).
    let entered_content = cursor.move_to_first_child();
    assert!(entered_content);
}
1362
/// Slot 0 of a fragment document is a `Root` node, and a fresh navigator
/// cannot move to a parent from its starting position.
#[test]
fn fragment_root_is_synthetic() {
    let bump = Bump::new();
    let name_table = NameTable::new();
    let mut frag = make_builder_fragment(&bump, &name_table);

    frag.start_element("item", "", "", &[]).unwrap();
    frag.end_of_attributes();
    frag.end_element().unwrap();

    let built = frag.finalize().unwrap();

    let top = built.nodes.get(0);
    assert_eq!(top.node_type(), NodeType::Root);

    let mut cursor = built.create_navigator();
    let moved_up = cursor.move_to_parent();
    assert!(!moved_up);
}
1383
/// Navigation in a fragment stops at the top: down once, up once, and a
/// further move to the parent is refused.
#[test]
fn fragment_navigation_boundary() {
    let bump = Bump::new();
    let name_table = NameTable::new();
    let mut frag = make_builder_fragment(&bump, &name_table);

    frag.start_element("item", "", "", &[]).unwrap();
    frag.end_of_attributes();
    frag.end_element().unwrap();

    let built = frag.finalize().unwrap();
    let mut cursor = built.create_navigator();

    let went_down = cursor.move_to_first_child();
    assert!(went_down);

    let went_up = cursor.move_to_parent();
    assert!(went_up);

    // Already back at the starting position: no further parent exists.
    let went_up_again = cursor.move_to_parent();
    assert!(!went_up_again);
}
1401
/// Elements pushed into a fragment builder are not entered into the
/// document's element index.
#[test]
fn fragment_skips_element_index() {
    let bump = Bump::new();
    let name_table = NameTable::new();
    let mut frag = make_builder_fragment(&bump, &name_table);

    frag.start_element("item", "", "", &[]).unwrap();
    frag.end_of_attributes();
    frag.end_element().unwrap();

    let built = frag.finalize().unwrap();

    // Look the element up by the hash of the name it was pushed under.
    let hits = built.element_index.find(hash_name("item"));
    assert!(
        hits.is_empty(),
        "Fragment mode should not populate element_index"
    );
}
1420
/// `register_xml_id` succeeds on a fragment builder but leaves nothing to be
/// found through `get_element_by_id` afterwards.
#[test]
fn fragment_skips_id_registration() {
    let bump = Bump::new();
    let name_table = NameTable::new();
    let mut frag = make_builder_fragment(&bump, &name_table);

    let item_ref = frag.start_element("item", "", "", &[]).unwrap();
    frag.end_of_attributes();
    // The call itself must not fail, even though the lookup below is empty.
    frag.register_xml_id("myid", item_ref).unwrap();
    frag.end_element().unwrap();

    let built = frag.finalize().unwrap();
    let lookup = built.get_element_by_id("myid");
    assert_eq!(
        lookup, None,
        "Fragment mode register_xml_id should be no-op"
    );
}
1440
/// A nested element that passes no declarations of its own still resolves to
/// the namespace URI declared on the enclosing element.
#[test]
fn fragment_namespace_inheritance() {
    const EXAMPLE_NS: &str = "http://example.com";

    let bump = Bump::new();
    let name_table = NameTable::new();
    let mut frag = make_builder_fragment(&bump, &name_table);

    // Outer element declares the `ex` prefix.
    frag.start_element("outer", EXAMPLE_NS, "ex", &[("ex", EXAMPLE_NS)])
        .unwrap();
    frag.end_of_attributes();

    // Inner element uses the same prefix and URI, with an empty declaration list.
    let inner_ref = frag.start_element("inner", EXAMPLE_NS, "ex", &[]).unwrap();
    frag.end_of_attributes();
    frag.end_element().unwrap();

    frag.end_element().unwrap();

    let built = frag.finalize().unwrap();

    let inner_node = built.nodes.get(inner_ref);
    let inner_name = built.qname_table.get(inner_node.value);
    assert_eq!(
        built.names.resolve(inner_name.namespace_uri),
        EXAMPLE_NS,
        "child should inherit parent namespace in fragment mode"
    );
}
1475
/// The same push-API calls fed to a `make_builder` builder and to a fragment
/// builder must yield the same number of nodes with matching node types.
#[test]
fn fragment_push_api_parity() {
    /// Pushes a `root` element with an `id` attribute and the text `hello`.
    fn push_sample(target: &mut BufferDocumentBuilder<'_>) {
        target.start_element("root", "", "", &[]).unwrap();
        target.attribute("id", "", "", "1").unwrap();
        target.end_of_attributes();
        target.text("hello");
        target.end_element().unwrap();
    }

    let arena_full = Bump::new();
    let names_full = NameTable::new();
    let mut b_full = make_builder(&arena_full, &names_full);

    let arena_frag = Bump::new();
    let names_frag = NameTable::new();
    let mut b_frag = make_builder_fragment(&arena_frag, &names_frag);

    // Same order as before: the `make_builder` builder first, then the fragment one.
    push_sample(&mut b_full);
    push_sample(&mut b_frag);

    let doc_full = b_full.finalize().unwrap();
    let doc_frag = b_frag.finalize().unwrap();

    let full_len = doc_full.nodes.len();
    assert_eq!(full_len, doc_frag.nodes.len());

    for i in 0..full_len as u32 {
        let full_type = doc_full.nodes.get(i).node_type();
        let frag_type = doc_frag.nodes.get(i).node_type();
        assert_eq!(full_type, frag_type, "node type mismatch at index {i}");
    }
}
1507}