Skip to main content

xsd_schema/document/
buffer.rs

1//! Top-level `BufferDocument` struct assembling all storage primitives.
2
3use std::collections::HashMap;
4
5use bumpalo::Bump;
6
7use crate::namespace::NameTable;
8use crate::schema::SchemaSet;
9
10use super::{
11    BindingRemapTable, BufferDocumentOptions, DocumentKind, ElementIndex, NamespacePageFactory,
12    Node, NodePages, NodeSourceSpans, NsRef, QNameTable, StringStore, NULL,
13};
14
15/// Compact, cache-friendly XML document representation.
16///
17/// Built on a flat array of 16-byte [`Node`] structs with power-of-2
18/// page addressing.  All string data lives in the arena or in the
19/// [`StringStore`]; qualified names are deduplicated via [`QNameTable`].
20#[allow(dead_code)] // fields used by builder/navigator in later steps
21pub struct BufferDocument<'a> {
22    pub(crate) arena: &'a Bump,
23    pub(crate) kind: DocumentKind,
24    pub(crate) names: &'a NameTable,
25    pub(crate) nodes: NodePages<'a>,
26    pub(crate) qname_table: QNameTable,
27    pub(crate) strings: StringStore<'a>,
28    pub(crate) binding_remap: BindingRemapTable,
29    pub(crate) root: u32,
30    pub(crate) options: BufferDocumentOptions,
31    // Side tables
32    pub(crate) namespace_pages: NamespacePageFactory<'a>,
33    pub(crate) xml_namespace: NsRef,
34    pub(crate) element_namespaces: HashMap<u32, NsRef>,
35    pub(crate) element_index: ElementIndex,
36    pub(crate) source_spans: NodeSourceSpans,
37    pub(crate) id_elements: HashMap<Box<str>, u32>,
38    pub(crate) schema_set: Option<&'a SchemaSet>,
39    /// Document-level base URI surfaced by `BufferDocNavigator::base_uri()`
40    /// when no `xml:base` is found and the cursor reaches the document root.
41    /// Used by CTA fragment evaluation to expose the instance file URI to
42    /// `fn:base-uri(.)` while leaving the static base URI in
43    /// `XPathContext::base_uri` free to carry the schema document URI.
44    pub(crate) fragment_base_uri: Option<&'a str>,
45}
46
47impl<'a> BufferDocument<'a> {
48    // ── Accessors ──────────────────────────────────────────────────────
49
50    /// Returns the document kind (full or fragment).
51    #[inline]
52    pub fn kind(&self) -> DocumentKind {
53        self.kind
54    }
55
56    /// Returns the construction options.
57    #[inline]
58    pub fn options(&self) -> &BufferDocumentOptions {
59        &self.options
60    }
61
62    /// Returns the root node index.
63    #[inline]
64    pub fn root(&self) -> u32 {
65        self.root
66    }
67
68    /// Returns the associated schema set, if any.
69    #[inline]
70    pub fn schema_set(&self) -> Option<&'a SchemaSet> {
71        self.schema_set
72    }
73
74    /// Returns the shared name table.
75    #[inline]
76    pub fn names(&self) -> &'a NameTable {
77        self.names
78    }
79
80    // ── Navigation helpers ─────────────────────────────────────────────
81
82    /// Returns the first child of `parent` (always `parent + 1`).
83    ///
84    /// This relies on the document-order layout: the first child node
85    /// is stored immediately after its parent.
86    #[inline]
87    pub fn first_child_of(&self, parent: u32) -> u32 {
88        parent + 1
89    }
90
91    /// Returns the first content (non-attribute) child of `parent`, or
92    /// `None` if the element has no children.
93    ///
94    /// Attribute pairs precede content children in document order.
95    /// This method skips over them by walking the `next_sibling` chain
96    /// of attribute nodes until a non-attribute child is found.
97    pub fn first_content_child_of(&self, parent: u32) -> Option<u32> {
98        let node = self.nodes.get(parent);
99        if !node.has_flag(Node::HAS_CHILDREN) {
100            return None;
101        }
102        if !node.has_flag(Node::HAS_ATTRIBUTE) {
103            return Some(parent + 1);
104        }
105        // Walk the attribute next_sibling chain.
106        // Each attribute is a 2-node pair (Attribute + ChildValue).
107        let mut cursor = parent + 1; // first attribute
108        loop {
109            let attr = self.nodes.get(cursor);
110            if attr.next_sibling == NULL {
111                // Last attribute pair — content starts after its ChildValue node.
112                return Some(cursor + 2);
113            }
114            cursor = attr.next_sibling;
115        }
116    }
117
118    /// Returns the flat index one past the last node in the subtree
119    /// rooted at `elem`.
120    ///
121    /// Walks ancestors until a node with a `next_sibling` is found and
122    /// returns that sibling.  If the root is reached without finding a
123    /// sibling, returns `self.nodes.len()` (end of document).
124    pub fn subtree_end(&self, elem: u32) -> u32 {
125        let mut cursor = elem;
126        loop {
127            let node = self.nodes.get(cursor);
128            if node.next_sibling != NULL {
129                return node.next_sibling;
130            }
131            if node.parent == NULL {
132                return self.nodes.len();
133            }
134            cursor = node.parent;
135        }
136    }
137
138    /// Looks up an element node by its `xml:id` value.
139    pub fn get_element_by_id(&self, id: &str) -> Option<u32> {
140        self.id_elements.get(id).copied()
141    }
142
143    // ── CTA fragment configuration ─────────────────────────────────────
144
145    /// Reconfigure this document so that `elem_ref` is the root of the
146    /// XDM tree visible to the navigator (XSD 1.1 §3.12.4 CTA XDM
147    /// instance shape).
148    ///
149    /// After this call, `move_to_parent()` from `elem_ref` returns `false`
150    /// (the synthetic Root node at index 0 is severed from the tree),
151    /// `move_to_root()` lands on `elem_ref`, and the `following::*` /
152    /// `preceding::*` axes cannot escape the subtree anchored at
153    /// `elem_ref`. The optional `base_uri` argument is surfaced by
154    /// `BufferDocNavigator::base_uri()` when no `xml:base` is found,
155    /// so `fn:base-uri(.)` can return the instance file URI even
156    /// though the static base URI in the XPath context is set to the
157    /// schema document URI.
158    pub(crate) fn set_cta_fragment(&mut self, elem_ref: u32, base_uri: Option<&'a str>) {
159        self.kind = DocumentKind::Fragment;
160        self.root = elem_ref;
161        self.nodes.update(elem_ref, |n| n.parent = NULL);
162        self.fragment_base_uri = base_uri;
163    }
164
165    // ── Navigator factory ─────────────────────────────────────────────
166
167    /// Creates a navigator positioned at the document root.
168    pub fn create_navigator(&self) -> super::navigator::BufferDocNavigator<'_> {
169        super::navigator::BufferDocNavigator::new(self, self.root)
170    }
171
172    /// Creates a navigator positioned at the given node reference.
173    pub fn create_navigator_at(&self, node_ref: u32) -> super::navigator::BufferDocNavigator<'_> {
174        super::navigator::BufferDocNavigator::new(self, node_ref)
175    }
176
177    // ── Parsing helpers ───────────────────────────────────────────────
178
179    /// Parses an XML document from a reader into a `BufferDocument`.
180    pub fn from_reader<R: std::io::BufRead>(
181        reader: R,
182        arena: &'a Bump,
183        names: &'a NameTable,
184        options: BufferDocumentOptions,
185        schema_set: Option<&'a SchemaSet>,
186    ) -> Result<Self, super::BufferDocumentError> {
187        let builder =
188            super::builder::BufferDocumentBuilder::new(arena, names, schema_set, options)?;
189        builder.build(reader)
190    }
191
192    /// Parses an XML document from a reader with default options.
193    pub fn from_reader_default<R: std::io::BufRead>(
194        reader: R,
195        arena: &'a Bump,
196        names: &'a NameTable,
197    ) -> Result<Self, super::BufferDocumentError> {
198        Self::from_reader(reader, arena, names, BufferDocumentOptions::default(), None)
199    }
200}
201
202#[cfg(test)]
203mod tests {
204    use super::*;
205    use crate::document::{Node, NodeType, NULL};
206
207    /// Helper: builds a minimal `BufferDocument` for testing.
208    fn make_doc<'a>(arena: &'a Bump, names: &'a NameTable) -> BufferDocument<'a> {
209        BufferDocument {
210            arena,
211            kind: DocumentKind::default(),
212            names,
213            nodes: NodePages::new(arena),
214            qname_table: QNameTable::new(),
215            strings: StringStore::new(arena),
216            binding_remap: BindingRemapTable::new(),
217            root: 0,
218            options: BufferDocumentOptions::default(),
219            namespace_pages: NamespacePageFactory::new(arena),
220            xml_namespace: NsRef::NULL,
221            element_namespaces: HashMap::new(),
222            element_index: ElementIndex::new(),
223            source_spans: NodeSourceSpans::new(),
224            id_elements: HashMap::new(),
225            schema_set: None,
226            fragment_base_uri: None,
227        }
228    }
229
230    /// Helper: allocate a node and write it.
231    fn push_node(doc: &mut BufferDocument<'_>, node: Node) -> u32 {
232        let idx = doc.nodes.alloc().unwrap();
233        doc.nodes.set(idx, node);
234        idx
235    }
236
237    /// Helper: create a node with specific type and flags.
238    fn make_node(nt: NodeType, parent: u32, next_sibling: u32, flags: u32) -> Node {
239        let mut n = Node::default();
240        n.set_node_type(nt);
241        n.parent = parent;
242        n.next_sibling = next_sibling;
243        n.props_type |= flags;
244        n
245    }
246
247    #[test]
248    fn first_child_of() {
249        let arena = Bump::new();
250        let names = NameTable::new();
251        let doc = make_doc(&arena, &names);
252        assert_eq!(doc.first_child_of(0), 1);
253        assert_eq!(doc.first_child_of(5), 6);
254        assert_eq!(doc.first_child_of(100), 101);
255    }
256
257    #[test]
258    fn first_content_child_of_no_children() {
259        let arena = Bump::new();
260        let names = NameTable::new();
261        let mut doc = make_doc(&arena, &names);
262
263        // Element without HAS_CHILDREN
264        let elem = make_node(NodeType::Element, NULL, NULL, 0);
265        push_node(&mut doc, elem);
266
267        assert_eq!(doc.first_content_child_of(0), None);
268    }
269
270    #[test]
271    fn first_content_child_of_no_attrs() {
272        let arena = Bump::new();
273        let names = NameTable::new();
274        let mut doc = make_doc(&arena, &names);
275
276        // Element with children but no attributes
277        let elem = make_node(NodeType::Element, NULL, NULL, Node::HAS_CHILDREN);
278        push_node(&mut doc, elem); // 0
279
280        // First child is text
281        let text = make_node(NodeType::Text, 0, NULL, 0);
282        push_node(&mut doc, text); // 1
283
284        assert_eq!(doc.first_content_child_of(0), Some(1));
285    }
286
287    #[test]
288    fn first_content_child_of_with_attrs() {
289        let arena = Bump::new();
290        let names = NameTable::new();
291        let mut doc = make_doc(&arena, &names);
292
293        // Element with both attributes and children
294        let elem = make_node(
295            NodeType::Element,
296            NULL,
297            NULL,
298            Node::HAS_CHILDREN | Node::HAS_ATTRIBUTE,
299        );
300        push_node(&mut doc, elem); // 0
301
302        // Attribute 1 (pair: Attribute + ChildValue)
303        // next_sibling points to the next attribute at index 3
304        let attr1 = make_node(NodeType::Attribute, 0, 3, 0);
305        push_node(&mut doc, attr1); // 1
306
307        let val1 = make_node(NodeType::ChildValue, 0, NULL, 0);
308        push_node(&mut doc, val1); // 2
309
310        // Attribute 2 (pair: Attribute + ChildValue)
311        // next_sibling = NULL → last attribute
312        let attr2 = make_node(NodeType::Attribute, 0, NULL, 0);
313        push_node(&mut doc, attr2); // 3
314
315        let val2 = make_node(NodeType::ChildValue, 0, NULL, 0);
316        push_node(&mut doc, val2); // 4
317
318        // Content child (text) at index 5
319        let text = make_node(NodeType::Text, 0, NULL, 0);
320        push_node(&mut doc, text); // 5
321
322        // first_content_child_of should skip both attribute pairs → index 5
323        assert_eq!(doc.first_content_child_of(0), Some(5));
324    }
325
326    #[test]
327    fn subtree_end_with_sibling() {
328        let arena = Bump::new();
329        let names = NameTable::new();
330        let mut doc = make_doc(&arena, &names);
331
332        // Root at 0
333        let root = make_node(NodeType::Root, NULL, NULL, Node::HAS_CHILDREN);
334        push_node(&mut doc, root); // 0
335
336        // Element at 1, has sibling at 2
337        let elem = make_node(NodeType::Element, 0, 2, 0);
338        push_node(&mut doc, elem); // 1
339
340        // Sibling element at 2
341        let sib = make_node(NodeType::Element, 0, NULL, 0);
342        push_node(&mut doc, sib); // 2
343
344        assert_eq!(doc.subtree_end(1), 2);
345    }
346
347    #[test]
348    fn subtree_end_walks_ancestors() {
349        let arena = Bump::new();
350        let names = NameTable::new();
351        let mut doc = make_doc(&arena, &names);
352
353        // Root at 0, has sibling at 5 (hypothetical)
354        let root = make_node(NodeType::Root, NULL, NULL, Node::HAS_CHILDREN);
355        push_node(&mut doc, root); // 0
356
357        // Parent element at 1, has next_sibling at 4
358        let parent = make_node(NodeType::Element, 0, 4, Node::HAS_CHILDREN);
359        push_node(&mut doc, parent); // 1
360
361        // Nested child element at 2, no sibling
362        let child = make_node(NodeType::Element, 1, NULL, 0);
363        push_node(&mut doc, child); // 2
364
365        // Text at 3 (unused, just to fill space)
366        let text = make_node(NodeType::Text, 1, NULL, 0);
367        push_node(&mut doc, text); // 3
368
369        // Sibling of parent at 4
370        let uncle = make_node(NodeType::Element, 0, NULL, 0);
371        push_node(&mut doc, uncle); // 4
372
373        // child(2) has no sibling → walk to parent(1) which has sibling 4
374        assert_eq!(doc.subtree_end(2), 4);
375    }
376
377    #[test]
378    fn subtree_end_at_document_end() {
379        let arena = Bump::new();
380        let names = NameTable::new();
381        let mut doc = make_doc(&arena, &names);
382
383        // Root at 0, no sibling, parent = NULL
384        let root = make_node(NodeType::Root, NULL, NULL, Node::HAS_CHILDREN);
385        push_node(&mut doc, root); // 0
386
387        // Single child at 1, no sibling
388        let elem = make_node(NodeType::Element, 0, NULL, 0);
389        push_node(&mut doc, elem); // 1
390
391        // elem(1) has no sibling → walk to root(0) which has parent = NULL → nodes.len()
392        assert_eq!(doc.subtree_end(1), doc.nodes.len());
393        assert_eq!(doc.subtree_end(1), 2);
394    }
395
396    #[test]
397    fn get_element_by_id_found() {
398        let arena = Bump::new();
399        let names = NameTable::new();
400        let mut doc = make_doc(&arena, &names);
401        doc.id_elements.insert("foo".into(), 42);
402
403        assert_eq!(doc.get_element_by_id("foo"), Some(42));
404    }
405
406    #[test]
407    fn get_element_by_id_not_found() {
408        let arena = Bump::new();
409        let names = NameTable::new();
410        let doc = make_doc(&arena, &names);
411
412        assert_eq!(doc.get_element_by_id("nonexistent"), None);
413    }
414}