// xsd_schema/document/node.rs

1use crate::navigator::DomNodeType;
2
// ── Page-addressing constants ──────────────────────────────────────────

/// Number of low bits of a flat node index that address the slot inside a
/// page. With 12 bits a page holds 2^12 = 4096 nodes.
pub const PAGE_SHIFT: u32 = 12;

/// Node capacity of a single page (4096). At 16 bytes per node one page
/// occupies 64 KB.
pub const PAGE_SIZE: u32 = 1 << PAGE_SHIFT;

/// Mask that keeps only the slot bits of a flat node index (`0xFFF`).
pub const PAGE_MASK: u32 = (1 << PAGE_SHIFT) - 1;

/// Sentinel meaning "no node"; it is never a valid node index.
pub const NULL: u32 = u32::MAX;
16
17// ── Page-addressing helpers ────────────────────────────────────────────
18
19/// Returns the page number for a flat node index.
20#[inline]
21pub fn page_of(node_ref: u32) -> u32 {
22    node_ref >> PAGE_SHIFT
23}
24
25/// Returns the slot (offset within page) for a flat node index.
26#[inline]
27pub fn slot_of(node_ref: u32) -> u32 {
28    node_ref & PAGE_MASK
29}
30
31/// Constructs a flat node index from a page number and slot.
32///
33/// # Panics (debug only)
34///
35/// Panics if `slot >= PAGE_SIZE`, which would alias bits into the page portion.
36#[inline]
37pub fn node_ref_from(page: u32, slot: u32) -> u32 {
38    debug_assert!(slot < PAGE_SIZE, "slot {slot} >= PAGE_SIZE ({PAGE_SIZE})");
39    (page << PAGE_SHIFT) | slot
40}
41
// ── NodeType ───────────────────────────────────────────────────────────

/// Kind of a flat node; its discriminant lives in the low 4 bits of
/// [`Node::props_type`].
///
/// The explicit discriminant values are the stored encoding, so they must not
/// be renumbered.
#[repr(u8)]
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
pub enum NodeType {
    /// Sentinel marking the end of the document; also the default.
    #[default]
    Nul = 0,
    /// The document root.
    Root = 1,
    /// An element; `value` is a QNameAtom index.
    Element = 2,
    /// Name half of a two-node pair; `value` is a QNameAtom index.
    Attribute = 3,
    /// String half of a two-node pair: an attribute value or PI data.
    ChildValue = 4,
    /// A text node; `value` is a string index.
    Text = 5,
    /// A text node consisting only of whitespace.
    Whitespace = 6,
    /// Whitespace that is significant to the schema.
    SignificantWhitespace = 7,
    /// A comment; `value` is a string index.
    Comment = 8,
    /// Target of a processing instruction (`value` is a string index); its
    /// data is held by the following `ChildValue` node.
    ProcessingInstruction = 9,
}
70
71impl From<NodeType> for DomNodeType {
72    fn from(nt: NodeType) -> Self {
73        match nt {
74            NodeType::Root => DomNodeType::Root,
75            NodeType::Element => DomNodeType::Element,
76            NodeType::Attribute => DomNodeType::Attribute,
77            NodeType::Text => DomNodeType::Text,
78            NodeType::Whitespace => DomNodeType::Whitespace,
79            NodeType::SignificantWhitespace => DomNodeType::SignificantWhitespace,
80            NodeType::Comment => DomNodeType::Comment,
81            NodeType::ProcessingInstruction => DomNodeType::ProcessingInstruction,
82            NodeType::Nul | NodeType::ChildValue => {
83                unreachable!("Nul and ChildValue have no DomNodeType equivalent")
84            }
85        }
86    }
87}
88
89impl NodeType {
90    /// Decode from the low 4 bits of a `props_type` value.
91    #[inline]
92    fn from_bits(bits: u32) -> Self {
93        match bits & Node::NODE_TYPE_MASK {
94            0 => NodeType::Nul,
95            1 => NodeType::Root,
96            2 => NodeType::Element,
97            3 => NodeType::Attribute,
98            4 => NodeType::ChildValue,
99            5 => NodeType::Text,
100            6 => NodeType::Whitespace,
101            7 => NodeType::SignificantWhitespace,
102            8 => NodeType::Comment,
103            9 => NodeType::ProcessingInstruction,
104            _ => NodeType::Nul, // reserved values → Nul
105        }
106    }
107}
108
// ── Node ───────────────────────────────────────────────────────────────

/// 16-byte flat node in the `BufferDocument` node array.
///
/// Layout of `props_type` (32 bits):
/// - Bits \[3:0\]   — [`NodeType`] discriminant (4 bits)
/// - Bits \[7:4\]   — property flags (`HAS_ATTRIBUTE`, `HAS_CHILDREN`, `IS_COMPLEX_TYPE`, `HAS_NMSP_DECLS`)
/// - Bit  \[8\]     — `IS_NIL` flag (xsi:nil="true")
/// - Bits \[11:9\]  — reserved (must be 0)
/// - Bits \[31:12\] — 20-bit binding index into `BindingRemapTable` (0 = unbound)
///
/// The derived `Default` zero-initialises every field, so the links of a
/// default node are `0`, not [`NULL`].
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct Node {
    /// Index of the next sibling node, or [`NULL`].
    pub next_sibling: u32,
    /// Index of the parent node, or [`NULL`].
    pub parent: u32,
    /// Packed field: node type (bits \[3:0\]) | property flags (bits \[7:4\]) |
    /// `IS_NIL` (bit \[8\]) | reserved (bits \[11:9\]) | 20-bit binding index
    /// (bits \[31:12\]).
    pub props_type: u32,
    /// Interpretation depends on node type (QNameAtom index, string index, etc.).
    pub value: u32,
}
131
132impl Node {
133    // ── Bitmask constants ──────────────────────────────────────────────
134
135    const NODE_TYPE_MASK: u32 = 0x0F; // bits [3:0]
136    const BINDING_INDEX_SHIFT: u32 = 12; // bits [31:12]
137
138    /// Element has attribute children.
139    pub const HAS_ATTRIBUTE: u32 = 0x10;
140    /// Element/Root has content children.
141    pub const HAS_CHILDREN: u32 = 0x20;
142    /// `binding_index` references a complex type in the remap table.
143    pub const IS_COMPLEX_TYPE: u32 = 0x40;
144    /// Element declares namespace bindings.
145    pub const HAS_NMSP_DECLS: u32 = 0x80;
146    /// Element has xsi:nil="true".
147    pub const IS_NIL: u32 = 0x100; // bit [8]
148
149    // ── Accessors ──────────────────────────────────────────────────────
150
151    /// Returns the [`NodeType`] stored in the low 4 bits.
152    #[inline]
153    pub fn node_type(self) -> NodeType {
154        NodeType::from_bits(self.props_type)
155    }
156
157    /// Returns the raw flag nibble (bits \[7:4\]).
158    #[inline]
159    pub fn flags(self) -> u32 {
160        self.props_type & 0xF0
161    }
162
163    /// Returns the 20-bit binding index (bits \[31:12\]).
164    #[inline]
165    pub fn binding_index(self) -> u32 {
166        self.props_type >> Self::BINDING_INDEX_SHIFT
167    }
168
169    /// Overwrites the [`NodeType`] in bits \[3:0\], preserving other fields.
170    #[inline]
171    pub fn set_node_type(&mut self, nt: NodeType) {
172        self.props_type = (self.props_type & !Self::NODE_TYPE_MASK) | (nt as u32);
173    }
174
175    /// Sets a flag bit (e.g. [`HAS_ATTRIBUTE`](Self::HAS_ATTRIBUTE)).
176    #[inline]
177    pub fn set_flag(&mut self, flag: u32) {
178        self.props_type |= flag;
179    }
180
181    /// Clears a flag bit.
182    #[inline]
183    pub fn clear_flag(&mut self, flag: u32) {
184        self.props_type &= !flag;
185    }
186
187    /// Tests whether a flag bit is set.
188    #[inline]
189    pub fn has_flag(self, flag: u32) -> bool {
190        self.props_type & flag != 0
191    }
192
193    /// Sets the 20-bit binding index in bits \[31:12\].
194    ///
195    /// Preserves bits \[11:0\] (node type, flags, and IS_NIL).
196    ///
197    /// # Panics (debug only)
198    ///
199    /// Panics if `idx` exceeds 20 bits (>= 0x10_0000).
200    #[inline]
201    pub fn set_binding_index(&mut self, idx: u32) {
202        debug_assert!(idx < (1 << 20), "binding_index {idx} exceeds 20-bit range");
203        self.props_type = (self.props_type & 0xFFF) | (idx << Self::BINDING_INDEX_SHIFT);
204    }
205}
206
// ── Tests ──────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;
    use std::mem;

    /// Every `NodeType` variant, in discriminant order.
    const ALL_NODE_TYPES: [NodeType; 10] = [
        NodeType::Nul,
        NodeType::Root,
        NodeType::Element,
        NodeType::Attribute,
        NodeType::ChildValue,
        NodeType::Text,
        NodeType::Whitespace,
        NodeType::SignificantWhitespace,
        NodeType::Comment,
        NodeType::ProcessingInstruction,
    ];

    /// Returns a default node whose type has been set to `Element`.
    fn element_node() -> Node {
        let mut node = Node::default();
        node.set_node_type(NodeType::Element);
        node
    }

    #[test]
    fn node_size_is_16_bytes() {
        assert_eq!(mem::size_of::<Node>(), 16);
    }

    #[test]
    fn node_is_copy_clone_default() {
        let original = Node::default();
        let copied = original; // Copy
        #[allow(clippy::clone_on_copy)]
        let cloned = original.clone(); // Clone — intentionally testing the Clone impl
        assert_eq!(original, copied);
        assert_eq!(original, cloned);

        // Default zeroes every field.
        let Node {
            next_sibling,
            parent,
            props_type,
            value,
        } = original;
        assert_eq!(next_sibling, 0);
        assert_eq!(parent, 0);
        assert_eq!(props_type, 0);
        assert_eq!(value, 0);
    }

    #[test]
    fn node_type_round_trip() {
        for nt in ALL_NODE_TYPES {
            let mut node = Node::default();
            node.set_node_type(nt);
            assert_eq!(node.node_type(), nt, "round-trip failed for {nt:?}");
        }
    }

    #[test]
    fn node_type_preserves_other_bits() {
        // The type is written last on purpose: it must not disturb what is
        // already stored in the flag and binding-index bits.
        let mut node = Node::default();
        node.set_flag(Node::HAS_CHILDREN);
        node.set_binding_index(42);
        node.set_node_type(NodeType::Element);

        assert_eq!(node.node_type(), NodeType::Element);
        assert!(node.has_flag(Node::HAS_CHILDREN));
        assert_eq!(node.binding_index(), 42);
    }

    #[test]
    fn flags_set_clear_has() {
        let mut node = Node::default();
        for flag in [
            Node::HAS_ATTRIBUTE,
            Node::HAS_CHILDREN,
            Node::IS_COMPLEX_TYPE,
            Node::HAS_NMSP_DECLS,
        ] {
            assert!(!node.has_flag(flag));
        }

        node.set_flag(Node::HAS_ATTRIBUTE);
        assert!(node.has_flag(Node::HAS_ATTRIBUTE));

        node.set_flag(Node::HAS_CHILDREN);
        assert!(node.has_flag(Node::HAS_CHILDREN));
        assert!(node.has_flag(Node::HAS_ATTRIBUTE)); // still set

        node.clear_flag(Node::HAS_ATTRIBUTE);
        assert!(!node.has_flag(Node::HAS_ATTRIBUTE));
        assert!(node.has_flag(Node::HAS_CHILDREN)); // unchanged
    }

    #[test]
    fn flags_nibble() {
        let both = Node::HAS_ATTRIBUTE | Node::HAS_NMSP_DECLS;
        let mut node = Node::default();
        node.set_flag(both);
        assert_eq!(node.flags(), both);
    }

    #[test]
    fn binding_index_set_get() {
        let mut node = element_node();
        node.set_flag(Node::HAS_CHILDREN);

        // 0, 1 and the largest 20-bit value.
        for idx in [0, 1, 0xF_FFFF] {
            node.set_binding_index(idx);
            assert_eq!(node.binding_index(), idx);
        }

        // Verify node_type and flags are preserved
        assert_eq!(node.node_type(), NodeType::Element);
        assert!(node.has_flag(Node::HAS_CHILDREN));
    }

    #[test]
    #[cfg(debug_assertions)]
    #[should_panic(expected = "binding_index")]
    fn binding_index_overflow_panics_in_debug() {
        let mut node = Node::default();
        node.set_binding_index(0x10_0000); // 21 bits — too large
    }

    #[test]
    fn is_nil_flag_set_clear() {
        let mut node = element_node();
        assert!(!node.has_flag(Node::IS_NIL));

        node.set_flag(Node::IS_NIL);
        assert!(node.has_flag(Node::IS_NIL));
        assert_eq!(node.node_type(), NodeType::Element);

        node.clear_flag(Node::IS_NIL);
        assert!(!node.has_flag(Node::IS_NIL));
    }

    #[test]
    fn is_nil_and_binding_index_independent() {
        let mut node = element_node();
        node.set_flag(Node::IS_NIL);
        node.set_binding_index(123);

        assert!(node.has_flag(Node::IS_NIL));
        assert_eq!(node.binding_index(), 123);
        assert_eq!(node.node_type(), NodeType::Element);
    }

    #[test]
    fn set_binding_index_preserves_is_nil() {
        let mut node = element_node();
        node.set_flag(Node::HAS_CHILDREN | Node::IS_NIL);

        node.set_binding_index(42);
        assert!(node.has_flag(Node::IS_NIL));
        assert!(node.has_flag(Node::HAS_CHILDREN));
        assert_eq!(node.binding_index(), 42);
        assert_eq!(node.node_type(), NodeType::Element);
    }

    #[test]
    fn page_addressing_round_trip() {
        // (flat index, page, slot): first node on the first page, last slot
        // on the first page, first slot on the second page.
        let known = [
            (0, 0, 0),
            (PAGE_SIZE - 1, 0, PAGE_SIZE - 1),
            (PAGE_SIZE, 1, 0),
        ];
        for (node_ref, page, slot) in known {
            assert_eq!(page_of(node_ref), page);
            assert_eq!(slot_of(node_ref), slot);
            assert_eq!(node_ref_from(page, slot), node_ref);
        }

        // Arbitrary position
        let (page, slot) = (7u32, 123u32);
        let r = node_ref_from(page, slot);
        assert_eq!(page_of(r), page);
        assert_eq!(slot_of(r), slot);
    }

    #[test]
    fn null_sentinel() {
        assert_eq!(NULL, u32::MAX);
        // NULL should decode to a very large page, not page 0
        assert_ne!(page_of(NULL), 0);
    }

    #[test]
    fn dom_node_type_conversion() {
        let expectations = [
            (NodeType::Root, DomNodeType::Root),
            (NodeType::Element, DomNodeType::Element),
            (NodeType::Attribute, DomNodeType::Attribute),
            (NodeType::Text, DomNodeType::Text),
            (NodeType::Whitespace, DomNodeType::Whitespace),
            (
                NodeType::SignificantWhitespace,
                DomNodeType::SignificantWhitespace,
            ),
            (NodeType::Comment, DomNodeType::Comment),
            (
                NodeType::ProcessingInstruction,
                DomNodeType::ProcessingInstruction,
            ),
        ];
        for (nt, expected) in expectations {
            assert_eq!(DomNodeType::from(nt), expected);
        }
    }

    #[test]
    #[should_panic(expected = "Nul and ChildValue")]
    fn dom_node_type_nul_panics() {
        let _ = DomNodeType::from(NodeType::Nul);
    }

    #[test]
    #[should_panic(expected = "Nul and ChildValue")]
    fn dom_node_type_child_value_panics() {
        let _ = DomNodeType::from(NodeType::ChildValue);
    }

    #[test]
    fn node_type_default_is_nul() {
        assert_eq!(NodeType::default(), NodeType::Nul);
    }

    #[test]
    fn page_constants() {
        assert_eq!(PAGE_SHIFT, 12);
        assert_eq!(PAGE_SIZE, 4096);
        assert_eq!(PAGE_MASK, 0xFFF);
        assert_eq!(1u32 << PAGE_SHIFT, PAGE_SIZE);
        assert_eq!(PAGE_SIZE - 1, PAGE_MASK);
    }
}