Skip to main content

fhp_tree/
node.rs

1//! Cache-line aligned node layout and supporting types.
2//!
3//! Each [`Node`](crate::node::Node) occupies exactly 64 bytes (one cache line), split into a hot
4//! first half (tree links, tag, depth) and a cold second half (attribute
5//! metadata, padding).
6
7use fhp_core::tag::Tag;
8
9/// Index into the arena's node vector.
10///
11/// Uses `u32` internally — supports up to ~4 billion nodes.
12/// [`NodeId::NULL`] represents the absence of a node (like `None`).
13#[derive(Clone, Copy, PartialEq, Eq, Hash)]
14pub struct NodeId(pub u32);
15
16impl NodeId {
17    /// Sentinel value representing "no node".
18    pub const NULL: NodeId = NodeId(u32::MAX);
19
20    /// Returns `true` if this is the null sentinel.
21    #[inline(always)]
22    pub const fn is_null(self) -> bool {
23        self.0 == u32::MAX
24    }
25
26    /// Returns the index as `usize` for array indexing.
27    #[inline(always)]
28    pub const fn index(self) -> usize {
29        self.0 as usize
30    }
31}
32
33impl core::fmt::Debug for NodeId {
34    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
35        if self.is_null() {
36            write!(f, "NodeId(NULL)")
37        } else {
38            write!(f, "NodeId({})", self.0)
39        }
40    }
41}
42
43/// Bitflags for node properties.
44///
45/// Packed into a single `u8` for minimal space inside the [`Node`] struct.
46#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
47pub struct NodeFlags(u8);
48
49impl NodeFlags {
50    /// The element is a void element (no children allowed).
51    pub const IS_VOID: u8 = 1 << 0;
52    /// The element has at least one child.
53    pub const HAS_CHILDREN: u8 = 1 << 1;
54    /// The element has at least one attribute.
55    pub const HAS_ATTRS: u8 = 1 << 2;
56    /// The element was self-closing in source (`<br/>`).
57    pub const IS_SELF_CLOSING: u8 = 1 << 3;
58    /// This node is a text node (not an element).
59    pub const IS_TEXT: u8 = 1 << 4;
60    /// This node is a comment node.
61    pub const IS_COMMENT: u8 = 1 << 5;
62    /// This node is a doctype node.
63    pub const IS_DOCTYPE: u8 = 1 << 6;
64    /// This node is a CDATA section.
65    pub const IS_CDATA: u8 = 1 << 7;
66
67    /// Text node reads from `Arena::source` instead of `text_slab`.
68    ///
69    /// Reuses the [`IS_VOID`](Self::IS_VOID) bit (bit 0), which is unused for
70    /// text nodes. Only meaningful when [`IS_TEXT`](Self::IS_TEXT) is also set.
71    pub const IS_TEXT_FROM_SOURCE: u8 = 1 << 0;
72
73    /// Create empty flags.
74    #[inline(always)]
75    pub const fn empty() -> Self {
76        Self(0)
77    }
78
79    /// Set a flag bit.
80    #[inline(always)]
81    pub fn set(&mut self, flag: u8) {
82        self.0 |= flag;
83    }
84
85    /// Check if a flag bit is set.
86    #[inline(always)]
87    pub const fn has(self, flag: u8) -> bool {
88        self.0 & flag != 0
89    }
90}
91
92/// A single DOM node in the arena, exactly 64 bytes (one cache line).
93///
94/// The first 32 bytes contain frequently accessed fields (tree links, tag,
95/// depth). The second 32 bytes contain less-frequently accessed data
96/// (attribute metadata).
97///
98/// # Layout
99///
100/// | Offset | Bytes | Field |
101/// |--------|-------|-------|
102/// | 0      | 1     | `tag` |
103/// | 1      | 1     | `flags` |
104/// | 2      | 2     | `depth` |
105/// | 4      | 4     | `parent` |
106/// | 8      | 4     | `first_child` |
107/// | 12     | 4     | `next_sibling` |
108/// | 16     | 4     | `last_child` |
109/// | 20     | 4     | `prev_sibling` |
110/// | 24     | 4     | `text_offset` |
111/// | 28     | 4     | `text_len` |
112/// | 32     | 4     | `attr_offset` |
113/// | 36     | 4     | `attr_raw_offset` |
114/// | 40     | 8     | `class_hash` |
115/// | 48     | 4     | `id_hash` |
116/// | 52     | 2     | `attr_raw_len` |
117/// | 54     | 2     | `element_index` |
118/// | 56     | 1     | `attr_count` |
119/// | 57     | 7     | `_padding` |
120#[repr(C, align(64))]
121pub struct Node {
122    // === Hot (first 32 bytes) ===
123    /// Interned tag type.
124    pub tag: Tag,
125    /// Bitflags for node properties.
126    pub flags: NodeFlags,
127    /// Nesting depth from root (root = 0).
128    pub depth: u16,
129    /// Parent node, or `NodeId::NULL` for root.
130    pub parent: NodeId,
131    /// First child, or `NodeId::NULL` if no children.
132    pub first_child: NodeId,
133    /// Next sibling, or `NodeId::NULL` if last.
134    pub next_sibling: NodeId,
135    /// Last child, or `NodeId::NULL` if no children.
136    pub last_child: NodeId,
137    /// Previous sibling, or `NodeId::NULL` if first.
138    pub prev_sibling: NodeId,
139    /// Byte offset into the text slab.
140    pub text_offset: u32,
141    /// Length in bytes of the text content in the slab.
142    pub text_len: u32,
143
144    // === Cold (second 32 bytes) ===
145    // Fields ordered to avoid implicit padding with repr(C):
146    // u32s first, then u16, then u8, then byte-array padding.
147    /// Byte offset into the attribute slab (after lazy parse).
148    pub attr_offset: u32,
149    /// Byte offset into `attr_str_slab` for the raw attribute region.
150    pub attr_raw_offset: u32,
151    /// 64-bit bloom filter of class attribute tokens.
152    ///
153    /// Each class token is hashed via FNV-1a and a single bit is set at
154    /// `hash % 64`. Zero means no class attribute. Used by the selector
155    /// matcher for fast rejection. 64 bits reduces false positive rate
156    /// from ~15% (5 classes) to ~8% compared to 32-bit.
157    pub class_hash: u64,
158    /// FNV-1a hash of the `id` attribute value.
159    ///
160    /// Zero means no id attribute. Used by the selector matcher for fast
161    /// rejection before scanning attributes.
162    pub id_hash: u32,
163    /// Length in bytes of the raw attribute region (max 65535).
164    pub attr_raw_len: u16,
165    /// 1-based index among element siblings (0 = not computed or text node).
166    pub element_index: u16,
167    /// Number of attributes (0 with raw data present = unparsed lazy).
168    pub attr_count: u8,
169    /// Padding to fill the cache line.
170    pub _padding: [u8; 7],
171}
172
173impl Node {
174    /// Create a new element node with all links set to NULL.
175    pub fn new_element(tag: Tag, depth: u16) -> Self {
176        let mut flags = NodeFlags::empty();
177        if tag.is_void() {
178            flags.set(NodeFlags::IS_VOID);
179        }
180        Self {
181            tag,
182            flags,
183            depth,
184            parent: NodeId::NULL,
185            first_child: NodeId::NULL,
186            next_sibling: NodeId::NULL,
187            last_child: NodeId::NULL,
188            prev_sibling: NodeId::NULL,
189            text_offset: 0,
190            text_len: 0,
191            attr_offset: 0,
192            attr_count: 0,
193            attr_raw_offset: 0,
194            attr_raw_len: 0,
195            class_hash: 0,
196            id_hash: 0,
197            element_index: 0,
198            _padding: [0; 7],
199        }
200    }
201
202    /// Create a new text node.
203    pub fn new_text(depth: u16, text_offset: u32, text_len: u32) -> Self {
204        let mut flags = NodeFlags::empty();
205        flags.set(NodeFlags::IS_TEXT);
206        Self {
207            tag: Tag::Unknown,
208            flags,
209            depth,
210            parent: NodeId::NULL,
211            first_child: NodeId::NULL,
212            next_sibling: NodeId::NULL,
213            last_child: NodeId::NULL,
214            prev_sibling: NodeId::NULL,
215            text_offset,
216            text_len,
217            attr_offset: 0,
218            attr_count: 0,
219            attr_raw_offset: 0,
220            attr_raw_len: 0,
221            class_hash: 0,
222            id_hash: 0,
223            element_index: 0,
224            _padding: [0; 7],
225        }
226    }
227
228    /// Create a new comment node.
229    pub fn new_comment(depth: u16, text_offset: u32, text_len: u32) -> Self {
230        let mut flags = NodeFlags::empty();
231        flags.set(NodeFlags::IS_COMMENT);
232        Self {
233            tag: Tag::Unknown,
234            flags,
235            depth,
236            parent: NodeId::NULL,
237            first_child: NodeId::NULL,
238            next_sibling: NodeId::NULL,
239            last_child: NodeId::NULL,
240            prev_sibling: NodeId::NULL,
241            text_offset,
242            text_len,
243            attr_offset: 0,
244            attr_count: 0,
245            attr_raw_offset: 0,
246            attr_raw_len: 0,
247            class_hash: 0,
248            id_hash: 0,
249            element_index: 0,
250            _padding: [0; 7],
251        }
252    }
253
254    /// Create a new doctype node.
255    pub fn new_doctype(depth: u16, text_offset: u32, text_len: u32) -> Self {
256        let mut flags = NodeFlags::empty();
257        flags.set(NodeFlags::IS_DOCTYPE);
258        Self {
259            tag: Tag::Unknown,
260            flags,
261            depth,
262            parent: NodeId::NULL,
263            first_child: NodeId::NULL,
264            next_sibling: NodeId::NULL,
265            last_child: NodeId::NULL,
266            prev_sibling: NodeId::NULL,
267            text_offset,
268            text_len,
269            attr_offset: 0,
270            attr_count: 0,
271            attr_raw_offset: 0,
272            attr_raw_len: 0,
273            class_hash: 0,
274            id_hash: 0,
275            element_index: 0,
276            _padding: [0; 7],
277        }
278    }
279}
280
281impl core::fmt::Debug for Node {
282    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
283        f.debug_struct("Node")
284            .field("tag", &self.tag)
285            .field("flags", &self.flags)
286            .field("depth", &self.depth)
287            .field("parent", &self.parent)
288            .field("first_child", &self.first_child)
289            .field("next_sibling", &self.next_sibling)
290            .finish()
291    }
292}
293
294#[cfg(test)]
295mod tests {
296    use super::*;
297
298    #[test]
299    fn node_is_64_bytes() {
300        assert_eq!(
301            std::mem::size_of::<Node>(),
302            64,
303            "Node must be exactly 64 bytes (one cache line)"
304        );
305    }
306
307    #[test]
308    fn node_alignment_is_64() {
309        assert_eq!(
310            std::mem::align_of::<Node>(),
311            64,
312            "Node must be 64-byte aligned"
313        );
314    }
315
316    #[test]
317    fn node_id_null() {
318        assert!(NodeId::NULL.is_null());
319        assert!(!NodeId(0).is_null());
320        assert!(!NodeId(42).is_null());
321    }
322
323    #[test]
324    fn node_id_debug() {
325        assert_eq!(format!("{:?}", NodeId::NULL), "NodeId(NULL)");
326        assert_eq!(format!("{:?}", NodeId(5)), "NodeId(5)");
327    }
328
329    #[test]
330    fn node_flags() {
331        let mut flags = NodeFlags::empty();
332        assert!(!flags.has(NodeFlags::IS_VOID));
333        assert!(!flags.has(NodeFlags::HAS_CHILDREN));
334
335        flags.set(NodeFlags::IS_VOID);
336        assert!(flags.has(NodeFlags::IS_VOID));
337        assert!(!flags.has(NodeFlags::HAS_CHILDREN));
338
339        flags.set(NodeFlags::HAS_CHILDREN);
340        assert!(flags.has(NodeFlags::IS_VOID));
341        assert!(flags.has(NodeFlags::HAS_CHILDREN));
342    }
343
344    #[test]
345    fn new_element_sets_void_flag() {
346        let br = Node::new_element(Tag::Br, 0);
347        assert!(br.flags.has(NodeFlags::IS_VOID));
348
349        let div = Node::new_element(Tag::Div, 0);
350        assert!(!div.flags.has(NodeFlags::IS_VOID));
351    }
352
353    #[test]
354    fn new_text_sets_text_flag() {
355        let text = Node::new_text(1, 0, 5);
356        assert!(text.flags.has(NodeFlags::IS_TEXT));
357        assert_eq!(text.text_offset, 0);
358        assert_eq!(text.text_len, 5);
359    }
360}