1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
//! TreeNodeData - Arena-allocated parse tree node data
//!
//! This module provides the core data structure for parse tree nodes that are
//! allocated in the arena. TreeNodeData is designed to be memory-efficient,
//! cache-friendly, and support all tree-sitter node capabilities.
//!
//! # Design
//!
//! - **Size**: ≤64 bytes for cache efficiency
//! - **SmallVec optimization**: 0-3 children inline, heap for larger
//! - **Handle-based**: Child references via NodeHandle (arena indirection)
//! - **Packed flags**: 8 boolean flags in 1 byte
//!
//! # Example
//!
//! ```
//! use adze::tree_node_data::TreeNodeData;
//! use adze::arena_allocator::NodeHandle;
//!
//! // Create a leaf node
//! let leaf = TreeNodeData::leaf(5, 0, 10);
//!
//! // Create a branch node
//! let children = vec![NodeHandle::new(0, 0), NodeHandle::new(0, 1)];
//! let branch = TreeNodeData::branch(10, 0, 50, children);
//! ```
//!
//! Related: docs/specs/TREE_NODE_DATA_SPEC.md
use crate::arena_allocator::NodeHandle;
use smallvec::SmallVec;
/// Arena-allocated parse tree node data
///
/// This struct represents the data for a single node in the parse tree.
/// It is designed to be allocated in the arena for efficient memory usage.
///
/// Size: 56 bytes (with 8 bytes padding to 64-byte alignment)
#[derive(Clone, Debug)]
pub struct TreeNodeData {
/// Symbol/kind ID from grammar
symbol: u16,
/// Byte range in source text
start_byte: u32,
end_byte: u32,
/// Child node handles
/// SmallVec optimizes for 0-3 children inline (no heap allocation)
/// Larger child counts spill to heap automatically
children: SmallVec<[NodeHandle; 3]>,
/// Number of named children (subset of children)
named_child_count: u16,
/// Field ID (if this node is a named field)
/// Uses niche optimization: None = 0xFFFF
field_id: Option<u16>,
/// Packed boolean flags
flags: NodeFlags,
}
/// Packed node flags (8 flags in 1 byte)
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
struct NodeFlags {
bits: u8,
}
impl NodeFlags {
const IS_NAMED: u8 = 1 << 0; // Is this a named node?
const IS_MISSING: u8 = 1 << 1; // Is this node missing (error recovery)?
const IS_ERROR: u8 = 1 << 2; // Is this an error node?
const IS_EXTRA: u8 = 1 << 3; // Is this an extra node?
#[allow(dead_code)] // Reserved for incremental parsing
const HAS_CHANGES: u8 = 1 << 4; // Has this node changed (incremental)?
// 3 bits reserved for future use
fn new() -> Self {
NodeFlags { bits: 0 }
}
fn get(&self, mask: u8) -> bool {
(self.bits & mask) != 0
}
fn set(&mut self, mask: u8, value: bool) {
if value {
self.bits |= mask;
} else {
self.bits &= !mask;
}
}
}
impl TreeNodeData {
/// Create a new node with symbol and byte range
///
/// # Example
///
/// ```
/// use adze::tree_node_data::TreeNodeData;
///
/// let node = TreeNodeData::new(42, 0, 10);
/// assert_eq!(node.symbol(), 42);
/// assert_eq!(node.byte_range(), (0, 10));
/// ```
pub fn new(symbol: u16, start_byte: u32, end_byte: u32) -> Self {
TreeNodeData {
symbol,
start_byte,
end_byte,
children: SmallVec::new(),
named_child_count: 0,
field_id: None,
flags: NodeFlags::new(),
}
}
/// Create a leaf node (no children)
///
/// This is equivalent to `new()` but makes intent clearer.
///
/// # Example
///
/// ```
/// use adze::tree_node_data::TreeNodeData;
///
/// let leaf = TreeNodeData::leaf(5, 10, 20);
/// assert!(leaf.is_leaf());
/// ```
pub fn leaf(symbol: u16, start_byte: u32, end_byte: u32) -> Self {
Self::new(symbol, start_byte, end_byte)
}
/// Create a branch node with children
///
/// # Example
///
/// ```
/// use adze::tree_node_data::TreeNodeData;
/// use adze::arena_allocator::NodeHandle;
///
/// let children = vec![NodeHandle::new(0, 0), NodeHandle::new(0, 1)];
/// let branch = TreeNodeData::branch(10, 0, 50, children);
/// assert!(!branch.is_leaf());
/// assert_eq!(branch.child_count(), 2);
/// ```
pub fn branch(
symbol: u16,
start_byte: u32,
end_byte: u32,
children: impl IntoIterator<Item = NodeHandle>,
) -> Self {
let children_vec: SmallVec<[NodeHandle; 3]> = children.into_iter().collect();
TreeNodeData {
symbol,
start_byte,
end_byte,
children: children_vec,
named_child_count: 0,
field_id: None,
flags: NodeFlags::new(),
}
}
// ========================================================================
// Basic Accessors
// ========================================================================
/// Get the symbol/kind ID
pub fn symbol(&self) -> u16 {
self.symbol
}
/// Get start byte position
pub fn start_byte(&self) -> u32 {
self.start_byte
}
/// Get end byte position
pub fn end_byte(&self) -> u32 {
self.end_byte
}
/// Get byte range (start, end)
pub fn byte_range(&self) -> (u32, u32) {
(self.start_byte, self.end_byte)
}
/// Get text length in bytes
pub fn byte_len(&self) -> u32 {
self.end_byte.saturating_sub(self.start_byte)
}
// ========================================================================
// Children
// ========================================================================
/// Get child count
pub fn child_count(&self) -> usize {
self.children.len()
}
/// Get named child count
pub fn named_child_count(&self) -> usize {
self.named_child_count as usize
}
/// Check if node has children
pub fn is_leaf(&self) -> bool {
self.children.is_empty()
}
/// Get child by index
///
/// Returns `None` if index is out of bounds.
pub fn child(&self, index: usize) -> Option<NodeHandle> {
self.children.get(index).copied()
}
/// Get all children as a slice
pub fn children(&self) -> &[NodeHandle] {
&self.children
}
/// Add a child node
///
/// This adds an unnamed child to the node.
pub fn add_child(&mut self, child: NodeHandle) {
self.children.push(child);
}
/// Add a named child node
///
/// This adds a named child and increments the named child count.
pub fn add_named_child(&mut self, child: NodeHandle) {
self.children.push(child);
self.named_child_count = self.named_child_count.saturating_add(1);
}
// ========================================================================
// Flags
// ========================================================================
/// Check if node is named
pub fn is_named(&self) -> bool {
self.flags.get(NodeFlags::IS_NAMED)
}
/// Set named flag
pub fn set_named(&mut self, value: bool) {
self.flags.set(NodeFlags::IS_NAMED, value);
}
/// Check if node is error
pub fn is_error(&self) -> bool {
self.flags.get(NodeFlags::IS_ERROR)
}
/// Set error flag
pub fn set_error(&mut self, value: bool) {
self.flags.set(NodeFlags::IS_ERROR, value);
}
/// Check if node is missing
pub fn is_missing(&self) -> bool {
self.flags.get(NodeFlags::IS_MISSING)
}
/// Set missing flag
pub fn set_missing(&mut self, value: bool) {
self.flags.set(NodeFlags::IS_MISSING, value);
}
/// Check if node is extra
pub fn is_extra(&self) -> bool {
self.flags.get(NodeFlags::IS_EXTRA)
}
/// Set extra flag
pub fn set_extra(&mut self, value: bool) {
self.flags.set(NodeFlags::IS_EXTRA, value);
}
// ========================================================================
// Fields
// ========================================================================
/// Get field ID
///
/// Returns `None` if this node is not a named field.
pub fn field_id(&self) -> Option<u16> {
self.field_id
}
/// Set field ID
pub fn set_field_id(&mut self, id: Option<u16>) {
self.field_id = id;
}
}
#[cfg(test)]
mod tests {
use super::*;
#[cfg(not(debug_assertions))]
macro_rules! debug_trace {
($($arg:tt)*) => {};
}
#[cfg(debug_assertions)]
macro_rules! debug_trace {
($($arg:tt)*) => {
if std::env::var("RUST_LOG")
.ok()
.unwrap_or_default()
.contains("debug")
{
println!($($arg)*);
}
};
}
#[test]
fn test_size_constraint() {
use std::mem;
let size = mem::size_of::<TreeNodeData>();
assert!(
size <= 64,
"TreeNodeData is {} bytes, exceeds 64-byte target",
size
);
debug_trace!("TreeNodeData size: {} bytes", size);
debug_trace!("NodeHandle size: {} bytes", mem::size_of::<NodeHandle>());
debug_trace!(
"SmallVec size: {} bytes",
mem::size_of::<SmallVec<[NodeHandle; 3]>>()
);
}
#[test]
fn test_flags_size() {
use std::mem;
assert_eq!(mem::size_of::<NodeFlags>(), 1);
}
}