oak_core/memory/
arena.rs

1//! Specialized high-performance bump allocator for AST nodes.
2//!
3//! This module implements a `SyntaxArena`, which is a bump allocator optimized for
4//! short-lived, POD (Plain Old Data) objects like AST nodes. It minimizes allocation
5//! overhead by using a chunk-based strategy and a thread-local pool for recycling memory.
6//!
7//! ### Architecture & Memory Layout
8//!
9//! The arena allocates memory in "chunks" (defaulting to 64KB).
10//! - **Current Chunk:** The active chunk where new allocations are "bumped" from a pointer.
11//! - **Full Chunks:** A list of previously exhausted chunks that will be recycled together.
12//! - **Chunk Pool:** A thread-local storage that keeps up to 64 standard-sized chunks to
13//!   avoid hitting the global allocator for every new arena.
14//!
15//! ### Design Rationale: Why not `bumpalo`?
16//!
17//! While general-purpose bump allocators like `bumpalo` are excellent, they are designed to
18//! handle arbitrary alignment, `Drop` checking, and varied allocation patterns.
19//! Our `SyntaxArena` is intentionally specialized for AST nodes with the following advantages:
20//!
21//! 1. **Zero-Drop Enforcement:** We assume all allocated types are POD and do not require
22//!    dropping. This eliminates the need for a "drop list" or any drop-tracking overhead.
23//! 2. **Fixed Alignment:** Optimized for a constant 8-byte alignment (sufficient for 64-bit
24//!    pointers and primitives), simplifying pointer arithmetic and reducing branching.
25//! 3. **Thread-Local Pooling:** We reuse memory chunks via a thread-local pool (`CHUNK_POOL`)
26//!    to minimize the overhead of hitting the global allocator between parser runs.
27//! 4. **Minimal Overhead:** Focused purely on the needs of our high-performance parser by
28//!    stripping away features not required for AST construction.
29//!
30//! ### Safety Invariants
31//!
32//! - All types allocated in the arena **must not** implement `Drop` (or their `Drop`
33//!   implementation will be ignored).
34//! - The arena is not `Sync` or `Send` in a way that allows cross-thread allocation,
35//!   though the underlying chunks are managed safely via thread-local storage.
36//! - Memory is only reclaimed when the entire `SyntaxArena` is dropped.
37use crate::tree::TokenProvenance;
38use std::{
39    alloc::{Layout, alloc, dealloc},
40    cell::{RefCell, UnsafeCell},
41    ptr::{NonNull, copy_nonoverlapping},
42};
43
44/// Default chunk size: 64KB.
45/// Large enough to amortize the cost of system-level allocations, yet small enough to be
46/// L2-cache friendly and avoid excessive internal fragmentation for small templates.
47const CHUNK_SIZE: usize = 64 * 1024;
48
49/// Alignment for all allocations: 8 bytes.
50/// This covers all standard Rust primitives (u64, f64, pointers) on 64-bit architectures.
51const ALIGN: usize = 8;
52
53// A thread-local pool of memory chunks to avoid hitting the global allocator.
54//
55// **Memory Safety & Leak Prevention:**
56// - **Bounded Capacity:** The pool is limited to 64 chunks (4MB) per thread to prevent
57//   unbounded memory growth in long-running processes.
58// - **Large Allocations:** Chunks larger than `CHUNK_SIZE` (64KB) are not pooled and
59//   are returned directly to the global allocator upon drop.
60// - **Automatic Cleanup:** All chunks are either recycled into this pool or freed when
61//   the `SyntaxArena` is dropped.
62thread_local! {
63    static CHUNK_POOL: RefCell<Vec<NonNull<u8>>> = RefCell::new(Vec::with_capacity(16))
64}
65
66/// A high-performance bump allocator optimized for AST nodes.
67///
68/// The arena works by "bumping" a pointer within a pre-allocated chunk of memory.
69/// When a chunk is exhausted, a new one is requested from the thread-local pool
70/// or the global allocator.
71pub struct SyntaxArena {
72    /// Pointer to the next free byte in the current chunk.
73    /// Always kept 8-byte aligned.
74    ptr: UnsafeCell<NonNull<u8>>,
75    /// Pointer to the end of the current chunk (exclusive).
76    end: UnsafeCell<NonNull<u8>>,
77    /// List of full chunks allocated by this arena (excluding the current one).
78    /// Stored as (start_pointer, total_size).
79    full_chunks: UnsafeCell<Vec<(NonNull<u8>, usize)>>,
80    /// The start pointer of the current chunk (used for recycling/freeing).
81    current_chunk_start: UnsafeCell<NonNull<u8>>,
82    /// Store for token provenance metadata.
83    metadata: UnsafeCell<Vec<TokenProvenance>>,
84}
85
86impl SyntaxArena {
87    /// Creates a new empty arena.
88    ///
89    /// Initial pointers are set to dangling. The first allocation will trigger
90    /// a chunk allocation.
91    pub fn new(capacity: usize) -> Self {
92        // Use a pointer aligned to ALIGN even for the dangling state to satisfy debug assertions.
93        let dangling = unsafe { NonNull::new_unchecked(ALIGN as *mut u8) };
94        Self { ptr: UnsafeCell::new(dangling), end: UnsafeCell::new(dangling), full_chunks: UnsafeCell::new(Vec::with_capacity(capacity)), current_chunk_start: UnsafeCell::new(NonNull::dangling()), metadata: UnsafeCell::new(Vec::new()) }
95    }
96
97    /// Stores a token provenance in the arena and returns its index.
98    pub fn add_metadata(&self, provenance: TokenProvenance) -> std::num::NonZeroU32 {
99        let metadata = unsafe { &mut *self.metadata.get() };
100        metadata.push(provenance);
101        std::num::NonZeroU32::new(metadata.len() as u32).expect("Metadata index overflow")
102    }
103
104    /// Retrieves a token provenance by index.
105    pub fn get_metadata(&self, index: std::num::NonZeroU32) -> Option<&TokenProvenance> {
106        let metadata = unsafe { &*self.metadata.get() };
107        metadata.get(index.get() as usize - 1)
108    }
109
110    /// Allocates a value of type `T` in the arena and moves `value` into it.
111    ///
112    /// # Safety
113    ///
114    /// The caller must ensure that `T` is a POD (Plain Old Data) type.
115    /// The `Drop` implementation for `T` (if any) will **not** be called when
116    /// the arena is dropped.
117    ///
118    /// # Panics
119    ///
120    /// Panics if the allocation fails (OOM).
121    #[inline(always)]
122    pub fn alloc<T>(&self, value: T) -> &mut T {
123        let layout = Layout::new::<T>();
124        // Ensure the type's alignment requirement is within our 8-byte guarantee.
125        debug_assert!(layout.align() <= ALIGN);
126
127        unsafe {
128            let ptr = self.alloc_raw(layout.size());
129            let ptr = ptr.as_ptr() as *mut T;
130            // Write the value into the allocated space.
131            ptr.write(value);
132            &mut *ptr
133        }
134    }
135
136    /// Allocates a slice in the arena and copies the contents of `slice` into it.
137    ///
138    /// This is useful for storing strings or other contiguous data in the arena.
139    ///
140    /// # Safety
141    ///
142    /// Same as `alloc`, `T` must be `Copy` (and thus POD).
143    #[inline(always)]
144    pub fn alloc_slice_copy<T: Copy>(&self, slice: &[T]) -> &mut [T] {
145        if slice.is_empty() {
146            return &mut [];
147        }
148        let layout = Layout::for_value(slice);
149        debug_assert!(layout.align() <= ALIGN);
150
151        unsafe {
152            let ptr = self.alloc_raw(layout.size());
153            let ptr = ptr.as_ptr() as *mut T;
154            copy_nonoverlapping(slice.as_ptr(), ptr, slice.len());
155            std::slice::from_raw_parts_mut(ptr, slice.len())
156        }
157    }
158
159    /// Allocates a slice in the arena and fills it using an iterator.
160    ///
161    /// This is more efficient than collecting into a temporary `Vec` and then copying,
162    /// as it writes directly into the arena memory.
163    ///
164    /// # Safety
165    ///
166    /// The iterator must yield exactly `count` items. If it yields fewer, the remaining
167    /// memory will be uninitialized (UB if accessed). If it yields more, the extra
168    /// items are ignored. `T` must be POD.
169    #[inline(always)]
170    pub fn alloc_slice_fill_iter<T, I>(&self, count: usize, iter: I) -> &mut [T]
171    where
172        I: IntoIterator<Item = T>,
173    {
174        if count == 0 {
175            return &mut [];
176        }
177        let layout = Layout::array::<T>(count).unwrap();
178        debug_assert!(layout.align() <= ALIGN);
179
180        unsafe {
181            let ptr = self.alloc_raw(layout.size());
182            let base_ptr = ptr.as_ptr() as *mut T;
183
184            let mut i = 0;
185            for item in iter {
186                if i >= count {
187                    break;
188                }
189                base_ptr.add(i).write(item);
190                i += 1
191            }
192
193            // In a production-ready system, we should handle the case where iter is short.
194            // But for our internal use in deep_clone, we know the count is exact.
195            debug_assert_eq!(i, count, "Iterator yielded fewer items than expected");
196
197            std::slice::from_raw_parts_mut(base_ptr, count)
198        }
199    }
200
201    /// Internal raw allocation logic.
202    ///
203    /// Attempts to allocate `size` bytes from the current chunk.
204    /// If there is not enough space, it falls back to `alloc_slow`.
205    ///
206    /// # Safety
207    ///
208    /// `size` must be non-zero. The returned pointer is guaranteed to be 8-byte aligned.
209    #[inline(always)]
210    unsafe fn alloc_raw(&self, size: usize) -> NonNull<u8> {
211        // Unsafe block to wrap unsafe ops
212        unsafe {
213            let ptr = *self.ptr.get();
214            let end = *self.end.get();
215
216            // Calculate aligned pointer. Since we always maintain ALIGN (8) byte alignment
217            // for `self.ptr`, we only need to add the size and check against `end`.
218            let current_addr = ptr.as_ptr() as usize;
219
220            // Safety check: ensure the pointer is indeed aligned as we expect.
221            debug_assert!(current_addr % ALIGN == 0);
222
223            // We add `size` and then align up the result for the NEXT allocation.
224            let next_addr = (current_addr + size + ALIGN - 1) & !(ALIGN - 1);
225
226            if std::intrinsics::likely(next_addr <= end.as_ptr() as usize) {
227                *self.ptr.get() = NonNull::new_unchecked(next_addr as *mut u8);
228                return ptr;
229            }
230
231            self.alloc_slow(size)
232        }
233    }
234
235    /// Slow path for allocation when the current chunk is exhausted.
236    ///
237    /// 1. Pushes the current chunk to `full_chunks`.
238    /// 2. Allocates a new chunk (either standard 64KB or larger if `size` requires it).
239    /// 3. Sets the new chunk as the current one.
240    #[inline(never)]
241    unsafe fn alloc_slow(&self, size: usize) -> NonNull<u8> {
242        unsafe {
243            // Retire current chunk if it exists.
244            let current_start = *self.current_chunk_start.get();
245            if current_start != NonNull::dangling() {
246                // We record the full size of the chunk so it can be correctly recycled.
247                // Note: for now we assume chunks are either CHUNK_SIZE or specially sized.
248                let current_end = (*self.end.get()).as_ptr() as usize;
249                let actual_size = current_end - current_start.as_ptr() as usize;
250                (*self.full_chunks.get()).push((current_start, actual_size))
251            }
252
253            // Allocate new chunk.
254            // If request is huge (> CHUNK_SIZE), we allocate a larger chunk specifically for it.
255            // These "huge chunks" are NOT recycled into the pool to avoid wasting space.
256            let alloc_size = usize::max(size + ALIGN, CHUNK_SIZE);
257
258            let chunk_ptr = Self::alloc_chunk(alloc_size);
259
260            *self.current_chunk_start.get() = chunk_ptr;
261
262            let start_addr = chunk_ptr.as_ptr() as usize;
263            // Resulting pointer is the start of the new chunk.
264            let result_ptr = NonNull::new_unchecked(start_addr as *mut u8);
265
266            // Calculate the next free pointer, aligned to ALIGN.
267            let next_free = (start_addr + size + ALIGN - 1) & !(ALIGN - 1);
268
269            *self.ptr.get() = NonNull::new_unchecked(next_free as *mut u8);
270            *self.end.get() = NonNull::new_unchecked((start_addr + alloc_size) as *mut u8);
271
272            result_ptr
273        }
274    }
275
276    /// Allocates a new memory chunk from the thread-local pool or global allocator.
277    unsafe fn alloc_chunk(size: usize) -> NonNull<u8> {
278        // Try to get from pool if size matches the standard chunk size.
279        if size == CHUNK_SIZE {
280            let ptr = CHUNK_POOL.try_with(|pool| pool.borrow_mut().pop());
281
282            if let Ok(Some(ptr)) = ptr {
283                return ptr;
284            }
285        }
286
287        let layout = Layout::from_size_align(size, ALIGN).unwrap();
288        // unsafe block for alloc
289        unsafe {
290            let ptr = alloc(layout);
291            if ptr.is_null() {
292                std::alloc::handle_alloc_error(layout)
293            }
294            NonNull::new_unchecked(ptr)
295        }
296    }
297}
298
299impl Drop for SyntaxArena {
300    /// Drops the arena, recycling all its chunks back to the thread-local pool or freeing them.
301    fn drop(&mut self) {
302        unsafe {
303            // Recycle the current chunk.
304            let current = *self.current_chunk_start.get();
305            if current != NonNull::dangling() {
306                let current_end = (*self.end.get()).as_ptr() as usize;
307                let actual_size = current_end - current.as_ptr() as usize;
308                Self::recycle_chunk(current, actual_size)
309            }
310
311            // Recycle all full chunks.
312            for (ptr, size) in (*self.full_chunks.get()).iter() {
313                Self::recycle_chunk(*ptr, *size)
314            }
315        }
316    }
317}
318
319impl SyntaxArena {
320    /// Returns a chunk to the thread-local pool or deallocates it if the pool is full.
321    ///
322    /// # Safety
323    ///
324    /// `ptr` must have been allocated with `ALIGN` and its size must be `size`.
325    unsafe fn recycle_chunk(ptr: NonNull<u8>, size: usize) {
326        if size == CHUNK_SIZE {
327            // Only pool standard-sized chunks to maintain predictability.
328            let _ = CHUNK_POOL.try_with(|pool| {
329                let mut pool = pool.borrow_mut();
330                if pool.len() < 64 {
331                    // Hard limit to prevent memory bloating per thread.
332                    pool.push(ptr)
333                }
334            });
335            // If try_with fails (e.g. during thread destruction), we just leak or dealloc?
336            // Since we can't access pool, we should dealloc.
337            // But try_with error usually means TLS is gone.
338            // We can check error kind. For simplicity, we just fallback to dealloc if pool is unreachable.
339            return;
340        }
341        // If not pooled (either because it's a huge chunk or the pool is full/unreachable), deallocate immediately.
342        let layout = Layout::from_size_align(size, ALIGN).unwrap();
343        unsafe { dealloc(ptr.as_ptr(), layout) }
344    }
345}
346
347unsafe impl Send for SyntaxArena {}
348unsafe impl Sync for SyntaxArena {}
349
350impl Default for SyntaxArena {
351    fn default() -> Self {
352        Self::new(16)
353    }
354}
oak_core/memory/arena.rs

oak_core/memory/
arena.rs