oak_core/memory/arena.rs

//! Specialized high-performance bump allocator for AST nodes.
//!
//! This module implements a `SyntaxArena`, which is a bump allocator optimized for
//! short-lived, POD (Plain Old Data) objects like AST nodes. It minimizes allocation
//! overhead by using a chunk-based strategy and a thread-local pool for recycling memory.
//!
//! ### Architecture & Memory Layout
//!
//! The arena allocates memory in "chunks" (64KB by default).
//! - **Current Chunk:** The active chunk from which new allocations are "bumped".
//! - **Full Chunks:** A list of previously exhausted chunks that are recycled together
//!   when the arena is dropped.
//! - **Chunk Pool:** Thread-local storage that keeps up to 64 standard-sized chunks to
//!   avoid hitting the global allocator for every new arena.
//!
//! ### Design Rationale: Why not `bumpalo`?
//!
//! While general-purpose bump allocators like `bumpalo` are excellent, they are designed to
//! handle arbitrary alignment, `Drop` checking, and varied allocation patterns.
//! Our `SyntaxArena` is intentionally specialized for AST nodes, with the following advantages:
//!
//! 1. **Zero-Drop Enforcement:** We assume all allocated types are POD and do not require
//!    dropping. This eliminates the need for a "drop list" or any drop-tracking overhead.
//! 2. **Fixed Alignment:** Optimized for a constant 8-byte alignment (sufficient for 64-bit
//!    pointers and primitives), simplifying pointer arithmetic and reducing branching.
//! 3. **Thread-Local Pooling:** We reuse memory chunks via a thread-local pool (`CHUNK_POOL`)
//!    to minimize the overhead of hitting the global allocator between parser runs.
//! 4. **Minimal Overhead:** Focused purely on the needs of our high-performance parser by
//!    stripping away features not required for AST construction.
//!
//! ### Safety Invariants
//!
//! - All types allocated in the arena **must not** implement `Drop` (or their `Drop`
//!   implementation will be ignored).
//! - The arena has `unsafe impl Send`/`Sync`, but the bump pointer is not synchronized:
//!   callers must never allocate from the same arena on multiple threads concurrently.
//!   Chunks are recycled through the thread-local pool of whichever thread drops the arena.
//! - Memory is only reclaimed when the entire `SyntaxArena` is dropped.
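//!
//! ### Example
//!
//! A minimal usage sketch. The import path and the `Node` type below are illustrative
//! (hypothetical), not part of this module; any POD/`Copy` type works the same way.
//!
//! ```ignore
//! use oak_core::memory::arena::SyntaxArena; // assumed re-export path
//!
//! // Hypothetical POD node type with no `Drop` impl.
//! #[derive(Clone, Copy)]
//! struct Node {
//!     kind: u16,
//!     start: u32,
//!     end: u32,
//! }
//!
//! let arena = SyntaxArena::default();
//! let node = arena.alloc(Node { kind: 1, start: 0, end: 5 });
//! let text = arena.alloc_slice_copy(b"let x = 1;");
//! assert_eq!(node.kind, 1);
//! assert_eq!(text.len(), 10);
//! // All chunks are recycled or freed when `arena` is dropped; `Node` is never dropped.
//! ```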
use std::{
    alloc::{Layout, alloc, dealloc},
    cell::{RefCell, UnsafeCell},
    ptr::{NonNull, copy_nonoverlapping},
};

/// Default chunk size: 64KB.
/// Large enough to amortize the cost of system-level allocations, yet small enough to be
/// L2-cache friendly and avoid excessive internal fragmentation for small templates.
const CHUNK_SIZE: usize = 64 * 1024;

/// Alignment for all allocations: 8 bytes.
/// This covers all standard Rust primitives (u64, f64, pointers) on 64-bit architectures.
const ALIGN: usize = 8;

// A thread-local pool of memory chunks to avoid hitting the global allocator.
//
// **Memory Safety & Leak Prevention:**
// - **Bounded Capacity:** The pool is limited to 64 chunks (4MB) per thread to prevent
//   unbounded memory growth in long-running processes.
// - **Large Allocations:** Chunks larger than `CHUNK_SIZE` (64KB) are not pooled and
//   are returned directly to the global allocator upon drop.
// - **Automatic Cleanup:** All chunks are either recycled into this pool or freed when
//   the `SyntaxArena` is dropped.
thread_local! {
    static CHUNK_POOL: RefCell<Vec<NonNull<u8>>> = RefCell::new(Vec::with_capacity(16));
}

/// A high-performance bump allocator optimized for AST nodes.
///
/// The arena works by "bumping" a pointer within a pre-allocated chunk of memory.
/// When a chunk is exhausted, a new one is requested from the thread-local pool
/// or the global allocator.
pub struct SyntaxArena {
    /// Pointer to the next free byte in the current chunk.
    /// Always kept 8-byte aligned.
    ptr: UnsafeCell<NonNull<u8>>,
    /// Pointer to the end of the current chunk (exclusive).
    end: UnsafeCell<NonNull<u8>>,
    /// List of full chunks allocated by this arena (excluding the current one).
    /// Stored as (start_pointer, total_size).
    full_chunks: UnsafeCell<Vec<(NonNull<u8>, usize)>>,
    /// The start pointer of the current chunk (used for recycling/freeing).
    current_chunk_start: UnsafeCell<NonNull<u8>>,
}

impl SyntaxArena {
    /// Creates a new, empty arena.
    ///
    /// `capacity` is a hint for how many full chunks to reserve space for in the
    /// internal chunk list; no memory chunk is allocated yet. The initial bump
    /// pointers are dangling, so the first allocation triggers a chunk allocation.
    pub fn new(capacity: usize) -> Self {
        // Use a pointer aligned to ALIGN even for the dangling state to satisfy debug assertions.
        let dangling = unsafe { NonNull::new_unchecked(ALIGN as *mut u8) };
        Self {
            ptr: UnsafeCell::new(dangling),
            end: UnsafeCell::new(dangling),
            full_chunks: UnsafeCell::new(Vec::with_capacity(capacity)),
            current_chunk_start: UnsafeCell::new(NonNull::dangling()),
        }
    }

    /// Allocates a value of type `T` in the arena and moves `value` into it.
    ///
    /// # Safety
    ///
    /// The caller must ensure that `T` is a POD (Plain Old Data) type.
    /// The `Drop` implementation for `T` (if any) will **not** be called when
    /// the arena is dropped.
    ///
    /// # Panics
    ///
    /// Panics if the allocation fails (OOM).
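    ///
    /// # Example
    ///
    /// A minimal sketch; the `Span` type is hypothetical, any POD type works:
    ///
    /// ```ignore
    /// #[derive(Clone, Copy)]
    /// struct Span {
    ///     start: u32,
    ///     end: u32,
    /// }
    ///
    /// let arena = SyntaxArena::default();
    /// let span = arena.alloc(Span { start: 3, end: 9 });
    /// span.end = 12; // The returned `&mut T` is freely mutable.
    /// assert_eq!(span.end, 12);
    /// ```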
    #[inline(always)]
    pub fn alloc<T>(&self, value: T) -> &mut T {
        let layout = Layout::new::<T>();
        // Ensure the type's alignment requirement is within our 8-byte guarantee.
        debug_assert!(layout.align() <= ALIGN);

        unsafe {
            let ptr = self.alloc_raw(layout.size());
            let ptr = ptr.as_ptr() as *mut T;
            // Write the value into the allocated space.
            ptr.write(value);
            &mut *ptr
        }
    }

    /// Allocates a slice in the arena and copies the contents of `slice` into it.
    ///
    /// This is useful for storing strings or other contiguous data in the arena.
    ///
    /// # Safety
    ///
    /// As with `alloc`, `T` must be `Copy` (and therefore POD).
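    ///
    /// # Example
    ///
    /// A minimal sketch showing a copied byte string and the empty-slice fast path:
    ///
    /// ```ignore
    /// let arena = SyntaxArena::default();
    /// let ident = arena.alloc_slice_copy(b"my_variable");
    /// assert_eq!(&ident[..], b"my_variable");
    ///
    /// // Empty slices return an empty `&mut []` without touching the arena.
    /// let empty: &mut [u32] = arena.alloc_slice_copy(&[]);
    /// assert!(empty.is_empty());
    /// ```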
    #[inline(always)]
    pub fn alloc_slice_copy<T: Copy>(&self, slice: &[T]) -> &mut [T] {
        if slice.is_empty() {
            return &mut [];
        }
        let layout = Layout::for_value(slice);
        debug_assert!(layout.align() <= ALIGN);

        unsafe {
            let ptr = self.alloc_raw(layout.size());
            let ptr = ptr.as_ptr() as *mut T;
            copy_nonoverlapping(slice.as_ptr(), ptr, slice.len());
            std::slice::from_raw_parts_mut(ptr, slice.len())
        }
    }

    /// Allocates a slice in the arena and fills it using an iterator.
    ///
    /// This is more efficient than collecting into a temporary `Vec` and then copying,
    /// as it writes directly into the arena memory.
    ///
    /// # Safety
    ///
    /// The iterator must yield exactly `count` items. If it yields fewer, the remaining
    /// memory will be uninitialized (UB if accessed). If it yields more, the extra
    /// items are ignored. `T` must be POD.
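    ///
    /// # Example
    ///
    /// A minimal sketch; the iterator length matches `count` exactly:
    ///
    /// ```ignore
    /// let arena = SyntaxArena::default();
    /// let offsets = arena.alloc_slice_fill_iter(4, (0u32..4).map(|i| i * 8));
    /// assert_eq!(&offsets[..], &[0, 8, 16, 24][..]);
    /// ```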
    #[inline(always)]
    pub fn alloc_slice_fill_iter<T, I>(&self, count: usize, iter: I) -> &mut [T]
    where
        I: IntoIterator<Item = T>,
    {
        if count == 0 {
            return &mut [];
        }
        let layout = Layout::array::<T>(count).unwrap();
        debug_assert!(layout.align() <= ALIGN);

        unsafe {
            let ptr = self.alloc_raw(layout.size());
            let base_ptr = ptr.as_ptr() as *mut T;

            let mut i = 0;
            for item in iter {
                if i >= count {
                    break;
                }
                base_ptr.add(i).write(item);
                i += 1;
            }

            // In a production-ready system, we should handle the case where iter is short.
            // But for our internal use in deep_clone, we know the count is exact.
            debug_assert_eq!(i, count, "Iterator yielded fewer items than expected");

            std::slice::from_raw_parts_mut(base_ptr, count)
        }
    }

    /// Internal raw allocation logic.
    ///
    /// Attempts to allocate `size` bytes from the current chunk.
    /// If there is not enough space, it falls back to `alloc_slow`.
    ///
    /// # Safety
    ///
    /// `size` must be non-zero. The returned pointer is guaranteed to be 8-byte aligned.
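    ///
    /// # Example (bump arithmetic)
    ///
    /// An illustration with concrete, hypothetical numbers: with the bump pointer at
    /// `0x1000`, `size = 13`, and `ALIGN = 8`, the next free pointer becomes
    /// `(0x1000 + 13 + 7) & !7 = 0x1010`, i.e. the 13-byte allocation is padded to
    /// 16 bytes so the bump pointer stays 8-byte aligned.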
    #[inline(always)]
    unsafe fn alloc_raw(&self, size: usize) -> NonNull<u8> {
        // SAFETY: callers uphold the single-threaded-allocation invariant documented
        // at module level, so we have exclusive access to the bump-pointer cells here.
        unsafe {
            let ptr = *self.ptr.get();
            let end = *self.end.get();

            // Calculate aligned pointer. Since we always maintain ALIGN (8) byte alignment
            // for `self.ptr`, we only need to add the size and check against `end`.
            let current_addr = ptr.as_ptr() as usize;

            // Safety check: ensure the pointer is indeed aligned as we expect.
            debug_assert!(current_addr % ALIGN == 0);

            // We add `size` and then align up the result for the NEXT allocation.
            let next_addr = (current_addr + size + ALIGN - 1) & !(ALIGN - 1);

            // `likely` is an unstable intrinsic (nightly `core_intrinsics`); the in-chunk
            // bump is by far the common case.
            if std::intrinsics::likely(next_addr <= end.as_ptr() as usize) {
                *self.ptr.get() = NonNull::new_unchecked(next_addr as *mut u8);
                return ptr;
            }

            self.alloc_slow(size)
        }
    }

    /// Slow path for allocation when the current chunk is exhausted.
    ///
    /// 1. Pushes the current chunk to `full_chunks`.
    /// 2. Allocates a new chunk (either standard 64KB or larger if `size` requires it).
    /// 3. Sets the new chunk as the current one.
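    ///
    /// For example, a 100_000-byte request yields `alloc_size = 100_008` (request + `ALIGN`),
    /// which exceeds `CHUNK_SIZE`; such oversized chunks are freed directly on drop rather
    /// than being recycled into the thread-local pool.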
    #[inline(never)]
    unsafe fn alloc_slow(&self, size: usize) -> NonNull<u8> {
        unsafe {
            // Retire the current chunk if it exists.
            let current_start = *self.current_chunk_start.get();
            if current_start != NonNull::dangling() {
                // Record the full size of the chunk so it can be correctly recycled.
                // Note: chunks are either CHUNK_SIZE or specially sized for huge requests.
                let current_end = (*self.end.get()).as_ptr() as usize;
                let actual_size = current_end - current_start.as_ptr() as usize;
                (*self.full_chunks.get()).push((current_start, actual_size));
            }

            // Allocate a new chunk.
            // If the request is huge (> CHUNK_SIZE), we allocate a larger chunk specifically for it.
            // These "huge chunks" are NOT recycled into the pool to avoid wasting space.
            let alloc_size = usize::max(size + ALIGN, CHUNK_SIZE);

            let chunk_ptr = Self::alloc_chunk(alloc_size);

            *self.current_chunk_start.get() = chunk_ptr;

            let start_addr = chunk_ptr.as_ptr() as usize;
            // The resulting pointer is the start of the new chunk.
            let result_ptr = chunk_ptr;

            // Calculate the next free pointer, aligned to ALIGN.
            let next_free = (start_addr + size + ALIGN - 1) & !(ALIGN - 1);

            *self.ptr.get() = NonNull::new_unchecked(next_free as *mut u8);
            *self.end.get() = NonNull::new_unchecked((start_addr + alloc_size) as *mut u8);

            result_ptr
        }
    }

    /// Allocates a new memory chunk from the thread-local pool or the global allocator.
    unsafe fn alloc_chunk(size: usize) -> NonNull<u8> {
        // Try the pool first if the size matches the standard chunk size.
        if size == CHUNK_SIZE {
            let ptr = CHUNK_POOL.try_with(|pool| pool.borrow_mut().pop());

            if let Ok(Some(ptr)) = ptr {
                return ptr;
            }
        }

        let layout = Layout::from_size_align(size, ALIGN).unwrap();
        // Fall back to the global allocator.
        unsafe {
            let ptr = alloc(layout);
            if ptr.is_null() {
                std::alloc::handle_alloc_error(layout);
            }
            NonNull::new_unchecked(ptr)
        }
    }
}

impl Drop for SyntaxArena {
    /// Drops the arena, recycling all its chunks back to the thread-local pool or freeing them.
    fn drop(&mut self) {
        unsafe {
            // Recycle the current chunk.
            let current = *self.current_chunk_start.get();
            if current != NonNull::dangling() {
                let current_end = (*self.end.get()).as_ptr() as usize;
                let actual_size = current_end - current.as_ptr() as usize;
                Self::recycle_chunk(current, actual_size);
            }

            // Recycle all full chunks.
            for (ptr, size) in (*self.full_chunks.get()).iter() {
                Self::recycle_chunk(*ptr, *size);
            }
        }
    }
}

impl SyntaxArena {
    /// Returns a chunk to the thread-local pool, or deallocates it if it cannot be pooled.
    ///
    /// # Safety
    ///
    /// `ptr` must have been allocated with `ALIGN` alignment and a size of exactly `size`.
    unsafe fn recycle_chunk(ptr: NonNull<u8>, size: usize) {
        // Only pool standard-sized chunks to keep pooling predictable.
        if size == CHUNK_SIZE {
            let pooled = CHUNK_POOL
                .try_with(|pool| {
                    let mut pool = pool.borrow_mut();
                    if pool.len() < 64 {
                        // Hard limit to prevent per-thread memory bloat.
                        pool.push(ptr);
                        true
                    } else {
                        false
                    }
                })
                // `try_with` fails if TLS has already been destroyed (e.g. during thread
                // shutdown); in that case we cannot pool, so fall through to deallocation.
                .unwrap_or(false);
            if pooled {
                return;
            }
        }
        // Not pooled (huge chunk, full pool, or unreachable TLS): deallocate immediately.
        let layout = Layout::from_size_align(size, ALIGN).unwrap();
        unsafe {
            dealloc(ptr.as_ptr(), layout);
        }
    }
}

// SAFETY: the arena may be moved to and referenced from other threads, but, as documented
// in the module-level invariants, callers must never allocate from the same arena on
// multiple threads concurrently; the bump pointer is deliberately unsynchronized.
unsafe impl Send for SyntaxArena {}
unsafe impl Sync for SyntaxArena {}

impl Default for SyntaxArena {
    fn default() -> Self {
        Self::new(16)
    }
}