//! Bump-arena allocator (`toml_spanner/arena.rs`) backing all data structures
//! produced by the parser. See [`Arena`] for the public entry point.
1use std::alloc::Layout;
2use std::cell::Cell;
3use std::ptr::{self, NonNull};
4
/// Size in bytes of the per-slab header embedded at the start of every slab.
const HEADER_SIZE: usize = std::mem::size_of::<SlabHeader>();
/// Size of the first heap slab; later slabs double geometrically (see `grow`).
const INITIAL_SLAB_SIZE: usize = 4096;
/// Alignment guaranteed for every pointer handed out by the arena.
const ALLOC_ALIGN: usize = 8;
/// Alignment of whole slabs: the stricter of the header's alignment and
/// `ALLOC_ALIGN`, so data placed right after the header stays 8-aligned.
/// (`usize::max` is not usable in `const`, hence the explicit branch.)
const SLAB_ALIGN: usize = if std::mem::align_of::<SlabHeader>() >= ALLOC_ALIGN {
    std::mem::align_of::<SlabHeader>()
} else {
    ALLOC_ALIGN
};

// Compile-time checks: together these guarantee that `slab_base + HEADER_SIZE`
// (the first data byte of a slab) is always ALLOC_ALIGN-aligned.
const _: () = assert!(SLAB_ALIGN >= ALLOC_ALIGN);
const _: () = assert!(HEADER_SIZE.is_multiple_of(ALLOC_ALIGN));
16
/// Header written at the start of every heap slab. Slabs form an intrusive
/// singly-linked list through `prev`, which `Drop` walks to free them all.
#[repr(C)]
struct SlabHeader {
    // Previously active slab; the chain terminates at the static sentinel.
    prev: Option<NonNull<SlabHeader>>,
    // Total slab size in bytes, including this header. `0` marks the static
    // `EMPTY_SLAB` sentinel, which must never be deallocated.
    size: usize,
}
22
// SAFETY: The only `SlabHeader` that needs `Sync` is the static `EMPTY_SLAB`
// sentinel. It is immutable (prev=None, size=0) and never written through the
// pointer. Heap-allocated SlabHeaders are only reachable through `Arena`, which
// is `!Sync` (contains `Cell`), so they are never shared across threads.
unsafe impl Sync for SlabHeader {}

/// Sentinel terminating every arena's slab chain. `size == 0` tells `Drop`
/// to stop walking without calling `dealloc` on this static.
static EMPTY_SLAB: SlabHeader = SlabHeader {
    prev: None,
    size: 0,
};
33
/// Bump allocator backing all data structures produced by the parser.
///
/// Create an `Arena` before calling [`parse`](crate::parse) and pass it by
/// reference. The arena must outlive the returned [`Document`](crate::Document)
/// because parsed values borrow from it. All memory is freed on drop.
///
/// # Examples
///
/// ```
/// let arena = toml_spanner::Arena::new();
/// let table = toml_spanner::parse("key = \"value\"", &arena)?;
/// // `table` borrows from both the input string and `arena`.
/// # Ok::<(), toml_spanner::Error>(())
/// ```
pub struct Arena {
    // Bump cursor: next free byte in the current slab. Dangling (and equal to
    // `end`) until the first slab is allocated.
    ptr: Cell<NonNull<u8>>,
    // One past the last usable byte of the current slab. Invariant: `end >= ptr`.
    end: Cell<NonNull<u8>>,
    // Header of the current slab; earlier slabs are reachable via `prev`,
    // ending at the static `EMPTY_SLAB` sentinel.
    slab: Cell<NonNull<SlabHeader>>,
}
53
// Guard against accidental growth of the hot `Arena` handle (three pointers).
#[cfg(target_pointer_width = "64")]
const _: () = assert!(std::mem::size_of::<Arena>() == 24);

// SAFETY: `Arena` owns its heap-allocated slab chain exclusively. Moving the
// arena to another thread transfers that exclusive ownership, and the bump
// pointers held in `Cell` are simply data alongside the slabs. No other thread
// retains access to the slabs after the move, and the global allocator allows
// `dealloc` on a different thread than the one that called `alloc`.
//
// `Arena` is intentionally **not** `Sync`: `alloc`, `alloc_str`, and the
// reallocation helpers mutate the bump pointers through `&self` via `Cell`,
// which is not atomic. Concurrent allocation from two threads would race on
// those cells and corrupt the slab state.
unsafe impl Send for Arena {}
68
69impl Default for Arena {
70    fn default() -> Self {
71        Self::new()
72    }
73}
74
impl Arena {
    /// Creates a new, empty arena.
    ///
    /// No heap memory is allocated here: `ptr == end` means zero bytes
    /// remain, so the first call to `alloc` always takes the slow path and
    /// maps in the first slab.
    pub fn new() -> Self {
        // Safety: EMPTY_SLAB is a static with a stable address.
        let sentinel =
            unsafe { NonNull::new_unchecked(&EMPTY_SLAB as *const SlabHeader as *mut SlabHeader) };
        let dangling = NonNull::dangling();
        Arena {
            ptr: Cell::new(dangling),
            end: Cell::new(dangling),
            slab: Cell::new(sentinel),
        }
    }

    /// Allocate `size` bytes aligned to 8.
    ///
    /// Returns a non-null pointer to the allocated (uninitialized) region.
    /// Aborts on OOM.
    #[inline]
    pub(crate) fn alloc(&self, size: usize) -> NonNull<u8> {
        let ptr = self.ptr.get();
        let addr = ptr.as_ptr() as usize;
        // Round the bump cursor up to the next ALLOC_ALIGN boundary.
        let aligned_addr = (addr + ALLOC_ALIGN - 1) & !(ALLOC_ALIGN - 1);
        let padding = aligned_addr - addr;
        // saturating_add: an absurdly large `size` saturates to usize::MAX,
        // fails the fast-path test below, and is rejected (panics) in grow().
        let needed = padding.saturating_add(size);
        let remaining = self.end.get().as_ptr() as usize - addr;

        let result = if needed <= remaining {
            // Safety: needed bytes fit within the current slab (end >= ptr is
            // invariant, so remaining cannot underflow). Using add() from ptr
            // preserves provenance.
            unsafe {
                self.ptr
                    .set(NonNull::new_unchecked(ptr.as_ptr().add(needed)));
                NonNull::new_unchecked(ptr.as_ptr().add(padding))
            }
        } else {
            self.alloc_slow(size)
        };

        // Tag pointer for MIRI: narrow the returned pointer's Stacked Borrows
        // tag to exactly `size` bytes so out-of-bounds use fails under Miri.
        #[cfg(test)]
        if true {
            use std::mem::MaybeUninit;

            unsafe {
                return NonNull::new_unchecked(
                    std::slice::from_raw_parts_mut(result.cast::<MaybeUninit<u8>>().as_ptr(), size)
                        .as_mut_ptr()
                        .cast(),
                );
            }
        }

        result
    }

    /// Grow an existing allocation in place when possible, otherwise allocate
    /// a new region and copy the old data.
    ///
    /// Returns the (possibly unchanged) pointer to the allocation.
    ///
    /// # Safety
    ///
    /// - `ptr` must have been returned by a prior `alloc` on this arena whose
    ///   size was `old_size`.
    /// - `new_size` must be `>= old_size`. Violating this causes wrapping
    ///   underflow in the in place growth path or an out of bounds copy.
    pub(crate) unsafe fn realloc(
        &self,
        ptr: NonNull<u8>,
        old_size: usize,
        new_size: usize,
    ) -> NonNull<u8> {
        debug_assert!(new_size >= old_size);

        // Use integer arithmetic for bounds checks to avoid creating
        // out-of-bounds pointers and to preserve strict provenance.
        let old_end = ptr.as_ptr() as usize + old_size;
        let head = self.ptr.get().as_ptr() as usize;

        // In-place growth is only possible when `ptr` was the most recent
        // allocation, i.e. its end coincides with the bump cursor.
        if old_end == head {
            // Compare as new_size <= end - ptr instead of ptr + new_size <= end
            // to avoid wrapping overflow when new_size is very large.
            let remaining = self.end.get().as_ptr() as usize - ptr.as_ptr() as usize;
            if new_size <= remaining {
                // Safety: new_end is within the current slab. Advance the bump
                // pointer by the growth delta, preserving provenance via add().
                let extra = new_size - old_size;
                unsafe {
                    let head_ptr = self.ptr.get().as_ptr();
                    self.ptr.set(NonNull::new_unchecked(head_ptr.add(extra)));
                    // Re-derive from the bump pointer (which carries full slab
                    // provenance) instead of returning `ptr` directly, because
                    // the MIRI tagging in alloc() restricts `ptr`'s Stacked
                    // Borrows tag to old_size bytes.
                    let result = NonNull::new_unchecked(head_ptr.sub(old_size));

                    // Re-tag for Miri with the widened size (see alloc()).
                    #[cfg(test)]
                    if true {
                        use std::mem::MaybeUninit;
                        return NonNull::new_unchecked(
                            std::slice::from_raw_parts_mut(
                                result.cast::<MaybeUninit<u8>>().as_ptr(),
                                new_size,
                            )
                            .as_mut_ptr()
                            .cast(),
                        );
                    }

                    return result;
                }
            }
        }

        // Slow path: fresh allocation, then copy the old contents over.
        let new_ptr = self.alloc(new_size);
        // SAFETY:
        // - Source (`ptr`) is from a prior alloc of `old_size` bytes, valid to read.
        // - Destination (`new_ptr`) is a fresh alloc of `new_size >= old_size` bytes.
        // - The regions do not overlap because `new_ptr` comes from a subsequent
        //   bump (same or different slab) so it cannot alias the `ptr` region.
        unsafe { ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_ptr(), old_size) };
        new_ptr
    }

    /// Out-of-line refill path for `alloc`: map a new slab, then bump-allocate
    /// `size` bytes from it.
    #[cold]
    #[inline(never)]
    fn alloc_slow(&self, size: usize) -> NonNull<u8> {
        self.grow(size);

        let ptr = self.ptr.get();
        let addr = ptr.as_ptr() as usize;
        let aligned_addr = (addr + ALLOC_ALIGN - 1) & !(ALLOC_ALIGN - 1);
        let padding = aligned_addr - addr;
        // Cannot overflow: grow() panics if HEADER_SIZE + size overflows,
        // and post-grow ptr is SLAB_ALIGN-aligned so padding is always 0.
        let needed = padding + size;
        debug_assert!(needed <= self.end.get().as_ptr() as usize - addr);

        // Safety: grow() guarantees the new slab is large enough.
        // Using add() from ptr preserves provenance.
        unsafe {
            self.ptr
                .set(NonNull::new_unchecked(ptr.as_ptr().add(needed)));
            NonNull::new_unchecked(ptr.as_ptr().add(padding))
        }
    }

    /// Create a scratch buffer that writes into the arena's current slab.
    ///
    /// # Safety
    ///
    /// The caller must not call `alloc` on this arena while the returned
    /// `Scratch` is alive. The scratch exclusively owns the bump region.
    pub(crate) unsafe fn scratch(&self) -> Scratch<'_> {
        // The scratch starts writing at the bump cursor without advancing it;
        // `commit` advances the cursor, and dropping the scratch leaves the
        // space free for reuse.
        let start = self.ptr.get();
        let cap = self.end.get().as_ptr() as usize - start.as_ptr() as usize;
        Scratch {
            arena: self,
            start,
            len: 0,
            cap,
        }
    }

    /// Allocate a fresh slab with room for at least `size` data bytes and
    /// make it the arena's current slab. Slab sizes grow geometrically
    /// (double the previous slab) to amortize global-allocator calls.
    fn grow(&self, size: usize) {
        // SAFETY: self.slab always points to a valid SlabHeader — either the
        // static EMPTY_SLAB sentinel or a heap-allocated header written in grow().
        let current_size = unsafe { self.slab.get().as_ref().size };

        let min_slab = HEADER_SIZE.checked_add(size).expect("layout overflow");

        let new_size = current_size
            .saturating_mul(2)
            .max(min_slab)
            .max(INITIAL_SLAB_SIZE);

        let slab_layout =
            Layout::from_size_align(new_size, SLAB_ALIGN).expect("slab layout overflow");

        // SAFETY: slab_layout was validated by Layout::from_size_align above.
        let raw = unsafe { std::alloc::alloc(slab_layout) };
        let Some(base) = NonNull::new(raw) else {
            std::alloc::handle_alloc_error(slab_layout);
        };

        // Safety: base points to a freshly allocated region of new_size bytes.
        unsafe {
            // Link the new slab in front of the current one, then point the
            // bump range at its data area (header excluded).
            let header_ptr = base.as_ptr().cast::<SlabHeader>();
            header_ptr.write(SlabHeader {
                prev: Some(self.slab.get()),
                size: new_size,
            });

            self.slab.set(NonNull::new_unchecked(header_ptr));
            self.ptr
                .set(NonNull::new_unchecked(base.as_ptr().add(HEADER_SIZE)));
            self.end
                .set(NonNull::new_unchecked(base.as_ptr().add(new_size)));
        }
    }

    /// Allocates a copy of `s` in the arena and returns a reference to it.
    pub fn alloc_str(&self, s: &str) -> &str {
        // Empty strings need no storage; return the static empty str.
        if s.is_empty() {
            return "";
        }
        let dst = self.alloc(s.len());
        // SAFETY: dst is a fresh arena allocation of s.len() bytes, disjoint
        // from s. The resulting slice is valid UTF-8 because s is.
        unsafe {
            std::ptr::copy_nonoverlapping(s.as_ptr(), dst.as_ptr(), s.len());
            std::str::from_utf8_unchecked(std::slice::from_raw_parts(dst.as_ptr(), s.len()))
        }
    }
}
291
impl Drop for Arena {
    /// Frees every heap slab by walking the intrusive `prev` chain. The
    /// static `EMPTY_SLAB` sentinel (`size == 0`) terminates the walk and is
    /// never deallocated.
    fn drop(&mut self) {
        let mut current = self.slab.get();
        loop {
            // Safety: current is either a heap slab or the static sentinel.
            let header = unsafe { current.as_ref() };
            // size == 0 identifies the sentinel: nothing left to free.
            if header.size == 0 {
                break;
            }
            // Read `prev` before deallocating the slab that stores it.
            let prev = header.prev;
            // Safety: header.size and SLAB_ALIGN match the layout used in grow().
            let slab_layout = unsafe { Layout::from_size_align_unchecked(header.size, SLAB_ALIGN) };
            unsafe {
                std::alloc::dealloc(current.as_ptr().cast(), slab_layout);
            }
            match prev {
                Some(p) => current = p,
                // Unreachable for heap slabs (grow() always links a prev),
                // but a safe terminator regardless.
                None => break,
            }
        }
    }
}
314
/// A temporary byte buffer that writes directly into an [`Arena`] slab.
///
/// Scratch tracks its own write position without advancing the arena's bump
/// pointer. On [`commit`](Scratch::commit) the arena pointer is advanced past
/// the committed bytes. If the scratch is dropped without committing, the arena
/// pointer is unchanged and the space is reused by subsequent allocations.
pub(crate) struct Scratch<'a> {
    // Arena whose current slab backs this buffer.
    arena: &'a Arena,
    // First byte of the scratch region (the arena's bump cursor at creation,
    // or the start of a fresh slab after `grow_slow`).
    start: NonNull<u8>,
    // Bytes written so far. Invariant: `len <= cap`.
    len: usize,
    // Bytes available from `start` to the end of the current slab.
    cap: usize,
}
327
impl<'a> Scratch<'a> {
    /// Append a single byte, moving to a larger slab if the current one is full.
    #[inline]
    pub fn push(&mut self, byte: u8) {
        let len = self.len;
        if len == self.cap {
            // grow_slow copies existing data and updates start/cap;
            // the local `len` remains valid afterwards.
            self.grow_slow(1);
        }
        // Safety: len < cap, so start + len is within the slab.
        unsafe {
            self.start.as_ptr().add(len).write(byte);
        }
        self.len = len + 1;
    }

    /// Append all of `bytes`, growing first if they do not fit.
    #[inline]
    pub fn extend(&mut self, bytes: &[u8]) {
        // `cap - len` cannot underflow (len <= cap invariant).
        if bytes.len() > self.cap - self.len {
            self.grow_slow(bytes.len());
        }
        // Safety: cap - len >= bytes.len(), so the copy is in bounds.
        unsafe {
            ptr::copy_nonoverlapping(
                bytes.as_ptr(),
                self.start.as_ptr().add(self.len),
                bytes.len(),
            );
        }
        self.len += bytes.len();
    }

    /// Push bytes while stripping underscores. Returns `false` if any
    /// underscore is not placed between two ASCII digits.
    ///
    /// NOTE(review): `is_ascii_digit` covers `0-9` only, so underscores inside
    /// hex/octal/binary literals (e.g. `0xdead_beef`, valid TOML) are rejected
    /// here — confirm callers handle non-decimal radixes separately.
    #[inline]
    pub(crate) fn push_strip_underscores(&mut self, bytes: &[u8]) -> bool {
        // `prev` starts at 0 (not a digit), so a leading underscore fails.
        let mut prev = 0u8;
        for &b in bytes {
            if b == b'_' {
                if !prev.is_ascii_digit() {
                    return false;
                }
            } else {
                if prev == b'_' && !b.is_ascii_digit() {
                    return false;
                }
                self.push(b);
            }
            prev = b;
        }
        // A trailing underscore is also invalid.
        prev != b'_'
    }

    /// View the bytes written so far without finalizing the scratch.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        if self.len == 0 {
            return &[];
        }
        // Safety: start..start+len was written by us and is within the slab.
        unsafe { std::slice::from_raw_parts(self.start.as_ptr(), self.len) }
    }

    /// Finalize the scratch data and return it as a byte slice tied to the
    /// arena's lifetime. Advances the arena's bump pointer past the committed
    /// bytes.
    pub fn commit(self) -> &'a [u8] {
        if self.len == 0 {
            return &[];
        }
        // Safety: start..start+len is valid scratch memory within the arena.
        let slice = unsafe { std::slice::from_raw_parts(self.start.as_ptr(), self.len) };
        // Safety: start + len is within the slab (we ensured capacity on every write).
        unsafe {
            self.arena
                .ptr
                .set(NonNull::new_unchecked(self.start.as_ptr().add(self.len)));
        }
        slice
    }

    /// Move the scratch into a freshly grown slab with room for at least
    /// `len + additional` bytes, copying what was already written. The old
    /// slab's remaining free space is abandoned (the arena bumps in the new
    /// slab from now on).
    #[cold]
    #[inline(never)]
    fn grow_slow(&mut self, additional: usize) {
        let required = self.len.checked_add(additional).expect("scratch overflow");
        let new_cap = self.cap.saturating_mul(2).max(required);

        self.arena.grow(new_cap);

        // Copy existing scratch data to the start of the new slab.
        let new_start = self.arena.ptr.get();
        if self.len > 0 {
            // Safety: old data at self.start..+len is still valid (old slab hasn't been freed).
            // New slab has at least new_cap bytes of data space >= required > self.len.
            unsafe {
                ptr::copy_nonoverlapping(self.start.as_ptr(), new_start.as_ptr(), self.len);
            }
        }
        self.start = new_start;
        self.cap = self.arena.end.get().as_ptr() as usize - new_start.as_ptr() as usize;
    }
}
427
428#[cfg(test)]
429#[path = "./arena_tests.rs"]
430mod tests;