toml_spanner/arena.rs

use std::alloc::Layout;
use std::cell::Cell;
use std::ptr::{self, NonNull};

const HEADER_SIZE: usize = std::mem::size_of::<SlabHeader>();
const INITIAL_SLAB_SIZE: usize = 4096;
const ALLOC_ALIGN: usize = 8;
const SLAB_ALIGN: usize = if std::mem::align_of::<SlabHeader>() >= ALLOC_ALIGN {
    std::mem::align_of::<SlabHeader>()
} else {
    ALLOC_ALIGN
};

const _: () = assert!(SLAB_ALIGN >= ALLOC_ALIGN);
const _: () = assert!(HEADER_SIZE.is_multiple_of(ALLOC_ALIGN));

#[repr(C)]
struct SlabHeader {
    prev: Option<NonNull<SlabHeader>>,
    size: usize,
}

// SAFETY: The only `SlabHeader` that needs `Sync` is the static `EMPTY_SLAB`
// sentinel. It is immutable (prev=None, size=0) and never written through the
// pointer. Heap-allocated SlabHeaders are only reachable through `Arena`, which
// is `!Sync` (contains `Cell`), so they are never shared across threads.
unsafe impl Sync for SlabHeader {}

static EMPTY_SLAB: SlabHeader = SlabHeader {
    prev: None,
    size: 0,
};

/// Bump allocator backing all data structures produced by the parser.
///
/// Create an `Arena` before calling [`parse`](crate::parse) and pass it by
/// reference. The arena must outlive the returned [`Document`](crate::Document)
/// because parsed values borrow from it. All memory is freed on drop.
///
/// # Examples
///
/// ```
/// let arena = toml_spanner::Arena::new();
/// let table = toml_spanner::parse("key = \"value\"", &arena)?;
/// // `table` borrows from both the input string and `arena`.
/// # Ok::<(), toml_spanner::Error>(())
/// ```
pub struct Arena {
    ptr: Cell<NonNull<u8>>,
    end: Cell<NonNull<u8>>,
    slab: Cell<NonNull<SlabHeader>>,
}

#[cfg(target_pointer_width = "64")]
const _: () = assert!(std::mem::size_of::<Arena>() == 24);

impl Default for Arena {
    fn default() -> Self {
        Self::new()
    }
}

impl Arena {
    /// Creates a new, empty arena.
    pub fn new() -> Self {
        // Safety: EMPTY_SLAB is a static with a stable address.
        let sentinel =
            unsafe { NonNull::new_unchecked(&EMPTY_SLAB as *const SlabHeader as *mut SlabHeader) };
        let dangling = NonNull::dangling();
        Arena {
            ptr: Cell::new(dangling),
            end: Cell::new(dangling),
            slab: Cell::new(sentinel),
        }
    }

    /// Allocate `size` bytes aligned to 8.
    ///
    /// Returns a non-null pointer to the allocated region. Aborts on OOM.
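    ///
    /// The fast path rounds the bump address up to the next multiple of 8 with
    /// `(addr + ALLOC_ALIGN - 1) & !(ALLOC_ALIGN - 1)` (for example, `0x1003`
    /// rounds up to `0x1008`, costing 5 padding bytes) and then advances the
    /// bump pointer by the padding plus `size`.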
    #[inline]
    pub(crate) fn alloc(&self, size: usize) -> NonNull<u8> {
        let ptr = self.ptr.get();
        let addr = ptr.as_ptr() as usize;
        let aligned_addr = (addr + ALLOC_ALIGN - 1) & !(ALLOC_ALIGN - 1);
        let padding = aligned_addr - addr;
        let needed = padding.saturating_add(size);
        let remaining = self.end.get().as_ptr() as usize - addr;

        let result = if needed <= remaining {
            // Safety: needed bytes fit within the current slab (end >= ptr is
            // invariant, so remaining cannot underflow). Using add() from ptr
            // preserves provenance.
            unsafe {
                self.ptr
                    .set(NonNull::new_unchecked(ptr.as_ptr().add(needed)));
                NonNull::new_unchecked(ptr.as_ptr().add(padding))
            }
        } else {
            self.alloc_slow(size)
        };

        // Tag pointer for MIRI
        #[cfg(test)]
        if true {
            use std::mem::MaybeUninit;

            unsafe {
                return NonNull::new_unchecked(
                    std::slice::from_raw_parts_mut(result.cast::<MaybeUninit<u8>>().as_ptr(), size)
                        .as_mut_ptr()
                        .cast(),
                );
            }
        }

        result
    }

    /// Grow an existing allocation in place when possible, otherwise allocate
    /// a new region and copy the old data.
    ///
    /// Returns the (possibly unchanged) pointer to the allocation.
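    ///
    /// Growth happens in place only when the allocation is the most recent
    /// one, i.e. `ptr + old_size` is exactly the current bump pointer and the
    /// current slab still has room; otherwise the old bytes are copied into a
    /// fresh allocation and the original region is simply abandoned.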
    ///
    /// # Safety
    ///
    /// - `ptr` must have been returned by a prior `alloc` on this arena whose
    ///   size was `old_size`.
    /// - `new_size` must be `>= old_size`. Violating this causes wrapping
    ///   underflow in the in-place growth path or an out-of-bounds copy.
    pub(crate) unsafe fn realloc(
        &self,
        ptr: NonNull<u8>,
        old_size: usize,
        new_size: usize,
    ) -> NonNull<u8> {
        debug_assert!(new_size >= old_size);

        // Use integer arithmetic for bounds checks to avoid creating
        // out-of-bounds pointers and to preserve strict provenance.
        let old_end = ptr.as_ptr() as usize + old_size;
        let head = self.ptr.get().as_ptr() as usize;

        if old_end == head {
            // Compare as new_size <= end - ptr instead of ptr + new_size <= end
            // to avoid wrapping overflow when new_size is very large.
            let remaining = self.end.get().as_ptr() as usize - ptr.as_ptr() as usize;
            if new_size <= remaining {
                // Safety: `ptr + new_size` stays within the current slab. Advance
                // the bump pointer by the growth delta, preserving provenance via add().
                let extra = new_size - old_size;
                unsafe {
                    let head_ptr = self.ptr.get().as_ptr();
                    self.ptr.set(NonNull::new_unchecked(head_ptr.add(extra)));
                    // Re-derive from the bump pointer (which carries full slab
                    // provenance) instead of returning `ptr` directly, because
                    // the MIRI tagging in alloc() restricts `ptr`'s Stacked
                    // Borrows tag to old_size bytes.
                    let result = NonNull::new_unchecked(head_ptr.sub(old_size));

                    #[cfg(test)]
                    if true {
                        use std::mem::MaybeUninit;
                        return NonNull::new_unchecked(
                            std::slice::from_raw_parts_mut(
                                result.cast::<MaybeUninit<u8>>().as_ptr(),
                                new_size,
                            )
                            .as_mut_ptr()
                            .cast(),
                        );
                    }

                    return result;
                }
            }
        }

        let new_ptr = self.alloc(new_size);
        // SAFETY:
        // - Source (`ptr`) is from a prior alloc of `old_size` bytes, valid to read.
        // - Destination (`new_ptr`) is a fresh alloc of `new_size >= old_size` bytes.
        // - The regions do not overlap because `new_ptr` comes from a subsequent
        //   bump (same or different slab) so it cannot alias the `ptr` region.
        unsafe { ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_ptr(), old_size) };
        new_ptr
    }

    #[cold]
    #[inline(never)]
    fn alloc_slow(&self, size: usize) -> NonNull<u8> {
        self.grow(size);

        let ptr = self.ptr.get();
        let addr = ptr.as_ptr() as usize;
        let aligned_addr = (addr + ALLOC_ALIGN - 1) & !(ALLOC_ALIGN - 1);
        let padding = aligned_addr - addr;
        // Cannot overflow: grow() panics if HEADER_SIZE + size overflows,
        // and the post-grow ptr is ALLOC_ALIGN-aligned, so padding is always 0.
        let needed = padding + size;
        debug_assert!(needed <= self.end.get().as_ptr() as usize - addr);

        // Safety: grow() guarantees the new slab is large enough.
        // Using add() from ptr preserves provenance.
        unsafe {
            self.ptr
                .set(NonNull::new_unchecked(ptr.as_ptr().add(needed)));
            NonNull::new_unchecked(ptr.as_ptr().add(padding))
        }
    }

    /// Create a scratch buffer that writes into the arena's current slab.
    ///
    /// # Safety
    ///
    /// The caller must not call `alloc` on this arena while the returned
    /// `Scratch` is alive. The scratch exclusively owns the bump region.
    pub(crate) unsafe fn scratch(&self) -> Scratch<'_> {
        let start = self.ptr.get();
        let cap = self.end.get().as_ptr() as usize - start.as_ptr() as usize;
        Scratch {
            arena: self,
            start,
            len: 0,
            cap,
        }
    }

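    /// Allocate a fresh slab, link it to the previous one, and point the bump
    /// region at its payload. Each new slab is at least twice the size of the
    /// current one and never smaller than `INITIAL_SLAB_SIZE` or
    /// `HEADER_SIZE + size`, so a 4 KiB first slab is followed by 8 KiB,
    /// 16 KiB, and so on unless a single oversized request forces a bigger jump.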
    fn grow(&self, size: usize) {
        // SAFETY: self.slab always points to a valid SlabHeader — either the
        // static EMPTY_SLAB sentinel or a heap-allocated header written in grow().
        let current_size = unsafe { self.slab.get().as_ref().size };

        let min_slab = HEADER_SIZE.checked_add(size).expect("layout overflow");

        let new_size = current_size
            .saturating_mul(2)
            .max(min_slab)
            .max(INITIAL_SLAB_SIZE);

        let slab_layout =
            Layout::from_size_align(new_size, SLAB_ALIGN).expect("slab layout overflow");

        // SAFETY: slab_layout was validated by Layout::from_size_align above.
        let raw = unsafe { std::alloc::alloc(slab_layout) };
        let Some(base) = NonNull::new(raw) else {
            std::alloc::handle_alloc_error(slab_layout);
        };

        // Safety: base points to a freshly allocated region of new_size bytes.
        unsafe {
            let header_ptr = base.as_ptr().cast::<SlabHeader>();
            header_ptr.write(SlabHeader {
                prev: Some(self.slab.get()),
                size: new_size,
            });

            self.slab.set(NonNull::new_unchecked(header_ptr));
            self.ptr
                .set(NonNull::new_unchecked(base.as_ptr().add(HEADER_SIZE)));
            self.end
                .set(NonNull::new_unchecked(base.as_ptr().add(new_size)));
        }
    }

    /// Allocates a copy of `s` in the arena and returns a reference to it.
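    ///
    /// # Examples
    ///
    /// ```
    /// let arena = toml_spanner::Arena::new();
    /// // The copy lives in the arena, so it can outlive the input `String`.
    /// let copied = arena.alloc_str(&String::from("key"));
    /// assert_eq!(copied, "key");
    /// ```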
    pub fn alloc_str(&self, s: &str) -> &str {
        if s.is_empty() {
            return "";
        }
        let dst = self.alloc(s.len());
        // SAFETY: dst is a fresh arena allocation of s.len() bytes, disjoint
        // from s. The resulting slice is valid UTF-8 because s is.
        unsafe {
            std::ptr::copy_nonoverlapping(s.as_ptr(), dst.as_ptr(), s.len());
            std::str::from_utf8_unchecked(std::slice::from_raw_parts(dst.as_ptr(), s.len()))
        }
    }
}
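
// A minimal, illustrative sketch of the intended allocation flow (real
// coverage lives in `arena_tests.rs`; this helper is hypothetical and not
// referenced by the parser): allocations come back 8-aligned, and growing the
// most recent allocation is usually just a bump-pointer move.
#[cfg(test)]
#[allow(dead_code)]
fn bump_sketch(arena: &Arena) {
    // Every allocation is aligned to ALLOC_ALIGN.
    let a = arena.alloc(5);
    assert_eq!(a.as_ptr() as usize % ALLOC_ALIGN, 0);

    // `a` ends at the current bump pointer, so growing it from 5 to 12 bytes
    // normally extends in place; if the slab is full it is copied instead.
    // SAFETY: `a` came from `alloc(5)` above and 12 >= 5.
    let b = unsafe { arena.realloc(a, 5, 12) };
    assert_eq!(b.as_ptr() as usize % ALLOC_ALIGN, 0);

    // Strings are copied into the arena and returned with its lifetime.
    assert_eq!(arena.alloc_str("key"), "key");
}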

impl Drop for Arena {
    fn drop(&mut self) {
        let mut current = self.slab.get();
        loop {
            // Safety: current is either a heap slab or the static sentinel.
            let header = unsafe { current.as_ref() };
            if header.size == 0 {
                break;
            }
            let prev = header.prev;
            // Safety: header.size and SLAB_ALIGN match the layout used in grow().
            let slab_layout = unsafe { Layout::from_size_align_unchecked(header.size, SLAB_ALIGN) };
            unsafe {
                std::alloc::dealloc(current.as_ptr().cast(), slab_layout);
            }
            match prev {
                Some(p) => current = p,
                None => break,
            }
        }
    }
}

/// A temporary byte buffer that writes directly into an [`Arena`] slab.
///
/// Scratch tracks its own write position without advancing the arena's bump
/// pointer. On [`commit`](Scratch::commit) the arena pointer is advanced past
/// the committed bytes. If the scratch is dropped without committing, the arena
/// pointer is unchanged and the space is reused by subsequent allocations.
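///
/// If the current slab runs out of room mid-write, the scratch asks the arena
/// for a fresh slab and copies its staged bytes to the start of that slab, so
/// the staged data may move between writes (see `grow_slow` below).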
pub(crate) struct Scratch<'a> {
    arena: &'a Arena,
    start: NonNull<u8>,
    len: usize,
    cap: usize,
}

impl<'a> Scratch<'a> {
    #[inline]
    pub fn push(&mut self, byte: u8) {
        let len = self.len;
        if len == self.cap {
            self.grow_slow(1);
        }
        // Safety: len < cap, so start + len is within the slab.
        unsafe {
            self.start.as_ptr().add(len).write(byte);
        }
        self.len = len + 1;
    }

    #[inline]
    pub fn extend(&mut self, bytes: &[u8]) {
        if bytes.len() > self.cap - self.len {
            self.grow_slow(bytes.len());
        }
        // Safety: cap - len >= bytes.len(), so the copy is in bounds.
        unsafe {
            ptr::copy_nonoverlapping(
                bytes.as_ptr(),
                self.start.as_ptr().add(self.len),
                bytes.len(),
            );
        }
        self.len += bytes.len();
    }

    /// Push bytes while stripping underscores. Returns `false` if any
    /// underscore is not placed between two ASCII digits.
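    ///
    /// For example, `b"1_000"` is pushed as `1000` and returns `true`, while
    /// `b"_1"`, `b"1__0"`, and `b"1_"` all return `false`.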
    #[inline]
    pub(crate) fn push_strip_underscores(&mut self, bytes: &[u8]) -> bool {
        let mut prev = 0u8;
        for &b in bytes {
            if b == b'_' {
                if !prev.is_ascii_digit() {
                    return false;
                }
            } else {
                if prev == b'_' && !b.is_ascii_digit() {
                    return false;
                }
                self.push(b);
            }
            prev = b;
        }
        prev != b'_'
    }

    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        if self.len == 0 {
            return &[];
        }
        // Safety: start..start+len was written by us and is within the slab.
        unsafe { std::slice::from_raw_parts(self.start.as_ptr(), self.len) }
    }

    /// Finalize the scratch data and return it as a byte slice tied to the
    /// arena's lifetime. Advances the arena's bump pointer past the committed
    /// bytes.
    pub fn commit(self) -> &'a [u8] {
        if self.len == 0 {
            return &[];
        }
        // Safety: start..start+len is valid scratch memory within the arena.
        let slice = unsafe { std::slice::from_raw_parts(self.start.as_ptr(), self.len) };
        // Safety: start + len is within the slab (we ensured capacity on every write).
        unsafe {
            self.arena
                .ptr
                .set(NonNull::new_unchecked(self.start.as_ptr().add(self.len)));
        }
        slice
    }

    #[cold]
    #[inline(never)]
    fn grow_slow(&mut self, additional: usize) {
        let required = self.len.checked_add(additional).expect("scratch overflow");
        let new_cap = self.cap.saturating_mul(2).max(required);

        self.arena.grow(new_cap);

        // Copy existing scratch data to the start of the new slab.
        let new_start = self.arena.ptr.get();
        if self.len > 0 {
            // Safety: old data at self.start..+len is still valid (old slab hasn't been freed).
            // New slab has at least new_cap bytes of data space >= required > self.len.
            unsafe {
                ptr::copy_nonoverlapping(self.start.as_ptr(), new_start.as_ptr(), self.len);
            }
        }
        self.start = new_start;
        self.cap = self.arena.end.get().as_ptr() as usize - new_start.as_ptr() as usize;
    }
}
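
// Illustrative sketch of the intended scratch flow (real tests live in
// `arena_tests.rs`; this helper is hypothetical and not called anywhere):
// bytes are staged in the scratch and become arena-owned only on `commit`.
#[cfg(test)]
#[allow(dead_code)]
fn scratch_sketch(arena: &Arena) -> &[u8] {
    // SAFETY: no `alloc` happens on the arena while the scratch is alive.
    let mut s = unsafe { arena.scratch() };
    s.push(b'+');
    // Underscores between digits are stripped: "1_000" is staged as "1000".
    assert!(s.push_strip_underscores(b"1_000"));
    assert_eq!(s.as_bytes(), b"+1000");
    // `commit` pins the bytes by advancing the arena's bump pointer past them;
    // dropping `s` without committing would leave the arena untouched.
    s.commit()
}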

#[cfg(test)]
#[path = "./arena_tests.rs"]
mod tests;