toml_spanner/arena.rs

use std::alloc::Layout;
use std::cell::Cell;
use std::ptr::{self, NonNull};

const HEADER_SIZE: usize = std::mem::size_of::<SlabHeader>();
const INITIAL_SLAB_SIZE: usize = 4096;
const ALLOC_ALIGN: usize = 8;
const SLAB_ALIGN: usize = if std::mem::align_of::<SlabHeader>() >= ALLOC_ALIGN {
    std::mem::align_of::<SlabHeader>()
} else {
    ALLOC_ALIGN
};

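// alloc_slow() relies on both of these: slab bases are at least ALLOC_ALIGN-aligned
// and the header spans a whole number of ALLOC_ALIGN units, so the bump pointer of
// a freshly grown slab never needs padding.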
const _: () = assert!(SLAB_ALIGN >= ALLOC_ALIGN);
const _: () = assert!(HEADER_SIZE.is_multiple_of(ALLOC_ALIGN));

#[repr(C)]
struct SlabHeader {
    prev: Option<NonNull<SlabHeader>>,
    size: usize,
}

// Safety: EMPTY_SLAB is an immutable sentinel (prev=None, size=0). SlabHeaders
// on the heap are only reachable through Arena, which is !Sync due to Cell.
unsafe impl Sync for SlabHeader {}

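// Sentinel that Arena::new() starts from. A size of 0 marks the end of the
// slab chain for Drop and means grow() has no previous slab size to double.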
static EMPTY_SLAB: SlabHeader = SlabHeader {
    prev: None,
    size: 0,
};

/// Bump allocator backing all data structures produced by the parser.
///
/// Create an `Arena` before calling [`parse`](crate::parse) and pass it by
/// reference. The arena must outlive the returned [`Root`](crate::Root)
/// because parsed values borrow from it.
///
/// All memory is freed when the arena is dropped.
///
/// # Examples
///
/// ```
/// let arena = toml_spanner::Arena::new();
/// let table = toml_spanner::parse("key = \"value\"", &arena)?;
/// // `table` borrows from both the input string and `arena`.
/// # Ok::<(), toml_spanner::Error>(())
/// ```
pub struct Arena {
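    // `ptr` is the bump pointer (next free byte) in the current slab, `end`
    // is one past the last usable byte of that slab, and `slab` is the header
    // of the newest slab, which links back to older slabs through `prev`.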
    ptr: Cell<NonNull<u8>>,
    end: Cell<NonNull<u8>>,
    slab: Cell<NonNull<SlabHeader>>,
}

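// Three pointer-sized fields, so the handle stays at 24 bytes on 64-bit targets.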
#[cfg(target_pointer_width = "64")]
const _: () = assert!(std::mem::size_of::<Arena>() == 24);

impl Default for Arena {
    fn default() -> Self {
        Self::new()
    }
}

impl Arena {
    /// Creates a new, empty arena.
    pub fn new() -> Self {
        // Safety: EMPTY_SLAB is a static with a stable address.
        let sentinel =
            unsafe { NonNull::new_unchecked(&EMPTY_SLAB as *const SlabHeader as *mut SlabHeader) };
        let dangling = NonNull::dangling();
        Arena {
            ptr: Cell::new(dangling),
            end: Cell::new(dangling),
            slab: Cell::new(sentinel),
        }
    }

    /// Allocate `size` bytes aligned to 8.
    ///
    /// Returns a non-null pointer to the allocated region. Aborts on OOM.
    #[inline]
    pub(crate) fn alloc(&self, size: usize) -> NonNull<u8> {
        let ptr = self.ptr.get();
        let addr = ptr.as_ptr() as usize;
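        // Round the bump pointer up to the next ALLOC_ALIGN boundary; the
        // difference is the padding this allocation must skip.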
        let aligned_addr = (addr + ALLOC_ALIGN - 1) & !(ALLOC_ALIGN - 1);
        let padding = aligned_addr - addr;
        let needed = padding.saturating_add(size);
        let remaining = self.end.get().as_ptr() as usize - addr;

        if needed <= remaining {
            // Safety: needed bytes fit within the current slab (end >= ptr is
            // invariant, so remaining cannot underflow). Using add() from ptr
            // preserves provenance.
            unsafe {
                self.ptr
                    .set(NonNull::new_unchecked(ptr.as_ptr().add(needed)));
                NonNull::new_unchecked(ptr.as_ptr().add(padding))
            }
        } else {
            self.alloc_slow(size)
        }
    }

    /// Grow an existing allocation in place when possible, otherwise allocate
    /// a new region and copy the old data.
    ///
    /// Returns the (possibly unchanged) pointer to the allocation.
    ///
    /// # Safety
    ///
    /// `ptr` must have been returned by a prior `alloc` on this arena whose
    /// size was `old_size`. `new_size` must be `>= old_size`.
    pub(crate) unsafe fn realloc(
        &self,
        ptr: NonNull<u8>,
        old_size: usize,
        new_size: usize,
    ) -> NonNull<u8> {
        debug_assert!(new_size >= old_size);

        // Use integer arithmetic for bounds checks to avoid creating
        // out-of-bounds pointers and to preserve strict provenance.
        let old_end = ptr.as_ptr() as usize + old_size;
        let head = self.ptr.get().as_ptr() as usize;

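        // Fast path: the region being grown is the most recent allocation, so
        // it can be extended in place if the current slab still has room.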
        if old_end == head {
            // Compare as new_size <= end - ptr instead of ptr + new_size <= end
            // to avoid wrapping overflow when new_size is very large.
            let remaining = self.end.get().as_ptr() as usize - ptr.as_ptr() as usize;
            if new_size <= remaining {
                // Safety: ptr + new_size stays within the current slab. Advance
                // the bump pointer by the growth delta, preserving provenance
                // via add().
                let extra = new_size - old_size;
                unsafe {
                    self.ptr
                        .set(NonNull::new_unchecked(self.ptr.get().as_ptr().add(extra)));
                }
                return ptr;
            }
        }

        let new_ptr = self.alloc(new_size);
        // Safety: old and new regions are non-overlapping arena allocations,
        // and old_size <= new_size <= new allocation size.
        unsafe { ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_ptr(), old_size) };
        new_ptr
    }

    #[cold]
    #[inline(never)]
    fn alloc_slow(&self, size: usize) -> NonNull<u8> {
        self.grow(size);

        let ptr = self.ptr.get();
        let addr = ptr.as_ptr() as usize;
        let aligned_addr = (addr + ALLOC_ALIGN - 1) & !(ALLOC_ALIGN - 1);
        let padding = aligned_addr - addr;
        // Cannot overflow: grow() panics if HEADER_SIZE + size overflows, and
        // the post-grow ptr is ALLOC_ALIGN-aligned (the slab base is
        // SLAB_ALIGN-aligned and HEADER_SIZE is a multiple of ALLOC_ALIGN),
        // so padding is always 0.
        let needed = padding + size;
        debug_assert!(needed <= self.end.get().as_ptr() as usize - addr);

        // Safety: grow() guarantees the new slab is large enough.
        // Using add() from ptr preserves provenance.
        unsafe {
            self.ptr
                .set(NonNull::new_unchecked(ptr.as_ptr().add(needed)));
            NonNull::new_unchecked(ptr.as_ptr().add(padding))
        }
    }

    /// Create a scratch buffer that writes into the arena's current slab.
    ///
    /// # Safety
    ///
    /// The caller must not call `alloc` on this arena while the returned
    /// `Scratch` is alive. The scratch exclusively owns the bump region.
    pub(crate) unsafe fn scratch(&self) -> Scratch<'_> {
        let start = self.ptr.get();
        let cap = self.end.get().as_ptr() as usize - start.as_ptr() as usize;
        Scratch {
            arena: self,
            start,
            len: 0,
            cap,
        }
    }

    fn grow(&self, size: usize) {
        // Safety: self.slab always points to a valid SlabHeader, either the
        // static EMPTY_SLAB sentinel or a heap-allocated header written in grow().
        let current_size = unsafe { self.slab.get().as_ref().size };

        let min_slab = HEADER_SIZE.checked_add(size).expect("layout overflow");

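        // Double the previous slab, but never go below the initial slab size
        // and always leave room for the header plus the requested bytes.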
        let new_size = current_size
            .saturating_mul(2)
            .max(min_slab)
            .max(INITIAL_SLAB_SIZE);

        let slab_layout =
            Layout::from_size_align(new_size, SLAB_ALIGN).expect("slab layout overflow");

        // Safety: slab_layout has a non-zero size (new_size >= INITIAL_SLAB_SIZE)
        // and was validated by Layout::from_size_align above.
        let raw = unsafe { std::alloc::alloc(slab_layout) };
        let Some(base) = NonNull::new(raw) else {
            std::alloc::handle_alloc_error(slab_layout);
        };

        // Safety: base points to a freshly allocated region of new_size bytes.
        unsafe {
            let header_ptr = base.as_ptr().cast::<SlabHeader>();
            header_ptr.write(SlabHeader {
                prev: Some(self.slab.get()),
                size: new_size,
            });

            self.slab.set(NonNull::new_unchecked(header_ptr));
            self.ptr
                .set(NonNull::new_unchecked(base.as_ptr().add(HEADER_SIZE)));
            self.end
                .set(NonNull::new_unchecked(base.as_ptr().add(new_size)));
        }
    }
}

impl Drop for Arena {
    fn drop(&mut self) {
        let mut current = self.slab.get();
        loop {
            // Safety: current is either a heap slab or the static sentinel.
            let header = unsafe { current.as_ref() };
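            // A size of 0 identifies the static EMPTY_SLAB sentinel, which is
            // not heap-allocated and must not be deallocated.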
            if header.size == 0 {
                break;
            }
            let prev = header.prev;
            // Safety: header.size and SLAB_ALIGN match the layout used in grow().
            let slab_layout = unsafe { Layout::from_size_align_unchecked(header.size, SLAB_ALIGN) };
            unsafe {
                std::alloc::dealloc(current.as_ptr().cast(), slab_layout);
            }
            match prev {
                Some(p) => current = p,
                None => break,
            }
        }
    }
}

/// A temporary byte buffer that writes directly into an [`Arena`] slab.
///
/// Scratch tracks its own write position without advancing the arena's bump
/// pointer. On [`commit`](Scratch::commit) the arena pointer is advanced past
/// the committed bytes. If the scratch is dropped without committing, the arena
/// pointer is unchanged and the space is reused by subsequent allocations.
pub(crate) struct Scratch<'a> {
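    // `start` is where the scratch data begins in the current slab, `len` is
    // how many bytes have been written so far, and `cap` is the number of
    // bytes available between `start` and the slab's end.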
    arena: &'a Arena,
    start: NonNull<u8>,
    len: usize,
    cap: usize,
}

impl<'a> Scratch<'a> {
    #[inline]
    pub fn push(&mut self, byte: u8) {
        let len = self.len;
        if len == self.cap {
            self.grow_slow(1);
        }
        // Safety: len < cap, so start + len is within the slab.
        unsafe {
            self.start.as_ptr().add(len).write(byte);
        }
        self.len = len + 1;
    }

    #[inline]
    pub fn extend(&mut self, bytes: &[u8]) {
        if bytes.len() > self.cap - self.len {
            self.grow_slow(bytes.len());
        }
        // Safety: cap - len >= bytes.len(), so the copy is in bounds.
        unsafe {
            ptr::copy_nonoverlapping(
                bytes.as_ptr(),
                self.start.as_ptr().add(self.len),
                bytes.len(),
            );
        }
        self.len += bytes.len();
    }

    /// Push bytes while stripping underscores. Returns `false` if any
    /// underscore is not placed between two ASCII digits.
    #[inline]
    pub(crate) fn push_strip_underscores(&mut self, bytes: &[u8]) -> bool {
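        // `prev` starts as 0, which is not an ASCII digit, so a leading
        // underscore is rejected by the check below.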
        let mut prev = 0u8;
        for &b in bytes {
            if b == b'_' {
                if !prev.is_ascii_digit() {
                    return false;
                }
            } else {
                if prev == b'_' && !b.is_ascii_digit() {
                    return false;
                }
                self.push(b);
            }
            prev = b;
        }
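        // A trailing underscore leaves `prev == b'_'`, which is also invalid.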
        prev != b'_'
    }

    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        if self.len == 0 {
            return &[];
        }
        // Safety: start..start+len was written by us and is within the slab.
        unsafe { std::slice::from_raw_parts(self.start.as_ptr(), self.len) }
    }

    /// Finalize the scratch data and return it as a byte slice tied to the
    /// arena's lifetime. Advances the arena's bump pointer past the committed
    /// bytes.
    pub fn commit(self) -> &'a [u8] {
        if self.len == 0 {
            return &[];
        }
        // Safety: start..start+len is valid scratch memory within the arena.
        let slice = unsafe { std::slice::from_raw_parts(self.start.as_ptr(), self.len) };
        // Safety: start + len is within the slab (we ensured capacity on every write).
        unsafe {
            self.arena
                .ptr
                .set(NonNull::new_unchecked(self.start.as_ptr().add(self.len)));
        }
        slice
    }

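    // Out of room in the current slab: ask the arena for a bigger slab and
    // move the scratch bytes there. The bytes already written to the old slab
    // are simply abandoned until the arena is dropped.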
    #[cold]
    #[inline(never)]
    fn grow_slow(&mut self, additional: usize) {
        let required = self.len.checked_add(additional).expect("scratch overflow");
        let new_cap = self.cap.saturating_mul(2).max(required);

        self.arena.grow(new_cap);

        // Copy existing scratch data to the start of the new slab.
        let new_start = self.arena.ptr.get();
        if self.len > 0 {
            // Safety: old data at self.start..+len is still valid (old slab hasn't been freed).
            // New slab has at least new_cap bytes of data space >= required > self.len.
            unsafe {
                ptr::copy_nonoverlapping(self.start.as_ptr(), new_start.as_ptr(), self.len);
            }
        }
        self.start = new_start;
        self.cap = self.arena.end.get().as_ptr() as usize - new_start.as_ptr() as usize;
    }
}

#[cfg(test)]
#[path = "./arena_tests.rs"]
mod tests;