toml_spanner/arena.rs

#![allow(unsafe_code)]

use std::alloc::Layout;
use std::cell::Cell;
use std::ptr::{self, NonNull};

const HEADER_SIZE: usize = std::mem::size_of::<SlabHeader>();
const INITIAL_SLAB_SIZE: usize = 4096;
const ALLOC_ALIGN: usize = 8;
const SLAB_ALIGN: usize = if std::mem::align_of::<SlabHeader>() >= ALLOC_ALIGN {
    std::mem::align_of::<SlabHeader>()
} else {
    ALLOC_ALIGN
};

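// These asserts guarantee that `base + HEADER_SIZE` in `grow()` is already
// aligned to ALLOC_ALIGN, so a fresh slab's data region needs no leading padding.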
const _: () = assert!(SLAB_ALIGN >= ALLOC_ALIGN);
const _: () = assert!(HEADER_SIZE.is_multiple_of(ALLOC_ALIGN));

#[repr(C)]
struct SlabHeader {
    prev: Option<NonNull<SlabHeader>>,
    size: usize,
}

// Safety: EMPTY_SLAB is an immutable sentinel (prev=None, size=0). SlabHeaders
// on the heap are only reachable through Arena, which is !Sync due to Cell.
unsafe impl Sync for SlabHeader {}

static EMPTY_SLAB: SlabHeader = SlabHeader {
    prev: None,
    size: 0,
};

/// A bump allocator used by the parser to store escaped strings.
///
/// Create an `Arena` before calling [`parse`](crate::parse) and pass it by
/// reference. The arena must live at least as long as the parsed [`Table`](crate::Table)
/// because [`Str`](crate::Str) values may borrow from it.
///
/// All memory is freed when the arena is dropped.
///
/// # Examples
///
/// ```
/// let arena = toml_spanner::Arena::new();
/// let table = toml_spanner::parse("key = \"value\"", &arena)?;
/// // `table` borrows from both the input string and `arena`.
/// # Ok::<(), toml_spanner::Error>(())
/// ```
pub struct Arena {
    /// Bump pointer: the next free byte in the current slab.
    ptr: Cell<NonNull<u8>>,
    /// One past the last usable byte of the current slab.
    end: Cell<NonNull<u8>>,
    /// Header of the current slab, or the `EMPTY_SLAB` sentinel.
    slab: Cell<NonNull<SlabHeader>>,
}

#[cfg(target_pointer_width = "64")]
const _: () = assert!(std::mem::size_of::<Arena>() == 24);

impl Default for Arena {
    fn default() -> Self {
        Self::new()
    }
}

impl Arena {
    /// Creates a new, empty arena.
    pub fn new() -> Self {
        // Safety: EMPTY_SLAB is a static with a stable address.
        let sentinel =
            unsafe { NonNull::new_unchecked(&EMPTY_SLAB as *const SlabHeader as *mut SlabHeader) };
        let dangling = NonNull::dangling();
        Arena {
            ptr: Cell::new(dangling),
            end: Cell::new(dangling),
            slab: Cell::new(sentinel),
        }
    }

    /// Allocates `size` bytes aligned to `ALLOC_ALIGN` (8).
    ///
    /// Returns a non-null pointer to the allocated region. Aborts on OOM.
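    ///
    /// A minimal crate-internal usage sketch (marked `ignore`, so it is not
    /// compiled as a doctest):
    ///
    /// ```ignore
    /// let arena = Arena::new();
    /// let ptr = arena.alloc(16);
    /// // The 16 bytes are uninitialized; write them before reading.
    /// unsafe { ptr.as_ptr().write_bytes(0, 16) };
    /// ```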
    #[inline]
    pub(crate) fn alloc(&self, size: usize) -> NonNull<u8> {
        let ptr = self.ptr.get();
        let addr = ptr.as_ptr() as usize;
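        // Round addr up to the next multiple of ALLOC_ALIGN; e.g. with
        // ALLOC_ALIGN = 8, addr 0x1003 rounds up to 0x1008 (padding = 5).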
        let aligned_addr = (addr + ALLOC_ALIGN - 1) & !(ALLOC_ALIGN - 1);
        let padding = aligned_addr - addr;
        let needed = padding + size;

        if addr + needed <= self.end.get().as_ptr() as usize {
            // Safety: padding+size bytes fit within the current slab.
            // Using add() from ptr preserves provenance.
            unsafe {
                self.ptr
                    .set(NonNull::new_unchecked(ptr.as_ptr().add(needed)));
                NonNull::new_unchecked(ptr.as_ptr().add(padding))
            }
        } else {
            self.alloc_slow(size)
        }
    }

    /// Grows an existing allocation in place when possible; otherwise allocates
    /// a new region and copies the old data.
    ///
    /// Returns the (possibly unchanged) pointer to the allocation.
    ///
    /// # Safety
    ///
    /// `ptr` must have been returned by a prior `alloc` on this arena whose
    /// size was `old_size`. `new_size` must be `>= old_size`.
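    ///
    /// A minimal crate-internal sketch (marked `ignore`, so it is not compiled
    /// as a doctest):
    ///
    /// ```ignore
    /// let arena = Arena::new();
    /// let ptr = arena.alloc(8);
    /// unsafe { ptr.as_ptr().write_bytes(b'x', 8) };
    /// // Safety: `ptr` came from `alloc(8)` on this arena and 8 <= 32.
    /// let grown = unsafe { arena.realloc(ptr, 8, 32) };
    /// // The first 8 bytes are preserved; the remaining 24 are uninitialized.
    /// ```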
    pub(crate) unsafe fn realloc(
        &self,
        ptr: NonNull<u8>,
        old_size: usize,
        new_size: usize,
    ) -> NonNull<u8> {
        debug_assert!(new_size >= old_size);

        // Use integer arithmetic for bounds checks to avoid creating
        // out-of-bounds pointers and to preserve strict provenance.
        let old_end = ptr.as_ptr() as usize + old_size;
        let head = self.ptr.get().as_ptr() as usize;

        if old_end == head {
            let new_end = ptr.as_ptr() as usize + new_size;
            if new_end <= self.end.get().as_ptr() as usize {
                // Safety: new_end is within the current slab. Advance the bump
                // pointer by the growth delta, preserving provenance via add().
                let extra = new_size - old_size;
                unsafe {
                    self.ptr
                        .set(NonNull::new_unchecked(self.ptr.get().as_ptr().add(extra)));
                }
                return ptr;
            }
        }

        let new_ptr = self.alloc(new_size);
        // Safety: old and new regions are non-overlapping arena allocations,
        // and old_size <= new_size <= new allocation size.
        unsafe { ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_ptr(), old_size) };
        new_ptr
    }

    #[cold]
    #[inline(never)]
    fn alloc_slow(&self, size: usize) -> NonNull<u8> {
        self.grow(size);

        let ptr = self.ptr.get();
        let addr = ptr.as_ptr() as usize;
        let aligned_addr = (addr + ALLOC_ALIGN - 1) & !(ALLOC_ALIGN - 1);
        let padding = aligned_addr - addr;
        let needed = padding + size;
        debug_assert!(addr + needed <= self.end.get().as_ptr() as usize);

        // Safety: grow() guarantees the new slab is large enough.
        // Using add() from ptr preserves provenance.
        unsafe {
            self.ptr
                .set(NonNull::new_unchecked(ptr.as_ptr().add(needed)));
            NonNull::new_unchecked(ptr.as_ptr().add(padding))
        }
    }

    /// Creates a scratch buffer that writes into the arena's current slab.
    ///
    /// # Safety
    ///
    /// The caller must not call `alloc` on this arena while the returned
    /// `Scratch` is alive. The scratch exclusively owns the bump region.
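    ///
    /// A minimal crate-internal sketch, assuming an `arena: Arena` is in scope
    /// (marked `ignore`, so it is not compiled as a doctest):
    ///
    /// ```ignore
    /// // Safety: `alloc` is not called while `scratch` is alive.
    /// let mut scratch = unsafe { arena.scratch() };
    /// scratch.extend(b"escaped text");
    /// let bytes: &[u8] = scratch.commit(); // now owned by the arena
    /// ```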
    pub(crate) unsafe fn scratch(&self) -> Scratch<'_> {
        let start = self.ptr.get();
        let cap = self.end.get().as_ptr() as usize - start.as_ptr() as usize;
        Scratch {
            arena: self,
            start,
            len: 0,
            cap,
        }
    }

    fn grow(&self, size: usize) {
        // Safety: slab points either to a live heap slab or to the static sentinel.
        let current_size = unsafe { self.slab.get().as_ref().size };

        let min_slab = HEADER_SIZE.checked_add(size).expect("layout overflow");

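        // Growth policy: double the previous slab size, but always make room
        // for the header plus this request and never go below INITIAL_SLAB_SIZE.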
        let new_size = current_size
            .saturating_mul(2)
            .max(min_slab)
            .max(INITIAL_SLAB_SIZE);

        let slab_layout =
            Layout::from_size_align(new_size, SLAB_ALIGN).expect("slab layout overflow");

        let raw = unsafe { std::alloc::alloc(slab_layout) };
        let Some(base) = NonNull::new(raw) else {
            std::alloc::handle_alloc_error(slab_layout);
        };

        // Safety: base points to a freshly allocated region of new_size bytes.
        unsafe {
            let header_ptr = base.as_ptr().cast::<SlabHeader>();
            header_ptr.write(SlabHeader {
                prev: Some(self.slab.get()),
                size: new_size,
            });

            self.slab.set(NonNull::new_unchecked(header_ptr));
            self.ptr
                .set(NonNull::new_unchecked(base.as_ptr().add(HEADER_SIZE)));
            self.end
                .set(NonNull::new_unchecked(base.as_ptr().add(new_size)));
        }
    }
}

impl Drop for Arena {
    fn drop(&mut self) {
        let mut current = self.slab.get();
        loop {
            // Safety: current is either a heap slab or the static sentinel.
            let header = unsafe { current.as_ref() };
            if header.size == 0 {
                break;
            }
            let prev = header.prev;
            // Safety: header.size and SLAB_ALIGN match the layout used in grow().
            let slab_layout = unsafe { Layout::from_size_align_unchecked(header.size, SLAB_ALIGN) };
            unsafe {
                std::alloc::dealloc(current.as_ptr().cast(), slab_layout);
            }
            match prev {
                Some(p) => current = p,
                None => break,
            }
        }
    }
}

/// A temporary byte buffer that writes directly into an [`Arena`] slab.
///
/// Scratch tracks its own write position without advancing the arena's bump
/// pointer. On [`commit`](Scratch::commit) the arena pointer is advanced past
/// the committed bytes. If the scratch is dropped without committing, the arena
/// pointer is unchanged and the space is reused by subsequent allocations.
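///
/// A minimal crate-internal sketch of the commit-versus-drop behaviour (marked
/// `ignore`, so it is not compiled as a doctest):
///
/// ```ignore
/// let arena = Arena::new();
/// // Safety: `alloc` is not called while a `Scratch` is alive.
/// let mut scratch = unsafe { arena.scratch() };
/// scratch.push(b'a');
/// drop(scratch); // nothing committed; the space will be reused
///
/// let mut scratch = unsafe { arena.scratch() };
/// scratch.extend(b"bc");
/// let committed = scratch.commit(); // bump pointer advances past b"bc"
/// assert_eq!(committed, b"bc");
/// ```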
pub(crate) struct Scratch<'a> {
    arena: &'a Arena,
    start: NonNull<u8>,
    len: usize,
    cap: usize,
}

impl<'a> Scratch<'a> {
    #[inline]
    pub fn push(&mut self, byte: u8) {
        let len = self.len;
        if len == self.cap {
            self.grow_slow(1);
        }
        // Safety: len < cap, so start + len is within the slab.
        unsafe {
            self.start.as_ptr().add(len).write(byte);
        }
        self.len = len + 1;
    }

    #[inline]
    pub fn extend(&mut self, bytes: &[u8]) {
        if bytes.len() > self.cap - self.len {
            self.grow_slow(bytes.len());
        }
        // Safety: cap - len >= bytes.len(), so the copy is in bounds.
        unsafe {
            ptr::copy_nonoverlapping(
                bytes.as_ptr(),
                self.start.as_ptr().add(self.len),
                bytes.len(),
            );
        }
        self.len += bytes.len();
    }

    /// Pushes bytes while stripping underscores. Returns `false` if any
    /// underscore is not placed between two ASCII digits.
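    ///
    /// A minimal crate-internal sketch, assuming `scratch` is a live `Scratch`
    /// (marked `ignore`, so it is not compiled as a doctest):
    ///
    /// ```ignore
    /// assert!(scratch.push_strip_underscores(b"1_000")); // appends b"1000"
    /// assert!(!scratch.push_strip_underscores(b"1__0")); // `_` not between digits
    /// assert!(!scratch.push_strip_underscores(b"_1"));   // leading underscore
    /// ```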
    #[inline]
    pub(crate) fn push_strip_underscores(&mut self, bytes: &[u8]) -> bool {
        let mut prev = 0u8;
        for &b in bytes {
            if b == b'_' {
                if !prev.is_ascii_digit() {
                    return false;
                }
            } else {
                if prev == b'_' && !b.is_ascii_digit() {
                    return false;
                }
                self.push(b);
            }
            prev = b;
        }
        prev != b'_'
    }

    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        if self.len == 0 {
            return &[];
        }
        // Safety: start..start+len was written by us and is within the slab.
        unsafe { std::slice::from_raw_parts(self.start.as_ptr(), self.len) }
    }

    /// Finalizes the scratch data and returns it as a byte slice tied to the
    /// arena's lifetime. Advances the arena's bump pointer past the committed
    /// bytes.
    pub fn commit(self) -> &'a [u8] {
        if self.len == 0 {
            return &[];
        }
        // Safety: start..start+len is valid scratch memory within the arena.
        let slice = unsafe { std::slice::from_raw_parts(self.start.as_ptr(), self.len) };
        // Safety: start + len is within the slab (we ensured capacity on every write).
        unsafe {
            self.arena
                .ptr
                .set(NonNull::new_unchecked(self.start.as_ptr().add(self.len)));
        }
        slice
    }

    #[cold]
    #[inline(never)]
    fn grow_slow(&mut self, additional: usize) {
        let required = self.len.checked_add(additional).expect("scratch overflow");
        let new_cap = self.cap.saturating_mul(2).max(required);

        self.arena.grow(new_cap);

        // Copy existing scratch data to the start of the new slab.
        let new_start = self.arena.ptr.get();
        if self.len > 0 {
            // Safety: old data at self.start..+len is still valid (old slab hasn't been freed).
            // New slab has at least new_cap bytes of data space >= required > self.len.
            unsafe {
                ptr::copy_nonoverlapping(self.start.as_ptr(), new_start.as_ptr(), self.len);
            }
        }
        self.start = new_start;
        self.cap = self.arena.end.get().as_ptr() as usize - new_start.as_ptr() as usize;
    }
}

#[cfg(test)]
#[path = "./arena_tests.rs"]
mod tests;