// toml_spanner — src/arena.rs
1#![allow(unsafe_code)]
2
3use std::alloc::Layout;
4use std::cell::Cell;
5use std::ptr::{self, NonNull};
6
/// Alignment of every slab allocation (the alignment of `SlabHeader`).
const SLAB_ALIGN: usize = std::mem::align_of::<SlabHeader>();
/// Bytes reserved at the start of each heap slab for its `SlabHeader`.
const HEADER_SIZE: usize = std::mem::size_of::<SlabHeader>();
/// Size in bytes of the first heap slab; later slabs double (see `grow`).
const INITIAL_SLAB_SIZE: usize = 1024;
/// Alignment guaranteed by `Arena::alloc`.
const ALLOC_ALIGN: usize = 8;

// Compile-time layout checks: the header is exactly 16 bytes and slab
// alignment equals the allocation alignment, so the data region that starts
// HEADER_SIZE bytes into a slab is already ALLOC_ALIGN-aligned.
const _: () = assert!(HEADER_SIZE == 16);
const _: () = assert!(SLAB_ALIGN == ALLOC_ALIGN);
14
/// Header written at the start of every heap slab. The usable data region
/// begins immediately after it, `HEADER_SIZE` bytes into the slab.
#[repr(C)]
struct SlabHeader {
    /// Previous slab in the singly linked chain walked by `Drop`;
    /// `None` for the first heap slab.
    prev: Option<NonNull<SlabHeader>>,
    /// Total size of this slab in bytes, including the header.
    /// A size of 0 marks the static `EMPTY_SLAB` sentinel.
    size: usize,
}

// Safety: EMPTY_SLAB is an immutable sentinel (prev=None, size=0). SlabHeaders
// on the heap are only reachable through Arena, which is !Sync due to Cell.
unsafe impl Sync for SlabHeader {}

/// Sentinel "current slab" for a freshly created arena. Its `size` of 0 means
/// `Drop` frees nothing for it, and `grow` starts sizing from
/// `INITIAL_SLAB_SIZE` rather than doubling.
static EMPTY_SLAB: SlabHeader = SlabHeader {
    prev: None,
    size: 0,
};
29
/// A bump allocator used by the parser to store escaped strings.
///
/// Create an `Arena` before calling [`parse`](crate::parse) and pass it by
/// reference. The arena must live at least as long as the parsed [`Table`](crate::Table)
/// because [`Str`](crate::Str) values may borrow from it.
///
/// All memory is freed when the arena is dropped.
///
/// # Examples
///
/// ```
/// let arena = toml_spanner::Arena::new();
/// let table = toml_spanner::parse("key = \"value\"", &arena)?;
/// // `table` borrows from both the input string and `arena`.
/// # Ok::<(), toml_spanner::Error>(())
/// ```
pub struct Arena {
    /// Bump pointer: the next free byte in the current slab.
    ptr: Cell<NonNull<u8>>,
    /// One past the last usable byte of the current slab.
    end: Cell<NonNull<u8>>,
    /// Most recently allocated slab, or the `EMPTY_SLAB` sentinel.
    slab: Cell<NonNull<SlabHeader>>,
}

// Keep this hot type at exactly three pointers (24 bytes on 64-bit targets).
const _: () = assert!(std::mem::size_of::<Arena>() == 24);
53
54impl Default for Arena {
55    fn default() -> Self {
56        Self::new()
57    }
58}
59
impl Arena {
    /// Creates a new, empty arena.
    ///
    /// No heap memory is allocated until the first `alloc`: `ptr == end`
    /// (both dangling), so the very first allocation falls through to
    /// `alloc_slow`, and `slab` points at the static sentinel.
    pub fn new() -> Self {
        // Safety: EMPTY_SLAB is a static with a stable address.
        let sentinel =
            unsafe { NonNull::new_unchecked(&EMPTY_SLAB as *const SlabHeader as *mut SlabHeader) };
        let dangling = NonNull::dangling();
        Arena {
            ptr: Cell::new(dangling),
            end: Cell::new(dangling),
            slab: Cell::new(sentinel),
        }
    }

    /// Allocate `size` bytes aligned to 8.
    ///
    /// Returns a non-null pointer to the allocated region. Aborts on OOM.
    #[inline]
    pub(crate) fn alloc(&self, size: usize) -> NonNull<u8> {
        let ptr = self.ptr.get();
        let addr = ptr.as_ptr() as usize;
        // Round the bump address up to the next multiple of ALLOC_ALIGN
        // (a power of two, so the add-and-mask trick is exact).
        let aligned_addr = (addr + ALLOC_ALIGN - 1) & !(ALLOC_ALIGN - 1);
        let padding = aligned_addr - addr;
        let needed = padding + size;

        // NOTE(review): `addr + needed` could wrap for a pathologically huge
        // `size`, which would make this bounds check pass spuriously. Callers
        // appear to pass sizes bounded by the parsed document, so this looks
        // unreachable in practice — confirm if alloc is ever exposed wider.
        if addr + needed <= self.end.get().as_ptr() as usize {
            // Safety: padding+size bytes fit within the current slab.
            // Using add() from ptr preserves provenance.
            unsafe {
                self.ptr
                    .set(NonNull::new_unchecked(ptr.as_ptr().add(needed)));
                NonNull::new_unchecked(ptr.as_ptr().add(padding))
            }
        } else {
            self.alloc_slow(size)
        }
    }

    /// Grow an existing allocation in place when possible, otherwise allocate
    /// a new region and copy the old data.
    ///
    /// Returns the (possibly unchanged) pointer to the allocation.
    ///
    /// # Safety
    ///
    /// `ptr` must have been returned by a prior `alloc` on this arena whose
    /// size was `old_size`. `new_size` must be `>= old_size`.
    pub(crate) unsafe fn realloc(
        &self,
        ptr: NonNull<u8>,
        old_size: usize,
        new_size: usize,
    ) -> NonNull<u8> {
        debug_assert!(new_size >= old_size);

        // Use integer arithmetic for bounds checks to avoid creating
        // out-of-bounds pointers and to preserve strict provenance.
        let old_end = ptr.as_ptr() as usize + old_size;
        let head = self.ptr.get().as_ptr() as usize;

        // Fast path: this allocation is the most recent one (it ends exactly
        // at the bump pointer), so it can be extended in place.
        if old_end == head {
            let new_end = ptr.as_ptr() as usize + new_size;
            if new_end <= self.end.get().as_ptr() as usize {
                // Safety: new_end is within the current slab. Advance the bump
                // pointer by the growth delta, preserving provenance via add().
                let extra = new_size - old_size;
                unsafe {
                    self.ptr
                        .set(NonNull::new_unchecked(self.ptr.get().as_ptr().add(extra)));
                }
                return ptr;
            }
        }

        // Slow path: fresh allocation + copy. The old region is abandoned
        // (bump arenas never free individual allocations before drop).
        let new_ptr = self.alloc(new_size);
        // Safety: old and new regions are non-overlapping arena allocations,
        // and old_size <= new_size <= new allocation size.
        unsafe { ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_ptr(), old_size) };
        new_ptr
    }

    // Out-of-line fallback for alloc(): allocates a new slab, then redoes the
    // bump. Kept #[cold]/#[inline(never)] so the hot alloc() path stays small.
    #[cold]
    #[inline(never)]
    fn alloc_slow(&self, size: usize) -> NonNull<u8> {
        self.grow(size);

        // Recompute against the new slab's bump pointer. Since the data region
        // starts HEADER_SIZE (16) bytes into an 8-aligned slab, padding is 0
        // here, but the general form mirrors alloc() for clarity.
        let ptr = self.ptr.get();
        let addr = ptr.as_ptr() as usize;
        let aligned_addr = (addr + ALLOC_ALIGN - 1) & !(ALLOC_ALIGN - 1);
        let padding = aligned_addr - addr;
        let needed = padding + size;
        debug_assert!(addr + needed <= self.end.get().as_ptr() as usize);

        // Safety: grow() guarantees the new slab is large enough.
        // Using add() from ptr preserves provenance.
        unsafe {
            self.ptr
                .set(NonNull::new_unchecked(ptr.as_ptr().add(needed)));
            NonNull::new_unchecked(ptr.as_ptr().add(padding))
        }
    }

    /// Create a scratch buffer that writes into the arena's current slab.
    ///
    /// # Safety
    ///
    /// The caller must not call `alloc` on this arena while the returned
    /// `Scratch` is alive. The scratch exclusively owns the bump region.
    pub(crate) unsafe fn scratch(&self) -> Scratch<'_> {
        // The scratch starts at the current bump pointer and may use all
        // remaining capacity in the slab without advancing the bump pointer.
        let start = self.ptr.get();
        let cap = self.end.get().as_ptr() as usize - start.as_ptr() as usize;
        Scratch {
            arena: self,
            start,
            len: 0,
            cap,
        }
    }

    // Allocate a new slab with at least `size` usable bytes and make it
    // current. The old slab stays alive (linked via `prev`) until drop, so
    // pointers into it remain valid.
    fn grow(&self, size: usize) {
        // Safety: slab always points at a valid SlabHeader — either a heap
        // slab created below or the static sentinel (size 0).
        let current_size = unsafe { self.slab.get().as_ref().size };

        // Smallest slab that fits the header plus the requested bytes.
        let min_slab = HEADER_SIZE.checked_add(size).expect("layout overflow");

        // Geometric growth: double the previous slab, but never smaller than
        // this request or the initial slab size.
        let new_size = current_size
            .saturating_mul(2)
            .max(min_slab)
            .max(INITIAL_SLAB_SIZE);

        let slab_layout =
            Layout::from_size_align(new_size, SLAB_ALIGN).expect("slab layout overflow");

        let raw = unsafe { std::alloc::alloc(slab_layout) };
        let Some(base) = NonNull::new(raw) else {
            // Aborts (or runs the registered hook); grow never returns null.
            std::alloc::handle_alloc_error(slab_layout);
        };

        // Safety: base points to a freshly allocated region of new_size bytes.
        unsafe {
            let header_ptr = base.as_ptr().cast::<SlabHeader>();
            // Link the old slab so Drop can walk and free the whole chain.
            header_ptr.write(SlabHeader {
                prev: Some(self.slab.get()),
                size: new_size,
            });

            self.slab.set(NonNull::new_unchecked(header_ptr));
            // Data region: [base + HEADER_SIZE, base + new_size).
            self.ptr
                .set(NonNull::new_unchecked(base.as_ptr().add(HEADER_SIZE)));
            self.end
                .set(NonNull::new_unchecked(base.as_ptr().add(new_size)));
        }
    }
}
213
impl Drop for Arena {
    // Walk the slab chain from newest to oldest, freeing each heap slab with
    // the same layout it was allocated with. Stops at the static sentinel.
    fn drop(&mut self) {
        let mut current = self.slab.get();
        loop {
            // Safety: current is either a heap slab or the static sentinel.
            let header = unsafe { current.as_ref() };
            // size == 0 identifies EMPTY_SLAB, which was never heap-allocated.
            if header.size == 0 {
                break;
            }
            // Read `prev` before deallocating the slab that stores it.
            let prev = header.prev;
            // Safety: header.size and SLAB_ALIGN match the layout used in grow().
            let slab_layout = unsafe { Layout::from_size_align_unchecked(header.size, SLAB_ALIGN) };
            unsafe {
                std::alloc::dealloc(current.as_ptr().cast(), slab_layout);
            }
            match prev {
                Some(p) => current = p,
                None => break,
            }
        }
    }
}
236
/// A temporary byte buffer that writes directly into an [`Arena`] slab.
///
/// Scratch tracks its own write position without advancing the arena's bump
/// pointer. On [`commit`](Scratch::commit) the arena pointer is advanced past
/// the committed bytes. If the scratch is dropped without committing, the arena
/// pointer is unchanged and the space is reused by subsequent allocations.
pub(crate) struct Scratch<'a> {
    /// Owning arena; `grow_slow` allocates new slabs through it.
    arena: &'a Arena,
    /// First byte of the scratch region (equals the arena's bump pointer at
    /// creation time; may move to a new slab after `grow_slow`).
    start: NonNull<u8>,
    /// Bytes written so far.
    len: usize,
    /// Bytes available at `start` before another slab is needed.
    cap: usize,
}
249
impl<'a> Scratch<'a> {
    /// Append a single byte, growing into a new slab if the current one is full.
    #[inline]
    pub fn push(&mut self, byte: u8) {
        let len = self.len;
        if len == self.cap {
            // grow_slow guarantees cap >= len + 1 afterwards.
            self.grow_slow(1);
        }
        // Safety: len < cap, so start + len is within the slab.
        unsafe {
            self.start.as_ptr().add(len).write(byte);
        }
        self.len = len + 1;
    }

    /// Append a byte slice, growing first if it would overrun the capacity.
    #[inline]
    pub fn extend(&mut self, bytes: &[u8]) {
        // cap >= len is an invariant, so the subtraction cannot underflow.
        if bytes.len() > self.cap - self.len {
            self.grow_slow(bytes.len());
        }
        // Safety: cap - len >= bytes.len(), so the copy is in bounds.
        unsafe {
            ptr::copy_nonoverlapping(
                bytes.as_ptr(),
                self.start.as_ptr().add(self.len),
                bytes.len(),
            );
        }
        self.len += bytes.len();
    }

    /// Push bytes while stripping underscores. Returns `false` if any
    /// underscore is not placed between two ASCII digits.
    ///
    /// NOTE(review): only `0-9` count as digits here (`is_ascii_digit`), so a
    /// whole hex literal such as `0xdead_beef` would be rejected if passed in.
    /// Presumably callers pass only decimal digit runs — confirm at call sites.
    #[inline]
    pub(crate) fn push_strip_underscores(&mut self, bytes: &[u8]) -> bool {
        // prev starts at 0 (not a digit), so a leading underscore fails.
        let mut prev = 0u8;
        for &b in bytes {
            if b == b'_' {
                // Underscore must follow a digit...
                if !prev.is_ascii_digit() {
                    return false;
                }
            } else {
                // ...and must be followed by a digit.
                if prev == b'_' && !b.is_ascii_digit() {
                    return false;
                }
                self.push(b);
            }
            prev = b;
        }
        // A trailing underscore is also invalid.
        prev != b'_'
    }

    /// View the bytes written so far.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        // Guard the empty case: `start` may still be the arena's dangling
        // initial pointer before anything was written.
        if self.len == 0 {
            return &[];
        }
        // Safety: start..start+len was written by us and is within the slab.
        unsafe { std::slice::from_raw_parts(self.start.as_ptr(), self.len) }
    }

    /// Finalize the scratch data and return it as a byte slice tied to the
    /// arena's lifetime. Advances the arena's bump pointer past the committed
    /// bytes.
    pub fn commit(self) -> &'a [u8] {
        if self.len == 0 {
            return &[];
        }
        // Safety: start..start+len is valid scratch memory within the arena.
        let slice = unsafe { std::slice::from_raw_parts(self.start.as_ptr(), self.len) };
        // Safety: start + len is within the slab (we ensured capacity on every write).
        unsafe {
            self.arena
                .ptr
                .set(NonNull::new_unchecked(self.start.as_ptr().add(self.len)));
        }
        slice
    }

    // Allocate a new slab big enough for len + additional bytes and relocate
    // the scratch data to its start. The old slab is not freed until the arena
    // drops, so the source of the copy stays valid.
    #[cold]
    #[inline(never)]
    fn grow_slow(&mut self, additional: usize) {
        let required = self.len.checked_add(additional).expect("scratch overflow");
        // Double capacity to amortize repeated growth, but never below required.
        let new_cap = self.cap.saturating_mul(2).max(required);

        self.arena.grow(new_cap);

        // Copy existing scratch data to the start of the new slab.
        let new_start = self.arena.ptr.get();
        if self.len > 0 {
            // Safety: old data at self.start..+len is still valid (old slab hasn't been freed).
            // New slab has at least new_cap bytes of data space >= required > self.len.
            unsafe {
                ptr::copy_nonoverlapping(self.start.as_ptr(), new_start.as_ptr(), self.len);
            }
        }
        self.start = new_start;
        // Take the whole remaining data region of the new slab as capacity.
        self.cap = self.arena.end.get().as_ptr() as usize - new_start.as_ptr() as usize;
    }
}
349
// Unit tests live in a sibling file to keep this module focused on the
// allocator itself.
#[cfg(test)]
#[path = "./arena_tests.rs"]
mod tests;