toml_spanner/arena.rs
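//! Slab-based bump allocator backing the data structures produced by the
//! parser.
//!
//! Memory is carved out of slabs linked into a singly linked list through
//! `SlabHeader::prev`. Slabs at least double in size as the arena grows, and
//! all of them are freed when the `Arena` is dropped.
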
use std::alloc::Layout;
use std::cell::Cell;
use std::ptr::{self, NonNull};

const HEADER_SIZE: usize = std::mem::size_of::<SlabHeader>();
const INITIAL_SLAB_SIZE: usize = 4096;
const ALLOC_ALIGN: usize = 8;
const SLAB_ALIGN: usize = if std::mem::align_of::<SlabHeader>() >= ALLOC_ALIGN {
    std::mem::align_of::<SlabHeader>()
} else {
    ALLOC_ALIGN
};

const _: () = assert!(SLAB_ALIGN >= ALLOC_ALIGN);
const _: () = assert!(HEADER_SIZE.is_multiple_of(ALLOC_ALIGN));

#[repr(C)]
struct SlabHeader {
    prev: Option<NonNull<SlabHeader>>,
    size: usize,
}

// SAFETY: The only `SlabHeader` that needs `Sync` is the static `EMPTY_SLAB`
// sentinel. It is immutable (prev=None, size=0) and never written through the
// pointer. Heap-allocated SlabHeaders are only reachable through `Arena`, which
// is `!Sync` (contains `Cell`), so they are never shared across threads.
unsafe impl Sync for SlabHeader {}

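// Sentinel for an arena that has not allocated a slab yet. It keeps
// `Arena::slab` non-optional; `size == 0` marks the end of the slab list when
// `Drop` walks the `prev` pointers.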
static EMPTY_SLAB: SlabHeader = SlabHeader {
    prev: None,
    size: 0,
};

/// Bump allocator backing all data structures produced by the parser.
///
/// Create an `Arena` before calling [`parse`](crate::parse) and pass it by
/// reference. The arena must outlive the returned [`Document`](crate::Document)
/// because parsed values borrow from it. All memory is freed on drop.
///
/// # Examples
///
/// ```
/// let arena = toml_spanner::Arena::new();
/// let doc = toml_spanner::parse("key = \"value\"", &arena)?;
/// // `doc` borrows from both the input string and `arena`.
/// # Ok::<(), toml_spanner::Error>(())
/// ```
pub struct Arena {
    ptr: Cell<NonNull<u8>>,
    end: Cell<NonNull<u8>>,
    slab: Cell<NonNull<SlabHeader>>,
}

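// `Arena` is three pointer-sized fields (bump pointer, slab end, current slab
// header), so it should stay at 24 bytes on 64-bit targets: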
#[cfg(target_pointer_width = "64")]
const _: () = assert!(std::mem::size_of::<Arena>() == 24);

impl Default for Arena {
    fn default() -> Self {
        Self::new()
    }
}

impl Arena {
    /// Creates a new, empty arena.
    pub fn new() -> Self {
        // SAFETY: EMPTY_SLAB is a static with a stable address.
        let sentinel =
            unsafe { NonNull::new_unchecked(&EMPTY_SLAB as *const SlabHeader as *mut SlabHeader) };
        let dangling = NonNull::dangling();
        Arena {
            ptr: Cell::new(dangling),
            end: Cell::new(dangling),
            slab: Cell::new(sentinel),
        }
    }

    /// Allocates `size` bytes aligned to 8.
    ///
    /// Returns a non-null pointer to the allocated region. Aborts on OOM.
    #[inline]
    pub(crate) fn alloc(&self, size: usize) -> NonNull<u8> {
        let ptr = self.ptr.get();
        let addr = ptr.as_ptr() as usize;
        let aligned_addr = (addr + ALLOC_ALIGN - 1) & !(ALLOC_ALIGN - 1);
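        // e.g. addr = 0x1005 rounds up to aligned_addr = 0x1008 (3 bytes of
        // padding); an already aligned addr is unchanged.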
        let padding = aligned_addr - addr;
        let needed = padding.saturating_add(size);
        let remaining = self.end.get().as_ptr() as usize - addr;

        let result = if needed <= remaining {
            // SAFETY: `needed` bytes fit within the current slab (end >= ptr
            // is an invariant, so `remaining` cannot underflow). Using add()
            // from `ptr` preserves provenance.
            unsafe {
                self.ptr
                    .set(NonNull::new_unchecked(ptr.as_ptr().add(needed)));
                NonNull::new_unchecked(ptr.as_ptr().add(padding))
            }
        } else {
            self.alloc_slow(size)
        };

        // Under Miri (test builds), re-derive the pointer through a slice of
        // exactly `size` bytes so the Stacked Borrows tag covers only this
        // allocation and out-of-bounds accesses are caught. The `if true`
        // keeps the trailing `result` from being flagged as unreachable.
        #[cfg(test)]
        if true {
            use std::mem::MaybeUninit;

            unsafe {
                return NonNull::new_unchecked(
                    std::slice::from_raw_parts_mut(result.cast::<MaybeUninit<u8>>().as_ptr(), size)
                        .as_mut_ptr()
                        .cast(),
                );
            }
        }

        result
    }

    /// Grows an existing allocation in place when possible; otherwise
    /// allocates a new region and copies the old data.
    ///
    /// Returns the (possibly unchanged) pointer to the allocation.
    ///
    /// # Safety
    ///
    /// - `ptr` must have been returned by a prior `alloc` on this arena whose
    ///   size was `old_size`.
    /// - `new_size` must be `>= old_size`. Violating this causes wrapping
    ///   underflow in the in-place growth path or an out-of-bounds copy.
    pub(crate) unsafe fn realloc(
        &self,
        ptr: NonNull<u8>,
        old_size: usize,
        new_size: usize,
    ) -> NonNull<u8> {
        debug_assert!(new_size >= old_size);

        // Use integer arithmetic for bounds checks to avoid creating
        // out-of-bounds pointers and to preserve strict provenance.
        let old_end = ptr.as_ptr() as usize + old_size;
        let head = self.ptr.get().as_ptr() as usize;

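        // In-place growth is only possible when `ptr` is the most recent
        // allocation, i.e. its end coincides with the bump pointer: e.g. with
        // ptr = 0x1000 and old_size = 8, growth in place requires the bump
        // pointer to still be at 0x1008.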
        if old_end == head {
            // Compare as `new_size <= end - ptr` instead of `ptr + new_size <= end`
            // to avoid wrapping overflow when `new_size` is very large.
            let remaining = self.end.get().as_ptr() as usize - ptr.as_ptr() as usize;
            if new_size <= remaining {
                // SAFETY: the new end is within the current slab. Advance the
                // bump pointer by the growth delta, preserving provenance via
                // add().
                let extra = new_size - old_size;
                unsafe {
                    let head_ptr = self.ptr.get().as_ptr();
                    self.ptr.set(NonNull::new_unchecked(head_ptr.add(extra)));
                    // Re-derive from the bump pointer (which carries full slab
                    // provenance) instead of returning `ptr` directly, because
                    // the Miri tagging in alloc() restricts `ptr`'s Stacked
                    // Borrows tag to `old_size` bytes.
                    let result = NonNull::new_unchecked(head_ptr.sub(old_size));

                    #[cfg(test)]
                    if true {
                        use std::mem::MaybeUninit;
                        return NonNull::new_unchecked(
                            std::slice::from_raw_parts_mut(
                                result.cast::<MaybeUninit<u8>>().as_ptr(),
                                new_size,
                            )
                            .as_mut_ptr()
                            .cast(),
                        );
                    }

                    return result;
                }
            }
        }

        let new_ptr = self.alloc(new_size);
        // SAFETY:
        // - The source (`ptr`) comes from a prior alloc of `old_size` bytes and
        //   is valid to read.
        // - The destination (`new_ptr`) is a fresh alloc of `new_size >=
        //   old_size` bytes.
        // - The regions do not overlap because `new_ptr` comes from a
        //   subsequent bump (in the same or a different slab), so it cannot
        //   alias the `ptr` region.
        unsafe { ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_ptr(), old_size) };
        new_ptr
    }
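
    // Usage sketch (hypothetical caller, not part of this file): an
    // arena-backed growable buffer could double its capacity like this:
    //
    //     let new_cap = cap.max(1) * 2;
    //     buf = unsafe { arena.realloc(buf, cap, new_cap) };
    //     cap = new_cap;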

    #[cold]
    #[inline(never)]
    fn alloc_slow(&self, size: usize) -> NonNull<u8> {
        self.grow(size);

        let ptr = self.ptr.get();
        let addr = ptr.as_ptr() as usize;
        let aligned_addr = (addr + ALLOC_ALIGN - 1) & !(ALLOC_ALIGN - 1);
        let padding = aligned_addr - addr;
        // Cannot overflow: grow() panics if HEADER_SIZE + size overflows,
        // and the post-grow ptr is SLAB_ALIGN-aligned, so padding is always 0.
        let needed = padding + size;
        debug_assert!(needed <= self.end.get().as_ptr() as usize - addr);

        // SAFETY: grow() guarantees the new slab is large enough. Using add()
        // from `ptr` preserves provenance.
        unsafe {
            self.ptr
                .set(NonNull::new_unchecked(ptr.as_ptr().add(needed)));
            NonNull::new_unchecked(ptr.as_ptr().add(padding))
        }
    }

    /// Creates a scratch buffer that writes into the arena's current slab.
    ///
    /// # Safety
    ///
    /// The caller must not call `alloc` on this arena while the returned
    /// `Scratch` is alive: the scratch exclusively owns the bump region.
    pub(crate) unsafe fn scratch(&self) -> Scratch<'_> {
        let start = self.ptr.get();
        let cap = self.end.get().as_ptr() as usize - start.as_ptr() as usize;
        Scratch {
            arena: self,
            start,
            len: 0,
            cap,
        }
    }
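
    // Usage sketch (hypothetical caller, not part of this file): unescaping a
    // TOML string into the arena could look like:
    //
    //     let mut s = unsafe { arena.scratch() };
    //     s.extend(b"hello");
    //     s.push(b'!');
    //     let bytes: &[u8] = s.commit(); // b"hello!", owned by the arena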

    fn grow(&self, size: usize) {
        // SAFETY: self.slab always points to a valid SlabHeader: either the
        // static EMPTY_SLAB sentinel or a heap-allocated header written in
        // grow().
        let current_size = unsafe { self.slab.get().as_ref().size };

        let min_slab = HEADER_SIZE.checked_add(size).expect("layout overflow");

        let new_size = current_size
            .saturating_mul(2)
            .max(min_slab)
            .max(INITIAL_SLAB_SIZE);
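        // e.g. growing from a 4096-byte slab for a 10_000-byte request (with a
        // 16-byte header on 64-bit targets): new_size = max(8192, 10_016, 4096)
        // = 10_016.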

        let slab_layout =
            Layout::from_size_align(new_size, SLAB_ALIGN).expect("slab layout overflow");

        // SAFETY: slab_layout was validated by Layout::from_size_align above.
        let raw = unsafe { std::alloc::alloc(slab_layout) };
        let Some(base) = NonNull::new(raw) else {
            std::alloc::handle_alloc_error(slab_layout);
        };

        // SAFETY: base points to a freshly allocated region of new_size bytes.
        unsafe {
            let header_ptr = base.as_ptr().cast::<SlabHeader>();
            header_ptr.write(SlabHeader {
                prev: Some(self.slab.get()),
                size: new_size,
            });

            self.slab.set(NonNull::new_unchecked(header_ptr));
            self.ptr
                .set(NonNull::new_unchecked(base.as_ptr().add(HEADER_SIZE)));
            self.end
                .set(NonNull::new_unchecked(base.as_ptr().add(new_size)));
        }
    }

    /// Allocates a copy of `s` in the arena and returns a reference to it.
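    ///
    /// # Examples
    ///
    /// ```
    /// let arena = toml_spanner::Arena::new();
    /// let s = arena.alloc_str("hello");
    /// assert_eq!(s, "hello");
    /// ```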
    pub fn alloc_str(&self, s: &str) -> &str {
        if s.is_empty() {
            return "";
        }
        let dst = self.alloc(s.len());
        // SAFETY: dst is a fresh arena allocation of s.len() bytes, disjoint
        // from s. The resulting slice is valid UTF-8 because s is.
        unsafe {
            std::ptr::copy_nonoverlapping(s.as_ptr(), dst.as_ptr(), s.len());
            std::str::from_utf8_unchecked(std::slice::from_raw_parts(dst.as_ptr(), s.len()))
        }
    }
}

impl Drop for Arena {
    fn drop(&mut self) {
        let mut current = self.slab.get();
        loop {
            // SAFETY: current is either a heap slab or the static sentinel.
            let header = unsafe { current.as_ref() };
            if header.size == 0 {
                break;
            }
            let prev = header.prev;
            // SAFETY: header.size and SLAB_ALIGN match the layout used in grow().
            let slab_layout = unsafe { Layout::from_size_align_unchecked(header.size, SLAB_ALIGN) };
            unsafe {
                std::alloc::dealloc(current.as_ptr().cast(), slab_layout);
            }
            match prev {
                Some(p) => current = p,
                None => break,
            }
        }
    }
}

/// A temporary byte buffer that writes directly into an [`Arena`] slab.
///
/// A `Scratch` tracks its own write position without advancing the arena's
/// bump pointer. On [`commit`](Scratch::commit) the arena pointer is advanced
/// past the committed bytes. If the scratch is dropped without committing, the
/// arena pointer is unchanged and the space is reused by subsequent
/// allocations.
pub(crate) struct Scratch<'a> {
    arena: &'a Arena,
    start: NonNull<u8>,
    len: usize,
    cap: usize,
}

impl<'a> Scratch<'a> {
    #[inline]
    pub fn push(&mut self, byte: u8) {
        let len = self.len;
        if len == self.cap {
            self.grow_slow(1);
        }
        // SAFETY: len < cap, so start + len is within the slab.
        unsafe {
            self.start.as_ptr().add(len).write(byte);
        }
        self.len = len + 1;
    }

    #[inline]
    pub fn extend(&mut self, bytes: &[u8]) {
        if bytes.len() > self.cap - self.len {
            self.grow_slow(bytes.len());
        }
        // SAFETY: cap - len >= bytes.len(), so the copy is in bounds.
        unsafe {
            ptr::copy_nonoverlapping(
                bytes.as_ptr(),
                self.start.as_ptr().add(self.len),
                bytes.len(),
            );
        }
        self.len += bytes.len();
    }

    /// Pushes bytes while stripping underscores. Returns `false` if any
    /// underscore is not placed between two ASCII digits.
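    ///
    /// For example, `1_000` is pushed as `1000` and returns `true`, while
    /// `_1`, `1__000`, and `1_` all return `false`.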
    #[inline]
    pub(crate) fn push_strip_underscores(&mut self, bytes: &[u8]) -> bool {
        let mut prev = 0u8;
        for &b in bytes {
            if b == b'_' {
                if !prev.is_ascii_digit() {
                    return false;
                }
            } else {
                if prev == b'_' && !b.is_ascii_digit() {
                    return false;
                }
                self.push(b);
            }
            prev = b;
        }
        prev != b'_'
    }

    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        if self.len == 0 {
            return &[];
        }
        // SAFETY: start..start+len was written by us and is within the slab.
        unsafe { std::slice::from_raw_parts(self.start.as_ptr(), self.len) }
    }

    /// Finalizes the scratch data and returns it as a byte slice tied to the
    /// arena's lifetime. Advances the arena's bump pointer past the committed
    /// bytes.
    pub fn commit(self) -> &'a [u8] {
        if self.len == 0 {
            return &[];
        }
        // SAFETY: start..start+len is valid scratch memory within the arena.
        let slice = unsafe { std::slice::from_raw_parts(self.start.as_ptr(), self.len) };
        // SAFETY: start + len is within the slab (we ensured capacity on every
        // write).
        unsafe {
            self.arena
                .ptr
                .set(NonNull::new_unchecked(self.start.as_ptr().add(self.len)));
        }
        slice
    }

    #[cold]
    #[inline(never)]
    fn grow_slow(&mut self, additional: usize) {
        let required = self.len.checked_add(additional).expect("scratch overflow");
        let new_cap = self.cap.saturating_mul(2).max(required);

        self.arena.grow(new_cap);

        // Copy any existing scratch data to the start of the new slab.
        let new_start = self.arena.ptr.get();
        if self.len > 0 {
            // SAFETY: the old data at self.start..+len is still valid (the old
            // slab has not been freed). The new slab has at least new_cap bytes
            // of data space, and new_cap >= required > self.len.
            unsafe {
                ptr::copy_nonoverlapping(self.start.as_ptr(), new_start.as_ptr(), self.len);
            }
        }
        self.start = new_start;
        self.cap = self.arena.end.get().as_ptr() as usize - new_start.as_ptr() as usize;
    }
}

#[cfg(test)]
#[path = "./arena_tests.rs"]
mod tests;