// toml_spanner/arena.rs
1use std::alloc::Layout;
2use std::cell::Cell;
3use std::ptr::{self, NonNull};
4
// Size of the per-slab bookkeeping header placed at the start of every slab.
const HEADER_SIZE: usize = std::mem::size_of::<SlabHeader>();
// First heap slab is 4 KiB; later slabs double in size (see `Arena::grow`).
const INITIAL_SLAB_SIZE: usize = 4096;
// Every pointer handed out by `Arena::alloc` is aligned to 8 bytes.
const ALLOC_ALIGN: usize = 8;
// Slabs use the stricter of the header's alignment and ALLOC_ALIGN so that
// both the header and the data area directly after it are well aligned.
const SLAB_ALIGN: usize = if std::mem::align_of::<SlabHeader>() >= ALLOC_ALIGN {
    std::mem::align_of::<SlabHeader>()
} else {
    ALLOC_ALIGN
};

// Compile-time invariants: a slab base aligned to SLAB_ALIGN plus HEADER_SIZE
// bytes leaves the data area ALLOC_ALIGN-aligned, so the first allocation in a
// fresh slab needs no padding (alloc_slow relies on this).
const _: () = assert!(SLAB_ALIGN >= ALLOC_ALIGN);
const _: () = assert!(HEADER_SIZE.is_multiple_of(ALLOC_ALIGN));
16
/// Bookkeeping stored at the start of each heap slab.
///
/// Slabs form a singly linked list (newest first) through `prev`; `size` is
/// the total slab size in bytes *including* this header. A `size` of 0 marks
/// the static `EMPTY_SLAB` sentinel and terminates chain walks in `Drop`.
#[repr(C)]
struct SlabHeader {
    // Previously allocated slab, or `None` for the sentinel / first slab link.
    prev: Option<NonNull<SlabHeader>>,
    // Total bytes allocated for this slab (header + data). 0 => sentinel.
    size: usize,
}

// SAFETY: The only `SlabHeader` that needs `Sync` is the static `EMPTY_SLAB`
// sentinel. It is immutable (prev=None, size=0) and never written through the
// pointer. Heap-allocated SlabHeaders are only reachable through `Arena`, which
// is `!Sync` (contains `Cell`), so they are never shared across threads.
unsafe impl Sync for SlabHeader {}

// Sentinel used as the initial `Arena::slab` value so `grow` and `Drop` never
// have to special-case an empty chain; `size == 0` stops the drop walk before
// it would `dealloc` this static.
static EMPTY_SLAB: SlabHeader = SlabHeader {
    prev: None,
    size: 0,
};
33
/// Bump allocator backing all data structures produced by the parser.
///
/// Create an `Arena` before calling [`parse`](crate::parse) and pass it by
/// reference. The arena must outlive the returned [`Document`](crate::Document)
/// because parsed values borrow from it. All memory is freed on drop.
///
/// # Examples
///
/// ```
/// let arena = toml_spanner::Arena::new();
/// let table = toml_spanner::parse("key = \"value\"", &arena)?;
/// // `table` borrows from both the input string and `arena`.
/// # Ok::<(), toml_spanner::Error>(())
/// ```
pub struct Arena {
    // Current bump position inside the active slab. Invariant: ptr <= end,
    // and both point into (or one-past) the same slab. On a fresh arena both
    // are dangling, which makes every fast path fail and routes to grow().
    ptr: Cell<NonNull<u8>>,
    // One-past-the-end of the active slab's usable region.
    end: Cell<NonNull<u8>>,
    // Head of the slab chain; starts at the EMPTY_SLAB sentinel.
    slab: Cell<NonNull<SlabHeader>>,
}

// Three pointers on 64-bit targets; keeps Arena cheap to pass by reference.
#[cfg(target_pointer_width = "64")]
const _: () = assert!(std::mem::size_of::<Arena>() == 24);
56
// SAFETY: `Arena` owns its heap-allocated slab chain exclusively. Moving the
// arena to another thread transfers that exclusive ownership, and the bump
// pointers held in `Cell` are simply data alongside the slabs. No other thread
// retains access to the slabs after the move, and the global allocator allows
// `dealloc` on a different thread than the one that called `alloc`.
//
// `Arena` is intentionally **not** `Sync`: `alloc`, `alloc_str`, and the
// reallocation helpers mutate the bump pointers through `&self` via `Cell`,
// which is not atomic. Concurrent allocation from two threads would race on
// those cells and corrupt the slab state. (The compiler already refuses to
// derive `Sync` because of the `Cell` fields; this comment records why that
// is the desired outcome, not an accident.)
unsafe impl Send for Arena {}
68
69impl Default for Arena {
70 fn default() -> Self {
71 Self::new()
72 }
73}
74
impl Arena {
    /// Creates a new, empty arena.
    ///
    /// No heap memory is allocated here: the slab chain starts at the static
    /// `EMPTY_SLAB` sentinel and both bump pointers are dangling (ptr == end,
    /// so the first allocation always takes the slow path and calls `grow`).
    pub fn new() -> Self {
        // Safety: EMPTY_SLAB is a static with a stable address.
        let sentinel =
            unsafe { NonNull::new_unchecked(&EMPTY_SLAB as *const SlabHeader as *mut SlabHeader) };
        let dangling = NonNull::dangling();
        Arena {
            ptr: Cell::new(dangling),
            end: Cell::new(dangling),
            slab: Cell::new(sentinel),
        }
    }

    /// Allocate `size` bytes aligned to 8.
    ///
    /// Returns a non-null pointer to the allocated region. Aborts on OOM.
    #[inline]
    pub(crate) fn alloc(&self, size: usize) -> NonNull<u8> {
        let ptr = self.ptr.get();
        // Bounds arithmetic is done on integers (not pointers) so no
        // out-of-bounds pointer is ever materialized.
        let addr = ptr.as_ptr() as usize;
        // Round up to the next ALLOC_ALIGN boundary (power-of-two mask trick).
        let aligned_addr = (addr + ALLOC_ALIGN - 1) & !(ALLOC_ALIGN - 1);
        let padding = aligned_addr - addr;
        // saturating_add: a near-usize::MAX `size` saturates, fails the
        // `needed <= remaining` test, and falls into alloc_slow, where
        // grow() reports the overflow via checked_add.
        let needed = padding.saturating_add(size);
        let remaining = self.end.get().as_ptr() as usize - addr;

        let result = if needed <= remaining {
            // Safety: needed bytes fit within the current slab (end >= ptr is
            // invariant, so remaining cannot underflow). Using add() from ptr
            // preserves provenance.
            unsafe {
                self.ptr
                    .set(NonNull::new_unchecked(ptr.as_ptr().add(needed)));
                NonNull::new_unchecked(ptr.as_ptr().add(padding))
            }
        } else {
            self.alloc_slow(size)
        };

        // Tag pointer for MIRI: re-borrow the region as an exact-size slice so
        // Stacked Borrows limits the returned tag to `size` bytes, catching
        // out-of-bounds use in tests. The `if true` wrapper gives the
        // `#[cfg(test)]` attribute a single statement to gate while allowing
        // an early `return` inside.
        #[cfg(test)]
        if true {
            use std::mem::MaybeUninit;

            unsafe {
                return NonNull::new_unchecked(
                    std::slice::from_raw_parts_mut(result.cast::<MaybeUninit<u8>>().as_ptr(), size)
                        .as_mut_ptr()
                        .cast(),
                );
            }
        }

        result
    }

    /// Grow an existing allocation in place when possible, otherwise allocate
    /// a new region and copy the old data.
    ///
    /// In-place growth succeeds only when `ptr` is the most recent allocation
    /// (its end coincides with the bump pointer) and the current slab has room.
    ///
    /// Returns the (possibly unchanged) pointer to the allocation.
    ///
    /// # Safety
    ///
    /// - `ptr` must have been returned by a prior `alloc` on this arena whose
    ///   size was `old_size`.
    /// - `new_size` must be `>= old_size`. Violating this causes wrapping
    ///   underflow in the in place growth path or an out of bounds copy.
    pub(crate) unsafe fn realloc(
        &self,
        ptr: NonNull<u8>,
        old_size: usize,
        new_size: usize,
    ) -> NonNull<u8> {
        debug_assert!(new_size >= old_size);

        // Use integer arithmetic for bounds checks to avoid creating
        // out-of-bounds pointers and to preserve strict provenance.
        let old_end = ptr.as_ptr() as usize + old_size;
        let head = self.ptr.get().as_ptr() as usize;

        if old_end == head {
            // Compare as new_size <= end - ptr instead of ptr + new_size <= end
            // to avoid wrapping overflow when new_size is very large.
            let remaining = self.end.get().as_ptr() as usize - ptr.as_ptr() as usize;
            if new_size <= remaining {
                // Safety: new_end is within the current slab. Advance the bump
                // pointer by the growth delta, preserving provenance via add().
                let extra = new_size - old_size;
                unsafe {
                    let head_ptr = self.ptr.get().as_ptr();
                    self.ptr.set(NonNull::new_unchecked(head_ptr.add(extra)));
                    // Re-derive from the bump pointer (which carries full slab
                    // provenance) instead of returning `ptr` directly, because
                    // the MIRI tagging in alloc() restricts `ptr`'s Stacked
                    // Borrows tag to old_size bytes. head_ptr == ptr + old_size
                    // here (old_end == head), so this is the original address.
                    let result = NonNull::new_unchecked(head_ptr.sub(old_size));

                    // Re-tag for MIRI at the new, larger size (mirrors alloc()).
                    #[cfg(test)]
                    if true {
                        use std::mem::MaybeUninit;
                        return NonNull::new_unchecked(
                            std::slice::from_raw_parts_mut(
                                result.cast::<MaybeUninit<u8>>().as_ptr(),
                                new_size,
                            )
                            .as_mut_ptr()
                            .cast(),
                        );
                    }

                    return result;
                }
            }
        }

        let new_ptr = self.alloc(new_size);
        // SAFETY:
        // - Source (`ptr`) is from a prior alloc of `old_size` bytes, valid to read.
        // - Destination (`new_ptr`) is a fresh alloc of `new_size >= old_size` bytes.
        // - The regions do not overlap because `new_ptr` comes from a subsequent
        //   bump (same or different slab) so it cannot alias the `ptr` region.
        unsafe { ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_ptr(), old_size) };
        new_ptr
    }

    /// Out-of-line refill path for `alloc`: allocate a new slab, then bump.
    #[cold]
    #[inline(never)]
    fn alloc_slow(&self, size: usize) -> NonNull<u8> {
        self.grow(size);

        let ptr = self.ptr.get();
        let addr = ptr.as_ptr() as usize;
        let aligned_addr = (addr + ALLOC_ALIGN - 1) & !(ALLOC_ALIGN - 1);
        let padding = aligned_addr - addr;
        // Cannot overflow: grow() panics if HEADER_SIZE + size overflows,
        // and post-grow ptr is SLAB_ALIGN-aligned so padding is always 0.
        let needed = padding + size;
        debug_assert!(needed <= self.end.get().as_ptr() as usize - addr);

        // Safety: grow() guarantees the new slab is large enough.
        // Using add() from ptr preserves provenance.
        unsafe {
            self.ptr
                .set(NonNull::new_unchecked(ptr.as_ptr().add(needed)));
            NonNull::new_unchecked(ptr.as_ptr().add(padding))
        }
    }

    /// Create a scratch buffer that writes into the arena's current slab.
    ///
    /// The scratch starts at the current bump position with a capacity equal
    /// to the space left in the active slab; the bump pointer itself is not
    /// advanced until [`Scratch::commit`].
    ///
    /// # Safety
    ///
    /// The caller must not call `alloc` on this arena while the returned
    /// `Scratch` is alive. The scratch exclusively owns the bump region.
    pub(crate) unsafe fn scratch(&self) -> Scratch<'_> {
        let start = self.ptr.get();
        let cap = self.end.get().as_ptr() as usize - start.as_ptr() as usize;
        Scratch {
            arena: self,
            start,
            len: 0,
            cap,
        }
    }

    /// Allocate a fresh slab with at least `size` bytes of data space and make
    /// it the active slab. Any unused space in the previous slab is abandoned
    /// (it stays allocated until drop, but is never bumped from again).
    fn grow(&self, size: usize) {
        // SAFETY: self.slab always points to a valid SlabHeader — either the
        // static EMPTY_SLAB sentinel or a heap-allocated header written in grow().
        let current_size = unsafe { self.slab.get().as_ref().size };

        let min_slab = HEADER_SIZE.checked_add(size).expect("layout overflow");

        // Doubling growth, clamped up to whatever this request needs and never
        // below the initial slab size (the sentinel reports size 0).
        let new_size = current_size
            .saturating_mul(2)
            .max(min_slab)
            .max(INITIAL_SLAB_SIZE);

        let slab_layout =
            Layout::from_size_align(new_size, SLAB_ALIGN).expect("slab layout overflow");

        // SAFETY: slab_layout was validated by Layout::from_size_align above.
        let raw = unsafe { std::alloc::alloc(slab_layout) };
        let Some(base) = NonNull::new(raw) else {
            std::alloc::handle_alloc_error(slab_layout);
        };

        // Safety: base points to a freshly allocated region of new_size bytes.
        unsafe {
            let header_ptr = base.as_ptr().cast::<SlabHeader>();
            // Link the new slab in front of the current chain head.
            header_ptr.write(SlabHeader {
                prev: Some(self.slab.get()),
                size: new_size,
            });

            self.slab.set(NonNull::new_unchecked(header_ptr));
            self.ptr
                .set(NonNull::new_unchecked(base.as_ptr().add(HEADER_SIZE)));
            self.end
                .set(NonNull::new_unchecked(base.as_ptr().add(new_size)));
        }
    }

    /// Allocates a copy of `s` in the arena and returns a reference to it.
    ///
    /// Empty strings are returned as the static `""` without touching the
    /// arena.
    pub fn alloc_str(&self, s: &str) -> &str {
        if s.is_empty() {
            return "";
        }
        let dst = self.alloc(s.len());
        // SAFETY: dst is a fresh arena allocation of s.len() bytes, disjoint
        // from s. The resulting slice is valid UTF-8 because s is.
        unsafe {
            std::ptr::copy_nonoverlapping(s.as_ptr(), dst.as_ptr(), s.len());
            std::str::from_utf8_unchecked(std::slice::from_raw_parts(dst.as_ptr(), s.len()))
        }
    }
}
291
impl Drop for Arena {
    /// Walks the slab chain newest-to-oldest and frees every heap slab.
    /// The static sentinel (size == 0) terminates the walk and is never
    /// passed to `dealloc`.
    fn drop(&mut self) {
        let mut current = self.slab.get();
        loop {
            // Safety: current is either a heap slab or the static sentinel.
            let header = unsafe { current.as_ref() };
            if header.size == 0 {
                break;
            }
            // Read `prev` BEFORE deallocating: the header lives inside the
            // slab being freed.
            let prev = header.prev;
            // Safety: header.size and SLAB_ALIGN match the layout used in grow().
            let slab_layout = unsafe { Layout::from_size_align_unchecked(header.size, SLAB_ALIGN) };
            unsafe {
                std::alloc::dealloc(current.as_ptr().cast(), slab_layout);
            }
            match prev {
                Some(p) => current = p,
                None => break,
            }
        }
    }
}
314
/// A temporary byte buffer that writes directly into an [`Arena`] slab.
///
/// Scratch tracks its own write position without advancing the arena's bump
/// pointer. On [`commit`](Scratch::commit) the arena pointer is advanced past
/// the committed bytes. If the scratch is dropped without committing, the arena
/// pointer is unchanged and the space is reused by subsequent allocations.
pub(crate) struct Scratch<'a> {
    // Arena whose bump region this scratch exclusively owns (see
    // `Arena::scratch` safety contract).
    arena: &'a Arena,
    // First byte of the scratch region; equals the arena's bump position at
    // creation time, but may move to a new slab after `grow_slow`.
    start: NonNull<u8>,
    // Bytes written so far. Invariant: len <= cap.
    len: usize,
    // Bytes available from `start` to the end of the current slab.
    cap: usize,
}
327
impl<'a> Scratch<'a> {
    /// Append a single byte, growing into a new slab if the current one is
    /// full.
    #[inline]
    pub fn push(&mut self, byte: u8) {
        let len = self.len;
        if len == self.cap {
            // grow_slow moves `start` (and copies existing data) but leaves
            // `len` untouched, so the local `len` stays valid below.
            self.grow_slow(1);
        }
        // Safety: len < cap, so start + len is within the slab.
        unsafe {
            self.start.as_ptr().add(len).write(byte);
        }
        self.len = len + 1;
    }

    /// Append all of `bytes`, growing into a new slab if needed.
    #[inline]
    pub fn extend(&mut self, bytes: &[u8]) {
        // cap >= len is invariant, so the subtraction cannot underflow.
        if bytes.len() > self.cap - self.len {
            self.grow_slow(bytes.len());
        }
        // Safety: cap - len >= bytes.len(), so the copy is in bounds.
        unsafe {
            ptr::copy_nonoverlapping(
                bytes.as_ptr(),
                self.start.as_ptr().add(self.len),
                bytes.len(),
            );
        }
        self.len += bytes.len();
    }

    /// Push bytes while stripping underscores. Returns `false` if any
    /// underscore is not placed between two ASCII digits.
    #[inline]
    pub(crate) fn push_strip_underscores(&mut self, bytes: &[u8]) -> bool {
        // prev starts at 0 (not a digit) so a leading underscore is rejected.
        let mut prev = 0u8;
        for &b in bytes {
            if b == b'_' {
                // Underscore must follow a digit...
                if !prev.is_ascii_digit() {
                    return false;
                }
            } else {
                // ...and must be followed by a digit.
                if prev == b'_' && !b.is_ascii_digit() {
                    return false;
                }
                self.push(b);
            }
            prev = b;
        }
        // Reject a trailing underscore.
        prev != b'_'
    }

    /// View the bytes written so far.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        if self.len == 0 {
            return &[];
        }
        // Safety: start..start+len was written by us and is within the slab.
        unsafe { std::slice::from_raw_parts(self.start.as_ptr(), self.len) }
    }

    /// Finalize the scratch data and return it as a byte slice tied to the
    /// arena's lifetime. Advances the arena's bump pointer past the committed
    /// bytes.
    pub fn commit(self) -> &'a [u8] {
        if self.len == 0 {
            return &[];
        }
        // Safety: start..start+len is valid scratch memory within the arena.
        let slice = unsafe { std::slice::from_raw_parts(self.start.as_ptr(), self.len) };
        // Safety: start + len is within the slab (we ensured capacity on every write).
        unsafe {
            self.arena
                .ptr
                .set(NonNull::new_unchecked(self.start.as_ptr().add(self.len)));
        }
        slice
    }

    /// Move the scratch to a fresh slab big enough for `len + additional`
    /// bytes, copying the bytes written so far.
    #[cold]
    #[inline(never)]
    fn grow_slow(&mut self, additional: usize) {
        let required = self.len.checked_add(additional).expect("scratch overflow");
        let new_cap = self.cap.saturating_mul(2).max(required);

        // Allocates a new slab and points the arena's bump pointer at its
        // data area; grow() guarantees at least new_cap bytes of data space.
        self.arena.grow(new_cap);

        // Copy existing scratch data to the start of the new slab.
        let new_start = self.arena.ptr.get();
        if self.len > 0 {
            // Safety: old data at self.start..+len is still valid (old slab hasn't been freed).
            // New slab has at least new_cap bytes of data space >= required > self.len.
            unsafe {
                ptr::copy_nonoverlapping(self.start.as_ptr(), new_start.as_ptr(), self.len);
            }
        }
        self.start = new_start;
        self.cap = self.arena.end.get().as_ptr() as usize - new_start.as_ptr() as usize;
    }
}
427
// Unit tests live in a sibling file; compiled only for test builds, where the
// Miri re-tagging paths in alloc()/realloc() are also active.
#[cfg(test)]
#[path = "./arena_tests.rs"]
mod tests;