toml-spanner 1.0.2

High-performance TOML parser and deserializer that preserves span information, with fast compile times.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
use std::alloc::Layout;
use std::cell::Cell;
use std::ptr::{self, NonNull};

/// Size in bytes of the header stored at the front of every heap slab.
const HEADER_SIZE: usize = std::mem::size_of::<SlabHeader>();
/// Minimum size of the first heap slab; later slabs double (see `Arena::grow`).
const INITIAL_SLAB_SIZE: usize = 4096;
/// Every pointer handed out by the arena is aligned to this many bytes.
const ALLOC_ALIGN: usize = 8;
/// Alignment of slab allocations: the stricter of the header's own alignment
/// and `ALLOC_ALIGN`, so the data area that follows the header is always
/// suitably aligned for arena allocations.
const SLAB_ALIGN: usize = if std::mem::align_of::<SlabHeader>() >= ALLOC_ALIGN {
    std::mem::align_of::<SlabHeader>()
} else {
    ALLOC_ALIGN
};

// Compile-time invariants: slab alignment covers allocation alignment, and the
// header size keeps the slab's data area ALLOC_ALIGN-aligned.
const _: () = assert!(SLAB_ALIGN >= ALLOC_ALIGN);
const _: () = assert!(HEADER_SIZE.is_multiple_of(ALLOC_ALIGN));

/// Header written at the start of every heap slab. Links slabs into an
/// intrusive singly linked list so `Drop` can walk the chain and free them.
#[repr(C)]
struct SlabHeader {
    // Previously allocated slab. `None` only for the static `EMPTY_SLAB`
    // sentinel; the first heap slab points back at the sentinel (see grow()).
    prev: Option<NonNull<SlabHeader>>,
    // Total slab size in bytes, including this header. `0` marks the static
    // sentinel and terminates the drop loop.
    size: usize,
}

// SAFETY: The only `SlabHeader` that needs `Sync` is the static `EMPTY_SLAB`
// sentinel. It is immutable (prev=None, size=0) and never written through the
// pointer. Heap-allocated SlabHeaders are only reachable through `Arena`, which
// is `!Sync` (contains `Cell`), so they are never shared across threads.
unsafe impl Sync for SlabHeader {}

/// Sentinel slab for an arena that has not allocated yet: `size == 0` means
/// "no capacity" and stops the deallocation walk in `Drop`.
static EMPTY_SLAB: SlabHeader = SlabHeader {
    prev: None,
    size: 0,
};

/// Bump allocator backing all data structures produced by the parser.
///
/// Create an `Arena` before calling [`parse`](crate::parse) and pass it by
/// reference. The arena must outlive the returned [`Document`](crate::Document)
/// because parsed values borrow from it. All memory is freed on drop.
///
/// # Examples
///
/// ```
/// let arena = toml_spanner::Arena::new();
/// let table = toml_spanner::parse("key = \"value\"", &arena)?;
/// // `table` borrows from both the input string and `arena`.
/// # Ok::<(), toml_spanner::Error>(())
/// ```
pub struct Arena {
    // Current bump position inside the active slab. Invariant: ptr <= end.
    ptr: Cell<NonNull<u8>>,
    // One past the end of the active slab's usable bytes.
    end: Cell<NonNull<u8>>,
    // Most recently allocated slab header, or the `EMPTY_SLAB` sentinel.
    // Heads the intrusive list walked by `Drop`.
    slab: Cell<NonNull<SlabHeader>>,
}

// Keep the handle small: exactly three words on 64-bit targets.
#[cfg(target_pointer_width = "64")]
const _: () = assert!(std::mem::size_of::<Arena>() == 24);

// SAFETY: `Arena` owns its heap-allocated slab chain exclusively. Moving the
// arena to another thread transfers that exclusive ownership, and the bump
// pointers held in `Cell` are simply data alongside the slabs. No other thread
// retains access to the slabs after the move, and the global allocator allows
// `dealloc` on a different thread than the one that called `alloc`.
//
// `Arena` is intentionally **not** `Sync`: `alloc`, `alloc_str`, and the
// reallocation helpers mutate the bump pointers through `&self` via `Cell`,
// which is not atomic. Concurrent allocation from two threads would race on
// those cells and corrupt the slab state.
unsafe impl Send for Arena {}

impl Default for Arena {
    fn default() -> Self {
        Self::new()
    }
}

impl Arena {
    /// Creates a new, empty arena.
    ///
    /// No heap memory is allocated up front: the slab chain starts at the
    /// static `EMPTY_SLAB` sentinel and `ptr == end` (both dangling), so the
    /// first allocation takes the slow path and creates the initial slab.
    pub fn new() -> Self {
        // Safety: EMPTY_SLAB is a static with a stable address.
        let sentinel =
            unsafe { NonNull::new_unchecked(&EMPTY_SLAB as *const SlabHeader as *mut SlabHeader) };
        let dangling = NonNull::dangling();
        Arena {
            ptr: Cell::new(dangling),
            end: Cell::new(dangling),
            slab: Cell::new(sentinel),
        }
    }

    /// Allocate `size` bytes aligned to 8.
    ///
    /// Returns a non-null pointer to the allocated region. Aborts on OOM.
    #[inline]
    pub(crate) fn alloc(&self, size: usize) -> NonNull<u8> {
        let ptr = self.ptr.get();
        let addr = ptr.as_ptr() as usize;
        // Round the bump position up to the next ALLOC_ALIGN boundary.
        let aligned_addr = (addr + ALLOC_ALIGN - 1) & !(ALLOC_ALIGN - 1);
        let padding = aligned_addr - addr;
        // saturating_add: on overflow `needed` becomes usize::MAX, which can
        // never fit in `remaining`, so we take the slow path (and eventually
        // hit grow()'s checked_add panic).
        let needed = padding.saturating_add(size);
        let remaining = self.end.get().as_ptr() as usize - addr;

        let result = if needed <= remaining {
            // Safety: needed bytes fit within the current slab (end >= ptr is
            // invariant, so remaining cannot underflow). Using add() from ptr
            // preserves provenance.
            unsafe {
                self.ptr
                    .set(NonNull::new_unchecked(ptr.as_ptr().add(needed)));
                NonNull::new_unchecked(ptr.as_ptr().add(padding))
            }
        } else {
            self.alloc_slow(size)
        };

        // Tag pointer for MIRI: under cfg(test), re-derive the returned
        // pointer through a slice of exactly `size` bytes so its Stacked
        // Borrows tag is restricted to this allocation's extent (realloc()
        // relies on this being exactly old_size bytes). The `if true` wrapper
        // gives the cfg attribute a statement to attach to while allowing a
        // `return` inside.
        #[cfg(test)]
        if true {
            use std::mem::MaybeUninit;

            unsafe {
                return NonNull::new_unchecked(
                    std::slice::from_raw_parts_mut(result.cast::<MaybeUninit<u8>>().as_ptr(), size)
                        .as_mut_ptr()
                        .cast(),
                );
            }
        }

        result
    }

    /// Grow an existing allocation in place when possible, otherwise allocate
    /// a new region and copy the old data.
    ///
    /// Returns the (possibly unchanged) pointer to the allocation.
    ///
    /// # Safety
    ///
    /// - `ptr` must have been returned by a prior `alloc` on this arena whose
    ///   size was `old_size`.
    /// - `new_size` must be `>= old_size`. Violating this causes wrapping
    ///   underflow in the in place growth path or an out of bounds copy.
    pub(crate) unsafe fn realloc(
        &self,
        ptr: NonNull<u8>,
        old_size: usize,
        new_size: usize,
    ) -> NonNull<u8> {
        debug_assert!(new_size >= old_size);

        // Use integer arithmetic for bounds checks to avoid creating
        // out-of-bounds pointers and to preserve strict provenance.
        let old_end = ptr.as_ptr() as usize + old_size;
        let head = self.ptr.get().as_ptr() as usize;

        // In-place growth is only possible when `ptr` is the most recent
        // allocation, i.e. it ends exactly at the current bump position.
        if old_end == head {
            // Compare as new_size <= end - ptr instead of ptr + new_size <= end
            // to avoid wrapping overflow when new_size is very large.
            let remaining = self.end.get().as_ptr() as usize - ptr.as_ptr() as usize;
            if new_size <= remaining {
                // Safety: new_end is within the current slab. Advance the bump
                // pointer by the growth delta, preserving provenance via add().
                let extra = new_size - old_size;
                unsafe {
                    let head_ptr = self.ptr.get().as_ptr();
                    self.ptr.set(NonNull::new_unchecked(head_ptr.add(extra)));
                    // Re-derive from the bump pointer (which carries full slab
                    // provenance) instead of returning `ptr` directly, because
                    // the MIRI tagging in alloc() restricts `ptr`'s Stacked
                    // Borrows tag to old_size bytes.
                    let result = NonNull::new_unchecked(head_ptr.sub(old_size));

                    // Re-tag for MIRI with the new, larger extent (mirrors the
                    // tagging in alloc()).
                    #[cfg(test)]
                    if true {
                        use std::mem::MaybeUninit;
                        return NonNull::new_unchecked(
                            std::slice::from_raw_parts_mut(
                                result.cast::<MaybeUninit<u8>>().as_ptr(),
                                new_size,
                            )
                            .as_mut_ptr()
                            .cast(),
                        );
                    }

                    return result;
                }
            }
        }

        // Out-of-line growth: fresh allocation plus copy of the old contents.
        let new_ptr = self.alloc(new_size);
        // SAFETY:
        // - Source (`ptr`) is from a prior alloc of `old_size` bytes, valid to read.
        // - Destination (`new_ptr`) is a fresh alloc of `new_size >= old_size` bytes.
        // - The regions do not overlap because `new_ptr` comes from a subsequent
        //   bump (same or different slab) so it cannot alias the `ptr` region.
        unsafe { ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_ptr(), old_size) };
        new_ptr
    }

    /// Slow path of [`alloc`](Self::alloc): allocates a new slab large enough
    /// for `size` bytes, then bumps within it.
    #[cold]
    #[inline(never)]
    fn alloc_slow(&self, size: usize) -> NonNull<u8> {
        self.grow(size);

        let ptr = self.ptr.get();
        let addr = ptr.as_ptr() as usize;
        let aligned_addr = (addr + ALLOC_ALIGN - 1) & !(ALLOC_ALIGN - 1);
        let padding = aligned_addr - addr;
        // Cannot overflow: grow() panics if HEADER_SIZE + size overflows,
        // and post-grow ptr is SLAB_ALIGN-aligned so padding is always 0.
        let needed = padding + size;
        debug_assert!(needed <= self.end.get().as_ptr() as usize - addr);

        // Safety: grow() guarantees the new slab is large enough.
        // Using add() from ptr preserves provenance.
        unsafe {
            self.ptr
                .set(NonNull::new_unchecked(ptr.as_ptr().add(needed)));
            NonNull::new_unchecked(ptr.as_ptr().add(padding))
        }
    }

    /// Create a scratch buffer that writes into the arena's current slab.
    ///
    /// The scratch captures the current bump position and remaining capacity;
    /// it advances the arena pointer only on [`Scratch::commit`].
    ///
    /// # Safety
    ///
    /// The caller must not call `alloc` on this arena while the returned
    /// `Scratch` is alive. The scratch exclusively owns the bump region.
    pub(crate) unsafe fn scratch(&self) -> Scratch<'_> {
        let start = self.ptr.get();
        let cap = self.end.get().as_ptr() as usize - start.as_ptr() as usize;
        Scratch {
            arena: self,
            start,
            len: 0,
            cap,
        }
    }

    /// Allocate a fresh slab with room for at least `size` data bytes plus
    /// the header, link it onto the slab chain, and point the bump range at
    /// its data area. Any unused tail of the previous slab is abandoned (the
    /// slab itself is still freed on drop via the chain).
    ///
    /// Panics on layout overflow; calls `handle_alloc_error` on OOM.
    fn grow(&self, size: usize) {
        // SAFETY: self.slab always points to a valid SlabHeader — either the
        // static EMPTY_SLAB sentinel or a heap-allocated header written in grow().
        let current_size = unsafe { self.slab.get().as_ref().size };

        let min_slab = HEADER_SIZE.checked_add(size).expect("layout overflow");

        // Geometric growth: double the previous slab, but never smaller than
        // what this request needs or the initial slab size.
        let new_size = current_size
            .saturating_mul(2)
            .max(min_slab)
            .max(INITIAL_SLAB_SIZE);

        let slab_layout =
            Layout::from_size_align(new_size, SLAB_ALIGN).expect("slab layout overflow");

        // SAFETY: slab_layout was validated by Layout::from_size_align above.
        let raw = unsafe { std::alloc::alloc(slab_layout) };
        let Some(base) = NonNull::new(raw) else {
            std::alloc::handle_alloc_error(slab_layout);
        };

        // Safety: base points to a freshly allocated region of new_size bytes.
        unsafe {
            let header_ptr = base.as_ptr().cast::<SlabHeader>();
            header_ptr.write(SlabHeader {
                prev: Some(self.slab.get()),
                size: new_size,
            });

            self.slab.set(NonNull::new_unchecked(header_ptr));
            self.ptr
                .set(NonNull::new_unchecked(base.as_ptr().add(HEADER_SIZE)));
            self.end
                .set(NonNull::new_unchecked(base.as_ptr().add(new_size)));
        }
    }

    /// Allocates a copy of `s` in the arena and returns a reference to it.
    ///
    /// The empty string is returned as a `'static` literal without touching
    /// the arena.
    pub fn alloc_str(&self, s: &str) -> &str {
        if s.is_empty() {
            return "";
        }
        let dst = self.alloc(s.len());
        // SAFETY: dst is a fresh arena allocation of s.len() bytes, disjoint
        // from s. The resulting slice is valid UTF-8 because s is.
        unsafe {
            std::ptr::copy_nonoverlapping(s.as_ptr(), dst.as_ptr(), s.len());
            std::str::from_utf8_unchecked(std::slice::from_raw_parts(dst.as_ptr(), s.len()))
        }
    }
}

impl Drop for Arena {
    /// Frees every heap slab by walking the intrusive `prev` chain. The
    /// static sentinel (recognized by `size == 0`) terminates the walk and is
    /// never deallocated.
    fn drop(&mut self) {
        let mut current = self.slab.get();
        loop {
            // Safety: current is either a heap slab or the static sentinel.
            let header = unsafe { current.as_ref() };
            if header.size == 0 {
                break;
            }
            // Read `prev` before deallocating the memory that stores it.
            let prev = header.prev;
            // Safety: header.size and SLAB_ALIGN match the layout used in grow().
            let slab_layout = unsafe { Layout::from_size_align_unchecked(header.size, SLAB_ALIGN) };
            unsafe {
                std::alloc::dealloc(current.as_ptr().cast(), slab_layout);
            }
            match prev {
                Some(p) => current = p,
                None => break,
            }
        }
    }
}

/// A temporary byte buffer that writes directly into an [`Arena`] slab.
///
/// Scratch tracks its own write position without advancing the arena's bump
/// pointer. On [`commit`](Scratch::commit) the arena pointer is advanced past
/// the committed bytes. If the scratch is dropped without committing, the arena
/// pointer is unchanged and the space is reused by subsequent allocations.
pub(crate) struct Scratch<'a> {
    // Owning arena; `grow_slow` allocates new slabs through it and `commit`
    // advances its bump pointer.
    arena: &'a Arena,
    // First byte of the scratch region (the arena's bump position when the
    // scratch was created, or the start of a fresh slab after growth).
    start: NonNull<u8>,
    // Bytes written so far. Invariant: len <= cap.
    len: usize,
    // Capacity available at `start` within the current slab.
    cap: usize,
}

impl<'a> Scratch<'a> {
    /// Append a single byte, growing into a fresh slab if the current one is
    /// full.
    #[inline]
    pub fn push(&mut self, byte: u8) {
        let len = self.len;
        if len == self.cap {
            self.grow_slow(1);
        }
        // Safety: len < cap, so start + len is within the slab.
        unsafe {
            self.start.as_ptr().add(len).write(byte);
        }
        self.len = len + 1;
    }

    /// Append all of `bytes`, growing first if they do not fit.
    #[inline]
    pub fn extend(&mut self, bytes: &[u8]) {
        // `cap - len` cannot underflow: len <= cap is an invariant.
        if bytes.len() > self.cap - self.len {
            self.grow_slow(bytes.len());
        }
        // Safety: cap - len >= bytes.len(), so the copy is in bounds.
        unsafe {
            ptr::copy_nonoverlapping(
                bytes.as_ptr(),
                self.start.as_ptr().add(self.len),
                bytes.len(),
            );
        }
        self.len += bytes.len();
    }

    /// Push bytes while stripping underscores. Returns `false` if any
    /// underscore is not placed between two ASCII digits.
    ///
    /// NOTE(review): only decimal digits (`0-9`) count as digits here, so an
    /// underscore adjacent to a hex digit `a`-`f` (e.g. `0xdead_beef`) is
    /// rejected by this routine. Presumably hex/octal/binary literals are
    /// handled by a different path — confirm at the call sites.
    #[inline]
    pub(crate) fn push_strip_underscores(&mut self, bytes: &[u8]) -> bool {
        // prev = 0 initially, so a leading underscore fails the digit check.
        let mut prev = 0u8;
        for &b in bytes {
            if b == b'_' {
                if !prev.is_ascii_digit() {
                    return false;
                }
            } else {
                if prev == b'_' && !b.is_ascii_digit() {
                    return false;
                }
                self.push(b);
            }
            prev = b;
        }
        // A trailing underscore has no digit after it and is rejected.
        prev != b'_'
    }

    /// View the bytes written so far without committing them.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        if self.len == 0 {
            return &[];
        }
        // Safety: start..start+len was written by us and is within the slab.
        unsafe { std::slice::from_raw_parts(self.start.as_ptr(), self.len) }
    }

    /// Finalize the scratch data and return it as a byte slice tied to the
    /// arena's lifetime. Advances the arena's bump pointer past the committed
    /// bytes.
    pub fn commit(self) -> &'a [u8] {
        if self.len == 0 {
            return &[];
        }
        // Safety: start..start+len is valid scratch memory within the arena.
        let slice = unsafe { std::slice::from_raw_parts(self.start.as_ptr(), self.len) };
        // Safety: start + len is within the slab (we ensured capacity on every write).
        unsafe {
            self.arena
                .ptr
                .set(NonNull::new_unchecked(self.start.as_ptr().add(self.len)));
        }
        slice
    }

    /// Move the scratch to a freshly grown slab with room for at least
    /// `len + additional` bytes, copying the bytes written so far.
    #[cold]
    #[inline(never)]
    fn grow_slow(&mut self, additional: usize) {
        let required = self.len.checked_add(additional).expect("scratch overflow");
        let new_cap = self.cap.saturating_mul(2).max(required);

        // grow() resets the arena's bump range to a new slab with at least
        // new_cap bytes of data space; the old slab stays alive on the chain.
        self.arena.grow(new_cap);

        // Copy existing scratch data to the start of the new slab.
        let new_start = self.arena.ptr.get();
        if self.len > 0 {
            // Safety: old data at self.start..+len is still valid (old slab hasn't been freed).
            // New slab has at least new_cap bytes of data space >= required > self.len.
            unsafe {
                ptr::copy_nonoverlapping(self.start.as_ptr(), new_start.as_ptr(), self.len);
            }
        }
        self.start = new_start;
        self.cap = self.arena.end.get().as_ptr() as usize - new_start.as_ptr() as usize;
    }
}

#[cfg(test)]
#[path = "./arena_tests.rs"]
mod tests;