Skip to main content

toml_spanner/
parser.rs

1// Deliberately avoid `?` operator throughout this module for compile-time
2// performance: explicit match/if-let prevents the compiler from generating
3// From::from conversion and drop-glue machinery at every call site.
4#![allow(clippy::question_mark)]
5#![allow(unsafe_code)]
6
7#[cfg(test)]
8#[path = "./parser_tests.rs"]
9mod tests;
10
11use crate::{
12    Span,
13    arena::Arena,
14    error::{Error, ErrorKind},
15    table::{InnerTable, Table},
16    value::{self, Item, Key},
17};
18use std::char;
19use std::hash::{Hash, Hasher};
20use std::ptr::NonNull;
21
/// Upper bound on nesting depth.
/// NOTE(review): the code that enforces this limit is outside this section —
/// presumably the recursive value/table parsers; confirm before changing it.
const MAX_RECURSION_DEPTH: i16 = 256;
/// Unit marker used as the `Err` payload of the parser's internal methods.
///
/// It carries no data on purpose: when a method returns `Err(ParseError)`, the
/// full error details have already been written into `Parser::error_kind` /
/// `Parser::error_span`.
#[derive(Copy, Clone)]
struct ParseError;
27
/// Bundle of mutable borrows describing the table currently being filled in.
struct Ctx<'b, 'de> {
    /// The current table context — a `Table` view into a table `Value`.
    /// Gives direct mutable access to both the span fields and the `Table` payload.
    table: &'b mut Table<'de>,
    /// If this table is an entry in an array-of-tables, a disjoint borrow of
    /// the parent array `Value`'s `end_and_flag` field so its span can be
    /// extended alongside the entry. `None` otherwise.
    array_end_span: Option<&'b mut u32>,
}
37
/// Tables with at least this many entries use the hash index for lookups.
/// NOTE(review): the lookup code is outside this section — presumably the
/// index meant here is `Parser::table_index`, and smaller tables are searched
/// without it; confirm against the insertion/lookup routines.
const INDEXED_TABLE_THRESHOLD: usize = 6;
40
/// Builds the byte → hex-digit lookup table at compile time.
///
/// Entry `b` holds the numeric value (0–15) of the ASCII hex digit `b`
/// (either letter case), or `-1` when `b` is not a hex digit.
const fn build_hex_table() -> [i8; 256] {
    // Start with every byte marked invalid, then fill in the digit ranges.
    let mut lut = [-1i8; 256];

    let mut decimal = 0u8;
    while decimal < 10 {
        lut[(b'0' + decimal) as usize] = decimal as i8;
        decimal += 1;
    }

    let mut letter = 0u8;
    while letter < 6 {
        let value = (10 + letter) as i8;
        lut[(b'A' + letter) as usize] = value;
        lut[(b'a' + letter) as usize] = value;
        letter += 1;
    }

    lut
}

/// Hex-digit value for every possible byte; `-1` marks a non-hex byte.
static HEX: [i8; 256] = build_hex_table();
57
/// Hash-map key that identifies a (table, key-name) pair without owning the
/// string data.  The raw `key_ptr`/`len` point into either the input buffer
/// or the arena; both are stable for the lifetime of the parse.
/// `first_key_span` is the `span.start()` of the **first** key ever inserted
/// into the table and serves as a cheap, collision-free table discriminator.
///
/// Equality and hashing are by key *contents* plus `first_key_span`, never by
/// pointer identity, so two `KeyIndex` values built from different buffers
/// holding the same text compare equal.
struct KeyIndex<'de> {
    /// First byte of the key's UTF-8 text (dangling but non-null for `""`).
    key_ptr: NonNull<u8>,
    /// Length of the key in bytes.
    len: u32,
    /// Discriminator of the owning table (see the type-level docs).
    first_key_span: u32,
    /// Ties the raw pointer to the `'de` borrow it was derived from.
    marker: std::marker::PhantomData<&'de str>,
}

impl<'de> KeyIndex<'de> {
    /// Captures `key` by pointer and length, tagged with the table
    /// discriminator `first_key_span`.
    #[inline]
    fn new(key: &'de str, first_key_span: u32) -> Self {
        // `len` is stored as `u32`; a longer key would be silently truncated
        // and `as_str` could then split a UTF-8 sequence.
        debug_assert!(
            u32::try_from(key.len()).is_ok(),
            "key length exceeds u32::MAX"
        );
        KeyIndex {
            // A reference is never null, so the safe conversion is enough;
            // no `unsafe` is needed to build the `NonNull`.
            key_ptr: NonNull::from(key.as_bytes()).cast::<u8>(),
            len: key.len() as u32,
            first_key_span,
            marker: std::marker::PhantomData,
        }
    }

    /// Reconstructs the key text this index was created from.
    #[inline]
    fn as_str(&self) -> &'de str {
        // SAFETY: `key_ptr`/`len` were taken from a `&'de str` in `new`, so
        // they describe `len` initialized bytes of valid UTF-8 that stay
        // borrowed (and therefore alive and unmodified) for `'de`.
        unsafe {
            std::str::from_utf8_unchecked(std::slice::from_raw_parts(
                self.key_ptr.as_ptr(),
                self.len as usize,
            ))
        }
    }
}

impl<'de> Hash for KeyIndex<'de> {
    /// Hashes the key text followed by the table discriminator, matching the
    /// fields compared by `eq`.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.as_str().hash(state);
        self.first_key_span.hash(state);
    }
}

impl<'de> PartialEq for KeyIndex<'de> {
    /// Two indices are equal when they belong to the same table and name the
    /// same key text. The integer comparison runs first because it is cheaper.
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.first_key_span == other.first_key_span && self.as_str() == other.as_str()
    }
}

impl<'de> Eq for KeyIndex<'de> {}
110
/// Cursor-based parser state over a borrowed input buffer.
struct Parser<'de> {
    /// Raw bytes of the input. Always valid UTF-8 (derived from `&str`).
    bytes: &'de [u8],
    /// Byte offset into `bytes` of the next unread byte.
    cursor: usize,
    /// Arena used for scratch buffers, e.g. strings that contain escapes and
    /// therefore cannot borrow directly from the input.
    arena: &'de Arena,

    // Error context -- populated just before returning ParseError
    // (read back and cleared by `take_error`).
    error_span: Span,
    error_kind: Option<ErrorKind>,

    // Global key-index for O(1) lookups in large tables.
    // Maps (table-discriminator, key-name) → entry index in the table.
    table_index: foldhash::HashMap<KeyIndex<'de>, usize>,
}
125
126#[allow(unsafe_code)]
127impl<'de> Parser<'de> {
128    fn new(input: &'de str, arena: &'de Arena) -> Self {
129        let bytes = input.as_bytes();
130        // Skip UTF-8 BOM (U+FEFF = EF BB BF) if present at the start.
131        let cursor = if bytes.starts_with(b"\xef\xbb\xbf") {
132            3
133        } else {
134            0
135        };
136        Parser {
137            bytes,
138            cursor,
139            arena,
140            error_span: Span::new(0, 0),
141            error_kind: None,
142            // initialize to about ~ 8 KB
143            table_index: foldhash::HashMap::with_capacity_and_hasher(
144                256,
145                foldhash::fast::RandomState::default(),
146            ),
147        }
148    }
149
150    /// Get a `&str` slice from the underlying bytes.
151    /// SAFETY: `self.bytes` is always valid UTF-8, and callers must ensure
152    /// `start..end` falls on UTF-8 char boundaries.
153    #[inline]
154    unsafe fn str_slice(&self, start: usize, end: usize) -> &'de str {
155        #[cfg(not(debug_assertions))]
156        unsafe {
157            std::str::from_utf8_unchecked(&self.bytes[start..end])
158        }
159        #[cfg(debug_assertions)]
160        match std::str::from_utf8(&self.bytes[start..end]) {
161            Ok(value) => value,
162            Err(err) => panic!(
163                "Invalid UTF-8 slice: bytes[{}..{}] is not valid UTF-8: {}",
164                start, end, err
165            ),
166        }
167    }
168
169    #[cold]
170    fn set_duplicate_key_error(&mut self, first: Span, second: Span, key: &str) -> ParseError {
171        self.error_span = second;
172        self.error_kind = Some(ErrorKind::DuplicateKey {
173            key: key.into(),
174            first,
175        });
176        ParseError
177    }
178    #[cold]
179    fn set_error(&mut self, start: usize, end: Option<usize>, kind: ErrorKind) -> ParseError {
180        self.error_span = Span::new(start as u32, end.unwrap_or(start + 1) as u32);
181        self.error_kind = Some(kind);
182        ParseError
183    }
184
    /// Consumes the pending error (as recorded by `set_error` and friends)
    /// and packages its kind and span as an `Error`.
    ///
    /// # Panics
    /// Panics if no error kind is currently stored.
    fn take_error(&mut self) -> Error {
        let kind = self
            .error_kind
            .take()
            .expect("take_error called without error");
        let span = self.error_span;

        // Black Magic Optimization:
        // Removing the following introduces 8% performance
        // regression across the board.
        //
        // The loop body runs at most once: it hands the first
        // `(index, &byte)` pair to `black_box` and breaks. It does not
        // influence the returned error, so treat it purely as a codegen
        // nudge and re-benchmark before touching it.
        {
            for entry in self.bytes.iter().enumerate() {
                std::hint::black_box(&entry);
                break;
            }
        }

        Error { kind, span }
    }
204
205    #[inline]
206    fn peek_byte(&self) -> Option<u8> {
207        self.bytes.get(self.cursor).copied()
208    }
209
210    #[inline]
211    fn peek_byte_at(&self, offset: usize) -> Option<u8> {
212        self.bytes.get(self.cursor + offset).copied()
213    }
214
215    #[inline]
216    fn eat_byte(&mut self, b: u8) -> bool {
217        if self.peek_byte() == Some(b) {
218            self.cursor += 1;
219            true
220        } else {
221            false
222        }
223    }
224
225    fn expect_byte(&mut self, b: u8) -> Result<(), ParseError> {
226        if self.eat_byte(b) {
227            Ok(())
228        } else {
229            let start = self.cursor;
230            let (found_desc, end) = self.scan_token_desc_and_end();
231            Err(self.set_error(
232                start,
233                Some(end),
234                ErrorKind::Wanted {
235                    expected: byte_describe(b),
236                    found: found_desc,
237                },
238            ))
239        }
240    }
241
242    fn eat_whitespace(&mut self) {
243        while let Some(b) = self.peek_byte() {
244            if b == b' ' || b == b'\t' {
245                self.cursor += 1;
246            } else {
247                break;
248            }
249        }
250    }
251
252    fn eat_comment(&mut self) -> Result<bool, ParseError> {
253        if !self.eat_byte(b'#') {
254            return Ok(false);
255        }
256        while let Some(0x09 | 0x20..=0x7E | 0x80..) = self.peek_byte() {
257            self.cursor += 1;
258        }
259        self.eat_newline_or_eof().map(|()| true)
260    }
261
262    fn eat_newline_or_eof(&mut self) -> Result<(), ParseError> {
263        match self.peek_byte() {
264            None => Ok(()),
265            Some(b'\n') => {
266                self.cursor += 1;
267                Ok(())
268            }
269            Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
270                self.cursor += 2;
271                Ok(())
272            }
273            _ => {
274                let start = self.cursor;
275                let (found_desc, end) = self.scan_token_desc_and_end();
276                Err(self.set_error(
277                    start,
278                    Some(end),
279                    ErrorKind::Wanted {
280                        expected: "newline",
281                        found: found_desc,
282                    },
283                ))
284            }
285        }
286    }
287
288    fn eat_newline(&mut self) -> bool {
289        match self.peek_byte() {
290            Some(b'\n') => {
291                self.cursor += 1;
292                true
293            }
294            Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
295                self.cursor += 2;
296                true
297            }
298            _ => false,
299        }
300    }
301
302    /// Scan forward from the current position to determine the description
303    /// and end position of the "token" at the cursor. This provides compatible
304    /// error spans with the old tokenizer.
305    fn scan_token_desc_and_end(&self) -> (&'static str, usize) {
306        match self.peek_byte() {
307            None => ("eof", self.bytes.len()),
308            Some(b'\n' | b'\r') => ("a newline", self.cursor + 1),
309            Some(b' ' | b'\t') => {
310                let mut end = self.cursor + 1;
311                while end < self.bytes.len()
312                    && (self.bytes[end] == b' ' || self.bytes[end] == b'\t')
313                {
314                    end += 1;
315                }
316                ("whitespace", end)
317            }
318            Some(b'#') => ("a comment", self.cursor + 1),
319            Some(b'=') => ("an equals", self.cursor + 1),
320            Some(b'.') => ("a period", self.cursor + 1),
321            Some(b',') => ("a comma", self.cursor + 1),
322            Some(b':') => ("a colon", self.cursor + 1),
323            Some(b'+') => ("a plus", self.cursor + 1),
324            Some(b'{') => ("a left brace", self.cursor + 1),
325            Some(b'}') => ("a right brace", self.cursor + 1),
326            Some(b'[') => ("a left bracket", self.cursor + 1),
327            Some(b']') => ("a right bracket", self.cursor + 1),
328            Some(b'\'' | b'"') => ("a string", self.cursor + 1),
329            Some(b) if is_keylike_byte(b) => {
330                let mut end = self.cursor + 1;
331                while end < self.bytes.len() && is_keylike_byte(self.bytes[end]) {
332                    end += 1;
333                }
334                ("an identifier", end)
335            }
336            Some(_) => ("a character", self.cursor + 1),
337        }
338    }
339
340    fn read_keylike(&mut self) -> &'de str {
341        let start = self.cursor;
342        while let Some(b) = self.peek_byte() {
343            if !is_keylike_byte(b) {
344                break;
345            }
346            self.cursor += 1;
347        }
348        // SAFETY: keylike bytes are ASCII, always valid UTF-8 boundaries
349        unsafe { self.str_slice(start, self.cursor) }
350    }
351
352    fn read_table_key(&mut self) -> Result<Key<'de>, ParseError> {
353        match self.peek_byte() {
354            Some(b'"') => {
355                let start = self.cursor;
356                self.cursor += 1;
357                let (key, multiline) = match self.read_string(start, b'"') {
358                    Ok(v) => v,
359                    Err(e) => return Err(e),
360                };
361                if multiline {
362                    return Err(self.set_error(
363                        start,
364                        Some(key.span.end as usize),
365                        ErrorKind::MultilineStringKey,
366                    ));
367                }
368                Ok(key)
369            }
370            Some(b'\'') => {
371                let start = self.cursor;
372                self.cursor += 1;
373                let (key, multiline) = match self.read_string(start, b'\'') {
374                    Ok(v) => v,
375                    Err(e) => return Err(e),
376                };
377                if multiline {
378                    return Err(self.set_error(
379                        start,
380                        Some(key.span.end as usize),
381                        ErrorKind::MultilineStringKey,
382                    ));
383                }
384                Ok(key)
385            }
386            Some(b) if is_keylike_byte(b) => {
387                let start = self.cursor;
388                let name = self.read_keylike();
389                let span = Span::new(start as u32, self.cursor as u32);
390                Ok(Key { name, span })
391            }
392            Some(_) => {
393                let start = self.cursor;
394                let (found_desc, end) = self.scan_token_desc_and_end();
395                Err(self.set_error(
396                    start,
397                    Some(end),
398                    ErrorKind::Wanted {
399                        expected: "a table key",
400                        found: found_desc,
401                    },
402                ))
403            }
404            None => Err(self.set_error(
405                self.bytes.len(),
406                None,
407                ErrorKind::Wanted {
408                    expected: "a table key",
409                    found: "eof",
410                },
411            )),
412        }
413    }
414
415    /// Read a basic (double-quoted) string. `start` is the byte offset of the
416    /// opening quote. The cursor should be positioned right after the opening `"`.
417    fn read_string(&mut self, start: usize, delim: u8) -> Result<(Key<'de>, bool), ParseError> {
418        let mut multiline = false;
419        if self.eat_byte(delim) {
420            if self.eat_byte(delim) {
421                multiline = true;
422            } else {
423                return Ok((
424                    Key {
425                        name: "",
426                        span: Span::new(start as u32, (start + 1) as u32),
427                    },
428                    false,
429                ));
430            }
431        }
432
433        let mut content_start = self.cursor;
434        if multiline {
435            match self.peek_byte() {
436                Some(b'\n') => {
437                    self.cursor += 1;
438                    content_start = self.cursor;
439                }
440                Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
441                    self.cursor += 2;
442                    content_start = self.cursor;
443                }
444                _ => {}
445            }
446        }
447
448        self.read_string_loop(start, content_start, multiline, delim)
449    }
450
    /// Advance `self.cursor` past bytes that do not require special handling
    /// inside a string.  Uses SWAR (SIMD-Within-A-Register) to scan 8 bytes
    /// at a time.
    ///
    /// Stops at the first byte that is:
    ///   * a control character (< 0x20) — tab (0x09) is a benign false positive
    ///   * DEL (0x7F)
    ///   * the string delimiter (`"` or `'`)
    ///   * a backslash (`\`) — benign false positive for literal strings
    ///   * past the end of input
    ///
    /// The caller (`read_string_loop`) inspects the byte the cursor stops on
    /// and simply continues when it turns out to be a false positive.
    fn skip_string_plain(&mut self, delim: u8) {
        // Quick bail-out for EOF or an immediately-interesting byte.
        // Avoids SWAR setup cost for consecutive specials (e.g. \n\n).
        let Some(&b) = self.bytes.get(self.cursor) else {
            return;
        };

        // Unlike the word scan below, this check lets a tab through.
        if b == delim || b == b'\\' || b == 0x7F || (b < 0x20 && b != 0x09) {
            return;
        }
        self.cursor += 1;

        let base = self.cursor;
        let rest = &self.bytes[base..];

        type Chunk = u64;
        const STEP: usize = std::mem::size_of::<Chunk>();
        const ONE: Chunk = Chunk::MAX / 255; // 0x0101_0101_0101_0101
        const HIGH: Chunk = ONE << 7; // 0x8080_8080_8080_8080

        // Each "fill" is the byte of interest broadcast into all 8 lanes.
        let fill_delim = ONE * Chunk::from(delim);
        let fill_bslash = ONE * Chunk::from(b'\\');
        let fill_del = ONE * 0x7F;

        // The trailing bytes that don't fill a whole word are handled by the
        // byte-at-a-time slow path at the end.
        let chunks = rest.chunks_exact(STEP);
        let remainder_len = chunks.remainder().len();

        for (i, chunk) in chunks.enumerate() {
            // Little-endian load: lane 0 (lowest bits) is the first byte in memory.
            let v = Chunk::from_le_bytes(chunk.try_into().unwrap());

            // "Has a byte less than 0x20": subtracting 0x20 borrows into a
            // lane's high bit when the lane is below 0x20; `& !v` discards
            // lanes whose high bit was already set (bytes >= 0x80).
            let has_ctrl = v.wrapping_sub(ONE * 0x20) & !v;
            // "Has a byte equal to X": XOR turns matching lanes into zero,
            // then the same subtract-and-mask trick detects the zero lanes.
            let eq_delim = (v ^ fill_delim).wrapping_sub(ONE) & !(v ^ fill_delim);
            let eq_bslash = (v ^ fill_bslash).wrapping_sub(ONE) & !(v ^ fill_bslash);
            let eq_del = (v ^ fill_del).wrapping_sub(ONE) & !(v ^ fill_del);

            // Only each lane's high bit is meaningful.
            // NOTE(review): with these borrow-based tricks a spurious flag
            // should only appear in lanes above a genuine hit, so the lowest
            // flagged lane is taken as the stop position — confirm if this
            // scan is ever modified.
            let masked = (has_ctrl | eq_delim | eq_bslash | eq_del) & HIGH;
            if masked != 0 {
                // trailing_zeros / 8 = index of the lowest flagged lane,
                // i.e. the earliest flagged byte of this chunk.
                self.cursor = base + i * STEP + masked.trailing_zeros() as usize / 8;
                return;
            }
        }

        // All full words were plain; finish the tail one byte at a time.
        self.cursor = self.bytes.len() - remainder_len;
        self.skip_string_plain_slow(delim);
    }
506
507    #[cold]
508    #[inline(never)]
509    fn skip_string_plain_slow(&mut self, delim: u8) {
510        while let Some(&b) = self.bytes.get(self.cursor) {
511            if b == delim || b == b'\\' || b == 0x7F || (b < 0x20 && b != 0x09) {
512                return;
513            }
514            self.cursor += 1;
515        }
516    }
517
    /// Scans string content up to the closing delimiter and returns the
    /// string as a `Key` together with the `multiline` flag.
    ///
    /// * `start` — offset of the opening quote (used for error spans and to
    ///   compute the result span).
    /// * `content_start` — offset of the first content byte.
    /// * `multiline` — whether the string was opened with three delimiters.
    /// * `delim` — `"` or `'`; escapes are only processed when it is `"`.
    ///
    /// Strings without escapes are borrowed straight from the input. Once an
    /// escape is seen, the content is assembled in an arena scratch buffer:
    /// `flush_from` marks the start of the raw input that has not yet been
    /// copied into that buffer.
    fn read_string_loop(
        &mut self,
        start: usize,
        content_start: usize,
        multiline: bool,
        delim: u8,
    ) -> Result<(Key<'de>, bool), ParseError> {
        let mut flush_from = content_start;
        let mut scratch: Option<crate::arena::Scratch<'de>> = None;
        loop {
            // Fast-forward over ordinary bytes; the cursor stops on a byte
            // that may need attention (or at end of input).
            self.skip_string_plain(delim);

            let i = self.cursor;
            let Some(&b) = self.bytes.get(i) else {
                return Err(self.set_error(start, None, ErrorKind::UnterminatedString));
            };
            self.cursor = i + 1;

            match b {
                b'\r' => {
                    // `\r` is only acceptable as part of `\r\n`, and a line
                    // ending is only acceptable inside a multiline string.
                    if self.eat_byte(b'\n') {
                        if !multiline {
                            return Err(self.set_error(
                                i,
                                None,
                                ErrorKind::InvalidCharInString('\n'),
                            ));
                        }
                    } else {
                        return Err(self.set_error(i, None, ErrorKind::InvalidCharInString('\r')));
                    }
                }
                b'\n' => {
                    if !multiline {
                        return Err(self.set_error(i, None, ErrorKind::InvalidCharInString('\n')));
                    }
                }
                d if d == delim => {
                    // `end` is one past the last content byte; `span` is the
                    // reported span of the content.
                    let (span, end) = if multiline {
                        // One or two delimiters in a row are ordinary content
                        // in a multiline string; only three close it.
                        if !self.eat_byte(delim) {
                            continue;
                        }
                        if !self.eat_byte(delim) {
                            continue;
                        }
                        // Up to two further delimiters directly before the
                        // closing triple belong to the content.
                        let mut extra = 0usize;
                        if self.eat_byte(delim) {
                            extra += 1;
                        }
                        if self.eat_byte(delim) {
                            extra += 1;
                        }

                        // Re-derive how many bytes the opening triple plus an
                        // immediately following newline occupied.
                        let maybe_nl = self.bytes[start + 3];
                        let start_off = if maybe_nl == b'\n' {
                            4
                        } else if maybe_nl == b'\r' {
                            5
                        } else {
                            3
                        };

                        (
                            Span::new((start + start_off) as u32, (self.cursor - 3) as u32),
                            i + extra,
                        )
                    } else {
                        (Span::new((start + 1) as u32, (self.cursor - 1) as u32), i)
                    };

                    let name = if let Some(mut s) = scratch {
                        // Copy the raw tail that followed the last escape.
                        s.extend(&self.bytes[flush_from..end]);
                        let committed = s.commit();
                        // Safety: scratch contents are valid UTF-8 (built from
                        // validated input and well-formed escape sequences).
                        unsafe { std::str::from_utf8_unchecked(committed) }
                    } else {
                        // Safety: content_start..end is validated UTF-8.
                        unsafe { self.str_slice(content_start, end) }
                    };
                    return Ok((Key { name, span }, multiline));
                }
                b'\\' if delim == b'"' => {
                    // First escape switches to the scratch buffer; copy the
                    // raw bytes seen so far, then append the decoded escape.
                    let arena = self.arena;
                    let s = scratch.get_or_insert_with(|| unsafe { arena.scratch() });
                    s.extend(&self.bytes[flush_from..i]);
                    if let Err(e) = self.read_basic_escape(s, start, multiline) {
                        return Err(e);
                    }
                    // Raw copying resumes after the escape sequence.
                    flush_from = self.cursor;
                }
                // Tab or backslash-in-literal-string: benign false positives
                // from the SWAR scan.
                0x09 | 0x20..=0x7E | 0x80.. => {}
                // Remaining control bytes and DEL are not allowed in strings.
                _ => {
                    return Err(self.set_error(i, None, ErrorKind::InvalidCharInString(b as char)));
                }
            }
        }
    }
618
    /// Decodes one escape sequence and appends the result to `scratch`.
    ///
    /// The cursor must be positioned on the byte right after the backslash.
    /// `string_start` is the offset of the string's opening quote (used for
    /// unterminated-string errors). `multi` enables the line-ending backslash
    /// form, which appends nothing and skips the following whitespace and
    /// newlines.
    ///
    /// Single-byte escapes leave the inner `'byte` block with the byte to
    /// push; hex escapes leave the outer `'char` block with a `char` that is
    /// UTF-8 encoded before being appended.
    fn read_basic_escape(
        &mut self,
        scratch: &mut crate::arena::Scratch<'_>,
        string_start: usize,
        multi: bool,
    ) -> Result<(), ParseError> {
        let i = self.cursor;
        let Some(&b) = self.bytes.get(i) else {
            return Err(self.set_error(string_start, None, ErrorKind::UnterminatedString));
        };
        self.cursor = i + 1;
        let chr: char = 'char: {
            let byte: u8 = 'byte: {
                match b {
                    b'"' => break 'byte b'"',
                    b'\\' => break 'byte b'\\',
                    b'b' => break 'byte 0x08,
                    b'f' => break 'byte 0x0C,
                    b'n' => break 'byte b'\n',
                    b'r' => break 'byte b'\r',
                    b't' => break 'byte b'\t',
                    b'e' => break 'byte 0x1B,
                    // Hex escapes: `\uXXXX`, `\UXXXXXXXX` and `\xXX`.
                    b'u' => match self.read_hex(4, string_start, i) {
                        Ok(ch) => break 'char ch,
                        Err(e) => return Err(e),
                    },
                    b'U' => match self.read_hex(8, string_start, i) {
                        Ok(ch) => break 'char ch,
                        Err(e) => return Err(e),
                    },
                    b'x' => match self.read_hex(2, string_start, i) {
                        Ok(ch) => break 'char ch,
                        Err(e) => return Err(e),
                    },
                    // Line-ending backslash (multiline strings only).
                    b' ' | b'\t' | b'\n' | b'\r' if multi => {
                        // CRLF folding: \r\n counts as \n
                        let c = if b == b'\r' && self.peek_byte() == Some(b'\n') {
                            self.cursor += 1;
                            '\n'
                        } else {
                            b as char
                        };
                        // The backslash was followed by something other than
                        // a newline: the rest of the line may contain only
                        // spaces/tabs before its line ending.
                        if c != '\n' {
                            loop {
                                match self.peek_byte() {
                                    Some(b' ' | b'\t') => {
                                        self.cursor += 1;
                                    }
                                    Some(b'\n') => {
                                        self.cursor += 1;
                                        break;
                                    }
                                    Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
                                        self.cursor += 2;
                                        break;
                                    }
                                    _ => {
                                        return Err(self.set_error(
                                            i,
                                            None,
                                            ErrorKind::InvalidEscape(c),
                                        ));
                                    }
                                }
                            }
                        }
                        // Swallow all whitespace and line endings up to the
                        // next content byte.
                        loop {
                            match self.peek_byte() {
                                Some(b' ' | b'\t' | b'\n') => {
                                    self.cursor += 1;
                                }
                                Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
                                    self.cursor += 2;
                                }
                                _ => break,
                            }
                        }
                    }
                    _ => {
                        // Step back onto the offending byte so the error span
                        // and the reported character both refer to it.
                        self.cursor -= 1;
                        return Err(self.set_error(
                            self.cursor,
                            None,
                            ErrorKind::InvalidEscape(self.next_char_for_error()),
                        ));
                    }
                }
                // Only the line-ending form falls through to here: nothing
                // is appended.
                return Ok(());
            };

            scratch.push(byte);
            return Ok(());
        };
        let mut buf = [0u8; 4];
        let len = chr.encode_utf8(&mut buf).len();
        scratch.extend(&buf[..len]);
        return Ok(());
    }
717
718    fn read_hex(
719        &mut self,
720        n: usize,
721        string_start: usize,
722        escape_start: usize,
723    ) -> Result<char, ParseError> {
724        let mut val: u32 = 0;
725        for _ in 0..n {
726            let Some(&byte) = self.bytes.get(self.cursor) else {
727                return Err(self.set_error(string_start, None, ErrorKind::UnterminatedString));
728            };
729            let digit = HEX[byte as usize];
730            if digit >= 0 {
731                val = (val << 4) | digit as u32;
732                self.cursor += 1;
733            } else {
734                return Err(self.set_error(
735                    self.cursor,
736                    None,
737                    ErrorKind::InvalidHexEscape(self.next_char_for_error()),
738                ));
739            }
740        }
741        match char::from_u32(val) {
742            Some(ch) => Ok(ch),
743            None => Err(self.set_error(
744                escape_start,
745                Some(escape_start + n),
746                ErrorKind::InvalidEscapeValue(val),
747            )),
748        }
749    }
750
751    fn next_char_for_error(&self) -> char {
752        // Safety: The input was valid UTF-8 via a &str
753        let text = unsafe { std::str::from_utf8_unchecked(self.bytes) };
754        if let Some(value) = text.get(self.cursor..) {
755            value.chars().next().unwrap_or(char::REPLACEMENT_CHARACTER)
756        } else {
757            char::REPLACEMENT_CHARACTER
758        }
759    }
760    fn number(
761        &mut self,
762        start: u32,
763        end: u32,
764        s: &'de str,
765        sign: u8,
766    ) -> Result<Item<'de>, ParseError> {
767        let bytes = s.as_bytes();
768
769        // Base-prefixed integers (0x, 0o, 0b).
770        // TOML forbids signs on these, so only match when first byte is '0'.
771        if sign == 2 {
772            if let [b'0', format, rest @ ..] = s.as_bytes() {
773                match format {
774                    b'x' => return self.integer_hex(rest, Span::new(start, end)),
775                    b'o' => return self.integer_octal(rest, Span::new(start, end)),
776                    b'b' => return self.integer_binary(rest, Span::new(start, end)),
777                    _ => {}
778                }
779            }
780        }
781
782        if self.eat_byte(b'.') {
783            let at = self.cursor;
784            return match self.peek_byte() {
785                Some(b) if is_keylike_byte(b) => {
786                    let after = self.read_keylike();
787                    match self.float(start, end, s, Some(after), sign) {
788                        Ok(f) => Ok(Item::float(f, Span::new(start, self.cursor as u32))),
789                        Err(e) => Err(e),
790                    }
791                }
792                _ => Err(self.set_error(at, Some(end as usize), ErrorKind::InvalidNumber)),
793            };
794        }
795
796        if let Ok(v) = self.integer_decimal(bytes, Span::new(start, end), sign) {
797            return Ok(v);
798        }
799
800        if bytes.iter().any(|&b| b == b'e' || b == b'E') {
801            return match self.float(start, end, s, None, sign) {
802                Ok(f) => Ok(Item::float(f, Span::new(start, self.cursor as u32))),
803                Err(e) => Err(e),
804            };
805        }
806
807        Err(ParseError)
808    }
809
810    fn integer_decimal(
811        &mut self,
812        bytes: &'de [u8],
813        span: Span,
814        sign: u8,
815    ) -> Result<Item<'de>, ParseError> {
816        let mut acc: u64 = 0;
817        let mut prev_underscore = false;
818        let mut has_digit = false;
819        let mut leading_zero = false;
820        let negative = sign == 0;
821        'error: {
822            for &b in bytes {
823                if b == b'_' {
824                    if !has_digit || prev_underscore {
825                        break 'error;
826                    }
827                    prev_underscore = true;
828                    continue;
829                }
830                if !b.is_ascii_digit() {
831                    break 'error;
832                }
833                if leading_zero {
834                    break 'error;
835                }
836                if !has_digit && b == b'0' {
837                    leading_zero = true;
838                }
839                has_digit = true;
840                prev_underscore = false;
841                let digit = (b - b'0') as u64;
842                acc = match acc.checked_mul(10).and_then(|a| a.checked_add(digit)) {
843                    Some(v) => v,
844                    None => break 'error,
845                };
846            }
847
848            if !has_digit || prev_underscore {
849                break 'error;
850            }
851
852            let max = if negative {
853                (i64::MAX as u64) + 1
854            } else {
855                i64::MAX as u64
856            };
857            if acc > max {
858                break 'error;
859            }
860
861            let val = if negative {
862                (acc as i64).wrapping_neg()
863            } else {
864                acc as i64
865            };
866            return Ok(Item::integer(val, span));
867        }
868        self.error_span = span;
869        self.error_kind = Some(ErrorKind::InvalidNumber);
870        Err(ParseError)
871    }
872
873    fn integer_hex(&mut self, bytes: &'de [u8], span: Span) -> Result<Item<'de>, ParseError> {
874        let mut acc: u64 = 0;
875        let mut prev_underscore = false;
876        let mut has_digit = false;
877        'error: {
878            if bytes.is_empty() {
879                break 'error;
880            }
881
882            for &b in bytes {
883                if b == b'_' {
884                    if !has_digit || prev_underscore {
885                        break 'error;
886                    }
887                    prev_underscore = true;
888                    continue;
889                }
890                let digit = HEX[b as usize];
891                if digit < 0 {
892                    break 'error;
893                }
894                has_digit = true;
895                prev_underscore = false;
896                if acc >> 60 != 0 {
897                    break 'error;
898                }
899                acc = (acc << 4) | digit as u64;
900            }
901
902            if !has_digit || prev_underscore {
903                break 'error;
904            }
905
906            if acc > i64::MAX as u64 {
907                break 'error;
908            }
909            return Ok(Item::integer(acc as i64, span));
910        }
911        self.error_span = span;
912        self.error_kind = Some(ErrorKind::InvalidNumber);
913        Err(ParseError)
914    }
915
916    fn integer_octal(&mut self, bytes: &'de [u8], span: Span) -> Result<Item<'de>, ParseError> {
917        let mut acc: u64 = 0;
918        let mut prev_underscore = false;
919        let mut has_digit = false;
920        'error: {
921            if bytes.is_empty() {
922                break 'error;
923            }
924
925            for &b in bytes {
926                if b == b'_' {
927                    if !has_digit || prev_underscore {
928                        break 'error;
929                    }
930                    prev_underscore = true;
931                    continue;
932                }
933                if !b.is_ascii_digit() || b > b'7' {
934                    break 'error;
935                }
936                has_digit = true;
937                prev_underscore = false;
938                if acc >> 61 != 0 {
939                    break 'error;
940                }
941                acc = (acc << 3) | (b - b'0') as u64;
942            }
943
944            if !has_digit || prev_underscore {
945                break 'error;
946            }
947
948            if acc > i64::MAX as u64 {
949                break 'error;
950            }
951            return Ok(Item::integer(acc as i64, span));
952        }
953        self.error_span = span;
954        self.error_kind = Some(ErrorKind::InvalidNumber);
955        Err(ParseError)
956    }
957
958    fn integer_binary(&mut self, bytes: &'de [u8], span: Span) -> Result<Item<'de>, ParseError> {
959        let mut acc: u64 = 0;
960        let mut prev_underscore = false;
961        let mut has_digit = false;
962        'error: {
963            if bytes.is_empty() {
964                break 'error;
965            }
966
967            for &b in bytes {
968                if b == b'_' {
969                    if !has_digit || prev_underscore {
970                        break 'error;
971                    }
972                    prev_underscore = true;
973                    continue;
974                }
975                if b != b'0' && b != b'1' {
976                    break 'error;
977                }
978                has_digit = true;
979                prev_underscore = false;
980                if acc >> 63 != 0 {
981                    break 'error;
982                }
983                acc = (acc << 1) | (b - b'0') as u64;
984            }
985
986            if !has_digit || prev_underscore {
987                break 'error;
988            }
989
990            if acc > i64::MAX as u64 {
991                break 'error;
992            }
993            return Ok(Item::integer(acc as i64, span));
994        }
995        self.error_span = span;
996        self.error_kind = Some(ErrorKind::InvalidNumber);
997        Err(ParseError)
998    }
999
1000    fn float(
1001        &mut self,
1002        start: u32,
1003        end: u32,
1004        s: &'de str,
1005        after_decimal: Option<&'de str>,
1006        sign: u8,
1007    ) -> Result<f64, ParseError> {
1008        let s_start = start as usize;
1009        let s_end = end as usize;
1010
1011        // TOML forbids leading zeros in the integer part (e.g. 00.5, -01.0).
1012        if let [b'0', b'0'..=b'9' | b'_', ..] = s.as_bytes() {
1013            return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1014        }
1015
1016        // Safety: no other Scratch or arena.alloc() is active during float parsing.
1017        let mut scratch = unsafe { self.arena.scratch() };
1018
1019        if sign == 0 {
1020            scratch.push(b'-');
1021        }
1022        if !scratch.push_strip_underscores(s.as_bytes()) {
1023            return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1024        }
1025
1026        let mut last = s;
1027
1028        if let Some(after) = after_decimal {
1029            if !matches!(after.as_bytes().first(), Some(b'0'..=b'9')) {
1030                return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1031            }
1032            scratch.push(b'.');
1033            if !scratch.push_strip_underscores(after.as_bytes()) {
1034                return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1035            }
1036            last = after;
1037        }
1038
1039        // When the last keylike token ends with e/E, the '+' and exponent
1040        // digits are separate tokens in the stream ('-' IS keylike so
1041        // e.g. "1e-5" stays in one token and needs no special handling).
1042        if matches!(last.as_bytes().last(), Some(b'e' | b'E')) {
1043            self.eat_byte(b'+');
1044            match self.peek_byte() {
1045                Some(b) if is_keylike_byte(b) && b != b'-' => {
1046                    let next = self.read_keylike();
1047                    if !scratch.push_strip_underscores(next.as_bytes()) {
1048                        return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1049                    }
1050                }
1051                _ => {
1052                    return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1053                }
1054            }
1055        }
1056
1057        // Scratch is not committed — arena pointer stays unchanged, space is
1058        // reused by subsequent allocations.
1059        let n: f64 = match unsafe { std::str::from_utf8_unchecked(scratch.as_bytes()) }.parse() {
1060            Ok(n) => n,
1061            Err(_) => {
1062                return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1063            }
1064        };
1065        if n.is_finite() {
1066            Ok(n)
1067        } else {
1068            Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber))
1069        }
1070    }
1071
1072    fn value(&mut self, depth_remaining: i16) -> Result<Item<'de>, ParseError> {
1073        let at = self.cursor;
1074        let Some(byte) = self.peek_byte() else {
1075            return Err(self.set_error(self.bytes.len(), None, ErrorKind::UnexpectedEof));
1076        };
1077        let sign = match byte {
1078            b'"' | b'\'' => {
1079                self.cursor += 1;
1080                return match self.read_string(self.cursor - 1, byte) {
1081                    Ok((key, _)) => Ok(Item::string(key.name, key.span)),
1082                    Err(e) => Err(e),
1083                };
1084            }
1085            b'{' => {
1086                let start = self.cursor as u32;
1087                self.cursor += 1;
1088                let mut table = crate::table::InnerTable::new();
1089                if let Err(err) = self.inline_table_contents(&mut table, depth_remaining - 1) {
1090                    return Err(err);
1091                }
1092                return Ok(Item::table_frozen(
1093                    table,
1094                    Span::new(start, self.cursor as u32),
1095                ));
1096            }
1097            b'[' => {
1098                let start = self.cursor as u32;
1099                self.cursor += 1;
1100                let mut arr = value::Array::new();
1101                if let Err(err) = self.array_contents(&mut arr, depth_remaining - 1) {
1102                    return Err(err);
1103                };
1104                return Ok(Item::array(arr, Span::new(start, self.cursor as u32)));
1105            }
1106            b't' => {
1107                return if self.bytes[self.cursor..].starts_with(b"true") {
1108                    self.cursor += 4;
1109                    Ok(Item::boolean(
1110                        true,
1111                        Span::new(at as u32, self.cursor as u32),
1112                    ))
1113                } else {
1114                    Err(self.set_error(
1115                        at,
1116                        Some(self.cursor),
1117                        ErrorKind::Wanted {
1118                            expected: "the literal `true`",
1119                            found: "something else",
1120                        },
1121                    ))
1122                };
1123            }
1124            b'f' => {
1125                self.cursor += 1;
1126                return if self.bytes[self.cursor..].starts_with(b"alse") {
1127                    self.cursor += 4;
1128                    Ok(Item::boolean(
1129                        false,
1130                        Span::new(at as u32, self.cursor as u32),
1131                    ))
1132                } else {
1133                    Err(self.set_error(
1134                        at,
1135                        Some(self.cursor),
1136                        ErrorKind::Wanted {
1137                            expected: "the literal `false`",
1138                            found: "something else",
1139                        },
1140                    ))
1141                };
1142            }
1143            b'-' => {
1144                self.cursor += 1;
1145                0
1146            }
1147            b'+' => {
1148                self.cursor += 1;
1149                1
1150            }
1151            _ => 2,
1152        };
1153
1154        let key = self.read_keylike();
1155
1156        let end = self.cursor as u32;
1157        match key {
1158            "inf" => {
1159                return Ok(Item::float(
1160                    if sign != 0 {
1161                        f64::INFINITY
1162                    } else {
1163                        f64::NEG_INFINITY
1164                    },
1165                    Span::new(at as u32, end),
1166                ));
1167            }
1168            "nan" => {
1169                return Ok(Item::float(
1170                    if sign != 0 {
1171                        f64::NAN.copysign(1.0)
1172                    } else {
1173                        f64::NAN.copysign(-1.0)
1174                    },
1175                    Span::new(at as u32, end),
1176                ));
1177            }
1178            _ => (),
1179        }
1180
1181        if let [b'0'..=b'9', ..] = key.as_bytes() {
1182            self.number(at as u32, end, key, sign)
1183        } else {
1184            return Err(self.set_error(
1185                at as usize,
1186                Some(self.cursor as usize),
1187                ErrorKind::InvalidNumber,
1188            ));
1189        }
1190    }
1191
1192    fn inline_table_contents(
1193        &mut self,
1194        out: &mut crate::table::InnerTable<'de>,
1195        depth_remaining: i16,
1196    ) -> Result<(), ParseError> {
1197        if depth_remaining < 0 {
1198            return Err(self.set_error(
1199                self.cursor,
1200                None,
1201                ErrorKind::OutOfRange("Max recursion depth exceeded"),
1202            ));
1203        }
1204        if let Err(e) = self.eat_inline_table_whitespace() {
1205            return Err(e);
1206        }
1207        if self.eat_byte(b'}') {
1208            return Ok(());
1209        }
1210        loop {
1211            let mut table_ref: &mut crate::table::InnerTable<'de> = &mut *out;
1212            let mut key = match self.read_table_key() {
1213                Ok(k) => k,
1214                Err(e) => return Err(e),
1215            };
1216            self.eat_whitespace();
1217            while self.eat_byte(b'.') {
1218                self.eat_whitespace();
1219                table_ref = match self.navigate_dotted_key(table_ref, key) {
1220                    Ok(t) => t,
1221                    Err(e) => return Err(e),
1222                };
1223                key = match self.read_table_key() {
1224                    Ok(k) => k,
1225                    Err(e) => return Err(e),
1226                };
1227                self.eat_whitespace();
1228            }
1229            if let Err(e) = self.eat_inline_table_whitespace() {
1230                return Err(e);
1231            }
1232            if let Err(e) = self.expect_byte(b'=') {
1233                return Err(e);
1234            }
1235            if let Err(e) = self.eat_inline_table_whitespace() {
1236                return Err(e);
1237            }
1238            {
1239                let val = match self.value(depth_remaining) {
1240                    Ok(v) => v,
1241                    Err(e) => return Err(e),
1242                };
1243                if let Err(e) = self.insert_value(table_ref, key, val) {
1244                    return Err(e);
1245                }
1246            }
1247
1248            if let Err(e) = self.eat_inline_table_whitespace() {
1249                return Err(e);
1250            }
1251            if self.eat_byte(b'}') {
1252                return Ok(());
1253            }
1254            if let Err(e) = self.expect_byte(b',') {
1255                return Err(e);
1256            }
1257            if let Err(e) = self.eat_inline_table_whitespace() {
1258                return Err(e);
1259            }
1260            if self.eat_byte(b'}') {
1261                return Ok(());
1262            }
1263        }
1264    }
1265
1266    fn array_contents(
1267        &mut self,
1268        out: &mut value::Array<'de>,
1269        depth_remaining: i16,
1270    ) -> Result<(), ParseError> {
1271        if depth_remaining < 0 {
1272            return Err(self.set_error(
1273                self.cursor,
1274                None,
1275                ErrorKind::OutOfRange("Max recursion depth exceeded"),
1276            ));
1277        }
1278        loop {
1279            if let Err(e) = self.eat_intermediate() {
1280                return Err(e);
1281            }
1282            if self.eat_byte(b']') {
1283                return Ok(());
1284            }
1285            match self.value(depth_remaining) {
1286                Ok(value) => out.push(value, self.arena),
1287                Err(e) => return Err(e),
1288            };
1289            if let Err(e) = self.eat_intermediate() {
1290                return Err(e);
1291            }
1292            if !self.eat_byte(b',') {
1293                break;
1294            }
1295        }
1296        if let Err(e) = self.eat_intermediate() {
1297            return Err(e);
1298        }
1299        self.expect_byte(b']')
1300    }
1301
1302    fn eat_inline_table_whitespace(&mut self) -> Result<(), ParseError> {
1303        loop {
1304            self.eat_whitespace();
1305            if self.eat_newline() {
1306                continue;
1307            }
1308            match self.eat_comment() {
1309                Ok(true) => {}
1310                Ok(false) => break,
1311                Err(e) => return Err(e),
1312            }
1313        }
1314        Ok(())
1315    }
1316
1317    fn eat_intermediate(&mut self) -> Result<(), ParseError> {
1318        loop {
1319            self.eat_whitespace();
1320            if self.eat_newline() {
1321                continue;
1322            }
1323            match self.eat_comment() {
1324                Ok(true) => {}
1325                Ok(false) => break,
1326                Err(e) => return Err(e),
1327            }
1328        }
1329        Ok(())
1330    }
1331
1332    /// Navigate into an existing or new table for a dotted-key intermediate
1333    /// segment. Checks frozen and header bits.
1334    /// New tables are created with the `DOTTED` tag.
1335    fn navigate_dotted_key<'t>(
1336        &mut self,
1337        table: &'t mut InnerTable<'de>,
1338        key: Key<'de>,
1339    ) -> Result<&'t mut InnerTable<'de>, ParseError> {
1340        if let Some(idx) = self.indexed_find(table, key.name) {
1341            let (existing_key, value) = &mut table.entries_mut()[idx];
1342            let ok = value.is_table() && !value.is_frozen() && !value.has_header_bit();
1343
1344            if !ok {
1345                return Err(self.set_error(
1346                    key.span.start as usize,
1347                    Some(key.span.end as usize),
1348                    ErrorKind::DottedKeyInvalidType {
1349                        first: existing_key.span,
1350                    },
1351                ));
1352            }
1353            // Safety: check above ensures value is table
1354            unsafe { Ok(value.as_table_mut_unchecked()) }
1355        } else {
1356            let span = key.span;
1357            let inserted = self.insert_value_known_to_be_unique(
1358                table,
1359                key,
1360                Item::table_dotted(InnerTable::new(), span),
1361            );
1362            unsafe { Ok(inserted.as_table_mut_unchecked()) }
1363        }
1364    }
1365
1366    /// Navigate an intermediate segment of a table header (e.g. `a` in `[a.b.c]`).
1367    /// Creates implicit tables (no flag bits) if not found.
1368    /// Handles arrays-of-tables by navigating into the last element.
1369    ///
1370    /// Returns a `SpannedTable` view of the table navigated into.
1371    fn navigate_header_intermediate<'b>(
1372        &mut self,
1373        st: &'b mut Table<'de>,
1374        key: Key<'de>,
1375    ) -> Result<&'b mut Table<'de>, ParseError> {
1376        let table = &mut st.value;
1377
1378        if let Some(idx) = self.indexed_find(table, key.name) {
1379            let (existing_key, existing) = &mut table.entries_mut()[idx];
1380            let first_key_span = existing_key.span;
1381            let is_table = existing.is_table();
1382            let is_frozen = existing.is_frozen();
1383            let is_aot = existing.is_aot();
1384
1385            if is_table {
1386                if is_frozen {
1387                    return Err(self.set_duplicate_key_error(first_key_span, key.span, key.name));
1388                }
1389                unsafe { Ok(existing.as_spanned_table_mut_unchecked()) }
1390            } else if is_aot {
1391                // unwrap is safe since we just check it's an array of tables and thus a array.
1392                let arr = existing.as_array_mut().unwrap();
1393                // unwrap is safe as array's of tables always have atleast one value by construction
1394                let last = arr.last_mut().unwrap();
1395                if !last.is_table() {
1396                    return Err(self.set_duplicate_key_error(first_key_span, key.span, key.name));
1397                }
1398                unsafe { Ok(last.as_spanned_table_mut_unchecked()) }
1399            } else {
1400                Err(self.set_duplicate_key_error(first_key_span, key.span, key.name))
1401            }
1402        } else {
1403            let span = key.span;
1404            let inserted = self.insert_value_known_to_be_unique(
1405                table,
1406                key,
1407                Item::table(InnerTable::new(), span),
1408            );
1409            unsafe { Ok(inserted.as_spanned_table_mut_unchecked()) }
1410        }
1411    }
1412    fn insert_value_known_to_be_unique<'t>(
1413        &mut self,
1414        table: &'t mut InnerTable<'de>,
1415        key: Key<'de>,
1416        item: Item<'de>,
1417    ) -> &'t mut value::Item<'de> {
1418        let len = table.len();
1419        if len >= INDEXED_TABLE_THRESHOLD {
1420            let table_id = unsafe { table.first_key_span_start_unchecked() };
1421            if len == INDEXED_TABLE_THRESHOLD {
1422                for (i, (key, _)) in table.entries().iter().enumerate() {
1423                    self.table_index
1424                        .insert(KeyIndex::new(key.as_str(), table_id), i);
1425                }
1426            }
1427            self.table_index
1428                .insert(KeyIndex::new(key.as_str(), table_id), len);
1429        }
1430        &mut table.insert(key, item, self.arena).1
1431    }
1432
1433    /// Handle the final segment of a standard table header `[a.b.c]`.
1434    ///
1435    /// Returns the [`Ctx`] for the table that subsequent key-value pairs
1436    /// should be inserted into.
1437    fn navigate_header_table_final<'b>(
1438        &mut self,
1439        st: &'b mut Table<'de>,
1440        key: Key<'de>,
1441        header_start: u32,
1442        header_end: u32,
1443    ) -> Result<Ctx<'b, 'de>, ParseError> {
1444        let table = &mut st.value;
1445
1446        if let Some(idx) = self.indexed_find(table, key.name) {
1447            let (existing_key, value) = &mut table.entries_mut()[idx];
1448            let first_key_span = existing_key.span;
1449            let is_table = value.is_table();
1450            let is_frozen = value.is_frozen();
1451            let has_header = value.has_header_bit();
1452            let has_dotted = value.has_dotted_bit();
1453            let val_span = value.span();
1454
1455            if !is_table || is_frozen {
1456                return Err(self.set_duplicate_key_error(first_key_span, key.span, key.name));
1457            }
1458            if has_header {
1459                return Err(self.set_error(
1460                    header_start as usize,
1461                    Some(header_end as usize),
1462                    ErrorKind::DuplicateTable {
1463                        name: String::from(key.name),
1464                        first: val_span,
1465                    },
1466                ));
1467            }
1468            if has_dotted {
1469                return Err(self.set_duplicate_key_error(first_key_span, key.span, key.name));
1470            }
1471            let table = unsafe { value.as_spanned_table_mut_unchecked() };
1472            table.set_header_flag();
1473            table.set_span_start(header_start);
1474            table.set_span_end(header_end);
1475            Ok(Ctx {
1476                table,
1477                array_end_span: None,
1478            })
1479        } else {
1480            let inserted = self.insert_value_known_to_be_unique(
1481                table,
1482                key,
1483                Item::table_header(InnerTable::new(), Span::new(header_start, header_end)),
1484            );
1485            Ok(Ctx {
1486                table: unsafe { inserted.as_spanned_table_mut_unchecked() },
1487                array_end_span: None,
1488            })
1489        }
1490    }
1491
    /// Handle the final segment of an array-of-tables header `[[a.b.c]]`.
    ///
    /// * If `key` already names an array of tables, a new header table
    ///   spanning `header_start..header_end` is appended to it.
    /// * If `key` is absent, a new array of tables holding a single header
    ///   table is inserted.
    /// * If `key` names a plain table, `RedefineAsArray` is reported over the
    ///   header span; any other existing value is a duplicate-key error.
    ///
    /// Returns the [`Ctx`] for the new table entry that subsequent key-value
    /// pairs should be inserted into. Its `array_end_span` is the first half
    /// of what `split_array_end_flag` returns for the enclosing array.
    fn navigate_header_array_final<'b>(
        &mut self,
        st: &'b mut Table<'de>,
        key: Key<'de>,
        header_start: u32,
        header_end: u32,
    ) -> Result<Ctx<'b, 'de>, ParseError> {
        let table = &mut st.value;

        if let Some(idx) = self.indexed_find(table, key.name) {
            let (existing_key, value) = &mut table.entries_mut()[idx];
            let first_key_span = existing_key.span;
            let is_aot = value.is_aot();
            let is_table = value.is_table();

            if is_aot {
                // Split the array item into its end/flag word and the array
                // payload so both can be borrowed mutably at the same time.
                // NOTE(review): the safety contract of `split_array_end_flag`
                // is not visible here; presumably it requires the item to be
                // an array, which `is_aot` has just established. Confirm.
                let (end_flag, arr) = unsafe { value.split_array_end_flag() };
                let entry_span = Span::new(header_start, header_end);
                arr.push(
                    Item::table_header(InnerTable::new(), entry_span),
                    self.arena,
                );
                // The element pushed just above is the last one, so the
                // unwrap cannot fail.
                let entry = arr.last_mut().unwrap();
                Ok(Ctx {
                    // Safety: `entry` is the header table pushed above.
                    table: unsafe { entry.as_spanned_table_mut_unchecked() },
                    array_end_span: Some(end_flag),
                })
            } else if is_table {
                // `[[key]]` where `key` is already a table.
                Err(self.set_error(
                    header_start as usize,
                    Some(header_end as usize),
                    ErrorKind::RedefineAsArray,
                ))
            } else {
                Err(self.set_duplicate_key_error(first_key_span, key.span, key.name))
            }
        } else {
            // First `[[key]]` header: create the array with one table entry.
            // The entry and the array both start with the header span.
            let entry_span = Span::new(header_start, header_end);
            let first_entry = Item::table_header(InnerTable::new(), entry_span);
            let array_span = Span::new(header_start, header_end);
            let array_val = Item::array_aot(
                value::Array::with_single(first_entry, self.arena),
                array_span,
            );
            let inserted = self.insert_value_known_to_be_unique(table, key, array_val);
            // NOTE(review): as above, presumably sound because `inserted` is
            // the array item created just above. Confirm against the
            // definition of `split_array_end_flag`.
            let (end_flag, arr) = unsafe { inserted.split_array_end_flag() };
            // The array was built with a single element, so `last_mut` is `Some`.
            let entry = arr.last_mut().unwrap();
            Ok(Ctx {
                // Safety: `entry` is the header table the array was built with.
                table: unsafe { entry.as_spanned_table_mut_unchecked() },
                array_end_span: Some(end_flag),
            })
        }
    }
1549
1550    /// Insert a value into a table, checking for duplicates.
1551    fn insert_value(
1552        &mut self,
1553        table: &mut InnerTable<'de>,
1554        key: Key<'de>,
1555        item: Item<'de>,
1556    ) -> Result<(), ParseError> {
1557        if table.len() < INDEXED_TABLE_THRESHOLD {
1558            for (existing_key, _) in table.entries() {
1559                if existing_key.as_str() == key.name {
1560                    return Err(self.set_duplicate_key_error(
1561                        existing_key.span,
1562                        key.span,
1563                        key.name,
1564                    ));
1565                }
1566            }
1567            table.insert(key, item, &self.arena);
1568            return Ok(());
1569        }
1570        // Safety: We now table len > 0, from above check.
1571        let table_id = unsafe { table.first_key_span_start_unchecked() };
1572
1573        // Note: if find a duplicate we bail out, terminating the parsing with an error.
1574        // Even if we did end up re-inserting no issues would come of it.
1575        if table.len() == INDEXED_TABLE_THRESHOLD {
1576            for (i, (key, _)) in table.entries().iter().enumerate() {
1577                // Wish I could use insert_unique here but that would require
1578                // pulling in hashbrown :(
1579                self.table_index
1580                    .insert(KeyIndex::new(key.as_str(), table_id), i);
1581            }
1582        }
1583
1584        match self
1585            .table_index
1586            .entry(KeyIndex::new(key.as_str(), table_id))
1587        {
1588            std::collections::hash_map::Entry::Occupied(occupied_entry) => {
1589                let idx = *occupied_entry.get();
1590                let (existing_key, _) = &table.entries()[idx];
1591                return Err(self.set_duplicate_key_error(existing_key.span, key.span, key.name));
1592            }
1593            std::collections::hash_map::Entry::Vacant(vacant_entry) => {
1594                vacant_entry.insert(table.len());
1595                table.insert(key, item, &self.arena);
1596                return Ok(());
1597            }
1598        }
1599    }
1600
1601    /// Look up a key name in a table, returning its entry index.
1602    /// Uses the hash index for tables at or above the threshold, otherwise
1603    /// falls back to a linear scan.
1604    fn indexed_find(&self, table: &InnerTable<'de>, name: &str) -> Option<usize> {
1605        // NOTE: I would return a refernce to actual entry here, however this
1606        // runs into all sorts of NLL limitations.
1607        if table.len() > INDEXED_TABLE_THRESHOLD {
1608            let first_key_span = unsafe { table.first_key_span_start_unchecked() };
1609            self.table_index
1610                .get(&KeyIndex::new(name, first_key_span))
1611                .copied()
1612        } else {
1613            table.find_index(name)
1614        }
1615    }
1616
    /// Parse the whole document into `root_st`.
    ///
    /// Loops over the input one construct at a time: blank space, comments
    /// and newlines are skipped, a `[` starts a table header that replaces
    /// the current insertion context, and anything else is handled as a
    /// key/value pair in the current context. Stops at end of input.
    fn parse_document(&mut self, root_st: &mut Table<'de>) -> Result<(), ParseError> {
        // Until the first header is seen, key/value pairs go into the root table.
        let mut ctx = Ctx {
            table: root_st,
            array_end_span: None,
        };

        loop {
            self.eat_whitespace();
            match self.eat_comment() {
                Ok(true) => continue,
                Ok(false) => {}
                Err(e) => return Err(e),
            }
            if self.eat_newline() {
                continue;
            }

            match self.peek_byte() {
                // End of input.
                None => break,
                Some(b'[') => {
                    // Header paths are always resolved from the root; the
                    // previous context is replaced by the one returned.
                    ctx = match self.process_table_header(root_st) {
                        Ok(c) => c,
                        Err(e) => return Err(e),
                    };
                }
                // `eat_newline` above did not consume this carriage return,
                // so it is reported as an unexpected character.
                Some(b'\r') => {
                    return Err(self.set_error(self.cursor, None, ErrorKind::Unexpected('\r')));
                }
                Some(_) => {
                    if let Err(e) = self.process_key_value(&mut ctx) {
                        return Err(e);
                    }
                }
            }
        }
        Ok(())
    }
1654
1655    fn process_table_header<'b>(
1656        &mut self,
1657        root_st: &'b mut Table<'de>,
1658    ) -> Result<Ctx<'b, 'de>, ParseError> {
1659        let header_start = self.cursor as u32;
1660        if let Err(e) = self.expect_byte(b'[') {
1661            return Err(e);
1662        }
1663        let is_array = self.eat_byte(b'[');
1664
1665        let mut current = root_st;
1666
1667        self.eat_whitespace();
1668        let mut key = match self.read_table_key() {
1669            Ok(k) => k,
1670            Err(e) => return Err(e),
1671        };
1672        loop {
1673            self.eat_whitespace();
1674            if self.eat_byte(b'.') {
1675                self.eat_whitespace();
1676                current = match self.navigate_header_intermediate(current, key) {
1677                    Ok(p) => p,
1678                    Err(e) => return Err(e),
1679                };
1680                key = match self.read_table_key() {
1681                    Ok(k) => k,
1682                    Err(e) => return Err(e),
1683                };
1684            } else {
1685                break;
1686            }
1687        }
1688
1689        self.eat_whitespace();
1690        if let Err(e) = self.expect_byte(b']') {
1691            return Err(e);
1692        }
1693        if is_array && let Err(e) = self.expect_byte(b']') {
1694            return Err(e);
1695        }
1696
1697        self.eat_whitespace();
1698        match self.eat_comment() {
1699            Ok(true) => {}
1700            Ok(false) => {
1701                if let Err(e) = self.eat_newline_or_eof() {
1702                    return Err(e);
1703                }
1704            }
1705            Err(e) => return Err(e),
1706        }
1707        let header_end = self.cursor as u32;
1708
1709        if is_array {
1710            self.navigate_header_array_final(current, key, header_start, header_end)
1711        } else {
1712            self.navigate_header_table_final(current, key, header_start, header_end)
1713        }
1714    }
1715
1716    fn process_key_value(&mut self, ctx: &mut Ctx<'_, 'de>) -> Result<(), ParseError> {
1717        let line_start = self.cursor as u32;
1718        // Borrow the Table payload from the SpannedTable. NLL drops this
1719        // borrow at its last use (the insert_value call), freeing ctx.st
1720        // for the span updates that follow.
1721        let mut table_ref: &mut InnerTable<'de> = &mut ctx.table.value;
1722
1723        let mut key = match self.read_table_key() {
1724            Ok(k) => k,
1725            Err(e) => return Err(e),
1726        };
1727        self.eat_whitespace();
1728
1729        while self.eat_byte(b'.') {
1730            self.eat_whitespace();
1731            table_ref = match self.navigate_dotted_key(table_ref, key) {
1732                Ok(t) => t,
1733                Err(e) => return Err(e),
1734            };
1735            key = match self.read_table_key() {
1736                Ok(k) => k,
1737                Err(e) => return Err(e),
1738            };
1739            self.eat_whitespace();
1740        }
1741
1742        if let Err(e) = self.expect_byte(b'=') {
1743            return Err(e);
1744        }
1745        self.eat_whitespace();
1746        let val = match self.value(MAX_RECURSION_DEPTH) {
1747            Ok(v) => v,
1748            Err(e) => return Err(e),
1749        };
1750        let line_end = self.cursor as u32;
1751
1752        self.eat_whitespace();
1753        match self.eat_comment() {
1754            Ok(true) => {}
1755            Ok(false) => {
1756                if let Err(e) = self.eat_newline_or_eof() {
1757                    return Err(e);
1758                }
1759            }
1760            Err(e) => return Err(e),
1761        }
1762
1763        if let Err(e) = self.insert_value(table_ref, key, val) {
1764            return Err(e);
1765        }
1766
1767        let start = ctx.table.span_start();
1768        ctx.table.set_span_start(start.min(line_start));
1769        ctx.table.extend_span_end(line_end);
1770
1771        if let Some(end_flag) = &mut ctx.array_end_span {
1772            let old = **end_flag;
1773            let current = old >> value::FLAG_SHIFT;
1774            **end_flag = (current.max(line_end) << value::FLAG_SHIFT) | (old & value::FLAG_MASK);
1775        }
1776
1777        Ok(())
1778    }
1779}
1780
1781/// Parses a TOML string into a [`Table`].
1782///
1783/// The returned table borrows from both the input string and the [`Arena`],
1784/// so both must outlive the table. The arena is used to store escape sequences;
1785/// plain strings borrow directly from the input.
1786pub fn parse<'de>(s: &'de str, arena: &'de Arena) -> Result<Table<'de>, Error> {
1787    // Tag bits use the low 3 bits of start_and_tag, limiting span.start to
1788    // 29 bits (512 MiB). The flag state uses the low 3 bits of end_and_flag,
1789    // limiting span.end to 29 bits (512 MiB).
1790    const MAX_SIZE: usize = (1u32 << 29) as usize;
1791
1792    if s.len() > MAX_SIZE {
1793        return Err(Error {
1794            kind: ErrorKind::FileTooLarge,
1795            span: Span::new(0, 0),
1796        });
1797    }
1798
1799    // SAFETY: root is a table, so the SpannedTable reinterpretation is valid.
1800    let mut root_st = Table::new(Span::new(0, s.len() as u32));
1801    let mut parser = Parser::new(s, arena);
1802    match parser.parse_document(&mut root_st) {
1803        Ok(()) => {}
1804        Err(_) => return Err(parser.take_error()),
1805    }
1806    // Note that root is about the drop (but doesn't implement drop), so we can take
1807    // ownership of this table.
1808    // todo don't do this
1809    Ok(root_st)
1810}
1811
/// Returns `true` when `b` is an ASCII letter, an ASCII digit, `-`, or `_`.
#[inline]
fn is_keylike_byte(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'-' | b'_')
}
1816
1817fn byte_describe(b: u8) -> &'static str {
1818    match b {
1819        b'\n' => "a newline",
1820        b' ' | b'\t' => "whitespace",
1821        b'=' => "an equals",
1822        b'.' => "a period",
1823        b',' => "a comma",
1824        b':' => "a colon",
1825        b'+' => "a plus",
1826        b'{' => "a left brace",
1827        b'}' => "a right brace",
1828        b'[' => "a left bracket",
1829        b']' => "a right bracket",
1830        b'\'' | b'"' => "a string",
1831        _ if is_keylike_byte(b) => "an identifier",
1832        _ => "a character",
1833    }
1834}