Skip to main content

toml_spanner/
parser.rs

1// Deliberately avoid `?` operator throughout this module for compile-time
2// performance: explicit match/if-let prevents the compiler from generating
3// From::from conversion and drop-glue machinery at every call site.
4
5#[cfg(test)]
6#[path = "./parser_tests.rs"]
7mod tests;
8
9#[cfg(feature = "deserialization")]
10use crate::de::{Failed, TableHelper};
11use crate::{
12    MaybeItem, Span,
13    arena::Arena,
14    error::{Error, ErrorKind},
15    table::{InnerTable, Table},
16    time::DateTime,
17    value::{self, Item, Key},
18};
19use std::char;
20use std::hash::{Hash, Hasher};
21use std::ptr::NonNull;
22
/// Upper bound on nesting depth.
/// NOTE(review): the use site is outside this excerpt — presumably this caps
/// recursion through nested arrays / inline tables; confirm against the caller.
const MAX_RECURSION_DEPTH: i16 = 256;
/// Field-less marker returned on the parser's error path.
///
/// When a method returns `Err(ParseError)`, the full error details have already
/// been written into `Parser::error_kind` / `Parser::error_span`.
#[derive(Copy, Clone)]
struct ParseError;
28
/// The table currently being populated, plus the optional parent-array span
/// that must be kept in sync with it.
struct Ctx<'b, 'de> {
    /// The current table context — a `Table` view into a table `Value`.
    /// Gives direct mutable access to both the span fields and the `Table` payload.
    table: &'b mut Table<'de>,
    /// If this table is an entry in an array-of-tables, a disjoint borrow of
    /// the parent array `Value`'s `end_and_flag` field so its span can be
    /// extended alongside the entry.
    array_end_span: Option<&'b mut u32>,
}
38
/// Tables with at least this many entries use the hash index for lookups.
///
/// Note: looking purely at parsing benchmarks you might be inclined to raise
/// this value. However, the same index is then reused during deserialization,
/// where the cost of initializing the index is recouped.
pub const INDEXED_TABLE_THRESHOLD: usize = 6;
44
/// Builds the byte → hex-digit lookup table at compile time.
///
/// Entry `b` holds the numeric value (`0..=15`) of the ASCII hex digit `b`
/// (upper- or lower-case), and `-1` for every other byte value.
const fn build_hex_table() -> [i8; 256] {
    // Start from "not a hex digit" everywhere, then fill in the three ranges.
    let mut lut = [-1i8; 256];

    let mut digit = 0u8;
    while digit < 10 {
        lut[(b'0' + digit) as usize] = digit as i8;
        digit += 1;
    }

    let mut letter = 0u8;
    while letter < 6 {
        let value = (10 + letter) as i8;
        lut[(b'A' + letter) as usize] = value;
        lut[(b'a' + letter) as usize] = value;
        letter += 1;
    }

    lut
}
59
/// Lookup table mapping every byte value to its hexadecimal digit value
/// (`0..=15`), or `-1` for bytes that are not ASCII hex digits. Produced at
/// compile time by `build_hex_table`.
static HEX: [i8; 256] = build_hex_table();
61
/// Hash-map key that identifies a (table, key-name) pair without owning the
/// string data.
///
/// The raw `key_ptr`/`len` pair points into either the input buffer or the
/// arena; both are stable for the lifetime of the parse. `first_key_span` is
/// the `span.start()` of the **first** key ever inserted into the table and
/// serves as a cheap, collision-free table discriminator.
pub(crate) struct KeyRef<'de> {
    key_ptr: NonNull<u8>,
    len: u32,
    first_key_span: u32,
    marker: std::marker::PhantomData<&'de str>,
}

impl<'de> KeyRef<'de> {
    /// Captures `key` (by address and length, not by copy) together with the
    /// discriminator of the table it belongs to.
    ///
    /// The length is stored as `u32`; keys are assumed to fit (the cast
    /// truncates otherwise).
    #[inline]
    pub(crate) fn new(key: &'de str, first_key_span: u32) -> Self {
        let bytes = key.as_bytes();
        Self {
            // A reference is never null, so the conversion needs no `unsafe`.
            key_ptr: NonNull::from(bytes).cast::<u8>(),
            len: bytes.len() as u32,
            first_key_span,
            marker: std::marker::PhantomData,
        }
    }

    /// Reconstructs the key text captured by [`KeyRef::new`].
    #[inline]
    fn as_str(&self) -> &'de str {
        let len = self.len as usize;
        // SAFETY: `key_ptr` and `len` were captured from a valid `&'de str`
        // in `new()`, so they describe `len` initialized, UTF-8 encoded bytes.
        // The `PhantomData<&'de str>` keeps that borrow alive for `'de`.
        unsafe {
            let raw = std::slice::from_raw_parts(self.key_ptr.as_ptr(), len);
            std::str::from_utf8_unchecked(raw)
        }
    }
}

impl Hash for KeyRef<'_> {
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        Hash::hash(&self.first_key_span, state);
        // Note: `KeyRef` is meant to be used only inside the index, where the
        // `KeyRef` is the entirety of the hash input, so we don't have to
        // worry about prefix freedom.
        Hash::hash(self.as_str(), state);
    }
}

impl PartialEq for KeyRef<'_> {
    /// Two keys are equal when they belong to the same table (same
    /// discriminator) and have the same text; the addresses may differ.
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        if self.first_key_span != other.first_key_span {
            return false;
        }
        self.as_str() == other.as_str()
    }
}

impl Eq for KeyRef<'_> {}
120
/// Parser state: the borrowed input, a cursor into it, the arena used for
/// scratch allocations, the pending error (if any), and the key index.
struct Parser<'de> {
    /// Raw bytes of the input. Always valid UTF-8 (derived from `&str`).
    bytes: &'de [u8],
    /// Byte offset into `bytes` of the next unread byte.
    cursor: usize,
    /// Arena that backs scratch buffers (e.g. strings containing escapes).
    arena: &'de Arena,

    // Error context -- populated just before returning ParseError
    error_span: Span,
    error_kind: Option<ErrorKind>,

    // Global key-index for O(1) lookups in large tables.
    // Maps (table-discriminator, key-name) → entry index in the table.
    index: foldhash::HashMap<KeyRef<'de>, usize>,
}
135
136impl<'de> Parser<'de> {
137    fn new(input: &'de str, arena: &'de Arena) -> Self {
138        let bytes = input.as_bytes();
139        // Skip UTF-8 BOM (U+FEFF = EF BB BF) if present at the start.
140        let cursor = if bytes.starts_with(b"\xef\xbb\xbf") {
141            3
142        } else {
143            0
144        };
145        Parser {
146            bytes,
147            cursor,
148            arena,
149            error_span: Span::new(0, 0),
150            error_kind: None,
151            // initialize to about ~ 8 KB
152            index: foldhash::HashMap::with_capacity_and_hasher(
153                256,
154                foldhash::fast::RandomState::default(),
155            ),
156        }
157    }
158
159    /// Get a `&str` slice from the underlying bytes.
160    /// SAFETY: `self.bytes` is always valid UTF-8, and callers must ensure
161    /// `start..end` falls on UTF-8 char boundaries.
162    #[inline]
163    unsafe fn str_slice(&self, start: usize, end: usize) -> &'de str {
164        #[cfg(not(debug_assertions))]
165        unsafe {
166            std::str::from_utf8_unchecked(&self.bytes[start..end])
167        }
168        #[cfg(debug_assertions)]
169        match std::str::from_utf8(&self.bytes[start..end]) {
170            Ok(value) => value,
171            Err(err) => panic!(
172                "Invalid UTF-8 slice: bytes[{}..{}] is not valid UTF-8: {}",
173                start, end, err
174            ),
175        }
176    }
177
178    #[cold]
179    fn set_duplicate_key_error(&mut self, first: Span, second: Span, key: &str) -> ParseError {
180        self.error_span = second;
181        self.error_kind = Some(ErrorKind::DuplicateKey {
182            key: key.into(),
183            first,
184        });
185        ParseError
186    }
187    #[cold]
188    fn set_error(&mut self, start: usize, end: Option<usize>, kind: ErrorKind) -> ParseError {
189        self.error_span = Span::new(start as u32, end.unwrap_or(start + 1) as u32);
190        self.error_kind = Some(kind);
191        ParseError
192    }
193
    /// Removes the pending error kind from the parser and packages it, along
    /// with the recorded error span, into an [`Error`].
    ///
    /// # Panics
    /// Panics if no error kind is currently stored.
    fn take_error(&mut self) -> Error {
        let kind = self
            .error_kind
            .take()
            .expect("take_error called without error");
        let span = self.error_span;

        // Black Magic Optimization:
        // Removing the following introduces 8% performance
        // regression across the board.
        // (The value is never used; `black_box` only prevents the optimizer
        // from discarding the expression.)
        std::hint::black_box(&self.bytes.iter().enumerate().next());

        Error { kind, span }
    }
208
209    #[inline]
210    fn peek_byte(&self) -> Option<u8> {
211        self.bytes.get(self.cursor).copied()
212    }
213
214    #[inline]
215    fn peek_byte_at(&self, offset: usize) -> Option<u8> {
216        self.bytes.get(self.cursor + offset).copied()
217    }
218
219    #[inline]
220    fn eat_byte(&mut self, b: u8) -> bool {
221        if self.peek_byte() == Some(b) {
222            self.cursor += 1;
223            true
224        } else {
225            false
226        }
227    }
228    #[cold]
229    fn expected_error(&mut self, b: u8) -> ParseError {
230        let start = self.cursor;
231        let (found_desc, end) = self.scan_token_desc_and_end();
232        self.set_error(
233            start,
234            Some(end),
235            ErrorKind::Wanted {
236                expected: byte_describe(b),
237                found: found_desc,
238            },
239        )
240    }
241
242    fn expect_byte(&mut self, b: u8) -> Result<(), ParseError> {
243        if self.peek_byte() == Some(b) {
244            self.cursor += 1;
245            Ok(())
246        } else {
247            Err(self.expected_error(b))
248        }
249    }
250
251    fn eat_whitespace(&mut self) {
252        while let Some(b) = self.peek_byte() {
253            if b == b' ' || b == b'\t' {
254                self.cursor += 1;
255            } else {
256                break;
257            }
258        }
259    }
260
261    fn eat_whitespace_to(&mut self) -> Option<u8> {
262        while let Some(b) = self.peek_byte() {
263            if b == b' ' || b == b'\t' {
264                self.cursor += 1;
265            } else {
266                return Some(b);
267            }
268        }
269        None
270    }
271
272    fn eat_newline_or_eof(&mut self) -> Result<(), ParseError> {
273        match self.peek_byte() {
274            None => Ok(()),
275            Some(b'\n') => {
276                self.cursor += 1;
277                Ok(())
278            }
279            Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
280                self.cursor += 2;
281                Ok(())
282            }
283            _ => {
284                let start = self.cursor;
285                let (found_desc, end) = self.scan_token_desc_and_end();
286                Err(self.set_error(
287                    start,
288                    Some(end),
289                    ErrorKind::Wanted {
290                        expected: "newline",
291                        found: found_desc,
292                    },
293                ))
294            }
295        }
296    }
297
298    fn eat_comment(&mut self) -> Result<bool, ParseError> {
299        if !self.eat_byte(b'#') {
300            return Ok(false);
301        }
302        while let Some(0x09 | 0x20..=0x7E | 0x80..) = self.peek_byte() {
303            self.cursor += 1;
304        }
305        self.eat_newline_or_eof().map(|()| true)
306    }
307
308    fn eat_newline(&mut self) -> bool {
309        match self.peek_byte() {
310            Some(b'\n') => {
311                self.cursor += 1;
312                true
313            }
314            Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
315                self.cursor += 2;
316                true
317            }
318            _ => false,
319        }
320    }
321
322    /// Scan forward from the current position to determine the description
323    /// and end position of the "token" at the cursor. This provides compatible
324    /// error spans with the old tokenizer.
325    fn scan_token_desc_and_end(&self) -> (&'static str, usize) {
326        let Some(b) = self.peek_byte() else {
327            return ("eof", self.bytes.len());
328        };
329        match b {
330            b'\n' => ("a newline", self.cursor + 1),
331            b'\r' => ("a carriage return", self.cursor + 1),
332            b' ' | b'\t' => {
333                let mut end = self.cursor + 1;
334                while end < self.bytes.len()
335                    && (self.bytes[end] == b' ' || self.bytes[end] == b'\t')
336                {
337                    end += 1;
338                }
339                ("whitespace", end)
340            }
341            b'#' => ("a comment", self.cursor + 1),
342            b'=' => ("an equals", self.cursor + 1),
343            b'.' => ("a period", self.cursor + 1),
344            b',' => ("a comma", self.cursor + 1),
345            b':' => ("a colon", self.cursor + 1),
346            b'+' => ("a plus", self.cursor + 1),
347            b'{' => ("a left brace", self.cursor + 1),
348            b'}' => ("a right brace", self.cursor + 1),
349            b'[' => ("a left bracket", self.cursor + 1),
350            b']' => ("a right bracket", self.cursor + 1),
351            b'\'' | b'"' => ("a string", self.cursor + 1),
352            _ if is_keylike_byte(b) => {
353                let mut end = self.cursor + 1;
354                while end < self.bytes.len() && is_keylike_byte(self.bytes[end]) {
355                    end += 1;
356                }
357                ("an identifier", end)
358            }
359            _ => ("a character", self.cursor + 1),
360        }
361    }
362
363    fn read_keylike(&mut self) -> &'de str {
364        let start = self.cursor;
365        while let Some(b) = self.peek_byte() {
366            if !is_keylike_byte(b) {
367                break;
368            }
369            self.cursor += 1;
370        }
371        // SAFETY: keylike bytes are ASCII, always valid UTF-8 boundaries
372        unsafe { self.str_slice(start, self.cursor) }
373    }
374
375    fn read_table_key(&mut self) -> Result<Key<'de>, ParseError> {
376        let Some(b) = self.peek_byte() else {
377            return Err(self.set_error(
378                self.bytes.len(),
379                None,
380                ErrorKind::Wanted {
381                    expected: "a table key",
382                    found: "eof",
383                },
384            ));
385        };
386        match b {
387            b'"' => {
388                let start = self.cursor;
389                self.cursor += 1;
390                let (key, multiline) = match self.read_string(start, b'"') {
391                    Ok(v) => v,
392                    Err(e) => return Err(e),
393                };
394                if multiline {
395                    return Err(self.set_error(
396                        start,
397                        Some(key.span.end as usize),
398                        ErrorKind::MultilineStringKey,
399                    ));
400                }
401                Ok(key)
402            }
403            b'\'' => {
404                let start = self.cursor;
405                self.cursor += 1;
406                let (key, multiline) = match self.read_string(start, b'\'') {
407                    Ok(v) => v,
408                    Err(e) => return Err(e),
409                };
410                if multiline {
411                    return Err(self.set_error(
412                        start,
413                        Some(key.span.end as usize),
414                        ErrorKind::MultilineStringKey,
415                    ));
416                }
417                Ok(key)
418            }
419            b if is_keylike_byte(b) => {
420                let start = self.cursor;
421                let name = self.read_keylike();
422                let span = Span::new(start as u32, self.cursor as u32);
423                Ok(Key { name, span })
424            }
425            _ => {
426                let start = self.cursor;
427                let (found_desc, end) = self.scan_token_desc_and_end();
428                Err(self.set_error(
429                    start,
430                    Some(end),
431                    ErrorKind::Wanted {
432                        expected: "a table key",
433                        found: found_desc,
434                    },
435                ))
436            }
437        }
438    }
439
440    /// Read a basic (double-quoted) string. `start` is the byte offset of the
441    /// opening quote. The cursor should be positioned right after the opening `"`.
442    fn read_string(&mut self, start: usize, delim: u8) -> Result<(Key<'de>, bool), ParseError> {
443        let mut multiline = false;
444        if self.eat_byte(delim) {
445            if self.eat_byte(delim) {
446                multiline = true;
447            } else {
448                return Ok((
449                    Key {
450                        name: "",
451                        span: Span::new(start as u32, (start + 1) as u32),
452                    },
453                    false,
454                ));
455            }
456        }
457
458        let mut content_start = self.cursor;
459        if multiline {
460            match self.peek_byte() {
461                Some(b'\n') => {
462                    self.cursor += 1;
463                    content_start = self.cursor;
464                }
465                Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
466                    self.cursor += 2;
467                    content_start = self.cursor;
468                }
469                _ => {}
470            }
471        }
472
473        self.read_string_loop(start, content_start, multiline, delim)
474    }
475
    /// Advance `self.cursor` past bytes that do not require special handling
    /// inside a string.  Uses SWAR (SIMD-Within-A-Register) to scan 8 bytes
    /// at a time.
    ///
    /// Stops at the first byte that is:
    ///   * a control character (< 0x20) — tab (0x09) is a benign false positive
    ///   * DEL (0x7F)
    ///   * the string delimiter (`"` or `'`)
    ///   * a backslash (`\`) — benign false positive for literal strings
    ///   * past the end of input
    ///
    /// The caller is expected to inspect the byte at the resulting cursor and
    /// decide what to do with it; nothing is validated here.
    fn skip_string_plain(&mut self, delim: u8) {
        // Quick bail-out for EOF or an immediately-interesting byte.
        // Avoids SWAR setup cost for consecutive specials (e.g. \n\n).
        let Some(&b) = self.bytes.get(self.cursor) else {
            return;
        };

        // Note: unlike the SWAR loop below, this scalar check lets tab through.
        if b == delim || b == b'\\' || b == 0x7F || (b < 0x20 && b != 0x09) {
            return;
        }
        self.cursor += 1;

        let base = self.cursor;
        let rest = &self.bytes[base..];

        type Chunk = u64;
        const STEP: usize = std::mem::size_of::<Chunk>();
        const ONE: Chunk = Chunk::MAX / 255; // 0x0101_0101_0101_0101
        const HIGH: Chunk = ONE << 7; // 0x8080_8080_8080_8080

        // Each `fill_*` is the target byte replicated into all eight lanes.
        let fill_delim = ONE * Chunk::from(delim);
        let fill_bslash = ONE * Chunk::from(b'\\');
        let fill_del = ONE * 0x7F;

        let chunks = rest.chunks_exact(STEP);
        // Bytes left over after the last full chunk; handled by the slow path.
        let remainder_len = chunks.remainder().len();

        for (i, chunk) in chunks.enumerate() {
            // Little-endian load: lane 0 (lowest bits) is the first byte in memory.
            let v = Chunk::from_le_bytes(chunk.try_into().unwrap());

            // After masking with HIGH, a lane's top bit marks a byte < 0x20.
            // Borrow propagation can also mark lanes *above* a genuine hit;
            // that is harmless because only the lowest marked lane is used.
            let has_ctrl = v.wrapping_sub(ONE * 0x20) & !v;
            // "Has zero byte" test applied to `v ^ fill`: marks lanes equal to
            // the fill byte (same caveat about lanes above a genuine hit).
            let eq_delim = (v ^ fill_delim).wrapping_sub(ONE) & !(v ^ fill_delim);
            let eq_bslash = (v ^ fill_bslash).wrapping_sub(ONE) & !(v ^ fill_bslash);
            let eq_del = (v ^ fill_del).wrapping_sub(ONE) & !(v ^ fill_del);

            let masked = (has_ctrl | eq_delim | eq_bslash | eq_del) & HIGH;
            if masked != 0 {
                // trailing_zeros / 8 = index of the first marked byte in the chunk.
                self.cursor = base + i * STEP + masked.trailing_zeros() as usize / 8;
                return;
            }
        }

        // No full chunk contained a special byte: finish the tail byte-by-byte.
        self.cursor = self.bytes.len() - remainder_len;
        self.skip_string_plain_slow(delim);
    }
531
532    #[cold]
533    #[inline(never)]
534    fn skip_string_plain_slow(&mut self, delim: u8) {
535        while let Some(&b) = self.bytes.get(self.cursor) {
536            if b == delim || b == b'\\' || b == 0x7F || (b < 0x20 && b != 0x09) {
537                return;
538            }
539            self.cursor += 1;
540        }
541    }
542
    /// Scans the body of a quoted string up to and including its closing
    /// delimiter.
    ///
    /// * `start` — byte offset of the opening quote (used for error spans and
    ///   to compute the content span).
    /// * `content_start` — byte offset where the string content begins.
    /// * `multiline` — whether the string is triple-quoted.
    /// * `delim` — the quote byte; escapes are only processed when it is `"`.
    ///
    /// Returns the string as a `Key` plus the `multiline` flag. When no escape
    /// was encountered the text is a direct slice of the input; otherwise the
    /// unescaped text is assembled in an arena scratch buffer and committed.
    ///
    /// # Errors
    /// `UnterminatedString` at end of input, `InvalidCharInString` for a
    /// disallowed byte (including a line ending in a single-line string or a
    /// bare `\r`), and any error from `read_basic_escape`.
    fn read_string_loop(
        &mut self,
        start: usize,
        content_start: usize,
        multiline: bool,
        delim: u8,
    ) -> Result<(Key<'de>, bool), ParseError> {
        // Start of the input range not yet copied into `scratch`.
        let mut flush_from = content_start;
        // Created on the first escape; `None` means the result can borrow the input.
        let mut scratch: Option<crate::arena::Scratch<'de>> = None;
        loop {
            self.skip_string_plain(delim);

            let i = self.cursor;
            let Some(&b) = self.bytes.get(i) else {
                return Err(self.set_error(start, None, ErrorKind::UnterminatedString));
            };
            self.cursor = i + 1;

            match b {
                b'\r' => {
                    // `\r` is only acceptable as the first half of `\r\n`,
                    // and then only inside a multiline string.
                    if self.eat_byte(b'\n') {
                        if !multiline {
                            return Err(self.set_error(
                                i,
                                None,
                                ErrorKind::InvalidCharInString('\n'),
                            ));
                        }
                    } else {
                        return Err(self.set_error(i, None, ErrorKind::InvalidCharInString('\r')));
                    }
                }
                b'\n' => {
                    if !multiline {
                        return Err(self.set_error(i, None, ErrorKind::InvalidCharInString('\n')));
                    }
                }
                d if d == delim => {
                    let (span, end) = if multiline {
                        // Fewer than three consecutive delimiters are ordinary content.
                        if !self.eat_byte(delim) {
                            continue;
                        }
                        if !self.eat_byte(delim) {
                            continue;
                        }
                        // Up to two further delimiters belong to the content:
                        // the closing delimiter is the *last* three quotes.
                        let mut extra = 0usize;
                        if self.eat_byte(delim) {
                            extra += 1;
                        }
                        if self.eat_byte(delim) {
                            extra += 1;
                        }

                        // Re-derive where the content started: the opening
                        // delimiter is 3 bytes, optionally followed by a
                        // skipped `\n` (1 byte) or `\r\n` (2 bytes).
                        let maybe_nl = self.bytes[start + 3];
                        let start_off = if maybe_nl == b'\n' {
                            4
                        } else if maybe_nl == b'\r' {
                            5
                        } else {
                            3
                        };

                        (
                            Span::new((start + start_off) as u32, (self.cursor - 3) as u32),
                            i + extra,
                        )
                    } else {
                        (Span::new((start + 1) as u32, (self.cursor - 1) as u32), i)
                    };

                    let name = if let Some(mut s) = scratch {
                        // Copy the unescaped tail, then freeze the buffer.
                        s.extend(&self.bytes[flush_from..end]);
                        let committed = s.commit();
                        // Safety: scratch contents are valid UTF-8 (built from
                        // validated input and well-formed escape sequences).
                        unsafe { std::str::from_utf8_unchecked(committed) }
                    } else {
                        // Safety: content_start..end is validated UTF-8.
                        unsafe { self.str_slice(content_start, end) }
                    };
                    return Ok((Key { name, span }, multiline));
                }
                b'\\' if delim == b'"' => {
                    let arena = self.arena;
                    // SAFETY: the closure only runs when scratch is None, so no
                    // other Scratch or arena.alloc() call is active.
                    let s = scratch.get_or_insert_with(|| unsafe { arena.scratch() });
                    // Flush the literal bytes preceding the backslash, decode
                    // the escape, and resume flushing after it.
                    s.extend(&self.bytes[flush_from..i]);
                    if let Err(e) = self.read_basic_escape(s, start, multiline) {
                        return Err(e);
                    }
                    flush_from = self.cursor;
                }
                // Tab or backslash-in-literal-string: benign false positives
                // from the SWAR scan.
                0x09 | 0x20..=0x7E | 0x80.. => {}
                _ => {
                    return Err(self.set_error(i, None, ErrorKind::InvalidCharInString(b as char)));
                }
            }
        }
    }
645
    /// Decodes one escape sequence of a basic string; the cursor must be just
    /// past the backslash. The decoded bytes (if any) are appended to
    /// `scratch`.
    ///
    /// * `string_start` — offset of the string's opening quote, used for
    ///   `UnterminatedString` errors.
    /// * `multi` — whether the enclosing string is multiline; only then is a
    ///   backslash followed by whitespace / a line ending accepted, in which
    ///   case the following whitespace and line endings are skipped and
    ///   nothing is appended.
    ///
    /// # Errors
    /// `UnterminatedString` at end of input, `InvalidEscape` for an unknown
    /// escape (or trailing whitespace not followed by a line ending),
    /// `InvalidCharInString('\r')` for a bare `\r`, and any error from
    /// `read_hex`.
    fn read_basic_escape(
        &mut self,
        scratch: &mut crate::arena::Scratch<'_>,
        string_start: usize,
        multi: bool,
    ) -> Result<(), ParseError> {
        let i = self.cursor;
        let Some(&b) = self.bytes.get(i) else {
            return Err(self.set_error(string_start, None, ErrorKind::UnterminatedString));
        };
        self.cursor = i + 1;
        // Two labeled blocks: `'byte` yields a single-byte result that is
        // pushed directly; `'char` yields a `char` that is UTF-8 encoded
        // below. Paths that append nothing return from inside.
        let chr: char = 'char: {
            let byte: u8 = 'byte: {
                match b {
                    b'"' => break 'byte b'"',
                    b'\\' => break 'byte b'\\',
                    b'b' => break 'byte 0x08,
                    b'f' => break 'byte 0x0C,
                    b'n' => break 'byte b'\n',
                    b'r' => break 'byte b'\r',
                    b't' => break 'byte b'\t',
                    b'e' => break 'byte 0x1B,
                    // `i` is the offset of the escape letter, used for error spans.
                    b'u' => match self.read_hex(4, string_start, i) {
                        Ok(ch) => break 'char ch,
                        Err(e) => return Err(e),
                    },
                    b'U' => match self.read_hex(8, string_start, i) {
                        Ok(ch) => break 'char ch,
                        Err(e) => return Err(e),
                    },
                    b'x' => match self.read_hex(2, string_start, i) {
                        Ok(ch) => break 'char ch,
                        Err(e) => return Err(e),
                    },
                    b' ' | b'\t' | b'\n' | b'\r' if multi => {
                        // CRLF folding: \r\n counts as \n
                        let c = if b == b'\r' && self.peek_byte() == Some(b'\n') {
                            self.cursor += 1;
                            '\n'
                        } else if b == b'\r' {
                            return Err(self.set_error(
                                i,
                                None,
                                ErrorKind::InvalidCharInString('\r'),
                            ));
                        } else {
                            b as char
                        };
                        // Backslash followed by space/tab: only whitespace may
                        // appear before the line ending that must follow.
                        if c != '\n' {
                            loop {
                                match self.peek_byte() {
                                    Some(b' ' | b'\t') => {
                                        self.cursor += 1;
                                    }
                                    Some(b'\n') => {
                                        self.cursor += 1;
                                        break;
                                    }
                                    Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
                                        self.cursor += 2;
                                        break;
                                    }
                                    _ => {
                                        return Err(self.set_error(
                                            i,
                                            None,
                                            ErrorKind::InvalidEscape(c),
                                        ));
                                    }
                                }
                            }
                        }
                        // Past the first line ending: skip all further
                        // whitespace and line endings.
                        loop {
                            match self.peek_byte() {
                                Some(b' ' | b'\t' | b'\n') => {
                                    self.cursor += 1;
                                }
                                Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
                                    self.cursor += 2;
                                }
                                _ => break,
                            }
                        }
                    }
                    _ => {
                        // Step back onto the offending byte so the error
                        // reports the full (possibly multi-byte) character.
                        self.cursor -= 1;
                        return Err(self.set_error(
                            self.cursor,
                            None,
                            ErrorKind::InvalidEscape(self.next_char_for_error()),
                        ));
                    }
                }
                // Only the line-continuation arm falls through to here;
                // it contributes no bytes.
                return Ok(());
            };

            scratch.push(byte);
            return Ok(());
        };
        let mut buf = [0u8; 4];
        let len = chr.encode_utf8(&mut buf).len();
        scratch.extend(&buf[..len]);
        Ok(())
    }
750
751    fn read_hex(
752        &mut self,
753        n: usize,
754        string_start: usize,
755        escape_start: usize,
756    ) -> Result<char, ParseError> {
757        let mut val: u32 = 0;
758        for _ in 0..n {
759            let Some(&byte) = self.bytes.get(self.cursor) else {
760                return Err(self.set_error(string_start, None, ErrorKind::UnterminatedString));
761            };
762            let digit = HEX[byte as usize];
763            if digit >= 0 {
764                val = (val << 4) | digit as u32;
765                self.cursor += 1;
766            } else {
767                return Err(self.set_error(
768                    self.cursor,
769                    None,
770                    ErrorKind::InvalidHexEscape(self.next_char_for_error()),
771                ));
772            }
773        }
774        match char::from_u32(val) {
775            Some(ch) => Ok(ch),
776            None => Err(self.set_error(
777                escape_start,
778                Some(escape_start + n),
779                ErrorKind::InvalidEscapeValue(val),
780            )),
781        }
782    }
783
784    fn next_char_for_error(&self) -> char {
785        // Safety: The input was valid UTF-8 via a &str
786        let text = unsafe { std::str::from_utf8_unchecked(self.bytes) };
787        if let Some(value) = text.get(self.cursor..) {
788            value.chars().next().unwrap_or(char::REPLACEMENT_CHARACTER)
789        } else {
790            char::REPLACEMENT_CHARACTER
791        }
792    }
793    fn number(
794        &mut self,
795        start: u32,
796        end: u32,
797        s: &'de str,
798        sign: u8,
799    ) -> Result<Item<'de>, ParseError> {
800        let bytes = s.as_bytes();
801
802        // Base-prefixed integers (0x, 0o, 0b).
803        // TOML forbids signs on these, so only match when first byte is '0'.
804        if sign == 2
805            && let [b'0', format, rest @ ..] = s.as_bytes()
806        {
807            match format {
808                b'x' => return self.integer_hex(rest, Span::new(start, end)),
809                b'o' => return self.integer_octal(rest, Span::new(start, end)),
810                b'b' => return self.integer_binary(rest, Span::new(start, end)),
811                _ => {}
812            }
813        }
814
815        if self.eat_byte(b'.') {
816            let at = self.cursor;
817            return match self.peek_byte() {
818                Some(b) if is_keylike_byte(b) => {
819                    let after = self.read_keylike();
820                    match self.float(start, end, s, Some(after), sign) {
821                        Ok(f) => Ok(Item::float(f, Span::new(start, self.cursor as u32))),
822                        Err(e) => Err(e),
823                    }
824                }
825                _ => Err(self.set_error(at, Some(end as usize), ErrorKind::InvalidNumber)),
826            };
827        }
828
829        if sign == 2 {
830            let head = &self.bytes[start as usize..];
831            if let Some((consumed, moment)) = DateTime::munch(head) {
832                self.cursor = start as usize + consumed;
833                return Ok(Item::moment(moment, Span::new(start, self.cursor as u32)));
834            }
835        }
836
837        if let Ok(v) = self.integer_decimal(bytes, Span::new(start, end), sign) {
838            return Ok(v);
839        }
840
841        if bytes.iter().any(|&b| b == b'e' || b == b'E') {
842            return match self.float(start, end, s, None, sign) {
843                Ok(f) => Ok(Item::float(f, Span::new(start, self.cursor as u32))),
844                Err(e) => Err(e),
845            };
846        }
847
848        Err(ParseError)
849    }
850
851    fn integer_decimal(
852        &mut self,
853        bytes: &'de [u8],
854        span: Span,
855        sign: u8,
856    ) -> Result<Item<'de>, ParseError> {
857        let mut acc: u64 = 0;
858        let mut prev_underscore = false;
859        let mut has_digit = false;
860        let mut leading_zero = false;
861        let negative = sign == 0;
862        'error: {
863            for &b in bytes {
864                if b == b'_' {
865                    if !has_digit || prev_underscore {
866                        break 'error;
867                    }
868                    prev_underscore = true;
869                    continue;
870                }
871                if !b.is_ascii_digit() {
872                    break 'error;
873                }
874                if leading_zero {
875                    break 'error;
876                }
877                if !has_digit && b == b'0' {
878                    leading_zero = true;
879                }
880                has_digit = true;
881                prev_underscore = false;
882                let digit = (b - b'0') as u64;
883                acc = match acc.checked_mul(10).and_then(|a| a.checked_add(digit)) {
884                    Some(v) => v,
885                    None => break 'error,
886                };
887            }
888
889            if !has_digit || prev_underscore {
890                break 'error;
891            }
892
893            let max = if negative {
894                (i64::MAX as u64) + 1
895            } else {
896                i64::MAX as u64
897            };
898            if acc > max {
899                break 'error;
900            }
901
902            let val = if negative {
903                (acc as i64).wrapping_neg()
904            } else {
905                acc as i64
906            };
907            return Ok(Item::integer(val, span));
908        }
909        self.error_span = span;
910        self.error_kind = Some(ErrorKind::InvalidNumber);
911        Err(ParseError)
912    }
913
914    fn integer_hex(&mut self, bytes: &'de [u8], span: Span) -> Result<Item<'de>, ParseError> {
915        let mut acc: u64 = 0;
916        let mut prev_underscore = false;
917        let mut has_digit = false;
918        'error: {
919            if bytes.is_empty() {
920                break 'error;
921            }
922
923            for &b in bytes {
924                if b == b'_' {
925                    if !has_digit || prev_underscore {
926                        break 'error;
927                    }
928                    prev_underscore = true;
929                    continue;
930                }
931                let digit = HEX[b as usize];
932                if digit < 0 {
933                    break 'error;
934                }
935                has_digit = true;
936                prev_underscore = false;
937                if acc >> 60 != 0 {
938                    break 'error;
939                }
940                acc = (acc << 4) | digit as u64;
941            }
942
943            if !has_digit || prev_underscore {
944                break 'error;
945            }
946
947            if acc > i64::MAX as u64 {
948                break 'error;
949            }
950            return Ok(Item::integer(acc as i64, span));
951        }
952        self.error_span = span;
953        self.error_kind = Some(ErrorKind::InvalidNumber);
954        Err(ParseError)
955    }
956
957    fn integer_octal(&mut self, bytes: &'de [u8], span: Span) -> Result<Item<'de>, ParseError> {
958        let mut acc: u64 = 0;
959        let mut prev_underscore = false;
960        let mut has_digit = false;
961        'error: {
962            if bytes.is_empty() {
963                break 'error;
964            }
965
966            for &b in bytes {
967                if b == b'_' {
968                    if !has_digit || prev_underscore {
969                        break 'error;
970                    }
971                    prev_underscore = true;
972                    continue;
973                }
974                if !b.is_ascii_digit() || b > b'7' {
975                    break 'error;
976                }
977                has_digit = true;
978                prev_underscore = false;
979                if acc >> 61 != 0 {
980                    break 'error;
981                }
982                acc = (acc << 3) | (b - b'0') as u64;
983            }
984
985            if !has_digit || prev_underscore {
986                break 'error;
987            }
988
989            if acc > i64::MAX as u64 {
990                break 'error;
991            }
992            return Ok(Item::integer(acc as i64, span));
993        }
994        self.error_span = span;
995        self.error_kind = Some(ErrorKind::InvalidNumber);
996        Err(ParseError)
997    }
998
999    fn integer_binary(&mut self, bytes: &'de [u8], span: Span) -> Result<Item<'de>, ParseError> {
1000        let mut acc: u64 = 0;
1001        let mut prev_underscore = false;
1002        let mut has_digit = false;
1003        'error: {
1004            if bytes.is_empty() {
1005                break 'error;
1006            }
1007
1008            for &b in bytes {
1009                if b == b'_' {
1010                    if !has_digit || prev_underscore {
1011                        break 'error;
1012                    }
1013                    prev_underscore = true;
1014                    continue;
1015                }
1016                if b != b'0' && b != b'1' {
1017                    break 'error;
1018                }
1019                has_digit = true;
1020                prev_underscore = false;
1021                if acc >> 63 != 0 {
1022                    break 'error;
1023                }
1024                acc = (acc << 1) | (b - b'0') as u64;
1025            }
1026
1027            if !has_digit || prev_underscore {
1028                break 'error;
1029            }
1030
1031            if acc > i64::MAX as u64 {
1032                break 'error;
1033            }
1034            return Ok(Item::integer(acc as i64, span));
1035        }
1036        self.error_span = span;
1037        self.error_kind = Some(ErrorKind::InvalidNumber);
1038        Err(ParseError)
1039    }
1040
1041    fn float(
1042        &mut self,
1043        start: u32,
1044        end: u32,
1045        s: &'de str,
1046        after_decimal: Option<&'de str>,
1047        sign: u8,
1048    ) -> Result<f64, ParseError> {
1049        let s_start = start as usize;
1050        let s_end = end as usize;
1051
1052        // TOML forbids leading zeros in the integer part (e.g. 00.5, -01.0).
1053        if let [b'0', b'0'..=b'9' | b'_', ..] = s.as_bytes() {
1054            return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1055        }
1056
1057        // Safety: no other Scratch or arena.alloc() is active during float parsing.
1058        let mut scratch = unsafe { self.arena.scratch() };
1059
1060        if sign == 0 {
1061            scratch.push(b'-');
1062        }
1063        if !scratch.push_strip_underscores(s.as_bytes()) {
1064            return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1065        }
1066
1067        let mut last = s;
1068
1069        if let Some(after) = after_decimal {
1070            if !matches!(after.as_bytes().first(), Some(b'0'..=b'9')) {
1071                return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1072            }
1073            scratch.push(b'.');
1074            if !scratch.push_strip_underscores(after.as_bytes()) {
1075                return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1076            }
1077            last = after;
1078        }
1079
1080        // When the last keylike token ends with e/E, the '+' and exponent
1081        // digits are separate tokens in the stream ('-' IS keylike so
1082        // e.g. "1e-5" stays in one token and needs no special handling).
1083        if matches!(last.as_bytes().last(), Some(b'e' | b'E')) {
1084            self.eat_byte(b'+');
1085            match self.peek_byte() {
1086                Some(b) if is_keylike_byte(b) && b != b'-' => {
1087                    let next = self.read_keylike();
1088                    if !scratch.push_strip_underscores(next.as_bytes()) {
1089                        return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1090                    }
1091                }
1092                _ => {
1093                    return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1094                }
1095            }
1096        }
1097
1098        // Scratch is not committed — arena pointer stays unchanged, space is
1099        // reused by subsequent allocations.
1100        // SAFETY: scratch contains only ASCII digits, signs, dots, and 'e'/'E'
1101        // copied from validated input via push_strip_underscores.
1102        let n: f64 = match unsafe { std::str::from_utf8_unchecked(scratch.as_bytes()) }.parse() {
1103            Ok(n) => n,
1104            Err(_) => {
1105                return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1106            }
1107        };
1108        if n.is_finite() {
1109            Ok(n)
1110        } else {
1111            Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber))
1112        }
1113    }
1114
1115    fn value(&mut self, depth_remaining: i16) -> Result<Item<'de>, ParseError> {
1116        let at = self.cursor;
1117        let Some(byte) = self.peek_byte() else {
1118            return Err(self.set_error(self.bytes.len(), None, ErrorKind::UnexpectedEof));
1119        };
1120        let sign = match byte {
1121            b'"' | b'\'' => {
1122                self.cursor += 1;
1123                return match self.read_string(self.cursor - 1, byte) {
1124                    Ok((key, _)) => Ok(Item::string(key.name, key.span)),
1125                    Err(e) => Err(e),
1126                };
1127            }
1128            b'{' => {
1129                let start = self.cursor as u32;
1130                self.cursor += 1;
1131                let mut table = crate::table::InnerTable::new();
1132                if let Err(err) = self.inline_table_contents(&mut table, depth_remaining - 1) {
1133                    return Err(err);
1134                }
1135                return Ok(Item::table_frozen(
1136                    table,
1137                    Span::new(start, self.cursor as u32),
1138                ));
1139            }
1140            b'[' => {
1141                let start = self.cursor as u32;
1142                self.cursor += 1;
1143                let mut arr = value::Array::new();
1144                if let Err(err) = self.array_contents(&mut arr, depth_remaining - 1) {
1145                    return Err(err);
1146                };
1147                return Ok(Item::array(arr, Span::new(start, self.cursor as u32)));
1148            }
1149            b't' => {
1150                return if self.bytes[self.cursor..].starts_with(b"true") {
1151                    self.cursor += 4;
1152                    Ok(Item::boolean(
1153                        true,
1154                        Span::new(at as u32, self.cursor as u32),
1155                    ))
1156                } else {
1157                    Err(self.set_error(
1158                        at,
1159                        Some(self.cursor),
1160                        ErrorKind::Wanted {
1161                            expected: "the literal `true`",
1162                            found: "something else",
1163                        },
1164                    ))
1165                };
1166            }
1167            b'f' => {
1168                self.cursor += 1;
1169                return if self.bytes[self.cursor..].starts_with(b"alse") {
1170                    self.cursor += 4;
1171                    Ok(Item::boolean(
1172                        false,
1173                        Span::new(at as u32, self.cursor as u32),
1174                    ))
1175                } else {
1176                    Err(self.set_error(
1177                        at,
1178                        Some(self.cursor),
1179                        ErrorKind::Wanted {
1180                            expected: "the literal `false`",
1181                            found: "something else",
1182                        },
1183                    ))
1184                };
1185            }
1186            b'-' => {
1187                self.cursor += 1;
1188                0
1189            }
1190            b'+' => {
1191                self.cursor += 1;
1192                1
1193            }
1194            _ => 2,
1195        };
1196
1197        let key = self.read_keylike();
1198
1199        let end = self.cursor as u32;
1200        match key {
1201            "inf" => {
1202                return Ok(Item::float(
1203                    if sign != 0 {
1204                        f64::INFINITY
1205                    } else {
1206                        f64::NEG_INFINITY
1207                    },
1208                    Span::new(at as u32, end),
1209                ));
1210            }
1211            "nan" => {
1212                return Ok(Item::float(
1213                    if sign != 0 {
1214                        f64::NAN.copysign(1.0)
1215                    } else {
1216                        f64::NAN.copysign(-1.0)
1217                    },
1218                    Span::new(at as u32, end),
1219                ));
1220            }
1221            _ => (),
1222        }
1223
1224        if let [b'0'..=b'9', ..] = key.as_bytes() {
1225            self.number(at as u32, end, key, sign)
1226        } else if byte == b'\r' {
1227            Err(self.set_error(at, None, ErrorKind::Unexpected('\r')))
1228        } else {
1229            Err(self.set_error(at, Some(self.cursor), ErrorKind::InvalidNumber))
1230        }
1231    }
1232
1233    fn inline_table_contents(
1234        &mut self,
1235        out: &mut crate::table::InnerTable<'de>,
1236        depth_remaining: i16,
1237    ) -> Result<(), ParseError> {
1238        if depth_remaining < 0 {
1239            return Err(self.set_error(
1240                self.cursor,
1241                None,
1242                ErrorKind::OutOfRange("Max recursion depth exceeded"),
1243            ));
1244        }
1245        if let Err(e) = self.eat_inline_table_whitespace() {
1246            return Err(e);
1247        }
1248        if self.eat_byte(b'}') {
1249            return Ok(());
1250        }
1251        loop {
1252            let mut table_ref: &mut crate::table::InnerTable<'de> = &mut *out;
1253            let mut key = match self.read_table_key() {
1254                Ok(k) => k,
1255                Err(e) => return Err(e),
1256            };
1257            self.eat_whitespace();
1258            while self.eat_byte(b'.') {
1259                self.eat_whitespace();
1260                table_ref = match self.navigate_dotted_key(table_ref, key) {
1261                    Ok(t) => t,
1262                    Err(e) => return Err(e),
1263                };
1264                key = match self.read_table_key() {
1265                    Ok(k) => k,
1266                    Err(e) => return Err(e),
1267                };
1268                self.eat_whitespace();
1269            }
1270            if let Err(e) = self.eat_inline_table_whitespace() {
1271                return Err(e);
1272            }
1273            if let Err(e) = self.expect_byte(b'=') {
1274                return Err(e);
1275            }
1276            if let Err(e) = self.eat_inline_table_whitespace() {
1277                return Err(e);
1278            }
1279            {
1280                let val = match self.value(depth_remaining) {
1281                    Ok(v) => v,
1282                    Err(e) => return Err(e),
1283                };
1284                if let Err(e) = self.insert_value(table_ref, key, val) {
1285                    return Err(e);
1286                }
1287            }
1288
1289            if let Err(e) = self.eat_inline_table_whitespace() {
1290                return Err(e);
1291            }
1292            if self.eat_byte(b'}') {
1293                return Ok(());
1294            }
1295            if let Err(e) = self.expect_byte(b',') {
1296                return Err(e);
1297            }
1298            if let Err(e) = self.eat_inline_table_whitespace() {
1299                return Err(e);
1300            }
1301            if self.eat_byte(b'}') {
1302                return Ok(());
1303            }
1304        }
1305    }
1306
1307    fn array_contents(
1308        &mut self,
1309        out: &mut value::Array<'de>,
1310        depth_remaining: i16,
1311    ) -> Result<(), ParseError> {
1312        if depth_remaining < 0 {
1313            return Err(self.set_error(
1314                self.cursor,
1315                None,
1316                ErrorKind::OutOfRange("Max recursion depth exceeded"),
1317            ));
1318        }
1319        loop {
1320            if let Err(e) = self.eat_intermediate() {
1321                return Err(e);
1322            }
1323            if self.eat_byte(b']') {
1324                return Ok(());
1325            }
1326            match self.value(depth_remaining) {
1327                Ok(value) => out.push(value, self.arena),
1328                Err(e) => return Err(e),
1329            };
1330            if let Err(e) = self.eat_intermediate() {
1331                return Err(e);
1332            }
1333            if !self.eat_byte(b',') {
1334                break;
1335            }
1336        }
1337        if let Err(e) = self.eat_intermediate() {
1338            return Err(e);
1339        }
1340        self.expect_byte(b']')
1341    }
1342
1343    #[inline(always)]
1344    fn eat_inline_table_whitespace(&mut self) -> Result<(), ParseError> {
1345        loop {
1346            match self.peek_byte() {
1347                Some(b' ' | b'\t' | b'\n') => self.cursor += 1,
1348                Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => self.cursor += 2,
1349                Some(b'#') => match self.eat_comment() {
1350                    Ok(_) => {}
1351                    Err(e) => return Err(e),
1352                },
1353                _ => return Ok(()),
1354            }
1355        }
1356    }
1357
1358    #[inline(always)]
1359    fn eat_intermediate(&mut self) -> Result<(), ParseError> {
1360        loop {
1361            match self.peek_byte() {
1362                Some(b' ' | b'\t' | b'\n') => self.cursor += 1,
1363                Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => self.cursor += 2,
1364                Some(b'#') => match self.eat_comment() {
1365                    Ok(_) => {}
1366                    Err(e) => return Err(e),
1367                },
1368                _ => return Ok(()),
1369            }
1370        }
1371    }
1372
1373    /// Navigate into an existing or new table for a dotted-key intermediate
1374    /// segment. Checks frozen and header bits.
1375    /// New tables are created with the `DOTTED` tag.
1376    fn navigate_dotted_key<'t>(
1377        &mut self,
1378        table: &'t mut InnerTable<'de>,
1379        key: Key<'de>,
1380    ) -> Result<&'t mut InnerTable<'de>, ParseError> {
1381        if let Some(idx) = self.indexed_find(table, key.name) {
1382            let (existing_key, value) = &mut table.entries_mut()[idx];
1383            let ok = value.is_table() && !value.is_frozen() && !value.has_header_bit();
1384
1385            if !ok {
1386                return Err(self.set_error(
1387                    key.span.start as usize,
1388                    Some(key.span.end as usize),
1389                    ErrorKind::DottedKeyInvalidType {
1390                        first: existing_key.span,
1391                    },
1392                ));
1393            }
1394            // SAFETY: is_table() verified by the guard above.
1395            unsafe { Ok(value.as_inner_table_mut_unchecked()) }
1396        } else {
1397            let span = key.span;
1398            let inserted = self.insert_value_known_to_be_unique(
1399                table,
1400                key,
1401                Item::table_dotted(InnerTable::new(), span),
1402            );
1403            // SAFETY: Item::table_dotted() produces a table-tagged item.
1404            unsafe { Ok(inserted.as_inner_table_mut_unchecked()) }
1405        }
1406    }
1407
1408    /// Navigate an intermediate segment of a table header (e.g. `a` in `[a.b.c]`).
1409    /// Creates implicit tables (no flag bits) if not found.
1410    /// Handles arrays-of-tables by navigating into the last element.
1411    ///
1412    /// Returns a `Table` view of the table navigated into.
1413    fn navigate_header_intermediate<'b>(
1414        &mut self,
1415        st: &'b mut Table<'de>,
1416        key: Key<'de>,
1417    ) -> Result<&'b mut Table<'de>, ParseError> {
1418        let table = &mut st.value;
1419
1420        if let Some(idx) = self.indexed_find(table, key.name) {
1421            let (existing_key, existing) = &mut table.entries_mut()[idx];
1422            let existing_span = existing_key.span;
1423
1424            // Note: I would use safey accessor heres but that would cause issues
1425            // with NLL limitations.
1426            if existing.is_table() {
1427                if existing.is_frozen() {
1428                    return Err(self.set_duplicate_key_error(existing_span, key.span, key.name));
1429                }
1430                // SAFETY: is_table() verified by the preceding check.
1431                unsafe { Ok(existing.as_table_mut_unchecked()) }
1432            } else if existing.is_aot() {
1433                // unwrap is safe since we just check it's an array of tables and thus a array.
1434                let arr = existing.as_array_mut().unwrap();
1435                // unwrap is safe as array's of tables always have atleast one value by construction
1436                let last = arr.last_mut().unwrap();
1437                if !last.is_table() {
1438                    return Err(self.set_duplicate_key_error(existing_span, key.span, key.name));
1439                }
1440                // SAFETY: last.is_table() verified by the preceding check.
1441                unsafe { Ok(last.as_table_mut_unchecked()) }
1442            } else {
1443                Err(self.set_duplicate_key_error(existing_span, key.span, key.name))
1444            }
1445        } else {
1446            let span = key.span;
1447            let inserted = self.insert_value_known_to_be_unique(
1448                table,
1449                key,
1450                Item::table(InnerTable::new(), span),
1451            );
1452            // SAFETY: Item::table() produces a table-tagged item.
1453            unsafe { Ok(inserted.as_table_mut_unchecked()) }
1454        }
1455    }
1456    fn insert_value_known_to_be_unique<'t>(
1457        &mut self,
1458        table: &'t mut InnerTable<'de>,
1459        key: Key<'de>,
1460        item: Item<'de>,
1461    ) -> &'t mut value::Item<'de> {
1462        let len = table.len();
1463        if len >= INDEXED_TABLE_THRESHOLD {
1464            // SAFETY: len >= INDEXED_TABLE_THRESHOLD (>= 6), so the table is non-empty.
1465            let table_id = unsafe { table.first_key_span_start_unchecked() };
1466            if len == INDEXED_TABLE_THRESHOLD {
1467                for (i, (key, _)) in table.entries().iter().enumerate() {
1468                    self.index.insert(KeyRef::new(key.as_str(), table_id), i);
1469                }
1470            }
1471            self.index.insert(KeyRef::new(key.as_str(), table_id), len);
1472        }
1473        &mut table.insert(key, item, self.arena).1
1474    }
1475
1476    /// Handle the final segment of a standard table header `[a.b.c]`.
1477    ///
1478    /// Returns the [`Ctx`] for the table that subsequent key-value pairs
1479    /// should be inserted into.
1480    fn navigate_header_table_final<'b>(
1481        &mut self,
1482        st: &'b mut Table<'de>,
1483        key: Key<'de>,
1484        header_start: u32,
1485        header_end: u32,
1486    ) -> Result<Ctx<'b, 'de>, ParseError> {
1487        let table = &mut st.value;
1488
1489        if let Some(idx) = self.indexed_find(table, key.name) {
1490            let (existing_key, existing) = &mut table.entries_mut()[idx];
1491            let first_key_span = existing_key.span;
1492
1493            if !existing.is_table() || existing.is_frozen() {
1494                return Err(self.set_duplicate_key_error(first_key_span, key.span, key.name));
1495            }
1496            if existing.has_header_bit() {
1497                return Err(self.set_error(
1498                    header_start as usize,
1499                    Some(header_end as usize),
1500                    ErrorKind::DuplicateTable {
1501                        name: String::from(key.name),
1502                        first: existing.span(),
1503                    },
1504                ));
1505            }
1506            if existing.has_dotted_bit() {
1507                return Err(self.set_duplicate_key_error(first_key_span, key.span, key.name));
1508            }
1509            // SAFETY: is_table() verified by the preceding checks.
1510            let table = unsafe { existing.as_table_mut_unchecked() };
1511            table.set_header_flag();
1512            table.set_span_start(header_start);
1513            table.set_span_end(header_end);
1514            Ok(Ctx {
1515                table,
1516                array_end_span: None,
1517            })
1518        } else {
1519            let inserted = self.insert_value_known_to_be_unique(
1520                table,
1521                key,
1522                Item::table_header(InnerTable::new(), Span::new(header_start, header_end)),
1523            );
1524            Ok(Ctx {
1525                // SAFETY: Item::table_header() produces a table-tagged item.
1526                table: unsafe { inserted.as_table_mut_unchecked() },
1527                array_end_span: None,
1528            })
1529        }
1530    }
1531
1532    /// Handle the final segment of an array-of-tables header `[[a.b.c]]`.
1533    ///
1534    /// Returns the [`Ctx`] for the new table entry that subsequent key-value
1535    /// pairs should be inserted into.
1536    fn navigate_header_array_final<'b>(
1537        &mut self,
1538        st: &'b mut Table<'de>,
1539        key: Key<'de>,
1540        header_start: u32,
1541        header_end: u32,
1542    ) -> Result<Ctx<'b, 'de>, ParseError> {
1543        let table = &mut st.value;
1544
1545        if let Some(idx) = self.indexed_find(table, key.name) {
1546            let (existing_key, existing) = &mut table.entries_mut()[idx];
1547            let first_key_span = existing_key.span;
1548
1549            if existing.is_aot() {
1550                // SAFETY: is_aot verified by the preceding check, which implies is_array().
1551                let (end_flag, arr) = unsafe { existing.split_array_end_flag() };
1552                let entry_span = Span::new(header_start, header_end);
1553                arr.push(
1554                    Item::table_header(InnerTable::new(), entry_span),
1555                    self.arena,
1556                );
1557                let entry = arr.last_mut().unwrap();
1558                Ok(Ctx {
1559                    // SAFETY: Item::table_header() produces a table-tagged item.
1560                    table: unsafe { entry.as_table_mut_unchecked() },
1561                    array_end_span: Some(end_flag),
1562                })
1563            } else if existing.is_table() {
1564                Err(self.set_error(
1565                    header_start as usize,
1566                    Some(header_end as usize),
1567                    ErrorKind::RedefineAsArray,
1568                ))
1569            } else {
1570                Err(self.set_duplicate_key_error(first_key_span, key.span, key.name))
1571            }
1572        } else {
1573            let entry_span = Span::new(header_start, header_end);
1574            let first_entry = Item::table_header(InnerTable::new(), entry_span);
1575            let array_span = Span::new(header_start, header_end);
1576            let array_val = Item::array_aot(
1577                value::Array::with_single(first_entry, self.arena),
1578                array_span,
1579            );
1580            let inserted = self.insert_value_known_to_be_unique(table, key, array_val);
1581            // SAFETY: Item::array_aot() produces an array-tagged item.
1582            let (end_flag, arr) = unsafe { inserted.split_array_end_flag() };
1583            let entry = arr.last_mut().unwrap();
1584            Ok(Ctx {
1585                // SAFETY: Item::table_header() (used in with_single) produces a table-tagged item.
1586                table: unsafe { entry.as_table_mut_unchecked() },
1587                array_end_span: Some(end_flag),
1588            })
1589        }
1590    }
1591
1592    /// Insert a value into a table, checking for duplicates.
1593    fn insert_value(
1594        &mut self,
1595        table: &mut InnerTable<'de>,
1596        key: Key<'de>,
1597        item: Item<'de>,
1598    ) -> Result<(), ParseError> {
1599        if table.len() < INDEXED_TABLE_THRESHOLD {
1600            for (existing_key, _) in table.entries() {
1601                if existing_key.as_str() == key.name {
1602                    return Err(self.set_duplicate_key_error(
1603                        existing_key.span,
1604                        key.span,
1605                        key.name,
1606                    ));
1607                }
1608            }
1609            table.insert(key, item, self.arena);
1610            return Ok(());
1611        }
1612        // SAFETY: len >= INDEXED_TABLE_THRESHOLD (>= 6), so the table is non-empty.
1613        let table_id = unsafe { table.first_key_span_start_unchecked() };
1614
1615        // Note: if find a duplicate we bail out, terminating the parsing with an error.
1616        // Even if we did end up re-inserting no issues would come of it.
1617        if table.len() == INDEXED_TABLE_THRESHOLD {
1618            for (i, (key, _)) in table.entries().iter().enumerate() {
1619                // Wish I could use insert_unique here but that would require
1620                // pulling in hashbrown :(
1621                self.index.insert(KeyRef::new(key.as_str(), table_id), i);
1622            }
1623        }
1624
1625        match self.index.entry(KeyRef::new(key.as_str(), table_id)) {
1626            std::collections::hash_map::Entry::Occupied(occupied_entry) => {
1627                let idx = *occupied_entry.get();
1628                let (existing_key, _) = &table.entries()[idx];
1629                Err(self.set_duplicate_key_error(existing_key.span, key.span, key.name))
1630            }
1631            std::collections::hash_map::Entry::Vacant(vacant_entry) => {
1632                vacant_entry.insert(table.len());
1633                table.insert(key, item, self.arena);
1634                Ok(())
1635            }
1636        }
1637    }
1638
1639    /// Look up a key name in a table, returning its entry index.
1640    /// Uses the hash index for tables at or above the threshold, otherwise
1641    /// falls back to a linear scan.
1642    fn indexed_find(&self, table: &InnerTable<'de>, name: &str) -> Option<usize> {
1643        // NOTE: I would return a reference to actual entry here, however this
1644        // runs into all sorts of NLL limitations.
1645        if table.len() > INDEXED_TABLE_THRESHOLD {
1646            // SAFETY: len > INDEXED_TABLE_THRESHOLD (> 6), so the table is non-empty.
1647            let first_key_span = unsafe { table.first_key_span_start_unchecked() };
1648            self.index.get(&KeyRef::new(name, first_key_span)).copied()
1649        } else {
1650            table.find_index(name)
1651        }
1652    }
1653
1654    fn parse_document(&mut self, root_st: &mut Table<'de>) -> Result<(), ParseError> {
1655        let mut ctx = Ctx {
1656            table: root_st,
1657            array_end_span: None,
1658        };
1659
1660        loop {
1661            self.eat_whitespace();
1662            match self.eat_comment() {
1663                Ok(true) => continue,
1664                Ok(false) => {}
1665                Err(e) => return Err(e),
1666            }
1667            if self.eat_newline() {
1668                continue;
1669            }
1670
1671            match self.peek_byte() {
1672                None => break,
1673                Some(b'[') => {
1674                    ctx = match self.process_table_header(root_st) {
1675                        Ok(c) => c,
1676                        Err(e) => return Err(e),
1677                    };
1678                }
1679                Some(b'\r') => {
1680                    return Err(self.set_error(self.cursor, None, ErrorKind::Unexpected('\r')));
1681                }
1682                Some(_) => {
1683                    if let Err(e) = self.process_key_value(&mut ctx) {
1684                        return Err(e);
1685                    }
1686                }
1687            }
1688        }
1689        Ok(())
1690    }
1691
1692    fn process_table_header<'b>(
1693        &mut self,
1694        root_st: &'b mut Table<'de>,
1695    ) -> Result<Ctx<'b, 'de>, ParseError> {
1696        let header_start = self.cursor as u32;
1697        if let Err(e) = self.expect_byte(b'[') {
1698            return Err(e);
1699        }
1700        let is_array = self.eat_byte(b'[');
1701
1702        let mut current = root_st;
1703
1704        self.eat_whitespace();
1705        let mut key = match self.read_table_key() {
1706            Ok(k) => k,
1707            Err(e) => return Err(e),
1708        };
1709        loop {
1710            if self.eat_whitespace_to() == Some(b'.') {
1711                self.cursor += 1;
1712                self.eat_whitespace();
1713                current = match self.navigate_header_intermediate(current, key) {
1714                    Ok(p) => p,
1715                    Err(e) => return Err(e),
1716                };
1717                key = match self.read_table_key() {
1718                    Ok(k) => k,
1719                    Err(e) => return Err(e),
1720                };
1721            } else {
1722                break;
1723            }
1724        }
1725        if let Err(e) = self.expect_byte(b']') {
1726            return Err(e);
1727        }
1728        if is_array && let Err(e) = self.expect_byte(b']') {
1729            return Err(e);
1730        }
1731
1732        self.eat_whitespace();
1733        match self.eat_comment() {
1734            Ok(true) => {}
1735            Ok(false) => {
1736                if let Err(e) = self.eat_newline_or_eof() {
1737                    return Err(e);
1738                }
1739            }
1740            Err(e) => return Err(e),
1741        }
1742        let header_end = self.cursor as u32;
1743
1744        if is_array {
1745            self.navigate_header_array_final(current, key, header_start, header_end)
1746        } else {
1747            self.navigate_header_table_final(current, key, header_start, header_end)
1748        }
1749    }
1750
1751    fn process_key_value(&mut self, ctx: &mut Ctx<'_, 'de>) -> Result<(), ParseError> {
1752        let line_start = self.cursor as u32;
1753        // Borrow the Table payload from the Table. NLL drops this
1754        // borrow at its last use (the insert_value call), freeing ctx.st
1755        // for the span updates that follow.
1756        let mut table_ref: &mut InnerTable<'de> = &mut ctx.table.value;
1757
1758        let mut key = match self.read_table_key() {
1759            Ok(k) => k,
1760            Err(e) => return Err(e),
1761        };
1762        self.eat_whitespace();
1763
1764        while self.eat_byte(b'.') {
1765            self.eat_whitespace();
1766            table_ref = match self.navigate_dotted_key(table_ref, key) {
1767                Ok(t) => t,
1768                Err(e) => return Err(e),
1769            };
1770            key = match self.read_table_key() {
1771                Ok(k) => k,
1772                Err(e) => return Err(e),
1773            };
1774            self.eat_whitespace();
1775        }
1776
1777        if let Err(e) = self.expect_byte(b'=') {
1778            return Err(e);
1779        }
1780        self.eat_whitespace();
1781        let val = match self.value(MAX_RECURSION_DEPTH) {
1782            Ok(v) => v,
1783            Err(e) => return Err(e),
1784        };
1785        let line_end = self.cursor as u32;
1786
1787        self.eat_whitespace();
1788        match self.eat_comment() {
1789            Ok(true) => {}
1790            Ok(false) => {
1791                if let Err(e) = self.eat_newline_or_eof() {
1792                    return Err(e);
1793                }
1794            }
1795            Err(e) => return Err(e),
1796        }
1797
1798        if let Err(e) = self.insert_value(table_ref, key, val) {
1799            return Err(e);
1800        }
1801
1802        let start = ctx.table.span_start();
1803        ctx.table.set_span_start(start.min(line_start));
1804        ctx.table.extend_span_end(line_end);
1805
1806        if let Some(end_flag) = &mut ctx.array_end_span {
1807            let old = **end_flag;
1808            let current = old >> value::FLAG_SHIFT;
1809            **end_flag = (current.max(line_end) << value::FLAG_SHIFT) | (old & value::FLAG_MASK);
1810        }
1811
1812        Ok(())
1813    }
1814}
1815
/// Holds both the root table and the parsing context for deserialization.
///
/// During deserialization the document tree remains immutable. Use
/// [`Root::into_table()`] to extract the table for mutable access.
pub struct Root<'de> {
    /// The document's root table, as filled in by [`parse`].
    pub(crate) table: Table<'de>,
    /// Deserialization state assembled by [`parse`]: the recorded errors, the
    /// key index built while parsing, and the arena. Only present when the
    /// `deserialization` feature is enabled.
    #[cfg(feature = "deserialization")]
    pub ctx: crate::de::Context<'de>,
}
1825
1826impl<'de> Root<'de> {
1827    /// Extracts the root table for mutable access. Root is consumed.
1828    /// You can also access the root table immutably via [`Root::table()`].
1829    pub fn into_table(self) -> Table<'de> {
1830        self.table
1831    }
1832
1833    /// Converts the root table into an [`Item`] with the same span and payload.
1834    pub fn into_item(self) -> Item<'de> {
1835        self.table.into_item()
1836    }
1837
1838    /// Access the root table immutably.
1839    pub fn table(&self) -> &Table<'de> {
1840        &self.table
1841    }
1842}
1843
#[cfg(feature = "deserialization")]
impl<'de> Root<'de> {
    /// Builds a [`TableHelper`] over the root table, backed by this root's
    /// deserialization context.
    pub fn helper<'ctx>(&'ctx mut self) -> TableHelper<'ctx, 'ctx, 'de> {
        // Disjoint field borrows: the context mutably, the table immutably.
        let context = &mut self.ctx;
        let table = &self.table;
        TableHelper::new(context, table)
    }

    /// Deserializes the root table into a value of type `T`.
    ///
    /// Returns [`Failed`] when `T`'s deserializer reports failure.
    pub fn deserialize<T: crate::de::Deserialize<'de>>(&mut self) -> Result<T, Failed> {
        let item = self.table.as_item();
        <T as crate::de::Deserialize<'de>>::deserialize(&mut self.ctx, item)
    }

    /// Returns the deserialization errors recorded so far.
    pub fn errors(&self) -> &[Error] {
        self.ctx.errors.as_slice()
    }

    /// Returns `true` if at least one deserialization error has been recorded.
    pub fn has_errors(&self) -> bool {
        !self.errors().is_empty()
    }
}
1869
1870impl<'de> std::ops::Index<&str> for Root<'de> {
1871    type Output = MaybeItem<'de>;
1872
1873    fn index(&self, key: &str) -> &Self::Output {
1874        &self.table[key]
1875    }
1876}
1877
/// Serializes a [`Root`] exactly as its root table is serialized.
#[cfg(feature = "serde")]
impl serde::Serialize for Root<'_> {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        self.table.serialize(serializer)
    }
}
1887
1888impl std::fmt::Debug for Root<'_> {
1889    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1890        self.table.fmt(f)
1891    }
1892}
1893
1894/// Parses a TOML document and returns a [`Root`] containing the parsed tree.
1895///
1896/// Both `s` and `arena` must outlive the returned [`Root`] because parsed
1897/// values borrow directly from the input string and allocate escaped strings
1898/// into the arena.
1899///
1900/// # Errors
1901///
1902/// Returns an [`Error`] on the first syntax error encountered.
1903///
1904/// # Examples
1905///
1906/// ```
1907/// let arena = toml_spanner::Arena::new();
1908/// let root = toml_spanner::parse("key = 'value'", &arena)?;
1909/// assert_eq!(root["key"].as_str(), Some("value"));
1910/// # Ok::<(), toml_spanner::Error>(())
1911/// ```
1912#[inline(never)]
1913pub fn parse<'de>(document: &'de str, arena: &'de Arena) -> Result<Root<'de>, Error> {
1914    // Tag bits use the low 3 bits of start_and_tag, limiting span.start to
1915    // 29 bits (512 MiB). The flag state uses the low 3 bits of end_and_flag,
1916    // limiting span.end to 29 bits (512 MiB).
1917    const MAX_SIZE: usize = (1u32 << 29) as usize;
1918
1919    if document.len() >= MAX_SIZE {
1920        return Err(Error {
1921            kind: ErrorKind::FileTooLarge,
1922            span: Span::new(0, 0),
1923        });
1924    }
1925
1926    let mut root_st = Table::new(Span::new(0, document.len() as u32));
1927    let mut parser = Parser::new(document, arena);
1928    match parser.parse_document(&mut root_st) {
1929        Ok(()) => {}
1930        Err(_) => return Err(parser.take_error()),
1931    }
1932    // Note that root is about the drop (but doesn't implement drop), so we can take
1933    // ownership of this table.
1934    // todo don't do this
1935    Ok(Root {
1936        table: root_st,
1937        #[cfg(feature = "deserialization")]
1938        ctx: crate::de::Context {
1939            errors: Vec::new(),
1940            index: parser.index,
1941            arena,
1942        },
1943    })
1944}
1945
/// Returns `true` when `b` is an ASCII letter, an ASCII digit, `-`, or `_`.
#[inline]
fn is_keylike_byte(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'_')
}
1950
1951fn byte_describe(b: u8) -> &'static str {
1952    match b {
1953        b'\n' => "a newline",
1954        b' ' | b'\t' => "whitespace",
1955        b'=' => "an equals",
1956        b'.' => "a period",
1957        b',' => "a comma",
1958        b':' => "a colon",
1959        b'+' => "a plus",
1960        b'{' => "a left brace",
1961        b'}' => "a right brace",
1962        b'[' => "a left bracket",
1963        b']' => "a right bracket",
1964        b'\'' | b'"' => "a string",
1965        _ if is_keylike_byte(b) => "an identifier",
1966        _ => "a character",
1967    }
1968}