Skip to main content

toml_spanner/
parser.rs

1// Deliberately avoid `?` operator throughout this module for compile-time
2// performance: explicit match/if-let prevents the compiler from generating
3// From::from conversion and drop-glue machinery at every call site.
4
5#[cfg(test)]
6#[path = "./parser_tests.rs"]
7mod tests;
8
9#[cfg(feature = "from-toml")]
10use crate::de::TableHelper;
11use crate::{
12    Failed, MaybeItem, Span,
13    arena::Arena,
14    error::{Error, ErrorKind, PathComponent},
15    item::{
16        self, Item, Key,
17        table::{InnerTable, Table},
18    },
19    time::DateTime,
20};
21use std::char;
22use std::hash::{Hash, Hasher};
23use std::ptr::NonNull;
24
// NOTE(review): no use of this limit is visible in this part of the file;
// presumably it caps how deeply values may nest — confirm at the use site.
const MAX_RECURSION_DEPTH: i16 = 256;
26
/// Borrowed parsing context: the current table and, for array-of-tables
/// entries, a handle to the parent array's end-span slot.
struct Ctx<'b, 'de> {
    /// The current table context.
    table: &'b mut Table<'de>,
    /// If this table is an entry in an array-of-tables, a disjoint borrow of
    /// the parent array value's `end_and_flag` field so its span can be
    /// extended alongside the entry.
    array_end_span: Option<&'b mut u32>,
}
35
/// Tables with at least this many entries use the hash index for lookups.
///
/// Note: parsing benchmarks alone might suggest raising this value, but the
/// same index is reused during deserialization, where the cost of building
/// it is recouped.
pub(crate) const INDEXED_TABLE_THRESHOLD: usize = 6;
41
/// Builds the byte-to-hex-digit lookup table at compile time.
///
/// Entry `b` holds the numeric value (0..=15) of the ASCII hex digit `b`
/// (upper or lower case), or `-1` when `b` is not a hex digit.
const fn build_hex_table() -> [i8; 256] {
    let mut lut = [-1i8; 256];
    let mut value = 0u8;
    while value < 16 {
        if value < 10 {
            lut[(b'0' + value) as usize] = value as i8;
        } else {
            // Letters cover 10..=15 and are accepted in both cases.
            let letter = value - 10;
            lut[(b'A' + letter) as usize] = value as i8;
            lut[(b'a' + letter) as usize] = value as i8;
        }
        value += 1;
    }
    lut
}

/// Hex-digit lookup table indexed by byte value; see `build_hex_table`.
static HEX: [i8; 256] = build_hex_table();
58
/// Non-owning hash-map key naming one (table, key-name) pair.
///
/// `key_ptr`/`len` describe the key text, which lives either in the input
/// buffer or in the arena; both stay valid for the lifetime of the parse.
/// `first_key_span` is the `span.start()` of the **first** key ever inserted
/// into the table and serves as a cheap, collision-free table discriminator.
pub(crate) struct KeyRef<'de> {
    key_ptr: NonNull<u8>,
    len: u32,
    first_key_span: u32,
    marker: std::marker::PhantomData<&'de str>,
}

impl<'de> KeyRef<'de> {
    /// Captures `key` by pointer and length and tags it with the table
    /// discriminator `first_key_span`.
    #[inline]
    pub(crate) fn new(key: &'de str, first_key_span: u32) -> Self {
        let data = key.as_ptr().cast_mut();
        Self {
            // SAFETY: `str::as_ptr()` never returns a null pointer.
            key_ptr: unsafe { NonNull::new_unchecked(data) },
            len: key.len() as u32,
            first_key_span,
            marker: std::marker::PhantomData,
        }
    }

    /// Rebuilds the `&'de str` this key was created from.
    #[inline]
    fn as_str(&self) -> &'de str {
        let data = self.key_ptr.as_ptr().cast_const();
        let len = self.len as usize;
        // SAFETY: `data`/`len` were captured from a valid `&'de str` in
        // `new()`, and the `PhantomData<&'de str>` keeps that borrow live.
        unsafe { std::str::from_utf8_unchecked(std::slice::from_raw_parts(data, len)) }
    }
}

impl<'de> Hash for KeyRef<'de> {
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        Hash::hash(&self.first_key_span, state);
        // Note: `KeyRef` is only meant to be used inside the index, where it
        // is the entirety of the hash input, so prefix freedom is not a
        // concern here.
        Hash::hash(self.as_str(), state);
    }
}

impl<'de> PartialEq for KeyRef<'de> {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        // Compare the cheap table discriminator before the key text.
        if self.first_key_span != other.first_key_span {
            return false;
        }
        self.as_str() == other.as_str()
    }
}

impl<'de> Eq for KeyRef<'de> {}
117
/// Cursor-based parser state over a borrowed input buffer.
///
/// Holds the input bytes and read position, the arena used for strings that
/// cannot be borrowed directly from the input, the pending error slot, and
/// bookkeeping for error paths, key indexing and recovery.
struct Parser<'de> {
    /// Raw bytes of the input. Always valid UTF-8 (derived from `&str`).
    bytes: &'de [u8],
    cursor: usize,
    arena: &'de Arena,

    // Error context, populated just before returning Failed
    error_span: Span,
    error_kind: Option<ErrorKind<'static>>,

    // TOML path tracking for error context (zero-cost on happy path)
    path: [PathComponent<'de>; 16],
    path_len: u8,

    // Global key-index for O(1) lookups in large tables.
    // Maps (table-discriminator, key-name) → entry index in the table.
    index: foldhash::HashMap<KeyRef<'de>, usize>,

    // Recovery mode: when true, parse errors are accumulated instead of
    // immediately returned, and parsing continues from the next line.
    recovering: bool,
    errors: Vec<Error>,
}
141
142impl<'de> Parser<'de> {
143    fn new(input: &'de str, arena: &'de Arena) -> Self {
144        let bytes = input.as_bytes();
145        // Skip UTF-8 BOM (U+FEFF = EF BB BF) if present at the start.
146        let cursor = if bytes.starts_with(b"\xef\xbb\xbf") {
147            3
148        } else {
149            0
150        };
151        Parser {
152            bytes,
153            cursor,
154            arena,
155            error_span: Span::new(0, 0),
156            error_kind: None,
157            path: [PathComponent::Index(0); 16],
158            path_len: 0,
159            // initialize to about ~ 8 KB
160            index: foldhash::HashMap::with_capacity_and_hasher(
161                256,
162                foldhash::fast::RandomState::default(),
163            ),
164            recovering: false,
165            errors: Vec::new(),
166        }
167    }
168
169    /// Get a `&str` slice from the underlying bytes.
170    ///
171    /// # Safety
172    ///
173    /// - `start <= end <= self.bytes.len()`.
174    /// - `start` and `end` must lie on UTF-8 character boundaries within
175    ///   `self.bytes` (which is always valid UTF-8 because it was derived
176    ///   from a `&str`).
177    #[inline]
178    unsafe fn str_slice(&self, start: usize, end: usize) -> &'de str {
179        #[cfg(not(debug_assertions))]
180        unsafe {
181            std::str::from_utf8_unchecked(&self.bytes[start..end])
182        }
183        #[cfg(debug_assertions)]
184        match std::str::from_utf8(&self.bytes[start..end]) {
185            Ok(value) => value,
186            Err(err) => panic!(
187                "Invalid UTF-8 slice: bytes[{}..{}] is not valid UTF-8: {}",
188                start, end, err
189            ),
190        }
191    }
192
193    #[inline]
194    fn push_path(&mut self, component: PathComponent<'de>) {
195        let len = self.path_len as usize;
196        if len < self.path.len() {
197            self.path[len] = component;
198        }
199        self.path_len = self.path_len.saturating_add(1);
200    }
201
202    #[cold]
203    fn build_error_path(&self) -> crate::error::MaybeTomlPath {
204        let depth = (self.path_len as usize).min(self.path.len());
205        crate::error::MaybeTomlPath::from_components(&self.path[..depth])
206    }
207
208    #[cold]
209    fn set_duplicate_key_error(&mut self, first: Span, second: Span) -> Failed {
210        self.error_span = second;
211        self.error_kind = Some(ErrorKind::DuplicateKey { first });
212        Failed
213    }
214
215    #[cold]
216    fn set_error(&mut self, start: usize, end: Option<usize>, kind: ErrorKind<'static>) -> Failed {
217        let len = self.bytes.len();
218        let start = start.min(len);
219        self.error_span = Span::new(start as u32, end.unwrap_or((start + 1).min(len)) as u32);
220        self.error_kind = Some(kind);
221        Failed
222    }
223
    /// Consumes the pending error state and builds the final `Error` from
    /// the recorded kind, span and current path.
    ///
    /// # Panics
    ///
    /// Panics if no error kind has been recorded (`error_kind` is `None`).
    fn take_error(&mut self) -> Error {
        let kind = self
            .error_kind
            .take()
            .expect("take_error called without error");
        let span = self.error_span;
        let path = self.build_error_path();

        // Black Magic Optimization:
        // Removing the following introduces 8% performance
        // regression across the board.
        // (The value is discarded; the statement has no effect on the
        // returned error.)
        std::hint::black_box(&self.bytes.iter().enumerate().next());

        Error::new_with_path(kind, span, path)
    }
239
240    #[inline]
241    fn peek_byte(&self) -> Option<u8> {
242        self.bytes.get(self.cursor).copied()
243    }
244
245    #[inline]
246    fn peek_byte_at(&self, offset: usize) -> Option<u8> {
247        self.bytes.get(self.cursor + offset).copied()
248    }
249
250    #[inline]
251    fn eat_byte(&mut self, b: u8) -> bool {
252        if self.peek_byte() == Some(b) {
253            self.cursor += 1;
254            true
255        } else {
256            false
257        }
258    }
259    #[cold]
260    fn expected_error(&mut self, b: u8) -> Failed {
261        let start = self.cursor;
262        let (found_desc, end) = self.scan_token_desc_and_end();
263        self.set_error(
264            start,
265            Some(end),
266            ErrorKind::Wanted {
267                expected: byte_describe(b),
268                found: found_desc,
269            },
270        )
271    }
272
273    fn expect_byte(&mut self, b: u8) -> Result<(), Failed> {
274        if self.peek_byte() == Some(b) {
275            self.cursor += 1;
276            Ok(())
277        } else {
278            Err(self.expected_error(b))
279        }
280    }
281
282    fn eat_whitespace(&mut self) {
283        while let Some(b) = self.peek_byte() {
284            if b == b' ' || b == b'\t' {
285                self.cursor += 1;
286            } else {
287                break;
288            }
289        }
290    }
291
292    fn eat_whitespace_to(&mut self) -> Option<u8> {
293        while let Some(b) = self.peek_byte() {
294            if b == b' ' || b == b'\t' {
295                self.cursor += 1;
296            } else {
297                return Some(b);
298            }
299        }
300        None
301    }
302
303    fn eat_newline_or_eof(&mut self) -> Result<(), Failed> {
304        match self.peek_byte() {
305            None => Ok(()),
306            Some(b'\n') => {
307                self.cursor += 1;
308                Ok(())
309            }
310            Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
311                self.cursor += 2;
312                Ok(())
313            }
314            _ => {
315                let start = self.cursor;
316                let (found_desc, end) = self.scan_token_desc_and_end();
317                Err(self.set_error(
318                    start,
319                    Some(end),
320                    ErrorKind::Wanted {
321                        expected: &"newline",
322                        found: found_desc,
323                    },
324                ))
325            }
326        }
327    }
328
329    fn eat_comment(&mut self) -> Result<bool, Failed> {
330        if !self.eat_byte(b'#') {
331            return Ok(false);
332        }
333        while let Some(0x09 | 0x20..=0x7E | 0x80..) = self.peek_byte() {
334            self.cursor += 1;
335        }
336        self.eat_newline_or_eof().map(|()| true)
337    }
338
339    fn eat_newline(&mut self) -> bool {
340        match self.peek_byte() {
341            Some(b'\n') => {
342                self.cursor += 1;
343                true
344            }
345            Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
346                self.cursor += 2;
347                true
348            }
349            _ => false,
350        }
351    }
352
353    /// Scan forward from the current position to determine the description
354    /// and end position of the "token" at the cursor. This provides compatible
355    /// error spans with the old tokenizer.
356    fn scan_token_desc_and_end(&self) -> (&'static &'static str, usize) {
357        let Some(b) = self.peek_byte() else {
358            return (&"eof", self.bytes.len());
359        };
360        match b {
361            b'\n' => (&"a newline", self.cursor + 1),
362            b'\r' => (&"a carriage return", self.cursor + 1),
363            b' ' | b'\t' => {
364                let mut end = self.cursor + 1;
365                while end < self.bytes.len()
366                    && (self.bytes[end] == b' ' || self.bytes[end] == b'\t')
367                {
368                    end += 1;
369                }
370                (&"whitespace", end)
371            }
372            b'#' => (&"a comment", self.cursor + 1),
373            b'=' => (&"an equals", self.cursor + 1),
374            b'.' => (&"a period", self.cursor + 1),
375            b',' => (&"a comma", self.cursor + 1),
376            b':' => (&"a colon", self.cursor + 1),
377            b'+' => (&"a plus", self.cursor + 1),
378            b'{' => (&"a left brace", self.cursor + 1),
379            b'}' => (&"a right brace", self.cursor + 1),
380            b'[' => (&"a left bracket", self.cursor + 1),
381            b']' => (&"a right bracket", self.cursor + 1),
382            b'\'' | b'"' => (&"a string", self.cursor + 1),
383            _ if is_keylike_byte(b) => {
384                let mut end = self.cursor + 1;
385                while end < self.bytes.len() && is_keylike_byte(self.bytes[end]) {
386                    end += 1;
387                }
388                (&"an identifier", end)
389            }
390            _ => (&"a character", self.cursor + 1),
391        }
392    }
393
394    fn read_keylike(&mut self) -> &'de str {
395        let start = self.cursor;
396        while let Some(b) = self.peek_byte() {
397            if !is_keylike_byte(b) {
398                break;
399            }
400            self.cursor += 1;
401        }
402        // SAFETY: keylike bytes are ASCII, always valid UTF-8 boundaries
403        unsafe { self.str_slice(start, self.cursor) }
404    }
405
406    fn read_table_key(&mut self) -> Result<Key<'de>, Failed> {
407        let Some(b) = self.peek_byte() else {
408            return Err(self.set_error(
409                self.bytes.len(),
410                None,
411                ErrorKind::Wanted {
412                    expected: &"a table key",
413                    found: &"eof",
414                },
415            ));
416        };
417        match b {
418            b'"' => {
419                let start = self.cursor;
420                self.cursor += 1;
421                let (key, multiline) = match self.read_string(start, b'"') {
422                    Ok(v) => v,
423                    Err(e) => return Err(e),
424                };
425                if multiline {
426                    return Err(self.set_error(
427                        start,
428                        Some(key.span.end as usize),
429                        ErrorKind::MultilineStringKey,
430                    ));
431                }
432                Ok(key)
433            }
434            b'\'' => {
435                let start = self.cursor;
436                self.cursor += 1;
437                let (key, multiline) = match self.read_string(start, b'\'') {
438                    Ok(v) => v,
439                    Err(e) => return Err(e),
440                };
441                if multiline {
442                    return Err(self.set_error(
443                        start,
444                        Some(key.span.end as usize),
445                        ErrorKind::MultilineStringKey,
446                    ));
447                }
448                Ok(key)
449            }
450            b if is_keylike_byte(b) => {
451                let start = self.cursor;
452                let name = self.read_keylike();
453                let span = Span::new(start as u32, self.cursor as u32);
454                Ok(Key { name, span })
455            }
456            _ => {
457                let start = self.cursor;
458                let (found_desc, end) = self.scan_token_desc_and_end();
459                Err(self.set_error(
460                    start,
461                    Some(end),
462                    ErrorKind::Wanted {
463                        expected: &"a table key",
464                        found: found_desc,
465                    },
466                ))
467            }
468        }
469    }
470
471    /// Read a basic (double-quoted) string. `start` is the byte offset of the
472    /// opening quote. The cursor should be positioned right after the opening `"`.
473    fn read_string(&mut self, start: usize, delim: u8) -> Result<(Key<'de>, bool), Failed> {
474        let mut multiline = false;
475        if self.eat_byte(delim) {
476            if self.eat_byte(delim) {
477                multiline = true;
478            } else {
479                return Ok((
480                    Key {
481                        name: "",
482                        span: Span::new(start as u32, self.cursor as u32),
483                    },
484                    false,
485                ));
486            }
487        }
488
489        let mut content_start = self.cursor;
490        if multiline {
491            match self.peek_byte() {
492                Some(b'\n') => {
493                    self.cursor += 1;
494                    content_start = self.cursor;
495                }
496                Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
497                    self.cursor += 2;
498                    content_start = self.cursor;
499                }
500                _ => {}
501            }
502        }
503
504        self.read_string_loop(start, content_start, multiline, delim)
505    }
506
    /// Advance `self.cursor` past bytes that do not require special handling
    /// inside a string.  Uses SWAR (SIMD-Within-A-Register) to scan 8 bytes
    /// at a time.
    ///
    /// Stops at the first byte that is:
    ///   * a control character (< 0x20) — tab (0x09) is a benign false positive
    ///   * DEL (0x7F)
    ///   * the string delimiter (`"` or `'`)
    ///   * a backslash (`\`) — benign false positive for literal strings
    ///   * past the end of input
    ///
    /// Note that only the 8-byte scan stops at a tab; the first-byte check
    /// and the tail scan (`skip_string_plain_slow`) step over it.
    fn skip_string_plain(&mut self, delim: u8) {
        // Quick bail-out for EOF or an immediately-interesting byte.
        // Avoids SWAR setup cost for consecutive specials (e.g. \n\n).
        let Some(&b) = self.bytes.get(self.cursor) else {
            return;
        };

        if b == delim || b == b'\\' || b == 0x7F || (b < 0x20 && b != 0x09) {
            return;
        }
        // The first byte is plain; consume it and scan the rest in chunks.
        self.cursor += 1;

        let base = self.cursor;
        let rest = &self.bytes[base..];

        type Chunk = u64;
        const STEP: usize = std::mem::size_of::<Chunk>();
        const ONE: Chunk = Chunk::MAX / 255; // 0x0101_0101_0101_0101
        const HIGH: Chunk = ONE << 7; // 0x8080_8080_8080_8080

        // Each fill_* holds the byte of interest replicated into all lanes.
        let fill_delim = ONE * Chunk::from(delim);
        let fill_bslash = ONE * Chunk::from(b'\\');
        let fill_del = ONE * 0x7F;

        let chunks = rest.chunks_exact(STEP);
        let remainder_len = chunks.remainder().len();

        for (i, chunk) in chunks.enumerate() {
            // Little-endian load: the first byte in memory is the lowest lane.
            let v = Chunk::from_le_bytes(chunk.try_into().unwrap());

            // After masking with HIGH, a lane's top bit is set when the byte
            // is < 0x20 (has_ctrl) or equals the target (eq_*: XOR turns a
            // match into a zero byte, which the subtract-and-mask detects).
            // Borrows can flag lanes above a real hit, never below it, so
            // the lowest flagged lane is always genuine.
            let has_ctrl = v.wrapping_sub(ONE * 0x20) & !v;
            let eq_delim = (v ^ fill_delim).wrapping_sub(ONE) & !(v ^ fill_delim);
            let eq_bslash = (v ^ fill_bslash).wrapping_sub(ONE) & !(v ^ fill_bslash);
            let eq_del = (v ^ fill_del).wrapping_sub(ONE) & !(v ^ fill_del);

            let masked = (has_ctrl | eq_delim | eq_bslash | eq_del) & HIGH;
            if masked != 0 {
                // trailing_zeros / 8 is the index of the lowest flagged lane.
                self.cursor = base + i * STEP + masked.trailing_zeros() as usize / 8;
                return;
            }
        }

        // No hit in any full chunk: finish the (< STEP byte) tail bytewise.
        self.cursor = self.bytes.len() - remainder_len;
        self.skip_string_plain_slow(delim);
    }
562
563    #[cold]
564    #[inline(never)]
565    fn skip_string_plain_slow(&mut self, delim: u8) {
566        while let Some(&b) = self.bytes.get(self.cursor) {
567            if b == delim || b == b'\\' || b == 0x7F || (b < 0x20 && b != 0x09) {
568                return;
569            }
570            self.cursor += 1;
571        }
572    }
573
    /// Scans the body of a string up to and including its closing delimiter.
    ///
    /// * `start` — offset of the opening delimiter; becomes the span start.
    /// * `content_start` — offset of the first content byte.
    /// * `multiline` — whether the string was opened with three delimiters.
    /// * `delim` — `"` or `'`; backslash escapes are only processed for `"`.
    ///
    /// When no escape is encountered the returned text is a slice of the
    /// input; otherwise the text is assembled in an arena scratch buffer.
    ///
    /// Fails with `UnterminatedString` at end of input or at a line ending in
    /// a single-line string, with `InvalidCharInString` for a bare `\r` or
    /// any other disallowed control byte / DEL, and propagates errors from
    /// `read_basic_escape`.
    fn read_string_loop(
        &mut self,
        start: usize,
        content_start: usize,
        multiline: bool,
        delim: u8,
    ) -> Result<(Key<'de>, bool), Failed> {
        // Start of the input bytes not yet copied into `scratch`.
        let mut flush_from = content_start;
        // Created lazily on the first escape sequence.
        let mut scratch: Option<crate::arena::Scratch<'de>> = None;
        loop {
            self.skip_string_plain(delim);

            let i = self.cursor;
            let Some(&b) = self.bytes.get(i) else {
                return Err(self.set_error(
                    i,
                    Some(i),
                    ErrorKind::UnterminatedString(delim as char),
                ));
            };
            self.cursor = i + 1;

            match b {
                b'\r' => {
                    // `\r` is only accepted as part of `\r\n`.
                    if self.eat_byte(b'\n') {
                        if !multiline {
                            return Err(self.set_error(
                                i,
                                Some(i),
                                ErrorKind::UnterminatedString(delim as char),
                            ));
                        }
                    } else {
                        return Err(self.set_error(i, None, ErrorKind::InvalidCharInString('\r')));
                    }
                }
                b'\n' => {
                    if !multiline {
                        return Err(self.set_error(
                            i,
                            Some(i),
                            ErrorKind::UnterminatedString(delim as char),
                        ));
                    }
                }
                d if d == delim => {
                    let (span, end) = if multiline {
                        // Fewer than three delimiters in a row are content.
                        if !self.eat_byte(delim) {
                            continue;
                        }
                        if !self.eat_byte(delim) {
                            continue;
                        }
                        // Up to two more delimiters directly before the
                        // closing three belong to the content.
                        let mut extra = 0usize;
                        if self.eat_byte(delim) {
                            extra += 1;
                        }
                        if self.eat_byte(delim) {
                            extra += 1;
                        }

                        (Span::new(start as u32, self.cursor as u32), i + extra)
                    } else {
                        (Span::new(start as u32, self.cursor as u32), i)
                    };

                    let name = if let Some(mut s) = scratch {
                        // Append the tail after the last escape, then commit.
                        s.extend(&self.bytes[flush_from..end]);
                        let committed = s.commit();
                        // Safety: scratch contents are valid UTF-8 (built from
                        // validated input and well-formed escape sequences).
                        unsafe { std::str::from_utf8_unchecked(committed) }
                    } else {
                        // Safety: content_start..end is validated UTF-8.
                        unsafe { self.str_slice(content_start, end) }
                    };
                    return Ok((Key { name, span }, multiline));
                }
                b'\\' if delim == b'"' => {
                    let arena = self.arena;
                    // SAFETY: the closure only runs when scratch is None, so no
                    // other Scratch or arena.alloc() call is active.
                    let s = scratch.get_or_insert_with(|| unsafe { arena.scratch() });
                    // Copy the plain bytes preceding the backslash, then let
                    // the escape handler append the decoded value.
                    s.extend(&self.bytes[flush_from..i]);
                    if let Err(e) = self.read_basic_escape(s, start, multiline) {
                        return Err(e);
                    }
                    flush_from = self.cursor;
                }
                // Tab or backslash-in-literal-string: benign false positives
                // from the SWAR scan.
                0x09 | 0x20..=0x7E | 0x80.. => {}
                _ => {
                    return Err(self.set_error(i, None, ErrorKind::InvalidCharInString(b as char)));
                }
            }
        }
    }
672
    /// Decodes one escape sequence of a `"`-delimited string.
    ///
    /// The cursor must be positioned right after the backslash. The decoded
    /// value is appended to `scratch`; a line-ending backslash (only
    /// accepted when `multi` is true) appends nothing and instead skips the
    /// following whitespace and line endings.
    ///
    /// `string_start` is only forwarded to `read_hex`.
    ///
    /// Fails with `UnterminatedString` at end of input, `InvalidEscape` for
    /// an unknown escape character or a backslash followed by whitespace
    /// that does not lead to a line ending, `InvalidCharInString` for a
    /// backslash followed by a bare `\r`, and propagates `read_hex` errors.
    fn read_basic_escape(
        &mut self,
        scratch: &mut crate::arena::Scratch<'_>,
        string_start: usize,
        multi: bool,
    ) -> Result<(), Failed> {
        let i = self.cursor;
        let Some(&b) = self.bytes.get(i) else {
            return Err(self.set_error(i, Some(i), ErrorKind::UnterminatedString('"')));
        };
        self.cursor = i + 1;
        // `'byte` yields single-byte escapes, `'char` yields code points from
        // hex escapes; the whitespace arm produces no output at all.
        let chr: char = 'char: {
            let byte: u8 = 'byte: {
                match b {
                    b'"' => break 'byte b'"',
                    b'\\' => break 'byte b'\\',
                    b'b' => break 'byte 0x08,
                    b'f' => break 'byte 0x0C,
                    b'n' => break 'byte b'\n',
                    b'r' => break 'byte b'\r',
                    b't' => break 'byte b'\t',
                    b'e' => break 'byte 0x1B,
                    b'u' => match self.read_hex(4, string_start, i) {
                        Ok(ch) => break 'char ch,
                        Err(e) => return Err(e),
                    },
                    b'U' => match self.read_hex(8, string_start, i) {
                        Ok(ch) => break 'char ch,
                        Err(e) => return Err(e),
                    },
                    b'x' => match self.read_hex(2, string_start, i) {
                        Ok(ch) => break 'char ch,
                        Err(e) => return Err(e),
                    },
                    b' ' | b'\t' | b'\n' | b'\r' if multi => {
                        // CRLF folding: \r\n counts as \n
                        let c = if b == b'\r' && self.peek_byte() == Some(b'\n') {
                            self.cursor += 1;
                            '\n'
                        } else if b == b'\r' {
                            return Err(self.set_error(
                                i,
                                None,
                                ErrorKind::InvalidCharInString('\r'),
                            ));
                        } else {
                            b as char
                        };
                        // The backslash was followed by a space or tab: only
                        // more spaces/tabs may precede the line ending.
                        if c != '\n' {
                            loop {
                                match self.peek_byte() {
                                    Some(b' ' | b'\t') => {
                                        self.cursor += 1;
                                    }
                                    Some(b'\n') => {
                                        self.cursor += 1;
                                        break;
                                    }
                                    Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
                                        self.cursor += 2;
                                        break;
                                    }
                                    _ => {
                                        return Err(self.set_error(
                                            i,
                                            None,
                                            ErrorKind::InvalidEscape(c),
                                        ));
                                    }
                                }
                            }
                        }
                        // Skip all whitespace and line endings that follow.
                        loop {
                            match self.peek_byte() {
                                Some(b' ' | b'\t' | b'\n') => {
                                    self.cursor += 1;
                                }
                                Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
                                    self.cursor += 2;
                                }
                                _ => break,
                            }
                        }
                    }
                    _ => {
                        // Step back onto the offending character so the
                        // error points at it.
                        self.cursor -= 1;
                        return Err(self.set_error(
                            self.cursor,
                            None,
                            ErrorKind::InvalidEscape(self.next_char_for_error()),
                        ));
                    }
                }
                // Only the whitespace arm falls through to here.
                return Ok(());
            };

            scratch.push(byte);
            return Ok(());
        };
        // Hex escapes: append the code point's UTF-8 encoding.
        let mut buf = [0u8; 4];
        let len = chr.encode_utf8(&mut buf).len();
        scratch.extend(&buf[..len]);
        Ok(())
    }
777
778    fn read_hex(
779        &mut self,
780        n: usize,
781        _string_start: usize,
782        escape_start: usize,
783    ) -> Result<char, Failed> {
784        let mut val: u32 = 0;
785        for _ in 0..n {
786            let Some(&byte) = self.bytes.get(self.cursor) else {
787                return Err(self.set_error(
788                    self.cursor,
789                    Some(self.cursor),
790                    ErrorKind::UnterminatedString('"'),
791                ));
792            };
793            let digit = HEX[byte as usize];
794            if digit >= 0 {
795                val = (val << 4) | digit as u32;
796                self.cursor += 1;
797            } else {
798                return Err(self.set_error(
799                    self.cursor,
800                    None,
801                    ErrorKind::InvalidHexEscape(self.next_char_for_error()),
802                ));
803            }
804        }
805        match char::from_u32(val) {
806            Some(ch) => Ok(ch),
807            None => Err(self.set_error(
808                escape_start - 1,
809                Some(self.cursor),
810                ErrorKind::InvalidEscapeValue(val),
811            )),
812        }
813    }
814
815    fn next_char_for_error(&self) -> char {
816        // Safety: The input was valid UTF-8 via a &str
817        let text = unsafe { std::str::from_utf8_unchecked(self.bytes) };
818        if let Some(value) = text.get(self.cursor..) {
819            value.chars().next().unwrap_or(char::REPLACEMENT_CHARACTER)
820        } else {
821            char::REPLACEMENT_CHARACTER
822        }
823    }
824    fn number(&mut self, start: u32, end: u32, s: &'de str, sign: u8) -> Result<Item<'de>, Failed> {
825        let bytes = s.as_bytes();
826
827        // Base-prefixed integers (0x, 0o, 0b).
828        // TOML forbids signs on these, so only match when first byte is '0'.
829        if sign == 2
830            && let [b'0', format, rest @ ..] = s.as_bytes()
831        {
832            match format {
833                b'x' => return self.integer_prefixed(rest, Span::new(start, end), 4),
834                b'o' => return self.integer_prefixed(rest, Span::new(start, end), 3),
835                b'b' => return self.integer_prefixed(rest, Span::new(start, end), 1),
836                _ => {}
837            }
838        }
839
840        if self.eat_byte(b'.') {
841            let at = self.cursor;
842            return match self.peek_byte() {
843                Some(b) if is_keylike_byte(b) => {
844                    let after = self.read_keylike();
845                    match self.float(start, end, s, Some(after), sign) {
846                        Ok(f) => Ok(Item::float_spanned(f, Span::new(start, self.cursor as u32))),
847                        Err(e) => Err(e),
848                    }
849                }
850                _ => Err(self.set_error(
851                    start as usize,
852                    Some(at),
853                    ErrorKind::InvalidFloat("nothing after decimal point"),
854                )),
855            };
856        }
857
858        if sign == 2 {
859            let head = &self.bytes[start as usize..];
860            match DateTime::munch(head) {
861                Ok((consumed, moment)) => {
862                    self.cursor = start as usize + consumed;
863                    return Ok(Item::moment(moment, Span::new(start, self.cursor as u32)));
864                }
865                Err(reason) if !reason.is_empty() => {
866                    let rest = &self.bytes[start as usize..];
867                    let mut consumed = 0;
868                    while consumed < rest.len()
869                        && !matches!(
870                            rest[consumed],
871                            b' ' | b'\t' | b'\n' | b'\r' | b'#' | b',' | b']' | b'}'
872                        )
873                    {
874                        consumed += 1;
875                    }
876                    self.cursor = start as usize + consumed;
877                    return Err(self.set_error(
878                        start as usize,
879                        Some(self.cursor),
880                        ErrorKind::InvalidDateTime(reason),
881                    ));
882                }
883                Err(_) => {}
884            }
885        }
886
887        if sign != 2
888            && let [b'0', b'x' | b'o' | b'b', ..] = bytes
889        {
890            return Err(self.set_error(
891                start as usize,
892                Some(end as usize),
893                ErrorKind::InvalidInteger("signs are not allowed on prefixed integers"),
894            ));
895        }
896
897        if let Ok(v) = self.integer_decimal(bytes, Span::new(start, end), sign) {
898            return Ok(v);
899        }
900
901        if bytes.iter().any(|&b| b == b'e' || b == b'E') {
902            return match self.float(start, end, s, None, sign) {
903                Ok(f) => Ok(Item::float_spanned(f, Span::new(start, self.cursor as u32))),
904                Err(e) => Err(e),
905            };
906        }
907
908        Err(Failed)
909    }
910
911    fn integer_decimal(
912        &mut self,
913        bytes: &'de [u8],
914        span: Span,
915        sign: u8,
916    ) -> Result<Item<'de>, Failed> {
917        let mut acc: u128 = 0;
918        let mut prev_underscore = false;
919        let mut has_digit = false;
920        let mut leading_zero = false;
921        let negative = sign == 0;
922        let sign_len = if sign != 2 { 1u32 } else { 0u32 };
923        let mut error_span = span;
924        let reason = 'error: {
925            let mut i = 0;
926            while i < bytes.len() {
927                let b = bytes[i];
928                if b == b'_' {
929                    if !has_digit || prev_underscore {
930                        let pos = span.start + sign_len + i as u32;
931                        error_span = Span::new(pos, pos + 1);
932                        break 'error "underscores must be between two digits";
933                    }
934                    prev_underscore = true;
935                    i += 1;
936                    continue;
937                }
938                if !b.is_ascii_digit() {
939                    let pos = span.start + sign_len + i as u32;
940                    error_span = Span::new(pos, pos + 1);
941                    break 'error "contains non-digit character";
942                }
943                if leading_zero {
944                    break 'error "leading zeros are not allowed";
945                }
946                if !has_digit && b == b'0' {
947                    leading_zero = true;
948                }
949                has_digit = true;
950                prev_underscore = false;
951                let digit = (b - b'0') as u128;
952                acc = match acc.checked_mul(10).and_then(|a| a.checked_add(digit)) {
953                    Some(v) => v,
954                    None => break 'error "integer overflow",
955                };
956                i += 1;
957            }
958
959            if !has_digit {
960                break 'error "expected at least one digit";
961            }
962            if prev_underscore {
963                let pos = span.start + sign_len + bytes.len() as u32 - 1;
964                error_span = Span::new(pos, pos + 1);
965                break 'error "underscores must be between two digits";
966            }
967
968            let max = if negative {
969                (i128::MAX as u128) + 1
970            } else {
971                i128::MAX as u128
972            };
973            if acc > max {
974                break 'error "integer overflow";
975            }
976
977            let val = if negative {
978                (acc as i128).wrapping_neg()
979            } else {
980                acc as i128
981            };
982            return Ok(Item::integer_spanned(val, span));
983        };
984        self.error_span = error_span;
985        self.error_kind = Some(ErrorKind::InvalidInteger(reason));
986        Err(Failed)
987    }
988
989    #[inline(never)]
990    fn integer_prefixed(
991        &mut self,
992        bytes: &'de [u8],
993        span: Span,
994        shift: u32,
995    ) -> Result<Item<'de>, Failed> {
996        let max_digit = (1i8 << shift) - 1;
997        let invalid_msg = match shift {
998            4 => "invalid digit for hexadecimal",
999            3 => "invalid digit for octal",
1000            _ => "invalid digit for binary",
1001        };
1002        let mut acc: u128 = 0;
1003        let mut prev_underscore = false;
1004        let mut has_digit = false;
1005        let mut error_span = span;
1006        let reason = 'error: {
1007            if bytes.is_empty() {
1008                break 'error "no digits after prefix";
1009            }
1010
1011            let mut i = 0;
1012            while i < bytes.len() {
1013                let b = bytes[i];
1014                if b == b'_' {
1015                    if !has_digit || prev_underscore {
1016                        let pos = span.start + 2 + i as u32;
1017                        error_span = Span::new(pos, pos + 1);
1018                        break 'error "underscores must be between two digits";
1019                    }
1020                    prev_underscore = true;
1021                    i += 1;
1022                    continue;
1023                }
1024                let digit = HEX[b as usize];
1025                if digit < 0 || digit > max_digit {
1026                    let pos = span.start + 2 + i as u32;
1027                    error_span = Span::new(pos, pos + 1);
1028                    break 'error invalid_msg;
1029                }
1030                has_digit = true;
1031                prev_underscore = false;
1032                if acc >> (128 - shift) != 0 {
1033                    break 'error "integer overflow";
1034                }
1035                acc = (acc << shift) | digit as u128;
1036                i += 1;
1037            }
1038
1039            if !has_digit {
1040                break 'error "no digits after prefix";
1041            }
1042            if prev_underscore {
1043                let pos = span.start + 2 + bytes.len() as u32 - 1;
1044                error_span = Span::new(pos, pos + 1);
1045                break 'error "underscores must be between two digits";
1046            }
1047
1048            if acc > i128::MAX as u128 {
1049                break 'error "integer overflow";
1050            }
1051            return Ok(Item::integer_spanned(acc as i128, span));
1052        };
1053        self.error_span = error_span;
1054        self.error_kind = Some(ErrorKind::InvalidInteger(reason));
1055        Err(Failed)
1056    }
1057
1058    fn float(
1059        &mut self,
1060        start: u32,
1061        end: u32,
1062        s: &'de str,
1063        after_decimal: Option<&'de str>,
1064        sign: u8,
1065    ) -> Result<f64, Failed> {
1066        let s_start = start as usize;
1067        let s_end = end as usize;
1068
1069        // TOML forbids leading zeros in the integer part (e.g. 00.5, -01.0).
1070        if let [b'0', b'0'..=b'9' | b'_', ..] = s.as_bytes() {
1071            return Err(self.set_error(
1072                s_start,
1073                Some(s_end),
1074                ErrorKind::InvalidFloat("leading zeros are not allowed"),
1075            ));
1076        }
1077
1078        // Safety: no other Scratch or arena.alloc() is active during float parsing.
1079        let mut scratch = unsafe { self.arena.scratch() };
1080
1081        if sign == 0 {
1082            scratch.push(b'-');
1083        }
1084        if !scratch.push_strip_underscores(s.as_bytes()) {
1085            return Err(self.set_error(
1086                s_start,
1087                Some(s_end),
1088                ErrorKind::InvalidFloat("underscores must be between two digits"),
1089            ));
1090        }
1091
1092        let mut last = s;
1093
1094        if let Some(after) = after_decimal {
1095            if !matches!(after.as_bytes().first(), Some(b'0'..=b'9')) {
1096                return Err(self.set_error(
1097                    s_start,
1098                    Some(self.cursor),
1099                    ErrorKind::InvalidFloat("expected digit after decimal point"),
1100                ));
1101            }
1102            scratch.push(b'.');
1103            if !scratch.push_strip_underscores(after.as_bytes()) {
1104                return Err(self.set_error(
1105                    s_start,
1106                    Some(self.cursor),
1107                    ErrorKind::InvalidFloat("underscores must be between two digits"),
1108                ));
1109            }
1110            last = after;
1111        }
1112
1113        // When the last keylike token ends with e/E, the '+' and exponent
1114        // digits are separate tokens in the stream ('-' IS keylike so
1115        // e.g. "1e-5" stays in one token and needs no special handling).
1116        if matches!(last.as_bytes().last(), Some(b'e' | b'E')) {
1117            self.eat_byte(b'+');
1118            match self.peek_byte() {
1119                Some(b) if is_keylike_byte(b) && b != b'-' => {
1120                    let next = self.read_keylike();
1121                    if !scratch.push_strip_underscores(next.as_bytes()) {
1122                        return Err(self.set_error(
1123                            s_start,
1124                            Some(self.cursor),
1125                            ErrorKind::InvalidFloat("exponent requires at least one digit"),
1126                        ));
1127                    }
1128                }
1129                _ => {
1130                    return Err(self.set_error(
1131                        s_start,
1132                        Some(self.cursor),
1133                        ErrorKind::InvalidFloat("exponent requires at least one digit"),
1134                    ));
1135                }
1136            }
1137        }
1138
1139        // Scratch is not committed — arena pointer stays unchanged, space is
1140        // reused by subsequent allocations.
1141        // SAFETY: scratch contains only ASCII digits, signs, dots, and 'e'/'E'
1142        // copied from validated input via push_strip_underscores.
1143        let n: f64 = match unsafe { std::str::from_utf8_unchecked(scratch.as_bytes()) }.parse() {
1144            Ok(n) => n,
1145            // std's float parse error is always just "invalid float literal"
1146            Err(_) => {
1147                return Err(self.set_error(
1148                    s_start,
1149                    Some(self.cursor),
1150                    ErrorKind::InvalidFloat(""),
1151                ));
1152            }
1153        };
1154        if n.is_finite() {
1155            Ok(n)
1156        } else {
1157            Err(self.set_error(
1158                s_start,
1159                Some(self.cursor),
1160                ErrorKind::InvalidFloat("float overflow"),
1161            ))
1162        }
1163    }
1164
1165    fn value(&mut self, depth_remaining: i16) -> Result<Item<'de>, Failed> {
1166        let at = self.cursor;
1167        let Some(byte) = self.peek_byte() else {
1168            return Err(self.set_error(self.bytes.len(), None, ErrorKind::UnexpectedEof));
1169        };
1170        let sign = match byte {
1171            b'"' | b'\'' => {
1172                self.cursor += 1;
1173                return match self.read_string(self.cursor - 1, byte) {
1174                    Ok((key, _)) => Ok(Item::string_spanned(key.name, key.span)),
1175                    Err(e) => Err(e),
1176                };
1177            }
1178            b'{' => {
1179                let start = self.cursor as u32;
1180                self.cursor += 1;
1181                let mut table = crate::item::table::InnerTable::new();
1182                if let Err(err) = self.inline_table_contents(&mut table, depth_remaining - 1) {
1183                    return Err(err);
1184                }
1185                return Ok(Item::table_frozen(
1186                    table,
1187                    Span::new(start, self.cursor as u32),
1188                ));
1189            }
1190            b'[' => {
1191                let start = self.cursor as u32;
1192                self.cursor += 1;
1193                let mut arr = crate::item::array::InternalArray::new();
1194                if let Err(err) = self.array_contents(&mut arr, depth_remaining - 1) {
1195                    return Err(err);
1196                };
1197                return Ok(Item::array(arr, Span::new(start, self.cursor as u32)));
1198            }
1199            b'-' => {
1200                self.cursor += 1;
1201                0
1202            }
1203            b'+' => {
1204                self.cursor += 1;
1205                1
1206            }
1207            _ => 2,
1208        };
1209
1210        let key = self.read_keylike();
1211
1212        let end = self.cursor as u32;
1213        match key {
1214            "inf" => {
1215                return Ok(Item::float_spanned(
1216                    if sign != 0 {
1217                        f64::INFINITY
1218                    } else {
1219                        f64::NEG_INFINITY
1220                    },
1221                    Span::new(at as u32, end),
1222                ));
1223            }
1224            "nan" => {
1225                return Ok(Item::float_spanned(
1226                    if sign != 0 {
1227                        f64::NAN.copysign(1.0)
1228                    } else {
1229                        f64::NAN.copysign(-1.0)
1230                    },
1231                    Span::new(at as u32, end),
1232                ));
1233            }
1234            "true" | "false" if sign == 2 => {
1235                return Ok(Item::boolean(key == "true", Span::new(at as u32, end)));
1236            }
1237            _ => (),
1238        }
1239
1240        if let [b'0'..=b'9', ..] = key.as_bytes() {
1241            self.number(at as u32, end, key, sign)
1242        } else if byte == b'\r' {
1243            Err(self.set_error(at, None, ErrorKind::Unexpected('\r')))
1244        } else if sign != 2 {
1245            Err(self.set_error(
1246                at,
1247                Some(self.cursor),
1248                ErrorKind::InvalidInteger("expected digit after sign"),
1249            ))
1250        } else if key.is_empty() {
1251            Err(self.set_error(at, None, ErrorKind::Unexpected(self.next_char_for_error())))
1252        } else {
1253            Err(self.set_error(at, Some(self.cursor), ErrorKind::UnquotedString))
1254        }
1255    }
1256
1257    fn inline_table_contents(
1258        &mut self,
1259        out: &mut crate::item::table::InnerTable<'de>,
1260        depth_remaining: i16,
1261    ) -> Result<(), Failed> {
1262        if depth_remaining < 0 {
1263            return Err(self.set_error(
1264                self.cursor,
1265                None,
1266                ErrorKind::OutOfRange {
1267                    ty: &"Max recursion depth exceeded",
1268                    range: &"",
1269                },
1270            ));
1271        }
1272        if let Err(e) = self.eat_inline_table_whitespace() {
1273            return Err(e);
1274        }
1275        if self.eat_byte(b'}') {
1276            return Ok(());
1277        }
1278        loop {
1279            let saved_path_len = self.path_len;
1280            let mut table_ref: &mut crate::item::table::InnerTable<'de> = &mut *out;
1281            let mut key = match self.read_table_key() {
1282                Ok(k) => k,
1283                Err(e) => return Err(e),
1284            };
1285            self.eat_whitespace();
1286            while self.eat_byte(b'.') {
1287                self.eat_whitespace();
1288                self.push_path(PathComponent::Key(key));
1289                table_ref = match self.navigate_dotted_key(table_ref, key) {
1290                    Ok(t) => t,
1291                    Err(e) => return Err(e),
1292                };
1293                key = match self.read_table_key() {
1294                    Ok(k) => k,
1295                    Err(e) => return Err(e),
1296                };
1297                self.eat_whitespace();
1298            }
1299            if let Err(e) = self.eat_inline_table_whitespace() {
1300                return Err(e);
1301            }
1302            if let Err(e) = self.expect_byte(b'=') {
1303                return Err(e);
1304            }
1305            if let Err(e) = self.eat_inline_table_whitespace() {
1306                return Err(e);
1307            }
1308
1309            self.push_path(PathComponent::Key(key));
1310            {
1311                let val = match self.value(depth_remaining) {
1312                    Ok(v) => v,
1313                    Err(e) => return Err(e),
1314                };
1315                if let Err(e) = self.insert_value(table_ref, key, val) {
1316                    return Err(e);
1317                }
1318            }
1319            self.path_len = saved_path_len;
1320
1321            if let Err(e) = self.eat_inline_table_whitespace() {
1322                return Err(e);
1323            }
1324            if self.eat_byte(b'}') {
1325                return Ok(());
1326            }
1327            if !self.eat_byte(b',') {
1328                let start = self.cursor;
1329                if self.peek_byte().is_none() {
1330                    return Err(self.set_error(start, None, ErrorKind::UnclosedInlineTable));
1331                }
1332                let (_found_desc, end) = self.scan_token_desc_and_end();
1333                return Err(self.set_error(start, Some(end), ErrorKind::MissingInlineTableComma));
1334            }
1335            if let Err(e) = self.eat_inline_table_whitespace() {
1336                return Err(e);
1337            }
1338            if self.eat_byte(b'}') {
1339                return Ok(());
1340            }
1341        }
1342    }
1343
1344    fn array_contents(
1345        &mut self,
1346        out: &mut crate::item::array::InternalArray<'de>,
1347        depth_remaining: i16,
1348    ) -> Result<(), Failed> {
1349        if depth_remaining < 0 {
1350            return Err(self.set_error(
1351                self.cursor,
1352                None,
1353                ErrorKind::OutOfRange {
1354                    ty: &"Max recursion depth exceeded",
1355                    range: &"",
1356                },
1357            ));
1358        }
1359        let saved_path_len = self.path_len;
1360        loop {
1361            if let Err(e) = self.eat_intermediate() {
1362                return Err(e);
1363            }
1364            if self.eat_byte(b']') {
1365                self.path_len = saved_path_len;
1366                return Ok(());
1367            }
1368            self.push_path(PathComponent::Index(out.len()));
1369            match self.value(depth_remaining) {
1370                Ok(value) => out.push(value, self.arena),
1371                Err(e) => return Err(e),
1372            };
1373            self.path_len = saved_path_len;
1374            if let Err(e) = self.eat_intermediate() {
1375                return Err(e);
1376            }
1377            if !self.eat_byte(b',') {
1378                break;
1379            }
1380        }
1381        if let Err(e) = self.eat_intermediate() {
1382            return Err(e);
1383        }
1384        if self.eat_byte(b']') {
1385            return Ok(());
1386        }
1387        let start = self.cursor;
1388        if self.peek_byte().is_none() {
1389            return Err(self.set_error(start, None, ErrorKind::UnclosedArray));
1390        }
1391        let (_found_desc, end) = self.scan_token_desc_and_end();
1392        Err(self.set_error(start, Some(end), ErrorKind::MissingArrayComma))
1393    }
1394
1395    #[inline(always)]
1396    fn eat_inline_table_whitespace(&mut self) -> Result<(), Failed> {
1397        loop {
1398            match self.peek_byte() {
1399                Some(b' ' | b'\t' | b'\n') => self.cursor += 1,
1400                Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => self.cursor += 2,
1401                Some(b'#') => match self.eat_comment() {
1402                    Ok(_) => {}
1403                    Err(e) => return Err(e),
1404                },
1405                _ => return Ok(()),
1406            }
1407        }
1408    }
1409
1410    #[inline(always)]
1411    fn eat_intermediate(&mut self) -> Result<(), Failed> {
1412        loop {
1413            match self.peek_byte() {
1414                Some(b' ' | b'\t' | b'\n') => self.cursor += 1,
1415                Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => self.cursor += 2,
1416                Some(b'#') => match self.eat_comment() {
1417                    Ok(_) => {}
1418                    Err(e) => return Err(e),
1419                },
1420                _ => return Ok(()),
1421            }
1422        }
1423    }
1424
1425    /// Navigate into an existing or new table for a dotted-key intermediate
1426    /// segment. Checks frozen and header bits.
1427    /// New tables are created with the `DOTTED` tag.
1428    fn navigate_dotted_key<'t>(
1429        &mut self,
1430        table: &'t mut InnerTable<'de>,
1431        key: Key<'de>,
1432    ) -> Result<&'t mut InnerTable<'de>, Failed> {
1433        if let Some(idx) = self.indexed_find(table, key.name) {
1434            let (existing_key, value) = &mut table.entries_mut()[idx];
1435
1436            if !value.is_table() {
1437                return Err(self.set_error(
1438                    key.span.start as usize,
1439                    Some(key.span.end as usize),
1440                    ErrorKind::DottedKeyInvalidType {
1441                        first: existing_key.span,
1442                    },
1443                ));
1444            }
1445            if value.is_frozen() || value.has_header_bit() {
1446                return Err(self.set_duplicate_key_error(existing_key.span, key.span));
1447            }
1448            // Promote IMPLICIT -> DOTTED: an implicit table created by a section
1449            // header intermediate (e.g. `b` in `[a.b.c]`) is now being touched
1450            // by a dotted key in the body (e.g. `b.x = 1` inside `[a]`).
1451            if value.is_implicit_table() {
1452                // SAFETY: is_table() verified by the guard above.
1453                let t = unsafe { value.as_table_mut_unchecked() };
1454                t.set_dotted_flag();
1455                t.set_span_start(key.span.start);
1456                t.set_span_end(key.span.end);
1457            }
1458            // SAFETY: is_table() verified by the guard above.
1459            unsafe { Ok(value.as_inner_table_mut_unchecked()) }
1460        } else {
1461            let span = key.span;
1462            let inserted = self.insert_value_known_to_be_unique(
1463                table,
1464                key,
1465                Item::table_dotted(InnerTable::new(), span),
1466            );
1467            // SAFETY: Item::table_dotted() produces a table-tagged item.
1468            unsafe { Ok(inserted.as_inner_table_mut_unchecked()) }
1469        }
1470    }
1471
1472    /// Navigate an intermediate segment of a table header (e.g. `a` in `[a.b.c]`).
1473    /// Creates implicit tables (no flag bits) if not found.
1474    /// Handles arrays-of-tables by navigating into the last element.
1475    ///
1476    /// Returns a `Table` view of the table navigated into.
1477    fn navigate_header_intermediate<'b>(
1478        &mut self,
1479        st: &'b mut Table<'de>,
1480        key: Key<'de>,
1481    ) -> Result<&'b mut Table<'de>, Failed> {
1482        let table = &mut st.value;
1483
1484        if let Some(idx) = self.indexed_find(table, key.name) {
1485            let (existing_key, existing) = &mut table.entries_mut()[idx];
1486            let existing_span = existing_key.span;
1487
1488            // Note: I would use safey accessor heres but that would cause issues
1489            // with NLL limitations.
1490            if existing.is_table() {
1491                if existing.is_frozen() {
1492                    return Err(self.set_duplicate_key_error(existing_span, key.span));
1493                }
1494                // SAFETY: is_table() verified by the preceding check.
1495                unsafe { Ok(existing.as_table_mut_unchecked()) }
1496            } else if existing.is_aot() {
1497                // unwrap is safe since we just check it's an array of tables and thus a array.
1498                let arr = existing.as_array_mut().unwrap();
1499                self.push_path(PathComponent::Index(arr.len() - 1));
1500                // unwrap is safe as array's of tables always have atleast one value by construction
1501                let last = arr.last_mut().unwrap();
1502                if !last.is_table() {
1503                    return Err(self.set_duplicate_key_error(existing_span, key.span));
1504                }
1505                // SAFETY: last.is_table() verified by the preceding check.
1506                unsafe { Ok(last.as_table_mut_unchecked()) }
1507            } else {
1508                Err(self.set_duplicate_key_error(existing_span, key.span))
1509            }
1510        } else {
1511            let span = key.span;
1512            let inserted = self.insert_value_known_to_be_unique(
1513                table,
1514                key,
1515                Item::table(InnerTable::new(), span),
1516            );
1517            // SAFETY: Item::table() produces a table-tagged item.
1518            unsafe { Ok(inserted.as_table_mut_unchecked()) }
1519        }
1520    }
1521    fn insert_value_known_to_be_unique<'t>(
1522        &mut self,
1523        table: &'t mut InnerTable<'de>,
1524        key: Key<'de>,
1525        item: Item<'de>,
1526    ) -> &'t mut item::Item<'de> {
1527        let len = table.len();
1528        if len >= INDEXED_TABLE_THRESHOLD {
1529            // SAFETY: len >= INDEXED_TABLE_THRESHOLD (>= 6), so the table is non-empty.
1530            let table_id = unsafe { table.first_key_span_start_unchecked() };
1531            if len == INDEXED_TABLE_THRESHOLD {
1532                for (i, (key, _)) in table.entries().iter().enumerate() {
1533                    self.index.insert(KeyRef::new(key.as_str(), table_id), i);
1534                }
1535            }
1536            self.index.insert(KeyRef::new(key.as_str(), table_id), len);
1537        }
1538        &mut table.insert_unique(key, item, self.arena).1
1539    }
1540
1541    /// Handle the final segment of a standard table header `[a.b.c]`.
1542    ///
1543    /// Returns the [`Ctx`] for the table that subsequent key-value pairs
1544    /// should be inserted into.
1545    fn navigate_header_table_final<'b>(
1546        &mut self,
1547        st: &'b mut Table<'de>,
1548        key: Key<'de>,
1549        header_start: u32,
1550        header_end: u32,
1551    ) -> Result<Ctx<'b, 'de>, Failed> {
1552        let table = &mut st.value;
1553
1554        if let Some(idx) = self.indexed_find(table, key.name) {
1555            let (existing_key, existing) = &mut table.entries_mut()[idx];
1556            let first_key_span = existing_key.span;
1557
1558            if !existing.is_table() || existing.is_frozen() {
1559                return Err(self.set_duplicate_key_error(first_key_span, key.span));
1560            }
1561            if existing.has_header_bit() {
1562                return Err(self.set_error(
1563                    header_start as usize,
1564                    Some(header_end as usize),
1565                    ErrorKind::DuplicateTable {
1566                        name: key.span,
1567                        first: existing.span_unchecked(),
1568                    },
1569                ));
1570            }
1571            if existing.has_dotted_bit() {
1572                return Err(self.set_duplicate_key_error(first_key_span, key.span));
1573            }
1574            // SAFETY: is_table() verified by the preceding checks.
1575            let table = unsafe { existing.as_table_mut_unchecked() };
1576            table.set_header_flag();
1577            table.set_span_start(header_start);
1578            table.set_span_end(header_end);
1579            Ok(Ctx {
1580                table,
1581                array_end_span: None,
1582            })
1583        } else {
1584            let inserted = self.insert_value_known_to_be_unique(
1585                table,
1586                key,
1587                Item::table_header(InnerTable::new(), Span::new(header_start, header_end)),
1588            );
1589            Ok(Ctx {
1590                // SAFETY: Item::table_header() produces a table-tagged item.
1591                table: unsafe { inserted.as_table_mut_unchecked() },
1592                array_end_span: None,
1593            })
1594        }
1595    }
1596
1597    /// Handle the final segment of an array-of-tables header `[[a.b.c]]`.
1598    ///
1599    /// Returns the [`Ctx`] for the new table entry that subsequent key-value
1600    /// pairs should be inserted into.
1601    fn navigate_header_array_final<'b>(
1602        &mut self,
1603        st: &'b mut Table<'de>,
1604        key: Key<'de>,
1605        header_start: u32,
1606        header_end: u32,
1607    ) -> Result<Ctx<'b, 'de>, Failed> {
1608        let table = &mut st.value;
1609
1610        if let Some(idx) = self.indexed_find(table, key.name) {
1611            let (existing_key, existing) = &mut table.entries_mut()[idx];
1612            let first_key_span = existing_key.span;
1613
1614            if existing.is_aot() {
1615                // SAFETY: is_aot verified by the preceding check, which implies is_array().
1616                let (end_flag, arr) = unsafe { existing.split_array_end_flag() };
1617                let entry_span = Span::new(header_start, header_end);
1618                arr.push(
1619                    Item::table_header(InnerTable::new(), entry_span),
1620                    self.arena,
1621                );
1622                self.push_path(PathComponent::Index(arr.len() - 1));
1623                let entry = arr.last_mut().unwrap();
1624                Ok(Ctx {
1625                    // SAFETY: Item::table_header() produces a table-tagged item.
1626                    table: unsafe { entry.as_table_mut_unchecked() },
1627                    array_end_span: Some(end_flag),
1628                })
1629            } else if existing.is_table() {
1630                Err(self.set_error(
1631                    header_start as usize,
1632                    Some(header_end as usize),
1633                    ErrorKind::RedefineAsArray {
1634                        first: first_key_span,
1635                    },
1636                ))
1637            } else {
1638                Err(self.set_duplicate_key_error(first_key_span, key.span))
1639            }
1640        } else {
1641            let entry_span = Span::new(header_start, header_end);
1642            let first_entry = Item::table_header(InnerTable::new(), entry_span);
1643            let array_span = Span::new(header_start, header_end);
1644            let array_val = Item::array_aot(
1645                crate::item::array::InternalArray::with_single(first_entry, self.arena),
1646                array_span,
1647            );
1648            let inserted = self.insert_value_known_to_be_unique(table, key, array_val);
1649            self.push_path(PathComponent::Index(0));
1650            // SAFETY: Item::array_aot() produces an array-tagged item.
1651            let (end_flag, arr) = unsafe { inserted.split_array_end_flag() };
1652            let entry = arr.last_mut().unwrap();
1653            Ok(Ctx {
1654                // SAFETY: Item::table_header() (used in with_single) produces a table-tagged item.
1655                table: unsafe { entry.as_table_mut_unchecked() },
1656                array_end_span: Some(end_flag),
1657            })
1658        }
1659    }
1660
1661    /// Insert a value into a table, checking for duplicates.
1662    fn insert_value(
1663        &mut self,
1664        table: &mut InnerTable<'de>,
1665        key: Key<'de>,
1666        item: Item<'de>,
1667    ) -> Result<(), Failed> {
1668        if table.len() < INDEXED_TABLE_THRESHOLD {
1669            for (existing_key, _) in table.entries() {
1670                if existing_key.as_str() == key.name {
1671                    return Err(self.set_duplicate_key_error(existing_key.span, key.span));
1672                }
1673            }
1674            table.insert_unique(key, item, self.arena);
1675            return Ok(());
1676        }
1677        // SAFETY: len >= INDEXED_TABLE_THRESHOLD (>= 6), so the table is non-empty.
1678        let table_id = unsafe { table.first_key_span_start_unchecked() };
1679
1680        // Note: if find a duplicate we bail out, terminating the parsing with an error.
1681        // Even if we did end up re-inserting no issues would come of it.
1682        if table.len() == INDEXED_TABLE_THRESHOLD {
1683            for (i, (key, _)) in table.entries().iter().enumerate() {
1684                // Wish I could use insert_unique here but that would require
1685                // pulling in hashbrown :(
1686                self.index.insert(KeyRef::new(key.as_str(), table_id), i);
1687            }
1688        }
1689
1690        match self.index.entry(KeyRef::new(key.as_str(), table_id)) {
1691            std::collections::hash_map::Entry::Occupied(occupied_entry) => {
1692                let idx = *occupied_entry.get();
1693                let (existing_key, _) = &table.entries()[idx];
1694                Err(self.set_duplicate_key_error(existing_key.span, key.span))
1695            }
1696            std::collections::hash_map::Entry::Vacant(vacant_entry) => {
1697                vacant_entry.insert(table.len());
1698                table.insert_unique(key, item, self.arena);
1699                Ok(())
1700            }
1701        }
1702    }
1703
1704    /// Look up a key name in a table, returning its entry index.
1705    /// Uses the hash index for tables at or above the threshold, otherwise
1706    /// falls back to a linear scan.
1707    fn indexed_find(&self, table: &InnerTable<'de>, name: &str) -> Option<usize> {
1708        // NOTE: I would return a reference to actual entry here, however this
1709        // runs into all sorts of NLL limitations.
1710        if table.len() > INDEXED_TABLE_THRESHOLD {
1711            // SAFETY: len > INDEXED_TABLE_THRESHOLD (> 6), so the table is non-empty.
1712            let first_key_span = unsafe { table.first_key_span_start_unchecked() };
1713            self.index.get(&KeyRef::new(name, first_key_span)).copied()
1714        } else {
1715            table.find_index(name)
1716        }
1717    }
1718
1719    fn skip_recovery_string(&mut self) {
1720        let delim = self.bytes[self.cursor];
1721        self.cursor += 1;
1722        let multiline = self.peek_byte() == Some(delim) && self.peek_byte_at(1) == Some(delim);
1723        if multiline {
1724            self.cursor += 2;
1725            loop {
1726                match self.peek_byte() {
1727                    None => return,
1728                    Some(b)
1729                        if b == delim
1730                            && self.peek_byte_at(1) == Some(delim)
1731                            && self.peek_byte_at(2) == Some(delim) =>
1732                    {
1733                        self.cursor += 3;
1734                        while self.peek_byte() == Some(delim) {
1735                            self.cursor += 1;
1736                        }
1737                        return;
1738                    }
1739                    Some(b'\\') if delim == b'"' => self.cursor += 2,
1740                    _ => self.cursor += 1,
1741                }
1742            }
1743        }
1744        loop {
1745            match self.peek_byte() {
1746                None | Some(b'\n') => return,
1747                Some(b) if b == delim => {
1748                    self.cursor += 1;
1749                    return;
1750                }
1751                Some(b'\\') if delim == b'"' => self.cursor += 2,
1752                _ => self.cursor += 1,
1753            }
1754        }
1755    }
1756
1757    fn at_statement_start(&self) -> bool {
1758        matches!(self.peek_byte(), None | Some(b'[') | Some(b'#'))
1759            || matches!(self.peek_byte(), Some(b) if is_keylike_byte(b) || b == b'"' || b == b'\'')
1760    }
1761
1762    fn skip_to_next_statement(&mut self) {
1763        loop {
1764            match self.peek_byte() {
1765                None => return,
1766                Some(b'\n') => {
1767                    self.cursor += 1;
1768                    let saved = self.cursor;
1769                    while matches!(self.peek_byte(), Some(b' ' | b'\t')) {
1770                        self.cursor += 1;
1771                    }
1772                    if self.at_statement_start() {
1773                        self.cursor = saved;
1774                        return;
1775                    }
1776                    self.cursor = saved;
1777                }
1778                Some(b'"' | b'\'') => self.skip_recovery_string(),
1779                Some(b'#') => {
1780                    self.cursor += 1;
1781                    while let Some(b) = self.peek_byte() {
1782                        if b == b'\n' {
1783                            break;
1784                        }
1785                        self.cursor += 1;
1786                    }
1787                }
1788                _ => self.cursor += 1,
1789            }
1790        }
1791    }
1792
1793    const MAX_RECOVER_ERRORS: usize = 25;
1794
1795    fn try_recover(&mut self) -> bool {
1796        if !self.recovering {
1797            return false;
1798        }
1799        let error = self.take_error();
1800        self.errors.push(error);
1801        self.path_len = 0;
1802        let at_line_start = self.cursor == 0 || self.bytes.get(self.cursor - 1) == Some(&b'\n');
1803        if at_line_start && self.at_statement_start() {
1804            return self.errors.len() < Self::MAX_RECOVER_ERRORS;
1805        }
1806        let _before = self.cursor;
1807        self.skip_to_next_statement();
1808        debug_assert!(
1809            self.cursor > _before || self.cursor >= self.bytes.len(),
1810            "skip_to_next_statement did not advance cursor from {_before}",
1811        );
1812        self.errors.len() < Self::MAX_RECOVER_ERRORS
1813    }
1814
    /// Top-level parse loop: consumes the whole input, filling `root_st`.
    ///
    /// Each iteration skips whitespace, comments and newlines, then dispatches
    /// on the next byte: `[` starts a table header (which replaces the current
    /// insertion context), anything else is parsed as a key/value pair into the
    /// current context. The loop ends at end of input.
    ///
    /// Whenever a step fails, `try_recover` decides whether to continue; if it
    /// returns `false` the function returns `Err(Failed)`.
    fn parse_document(&mut self, root_st: &mut Table<'de>) -> Result<(), Failed> {
        // Key/value pairs before the first header go into the root table.
        let mut ctx = Ctx {
            table: root_st,
            array_end_span: None,
        };

        // Debug-only bookkeeping used to detect a recovery loop that makes no
        // progress.
        #[cfg(debug_assertions)]
        let mut _prev_loop_cursor = usize::MAX;

        loop {
            #[cfg(debug_assertions)]
            if self.recovering {
                debug_assert!(
                    self.cursor != _prev_loop_cursor || self.peek_byte().is_none(),
                    "parse_document recovery loop stalled at cursor {}",
                    self.cursor,
                );
                _prev_loop_cursor = self.cursor;
            }

            self.eat_whitespace();
            match self.eat_comment() {
                Ok(true) => continue,
                Ok(false) => {}
                Err(_) => {
                    if !self.try_recover() {
                        return Err(Failed);
                    }
                    continue;
                }
            }
            if self.eat_newline() {
                continue;
            }

            match self.peek_byte() {
                None => break,
                Some(b'[') => {
                    ctx = match self.process_table_header(root_st) {
                        Ok(c) => c,
                        Err(_) => {
                            if !self.try_recover() {
                                return Err(Failed);
                            }
                            // After a bad header, fall back to inserting into
                            // the root table.
                            Ctx {
                                table: root_st,
                                array_end_span: None,
                            }
                        }
                    };
                }
                // A `\r` that `eat_newline` above declined to consume.
                Some(b'\r') => {
                    self.set_error(self.cursor, None, ErrorKind::Unexpected('\r'));
                    if !self.try_recover() {
                        return Err(Failed);
                    }
                    continue;
                }
                Some(_) => {
                    // NOTE(review): on failure `try_recover` resets `path_len`
                    // to 0 while `ctx` may still point at a nested table —
                    // confirm that paths of later errors in this table are
                    // meant to lack the header prefix.
                    if self.process_key_value(&mut ctx).is_err() {
                        if !self.try_recover() {
                            return Err(Failed);
                        }
                    }
                }
            }
        }
        Ok(())
    }
1884
1885    fn process_table_header<'b>(
1886        &mut self,
1887        root_st: &'b mut Table<'de>,
1888    ) -> Result<Ctx<'b, 'de>, Failed> {
1889        self.path_len = 0;
1890        let header_start = self.cursor as u32;
1891        if let Err(e) = self.expect_byte(b'[') {
1892            return Err(e);
1893        }
1894        let is_array = self.eat_byte(b'[');
1895
1896        let mut current = root_st;
1897
1898        self.eat_whitespace();
1899        let mut key = match self.read_table_key() {
1900            Ok(k) => k,
1901            Err(e) => return Err(e),
1902        };
1903        loop {
1904            if self.eat_whitespace_to() == Some(b'.') {
1905                self.cursor += 1;
1906                self.eat_whitespace();
1907                self.push_path(PathComponent::Key(key));
1908                current = match self.navigate_header_intermediate(current, key) {
1909                    Ok(p) => p,
1910                    Err(e) => return Err(e),
1911                };
1912                key = match self.read_table_key() {
1913                    Ok(k) => k,
1914                    Err(e) => return Err(e),
1915                };
1916            } else {
1917                break;
1918            }
1919        }
1920        if let Err(e) = self.expect_byte(b']') {
1921            return Err(e);
1922        }
1923        if is_array && let Err(e) = self.expect_byte(b']') {
1924            return Err(e);
1925        }
1926
1927        self.eat_whitespace();
1928        match self.eat_comment() {
1929            Ok(true) => {}
1930            Ok(false) => {
1931                if let Err(e) = self.eat_newline_or_eof() {
1932                    return Err(e);
1933                }
1934            }
1935            Err(e) => return Err(e),
1936        }
1937        let header_end = self.cursor as u32;
1938
1939        self.push_path(PathComponent::Key(key));
1940        if is_array {
1941            self.navigate_header_array_final(current, key, header_start, header_end)
1942        } else {
1943            self.navigate_header_table_final(current, key, header_start, header_end)
1944        }
1945    }
1946
1947    fn process_key_value(&mut self, ctx: &mut Ctx<'_, 'de>) -> Result<(), Failed> {
1948        let saved_path_len = self.path_len;
1949        let line_start = self.cursor as u32;
1950        // Borrow the Table payload from the Table. NLL drops this
1951        // borrow at its last use (the insert_value call), freeing ctx.st
1952        // for the span updates that follow.
1953        let mut table_ref: &mut InnerTable<'de> = &mut ctx.table.value;
1954
1955        let mut key = match self.read_table_key() {
1956            Ok(k) => k,
1957            Err(e) => return Err(e),
1958        };
1959        self.eat_whitespace();
1960
1961        while self.eat_byte(b'.') {
1962            self.eat_whitespace();
1963            self.push_path(PathComponent::Key(key));
1964            table_ref = match self.navigate_dotted_key(table_ref, key) {
1965                Ok(t) => t,
1966                Err(e) => return Err(e),
1967            };
1968            key = match self.read_table_key() {
1969                Ok(k) => k,
1970                Err(e) => return Err(e),
1971            };
1972            self.eat_whitespace();
1973        }
1974
1975        self.push_path(PathComponent::Key(key));
1976
1977        if let Err(e) = self.expect_byte(b'=') {
1978            return Err(e);
1979        }
1980        self.eat_whitespace();
1981        let val = match self.value(MAX_RECURSION_DEPTH) {
1982            Ok(v) => v,
1983            Err(e) => return Err(e),
1984        };
1985        let line_end = self.cursor as u32;
1986
1987        self.eat_whitespace();
1988        match self.eat_comment() {
1989            Ok(true) => {}
1990            Ok(false) => {
1991                if let Err(e) = self.eat_newline_or_eof() {
1992                    return Err(e);
1993                }
1994            }
1995            Err(e) => return Err(e),
1996        }
1997
1998        if let Err(e) = self.insert_value(table_ref, key, val) {
1999            return Err(e);
2000        }
2001
2002        self.path_len = saved_path_len;
2003
2004        let start = ctx.table.span_start();
2005        ctx.table.set_span_start(start.min(line_start));
2006        ctx.table.extend_span_end(line_end);
2007
2008        if let Some(end_flag) = &mut ctx.array_end_span {
2009            let old = **end_flag;
2010            let current = old >> item::FLAG_SHIFT;
2011            **end_flag = (current.max(line_end) << item::FLAG_SHIFT) | (old & item::FLAG_MASK);
2012        }
2013
2014        Ok(())
2015    }
2016}
2017
/// The result of parsing a TOML document.
///
/// Wraps the parsed [`Table`] tree and, when the `from-toml` feature is
/// enabled, a [`Context`](crate::Context) that accumulates errors.
///
/// Access values via index operators (`doc["key"]`) which return
/// [`MaybeItem`](crate::MaybeItem), or use [`table_helper`](Self::table_helper) and
/// [`to`](Self::to) for typed conversion (both require the `from-toml`
/// feature).
///
/// # Examples
///
/// ```
/// let arena = toml_spanner::Arena::new();
/// let doc = toml_spanner::parse("name = 'world'", &arena).unwrap();
/// assert_eq!(doc["name"].as_str(), Some("world"));
/// ```
pub struct Document<'de> {
    /// Root table of the parsed document.
    pub(crate) table: Table<'de>,
    /// Conversion context: holds the recorded errors, the parser's key index,
    /// the arena and the source text. Only present with the `from-toml`
    /// feature.
    #[cfg(feature = "from-toml")]
    pub ctx: crate::de::Context<'de>,
}
2039
2040impl<'de> Document<'de> {
2041    /// Consumes the document and returns the underlying [`Table`].
2042    pub fn into_table(self) -> Table<'de> {
2043        self.table
2044    }
2045
2046    /// Converts the root table into an [`Item`] with the same span and payload.
2047    pub fn into_item(self) -> Item<'de> {
2048        self.table.into_item()
2049    }
2050
2051    /// Returns a shared reference to the root table.
2052    pub fn table(&self) -> &Table<'de> {
2053        &self.table
2054    }
2055
2056    /// Returns disjoint borrows of the [`Context`](crate::Context) and the
2057    /// root [`Table`].
2058    ///
2059    /// Useful when passing the context into
2060    /// [`TableHelper::new`](crate::TableHelper::new) while still holding
2061    /// a reference to the table.
2062    #[cfg(feature = "from-toml")]
2063    pub fn split(&mut self) -> (&mut crate::de::Context<'de>, &Table<'de>) {
2064        (&mut self.ctx, &self.table)
2065    }
2066
2067    /// Returns the parser's hash index for O(1) key lookups in large tables.
2068    ///
2069    /// Used internally by [`reproject`](crate::reproject).
2070    #[cfg(feature = "to-toml")]
2071    pub(crate) fn table_index(&self) -> &crate::item::table::TableIndex<'de> {
2072        // `to-toml` implies `from-toml`, so ctx is always available here.
2073        &self.ctx.index
2074    }
2075
2076    /// Detects the indent style from parsed item spans.
2077    ///
2078    /// Finds the first array element or inline table entry on its own
2079    /// line and measures the preceding whitespace.
2080    #[cfg(feature = "to-toml")]
2081    pub(crate) fn detect_indent(&self) -> crate::emit::Indent {
2082        let src = self.ctx.source().as_bytes();
2083        if let Some(indent) = detect_indent_in_table(&self.table, src) {
2084            return indent;
2085        }
2086        crate::emit::Indent::default()
2087    }
2088}
2089
#[cfg(feature = "from-toml")]
impl<'de> Document<'de> {
    /// Builds a [`TableHelper`] over the root table.
    ///
    /// This is the usual starting point for typed extraction: pull fields out
    /// with [`TableHelper::required`](crate::TableHelper::required) and
    /// [`TableHelper::optional`](crate::TableHelper::optional), then finish
    /// with [`TableHelper::require_empty`](crate::TableHelper::require_empty)
    /// to reject unknown keys.
    #[doc(alias = "helper")]
    pub fn table_helper<'ctx>(&'ctx mut self) -> TableHelper<'ctx, 'ctx, 'de> {
        TableHelper::new(&mut self.ctx, &self.table)
    }

    /// Converts the root table into a typed value `T` via
    /// [`FromToml`](crate::FromToml).
    ///
    /// # Errors
    /// Fails when the conversion itself fails or when any error has been
    /// recorded in the context. The returned
    /// [`FromTomlError`](crate::FromTomlError) takes all accumulated errors
    /// out of the document.
    #[doc(alias = "deserialize")]
    #[doc(alias = "from_toml")]
    pub fn to<T: crate::FromToml<'de>>(&mut self) -> Result<T, crate::FromTomlError> {
        let outcome = T::from_toml(&mut self.ctx, self.table.as_item());
        crate::de::compute_paths(&self.table, &mut self.ctx.errors);
        match outcome {
            Ok(value) if self.ctx.errors.is_empty() => Ok(value),
            Ok(_) | Err(_) => {
                let errors = std::mem::take(&mut self.ctx.errors);
                Err(crate::de::FromTomlError { errors })
            }
        }
    }

    /// Converts the root table into a typed value `T` via
    /// [`FromToml`](crate::FromToml), returning non-fatal errors alongside
    /// the value.
    ///
    /// All accumulated errors are taken out of the document in either case.
    ///
    /// # Errors
    /// Fails only when the conversion itself fails; the returned
    /// [`FromTomlError`](crate::FromTomlError) contains all accumulated
    /// errors.
    pub fn to_allowing_errors<T>(
        &mut self,
    ) -> Result<(T, crate::de::FromTomlError), crate::de::FromTomlError>
    where
        T: crate::de::FromToml<'de>,
    {
        let outcome = T::from_toml(&mut self.ctx, self.table.as_item());
        crate::de::compute_paths(&self.table, &mut self.ctx.errors);
        let collected = std::mem::take(&mut self.ctx.errors);
        let errors = crate::de::FromTomlError { errors: collected };
        match outcome {
            Ok(value) => Ok((value, errors)),
            Err(_) => Err(errors),
        }
    }

    /// Borrows the errors recorded so far.
    pub fn errors(&self) -> &[Error] {
        self.ctx.errors.as_slice()
    }

    /// Returns `true` when at least one error has been recorded.
    pub fn has_errors(&self) -> bool {
        !self.errors().is_empty()
    }
}
2153
/// Indexing a [`Document`] by key forwards to indexing its root table, so
/// `doc["key"]` yields the same [`MaybeItem`] as `doc.table()["key"]`.
impl<'de> std::ops::Index<&str> for Document<'de> {
    type Output = MaybeItem<'de>;

    /// Looks up `key` in the root table.
    fn index(&self, key: &str) -> &Self::Output {
        &self.table[key]
    }
}
2161
/// Debug output of a [`Document`] is whatever its root table's `fmt` writes;
/// the context is not included.
impl std::fmt::Debug for Document<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // NOTE(review): presumably resolves to `Table`'s `Debug` impl —
        // confirm which `fmt` is in scope here.
        self.table.fmt(f)
    }
}
2167
2168/// Parses a TOML document and returns a [`Document`] containing the parsed tree.
2169///
2170/// Both `s` and `arena` must outlive the returned [`Document`] because parsed
2171/// values borrow from the input string and allocate escaped strings into the
2172/// arena.
2173///
2174/// # Errors
2175///
2176/// Returns an [`Error`] on the first syntax error encountered.
2177///
2178/// # Examples
2179///
2180/// ```
2181/// let arena = toml_spanner::Arena::new();
2182/// let doc = toml_spanner::parse("key = 'value'", &arena).unwrap();
2183/// assert_eq!(doc["key"].as_str(), Some("value"));
2184/// ```
2185#[inline(never)]
2186pub fn parse<'de>(document: &'de str, arena: &'de Arena) -> Result<Document<'de>, Error> {
2187    // Tag bits use the low 3 bits of start_and_tag, limiting span.start to
2188    // 28 bits (256 MiB). The flag state uses the low 3 bits of end_and_flag,
2189    // and bit 31 is the variant discriminator, limiting span.end to 28 bits
2190    // (256 MiB).
2191    const MAX_SIZE: usize = (1u32 << 28) as usize;
2192
2193    if document.len() >= MAX_SIZE {
2194        return Err(Error::new(ErrorKind::FileTooLarge, Span::new(0, 0)));
2195    }
2196
2197    let mut root_st = Table::new_spanned(Span::new(0, document.len() as u32));
2198    let mut parser = Parser::new(document, arena);
2199    match parser.parse_document(&mut root_st) {
2200        Ok(()) => {}
2201        Err(_) => return Err(parser.take_error()),
2202    }
2203    // Note that root is about the drop (but doesn't implement drop), so we can take
2204    // ownership of this table.
2205    Ok(Document {
2206        table: root_st,
2207        #[cfg(feature = "from-toml")]
2208        ctx: crate::de::Context {
2209            errors: Vec::new(),
2210            index: parser.index,
2211            arena,
2212            source: document,
2213        },
2214    })
2215}
2216
/// Parses a TOML document in recovery mode, collecting errors instead of
/// stopping at the first one.
///
/// A [`Document`] is always returned (never `Err`). Syntax errors end up in
/// the document's [`Context::errors`](crate::Context), next to any
/// deserialization errors recorded later. Valid portions of the input are
/// still parsed into the tree.
///
/// Recovery is line-based: when a statement fails, the parser skips to the
/// next line and continues. At most 25 errors are collected before parsing
/// stops. Input of 256 MiB or more is not parsed at all; the document then
/// has an empty root table and a single `FileTooLarge` error.
///
/// # Examples
///
/// ```
/// let arena = toml_spanner::Arena::new();
/// let doc = toml_spanner::parse_recoverable("key = 'value'\nbad =\n", &arena);
/// assert_eq!(doc["key"].as_str(), Some("value"));
/// assert!(!doc.errors().is_empty());
/// ```
#[cfg(feature = "from-toml")]
pub fn parse_recoverable<'de>(document: &'de str, arena: &'de Arena) -> Document<'de> {
    const MAX_SIZE: usize = (1u32 << 28) as usize;
    let mut parser = Parser::new(document, arena);
    parser.recovering = true;

    let table = if document.len() >= MAX_SIZE {
        parser
            .errors
            .push(Error::new(ErrorKind::FileTooLarge, Span::new(0, 0)));
        Table::new_spanned(Span::new(0, 0))
    } else {
        let mut root_st = Table::new_spanned(Span::new(0, document.len() as u32));
        if parser.parse_document(&mut root_st).is_err() {
            // Parsing stopped early; keep an error that is still pending on
            // the parser, if there is one.
            if let Some(kind) = parser.error_kind.take() {
                let span = parser.error_span;
                let path = parser.build_error_path();
                parser.errors.push(Error::new_with_path(kind, span, path));
            }
        }
        root_st
    };

    Document {
        table,
        ctx: crate::de::Context {
            errors: parser.errors,
            index: parser.index,
            arena,
            source: document,
        },
    }
}
2281
/// Returns `true` for bytes accepted here as key-like: ASCII letters, ASCII
/// digits, `-` and `_`.
#[inline]
fn is_keylike_byte(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'_')
}
2286
2287fn byte_describe(b: u8) -> &'static &'static str {
2288    match b {
2289        b'\n' => &"a newline",
2290        b' ' | b'\t' => &"whitespace",
2291        b'=' => &"an equals",
2292        b'.' => &"a period",
2293        b',' => &"a comma",
2294        b':' => &"a colon",
2295        b'+' => &"a plus",
2296        b'{' => &"a left brace",
2297        b'}' => &"a right brace",
2298        b'[' => &"a left bracket",
2299        b']' => &"a right bracket",
2300        b'\'' | b'"' => &"a string",
2301        _ if is_keylike_byte(b) => &"an identifier",
2302        _ => &"a character",
2303    }
2304}
2305
/// Searches `table` depth-first for the first item whose position in `src`
/// reveals the indent style.
///
/// For inline arrays the start of each non-empty element span is probed, for
/// inline tables the start of each non-empty key span; `indent_from_span`
/// decides whether a position is conclusive. Tables nested in arrays and
/// sub-tables are searched recursively. Returns `None` when nothing
/// conclusive is found.
#[cfg(feature = "to-toml")]
fn detect_indent_in_table(table: &Table<'_>, src: &[u8]) -> Option<crate::emit::Indent> {
    use crate::item::{ArrayStyle, TableStyle, Value};
    for (_, item) in table {
        match item.value() {
            Value::Array(arr) => {
                if arr.style() == ArrayStyle::Inline {
                    for elem in arr {
                        let span = elem.span();
                        if span.is_empty() {
                            continue;
                        }
                        if let Some(indent) = indent_from_span(src, span.start as usize) {
                            return Some(indent);
                        }
                    }
                }
                // Regardless of style, descend into table elements.
                for elem in arr {
                    let Some(sub) = elem.as_table() else {
                        continue;
                    };
                    if let Some(indent) = detect_indent_in_table(sub, src) {
                        return Some(indent);
                    }
                }
            }
            Value::Table(sub) => {
                if sub.style() == TableStyle::Inline {
                    for (key, _) in sub {
                        if key.span.is_empty() {
                            continue;
                        }
                        if let Some(indent) = indent_from_span(src, key.span.start as usize) {
                            return Some(indent);
                        }
                    }
                }
                if let Some(indent) = detect_indent_in_table(sub, src) {
                    return Some(indent);
                }
            }
            _ => {}
        }
    }
    None
}
2349
/// Derives an indent style from the whitespace that precedes `pos` in `src`.
///
/// Scans backwards from `pos`:
///
/// * a tab yields `Indent::Tab`;
/// * a newline reached through spaces only yields `Indent::Spaces(n)`, where
///   `n` is the number of those spaces capped at 8 — or `None` when there are
///   no spaces at all;
/// * any other byte means the item does not start its line, giving `None`.
///
/// Also returns `None` when `pos` is out of bounds or when the start of `src`
/// is reached without meeting a newline.
#[cfg(feature = "to-toml")]
fn indent_from_span(src: &[u8], pos: usize) -> Option<crate::emit::Indent> {
    if pos >= src.len() {
        return None;
    }
    let mut i = pos;
    while i > 0 {
        i -= 1;
        match src[i] {
            b' ' => continue,
            b'\t' => return Some(crate::emit::Indent::Tab),
            b'\n' => {
                // Everything between the newline and `pos` is spaces.
                let spaces = pos - i - 1;
                if spaces == 0 {
                    return None;
                }
                // Cap before narrowing: casting the raw count to `u8` would
                // wrap for runs of 256 or more spaces.
                let width = if spaces > 8 { 8 } else { spaces as u8 };
                return Some(crate::emit::Indent::Spaces(width));
            }
            _ => return None,
        }
    }
    None
}