Skip to main content

toml_spanner/
parser.rs

1// Deliberately avoid `?` operator throughout this module for compile-time
2// performance: explicit match/if-let prevents the compiler from generating
3// From::from conversion and drop-glue machinery at every call site.
4#![allow(clippy::question_mark)]
5#![allow(unsafe_code)]
6
7#[cfg(test)]
8#[path = "./parser_tests.rs"]
9mod tests;
10
11use crate::{
12    Span,
13    arena::Arena,
14    error::{Error, ErrorKind},
15    str::Str,
16    table::{InnerTable, Table},
17    value::{self, Item, Key},
18};
19use std::hash::{Hash, Hasher};
20use std::ptr::NonNull;
21use std::{char, collections::HashMap};
22
/// Nesting-depth limit, stored as an `i16`.
///
/// NOTE(review): not referenced in the portion of the file reviewed here;
/// presumably it bounds how deeply arrays / inline tables may nest — confirm
/// at the use site.
const MAX_RECURSION_DEPTH: i16 = 256;
/// Zero-sized marker returned on the error path of parser methods.
///
/// When a method returns `Err(ParseError)`, the full error details have already
/// been written into `Parser::error_kind` / `Parser::error_span`.
#[derive(Copy, Clone)]
struct ParseError;
28
/// Parsing context: the table currently being filled, plus an optional handle
/// on the enclosing array-of-tables span.
struct Ctx<'b, 'de> {
    /// The current table context — a `Table` view into a table `Value`.
    /// Gives direct mutable access to both the span fields and the `Table` payload.
    table: &'b mut Table<'de>,
    /// If this table is an entry in an array-of-tables, a disjoint borrow of
    /// the parent array `Value`'s `end_and_flag` field so its span can be
    /// extended alongside the entry.
    array_end_span: Option<&'b mut u32>,
}
38
/// Tables with at least this many entries use the hash index for lookups.
///
/// NOTE(review): the use site is outside the reviewed portion; presumably the
/// "hash index" is `Parser::table_index` — confirm.
const INDEXED_TABLE_THRESHOLD: usize = 7;
41
/// Builds the byte → hex-digit-value lookup table at compile time.
///
/// Entry `b` holds `0..=15` when byte `b` is an ASCII hexadecimal digit
/// (`0-9`, `A-F`, `a-f`) and `-1` for every other byte.
const fn build_hex_table() -> [i8; 256] {
    // Start from "not a hex digit" everywhere, then fill in the three digit runs.
    let mut lut = [-1i8; 256];

    let mut decimal = 0u8;
    while decimal < 10 {
        lut[(b'0' + decimal) as usize] = decimal as i8;
        decimal += 1;
    }

    let mut letter = 0u8;
    while letter < 6 {
        let value = (10 + letter) as i8;
        lut[(b'A' + letter) as usize] = value;
        lut[(b'a' + letter) as usize] = value;
        letter += 1;
    }

    lut
}

/// Hex-digit value for every possible byte; `-1` marks a non-hex byte.
static HEX: [i8; 256] = build_hex_table();
58
/// Hash-map key that identifies a (table, key-name) pair without owning the
/// string data.  The raw `key_ptr`/`len` point into either the input buffer
/// or the arena; both are stable for the lifetime of the parse.
/// `first_key_span` is the `span.start()` of the **first** key ever inserted
/// into the table and serves as a cheap, collision-free table discriminator.
///
/// Equality and hashing both use the key *text* (not the pointer) together
/// with `first_key_span`, so two `KeyIndex` values built from different
/// buffers holding the same text compare equal.
struct KeyIndex<'de> {
    /// Start of the key's UTF-8 bytes; always derived from a `&'de str`.
    key_ptr: NonNull<u8>,
    /// Length of the key in bytes.
    len: u32,
    /// Table discriminator (see the type-level docs).
    first_key_span: u32,
    /// Ties the raw pointer to the `'de` lifetime of the string it came from.
    marker: std::marker::PhantomData<&'de str>,
}

impl<'de> KeyIndex<'de> {
    /// Captures `key` by pointer and length and pairs it with the table
    /// discriminator `first_key_span`.
    ///
    /// The length is stored as `u32`; keys longer than `u32::MAX` bytes are
    /// not supported (checked in debug builds only).
    #[inline]
    fn new(key: &'de str, first_key_span: u32) -> Self {
        debug_assert!(
            u32::try_from(key.len()).is_ok(),
            "key length must fit in u32"
        );
        KeyIndex {
            // A reference is never null, so no `unsafe` is needed to obtain
            // the `NonNull`.
            key_ptr: NonNull::from(key.as_bytes()).cast::<u8>(),
            len: key.len() as u32,
            first_key_span,
            marker: std::marker::PhantomData,
        }
    }

    /// Reconstructs the `&'de str` this key was created from.
    #[inline]
    fn as_str(&self) -> &'de str {
        // SAFETY: `key_ptr` and `len` are only ever set by `new`, from the
        // pointer and byte length of a `&'de str`. That memory is therefore
        // valid UTF-8, readable for `len` bytes, and live for `'de`.
        unsafe {
            std::str::from_utf8_unchecked(std::slice::from_raw_parts(
                self.key_ptr.as_ptr(),
                self.len as usize,
            ))
        }
    }
}

impl<'de> Hash for KeyIndex<'de> {
    /// Hashes the key text and the table discriminator — the same two
    /// components compared by `eq`.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.as_str().hash(state);
        self.first_key_span.hash(state);
    }
}

impl<'de> PartialEq for KeyIndex<'de> {
    /// Compares the cheap integer discriminator first, then the key text.
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.first_key_span == other.first_key_span && self.as_str() == other.as_str()
    }
}

impl<'de> Eq for KeyIndex<'de> {}
111
/// Byte-level cursor over the input together with the state shared by the
/// parsing methods: the arena, the recorded-error slot, and the key index.
struct Parser<'de> {
    /// Raw bytes of the input. Always valid UTF-8 (derived from `&str`).
    bytes: &'de [u8],
    /// Byte offset into `bytes` of the next unread byte.
    cursor: usize,
    /// Arena handle; in the reviewed code it supplies the scratch buffer used
    /// to build strings that contain escape sequences.
    arena: &'de Arena,

    // Error context -- populated just before returning ParseError
    /// Span of the most recently recorded error (starts out as `0..0`).
    error_span: Span,
    /// Kind of the most recently recorded error; moved out by `take_error`.
    error_kind: Option<ErrorKind>,

    // Global key-index for O(1) lookups in large tables.
    // Maps (table-discriminator, key-name) → entry index in the table.
    table_index: foldhash::HashMap<KeyIndex<'de>, usize>,
}
126
127#[allow(unsafe_code)]
128impl<'de> Parser<'de> {
129    fn new(input: &'de str, arena: &'de Arena) -> Self {
130        let bytes = input.as_bytes();
131        // Skip UTF-8 BOM (U+FEFF = EF BB BF) if present at the start.
132        let cursor = if bytes.starts_with(b"\xef\xbb\xbf") {
133            3
134        } else {
135            0
136        };
137        Parser {
138            bytes,
139            cursor,
140            arena,
141            error_span: Span::new(0, 0),
142            error_kind: None,
143            table_index: HashMap::default(),
144        }
145    }
146
147    /// Get a `&str` slice from the underlying bytes.
148    /// SAFETY: `self.bytes` is always valid UTF-8, and callers must ensure
149    /// `start..end` falls on UTF-8 char boundaries.
150    #[inline]
151    unsafe fn str_slice(&self, start: usize, end: usize) -> &'de str {
152        #[cfg(not(debug_assertions))]
153        unsafe {
154            std::str::from_utf8_unchecked(&self.bytes[start..end])
155        }
156        #[cfg(debug_assertions)]
157        match std::str::from_utf8(&self.bytes[start..end]) {
158            Ok(value) => value,
159            Err(err) => panic!(
160                "Invalid UTF-8 slice: bytes[{}..{}] is not valid UTF-8: {}",
161                start, end, err
162            ),
163        }
164    }
165
166    #[cold]
167    fn set_duplicate_key_error(&mut self, first: Span, second: Span, key: &str) -> ParseError {
168        self.error_span = second;
169        self.error_kind = Some(ErrorKind::DuplicateKey {
170            key: key.into(),
171            first,
172        });
173        ParseError
174    }
175    #[cold]
176    fn set_error(&mut self, start: usize, end: Option<usize>, kind: ErrorKind) -> ParseError {
177        self.error_span = Span::new(start as u32, end.unwrap_or(start + 1) as u32);
178        self.error_kind = Some(kind);
179        ParseError
180    }
181
    /// Consumes the recorded error and packages it as an [`Error`].
    ///
    /// Moves the kind out of `self.error_kind` (leaving `None`) and copies
    /// `self.error_span`.
    ///
    /// # Panics
    /// Panics with "take_error called without error" if no error kind is
    /// currently recorded.
    fn take_error(&mut self) -> Error {
        let kind = self
            .error_kind
            .take()
            .expect("take_error called without error");
        let span = self.error_span;

        // Black Magic Optimization:
        // Removing the following introduces 8% performance
        // regression across the board.
        // The line/column computed here is not used for the returned error;
        // `black_box` keeps the optimizer from discarding the call.
        {
            let line_info = Some(self.to_linecol(std::hint::black_box(0)));
            std::hint::black_box(&line_info);
        }

        Error { kind, span }
    }
199
200    pub fn to_linecol(&self, offset: usize) -> (u32, u32) {
201        let mut line_start = 0;
202        let mut line_num = 0;
203        for (i, &b) in self.bytes.iter().enumerate() {
204            if i >= offset {
205                return (line_num as u32, (offset - line_start) as u32);
206            }
207            if b == b'\n' {
208                line_num += 1;
209                line_start = i + 1;
210            }
211        }
212        (line_num as u32, (offset - line_start) as u32)
213    }
214
215    #[inline]
216    fn peek_byte(&self) -> Option<u8> {
217        self.bytes.get(self.cursor).copied()
218    }
219
220    #[inline]
221    fn peek_byte_at(&self, offset: usize) -> Option<u8> {
222        self.bytes.get(self.cursor + offset).copied()
223    }
224
    /// Moves the cursor forward by one byte. Performs no bounds check.
    #[inline]
    fn advance(&mut self) {
        self.cursor += 1;
    }
229
230    #[inline]
231    fn eat_byte(&mut self, b: u8) -> bool {
232        if self.peek_byte() == Some(b) {
233            self.advance();
234            true
235        } else {
236            false
237        }
238    }
239
240    fn eat_byte_spanned(&mut self, b: u8) -> Option<Span> {
241        if self.peek_byte() == Some(b) {
242            let start = self.cursor;
243            self.advance();
244            Some(Span::new(start as u32, self.cursor as u32))
245        } else {
246            None
247        }
248    }
249
250    fn expect_byte(&mut self, b: u8) -> Result<(), ParseError> {
251        if self.eat_byte(b) {
252            Ok(())
253        } else {
254            let start = self.cursor;
255            let (found_desc, end) = self.scan_token_desc_and_end();
256            Err(self.set_error(
257                start,
258                Some(end),
259                ErrorKind::Wanted {
260                    expected: byte_describe(b),
261                    found: found_desc,
262                },
263            ))
264        }
265    }
266
267    fn expect_byte_spanned(&mut self, b: u8) -> Result<Span, ParseError> {
268        if let Some(span) = self.eat_byte_spanned(b) {
269            Ok(span)
270        } else {
271            let start = self.cursor;
272            let (found_desc, end) = self.scan_token_desc_and_end();
273            Err(self.set_error(
274                start,
275                Some(end),
276                ErrorKind::Wanted {
277                    expected: byte_describe(b),
278                    found: found_desc,
279                },
280            ))
281        }
282    }
283
284    fn eat_whitespace(&mut self) {
285        while let Some(b) = self.peek_byte() {
286            if b == b' ' || b == b'\t' {
287                self.advance();
288            } else {
289                break;
290            }
291        }
292    }
293
294    fn eat_comment(&mut self) -> Result<bool, ParseError> {
295        if !self.eat_byte(b'#') {
296            return Ok(false);
297        }
298        while let Some(0x09 | 0x20..=0x7E | 0x80..) = self.peek_byte() {
299            self.cursor += 1;
300        }
301        self.eat_newline_or_eof().map(|()| true)
302    }
303
304    fn eat_newline_or_eof(&mut self) -> Result<(), ParseError> {
305        match self.peek_byte() {
306            None => Ok(()),
307            Some(b'\n') => {
308                self.advance();
309                Ok(())
310            }
311            Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
312                self.cursor += 2;
313                Ok(())
314            }
315            _ => {
316                let start = self.cursor;
317                let (found_desc, end) = self.scan_token_desc_and_end();
318                Err(self.set_error(
319                    start,
320                    Some(end),
321                    ErrorKind::Wanted {
322                        expected: "newline",
323                        found: found_desc,
324                    },
325                ))
326            }
327        }
328    }
329
330    fn eat_newline(&mut self) -> bool {
331        match self.peek_byte() {
332            Some(b'\n') => {
333                self.advance();
334                true
335            }
336            Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
337                self.cursor += 2;
338                true
339            }
340            _ => false,
341        }
342    }
343
344    /// Scan forward from the current position to determine the description
345    /// and end position of the "token" at the cursor. This provides compatible
346    /// error spans with the old tokenizer.
347    fn scan_token_desc_and_end(&self) -> (&'static str, usize) {
348        match self.peek_byte() {
349            None => ("eof", self.bytes.len()),
350            Some(b'\n' | b'\r') => ("a newline", self.cursor + 1),
351            Some(b' ' | b'\t') => {
352                let mut end = self.cursor + 1;
353                while end < self.bytes.len()
354                    && (self.bytes[end] == b' ' || self.bytes[end] == b'\t')
355                {
356                    end += 1;
357                }
358                ("whitespace", end)
359            }
360            Some(b'#') => ("a comment", self.cursor + 1),
361            Some(b'=') => ("an equals", self.cursor + 1),
362            Some(b'.') => ("a period", self.cursor + 1),
363            Some(b',') => ("a comma", self.cursor + 1),
364            Some(b':') => ("a colon", self.cursor + 1),
365            Some(b'+') => ("a plus", self.cursor + 1),
366            Some(b'{') => ("a left brace", self.cursor + 1),
367            Some(b'}') => ("a right brace", self.cursor + 1),
368            Some(b'[') => ("a left bracket", self.cursor + 1),
369            Some(b']') => ("a right bracket", self.cursor + 1),
370            Some(b'\'' | b'"') => ("a string", self.cursor + 1),
371            Some(b) if is_keylike_byte(b) => {
372                let mut end = self.cursor + 1;
373                while end < self.bytes.len() && is_keylike_byte(self.bytes[end]) {
374                    end += 1;
375                }
376                ("an identifier", end)
377            }
378            Some(_) => ("a character", self.cursor + 1),
379        }
380    }
381
382    fn read_keylike(&mut self) -> &'de str {
383        let start = self.cursor;
384        while let Some(b) = self.peek_byte() {
385            if !is_keylike_byte(b) {
386                break;
387            }
388            self.advance();
389        }
390        // SAFETY: keylike bytes are ASCII, always valid UTF-8 boundaries
391        unsafe { self.str_slice(start, self.cursor) }
392    }
393
394    fn read_table_key(&mut self) -> Result<Key<'de>, ParseError> {
395        match self.peek_byte() {
396            Some(b'"') => {
397                let start = self.cursor;
398                self.advance();
399                let (key, multiline) = match self.read_string(start, b'"') {
400                    Ok(v) => v,
401                    Err(e) => return Err(e),
402                };
403                if multiline {
404                    return Err(self.set_error(
405                        start,
406                        Some(key.span.end as usize),
407                        ErrorKind::MultilineStringKey,
408                    ));
409                }
410                Ok(key)
411            }
412            Some(b'\'') => {
413                let start = self.cursor;
414                self.advance();
415                let (key, multiline) = match self.read_string(start, b'\'') {
416                    Ok(v) => v,
417                    Err(e) => return Err(e),
418                };
419                if multiline {
420                    return Err(self.set_error(
421                        start,
422                        Some(key.span.end as usize),
423                        ErrorKind::MultilineStringKey,
424                    ));
425                }
426                Ok(key)
427            }
428            Some(b) if is_keylike_byte(b) => {
429                let start = self.cursor;
430                let k = self.read_keylike();
431                let span = Span::new(start as u32, self.cursor as u32);
432                Ok(Key {
433                    name: Str::from(k),
434                    span,
435                })
436            }
437            Some(_) => {
438                let start = self.cursor;
439                let (found_desc, end) = self.scan_token_desc_and_end();
440                Err(self.set_error(
441                    start,
442                    Some(end),
443                    ErrorKind::Wanted {
444                        expected: "a table key",
445                        found: found_desc,
446                    },
447                ))
448            }
449            None => Err(self.set_error(
450                self.bytes.len(),
451                None,
452                ErrorKind::Wanted {
453                    expected: "a table key",
454                    found: "eof",
455                },
456            )),
457        }
458    }
459
460    /// Read a basic (double-quoted) string. `start` is the byte offset of the
461    /// opening quote. The cursor should be positioned right after the opening `"`.
462    fn read_string(&mut self, start: usize, delim: u8) -> Result<(Key<'de>, bool), ParseError> {
463        let mut multiline = false;
464        if self.eat_byte(delim) {
465            if self.eat_byte(delim) {
466                multiline = true;
467            } else {
468                return Ok((
469                    Key {
470                        name: Str::from(""),
471                        span: Span::new(start as u32, (start + 1) as u32),
472                    },
473                    false,
474                ));
475            }
476        }
477
478        let mut content_start = self.cursor;
479        if multiline {
480            match self.peek_byte() {
481                Some(b'\n') => {
482                    self.advance();
483                    content_start = self.cursor;
484                }
485                Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
486                    self.cursor += 2;
487                    content_start = self.cursor;
488                }
489                _ => {}
490            }
491        }
492
493        self.read_string_loop(start, content_start, multiline, delim)
494    }
495
    /// Advance `self.cursor` past bytes that do not require special handling
    /// inside a string.  Uses SWAR (SIMD-Within-A-Register) to scan 8 bytes
    /// at a time.
    ///
    /// Stops at the first byte that is:
    ///   * a control character (< 0x20) — tab (0x09) is a benign false positive
    ///   * DEL (0x7F)
    ///   * the string delimiter (`"` or `'`)
    ///   * a backslash (`\`) — benign false positive for literal strings
    ///   * past the end of input
    ///
    /// The caller is expected to inspect the byte at the cursor afterwards;
    /// this function never consumes a stop byte.
    fn skip_string_plain(&mut self, delim: u8) {
        // Quick bail-out for EOF or an immediately-interesting byte.
        // Avoids SWAR setup cost for consecutive specials (e.g. \n\n).
        let Some(&b) = self.bytes.get(self.cursor) else {
            return;
        };

        // Unlike the SWAR scan below, this check lets a tab pass as plain.
        if b == delim || b == b'\\' || b == 0x7F || (b < 0x20 && b != 0x09) {
            return;
        }
        self.cursor += 1;

        let base = self.cursor;
        let rest = &self.bytes[base..];

        type Chunk = u64;
        const STEP: usize = std::mem::size_of::<Chunk>();
        const ONE: Chunk = Chunk::MAX / 255; // 0x0101_0101_0101_0101
        const HIGH: Chunk = ONE << 7; // 0x8080_8080_8080_8080

        // Each `fill_*` word holds the byte of interest replicated into all
        // eight lanes, so XOR-ing it with a chunk zeroes exactly the lanes
        // that match.
        let fill_delim = ONE * Chunk::from(delim);
        let fill_bslash = ONE * Chunk::from(b'\\');
        let fill_del = ONE * 0x7F;

        let chunks = rest.chunks_exact(STEP);
        let remainder_len = chunks.remainder().len();

        for (i, chunk) in chunks.enumerate() {
            let v = Chunk::from_le_bytes(chunk.try_into().unwrap());

            // `x.wrapping_sub(k) & !x` sets a lane's high bit when that lane
            // is below `k` (and its own high bit is clear, so bytes >= 0x80
            // are never flagged). With k = 0x20 this finds control bytes;
            // with k = 1 on an XOR-ed word it finds zero lanes, i.e. matches.
            let has_ctrl = v.wrapping_sub(ONE * 0x20) & !v;
            let eq_delim = (v ^ fill_delim).wrapping_sub(ONE) & !(v ^ fill_delim);
            let eq_bslash = (v ^ fill_bslash).wrapping_sub(ONE) & !(v ^ fill_bslash);
            let eq_del = (v ^ fill_del).wrapping_sub(ONE) & !(v ^ fill_del);

            // Keep only the per-lane flag bits. The chunk was loaded
            // little-endian, so the lowest set bit belongs to the earliest
            // flagged byte; dividing its bit index by 8 gives the byte index.
            let masked = (has_ctrl | eq_delim | eq_bslash | eq_del) & HIGH;
            if masked != 0 {
                self.cursor = base + i * STEP + masked.trailing_zeros() as usize / 8;
                return;
            }
        }

        // Fewer than STEP bytes remain: finish byte-by-byte.
        self.cursor = self.bytes.len() - remainder_len;
        self.skip_string_plain_slow(delim);
    }
551
552    #[cold]
553    #[inline(never)]
554    fn skip_string_plain_slow(&mut self, delim: u8) {
555        while let Some(&b) = self.bytes.get(self.cursor) {
556            if b == delim || b == b'\\' || b == 0x7F || (b < 0x20 && b != 0x09) {
557                return;
558            }
559            self.cursor += 1;
560        }
561    }
562
    /// Scans the body of a string up to and including its closing delimiter.
    ///
    /// * `start` — byte offset of the opening quote (used for error spans and
    ///   to compute the content span).
    /// * `content_start` — byte offset of the first content byte.
    /// * `multiline` — whether the string was opened with three delimiters.
    /// * `delim` — the quote byte; escapes are only processed when it is `"`.
    ///
    /// Returns the string (as a `Key`) and `multiline` unchanged. A string
    /// without escapes borrows straight from the input; once an escape is
    /// seen the content is assembled in an arena scratch buffer instead.
    ///
    /// # Errors
    /// `UnterminatedString` at end of input, `InvalidCharInString` for a line
    /// break in a single-line string, a lone `\r`, or a control byte / DEL,
    /// plus anything raised by `read_basic_escape`.
    fn read_string_loop(
        &mut self,
        start: usize,
        content_start: usize,
        multiline: bool,
        delim: u8,
    ) -> Result<(Key<'de>, bool), ParseError> {
        // Start of the input bytes not yet copied into `scratch`.
        let mut flush_from = content_start;
        // Allocated lazily, on the first escape sequence.
        let mut scratch: Option<crate::arena::Scratch<'de>> = None;
        loop {
            self.skip_string_plain(delim);

            // `i` is the offset of the byte that stopped the fast scan.
            let i = self.cursor;
            let Some(&b) = self.bytes.get(i) else {
                return Err(self.set_error(start, None, ErrorKind::UnterminatedString));
            };
            self.cursor = i + 1;

            match b {
                b'\r' => {
                    // `\r` is only acceptable as part of `\r\n`, and only in
                    // multi-line strings.
                    if self.eat_byte(b'\n') {
                        if !multiline {
                            return Err(self.set_error(
                                i,
                                None,
                                ErrorKind::InvalidCharInString('\n'),
                            ));
                        }
                    } else {
                        return Err(self.set_error(i, None, ErrorKind::InvalidCharInString('\r')));
                    }
                }
                b'\n' => {
                    if !multiline {
                        return Err(self.set_error(i, None, ErrorKind::InvalidCharInString('\n')));
                    }
                }
                d if d == delim => {
                    let (span, end) = if multiline {
                        // One or two delimiters alone are ordinary content.
                        if !self.eat_byte(delim) {
                            continue;
                        }
                        if !self.eat_byte(delim) {
                            continue;
                        }
                        // Up to two further delimiters may follow the first
                        // three; the first `extra` quotes of the run then
                        // belong to the content and the last three close it.
                        let mut extra = 0usize;
                        if self.eat_byte(delim) {
                            extra += 1;
                        }
                        if self.eat_byte(delim) {
                            extra += 1;
                        }

                        // Skip the three opening delimiters, plus the line
                        // break directly after them if there is one.
                        let maybe_nl = self.bytes[start + 3];
                        let start_off = if maybe_nl == b'\n' {
                            4
                        } else if maybe_nl == b'\r' {
                            5
                        } else {
                            3
                        };

                        (
                            Span::new((start + start_off) as u32, (self.cursor - 3) as u32),
                            i + extra,
                        )
                    } else {
                        (Span::new((start + 1) as u32, (self.cursor - 1) as u32), i)
                    };

                    let name = if let Some(mut s) = scratch {
                        // Copy the tail that follows the last escape.
                        s.extend(&self.bytes[flush_from..end]);
                        let committed = s.commit();
                        // Safety: scratch contents are valid UTF-8 (built from
                        // validated input and well-formed escape sequences).
                        Str::from(unsafe { std::str::from_utf8_unchecked(committed) })
                    } else {
                        // Safety: content_start..end is validated UTF-8.
                        unsafe { Str::from(self.str_slice(content_start, end)) }
                    };
                    return Ok((Key { name, span }, multiline));
                }
                b'\\' if delim == b'"' => {
                    let arena = self.arena;
                    let s = scratch.get_or_insert_with(|| unsafe { arena.scratch() });
                    // Copy the literal bytes preceding the backslash, then let
                    // the escape handler append the decoded bytes.
                    s.extend(&self.bytes[flush_from..i]);
                    if let Err(e) = self.read_basic_escape(s, start, multiline) {
                        return Err(e);
                    }
                    flush_from = self.cursor;
                }
                // Tab or backslash-in-literal-string: benign false positives
                // from the SWAR scan.
                0x09 | 0x20..=0x7E | 0x80.. => {}
                _ => {
                    return Err(self.set_error(i, None, ErrorKind::InvalidCharInString(b as char)));
                }
            }
        }
    }
663
664    fn read_basic_escape(
665        &mut self,
666        scratch: &mut crate::arena::Scratch<'_>,
667        string_start: usize,
668        multi: bool,
669    ) -> Result<(), ParseError> {
670        let i = self.cursor;
671        let Some(&b) = self.bytes.get(i) else {
672            return Err(self.set_error(string_start, None, ErrorKind::UnterminatedString));
673        };
674        self.cursor = i + 1;
675
676        match b {
677            b'"' => scratch.push(b'"'),
678            b'\\' => scratch.push(b'\\'),
679            b'b' => scratch.push(0x08),
680            b'f' => scratch.push(0x0C),
681            b'n' => scratch.push(b'\n'),
682            b'r' => scratch.push(b'\r'),
683            b't' => scratch.push(b'\t'),
684            b'e' => scratch.push(0x1B),
685            b'u' => {
686                let ch = self.read_hex(4, string_start, i);
687                match ch {
688                    Ok(ch) => {
689                        let mut buf = [0u8; 4];
690                        let len = ch.encode_utf8(&mut buf).len();
691                        scratch.extend(&buf[..len]);
692                    }
693                    Err(e) => return Err(e),
694                }
695            }
696            b'U' => {
697                let ch = self.read_hex(8, string_start, i);
698                match ch {
699                    Ok(ch) => {
700                        let mut buf = [0u8; 4];
701                        let len = ch.encode_utf8(&mut buf).len();
702                        scratch.extend(&buf[..len]);
703                    }
704                    Err(e) => return Err(e),
705                }
706            }
707            b'x' => {
708                let ch = self.read_hex(2, string_start, i);
709                match ch {
710                    Ok(ch) => {
711                        let mut buf = [0u8; 4];
712                        let len = ch.encode_utf8(&mut buf).len();
713                        scratch.extend(&buf[..len]);
714                    }
715                    Err(e) => return Err(e),
716                }
717            }
718            b' ' | b'\t' | b'\n' | b'\r' if multi => {
719                // CRLF folding: \r\n counts as \n
720                let c = if b == b'\r' && self.peek_byte() == Some(b'\n') {
721                    self.advance();
722                    '\n'
723                } else {
724                    b as char
725                };
726                if c != '\n' {
727                    loop {
728                        match self.peek_byte() {
729                            Some(b' ' | b'\t') => {
730                                self.advance();
731                            }
732                            Some(b'\n') => {
733                                self.advance();
734                                break;
735                            }
736                            Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
737                                self.cursor += 2;
738                                break;
739                            }
740                            _ => return Err(self.set_error(i, None, ErrorKind::InvalidEscape(c))),
741                        }
742                    }
743                }
744                loop {
745                    match self.peek_byte() {
746                        Some(b' ' | b'\t' | b'\n') => {
747                            self.advance();
748                        }
749                        Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
750                            self.cursor += 2;
751                        }
752                        _ => break,
753                    }
754                }
755            }
756            _ => {
757                self.cursor -= 1;
758                return Err(self.set_error(
759                    self.cursor,
760                    None,
761                    ErrorKind::InvalidEscape(self.next_char_for_error()),
762                ));
763            }
764        }
765        Ok(())
766    }
767
768    fn next_char_for_error(&self) -> char {
769        // Safety: The input was valid UTF-8 via a &str
770        let text = unsafe { std::str::from_utf8_unchecked(self.bytes) };
771        if let Some(value) = text.get(self.cursor..) {
772            value.chars().next().unwrap_or(char::REPLACEMENT_CHARACTER)
773        } else {
774            char::REPLACEMENT_CHARACTER
775        }
776    }
777
778    fn read_hex(
779        &mut self,
780        n: usize,
781        string_start: usize,
782        escape_start: usize,
783    ) -> Result<char, ParseError> {
784        let mut val: u32 = 0;
785        for _ in 0..n {
786            let Some(&byte) = self.bytes.get(self.cursor) else {
787                return Err(self.set_error(string_start, None, ErrorKind::UnterminatedString));
788            };
789            let digit = HEX[byte as usize];
790            if digit >= 0 {
791                val = (val << 4) | digit as u32;
792                self.cursor += 1;
793            } else {
794                return Err(self.set_error(
795                    self.cursor,
796                    None,
797                    ErrorKind::InvalidHexEscape(self.next_char_for_error()),
798                ));
799            }
800        }
801        match char::from_u32(val) {
802            Some(ch) => Ok(ch),
803            None => Err(self.set_error(
804                escape_start,
805                Some(escape_start + n),
806                ErrorKind::InvalidEscapeValue(val),
807            )),
808        }
809    }
810
811    fn number(&mut self, start: u32, end: u32, s: &'de str) -> Result<Item<'de>, ParseError> {
812        let bytes = s.as_bytes();
813
814        // Base-prefixed integers (0x, 0o, 0b).
815        // TOML forbids signs on these, so only match when first byte is '0'.
816        if let [b'0', format, rest @ ..] = s.as_bytes() {
817            match format {
818                b'x' => return self.integer_hex(rest, Span::new(start, end)),
819                b'o' => return self.integer_octal(rest, Span::new(start, end)),
820                b'b' => return self.integer_binary(rest, Span::new(start, end)),
821                _ => {}
822            }
823        }
824
825        if self.eat_byte(b'.') {
826            let at = self.cursor;
827            return match self.peek_byte() {
828                Some(b) if is_keylike_byte(b) => {
829                    let after = self.read_keylike();
830                    match self.float(start, end, s, Some(after)) {
831                        Ok(f) => Ok(Item::float(f, Span::new(start, self.cursor as u32))),
832                        Err(e) => Err(e),
833                    }
834                }
835                _ => Err(self.set_error(at, Some(end as usize), ErrorKind::InvalidNumber)),
836            };
837        }
838
839        // Special float literals (inf, nan and signed variants).
840        // Guard behind first-significant-byte check to skip string
841        // comparisons for the common digit-only case.
842        let off = usize::from(bytes.first() == Some(&b'-'));
843        if let Some(&b'i' | &b'n') = bytes.get(off) {
844            return match s {
845                "inf" => Ok(Item::float(f64::INFINITY, Span::new(start, end))),
846                "-inf" => Ok(Item::float(f64::NEG_INFINITY, Span::new(start, end))),
847                "nan" => Ok(Item::float(f64::NAN.copysign(1.0), Span::new(start, end))),
848                "-nan" => Ok(Item::float(f64::NAN.copysign(-1.0), Span::new(start, end))),
849                _ => Err(self.set_error(
850                    start as usize,
851                    Some(end as usize),
852                    ErrorKind::InvalidNumber,
853                )),
854            };
855        }
856
857        if let Ok(v) = self.integer_decimal(bytes, Span::new(start, end)) {
858            return Ok(v);
859        }
860
861        if bytes.iter().any(|&b| b == b'e' || b == b'E') {
862            return match self.float(start, end, s, None) {
863                Ok(f) => Ok(Item::float(f, Span::new(start, self.cursor as u32))),
864                Err(e) => Err(e),
865            };
866        }
867
868        Err(ParseError)
869    }
870
871    fn number_leading_plus(&mut self, plus_start: u32) -> Result<Item<'de>, ParseError> {
872        match self.peek_byte() {
873            Some(b'0'..=b'9' | b'i' | b'n') => {
874                let s = self.read_keylike();
875                let end = self.cursor as u32;
876                // TOML forbids signs on base-prefixed integers.
877                if let [b'0', b'x' | b'o' | b'b', ..] = s.as_bytes() {
878                    return Err(self.set_error(
879                        plus_start as usize,
880                        Some(end as usize),
881                        ErrorKind::InvalidNumber,
882                    ));
883                }
884                self.number(plus_start, end, s)
885            }
886            _ => Err(self.set_error(
887                plus_start as usize,
888                Some(self.cursor),
889                ErrorKind::InvalidNumber,
890            )),
891        }
892    }
893
894    fn integer_decimal(&mut self, bytes: &'de [u8], span: Span) -> Result<Item<'de>, ParseError> {
895        let mut acc: u64 = 0;
896        let mut prev_underscore = false;
897        let mut has_digit = false;
898        let mut leading_zero = false;
899        'error: {
900            let (negative, digits) = match bytes.first() {
901                Some(&b'+') => (false, &bytes[1..]),
902                Some(&b'-') => (true, &bytes[1..]),
903                _ => (false, bytes),
904            };
905
906            if digits.is_empty() {
907                break 'error;
908            }
909
910            for &b in digits {
911                if b == b'_' {
912                    if !has_digit || prev_underscore {
913                        break 'error;
914                    }
915                    prev_underscore = true;
916                    continue;
917                }
918                if !b.is_ascii_digit() {
919                    break 'error;
920                }
921                if leading_zero {
922                    break 'error;
923                }
924                if !has_digit && b == b'0' {
925                    leading_zero = true;
926                }
927                has_digit = true;
928                prev_underscore = false;
929                let digit = (b - b'0') as u64;
930                acc = match acc.checked_mul(10).and_then(|a| a.checked_add(digit)) {
931                    Some(v) => v,
932                    None => break 'error,
933                };
934            }
935
936            if !has_digit || prev_underscore {
937                break 'error;
938            }
939
940            let max = if negative {
941                (i64::MAX as u64) + 1
942            } else {
943                i64::MAX as u64
944            };
945            if acc > max {
946                break 'error;
947            }
948
949            let val = if negative {
950                (acc as i64).wrapping_neg()
951            } else {
952                acc as i64
953            };
954            return Ok(Item::integer(val, span));
955        }
956        self.error_span = span;
957        self.error_kind = Some(ErrorKind::InvalidNumber);
958        Err(ParseError)
959    }
960
961    fn integer_hex(&mut self, bytes: &'de [u8], span: Span) -> Result<Item<'de>, ParseError> {
962        let mut acc: u64 = 0;
963        let mut prev_underscore = false;
964        let mut has_digit = false;
965        'error: {
966            if bytes.is_empty() {
967                break 'error;
968            }
969
970            for &b in bytes {
971                if b == b'_' {
972                    if !has_digit || prev_underscore {
973                        break 'error;
974                    }
975                    prev_underscore = true;
976                    continue;
977                }
978                let digit = HEX[b as usize];
979                if digit < 0 {
980                    break 'error;
981                }
982                has_digit = true;
983                prev_underscore = false;
984                if acc >> 60 != 0 {
985                    break 'error;
986                }
987                acc = (acc << 4) | digit as u64;
988            }
989
990            if !has_digit || prev_underscore {
991                break 'error;
992            }
993
994            if acc > i64::MAX as u64 {
995                break 'error;
996            }
997            return Ok(Item::integer(acc as i64, span));
998        }
999        self.error_span = span;
1000        self.error_kind = Some(ErrorKind::InvalidNumber);
1001        Err(ParseError)
1002    }
1003
1004    fn integer_octal(&mut self, bytes: &'de [u8], span: Span) -> Result<Item<'de>, ParseError> {
1005        let mut acc: u64 = 0;
1006        let mut prev_underscore = false;
1007        let mut has_digit = false;
1008        'error: {
1009            if bytes.is_empty() {
1010                break 'error;
1011            }
1012
1013            for &b in bytes {
1014                if b == b'_' {
1015                    if !has_digit || prev_underscore {
1016                        break 'error;
1017                    }
1018                    prev_underscore = true;
1019                    continue;
1020                }
1021                if !b.is_ascii_digit() || b > b'7' {
1022                    break 'error;
1023                }
1024                has_digit = true;
1025                prev_underscore = false;
1026                if acc >> 61 != 0 {
1027                    break 'error;
1028                }
1029                acc = (acc << 3) | (b - b'0') as u64;
1030            }
1031
1032            if !has_digit || prev_underscore {
1033                break 'error;
1034            }
1035
1036            if acc > i64::MAX as u64 {
1037                break 'error;
1038            }
1039            return Ok(Item::integer(acc as i64, span));
1040        }
1041        self.error_span = span;
1042        self.error_kind = Some(ErrorKind::InvalidNumber);
1043        Err(ParseError)
1044    }
1045
1046    fn integer_binary(&mut self, bytes: &'de [u8], span: Span) -> Result<Item<'de>, ParseError> {
1047        let mut acc: u64 = 0;
1048        let mut prev_underscore = false;
1049        let mut has_digit = false;
1050        'error: {
1051            if bytes.is_empty() {
1052                break 'error;
1053            }
1054
1055            for &b in bytes {
1056                if b == b'_' {
1057                    if !has_digit || prev_underscore {
1058                        break 'error;
1059                    }
1060                    prev_underscore = true;
1061                    continue;
1062                }
1063                if b != b'0' && b != b'1' {
1064                    break 'error;
1065                }
1066                has_digit = true;
1067                prev_underscore = false;
1068                if acc >> 63 != 0 {
1069                    break 'error;
1070                }
1071                acc = (acc << 1) | (b - b'0') as u64;
1072            }
1073
1074            if !has_digit || prev_underscore {
1075                break 'error;
1076            }
1077
1078            if acc > i64::MAX as u64 {
1079                break 'error;
1080            }
1081            return Ok(Item::integer(acc as i64, span));
1082        }
1083        self.error_span = span;
1084        self.error_kind = Some(ErrorKind::InvalidNumber);
1085        Err(ParseError)
1086    }
1087
1088    fn float(
1089        &mut self,
1090        start: u32,
1091        end: u32,
1092        s: &'de str,
1093        after_decimal: Option<&'de str>,
1094    ) -> Result<f64, ParseError> {
1095        let s_start = start as usize;
1096        let s_end = end as usize;
1097
1098        // TOML forbids leading zeros in the integer part (e.g. 00.5, -01.0).
1099        let unsigned = if s.as_bytes().first() == Some(&b'-') {
1100            &s[1..]
1101        } else {
1102            s
1103        };
1104        if let [b'0', b'0'..=b'9' | b'_', ..] = unsigned.as_bytes() {
1105            return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1106        }
1107
1108        // Safety: no other Scratch or arena.alloc() is active during float parsing.
1109        let mut scratch = unsafe { self.arena.scratch() };
1110
1111        if !scratch.push_strip_underscores(s.as_bytes()) {
1112            return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1113        }
1114
1115        let mut last = s;
1116
1117        if let Some(after) = after_decimal {
1118            if !matches!(after.as_bytes().first(), Some(b'0'..=b'9')) {
1119                return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1120            }
1121            scratch.push(b'.');
1122            if !scratch.push_strip_underscores(after.as_bytes()) {
1123                return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1124            }
1125            last = after;
1126        }
1127
1128        // When the last keylike token ends with e/E, the '+' and exponent
1129        // digits are separate tokens in the stream ('-' IS keylike so
1130        // e.g. "1e-5" stays in one token and needs no special handling).
1131        if matches!(last.as_bytes().last(), Some(b'e' | b'E')) {
1132            self.eat_byte(b'+');
1133            match self.peek_byte() {
1134                Some(b) if is_keylike_byte(b) && b != b'-' => {
1135                    let next = self.read_keylike();
1136                    if !scratch.push_strip_underscores(next.as_bytes()) {
1137                        return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1138                    }
1139                }
1140                _ => {
1141                    return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1142                }
1143            }
1144        }
1145
1146        // Scratch is not committed — arena pointer stays unchanged, space is
1147        // reused by subsequent allocations.
1148        let n: f64 = match unsafe { std::str::from_utf8_unchecked(scratch.as_bytes()) }.parse() {
1149            Ok(n) => n,
1150            Err(_) => {
1151                return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
1152            }
1153        };
1154        if n.is_finite() {
1155            Ok(n)
1156        } else {
1157            Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber))
1158        }
1159    }
1160
1161    fn value(&mut self, depth_remaining: i16) -> Result<Item<'de>, ParseError> {
1162        let at = self.cursor;
1163        let Some(byte) = self.peek_byte() else {
1164            return Err(self.set_error(self.bytes.len(), None, ErrorKind::UnexpectedEof));
1165        };
1166        match byte {
1167            b'"' => {
1168                self.advance();
1169                let (key, _multiline) = match self.read_string(self.cursor - 1, b'"') {
1170                    Ok(v) => v,
1171                    Err(e) => return Err(e),
1172                };
1173                Ok(Item::string(key.name, key.span))
1174            }
1175            b'\'' => {
1176                self.advance();
1177                let (key, _multiline) = match self.read_string(self.cursor - 1, b'\'') {
1178                    Ok(v) => v,
1179                    Err(e) => return Err(e),
1180                };
1181                Ok(Item::string(key.name, key.span))
1182            }
1183            b'{' => {
1184                let start = self.cursor as u32;
1185                self.advance();
1186                let mut table = crate::table::InnerTable::new();
1187                let end_span = match self.inline_table_contents(&mut table, depth_remaining - 1) {
1188                    Ok(v) => v,
1189                    Err(e) => return Err(e),
1190                };
1191                Ok(Item::table_frozen(table, Span::new(start, end_span.end)))
1192            }
1193            b'[' => {
1194                let start = self.cursor as u32;
1195                self.advance();
1196                let mut arr = value::Array::new();
1197                let end_span = match self.array_contents(&mut arr, depth_remaining - 1) {
1198                    Ok(v) => v,
1199                    Err(e) => return Err(e),
1200                };
1201                Ok(Item::array(arr, Span::new(start, end_span.end)))
1202            }
1203            b'+' => {
1204                let start = self.cursor as u32;
1205                self.advance();
1206                self.number_leading_plus(start)
1207            }
1208            b if is_keylike_byte(b) => {
1209                let start = self.cursor as u32;
1210                let key = self.read_keylike();
1211                let end = self.cursor as u32;
1212                let span = Span::new(start, end);
1213
1214                match key {
1215                    "true" => Ok(Item::boolean(true, span)),
1216                    "false" => Ok(Item::boolean(false, span)),
1217                    "inf" | "nan" => self.number(start, end, key),
1218                    _ => {
1219                        let first_char = key.chars().next().expect("key should not be empty");
1220                        match first_char {
1221                            '-' => match key.as_bytes().get(1) {
1222                                Some(b'0'..=b'9' | b'i' | b'n') => self.number(start, end, key),
1223                                _ => Err(self.set_error(
1224                                    start as usize,
1225                                    Some(end as usize),
1226                                    ErrorKind::InvalidNumber,
1227                                )),
1228                            },
1229                            '0'..='9' => self.number(start, end, key),
1230                            _ => Err(self.set_error(
1231                                at,
1232                                Some(end as usize),
1233                                ErrorKind::UnquotedString,
1234                            )),
1235                        }
1236                    }
1237                }
1238            }
1239            _ => {
1240                let (found_desc, end) = self.scan_token_desc_and_end();
1241                Err(self.set_error(
1242                    at,
1243                    Some(end),
1244                    ErrorKind::Wanted {
1245                        expected: "a value",
1246                        found: found_desc,
1247                    },
1248                ))
1249            }
1250        }
1251    }
1252
1253    fn inline_table_contents(
1254        &mut self,
1255        out: &mut crate::table::InnerTable<'de>,
1256        depth_remaining: i16,
1257    ) -> Result<Span, ParseError> {
1258        if depth_remaining < 0 {
1259            return Err(self.set_error(
1260                self.cursor,
1261                None,
1262                ErrorKind::OutOfRange("Max recursion depth exceeded"),
1263            ));
1264        }
1265        if let Err(e) = self.eat_inline_table_whitespace() {
1266            return Err(e);
1267        }
1268        if let Some(span) = self.eat_byte_spanned(b'}') {
1269            return Ok(span);
1270        }
1271        loop {
1272            let mut table_ref: &mut crate::table::InnerTable<'de> = &mut *out;
1273            let mut key = match self.read_table_key() {
1274                Ok(k) => k,
1275                Err(e) => return Err(e),
1276            };
1277            if let Err(e) = self.eat_inline_table_whitespace() {
1278                return Err(e);
1279            }
1280            while self.eat_byte(b'.') {
1281                if let Err(e) = self.eat_inline_table_whitespace() {
1282                    return Err(e);
1283                }
1284                table_ref = match self.navigate_dotted_key(table_ref, key) {
1285                    Ok(t) => t,
1286                    Err(e) => return Err(e),
1287                };
1288                key = match self.read_table_key() {
1289                    Ok(k) => k,
1290                    Err(e) => return Err(e),
1291                };
1292                if let Err(e) = self.eat_inline_table_whitespace() {
1293                    return Err(e);
1294                }
1295            }
1296            if let Err(e) = self.expect_byte(b'=') {
1297                return Err(e);
1298            }
1299            if let Err(e) = self.eat_inline_table_whitespace() {
1300                return Err(e);
1301            }
1302            {
1303                let val = match self.value(depth_remaining) {
1304                    Ok(v) => v,
1305                    Err(e) => return Err(e),
1306                };
1307                if let Err(e) = self.insert_value(table_ref, key, val) {
1308                    return Err(e);
1309                }
1310            }
1311
1312            if let Err(e) = self.eat_inline_table_whitespace() {
1313                return Err(e);
1314            }
1315            if let Some(span) = self.eat_byte_spanned(b'}') {
1316                return Ok(span);
1317            }
1318            if let Err(e) = self.expect_byte(b',') {
1319                return Err(e);
1320            }
1321            if let Err(e) = self.eat_inline_table_whitespace() {
1322                return Err(e);
1323            }
1324            if let Some(span) = self.eat_byte_spanned(b'}') {
1325                return Ok(span);
1326            }
1327        }
1328    }
1329
1330    fn array_contents(
1331        &mut self,
1332        out: &mut value::Array<'de>,
1333        depth_remaining: i16,
1334    ) -> Result<Span, ParseError> {
1335        if depth_remaining < 0 {
1336            return Err(self.set_error(
1337                self.cursor,
1338                None,
1339                ErrorKind::OutOfRange("Max recursion depth exceeded"),
1340            ));
1341        }
1342        loop {
1343            if let Err(e) = self.eat_intermediate() {
1344                return Err(e);
1345            }
1346            if let Some(span) = self.eat_byte_spanned(b']') {
1347                return Ok(span);
1348            }
1349            let val = match self.value(depth_remaining) {
1350                Ok(v) => v,
1351                Err(e) => return Err(e),
1352            };
1353            out.push(val, self.arena);
1354            if let Err(e) = self.eat_intermediate() {
1355                return Err(e);
1356            }
1357            if !self.eat_byte(b',') {
1358                break;
1359            }
1360        }
1361        if let Err(e) = self.eat_intermediate() {
1362            return Err(e);
1363        }
1364        self.expect_byte_spanned(b']')
1365    }
1366
1367    fn eat_inline_table_whitespace(&mut self) -> Result<(), ParseError> {
1368        loop {
1369            self.eat_whitespace();
1370            if self.eat_newline() {
1371                continue;
1372            }
1373            match self.eat_comment() {
1374                Ok(true) => {}
1375                Ok(false) => break,
1376                Err(e) => return Err(e),
1377            }
1378        }
1379        Ok(())
1380    }
1381
1382    fn eat_intermediate(&mut self) -> Result<(), ParseError> {
1383        loop {
1384            self.eat_whitespace();
1385            if self.eat_newline() {
1386                continue;
1387            }
1388            match self.eat_comment() {
1389                Ok(true) => {}
1390                Ok(false) => break,
1391                Err(e) => return Err(e),
1392            }
1393        }
1394        Ok(())
1395    }
1396
1397    /// Navigate into an existing or new table for a dotted-key intermediate
1398    /// segment. Checks frozen and header bits.
1399    /// New tables are created with the `DOTTED` tag.
1400    fn navigate_dotted_key<'t>(
1401        &mut self,
1402        table: &'t mut InnerTable<'de>,
1403        key: Key<'de>,
1404    ) -> Result<&'t mut InnerTable<'de>, ParseError> {
1405        if let Some(idx) = self.indexed_find(table, &key.name) {
1406            let (existing_key, value) = &mut table.entries_mut()[idx];
1407            let ok = value.is_table() && !value.is_frozen() && !value.has_header_bit();
1408
1409            if !ok {
1410                return Err(self.set_error(
1411                    key.span.start as usize,
1412                    Some(key.span.end as usize),
1413                    ErrorKind::DottedKeyInvalidType {
1414                        first: existing_key.span,
1415                    },
1416                ));
1417            }
1418            // Safety: check above ensures value is table
1419            unsafe { Ok(value.as_table_mut_unchecked()) }
1420        } else {
1421            let span = key.span;
1422            let inserted =
1423                self.insert_into_table(table, key, Item::table_dotted(InnerTable::new(), span));
1424            unsafe { Ok(inserted.as_table_mut_unchecked()) }
1425        }
1426    }
1427
1428    /// Navigate an intermediate segment of a table header (e.g. `a` in `[a.b.c]`).
1429    /// Creates implicit tables (no flag bits) if not found.
1430    /// Handles arrays-of-tables by navigating into the last element.
1431    ///
1432    /// Returns a `SpannedTable` view of the table navigated into.
1433    fn navigate_header_intermediate<'b>(
1434        &mut self,
1435        st: &'b mut Table<'de>,
1436        key: Key<'de>,
1437    ) -> Result<&'b mut Table<'de>, ParseError> {
1438        let table = &mut st.value;
1439
1440        if let Some(idx) = self.indexed_find(table, &key.name) {
1441            let (existing_key, existing) = &mut table.entries_mut()[idx];
1442            let first_key_span = existing_key.span;
1443            let is_table = existing.is_table();
1444            let is_array = existing.is_array();
1445            let is_frozen = existing.is_frozen();
1446            let is_aot = existing.is_aot();
1447
1448            if is_table {
1449                if is_frozen {
1450                    return Err(self.set_duplicate_key_error(first_key_span, key.span, &key.name));
1451                }
1452                unsafe { Ok(existing.as_spanned_table_mut_unchecked()) }
1453            } else if is_array && is_aot {
1454                let arr = existing.as_array_mut().unwrap();
1455                let last = arr.last_mut().unwrap();
1456                if !last.is_table() {
1457                    return Err(self.set_duplicate_key_error(first_key_span, key.span, &key.name));
1458                }
1459                unsafe { Ok(last.as_spanned_table_mut_unchecked()) }
1460            } else {
1461                Err(self.set_duplicate_key_error(first_key_span, key.span, &key.name))
1462            }
1463        } else {
1464            let span = key.span;
1465            let inserted = self.insert_into_table(table, key, Item::table(InnerTable::new(), span));
1466            unsafe { Ok(inserted.as_spanned_table_mut_unchecked()) }
1467        }
1468    }
1469    fn insert_into_table<'t>(
1470        &mut self,
1471        table: &'t mut InnerTable<'de>,
1472        key: Key<'de>,
1473        item: Item<'de>,
1474    ) -> &'t mut value::Item<'de> {
1475        let len = table.len();
1476        if len >= INDEXED_TABLE_THRESHOLD {
1477            let table_id = unsafe { table.first_key_span_start_unchecked() };
1478            if len == INDEXED_TABLE_THRESHOLD {
1479                for (i, (key, _)) in table.entries().iter().enumerate() {
1480                    self.table_index
1481                        .insert(KeyIndex::new(key.as_str(), table_id), i);
1482                }
1483            }
1484            self.table_index
1485                .insert(KeyIndex::new(key.as_str(), table_id), len);
1486        }
1487        &mut table.insert(key, item, self.arena).1
1488    }
1489
1490    /// Handle the final segment of a standard table header `[a.b.c]`.
1491    ///
1492    /// Returns the [`Ctx`] for the table that subsequent key-value pairs
1493    /// should be inserted into.
1494    fn navigate_header_table_final<'b>(
1495        &mut self,
1496        st: &'b mut Table<'de>,
1497        key: Key<'de>,
1498        header_start: u32,
1499        header_end: u32,
1500    ) -> Result<Ctx<'b, 'de>, ParseError> {
1501        let table = &mut st.value;
1502
1503        if let Some(idx) = self.indexed_find(table, &key.name) {
1504            let (existing_key, value) = &mut table.entries_mut()[idx];
1505            let first_key_span = existing_key.span;
1506            let is_table = value.is_table();
1507            let is_frozen = value.is_frozen();
1508            let has_header = value.has_header_bit();
1509            let has_dotted = value.has_dotted_bit();
1510            let val_span = value.span();
1511
1512            if !is_table || is_frozen {
1513                return Err(self.set_duplicate_key_error(first_key_span, key.span, &key.name));
1514            }
1515            if has_header {
1516                return Err(self.set_error(
1517                    header_start as usize,
1518                    Some(header_end as usize),
1519                    ErrorKind::DuplicateTable {
1520                        name: String::from(&*key.name),
1521                        first: val_span,
1522                    },
1523                ));
1524            }
1525            if has_dotted {
1526                return Err(self.set_duplicate_key_error(first_key_span, key.span, &key.name));
1527            }
1528            let table = unsafe { value.as_spanned_table_mut_unchecked() };
1529            table.set_header_flag();
1530            table.set_span_start(header_start);
1531            table.set_span_end(header_end);
1532            Ok(Ctx {
1533                table,
1534                array_end_span: None,
1535            })
1536        } else {
1537            let inserted = self.insert_into_table(
1538                table,
1539                key,
1540                Item::table_header(InnerTable::new(), Span::new(header_start, header_end)),
1541            );
1542            Ok(Ctx {
1543                table: unsafe { inserted.as_spanned_table_mut_unchecked() },
1544                array_end_span: None,
1545            })
1546        }
1547    }
1548
1549    /// Handle the final segment of an array-of-tables header `[[a.b.c]]`.
1550    ///
1551    /// Returns the [`Ctx`] for the new table entry that subsequent key-value
1552    /// pairs should be inserted into.
1553    fn navigate_header_array_final<'b>(
1554        &mut self,
1555        st: &'b mut Table<'de>,
1556        key: Key<'de>,
1557        header_start: u32,
1558        header_end: u32,
1559    ) -> Result<Ctx<'b, 'de>, ParseError> {
1560        let table = &mut st.value;
1561
1562        if let Some(idx) = self.indexed_find(table, &key.name) {
1563            let (existing_key, value) = &mut table.entries_mut()[idx];
1564            let first_key_span = existing_key.span;
1565            let is_aot = value.is_aot();
1566            let is_table = value.is_table();
1567
1568            if is_aot {
1569                let (end_flag, arr) = unsafe { value.split_array_end_flag() };
1570                let entry_span = Span::new(header_start, header_end);
1571                arr.push(
1572                    Item::table_header(InnerTable::new(), entry_span),
1573                    self.arena,
1574                );
1575                let entry = arr.last_mut().unwrap();
1576                Ok(Ctx {
1577                    table: unsafe { entry.as_spanned_table_mut_unchecked() },
1578                    array_end_span: Some(end_flag),
1579                })
1580            } else if is_table {
1581                Err(self.set_error(
1582                    header_start as usize,
1583                    Some(header_end as usize),
1584                    ErrorKind::RedefineAsArray,
1585                ))
1586            } else {
1587                Err(self.set_duplicate_key_error(first_key_span, key.span, &key.name))
1588            }
1589        } else {
1590            let entry_span = Span::new(header_start, header_end);
1591            let first_entry = Item::table_header(InnerTable::new(), entry_span);
1592            let array_span = Span::new(header_start, header_end);
1593            let array_val = Item::array_aot(
1594                value::Array::with_single(first_entry, self.arena),
1595                array_span,
1596            );
1597            let inserted = self.insert_into_table(table, key, array_val);
1598            let (end_flag, arr) = unsafe { inserted.split_array_end_flag() };
1599            let entry = arr.last_mut().unwrap();
1600            Ok(Ctx {
1601                table: unsafe { entry.as_spanned_table_mut_unchecked() },
1602                array_end_span: Some(end_flag),
1603            })
1604        }
1605    }
1606
1607    /// Insert a value into a table, checking for duplicates.
1608    fn insert_value(
1609        &mut self,
1610        table: &mut InnerTable<'de>,
1611        key: Key<'de>,
1612        item: Item<'de>,
1613    ) -> Result<(), ParseError> {
1614        if let Some(idx) = self.indexed_find(table, &key.name) {
1615            let (existing_key, _) = &table.entries_mut()[idx];
1616            return Err(self.set_duplicate_key_error(existing_key.span, key.span, &key.name));
1617        }
1618
1619        self.insert_into_table(table, key, item);
1620        Ok(())
1621    }
1622
1623    /// Look up a key name in a table, returning its entry index.
1624    /// Uses the hash index for tables at or above the threshold, otherwise
1625    /// falls back to a linear scan.
1626    fn indexed_find(&self, table: &InnerTable<'de>, name: &str) -> Option<usize> {
1627        // NOTE: I would return a refernce to actual entry here, however this
1628        // runs into all sorts of NLL limitations.
1629        if table.len() > INDEXED_TABLE_THRESHOLD {
1630            let first_key_span = unsafe { table.first_key_span_start_unchecked() };
1631            self.table_index
1632                .get(&KeyIndex::new(name, first_key_span))
1633                .copied()
1634        } else {
1635            table.find_index(name)
1636        }
1637    }
1638
1639    fn parse_document(&mut self, root_st: &mut Table<'de>) -> Result<(), ParseError> {
1640        let mut ctx = Ctx {
1641            table: root_st,
1642            array_end_span: None,
1643        };
1644
1645        loop {
1646            self.eat_whitespace();
1647            match self.eat_comment() {
1648                Ok(true) => continue,
1649                Ok(false) => {}
1650                Err(e) => return Err(e),
1651            }
1652            if self.eat_newline() {
1653                continue;
1654            }
1655
1656            match self.peek_byte() {
1657                None => break,
1658                Some(b'[') => {
1659                    ctx = match self.process_table_header(root_st) {
1660                        Ok(c) => c,
1661                        Err(e) => return Err(e),
1662                    };
1663                }
1664                Some(b'\r') => {
1665                    return Err(self.set_error(self.cursor, None, ErrorKind::Unexpected('\r')));
1666                }
1667                Some(_) => {
1668                    if let Err(e) = self.process_key_value(&mut ctx) {
1669                        return Err(e);
1670                    }
1671                }
1672            }
1673        }
1674        Ok(())
1675    }
1676
1677    fn process_table_header<'b>(
1678        &mut self,
1679        root_st: &'b mut Table<'de>,
1680    ) -> Result<Ctx<'b, 'de>, ParseError> {
1681        let header_start = self.cursor as u32;
1682        if let Err(e) = self.expect_byte(b'[') {
1683            return Err(e);
1684        }
1685        let is_array = self.eat_byte(b'[');
1686
1687        let mut current = root_st;
1688
1689        self.eat_whitespace();
1690        let mut key = match self.read_table_key() {
1691            Ok(k) => k,
1692            Err(e) => return Err(e),
1693        };
1694        loop {
1695            self.eat_whitespace();
1696            if self.eat_byte(b'.') {
1697                self.eat_whitespace();
1698                current = match self.navigate_header_intermediate(current, key) {
1699                    Ok(p) => p,
1700                    Err(e) => return Err(e),
1701                };
1702                key = match self.read_table_key() {
1703                    Ok(k) => k,
1704                    Err(e) => return Err(e),
1705                };
1706            } else {
1707                break;
1708            }
1709        }
1710
1711        self.eat_whitespace();
1712        if let Err(e) = self.expect_byte(b']') {
1713            return Err(e);
1714        }
1715        if is_array && let Err(e) = self.expect_byte(b']') {
1716            return Err(e);
1717        }
1718
1719        self.eat_whitespace();
1720        match self.eat_comment() {
1721            Ok(true) => {}
1722            Ok(false) => {
1723                if let Err(e) = self.eat_newline_or_eof() {
1724                    return Err(e);
1725                }
1726            }
1727            Err(e) => return Err(e),
1728        }
1729        let header_end = self.cursor as u32;
1730
1731        if is_array {
1732            self.navigate_header_array_final(current, key, header_start, header_end)
1733        } else {
1734            self.navigate_header_table_final(current, key, header_start, header_end)
1735        }
1736    }
1737
1738    fn process_key_value(&mut self, ctx: &mut Ctx<'_, 'de>) -> Result<(), ParseError> {
1739        let line_start = self.cursor as u32;
1740        // Borrow the Table payload from the SpannedTable. NLL drops this
1741        // borrow at its last use (the insert_value call), freeing ctx.st
1742        // for the span updates that follow.
1743        let mut table_ref: &mut InnerTable<'de> = &mut ctx.table.value;
1744
1745        let mut key = match self.read_table_key() {
1746            Ok(k) => k,
1747            Err(e) => return Err(e),
1748        };
1749        self.eat_whitespace();
1750
1751        while self.eat_byte(b'.') {
1752            self.eat_whitespace();
1753            table_ref = match self.navigate_dotted_key(table_ref, key) {
1754                Ok(t) => t,
1755                Err(e) => return Err(e),
1756            };
1757            key = match self.read_table_key() {
1758                Ok(k) => k,
1759                Err(e) => return Err(e),
1760            };
1761            self.eat_whitespace();
1762        }
1763
1764        if let Err(e) = self.expect_byte(b'=') {
1765            return Err(e);
1766        }
1767        self.eat_whitespace();
1768        let val = match self.value(MAX_RECURSION_DEPTH) {
1769            Ok(v) => v,
1770            Err(e) => return Err(e),
1771        };
1772        let line_end = self.cursor as u32;
1773
1774        self.eat_whitespace();
1775        match self.eat_comment() {
1776            Ok(true) => {}
1777            Ok(false) => {
1778                if let Err(e) = self.eat_newline_or_eof() {
1779                    return Err(e);
1780                }
1781            }
1782            Err(e) => return Err(e),
1783        }
1784
1785        if let Err(e) = self.insert_value(table_ref, key, val) {
1786            return Err(e);
1787        }
1788
1789        let start = ctx.table.span_start();
1790        ctx.table.set_span_start(start.min(line_start));
1791        ctx.table.extend_span_end(line_end);
1792
1793        if let Some(end_flag) = &mut ctx.array_end_span {
1794            let old = **end_flag;
1795            let current = old >> value::FLAG_SHIFT;
1796            **end_flag = (current.max(line_end) << value::FLAG_SHIFT) | (old & value::FLAG_MASK);
1797        }
1798
1799        Ok(())
1800    }
1801}
1802
1803/// Parses a TOML string into a [`Table`].
1804///
1805/// The returned table borrows from both the input string and the [`Arena`],
1806/// so both must outlive the table. The arena is used to store escape sequences;
1807/// plain strings borrow directly from the input.
1808pub fn parse<'de>(s: &'de str, arena: &'de Arena) -> Result<Table<'de>, Error> {
1809    // Tag bits use the low 3 bits of start_and_tag, limiting span.start to
1810    // 29 bits (512 MiB). The flag state uses the low 3 bits of end_and_flag,
1811    // limiting span.end to 29 bits (512 MiB).
1812    const MAX_SIZE: usize = (1u32 << 29) as usize;
1813
1814    if s.len() > MAX_SIZE {
1815        return Err(Error {
1816            kind: ErrorKind::FileTooLarge,
1817            span: Span::new(0, 0),
1818        });
1819    }
1820
1821    // SAFETY: root is a table, so the SpannedTable reinterpretation is valid.
1822    let mut root_st = Table::new(Span::new(0, s.len() as u32));
1823    let mut parser = Parser::new(s, arena);
1824    match parser.parse_document(&mut root_st) {
1825        Ok(()) => {}
1826        Err(_) => return Err(parser.take_error()),
1827    }
1828    // Note that root is about the drop (but doesn't implement drop), so we can take
1829    // ownership of this table.
1830    // todo don't do this
1831    Ok(root_st)
1832}
1833
/// Returns `true` when `b` is an ASCII letter, an ASCII digit, `-`, or `_`.
#[inline]
fn is_keylike_byte(b: u8) -> bool {
    matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_')
}
1838
1839fn byte_describe(b: u8) -> &'static str {
1840    match b {
1841        b'\n' => "a newline",
1842        b' ' | b'\t' => "whitespace",
1843        b'=' => "an equals",
1844        b'.' => "a period",
1845        b',' => "a comma",
1846        b':' => "a colon",
1847        b'+' => "a plus",
1848        b'{' => "a left brace",
1849        b'}' => "a right brace",
1850        b'[' => "a left bracket",
1851        b']' => "a right bracket",
1852        b'\'' | b'"' => "a string",
1853        _ if is_keylike_byte(b) => "an identifier",
1854        _ => "a character",
1855    }
1856}