1#[cfg(test)]
6#[path = "./parser_tests.rs"]
7mod tests;
8
9#[cfg(feature = "from-toml")]
10use crate::de::TableHelper;
11use crate::{
12 Failed, MaybeItem, Span,
13 arena::Arena,
14 error::{Error, ErrorKind, PathComponent},
15 item::{
16 self, Item, Key,
17 table::{InnerTable, Table},
18 },
19 time::DateTime,
20};
21use std::char;
22use std::hash::{Hash, Hasher};
23use std::ptr::NonNull;
24
/// Upper bound on how deeply values may nest.
// NOTE(review): presumably the initial `depth_remaining` budget handed to
// `Parser::value` (which rejects input once the budget drops below zero) —
// the use site is outside this view; confirm.
const MAX_RECURSION_DEPTH: i16 = 256;
26
/// Borrowed context pairing a table with an optional array end-span slot.
// NOTE(review): construction and use sites are outside this view; the field
// roles below are inferred from names and types only — confirm.
struct Ctx<'b, 'de> {
    /// Mutable borrow of a table.
    table: &'b mut Table<'de>,
    /// Optional mutable `u32`; presumably the end offset of an enclosing
    /// array's span — TODO confirm.
    array_end_span: Option<&'b mut u32>,
}
35
/// Size threshold related to indexed table lookup.
// NOTE(review): presumably the entry count from which a table's keys are
// tracked in `Parser::index` instead of being scanned linearly — the use site
// is outside this view; confirm.
pub(crate) const INDEXED_TABLE_THRESHOLD: usize = 6;
41
/// Builds, at compile time, a 256-entry lookup table for hexadecimal digits.
///
/// Entry `b` holds the numeric value (`0..=15`) of the ASCII hex digit `b`
/// (upper- or lower-case), or `-1` when `b` is not a hex digit.
const fn build_hex_table() -> [i8; 256] {
    // Start from "not a hex digit" everywhere and overwrite the 22 valid slots.
    let mut lut = [-1i8; 256];

    let mut decimal = 0u8;
    while decimal < 10 {
        lut[(b'0' + decimal) as usize] = decimal as i8;
        decimal += 1;
    }

    let mut letter = 0u8;
    while letter < 6 {
        let value = (10 + letter) as i8;
        lut[(b'A' + letter) as usize] = value;
        lut[(b'a' + letter) as usize] = value;
        letter += 1;
    }

    lut
}
56
/// Hex-digit lookup table, built at compile time: `HEX[b]` is the value
/// (`0..=15`) of the ASCII hex digit `b`, or `-1` if `b` is not a hex digit.
static HEX: [i8; 256] = build_hex_table();
58
/// Compact, borrowed hash-map key: the text of a key plus a `u32` span tag.
///
/// The text is stored as a raw pointer and a `u32` length instead of a
/// `&'de str`; the `PhantomData` marker ties the value to the `'de` borrow it
/// was created from. Two `KeyRef`s are equal only when both the tag and the
/// text match.
pub(crate) struct KeyRef<'de> {
    key_ptr: NonNull<u8>,
    len: u32,
    first_key_span: u32,
    marker: std::marker::PhantomData<&'de str>,
}

impl<'de> KeyRef<'de> {
    /// Captures `key` together with the `first_key_span` tag.
    ///
    /// The length is stored as `u32`; a key longer than `u32::MAX` bytes
    /// would be truncated by the cast.
    #[inline]
    pub(crate) fn new(key: &'de str, first_key_span: u32) -> Self {
        Self {
            // A reference is never null, so the pointer can be built safely.
            key_ptr: NonNull::from(key.as_bytes()).cast::<u8>(),
            len: key.len() as u32,
            first_key_span,
            marker: std::marker::PhantomData,
        }
    }

    /// Reconstructs the string slice captured by [`KeyRef::new`].
    #[inline]
    fn as_str(&self) -> &'de str {
        let byte_len = self.len as usize;
        // SAFETY: `key_ptr`/`len` were taken from a `&'de str` in `new`, so
        // they describe `len` initialised, valid-UTF-8 bytes that stay
        // borrowed for `'de`.
        unsafe {
            let raw = std::slice::from_raw_parts(self.key_ptr.as_ptr(), byte_len);
            std::str::from_utf8_unchecked(raw)
        }
    }
}

impl<'de> Hash for KeyRef<'de> {
    /// Feeds the span tag first, then the key text, into `state`.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        Hash::hash(&self.first_key_span, state);
        Hash::hash(self.as_str(), state);
    }
}

impl<'de> PartialEq for KeyRef<'de> {
    /// Equal when both the span tag and the key text are equal.
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        (self.first_key_span, self.as_str()) == (other.first_key_span, other.as_str())
    }
}

impl<'de> Eq for KeyRef<'de> {}
117
/// Parser state over a single input buffer.
struct Parser<'de> {
    /// The input text as bytes (taken from a `&str` in `Parser::new`).
    bytes: &'de [u8],
    /// Offset into `bytes` of the next unread byte.
    cursor: usize,
    /// Arena used for scratch buffers (unescaped strings, float text) and
    /// for array storage.
    arena: &'de Arena,

    /// Span of the pending error; meaningful only while `error_kind` is set.
    error_span: Span,
    /// Kind of the pending error, consumed by `take_error`.
    error_kind: Option<ErrorKind<'static>>,

    /// Fixed-size stack of key/index components leading to the value being
    /// parsed. Components beyond the 16th are not stored.
    path: [PathComponent<'de>; 16],
    /// Logical depth of `path`; may exceed `path.len()` because pushes past
    /// capacity are still counted (saturating at `u8::MAX`).
    path_len: u8,

    /// Map from key reference to a `usize`.
    // NOTE(review): presumably the entry index used for fast lookup in large
    // tables (see `indexed_find`, outside this view) — confirm.
    index: foldhash::HashMap<KeyRef<'de>, usize>,

    /// Starts out `false`.
    // NOTE(review): presumably marks error-recovery mode — usage is outside
    // this view; confirm.
    recovering: bool,
    /// Starts out empty.
    // NOTE(review): presumably the errors accumulated while recovering —
    // usage is outside this view; confirm.
    errors: Vec<Error>,
}
141
142impl<'de> Parser<'de> {
143 fn new(input: &'de str, arena: &'de Arena) -> Self {
144 let bytes = input.as_bytes();
145 let cursor = if bytes.starts_with(b"\xef\xbb\xbf") {
147 3
148 } else {
149 0
150 };
151 Parser {
152 bytes,
153 cursor,
154 arena,
155 error_span: Span::new(0, 0),
156 error_kind: None,
157 path: [PathComponent::Index(0); 16],
158 path_len: 0,
159 index: foldhash::HashMap::with_capacity_and_hasher(
161 256,
162 foldhash::fast::RandomState::default(),
163 ),
164 recovering: false,
165 errors: Vec::new(),
166 }
167 }
168
169 #[inline]
178 unsafe fn str_slice(&self, start: usize, end: usize) -> &'de str {
179 #[cfg(not(debug_assertions))]
180 unsafe {
181 std::str::from_utf8_unchecked(&self.bytes[start..end])
182 }
183 #[cfg(debug_assertions)]
184 match std::str::from_utf8(&self.bytes[start..end]) {
185 Ok(value) => value,
186 Err(err) => panic!(
187 "Invalid UTF-8 slice: bytes[{}..{}] is not valid UTF-8: {}",
188 start, end, err
189 ),
190 }
191 }
192
193 #[inline]
194 fn push_path(&mut self, component: PathComponent<'de>) {
195 let len = self.path_len as usize;
196 if len < self.path.len() {
197 self.path[len] = component;
198 }
199 self.path_len = self.path_len.saturating_add(1);
200 }
201
202 #[cold]
203 fn build_error_path(&self) -> crate::error::MaybeTomlPath {
204 let depth = (self.path_len as usize).min(self.path.len());
205 crate::error::MaybeTomlPath::from_components(&self.path[..depth])
206 }
207
208 #[cold]
209 fn set_duplicate_key_error(&mut self, first: Span, second: Span) -> Failed {
210 self.error_span = second;
211 self.error_kind = Some(ErrorKind::DuplicateKey { first });
212 Failed
213 }
214
215 #[cold]
216 fn set_error(&mut self, start: usize, end: Option<usize>, kind: ErrorKind<'static>) -> Failed {
217 let len = self.bytes.len();
218 let start = start.min(len);
219 self.error_span = Span::new(start as u32, end.unwrap_or((start + 1).min(len)) as u32);
220 self.error_kind = Some(kind);
221 Failed
222 }
223
    /// Consumes the pending error state and packages it as an [`Error`].
    ///
    /// Combines the stored kind and span with the current key/index path and
    /// clears `error_kind`.
    ///
    /// # Panics
    /// Panics if no error kind is pending.
    fn take_error(&mut self) -> Error {
        let kind = self
            .error_kind
            .take()
            .expect("take_error called without error");
        let span = self.error_span;
        let path = self.build_error_path();

        // NOTE(review): this has no effect on the returned value; it only
        // forces the compiler to materialise a look at the first input byte.
        // Presumably kept deliberately for its influence on code generation —
        // confirm (and benchmark) before removing.
        std::hint::black_box(&self.bytes.iter().enumerate().next());

        Error::new_with_path(kind, span, path)
    }
239
240 #[inline]
241 fn peek_byte(&self) -> Option<u8> {
242 self.bytes.get(self.cursor).copied()
243 }
244
245 #[inline]
246 fn peek_byte_at(&self, offset: usize) -> Option<u8> {
247 self.bytes.get(self.cursor + offset).copied()
248 }
249
250 #[inline]
251 fn eat_byte(&mut self, b: u8) -> bool {
252 if self.peek_byte() == Some(b) {
253 self.cursor += 1;
254 true
255 } else {
256 false
257 }
258 }
259 #[cold]
260 fn expected_error(&mut self, b: u8) -> Failed {
261 let start = self.cursor;
262 let (found_desc, end) = self.scan_token_desc_and_end();
263 self.set_error(
264 start,
265 Some(end),
266 ErrorKind::Wanted {
267 expected: byte_describe(b),
268 found: found_desc,
269 },
270 )
271 }
272
273 fn expect_byte(&mut self, b: u8) -> Result<(), Failed> {
274 if self.peek_byte() == Some(b) {
275 self.cursor += 1;
276 Ok(())
277 } else {
278 Err(self.expected_error(b))
279 }
280 }
281
282 fn eat_whitespace(&mut self) {
283 while let Some(b) = self.peek_byte() {
284 if b == b' ' || b == b'\t' {
285 self.cursor += 1;
286 } else {
287 break;
288 }
289 }
290 }
291
292 fn eat_whitespace_to(&mut self) -> Option<u8> {
293 while let Some(b) = self.peek_byte() {
294 if b == b' ' || b == b'\t' {
295 self.cursor += 1;
296 } else {
297 return Some(b);
298 }
299 }
300 None
301 }
302
303 fn eat_newline_or_eof(&mut self) -> Result<(), Failed> {
304 match self.peek_byte() {
305 None => Ok(()),
306 Some(b'\n') => {
307 self.cursor += 1;
308 Ok(())
309 }
310 Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
311 self.cursor += 2;
312 Ok(())
313 }
314 _ => {
315 let start = self.cursor;
316 let (found_desc, end) = self.scan_token_desc_and_end();
317 Err(self.set_error(
318 start,
319 Some(end),
320 ErrorKind::Wanted {
321 expected: &"newline",
322 found: found_desc,
323 },
324 ))
325 }
326 }
327 }
328
329 fn eat_comment(&mut self) -> Result<bool, Failed> {
330 if !self.eat_byte(b'#') {
331 return Ok(false);
332 }
333 while let Some(0x09 | 0x20..=0x7E | 0x80..) = self.peek_byte() {
334 self.cursor += 1;
335 }
336 self.eat_newline_or_eof().map(|()| true)
337 }
338
339 fn eat_newline(&mut self) -> bool {
340 match self.peek_byte() {
341 Some(b'\n') => {
342 self.cursor += 1;
343 true
344 }
345 Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
346 self.cursor += 2;
347 true
348 }
349 _ => false,
350 }
351 }
352
353 fn scan_token_desc_and_end(&self) -> (&'static &'static str, usize) {
357 let Some(b) = self.peek_byte() else {
358 return (&"eof", self.bytes.len());
359 };
360 match b {
361 b'\n' => (&"a newline", self.cursor + 1),
362 b'\r' => (&"a carriage return", self.cursor + 1),
363 b' ' | b'\t' => {
364 let mut end = self.cursor + 1;
365 while end < self.bytes.len()
366 && (self.bytes[end] == b' ' || self.bytes[end] == b'\t')
367 {
368 end += 1;
369 }
370 (&"whitespace", end)
371 }
372 b'#' => (&"a comment", self.cursor + 1),
373 b'=' => (&"an equals", self.cursor + 1),
374 b'.' => (&"a period", self.cursor + 1),
375 b',' => (&"a comma", self.cursor + 1),
376 b':' => (&"a colon", self.cursor + 1),
377 b'+' => (&"a plus", self.cursor + 1),
378 b'{' => (&"a left brace", self.cursor + 1),
379 b'}' => (&"a right brace", self.cursor + 1),
380 b'[' => (&"a left bracket", self.cursor + 1),
381 b']' => (&"a right bracket", self.cursor + 1),
382 b'\'' | b'"' => (&"a string", self.cursor + 1),
383 _ if is_keylike_byte(b) => {
384 let mut end = self.cursor + 1;
385 while end < self.bytes.len() && is_keylike_byte(self.bytes[end]) {
386 end += 1;
387 }
388 (&"an identifier", end)
389 }
390 _ => (&"a character", self.cursor + 1),
391 }
392 }
393
394 fn read_keylike(&mut self) -> &'de str {
395 let start = self.cursor;
396 while let Some(b) = self.peek_byte() {
397 if !is_keylike_byte(b) {
398 break;
399 }
400 self.cursor += 1;
401 }
402 unsafe { self.str_slice(start, self.cursor) }
404 }
405
406 fn read_table_key(&mut self) -> Result<Key<'de>, Failed> {
407 let Some(b) = self.peek_byte() else {
408 return Err(self.set_error(
409 self.bytes.len(),
410 None,
411 ErrorKind::Wanted {
412 expected: &"a table key",
413 found: &"eof",
414 },
415 ));
416 };
417 match b {
418 b'"' => {
419 let start = self.cursor;
420 self.cursor += 1;
421 let (key, multiline) = match self.read_string(start, b'"') {
422 Ok(v) => v,
423 Err(e) => return Err(e),
424 };
425 if multiline {
426 return Err(self.set_error(
427 start,
428 Some(key.span.end as usize),
429 ErrorKind::MultilineStringKey,
430 ));
431 }
432 Ok(key)
433 }
434 b'\'' => {
435 let start = self.cursor;
436 self.cursor += 1;
437 let (key, multiline) = match self.read_string(start, b'\'') {
438 Ok(v) => v,
439 Err(e) => return Err(e),
440 };
441 if multiline {
442 return Err(self.set_error(
443 start,
444 Some(key.span.end as usize),
445 ErrorKind::MultilineStringKey,
446 ));
447 }
448 Ok(key)
449 }
450 b if is_keylike_byte(b) => {
451 let start = self.cursor;
452 let name = self.read_keylike();
453 let span = Span::new(start as u32, self.cursor as u32);
454 Ok(Key { name, span })
455 }
456 _ => {
457 let start = self.cursor;
458 let (found_desc, end) = self.scan_token_desc_and_end();
459 Err(self.set_error(
460 start,
461 Some(end),
462 ErrorKind::Wanted {
463 expected: &"a table key",
464 found: found_desc,
465 },
466 ))
467 }
468 }
469 }
470
471 fn read_string(&mut self, start: usize, delim: u8) -> Result<(Key<'de>, bool), Failed> {
474 let mut multiline = false;
475 if self.eat_byte(delim) {
476 if self.eat_byte(delim) {
477 multiline = true;
478 } else {
479 return Ok((
480 Key {
481 name: "",
482 span: Span::new(start as u32, self.cursor as u32),
483 },
484 false,
485 ));
486 }
487 }
488
489 let mut content_start = self.cursor;
490 if multiline {
491 match self.peek_byte() {
492 Some(b'\n') => {
493 self.cursor += 1;
494 content_start = self.cursor;
495 }
496 Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
497 self.cursor += 2;
498 content_start = self.cursor;
499 }
500 _ => {}
501 }
502 }
503
504 self.read_string_loop(start, content_start, multiline, delim)
505 }
506
    /// Advances the cursor over "plain" string content: bytes that are not
    /// the delimiter, a backslash, DEL (0x7F) or a control byte.
    ///
    /// The first byte is checked individually; the rest is scanned eight
    /// bytes at a time with word-wide bit tricks, and the sub-word tail is
    /// finished by `skip_string_plain_slow`. On return the cursor is at the
    /// first byte needing attention, or at end of input.
    ///
    /// Unlike the scalar checks, the word-wide scan also stops at a tab
    /// (0x09); `read_string_loop` accepts the tab as content and calls back
    /// in, so the result is the same.
    fn skip_string_plain(&mut self, delim: u8) {
        let Some(&b) = self.bytes.get(self.cursor) else {
            return;
        };

        if b == delim || b == b'\\' || b == 0x7F || (b < 0x20 && b != 0x09) {
            return;
        }
        self.cursor += 1;

        let base = self.cursor;
        let rest = &self.bytes[base..];

        type Chunk = u64;
        const STEP: usize = std::mem::size_of::<Chunk>();
        // 0x0101…01: multiplying by a byte value broadcasts it to every lane.
        const ONE: Chunk = Chunk::MAX / 255;
        // 0x8080…80: the top bit of every lane.
        const HIGH: Chunk = ONE << 7;
        let fill_delim = ONE * Chunk::from(delim);
        let fill_bslash = ONE * Chunk::from(b'\\');
        let fill_del = ONE * 0x7F;

        let chunks = rest.chunks_exact(STEP);
        let remainder_len = chunks.remainder().len();

        for (i, chunk) in chunks.enumerate() {
            // `chunks_exact` yields slices of exactly `STEP` bytes, so the
            // conversion cannot fail.
            let v = Chunk::from_le_bytes(chunk.try_into().unwrap());

            // After masking with HIGH: lanes whose byte is below 0x20.
            let has_ctrl = v.wrapping_sub(ONE * 0x20) & !v;
            // After masking with HIGH: lanes equal to the target byte (the
            // XOR turns a match into a zero lane, which is then detected).
            let eq_delim = (v ^ fill_delim).wrapping_sub(ONE) & !(v ^ fill_delim);
            let eq_bslash = (v ^ fill_bslash).wrapping_sub(ONE) & !(v ^ fill_bslash);
            let eq_del = (v ^ fill_del).wrapping_sub(ONE) & !(v ^ fill_del);

            let masked = (has_ctrl | eq_delim | eq_bslash | eq_del) & HIGH;
            if masked != 0 {
                // Borrows only travel toward higher lanes, so the lowest set
                // flag is a genuine hit; little-endian loading makes that
                // lane's index the byte offset within the chunk.
                self.cursor = base + i * STEP + masked.trailing_zeros() as usize / 8;
                return;
            }
        }

        // Every full chunk was clean; finish the tail byte by byte.
        self.cursor = self.bytes.len() - remainder_len;
        self.skip_string_plain_slow(delim);
    }
562
563 #[cold]
564 #[inline(never)]
565 fn skip_string_plain_slow(&mut self, delim: u8) {
566 while let Some(&b) = self.bytes.get(self.cursor) {
567 if b == delim || b == b'\\' || b == 0x7F || (b < 0x20 && b != 0x09) {
568 return;
569 }
570 self.cursor += 1;
571 }
572 }
573
    /// Reads the body of a string up to and including its closing delimiter.
    ///
    /// * `start` – offset of the opening delimiter (start of the span).
    /// * `content_start` – offset of the first content byte.
    /// * `multiline` – whether the string was opened with three delimiters.
    /// * `delim` – `b'"'` (basic string, escapes processed) or `b'\''`
    ///   (literal string, backslashes kept verbatim).
    ///
    /// A string without escapes is returned as a slice of the input. Once an
    /// escape is met, content is copied piecewise into an arena scratch
    /// buffer and the committed buffer is returned instead.
    fn read_string_loop(
        &mut self,
        start: usize,
        content_start: usize,
        multiline: bool,
        delim: u8,
    ) -> Result<(Key<'de>, bool), Failed> {
        // Start of the input range not yet copied into `scratch`.
        let mut flush_from = content_start;
        // Created lazily on the first escape sequence.
        let mut scratch: Option<crate::arena::Scratch<'de>> = None;
        loop {
            self.skip_string_plain(delim);

            let i = self.cursor;
            let Some(&b) = self.bytes.get(i) else {
                return Err(self.set_error(
                    i,
                    Some(i),
                    ErrorKind::UnterminatedString(delim as char),
                ));
            };
            self.cursor = i + 1;

            match b {
                b'\r' => {
                    // `\r` is only valid as part of `\r\n`, and a line ending
                    // is only valid inside a multi-line string.
                    if self.eat_byte(b'\n') {
                        if !multiline {
                            return Err(self.set_error(
                                i,
                                Some(i),
                                ErrorKind::UnterminatedString(delim as char),
                            ));
                        }
                    } else {
                        return Err(self.set_error(i, None, ErrorKind::InvalidCharInString('\r')));
                    }
                }
                b'\n' => {
                    if !multiline {
                        return Err(self.set_error(
                            i,
                            Some(i),
                            ErrorKind::UnterminatedString(delim as char),
                        ));
                    }
                }
                d if d == delim => {
                    let (span, end) = if multiline {
                        // Fewer than three delimiters in a row are content.
                        if !self.eat_byte(delim) {
                            continue;
                        }
                        if !self.eat_byte(delim) {
                            continue;
                        }
                        // Up to two further delimiters belong to the content:
                        // the closing triple is the last three of the run.
                        let mut extra = 0usize;
                        if self.eat_byte(delim) {
                            extra += 1;
                        }
                        if self.eat_byte(delim) {
                            extra += 1;
                        }

                        (Span::new(start as u32, self.cursor as u32), i + extra)
                    } else {
                        (Span::new(start as u32, self.cursor as u32), i)
                    };

                    let name = if let Some(mut s) = scratch {
                        s.extend(&self.bytes[flush_from..end]);
                        let committed = s.commit();
                        // SAFETY: the buffer holds only verbatim runs of the
                        // input and the bytes written by `read_basic_escape`
                        // (ASCII bytes or `char::encode_utf8` output).
                        // NOTE(review): assumes the copied runs begin and end
                        // on character boundaries — confirm.
                        unsafe { std::str::from_utf8_unchecked(committed) }
                    } else {
                        // SAFETY: both ends sit next to ASCII delimiter or
                        // line-ending bytes of the UTF-8 input.
                        // NOTE(review): confirm against `str_slice`'s contract.
                        unsafe { self.str_slice(content_start, end) }
                    };
                    return Ok((Key { name, span }, multiline));
                }
                b'\\' if delim == b'"' => {
                    let arena = self.arena;
                    // NOTE(review): `Arena::scratch` is unsafe; its contract
                    // is defined outside this view — confirm it is upheld.
                    let s = scratch.get_or_insert_with(|| unsafe { arena.scratch() });
                    // Copy the plain run preceding the backslash, then let the
                    // escape reader append the decoded bytes.
                    s.extend(&self.bytes[flush_from..i]);
                    if let Err(e) = self.read_basic_escape(s, start, multiline) {
                        return Err(e);
                    }
                    flush_from = self.cursor;
                }
                // Tab, printable ASCII (including a backslash in a literal
                // string) and non-ASCII bytes are ordinary content.
                0x09 | 0x20..=0x7E | 0x80.. => {}
                _ => {
                    return Err(self.set_error(i, None, ErrorKind::InvalidCharInString(b as char)));
                }
            }
        }
    }
672
    /// Decodes one escape sequence in a basic string and appends the result
    /// to `scratch`.
    ///
    /// The cursor must be just past the backslash. Handles the single-letter
    /// escapes (`\" \\ \b \f \n \r \t \e`), the hex forms `\xHH`, `\uHHHH`
    /// and `\UHHHHHHHH`, and, when `multi` is set, the line-continuation
    /// backslash, which appends nothing.
    ///
    /// `string_start` is only forwarded to `read_hex`.
    fn read_basic_escape(
        &mut self,
        scratch: &mut crate::arena::Scratch<'_>,
        string_start: usize,
        multi: bool,
    ) -> Result<(), Failed> {
        let i = self.cursor;
        let Some(&b) = self.bytes.get(i) else {
            return Err(self.set_error(i, Some(i), ErrorKind::UnterminatedString('"')));
        };
        self.cursor = i + 1;
        // `'byte` yields a single-byte escape; `'char` yields a decoded code
        // point from a hex escape. The continuation case and all errors
        // return straight from inside the match.
        let chr: char = 'char: {
            let byte: u8 = 'byte: {
                match b {
                    b'"' => break 'byte b'"',
                    b'\\' => break 'byte b'\\',
                    b'b' => break 'byte 0x08,
                    b'f' => break 'byte 0x0C,
                    b'n' => break 'byte b'\n',
                    b'r' => break 'byte b'\r',
                    b't' => break 'byte b'\t',
                    b'e' => break 'byte 0x1B,
                    b'u' => match self.read_hex(4, string_start, i) {
                        Ok(ch) => break 'char ch,
                        Err(e) => return Err(e),
                    },
                    b'U' => match self.read_hex(8, string_start, i) {
                        Ok(ch) => break 'char ch,
                        Err(e) => return Err(e),
                    },
                    b'x' => match self.read_hex(2, string_start, i) {
                        Ok(ch) => break 'char ch,
                        Err(e) => return Err(e),
                    },
                    // Line continuation: a backslash followed by optional
                    // blanks and a line ending swallows all whitespace up to
                    // the next content byte.
                    b' ' | b'\t' | b'\n' | b'\r' if multi => {
                        let c = if b == b'\r' && self.peek_byte() == Some(b'\n') {
                            self.cursor += 1;
                            '\n'
                        } else if b == b'\r' {
                            return Err(self.set_error(
                                i,
                                None,
                                ErrorKind::InvalidCharInString('\r'),
                            ));
                        } else {
                            b as char
                        };
                        if c != '\n' {
                            // The backslash was followed by a blank: only more
                            // blanks may precede the mandatory line ending.
                            loop {
                                match self.peek_byte() {
                                    Some(b' ' | b'\t') => {
                                        self.cursor += 1;
                                    }
                                    Some(b'\n') => {
                                        self.cursor += 1;
                                        break;
                                    }
                                    Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
                                        self.cursor += 2;
                                        break;
                                    }
                                    _ => {
                                        return Err(self.set_error(
                                            i,
                                            None,
                                            ErrorKind::InvalidEscape(c),
                                        ));
                                    }
                                }
                            }
                        }
                        // Skip every following blank and line ending.
                        loop {
                            match self.peek_byte() {
                                Some(b' ' | b'\t' | b'\n') => {
                                    self.cursor += 1;
                                }
                                Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
                                    self.cursor += 2;
                                }
                                _ => break,
                            }
                        }
                    }
                    _ => {
                        // Step back so the error reports the full (possibly
                        // multi-byte) character after the backslash.
                        self.cursor -= 1;
                        return Err(self.set_error(
                            self.cursor,
                            None,
                            ErrorKind::InvalidEscape(self.next_char_for_error()),
                        ));
                    }
                }
                // Only the line-continuation arm falls through to here.
                return Ok(());
            };

            scratch.push(byte);
            return Ok(());
        };
        let mut buf = [0u8; 4];
        let len = chr.encode_utf8(&mut buf).len();
        scratch.extend(&buf[..len]);
        Ok(())
    }
777
778 fn read_hex(
779 &mut self,
780 n: usize,
781 _string_start: usize,
782 escape_start: usize,
783 ) -> Result<char, Failed> {
784 let mut val: u32 = 0;
785 for _ in 0..n {
786 let Some(&byte) = self.bytes.get(self.cursor) else {
787 return Err(self.set_error(
788 self.cursor,
789 Some(self.cursor),
790 ErrorKind::UnterminatedString('"'),
791 ));
792 };
793 let digit = HEX[byte as usize];
794 if digit >= 0 {
795 val = (val << 4) | digit as u32;
796 self.cursor += 1;
797 } else {
798 return Err(self.set_error(
799 self.cursor,
800 None,
801 ErrorKind::InvalidHexEscape(self.next_char_for_error()),
802 ));
803 }
804 }
805 match char::from_u32(val) {
806 Some(ch) => Ok(ch),
807 None => Err(self.set_error(
808 escape_start - 1,
809 Some(self.cursor),
810 ErrorKind::InvalidEscapeValue(val),
811 )),
812 }
813 }
814
815 fn next_char_for_error(&self) -> char {
816 let text = unsafe { std::str::from_utf8_unchecked(self.bytes) };
818 if let Some(value) = text.get(self.cursor..) {
819 value.chars().next().unwrap_or(char::REPLACEMENT_CHARACTER)
820 } else {
821 char::REPLACEMENT_CHARACTER
822 }
823 }
    /// Parses a value whose first key-like token starts with a digit: an
    /// integer, a float or a date-time.
    ///
    /// * `start` – offset where the value begins (at the sign, if any).
    /// * `end` – offset just past `s`.
    /// * `s` – the key-like token already consumed by the caller, without
    ///   the sign.
    /// * `sign` – `0` for `-`, `1` for `+`, `2` for no sign.
    ///
    /// The alternatives are tried in a fixed order: prefixed integer, float
    /// with a fractional part, date-time, decimal integer, float with only
    /// an exponent.
    fn number(&mut self, start: u32, end: u32, s: &'de str, sign: u8) -> Result<Item<'de>, Failed> {
        let bytes = s.as_bytes();

        // Unsigned `0x` / `0o` / `0b` literals; the last argument is the
        // number of bits per digit.
        if sign == 2
            && let [b'0', format, rest @ ..] = s.as_bytes()
        {
            match format {
                b'x' => return self.integer_prefixed(rest, Span::new(start, end), 4),
                b'o' => return self.integer_prefixed(rest, Span::new(start, end), 3),
                b'b' => return self.integer_prefixed(rest, Span::new(start, end), 1),
                _ => {}
            }
        }

        // A `.` right after the token means a float with a fractional part.
        if self.eat_byte(b'.') {
            let at = self.cursor;
            return match self.peek_byte() {
                Some(b) if is_keylike_byte(b) => {
                    let after = self.read_keylike();
                    match self.float(start, end, s, Some(after), sign) {
                        Ok(f) => Ok(Item::float_spanned(f, Span::new(start, self.cursor as u32))),
                        Err(e) => Err(e),
                    }
                }
                _ => Err(self.set_error(
                    start as usize,
                    Some(at),
                    ErrorKind::InvalidFloat("nothing after decimal point"),
                )),
            };
        }

        // Unsigned values may be date-times; re-scan from the value's start.
        if sign == 2 {
            let head = &self.bytes[start as usize..];
            match DateTime::munch(head) {
                Ok((consumed, moment)) => {
                    self.cursor = start as usize + consumed;
                    return Ok(Item::moment(moment, Span::new(start, self.cursor as u32)));
                }
                // A non-empty reason is reported as a malformed date-time;
                // the span runs to the next byte that ends a value.
                Err(reason) if !reason.is_empty() => {
                    let rest = &self.bytes[start as usize..];
                    let mut consumed = 0;
                    while consumed < rest.len()
                        && !matches!(
                            rest[consumed],
                            b' ' | b'\t' | b'\n' | b'\r' | b'#' | b',' | b']' | b'}'
                        )
                    {
                        consumed += 1;
                    }
                    self.cursor = start as usize + consumed;
                    return Err(self.set_error(
                        start as usize,
                        Some(self.cursor),
                        ErrorKind::InvalidDateTime(reason),
                    ));
                }
                // An empty reason: fall through to the numeric parsers.
                Err(_) => {}
            }
        }

        if sign != 2
            && let [b'0', b'x' | b'o' | b'b', ..] = bytes
        {
            return Err(self.set_error(
                start as usize,
                Some(end as usize),
                ErrorKind::InvalidInteger("signs are not allowed on prefixed integers"),
            ));
        }

        if let Ok(v) = self.integer_decimal(bytes, Span::new(start, end), sign) {
            return Ok(v);
        }

        // Not a plain integer: an `e`/`E` makes it an exponent-only float.
        if bytes.iter().any(|&b| b == b'e' || b == b'E') {
            return match self.float(start, end, s, None, sign) {
                Ok(f) => Ok(Item::float_spanned(f, Span::new(start, self.cursor as u32))),
                Err(e) => Err(e),
            };
        }

        // Surface the error recorded by `integer_decimal` above.
        Err(Failed)
    }
910
    /// Parses a decimal integer literal into an `i128` item.
    ///
    /// * `bytes` – the digits (and underscores) without the sign.
    /// * `span` – span of the whole literal, including the sign if present.
    /// * `sign` – `0` for `-`, `1` for `+`, `2` for no sign.
    ///
    /// Underscores must sit between two digits, a leading `0` may not be
    /// followed by further digits, and the value must fit in `i128`.
    ///
    /// On failure the error is stored on the parser and `Err(Failed)` is
    /// returned; the error span narrows to the offending byte where one can
    /// be identified.
    fn integer_decimal(
        &mut self,
        bytes: &'de [u8],
        span: Span,
        sign: u8,
    ) -> Result<Item<'de>, Failed> {
        let mut acc: u128 = 0;
        let mut prev_underscore = false;
        let mut has_digit = false;
        let mut leading_zero = false;
        let negative = sign == 0;
        // `span` starts at the sign, `bytes` after it; this offset maps an
        // index into `bytes` back to an input position.
        let sign_len = if sign != 2 { 1u32 } else { 0u32 };
        let mut error_span = span;
        // The labelled block evaluates to the error reason; success returns
        // directly from inside it.
        let reason = 'error: {
            let mut i = 0;
            while i < bytes.len() {
                let b = bytes[i];
                if b == b'_' {
                    if !has_digit || prev_underscore {
                        let pos = span.start + sign_len + i as u32;
                        error_span = Span::new(pos, pos + 1);
                        break 'error "underscores must be between two digits";
                    }
                    prev_underscore = true;
                    i += 1;
                    continue;
                }
                if !b.is_ascii_digit() {
                    let pos = span.start + sign_len + i as u32;
                    error_span = Span::new(pos, pos + 1);
                    break 'error "contains non-digit character";
                }
                // Any digit after a leading zero is an error.
                if leading_zero {
                    break 'error "leading zeros are not allowed";
                }
                if !has_digit && b == b'0' {
                    leading_zero = true;
                }
                has_digit = true;
                prev_underscore = false;
                let digit = (b - b'0') as u128;
                acc = match acc.checked_mul(10).and_then(|a| a.checked_add(digit)) {
                    Some(v) => v,
                    None => break 'error "integer overflow",
                };
                i += 1;
            }

            if !has_digit {
                break 'error "expected at least one digit";
            }
            if prev_underscore {
                let pos = span.start + sign_len + bytes.len() as u32 - 1;
                error_span = Span::new(pos, pos + 1);
                break 'error "underscores must be between two digits";
            }

            // The magnitude of `i128::MIN` is one more than `i128::MAX`.
            let max = if negative {
                (i128::MAX as u128) + 1
            } else {
                i128::MAX as u128
            };
            if acc > max {
                break 'error "integer overflow";
            }

            let val = if negative {
                // `wrapping_neg` maps the magnitude 2^127 (which casts to
                // `i128::MIN`) onto `i128::MIN` itself.
                (acc as i128).wrapping_neg()
            } else {
                acc as i128
            };
            return Ok(Item::integer_spanned(val, span));
        };
        self.error_span = error_span;
        self.error_kind = Some(ErrorKind::InvalidInteger(reason));
        Err(Failed)
    }
988
    /// Parses the digits of a `0x` / `0o` / `0b` integer literal.
    ///
    /// * `bytes` – the digits (and underscores) after the two-byte prefix.
    /// * `span` – span of the whole literal, prefix included.
    /// * `shift` – bits per digit: `4` for hex, `3` for octal, anything else
    ///   is reported as binary.
    ///
    /// Underscores must sit between two digits and the value must fit in
    /// `i128`. On failure the error is stored on the parser and
    /// `Err(Failed)` is returned.
    #[inline(never)]
    fn integer_prefixed(
        &mut self,
        bytes: &'de [u8],
        span: Span,
        shift: u32,
    ) -> Result<Item<'de>, Failed> {
        // Largest digit value representable in `shift` bits.
        let max_digit = (1i8 << shift) - 1;
        let invalid_msg = match shift {
            4 => "invalid digit for hexadecimal",
            3 => "invalid digit for octal",
            _ => "invalid digit for binary",
        };
        let mut acc: u128 = 0;
        let mut prev_underscore = false;
        let mut has_digit = false;
        let mut error_span = span;
        // The labelled block evaluates to the error reason; success returns
        // directly from inside it.
        let reason = 'error: {
            if bytes.is_empty() {
                break 'error "no digits after prefix";
            }

            let mut i = 0;
            while i < bytes.len() {
                let b = bytes[i];
                if b == b'_' {
                    if !has_digit || prev_underscore {
                        // `+ 2` skips the prefix, which `bytes` excludes.
                        let pos = span.start + 2 + i as u32;
                        error_span = Span::new(pos, pos + 1);
                        break 'error "underscores must be between two digits";
                    }
                    prev_underscore = true;
                    i += 1;
                    continue;
                }
                let digit = HEX[b as usize];
                if digit < 0 || digit > max_digit {
                    let pos = span.start + 2 + i as u32;
                    error_span = Span::new(pos, pos + 1);
                    break 'error invalid_msg;
                }
                has_digit = true;
                prev_underscore = false;
                // Shifting would push set bits off the top of the u128.
                if acc >> (128 - shift) != 0 {
                    break 'error "integer overflow";
                }
                acc = (acc << shift) | digit as u128;
                i += 1;
            }

            if !has_digit {
                break 'error "no digits after prefix";
            }
            if prev_underscore {
                let pos = span.start + 2 + bytes.len() as u32 - 1;
                error_span = Span::new(pos, pos + 1);
                break 'error "underscores must be between two digits";
            }

            if acc > i128::MAX as u128 {
                break 'error "integer overflow";
            }
            return Ok(Item::integer_spanned(acc as i128, span));
        };
        self.error_span = error_span;
        self.error_kind = Some(ErrorKind::InvalidInteger(reason));
        Err(Failed)
    }
1057
    /// Assembles and parses a float literal.
    ///
    /// * `start` / `end` – offsets of the value's start (at the sign, if
    ///   any) and of the end of `s`.
    /// * `s` – the integer-part token, without the sign.
    /// * `after_decimal` – the token following the `.`, if there was one.
    /// * `sign` – `0` for `-`; other values add no sign to the text.
    ///
    /// The pieces are copied, underscores removed, into a scratch buffer
    /// that is handed to `f64`'s `FromStr`. A `+`-signed exponent arrives as
    /// a separate token and is consumed here. Non-finite results are
    /// rejected as overflow.
    fn float(
        &mut self,
        start: u32,
        end: u32,
        s: &'de str,
        after_decimal: Option<&'de str>,
        sign: u8,
    ) -> Result<f64, Failed> {
        let s_start = start as usize;
        let s_end = end as usize;

        // A `0` followed by another digit or an underscore is a leading zero.
        if let [b'0', b'0'..=b'9' | b'_', ..] = s.as_bytes() {
            return Err(self.set_error(
                s_start,
                Some(s_end),
                ErrorKind::InvalidFloat("leading zeros are not allowed"),
            ));
        }

        // NOTE(review): `Arena::scratch` is unsafe; its contract is defined
        // outside this view — confirm it is upheld here.
        let mut scratch = unsafe { self.arena.scratch() };

        if sign == 0 {
            scratch.push(b'-');
        }
        // NOTE(review): `push_strip_underscores` presumably returns `false`
        // for a misplaced underscore (defined outside this view) — confirm.
        if !scratch.push_strip_underscores(s.as_bytes()) {
            return Err(self.set_error(
                s_start,
                Some(s_end),
                ErrorKind::InvalidFloat("underscores must be between two digits"),
            ));
        }

        // The token that may end in a dangling `e` / `E`.
        let mut last = s;

        if let Some(after) = after_decimal {
            if !matches!(after.as_bytes().first(), Some(b'0'..=b'9')) {
                return Err(self.set_error(
                    s_start,
                    Some(self.cursor),
                    ErrorKind::InvalidFloat("expected digit after decimal point"),
                ));
            }
            scratch.push(b'.');
            if !scratch.push_strip_underscores(after.as_bytes()) {
                return Err(self.set_error(
                    s_start,
                    Some(self.cursor),
                    ErrorKind::InvalidFloat("underscores must be between two digits"),
                ));
            }
            last = after;
        }

        // A token ending in `e` / `E` means the exponent digits are still
        // in the input, after an optional `+`; the `+` itself is dropped.
        if matches!(last.as_bytes().last(), Some(b'e' | b'E')) {
            self.eat_byte(b'+');
            match self.peek_byte() {
                Some(b) if is_keylike_byte(b) && b != b'-' => {
                    let next = self.read_keylike();
                    if !scratch.push_strip_underscores(next.as_bytes()) {
                        return Err(self.set_error(
                            s_start,
                            Some(self.cursor),
                            ErrorKind::InvalidFloat("exponent requires at least one digit"),
                        ));
                    }
                }
                _ => {
                    return Err(self.set_error(
                        s_start,
                        Some(self.cursor),
                        ErrorKind::InvalidFloat("exponent requires at least one digit"),
                    ));
                }
            }
        }

        // NOTE(review): the buffer holds `-`, `.` and the key-like tokens
        // (minus underscores); assumes key-like bytes are ASCII so the
        // content is valid UTF-8 — confirm.
        let n: f64 = match unsafe { std::str::from_utf8_unchecked(scratch.as_bytes()) }.parse() {
            Ok(n) => n,
            Err(_) => {
                return Err(self.set_error(
                    s_start,
                    Some(self.cursor),
                    ErrorKind::InvalidFloat(""),
                ));
            }
        };
        if n.is_finite() {
            Ok(n)
        } else {
            Err(self.set_error(
                s_start,
                Some(self.cursor),
                ErrorKind::InvalidFloat("float overflow"),
            ))
        }
    }
1164
    /// Parses one value at the cursor: a string, inline table, array,
    /// boolean, `inf` / `nan`, or a number or date-time.
    ///
    /// `depth_remaining` is the nesting budget; it is decremented for each
    /// inline table or array entered, and those parsers fail once it drops
    /// below zero.
    fn value(&mut self, depth_remaining: i16) -> Result<Item<'de>, Failed> {
        let at = self.cursor;
        let Some(byte) = self.peek_byte() else {
            return Err(self.set_error(self.bytes.len(), None, ErrorKind::UnexpectedEof));
        };
        // Sign encoding used by the numeric parsers:
        // 0 = `-`, 1 = `+`, 2 = no sign.
        let sign = match byte {
            b'"' | b'\'' => {
                self.cursor += 1;
                return match self.read_string(self.cursor - 1, byte) {
                    Ok((key, _)) => Ok(Item::string_spanned(key.name, key.span)),
                    Err(e) => Err(e),
                };
            }
            b'{' => {
                let start = self.cursor as u32;
                self.cursor += 1;
                let mut table = crate::item::table::InnerTable::new();
                if let Err(err) = self.inline_table_contents(&mut table, depth_remaining - 1) {
                    return Err(err);
                }
                return Ok(Item::table_frozen(
                    table,
                    Span::new(start, self.cursor as u32),
                ));
            }
            b'[' => {
                let start = self.cursor as u32;
                self.cursor += 1;
                let mut arr = crate::item::array::InternalArray::new();
                if let Err(err) = self.array_contents(&mut arr, depth_remaining - 1) {
                    return Err(err);
                };
                return Ok(Item::array(arr, Span::new(start, self.cursor as u32)));
            }
            b'-' => {
                self.cursor += 1;
                0
            }
            b'+' => {
                self.cursor += 1;
                1
            }
            _ => 2,
        };

        let key = self.read_keylike();

        let end = self.cursor as u32;
        match key {
            // `inf` and `nan` accept an optional sign; booleans do not.
            "inf" => {
                return Ok(Item::float_spanned(
                    if sign != 0 {
                        f64::INFINITY
                    } else {
                        f64::NEG_INFINITY
                    },
                    Span::new(at as u32, end),
                ));
            }
            "nan" => {
                return Ok(Item::float_spanned(
                    if sign != 0 {
                        f64::NAN.copysign(1.0)
                    } else {
                        f64::NAN.copysign(-1.0)
                    },
                    Span::new(at as u32, end),
                ));
            }
            "true" | "false" if sign == 2 => {
                return Ok(Item::boolean(key == "true", Span::new(at as u32, end)));
            }
            _ => (),
        }

        // Anything else must start with a digit; otherwise pick the most
        // specific error for what was found.
        if let [b'0'..=b'9', ..] = key.as_bytes() {
            self.number(at as u32, end, key, sign)
        } else if byte == b'\r' {
            Err(self.set_error(at, None, ErrorKind::Unexpected('\r')))
        } else if sign != 2 {
            Err(self.set_error(
                at,
                Some(self.cursor),
                ErrorKind::InvalidInteger("expected digit after sign"),
            ))
        } else if key.is_empty() {
            Err(self.set_error(at, None, ErrorKind::Unexpected(self.next_char_for_error())))
        } else {
            Err(self.set_error(at, Some(self.cursor), ErrorKind::UnquotedString))
        }
    }
1256
1257 fn inline_table_contents(
1258 &mut self,
1259 out: &mut crate::item::table::InnerTable<'de>,
1260 depth_remaining: i16,
1261 ) -> Result<(), Failed> {
1262 if depth_remaining < 0 {
1263 return Err(self.set_error(
1264 self.cursor,
1265 None,
1266 ErrorKind::OutOfRange {
1267 ty: &"Max recursion depth exceeded",
1268 range: &"",
1269 },
1270 ));
1271 }
1272 if let Err(e) = self.eat_inline_table_whitespace() {
1273 return Err(e);
1274 }
1275 if self.eat_byte(b'}') {
1276 return Ok(());
1277 }
1278 loop {
1279 let saved_path_len = self.path_len;
1280 let mut table_ref: &mut crate::item::table::InnerTable<'de> = &mut *out;
1281 let mut key = match self.read_table_key() {
1282 Ok(k) => k,
1283 Err(e) => return Err(e),
1284 };
1285 self.eat_whitespace();
1286 while self.eat_byte(b'.') {
1287 self.eat_whitespace();
1288 self.push_path(PathComponent::Key(key));
1289 table_ref = match self.navigate_dotted_key(table_ref, key) {
1290 Ok(t) => t,
1291 Err(e) => return Err(e),
1292 };
1293 key = match self.read_table_key() {
1294 Ok(k) => k,
1295 Err(e) => return Err(e),
1296 };
1297 self.eat_whitespace();
1298 }
1299 if let Err(e) = self.eat_inline_table_whitespace() {
1300 return Err(e);
1301 }
1302 if let Err(e) = self.expect_byte(b'=') {
1303 return Err(e);
1304 }
1305 if let Err(e) = self.eat_inline_table_whitespace() {
1306 return Err(e);
1307 }
1308
1309 self.push_path(PathComponent::Key(key));
1310 {
1311 let val = match self.value(depth_remaining) {
1312 Ok(v) => v,
1313 Err(e) => return Err(e),
1314 };
1315 if let Err(e) = self.insert_value(table_ref, key, val) {
1316 return Err(e);
1317 }
1318 }
1319 self.path_len = saved_path_len;
1320
1321 if let Err(e) = self.eat_inline_table_whitespace() {
1322 return Err(e);
1323 }
1324 if self.eat_byte(b'}') {
1325 return Ok(());
1326 }
1327 if !self.eat_byte(b',') {
1328 let start = self.cursor;
1329 if self.peek_byte().is_none() {
1330 return Err(self.set_error(start, None, ErrorKind::UnclosedInlineTable));
1331 }
1332 let (_found_desc, end) = self.scan_token_desc_and_end();
1333 return Err(self.set_error(start, Some(end), ErrorKind::MissingInlineTableComma));
1334 }
1335 if let Err(e) = self.eat_inline_table_whitespace() {
1336 return Err(e);
1337 }
1338 if self.eat_byte(b'}') {
1339 return Ok(());
1340 }
1341 }
1342 }
1343
1344 fn array_contents(
1345 &mut self,
1346 out: &mut crate::item::array::InternalArray<'de>,
1347 depth_remaining: i16,
1348 ) -> Result<(), Failed> {
1349 if depth_remaining < 0 {
1350 return Err(self.set_error(
1351 self.cursor,
1352 None,
1353 ErrorKind::OutOfRange {
1354 ty: &"Max recursion depth exceeded",
1355 range: &"",
1356 },
1357 ));
1358 }
1359 let saved_path_len = self.path_len;
1360 loop {
1361 if let Err(e) = self.eat_intermediate() {
1362 return Err(e);
1363 }
1364 if self.eat_byte(b']') {
1365 self.path_len = saved_path_len;
1366 return Ok(());
1367 }
1368 self.push_path(PathComponent::Index(out.len()));
1369 match self.value(depth_remaining) {
1370 Ok(value) => out.push(value, self.arena),
1371 Err(e) => return Err(e),
1372 };
1373 self.path_len = saved_path_len;
1374 if let Err(e) = self.eat_intermediate() {
1375 return Err(e);
1376 }
1377 if !self.eat_byte(b',') {
1378 break;
1379 }
1380 }
1381 if let Err(e) = self.eat_intermediate() {
1382 return Err(e);
1383 }
1384 if self.eat_byte(b']') {
1385 return Ok(());
1386 }
1387 let start = self.cursor;
1388 if self.peek_byte().is_none() {
1389 return Err(self.set_error(start, None, ErrorKind::UnclosedArray));
1390 }
1391 let (_found_desc, end) = self.scan_token_desc_and_end();
1392 Err(self.set_error(start, Some(end), ErrorKind::MissingArrayComma))
1393 }
1394
1395 #[inline(always)]
1396 fn eat_inline_table_whitespace(&mut self) -> Result<(), Failed> {
1397 loop {
1398 match self.peek_byte() {
1399 Some(b' ' | b'\t' | b'\n') => self.cursor += 1,
1400 Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => self.cursor += 2,
1401 Some(b'#') => match self.eat_comment() {
1402 Ok(_) => {}
1403 Err(e) => return Err(e),
1404 },
1405 _ => return Ok(()),
1406 }
1407 }
1408 }
1409
1410 #[inline(always)]
1411 fn eat_intermediate(&mut self) -> Result<(), Failed> {
1412 loop {
1413 match self.peek_byte() {
1414 Some(b' ' | b'\t' | b'\n') => self.cursor += 1,
1415 Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => self.cursor += 2,
1416 Some(b'#') => match self.eat_comment() {
1417 Ok(_) => {}
1418 Err(e) => return Err(e),
1419 },
1420 _ => return Ok(()),
1421 }
1422 }
1423 }
1424
    /// Resolves one non-final segment of a dotted key (e.g. the `a` in `a.b = 1`) inside
    /// `table` and returns the inner table in which the next segment must be resolved.
    ///
    /// If `key` is not present yet, a new dotted table is inserted under it. If it is
    /// present, it must be a table that is neither frozen nor defined by a header.
    ///
    /// # Errors
    /// * `DottedKeyInvalidType` when the existing entry is not a table.
    /// * A duplicate-key error when the existing table is frozen or carries the header bit.
    fn navigate_dotted_key<'t>(
        &mut self,
        table: &'t mut InnerTable<'de>,
        key: Key<'de>,
    ) -> Result<&'t mut InnerTable<'de>, Failed> {
        if let Some(idx) = self.indexed_find(table, key.name) {
            let (existing_key, value) = &mut table.entries_mut()[idx];

            if !value.is_table() {
                return Err(self.set_error(
                    key.span.start as usize,
                    Some(key.span.end as usize),
                    ErrorKind::DottedKeyInvalidType {
                        first: existing_key.span,
                    },
                ));
            }
            if value.is_frozen() || value.has_header_bit() {
                return Err(self.set_duplicate_key_error(existing_key.span, key.span));
            }
            if value.is_implicit_table() {
                // Promote the implicit table to a dotted one and re-anchor its span on
                // this key.
                // SAFETY: `value.is_table()` was verified above.
                // NOTE(review): assumes "is a table" is the whole precondition of
                // `as_table_mut_unchecked` — confirm against its definition.
                let t = unsafe { value.as_table_mut_unchecked() };
                t.set_dotted_flag();
                t.set_span_start(key.span.start);
                t.set_span_end(key.span.end);
            }
            // SAFETY: `value.is_table()` was verified above (same assumption as above).
            unsafe { Ok(value.as_inner_table_mut_unchecked()) }
        } else {
            let span = key.span;
            let inserted = self.insert_value_known_to_be_unique(
                table,
                key,
                Item::table_dotted(InnerTable::new(), span),
            );
            // SAFETY: `inserted` is the item created by `Item::table_dotted` just above.
            unsafe { Ok(inserted.as_inner_table_mut_unchecked()) }
        }
    }
1471
    /// Resolves one non-final segment of a table header path (e.g. the `a` in `[a.b]`)
    /// inside `st` and returns the table in which the next segment must be resolved.
    ///
    /// * Existing, non-frozen table: that table is returned.
    /// * Existing array of tables: the last element is returned and its index is pushed
    ///   onto the error path.
    /// * Missing key: a new table is inserted under `key` and returned.
    ///
    /// # Errors
    /// A duplicate-key error when the existing entry is a frozen table, an array of
    /// tables whose last element is not a table, or any other kind of value.
    fn navigate_header_intermediate<'b>(
        &mut self,
        st: &'b mut Table<'de>,
        key: Key<'de>,
    ) -> Result<&'b mut Table<'de>, Failed> {
        let table = &mut st.value;

        if let Some(idx) = self.indexed_find(table, key.name) {
            let (existing_key, existing) = &mut table.entries_mut()[idx];
            let existing_span = existing_key.span;

            if existing.is_table() {
                if existing.is_frozen() {
                    return Err(self.set_duplicate_key_error(existing_span, key.span));
                }
                // SAFETY: `existing.is_table()` was verified by the enclosing branch.
                // NOTE(review): assumes that is the full precondition — confirm.
                unsafe { Ok(existing.as_table_mut_unchecked()) }
            } else if existing.is_aot() {
                // NOTE(review): both `unwrap`s and `arr.len() - 1` assume an
                // array-of-tables item is always an array with at least one element.
                let arr = existing.as_array_mut().unwrap();
                self.push_path(PathComponent::Index(arr.len() - 1));
                let last = arr.last_mut().unwrap();
                if !last.is_table() {
                    return Err(self.set_duplicate_key_error(existing_span, key.span));
                }
                // SAFETY: `last.is_table()` was verified just above.
                unsafe { Ok(last.as_table_mut_unchecked()) }
            } else {
                Err(self.set_duplicate_key_error(existing_span, key.span))
            }
        } else {
            let span = key.span;
            let inserted = self.insert_value_known_to_be_unique(
                table,
                key,
                Item::table(InnerTable::new(), span),
            );
            // SAFETY: `inserted` is the item created by `Item::table` just above.
            unsafe { Ok(inserted.as_table_mut_unchecked()) }
        }
    }
1521 fn insert_value_known_to_be_unique<'t>(
1522 &mut self,
1523 table: &'t mut InnerTable<'de>,
1524 key: Key<'de>,
1525 item: Item<'de>,
1526 ) -> &'t mut item::Item<'de> {
1527 let len = table.len();
1528 if len >= INDEXED_TABLE_THRESHOLD {
1529 let table_id = unsafe { table.first_key_span_start_unchecked() };
1531 if len == INDEXED_TABLE_THRESHOLD {
1532 for (i, (key, _)) in table.entries().iter().enumerate() {
1533 self.index.insert(KeyRef::new(key.as_str(), table_id), i);
1534 }
1535 }
1536 self.index.insert(KeyRef::new(key.as_str(), table_id), len);
1537 }
1538 &mut table.insert_unique(key, item, self.arena).1
1539 }
1540
    /// Resolves the final segment of a `[table]` header inside `st` and returns the
    /// context (`Ctx`) whose table is the one that header opens.
    ///
    /// An existing table is reused only if it has not been claimed by a header or by a
    /// dotted key and is not frozen; it is then flagged as header-defined and its span is
    /// set to `header_start..header_end`. A missing key gets a fresh header table.
    ///
    /// # Errors
    /// * A duplicate-key error when the existing entry is not a table, is frozen, or has
    ///   the dotted bit set.
    /// * `DuplicateTable` when the existing table already has the header bit set.
    fn navigate_header_table_final<'b>(
        &mut self,
        st: &'b mut Table<'de>,
        key: Key<'de>,
        header_start: u32,
        header_end: u32,
    ) -> Result<Ctx<'b, 'de>, Failed> {
        let table = &mut st.value;

        if let Some(idx) = self.indexed_find(table, key.name) {
            let (existing_key, existing) = &mut table.entries_mut()[idx];
            let first_key_span = existing_key.span;

            if !existing.is_table() || existing.is_frozen() {
                return Err(self.set_duplicate_key_error(first_key_span, key.span));
            }
            if existing.has_header_bit() {
                return Err(self.set_error(
                    header_start as usize,
                    Some(header_end as usize),
                    ErrorKind::DuplicateTable {
                        name: key.span,
                        first: existing.span_unchecked(),
                    },
                ));
            }
            if existing.has_dotted_bit() {
                return Err(self.set_duplicate_key_error(first_key_span, key.span));
            }
            // SAFETY: `existing.is_table()` held, otherwise the first check returned.
            // NOTE(review): assumes that is the full precondition — confirm.
            let table = unsafe { existing.as_table_mut_unchecked() };
            table.set_header_flag();
            table.set_span_start(header_start);
            table.set_span_end(header_end);
            Ok(Ctx {
                table,
                array_end_span: None,
            })
        } else {
            let inserted = self.insert_value_known_to_be_unique(
                table,
                key,
                Item::table_header(InnerTable::new(), Span::new(header_start, header_end)),
            );
            Ok(Ctx {
                // SAFETY: `inserted` is the item created by `Item::table_header` above.
                table: unsafe { inserted.as_table_mut_unchecked() },
                array_end_span: None,
            })
        }
    }
1596
    /// Resolves the final segment of a `[[array]]` header inside `st`, appends a new
    /// table element, and returns the context (`Ctx`) for that element.
    ///
    /// If `key` already names an array of tables, a new header table is pushed onto it;
    /// otherwise a new array of tables with one element is inserted. In both cases the
    /// new element's index is pushed onto the error path and the returned `Ctx` carries
    /// the array's end-flag word in `array_end_span`.
    ///
    /// # Errors
    /// * `RedefineAsArray` when `key` already names a table.
    /// * A duplicate-key error when `key` names any other kind of value.
    fn navigate_header_array_final<'b>(
        &mut self,
        st: &'b mut Table<'de>,
        key: Key<'de>,
        header_start: u32,
        header_end: u32,
    ) -> Result<Ctx<'b, 'de>, Failed> {
        let table = &mut st.value;

        if let Some(idx) = self.indexed_find(table, key.name) {
            let (existing_key, existing) = &mut table.entries_mut()[idx];
            let first_key_span = existing_key.span;

            if existing.is_aot() {
                // SAFETY: `existing.is_aot()` was verified by the enclosing branch.
                // NOTE(review): assumes "is an array" is what `split_array_end_flag`
                // requires — confirm against its definition.
                let (end_flag, arr) = unsafe { existing.split_array_end_flag() };
                let entry_span = Span::new(header_start, header_end);
                arr.push(
                    Item::table_header(InnerTable::new(), entry_span),
                    self.arena,
                );
                self.push_path(PathComponent::Index(arr.len() - 1));
                // The array is non-empty: an element was pushed just above.
                let entry = arr.last_mut().unwrap();
                Ok(Ctx {
                    // SAFETY: `entry` is the `Item::table_header` pushed just above.
                    table: unsafe { entry.as_table_mut_unchecked() },
                    array_end_span: Some(end_flag),
                })
            } else if existing.is_table() {
                Err(self.set_error(
                    header_start as usize,
                    Some(header_end as usize),
                    ErrorKind::RedefineAsArray {
                        first: first_key_span,
                    },
                ))
            } else {
                Err(self.set_duplicate_key_error(first_key_span, key.span))
            }
        } else {
            let entry_span = Span::new(header_start, header_end);
            let first_entry = Item::table_header(InnerTable::new(), entry_span);
            let array_span = Span::new(header_start, header_end);
            let array_val = Item::array_aot(
                crate::item::array::InternalArray::with_single(first_entry, self.arena),
                array_span,
            );
            let inserted = self.insert_value_known_to_be_unique(table, key, array_val);
            self.push_path(PathComponent::Index(0));
            // SAFETY: `inserted` is the array item created by `Item::array_aot` above.
            let (end_flag, arr) = unsafe { inserted.split_array_end_flag() };
            // The array was built with `with_single`, so it holds `first_entry`.
            let entry = arr.last_mut().unwrap();
            Ok(Ctx {
                // SAFETY: `entry` is the `Item::table_header` stored as `first_entry`.
                table: unsafe { entry.as_table_mut_unchecked() },
                array_end_span: Some(end_flag),
            })
        }
    }
1660
1661 fn insert_value(
1663 &mut self,
1664 table: &mut InnerTable<'de>,
1665 key: Key<'de>,
1666 item: Item<'de>,
1667 ) -> Result<(), Failed> {
1668 if table.len() < INDEXED_TABLE_THRESHOLD {
1669 for (existing_key, _) in table.entries() {
1670 if existing_key.as_str() == key.name {
1671 return Err(self.set_duplicate_key_error(existing_key.span, key.span));
1672 }
1673 }
1674 table.insert_unique(key, item, self.arena);
1675 return Ok(());
1676 }
1677 let table_id = unsafe { table.first_key_span_start_unchecked() };
1679
1680 if table.len() == INDEXED_TABLE_THRESHOLD {
1683 for (i, (key, _)) in table.entries().iter().enumerate() {
1684 self.index.insert(KeyRef::new(key.as_str(), table_id), i);
1687 }
1688 }
1689
1690 match self.index.entry(KeyRef::new(key.as_str(), table_id)) {
1691 std::collections::hash_map::Entry::Occupied(occupied_entry) => {
1692 let idx = *occupied_entry.get();
1693 let (existing_key, _) = &table.entries()[idx];
1694 Err(self.set_duplicate_key_error(existing_key.span, key.span))
1695 }
1696 std::collections::hash_map::Entry::Vacant(vacant_entry) => {
1697 vacant_entry.insert(table.len());
1698 table.insert_unique(key, item, self.arena);
1699 Ok(())
1700 }
1701 }
1702 }
1703
1704 fn indexed_find(&self, table: &InnerTable<'de>, name: &str) -> Option<usize> {
1708 if table.len() > INDEXED_TABLE_THRESHOLD {
1711 let first_key_span = unsafe { table.first_key_span_start_unchecked() };
1713 self.index.get(&KeyRef::new(name, first_key_span)).copied()
1714 } else {
1715 table.find_index(name)
1716 }
1717 }
1718
1719 fn skip_recovery_string(&mut self) {
1720 let delim = self.bytes[self.cursor];
1721 self.cursor += 1;
1722 let multiline = self.peek_byte() == Some(delim) && self.peek_byte_at(1) == Some(delim);
1723 if multiline {
1724 self.cursor += 2;
1725 loop {
1726 match self.peek_byte() {
1727 None => return,
1728 Some(b)
1729 if b == delim
1730 && self.peek_byte_at(1) == Some(delim)
1731 && self.peek_byte_at(2) == Some(delim) =>
1732 {
1733 self.cursor += 3;
1734 while self.peek_byte() == Some(delim) {
1735 self.cursor += 1;
1736 }
1737 return;
1738 }
1739 Some(b'\\') if delim == b'"' => self.cursor += 2,
1740 _ => self.cursor += 1,
1741 }
1742 }
1743 }
1744 loop {
1745 match self.peek_byte() {
1746 None | Some(b'\n') => return,
1747 Some(b) if b == delim => {
1748 self.cursor += 1;
1749 return;
1750 }
1751 Some(b'\\') if delim == b'"' => self.cursor += 2,
1752 _ => self.cursor += 1,
1753 }
1754 }
1755 }
1756
1757 fn at_statement_start(&self) -> bool {
1758 matches!(self.peek_byte(), None | Some(b'[') | Some(b'#'))
1759 || matches!(self.peek_byte(), Some(b) if is_keylike_byte(b) || b == b'"' || b == b'\'')
1760 }
1761
1762 fn skip_to_next_statement(&mut self) {
1763 loop {
1764 match self.peek_byte() {
1765 None => return,
1766 Some(b'\n') => {
1767 self.cursor += 1;
1768 let saved = self.cursor;
1769 while matches!(self.peek_byte(), Some(b' ' | b'\t')) {
1770 self.cursor += 1;
1771 }
1772 if self.at_statement_start() {
1773 self.cursor = saved;
1774 return;
1775 }
1776 self.cursor = saved;
1777 }
1778 Some(b'"' | b'\'') => self.skip_recovery_string(),
1779 Some(b'#') => {
1780 self.cursor += 1;
1781 while let Some(b) = self.peek_byte() {
1782 if b == b'\n' {
1783 break;
1784 }
1785 self.cursor += 1;
1786 }
1787 }
1788 _ => self.cursor += 1,
1789 }
1790 }
1791 }
1792
    /// Cap on the number of errors collected in recovery mode: `try_recover` reports
    /// that parsing should stop once `self.errors` has reached this length.
    const MAX_RECOVER_ERRORS: usize = 25;
1794
1795 fn try_recover(&mut self) -> bool {
1796 if !self.recovering {
1797 return false;
1798 }
1799 let error = self.take_error();
1800 self.errors.push(error);
1801 self.path_len = 0;
1802 let at_line_start = self.cursor == 0 || self.bytes.get(self.cursor - 1) == Some(&b'\n');
1803 if at_line_start && self.at_statement_start() {
1804 return self.errors.len() < Self::MAX_RECOVER_ERRORS;
1805 }
1806 let _before = self.cursor;
1807 self.skip_to_next_statement();
1808 debug_assert!(
1809 self.cursor > _before || self.cursor >= self.bytes.len(),
1810 "skip_to_next_statement did not advance cursor from {_before}",
1811 );
1812 self.errors.len() < Self::MAX_RECOVER_ERRORS
1813 }
1814
    /// Top-level parse loop: reads statements until end of input, filling `root_st`.
    ///
    /// Each iteration skips whitespace, comments and newlines, then dispatches on the
    /// next byte: `[` starts a table header (which replaces the current insertion
    /// context), a stray `\r` is an error, and anything else is parsed as a key/value
    /// pair in the current context.
    ///
    /// # Errors
    /// Returns `Err(Failed)` as soon as a statement fails and `try_recover` declines to
    /// continue (recovery disabled or error cap reached). When recovery continues, the
    /// errors are accumulated by `try_recover` and `Ok(())` may still be returned.
    fn parse_document(&mut self, root_st: &mut Table<'de>) -> Result<(), Failed> {
        // Key/value pairs go into the root table until the first header is seen.
        let mut ctx = Ctx {
            table: root_st,
            array_end_span: None,
        };

        // Debug-only bookkeeping used to detect a recovery loop that makes no progress.
        #[cfg(debug_assertions)]
        let mut _prev_loop_cursor = usize::MAX;

        loop {
            #[cfg(debug_assertions)]
            if self.recovering {
                debug_assert!(
                    self.cursor != _prev_loop_cursor || self.peek_byte().is_none(),
                    "parse_document recovery loop stalled at cursor {}",
                    self.cursor,
                );
                _prev_loop_cursor = self.cursor;
            }

            self.eat_whitespace();
            match self.eat_comment() {
                Ok(true) => continue,
                Ok(false) => {}
                Err(_) => {
                    if !self.try_recover() {
                        return Err(Failed);
                    }
                    continue;
                }
            }
            if self.eat_newline() {
                continue;
            }

            match self.peek_byte() {
                None => break,
                Some(b'[') => {
                    ctx = match self.process_table_header(root_st) {
                        Ok(c) => c,
                        Err(_) => {
                            if !self.try_recover() {
                                return Err(Failed);
                            }
                            // The header could not be resolved; fall back to the root
                            // table as the insertion context for what follows.
                            Ctx {
                                table: root_st,
                                array_end_span: None,
                            }
                        }
                    };
                }
                Some(b'\r') => {
                    // A `\r` that `eat_newline` did not consume is reported as unexpected.
                    self.set_error(self.cursor, None, ErrorKind::Unexpected('\r'));
                    if !self.try_recover() {
                        return Err(Failed);
                    }
                    continue;
                }
                Some(_) => {
                    if self.process_key_value(&mut ctx).is_err() {
                        if !self.try_recover() {
                            return Err(Failed);
                        }
                    }
                }
            }
        }
        Ok(())
    }
1884
1885 fn process_table_header<'b>(
1886 &mut self,
1887 root_st: &'b mut Table<'de>,
1888 ) -> Result<Ctx<'b, 'de>, Failed> {
1889 self.path_len = 0;
1890 let header_start = self.cursor as u32;
1891 if let Err(e) = self.expect_byte(b'[') {
1892 return Err(e);
1893 }
1894 let is_array = self.eat_byte(b'[');
1895
1896 let mut current = root_st;
1897
1898 self.eat_whitespace();
1899 let mut key = match self.read_table_key() {
1900 Ok(k) => k,
1901 Err(e) => return Err(e),
1902 };
1903 loop {
1904 if self.eat_whitespace_to() == Some(b'.') {
1905 self.cursor += 1;
1906 self.eat_whitespace();
1907 self.push_path(PathComponent::Key(key));
1908 current = match self.navigate_header_intermediate(current, key) {
1909 Ok(p) => p,
1910 Err(e) => return Err(e),
1911 };
1912 key = match self.read_table_key() {
1913 Ok(k) => k,
1914 Err(e) => return Err(e),
1915 };
1916 } else {
1917 break;
1918 }
1919 }
1920 if let Err(e) = self.expect_byte(b']') {
1921 return Err(e);
1922 }
1923 if is_array && let Err(e) = self.expect_byte(b']') {
1924 return Err(e);
1925 }
1926
1927 self.eat_whitespace();
1928 match self.eat_comment() {
1929 Ok(true) => {}
1930 Ok(false) => {
1931 if let Err(e) = self.eat_newline_or_eof() {
1932 return Err(e);
1933 }
1934 }
1935 Err(e) => return Err(e),
1936 }
1937 let header_end = self.cursor as u32;
1938
1939 self.push_path(PathComponent::Key(key));
1940 if is_array {
1941 self.navigate_header_array_final(current, key, header_start, header_end)
1942 } else {
1943 self.navigate_header_table_final(current, key, header_start, header_end)
1944 }
1945 }
1946
1947 fn process_key_value(&mut self, ctx: &mut Ctx<'_, 'de>) -> Result<(), Failed> {
1948 let saved_path_len = self.path_len;
1949 let line_start = self.cursor as u32;
1950 let mut table_ref: &mut InnerTable<'de> = &mut ctx.table.value;
1954
1955 let mut key = match self.read_table_key() {
1956 Ok(k) => k,
1957 Err(e) => return Err(e),
1958 };
1959 self.eat_whitespace();
1960
1961 while self.eat_byte(b'.') {
1962 self.eat_whitespace();
1963 self.push_path(PathComponent::Key(key));
1964 table_ref = match self.navigate_dotted_key(table_ref, key) {
1965 Ok(t) => t,
1966 Err(e) => return Err(e),
1967 };
1968 key = match self.read_table_key() {
1969 Ok(k) => k,
1970 Err(e) => return Err(e),
1971 };
1972 self.eat_whitespace();
1973 }
1974
1975 self.push_path(PathComponent::Key(key));
1976
1977 if let Err(e) = self.expect_byte(b'=') {
1978 return Err(e);
1979 }
1980 self.eat_whitespace();
1981 let val = match self.value(MAX_RECURSION_DEPTH) {
1982 Ok(v) => v,
1983 Err(e) => return Err(e),
1984 };
1985 let line_end = self.cursor as u32;
1986
1987 self.eat_whitespace();
1988 match self.eat_comment() {
1989 Ok(true) => {}
1990 Ok(false) => {
1991 if let Err(e) = self.eat_newline_or_eof() {
1992 return Err(e);
1993 }
1994 }
1995 Err(e) => return Err(e),
1996 }
1997
1998 if let Err(e) = self.insert_value(table_ref, key, val) {
1999 return Err(e);
2000 }
2001
2002 self.path_len = saved_path_len;
2003
2004 let start = ctx.table.span_start();
2005 ctx.table.set_span_start(start.min(line_start));
2006 ctx.table.extend_span_end(line_end);
2007
2008 if let Some(end_flag) = &mut ctx.array_end_span {
2009 let old = **end_flag;
2010 let current = old >> item::FLAG_SHIFT;
2011 **end_flag = (current.max(line_end) << item::FLAG_SHIFT) | (old & item::FLAG_MASK);
2012 }
2013
2014 Ok(())
2015 }
2016}
2017
/// The result of parsing a TOML source string: the root table and, when the
/// `from-toml` feature is enabled, the context used for deserialization.
pub struct Document<'de> {
    /// Root table of the parsed document.
    pub(crate) table: Table<'de>,
    /// Deserialization context; `parse` and `parse_recoverable` fill it with the error
    /// list, the key index, the arena and the source text.
    #[cfg(feature = "from-toml")]
    pub ctx: crate::de::Context<'de>,
}
2039
2040impl<'de> Document<'de> {
2041 pub fn into_table(self) -> Table<'de> {
2043 self.table
2044 }
2045
2046 pub fn into_item(self) -> Item<'de> {
2048 self.table.into_item()
2049 }
2050
2051 pub fn table(&self) -> &Table<'de> {
2053 &self.table
2054 }
2055
2056 #[cfg(feature = "from-toml")]
2063 pub fn split(&mut self) -> (&mut crate::de::Context<'de>, &Table<'de>) {
2064 (&mut self.ctx, &self.table)
2065 }
2066
2067 #[cfg(feature = "to-toml")]
2071 pub(crate) fn table_index(&self) -> &crate::item::table::TableIndex<'de> {
2072 &self.ctx.index
2074 }
2075
2076 #[cfg(feature = "to-toml")]
2081 pub(crate) fn detect_indent(&self) -> crate::emit::Indent {
2082 let src = self.ctx.source().as_bytes();
2083 if let Some(indent) = detect_indent_in_table(&self.table, src) {
2084 return indent;
2085 }
2086 crate::emit::Indent::default()
2087 }
2088}
2089
#[cfg(feature = "from-toml")]
impl<'de> Document<'de> {
    /// Creates a [`TableHelper`] over the root table that reports into this document's
    /// context.
    #[doc(alias = "helper")]
    pub fn table_helper<'ctx>(&'ctx mut self) -> TableHelper<'ctx, 'ctx, 'de> {
        TableHelper::new(&mut self.ctx, &self.table)
    }

    /// Converts the root table into `T`.
    ///
    /// # Errors
    /// Returns every error accumulated in the context when the conversion fails or when
    /// it succeeds but errors were recorded along the way. The context's error list is
    /// emptied in that case.
    #[doc(alias = "deserialize")]
    #[doc(alias = "from_toml")]
    pub fn to<T: crate::FromToml<'de>>(&mut self) -> Result<T, crate::FromTomlError> {
        let outcome = T::from_toml(&mut self.ctx, self.table.as_item());
        crate::de::compute_paths(&self.table, &mut self.ctx.errors);
        if let Ok(value) = outcome {
            if self.ctx.errors.is_empty() {
                return Ok(value);
            }
        }
        Err(crate::de::FromTomlError {
            errors: std::mem::take(&mut self.ctx.errors),
        })
    }

    /// Converts the root table into `T`, tolerating errors recorded during a successful
    /// conversion.
    ///
    /// On success the value is returned together with the (possibly empty) collected
    /// errors. The context's error list is emptied in every case.
    ///
    /// # Errors
    /// Returns the collected errors when the conversion itself fails.
    pub fn to_allowing_errors<T>(
        &mut self,
    ) -> Result<(T, crate::de::FromTomlError), crate::de::FromTomlError>
    where
        T: crate::de::FromToml<'de>,
    {
        let outcome = T::from_toml(&mut self.ctx, self.table.as_item());
        crate::de::compute_paths(&self.table, &mut self.ctx.errors);
        let collected = crate::de::FromTomlError {
            errors: std::mem::take(&mut self.ctx.errors),
        };
        match outcome {
            Ok(value) => Ok((value, collected)),
            Err(_) => Err(collected),
        }
    }

    /// Borrows the errors currently stored in the context.
    pub fn errors(&self) -> &[Error] {
        &self.ctx.errors
    }

    /// Reports whether the context currently holds at least one error.
    pub fn has_errors(&self) -> bool {
        !self.ctx.errors.is_empty()
    }
}
2153
/// Indexing a document by key forwards to indexing its root table.
impl<'de> std::ops::Index<&str> for Document<'de> {
    type Output = MaybeItem<'de>;

    /// Returns whatever `self.table[key]` yields for `key`.
    fn index(&self, key: &str) -> &Self::Output {
        &self.table[key]
    }
}
2161
/// Debug output of a document is the output of its root table's `fmt`.
impl std::fmt::Debug for Document<'_> {
    /// Forwards formatting to the root table.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.table.fmt(f)
    }
}
2167
2168#[inline(never)]
2186pub fn parse<'de>(document: &'de str, arena: &'de Arena) -> Result<Document<'de>, Error> {
2187 const MAX_SIZE: usize = (1u32 << 28) as usize;
2192
2193 if document.len() >= MAX_SIZE {
2194 return Err(Error::new(ErrorKind::FileTooLarge, Span::new(0, 0)));
2195 }
2196
2197 let mut root_st = Table::new_spanned(Span::new(0, document.len() as u32));
2198 let mut parser = Parser::new(document, arena);
2199 match parser.parse_document(&mut root_st) {
2200 Ok(()) => {}
2201 Err(_) => return Err(parser.take_error()),
2202 }
2203 Ok(Document {
2206 table: root_st,
2207 #[cfg(feature = "from-toml")]
2208 ctx: crate::de::Context {
2209 errors: Vec::new(),
2210 index: parser.index,
2211 arena,
2212 source: document,
2213 },
2214 })
2215}
2216
/// Parses `document` in recovery mode and always returns a [`Document`].
///
/// Errors do not abort parsing: they are collected into the returned document's context
/// (`ctx.errors`) while the parser tries to resume at the next statement. If parsing
/// still stops early, the pending error is appended together with its path.
///
/// An input of 2^28 bytes or more is not parsed at all; the result is an empty root
/// table with a single `FileTooLarge` error.
#[cfg(feature = "from-toml")]
pub fn parse_recoverable<'de>(document: &'de str, arena: &'de Arena) -> Document<'de> {
    const MAX_SIZE: usize = (1u32 << 28) as usize;
    let mut parser = Parser::new(document, arena);
    parser.recovering = true;

    let table = if document.len() >= MAX_SIZE {
        parser
            .errors
            .push(Error::new(ErrorKind::FileTooLarge, Span::new(0, 0)));
        Table::new_spanned(Span::new(0, 0))
    } else {
        let mut root_st = Table::new_spanned(Span::new(0, document.len() as u32));
        if parser.parse_document(&mut root_st).is_err() {
            // Parsing gave up; keep the error that was pending at that point, if any.
            if let Some(kind) = parser.error_kind.take() {
                parser.errors.push(Error::new_with_path(
                    kind,
                    parser.error_span,
                    parser.build_error_path(),
                ));
            }
        }
        root_st
    };

    Document {
        table,
        ctx: crate::de::Context {
            errors: parser.errors,
            index: parser.index,
            arena,
            source: document,
        },
    }
}
2281
/// Reports whether `b` may appear in a bare key: an ASCII letter, an ASCII digit,
/// `-` or `_`.
#[inline]
fn is_keylike_byte(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'_')
}
2286
2287fn byte_describe(b: u8) -> &'static &'static str {
2288 match b {
2289 b'\n' => &"a newline",
2290 b' ' | b'\t' => &"whitespace",
2291 b'=' => &"an equals",
2292 b'.' => &"a period",
2293 b',' => &"a comma",
2294 b':' => &"a colon",
2295 b'+' => &"a plus",
2296 b'{' => &"a left brace",
2297 b'}' => &"a right brace",
2298 b'[' => &"a left bracket",
2299 b']' => &"a right bracket",
2300 b'\'' | b'"' => &"a string",
2301 _ if is_keylike_byte(b) => &"an identifier",
2302 _ => &"a character",
2303 }
2304}
2305
/// Searches `table` depth-first for the first inline array element or inline table key
/// whose position in `src` reveals an indentation style, and returns that style.
///
/// For every array, the elements of an inline array are probed first and then any table
/// elements are searched recursively. For every table, the keys of an inline table are
/// probed first and then the table itself is searched recursively. Returns `None` when
/// nothing in the tree yields an indent.
#[cfg(feature = "to-toml")]
fn detect_indent_in_table(table: &Table<'_>, src: &[u8]) -> Option<crate::emit::Indent> {
    use crate::item::{ArrayStyle, TableStyle, Value};
    for (_, item) in table {
        match item.value() {
            Value::Array(arr) => {
                if arr.style() == ArrayStyle::Inline {
                    for elem in arr {
                        let span = elem.span();
                        if span.is_empty() {
                            continue;
                        }
                        let probed = indent_from_span(src, span.start as usize);
                        if probed.is_some() {
                            return probed;
                        }
                    }
                }
                for elem in arr {
                    let nested = elem
                        .as_table()
                        .and_then(|sub| detect_indent_in_table(sub, src));
                    if nested.is_some() {
                        return nested;
                    }
                }
            }
            Value::Table(sub) => {
                if sub.style() == TableStyle::Inline {
                    for (key, _) in sub {
                        if key.span.is_empty() {
                            continue;
                        }
                        let probed = indent_from_span(src, key.span.start as usize);
                        if probed.is_some() {
                            return probed;
                        }
                    }
                }
                let nested = detect_indent_in_table(sub, src);
                if nested.is_some() {
                    return nested;
                }
            }
            _ => (),
        }
    }
    None
}
2349
/// Infers the indentation that precedes byte offset `pos` in `src`.
///
/// Scans backwards from `pos` over spaces:
/// * reaching a tab yields `Indent::Tab`;
/// * reaching a `\n` yields `Indent::Spaces(n)` where `n` is the number of spaces
///   skipped, capped at 8, or `None` when no spaces were skipped;
/// * reaching any other byte, or the start of `src`, yields `None`.
///
/// Returns `None` when `pos` is not a valid index into `src`.
#[cfg(feature = "to-toml")]
fn indent_from_span(src: &[u8], pos: usize) -> Option<crate::emit::Indent> {
    if pos >= src.len() {
        return None;
    }
    // `gap` counts the bytes between the inspected byte and `pos`; every one of them was
    // a space, otherwise the scan would already have returned.
    for (gap, &byte) in src[..pos].iter().rev().enumerate() {
        match byte {
            b' ' => {}
            b'\t' => return Some(crate::emit::Indent::Tab),
            b'\n' => {
                if gap == 0 {
                    return None;
                }
                // Clamp before narrowing: casting first would wrap runs of 256 or more
                // spaces (256 became 0, 257 became 1) instead of capping them at 8.
                return Some(crate::emit::Indent::Spaces(gap.min(8) as u8));
            }
            _ => return None,
        }
    }
    None
}