1#![allow(clippy::question_mark)]
5#![allow(unsafe_code)]
6
7#[cfg(test)]
8#[path = "./parser_tests.rs"]
9mod tests;
10
11use crate::{
12 Span,
13 arena::Arena,
14 error::{Error, ErrorKind},
15 str::Str,
16 table::{InnerTable, Table},
17 value::{self, Item, Key},
18};
19use std::hash::{Hash, Hasher};
20use std::ptr::NonNull;
21use std::{char, collections::HashMap};
22
/// Nesting budget for container values.
///
/// NOTE(review): not referenced in the visible part of this file; presumably the initial
/// `depth_remaining` handed to `Parser::value`, which passes `depth_remaining - 1` to each
/// nested array / inline table — confirm at the call site.
const MAX_RECURSION_DEPTH: i16 = 256;
/// Zero-sized failure marker returned by the parsing methods.
///
/// It carries no data: the error kind and span are stored on the parser by
/// `set_error` / `set_duplicate_key_error` and turned into an `Error` by `take_error`.
#[derive(Copy, Clone)]
struct ParseError;
28
/// Mutable borrow of a table plus an optional `u32` slot.
///
/// NOTE(review): not used in the visible part of this file. Judging by the field name,
/// `array_end_span` presumably points at the end offset of an enclosing array-of-tables
/// entry so it can be extended while parsing — confirm against the use sites.
struct Ctx<'b, 'de> {
    table: &'b mut Table<'de>,
    array_end_span: Option<&'b mut u32>,
}
38
/// NOTE(review): not referenced in the visible part of this file; presumably the entry
/// count at which table lookups switch from a linear scan to `Parser::table_index` —
/// confirm against `indexed_find`.
const INDEXED_TABLE_THRESHOLD: usize = 7;
41
/// Builds the 256-entry lookup table stored in `HEX`.
///
/// Entry `b` is the value (0..=15) of the ASCII hexadecimal digit `b`, accepting both
/// upper- and lower-case letters, and `-1` for every other byte.
const fn build_hex_table() -> [i8; 256] {
    let mut lut = [-1i8; 256];

    // '0'..='9' map to 0..=9.
    let mut value = 0u8;
    while value < 10 {
        lut[(b'0' + value) as usize] = value as i8;
        value += 1;
    }

    // 'A'..='F' and 'a'..='f' both map to 10..=15.
    let mut offset = 0u8;
    while offset < 6 {
        let value = (10 + offset) as i8;
        lut[(b'A' + offset) as usize] = value;
        lut[(b'a' + offset) as usize] = value;
        offset += 1;
    }

    lut
}
56
/// Hex-digit lookup indexed by byte value: `0..=15` for ASCII hexadecimal digits
/// (either case) and `-1` for every other byte. Computed at compile time.
static HEX: [i8; 256] = build_hex_table();
58
/// Hash-map key made of a key's text and a `u32` span value.
///
/// The text is stored as a raw pointer plus a `u32` length instead of a `&'de str`;
/// the `PhantomData` keeps the `'de` borrow attached to the type. Two values are equal
/// only when both the text and `first_key_span` match.
///
/// NOTE(review): `first_key_span` presumably identifies the table the key belongs to —
/// confirm against the code that populates `Parser::table_index`.
struct KeyIndex<'de> {
    key_ptr: NonNull<u8>,
    len: u32,
    first_key_span: u32,
    marker: std::marker::PhantomData<&'de str>,
}

impl<'de> KeyIndex<'de> {
    /// Captures `key`'s data pointer and length (narrowed to `u32`) together with
    /// `first_key_span`.
    #[inline]
    fn new(key: &'de str, first_key_span: u32) -> Self {
        Self {
            // A reference is never null, so this conversion needs no `unsafe`.
            key_ptr: NonNull::from(key.as_bytes()).cast::<u8>(),
            len: key.len() as u32,
            first_key_span,
            marker: std::marker::PhantomData,
        }
    }

    /// Rebuilds the `&'de str` captured by [`KeyIndex::new`].
    #[inline]
    fn as_str(&self) -> &'de str {
        let len = self.len as usize;
        // SAFETY: `key_ptr` and `len` were taken from a `&'de str` in `new`, so they
        // describe initialized UTF-8 that lives for `'de` (this relies on the key
        // length fitting in `u32`, so that the stored length was not truncated).
        unsafe {
            let raw = std::slice::from_raw_parts(self.key_ptr.as_ptr(), len);
            std::str::from_utf8_unchecked(raw)
        }
    }
}

impl<'de> Hash for KeyIndex<'de> {
    /// Feeds the key text, then the span value, into `state`.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        let text = self.as_str();
        text.hash(state);
        self.first_key_span.hash(state);
    }
}

impl<'de> PartialEq for KeyIndex<'de> {
    /// Compares the cheap integer first, then the key text.
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        if self.first_key_span != other.first_key_span {
            return false;
        }
        self.as_str() == other.as_str()
    }
}

impl<'de> Eq for KeyIndex<'de> {}
111
/// Cursor-based parser state over a borrowed input buffer.
struct Parser<'de> {
    /// Raw input bytes; `Parser::new` fills this from `input.as_bytes()`.
    bytes: &'de [u8],
    /// Index into `bytes` of the next unread byte.
    cursor: usize,
    /// Arena the parsing methods use for scratch buffers and array storage.
    arena: &'de Arena,

    /// Span of the most recently recorded error (written by `set_error`).
    error_span: Span,
    /// Kind of the most recently recorded error; taken out by `take_error`.
    error_kind: Option<ErrorKind>,

    /// Map from `KeyIndex` to a `usize`.
    /// NOTE(review): its use sites are outside this view; presumably an index from
    /// (key text, owning table) to an entry position for large tables — confirm.
    table_index: foldhash::HashMap<KeyIndex<'de>, usize>,
}
126
127#[allow(unsafe_code)]
128impl<'de> Parser<'de> {
129 fn new(input: &'de str, arena: &'de Arena) -> Self {
130 let bytes = input.as_bytes();
131 let cursor = if bytes.starts_with(b"\xef\xbb\xbf") {
133 3
134 } else {
135 0
136 };
137 Parser {
138 bytes,
139 cursor,
140 arena,
141 error_span: Span::new(0, 0),
142 error_kind: None,
143 table_index: HashMap::default(),
144 }
145 }
146
147 #[inline]
151 unsafe fn str_slice(&self, start: usize, end: usize) -> &'de str {
152 #[cfg(not(debug_assertions))]
153 unsafe {
154 std::str::from_utf8_unchecked(&self.bytes[start..end])
155 }
156 #[cfg(debug_assertions)]
157 match std::str::from_utf8(&self.bytes[start..end]) {
158 Ok(value) => value,
159 Err(err) => panic!(
160 "Invalid UTF-8 slice: bytes[{}..{}] is not valid UTF-8: {}",
161 start, end, err
162 ),
163 }
164 }
165
166 #[cold]
167 fn set_duplicate_key_error(&mut self, first: Span, second: Span, key: &str) -> ParseError {
168 self.error_span = second;
169 self.error_kind = Some(ErrorKind::DuplicateKey {
170 key: key.into(),
171 first,
172 });
173 ParseError
174 }
175 #[cold]
176 fn set_error(&mut self, start: usize, end: Option<usize>, kind: ErrorKind) -> ParseError {
177 self.error_span = Span::new(start as u32, end.unwrap_or(start + 1) as u32);
178 self.error_kind = Some(kind);
179 ParseError
180 }
181
    /// Builds the public `Error` from the kind and span stored by the last failure,
    /// leaving `error_kind` empty.
    ///
    /// # Panics
    /// Panics if no error kind is currently stored.
    fn take_error(&mut self) -> Error {
        let kind = self
            .error_kind
            .take()
            .expect("take_error called without error");
        let span = self.error_span;

        {
            // Calls `to_linecol` with an opaque offset of 0 and discards the result; the
            // `black_box` calls only stop the optimizer from removing the call.
            // NOTE(review): why the call has to be kept is not visible here — confirm.
            let line_info = Some(self.to_linecol(std::hint::black_box(0)));
            std::hint::black_box(&line_info);
        }

        Error { kind, span }
    }
199
200 pub fn to_linecol(&self, offset: usize) -> (u32, u32) {
201 let mut line_start = 0;
202 let mut line_num = 0;
203 for (i, &b) in self.bytes.iter().enumerate() {
204 if i >= offset {
205 return (line_num as u32, (offset - line_start) as u32);
206 }
207 if b == b'\n' {
208 line_num += 1;
209 line_start = i + 1;
210 }
211 }
212 (line_num as u32, (offset - line_start) as u32)
213 }
214
215 #[inline]
216 fn peek_byte(&self) -> Option<u8> {
217 self.bytes.get(self.cursor).copied()
218 }
219
    /// Returns the byte `offset` positions past the cursor without consuming anything,
    /// or `None` when that position lies beyond the input.
    #[inline]
    fn peek_byte_at(&self, offset: usize) -> Option<u8> {
        self.bytes.get(self.cursor + offset).copied()
    }
224
    /// Moves the cursor forward by one byte. No bounds check is performed here.
    #[inline]
    fn advance(&mut self) {
        self.cursor += 1;
    }
229
230 #[inline]
231 fn eat_byte(&mut self, b: u8) -> bool {
232 if self.peek_byte() == Some(b) {
233 self.advance();
234 true
235 } else {
236 false
237 }
238 }
239
240 fn eat_byte_spanned(&mut self, b: u8) -> Option<Span> {
241 if self.peek_byte() == Some(b) {
242 let start = self.cursor;
243 self.advance();
244 Some(Span::new(start as u32, self.cursor as u32))
245 } else {
246 None
247 }
248 }
249
250 fn expect_byte(&mut self, b: u8) -> Result<(), ParseError> {
251 if self.eat_byte(b) {
252 Ok(())
253 } else {
254 let start = self.cursor;
255 let (found_desc, end) = self.scan_token_desc_and_end();
256 Err(self.set_error(
257 start,
258 Some(end),
259 ErrorKind::Wanted {
260 expected: byte_describe(b),
261 found: found_desc,
262 },
263 ))
264 }
265 }
266
267 fn expect_byte_spanned(&mut self, b: u8) -> Result<Span, ParseError> {
268 if let Some(span) = self.eat_byte_spanned(b) {
269 Ok(span)
270 } else {
271 let start = self.cursor;
272 let (found_desc, end) = self.scan_token_desc_and_end();
273 Err(self.set_error(
274 start,
275 Some(end),
276 ErrorKind::Wanted {
277 expected: byte_describe(b),
278 found: found_desc,
279 },
280 ))
281 }
282 }
283
284 fn eat_whitespace(&mut self) {
285 while let Some(b) = self.peek_byte() {
286 if b == b' ' || b == b'\t' {
287 self.advance();
288 } else {
289 break;
290 }
291 }
292 }
293
294 fn eat_comment(&mut self) -> Result<bool, ParseError> {
295 if !self.eat_byte(b'#') {
296 return Ok(false);
297 }
298 while let Some(0x09 | 0x20..=0x7E | 0x80..) = self.peek_byte() {
299 self.cursor += 1;
300 }
301 self.eat_newline_or_eof().map(|()| true)
302 }
303
304 fn eat_newline_or_eof(&mut self) -> Result<(), ParseError> {
305 match self.peek_byte() {
306 None => Ok(()),
307 Some(b'\n') => {
308 self.advance();
309 Ok(())
310 }
311 Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
312 self.cursor += 2;
313 Ok(())
314 }
315 _ => {
316 let start = self.cursor;
317 let (found_desc, end) = self.scan_token_desc_and_end();
318 Err(self.set_error(
319 start,
320 Some(end),
321 ErrorKind::Wanted {
322 expected: "newline",
323 found: found_desc,
324 },
325 ))
326 }
327 }
328 }
329
330 fn eat_newline(&mut self) -> bool {
331 match self.peek_byte() {
332 Some(b'\n') => {
333 self.advance();
334 true
335 }
336 Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
337 self.cursor += 2;
338 true
339 }
340 _ => false,
341 }
342 }
343
344 fn scan_token_desc_and_end(&self) -> (&'static str, usize) {
348 match self.peek_byte() {
349 None => ("eof", self.bytes.len()),
350 Some(b'\n' | b'\r') => ("a newline", self.cursor + 1),
351 Some(b' ' | b'\t') => {
352 let mut end = self.cursor + 1;
353 while end < self.bytes.len()
354 && (self.bytes[end] == b' ' || self.bytes[end] == b'\t')
355 {
356 end += 1;
357 }
358 ("whitespace", end)
359 }
360 Some(b'#') => ("a comment", self.cursor + 1),
361 Some(b'=') => ("an equals", self.cursor + 1),
362 Some(b'.') => ("a period", self.cursor + 1),
363 Some(b',') => ("a comma", self.cursor + 1),
364 Some(b':') => ("a colon", self.cursor + 1),
365 Some(b'+') => ("a plus", self.cursor + 1),
366 Some(b'{') => ("a left brace", self.cursor + 1),
367 Some(b'}') => ("a right brace", self.cursor + 1),
368 Some(b'[') => ("a left bracket", self.cursor + 1),
369 Some(b']') => ("a right bracket", self.cursor + 1),
370 Some(b'\'' | b'"') => ("a string", self.cursor + 1),
371 Some(b) if is_keylike_byte(b) => {
372 let mut end = self.cursor + 1;
373 while end < self.bytes.len() && is_keylike_byte(self.bytes[end]) {
374 end += 1;
375 }
376 ("an identifier", end)
377 }
378 Some(_) => ("a character", self.cursor + 1),
379 }
380 }
381
382 fn read_keylike(&mut self) -> &'de str {
383 let start = self.cursor;
384 while let Some(b) = self.peek_byte() {
385 if !is_keylike_byte(b) {
386 break;
387 }
388 self.advance();
389 }
390 unsafe { self.str_slice(start, self.cursor) }
392 }
393
394 fn read_table_key(&mut self) -> Result<Key<'de>, ParseError> {
395 match self.peek_byte() {
396 Some(b'"') => {
397 let start = self.cursor;
398 self.advance();
399 let (key, multiline) = match self.read_string(start, b'"') {
400 Ok(v) => v,
401 Err(e) => return Err(e),
402 };
403 if multiline {
404 return Err(self.set_error(
405 start,
406 Some(key.span.end as usize),
407 ErrorKind::MultilineStringKey,
408 ));
409 }
410 Ok(key)
411 }
412 Some(b'\'') => {
413 let start = self.cursor;
414 self.advance();
415 let (key, multiline) = match self.read_string(start, b'\'') {
416 Ok(v) => v,
417 Err(e) => return Err(e),
418 };
419 if multiline {
420 return Err(self.set_error(
421 start,
422 Some(key.span.end as usize),
423 ErrorKind::MultilineStringKey,
424 ));
425 }
426 Ok(key)
427 }
428 Some(b) if is_keylike_byte(b) => {
429 let start = self.cursor;
430 let k = self.read_keylike();
431 let span = Span::new(start as u32, self.cursor as u32);
432 Ok(Key {
433 name: Str::from(k),
434 span,
435 })
436 }
437 Some(_) => {
438 let start = self.cursor;
439 let (found_desc, end) = self.scan_token_desc_and_end();
440 Err(self.set_error(
441 start,
442 Some(end),
443 ErrorKind::Wanted {
444 expected: "a table key",
445 found: found_desc,
446 },
447 ))
448 }
449 None => Err(self.set_error(
450 self.bytes.len(),
451 None,
452 ErrorKind::Wanted {
453 expected: "a table key",
454 found: "eof",
455 },
456 )),
457 }
458 }
459
460 fn read_string(&mut self, start: usize, delim: u8) -> Result<(Key<'de>, bool), ParseError> {
463 let mut multiline = false;
464 if self.eat_byte(delim) {
465 if self.eat_byte(delim) {
466 multiline = true;
467 } else {
468 return Ok((
469 Key {
470 name: Str::from(""),
471 span: Span::new(start as u32, (start + 1) as u32),
472 },
473 false,
474 ));
475 }
476 }
477
478 let mut content_start = self.cursor;
479 if multiline {
480 match self.peek_byte() {
481 Some(b'\n') => {
482 self.advance();
483 content_start = self.cursor;
484 }
485 Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
486 self.cursor += 2;
487 content_start = self.cursor;
488 }
489 _ => {}
490 }
491 }
492
493 self.read_string_loop(start, content_start, multiline, delim)
494 }
495
    /// Advances the cursor over string bytes that need no special handling.
    ///
    /// On return the cursor is at end of input or at the first byte that is the
    /// delimiter, a backslash, DEL (0x7F), or below 0x20. The word-at-a-time loop also
    /// stops on tab (0x09); that only costs the caller an extra iteration.
    fn skip_string_plain(&mut self, delim: u8) {
        let Some(&b) = self.bytes.get(self.cursor) else {
            return;
        };

        // Check the first byte on its own so strings that end immediately leave early.
        if b == delim || b == b'\\' || b == 0x7F || (b < 0x20 && b != 0x09) {
            return;
        }
        self.cursor += 1;

        let base = self.cursor;
        let rest = &self.bytes[base..];

        // Examine eight bytes per step, one byte per lane of a `u64`.
        type Chunk = u64;
        const STEP: usize = std::mem::size_of::<Chunk>();
        // `ONE` has 0x01 in every lane, `HIGH` has 0x80 in every lane.
        const ONE: Chunk = Chunk::MAX / 255;
        const HIGH: Chunk = ONE << 7;
        // The byte of interest replicated into every lane.
        let fill_delim = ONE * Chunk::from(delim);
        let fill_bslash = ONE * Chunk::from(b'\\');
        let fill_del = ONE * 0x7F;

        let chunks = rest.chunks_exact(STEP);
        let remainder_len = chunks.remainder().len();

        for (i, chunk) in chunks.enumerate() {
            // Little-endian load: the first byte in memory is the lowest lane.
            let v = Chunk::from_le_bytes(chunk.try_into().unwrap());

            // After masking with `HIGH`: a lane's top bit is set when its byte is < 0x20 ...
            let has_ctrl = v.wrapping_sub(ONE * 0x20) & !v;
            // ... or equal to the delimiter / backslash / DEL (XOR makes a match zero).
            let eq_delim = (v ^ fill_delim).wrapping_sub(ONE) & !(v ^ fill_delim);
            let eq_bslash = (v ^ fill_bslash).wrapping_sub(ONE) & !(v ^ fill_bslash);
            let eq_del = (v ^ fill_del).wrapping_sub(ONE) & !(v ^ fill_del);

            // Borrows can flag lanes above a real hit, but the lowest flagged lane is
            // always genuine, and that is the only one `trailing_zeros` looks at.
            let masked = (has_ctrl | eq_delim | eq_bslash | eq_del) & HIGH;
            if masked != 0 {
                self.cursor = base + i * STEP + masked.trailing_zeros() as usize / 8;
                return;
            }
        }

        // Fewer than `STEP` bytes left: finish byte by byte.
        self.cursor = self.bytes.len() - remainder_len;
        self.skip_string_plain_slow(delim);
    }
551
552 #[cold]
553 #[inline(never)]
554 fn skip_string_plain_slow(&mut self, delim: u8) {
555 while let Some(&b) = self.bytes.get(self.cursor) {
556 if b == delim || b == b'\\' || b == 0x7F || (b < 0x20 && b != 0x09) {
557 return;
558 }
559 self.cursor += 1;
560 }
561 }
562
    /// Scans a string body up to its closing delimiter and returns the text, its span,
    /// and the `multiline` flag passed in.
    ///
    /// * `start` — offset of the opening delimiter (used for spans and errors).
    /// * `content_start` — offset of the first content byte.
    /// * `delim` — `"` or `'`; backslash escapes are only processed when it is `"`.
    ///
    /// The text is borrowed straight from the input unless an escape is met, in which
    /// case the decoded bytes are accumulated in an arena scratch buffer.
    fn read_string_loop(
        &mut self,
        start: usize,
        content_start: usize,
        multiline: bool,
        delim: u8,
    ) -> Result<(Key<'de>, bool), ParseError> {
        // Start of the input bytes not yet copied into `scratch`.
        let mut flush_from = content_start;
        // Created lazily on the first escape sequence.
        let mut scratch: Option<crate::arena::Scratch<'de>> = None;
        loop {
            self.skip_string_plain(delim);

            let i = self.cursor;
            let Some(&b) = self.bytes.get(i) else {
                return Err(self.set_error(start, None, ErrorKind::UnterminatedString));
            };
            self.cursor = i + 1;

            match b {
                b'\r' => {
                    // `\r` is only accepted as part of `\r\n`, and only in multi-line strings.
                    if self.eat_byte(b'\n') {
                        if !multiline {
                            return Err(self.set_error(
                                i,
                                None,
                                ErrorKind::InvalidCharInString('\n'),
                            ));
                        }
                    } else {
                        return Err(self.set_error(i, None, ErrorKind::InvalidCharInString('\r')));
                    }
                }
                b'\n' => {
                    if !multiline {
                        return Err(self.set_error(i, None, ErrorKind::InvalidCharInString('\n')));
                    }
                }
                d if d == delim => {
                    let (span, end) = if multiline {
                        // One or two delimiters inside a multi-line string are content.
                        if !self.eat_byte(delim) {
                            continue;
                        }
                        if !self.eat_byte(delim) {
                            continue;
                        }
                        // Up to two further delimiters belong to the content; the last
                        // three consumed form the terminator.
                        let mut extra = 0usize;
                        if self.eat_byte(delim) {
                            extra += 1;
                        }
                        if self.eat_byte(delim) {
                            extra += 1;
                        }

                        // Skip the newline that directly follows the opening delimiter
                        // when computing where the content span starts.
                        let maybe_nl = self.bytes[start + 3];
                        let start_off = if maybe_nl == b'\n' {
                            4
                        } else if maybe_nl == b'\r' {
                            5
                        } else {
                            3
                        };

                        (
                            Span::new((start + start_off) as u32, (self.cursor - 3) as u32),
                            i + extra,
                        )
                    } else {
                        (Span::new((start + 1) as u32, (self.cursor - 1) as u32), i)
                    };

                    let name = if let Some(mut s) = scratch {
                        // Escapes were seen: append the remaining raw bytes and commit.
                        s.extend(&self.bytes[flush_from..end]);
                        let committed = s.commit();
                        // NOTE(review): assumes the scratch holds valid UTF-8 (input
                        // slices plus bytes written by `read_basic_escape`) — confirm.
                        Str::from(unsafe { std::str::from_utf8_unchecked(committed) })
                    } else {
                        // No escapes: borrow the content directly from the input.
                        unsafe { Str::from(self.str_slice(content_start, end)) }
                    };
                    return Ok((Key { name, span }, multiline));
                }
                b'\\' if delim == b'"' => {
                    let arena = self.arena;
                    let s = scratch.get_or_insert_with(|| unsafe { arena.scratch() });
                    // Copy the raw bytes preceding the backslash, then decode the escape.
                    s.extend(&self.bytes[flush_from..i]);
                    if let Err(e) = self.read_basic_escape(s, start, multiline) {
                        return Err(e);
                    }
                    flush_from = self.cursor;
                }
                // Tab, printable ASCII and bytes >= 0x80 are ordinary content.
                0x09 | 0x20..=0x7E | 0x80.. => {}
                _ => {
                    return Err(self.set_error(i, None, ErrorKind::InvalidCharInString(b as char)));
                }
            }
        }
    }
663
664 fn read_basic_escape(
665 &mut self,
666 scratch: &mut crate::arena::Scratch<'_>,
667 string_start: usize,
668 multi: bool,
669 ) -> Result<(), ParseError> {
670 let i = self.cursor;
671 let Some(&b) = self.bytes.get(i) else {
672 return Err(self.set_error(string_start, None, ErrorKind::UnterminatedString));
673 };
674 self.cursor = i + 1;
675
676 match b {
677 b'"' => scratch.push(b'"'),
678 b'\\' => scratch.push(b'\\'),
679 b'b' => scratch.push(0x08),
680 b'f' => scratch.push(0x0C),
681 b'n' => scratch.push(b'\n'),
682 b'r' => scratch.push(b'\r'),
683 b't' => scratch.push(b'\t'),
684 b'e' => scratch.push(0x1B),
685 b'u' => {
686 let ch = self.read_hex(4, string_start, i);
687 match ch {
688 Ok(ch) => {
689 let mut buf = [0u8; 4];
690 let len = ch.encode_utf8(&mut buf).len();
691 scratch.extend(&buf[..len]);
692 }
693 Err(e) => return Err(e),
694 }
695 }
696 b'U' => {
697 let ch = self.read_hex(8, string_start, i);
698 match ch {
699 Ok(ch) => {
700 let mut buf = [0u8; 4];
701 let len = ch.encode_utf8(&mut buf).len();
702 scratch.extend(&buf[..len]);
703 }
704 Err(e) => return Err(e),
705 }
706 }
707 b'x' => {
708 let ch = self.read_hex(2, string_start, i);
709 match ch {
710 Ok(ch) => {
711 let mut buf = [0u8; 4];
712 let len = ch.encode_utf8(&mut buf).len();
713 scratch.extend(&buf[..len]);
714 }
715 Err(e) => return Err(e),
716 }
717 }
718 b' ' | b'\t' | b'\n' | b'\r' if multi => {
719 let c = if b == b'\r' && self.peek_byte() == Some(b'\n') {
721 self.advance();
722 '\n'
723 } else {
724 b as char
725 };
726 if c != '\n' {
727 loop {
728 match self.peek_byte() {
729 Some(b' ' | b'\t') => {
730 self.advance();
731 }
732 Some(b'\n') => {
733 self.advance();
734 break;
735 }
736 Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
737 self.cursor += 2;
738 break;
739 }
740 _ => return Err(self.set_error(i, None, ErrorKind::InvalidEscape(c))),
741 }
742 }
743 }
744 loop {
745 match self.peek_byte() {
746 Some(b' ' | b'\t' | b'\n') => {
747 self.advance();
748 }
749 Some(b'\r') if self.peek_byte_at(1) == Some(b'\n') => {
750 self.cursor += 2;
751 }
752 _ => break,
753 }
754 }
755 }
756 _ => {
757 self.cursor -= 1;
758 return Err(self.set_error(
759 self.cursor,
760 None,
761 ErrorKind::InvalidEscape(self.next_char_for_error()),
762 ));
763 }
764 }
765 Ok(())
766 }
767
768 fn next_char_for_error(&self) -> char {
769 let text = unsafe { std::str::from_utf8_unchecked(self.bytes) };
771 if let Some(value) = text.get(self.cursor..) {
772 value.chars().next().unwrap_or(char::REPLACEMENT_CHARACTER)
773 } else {
774 char::REPLACEMENT_CHARACTER
775 }
776 }
777
778 fn read_hex(
779 &mut self,
780 n: usize,
781 string_start: usize,
782 escape_start: usize,
783 ) -> Result<char, ParseError> {
784 let mut val: u32 = 0;
785 for _ in 0..n {
786 let Some(&byte) = self.bytes.get(self.cursor) else {
787 return Err(self.set_error(string_start, None, ErrorKind::UnterminatedString));
788 };
789 let digit = HEX[byte as usize];
790 if digit >= 0 {
791 val = (val << 4) | digit as u32;
792 self.cursor += 1;
793 } else {
794 return Err(self.set_error(
795 self.cursor,
796 None,
797 ErrorKind::InvalidHexEscape(self.next_char_for_error()),
798 ));
799 }
800 }
801 match char::from_u32(val) {
802 Some(ch) => Ok(ch),
803 None => Err(self.set_error(
804 escape_start,
805 Some(escape_start + n),
806 ErrorKind::InvalidEscapeValue(val),
807 )),
808 }
809 }
810
811 fn number(&mut self, start: u32, end: u32, s: &'de str) -> Result<Item<'de>, ParseError> {
812 let bytes = s.as_bytes();
813
814 if let [b'0', format, rest @ ..] = s.as_bytes() {
817 match format {
818 b'x' => return self.integer_hex(rest, Span::new(start, end)),
819 b'o' => return self.integer_octal(rest, Span::new(start, end)),
820 b'b' => return self.integer_binary(rest, Span::new(start, end)),
821 _ => {}
822 }
823 }
824
825 if self.eat_byte(b'.') {
826 let at = self.cursor;
827 return match self.peek_byte() {
828 Some(b) if is_keylike_byte(b) => {
829 let after = self.read_keylike();
830 match self.float(start, end, s, Some(after)) {
831 Ok(f) => Ok(Item::float(f, Span::new(start, self.cursor as u32))),
832 Err(e) => Err(e),
833 }
834 }
835 _ => Err(self.set_error(at, Some(end as usize), ErrorKind::InvalidNumber)),
836 };
837 }
838
839 let off = usize::from(bytes.first() == Some(&b'-'));
843 if let Some(&b'i' | &b'n') = bytes.get(off) {
844 return match s {
845 "inf" => Ok(Item::float(f64::INFINITY, Span::new(start, end))),
846 "-inf" => Ok(Item::float(f64::NEG_INFINITY, Span::new(start, end))),
847 "nan" => Ok(Item::float(f64::NAN.copysign(1.0), Span::new(start, end))),
848 "-nan" => Ok(Item::float(f64::NAN.copysign(-1.0), Span::new(start, end))),
849 _ => Err(self.set_error(
850 start as usize,
851 Some(end as usize),
852 ErrorKind::InvalidNumber,
853 )),
854 };
855 }
856
857 if let Ok(v) = self.integer_decimal(bytes, Span::new(start, end)) {
858 return Ok(v);
859 }
860
861 if bytes.iter().any(|&b| b == b'e' || b == b'E') {
862 return match self.float(start, end, s, None) {
863 Ok(f) => Ok(Item::float(f, Span::new(start, self.cursor as u32))),
864 Err(e) => Err(e),
865 };
866 }
867
868 Err(ParseError)
869 }
870
871 fn number_leading_plus(&mut self, plus_start: u32) -> Result<Item<'de>, ParseError> {
872 match self.peek_byte() {
873 Some(b'0'..=b'9' | b'i' | b'n') => {
874 let s = self.read_keylike();
875 let end = self.cursor as u32;
876 if let [b'0', b'x' | b'o' | b'b', ..] = s.as_bytes() {
878 return Err(self.set_error(
879 plus_start as usize,
880 Some(end as usize),
881 ErrorKind::InvalidNumber,
882 ));
883 }
884 self.number(plus_start, end, s)
885 }
886 _ => Err(self.set_error(
887 plus_start as usize,
888 Some(self.cursor),
889 ErrorKind::InvalidNumber,
890 )),
891 }
892 }
893
894 fn integer_decimal(&mut self, bytes: &'de [u8], span: Span) -> Result<Item<'de>, ParseError> {
895 let mut acc: u64 = 0;
896 let mut prev_underscore = false;
897 let mut has_digit = false;
898 let mut leading_zero = false;
899 'error: {
900 let (negative, digits) = match bytes.first() {
901 Some(&b'+') => (false, &bytes[1..]),
902 Some(&b'-') => (true, &bytes[1..]),
903 _ => (false, bytes),
904 };
905
906 if digits.is_empty() {
907 break 'error;
908 }
909
910 for &b in digits {
911 if b == b'_' {
912 if !has_digit || prev_underscore {
913 break 'error;
914 }
915 prev_underscore = true;
916 continue;
917 }
918 if !b.is_ascii_digit() {
919 break 'error;
920 }
921 if leading_zero {
922 break 'error;
923 }
924 if !has_digit && b == b'0' {
925 leading_zero = true;
926 }
927 has_digit = true;
928 prev_underscore = false;
929 let digit = (b - b'0') as u64;
930 acc = match acc.checked_mul(10).and_then(|a| a.checked_add(digit)) {
931 Some(v) => v,
932 None => break 'error,
933 };
934 }
935
936 if !has_digit || prev_underscore {
937 break 'error;
938 }
939
940 let max = if negative {
941 (i64::MAX as u64) + 1
942 } else {
943 i64::MAX as u64
944 };
945 if acc > max {
946 break 'error;
947 }
948
949 let val = if negative {
950 (acc as i64).wrapping_neg()
951 } else {
952 acc as i64
953 };
954 return Ok(Item::integer(val, span));
955 }
956 self.error_span = span;
957 self.error_kind = Some(ErrorKind::InvalidNumber);
958 Err(ParseError)
959 }
960
961 fn integer_hex(&mut self, bytes: &'de [u8], span: Span) -> Result<Item<'de>, ParseError> {
962 let mut acc: u64 = 0;
963 let mut prev_underscore = false;
964 let mut has_digit = false;
965 'error: {
966 if bytes.is_empty() {
967 break 'error;
968 }
969
970 for &b in bytes {
971 if b == b'_' {
972 if !has_digit || prev_underscore {
973 break 'error;
974 }
975 prev_underscore = true;
976 continue;
977 }
978 let digit = HEX[b as usize];
979 if digit < 0 {
980 break 'error;
981 }
982 has_digit = true;
983 prev_underscore = false;
984 if acc >> 60 != 0 {
985 break 'error;
986 }
987 acc = (acc << 4) | digit as u64;
988 }
989
990 if !has_digit || prev_underscore {
991 break 'error;
992 }
993
994 if acc > i64::MAX as u64 {
995 break 'error;
996 }
997 return Ok(Item::integer(acc as i64, span));
998 }
999 self.error_span = span;
1000 self.error_kind = Some(ErrorKind::InvalidNumber);
1001 Err(ParseError)
1002 }
1003
1004 fn integer_octal(&mut self, bytes: &'de [u8], span: Span) -> Result<Item<'de>, ParseError> {
1005 let mut acc: u64 = 0;
1006 let mut prev_underscore = false;
1007 let mut has_digit = false;
1008 'error: {
1009 if bytes.is_empty() {
1010 break 'error;
1011 }
1012
1013 for &b in bytes {
1014 if b == b'_' {
1015 if !has_digit || prev_underscore {
1016 break 'error;
1017 }
1018 prev_underscore = true;
1019 continue;
1020 }
1021 if !b.is_ascii_digit() || b > b'7' {
1022 break 'error;
1023 }
1024 has_digit = true;
1025 prev_underscore = false;
1026 if acc >> 61 != 0 {
1027 break 'error;
1028 }
1029 acc = (acc << 3) | (b - b'0') as u64;
1030 }
1031
1032 if !has_digit || prev_underscore {
1033 break 'error;
1034 }
1035
1036 if acc > i64::MAX as u64 {
1037 break 'error;
1038 }
1039 return Ok(Item::integer(acc as i64, span));
1040 }
1041 self.error_span = span;
1042 self.error_kind = Some(ErrorKind::InvalidNumber);
1043 Err(ParseError)
1044 }
1045
1046 fn integer_binary(&mut self, bytes: &'de [u8], span: Span) -> Result<Item<'de>, ParseError> {
1047 let mut acc: u64 = 0;
1048 let mut prev_underscore = false;
1049 let mut has_digit = false;
1050 'error: {
1051 if bytes.is_empty() {
1052 break 'error;
1053 }
1054
1055 for &b in bytes {
1056 if b == b'_' {
1057 if !has_digit || prev_underscore {
1058 break 'error;
1059 }
1060 prev_underscore = true;
1061 continue;
1062 }
1063 if b != b'0' && b != b'1' {
1064 break 'error;
1065 }
1066 has_digit = true;
1067 prev_underscore = false;
1068 if acc >> 63 != 0 {
1069 break 'error;
1070 }
1071 acc = (acc << 1) | (b - b'0') as u64;
1072 }
1073
1074 if !has_digit || prev_underscore {
1075 break 'error;
1076 }
1077
1078 if acc > i64::MAX as u64 {
1079 break 'error;
1080 }
1081 return Ok(Item::integer(acc as i64, span));
1082 }
1083 self.error_span = span;
1084 self.error_kind = Some(ErrorKind::InvalidNumber);
1085 Err(ParseError)
1086 }
1087
    /// Assembles and parses a float from the pieces the caller has tokenized.
    ///
    /// * `start` / `end` — byte range of the first token `s`; used for every error span.
    /// * `s` — text before any `.` (may carry a leading `-` and an exponent).
    /// * `after_decimal` — the keylike token following the `.`, if there was one.
    ///
    /// When the last piece ends in `e` / `E`, an optional `+` and the exponent digits
    /// are read from the input. Every failure, including a non-finite result, records
    /// `InvalidNumber`.
    fn float(
        &mut self,
        start: u32,
        end: u32,
        s: &'de str,
        after_decimal: Option<&'de str>,
    ) -> Result<f64, ParseError> {
        let s_start = start as usize;
        let s_end = end as usize;

        // Reject a leading zero followed by another digit or underscore (sign ignored).
        let unsigned = if s.as_bytes().first() == Some(&b'-') {
            &s[1..]
        } else {
            s
        };
        if let [b'0', b'0'..=b'9' | b'_', ..] = unsigned.as_bytes() {
            return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
        }

        // The literal is rebuilt, without underscores, in an arena scratch buffer.
        let mut scratch = unsafe { self.arena.scratch() };

        // NOTE(review): `push_strip_underscores` is defined elsewhere; a `false` return
        // is treated as "malformed" here — confirm exactly what it validates.
        if !scratch.push_strip_underscores(s.as_bytes()) {
            return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
        }

        // The piece whose final byte decides whether an exponent follows.
        let mut last = s;

        if let Some(after) = after_decimal {
            // The fractional part must begin with a digit.
            if !matches!(after.as_bytes().first(), Some(b'0'..=b'9')) {
                return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
            }
            scratch.push(b'.');
            if !scratch.push_strip_underscores(after.as_bytes()) {
                return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
            }
            last = after;
        }

        if matches!(last.as_bytes().last(), Some(b'e' | b'E')) {
            // The token ended at the exponent marker: an optional `+` and the exponent
            // digits are still in the input. A `-` here is rejected.
            self.eat_byte(b'+');
            match self.peek_byte() {
                Some(b) if is_keylike_byte(b) && b != b'-' => {
                    let next = self.read_keylike();
                    if !scratch.push_strip_underscores(next.as_bytes()) {
                        return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
                    }
                }
                _ => {
                    return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
                }
            }
        }

        // NOTE(review): assumes the scratch only holds bytes copied from the input
        // pieces plus `.`, and is therefore valid UTF-8 — confirm.
        let n: f64 = match unsafe { std::str::from_utf8_unchecked(scratch.as_bytes()) }.parse() {
            Ok(n) => n,
            Err(_) => {
                return Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber));
            }
        };
        // Values that parse to infinity or NaN are rejected here.
        if n.is_finite() {
            Ok(n)
        } else {
            Err(self.set_error(s_start, Some(s_end), ErrorKind::InvalidNumber))
        }
    }
1160
1161 fn value(&mut self, depth_remaining: i16) -> Result<Item<'de>, ParseError> {
1162 let at = self.cursor;
1163 let Some(byte) = self.peek_byte() else {
1164 return Err(self.set_error(self.bytes.len(), None, ErrorKind::UnexpectedEof));
1165 };
1166 match byte {
1167 b'"' => {
1168 self.advance();
1169 let (key, _multiline) = match self.read_string(self.cursor - 1, b'"') {
1170 Ok(v) => v,
1171 Err(e) => return Err(e),
1172 };
1173 Ok(Item::string(key.name, key.span))
1174 }
1175 b'\'' => {
1176 self.advance();
1177 let (key, _multiline) = match self.read_string(self.cursor - 1, b'\'') {
1178 Ok(v) => v,
1179 Err(e) => return Err(e),
1180 };
1181 Ok(Item::string(key.name, key.span))
1182 }
1183 b'{' => {
1184 let start = self.cursor as u32;
1185 self.advance();
1186 let mut table = crate::table::InnerTable::new();
1187 let end_span = match self.inline_table_contents(&mut table, depth_remaining - 1) {
1188 Ok(v) => v,
1189 Err(e) => return Err(e),
1190 };
1191 Ok(Item::table_frozen(table, Span::new(start, end_span.end)))
1192 }
1193 b'[' => {
1194 let start = self.cursor as u32;
1195 self.advance();
1196 let mut arr = value::Array::new();
1197 let end_span = match self.array_contents(&mut arr, depth_remaining - 1) {
1198 Ok(v) => v,
1199 Err(e) => return Err(e),
1200 };
1201 Ok(Item::array(arr, Span::new(start, end_span.end)))
1202 }
1203 b'+' => {
1204 let start = self.cursor as u32;
1205 self.advance();
1206 self.number_leading_plus(start)
1207 }
1208 b if is_keylike_byte(b) => {
1209 let start = self.cursor as u32;
1210 let key = self.read_keylike();
1211 let end = self.cursor as u32;
1212 let span = Span::new(start, end);
1213
1214 match key {
1215 "true" => Ok(Item::boolean(true, span)),
1216 "false" => Ok(Item::boolean(false, span)),
1217 "inf" | "nan" => self.number(start, end, key),
1218 _ => {
1219 let first_char = key.chars().next().expect("key should not be empty");
1220 match first_char {
1221 '-' => match key.as_bytes().get(1) {
1222 Some(b'0'..=b'9' | b'i' | b'n') => self.number(start, end, key),
1223 _ => Err(self.set_error(
1224 start as usize,
1225 Some(end as usize),
1226 ErrorKind::InvalidNumber,
1227 )),
1228 },
1229 '0'..='9' => self.number(start, end, key),
1230 _ => Err(self.set_error(
1231 at,
1232 Some(end as usize),
1233 ErrorKind::UnquotedString,
1234 )),
1235 }
1236 }
1237 }
1238 }
1239 _ => {
1240 let (found_desc, end) = self.scan_token_desc_and_end();
1241 Err(self.set_error(
1242 at,
1243 Some(end),
1244 ErrorKind::Wanted {
1245 expected: "a value",
1246 found: found_desc,
1247 },
1248 ))
1249 }
1250 }
1251 }
1252
    /// Parses the body of an inline table whose `{` has already been consumed, storing
    /// the entries in `out` and returning the span of the closing `}`.
    ///
    /// Whitespace, newlines and comments are skipped between tokens, dotted keys are
    /// resolved through `navigate_dotted_key`, and a comma may precede the closing
    /// brace. A negative `depth_remaining` records
    /// `OutOfRange("Max recursion depth exceeded")`.
    fn inline_table_contents(
        &mut self,
        out: &mut crate::table::InnerTable<'de>,
        depth_remaining: i16,
    ) -> Result<Span, ParseError> {
        if depth_remaining < 0 {
            return Err(self.set_error(
                self.cursor,
                None,
                ErrorKind::OutOfRange("Max recursion depth exceeded"),
            ));
        }
        if let Err(e) = self.eat_inline_table_whitespace() {
            return Err(e);
        }
        // Empty table: `{}`.
        if let Some(span) = self.eat_byte_spanned(b'}') {
            return Ok(span);
        }
        loop {
            // Each entry starts resolving its key from the table being filled.
            let mut table_ref: &mut crate::table::InnerTable<'de> = &mut *out;
            let mut key = match self.read_table_key() {
                Ok(k) => k,
                Err(e) => return Err(e),
            };
            if let Err(e) = self.eat_inline_table_whitespace() {
                return Err(e);
            }
            // Dotted key: descend one sub-table per `.`; `key` ends up as the last segment.
            while self.eat_byte(b'.') {
                if let Err(e) = self.eat_inline_table_whitespace() {
                    return Err(e);
                }
                table_ref = match self.navigate_dotted_key(table_ref, key) {
                    Ok(t) => t,
                    Err(e) => return Err(e),
                };
                key = match self.read_table_key() {
                    Ok(k) => k,
                    Err(e) => return Err(e),
                };
                if let Err(e) = self.eat_inline_table_whitespace() {
                    return Err(e);
                }
            }
            if let Err(e) = self.expect_byte(b'=') {
                return Err(e);
            }
            if let Err(e) = self.eat_inline_table_whitespace() {
                return Err(e);
            }
            {
                let val = match self.value(depth_remaining) {
                    Ok(v) => v,
                    Err(e) => return Err(e),
                };
                if let Err(e) = self.insert_value(table_ref, key, val) {
                    return Err(e);
                }
            }

            if let Err(e) = self.eat_inline_table_whitespace() {
                return Err(e);
            }
            if let Some(span) = self.eat_byte_spanned(b'}') {
                return Ok(span);
            }
            if let Err(e) = self.expect_byte(b',') {
                return Err(e);
            }
            if let Err(e) = self.eat_inline_table_whitespace() {
                return Err(e);
            }
            // A trailing comma before the closing brace is accepted.
            if let Some(span) = self.eat_byte_spanned(b'}') {
                return Ok(span);
            }
        }
    }
1329
1330 fn array_contents(
1331 &mut self,
1332 out: &mut value::Array<'de>,
1333 depth_remaining: i16,
1334 ) -> Result<Span, ParseError> {
1335 if depth_remaining < 0 {
1336 return Err(self.set_error(
1337 self.cursor,
1338 None,
1339 ErrorKind::OutOfRange("Max recursion depth exceeded"),
1340 ));
1341 }
1342 loop {
1343 if let Err(e) = self.eat_intermediate() {
1344 return Err(e);
1345 }
1346 if let Some(span) = self.eat_byte_spanned(b']') {
1347 return Ok(span);
1348 }
1349 let val = match self.value(depth_remaining) {
1350 Ok(v) => v,
1351 Err(e) => return Err(e),
1352 };
1353 out.push(val, self.arena);
1354 if let Err(e) = self.eat_intermediate() {
1355 return Err(e);
1356 }
1357 if !self.eat_byte(b',') {
1358 break;
1359 }
1360 }
1361 if let Err(e) = self.eat_intermediate() {
1362 return Err(e);
1363 }
1364 self.expect_byte_spanned(b']')
1365 }
1366
1367 fn eat_inline_table_whitespace(&mut self) -> Result<(), ParseError> {
1368 loop {
1369 self.eat_whitespace();
1370 if self.eat_newline() {
1371 continue;
1372 }
1373 match self.eat_comment() {
1374 Ok(true) => {}
1375 Ok(false) => break,
1376 Err(e) => return Err(e),
1377 }
1378 }
1379 Ok(())
1380 }
1381
1382 fn eat_intermediate(&mut self) -> Result<(), ParseError> {
1383 loop {
1384 self.eat_whitespace();
1385 if self.eat_newline() {
1386 continue;
1387 }
1388 match self.eat_comment() {
1389 Ok(true) => {}
1390 Ok(false) => break,
1391 Err(e) => return Err(e),
1392 }
1393 }
1394 Ok(())
1395 }
1396
1397 fn navigate_dotted_key<'t>(
1401 &mut self,
1402 table: &'t mut InnerTable<'de>,
1403 key: Key<'de>,
1404 ) -> Result<&'t mut InnerTable<'de>, ParseError> {
1405 if let Some(idx) = self.indexed_find(table, &key.name) {
1406 let (existing_key, value) = &mut table.entries_mut()[idx];
1407 let ok = value.is_table() && !value.is_frozen() && !value.has_header_bit();
1408
1409 if !ok {
1410 return Err(self.set_error(
1411 key.span.start as usize,
1412 Some(key.span.end as usize),
1413 ErrorKind::DottedKeyInvalidType {
1414 first: existing_key.span,
1415 },
1416 ));
1417 }
1418 unsafe { Ok(value.as_table_mut_unchecked()) }
1420 } else {
1421 let span = key.span;
1422 let inserted =
1423 self.insert_into_table(table, key, Item::table_dotted(InnerTable::new(), span));
1424 unsafe { Ok(inserted.as_table_mut_unchecked()) }
1425 }
1426 }
1427
1428 fn navigate_header_intermediate<'b>(
1434 &mut self,
1435 st: &'b mut Table<'de>,
1436 key: Key<'de>,
1437 ) -> Result<&'b mut Table<'de>, ParseError> {
1438 let table = &mut st.value;
1439
1440 if let Some(idx) = self.indexed_find(table, &key.name) {
1441 let (existing_key, existing) = &mut table.entries_mut()[idx];
1442 let first_key_span = existing_key.span;
1443 let is_table = existing.is_table();
1444 let is_array = existing.is_array();
1445 let is_frozen = existing.is_frozen();
1446 let is_aot = existing.is_aot();
1447
1448 if is_table {
1449 if is_frozen {
1450 return Err(self.set_duplicate_key_error(first_key_span, key.span, &key.name));
1451 }
1452 unsafe { Ok(existing.as_spanned_table_mut_unchecked()) }
1453 } else if is_array && is_aot {
1454 let arr = existing.as_array_mut().unwrap();
1455 let last = arr.last_mut().unwrap();
1456 if !last.is_table() {
1457 return Err(self.set_duplicate_key_error(first_key_span, key.span, &key.name));
1458 }
1459 unsafe { Ok(last.as_spanned_table_mut_unchecked()) }
1460 } else {
1461 Err(self.set_duplicate_key_error(first_key_span, key.span, &key.name))
1462 }
1463 } else {
1464 let span = key.span;
1465 let inserted = self.insert_into_table(table, key, Item::table(InnerTable::new(), span));
1466 unsafe { Ok(inserted.as_spanned_table_mut_unchecked()) }
1467 }
1468 }
1469 fn insert_into_table<'t>(
1470 &mut self,
1471 table: &'t mut InnerTable<'de>,
1472 key: Key<'de>,
1473 item: Item<'de>,
1474 ) -> &'t mut value::Item<'de> {
1475 let len = table.len();
1476 if len >= INDEXED_TABLE_THRESHOLD {
1477 let table_id = unsafe { table.first_key_span_start_unchecked() };
1478 if len == INDEXED_TABLE_THRESHOLD {
1479 for (i, (key, _)) in table.entries().iter().enumerate() {
1480 self.table_index
1481 .insert(KeyIndex::new(key.as_str(), table_id), i);
1482 }
1483 }
1484 self.table_index
1485 .insert(KeyIndex::new(key.as_str(), table_id), len);
1486 }
1487 &mut table.insert(key, item, self.arena).1
1488 }
1489
1490 fn navigate_header_table_final<'b>(
1495 &mut self,
1496 st: &'b mut Table<'de>,
1497 key: Key<'de>,
1498 header_start: u32,
1499 header_end: u32,
1500 ) -> Result<Ctx<'b, 'de>, ParseError> {
1501 let table = &mut st.value;
1502
1503 if let Some(idx) = self.indexed_find(table, &key.name) {
1504 let (existing_key, value) = &mut table.entries_mut()[idx];
1505 let first_key_span = existing_key.span;
1506 let is_table = value.is_table();
1507 let is_frozen = value.is_frozen();
1508 let has_header = value.has_header_bit();
1509 let has_dotted = value.has_dotted_bit();
1510 let val_span = value.span();
1511
1512 if !is_table || is_frozen {
1513 return Err(self.set_duplicate_key_error(first_key_span, key.span, &key.name));
1514 }
1515 if has_header {
1516 return Err(self.set_error(
1517 header_start as usize,
1518 Some(header_end as usize),
1519 ErrorKind::DuplicateTable {
1520 name: String::from(&*key.name),
1521 first: val_span,
1522 },
1523 ));
1524 }
1525 if has_dotted {
1526 return Err(self.set_duplicate_key_error(first_key_span, key.span, &key.name));
1527 }
1528 let table = unsafe { value.as_spanned_table_mut_unchecked() };
1529 table.set_header_flag();
1530 table.set_span_start(header_start);
1531 table.set_span_end(header_end);
1532 Ok(Ctx {
1533 table,
1534 array_end_span: None,
1535 })
1536 } else {
1537 let inserted = self.insert_into_table(
1538 table,
1539 key,
1540 Item::table_header(InnerTable::new(), Span::new(header_start, header_end)),
1541 );
1542 Ok(Ctx {
1543 table: unsafe { inserted.as_spanned_table_mut_unchecked() },
1544 array_end_span: None,
1545 })
1546 }
1547 }
1548
1549 fn navigate_header_array_final<'b>(
1554 &mut self,
1555 st: &'b mut Table<'de>,
1556 key: Key<'de>,
1557 header_start: u32,
1558 header_end: u32,
1559 ) -> Result<Ctx<'b, 'de>, ParseError> {
1560 let table = &mut st.value;
1561
1562 if let Some(idx) = self.indexed_find(table, &key.name) {
1563 let (existing_key, value) = &mut table.entries_mut()[idx];
1564 let first_key_span = existing_key.span;
1565 let is_aot = value.is_aot();
1566 let is_table = value.is_table();
1567
1568 if is_aot {
1569 let (end_flag, arr) = unsafe { value.split_array_end_flag() };
1570 let entry_span = Span::new(header_start, header_end);
1571 arr.push(
1572 Item::table_header(InnerTable::new(), entry_span),
1573 self.arena,
1574 );
1575 let entry = arr.last_mut().unwrap();
1576 Ok(Ctx {
1577 table: unsafe { entry.as_spanned_table_mut_unchecked() },
1578 array_end_span: Some(end_flag),
1579 })
1580 } else if is_table {
1581 Err(self.set_error(
1582 header_start as usize,
1583 Some(header_end as usize),
1584 ErrorKind::RedefineAsArray,
1585 ))
1586 } else {
1587 Err(self.set_duplicate_key_error(first_key_span, key.span, &key.name))
1588 }
1589 } else {
1590 let entry_span = Span::new(header_start, header_end);
1591 let first_entry = Item::table_header(InnerTable::new(), entry_span);
1592 let array_span = Span::new(header_start, header_end);
1593 let array_val = Item::array_aot(
1594 value::Array::with_single(first_entry, self.arena),
1595 array_span,
1596 );
1597 let inserted = self.insert_into_table(table, key, array_val);
1598 let (end_flag, arr) = unsafe { inserted.split_array_end_flag() };
1599 let entry = arr.last_mut().unwrap();
1600 Ok(Ctx {
1601 table: unsafe { entry.as_spanned_table_mut_unchecked() },
1602 array_end_span: Some(end_flag),
1603 })
1604 }
1605 }
1606
1607 fn insert_value(
1609 &mut self,
1610 table: &mut InnerTable<'de>,
1611 key: Key<'de>,
1612 item: Item<'de>,
1613 ) -> Result<(), ParseError> {
1614 if let Some(idx) = self.indexed_find(table, &key.name) {
1615 let (existing_key, _) = &table.entries_mut()[idx];
1616 return Err(self.set_duplicate_key_error(existing_key.span, key.span, &key.name));
1617 }
1618
1619 self.insert_into_table(table, key, item);
1620 Ok(())
1621 }
1622
1623 fn indexed_find(&self, table: &InnerTable<'de>, name: &str) -> Option<usize> {
1627 if table.len() > INDEXED_TABLE_THRESHOLD {
1630 let first_key_span = unsafe { table.first_key_span_start_unchecked() };
1631 self.table_index
1632 .get(&KeyIndex::new(name, first_key_span))
1633 .copied()
1634 } else {
1635 table.find_index(name)
1636 }
1637 }
1638
1639 fn parse_document(&mut self, root_st: &mut Table<'de>) -> Result<(), ParseError> {
1640 let mut ctx = Ctx {
1641 table: root_st,
1642 array_end_span: None,
1643 };
1644
1645 loop {
1646 self.eat_whitespace();
1647 match self.eat_comment() {
1648 Ok(true) => continue,
1649 Ok(false) => {}
1650 Err(e) => return Err(e),
1651 }
1652 if self.eat_newline() {
1653 continue;
1654 }
1655
1656 match self.peek_byte() {
1657 None => break,
1658 Some(b'[') => {
1659 ctx = match self.process_table_header(root_st) {
1660 Ok(c) => c,
1661 Err(e) => return Err(e),
1662 };
1663 }
1664 Some(b'\r') => {
1665 return Err(self.set_error(self.cursor, None, ErrorKind::Unexpected('\r')));
1666 }
1667 Some(_) => {
1668 if let Err(e) = self.process_key_value(&mut ctx) {
1669 return Err(e);
1670 }
1671 }
1672 }
1673 }
1674 Ok(())
1675 }
1676
1677 fn process_table_header<'b>(
1678 &mut self,
1679 root_st: &'b mut Table<'de>,
1680 ) -> Result<Ctx<'b, 'de>, ParseError> {
1681 let header_start = self.cursor as u32;
1682 if let Err(e) = self.expect_byte(b'[') {
1683 return Err(e);
1684 }
1685 let is_array = self.eat_byte(b'[');
1686
1687 let mut current = root_st;
1688
1689 self.eat_whitespace();
1690 let mut key = match self.read_table_key() {
1691 Ok(k) => k,
1692 Err(e) => return Err(e),
1693 };
1694 loop {
1695 self.eat_whitespace();
1696 if self.eat_byte(b'.') {
1697 self.eat_whitespace();
1698 current = match self.navigate_header_intermediate(current, key) {
1699 Ok(p) => p,
1700 Err(e) => return Err(e),
1701 };
1702 key = match self.read_table_key() {
1703 Ok(k) => k,
1704 Err(e) => return Err(e),
1705 };
1706 } else {
1707 break;
1708 }
1709 }
1710
1711 self.eat_whitespace();
1712 if let Err(e) = self.expect_byte(b']') {
1713 return Err(e);
1714 }
1715 if is_array && let Err(e) = self.expect_byte(b']') {
1716 return Err(e);
1717 }
1718
1719 self.eat_whitespace();
1720 match self.eat_comment() {
1721 Ok(true) => {}
1722 Ok(false) => {
1723 if let Err(e) = self.eat_newline_or_eof() {
1724 return Err(e);
1725 }
1726 }
1727 Err(e) => return Err(e),
1728 }
1729 let header_end = self.cursor as u32;
1730
1731 if is_array {
1732 self.navigate_header_array_final(current, key, header_start, header_end)
1733 } else {
1734 self.navigate_header_table_final(current, key, header_start, header_end)
1735 }
1736 }
1737
1738 fn process_key_value(&mut self, ctx: &mut Ctx<'_, 'de>) -> Result<(), ParseError> {
1739 let line_start = self.cursor as u32;
1740 let mut table_ref: &mut InnerTable<'de> = &mut ctx.table.value;
1744
1745 let mut key = match self.read_table_key() {
1746 Ok(k) => k,
1747 Err(e) => return Err(e),
1748 };
1749 self.eat_whitespace();
1750
1751 while self.eat_byte(b'.') {
1752 self.eat_whitespace();
1753 table_ref = match self.navigate_dotted_key(table_ref, key) {
1754 Ok(t) => t,
1755 Err(e) => return Err(e),
1756 };
1757 key = match self.read_table_key() {
1758 Ok(k) => k,
1759 Err(e) => return Err(e),
1760 };
1761 self.eat_whitespace();
1762 }
1763
1764 if let Err(e) = self.expect_byte(b'=') {
1765 return Err(e);
1766 }
1767 self.eat_whitespace();
1768 let val = match self.value(MAX_RECURSION_DEPTH) {
1769 Ok(v) => v,
1770 Err(e) => return Err(e),
1771 };
1772 let line_end = self.cursor as u32;
1773
1774 self.eat_whitespace();
1775 match self.eat_comment() {
1776 Ok(true) => {}
1777 Ok(false) => {
1778 if let Err(e) = self.eat_newline_or_eof() {
1779 return Err(e);
1780 }
1781 }
1782 Err(e) => return Err(e),
1783 }
1784
1785 if let Err(e) = self.insert_value(table_ref, key, val) {
1786 return Err(e);
1787 }
1788
1789 let start = ctx.table.span_start();
1790 ctx.table.set_span_start(start.min(line_start));
1791 ctx.table.extend_span_end(line_end);
1792
1793 if let Some(end_flag) = &mut ctx.array_end_span {
1794 let old = **end_flag;
1795 let current = old >> value::FLAG_SHIFT;
1796 **end_flag = (current.max(line_end) << value::FLAG_SHIFT) | (old & value::FLAG_MASK);
1797 }
1798
1799 Ok(())
1800 }
1801}
1802
1803pub fn parse<'de>(s: &'de str, arena: &'de Arena) -> Result<Table<'de>, Error> {
1809 const MAX_SIZE: usize = (1u32 << 29) as usize;
1813
1814 if s.len() > MAX_SIZE {
1815 return Err(Error {
1816 kind: ErrorKind::FileTooLarge,
1817 span: Span::new(0, 0),
1818 });
1819 }
1820
1821 let mut root_st = Table::new(Span::new(0, s.len() as u32));
1823 let mut parser = Parser::new(s, arena);
1824 match parser.parse_document(&mut root_st) {
1825 Ok(()) => {}
1826 Err(_) => return Err(parser.take_error()),
1827 }
1828 Ok(root_st)
1832}
1833
/// Returns `true` for ASCII letters, ASCII digits, `-` and `_`.
#[inline]
fn is_keylike_byte(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'_')
}
1838
1839fn byte_describe(b: u8) -> &'static str {
1840 match b {
1841 b'\n' => "a newline",
1842 b' ' | b'\t' => "whitespace",
1843 b'=' => "an equals",
1844 b'.' => "a period",
1845 b',' => "a comma",
1846 b':' => "a colon",
1847 b'+' => "a plus",
1848 b'{' => "a left brace",
1849 b'}' => "a right brace",
1850 b'[' => "a left bracket",
1851 b']' => "a right bracket",
1852 b'\'' | b'"' => "a string",
1853 _ if is_keylike_byte(b) => "an identifier",
1854 _ => "a character",
1855 }
1856}