1use alloc::borrow::Cow;
3use alloc::boxed::Box;
4use alloc::string::String;
/// One lexical token of the JSON input, as produced by `Reader::next_token`.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum Token<'s> {
    /// The keyword `null`.
    Null,
    /// The keyword `true` or `false`.
    Bool(bool),
    /// An integer literal without a leading minus sign that fits in a `u64`.
    NumU(u64),
    /// An integer literal with a leading minus sign that fits in an `i64`.
    NumI(i64),
    /// Any other number: one with a fraction/exponent, or an integer that
    /// does not fit the integer variants.
    NumF(f64),
    /// A string without escape sequences, borrowed directly from the input.
    StrBorrow(&'s str),
    /// A string that contained escape sequences and was unescaped into an
    /// owned allocation.
    StrOwn(Box<str>),
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `{`
    ObjectBegin,
    /// `}`
    ObjectEnd,
    /// `[`
    ArrayBegin,
    /// `]`
    ArrayEnd,
}
23
/// Options selecting which extensions to plain JSON a `Reader` accepts.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Dialect {
    /// When `true`, `//` line comments and `/* ... */` block comments are
    /// skipped wherever whitespace is allowed.
    pub allow_comments: bool,
}
46
47impl Default for Dialect {
48 #[inline]
49 fn default() -> Self {
50 Self::DEFAULT
51 }
52}
53
54impl Dialect {
55 pub const STRICT: Self = Self {
57 allow_comments: false,
58 };
60
61 pub const CJSON: Self = Self {
68 allow_comments: false,
69 };
71
72 pub const DEFAULT: Self = Self {
85 allow_comments: cfg!(feature = "default_allow_comments"),
86 };
88
89 pub const LOOSE: Self = Self {
103 allow_comments: true,
104 };
106 #[inline]
107 pub const fn comments(mut self, v: bool) -> Self {
108 self.allow_comments = v;
109 self
110 }
111 }
117
/// Error returned when the input is not acceptable JSON for the active
/// dialect.
///
/// Position information is only recorded when debug assertions or the
/// `better_errors` feature are enabled.
#[derive(Clone, Debug)]
pub struct Error {
    /// `(byte index, line, column)` captured when the error was created.
    #[cfg(any(debug_assertions, feature = "better_errors"))]
    _pos: (usize, usize, usize),
    /// Placeholder that keeps the struct non-constructible outside the crate.
    #[cfg(not(any(debug_assertions, feature = "better_errors")))]
    _priv: (),
}
157impl core::fmt::Display for Error {
158 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
159 #[cfg(any(debug_assertions, feature = "better_errors"))]
160 {
161 write!(
162 f,
163 "JSON parse error around index {} (line {} column {})",
164 self._pos.0, self._pos.1, self._pos.2
165 )
166 }
167 #[cfg(not(any(debug_assertions, feature = "better_errors")))]
168 {
169 f.write_str("JSON parse error")
170 }
171 }
172}
173
174pub type Result<T, E = Error> = core::result::Result<T, E>;
175
176pub struct Reader<'a> {
177 input: &'a str,
178 bytes: &'a [u8],
179 tok_start: usize,
180 pos: usize,
181 buf: String,
182 stash: Option<Token<'a>>,
183 dialect: Dialect,
184}
185
186impl<'a> Reader<'a> {
187 pub fn new(input: &'a str) -> Self {
189 Self::with_dialect(input, Dialect::DEFAULT)
190 }
191
192 pub fn with_dialect(input: &'a str, dialect: Dialect) -> Self {
194 Self {
195 input,
196 bytes: input.as_bytes(),
197 pos: 0,
198 buf: String::new(),
199 tok_start: 0,
200 stash: None,
201 dialect,
202 }
203 }
204
205 #[inline]
206 pub fn dialect_mut(&mut self) -> &mut Dialect {
207 &mut self.dialect
208 }
209
210 #[inline]
211 pub fn dialect(self) -> Dialect {
212 self.dialect
213 }
214
215 #[inline]
216 pub fn position(&self) -> usize {
217 self.pos.min(self.bytes.len())
218 }
219
220 #[cold]
221 pub(super) fn err(&mut self) -> Error {
222 #[cfg(any(debug_assertions, feature = "better_errors"))]
223 {
224 let index = self.pos.min(self.input.len());
225 let so_far = &self.bytes[..index];
227 let line = so_far.iter().filter(|n| **n == b'\n').count();
228 let col = if line == 0 {
230 index
231 } else {
232 so_far
233 .iter()
234 .rposition(|n| *n == b'\n')
235 .map(|i| i + 1)
236 .unwrap_or_default()
237 };
238 Error {
239 _pos: (index, line, col),
240 }
241 }
242 #[cfg(not(any(debug_assertions, feature = "better_errors")))]
243 {
244 Error { _priv: () }
245 }
246 }
247
248 pub fn finish(mut self) -> Result<()> {
251 match self.next_token() {
252 Ok(Some(_)) => Err(self.err()),
253 Ok(None) => Ok(()),
254 Err(e) => Err(e),
255 }
256 }
257
258 fn bnext_if(&mut self, b: u8) -> bool {
259 if self.pos < self.bytes.len() && self.bytes[self.pos] == b {
260 self.pos += 1;
261 true
262 } else {
263 false
264 }
265 }
266
267 fn bnext(&mut self) -> Option<u8> {
268 if self.pos < self.bytes.len() {
269 let ch = self.bytes[self.pos];
270 self.pos += 1;
271 Some(ch)
272 } else {
273 None
274 }
275 }
276
277 fn bnext_or_err(&mut self) -> Result<u8> {
278 match self.bnext() {
279 Some(c) => Ok(c),
280 None => Err(self.err()),
281 }
282 }
283
284 fn bpeek(&mut self) -> Option<u8> {
285 if self.pos < self.bytes.len() {
286 Some(self.bytes[self.pos])
287 } else {
288 None
289 }
290 }
291
292 fn bpeek_or_nul(&mut self) -> u8 {
293 self.bpeek().unwrap_or(b'\0')
294 }
295
296 fn bump(&mut self) {
297 self.pos += 1;
298 debug_assert!(self.pos <= self.input.len());
299 }
300
301 fn finished(&self) -> bool {
302 self.pos >= self.bytes.len()
303 }
304
305 pub(super) fn ref_stash(&self) -> Option<&Token<'a>> {
306 self.stash.as_ref()
307 }
308
309 pub(super) fn mut_stash(&mut self) -> &mut Option<Token<'a>> {
310 &mut self.stash
311 }
312 pub(super) fn take_stash(&mut self) -> Option<Token<'a>> {
313 self.stash.take()
314 }
315
316 pub(super) fn skipnpeek(&mut self) -> Result<Option<u8>> {
317 debug_assert!(self.stash.is_none());
318 self.skip_trivial()?;
319 Ok(self.bpeek())
320 }
321
322 fn skip_trivial(&mut self) -> Result<()> {
323 loop {
324 self.skip_ws_only();
325 if !self.dialect.allow_comments || !self.bnext_if(b'/') {
326 return Ok(());
327 }
328 match self.bnext() {
329 Some(b'*') => tri!(self.skip_block_comment()),
330 Some(b'/') => self.skip_line_comment(),
331 _ => return Err(self.err()),
332 }
333 }
334 }
335
336 fn skip_line_comment(&mut self) {
337 let (mut p, bs) = (self.pos, self.bytes);
338 while p < bs.len() && bs[p] != b'\n' {
339 p += 1;
340 }
341 self.pos = p;
342 }
343
344 fn skip_block_comment(&mut self) -> Result<()> {
345 let (mut p, bs) = (self.pos, self.bytes);
346 loop {
347 if p + 1 >= bs.len() {
348 self.pos = p;
349 return Err(self.err());
350 }
351 if bs[p] == b'*' && bs[p + 1] == b'/' {
352 self.pos = p + 2;
353 return Ok(());
354 }
355 p += 1;
356 }
357 }
358
359 fn skip_ws_only(&mut self) {
360 let (mut p, bs) = (self.pos, self.bytes);
361 while p < bs.len() && matches!(bs[p], b'\n' | b' ' | b'\t' | b'\r') {
362 p += 1;
363 }
364 self.pos = p;
365 }
366
367 fn cur_ch(&self) -> Option<char> {
368 self.input[self.pos..].chars().next()
369 }
370
371 fn single_hex_escape(&mut self) -> Result<u16> {
372 let mut acc = 0;
373 for _ in 0..4 {
374 let b = tri!(self.bnext_or_err());
375 let n = match b {
376 b'0'..=b'9' => b - b'0',
377 b'a'..=b'f' => b - b'a' + 10,
378 b'A'..=b'F' => b - b'A' + 10,
379 _ => return Err(self.err()),
380 };
381 acc = acc * 16 + (n as u16);
382 }
383 Ok(acc)
384 }
385
386 fn read_hex_escape(&mut self) -> Result<()> {
387 use core::char::REPLACEMENT_CHARACTER as REPLACEMENT;
390 const LEAD: core::ops::Range<u16> = 0xd800..0xdc00;
391 const TRAIL: core::ops::Range<u16> = 0xdc00..0xe000;
392
393 let lead = tri!(self.single_hex_escape());
394 if let Some(c) = core::char::from_u32(lead as u32) {
395 self.buf.push(c);
396 return Ok(());
397 }
398 if TRAIL.contains(&lead) {
399 self.buf.push(REPLACEMENT);
400 return Ok(());
401 }
402 debug_assert!(LEAD.contains(&lead));
403 let p = self.pos;
404 let trail = if self.bytes[p..].starts_with(b"\\u") {
405 self.pos += 2;
406 tri!(self.single_hex_escape())
407 } else {
408 self.buf.push(REPLACEMENT);
409 return Ok(());
410 };
411 if !TRAIL.contains(&trail) {
412 self.pos = p;
415 self.buf.push(REPLACEMENT);
416 return Ok(());
417 }
418 let scalar = (((lead as u32 - 0xd800) << 10) | (trail as u32 - 0xdc00)) + 0x10000;
419 debug_assert!(
420 core::char::from_u32(scalar).is_some(),
421 r#""\u{:04x}\u{:04x}" => {:#x}"#,
422 lead,
423 trail,
424 scalar,
425 );
426 self.buf.push(core::char::from_u32(scalar).unwrap());
429 Ok(())
430 }
431
432 fn expect_next(&mut self, next: &[u8]) -> Result<()> {
433 for &i in next {
434 if Some(i) != self.bnext() {
435 return Err(self.err());
436 }
437 }
438 Ok(())
439 }
440
441 fn unescape_next(&mut self) -> Result<()> {
442 let b = tri!(self.bnext_or_err());
443 match b {
444 b'b' => self.buf.push('\x08'),
445 b'f' => self.buf.push('\x0c'),
446 b'n' => self.buf.push('\n'),
447 b'r' => self.buf.push('\r'),
448 b't' => self.buf.push('\t'),
449 b'\\' => self.buf.push('\\'),
450 b'/' => self.buf.push('/'),
451 b'\"' => self.buf.push('\"'),
452 b'u' => return self.read_hex_escape(),
453 _ => return Err(self.err()),
454 }
455 Ok(())
456 }
457
458 fn read_keyword(&mut self, id: &[u8], t: Token<'a>) -> Result<Token<'a>> {
459 debug_assert_eq!(self.bytes[self.pos - 1], id[0]);
460 tri!(self.expect_next(&id[1..]));
461 Ok(t)
462 }
463
464 pub(crate) fn unpeek(&mut self, t: Token<'a>) {
465 assert!(self.stash.is_none());
466 self.stash = Some(t);
467 }
468 pub(crate) fn next_token(&mut self) -> Result<Option<Token<'a>>> {
469 if let Some(t) = self.stash.take() {
470 return Ok(Some(t));
471 }
472 self.skip_trivial()?;
473 if self.pos >= self.input.len() {
474 return Ok(None);
475 }
476 self.tok_start = self.pos;
477 let tok = match tri!(self.bnext_or_err()) {
478 b':' => return Ok(Some(Token::Colon)),
479 b',' => return Ok(Some(Token::Comma)),
480 b'{' => return Ok(Some(Token::ObjectBegin)),
481 b'}' => return Ok(Some(Token::ObjectEnd)),
482 b'[' => return Ok(Some(Token::ArrayBegin)),
483 b']' => return Ok(Some(Token::ArrayEnd)),
484 b'"' => self.read_string(),
485 b't' => self.read_keyword(b"true", Token::Bool(true)),
486 b'f' => self.read_keyword(b"false", Token::Bool(false)),
487 b'n' => self.read_keyword(b"null", Token::Null),
488 b'-' | b'0'..=b'9' => self.read_num(),
489 _ => return Err(self.err()),
490 };
491 Ok(Some(tri!(tok)))
492 }
493
494 fn is_delim_byte(&self, b: u8) -> bool {
495 match b {
496 b',' | b'}' | b']' | b' ' | b'\t' | b'\n' | b'\r' => true,
497 b'/' if self.dialect.allow_comments => true,
498 _ => false,
499 }
500 }
501
502 fn read_num(&mut self) -> Result<Token<'a>> {
503 let neg = self.bytes[self.tok_start] == b'-';
504 let mut float = false;
505 while let Some(b) = self.bpeek() {
506 match b {
507 b'.' | b'e' | b'E' | b'+' | b'-' => {
508 float = true;
509 self.bump();
510 }
511 b'0'..=b'9' => {
512 self.bump();
513 }
514 b if self.is_delim_byte(b) => break,
515 _ => return Err(self.err()),
516 }
517 }
518 let text = &self.input[self.tok_start..self.pos];
519 if !float {
520 if neg {
521 if let Ok(i) = text.parse::<i64>() {
522 debug_assert!(i < 0);
523 return Ok(Token::NumI(i));
524 }
525 } else if let Ok(u) = text.parse::<u64>() {
526 return Ok(Token::NumU(u));
527 }
528 }
529 if let Ok(v) = text.parse::<f64>() {
530 Ok(Token::NumF(v))
531 } else {
532 Err(self.err())
533 }
534 }
535
536 fn read_string(&mut self) -> Result<Token<'a>> {
537 self.buf.clear();
538 let bs = self.bytes;
539 loop {
540 let mut p = self.pos;
541 let start = p;
542 while p < bs.len() && bs[p] != b'"' && bs[p] != b'\\' {
543 p += 1;
544 }
545 if p == bs.len() || !self.input.is_char_boundary(p) {
546 self.pos = p;
547 return Err(self.err());
548 }
549 self.pos = p + 1;
550 if bs[p] == b'"' && self.buf.is_empty() {
551 return Ok(Token::StrBorrow(&self.input[start..p]));
553 }
554 self.buf.push_str(&self.input[start..p]);
555 if bs[p] == b'"' {
556 return Ok(Token::StrOwn(self.buf.clone().into_boxed_str()));
557 }
558 debug_assert_eq!(bs[p], b'\\');
559 tri!(self.unescape_next());
560 }
561 }
562}
563
/// Generates `Reader` methods that consume the next token and succeed only
/// if it is the named payload-free `Token` variant.
///
/// Usage: `tok_tester! { method_name matches Variant; ... }` inside an
/// `impl Reader` block.
macro_rules! tok_tester {
    ($($func:ident matches $tok:ident);*) => {$(
        pub(crate) fn $func(&mut self) -> Result<()> {
            match self.next_token()? {
                Some(Token::$tok) => Ok(()),
                _ => Err(self.err()),
            }
        }
    )*};
}
575impl<'a> Reader<'a> {
576 pub(crate) fn next(&mut self) -> Result<Token<'a>> {
577 match self.next_token() {
578 Ok(Some(v)) => Ok(v),
579 Err(e) => Err(e),
580 _ => Err(self.err()),
581 }
582 }
583 tok_tester! {
584 array_begin matches ArrayBegin;
585 obj_begin matches ObjectBegin;
587 comma matches Comma;
589 colon matches Colon;
590 null matches Null
591 }
592 pub(crate) fn comma_or_obj_end(&mut self) -> Result<bool> {
593 match self.next_token() {
594 Ok(Some(Token::Comma)) => Ok(true),
595 Ok(Some(Token::ObjectEnd)) => Ok(false),
596 Err(e) => Err(e),
597 _ => Err(self.err()),
598 }
599 }
600 pub(crate) fn comma_or_array_end(&mut self) -> Result<bool> {
601 match self.next_token() {
602 Ok(Some(Token::Comma)) => Ok(true),
603 Ok(Some(Token::ArrayEnd)) => Ok(false),
604 Err(e) => Err(e),
605 _ => Err(self.err()),
606 }
607 }
608 pub(crate) fn key(&mut self) -> Result<Cow<'a, str>> {
609 match self.next_token() {
610 Ok(Some(Token::StrBorrow(b))) => Ok(Cow::Borrowed(b)),
611 Ok(Some(Token::StrOwn(b))) => Ok(Cow::Owned(b.into())),
612 Err(e) => Err(e),
613 Ok(Some(_t)) => {
614 return Err(self.err());
615 }
616 _o => return Err(self.err()),
617 }
618 }
619}
620
/// Decodes a UTF-16 surrogate pair into the character it encodes.
///
/// Returns `None` unless `a` is a lead surrogate (`0xD800..=0xDBFF`) and `b`
/// is a trail surrogate (`0xDC00..=0xDFFF`). In particular a non-surrogate
/// `a` is rejected instead of underflowing the subtraction below.
fn dec_utf16_single(a: u16, b: u16) -> Option<char> {
    const LEAD: core::ops::Range<u16> = 0xd800..0xdc00;
    const TRAIL: core::ops::Range<u16> = 0xdc00..0xe000;
    if !LEAD.contains(&a) || !TRAIL.contains(&b) {
        return None;
    }
    // Each surrogate carries 10 payload bits; the pair encodes scalar - 0x10000.
    let high = u32::from(a) - 0xd800;
    let low = u32::from(b) - 0xdc00;
    core::char::from_u32(((high << 10) | low) + 0x10000)
}
629
#[cfg(test)]
mod test {
    use super::*;

    /// Every supplementary-plane character must survive an encode/decode
    /// round trip through its UTF-16 surrogate pair.
    #[test]
    fn test_u16() {
        let mut units = [0u16; 2];
        for c in '\u{10000}'..=core::char::MAX {
            c.encode_utf16(&mut units);
            assert_eq!(dec_utf16_single(units[0], units[1]), Some(c));
        }
    }
}
642
643impl<'a> Reader<'a> {
644 pub fn read_i64(&mut self) -> Result<i64> {
661 match self.next_token() {
662 Ok(Some(Token::NumF(f))) => Ok(f as i64),
663 Ok(Some(Token::NumI(i))) => Ok(i),
664 Ok(Some(Token::NumU(i))) => Ok(i as i64),
665 Err(e) => Err(e),
666 _ => Err(self.err()),
667 }
668 }
669 pub fn read_u64(&mut self) -> Result<u64> {
670 match self.next_token() {
671 Ok(Some(Token::NumF(f))) => Ok(f as u64),
672 Ok(Some(Token::NumI(i))) => Ok(i as f64 as u64),
673 Ok(Some(Token::NumU(i))) => Ok(i),
674 Err(e) => Err(e),
675 _ => Err(self.err()),
676 }
677 }
678 pub fn read_str(&mut self) -> Result<Cow<'a, str>> {
679 match self.next_token() {
680 Ok(Some(Token::StrBorrow(s))) => Ok(Cow::Borrowed(s)),
681 Ok(Some(Token::StrOwn(s))) => Ok(Cow::Owned(s.into())),
682 Err(e) => Err(e),
683 _ => Err(self.err()),
684 }
685 }
686 }
688
689