1use crate::error::{JsonError, Result};
10use crate::value::JsonValue;
11use std::borrow::Cow;
12
13const MAX_DEPTH: usize = 128;
15
/// Byte-indexed lookup table: an entry is `true` exactly for the four
/// whitespace bytes the parser skips (space, tab, line feed, carriage return).
static WS: [bool; 256] = {
    const BLANKS: [u8; 4] = [b' ', b'\t', b'\n', b'\r'];

    let mut table = [false; 256];
    let mut idx = 0;
    while idx < BLANKS.len() {
        table[BLANKS[idx] as usize] = true;
        idx += 1;
    }
    table
};
25
/// Builds a concrete integer type from a sign flag and an unsigned magnitude.
///
/// Implemented below for the primitive signed and unsigned integer types.
pub trait ParseInt: Sized {
    /// Combines `negative` and `value` into `Self`.
    ///
    /// Returns `None` when the signed result does not fit in `Self`.
    fn from_parts(negative: bool, value: u64) -> Option<Self>;
}

// Implements `ParseInt` for signed integer types.
macro_rules! impl_parse_int_signed {
    ($($int:ty),*) => {
        $(
            impl ParseInt for $int {
                #[inline]
                fn from_parts(negative: bool, value: u64) -> Option<Self> {
                    // `i128` can hold any `u64` magnitude with either sign, so a
                    // single range-checked narrowing handles both branches,
                    // including the `-(2^63)` edge case for `i64`.
                    let magnitude = i128::from(value);
                    let signed = if negative { -magnitude } else { magnitude };
                    <$int>::try_from(signed).ok()
                }
            }
        )*
    };
}

// Implements `ParseInt` for unsigned integer types.
macro_rules! impl_parse_int_unsigned {
    ($($uint:ty),*) => {
        $(
            impl ParseInt for $uint {
                #[inline]
                fn from_parts(negative: bool, value: u64) -> Option<Self> {
                    if negative {
                        // A minus sign is rejected outright, even for magnitude zero.
                        None
                    } else {
                        <$uint>::try_from(value).ok()
                    }
                }
            }
        )*
    };
}

impl_parse_int_signed!(i8, i16, i32, i64, isize);
impl_parse_int_unsigned!(u8, u16, u32, u64, usize);
73
/// Cursor-based JSON parser over a borrowed byte buffer.
pub struct JsonParser<'a> {
    /// Bytes of the document being parsed.
    input: &'a [u8],
    /// Index into `input` of the next byte to be read.
    pos: usize,
    /// Number of objects/arrays currently open.
    depth: usize,
    /// Length of `input`, recorded by the constructors.
    len: usize,
}
85
86impl<'a> JsonParser<'a> {
87 #[inline]
89 pub fn new(input: &'a str) -> Self {
90 Self {
91 input: input.as_bytes(),
92 pos: 0,
93 depth: 0,
94 len: input.len(),
95 }
96 }
97
98 #[inline]
100 pub fn from_bytes(input: &'a [u8]) -> Self {
101 Self {
102 input,
103 pos: 0,
104 depth: 0,
105 len: input.len(),
106 }
107 }
108
109 #[inline]
111 pub fn position(&self) -> usize {
112 self.pos
113 }
114
115 #[inline]
117 pub fn is_empty(&self) -> bool {
118 self.pos >= self.len
119 }
120
121 #[inline]
123 fn peek(&self) -> Option<u8> {
124 self.input.get(self.pos).copied()
125 }
126
127 #[inline]
129 fn advance(&mut self) {
130 self.pos += 1;
131 }
132
133 #[inline]
135 fn skip_whitespace(&mut self) {
136 let input = self.input;
137 let mut pos = self.pos;
138 unsafe {
139 while pos < self.len && *WS.get_unchecked(input[pos] as usize) {
140 pos += 1;
141 }
142 }
143 self.pos = pos;
144 }
145
146 #[inline]
148 pub fn peek_is_string(&mut self) -> Result<bool> {
149 self.skip_whitespace();
150 Ok(self.peek() == Some(b'"'))
151 }
152
153 pub fn parse_value(&mut self) -> Result<JsonValue> {
155 self.skip_whitespace();
156
157 if self.depth > MAX_DEPTH {
158 return Err(JsonError::NestingTooDeep(self.depth));
159 }
160
161 let byte = self.peek().ok_or(JsonError::UnexpectedEnd)?;
162
163 match byte {
164 b'"' => self
165 .parse_string()
166 .map(|s| JsonValue::String(s.into_owned())),
167 b'{' => self.parse_object_value(),
168 b'[' => self.parse_array_value(),
169 b't' => self.parse_true().map(|_| JsonValue::Bool(true)),
170 b'f' => self.parse_false().map(|_| JsonValue::Bool(false)),
171 b'n' => self.parse_null().map(|_| JsonValue::Null),
172 b'-' | b'0'..=b'9' => self.parse_number_value(),
173 _ => Err(JsonError::UnexpectedChar(byte as char, self.pos)),
174 }
175 }
176
177 pub fn parse_string(&mut self) -> Result<Cow<'a, str>> {
179 self.skip_whitespace();
180
181 if self.peek() != Some(b'"') {
182 return Err(JsonError::ExpectedToken("string", self.pos));
183 }
184 self.advance();
185
186 let start = self.pos;
187 let mut has_escapes = false;
188
189 unsafe {
190 while self.pos < self.len {
192 match self.input.get_unchecked(self.pos) {
193 b'"' => {
195 if has_escapes {
196 let raw = &self.input.get_unchecked(start..self.pos);
198 self.advance(); return self.unescape_string(raw);
200 } else {
201 let s = std::str::from_utf8_unchecked(
203 self.input.get_unchecked(start..self.pos),
204 );
205 self.advance(); return Ok(Cow::Borrowed(s));
207 }
208 }
209 b'\\' => {
210 has_escapes = true;
211 self.pos += 2; }
213 _ => self.pos += 1,
214 }
215 }
216 }
217
218 Err(JsonError::UnexpectedEnd)
219 }
220
221 fn unescape_string(&self, raw: &[u8]) -> Result<Cow<'a, str>> {
223 let mut result = Vec::with_capacity(raw.len());
224 let mut i = 0;
225
226 while i < raw.len() {
227 if raw[i] == b'\\' {
228 i += 1;
229 if i >= raw.len() {
230 return Err(JsonError::InvalidEscape(self.pos));
231 }
232 match raw[i] {
233 b'"' => result.push(b'"'),
234 b'\\' => result.push(b'\\'),
235 b'/' => result.push(b'/'),
236 b'b' => result.push(0x08),
237 b'f' => result.push(0x0C),
238 b'n' => result.push(b'\n'),
239 b'r' => result.push(b'\r'),
240 b't' => result.push(b'\t'),
241 b'u' => {
242 if i + 4 >= raw.len() {
243 return Err(JsonError::InvalidUnicode(self.pos));
244 }
245 let hex = &raw[i + 1..i + 5];
246 let code_point = self.parse_hex4(hex)?;
247
248 if (0xD800..=0xDBFF).contains(&code_point) {
250 if i + 10 < raw.len() && raw[i + 5] == b'\\' && raw[i + 6] == b'u' {
252 let low_hex = &raw[i + 7..i + 11];
253 let low_code_point = self.parse_hex4(low_hex)?;
254 if (0xDC00..=0xDFFF).contains(&low_code_point) {
255 let combined = 0x10000
256 + ((code_point as u32 - 0xD800) << 10)
257 + (low_code_point as u32 - 0xDC00);
258 if let Some(c) = char::from_u32(combined) {
259 let mut buf = [0u8; 4];
260 result
261 .extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
262 }
263 i += 11;
270 continue;
271 }
272 }
273 }
274
275 if let Some(c) = char::from_u32(code_point as u32) {
276 let mut buf = [0u8; 4];
277 result.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
278 }
279 i += 4;
280 }
281 _ => return Err(JsonError::InvalidEscape(self.pos)),
282 }
283 i += 1;
284 } else {
285 result.push(raw[i]);
286 i += 1;
287 }
288 }
289
290 String::from_utf8(result)
291 .map(Cow::Owned)
292 .map_err(|_| JsonError::InvalidUtf8)
293 }
294
295 #[inline]
297 fn parse_hex4(&self, hex: &[u8]) -> Result<u16> {
298 let mut value = 0u16;
299 for &b in hex {
300 let digit = match b {
301 b'0'..=b'9' => b - b'0',
302 b'a'..=b'f' => b - b'a' + 10,
303 b'A'..=b'F' => b - b'A' + 10,
304 _ => return Err(JsonError::InvalidUnicode(self.pos)),
305 };
306 value = value * 16 + digit as u16;
307 }
308 Ok(value)
309 }
310
311 fn parse_number_value(&mut self) -> Result<JsonValue> {
313 let start = self.pos;
314 let mut is_float = false;
315
316 if self.peek() == Some(b'-') {
318 self.advance();
319 }
320
321 match self.peek() {
323 Some(b'0') => self.advance(),
324 Some(b'1'..=b'9') => {
325 self.advance();
326 while let Some(b'0'..=b'9') = self.peek() {
327 self.advance();
328 }
329 }
330 _ => return Err(JsonError::InvalidNumber(start)),
331 }
332
333 if self.peek() == Some(b'.') {
335 is_float = true;
336 self.advance();
337 if !matches!(self.peek(), Some(b'0'..=b'9')) {
338 return Err(JsonError::InvalidNumber(self.pos));
339 }
340 while let Some(b'0'..=b'9') = self.peek() {
341 self.advance();
342 }
343 }
344
345 if matches!(self.peek(), Some(b'e' | b'E')) {
347 is_float = true;
348 self.advance();
349 if matches!(self.peek(), Some(b'+' | b'-')) {
350 self.advance();
351 }
352 if !matches!(self.peek(), Some(b'0'..=b'9')) {
353 return Err(JsonError::InvalidNumber(self.pos));
354 }
355 while let Some(b'0'..=b'9') = self.peek() {
356 self.advance();
357 }
358 }
359
360 let num_str = unsafe { std::str::from_utf8_unchecked(&self.input[start..self.pos]) };
361
362 if is_float {
363 num_str
364 .parse::<f64>()
365 .map(JsonValue::Float)
366 .map_err(|_| JsonError::InvalidNumber(start))
367 } else {
368 num_str
369 .parse::<i64>()
370 .map(JsonValue::Integer)
371 .map_err(|_| JsonError::InvalidNumber(start))
372 }
373 }
374
375 pub fn parse_integer<T: ParseInt>(&mut self) -> Result<T> {
378 self.skip_whitespace();
379 let start = self.pos;
380 let negative = self.input.get(self.pos) == Some(&b'-');
381 if negative {
382 self.pos += 1;
383 }
384
385 let mut value: u64 = 0;
386 while let Some(&d @ b'0'..=b'9') = self.input.get(self.pos) {
387 value = value.wrapping_mul(10).wrapping_add((d - b'0') as u64);
388 self.pos += 1;
389 }
390
391 if self.pos == start || (negative && self.pos == start + 1) {
392 return Err(JsonError::InvalidNumber(start));
393 }
394
395 T::from_parts(negative, value).ok_or(JsonError::InvalidNumber(start))
396 }
397
398 pub fn parse_float<T: std::str::FromStr>(&mut self) -> Result<T> {
400 self.skip_whitespace();
401 let start = self.pos;
402
403 if self.input.get(self.pos) == Some(&b'-') {
404 self.pos += 1;
405 }
406
407 match self.input.get(self.pos) {
408 Some(b'0') => self.pos += 1,
409 Some(b'1'..=b'9') => {
410 self.pos += 1;
411 while matches!(self.input.get(self.pos), Some(b'0'..=b'9')) {
412 self.pos += 1;
413 }
414 }
415 _ => return Err(JsonError::InvalidNumber(start)),
416 }
417
418 if self.input.get(self.pos) == Some(&b'.') {
419 self.pos += 1;
420 while matches!(self.input.get(self.pos), Some(b'0'..=b'9')) {
421 self.pos += 1;
422 }
423 }
424
425 if matches!(self.input.get(self.pos), Some(b'e' | b'E')) {
426 self.pos += 1;
427 if matches!(self.input.get(self.pos), Some(b'+' | b'-')) {
428 self.pos += 1;
429 }
430 while matches!(self.input.get(self.pos), Some(b'0'..=b'9')) {
431 self.pos += 1;
432 }
433 }
434
435 let num_str = unsafe { std::str::from_utf8_unchecked(&self.input[start..self.pos]) };
436 num_str.parse().map_err(|_| JsonError::InvalidNumber(start))
437 }
438
439 fn parse_true(&mut self) -> Result<()> {
441 if self.input[self.pos..].starts_with(b"true") {
442 self.pos += 4;
443 Ok(())
444 } else {
445 Err(JsonError::ExpectedToken("true", self.pos))
446 }
447 }
448
449 fn parse_false(&mut self) -> Result<()> {
451 if self.input[self.pos..].starts_with(b"false") {
452 self.pos += 5;
453 Ok(())
454 } else {
455 Err(JsonError::ExpectedToken("false", self.pos))
456 }
457 }
458
459 fn parse_null(&mut self) -> Result<()> {
461 if self.input[self.pos..].starts_with(b"null") {
462 self.pos += 4;
463 Ok(())
464 } else {
465 Err(JsonError::ExpectedToken("null", self.pos))
466 }
467 }
468
469 pub fn parse_bool(&mut self) -> Result<bool> {
471 self.skip_whitespace();
472 match self.peek() {
473 Some(b't') => {
474 self.parse_true()?;
475 Ok(true)
476 }
477 Some(b'f') => {
478 self.parse_false()?;
479 Ok(false)
480 }
481 _ => Err(JsonError::ExpectedToken("boolean", self.pos)),
482 }
483 }
484
485 fn parse_object_value(&mut self) -> Result<JsonValue> {
487 self.depth += 1;
488 self.advance(); self.skip_whitespace();
490
491 let mut map = Vec::new();
492
493 if self.peek() == Some(b'}') {
494 self.advance();
495 self.depth -= 1;
496 return Ok(JsonValue::Object(map));
497 }
498
499 loop {
500 self.skip_whitespace();
501 let key = self.parse_string()?.into_owned();
502
503 self.skip_whitespace();
504 if self.peek() != Some(b':') {
505 return Err(JsonError::ExpectedChar(':', self.pos));
506 }
507 self.advance();
508
509 let value = self.parse_value()?;
510 map.push((key, value));
511
512 self.skip_whitespace();
513 match self.peek() {
514 Some(b',') => self.advance(),
515 Some(b'}') => {
516 self.advance();
517 self.depth -= 1;
518 return Ok(JsonValue::Object(map));
519 }
520 _ => return Err(JsonError::ExpectedChar('}', self.pos)),
521 }
522 }
523 }
524
525 fn parse_array_value(&mut self) -> Result<JsonValue> {
527 self.depth += 1;
528 self.advance(); self.skip_whitespace();
530
531 let mut arr = Vec::new();
532
533 if self.peek() == Some(b']') {
534 self.advance();
535 self.depth -= 1;
536 return Ok(JsonValue::Array(arr));
537 }
538
539 loop {
540 arr.push(self.parse_value()?);
541
542 self.skip_whitespace();
543 match self.peek() {
544 Some(b',') => self.advance(),
545 Some(b']') => {
546 self.advance();
547 self.depth -= 1;
548 return Ok(JsonValue::Array(arr));
549 }
550 _ => return Err(JsonError::ExpectedChar(']', self.pos)),
551 }
552 }
553 }
554
555 #[inline]
557 pub fn skip_whitespace_pub(&mut self) {
558 self.skip_whitespace();
559 }
560
561 #[inline]
563 pub fn peek_is_null(&mut self) -> bool {
564 self.skip_whitespace();
565 self.input[self.pos..].starts_with(b"null")
566 }
567
568 #[inline]
570 pub fn has_next_array_element_or_first(&mut self, is_first: bool) -> Result<bool> {
571 self.skip_whitespace();
572 match self.peek() {
573 Some(b']') => Ok(false),
574 Some(b',') if !is_first => {
575 self.advance();
576 self.skip_whitespace();
577 Ok(self.peek() != Some(b']'))
578 }
579 Some(_) if is_first => Ok(true),
580 Some(c) => Err(JsonError::UnexpectedChar(c as char, self.pos)),
581 None => Err(JsonError::UnexpectedEnd),
582 }
583 }
584
585 pub fn expect_object_start(&mut self) -> Result<()> {
589 self.skip_whitespace();
590 if self.peek() != Some(b'{') {
591 return Err(JsonError::ExpectedChar('{', self.pos));
592 }
593 self.advance();
594 self.depth += 1;
595 Ok(())
596 }
597
598 pub fn expect_object_end(&mut self) -> Result<()> {
600 self.skip_whitespace();
601 if self.peek() != Some(b'}') {
602 return Err(JsonError::ExpectedChar('}', self.pos));
603 }
604 self.advance();
605 self.depth -= 1;
606 Ok(())
607 }
608
609 pub fn expect_array_start(&mut self) -> Result<()> {
611 self.skip_whitespace();
612 if self.peek() != Some(b'[') {
613 return Err(JsonError::ExpectedChar('[', self.pos));
614 }
615 self.advance();
616 self.depth += 1;
617 Ok(())
618 }
619
620 pub fn expect_array_end(&mut self) -> Result<()> {
622 self.skip_whitespace();
623 if self.peek() != Some(b']') {
624 return Err(JsonError::ExpectedChar(']', self.pos));
625 }
626 self.advance();
627 self.depth -= 1;
628 Ok(())
629 }
630
631 pub fn expect_comma(&mut self) -> Result<()> {
633 self.skip_whitespace();
634 if self.peek() != Some(b',') {
635 return Err(JsonError::ExpectedChar(',', self.pos));
636 }
637 self.advance();
638 Ok(())
639 }
640
641 pub fn expect_null(&mut self) -> Result<()> {
643 self.skip_whitespace();
644 self.parse_null()
645 }
646
647 pub fn next_object_key(&mut self) -> Result<Option<Cow<'a, str>>> {
649 self.skip_whitespace();
650
651 match self.peek() {
652 Some(b'}') => Ok(None),
653 Some(b',') => {
654 self.advance();
655 self.skip_whitespace();
656 if self.peek() == Some(b'}') {
657 return Ok(None);
658 }
659 let key = self.parse_string()?;
660 self.skip_whitespace();
661 if self.peek() != Some(b':') {
662 return Err(JsonError::ExpectedChar(':', self.pos));
663 }
664 self.advance();
665 Ok(Some(key))
666 }
667 Some(b'"') => {
668 let key = self.parse_string()?;
669 self.skip_whitespace();
670 if self.peek() != Some(b':') {
671 return Err(JsonError::ExpectedChar(':', self.pos));
672 }
673 self.advance();
674 Ok(Some(key))
675 }
676 Some(c) => Err(JsonError::UnexpectedChar(c as char, self.pos)),
677 None => Err(JsonError::UnexpectedEnd),
678 }
679 }
680
681 pub fn has_next_array_element(&mut self) -> Result<bool> {
683 self.skip_whitespace();
684 match self.peek() {
685 Some(b']') => Ok(false),
686 Some(b',') => {
687 self.advance();
688 self.skip_whitespace();
689 Ok(self.peek() != Some(b']'))
690 }
691 Some(_) => Ok(true),
692 None => Err(JsonError::UnexpectedEnd),
693 }
694 }
695
696 pub fn skip_value(&mut self) -> Result<()> {
698 self.skip_whitespace();
699 match self.input.get(self.pos) {
700 Some(b'"') => unsafe {
701 self.pos += 1; while self.pos < self.len {
704 match self.input.get_unchecked(self.pos) {
705 b'"' => {
706 self.pos += 1;
707 return Ok(());
708 }
709 b'\\' => {
710 self.pos += 2;
711 } _ => {
713 self.pos += 1;
714 }
715 }
716 }
717
718 Err(JsonError::UnexpectedEnd)
719 },
720 Some(b'{') => unsafe {
721 self.pos += 1;
722 let mut depth = 1usize;
723 while self.pos < self.len {
724 match self.input.get_unchecked(self.pos) {
726 b'"' => {
727 self.pos += 1;
728 self.skip_string_body()?;
729 }
730 b'{' | b'[' => {
731 depth += 1;
732 self.pos += 1;
733 }
734 b'}' | b']' => {
735 self.pos += 1;
736 depth -= 1;
737 if depth == 0 {
738 return Ok(());
739 }
740 }
741 _ => {
742 self.pos += 1;
743 }
744 }
745 }
746 Err(JsonError::UnexpectedEnd)
747 },
748 Some(b'[') => unsafe {
749 self.pos += 1;
750 let mut depth = 1usize;
751 while self.pos < self.len {
752 match self.input.get_unchecked(self.pos) {
753 b'"' => {
754 self.pos += 1;
755 self.skip_string_body()?;
756 }
757 b'{' | b'[' => {
758 depth += 1;
759 self.pos += 1;
760 }
761 b'}' | b']' => {
762 self.pos += 1;
763 depth -= 1;
764 if depth == 0 {
765 return Ok(());
766 }
767 }
768 _ => {
769 self.pos += 1;
770 }
771 }
772 }
773 Err(JsonError::UnexpectedEnd)
774 },
775 Some(b't') => {
776 self.pos += 4;
777 Ok(())
778 } Some(b'f') => {
780 self.pos += 5;
781 Ok(())
782 } Some(b'n') => {
784 self.pos += 4;
785 Ok(())
786 } Some(b'-') | Some(b'0'..=b'9') => {
788 if self.input.get(self.pos) == Some(&b'-') {
790 self.pos += 1;
791 }
792 while matches!(
793 self.input.get(self.pos),
794 Some(b'0'..=b'9' | b'.' | b'e' | b'E' | b'+' | b'-')
795 ) {
796 self.pos += 1;
797 }
798 Ok(())
799 }
800 Some(&c) => Err(JsonError::UnexpectedChar(c as char, self.pos)),
801 None => Err(JsonError::UnexpectedEnd),
802 }
803 }
804
805 #[inline]
807 fn skip_string_body(&mut self) -> Result<()> {
808 loop {
809 match self.input.get(self.pos) {
810 Some(b'"') => {
811 self.pos += 1;
812 return Ok(());
813 }
814 Some(b'\\') => {
815 self.pos += 2;
816 }
817 Some(_) => {
818 self.pos += 1;
819 }
820 None => return Err(JsonError::UnexpectedEnd),
821 }
822 }
823 }
824}
825
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` as a single JSON value, panicking on any parse error.
    fn parse(src: &str) -> JsonValue {
        JsonParser::new(src).parse_value().unwrap()
    }

    #[test]
    fn test_parse_string_simple() {
        let parsed = JsonParser::new(r#""hello""#).parse_string().unwrap();
        assert_eq!(parsed, "hello");
    }

    #[test]
    fn test_parse_string_escapes() {
        let parsed = JsonParser::new(r#""hello\nworld""#).parse_string().unwrap();
        assert_eq!(parsed, "hello\nworld");
    }

    #[test]
    fn test_parse_string_unicode() {
        let parsed = JsonParser::new(r#""\u0048\u0065\u006c\u006c\u006f""#)
            .parse_string()
            .unwrap();
        assert_eq!(parsed, "Hello");
    }

    #[test]
    fn test_parse_number_integer() {
        if let JsonValue::Integer(n) = parse("42") {
            assert_eq!(n, 42);
        } else {
            panic!("expected integer");
        }
    }

    #[test]
    fn test_parse_number_negative() {
        if let JsonValue::Integer(n) = parse("-123") {
            assert_eq!(n, -123);
        } else {
            panic!("expected integer");
        }
    }

    #[test]
    fn test_parse_number_float() {
        if let JsonValue::Float(n) = parse("3.14") {
            assert!((n - 3.14).abs() < 0.001);
        } else {
            panic!("expected float");
        }
    }

    #[test]
    fn test_parse_bool() {
        let truthy = JsonParser::new("true").parse_bool().unwrap();
        assert!(truthy);

        let falsy = JsonParser::new("false").parse_bool().unwrap();
        assert!(!falsy);
    }

    #[test]
    fn test_parse_array() {
        assert!(parse("[1, 2, 3]").is_array());
    }

    #[test]
    fn test_parse_object() {
        assert!(parse(r#"{"key": "value"}"#).is_object());
    }
}