1mod stage1;
44mod stage2;
45
46pub(crate) mod charclass;
48
/// Reasons the scanner can report for a fragment that is not complete.
///
/// The user-facing wording for each variant lives in the [`std::fmt::Display`] impl below.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorKind {
    /// Displayed as "Unexpected end of input".
    UnexpectedEof,
    /// Displayed as "Unterminated string".
    UnterminatedString,
    /// Displayed as "Missing closing brace".
    MissingClosingBrace,
    /// Displayed as "Missing closing bracket".
    MissingClosingBracket,
    /// An invalid character; the payload is the raw byte, which `Display` renders through
    /// `char::from`.
    InvalidCharacter(u8),
    /// Displayed as "Invalid escape sequence".
    InvalidEscape,
    /// Displayed as "Missing colon after object key".
    MissingColon,
    /// Displayed as "Missing comma between values".
    MissingComma,
    /// Displayed as "Invalid value".
    InvalidValue,
    /// Displayed as "Mismatched bracket".
    MismatchedBracket,
    /// Displayed as "Invalid JSON structure".
    InvalidStructure,
}

impl std::fmt::Display for ErrorKind {
    /// Writes the human-readable description of the error.
    ///
    /// Marked `#[cold]` because formatting an error is not expected on the hot path.
    #[cold]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match *self {
            // The only variant with a payload: it needs real formatting, so finish here.
            ErrorKind::InvalidCharacter(byte) => {
                return write!(f, "Invalid character: {}", char::from(byte));
            }
            ErrorKind::UnexpectedEof => "Unexpected end of input",
            ErrorKind::UnterminatedString => "Unterminated string",
            ErrorKind::MissingClosingBrace => "Missing closing brace",
            ErrorKind::MissingClosingBracket => "Missing closing bracket",
            ErrorKind::InvalidEscape => "Invalid escape sequence",
            ErrorKind::MissingColon => "Missing colon after object key",
            ErrorKind::MissingComma => "Missing comma between values",
            ErrorKind::InvalidValue => "Invalid value",
            ErrorKind::MismatchedBracket => "Mismatched bracket",
            ErrorKind::InvalidStructure => "Invalid JSON structure",
        };
        // `write_str` (not `pad`) so width/fill flags are ignored, exactly like a bare `write!`.
        f.write_str(message)
    }
}

impl std::error::Error for ErrorKind {}
98
99#[derive(Debug, Clone, Copy, PartialEq, Eq)]
101pub enum FragmentStatus {
102 Complete,
104 Incomplete(ErrorKind),
106}
107
108#[derive(Debug, Clone, PartialEq, Eq)]
113pub struct Fragment {
114 pub start: usize,
116 pub length: usize,
118 pub status: FragmentStatus,
120}
121
122impl Fragment {
123 #[inline]
128 pub fn end(&self) -> usize {
129 self.start + self.length
130 }
131
132 #[inline]
137 pub fn is_complete(&self) -> bool {
138 matches!(self.status, FragmentStatus::Complete)
139 }
140}
141
142pub struct StagedScanner {
161 stage1_output: stage1::Stage1Output,
163 fragments: Vec<Fragment>,
165}
166
167impl StagedScanner {
168 pub fn new() -> Self {
178 Self {
179 stage1_output: stage1::Stage1Output::new(),
180 fragments: Vec::new(),
181 }
182 }
183
184 pub fn scan_fragments(&mut self, data: &[u8]) -> &[Fragment] {
211 self.fragments.clear();
213
214 stage1::find_structural_indices(data, &mut self.stage1_output);
216
217 stage2::extract_fragments(
219 data,
220 &self.stage1_output.structural_indices,
221 &self.stage1_output.bracket_pairs,
222 &mut self.fragments,
224 );
225
226 &self.fragments
227 }
228}
229
230impl Default for StagedScanner {
231 fn default() -> Self {
232 Self::new()
233 }
234}
235
236pub struct JsonFragmentScanner;
240
241impl JsonFragmentScanner {
242 pub fn scan_fragments(data: &[u8]) -> Vec<Fragment> {
269 let mut scanner = StagedScanner::new();
271 scanner.scan_fragments(data).to_vec()
272 }
273}
274
275pub fn extract_first(data: &str) -> Option<&str> {
290 let mut scanner = StagedScanner::new();
291 scanner
292 .scan_fragments(data.as_bytes())
293 .iter()
294 .find(|f| f.is_complete())
295 .map(|f| &data[f.start..f.end()])
296}
297
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans `data`, asserts that exactly one fragment was reported, and returns it.
    #[track_caller]
    fn sole_fragment(data: &[u8]) -> Fragment {
        let mut found = JsonFragmentScanner::scan_fragments(data);
        assert_eq!(found.len(), 1);
        found.remove(0)
    }

    /// Same as `sole_fragment`, and additionally asserts that the fragment is complete.
    #[track_caller]
    fn sole_complete_fragment(data: &[u8]) -> Fragment {
        let fragment = sole_fragment(data);
        assert!(fragment.is_complete());
        fragment
    }

    // ---- basic shapes -------------------------------------------------------------------

    #[test]
    fn test_single_complete_object() {
        let found = JsonFragmentScanner::scan_fragments(br#"{"name": "Alice"}"#);
        let count = found.len();

        assert_eq!(count, 1, "Expected 1 fragment, got {}", count);
        if let Some(first) = found.first() {
            eprintln!("Fragment status: {:?}", first.status);
        }
        let first = &found[0];
        assert!(first.is_complete());
        assert_eq!(first.start, 0);
        assert_eq!(first.length, 17);
    }

    #[test]
    fn test_single_complete_array() {
        let fragment = sole_complete_fragment(br#"[1, 2, 3]"#);
        assert_eq!(fragment.start, 0);
        assert_eq!(fragment.length, 9);
    }

    #[test]
    fn test_multiple_fragments() {
        let found = JsonFragmentScanner::scan_fragments(br#"{"name": "Alice"} {"age": 30}"#);
        assert_eq!(found.len(), 2);

        let first = &found[0];
        assert!(first.is_complete());
        assert_eq!(first.start, 0);
        assert_eq!(first.length, 17);

        let second = &found[1];
        assert!(second.is_complete());
        assert_eq!(second.start, 18);
        assert_eq!(second.length, 11);
    }

    #[test]
    fn test_nested_objects() {
        sole_complete_fragment(br#"{"outer": {"inner": 123}}"#);
    }

    #[test]
    fn test_nested_arrays() {
        sole_complete_fragment(br#"[[1, 2], [3, 4]]"#);
    }

    #[test]
    fn test_mixed_nesting() {
        sole_complete_fragment(br#"{"array": [1, {"nested": true}]}"#);
    }

    #[test]
    fn test_string_with_escapes() {
        sole_complete_fragment(br#"{"text": "hello \"world\""}"#);
    }

    #[test]
    fn test_string_with_brackets() {
        sole_complete_fragment(br#"{"text": "has { and ] chars"}"#);
    }

    // ---- truncated input ----------------------------------------------------------------

    #[test]
    fn test_incomplete_fragment() {
        let incomplete_data = br#"{"field": "#;
        let fragment = sole_fragment(incomplete_data);
        assert!(!fragment.is_complete());
        assert_eq!(fragment.start, 0);
    }

    #[test]
    fn test_incomplete_string() {
        let incomplete_string = br#"{"text": "hel"#;
        let fragment = sole_fragment(incomplete_string);
        assert!(!fragment.is_complete());
    }

    // ---- scalar values and whitespace ---------------------------------------------------

    #[test]
    fn test_numbers() {
        sole_complete_fragment(br#"{"int": 123, "float": 45.67, "exp": 1.2e-10}"#);
    }

    #[test]
    fn test_booleans_and_null() {
        sole_complete_fragment(br#"{"bool": true, "other": false, "nothing": null}"#);
    }

    #[test]
    fn test_whitespace_handling() {
        sole_complete_fragment(b" \n\t { \"test\" : 123 } \n ");
    }

    #[test]
    fn test_text_before_fragment() {
        let fragment = sole_complete_fragment(br#"some random text {"json": "here"} more text"#);
        assert_eq!(fragment.start, 17);
    }

    #[test]
    fn test_empty_object() {
        let fragment = sole_complete_fragment(br#"{}"#);
        assert_eq!(fragment.length, 2);
    }

    #[test]
    fn test_empty_array() {
        let fragment = sole_complete_fragment(br#"[]"#);
        assert_eq!(fragment.length, 2);
    }

    // ---- stage-1 diagnostics ------------------------------------------------------------

    #[test]
    fn test_simple_object_stage1_debug() {
        let json = br#"{"a":1}"#;
        let mut stage1_out = crate::stage1::Stage1Output::new();
        crate::stage1::find_structural_indices(json, &mut stage1_out);

        let indices = &stage1_out.structural_indices;
        println!("Simple JSON: {}", String::from_utf8_lossy(json));
        println!("Stage1 found {} structural indices", indices.len());
        println!("Expected: 5 (1 left-brace + 2 quotes + 1 colon + 1 right-brace)");
        println!("Indices: {:?}", indices);
        assert!(indices.len() >= 5, "Should find at least 5 structural chars");
    }

    #[test]
    fn test_deeply_nested() {
        const DEPTH: usize = 50;

        // One outer object wrapping DEPTH nested `"a":{...}` levels around a single value.
        let mut deep = String::from("{");
        deep.push_str(&"\"a\":{".repeat(DEPTH));
        deep.push_str("\"value\":123");
        deep.push_str(&"}".repeat(DEPTH));
        deep.push('}');

        let preview_len = deep.len().min(100);
        println!("Generated JSON: {} chars", deep.len());
        println!("First 100 chars: {}", &deep[..preview_len]);

        let mut stage1_out = crate::stage1::Stage1Output::new();
        crate::stage1::find_structural_indices(deep.as_bytes(), &mut stage1_out);
        println!(
            "Stage1: {} structural indices, {} bracket pairs",
            stage1_out.structural_indices.len(),
            stage1_out.bracket_pairs.len(),
        );

        let fragments = JsonFragmentScanner::scan_fragments(deep.as_bytes());
        println!("Fragments found: {}", fragments.len());
        for (i, f) in fragments.iter().enumerate() {
            println!(
                " Fragment {}: start={}, len={}, complete={}",
                i,
                f.start,
                f.length,
                f.is_complete()
            );
        }
        assert_eq!(fragments.len(), 1, "Expected 1 fragment");
        assert!(fragments[0].is_complete(), "Fragment should be complete");
    }

    #[test]
    fn test_fragment_end_method() {
        let fragment = Fragment {
            status: FragmentStatus::Complete,
            start: 10,
            length: 20,
        };
        assert_eq!(fragment.end(), 30);
    }

    // ---- lenient / structural cases -----------------------------------------------------

    #[test]
    fn test_trailing_comma_in_object() {
        sole_complete_fragment(br#"{"a": 1,}"#);
    }

    #[test]
    fn test_trailing_comma_in_array() {
        sole_complete_fragment(br#"[1, 2,]"#);
    }

    #[test]
    fn test_complex_valid_object() {
        sole_complete_fragment(br#"{"a": 1, "b": [2, 3], "c": {"d": true}}"#);
    }

    #[test]
    fn test_complex_valid_array() {
        sole_complete_fragment(br#"[1, "two", {"three": 3}, [4, 5], true, null]"#);
    }

    #[test]
    fn test_empty_string_as_key() {
        sole_complete_fragment(br#"{"": "value"}"#);
    }

    #[test]
    fn test_array_in_object_value_position() {
        sole_complete_fragment(br#"{"key": [1, 2, 3]}"#);
    }

    #[test]
    fn test_object_in_array() {
        sole_complete_fragment(br#"[{"a": 1}, {"b": 2}]"#);
    }

    // ---- multi-byte UTF-8 ---------------------------------------------------------------

    #[test]
    fn test_utf8_multibyte_emoji() {
        sole_complete_fragment(r#"{"emoji": "👍 🚀 ✅"}"#.as_bytes());
    }

    #[test]
    fn test_utf8_multibyte_cjk() {
        sole_complete_fragment(r#"{"text": "你好世界", "lang": "中文"}"#.as_bytes());
    }

    #[test]
    fn test_utf8_multibyte_mixed() {
        sole_complete_fragment(r#"{"msg": "Hello мир 世界 👋!"}"#.as_bytes());
    }

    #[test]
    fn test_utf8_in_keys() {
        sole_complete_fragment(r#"{"名前": "Alice", "возраст": 30}"#.as_bytes());
    }

    // ---- backslash / quote escaping -----------------------------------------------------

    #[test]
    fn test_single_escaped_quote() {
        sole_complete_fragment(br#"{"text": "He said \"hello\""}"#);
    }

    #[test]
    fn test_double_backslash_then_quote() {
        sole_complete_fragment(br#"{"text": "path\\", "next": "value"}"#);
    }

    #[test]
    fn test_triple_backslash_then_quote() {
        sole_complete_fragment(br#"{"text": "value\\\""}"#);
    }

    #[test]
    fn test_four_backslashes_then_quote() {
        sole_complete_fragment(br#"{"text": "path\\\\", "next": "value"}"#);
    }

    #[test]
    fn test_many_consecutive_backslashes() {
        sole_complete_fragment(br#"{"text": "backslashes: \\\\\\\\"}"#);
    }

    #[test]
    fn test_escaped_backslash_in_middle() {
        sole_complete_fragment(br#"{"path": "C:\\Users\\Alice\\file.txt"}"#);
    }

    // ---- mismatched brackets ------------------------------------------------------------

    #[test]
    fn test_bracket_mismatch_array_closed_with_brace() {
        let fragment = sole_fragment(br#"[1, 2, 3}"#);
        assert!(!fragment.is_complete());
    }

    #[test]
    fn test_bracket_mismatch_object_closed_with_bracket() {
        let fragment = sole_fragment(br#"{"key": "value"]"#);
        assert!(!fragment.is_complete());
    }

    #[test]
    fn test_bracket_mismatch_nested() {
        // Only the fragment count is checked here; completeness is not asserted.
        sole_fragment(br#"{"array": [1, 2}"#);
    }

    #[test]
    fn test_bracket_mismatch_multiple_fragments() {
        let found = JsonFragmentScanner::scan_fragments(br#"[1, 2} {"valid": true}"#);

        assert!(!found.is_empty());
        assert!(!found[0].is_complete());
    }

    // ---- unusual bytes inside strings (only the fragment count is checked) --------------

    #[test]
    fn test_null_byte_in_string() {
        let mut data = br#"{"text": "before"#.to_vec();
        data.push(0);
        data.extend_from_slice(br#"after"}"#);

        sole_fragment(&data);
    }

    #[test]
    fn test_control_characters_in_string() {
        let mut data = br#"{"text": "hello"#.to_vec();
        data.extend_from_slice(&[0x01, 0x0F]);
        data.extend_from_slice(br#"world"}"#);

        sole_fragment(&data);
    }

    #[test]
    fn test_tab_and_newline_in_string() {
        sole_fragment(b"{\"text\": \"line1\nline2\tindented\"}");
    }

    #[test]
    fn test_high_byte_values() {
        let mut data = br#"{"data": ""#.to_vec();
        data.extend_from_slice(&[0xFF, 0xFE, 0xFD]);
        data.extend_from_slice(br#""}"#);

        sole_fragment(&data);
    }

    // ---- extreme nesting ----------------------------------------------------------------

    #[test]
    fn test_extreme_nesting_1000_levels() {
        let mut json = String::new();
        for key in (0..1000).map(|i| format!("\"level_{}\":", i)) {
            json.push('{');
            json.push_str(&key);
        }
        json.push_str("\"value\"");
        json.extend(std::iter::repeat('}').take(1000));

        sole_complete_fragment(json.as_bytes());
    }

    #[test]
    fn test_extreme_nesting_mixed_brackets() {
        let mut json = String::new();
        // Even levels open an object (with a key), odd levels open an array.
        for i in 0..500 {
            match i % 2 {
                0 => {
                    json.push('{');
                    json.push_str(&format!("\"key_{}\":", i));
                }
                _ => json.push('['),
            }
        }
        json.push_str("42");
        // Close in reverse order with the matching bracket kind.
        json.extend((0..500).rev().map(|i| if i % 2 == 0 { '}' } else { ']' }));

        sole_complete_fragment(json.as_bytes());
    }
}
855
#[cfg(test)]
mod proptest_tests {
    use super::*;
    use proptest::prelude::*;

    /// Strategy yielding JSON text whose top-level value is an object or an array.
    ///
    /// Leaves are lowercase quoted words, small integers, or `true`/`false`/`null`; they are
    /// combined recursively into comma-separated arrays and objects without extra whitespace.
    fn json_fragment() -> impl Strategy<Value = String> {
        let quoted_word = "[a-z]{1,10}".prop_map(|word| format!("\"{}\"", word));
        let integer = (-1000i32..1000i32).prop_map(|n| n.to_string());
        let literal = prop_oneof![
            Just("true".to_string()),
            Just("false".to_string()),
            Just("null".to_string()),
        ];
        let leaf = prop_oneof![quoted_word, integer, literal];

        let nested = leaf.prop_recursive(4, 32, 10, |inner| {
            let array = prop::collection::vec(inner.clone(), 0..5)
                .prop_map(|elements| format!("[{}]", elements.join(",")));

            let object = prop::collection::vec(("[a-z]{1,5}", inner.clone()), 0..5).prop_map(
                |entries| {
                    let members = entries
                        .into_iter()
                        .map(|(key, value)| format!("\"{}\":{}", key, value))
                        .collect::<Vec<String>>()
                        .join(",");
                    format!("{{{}}}", members)
                },
            );

            prop_oneof![array, object]
        });

        // Bare leaves can still be generated at the top level; keep containers only.
        nested.prop_filter("Must be object or array", |text| {
            matches!(text.as_bytes().first(), Some(b'{') | Some(b'['))
        })
    }

    /// Space-separated valid fragments must each be reported once, and as complete.
    #[test]
    fn proptest_multiple_fragments() {
        proptest!(|(jsons in prop::collection::vec(json_fragment(), 1..5))| {
            let combined = jsons.join(" ");
            let fragments = JsonFragmentScanner::scan_fragments(combined.as_bytes());

            prop_assert_eq!(fragments.len(), jsons.len());

            for frag in &fragments {
                prop_assert!(frag.is_complete());
            }
        });
    }

    /// Arbitrary byte input must never make the scanner panic; the result itself is ignored.
    #[test]
    fn proptest_random_bytes_no_panic() {
        proptest!(|(bytes in prop::collection::vec(any::<u8>(), 0..100))| {
            drop(JsonFragmentScanner::scan_fragments(&bytes));
        });
    }
}