json_repair/
repair_json.rs

1crate::ix!();
2
/// applies all known fixes in order until one proves to work
///
/// This is a thin entry point: it forwards `input` unchanged to
/// `repair_json_string_series` and returns that function's result as-is.
pub fn repair_json_string(input: &str) -> Result<Value, JsonRepairError> {
    repair_json_string_series(input)
}
7
8/// this one will try all of the fixes in parallel first before falling back to serial application.
9///
10/// may be useful in some contexts
11pub fn repair_json_string_heavy(input: &str) -> Result<Value, JsonRepairError> {
12    match repair_json_string_parallel(input) {
13        Ok(repaired) => Ok(repaired),
14        Err(e)       => repair_json_string_series(input),
15    }
16}
17
18/// this one we use for certain cases where the JSON is known to have list items which are all
19/// `Sentence fragments of this format`
20pub fn repair_json_with_known_capitalized_sentence_fragment_list_items(input: &str) -> Result<Value, JsonRepairError> {
21    let repaired = repair_json_string_series(input)?;
22    Ok(repair_standard_list_items_with_possible_splits(repaired))
23}
24
25#[cfg(test)]
26mod repair_json_tests {
27    use super::*;
28    use serde_json::json;
29
30    #[traced_test]
31    fn test_valid_json() {
32        let input = r#"{"key": "value", "list": [1, 2, 3]}"#;
33        let expected = serde_json::from_str(input).unwrap();
34        let output = repair_json_string(input);
35        assert_expected_matches_output_result(input,&output,&expected);
36    }
37
38    #[traced_test]
39    fn test_truncated_in_string() {
40        let input = r#"{"key": "value"#;
41        let expected = json!({"key": "value"});
42        let output = repair_json_string(input);
43        assert_expected_matches_output_result(input,&output,&expected);
44    }
45
46    #[traced_test]
47    fn test_unclosed_array() {
48        let input = r#"{"list": [1, 2, 3"#;
49        let expected = json!({"list": [1, 2, 3]});
50        let output = repair_json_string(input);
51        assert_expected_matches_output_result(input,&output,&expected);
52    }
53
54    #[traced_test]
55    fn test_unclosed_object() {
56        let input = r#"{"object": {"nested": "value"#;
57        let expected = json!({"object": {"nested": "value"}});
58        let output = repair_json_string(input);
59        assert_expected_matches_output_result(input,&output,&expected);
60    }
61
62    #[traced_test]
63    fn test_trailing_comma_in_object() {
64        let input = r#"{"key": "value",}"#;
65        let expected = json!({"key": "value"});
66        let output = repair_json_string(input);
67        assert_expected_matches_output_result(input,&output,&expected);
68    }
69
70    #[traced_test]
71    fn test_trailing_comma_in_array() {
72        let input = r#"{"list": [1, 2, 3, ]}"#;
73        let expected = json!({"list": [1, 2, 3]});
74        let output = repair_json_string(input);
75        assert_expected_matches_output_result(input,&output,&expected);
76    }
77
78    #[traced_test]
79    fn test_empty_input() {
80        let input = "";
81        let expected = json!({});
82        let output = repair_json_string(input);
83        assert_expected_matches_output_result(input,&output,&expected);
84    }
85
86    #[traced_test]
87    fn test_only_opening_brace() {
88        let input = "{";
89        let expected = json!({});
90        let output = repair_json_string(input);
91        assert_expected_matches_output_result(input,&output,&expected);
92    }
93
94    #[traced_test]
95    fn test_only_opening_bracket() {
96        let input = "[";
97        let expected = json!([]);
98        let output = repair_json_string(input);
99        assert_expected_matches_output_result(input,&output,&expected);
100    }
101
102    #[traced_test]
103    fn test_unclosed_string_in_array() {
104        let input = r#"["value1", "value2"#;
105        let expected = json!(["value1", "value2"]);
106        let output = repair_json_string(input);
107        assert_expected_matches_output_result(input,&output,&expected);
108    }
109
110    #[traced_test]
111    fn test_truncated_in_the_middle_of_array_element() {
112        let input = r#"["value1", "value2", "value"#;
113        let expected = json!(["value1", "value2", "value"]);
114        let output = repair_json_string(input);
115        assert_expected_matches_output_result(input,&output,&expected);
116    }
117
118    #[traced_test]
119    fn test_nested_structures_with_truncation() {
120        let input = r#"{"a": {"b": {"c": [1, 2, {"d": "e"#;
121        let expected = json!({"a": {"b": {"c": [1, 2, {"d": "e"}]}}});
122        let output = repair_json_string(input);
123        assert_expected_matches_output_result(input,&output,&expected);
124    }
125
126    #[traced_test]
127    fn test_truncated_number() {
128        let input = r#"{"number": 1234"#;
129        let expected = json!({"number": 1234});
130        let output = repair_json_string(input);
131        assert_expected_matches_output_result(input,&output,&expected);
132    }
133
134    #[traced_test]
135    fn test_truncated_boolean_true() {
136        let input    = r#"{"bool": tr"#;
137        let output   = repair_json_string(input);
138        let expected = json!({"bool": true});
139        assert_expected_matches_output_result(input,&output,&expected);
140    }
141
142    #[traced_test]
143    fn test_truncated_boolean_false() {
144        let input = r#"{"bool": fal"#;
145        let output = repair_json_string(input);
146        let expected = json!({"bool": false});
147        assert_expected_matches_output_result(input,&output,&expected);
148    }
149
150    #[traced_test]
151    fn test_extra_commas_and_unclosed_structures() {
152        let input = r#"{"key1": "value1", "key2": "value2", "#;
153        let expected = json!({"key1": "value1", "key2": "value2"});
154        let output = repair_json_string(input);
155        assert_expected_matches_output_result(input,&output,&expected);
156    }
157
158    #[traced_test]
159    fn test_complex_truncated_json() {
160        let input = r#"{
161  "aesthetic_details": [
162    "Focus on contrasts.",
163    "Patterns in water.",
164    "Intricate branches.",
165  ],
166  "cognitive_and_perceptual_influence": [
167    "Enhances awareness.",
168    "Stimulates thought.",
169    "Encourages storytelling.",
170  ],
171  "concrete_steps_to_create_in_our_location": [
172    "Identify location.",
173    "Design layout.",
174    "Engage historians.",
175  ],
176  "core_essence_and_symbolism": [
177    "Represents integration of nature and civilization.",
178    "Embodies the timeless flow of knowledge."
179  ],
180  "additional_notes": "This project aims to fu
181"#;
182        let expected = json!({
183            "aesthetic_details": [
184                "Focus on contrasts.",
185                "Patterns in water.",
186                "Intricate branches."
187            ],
188            "cognitive_and_perceptual_influence": [
189                "Enhances awareness.",
190                "Stimulates thought.",
191                "Encourages storytelling."
192            ],
193            "concrete_steps_to_create_in_our_location": [
194                "Identify location.",
195                "Design layout.",
196                "Engage historians."
197            ],
198            "core_essence_and_symbolism": [
199                "Represents integration of nature and civilization.",
200                "Embodies the timeless flow of knowledge."
201            ],
202            "additional_notes": "This project aims to fu"
203        });
204        let output = repair_json_string(input);
205        assert_expected_matches_output_result(input,&output,&expected);
206    }
207
208    #[traced_test]
209    fn test_repair_single_quote_instead_of_double_quote() {
210        //value4 has a single quote instead of a double
211        let input = r#"{
212            "key": [
213                "value1",
214                "value2",
215                "value3",
216                "value4',
217                "value5",
218                "value6"
219            ]
220        }"#;
221
222        let expected = json!({
223            "key": [
224                "value1",
225                "value2",
226                "value3",
227                "value4",
228                "value5",
229                "value6"
230            ]
231        });
232
233        let output = repair_json_string(input);
234
235        assert_expected_matches_output_result(input,&output,&expected);
236    }
237
238    #[traced_test]
239    fn test_missing_comma() {
240
241        //value5 has no comma after the quote
242        let input = r#"{
243            "key": [
244                "value1",
245                "value2",
246                "value3",
247                "value4",
248                "value5"
249                "value6",
250                "value7"
251            ]
252        }"#;
253
254        let expected = json!({
255            "key": [
256                "value1",
257                "value2",
258                "value3",
259                "value4",
260                "value5",
261                "value6",
262                "value7"
263            ]
264        });
265
266        let output = repair_json_string(input);
267
268        assert_expected_matches_output_result(input,&output,&expected);
269    }
270
271    #[traced_test]
272    fn test_comma_and_quote_accidentally_swapped() {
273        //value3 has the comma and the trailing quote swapped
274        let input = r#"{
275            "key": [
276                "value1",
277                "value2",
278                "value3,"
279                "value4",
280                "value5"
281            ]
282        }"#;
283
284        let expected = json!({
285            "key": [
286                "value1",
287                "value2",
288                "value3",
289                "value4",
290                "value5"
291            ]
292        });
293
294        let output = repair_json_string(input);
295
296        assert_expected_matches_output_result(input,&output,&expected);
297    }
298
299    #[traced_test]
300    fn test_multiple_problems() {
301        //value3 has the comma and the trailing quote swapped
302        //value5 has no trailing comma
303        let input = r#"{
304            "key": [
305                "value1",
306                "value2",
307                "value3,"
308                "value4",
309                "value5"
310                "value6",
311                "value7",
312            ]
313        }"#;
314
315        let expected = json!({
316            "key": [
317                "value1",
318                "value2",
319                "value3",
320                "value4",
321                "value5",
322                "value6",
323                "value7"
324            ]
325        });
326
327        let output = repair_json_string(input);
328
329        assert_expected_matches_output_result(input,&output,&expected);
330    }
331
332    #[traced_test]
333    fn test_repair_single_quote_in_keys_and_values() {
334        let input = r#"{
335            'key1': 'value1',
336            'key2': "value2",
337            "key3": 'value3',
338            "text": "Don't stop believing",
339            'another_text': 'It\'s a kind of magic',
340            "nested": {
341                'inner_key': 'inner_value'
342            }
343        }"#;
344
345        let expected = json!({
346            "key1": "value1",
347            "key2": "value2",
348            "key3": "value3",
349            "text": "Don't stop believing",
350            "another_text": "It's a kind of magic",
351            "nested": {
352                "inner_key": "inner_value"
353            }
354        });
355
356        let output = repair_json_string(input);
357
358        assert_expected_matches_output_result(input, &output, &expected);
359    }
360
361    #[traced_test]
362    fn test_repair_mixed_quotes_and_escaped_quotes() {
363        let input = r#"{
364            "message": 'He said, "It\'s a sunny day!"',
365            'reply': "Yes, it\'s beautiful."
366        }"#;
367
368        let expected = json!({
369            "message": "He said, \"It's a sunny day!\"",
370            "reply": "Yes, it's beautiful."
371        });
372
373        let output = repair_json_string(input);
374
375        assert_expected_matches_output_result(input, &output, &expected);
376    }
377
378    #[traced_test]
379    fn test_brace_instead_of_bracket() {
380        let input = r#"{
381          "tag1": [
382            "item1",
383            "item2",
384            "item3",
385            "tag4"
386          },
387          "tag2": [
388            "item1",
389            "item2",
390            "item3",
391            "item4"
392          ]
393        }"#;
394
395        let expected = json!({
396            "tag1": [
397                "item1",
398                "item2",
399                "item3",
400                "tag4"
401            ],
402            "tag2": [
403                "item1",
404                "item2",
405                "item3",
406                "item4"
407            ]
408        });
409
410        let output = repair_json_string(input);
411        assert_expected_matches_output_result(input, &output, &expected);
412    }
413
414    #[traced_test]
415    fn test_control_character_error() {
416        let input = "{ \"text\": \"This is a test\u{0001}string with control characters\" }";
417
418        let expected = json!({
419            "text": "This is a teststring with control characters"
420        });
421
422        let output = repair_json_string(input);
423        assert_expected_matches_output_result(input, &output, &expected);
424    }
425
426    #[traced_test]
427    fn test_missing_comma_inside_list() {
428        let input = r#"{
429          "tag": [
430            "item1",
431            "item2",
432            "item3"
433            "item4",
434            "item5",
435            "item6",
436            "item7",
437            "item8",
438            "item9",
439            "item10",
440            "item11",
441            "item12",
442            "item13",
443            "item14",
444            "item15"
445          ]
446        }"#;
447
448        let expected = json!({
449            "tag": [
450                "item1",
451                "item2",
452                "item3",
453                "item4",
454                "item5",
455                "item6",
456                "item7",
457                "item8",
458                "item9",
459                "item10",
460                "item11",
461                "item12",
462                "item13",
463                "item14",
464                "item15"
465            ]
466        });
467
468        let output = repair_json_string(input);
469        assert_expected_matches_output_result(input, &output, &expected);
470    }
471
472    #[traced_test]
473    fn test_unexpected_eof_inside_list() {
474        let input = r#"{
475            "tag": [
476                "item1",
477                "item2",
478                "item3",
479                "item4",
480                "item5",
481                "item6",
482                "item7",
483                "item8",
484                "item9",
485                "item10",
486                "item11",
487                "item12",
488                "iteEOF
489        "#;
490
491        let expected = json!({
492            "tag": [
493                "item1",
494                "item2",
495                "item3",
496                "item4",
497                "item5",
498                "item6",
499                "item7",
500                "item8",
501                "item9",
502                "item10",
503                "item11",
504                "item12",
505                "iteEOF"
506            ]
507        });
508
509        let output = repair_json_string(input);
510        assert_expected_matches_output_result(input, &output, &expected);
511    }
512
513    #[traced_test]
514    fn test_duplicate_quote_to_close_list_item() {
515        let input = r#"{
516          "tag": [
517            "item1",
518            "item2",
519            "item3",
520            "item4",
521            "item5"",
522            "item6",
523            "item7",
524            "item8",
525            "item9",
526            "item10"
527          ]
528        }"#;
529
530        let expected = json!({
531            "tag": [
532                "item1",
533                "item2",
534                "item3",
535                "item4",
536                "item5",
537                "item6",
538                "item7",
539                "item8",
540                "item9",
541                "item10"
542            ]
543        });
544
545        let output = repair_json_string(input);
546        assert_expected_matches_output_result(input, &output, &expected);
547    }
548
549    #[test]
550    fn test_missing_closing_double_quote_but_comma_present() -> Result<(), JsonRepairError> {
551        let input = r#"{
552          "tag": [
553            "item1",
554            "item2",
555            "item3",
556            "item4",
557            "item5,
558            "item6",
559            "item7",
560            "item8",
561            "item9",
562            "item10",
563            "item11",
564            "item12"
565          ]
566        }"#;
567
568        let expected = json!({
569            "tag": [
570                "item1",
571                "item2",
572                "item3",
573                "item4",
574                "item5",
575                "item6",
576                "item7",
577                "item8",
578                "item9",
579                "item10",
580                "item11",
581                "item12"
582            ]
583        });
584
585        let output = repair_json_string_series(input)?;
586
587        // Parse output as JSON Value
588        let output_json: Value = output;
589
590        assert_eq!(output_json, expected);
591
592        Ok(())
593    }
594
595    #[traced_test]
596    fn test_eof_in_between_lists() {
597        let input = r#"{
598          "tag": [
599            "item1",
600            "item2",
601            // ... more items ...
602            "item20"
603          ],
604          "a"#;
605
606        let expected = json!({
607            "tag": [
608                "item1",
609                "item2",
610                // ... more items ...
611                "item20"
612            ],
613            "a": null
614        });
615
616        let output = repair_json_string(input);
617        assert_expected_matches_output_result(input, &output, &expected);
618    }
619
620    #[traced_test]
621    fn test_bad_quote_character() {
622        let input = r#"{
623            "tag": [
624                "item1",
625                "item2",
626                "item3",
627                "item4",
628                "item5",
629                "item6",
630                "item7",
631                "item8",
632                "item9",
633                "item10",
634                "item11',
635                "item12",
636                "item13",
637                "item14",
638                "item15",
639                "item16",
640                "item17",
641                "item18",
642                "item19",
643                "item20"
644            ]
645        }"#;
646
647        let expected = json!({
648            "tag": [
649                "item1",
650                "item2",
651                "item3",
652                "item4",
653                "item5",
654                "item6",
655                "item7",
656                "item8",
657                "item9",
658                "item10",
659                "item11",
660                "item12",
661                "item13",
662                "item14",
663                "item15",
664                "item16",
665                "item17",
666                "item18",
667                "item19",
668                "item20"
669            ]
670        });
671
672        let output = repair_json_string(input);
673        assert_expected_matches_output_result(input, &output, &expected);
674    }
675
676    #[traced_test]
677    fn test_eof_found_midway_through_array_tag() {
678        let input = r#"{
679          "tag1": [
680            "item1",
681            // ... more items ...
682            "item20"
683          ],
684          "tag2"#;
685
686        let expected = json!({
687            "tag1": [
688                "item1",
689                // ... more items ...
690                "item20"
691            ],
692            "tag2": null
693        });
694
695        let output = repair_json_string(input);
696        assert_expected_matches_output_result(input, &output, &expected);
697    }
698
699    #[traced_test]
700    fn test_eof_found_midway_through_array_item() {
701        let input = r#"{
702          "tag": [
703            "item1",
704            // ... more items ...
705            "itEOF
706        "#;
707
708        let expected = json!({
709            "tag": [
710                "item1",
711                // ... more items ...
712                "itEOF"
713            ]
714        });
715
716        let output = repair_json_string(input);
717        assert_expected_matches_output_result(input, &output, &expected);
718    }
719}