Skip to main content

suture_core/
repair.rs

/// Outcome of an append-only repair, intended for streaming passthrough where
/// bytes that were already emitted can no longer be taken back.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AppendRepair {
    /// Whether the input has been structurally consistent so far. When this is
    /// `false` the original should be passed through untouched.
    pub consistent: bool,
    /// `true` when appending `append` to the already-emitted bytes produces
    /// valid JSON WITHOUT dropping anything. `false` means appending alone
    /// cannot fix the tail (trailing comma, partial scalar/keyword, mid-escape,
    /// incomplete key, or a truncated multibyte UTF-8 char) — the caller
    /// should skip this target.
    pub safe: bool,
    /// The bytes to append when `safe` is set: an optional closing '"'
    /// followed by the container closers.
    pub append: Vec<u8>,
}

impl AppendRepair {
    /// Returns `true` when there is nothing to append.
    ///
    /// Only `append` is inspected; `consistent` and `safe` play no part.
    pub fn is_noop(&self) -> bool {
        let Self { append, .. } = self;
        append.is_empty()
    }
}
21
/// Describes how to turn a (possibly truncated) JSON byte stream into valid
/// JSON: cut `drop_trailing` bytes off the end, then add `append`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Repair {
    /// Whether the input was structurally consistent. When `false` the other
    /// two fields carry no repair (zero / empty).
    pub consistent: bool,
    /// Number of bytes to remove from the end of the input before appending.
    pub drop_trailing: usize,
    /// Bytes to add after the trailing bytes have been removed.
    pub append: Vec<u8>,
}

impl Repair {
    /// Returns `true` when the repair neither drops nor appends any bytes.
    ///
    /// `consistent` is not consulted.
    pub fn is_noop(&self) -> bool {
        matches!((self.drop_trailing, self.append.as_slice()), (0, []))
    }
}
35
/// Kind of JSON container tracked on the stack of open frames.
#[derive(Clone, Copy, PartialEq, Eq)]
enum Frame {
    /// Opened by `{`, closed by `}`.
    Object,
    /// Opened by `[`, closed by `]`.
    Array,
}
41
/// Grammar position between tokens, either at the top level or inside the
/// innermost open container.
#[derive(Clone, Copy, PartialEq, Eq)]
enum Pos {
    /// Top level, no value seen yet.
    TopBefore,
    /// Top level, the single top-level value is complete.
    TopAfter,
    /// In an array, just after `[` or `,`: an element may start here.
    ArrBeforeElem,
    /// In an array, just after a complete element.
    ArrAfterElem,
    /// In an object, just after `{` or `,`: a key may start here.
    ObjBeforeKey,
    /// In an object, just after a complete key string (waiting for `:`).
    ObjAfterKey,
    /// In an object, just after `:`: a value may start here.
    ObjBeforeVal,
    /// In an object, just after a complete member value.
    ObjAfterVal,
}
53
/// Lexer state: which kind of token (if any) the next byte belongs to.
#[derive(Clone, Copy, PartialEq, Eq)]
enum Lex {
    /// Not inside any token (whitespace / punctuation level).
    Between,
    /// Inside a string, not in an escape sequence.
    Str,
    /// Inside a string, immediately after a backslash.
    StrEsc,
    /// Inside a `\u` escape; the payload is the number of hex digits consumed
    /// so far (0 through 3 — the fourth digit returns the lexer to `Str`).
    StrU(u8),
    /// Inside a bare scalar token (number or `true`/`false`/`null` keyword).
    Scalar,
}
62
/// Bookkeeping for one open container on the frame stack.
struct FrameState {
    /// Whether this container is an object or an array.
    frame: Frame,
    /// Current grammar position inside this container.
    pos: Pos,
    /// Offset to drop back to in order to remove the current (incomplete)
    /// element/member INCLUDING any preceding comma. Updated when entering a
    /// "before key/elem" state (just after `{`/`[`/`,`).
    elem_drop_to: usize,
    /// Whether this container has had at least one complete element/member.
    seen_member: bool,
}
73
/// Incremental scanner over a JSON byte stream that tracks enough state to
/// compute, at any point, how to close the stream into valid JSON.
pub struct StreamRepairer {
    /// Stack of currently open containers; the innermost one is last.
    frames: Vec<FrameState>,
    /// Grammar position used while no container is open.
    top_pos: Pos,
    /// Current lexer state.
    lex: Lex,
    /// Cleared as soon as a byte violates the tracked grammar; once cleared,
    /// further bytes are only counted, not interpreted.
    consistent: bool,
    /// Total number of bytes seen so far (also counts bytes seen after the
    /// stream became inconsistent).
    len: usize,
    /// Whether the current/just-finished string sits in an object-key position.
    str_is_key: bool,
    /// Raw bytes of the current scalar token (for shape validation).
    scalar_buf: Vec<u8>,
    /// Count of trailing bytes belonging to an as-yet-incomplete multibyte
    /// UTF-8 char inside the current string (0 when on a char boundary).
    str_incomplete: usize,
    /// Total expected byte length of the current multibyte char.
    str_char_len: usize,
}
90
91impl Default for StreamRepairer {
92    fn default() -> Self {
93        Self::new()
94    }
95}
96
/// Returns `true` for the four bytes treated as insignificant whitespace
/// between tokens: space, horizontal tab, line feed and carriage return.
fn is_ws(b: u8) -> bool {
    b" \t\n\r".contains(&b)
}
100
/// Returns `true` when `b` is an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
fn is_hex(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')
}
104
/// Checks that `b` is, in its entirety, a JSON number as defined by the
/// RFC 8259 grammar:
/// `-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][+-]?[0-9]+)?`
///
/// Returns `false` for the empty slice and for any input with leftover bytes
/// after the longest number prefix.
fn is_valid_json_number(b: &[u8]) -> bool {
    /// Strips the leading run of ASCII digits (possibly empty) from `s`.
    fn skip_digits(s: &[u8]) -> &[u8] {
        let run = s.iter().take_while(|c| c.is_ascii_digit()).count();
        &s[run..]
    }

    // Optional minus sign.
    let rest = match b {
        [b'-', tail @ ..] => tail,
        _ => b,
    };

    // Integer part: a lone `0`, or a non-zero digit followed by more digits.
    let rest = match rest {
        [b'0', tail @ ..] => tail,
        [b'1'..=b'9', ..] => skip_digits(rest),
        _ => return false,
    };

    // Optional fraction: `.` must be followed by at least one digit.
    let rest = match rest {
        [b'.', tail @ ..] => {
            if !matches!(tail.first(), Some(c) if c.is_ascii_digit()) {
                return false;
            }
            skip_digits(tail)
        }
        _ => rest,
    };

    // Optional exponent: `e`/`E`, optional sign, then at least one digit.
    let rest = match rest {
        [b'e' | b'E', tail @ ..] => {
            let unsigned = match tail {
                [b'+' | b'-', digits @ ..] => digits,
                _ => tail,
            };
            if !matches!(unsigned.first(), Some(c) if c.is_ascii_digit()) {
                return false;
            }
            skip_digits(unsigned)
        }
        _ => rest,
    };

    // Anything left over means the token is not a number.
    rest.is_empty()
}
148
149fn is_valid_scalar(b: &[u8]) -> bool {
150    matches!(b, b"true" | b"false" | b"null") || is_valid_json_number(b)
151}
152
/// Returns `true` when `b` may begin a bare scalar token: an ASCII digit, a
/// minus sign, or the first letter of `true`, `false` or `null`.
fn is_scalar_start(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'-' | b't' | b'f' | b'n')
}
156
/// Returns `true` when `b` may appear inside a bare scalar token: ASCII
/// digits, the number punctuation `-`, `+`, `.`, `e`, `E`, and every letter
/// that occurs in the keywords `true`, `false` and `null`.
///
/// This is a per-byte filter only; the shape of the whole token is validated
/// separately.
fn is_scalar_byte(b: u8) -> bool {
    match b {
        b'0'..=b'9' => true,
        b'-' | b'+' | b'.' | b'e' | b'E' => true,
        b't' | b'r' | b'u' | b'f' | b'a' | b'l' | b's' | b'n' => true,
        _ => false,
    }
}
175
176impl StreamRepairer {
177    pub fn new() -> Self {
178        Self {
179            frames: Vec::new(),
180            top_pos: Pos::TopBefore,
181            lex: Lex::Between,
182            consistent: true,
183            len: 0,
184            str_is_key: false,
185            scalar_buf: Vec::new(),
186            str_incomplete: 0,
187            str_char_len: 0,
188        }
189    }
190
191    pub fn push(&mut self, bytes: &[u8]) {
192        for &b in bytes {
193            self.process(b);
194        }
195    }
196
197    /// Track multibyte UTF-8 progress for raw bytes inside a string.
198    fn track_utf8(&mut self, b: u8) {
199        let is_cont = (0x80..=0xBF).contains(&b);
200        if self.str_incomplete > 0 {
201            // expecting a continuation byte
202            if !is_cont {
203                self.consistent = false;
204                return;
205            }
206            self.str_incomplete += 1;
207            if self.str_incomplete >= self.str_char_len {
208                self.str_incomplete = 0;
209            }
210        } else if b < 0x80 {
211            // ASCII — fine, on a boundary
212        } else if b >= 0xC0 {
213            // lead byte
214            self.str_char_len = if b >= 0xF0 {
215                4
216            } else if b >= 0xE0 {
217                3
218            } else {
219                2
220            };
221            self.str_incomplete = 1;
222        } else {
223            // stray continuation byte with no lead
224            self.consistent = false;
225        }
226    }
227
228    fn cur_pos(&self) -> Pos {
229        self.frames.last().map(|f| f.pos).unwrap_or(self.top_pos)
230    }
231
232    fn set_pos(&mut self, p: Pos) {
233        match self.frames.last_mut() {
234            Some(f) => f.pos = p,
235            None => self.top_pos = p,
236        }
237    }
238
239    fn cur_drop_to(&self) -> usize {
240        self.frames.last().map(|f| f.elem_drop_to).unwrap_or(0)
241    }
242
243    fn value_allowed(&self) -> bool {
244        matches!(
245            self.cur_pos(),
246            Pos::ArrBeforeElem | Pos::ObjBeforeVal | Pos::TopBefore
247        )
248    }
249
250    fn process(&mut self, b: u8) {
251        let off = self.len;
252        self.len += 1;
253        if !self.consistent {
254            return;
255        }
256        match self.lex {
257            Lex::Str => match b {
258                b'\\' => {
259                    self.lex = Lex::StrEsc;
260                    self.str_incomplete = 0;
261                }
262                b'"' => {
263                    self.lex = Lex::Between;
264                    self.complete_string();
265                }
266                _ => {
267                    if b < 0x20 {
268                        // raw control characters are not allowed unescaped in JSON strings
269                        self.consistent = false;
270                        return;
271                    }
272                    self.track_utf8(b);
273                }
274            },
275            Lex::StrEsc => {
276                self.lex = match b {
277                    b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => Lex::Str,
278                    b'u' => Lex::StrU(0),
279                    _ => {
280                        self.consistent = false;
281                        return;
282                    }
283                };
284            }
285            Lex::StrU(n) => {
286                if is_hex(b) {
287                    self.lex = if n == 3 { Lex::Str } else { Lex::StrU(n + 1) };
288                } else {
289                    self.consistent = false;
290                }
291            }
292            Lex::Scalar => {
293                if is_scalar_byte(b) {
294                    self.scalar_buf.push(b);
295                } else {
296                    self.lex = Lex::Between;
297                    self.complete_scalar();
298                    if self.consistent {
299                        self.process_between(b, off);
300                    }
301                }
302            }
303            Lex::Between => self.process_between(b, off),
304        }
305    }
306
307    fn complete_string(&mut self) {
308        if self.str_is_key {
309            self.set_pos(Pos::ObjAfterKey);
310        } else {
311            self.after_value();
312        }
313    }
314
315    fn complete_scalar(&mut self) {
316        if !is_valid_scalar(&self.scalar_buf) {
317            self.consistent = false;
318            return;
319        }
320        self.after_value();
321    }
322
323    /// Advance position after a complete value (string value, scalar, or
324    /// closed container) was produced in the current value position.
325    fn after_value(&mut self) {
326        match self.cur_pos() {
327            Pos::ArrBeforeElem => {
328                self.set_pos(Pos::ArrAfterElem);
329                if let Some(f) = self.frames.last_mut() {
330                    f.seen_member = true;
331                }
332            }
333            Pos::ObjBeforeVal => {
334                self.set_pos(Pos::ObjAfterVal);
335                if let Some(f) = self.frames.last_mut() {
336                    f.seen_member = true;
337                }
338            }
339            Pos::TopBefore => self.top_pos = Pos::TopAfter,
340            _ => self.consistent = false,
341        }
342    }
343
344    fn process_between(&mut self, b: u8, off: usize) {
345        if is_ws(b) {
346            return;
347        }
348        match b {
349            b'"' => {
350                match self.cur_pos() {
351                    Pos::ObjBeforeKey => self.str_is_key = true,
352                    Pos::ArrBeforeElem | Pos::ObjBeforeVal | Pos::TopBefore => {
353                        self.str_is_key = false
354                    }
355                    _ => {
356                        self.consistent = false;
357                        return;
358                    }
359                }
360                self.lex = Lex::Str;
361                self.str_incomplete = 0;
362                self.str_char_len = 0;
363            }
364            b'{' => {
365                if !self.value_allowed() {
366                    self.consistent = false;
367                    return;
368                }
369                self.frames.push(FrameState {
370                    frame: Frame::Object,
371                    pos: Pos::ObjBeforeKey,
372                    elem_drop_to: off + 1,
373                    seen_member: false,
374                });
375            }
376            b'[' => {
377                if !self.value_allowed() {
378                    self.consistent = false;
379                    return;
380                }
381                self.frames.push(FrameState {
382                    frame: Frame::Array,
383                    pos: Pos::ArrBeforeElem,
384                    elem_drop_to: off + 1,
385                    seen_member: false,
386                });
387            }
388            b'}' => self.close(Frame::Object),
389            b']' => self.close(Frame::Array),
390            b':' => {
391                if self.cur_pos() == Pos::ObjAfterKey {
392                    self.set_pos(Pos::ObjBeforeVal);
393                } else {
394                    self.consistent = false;
395                }
396            }
397            b',' => match self.cur_pos() {
398                Pos::ArrAfterElem => {
399                    self.set_pos(Pos::ArrBeforeElem);
400                    if let Some(f) = self.frames.last_mut() {
401                        f.elem_drop_to = off;
402                    }
403                }
404                Pos::ObjAfterVal => {
405                    self.set_pos(Pos::ObjBeforeKey);
406                    if let Some(f) = self.frames.last_mut() {
407                        f.elem_drop_to = off;
408                    }
409                }
410                _ => self.consistent = false,
411            },
412            _ => {
413                if is_scalar_start(b) && self.value_allowed() {
414                    self.lex = Lex::Scalar;
415                    self.scalar_buf.clear();
416                    self.scalar_buf.push(b);
417                } else {
418                    self.consistent = false;
419                }
420            }
421        }
422    }
423
424    fn close(&mut self, want: Frame) {
425        let ok = match self.frames.last() {
426            Some(f) if f.frame == want => match (want, f.pos) {
427                (Frame::Object, Pos::ObjAfterVal) => true,
428                (Frame::Object, Pos::ObjBeforeKey) => !f.seen_member,
429                (Frame::Array, Pos::ArrAfterElem) => true,
430                (Frame::Array, Pos::ArrBeforeElem) => !f.seen_member,
431                _ => false,
432            },
433            _ => false,
434        };
435        if !ok {
436            self.consistent = false;
437            return;
438        }
439        self.frames.pop();
440        self.after_value();
441    }
442
443    fn cur_seen_member(&self) -> bool {
444        self.frames.last().map(|f| f.seen_member).unwrap_or(false)
445    }
446
447    /// Append-only repair: keep already-emitted bytes, append only closers.
448    /// Suited to streaming passthrough. See `AppendRepair`.
449    pub fn append_repair(&self) -> AppendRepair {
450        if !self.consistent {
451            return AppendRepair {
452                consistent: false,
453                safe: false,
454                append: Vec::new(),
455            };
456        }
457        let mut append: Vec<u8> = Vec::new();
458        let safe = match self.lex {
459            Lex::Str => {
460                if self.str_is_key || self.str_incomplete > 0 {
461                    false
462                } else {
463                    append.push(b'"');
464                    true
465                }
466            }
467            Lex::StrEsc | Lex::StrU(_) => false,
468            Lex::Scalar => is_valid_scalar(&self.scalar_buf),
469            Lex::Between => match self.cur_pos() {
470                Pos::TopBefore => {
471                    return AppendRepair {
472                        consistent: true,
473                        safe: true,
474                        append: Vec::new(),
475                    };
476                }
477                Pos::TopAfter | Pos::ArrAfterElem | Pos::ObjAfterVal => true,
478                Pos::ArrBeforeElem | Pos::ObjBeforeKey => !self.cur_seen_member(),
479                Pos::ObjAfterKey | Pos::ObjBeforeVal => false,
480            },
481        };
482        if !safe {
483            return AppendRepair {
484                consistent: true,
485                safe: false,
486                append: Vec::new(),
487            };
488        }
489        for f in self.frames.iter().rev() {
490            append.push(match f.frame {
491                Frame::Object => b'}',
492                Frame::Array => b']',
493            });
494        }
495        AppendRepair {
496            consistent: true,
497            safe: true,
498            append,
499        }
500    }
501
502    pub fn finish(&self) -> Repair {
503        if !self.consistent {
504            return Repair {
505                consistent: false,
506                drop_trailing: 0,
507                append: Vec::new(),
508            };
509        }
510        let mut drop_trailing = 0usize;
511        let mut append: Vec<u8> = Vec::new();
512
513        // 1) Resolve the in-progress lexer token.
514        match self.lex {
515            Lex::Str => {
516                if self.str_is_key {
517                    drop_trailing = self.len - self.cur_drop_to();
518                } else {
519                    drop_trailing = self.str_incomplete;
520                    append.push(b'"');
521                }
522            }
523            Lex::StrEsc => {
524                if self.str_is_key {
525                    drop_trailing = self.len - self.cur_drop_to();
526                } else {
527                    drop_trailing = 1; // drop the dangling '\'
528                    append.push(b'"');
529                }
530            }
531            Lex::StrU(n) => {
532                if self.str_is_key {
533                    drop_trailing = self.len - self.cur_drop_to();
534                } else {
535                    drop_trailing = 2 + n as usize; // drop '\u' + n hex digits
536                    append.push(b'"');
537                }
538            }
539            Lex::Scalar => {
540                let is_keyword = matches!(self.scalar_buf.as_slice(), b"true" | b"false" | b"null");
541                if is_keyword {
542                    // complete keyword: keep it; frames closed below
543                } else if self.frames.is_empty() {
544                    // top-level bare scalar: out of scope, leave unchanged
545                } else {
546                    drop_trailing = self.len - self.cur_drop_to();
547                }
548            }
549            Lex::Between => match self.cur_pos() {
550                Pos::TopAfter | Pos::ArrAfterElem | Pos::ObjAfterVal => {}
551                Pos::TopBefore => {
552                    return Repair {
553                        consistent: true,
554                        drop_trailing: 0,
555                        append: Vec::new(),
556                    };
557                }
558                Pos::ArrBeforeElem | Pos::ObjBeforeKey | Pos::ObjAfterKey | Pos::ObjBeforeVal => {
559                    drop_trailing = self.len - self.cur_drop_to();
560                }
561            },
562        }
563
564        // 2) Close every open frame, innermost first.
565        for f in self.frames.iter().rev() {
566            append.push(match f.frame {
567                Frame::Object => b'}',
568                Frame::Array => b']',
569            });
570        }
571
572        Repair {
573            consistent: true,
574            drop_trailing,
575            append,
576        }
577    }
578}
579
// Unit tests for the streaming repair engine. String-level cases go through
// `crate::repair_str`; byte-level cases drive `StreamRepairer` directly.
#[cfg(test)]
mod tests {
    use super::StreamRepairer;
    use crate::repair_str;
    use serde_json::Value;

    /// Apply an engine repair at the raw-byte level (for testing inputs that are
    /// not valid UTF-8, which `repair_str` cannot accept).
    ///
    /// Returns `None` when the engine reports the input as inconsistent.
    fn engine_repair(bytes: &[u8]) -> Option<Vec<u8>> {
        let mut r = StreamRepairer::new();
        r.push(bytes);
        let rep = r.finish();
        if !rep.consistent {
            return None;
        }
        // Apply the repair: cut the trailing bytes, then append the closers.
        let keep = bytes.len() - rep.drop_trailing;
        let mut out = bytes[..keep].to_vec();
        out.extend_from_slice(&rep.append);
        Some(out)
    }

    #[test]
    fn truncated_multibyte_char_in_string_value_is_utf8_safe() {
        // `{"a":"x` followed by the lead byte 0xC3 of 'é' (continuation missing)
        let mut bytes = br#"{"a":"x"#.to_vec();
        bytes.push(0xC3);
        let out = engine_repair(&bytes).expect("should be consistent");
        let s = std::str::from_utf8(&out).expect("output must be valid UTF-8");
        serde_json::from_str::<Value>(s).expect("output must parse");
        assert_eq!(s, r#"{"a":"x"}"#);
    }

    #[test]
    fn truncated_emoji_in_string_value_is_utf8_safe() {
        // `["` + 3 of the 4 bytes of 😀 (F0 9F 98 80)
        let mut bytes = br#"["#.to_vec();
        bytes.push(b'"');
        bytes.extend_from_slice(&[0xF0, 0x9F, 0x98]);
        let out = engine_repair(&bytes).expect("should be consistent");
        let s = std::str::from_utf8(&out).expect("output must be valid UTF-8");
        serde_json::from_str::<Value>(s).expect("output must parse");
        assert_eq!(s, r#"[""]"#);
    }

    #[test]
    fn complete_multibyte_char_kept() {
        let out = engine_repair("{\"a\":\"café".as_bytes()).expect("consistent");
        let s = std::str::from_utf8(&out).unwrap();
        assert_eq!(s, r#"{"a":"café"}"#);
    }

    #[test]
    fn malformed_delimited_scalars_are_inconsistent() {
        assert_eq!(crate::repair_str(r#"{"a":truee}"#), None);
        assert_eq!(crate::repair_str("[truee]"), None);
        assert_eq!(crate::repair_str("[nulll]"), None);
        assert_eq!(crate::repair_str("[falsee]"), None);
        assert_eq!(crate::repair_str("[1e5e5]"), None);
        assert_eq!(crate::repair_str("[1..2]"), None);
        assert_eq!(crate::repair_str("[--1]"), None);
        assert_eq!(crate::repair_str("[1,2tru]"), None);
    }

    #[test]
    fn valid_numbers_still_accepted() {
        // The last number is still in progress at end of input, so it is dropped.
        assert_repairs("[0,-0,1.5,-2e10,3.14,1E-5", "[0,-0,1.5,-2e10,3.14]");
    }

    /// Assert that `input` repairs to exactly `expected` and that the repaired
    /// output parses as JSON.
    fn assert_repairs(input: &str, expected: &str) {
        let got = repair_str(input).expect("should be consistent");
        assert_eq!(got, expected, "input: {input:?}");
        serde_json::from_str::<Value>(&got).expect("repaired output must parse");
    }

    #[test]
    fn closes_truncated_string_value() {
        assert_repairs(
            r#"{"id":42,"generation":"The application sequence failed due to an error"#,
            r#"{"id":42,"generation":"The application sequence failed due to an error"}"#,
        );
    }

    #[test]
    fn empty_containers() {
        assert_repairs("{", "{}");
        assert_repairs("[", "[]");
        assert_repairs("{}", "{}");
        assert_repairs("[]", "[]");
    }

    #[test]
    fn nested_containers_closed_in_order() {
        assert_repairs(r#"{"a":["x",{"b":"c"#, r#"{"a":["x",{"b":"c"}]}"#);
    }

    #[test]
    fn drops_incomplete_object_key() {
        assert_repairs(r#"{"ab"#, "{}");
        assert_repairs(r#"{"a":"v","b"#, r#"{"a":"v"}"#);
    }

    #[test]
    fn drops_dangling_colon_value_position() {
        assert_repairs(r#"{"a":"#, "{}");
        assert_repairs(r#"{"x":"v","a":"#, r#"{"x":"v"}"#);
    }

    #[test]
    fn drops_incomplete_scalar_in_array() {
        assert_repairs(
            r#"{"status":"partial","payload_metrics":[250,194,"#,
            r#"{"status":"partial","payload_metrics":[250,194]}"#,
        );
        // An unterminated number may be cut short, so it is dropped with its comma.
        assert_repairs("[1,2,3", "[1,2]");
        assert_repairs("[1,2,", "[1,2]");
    }

    #[test]
    fn drops_incomplete_scalar_object_value() {
        // Dropping the unterminated value removes the whole member, key included.
        assert_repairs(r#"{"a":1"#, "{}");
        assert_repairs(r#"{"x":1,"a":2"#, r#"{"x":1}"#);
    }

    #[test]
    fn keeps_complete_value_then_closes() {
        assert_repairs(r#"{"a":"b","c":"d"#, r#"{"a":"b","c":"d"}"#);
        // A complete keyword at end of input is kept, unlike a number.
        assert_repairs(r#"[true,false,null"#, "[true,false,null]");
    }

    #[test]
    fn top_level_string_value_closed() {
        assert_repairs(r#""hello wor"#, r#""hello wor""#);
    }

    #[test]
    fn whitespace_tolerated() {
        assert_repairs("{  \"a\" : \"b\" , ", r#"{  "a" : "b" }"#);
    }

    #[test]
    fn escaped_quote_does_not_close_string() {
        assert_repairs(
            r#"{"a":"he said \"hi\" to me"#,
            r#"{"a":"he said \"hi\" to me"}"#,
        );
    }

    #[test]
    fn escaped_backslash_then_quote_closes() {
        assert_repairs(r#"["c:\\path"#, r#"["c:\\path"]"#);
    }

    #[test]
    fn drops_dangling_backslash() {
        assert_repairs(r#"{"a":"line\"#, r#"{"a":"line"}"#);
    }

    #[test]
    fn drops_incomplete_unicode_escape() {
        assert_repairs(r#"{"a":"caf\u00"#, r#"{"a":"caf"}"#);
        assert_repairs(r#"{"a":"x\u"#, r#"{"a":"x"}"#);
    }

    // NOTE(review): despite the name, the input below contains a literal `é`
    // rather than a `\u00e9` escape sequence — confirm this is the intended
    // input and not an artifact of how the file was rendered.
    #[test]
    fn complete_unicode_escape_kept() {
        assert_repairs(r#"{"a":"café and more"#, r#"{"a":"café and more"}"#);
    }

    #[test]
    fn mismatched_closer_is_inconsistent() {
        assert_eq!(crate::repair_str("[}"), None);
        assert_eq!(crate::repair_str("{]"), None);
    }

    #[test]
    fn underflow_closer_is_inconsistent() {
        assert_eq!(crate::repair_str("}"), None);
        assert_eq!(crate::repair_str("[1]]"), None);
    }

    #[test]
    fn trailing_comma_before_close_is_inconsistent() {
        assert_eq!(crate::repair_str("[1,]"), None);
        assert_eq!(crate::repair_str(r#"{"a":1,}"#), None);
    }

    #[test]
    fn missing_comma_between_values_is_inconsistent() {
        assert_eq!(crate::repair_str("[1 2]"), None);
    }

    #[test]
    fn second_top_level_value_is_inconsistent() {
        assert_eq!(crate::repair_str("{}{}"), None);
    }

    /// Apply an append-only repair to a string input.
    ///
    /// Returns `None` when the engine reports the input as inconsistent or the
    /// append-only repair as unsafe.
    fn append_repair_str(input: &str) -> Option<String> {
        let mut r = StreamRepairer::new();
        r.push(input.as_bytes());
        let ar = r.append_repair();
        if !ar.consistent || !ar.safe {
            return None;
        }
        let mut out = input.as_bytes().to_vec();
        out.extend_from_slice(&ar.append);
        Some(String::from_utf8(out).unwrap())
    }

    #[test]
    fn append_closes_mid_string_value() {
        assert_eq!(
            append_repair_str(r#"{"a":"hello wor"#).as_deref(),
            Some(r#"{"a":"hello wor"}"#)
        );
    }

    #[test]
    fn append_keeps_complete_scalar_value() {
        // Unlike `finish`, append-only mode keeps a trailing scalar that is
        // already a valid token.
        assert_eq!(
            append_repair_str(r#"{"count":123"#).as_deref(),
            Some(r#"{"count":123}"#)
        );
        assert_eq!(append_repair_str("[1,2,3").as_deref(), Some("[1,2,3]"));
        assert_eq!(
            append_repair_str("[true,false").as_deref(),
            Some("[true,false]")
        );
    }

    #[test]
    fn append_closes_nested() {
        assert_eq!(
            append_repair_str(r#"{"a":["x",{"b":"c"#).as_deref(),
            Some(r#"{"a":["x",{"b":"c"}]}"#)
        );
    }

    #[test]
    fn append_empty_containers() {
        assert_eq!(append_repair_str("{").as_deref(), Some("{}"));
        assert_eq!(append_repair_str("[").as_deref(), Some("[]"));
    }

    #[test]
    fn append_unsafe_cases_return_none() {
        // Tails that cannot be fixed without dropping already-emitted bytes.
        assert_eq!(append_repair_str("[1,2,"), None);
        assert_eq!(append_repair_str(r#"{"a":1,"#), None);
        assert_eq!(append_repair_str(r#"{"a":1."#), None);
        assert_eq!(append_repair_str(r#"{"a":1e"#), None);
        assert_eq!(append_repair_str("[tru"), None);
        assert_eq!(append_repair_str(r#"{"a"#), None);
        assert_eq!(append_repair_str(r#"{"a":"#), None);
        assert_eq!(append_repair_str(r#"{"a":"x\"#), None);
        assert_eq!(append_repair_str(r#"{"a":"x\u00"#), None);
    }

    #[test]
    fn append_noop_on_complete_json() {
        let mut r = StreamRepairer::new();
        r.push(r#"{"a":[1,2]}"#.as_bytes());
        let ar = r.append_repair();
        assert!(ar.consistent && ar.safe && ar.is_noop());
    }

    #[test]
    fn append_inconsistent_propagates() {
        let mut r = StreamRepairer::new();
        r.push("[}".as_bytes());
        let ar = r.append_repair();
        assert!(!ar.consistent);
    }

    #[test]
    fn rejects_raw_control_char_in_string() {
        // a literal newline (0x0A) inside a string is malformed JSON -> inconsistent
        assert_eq!(crate::repair_str("{\"a\":\"li\nne"), None);
        assert_eq!(crate::repair_str("{\"a\":\"x\u{0001}"), None);
    }

    #[test]
    fn rejects_invalid_escape() {
        assert_eq!(crate::repair_str(r#"{"a":"x\q"#), None);
        assert_eq!(crate::repair_str(r#"["a\,b"#), None);
    }

    #[test]
    fn valid_escapes_and_multibyte_still_repair() {
        // sanity: legal escapes and real UTF-8 still work
        assert_eq!(
            crate::repair_str(r#"{"a":"tab\tnewline\n"#).as_deref(),
            Some(r#"{"a":"tab\tnewline\n"}"#)
        );
        assert_eq!(
            crate::repair_str("{\"a\":\"caf\u{00e9}").as_deref(),
            Some("{\"a\":\"caf\u{00e9}\"}")
        );
    }
}