// zagens_runtime_adapters/tools/arg_repair.rs

//! Deterministic JSON argument repair for malformed tool-call inputs.
//!
//! DeepSeek streams `tool_calls.function.arguments` as deltas. Two failure
//! shapes are common: (a) SSE chunk boundary cuts inside a JSON string and
//! reassembly leaves a trailing comma or unclosed brace; (b) some local
//! backends emit literal control characters inside JSON string values.
//!
//! The repair ladder runs five stages before falling back to an empty object:
//!
//!  1. Strict parse — done if it parses.
//!  2. Strip literal control chars inside string values.
//!  3. Strip trailing commas before `}` or `]`.
//!  4. Balance braces/brackets (append closers).
//!  5. Strip excess closers if delta is negative.
//!  6. Fallback: empty object `{}`.

use serde_json::{Map, Value};

/// Maximum raw argument length, in bytes, that we'll attempt to repair (1 MiB).
///
/// The limit is compared against `str::len`, so it counts UTF-8 bytes rather
/// than characters.
const MAX_ARG_LEN: usize = 1024 * 1024;

22#[derive(Debug, thiserror::Error)]
23pub enum ArgRepairError {
24    #[error("argument exceeded {0} chars; refusing to repair")]
25    TooLarge(usize),
26}
27
28/// Repair a raw JSON argument string into a valid `serde_json::Value`.
29///
30/// Runs the deterministic ladder; on success returns the parsed value.
31/// The final fallback is an empty object `{}` so dispatch always proceeds.
32pub fn repair(raw: &str) -> Result<Value, ArgRepairError> {
33    if raw.len() > MAX_ARG_LEN {
34        return Err(ArgRepairError::TooLarge(raw.len()));
35    }
36    // Stage 1: strict parse
37    if let Ok(v) = serde_json::from_str(raw) {
38        return Ok(v);
39    }
40    // Stage 2: strip control chars inside strings
41    let mut s = strip_control_chars_in_strings(raw);
42    if let Ok(v) = serde_json::from_str(&s) {
43        return Ok(v);
44    }
45    // Stage 3: strip trailing commas
46    s = strip_trailing_commas(&s);
47    if let Ok(v) = serde_json::from_str(&s) {
48        return Ok(v);
49    }
50    // Stage 4: balance braces
51    s = balance_braces(&s, 50);
52    if let Ok(v) = serde_json::from_str(&s) {
53        return Ok(v);
54    }
55    // Stage 5: strip excess closers
56    s = strip_excess_closers(&s);
57    if let Ok(v) = serde_json::from_str(&s) {
58        return Ok(v);
59    }
60    // Fallback: empty object
61    Ok(Value::Object(Map::new()))
62}
63
/// Make raw ASCII control characters inside JSON string values parseable.
///
/// JSON forbids unescaped characters below 0x20 inside a string. The input is
/// walked character by character, tracking whether we are inside a string
/// (between unescaped double quotes):
///
/// * a literal tab, line feed, or carriage return inside a string is rewritten
///   as its escape sequence (`\t`, `\n`, `\r`), so the text survives and the
///   string becomes valid JSON;
/// * every other control character below 0x20 inside a string is dropped;
/// * everything outside a string is copied unchanged, so structural
///   whitespace is preserved.
fn strip_control_chars_in_strings(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut in_string = false;
    let mut escape = false;
    for ch in s.chars() {
        if escape {
            // The character after a backslash is copied verbatim so that an
            // escaped quote (`\"`) cannot toggle `in_string`.
            out.push(ch);
            escape = false;
            continue;
        }
        match ch {
            '\\' => {
                escape = true;
                out.push(ch);
            }
            '"' => {
                in_string = !in_string;
                out.push(ch);
            }
            // Whitespace controls carry meaning for the value: escape, don't drop.
            '\t' if in_string => out.push_str("\\t"),
            '\n' if in_string => out.push_str("\\n"),
            '\r' if in_string => out.push_str("\\r"),
            // Any other control character inside a string is discarded.
            c if in_string && u32::from(c) < 0x20 => {}
            _ => out.push(ch),
        }
    }
    out
}

/// Strip trailing commas before `}` or `]`, and at the end of the input.
///
/// A comma (or run of commas) outside a string is removed when the next
/// non-whitespace character is a closing `}`/`]` or the input ends. JSON
/// whitespace between the comma and the closer is kept. Commas inside string
/// values are never touched, so a value such as `",}"` is preserved. The scan
/// is a single pass over the input.
fn strip_trailing_commas(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    // Commas seen outside a string, plus any whitespace following them, whose
    // fate depends on the next significant character.
    let mut pending = String::new();
    let mut in_string = false;
    let mut escape = false;

    for ch in s.chars() {
        if in_string {
            // String contents are copied verbatim; only track where it ends.
            out.push(ch);
            if escape {
                escape = false;
            } else if ch == '\\' {
                escape = true;
            } else if ch == '"' {
                in_string = false;
            }
            continue;
        }
        match ch {
            ',' => pending.push(ch),
            ' ' | '\t' | '\n' | '\r' if !pending.is_empty() => pending.push(ch),
            _ => {
                if !pending.is_empty() {
                    if matches!(ch, '}' | ']') {
                        // Trailing comma(s): keep the whitespace, drop the commas.
                        out.extend(pending.chars().filter(|c| *c != ','));
                    } else {
                        // A real separator: emit it exactly as written.
                        out.push_str(&pending);
                    }
                    pending.clear();
                }
                if ch == '"' {
                    in_string = true;
                }
                out.push(ch);
            }
        }
    }
    // Commas dangling at the very end of the input are trailing too.
    out.extend(pending.chars().filter(|c| *c != ','));
    out
}

/// Balance braces and brackets by appending the closers that are missing.
///
/// The input is scanned once with a stack of expected closers, so unclosed
/// openers are closed innermost-first: `[{"a": 1` becomes `[{"a": 1}]`.
/// Delimiters inside string values are ignored. Closers that do not match the
/// innermost open delimiter are left untouched here, and nothing is removed.
///
/// `max_iter` is kept for interface compatibility. The single pass always
/// terminates, so the cap only matters when it is `0`, in which case the input
/// is returned unchanged.
fn balance_braces(s: &str, max_iter: usize) -> String {
    let mut out = s.to_string();
    if max_iter == 0 {
        return out;
    }

    // Closers still owed, outermost first.
    let mut expected: Vec<char> = Vec::new();
    let mut in_string = false;
    let mut escape = false;

    for ch in s.chars() {
        if in_string {
            if escape {
                escape = false;
            } else if ch == '\\' {
                escape = true;
            } else if ch == '"' {
                in_string = false;
            }
            continue;
        }
        match ch {
            '"' => in_string = true,
            '{' => expected.push('}'),
            '[' => expected.push(']'),
            '}' | ']' => {
                // Only a closer matching the innermost opener settles a debt.
                if expected.last() == Some(&ch) {
                    expected.pop();
                }
            }
            _ => {}
        }
    }

    // Close innermost-first so the nesting stays valid.
    out.extend(expected.iter().rev());
    out
}

/// Strip excess closers (more closes than opens).
///
/// Walks the input tracking the open `{` and `[` depth separately; a `}` or
/// `]` that arrives when its depth is zero is dropped, every other character
/// is kept. Characters inside string values are copied verbatim and never
/// counted, so a `}` or `]` that is part of a value is preserved.
fn strip_excess_closers(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut brace_depth: usize = 0;
    let mut bracket_depth: usize = 0;
    let mut in_string = false;
    let mut escape = false;

    for ch in s.chars() {
        if in_string {
            out.push(ch);
            if escape {
                escape = false;
            } else if ch == '\\' {
                escape = true;
            } else if ch == '"' {
                in_string = false;
            }
            continue;
        }
        match ch {
            '"' => {
                in_string = true;
                out.push(ch);
            }
            '{' => {
                brace_depth += 1;
                out.push(ch);
            }
            '[' => {
                bracket_depth += 1;
                out.push(ch);
            }
            '}' => {
                // With nothing open, this closer is excess and is dropped.
                if brace_depth > 0 {
                    brace_depth -= 1;
                    out.push(ch);
                }
            }
            ']' => {
                if bracket_depth > 0 {
                    bracket_depth -= 1;
                    out.push(ch);
                }
            }
            _ => out.push(ch),
        }
    }
    out
}

/// End-to-end tests for `repair`: each case feeds one malformed shape through
/// the whole ladder and checks the parsed value.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn strict_parse_passes_through() {
        let v = repair(r#"{"path": "hello.txt"}"#).unwrap();
        assert_eq!(v, json!({"path": "hello.txt"}));
    }

    #[test]
    fn repairs_trailing_comma() {
        let v = repair(r#"{"path": "hello.txt",}"#).unwrap();
        assert_eq!(v, json!({"path": "hello.txt"}));
    }

    #[test]
    fn repairs_trailing_comma_in_array() {
        let v = repair(r#"["a", "b",]"#).unwrap();
        assert_eq!(v, json!(["a", "b"]));
    }

    #[test]
    fn repairs_missing_close_brace() {
        let v = repair(r#"{"path": "hello.txt""#).unwrap();
        assert_eq!(v, json!({"path": "hello.txt"}));
    }

    #[test]
    fn repairs_missing_close_bracket() {
        let v = repair(r#"["a", "b""#).unwrap();
        assert_eq!(v, json!(["a", "b"]));
    }

    #[test]
    fn strips_embedded_control_chars() {
        // Raw \x0B (vertical tab) inside a string value
        let raw = "{\"key\": \"val\x0Bue\"}";
        let v = repair(raw).unwrap();
        assert_eq!(v, json!({"key": "value"}));
    }

    #[test]
    fn handles_empty_string() {
        let v = repair("").unwrap();
        assert_eq!(v, json!({}));
    }

    #[test]
    fn handles_gibberish() {
        let v = repair("not json at all").unwrap();
        assert_eq!(v, json!({}));
    }

    #[test]
    fn balances_nested_braces() {
        let v = repair(r#"{"outer": {"inner": "val""#).unwrap();
        assert_eq!(v, json!({"outer": {"inner": "val"}}));
    }

    #[test]
    fn strips_excess_closers() {
        let v = repair(r#"{"key": "val"}}"#).unwrap();
        assert_eq!(v, json!({"key": "val"}));
    }

    #[test]
    fn handles_double_encoded_json() {
        // This is a valid JSON string containing a JSON object literal.
        // repair parses it as a string; the engine's existing fallback
        // (parse_tool_input) will unwrap the string and re-parse.
        let v = repair(r#""{\"path\": \"hello.txt\"}""#).unwrap();
        assert_eq!(v, Value::String(r#"{"path": "hello.txt"}"#.to_string()));
    }

    #[test]
    fn oversize_input_rejected() {
        let big = "x".repeat(MAX_ARG_LEN + 1);
        assert!(repair(&big).is_err());
    }

    #[test]
    fn repairs_brace_balance_with_trailing_comma() {
        let v = repair(r#"{"a": 1,"#).unwrap();
        assert_eq!(v, json!({"a": 1}));
    }
}