Skip to main content

tycode_core/chat/
json_tool_parser.rs

1use crate::ai::ToolUseData;
2use anyhow::Result;
3use serde_json::Value;
4use tracing::debug;
5use uuid::Uuid;
6
/// Finds where the JSON object or array that opens at byte offset `start` closes.
///
/// The scan is a hand-written state machine rather than a regex because it has to
/// follow nesting depth and skip over string literals, including escaped quotes
/// inside them.
///
/// Only the bracket kind found at `start` is depth-counted (`{`/`}` or `[`/`]`);
/// the other kind is ignored. The result is therefore a candidate span only;
/// validating it as JSON is left to the caller.
///
/// Returns the byte offset one past the matching closer, or `None` when `start`
/// is out of range, the byte at `start` is neither `{` nor `[`, or the text ends
/// before the structure closes.
fn find_json_end(text: &str, start: usize) -> Option<usize> {
    // `get` covers `start > len`; `split_first` covers `start == len`.
    let tail = text.as_bytes().get(start..)?;
    let (&open, _) = tail.split_first()?;
    let close = match open {
        b'{' => b'}',
        b'[' => b']',
        _ => return None,
    };

    let mut nesting = 0;
    let mut quoted = false;
    let mut skip_next = false;

    // Byte-wise scanning is safe on UTF-8: every byte we react to is ASCII and
    // can never occur inside a multi-byte sequence.
    for (offset, &byte) in tail.iter().enumerate() {
        if skip_next {
            // This byte is the target of a backslash escape inside a string.
            skip_next = false;
            continue;
        }

        match byte {
            b'\\' if quoted => skip_next = true,
            b'"' => quoted = !quoted,
            // Brackets inside string literals are plain data.
            _ if quoted => {}
            b if b == open => nesting += 1,
            b if b == close => {
                nesting -= 1;
                if nesting == 0 {
                    return Some(start + offset + 1);
                }
            }
            _ => {}
        }
    }

    None
}
66
67/// Recursion handles nested content arrays in Anthropic's message format.
68fn extract_tool_uses(value: &Value) -> Vec<ToolUseData> {
69    let mut results = Vec::new();
70
71    if let Some(obj) = value.as_object() {
72        if obj.get("type").and_then(|v| v.as_str()) == Some("tool_use") {
73            if let (Some(name), Some(input)) =
74                (obj.get("name").and_then(|v| v.as_str()), obj.get("input"))
75            {
76                let id = obj
77                    .get("id")
78                    .and_then(|v| v.as_str())
79                    .map(|s| s.to_string())
80                    .unwrap_or_else(|| Uuid::new_v4().to_string());
81
82                results.push(ToolUseData {
83                    id,
84                    name: name.to_string(),
85                    arguments: input.clone(),
86                });
87            }
88        }
89
90        if let Some(content) = obj.get("content") {
91            results.extend(extract_tool_uses(content));
92        }
93    }
94
95    if let Some(arr) = value.as_array() {
96        for item in arr {
97            results.extend(extract_tool_uses(item));
98        }
99    }
100
101    results
102}
103
/// Reports whether byte offset `pos` falls inside a double-quoted string.
///
/// The answer depends on every quote and escape that precedes `pos`, so the
/// whole prefix `text[..pos]` is scanned from the beginning. A backslash only
/// acts as an escape while inside a string. If `pos` is past the end of `text`,
/// the state after the final byte is returned.
fn is_inside_json_string(text: &str, pos: usize) -> bool {
    // State is (inside a string, next byte is escaped).
    let (quoted, _) = text
        .as_bytes()
        .iter()
        .take(pos)
        .fold((false, false), |(quoted, escaped), &byte| {
            if escaped {
                (quoted, false)
            } else if quoted && byte == b'\\' {
                (quoted, true)
            } else if byte == b'"' {
                (!quoted, false)
            } else {
                (quoted, false)
            }
        });

    quoted
}
132
133/// Prevents leaving partial JSON in remaining text by finding complete outermost structure.
134fn find_outermost_json_containing(
135    text: &str,
136    search_start: usize,
137    marker_pos: usize,
138) -> Option<(usize, usize, Value)> {
139    let search_region = &text[search_start..marker_pos];
140
141    for (offset, _) in search_region.match_indices('{') {
142        let json_start = search_start + offset;
143        let Some(json_end) = find_json_end(text, json_start) else {
144            continue;
145        };
146
147        if json_end <= marker_pos {
148            continue;
149        }
150
151        let json_str = &text[json_start..json_end];
152        if let Ok(parsed) = serde_json::from_str::<Value>(json_str) {
153            let extracted = extract_tool_uses(&parsed);
154            if !extracted.is_empty() {
155                return Some((json_start, json_end, parsed));
156            }
157        }
158    }
159
160    None
161}
162
163/// Parse JSON tool calls from text containing `"type":"tool_use"` structures.
164///
165/// This parser extracts tool calls that match the Anthropic/Claude format.
166/// Tool calls can appear as:
167/// - Standalone JSON objects: `{"type":"tool_use","id":"toolu_xxx","name":"tool_name","input":{...}}`
168/// - Inside a message object's content array: `{"content":[{"type":"tool_use",...}],...}`
169/// - Multiple tool calls mixed with regular text
170///
171/// Returns extracted tool calls and remaining text with JSON tool calls removed.
172pub fn parse_json_tool_calls(text: &str) -> Result<(Vec<ToolUseData>, String)> {
173    let mut tool_calls = Vec::new();
174    let mut remaining_text = String::new();
175    let mut last_end = 0;
176
177    let tool_use_marker = "\"type\":\"tool_use\"";
178    let mut search_pos = 0;
179
180    while let Some(marker_pos) = text[search_pos..].find(tool_use_marker) {
181        let abs_marker_pos = search_pos + marker_pos;
182
183        // Prevents false extraction when tool call syntax appears in string parameter values
184        if is_inside_json_string(text, abs_marker_pos) {
185            search_pos = abs_marker_pos + tool_use_marker.len();
186            continue;
187        }
188
189        let Some((json_start, json_end, parsed)) =
190            find_outermost_json_containing(text, last_end, abs_marker_pos)
191        else {
192            search_pos = abs_marker_pos + tool_use_marker.len();
193            continue;
194        };
195
196        let extracted = extract_tool_uses(&parsed);
197
198        remaining_text.push_str(&text[last_end..json_start]);
199        tool_calls.extend(extracted);
200        last_end = json_end;
201        search_pos = json_end;
202    }
203
204    remaining_text.push_str(&text[last_end..]);
205
206    debug!(
207        tool_count = tool_calls.len(),
208        remaining_len = remaining_text.len(),
209        "Parsed JSON tool calls"
210    );
211
212    Ok((tool_calls, remaining_text.trim().to_string()))
213}
214
#[cfg(test)]
mod tests {
    use super::*;

    /// A lone tool-call object is extracted completely and leaves no text behind.
    #[test]
    fn test_single_standalone_tool_call() {
        let raw = r#"{"type":"tool_use","id":"toolu_123","name":"test_tool","input":{"param1":"value1"}}"#;

        let (tool_calls, leftover) = parse_json_tool_calls(raw).unwrap();

        assert_eq!(tool_calls.len(), 1);
        let call = &tool_calls[0];
        assert_eq!(call.id, "toolu_123");
        assert_eq!(call.name, "test_tool");
        assert_eq!(call.arguments["param1"], "value1");
        assert!(leftover.is_empty());
    }

    /// Tool calls nested in a message envelope's content array are all found,
    /// and the whole envelope is removed from the text.
    #[test]
    fn test_tool_calls_in_content_array() {
        let raw = r#"{"id":"msg_01","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01K","name":"manage_task_list","input":{"title":"Test","tasks":[]}},{"type":"tool_use","id":"toolu_01L","name":"set_tracked_files","input":{"file_paths":[]}}],"model":"claude-opus-4-5-20251101"}"#;

        let (tool_calls, leftover) = parse_json_tool_calls(raw).unwrap();

        assert_eq!(tool_calls.len(), 2);
        assert_eq!(tool_calls[0].id, "toolu_01K");
        assert_eq!(tool_calls[0].name, "manage_task_list");
        assert_eq!(tool_calls[1].id, "toolu_01L");
        assert_eq!(tool_calls[1].name, "set_tracked_files");
        assert!(leftover.is_empty());
    }

    /// Prose on either side of a tool call is kept in the remaining text.
    #[test]
    fn test_tool_calls_mixed_with_text() {
        let raw = r#"Here is some text before.
{"type":"tool_use","id":"toolu_abc","name":"my_tool","input":{"key":"value"}}
And some text after."#;

        let (tool_calls, leftover) = parse_json_tool_calls(raw).unwrap();

        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].name, "my_tool");
        assert!(leftover.contains("Here is some text before."));
        assert!(leftover.contains("And some text after."));
    }

    /// Text without any marker comes back unchanged and yields no calls.
    #[test]
    fn test_no_tool_calls() {
        let raw = "Just regular text without any tool calls";

        let (tool_calls, leftover) = parse_json_tool_calls(raw).unwrap();

        assert!(tool_calls.is_empty());
        assert_eq!(leftover, raw);
    }

    /// A truncated tool call is not an error; it simply stays in the text.
    #[test]
    fn test_invalid_json_gracefully_skipped() {
        let raw = r#"{"type":"tool_use","id":"incomplete"#;

        let (tool_calls, leftover) = parse_json_tool_calls(raw).unwrap();

        assert!(tool_calls.is_empty());
        assert_eq!(leftover, raw);
    }

    /// A tool call without an "id" still gets a non-empty identifier.
    #[test]
    fn test_missing_id_generates_uuid() {
        let raw = r#"{"type":"tool_use","name":"no_id_tool","input":{"a":1}}"#;

        let (tool_calls, _) = parse_json_tool_calls(raw).unwrap();

        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].name, "no_id_tool");
        assert!(!tool_calls[0].id.is_empty());
    }

    /// Deeply nested objects and arrays inside "input" survive extraction intact.
    #[test]
    fn test_nested_json_in_input() {
        let raw = r#"{"type":"tool_use","id":"toolu_nested","name":"complex_tool","input":{"nested":{"deep":{"value":42}},"array":[1,2,3]}}"#;

        let (tool_calls, _) = parse_json_tool_calls(raw).unwrap();

        assert_eq!(tool_calls.len(), 1);
        let arguments = &tool_calls[0].arguments;
        assert_eq!(arguments["nested"]["deep"]["value"], 42);
        assert_eq!(arguments["array"][1], 2);
    }

    /// Independent tool calls separated by prose are each extracted, in order.
    #[test]
    fn test_multiple_separate_tool_calls() {
        let raw = r#"First: {"type":"tool_use","id":"t1","name":"tool1","input":{}}
Second: {"type":"tool_use","id":"t2","name":"tool2","input":{}}"#;

        let (tool_calls, leftover) = parse_json_tool_calls(raw).unwrap();

        assert_eq!(tool_calls.len(), 2);
        assert_eq!(tool_calls[0].name, "tool1");
        assert_eq!(tool_calls[1].name, "tool2");
        assert!(leftover.contains("First:"));
        assert!(leftover.contains("Second:"));
    }

    /// Escaped quotes inside a string argument do not confuse the brace matcher.
    #[test]
    fn test_string_with_escaped_quotes() {
        let raw = r#"{"type":"tool_use","id":"t1","name":"test","input":{"message":"He said \"hello\""}}"#;

        let (tool_calls, _) = parse_json_tool_calls(raw).unwrap();

        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].arguments["message"], "He said \"hello\"");
    }

    /// JSON that looks like a tool call but lives inside a string argument is
    /// payload data (e.g. file content being written) and must not be extracted
    /// as a second call.
    #[test]
    fn test_nested_tool_call_in_string_parameter() {
        let raw = r#"{"type":"tool_use","id":"outer","name":"write_file","input":{"content":"{\"type\":\"tool_use\",\"id\":\"inner\",\"name\":\"should_not_extract\",\"input\":{}}"}}"#;

        let (tool_calls, leftover) = parse_json_tool_calls(raw).unwrap();

        // Only the outer call is reported.
        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].id, "outer");
        assert_eq!(tool_calls[0].name, "write_file");

        // The look-alike JSON is still present, verbatim, as string content.
        let written = tool_calls[0].arguments["content"].as_str().unwrap();
        assert!(written.contains("should_not_extract"));
        assert!(leftover.is_empty());
    }

    /// A full captured API response with two tool calls and trailing metadata.
    #[test]
    fn test_real_world_example() {
        let raw = r#"{"id":"msg_01FE5LdhP7dTZCT5E9jFz6X9","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01K2BLo5hGK86Q9NSkw4kbPv","name":"manage_task_list","input":{"title":"Tool test complete","tasks":[{"description":"Await user request","status":"completed"},{"description":"Understand/Explore the code base and propose a comprehensive plan","status":"completed"}]}},{"type":"tool_use","id":"toolu_01LxYAHu8HLb7MJtD5WC73Ur","name":"set_tracked_files","input":{"file_paths":[]}}],"model":"claude-opus-4-5-20251101","stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":4038,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":185,"thinking_tokens":252}}"#;

        let (tool_calls, _) = parse_json_tool_calls(raw).unwrap();

        assert_eq!(tool_calls.len(), 2);
        assert_eq!(tool_calls[0].name, "manage_task_list");
        assert_eq!(tool_calls[0].id, "toolu_01K2BLo5hGK86Q9NSkw4kbPv");
        assert_eq!(tool_calls[1].name, "set_tracked_files");
        assert_eq!(tool_calls[1].id, "toolu_01LxYAHu8HLb7MJtD5WC73Ur");
    }
}