zagens_core/engine/
tool_parser.rs1use regex::Regex;
25use serde_json::{Value, json};
26use std::sync::OnceLock;
27
/// One tool invocation recovered from free-form model output.
#[derive(Debug, Clone)]
pub struct ParsedToolCall {
    /// Tool name exactly as it appeared in the markup.
    pub name: String,
    /// Arguments as a JSON value; the parsers in this file fall back to `{}` when none are found.
    pub args: Value,
    /// Identifier synthesized by the parser (`text_tool_N` or `xml_tool_N`).
    pub id: String,
}
38
/// Outcome of [`parse_tool_calls`]: the leftover prose plus every extracted call.
#[derive(Debug)]
pub struct ParseResult {
    /// Input text with tool-call markup, thinking tags and blank lines removed.
    pub clean_text: String,
    /// Tool calls in the order the parser pushed them.
    pub tool_calls: Vec<ParsedToolCall>,
}
47
// Lazily compiled patterns. Each cell is filled on first use by the matching
// `get_*_regex` accessor below, so every pattern is compiled at most once.
static TOOL_CALL_REGEX: OnceLock<Regex> = OnceLock::new();
static XML_TOOL_CALL_REGEX: OnceLock<Regex> = OnceLock::new();
static INVOKE_REGEX: OnceLock<Regex> = OnceLock::new();
static THINKING_REGEX: OnceLock<Regex> = OnceLock::new();
52
53fn get_tool_call_regex() -> &'static Regex {
54 TOOL_CALL_REGEX.get_or_init(|| {
55 Regex::new(r"(?s)\[TOOL_CALL\]\s*(.*?)\s*\[/TOOL_CALL\]")
57 .expect("TOOL_CALL regex pattern is valid")
58 })
59}
60
61fn get_xml_tool_call_regex() -> &'static Regex {
62 XML_TOOL_CALL_REGEX.get_or_init(|| {
63 Regex::new(r"(?s)<(?:deepseek:)?tool_call[^>]*>\s*(.*?)\s*</(?:deepseek:)?tool_call>")
65 .expect("XML tool_call regex pattern is valid")
66 })
67}
68
69fn get_invoke_regex() -> &'static Regex {
70 INVOKE_REGEX.get_or_init(|| {
71 Regex::new(r#"(?s)<invoke\s+name\s*=\s*"([^"]+)"[^>]*>(.*?)</invoke>"#)
73 .expect("invoke regex pattern is valid")
74 })
75}
76
77fn get_thinking_regex() -> &'static Regex {
78 THINKING_REGEX.get_or_init(|| {
79 Regex::new(r"(?s)</?(?:think|thinking)[^>]*>").expect("thinking regex pattern is valid")
81 })
82}
83
84pub fn parse_tool_calls(text: &str) -> ParseResult {
87 let mut tool_calls = Vec::new();
88 let mut clean_text = text.to_string();
89 let mut id_counter = 0;
90
91 let thinking_regex = get_thinking_regex();
93 clean_text = thinking_regex.replace_all(&clean_text, "").to_string();
94
95 let regex = get_tool_call_regex();
97 for cap in regex.captures_iter(text) {
98 let (Some(full_match), Some(inner)) = (cap.get(0), cap.get(1)) else {
99 continue;
100 };
101 let full_match = full_match.as_str();
102 let inner = inner.as_str().trim();
103
104 if let Some(parsed) = parse_tool_call_inner(inner, &mut id_counter) {
105 tool_calls.push(parsed);
106 }
107
108 clean_text = clean_text.replace(full_match, "");
109 }
110
111 let xml_regex = get_xml_tool_call_regex();
113 for cap in xml_regex.captures_iter(text) {
114 let (Some(full_match), Some(inner)) = (cap.get(0), cap.get(1)) else {
115 continue;
116 };
117 let full_match = full_match.as_str();
118 let inner = inner.as_str().trim();
119
120 if let Some(parsed) = parse_invoke_block(inner, &mut id_counter) {
122 tool_calls.push(parsed);
123 } else if let Some(parsed) = parse_tool_call_inner(inner, &mut id_counter) {
124 tool_calls.push(parsed);
125 }
126
127 clean_text = clean_text.replace(full_match, "");
128 }
129
130 let invoke_regex = get_invoke_regex();
132 for cap in invoke_regex.captures_iter(&clean_text.clone()) {
133 let (Some(full_match), Some(tool_name), Some(inner)) = (cap.get(0), cap.get(1), cap.get(2))
134 else {
135 continue;
136 };
137 let full_match = full_match.as_str();
138 let tool_name = tool_name.as_str();
139 let inner = inner.as_str();
140
141 let args = parse_xml_parameters(inner);
142 id_counter += 1;
143 tool_calls.push(ParsedToolCall {
144 name: tool_name.to_string(),
145 args,
146 id: format!("xml_tool_{id_counter}"),
147 });
148
149 clean_text = clean_text.replace(full_match, "");
150 }
151
152 clean_text = clean_text
154 .lines()
155 .filter(|line| !line.trim().is_empty())
156 .collect::<Vec<_>>()
157 .join("\n")
158 .trim()
159 .to_string();
160
161 ParseResult {
162 clean_text,
163 tool_calls,
164 }
165}
166
167fn parse_invoke_block(content: &str, id_counter: &mut u32) -> Option<ParsedToolCall> {
169 let invoke_regex = get_invoke_regex();
170 let cap = invoke_regex.captures(content)?;
171
172 let tool_name = cap.get(1)?.as_str();
173 let inner = cap.get(2)?.as_str();
174
175 let args = parse_xml_parameters(inner);
176
177 *id_counter += 1;
178 Some(ParsedToolCall {
179 name: tool_name.to_string(),
180 args,
181 id: format!("xml_tool_{id_counter}"),
182 })
183}
184
185fn parse_xml_parameters(content: &str) -> Value {
187 let param_regex = Regex::new(
188 "<(?:parameter|param)\\s+name\\s*=\\s*\"([^\"]+)\"[^>]*>(.*?)</(?:parameter|param)>",
189 )
190 .ok();
191 let simple_tag_regex =
192 Regex::new("<([a-zA-Z_][a-zA-Z0-9_]*)>(.*?)</([a-zA-Z_][a-zA-Z0-9_]*)>").ok();
193
194 let mut map = serde_json::Map::new();
195
196 if let Some(regex) = param_regex {
198 for cap in regex.captures_iter(content) {
199 if let (Some(name), Some(value)) = (cap.get(1), cap.get(2)) {
200 let name_str = name.as_str();
201 let value_str = value.as_str().trim();
202
203 let json_value = serde_json::from_str(value_str)
205 .unwrap_or_else(|_| Value::String(value_str.to_string()));
206 map.insert(name_str.to_string(), json_value);
207 }
208 }
209 }
210
211 if let Some(regex) = simple_tag_regex {
213 for cap in regex.captures_iter(content) {
214 if let (Some(name), Some(value), Some(close)) = (cap.get(1), cap.get(2), cap.get(3)) {
215 if name.as_str() != close.as_str() {
216 continue;
217 }
218 let name_str = name.as_str();
219 if ["invoke", "tool_call", "parameter", "param"].contains(&name_str) {
221 continue;
222 }
223 let value_str = value.as_str().trim();
224 if !map.contains_key(name_str) {
225 let json_value = serde_json::from_str(value_str)
226 .unwrap_or_else(|_| Value::String(value_str.to_string()));
227 map.insert(name_str.to_string(), json_value);
228 }
229 }
230 }
231 }
232
233 Value::Object(map)
234}
235
236fn parse_tool_call_inner(inner: &str, id_counter: &mut u32) -> Option<ParsedToolCall> {
238 if let Ok(json) = serde_json::from_str::<Value>(inner) {
240 return parse_from_json(&json, id_counter);
241 }
242
243 if let Some(parsed) = parse_arrow_syntax(inner, id_counter) {
245 return Some(parsed);
246 }
247
248 parse_flexible_format(inner, id_counter)
250}
251
252fn parse_from_json(json: &Value, id_counter: &mut u32) -> Option<ParsedToolCall> {
254 let obj = json.as_object()?;
255
256 let name = obj
258 .get("tool")
259 .or_else(|| obj.get("name"))
260 .or_else(|| obj.get("function"))
261 .and_then(|v| v.as_str())?
262 .to_string();
263
264 let args = obj
266 .get("args")
267 .or_else(|| obj.get("arguments"))
268 .or_else(|| obj.get("input"))
269 .or_else(|| obj.get("parameters"))
270 .cloned()
271 .unwrap_or(json!({}));
272
273 *id_counter += 1;
274 Some(ParsedToolCall {
275 name,
276 args,
277 id: format!("text_tool_{id_counter}"),
278 })
279}
280
281fn parse_arrow_syntax(inner: &str, id_counter: &mut u32) -> Option<ParsedToolCall> {
283 let tool_regex = Regex::new(r#"tool\s*=>\s*"([^"]+)""#).ok()?;
285 let name = tool_regex.captures(inner)?.get(1)?.as_str().to_string();
286
287 let args = if let Some(args_start) = inner.find("args =>") {
289 let args_str = inner[args_start + 7..].trim();
290 if let Ok(args_json) = serde_json::from_str::<Value>(args_str) {
292 args_json
293 } else if let Some(brace_start) = args_str.find('{') {
294 let mut brace_count = 0;
296 let mut end_idx = brace_start;
297 for (i, c) in args_str[brace_start..].chars().enumerate() {
298 match c {
299 '{' => brace_count += 1,
300 '}' => {
301 brace_count -= 1;
302 if brace_count == 0 {
303 end_idx = brace_start + i + 1;
304 break;
305 }
306 }
307 _ => {}
308 }
309 }
310 let content = &args_str[brace_start + 1..end_idx - 1];
311
312 if let Ok(json) = serde_json::from_str::<Value>(&format!("{{{content}}}")) {
314 json
315 } else {
316 parse_cli_style_args(content)
318 }
319 } else {
320 json!({})
321 }
322 } else {
323 json!({})
324 };
325
326 *id_counter += 1;
327 Some(ParsedToolCall {
328 name,
329 args,
330 id: format!("text_tool_{id_counter}"),
331 })
332}
333
334fn parse_cli_style_args(content: &str) -> Value {
336 let mut map = serde_json::Map::new();
337
338 let arg_regex =
340 Regex::new(r#"--([a-zA-Z_][a-zA-Z0-9_]*)\s+(?:"([^"]*)"|'([^']*)'|(\S+))"#).ok();
341
342 if let Some(regex) = arg_regex {
343 for cap in regex.captures_iter(content) {
344 if let Some(arg_name) = cap.get(1) {
345 let arg_name = arg_name.as_str();
346 let value = cap
348 .get(2)
349 .or_else(|| cap.get(3))
350 .or_else(|| cap.get(4))
351 .map_or("", |m| m.as_str());
352
353 let json_value = serde_json::from_str(value)
355 .unwrap_or_else(|_| Value::String(value.to_string()));
356 map.insert(arg_name.to_string(), json_value);
357 }
358 }
359 }
360
361 let kv_regex =
363 Regex::new(r#"([a-zA-Z_][a-zA-Z0-9_]*)\s*[:=]\s*(?:"([^"]*)"|'([^']*)'|(\S+))"#).ok();
364 if let Some(regex) = kv_regex {
365 for cap in regex.captures_iter(content) {
366 if let Some(key) = cap.get(1) {
367 let key = key.as_str();
368 if !map.contains_key(key) {
369 let value = cap
370 .get(2)
371 .or_else(|| cap.get(3))
372 .or_else(|| cap.get(4))
373 .map_or("", |m| m.as_str());
374 let json_value = serde_json::from_str(value)
375 .unwrap_or_else(|_| Value::String(value.to_string()));
376 map.insert(key.to_string(), json_value);
377 }
378 }
379 }
380 }
381
382 Value::Object(map)
383}
384
385fn parse_flexible_format(inner: &str, id_counter: &mut u32) -> Option<ParsedToolCall> {
387 let patterns = [(
393 r#"(?:tool|name|function)\s*[:=]\s*"?([a-zA-Z_][a-zA-Z0-9_]*)"?"#,
394 1,
395 )];
396
397 for (pattern, group) in patterns {
398 if let Ok(regex) = Regex::new(pattern)
399 && let Some(cap) = regex.captures(inner)
400 && let Some(name_match) = cap.get(group)
401 {
402 let name = name_match.as_str().to_string();
403
404 let args = extract_json_object(inner).unwrap_or(json!({}));
406
407 *id_counter += 1;
408 return Some(ParsedToolCall {
409 name,
410 args,
411 id: format!("text_tool_{id_counter}"),
412 });
413 }
414 }
415
416 None
417}
418
419fn extract_json_object(text: &str) -> Option<Value> {
421 let start = text.find('{')?;
422 let mut brace_count = 0;
423 let mut end_idx = start;
424
425 for (i, c) in text[start..].chars().enumerate() {
426 match c {
427 '{' => brace_count += 1,
428 '}' => {
429 brace_count -= 1;
430 if brace_count == 0 {
431 end_idx = start + i + 1;
432 break;
433 }
434 }
435 _ => {}
436 }
437 }
438
439 let json_str = &text[start..end_idx];
440 serde_json::from_str(json_str).ok()
441}
442
/// Cheap pre-check: reports whether `text` contains any opening marker that
/// `parse_tool_calls` looks for.
///
/// This is a plain substring test, so `true` does not guarantee a complete,
/// parseable tool call.
pub fn has_tool_call_markers(text: &str) -> bool {
    const MARKERS: [&str; 4] = [
        "[TOOL_CALL]",
        "<deepseek:tool_call",
        "<tool_call",
        "<invoke ",
    ];

    MARKERS.iter().any(|marker| text.contains(marker))
}
450
#[cfg(test)]
mod tests {
    use super::*;

    /// An arrow-syntax body yields one call and leaves only the prose behind.
    #[test]
    fn test_parse_arrow_syntax() {
        let input = r#"I'll list the directory.
[TOOL_CALL]
{tool => "list_dir", args => {}}
[/TOOL_CALL]"#;

        let ParseResult {
            clean_text,
            tool_calls,
        } = parse_tool_calls(input);

        assert_eq!(tool_calls.len(), 1);
        assert_eq!(tool_calls[0].name, "list_dir");
        assert_eq!(clean_text, "I'll list the directory.");
    }

    /// A JSON body exposes both the tool name and its arguments.
    #[test]
    fn test_parse_json_syntax() {
        let input = r#"Let me check.
[TOOL_CALL]
{"tool": "read_file", "args": {"path": "test.txt"}}
[/TOOL_CALL]"#;

        let calls = parse_tool_calls(input).tool_calls;

        assert_eq!(calls.len(), 1);
        let call = &calls[0];
        assert_eq!(call.name, "read_file");
        assert_eq!(call.args["path"], "test.txt");
    }

    /// Consecutive blocks are returned in document order.
    #[test]
    fn test_parse_multiple_tool_calls() {
        let input = r#"First I'll list, then read.
[TOOL_CALL]
{tool => "list_dir", args => {}}
[/TOOL_CALL]
[TOOL_CALL]
{tool => "read_file", args => {"path": "file.txt"}}
[/TOOL_CALL]"#;

        let calls = parse_tool_calls(input).tool_calls;

        assert_eq!(calls.len(), 2);
        assert_eq!(calls[0].name, "list_dir");
        assert_eq!(calls[1].name, "read_file");
    }

    /// Plain prose passes through unchanged with no calls reported.
    #[test]
    fn test_no_tool_calls() {
        let input = "Just some regular text without any tool calls.";

        let parsed = parse_tool_calls(input);

        assert!(parsed.tool_calls.is_empty());
        assert_eq!(parsed.clean_text, input);
    }

    /// The marker pre-check fires on bracket markup and stays quiet otherwise.
    #[test]
    fn test_has_markers() {
        let with_marker = "[TOOL_CALL]test[/TOOL_CALL]";
        let without_marker = "no markers here";

        assert!(has_tool_call_markers(with_marker));
        assert!(!has_tool_call_markers(without_marker));
    }
}