Skip to main content

sparrow/provider/
tool_markup.rs

1//! Fallback parser for tool calls emitted as inline markup instead of the
2//! provider's native function-calling JSON.
3//!
4//! Some models — notably DeepSeek served through certain OpenAI-compatible
5//! proxies (e.g. `opencode-go`) — emit tool calls as XML-ish markup inside the
6//! assistant content stream rather than as OpenAI `tool_calls`:
7//!
8//! ```text
9//! <||DSML||tool_calls>
10//! <||DSML||invoke name="read">
11//! <||DSML||parameter name="file_path" string="true">config.py</||DSML||parameter>
12//! </||DSML||invoke>
13//! </||DSML||tool_calls>
14//! ```
15//!
16//! Anthropic-style `<invoke name="...">` blocks use the same shape without the
17//! `||DSML||` token. When the OpenAI-compatible layer sees this in `content`
18//! (with `finish_reason: "stop"`), the tool would otherwise leak to the user as
19//! raw text and never execute. We detect and normalize both forms into real
20//! tool calls.
21
22use regex::Regex;
23use serde_json::{Map, Value};
24
/// The DeepSeek special token that prefixes the tag name in every DSML tag.
///
/// It is rendered as `||DSML||` in the module docs, but each `|` is really
/// U+FF5C (fullwidth vertical line), not the ASCII pipe — hence the escapes.
const DSML_TOKEN: &str = "\u{FF5C}\u{FF5C}DSML\u{FF5C}\u{FF5C}";
28
29#[derive(Debug, Clone, PartialEq)]
30pub struct ParsedToolCall {
31    pub name: String,
32    pub args: Value,
33}
34
35/// Cheap pre-check: does this text contain inline tool-call markup we can parse?
36/// Used to (a) decide whether to suppress the raw text from the user and
37/// (b) whether to run the (more expensive) full extraction at stream end.
38///
39/// B3: require a COMPLETE structure (opening *and* closing tag), not just two
40/// loose substrings. Prose that merely mentions `<invoke name="…">` (a code
41/// review, documentation) must NOT be suppressed and re-parsed as phantom
42/// tool calls.
43pub fn looks_like_tool_markup(text: &str) -> bool {
44    let stripped = strip_dsml(text);
45    // Anthropic-style / DSML `<invoke …>…</invoke>` block, fully closed.
46    let invoke_closed = (stripped.contains("<invoke ") || stripped.contains("<invoke\t"))
47        && stripped.contains("name=")
48        && stripped.contains("</invoke>");
49    let trimmed = stripped.trim();
50    let fenced_json = trimmed.starts_with("```json")
51        && trimmed.ends_with("```")
52        && (stripped.contains("\"arguments\"") || stripped.contains("\"args\""));
53    let bracketed_tool = stripped.contains("[TOOL_CALL]") && stripped.contains("[/TOOL_CALL]");
54    let deepseek_tool =
55        stripped.contains("<|tool▁call▁begin|>") && stripped.contains("<|tool▁call▁end|>");
56    invoke_closed || fenced_json || bracketed_tool || deepseek_tool
57}
58
59/// True while the first streamed content bytes could still become one of the
60/// supported tool-call markup forms. Used by streaming adapters to avoid
61/// leaking partial DSML/DeepSeek tags before `looks_like_tool_markup()` can see
62/// a complete closed block.
63pub fn could_be_tool_markup_prefix(text: &str) -> bool {
64    let stripped = strip_dsml(text);
65    let trimmed = stripped.trim_start();
66    if trimmed.is_empty() {
67        return true;
68    }
69    const STARTS: &[&str] = &[
70        "<invoke",
71        "[TOOL_CALL]",
72        "```json",
73        "<|tool▁call",
74        "<|tool▁calls",
75    ];
76    STARTS
77        .iter()
78        .any(|start| start.starts_with(trimmed) || trimmed.starts_with(start))
79}
80
81fn strip_dsml(text: &str) -> String {
82    // Remove the exact DSML token so `<||DSML||invoke ...>` becomes
83    // `<invoke ...>`. We never touch parameter *values* because the token only
84    // appears as a tag prefix.
85    text.replace(DSML_TOKEN, "")
86}
87
88/// Coerce a parameter's raw text into a JSON value.
89///
90/// B2: when the markup explicitly declares the value is a string
91/// (`string="true"`, as DSML emits), NEVER coerce to number/bool — `"123"`
92/// must stay the string "123". And NEVER `trim()` a string value: a file's
93/// `content` keeps its leading/trailing whitespace and newlines, which trim
94/// would silently corrupt.
95fn coerce(raw: &str, declared_string: bool) -> Value {
96    if declared_string {
97        return Value::String(raw.to_string());
98    }
99    let t = raw.trim();
100    if t == "true" {
101        return Value::Bool(true);
102    }
103    if t == "false" {
104        return Value::Bool(false);
105    }
106    if let Ok(i) = t.parse::<i64>() {
107        return Value::from(i);
108    }
109    if let Ok(f) = t.parse::<f64>() {
110        // Keep integers integral; only use float when it really is one.
111        if t.contains('.') {
112            return Value::from(f);
113        }
114    }
115    // Untyped string value: preserve verbatim (no trim) — see B2.
116    Value::String(raw.to_string())
117}
118
119/// Extract supported inline tool-call formats into structured tool calls.
120///
121/// I4: keep this as a small parser registry. The first parser that finds calls
122/// wins, so a prose block containing examples from another format cannot mix
123/// accidental partial matches into the real call.
124pub fn extract_tool_calls(text: &str) -> Vec<ParsedToolCall> {
125    for parser in [
126        parse_invoke_tool_calls as fn(&str) -> Vec<ParsedToolCall>,
127        parse_tool_call_blocks,
128        parse_json_fences,
129        parse_deepseek_tool_calls,
130    ] {
131        let calls = parser(text);
132        if !calls.is_empty() {
133            return calls;
134        }
135    }
136    Vec::new()
137}
138
139fn parse_invoke_tool_calls(text: &str) -> Vec<ParsedToolCall> {
140    let cleaned = strip_dsml(text);
141    let invoke_re = Regex::new(r#"(?s)<invoke\s+name="([^"]+)"\s*>(.*?)</invoke>"#)
142        .expect("static invoke regex");
143    // Capture the attribute blob (group 2) so we can honour `string="true"`.
144    let param_re = Regex::new(r#"(?s)<parameter\s+name="([^"]+)"([^>]*)>(.*?)</parameter>"#)
145        .expect("static parameter regex");
146
147    let mut calls = Vec::new();
148    for inv in invoke_re.captures_iter(&cleaned) {
149        let name = inv[1].trim().to_string();
150        let body = &inv[2];
151        let mut args = Map::new();
152        for p in param_re.captures_iter(body) {
153            let pname = p[1].trim().to_string();
154            let declared_string = p[2].contains("string=\"true\"");
155            let pval = coerce(&p[3], declared_string);
156            args.insert(pname, pval);
157        }
158        if !name.is_empty() {
159            calls.push(ParsedToolCall {
160                name,
161                args: Value::Object(args),
162            });
163        }
164    }
165    calls
166}
167
168fn parse_tool_call_blocks(text: &str) -> Vec<ParsedToolCall> {
169    let block_re = Regex::new(r#"(?s)\[TOOL_CALL\](.*?)\[/TOOL_CALL\]"#)
170        .expect("static tool-call block regex");
171    block_re
172        .captures_iter(text)
173        .filter_map(|cap| serde_json::from_str::<Value>(cap[1].trim()).ok())
174        .filter_map(call_from_json)
175        .collect()
176}
177
178fn parse_json_fences(text: &str) -> Vec<ParsedToolCall> {
179    let trimmed = text.trim();
180    if !trimmed.starts_with("```json") || !trimmed.ends_with("```") {
181        return Vec::new();
182    }
183    let fence_re = Regex::new(r#"(?s)```json\s*(.*?)\s*```"#).expect("static json fence regex");
184    fence_re
185        .captures_iter(text)
186        .filter_map(|cap| serde_json::from_str::<Value>(cap[1].trim()).ok())
187        .flat_map(calls_from_json_value)
188        .collect()
189}
190
191fn parse_deepseek_tool_calls(text: &str) -> Vec<ParsedToolCall> {
192    let call_re = Regex::new(r#"(?s)<|tool▁call▁begin|>(.*?)<|tool▁call▁end|>"#)
193        .expect("static deepseek tool-call regex");
194    call_re
195        .captures_iter(text)
196        .filter_map(|cap| {
197            let body = cap[1].trim();
198            let (maybe_name, json_text) = match body.split_once("<|tool▁sep|>") {
199                Some((name, json)) => (Some(name.trim()), json.trim()),
200                None => (None, body),
201            };
202            let value = serde_json::from_str::<Value>(json_text).ok()?;
203            if let Some(call) = call_from_json(value.clone()) {
204                return Some(call);
205            }
206            let name = maybe_name?.trim();
207            if name.is_empty() || name == "function" {
208                return None;
209            }
210            Some(ParsedToolCall {
211                name: name.to_string(),
212                args: value,
213            })
214        })
215        .collect()
216}
217
218fn calls_from_json_value(value: Value) -> Vec<ParsedToolCall> {
219    match value {
220        Value::Array(items) => items.into_iter().filter_map(call_from_json).collect(),
221        other => call_from_json(other).into_iter().collect(),
222    }
223}
224
225fn call_from_json(value: Value) -> Option<ParsedToolCall> {
226    let obj = value.as_object()?;
227
228    if let Some(function) = obj.get("function").and_then(Value::as_object) {
229        let name = function.get("name").and_then(Value::as_str)?;
230        let args = function
231            .get("arguments")
232            .cloned()
233            .or_else(|| obj.get("arguments").cloned())
234            .unwrap_or_else(|| Value::Object(Map::new()));
235        return Some(ParsedToolCall {
236            name: name.to_string(),
237            args: normalize_args(args),
238        });
239    }
240
241    let name = obj
242        .get("name")
243        .or_else(|| obj.get("tool"))
244        .or_else(|| obj.get("tool_name"))
245        .and_then(Value::as_str)?;
246    let args = obj
247        .get("arguments")
248        .or_else(|| obj.get("args"))
249        .or_else(|| obj.get("input"))
250        .cloned()
251        .unwrap_or_else(|| Value::Object(Map::new()));
252    Some(ParsedToolCall {
253        name: name.to_string(),
254        args: normalize_args(args),
255    })
256}
257
258fn normalize_args(args: Value) -> Value {
259    match args {
260        Value::String(s) => serde_json::from_str::<Value>(&s).unwrap_or(Value::String(s)),
261        other => other,
262    }
263}
264
// Unit tests for the inline tool-markup detector and parsers. Test names
// carry the design-note tags (B1/B2/B3/I4) used in the doc comments above.
#[cfg(test)]
mod tests {
    use super::*;

    // The DSML example from the module docs, with the real U+FF5C bars.
    const SAMPLE: &str = "<\u{FF5C}\u{FF5C}DSML\u{FF5C}\u{FF5C}tool_calls>\n<\u{FF5C}\u{FF5C}DSML\u{FF5C}\u{FF5C}invoke name=\"read\">\n<\u{FF5C}\u{FF5C}DSML\u{FF5C}\u{FF5C}parameter name=\"file_path\" string=\"true\">config.py</\u{FF5C}\u{FF5C}DSML\u{FF5C}\u{FF5C}parameter>\n</\u{FF5C}\u{FF5C}DSML\u{FF5C}\u{FF5C}invoke>\n</\u{FF5C}\u{FF5C}DSML\u{FF5C}\u{FF5C}tool_calls>";

    // The detector accepts the DSML sample and rejects ordinary prose.
    #[test]
    fn detects_dsml_markup() {
        assert!(looks_like_tool_markup(SAMPLE));
        assert!(!looks_like_tool_markup(
            "just a normal answer about config.py"
        ));
    }

    // The DSML sample yields exactly one `read` call with its parameter.
    #[test]
    fn parses_dsml_single_tool() {
        let calls = extract_tool_calls(SAMPLE);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].name, "read");
        assert_eq!(calls[0].args["file_path"], "config.py");
    }

    // The same `<invoke>` shape parses without any DSML token.
    #[test]
    fn parses_anthropic_style_without_dsml() {
        let text = r#"<invoke name="fs_write">
<parameter name="path">reverse.py</parameter>
<parameter name="content">def f(): pass</parameter>
</invoke>"#;
        let calls = extract_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].name, "fs_write");
        assert_eq!(calls[0].args["path"], "reverse.py");
        assert_eq!(calls[0].args["content"], "def f(): pass");
    }

    // Several `<invoke>` blocks come back in document order; the untyped
    // value `1` is coerced to a number while `two` stays a string.
    #[test]
    fn parses_multiple_invokes() {
        let text = r#"<invoke name="a"><parameter name="x">1</parameter></invoke>
<invoke name="b"><parameter name="y">two</parameter></invoke>"#;
        let calls = extract_tool_calls(text);
        assert_eq!(calls.len(), 2);
        assert_eq!(calls[0].name, "a");
        assert_eq!(calls[0].args["x"], 1);
        assert_eq!(calls[1].name, "b");
        assert_eq!(calls[1].args["y"], "two");
    }

    // Text without markup produces no calls.
    #[test]
    fn ignores_plain_text() {
        assert!(extract_tool_calls("no tools here, just prose").is_empty());
    }

    // A reply that consists solely of a JSON fence is a tool call.
    #[test]
    fn i4_parses_json_tool_call_fence() {
        let text = r#"```json
{"name":"fs_write","arguments":{"path":"poeme.txt","content":"salut"}}
```"#;
        assert!(looks_like_tool_markup(text));
        let calls = extract_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].name, "fs_write");
        assert_eq!(calls[0].args["path"], "poeme.txt");
        assert_eq!(calls[0].args["content"], "salut");
    }

    // A JSON fence surrounded by prose is an example, not a call.
    #[test]
    fn i4_does_not_parse_embedded_json_example_as_tool_call() {
        let text = r#"Here is the format:
```json
{"name":"read","arguments":{"file_path":"config.py"}}
```
Use it carefully."#;
        assert!(!looks_like_tool_markup(text));
        assert!(extract_tool_calls(text).is_empty());
    }

    // `[TOOL_CALL]…[/TOOL_CALL]` wrapping a JSON call object.
    #[test]
    fn i4_parses_bracketed_tool_call() {
        let text =
            r#"[TOOL_CALL]{"name":"read","arguments":{"file_path":"config.py"}}[/TOOL_CALL]"#;
        assert!(looks_like_tool_markup(text));
        let calls = extract_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].name, "read");
        assert_eq!(calls[0].args["file_path"], "config.py");
    }

    // DeepSeek native tokens whose body is a self-describing JSON call.
    #[test]
    fn i4_parses_deepseek_native_tool_call_json() {
        let text = r#"<|tool▁calls▁begin|><|tool▁call▁begin|>{"name":"read","arguments":{"file_path":"src/main.rs"}}<|tool▁call▁end|><|tool▁calls▁end|>"#;
        assert!(looks_like_tool_markup(text));
        let calls = extract_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].name, "read");
        assert_eq!(calls[0].args["file_path"], "src/main.rs");
    }

    // DeepSeek native tokens in the `name <sep> json-arguments` layout.
    #[test]
    fn i4_parses_deepseek_native_tool_call_with_separator() {
        let text = r#"<|tool▁call▁begin|>fs_write<|tool▁sep|>{"path":"a.txt","content":"ok"}<|tool▁call▁end|>"#;
        let calls = extract_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].name, "fs_write");
        assert_eq!(calls[0].args["path"], "a.txt");
    }

    // OpenAI `function` shape whose arguments are a JSON-encoded string.
    #[test]
    fn i4_parses_openai_function_shape_with_string_arguments() {
        let text = r#"```json
{"function":{"name":"read","arguments":"{\"file_path\":\"Cargo.toml\"}"}}
```"#;
        let calls = extract_tool_calls(text);
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].name, "read");
        assert_eq!(calls[0].args["file_path"], "Cargo.toml");
    }

    // Partial openings must be held back; text that starts with prose must not.
    #[test]
    fn b1_detects_partial_tool_markup_prefixes() {
        assert!(could_be_tool_markup_prefix("<"));
        assert!(could_be_tool_markup_prefix("<invoke name=\"read\""));
        assert!(could_be_tool_markup_prefix("[TOOL"));
        assert!(could_be_tool_markup_prefix("```json\n{\"name\""));
        assert!(could_be_tool_markup_prefix("<|tool▁call▁begin|>"));
        assert!(!could_be_tool_markup_prefix(
            "Bonjour <invoke name=\"read\">"
        ));
        assert!(!could_be_tool_markup_prefix("plain text"));
    }

    #[test]
    fn b3_prose_mentioning_invoke_is_not_treated_as_markup() {
        // A code review or doc that *talks about* the markup must not be
        // suppressed and re-parsed as a phantom tool call.
        let prose = r#"To call a tool, the model emits `<invoke name="read">` —
note there is no closing tag in this explanation."#;
        assert!(!looks_like_tool_markup(prose));
    }

    // The counterpart of the prose case: a fully closed block is detected.
    #[test]
    fn b3_complete_block_is_detected() {
        let t = r#"<invoke name="read"><parameter name="p">x</parameter></invoke>"#;
        assert!(looks_like_tool_markup(t));
    }

    #[test]
    fn b2_declared_string_is_not_coerced() {
        // DSML marks values string="true"; "123" must stay a string.
        let t = "<\u{FF5C}\u{FF5C}DSML\u{FF5C}\u{FF5C}invoke name=\"x\">\n<\u{FF5C}\u{FF5C}DSML\u{FF5C}\u{FF5C}parameter name=\"n\" string=\"true\">123</\u{FF5C}\u{FF5C}DSML\u{FF5C}\u{FF5C}parameter>\n</\u{FF5C}\u{FF5C}DSML\u{FF5C}\u{FF5C}invoke>";
        let calls = extract_tool_calls(t);
        assert_eq!(calls[0].args["n"], Value::String("123".into()));
    }

    #[test]
    fn b2_file_content_whitespace_is_preserved() {
        // A file's content keeps leading/trailing newlines — trim would corrupt it.
        let t = "<invoke name=\"fs_write\"><parameter name=\"content\">\nline1\nline2\n</parameter></invoke>";
        let calls = extract_tool_calls(t);
        assert_eq!(
            calls[0].args["content"],
            Value::String("\nline1\nline2\n".into())
        );
    }
}