use super::*;
/// Test-only parser for bare JSON tool calls shaped like
/// `{"name": ..., "arguments": ...}`, with no surrounding tags.
struct InlineJson;

impl ToolParser for InlineJson {
    /// Parses the trimmed `text` as one JSON value and builds the call from its
    /// `name` string and its `arguments` value (`Null` when the key is absent).
    ///
    /// Fails when the text is not valid JSON or `name` is missing / not a string.
    fn parse(&self, text: &str, _tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
        let trimmed = text.trim();
        let parsed: Value =
            serde_json::from_str(trimmed).map_err(|e| err(format!("inline_json: {e}")))?;

        let tool_name = parsed
            .get("name")
            .and_then(Value::as_str)
            .ok_or_else(|| err("inline_json: missing name"))?;
        let arguments = parsed.get("arguments").map_or(Value::Null, Value::clone);

        Ok(obj(tool_name, arguments))
    }

    /// Identifier reported for this parser.
    fn name(&self) -> &'static str {
        "inline_json_test_parser"
    }

    /// Tries to take one call from `buffer`.
    ///
    /// Returns `Ok(None)` while `balanced_json_object_prefix` finds no balanced
    /// object. Otherwise returns the parsed calls together with the end offset
    /// of that object; the call list is empty when parsing fails or yields nothing.
    fn try_parse_one_call(
        &self,
        buffer: &str,
        tools: Option<&Value>,
    ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
        let end = match balanced_json_object_prefix(buffer) {
            Some((_, end)) => end,
            None => return Ok(None),
        };

        let candidate = buffer[..end].trim();
        // A parse error and an empty success both collapse to "no calls".
        let calls = self.parse(candidate, tools).unwrap_or_default();
        Ok(Some((calls, end)))
    }
}
/// A complete tagged call in one chunk yields no display text and exactly one call.
#[test]
fn streaming_tagged_json_single_chunk() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);
    let display =
        processor.process_chunk(r#"<tool_call>{"name": "get_time", "arguments": {}}</tool_call>"#);
    assert_eq!(display, None);

    let calls = &processor.tool_calls;
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].name(), "get_time");
    assert_eq!(*calls[0].arguments(), serde_json::json!({}));
}
/// A tagged call split in the middle of its JSON body is only extracted once
/// the second chunk completes it.
#[test]
fn streaming_tagged_json_split_across_chunks() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);

    // First half: nothing to display and nothing extracted yet.
    let first = processor.process_chunk(r#"<tool_call>{"name": "get_weather", "#);
    assert_eq!(first, None);
    assert_eq!(processor.tool_calls.len(), 0);

    // Second half closes the JSON object and the tag.
    let second = processor.process_chunk(r#""arguments": {"city": "Tokyo"}}</tool_call>"#);
    assert_eq!(second, None);

    let calls = &processor.tool_calls;
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].name(), "get_weather");
    assert_eq!(
        *calls[0].arguments(),
        serde_json::json!({"city": "Tokyo"})
    );
}
/// The start tag itself may be split across chunks (`<tool_` + `call>`).
#[test]
fn streaming_tagged_json_split_mid_token() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);

    let partial_tag = processor.process_chunk("<tool_");
    assert_eq!(partial_tag, None);
    assert_eq!(processor.tool_calls.len(), 0);

    let remainder =
        processor.process_chunk(r#"call>{"name": "ping", "arguments": {}}</tool_call>"#);
    assert_eq!(remainder, None);

    let calls = &processor.tool_calls;
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].name(), "ping");
}
/// Prose before the start tag is returned as display text; the call is
/// extracted once the end tag arrives in a later chunk.
#[test]
fn streaming_leading_text_then_tool_call() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);

    let display =
        processor.process_chunk(r#"Let me check. <tool_call>{"name": "ls", "arguments": {}}"#);
    assert_eq!(display.as_deref(), Some("Let me check. "));

    let after_end_tag = processor.process_chunk("</tool_call>");
    assert_eq!(after_end_tag, None);

    let calls = &processor.tool_calls;
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].name(), "ls");
}
/// Text following the end tag in the same chunk is returned as display text.
#[test]
fn streaming_trailing_text_after_end_tag() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);
    let display = processor
        .process_chunk(r#"<tool_call>{"name": "ls", "arguments": {}}</tool_call> all done"#);
    assert_eq!(display.as_deref(), Some(" all done"));

    let calls = &processor.tool_calls;
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].name(), "ls");
}
/// Two adjacent tagged calls in one chunk are both extracted, in order.
#[test]
fn streaming_multiple_tool_calls() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);
    let display = processor.process_chunk(
        r#"<tool_call>{"name": "a", "arguments": {}}</tool_call><tool_call>{"name": "b", "arguments": {}}</tool_call>"#,
    );
    assert_eq!(display, None);

    let calls = &processor.tool_calls;
    assert_eq!(calls.len(), 2);
    assert_eq!(calls[0].name(), "a");
    assert_eq!(calls[1].name(), "b");
}
/// Plain prose passes through chunk by chunk and produces no tool calls.
#[test]
fn streaming_passthrough_no_tool_call() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);

    let first = processor.process_chunk("The capital of France ");
    assert_eq!(first.as_deref(), Some("The capital of France "));

    let second = processor.process_chunk("is Paris.");
    assert_eq!(second.as_deref(), Some("is Paris."));

    processor.process_eos();
    assert!(processor.tool_calls.is_empty());
}
/// A `<`-prefixed span that is not the start tag is returned verbatim.
#[test]
fn streaming_false_start_flushed() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);
    let display = processor.process_chunk("<thinking>hmm</thinking>");
    assert_eq!(display.as_deref(), Some("<thinking>hmm</thinking>"));
    assert!(processor.tool_calls.is_empty());
}
/// `<t` is held back as a possible start tag, then released in full once the
/// next chunk shows it was not one.
#[test]
fn streaming_false_start_split_then_flushed() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);

    let held_back = processor.process_chunk("<t");
    assert_eq!(held_back, None);

    let released = processor.process_chunk("hinking>");
    assert_eq!(released.as_deref(), Some("<thinking>"));
    assert!(processor.tool_calls.is_empty());
}
/// A bare JSON call in one chunk is extracted with no display text.
#[test]
fn streaming_inline_single_chunk() {
    let mut processor = ToolCallProcessor::new(Box::new(InlineJson), None);
    let display = processor.process_chunk(r#"{"name": "now", "arguments": {}}"#);
    assert_eq!(display, None);

    let calls = &processor.tool_calls;
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].name(), "now");
}
/// A bare JSON call split across two chunks is extracted after the second.
#[test]
fn streaming_inline_split_across_chunks() {
    let mut processor = ToolCallProcessor::new(Box::new(InlineJson), None);

    let first = processor.process_chunk(r#"{"name": "now", "#);
    assert_eq!(first, None);
    assert_eq!(processor.tool_calls.len(), 0);

    let second = processor.process_chunk(r#""arguments": {"tz": "UTC"}}"#);
    assert_eq!(second, None);

    let calls = &processor.tool_calls;
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].name(), "now");
    assert_eq!(
        *calls[0].arguments(),
        serde_json::json!({"tz": "UTC"})
    );
}
/// Prose before a bare JSON call is returned as display text.
#[test]
fn streaming_inline_leading_text() {
    let mut processor = ToolCallProcessor::new(Box::new(InlineJson), None);
    let display = processor.process_chunk(r#"sure {"name": "now", "arguments": {}}"#);
    assert_eq!(display.as_deref(), Some("sure "));

    let calls = &processor.tool_calls;
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].name(), "now");
}
/// A balanced JSON object that is not a tool call is returned verbatim.
#[test]
fn streaming_inline_balanced_non_tool_call_flushed() {
    let mut processor = ToolCallProcessor::new(Box::new(InlineJson), None);
    let display = processor.process_chunk(r#"{"unrelated": 1}"#);
    assert_eq!(display.as_deref(), Some(r#"{"unrelated": 1}"#));
    assert!(processor.tool_calls.is_empty());
}
/// Text without any brace passes straight through the inline parser.
#[test]
fn streaming_inline_no_brace_passthrough() {
    let mut processor = ToolCallProcessor::new(Box::new(InlineJson), None);
    let display = processor.process_chunk("just plain text");
    assert_eq!(display.as_deref(), Some("just plain text"));
    assert!(processor.tool_calls.is_empty());
}
/// With the Mistral parser the call is only extracted at end-of-stream.
#[test]
fn streaming_mistral_eos() {
    let mut processor = ToolCallProcessor::new(Box::new(Mistral), None);

    let display = processor.process_chunk(r#"[TOOL_CALLS]get_weather[ARGS]{"city": "Tokyo"}"#);
    assert_eq!(display, None);
    assert_eq!(processor.tool_calls.len(), 0);

    processor.process_eos();

    let calls = &processor.tool_calls;
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].name(), "get_weather");
    assert_eq!(
        *calls[0].arguments(),
        serde_json::json!({"city": "Tokyo"})
    );
}
/// End-of-stream after plain text leaves the call list empty.
#[test]
fn streaming_eos_noop_when_normal() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);
    processor.process_chunk("plain text");
    processor.process_eos();
    assert!(processor.tool_calls.is_empty());
}
/// An unterminated tagged section with invalid JSON yields no calls, both
/// before and after end-of-stream.
#[test]
fn streaming_malformed_partial_no_panic() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);

    let display = processor.process_chunk("<tool_call>{not valid json");
    assert_eq!(display, None);
    assert!(processor.tool_calls.is_empty());

    processor.process_eos();
    assert!(processor.tool_calls.is_empty());
}
/// Multi-byte characters around a partial tag must not cause a panic; the
/// outputs themselves are deliberately ignored.
#[test]
fn streaming_malformed_unicode_chunks_no_panic() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);
    for chunk in ["héllo <", "tøøl", "</tool_call>"] {
        let _ = processor.process_chunk(chunk);
    }
    processor.process_eos();
}
/// Unbalanced inline braces yield no display text and no calls, before and
/// after end-of-stream.
#[test]
fn streaming_malformed_inline_garbage_no_panic() {
    let mut processor = ToolCallProcessor::new(Box::new(InlineJson), None);

    let display = processor.process_chunk("{{{ broken");
    assert_eq!(display, None);
    assert!(processor.tool_calls.is_empty());

    processor.process_eos();
    assert!(processor.tool_calls.is_empty());
}
/// Text after an inline call is displayed, whether it arrives in the same
/// chunk as the call or in the next one.
#[test]
fn streaming_inline_object_then_suffix_one_chunk() {
    // Call and suffix in a single chunk.
    let mut one_chunk = ToolCallProcessor::new(Box::new(InlineJson), None);
    let display = one_chunk.process_chunk(r#"{"name":"now","arguments":{}} done"#);
    assert_eq!(display.as_deref(), Some(" done"));
    assert_eq!(one_chunk.tool_calls.len(), 1);
    assert_eq!(one_chunk.tool_calls[0].name(), "now");

    // Call first, suffix in a following chunk.
    let mut two_chunks = ToolCallProcessor::new(Box::new(InlineJson), None);
    let call_only = two_chunks.process_chunk(r#"{"name":"now","arguments":{}}"#);
    assert_eq!(call_only, None);
    let suffix = two_chunks.process_chunk(" done");
    assert_eq!(suffix.as_deref(), Some(" done"));
    assert_eq!(two_chunks.tool_calls.len(), 1);
    assert_eq!(two_chunks.tool_calls[0].name(), "now");
}
/// Two inline calls back to back in one chunk are both extracted, in order.
#[test]
fn streaming_inline_suffix_is_a_second_tool_call() {
    let mut processor = ToolCallProcessor::new(Box::new(InlineJson), None);
    let display =
        processor.process_chunk(r#"{"name":"a","arguments":{}}{"name":"b","arguments":{}}"#);
    assert_eq!(display, None);

    let calls = &processor.tool_calls;
    assert_eq!(calls.len(), 2);
    assert_eq!(calls[0].name(), "a");
    assert_eq!(calls[1].name(), "b");
}
/// Braces inside JSON string values must not confuse the inline scanner.
#[test]
fn streaming_inline_braces_inside_string_value() {
    // Non-call object with a `}` inside a string: returned verbatim.
    let mut non_call = ToolCallProcessor::new(Box::new(InlineJson), None);
    let flushed = non_call.process_chunk(r#"{"unrelated":"}"}"#);
    assert_eq!(flushed.as_deref(), Some(r#"{"unrelated":"}"}"#));
    assert!(non_call.tool_calls.is_empty());

    // Real call whose argument contains both `}` and `{`.
    let mut real_call = ToolCallProcessor::new(Box::new(InlineJson), None);
    let display = real_call.process_chunk(r#"{"name":"echo","arguments":{"s":"a}b{c"}}"#);
    assert_eq!(display, None);

    let calls = &real_call.tool_calls;
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].name(), "echo");
    assert_eq!(
        *calls[0].arguments(),
        serde_json::json!({"s": "a}b{c"})
    );
}
/// An inline object that never closes must not grow the buffer past
/// `MAX_TOOL_CALL_BUFFER_BYTES` plus one chunk, and the processor must still
/// accept a well-formed call afterwards.
#[test]
fn streaming_inline_unbalanced_stream_is_bounded() {
    const ROUNDS: usize = 8;

    let mut processor = ToolCallProcessor::new(Box::new(InlineJson), None);
    let opener = processor.process_chunk(r#"{"name":"now","arguments":{"x":""#);
    assert_eq!(opener, None);

    let filler = "a".repeat(64 * 1024);
    let bound = MAX_TOOL_CALL_BUFFER_BYTES + filler.len();
    let total_fed: usize = ROUNDS * filler.len();
    for _ in 0..ROUNDS {
        let _ = processor.process_chunk(&filler);
        assert!(processor.tool_call_buffer.len() <= bound);
    }

    // Sanity: more was fed than the bound allows to be retained.
    assert!(total_fed > bound);
    assert!(processor.tool_calls.is_empty());
    assert_eq!(processor.tool_call_buffer.len(), 0);

    let display = processor.process_chunk(r#"{"name":"ok","arguments":{}}"#);
    assert_eq!(display, None);
    assert_eq!(processor.tool_calls.len(), 1);
    assert_eq!(processor.tool_calls[0].name(), "ok");
}
/// A tagged section whose end tag never arrives must not grow the buffer
/// past `MAX_TOOL_CALL_BUFFER_BYTES` plus one chunk.
#[test]
fn streaming_tagged_missing_end_tag_is_bounded() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);
    let opener = processor.process_chunk(r#"<tool_call>{"name":"now""#);
    assert_eq!(opener, None);

    let filler = "b".repeat(64 * 1024);
    let bound = MAX_TOOL_CALL_BUFFER_BYTES + filler.len();
    for _ in 0..8 {
        let _ = processor.process_chunk(&filler);
        assert!(processor.tool_call_buffer.len() <= bound);
    }

    assert_eq!(processor.tool_call_buffer.len(), 0);
    assert!(processor.tool_calls.is_empty());
}
/// The Mistral parser must also respect the buffer bound, and must yield no
/// calls once end-of-stream is reached.
#[test]
fn streaming_mistral_empty_end_tag_is_bounded() {
    let mut processor = ToolCallProcessor::new(Box::new(Mistral), None);
    let opener = processor.process_chunk(r#"[TOOL_CALLS]get_weather[ARGS]{"city":""#);
    assert_eq!(opener, None);

    let filler = "c".repeat(64 * 1024);
    let bound = MAX_TOOL_CALL_BUFFER_BYTES + filler.len();
    for _ in 0..8 {
        let _ = processor.process_chunk(&filler);
        assert!(processor.tool_call_buffer.len() <= bound);
    }

    assert_eq!(processor.tool_call_buffer.len(), 0);
    processor.process_eos();
    assert!(processor.tool_calls.is_empty());
}
/// Thousands of adjacent tagged calls in a single chunk are all extracted, in
/// order, with their per-call argument intact.
#[test]
fn streaming_many_back_to_back_tagged_calls_no_stack_overflow() {
    const N: usize = 4000;

    let mut chunk = String::with_capacity(N * 56);
    for i in 0..N {
        let one_call = format!(
            r#"<tool_call>{{"name":"f","arguments":{{"i":{i}}}}}</tool_call>"#
        );
        chunk.push_str(&one_call);
    }
    // The whole payload must fit under the buffer cap for this test to be meaningful.
    assert!(chunk.len() <= MAX_TOOL_CALL_BUFFER_BYTES);

    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);
    let display = processor.process_chunk(&chunk);
    assert_eq!(display, None);
    assert_eq!(processor.tool_calls.len(), N);

    for (idx, call) in processor.tool_calls.iter().enumerate() {
        assert_eq!(call.name(), "f");
        assert_eq!(*call.arguments(), serde_json::json!({ "i": idx }));
    }
}
fn run_tagged_stream(chunks: &[&str]) -> (String, Vec<ToolCall>) {
let mut p = ToolCallProcessor::new(Box::new(JsonTools), None);
let mut display = String::new();
for c in chunks {
if let Some(d) = p.process_chunk(c) {
display.push_str(&d);
}
}
p.process_eos();
(display, p.tool_calls)
}
/// Leading prose followed by a tagged call gives the same display text and
/// call regardless of where the chunk boundary falls in the prose.
#[test]
fn streaming_tagged_leading_text_is_boundary_equivalent() {
    let whole = r#"Let me check. <tool_call>{"name":"ls","arguments":{}}</tool_call>"#;

    let (whole_display, whole_calls) = run_tagged_stream(&[whole]);
    // Boundary exactly between the prose and the start tag.
    let (split_display, split_calls) = run_tagged_stream(&[
        "Let me check. ",
        r#"<tool_call>{"name":"ls","arguments":{}}</tool_call>"#,
    ]);

    assert_eq!(whole_display, "Let me check. ");
    assert_eq!(whole_display, split_display);
    assert_eq!(whole_calls.len(), 1);
    assert_eq!(split_calls.len(), 1);
    assert_eq!(whole_calls[0].name, "ls");
    assert_eq!(split_calls[0].name, "ls");

    // Boundary in the middle of the prose.
    let (mid_display, mid_calls) = run_tagged_stream(&[
        "Let me ",
        r#"check. <tool_call>{"name":"ls","arguments":{}}</tool_call>"#,
    ]);
    assert_eq!(mid_display, "Let me check. ");
    assert_eq!(mid_calls.len(), 1);
}
/// Prose sandwiched between two tagged calls is displayed, and both calls are
/// extracted, whether the input arrives whole or in three chunks.
#[test]
fn streaming_tagged_display_text_between_two_calls() {
    let whole = concat!(
        r#"<tool_call>{"name":"a","arguments":{}}</tool_call>"#,
        " and then ",
        r#"<tool_call>{"name":"b","arguments":{}}</tool_call>"#,
    );

    let (whole_display, whole_calls) = run_tagged_stream(&[whole]);
    assert_eq!(whole_display, " and then ");
    assert_eq!(whole_calls.len(), 2);
    assert_eq!(whole_calls[0].name, "a");
    assert_eq!(whole_calls[1].name, "b");

    let pieces = [
        r#"<tool_call>{"name":"a","arguments":{}}</tool_call>"#,
        " and then ",
        r#"<tool_call>{"name":"b","arguments":{}}</tool_call>"#,
    ];
    let (split_display, split_calls) = run_tagged_stream(&pieces);
    assert_eq!(split_display, " and then ");
    assert_eq!(split_calls.len(), 2);
    assert_eq!(split_calls[1].name, "b");
}
/// An end tag appearing inside a JSON string argument must not terminate the
/// section early.
#[test]
fn streaming_tagged_end_delimiter_inside_json_string_value() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);
    let display = processor.process_chunk(
        r#"<tool_call>{"name":"echo","arguments":{"s":"</tool_call>"}}</tool_call>"#,
    );
    assert_eq!(display, None, "no suffix may leak as display text");

    let calls = &processor.tool_calls;
    assert_eq!(calls.len(), 1, "the call must not be discarded");
    assert_eq!(calls[0].name(), "echo");
    assert_eq!(
        *calls[0].arguments(),
        serde_json::json!({"s": "</tool_call>"}),
        "the delimiter inside the string argument is preserved verbatim"
    );
}
/// Same as the in-string end-tag case, but with the chunk boundary falling
/// inside the in-string `</tool_call>` literal.
#[test]
fn streaming_tagged_end_delimiter_in_string_split_across_chunks() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);

    let first = processor.process_chunk(r#"<tool_call>{"name":"echo","arguments":{"s":"<"#);
    assert_eq!(first, None);
    assert_eq!(processor.tool_calls.len(), 0);

    let second = processor.process_chunk(r#"/tool_call>"}}</tool_call>"#);
    assert_eq!(second, None);

    let calls = &processor.tool_calls;
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].name(), "echo");
    assert_eq!(
        *calls[0].arguments(),
        serde_json::json!({"s": "</tool_call>"})
    );
}
/// With an in-string end tag, only the text after the real end tag is displayed.
#[test]
fn streaming_tagged_end_delimiter_in_string_then_trailing_text() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);
    let display = processor.process_chunk(
        r#"<tool_call>{"name":"echo","arguments":{"s":"</tool_call>"}}</tool_call> done"#,
    );
    assert_eq!(display.as_deref(), Some(" done"));

    let calls = &processor.tool_calls;
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].name(), "echo");
    assert_eq!(
        *calls[0].arguments(),
        serde_json::json!({"s": "</tool_call>"})
    );
}
/// Exercises `try_parse_one_call` directly on each parser with payloads that
/// embed the parser's own end marker, and checks that the reported end
/// position is the end of the buffer.
#[test]
fn per_parser_try_parse_one_call_routing() {
    /// Runs `try_parse_one_call` and returns the end position of the section,
    /// panicking if the parser errors or reports the section as incomplete.
    fn section_end(parser: &dyn ToolParser, buffer: &str) -> usize {
        let (_, end_pos) = parser
            .try_parse_one_call(buffer, None)
            .expect("Ok")
            .expect("Some");
        end_pos
    }

    /// True when the parser reports that `buffer` holds no complete section yet.
    fn is_incomplete(parser: &dyn ToolParser, buffer: &str) -> bool {
        matches!(parser.try_parse_one_call(buffer, None), Ok(None))
    }

    // json_tools: also checks the extracted call itself.
    {
        let buf = r#"<tool_call>{"name":"echo","arguments":{"s":"</tool_call>"}}</tool_call>"#;
        let (calls, end_pos) = JsonTools
            .try_parse_one_call(buf, None)
            .expect("Ok")
            .expect("Some");
        assert_eq!(end_pos, buf.len(), "end_pos lands at buffer end");
        assert_eq!(calls.len(), 1, "one call extracted intact");
        assert_eq!(calls[0].name(), "echo");
        assert_eq!(
            *calls[0].arguments(),
            serde_json::json!({"s": "</tool_call>"}),
            "in-string end-tag literal preserved verbatim"
        );
        assert!(is_incomplete(
            &JsonTools,
            r#"<tool_call>{"s":"</tool_call>"#
        ));
    }

    // glm47: JSON object, JSON array, and XML-style payloads.
    {
        let object = r#"<tool_call>{"s":"</tool_call>"}</tool_call>"#;
        let array = r#"<tool_call>[{"s":"</tool_call>"}]</tool_call>"#;
        let xml = "<tool_call>name<arg_key>k</arg_key><arg_value>v</arg_value></tool_call>";
        for buf in [object, array, xml] {
            assert_eq!(section_end(&Glm47, buf), buf.len());
        }
        assert!(is_incomplete(
            &Glm47,
            r#"<tool_call>[{"s":"</tool_call>"#
        ));
    }

    // longcat: JSON and XML-style payloads.
    {
        let json = r#"<longcat_tool_call>{"s":"</longcat_tool_call>"}</longcat_tool_call>"#;
        let xml = "<longcat_tool_call>name<longcat_arg_key>k</longcat_arg_key><longcat_arg_value>v</longcat_arg_value></longcat_tool_call>";
        for buf in [json, xml] {
            assert_eq!(section_end(&Longcat, buf), buf.len());
        }
    }

    // pythonic: single- and double-quoted string arguments.
    {
        let single = "<|tool_call_start|>[echo(s='<|tool_call_end|>')]<|tool_call_end|>";
        let double = r#"<|tool_call_start|>[echo(s="<|tool_call_end|>")]<|tool_call_end|>"#;
        for buf in [single, double] {
            assert_eq!(section_end(&Pythonic, buf), buf.len());
        }
        assert!(is_incomplete(&Pythonic, "<|tool_call_start|>[echo(s='["));
    }

    // qwen3_coder
    {
        let buf =
            "<tool_call><function=echo><parameter=s></tool_call></parameter></function></tool_call>";
        assert_eq!(section_end(&Qwen3Coder, buf), buf.len());
    }

    // minimax_m2: plain parameter, then a parameter containing the end marker.
    {
        let plain = concat!(
            "<minimax:tool_call>",
            r#"<invoke name="f"><parameter name="p">v</parameter></invoke>"#,
            "</minimax:tool_call>",
        );
        assert_eq!(section_end(&MinimaxM2, plain), plain.len());

        let with_marker = concat!(
            "<minimax:tool_call>",
            r#"<invoke name="f"><parameter name="p"></minimax:tool_call></parameter></invoke>"#,
            "</minimax:tool_call>",
        );
        assert_eq!(section_end(&MinimaxM2, with_marker), with_marker.len());
    }

    // kimi_k2
    {
        let buf = concat!(
            "<|tool_calls_section_begin|>",
            "<|tool_call_begin|>functions.f:0<|tool_call_argument_begin|>",
            r#"{"s":"<|tool_calls_section_end|>"}"#,
            "<|tool_call_end|>",
            "<|tool_calls_section_end|>",
        );
        assert_eq!(section_end(&KimiK2, buf), buf.len());
    }

    // function_gemma
    {
        let buf =
            "<start_function_call>call:f{k:<escape><end_function_call><escape>}<end_function_call>";
        assert_eq!(section_end(&FunctionGemma, buf), buf.len());
    }

    // gemma4
    {
        let buf = r#"<|tool_call>call:f{k: <|"|><tool_call|><|"|>}<tool_call|>"#;
        assert_eq!(section_end(&Gemma4, buf), buf.len());
    }
}
/// Table-driven check of `classify_json_payload_start`: only inputs whose
/// first non-whitespace character is `{` or `[` are expected to classify as
/// `Object` / `Array`.
#[test]
fn payload_starts_with_json_value_classification() {
    let table: &[(&str, JsonPayloadStart)] = &[
        // Empty, whitespace-only, and non-container starts.
        ("", JsonPayloadStart::None),
        ("   ", JsonPayloadStart::None),
        ("\t\n\r ", JsonPayloadStart::None),
        ("<", JsonPayloadStart::None),
        ("<invoke>", JsonPayloadStart::None),
        ("name ", JsonPayloadStart::None),
        ("123", JsonPayloadStart::None),
        (r#""str""#, JsonPayloadStart::None),
        ("null", JsonPayloadStart::None),
        // Objects, with and without leading whitespace.
        ("{}", JsonPayloadStart::Object),
        ("{\"k\":1}", JsonPayloadStart::Object),
        ("  {\"k\":1}", JsonPayloadStart::Object),
        ("\n\t{}", JsonPayloadStart::Object),
        // Arrays, with and without leading whitespace.
        ("[]", JsonPayloadStart::Array),
        ("[1,2,3]", JsonPayloadStart::Array),
        ("  [{\"a\":1}]", JsonPayloadStart::Array),
        ("\n\t[]", JsonPayloadStart::Array),
        // Multi-byte first character after whitespace.
        ("  é", JsonPayloadStart::None),
    ];

    for (input, expected) in table.iter() {
        let actual = classify_json_payload_start(input);
        assert_eq!(
            actual, *expected,
            "classify_json_payload_start({input:?})"
        );
    }
}
/// Basic hits and misses for `balanced_json_object_prefix`.
#[test]
fn balanced_json_object_prefix_basics() {
    let cases: &[(&str, Option<(usize, usize)>)] = &[
        ("", None),
        ("plain text", None),
        ("{}", Some((0, 2))),
        (r#"{"a": {"b": 1}}"#, Some((0, 15))),
        // Unclosed objects are not a balanced prefix.
        ("{", None),
        (r#"{"a": {"b":"#, None),
    ];

    for &(input, expected) in cases {
        assert_eq!(balanced_json_object_prefix(input), expected);
    }
}
/// Braces and escaped quotes inside string values do not affect balancing.
#[test]
fn balanced_json_object_prefix_is_string_aware() {
    let cases: &[(&str, Option<(usize, usize)>)] = &[
        (r#"{"unrelated":"}"}"#, Some((0, 17))),
        (r#"{"k":"a\"}b"}"#, Some((0, 13))),
        (r#"{"x":"{{{{"}"#, Some((0, 12))),
    ];

    for &(input, expected) in cases {
        assert_eq!(balanced_json_object_prefix(input), expected);
    }
}
/// Hits, misses, string-awareness, and prefix/suffix slicing for
/// `balanced_json_array_prefix`.
#[test]
fn balanced_json_array_prefix_basic() {
    let cases: &[(&str, Option<(usize, usize)>)] = &[
        // No array present.
        ("", None),
        ("plain text", None),
        ("{not_array:1}", None),
        // Balanced arrays, including nesting.
        ("[]", Some((0, 2))),
        ("[1,2,3]", Some((0, 7))),
        (r#"[{"a":1}]"#, Some((0, 9))),
        ("[[1],[2]]", Some((0, 9))),
        // Brackets and escaped quotes inside strings are ignored.
        (r#"["unrelated]"]"#, Some((0, 14))),
        (r#"["a\"]b"]"#, Some((0, 9))),
        (r#"["]]]]"]"#, Some((0, 8))),
        // Unclosed arrays.
        ("[", None),
        ("[1,2", None),
        (r#"[{"a":["#, None),
    ];
    for &(input, expected) in cases {
        assert_eq!(balanced_json_array_prefix(input), expected);
    }

    // Leading text before the array: the start offset points at the `[`.
    let with_prefix = "hi [1,2] bye";
    let (start, end) = balanced_json_array_prefix(with_prefix).expect("balanced array");
    assert_eq!(&with_prefix[..start], "hi ");
    assert_eq!(&with_prefix[start..end], "[1,2]");

    // Trailing text after the array: the end offset is one past the `]`.
    let with_suffix = "[1,2,3] done";
    let (start, end) = balanced_json_array_prefix(with_suffix).expect("balanced array");
    assert_eq!(&with_suffix[start..end], "[1,2,3]");
    assert_eq!(&with_suffix[end..], " done");
}
/// The offsets returned by `balanced_json_object_prefix` slice the input into
/// leading text, the object itself, and trailing text.
#[test]
fn balanced_json_object_prefix_finds_prefix_and_suffix() {
    // Object at the very start, followed by a suffix.
    let with_suffix = r#"{"name":"now","arguments":{}} done"#;
    let (start, end) = balanced_json_object_prefix(with_suffix).expect("balanced object");
    assert_eq!(start, 0);
    assert_eq!(&with_suffix[start..end], r#"{"name":"now","arguments":{}}"#);
    assert_eq!(&with_suffix[end..], " done");

    // Object preceded by leading text.
    let with_prefix = r#"hi {"a":1} bye"#;
    let (start, end) = balanced_json_object_prefix(with_prefix).expect("balanced object");
    assert_eq!(&with_prefix[..start], "hi ");
    assert_eq!(&with_prefix[start..end], r#"{"a":1}"#);
}
/// `partial_match` against `<tool_call>`: empty, proper-prefix, exact, and
/// over-long buffers match; a diverging tag does not.
#[test]
fn partial_match_basics() {
    let cases: &[(&str, bool)] = &[
        ("", true),
        ("<tool", true),
        ("<tool_call>", true),
        ("<tool_call>extra", true),
        ("<thinking>", false),
    ];

    for &(buffer, expected) in cases {
        assert_eq!(partial_match(buffer, "<tool_call>"), expected);
    }
}
/// `strip_markers` returns the trimmed payload for both a tagged parser and
/// the marker-less inline parser.
#[test]
fn strip_markers_tagged_and_inline() {
    let tagged = strip_markers(&JsonTools, "<tool_call> {\"x\": 1} </tool_call>");
    assert_eq!(tagged, r#"{"x": 1}"#);

    let inline = strip_markers(&InlineJson, "  {\"x\": 1}  ");
    assert_eq!(inline, r#"{"x": 1}"#);
}
/// Streams `chunks` through a fresh processor built on `parser`, signals
/// end-of-stream, and returns the concatenated display text together with the
/// tool calls the processor collected.
fn run_with_parser(parser: Box<dyn ToolParser>, chunks: &[&str]) -> (String, Vec<ToolCall>) {
    let mut processor = ToolCallProcessor::new(parser, None);

    // Chunks that produce no display text contribute nothing to the result.
    let display: String = chunks
        .iter()
        .filter_map(|chunk| processor.process_chunk(chunk))
        .collect();

    processor.process_eos();
    (display, processor.tool_calls)
}
/// Leading prose is not lost when the chunk boundary falls just after the
/// `<` of the start tag.
#[test]
fn streaming_leading_text_split_inside_start_tag_persists() {
    let split_chunks = [
        "Let me <",
        r#"tool_call>{"name":"ls","arguments":{}}</tool_call>"#,
    ];
    let whole_chunk = [r#"Let me <tool_call>{"name":"ls","arguments":{}}</tool_call>"#];

    let (split_display, split_calls) = run_with_parser(Box::new(JsonTools), &split_chunks);
    let (whole_display, whole_calls) = run_with_parser(Box::new(JsonTools), &whole_chunk);

    assert_eq!(split_display, "Let me ");
    assert_eq!(
        split_display, whole_display,
        "split-inside-start-tag must equal whole"
    );
    assert_eq!(split_calls.len(), 1);
    assert_eq!(whole_calls.len(), 1);
    assert_eq!(split_calls[0].name, "ls");
    assert_eq!(whole_calls[0].name, "ls");
}
/// Splitting the stream at every byte offset inside `"Let me <tool_call>"`
/// must give the same display text and call as the unsplit stream.
#[test]
fn streaming_leading_text_every_byte_boundary_inside_start_tag() {
    let prefix = "Let me <tool_call>";
    let tail = r#"{"name":"ls","arguments":{}}</tool_call>"#;
    let combined: String = format!("{prefix}{tail}");

    let (d_baseline, c_baseline) = run_with_parser(Box::new(JsonTools), &[&combined]);
    assert_eq!(d_baseline, "Let me ");
    assert_eq!(c_baseline.len(), 1);

    // `prefix` is ASCII, so every offset is a valid split point.
    for k in 1..prefix.len() {
        let (head, rest) = combined.split_at(k);
        let (display, calls) = run_with_parser(Box::new(JsonTools), &[head, rest]);
        assert_eq!(
            display, d_baseline,
            "byte split at k={k} ({head:?}|{rest:?}) lost leading text"
        );
        assert_eq!(calls.len(), 1, "byte split at k={k} lost the call");
        assert_eq!(calls[0].name, "ls");
    }
}
/// When a held-back `<` turns out not to start a tool call, the prose before
/// it, the false-start text, and everything after all reach the display.
#[test]
fn streaming_pending_display_flushed_on_false_start() {
    let chunks = ["Let me <", "thinking>oops</thinking> and continue"];
    let (display, calls) = run_with_parser(Box::new(JsonTools), &chunks);

    assert_eq!(calls.len(), 0, "no tool call from a false start");
    assert_eq!(
        display, "Let me <thinking>oops</thinking> and continue",
        "leading prose + false-start prefix + remainder all surface"
    );
}
/// A first chunk ending with the `<` of the next start tag still yields the
/// in-between prose and both calls.
#[test]
fn streaming_back_to_back_with_trailing_partial_next_start() {
    let chunks = [
        r#"<tool_call>{"name":"a","arguments":{}}</tool_call> and then <"#,
        r#"tool_call>{"name":"b","arguments":{}}</tool_call>"#,
    ];
    let (display, calls) = run_with_parser(Box::new(JsonTools), &chunks);

    assert_eq!(display, " and then ");
    assert_eq!(calls.len(), 2);
    assert_eq!(calls[0].name, "a");
    assert_eq!(calls[1].name, "b");
}
/// Glm47 with a JSON-object payload: an end tag inside a string argument does
/// not cut the call short.
#[test]
fn streaming_glm47_json_fallback_end_tag_in_string_extracted() {
    let chunks =
        [r#"<tool_call>{"name":"echo","arguments":{"s":"</tool_call>"}}</tool_call>"#];
    let (display, calls) = run_with_parser(Box::new(Glm47), &chunks);

    assert_eq!(display, "", "no suffix leaks");
    assert_eq!(
        calls.len(),
        1,
        "glm47 JSON-fallback call must extract intact"
    );
    assert_eq!(calls[0].name, "echo");
    assert_eq!(
        calls[0].arguments,
        serde_json::json!({"s": "</tool_call>"}),
        "in-string delimiter preserved verbatim"
    );
}
/// Glm47 JSON-object payload with the chunk boundary inside the in-string end tag.
#[test]
fn streaming_glm47_json_fallback_end_tag_in_string_split_across_chunks() {
    let chunks = [
        r#"<tool_call>{"name":"echo","arguments":{"s":"<"#,
        r#"/tool_call>"}}</tool_call>"#,
    ];
    let (display, calls) = run_with_parser(Box::new(Glm47), &chunks);

    assert_eq!(display, "");
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].name, "echo");
    assert_eq!(
        calls[0].arguments,
        serde_json::json!({"s": "</tool_call>"})
    );
}
/// Glm47 with a JSON-array payload: an end tag inside a string argument does
/// not cut the call short.
#[test]
fn streaming_glm47_json_array_end_tag_in_string_extracted() {
    let chunks =
        [r#"<tool_call>[{"name":"echo","arguments":{"s":"</tool_call>"}}]</tool_call>"#];
    let (display, calls) = run_with_parser(Box::new(Glm47), &chunks);

    assert_eq!(
        display, "",
        "no suffix leaks (end tag matched after the array)"
    );
    assert_eq!(calls.len(), 1, "glm47 JSON-array call must extract intact");
    assert_eq!(calls[0].name, "echo");
    assert_eq!(
        calls[0].arguments,
        serde_json::json!({"s": "</tool_call>"}),
        "in-string delimiter preserved verbatim"
    );
}
/// Glm47 JSON-array payload with the chunk boundary inside the in-string end tag.
#[test]
fn streaming_glm47_json_array_end_tag_in_string_split_across_chunks() {
    let chunks = [
        r#"<tool_call>[{"name":"echo","arguments":{"s":"<"#,
        r#"/tool_call>"}}]</tool_call>"#,
    ];
    let (display, calls) = run_with_parser(Box::new(Glm47), &chunks);

    assert_eq!(display, "");
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].name, "echo");
    assert_eq!(
        calls[0].arguments,
        serde_json::json!({"s": "</tool_call>"})
    );
}
/// Longcat with a JSON payload: an end tag inside a string argument does not
/// cut the call short.
#[test]
fn streaming_longcat_json_fastpath_end_tag_in_string_extracted() {
    let chunks = [
        r#"<longcat_tool_call>{"name":"echo","arguments":{"s":"</longcat_tool_call>"}}</longcat_tool_call>"#,
    ];
    let (display, calls) = run_with_parser(Box::new(Longcat), &chunks);

    assert_eq!(display, "");
    assert_eq!(
        calls.len(),
        1,
        "longcat JSON-fastpath call must extract intact"
    );
    assert_eq!(calls[0].name, "echo");
    assert_eq!(
        calls[0].arguments,
        serde_json::json!({"s": "</longcat_tool_call>"}),
    );
}
/// Longcat JSON payload with the chunk boundary inside the in-string end tag.
#[test]
fn streaming_longcat_json_fastpath_end_tag_in_string_split_across_chunks() {
    let chunks = [
        r#"<longcat_tool_call>{"name":"echo","arguments":{"s":"<"#,
        r#"/longcat_tool_call>"}}</longcat_tool_call>"#,
    ];
    let (display, calls) = run_with_parser(Box::new(Longcat), &chunks);

    assert_eq!(display, "");
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].name, "echo");
    assert_eq!(
        calls[0].arguments,
        serde_json::json!({"s": "</longcat_tool_call>"}),
    );
}
/// The combined size of `tool_call_buffer` and `pending_display` stays within
/// the cap plus one chunk, both end up empty, and a later well-formed call is
/// still extracted.
#[test]
fn streaming_pending_display_counted_against_cap() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);
    let _ = processor.process_chunk("Let me say a lot first <");

    let filler = "x".repeat(64 * 1024);
    let bound = MAX_TOOL_CALL_BUFFER_BYTES + filler.len();
    for _ in 0..8 {
        let _ = processor.process_chunk(&filler);
        let retained = processor.tool_call_buffer.len() + processor.pending_display.len();
        assert!(retained <= bound);
    }
    assert_eq!(processor.tool_call_buffer.len(), 0);
    assert_eq!(processor.pending_display.len(), 0);

    let display =
        processor.process_chunk(r#"<tool_call>{"name":"ok","arguments":{}}</tool_call>"#);
    assert_eq!(display, None);
    assert_eq!(processor.tool_calls.len(), 1);
    assert_eq!(processor.tool_calls[0].name(), "ok");
}
/// End-of-stream empties both `pending_display` and `tool_call_buffer`, and
/// the processor passes plain text through afterwards.
#[test]
fn streaming_pending_display_cleared_on_eos() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);
    let _ = processor.process_chunk("Just thinking <");
    processor.process_eos();

    assert!(
        processor.pending_display.is_empty(),
        "pending_display leaked past EOS"
    );
    assert!(processor.tool_call_buffer.is_empty());

    let display = processor.process_chunk("hello");
    assert_eq!(display.as_deref(), Some("hello"));
}
/// Pythonic parser: an unmatched `[` inside a single-quoted string argument
/// must not swallow the text after the end marker, whether the payload
/// arrives whole or split, and nothing may remain buffered after EOS.
#[test]
fn streaming_pythonic_single_quoted_string_with_unmatched_bracket_preserves_trailing_display() {
    let payload = "<|tool_call_start|>[echo(s='[abc')]<|tool_call_end|> after";

    // Whole payload in one chunk.
    let (one_display, one_calls) = run_with_parser(Box::new(Pythonic), &[payload]);
    assert_eq!(
        one_display, " after",
        "single-chunk: trailing display must survive byte-for-byte"
    );
    assert_eq!(one_calls.len(), 1, "single-chunk: tool call must extract");
    assert_eq!(one_calls[0].name, "echo");
    assert_eq!(one_calls[0].arguments, serde_json::json!({"s": "[abc"}));

    // Same payload split right after the in-string `[`.
    let split_chunks = [
        "<|tool_call_start|>[echo(s='[",
        "abc')]<|tool_call_end|> after",
    ];
    let (split_display, split_calls) = run_with_parser(Box::new(Pythonic), &split_chunks);
    assert_eq!(
        split_display, " after",
        "split-chunk: trailing display must survive across the split"
    );
    assert_eq!(split_calls.len(), 1, "split-chunk: tool call must extract");
    assert_eq!(split_calls[0].name, "echo");
    assert_eq!(split_calls[0].arguments, serde_json::json!({"s": "[abc"}));

    // Internal buffers must be drained once the stream ends.
    let mut processor = ToolCallProcessor::new(Box::new(Pythonic), None);
    let _ = processor.process_chunk(payload);
    processor.process_eos();
    assert!(
        processor.tool_call_buffer.is_empty(),
        "no buffer growth past the end marker (tool_call_buffer)"
    );
    assert!(
        processor.pending_display.is_empty(),
        "no buffer growth past the end marker (pending_display)"
    );
}
/// JsonTools given a top-level JSON array: the section is still reported as
/// complete (ending on the end tag) with zero calls, and the trailing text is
/// still displayed when streamed.
#[test]
fn streaming_json_tools_leading_bracket_buffer_extraction_is_contract_correct() {
    let buf = r#"<tool_call>[{"name":"echo","arguments":{}}]</tool_call> trailing"#;

    // Direct parser call.
    let (parsed, end_pos) = JsonTools
        .try_parse_one_call(buf, None)
        .expect("Ok")
        .expect("Some — section is closeable");
    let consumed = &buf[..end_pos];
    assert!(consumed.ends_with("</tool_call>"));
    assert_eq!(
        parsed.len(),
        0,
        "json_tools rejects a top-level array shape (no `name` field)"
    );

    // Same buffer through the streaming processor.
    let (display, calls) = run_with_parser(Box::new(JsonTools), &[buf]);
    assert_eq!(
        calls.len(),
        0,
        "json_tools rejects a top-level array (no name)"
    );
    assert_eq!(
        display, " trailing",
        "trailing display must survive even though parse() rejected the call"
    );
}
/// Table-driven audit: for each listed parser, a buffer embedding that
/// parser's own end marker must be reported as complete with `end_pos` at the
/// end of the buffer. Also checks that Mistral's end marker is empty.
#[test]
fn parser_try_parse_one_call_audit_assignments() {
    /// One audit row: a parser and a buffer it must consume.
    struct Row {
        label: &'static str,
        parser: Box<dyn ToolParser>,
        buffer: &'static str,
        expect_end_pos_eq_len: bool,
    }

    /// Builds a row that expects `end_pos == buffer.len()`.
    fn row(label: &'static str, parser: Box<dyn ToolParser>, buffer: &'static str) -> Row {
        Row {
            label,
            parser,
            buffer,
            expect_end_pos_eq_len: true,
        }
    }

    let rows = [
        row(
            "json_tools",
            Box::new(JsonTools),
            r#"<tool_call>{"s":"</tool_call>"}</tool_call>"#,
        ),
        row(
            "glm47 (object)",
            Box::new(Glm47),
            r#"<tool_call>{"s":"</tool_call>"}</tool_call>"#,
        ),
        row(
            "glm47 (array)",
            Box::new(Glm47),
            r#"<tool_call>[{"s":"</tool_call>"}]</tool_call>"#,
        ),
        row(
            "longcat",
            Box::new(Longcat),
            r#"<longcat_tool_call>{"s":"</longcat_tool_call>"}</longcat_tool_call>"#,
        ),
        row(
            "pythonic (single-quoted)",
            Box::new(Pythonic),
            "<|tool_call_start|>[echo(s='<|tool_call_end|>')]<|tool_call_end|>",
        ),
        row(
            "pythonic (double-quoted)",
            Box::new(Pythonic),
            r#"<|tool_call_start|>[echo(s="<|tool_call_end|>")]<|tool_call_end|>"#,
        ),
        row(
            "qwen3_coder",
            Box::new(Qwen3Coder),
            "<tool_call><function=echo><parameter=s></tool_call></parameter></function></tool_call>",
        ),
        row(
            "minimax_m2",
            Box::new(MinimaxM2),
            "<minimax:tool_call><invoke name=\"f\"><parameter name=\"p\"></minimax:tool_call></parameter></invoke></minimax:tool_call>",
        ),
        row(
            "kimi_k2",
            Box::new(KimiK2),
            "<|tool_calls_section_begin|><|tool_call_begin|>functions.f:0<|tool_call_argument_begin|>{\"s\":\"<|tool_calls_section_end|>\"}<|tool_call_end|><|tool_calls_section_end|>",
        ),
        row(
            "function_gemma",
            Box::new(FunctionGemma),
            "<start_function_call>call:f{k:<escape><end_function_call><escape>}<end_function_call>",
        ),
        row(
            "gemma4",
            Box::new(Gemma4),
            r#"<|tool_call>call:f{k: <|"|><tool_call|><|"|>}<tool_call|>"#,
        ),
    ];

    for row in &rows {
        let outcome = match row.parser.try_parse_one_call(row.buffer, None) {
            Ok(outcome) => outcome,
            Err(e) => panic!("{}: try_parse_one_call errored: {e}", row.label),
        };
        let Some((_calls, end_pos)) = outcome else {
            panic!("{}: section not detected as complete", row.label)
        };

        if row.expect_end_pos_eq_len {
            assert_eq!(
                end_pos,
                row.buffer.len(),
                "{}: end_pos must land at buffer end (one past the trailing close)",
                row.label
            );
        }
        assert!(
            end_pos > 0,
            "{}: end_pos must advance past at least one byte",
            row.label
        );
    }

    assert!(Mistral.tool_call_end().is_empty());
}
/// A single-quoted Pythonic string argument that spells out the literal
/// `<|tool_call_end|>` marker must come back verbatim as the argument value,
/// and the text after the real end marker must be returned as display output.
///
/// Exercised twice: with the payload in one chunk, and with the chunk boundary
/// falling inside the in-string marker.
#[test]
fn streaming_pythonic_string_argument_contains_literal_end_marker_preserves_payload() {
    let expected_args = serde_json::json!({"s": "<|tool_call_end|>"});

    // Whole payload delivered as a single chunk.
    let (whole_display, whole_calls) = run_with_parser(
        Box::new(Pythonic),
        &["<|tool_call_start|>[echo(s='<|tool_call_end|>')]<|tool_call_end|> after"],
    );
    assert_eq!(
        whole_display, " after",
        "trailing display must survive byte-for-byte"
    );
    assert_eq!(whole_calls.len(), 1, "tool call must extract");
    let whole_call = &whole_calls[0];
    assert_eq!(whole_call.name, "echo");
    assert_eq!(
        whole_call.arguments, expected_args,
        "in-string end marker preserved verbatim"
    );

    // Same payload, split in the middle of the in-string marker.
    let (split_display, split_calls) = run_with_parser(
        Box::new(Pythonic),
        &[
            "<|tool_call_start|>[echo(s='<|tool_call_",
            "end|>')]<|tool_call_end|> after",
        ],
    );
    assert_eq!(split_display, " after");
    assert_eq!(split_calls.len(), 1);
    let split_call = &split_calls[0];
    assert_eq!(split_call.name, "echo");
    assert_eq!(split_call.arguments, expected_args);
}
/// Double-quoted variant of the Pythonic in-string end-marker test: the
/// literal `<|tool_call_end|>` inside a `"..."` argument must be kept as the
/// argument value, and ` after` must be returned as display output, both for
/// a single chunk and for a split inside the in-string marker.
#[test]
fn streaming_pythonic_double_quoted_string_with_literal_end_marker_preserves_payload() {
    let expected_args = serde_json::json!({"s": "<|tool_call_end|>"});

    // Single chunk.
    let (whole_display, whole_calls) = run_with_parser(
        Box::new(Pythonic),
        &[r#"<|tool_call_start|>[echo(s="<|tool_call_end|>")]<|tool_call_end|> after"#],
    );
    assert_eq!(whole_display, " after");
    assert_eq!(whole_calls.len(), 1);
    let whole_call = &whole_calls[0];
    assert_eq!(whole_call.name, "echo");
    assert_eq!(whole_call.arguments, expected_args);

    // Two chunks; the boundary cuts the in-string marker in half.
    let (split_display, split_calls) = run_with_parser(
        Box::new(Pythonic),
        &[
            r#"<|tool_call_start|>[echo(s="<|tool_call_"#,
            r#"end|>")]<|tool_call_end|> after"#,
        ],
    );
    assert_eq!(split_display, " after");
    assert_eq!(split_calls.len(), 1);
    let split_call = &split_calls[0];
    assert_eq!(split_call.name, "echo");
    assert_eq!(split_call.arguments, expected_args);
}
/// A JSON string value equal to `</tool_call>` must not terminate the
/// section early: one call named `echo` is expected, with the literal kept as
/// the `s` argument and no display text produced.
#[test]
fn streaming_json_tools_string_value_contains_end_marker_preserves_payload() {
    const PAYLOAD: &str =
        r#"<tool_call>{"name":"echo","arguments":{"s":"</tool_call>"}}</tool_call>"#;

    let (display, calls) = run_with_parser(Box::new(JsonTools), &[PAYLOAD]);

    assert_eq!(display, "");
    assert_eq!(calls.len(), 1);
    let call = &calls[0];
    assert_eq!(call.name, "echo");
    assert_eq!(call.arguments, serde_json::json!({"s": "</tool_call>"}));
}
/// Qwen3-Coder: a `<parameter>` body consisting of the literal `</tool_call>`
/// must still yield exactly one call (`echo`), and the text after the real
/// closing tag must be returned from `process_chunk`.
#[test]
fn streaming_qwen3_coder_parameter_value_contains_end_marker_extracted() {
    const PAYLOAD: &str =
        "<tool_call><function=echo><parameter=s></tool_call></parameter></function></tool_call> after";

    let mut processor = ToolCallProcessor::new(Box::new(Qwen3Coder), None);
    let display = processor.process_chunk(PAYLOAD);

    assert_eq!(display.as_deref(), Some(" after"));
    assert_eq!(processor.tool_calls.len(), 1);
    let call = &processor.tool_calls[0];
    assert_eq!(call.name(), "echo");
}
/// MiniMax-M2: a `<parameter>` body consisting of the literal
/// `</minimax:tool_call>` must still yield exactly one call (`f`), with the
/// text after the real closing tag returned from `process_chunk`.
#[test]
fn streaming_minimax_m2_parameter_value_contains_end_marker_extracted() {
    const PAYLOAD: &str = r#"<minimax:tool_call><invoke name="f"><parameter name="p"></minimax:tool_call></parameter></invoke></minimax:tool_call> after"#;

    let mut processor = ToolCallProcessor::new(Box::new(MinimaxM2), None);
    let display = processor.process_chunk(PAYLOAD);

    assert_eq!(display.as_deref(), Some(" after"));
    assert_eq!(processor.tool_calls.len(), 1);
    let call = &processor.tool_calls[0];
    assert_eq!(call.name(), "f");
}
/// Kimi-K2: a JSON argument string equal to `<|tool_calls_section_end|>` must
/// be preserved as the `s` argument of the single extracted call (`echo`),
/// with ` after` returned as display output.
#[test]
fn streaming_kimi_k2_argument_string_contains_section_end_marker_extracted() {
    const PAYLOAD: &str = concat!(
        "<|tool_calls_section_begin|>",
        "<|tool_call_begin|>functions.echo:0<|tool_call_argument_begin|>",
        r#"{"s":"<|tool_calls_section_end|>"}"#,
        "<|tool_call_end|>",
        "<|tool_calls_section_end|>",
        " after",
    );

    let mut processor = ToolCallProcessor::new(Box::new(KimiK2), None);
    let display = processor.process_chunk(PAYLOAD);

    assert_eq!(display.as_deref(), Some(" after"));
    assert_eq!(processor.tool_calls.len(), 1);
    let call = &processor.tool_calls[0];
    assert_eq!(call.name(), "echo");
    assert_eq!(
        *call.arguments(),
        serde_json::json!({"s": "<|tool_calls_section_end|>"}),
    );
}
/// FunctionGemma: an `<escape>`-delimited value equal to `<end_function_call>`
/// must be preserved as the `k` argument of the single extracted call (`f`),
/// with ` after` returned as display output.
#[test]
fn streaming_function_gemma_escape_string_contains_end_marker_extracted() {
    const PAYLOAD: &str =
        "<start_function_call>call:f{k:<escape><end_function_call><escape>}<end_function_call> after";

    let mut processor = ToolCallProcessor::new(Box::new(FunctionGemma), None);
    let display = processor.process_chunk(PAYLOAD);

    assert_eq!(display.as_deref(), Some(" after"));
    assert_eq!(processor.tool_calls.len(), 1);
    let call = &processor.tool_calls[0];
    assert_eq!(call.name(), "f");
    assert_eq!(
        *call.arguments(),
        serde_json::json!({"k": "<end_function_call>"})
    );
}
/// Gemma4: a `<|"|>`-delimited value equal to `<tool_call|>` must be preserved
/// as the `k` argument of the single extracted call (`f`), with ` after`
/// returned as display output.
#[test]
fn streaming_gemma4_string_contains_end_marker_extracted() {
    const PAYLOAD: &str = r#"<|tool_call>call:f{k: <|"|><tool_call|><|"|>}<tool_call|> after"#;

    let mut processor = ToolCallProcessor::new(Box::new(Gemma4), None);
    let display = processor.process_chunk(PAYLOAD);

    assert_eq!(display.as_deref(), Some(" after"));
    assert_eq!(processor.tool_calls.len(), 1);
    let call = &processor.tool_calls[0];
    assert_eq!(call.name(), "f");
    assert_eq!(
        *call.arguments(),
        serde_json::json!({"k": "<tool_call|>"})
    );
}
/// MiniMax-M2: display text that follows a completed section and contains an
/// `<invoke name="x">` literal must be returned unchanged, and the completed
/// call (`f`) must still be emitted.
#[test]
fn streaming_minimax_m2_trailing_display_with_inner_opener_does_not_hide_end_tag() {
    const TRAILING: &str = r#" some text <invoke name="x">"#;
    const PAYLOAD: &str = concat!(
        "<minimax:tool_call>",
        r#"<invoke name="f"><parameter name="p">v</parameter></invoke>"#,
        "</minimax:tool_call>",
        r#" some text <invoke name="x">"#,
    );

    let (display, calls) = run_with_parser(Box::new(MinimaxM2), &[PAYLOAD]);

    assert_eq!(calls.len(), 1, "completed tool call must be emitted");
    assert_eq!(calls[0].name, "f");
    assert_eq!(
        display, TRAILING,
        "trailing display (with the inner-opener literal) reaches the caller \
         byte-for-byte; the in-display opener does not re-open collection"
    );
}
/// Split-chunk variant: the trailing `<invoke name="x">` literal is cut
/// between two chunks and must still be returned as display text after the
/// completed MiniMax-M2 call.
#[test]
fn streaming_minimax_m2_trailing_display_with_inner_opener_split_across_chunks() {
    const FIRST_CHUNK: &str = concat!(
        "<minimax:tool_call>",
        r#"<invoke name="f"><parameter name="p">v</parameter></invoke>"#,
        "</minimax:tool_call>",
        " some text <invoke ",
    );
    const SECOND_CHUNK: &str = r#"name="x">"#;

    let (display, calls) = run_with_parser(Box::new(MinimaxM2), &[FIRST_CHUNK, SECOND_CHUNK]);

    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].name, "f");
    assert_eq!(display, r#" some text <invoke name="x">"#);
}
/// Kimi-K2: display text that follows a completed section and contains a
/// `<|tool_call_begin|>` literal must be returned unchanged, and the completed
/// call (`f` with `{"k": "v"}`) must still be emitted.
#[test]
fn streaming_kimi_k2_trailing_display_with_inner_opener_does_not_hide_end_tag() {
    const PAYLOAD: &str = concat!(
        "<|tool_calls_section_begin|>",
        "<|tool_call_begin|>functions.f:0<|tool_call_argument_begin|>",
        r#"{"k":"v"}"#,
        "<|tool_call_end|>",
        "<|tool_calls_section_end|>",
        " some text <|tool_call_begin|>functions.x:1",
    );

    let (display, calls) = run_with_parser(Box::new(KimiK2), &[PAYLOAD]);

    assert_eq!(calls.len(), 1, "completed tool call must be emitted");
    let call = &calls[0];
    assert_eq!(call.name, "f");
    assert_eq!(call.arguments, serde_json::json!({"k": "v"}));
    assert_eq!(
        display, " some text <|tool_call_begin|>functions.x:1",
        "trailing display (with the inner-opener literal) reaches the caller"
    );
}
/// Split-chunk variant: the trailing `<|tool_call_begin|>` literal is cut
/// between two chunks and must still be returned as display text after the
/// completed Kimi-K2 call.
#[test]
fn streaming_kimi_k2_trailing_display_with_inner_opener_split_across_chunks() {
    const FIRST_CHUNK: &str = concat!(
        "<|tool_calls_section_begin|>",
        "<|tool_call_begin|>functions.f:0<|tool_call_argument_begin|>",
        r#"{"k":"v"}"#,
        "<|tool_call_end|>",
        "<|tool_calls_section_end|>",
        " some text <|tool_call_",
    );
    const SECOND_CHUNK: &str = "begin|>functions.x:1";

    let (display, calls) = run_with_parser(Box::new(KimiK2), &[FIRST_CHUNK, SECOND_CHUNK]);

    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].name, "f");
    assert_eq!(display, " some text <|tool_call_begin|>functions.x:1");
}
/// Gemma4: display text that follows a completed section and contains a
/// `call:x{` literal must be returned unchanged, and the completed call
/// (`f` with `{"k": "v"}`) must still be emitted.
#[test]
fn streaming_gemma4_trailing_display_with_inner_opener_does_not_hide_end_tag() {
    const PAYLOAD: &str = concat!(
        "<|tool_call>",
        r#"call:f{"k":"v"}"#,
        "<tool_call|>",
        " some text call:x{abc",
    );

    let (display, calls) = run_with_parser(Box::new(Gemma4), &[PAYLOAD]);

    assert_eq!(calls.len(), 1, "completed tool call must be emitted");
    let call = &calls[0];
    assert_eq!(call.name, "f");
    assert_eq!(call.arguments, serde_json::json!({"k": "v"}));
    assert_eq!(
        display, " some text call:x{abc",
        "trailing display (with the inner-opener literal) reaches the caller"
    );
}
/// Split-chunk variant: the trailing `call:x{abc` literal is cut between two
/// chunks and must still be returned as display text after the completed
/// Gemma4 call.
#[test]
fn streaming_gemma4_trailing_display_with_inner_opener_split_across_chunks() {
    const FIRST_CHUNK: &str = concat!(
        "<|tool_call>",
        r#"call:f{"k":"v"}"#,
        "<tool_call|>",
        " some text call",
    );
    const SECOND_CHUNK: &str = ":x{abc";

    let (display, calls) = run_with_parser(Box::new(Gemma4), &[FIRST_CHUNK, SECOND_CHUNK]);

    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].name, "f");
    assert_eq!(display, " some text call:x{abc");
}
/// GLM-4.7 XML style: a `</tool_call>` literal inside an `<arg_value>` must
/// stay part of the argument value rather than ending the section, and the
/// text after the real closing tag must be returned as display output.
#[test]
fn streaming_glm47_xml_arg_value_contains_wrapper_end_literal_not_truncated() {
    const PAYLOAD: &str = concat!(
        "<tool_call>",
        "echo<arg_key>s</arg_key><arg_value>blah</tool_call> more blah</arg_value>",
        "</tool_call>",
        " after",
    );

    let (display, calls) = run_with_parser(Box::new(Glm47), &[PAYLOAD]);

    assert_eq!(display, " after", "trailing display reaches caller");
    assert_eq!(calls.len(), 1, "tool call must extract intact");
    let call = &calls[0];
    assert_eq!(call.name, "echo");
    assert_eq!(
        call.arguments,
        serde_json::json!({"s": "blah</tool_call> more blah"}),
        "in-value wrapper-end literal preserved verbatim inside the arg value"
    );
}
/// Split-chunk variant: the in-value `</tool_call>` literal is cut between two
/// chunks and must still end up inside the GLM-4.7 argument value.
#[test]
fn streaming_glm47_xml_arg_value_contains_wrapper_end_literal_split_across_chunks() {
    const FIRST_CHUNK: &str = concat!(
        "<tool_call>",
        "echo<arg_key>s</arg_key><arg_value>blah</tool_",
    );
    const SECOND_CHUNK: &str = "call> more blah</arg_value></tool_call> after";

    let (display, calls) = run_with_parser(Box::new(Glm47), &[FIRST_CHUNK, SECOND_CHUNK]);

    assert_eq!(display, " after");
    assert_eq!(calls.len(), 1);
    let call = &calls[0];
    assert_eq!(call.name, "echo");
    assert_eq!(
        call.arguments,
        serde_json::json!({"s": "blah</tool_call> more blah"}),
    );
}
/// LongCat XML style: a `</longcat_tool_call>` literal inside a
/// `<longcat_arg_value>` must stay part of the argument value, and the text
/// after the real closing tag must be returned as display output.
#[test]
fn streaming_longcat_xml_arg_value_contains_wrapper_end_literal_not_truncated() {
    const PAYLOAD: &str = concat!(
        "<longcat_tool_call>",
        "echo<longcat_arg_key>s</longcat_arg_key>",
        "<longcat_arg_value>blah</longcat_tool_call> more blah</longcat_arg_value>",
        "</longcat_tool_call>",
        " after",
    );

    let (display, calls) = run_with_parser(Box::new(Longcat), &[PAYLOAD]);

    assert_eq!(display, " after", "trailing display reaches caller");
    assert_eq!(calls.len(), 1, "tool call must extract intact");
    let call = &calls[0];
    assert_eq!(call.name, "echo");
    assert_eq!(
        call.arguments,
        serde_json::json!({"s": "blah</longcat_tool_call> more blah"}),
        "in-value wrapper-end literal preserved verbatim"
    );
}
/// Split-chunk variant: the in-value `</longcat_tool_call>` literal is cut
/// between two chunks and must still end up inside the LongCat argument value.
#[test]
fn streaming_longcat_xml_arg_value_contains_wrapper_end_literal_split_across_chunks() {
    const FIRST_CHUNK: &str = concat!(
        "<longcat_tool_call>",
        "echo<longcat_arg_key>s</longcat_arg_key>",
        "<longcat_arg_value>blah</longcat_",
    );
    const SECOND_CHUNK: &str =
        "tool_call> more blah</longcat_arg_value></longcat_tool_call> after";

    let (display, calls) = run_with_parser(Box::new(Longcat), &[FIRST_CHUNK, SECOND_CHUNK]);

    assert_eq!(display, " after");
    assert_eq!(calls.len(), 1);
    let call = &calls[0];
    assert_eq!(call.name, "echo");
    assert_eq!(
        call.arguments,
        serde_json::json!({"s": "blah</longcat_tool_call> more blah"}),
    );
}
/// GLM-4.7 plain fallback: a body of `echo <arg_value>` (no `<arg_key>`, no
/// closing `</arg_value>`) must still close at `</tool_call>`, producing one
/// call whose `raw` argument is the `<arg_value>` literal.
#[test]
fn streaming_glm47_plain_fallback_with_unmatched_arg_value_literal_does_not_block() {
    const PAYLOAD: &str = "<tool_call>echo <arg_value></tool_call> after";

    let (display, calls) = run_with_parser(Box::new(Glm47), &[PAYLOAD]);

    assert_eq!(display, " after", "trailing display reaches caller");
    assert_eq!(
        calls.len(),
        1,
        "plain-fallback call must extract (not be dropped)"
    );
    let call = &calls[0];
    assert_eq!(call.name, "echo");
    assert_eq!(
        call.arguments,
        serde_json::json!({"raw": "<arg_value>"}),
        "raw `<arg_value>` literal preserved verbatim as plain arg text"
    );
}
/// Split-chunk variant of the GLM-4.7 plain-fallback test: the closing
/// `</tool_call>` is cut between two chunks.
#[test]
fn streaming_glm47_plain_fallback_with_unmatched_arg_value_literal_split_across_chunks() {
    const FIRST_CHUNK: &str = "<tool_call>echo <arg_value></tool_";
    const SECOND_CHUNK: &str = "call> after";

    let (display, calls) = run_with_parser(Box::new(Glm47), &[FIRST_CHUNK, SECOND_CHUNK]);

    assert_eq!(display, " after");
    assert_eq!(calls.len(), 1, "plain-fallback call must extract across chunks");
    let call = &calls[0];
    assert_eq!(call.name, "echo");
    assert_eq!(call.arguments, serde_json::json!({"raw": "<arg_value>"}));
}
/// GLM-4.7: when the `<arg_key>` tag itself is split across chunks, the call
/// must still be parsed as XML style once the tag is complete, yielding the
/// key/value pair `{"s": "v"}`.
#[test]
fn streaming_glm47_xml_arg_key_arrives_in_later_chunk() {
    const FIRST_CHUNK: &str = "<tool_call>echo<arg_ke";
    const SECOND_CHUNK: &str = "y>s</arg_key><arg_value>v</arg_value></tool_call> after";

    let (display, calls) = run_with_parser(Box::new(Glm47), &[FIRST_CHUNK, SECOND_CHUNK]);

    assert_eq!(display, " after");
    assert_eq!(
        calls.len(),
        1,
        "XML-style call must extract once `<arg_key>` arrives"
    );
    let call = &calls[0];
    assert_eq!(call.name, "echo");
    assert_eq!(
        call.arguments,
        serde_json::json!({"s": "v"}),
        "XML-aware routing recovers the key/value pair"
    );
}
/// GLM-4.7 plain fallback: an `<arg_key>` literal that appears only in the
/// display text after the closed section must not affect the call, which is
/// expected to be extracted as `echo` with `raw == "<arg_value>"`.
#[test]
fn streaming_glm47_plain_fallback_with_trailing_arg_key_in_display_does_not_block() {
    const PAYLOAD: &str = "<tool_call>echo <arg_value></tool_call> after <arg_key>";

    let (display, calls) = run_with_parser(Box::new(Glm47), &[PAYLOAD]);

    assert_eq!(display, " after <arg_key>", "trailing display reaches caller");
    assert_eq!(
        calls.len(),
        1,
        "plain-fallback call must extract (not be dropped)"
    );
    let call = &calls[0];
    assert_eq!(call.name, "echo");
    assert_eq!(
        call.arguments,
        serde_json::json!({"raw": "<arg_value>"}),
        "raw `<arg_value>` literal preserved verbatim as plain arg text"
    );
}
/// Split-chunk variant: the trailing `<arg_key>` display literal is cut
/// between two chunks; the plain-fallback call and the display text must both
/// come through unchanged.
#[test]
fn streaming_glm47_plain_fallback_with_trailing_arg_key_in_display_split_across_chunks() {
    const FIRST_CHUNK: &str = "<tool_call>echo <arg_value></tool_call> after <arg";
    const SECOND_CHUNK: &str = "_key>";

    let (display, calls) = run_with_parser(Box::new(Glm47), &[FIRST_CHUNK, SECOND_CHUNK]);

    assert_eq!(display, " after <arg_key>");
    assert_eq!(calls.len(), 1, "plain-fallback call must extract across chunks");
    let call = &calls[0];
    assert_eq!(call.name, "echo");
    assert_eq!(call.arguments, serde_json::json!({"raw": "<arg_value>"}));
}
/// GLM-4.7 XML style with no name before the first `<arg_key>`: the call is
/// expected with an empty name and `{"k": "v"}` arguments, and a stray
/// `<arg_key>` in the trailing display text must be returned unchanged.
#[test]
fn streaming_glm47_xml_style_with_trailing_arg_key_in_display_does_not_misroute() {
    const PAYLOAD: &str = concat!(
        "<tool_call><arg_key>k</arg_key><arg_value>v</arg_value>",
        "</tool_call> bonus <arg_key>"
    );

    let (display, calls) = run_with_parser(Box::new(Glm47), &[PAYLOAD]);

    assert_eq!(
        display, " bonus <arg_key>",
        "trailing display (with stray `<arg_key>`) reaches caller"
    );
    assert_eq!(calls.len(), 1, "XML-style call must extract intact");
    let call = &calls[0];
    assert_eq!(call.name, "", "no name prefix before the first `<arg_key>`");
    assert_eq!(
        call.arguments,
        serde_json::json!({"k": "v"}),
        "key/value extracted via the XML-aware scan"
    );
}
/// Qwen3-Coder: a parameter value containing both `</function>` and
/// `</tool_call>` literals must be extracted as one call (`f`) whose `p`
/// argument still contains both literals. The split-chunk half repeats the
/// run with the in-value `</tool_call>` cut between chunks and checks the call
/// count, name and empty display only.
#[test]
fn streaming_qwen3_coder_parameter_value_with_function_close_and_tool_call_close_literals_extracts_intact()
{
    const PAYLOAD: &str = concat!(
        "<tool_call><function=f><parameter=p>v containing ",
        "</function> and </tool_call>",
        "</parameter></function></tool_call>",
    );

    // Single chunk: verify the parameter text itself.
    let (display, calls) = run_with_parser(Box::new(Qwen3Coder), &[PAYLOAD]);
    assert_eq!(display, "", "no trailing display leak");
    assert_eq!(calls.len(), 1, "one tool call extracted");
    assert_eq!(calls[0].name, "f");
    // `Value::get` yields `None` for non-objects, so this is the same lookup
    // as going through `as_object()` first.
    let p_value = calls[0]
        .arguments
        .get("p")
        .and_then(Value::as_str)
        .expect("string parameter `p`");
    for literal in ["</function>", "</tool_call>"] {
        assert!(
            p_value.contains(literal),
            "`{literal}` literal preserved verbatim inside the parameter value (got: {p_value:?})"
        );
    }

    // Two chunks: boundary inside the in-value `</tool_call>` literal.
    const FIRST_CHUNK: &str = concat!(
        "<tool_call><function=f><parameter=p>v containing ",
        "</function> and </tool_",
    );
    const SECOND_CHUNK: &str = "call></parameter></function></tool_call>";
    let (split_display, split_calls) =
        run_with_parser(Box::new(Qwen3Coder), &[FIRST_CHUNK, SECOND_CHUNK]);
    assert_eq!(split_display, "");
    assert_eq!(split_calls.len(), 1);
    assert_eq!(split_calls[0].name, "f");
}
/// Asserts that `try_parse_one_call` and `parse` agree for `buffer`.
///
/// `try_parse_one_call` is run on the whole buffer and must report a complete
/// section. The consumed prefix is then stripped of the parser's start/end
/// markers and passed to `parse`; a `parse` error is treated as "no calls".
/// Both results must have the same number of calls, and each pair must match
/// on `name`, `arguments` and `id`.
///
/// `label` prefixes every failure message.
///
/// # Panics
/// Panics if `try_parse_one_call` errors or returns `None`, or if any of the
/// comparisons fail.
fn assert_try_parse_one_call_matches_parse(parser: &dyn ToolParser, label: &str, buffer: &str) {
    let (streamed_calls, consumed) = match parser.try_parse_one_call(buffer, None) {
        Ok(Some(section)) => section,
        Ok(None) => panic!("{label}: try_parse_one_call returned None (incomplete buffer)"),
        Err(e) => panic!("{label}: try_parse_one_call errored: {e}"),
    };

    // Feed `parse` only what the streaming path consumed, minus the markers.
    let inner = strip_section_markers(
        &buffer[..consumed],
        parser.tool_call_start(),
        parser.tool_call_end(),
    );
    let batch_calls = parser.parse(inner, None).unwrap_or_default();

    assert_eq!(
        streamed_calls.len(),
        batch_calls.len(),
        "{label}: try_parse_one_call vs parse call-count mismatch"
    );
    for (i, (streamed, batch)) in std::iter::zip(&streamed_calls, &batch_calls).enumerate() {
        assert_eq!(streamed.name, batch.name, "{label}[{i}]: name mismatch");
        assert_eq!(
            streamed.arguments, batch.arguments,
            "{label}[{i}]: arguments mismatch"
        );
        assert_eq!(streamed.id, batch.id, "{label}[{i}]: id mismatch");
    }
}
/// `JsonTools`: streaming and batch parsing must agree for an empty-argument
/// call, a call whose string value is `</tool_call>`, and a call followed by
/// trailing text.
#[test]
fn try_parse_one_call_matches_parse_json_tools() {
    const CASES: &[&str] = &[
        r#"<tool_call>{"name":"a","arguments":{}}</tool_call>"#,
        r#"<tool_call>{"name":"echo","arguments":{"s":"</tool_call>"}}</tool_call>"#,
        r#"<tool_call>{"name":"a","arguments":{}}</tool_call> trailing"#,
    ];
    for &buffer in CASES {
        assert_try_parse_one_call_matches_parse(&JsonTools, "json_tools", buffer);
    }
}
/// `Pythonic`: streaming and batch parsing must agree for calls with no
/// arguments, a plain string argument, single- and double-quoted arguments
/// equal to the end marker, and a call followed by trailing text.
#[test]
fn try_parse_one_call_matches_parse_pythonic() {
    const CASES: &[&str] = &[
        "<|tool_call_start|>[ping()]<|tool_call_end|>",
        "<|tool_call_start|>[echo(s='hello')]<|tool_call_end|>",
        "<|tool_call_start|>[echo(s='<|tool_call_end|>')]<|tool_call_end|>",
        r#"<|tool_call_start|>[echo(s="<|tool_call_end|>")]<|tool_call_end|>"#,
        "<|tool_call_start|>[ping()]<|tool_call_end|> after",
    ];
    for &buffer in CASES {
        assert_try_parse_one_call_matches_parse(&Pythonic, "pythonic", buffer);
    }
}
/// `Mistral`: streaming and batch parsing must agree for
/// `[TOOL_CALLS]name[ARGS]{...}` buffers with non-empty and empty arguments.
#[test]
fn try_parse_one_call_matches_parse_mistral() {
    const CASES: &[&str] = &[
        r#"[TOOL_CALLS]get_weather[ARGS]{"city":"Tokyo"}"#,
        r#"[TOOL_CALLS]ping[ARGS]{}"#,
    ];
    for &buffer in CASES {
        assert_try_parse_one_call_matches_parse(&Mistral, "mistral", buffer);
    }
}
/// `Qwen3Coder`: streaming and batch parsing must agree for a parameterless
/// call, a call with one parameter, and a parameter value containing
/// `</function>` and `</tool_call>` literals.
#[test]
fn try_parse_one_call_matches_parse_qwen3_coder() {
    const CASES: &[&str] = &[
        "<tool_call><function=ping></function></tool_call>",
        "<tool_call><function=echo><parameter=s>hello</parameter></function></tool_call>",
        concat!(
            "<tool_call><function=f><parameter=p>v containing ",
            "</function> and </tool_call>",
            "</parameter></function></tool_call>",
        ),
    ];
    for &buffer in CASES {
        assert_try_parse_one_call_matches_parse(&Qwen3Coder, "qwen3_coder", buffer);
    }
}
/// `Glm47`: streaming and batch parsing must agree for the XML key/value
/// body, a JSON object body, a JSON array body, and a plain-text body.
#[test]
fn try_parse_one_call_matches_parse_glm47() {
    const CASES: &[&str] = &[
        "<tool_call>echo<arg_key>s</arg_key><arg_value>v</arg_value></tool_call>",
        r#"<tool_call>{"name":"echo","arguments":{"s":"hi"}}</tool_call>"#,
        r#"<tool_call>[{"name":"echo","arguments":{"s":"hi"}}]</tool_call>"#,
        "<tool_call>plain command</tool_call>",
    ];
    for &buffer in CASES {
        assert_try_parse_one_call_matches_parse(&Glm47, "glm47", buffer);
    }
}
/// `Longcat`: streaming and batch parsing must agree for the XML key/value
/// body and a JSON object body.
#[test]
fn try_parse_one_call_matches_parse_longcat() {
    const CASES: &[&str] = &[
        "<longcat_tool_call>echo<longcat_arg_key>s</longcat_arg_key><longcat_arg_value>v</longcat_arg_value></longcat_tool_call>",
        r#"<longcat_tool_call>{"name":"echo","arguments":{"s":"hi"}}</longcat_tool_call>"#,
    ];
    for &buffer in CASES {
        assert_try_parse_one_call_matches_parse(&Longcat, "longcat", buffer);
    }
}
/// `MinimaxM2`: streaming and batch parsing must agree for a section with one
/// `<invoke>` and for a section with two `<invoke>` elements.
#[test]
fn try_parse_one_call_matches_parse_minimax_m2() {
    const CASES: &[&str] = &[
        concat!(
            "<minimax:tool_call>",
            r#"<invoke name="f"><parameter name="p">v</parameter></invoke>"#,
            "</minimax:tool_call>",
        ),
        concat!(
            "<minimax:tool_call>",
            r#"<invoke name="a"></invoke><invoke name="b"></invoke>"#,
            "</minimax:tool_call>",
        ),
    ];
    for &buffer in CASES {
        assert_try_parse_one_call_matches_parse(&MinimaxM2, "minimax_m2", buffer);
    }
}
/// `KimiK2`: streaming and batch parsing must agree for a section holding one
/// call and for a section holding two calls.
#[test]
fn try_parse_one_call_matches_parse_kimi_k2() {
    const CASES: &[&str] = &[
        concat!(
            "<|tool_calls_section_begin|>",
            "<|tool_call_begin|>functions.f:0<|tool_call_argument_begin|>",
            r#"{"k":"v"}"#,
            "<|tool_call_end|>",
            "<|tool_calls_section_end|>",
        ),
        concat!(
            "<|tool_calls_section_begin|>",
            "<|tool_call_begin|>functions.a:0<|tool_call_argument_begin|>",
            r#"{}"#,
            "<|tool_call_end|>",
            "<|tool_call_begin|>functions.b:1<|tool_call_argument_begin|>",
            r#"{}"#,
            "<|tool_call_end|>",
            "<|tool_calls_section_end|>",
        ),
    ];
    for &buffer in CASES {
        assert_try_parse_one_call_matches_parse(&KimiK2, "kimi_k2", buffer);
    }
}
/// `FunctionGemma`: streaming and batch parsing must agree for a bare value
/// argument and for an `<escape>`-delimited string argument.
#[test]
fn try_parse_one_call_matches_parse_function_gemma() {
    const CASES: &[&str] = &[
        "<start_function_call>call:f{k:1}<end_function_call>",
        "<start_function_call>call:f{k:<escape>hello<escape>}<end_function_call>",
    ];
    for &buffer in CASES {
        assert_try_parse_one_call_matches_parse(&FunctionGemma, "function_gemma", buffer);
    }
}
/// `Gemma4`: streaming and batch parsing must agree for a bare value
/// argument, a `<|"|>`-delimited string argument, and two calls inside one
/// section.
#[test]
fn try_parse_one_call_matches_parse_gemma4() {
    const CASES: &[&str] = &[
        r#"<|tool_call>call:f{k: 1}<tool_call|>"#,
        r#"<|tool_call>call:f{k: <|"|>hello<|"|>}<tool_call|>"#,
        r#"<|tool_call>call:a{k: 1}call:b{k: 2}<tool_call|>"#,
    ];
    for &buffer in CASES {
        assert_try_parse_one_call_matches_parse(&Gemma4, "gemma4", buffer);
    }
}
/// Qwen3-Coder: a `</tool_call>` literal in the display text after a
/// completed section must not be treated as part of the section. Exactly one
/// call (`f`) is expected and the trailing text must be returned unchanged,
/// both for a single chunk and when the trailing text is split in two.
#[test]
fn streaming_qwen3_coder_trailing_display_with_tool_call_close_literal_does_not_consume_past_real_close()
{
    const TRAILING: &str = " some text containing </tool_call>";

    // Single chunk.
    let (display, calls) = run_with_parser(
        Box::new(Qwen3Coder),
        &[concat!(
            "<tool_call><function=f></function></tool_call>",
            " some text containing </tool_call>",
        )],
    );
    assert_eq!(
        calls.len(),
        1,
        "exactly one tool call extracted (got {})",
        calls.len()
    );
    assert_eq!(calls[0].name, "f");
    assert_eq!(
        display, TRAILING,
        "trailing display reaches output byte-for-byte"
    );

    // Trailing text split across two chunks.
    let (split_display, split_calls) = run_with_parser(
        Box::new(Qwen3Coder),
        &[
            "<tool_call><function=f></function></tool_call> some text ",
            "containing </tool_call>",
        ],
    );
    assert_eq!(split_calls.len(), 1);
    assert_eq!(split_calls[0].name, "f");
    assert_eq!(split_display, TRAILING);
}
/// Qwen3-Coder: two sections with nothing between them must produce two calls
/// (`f` then `g`) and no display text, both for a single chunk and when the
/// first section's closing tag is split across chunks.
#[test]
fn streaming_qwen3_coder_back_to_back_calls_extracted_separately() {
    // Single chunk.
    let (display, calls) = run_with_parser(
        Box::new(Qwen3Coder),
        &[concat!(
            "<tool_call><function=f></function></tool_call>",
            "<tool_call><function=g></function></tool_call>",
        )],
    );
    assert_eq!(display, "", "no display leak between back-to-back calls");
    assert_eq!(calls.len(), 2, "exactly two tool calls extracted");
    assert_eq!(calls[0].name, "f");
    assert_eq!(calls[1].name, "g");

    // Boundary inside the first section's closing tag.
    let (split_display, split_calls) = run_with_parser(
        Box::new(Qwen3Coder),
        &[
            "<tool_call><function=f></function></tool_",
            "call><tool_call><function=g></function></tool_call>",
        ],
    );
    assert_eq!(split_display, "");
    assert_eq!(split_calls.len(), 2);
    assert_eq!(split_calls[0].name, "f");
    assert_eq!(split_calls[1].name, "g");
}
/// Per-parser audit: given two complete sections back to back,
/// `try_parse_one_call` must stop one byte past the FIRST section's close and
/// return that section's call first.
///
/// Each row stores the two sections separately; the buffer is their
/// concatenation and the expected end position is the first section's length.
#[test]
fn try_parse_one_call_back_to_back_per_parser_audit() {
    struct Row {
        label: &'static str,
        parser: Box<dyn ToolParser>,
        first_section: &'static str,
        second_section: &'static str,
        expect_first_name: &'static str,
    }

    let rows = [
        Row {
            label: "json_tools",
            parser: Box::new(JsonTools),
            first_section: r#"<tool_call>{"name":"a","arguments":{}}</tool_call>"#,
            second_section: r#"<tool_call>{"name":"b","arguments":{}}</tool_call>"#,
            expect_first_name: "a",
        },
        Row {
            label: "glm47 (object)",
            parser: Box::new(Glm47),
            first_section: r#"<tool_call>{"name":"a","arguments":{}}</tool_call>"#,
            second_section: r#"<tool_call>{"name":"b","arguments":{}}</tool_call>"#,
            expect_first_name: "a",
        },
        Row {
            label: "longcat (object)",
            parser: Box::new(Longcat),
            first_section: r#"<longcat_tool_call>{"name":"a","arguments":{}}</longcat_tool_call>"#,
            second_section: r#"<longcat_tool_call>{"name":"b","arguments":{}}</longcat_tool_call>"#,
            expect_first_name: "a",
        },
        Row {
            label: "pythonic",
            parser: Box::new(Pythonic),
            first_section: "<|tool_call_start|>[a()]<|tool_call_end|>",
            second_section: "<|tool_call_start|>[b()]<|tool_call_end|>",
            expect_first_name: "a",
        },
        Row {
            label: "qwen3_coder",
            parser: Box::new(Qwen3Coder),
            first_section: "<tool_call><function=a></function></tool_call>",
            second_section: "<tool_call><function=b></function></tool_call>",
            expect_first_name: "a",
        },
        Row {
            label: "minimax_m2",
            parser: Box::new(MinimaxM2),
            first_section: r#"<minimax:tool_call><invoke name="a"></invoke></minimax:tool_call>"#,
            second_section: r#"<minimax:tool_call><invoke name="b"></invoke></minimax:tool_call>"#,
            expect_first_name: "a",
        },
        Row {
            label: "kimi_k2",
            parser: Box::new(KimiK2),
            first_section: concat!(
                "<|tool_calls_section_begin|>",
                "<|tool_call_begin|>functions.a:0<|tool_call_argument_begin|>{}<|tool_call_end|>",
                "<|tool_calls_section_end|>",
            ),
            second_section: concat!(
                "<|tool_calls_section_begin|>",
                "<|tool_call_begin|>functions.b:1<|tool_call_argument_begin|>{}<|tool_call_end|>",
                "<|tool_calls_section_end|>",
            ),
            expect_first_name: "a",
        },
        Row {
            label: "function_gemma",
            parser: Box::new(FunctionGemma),
            first_section: "<start_function_call>call:a{}<end_function_call>",
            second_section: "<start_function_call>call:b{}<end_function_call>",
            expect_first_name: "a",
        },
        Row {
            label: "gemma4",
            parser: Box::new(Gemma4),
            first_section: r#"<|tool_call>call:a{}<tool_call|>"#,
            second_section: r#"<|tool_call>call:b{}<tool_call|>"#,
            expect_first_name: "a",
        },
    ];

    for row in &rows {
        let buffer = format!("{}{}", row.first_section, row.second_section);
        let expect_end_pos = row.first_section.len();

        let (calls, end_pos) = match row.parser.try_parse_one_call(&buffer, None) {
            Ok(Some(section)) => section,
            Ok(None) => panic!("{}: first section not detected complete", row.label),
            Err(e) => panic!("{}: try_parse_one_call errored: {e}", row.label),
        };

        assert_eq!(
            end_pos, expect_end_pos,
            "{}: end_pos must land one past the FIRST section's close, not the second's",
            row.label
        );
        assert!(
            !calls.is_empty(),
            "{}: at least one call from the first section",
            row.label
        );
        assert_eq!(
            calls[0].name(),
            row.expect_first_name,
            "{}: first section's first call name",
            row.label
        );
    }

    // Mistral has no row above; the only thing checked for it here is that its
    // end marker is empty.
    assert!(Mistral.tool_call_end().is_empty());
}
/// Test double whose parsing always fails.
///
/// `parse` unconditionally returns an error. `try_parse_one_call` returns an
/// error once the buffer contains both the `<tc>` and `</tc>` markers, and
/// `Ok(None)` otherwise.
struct AlwaysErrParser;
impl ToolParser for AlwaysErrParser {
    fn name(&self) -> &'static str {
        "always_err_test_parser"
    }

    fn tool_call_start(&self) -> &'static str {
        "<tc>"
    }

    fn tool_call_end(&self) -> &'static str {
        "</tc>"
    }

    fn parse(&self, _text: &str, _tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
        Err(err("always_err: malformed"))
    }

    fn try_parse_one_call(
        &self,
        buffer: &str,
        _tools: Option<&Value>,
    ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
        let has_open = buffer.contains(self.tool_call_start());
        let has_close = buffer.contains(self.tool_call_end());
        match (has_open, has_close) {
            // Both markers present anywhere in the buffer: reject.
            (true, true) => Err(err("always_err: rejected")),
            // Otherwise report the buffer as not yet complete.
            _ => Ok(None),
        }
    }
}
/// When `try_parse_one_call` returns `Err`, the processor must emit nothing
/// for that chunk, empty both `tool_call_buffer` and `pending_display`, return
/// to `State::Normal`, record no tool calls, and pass the next plain chunk
/// straight through.
#[test]
fn processor_err_from_try_parse_one_call_clears_buffer_immediately() {
    let mut processor = ToolCallProcessor::new(Box::new(AlwaysErrParser), None);

    let rejected = processor.process_chunk("<tc>malformed</tc>");
    assert_eq!(rejected, None, "no display leak from the Err recovery itself");

    let leftover_bytes = processor.tool_call_buffer.len();
    assert!(
        processor.tool_call_buffer.is_empty(),
        "tool_call_buffer drained immediately after Err (got {} bytes)",
        leftover_bytes
    );
    assert!(
        processor.pending_display.is_empty(),
        "pending_display cleared after Err",
    );
    assert_eq!(
        processor.state,
        State::Normal,
        "state reset to Normal after Err — next chunk starts fresh",
    );
    assert_eq!(processor.tool_calls.len(), 0, "no tool calls extracted");

    let passthrough = processor.process_chunk("hello world");
    assert_eq!(
        passthrough.as_deref(),
        Some("hello world"),
        "subsequent plain chunk passes through immediately (not suppressed until cap)",
    );
}
/// A rejected section shorter than `MAX_TOOL_CALL_BUFFER_BYTES` must leave the
/// processor with an empty buffer in `State::Normal`, so the next plain chunk
/// is returned as display output.
#[test]
fn processor_err_does_not_suppress_output_until_cap() {
    const MALFORMED: &str = "<tc>x</tc>";

    let mut processor = ToolCallProcessor::new(Box::new(AlwaysErrParser), None);
    processor.process_chunk(MALFORMED);

    assert!(
        MALFORMED.len() < MAX_TOOL_CALL_BUFFER_BYTES,
        "test premise: malformed section is below the cap",
    );
    assert!(processor.tool_call_buffer.is_empty());
    assert_eq!(processor.state, State::Normal);

    let passthrough = processor.process_chunk("plain");
    assert_eq!(passthrough.as_deref(), Some("plain"));
}
/// Test double that recognises a closed `<tc>…</tc>` section but never yields
/// a call from it.
///
/// `parse` always errors. `try_parse_one_call` returns `Ok(None)` until the
/// buffer holds a `<tc>` followed by a `</tc>`, then returns an empty call
/// list together with the byte offset one past that `</tc>`.
struct RejectedSectionParser;
impl ToolParser for RejectedSectionParser {
    fn name(&self) -> &'static str {
        "rejected_section_test_parser"
    }

    fn tool_call_start(&self) -> &'static str {
        "<tc>"
    }

    fn tool_call_end(&self) -> &'static str {
        "</tc>"
    }

    fn parse(&self, _text: &str, _tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
        Err(err("rejected_section_test_parser: rejected"))
    }

    fn try_parse_one_call(
        &self,
        buffer: &str,
        _tools: Option<&Value>,
    ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
        let open = "<tc>";
        let close = "</tc>";

        // Offset one past the first `</tc>` that follows the first `<tc>`;
        // `None` while either marker is still missing.
        let section_end = buffer
            .find(open)
            .map(|open_at| open_at + open.len())
            .and_then(|body_start| {
                buffer[body_start..]
                    .find(close)
                    .map(|close_rel| body_start + close_rel + close.len())
            });

        Ok(section_end.map(|end_pos| (Vec::new(), end_pos)))
    }
}
/// Text following a rejected section in the same chunk must be returned as
/// display output, and no tool calls may be emitted.
#[test]
fn processor_rejected_section_preserves_same_chunk_suffix() {
    const CHUNK: &str = "<tc>bad</tc>visible";

    let (shown, extracted) = run_with_parser(Box::new(RejectedSectionParser), &[CHUNK]);

    assert_eq!(extracted.len(), 0, "rejected section emits no tool calls");
    assert_eq!(
        shown, "visible",
        "trailing suffix from the SAME chunk must survive the rejected section"
    );
}
/// Split-chunk variant: the closing `</tc>` and the suffix arrive in the
/// second chunk; the suffix must still be returned and no call emitted.
#[test]
fn processor_rejected_section_preserves_same_chunk_suffix_split_chunk() {
    const FIRST_CHUNK: &str = "<tc>bad";
    const SECOND_CHUNK: &str = "</tc>visible";

    let (shown, extracted) =
        run_with_parser(Box::new(RejectedSectionParser), &[FIRST_CHUNK, SECOND_CHUNK]);

    assert_eq!(extracted.len(), 0, "rejected section emits no tool calls");
    assert_eq!(
        shown, "visible",
        "trailing suffix split across chunks must still reach display"
    );
}
/// After a rejected section the processor must have empty buffers, be in
/// `State::Normal`, and pass the next plain chunk through unchanged.
#[test]
fn processor_rejected_section_returns_to_normal_state() {
    let mut processor = ToolCallProcessor::new(Box::new(RejectedSectionParser), None);

    let suffix = processor.process_chunk("<tc>bad</tc>visible");
    assert_eq!(suffix.as_deref(), Some("visible"));
    assert!(processor.tool_call_buffer.is_empty(), "buffer drained");
    assert!(processor.pending_display.is_empty(), "pending_display drained");
    assert_eq!(processor.state, State::Normal, "state reset");

    let passthrough = processor.process_chunk("hello world");
    assert_eq!(passthrough.as_deref(), Some("hello world"));
}
/// Two rejected sections back to back followed by plain text: no calls are
/// emitted and only the trailing text is returned.
#[test]
fn processor_rejected_section_back_to_back_with_suffix() {
    const CHUNK: &str = "<tc>a</tc><tc>b</tc>tail";

    let (shown, extracted) = run_with_parser(Box::new(RejectedSectionParser), &[CHUNK]);

    assert_eq!(extracted.len(), 0);
    assert_eq!(
        shown, "tail",
        "back-to-back rejected sections + trailing suffix"
    );
}
/// Text on both sides of a rejected section must be returned, in stream
/// order, with the section itself removed.
#[test]
fn processor_rejected_section_preserves_leading_display() {
    const CHUNK: &str = "before <tc>bad</tc>after";

    let (shown, extracted) = run_with_parser(Box::new(RejectedSectionParser), &[CHUNK]);

    assert_eq!(extracted.len(), 0);
    assert_eq!(
        shown, "before after",
        "leading prose (`before `) + trailing suffix (`after`) survive in stream order"
    );
}
/// With a parser that returns `Err` for a closed section, the processor must
/// emit nothing for that chunk, reset its buffers and state, and pass the next
/// chunk through.
#[test]
fn processor_err_for_truly_indeterminate_buffer_still_resets() {
    let mut processor = ToolCallProcessor::new(Box::new(AlwaysErrParser), None);

    let dropped = processor.process_chunk("<tc>indeterminate</tc>");
    assert_eq!(dropped, None, "Err recovery drops the whole buffer");
    assert!(processor.tool_call_buffer.is_empty());
    assert!(processor.pending_display.is_empty());
    assert_eq!(processor.state, State::Normal);

    let passthrough = processor.process_chunk("next");
    assert_eq!(passthrough.as_deref(), Some("next"));
}
/// Per-parser audit: a closed section whose body is expected to yield no
/// calls must not swallow text that follows it in the same chunk.
#[test]
fn try_parse_one_call_rejected_section_with_same_chunk_suffix_per_parser_audit() {
    /// Streams `buffer` as one chunk and checks that no call is extracted and
    /// that the display output equals `expect_display`.
    fn check(
        label: &'static str,
        parser: Box<dyn ToolParser>,
        buffer: &'static str,
        expect_display: &'static str,
    ) {
        let (display, calls) = run_with_parser(parser, &[buffer]);
        assert_eq!(
            calls.len(),
            0,
            "{label}: parser rejected the body so zero calls",
        );
        assert_eq!(
            display, expect_display,
            "{label}: same-chunk suffix must reach display",
        );
    }

    check(
        "json_tools (array body — no `name`)",
        Box::new(JsonTools),
        r#"<tool_call>[{"x":1}]</tool_call>tail"#,
        "tail",
    );
    check(
        "gemma4 (unparseable args body)",
        Box::new(Gemma4),
        r#"<|tool_call>call:f{!bad!}<tool_call|>tail"#,
        "tail",
    );
}
/// `JsonTools`: a closed section with a non-JSON body (`bad`) yields no calls,
/// and the text after it in the same chunk must be returned.
#[test]
fn streaming_json_tools_malformed_body_in_closed_section_preserves_same_chunk_suffix() {
    const CHUNK: &str = "<tool_call>bad</tool_call>visible";

    let (shown, extracted) = run_with_parser(Box::new(JsonTools), &[CHUNK]);

    assert_eq!(extracted.len(), 0, "malformed body emits no calls");
    assert_eq!(
        shown, "visible",
        "trailing suffix from the SAME chunk must survive the malformed-but-closed section"
    );
}
/// Split-chunk variant: the end tag and the suffix arrive in the second
/// chunk; the suffix must still be returned and no call emitted.
#[test]
fn streaming_json_tools_malformed_body_in_closed_section_preserves_suffix_split_chunk() {
    const FIRST_CHUNK: &str = "<tool_call>bad";
    const SECOND_CHUNK: &str = "</tool_call>visible";

    let (shown, extracted) = run_with_parser(Box::new(JsonTools), &[FIRST_CHUNK, SECOND_CHUNK]);

    assert_eq!(extracted.len(), 0);
    assert_eq!(
        shown, "visible",
        "split-chunk: end-tag + suffix in chunk 2 still surface `visible`"
    );
}
/// `JsonTools`: after a malformed-but-closed section the processor must have
/// empty buffers, be in `State::Normal`, and pass the next plain chunk
/// through unchanged.
#[test]
fn streaming_json_tools_malformed_body_returns_state_to_normal() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);

    let suffix = processor.process_chunk("<tool_call>bad</tool_call>visible");
    assert_eq!(suffix.as_deref(), Some("visible"));
    assert!(processor.tool_call_buffer.is_empty(), "buffer drained");
    assert!(processor.pending_display.is_empty(), "pending_display drained");
    assert_eq!(processor.state, State::Normal, "state reset to Normal");

    let passthrough = processor.process_chunk("hello world");
    assert_eq!(passthrough.as_deref(), Some("hello world"));
}
/// `JsonTools`: a body consisting of a lone `{` followed by the end tag must
/// still close the section, yielding no calls and returning the suffix.
#[test]
fn streaming_json_tools_object_body_unbalanced_with_outside_end_tag_closes() {
    const CHUNK: &str = "<tool_call>{</tool_call>visible";

    let (shown, extracted) = run_with_parser(Box::new(JsonTools), &[CHUNK]);

    assert_eq!(extracted.len(), 0);
    assert_eq!(shown, "visible");
}
/// `JsonTools`: when the first chunk ends right after a `</tool_call>` that
/// sits inside a still-open JSON string, nothing may be emitted yet. Once the
/// second chunk closes the object and supplies the real end tag, one call
/// (`echo`) must be extracted with the literal kept as the `s` argument, and
/// ` done` returned as display output.
#[test]
fn streaming_json_tools_in_string_end_tag_with_incomplete_object_stays_buffered() {
    const FIRST_CHUNK: &str = r#"<tool_call>{"name":"echo","arguments":{"s":"</tool_call>"#;
    const SECOND_CHUNK: &str = r#""}}</tool_call> done"#;

    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);

    let after_first = processor.process_chunk(FIRST_CHUNK);
    assert_eq!(
        after_first, None,
        "in-string `</tool_call>` MUST NOT close section"
    );
    assert_eq!(processor.tool_calls.len(), 0);

    let after_second = processor.process_chunk(SECOND_CHUNK);
    assert_eq!(after_second.as_deref(), Some(" done"));
    assert_eq!(processor.tool_calls.len(), 1);
    let call = &processor.tool_calls[0];
    assert_eq!(call.name(), "echo");
    assert_eq!(
        *call.arguments(),
        serde_json::json!({"s": "</tool_call>"}),
        "in-string `</tool_call>` literal preserved verbatim"
    );
}
/// Per-parser audit: a malformed body inside a section that is closed by its wrapper end-tag
/// must yield zero calls, and the text after the end-tag in the same chunk must come back as
/// display output.
///
/// Each row is `(label, parser, single-chunk input, expected display)`. Inputs are kept as
/// `&'static str` (string literals and `concat!` output already are), so no per-row `String`
/// allocation is needed before handing the chunk to `run_with_parser`.
#[test]
fn try_parse_one_call_malformed_body_in_closed_section_per_parser_audit() {
    let rows: Vec<(&'static str, Box<dyn ToolParser>, &'static str, &'static str)> = vec![
        (
            "json_tools (no-{ malformed body)",
            Box::new(JsonTools),
            "<tool_call>bad</tool_call>visible",
            "visible",
        ),
        (
            "json_tools ({-open malformed body)",
            Box::new(JsonTools),
            "<tool_call>{</tool_call>visible",
            "visible",
        ),
        (
            "pythonic (no-[ malformed body)",
            Box::new(Pythonic),
            "<|tool_call_start|>bad<|tool_call_end|>visible",
            "visible",
        ),
        (
            "pythonic ([-open malformed body)",
            Box::new(Pythonic),
            "<|tool_call_start|>[bad<|tool_call_end|>visible",
            "visible",
        ),
        (
            "qwen3_coder (no-<function= malformed body)",
            Box::new(Qwen3Coder),
            "<tool_call>bad</tool_call>visible",
            "visible",
        ),
        (
            "qwen3_coder (<function= without close)",
            Box::new(Qwen3Coder),
            "<tool_call><function=f</tool_call>visible",
            "visible",
        ),
        (
            "glm47 ({-open malformed body)",
            Box::new(Glm47),
            "<tool_call>{</tool_call>visible",
            "visible",
        ),
        (
            "glm47 ([-open malformed body)",
            Box::new(Glm47),
            "<tool_call>[</tool_call>visible",
            "visible",
        ),
        (
            "longcat ({-open malformed body)",
            Box::new(Longcat),
            "<longcat_tool_call>{</longcat_tool_call>visible",
            "visible",
        ),
        (
            "kimi_k2 (call_begin without argument_begin)",
            Box::new(KimiK2),
            concat!(
                "<|tool_calls_section_begin|>",
                "<|tool_call_begin|>functions.f:0BAD",
                "<|tool_calls_section_end|>visible",
            ),
            "visible",
        ),
        (
            "kimi_k2 (args { without close)",
            Box::new(KimiK2),
            concat!(
                "<|tool_calls_section_begin|>",
                "<|tool_call_begin|>functions.f:0<|tool_call_argument_begin|>{",
                "<|tool_calls_section_end|>visible",
            ),
            "visible",
        ),
        (
            "minimax_m2 (<invoke without close)",
            Box::new(MinimaxM2),
            r#"<minimax:tool_call><invoke name="f"</minimax:tool_call>visible"#,
            "visible",
        ),
        (
            "function_gemma (no call: marker)",
            Box::new(FunctionGemma),
            "<start_function_call>bad<end_function_call>visible",
            "visible",
        ),
        (
            "function_gemma (call:NAME without {)",
            Box::new(FunctionGemma),
            "<start_function_call>call:f<end_function_call>visible",
            "visible",
        ),
        (
            "function_gemma (call:f{ without close)",
            Box::new(FunctionGemma),
            "<start_function_call>call:f{garbage<end_function_call>visible",
            "visible",
        ),
        (
            "gemma4 (no call: marker)",
            Box::new(Gemma4),
            "<|tool_call>bad<tool_call|>visible",
            "visible",
        ),
        (
            "gemma4 (call:f{ without close)",
            Box::new(Gemma4),
            "<|tool_call>call:f{garbage<tool_call|>visible",
            "visible",
        ),
    ];
    for (label, parser, buffer, expect_display) in rows {
        // The whole input is delivered as one chunk so the suffix shares a chunk with the end-tag.
        let (display, calls) = run_with_parser(parser, &[buffer]);
        assert_eq!(
            calls.len(),
            0,
            "{}: malformed body must produce zero calls",
            label,
        );
        assert_eq!(
            display, expect_display,
            "{}: same-chunk suffix must reach display verbatim",
            label,
        );
    }
}
/// Per-parser audit: when the buffer ends with an end-tag literal that sits inside an open
/// value (string, parameter, escape region), `try_parse_one_call` must return `Ok(None)`.
///
/// Each case is `(label, parser, buffer)`; the parser is called directly, not through the
/// streaming processor.
#[test]
fn try_parse_one_call_in_value_end_tag_stays_buffered_per_parser_audit() {
    let cases: Vec<(&'static str, Box<dyn ToolParser>, &'static str)> = vec![
        (
            "json_tools (in-JSON-string end-tag)",
            Box::new(JsonTools),
            r#"<tool_call>{"s":"</tool_call>"#,
        ),
        (
            "pythonic (in-single-quote end-tag)",
            Box::new(Pythonic),
            "<|tool_call_start|>[echo(s='<|tool_call_end|>",
        ),
        (
            "glm47 (in-JSON-string end-tag, object body)",
            Box::new(Glm47),
            r#"<tool_call>{"s":"</tool_call>"#,
        ),
        (
            "glm47 (in-JSON-string end-tag, array body)",
            Box::new(Glm47),
            r#"<tool_call>[{"s":"</tool_call>"#,
        ),
        (
            "longcat (in-JSON-string end-tag)",
            Box::new(Longcat),
            r#"<longcat_tool_call>{"s":"</longcat_tool_call>"#,
        ),
        (
            "kimi_k2 (in-args-JSON-string section-end)",
            Box::new(KimiK2),
            concat!(
                "<|tool_calls_section_begin|>",
                "<|tool_call_begin|>functions.f:0<|tool_call_argument_begin|>",
                r#"{"s":"<|tool_calls_section_end|>"#,
            ),
        ),
        (
            "qwen3_coder (in-parameter-value end-tag)",
            Box::new(Qwen3Coder),
            "<tool_call><function=f><parameter=p></tool_call>",
        ),
        (
            "minimax_m2 (in-parameter-value end-tag)",
            Box::new(MinimaxM2),
            r#"<minimax:tool_call><invoke name="f"><parameter name="p"></minimax:tool_call>"#,
        ),
        (
            "function_gemma (in-escape end-tag)",
            Box::new(FunctionGemma),
            "<start_function_call>call:f{k:<escape><end_function_call>",
        ),
        (
            "gemma4 (in-STR end-tag)",
            Box::new(Gemma4),
            r#"<|tool_call>call:f{k: <|"|><tool_call|>"#,
        ),
    ];
    for (label, parser, buffer) in cases {
        let outcome = match parser.try_parse_one_call(buffer, None) {
            Ok(outcome) => outcome,
            Err(e) => panic!("{}: try_parse_one_call errored: {e}", label),
        };
        assert!(
            outcome.is_none(),
            "{}: in-value end-tag literal MUST NOT close the section (got {:?})",
            label,
            outcome,
        );
    }
}
/// Suffix-bait case for `JsonTools`: a well-formed JSON object follows a malformed, closed
/// section in the same chunk. The object must come back as display text, not as a call.
#[test]
fn streaming_json_tools_suffix_object_after_malformed_section_preserved() {
    let chunks = [r#"<tool_call>bad</tool_call>{"name":"x","arguments":{}} tail"#];
    let (shown, parsed) = run_with_parser(Box::new(JsonTools), &chunks);
    assert_eq!(
        parsed.len(),
        0,
        "malformed body must not produce a call, and the suffix object must not be confused for one in the same section"
    );
    assert_eq!(
        shown, r#"{"name":"x","arguments":{}} tail"#,
        "FULL suffix (object literal + tail text) survives the suffix-bait attack"
    );
}
/// Split-chunk form of the `JsonTools` suffix-bait case: the malformed body is chunk 1, the
/// end-tag plus JSON-object suffix is chunk 2.
#[test]
fn streaming_json_tools_suffix_object_after_malformed_section_preserved_split_chunk() {
    let chunks = [
        "<tool_call>bad",
        r#"</tool_call>{"name":"x","arguments":{}} tail"#,
    ];
    let (shown, parsed) = run_with_parser(Box::new(JsonTools), &chunks);
    assert_eq!(parsed.len(), 0);
    assert_eq!(shown, r#"{"name":"x","arguments":{}} tail"#);
}
/// Suffix-bait case for `Pythonic`: a `[echo(x=1)]` literal follows a malformed, closed
/// section and must be returned as display text with no call extracted.
#[test]
fn streaming_pythonic_suffix_call_after_malformed_section_preserved() {
    let chunks = ["<|tool_call_start|>bad<|tool_call_end|>[echo(x=1)] tail"];
    let (shown, parsed) = run_with_parser(Box::new(Pythonic), &chunks);
    assert_eq!(parsed.len(), 0);
    assert_eq!(
        shown, "[echo(x=1)] tail",
        "FULL suffix (pythonic call literal + tail) survives the suffix-bait attack"
    );
}
/// Split-chunk form of the `Pythonic` suffix-bait case: body in chunk 1, end-tag plus
/// call-shaped suffix in chunk 2.
#[test]
fn streaming_pythonic_suffix_call_after_malformed_section_preserved_split_chunk() {
    let chunks = [
        "<|tool_call_start|>bad",
        "<|tool_call_end|>[echo(x=1)] tail",
    ];
    let (shown, parsed) = run_with_parser(Box::new(Pythonic), &chunks);
    assert_eq!(parsed.len(), 0);
    assert_eq!(shown, "[echo(x=1)] tail");
}
/// Suffix-bait case for `Qwen3Coder`: a complete `<function=...>` literal follows a malformed,
/// closed section and must be returned as display text with no call extracted.
#[test]
fn streaming_qwen3_coder_suffix_function_after_malformed_section_preserved() {
    let chunks =
        ["<tool_call>bad</tool_call><function=f><parameter=p>v</parameter></function> tail"];
    let (shown, parsed) = run_with_parser(Box::new(Qwen3Coder), &chunks);
    assert_eq!(parsed.len(), 0);
    assert_eq!(
        shown, "<function=f><parameter=p>v</parameter></function> tail",
        "FULL suffix (qwen function literal + tail) survives the suffix-bait attack"
    );
}
/// Split-chunk form of the `Qwen3Coder` suffix-bait case: body in chunk 1, end-tag plus
/// function-shaped suffix in chunk 2.
#[test]
fn streaming_qwen3_coder_suffix_function_after_malformed_section_preserved_split_chunk() {
    let chunks = [
        "<tool_call>bad",
        "</tool_call><function=f><parameter=p>v</parameter></function> tail",
    ];
    let (shown, parsed) = run_with_parser(Box::new(Qwen3Coder), &chunks);
    assert_eq!(parsed.len(), 0);
    assert_eq!(
        shown,
        "<function=f><parameter=p>v</parameter></function> tail"
    );
}
/// Suffix-bait case for `FunctionGemma`: a `call:f{k:v}` literal follows a malformed, closed
/// section and must be returned as display text with no call extracted.
#[test]
fn streaming_function_gemma_suffix_call_after_malformed_section_preserved() {
    let chunks = ["<start_function_call>bad<end_function_call>call:f{k:v} tail"];
    let (shown, parsed) = run_with_parser(Box::new(FunctionGemma), &chunks);
    assert_eq!(parsed.len(), 0);
    assert_eq!(
        shown, "call:f{k:v} tail",
        "FULL suffix (function_gemma call literal + tail) survives the suffix-bait attack"
    );
}
/// Split-chunk form of the `FunctionGemma` suffix-bait case: body in chunk 1, end-tag plus
/// call-shaped suffix in chunk 2.
#[test]
fn streaming_function_gemma_suffix_call_after_malformed_section_preserved_split_chunk() {
    let chunks = [
        "<start_function_call>bad",
        "<end_function_call>call:f{k:v} tail",
    ];
    let (shown, parsed) = run_with_parser(Box::new(FunctionGemma), &chunks);
    assert_eq!(parsed.len(), 0);
    assert_eq!(shown, "call:f{k:v} tail");
}
/// Suffix-bait case for `Glm47`. Unlike the stricter parsers, this test expects ONE call
/// (named `bad`) from the plain-text body; the JSON-object suffix must still be display text.
#[test]
fn streaming_glm47_suffix_object_after_malformed_section_preserved() {
    let chunks = [r#"<tool_call>bad</tool_call>{"name":"y"} tail"#];
    let (shown, parsed) = run_with_parser(Box::new(Glm47), &chunks);
    assert_eq!(
        parsed.len(),
        1,
        "glm47 is permissive: plain-text body `bad` becomes ToolCall(`bad`); the invariant is suffix preservation, not call rejection"
    );
    assert_eq!(parsed[0].name(), "bad", "plain-text body parsed as name");
    assert_eq!(
        shown, r#"{"name":"y"} tail"#,
        "FULL suffix (object literal + tail) survives the suffix-bait attack — body scan must not lock onto the suffix object"
    );
}
/// Suffix-bait case for `Longcat`: the plain-text body `bad` yields no call, and the
/// JSON-object suffix after the end-tag must be returned as display text.
#[test]
fn streaming_longcat_suffix_object_after_malformed_section_preserved() {
    let chunks = [r#"<longcat_tool_call>bad</longcat_tool_call>{"name":"y"} tail"#];
    let (shown, parsed) = run_with_parser(Box::new(Longcat), &chunks);
    assert_eq!(
        parsed.len(),
        0,
        "longcat rejects body `bad` (no `<longcat_arg_key>`, not JSON) → zero calls"
    );
    assert_eq!(
        shown, r#"{"name":"y"} tail"#,
        "FULL suffix (object literal + tail) survives the suffix-bait attack — body scan must not lock onto the suffix object"
    );
}
/// Per-parser audit of the suffix-bait scenario through the streaming path: each input is a
/// malformed, closed section followed by text shaped like that parser's own call syntax.
///
/// For every case the call count must equal `expect_calls` and the display output must equal
/// `expect_display`.
#[test]
fn try_parse_one_call_suffix_starting_with_parser_syntax_per_parser_audit() {
    struct Case {
        /// Name shown in assertion failures.
        label: &'static str,
        /// Parser under test.
        parser: Box<dyn ToolParser>,
        /// Entire input, fed as a single chunk.
        buffer: &'static str,
        /// Display text the run must return.
        expect_display: &'static str,
        /// Number of calls the run must return.
        expect_calls: usize,
    }
    let cases: Vec<Case> = vec![
        Case {
            label: "json_tools (suffix = JSON object)",
            parser: Box::new(JsonTools),
            buffer: r#"<tool_call>bad</tool_call>{"name":"x","arguments":{}} tail"#,
            expect_display: r#"{"name":"x","arguments":{}} tail"#,
            expect_calls: 0,
        },
        Case {
            label: "pythonic (suffix = [call(args)])",
            parser: Box::new(Pythonic),
            buffer: "<|tool_call_start|>bad<|tool_call_end|>[echo(x=1)] tail",
            expect_display: "[echo(x=1)] tail",
            expect_calls: 0,
        },
        Case {
            label: "qwen3_coder (suffix = <function=...>)",
            parser: Box::new(Qwen3Coder),
            buffer: "<tool_call>bad</tool_call><function=f><parameter=p>v</parameter></function> tail",
            expect_display: "<function=f><parameter=p>v</parameter></function> tail",
            expect_calls: 0,
        },
        Case {
            label: "glm47 (suffix = JSON object after non-JSON body)",
            parser: Box::new(Glm47),
            buffer: r#"<tool_call>bad</tool_call>{"name":"y"} tail"#,
            expect_display: r#"{"name":"y"} tail"#,
            expect_calls: 1,
        },
        Case {
            label: "longcat (suffix = JSON object after non-JSON body)",
            parser: Box::new(Longcat),
            buffer: r#"<longcat_tool_call>bad</longcat_tool_call>{"name":"y"} tail"#,
            expect_display: r#"{"name":"y"} tail"#,
            expect_calls: 0,
        },
        Case {
            label: "function_gemma (suffix = call:f{k:v})",
            parser: Box::new(FunctionGemma),
            buffer: "<start_function_call>bad<end_function_call>call:f{k:v} tail",
            expect_display: "call:f{k:v} tail",
            expect_calls: 0,
        },
    ];
    for Case {
        label,
        parser,
        buffer,
        expect_display,
        expect_calls,
    } in cases
    {
        let (display, calls) = run_with_parser(parser, &[buffer]);
        assert_eq!(
            calls.len(),
            expect_calls,
            "{}: call count must match parser's per-body acceptance baseline (suffix preservation changes neither)",
            label,
        );
        assert_eq!(
            display, expect_display,
            "{}: FULL suffix bytes must reach display verbatim (not silently dropped, not partially parsed)",
            label,
        );
    }
}
/// Per-parser audit of `try_parse_one_call` on suffix-bait buffers: the call must return
/// `Some((calls, end_pos))` with `end_pos` equal to the byte length of the text up to and
/// including the first wrapper end-tag, and `calls.is_empty()` must match the case's baseline.
#[test]
fn try_parse_one_call_suffix_bait_end_pos_lands_at_wrapper_close_per_parser_audit() {
    struct Case {
        /// Name shown in assertion failures.
        label: &'static str,
        /// Parser under test.
        parser: Box<dyn ToolParser>,
        /// Buffer passed to `try_parse_one_call`.
        buffer: &'static str,
        /// Expected consumed length (through the first wrapper end-tag).
        expect_end_pos: usize,
        /// Whether the returned call list is expected to be empty.
        expect_calls_empty: bool,
    }
    let cases = [
        Case {
            label: "json_tools",
            parser: Box::new(JsonTools),
            buffer: r#"<tool_call>bad</tool_call>{"name":"x","arguments":{}} tail"#,
            expect_end_pos: "<tool_call>bad</tool_call>".len(),
            expect_calls_empty: true,
        },
        Case {
            label: "pythonic",
            parser: Box::new(Pythonic),
            buffer: "<|tool_call_start|>bad<|tool_call_end|>[echo(x=1)] tail",
            expect_end_pos: "<|tool_call_start|>bad<|tool_call_end|>".len(),
            expect_calls_empty: true,
        },
        Case {
            label: "qwen3_coder",
            parser: Box::new(Qwen3Coder),
            buffer: "<tool_call>bad</tool_call><function=f><parameter=p>v</parameter></function> tail",
            expect_end_pos: "<tool_call>bad</tool_call>".len(),
            expect_calls_empty: true,
        },
        Case {
            label: "glm47",
            parser: Box::new(Glm47),
            buffer: r#"<tool_call>bad</tool_call>{"name":"y"} tail"#,
            expect_end_pos: "<tool_call>bad</tool_call>".len(),
            expect_calls_empty: false,
        },
        Case {
            label: "longcat",
            parser: Box::new(Longcat),
            buffer: r#"<longcat_tool_call>bad</longcat_tool_call>{"name":"y"} tail"#,
            expect_end_pos: "<longcat_tool_call>bad</longcat_tool_call>".len(),
            expect_calls_empty: true,
        },
        Case {
            label: "function_gemma",
            parser: Box::new(FunctionGemma),
            buffer: "<start_function_call>bad<end_function_call>call:f{k:v} tail",
            expect_end_pos: "<start_function_call>bad<end_function_call>".len(),
            expect_calls_empty: true,
        },
    ];
    for case in cases.iter() {
        let outcome = match case.parser.try_parse_one_call(case.buffer, None) {
            Ok(outcome) => outcome,
            Err(e) => panic!("{}: try_parse_one_call errored: {e}", case.label),
        };
        let Some((calls, end_pos)) = outcome else {
            panic!(
                "{}: confirmed-bounded section expected (the wrapper end-tag is in the buffer), got Ok(None) — regression: opener-search likely locked onto suffix-bait",
                case.label,
            )
        };
        assert_eq!(
            end_pos, case.expect_end_pos,
            "{}: end_pos must land at the FIRST wrapper close — body scan must not advance past the bound prefix",
            case.label,
        );
        assert_eq!(
            calls.is_empty(),
            case.expect_calls_empty,
            "{}: per-parser call-acceptance baseline for malformed body inside bounded prefix (got {:?})",
            case.label,
            calls,
        );
    }
}
/// `JsonTools`: an unmatched `"` in a malformed body sits right before the end-tag. The test
/// expects no calls and the JSON-object suffix returned in full as display text.
#[test]
fn streaming_json_tools_orphan_quote_in_malformed_body_does_not_hide_close() {
    let chunks = [r#"<tool_call>bad"</tool_call>{"name":"x"}"#];
    let (shown, parsed) = run_with_parser(Box::new(JsonTools), &chunks);
    assert_eq!(
        parsed.len(),
        0,
        "orphan `\"` BEFORE wrapper close must not hide the real end-tag",
    );
    assert_eq!(
        shown, r#"{"name":"x"}"#,
        "FULL suffix bytes reach display — body scan must not lock onto the orphan `\"`",
    );
}
/// `Pythonic`: an unmatched `'` in a malformed body sits right before the end-tag. The test
/// expects no calls and the call-shaped suffix returned in full as display text.
#[test]
fn streaming_pythonic_orphan_quote_in_malformed_body_does_not_hide_close() {
    let chunks = ["<|tool_call_start|>bad'<|tool_call_end|>[echo(x=1)] tail"];
    let (shown, parsed) = run_with_parser(Box::new(Pythonic), &chunks);
    assert_eq!(
        parsed.len(),
        0,
        "orphan `'` BEFORE wrapper close must not hide the real end-tag",
    );
    assert_eq!(
        shown, "[echo(x=1)] tail",
        "FULL suffix bytes reach display — body scan must not lock onto the orphan `'`",
    );
}
/// `Qwen3Coder`: an unclosed `<parameter=p>` in a malformed body sits right before the
/// end-tag. The test expects no calls and the function-shaped suffix returned in full.
#[test]
fn streaming_qwen3_coder_orphan_parameter_in_malformed_body_does_not_hide_close() {
    let chunks = [
        "<tool_call>bad<parameter=p></tool_call><function=f><parameter=p>v</parameter></function> tail",
    ];
    let (shown, parsed) = run_with_parser(Box::new(Qwen3Coder), &chunks);
    assert_eq!(
        parsed.len(),
        0,
        "orphan `<parameter=` BEFORE wrapper close must not hide the real end-tag",
    );
    assert_eq!(
        shown, "<function=f><parameter=p>v</parameter></function> tail",
        "FULL suffix bytes reach display — body scan must not lock onto the orphan `<parameter=`",
    );
}
/// `Glm47` with a `{garbage}` body: the test expects one call whose name is the literal
/// `{garbage}`, and the JSON-object suffix returned in full as display text.
#[test]
fn streaming_glm47_orphan_quote_in_malformed_body_does_not_hide_close() {
    let chunks = [r#"<tool_call>{garbage}</tool_call>{"name":"y"} tail"#];
    let (shown, parsed) = run_with_parser(Box::new(Glm47), &chunks);
    assert_eq!(parsed.len(), 1, "glm47 permissive parse on `{{garbage}}`");
    assert_eq!(parsed[0].name(), "{garbage}");
    assert_eq!(
        shown, r#"{"name":"y"} tail"#,
        "FULL suffix bytes reach display — Object arm race must close at the FIRST wrapper end-tag",
    );
}
/// `Glm47` with a `[garbage]` body: the test expects one call whose name is the literal
/// `[garbage]`, and the JSON-object suffix returned in full as display text.
#[test]
fn streaming_glm47_orphan_bracket_in_malformed_body_does_not_hide_close() {
    let chunks = [r#"<tool_call>[garbage]</tool_call>{"name":"y"} tail"#];
    let (shown, parsed) = run_with_parser(Box::new(Glm47), &chunks);
    assert_eq!(parsed.len(), 1, "glm47 permissive parse on `[garbage]`");
    assert_eq!(parsed[0].name(), "[garbage]");
    assert_eq!(
        shown, r#"{"name":"y"} tail"#,
        "FULL suffix bytes reach display — Array arm race must close at the FIRST wrapper end-tag",
    );
}
/// `Glm47` with an unclosed `<arg_key>` after a plain-text name: the test expects one call
/// named `bad`, and the JSON-object suffix returned in full as display text.
#[test]
fn streaming_glm47_orphan_arg_key_in_malformed_body_does_not_hide_close() {
    let chunks = [r#"<tool_call>bad<arg_key></tool_call>{"name":"y"} tail"#];
    let (shown, parsed) = run_with_parser(Box::new(Glm47), &chunks);
    assert_eq!(parsed.len(), 1, "glm47 permissive parse extracts one call");
    assert_eq!(parsed[0].name(), "bad");
    assert_eq!(
        shown, r#"{"name":"y"} tail"#,
        "FULL suffix bytes reach display — the None-arm arg-key race stays correct against the orphan-quote case",
    );
}
/// `Longcat` with a `{garbage}` body: the test expects no calls, and the JSON-object suffix
/// returned in full as display text.
#[test]
fn streaming_longcat_orphan_quote_in_malformed_body_does_not_hide_close() {
    let chunks = [r#"<longcat_tool_call>{garbage}</longcat_tool_call>{"name":"y"} tail"#];
    let (shown, parsed) = run_with_parser(Box::new(Longcat), &chunks);
    assert_eq!(
        parsed.len(),
        0,
        "longcat strict on malformed `{{garbage}}` body"
    );
    assert_eq!(
        shown, r#"{"name":"y"} tail"#,
        "FULL suffix bytes reach display — Object arm race must close at the FIRST wrapper end-tag",
    );
}
/// `FunctionGemma`: an unmatched `<escape>` in a malformed body sits right before the
/// end-tag. The test expects no calls and the call-shaped suffix returned in full.
#[test]
fn streaming_function_gemma_orphan_escape_in_malformed_body_does_not_hide_close() {
    let chunks = ["<start_function_call>bad<escape><end_function_call>call:f{k:v} tail"];
    let (shown, parsed) = run_with_parser(Box::new(FunctionGemma), &chunks);
    assert_eq!(
        parsed.len(),
        0,
        "orphan `<escape>` BEFORE wrapper close must not hide the real end-tag",
    );
    assert_eq!(
        shown, "call:f{k:v} tail",
        "FULL suffix bytes reach display — body scan must not lock onto the orphan `<escape>`",
    );
}
/// Per-parser audit of `try_parse_one_call` on bodies containing an orphan value marker
/// (unmatched quote, `<parameter=`, `<arg_key>`, `<escape>`, or a `{`-leading malformed body)
/// just before the wrapper end-tag.
///
/// Each case must return `Some(..)` with `end_pos` equal to the byte length of the text
/// through the first wrapper end-tag. The returned calls are not inspected here.
#[test]
fn try_parse_one_call_orphan_value_markers_per_parser_audit() {
    struct Case {
        /// Name shown in assertion failures.
        label: &'static str,
        /// Parser under test.
        parser: Box<dyn ToolParser>,
        /// Buffer passed to `try_parse_one_call`.
        buffer: &'static str,
        /// Expected consumed length (through the first wrapper end-tag).
        expect_end_pos: usize,
    }
    let cases = [
        Case {
            label: "json_tools (orphan `\"`)",
            parser: Box::new(JsonTools),
            buffer: r#"<tool_call>bad"</tool_call>{"name":"x"}"#,
            expect_end_pos: r#"<tool_call>bad"</tool_call>"#.len(),
        },
        Case {
            label: "pythonic (orphan `'`)",
            parser: Box::new(Pythonic),
            buffer: "<|tool_call_start|>bad'<|tool_call_end|>[echo(x=1)] tail",
            expect_end_pos: "<|tool_call_start|>bad'<|tool_call_end|>".len(),
        },
        Case {
            label: "qwen3_coder (orphan `<parameter=`)",
            parser: Box::new(Qwen3Coder),
            buffer: "<tool_call>bad<parameter=p></tool_call><function=f><parameter=p>v</parameter></function> tail",
            expect_end_pos: "<tool_call>bad<parameter=p></tool_call>".len(),
        },
        Case {
            label: "glm47 None arm (orphan `<arg_key>`)",
            parser: Box::new(Glm47),
            buffer: r#"<tool_call>bad<arg_key></tool_call>{"name":"y"} tail"#,
            expect_end_pos: r#"<tool_call>bad<arg_key></tool_call>"#.len(),
        },
        Case {
            label: "longcat Object arm (malformed `{`-leading)",
            parser: Box::new(Longcat),
            buffer: r#"<longcat_tool_call>{garbage}</longcat_tool_call>{"name":"y"} tail"#,
            expect_end_pos: r#"<longcat_tool_call>{garbage}</longcat_tool_call>"#.len(),
        },
        Case {
            label: "function_gemma (orphan `<escape>`)",
            parser: Box::new(FunctionGemma),
            buffer: "<start_function_call>bad<escape><end_function_call>call:f{k:v} tail",
            expect_end_pos: "<start_function_call>bad<escape><end_function_call>".len(),
        },
    ];
    for case in cases.iter() {
        let outcome = match case.parser.try_parse_one_call(case.buffer, None) {
            Ok(outcome) => outcome,
            Err(e) => panic!("{}: try_parse_one_call errored: {e}", case.label),
        };
        let Some((_, end_pos)) = outcome else {
            panic!(
                "{}: confirmed-bounded section expected (the wrapper end-tag is in the buffer), got Ok(None) — regression: orphan value marker hid the real wrapper close",
                case.label,
            )
        };
        assert_eq!(
            end_pos, case.expect_end_pos,
            "{}: end_pos must land at the FIRST wrapper close — orphan value marker must not bias the body scan",
            case.label,
        );
    }
}
/// `JsonTools`: the malformed body `bad{"` contains a non-leading `{` followed by an
/// unmatched `"`. The test expects no calls and the JSON-object suffix returned in full.
#[test]
fn streaming_json_tools_stray_open_brace_in_malformed_body_does_not_hide_close() {
    let chunks = [r#"<tool_call>bad{"</tool_call>{"name":"x"}"#];
    let (shown, parsed) = run_with_parser(Box::new(JsonTools), &chunks);
    assert_eq!(
        parsed.len(),
        0,
        "stray `{{` in malformed body must not unlock JSON-quote-aware scan",
    );
    assert_eq!(
        shown, r#"{"name":"x"}"#,
        "FULL suffix bytes reach display — context predicate requires `{{` as LEADING shape, not any-position match",
    );
}
/// `Pythonic`: the malformed body `bad['` contains a `[` not followed by a `name(` shape,
/// then an unmatched `'`. The test expects no calls and the call-shaped suffix in full.
#[test]
fn streaming_pythonic_stray_open_bracket_in_malformed_body_does_not_hide_close() {
    let chunks = ["<|tool_call_start|>bad['<|tool_call_end|>[name(x=1)] tail"];
    let (shown, parsed) = run_with_parser(Box::new(Pythonic), &chunks);
    assert_eq!(
        parsed.len(),
        0,
        "stray `[` (without `[name(` shape) in malformed body must not unlock Python-quote-aware scan",
    );
    assert_eq!(
        shown, "[name(x=1)] tail",
        "FULL suffix bytes reach display — context predicate requires `[name(` SHAPE, not just any `[`",
    );
}
/// `Qwen3Coder`: the malformed body holds a `<function= ` literal with no name/close, then an
/// unclosed `<parameter=p>`. The test expects no calls and the function-shaped suffix in full.
#[test]
fn streaming_qwen3_coder_stray_function_open_in_malformed_body_does_not_hide_close() {
    let chunks = [
        "<tool_call>bad<function= <parameter=p></tool_call><function=f><parameter=p>v</parameter></function> tail",
    ];
    let (shown, parsed) = run_with_parser(Box::new(Qwen3Coder), &chunks);
    assert_eq!(
        parsed.len(),
        0,
        "stray `<function=` (without `NAME>` close) in malformed body must not unlock parameter-value-aware scan",
    );
    assert_eq!(
        shown, "<function=f><parameter=p>v</parameter></function> tail",
        "FULL suffix bytes reach display — context predicate requires `<function=NAME>` SHAPE, not just `<function=` literal",
    );
}
/// `FunctionGemma`: the malformed body holds a bare `call:` with no `NAME{`, then an
/// unmatched `<escape>`. The test expects no calls and the call-shaped suffix in full.
#[test]
fn streaming_function_gemma_stray_call_in_malformed_body_does_not_hide_close() {
    let chunks = ["<start_function_call>bad call:<escape><end_function_call>call:f{k:v} tail"];
    let (shown, parsed) = run_with_parser(Box::new(FunctionGemma), &chunks);
    assert_eq!(
        parsed.len(),
        0,
        "stray `call:` (without `NAME{{` shape) in malformed body must not unlock escape-region-aware scan",
    );
    assert_eq!(
        shown, "call:f{k:v} tail",
        "FULL suffix bytes reach display — context predicate requires `call:NAME{{` SHAPE, not just `call:` literal",
    );
}
/// Per-parser audit of `try_parse_one_call` on malformed bodies that contain a stray opener
/// literal (and, for most rows, an orphan value marker) ahead of the wrapper end-tag.
///
/// Each case must return `Some(..)` with `end_pos` equal to the byte length of the text
/// through the first wrapper end-tag. The returned calls are not inspected here.
#[test]
fn try_parse_one_call_stray_opener_per_parser_audit() {
    struct Case {
        /// Name shown in assertion failures.
        label: &'static str,
        /// Parser under test.
        parser: Box<dyn ToolParser>,
        /// Buffer passed to `try_parse_one_call`.
        buffer: &'static str,
        /// Expected consumed length (through the first wrapper end-tag).
        expect_end_pos: usize,
    }
    let cases = [
        Case {
            label: "json_tools (stray `{` + orphan `\"`)",
            parser: Box::new(JsonTools),
            buffer: r#"<tool_call>bad{"</tool_call>{"name":"x"}"#,
            expect_end_pos: r#"<tool_call>bad{"</tool_call>"#.len(),
        },
        Case {
            label: "pythonic (stray `[` + orphan `'`)",
            parser: Box::new(Pythonic),
            buffer: "<|tool_call_start|>bad['<|tool_call_end|>[name(x=1)] tail",
            expect_end_pos: "<|tool_call_start|>bad['<|tool_call_end|>".len(),
        },
        Case {
            label: "qwen3_coder (stray `<function=` + orphan `<parameter=`)",
            parser: Box::new(Qwen3Coder),
            buffer: "<tool_call>bad<function= <parameter=p></tool_call><function=f><parameter=p>v</parameter></function> tail",
            expect_end_pos: "<tool_call>bad<function= <parameter=p></tool_call>".len(),
        },
        Case {
            label: "function_gemma (stray `call:` + orphan `<escape>`)",
            parser: Box::new(FunctionGemma),
            buffer: "<start_function_call>bad call:<escape><end_function_call>call:f{k:v} tail",
            expect_end_pos: "<start_function_call>bad call:<escape><end_function_call>".len(),
        },
        Case {
            label: "glm47 None arm (stray `<arg_value>` without `<arg_key>`)",
            parser: Box::new(Glm47),
            buffer: r#"<tool_call>bad<arg_value></tool_call>{"name":"y"} tail"#,
            expect_end_pos: r#"<tool_call>bad<arg_value></tool_call>"#.len(),
        },
    ];
    for case in cases.iter() {
        let outcome = match case.parser.try_parse_one_call(case.buffer, None) {
            Ok(outcome) => outcome,
            Err(e) => panic!("{}: try_parse_one_call errored: {e}", case.label),
        };
        let Some((_, end_pos)) = outcome else {
            panic!(
                "{}: confirmed-bounded section expected (the wrapper end-tag is in the buffer), got Ok(None) — regression: stray opener literal unlocked syntax-aware scan and orphan marker hid the wrapper close",
                case.label,
            )
        };
        assert_eq!(
            end_pos, case.expect_end_pos,
            "{}: end_pos must land at the FIRST wrapper close — stray opener literal must not satisfy the structural context predicate",
            case.label,
        );
    }
}
/// `Pythonic` with a digit-leading function name (`1tool`) whose single-quoted argument
/// contains the end-tag literal.
///
/// Expects exactly one call named `1tool` with `s` equal to the end-tag text, and only the
/// ` tail` after the second end-tag as display output.
#[test]
fn streaming_pythonic_digit_leading_name_with_in_string_end_marker_does_not_drop_call() {
    let chunks = ["<|tool_call_start|>[1tool(s='<|tool_call_end|>')]<|tool_call_end|> tail"];
    let (shown, parsed) = run_with_parser(Box::new(Pythonic), &chunks);
    assert_eq!(
        parsed.len(),
        1,
        "digit-leading pythonic name MUST be accepted by the shared recognizer",
    );
    let call = &parsed[0];
    assert_eq!(call.name(), "1tool");
    assert_eq!(
        *call.arguments(),
        serde_json::json!({ "s": "<|tool_call_end|>" }),
        "in-single-quoted-string `<|tool_call_end|>` literal MUST survive the quote-aware scan when the recognizer accepts the digit-leading name",
    );
    assert_eq!(
        shown, " tail",
        "FULL suffix (just the ` tail` past the SECOND wrapper end-tag) reaches display — the in-string end-marker MUST NOT be treated as the wrapper close",
    );
}
/// `Pythonic`: the malformed body contains `[name (` (a space before `(`). The test expects
/// no calls and the call-shaped suffix after the first end-tag returned in full.
#[test]
fn streaming_pythonic_stray_open_bracket_with_whitespace_in_malformed_body_does_not_hide_close() {
    let chunks = ["<|tool_call_start|>bad[name (<|tool_call_end|>[real(x=1)] tail"];
    let (shown, parsed) = run_with_parser(Box::new(Pythonic), &chunks);
    assert_eq!(
        parsed.len(),
        0,
        "stray `[name (` (whitespace before `(`) MUST NOT context-prove pythonic — predicate must match the parser's no-whitespace recognizer",
    );
    assert_eq!(
        shown, "[real(x=1)] tail",
        "FULL suffix bytes reach display — the wrapper close MUST be hit at the FIRST end-tag when the predicate correctly rejects the whitespace-bearing opener",
    );
}
/// `FunctionGemma`: the malformed body contains `call:f {` (a space before `{`) followed by
/// an unmatched `<escape>`. The test expects no calls and the call-shaped suffix in full.
#[test]
fn streaming_function_gemma_stray_call_with_whitespace_in_malformed_body_does_not_hide_close() {
    let chunks = ["<start_function_call>bad call:f {<escape><end_function_call>call:f{k:v} tail"];
    let (shown, parsed) = run_with_parser(Box::new(FunctionGemma), &chunks);
    assert_eq!(
        parsed.len(),
        0,
        "stray `call:f {{` (whitespace before `{{`) MUST NOT context-prove function_gemma — predicate must match the parser's no-whitespace recognizer",
    );
    assert_eq!(
        shown, "call:f{k:v} tail",
        "FULL suffix bytes reach display — the wrapper close MUST be hit at the FIRST end-tag when the predicate correctly rejects the whitespace-bearing opener",
    );
}
/// `Qwen3Coder` with a dotted function name (`foo.bar`) whose parameter value contains the
/// `</tool_call>` literal.
///
/// Expects exactly one call named `foo.bar` with the parameter value intact, and only the
/// ` tail` after the final end-tag as display output.
#[test]
fn streaming_qwen3_coder_dotted_name_with_in_parameter_end_marker_does_not_drop_call() {
    let chunks = [
        "<tool_call><function=foo.bar><parameter=p>contains </tool_call> bytes</parameter></function></tool_call> tail",
    ];
    let (shown, parsed) = run_with_parser(Box::new(Qwen3Coder), &chunks);
    assert_eq!(
        parsed.len(),
        1,
        "dotted-name qwen3_coder body MUST be accepted by the shared recognizer",
    );
    let call = &parsed[0];
    assert_eq!(call.name(), "foo.bar");
    assert_eq!(
        *call.arguments(),
        serde_json::json!({ "p": "contains </tool_call> bytes" }),
        "in-parameter `</tool_call>` literal MUST survive the parameter-value-aware scan when the recognizer accepts the dotted name",
    );
    assert_eq!(
        shown, " tail",
        "FULL suffix (just the ` tail` past the REAL wrapper end-tag) reaches display — the in-parameter end-marker MUST NOT be treated as the wrapper close",
    );
}
/// `Qwen3Coder` with a function name containing a space (`foo bar`) whose parameter value
/// contains the `</tool_call>` literal.
///
/// Expects exactly one call named `foo bar` with the parameter value intact, and only the
/// ` tail` after the final end-tag as display output.
#[test]
fn streaming_qwen3_coder_spaced_name_with_in_parameter_end_marker_does_not_drop_call() {
    let chunks = [
        "<tool_call><function=foo bar><parameter=p>has </tool_call> in value</parameter></function></tool_call> tail",
    ];
    let (shown, parsed) = run_with_parser(Box::new(Qwen3Coder), &chunks);
    assert_eq!(
        parsed.len(),
        1,
        "space-bearing qwen3_coder name MUST be accepted by the shared recognizer",
    );
    let call = &parsed[0];
    assert_eq!(call.name(), "foo bar");
    assert_eq!(
        *call.arguments(),
        serde_json::json!({ "p": "has </tool_call> in value" }),
        "in-parameter `</tool_call>` literal MUST survive the parameter-value-aware scan when the recognizer accepts the spaced name",
    );
    assert_eq!(
        shown, " tail",
        "FULL suffix bytes reach display — the in-parameter end-marker MUST NOT be treated as the wrapper close when the recognizer accepts the spaced name",
    );
}
/// `Qwen3Coder`: the first `<function=` opener is malformed (`<function=a<...`) and is
/// followed by a well-formed `<function=real>` block inside the same section.
///
/// Expects no calls and only ` tail` as display output.
#[test]
fn streaming_qwen3_coder_malformed_outer_opener_with_nested_valid_does_not_extract_nested_as_call()
{
    let chunks = [
        "<tool_call><function=a<function=real><parameter=p>v</parameter></function></tool_call> tail",
    ];
    let (shown, parsed) = run_with_parser(Box::new(Qwen3Coder), &chunks);
    assert_eq!(
        parsed.len(),
        0,
        "malformed outer `<function=a<...>` opener MUST NOT be bypassed by scanning past to a nested `<function=real>` opener (the first `<function=` literal IS the section's structural anchor — if it is malformed the section as a whole is malformed)",
    );
    assert_eq!(
        shown, " tail",
        "FULL same-chunk suffix bytes reach display — terminal-on-first-marker MUST reject the section without emitting a nested-marker call",
    );
}
/// `Qwen3Coder`: the first opener has an empty name (`<function=>`) and is followed by a
/// well-formed `<function=real>` block inside the same section.
///
/// Expects no calls and only ` tail` as display output.
#[test]
fn streaming_qwen3_coder_empty_name_opener_with_nested_valid_does_not_extract_nested() {
    let chunks = [
        "<tool_call><function=><function=real><parameter=p>v</parameter></function></tool_call> tail",
    ];
    let (shown, parsed) = run_with_parser(Box::new(Qwen3Coder), &chunks);
    assert_eq!(
        parsed.len(),
        0,
        "malformed outer `<function=>` (empty name) opener MUST NOT be bypassed by scanning past to a nested `<function=real>` opener",
    );
    assert_eq!(
        shown, " tail",
        "FULL same-chunk suffix bytes reach display — terminal-on-first-marker MUST reject the section without emitting a nested-marker call",
    );
}
/// Accept/reject matrix for `try_parse_one_call` across parsers and body shapes.
///
/// Every buffer ends with its wrapper end-tag, so each call must return `Some(..)`. Cases
/// with `should_extract == true` must yield at least one call; the others must yield none.
#[test]
fn try_parse_one_call_context_predicate_matches_recognizer_per_parser() {
    struct Case {
        /// Name shown in assertion failures.
        label: &'static str,
        /// Parser under test.
        parser: Box<dyn ToolParser>,
        /// Complete wrapped section passed to `try_parse_one_call`.
        buffer: &'static str,
        /// Whether at least one call is expected from this body.
        should_extract: bool,
    }
    let cases: Vec<Case> = vec![
        Case {
            label: "pythonic accept: digit-leading name `[1tool(x=1)]`",
            parser: Box::new(Pythonic),
            buffer: "<|tool_call_start|>[1tool(x=1)]<|tool_call_end|>",
            should_extract: true,
        },
        Case {
            label: "pythonic accept: underscore-leading name `[_tool(x=1)]`",
            parser: Box::new(Pythonic),
            buffer: "<|tool_call_start|>[_tool(x=1)]<|tool_call_end|>",
            should_extract: true,
        },
        Case {
            label: "pythonic reject: whitespace before name `[ tool(x=1)]`",
            parser: Box::new(Pythonic),
            buffer: "<|tool_call_start|>[ tool(x=1)]<|tool_call_end|>",
            should_extract: false,
        },
        Case {
            label: "pythonic reject: whitespace before `(` `[tool (x=1)]`",
            parser: Box::new(Pythonic),
            buffer: "<|tool_call_start|>[tool (x=1)]<|tool_call_end|>",
            should_extract: false,
        },
        Case {
            label: "pythonic reject: empty name `[(x=1)]`",
            parser: Box::new(Pythonic),
            buffer: "<|tool_call_start|>[(x=1)]<|tool_call_end|>",
            should_extract: false,
        },
        Case {
            label: "pythonic reject: stray `[` only `bad[`",
            parser: Box::new(Pythonic),
            buffer: "<|tool_call_start|>bad[<|tool_call_end|>",
            should_extract: false,
        },
        Case {
            label: "function_gemma accept: `call:foo{k:v}`",
            parser: Box::new(FunctionGemma),
            buffer: "<start_function_call>call:foo{k:v}<end_function_call>",
            should_extract: true,
        },
        Case {
            label: "function_gemma accept: digit-leading `call:1foo{k:v}`",
            parser: Box::new(FunctionGemma),
            buffer: "<start_function_call>call:1foo{k:v}<end_function_call>",
            should_extract: true,
        },
        Case {
            label: "function_gemma accept: hyphen `call:foo-bar{k:v}`",
            parser: Box::new(FunctionGemma),
            buffer: "<start_function_call>call:foo-bar{k:v}<end_function_call>",
            should_extract: true,
        },
        Case {
            label: "function_gemma reject: whitespace before `{` `call:foo {k:v}`",
            parser: Box::new(FunctionGemma),
            buffer: "<start_function_call>call:foo {k:v}<end_function_call>",
            should_extract: false,
        },
        Case {
            label: "function_gemma reject: empty name `call:{k:v}`",
            parser: Box::new(FunctionGemma),
            buffer: "<start_function_call>call:{k:v}<end_function_call>",
            should_extract: false,
        },
        Case {
            label: "function_gemma reject: stray `call:` only `bad call:`",
            parser: Box::new(FunctionGemma),
            buffer: "<start_function_call>bad call:<end_function_call>",
            should_extract: false,
        },
        Case {
            label: "json_tools accept: leading `{`",
            parser: Box::new(JsonTools),
            buffer: r#"<tool_call>{"name":"x","arguments":{}}</tool_call>"#,
            should_extract: true,
        },
        Case {
            label: "json_tools reject: stray `{` after garbage `bad{`",
            parser: Box::new(JsonTools),
            buffer: r#"<tool_call>bad{"name":"x"}</tool_call>"#,
            should_extract: false,
        },
        Case {
            label: "qwen3_coder accept: `<function=foo></function>`",
            parser: Box::new(Qwen3Coder),
            buffer: "<tool_call><function=foo></function></tool_call>",
            should_extract: true,
        },
        Case {
            label: "qwen3_coder accept: dotted name `<function=foo.bar></function>`",
            parser: Box::new(Qwen3Coder),
            buffer: "<tool_call><function=foo.bar></function></tool_call>",
            should_extract: true,
        },
        Case {
            label: "qwen3_coder accept: spaced name `<function=foo bar></function>`",
            parser: Box::new(Qwen3Coder),
            buffer: "<tool_call><function=foo bar></function></tool_call>",
            should_extract: true,
        },
        Case {
            label: "qwen3_coder accept: special-char name `<function=ns:method/v2></function>`",
            parser: Box::new(Qwen3Coder),
            buffer: "<tool_call><function=ns:method/v2></function></tool_call>",
            should_extract: true,
        },
        Case {
            label: "qwen3_coder reject: `<function=` without `NAME>` close",
            parser: Box::new(Qwen3Coder),
            buffer: "<tool_call>bad<function= </tool_call>",
            should_extract: false,
        },
        Case {
            label: "qwen3_coder reject: empty name `<function=></function>`",
            parser: Box::new(Qwen3Coder),
            buffer: "<tool_call><function=></function></tool_call>",
            should_extract: false,
        },
        Case {
            label: "qwen3_coder reject: name with `<` `<function=a<b></function>`",
            parser: Box::new(Qwen3Coder),
            buffer: "<tool_call><function=a<b></function></tool_call>",
            should_extract: false,
        },
        Case {
            label: "qwen3_coder reject: malformed outer + nested valid `<function=a<function=real></parameter></function>`",
            parser: Box::new(Qwen3Coder),
            buffer: "<tool_call><function=a<function=real><parameter=p>v</parameter></function></tool_call>",
            should_extract: false,
        },
        Case {
            label: "qwen3_coder reject: empty-name outer + nested valid `<function=><function=real></parameter></function>`",
            parser: Box::new(Qwen3Coder),
            buffer: "<tool_call><function=><function=real><parameter=p>v</parameter></function></tool_call>",
            should_extract: false,
        },
        Case {
            label: "function_gemma reject: stray `call:` + orphan `<escape>`",
            parser: Box::new(FunctionGemma),
            buffer: "<start_function_call>bad call:<escape><end_function_call>",
            should_extract: false,
        },
    ];
    for case in cases.iter() {
        let outcome = match case.parser.try_parse_one_call(case.buffer, None) {
            Ok(outcome) => outcome,
            Err(e) => panic!("{}: try_parse_one_call errored: {e}", case.label),
        };
        let Some((calls, _end_pos)) = outcome else {
            panic!(
                "{}: confirmed-bounded section expected (the wrapper end-tag is in the buffer), got Ok(None) — predicate/recognizer drift hid the wrapper close",
                case.label,
            )
        };
        if case.should_extract {
            assert!(
                !calls.is_empty(),
                "{}: the predicate must ACCEPT this body shape (the parser's recognizer accepts it); got zero calls — predicate is STRICTER than the parser body (false-negative drift)",
                case.label,
            );
        } else {
            assert!(
                calls.is_empty(),
                "{}: the predicate must REJECT this body shape (the parser's recognizer rejects it); got {} call(s) — predicate is LOOSER than the parser body (false-positive drift)",
                case.label,
                calls.len(),
            );
        }
    }
}
/// Inputs that are already valid JSON (numbers, objects, arrays, and a
/// whitespace-padded number) come back from `literal_eval` as that JSON value.
#[test]
fn literal_eval_json_first() {
    let cases = [
        ("42", serde_json::json!(42)),
        ("3.5", serde_json::json!(3.5)),
        (r#"{"a":1}"#, serde_json::json!({"a": 1})),
        ("[1,2]", serde_json::json!([1, 2])),
        (" 7 ", serde_json::json!(7)),
    ];
    for (input, expected) in cases {
        assert_eq!(literal_eval(input), expected, "input: {input:?}");
    }
}
/// Python-style `True` / `False` / `None` and the lowercase JSON spellings
/// `true` / `null` each evaluate to the corresponding JSON value.
#[test]
fn literal_eval_python_bool_none() {
    let cases = [
        ("True", Value::Bool(true)),
        ("False", Value::Bool(false)),
        ("None", Value::Null),
        ("true", Value::Bool(true)),
        ("null", Value::Null),
    ];
    for (literal, expected) in cases {
        assert_eq!(literal_eval(literal), expected, "input: {literal:?}");
    }
}
/// Single- and double-quoted literals evaluate to their unquoted contents,
/// including the empty single-quoted string.
#[test]
fn literal_eval_single_and_double_quoted_strings() {
    let cases = [("'hello'", "hello"), (r#""hi""#, "hi"), ("''", "")];
    for (quoted, inner) in cases {
        assert_eq!(
            literal_eval(quoted),
            Value::String(inner.to_owned()),
            "input: {quoted:?}"
        );
    }
}
/// Numeric spellings that strict JSON rejects (explicit `+` sign, leading
/// zeros) still evaluate to numbers.
#[test]
fn literal_eval_bare_int_and_float_non_json() {
    let cases = [
        ("+5", serde_json::json!(5)),
        ("+1.5", serde_json::json!(1.5)),
        ("007", serde_json::json!(7)),
    ];
    for (text, expected) in cases {
        assert_eq!(literal_eval(text), expected, "input: {text:?}");
    }
}
/// Python container literals (single-quoted list, tuple, dict with Python
/// keywords) evaluate to the equivalent JSON array / object.
#[test]
fn literal_eval_python_container_quote_swap() {
    let cases = [
        ("['a', 'b']", serde_json::json!(["a", "b"])),
        ("(1, 2)", serde_json::json!([1, 2])),
        (
            "{'ok': True, 'x': None}",
            serde_json::json!({"ok": true, "x": null}),
        ),
    ];
    for (literal, expected) in cases {
        assert_eq!(literal_eval(literal), expected, "input: {literal:?}");
    }
}
/// Text that cannot be evaluated is returned verbatim as a JSON string.
#[test]
fn literal_eval_unparseable_falls_back_to_string() {
    for raw in ["foo bar", "[oops"] {
        assert_eq!(literal_eval(raw), Value::String(raw.to_owned()));
    }
}
/// `deserialize` handles a JSON object, a single-quoted literal, and plain
/// text (which stays a string).
#[test]
fn deserialize_json_then_literal() {
    let cases = [
        (r#"{"a":1}"#, serde_json::json!({"a": 1})),
        ("'x'", Value::String("x".to_owned())),
        ("plain", Value::String("plain".to_owned())),
    ];
    for (input, expected) in cases {
        assert_eq!(deserialize(input), expected, "input: {input:?}");
    }
}
/// Builds a one-element tools array describing function `func` with a single
/// parameter `param` whose schema is `{"type": ty}`.
fn tools_with_param_type(func: &str, param: &str, ty: &str) -> Value {
    // Assemble the document inside-out so each nesting level is named.
    let properties = serde_json::json!({ param: { "type": ty } });
    let function = serde_json::json!({
        "name": func,
        "parameters": { "properties": properties }
    });
    serde_json::json!([{ "function": function }])
}
/// Without a properties map, a numeric-looking value is kept as a string.
#[test]
fn convert_param_value_no_props_is_string() {
    let converted = convert_param_value("5", "n", None);
    assert_eq!(converted, Value::String("5".to_owned()));
}
/// Both `NULL` and `null` convert to JSON null even with no schema.
#[test]
fn convert_param_value_null_literal() {
    for spelling in ["NULL", "null"] {
        assert_eq!(convert_param_value(spelling, "x", None), Value::Null);
    }
}
/// A parameter name absent from the properties map leaves the value a string.
#[test]
fn convert_param_value_missing_schema_for_name() {
    let schema = serde_json::json!({ "other": { "type": "string" } });
    let properties = schema.as_object().unwrap();
    let converted = convert_param_value("v", "missing", Some(properties));
    assert_eq!(converted, Value::String("v".to_owned()));
}
/// Exercises `convert_param_value` against a schema of each declared type
/// (`string`, `int`, `number`, `boolean`, `object`, and an unrecognised one),
/// covering both the successful conversion and the string fallback.
#[test]
fn convert_param_value_typed_branches() {
    /// Clones the `properties` object of the tool whose function name is `func`.
    fn props_for(tools: &Value, func: &str) -> serde_json::Map<String, Value> {
        let entry = tools
            .as_array()
            .unwrap()
            .iter()
            .find(|t| t["function"]["name"] == func)
            .unwrap();
        entry["function"]["parameters"]["properties"]
            .as_object()
            .unwrap()
            .clone()
    }

    // Converts `raw` for parameter `p` of function `f` declared with type `ty`.
    let check = |ty: &str, raw: &str, expected: Value| {
        let tools = tools_with_param_type("f", "p", ty);
        let props = props_for(&tools, "f");
        assert_eq!(
            convert_param_value(raw, "p", Some(&props)),
            expected,
            "type {ty:?}, raw {raw:?}"
        );
    };

    check("string", "hello", Value::String("hello".to_owned()));

    check("int", "12", serde_json::json!(12));
    check("int", "nope", Value::String("nope".to_owned()));

    check("number", "2.5", serde_json::json!(2.5));
    check("number", "NaN", Value::String("NaN".to_owned()));

    check("boolean", "TRUE", Value::Bool(true));
    check("boolean", "no", Value::Bool(false));

    check("object", r#"{"k":1}"#, serde_json::json!({"k": 1}));
    check("object", "plain", Value::String("plain".to_owned()));

    check("weird", "True", Value::Bool(true));
}
/// `schema_types` over a scalar `type`, a `type` list, an `enum`, an `anyOf`,
/// and two schemas with no type information (which yield `["string"]`).
#[test]
fn schema_types_variants() {
    /// Owned copy of a list of type names.
    fn owned(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| (*name).to_owned()).collect()
    }

    let scalar = serde_json::json!({"type": "integer"});
    assert_eq!(schema_types(&scalar), owned(&["integer"]));

    let type_list = serde_json::json!({"type": ["string", "null"]});
    assert_eq!(schema_types(&type_list), owned(&["null", "string"]));

    let enumerated = serde_json::json!({"enum": [1, "a", true, null]});
    assert_eq!(
        schema_types(&enumerated),
        owned(&["boolean", "integer", "null", "string"])
    );

    let any_of = serde_json::json!({
        "anyOf": [{"type": "integer"}, {"type": "boolean"}]
    });
    assert_eq!(schema_types(&any_of), owned(&["boolean", "integer"]));

    for untyped in [serde_json::json!({}), serde_json::json!(123)] {
        assert_eq!(schema_types(&untyped), owned(&["string"]));
    }
}
/// Walks `convert_with_types` through null spellings, each single declared
/// type, a two-type list, and an unrecognised type name.
#[test]
fn convert_with_types_branches() {
    // Single declared type per row.
    let single_type_cases = [
        ("null", "integer", Value::Null),
        ("nil", "string", Value::Null),
        ("x", "null", Value::Null),
        ("7", "integer", serde_json::json!(7)),
        ("2.0", "number", serde_json::json!(2.0)),
        ("yes", "boolean", Value::Bool(true)),
        ("off", "boolean", Value::Bool(false)),
        ("[1,2]", "array", serde_json::json!([1, 2])),
    ];
    for (raw, ty, expected) in single_type_cases {
        assert_eq!(
            convert_with_types(raw, &[ty.to_owned()]),
            expected,
            "raw {raw:?} as {ty:?}"
        );
    }

    let string_or_integer = ["string".to_owned(), "integer".to_owned()];
    assert_eq!(
        convert_with_types("42", &string_or_integer),
        serde_json::json!(42),
        "integer is visited before string in the priority order"
    );
    assert_eq!(
        convert_with_types("xyz", &string_or_integer),
        Value::String("xyz".to_owned()),
        "integer parse fails → falls through to the string branch"
    );

    let unknown = ["unknown_type".to_owned()];
    assert_eq!(
        convert_with_types("bare", &unknown),
        Value::String("bare".to_owned())
    );
    assert_eq!(
        convert_with_types("99", &unknown),
        serde_json::json!(99),
        "no recognised type → final serde fallback parses the JSON number"
    );
}
/// Matching quotes are stripped, surrounding whitespace is trimmed, and an
/// unbalanced leading quote is left untouched.
#[test]
fn extract_name_quote_stripping() {
    for (quoted, bare) in [(r#""foo""#, "foo"), ("'bar'", "bar")] {
        assert_eq!(extract_name(quoted), bare);
    }
    assert_eq!(extract_name(" baz "), "baz", "trimmed, no quotes");

    let unbalanced = r#""x"#;
    assert_eq!(extract_name(unbalanced), unbalanced);
}
/// A bracketed `name(args)` call is split into its name and argument text;
/// inputs without such a call yield `None`.
#[test]
fn find_pythonic_call_extracts_first() {
    let with_args = find_pythonic_call("[echo(a=1, b=2)]");
    assert_eq!(
        with_args,
        Some(("echo".to_owned(), "a=1, b=2".to_owned()))
    );

    let embedded = find_pythonic_call("noise [f()] tail");
    assert_eq!(embedded, Some(("f".to_owned(), String::new())));

    for no_call in ["plain text", "[ not_a_call ]"] {
        assert_eq!(find_pythonic_call(no_call), None);
    }
}
/// Keyword arguments: quoted and bare values, empty / non-kwarg input, an
/// unterminated quote, and a non-ASCII quoted value after `= `.
#[test]
fn parse_kw_args_quoted_unquoted_and_unterminated() {
    /// Owned copy of borrowed `(key, value)` pairs.
    fn owned_pairs(pairs: &[(&str, &str)]) -> Vec<(String, String)> {
        pairs
            .iter()
            .map(|(key, value)| ((*key).to_owned(), (*value).to_owned()))
            .collect()
    }

    let mixed = parse_kw_args(r#"a="x", b=2, c=hello"#).unwrap();
    assert_eq!(mixed, owned_pairs(&[("a", "x"), ("b", "2"), ("c", "hello")]));

    for nothing_to_parse in ["", "garbage"] {
        assert!(parse_kw_args(nothing_to_parse).unwrap().is_empty());
    }

    let unterminated = parse_kw_args(r#"a="unterminated"#).unwrap();
    assert_eq!(unterminated, owned_pairs(&[("a", "unterminated")]));

    let non_ascii = parse_kw_args(r#"city= "é""#).unwrap();
    assert_eq!(non_ascii, owned_pairs(&[("city", "é")]));
}
/// Every closed `<a>…</a>` region is collected; a dangling open tag is
/// ignored, and text with no tags yields nothing.
#[test]
fn find_all_basic_and_unclosed() {
    let (open, close) = ("<a>", "</a>");

    let both_closed = find_all("<a>1</a><a>2</a>", open, close);
    assert_eq!(both_closed, vec!["1".to_owned(), "2".to_owned()]);

    let one_dangling = find_all("<a>1</a><a>dangling", open, close);
    assert_eq!(one_dangling, vec!["1".to_owned()]);

    assert!(find_all("nothing here", open, close).is_empty());
}
/// Key/value tag pairs are returned in order; a key with no value region
/// produces no pair.
#[test]
fn find_kv_pairs_basic_and_truncated() {
    let two_pairs = find_kv_pairs(
        "<k>a</k><v>1</v><k>b</k><v>2</v>",
        "<k>",
        "</k>",
        "<v>",
        "</v>",
    );
    let expected = vec![
        ("a".to_owned(), "1".to_owned()),
        ("b".to_owned(), "2".to_owned()),
    ];
    assert_eq!(two_pairs, expected);

    let key_only = find_kv_pairs("<k>a</k>no value", "<k>", "</k>", "<v>", "</v>");
    assert_eq!(key_only, Vec::<(String, String)>::new());
}
/// JSON payload shapes accepted or rejected by `glm_parse_json`. Each row is
/// `(payload, expected)`, where `expected` is `Some((name, arguments))` for an
/// accepted shape and `None` for a rejected one.
#[test]
fn glm_parse_json_shapes() {
    let cases = [
        (
            r#"{"name":"f","arguments":{"x":1}}"#,
            Some(("f", serde_json::json!({"x": 1}))),
        ),
        (
            r#"{"function":"g","arguments":{}}"#,
            Some(("g", serde_json::json!({}))),
        ),
        (
            r#"{"tool":{"name":"h","arguments":{"a":2}}}"#,
            Some(("h", serde_json::json!({"a": 2}))),
        ),
        (
            r#"[{"name":"arr","arguments":{}}]"#,
            Some(("arr", serde_json::json!({}))),
        ),
        (r#"{"arguments":{}}"#, None),
        (
            r#"{"tool":{"name":"k"}}"#,
            Some(("k", serde_json::json!({}))),
        ),
        (
            r#"{"name":"k","arguments":"{\"z\":3}"}"#,
            Some(("k", serde_json::json!({"z": 3}))),
        ),
        (r#"{"name":"k","arguments":5}"#, None),
        ("not json", None),
    ];
    for (payload, expected) in cases {
        let expected = expected.map(|(name, args)| ToolCall::new_nameless_id(name, args));
        assert_eq!(glm_parse_json(payload, None), expected, "payload: {payload}");
    }
}
/// Plain-text shapes handled by `glm_parse_plain`: blank input, a name with a
/// JSON body on the next line or after a space, a lone name, `key=value`
/// arguments, and the `raw` fallback when the tail is not all `key=value`.
#[test]
fn glm_parse_plain_shapes() {
    let cases = [
        (" ", None),
        (
            "myfunc\n{\"a\":1}",
            Some(("myfunc", serde_json::json!({"a": 1}))),
        ),
        ("solo", Some(("solo", serde_json::json!({})))),
        (r#"f {"k":2}"#, Some(("f", serde_json::json!({"k": 2})))),
        (
            "f a=1 b=2",
            Some(("f", serde_json::json!({"a": 1, "b": 2}))),
        ),
        (
            "f a=1 bad",
            Some(("f", serde_json::json!({"raw": "a=1 bad"}))),
        ),
    ];
    for (text, expected) in cases {
        let expected = expected.map(|(name, args)| ToolCall::new_nameless_id(name, args));
        assert_eq!(glm_parse_plain(text, None), expected, "text: {text:?}");
    }
}
/// `call:NAME{ARGS}` is split into name and argument text; a `call:` with no
/// brace body, or no marker at all, yields `None`.
#[test]
fn gemma_call_first_call_and_rejects() {
    let accepted = gemma_call("call:foo{a:1}", false);
    assert_eq!(accepted, Some(("foo".to_owned(), "a:1".to_owned())));

    for rejected in ["call:foo bar", "nothing"] {
        assert_eq!(gemma_call(rejected, false), None);
    }
}
/// Every `call:NAME{…}` block is reported with its braces kept; a `call:`
/// lacking a brace body is skipped, and plain text yields nothing.
#[test]
fn gemma4_calls_walks_blocks() {
    let back_to_back = gemma4_calls("call:a{x:1}call:b{y:2}");
    let expected_pair = vec![
        ("a".to_owned(), "{x:1}".to_owned()),
        ("b".to_owned(), "{y:2}".to_owned()),
    ];
    assert_eq!(back_to_back, expected_pair);

    let after_skip = gemma4_calls("call:skip then call:c{z:3}");
    assert_eq!(after_skip, vec![("c".to_owned(), "{z:3}".to_owned())]);

    assert!(gemma4_calls("plain").is_empty());
}
/// The converted argument text must parse as JSON, with bare keys quoted and
/// `<|"|>`-delimited strings restored (including a `}` inside the string).
#[test]
fn gemma4_args_to_json_quotes_keys_and_restores_strings() {
    let cases = [
        (
            r#"{k: <|"|>v}x<|"|>,n: 2}"#,
            serde_json::json!({"k": "v}x", "n": 2}),
        ),
        (r#"{s: <|"|>a}b<|"|>}"#, serde_json::json!({"s": "a}b"})),
    ];
    for (raw_args, expected) in cases {
        let rendered = gemma4_args_to_json(raw_args);
        let parsed: Value = serde_json::from_str(&rendered).expect("valid JSON output");
        assert_eq!(parsed, expected);
    }
}
/// Covers `balanced_brace_end` (closing-brace index, quoted-literal skipping,
/// non-brace start, unclosed input) and `utf8_char_width` for each lead-byte
/// class plus a stray continuation byte.
#[test]
fn balanced_brace_end_and_utf8_width() {
    for (text, close_at) in [("{}", 1), (r#"{a:{b:1}}"#, 8)] {
        assert_eq!(balanced_brace_end(text), Some(close_at));
    }
    assert_eq!(
        balanced_brace_end(r#"{s:<|"|>}<|"|>}"#),
        Some(14),
        "the `}}` inside the <|\"|> literal is skipped; the real close is last"
    );
    for unbalanced in ["x{}", "{a:1"] {
        assert_eq!(balanced_brace_end(unbalanced), None);
    }

    for (lead_byte, width) in [(b'a', 1), (0xC3, 2), (0xE2, 3), (0xF0, 4)] {
        assert_eq!(utf8_char_width(lead_byte), width);
    }
    assert_eq!(
        utf8_char_width(0x80),
        1,
        "stray continuation byte advances 1"
    );
}
/// Inputs whose call closes at the very end report their full length (even
/// with `)]` inside a quoted value or a non-call bracket first); inputs with
/// no call report `None`.
#[test]
fn pythonic_call_close_variants() {
    let closes_at_end = ["[f(a=1)]", "[echo(s=')]')]", "[bad][g(x=1)]"];
    for text in closes_at_end {
        assert_eq!(pythonic_call_close(text), Some(text.len()));
    }

    for text in ["plain", "[just text]"] {
        assert_eq!(pythonic_call_close(text), None);
    }
}
/// Only a complete `<function=NAME>` opener with a non-empty name at the given
/// offset is recognised; the result is the pair `(10, 14)` for `<function=foo>`.
#[test]
fn qwen_function_open_at_boundaries() {
    let recognised = qwen_function_open_at("<function=foo>", 0);
    assert_eq!(recognised, Some((10, 14)));

    let rejected = ["<fun", "xxfunction=f>", "<function=>", "<function=foo"];
    for text in rejected {
        assert_eq!(qwen_function_open_at(text, 0), None, "text: {text:?}");
    }
}
/// Scans with value markers `<v>` / `</v>` and end tag `END`: a leading end
/// tag, an end tag hidden inside a value, an unterminated value, and text
/// with neither marker.
#[test]
fn xml_value_aware_end_tag_scan_branches() {
    let scan = |text: &str| xml_value_aware_end_tag_scan(text, "<v>", "</v>", "END");

    assert_eq!(scan("END rest"), Some(0));

    let hidden_then_real = "<v>inner END</v>END";
    let after_value_close = hidden_then_real.find("</v>").unwrap() + "</v>".len();
    assert_eq!(scan(hidden_then_real), Some(after_value_close));

    for no_end in ["<v>unterminated END", "plain bytes"] {
        assert_eq!(scan(no_end), None);
    }
}
/// A surplus `]` after the array is left out of the span, and a stray leading
/// `]` is skipped before the array starts.
#[test]
fn balanced_json_array_prefix_extra_close_and_leading_skip() {
    let cases = [("[]]", (0, 2)), ("][1]", (1, 4))];
    for (text, span) in cases {
        assert_eq!(balanced_json_array_prefix(text), Some(span));
    }
}
/// A surplus `}` after the object is left out of the reported span.
#[test]
fn balanced_json_object_prefix_extra_close_is_suffix() {
    let span = balanced_json_object_prefix("{}}");
    assert_eq!(span, Some((0, 2)));
}
/// `JsonTools::parse` on a full call, a payload missing `name`, non-JSON
/// text, and a call with no `arguments` (which become JSON null).
#[test]
fn parse_json_tools_batch() {
    let parsed = JsonTools
        .parse(r#"{"name":"get_time","arguments":{"tz":"UTC"}}"#, None)
        .unwrap();
    let expected = vec![ToolCall::new_nameless_id(
        "get_time",
        serde_json::json!({"tz": "UTC"}),
    )];
    assert_eq!(parsed, expected);

    let missing_name = JsonTools.parse(r#"{"arguments":{}}"#, None).unwrap_err();
    assert!(
        matches!(missing_name, Error::Tokenizer(_)),
        "missing-name is a Tokenizer error"
    );

    let not_json = JsonTools.parse("not json", None).unwrap_err();
    assert!(matches!(not_json, Error::Tokenizer(_)));

    let without_arguments = JsonTools.parse(r#"{"name":"f"}"#, None).unwrap();
    assert_eq!(
        without_arguments,
        vec![ToolCall::new_nameless_id("f", Value::Null)]
    );
}
/// `Pythonic::parse` converts a bracketed call with string, integer and
/// Python-bool keyword arguments, and errors on text without a call.
#[test]
fn parse_pythonic_batch() {
    let parsed = Pythonic
        .parse(r#"[echo(a="x", b=2, c=True)]"#, None)
        .unwrap();
    let expected = vec![ToolCall::new_nameless_id(
        "echo",
        serde_json::json!({"a": "x", "b": 2, "c": true}),
    )];
    assert_eq!(parsed, expected);

    let failure = Pythonic.parse("nope", None).unwrap_err();
    assert!(matches!(failure, Error::Tokenizer(_)));
}
/// `Mistral::parse` accepts `NAME[ARGS]{json}` and reports a tokenizer error
/// for a missing marker, a missing JSON body, and an invalid JSON body.
#[test]
fn parse_mistral_batch_and_errors() {
    let parsed = Mistral
        .parse(r#"get_weather[ARGS]{"city":"Tokyo"}"#, None)
        .unwrap();
    let expected = vec![ToolCall::new_nameless_id(
        "get_weather",
        serde_json::json!({"city": "Tokyo"}),
    )];
    assert_eq!(parsed, expected);

    for malformed in ["nothing", "f[ARGS] no-json", "f[ARGS]{bad}"] {
        let failure = Mistral.parse(malformed, None).unwrap_err();
        assert!(matches!(failure, Error::Tokenizer(_)));
    }
}
/// `Qwen3Coder::parse` with a typed schema (integer + string parameters),
/// without a schema (newline-padded value), and on two malformed inputs.
#[test]
fn parse_qwen3_coder_batch_with_types() {
    let schema = serde_json::json!([{
        "function": {
            "name": "f",
            "parameters": { "properties": {
                "n": { "type": "integer" },
                "s": { "type": "string" }
            } }
        }
    }]);
    let typed = Qwen3Coder
        .parse(
            "<function=f><parameter=n>42</parameter><parameter=s>hi</parameter></function>",
            Some(&schema),
        )
        .unwrap();
    let expected_typed = vec![ToolCall::new_nameless_id(
        "f",
        serde_json::json!({"n": 42, "s": "hi"}),
    )];
    assert_eq!(typed, expected_typed);

    let untyped = Qwen3Coder
        .parse(
            "<function=g><parameter=p>\nval\n</parameter></function>",
            None,
        )
        .unwrap();
    let expected_untyped = vec![ToolCall::new_nameless_id(
        "g",
        serde_json::json!({"p": "val"}),
    )];
    assert_eq!(untyped, expected_untyped);

    // No `<function=` at all, then a function that is never closed.
    for malformed in ["plain", "<function=f><parameter=p>v</parameter>"] {
        let failure = Qwen3Coder.parse(malformed, None).unwrap_err();
        assert!(matches!(failure, Error::Tokenizer(_)));
    }
}
/// `Glm47::parse` over the XML key/value shape (with a string-typed schema),
/// the JSON shape, the plain `name key=val` shape, and empty input.
#[test]
fn parse_glm47_batch_all_three_shapes() {
    let schema = serde_json::json!([{
        "function": {
            "name": "echo",
            "parameters": { "properties": { "s": { "type": "string" } } }
        }
    }]);

    let xml_shape = Glm47
        .parse(
            "echo<arg_key>s</arg_key><arg_value>123</arg_value>",
            Some(&schema),
        )
        .unwrap();
    assert_eq!(
        xml_shape,
        vec![ToolCall::new_nameless_id(
            "echo",
            serde_json::json!({"s": "123"})
        )],
        "string-typed arg stays a string even though `123` looks numeric"
    );

    let json_shape = Glm47
        .parse(r#"{"name":"f","arguments":{"x":1}}"#, None)
        .unwrap();
    assert_eq!(
        json_shape,
        vec![ToolCall::new_nameless_id("f", serde_json::json!({"x": 1}))]
    );

    let plain_shape = Glm47.parse("toolname key=val", None).unwrap();
    assert_eq!(
        plain_shape,
        vec![ToolCall::new_nameless_id(
            "toolname",
            serde_json::json!({"key": "val"})
        )]
    );

    let empty_input = Glm47.parse("", None).unwrap();
    assert_eq!(
        empty_input,
        vec![ToolCall::new_nameless_id(
            "unknown",
            serde_json::json!({"raw": ""})
        )]
    );
}
/// With no schema, a numeric XML argument value is decoded as a number.
#[test]
fn parse_glm47_xml_non_string_arg_is_deserialized() {
    let parsed = Glm47
        .parse("f<arg_key>n</arg_key><arg_value>7</arg_value>", None)
        .unwrap();
    let expected = vec![ToolCall::new_nameless_id("f", serde_json::json!({"n": 7}))];
    assert_eq!(parsed, expected);
}
/// `KimiK2::parse` on a single un-wrapped call, two wrapped calls, and a call
/// id without the `functions.` prefix; the full id string is kept as the id.
#[test]
fn parse_kimi_k2_batch_single_and_multi() {
    let single_call = "functions.weather:0<|tool_call_argument_begin|>{\"city\":\"NYC\"}";
    let parsed_single = KimiK2.parse(single_call, None).unwrap();
    let expected_single = vec![ToolCall::new(
        "weather",
        serde_json::json!({"city": "NYC"}),
        Some("functions.weather:0".to_owned()),
    )];
    assert_eq!(parsed_single, expected_single);

    let two_calls = concat!(
        "<|tool_call_begin|>functions.a:0<|tool_call_argument_begin|>{}<|tool_call_end|>",
        "<|tool_call_begin|>functions.b:1<|tool_call_argument_begin|>{}<|tool_call_end|>",
    );
    let parsed_multi = KimiK2.parse(two_calls, None).unwrap();
    assert_eq!(parsed_multi.len(), 2);
    let expected_multi = [("a", "functions.a:0"), ("b", "functions.b:1")];
    for (call, (name, id)) in parsed_multi.iter().zip(expected_multi) {
        assert_eq!(call.name(), name);
        assert_eq!(call.id(), Some(id));
    }

    let unprefixed = "weather:3<|tool_call_argument_begin|>{}";
    let parsed_unprefixed = KimiK2.parse(unprefixed, None).unwrap();
    assert_eq!(parsed_unprefixed[0].name(), "weather");
    assert_eq!(parsed_unprefixed[0].id(), Some("weather:3"));
}
/// Tokenizer errors from `KimiK2::parse`: no argument-begin marker, an id
/// with no `:index`, and an id whose index is not numeric.
#[test]
fn parse_kimi_k2_single_errors() {
    let malformed_inputs = [
        "no-arg-begin",
        "functions.noindex<|tool_call_argument_begin|>{}",
        "functions.f:notnum<|tool_call_argument_begin|>{}",
    ];
    for malformed in malformed_inputs {
        let failure = KimiK2.parse(malformed, None).unwrap_err();
        assert!(matches!(failure, Error::Tokenizer(_)));
    }
}
/// `Longcat::parse` on a JSON call, a JSON object with no `name` (parsed as an
/// empty-named call carrying the object), the XML key/value shape, and plain
/// text (an error).
#[test]
fn parse_longcat_batch() {
    let json_call = Longcat
        .parse(r#"{"name":"f","arguments":{"x":1}}"#, None)
        .unwrap();
    assert_eq!(
        json_call,
        vec![ToolCall::new_nameless_id("f", serde_json::json!({"x": 1}))]
    );

    let nameless_object = Longcat.parse(r#"{"other":2}"#, None).unwrap();
    assert_eq!(
        nameless_object,
        vec![ToolCall::new_nameless_id(
            "",
            serde_json::json!({"other": 2})
        )]
    );

    let xml_call = Longcat
        .parse(
            "f<longcat_arg_key>k</longcat_arg_key><longcat_arg_value>1</longcat_arg_value>",
            None,
        )
        .unwrap();
    assert_eq!(
        xml_call,
        vec![ToolCall::new_nameless_id("f", serde_json::json!({"k": 1}))]
    );

    let failure = Longcat.parse("plain text", None).unwrap_err();
    assert!(matches!(failure, Error::Tokenizer(_)));
}
/// `MinimaxM2::parse` on two `<invoke>` blocks (one with an integer-typed
/// parameter, one empty) and on text containing no invoke.
#[test]
fn parse_minimax_m2_batch() {
    let schema = serde_json::json!([{
        "function": {
            "name": "a",
            "parameters": { "properties": { "p": { "type": "integer" } } }
        }
    }]);
    let payload = concat!(
        r#"<invoke name="a"><parameter name="p">5</parameter></invoke>"#,
        r#"<invoke name="b"></invoke>"#,
    );

    let calls = MinimaxM2.parse(payload, Some(&schema)).unwrap();
    assert_eq!(calls.len(), 2);
    let expected = [
        ("a", serde_json::json!({"p": 5})),
        ("b", serde_json::json!({})),
    ];
    for (call, (name, arguments)) in calls.iter().zip(expected) {
        assert_eq!(call.name(), name);
        assert_eq!(*call.arguments(), arguments);
    }

    let failure = MinimaxM2.parse("no invokes", None).unwrap_err();
    assert!(matches!(failure, Error::Tokenizer(_)));
}
/// `FunctionGemma::parse` on an `<escape>`-delimited string plus a number, on
/// a bare-word value (kept as a string), and on text with no call.
#[test]
fn parse_function_gemma_batch() {
    let escaped = FunctionGemma
        .parse("call:f{k:<escape>hi there<escape>,n:2}", None)
        .unwrap();
    let expected_escaped = vec![ToolCall::new_nameless_id(
        "f",
        serde_json::json!({"k": "hi there", "n": 2}),
    )];
    assert_eq!(escaped, expected_escaped);

    let bareword = FunctionGemma.parse("call:g{k:bareword}", None).unwrap();
    let expected_bareword = vec![ToolCall::new_nameless_id(
        "g",
        serde_json::json!({"k": "bareword"}),
    )];
    assert_eq!(bareword, expected_bareword);

    let failure = FunctionGemma.parse("plain", None).unwrap_err();
    assert!(matches!(failure, Error::Tokenizer(_)));
}
/// `Gemma4::parse` on a call with a `<|"|>`-delimited string and a number,
/// then on text with no call and on a call whose body is not convertible.
#[test]
fn parse_gemma4_batch_and_errors() {
    let parsed = Gemma4
        .parse(r#"call:f{k: <|"|>hello<|"|>,n: 3}"#, None)
        .unwrap();
    let expected = vec![ToolCall::new_nameless_id(
        "f",
        serde_json::json!({"k": "hello", "n": 3}),
    )];
    assert_eq!(parsed, expected);

    for malformed in ["plain", "call:f{!bad!}"] {
        let failure = Gemma4.parse(malformed, None).unwrap_err();
        assert!(matches!(failure, Error::Tokenizer(_)));
    }
}
/// Minimal parser used to exercise the trait's default `parse`: it overrides
/// only the name, the markers, and `try_parse_one_call`.
struct DefaultParseProbe {
    /// When set, `try_parse_one_call` reports an end position of 0 instead of
    /// the position just past the closing `)`.
    zero_width: bool,
}

impl ToolParser for DefaultParseProbe {
    fn name(&self) -> &'static str {
        "default_parse_probe"
    }

    fn tool_call_start(&self) -> &'static str {
        "("
    }

    fn tool_call_end(&self) -> &'static str {
        ")"
    }

    /// Treats the text between the first `(` and the next `)` as a call name
    /// with empty arguments; returns `Ok(None)` when either is missing.
    fn try_parse_one_call(
        &self,
        buffer: &str,
        _tools: Option<&Value>,
    ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
        let open = match buffer.find('(') {
            Some(index) => index,
            None => return Ok(None),
        };
        let body_start = open + 1;
        let body_len = match buffer[body_start..].find(')') {
            Some(len) => len,
            None => return Ok(None),
        };
        let body_end = body_start + body_len;
        let name = buffer[body_start..body_end].to_owned();

        // One past the `)` unless the probe is configured not to advance.
        let end_pos = if self.zero_width { 0 } else { body_end + 1 };

        let call = ToolCall::new_nameless_id(name, serde_json::json!({}));
        Ok(Some((vec![call], end_pos)))
    }
}
/// With a normally advancing probe, `parse` collects both adjacent calls in
/// order, and returns an empty list when no call is present.
#[test]
fn default_parse_loop_extracts_back_to_back_via_trait_default() {
    let advancing = DefaultParseProbe { zero_width: false };

    let calls = advancing.parse("(a)(b)", None).unwrap();
    assert_eq!(calls.len(), 2);
    for (call, name) in calls.iter().zip(["a", "b"]) {
        assert_eq!(call.name(), name);
    }

    let none_found = advancing.parse("no tokens", None).unwrap();
    assert!(none_found.is_empty());
}
/// A probe that reports a zero end position makes `parse` return no calls.
#[test]
fn default_parse_loop_zero_width_advance_breaks() {
    let stalled = DefaultParseProbe { zero_width: true };
    let calls = stalled.parse("(a)(b)", None).unwrap();
    assert!(
        calls.is_empty(),
        "zero-width advance breaks before extending, so no calls are collected"
    );
}
/// Every listed parser name resolves to a parser reporting that same name;
/// an unlisted name resolves to nothing.
#[test]
fn parser_by_name_known_and_unknown() {
    let known_names = [
        "json_tools",
        "pythonic",
        "mistral",
        "qwen3_coder",
        "glm47",
        "kimi_k2",
        "longcat",
        "minimax_m2",
        "function_gemma",
        "gemma4",
    ];
    for name in known_names {
        let Some(parser) = parser_by_name(name) else {
            panic!("known parser {name}");
        };
        assert_eq!(parser.name(), name, "boxed parser reports its own name");
    }

    assert!(parser_by_name("does_not_exist").is_none());
}
/// Each marker text selects the expected parser name; no input, or text with
/// no markers, selects none.
#[test]
fn infer_tool_parser_select_rules() {
    assert_eq!(infer_tool_parser(None), None);

    let cases = [
        ("uses <minimax:tool_call> here", "minimax_m2"),
        ("a <|tool_call> b <tool_call|> c", "gemma4"),
        ("<start_function_call>", "function_gemma"),
        ("<longcat_tool_call>", "longcat"),
        ("has <arg_key> token", "glm47"),
        ("<|tool_list_start|>", "pythonic"),
        ("<tool_call>\n<function=", "qwen3_coder"),
        ("<|tool_calls_section_begin|>", "kimi_k2"),
        ("[TOOL_CALLS]", "mistral"),
        ("<tool_call> ... tool_call.name", "json_tools"),
    ];
    for (text, parser_name) in cases {
        assert_eq!(
            infer_tool_parser(Some(text)),
            Some(parser_name),
            "text: {text:?}"
        );
    }

    assert_eq!(infer_tool_parser(Some("no markers at all")), None);
}
/// Text carrying both the minimax marker and the gemma4 markers resolves to
/// `minimax_m2`.
#[test]
fn infer_tool_parser_first_rule_wins() {
    let both_markers = "<minimax:tool_call> <|tool_call> <tool_call|>";
    let chosen = infer_tool_parser(Some(both_markers));
    assert_eq!(chosen, Some("minimax_m2"));
}
/// Start / end markers reported by three parsers; Mistral's end marker is the
/// empty string.
#[test]
fn parser_marker_lookup() {
    let json_markers = (JsonTools.tool_call_start(), JsonTools.tool_call_end());
    assert_eq!(json_markers, ("<tool_call>", "</tool_call>"));

    let pythonic_markers = (Pythonic.tool_call_start(), Pythonic.tool_call_end());
    assert_eq!(pythonic_markers, ("<|tool_call_start|>", "<|tool_call_end|>"));

    let mistral_markers = (Mistral.tool_call_start(), Mistral.tool_call_end());
    assert_eq!(mistral_markers, ("[TOOL_CALLS]", ""));
}
/// `name`, `arguments` and `id` return what the constructor was given, and
/// `new_nameless_id` produces a call with no id.
#[test]
fn tool_call_public_accessors() {
    let arguments = serde_json::json!({"a": 1});
    let with_id = ToolCall::new("fname", arguments.clone(), Some("id-7".to_owned()));
    assert_eq!(with_id.name(), "fname");
    assert_eq!(*with_id.arguments(), arguments);
    assert_eq!(with_id.id(), Some("id-7"));

    let without_id = ToolCall::new_nameless_id("g", Value::Null);
    assert_eq!(without_id.id(), None);
}
/// A trailing `<` leaves the processor in `PotentialToolCall` with held-back
/// display text; `process_eos` then clears both buffers, returns to `Normal`,
/// and records no call.
#[test]
fn process_eos_clears_pending_display_in_potential_state() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);

    let _ = processor.process_chunk("leading <");
    assert_eq!(processor.state, State::PotentialToolCall);
    assert!(!processor.pending_display.is_empty());

    processor.process_eos();
    assert!(
        processor.pending_display.is_empty(),
        "pending_display cleared at EOS"
    );
    assert!(
        processor.tool_call_buffer.is_empty(),
        "buffer cleared at EOS"
    );
    assert_eq!(processor.state, State::Normal);
    assert!(processor.tool_calls.is_empty());
}
/// After a start tag is confirmed, feeding eight 64 KiB filler chunks must
/// keep the tool buffer within the cap plus one chunk, end with the buffer
/// empty, and never produce a call.
#[test]
fn recover_at_cap_collecting_drops_runaway_tool_buffer() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);

    let flushed = processor.process_chunk(r#"prose <tool_call>{"name":"x""#);
    assert_eq!(
        flushed.as_deref(),
        Some("prose "),
        "pre-confirmation prose flushed at start-tag confirmation"
    );
    assert_eq!(processor.state, State::CollectingToolCall);
    assert!(
        processor.pending_display.is_empty(),
        "prose moved out of pending_display"
    );

    let filler = "z".repeat(64 * 1024);
    let ceiling = MAX_TOOL_CALL_BUFFER_BYTES + filler.len();
    for _round in 0..8 {
        let _ = processor.process_chunk(&filler);
        assert!(processor.tool_call_buffer.len() <= ceiling);
    }

    assert_eq!(
        processor.tool_call_buffer.len(),
        0,
        "runaway tool buffer dropped at cap"
    );
    assert!(
        processor.tool_calls.is_empty(),
        "runaway content never became a call"
    );
}
/// Plain text is passed straight through, and `process_eos` in the `Normal`
/// state leaves the state, calls and both buffers untouched.
#[test]
fn process_eos_noop_when_already_normal() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);

    let shown = processor.process_chunk("plain");
    assert_eq!(shown.as_deref(), Some("plain"));
    assert_eq!(processor.state, State::Normal);

    processor.process_eos();
    assert_eq!(processor.state, State::Normal);
    assert!(processor.tool_calls.is_empty());
    assert!(processor.tool_call_buffer.is_empty());
    assert!(processor.pending_display.is_empty());
}
/// For each listed parser, a buffer with no start tag must yield `Ok(None)`
/// rather than an error or a bounded section.
#[test]
fn try_parse_one_call_start_tag_absent_returns_none_per_parser() {
    let prose_only = "just some streamed prose with no start tag";
    let candidates: Vec<(&'static str, Box<dyn ToolParser>)> = vec![
        ("json_tools", Box::new(JsonTools)),
        ("pythonic", Box::new(Pythonic)),
        ("qwen3_coder", Box::new(Qwen3Coder)),
        ("glm47", Box::new(Glm47)),
        ("longcat", Box::new(Longcat)),
        ("kimi_k2", Box::new(KimiK2)),
        ("minimax_m2", Box::new(MinimaxM2)),
        ("function_gemma", Box::new(FunctionGemma)),
        ("gemma4", Box::new(Gemma4)),
    ];
    for (label, parser) in candidates {
        match parser.try_parse_one_call(prose_only, None) {
            Err(e) => panic!("{label}: errored: {e}"),
            Ok(r) => assert!(
                r.is_none(),
                "{label}: start tag absent must yield Ok(None) (got {r:?})",
            ),
        }
    }
}
/// Mistral counterpart of the start-tag-absent check: the result is `Ok(None)`.
#[test]
fn try_parse_one_call_mistral_start_tag_absent_returns_none() {
    let outcome = Mistral
        .try_parse_one_call("no tool call markers here", None)
        .expect("Ok");
    assert!(outcome.is_none(), "mistral start tag absent → Ok(None)");
}
/// A section-begin marker followed by one complete inner call, but no
/// section-end marker, is not yet reported as a bounded section.
#[test]
fn try_parse_one_call_kimi_k2_open_block_without_section_end_stays_none() {
    let open_section = concat!(
        "<|tool_calls_section_begin|>",
        "<|tool_call_begin|>functions.f:0<|tool_call_argument_begin|>{}<|tool_call_end|>",
    );
    let outcome = KimiK2.try_parse_one_call(open_section, None).expect("Ok");
    assert!(
        outcome.is_none(),
        "kimi_k2 complete inner block without section end keeps collecting (Ok(None))",
    );
}
/// Balanced `{}` arguments with neither the inner call-end marker nor the
/// section-end marker still yield `Ok(None)`.
#[test]
fn try_parse_one_call_kimi_k2_balanced_args_without_inner_end_stays_none() {
    let unterminated_call = concat!(
        "<|tool_calls_section_begin|>",
        "<|tool_call_begin|>functions.f:0<|tool_call_argument_begin|>{}",
    );
    let outcome = KimiK2
        .try_parse_one_call(unterminated_call, None)
        .expect("Ok");
    assert!(
        outcome.is_none(),
        "kimi_k2 balanced args but no inner end + no section end → Ok(None)",
    );
}
/// A complete `<invoke>` inside an unclosed `<minimax:tool_call>` section
/// yields `Ok(None)`.
#[test]
fn try_parse_one_call_minimax_m2_open_invoke_without_section_end_stays_none() {
    let open_section = concat!(
        "<minimax:tool_call>",
        r#"<invoke name="f"><parameter name="p">v</parameter></invoke>"#,
    );
    let outcome = MinimaxM2.try_parse_one_call(open_section, None).expect("Ok");
    assert!(
        outcome.is_none(),
        "minimax_m2 complete invoke without section end → Ok(None)",
    );
}
/// A complete `call:f{…}` after the gemma4 start marker, with no end marker,
/// yields `Ok(None)`.
#[test]
fn try_parse_one_call_gemma4_call_without_section_end_stays_none() {
    let open_section = r#"<|tool_call>call:f{"k":"v"}"#;
    let outcome = Gemma4.try_parse_one_call(open_section, None).expect("Ok");
    assert!(
        outcome.is_none(),
        "gemma4 complete call without section end → Ok(None)",
    );
}
/// A `call:` marker that is never followed by an opening brace, in a section
/// with no end marker, yields `Ok(None)`.
#[test]
fn try_parse_one_call_gemma4_marker_without_open_brace_skips_then_none() {
    let braceless = "<|tool_call>call:f no brace here";
    let outcome = Gemma4.try_parse_one_call(braceless, None).expect("Ok");
    assert!(
        outcome.is_none(),
        "gemma4 `call:` without `{{` is skipped, no section end → Ok(None)",
    );
}
/// An empty or absent end tag gives `None`; otherwise the result is the
/// offset just past the first end tag at or after `payload_at`.
#[test]
fn closed_but_malformed_end_pos_empty_and_absent() {
    let tag = "</tc>";

    assert_eq!(closed_but_malformed_end_pos("anything", 0, ""), None);
    assert_eq!(
        closed_but_malformed_end_pos("no marker here", 0, "</tool_call>"),
        None
    );

    let single = "head</tc>tail";
    let past_single = single.find(tag).unwrap() + tag.len();
    assert_eq!(
        closed_but_malformed_end_pos(single, 0, tag),
        Some(past_single)
    );

    let doubled = "</tc>AAA</tc>";
    let past_second = doubled.rfind(tag).unwrap() + tag.len();
    assert_eq!(
        closed_but_malformed_end_pos(doubled, 5, tag),
        Some(past_second),
        "the match at offset 0 is before payload_at=5 and is skipped"
    );
}
/// Any empty marker gives `None`; an end tag inside a `<v>…</v>` value is
/// skipped in favour of one outside; an unterminated value or text with no
/// end tag gives `None`.
#[test]
fn closed_but_malformed_end_pos_value_aware_empty_and_absent() {
    // One marker blanked out per row: end tag, value open, value close.
    let blank_marker_rows = [("", "<v>", "</v>"), ("END", "", "</v>"), ("END", "<v>", "")];
    for (end_tag, value_open, value_close) in blank_marker_rows {
        assert_eq!(
            closed_but_malformed_end_pos_value_aware("x", 0, end_tag, value_open, value_close),
            None
        );
    }

    let hidden_then_real = "<v>inner END</v> END tail";
    let past_real_end = hidden_then_real.rfind("END").unwrap() + "END".len();
    assert_eq!(
        closed_but_malformed_end_pos_value_aware(hidden_then_real, 0, "END", "<v>", "</v>"),
        Some(past_real_end),
        "the in-value END is skipped; the outside-value END closes",
    );

    for no_close in ["<v>unterminated END", "plain bytes only"] {
        assert_eq!(
            closed_but_malformed_end_pos_value_aware(no_close, 0, "END", "<v>", "</v>"),
            None
        );
    }
}
/// An empty end tag gives `None`; an end tag inside a double-quoted string
/// (including one containing escaped quotes) is skipped; an unterminated
/// string gives `None`.
#[test]
fn closed_but_malformed_end_pos_quote_aware_empty_escape_and_absent() {
    let empty_tag = closed_but_malformed_end_pos_quote_aware("x", 0, "", b"\"");
    assert_eq!(empty_tag, None);

    let in_string_then_real = r#"{"s":"END"}END"#;
    let past_real = in_string_then_real.rfind("END").unwrap() + "END".len();
    assert_eq!(
        closed_but_malformed_end_pos_quote_aware(in_string_then_real, 0, "END", b"\""),
        Some(past_real),
        "the in-string END is skipped; the outside-string END closes",
    );

    let with_escapes = r#""a\"END\"b"END"#;
    let past_escaped = with_escapes.rfind("END").unwrap() + "END".len();
    assert_eq!(
        closed_but_malformed_end_pos_quote_aware(with_escapes, 0, "END", b"\""),
        Some(past_escaped),
        "escaped quote does not end the string; in-string END is skipped",
    );

    let unterminated = closed_but_malformed_end_pos_quote_aware(r#"{"s":"END"#, 0, "END", b"\"");
    assert_eq!(unterminated, None);
}
/// An empty or absent end tag gives `None`; with the tag present, a `true`
/// predicate gives `Some(None)` and a `false` predicate gives the offset just
/// past the first end tag.
#[test]
fn bound_context_or_plain_end_empty_tag_and_arms() {
    let tag = "</tc>";
    let text = "body</tc>rest";

    assert_eq!(bound_context_or_plain_end("anything", "", |_| true), None);
    assert_eq!(
        bound_context_or_plain_end("no end here", tag, |_| true),
        None
    );

    assert_eq!(
        bound_context_or_plain_end(text, tag, |_| true),
        Some(None)
    );

    let past_first_tag = text.find(tag).unwrap() + tag.len();
    assert_eq!(
        bound_context_or_plain_end(text, tag, |_| false),
        Some(Some(past_first_tag))
    );
}
/// A closed value followed by no end tag, and a value that never closes,
/// both scan to `None`.
#[test]
fn xml_value_aware_end_tag_scan_value_open_without_end_tag_then_unterminated() {
    let closed_value_no_end =
        xml_value_aware_end_tag_scan("<v>body</v> nothing", "<v>", "</v>", "END");
    assert_eq!(
        closed_value_no_end, None,
        "value region with no end-tag candidate → None",
    );

    let never_closed =
        xml_value_aware_end_tag_scan("<v>unterminated forever", "<v>", "</v>", "END");
    assert_eq!(never_closed, None);
}
/// An empty or absent tag gives `None`; a present tag gives the offset just
/// past it together with the text that follows.
#[test]
fn locate_tagged_payload_empty_present_absent() {
    let tag = "<tool_call>";

    assert_eq!(locate_tagged_payload("anything", ""), None);

    let located = locate_tagged_payload("pre<tool_call>body", tag).expect("found");
    assert_eq!(located.0, "pre<tool_call>".len());
    assert_eq!(located.1, "body");

    assert_eq!(locate_tagged_payload("no tag", tag), None);
}
/// A backslash-escaped quote inside a single- or double-quoted argument does
/// not end the string, so the call closes at the end of the input.
#[test]
fn pythonic_call_close_escaped_quote_in_string() {
    let single_quoted = r#"[echo(s='a\'b)]c')]"#;
    assert_eq!(
        pythonic_call_close(single_quoted),
        Some(single_quoted.len()),
        "escaped quote keeps the string open; the in-string `)]` is skipped",
    );

    let double_quoted = r#"[echo(s="x\")]y")]"#;
    assert_eq!(
        pythonic_call_close(double_quoted),
        Some(double_quoted.len())
    );
}
/// Feeds `glm_parse_json` two payloads whose `name` field is itself an object
/// and checks the resulting call's name and arguments.
#[test]
fn glm_parse_json_name_is_object_pulls_nested_arguments() {
    // `name` object nested under `tool`, carrying its own `arguments`.
    let wrapped = glm_parse_json(
        r#"{"tool":{"name":{"name":"inner","arguments":{"c":3}}}}"#,
        None,
    );
    let wrapped_want = ToolCall::new_nameless_id("inner", serde_json::json!({"c": 3}));
    assert_eq!(wrapped, Some(wrapped_want));

    // Top-level `name` object with sibling `arguments`.
    let top_level = glm_parse_json(r#"{"name":{"name":"deep"},"arguments":{"a":1}}"#, None);
    let top_level_want = ToolCall::new_nameless_id("deep", serde_json::json!({"a": 1}));
    assert_eq!(top_level, Some(top_level_want));
}
/// With a tool schema declaring parameter `s` of function `f` as `string`,
/// expects `glm_parse_plain("f s=1", …)` to produce the argument `"1"` as a
/// JSON string.
#[test]
fn glm_parse_plain_key_value_with_string_typed_arg() {
    let tool_schema = serde_json::json!([{
        "function": {
            "name": "f",
            "parameters": { "properties": { "s": { "type": "string" } } }
        }
    }]);

    let parsed = glm_parse_plain("f s=1", Some(&tool_schema));
    let want = ToolCall::new_nameless_id("f", serde_json::json!({"s": "1"}));
    assert_eq!(
        parsed,
        Some(want),
        "string-typed key=value stays a string",
    );
}
/// Gives `find_kv_pairs` one complete `<k>/<v>` pair followed by a second key
/// whose `<v>` is never closed, and expects only the first pair back.
#[test]
fn find_kv_pairs_value_open_without_close_breaks() {
    let input = "<k>a</k><v>1</v><k>b</k><v>dangling";
    let pairs = find_kv_pairs(input, "<k>", "</k>", "<v>", "</v>");
    let want = vec![("a".to_owned(), "1".to_owned())];
    assert_eq!(
        pairs, want,
        "the second value never closes → scan stops after the first pair",
    );
}
/// Expects `schema_types` to map an `enum` holding a float, a list, and an
/// object to the type names `array`, `number`, `object`, in that order.
#[test]
fn schema_types_enum_float_array_object_shapes() {
    let schema = serde_json::json!({"enum": [1.5, [1, 2], {"k": 1}]});
    let type_names = schema_types(&schema);
    let want = vec!["array".to_owned(), "number".to_owned(), "object".to_owned()];
    assert_eq!(
        type_names, want,
        "float→number, list→array, dict→object (BTreeSet-sorted)",
    );
}
/// Runs `normalize_arguments` for function `f` with a schema that declares
/// only `s` (as `string`). Input is `s = 5`, `n = "7"`, `b = true`; the
/// expected output is `s = "5"`, `n = 7`, `b = true`.
#[test]
fn normalize_arguments_string_typed_value_is_stringified() {
    let tool_schema = serde_json::json!([{
        "function": {
            "name": "f",
            "parameters": { "properties": { "s": { "type": "string" } } }
        }
    }]);

    // Same three entries, inserted in the same order: s, n, b.
    let mut raw_args = serde_json::Map::new();
    for (key, value) in [
        ("s", serde_json::json!(5)),
        ("n", serde_json::json!("7")),
        ("b", serde_json::json!(true)),
    ] {
        raw_args.insert(key.to_owned(), value);
    }

    let normalized = normalize_arguments("f", &raw_args, Some(&tool_schema));

    let want_s = Value::String("5".to_owned());
    let want_n = serde_json::json!(7);
    let want_b = serde_json::json!(true);
    assert_eq!(normalized.get("s"), Some(&want_s));
    assert_eq!(normalized.get("n"), Some(&want_n));
    assert_eq!(normalized.get("b"), Some(&want_b));
}
/// Parses a MiniMax-M2 `<invoke>` block whose first `<parameter …` opener has
/// no `>` and whose second parameter is well-formed; expects one call to `f`
/// carrying only the well-formed parameter.
#[test]
fn parse_minimax_m2_parameter_without_gt_is_skipped() {
    let input = concat!(
        r#"<invoke name="f">"#,
        r#"<parameter name="bad"</parameter>"#,
        r#"<parameter name="ok">v</parameter>"#,
        r#"</invoke>"#,
    );

    let calls = MinimaxM2.parse(input, None).unwrap();
    assert_eq!(calls.len(), 1);

    let call = &calls[0];
    assert_eq!(call.name(), "f");
    assert_eq!(
        *call.arguments(),
        serde_json::json!({"ok": "v"}),
        "the `>`-less parameter is skipped; only the well-formed one is kept",
    );
}
/// A Qwen3-Coder `<tool_call>` wrapper that closes without a `</function>`:
/// `try_parse_one_call` is expected to return zero calls with an end position
/// just after `</tool_call>`, and the streaming path is expected to display
/// the trailing `visible` text with no calls.
#[test]
fn try_parse_one_call_qwen3_coder_valid_opener_without_function_close_is_bounded_empty() {
    let buffer = "<tool_call><function=f><parameter=p>v</parameter></tool_call>visible";

    // Direct call on the parser.
    let attempt = Qwen3Coder.try_parse_one_call(buffer, None);
    let (calls, end_pos) = attempt
        .expect("Ok")
        .expect("Some — wrapper close is in the buffer");
    assert_eq!(calls.len(), 0, "no `</function>` close → zero calls");
    let consumed = &buffer[..end_pos];
    assert!(
        consumed.ends_with("</tool_call>"),
        "end_pos lands one past the wrapper close",
    );

    // Same buffer fed as a single chunk through `run_with_parser`.
    let (display, streamed_calls) = run_with_parser(Box::new(Qwen3Coder), &[buffer]);
    assert_eq!(streamed_calls.len(), 0);
    assert_eq!(display, "visible", "same-chunk suffix survives");
}
/// Parses a Qwen3-Coder `<function=f>` block whose first `<parameter=…`
/// opener has no `>` and whose second parameter is well-formed; expects one
/// call to `f` carrying only `p = "v"`.
#[test]
fn parse_qwen3_coder_parameter_without_gt_is_skipped() {
    let input = concat!(
        "<function=f>",
        "<parameter=nogt</parameter>",
        "<parameter=p>v</parameter>",
        "</function>",
    );

    let calls = Qwen3Coder.parse(input, None).unwrap();
    assert_eq!(calls.len(), 1);

    let call = &calls[0];
    assert_eq!(call.name(), "f");
    assert_eq!(
        *call.arguments(),
        serde_json::json!({"p": "v"}),
        "the `>`-less parameter capture is skipped",
    );
}
/// Parses two FunctionGemma calls: one with two `key:value` entries, and one
/// whose braces hold text with no `:`; the latter is expected to yield an
/// empty arguments object.
#[test]
fn parse_function_gemma_value_to_end_and_no_colon_break() {
    let two_args = FunctionGemma.parse("call:f{a:1,b:2}", None).unwrap();
    let two_args_want = vec![ToolCall::new_nameless_id(
        "f",
        serde_json::json!({"a": 1, "b": 2}),
    )];
    assert_eq!(two_args, two_args_want);

    let no_colon = FunctionGemma.parse("call:g{nocolon}", None).unwrap();
    let no_colon_want = vec![ToolCall::new_nameless_id("g", serde_json::json!({}))];
    assert_eq!(
        no_colon, no_colon_want,
        "non-empty body with no `:` → break → empty arguments",
    );
}
/// Sends a single chunk of `MAX_TOOL_CALL_BUFFER_BYTES + 1024` `p` characters
/// followed by `<`, and expects the whole chunk to come back as display text
/// with the processor back in `State::Normal` and both buffers empty. A
/// well-formed tagged call sent afterwards must still be parsed.
#[test]
fn recover_at_cap_potential_state_flushes_pending_and_prefix() {
    let mut processor = ToolCallProcessor::new(Box::new(JsonTools), None);

    // Oversized prose ending in a lone `<`.
    let huge_prose = "p".repeat(MAX_TOOL_CALL_BUFFER_BYTES + 1024);
    let oversized_chunk = format!("{huge_prose}<");
    let first = processor.process_chunk(&oversized_chunk);
    let flushed = first.expect("cap recovery flushes the buffered bytes as display");
    assert!(
        flushed.starts_with(&huge_prose),
        "leading prose is flushed first (stream order)",
    );
    assert!(
        flushed.ends_with('<'),
        "the ambiguous `<` prefix is flushed after the prose (not dropped)",
    );
    assert_eq!(flushed.len(), huge_prose.len() + 1);

    // Processor state after the flush.
    assert_eq!(
        processor.state,
        State::Normal,
        "reset to Normal after cap recovery",
    );
    assert_eq!(processor.tool_call_buffer.len(), 0);
    assert_eq!(processor.pending_display.len(), 0);

    // A subsequent complete call is swallowed (no display) and recorded.
    let second =
        processor.process_chunk(r#"<tool_call>{"name":"ok","arguments":{}}</tool_call>"#);
    assert_eq!(second, None);
    assert_eq!(processor.tool_calls.len(), 1);
    assert_eq!(processor.tool_calls[0].name(), "ok");
}