use serde_json::Value;
use crate::Error;
use crate::tokenizer::generated::{TOOL_PARSER_MARKERS, TOOL_PARSER_SELECT};
/// Looks up the opening marker registered for the parser called `name`.
///
/// Returns the `start` field of the first `TOOL_PARSER_MARKERS` entry whose
/// `name` matches, or `""` when no entry matches.
fn marker_start(name: &str) -> &'static str {
    for marker in TOOL_PARSER_MARKERS.iter() {
        if marker.name == name {
            return marker.start;
        }
    }
    ""
}
/// Looks up the closing marker registered for the parser called `name`.
///
/// Returns the `end` field of the first `TOOL_PARSER_MARKERS` entry whose
/// `name` matches, or `""` when no entry matches.
fn marker_end(name: &str) -> &'static str {
    for marker in TOOL_PARSER_MARKERS.iter() {
        if marker.name == name {
            return marker.end;
        }
    }
    ""
}
/// A single tool invocation extracted from model output.
#[derive(Debug, Clone, PartialEq)]
pub struct ToolCall {
/// Name of the tool/function being invoked.
name: String,
/// Arguments of the call, held as a JSON value.
arguments: Value,
/// Optional call identifier; `None` when the call carries no id.
id: Option<String>,
}
impl ToolCall {
    /// Builds a call that has a name and arguments but no identifier.
    fn new_nameless_id(name: impl Into<String>, arguments: Value) -> Self {
        Self::new(name, arguments, None)
    }

    /// Builds a call from its name, JSON arguments and optional identifier.
    pub fn new(name: impl Into<String>, arguments: Value, id: Option<String>) -> Self {
        let name = name.into();
        Self {
            name,
            arguments,
            id,
        }
    }

    /// Name of the tool being called.
    #[inline(always)]
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// JSON arguments of the call.
    #[inline(always)]
    pub fn arguments(&self) -> &Value {
        &self.arguments
    }

    /// Call identifier, if one was attached.
    #[inline(always)]
    pub fn id(&self) -> Option<&str> {
        self.id.as_deref()
    }
}
/// Common interface of the tool-call format parsers defined in this file.
pub trait ToolParser: Send + Sync {
/// Parses tool calls out of `text`.
///
/// The default implementation drives [`ToolParser::try_parse_one_call`]
/// repeatedly over the not-yet-consumed remainder of `text` and collects
/// every call it yields.
fn parse(&self, text: &str, tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
default_parse_via_try_parse_one_call(self, text, tools)
}
/// Identifier of the parser; also the key used to look up its markers.
fn name(&self) -> &'static str;
/// Opening marker of a tool-call section, looked up by [`ToolParser::name`]
/// (empty string when no marker is registered).
fn tool_call_start(&self) -> &'static str {
marker_start(self.name())
}
/// Closing marker of a tool-call section, looked up by [`ToolParser::name`]
/// (empty string when no marker is registered).
fn tool_call_end(&self) -> &'static str {
marker_end(self.name())
}
/// Attempts to consume one tool-call section from the front of `buffer`.
///
/// As used by the default `parse`: `Ok(None)` stops the scan, while
/// `Ok(Some((calls, end)))` contributes `calls` and advances the scan by
/// `end` bytes (an `end` of `0` also stops the scan). `calls` may be empty.
fn try_parse_one_call(
&self,
buffer: &str,
tools: Option<&Value>,
) -> Result<Option<(Vec<ToolCall>, usize)>, Error>;
}
/// Repeatedly applies `parser.try_parse_one_call` to the unread tail of
/// `text`, gathering every returned call.
///
/// The loop ends when the text is exhausted, when the parser reports no
/// further section (`None`), or when it reports a zero-length advance.
/// Errors from the parser are propagated unchanged.
fn default_parse_via_try_parse_one_call<P: ToolParser + ?Sized>(
    parser: &P,
    text: &str,
    tools: Option<&Value>,
) -> Result<Vec<ToolCall>, Error> {
    let mut collected = Vec::new();
    let mut offset = 0usize;
    while offset < text.len() {
        let Some((calls, consumed)) = parser.try_parse_one_call(&text[offset..], tools)? else {
            break;
        };
        // A zero advance would loop forever; treat it as "nothing more to read".
        if consumed == 0 {
            break;
        }
        collected.extend(calls);
        offset += consumed;
    }
    Ok(collected)
}
/// Shorthand that wraps `msg` in the crate's tokenizer error via
/// `Error::tokenizer`.
fn err(msg: impl Into<String>) -> Error {
Error::tokenizer(msg.into())
}
/// Finds the first `start_tag` in `buffer` and returns the byte offset just
/// past it together with everything that follows.
///
/// Returns `None` when `start_tag` is empty or does not occur in `buffer`.
fn locate_tagged_payload<'a>(buffer: &'a str, start_tag: &str) -> Option<(usize, &'a str)> {
    if start_tag.is_empty() {
        return None;
    }
    buffer
        .split_once(start_tag)
        .map(|(before, payload)| (before.len() + start_tag.len(), payload))
}
/// Removes the section markers around a tool-call body and trims whitespace.
///
/// Everything up to and including the first `start_tag` is dropped, then
/// everything from the last `end_tag` onwards. An empty tag, or a tag that
/// is not present, leaves that side untouched.
fn strip_section_markers<'a>(section: &'a str, start_tag: &str, end_tag: &str) -> &'a str {
    let after_start = match section.find(start_tag) {
        Some(at) if !start_tag.is_empty() => &section[at + start_tag.len()..],
        _ => section,
    };
    let body = match after_start.rfind(end_tag) {
        Some(at) if !end_tag.is_empty() => &after_start[..at],
        _ => after_start,
    };
    body.trim()
}
/// Returns the offset in `buffer` just past the first `end_tag` found at or
/// after `payload_at`.
///
/// Returns `None` when `end_tag` is empty, when `payload_at` is not a valid
/// slice start for `buffer`, or when no `end_tag` follows.
fn closed_but_malformed_end_pos(buffer: &str, payload_at: usize, end_tag: &str) -> Option<usize> {
    if end_tag.is_empty() {
        return None;
    }
    let tail = buffer.get(payload_at..)?;
    tail.find(end_tag)
        .map(|rel| payload_at + rel + end_tag.len())
}
/// Like a plain end-tag search, but skips over `value_open … value_close`
/// regions so an `end_tag` inside a value is not mistaken for the terminator.
///
/// Scanning starts at `payload_at`. The result is the offset in `buffer`
/// just past the first `end_tag` found outside a value region. Returns
/// `None` when any of the three markers is empty, when `payload_at` is not a
/// valid slice start, when a value region is opened but never closed, or
/// when no terminator is found.
fn closed_but_malformed_end_pos_value_aware(
    buffer: &str,
    payload_at: usize,
    end_tag: &str,
    value_open: &str,
    value_close: &str,
) -> Option<usize> {
    if end_tag.is_empty() || value_open.is_empty() || value_close.is_empty() {
        return None;
    }
    let payload = buffer.get(payload_at..)?;
    let raw = payload.as_bytes();
    let terminator = end_tag.as_bytes();
    let mut pos = 0;
    while pos < raw.len() {
        // `payload.get(pos..)` is `None` in the middle of a multi-byte
        // character; such positions can never start a value region.
        match payload.get(pos..) {
            Some(rest) if rest.starts_with(value_open) => {
                let value_start = pos + value_open.len();
                let close_rel = payload.get(value_start..)?.find(value_close)?;
                pos = value_start + close_rel + value_close.len();
            }
            _ => {
                if raw[pos..].starts_with(terminator) {
                    return Some(payload_at + pos + terminator.len());
                }
                pos += 1;
            }
        }
    }
    None
}
/// Finds the first `end_tag` at or after `payload_at` that is not inside a
/// quoted string, returning the offset in `buffer` just past it.
///
/// Any byte listed in `quotes` opens a string that runs until the same byte
/// recurs unescaped; a backslash inside a string escapes the next byte.
/// Returns `None` when `end_tag` is empty, when `payload_at` is not a valid
/// slice start, or when no unquoted terminator exists.
fn closed_but_malformed_end_pos_quote_aware(
    buffer: &str,
    payload_at: usize,
    end_tag: &str,
    quotes: &[u8],
) -> Option<usize> {
    if end_tag.is_empty() {
        return None;
    }
    let raw = buffer.get(payload_at..)?.as_bytes();
    let terminator = end_tag.as_bytes();
    let mut open_quote: Option<u8> = None;
    let mut skip_next = false;
    for (idx, &byte) in raw.iter().enumerate() {
        match open_quote {
            // Inside a string: only track escapes and the closing quote.
            Some(quote) => {
                if skip_next {
                    skip_next = false;
                } else if byte == b'\\' {
                    skip_next = true;
                } else if byte == quote {
                    open_quote = None;
                }
            }
            None if quotes.contains(&byte) => open_quote = Some(byte),
            None => {
                if raw[idx..].starts_with(terminator) {
                    return Some(payload_at + idx + terminator.len());
                }
            }
        }
    }
    None
}
/// Decides how the end of a section should be located.
///
/// * `None`: `end_tag` is empty or does not occur in `payload`.
/// * `Some(None)`: the text before the first `end_tag` satisfies
///   `context_proven`, so the caller must use a context-aware scan.
/// * `Some(Some(end))`: the context is not proven; `end` is the offset in
///   `payload` just past the first `end_tag`.
fn bound_context_or_plain_end(
    payload: &str,
    end_tag: &str,
    context_proven: impl Fn(&str) -> bool,
) -> Option<Option<usize>> {
    if end_tag.is_empty() {
        return None;
    }
    payload.find(end_tag).map(|tag_at| {
        if context_proven(&payload[..tag_at]) {
            None
        } else {
            Some(tag_at + end_tag.len())
        }
    })
}
/// True when `prefix`, ignoring leading whitespace, begins with `{`.
fn json_object_context_proven(prefix: &str) -> bool {
    matches!(prefix.trim_start().chars().next(), Some('{'))
}
/// True when `prefix`, ignoring leading whitespace, begins with `[`.
fn json_array_context_proven(prefix: &str) -> bool {
    matches!(prefix.trim_start().chars().next(), Some('['))
}
/// True when `prefix` contains the start of a pythonic call, as detected by
/// `find_first_pythonic_call_start`.
fn pythonic_call_context_proven(prefix: &str) -> bool {
    let first_call = find_first_pythonic_call_start(prefix);
    first_call.is_some()
}
/// True when `prefix` contains a well-formed function opener, as detected by
/// `find_first_qwen_function_open`.
fn qwen_function_context_proven(prefix: &str) -> bool {
    let first_open = find_first_qwen_function_open(prefix);
    first_open.is_some()
}
/// True when `find_first_function_gemma_call_start` locates a call start in
/// `prefix`.
fn function_gemma_call_context_proven(prefix: &str) -> bool {
    let first_call = find_first_function_gemma_call_start(prefix);
    first_call.is_some()
}
/// Builds a predicate that is true when its argument contains `needle`.
///
/// An empty `needle` never proves the context: the predicate is then always
/// false.
fn literal_context_proven<'a>(needle: &'a str) -> impl Fn(&str) -> bool + 'a {
    move |prefix: &str| {
        if needle.is_empty() {
            false
        } else {
            prefix.contains(needle)
        }
    }
}
/// Best-effort conversion of a Python-/JSON-like literal into a JSON value.
///
/// The trimmed input is tried against a sequence of interpretations, the
/// first that succeeds wins; if none applies the trimmed text is returned as
/// a JSON string. The order of the attempts is significant.
fn literal_eval(s: &str) -> Value {
let t = s.trim();
// 1. Strict JSON.
if let Ok(v) = serde_json::from_str::<Value>(t) {
return v;
}
// 2. Python and JSON spellings of booleans and null.
match t {
"True" | "true" => return Value::Bool(true),
"False" | "false" => return Value::Bool(false),
"None" | "null" => return Value::Null,
_ => {}
}
// 3. A value wrapped in matching single or double quotes: the outer quotes
//    are removed and the inside is taken verbatim (no unescaping is done).
if (t.starts_with('\'') && t.ends_with('\'') && t.len() >= 2)
|| (t.starts_with('"') && t.ends_with('"') && t.len() >= 2)
{
return Value::String(t[1..t.len() - 1].to_owned());
}
// 4. Integers accepted by Rust's `i64` parser.
if let Ok(i) = t.parse::<i64>() {
return Value::Number(i.into());
}
// 5. Floats; used only when `Number::from_f64` accepts the parsed value.
if let Ok(f) = t.parse::<f64>()
&& let Some(n) = serde_json::Number::from_f64(f)
{
return Value::Number(n);
}
// 6. Bracketed containers: rewrite Python syntax to JSON and retry.
//    NOTE(review): the replacements are purely textual, so they also alter
//    quotes, parentheses and the words True/False/None inside string values.
if (t.starts_with('[') && t.ends_with(']'))
|| (t.starts_with('{') && t.ends_with('}'))
|| (t.starts_with('(') && t.ends_with(')'))
{
let swapped = t
.replace('\'', "\"")
.replace("(", "[")
.replace(")", "]")
.replace("True", "true")
.replace("False", "false")
.replace("None", "null");
if let Ok(v) = serde_json::from_str::<Value>(&swapped) {
return v;
}
}
// 7. Fallback: keep the trimmed text as a string.
Value::String(t.to_owned())
}
/// Parses `value` as JSON, falling back to `literal_eval` when it is not
/// valid JSON.
fn deserialize(value: &str) -> Value {
    serde_json::from_str::<Value>(value).unwrap_or_else(|_| literal_eval(value))
}
/// Wraps one id-less call named `name` with arguments `args` in a
/// single-element vector.
fn obj(name: &str, args: Value) -> Vec<ToolCall> {
    let call = ToolCall::new(name, args, None);
    vec![call]
}
/// Returns the `parameters.properties` object declared for `func_name`.
///
/// `tools` is expected to be a JSON array whose entries hold a `function`
/// object with a `name` and, optionally, `parameters.properties`. The first
/// entry whose function name equals `func_name` decides the result.
///
/// Returns `None` when `tools` is absent or not an array, when no entry
/// matches, or when the matching entry has no `parameters.properties` object.
///
/// Entries without a `function` key are skipped. Previously such an entry
/// aborted the whole lookup, hiding any matching tool listed after it.
fn tool_properties<'a>(
    tools: Option<&'a Value>,
    func_name: &str,
) -> Option<&'a serde_json::Map<String, Value>> {
    let entries = tools?.as_array()?;
    entries
        .iter()
        .filter_map(|tool| tool.get("function"))
        .find(|function| function.get("name").and_then(Value::as_str) == Some(func_name))
        .and_then(|function| function.get("parameters")?.get("properties")?.as_object())
}
/// Lists the parameters of `func_name` whose schema declares
/// `"type": "string"`.
///
/// Returns an empty vector when the tool or its properties cannot be found.
fn string_arg_names(tools: Option<&Value>, func_name: &str) -> Vec<String> {
    let Some(props) = tool_properties(tools, func_name) else {
        return Vec::new();
    };
    let mut names = Vec::new();
    for (key, schema) in props {
        if schema.get("type").and_then(Value::as_str) == Some("string") {
            names.push(key.clone());
        }
    }
    names
}
/// Parser for tool calls written as a JSON object between section markers.
pub struct JsonTools;

impl JsonTools {
    /// Bounds one section inside `payload` (the text after the start marker).
    ///
    /// Returns the section body (without `end_tag`) and the offset just past
    /// `end_tag`, shifted by `payload_at`. When the text before the first
    /// `end_tag` starts like a JSON object, the terminator is searched
    /// outside double-quoted strings; otherwise the first `end_tag` is used.
    /// Returns `None` when no terminator can be found.
    fn bound_section<'a>(
        &self,
        payload: &'a str,
        payload_at: usize,
        end_tag: &str,
    ) -> Option<(&'a str, usize)> {
        let plain_end = bound_context_or_plain_end(payload, end_tag, json_object_context_proven)?;
        let end_pos = if let Some(end_rel) = plain_end {
            end_rel
        } else {
            closed_but_malformed_end_pos_quote_aware(payload, 0, end_tag, b"\"")?
        };
        let body = &payload[..end_pos - end_tag.len()];
        Some((body, payload_at + end_pos))
    }
}
impl ToolParser for JsonTools {
    /// Parses `text` as one JSON object with a string `name` and optional
    /// `arguments` (defaulting to JSON `null`).
    ///
    /// # Errors
    /// Fails when `text` is not valid JSON or has no string `name`.
    fn parse(&self, text: &str, _tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
        let parsed = serde_json::from_str::<Value>(text.trim())
            .map_err(|e| err(format!("json_tools: {e}")))?;
        let Some(name) = parsed.get("name").and_then(Value::as_str) else {
            return Err(err("json_tools: missing name"));
        };
        let args = match parsed.get("arguments") {
            Some(arguments) => arguments.clone(),
            None => Value::Null,
        };
        Ok(obj(name, args))
    }

    fn name(&self) -> &'static str {
        "json_tools"
    }

    /// Consumes the first marker-delimited section in `buffer`.
    ///
    /// Returns `Ok(None)` when no complete section is available. Otherwise
    /// returns the parsed calls and the offset just past the section; the
    /// call list is empty when the body is not a balanced JSON object or
    /// fails to parse.
    fn try_parse_one_call(
        &self,
        buffer: &str,
        tools: Option<&Value>,
    ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
        let start_tag = self.tool_call_start();
        let end_tag = self.tool_call_end();
        let Some((payload_at, payload)) = locate_tagged_payload(buffer, start_tag) else {
            return Ok(None);
        };
        let Some((bounded, end_pos)) = self.bound_section(payload, payload_at, end_tag) else {
            return Ok(None);
        };
        let calls = if balanced_json_object_prefix(bounded).is_some() {
            let inner = strip_section_markers(&buffer[..end_pos], start_tag, end_tag);
            // Parse failures are swallowed: the section is still consumed.
            self.parse(inner, tools).unwrap_or_default()
        } else {
            Vec::new()
        };
        Ok(Some((calls, end_pos)))
    }
}
/// Parser for tool calls written as `[name(key=value, ...)]`.
pub struct Pythonic;

impl Pythonic {
    /// Bounds one section inside `payload` (the text after the start marker).
    ///
    /// Returns the section body (without `end_tag`) and the offset just past
    /// `end_tag`, shifted by `payload_at`. When a call start precedes the
    /// first `end_tag`, the terminator is searched outside single- and
    /// double-quoted strings; otherwise the first `end_tag` is used.
    /// Returns `None` when no terminator can be found.
    fn bound_section<'a>(
        &self,
        payload: &'a str,
        payload_at: usize,
        end_tag: &str,
    ) -> Option<(&'a str, usize)> {
        let plain_end =
            bound_context_or_plain_end(payload, end_tag, pythonic_call_context_proven)?;
        let end_pos = if let Some(end_rel) = plain_end {
            end_rel
        } else {
            closed_but_malformed_end_pos_quote_aware(payload, 0, end_tag, b"\"'")?
        };
        let body = &payload[..end_pos - end_tag.len()];
        Some((body, payload_at + end_pos))
    }
}
impl ToolParser for Pythonic {
    /// Parses the first `[name(args)]` call in `text`; every keyword
    /// argument value is converted with `literal_eval`.
    ///
    /// # Errors
    /// Fails when no call is present or the arguments cannot be split.
    fn parse(&self, text: &str, _tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
        let Some((fname, args_str)) = find_pythonic_call(text) else {
            return Err(err("pythonic: No function provided."));
        };
        let mut arguments = serde_json::Map::new();
        for (key, raw) in parse_kw_args(&args_str)? {
            let value = literal_eval(&raw);
            arguments.insert(key, value);
        }
        Ok(obj(&fname, Value::Object(arguments)))
    }

    fn name(&self) -> &'static str {
        "pythonic"
    }

    /// Consumes the first marker-delimited section in `buffer`.
    ///
    /// Returns `Ok(None)` when no complete section is available. Otherwise
    /// returns the parsed calls and the offset just past the section; the
    /// call list is empty when `pythonic_call_close` finds no closed call in
    /// the body or parsing fails.
    fn try_parse_one_call(
        &self,
        buffer: &str,
        tools: Option<&Value>,
    ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
        let start_tag = self.tool_call_start();
        let end_tag = self.tool_call_end();
        let Some((payload_at, payload)) = locate_tagged_payload(buffer, start_tag) else {
            return Ok(None);
        };
        let Some((bounded, end_pos)) = self.bound_section(payload, payload_at, end_tag) else {
            return Ok(None);
        };
        let calls = if pythonic_call_close(bounded).is_some() {
            let inner = strip_section_markers(&buffer[..end_pos], start_tag, end_tag);
            // Parse failures are swallowed: the section is still consumed.
            self.parse(inner, tools).unwrap_or_default()
        } else {
            Vec::new()
        };
        Ok(Some((calls, end_pos)))
    }
}
/// Checks whether a pythonic call opener `[name(` begins at byte `at`.
///
/// `name` must be one or more ASCII alphanumerics or underscores. On success
/// returns `(name_start, after_paren)`: the offset of the first name byte
/// and the offset just past the `(`.
fn pythonic_call_start_at(payload: &str, at: usize) -> Option<(usize, usize)> {
    let bytes = payload.as_bytes();
    if bytes.get(at) != Some(&b'[') {
        return None;
    }
    let name_start = at + 1;
    let name_len = bytes[name_start..]
        .iter()
        .take_while(|byte| byte.is_ascii_alphanumeric() || **byte == b'_')
        .count();
    if name_len == 0 {
        return None;
    }
    let paren_at = name_start + name_len;
    (bytes.get(paren_at) == Some(&b'(')).then_some((name_start, paren_at + 1))
}
/// Scans `payload` for the first `[name(` opener and returns the offset just
/// past its `(`.
fn find_first_pythonic_call_start(payload: &str) -> Option<usize> {
    (0..payload.len()).find_map(|at| {
        pythonic_call_start_at(payload, at).map(|(_, after_open)| after_open)
    })
}
/// Extracts the first complete `[name(args)]` call from `text`.
///
/// Returns the function name and the raw argument text between `(` and the
/// first following `)]`. Openers with no `)]` after them are skipped.
fn find_pythonic_call(text: &str) -> Option<(String, String)> {
    (0..text.len()).find_map(|at| {
        let (name_start, after_open) = pythonic_call_start_at(text, at)?;
        let rest = &text[after_open..];
        let close = rest.find(")]")?;
        // `after_open - 1` is the position of the `(` that ends the name.
        let name = text[name_start..after_open - 1].to_owned();
        Some((name, rest[..close].to_owned()))
    })
}
/// Splits a `key=value, key=value` argument list into raw key/value pairs.
///
/// Value forms:
/// * `"..."`: runs to the next `"`; the quotes are dropped from the result.
///   An unterminated quote takes the rest of the input.
/// * `'...'`: runs to the next `'`; the quotes are kept in the result so
///   that `literal_eval` strips them later. Commas inside the quotes belong
///   to the value. Previously such a value was cut at its first comma and
///   the remainder was lost.
/// * anything else, including an unterminated `'`: runs to the next comma
///   and is trimmed.
///
/// Parsing stops at the first segment that contains no `=`.
///
/// # Errors
/// Fails if the computed resume position is not a character boundary.
fn parse_kw_args(s: &str) -> Result<Vec<(String, String)>, Error> {
    let mut out = Vec::new();
    let mut rest = s.trim();
    while !rest.is_empty() {
        let Some(eq) = rest.find('=') else {
            break;
        };
        let key = rest[..eq].trim().to_owned();
        // Skip whitespace between `=` and the value.
        let val_start = {
            let after = &rest[eq + 1..];
            eq + 1 + (after.len() - after.trim_start().len())
        };
        let value_region = &rest[val_start..];
        let single_quoted_end = value_region
            .strip_prefix('\'')
            .and_then(|inner| inner.find('\''));
        let (val, consumed) = if let Some(stripped) = value_region.strip_prefix('"') {
            match stripped.find('"') {
                Some(end) => (stripped[..end].to_owned(), val_start + 1 + end + 1),
                None => (stripped.to_owned(), rest.len()),
            }
        } else if let Some(end) = single_quoted_end {
            // Opening quote + content + closing quote, kept verbatim.
            let quoted_len = 1 + end + 1;
            (
                value_region[..quoted_len].to_owned(),
                val_start + quoted_len,
            )
        } else {
            let end = value_region.find(',').unwrap_or(value_region.len());
            (value_region[..end].trim().to_owned(), val_start + end)
        };
        out.push((key, val));
        if consumed >= rest.len() {
            break;
        }
        if !rest.is_char_boundary(consumed) {
            return Err(err("pythonic: malformed argument encoding"));
        }
        rest = rest[consumed..]
            .trim_start()
            .trim_start_matches(',')
            .trim_start();
    }
    Ok(out)
}
/// Parser for tool calls written as `name[ARGS]{json}`.
pub struct Mistral;

impl ToolParser for Mistral {
    /// Parses `name[ARGS]{...}`: the text before `[ARGS]` is the function
    /// name, and the text from the first `{` after it is the JSON arguments.
    ///
    /// # Errors
    /// Fails when `[ARGS]` is missing, no `{` follows it, or the arguments
    /// are not valid JSON.
    fn parse(&self, text: &str, _tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
        let Some((head, tail)) = text.split_once("[ARGS]") else {
            return Err(err(format!(
                "mistral: Could not parse tool call from: {text}"
            )));
        };
        let name = head.trim();
        let after = tail.trim_start();
        let Some(brace) = after.find('{') else {
            return Err(err("mistral: no json args"));
        };
        let json_str = after[brace..].trim_end();
        let args = serde_json::from_str::<Value>(json_str)
            .map_err(|e| err(format!("mistral: {e}")))?;
        Ok(obj(name, args))
    }

    fn name(&self) -> &'static str {
        "mistral"
    }

    /// Consumes one call from `buffer`.
    ///
    /// The call ends where the balanced JSON object after `[ARGS]` ends; the
    /// end marker is not needed for that. If `[ARGS]` or a balanced object is
    /// missing, the section is skipped up to the end marker with no calls,
    /// or `Ok(None)` is returned when there is no end marker either.
    fn try_parse_one_call(
        &self,
        buffer: &str,
        tools: Option<&Value>,
    ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
        const ARGS_TAG: &str = "[ARGS]";
        let start_tag = self.tool_call_start();
        let end_tag = self.tool_call_end();
        let Some((payload_at, payload)) = locate_tagged_payload(buffer, start_tag) else {
            return Ok(None);
        };
        // Fallback for malformed sections: consume up to the end marker.
        let skip_malformed = || -> Option<(Vec<ToolCall>, usize)> {
            closed_but_malformed_end_pos(buffer, payload_at, end_tag).map(|ep| (Vec::new(), ep))
        };
        let Some(args_rel) = payload.find(ARGS_TAG) else {
            return Ok(skip_malformed());
        };
        let after_args = args_rel + ARGS_TAG.len();
        let Some((_, obj_end_in)) = balanced_json_object_prefix(&payload[after_args..]) else {
            return Ok(skip_malformed());
        };
        let end_pos = payload_at + after_args + obj_end_in;
        let inner = strip_section_markers(&buffer[..end_pos], start_tag, "");
        // Parse failures are swallowed: the section is still consumed.
        let calls = self.parse(inner, tools).unwrap_or_default();
        Ok(Some((calls, end_pos)))
    }
}
/// Parser for tool calls written as `<function=name>` with
/// `<parameter=key>value</parameter>` children.
pub struct Qwen3Coder;

impl Qwen3Coder {
    /// Bounds one section inside `payload` (the text after the start marker).
    ///
    /// Returns the section body (without `end_tag`) and the offset just past
    /// `end_tag`, shifted by `payload_at`. When a function opener precedes
    /// the first `end_tag`, the terminator is located with
    /// `xml_value_aware_end_tag_scan` over `<parameter=` … `</parameter>`
    /// regions; otherwise the first `end_tag` is used.
    fn bound_section<'a>(
        &self,
        payload: &'a str,
        payload_at: usize,
        end_tag: &str,
    ) -> Option<(&'a str, usize)> {
        let plain_end =
            bound_context_or_plain_end(payload, end_tag, qwen_function_context_proven)?;
        let body_len = if let Some(end_rel) = plain_end {
            end_rel - end_tag.len()
        } else {
            xml_value_aware_end_tag_scan(payload, "<parameter=", "</parameter>", end_tag)?
        };
        Some((&payload[..body_len], payload_at + body_len + end_tag.len()))
    }
}
impl ToolParser for Qwen3Coder {
/// Parses one `<function=name>…</function>` element from `text`.
///
/// Every `<parameter=key>value</parameter>` found before the last
/// `</function>` becomes an argument. Values are converted with
/// `convert_param_value` using the schema of the matching tool in `tools`,
/// when one is found.
///
/// # Errors
/// Fails when there is no well-formed `<function=…>` opener or no
/// `</function>` after it.
fn parse(&self, text: &str, tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
let marker_at = find_first_qwen_function_open(text)
.ok_or_else(|| err("qwen3_coder: No function provided."))?;
let (name_start, after_close_bracket) = qwen_function_open_at(text, marker_at)
.ok_or_else(|| err("qwen3_coder: No function provided."))?;
let after = &text[after_close_bracket..];
let end = after
.rfind("</function>")
.ok_or_else(|| err("qwen3_coder: No function provided."))?;
// `after_close_bracket - 1` is the `>` that terminates the function name.
let func_name = text[name_start..after_close_bracket - 1].to_owned();
let params_str = &after[..end];
let props = tool_properties(tools, &func_name);
let mut args = serde_json::Map::new();
for cap in find_all(params_str, "<parameter=", "</parameter>") {
// `cap` looks like `key>value`; entries without `>` are ignored.
let pgt = match cap.find('>') {
Some(p) => p,
None => continue,
};
let pname = cap[..pgt].to_owned();
let mut pval = cap[pgt + 1..].to_owned();
// Drop at most one leading and one trailing newline around the value.
if let Some(r) = pval.strip_prefix('\n') {
pval = r.to_owned();
}
if let Some(r) = pval.strip_suffix('\n') {
pval = r.to_owned();
}
args.insert(pname.clone(), convert_param_value(&pval, &pname, props));
}
Ok(obj(&func_name, Value::Object(args)))
}
fn name(&self) -> &'static str {
"qwen3_coder"
}
/// Consumes the first marker-delimited section in `buffer`.
///
/// Returns `Ok(None)` when no complete section is available. Otherwise
/// returns the parsed calls and the offset just past the section; the call
/// list is empty when the body has no function opener, no `</function>`
/// outside parameter values, or fails to parse.
fn try_parse_one_call(
&self,
buffer: &str,
tools: Option<&Value>,
) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
let start_tag = self.tool_call_start();
let end_tag = self.tool_call_end();
let Some((payload_at, payload)) = locate_tagged_payload(buffer, start_tag) else {
return Ok(None);
};
let Some((bounded, end_pos)) = self.bound_section(payload, payload_at, end_tag) else {
return Ok(None);
};
let function_close = "</function>";
let parameter_open = "<parameter=";
let parameter_close = "</parameter>";
let Some(marker_at) = find_first_qwen_function_open(bounded) else {
return Ok(Some((Vec::new(), end_pos)));
};
let Some((_, after_open_rel)) = qwen_function_open_at(bounded, marker_at) else {
return Ok(Some((Vec::new(), end_pos)));
};
// Walk forward from the opener, skipping whole parameter elements, until a
// `</function>` is met that comes before the next `<parameter=`.
let mut cursor = after_open_rel;
let fn_close_found = loop {
let next_fclose = bounded[cursor..].find(function_close);
let next_popen = bounded[cursor..].find(parameter_open);
match (next_fclose, next_popen) {
// No `</function>` remains at all.
(None, _) => break None,
(Some(f), None) => break Some(cursor + f),
(Some(f), Some(p)) if f <= p => break Some(cursor + f),
// A parameter opens first: jump past its closing tag so a
// `</function>` inside the value is not taken as the closer.
(Some(_), Some(p)) => {
let region_after_open = cursor + p + parameter_open.len();
let Some(rel) = bounded[region_after_open..].find(parameter_close) else {
break None;
};
let next_cursor = region_after_open + rel + parameter_close.len();
debug_assert!(next_cursor > cursor);
cursor = next_cursor;
}
}
};
if fn_close_found.is_none() {
return Ok(Some((Vec::new(), end_pos)));
}
let inner = strip_section_markers(&buffer[..end_pos], start_tag, end_tag);
// Parse errors and empty results both consume the section with no calls.
match self.parse(inner, tools) {
Ok(calls) if !calls.is_empty() => Ok(Some((calls, end_pos))),
_ => Ok(Some((Vec::new(), end_pos))),
}
}
}
/// Checks whether a well-formed `<function=name>` opener begins at byte `at`.
///
/// The name is the non-empty run of bytes up to the next `>`; a `<` before
/// that `>` makes the opener invalid. On success returns
/// `(name_start, after_gt)`: the offset of the first name byte and the
/// offset just past the `>`.
fn qwen_function_open_at(payload: &str, at: usize) -> Option<(usize, usize)> {
    const OPENER: &str = "<function=";
    let bytes = payload.as_bytes();
    let name_start = at + OPENER.len();
    if name_start > bytes.len() || &bytes[at..name_start] != OPENER.as_bytes() {
        return None;
    }
    // First `>` or `<` after the opener; a missing delimiter means no match.
    let stop = bytes[name_start..]
        .iter()
        .position(|&byte| byte == b'>' || byte == b'<')?;
    if stop == 0 || bytes[name_start + stop] != b'>' {
        return None;
    }
    Some((name_start, name_start + stop + 1))
}
/// Returns the offset of the first `<function=` in `payload`, provided it
/// forms a well-formed opener; later occurrences are not examined.
fn find_first_qwen_function_open(payload: &str) -> Option<usize> {
    payload
        .find("<function=")
        .filter(|&marker_at| qwen_function_open_at(payload, marker_at).is_some())
}
/// Converts the raw text `v` of parameter `name` into a JSON value, guided
/// by the parameter's declared schema type in `props`.
///
/// * `null` in any letter case becomes JSON `null`.
/// * With no schema for the parameter, the text is kept as a string.
/// * String-like types keep the text verbatim.
/// * Integer-like and number-like types are parsed; on failure the text is
///   kept as a string.
/// * Boolean-like types are `true` only for `true` in any letter case.
/// * Object/array-like types are parsed as JSON, then via `literal_eval`.
/// * Any other type goes through `literal_eval`.
fn convert_param_value(
    v: &str,
    name: &str,
    props: Option<&serde_json::Map<String, Value>>,
) -> Value {
    if v.eq_ignore_ascii_case("null") {
        return Value::Null;
    }
    let as_string = || Value::String(v.to_owned());
    let Some(schema) = props.and_then(|known| known.get(name)) else {
        return as_string();
    };
    let declared = schema
        .get("type")
        .and_then(Value::as_str)
        .unwrap_or("string")
        .trim()
        .to_lowercase();
    let ptype = declared.as_str();

    if matches!(
        ptype,
        "string" | "str" | "text" | "varchar" | "char" | "enum"
    ) {
        return as_string();
    }
    let integer_like = ["int", "uint", "long", "short", "unsigned"]
        .iter()
        .any(|prefix| ptype.starts_with(prefix));
    if integer_like {
        return v
            .trim()
            .parse::<i64>()
            .map_or_else(|_| as_string(), |i| Value::Number(i.into()));
    }
    if ptype.starts_with("num") || ptype.starts_with("float") {
        return v
            .trim()
            .parse::<f64>()
            .ok()
            .and_then(serde_json::Number::from_f64)
            .map_or_else(|| as_string(), Value::Number);
    }
    if matches!(ptype, "boolean" | "bool" | "binary") {
        return Value::Bool(v.trim().eq_ignore_ascii_case("true"));
    }
    let container_like = matches!(ptype, "object" | "array" | "arr")
        || ptype.starts_with("dict")
        || ptype.starts_with("list");
    if container_like {
        return serde_json::from_str(v).unwrap_or_else(|_| literal_eval(v));
    }
    literal_eval(v)
}
/// Collects the text between each successive `open` … `close` pair in
/// `text`, scanning left to right without overlap.
///
/// Scanning stops at the first `open` that has no `close` after it.
fn find_all(text: &str, open: &str, close: &str) -> Vec<String> {
    let mut found = Vec::new();
    let mut rest = text;
    while let Some((_, after_open)) = rest.split_once(open) {
        let Some((inner, after_close)) = after_open.split_once(close) else {
            break;
        };
        found.push(inner.to_owned());
        rest = after_close;
    }
    found
}
/// Parser for tool calls written either as JSON or as a function name
/// followed by `<arg_key>`/`<arg_value>` pairs.
pub struct Glm47;

impl Glm47 {
    /// Bounds one section inside `payload` (the text after the start marker).
    ///
    /// Returns the section body (without `end_tag`) and the offset just past
    /// `end_tag`, shifted by `payload_at`. How the terminator is searched
    /// depends on how the payload starts:
    /// * JSON object or array: outside double-quoted strings, once the text
    ///   before the first `end_tag` confirms the JSON start;
    /// * otherwise: via `xml_value_aware_end_tag_scan` over `<arg_value>` …
    ///   `</arg_value>` regions, once an `<arg_key>` precedes the first
    ///   `end_tag`.
    ///
    /// When the context is not confirmed, the first `end_tag` is used.
    fn bound_section<'a>(
        &self,
        payload: &'a str,
        payload_at: usize,
        end_tag: &str,
    ) -> Option<(&'a str, usize)> {
        // Shared logic of the two JSON flavours; yields the body length.
        let json_body_len = |proven: fn(&str) -> bool| -> Option<usize> {
            let end_pos = match bound_context_or_plain_end(payload, end_tag, proven)? {
                Some(plain_end) => plain_end,
                None => closed_but_malformed_end_pos_quote_aware(payload, 0, end_tag, b"\"")?,
            };
            Some(end_pos - end_tag.len())
        };
        let body_len = match classify_json_payload_start(payload) {
            JsonPayloadStart::Object => json_body_len(json_object_context_proven)?,
            JsonPayloadStart::Array => json_body_len(json_array_context_proven)?,
            JsonPayloadStart::None => {
                let plain_end = bound_context_or_plain_end(
                    payload,
                    end_tag,
                    literal_context_proven("<arg_key>"),
                )?;
                match plain_end {
                    Some(end_rel) => end_rel - end_tag.len(),
                    None => xml_value_aware_end_tag_scan(
                        payload,
                        "<arg_value>",
                        "</arg_value>",
                        end_tag,
                    )?,
                }
            }
        };
        Some((&payload[..body_len], payload_at + body_len + end_tag.len()))
    }
}
impl ToolParser for Glm47 {
    /// Parses one call from `text`, trying in order:
    /// 1. `name<arg_key>k</arg_key><arg_value>v</arg_value>…`;
    /// 2. a JSON description (`glm_parse_json`);
    /// 3. a plain-text description (`glm_parse_plain`);
    /// 4. a placeholder call named `unknown` carrying the raw text.
    ///
    /// In form 1, values of parameters declared as strings in `tools` are
    /// kept as trimmed text; all others go through `deserialize`.
    fn parse(&self, text: &str, tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
        let Some(first_key_at) = text.find("<arg_key>") else {
            let call = glm_parse_json(text, tools)
                .or_else(|| glm_parse_plain(text, tools))
                .unwrap_or_else(|| {
                    ToolCall::new_nameless_id("unknown", serde_json::json!({"raw": text.trim()}))
                });
            return Ok(vec![call]);
        };
        let func_name = text[..first_key_at].trim();
        let string_args = string_arg_names(tools, func_name);
        let mut args = serde_json::Map::new();
        let pairs = find_kv_pairs(
            text,
            "<arg_key>",
            "</arg_key>",
            "<arg_value>",
            "</arg_value>",
        );
        for (raw_key, raw_val) in pairs {
            let key = raw_key.trim().to_owned();
            let trimmed = raw_val.trim();
            let val = if string_args.contains(&key) {
                Value::String(trimmed.to_owned())
            } else {
                deserialize(trimmed)
            };
            args.insert(key, val);
        }
        Ok(obj(func_name, Value::Object(args)))
    }

    fn name(&self) -> &'static str {
        "glm47"
    }

    /// Consumes the first marker-delimited section in `buffer`.
    ///
    /// Returns `Ok(None)` when no complete section is available. Otherwise
    /// returns the parsed calls and the offset just past the section; the
    /// call list is empty when a JSON-looking body is not balanced or parsing
    /// fails.
    fn try_parse_one_call(
        &self,
        buffer: &str,
        tools: Option<&Value>,
    ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
        let start_tag = self.tool_call_start();
        let end_tag = self.tool_call_end();
        let Some((payload_at, payload)) = locate_tagged_payload(buffer, start_tag) else {
            return Ok(None);
        };
        let Some((bounded, end_pos)) = self.bound_section(payload, payload_at, end_tag) else {
            return Ok(None);
        };
        let parseable = match classify_json_payload_start(bounded) {
            JsonPayloadStart::Object => balanced_json_object_prefix(bounded).is_some(),
            JsonPayloadStart::Array => balanced_json_array_prefix(bounded).is_some(),
            JsonPayloadStart::None => true,
        };
        let calls = if parseable {
            let inner = strip_section_markers(&buffer[..end_pos], start_tag, end_tag);
            // Parse failures are swallowed: the section is still consumed.
            self.parse(inner, tools).unwrap_or_default()
        } else {
            Vec::new()
        };
        Ok(Some((calls, end_pos)))
    }
}
/// Re-types the values of `arguments` according to the schema of
/// `func_name` in `tools`.
///
/// * Parameters declared as strings: string values are kept; any other
///   value is replaced by its JSON text.
/// * Other parameters: string values are re-interpreted with `deserialize`;
///   non-string values are kept as they are.
fn normalize_arguments(
    func_name: &str,
    arguments: &serde_json::Map<String, Value>,
    tools: Option<&Value>,
) -> serde_json::Map<String, Value> {
    let string_args = string_arg_names(tools, func_name);
    arguments
        .iter()
        .map(|(key, value)| {
            let wants_string = string_args.contains(key);
            let normalized = match (wants_string, value) {
                (true, Value::String(text)) => Value::String(text.clone()),
                (true, other) => Value::String(other.to_string()),
                (false, Value::String(text)) => deserialize(text),
                (false, other) => other.clone(),
            };
            (key.clone(), normalized)
        })
        .collect()
}
/// Tries to read a tool call from a JSON description in `text`.
///
/// Accepted shapes (after unwrapping a non-empty array whose first element
/// is an object):
/// * `{"name": …, "arguments": …}`
/// * `{"function": …, "arguments": …}`
/// * `{"tool": {"name": …, "arguments": …}}`
///
/// Returns `None` when the text is not JSON, matches none of the shapes,
/// the name is not a string, or the arguments are neither absent nor
/// resolvable to an object.
fn glm_parse_json(text: &str, tools: Option<&Value>) -> Option<ToolCall> {
let parsed: Value = serde_json::from_str(text.trim()).ok()?;
// An array of calls is reduced to its first element.
let parsed = match &parsed {
Value::Array(a) if !a.is_empty() && a[0].is_object() => a[0].clone(),
_ => parsed,
};
let p = parsed.as_object()?;
let (mut name, mut arguments) = if p.contains_key("name") && p.contains_key("arguments") {
(p.get("name").cloned(), p.get("arguments").cloned())
} else if p.contains_key("function") && p.contains_key("arguments") {
(p.get("function").cloned(), p.get("arguments").cloned())
} else if let Some(t) = p.get("tool").and_then(Value::as_object) {
(t.get("name").cloned(), t.get("arguments").cloned())
} else {
(None, None)
};
// The name slot may itself be an object holding `name` (and `arguments`);
// its own `arguments` are used only when none were found at the outer level.
if let Some(Value::Object(nm)) = &name {
if arguments.is_none() {
arguments = nm.get("arguments").cloned();
}
name = nm.get("name").cloned();
}
// Arguments given as a string are decoded once more.
if let Some(Value::String(s)) = &arguments {
arguments = Some(deserialize(s));
}
let name = name.and_then(|n| n.as_str().map(str::to_owned))?;
match arguments {
// No arguments at all: call with an empty object.
None => Some(ToolCall::new_nameless_id(name, serde_json::json!({}))),
Some(Value::Object(m)) => {
let norm = normalize_arguments(&name, &m, tools);
Some(ToolCall::new_nameless_id(name, Value::Object(norm)))
}
// Arguments present but not an object: reject.
_ => None,
}
}
/// Tries to read a tool call from free-form text, in this order:
///
/// 1. `name\n<object>`: first line is the name, the rest decodes to an object;
/// 2. `name`: no arguments, yields an empty object;
/// 3. `name <object>`: text after the first space decodes to an object;
/// 4. `name k=v k=v …`: whitespace-separated `key=value` tokens;
/// 5. otherwise the text after the name is stored under the `raw` key.
///
/// Returns `None` only when the trimmed text is empty or the name (the text
/// before the first space) is empty.
fn glm_parse_plain(text: &str, tools: Option<&Value>) -> Option<ToolCall> {
let stripped = text.trim();
if stripped.is_empty() {
return None;
}
// Form 1: name on the first line, object on the following lines.
if let Some(nl) = stripped.find('\n') {
let name = stripped[..nl].trim().to_owned();
let rest = stripped[nl + 1..].trim();
if !name.is_empty()
&& !rest.is_empty()
&& let Value::Object(m) = deserialize(rest)
{
let norm = normalize_arguments(&name, &m, tools);
return Some(ToolCall::new_nameless_id(name, Value::Object(norm)));
}
}
// Remaining forms split the name from the rest at the first space.
let (name, rest) = match stripped.split_once(' ') {
Some((n, r)) => (n.to_owned(), r.trim().to_owned()),
None => (stripped.to_owned(), String::new()),
};
if name.is_empty() {
return None;
}
// Form 2: bare name.
if rest.is_empty() {
return Some(ToolCall::new_nameless_id(name, serde_json::json!({})));
}
// Form 3: name followed by something that decodes to an object.
if let Value::Object(m) = deserialize(&rest) {
let norm = normalize_arguments(&name, &m, tools);
return Some(ToolCall::new_nameless_id(name, Value::Object(norm)));
}
// Form 4: every whitespace-separated token must be `key=value` with a
// non-empty key; one bad token rejects the whole attempt.
if rest.contains('=') {
let mut args = serde_json::Map::new();
let string_args = string_arg_names(tools, &name);
let mut ok = true;
for token in rest.split_whitespace() {
match token.split_once('=') {
Some((k, v)) if !k.trim().is_empty() => {
let key = k.trim().to_owned();
if string_args.contains(&key) {
args.insert(key, Value::String(v.to_owned()));
} else {
args.insert(key, deserialize(v));
}
}
_ => {
ok = false;
break;
}
}
}
if ok && !args.is_empty() {
return Some(ToolCall::new_nameless_id(name, Value::Object(args)));
}
}
// Form 5: keep the unparsed remainder.
Some(ToolCall::new_nameless_id(
name,
serde_json::json!({"raw": rest}),
))
}
/// Extracts successive key/value pairs delimited by `ko … kc` (key) and
/// `vo … vc` (value), scanning `text` left to right.
///
/// Text between the delimiters is ignored. Scanning stops at the first
/// incomplete pair; pairs collected up to that point are returned.
fn find_kv_pairs(text: &str, ko: &str, kc: &str, vo: &str, vc: &str) -> Vec<(String, String)> {
    let mut pairs = Vec::new();
    let mut rest = text;
    loop {
        let Some((_, after_key_open)) = rest.split_once(ko) else {
            break;
        };
        let Some((key, after_key_close)) = after_key_open.split_once(kc) else {
            break;
        };
        let Some((_, after_val_open)) = after_key_close.split_once(vo) else {
            break;
        };
        let Some((val, after_val_close)) = after_val_open.split_once(vc) else {
            break;
        };
        pairs.push((key.to_owned(), val.to_owned()));
        rest = after_val_close;
    }
    pairs
}
pub struct KimiK2;
impl KimiK2 {
fn parse_single(text: &str) -> Result<ToolCall, Error> {
let abeg = "<|tool_call_argument_begin|>";
let abeg_idx = text
.find(abeg)
.ok_or_else(|| err("kimi_k2: No tool call found."))?;
let head = text[..abeg_idx].trim();
let colon = head
.rfind(':')
.ok_or_else(|| err("kimi_k2: No tool call found."))?;
if head[colon + 1..].trim().parse::<u64>().is_err() {
return Err(err("kimi_k2: No tool call found."));
}
let full_id = head.to_owned();
let base = &head[..colon];
let func_name = base.strip_prefix("functions.").unwrap_or(base).to_owned();
let args_part = text[abeg_idx + abeg.len()..].trim();
let arg = deserialize(args_part);
Ok(ToolCall::new(func_name, arg, Some(full_id)))
}
}
impl ToolParser for KimiK2 {
/// Parses every `<|tool_call_begin|>…<|tool_call_end|>` element in `text`.
/// When none is present, the whole text is treated as one call fragment.
///
/// # Errors
/// Fails as soon as any fragment is rejected by `parse_single`.
fn parse(&self, text: &str, _tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
let matches = find_all(text, "<|tool_call_begin|>", "<|tool_call_end|>");
if matches.is_empty() {
Ok(vec![Self::parse_single(text)?])
} else {
matches.iter().map(|m| Self::parse_single(m)).collect()
}
}
fn name(&self) -> &'static str {
"kimi_k2"
}
/// Consumes the first marker-delimited section in `buffer`; a section may
/// hold several calls.
///
/// The section end is the first end marker that is not inside a call
/// element. Returns `Ok(None)` when the start marker is missing or neither
/// an end marker nor a further call opener follows. A call element that is
/// malformed makes the method fall back to skipping up to the end marker
/// with no calls (or `Ok(None)` if that marker cannot be found either).
fn try_parse_one_call(
&self,
buffer: &str,
tools: Option<&Value>,
) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
let start_tag = self.tool_call_start();
let end_tag = self.tool_call_end();
let Some((payload_at, payload)) = locate_tagged_payload(buffer, start_tag) else {
return Ok(None);
};
let call_begin = "<|tool_call_begin|>";
let arg_begin = "<|tool_call_argument_begin|>";
let call_end = "<|tool_call_end|>";
// Fallback: skip to the first end marker after the payload start.
let r13_plain = || -> Option<(Vec<ToolCall>, usize)> {
closed_but_malformed_end_pos(buffer, payload_at, end_tag).map(|ep| (Vec::new(), ep))
};
// Fallback: same, but ignoring end markers inside double-quoted strings.
let r13_json = || -> Option<(Vec<ToolCall>, usize)> {
closed_but_malformed_end_pos_quote_aware(buffer, payload_at, end_tag, b"\"")
.map(|ep| (Vec::new(), ep))
};
// `cursor` is an offset into `payload`, always just past the last complete
// call element.
let mut cursor = 0usize;
let section_end_rel = loop {
let end_rel = payload[cursor..].find(end_tag).map(|p| cursor + p);
let open_rel = payload[cursor..].find(call_begin).map(|p| cursor + p);
let open_rel = match (end_rel, open_rel) {
// End marker comes first (or no further call): the section ends here.
(Some(e), Some(o)) if e <= o => break e,
(Some(_), Some(o)) => o,
(Some(e), None) => break e,
(None, Some(o)) => o,
(None, None) => return Ok(None),
};
let after_open = open_rel + call_begin.len();
let arg_open_rel = match payload[after_open..].find(arg_begin) {
Some(a) => after_open + a,
None => return Ok(r13_plain()),
};
let args_at = arg_open_rel + arg_begin.len();
let args_region = &payload[args_at..];
// Length, relative to `args_at`, of the arguments plus the call-end marker.
let after_args_rel = match classify_json_payload_start(args_region) {
JsonPayloadStart::Object => {
// Look for the call-end marker only after the balanced object, so
// marker text inside the JSON is not taken as the terminator.
let Some((_, obj_end)) = balanced_json_object_prefix(args_region) else {
return Ok(r13_json());
};
let Some(end_rel) = args_region[obj_end..].find(call_end) else {
return Ok(r13_plain());
};
obj_end + end_rel + call_end.len()
}
_ => {
let Some(end_rel) = args_region.find(call_end) else {
return Ok(r13_plain());
};
end_rel + call_end.len()
}
};
cursor = args_at + after_args_rel;
};
let end_pos = payload_at + section_end_rel + end_tag.len();
let inner = strip_section_markers(&buffer[..end_pos], start_tag, end_tag);
// Parse errors and empty results both consume the section with no calls.
match self.parse(inner, tools) {
Ok(calls) if !calls.is_empty() => Ok(Some((calls, end_pos))),
_ => Ok(Some((Vec::new(), end_pos))),
}
}
}
/// Parser for tool calls written either as a JSON object or as a function
/// name followed by `<longcat_arg_key>`/`<longcat_arg_value>` pairs.
pub struct Longcat;

impl Longcat {
    /// Bounds one section inside `payload` (the text after the start marker).
    ///
    /// Returns the section body (without `end_tag`) and the offset just past
    /// `end_tag`, shifted by `payload_at`. For a payload that starts as a
    /// JSON object the terminator is searched outside double-quoted strings.
    /// Otherwise it is located with `xml_value_aware_end_tag_scan` over
    /// `<longcat_arg_value>` … `</longcat_arg_value>` regions. In both cases
    /// the first `end_tag` is used when the preceding text does not confirm
    /// the expected context.
    fn bound_section<'a>(
        &self,
        payload: &'a str,
        payload_at: usize,
        end_tag: &str,
    ) -> Option<(&'a str, usize)> {
        let body_len = match classify_json_payload_start(payload) {
            JsonPayloadStart::Object => {
                let plain_end =
                    bound_context_or_plain_end(payload, end_tag, json_object_context_proven)?;
                let end_pos = match plain_end {
                    Some(end_rel) => end_rel,
                    None => {
                        closed_but_malformed_end_pos_quote_aware(payload, 0, end_tag, b"\"")?
                    }
                };
                end_pos - end_tag.len()
            }
            _ => {
                let plain_end = bound_context_or_plain_end(
                    payload,
                    end_tag,
                    literal_context_proven("<longcat_arg_key>"),
                )?;
                match plain_end {
                    Some(end_rel) => end_rel - end_tag.len(),
                    None => xml_value_aware_end_tag_scan(
                        payload,
                        "<longcat_arg_value>",
                        "</longcat_arg_value>",
                        end_tag,
                    )?,
                }
            }
        };
        Some((&payload[..body_len], payload_at + body_len + end_tag.len()))
    }
}
impl ToolParser for Longcat {
    /// Parses one call from `text`.
    ///
    /// * JSON form: the text starts with `{` and is valid JSON. The name is
    ///   the `name` string (empty if absent). The arguments are `arguments`
    ///   if present, otherwise the whole object.
    /// * Tagged form: `name<longcat_arg_key>k</longcat_arg_key>
    ///   <longcat_arg_value>v</longcat_arg_value>…`. Values of parameters
    ///   declared as strings in `tools` are kept as trimmed text; all others
    ///   go through `deserialize`.
    ///
    /// # Errors
    /// Fails when neither form applies.
    fn parse(&self, text: &str, tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
        let text = text.trim();
        let as_json = text
            .starts_with('{')
            .then(|| serde_json::from_str::<Value>(text).ok())
            .flatten();
        if let Some(v) = as_json {
            let name = v
                .get("name")
                .and_then(Value::as_str)
                .unwrap_or("")
                .to_owned();
            let args = match v.get("arguments") {
                Some(arguments) => arguments.clone(),
                None => v,
            };
            return Ok(obj(&name, args));
        }

        let Some(first_key_at) = text.find("<longcat_arg_key>") else {
            return Err(err("longcat: no function name"));
        };
        let func_name = text[..first_key_at].trim();
        let string_args = string_arg_names(tools, func_name);
        let mut args = serde_json::Map::new();
        let pairs = find_kv_pairs(
            text,
            "<longcat_arg_key>",
            "</longcat_arg_key>",
            "<longcat_arg_value>",
            "</longcat_arg_value>",
        );
        for (raw_key, raw_val) in pairs {
            let key = raw_key.trim().to_owned();
            let trimmed = raw_val.trim();
            let val = if string_args.contains(&key) {
                Value::String(trimmed.to_owned())
            } else {
                deserialize(trimmed)
            };
            args.insert(key, val);
        }
        Ok(obj(func_name, Value::Object(args)))
    }

    fn name(&self) -> &'static str {
        "longcat"
    }

    /// Consumes the first marker-delimited section in `buffer`.
    ///
    /// Returns `Ok(None)` when no complete section is available. Otherwise
    /// returns the parsed calls and the offset just past the section; the
    /// call list is empty when a JSON-object body is not balanced or parsing
    /// fails.
    fn try_parse_one_call(
        &self,
        buffer: &str,
        tools: Option<&Value>,
    ) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
        let start_tag = self.tool_call_start();
        let end_tag = self.tool_call_end();
        let Some((payload_at, payload)) = locate_tagged_payload(buffer, start_tag) else {
            return Ok(None);
        };
        let Some((bounded, end_pos)) = self.bound_section(payload, payload_at, end_tag) else {
            return Ok(None);
        };
        let unbalanced_object = matches!(
            classify_json_payload_start(bounded),
            JsonPayloadStart::Object
        ) && balanced_json_object_prefix(bounded).is_none();
        let calls = if unbalanced_object {
            Vec::new()
        } else {
            let inner = strip_section_markers(&buffer[..end_pos], start_tag, end_tag);
            // Parse failures are swallowed: the section is still consumed.
            self.parse(inner, tools).unwrap_or_default()
        };
        Ok(Some((calls, end_pos)))
    }
}
/// Tool-call parser registered under the name "minimax_m2".
pub struct MinimaxM2;
/// Trims `s` and removes one pair of matching surrounding quotes (`"` or `'`).
///
/// Text that is not wrapped in a matching pair, including a lone quote character,
/// is returned trimmed but otherwise unchanged.
fn extract_name(s: &str) -> String {
    let trimmed = s.trim();
    // Double quotes are tried first, then single quotes; both ends must match.
    let unquoted = ['"', '\'']
        .into_iter()
        .find_map(|quote| trimmed.strip_prefix(quote)?.strip_suffix(quote));
    unquoted.unwrap_or(trimmed).to_owned()
}
/// Collects the JSON-schema type names that `schema` allows.
///
/// Names come from the `type` keyword (string or array of strings), from the JSON
/// kinds of the `enum` members, and recursively from every `anyOf` / `oneOf` /
/// `allOf` sub-schema. The result is de-duplicated and sorted; when nothing is
/// found (or `schema` is not an object) it is `["string"]`.
fn schema_types(schema: &Value) -> Vec<String> {
    let mut seen = std::collections::BTreeSet::new();
    if let Value::Object(fields) = schema {
        match fields.get("type") {
            Some(Value::String(single)) => {
                seen.insert(single.clone());
            }
            Some(Value::Array(many)) => {
                // Non-string entries in a `type` array are ignored.
                seen.extend(many.iter().filter_map(Value::as_str).map(str::to_owned));
            }
            _ => {}
        }
        if let Some(Value::Array(members)) = fields.get("enum") {
            // Each enum member contributes the schema name of its own JSON kind.
            seen.extend(members.iter().map(|member| {
                let kind = match member {
                    Value::Null => "null",
                    Value::Bool(_) => "boolean",
                    Value::Number(n) if n.is_i64() || n.is_u64() => "integer",
                    Value::Number(_) => "number",
                    Value::String(_) => "string",
                    Value::Array(_) => "array",
                    Value::Object(_) => "object",
                };
                kind.to_owned()
            }));
        }
        let sub_schemas = ["anyOf", "oneOf", "allOf"]
            .into_iter()
            .filter_map(|keyword| fields.get(keyword))
            .filter_map(Value::as_array)
            .flatten();
        for sub_schema in sub_schemas {
            seen.extend(schema_types(sub_schema));
        }
    }
    if seen.is_empty() {
        return vec!["string".to_owned()];
    }
    seen.into_iter().collect()
}
/// Converts the raw text of a parameter into a JSON value, using the JSON-schema
/// type names in `ptypes` as hints.
///
/// * `value`: parameter text as extracted from the model output.
/// * `ptypes`: candidate type names, compared case-insensitively. The aliases
///   `int`, `float`, `bool`, `str` and `text` are accepted.
///
/// The literals `null`, `none` and `nil` (any case) always yield `Value::Null`.
/// Otherwise the declared types are tried in a fixed priority order (integer,
/// number, boolean, object/array, string) and the first successful conversion wins.
/// When nothing matches, the text is parsed as JSON, falling back to a plain string.
fn convert_with_types(value: &str, ptypes: &[String]) -> Value {
    // A "null" entry in `ptypes` only marks the parameter as nullable. It must not
    // turn a real value into null, so only the value's own spelling decides here.
    let lowered = value.to_lowercase();
    if matches!(lowered.as_str(), "null" | "none" | "nil") {
        return Value::Null;
    }

    let declared: Vec<String> = ptypes.iter().map(|t| t.to_lowercase()).collect();

    // Narrow types are tried before "string", which accepts anything.
    const PRIORITY: [&str; 11] = [
        "integer", "int", "number", "float", "boolean", "bool", "object", "array", "string",
        "str", "text",
    ];
    for kind in PRIORITY {
        if !declared.iter().any(|t| t == kind) {
            continue;
        }
        match kind {
            "string" | "str" | "text" => return Value::String(value.to_owned()),
            "integer" | "int" => {
                if let Ok(i) = value.parse::<i64>() {
                    return Value::Number(i.into());
                }
            }
            "number" | "float" => {
                // `from_f64` rejects NaN and infinities, which JSON cannot represent.
                if let Ok(f) = value.parse::<f64>()
                    && let Some(n) = serde_json::Number::from_f64(f)
                {
                    return Value::Number(n);
                }
            }
            "boolean" | "bool" => {
                let flag = value.trim().to_lowercase();
                if ["true", "1", "yes", "on"].contains(&flag.as_str()) {
                    return Value::Bool(true);
                }
                if ["false", "0", "no", "off"].contains(&flag.as_str()) {
                    return Value::Bool(false);
                }
            }
            "object" | "array" => {
                if let Ok(v) = serde_json::from_str::<Value>(value) {
                    return v;
                }
            }
            _ => {}
        }
    }
    // No declared type accepted the text: take it as JSON if it parses, else as a string.
    serde_json::from_str::<Value>(value).unwrap_or_else(|_| Value::String(value.to_owned()))
}
// `ToolParser` implementation for the "minimax_m2" tool-call format.
impl ToolParser for MinimaxM2 {
/// Parses every `<invoke name=...>...</invoke>` block in `text` into a `ToolCall`.
///
/// Each `<parameter name=...>...</parameter>` inside an invoke block becomes one
/// argument. Its value is converted with `convert_with_types`, using the types that
/// `schema_types` reports for that parameter, or `["string"]` when the tool schema
/// has no entry for it.
///
/// # Errors
/// Fails with "minimax_m2: No tool call found" when no invoke block is found.
fn parse(&self, text: &str, tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
let invokes = find_all(text, "<invoke name=", "</invoke>");
if invokes.is_empty() {
return Err(err("minimax_m2: No tool call found"));
}
let mut calls = Vec::new();
for inv in &invokes {
// The text before the first `>` is the (possibly quoted) function name.
let name_seg = inv.split('>').next().unwrap_or("");
let function_name = extract_name(name_seg);
let props = tool_properties(tools, &function_name);
let mut args = serde_json::Map::new();
for pm in find_all(inv, "<parameter name=", "</parameter>") {
// Parameters without a closing `>` on their name are skipped.
let gt = match pm.find('>') {
Some(g) => g,
None => continue,
};
let pname = extract_name(&pm[..gt]);
let mut pval = pm[gt + 1..].trim().to_owned();
// NOTE(review): after `trim()` the value cannot start or end with '\n', so the
// two strips below never change it.
if let Some(r) = pval.strip_prefix('\n') {
pval = r.to_owned();
}
if let Some(r) = pval.strip_suffix('\n') {
pval = r.to_owned();
}
let ptypes = props
.and_then(|p| p.get(&pname))
.map(schema_types)
.unwrap_or_else(|| vec!["string".to_owned()]);
args.insert(pname, convert_with_types(&pval, &ptypes));
}
calls.push(ToolCall::new_nameless_id(
&function_name,
Value::Object(args),
));
}
Ok(calls)
}
/// Parser name; also the key used by the default marker lookups.
fn name(&self) -> &'static str {
"minimax_m2"
}
/// Tries to consume one tool-call section from `buffer`.
///
/// The end tag is searched for while stepping over complete
/// `<invoke name=`...`</invoke>` blocks, so an end tag inside an invoke block does
/// not terminate the section. Returns `Ok(None)` when no start tag payload is found
/// or neither an end tag nor an invoke opener remains. `calls` is empty when `parse`
/// fails or yields nothing. This implementation never returns `Err`.
fn try_parse_one_call(
&self,
buffer: &str,
tools: Option<&Value>,
) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
let start_tag = self.tool_call_start();
let end_tag = self.tool_call_end();
let Some((payload_at, payload)) = locate_tagged_payload(buffer, start_tag) else {
return Ok(None);
};
let open = "<invoke name=";
let close = "</invoke>";
// `cursor` is an offset into `payload`, just past the last complete invoke block.
let mut cursor = 0usize;
let section_end_rel = loop {
let end_rel = payload[cursor..].find(end_tag).map(|p| cursor + p);
let open_rel = payload[cursor..].find(open).map(|p| cursor + p);
let open_rel = match (end_rel, open_rel) {
// The end tag comes first (or there is no further invoke): section ends here.
(Some(e), Some(o)) if e <= o => break e,
(Some(_), Some(o)) => o,
(Some(e), None) => break e,
(None, Some(o)) => o,
(None, None) => return Ok(None),
};
let close_search_from = open_rel + open.len();
let Some(close_rel) = payload[close_search_from..].find(close) else {
// Unclosed invoke block: consume up to an end tag that lies outside parameter
// values, reporting no calls; `None` when no such end tag is found.
return Ok(
xml_value_aware_end_tag_scan(payload, "<parameter name=", "</parameter>", end_tag)
.map(|rel| (Vec::new(), payload_at + rel + end_tag.len())),
);
};
cursor = close_search_from + close_rel + close.len();
};
let end_pos = payload_at + section_end_rel + end_tag.len();
let inner = strip_section_markers(&buffer[..end_pos], start_tag, end_tag);
// Parse errors are swallowed: the section is still consumed, just without calls.
match self.parse(inner, tools) {
Ok(calls) if !calls.is_empty() => Ok(Some((calls, end_pos))),
_ => Ok(Some((Vec::new(), end_pos))),
}
}
}
/// Tool-call parser registered under the name "function_gemma".
pub struct FunctionGemma;

impl FunctionGemma {
    /// Bounds the tool-call section that begins at `payload`.
    ///
    /// The section end is taken from `bound_context_or_plain_end`; when that reports
    /// no end, `closed_but_malformed_end_pos_value_aware` (with `<escape>` as both
    /// value delimiters) is used instead. Returns the payload text preceding the
    /// final `end_tag.len()` bytes of the section, plus the section end shifted by
    /// `payload_at`. Returns `None` when either helper gives up.
    fn bound_section<'a>(
        &self,
        payload: &'a str,
        payload_at: usize,
        end_tag: &str,
    ) -> Option<(&'a str, usize)> {
        let context_end =
            bound_context_or_plain_end(payload, end_tag, function_gemma_call_context_proven)?;
        let section_end = if let Some(found) = context_end {
            found
        } else {
            closed_but_malformed_end_pos_value_aware(payload, 0, end_tag, "<escape>", "<escape>")?
        };
        let body = &payload[..section_end - end_tag.len()];
        Some((body, payload_at + section_end))
    }
}
// `ToolParser` implementation for the "function_gemma" tool-call format.
impl ToolParser for FunctionGemma {
/// Parses the first `call:NAME{...}` found in `text` into a single `ToolCall`.
///
/// The argument body is a sequence of `key:value` entries. A value wrapped in
/// `<escape>...<escape>` is taken verbatim as a string; any other value runs up to
/// the next `,` and is parsed as JSON, falling back to a string.
///
/// # Errors
/// Fails when `gemma_call` finds no call, or when the byte offset following an
/// escaped value is not a character boundary.
fn parse(&self, text: &str, _tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
let (func_name, mut args_str) =
gemma_call(text, false).ok_or_else(|| err("function_gemma: No function provided."))?;
let mut arguments = serde_json::Map::new();
let escape = "<escape>";
while !args_str.is_empty() {
// Stop at the first remainder that has no `key:` part.
let split = match args_str.find(':') {
Some(s) => s,
None => break,
};
let key = args_str[..split].to_owned();
args_str = args_str[split + 1..].to_owned();
if let Some(rest) = args_str.strip_prefix(escape)
&& let Some(end) = rest.find(escape)
{
arguments.insert(key, Value::String(rest[..end].to_owned()));
let after_escape = end + escape.len();
// Skip one more byte after the closing escape (presumably the `,` separator).
let consumed = after_escape + 1;
args_str = if consumed >= rest.len() {
String::new()
} else if rest.is_char_boundary(consumed) {
rest[consumed..].to_owned()
} else {
return Err(err("function_gemma: malformed argument encoding"));
};
continue;
}
// Unescaped value: everything up to the next comma, or the rest of the body.
let split = args_str.find(',').unwrap_or(args_str.len());
let value = args_str[..split].to_owned();
args_str = if split < args_str.len() {
args_str[(split + 1).min(args_str.len())..].to_owned()
} else {
String::new()
};
let v =
serde_json::from_str::<Value>(&value).unwrap_or_else(|_| Value::String(value.clone()));
arguments.insert(key, v);
}
Ok(obj(&func_name, Value::Object(arguments)))
}
/// Parser name; also the key used by the default marker lookups.
fn name(&self) -> &'static str {
"function_gemma"
}
/// Tries to consume one tool-call section from `buffer`.
///
/// Returns `Ok(None)` when `locate_tagged_payload` or `bound_section` yields nothing.
/// A bounded section is always consumed, but `calls` is empty when it holds no
/// `call:NAME{` opener, when no `}` outside `<escape>` pairs follows the opener, or
/// when `parse` fails or yields nothing. This implementation never returns `Err`.
fn try_parse_one_call(
&self,
buffer: &str,
tools: Option<&Value>,
) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
let start_tag = self.tool_call_start();
let end_tag = self.tool_call_end();
let Some((payload_at, payload)) = locate_tagged_payload(buffer, start_tag) else {
return Ok(None);
};
let Some((bounded, end_pos)) = self.bound_section(payload, payload_at, end_tag) else {
return Ok(None);
};
let Some(after_open_brace) = find_first_function_gemma_call_start(bounded) else {
return Ok(Some((Vec::new(), end_pos)));
};
let bytes = bounded.as_bytes();
let escape = "<escape>";
let mut idx = after_open_brace;
// Toggled by each `<escape>` marker; a `}` only closes the body outside escapes.
let mut in_escape = false;
let body_close_found = loop {
if idx >= bytes.len() {
break false;
}
if !in_escape && bounded[idx..].starts_with(escape) {
in_escape = true;
idx += escape.len();
continue;
}
if in_escape && bounded[idx..].starts_with(escape) {
in_escape = false;
idx += escape.len();
continue;
}
if !in_escape && bytes[idx] == b'}' {
break true;
}
// Advance by whole characters so `bounded[idx..]` stays on a char boundary.
idx += utf8_char_width(bytes[idx]);
};
if !body_close_found {
return Ok(Some((Vec::new(), end_pos)));
}
// NOTE(review): the scan above is escape-aware, but `parse` goes through
// `gemma_call`, which cuts the body at the first `}` regardless of escapes.
let inner = strip_section_markers(&buffer[..end_pos], start_tag, end_tag);
match self.parse(inner, tools) {
Ok(calls) if !calls.is_empty() => Ok(Some((calls, end_pos))),
_ => Ok(Some((Vec::new(), end_pos))),
}
}
}
/// Checks whether a `call:NAME{` opener begins exactly at byte offset `at`.
///
/// `NAME` must be a non-empty run of ASCII letters, digits, `_` or `-`, immediately
/// followed by `{`. On success returns `(name_start, after_open_brace)`: the byte
/// offset of the first name byte and the offset just past the `{`.
fn function_gemma_call_start_at(payload: &str, at: usize) -> Option<(usize, usize)> {
    const PREFIX: &[u8] = b"call:";
    // Work on bytes so `at` does not have to be a character boundary.
    let after_prefix = payload.as_bytes().get(at..)?.strip_prefix(PREFIX)?;
    let name_len = after_prefix
        .iter()
        .take_while(|&&b| b.is_ascii_alphanumeric() || b == b'_' || b == b'-')
        .count();
    if name_len == 0 || after_prefix.get(name_len) != Some(&b'{') {
        return None;
    }
    let name_start = at + PREFIX.len();
    Some((name_start, name_start + name_len + 1))
}
/// Finds the first `call:NAME{` opener in `payload` and returns the byte offset just
/// past its `{`, or `None` when there is no opener.
fn find_first_function_gemma_call_start(payload: &str) -> Option<usize> {
    (0..payload.len())
        .find_map(|at| function_gemma_call_start_at(payload, at))
        .map(|(_, after_open)| after_open)
}
/// Extracts the first `call:NAME{BODY}` from `text` as `(NAME, BODY)`.
///
/// `BODY` ends at the first `}` after the opener. Returns `None` when there is no
/// opener, or when the first opener is never followed by a `}`; later openers are
/// not tried. `_balanced` is unused.
fn gemma_call(text: &str, _balanced: bool) -> Option<(String, String)> {
    let (name_start, body_start) =
        (0..text.len()).find_map(|at| function_gemma_call_start_at(text, at))?;
    // `body_start` sits just past the `{`, so the name ends one byte earlier.
    let name = &text[name_start..body_start - 1];
    let body = &text[body_start..];
    let close = body.find('}')?;
    Some((name.to_owned(), body[..close].to_owned()))
}
/// Tool-call parser registered under the name "gemma4".
pub struct Gemma4;
// `ToolParser` implementation for the "gemma4" tool-call format.
impl ToolParser for Gemma4 {
/// Parses every `call:NAME{...}` found by `gemma4_calls` into a `ToolCall`.
///
/// Each argument body is rewritten to JSON by `gemma4_args_to_json` and then parsed.
///
/// # Errors
/// Fails with "gemma4: No function provided." when no call is found, and with a
/// "gemma4: ..." message when a rewritten body is not valid JSON.
fn parse(&self, text: &str, _tools: Option<&Value>) -> Result<Vec<ToolCall>, Error> {
let calls = gemma4_calls(text);
if calls.is_empty() {
return Err(err("gemma4: No function provided."));
}
let mut out = Vec::new();
for (name, args_str) in calls {
let json_str = gemma4_args_to_json(&args_str);
let arguments: Value =
serde_json::from_str(&json_str).map_err(|e| err(format!("gemma4: {e}")))?;
out.push(ToolCall::new_nameless_id(&name, arguments));
}
Ok(out)
}
/// Parser name; also the key used by the default marker lookups.
fn name(&self) -> &'static str {
"gemma4"
}
/// Tries to consume one tool-call section from `buffer`.
///
/// The end tag is searched for while stepping over complete, brace-balanced
/// `call:NAME{...}` bodies, so an end tag inside a call body does not terminate the
/// section. Returns `Ok(None)` when no start tag payload is found or neither an end
/// tag nor a `call:` marker remains. `calls` is empty when `parse` fails or yields
/// nothing. This implementation never returns `Err`.
fn try_parse_one_call(
&self,
buffer: &str,
tools: Option<&Value>,
) -> Result<Option<(Vec<ToolCall>, usize)>, Error> {
let start_tag = self.tool_call_start();
let end_tag = self.tool_call_end();
let Some((payload_at, payload)) = locate_tagged_payload(buffer, start_tag) else {
return Ok(None);
};
let bytes = payload.as_bytes();
// `cursor` is an offset into `payload`, just past the last call body stepped over.
let mut cursor = 0usize;
let section_end_rel = loop {
let end_rel = payload[cursor..].find(end_tag).map(|p| cursor + p);
let call_rel = payload[cursor..].find("call:").map(|p| cursor + p);
let call_rel = match (end_rel, call_rel) {
// The end tag comes first (or there is no further call): section ends here.
(Some(e), Some(c)) if e <= c => break e,
(Some(_), Some(c)) => c,
(Some(e), None) => break e,
(None, Some(c)) => c,
(None, None) => return Ok(None),
};
let after_marker = call_rel + "call:".len();
let mut j = after_marker;
while j < bytes.len()
&& (bytes[j].is_ascii_alphanumeric() || bytes[j] == b'_' || bytes[j] == b'-')
{
j += 1;
}
// NOTE(review): unlike `gemma4_calls`, an empty name directly followed by `{` is
// accepted here as a call body.
if j >= bytes.len() || bytes[j] != b'{' {
cursor = after_marker;
continue;
}
let body = &payload[j..];
let Some(close_rel) = balanced_brace_end(body) else {
// Unbalanced body: defer to the value-aware fallback, which either gives an end
// position to consume (with no calls) or `None`.
return Ok(
closed_but_malformed_end_pos_value_aware(buffer, payload_at, end_tag, "<|\"|>", "<|\"|>")
.map(|ep| (Vec::new(), ep)),
);
};
cursor = j + close_rel + 1;
};
let end_pos = payload_at + section_end_rel + end_tag.len();
let inner = strip_section_markers(&buffer[..end_pos], start_tag, end_tag);
// Parse errors are swallowed: the section is still consumed, just without calls.
match self.parse(inner, tools) {
Ok(calls) if !calls.is_empty() => Ok(Some((calls, end_pos))),
_ => Ok(Some((Vec::new(), end_pos))),
}
}
}
/// Collects every `call:NAME{...}` in `text` as `(NAME, BODY)` pairs, where `BODY`
/// is the brace-balanced argument block including its outer braces.
///
/// A `call:` marker with an empty name or without a following `{` is skipped. A
/// call whose braces never balance is skipped too, with scanning resuming at its `{`.
fn gemma4_calls(text: &str) -> Vec<(String, String)> {
    const MARKER: &str = "call:";
    let mut calls = Vec::new();
    let mut cursor = 0;
    while let Some(found) = text[cursor..].find(MARKER) {
        let name_at = cursor + found + MARKER.len();
        let tail = &text[name_at..];
        let name_len = tail
            .bytes()
            .take_while(|&b| b.is_ascii_alphanumeric() || b == b'_' || b == b'-')
            .count();
        // Name bytes are ASCII, so `name_len` is always a character boundary.
        let body = &tail[name_len..];
        if name_len == 0 || !body.starts_with('{') {
            cursor = name_at;
            continue;
        }
        cursor = name_at + name_len;
        if let Some(close) = balanced_brace_end(body) {
            calls.push((tail[..name_len].to_owned(), body[..=close].to_owned()));
            cursor += close + 1;
        }
    }
    calls
}
/// Returns the byte index of the `}` that closes the `{` at the start of `s`.
///
/// Text between a pair of `<|"|>` delimiters is skipped, so braces inside it do not
/// count. Returns `None` when `s` does not start with `{`, when a delimiter is never
/// closed, or when the braces never balance.
fn balanced_brace_end(s: &str) -> Option<usize> {
    const LIT: &str = "<|\"|>";
    if !s.starts_with('{') {
        return None;
    }
    let bytes = s.as_bytes();
    let mut open = 0usize;
    let mut pos = 0;
    while pos < bytes.len() {
        if let Some(after_open) = s[pos..].strip_prefix(LIT) {
            // Jump past the whole delimited literal, closing delimiter included.
            let close = after_open.find(LIT)?;
            pos += LIT.len() + close + LIT.len();
            continue;
        }
        let byte = bytes[pos];
        if byte == b'{' {
            open += 1;
        } else if byte == b'}' {
            // The leading `{` guarantees `open >= 1` here.
            open -= 1;
            if open == 0 {
                return Some(pos);
            }
        }
        pos += utf8_char_width(byte);
    }
    None
}
/// Length in bytes of the UTF-8 sequence introduced by lead byte `b`.
///
/// Bytes that are not a multi-byte lead (ASCII, continuation bytes, `0xF8..=0xFF`)
/// report a width of 1.
#[inline]
fn utf8_char_width(b: u8) -> usize {
    if b & 0xE0 == 0xC0 {
        2 // 110xxxxx
    } else if b & 0xF0 == 0xE0 {
        3 // 1110xxxx
    } else if b & 0xF8 == 0xF0 {
        4 // 11110xxx
    } else {
        1
    }
}
/// Rewrites a gemma4 argument block into JSON text.
///
/// Two transformations are applied in a single left-to-right pass:
/// * a string literal delimited by `<|"|>` on both sides is replaced by its JSON
///   string encoding;
/// * a bare key (ASCII letters, digits, `_`) that directly follows `{` or `,` and is
///   directly followed by `:` is wrapped in double quotes.
///
/// All other text, including non-ASCII characters and an opening delimiter that is
/// never closed, is copied through unchanged. The result is not validated; the
/// caller decides whether it parses as JSON.
fn gemma4_args_to_json(text: &str) -> String {
    const LIT: &str = "<|\"|>";
    let bytes = text.as_bytes();
    let mut json = String::with_capacity(text.len() + 16);
    let mut i = 0;
    while i < text.len() {
        let rest = &text[i..];
        // Delimited literal: emit it JSON-encoded right away, so its content is never
        // scanned for keys and cannot be confused with any placeholder.
        if let Some(after_open) = rest.strip_prefix(LIT)
            && let Some(close) = after_open.find(LIT)
        {
            let encoded =
                serde_json::to_string(&after_open[..close]).unwrap_or_else(|_| "\"\"".into());
            json.push_str(&encoded);
            i += LIT.len() + close + LIT.len();
            continue;
        }
        // Copy one whole character so multi-byte UTF-8 sequences stay intact.
        let Some(ch) = rest.chars().next() else { break };
        json.push(ch);
        i += ch.len_utf8();
        if ch == '{' || ch == ',' {
            let key_len = bytes[i..]
                .iter()
                .take_while(|&&b| b.is_ascii_alphanumeric() || b == b'_')
                .count();
            if key_len > 0 && bytes.get(i + key_len) == Some(&b':') {
                json.push('"');
                json.push_str(&text[i..i + key_len]);
                json.push_str("\":");
                // Key bytes and the colon are ASCII, so `i` stays on a char boundary.
                i += key_len + 1;
            }
        }
    }
    json
}
/// Builds the tool-call parser registered under `name`.
///
/// Returns `None` for a name that is not one of the built-in parsers.
pub fn parser_by_name(name: &str) -> Option<Box<dyn ToolParser>> {
    let parser: Box<dyn ToolParser> = match name {
        "json_tools" => Box::new(JsonTools),
        "pythonic" => Box::new(Pythonic),
        "mistral" => Box::new(Mistral),
        "qwen3_coder" => Box::new(Qwen3Coder),
        "glm47" => Box::new(Glm47),
        "kimi_k2" => Box::new(KimiK2),
        "longcat" => Box::new(Longcat),
        "minimax_m2" => Box::new(MinimaxM2),
        "function_gemma" => Box::new(FunctionGemma),
        "gemma4" => Box::new(Gemma4),
        _ => return None,
    };
    Some(parser)
}
/// Picks a parser name for a chat template using the `TOOL_PARSER_SELECT` rules.
///
/// The first rule wins whose `all` substrings are all present in the template and
/// whose `any_of` list is either empty or has at least one substring present.
/// Returns `None` when no template is given or no rule matches.
pub fn infer_tool_parser(chat_template: Option<&str>) -> Option<&'static str> {
    let template = chat_template?;
    TOOL_PARSER_SELECT
        .iter()
        .find(|rule| {
            let required = rule.all.iter().all(|needle| template.contains(needle));
            let optional = rule.any_of.is_empty()
                || rule.any_of.iter().any(|needle| template.contains(needle));
            required && optional
        })
        .map(|rule| rule.name)
}
/// Returns the trimmed text between the parser's start and end markers.
///
/// Everything up to and including the first start marker is dropped, then
/// everything from the last end marker onwards. An empty or absent marker leaves
/// that side of the text untouched.
fn strip_markers<'a>(parser: &dyn ToolParser, buffer: &'a str) -> &'a str {
    let mut body = buffer;
    let open = parser.tool_call_start();
    if !open.is_empty()
        && let Some((_, after_open)) = body.split_once(open)
    {
        body = after_open;
    }
    let close = parser.tool_call_end();
    if !close.is_empty()
        && let Some((before_close, _)) = body.rsplit_once(close)
    {
        body = before_close;
    }
    body.trim()
}
/// Best-effort parse of whatever is left in `buffer` at end of stream.
///
/// Without a start marker the whole buffer is parsed once. Otherwise the buffer is
/// split on the start marker and each non-empty segment is parsed on its own.
/// Segments that fail to parse are dropped silently.
fn parse_eos(parser: &dyn ToolParser, buffer: &str, tools: Option<&Value>) -> Vec<ToolCall> {
    let start = parser.tool_call_start();
    if start.is_empty() {
        return parser
            .parse(strip_markers(parser, buffer), tools)
            .unwrap_or_default();
    }
    let mut calls = Vec::new();
    for segment in buffer.split(start) {
        if segment.is_empty() {
            continue;
        }
        if let Ok(parsed) = parser.parse(strip_markers(parser, segment), tools) {
            calls.extend(parsed);
        }
    }
    calls
}
/// Upper bound (256 KiB) on the combined size of `ToolCallProcessor`'s tool-call
/// buffer and pending display text; exceeding it triggers cap recovery.
const MAX_TOOL_CALL_BUFFER_BYTES: usize = 256 * 1024;
/// Streaming front end that splits model output into display text and tool calls.
pub struct ToolCallProcessor {
// Format-specific parser used to recognise and decode tool calls.
parser: Box<dyn ToolParser>,
// Optional tool schema handed to the parser on every parse.
tools: Option<Value>,
// Current position in the streaming state machine.
state: State,
// Text accumulated while a tool call may be, or is, in progress.
tool_call_buffer: String,
// Text seen before a possible start tag, held back until the tag is resolved.
pending_display: String,
/// Tool calls parsed so far.
pub tool_calls: Vec<ToolCall>,
}
/// Streaming state of a `ToolCallProcessor`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
// Plain text; nothing is buffered.
Normal,
// The buffer starts with the first character of the start tag, but the full tag
// has not been confirmed yet.
PotentialToolCall,
// Tool-call text is being accumulated in the buffer.
CollectingToolCall,
}
impl ToolCallProcessor {
/// Creates a processor in the `Normal` state with empty buffers and no parsed calls.
pub fn new(parser: Box<dyn ToolParser>, tools: Option<Value>) -> Self {
Self {
parser,
tools,
state: State::Normal,
tool_call_buffer: String::new(),
pending_display: String::new(),
tool_calls: Vec::new(),
}
}
// A parser with an empty start tag is treated as an inline format.
fn is_inline_format(&self) -> bool {
self.parser.tool_call_start().is_empty()
}
// First character of the start tag; `None` when the start tag is empty.
fn start_tag_first_char(&self) -> Option<char> {
self.parser.tool_call_start().chars().next()
}
/// Feeds one streamed chunk through the processor.
///
/// Returns the text that should be shown to the user, if any. Completed tool calls
/// are appended to `tool_calls`.
pub fn process_chunk(&mut self, chunk: &str) -> Option<String> {
if self.is_inline_format() {
self.process_inline_chunk(chunk)
} else {
self.process_tagged_chunk(chunk)
}
}
/// Finalises the stream.
///
/// When a tool call was pending or being collected, the buffered text is parsed
/// with `parse_eos` and any resulting calls are appended to `tool_calls`. Both
/// buffers are cleared and the state returns to `Normal`; buffered text that does
/// not parse is discarded rather than displayed.
pub fn process_eos(&mut self) {
if self.state != State::CollectingToolCall && self.state != State::PotentialToolCall {
return;
}
if self.tool_call_buffer.is_empty() {
self.pending_display.clear();
self.state = State::Normal;
return;
}
let parsed = parse_eos(
self.parser.as_ref(),
&self.tool_call_buffer,
self.tools.as_ref(),
);
self.tool_calls.extend(parsed);
self.tool_call_buffer.clear();
self.pending_display.clear();
self.state = State::Normal;
}
// Resets to `Normal`, emptying both buffers. Pending display text is always
// returned. The tool-call buffer is returned as display text too, unless a call
// was being collected, in which case it is dropped.
fn recover_at_cap(&mut self) -> Option<String> {
let drop_tool_buffer = self.state == State::CollectingToolCall;
let pending = std::mem::take(&mut self.pending_display);
let recovered_buffer = std::mem::take(&mut self.tool_call_buffer);
self.state = State::Normal;
let mut out: Option<String> = None;
if !pending.is_empty() {
push_display(&mut out, &pending);
}
if !drop_tool_buffer && !recovered_buffer.is_empty() {
push_display(&mut out, &recovered_buffer);
}
out
}
// Runs `recover_at_cap` only when the buffered bytes exceed
// `MAX_TOOL_CALL_BUFFER_BYTES`; otherwise does nothing.
fn cap_recover_into(&mut self, display: &mut Option<String>) {
if self.tool_call_buffer.len() + self.pending_display.len() <= MAX_TOOL_CALL_BUFFER_BYTES {
return;
}
if let Some(flushed) = self.recover_at_cap() {
push_display(display, &flushed);
}
}
// Unconditional recovery, used when the parser reports an error.
fn reset_on_malformed(&mut self, display: &mut Option<String>) {
if let Some(flushed) = self.recover_at_cap() {
push_display(display, &flushed);
}
}
// Inline formats (no start tag): buffering starts at the first `{` of a chunk.
fn process_inline_chunk(&mut self, chunk: &str) -> Option<String> {
let leading = match self.state {
State::Normal => {
// No brace in the chunk: it is pure display text.
let Some(brace) = chunk.find('{') else {
return Some(chunk.to_owned());
};
let leading = chunk[..brace].to_owned();
self.tool_call_buffer.clear();
self.tool_call_buffer.push_str(&chunk[brace..]);
self.state = State::CollectingToolCall;
leading
}
State::PotentialToolCall | State::CollectingToolCall => {
self.tool_call_buffer.push_str(chunk);
String::new()
}
};
let mut display = self.drain_inline_buffer();
// Still collecting after the drain: enforce the buffer cap.
if self.state == State::CollectingToolCall {
self.cap_recover_into(&mut display);
}
if leading.is_empty() {
display
} else {
Some(leading + display.as_deref().unwrap_or(""))
}
}
// Repeatedly extracts balanced JSON objects from the buffer. Objects that parse
// into calls are recorded; objects that do not are emitted as display text.
fn drain_inline_buffer(&mut self) -> Option<String> {
let mut display: Option<String> = None;
loop {
match balanced_json_object_prefix(&self.tool_call_buffer) {
Some((obj_start, obj_end)) => {
if obj_start > 0 {
push_display(&mut display, &self.tool_call_buffer[..obj_start]);
}
let object: String = self.tool_call_buffer[obj_start..obj_end].to_owned();
let suffix: String = self.tool_call_buffer[obj_end..].to_owned();
let inner = strip_markers(self.parser.as_ref(), &object);
match self.parser.parse(inner, self.tools.as_ref()) {
Ok(calls) if !calls.is_empty() => self.tool_calls.extend(calls),
_ => push_display(&mut display, &object),
}
self.tool_call_buffer = suffix;
if self.tool_call_buffer.is_empty() {
self.state = State::Normal;
return display;
}
}
None => {
// An unfinished object is still open: keep buffering.
if self.tool_call_buffer.contains('{') {
return display;
}
// No brace left at all: the remainder is plain display text.
self.state = State::Normal;
let remainder = std::mem::take(&mut self.tool_call_buffer);
push_display(&mut display, &remainder);
return display;
}
}
}
}
// Tagged formats: text is held back from the first character that could begin the
// start tag until the tag is confirmed or ruled out.
fn process_tagged_chunk(&mut self, chunk: &str) -> Option<String> {
let start_tag = self.parser.tool_call_start();
let Some(start_char) = self.start_tag_first_char() else {
return Some(chunk.to_owned());
};
// Becomes owned when text trailing a finished call has to be re-processed.
let mut chunk: std::borrow::Cow<'_, str> = std::borrow::Cow::Borrowed(chunk);
let mut display: Option<String> = None;
loop {
// Fast path: no possible tag start while in `Normal` state.
if self.state == State::Normal && !chunk.contains(start_char) {
push_display(&mut display, &chunk);
return display;
}
if self.state == State::Normal {
if let Some(idx) = chunk.find(start_char) {
if idx > 0 {
self.pending_display.push_str(&chunk[..idx]);
}
self.tool_call_buffer.push_str(&chunk[idx..]);
} else {
self.pending_display.push_str(&chunk);
}
self.state = State::PotentialToolCall;
} else {
self.tool_call_buffer.push_str(&chunk);
}
if self.state == State::PotentialToolCall {
if partial_match(&self.tool_call_buffer, start_tag) {
if self.tool_call_buffer.starts_with(start_tag) {
// Full start tag seen: release the held-back text and start collecting.
self.state = State::CollectingToolCall;
let leading = std::mem::take(&mut self.pending_display);
push_display(&mut display, &leading);
} else {
// Only a prefix of the start tag so far: wait for more input.
self.cap_recover_into(&mut display);
return display;
}
} else {
// Not a start tag after all: everything buffered is display text.
self.state = State::Normal;
let leading = std::mem::take(&mut self.pending_display);
let buffer = std::mem::take(&mut self.tool_call_buffer);
push_display(&mut display, &leading);
push_display(&mut display, &buffer);
return display;
}
}
let end_tag = self.parser.tool_call_end();
// Without an end tag nothing can complete mid-stream; keep buffering.
if end_tag.is_empty() {
self.cap_recover_into(&mut display);
return display;
}
let outcome = self
.parser
.try_parse_one_call(&self.tool_call_buffer, self.tools.as_ref());
match outcome {
Ok(Some((calls, end_pos))) => {
// Nothing was consumed: keep buffering.
if end_pos == 0 {
self.cap_recover_into(&mut display);
return display;
}
self.tool_calls.extend(calls);
let trailing_token = self.tool_call_buffer[end_pos..].to_owned();
self.tool_call_buffer.clear();
self.state = State::Normal;
// Text after the call may begin another call: loop over it again.
if trailing_token.contains(start_char) {
chunk = std::borrow::Cow::Owned(trailing_token);
} else {
push_display(&mut display, &trailing_token);
return display;
}
}
Ok(None) => {
// Call not complete yet.
self.cap_recover_into(&mut display);
return display;
}
Err(_) => {
self.reset_on_malformed(&mut display);
return display;
}
}
}
}
}
/// Appends `text` to the optional display accumulator, creating it on first use.
///
/// Empty `text` is ignored, so `display` stays `None` until something non-empty
/// arrives.
fn push_display(display: &mut Option<String>, text: &str) {
    if text.is_empty() {
        return;
    }
    match display.as_mut() {
        Some(existing) => existing.push_str(text),
        None => *display = Some(text.to_owned()),
    }
}
/// Locates the first brace-balanced `{...}` span in `text`.
///
/// Scanning starts at the first `{`. Braces inside double-quoted strings are
/// ignored, and a backslash inside a string escapes the next byte. Returns
/// `(start, end)` with `end` exclusive, or `None` when there is no `{` or the
/// braces never balance.
fn balanced_json_object_prefix(text: &str) -> Option<(usize, usize)> {
    let start = text.find('{')?;
    let mut open_braces = 0usize;
    let mut inside_string = false;
    let mut skip_next = false;
    for (offset, byte) in text.bytes().enumerate().skip(start) {
        if inside_string {
            match byte {
                _ if skip_next => skip_next = false,
                b'\\' => skip_next = true,
                b'"' => inside_string = false,
                _ => {}
            }
            continue;
        }
        match byte {
            b'"' => inside_string = true,
            b'{' => open_braces += 1,
            b'}' => {
                // The scan begins on a `{`, so the counter is at least 1 here.
                open_braces -= 1;
                if open_braces == 0 {
                    return Some((start, offset + 1));
                }
            }
            _ => {}
        }
    }
    None
}
/// Locates the first bracket-balanced `[...]` span in `text`.
///
/// Scanning starts at the first `[`. Brackets inside double-quoted strings are
/// ignored, and a backslash inside a string escapes the next byte. Returns
/// `(start, end)` with `end` exclusive, or `None` when there is no `[` or the
/// brackets never balance.
fn balanced_json_array_prefix(text: &str) -> Option<(usize, usize)> {
    let start = text.find('[')?;
    let mut open_brackets = 0usize;
    let mut inside_string = false;
    let mut skip_next = false;
    for (offset, byte) in text.bytes().enumerate().skip(start) {
        if inside_string {
            match byte {
                _ if skip_next => skip_next = false,
                b'\\' => skip_next = true,
                b'"' => inside_string = false,
                _ => {}
            }
            continue;
        }
        match byte {
            b'"' => inside_string = true,
            b'[' => open_brackets += 1,
            b']' => {
                // The scan begins on a `[`, so the counter is at least 1 here.
                open_brackets -= 1;
                if open_brackets == 0 {
                    return Some((start, offset + 1));
                }
            }
            _ => {}
        }
    }
    None
}
/// Kind of JSON value a payload appears to start with, as reported by
/// `classify_json_payload_start`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JsonPayloadStart {
// The first non-whitespace byte is neither `{` nor `[`, or there is none.
None,
// The first non-whitespace byte is `{`.
Object,
// The first non-whitespace byte is `[`.
Array,
}
/// Classifies `payload` by its first byte after leading spaces, tabs, newlines and
/// carriage returns: `{` is an object, `[` an array, anything else (or nothing) is
/// `JsonPayloadStart::None`.
fn classify_json_payload_start(payload: &str) -> JsonPayloadStart {
    let first_significant = payload
        .trim_start_matches([' ', '\t', '\n', '\r'])
        .as_bytes()
        .first()
        .copied();
    match first_significant {
        Some(b'{') => JsonPayloadStart::Object,
        Some(b'[') => JsonPayloadStart::Array,
        _ => JsonPayloadStart::None,
    }
}
/// Finds the end of a bracketed list of calls in `payload`.
///
/// Scanning starts at the first `[` and tracks bracket depth, ignoring brackets
/// inside single- or double-quoted strings (with backslash escapes). When a `]`
/// brings the depth back to zero and the byte right before it is `)`, the offset
/// just past that `]` is returned. A balanced group that does not end in `)]` is
/// skipped, and scanning restarts at the next `[`. Returns `None` when no such
/// group is found.
fn pythonic_call_close(payload: &str) -> Option<usize> {
let bytes = payload.as_bytes();
let start = bytes.iter().position(|&b| b == b'[')?;
let mut idx = start;
let mut bracket_depth: i32 = 0;
// NOTE(review): `paren_depth` is maintained but never consulted when deciding
// where the list ends.
let mut paren_depth: i32 = 0;
// `Some(quote)` while inside a string opened by that quote byte.
let mut in_str: Option<u8> = None;
let mut escaped = false;
while idx < bytes.len() {
let b = bytes[idx];
if let Some(q) = in_str {
if escaped {
escaped = false;
} else if b == b'\\' {
escaped = true;
} else if b == q {
in_str = None;
}
idx += utf8_char_width(b);
continue;
}
match b {
b'\'' | b'"' => in_str = Some(b),
b'[' => bracket_depth += 1,
b']' => {
bracket_depth -= 1;
if bracket_depth == 0 {
// Only a group that ends in `)]` counts as a call list.
if idx >= 1 && bytes[idx - 1] == b')' {
return Some(idx + 1);
}
// Otherwise skip ahead to the next `[` and start over from there.
idx += 1;
while idx < bytes.len() && bytes[idx] != b'[' {
idx += utf8_char_width(bytes[idx]);
}
if idx >= bytes.len() {
return None;
}
bracket_depth = 0;
paren_depth = 0;
in_str = None;
escaped = false;
// `idx` is not advanced, so the `[` is processed on the next iteration.
continue;
}
if bracket_depth < 0 {
return None;
}
}
b'(' => paren_depth += 1,
b')' if paren_depth > 0 => {
paren_depth -= 1;
}
_ => {}
}
idx += utf8_char_width(b);
}
None
}
/// Finds the first `end_tag` in `payload` that is not inside a
/// `value_open`...`value_close` span.
///
/// Returns the byte offset where that end tag starts. Returns `None` when no end
/// tag exists outside value spans, or when a value span that has to be skipped is
/// never closed.
fn xml_value_aware_end_tag_scan(
    payload: &str,
    value_open: &str,
    value_close: &str,
    end_tag: &str,
) -> Option<usize> {
    let mut from = 0usize;
    while from <= payload.len() {
        let tail = &payload[from..];
        let value_at = tail.find(value_open);
        let end_at = tail.find(end_tag);
        // An end tag wins unless a value span opens strictly before it.
        let skip_from = match (value_at, end_at) {
            (Some(v), Some(e)) if v < e => v,
            (_, Some(e)) => return Some(from + e),
            (Some(v), None) => v,
            (None, None) => return None,
        };
        let body_at = from + skip_from + value_open.len();
        let close_rel = payload[body_at..].find(value_close)?;
        from = body_at + close_rel + value_close.len();
    }
    None
}
/// Reports whether `buffer` and `tag` agree on their common prefix, i.e. one of
/// them is a prefix of the other. Trivially true when either is empty.
fn partial_match(buffer: &str, tag: &str) -> bool {
    tag.starts_with(buffer) || buffer.starts_with(tag)
}
#[cfg(test)]
mod tests;