use winnow::ascii::multispace0 as ws0;
use winnow::combinator::{alt, eof, repeat, seq, terminated};
use winnow::prelude::*;
use winnow::stream::Partial;
use winnow::token::{literal, rest, take_until, take_while};
use super::parameters::ToolSchemas;
use super::utils::{parse_buffered_event, safe_text_len};
use super::{Result, ToolCallDelta, ToolParserOutput};
use crate::vllm_tool_parser::Tool;
mod glm47_moe;
pub use glm47_moe::Glm47MoeToolParser;
// Literal markup tags recognised by the parsers in this file.

/// Opening tag that switches the parser from plain text into a tool call.
const TOOL_CALL_START: &str = "<tool_call>";
/// Closing tag that terminates a tool call body.
const TOOL_CALL_END: &str = "</tool_call>";
/// Opening tag of an argument name inside a tool call body.
const ARG_KEY_START: &str = "<arg_key>";
/// Closing tag of an argument name.
const ARG_KEY_END: &str = "</arg_key>";
/// Opening tag of an argument value; expected after the matching key.
const ARG_VALUE_START: &str = "<arg_value>";
/// Closing tag of an argument value.
const ARG_VALUE_END: &str = "</arg_value>";
/// Input type of the streaming (top-level) parsers: a `&str` wrapped in
/// winnow's `Partial` stream marker. Tool call bodies, once fully buffered,
/// are parsed from a plain `&str` instead.
type GlmInput<'i> = Partial<&'i str>;
/// State of the streaming parser; selects which event parser runs next.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum GlmMode {
/// Outside any tool call (initial state, and the state after `reset`).
Text,
/// A `<tool_call>` tag has been consumed; waiting for the body and `</tool_call>`.
ToolCall,
/// At least one tool call was emitted; only another `<tool_call>` is still
/// recognised, any other remaining input is consumed and ignored.
AfterToolCall,
}
/// How the function name is delimited from the arguments inside a tool call body.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Separator {
/// The name is everything up to the first newline (trimmed).
Newline,
/// The name is a run of characters that are neither whitespace nor `<`,
/// optionally followed by whitespace.
Flexible,
}
/// One unit of progress produced by the event parsers and consumed by
/// `GlmXmlToolParser::apply_event`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum GlmEvent {
/// Plain text to forward to the caller.
Text {
/// Length of the buffer prefix (`buffer[..len]`) that is the text.
len: usize,
},
/// A `<tool_call>` opening tag was consumed.
ToolCallStart,
/// A complete tool call body was parsed.
ToolCall {
/// Function name as written in the body.
name: String,
/// `(key, value)` argument pairs, both whitespace-trimmed, not yet
/// converted according to the tool schema.
raw_params: Vec<(String, String)>,
},
/// Remaining input after a tool call that was consumed and discarded.
IgnoredRest,
}
/// Incremental parser for GLM XML-style tool calls fed chunk by chunk.
struct GlmXmlToolParser {
/// Input received so far that has not yet been consumed by an event.
buffer: String,
/// Current parsing state.
mode: GlmMode,
/// Number of tool calls emitted so far; used as the next call's `tool_index`.
emitted_tool_count: usize,
/// Schemas built from the available tools, used to convert raw string arguments.
tool_parameters: ToolSchemas,
/// Function-name delimiting rule applied to tool call bodies.
separator: Separator,
}
impl GlmXmlToolParser {
    /// Creates a parser in text mode with an empty buffer and no emitted calls.
    ///
    /// `tools` supplies the schemas used to convert raw arguments; `separator`
    /// selects how function names are delimited inside a tool call body.
    fn new(tools: &[Tool], separator: Separator) -> Self {
        let tool_parameters = ToolSchemas::from_tools(tools);
        Self {
            buffer: String::new(),
            mode: GlmMode::Text,
            emitted_tool_count: 0,
            tool_parameters,
            separator,
        }
    }

    /// Applies one parsed event to the parser state and to `output`.
    ///
    /// Must run before the consumed bytes are drained from the buffer, because
    /// a text event is resolved by slicing the current buffer.
    ///
    /// # Errors
    /// Fails when the converted arguments cannot be serialized to JSON.
    fn apply_event(&mut self, event: GlmEvent, output: &mut ToolParserOutput) -> Result<()> {
        match event {
            GlmEvent::IgnoredRest => Ok(()),
            GlmEvent::ToolCallStart => {
                self.mode = GlmMode::ToolCall;
                Ok(())
            }
            GlmEvent::Text { len } => {
                let text = &self.buffer[..len];
                output.normal_text.push_str(text);
                Ok(())
            }
            GlmEvent::ToolCall { name, raw_params } => {
                // The mode changes before conversion, so it stays changed even
                // if serialization fails below.
                self.mode = GlmMode::AfterToolCall;
                let converted = self
                    .tool_parameters
                    .convert_params_with_schema(&name, raw_params);
                let arguments = serde_json::to_string(&converted)
                    .map_err(|err| parsing_failed!("failed to serialize arguments: {}", err))?;
                let tool_index = self.emitted_tool_count;
                output.calls.push(ToolCallDelta {
                    tool_index,
                    name: Some(name),
                    arguments,
                });
                self.emitted_tool_count = tool_index + 1;
                Ok(())
            }
        }
    }

    /// Returns the parser to its initial state and hands back whatever was
    /// still sitting unconsumed in the buffer.
    fn reset(&mut self) -> String {
        let leftover = std::mem::take(&mut self.buffer);
        self.mode = GlmMode::Text;
        self.emitted_tool_count = 0;
        leftover
    }

    /// Appends `chunk` to the buffer and applies every event that can be
    /// parsed from it, draining the consumed bytes after each event.
    ///
    /// The loop ends when `parse_buffered_event` yields `None` (presumably:
    /// more input is needed) or when an error is propagated.
    fn parse_into(&mut self, chunk: &str, output: &mut ToolParserOutput) -> Result<()> {
        self.buffer.push_str(chunk);
        loop {
            // The mode can change after every applied event, so re-read it each round.
            let (mode, separator) = (self.mode, self.separator);
            let parsed = parse_buffered_event(&self.buffer, |input| {
                parse_next_glm_event(input, mode, separator)
            })?;
            match parsed {
                Some((event, consumed_len)) => {
                    self.apply_event(event, output)?;
                    self.buffer.drain(..consumed_len);
                }
                None => return Ok(()),
            }
        }
    }

    /// Flushes the parser at end of input and resets it.
    ///
    /// Leftover input is emitted as normal text in text mode and dropped after
    /// a tool call. An empty buffer yields an empty output in every mode.
    ///
    /// # Errors
    /// Fails, without resetting, when input remains inside an unterminated
    /// tool call.
    fn finish(&mut self) -> Result<ToolParserOutput> {
        let mut output = ToolParserOutput::default();
        match self.mode {
            GlmMode::ToolCall if !self.buffer.is_empty() => {
                return Err(parsing_failed!("incomplete GLM MoE tool call"));
            }
            GlmMode::ToolCall | GlmMode::AfterToolCall => {}
            GlmMode::Text => output.normal_text.push_str(&self.buffer),
        }
        let _ = self.reset();
        Ok(output)
    }
}
/// Dispatches to the event parser that matches the current `mode`.
///
/// `separator` is only consulted while inside a tool call, where it selects
/// the function-name delimiting rule.
fn parse_next_glm_event(
    input: &mut GlmInput<'_>,
    mode: GlmMode,
    separator: Separator,
) -> ModalResult<GlmEvent> {
    match mode {
        GlmMode::ToolCall => tool_call_event(input, separator),
        GlmMode::AfterToolCall => after_tool_call_event(input),
        GlmMode::Text => parse_text_event(input),
    }
}
/// Text-mode parser: tries a tool call opening tag first and falls back to
/// a run of plain text.
fn parse_text_event(input: &mut GlmInput<'_>) -> ModalResult<GlmEvent> {
    let mut start_tag_or_text = alt((tool_call_start_event, safe_text_event));
    start_tag_or_text.parse_next(input)
}
/// Consumes a `<tool_call>` opening tag and reports it as
/// `GlmEvent::ToolCallStart`.
fn tool_call_start_event(input: &mut GlmInput<'_>) -> ModalResult<GlmEvent> {
    let mut start_tag = literal(TOOL_CALL_START).map(|_| GlmEvent::ToolCallStart);
    start_tag.parse_next(input)
}
/// Wraps the length reported by `safe_text_len` (called with the tool call
/// opening tag as its marker) into a `GlmEvent::Text`.
fn safe_text_event(input: &mut GlmInput<'_>) -> ModalResult<GlmEvent> {
    let len = safe_text_len(input, TOOL_CALL_START)?;
    Ok(GlmEvent::Text { len })
}
fn after_tool_call_event(input: &mut GlmInput<'_>) -> ModalResult<GlmEvent> {
ws0.void().parse_next(input)?;
alt((tool_call_start_event, ignored_rest_event)).parse_next(input)
}
/// Consumes all remaining input and reports it as `GlmEvent::IgnoredRest`.
fn ignored_rest_event(input: &mut GlmInput<'_>) -> ModalResult<GlmEvent> {
    let mut swallow_rest = rest.map(|_| GlmEvent::IgnoredRest);
    swallow_rest.parse_next(input)
}
fn tool_call_event(input: &mut GlmInput<'_>, separator: Separator) -> ModalResult<GlmEvent> {
let (body,) = seq!(
take_until(0.., TOOL_CALL_END),
_: literal(TOOL_CALL_END),
)
.parse_next(input)?;
parse_tool_call_body(body, separator)
}
fn parse_tool_call_body(body: &str, separator: Separator) -> ModalResult<GlmEvent> {
let mut input = body;
let (name, raw_params) = match separator {
Separator::Newline => seq!(
_: ws0,
parse_newline_separated_function_name,
parse_parameters,
_: ws0,
_: eof,
)
.parse_next(&mut input)?,
Separator::Flexible => seq!(
_: ws0,
parse_flexible_function_name,
parse_parameters,
_: ws0,
_: eof,
)
.parse_next(&mut input)?,
};
Ok(GlmEvent::ToolCall {
name: name.to_string(),
raw_params,
})
}
fn parse_newline_separated_function_name<'i>(input: &mut &'i str) -> ModalResult<&'i str> {
terminated(take_until(1.., "\n"), "\n")
.map(str::trim)
.parse_next(input)
}
/// Reads a function name made of one or more characters that are neither
/// whitespace nor `<`, then skips any whitespace that follows it.
fn parse_flexible_function_name<'i>(input: &mut &'i str) -> ModalResult<&'i str> {
    let is_name_char = |ch: char| !(ch.is_whitespace() || ch == '<');
    let name = take_while(1.., is_name_char).parse_next(input)?;
    ws0.void().parse_next(input)?;
    Ok(name)
}
/// Collects zero or more parameters, skipping whitespace after each one.
fn parse_parameters(input: &mut &str) -> ModalResult<Vec<(String, String)>> {
    let one_parameter = terminated(parse_parameter, ws0);
    let mut all_parameters = repeat(0.., one_parameter);
    all_parameters.parse_next(input)
}
fn parse_parameter(input: &mut &str) -> ModalResult<(String, String)> {
let (key, value) = seq!(
_: literal(ARG_KEY_START),
take_until(1.., ARG_KEY_END),
_: literal(ARG_KEY_END),
_: ws0,
_: literal(ARG_VALUE_START),
take_until(0.., ARG_VALUE_END).map(str::trim),
_: literal(ARG_VALUE_END),
)
.parse_next(input)?;
Ok((key.trim().to_string(), value.to_string()))
}