Skip to main content

tool_parser/parsers/
qwen.rs

1use async_trait::async_trait;
2use openai_protocol::common::Tool;
3use regex::Regex;
4use serde_json::Value;
5
6use crate::{
7    errors::{ParserError, ParserResult},
8    parsers::helpers,
9    partial_json::PartialJson,
10    traits::ToolParser,
11    types::{FunctionCall, StreamingParseResult, ToolCall},
12};
13
14/// Qwen format parser for tool calls
15///
16/// Handles the Qwen 2.5/3 specific format:
17/// `<tool_call>\n{"name": "func", "arguments": {...}}\n</tool_call>`
18///
19/// Features:
20/// - Tool Call Tags: `<tool_call>` and `</tool_call>` wrap each individual call
21/// - Each individual call is separated by `\n`
22/// - Function Call Object: JSON object with "name" and "arguments" fields
23///
24/// Reference: https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct?chat_template=default
25pub struct QwenParser {
26    /// Parser for handling incomplete JSON during streaming
27    partial_json: PartialJson,
28
29    /// Regex for extracting tool calls in parse_complete
30    extractor: Regex,
31
32    /// Buffer for accumulating incomplete patterns across chunks
33    buffer: String,
34
35    /// Stores complete tool call info (name and arguments) for each tool being parsed
36    prev_tool_call_arr: Vec<Value>,
37
38    /// Index of currently streaming tool call (-1 means no active tool)
39    current_tool_id: i32,
40
41    /// Flag for whether current tool's name has been sent to client
42    current_tool_name_sent: bool,
43
44    /// Tracks raw JSON string content streamed to client for each tool's arguments
45    streamed_args_for_tool: Vec<String>,
46
47    /// Buffer for normal text that might precede partial end tokens
48    normal_text_buffer: String,
49
50    /// Token configuration
51    /// Start/end tokens for each individual tool call (not the entire sequence)
52    individual_tool_start_token: &'static str,
53    individual_tool_end_token: &'static str,
54    tool_call_separator: &'static str,
55}
56
57impl QwenParser {
58    /// Create a new Qwen parser
59    pub fn new() -> Self {
60        // Use (?s) flag for DOTALL mode to handle newlines
61        let pattern = r"(?s)<tool_call>\n(.*?)\n</tool_call>";
62        let extractor = Regex::new(pattern).expect("Valid regex pattern");
63
64        Self {
65            partial_json: PartialJson::default(),
66            extractor,
67            buffer: String::new(),
68            prev_tool_call_arr: Vec::new(),
69            current_tool_id: -1,
70            current_tool_name_sent: false,
71            streamed_args_for_tool: Vec::new(),
72            normal_text_buffer: String::new(),
73            individual_tool_start_token: "<tool_call>\n",
74            individual_tool_end_token: "\n</tool_call>",
75            tool_call_separator: "\n",
76        }
77    }
78
79    /// Parse a single JSON object into a ToolCall
80    fn parse_single_object(&self, obj: &Value) -> ParserResult<Option<ToolCall>> {
81        let name = obj.get("name").and_then(|v| v.as_str());
82
83        if let Some(name) = name {
84            // Get arguments - Qwen uses "arguments" key
85            let empty_obj = Value::Object(serde_json::Map::new());
86            let args = obj.get("arguments").unwrap_or(&empty_obj);
87
88            // Convert arguments to JSON string
89            let arguments = serde_json::to_string(args)
90                .map_err(|e| ParserError::ParsingFailed(e.to_string()))?;
91
92            Ok(Some(ToolCall {
93                function: FunctionCall {
94                    name: name.to_string(),
95                    arguments,
96                },
97            }))
98        } else {
99            Ok(None)
100        }
101    }
102}
103
104impl Default for QwenParser {
105    fn default() -> Self {
106        Self::new()
107    }
108}
109
110#[async_trait]
111impl ToolParser for QwenParser {
112    async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
113        // Check if text contains Qwen format
114        if !self.has_tool_markers(text) {
115            return Ok((text.to_string(), vec![]));
116        }
117
118        // Find where the first tool call begins
119        let idx = text.find("<tool_call>").unwrap(); // Safe because has_tool_markers checked
120        let normal_text = text[..idx].to_string();
121
122        // Extract tool calls
123        let mut tools = Vec::new();
124        for captures in self.extractor.captures_iter(text) {
125            if let Some(json_str) = captures.get(1) {
126                let parsed = serde_json::from_str::<Value>(json_str.as_str().trim())
127                    .map_err(|e| ParserError::ParsingFailed(e.to_string()))
128                    .and_then(|v| self.parse_single_object(&v));
129
130                match parsed {
131                    Ok(Some(tool)) => tools.push(tool),
132                    Ok(None) => continue,
133                    Err(e) => {
134                        tracing::debug!("Failed to parse tool call: {:?}", e);
135                        continue;
136                    }
137                }
138            }
139        }
140
141        // If no tools were successfully parsed despite having markers, return entire text as fallback
142        if tools.is_empty() {
143            return Ok((text.to_string(), vec![]));
144        }
145
146        Ok((normal_text, tools))
147    }
148
149    async fn parse_incremental(
150        &mut self,
151        chunk: &str,
152        tools: &[Tool],
153    ) -> ParserResult<StreamingParseResult> {
154        // Append new text to buffer
155        self.buffer.push_str(chunk);
156        let current_text = &self.buffer.clone();
157
158        // Check if current_text has tool_call
159        let has_tool_start = self.has_tool_markers(current_text)
160            || (self.current_tool_id > 0 && current_text.starts_with(self.tool_call_separator));
161
162        if !has_tool_start {
163            // Only clear buffer if we're sure no tool call is starting
164            if helpers::ends_with_partial_token(&self.buffer, self.individual_tool_start_token)
165                .is_none()
166            {
167                let normal_text = self.buffer.clone();
168                self.buffer.clear();
169
170                return Ok(StreamingParseResult {
171                    normal_text,
172                    calls: vec![],
173                });
174            } else {
175                // Might be partial individual_tool_start_token, keep buffering
176                return Ok(StreamingParseResult::default());
177            }
178        }
179
180        // Build tool indices
181        let tool_indices = helpers::get_tool_indices(tools);
182
183        // Determine start index for JSON parsing
184        let start_idx = if let Some(pos) = current_text.find(self.individual_tool_start_token) {
185            pos + self.individual_tool_start_token.len()
186        } else if self.current_tool_id > 0 && current_text.starts_with(self.tool_call_separator) {
187            self.tool_call_separator.len()
188        } else {
189            0
190        };
191
192        let mut result = helpers::handle_json_tool_streaming(
193            current_text,
194            start_idx,
195            &mut self.partial_json,
196            &tool_indices,
197            &mut self.buffer,
198            &mut self.current_tool_id,
199            &mut self.current_tool_name_sent,
200            &mut self.streamed_args_for_tool,
201            &mut self.prev_tool_call_arr,
202        )?;
203
204        // Qwen-specific: Handle partial end tokens in normal text
205        // After tool calls complete, normal text might contain partial "</tool_call>" tags
206        if !result.normal_text.is_empty() {
207            self.normal_text_buffer.push_str(&result.normal_text);
208
209            // Check if buffer contains complete end token (without leading newline)
210            let end_token_without_newline = &self.individual_tool_end_token[1..]; // "</tool_call>"
211            if self.normal_text_buffer.contains(end_token_without_newline) {
212                // Complete end token found - clean it and return
213                let cleaned_text = self
214                    .normal_text_buffer
215                    .replace(end_token_without_newline, "");
216                self.normal_text_buffer.clear();
217                result.normal_text = cleaned_text;
218            } else {
219                // Check if buffer might contain partial end token at the end
220                if let Some(partial_match_len) = helpers::ends_with_partial_token(
221                    &self.normal_text_buffer,
222                    end_token_without_newline,
223                ) {
224                    // Keep potential partial match in buffer, return the rest
225                    let split_point = self.normal_text_buffer.len() - partial_match_len;
226                    result.normal_text = self.normal_text_buffer[..split_point].to_string();
227                    self.normal_text_buffer = self.normal_text_buffer[split_point..].to_string();
228                } else {
229                    // No partial match, return all buffered text
230                    result.normal_text = self.normal_text_buffer.clone();
231                    self.normal_text_buffer.clear();
232                }
233            }
234        }
235
236        Ok(result)
237    }
238
239    fn has_tool_markers(&self, text: &str) -> bool {
240        text.contains("<tool_call>")
241    }
242
243    fn get_unstreamed_tool_args(&self) -> Option<Vec<crate::types::ToolCallItem>> {
244        helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
245    }
246
247    fn reset(&mut self) {
248        helpers::reset_parser_state(
249            &mut self.buffer,
250            &mut self.prev_tool_call_arr,
251            &mut self.current_tool_id,
252            &mut self.current_tool_name_sent,
253            &mut self.streamed_args_for_tool,
254        );
255    }
256}