Skip to main content

tool_parser/parsers/
qwen.rs

1use async_trait::async_trait;
2use openai_protocol::common::Tool;
3use regex::Regex;
4use serde_json::Value;
5
6use crate::{
7    errors::{ParserError, ParserResult},
8    parsers::helpers,
9    partial_json::PartialJson,
10    traits::ToolParser,
11    types::{FunctionCall, StreamingParseResult, ToolCall},
12};
13
14/// Qwen format parser for tool calls
15///
16/// Handles the Qwen 2.5/3 specific format:
17/// `<tool_call>\n{"name": "func", "arguments": {...}}\n</tool_call>`
18///
19/// Features:
20/// - Tool Call Tags: `<tool_call>` and `</tool_call>` wrap each individual call
21/// - Each individual call is separated by `\n`
22/// - Function Call Object: JSON object with "name" and "arguments" fields
23///
24/// Reference: https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct?chat_template=default
25pub struct QwenParser {
26    /// Parser for handling incomplete JSON during streaming
27    partial_json: PartialJson,
28
29    /// Regex for extracting tool calls in parse_complete
30    extractor: Regex,
31
32    /// Buffer for accumulating incomplete patterns across chunks
33    buffer: String,
34
35    /// Stores complete tool call info (name and arguments) for each tool being parsed
36    prev_tool_call_arr: Vec<Value>,
37
38    /// Index of currently streaming tool call (-1 means no active tool)
39    current_tool_id: i32,
40
41    /// Flag for whether current tool's name has been sent to client
42    current_tool_name_sent: bool,
43
44    /// Tracks raw JSON string content streamed to client for each tool's arguments
45    streamed_args_for_tool: Vec<String>,
46
47    /// Buffer for normal text that might precede partial end tokens
48    normal_text_buffer: String,
49
50    /// Token configuration
51    /// Start/end tokens for each individual tool call (not the entire sequence)
52    individual_tool_start_token: &'static str,
53    individual_tool_end_token: &'static str,
54    tool_call_separator: &'static str,
55}
56
57impl QwenParser {
58    /// Create a new Qwen parser
59    #[expect(
60        clippy::expect_used,
61        reason = "regex pattern is a compile-time string literal"
62    )]
63    pub fn new() -> Self {
64        // Use (?s) flag for DOTALL mode to handle newlines
65        let pattern = r"(?s)<tool_call>\n(.*?)\n</tool_call>";
66        let extractor = Regex::new(pattern).expect("Valid regex pattern");
67
68        Self {
69            partial_json: PartialJson::default(),
70            extractor,
71            buffer: String::new(),
72            prev_tool_call_arr: Vec::new(),
73            current_tool_id: -1,
74            current_tool_name_sent: false,
75            streamed_args_for_tool: Vec::new(),
76            normal_text_buffer: String::new(),
77            individual_tool_start_token: "<tool_call>\n",
78            individual_tool_end_token: "\n</tool_call>",
79            tool_call_separator: "\n",
80        }
81    }
82
83    /// Parse a single JSON object into a ToolCall
84    fn parse_single_object(obj: &Value) -> ParserResult<Option<ToolCall>> {
85        let name = obj.get("name").and_then(|v| v.as_str());
86
87        if let Some(name) = name {
88            // Get arguments - Qwen uses "arguments" key
89            let empty_obj = Value::Object(serde_json::Map::new());
90            let args = obj.get("arguments").unwrap_or(&empty_obj);
91
92            // Convert arguments to JSON string
93            let arguments = serde_json::to_string(args)
94                .map_err(|e| ParserError::ParsingFailed(e.to_string()))?;
95
96            Ok(Some(ToolCall {
97                function: FunctionCall {
98                    name: name.to_string(),
99                    arguments,
100                },
101            }))
102        } else {
103            Ok(None)
104        }
105    }
106}
107
108impl Default for QwenParser {
109    fn default() -> Self {
110        Self::new()
111    }
112}
113
114#[async_trait]
115impl ToolParser for QwenParser {
116    async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
117        // Check if text contains Qwen format
118        if !self.has_tool_markers(text) {
119            return Ok((text.to_string(), vec![]));
120        }
121
122        // Find where the first tool call begins
123        // Safe: has_tool_markers() already confirmed the marker exists
124        let idx = text
125            .find("<tool_call>")
126            .ok_or_else(|| ParserError::ParsingFailed("tool_call marker not found".to_string()))?;
127        let normal_text = text[..idx].to_string();
128
129        // Extract tool calls
130        let mut tools = Vec::new();
131        for captures in self.extractor.captures_iter(text) {
132            if let Some(json_str) = captures.get(1) {
133                let parsed = serde_json::from_str::<Value>(json_str.as_str().trim())
134                    .map_err(|e| ParserError::ParsingFailed(e.to_string()))
135                    .and_then(|v| Self::parse_single_object(&v));
136
137                match parsed {
138                    Ok(Some(tool)) => tools.push(tool),
139                    Ok(None) => continue,
140                    Err(e) => {
141                        tracing::debug!("Failed to parse tool call: {:?}", e);
142                        continue;
143                    }
144                }
145            }
146        }
147
148        // If no tools were successfully parsed despite having markers, return entire text as fallback
149        if tools.is_empty() {
150            return Ok((text.to_string(), vec![]));
151        }
152
153        Ok((normal_text, tools))
154    }
155
156    async fn parse_incremental(
157        &mut self,
158        chunk: &str,
159        tools: &[Tool],
160    ) -> ParserResult<StreamingParseResult> {
161        // Append new text to buffer
162        self.buffer.push_str(chunk);
163        let current_text = &self.buffer.clone();
164
165        // Check if current_text has tool_call
166        let has_tool_start = self.has_tool_markers(current_text)
167            || (self.current_tool_id > 0 && current_text.starts_with(self.tool_call_separator));
168
169        if !has_tool_start {
170            // Only clear buffer if we're sure no tool call is starting
171            if helpers::ends_with_partial_token(&self.buffer, self.individual_tool_start_token)
172                .is_none()
173            {
174                let normal_text = self.buffer.clone();
175                self.buffer.clear();
176
177                return Ok(StreamingParseResult {
178                    normal_text,
179                    calls: vec![],
180                });
181            } else {
182                // Might be partial individual_tool_start_token, keep buffering
183                return Ok(StreamingParseResult::default());
184            }
185        }
186
187        // Build tool indices
188        let tool_indices = helpers::get_tool_indices(tools);
189
190        // Determine start index for JSON parsing
191        let start_idx = if let Some(pos) = current_text.find(self.individual_tool_start_token) {
192            pos + self.individual_tool_start_token.len()
193        } else if self.current_tool_id > 0 && current_text.starts_with(self.tool_call_separator) {
194            self.tool_call_separator.len()
195        } else {
196            0
197        };
198
199        let mut result = helpers::handle_json_tool_streaming(
200            current_text,
201            start_idx,
202            &mut self.partial_json,
203            &tool_indices,
204            &mut self.buffer,
205            &mut self.current_tool_id,
206            &mut self.current_tool_name_sent,
207            &mut self.streamed_args_for_tool,
208            &mut self.prev_tool_call_arr,
209        )?;
210
211        // Qwen-specific: Handle partial end tokens in normal text
212        // After tool calls complete, normal text might contain partial "</tool_call>" tags
213        if !result.normal_text.is_empty() {
214            self.normal_text_buffer.push_str(&result.normal_text);
215
216            // Check if buffer contains complete end token (without leading newline)
217            let end_token_without_newline = &self.individual_tool_end_token[1..]; // "</tool_call>"
218            if self.normal_text_buffer.contains(end_token_without_newline) {
219                // Complete end token found - clean it and return
220                let cleaned_text = self
221                    .normal_text_buffer
222                    .replace(end_token_without_newline, "");
223                self.normal_text_buffer.clear();
224                result.normal_text = cleaned_text;
225            } else {
226                // Check if buffer might contain partial end token at the end
227                if let Some(partial_match_len) = helpers::ends_with_partial_token(
228                    &self.normal_text_buffer,
229                    end_token_without_newline,
230                ) {
231                    // Keep potential partial match in buffer, return the rest
232                    let split_point = self.normal_text_buffer.len() - partial_match_len;
233                    result.normal_text = self.normal_text_buffer[..split_point].to_string();
234                    self.normal_text_buffer = self.normal_text_buffer[split_point..].to_string();
235                } else {
236                    // No partial match, return all buffered text
237                    result.normal_text = self.normal_text_buffer.clone();
238                    self.normal_text_buffer.clear();
239                }
240            }
241        }
242
243        Ok(result)
244    }
245
246    fn has_tool_markers(&self, text: &str) -> bool {
247        text.contains("<tool_call>")
248    }
249
250    fn get_unstreamed_tool_args(&self) -> Option<Vec<crate::types::ToolCallItem>> {
251        helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
252    }
253
254    fn reset(&mut self) {
255        helpers::reset_parser_state(
256            &mut self.buffer,
257            &mut self.prev_tool_call_arr,
258            &mut self.current_tool_id,
259            &mut self.current_tool_name_sent,
260            &mut self.streamed_args_for_tool,
261        );
262    }
263}