tool_parser/parsers/
qwen.rs1use async_trait::async_trait;
2use openai_protocol::common::Tool;
3use regex::Regex;
4use serde_json::Value;
5
6use crate::{
7 errors::{ParserError, ParserResult},
8 parsers::helpers,
9 partial_json::PartialJson,
10 traits::ToolParser,
11 types::{FunctionCall, StreamingParseResult, ToolCall},
12};
13
14pub struct QwenParser {
26 partial_json: PartialJson,
28
29 extractor: Regex,
31
32 buffer: String,
34
35 prev_tool_call_arr: Vec<Value>,
37
38 current_tool_id: i32,
40
41 current_tool_name_sent: bool,
43
44 streamed_args_for_tool: Vec<String>,
46
47 normal_text_buffer: String,
49
50 individual_tool_start_token: &'static str,
53 individual_tool_end_token: &'static str,
54 tool_call_separator: &'static str,
55}
56
57impl QwenParser {
58 #[expect(
60 clippy::expect_used,
61 reason = "regex pattern is a compile-time string literal"
62 )]
63 pub fn new() -> Self {
64 let pattern = r"(?s)<tool_call>\n(.*?)\n</tool_call>";
66 let extractor = Regex::new(pattern).expect("Valid regex pattern");
67
68 Self {
69 partial_json: PartialJson::default(),
70 extractor,
71 buffer: String::new(),
72 prev_tool_call_arr: Vec::new(),
73 current_tool_id: -1,
74 current_tool_name_sent: false,
75 streamed_args_for_tool: Vec::new(),
76 normal_text_buffer: String::new(),
77 individual_tool_start_token: "<tool_call>\n",
78 individual_tool_end_token: "\n</tool_call>",
79 tool_call_separator: "\n",
80 }
81 }
82
83 fn parse_single_object(obj: &Value) -> ParserResult<Option<ToolCall>> {
85 let name = obj.get("name").and_then(|v| v.as_str());
86
87 if let Some(name) = name {
88 let empty_obj = Value::Object(serde_json::Map::new());
90 let args = obj.get("arguments").unwrap_or(&empty_obj);
91
92 let arguments = serde_json::to_string(args)
94 .map_err(|e| ParserError::ParsingFailed(e.to_string()))?;
95
96 Ok(Some(ToolCall {
97 function: FunctionCall {
98 name: name.to_string(),
99 arguments,
100 },
101 }))
102 } else {
103 Ok(None)
104 }
105 }
106}
107
108impl Default for QwenParser {
109 fn default() -> Self {
110 Self::new()
111 }
112}
113
114#[async_trait]
115impl ToolParser for QwenParser {
116 async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
117 if !self.has_tool_markers(text) {
119 return Ok((text.to_string(), vec![]));
120 }
121
122 let idx = text
125 .find("<tool_call>")
126 .ok_or_else(|| ParserError::ParsingFailed("tool_call marker not found".to_string()))?;
127 let normal_text = text[..idx].to_string();
128
129 let mut tools = Vec::new();
131 for captures in self.extractor.captures_iter(text) {
132 if let Some(json_str) = captures.get(1) {
133 let parsed = serde_json::from_str::<Value>(json_str.as_str().trim())
134 .map_err(|e| ParserError::ParsingFailed(e.to_string()))
135 .and_then(|v| Self::parse_single_object(&v));
136
137 match parsed {
138 Ok(Some(tool)) => tools.push(tool),
139 Ok(None) => continue,
140 Err(e) => {
141 tracing::debug!("Failed to parse tool call: {:?}", e);
142 continue;
143 }
144 }
145 }
146 }
147
148 if tools.is_empty() {
150 return Ok((text.to_string(), vec![]));
151 }
152
153 Ok((normal_text, tools))
154 }
155
156 async fn parse_incremental(
157 &mut self,
158 chunk: &str,
159 tools: &[Tool],
160 ) -> ParserResult<StreamingParseResult> {
161 self.buffer.push_str(chunk);
163 let current_text = &self.buffer.clone();
164
165 let has_tool_start = self.has_tool_markers(current_text)
167 || (self.current_tool_id > 0 && current_text.starts_with(self.tool_call_separator));
168
169 if !has_tool_start {
170 if helpers::ends_with_partial_token(&self.buffer, self.individual_tool_start_token)
172 .is_none()
173 {
174 let normal_text = self.buffer.clone();
175 self.buffer.clear();
176
177 return Ok(StreamingParseResult {
178 normal_text,
179 calls: vec![],
180 });
181 } else {
182 return Ok(StreamingParseResult::default());
184 }
185 }
186
187 let tool_indices = helpers::get_tool_indices(tools);
189
190 let start_idx = if let Some(pos) = current_text.find(self.individual_tool_start_token) {
192 pos + self.individual_tool_start_token.len()
193 } else if self.current_tool_id > 0 && current_text.starts_with(self.tool_call_separator) {
194 self.tool_call_separator.len()
195 } else {
196 0
197 };
198
199 let mut result = helpers::handle_json_tool_streaming(
200 current_text,
201 start_idx,
202 &mut self.partial_json,
203 &tool_indices,
204 &mut self.buffer,
205 &mut self.current_tool_id,
206 &mut self.current_tool_name_sent,
207 &mut self.streamed_args_for_tool,
208 &mut self.prev_tool_call_arr,
209 )?;
210
211 if !result.normal_text.is_empty() {
214 self.normal_text_buffer.push_str(&result.normal_text);
215
216 let end_token_without_newline = &self.individual_tool_end_token[1..]; if self.normal_text_buffer.contains(end_token_without_newline) {
219 let cleaned_text = self
221 .normal_text_buffer
222 .replace(end_token_without_newline, "");
223 self.normal_text_buffer.clear();
224 result.normal_text = cleaned_text;
225 } else {
226 if let Some(partial_match_len) = helpers::ends_with_partial_token(
228 &self.normal_text_buffer,
229 end_token_without_newline,
230 ) {
231 let split_point = self.normal_text_buffer.len() - partial_match_len;
233 result.normal_text = self.normal_text_buffer[..split_point].to_string();
234 self.normal_text_buffer = self.normal_text_buffer[split_point..].to_string();
235 } else {
236 result.normal_text = self.normal_text_buffer.clone();
238 self.normal_text_buffer.clear();
239 }
240 }
241 }
242
243 Ok(result)
244 }
245
246 fn has_tool_markers(&self, text: &str) -> bool {
247 text.contains("<tool_call>")
248 }
249
250 fn get_unstreamed_tool_args(&self) -> Option<Vec<crate::types::ToolCallItem>> {
251 helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
252 }
253
254 fn reset(&mut self) {
255 helpers::reset_parser_state(
256 &mut self.buffer,
257 &mut self.prev_tool_call_arr,
258 &mut self.current_tool_id,
259 &mut self.current_tool_name_sent,
260 &mut self.streamed_args_for_tool,
261 );
262 }
263}