tool_parser/parsers/
qwen.rs1use async_trait::async_trait;
2use openai_protocol::common::Tool;
3use regex::Regex;
4use serde_json::Value;
5
6use crate::{
7 errors::{ParserError, ParserResult},
8 parsers::helpers,
9 partial_json::PartialJson,
10 traits::ToolParser,
11 types::{FunctionCall, StreamingParseResult, ToolCall},
12};
13
14pub struct QwenParser {
26 partial_json: PartialJson,
28
29 extractor: Regex,
31
32 buffer: String,
34
35 prev_tool_call_arr: Vec<Value>,
37
38 current_tool_id: i32,
40
41 current_tool_name_sent: bool,
43
44 streamed_args_for_tool: Vec<String>,
46
47 normal_text_buffer: String,
49
50 individual_tool_start_token: &'static str,
53 individual_tool_end_token: &'static str,
54 tool_call_separator: &'static str,
55}
56
57impl QwenParser {
58 pub fn new() -> Self {
60 let pattern = r"(?s)<tool_call>\n(.*?)\n</tool_call>";
62 let extractor = Regex::new(pattern).expect("Valid regex pattern");
63
64 Self {
65 partial_json: PartialJson::default(),
66 extractor,
67 buffer: String::new(),
68 prev_tool_call_arr: Vec::new(),
69 current_tool_id: -1,
70 current_tool_name_sent: false,
71 streamed_args_for_tool: Vec::new(),
72 normal_text_buffer: String::new(),
73 individual_tool_start_token: "<tool_call>\n",
74 individual_tool_end_token: "\n</tool_call>",
75 tool_call_separator: "\n",
76 }
77 }
78
79 fn parse_single_object(&self, obj: &Value) -> ParserResult<Option<ToolCall>> {
81 let name = obj.get("name").and_then(|v| v.as_str());
82
83 if let Some(name) = name {
84 let empty_obj = Value::Object(serde_json::Map::new());
86 let args = obj.get("arguments").unwrap_or(&empty_obj);
87
88 let arguments = serde_json::to_string(args)
90 .map_err(|e| ParserError::ParsingFailed(e.to_string()))?;
91
92 Ok(Some(ToolCall {
93 function: FunctionCall {
94 name: name.to_string(),
95 arguments,
96 },
97 }))
98 } else {
99 Ok(None)
100 }
101 }
102}
103
104impl Default for QwenParser {
105 fn default() -> Self {
106 Self::new()
107 }
108}
109
110#[async_trait]
111impl ToolParser for QwenParser {
112 async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
113 if !self.has_tool_markers(text) {
115 return Ok((text.to_string(), vec![]));
116 }
117
118 let idx = text.find("<tool_call>").unwrap(); let normal_text = text[..idx].to_string();
121
122 let mut tools = Vec::new();
124 for captures in self.extractor.captures_iter(text) {
125 if let Some(json_str) = captures.get(1) {
126 let parsed = serde_json::from_str::<Value>(json_str.as_str().trim())
127 .map_err(|e| ParserError::ParsingFailed(e.to_string()))
128 .and_then(|v| self.parse_single_object(&v));
129
130 match parsed {
131 Ok(Some(tool)) => tools.push(tool),
132 Ok(None) => continue,
133 Err(e) => {
134 tracing::debug!("Failed to parse tool call: {:?}", e);
135 continue;
136 }
137 }
138 }
139 }
140
141 if tools.is_empty() {
143 return Ok((text.to_string(), vec![]));
144 }
145
146 Ok((normal_text, tools))
147 }
148
149 async fn parse_incremental(
150 &mut self,
151 chunk: &str,
152 tools: &[Tool],
153 ) -> ParserResult<StreamingParseResult> {
154 self.buffer.push_str(chunk);
156 let current_text = &self.buffer.clone();
157
158 let has_tool_start = self.has_tool_markers(current_text)
160 || (self.current_tool_id > 0 && current_text.starts_with(self.tool_call_separator));
161
162 if !has_tool_start {
163 if helpers::ends_with_partial_token(&self.buffer, self.individual_tool_start_token)
165 .is_none()
166 {
167 let normal_text = self.buffer.clone();
168 self.buffer.clear();
169
170 return Ok(StreamingParseResult {
171 normal_text,
172 calls: vec![],
173 });
174 } else {
175 return Ok(StreamingParseResult::default());
177 }
178 }
179
180 let tool_indices = helpers::get_tool_indices(tools);
182
183 let start_idx = if let Some(pos) = current_text.find(self.individual_tool_start_token) {
185 pos + self.individual_tool_start_token.len()
186 } else if self.current_tool_id > 0 && current_text.starts_with(self.tool_call_separator) {
187 self.tool_call_separator.len()
188 } else {
189 0
190 };
191
192 let mut result = helpers::handle_json_tool_streaming(
193 current_text,
194 start_idx,
195 &mut self.partial_json,
196 &tool_indices,
197 &mut self.buffer,
198 &mut self.current_tool_id,
199 &mut self.current_tool_name_sent,
200 &mut self.streamed_args_for_tool,
201 &mut self.prev_tool_call_arr,
202 )?;
203
204 if !result.normal_text.is_empty() {
207 self.normal_text_buffer.push_str(&result.normal_text);
208
209 let end_token_without_newline = &self.individual_tool_end_token[1..]; if self.normal_text_buffer.contains(end_token_without_newline) {
212 let cleaned_text = self
214 .normal_text_buffer
215 .replace(end_token_without_newline, "");
216 self.normal_text_buffer.clear();
217 result.normal_text = cleaned_text;
218 } else {
219 if let Some(partial_match_len) = helpers::ends_with_partial_token(
221 &self.normal_text_buffer,
222 end_token_without_newline,
223 ) {
224 let split_point = self.normal_text_buffer.len() - partial_match_len;
226 result.normal_text = self.normal_text_buffer[..split_point].to_string();
227 self.normal_text_buffer = self.normal_text_buffer[split_point..].to_string();
228 } else {
229 result.normal_text = self.normal_text_buffer.clone();
231 self.normal_text_buffer.clear();
232 }
233 }
234 }
235
236 Ok(result)
237 }
238
239 fn has_tool_markers(&self, text: &str) -> bool {
240 text.contains("<tool_call>")
241 }
242
243 fn get_unstreamed_tool_args(&self) -> Option<Vec<crate::types::ToolCallItem>> {
244 helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
245 }
246
247 fn reset(&mut self) {
248 helpers::reset_parser_state(
249 &mut self.buffer,
250 &mut self.prev_tool_call_arr,
251 &mut self.current_tool_id,
252 &mut self.current_tool_name_sent,
253 &mut self.streamed_args_for_tool,
254 );
255 }
256}