1use std::sync::OnceLock;
2
3use async_trait::async_trait;
14use num_traits::ToPrimitive;
15use openai_protocol::common::Tool;
16use regex::Regex;
17use rustpython_parser::{
18 ast::{Constant, Expr, Mod, UnaryOp},
19 parse, Mode,
20};
21use serde_json::{Map, Number, Value};
22
23use crate::{
24 errors::{ParserError, ParserResult},
25 parsers::helpers,
26 traits::ToolParser,
27 types::{FunctionCall, StreamingParseResult, ToolCall, ToolCallItem},
28};
29
30static PYTHONIC_BLOCK_REGEX: OnceLock<Regex> = OnceLock::new();
31
32#[expect(
34 clippy::expect_used,
35 reason = "regex pattern is a compile-time string literal"
36)]
37fn pythonic_block_regex() -> &'static Regex {
38 PYTHONIC_BLOCK_REGEX.get_or_init(|| {
39 Regex::new(r"(?s)\[\s*[A-Za-z_]\w*\s*\(.*?\)\s*(?:,\s*[A-Za-z_]\w*\s*\(.*?\)\s*)*\]")
44 .expect("pythonic tool call regex must compile")
45 })
46}
47
48pub struct PythonicParser {
50 buffer: String,
52}
53
54impl Default for PythonicParser {
55 fn default() -> Self {
56 Self::new()
57 }
58}
59
60impl PythonicParser {
61 pub fn new() -> Self {
63 Self {
64 buffer: String::new(),
65 }
66 }
67
68 fn extract_tool_calls(text: &str) -> Option<(String, String)> {
71 pythonic_block_regex().find(text).map(|mat| {
72 let block = mat.as_str().to_string();
73 let normal = format!("{}{}", &text[..mat.start()], &text[mat.end()..]);
74 (block, normal)
75 })
76 }
77
78 fn strip_special_tokens(text: &str) -> String {
80 text.replace("<|python_start|>", "")
81 .replace("<|python_end|>", "")
82 }
83
84 fn parse_tool_call_block(block: &str) -> ParserResult<Vec<ToolCall>> {
85 let expr = parse_python_expression(block)?;
86 match expr {
87 Expr::List(list_expr) => list_expr
88 .elts
89 .into_iter()
90 .enumerate()
91 .map(|(idx, call_expr)| build_tool_call(call_expr, idx))
92 .collect(),
93 _ => Err(ParserError::ParsingFailed(
94 "Expected a list of function calls in pythonic tool call".to_string(),
95 )),
96 }
97 }
98}
99
100#[async_trait]
101impl ToolParser for PythonicParser {
102 async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
103 let cleaned = Self::strip_special_tokens(text);
104
105 if let Some((tool_calls_text, normal_text)) = Self::extract_tool_calls(&cleaned) {
106 match Self::parse_tool_call_block(&tool_calls_text) {
107 Ok(calls) => {
108 if calls.is_empty() {
109 Ok((text.to_string(), vec![]))
111 } else {
112 Ok((normal_text, calls))
113 }
114 }
115 Err(e) => {
116 tracing::debug!("Failed to parse pythonic tool calls: {}", e);
118 Ok((text.to_string(), vec![]))
119 }
120 }
121 } else {
122 Ok((text.to_string(), vec![]))
123 }
124 }
125
126 async fn parse_incremental(
127 &mut self,
128 chunk: &str,
129 tools: &[Tool],
130 ) -> ParserResult<StreamingParseResult> {
131 self.buffer.push_str(chunk);
132
133 let cleaned = Self::strip_special_tokens(&self.buffer);
134
135 if let Some(start) = cleaned.find('[') {
137 let normal_text = if start > 0 {
138 cleaned[..start].to_string()
139 } else {
140 String::new()
141 };
142
143 if let Some(end) = find_matching_bracket(&cleaned, start) {
145 let call_text = &cleaned[start..=end];
147
148 match self.parse_complete(call_text).await {
149 Ok((_, calls)) => {
150 let remaining_text = &cleaned[end + 1..];
152 self.buffer = remaining_text.to_string();
153
154 let tool_indices = helpers::get_tool_indices(tools);
156 let items: Vec<ToolCallItem> = calls
157 .into_iter()
158 .enumerate()
159 .filter_map(|(idx, tool)| {
160 if !tool_indices.contains_key(&tool.function.name) {
161 tracing::debug!(
162 "Invalid tool name '{}' - skipping",
163 tool.function.name
164 );
165 return None;
166 }
167
168 Some(ToolCallItem {
169 tool_index: idx,
170 name: Some(tool.function.name),
171 parameters: tool.function.arguments,
172 })
173 })
174 .collect();
175
176 return Ok(StreamingParseResult {
177 normal_text,
178 calls: items,
179 });
180 }
181 Err(e) => {
182 tracing::debug!("Failed to parse pythonic tool call: {}", e);
183 self.buffer.clear();
185 return Ok(StreamingParseResult::default());
186 }
187 }
188 } else {
189 self.buffer = cleaned[start..].to_string();
192
193 if !normal_text.is_empty() {
194 return Ok(StreamingParseResult {
195 normal_text,
196 calls: vec![],
197 });
198 }
199
200 return Ok(StreamingParseResult::default());
202 }
203 }
204
205 self.buffer.clear();
207 Ok(StreamingParseResult {
208 normal_text: cleaned,
209 calls: vec![],
210 })
211 }
212
213 fn has_tool_markers(&self, text: &str) -> bool {
214 let cleaned = Self::strip_special_tokens(text);
215 if pythonic_block_regex().is_match(&cleaned) {
216 return true;
217 }
218
219 false
220 }
221}
222
/// Finds the `]` that closes the `[` located at byte offset `start` of `buffer`.
///
/// Both `start` and the returned position are **byte** offsets, so the result
/// can be used directly to slice `buffer` (callers obtain `start` from
/// `str::find`, which also yields byte offsets). Nested brackets are balanced
/// by depth counting.
///
/// Returns `None` when the bracket opened at `start` is never closed, or when
/// `start` lies beyond the end of `buffer`.
///
/// Brackets that appear inside string literals are not treated specially.
fn find_matching_bracket(buffer: &str, start: usize) -> Option<usize> {
    let mut depth: i32 = 0;

    // `[` and `]` are single-byte ASCII and can never occur inside a multi-byte
    // UTF-8 sequence, so scanning raw bytes is safe and keeps every index in
    // byte units (iterating `chars()` would yield char indices instead, which
    // diverge from byte offsets as soon as non-ASCII text is present).
    for (offset, &byte) in buffer.as_bytes().iter().enumerate().skip(start) {
        match byte {
            b'[' => depth += 1,
            b']' => {
                depth -= 1;
                if depth == 0 {
                    return Some(offset);
                }
            }
            _ => {}
        }
    }

    None
}
241
242fn parse_python_expression(source: &str) -> ParserResult<Expr> {
243 let module = parse(source, Mode::Expression, "<pythonic_tool_call>")
244 .map_err(|err| ParserError::ParsingFailed(err.to_string()))?;
245
246 match module {
247 Mod::Expression(expr_mod) => Ok(*expr_mod.body),
248 _ => Err(ParserError::ParsingFailed(
249 "Expected a Python expression".to_string(),
250 )),
251 }
252}
253
254fn build_tool_call(expr: Expr, _index: usize) -> ParserResult<ToolCall> {
255 match expr {
256 Expr::Call(call_expr) => {
257 if !call_expr.args.is_empty() {
258 return Err(ParserError::ParsingFailed(
259 "Positional arguments are not supported in pythonic tool calls".to_string(),
260 ));
261 }
262
263 let function_name = match *call_expr.func {
264 Expr::Name(name_expr) => name_expr.id.to_string(),
265 _ => {
266 return Err(ParserError::ParsingFailed(
267 "Unsupported function reference in pythonic tool call".to_string(),
268 ))
269 }
270 };
271
272 let mut arguments_map = Map::with_capacity(call_expr.keywords.len());
273 for keyword in call_expr.keywords {
274 let arg_name = keyword.arg.ok_or_else(|| {
275 ParserError::ParsingFailed(
276 "pythonic tool calls do not support **kwargs".to_string(),
277 )
278 })?;
279 let value_json = expression_to_json(&keyword.value)?;
280 arguments_map.insert(arg_name.to_string(), value_json);
281 }
282
283 let arguments_json = Value::Object(arguments_map);
284 let arguments_string = serde_json::to_string(&arguments_json)?;
285
286 Ok(ToolCall {
287 function: FunctionCall {
288 name: function_name,
289 arguments: arguments_string,
290 },
291 })
292 }
293 _ => Err(ParserError::ParsingFailed(
294 "Expected function calls inside pythonic tool call list".to_string(),
295 )),
296 }
297}
298
299fn expression_to_json(expr: &Expr) -> ParserResult<Value> {
300 match expr {
301 Expr::Constant(expr_constant) => constant_to_json(&expr_constant.value),
302 Expr::List(list_expr) => collect_sequence(&list_expr.elts).map(Value::Array),
303 Expr::Tuple(tuple_expr) => collect_sequence(&tuple_expr.elts).map(Value::Array),
304 Expr::Dict(dict_expr) => {
305 collect_dict(&dict_expr.keys, &dict_expr.values).map(Value::Object)
306 }
307 Expr::UnaryOp(unary_expr) => match unary_expr.op {
308 UnaryOp::USub => match unary_expr.operand.as_ref() {
309 Expr::Constant(const_expr) => negate_constant(&const_expr.value),
310 _ => Err(ParserError::ParsingFailed(
311 "Unsupported unary operand in pythonic tool call".to_string(),
312 )),
313 },
314 UnaryOp::UAdd => expression_to_json(unary_expr.operand.as_ref()),
315 _ => Err(ParserError::ParsingFailed(format!(
316 "Unsupported unary operator in pythonic tool call: {:?}",
317 unary_expr.op
318 ))),
319 },
320 Expr::Name(name_expr) => Ok(Value::String(name_expr.id.to_string())),
321 _ => Err(ParserError::ParsingFailed(format!(
322 "Unsupported expression in pythonic tool call: {expr:?}"
323 ))),
324 }
325}
326
327fn constant_to_json(constant: &Constant) -> ParserResult<Value> {
328 match constant {
329 Constant::None => Ok(Value::Null),
330 Constant::Bool(b) => Ok(Value::Bool(*b)),
331 Constant::Int(value) => Ok(integer_constant_to_value(value, false)),
332 Constant::Float(f) => Number::from_f64(*f).map(Value::Number).ok_or_else(|| {
333 ParserError::ParsingFailed("Invalid float literal in pythonic tool call".to_string())
334 }),
335 Constant::Str(s) => Ok(Value::String(s.clone())),
336 Constant::Bytes(bytes) => Ok(Value::String(String::from_utf8_lossy(bytes).into_owned())),
337 Constant::Tuple(values) => constant_tuple_to_array(values).map(Value::Array),
338 Constant::Ellipsis | Constant::Complex { .. } => Err(ParserError::ParsingFailed(
339 "Unsupported literal in pythonic tool call".to_string(),
340 )),
341 }
342}
343
344fn negate_constant(constant: &Constant) -> ParserResult<Value> {
345 match constant {
346 Constant::Int(value) => Ok(integer_constant_to_value(value, true)),
347 Constant::Float(f) => Number::from_f64(-f).map(Value::Number).ok_or_else(|| {
348 ParserError::ParsingFailed("Invalid float literal in pythonic tool call".to_string())
349 }),
350 _ => Err(ParserError::ParsingFailed(
351 "Unsupported unary operand in pythonic tool call".to_string(),
352 )),
353 }
354}
355
356fn value_to_key_string(value: Value) -> ParserResult<String> {
357 match value {
358 Value::String(s) => Ok(s),
359 Value::Number(num) => Ok(num.to_string()),
360 Value::Bool(b) => Ok(b.to_string()),
361 Value::Null => Ok("null".to_string()),
362 other => Err(ParserError::ParsingFailed(format!(
363 "Unsupported key type in pythonic tool call: {other:?}"
364 ))),
365 }
366}
367
368fn collect_sequence(elements: &[Expr]) -> ParserResult<Vec<Value>> {
369 elements.iter().map(expression_to_json).collect()
370}
371
372fn collect_dict(keys: &[Option<Expr>], values: &[Expr]) -> ParserResult<Map<String, Value>> {
373 let mut map = Map::with_capacity(keys.len());
374 for (key_expr, value_expr) in keys.iter().zip(values.iter()) {
375 let key_expr = key_expr.as_ref().ok_or_else(|| {
376 ParserError::ParsingFailed("pythonic tool calls do not support **kwargs".to_string())
377 })?;
378 let key_value = expression_to_json(key_expr)?;
379 let key = value_to_key_string(key_value)?;
380 let value_json = expression_to_json(value_expr)?;
381 map.insert(key, value_json);
382 }
383 Ok(map)
384}
385
386fn constant_tuple_to_array(values: &[Constant]) -> ParserResult<Vec<Value>> {
387 values.iter().map(constant_to_json).collect()
388}
389
390fn integer_constant_to_value<T>(value: &T, negate: bool) -> Value
391where
392 T: ToPrimitive + std::fmt::Display,
393{
394 if let Some(mut i) = value.to_i64() {
395 if negate {
396 i = -i;
397 }
398 return Value::Number(Number::from(i));
399 }
400
401 if negate {
402 if let Some(u) = value.to_u64() {
403 if u <= i64::MAX as u64 {
404 return Value::Number(Number::from(-(u as i64)));
405 }
406 return Value::String(format!("-{value}"));
407 }
408 Value::String(format!("-{value}"))
409 } else if let Some(u) = value.to_u64() {
410 Value::Number(Number::from(u))
411 } else {
412 Value::String(value.to_string())
413 }
414}