1use std::sync::OnceLock;
2
3use async_trait::async_trait;
14use num_traits::ToPrimitive;
15use openai_protocol::common::Tool;
16use regex::Regex;
17use rustpython_parser::{
18 ast::{Constant, Expr, Mod, UnaryOp},
19 parse, Mode,
20};
21use serde_json::{Map, Number, Value};
22
23use crate::{
24 errors::{ParserError, ParserResult},
25 parsers::helpers,
26 traits::ToolParser,
27 types::{FunctionCall, StreamingParseResult, ToolCall, ToolCallItem},
28};
29
30static PYTHONIC_BLOCK_REGEX: OnceLock<Regex> = OnceLock::new();
31
32fn pythonic_block_regex() -> &'static Regex {
34 PYTHONIC_BLOCK_REGEX.get_or_init(|| {
35 Regex::new(r"(?s)\[\s*[A-Za-z_]\w*\s*\(.*?\)\s*(?:,\s*[A-Za-z_]\w*\s*\(.*?\)\s*)*\]")
40 .expect("pythonic tool call regex must compile")
41 })
42}
43
/// Parser for tool calls written as a Python-style list of keyword-argument
/// calls, e.g. `[get_weather(city="Paris")]`.
pub struct PythonicParser {
    /// Streamed text received by `parse_incremental` that has not been
    /// emitted yet (for example an opening `[` still waiting for its `]`).
    buffer: String,
}
49
impl Default for PythonicParser {
    /// Equivalent to [`PythonicParser::new`].
    fn default() -> Self {
        Self::new()
    }
}
55
56impl PythonicParser {
57 pub fn new() -> Self {
59 Self {
60 buffer: String::new(),
61 }
62 }
63
64 fn extract_tool_calls(&self, text: &str) -> Option<(String, String)> {
67 pythonic_block_regex().find(text).map(|mat| {
68 let block = mat.as_str().to_string();
69 let normal = format!("{}{}", &text[..mat.start()], &text[mat.end()..]);
70 (block, normal)
71 })
72 }
73
74 fn strip_special_tokens(text: &str) -> String {
76 text.replace("<|python_start|>", "")
77 .replace("<|python_end|>", "")
78 }
79
80 fn parse_tool_call_block(&self, block: &str) -> ParserResult<Vec<ToolCall>> {
81 let expr = parse_python_expression(block)?;
82 match expr {
83 Expr::List(list_expr) => list_expr
84 .elts
85 .into_iter()
86 .enumerate()
87 .map(|(idx, call_expr)| build_tool_call(call_expr, idx))
88 .collect(),
89 _ => Err(ParserError::ParsingFailed(
90 "Expected a list of function calls in pythonic tool call".to_string(),
91 )),
92 }
93 }
94}
95
96#[async_trait]
97impl ToolParser for PythonicParser {
98 async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
99 let cleaned = Self::strip_special_tokens(text);
100
101 if let Some((tool_calls_text, normal_text)) = self.extract_tool_calls(&cleaned) {
102 match self.parse_tool_call_block(&tool_calls_text) {
103 Ok(calls) => {
104 if calls.is_empty() {
105 Ok((text.to_string(), vec![]))
107 } else {
108 Ok((normal_text, calls))
109 }
110 }
111 Err(e) => {
112 tracing::debug!("Failed to parse pythonic tool calls: {}", e);
114 Ok((text.to_string(), vec![]))
115 }
116 }
117 } else {
118 Ok((text.to_string(), vec![]))
119 }
120 }
121
122 async fn parse_incremental(
123 &mut self,
124 chunk: &str,
125 tools: &[Tool],
126 ) -> ParserResult<StreamingParseResult> {
127 self.buffer.push_str(chunk);
128
129 let cleaned = Self::strip_special_tokens(&self.buffer);
130
131 if let Some(start) = cleaned.find('[') {
133 let normal_text = if start > 0 {
134 cleaned[..start].to_string()
135 } else {
136 String::new()
137 };
138
139 if let Some(end) = find_matching_bracket(&cleaned, start) {
141 let call_text = &cleaned[start..=end];
143
144 match self.parse_complete(call_text).await {
145 Ok((_, calls)) => {
146 let remaining_text = &cleaned[end + 1..];
148 self.buffer = remaining_text.to_string();
149
150 let tool_indices = helpers::get_tool_indices(tools);
152 let items: Vec<ToolCallItem> = calls
153 .into_iter()
154 .enumerate()
155 .filter_map(|(idx, tool)| {
156 if !tool_indices.contains_key(&tool.function.name) {
157 tracing::debug!(
158 "Invalid tool name '{}' - skipping",
159 tool.function.name
160 );
161 return None;
162 }
163
164 Some(ToolCallItem {
165 tool_index: idx,
166 name: Some(tool.function.name),
167 parameters: tool.function.arguments,
168 })
169 })
170 .collect();
171
172 return Ok(StreamingParseResult {
173 normal_text,
174 calls: items,
175 });
176 }
177 Err(e) => {
178 tracing::debug!("Failed to parse pythonic tool call: {}", e);
179 self.buffer.clear();
181 return Ok(StreamingParseResult::default());
182 }
183 }
184 } else {
185 self.buffer = cleaned[start..].to_string();
188
189 if !normal_text.is_empty() {
190 return Ok(StreamingParseResult {
191 normal_text,
192 calls: vec![],
193 });
194 }
195
196 return Ok(StreamingParseResult::default());
198 }
199 }
200
201 self.buffer.clear();
203 Ok(StreamingParseResult {
204 normal_text: cleaned,
205 calls: vec![],
206 })
207 }
208
209 fn has_tool_markers(&self, text: &str) -> bool {
210 let cleaned = Self::strip_special_tokens(text);
211 if pythonic_block_regex().is_match(&cleaned) {
212 return true;
213 }
214
215 false
216 }
217}
218
/// Finds the `]` that balances the bracket nesting beginning at `start`.
///
/// `start` is a **byte** offset into `buffer` (as produced by `str::find`),
/// and the returned value is the **byte** offset of the closing `]`, so both
/// can be used directly for slicing `buffer`. Returns `None` when nesting
/// never returns to zero.
///
/// Brackets are counted purely textually; `[` or `]` inside string literals
/// are not treated specially.
fn find_matching_bracket(buffer: &str, start: usize) -> Option<usize> {
    let mut depth: i32 = 0;

    // `[` and `]` are single-byte ASCII and can never appear inside a
    // multi-byte UTF-8 sequence, so scanning raw bytes is exact and keeps all
    // positions in byte units regardless of non-ASCII content.
    for (offset, &byte) in buffer.as_bytes().iter().enumerate().skip(start) {
        match byte {
            b'[' => depth += 1,
            b']' => {
                depth -= 1;
                if depth == 0 {
                    return Some(offset);
                }
            }
            _ => {}
        }
    }

    None
}
237
238fn parse_python_expression(source: &str) -> ParserResult<Expr> {
239 let module = parse(source, Mode::Expression, "<pythonic_tool_call>")
240 .map_err(|err| ParserError::ParsingFailed(err.to_string()))?;
241
242 match module {
243 Mod::Expression(expr_mod) => Ok(*expr_mod.body),
244 _ => Err(ParserError::ParsingFailed(
245 "Expected a Python expression".to_string(),
246 )),
247 }
248}
249
250fn build_tool_call(expr: Expr, _index: usize) -> ParserResult<ToolCall> {
251 match expr {
252 Expr::Call(call_expr) => {
253 if !call_expr.args.is_empty() {
254 return Err(ParserError::ParsingFailed(
255 "Positional arguments are not supported in pythonic tool calls".to_string(),
256 ));
257 }
258
259 let function_name = match *call_expr.func {
260 Expr::Name(name_expr) => name_expr.id.to_string(),
261 _ => {
262 return Err(ParserError::ParsingFailed(
263 "Unsupported function reference in pythonic tool call".to_string(),
264 ))
265 }
266 };
267
268 let mut arguments_map = Map::with_capacity(call_expr.keywords.len());
269 for keyword in call_expr.keywords {
270 let arg_name = keyword.arg.ok_or_else(|| {
271 ParserError::ParsingFailed(
272 "pythonic tool calls do not support **kwargs".to_string(),
273 )
274 })?;
275 let value_json = expression_to_json(&keyword.value)?;
276 arguments_map.insert(arg_name.to_string(), value_json);
277 }
278
279 let arguments_json = Value::Object(arguments_map);
280 let arguments_string = serde_json::to_string(&arguments_json)?;
281
282 Ok(ToolCall {
283 function: FunctionCall {
284 name: function_name,
285 arguments: arguments_string,
286 },
287 })
288 }
289 _ => Err(ParserError::ParsingFailed(
290 "Expected function calls inside pythonic tool call list".to_string(),
291 )),
292 }
293}
294
295fn expression_to_json(expr: &Expr) -> ParserResult<Value> {
296 match expr {
297 Expr::Constant(expr_constant) => constant_to_json(&expr_constant.value),
298 Expr::List(list_expr) => collect_sequence(&list_expr.elts).map(Value::Array),
299 Expr::Tuple(tuple_expr) => collect_sequence(&tuple_expr.elts).map(Value::Array),
300 Expr::Dict(dict_expr) => {
301 collect_dict(&dict_expr.keys, &dict_expr.values).map(Value::Object)
302 }
303 Expr::UnaryOp(unary_expr) => match unary_expr.op {
304 UnaryOp::USub => match unary_expr.operand.as_ref() {
305 Expr::Constant(const_expr) => negate_constant(&const_expr.value),
306 _ => Err(ParserError::ParsingFailed(
307 "Unsupported unary operand in pythonic tool call".to_string(),
308 )),
309 },
310 UnaryOp::UAdd => expression_to_json(unary_expr.operand.as_ref()),
311 _ => Err(ParserError::ParsingFailed(format!(
312 "Unsupported unary operator in pythonic tool call: {:?}",
313 unary_expr.op
314 ))),
315 },
316 Expr::Name(name_expr) => Ok(Value::String(name_expr.id.to_string())),
317 _ => Err(ParserError::ParsingFailed(format!(
318 "Unsupported expression in pythonic tool call: {:?}",
319 expr
320 ))),
321 }
322}
323
324fn constant_to_json(constant: &Constant) -> ParserResult<Value> {
325 match constant {
326 Constant::None => Ok(Value::Null),
327 Constant::Bool(b) => Ok(Value::Bool(*b)),
328 Constant::Int(value) => Ok(integer_constant_to_value(value, false)),
329 Constant::Float(f) => Number::from_f64(*f).map(Value::Number).ok_or_else(|| {
330 ParserError::ParsingFailed("Invalid float literal in pythonic tool call".to_string())
331 }),
332 Constant::Str(s) => Ok(Value::String(s.clone())),
333 Constant::Bytes(bytes) => Ok(Value::String(String::from_utf8_lossy(bytes).into_owned())),
334 Constant::Tuple(values) => constant_tuple_to_array(values).map(Value::Array),
335 Constant::Ellipsis | Constant::Complex { .. } => Err(ParserError::ParsingFailed(
336 "Unsupported literal in pythonic tool call".to_string(),
337 )),
338 }
339}
340
341fn negate_constant(constant: &Constant) -> ParserResult<Value> {
342 match constant {
343 Constant::Int(value) => Ok(integer_constant_to_value(value, true)),
344 Constant::Float(f) => Number::from_f64(-f).map(Value::Number).ok_or_else(|| {
345 ParserError::ParsingFailed("Invalid float literal in pythonic tool call".to_string())
346 }),
347 _ => Err(ParserError::ParsingFailed(
348 "Unsupported unary operand in pythonic tool call".to_string(),
349 )),
350 }
351}
352
353fn value_to_key_string(value: Value) -> ParserResult<String> {
354 match value {
355 Value::String(s) => Ok(s),
356 Value::Number(num) => Ok(num.to_string()),
357 Value::Bool(b) => Ok(b.to_string()),
358 Value::Null => Ok("null".to_string()),
359 other => Err(ParserError::ParsingFailed(format!(
360 "Unsupported key type in pythonic tool call: {:?}",
361 other
362 ))),
363 }
364}
365
366fn collect_sequence(elements: &[Expr]) -> ParserResult<Vec<Value>> {
367 elements.iter().map(expression_to_json).collect()
368}
369
370fn collect_dict(keys: &[Option<Expr>], values: &[Expr]) -> ParserResult<Map<String, Value>> {
371 let mut map = Map::with_capacity(keys.len());
372 for (key_expr, value_expr) in keys.iter().zip(values.iter()) {
373 let key_expr = key_expr.as_ref().ok_or_else(|| {
374 ParserError::ParsingFailed("pythonic tool calls do not support **kwargs".to_string())
375 })?;
376 let key_value = expression_to_json(key_expr)?;
377 let key = value_to_key_string(key_value)?;
378 let value_json = expression_to_json(value_expr)?;
379 map.insert(key, value_json);
380 }
381 Ok(map)
382}
383
384fn constant_tuple_to_array(values: &[Constant]) -> ParserResult<Vec<Value>> {
385 values.iter().map(constant_to_json).collect()
386}
387
388fn integer_constant_to_value<T>(value: &T, negate: bool) -> Value
389where
390 T: ToPrimitive + std::fmt::Display,
391{
392 if let Some(mut i) = value.to_i64() {
393 if negate {
394 i = -i;
395 }
396 return Value::Number(Number::from(i));
397 }
398
399 if negate {
400 if let Some(u) = value.to_u64() {
401 if u <= i64::MAX as u64 {
402 return Value::Number(Number::from(-(u as i64)));
403 }
404 return Value::String(format!("-{}", value));
405 }
406 Value::String(format!("-{}", value))
407 } else if let Some(u) = value.to_u64() {
408 Value::Number(Number::from(u))
409 } else {
410 Value::String(value.to_string())
411 }
412}