Skip to main content

tool_parser/parsers/
pythonic.rs

1use std::sync::OnceLock;
2
3/// Pythonic format parser for tool calls
4///
5/// Handles Python function call syntax within square brackets:
6/// ```text
7/// [tool1(arg1=val1, arg2=val2), tool2(arg1=val3)]
8/// ```
9///
10/// This format is used by Llama models and uses Python literals
11/// rather than JSON for arguments.
12/// Reference: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct?chat_template=default
13use async_trait::async_trait;
14use num_traits::ToPrimitive;
15use openai_protocol::common::Tool;
16use regex::Regex;
17use rustpython_parser::{
18    ast::{Constant, Expr, Mod, UnaryOp},
19    parse, Mode,
20};
21use serde_json::{Map, Number, Value};
22
23use crate::{
24    errors::{ParserError, ParserResult},
25    parsers::helpers,
26    traits::ToolParser,
27    types::{FunctionCall, StreamingParseResult, ToolCall, ToolCallItem},
28};
29
30static PYTHONIC_BLOCK_REGEX: OnceLock<Regex> = OnceLock::new();
31
32/// Lazily compiled regex that locates pythonic tool call blocks.
33#[expect(
34    clippy::expect_used,
35    reason = "regex pattern is a compile-time string literal"
36)]
37fn pythonic_block_regex() -> &'static Regex {
38    PYTHONIC_BLOCK_REGEX.get_or_init(|| {
39        // Matches one or more function calls inside a list. The `(?s)` flag allows
40        // newlines inside argument lists while keeping the pattern anchored to
41        // identifiers followed by parentheses, preventing plain lists like
42        // `[1, 2, 3]` from matching.
43        Regex::new(r"(?s)\[\s*[A-Za-z_]\w*\s*\(.*?\)\s*(?:,\s*[A-Za-z_]\w*\s*\(.*?\)\s*)*\]")
44            .expect("pythonic tool call regex must compile")
45    })
46}
47
/// Parser for the pythonic tool call format.
///
/// The parser keeps the text received so far from a streamed response so that
/// a tool call split across several chunks can be reassembled before parsing.
#[derive(Debug)]
pub struct PythonicParser {
    /// Text accumulated from streamed chunks that has not been emitted yet.
    buffer: String,
}
53
54impl Default for PythonicParser {
55    fn default() -> Self {
56        Self::new()
57    }
58}
59
60impl PythonicParser {
61    /// Create a new Pythonic parser
62    pub fn new() -> Self {
63        Self {
64            buffer: String::new(),
65        }
66    }
67
68    /// Extract the first pythonic tool call block and return it along with the
69    /// surrounding "normal" content.
70    fn extract_tool_calls(text: &str) -> Option<(String, String)> {
71        pythonic_block_regex().find(text).map(|mat| {
72            let block = mat.as_str().to_string();
73            let normal = format!("{}{}", &text[..mat.start()], &text[mat.end()..]);
74            (block, normal)
75        })
76    }
77
78    /// Strip special tokens that Llama models might output
79    fn strip_special_tokens(text: &str) -> String {
80        text.replace("<|python_start|>", "")
81            .replace("<|python_end|>", "")
82    }
83
84    fn parse_tool_call_block(block: &str) -> ParserResult<Vec<ToolCall>> {
85        let expr = parse_python_expression(block)?;
86        match expr {
87            Expr::List(list_expr) => list_expr
88                .elts
89                .into_iter()
90                .enumerate()
91                .map(|(idx, call_expr)| build_tool_call(call_expr, idx))
92                .collect(),
93            _ => Err(ParserError::ParsingFailed(
94                "Expected a list of function calls in pythonic tool call".to_string(),
95            )),
96        }
97    }
98}
99
100#[async_trait]
101impl ToolParser for PythonicParser {
102    async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
103        let cleaned = Self::strip_special_tokens(text);
104
105        if let Some((tool_calls_text, normal_text)) = Self::extract_tool_calls(&cleaned) {
106            match Self::parse_tool_call_block(&tool_calls_text) {
107                Ok(calls) => {
108                    if calls.is_empty() {
109                        // No tools successfully parsed despite having markers
110                        Ok((text.to_string(), vec![]))
111                    } else {
112                        Ok((normal_text, calls))
113                    }
114                }
115                Err(e) => {
116                    // Log and return entire text as fallback
117                    tracing::debug!("Failed to parse pythonic tool calls: {}", e);
118                    Ok((text.to_string(), vec![]))
119                }
120            }
121        } else {
122            Ok((text.to_string(), vec![]))
123        }
124    }
125
126    async fn parse_incremental(
127        &mut self,
128        chunk: &str,
129        tools: &[Tool],
130    ) -> ParserResult<StreamingParseResult> {
131        self.buffer.push_str(chunk);
132
133        let cleaned = Self::strip_special_tokens(&self.buffer);
134
135        // Look for opening bracket
136        if let Some(start) = cleaned.find('[') {
137            let normal_text = if start > 0 {
138                cleaned[..start].to_string()
139            } else {
140                String::new()
141            };
142
143            // Look for matching closing bracket
144            if let Some(end) = find_matching_bracket(&cleaned, start) {
145                // Found complete tool call - extract it and parse using parse_complete
146                let call_text = &cleaned[start..=end];
147
148                match self.parse_complete(call_text).await {
149                    Ok((_, calls)) => {
150                        // Update buffer with remaining text after tool call
151                        let remaining_text = &cleaned[end + 1..];
152                        self.buffer = remaining_text.to_string();
153
154                        // Validate tool names and convert ToolCall to ToolCallItem
155                        let tool_indices = helpers::get_tool_indices(tools);
156                        let items: Vec<ToolCallItem> = calls
157                            .into_iter()
158                            .enumerate()
159                            .filter_map(|(idx, tool)| {
160                                if !tool_indices.contains_key(&tool.function.name) {
161                                    tracing::debug!(
162                                        "Invalid tool name '{}' - skipping",
163                                        tool.function.name
164                                    );
165                                    return None;
166                                }
167
168                                Some(ToolCallItem {
169                                    tool_index: idx,
170                                    name: Some(tool.function.name),
171                                    parameters: tool.function.arguments,
172                                })
173                            })
174                            .collect();
175
176                        return Ok(StreamingParseResult {
177                            normal_text,
178                            calls: items,
179                        });
180                    }
181                    Err(e) => {
182                        tracing::debug!("Failed to parse pythonic tool call: {}", e);
183                        // Clear buffer on error
184                        self.buffer.clear();
185                        return Ok(StreamingParseResult::default());
186                    }
187                }
188            } else {
189                // We have an opening bracket but no closing bracket yet
190                // Put back everything from the bracket onwards
191                self.buffer = cleaned[start..].to_string();
192
193                if !normal_text.is_empty() {
194                    return Ok(StreamingParseResult {
195                        normal_text,
196                        calls: vec![],
197                    });
198                }
199
200                // Still accumulating a potential tool call
201                return Ok(StreamingParseResult::default());
202            }
203        }
204
205        // No tool call bracket found
206        self.buffer.clear();
207        Ok(StreamingParseResult {
208            normal_text: cleaned,
209            calls: vec![],
210        })
211    }
212
213    fn has_tool_markers(&self, text: &str) -> bool {
214        let cleaned = Self::strip_special_tokens(text);
215        if pythonic_block_regex().is_match(&cleaned) {
216            return true;
217        }
218
219        false
220    }
221}
222
/// Find the closing bracket that matches the opening bracket at `start`.
///
/// `start` and the returned position are both **byte** offsets into `buffer`,
/// so they can be used directly for slicing (`&buffer[start..=end]`). Nested
/// brackets are balanced by counting. Returns `None` when no matching bracket
/// exists, or when `start` is out of range or not on a character boundary.
fn find_matching_bracket(buffer: &str, start: usize) -> Option<usize> {
    let tail = buffer.get(start..)?;
    let mut depth: i32 = 0;

    // `char_indices` yields byte offsets relative to `tail`, which keeps the
    // result consistent with the byte offset callers obtain from `str::find`.
    for (offset, ch) in tail.char_indices() {
        match ch {
            '[' => depth += 1,
            ']' => {
                depth -= 1;
                if depth == 0 {
                    return Some(start + offset);
                }
            }
            _ => {}
        }
    }

    None // No matching bracket found
}
241
242fn parse_python_expression(source: &str) -> ParserResult<Expr> {
243    let module = parse(source, Mode::Expression, "<pythonic_tool_call>")
244        .map_err(|err| ParserError::ParsingFailed(err.to_string()))?;
245
246    match module {
247        Mod::Expression(expr_mod) => Ok(*expr_mod.body),
248        _ => Err(ParserError::ParsingFailed(
249            "Expected a Python expression".to_string(),
250        )),
251    }
252}
253
254fn build_tool_call(expr: Expr, _index: usize) -> ParserResult<ToolCall> {
255    match expr {
256        Expr::Call(call_expr) => {
257            if !call_expr.args.is_empty() {
258                return Err(ParserError::ParsingFailed(
259                    "Positional arguments are not supported in pythonic tool calls".to_string(),
260                ));
261            }
262
263            let function_name = match *call_expr.func {
264                Expr::Name(name_expr) => name_expr.id.to_string(),
265                _ => {
266                    return Err(ParserError::ParsingFailed(
267                        "Unsupported function reference in pythonic tool call".to_string(),
268                    ))
269                }
270            };
271
272            let mut arguments_map = Map::with_capacity(call_expr.keywords.len());
273            for keyword in call_expr.keywords {
274                let arg_name = keyword.arg.ok_or_else(|| {
275                    ParserError::ParsingFailed(
276                        "pythonic tool calls do not support **kwargs".to_string(),
277                    )
278                })?;
279                let value_json = expression_to_json(&keyword.value)?;
280                arguments_map.insert(arg_name.to_string(), value_json);
281            }
282
283            let arguments_json = Value::Object(arguments_map);
284            let arguments_string = serde_json::to_string(&arguments_json)?;
285
286            Ok(ToolCall {
287                function: FunctionCall {
288                    name: function_name,
289                    arguments: arguments_string,
290                },
291            })
292        }
293        _ => Err(ParserError::ParsingFailed(
294            "Expected function calls inside pythonic tool call list".to_string(),
295        )),
296    }
297}
298
299fn expression_to_json(expr: &Expr) -> ParserResult<Value> {
300    match expr {
301        Expr::Constant(expr_constant) => constant_to_json(&expr_constant.value),
302        Expr::List(list_expr) => collect_sequence(&list_expr.elts).map(Value::Array),
303        Expr::Tuple(tuple_expr) => collect_sequence(&tuple_expr.elts).map(Value::Array),
304        Expr::Dict(dict_expr) => {
305            collect_dict(&dict_expr.keys, &dict_expr.values).map(Value::Object)
306        }
307        Expr::UnaryOp(unary_expr) => match unary_expr.op {
308            UnaryOp::USub => match unary_expr.operand.as_ref() {
309                Expr::Constant(const_expr) => negate_constant(&const_expr.value),
310                _ => Err(ParserError::ParsingFailed(
311                    "Unsupported unary operand in pythonic tool call".to_string(),
312                )),
313            },
314            UnaryOp::UAdd => expression_to_json(unary_expr.operand.as_ref()),
315            _ => Err(ParserError::ParsingFailed(format!(
316                "Unsupported unary operator in pythonic tool call: {:?}",
317                unary_expr.op
318            ))),
319        },
320        Expr::Name(name_expr) => Ok(Value::String(name_expr.id.to_string())),
321        _ => Err(ParserError::ParsingFailed(format!(
322            "Unsupported expression in pythonic tool call: {expr:?}"
323        ))),
324    }
325}
326
327fn constant_to_json(constant: &Constant) -> ParserResult<Value> {
328    match constant {
329        Constant::None => Ok(Value::Null),
330        Constant::Bool(b) => Ok(Value::Bool(*b)),
331        Constant::Int(value) => Ok(integer_constant_to_value(value, false)),
332        Constant::Float(f) => Number::from_f64(*f).map(Value::Number).ok_or_else(|| {
333            ParserError::ParsingFailed("Invalid float literal in pythonic tool call".to_string())
334        }),
335        Constant::Str(s) => Ok(Value::String(s.clone())),
336        Constant::Bytes(bytes) => Ok(Value::String(String::from_utf8_lossy(bytes).into_owned())),
337        Constant::Tuple(values) => constant_tuple_to_array(values).map(Value::Array),
338        Constant::Ellipsis | Constant::Complex { .. } => Err(ParserError::ParsingFailed(
339            "Unsupported literal in pythonic tool call".to_string(),
340        )),
341    }
342}
343
344fn negate_constant(constant: &Constant) -> ParserResult<Value> {
345    match constant {
346        Constant::Int(value) => Ok(integer_constant_to_value(value, true)),
347        Constant::Float(f) => Number::from_f64(-f).map(Value::Number).ok_or_else(|| {
348            ParserError::ParsingFailed("Invalid float literal in pythonic tool call".to_string())
349        }),
350        _ => Err(ParserError::ParsingFailed(
351            "Unsupported unary operand in pythonic tool call".to_string(),
352        )),
353    }
354}
355
356fn value_to_key_string(value: Value) -> ParserResult<String> {
357    match value {
358        Value::String(s) => Ok(s),
359        Value::Number(num) => Ok(num.to_string()),
360        Value::Bool(b) => Ok(b.to_string()),
361        Value::Null => Ok("null".to_string()),
362        other => Err(ParserError::ParsingFailed(format!(
363            "Unsupported key type in pythonic tool call: {other:?}"
364        ))),
365    }
366}
367
368fn collect_sequence(elements: &[Expr]) -> ParserResult<Vec<Value>> {
369    elements.iter().map(expression_to_json).collect()
370}
371
372fn collect_dict(keys: &[Option<Expr>], values: &[Expr]) -> ParserResult<Map<String, Value>> {
373    let mut map = Map::with_capacity(keys.len());
374    for (key_expr, value_expr) in keys.iter().zip(values.iter()) {
375        let key_expr = key_expr.as_ref().ok_or_else(|| {
376            ParserError::ParsingFailed("pythonic tool calls do not support **kwargs".to_string())
377        })?;
378        let key_value = expression_to_json(key_expr)?;
379        let key = value_to_key_string(key_value)?;
380        let value_json = expression_to_json(value_expr)?;
381        map.insert(key, value_json);
382    }
383    Ok(map)
384}
385
386fn constant_tuple_to_array(values: &[Constant]) -> ParserResult<Vec<Value>> {
387    values.iter().map(constant_to_json).collect()
388}
389
390fn integer_constant_to_value<T>(value: &T, negate: bool) -> Value
391where
392    T: ToPrimitive + std::fmt::Display,
393{
394    if let Some(mut i) = value.to_i64() {
395        if negate {
396            i = -i;
397        }
398        return Value::Number(Number::from(i));
399    }
400
401    if negate {
402        if let Some(u) = value.to_u64() {
403            if u <= i64::MAX as u64 {
404                return Value::Number(Number::from(-(u as i64)));
405            }
406            return Value::String(format!("-{value}"));
407        }
408        Value::String(format!("-{value}"))
409    } else if let Some(u) = value.to_u64() {
410        Value::Number(Number::from(u))
411    } else {
412        Value::String(value.to_string())
413    }
414}