tool_parser/parsers/
pythonic.rs

1use std::sync::OnceLock;
2
3/// Pythonic format parser for tool calls
4///
5/// Handles Python function call syntax within square brackets:
6/// ```text
7/// [tool1(arg1=val1, arg2=val2), tool2(arg1=val3)]
8/// ```
9///
10/// This format is used by Llama models and uses Python literals
11/// rather than JSON for arguments.
12/// Reference: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct?chat_template=default
13use async_trait::async_trait;
14use num_traits::ToPrimitive;
15use openai_protocol::common::Tool;
16use regex::Regex;
17use rustpython_parser::{
18    ast::{Constant, Expr, Mod, UnaryOp},
19    parse, Mode,
20};
21use serde_json::{Map, Number, Value};
22
23use crate::{
24    errors::{ParserError, ParserResult},
25    parsers::helpers,
26    traits::ToolParser,
27    types::{FunctionCall, StreamingParseResult, ToolCall, ToolCallItem},
28};
29
30static PYTHONIC_BLOCK_REGEX: OnceLock<Regex> = OnceLock::new();
31
32/// Lazily compiled regex that locates pythonic tool call blocks.
33fn pythonic_block_regex() -> &'static Regex {
34    PYTHONIC_BLOCK_REGEX.get_or_init(|| {
35        // Matches one or more function calls inside a list. The `(?s)` flag allows
36        // newlines inside argument lists while keeping the pattern anchored to
37        // identifiers followed by parentheses, preventing plain lists like
38        // `[1, 2, 3]` from matching.
39        Regex::new(r"(?s)\[\s*[A-Za-z_]\w*\s*\(.*?\)\s*(?:,\s*[A-Za-z_]\w*\s*\(.*?\)\s*)*\]")
40            .expect("pythonic tool call regex must compile")
41    })
42}
43
44/// Parser for Pythonic tool call format
45pub struct PythonicParser {
46    /// Buffer for accumulating chunks
47    buffer: String,
48}
49
50impl Default for PythonicParser {
51    fn default() -> Self {
52        Self::new()
53    }
54}
55
56impl PythonicParser {
57    /// Create a new Pythonic parser
58    pub fn new() -> Self {
59        Self {
60            buffer: String::new(),
61        }
62    }
63
64    /// Extract the first pythonic tool call block and return it along with the
65    /// surrounding "normal" content.
66    fn extract_tool_calls(&self, text: &str) -> Option<(String, String)> {
67        pythonic_block_regex().find(text).map(|mat| {
68            let block = mat.as_str().to_string();
69            let normal = format!("{}{}", &text[..mat.start()], &text[mat.end()..]);
70            (block, normal)
71        })
72    }
73
74    /// Strip special tokens that Llama models might output
75    fn strip_special_tokens(text: &str) -> String {
76        text.replace("<|python_start|>", "")
77            .replace("<|python_end|>", "")
78    }
79
80    fn parse_tool_call_block(&self, block: &str) -> ParserResult<Vec<ToolCall>> {
81        let expr = parse_python_expression(block)?;
82        match expr {
83            Expr::List(list_expr) => list_expr
84                .elts
85                .into_iter()
86                .enumerate()
87                .map(|(idx, call_expr)| build_tool_call(call_expr, idx))
88                .collect(),
89            _ => Err(ParserError::ParsingFailed(
90                "Expected a list of function calls in pythonic tool call".to_string(),
91            )),
92        }
93    }
94}
95
96#[async_trait]
97impl ToolParser for PythonicParser {
98    async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
99        let cleaned = Self::strip_special_tokens(text);
100
101        if let Some((tool_calls_text, normal_text)) = self.extract_tool_calls(&cleaned) {
102            match self.parse_tool_call_block(&tool_calls_text) {
103                Ok(calls) => {
104                    if calls.is_empty() {
105                        // No tools successfully parsed despite having markers
106                        Ok((text.to_string(), vec![]))
107                    } else {
108                        Ok((normal_text, calls))
109                    }
110                }
111                Err(e) => {
112                    // Log and return entire text as fallback
113                    tracing::debug!("Failed to parse pythonic tool calls: {}", e);
114                    Ok((text.to_string(), vec![]))
115                }
116            }
117        } else {
118            Ok((text.to_string(), vec![]))
119        }
120    }
121
122    async fn parse_incremental(
123        &mut self,
124        chunk: &str,
125        tools: &[Tool],
126    ) -> ParserResult<StreamingParseResult> {
127        self.buffer.push_str(chunk);
128
129        let cleaned = Self::strip_special_tokens(&self.buffer);
130
131        // Look for opening bracket
132        if let Some(start) = cleaned.find('[') {
133            let normal_text = if start > 0 {
134                cleaned[..start].to_string()
135            } else {
136                String::new()
137            };
138
139            // Look for matching closing bracket
140            if let Some(end) = find_matching_bracket(&cleaned, start) {
141                // Found complete tool call - extract it and parse using parse_complete
142                let call_text = &cleaned[start..=end];
143
144                match self.parse_complete(call_text).await {
145                    Ok((_, calls)) => {
146                        // Update buffer with remaining text after tool call
147                        let remaining_text = &cleaned[end + 1..];
148                        self.buffer = remaining_text.to_string();
149
150                        // Validate tool names and convert ToolCall to ToolCallItem
151                        let tool_indices = helpers::get_tool_indices(tools);
152                        let items: Vec<ToolCallItem> = calls
153                            .into_iter()
154                            .enumerate()
155                            .filter_map(|(idx, tool)| {
156                                if !tool_indices.contains_key(&tool.function.name) {
157                                    tracing::debug!(
158                                        "Invalid tool name '{}' - skipping",
159                                        tool.function.name
160                                    );
161                                    return None;
162                                }
163
164                                Some(ToolCallItem {
165                                    tool_index: idx,
166                                    name: Some(tool.function.name),
167                                    parameters: tool.function.arguments,
168                                })
169                            })
170                            .collect();
171
172                        return Ok(StreamingParseResult {
173                            normal_text,
174                            calls: items,
175                        });
176                    }
177                    Err(e) => {
178                        tracing::debug!("Failed to parse pythonic tool call: {}", e);
179                        // Clear buffer on error
180                        self.buffer.clear();
181                        return Ok(StreamingParseResult::default());
182                    }
183                }
184            } else {
185                // We have an opening bracket but no closing bracket yet
186                // Put back everything from the bracket onwards
187                self.buffer = cleaned[start..].to_string();
188
189                if !normal_text.is_empty() {
190                    return Ok(StreamingParseResult {
191                        normal_text,
192                        calls: vec![],
193                    });
194                }
195
196                // Still accumulating a potential tool call
197                return Ok(StreamingParseResult::default());
198            }
199        }
200
201        // No tool call bracket found
202        self.buffer.clear();
203        Ok(StreamingParseResult {
204            normal_text: cleaned,
205            calls: vec![],
206        })
207    }
208
209    fn has_tool_markers(&self, text: &str) -> bool {
210        let cleaned = Self::strip_special_tokens(text);
211        if pythonic_block_regex().is_match(&cleaned) {
212            return true;
213        }
214
215        false
216    }
217}
218
/// Find the closing `]` that balances the `[` located at byte offset `start`.
///
/// `start` and the returned position are both **byte** offsets into `buffer`,
/// so the result can be used directly for slicing (`&buffer[start..=end]`)
/// even when `buffer` contains multi-byte UTF-8 characters. Nested brackets
/// are tracked by depth.
///
/// Returns `None` when no balancing bracket exists, or when `start` is past
/// the end of `buffer` or not on a character boundary.
fn find_matching_bracket(buffer: &str, start: usize) -> Option<usize> {
    let tail = buffer.get(start..)?;
    let mut depth: i32 = 0;

    // Scanning bytes is safe: `[` and `]` are ASCII and can never occur inside
    // a multi-byte UTF-8 sequence.
    for (offset, byte) in tail.bytes().enumerate() {
        match byte {
            b'[' => depth += 1,
            b']' => {
                depth -= 1;
                if depth == 0 {
                    return Some(start + offset);
                }
            }
            _ => {}
        }
    }

    None // No matching bracket found
}
237
238fn parse_python_expression(source: &str) -> ParserResult<Expr> {
239    let module = parse(source, Mode::Expression, "<pythonic_tool_call>")
240        .map_err(|err| ParserError::ParsingFailed(err.to_string()))?;
241
242    match module {
243        Mod::Expression(expr_mod) => Ok(*expr_mod.body),
244        _ => Err(ParserError::ParsingFailed(
245            "Expected a Python expression".to_string(),
246        )),
247    }
248}
249
250fn build_tool_call(expr: Expr, _index: usize) -> ParserResult<ToolCall> {
251    match expr {
252        Expr::Call(call_expr) => {
253            if !call_expr.args.is_empty() {
254                return Err(ParserError::ParsingFailed(
255                    "Positional arguments are not supported in pythonic tool calls".to_string(),
256                ));
257            }
258
259            let function_name = match *call_expr.func {
260                Expr::Name(name_expr) => name_expr.id.to_string(),
261                _ => {
262                    return Err(ParserError::ParsingFailed(
263                        "Unsupported function reference in pythonic tool call".to_string(),
264                    ))
265                }
266            };
267
268            let mut arguments_map = Map::with_capacity(call_expr.keywords.len());
269            for keyword in call_expr.keywords {
270                let arg_name = keyword.arg.ok_or_else(|| {
271                    ParserError::ParsingFailed(
272                        "pythonic tool calls do not support **kwargs".to_string(),
273                    )
274                })?;
275                let value_json = expression_to_json(&keyword.value)?;
276                arguments_map.insert(arg_name.to_string(), value_json);
277            }
278
279            let arguments_json = Value::Object(arguments_map);
280            let arguments_string = serde_json::to_string(&arguments_json)?;
281
282            Ok(ToolCall {
283                function: FunctionCall {
284                    name: function_name,
285                    arguments: arguments_string,
286                },
287            })
288        }
289        _ => Err(ParserError::ParsingFailed(
290            "Expected function calls inside pythonic tool call list".to_string(),
291        )),
292    }
293}
294
295fn expression_to_json(expr: &Expr) -> ParserResult<Value> {
296    match expr {
297        Expr::Constant(expr_constant) => constant_to_json(&expr_constant.value),
298        Expr::List(list_expr) => collect_sequence(&list_expr.elts).map(Value::Array),
299        Expr::Tuple(tuple_expr) => collect_sequence(&tuple_expr.elts).map(Value::Array),
300        Expr::Dict(dict_expr) => {
301            collect_dict(&dict_expr.keys, &dict_expr.values).map(Value::Object)
302        }
303        Expr::UnaryOp(unary_expr) => match unary_expr.op {
304            UnaryOp::USub => match unary_expr.operand.as_ref() {
305                Expr::Constant(const_expr) => negate_constant(&const_expr.value),
306                _ => Err(ParserError::ParsingFailed(
307                    "Unsupported unary operand in pythonic tool call".to_string(),
308                )),
309            },
310            UnaryOp::UAdd => expression_to_json(unary_expr.operand.as_ref()),
311            _ => Err(ParserError::ParsingFailed(format!(
312                "Unsupported unary operator in pythonic tool call: {:?}",
313                unary_expr.op
314            ))),
315        },
316        Expr::Name(name_expr) => Ok(Value::String(name_expr.id.to_string())),
317        _ => Err(ParserError::ParsingFailed(format!(
318            "Unsupported expression in pythonic tool call: {:?}",
319            expr
320        ))),
321    }
322}
323
324fn constant_to_json(constant: &Constant) -> ParserResult<Value> {
325    match constant {
326        Constant::None => Ok(Value::Null),
327        Constant::Bool(b) => Ok(Value::Bool(*b)),
328        Constant::Int(value) => Ok(integer_constant_to_value(value, false)),
329        Constant::Float(f) => Number::from_f64(*f).map(Value::Number).ok_or_else(|| {
330            ParserError::ParsingFailed("Invalid float literal in pythonic tool call".to_string())
331        }),
332        Constant::Str(s) => Ok(Value::String(s.clone())),
333        Constant::Bytes(bytes) => Ok(Value::String(String::from_utf8_lossy(bytes).into_owned())),
334        Constant::Tuple(values) => constant_tuple_to_array(values).map(Value::Array),
335        Constant::Ellipsis | Constant::Complex { .. } => Err(ParserError::ParsingFailed(
336            "Unsupported literal in pythonic tool call".to_string(),
337        )),
338    }
339}
340
341fn negate_constant(constant: &Constant) -> ParserResult<Value> {
342    match constant {
343        Constant::Int(value) => Ok(integer_constant_to_value(value, true)),
344        Constant::Float(f) => Number::from_f64(-f).map(Value::Number).ok_or_else(|| {
345            ParserError::ParsingFailed("Invalid float literal in pythonic tool call".to_string())
346        }),
347        _ => Err(ParserError::ParsingFailed(
348            "Unsupported unary operand in pythonic tool call".to_string(),
349        )),
350    }
351}
352
353fn value_to_key_string(value: Value) -> ParserResult<String> {
354    match value {
355        Value::String(s) => Ok(s),
356        Value::Number(num) => Ok(num.to_string()),
357        Value::Bool(b) => Ok(b.to_string()),
358        Value::Null => Ok("null".to_string()),
359        other => Err(ParserError::ParsingFailed(format!(
360            "Unsupported key type in pythonic tool call: {:?}",
361            other
362        ))),
363    }
364}
365
366fn collect_sequence(elements: &[Expr]) -> ParserResult<Vec<Value>> {
367    elements.iter().map(expression_to_json).collect()
368}
369
370fn collect_dict(keys: &[Option<Expr>], values: &[Expr]) -> ParserResult<Map<String, Value>> {
371    let mut map = Map::with_capacity(keys.len());
372    for (key_expr, value_expr) in keys.iter().zip(values.iter()) {
373        let key_expr = key_expr.as_ref().ok_or_else(|| {
374            ParserError::ParsingFailed("pythonic tool calls do not support **kwargs".to_string())
375        })?;
376        let key_value = expression_to_json(key_expr)?;
377        let key = value_to_key_string(key_value)?;
378        let value_json = expression_to_json(value_expr)?;
379        map.insert(key, value_json);
380    }
381    Ok(map)
382}
383
384fn constant_tuple_to_array(values: &[Constant]) -> ParserResult<Vec<Value>> {
385    values.iter().map(constant_to_json).collect()
386}
387
388fn integer_constant_to_value<T>(value: &T, negate: bool) -> Value
389where
390    T: ToPrimitive + std::fmt::Display,
391{
392    if let Some(mut i) = value.to_i64() {
393        if negate {
394            i = -i;
395        }
396        return Value::Number(Number::from(i));
397    }
398
399    if negate {
400        if let Some(u) = value.to_u64() {
401            if u <= i64::MAX as u64 {
402                return Value::Number(Number::from(-(u as i64)));
403            }
404            return Value::String(format!("-{}", value));
405        }
406        Value::String(format!("-{}", value))
407    } else if let Some(u) = value.to_u64() {
408        Value::Number(Number::from(u))
409    } else {
410        Value::String(value.to_string())
411    }
412}