openfunctions_rs/parser/
python.rs

1//! Python parser for tool definitions from function docstrings.
2//!
3//! This parser extracts a `ToolDefinition` from a Python function named `run`.
4//! It expects a specific format for the docstring, including sections for
5//! arguments, environment variables, and metadata.
6//!
7//! The docstring should be formatted as follows:
8//!
9//! ```python
10//! def run(param1: str, param2: bool = False):
11//!     """
12//!     A multiline description of what the tool does.
13//!
14//!     Args:
15//!         param1: The description for the first parameter.
16//!         param2: The description for the second parameter.
17//!
18//!     Envs:
19//!         API_KEY: A required API key.
20//!         ENDPOINT: The API endpoint to use. (optional)
21//!
22//!     Meta:
23//!         require-tools: tool1 tool2
24//!     """
25//!     pass
26//! ```
27
28use crate::models::{EnvVarDefinition, ParameterDefinition, ParameterType, ToolDefinition};
29use anyhow::Result;
30use regex::Regex;
31use std::collections::HashMap;
32
33/// Parses a Python source file and extracts a `ToolDefinition` from the `run`
34/// function's docstring.
35pub fn parse(source: &str) -> Result<ToolDefinition> {
36    let func_re = Regex::new(r#"def\s+run\s*\([^)]*\)\s*:\s*\n\s*"""([\s\S]*?)""""#)?;
37    let docstring = func_re
38        .captures(source)
39        .ok_or_else(|| anyhow::anyhow!("No run function with docstring found"))?
40        .get(1)
41        .unwrap()
42        .as_str();
43
44    let mut description = String::new();
45    let mut parameters = Vec::new();
46    let mut env_vars = Vec::new();
47    let mut required_tools = Vec::new();
48    let metadata = HashMap::new();
49
50    let sig_re = Regex::new(r"def\s+run\s*\(([^)]*)\)")?;
51    let params_map = if let Some(caps) = sig_re.captures(source) {
52        parse_python_signature(caps.get(1).unwrap().as_str())?
53    } else {
54        HashMap::new()
55    };
56
57    let mut in_args_section = false;
58    let mut in_envs_section = false;
59    let mut in_meta_section = false;
60    let arg_re = Regex::new(r"^\s*(\w+):\s*(.*)$")?;
61    let meta_re = Regex::new(r"^\s*([\w-]+):\s*(.*)$")?;
62
63    for line in docstring.lines() {
64        let line = line.trim();
65
66        if line.starts_with("Args:") {
67            in_args_section = true;
68            in_envs_section = false;
69            in_meta_section = false;
70            continue;
71        } else if line.starts_with("Envs:") {
72            in_args_section = false;
73            in_envs_section = true;
74            in_meta_section = false;
75            continue;
76        } else if line.starts_with("Meta:") {
77            in_args_section = false;
78            in_envs_section = false;
79            in_meta_section = true;
80            continue;
81        }
82
83        if in_args_section {
84            if let Some(caps) = arg_re.captures(line) {
85                let name = caps.get(1).unwrap().as_str().to_string();
86                let desc = caps.get(2).unwrap().as_str().to_string();
87
88                let (param_type, required) = params_map
89                    .get(&name)
90                    .cloned()
91                    .unwrap_or((ParameterType::String, true));
92
93                parameters.push(ParameterDefinition {
94                    name,
95                    param_type,
96                    description: desc,
97                    required,
98                    default: None,
99                    enum_values: None,
100                });
101            }
102        } else if in_envs_section {
103            if let Some(caps) = arg_re.captures(line) {
104                let name = caps.get(1).unwrap().as_str().to_string();
105                let desc = caps.get(2).unwrap().as_str().to_string();
106                let required = !desc.contains("(optional)");
107
108                env_vars.push(EnvVarDefinition {
109                    name,
110                    description: desc.replace("(optional)", "").trim().to_string(),
111                    required,
112                    default: None,
113                });
114            }
115        } else if in_meta_section {
116            if let Some(caps) = meta_re.captures(line) {
117                let key = caps.get(1).unwrap().as_str();
118                let value = caps.get(2).unwrap().as_str();
119                if key == "require-tools" {
120                    required_tools = value.split_whitespace().map(|s| s.to_string()).collect();
121                }
122            }
123        } else if !line.is_empty() && description.is_empty() {
124            description = line.to_string();
125        }
126    }
127
128    if description.is_empty() {
129        anyhow::bail!("No description found in docstring");
130    }
131
132    Ok(ToolDefinition {
133        description,
134        parameters,
135        env_vars,
136        required_tools,
137        metadata,
138    })
139}
140
141fn parse_python_signature(sig: &str) -> Result<HashMap<String, (ParameterType, bool)>> {
142    let mut params = HashMap::new();
143    let param_re = Regex::new(r"(\w+)\s*:\s*([^,=]+)(?:\s*=\s*[^,]+)?")?;
144
145    for caps in param_re.captures_iter(sig) {
146        let name = caps.get(1).unwrap().as_str().to_string();
147        let type_str = caps.get(2).unwrap().as_str().trim();
148        let has_default =
149            sig.contains(&format!("{} =", name)) || sig.contains(&format!("{}=", name));
150
151        let (param_type, required) = parse_python_type(type_str, !has_default)?;
152        params.insert(name, (param_type, required));
153    }
154
155    Ok(params)
156}
157
158fn parse_python_type(type_str: &str, is_required: bool) -> Result<(ParameterType, bool)> {
159    let type_str = type_str.trim();
160
161    if type_str.starts_with("Optional[") {
162        let inner = type_str
163            .trim_start_matches("Optional[")
164            .trim_end_matches(']');
165        let (param_type, _) = parse_python_type(inner, false)?;
166        return Ok((param_type, false));
167    }
168
169    if type_str.starts_with("List[") {
170        return Ok((ParameterType::Array, is_required));
171    }
172
173    if type_str.starts_with("Literal[") {
174        let inner = type_str
175            .trim_start_matches("Literal[")
176            .trim_end_matches(']');
177        let values: Vec<String> = inner
178            .split(',')
179            .map(|s| s.trim().trim_matches('"').trim_matches('\'').to_string())
180            .collect();
181        return Ok((ParameterType::Enum(values), is_required));
182    }
183
184    let param_type = match type_str {
185        "str" => ParameterType::String,
186        "int" => ParameterType::Integer,
187        "float" => ParameterType::Number,
188        "bool" => ParameterType::Boolean,
189        "list" => ParameterType::Array,
190        "dict" => ParameterType::Object,
191        _ => ParameterType::String,
192    };
193
194    Ok((param_type, is_required))
195}