openfunctions-rs 0.1.0

A universal framework for creating and managing LLM tools and agents
Documentation
//! Python parser for tool definitions from function docstrings.
//!
//! This parser extracts a `ToolDefinition` from a Python function named `run`.
//! It expects a specific format for the docstring, including sections for
//! arguments, environment variables, and metadata.
//!
//! The docstring should be formatted as follows:
//!
//! ```python
//! def run(param1: str, param2: bool = False):
//!     """
//!     A multiline description of what the tool does.
//!
//!     Args:
//!         param1: The description for the first parameter.
//!         param2: The description for the second parameter.
//!
//!     Envs:
//!         API_KEY: A required API key.
//!         ENDPOINT: The API endpoint to use. (optional)
//!
//!     Meta:
//!         require-tools: tool1 tool2
//!     """
//!     pass
//! ```

use crate::models::{EnvVarDefinition, ParameterDefinition, ParameterType, ToolDefinition};
use anyhow::Result;
use regex::Regex;
use std::collections::HashMap;

/// Parses a Python source file and extracts a `ToolDefinition` from the `run`
/// function's docstring.
///
/// The first `def run(...)` whose body opens with a `"""` docstring is used;
/// an optional `-> T` return annotation between the parameter list and the
/// colon is accepted. The signature and the docstring are taken from the same
/// match, so parameter types always belong to the function whose docstring is
/// being read.
///
/// Docstring handling:
/// - The description is the first non-empty line before any section header.
/// - `Args:` entries (`name: text`) become parameters; type and requiredness
///   come from the signature, defaulting to a required string when the name is
///   not found there.
/// - `Envs:` entries become environment variables; a description containing
///   `(optional)` marks the variable as not required and the marker is removed.
/// - `Meta:` entries are scanned for `require-tools`, a whitespace-separated
///   list of tool names.
/// - Conventional sections such as `Returns:` or `Raises:` are skipped instead
///   of being read as entries of the preceding section.
///
/// # Errors
///
/// Returns an error when no `run` function with a docstring is found, when the
/// docstring has no description line, or when a regular expression fails to
/// compile.
pub fn parse(source: &str) -> Result<ToolDefinition> {
    /// Part of the docstring the line scanner is currently inside.
    #[derive(Clone, Copy)]
    enum Section {
        Description,
        Args,
        Envs,
        Meta,
        /// A recognised section whose content this parser does not use.
        Other,
    }

    // Section headers that carry no tool information. They are matched only
    // when they stand alone on a line so that ordinary entries are unaffected.
    const IGNORED_HEADERS: [&str; 8] = [
        "Returns:",
        "Return:",
        "Raises:",
        "Yields:",
        "Examples:",
        "Example:",
        "Notes:",
        "Note:",
    ];

    // Group 1: raw parameter list. Group 2: docstring body.
    let func_re = Regex::new(
        r#"def\s+run\s*\(([^)]*)\)\s*(?:->\s*[^:\n]+)?\s*:\s*\n\s*"""([\s\S]*?)""""#,
    )?;
    let caps = func_re
        .captures(source)
        .ok_or_else(|| anyhow::anyhow!("No run function with docstring found"))?;

    // Both groups are mandatory parts of the pattern, so indexing cannot panic
    // once the overall match has succeeded.
    let params_map = parse_python_signature(&caps[1])?;
    let docstring = &caps[2];

    let mut description = String::new();
    let mut parameters = Vec::new();
    let mut env_vars = Vec::new();
    let mut required_tools = Vec::new();
    // NOTE(review): no `Meta:` key other than `require-tools` is stored; the
    // metadata map is always returned empty.
    let metadata = HashMap::new();

    let entry_re = Regex::new(r"^\s*(\w+):\s*(.*)$")?;
    let meta_re = Regex::new(r"^\s*([\w-]+):\s*(.*)$")?;

    let mut section = Section::Description;

    for raw_line in docstring.lines() {
        let line = raw_line.trim();

        let header = if line.starts_with("Args:") {
            Some(Section::Args)
        } else if line.starts_with("Envs:") {
            Some(Section::Envs)
        } else if line.starts_with("Meta:") {
            Some(Section::Meta)
        } else if IGNORED_HEADERS.contains(&line) {
            Some(Section::Other)
        } else {
            None
        };
        if let Some(next) = header {
            section = next;
            continue;
        }

        match section {
            Section::Description => {
                // Only the first non-empty line is kept as the description.
                if !line.is_empty() && description.is_empty() {
                    description = line.to_string();
                }
            }
            Section::Args => {
                if let Some(entry) = entry_re.captures(line) {
                    let name = entry[1].to_string();
                    let desc = entry[2].to_string();

                    // Parameters documented but absent from the parsed
                    // signature fall back to a required string.
                    let (param_type, required) = params_map
                        .get(&name)
                        .cloned()
                        .unwrap_or((ParameterType::String, true));

                    parameters.push(ParameterDefinition {
                        name,
                        param_type,
                        description: desc,
                        required,
                        default: None,
                        enum_values: None,
                    });
                }
            }
            Section::Envs => {
                if let Some(entry) = entry_re.captures(line) {
                    let name = entry[1].to_string();
                    let desc = &entry[2];
                    let required = !desc.contains("(optional)");

                    env_vars.push(EnvVarDefinition {
                        name,
                        description: desc.replace("(optional)", "").trim().to_string(),
                        required,
                        default: None,
                    });
                }
            }
            Section::Meta => {
                if let Some(entry) = meta_re.captures(line) {
                    if &entry[1] == "require-tools" {
                        required_tools =
                            entry[2].split_whitespace().map(str::to_string).collect();
                    }
                }
            }
            Section::Other => {}
        }
    }

    if description.is_empty() {
        anyhow::bail!("No description found in docstring");
    }

    Ok(ToolDefinition {
        description,
        parameters,
        env_vars,
        required_tools,
        metadata,
    })
}

fn parse_python_signature(sig: &str) -> Result<HashMap<String, (ParameterType, bool)>> {
    let mut params = HashMap::new();
    let param_re = Regex::new(r"(\w+)\s*:\s*([^,=]+)(?:\s*=\s*[^,]+)?")?;

    for caps in param_re.captures_iter(sig) {
        let name = caps.get(1).unwrap().as_str().to_string();
        let type_str = caps.get(2).unwrap().as_str().trim();
        let has_default =
            sig.contains(&format!("{} =", name)) || sig.contains(&format!("{}=", name));

        let (param_type, required) = parse_python_type(type_str, !has_default)?;
        params.insert(name, (param_type, required));
    }

    Ok(params)
}

/// Maps a Python type annotation to a `ParameterType` and a required flag.
///
/// `is_required` is the requiredness derived from the signature (no default
/// value); it is returned unchanged unless the annotation itself marks the
/// parameter as optional.
///
/// Recognised annotations:
/// - `Optional[T]`, `T | None`, `None | T`: type of `T`, never required.
/// - `List[...]`, `list[...]`, `List`, `list`: array.
/// - `Dict[...]`, `dict[...]`, `Dict`, `dict`: object.
/// - `Literal[a, b, ...]`: enum of the listed values with surrounding quotes
///   removed. Members are split on every comma, so a literal that itself
///   contains a comma is not supported.
/// - `str`, `int`, `float`, `bool`: the matching scalar type.
///
/// Anything else is treated as a string.
fn parse_python_type(type_str: &str, is_required: bool) -> Result<(ParameterType, bool)> {
    let type_str = type_str.trim();

    // `Optional[T]`: strip exactly one wrapper and force "not required".
    if let Some(rest) = type_str.strip_prefix("Optional[") {
        let inner = rest.strip_suffix(']').unwrap_or(rest);
        let (param_type, _) = parse_python_type(inner, false)?;
        return Ok((param_type, false));
    }

    // PEP 604 spelling of the same thing: `T | None` or `None | T`.
    let union_inner = type_str
        .strip_suffix("None")
        .and_then(|rest| rest.trim_end().strip_suffix('|'))
        .or_else(|| {
            type_str
                .strip_prefix("None")
                .and_then(|rest| rest.trim_start().strip_prefix('|'))
        });
    if let Some(inner) = union_inner {
        let (param_type, _) = parse_python_type(inner, false)?;
        return Ok((param_type, false));
    }

    if type_str.starts_with("List[") || type_str.starts_with("list[") {
        return Ok((ParameterType::Array, is_required));
    }

    if type_str.starts_with("Dict[") || type_str.starts_with("dict[") {
        return Ok((ParameterType::Object, is_required));
    }

    if let Some(rest) = type_str.strip_prefix("Literal[") {
        let inner = rest.strip_suffix(']').unwrap_or(rest);
        let values: Vec<String> = inner
            .split(',')
            .map(str::trim)
            // Drops the empty piece produced by a trailing comma.
            .filter(|piece| !piece.is_empty())
            .map(|piece| piece.trim_matches('"').trim_matches('\'').to_string())
            .collect();
        return Ok((ParameterType::Enum(values), is_required));
    }

    let param_type = match type_str {
        "str" => ParameterType::String,
        "int" => ParameterType::Integer,
        "float" => ParameterType::Number,
        "bool" => ParameterType::Boolean,
        "list" | "List" => ParameterType::Array,
        "dict" | "Dict" => ParameterType::Object,
        // Unknown annotations are passed through as free-form strings.
        _ => ParameterType::String,
    };

    Ok((param_type, is_required))
}