// collet 0.1.0 - Docs.rs

use std::sync::Arc;

use serde_json::Value;

use crate::common::Result;
use crate::tools::{
    bash, file_read, file_write, git_patch, rag_search, search, skill, subagent, tool_search,
};

/// Deserialize a tool call's raw JSON argument string into its typed input.
///
/// Every dispatch branch goes through this helper, so the `serde_json`
/// decoding and the conversion of its error into the crate error type live in
/// one place.
fn parse_args<T>(args: &str) -> Result<T>
where
    T: serde::de::DeserializeOwned,
{
    let parsed = serde_json::from_str::<T>(args)?;
    Ok(parsed)
}

/// Tool definitions offered during the architect planning phase.
///
/// Only `bash`, `file_read` and `search` are exposed; the file-mutating tools
/// (`file_write`, `file_edit`, `git_patch`) are left out.
pub fn read_only_tool_definitions() -> Vec<Value> {
    let definitions = [
        bash::definition(),
        file_read::definition(),
        search::definition(),
    ];
    Vec::from(definitions)
}

/// Select the tool definitions permitted at the given trust level.
///
/// * `Full` — everything from `all_tool_definitions`, honouring `has_skills`
///   and `has_rag`.
/// * `ReadOnly` — the `read_only_tool_definitions` subset.
/// * `Untrusted` — no tools at all.
pub fn trusted_tool_definitions(
    trust: crate::trust::TrustLevel,
    has_skills: bool,
    has_rag: bool,
) -> Vec<Value> {
    match trust {
        crate::trust::TrustLevel::Untrusted => Vec::new(),
        crate::trust::TrustLevel::ReadOnly => read_only_tool_definitions(),
        crate::trust::TrustLevel::Full => all_tool_definitions(has_skills, has_rag),
    }
}

/// Every tool definition that may be advertised to the LLM.
///
/// The core tools (`bash`, `file_read`, `file_write`, `file_edit`,
/// `git_patch`, `search`, `subagent`) are always present. The `skill` tool is
/// appended when `has_skills` is set, followed by `rag_search` when `has_rag`
/// is set.
///
/// # Panics
///
/// In debug builds, panics if any definition fails to deserialize as a
/// `ToolDefinition`.
pub fn all_tool_definitions(has_skills: bool, has_rag: bool) -> Vec<Value> {
    let mut definitions = Vec::from([
        bash::definition(),
        file_read::definition(),
        file_write::write_definition(),
        file_write::edit_definition(),
        git_patch::definition(),
        search::definition(),
        subagent::definition(),
    ]);

    // Optional tools keep their relative order: skill first, then RAG search.
    definitions.extend(has_skills.then(skill::definition));
    definitions.extend(has_rag.then(rag_search::definition));

    // Schema check: each definition must round-trip into a ToolDefinition.
    #[cfg(debug_assertions)]
    for definition in &definitions {
        let parsed =
            serde_json::from_value::<crate::api::models::ToolDefinition>(definition.clone());
        let _ = parsed.expect("Tool definition must conform to ToolDefinition schema");
    }

    definitions
}

/// Context needed for dispatching tools that require external state.
///
/// `config` and `skill_registry` are wrapped in `Arc` so that parallel tool
/// futures within a single iteration share the same allocation instead of
/// each receiving a deep clone.
#[derive(Clone)]
pub struct DispatchContext {
    /// Skill registry handed to the `skill` tool.
    pub skill_registry: Arc<crate::skills::SkillRegistry>,
    /// Provider client; cloned and handed to the `subagent` tool.
    pub client: crate::api::provider::OpenAiCompatibleProvider,
    /// Shared configuration. `dispatch_with_context` reads `deny_paths` and
    /// `follow_symlinks` from it and passes it on to the `subagent` tool.
    pub config: Arc<crate::config::Config>,
    /// System prompt text; cloned and passed to the `subagent` tool.
    pub system_prompt: String,
    /// LSP manager passed to `file_write` / `file_edit` and cloned for the
    /// `subagent` tool.
    pub lsp_manager: crate::lsp::manager::LspManager,
    /// MCP manager: used to recognise and call MCP tools, and passed to the
    /// `tool_search` and `subagent` tools.
    pub mcp_manager: Arc<crate::mcp::manager::McpManager>,
    /// Tool index consulted by the `skill` and `tool_search` tools.
    pub tool_index: Arc<crate::tools::tool_index::ToolIndex>,
    /// RAG manager (present only when RAG is configured).
    pub rag_manager: Option<Arc<crate::rag::RagManager>>,
    /// Shared knowledge base (present only in swarm mode: Hive/Flock).
    pub shared_knowledge: Option<crate::agent::swarm::knowledge::SharedKnowledge>,
}

/// Default limit for tool result truncation (~3000 tokens).
///
/// The limit is compared against the UTF-8 byte length of the result.
const DEFAULT_TOOL_RESULT_LIMIT: usize = 12_000;

/// Truncate a tool result string if it exceeds `max_chars`.
///
/// `max_chars` (default: `DEFAULT_TOOL_RESULT_LIMIT`) is compared against the
/// UTF-8 byte length of `result`, so for ASCII output it is exactly a
/// character count. Results within the limit are returned unchanged. Longer
/// results are cut at the nearest char boundary at or before the limit, so a
/// multi-byte character is never split.
///
/// When truncated, a note is appended stating how many characters are shown
/// and how many the original contained, so the LLM knows content was omitted.
/// Both numbers are real character counts, which keeps the note accurate for
/// non-ASCII output as well.
pub fn truncate_tool_result(result: &str, max_chars: Option<usize>) -> String {
    let limit = max_chars.unwrap_or(DEFAULT_TOOL_RESULT_LIMIT);
    if result.len() <= limit {
        return result.to_string();
    }

    // Walk back from `limit` to the closest char boundary (same result as
    // `str::floor_char_boundary`). Here `limit < result.len()`, and offset 0
    // is always a boundary, so the search cannot come up empty.
    let cut = (0..=limit)
        .rev()
        .find(|&idx| result.is_char_boundary(idx))
        .unwrap_or(0);
    let kept = &result[..cut];

    format!(
        "{}\n\n[Output truncated: showing first {} of {} total chars.]",
        kept,
        kept.chars().count(),
        result.chars().count(),
    )
}

/// Dispatch a tool call by name and return the result.
pub async fn dispatch(name: &str, arguments: &str, working_dir: &str) -> Result<String> {
    match name {
        "bash" => {
            let input = parse_args::<bash::BashInput>(arguments)?;
            bash::execute(input, working_dir).await
        }
        "file_read" => {
            let input = parse_args::<file_read::FileReadInput>(arguments)?;
            file_read::execute(input, working_dir).await
        }
        "file_write" => {
            let input = parse_args::<file_write::FileWriteInput>(arguments)?;
            file_write::execute_write(input, working_dir, None).await
        }
        "file_edit" => {
            let input = parse_args::<file_write::FileEditInput>(arguments)?;
            file_write::execute_edit(input, working_dir, None).await
        }
        "search" => {
            let input = parse_args::<search::SearchInput>(arguments)?;
            search::execute(input, working_dir).await
        }
        "git_patch" => {
            let input = parse_args::<git_patch::GitPatchInput>(arguments)?;
            git_patch::execute(input, working_dir).await
        }
        // tool_search without context falls back to a plain BM25 search over an
        // empty index — returns "no results" rather than a hard error.
        "tool_search" => {
            let input = parse_args::<tool_search::ToolSearchInput>(arguments)?;
            let empty_index = crate::tools::tool_index::ToolIndex::new();
            tool_search::execute(input, &empty_index)
        }
        // skill and subagent require extra context — handled by dispatch_with_context
        "skill" | "subagent" => Err(crate::common::AgentError::InvalidArgument(format!(
            "Tool '{}' requires dispatch_with_context()",
            name
        ))),
        _ => Err(crate::common::AgentError::InvalidArgument(format!(
            "Unknown tool: {}",
            name
        ))),
    }
}

/// Dispatch tools that need extra context (skill registry, client, etc.).
pub async fn dispatch_with_context(
    name: &str,
    arguments: &str,
    working_dir: &str,
    ctx: &DispatchContext,
) -> Result<String> {
    // ── deny_paths guard ────────────────────────────────────────────────────
    // File-access tools: extract the `path` argument, resolve it lexically,
    // and reject it if it matches any entry in `security.deny_paths`.
    const FILE_ACCESS_TOOLS: &[&str] = &[
        "file_read",
        "file_write",
        "file_edit",
        "git_patch",
        "search",
    ];
    if FILE_ACCESS_TOOLS.contains(&name)
        && let Ok(val) = serde_json::from_str::<serde_json::Value>(arguments)
        && let Some(path) = val.get("path").and_then(|p| p.as_str())
    {
        let candidate = if std::path::Path::new(path).is_absolute() {
            std::path::PathBuf::from(path)
        } else {
            std::path::Path::new(working_dir).join(path)
        };
        let resolved = crate::agent::approval::normalize_path_lexical(&candidate);
        let resolved_str = resolved.to_string_lossy();

        // Check lexically-normalized path against deny_paths.
        if !ctx.config.deny_paths.is_empty()
            && crate::agent::approval::is_path_denied(&resolved_str, &ctx.config.deny_paths)
        {
            return Err(crate::common::AgentError::InvalidArgument(format!(
                "Access denied: '{}' is blocked by security.deny_paths",
                path
            )));
        }

        // Symlink policy: when follow_symlinks is false, resolve the real path
        // and re-check against deny_paths to prevent symlink-based escapes.
        if !ctx.config.follow_symlinks
            && resolved.exists()
            && let Ok(canonical) = std::fs::canonicalize(&resolved)
        {
            let canonical_str = canonical.to_string_lossy();
            if canonical_str != resolved_str
                && !ctx.config.deny_paths.is_empty()
                && crate::agent::approval::is_path_denied(&canonical_str, &ctx.config.deny_paths)
            {
                return Err(crate::common::AgentError::InvalidArgument(format!(
                    "Access denied: '{}' resolves to a denied path via symlink",
                    path
                )));
            }
        }
    }
    // ────────────────────────────────────────────────────────────────────────
    match name {
        "skill" => {
            crate::telemetry::track("feature_used", serde_json::json!({"feature": "skill"}));
            let input = parse_args::<skill::SkillInput>(arguments)?;
            skill::execute(input, &ctx.skill_registry, &ctx.tool_index)
        }
        "subagent" => {
            crate::telemetry::track("feature_used", serde_json::json!({"feature": "subagent"}));
            let input = parse_args::<subagent::SubagentInput>(arguments)?;
            subagent::execute(
                input,
                ctx.client.clone(),
                Arc::clone(&ctx.config),
                ctx.system_prompt.clone(),
                working_dir.to_string(),
                ctx.lsp_manager.clone(),
                Some(Arc::clone(&ctx.mcp_manager)),
            )
            .await
        }
        "tool_search" => {
            let input = parse_args::<tool_search::ToolSearchInput>(arguments)?;
            tool_search::execute_with_mcp(input, &ctx.tool_index, Some(&ctx.mcp_manager))
        }
        "rag_search" => {
            crate::telemetry::track("feature_used", serde_json::json!({"feature": "rag_search"}));
            let input = parse_args::<rag_search::RagSearchInput>(arguments)?;
            match &ctx.rag_manager {
                Some(mgr) => {
                    rag_search::execute(input, mgr, working_dir, ctx.shared_knowledge.as_ref())
                        .await
                }
                None => Ok(
                    "RAG is not configured. Add [rag] section to collet.toml to enable."
                        .to_string(),
                ),
            }
        }
        "file_write" => {
            let input = parse_args::<file_write::FileWriteInput>(arguments)?;
            file_write::execute_write(input, working_dir, Some(&ctx.lsp_manager)).await
        }
        "file_edit" => {
            let input = parse_args::<file_write::FileEditInput>(arguments)?;
            file_write::execute_edit(input, working_dir, Some(&ctx.lsp_manager)).await
        }
        _ if ctx.mcp_manager.is_mcp_tool(name) => {
            // Track MCP usage by server name (extract from mcp__{server}__{tool}).
            // Use a thread-local set to avoid flooding telemetry with per-call events.
            use std::cell::RefCell;
            thread_local! {
                static SEEN_MCP: RefCell<std::collections::HashSet<String>> = RefCell::new(std::collections::HashSet::new());
            }
            let server = name
                .strip_prefix("mcp__")
                .and_then(|rest| rest.split("__").next())
                .unwrap_or(name);
            SEEN_MCP.with(|seen| {
                if seen.borrow_mut().insert(server.to_string()) {
                    crate::telemetry::track(
                        "feature_used",
                        serde_json::json!({
                            "feature": "mcp",
                            "server": server,
                        }),
                    );
                }
            });
            ctx.mcp_manager.call_tool(name, arguments).await
        }
        _ => dispatch(name, arguments, working_dir).await,
    }
}