rsclaw 2026.5.1

//! Tool list builder — generates the consolidated ToolDef list for an agent.
//!
//! Extracted from `runtime.rs` to reduce file size.
//! All public items are re-exported by `runtime.rs` so callers are unaffected.

use serde_json::{Value, json};

use super::registry::AgentRegistry;
use crate::{
    config::schema::ExternalAgentConfig,
    plugin::{PluginRegistry, WasmPlugin},
    provider::ToolDef,
    skill::SkillRegistry,
};

/// Advertise the tools of all loaded WASM plugins as one flat `Vec<ToolDef>`.
///
/// Every tool is published under the namespaced name `<plugin>.<tool>` so the
/// dispatcher can route a call back to the plugin instance that owns it. The
/// description and parameter schema are copied verbatim from the plugin's
/// tool entry. Output order follows plugin order, then each plugin's tool
/// order.
pub(crate) fn build_wasm_tool_defs(plugins: &[WasmPlugin]) -> Vec<ToolDef> {
    let mut defs = Vec::new();
    for plugin in plugins {
        for tool in plugin.tools.iter() {
            defs.push(ToolDef {
                name: format!("{}.{}", plugin.name, tool.name),
                description: tool.description.clone(),
                parameters: tool.parameters.clone(),
            });
        }
    }
    defs
}

/// Advertise the tools of all loaded shell-bridge plugins as a `Vec<ToolDef>`.
///
/// Names use the same `<plugin>.<tool>` scheme as the WASM plugins, so the
/// dispatcher in `runtime.rs` can route both kinds with a single
/// `split_once('.')`. A tool that declares no input schema is advertised with
/// an empty object schema (`{"type": "object", "properties": {}}`).
pub(crate) fn build_shell_tool_defs(plugins: &PluginRegistry) -> Vec<ToolDef> {
    let mut defs = Vec::new();
    for (plugin_name, plugin) in plugins.shell_plugins_iter() {
        for tool in plugin.manifest.tools.iter() {
            // Fall back to a parameterless object schema when the manifest
            // does not provide one.
            let parameters = match tool.input_schema.clone() {
                Some(schema) => schema,
                None => serde_json::json!({
                    "type": "object",
                    "properties": {}
                }),
            };
            defs.push(ToolDef {
                name: format!("{plugin_name}.{}", tool.name),
                description: tool.description.clone(),
                parameters,
            });
        }
    }
    defs
}

/// Render the "Installed Plugins" section of the system prompt.
///
/// One `<plugin>` block is emitted per installed plugin (WASM and shell),
/// each listing the plugin's tools as `<plugin>.<tool>: <description>`. The
/// section nudges the model to pick a plugin tool over a generic
/// browser-automation flow. Missing versions and descriptions render as empty
/// strings.
///
/// Returns `None` when no plugin of either kind is installed. Blocks are
/// ordered by plugin name so the output is byte-stable.
pub(crate) fn build_plugins_system(
    wasm_plugins: &[WasmPlugin],
    shell_plugins: Option<&PluginRegistry>,
) -> Option<String> {
    /// Format a single `<plugin>` block from its already-rendered tool lines.
    fn render_block(name: &str, version: &str, description: &str, tool_lines: &[String]) -> String {
        format!(
            "<plugin name=\"{}\" version=\"{}\">\n{}\n\nTools:\n{}\n</plugin>",
            name,
            version,
            description,
            tool_lines.join("\n"),
        )
    }

    // (plugin name, rendered block) pairs; the name is kept only as sort key.
    let mut entries: Vec<(String, String)> = Vec::new();

    for plugin in wasm_plugins {
        let tool_lines: Vec<String> = plugin
            .tools
            .iter()
            .map(|tool| format!("  - {}.{}: {}", plugin.name, tool.name, tool.description))
            .collect();
        let rendered = render_block(
            plugin.name.as_str(),
            plugin.version.as_deref().unwrap_or(""),
            plugin.description.as_deref().unwrap_or(""),
            &tool_lines,
        );
        entries.push((plugin.name.clone(), rendered));
    }

    if let Some(registry) = shell_plugins {
        for (plugin_name, plugin) in registry.shell_plugins_iter() {
            let manifest = &plugin.manifest;
            let tool_lines: Vec<String> = manifest
                .tools
                .iter()
                .map(|tool| format!("  - {}.{}: {}", plugin_name, tool.name, tool.description))
                .collect();
            let rendered = render_block(
                plugin_name.as_str(),
                manifest.version.as_deref().unwrap_or(""),
                manifest.description.as_deref().unwrap_or(""),
                &tool_lines,
            );
            entries.push((plugin_name.clone(), rendered));
        }
    }

    // Every installed plugin contributes exactly one entry, so an empty list
    // means there is nothing to advertise.
    if entries.is_empty() {
        return None;
    }

    // Order by name: HashMap iteration order is nondeterministic, and the
    // system prompt feeds the LLM's KV cache — unstable ordering would
    // invalidate that cache. The sort is stable, so same-named plugins keep
    // their insertion order (WASM before shell).
    entries.sort_by(|(lhs, _), (rhs, _)| lhs.cmp(rhs));
    let body = entries
        .into_iter()
        .map(|(_, block)| block)
        .collect::<Vec<String>>()
        .join("\n\n");

    Some(format!(
        "## Installed Plugins\n\
         Plugins automate external services (e.g. image/video generation, \
         marketplace ops). When the user's task matches a plugin tool, prefer \
         it over a generic browser-automation flow.\n\
         Priority: plugins > skills > built-in tools.\n\n\
         {body}",
    ))
}

/// Resolve the whitelist of tool names for a toolset level plus custom tools.
///
/// `toolset` picks a preset: `"minimal"`, `"web"`, `"code"`, `"standard"` or
/// `"full"`. Any other value is treated like `"standard"`.
///
/// Result:
/// - `"full"` without `custom_tools`: `None`, meaning no filtering at all.
/// - `"full"` with `custom_tools`: the custom list alone is the whitelist.
/// - any other level: the preset names, plus `custom_tools` when given
///   (duplicates collapse because the result is a set).
pub(crate) fn toolset_allowed_names(
    toolset: &str,
    custom_tools: Option<&Vec<String>>,
) -> Option<std::collections::HashSet<String>> {
    use std::collections::HashSet;

    const MINIMAL: &[&str] = &[
        "execute_command",
        "read_file",
        "write_file",
        "send_file",
        "list_dir",
        "search_file",
        "search_content",
        "web_search",
        "web_fetch",
        "memory",
        "clarify",
        "anycli",
        "use_skill",
    ];
    const WEB: &[&str] = &[
        "web_search",
        "web_fetch",
        "web_download",
        "read_file",
        "write_file",
        "list_dir",
        "search_file",
        "memory",
        "use_skill",
    ];
    const CODE: &[&str] = &[
        "execute_command",
        "read_file",
        "write_file",
        "list_dir",
        "search_file",
        "search_content",
        "memory",
        "use_skill",
    ];
    // NOTE(review): `send_file` is listed in MINIMAL but not here — confirm
    // that omission is intentional.
    const STANDARD: &[&str] = &[
        "execute_command",
        "read_file",
        "write_file",
        "list_dir",
        "search_file",
        "search_content",
        "web_search",
        "web_fetch",
        "memory",
        "web_browser",
        "image_gen",
        "video_gen",
        "channel",
        "cron",
        "computer_use",
        "clarify",
        "anycli",
        "use_skill",
        "task",
    ];

    // `None` means "no preset restriction" (the "full" level).
    let preset: Option<&[&str]> = match toolset {
        "full" => None,
        "minimal" => Some(MINIMAL),
        "web" => Some(WEB),
        "code" => Some(CODE),
        // "standard" and every unrecognised level share the standard preset.
        _ => Some(STANDARD),
    };

    // Unrestricted level and nothing custom: the caller should not filter.
    if preset.is_none() && custom_tools.is_none() {
        return None;
    }

    // Start from the preset (empty for "full"), then layer the custom names
    // on top; the set takes care of de-duplication.
    let mut allowed: HashSet<String> = preset
        .into_iter()
        .flatten()
        .map(|name| (*name).to_owned())
        .collect();
    if let Some(extra) = custom_tools {
        allowed.extend(extra.iter().cloned());
    }
    Some(allowed)
}

/// Build the complete tool list for an agent runtime.
///
/// Includes built-in tools, per-agent A2A tools, external agent tools,
/// and skill-derived tools.
pub(crate) fn build_tool_list(
    skills: &SkillRegistry,
    agents: Option<&AgentRegistry>,
    caller_id: &str,
    external_agents: &[ExternalAgentConfig],
) -> Vec<ToolDef> {
    let mut tools = Vec::new();

    // Built-in tools — consolidated (32+ tools -> ~13 unified tools).
    tools.push(ToolDef {
        name: "memory".to_owned(),
        description: "Manage long-term memory across sessions.\n\
            Actions:\n\
            - search: Semantic search over stored memories. Example: {\"action\":\"search\",\"query\":\"user preferences\"}\n\
            - get: Retrieve a specific memory by ID. Example: {\"action\":\"get\",\"id\":\"abc-123\"}\n\
            - put: Store a new memory. Example: {\"action\":\"put\",\"text\":\"User prefers dark mode\",\"kind\":\"fact\"}\n\
            Use this tool to recall prior context, user preferences, or previously learned information.\n\
            Search BEFORE answering questions about past conversations or user details.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "action": {"type": "string", "enum": ["search", "get", "put"], "description": "Action to perform: search, get, or put"},
                "query":  {"type": "string", "description": "Search query (for search). Examples: 'user name', 'project deadlines', 'API keys'"},
                "id":     {"type": "string", "description": "Memory document ID (for get)"},
                "text":   {"type": "string", "description": "Content to store (for put). Be specific and include context."},
                "scope":  {"type": "string", "description": "Scope filter (optional)"},
                "kind":   {"type": "string", "description": "Document kind: note (general), fact (verified info), remember (user explicitly asked to remember). Do NOT use kind=summary; session summaries are written automatically by /compact, /new, /reset."},
                "top_k":  {"type": "integer", "description": "Max results (for search, default 5)"}
            },
            "required": ["action"]
        }),
    });
    // `use_skill` — first-class entry point for installed skills. Listed
    // EARLY in the tool list so the LLM notices it before web_fetch /
    // web_browser / execute_command. Only registered when at least one
    // skill is installed; otherwise it'd be dead surface area.
    if skills.all().next().is_some() {
        let skill_names: Vec<String> = skills.all().map(|s| s.name.clone()).collect();
        let names_hint = if skill_names.is_empty() {
            String::new()
        } else {
            format!(" Installed skill names: {}.", skill_names.join(", "))
        };
        tools.push(ToolDef {
            name: "use_skill".to_owned(),
            description: format!(
                "ACTIVATE an installed skill. Use this BEFORE web_fetch / web_browser / \
                execute_command whenever the user's task matches any skill description \
                shown in the system prompt under '## Installed Skills' (flights, hotels, \
                stocks, weather, finance data, etc.).\n\n\
                Returns the full SKILL.md so you know the exact CLI command and flags. \
                After calling use_skill you typically call execute_command with the CLI \
                from skill_md.\n\n\
                Common failure to avoid: defaulting to web_fetch on a domain a skill \
                already covers. If a skill description matches, you MUST use_skill \
                first.{names_hint}"
            ),
            parameters: json!({
                "type": "object",
                "properties": {
                    "name": {
                        "type": "string",
                        "description": "Exact skill name from the Installed Skills list (e.g. 'flyai', 'hithink-market-query'). Case-sensitive."
                    }
                },
                "required": ["name"]
            }),
        });
    }
    // `task` — escalate the current chat into a multi-turn background task.
    // The LLM decides when sustained work is warranted (implementation
    // spanning many tool calls, multi-file refactor, deep research). For
    // short Q&A, jokes, greetings, and one-shot tool calls the LLM should
    // just answer directly without calling this tool.
    tools.push(ToolDef {
        name: "task".to_owned(),
        description: "Escalate the user's current request into a multi-turn background task. \
            Call this ONLY when the work clearly needs sustained execution: implementation \
            across multiple files, multi-step debugging, deep research with many web fetches, \
            data pipelines, end-to-end deployments. \
            Do NOT call for: greetings, casual questions, single tool calls (one web_search, \
            one read_file, one calculation), explanations, or anything you can answer in this \
            same turn. When in doubt, just answer directly — the user can always send \
            `/task <request>` to escalate manually.\n\n\
            Returns a task ID; the gateway then runs the work in the background and posts \
            replies as turns complete. After calling task, your reply to the user should be a \
            short acknowledgement only — the actual work happens in the background turns.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "task_text": {
                    "type": "string",
                    "description": "The task instruction for the background runner. Usually the user's original request, optionally clarified."
                },
                "max_turns": {
                    "type": "integer",
                    "description": "Optional cap on agent turns. Default 10. Raise for big jobs (e.g. 30 for full feature implementation)."
                },
                "ttl_secs": {
                    "type": "integer",
                    "description": "Optional wall-clock deadline in seconds. Default 3600 (1h)."
                }
            },
            "required": ["task_text"]
        }),
    });
    tools.push(ToolDef {
        name: "read_file".to_owned(),
        description: "Read a file from the agent workspace.\n\
            Path is relative to workspace root.\n\
            Supports text files, code, config, markdown, etc.\n\
            Example: {\"path\":\"config.json\"} or {\"path\":\"src/main.py\"}\n\
            For binary files (images, PDFs), use the dedicated tools instead.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "Relative file path. Examples: 'README.md', 'src/app.py', 'data/output.csv'"}
            },
            "required": ["path"]
        }),
    });
    tools.push(ToolDef {
        name: "write_file".to_owned(),
        description: "Write/create a file. Use this for ALL file creation and writing — do NOT use execute_command with notepad, echo, or any other editor/command to create files.\n\
            Creates parent directories as needed. Path is relative to workspace root.\n\
            Both 'path' and 'content' are required.\n\
            CRITICAL: When writing user-provided content, copy it EXACTLY character-by-character. \
            Never omit, rephrase, or regenerate numbers, dates, addresses, names, or any specific values. \
            If the user said '135号168栋', the content MUST contain '135号168栋' exactly.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "path":    {"type": "string", "description": "Relative file path within the workspace (REQUIRED). Example: 'output.py'"},
                "content": {"type": "string", "description": "File content to write (REQUIRED). MUST preserve all numbers, dates, and specific values from the user's message exactly as given."},
                "explanation": {"type": "string", "description": "Brief explanation of what you are creating and why, to help organize your thoughts before writing content."}
            },
            "required": ["path", "content"]
        }),
    });
    tools.push(ToolDef {
        name: "send_file".to_owned(),
        description: "Send a file from the workspace to the user as an attachment. \
            Use this when the user asks you to send, share, or download a file. \
            The file will be delivered as a chat attachment (not as text). \
            Path is relative to workspace root.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "File path to send (relative to workspace or absolute)"}
            },
            "required": ["path"]
        }),
    });
    tools.push(ToolDef {
        name: "execute_command".to_owned(),
        description: if cfg!(target_os = "windows") {
            "Run a shell command (PowerShell) on Windows.\n\
             IMPORTANT: For file listing use `list_dir`, for file search use `search_file`, for content search use `search_content`, for tool install use `install_tool`, for HTTP/API requests use `web_fetch`. Only use exec for commands that have no dedicated tool.\n\
             Use exec for: git operations, running scripts (node/python/cargo), system info (systeminfo, ipconfig, Get-Process), package management (npm/pip), process management (Start-Process, Stop-Process, taskkill).\n\
             Do NOT use exec for HTTP requests (curl/wget/Invoke-WebRequest) or file downloads — use `web_fetch` / `web_download` instead.\n\
             \n\
             Tool selection: PowerShell for file/system ops; python for data processing (CSV/JSON/automation).\n\
             Check tool availability first (`Get-Command python`, `Get-Command node`). Use `install_tool` for system tools.\n\
             \n\
             PowerShell patterns:\n\
             - Pipes: Get-Process | Sort-Object CPU -Descending | Select-Object -First 10\n\
             - Network connectivity check (not fetch): Test-NetConnection host -Port 80\n\
             - Text: (Get-Content file) -replace 'old','new'\n\
             - Dates: Get-Date -Format 'yyyy-MM-dd'; [DateTimeOffset]::Now.ToUnixTimeSeconds()\n\
             Python patterns: `python -c \"import json; ...\"` for one-liners, write to $env:TEMP\\script.py for multi-line.\n\
             \n\
             Best practices: Do NOT wrap commands in extra cmd /c or powershell -Command layers. Use `| Select-Object -First 10` to limit output.\n\
             Do NOT use exec for destructive operations on personal directories (Desktop, Downloads, Documents).\n\
             Commands run in background by default (wait=false). Use wait=true only for short commands where you need the output immediately.\n\
             If a command fails, do NOT retry with the same arguments. Try a different approach or ask the user."
                .to_owned()
        } else if cfg!(target_os = "macos") {
            "Run a shell command (bash/zsh) on macOS.\n\
             IMPORTANT: For file listing use `list_dir`, for file search use `search_file`, for content search use `search_content`, for tool install use `install_tool`, for HTTP/API requests use `web_fetch`. Only use exec for commands that have no dedicated tool.\n\
             Use exec for: git operations, running scripts (node/python/cargo), system info (uname, df, top), package management (brew/npm/pip), process management (ps, kill).\n\
             Do NOT use exec for HTTP requests (curl/wget) or file downloads — use `web_fetch` / `web_download` instead.\n\
             \n\
             Tool selection: bash for file/text/system ops; python3 for data processing (CSV/JSON/automation).\n\
             Check tool availability first (`which python3`, `which node`). Use `install_tool` for system tools.\n\
             \n\
             Bash patterns: `| head -n 20` to limit output, `date +%s` for timestamps, `find . -name '*.py' -mtime -7`.\n\
             Python patterns: `python3 -c \"import json; ...\"` for one-liners, write to /tmp/script.py for multi-line, `pip install` for packages.\n\
             \n\
             Best practices: pipe large output through head/tail, use wait=false for long tasks, never run destructive commands on personal dirs.\n\
             If a command fails, do NOT retry with the same arguments. Try a different approach or ask the user."
                .to_owned()
        } else {
            "Run a shell command (bash/sh) on Linux.\n\
             IMPORTANT: For file listing use `list_dir`, for file search use `search_file`, for content search use `search_content`, for tool install use `install_tool`, for HTTP/API requests use `web_fetch`. Only use exec for commands that have no dedicated tool.\n\
             Use exec for: git operations, running scripts (node/python/cargo), system info (uname, df, top), package management (apt/npm/pip), process management (ps, kill).\n\
             Do NOT use exec for HTTP requests (curl/wget) or file downloads — use `web_fetch` / `web_download` instead.\n\
             \n\
             Tool selection: bash for file/text/system ops; python3 for data processing (CSV/JSON/automation).\n\
             Check tool availability first (`which python3`, `which node`). Use `install_tool` for system tools.\n\
             \n\
             Bash patterns: `| head -n 20` to limit output, `date +%s` for timestamps, `find . -name '*.py' -mtime -7`.\n\
             Python patterns: `python3 -c \"import json; ...\"` for one-liners, write to /tmp/script.py for multi-line, `pip install` for packages.\n\
             \n\
             Best practices: pipe large output through head/tail, use wait=false for long tasks, never run destructive commands on personal dirs.\n\
             If a command fails, do NOT retry with the same arguments. Try a different approach or ask the user."
                .to_owned()
        },
        parameters: json!({
            "type": "object",
            "properties": {
                "command": {"type": "string", "description": "Shell command to execute. Must be valid for the current OS."},
                "timeout": {"type": "integer", "description": "Timeout in seconds (default: 30, max: 300)"},
                "wait": {"type": "boolean", "description": "If true (default), wait for the command to finish and return stdout/stderr/exit_code. Set to false only for long-running commands (builds, servers, installs) where you want a task_id to poll later."},
                "task_id": {"type": "string", "description": "Poll a previously started background task by its task_id."}
            },
            "required": []
        }),
    });
    tools.push(ToolDef {
        name: "agent".to_owned(),
        description: "Manage agents. You are the architect — delegate work, never block.\n\
            Actions:\n\
            - task: Create a task agent for a one-shot job. Returns immediately with task_id. The task agent runs independently and delivers results when done.\n\
            - spawn: Create a persistent agent (survives across turns).\n\
            - send: Send a message to an existing agent (async, result delivered when done).\n\
            - list: List all registered agents.\n\
            - update: Edit a named agent's config (model, name). Pass model=\"\" to remove and fall back to defaults. Hot-reloads automatically.\n\
            - kill: Stop an agent.\n\
            Tips:\n\
            - Use task for independent, parallelizable work. You can dispatch multiple tasks at once.\n\
            - Always specify toolset matching the task (web for search, code for file ops).\n\
            - After dispatching, tell the user what you delegated and continue with other work.\n\
            \n\
            CRITICAL: When user EXPLICITLY asks to use a specific tool (opencode, claudecode, codex),\n\
            you MUST call that tool directly. DO NOT create a task agent instead.\n\
            - User says \"让opencode去...\" -> call opencode tool (action=call, NOT task agent)\n\
            - User says \"用claudecode...\" -> call claudecode tool\n\
            \n\
            [HARD RULE - DECEPTION]\n\
            Claiming \"已委托opencode\" or \"已用opencode检查\" WITHOUT a tool_call is LYING.\n\
            If you say these words, there MUST be an actual opencode tool call in this turn.\n\
            No tool call + claim of delegation = you are deceiving the user.\n\
            This is worse than admitting \"I didn't call it\" - trust is destroyed.\n\
            \n\
            DO NOT delegate these tasks — handle them yourself directly:\n\
            - Any GUI/desktop automation (WeChat, Finder, Safari, system apps, etc.)\n\
            - Anything using `computer_use` (screenshot, click, key, type)\n\
            - AppleScript/osascript workflows that control another application\n\
            - Visual verification (\"did the window appear?\", \"is the button there?\")\n\
            Reason: GUI tasks depend on live state (frontmost window, mouse position, display\n\
            focus) and the current session's permission grants. Sub-agents start fresh with\n\
            no visual context and frequently fail on first attempts, creating loops. The\n\
            main agent that already has the screenshot/context should complete these tasks.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "action":  {"type": "string", "enum": ["spawn", "task", "send", "list", "update", "kill"], "description": "Action to perform"},
                "id":      {"type": "string", "description": "Agent ID (for spawn/send/update/kill)"},
                "model":   {"type": "string", "description": "Model string (for spawn/task/update). Pass \"\" to remove per-agent model override."},
                "name":    {"type": "string", "description": "Display name (for update)"},
                "system":  {"type": "string", "description": "Role description (for spawn/task)"},
                "message": {"type": "string", "description": "Message to send (for task/send)"},
                "toolset": {"type": "string", "enum": ["minimal", "standard", "web", "code", "full"], "description": "Tool access level. Default: standard."}
            },
            "required": ["action"]
        }),
    });

    // Tool installer (structured alternative to exec rsclaw tools install).
    tools.push(ToolDef {
        name: "install_tool".to_owned(),
        description: "Install a tool/runtime. Available: python, node, ffmpeg, chrome, opencode, claude-code, sherpa-onnx.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "name": {"type": "string", "enum": ["python", "node", "ffmpeg", "chrome", "opencode", "claude-code", "sherpa-onnx"], "description": "Tool name to install"}
            },
            "required": ["name"]
        }),
    });

    // File operation tools (structured alternatives to exec ls/find/grep).
    // These help small models avoid digit-loss and dead-loop issues.
    tools.push(ToolDef {
        name: "list_dir".to_owned(),
        description: "List files and directories in a given path.\n\
            Use this instead of execute_command with ls/dir.\n\
            - Returns file names, sizes, and types.\n\
            - Does not display hidden/dot files by default.\n\
            - Use 'pattern' to filter by glob (e.g. '*.json').\n\
            - Use 'recursive' to list subdirectories.\n\
            CRITICAL: 'path' must be returned before other parameters.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "path":      {"type": "string", "description": "Directory path to list. Relative to workspace root or absolute. Examples: '.', 'src/', '/tmp'"},
                "recursive": {"type": "boolean", "description": "If true, list all files in subdirectories recursively. Default: false."},
                "pattern":   {"type": "string", "description": "Glob pattern filter. Examples: '*.json', '*.py', 'test_*'"}
            }
        }),
    });
    tools.push(ToolDef {
        name: "search_file".to_owned(),
        description: "Search for files by name pattern. Use this instead of execute_command with find.\n\
            - Supports wildcard patterns for flexible matching.\n\
            - Returns relative file paths.\n\
            - Prefer this over list_dir when you have a specific file pattern.\n\
            CRITICAL: 'pattern' must be returned before other parameters.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "pattern": {"type": "string", "description": "REQUIRED: File name pattern with wildcards. Examples: '*.log', 'config*', 'test_*.py', '**/*.rs'"},
                "path":    {"type": "string", "description": "Root directory to search in. Defaults to workspace root. Can be relative or absolute."},
                "max_results": {"type": "integer", "description": "Maximum results to return (default: 20)"}
            },
            "required": ["pattern"]
        }),
    });
    tools.push(ToolDef {
        name: "search_content".to_owned(),
        description: "Search file contents by regex or text pattern. Built on ripgrep.\n\
            Use this instead of execute_command with grep/rg. This tool is faster and respects .gitignore.\n\
            - Supports full regex syntax: 'log.*Error', 'function\\s+\\w+', 'TODO|FIXME'\n\
            - Escape special chars for literal matches: 'functionCall\\('\n\
            - Use 'include' to filter by file type: '*.py', '*.rs'\n\
            CRITICAL: 'pattern' must be returned before other parameters.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "pattern":  {"type": "string", "description": "REQUIRED: Regex pattern to search for. Examples: 'TODO', 'import.*from', 'class\\s+\\w+', 'def main'"},
                "path":     {"type": "string", "description": "File or directory to search in. Defaults to workspace root."},
                "include":  {"type": "string", "description": "File glob filter. Examples: '*.py', '*.{ts,tsx}', '*.rs'"},
                "ignore_case": {"type": "boolean", "description": "If true, match case-insensitively. Default: false."},
                "max_results": {"type": "integer", "description": "Maximum results (default: 20)"}
            },
            "required": ["pattern"]
        }),
    });

    // Web tools.
    tools.push(ToolDef {
        name: "web_search".to_owned(),
        description: "Search the web for real-time information.\n\
            When to use:\n\
            - Questions beyond your knowledge cutoff or training data\n\
            - Current events, recent updates, time-sensitive information\n\
            - Latest documentation, API references, version-specific features\n\
            - When unsure about facts — search BEFORE saying 'I don't know'\n\
            Tips:\n\
            - Be specific: include version numbers, dates, or exact terms\n\
            - Use the current year (not past years) for latest docs\n\
            - For Chinese content, search in Chinese for better results".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "query":    {"type": "string", "description": "Search query — be specific, include keywords and dates"},
                "provider": {"type": "string", "description": "Search provider: duckduckgo, google, bing, brave. Leave empty for default."},
                "limit":    {"type": "integer", "description": "Max results (default 5)"}
            },
            "required": ["query"]
        }),
    });
    tools.push(ToolDef {
        name: "web_fetch".to_owned(),
        description: "PREFERRED tool for HTTP requests — web pages, REST APIs, documentation, articles.\n\
            Do NOT use execute_command with curl/wget/Invoke-WebRequest — use web_fetch instead.\n\
            - URL must be fully-formed (https://...)\n\
            - HTTP auto-upgraded to HTTPS\n\
            - HTML pages are dehydrated to clean text/markdown\n\
            - JSON / plain-text / non-HTML responses are returned as-is (raw body)\n\
            - Falls back to browser rendering for JS-heavy pages and CAPTCHA-blocked sites (GET only)\n\
            - GET responses without headers/body are cached 15 minutes; non-GET bypasses cache\n\
            - For large pages, pass 'prompt' to LLM-extract specific information\n\
            - DO NOT pass 'prompt' for compact structured responses (JSON APIs, RSS, \
              <2KB text). Passing 'prompt' triggers an internal LLM summarize pass \
              (extra ~30-60 s + tokens). For api.* / *.json / arxiv Atom / RSS / \
              search-result JSON, omit 'prompt' — read the raw response directly.\n\
            \n\
            METHODS, HEADERS, BODY — supports the full HTTP surface:\n\
              - method: GET (default), POST, PUT, PATCH, DELETE\n\
              - headers: object — Authorization, X-API-Key, Cookie, custom Content-Type, etc.\n\
              - body: string (raw) OR object/array (auto JSON-serialized + Content-Type set)\n\
            \n\
            FALL BACK to execute_command + curl only when you need:\n\
              - File upload via multipart/form-data\n\
              - Streaming responses (SSE, chunked transfers consumed incrementally)\n\
              - Sites behind interactive login (use web_browser when interaction is needed)".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "url":     {"type": "string", "description": "Full URL to fetch (e.g. https://docs.example.com/api)"},
                "method":  {"type": "string", "enum": ["GET", "POST", "PUT", "PATCH", "DELETE"], "description": "HTTP method. Default: GET"},
                "headers": {"type": "object", "description": "Optional request headers, e.g. {\"Authorization\": \"Bearer xyz\", \"X-API-Key\": \"...\"}", "additionalProperties": {"type": "string"}},
                "body":    {"description": "Optional request body. String → sent as-is (set Content-Type via headers). Object/array → JSON-serialized; Content-Type defaulted to application/json."},
                "prompt":  {"type": "string", "description": "OPTIONAL. What to extract from a LARGE HTML page (e.g. 'list all API endpoints'). Triggers an LLM-summarize pass on the response. OMIT for compact JSON/XML/text where you can read the raw body — passing 'prompt' on small structured responses just adds ~30-60 s of LLM latency for nothing."}
            },
            "required": ["url"]
        }),
    });
    tools.push(ToolDef {
        name: "web_download".to_owned(),
        description: "Download a file (image/video/document/archive) from URL to local path.\n\
            - Supports resume for large files\n\
            - Use use_browser_cookies=true for authenticated downloads (e.g. after logging in via web_browser)\n\
            - Path is relative to workspace/downloads/ — just use filename like 'photo.jpg'\n\
            - Do NOT use execute_command with curl/wget — always use this tool\n\
            - After downloading, use send_file to deliver the file to the user".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "url":  {"type": "string", "description": "Full URL to download"},
                "path": {"type": "string", "description": "Destination filename (e.g. 'video.mp4', 'report.pdf'). Relative to workspace/downloads/."},
                "cookies": {"type": "string", "description": "Cookie header string, e.g. 'session=abc; token=xyz'"},
                "use_browser_cookies": {"type": "boolean", "description": "Auto-extract cookies from active browser session for this URL's domain (use after web_browser login)"}
            },
            "required": ["url", "path"]
        }),
    });
    tools.push(ToolDef {
        name: "web_browser".to_owned(),
        description: "Control a web browser. Core workflow:\n\
            1. `open` — navigate to a URL\n\
            2. `snapshot` — get page structure with interactive element refs (@e1, @e2...). Use `interactive: true` to only get actionable elements (saves tokens).\n\
            3. `click` ref=@e1 / `fill` ref=@e2 text='...' — interact using refs\n\
            4. Re-snapshot after any page change to get updated refs\n\
            Autocomplete inputs (Ctrip/Fliggy/Qunar city pickers, Google/Baidu search, flight/hotel/movie pickers, any input that pops a dropdown of suggestions): ALWAYS use `pick` ref=@eN query='武汉' in a single call — it focuses, types, waits for the popup, and clicks the first visible candidate. DO NOT build it yourself out of click+type+wait+screenshot loops; that wastes 5-7 iterations per field and the dropdown often dismisses before you re-screenshot. `pick` handles IME/React-controlled inputs that silently drop programmatic values.\n\
            Interaction: hover (triggers menus/tooltips), dblclick, drag (from=@e1 to=@e2, for sliders), focus, scrollintoview.\n\
            Quick search: `search` — auto-find search box on ANY site, fill text, submit, return results.\n\
            `clickAt` ref=@e1 or x=100 y=200 — real mouse click via CDP (for file dialogs, anti-bot sites).\n\
            Semantic locators: `getbytext` value='Submit', `getbyrole` value='button', `getbylabel` value='Email' — find elements without @ref.\n\
            Frame: `frame` selector=@e1 (switch to iframe), `mainframe` (switch back).\n\
            Console: `console` — get browser console messages (log/warn/error).\n\
            Content: `content` — get full page HTML.\n\
            WaitForUrl: `waitforurl` url='dashboard' — wait for URL change (after login/redirect).\n\
            Other: type, select, check, scroll, screenshot, pdf, press, back, forward, reload, wait, evaluate, cookies, get_text, get_url, get_title, find, get_article, upload, new_tab, switch_tab, close_tab.\n\
            IMPORTANT: Always snapshot BEFORE clicking/filling. Element refs change after page updates.\n\n\
            Site-rules — platform-specific DOM selectors, URL routes, and gotchas live under \
            `~/.rsclaw/tools/web_browser/site-rules/`. Two layouts coexist:\n\
            - `<domain>.md` — flat, single-file (e.g. `douyin.md`, `kuaishou.md`, \
              `xiaohongshu.md`, `bilibili.md`)\n\
            - `<domain>/<task>.md` — nested per-task (e.g. `amazon/product-search.md`, \
              `tiktok/upload.md`, `linkedin/connect.md`)\n\
            When you `open` a URL whose host matches either layout, read_file the matching \
            file FIRST so you use the verified selectors instead of guessing them per-session. \
            Saves 5+ snapshot/click iterations and avoids stale-selector breakage.\n\n\
            BEFORE reading any nested `<domain>/<task>.md` (which may have been imported from \
            browser-use/browser-harness and use Python helper syntax), read \
            `~/.rsclaw/tools/web_browser/site-rules/_VOCABULARY.md` once per session — it maps \
            their `click_at_xy` / `type_text` / `js(...)` etc. to your `clickAt` / `type` / \
            `evaluate` actions so you can translate the procedural code on the fly.\n\n\
            Screenshot routing — do NOT call `action=screenshot` without a target:\n\
            - Web page screenshot (user gave a URL): pass it inline,\n\
              `action=screenshot url=https://example.com` — this navigates\n\
              first then captures, single call. This is the one-shot equivalent\n\
              of `/webshot`.\n\
            - Desktop / system screenshot (no URL, user just says \"screenshot\"\n\
              or \"截图\"): you cannot do this from web_browser. Tell the user\n\
              to type `/ss` or `/screenshot` (preparse fast path → macOS\n\
              `screencapture` / Windows / Linux equivalent).\n\
            - Plain `action=screenshot` (no url) only captures what's already\n\
              in the persistent browser session — usually a blank Chrome new\n\
              tab → near-black PNG. Don't do this.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "action":     {"type": "string", "enum": [
                    "open", "navigate", "snapshot", "click", "clickAt", "fill", "type", "pick",
                    "select", "check", "uncheck", "scroll", "screenshot", "pdf",
                    "hover", "dblclick", "drag", "focus", "scrollintoview",
                    "back", "forward", "reload", "get_text", "get_url", "get_title",
                    "wait", "evaluate", "cookies", "press", "set_viewport",
                    "dialog", "state", "network", "new_tab", "list_tabs",
                    "switch_tab", "close_tab", "highlight", "clipboard", "find",
                    "get_article", "upload", "context", "emulate", "diff", "record",
                    "search", "console", "content", "frame", "mainframe",
                    "waitforurl", "getbytext", "getbyrole", "getbylabel"
                ]},
                "url":        {"type": "string", "description": "URL for open/navigate"},
                "interactive":{"type": "boolean", "description": "For snapshot: only return actionable elements (saves ~80% tokens). Default: false"},
                "ref":        {"type": "string", "description": "Element ref like @e3 from snapshot"},
                "from":       {"type": "string", "description": "Source element ref for drag"},
                "to":         {"type": "string", "description": "Target element ref for drag"},
                "x":          {"type": "number", "description": "X pixel coordinate for clickAt"},
                "y":          {"type": "number", "description": "Y pixel coordinate for clickAt"},
                "text":       {"type": "string", "description": "Text for fill/type/click-by-text/clipboard/dialog"},
                "query":      {"type": "string", "description": "Query text for pick (typed into input and used to match dropdown candidates)"},
                "index":      {"type": "integer", "description": "For pick: zero-based candidate index when multiple match (default 0)"},
                "timeout_ms": {"type": "integer", "description": "For pick: total time to wait for dropdown and click target in ms (default 5000)"},
                "value":      {"type": "string", "description": "Value for select, or sub-action for cookies/state/dialog/network/clipboard/context/emulate/diff/record"},
                "key":        {"type": "string", "description": "Key name for press (Enter, Tab, Escape, etc.)"},
                "direction":  {"type": "string", "enum": ["up", "down", "left", "right"], "description": "Scroll direction"},
                "amount":     {"type": "integer", "description": "Scroll distance in pixels (default 500)"},
                "selector":   {"type": "string", "description": "CSS selector for scroll container"},
                "js":         {"type": "string", "description": "JavaScript for evaluate action"},
                "target":     {"type": "string", "description": "Wait target: CSS selector, text, url, networkidle, fn"},
                "timeout":    {"type": "number", "description": "Timeout in seconds (default 15)"},
                "format":     {"type": "string", "enum": ["png", "jpeg"], "description": "Screenshot format"},
                "quality":    {"type": "integer", "description": "JPEG quality (1-100)"},
                "full_page":  {"type": "boolean", "description": "Capture full scrollable page"},
                "annotate":   {"type": "boolean", "description": "Overlay numbered labels on interactive elements"},
                "width":      {"type": "integer", "description": "Viewport width for set_viewport"},
                "height":     {"type": "integer", "description": "Viewport height for set_viewport"},
                "scale":      {"type": "number", "description": "Device scale factor for set_viewport"},
                "mobile":     {"type": "boolean", "description": "Mobile emulation for set_viewport"},
                "target_id":  {"type": "string", "description": "Tab target ID for switch_tab/close_tab"},
                "state":      {"type": "object", "description": "State object for state load"},
                "pattern":    {"type": "string", "description": "URL pattern for network block/intercept"},
                "by":         {"type": "string", "enum": ["text", "label"], "description": "Find element by text or label"},
                "then":       {"type": "string", "description": "Action after find (click)"},
                "cookie":     {"type": "object", "description": "Cookie object for cookies set"},
                "files":      {"type": "array", "items": {"type": "string"}, "description": "File paths for upload"},
                "context_id": {"type": "string", "description": "Browser context ID for cookie isolation"},
                "latitude":   {"type": "number", "description": "Latitude for geolocation emulation"},
                "longitude":  {"type": "number", "description": "Longitude for geolocation emulation"},
                "accuracy":   {"type": "number", "description": "Geolocation accuracy in meters"},
                "locale":     {"type": "string", "description": "Locale for emulation (e.g. en-US, zh-CN)"},
                "timezone_id":{"type": "string", "description": "IANA timezone (e.g. Asia/Shanghai)"},
                "permissions":{"type": "array", "items": {"type": "string"}, "description": "Browser permissions to grant"},
                "action_type":{"type": "string", "description": "Intercept action: block or mock"},
                "body":       {"type": "string", "description": "Mock response body for network intercept"},
                "headed":     {"type": "boolean", "description": "true=foreground (visible window), false=background (headless). Default: auto-detect based on display availability. Omit this field to use the default."}
            },
            "required": ["action"]
        }),
    });
    tools.push(ToolDef {
        name: "computer_use".to_owned(),
        description: "Control the computer desktop. ONLY use when the user EXPLICITLY asks to take a screenshot, click, type, or interact with the desktop. Do NOT call this tool just because the message mentions words like 'screenshot' or 'screen' in other contexts. Screenshots auto-resize, and mouse coordinates use the same physical-pixel space as the returned `original_width`/`original_height` (HiDPI is handled internally — multiply image-pixel coords by the returned `scale` and pass directly).".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "action":    {"type": "string", "enum": [
                    "screenshot", "mouse_move", "mouse_click", "left_click",
                    "double_click", "triple_click", "right_click", "middle_click",
                    "drag", "scroll", "type", "key", "hold_key",
                    "cursor_position", "get_active_window", "ui_tree",
                    "list_app_rules", "get_app_rule", "wait"
                ], "description": "Action to perform. ui_tree returns the accessibility tree of the focused window (interactive elements with role/label/coordinates). list_app_rules/get_app_rule load per-app desktop automation playbooks from ~/.rsclaw/tools/computer_use/app-rules/."},
                "x":         {"type": "number", "description": "X coordinate (mouse actions, drag start) in physical pixels"},
                "y":         {"type": "number", "description": "Y coordinate (mouse actions, drag start) in physical pixels"},
                "to_x":      {"type": "number", "description": "Drag destination X (physical pixels)"},
                "to_y":      {"type": "number", "description": "Drag destination Y (physical pixels)"},
                "button":    {"type": "string", "enum": ["left", "right", "middle"], "description": "Mouse button (default: left)"},
                "text":      {"type": "string", "description": "Text for type action"},
                "key":       {"type": "string", "description": "Key name or combo (e.g. Enter, ctrl+c, cmd+shift+s)"},
                "then":      {"type": "string", "enum": ["click", "double_click", "right_click", "triple_click"], "description": "Sub-action for hold_key (default: click)"},
                "direction": {"type": "string", "enum": ["up", "down", "left", "right"], "description": "Scroll direction (default: down)"},
                "amount":    {"type": "integer", "description": "Scroll clicks (default: 3)"},
                "ms":        {"type": "integer", "description": "Wait duration in milliseconds (max 10000)"},
                "name":      {"type": "string", "description": "App-rule name (for get_app_rule action)"},
                "region":    {"type": "object", "description": "Optional screenshot region in physical pixels. Use after a full screenshot to zoom in on a specific area without recapturing the whole screen.", "properties": {
                    "x":      {"type": "number"},
                    "y":      {"type": "number"},
                    "width":  {"type": "number"},
                    "height": {"type": "number"}
                }, "required": ["x", "y", "width", "height"]},
                "max_long_edge_px": {"type": "integer", "description": "Screenshot resize cap: longest edge of returned image. Default 1024 (XGA). Range 64-8192. Larger values = more detail + more tokens."}
            },
            "required": ["action"]
        }),
    });

    // --- New openclaw-compatible tools ---

    tools.push(ToolDef {
        name: "image_gen".to_owned(),
        description: "Generate an image from a text description using an AI image model. Pass the user's original description as-is (preserve their language, do not translate).".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "prompt": {"type": "string", "description": "Image description. IMPORTANT: use the user's original language and wording, do not translate to English."},
                "size":   {"type": "string", "description": "Image size, e.g. 2048x2048", "default": "2048x2048"}
            },
            "required": ["prompt"]
        }),
    });
    tools.push(ToolDef {
        name: "video_gen".to_owned(),
        description: "Generate a video from a text description using an AI video model. \
            Use this tool whenever the user asks to: create a video, animate an image, \
            generate a clip, make a short film, produce footage, or anything involving \
            video output. Pass the user's original description as-is (preserve their \
            language, do not translate).".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "prompt":       {"type": "string", "description": "Video description. Use the user's original language and wording."},
                "duration":     {"type": "integer", "description": "Duration in seconds (default: 5)", "default": 5},
                "aspect_ratio": {"type": "string", "description": "Aspect ratio: 16:9, 9:16, 1:1 (default: 16:9)", "default": "16:9"},
                "model":        {"type": "string", "description": "Video model to use, e.g. seedance, minimax, kling (optional, uses configured default)"}
            },
            "required": ["prompt"]
        }),
    });
    tools.push(ToolDef {
        name: "pdf".to_owned(),
        description: "Extract text content from a PDF file or URL.\n\
            - Supports local files and remote URLs.\n\
            - Returns extracted text suitable for analysis.\n\
            - For large PDFs, content may be truncated.\n\
            Example: {\"path\":\"report.pdf\"} or {\"path\":\"https://example.com/doc.pdf\"}".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "REQUIRED: File path (relative to workspace) or full URL. Examples: 'docs/report.pdf', 'https://example.com/whitepaper.pdf'"}
            },
            "required": ["path"]
        }),
    });
    tools.push(ToolDef {
        name: "text_to_voice".to_owned(),
        description: "Convert text to speech audio and send as voice message.\n\
            - Generates audio from text input.\n\
            - On macOS uses 'say', on Linux uses espeak/sherpa-onnx.\n\
            - Result is sent as a voice attachment to the user.\n\
            Example: {\"text\":\"Hello world\",\"voice\":\"Tingting\"}".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "text":  {"type": "string", "description": "REQUIRED: Text to convert to speech. Can be any language."},
                "voice": {"type": "string", "description": "Voice name. macOS: run 'say -v ?' for list. Linux: run 'espeak --voices'. Examples: 'Tingting' (Chinese), 'Samantha' (English)"}
            },
            "required": ["text"]
        }),
    });
    tools.push(ToolDef {
        name: "send_message".to_owned(),
        description: "Send a message to a chat channel target (user or group).\n\
            Use this to proactively reach out to users on messaging platforms.\n\
            Channel is auto-detected from current session if not specified.\n\
            Example: {\"target\":\"user123\",\"text\":\"Task completed!\",\"channel\":\"telegram\"}".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "channel": {"type": "string", "description": "Channel type. Examples: 'telegram', 'discord', 'feishu', 'weixin', 'slack'"},
                "target":  {"type": "string", "description": "REQUIRED: Target user ID or group/chat ID"},
                "text":    {"type": "string", "description": "REQUIRED: Message text to send"}
            },
            "required": ["target", "text"]
        }),
    });
    tools.push(ToolDef {
        name: "cron".to_owned(),
        description: "List, add, edit, remove, enable or disable cron jobs.\n\
            Supports recurring (cron expression OR fixed interval) and one-shot (delay_ms) schedules.\n\
            \n\
            CHOOSING ONE-SHOT vs RECURRING — read carefully, this is the most common mistake:\n\
              ONE-SHOT (set `delay_ms`):\n\
                Use when the user names a SPECIFIC time/date that fires ONCE.\n\
                  - \"22:04截图给我\" / \"晚上 8 点提醒我\" / \"15分钟后\" / \"明天下午3点\"\n\
                Compute milliseconds from now until the target moment, set delay_ms.\n\
                Auto-removes after firing — exactly what \"once\" means.\n\
              RECURRING (set `schedule` cron expr or `every_seconds`):\n\
                Use ONLY when the user explicitly says repetition: 每天 / 每周 / 每小时 /\n\
                  every day / every Monday / 每隔 N 分钟 / weekly / hourly.\n\
              DO NOT pick a daily cron expr like \"4 22 * * *\" when the user asked for ONE\n\
              specific time today (\"22:04截图发我\"). That creates a job that fires every day\n\
              at 22:04 forever — the user has to manually delete it. The cron tool DOES NOT\n\
              auto-collapse \"today only\" intent into one-shot; you must do it.\n\
            \n\
            One-shot jobs auto-remove after execution.\n\
            For edit/remove/enable/disable, prefer using `index` from the list output instead of `id`.\n\
            \n\
            KIND — what should fire when the schedule triggers:\n\
              agentTurn (DEFAULT — pick this unless you are CERTAIN systemEvent applies):\n\
                Dispatch `message` to the agent at fire time. The agent runs LLM + tools\n\
                and delivers the result. Required for ANY task whose answer changes between\n\
                runs or depends on outside information: weather, prices, news, comments,\n\
                emails, system status, file/page contents, conditional logic, summaries.\n\
              systemEvent: deliver `message` text VERBATIM to the user. NO LLM, NO TOOLS,\n\
                NO QUERIES — every fire produces the exact same string you wrote in `message`.\n\
                Use ONLY when the message is a fixed text reminder whose content never\n\
                needs to be computed (e.g. \"drink water\", \"stand up\", \"daily 9am: standup\").\n\
              Disqualifying signal: if `message` describes an action to perform (\"check X\",\n\
                \"query Y\", \"fetch Z\", \"每N分钟查/取/看…\") rather than literal text to display,\n\
                it MUST be agentTurn. Picking systemEvent here means every fire just echoes\n\
                the instruction back to the user instead of executing it — a real, observed\n\
                failure mode. Token cost is not a reason to downgrade to systemEvent; a\n\
                useless echo is more expensive than a correct LLM call.\n\
            \n\
            CRON FORMAT (schedule field) — EXACTLY 5 fields separated by spaces:\n\
              minute hour day month weekday\n\
            Common examples:\n\
              \"*/5 * * * *\"    every 5 minutes\n\
              \"0 * * * *\"      every hour on the hour\n\
              \"0 17 * * *\"     5:00 PM daily  (NOT \"017 * * *\" — needs the space!)\n\
              \"30 8 * * 1-5\"   8:30 AM on weekdays\n\
              \"0 9 1 * *\"      9:00 AM on the 1st of each month\n\
            Pitfall: '0 17 * * *' is FIVE fields. Writing '017 * * *' (no space after 0) is\n\
            only FOUR fields and will be rejected. Always check your expression has exactly\n\
            5 whitespace-separated tokens.\n\
            \n\
            ITER (round-robin) — set when the user wants to rotate through a list,\n\
            ONE item per firing (e.g. \"按顺序轮流查询东京、曼谷、迪拜的天气\").\n\
            Pass `iter` as a JSON array of items, and use `{current}` (and optionally\n\
            `{next}`, `{index}`, `{total}`) as placeholders inside `message`. The\n\
            scheduler advances the cursor every fire and persists it — the agent\n\
            never has to remember progress, and a restart can never repeat or skip.\n\
            Example: message=\"查询{current}的当前天气\", iter=[\"东京\",\"曼谷\",\"迪拜\"].\n\
            Without iter the LLM must track its own progress in memory, which is\n\
            unreliable across restarts and embedding-model swaps — prefer iter when rotation is intended.\n\
            \n\
            ROTATE vs BATCH — disambiguate before reaching for iter:\n\
              ROTATE (use iter): the user wants ONE item per fire, cycling. Trigger\n\
                phrases: \"轮流\" / \"按顺序\" / \"依次\" / \"each time\" / \"one at a time\" /\n\
                \"rotate\" / \"cycle through\" / \"每次只查一个\".\n\
              BATCH (do NOT use iter): the user wants ALL items reported together each\n\
                fire. Phrases: \"每 N 分钟报一次 A、B、C 的价格\" / \"every N min give me\n\
                the prices of A, B, C\" / lists with no rotation signal. Build a single\n\
                cron whose message names every item — the agent fans out tool calls in\n\
                one turn and replies with the combined result.\n\
              When in doubt, BATCH is the safer default: a single late report is much\n\
              less surprising than silently dropping items every cycle.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "action":        {"type": "string", "enum": ["list", "add", "edit", "remove", "enable", "disable"], "description": "Action to perform"},
                "schedule":      {"type": "string", "description": "Cron schedule expression (for add/edit recurring jobs). Must be 5 whitespace-separated fields."},
                "every_seconds": {"type": "number", "description": "Fire every N seconds (for add). Use for fixed intervals like 45 minutes (every_seconds=2700) that cannot be expressed as a 5-field cron expression."},
                "delay_ms":      {"type": "number", "description": "Delay in milliseconds for one-shot timer (e.g., 1200000 = 20 min). Use instead of schedule for reminders/timers."},
                "message":       {"type": "string", "description": "Message or task to run (for add, edit)"},
                "kind":          {"type": "string", "enum": ["agentTurn", "systemEvent"], "description": "What fires when the schedule triggers. agentTurn (default) = run agent (LLM+tools) so the answer reflects current state. systemEvent = deliver `message` verbatim with NO agent run — only valid when `message` is fixed display text whose content never needs to be computed. If the user wants something queried/fetched/checked on a schedule, use agentTurn."},
                "index":         {"type": "number", "description": "Job index from list (1-based, for edit/remove/enable/disable - preferred)"},
                "id":            {"type": "string", "description": "Job ID (for edit/remove/enable/disable - use index instead if possible)"},
                "name":          {"type": "string", "description": "Job name (for add, edit)"},
                "tz":            {"type": "string", "description": "Timezone IANA name. Auto-detected if omitted. Only set if user explicitly requests a different timezone."},
                "agentId":       {"type": "string", "description": "Agent ID to run the job (for add, edit, default: main)"},
                "iter":          {"type": "array", "items": {"type": "string"}, "description": "Round-robin items the scheduler cycles through, one per firing. Use `{current}` (and optionally `{next}`, `{index}`, `{total}`) as placeholders in `message`. Set this whenever the user asks for rotating tasks (e.g. 'cycle through cities'); leaves the agent free of progress-tracking duties. On `edit`: pass a new array to replace items; pass `null` or `[]` to clear iter mode."},
                "iter_cursor":   {"type": "number", "description": "On `edit`: explicitly set the iter cursor (0-based). Use to reset rotation back to the start, or to jump to a specific item. Without `iter`, requires the job to already have iter configured."}
            },
            "required": ["action"]
        }),
    });
    tools.push(ToolDef {
        name: "session".to_owned(),
        description: "Manage sessions. Actions: send (message to another agent), list (all active sessions), history (retrieve conversation), status (session info).".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "action":     {"type": "string", "enum": ["send", "list", "history", "status"], "description": "Action to perform"},
                "agentId":    {"type": "string", "description": "Target agent ID (for send)"},
                "sessionKey": {"type": "string", "description": "Session key (for send/history/status)"},
                "message":    {"type": "string", "description": "Message text (for send)"},
                "limit":      {"type": "number", "description": "Max messages to return (for history, default 50)"}
            },
            "required": ["action"]
        }),
    });
    tools.push(ToolDef {
        name: "gateway".to_owned(),
        description: "Query gateway status and information.\n\
            - status: Current gateway state, uptime, connected channels, active agents\n\
            - health: Health check (OK/degraded)\n\
            - version: Gateway version and build info".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "action": {"type": "string", "enum": ["status", "health", "version"], "description": "REQUIRED: Info to retrieve. Examples: 'status', 'version'"}
            },
            "required": ["action"]
        }),
    });
    tools.push(ToolDef {
        name: "opencode".to_owned(),
        description: "Execute coding/debugging tasks using OpenCode (a powerful coding agent).\n\n\
            MANDATORY USAGE RULES:\n\
            1. When user reports a bug/error/crash -> MUST call this tool to investigate\n\
            2. When user asks to fix/debug a script -> MUST call this tool\n\
            3. When user says '让opencode...' or '用opencode...' -> MUST call this tool\n\
            4. DO NOT say '已委托opencode' without actually calling this tool\n\
            5. Saying you delegated without calling = LYING = worst failure mode\n\
            \n\
            If you cannot or will not call this tool, tell user honestly why.\n\
            NEVER pretend to have called it.\n\
            \n\
            Technical: Create project subdirectory for new projects. Runs async, results delivered when complete.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "task": {"type": "string", "description": "The coding task to execute. Be specific about file paths and always mention creating a project subdirectory for new projects."}
            },
            "required": ["task"]
        }),
    });
    tools.push(ToolDef {
        name: "claudecode".to_owned(),
        description: "Execute coding tasks using Claude Code (official Claude Agent SDK via ACP protocol). Uses Claude's native coding capabilities with full context awareness. IMPORTANT: When creating new projects or files, ALWAYS create a dedicated project directory first. The task will run asynchronously and results will be sent when complete.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "task": {"type": "string", "description": "The coding task to execute. Be specific about requirements and file paths."}
            },
            "required": ["task"]
        }),
    });
    tools.push(ToolDef {
        name: "codex".to_owned(),
        description: "Execute coding tasks using OpenAI Codex CLI (MCP Server mode). Uses OpenAI's coding capabilities with sandboxed file operations. IMPORTANT: When creating new projects or files, ALWAYS create a dedicated project directory first. Requires Codex CLI installation: npm install -g @openai/codex. The task will run asynchronously and results will be sent when complete.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "task": {"type": "string", "description": "The coding task to execute. Be specific about requirements and file paths."}
            },
            "required": ["task"]
        }),
    });
    tools.push(ToolDef {
        name: "channel".to_owned(),
        description: "Perform channel-specific actions (send, reply, pin, delete messages). Channel is auto-detected from current session or can be specified explicitly: telegram, discord, slack, whatsapp, feishu, weixin, qq, dingtalk.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "action":    {"type": "string", "enum": ["send", "reply", "forward", "pin", "unpin", "delete"], "description": "Action to perform"},
                "channel":   {"type": "string", "description": "Channel type (auto-detected if omitted): telegram, discord, slack, whatsapp, feishu, weixin, qq, dingtalk"},
                "chatId":    {"type": "string", "description": "Chat/channel ID"},
                "text":      {"type": "string", "description": "Message text"},
                "messageId": {"type": "string", "description": "Message ID (for reply/pin/delete)"}
            },
            "required": ["action"]
        }),
    });

    tools.push(ToolDef {
        name: "anycli".to_owned(),
        description: "Extract structured data from websites using declarative adapters.\n\
            Actions:\n\
            - run: Execute an adapter command (e.g., hackernews top, bilibili hot)\n\
            - list: List all available adapters\n\
            - info: Show adapter details and available commands\n\
            - search: Search community hub for adapters\n\
            - install: Install an adapter from the hub\n\
            Built-in adapters: hackernews, bilibili, arxiv, wikipedia, github-trending.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "action":  {"type": "string", "enum": ["run", "list", "info", "search", "install"], "description": "Action to perform"},
                "adapter": {"type": "string", "description": "Adapter name (for run/info)"},
                "command": {"type": "string", "description": "Command name within adapter (for run)"},
                "params":  {"type": "object", "description": "Key-value parameters (for run), e.g. {\"limit\": \"10\", \"query\": \"rust\"}"},
                "query":   {"type": "string", "description": "Search query (for search)"},
                "name":    {"type": "string", "description": "Adapter name (for install)"},
                "format":  {"type": "string", "enum": ["json", "table", "csv", "markdown"], "description": "Output format (for run, default: json)"}
            },
            "required": ["action"]
        }),
    });
    tools.push(ToolDef {
        name: "clarify".to_owned(),
        description: "Ask the user a clarifying question before proceeding. Use when:\n\
            - The request is ambiguous and multiple valid interpretations exist\n\
            - A choice is needed (e.g., which file, which format, which approach)\n\
            - Destructive or irreversible action needs confirmation\n\
            Provide options for quick selection or leave open-ended for free-form answers.\n\
            IMPORTANT: Do NOT use this for simple confirmations. Only when genuine ambiguity exists.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "question": {"type": "string", "description": "The question to ask the user"},
                "options":  {"type": "array", "items": {"type": "string"}, "description": "Optional list of choices. Omit for open-ended questions."}
            },
            "required": ["question"]
        }),
    });
    tools.push(ToolDef {
        name: "pairing".to_owned(),
        description: "Manage channel pairing (dmPolicy=pairing). Actions: list (show pending codes and approved peers), approve (approve a pairing code), revoke (revoke an approved peer).".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "action":  {"type": "string", "enum": ["list", "approve", "revoke"], "description": "Action to perform"},
                "code":    {"type": "string", "description": "Pairing code to approve (for approve action, e.g. ZGTB-NB79)"},
                "channel": {"type": "string", "description": "Channel name (for revoke action, e.g. qq, telegram)"},
                "peerId":  {"type": "string", "description": "Peer ID to revoke (for revoke action)"}
            },
            "required": ["action"]
        }),
    });

    // Document tools — split into simple, independent tools for better small-model compatibility.
    // `doc_fmt_hint` is a formatting note appended to the descriptions of the
    // free-text document tools (create_docx, create_pdf).
    let doc_fmt_hint = " Structure content professionally: use # headings, - bullet lists, blank lines between sections. For notices/reports: add title, organize into sections.";

    tools.push(ToolDef {
        name: "create_docx".to_owned(),
        description: format!("Create a Word document (.docx).{doc_fmt_hint} After creating, use send_file to deliver."),
        parameters: json!({
            "type": "object",
            "properties": {
                "path":    {"type": "string", "description": "File path, e.g. 'report.docx'"},
                "content": {"type": "string", "description": "Document content. Use # for headings, - for lists, blank lines for paragraphs."},
                "title":   {"type": "string", "description": "Document title (optional, displayed at top)"},
                "explanation": {"type": "string", "description": "Brief explanation of what you are creating and why, to help organize your thoughts before writing content."}
            },
            "required": ["path", "content"]
        }),
    });
    tools.push(ToolDef {
        name: "create_pdf".to_owned(),
        description: format!("Create a PDF document.{doc_fmt_hint} After creating, use send_file to deliver."),
        parameters: json!({
            "type": "object",
            "properties": {
                "path":    {"type": "string", "description": "File path, e.g. 'report.pdf'"},
                "content": {"type": "string", "description": "Document content. Use # for headings, - for lists, blank lines for paragraphs."},
                "title":   {"type": "string", "description": "Document title (optional, displayed at top)"},
                "explanation": {"type": "string", "description": "Brief explanation of what you are creating and why, to help organize your thoughts before writing content."}
            },
            "required": ["path", "content"]
        }),
    });
    tools.push(ToolDef {
        name: "create_xlsx".to_owned(),
        description: "Create an Excel spreadsheet (.xlsx). Extract structured data into columns with meaningful headers. After creating, use send_file to deliver.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "path":   {"type": "string", "description": "File path, e.g. 'data.xlsx'"},
                "sheets": {"type": "array", "description": "Sheets: [{name, headers: [str], rows: [[value]]}]",
                    "items": {"type": "object", "properties": {
                        "name":    {"type": "string", "description": "Sheet name (tab label in the spreadsheet)."},
                        "headers": {"type": "array", "items": {"type": "string"}, "description": "Column header labels for the first row."},
                        "rows":    {"type": "array", "items": {"type": "array"}, "description": "Data rows, each an array of cell values in column order."}
                    }}
                },
                "explanation": {"type": "string", "description": "Brief explanation of what you are creating and why, to help organize your thoughts before writing content."}
            },
            "required": ["path", "sheets"]
        }),
    });
    tools.push(ToolDef {
        name: "create_pptx".to_owned(),
        description: "Create a PowerPoint presentation (.pptx). After creating, use send_file to deliver.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "path":   {"type": "string", "description": "File path, e.g. 'deck.pptx'"},
                "slides": {"type": "array", "description": "Slides: [{title, body}]",
                    "items": {"type": "object", "properties": {
                        "title": {"type": "string", "description": "Slide title displayed at the top."},
                        "body":  {"type": "string", "description": "Slide body text. Use newlines to separate bullet points."}
                    }}
                },
                "explanation": {"type": "string", "description": "Brief explanation of what you are creating and why, to help organize your thoughts before writing content."}
            },
            "required": ["path", "slides"]
        }),
    });
    // Keep doc tool for read/edit operations (less frequently used by small models).
    tools.push(ToolDef {
        name: "doc".to_owned(),
        description: "Read or edit existing documents.\n\
            Actions: read_doc (xlsx/docx/pdf), edit_excel, edit_word, edit_pdf.\n\
            For CREATING new documents, use create_docx/create_pdf/create_xlsx/create_pptx instead.".to_owned(),
        parameters: json!({
            "type": "object",
            "properties": {
                "action":  {"type": "string", "enum": ["read_doc", "edit_excel", "edit_word", "edit_pdf"], "description": "Action to perform"},
                "path":    {"type": "string", "description": "File path"},
                "content": {"type": "string", "description": "For edit_word: replacement content"},
                "append":  {"type": "string", "description": "For edit_word: text to append"},
                "sheets":  {"type": "array", "description": "For edit_excel: [{name, headers, rows}]",
                    "items": {"type": "object", "properties": {
                        "name":    {"type": "string", "description": "Sheet name (tab label in the spreadsheet)."},
                        "headers": {"type": "array", "items": {"type": "string"}, "description": "Column header labels for the first row."},
                        "rows":    {"type": "array", "items": {"type": "array"}, "description": "Data rows, each an array of cell values in column order."}
                    }}
                },
                "append_rows": {"type": "array", "description": "For edit_excel: append rows to an existing sheet without replacing it.",
                    "items": {"type": "object", "properties": {
                        "sheet": {"type": "string", "description": "Name of the existing sheet to append to."},
                        "rows":  {"type": "array", "items": {"type": "array"}, "description": "Rows to append, each an array of cell values."}
                    }}
                },
                "replacements": {"type": "array", "description": "For edit_pdf: [{find, replace}]",
                    "items": {"type": "object", "properties": {
                        "find":    {"type": "string", "description": "Text string to find in the PDF."},
                        "replace": {"type": "string", "description": "Replacement text."}
                    }}
                },
                "delete_pages": {"type": "array", "description": "For edit_pdf: 1-indexed page numbers to delete", "items": {"type": "integer"}}
            },
            "required": ["action", "path"]
        }),
    });

    // Dynamic per-agent A2A tools.
    if let Some(reg) = agents {
        for handle in reg.all() {
            if handle.id == caller_id {
                continue;
            }
            tools.push(ToolDef {
                name: format!("agent_{}", handle.id),
                description: format!(
                    "Send a task to agent '{}'. Returns the agent's reply.",
                    handle.id
                ),
                parameters: json!({
                    "type": "object",
                    "properties": {
                        "text": {"type": "string", "description": "Task or message to send"}
                    },
                    "required": ["text"]
                }),
            });
        }
    }

    // External remote agent A2A tools (remote gateways).
    tracing::debug!(
        count = external_agents.len(),
        "build_tool_list: external agents"
    );
    for ext in external_agents {
        if ext.id == caller_id {
            continue;
        }
        tools.push(ToolDef {
            name: format!("agent_{}", ext.id),
            description: format!(
                "Send a task to remote agent '{}' at {}. Returns the agent's reply.",
                ext.id, ext.url
            ),
            parameters: json!({
                "type": "object",
                "properties": {
                    "text": {"type": "string", "description": "Task or message to send"}
                },
                "required": ["text"]
            }),
        });
    }

    // Skill tools.
    for skill in skills.all() {
        for spec in &skill.tools {
            tools.push(ToolDef {
                name: format!("{}.{}", skill.name, spec.name),
                description: spec.description.clone(),
                parameters: spec
                    .input_schema
                    .clone()
                    .unwrap_or_else(|| Value::Object(Default::default())),
            });
        }
    }

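    // Inject `additionalProperties: false` and `$schema` at the top level of
    // every tool's parameters object. Keys a tool already defines are left
    // untouched, and parameters that are not JSON objects are skipped. This
    // enables constrained decoding in Ollama/vLLM, which dramatically reduces
    // digit loss on small models (e.g. 9b).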
    for tool in &mut tools {
        if let Some(obj) = tool.parameters.as_object_mut() {
            obj.entry("additionalProperties").or_insert(json!(false));
            obj.entry("$schema")
                .or_insert(json!("http://json-schema.org/draft-07/schema#"));
        }
    }

    tools
}