//! Tool list builder — generates the consolidated ToolDef list for an agent.
//!
//! Extracted from `runtime.rs` to reduce file size.
//! All public items are re-exported by `runtime.rs` so callers are unaffected.
use serde_json::{Value, json};
use super::registry::AgentRegistry;
use crate::{
config::schema::ExternalAgentConfig,
plugin::{PluginRegistry, WasmPlugin},
provider::ToolDef,
skill::SkillRegistry,
};
/// Collect one `ToolDef` per tool exported by the given WASM plugins.
///
/// Each advertised name has the form `<plugin>.<tool>`; the description and
/// parameter schema are copied from the plugin's own tool entry. The result
/// follows the order of `plugins`, then the order of each plugin's tools.
pub(crate) fn build_wasm_tool_defs(plugins: &[WasmPlugin]) -> Vec<ToolDef> {
    let mut defs = Vec::new();
    for plugin in plugins {
        for tool in plugin.tools.iter() {
            defs.push(ToolDef {
                // Namespacing keeps tools from different plugins distinct.
                name: format!("{}.{}", plugin.name, tool.name),
                description: tool.description.clone(),
                parameters: tool.parameters.clone(),
            });
        }
    }
    defs
}
/// Collect one `ToolDef` per tool declared in the manifest of every
/// shell-bridge plugin in `plugins`.
///
/// Names use the same `<plugin>.<tool>` form as the WASM plugin tools. A tool
/// whose manifest carries no `input_schema` is advertised with an empty
/// object schema (`{"type": "object", "properties": {}}`).
pub(crate) fn build_shell_tool_defs(plugins: &PluginRegistry) -> Vec<ToolDef> {
    let mut defs = Vec::new();
    for (plugin_name, plugin) in plugins.shell_plugins_iter() {
        for tool in plugin.manifest.tools.iter() {
            // Fall back to a parameterless object schema when none is declared.
            let parameters = match tool.input_schema.clone() {
                Some(schema) => schema,
                None => serde_json::json!({
                    "type": "object",
                    "properties": {}
                }),
            };
            defs.push(ToolDef {
                name: format!("{}.{}", plugin_name, tool.name),
                description: tool.description.clone(),
                parameters,
            });
        }
    }
    defs
}
/// Render the "Installed Plugins" section of the system prompt.
///
/// The section lists every WASM plugin and every shell-bridge plugin together
/// with its tools, and tells the model to prefer a matching plugin tool over
/// a generic browser-automation flow.
///
/// Returns `None` when `wasm_plugins` is empty and `shell_plugins` is either
/// absent or yields no entries. Otherwise returns the header text followed by
/// one `<plugin>` block per plugin, ordered by plugin name so the output is
/// byte-stable from run to run.
pub(crate) fn build_plugins_system(
    wasm_plugins: &[WasmPlugin],
    shell_plugins: Option<&PluginRegistry>,
) -> Option<String> {
    // Fixed text placed ahead of the per-plugin blocks.
    const PREAMBLE: &str = "## Installed Plugins\n\
        Plugins automate external services (e.g. image/video generation, \
        marketplace ops). When the user's task matches a plugin tool, prefer \
        it over a generic browser-automation flow.\n\
        Priority: plugins > skills > built-in tools.\n\n";

    // Single layout for one plugin entry, shared by both plugin flavours.
    fn render_block(
        name: &str,
        version: &str,
        description: &str,
        tool_lines: &[String],
    ) -> String {
        format!(
            "<plugin name=\"{}\" version=\"{}\">\n{}\n\nTools:\n{}\n</plugin>",
            name,
            version,
            description,
            tool_lines.join("\n"),
        )
    }

    let shell_is_empty = match shell_plugins {
        Some(registry) => registry.shell_plugins_iter().next().is_none(),
        None => true,
    };
    if shell_is_empty && wasm_plugins.is_empty() {
        return None;
    }

    // (plugin name, rendered block) pairs; the name is kept only as sort key.
    let mut entries: Vec<(String, String)> = Vec::with_capacity(wasm_plugins.len());

    for wasm in wasm_plugins {
        let tool_lines: Vec<String> = wasm
            .tools
            .iter()
            .map(|tool| format!(" - {}.{}: {}", wasm.name, tool.name, tool.description))
            .collect();
        let block = render_block(
            wasm.name.as_str(),
            wasm.version.as_deref().unwrap_or(""),
            wasm.description.as_deref().unwrap_or(""),
            &tool_lines,
        );
        entries.push((wasm.name.clone(), block));
    }

    if let Some(registry) = shell_plugins {
        entries.extend(registry.shell_plugins_iter().map(|(plugin_name, plugin)| {
            let manifest = &plugin.manifest;
            let tool_lines: Vec<String> = manifest
                .tools
                .iter()
                .map(|tool| format!(" - {}.{}: {}", plugin_name, tool.name, tool.description))
                .collect();
            let block = render_block(
                plugin_name.as_str(),
                manifest.version.as_deref().unwrap_or(""),
                manifest.description.as_deref().unwrap_or(""),
                &tool_lines,
            );
            (plugin_name.clone(), block)
        }));
    }

    // The existing rationale for sorting: HashMap iteration order is
    // nondeterministic, and an unstable system prompt invalidates the LLM's
    // KV cache. The sort is stable, so entries with equal names keep their
    // insertion order (WASM before shell).
    entries.sort_by(|(left, _), (right, _)| left.cmp(right));

    let body = entries
        .into_iter()
        .map(|(_, block)| block)
        .collect::<Vec<String>>()
        .join("\n\n");
    Some(format!("{}{}", PREAMBLE, body))
}
/// Resolve a toolset level plus optional custom tool names into the set of
/// tool names an agent may use.
///
/// * `toolset` — one of `"minimal"`, `"web"`, `"code"`, `"standard"` or
///   `"full"`. Any other value is treated like `"standard"`.
/// * `custom_tools` — extra tool names supplied by the caller.
///
/// Returns:
/// * `None` for `"full"` without custom tools (no filtering at all);
/// * `Some(custom)` for `"full"` with custom tools (the custom list alone
///   acts as the whitelist);
/// * `Some(preset ∪ custom)` for every other level, deduplicated.
pub(crate) fn toolset_allowed_names(
    toolset: &str,
    custom_tools: Option<&Vec<String>>,
) -> Option<std::collections::HashSet<String>> {
    const MINIMAL: &[&str] = &[
        "execute_command",
        "read_file",
        "write_file",
        "send_file",
        "list_dir",
        "search_file",
        "search_content",
        "web_search",
        "web_fetch",
        "memory",
        "clarify",
        "anycli",
        "use_skill",
    ];
    const WEB: &[&str] = &[
        "web_search",
        "web_fetch",
        "web_download",
        "read_file",
        "write_file",
        "list_dir",
        "search_file",
        "memory",
        "use_skill",
    ];
    const CODE: &[&str] = &[
        "execute_command",
        "read_file",
        "write_file",
        "list_dir",
        "search_file",
        "search_content",
        "memory",
        "use_skill",
    ];
    // NOTE(review): unlike MINIMAL, this list has no "send_file" — confirm
    // that is intentional.
    const STANDARD: &[&str] = &[
        "execute_command",
        "read_file",
        "write_file",
        "list_dir",
        "search_file",
        "search_content",
        "web_search",
        "web_fetch",
        "memory",
        "web_browser",
        "image_gen",
        "video_gen",
        "channel",
        "cron",
        "computer_use",
        "clarify",
        "anycli",
        "use_skill",
        "task",
    ];

    let preset: &[&str] = match toolset {
        // "full" has no preset: either nothing is filtered, or the custom
        // list on its own becomes the whitelist.
        "full" => return custom_tools.map(|names| names.iter().cloned().collect()),
        "minimal" => MINIMAL,
        "web" => WEB,
        "code" => CODE,
        // Covers "standard" and any unrecognised level.
        _ => STANDARD,
    };

    // Start from the preset, then fold in the caller's extras; the set
    // removes duplicates.
    let mut allowed: std::collections::HashSet<String> =
        preset.iter().map(|name| (*name).to_owned()).collect();
    if let Some(extra) = custom_tools {
        allowed.extend(extra.iter().cloned());
    }
    Some(allowed)
}
/// Build the complete tool list for an agent runtime.
///
/// Includes built-in tools, per-agent A2A tools, external agent tools,
/// and skill-derived tools.
pub(crate) fn build_tool_list(
skills: &SkillRegistry,
agents: Option<&AgentRegistry>,
caller_id: &str,
external_agents: &[ExternalAgentConfig],
) -> Vec<ToolDef> {
let mut tools = Vec::new();
// Built-in tools — consolidated (32+ tools -> ~13 unified tools).
tools.push(ToolDef {
name: "memory".to_owned(),
description: "Manage long-term memory across sessions.\n\
Actions:\n\
- search: Semantic search over stored memories. Example: {\"action\":\"search\",\"query\":\"user preferences\"}\n\
- get: Retrieve a specific memory by ID. Example: {\"action\":\"get\",\"id\":\"abc-123\"}\n\
- put: Store a new memory. Example: {\"action\":\"put\",\"text\":\"User prefers dark mode\",\"kind\":\"fact\"}\n\
Use this tool to recall prior context, user preferences, or previously learned information.\n\
Search BEFORE answering questions about past conversations or user details.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"action": {"type": "string", "enum": ["search", "get", "put"], "description": "Action to perform: search, get, or put"},
"query": {"type": "string", "description": "Search query (for search). Examples: 'user name', 'project deadlines', 'API keys'"},
"id": {"type": "string", "description": "Memory document ID (for get)"},
"text": {"type": "string", "description": "Content to store (for put). Be specific and include context."},
"scope": {"type": "string", "description": "Scope filter (optional)"},
"kind": {"type": "string", "description": "Document kind: note (general), fact (verified info), remember (user explicitly asked to remember). Do NOT use kind=summary; session summaries are written automatically by /compact, /new, /reset."},
"top_k": {"type": "integer", "description": "Max results (for search, default 5)"}
},
"required": ["action"]
}),
});
// `use_skill` — first-class entry point for installed skills. Listed
// EARLY in the tool list so the LLM notices it before web_fetch /
// web_browser / execute_command. Only registered when at least one
// skill is installed; otherwise it'd be dead surface area.
if skills.all().next().is_some() {
let skill_names: Vec<String> = skills.all().map(|s| s.name.clone()).collect();
let names_hint = if skill_names.is_empty() {
String::new()
} else {
format!(" Installed skill names: {}.", skill_names.join(", "))
};
tools.push(ToolDef {
name: "use_skill".to_owned(),
description: format!(
"ACTIVATE an installed skill. Use this BEFORE web_fetch / web_browser / \
execute_command whenever the user's task matches any skill description \
shown in the system prompt under '## Installed Skills' (flights, hotels, \
stocks, weather, finance data, etc.).\n\n\
Returns the full SKILL.md so you know the exact CLI command and flags. \
After calling use_skill you typically call execute_command with the CLI \
from skill_md.\n\n\
Common failure to avoid: defaulting to web_fetch on a domain a skill \
already covers. If a skill description matches, you MUST use_skill \
first.{names_hint}"
),
parameters: json!({
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "Exact skill name from the Installed Skills list (e.g. 'flyai', 'hithink-market-query'). Case-sensitive."
}
},
"required": ["name"]
}),
});
}
// `task` — escalate the current chat into a multi-turn background task.
// The LLM decides when sustained work is warranted (implementation
// spanning many tool calls, multi-file refactor, deep research). For
// short Q&A, jokes, greetings, and one-shot tool calls the LLM should
// just answer directly without calling this tool.
tools.push(ToolDef {
name: "task".to_owned(),
description: "Escalate the user's current request into a multi-turn background task. \
Call this ONLY when the work clearly needs sustained execution: implementation \
across multiple files, multi-step debugging, deep research with many web fetches, \
data pipelines, end-to-end deployments. \
Do NOT call for: greetings, casual questions, single tool calls (one web_search, \
one read_file, one calculation), explanations, or anything you can answer in this \
same turn. When in doubt, just answer directly — the user can always send \
`/task <request>` to escalate manually.\n\n\
Returns a task ID; the gateway then runs the work in the background and posts \
replies as turns complete. After calling task, your reply to the user should be a \
short acknowledgement only — the actual work happens in the background turns.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"task_text": {
"type": "string",
"description": "The task instruction for the background runner. Usually the user's original request, optionally clarified."
},
"max_turns": {
"type": "integer",
"description": "Optional cap on agent turns. Default 10. Raise for big jobs (e.g. 30 for full feature implementation)."
},
"ttl_secs": {
"type": "integer",
"description": "Optional wall-clock deadline in seconds. Default 3600 (1h)."
}
},
"required": ["task_text"]
}),
});
tools.push(ToolDef {
name: "read_file".to_owned(),
description: "Read a file from the agent workspace.\n\
Path is relative to workspace root.\n\
Supports text files, code, config, markdown, etc.\n\
Example: {\"path\":\"config.json\"} or {\"path\":\"src/main.py\"}\n\
For binary files (images, PDFs), use the dedicated tools instead.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"path": {"type": "string", "description": "Relative file path. Examples: 'README.md', 'src/app.py', 'data/output.csv'"}
},
"required": ["path"]
}),
});
tools.push(ToolDef {
name: "write_file".to_owned(),
description: "Write/create a file. Use this for ALL file creation and writing — do NOT use execute_command with notepad, echo, or any other editor/command to create files.\n\
Creates parent directories as needed. Path is relative to workspace root.\n\
Both 'path' and 'content' are required.\n\
CRITICAL: When writing user-provided content, copy it EXACTLY character-by-character. \
Never omit, rephrase, or regenerate numbers, dates, addresses, names, or any specific values. \
If the user said '135号168栋', the content MUST contain '135号168栋' exactly.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"path": {"type": "string", "description": "Relative file path within the workspace (REQUIRED). Example: 'output.py'"},
"content": {"type": "string", "description": "File content to write (REQUIRED). MUST preserve all numbers, dates, and specific values from the user's message exactly as given."},
"explanation": {"type": "string", "description": "Brief explanation of what you are creating and why, to help organize your thoughts before writing content."}
},
"required": ["path", "content"]
}),
});
tools.push(ToolDef {
name: "send_file".to_owned(),
description: "Send a file from the workspace to the user as an attachment. \
Use this when the user asks you to send, share, or download a file. \
The file will be delivered as a chat attachment (not as text). \
Path is relative to workspace root.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"path": {"type": "string", "description": "File path to send (relative to workspace or absolute)"}
},
"required": ["path"]
}),
});
tools.push(ToolDef {
name: "execute_command".to_owned(),
description: if cfg!(target_os = "windows") {
"Run a shell command (PowerShell) on Windows.\n\
IMPORTANT: For file listing use `list_dir`, for file search use `search_file`, for content search use `search_content`, for tool install use `install_tool`, for HTTP/API requests use `web_fetch`. Only use exec for commands that have no dedicated tool.\n\
Use exec for: git operations, running scripts (node/python/cargo), system info (systeminfo, ipconfig, Get-Process), package management (npm/pip), process management (Start-Process, Stop-Process, taskkill).\n\
Do NOT use exec for HTTP requests (curl/wget/Invoke-WebRequest) or file downloads — use `web_fetch` / `web_download` instead.\n\
\n\
Tool selection: PowerShell for file/system ops; python for data processing (CSV/JSON/automation).\n\
Check tool availability first (`Get-Command python`, `Get-Command node`). Use `install_tool` for system tools.\n\
\n\
PowerShell patterns:\n\
- Pipes: Get-Process | Sort-Object CPU -Descending | Select-Object -First 10\n\
- Network connectivity check (not fetch): Test-NetConnection host -Port 80\n\
- Text: (Get-Content file) -replace 'old','new'\n\
- Dates: Get-Date -Format 'yyyy-MM-dd'; [DateTimeOffset]::Now.ToUnixTimeSeconds()\n\
Python patterns: `python -c \"import json; ...\"` for one-liners, write to $env:TEMP\\script.py for multi-line.\n\
\n\
Best practices: Do NOT wrap commands in extra cmd /c or powershell -Command layers. Use `| Select-Object -First 10` to limit output.\n\
Do NOT use exec for destructive operations on personal directories (Desktop, Downloads, Documents).\n\
Commands run in background by default (wait=false). Use wait=true only for short commands where you need the output immediately.\n\
If a command fails, do NOT retry with the same arguments. Try a different approach or ask the user."
.to_owned()
} else if cfg!(target_os = "macos") {
"Run a shell command (bash/zsh) on macOS.\n\
IMPORTANT: For file listing use `list_dir`, for file search use `search_file`, for content search use `search_content`, for tool install use `install_tool`, for HTTP/API requests use `web_fetch`. Only use exec for commands that have no dedicated tool.\n\
Use exec for: git operations, running scripts (node/python/cargo), system info (uname, df, top), package management (brew/npm/pip), process management (ps, kill).\n\
Do NOT use exec for HTTP requests (curl/wget) or file downloads — use `web_fetch` / `web_download` instead.\n\
\n\
Tool selection: bash for file/text/system ops; python3 for data processing (CSV/JSON/automation).\n\
Check tool availability first (`which python3`, `which node`). Use `install_tool` for system tools.\n\
\n\
Bash patterns: `| head -n 20` to limit output, `date +%s` for timestamps, `find . -name '*.py' -mtime -7`.\n\
Python patterns: `python3 -c \"import json; ...\"` for one-liners, write to /tmp/script.py for multi-line, `pip install` for packages.\n\
\n\
Best practices: pipe large output through head/tail, use wait=false for long tasks, never run destructive commands on personal dirs.\n\
If a command fails, do NOT retry with the same arguments. Try a different approach or ask the user."
.to_owned()
} else {
"Run a shell command (bash/sh) on Linux.\n\
IMPORTANT: For file listing use `list_dir`, for file search use `search_file`, for content search use `search_content`, for tool install use `install_tool`, for HTTP/API requests use `web_fetch`. Only use exec for commands that have no dedicated tool.\n\
Use exec for: git operations, running scripts (node/python/cargo), system info (uname, df, top), package management (apt/npm/pip), process management (ps, kill).\n\
Do NOT use exec for HTTP requests (curl/wget) or file downloads — use `web_fetch` / `web_download` instead.\n\
\n\
Tool selection: bash for file/text/system ops; python3 for data processing (CSV/JSON/automation).\n\
Check tool availability first (`which python3`, `which node`). Use `install_tool` for system tools.\n\
\n\
Bash patterns: `| head -n 20` to limit output, `date +%s` for timestamps, `find . -name '*.py' -mtime -7`.\n\
Python patterns: `python3 -c \"import json; ...\"` for one-liners, write to /tmp/script.py for multi-line, `pip install` for packages.\n\
\n\
Best practices: pipe large output through head/tail, use wait=false for long tasks, never run destructive commands on personal dirs.\n\
If a command fails, do NOT retry with the same arguments. Try a different approach or ask the user."
.to_owned()
},
parameters: json!({
"type": "object",
"properties": {
"command": {"type": "string", "description": "Shell command to execute. Must be valid for the current OS."},
"timeout": {"type": "integer", "description": "Timeout in seconds (default: 30, max: 300)"},
"wait": {"type": "boolean", "description": "If true (default), wait for the command to finish and return stdout/stderr/exit_code. Set to false only for long-running commands (builds, servers, installs) where you want a task_id to poll later."},
"task_id": {"type": "string", "description": "Poll a previously started background task by its task_id."}
},
"required": []
}),
});
tools.push(ToolDef {
name: "agent".to_owned(),
description: "Manage agents. You are the architect — delegate work, never block.\n\
Actions:\n\
- task: Create a task agent for a one-shot job. Returns immediately with task_id. The task agent runs independently and delivers results when done.\n\
- spawn: Create a persistent agent (survives across turns).\n\
- send: Send a message to an existing agent (async, result delivered when done).\n\
- list: List all registered agents.\n\
- update: Edit a named agent's config (model, name). Pass model=\"\" to remove and fall back to defaults. Hot-reloads automatically.\n\
- kill: Stop an agent.\n\
Tips:\n\
- Use task for independent, parallelizable work. You can dispatch multiple tasks at once.\n\
- Always specify toolset matching the task (web for search, code for file ops).\n\
- After dispatching, tell the user what you delegated and continue with other work.\n\
\n\
CRITICAL: When user EXPLICITLY asks to use a specific tool (opencode, claudecode, codex),\n\
you MUST call that tool directly. DO NOT create a task agent instead.\n\
- User says \"让opencode去...\" -> call opencode tool (action=call, NOT task agent)\n\
- User says \"用claudecode...\" -> call claudecode tool\n\
\n\
[HARD RULE - DECEPTION]\n\
Claiming \"已委托opencode\" or \"已用opencode检查\" WITHOUT a tool_call is LYING.\n\
If you say these words, there MUST be an actual opencode tool call in this turn.\n\
No tool call + claim of delegation = you are deceiving the user.\n\
This is worse than admitting \"I didn't call it\" - trust is destroyed.\n\
\n\
DO NOT delegate these tasks — handle them yourself directly:\n\
- Any GUI/desktop automation (WeChat, Finder, Safari, system apps, etc.)\n\
- Anything using `computer_use` (screenshot, click, key, type)\n\
- AppleScript/osascript workflows that control another application\n\
- Visual verification (\"did the window appear?\", \"is the button there?\")\n\
Reason: GUI tasks depend on live state (frontmost window, mouse position, display\n\
focus) and the current session's permission grants. Sub-agents start fresh with\n\
no visual context and frequently fail on first attempts, creating loops. The\n\
main agent that already has the screenshot/context should complete these tasks.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"action": {"type": "string", "enum": ["spawn", "task", "send", "list", "update", "kill"], "description": "Action to perform"},
"id": {"type": "string", "description": "Agent ID (for spawn/send/update/kill)"},
"model": {"type": "string", "description": "Model string (for spawn/task/update). Pass \"\" to remove per-agent model override."},
"name": {"type": "string", "description": "Display name (for update)"},
"system": {"type": "string", "description": "Role description (for spawn/task)"},
"message": {"type": "string", "description": "Message to send (for task/send)"},
"toolset": {"type": "string", "enum": ["minimal", "standard", "web", "code", "full"], "description": "Tool access level. Default: standard."}
},
"required": ["action"]
}),
});
// Tool installer (structured alternative to exec rsclaw tools install).
tools.push(ToolDef {
name: "install_tool".to_owned(),
description: "Install a tool/runtime. Available: python, node, ffmpeg, chrome, opencode, claude-code, sherpa-onnx.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"name": {"type": "string", "enum": ["python", "node", "ffmpeg", "chrome", "opencode", "claude-code", "sherpa-onnx"], "description": "Tool name to install"}
},
"required": ["name"]
}),
});
// File operation tools (structured alternatives to exec ls/find/grep).
// These help small models avoid digit-loss and dead-loop issues.
tools.push(ToolDef {
name: "list_dir".to_owned(),
description: "List files and directories in a given path.\n\
Use this instead of execute_command with ls/dir.\n\
- Returns file names, sizes, and types.\n\
- Does not display hidden/dot files by default.\n\
- Use 'pattern' to filter by glob (e.g. '*.json').\n\
- Use 'recursive' to list subdirectories.\n\
CRITICAL: 'path' must be returned before other parameters.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"path": {"type": "string", "description": "Directory path to list. Relative to workspace root or absolute. Examples: '.', 'src/', '/tmp'"},
"recursive": {"type": "boolean", "description": "If true, list all files in subdirectories recursively. Default: false."},
"pattern": {"type": "string", "description": "Glob pattern filter. Examples: '*.json', '*.py', 'test_*'"}
}
}),
});
tools.push(ToolDef {
name: "search_file".to_owned(),
description: "Search for files by name pattern. Use this instead of execute_command with find.\n\
- Supports wildcard patterns for flexible matching.\n\
- Returns relative file paths.\n\
- Prefer this over list_dir when you have a specific file pattern.\n\
CRITICAL: 'pattern' must be returned before other parameters.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"pattern": {"type": "string", "description": "REQUIRED: File name pattern with wildcards. Examples: '*.log', 'config*', 'test_*.py', '**/*.rs'"},
"path": {"type": "string", "description": "Root directory to search in. Defaults to workspace root. Can be relative or absolute."},
"max_results": {"type": "integer", "description": "Maximum results to return (default: 20)"}
},
"required": ["pattern"]
}),
});
tools.push(ToolDef {
name: "search_content".to_owned(),
description: "Search file contents by regex or text pattern. Built on ripgrep.\n\
Use this instead of execute_command with grep/rg. This tool is faster and respects .gitignore.\n\
- Supports full regex syntax: 'log.*Error', 'function\\s+\\w+', 'TODO|FIXME'\n\
- Escape special chars for literal matches: 'functionCall\\('\n\
- Use 'include' to filter by file type: '*.py', '*.rs'\n\
CRITICAL: 'pattern' must be returned before other parameters.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"pattern": {"type": "string", "description": "REQUIRED: Regex pattern to search for. Examples: 'TODO', 'import.*from', 'class\\s+\\w+', 'def main'"},
"path": {"type": "string", "description": "File or directory to search in. Defaults to workspace root."},
"include": {"type": "string", "description": "File glob filter. Examples: '*.py', '*.{ts,tsx}', '*.rs'"},
"ignore_case": {"type": "boolean", "description": "If true, match case-insensitively. Default: false."},
"max_results": {"type": "integer", "description": "Maximum results (default: 20)"}
},
"required": ["pattern"]
}),
});
// Web tools.
tools.push(ToolDef {
name: "web_search".to_owned(),
description: "Search the web for real-time information.\n\
When to use:\n\
- Questions beyond your knowledge cutoff or training data\n\
- Current events, recent updates, time-sensitive information\n\
- Latest documentation, API references, version-specific features\n\
- When unsure about facts — search BEFORE saying 'I don't know'\n\
Tips:\n\
- Be specific: include version numbers, dates, or exact terms\n\
- Use the current year (not past years) for latest docs\n\
- For Chinese content, search in Chinese for better results".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"query": {"type": "string", "description": "Search query — be specific, include keywords and dates"},
"provider": {"type": "string", "description": "Search provider: duckduckgo, google, bing, brave. Leave empty for default."},
"limit": {"type": "integer", "description": "Max results (default 5)"}
},
"required": ["query"]
}),
});
// `web_fetch` — general HTTP request tool definition.
// The description is LLM-facing guidance: it positions this tool ahead of
// `execute_command` + curl/wget, explains when `prompt` should and should not
// be passed, documents the method/headers/body surface, and lists the cases
// where falling back to curl is still expected. The schema requires only `url`.
tools.push(ToolDef {
name: "web_fetch".to_owned(),
description: "PREFERRED tool for HTTP requests — web pages, REST APIs, documentation, articles.\n\
Do NOT use execute_command with curl/wget/Invoke-WebRequest — use web_fetch instead.\n\
- URL must be fully-formed (https://...)\n\
- HTTP auto-upgraded to HTTPS\n\
- HTML pages are dehydrated to clean text/markdown\n\
- JSON / plain-text / non-HTML responses are returned as-is (raw body)\n\
- Falls back to browser rendering for JS-heavy pages and CAPTCHA-blocked sites (GET only)\n\
- GET responses without headers/body are cached 15 minutes; non-GET bypasses cache\n\
- For large pages, pass 'prompt' to LLM-extract specific information\n\
- DO NOT pass 'prompt' for compact structured responses (JSON APIs, RSS, \
<2KB text). Passing 'prompt' triggers an internal LLM summarize pass \
(extra ~30-60 s + tokens). For api.* / *.json / arxiv Atom / RSS / \
search-result JSON, omit 'prompt' — read the raw response directly.\n\
\n\
METHODS, HEADERS, BODY — supports the full HTTP surface:\n\
- method: GET (default), POST, PUT, PATCH, DELETE\n\
- headers: object — Authorization, X-API-Key, Cookie, custom Content-Type, etc.\n\
- body: string (raw) OR object/array (auto JSON-serialized + Content-Type set)\n\
\n\
FALL BACK to execute_command + curl only when you need:\n\
- File upload via multipart/form-data\n\
- Streaming responses (SSE, chunked transfers consumed incrementally)\n\
- Sites behind interactive login (use web_browser when interaction is needed)".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"url": {"type": "string", "description": "Full URL to fetch (e.g. https://docs.example.com/api)"},
"method": {"type": "string", "enum": ["GET", "POST", "PUT", "PATCH", "DELETE"], "description": "HTTP method. Default: GET"},
// Header values are constrained to strings via `additionalProperties`.
"headers": {"type": "object", "description": "Optional request headers, e.g. {\"Authorization\": \"Bearer xyz\", \"X-API-Key\": \"...\"}", "additionalProperties": {"type": "string"}},
// `body` carries no "type" key: its description allows either a raw
// string or an object/array, so the schema leaves the type open.
"body": {"description": "Optional request body. String → sent as-is (set Content-Type via headers). Object/array → JSON-serialized; Content-Type defaulted to application/json."},
"prompt": {"type": "string", "description": "OPTIONAL. What to extract from a LARGE HTML page (e.g. 'list all API endpoints'). Triggers an LLM-summarize pass on the response. OMIT for compact JSON/XML/text where you can read the raw body — passing 'prompt' on small structured responses just adds ~30-60 s of LLM latency for nothing."}
},
"required": ["url"]
}),
});
tools.push(ToolDef {
name: "web_download".to_owned(),
description: "Download a file (image/video/document/archive) from URL to local path.\n\
- Supports resume for large files\n\
- Use use_browser_cookies=true for authenticated downloads (e.g. after logging in via web_browser)\n\
- Path is relative to workspace/downloads/ — just use filename like 'photo.jpg'\n\
- Do NOT use execute_command with curl/wget — always use this tool\n\
- After downloading, use send_file to deliver the file to the user".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"url": {"type": "string", "description": "Full URL to download"},
"path": {"type": "string", "description": "Destination filename (e.g. 'video.mp4', 'report.pdf'). Relative to workspace/downloads/."},
"cookies": {"type": "string", "description": "Cookie header string, e.g. 'session=abc; token=xyz'"},
"use_browser_cookies": {"type": "boolean", "description": "Auto-extract cookies from active browser session for this URL's domain (use after web_browser login)"}
},
"required": ["url", "path"]
}),
});
// `web_browser` — browser-automation tool definition.
// The description is a long LLM-facing playbook: the open → snapshot →
// click/fill workflow, the `pick` shortcut for autocomplete inputs, the
// site-rules directory the model is told to read before acting on known hosts,
// and screenshot routing rules. The schema is a single flat object whose
// `action` enum selects the operation; every other property is optional and
// documented as applying to specific actions. Only `action` is required.
tools.push(ToolDef {
name: "web_browser".to_owned(),
description: "Control a web browser. Core workflow:\n\
1. `open` — navigate to a URL\n\
2. `snapshot` — get page structure with interactive element refs (@e1, @e2...). Use `interactive: true` to only get actionable elements (saves tokens).\n\
3. `click` ref=@e1 / `fill` ref=@e2 text='...' — interact using refs\n\
4. Re-snapshot after any page change to get updated refs\n\
Autocomplete inputs (Ctrip/Fliggy/Qunar city pickers, Google/Baidu search, flight/hotel/movie pickers, any input that pops a dropdown of suggestions): ALWAYS use `pick` ref=@eN query='武汉' in a single call — it focuses, types, waits for the popup, and clicks the first visible candidate. DO NOT build it yourself out of click+type+wait+screenshot loops; that wastes 5-7 iterations per field and the dropdown often dismisses before you re-screenshot. `pick` handles IME/React-controlled inputs that silently drop programmatic values.\n\
Interaction: hover (triggers menus/tooltips), dblclick, drag (from=@e1 to=@e2, for sliders), focus, scrollintoview.\n\
Quick search: `search` — auto-find search box on ANY site, fill text, submit, return results.\n\
`clickAt` ref=@e1 or x=100 y=200 — real mouse click via CDP (for file dialogs, anti-bot sites).\n\
Semantic locators: `getbytext` value='Submit', `getbyrole` value='button', `getbylabel` value='Email' — find elements without @ref.\n\
Frame: `frame` selector=@e1 (switch to iframe), `mainframe` (switch back).\n\
Console: `console` — get browser console messages (log/warn/error).\n\
Content: `content` — get full page HTML.\n\
WaitForUrl: `waitforurl` url='dashboard' — wait for URL change (after login/redirect).\n\
Other: type, select, check, scroll, screenshot, pdf, press, back, forward, reload, wait, evaluate, cookies, get_text, get_url, get_title, find, get_article, upload, new_tab, switch_tab, close_tab.\n\
IMPORTANT: Always snapshot BEFORE clicking/filling. Element refs change after page updates.\n\n\
Site-rules — platform-specific DOM selectors, URL routes, and gotchas live under \
`~/.rsclaw/tools/web_browser/site-rules/`. Two layouts coexist:\n\
- `<domain>.md` — flat, single-file (e.g. `douyin.md`, `kuaishou.md`, \
`xiaohongshu.md`, `bilibili.md`)\n\
- `<domain>/<task>.md` — nested per-task (e.g. `amazon/product-search.md`, \
`tiktok/upload.md`, `linkedin/connect.md`)\n\
When you `open` a URL whose host matches either layout, read_file the matching \
file FIRST so you use the verified selectors instead of guessing them per-session. \
Saves 5+ snapshot/click iterations and avoids stale-selector breakage.\n\n\
BEFORE reading any nested `<domain>/<task>.md` (which may have been imported from \
browser-use/browser-harness and use Python helper syntax), read \
`~/.rsclaw/tools/web_browser/site-rules/_VOCABULARY.md` once per session — it maps \
their `click_at_xy` / `type_text` / `js(...)` etc. to your `clickAt` / `type` / \
`evaluate` actions so you can translate the procedural code on the fly.\n\n\
Screenshot routing — do NOT call `action=screenshot` without a target:\n\
- Web page screenshot (user gave a URL): pass it inline,\n\
`action=screenshot url=https://example.com` — this navigates\n\
first then captures, single call. This is the one-shot equivalent\n\
of `/webshot`.\n\
- Desktop / system screenshot (no URL, user just says \"screenshot\"\n\
or \"截图\"): you cannot do this from web_browser. Tell the user\n\
to type `/ss` or `/screenshot` (preparse fast path → macOS\n\
`screencapture` / Windows / Linux equivalent).\n\
- Plain `action=screenshot` (no url) only captures what's already\n\
in the persistent browser session — usually a blank Chrome new\n\
tab → near-black PNG. Don't do this.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
// The full set of action names the schema accepts; the description
// above only walks through the most common ones.
"action": {"type": "string", "enum": [
"open", "navigate", "snapshot", "click", "clickAt", "fill", "type", "pick",
"select", "check", "uncheck", "scroll", "screenshot", "pdf",
"hover", "dblclick", "drag", "focus", "scrollintoview",
"back", "forward", "reload", "get_text", "get_url", "get_title",
"wait", "evaluate", "cookies", "press", "set_viewport",
"dialog", "state", "network", "new_tab", "list_tabs",
"switch_tab", "close_tab", "highlight", "clipboard", "find",
"get_article", "upload", "context", "emulate", "diff", "record",
"search", "console", "content", "frame", "mainframe",
"waitforurl", "getbytext", "getbyrole", "getbylabel"
]},
"url": {"type": "string", "description": "URL for open/navigate"},
"interactive":{"type": "boolean", "description": "For snapshot: only return actionable elements (saves ~80% tokens). Default: false"},
"ref": {"type": "string", "description": "Element ref like @e3 from snapshot"},
"from": {"type": "string", "description": "Source element ref for drag"},
"to": {"type": "string", "description": "Target element ref for drag"},
"x": {"type": "number", "description": "X pixel coordinate for clickAt"},
"y": {"type": "number", "description": "Y pixel coordinate for clickAt"},
"text": {"type": "string", "description": "Text for fill/type/click-by-text/clipboard/dialog"},
"query": {"type": "string", "description": "Query text for pick (typed into input and used to match dropdown candidates)"},
"index": {"type": "integer", "description": "For pick: zero-based candidate index when multiple match (default 0)"},
// Note the two timeout fields use different units: `timeout_ms`
// (milliseconds, pick only) versus `timeout` below (seconds).
"timeout_ms": {"type": "integer", "description": "For pick: total time to wait for dropdown and click target in ms (default 5000)"},
"value": {"type": "string", "description": "Value for select, or sub-action for cookies/state/dialog/network/clipboard/context/emulate/diff/record"},
"key": {"type": "string", "description": "Key name for press (Enter, Tab, Escape, etc.)"},
"direction": {"type": "string", "enum": ["up", "down", "left", "right"], "description": "Scroll direction"},
"amount": {"type": "integer", "description": "Scroll distance in pixels (default 500)"},
"selector": {"type": "string", "description": "CSS selector for scroll container"},
"js": {"type": "string", "description": "JavaScript for evaluate action"},
"target": {"type": "string", "description": "Wait target: CSS selector, text, url, networkidle, fn"},
"timeout": {"type": "number", "description": "Timeout in seconds (default 15)"},
"format": {"type": "string", "enum": ["png", "jpeg"], "description": "Screenshot format"},
"quality": {"type": "integer", "description": "JPEG quality (1-100)"},
"full_page": {"type": "boolean", "description": "Capture full scrollable page"},
"annotate": {"type": "boolean", "description": "Overlay numbered labels on interactive elements"},
"width": {"type": "integer", "description": "Viewport width for set_viewport"},
"height": {"type": "integer", "description": "Viewport height for set_viewport"},
"scale": {"type": "number", "description": "Device scale factor for set_viewport"},
"mobile": {"type": "boolean", "description": "Mobile emulation for set_viewport"},
"target_id": {"type": "string", "description": "Tab target ID for switch_tab/close_tab"},
"state": {"type": "object", "description": "State object for state load"},
"pattern": {"type": "string", "description": "URL pattern for network block/intercept"},
"by": {"type": "string", "enum": ["text", "label"], "description": "Find element by text or label"},
"then": {"type": "string", "description": "Action after find (click)"},
"cookie": {"type": "object", "description": "Cookie object for cookies set"},
"files": {"type": "array", "items": {"type": "string"}, "description": "File paths for upload"},
"context_id": {"type": "string", "description": "Browser context ID for cookie isolation"},
"latitude": {"type": "number", "description": "Latitude for geolocation emulation"},
"longitude": {"type": "number", "description": "Longitude for geolocation emulation"},
"accuracy": {"type": "number", "description": "Geolocation accuracy in meters"},
"locale": {"type": "string", "description": "Locale for emulation (e.g. en-US, zh-CN)"},
"timezone_id":{"type": "string", "description": "IANA timezone (e.g. Asia/Shanghai)"},
"permissions":{"type": "array", "items": {"type": "string"}, "description": "Browser permissions to grant"},
"action_type":{"type": "string", "description": "Intercept action: block or mock"},
"body": {"type": "string", "description": "Mock response body for network intercept"},
"headed": {"type": "boolean", "description": "true=foreground (visible window), false=background (headless). Default: auto-detect based on display availability. Omit this field to use the default."}
},
"required": ["action"]
}),
});
// `computer_use` — desktop-control tool definition.
// The description restricts use to explicit user requests and states that
// coordinates are expressed in physical pixels. The schema is a flat object:
// `action` selects the operation and the remaining properties are optional
// arguments documented per action. Only `action` is required.
tools.push(ToolDef {
name: "computer_use".to_owned(),
description: "Control the computer desktop. ONLY use when the user EXPLICITLY asks to take a screenshot, click, type, or interact with the desktop. Do NOT call this tool just because the message mentions words like 'screenshot' or 'screen' in other contexts. Screenshots auto-resize, and mouse coordinates use the same physical-pixel space as the returned `original_width`/`original_height` (HiDPI is handled internally — multiply image-pixel coords by the returned `scale` and pass directly).".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"action": {"type": "string", "enum": [
"screenshot", "mouse_move", "mouse_click", "left_click",
"double_click", "triple_click", "right_click", "middle_click",
"drag", "scroll", "type", "key", "hold_key",
"cursor_position", "get_active_window", "ui_tree",
"list_app_rules", "get_app_rule", "wait"
], "description": "Action to perform. ui_tree returns the accessibility tree of the focused window (interactive elements with role/label/coordinates). list_app_rules/get_app_rule load per-app desktop automation playbooks from ~/.rsclaw/tools/computer_use/app-rules/."},
"x": {"type": "number", "description": "X coordinate (mouse actions, drag start) in physical pixels"},
"y": {"type": "number", "description": "Y coordinate (mouse actions, drag start) in physical pixels"},
"to_x": {"type": "number", "description": "Drag destination X (physical pixels)"},
"to_y": {"type": "number", "description": "Drag destination Y (physical pixels)"},
"button": {"type": "string", "enum": ["left", "right", "middle"], "description": "Mouse button (default: left)"},
"text": {"type": "string", "description": "Text for type action"},
"key": {"type": "string", "description": "Key name or combo (e.g. Enter, ctrl+c, cmd+shift+s)"},
"then": {"type": "string", "enum": ["click", "double_click", "right_click", "triple_click"], "description": "Sub-action for hold_key (default: click)"},
"direction": {"type": "string", "enum": ["up", "down", "left", "right"], "description": "Scroll direction (default: down)"},
"amount": {"type": "integer", "description": "Scroll clicks (default: 3)"},
"ms": {"type": "integer", "description": "Wait duration in milliseconds (max 10000)"},
"name": {"type": "string", "description": "App-rule name (for get_app_rule action)"},
// `region` is the only nested object here; when supplied, all four of
// its members are required by the sub-schema.
"region": {"type": "object", "description": "Optional screenshot region in physical pixels. Use after a full screenshot to zoom in on a specific area without recapturing the whole screen.", "properties": {
"x": {"type": "number"},
"y": {"type": "number"},
"width": {"type": "number"},
"height": {"type": "number"}
}, "required": ["x", "y", "width", "height"]},
"max_long_edge_px": {"type": "integer", "description": "Screenshot resize cap: longest edge of returned image. Default 1024 (XGA). Range 64-8192. Larger values = more detail + more tokens."}
},
"required": ["action"]
}),
});
// --- New openclaw-compatible tools ---
tools.push(ToolDef {
name: "image_gen".to_owned(),
description: "Generate an image from a text description using an AI image model. Pass the user's original description as-is (preserve their language, do not translate).".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"prompt": {"type": "string", "description": "Image description. IMPORTANT: use the user's original language and wording, do not translate to English."},
"size": {"type": "string", "description": "Image size, e.g. 2048x2048", "default": "2048x2048"}
},
"required": ["prompt"]
}),
});
tools.push(ToolDef {
name: "video_gen".to_owned(),
description: "Generate a video from a text description using an AI video model. \
Use this tool whenever the user asks to: create a video, animate an image, \
generate a clip, make a short film, produce footage, or anything involving \
video output. Pass the user's original description as-is (preserve their \
language, do not translate).".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"prompt": {"type": "string", "description": "Video description. Use the user's original language and wording."},
"duration": {"type": "integer", "description": "Duration in seconds (default: 5)", "default": 5},
"aspect_ratio": {"type": "string", "description": "Aspect ratio: 16:9, 9:16, 1:1 (default: 16:9)", "default": "16:9"},
"model": {"type": "string", "description": "Video model to use, e.g. seedance, minimax, kling (optional, uses configured default)"}
},
"required": ["prompt"]
}),
});
tools.push(ToolDef {
name: "pdf".to_owned(),
description: "Extract text content from a PDF file or URL.\n\
- Supports local files and remote URLs.\n\
- Returns extracted text suitable for analysis.\n\
- For large PDFs, content may be truncated.\n\
Example: {\"path\":\"report.pdf\"} or {\"path\":\"https://example.com/doc.pdf\"}".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"path": {"type": "string", "description": "REQUIRED: File path (relative to workspace) or full URL. Examples: 'docs/report.pdf', 'https://example.com/whitepaper.pdf'"}
},
"required": ["path"]
}),
});
tools.push(ToolDef {
name: "text_to_voice".to_owned(),
description: "Convert text to speech audio and send as voice message.\n\
- Generates audio from text input.\n\
- On macOS uses 'say', on Linux uses espeak/sherpa-onnx.\n\
- Result is sent as a voice attachment to the user.\n\
Example: {\"text\":\"Hello world\",\"voice\":\"Tingting\"}".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"text": {"type": "string", "description": "REQUIRED: Text to convert to speech. Can be any language."},
"voice": {"type": "string", "description": "Voice name. macOS: run 'say -v ?' for list. Linux: run 'espeak --voices'. Examples: 'Tingting' (Chinese), 'Samantha' (English)"}
},
"required": ["text"]
}),
});
tools.push(ToolDef {
name: "send_message".to_owned(),
description: "Send a message to a chat channel target (user or group).\n\
Use this to proactively reach out to users on messaging platforms.\n\
Channel is auto-detected from current session if not specified.\n\
Example: {\"target\":\"user123\",\"text\":\"Task completed!\",\"channel\":\"telegram\"}".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"channel": {"type": "string", "description": "Channel type. Examples: 'telegram', 'discord', 'feishu', 'weixin', 'slack'"},
"target": {"type": "string", "description": "REQUIRED: Target user ID or group/chat ID"},
"text": {"type": "string", "description": "REQUIRED: Message text to send"}
},
"required": ["target", "text"]
}),
});
// `cron` — scheduled-job management tool definition.
// The description is a long LLM-facing guide covering four decisions the model
// has to make: one-shot (`delay_ms`) vs recurring (`schedule` /
// `every_seconds`), job `kind` (agentTurn vs systemEvent), the 5-field cron
// expression format, and round-robin `iter` vs batching. The schema requires
// only `action`; every other property is documented as applying to specific
// actions.
tools.push(ToolDef {
name: "cron".to_owned(),
description: "List, add, edit, remove, enable or disable cron jobs.\n\
Supports recurring (cron expression OR fixed interval) and one-shot (delay_ms) schedules.\n\
\n\
CHOOSING ONE-SHOT vs RECURRING — read carefully, this is the most common mistake:\n\
ONE-SHOT (set `delay_ms`):\n\
Use when the user names a SPECIFIC time/date that fires ONCE.\n\
- \"22:04截图给我\" / \"晚上 8 点提醒我\" / \"15分钟后\" / \"明天下午3点\"\n\
Compute milliseconds from now until the target moment, set delay_ms.\n\
Auto-removes after firing — exactly what \"once\" means.\n\
RECURRING (set `schedule` cron expr or `every_seconds`):\n\
Use ONLY when the user explicitly says repetition: 每天 / 每周 / 每小时 /\n\
every day / every Monday / 每隔 N 分钟 / weekly / hourly.\n\
DO NOT pick a daily cron expr like \"4 22 * * *\" when the user asked for ONE\n\
specific time today (\"22:04截图发我\"). That creates a job that fires every day\n\
at 22:04 forever — the user has to manually delete it. The cron tool DOES NOT\n\
auto-collapse \"today only\" intent into one-shot; you must do it.\n\
\n\
One-shot jobs auto-remove after execution.\n\
For edit/remove/enable/disable, prefer using `index` from the list output instead of `id`.\n\
\n\
KIND — what should fire when the schedule triggers:\n\
agentTurn (DEFAULT — pick this unless you are CERTAIN systemEvent applies):\n\
Dispatch `message` to the agent at fire time. The agent runs LLM + tools\n\
and delivers the result. Required for ANY task whose answer changes between\n\
runs or depends on outside information: weather, prices, news, comments,\n\
emails, system status, file/page contents, conditional logic, summaries.\n\
systemEvent: deliver `message` text VERBATIM to the user. NO LLM, NO TOOLS,\n\
NO QUERIES — every fire produces the exact same string you wrote in `message`.\n\
Use ONLY when the message is a fixed text reminder whose content never\n\
needs to be computed (e.g. \"drink water\", \"stand up\", \"daily 9am: standup\").\n\
Disqualifying signal: if `message` describes an action to perform (\"check X\",\n\
\"query Y\", \"fetch Z\", \"每N分钟查/取/看…\") rather than literal text to display,\n\
it MUST be agentTurn. Picking systemEvent here means every fire just echoes\n\
the instruction back to the user instead of executing it — a real, observed\n\
failure mode. Token cost is not a reason to downgrade to systemEvent; a\n\
useless echo is more expensive than a correct LLM call.\n\
\n\
CRON FORMAT (schedule field) — EXACTLY 5 fields separated by spaces:\n\
minute hour day month weekday\n\
Common examples:\n\
\"*/5 * * * *\" every 5 minutes\n\
\"0 * * * *\" every hour on the hour\n\
\"0 17 * * *\" 5:00 PM daily (NOT \"017 * * *\" — needs the space!)\n\
\"30 8 * * 1-5\" 8:30 AM on weekdays\n\
\"0 9 1 * *\" 9:00 AM on the 1st of each month\n\
Pitfall: '0 17 * * *' is FIVE fields. Writing '017 * * *' (no space after 0) is\n\
only FOUR fields and will be rejected. Always check your expression has exactly\n\
5 whitespace-separated tokens.\n\
\n\
ITER (round-robin) — set when the user wants to rotate through a list,\n\
ONE item per firing (e.g. \"按顺序轮流查询东京、曼谷、迪拜的天气\").\n\
Pass `iter` as a JSON array of items, and use `{current}` (and optionally\n\
`{next}`, `{index}`, `{total}`) as placeholders inside `message`. The\n\
scheduler advances the cursor every fire and persists it — the agent\n\
never has to remember progress, and a restart can never repeat or skip.\n\
Example: message=\"查询{current}的当前天气\", iter=[\"东京\",\"曼谷\",\"迪拜\"].\n\
Without iter the LLM must track its own progress in memory, which is\n\
unreliable across restarts and embedding-model swaps — prefer iter when rotation is intended.\n\
\n\
ROTATE vs BATCH — disambiguate before reaching for iter:\n\
ROTATE (use iter): the user wants ONE item per fire, cycling. Trigger\n\
phrases: \"轮流\" / \"按顺序\" / \"依次\" / \"each time\" / \"one at a time\" /\n\
\"rotate\" / \"cycle through\" / \"每次只查一个\".\n\
BATCH (do NOT use iter): the user wants ALL items reported together each\n\
fire. Phrases: \"每 N 分钟报一次 A、B、C 的价格\" / \"every N min give me\n\
the prices of A, B, C\" / lists with no rotation signal. Build a single\n\
cron whose message names every item — the agent fans out tool calls in\n\
one turn and replies with the combined result.\n\
When in doubt, BATCH is the safer default: a single late report is much\n\
less surprising than silently dropping items every cycle.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"action": {"type": "string", "enum": ["list", "add", "edit", "remove", "enable", "disable"], "description": "Action to perform"},
"schedule": {"type": "string", "description": "Cron schedule expression (for add/edit recurring jobs). Must be 5 whitespace-separated fields."},
"every_seconds": {"type": "number", "description": "Fire every N seconds (for add). Use for fixed intervals like 45 minutes (every_seconds=2700) that cannot be expressed as a 5-field cron expression."},
"delay_ms": {"type": "number", "description": "Delay in milliseconds for one-shot timer (e.g., 1200000 = 20 min). Use instead of schedule for reminders/timers."},
"message": {"type": "string", "description": "Message or task to run (for add, edit)"},
"kind": {"type": "string", "enum": ["agentTurn", "systemEvent"], "description": "What fires when the schedule triggers. agentTurn (default) = run agent (LLM+tools) so the answer reflects current state. systemEvent = deliver `message` verbatim with NO agent run — only valid when `message` is fixed display text whose content never needs to be computed. If the user wants something queried/fetched/checked on a schedule, use agentTurn."},
// A list position is a whole number, so the schema says "integer" rather
// than "number" to keep the model from emitting fractional indices.
"index": {"type": "integer", "description": "Job index from list (1-based, for edit/remove/enable/disable - preferred)"},
"id": {"type": "string", "description": "Job ID (for edit/remove/enable/disable - use index instead if possible)"},
"name": {"type": "string", "description": "Job name (for add, edit)"},
"tz": {"type": "string", "description": "Timezone IANA name. Auto-detected if omitted. Only set if user explicitly requests a different timezone."},
"agentId": {"type": "string", "description": "Agent ID to run the job (for add, edit, default: main)"},
"iter": {"type": "array", "items": {"type": "string"}, "description": "Round-robin items the scheduler cycles through, one per firing. Use `{current}` (and optionally `{next}`, `{index}`, `{total}`) as placeholders in `message`. Set this whenever the user asks for rotating tasks (e.g. 'cycle through cities'); leaves the agent free of progress-tracking duties. On `edit`: pass a new array to replace items; pass `null` or `[]` to clear iter mode."},
// Same reasoning as `index`: a 0-based cursor is integral.
"iter_cursor": {"type": "integer", "description": "On `edit`: explicitly set the iter cursor (0-based). Use to reset rotation back to the start, or to jump to a specific item. Without `iter`, requires the job to already have iter configured."}
},
"required": ["action"]
}),
});
// `session` — session-management tool definition. `action` selects one of
// send / list / history / status and is the only required argument; the other
// properties are documented as applying to specific actions.
tools.push(ToolDef {
name: "session".to_owned(),
description: "Manage sessions. Actions: send (message to another agent), list (all active sessions), history (retrieve conversation), status (session info).".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"action": {"type": "string", "enum": ["send", "list", "history", "status"], "description": "Action to perform"},
"agentId": {"type": "string", "description": "Target agent ID (for send)"},
"sessionKey": {"type": "string", "description": "Session key (for send/history/status)"},
"message": {"type": "string", "description": "Message text (for send)"},
// A message count is a whole number; "integer" matches how the other
// tools in this list type their `limit` / `max_results` fields.
"limit": {"type": "integer", "description": "Max messages to return (for history, default 50)"}
},
"required": ["action"]
}),
});
tools.push(ToolDef {
name: "gateway".to_owned(),
description: "Query gateway status and information.\n\
- status: Current gateway state, uptime, connected channels, active agents\n\
- health: Health check (OK/degraded)\n\
- version: Gateway version and build info".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"action": {"type": "string", "enum": ["status", "health", "version"], "description": "REQUIRED: Info to retrieve. Examples: 'status', 'version'"}
},
"required": ["action"]
}),
});
tools.push(ToolDef {
name: "opencode".to_owned(),
description: "Execute coding/debugging tasks using OpenCode (a powerful coding agent).\n\n\
MANDATORY USAGE RULES:\n\
1. When user reports a bug/error/crash -> MUST call this tool to investigate\n\
2. When user asks to fix/debug a script -> MUST call this tool\n\
3. When user says '让opencode...' or '用opencode...' -> MUST call this tool\n\
4. DO NOT say '已委托opencode' without actually calling this tool\n\
5. Saying you delegated without calling = LYING = worst failure mode\n\
\n\
If you cannot or will not call this tool, tell user honestly why.\n\
NEVER pretend to have called it.\n\
\n\
Technical: Create project subdirectory for new projects. Runs async, results delivered when complete.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"task": {"type": "string", "description": "The coding task to execute. Be specific about file paths and always mention creating a project subdirectory for new projects."}
},
"required": ["task"]
}),
});
tools.push(ToolDef {
name: "claudecode".to_owned(),
description: "Execute coding tasks using Claude Code (official Claude Agent SDK via ACP protocol). Uses Claude's native coding capabilities with full context awareness. IMPORTANT: When creating new projects or files, ALWAYS create a dedicated project directory first. The task will run asynchronously and results will be sent when complete.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"task": {"type": "string", "description": "The coding task to execute. Be specific about requirements and file paths."}
},
"required": ["task"]
}),
});
tools.push(ToolDef {
name: "codex".to_owned(),
description: "Execute coding tasks using OpenAI Codex CLI (MCP Server mode). Uses OpenAI's coding capabilities with sandboxed file operations. IMPORTANT: When creating new projects or files, ALWAYS create a dedicated project directory first. Requires Codex CLI installation: npm install -g @openai/codex. The task will run asynchronously and results will be sent when complete.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"task": {"type": "string", "description": "The coding task to execute. Be specific about requirements and file paths."}
},
"required": ["task"]
}),
});
// `channel` — channel-specific message actions. `action` is the only required
// argument. The description's action list is kept in step with the `action`
// enum below so the model is told about every operation the schema accepts
// (`forward` and `unpin` included).
// NOTE(review): the `messageId` hint still names only reply/pin/delete —
// confirm against the handler whether forward/unpin need it as well.
tools.push(ToolDef {
name: "channel".to_owned(),
description: "Perform channel-specific actions (send, reply, forward, pin, unpin, delete messages). Channel is auto-detected from current session or can be specified explicitly: telegram, discord, slack, whatsapp, feishu, weixin, qq, dingtalk.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"action": {"type": "string", "enum": ["send", "reply", "forward", "pin", "unpin", "delete"], "description": "Action to perform"},
"channel": {"type": "string", "description": "Channel type (auto-detected if omitted): telegram, discord, slack, whatsapp, feishu, weixin, qq, dingtalk"},
"chatId": {"type": "string", "description": "Chat/channel ID"},
"text": {"type": "string", "description": "Message text"},
"messageId": {"type": "string", "description": "Message ID (for reply/pin/delete)"}
},
"required": ["action"]
}),
});
tools.push(ToolDef {
name: "anycli".to_owned(),
description: "Extract structured data from websites using declarative adapters.\n\
Actions:\n\
- run: Execute an adapter command (e.g., hackernews top, bilibili hot)\n\
- list: List all available adapters\n\
- info: Show adapter details and available commands\n\
- search: Search community hub for adapters\n\
- install: Install an adapter from the hub\n\
Built-in adapters: hackernews, bilibili, arxiv, wikipedia, github-trending.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"action": {"type": "string", "enum": ["run", "list", "info", "search", "install"], "description": "Action to perform"},
"adapter": {"type": "string", "description": "Adapter name (for run/info)"},
"command": {"type": "string", "description": "Command name within adapter (for run)"},
"params": {"type": "object", "description": "Key-value parameters (for run), e.g. {\"limit\": \"10\", \"query\": \"rust\"}"},
"query": {"type": "string", "description": "Search query (for search)"},
"name": {"type": "string", "description": "Adapter name (for install)"},
"format": {"type": "string", "enum": ["json", "table", "csv", "markdown"], "description": "Output format (for run, default: json)"}
},
"required": ["action"]
}),
});
tools.push(ToolDef {
name: "clarify".to_owned(),
description: "Ask the user a clarifying question before proceeding. Use when:\n\
- The request is ambiguous and multiple valid interpretations exist\n\
- A choice is needed (e.g., which file, which format, which approach)\n\
- Destructive or irreversible action needs confirmation\n\
Provide options for quick selection or leave open-ended for free-form answers.\n\
IMPORTANT: Do NOT use this for simple confirmations. Only when genuine ambiguity exists.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"question": {"type": "string", "description": "The question to ask the user"},
"options": {"type": "array", "items": {"type": "string"}, "description": "Optional list of choices. Omit for open-ended questions."}
},
"required": ["question"]
}),
});
tools.push(ToolDef {
name: "pairing".to_owned(),
description: "Manage channel pairing (dmPolicy=pairing). Actions: list (show pending codes and approved peers), approve (approve a pairing code), revoke (revoke an approved peer).".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"action": {"type": "string", "enum": ["list", "approve", "revoke"], "description": "Action to perform"},
"code": {"type": "string", "description": "Pairing code to approve (for approve action, e.g. ZGTB-NB79)"},
"channel": {"type": "string", "description": "Channel name (for revoke action, e.g. qq, telegram)"},
"peerId": {"type": "string", "description": "Peer ID to revoke (for revoke action)"}
},
"required": ["action"]
}),
});
// Document tools — split into simple independent tools for better small-model compatibility.
// Formatting hint appended to the descriptions of the content-bearing tools (create_docx, create_pdf).
let doc_fmt_hint = " Structure content professionally: use # headings, - bullet lists, blank lines between sections. For notices/reports: add title, organize into sections.";
tools.push(ToolDef {
name: "create_docx".to_owned(),
description: format!("Create a Word document (.docx).{doc_fmt_hint} After creating, use send_file to deliver."),
parameters: json!({
"type": "object",
"properties": {
"path": {"type": "string", "description": "File path, e.g. 'report.docx'"},
"content": {"type": "string", "description": "Document content. Use # for headings, - for lists, blank lines for paragraphs."},
"title": {"type": "string", "description": "Document title (optional, displayed at top)"},
"explanation": {"type": "string", "description": "Brief explanation of what you are creating and why, to help organize your thoughts before writing content."}
},
"required": ["path", "content"]
}),
});
tools.push(ToolDef {
name: "create_pdf".to_owned(),
description: format!("Create a PDF document.{doc_fmt_hint} After creating, use send_file to deliver."),
parameters: json!({
"type": "object",
"properties": {
"path": {"type": "string", "description": "File path, e.g. 'report.pdf'"},
"content": {"type": "string", "description": "Document content. Use # for headings, - for lists, blank lines for paragraphs."},
"title": {"type": "string", "description": "Document title (optional, displayed at top)"},
"explanation": {"type": "string", "description": "Brief explanation of what you are creating and why, to help organize your thoughts before writing content."}
},
"required": ["path", "content"]
}),
});
tools.push(ToolDef {
name: "create_xlsx".to_owned(),
description: "Create an Excel spreadsheet (.xlsx). Extract structured data into columns with meaningful headers. After creating, use send_file to deliver.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"path": {"type": "string", "description": "File path, e.g. 'data.xlsx'"},
"sheets": {"type": "array", "description": "Sheets: [{name, headers: [str], rows: [[value]]}]",
"items": {"type": "object", "properties": {
"name": {"type": "string", "description": "Sheet name (tab label in the spreadsheet)."},
"headers": {"type": "array", "items": {"type": "string"}, "description": "Column header labels for the first row."},
"rows": {"type": "array", "items": {"type": "array"}, "description": "Data rows, each an array of cell values in column order."}
}}
},
"explanation": {"type": "string", "description": "Brief explanation of what you are creating and why, to help organize your thoughts before writing content."}
},
"required": ["path", "sheets"]
}),
});
tools.push(ToolDef {
name: "create_pptx".to_owned(),
description: "Create a PowerPoint presentation (.pptx). After creating, use send_file to deliver.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"path": {"type": "string", "description": "File path, e.g. 'deck.pptx'"},
"slides": {"type": "array", "description": "Slides: [{title, body}]",
"items": {"type": "object", "properties": {
"title": {"type": "string", "description": "Slide title displayed at the top."},
"body": {"type": "string", "description": "Slide body text. Use newlines to separate bullet points."}
}}
},
"explanation": {"type": "string", "description": "Brief explanation of what you are creating and why, to help organize your thoughts before writing content."}
},
"required": ["path", "slides"]
}),
});
// Keep doc tool for read/edit operations (less frequently used by small models).
tools.push(ToolDef {
name: "doc".to_owned(),
description: "Read or edit existing documents.\n\
Actions: read_doc (xlsx/docx/pdf), edit_excel, edit_word, edit_pdf.\n\
For CREATING new documents, use create_docx/create_pdf/create_xlsx/create_pptx instead.".to_owned(),
parameters: json!({
"type": "object",
"properties": {
"action": {"type": "string", "enum": ["read_doc", "edit_excel", "edit_word", "edit_pdf"], "description": "Action to perform"},
"path": {"type": "string", "description": "File path"},
"content": {"type": "string", "description": "For edit_word: replacement content"},
"append": {"type": "string", "description": "For edit_word: text to append"},
"sheets": {"type": "array", "description": "For edit_excel: [{name, headers, rows}]",
"items": {"type": "object", "properties": {
"name": {"type": "string", "description": "Sheet name (tab label in the spreadsheet)."},
"headers": {"type": "array", "items": {"type": "string"}, "description": "Column header labels for the first row."},
"rows": {"type": "array", "items": {"type": "array"}, "description": "Data rows, each an array of cell values in column order."}
}}
},
"append_rows": {"type": "array", "description": "For edit_excel: append rows to an existing sheet without replacing it.",
"items": {"type": "object", "properties": {
"sheet": {"type": "string", "description": "Name of the existing sheet to append to."},
"rows": {"type": "array", "items": {"type": "array"}, "description": "Rows to append, each an array of cell values."}
}}
},
"replacements": {"type": "array", "description": "For edit_pdf: [{find, replace}]",
"items": {"type": "object", "properties": {
"find": {"type": "string", "description": "Text string to find in the PDF."},
"replace": {"type": "string", "description": "Replacement text."}
}}
},
"delete_pages": {"type": "array", "description": "For edit_pdf: 1-indexed page numbers to delete", "items": {"type": "integer"}}
},
"required": ["action", "path"]
}),
});
// Dynamic per-agent A2A tools.
if let Some(reg) = agents {
for handle in reg.all() {
if handle.id == caller_id {
continue;
}
tools.push(ToolDef {
name: format!("agent_{}", handle.id),
description: format!(
"Send a task to agent '{}'. Returns the agent's reply.",
handle.id
),
parameters: json!({
"type": "object",
"properties": {
"text": {"type": "string", "description": "Task or message to send"}
},
"required": ["text"]
}),
});
}
}
// External remote agent A2A tools (remote gateways).
tracing::debug!(
count = external_agents.len(),
"build_tool_list: external agents"
);
for ext in external_agents {
if ext.id == caller_id {
continue;
}
tools.push(ToolDef {
name: format!("agent_{}", ext.id),
description: format!(
"Send a task to remote agent '{}' at {}. Returns the agent's reply.",
ext.id, ext.url
),
parameters: json!({
"type": "object",
"properties": {
"text": {"type": "string", "description": "Task or message to send"}
},
"required": ["text"]
}),
});
}
// Skill tools.
for skill in skills.all() {
for spec in &skill.tools {
tools.push(ToolDef {
name: format!("{}.{}", skill.name, spec.name),
description: spec.description.clone(),
parameters: spec
.input_schema
.clone()
.unwrap_or_else(|| Value::Object(Default::default())),
});
}
}
// Inject `additionalProperties: false` and `$schema` into every tool whose
// parameters are a JSON object, leaving any value the schema already sets
// untouched. This enables constrained decoding in Ollama/vLLM, which
// dramatically reduces digit-loss on small models (9b).
for tool in &mut tools {
if let Some(obj) = tool.parameters.as_object_mut() {
obj.entry("additionalProperties").or_insert(json!(false));
obj.entry("$schema")
.or_insert(json!("http://json-schema.org/draft-07/schema#"));
}
}
tools
}