//! Static tool descriptors returned by `tools/list`.
//!
//! Why: the JSON schemas for all 18 MCP tools are long but mechanical; keeping
//! them in a dedicated file makes it easy to update a single tool's description
//! or input schema without touching any dispatch logic.
//! What: exports `tool_descriptors()` which returns the full `serde_json::Value`
//! array consumed by the `tools/list` handler in `dispatch`.
//! Test: `tools_list_returns_all_tools`, `test_tools_list_response`,
//! `tools_list_returns_five_search_tools`, and
//! `per_lane_tool_descriptions_carry_when_to_use_hooks` in `tests.rs`.
use serde_json::Value;
/// Static metadata for `tools/list`. Keep in sync with `call_tool` in `mod.rs`.
///
/// Why: listing tools separately from their implementation allows the MCP
/// client to introspect the full tool catalogue without triggering any daemon
/// HTTP calls.
/// What: returns a `Value::Array` containing one descriptor object per
/// registered tool. Each object has `name`, `description`, and `inputSchema`.
/// Test: `test_tools_list_response` asserts every required tool is present and
/// carries an `inputSchema`.
pub fn tool_descriptors() -> Value {
serde_json::json!([
// Issue #138 — per-lane MCP tools. Tool descriptions are
// first-class LLM prompts: each one opens with "when to use",
// gives concrete fit/don't-fit examples, states the cost, and
// explains the failure mode (STAGE_NOT_READY). The legacy
// `search` tool is preserved below as a back-compat alias.
{
"name": "search_lexical",
"description": "Find code by exact symbol name, regex, or literal string. Equivalent to a fast ripgrep on the indexed codebase. Use this FIRST for any query where the user mentions a specific identifier (function name, struct name, file name) or a literal phrase. Best for: `apply_archive_downrank`, `pub fn main`, `\"TODO: refactor\"`, filename globs like `*.toml`. Don't use for: conceptual queries like \"how does authentication work\" — use `search_semantic` instead. Always available on any indexed project. Cheapest tool in this family.",
"inputSchema": {
"type": "object",
"required": ["index_id", "query"],
"properties": {
"index_id": { "type": "string", "description": "Target index id (from `list_indexes`)" },
"query": { "type": "string", "description": "Exact symbol, regex, or literal phrase" },
"top_k": { "type": "integer", "default": 10 },
"mode": { "type": "string", "enum": ["code", "text", "data"], "default": "code" },
"exclude_archived": { "type": "boolean", "default": false },
"branch_files": { "type": "array", "items": { "type": "string" } },
"branch_boost": { "type": "number" },
"branch": { "type": "string" }
},
"examples": [
{ "index_id": "trusty-tools", "query": "apply_archive_downrank" },
{ "index_id": "trusty-tools", "query": "pub fn main" },
{ "index_id": "trusty-tools", "query": "TODO: refactor" }
]
}
},
{
"name": "search_semantic",
"description": "Find code by meaning, not by literal text. Uses embedding-based similarity to retrieve chunks that semantically match the query, even when the query words don't appear in the code. Best for: \"code that handles JWT verification\", \"the place that does community detection\", \"how does the embedder batch requests\". Don't use for: exact symbol lookups (use `search_lexical`) or finding callers of a known function (use `search_kg`). Requires Stage 2 (embeddings) to be ready on the index — returns a STAGE_NOT_READY error with a `suggested_tools` retry hint if not. Medium cost.",
"inputSchema": {
"type": "object",
"required": ["index_id", "query"],
"properties": {
"index_id": { "type": "string" },
"query": { "type": "string", "description": "Conceptual query — meaning, not literal text" },
"top_k": { "type": "integer", "default": 10 },
"mode": { "type": "string", "enum": ["code", "text", "data"], "default": "code" },
"exclude_archived": { "type": "boolean", "default": false }
},
"examples": [
{ "index_id": "trusty-tools", "query": "code that handles JWT verification" },
{ "index_id": "trusty-tools", "query": "the place that does community detection" }
]
}
},
{
"name": "search_kg",
"description": "Explore code structure from a known seed — either a chunk_id (from a previous search result) or a symbol name. Returns chunks connected to the seed via `calls`, `called_by`, `contains`, `inherits` edges. Best for: \"what calls `validate_token`\", \"what does `Authenticator` use internally\", impact analysis before a refactor. Don't use for: free-text discovery (use `search_semantic`) or initial entry-point finding (use `search_lexical` first). Requires Stage 3 (symbol graph) to be ready. Returns empty if the seed is not in the index. Cheap once you have a seed. Optional `refine_query`: provide a longer natural-language description to rerank and filter the expanded neighbourhood by semantic relevance — useful when the seed chunk is correct but you want only the most relevant callers/callees (issue #147).",
"inputSchema": {
"type": "object",
"required": ["index_id", "query"],
"properties": {
"index_id": { "type": "string" },
"query": { "type": "string", "description": "Seed: a symbol name or chunk_id from a previous result" },
"top_k": { "type": "integer", "default": 10 },
"mode": { "type": "string", "enum": ["code", "text", "data"], "default": "code" },
"refine_query": { "type": "string", "description": "Optional: rerank and filter expanded KG neighbours by cosine similarity to this natural-language description. Neighbours below the 0.4 cosine threshold are dropped. Omit to use default KG expansion without filtering." }
},
"examples": [
{ "index_id": "trusty-tools", "query": "validate_token" },
{ "index_id": "trusty-tools", "query": "Authenticator", "refine_query": "callers that handle token refresh in the auth middleware" }
]
}
},
{
"name": "search_all",
"description": "When in doubt, use this. Runs the full hybrid pipeline (lexical + semantic + KG expansion) and merges results via RRF. More expensive than the targeted tools but catches edge cases. Use when: your query has both literal symbols AND conceptual phrasing (\"find the `AuthValidator` that handles refresh tokens\"), or when you've tried the targeted tools and they didn't surface what you need. Always available; gracefully degrades to whatever lanes are ready. When called without `index_id`, falls back to legacy cross-project fan-out behaviour (issue #10) — provide `index_id` for the per-index hybrid path.",
"inputSchema": {
"type": "object",
"required": ["query"],
"properties": {
"index_id": { "type": "string", "description": "Target index (omit for cross-project fan-out)" },
"query": { "type": "string" },
"top_k": { "type": "integer", "default": 10 },
"mode": { "type": "string", "enum": ["code", "text", "data"], "default": "code" },
"exclude_archived": { "type": "boolean", "default": false },
"full_content": { "type": "boolean", "default": false, "description": "Legacy fan-out only: include full chunk content in each hit" },
"branch_files": { "type": "array", "items": { "type": "string" } },
"branch_boost": { "type": "number" },
"branch": { "type": "string" }
},
"examples": [
{ "index_id": "trusty-tools", "query": "AuthValidator that handles refresh tokens" },
{ "query": "global cross-project fan-out without index_id" }
]
}
},
{
"name": "search",
"description": "Unified hybrid search (BM25+vector+KG+RRF) with mode-aware ranking (issue #77). The `mode` parameter (\"code\" | \"text\" | \"data\", default \"code\") picks the file-type penalty matrix: code prefers source (prose 0.1x, data 0.2x); text prefers prose docs (source 0.5x, data 0.3x); data prefers structured data (source 0.3x, prose 0.3x). Set `exclude_archived: true` to drop archived/deprecated/legacy chunks entirely instead of downranking them (issue #74). Supports branch-aware scoring via branch_files/branch_boost/branch (issue #122). Replaces the legacy `search_code` tool name; callers that omit `mode` get identical pre-#77 behaviour.",
"inputSchema": {
"type": "object",
"required": ["index_id", "query"],
"properties": {
"index_id": { "type": "string" },
"query": { "type": "string" },
"top_k": { "type": "integer", "default": 10 },
"mode": {
"type": "string",
"enum": ["code", "text", "data"],
"default": "code",
"description": "Ranking mode: prefer source code, prose docs, or structured data."
},
"exclude_archived": {
"type": "boolean",
"default": false,
"description": "Drop archived/deprecated/legacy chunks (paths like _archive/, archive/, _deprecated/, old/, .archive/; #[deprecated]; .archived/DEPRECATED markers) instead of downranking them."
},
"branch_files": {
"type": "array",
"items": { "type": "string" },
"description": "Files modified on current git branch (relative to index root). Boosted in results."
},
"branch_boost": {
"type": "number",
"description": "Score multiplier for branch files (default 1.5, range 1.0-3.0)."
},
"branch": {
"type": "string",
"description": "Branch name; daemon will compute branch_files via git if branch_files is absent."
}
}
}
},
{
"name": "index_file",
"description": "Add or update one file in an index",
"inputSchema": {
"type": "object",
"required": ["index_id", "path", "content"],
"properties": {
"index_id": { "type": "string" },
"path": { "type": "string" },
"content": { "type": "string" }
}
}
},
{
"name": "remove_file",
"description": "Remove a file's chunks from an index",
"inputSchema": {
"type": "object",
"required": ["index_id", "path"],
"properties": {
"index_id": { "type": "string" },
"path": { "type": "string" }
}
}
},
{
"name": "list_indexes",
"description": "List all registered indexes on this daemon",
"inputSchema": { "type": "object", "properties": {} }
},
{
"name": "create_index",
"description": "Register a new (empty) index",
"inputSchema": {
"type": "object",
"required": ["id", "root_path"],
"properties": {
"id": { "type": "string" },
"root_path": { "type": "string" }
}
}
},
{
"name": "search_similar",
"description": "Find chunks semantically similar to a given file/function via HNSW (issue #31)",
"inputSchema": {
"type": "object",
"required": ["file"],
"properties": {
"file": { "type": "string" },
"function": { "type": "string" },
"top_k": { "type": "number" },
"index": { "type": "string" }
}
}
},
{
"name": "search_health",
"description": "Probe daemon liveness and version",
"inputSchema": { "type": "object", "properties": {} }
},
{
"name": "delete_index",
"description": "Delete a registered index and all its data",
"inputSchema": {
"type": "object",
"required": ["index_id"],
"properties": {
"index_id": { "type": "string" }
}
}
},
{
"name": "reindex",
"description": "Trigger a full reindex of a collection (async, returns immediately)",
"inputSchema": {
"type": "object",
"required": ["index_id"],
"properties": {
"index_id": { "type": "string" },
"root_path": { "type": "string" }
}
}
},
{
"name": "index_status",
"description": "Get stats for an index (chunk count, root path)",
"inputSchema": {
"type": "object",
"required": ["index_id"],
"properties": {
"index_id": { "type": "string" }
}
}
},
{
"name": "list_chunks",
"description": "Paginated enumeration of every chunk in an index (issue #54). \
Two modes: offset/limit (stable order by file, start_line) for \
shallow paging, or cursor paging via `after` (issue #1325) for \
deep/bulk enumeration. Pass the response's `next_cursor` back as \
`after` to fetch the next page in O(page) time (an indexed seek) \
instead of the O(offset) scan that times out on large indexes. \
`next_cursor` is null once the corpus is exhausted.",
"inputSchema": {
"type": "object",
"required": ["index_id"],
"properties": {
"index_id": { "type": "string" },
"offset": { "type": "integer", "default": 0 },
"limit": { "type": "integer", "default": 100 },
"after": { "type": "string", "description": "Forward cursor (a chunk id, typically the previous page's next_cursor). When set, offset is ignored." }
}
}
},
{
"name": "get_call_chain",
"description": "Annotated call tree for a function entry point (issue #76). \
Returns plain-text prose with the entry function's signature, \
Why/What doc lines, its depth-1 callees with full source, and \
its depth-1 callers as signatures only. LLMs read this prose \
tree more reliably than JSON. Entry point accepts an exact \
symbol name, a case-insensitive fuzzy substring, or a \
`file:line` reference; the most-connected match wins ties.",
"inputSchema": {
"type": "object",
"required": ["index_id", "entry_point"],
"properties": {
"index_id": { "type": "string" },
"entry_point": { "type": "string", "description": "Function name, fuzzy substring, or file:line" },
"direction": { "type": "string", "enum": ["both", "outgoing", "callers"], "default": "both" },
"max_depth": { "type": "integer", "minimum": 1, "maximum": 4, "default": 2 },
"include_source": { "type": "boolean", "default": true, "description": "Embed full source at depth <= 1" }
}
}
},
{
"name": "grep",
"description": "Search indexed files using regex/literal patterns with ripgrep-compatible options. \
Greps the on-disk bytes of files the index already knows about, so no \
re-embedding occurs and line numbers are exact. Supports regex or fixed-string \
matching, case folding (-i), context windows (-A/-B/-C), include globs, \
multiline mode, files-with-matches (-l), invert (-v), and word-regexp (-w). \
When `index_id` is omitted the daemon fans out across every registered index.",
"inputSchema": {
"type": "object",
"required": ["pattern"],
"properties": {
"pattern": { "type": "string", "description": "Regex (default) or literal when fixed_strings=true" },
"index_id": { "type": "string", "description": "Optional index id; omit to fan out across all indexes" },
"case_insensitive": { "type": "boolean", "default": false, "description": "-i / --ignore-case" },
"context": { "type": "integer", "description": "-C: equal before/after context, overrides context_before/context_after" },
"context_before": { "type": "integer", "description": "-B: lines of context before each match" },
"context_after": { "type": "integer", "description": "-A: lines of context after each match" },
"glob": { "type": "string", "description": "--include glob (e.g. '**/*.rs')" },
"multiline": { "type": "boolean", "default": false, "description": "Let `.` span newlines" },
"fixed_strings": { "type": "boolean", "default": false, "description": "-F: treat pattern as literal" },
"files_with_matches": { "type": "boolean", "default": false, "description": "-l: return one path per matching file" },
"invert_match": { "type": "boolean", "default": false, "description": "-v: return lines that do NOT match" },
"word_regexp": { "type": "boolean", "default": false, "description": "-w: require word boundaries" },
"max_results": { "type": "integer", "default": 100, "description": "Hard cap on returned matches (alias: max_count)" },
"max_count": { "type": "integer", "description": "Alias for max_results (ripgrep --max-count parity)" }
}
}
},
{
"name": "chat",
"description": "Ask a natural-language question about the indexed codebase. \
Automatically searches for the top_k most relevant chunks and \
sends them as context to an OpenRouter LLM (default model: \
anthropic/claude-haiku-4.5). Returns {answer, sources, model}. \
Requires OPENROUTER_API_KEY env var on the daemon, or an \
`api_key` field in the request.",
"inputSchema": {
"type": "object",
"required": ["index_id"],
"properties": {
"index_id": { "type": "string" },
"message": { "type": "string", "description": "User question (alias: question)" },
"question": { "type": "string", "description": "User question (alias: message)" },
"history": { "type": "array", "items": { "type": "object" } },
"model": { "type": "string", "description": "OpenRouter model id (default: anthropic/claude-haiku-4.5)" },
"top_k": { "type": "integer", "description": "Number of context chunks (default: 5)", "default": 5 },
"api_key": { "type": "string", "description": "Fallback OpenRouter API key when OPENROUTER_API_KEY env is unset" }
}
}
},
{
"name": "upgrade",
"description": "Check for or install a new version of trusty-search (issue #537). \
With check=true (or without confirm): report current vs. available version — NEVER installs. \
With confirm=true: install via `cargo install trusty-search --locked`, run a binary \
health gate, then restart the daemon under launchd (or print a restart hint when \
not supervised). The MCP response is returned BEFORE the daemon exits.",
"inputSchema": {
"type": "object",
"properties": {
"check": { "type": "boolean", "description": "Report versions only, no install (default when confirm absent)", "default": true },
"confirm": { "type": "boolean", "description": "Set to true to install the new version. Must be explicit — never assumed.", "default": false }
},
"required": []
}
},
{
"name": "console_metrics",
"description": "Return a ConsoleMetricsReport with daemon health and index aggregate \
statistics (index_count, warm_boot_degraded, index list with id/root_path/size_bytes). \
Used by the trusty-console dashboard metrics poller (epic #1104).",
"inputSchema": {
"type": "object",
"properties": {},
"required": []
}
}
])
}
/// `tools/list` descriptors with the session's pinned index advertised (#1373).
///
/// Why: when `trusty-search serve --index <id>` pins the session to one
/// project index, the LLM should not have to call `list_indexes` and guess
/// which index to pass — it routinely picks the wrong one (usually the
/// persistent `claude-mpm` index). Advertising the pin in the schema makes
/// `index_id` optional and tells the model exactly what it defaults to, so a
/// bare `search`/`grep` resolves to the session's own project index.
/// What: returns [`tool_descriptors`] verbatim when `pinned` is `None`
/// (backward-compatible); when `Some(id)`, for every tool whose `inputSchema`
/// has an `index_id` property it (1) removes `index_id` from the `required`
/// array (the pin supplies the default) and (2) appends a note to the
/// `index_id` property description naming the pinned default.
/// Test: `pinned_descriptors_make_index_id_optional` and
/// `pinned_descriptors_annotate_index_id` in `tests_tools_list.rs`;
/// `tool_descriptors_pinned_none_is_unchanged` pins the no-op case.
pub fn tool_descriptors_pinned(pinned: Option<&str>) -> Value {
let mut defs = tool_descriptors();
let Some(id) = pinned else {
return defs;
};
let note = format!("Defaults to this session's pinned project index ('{id}') when omitted.");
if let Some(tools) = defs.as_array_mut() {
for tool in tools.iter_mut() {
let Some(schema) = tool.get_mut("inputSchema").and_then(Value::as_object_mut) else {
continue;
};
// Only touch tools that actually accept an `index_id`.
let has_index_id = schema
.get("properties")
.and_then(Value::as_object)
.is_some_and(|p| p.contains_key("index_id"));
if !has_index_id {
continue;
}
// (1) Drop `index_id` from `required` — the pin supplies it.
if let Some(required) = schema.get_mut("required").and_then(Value::as_array_mut) {
required.retain(|v| v.as_str() != Some("index_id"));
}
// (2) Annotate the `index_id` property description with the default.
if let Some(prop) = schema
.get_mut("properties")
.and_then(Value::as_object_mut)
.and_then(|p| p.get_mut("index_id"))
.and_then(Value::as_object_mut)
{
let base = prop
.get("description")
.and_then(Value::as_str)
.unwrap_or("Target index id");
// Normalise the join so the result reads "<base>. <note>" with
// exactly one separating period+space, whether or not `base`
// already ends in a period (avoids the ".." double-period bug).
let base = base.trim_end().trim_end_matches('.');
prop.insert(
"description".to_string(),
Value::String(format!("{base}. {note}")),
);
}
}
}
defs
}