trusty-search 0.26.1

Machine-wide hybrid code search service: BM25 + vector + KG, zero cold-start, MCP server
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
//! Static tool descriptors returned by `tools/list`.
//!
//! Why: the JSON schemas for all 20 MCP tools are long but mechanical; keeping
//! them in a dedicated file makes it easy to update a single tool's description
//! or input schema without touching any dispatch logic.
//! What: exports `tool_descriptors()`, which returns the full `serde_json::Value`
//! array consumed by the `tools/list` handler in `dispatch`, and
//! `tool_descriptors_pinned()`, which returns the same array adjusted for a
//! session pinned to a single index.
//! Test: `tools_list_returns_all_tools`, `test_tools_list_response`,
//! `tools_list_returns_five_search_tools`, and
//! `per_lane_tool_descriptions_carry_when_to_use_hooks` in `tests.rs`.

use serde_json::Value;

/// Builds the static tool catalogue returned by `tools/list`.
///
/// Keep in sync with `call_tool` in `mod.rs`.
///
/// Why: listing tools separately from their implementation allows the MCP
/// client to introspect the full tool catalogue without triggering any daemon
/// HTTP calls — this function only constructs a JSON literal.
/// What: returns a `Value::Array` containing one descriptor object per
/// registered tool (20 entries at present). Each object has `name`,
/// `description`, and `inputSchema`; the schema is a JSON-Schema-style object
/// with `type`, `properties`, and usually `required` (some also carry
/// `examples`).
/// Test: `test_tools_list_response` asserts every required tool is present and
/// carries an `inputSchema`.
pub fn tool_descriptors() -> Value {
    serde_json::json!([
        // Issue #138 — per-lane MCP tools. Tool descriptions are
        // first-class LLM prompts: each one opens with "when to use",
        // gives concrete fit/don't-fit examples, states the cost, and
        // explains the failure mode (STAGE_NOT_READY). The legacy
        // `search` tool is preserved below as a back-compat alias.
        {
            "name": "search_lexical",
            "description": "Find code by exact symbol name, regex, or literal string. Equivalent to a fast ripgrep on the indexed codebase. Use this FIRST for any query where the user mentions a specific identifier (function name, struct name, file name) or a literal phrase. Best for: `apply_archive_downrank`, `pub fn main`, `\"TODO: refactor\"`, filename globs like `*.toml`. Don't use for: conceptual queries like \"how does authentication work\" — use `search_semantic` instead. Always available on any indexed project. Cheapest tool in this family.",
            "inputSchema": {
                "type": "object",
                "required": ["index_id", "query"],
                "properties": {
                    "index_id":         { "type": "string", "description": "Target index id (from `list_indexes`)" },
                    "query":            { "type": "string", "description": "Exact symbol, regex, or literal phrase" },
                    "top_k":            { "type": "integer", "default": 10 },
                    "mode":             { "type": "string", "enum": ["code", "text", "data"], "default": "code" },
                    "exclude_archived": { "type": "boolean", "default": false },
                    "branch_files":     { "type": "array", "items": { "type": "string" } },
                    "branch_boost":     { "type": "number" },
                    "branch":           { "type": "string" }
                },
                "examples": [
                    { "index_id": "trusty-tools", "query": "apply_archive_downrank" },
                    { "index_id": "trusty-tools", "query": "pub fn main" },
                    { "index_id": "trusty-tools", "query": "TODO: refactor" }
                ]
            }
        },
        {
            "name": "search_semantic",
            "description": "Find code by meaning, not by literal text. Uses embedding-based similarity to retrieve chunks that semantically match the query, even when the query words don't appear in the code. Best for: \"code that handles JWT verification\", \"the place that does community detection\", \"how does the embedder batch requests\". Don't use for: exact symbol lookups (use `search_lexical`) or finding callers of a known function (use `search_kg`). Requires Stage 2 (embeddings) to be ready on the index — returns a STAGE_NOT_READY error with a `suggested_tools` retry hint if not. Medium cost.",
            "inputSchema": {
                "type": "object",
                "required": ["index_id", "query"],
                "properties": {
                    "index_id":         { "type": "string" },
                    "query":            { "type": "string", "description": "Conceptual query — meaning, not literal text" },
                    "top_k":            { "type": "integer", "default": 10 },
                    "mode":             { "type": "string", "enum": ["code", "text", "data"], "default": "code" },
                    "exclude_archived": { "type": "boolean", "default": false }
                },
                "examples": [
                    { "index_id": "trusty-tools", "query": "code that handles JWT verification" },
                    { "index_id": "trusty-tools", "query": "the place that does community detection" }
                ]
            }
        },
        {
            "name": "search_kg",
            "description": "Explore code structure from a known seed — either a chunk_id (from a previous search result) or a symbol name. Returns chunks connected to the seed via `calls`, `called_by`, `contains`, `inherits` edges. Best for: \"what calls `validate_token`\", \"what does `Authenticator` use internally\", impact analysis before a refactor. Don't use for: free-text discovery (use `search_semantic`) or initial entry-point finding (use `search_lexical` first). Requires Stage 3 (symbol graph) to be ready. Returns empty if the seed is not in the index. Cheap once you have a seed. Optional `refine_query`: provide a longer natural-language description to rerank and filter the expanded neighbourhood by semantic relevance — useful when the seed chunk is correct but you want only the most relevant callers/callees (issue #147).",
            "inputSchema": {
                "type": "object",
                "required": ["index_id", "query"],
                "properties": {
                    "index_id":      { "type": "string" },
                    "query":         { "type": "string", "description": "Seed: a symbol name or chunk_id from a previous result" },
                    "top_k":         { "type": "integer", "default": 10 },
                    "mode":          { "type": "string", "enum": ["code", "text", "data"], "default": "code" },
                    "refine_query":  { "type": "string", "description": "Optional: rerank and filter expanded KG neighbours by cosine similarity to this natural-language description. Neighbours below the 0.4 cosine threshold are dropped. Omit to use default KG expansion without filtering." }
                },
                "examples": [
                    { "index_id": "trusty-tools", "query": "validate_token" },
                    { "index_id": "trusty-tools", "query": "Authenticator", "refine_query": "callers that handle token refresh in the auth middleware" }
                ]
            }
        },
        {
            // Unlike the three lane tools above, only `query` is required:
            // `index_id` is optional in this schema.
            "name": "search_all",
            "description": "When in doubt, use this. Runs the full hybrid pipeline (lexical + semantic + KG expansion) and merges results via RRF. More expensive than the targeted tools but catches edge cases. Use when: your query has both literal symbols AND conceptual phrasing (\"find the `AuthValidator` that handles refresh tokens\"), or when you've tried the targeted tools and they didn't surface what you need. Always available; gracefully degrades to whatever lanes are ready. When called without `index_id`, falls back to legacy cross-project fan-out behaviour (issue #10) — provide `index_id` for the per-index hybrid path.",
            "inputSchema": {
                "type": "object",
                "required": ["query"],
                "properties": {
                    "index_id":         { "type": "string", "description": "Target index (omit for cross-project fan-out)" },
                    "query":            { "type": "string" },
                    "top_k":            { "type": "integer", "default": 10 },
                    "mode":             { "type": "string", "enum": ["code", "text", "data"], "default": "code" },
                    "exclude_archived": { "type": "boolean", "default": false },
                    "full_content":     { "type": "boolean", "default": false, "description": "Legacy fan-out only: include full chunk content in each hit" },
                    "branch_files":     { "type": "array", "items": { "type": "string" } },
                    "branch_boost":     { "type": "number" },
                    "branch":           { "type": "string" }
                },
                "examples": [
                    { "index_id": "trusty-tools", "query": "AuthValidator that handles refresh tokens" },
                    { "query": "global cross-project fan-out without index_id" }
                ]
            }
        },
        // Legacy unified `search` tool — the back-compat alias referred to in
        // the comment at the top of this array.
        {
            "name": "search",
            "description": "Unified hybrid search (BM25+vector+KG+RRF) with mode-aware ranking (issue #77). The `mode` parameter (\"code\" | \"text\" | \"data\", default \"code\") picks the file-type penalty matrix: code prefers source (prose 0.1x, data 0.2x); text prefers prose docs (source 0.5x, data 0.3x); data prefers structured data (source 0.3x, prose 0.3x). Set `exclude_archived: true` to drop archived/deprecated/legacy chunks entirely instead of downranking them (issue #74). Supports branch-aware scoring via branch_files/branch_boost/branch (issue #122). Replaces the legacy `search_code` tool name; callers that omit `mode` get identical pre-#77 behaviour.",
            "inputSchema": {
                "type": "object",
                "required": ["index_id", "query"],
                "properties": {
                    "index_id": { "type": "string" },
                    "query": { "type": "string" },
                    "top_k": { "type": "integer", "default": 10 },
                    "mode": {
                        "type": "string",
                        "enum": ["code", "text", "data"],
                        "default": "code",
                        "description": "Ranking mode: prefer source code, prose docs, or structured data."
                    },
                    "exclude_archived": {
                        "type": "boolean",
                        "default": false,
                        "description": "Drop archived/deprecated/legacy chunks (paths like _archive/, archive/, _deprecated/, old/, .archive/; #[deprecated]; .archived/DEPRECATED markers) instead of downranking them."
                    },
                    "branch_files": {
                        "type": "array",
                        "items": { "type": "string" },
                        "description": "Files modified on current git branch (relative to index root). Boosted in results."
                    },
                    "branch_boost": {
                        "type": "number",
                        "description": "Score multiplier for branch files (default 1.5, range 1.0-3.0)."
                    },
                    "branch": {
                        "type": "string",
                        "description": "Branch name; daemon will compute branch_files via git if branch_files is absent."
                    }
                }
            }
        },
        // Index content and registry tools.
        {
            "name": "index_file",
            "description": "Add or update one file in an index",
            "inputSchema": {
                "type": "object",
                "required": ["index_id", "path", "content"],
                "properties": {
                    "index_id": { "type": "string" },
                    "path":     { "type": "string" },
                    "content":  { "type": "string" }
                }
            }
        },
        {
            "name": "remove_file",
            "description": "Remove a file's chunks from an index",
            "inputSchema": {
                "type": "object",
                "required": ["index_id", "path"],
                "properties": {
                    "index_id": { "type": "string" },
                    "path":     { "type": "string" }
                }
            }
        },
        {
            "name": "list_indexes",
            "description": "List all registered indexes on this daemon",
            "inputSchema": { "type": "object", "properties": {} }
        },
        {
            // NOTE: the new index's identifier is passed as `id`, not
            // `index_id`, so `tool_descriptors_pinned` does not rewrite this
            // schema.
            "name": "create_index",
            "description": "Register a new (empty) index",
            "inputSchema": {
                "type": "object",
                "required": ["id", "root_path"],
                "properties": {
                    "id":        { "type": "string" },
                    "root_path": { "type": "string" }
                }
            }
        },
        {
            // NOTE: this schema names its index parameter `index` (not
            // `index_id`) and types `top_k` as `number` rather than `integer`,
            // unlike the other search tools. Because there is no `index_id`
            // property, `tool_descriptors_pinned` leaves it unchanged.
            "name": "search_similar",
            "description": "Find chunks semantically similar to a given file/function via HNSW (issue #31)",
            "inputSchema": {
                "type": "object",
                "required": ["file"],
                "properties": {
                    "file":     { "type": "string" },
                    "function": { "type": "string" },
                    "top_k":    { "type": "number" },
                    "index":    { "type": "string" }
                }
            }
        },
        {
            "name": "search_health",
            "description": "Probe daemon liveness and version",
            "inputSchema": { "type": "object", "properties": {} }
        },
        {
            "name": "delete_index",
            "description": "Delete a registered index and all its data",
            "inputSchema": {
                "type": "object",
                "required": ["index_id"],
                "properties": {
                    "index_id": { "type": "string" }
                }
            }
        },
        {
            "name": "reindex",
            "description": "Trigger a full reindex of a collection (async, returns immediately)",
            "inputSchema": {
                "type": "object",
                "required": ["index_id"],
                "properties": {
                    "index_id":  { "type": "string" },
                    "root_path": { "type": "string" }
                }
            }
        },
        {
            "name": "index_status",
            "description": "Get stats for an index (chunk count, root path)",
            "inputSchema": {
                "type": "object",
                "required": ["index_id"],
                "properties": {
                    "index_id": { "type": "string" }
                }
            }
        },
        // From here on, long descriptions use `\` line continuations: the
        // newline and the following line's leading whitespace are not part of
        // the resulting string.
        {
            "name": "list_chunks",
            "description": "Paginated enumeration of every chunk in an index (issue #54). \
                            Two modes: offset/limit (stable order by file, start_line) for \
                            shallow paging, or cursor paging via `after` (issue #1325) for \
                            deep/bulk enumeration. Pass the response's `next_cursor` back as \
                            `after` to fetch the next page in O(page) time (an indexed seek) \
                            instead of the O(offset) scan that times out on large indexes. \
                            `next_cursor` is null once the corpus is exhausted.",
            "inputSchema": {
                "type": "object",
                "required": ["index_id"],
                "properties": {
                    "index_id": { "type": "string" },
                    "offset":   { "type": "integer", "default": 0 },
                    "limit":    { "type": "integer", "default": 100 },
                    "after":    { "type": "string", "description": "Forward cursor (a chunk id, typically the previous page's next_cursor). When set, offset is ignored." }
                }
            }
        },
        {
            "name": "get_call_chain",
            "description": "Annotated call tree for a function entry point (issue #76). \
                            Returns plain-text prose with the entry function's signature, \
                            Why/What doc lines, its depth-1 callees with full source, and \
                            its depth-1 callers as signatures only. LLMs read this prose \
                            tree more reliably than JSON. Entry point accepts an exact \
                            symbol name, a case-insensitive fuzzy substring, or a \
                            `file:line` reference; the most-connected match wins ties.",
            "inputSchema": {
                "type": "object",
                "required": ["index_id", "entry_point"],
                "properties": {
                    "index_id":       { "type": "string" },
                    "entry_point":    { "type": "string", "description": "Function name, fuzzy substring, or file:line" },
                    "direction":      { "type": "string", "enum": ["both", "outgoing", "callers"], "default": "both" },
                    "max_depth":      { "type": "integer", "minimum": 1, "maximum": 4, "default": 2 },
                    "include_source": { "type": "boolean", "default": true, "description": "Embed full source at depth <= 1" }
                }
            }
        },
        {
            // `index_id` is optional here; only `pattern` is required.
            "name": "grep",
            "description": "Search indexed files using regex/literal patterns with ripgrep-compatible options. \
                            Greps the on-disk bytes of files the index already knows about, so no \
                            re-embedding occurs and line numbers are exact. Supports regex or fixed-string \
                            matching, case folding (-i), context windows (-A/-B/-C), include globs, \
                            multiline mode, files-with-matches (-l), invert (-v), and word-regexp (-w). \
                            When `index_id` is omitted the daemon fans out across every registered index.",
            "inputSchema": {
                "type": "object",
                "required": ["pattern"],
                "properties": {
                    "pattern":            { "type": "string", "description": "Regex (default) or literal when fixed_strings=true" },
                    "index_id":           { "type": "string", "description": "Optional index id; omit to fan out across all indexes" },
                    "case_insensitive":   { "type": "boolean", "default": false, "description": "-i / --ignore-case" },
                    "context":            { "type": "integer", "description": "-C: equal before/after context, overrides context_before/context_after" },
                    "context_before":     { "type": "integer", "description": "-B: lines of context before each match" },
                    "context_after":      { "type": "integer", "description": "-A: lines of context after each match" },
                    "glob":                { "type": "string", "description": "--include glob (e.g. '**/*.rs')" },
                    "multiline":          { "type": "boolean", "default": false, "description": "Let `.` span newlines" },
                    "fixed_strings":      { "type": "boolean", "default": false, "description": "-F: treat pattern as literal" },
                    "files_with_matches": { "type": "boolean", "default": false, "description": "-l: return one path per matching file" },
                    "invert_match":       { "type": "boolean", "default": false, "description": "-v: return lines that do NOT match" },
                    "word_regexp":        { "type": "boolean", "default": false, "description": "-w: require word boundaries" },
                    "max_results":        { "type": "integer", "default": 100, "description": "Hard cap on returned matches (alias: max_count)" },
                    "max_count":          { "type": "integer", "description": "Alias for max_results (ripgrep --max-count parity)" }
                }
            }
        },
        {
            // The schema requires only `index_id`; `message` and `question`
            // are declared as aliases of each other, so neither is listed in
            // `required`.
            "name": "chat",
            "description": "Ask a natural-language question about the indexed codebase. \
                            Automatically searches for the top_k most relevant chunks and \
                            sends them as context to an OpenRouter LLM (default model: \
                            anthropic/claude-haiku-4.5). Returns {answer, sources, model}. \
                            Requires OPENROUTER_API_KEY env var on the daemon, or an \
                            `api_key` field in the request.",
            "inputSchema": {
                "type": "object",
                "required": ["index_id"],
                "properties": {
                    "index_id": { "type": "string" },
                    "message":  { "type": "string", "description": "User question (alias: question)" },
                    "question": { "type": "string", "description": "User question (alias: message)" },
                    "history":  { "type": "array", "items": { "type": "object" } },
                    "model":    { "type": "string", "description": "OpenRouter model id (default: anthropic/claude-haiku-4.5)" },
                    "top_k":    { "type": "integer", "description": "Number of context chunks (default: 5)", "default": 5 },
                    "api_key":  { "type": "string", "description": "Fallback OpenRouter API key when OPENROUTER_API_KEY env is unset" }
                }
            }
        },
        // Daemon maintenance and monitoring tools; neither takes an index id.
        {
            "name": "upgrade",
            "description": "Check for or install a new version of trusty-search (issue #537). \
                            With check=true (or without confirm): report current vs. available version — NEVER installs. \
                            With confirm=true: install via `cargo install trusty-search --locked`, run a binary \
                            health gate, then restart the daemon under launchd (or print a restart hint when \
                            not supervised). The MCP response is returned BEFORE the daemon exits.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "check":   { "type": "boolean", "description": "Report versions only, no install (default when confirm absent)", "default": true },
                    "confirm": { "type": "boolean", "description": "Set to true to install the new version. Must be explicit — never assumed.", "default": false }
                },
                "required": []
            }
        },
        {
            "name": "console_metrics",
            "description": "Return a ConsoleMetricsReport with daemon health and index aggregate \
                            statistics (index_count, warm_boot_degraded, index list with id/root_path/size_bytes). \
                            Used by the trusty-console dashboard metrics poller (epic #1104).",
            "inputSchema": {
                "type": "object",
                "properties": {},
                "required": []
            }
        }
    ])
}

/// Variant of [`tool_descriptors`] that advertises the session's pinned index
/// (#1373).
///
/// Why: a session started with `trusty-search serve --index <id>` is bound to
/// one project index. Without a hint in the schema the LLM has to call
/// `list_indexes` and guess an `index_id`, and it routinely guesses wrong
/// (usually the persistent `claude-mpm` index). Publishing the pin in the
/// schema makes `index_id` optional and states its default, so a bare
/// `search`/`grep` lands on the session's own project index.
/// What: with `pinned == None` the stock catalogue is returned unmodified
/// (backward-compatible). With `Some(id)`, every tool whose `inputSchema`
/// declares an `index_id` property is rewritten in two ways: `index_id` is
/// removed from the schema's `required` array, and the property's
/// `description` gains a trailing sentence naming the pinned default. Tools
/// without an `index_id` property are left exactly as they were.
/// Test: `pinned_descriptors_make_index_id_optional` and
/// `pinned_descriptors_annotate_index_id` in `tests_tools_list.rs`;
/// `tool_descriptors_pinned_none_is_unchanged` pins the no-op case.
pub fn tool_descriptors_pinned(pinned: Option<&str>) -> Value {
    let mut catalogue = tool_descriptors();
    let pin = match pinned {
        Some(pin) => pin,
        // No pin: hand back the stock catalogue untouched.
        None => return catalogue,
    };
    let suffix = format!("Defaults to this session's pinned project index ('{pin}') when omitted.");

    // Walk every descriptor that carries an object-valued `inputSchema`;
    // anything else (including a non-array catalogue) is skipped.
    let input_schemas = catalogue
        .as_array_mut()
        .into_iter()
        .flatten()
        .filter_map(|entry| entry.get_mut("inputSchema").and_then(Value::as_object_mut));

    for input_schema in input_schemas {
        // Tools that do not declare an `index_id` property are not pin-aware.
        let accepts_index_id = matches!(
            input_schema.get("properties"),
            Some(Value::Object(props)) if props.contains_key("index_id")
        );
        if !accepts_index_id {
            continue;
        }

        // The pin supplies the value, so the schema must stop demanding it.
        if let Some(Value::Array(required)) = input_schema.get_mut("required") {
            required.retain(|entry| entry.as_str() != Some("index_id"));
        }

        // Append the default to the property's description.
        let index_prop = input_schema
            .get_mut("properties")
            .and_then(Value::as_object_mut)
            .and_then(|props| props.get_mut("index_id"))
            .and_then(Value::as_object_mut);
        if let Some(index_prop) = index_prop {
            let current = match index_prop.get("description") {
                Some(Value::String(text)) => text.as_str(),
                _ => "Target index id",
            };
            // Strip trailing whitespace and periods first so the joined text
            // always reads "<base>. <note>" with a single separator, never
            // "..".
            let trimmed = current.trim_end().trim_end_matches('.');
            let annotated = format!("{trimmed}. {suffix}");
            index_prop.insert("description".to_string(), Value::String(annotated));
        }
    }
    catalogue
}