Skip to main content

mcp_proxy/
discovery.rs

1//! BM25 full-text tool discovery and search.
2//!
3//! When a proxy aggregates many backends, clients (and the LLMs behind them)
4//! need a way to find relevant tools without scanning a long flat list. This
5//! module builds a BM25 search index over all registered tools using
6//! [`jpx-engine`](jpx_engine) and exposes three discovery tools under the
7//! `proxy/` namespace:
8//!
9//! | Tool | Description |
10//! |---|---|
11//! | `proxy/search_tools` | Full-text search across tool names, descriptions, parameters, and tags |
12//! | `proxy/similar_tools` | Find tools related to a given tool by BM25 term similarity |
13//! | `proxy/tool_categories` | Browse tools grouped by backend namespace with counts |
14//!
15//! # Enabling discovery
16//!
17//! Set `tool_discovery = true` in the `[proxy]` section:
18//!
19//! ```toml
20//! [proxy]
21//! name = "my-proxy"
22//! tool_discovery = true
23//!
24//! [proxy.listen]
25//! # ...
26//! ```
27//!
28//! The three `proxy/` discovery tools are added to the proxy's tool list
29//! alongside the backend tools.
30//!
31//! # Search mode (`tool_exposure = "search"`)
32//!
33//! For proxies aggregating a large number of tools (100+), listing every tool
34//! in `tools/list` responses can overwhelm LLM context windows. Setting
35//! `tool_exposure = "search"` hides individual backend tools from listings
36//! while keeping them invokable. Only the `proxy/` meta-tools appear in
37//! `tools/list`; clients use `proxy/search_tools` to discover and then call
38//! backend tools by name.
39//!
40//! ```toml
41//! [proxy]
42//! name = "my-proxy"
43//! tool_exposure = "search"
44//! # tool_discovery is implied when tool_exposure = "search"
45//! ```
46//!
47//! # Indexing architecture
48//!
49//! At startup, [`build_index`] sends a `ListTools` request through the proxy
50//! to collect all registered tools, groups them by backend namespace (derived
51//! from the configured separator), and registers each group as a
52//! [`DiscoverySpec`] in a
53//! [`DiscoveryRegistry`]. Tool annotations
54//! (destructive, read-only, idempotent, open-world) are extracted as
55//! searchable tags.
56//!
57//! The index is stored as a [`SharedDiscoveryIndex`] (`Arc<RwLock<DiscoveryRegistry>>`)
58//! for concurrent read access from tool handlers.
59//!
60//! # Hot reload re-indexing
61//!
62//! When the proxy configuration is hot-reloaded (backends added, removed, or
63//! updated), [`reindex`] rebuilds the search index from scratch with the new
64//! tool set. This keeps search results consistent with the proxy's current
65//! state without requiring a restart.
66
67use std::sync::Arc;
68
69use jpx_engine::{
70    CategorySummary, DiscoveryRegistry, DiscoverySpec, ParamSpec, ServerInfo, ToolQueryResult,
71    ToolSpec,
72};
73use schemars::JsonSchema;
74use serde::{Deserialize, Serialize};
75use tokio::sync::RwLock;
76use tower_mcp::proxy::McpProxy;
77use tower_mcp::{CallToolResult, NoParams, ToolBuilder, ToolDefinition};
78
/// Shared discovery index, wrapped for concurrent access from tool handlers.
///
/// An [`Arc`] around a Tokio [`RwLock`] holding the [`DiscoveryRegistry`].
/// The discovery tool handlers take read locks to answer queries, while
/// [`reindex`] takes the write lock to swap in a freshly built registry.
pub type SharedDiscoveryIndex = Arc<RwLock<DiscoveryRegistry>>;
81
82/// Build a discovery index from the proxy's current tool list.
83///
84/// Sends a `ListTools` request through the proxy to collect all registered
85/// tools, then indexes them using jpx-engine's BM25 search.
86pub async fn build_index(proxy: &mut McpProxy, separator: &str) -> SharedDiscoveryIndex {
87    use tower::Service;
88    use tower_mcp::protocol::{ListToolsParams, McpRequest, McpResponse, RequestId};
89    use tower_mcp::router::{Extensions, RouterRequest};
90
91    let req = RouterRequest {
92        id: RequestId::Number(0),
93        inner: McpRequest::ListTools(ListToolsParams::default()),
94        extensions: Extensions::new(),
95    };
96
97    let tools = match proxy.call(req).await {
98        Ok(resp) => match resp.inner {
99            Ok(McpResponse::ListTools(result)) => result.tools,
100            _ => {
101                tracing::warn!("Failed to list tools for discovery indexing");
102                vec![]
103            }
104        },
105        Err(_) => vec![],
106    };
107
108    let mut registry = DiscoveryRegistry::new();
109    index_tools(&mut registry, &tools, separator);
110
111    tracing::info!(tools_indexed = tools.len(), "Built tool discovery index");
112
113    Arc::new(RwLock::new(registry))
114}
115
116/// Re-index all tools into an existing shared discovery index.
117///
118/// Called after hot reload adds, removes, or replaces backends to keep
119/// the search index in sync with the proxy's current tool set.
120pub async fn reindex(index: &SharedDiscoveryIndex, proxy: &mut McpProxy, separator: &str) {
121    use tower::Service;
122    use tower_mcp::protocol::{ListToolsParams, McpRequest, McpResponse, RequestId};
123    use tower_mcp::router::{Extensions, RouterRequest};
124
125    let req = RouterRequest {
126        id: RequestId::Number(0),
127        inner: McpRequest::ListTools(ListToolsParams::default()),
128        extensions: Extensions::new(),
129    };
130
131    let tools = match proxy.call(req).await {
132        Ok(resp) => match resp.inner {
133            Ok(McpResponse::ListTools(result)) => result.tools,
134            _ => vec![],
135        },
136        Err(_) => vec![],
137    };
138
139    let mut registry = DiscoveryRegistry::new();
140    index_tools(&mut registry, &tools, separator);
141
142    let mut guard = index.write().await;
143    *guard = registry;
144
145    tracing::info!(tools_indexed = tools.len(), "Re-indexed tool discovery");
146}
147
148/// Index MCP tool definitions into the discovery registry.
149///
150/// Groups tools by backend namespace (derived from the separator) and registers
151/// each group as a discovery "server" with its tools.
152fn index_tools(registry: &mut DiscoveryRegistry, tools: &[ToolDefinition], separator: &str) {
153    // Group tools by backend namespace
154    let mut by_namespace: std::collections::HashMap<String, Vec<&ToolDefinition>> =
155        std::collections::HashMap::new();
156
157    for tool in tools {
158        let namespace = tool
159            .name
160            .split_once(separator)
161            .map(|(ns, _)| ns.to_string())
162            .unwrap_or_else(|| "default".to_string());
163        by_namespace.entry(namespace).or_default().push(tool);
164    }
165
166    for (namespace, ns_tools) in &by_namespace {
167        let tool_specs: Vec<ToolSpec> = ns_tools
168            .iter()
169            .map(|t| tool_definition_to_spec(t, separator))
170            .collect();
171
172        let spec = DiscoverySpec {
173            schema: None,
174            server: ServerInfo {
175                name: namespace.clone(),
176                version: None,
177                description: None,
178            },
179            tools: tool_specs,
180            categories: std::collections::HashMap::new(),
181        };
182
183        registry.register(spec, true);
184    }
185}
186
187/// Convert an MCP ToolDefinition to a jpx ToolSpec for indexing.
188fn tool_definition_to_spec(tool: &ToolDefinition, separator: &str) -> ToolSpec {
189    // Extract the local tool name (without namespace prefix)
190    let local_name = tool
191        .name
192        .split_once(separator)
193        .map(|(_, name)| name.to_string())
194        .unwrap_or_else(|| tool.name.clone());
195
196    // Extract parameter names from input schema
197    let params = extract_params(&tool.input_schema);
198
199    // Extract tags from annotations if available
200    let mut tags = Vec::new();
201    if let Some(annotations) = &tool.annotations {
202        if annotations.destructive_hint {
203            tags.push("destructive".to_string());
204        }
205        if annotations.read_only_hint {
206            tags.push("read-only".to_string());
207        }
208        if annotations.idempotent_hint {
209            tags.push("idempotent".to_string());
210        }
211        if annotations.open_world_hint {
212            tags.push("open-world".to_string());
213        }
214    }
215
216    // Extract category from namespace
217    let category = tool
218        .name
219        .split_once(separator)
220        .map(|(ns, _)| ns.to_string());
221
222    ToolSpec {
223        name: local_name,
224        aliases: vec![],
225        category,
226        subcategory: None,
227        tags,
228        summary: tool.description.clone(),
229        description: tool.description.clone(),
230        params,
231        returns: None,
232        examples: vec![],
233        related: vec![],
234        since: None,
235        stability: None,
236    }
237}
238
239/// Extract parameter specs from a JSON Schema input_schema.
240fn extract_params(schema: &serde_json::Value) -> Vec<ParamSpec> {
241    let Some(properties) = schema.get("properties").and_then(|p| p.as_object()) else {
242        return vec![];
243    };
244    let required: std::collections::HashSet<&str> = schema
245        .get("required")
246        .and_then(|r| r.as_array())
247        .map(|arr| arr.iter().filter_map(|v| v.as_str()).collect())
248        .unwrap_or_default();
249
250    properties
251        .iter()
252        .map(|(name, prop)| ParamSpec {
253            name: name.clone(),
254            param_type: prop.get("type").and_then(|t| t.as_str()).map(String::from),
255            required: required.contains(name.as_str()),
256            description: prop
257                .get("description")
258                .and_then(|d| d.as_str())
259                .map(String::from),
260            enum_values: None,
261            default: None,
262        })
263        .collect()
264}
265
266// ---------------------------------------------------------------------------
267// Discovery tool handlers
268// ---------------------------------------------------------------------------
269
// Arguments accepted by the `search_tools` discovery handler.
//
// NOTE(review): plain `//` comments are used for the additions here on
// purpose. The `///` field docs below are presumably picked up by the
// `JsonSchema` derive as schema descriptions, so they are client-visible
// text and are left untouched -- confirm against schemars.
#[derive(Debug, Deserialize, JsonSchema)]
struct SearchInput {
    /// Search query (e.g. "read file", "database query", "math operations")
    query: String,
    /// Maximum number of results to return (default: 10)
    #[serde(default = "default_top_k")]
    top_k: usize,
}

// Serde default for `SearchInput::top_k` when the caller omits it.
fn default_top_k() -> usize {
    10
}
282
// Arguments accepted by the `similar_tools` discovery handler.
//
// NOTE(review): plain `//` comments are used for the additions here on
// purpose. The `///` field docs below are presumably picked up by the
// `JsonSchema` derive as schema descriptions, so they are client-visible
// text and are left untouched -- confirm against schemars.
#[derive(Debug, Deserialize, JsonSchema)]
struct SimilarInput {
    /// Tool ID to find similar tools for (e.g. "math:add")
    tool_id: String,
    /// Maximum number of results to return (default: 5)
    #[serde(default = "default_similar_k")]
    top_k: usize,
}

// Serde default for `SimilarInput::top_k` when the caller omits it.
fn default_similar_k() -> usize {
    5
}
295
/// One hit in the JSON output of the `search_tools` and `similar_tools`
/// handlers; a flattened view of a [`ToolQueryResult`].
#[derive(Serialize)]
struct SearchResultEntry {
    /// Identifier of the hit, copied from `ToolQueryResult::id`.
    id: String,
    /// Server the tool was registered under, copied from `ToolQueryResult::server`.
    server: String,
    /// Name of the indexed tool spec (the local name, without namespace prefix).
    name: String,
    /// Description of the indexed tool spec, if it has one.
    description: Option<String>,
    /// Relevance score reported by the registry for this hit.
    score: f64,
    /// Tags of the indexed tool spec.
    tags: Vec<String>,
    /// Category of the indexed tool spec, if any.
    category: Option<String>,
}
306
307impl From<ToolQueryResult> for SearchResultEntry {
308    fn from(r: ToolQueryResult) -> Self {
309        Self {
310            id: r.id,
311            server: r.server,
312            name: r.tool.name,
313            description: r.tool.description,
314            score: r.score,
315            tags: r.tool.tags,
316            category: r.tool.category,
317        }
318    }
319}
320
/// JSON payload returned by the `tool_categories` handler.
#[derive(Serialize)]
struct CategoriesResult {
    /// Category summaries as returned to the client.
    categories: Vec<CategorySummary>,
    /// Number of entries in `categories`.
    total_categories: usize,
}
326
327/// Build the discovery tools and return them for inclusion in the admin router.
328pub fn build_discovery_tools(index: SharedDiscoveryIndex) -> Vec<tower_mcp::Tool> {
329    let index_for_search = Arc::clone(&index);
330    let search_tools = ToolBuilder::new("search_tools")
331        .description(
332            "Search for tools across all backends using BM25 full-text search. \
333             Searches tool names, descriptions, parameters, and tags.",
334        )
335        .handler(move |input: SearchInput| {
336            let idx = Arc::clone(&index_for_search);
337            async move {
338                let registry = idx.read().await;
339                let results = registry.query(&input.query, input.top_k);
340                let entries: Vec<SearchResultEntry> =
341                    results.into_iter().map(SearchResultEntry::from).collect();
342                Ok(CallToolResult::text(
343                    serde_json::to_string_pretty(&entries).unwrap(),
344                ))
345            }
346        })
347        .build();
348
349    let index_for_similar = Arc::clone(&index);
350    let similar_tools = ToolBuilder::new("similar_tools")
351        .description(
352            "Find tools similar to a given tool. Uses BM25 similarity based on \
353             shared terms in descriptions, parameters, and tags.",
354        )
355        .handler(move |input: SimilarInput| {
356            let idx = Arc::clone(&index_for_similar);
357            async move {
358                let registry = idx.read().await;
359                let results = registry.similar(&input.tool_id, input.top_k);
360                let entries: Vec<SearchResultEntry> =
361                    results.into_iter().map(SearchResultEntry::from).collect();
362                Ok(CallToolResult::text(
363                    serde_json::to_string_pretty(&entries).unwrap(),
364                ))
365            }
366        })
367        .build();
368
369    let index_for_categories = Arc::clone(&index);
370    let tool_categories = ToolBuilder::new("tool_categories")
371        .description(
372            "List all tool categories (backend namespaces) with tool counts. \
373             Useful for browsing available capabilities by domain.",
374        )
375        .handler(move |_: NoParams| {
376            let idx = Arc::clone(&index_for_categories);
377            async move {
378                let registry = idx.read().await;
379                let categories = registry.list_categories();
380                let mut cats: Vec<CategorySummary> = categories.into_values().collect();
381                cats.sort_by(|a, b| b.tool_count.cmp(&a.tool_count));
382                let result = CategoriesResult {
383                    total_categories: cats.len(),
384                    categories: cats,
385                };
386                Ok(CallToolResult::text(
387                    serde_json::to_string_pretty(&result).unwrap(),
388                ))
389            }
390        })
391        .build();
392
393    vec![search_tools, similar_tools, tool_categories]
394}
395
/// Unit tests for indexing, spec conversion, schema parameter extraction,
/// and discovery tool construction.
#[cfg(test)]
mod tests {
    use jpx_engine::DiscoveryRegistry;
    use tower_mcp::ToolDefinition;
    use tower_mcp_types::protocol::ToolAnnotations;

    use super::*;

    /// Build a minimal tool definition with an empty object input schema.
    fn make_tool(
        name: &str,
        description: Option<&str>,
        annotations: Option<ToolAnnotations>,
    ) -> ToolDefinition {
        ToolDefinition {
            name: name.to_string(),
            title: None,
            description: description.map(|d| d.to_string()),
            input_schema: serde_json::json!({"type": "object"}),
            output_schema: None,
            icons: None,
            annotations,
            execution: None,
            meta: None,
        }
    }

    // -- index_tools tests ---------------------------------------------------

    #[test]
    fn index_tools_empty_list() {
        let mut registry = DiscoveryRegistry::new();
        index_tools(&mut registry, &[], "/");
        let cats = registry.list_categories();
        assert!(cats.is_empty());
    }

    #[test]
    fn index_tools_groups_by_namespace() {
        let tools = vec![
            make_tool("fs/read", Some("Read a file"), None),
            make_tool("fs/write", Some("Write a file"), None),
            make_tool("db/query", Some("Run a query"), None),
        ];

        let mut registry = DiscoveryRegistry::new();
        index_tools(&mut registry, &tools, "/");

        let cats = registry.list_categories();
        assert_eq!(cats.len(), 2);
        assert!(cats.contains_key("fs"));
        assert!(cats.contains_key("db"));
    }

    #[test]
    fn index_tools_no_separator_uses_default_namespace() {
        let tools = vec![make_tool("standalone", Some("No namespace"), None)];

        let mut registry = DiscoveryRegistry::new();
        index_tools(&mut registry, &tools, "/");

        // Search finds the tool...
        let results = registry.query("namespace", 10);
        assert!(!results.is_empty());
        // ...and it is registered under the "default" server with its full name.
        assert!(
            results
                .iter()
                .any(|r| r.server == "default" && r.tool.name == "standalone")
        );
    }

    #[test]
    fn index_tools_without_descriptions() {
        let tools = vec![make_tool("ns/tool", None, None)];

        let mut registry = DiscoveryRegistry::new();
        index_tools(&mut registry, &tools, "/");

        let cats = registry.list_categories();
        assert_eq!(cats.len(), 1);
    }

    #[test]
    fn index_tools_with_annotations() {
        let tools = vec![make_tool(
            "ns/dangerous",
            Some("Dangerous tool"),
            Some(ToolAnnotations {
                title: None,
                destructive_hint: true,
                read_only_hint: false,
                idempotent_hint: false,
                open_world_hint: true,
            }),
        )];

        let mut registry = DiscoveryRegistry::new();
        index_tools(&mut registry, &tools, "/");

        // The tool was indexed; search for its annotation tag
        let results = registry.query("destructive", 10);
        assert!(!results.is_empty());
    }

    // -- tool_definition_to_spec tests ----------------------------------------

    #[test]
    fn tool_definition_to_spec_extracts_local_name() {
        let tool = make_tool("backend/read_file", Some("Reads files"), None);
        let spec = tool_definition_to_spec(&tool, "/");
        assert_eq!(spec.name, "read_file");
        assert_eq!(spec.category.as_deref(), Some("backend"));
    }

    #[test]
    fn tool_definition_to_spec_splits_on_first_separator() {
        // Only the first separator delimits the namespace; the rest stays in the name.
        let tool = make_tool("backend/dir/read_file", Some("Reads files"), None);
        let spec = tool_definition_to_spec(&tool, "/");
        assert_eq!(spec.name, "dir/read_file");
        assert_eq!(spec.category.as_deref(), Some("backend"));
    }

    #[test]
    fn tool_definition_to_spec_no_separator() {
        let tool = make_tool("read_file", Some("Reads files"), None);
        let spec = tool_definition_to_spec(&tool, "/");
        assert_eq!(spec.name, "read_file");
        assert!(spec.category.is_none());
    }

    #[test]
    fn tool_definition_to_spec_annotation_tags() {
        let tool = make_tool(
            "ns/tool",
            Some("desc"),
            Some(ToolAnnotations {
                title: None,
                destructive_hint: true,
                read_only_hint: true,
                idempotent_hint: true,
                open_world_hint: true,
            }),
        );
        let spec = tool_definition_to_spec(&tool, "/");
        assert_eq!(spec.tags.len(), 4);
        assert!(spec.tags.contains(&"destructive".to_string()));
        assert!(spec.tags.contains(&"read-only".to_string()));
        assert!(spec.tags.contains(&"idempotent".to_string()));
        assert!(spec.tags.contains(&"open-world".to_string()));
    }

    #[test]
    fn tool_definition_to_spec_no_annotations_no_tags() {
        let tool = make_tool("ns/tool", Some("desc"), None);
        let spec = tool_definition_to_spec(&tool, "/");
        assert!(spec.tags.is_empty());
    }

    #[test]
    fn tool_definition_to_spec_preserves_description() {
        let tool = make_tool("ns/tool", Some("My description"), None);
        let spec = tool_definition_to_spec(&tool, "/");
        assert_eq!(spec.summary.as_deref(), Some("My description"));
        assert_eq!(spec.description.as_deref(), Some("My description"));
    }

    // -- extract_params tests ------------------------------------------------

    #[test]
    fn extract_params_empty_schema() {
        let schema = serde_json::json!({"type": "object"});
        let params = extract_params(&schema);
        assert!(params.is_empty());
    }

    #[test]
    fn extract_params_ignores_malformed_properties() {
        // `properties` that is not an object yields no parameters.
        let schema = serde_json::json!({"type": "object", "properties": ["path"]});
        let params = extract_params(&schema);
        assert!(params.is_empty());
    }

    #[test]
    fn extract_params_with_properties() {
        let schema = serde_json::json!({
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "File path"
                },
                "recursive": {
                    "type": "boolean"
                }
            },
            "required": ["path"]
        });
        let params = extract_params(&schema);
        assert_eq!(params.len(), 2);

        let path_param = params.iter().find(|p| p.name == "path").unwrap();
        assert!(path_param.required);
        assert_eq!(path_param.param_type.as_deref(), Some("string"));
        assert_eq!(path_param.description.as_deref(), Some("File path"));

        let recursive_param = params.iter().find(|p| p.name == "recursive").unwrap();
        assert!(!recursive_param.required);
        assert_eq!(recursive_param.param_type.as_deref(), Some("boolean"));
    }

    #[test]
    fn extract_params_no_required_field() {
        let schema = serde_json::json!({
            "type": "object",
            "properties": {
                "name": {"type": "string"}
            }
        });
        let params = extract_params(&schema);
        assert_eq!(params.len(), 1);
        assert!(!params[0].required);
    }

    // -- build_discovery_tools tests -----------------------------------------

    #[test]
    fn build_discovery_tools_returns_three_tools() {
        let index = Arc::new(RwLock::new(DiscoveryRegistry::new()));
        let tools = build_discovery_tools(index);
        assert_eq!(tools.len(), 3);
        assert_eq!(tools[0].name, "search_tools");
        assert_eq!(tools[1].name, "similar_tools");
        assert_eq!(tools[2].name, "tool_categories");
    }
}