Skip to main content

roder_api/
tool_search_catalog.rs

1//! Provider-safe searchable tool catalog (roadmap phase 79, Task 2).
2//!
3//! Builds a deterministic, redacted catalog from the tool specs assembled
4//! for a turn (registered tools, MCP tools, skill tools, and lazy
5//! discovery items are all materialized as `ToolSpec` by the time a turn
6//! request is mapped). Catalog payloads are what may leave the process for
7//! provider-native tool search and for the client-executed search flow —
8//! execution stays authoritative in Roder: a selected catalog item resolves
9//! back to the canonical tool name and flows through `TurnToolExecutor`,
10//! permission checks, hooks, and policy mode like any other tool call.
11
12use serde::{Deserialize, Serialize};
13
14use crate::inference::ToolSearchConfig;
15use crate::tools::ToolSpec;
16
/// Upper bound (8 KiB) on the serialized size of a redacted parameter
/// schema. Anything larger is omitted from the catalog payload: ranking
/// works from names and descriptions alone, and the full schema is
/// re-attached locally once the tool is selected.
const MAX_SCHEMA_BYTES: usize = 8_192;
21
22/// Catalog source classification, derived from canonical naming
23/// conventions (`mcp__server__tool`, `skill:` ids).
24#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
25#[serde(rename_all = "snake_case")]
26pub enum ToolCatalogSource {
27    Builtin,
28    Mcp,
29    Skill,
30}
31
32#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
33#[serde(rename_all = "camelCase")]
34pub struct ToolCatalogItem {
35    /// Stable catalog id (`tool:<name>`, duplicates suffixed `#2`, `#3`…).
36    pub id: String,
37    /// Canonical tool name; resolves back to the executable `ToolSpec`.
38    pub name: String,
39    pub description: String,
40    /// Redacted parameter schema; `None` when dropped for size.
41    #[serde(default, skip_serializing_if = "Option::is_none")]
42    pub parameters: Option<serde_json::Value>,
43    pub source: ToolCatalogSource,
44}
45
46/// Deterministic, redacted, size-bounded tool catalog for one turn.
47#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
48#[serde(rename_all = "camelCase")]
49pub struct ToolSearchCatalog {
50    pub items: Vec<ToolCatalogItem>,
51}
52
53impl ToolSearchCatalog {
54    /**
55     * Builds the catalog from the turn's tool specs: deterministic
56     * name-ordering, stable ids with duplicate-name handling, source
57     * filtering per config, `max_catalog_items` limiting, and redaction of
58     * credential-like values, internal-only fields, and oversized schemas.
59     */
60    pub fn build(tools: &[ToolSpec], config: &ToolSearchConfig) -> Self {
61        let mut sorted: Vec<&ToolSpec> = tools.iter().collect();
62        sorted.sort_by(|a, b| a.name.cmp(&b.name));
63
64        let mut items = Vec::new();
65        let mut seen_names: std::collections::HashMap<String, u32> =
66            std::collections::HashMap::new();
67        for spec in sorted {
68            let source = classify_source(&spec.name);
69            match source {
70                ToolCatalogSource::Mcp if !config.include_mcp => continue,
71                ToolCatalogSource::Skill if !config.include_skills => continue,
72                _ => {}
73            }
74            let count = seen_names.entry(spec.name.clone()).or_insert(0);
75            *count += 1;
76            let id = if *count == 1 {
77                format!("tool:{}", spec.name)
78            } else {
79                format!("tool:{}#{}", spec.name, count)
80            };
81            items.push(ToolCatalogItem {
82                id,
83                name: spec.name.clone(),
84                description: redact_text(&spec.description),
85                parameters: redact_schema(&spec.parameters),
86                source,
87            });
88            if let Some(max) = config.max_catalog_items
89                && items.len() >= max as usize
90            {
91                break;
92            }
93        }
94        Self { items }
95    }
96
97    /// Resolves a catalog id (or bare tool name) back to the canonical
98    /// tool name for execution through `TurnToolExecutor`.
99    pub fn resolve(&self, id_or_name: &str) -> Option<&ToolCatalogItem> {
100        self.items
101            .iter()
102            .find(|item| item.id == id_or_name || item.name == id_or_name)
103    }
104
105    /**
106     * Local search executor for the client-executed tool-search flow:
107     * case-insensitive token matching over names and descriptions, ranked
108     * by (name hits, description hits, name) for determinism.
109     */
110    pub fn search(&self, query: &str, limit: usize) -> Vec<&ToolCatalogItem> {
111        let tokens: Vec<String> = query
112            .split_whitespace()
113            .map(str::to_ascii_lowercase)
114            .filter(|token| !token.is_empty())
115            .collect();
116        if tokens.is_empty() {
117            return Vec::new();
118        }
119        let mut scored: Vec<(usize, usize, &ToolCatalogItem)> = self
120            .items
121            .iter()
122            .filter_map(|item| {
123                let name = item.name.to_ascii_lowercase();
124                let description = item.description.to_ascii_lowercase();
125                let name_hits = tokens.iter().filter(|token| name.contains(*token)).count();
126                let description_hits = tokens
127                    .iter()
128                    .filter(|token| description.contains(*token))
129                    .count();
130                (name_hits + description_hits > 0).then_some((name_hits, description_hits, item))
131            })
132            .collect();
133        scored.sort_by(|a, b| {
134            b.0.cmp(&a.0)
135                .then(b.1.cmp(&a.1))
136                .then(a.2.name.cmp(&b.2.name))
137        });
138        scored.into_iter().take(limit).map(|(_, _, item)| item).collect()
139    }
140}
141
142fn classify_source(name: &str) -> ToolCatalogSource {
143    if name.starts_with("mcp__") || name.starts_with("mcp_") {
144        ToolCatalogSource::Mcp
145    } else if name.starts_with("skill__") || name.starts_with("skill:") {
146        ToolCatalogSource::Skill
147    } else {
148        ToolCatalogSource::Builtin
149    }
150}
151
/// Keys whose values must never leave the process in catalog payloads.
///
/// Matching is case-insensitive and ignores separators: the key is reduced
/// to its lowercase ASCII alphanumerics before the substring test, so
/// `api_key`, `api-key`, `apiKey` and `X-API-KEY` are all recognized, as
/// are `privateKey` and `auth-header`. Every key matched by a purely
/// lowercase substring test against the underscore spellings is still
/// matched.
fn is_credential_key(key: &str) -> bool {
    // Needles are spelled without separators to match the normalized key.
    const NEEDLES: [&str; 9] = [
        "apikey",
        "token",
        "secret",
        "password",
        "authorization",
        "authheader",
        "bearer",
        "credential",
        "privatekey",
    ];
    let normalized: String = key
        .chars()
        .filter(char::is_ascii_alphanumeric)
        .map(|c| c.to_ascii_lowercase())
        .collect();
    NEEDLES.iter().any(|needle| normalized.contains(*needle))
}
162
/// Reports whether a schema key is internal-only and must be stripped from
/// catalog payloads: the exact key `x-internal`, or any key starting with
/// `x-roder-` or `x_roder_`.
fn is_internal_key(key: &str) -> bool {
    const INTERNAL_PREFIXES: [&str; 2] = ["x-roder-", "x_roder_"];
    if key == "x-internal" {
        return true;
    }
    INTERNAL_PREFIXES
        .iter()
        .any(|prefix| key.starts_with(*prefix))
}
167
/// Reports whether `value` begins with one of the known credential
/// prefixes: `sk-`, `Bearer ` (with trailing space), `rk_`, or `ghp_`.
/// The check is case-sensitive and anchored at the start of the string.
fn looks_like_credential(value: &str) -> bool {
    const CREDENTIAL_PREFIXES: [&str; 4] = ["sk-", "Bearer ", "rk_", "ghp_"];
    CREDENTIAL_PREFIXES
        .iter()
        .any(|prefix| value.starts_with(*prefix))
}
174
/// Strings that leak process-local filesystem layout.
///
/// Recognizes values that start with a Unix home prefix (`/Users/` or
/// `/home/`) or with a Windows profile prefix. The Windows form accepts
/// any ASCII drive letter, either path separator, and any letter case for
/// the `Users` segment, so `C:\Users\`, `C:/Users/`, `D:\Users\` and
/// `c:\users\` are all caught.
fn looks_like_local_path(value: &str) -> bool {
    if value.starts_with("/Users/") || value.starts_with("/home/") {
        return true;
    }

    // Windows profile prefix: <drive> ':' <sep> "users" <sep>, nine bytes.
    let is_separator = |byte: u8| byte == b'\\' || byte == b'/';
    let bytes = value.as_bytes();
    let (Some(prefix), Some(&trailing)) = (bytes.get(..8), bytes.get(8)) else {
        return false;
    };
    prefix[0].is_ascii_alphabetic()
        && prefix[1] == b':'
        && is_separator(prefix[2])
        && prefix[3..].eq_ignore_ascii_case(b"users")
        && is_separator(trailing)
}
179
180fn redact_text(text: &str) -> String {
181    text.split_whitespace()
182        .map(|word| {
183            if looks_like_credential(word) || looks_like_local_path(word) {
184                "[redacted]"
185            } else {
186                word
187            }
188        })
189        .collect::<Vec<_>>()
190        .join(" ")
191}
192
193/// Redacts a schema for catalog payloads; oversized schemas are dropped.
194fn redact_schema(schema: &serde_json::Value) -> Option<serde_json::Value> {
195    let redacted = redact_value(schema);
196    let size = serde_json::to_vec(&redacted).map(|bytes| bytes.len()).unwrap_or(usize::MAX);
197    (size <= MAX_SCHEMA_BYTES).then_some(redacted)
198}
199
200fn redact_value(value: &serde_json::Value) -> serde_json::Value {
201    match value {
202        serde_json::Value::Object(map) => {
203            let mut out = serde_json::Map::new();
204            for (key, entry) in map {
205                if is_internal_key(key) {
206                    continue;
207                }
208                if is_credential_key(key) {
209                    // Keep the key shape (it is part of the schema) but
210                    // never any default/example/const value for it.
211                    out.insert(key.clone(), redact_credential_property(entry));
212                    continue;
213                }
214                out.insert(key.clone(), redact_value(entry));
215            }
216            serde_json::Value::Object(out)
217        }
218        serde_json::Value::Array(items) => {
219            serde_json::Value::Array(items.iter().map(redact_value).collect())
220        }
221        serde_json::Value::String(text)
222            if looks_like_credential(text) || looks_like_local_path(text) =>
223        {
224            serde_json::Value::String("[redacted]".to_string())
225        }
226        other => other.clone(),
227    }
228}
229
230/// For credential-named schema properties: keep structural keys, drop any
231/// value-bearing fields (`default`, `examples`, `const`, `enum`).
232fn redact_credential_property(value: &serde_json::Value) -> serde_json::Value {
233    match value {
234        serde_json::Value::Object(map) => {
235            let mut out = serde_json::Map::new();
236            for (key, entry) in map {
237                if matches!(key.as_str(), "default" | "examples" | "const" | "enum") {
238                    continue;
239                }
240                out.insert(key.clone(), redact_value(entry));
241            }
242            serde_json::Value::Object(out)
243        }
244        _ => serde_json::Value::String("[redacted]".to_string()),
245    }
246}
247
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Shorthand for constructing a `ToolSpec` fixture.
    fn spec(name: &str, description: &str, parameters: serde_json::Value) -> ToolSpec {
        let name = name.to_string();
        let description = description.to_string();
        ToolSpec {
            name,
            description,
            parameters,
        }
    }

    /// Fixture set: two builtins, one MCP tool carrying a credential
    /// default and an internal key, one skill tool with a local path in
    /// its description, and a duplicate-named builtin.
    fn sample_tools() -> Vec<ToolSpec> {
        let read_file_schema = json!({
            "type": "object",
            "properties": { "path": { "type": "string" } }
        });
        let github_schema = json!({
            "type": "object",
            "properties": {
                "query": { "type": "string" },
                "api_key": { "type": "string", "default": "sk-secret-default" }
            },
            "x-roder-internal": { "registry": "/Users/someone/.roder/mcp" }
        });

        let mut tools = Vec::new();
        tools.push(spec("read_file", "Read a file from the workspace", read_file_schema));
        tools.push(spec("mcp__github__search", "Search GitHub issues", github_schema));
        tools.push(spec(
            "skill__deploy",
            "Deploy the app per the deploy skill at /Users/me/skills",
            json!({}),
        ));
        tools.push(spec("edit_file", "Edit a file", json!({})));
        tools.push(spec("edit_file", "Duplicate-named tool", json!({})));
        tools
    }

    /// Builds the sample catalog under the default configuration.
    fn default_catalog() -> ToolSearchCatalog {
        ToolSearchCatalog::build(&sample_tools(), &ToolSearchConfig::default())
    }

    #[test]
    fn tool_search_catalog_is_deterministic_with_stable_ids() {
        let tools = sample_tools();
        let config = ToolSearchConfig::default();
        let first = ToolSearchCatalog::build(&tools, &config);
        let second = ToolSearchCatalog::build(&tools, &config);
        assert_eq!(first, second, "catalogs are stable across runs");

        let expected_ids = [
            "tool:edit_file",
            "tool:edit_file#2",
            "tool:mcp__github__search",
            "tool:read_file",
            "tool:skill__deploy",
        ];
        let mut actual_ids: Vec<&str> = Vec::new();
        for item in &first.items {
            actual_ids.push(item.id.as_str());
        }
        assert_eq!(actual_ids, expected_ids.to_vec());

        assert_eq!(first.items[2].source, ToolCatalogSource::Mcp);
        assert_eq!(first.items[4].source, ToolCatalogSource::Skill);
    }

    #[test]
    fn tool_search_catalog_redacts_credentials_paths_and_internal_fields() {
        let catalog = default_catalog();
        let serialized = serde_json::to_string(&catalog).unwrap();
        for forbidden in ["sk-secret-default", "x-roder-internal", "/Users/"] {
            assert!(!serialized.contains(forbidden));
        }

        // The credential-named property's structural shape survives.
        let mcp = catalog.resolve("tool:mcp__github__search").unwrap();
        let api_key = &mcp.parameters.as_ref().unwrap()["properties"]["api_key"];
        assert!(api_key.get("type").is_some());
        assert!(api_key.get("default").is_none());
    }

    #[test]
    fn tool_search_catalog_filters_sources_and_limits_items() {
        let tools = sample_tools();

        let builtin_only = ToolSearchConfig {
            include_mcp: false,
            include_skills: false,
            ..ToolSearchConfig::default()
        };
        let catalog = ToolSearchCatalog::build(&tools, &builtin_only);
        let has_non_builtin = catalog
            .items
            .iter()
            .any(|item| item.source != ToolCatalogSource::Builtin);
        assert!(!has_non_builtin);

        let capped = ToolSearchConfig {
            max_catalog_items: Some(2),
            ..ToolSearchConfig::default()
        };
        let catalog = ToolSearchCatalog::build(&tools, &capped);
        assert_eq!(catalog.items.len(), 2);
    }

    #[test]
    fn tool_search_catalog_search_ranks_and_resolves_to_canonical_specs() {
        let catalog = default_catalog();
        let results = catalog.search("read file", 3);
        assert_eq!(results[0].name, "read_file", "name hits rank first");
        for fruitless in ["", "zzz-nothing"] {
            assert!(catalog.search(fruitless, 5).is_empty());
        }

        // Selected ids resolve back to canonical tool names for execution.
        let resolved = catalog.resolve(&results[0].id).unwrap();
        assert_eq!(resolved.name, "read_file");
        let by_name = catalog.resolve("edit_file").unwrap();
        assert_eq!(by_name.id, "tool:edit_file");

        let blob = "x".repeat(20_000);
        let oversized = spec("big", "Tool with oversized schema", json!({ "blob": blob }));
        let catalog = ToolSearchCatalog::build(
            std::slice::from_ref(&oversized),
            &ToolSearchConfig::default(),
        );
        assert!(catalog.items[0].parameters.is_none(), "oversized schemas are dropped");
    }
}