Skip to main content

roder_api/
tool_search_catalog.rs

1//! Provider-safe searchable tool catalog (roadmap phase 79, Task 2).
2//!
3//! Builds a deterministic, redacted catalog from the tool specs assembled
4//! for a turn (registered tools, MCP tools, skill tools, and lazy
5//! discovery items are all materialized as `ToolSpec` by the time a turn
6//! request is mapped). Catalog payloads are what may leave the process for
7//! provider-native tool search and for the client-executed search flow —
8//! execution stays authoritative in Roder: a selected catalog item resolves
9//! back to the canonical tool name and flows through `TurnToolExecutor`,
10//! permission checks, hooks, and policy mode like any other tool call.
11
12use serde::{Deserialize, Serialize};
13
14use crate::inference::ToolSearchConfig;
15use crate::tools::ToolSpec;
16
/// Upper bound, in bytes of serialized JSON, for a schema carried in a
/// catalog payload. Anything larger is omitted from the payload: ranking
/// only looks at names and descriptions, and the complete schema is
/// re-attached locally once the tool has been selected.
const MAX_SCHEMA_BYTES: usize = 8 * 1024;
21
22/// Catalog source classification, derived from canonical naming
23/// conventions (`mcp__server__tool`, `skill:` ids).
24#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
25#[serde(rename_all = "snake_case")]
26pub enum ToolCatalogSource {
27    Builtin,
28    Mcp,
29    Skill,
30}
31
32#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
33#[serde(rename_all = "camelCase")]
34pub struct ToolCatalogItem {
35    /// Stable catalog id (`tool:<name>`, duplicates suffixed `#2`, `#3`…).
36    pub id: String,
37    /// Canonical tool name; resolves back to the executable `ToolSpec`.
38    pub name: String,
39    pub description: String,
40    /// Redacted parameter schema; `None` when dropped for size.
41    #[serde(default, skip_serializing_if = "Option::is_none")]
42    pub parameters: Option<serde_json::Value>,
43    pub source: ToolCatalogSource,
44}
45
46/// Deterministic, redacted, size-bounded tool catalog for one turn.
47#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
48#[serde(rename_all = "camelCase")]
49pub struct ToolSearchCatalog {
50    pub items: Vec<ToolCatalogItem>,
51}
52
53impl ToolSearchCatalog {
54    /**
55     * Builds the catalog from the turn's tool specs: deterministic
56     * name-ordering, stable ids with duplicate-name handling, source
57     * filtering per config, `max_catalog_items` limiting, and redaction of
58     * credential-like values, internal-only fields, and oversized schemas.
59     */
60    pub fn build(tools: &[ToolSpec], config: &ToolSearchConfig) -> Self {
61        let mut sorted: Vec<&ToolSpec> = tools.iter().collect();
62        sorted.sort_by(|a, b| a.name.cmp(&b.name));
63
64        let mut items = Vec::new();
65        let mut seen_names: std::collections::HashMap<String, u32> =
66            std::collections::HashMap::new();
67        for spec in sorted {
68            let source = classify_source(&spec.name);
69            match source {
70                ToolCatalogSource::Mcp if !config.include_mcp => continue,
71                ToolCatalogSource::Skill if !config.include_skills => continue,
72                _ => {}
73            }
74            let count = seen_names.entry(spec.name.clone()).or_insert(0);
75            *count += 1;
76            let id = if *count == 1 {
77                format!("tool:{}", spec.name)
78            } else {
79                format!("tool:{}#{}", spec.name, count)
80            };
81            items.push(ToolCatalogItem {
82                id,
83                name: spec.name.clone(),
84                description: redact_text(&spec.description),
85                parameters: redact_schema(&spec.parameters),
86                source,
87            });
88            if let Some(max) = config.max_catalog_items
89                && items.len() >= max as usize
90            {
91                break;
92            }
93        }
94        Self { items }
95    }
96
97    /// Resolves a catalog id (or bare tool name) back to the canonical
98    /// tool name for execution through `TurnToolExecutor`.
99    pub fn resolve(&self, id_or_name: &str) -> Option<&ToolCatalogItem> {
100        self.items
101            .iter()
102            .find(|item| item.id == id_or_name || item.name == id_or_name)
103    }
104
105    /**
106     * Local search executor for the client-executed tool-search flow:
107     * case-insensitive token matching over names and descriptions, ranked
108     * by (name hits, description hits, name) for determinism.
109     */
110    pub fn search(&self, query: &str, limit: usize) -> Vec<&ToolCatalogItem> {
111        let tokens: Vec<String> = query
112            .split_whitespace()
113            .map(str::to_ascii_lowercase)
114            .filter(|token| !token.is_empty())
115            .collect();
116        if tokens.is_empty() {
117            return Vec::new();
118        }
119        let mut scored: Vec<(usize, usize, &ToolCatalogItem)> = self
120            .items
121            .iter()
122            .filter_map(|item| {
123                let name = item.name.to_ascii_lowercase();
124                let description = item.description.to_ascii_lowercase();
125                let name_hits = tokens.iter().filter(|token| name.contains(*token)).count();
126                let description_hits = tokens
127                    .iter()
128                    .filter(|token| description.contains(*token))
129                    .count();
130                (name_hits + description_hits > 0).then_some((name_hits, description_hits, item))
131            })
132            .collect();
133        scored.sort_by(|a, b| {
134            b.0.cmp(&a.0)
135                .then(b.1.cmp(&a.1))
136                .then(a.2.name.cmp(&b.2.name))
137        });
138        scored
139            .into_iter()
140            .take(limit)
141            .map(|(_, _, item)| item)
142            .collect()
143    }
144}
145
146fn classify_source(name: &str) -> ToolCatalogSource {
147    if name.starts_with("mcp__") || name.starts_with("mcp_") {
148        ToolCatalogSource::Mcp
149    } else if name.starts_with("skill__") || name.starts_with("skill:") {
150        ToolCatalogSource::Skill
151    } else {
152        ToolCatalogSource::Builtin
153    }
154}
155
/// Reports whether a schema key names something credential-like, meaning
/// its values must never leave the process in catalog payloads.
///
/// The key is normalized before matching: non-alphanumeric separators
/// (`_`, `-`, `.`, …) are removed and ASCII letters are lowercased. That
/// way `api_key`, `api-key`, `apiKey`, and `X-API-Key` are all recognized,
/// where a plain lowercase substring test would miss the hyphenated and
/// camelCase spellings. Matching is by substring, so over-matching (for
/// example `max_tokens`) is possible and errs on the side of redaction.
fn is_credential_key(key: &str) -> bool {
    // Needles are written in normalized form (no separators, lowercase).
    const NEEDLES: [&str; 9] = [
        "apikey",
        "token",
        "secret",
        "password",
        "authorization",
        "authheader",
        "bearer",
        "credential",
        "privatekey",
    ];
    let normalized: String = key
        .chars()
        .filter(|c| c.is_alphanumeric())
        .map(|c| c.to_ascii_lowercase())
        .collect();
    NEEDLES.iter().any(|needle| normalized.contains(*needle))
}
174
/// Reports whether a schema key is internal-only and must be removed from
/// catalog payloads: exactly `x-internal`, or anything starting with
/// `x-roder-` or `x_roder_` (case-sensitive).
fn is_internal_key(key: &str) -> bool {
    if key == "x-internal" {
        return true;
    }
    ["x-roder-", "x_roder_"]
        .iter()
        .any(|prefix| key.starts_with(*prefix))
}
179
/// Reports whether a string begins with one of the known credential
/// prefixes (`sk-`, `Bearer `, `rk_`, `ghp_`). The check is case-sensitive
/// and only looks at the very start of the value.
fn looks_like_credential(value: &str) -> bool {
    const CREDENTIAL_PREFIXES: [&str; 4] = ["sk-", "Bearer ", "rk_", "ghp_"];
    CREDENTIAL_PREFIXES
        .iter()
        .any(|prefix| value.starts_with(*prefix))
}
186
/// Reports whether a string starts like a per-user directory on the local
/// machine (`/Users/`, `/home/`, `C:\Users\`), which would reveal
/// process-local filesystem layout.
fn looks_like_local_path(value: &str) -> bool {
    const HOME_PREFIXES: [&str; 3] = ["/Users/", "/home/", "C:\\Users\\"];
    HOME_PREFIXES.iter().any(|prefix| value.starts_with(*prefix))
}
191
192fn redact_text(text: &str) -> String {
193    text.split_whitespace()
194        .map(|word| {
195            if looks_like_credential(word) || looks_like_local_path(word) {
196                "[redacted]"
197            } else {
198                word
199            }
200        })
201        .collect::<Vec<_>>()
202        .join(" ")
203}
204
205/// Redacts a schema for catalog payloads; oversized schemas are dropped.
206fn redact_schema(schema: &serde_json::Value) -> Option<serde_json::Value> {
207    let redacted = redact_value(schema);
208    let size = serde_json::to_vec(&redacted)
209        .map(|bytes| bytes.len())
210        .unwrap_or(usize::MAX);
211    (size <= MAX_SCHEMA_BYTES).then_some(redacted)
212}
213
214fn redact_value(value: &serde_json::Value) -> serde_json::Value {
215    match value {
216        serde_json::Value::Object(map) => {
217            let mut out = serde_json::Map::new();
218            for (key, entry) in map {
219                if is_internal_key(key) {
220                    continue;
221                }
222                if is_credential_key(key) {
223                    // Keep the key shape (it is part of the schema) but
224                    // never any default/example/const value for it.
225                    out.insert(key.clone(), redact_credential_property(entry));
226                    continue;
227                }
228                out.insert(key.clone(), redact_value(entry));
229            }
230            serde_json::Value::Object(out)
231        }
232        serde_json::Value::Array(items) => {
233            serde_json::Value::Array(items.iter().map(redact_value).collect())
234        }
235        serde_json::Value::String(text)
236            if looks_like_credential(text) || looks_like_local_path(text) =>
237        {
238            serde_json::Value::String("[redacted]".to_string())
239        }
240        other => other.clone(),
241    }
242}
243
244/// For credential-named schema properties: keep structural keys, drop any
245/// value-bearing fields (`default`, `examples`, `const`, `enum`).
246fn redact_credential_property(value: &serde_json::Value) -> serde_json::Value {
247    match value {
248        serde_json::Value::Object(map) => {
249            let mut out = serde_json::Map::new();
250            for (key, entry) in map {
251                if matches!(key.as_str(), "default" | "examples" | "const" | "enum") {
252                    continue;
253                }
254                out.insert(key.clone(), redact_value(entry));
255            }
256            serde_json::Value::Object(out)
257        }
258        _ => serde_json::Value::String("[redacted]".to_string()),
259    }
260}
261
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for constructing a `ToolSpec` fixture.
    fn spec(name: &str, description: &str, parameters: serde_json::Value) -> ToolSpec {
        ToolSpec {
            name: name.to_string(),
            description: description.to_string(),
            parameters,
        }
    }

    /// Fixture covering a builtin tool, an MCP tool with a credential
    /// default and an internal key, a skill tool whose description contains
    /// a local path, and two tools sharing one name.
    fn sample_tools() -> Vec<ToolSpec> {
        vec![
            spec(
                "read_file",
                "Read a file from the workspace",
                serde_json::json!({
                    "type": "object",
                    "properties": { "path": { "type": "string" } }
                }),
            ),
            spec(
                "mcp__github__search",
                "Search GitHub issues",
                serde_json::json!({
                    "type": "object",
                    "properties": {
                        "query": { "type": "string" },
                        "api_key": { "type": "string", "default": "sk-secret-default" }
                    },
                    "x-roder-internal": { "registry": "/Users/someone/.roder/mcp" }
                }),
            ),
            spec(
                "skill__deploy",
                "Deploy the app per the deploy skill at /Users/me/skills",
                serde_json::json!({}),
            ),
            spec("edit_file", "Edit a file", serde_json::json!({})),
            spec("edit_file", "Duplicate-named tool", serde_json::json!({})),
        ]
    }

    #[test]
    fn tool_search_catalog_is_deterministic_with_stable_ids() {
        let tools = sample_tools();
        let config = ToolSearchConfig::default();
        let first = ToolSearchCatalog::build(&tools, &config);
        let second = ToolSearchCatalog::build(&tools, &config);
        assert_eq!(first, second, "catalogs are stable across runs");

        let ids: Vec<&str> = first.items.iter().map(|item| item.id.as_str()).collect();
        assert_eq!(
            ids,
            vec![
                "tool:edit_file",
                "tool:edit_file#2",
                "tool:mcp__github__search",
                "tool:read_file",
                "tool:skill__deploy",
            ]
        );
        assert_eq!(first.items[2].source, ToolCatalogSource::Mcp);
        assert_eq!(first.items[4].source, ToolCatalogSource::Skill);
    }

    #[test]
    fn tool_search_catalog_redacts_credentials_paths_and_internal_fields() {
        let catalog = ToolSearchCatalog::build(&sample_tools(), &ToolSearchConfig::default());
        let serialized = serde_json::to_string(&catalog).unwrap();
        assert!(!serialized.contains("sk-secret-default"));
        assert!(!serialized.contains("x-roder-internal"));
        assert!(!serialized.contains("/Users/"));
        // The credential-named property's structural shape survives.
        let mcp = catalog.resolve("tool:mcp__github__search").unwrap();
        let parameters = mcp.parameters.as_ref().unwrap();
        assert!(parameters["properties"]["api_key"].get("type").is_some());
        assert!(parameters["properties"]["api_key"].get("default").is_none());
    }

    #[test]
    fn tool_search_catalog_filters_sources_and_limits_items() {
        let tools = sample_tools();
        let config = ToolSearchConfig {
            include_mcp: false,
            include_skills: false,
            ..ToolSearchConfig::default()
        };
        let catalog = ToolSearchCatalog::build(&tools, &config);
        assert!(
            catalog
                .items
                .iter()
                .all(|item| item.source == ToolCatalogSource::Builtin)
        );

        let config = ToolSearchConfig {
            max_catalog_items: Some(2),
            ..ToolSearchConfig::default()
        };
        assert_eq!(ToolSearchCatalog::build(&tools, &config).items.len(), 2);
    }

    #[test]
    fn tool_search_catalog_search_ranks_and_resolves_to_canonical_specs() {
        let catalog = ToolSearchCatalog::build(&sample_tools(), &ToolSearchConfig::default());
        let results = catalog.search("read file", 3);
        assert_eq!(results[0].name, "read_file", "name hits rank first");
        assert!(catalog.search("", 5).is_empty());
        assert!(catalog.search("zzz-nothing", 5).is_empty());

        // Selected ids resolve back to canonical tool names for execution.
        let resolved = catalog.resolve(&results[0].id).unwrap();
        assert_eq!(resolved.name, "read_file");
        assert_eq!(catalog.resolve("edit_file").unwrap().id, "tool:edit_file");
    }

    // Kept separate from the search test so a ranking failure cannot mask
    // a regression in schema size handling (and vice versa).
    #[test]
    fn tool_search_catalog_drops_oversized_schemas() {
        let oversized = spec(
            "big",
            "Tool with oversized schema",
            serde_json::json!({ "blob": "x".repeat(20_000) }),
        );
        let catalog = ToolSearchCatalog::build(
            std::slice::from_ref(&oversized),
            &ToolSearchConfig::default(),
        );
        assert!(
            catalog.items[0].parameters.is_none(),
            "oversized schemas are dropped"
        );
    }
}
393}