Skip to main content

koda_core/
tool_normalize.rs

1//! Tool name normalization — maps model-emitted variants to canonical PascalCase.
2//!
3//! Models sometimes emit tool names in lowercase (`list`, `read`) or
4//! snake_case (`list_files`, `read_file`) instead of the canonical PascalCase
5//! (`List`, `Read`). This module provides a single normalization point at the
6//! API boundary so all downstream code (dispatch, approval, loop guard, undo)
7//! sees canonical names.
8//!
9//! ## Design
10//!
11//! - Normalization is applied *once*, in `inference.rs`, after collecting
12//!   the streamed response — before dispatch, approval, or persistence.
13//! - Unknown names pass through unchanged so the dispatcher can surface
14//!   a clear `Unknown tool` error.
15//! - The alias map covers lowercase, snake_case, and camelCase variants.
16//!
17//! See: <https://github.com/lijunzh/koda/issues/548>
18//!      <https://github.com/lijunzh/koda/issues/49>
19
20use crate::providers::ToolCall;
21use std::collections::HashMap;
22use std::sync::LazyLock;
23
/// Canonical PascalCase spelling of every built-in tool name.
///
/// Entries are kept in alphabetical order. These are the only values
/// that name normalization ever resolves to; the alias table derives its
/// self-mapping keys by lowercasing each entry.
const CANONICAL: &[&str] = &[
    "ActivateSkill",
    "AskUser",
    "Bash",
    "Delete",
    "Edit",
    "Glob",
    "Grep",
    "InvokeAgent",
    "List",
    "ListAgents",
    "ListSkills",
    "MemoryRead",
    "MemoryWrite",
    "Read",
    "RecallContext",
    "TodoWrite",
    "WebFetch",
    "WebSearch",
    "Write",
];
46
47/// Static alias map: lowercased variant → canonical name.
48///
49/// Built once on first access.  Includes:
50/// - self-mappings for every canonical name (lowercased key → itself)
51/// - unambiguous snake_case alternatives   (`"list_files"` → `"List"`)
52///
53/// **Only unambiguous aliases are included.** If a name could plausibly
54/// map to more than one tool (e.g. `"search"` → Grep or Glob?), it is
55/// intentionally omitted — surfacing an `Unknown tool` error is better
56/// than silently misrouting to the wrong tool.
57static ALIASES: LazyLock<HashMap<String, &'static str>> = LazyLock::new(|| {
58    let mut m = HashMap::new();
59
60    // Self-mappings: canonical names (lowercased) → themselves.
61    // This lets normalize_tool_name() do a single O(1) lookup for
62    // every path, including the fast-path where the name is already
63    // canonical.
64    for &name in CANONICAL {
65        m.insert(name.to_lowercase(), name);
66    }
67
68    // ── Unambiguous snake_case / camelCase aliases ───────────────
69    //
70    // Only include aliases where the mapping is unambiguous.
71    // If a short name could plausibly mean multiple tools, leave it
72    // out — an "Unknown tool" error is better than silent misrouting.
73
74    // AskUser
75    m.insert("ask_user".into(), "AskUser");
76    m.insert("ask_question".into(), "AskUser");
77    m.insert("askquestion".into(), "AskUser");
78
79    // File tools
80    m.insert("list_files".into(), "List");
81    m.insert("listfiles".into(), "List");
82    m.insert("list_directory".into(), "List");
83    m.insert("ls".into(), "List");
84
85    m.insert("read_file".into(), "Read");
86    m.insert("readfile".into(), "Read");
87    m.insert("file_read".into(), "Read");
88
89    m.insert("write_file".into(), "Write");
90    m.insert("writefile".into(), "Write");
91    m.insert("create_file".into(), "Write");
92    m.insert("file_write".into(), "Write");
93
94    m.insert("edit_file".into(), "Edit");
95    m.insert("editfile".into(), "Edit");
96    m.insert("file_edit".into(), "Edit");
97
98    m.insert("delete_file".into(), "Delete");
99    m.insert("deletefile".into(), "Delete");
100    m.insert("remove_file".into(), "Delete");
101    m.insert("rm".into(), "Delete");
102
103    // Search tools
104    m.insert("grep_search".into(), "Grep");
105    m.insert("ripgrep".into(), "Grep");
106    m.insert("rg".into(), "Grep");
107
108    m.insert("glob_search".into(), "Glob");
109    m.insert("glob_pattern".into(), "Glob");
110
111    // Shell — only unambiguous aliases
112    m.insert("shell".into(), "Bash");
113    m.insert("run_command".into(), "Bash");
114    m.insert("run_shell_command".into(), "Bash");
115
116    m.insert("todo_write".into(), "TodoWrite");
117    m.insert("update_todos".into(), "TodoWrite");
118    m.insert("todo".into(), "TodoWrite");
119    // Web
120    m.insert("web_fetch".into(), "WebFetch");
121    m.insert("http_get".into(), "WebFetch");
122    m.insert("curl".into(), "WebFetch");
123    m.insert("web_search".into(), "WebSearch");
124    m.insert("search_web".into(), "WebSearch");
125
126    // Memory
127    m.insert("memory_read".into(), "MemoryRead");
128    m.insert("memory_write".into(), "MemoryWrite");
129
130    // Agent tools
131    m.insert("list_agents".into(), "ListAgents");
132    m.insert("invoke_agent".into(), "InvokeAgent");
133
134    // Skill tools
135    m.insert("list_skills".into(), "ListSkills");
136    m.insert("activate_skill".into(), "ActivateSkill");
137
138    // Recall
139    m.insert("recall_context".into(), "RecallContext");
140    m.insert("recall".into(), "RecallContext");
141
142    m
143});
144
145/// Normalize a single tool name to its canonical PascalCase form.
146///
147/// Returns the canonical name if a mapping exists, otherwise returns
148/// the input unchanged (so the dispatcher can surface a proper error).
149///
150/// ```
151/// use koda_core::tool_normalize::normalize_tool_name;
152///
153/// assert_eq!(normalize_tool_name("list_files"), "List");
154/// assert_eq!(normalize_tool_name("Read"), "Read");
155/// assert_eq!(normalize_tool_name("run_command"), "Bash");
156/// assert_eq!(normalize_tool_name("unknown_tool"), "unknown_tool");
157/// ```
158pub fn normalize_tool_name(name: &str) -> String {
159    // Single O(1) lookup: lowercase the input and check the alias map.
160    // Canonical names are self-mapped (e.g. "list" → "List"), so this
161    // handles both the fast-path and the alias-path in one operation.
162    let lower = name.to_lowercase();
163    if let Some(&canonical) = ALIASES.get(&lower) {
164        return canonical.to_string();
165    }
166
167    // Unknown — pass through for the dispatcher to handle
168    name.to_string()
169}
170
171/// Normalize all tool calls in a batch.
172///
173/// Maps model-emitted names to canonical PascalCase. No deduplication,
174/// no per-turn cap — frontier models legitimately emit 30+ parallel
175/// calls (e.g. reading many files at once). If a model emits duplicate
176/// calls, the user should see that and switch models, not have us
177/// silently paper over it. Loops are caught by the consecutive-call
178/// detector in `loop_guard.rs`.
179pub fn normalize_tool_calls(mut tool_calls: Vec<ToolCall>) -> Vec<ToolCall> {
180    for tc in &mut tool_calls {
181        tc.function_name = normalize_tool_name(&tc.function_name);
182    }
183    tool_calls
184}
185
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that each `(input, expected)` pair normalizes as stated,
    /// naming the offending input on failure.
    fn assert_normalizes(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(
                normalize_tool_name(input),
                expected,
                "'{input}' should normalize to '{expected}'"
            );
        }
    }

    // ── Canonical names pass through unchanged ──────────────────

    #[test]
    fn canonical_names_unchanged() {
        for &name in CANONICAL {
            assert_eq!(normalize_tool_name(name), name);
        }
    }

    // ── Lowercase variants ──────────────────────────────────────

    #[test]
    fn lowercase_variants() {
        assert_normalizes(&[
            ("list", "List"),
            ("read", "Read"),
            ("write", "Write"),
            ("edit", "Edit"),
            ("delete", "Delete"),
            ("bash", "Bash"),
            ("grep", "Grep"),
            ("glob", "Glob"),
            ("webfetch", "WebFetch"),
        ]);
    }

    // ── Snake_case variants ─────────────────────────────────────

    #[test]
    fn snake_case_variants() {
        assert_normalizes(&[
            ("list_files", "List"),
            ("read_file", "Read"),
            ("write_file", "Write"),
            ("edit_file", "Edit"),
            ("delete_file", "Delete"),
            ("run_shell_command", "Bash"),
            ("grep_search", "Grep"),
            ("glob_search", "Glob"),
            ("web_fetch", "WebFetch"),
            ("list_agents", "ListAgents"),
            ("invoke_agent", "InvokeAgent"),
            ("list_skills", "ListSkills"),
            ("activate_skill", "ActivateSkill"),
            ("memory_read", "MemoryRead"),
            ("memory_write", "MemoryWrite"),
            ("recall_context", "RecallContext"),
        ]);
    }

    // ── camelCase variants ──────────────────────────────────────
    //
    // camelCase spellings resolve because the lookup lowercases its input
    // and the map holds the all-lowercase form of each spelling.

    #[test]
    fn camel_case_variants() {
        assert_normalizes(&[
            ("listFiles", "List"),
            ("readFile", "Read"),
            ("writeFile", "Write"),
            ("editFile", "Edit"),
            ("deleteFile", "Delete"),
            ("askQuestion", "AskUser"),
            ("askUser", "AskUser"),
            ("webFetch", "WebFetch"),
            ("todoWrite", "TodoWrite"),
        ]);
    }

    // ── Short aliases (model hallucinations) ────────────────────

    #[test]
    fn short_aliases() {
        assert_normalizes(&[
            ("ls", "List"),
            ("rm", "Delete"),
            ("rg", "Grep"),
            ("shell", "Bash"),
            ("curl", "WebFetch"),
            ("recall", "RecallContext"),
        ]);
    }

    // ── Ambiguous names are NOT mapped (silent misrouting prevention) ──

    #[test]
    fn ambiguous_names_not_mapped() {
        // These could plausibly map to multiple tools.
        // Better to surface "Unknown tool" than silently misroute.
        for name in [
            "search",
            "execute",
            "exec",
            "patch",
            "terminal",
            "find_files",
            "fetch",
        ] {
            assert_eq!(
                normalize_tool_name(name),
                name,
                "'{name}' should NOT be mapped — it's ambiguous"
            );
        }
    }

    // ── Case insensitivity ──────────────────────────────────────

    #[test]
    fn mixed_case_normalized() {
        assert_normalizes(&[
            ("LIST", "List"),
            ("List", "List"),
            ("lIsT", "List"),
            ("READ", "Read"),
            ("BASH", "Bash"),
            ("LIST_FILES", "List"),
            ("Read_File", "Read"),
        ]);
    }

    // ── Unknown names pass through ──────────────────────────────

    #[test]
    fn unknown_names_pass_through() {
        assert_normalizes(&[
            ("FooBar", "FooBar"),
            ("totally_unknown", "totally_unknown"),
            ("", ""),
        ]);
    }

    // ── Batch normalization ─────────────────────────────────────

    #[test]
    fn normalize_batch() {
        let call = |id: &str, name: &str, args: &str| ToolCall {
            id: id.into(),
            function_name: name.into(),
            arguments: args.into(),
            thought_signature: None,
        };
        let calls = vec![
            call("1", "list", "{}"),
            call("2", "read_file", r#"{"path":"x"}"#),
            call("3", "Read", r#"{"path":"y"}"#),
        ];

        let normalized = normalize_tool_calls(calls);

        assert_eq!(normalized.len(), 3); // no dedup
        assert_eq!(normalized[0].function_name, "List");
        assert_eq!(normalized[1].function_name, "Read");
        assert_eq!(normalized[2].function_name, "Read");
    }

    // ── Every canonical name has a lowercase alias ──────────────

    #[test]
    fn all_canonical_names_have_lowercase_alias() {
        for &name in CANONICAL {
            let lower = name.to_lowercase();
            assert_eq!(
                normalize_tool_name(&lower),
                name,
                "Missing lowercase alias for '{name}'"
            );
        }
    }

    // ── Every alias target must be a canonical tool name ────────

    #[test]
    fn all_alias_targets_are_canonical() {
        for (alias, &target) in ALIASES.iter() {
            assert!(
                CANONICAL.contains(&target),
                "Alias '{alias}' maps to '{target}' which is not in CANONICAL"
            );
        }
    }

    // ── Every alias key must be lowercase ───────────────────────
    //
    // Lookups lowercase the incoming name first, so a key containing an
    // uppercase character could never be matched.

    #[test]
    fn all_alias_keys_are_lowercase() {
        for alias in ALIASES.keys() {
            assert_eq!(
                alias,
                &alias.to_lowercase(),
                "Alias key '{alias}' is not lowercase and can never match a lookup"
            );
        }
    }
}
350}