Skip to main content

koda_core/
tool_normalize.rs

1//! Tool name normalization — maps model-emitted variants to canonical PascalCase.
2//!
3//! Models sometimes emit tool names in lowercase (`list`, `read`) or
4//! snake_case (`list_files`, `read_file`) instead of the canonical PascalCase
5//! (`List`, `Read`). This module provides a single normalization point at the
6//! API boundary so all downstream code (dispatch, approval, loop guard, undo)
7//! sees canonical names.
8//!
9//! ## Design
10//!
11//! - Normalization is applied *once*, in `inference.rs`, after collecting
12//!   the streamed response — before dispatch, approval, or persistence.
13//! - Unknown names pass through unchanged so the dispatcher can surface
14//!   a clear `Unknown tool` error.
15//! - The alias map covers lowercase, snake_case, and camelCase variants.
16//!
17//! See: <https://github.com/lijunzh/koda/issues/548>
18//!      <https://github.com/lijunzh/koda/issues/49>
19
20use crate::providers::ToolCall;
21use std::collections::HashMap;
22use std::sync::LazyLock;
23
/// The canonical (PascalCase) name of every built-in tool.
///
/// This is the single source of truth for what counts as a "known" tool
/// in this module: the alias map self-maps each entry (lowercased) to
/// itself, and the unit tests check that every alias target appears here.
///
/// Entries are kept in alphabetical order so additions are easy to review.
const CANONICAL: &[&str] = &[
    "ActivateSkill",
    "AskUser",
    "Bash",
    "CancelTask",
    "Delete",
    "Edit",
    "Glob",
    "Grep",
    "InvokeAgent",
    "List",
    "ListAgents",
    "ListBackgroundTasks",
    "ListSkills",
    "MemoryRead",
    "MemoryWrite",
    "Read",
    "RecallContext",
    "TodoWrite",
    "WaitTask",
    "WebFetch",
    "WebSearch",
    "Write",
];
49
50/// Static alias map: lowercased variant → canonical name.
51///
52/// Built once on first access.  Includes:
53/// - self-mappings for every canonical name (lowercased key → itself)
54/// - unambiguous snake_case alternatives   (`"list_files"` → `"List"`)
55///
56/// **Only unambiguous aliases are included.** If a name could plausibly
57/// map to more than one tool (e.g. `"search"` → Grep or Glob?), it is
58/// intentionally omitted — surfacing an `Unknown tool` error is better
59/// than silently misrouting to the wrong tool.
60static ALIASES: LazyLock<HashMap<String, &'static str>> = LazyLock::new(|| {
61    let mut m = HashMap::new();
62
63    // Self-mappings: canonical names (lowercased) → themselves.
64    // This lets normalize_tool_name() do a single O(1) lookup for
65    // every path, including the fast-path where the name is already
66    // canonical.
67    for &name in CANONICAL {
68        m.insert(name.to_lowercase(), name);
69    }
70
71    // ── Unambiguous snake_case / camelCase aliases ───────────────
72    //
73    // Only include aliases where the mapping is unambiguous.
74    // If a short name could plausibly mean multiple tools, leave it
75    // out — an "Unknown tool" error is better than silent misrouting.
76
77    // AskUser
78    m.insert("ask_user".into(), "AskUser");
79    m.insert("ask_question".into(), "AskUser");
80    m.insert("askquestion".into(), "AskUser");
81
82    // File tools
83    m.insert("list_files".into(), "List");
84    m.insert("listfiles".into(), "List");
85    m.insert("list_directory".into(), "List");
86    m.insert("ls".into(), "List");
87
88    m.insert("read_file".into(), "Read");
89    m.insert("readfile".into(), "Read");
90    m.insert("file_read".into(), "Read");
91
92    m.insert("write_file".into(), "Write");
93    m.insert("writefile".into(), "Write");
94    m.insert("create_file".into(), "Write");
95    m.insert("file_write".into(), "Write");
96
97    m.insert("edit_file".into(), "Edit");
98    m.insert("editfile".into(), "Edit");
99    m.insert("file_edit".into(), "Edit");
100
101    m.insert("delete_file".into(), "Delete");
102    m.insert("deletefile".into(), "Delete");
103    m.insert("remove_file".into(), "Delete");
104    m.insert("rm".into(), "Delete");
105
106    // Search tools
107    m.insert("grep_search".into(), "Grep");
108    m.insert("ripgrep".into(), "Grep");
109    m.insert("rg".into(), "Grep");
110
111    m.insert("glob_search".into(), "Glob");
112    m.insert("glob_pattern".into(), "Glob");
113
114    // Shell — only unambiguous aliases
115    m.insert("shell".into(), "Bash");
116    m.insert("run_command".into(), "Bash");
117    m.insert("run_shell_command".into(), "Bash");
118
119    m.insert("todo_write".into(), "TodoWrite");
120    m.insert("update_todos".into(), "TodoWrite");
121    m.insert("todo".into(), "TodoWrite");
122    // Web
123    m.insert("web_fetch".into(), "WebFetch");
124    m.insert("http_get".into(), "WebFetch");
125    m.insert("curl".into(), "WebFetch");
126    m.insert("web_search".into(), "WebSearch");
127    m.insert("search_web".into(), "WebSearch");
128
129    // Memory
130    m.insert("memory_read".into(), "MemoryRead");
131    m.insert("memory_write".into(), "MemoryWrite");
132
133    // Agent tools
134    m.insert("list_agents".into(), "ListAgents");
135    m.insert("invoke_agent".into(), "InvokeAgent");
136
137    // Skill tools
138    m.insert("list_skills".into(), "ListSkills");
139    m.insert("activate_skill".into(), "ActivateSkill");
140
141    // Recall
142    m.insert("recall_context".into(), "RecallContext");
143    m.insert("recall".into(), "RecallContext");
144
145    // #996 Layer 2 — background-task management
146    m.insert("list_background_tasks".into(), "ListBackgroundTasks");
147    m.insert("list_bg_tasks".into(), "ListBackgroundTasks");
148    m.insert("cancel_task".into(), "CancelTask");
149    m.insert("wait_task".into(), "WaitTask");
150    m.insert("wait_for_task".into(), "WaitTask");
151
152    m
153});
154
155/// Normalize a single tool name to its canonical PascalCase form.
156///
157/// Returns the canonical name if a mapping exists, otherwise returns
158/// the input unchanged (so the dispatcher can surface a proper error).
159///
160/// ```
161/// use koda_core::tool_normalize::normalize_tool_name;
162///
163/// assert_eq!(normalize_tool_name("list_files"), "List");
164/// assert_eq!(normalize_tool_name("Read"), "Read");
165/// assert_eq!(normalize_tool_name("run_command"), "Bash");
166/// assert_eq!(normalize_tool_name("unknown_tool"), "unknown_tool");
167/// ```
168pub fn normalize_tool_name(name: &str) -> String {
169    // Single O(1) lookup: lowercase the input and check the alias map.
170    // Canonical names are self-mapped (e.g. "list" → "List"), so this
171    // handles both the fast-path and the alias-path in one operation.
172    let lower = name.to_lowercase();
173    if let Some(&canonical) = ALIASES.get(&lower) {
174        return canonical.to_string();
175    }
176
177    // Unknown — pass through for the dispatcher to handle
178    name.to_string()
179}
180
181/// Normalize all tool calls in a batch.
182///
183/// Maps model-emitted names to canonical PascalCase. No deduplication,
184/// no per-turn cap — frontier models legitimately emit 30+ parallel
185/// calls (e.g. reading many files at once). If a model emits duplicate
186/// calls, the user should see that and switch models, not have us
187/// silently paper over it. Loops are caught by the consecutive-call
188/// detector in `loop_guard.rs`.
189pub fn normalize_tool_calls(mut tool_calls: Vec<ToolCall>) -> Vec<ToolCall> {
190    for tc in &mut tool_calls {
191        tc.function_name = normalize_tool_name(&tc.function_name);
192    }
193    tool_calls
194}
195
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that each `(input, expected)` pair normalizes as stated.
    fn assert_normalizes(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(normalize_tool_name(input), expected);
        }
    }

    // ── Canonical names pass through unchanged ──────────────────

    #[test]
    fn canonical_names_unchanged() {
        CANONICAL
            .iter()
            .for_each(|&name| assert_eq!(normalize_tool_name(name), name));
    }

    // ── Lowercase variants ──────────────────────────────────────

    #[test]
    fn lowercase_variants() {
        assert_normalizes(&[
            ("list", "List"),
            ("read", "Read"),
            ("write", "Write"),
            ("edit", "Edit"),
            ("delete", "Delete"),
            ("bash", "Bash"),
            ("grep", "Grep"),
            ("glob", "Glob"),
            ("webfetch", "WebFetch"),
        ]);
    }

    // ── Snake_case variants ─────────────────────────────────────

    #[test]
    fn snake_case_variants() {
        assert_normalizes(&[
            ("list_files", "List"),
            ("read_file", "Read"),
            ("write_file", "Write"),
            ("edit_file", "Edit"),
            ("delete_file", "Delete"),
            ("run_shell_command", "Bash"),
            ("grep_search", "Grep"),
            ("glob_search", "Glob"),
            ("web_fetch", "WebFetch"),
            ("list_agents", "ListAgents"),
            ("invoke_agent", "InvokeAgent"),
            ("list_skills", "ListSkills"),
            ("activate_skill", "ActivateSkill"),
            ("memory_read", "MemoryRead"),
            ("memory_write", "MemoryWrite"),
            ("recall_context", "RecallContext"),
        ]);
    }

    // ── Short aliases (model hallucinations) ────────────────────

    #[test]
    fn short_aliases() {
        assert_normalizes(&[
            ("ls", "List"),
            ("rm", "Delete"),
            ("rg", "Grep"),
            ("shell", "Bash"),
            ("curl", "WebFetch"),
            ("recall", "RecallContext"),
        ]);
    }

    // ── Ambiguous names are NOT mapped (silent misrouting prevention) ──

    #[test]
    fn ambiguous_names_not_mapped() {
        // Each of these could plausibly refer to more than one tool, so the
        // name must come back untouched and end up as an "Unknown tool"
        // error instead of being silently misrouted.
        const AMBIGUOUS: [&str; 7] = [
            "search",
            "execute",
            "exec",
            "patch",
            "terminal",
            "find_files",
            "fetch",
        ];
        for name in AMBIGUOUS {
            assert_eq!(
                normalize_tool_name(name),
                name,
                "'{name}' should NOT be mapped — it's ambiguous"
            );
        }
    }

    // ── Case insensitivity ──────────────────────────────────────

    #[test]
    fn mixed_case_normalized() {
        assert_normalizes(&[
            ("LIST", "List"),
            ("List", "List"),
            ("lIsT", "List"),
            ("READ", "Read"),
            ("BASH", "Bash"),
            ("LIST_FILES", "List"),
            ("Read_File", "Read"),
        ]);
    }

    // ── Unknown names pass through ──────────────────────────────

    #[test]
    fn unknown_names_pass_through() {
        assert_normalizes(&[
            ("FooBar", "FooBar"),
            ("totally_unknown", "totally_unknown"),
            ("", ""),
        ]);
    }

    // ── Batch normalization ─────────────────────────────────────

    #[test]
    fn normalize_batch() {
        // Small constructor so the three fixtures differ only where it matters.
        let call = |id: &str, name: &str, args: &str| ToolCall {
            id: id.into(),
            function_name: name.into(),
            arguments: args.into(),
            thought_signature: None,
        };

        let normalized = normalize_tool_calls(vec![
            call("1", "list", "{}"),
            call("2", "read_file", r#"{"path":"x"}"#),
            call("3", "Read", r#"{"path":"y"}"#),
        ]);

        let names: Vec<&str> = normalized
            .iter()
            .map(|tc| tc.function_name.as_str())
            .collect();
        assert_eq!(names, ["List", "Read", "Read"]);
        assert_eq!(normalized.len(), 3); // no dedup
    }

    // ── Every canonical name has a lowercase alias ──────────────

    #[test]
    fn all_canonical_names_have_lowercase_alias() {
        for &name in CANONICAL {
            let normalized = normalize_tool_name(&name.to_lowercase());
            assert_eq!(normalized, name, "Missing lowercase alias for '{name}'");
        }
    }

    // ── Every alias target must be a canonical tool name ────────

    #[test]
    fn all_alias_targets_are_canonical() {
        for (alias, target) in ALIASES.iter() {
            assert!(
                CANONICAL.contains(target),
                "Alias '{alias}' maps to '{target}' which is not in CANONICAL"
            );
        }
    }
}
360}