Skip to main content

lean_ctx/proxy/
tool_kind.rs

1//! Classifies what produced a `tool_result` so the proxy never lossy-compresses
2//! a file/source-code read the model still needs (e.g. mid-refactor).
3//!
4//! The request body only carries the tool *result* plus an id linking it to the
5//! originating tool *call*. We resolve that id → tool name from the assistant's
6//! `tool_use` / `tool_calls` / `function_call` items, then map the name to a
7//! [`ToolResultKind`]. A content heuristic ([`looks_like_source_code`]) is the
8//! fallback for unknown/custom tools so a file read through a non-standard tool
9//! is still protected.
10
11use std::collections::HashMap;
12
13use serde_json::Value;
14
/// Category of the tool that produced a `tool_result`.
///
/// The proxy uses the category to decide whether a result may be run through
/// its lossy compressors or has to be forwarded untouched.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ToolResultKind {
    /// Output of a file/source read. It is the text the model goes on to edit,
    /// so it has to arrive intact.
    FileRead,
    /// Output of a shell/command invocation; the pattern compressors may
    /// process it.
    Shell,
    /// Output of a search or directory listing; compressing it is fine.
    Search,
    /// Tool not recognised. The content heuristic is consulted before any
    /// compression is attempted.
    Other,
}
27
28/// Maps a tool name (from any agent) to a [`ToolResultKind`].
29///
30/// Matching is case-insensitive and substring-based so vendor prefixes
31/// (`mcp__fs__read_file`, `functions.read`) and casing variants are covered.
32pub fn classify_tool_name(name: &str) -> ToolResultKind {
33    let n = name.to_ascii_lowercase();
34
35    // Order matters: a "read_file" must not be caught by a generic "file".
36    const FILE_READ: &[&str] = &[
37        "read_file",
38        "readfile",
39        "file_read",
40        "fsread",
41        "fs_read",
42        "view_file",
43        "viewfile",
44        "open_file",
45        "notebookread",
46        "notebook_read",
47        "cat_file",
48        "get_file",
49        "fetch_file",
50        "ctx_read",
51        "ctx_multi_read",
52        "multi_read",
53        "multiread",
54        "read_many", // Gemini CLI `read_many_files`
55        "read_files",
56        "str_replace_editor", // view sub-mode returns file content
57    ];
58    if FILE_READ.iter().any(|k| n.contains(k)) {
59        return ToolResultKind::FileRead;
60    }
61    // Bare "read"/"view"/"cat" as a whole token (Claude Code `Read`, Pi `read`).
62    if matches!(n.as_str(), "read" | "view" | "cat" | "open") {
63        return ToolResultKind::FileRead;
64    }
65
66    const SEARCH: &[&str] = &[
67        "grep",
68        "ripgrep",
69        "search",
70        "find",
71        "glob",
72        "list_dir",
73        "listdir",
74        "list_files",
75        "listfiles",
76        "ls",
77        "codebase_search",
78        "ctx_search",
79        "ctx_tree",
80    ];
81    if SEARCH.iter().any(|k| n.contains(k)) {
82        return ToolResultKind::Search;
83    }
84
85    const SHELL: &[&str] = &[
86        "bash",
87        "shell",
88        "terminal",
89        "run_command",
90        "run_terminal",
91        "runterminal",
92        "execute_command",
93        "exec_command",
94        "command_exec",
95        "ctx_shell",
96    ];
97    if SHELL.iter().any(|k| n.contains(k)) {
98        return ToolResultKind::Shell;
99    }
100    if matches!(n.as_str(), "run" | "exec" | "execute" | "command" | "sh") {
101        return ToolResultKind::Shell;
102    }
103
104    ToolResultKind::Other
105}
106
107/// Builds a `tool_use_id → tool_name` map from Anthropic `messages`.
108///
109/// Scans every assistant content block of `type:"tool_use"`.
110pub fn anthropic_tool_names(messages: &[Value]) -> HashMap<String, String> {
111    let mut map = HashMap::new();
112    for msg in messages {
113        let Some(blocks) = msg.get("content").and_then(|c| c.as_array()) else {
114            continue;
115        };
116        for block in blocks {
117            if block.get("type").and_then(|t| t.as_str()) != Some("tool_use") {
118                continue;
119            }
120            if let (Some(id), Some(name)) = (
121                block.get("id").and_then(|v| v.as_str()),
122                block.get("name").and_then(|v| v.as_str()),
123            ) {
124                map.insert(id.to_string(), name.to_string());
125            }
126        }
127    }
128    map
129}
130
131/// Builds a `tool_call_id → function_name` map from OpenAI Chat Completions
132/// `messages` (assistant `tool_calls[]`).
133pub fn openai_tool_names(messages: &[Value]) -> HashMap<String, String> {
134    let mut map = HashMap::new();
135    for msg in messages {
136        let Some(calls) = msg.get("tool_calls").and_then(|c| c.as_array()) else {
137            continue;
138        };
139        for call in calls {
140            let id = call.get("id").and_then(|v| v.as_str());
141            let name = call
142                .get("function")
143                .and_then(|f| f.get("name"))
144                .and_then(|v| v.as_str());
145            if let (Some(id), Some(name)) = (id, name) {
146                map.insert(id.to_string(), name.to_string());
147            }
148        }
149    }
150    map
151}
152
153/// Builds a `call_id → name` map from OpenAI Responses `input` items
154/// (`type:"function_call"`).
155pub fn responses_tool_names(input: &[Value]) -> HashMap<String, String> {
156    let mut map = HashMap::new();
157    for item in input {
158        if item.get("type").and_then(|t| t.as_str()) != Some("function_call") {
159            continue;
160        }
161        if let (Some(id), Some(name)) = (
162            item.get("call_id").and_then(|v| v.as_str()),
163            item.get("name").and_then(|v| v.as_str()),
164        ) {
165            map.insert(id.to_string(), name.to_string());
166        }
167    }
168    map
169}
170
171/// Whether a `tool_result` with the given resolved kind and content must be
172/// preserved intact (never lossy-compressed) by the proxy.
173///
174/// File reads are always protected; unknown tools are protected only when the
175/// content heuristically looks like source code. Shell/search output is never
176/// protected here — it flows through the normal pattern compressors.
177pub fn should_protect(kind: ToolResultKind, content: &str) -> bool {
178    match kind {
179        ToolResultKind::FileRead => true,
180        ToolResultKind::Other => looks_like_source_code(content),
181        ToolResultKind::Shell | ToolResultKind::Search => false,
182    }
183}
184
/// Heuristic fallback: is `content` more likely source code than command
/// output or a log?
///
/// Only the first 200 lines are inspected and blank lines are ignored. The
/// answer is `true` only when all of the following hold:
///
/// * at least 5 non-blank lines were seen,
/// * no line starts (after indentation) with a shell/log marker such as `$ `,
///   `error:`, `WARN ` or `Compiling `,
/// * at least half of the non-blank lines are code-shaped — they contain one
///   of the keyword fragments below anywhere in the line, or they are indented
///   and end in `{`, `}`, `;`, `=>`, `->` or `:`.
///
/// A single shell/log marker is enough to reject the text, so genuine logs and
/// build output remain eligible for compression.
pub fn looks_like_source_code(content: &str) -> bool {
    const MAX_LINES: usize = 200;
    const MIN_LINES: usize = 5;

    // Line prefixes typical of command transcripts, compiler output and logs.
    const SHELL_PREFIXES: &[&str] = &[
        "$ ",
        "% ",
        ">>> ",
        "warning:",
        "error:",
        "error[",
        "INFO ",
        "WARN ",
        "DEBUG ",
        "ERROR ",
        "Compiling ",
        "Downloaded ",
        "test result:",
    ];
    // Line endings typical of code once the line is indented.
    const CODE_SUFFIXES: &[&str] = &["{", "}", ";", "=>", "->", ":"];
    // Keyword fragments; matched as plain substrings of the line.
    const CODE_KEYWORDS: &[&str] = &[
        "fn ",
        "def ",
        "class ",
        "import ",
        "from ",
        "function ",
        "func ",
        "pub ",
        "const ",
        "let ",
        "var ",
        "package ",
        "public ",
        "private ",
        "struct ",
        "enum ",
        "impl ",
        "#include",
        "return ",
        "async ",
        "export ",
    ];

    let mut total = 0usize;
    let mut code_like = 0usize;

    for line in content.lines().take(MAX_LINES).map(str::trim_end) {
        let body = line.trim_start();
        if body.is_empty() {
            continue;
        }
        total += 1;

        // Any command/log marker vetoes the whole text, so stop right away.
        if SHELL_PREFIXES.iter().any(|prefix| body.starts_with(*prefix)) {
            return false;
        }

        let mentions_keyword = CODE_KEYWORDS.iter().any(|keyword| body.contains(*keyword));
        // Trailing whitespace is already gone, so a length difference means
        // the line had leading indentation.
        let indented = body.len() != line.len();
        let code_shaped = mentions_keyword
            || (indented && CODE_SUFFIXES.iter().any(|suffix| body.ends_with(*suffix)));

        if code_shaped {
            code_like += 1;
        }
    }

    // Enough material, and at least half of it code-shaped.
    total >= MIN_LINES && code_like * 2 >= total
}
267
#[cfg(test)]
mod tests {
    use super::*;

    // Name classification.

    #[test]
    fn classifies_file_read_tools() {
        // Multi-file reads (the last two entries) return file content and must
        // be protected just like single-file reads.
        let file_read_tools = [
            "Read",
            "read_file",
            "view_file",
            "ctx_read",
            "mcp__fs__readFile",
            "ctx_multi_read",
            "read_many_files",
        ];
        for name in file_read_tools {
            let kind = classify_tool_name(name);
            assert_eq!(kind, ToolResultKind::FileRead, "{name} should be FileRead");
        }
    }

    #[test]
    fn classifies_shell_and_search() {
        let expectations = [
            ("Bash", ToolResultKind::Shell),
            ("run_terminal_cmd", ToolResultKind::Shell),
            ("Grep", ToolResultKind::Search),
            ("codebase_search", ToolResultKind::Search),
        ];
        for (tool, expected) in expectations {
            assert_eq!(classify_tool_name(tool), expected);
        }
    }

    #[test]
    fn unknown_tool_is_other() {
        let kind = classify_tool_name("submit_pr");
        assert_eq!(kind, ToolResultKind::Other);
    }

    // Id → name resolution for each wire format.

    #[test]
    fn anthropic_names_resolve_from_tool_use() {
        let assistant_turn = serde_json::json!({
            "role": "assistant",
            "content": [
                {"type": "text", "text": "reading"},
                {"type": "tool_use", "id": "toolu_1", "name": "Read", "input": {}}
            ]
        });
        let user_turn = serde_json::json!({
            "role": "user",
            "content": [{"type": "tool_result", "tool_use_id": "toolu_1", "content": "x"}]
        });

        let names = anthropic_tool_names(&[assistant_turn, user_turn]);

        assert_eq!(names.get("toolu_1"), Some(&String::from("Read")));
    }

    #[test]
    fn openai_names_resolve_from_tool_calls() {
        let assistant_turn = serde_json::json!({
            "role": "assistant",
            "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "read_file"}}]
        });

        let names = openai_tool_names(&[assistant_turn]);

        assert_eq!(names.get("call_1"), Some(&String::from("read_file")));
    }

    #[test]
    fn responses_names_resolve_from_function_call() {
        let function_call = serde_json::json!({
            "type": "function_call", "call_id": "call_1", "name": "Read", "arguments": "{}"
        });

        let names = responses_tool_names(&[function_call]);

        assert_eq!(names.get("call_1"), Some(&String::from("Read")));
    }

    // Content heuristic.

    #[test]
    fn source_code_detected() {
        let code = "pub fn build(cfg: &Config) -> Result<App> {\n    let mut app = App::new();\n    app.configure(cfg);\n    for route in cfg.routes() {\n        app.register(route);\n    }\n    Ok(app)\n}";
        let verdict = looks_like_source_code(code);
        assert!(verdict);
    }

    #[test]
    fn command_output_not_code() {
        let log = "$ cargo build\n   Compiling foo v0.1.0\n   Compiling bar v0.2.0\nwarning: unused variable\n    Finished dev target\nerror: could not compile";
        let verdict = looks_like_source_code(log);
        assert!(!verdict);
    }

    #[test]
    fn plain_prose_not_code() {
        let prose = "This is a normal paragraph of text.\nIt has several sentences.\nNone of them are code.\nThey are just words on lines.\nMore words follow here.";
        let verdict = looks_like_source_code(prose);
        assert!(!verdict);
    }
}