Skip to main content

codetether_agent/rlm/
tools.rs

1//! RLM REPL operations expressed as tool definitions.
2//!
3//! When FunctionGemma is active the RLM loop sends these definitions alongside
4//! the analysis prompt.  The primary LLM (or FunctionGemma after reformatting)
5//! returns structured `ContentPart::ToolCall` entries that are dispatched here
6//! instead of being regex-parsed from code blocks.
7//!
8//! Each tool mirrors a command in the existing DSL REPL (`head`, `tail`,
9//! `grep`, `count`, `llm_query`, `FINAL`).
10
11use crate::provider::ToolDefinition;
12
/// Maximum number of lines kept from a single tool result before truncation.
const MAX_TOOL_OUTPUT_LINES: usize = 1_200;
/// Maximum number of characters kept from a single tool result before truncation.
const MAX_TOOL_OUTPUT_CHARS: usize = 120_000;
15
/// Return at most the first `max_chars` characters of `input` as an owned string.
///
/// Length is measured in `char`s (Unicode scalar values), not bytes, so the
/// cut never lands inside a multi-byte UTF-8 sequence.
fn truncate_chars(input: &str, max_chars: usize) -> String {
    // The byte offset of the character at index `max_chars` is exactly where
    // the kept prefix ends; if there is no such character the input already fits.
    match input.char_indices().nth(max_chars) {
        Some((cut, _)) => input[..cut].to_owned(),
        None => input.to_owned(),
    }
}
22
23fn clamp_tool_output(output: String, tool_name: &str) -> String {
24    let original_lines = output.lines().count();
25    let original_chars = output.chars().count();
26    let mut clamped = output;
27    let mut trimmed = false;
28
29    if original_lines > MAX_TOOL_OUTPUT_LINES {
30        clamped = clamped
31            .lines()
32            .take(MAX_TOOL_OUTPUT_LINES)
33            .collect::<Vec<_>>()
34            .join("\n");
35        trimmed = true;
36    }
37
38    if clamped.chars().count() > MAX_TOOL_OUTPUT_CHARS {
39        clamped = truncate_chars(&clamped, MAX_TOOL_OUTPUT_CHARS);
40        trimmed = true;
41    }
42
43    if trimmed {
44        clamped.push_str(&format!(
45            "\n\n[RLM TOOL OUTPUT TRUNCATED by {tool_name}: {original_lines} lines/{original_chars} chars → <= {MAX_TOOL_OUTPUT_LINES} lines/{MAX_TOOL_OUTPUT_CHARS} chars]"
46        ));
47    }
48
49    clamped
50}
51
52/// All RLM REPL operations as tool definitions.
53pub fn rlm_tool_definitions() -> Vec<ToolDefinition> {
54    vec![
55        ToolDefinition {
56            name: "rlm_head".to_string(),
57            description: "Return the first N lines of the loaded context.".to_string(),
58            parameters: serde_json::json!({
59                "type": "object",
60                "properties": {
61                    "n": {
62                        "type": "integer",
63                        "description": "Number of lines from the start (default: 10)"
64                    }
65                },
66                "required": []
67            }),
68        },
69        ToolDefinition {
70            name: "rlm_tail".to_string(),
71            description: "Return the last N lines of the loaded context.".to_string(),
72            parameters: serde_json::json!({
73                "type": "object",
74                "properties": {
75                    "n": {
76                        "type": "integer",
77                        "description": "Number of lines from the end (default: 10)"
78                    }
79                },
80                "required": []
81            }),
82        },
83        ToolDefinition {
84            name: "rlm_grep".to_string(),
85            description: "Search the loaded context for lines matching a regex pattern. Returns matching lines with line numbers.".to_string(),
86            parameters: serde_json::json!({
87                "type": "object",
88                "properties": {
89                    "pattern": {
90                        "type": "string",
91                        "description": "Regex pattern to search for"
92                    }
93                },
94                "required": ["pattern"]
95            }),
96        },
97        ToolDefinition {
98            name: "rlm_count".to_string(),
99            description: "Count occurrences of a regex pattern in the loaded context.".to_string(),
100            parameters: serde_json::json!({
101                "type": "object",
102                "properties": {
103                    "pattern": {
104                        "type": "string",
105                        "description": "Regex pattern to count"
106                    }
107                },
108                "required": ["pattern"]
109            }),
110        },
111        ToolDefinition {
112            name: "rlm_slice".to_string(),
113            description: "Return a slice of the context by line range.".to_string(),
114            parameters: serde_json::json!({
115                "type": "object",
116                "properties": {
117                    "start": {
118                        "type": "integer",
119                        "description": "Start line number (0-indexed)"
120                    },
121                    "end": {
122                        "type": "integer",
123                        "description": "End line number (exclusive)"
124                    }
125                },
126                "required": ["start", "end"]
127            }),
128        },
129        ToolDefinition {
130            name: "rlm_llm_query".to_string(),
131            description: "Ask a focused sub-question about a portion of the context. Use this for semantic understanding of specific sections.".to_string(),
132            parameters: serde_json::json!({
133                "type": "object",
134                "properties": {
135                    "query": {
136                        "type": "string",
137                        "description": "The question to answer about the context"
138                    },
139                    "context_slice": {
140                        "type": "string",
141                        "description": "Optional: specific text slice to analyze (if omitted, uses full context)"
142                    }
143                },
144                "required": ["query"]
145            }),
146        },
147        ToolDefinition {
148            name: "rlm_final".to_string(),
149            description: "Return the final structured payload to the analysis query. Always emit a JSON payload matching the FINAL schema.".to_string(),
150            parameters: serde_json::json!({
151                "type": "object",
152                "properties": {
153                    "payload": {
154                        "type": "object",
155                        "description": "FINAL(JSON) payload. Preferred over `answer`."
156                    },
157                    "answer": {
158                        "type": "string",
159                        "description": "Deprecated compatibility field; if used, must contain the FINAL(JSON) payload string."
160                    }
161                },
162                "additionalProperties": false
163            }),
164        },
165        ToolDefinition {
166            name: "rlm_ast_query".to_string(),
167            description: "Execute a tree-sitter AST query on the loaded context. Use this for structural code analysis (function signatures, struct fields, impl blocks).".to_string(),
168            parameters: serde_json::json!({
169                "type": "object",
170                "properties": {
171                    "query": {
172                        "type": "string",
173                        "description": "Tree-sitter S-expression query (e.g., '(function_item name: (identifier) @name)')"
174                    }
175                },
176                "required": ["query"]
177            }),
178        },
179    ]
180}
181
/// Result of dispatching an RLM tool call.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so results can be logged,
/// duplicated and compared directly (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RlmToolResult {
    /// Normal output to feed back to the LLM.
    Output(String),
    /// The final answer — terminates the RLM loop.
    Final(String),
}
189
190/// Dispatch a structured tool call against the REPL.
191///
192/// Returns `None` if the tool name is not an `rlm_*` tool (pass-through for
193/// any other tool calls the model may have produced).
194pub fn dispatch_tool_call(
195    name: &str,
196    arguments: &str,
197    repl: &mut super::repl::RlmRepl,
198) -> Option<RlmToolResult> {
199    let args: serde_json::Value = serde_json::from_str(arguments).unwrap_or_default();
200
201    match name {
202        "rlm_head" => {
203            let n = args.get("n").and_then(|v| v.as_u64()).unwrap_or(10) as usize;
204            let output = clamp_tool_output(repl.head(n).join("\n"), "rlm_head");
205            Some(RlmToolResult::Output(output))
206        }
207        "rlm_tail" => {
208            let n = args.get("n").and_then(|v| v.as_u64()).unwrap_or(10) as usize;
209            let output = clamp_tool_output(repl.tail(n).join("\n"), "rlm_tail");
210            Some(RlmToolResult::Output(output))
211        }
212        "rlm_grep" => {
213            let pattern = args.get("pattern").and_then(|v| v.as_str()).unwrap_or("");
214            let matches = repl.grep(pattern);
215            let output = matches
216                .iter()
217                .map(|(i, line)| format!("{}:{}", i, line))
218                .collect::<Vec<_>>()
219                .join("\n");
220            if output.is_empty() {
221                Some(RlmToolResult::Output("(no matches)".to_string()))
222            } else {
223                Some(RlmToolResult::Output(clamp_tool_output(output, "rlm_grep")))
224            }
225        }
226        "rlm_count" => {
227            let pattern = args.get("pattern").and_then(|v| v.as_str()).unwrap_or("");
228            let count = repl.count(pattern);
229            Some(RlmToolResult::Output(count.to_string()))
230        }
231        "rlm_slice" => {
232            let start = args.get("start").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
233            let end = args.get("end").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
234            let output = clamp_tool_output(repl.slice(start, end).to_string(), "rlm_slice");
235            Some(RlmToolResult::Output(output))
236        }
237        "rlm_llm_query" => {
238            // The llm_query tool requires async provider calls — return a
239            // sentinel so the caller knows to handle it specially.
240            let query = args
241                .get("query")
242                .and_then(|v| v.as_str())
243                .unwrap_or("")
244                .to_string();
245            let context_slice = args
246                .get("context_slice")
247                .and_then(|v| v.as_str())
248                .map(|s| s.to_string());
249            // Encode the query + optional slice as JSON so the caller can
250            // destructure it.
251            let payload = serde_json::json!({
252                "__rlm_llm_query": true,
253                "query": query,
254                "context_slice": context_slice,
255            });
256            Some(RlmToolResult::Output(payload.to_string()))
257        }
258        "rlm_final" => {
259            if let Some(payload) = args.get("payload") {
260                if let Some(text_payload) = payload.as_str() {
261                    return Some(RlmToolResult::Final(text_payload.to_string()));
262                }
263                return Some(RlmToolResult::Final(payload.to_string()));
264            }
265            let answer = args
266                .get("answer")
267                .and_then(|v| v.as_str())
268                .unwrap_or("")
269                .to_string();
270            Some(RlmToolResult::Final(answer))
271        }
272        "rlm_ast_query" => {
273            let query = args.get("query").and_then(|v| v.as_str()).unwrap_or("");
274
275            // Create a tree-sitter oracle and execute the query
276            let mut oracle = super::oracle::TreeSitterOracle::new(repl.context().to_string());
277            match oracle.query(query) {
278                Ok(result) => {
279                    // Format the result as JSON
280                    let matches: Vec<serde_json::Value> = result
281                        .matches
282                        .iter()
283                        .map(|m| {
284                            serde_json::json!({
285                                "line": m.line,
286                                "column": m.column,
287                                "captures": m.captures,
288                                "text": m.text
289                            })
290                        })
291                        .collect();
292
293                    let output = serde_json::json!({
294                        "query": query,
295                        "match_count": matches.len(),
296                        "matches": matches
297                    });
298
299                    Some(RlmToolResult::Output(clamp_tool_output(
300                        output.to_string(),
301                        "rlm_ast_query",
302                    )))
303                }
304                Err(e) => Some(RlmToolResult::Output(format!("AST query error: {}", e))),
305            }
306        }
307        _ => None, // Not an RLM tool
308    }
309}
310
#[cfg(test)]
mod tests {
    use super::*;
    use crate::rlm::repl::{ReplRuntime, RlmRepl};

    /// Five-line fixture shared by the head/tail tests.
    const FIVE_LINES: &str = "line 1\nline 2\nline 3\nline 4\nline 5";

    /// Unwrap an `Output` result, failing the test for anything else.
    fn expect_output(result: Option<RlmToolResult>) -> String {
        match result {
            Some(RlmToolResult::Output(text)) => text,
            _ => panic!("expected Output"),
        }
    }

    /// Unwrap a `Final` result, failing the test for anything else.
    fn expect_final(result: Option<RlmToolResult>) -> String {
        match result {
            Some(RlmToolResult::Final(text)) => text,
            _ => panic!("expected Final"),
        }
    }

    #[test]
    fn tool_definitions_are_complete() {
        let expected_names = [
            "rlm_head",
            "rlm_tail",
            "rlm_grep",
            "rlm_count",
            "rlm_slice",
            "rlm_llm_query",
            "rlm_final",
            "rlm_ast_query",
        ];

        let defs = rlm_tool_definitions();
        assert_eq!(defs.len(), 8);

        let names: Vec<&str> = defs.iter().map(|d| d.name.as_str()).collect();
        for wanted in expected_names.iter() {
            assert!(names.contains(wanted));
        }
    }

    #[test]
    fn dispatch_head() {
        let mut repl = RlmRepl::new(FIVE_LINES.to_string(), ReplRuntime::Rust);
        let text = expect_output(dispatch_tool_call("rlm_head", r#"{"n": 2}"#, &mut repl));
        assert_eq!(text, "line 1\nline 2");
    }

    #[test]
    fn dispatch_tail() {
        let mut repl = RlmRepl::new(FIVE_LINES.to_string(), ReplRuntime::Rust);
        let text = expect_output(dispatch_tool_call("rlm_tail", r#"{"n": 2}"#, &mut repl));
        assert_eq!(text, "line 4\nline 5");
    }

    #[test]
    fn dispatch_grep() {
        let ctx = "error: fail\ninfo: ok\nerror: boom".to_string();
        let mut repl = RlmRepl::new(ctx, ReplRuntime::Rust);
        let text = expect_output(dispatch_tool_call(
            "rlm_grep",
            r#"{"pattern": "error"}"#,
            &mut repl,
        ));
        assert!(text.contains("error: fail"));
        assert!(text.contains("error: boom"));
    }

    #[test]
    fn dispatch_final() {
        let mut repl = RlmRepl::new("whatever".to_string(), ReplRuntime::Rust);
        let text = expect_final(dispatch_tool_call(
            "rlm_final",
            r#"{"answer": "The answer is 42"}"#,
            &mut repl,
        ));
        assert_eq!(text, "The answer is 42");
    }

    #[test]
    fn dispatch_unknown_returns_none() {
        let mut repl = RlmRepl::new("data".to_string(), ReplRuntime::Rust);
        let result = dispatch_tool_call("unknown_tool", "{}", &mut repl);
        assert!(result.is_none());
    }
}