Skip to main content

koda_core/tools/
agent.rs

1//! Sub-agent invocation and discovery tools.
2//!
3//! Exposes `InvokeAgent` and `ListAgents` as tools the LLM can call.
4//! Actual sub-agent execution is handled by the event loop since it needs
5//! access to config, DB, and the provider.
6//!
7//! ## Usage patterns
8//!
9//! - **Delegate a task**: `InvokeAgent { prompt: "write tests for auth.rs" }`
10//!   (uses the `task` agent by default)
11//! - **Use a specialist**: `InvokeAgent { agent_name: "explore", prompt: "find all error handling" }`
12//! - **Fork context**: `InvokeAgent { agent_name: "fork", prompt: "..." }`
13//!   (inherits parent's full conversation)
14//! - **Background work**: `InvokeAgent { prompt: "...", background: true }`
15//!   (returns immediately, results injected when complete)
16//!
17//! ## When to use sub-agents
18//!
19//! - Complex multi-step tasks (keeps parent context clean)
20//! - Independent parallel work (launch multiple agents at once)
21//! - Research that generates lots of noise (grep results, file contents)
22//!
23//! ## When NOT to use sub-agents
24//!
25//! - Simple file reads or 2–3 grep queries (overhead > benefit)
26//! - Tasks requiring user interaction (sub-agents can't ask questions)
27
28use crate::providers::ToolDefinition;
29use serde_json::json;
30use std::collections::HashMap;
31use std::path::{Path, PathBuf};
32
33/// Return tool definitions for the LLM.
34pub fn definitions() -> Vec<ToolDefinition> {
35    vec![
36        ToolDefinition {
37            name: "InvokeAgent".to_string(),
38            description: "Delegate a task to a specialized sub-agent.
39
40EXECUTION MODES (pick one per call):
41- Sequential foreground (default): one sub-agent runs, blocks until done.
42- Parallel foreground: emit multiple InvokeAgent tool calls in the same
43  message and they run concurrently. Each write-capable agent gets its own
44  isolated workspace, so parallel write-agents cannot trample each other.
45- Background (background=true): returns immediately. Results inject as a
46  user message on the next iteration. Use for long-running independent work.
47- Forked context (agent_name='fork'): inherits your full conversation
48  history. Useful when the sub-agent needs everything you've already loaded.
49
50Use InvokeAgent when:
51- The task requires exploring many files or running many searches that would pollute your context
52- Work is independent and can run in parallel with your current reasoning
53- A specialist persona adds value (explore for search, plan for architecture, verify for testing)
54
55Do NOT use InvokeAgent when:
56- A single Read, Grep, or Glob would answer the question (overhead > benefit)
57- The task requires real-time back-and-forth with the user (sub-agents have no way to ask questions; AskUser is filtered from their tool set)
58- You've already loaded the relevant context (just do the work yourself)
59
60Key rules:
61- Sub-agent results are NOT shown to the user — you must summarize them in your reply
62- Sub-agents CANNOT spawn other sub-agents. Plan all fan-out at this level; the InvokeAgent tool is filtered from every sub-agent's tool set.
63- Identical (agent_name, prompt) calls hit a cache and skip the LLM call. Cheap to retry idempotent tasks; no need to memoize yourself.
64- A result starting with '[ERROR: sub-agent ...]' is a structural failure (e.g. iteration cap, workspace setup), not a model answer. Re-strategize rather than treat as content.
65- Always write a clear, self-contained prompt — the sub-agent hasn't seen your conversation
66- Include specific file paths, function names, and success criteria in your prompt
67- Omit agent_name to use the 'task' worker (full write access)"
68                .to_string(),
69            parameters: json!({
70                "type": "object",
71                "properties": {
72                    "agent_name": {
73                        "type": "string",
74                        "description": "Name of the sub-agent (from ListAgents). Omit for 'task', use 'fork' to inherit parent context."
75                    },
76                    "prompt": {
77                        "type": "string",
78                        "description": "The task to delegate to the sub-agent"
79                    },
80                    "background": {
81                        "type": "boolean",
82                        "description": "Run in background and return immediately (default: false). \
83                            Results are drained and injected as a user message at the start of \
84                            the next iteration — NOT mid-iteration. The bg agent inherits the \
85                            parent's trust + sandbox at spawn time and is cancelled on Ctrl+C. \
86                            Use for independent long-running tasks that don't block your current work."
87                    }
88                },
89                "required": ["prompt"]
90            }),
91        },
92        ToolDefinition {
93            name: "ListAgents".to_string(),
94            description: "List available sub-agents. Use detail=true to see system prompts."
95                .to_string(),
96            parameters: json!({
97                "type": "object",
98                "properties": {
99                    "detail": {
100                        "type": "boolean",
101                        "description": "Show full system prompts"
102                    }
103                }
104            }),
105        },
106    ]
107}
108
/// A sub-agent found during discovery: its name, a short description, where
/// it came from, and its system prompt.
///
/// Values are produced by `discover_all_agents`, either from built-in
/// configs or from `<name>.json` files in the user / project agent
/// directories.
pub struct AgentInfo {
    /// Agent name (used in `InvokeAgent` tool calls). For file-based agents
    /// this is the file name with the `.json` suffix removed.
    pub name: String,
    /// One-line description shown in `ListAgents` output; derived from
    /// `system_prompt` by `extract_description`.
    pub description: String,
    /// Discovery source: `"built-in"`, `"user"`, or `"project"`.
    pub source: &'static str,
    /// Full system prompt content. Empty when a file-based agent's JSON has
    /// no string-valued `system_prompt` field.
    pub system_prompt: String,
}
120
121/// Discover all agents from all sources, with project > user > built-in priority.
122pub fn discover_all_agents(project_root: &Path) -> Vec<AgentInfo> {
123    let mut agents: HashMap<String, AgentInfo> = HashMap::new();
124
125    // 1. Built-in agents (lowest priority)
126    for (name, config) in crate::config::KodaConfig::builtin_agents() {
127        // Skip `default` — it's the main agent, not a sub-agent.
128        // Map omitted agent_name to `task` instead (see InvokeAgent schema).
129        if name == "default" {
130            continue;
131        }
132        agents.insert(
133            name.clone(),
134            AgentInfo {
135                name,
136                description: extract_description(&config.system_prompt),
137                source: "built-in",
138                system_prompt: config.system_prompt,
139            },
140        );
141    }
142
143    // 2. User agents (~/.config/koda/agents/) — overrides built-ins
144    if let Ok(user_dir) = user_agents_dir() {
145        load_agents_from_dir(&user_dir, "user", &mut agents);
146    }
147
148    // 3. Project agents (<project>/agents/) — highest priority
149    let project_dir = project_root.join("agents");
150    load_agents_from_dir(&project_dir, "project", &mut agents);
151
152    let mut result: Vec<AgentInfo> = agents.into_values().collect();
153    result.sort_by(|a, b| a.name.cmp(&b.name));
154    result
155}
156
157/// Load agents from a directory into the map (later calls override earlier).
158fn load_agents_from_dir(dir: &Path, source: &'static str, agents: &mut HashMap<String, AgentInfo>) {
159    let Ok(entries) = std::fs::read_dir(dir) else {
160        return;
161    };
162    for entry in entries.flatten() {
163        let name = entry.file_name().to_string_lossy().to_string();
164        let Some(agent_name) = name.strip_suffix(".json") else {
165            continue;
166        };
167        if agent_name == "default" {
168            continue;
169        }
170        let Ok(content) = std::fs::read_to_string(entry.path()) else {
171            continue;
172        };
173        let Ok(config) = serde_json::from_str::<serde_json::Value>(&content) else {
174            continue;
175        };
176        let prompt = config["system_prompt"].as_str().unwrap_or("").to_string();
177        agents.insert(
178            agent_name.to_string(),
179            AgentInfo {
180                name: agent_name.to_string(),
181                description: extract_description(&prompt),
182                source,
183                system_prompt: prompt,
184            },
185        );
186    }
187}
188
/// Compute the per-user agents directory, `<home>/.config/koda/agents`.
///
/// `<home>` is read from the `HOME` environment variable, falling back to
/// `USERPROFILE` when `HOME` is unavailable.
///
/// # Errors
///
/// Returns the `VarError` from the `USERPROFILE` lookup when neither
/// variable can be read.
fn user_agents_dir() -> Result<PathBuf, std::env::VarError> {
    let home = match std::env::var("HOME") {
        Ok(value) => value,
        Err(_) => std::env::var("USERPROFILE")?,
    };

    let mut dir = PathBuf::from(home);
    dir.push(".config");
    dir.push("koda");
    dir.push("agents");
    Ok(dir)
}
197
198/// Return agent list data for display (used by /agent command and ListAgents tool).
199///
200/// Returns a list of `(name, description, source)` tuples.
201/// The client is responsible for formatting/coloring.
202pub fn list_agents(project_root: &Path) -> Vec<(String, String, String)> {
203    discover_all_agents(project_root)
204        .into_iter()
205        .map(|a| {
206            (
207                a.name.to_string(),
208                a.description.to_string(),
209                a.source.to_string(),
210            )
211        })
212        .collect()
213}
214
215/// Format detailed agent list (for ListAgents with detail=true).
216pub fn list_agents_detail(project_root: &Path) -> String {
217    let agents = discover_all_agents(project_root);
218
219    if agents.is_empty() {
220        return "No sub-agents configured.".to_string();
221    }
222
223    let mut output = String::new();
224    for a in &agents {
225        output.push_str(&format!("## {} [{}]\n", a.name, a.source));
226        // Show first 500 chars of prompt as template reference
227        let preview: String = a.system_prompt.chars().take(500).collect();
228        output.push_str(&preview);
229        if a.system_prompt.len() > 500 {
230            output.push_str("\n[...truncated]");
231        }
232        output.push_str("\n\n");
233    }
234    output
235}
236
237/// Extract a clean one-line description from a system prompt.
238/// Looks for "Your job is to ..." or falls back to the first sentence.
239fn extract_description(prompt: &str) -> String {
240    // Try to find "Your job is to ..." pattern
241    if let Some(idx) = prompt.find("Your job is to ") {
242        let rest = &prompt[idx + "Your job is to ".len()..];
243        let end = rest.find('.').unwrap_or(rest.len().min(80));
244        let desc: String = rest[..end].chars().take(80).collect();
245        return capitalize_first(&desc);
246    }
247
248    // Try "You are a ..." pattern — extract the role
249    if let Some(idx) = prompt.find("You are a ") {
250        let rest = &prompt[idx + "You are a ".len()..];
251        let end = rest.find('.').unwrap_or(rest.len().min(60));
252        let role: String = rest[..end].chars().take(60).collect();
253        return capitalize_first(&role);
254    }
255
256    // Fallback: first line, capped
257    let first_line = prompt.lines().next().unwrap_or("");
258    let capped: String = first_line.chars().take(60).collect();
259    capped
260}
261
/// Return `s` with its first character upper-cased and the rest unchanged.
///
/// An empty input yields an empty string. The upper-case form may be longer
/// than one character (e.g. `ß` becomes `SS`).
fn capitalize_first(s: &str) -> String {
    let mut rest = s.chars();
    if let Some(head) = rest.next() {
        let mut out = String::with_capacity(s.len());
        out.extend(head.to_uppercase());
        out.push_str(rest.as_str());
        out
    } else {
        String::new()
    }
}
270
// Unit tests for tool definitions, agent discovery, and description
// extraction. Discovery tests use a fresh temp dir as the project root so no
// project-level agents interfere.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// The module exposes exactly two tools, `InvokeAgent` first.
    #[test]
    fn test_definitions_count() {
        let defs = definitions();
        assert_eq!(defs.len(), 2);
        assert_eq!(defs[0].name, "InvokeAgent");
        assert_eq!(defs[1].name, "ListAgents");
    }

    /// Pin the load-bearing pieces of the InvokeAgent description so future
    /// "tighter wording" refactors don't silently drop the bits the model
    /// needs to dispatch correctly. We don't pin exact wording — just the
    /// concepts that have engineering meaning behind them.
    #[test]
    fn test_invoke_agent_description_documents_all_four_modes() {
        let defs = definitions();
        let desc = &defs[0].description;
        // The four execution modes are the user-facing vocabulary the
        // engine actually implements (sub_agent_dispatch.rs + bg_agent.rs).
        assert!(
            desc.contains("Sequential foreground"),
            "description must name the sequential foreground mode"
        );
        assert!(
            desc.contains("Parallel foreground"),
            "description must name the parallel foreground mode"
        );
        assert!(
            desc.contains("Background") && desc.contains("background=true"),
            "description must explain background dispatch and the parameter"
        );
        assert!(
            desc.contains("Forked context") && desc.contains("agent_name='fork'"),
            "description must name fork mode and its trigger"
        );
    }

    /// The description must tell the model that sub-agents cannot nest.
    #[test]
    fn test_invoke_agent_description_warns_about_no_nested_invocation() {
        // Sub-agents cannot spawn other sub-agents (DESIGN.md invariant).
        // The model needs to know this so it doesn't try a workaround that
        // hits the empty-tool refusal at runtime.
        let defs = definitions();
        let desc = &defs[0].description;
        assert!(
            desc.contains("CANNOT spawn other sub-agents") || desc.contains("cannot spawn"),
            "description must surface the no-nested-invocation rule"
        );
    }

    /// The description must explain the `[ERROR: sub-agent ...]` marker.
    #[test]
    fn test_invoke_agent_description_explains_error_marker_convention() {
        // The [ERROR: ...] marker (B18, B21) is structural failure metadata,
        // not a model answer. The model needs to know that so it
        // re-strategizes instead of treating the marker as content.
        let defs = definitions();
        let desc = &defs[0].description;
        assert!(
            desc.contains("[ERROR: sub-agent"),
            "description must explain the [ERROR: marker so the model knows to re-strategize"
        );
    }

    /// The description must mention that identical calls are cached.
    #[test]
    fn test_invoke_agent_description_mentions_result_caching() {
        // SubAgentCache lives on KodaSession and survives across turns.
        // The model should know calls are memoized so it doesn't build its
        // own (worse) memoization on top.
        let defs = definitions();
        let desc = &defs[0].description;
        assert!(
            desc.contains("cache") || desc.contains("memoize"),
            "description must mention result caching so the model doesn't roll its own"
        );
    }

    /// The `background` parameter's own description must state when results
    /// arrive.
    #[test]
    fn test_invoke_agent_background_param_documents_drain_semantics() {
        // The drain-on-next-iteration timing is load-bearing: the model
        // shouldn't expect mid-iteration results from a bg agent.
        let defs = definitions();
        let bg_desc = defs[0]
            .parameters
            .pointer("/properties/background/description")
            .and_then(|v| v.as_str())
            .expect("background param must have a description");
        assert!(
            bg_desc.contains("next iteration"),
            "background param must explain drain-on-next-iteration timing"
        );
    }

    /// With an empty project dir, the listing contains the four built-ins
    /// and never `default`.
    ///
    /// NOTE(review): discovery also reads the user-level agents directory
    /// (`$HOME/.config/koda/agents`), so a user agent named like a built-in
    /// on the host machine would re-tag it as "user" and break the count of
    /// 4 — this test is not fully hermetic.
    #[test]
    fn test_list_agents_has_builtins() {
        let dir = TempDir::new().unwrap();
        let result = list_agents(dir.path());
        let builtins: Vec<_> = result
            .iter()
            .filter(|(_, _, src)| src == "built-in")
            .collect();
        assert_eq!(
            builtins.len(),
            4,
            "Expected task/explore/plan/verify built-ins"
        );
        let names: Vec<&str> = result.iter().map(|(n, _, _)| n.as_str()).collect();
        assert!(names.contains(&"task"));
        assert!(names.contains(&"explore"));
        assert!(names.contains(&"plan"));
        assert!(names.contains(&"verify"));
        // Default is always excluded from listing
        assert!(!names.contains(&"default"), "Should exclude default agent");
    }

    /// A `<project>/agents/*.json` file is discovered and tagged "project".
    ///
    /// NOTE(review): despite the test name, `reviewer` is not one of the
    /// built-ins asserted elsewhere in this module (task/explore/plan/verify),
    /// so this checks source tagging of a new project agent rather than an
    /// actual override of a built-in.
    #[test]
    fn test_list_agents_project_overrides_builtin() {
        let dir = TempDir::new().unwrap();
        let agents_dir = dir.path().join("agents");
        std::fs::create_dir(&agents_dir).unwrap();
        std::fs::write(
            agents_dir.join("reviewer.json"),
            r#"{"name":"reviewer","system_prompt":"You are a custom project reviewer. Your job is to do project-specific reviews."}"#,
        ).unwrap();
        let result = list_agents(dir.path());
        let reviewer = result.iter().find(|(n, _, _)| n == "reviewer");
        assert!(reviewer.is_some());
        assert_eq!(
            reviewer.unwrap().2,
            "project",
            "Project agent should be tagged"
        );
    }

    /// Same built-in check as above, but against the structured
    /// `discover_all_agents` result. Shares the same dependence on the host's
    /// user-level agents directory.
    #[test]
    fn test_discover_all_agents_has_builtins() {
        let dir = TempDir::new().unwrap();
        let agents = discover_all_agents(dir.path());
        let builtins: Vec<_> = agents.iter().filter(|a| a.source == "built-in").collect();
        assert_eq!(
            builtins.len(),
            4,
            "Expected task/explore/plan/verify built-ins"
        );
        let names: Vec<&str> = builtins.iter().map(|a| a.name.as_str()).collect();
        assert!(names.contains(&"task"));
        assert!(names.contains(&"explore"));
        assert!(names.contains(&"plan"));
        assert!(names.contains(&"verify"));
    }

    /// The detailed listing names each built-in and shows the source tag.
    #[test]
    fn test_list_agents_detail_shows_builtins() {
        let dir = TempDir::new().unwrap();
        let result = list_agents_detail(dir.path());
        assert!(result.contains("[built-in]"));
        assert!(result.contains("task"));
        assert!(result.contains("explore"));
        assert!(result.contains("plan"));
        assert!(result.contains("verify"));
    }

    /// "Your job is to ..." wins over "You are a ..." and is capitalized,
    /// with the trailing period dropped.
    #[test]
    fn test_extract_description_job_pattern() {
        let desc =
            extract_description("You are a reviewer. Your job is to find bugs and improvements.");
        assert_eq!(desc, "Find bugs and improvements");
    }

    /// Without a job sentence, the role after "You are a " is used.
    #[test]
    fn test_extract_description_role_pattern() {
        let desc = extract_description("You are a paranoid security auditor.");
        assert_eq!(desc, "Paranoid security auditor");
    }

    /// With neither pattern, the first line is returned unchanged (period
    /// included).
    #[test]
    fn test_extract_description_fallback() {
        let desc = extract_description("Review all the code carefully.");
        assert_eq!(desc, "Review all the code carefully.");
    }
}
454}