Skip to main content

mermaid_cli/providers/tool/
subagent.rs

1//! `agent` tool — spawn a child reducer loop as a tool.
2//!
3//! The design rests on one observation: from the model's perspective,
4//! delegating to a subagent is "call a tool with a prompt, get back
5//! a summary". There's no state-machine visibility the parent
6//! reducer needs — `TurnState::ExecutingTools` already parallelizes
7//! tool calls for free, so a single model turn emitting three
8//! `agent` calls gets three concurrent `SubagentTool::execute`
9//! invocations with zero additional infrastructure.
10//!
11//! Everything lives inside this module:
12//!
13//! - `SubagentSpawner` owns the shared `ProviderFactory` + a
14//!   `Semaphore(max_inflight)` that backpressures parallel fan-out.
15//!   Depth tracking uses `tokio::task_local!` so nested subagents
16//!   (a subagent calling `agent`) can see their own depth without
17//!   threading state through `ExecContext`.
18//! - `SubagentTool::execute` builds a fresh child `State`, a
19//!   filtered `ToolRegistry` (no self-recursion, no GUI tools), and
20//!   a child `EffectRunner` + msg channel. It drives the child
21//!   reducer to `Idle`, streaming progress back to the parent via
22//!   `ProgressEvent::Subagent*`, and returns the last assistant
23//!   message as the tool's `output`.
24
25use std::sync::Arc;
26use std::time::{Duration, Instant};
27
28use async_trait::async_trait;
29use serde_json::Value;
30use tokio::sync::{Semaphore, mpsc};
31use tokio::time::timeout;
32use tokio_util::sync::CancellationToken;
33
34use crate::domain::{
35    Msg, State, ToolDefinition, ToolMetadata, ToolOutcome, ToolRunMetadata, TurnState, update,
36};
37use crate::effect::{EffectRunner, MSG_CHANNEL_CAPACITY};
38use crate::models::MessageRole;
39use crate::providers::ProviderFactory;
40use crate::providers::ctx::{ExecContext, ProgressEvent, SubagentPhase};
41
42use super::ToolExecutor;
43use super::ToolRegistry;
44
/// Maximum nesting depth for subagents.
///
/// `SubagentTool::execute` refuses to spawn when the caller's depth
/// (read from `SUBAGENT_DEPTH`, 0 at the root) is already
/// `>= MAX_DEPTH`. So `agent` calling `agent` calling `agent` works up
/// to this cap; the fourth-level spawn errors cleanly.
pub const MAX_DEPTH: usize = 3;
48
/// Maximum subagents running simultaneously; this is the permit count
/// of the semaphore created in `SubagentSpawner::new` (the spawner is
/// intended to be one per process).
///
/// Covers the pathological "parent emits 30 agent calls in one turn"
/// case. Hit this cap → later calls wait on the semaphore until an
/// earlier subagent releases its permit, or until the waiting call's
/// own cancellation token fires.
pub const MAX_INFLIGHT: usize = 10;
54
/// Hard ceiling on a subagent's wall-clock runtime, in seconds
/// (20 minutes).
///
/// `SubagentTool::execute` wraps the child run in a
/// `tokio::time::timeout` of this length; when it elapses the run is
/// abandoned and the tool reports an error outcome naming the timeout.
pub const DEFAULT_TIMEOUT_SECS: u64 = 20 * 60;
58
tokio::task_local! {
    /// Current subagent depth for the running task.
    ///
    /// Unset at the root, which readers treat as depth 0 by reading it
    /// with `SUBAGENT_DEPTH.try_with(|d| *d).unwrap_or(0)`.
    /// `SubagentTool::execute` runs each child drive inside
    /// `SUBAGENT_DEPTH.scope(current_depth + 1, ..)`, so the value
    /// grows by one per level of nesting.
    static SUBAGENT_DEPTH: usize;
}
66
67/// Shared spawner. One per process; held by `SubagentTool`.
68pub struct SubagentSpawner {
69    providers: Arc<ProviderFactory>,
70    inflight: Arc<Semaphore>,
71}
72
73impl SubagentSpawner {
74    pub fn new(providers: Arc<ProviderFactory>) -> Self {
75        Self {
76            providers,
77            inflight: Arc::new(Semaphore::new(MAX_INFLIGHT)),
78        }
79    }
80}
81
82/// The `agent` tool the model sees.
83pub struct SubagentTool {
84    spawner: Arc<SubagentSpawner>,
85}
86
87impl SubagentTool {
88    pub fn new(spawner: Arc<SubagentSpawner>) -> Self {
89        Self { spawner }
90    }
91}
92
93#[async_trait]
94impl ToolExecutor for SubagentTool {
95    fn name(&self) -> &'static str {
96        "agent"
97    }
98
99    fn schema(&self) -> ToolDefinition {
100        ToolDefinition {
101            name: "agent".to_string(),
102            description: format!(
103                "Spawn a child agent with its own context and tool access to work on an \
104                 independent sub-task. Useful for parallel fan-out (emit multiple `agent` \
105                 calls in the same turn to run them concurrently) or for scoping a noisy \
106                 sub-task (the child's tool output doesn't clutter the parent's turn). \
107                 Depth-capped at {max_depth}; breadth-capped at {max_breadth} concurrent. \
108                 Subagents don't get GUI (screenshot/click/…) access because coordinate \
109                 metadata can't be shared cleanly.",
110                max_depth = MAX_DEPTH,
111                max_breadth = MAX_INFLIGHT,
112            ),
113            input_schema: serde_json::json!({
114                "type": "object",
115                "properties": {
116                    "prompt": {
117                        "type": "string",
118                        "description": "The task for the subagent. Self-contained; the subagent has no access to the parent's conversation."
119                    },
120                    "description": {
121                        "type": "string",
122                        "description": "Short label shown in the parent's status line (e.g. 'list domain files')."
123                    }
124                },
125                "required": ["prompt"]
126            }),
127        }
128    }
129
130    async fn execute(&self, args: Value, ctx: ExecContext) -> ToolOutcome {
131        let started = Instant::now();
132
133        // Depth gate: if we're already at the cap, return immediately.
134        let current_depth = SUBAGENT_DEPTH.try_with(|d| *d).unwrap_or(0);
135        if current_depth >= MAX_DEPTH {
136            return ToolOutcome::error(format!("subagent depth limit {} reached", MAX_DEPTH), 0.0);
137        }
138
139        // Parse args.
140        let prompt = match args.get("prompt").and_then(|v| v.as_str()) {
141            Some(s) if !s.trim().is_empty() => s.to_string(),
142            _ => {
143                return ToolOutcome::error("agent requires non-empty `prompt`", 0.0);
144            },
145        };
146        let description = args
147            .get("description")
148            .and_then(|v| v.as_str())
149            .unwrap_or("subagent")
150            .to_string();
151
152        // Acquire a breadth permit. Respects parent cancellation so
153        // a fan-out that lands 30 calls doesn't hold the parent's
154        // Ctrl+C response hostage.
155        let permit = tokio::select! {
156            biased;
157            _ = ctx.token.cancelled() => return ToolOutcome::cancelled(),
158            p = self.spawner.inflight.clone().acquire_owned() => match p {
159                Ok(permit) => permit,
160                Err(_) => return ToolOutcome::error(
161                    "subagent semaphore closed",
162                    started.elapsed().as_secs_f64(),
163                ),
164            },
165        };
166
167        // Build the child runtime. The child uses the same parent
168        // config + cwd + model id, with a fresh `State` and a tool
169        // registry filtered to remove self-recursion and GUI tools.
170        //
171        // F7: `ExecContext` now carries the parent's `Config` +
172        // `model_id`. Previously we built `Config::default()` here and
173        // the child model id defaulted to `config.default_model.name`
174        // (usually empty), which made subagents fail at provider
175        // resolution.
176        let config = (*ctx.config).clone();
177        let cwd = ctx.workdir.clone();
178        let model_id = if ctx.model_id.is_empty() {
179            default_model_id(&config)
180        } else {
181            ctx.model_id.clone()
182        };
183        let child_model_id = model_id.clone();
184        let child_state = State::new(config.clone(), cwd.clone(), model_id);
185
186        let child_tools = build_child_registry(self.spawner.providers.clone());
187
188        // Child runner rooted at parent's scope child token. When
189        // parent cancels, `child_token.cancelled()` fires and the
190        // child's subprocess + model streams abort.
191        let child_token = ctx.token.child_token();
192        let (child_tx, child_rx) = mpsc::channel(MSG_CHANNEL_CAPACITY);
193        let child_runner =
194            EffectRunner::new_child(child_tx, cwd, self.spawner.providers.clone(), child_tools);
195
196        // Depth-scoped drive: nested `agent` calls inside this child
197        // see `current_depth + 1` via `SUBAGENT_DEPTH.try_with`.
198        let drive = drive_child(
199            child_state,
200            child_runner,
201            child_rx,
202            ctx.progress.clone(),
203            prompt,
204            description.clone(),
205            child_token,
206        );
207        let depth_scoped = SUBAGENT_DEPTH.scope(current_depth + 1, drive);
208
209        let result = timeout(Duration::from_secs(DEFAULT_TIMEOUT_SECS), depth_scoped).await;
210        drop(permit);
211
212        let elapsed = started.elapsed().as_secs_f64();
213        match result {
214            Ok(Ok(summary)) => ToolOutcome::success(summary, "subagent completed", elapsed)
215                .with_metadata(subagent_metadata(child_model_id)),
216            Ok(Err(DriveError::Cancelled)) => ToolOutcome::cancelled(),
217            Ok(Err(DriveError::Errored(e))) => {
218                ToolOutcome::error(format!("subagent ({}): {}", description, e), elapsed)
219                    .with_metadata(subagent_metadata(child_model_id))
220            },
221            Err(_) => ToolOutcome::error(
222                format!(
223                    "subagent ({}) exceeded {}s timeout",
224                    description, DEFAULT_TIMEOUT_SECS
225                ),
226                elapsed,
227            )
228            .with_metadata(subagent_metadata(child_model_id)),
229        }
230    }
231}
232
233fn subagent_metadata(model_id: String) -> ToolRunMetadata {
234    ToolRunMetadata {
235        detail: ToolMetadata::Subagent { model_id },
236        ..ToolRunMetadata::default()
237    }
238}
239
/// Failure modes of `drive_child`, mapped to `ToolOutcome`s by
/// `SubagentTool::execute`.
enum DriveError {
    /// The child's cancellation token fired; reported as a cancelled
    /// outcome.
    Cancelled,
    /// The child ended without usable output; the message is embedded
    /// in the error outcome.
    Errored(String),
}
244
245/// Drive the child's reducer loop to `Idle`. Forwards child
246/// `ToolStarted` / `ToolFinished` / `StreamText` events to the
247/// parent's progress channel as `ProgressEvent::Subagent*`.
248async fn drive_child(
249    mut state: State,
250    mut runner: EffectRunner,
251    mut msg_rx: mpsc::Receiver<Msg>,
252    parent_progress: mpsc::Sender<ProgressEvent>,
253    prompt: String,
254    description: String,
255    token: CancellationToken,
256) -> Result<String, DriveError> {
257    // Signal start to parent.
258    let _ = parent_progress
259        .send(ProgressEvent::SubagentText(format!(
260            "▶ {} — {}",
261            description,
262            prompt.chars().take(80).collect::<String>()
263        )))
264        .await;
265
266    // MERMAID.md instructions — same as the root interactive path.
267    runner.dispatch(crate::domain::Cmd::RefreshInstructions);
268
269    // Seed the child turn.
270    let seed = Msg::SubmitPrompt {
271        text: prompt,
272        attachment_ids: vec![],
273    };
274    let (new_state, cmds) = update(state, seed);
275    state = new_state;
276    for cmd in cmds {
277        runner.dispatch(cmd);
278    }
279
280    // Loop until the child reducer reaches Idle with no queued work.
281    loop {
282        if token.is_cancelled() {
283            runner.shutdown().await;
284            return Err(DriveError::Cancelled);
285        }
286        if matches!(state.turn, TurnState::Idle) && state.ui.queued_messages.is_empty() {
287            break;
288        }
289
290        let msg = tokio::select! {
291            biased;
292            _ = token.cancelled() => {
293                runner.shutdown().await;
294                return Err(DriveError::Cancelled);
295            },
296            recv = msg_rx.recv() => match recv {
297                Some(m) => m,
298                None => {
299                    // Channel closed — child runner shut down.
300                    break;
301                },
302            },
303        };
304
305        // Forward child activity to parent progress BEFORE the
306        // reducer mutates state (we want `call_id` + `tool_name`
307        // semantic info, which reducer events strip).
308        forward_child_event(&msg, &parent_progress, &state).await;
309
310        let (new_state, cmds) = update(state, msg);
311        state = new_state;
312        for cmd in cmds {
313            runner.dispatch(cmd);
314        }
315        if state.should_exit {
316            break;
317        }
318    }
319
320    runner.shutdown().await;
321
322    // Extract last assistant message as the result.
323    let summary = state
324        .session
325        .messages()
326        .iter()
327        .rev()
328        .find(|m| m.role == MessageRole::Assistant)
329        .map(|m| m.content.clone())
330        .unwrap_or_default();
331    if summary.trim().is_empty() {
332        return Err(DriveError::Errored(
333            "subagent produced no assistant output".to_string(),
334        ));
335    }
336    Ok(summary)
337}
338
339/// Translate child-scope `Msg` events into parent-scope
340/// `ProgressEvent::Subagent*`. Flat mapping, never recursive — the
341/// parent reducer just sees "a tool started / finished / said
342/// something" with the child's call identity.
343async fn forward_child_event(msg: &Msg, progress: &mpsc::Sender<ProgressEvent>, state: &State) {
344    match msg {
345        Msg::ToolStarted {
346            turn: _, call_id, ..
347        } => {
348            let tool_name = lookup_tool_name(state, *call_id).unwrap_or_else(|| "tool".to_string());
349            let _ = progress
350                .send(ProgressEvent::SubagentToolCall {
351                    child_call_id: *call_id,
352                    tool_name,
353                    phase: SubagentPhase::Started,
354                })
355                .await;
356        },
357        Msg::ToolFinished {
358            turn: _,
359            call_id,
360            outcome,
361        } => {
362            let tool_name = lookup_tool_name(state, *call_id).unwrap_or_else(|| "tool".to_string());
363            let phase = if outcome.is_success() {
364                SubagentPhase::Finished
365            } else {
366                SubagentPhase::Errored
367            };
368            let _ = progress
369                .send(ProgressEvent::SubagentToolCall {
370                    child_call_id: *call_id,
371                    tool_name,
372                    phase,
373                })
374                .await;
375        },
376        Msg::StreamText { chunk, .. } => {
377            // Only forward a compact preview; long assistant text is
378            // overwhelming in the parent's status line.
379            if !chunk.trim().is_empty() {
380                let snippet: String = chunk.chars().take(120).collect();
381                let _ = progress.send(ProgressEvent::SubagentText(snippet)).await;
382            }
383        },
384        _ => {},
385    }
386}
387
388/// Look up a tool name from a `PendingToolCall` in the state.
389/// Returns `None` if the call id isn't known (e.g. during teardown).
390fn lookup_tool_name(state: &State, call_id: crate::domain::ToolCallId) -> Option<String> {
391    match &state.turn {
392        TurnState::ExecutingTools { calls, .. } => calls
393            .iter()
394            .find(|c| c.call_id == call_id)
395            .map(|c| c.source.function.name.clone()),
396        _ => None,
397    }
398}
399
400/// Construct the child `ToolRegistry` — a subset of what the parent
401/// offers. Explicitly excludes:
402///
403///   - `agent` itself — depth cap would catch it but excluding up
404///     front saves a wasted call.
405///   - All seven GUI / computer-use tools — the parent's
406///     `ComputerUseDriver` owns the screenshot coord registry; a
407///     subagent clicking would corrupt the parent's latest-capture
408///     pointer.
409///
410/// Filesystem + exec + web + MCP tools come along unchanged. That
411/// lets subagents read/write files, run commands, and call MCP tools
412/// for their work.
413fn build_child_registry(providers: Arc<ProviderFactory>) -> Arc<ToolRegistry> {
414    use super::{
415        computer_use, exec, filesystem, mcp,
416        web::{WebFetchTool, WebSearchTool},
417    };
418    let mut r = ToolRegistry::new();
419    r.register(Arc::new(filesystem::ReadFileTool));
420    r.register(Arc::new(filesystem::WriteFileTool));
421    r.register(Arc::new(filesystem::EditFileTool));
422    r.register(Arc::new(filesystem::DeleteFileTool));
423    r.register(Arc::new(filesystem::CreateDirectoryTool));
424    r.register(Arc::new(exec::ExecuteCommandTool));
425    r.register(Arc::new(mcp::McpToolProxy));
426    if let Some(key) = crate::utils::resolve_api_key("OLLAMA_API_KEY", None) {
427        r.register(Arc::new(WebSearchTool::new(key.clone())));
428        r.register(Arc::new(WebFetchTool::new(key)));
429    }
430    // NO computer_use::*  — GUI tools are parent-only.
431    // NO subagent::SubagentTool — depth cap would catch it.
432    // Silence unused-import if the above imports don't all resolve.
433    let _ = computer_use::probe;
434    let _ = providers;
435    Arc::new(r)
436}
437
438/// Fallback child model id when `ExecContext::model_id` is empty
439/// (e.g. a test harness that uses the default `test_exec_context`
440/// builder). Production code always provides the parent's active model
441/// id via `Cmd::ExecuteTool::model_id`.
442fn default_model_id(config: &crate::app::Config) -> String {
443    if !config.default_model.provider.is_empty() && !config.default_model.name.is_empty() {
444        format!(
445            "{}/{}",
446            config.default_model.provider, config.default_model.name
447        )
448    } else {
449        config.default_model.name.clone()
450    }
451}
452
#[cfg(test)]
mod tests {
    use super::*;
    use crate::domain::{ToolCallId, TurnId};
    use crate::providers::ctx::test_exec_context;
    use std::path::PathBuf;

    /// Provider factory built from the default config.
    fn default_providers() -> Arc<ProviderFactory> {
        Arc::new(ProviderFactory::new(crate::app::Config::default()))
    }

    /// `agent` tool backed by a fresh spawner over default providers.
    fn default_tool() -> SubagentTool {
        SubagentTool::new(Arc::new(SubagentSpawner::new(default_providers())))
    }

    /// Running `execute` with the depth already at the cap must fail
    /// with the depth-limit error.
    #[tokio::test]
    async fn depth_cap_rejects_when_at_max() {
        let tool = default_tool();
        let (ctx, _rx) = test_exec_context(TurnId(1), ToolCallId(1), PathBuf::from("/tmp"));

        let call = tool.execute(serde_json::json!({"prompt": "hi"}), ctx);
        let outcome = SUBAGENT_DEPTH.scope(MAX_DEPTH, call).await;

        let error = outcome.error_message().expect("expected error");
        assert!(
            error.contains("depth limit"),
            "expected depth-limit error, got: {}",
            error
        );
    }

    /// A whitespace-only prompt is treated as missing.
    #[tokio::test]
    async fn empty_prompt_is_rejected() {
        let tool = default_tool();
        let (ctx, _rx) = test_exec_context(TurnId(1), ToolCallId(1), PathBuf::from("/tmp"));
        let args = serde_json::json!({"prompt": "  "});
        let outcome = tool.execute(args, ctx).await;
        assert_eq!(outcome.status, crate::domain::ToolStatus::Error);
    }

    /// F7: with an empty `ExecContext::model_id` (the test builder's
    /// default) the fallback reads `config.default_model.{provider,name}`.
    /// Pins the happy path.
    #[test]
    fn default_model_id_reads_config_provider_and_name() {
        let mut cfg = crate::app::Config::default();
        cfg.default_model.provider = "ollama".to_string();
        cfg.default_model.name = "qwen3-coder:30b".to_string();
        assert_eq!(default_model_id(&cfg), "ollama/qwen3-coder:30b");
    }

    /// With no provider the id must be the bare name — a
    /// "/just-a-name" shape would be rejected by provider resolution.
    #[test]
    fn default_model_id_returns_bare_name_when_provider_empty() {
        let mut cfg = crate::app::Config::default();
        cfg.default_model.name = "just-a-name".to_string();
        assert_eq!(default_model_id(&cfg), "just-a-name");
    }

    /// The child registry omits GUI tools and `agent`, and keeps the
    /// core tools.
    #[test]
    fn build_child_registry_excludes_gui_and_self() {
        let registry = build_child_registry(default_providers());

        // GUI tools absent.
        let gui_tools = [
            "screenshot",
            "click",
            "type_text",
            "press_key",
            "scroll",
            "mouse_move",
            "list_windows",
        ];
        for name in gui_tools.iter() {
            assert!(registry.get(name).is_none());
        }

        // Self absent — no recursion bootstrap.
        assert!(registry.get("agent").is_none());

        // Core tools present.
        for name in ["read_file", "execute_command"].iter() {
            assert!(registry.get(name).is_some());
        }
    }
}