defect_agent/tool/
spawn_agent.rs

1//! `spawn_agent`: delegates a task to a subagent.
2//!
3//! The subagent runs a nested [`TurnRunner`] in a **fresh, isolated context**, and only
4//! the final assistant text is returned as the tool result to the parent agent — the
5//! parent never sees the subagent's intermediate steps. See the design memo
6//! `project-subagent-design`.
7//!
8//! ## Two Gates
9//!
10//! - **Gate A (which tools are visible)**: each profile's `tool_allow` whitelist is a
11//!   subset of the parent agent's tool set. `spawn_agent` **may** be in the whitelist —
12//!   recursion is controlled by the **depth gate** (see below), not unconditionally
13//!   excluded.
14//! - **Gate B (how much is allowed at runtime)**: the child turn's policy is
15//!   [`NonInteractivePolicy`] wrapping the parent policy — `Ask` is downgraded to `Deny`,
16//!   the child agent is non-interactive, never blocks on [`PermissionGate`], and its
17//!   authorization is always ≤ the parent's.
18//!
19//! ## Recursion and the Depth Gate
20//!
21//! A subagent is simply "an agent with a parent" — parent and child run the same
22//! [`TurnRunner`]. Recursion depth is controlled by
23//! [`crate::tool::ToolContext::subagent_depth`]: the top-level turn injects a configured
24//! maximum (`TurnConfig::subagent_max_depth`), decremented by one for each level. If a
25//! level's `tool_allow` contains `spawn_agent` **and the remaining child depth > 0**, a
26//! freshly constructed `spawn_agent` tool is installed for the child agent (capturing the
27//! same base tool set as the subset source, so grandchildren can continue); when depth is
28//! exhausted (0), the tool is not installed — a structural cutoff. A turn with `depth ==
29//! 0` has no `spawn_agent` in its tool set; calling it fails loudly.
30//!
31//! ## Inheritance Principle
32//!
33//! Inherit "ability to reach the world" (provider registry / fs / shell / http), but
34//! **not** "identity and behavior" (parent's system prompt / hooks / task framework). The
35//! child agent's system prompt = inherited base_prompt + the profile's own `system.md`,
36//! and does **not** go through
37//! [`resolve_system_prompt`](crate::session::resolve_system_prompt) (which would crawl
38//! the workspace `AGENTS.md` — that is the parent's identity).
39
40use std::collections::BTreeMap;
41use std::pin::Pin;
42use std::sync::Arc;
43
44use agent_client_protocol_schema::{
45    Content, ContentBlock, SessionId, TextContent, ToolCallContent, ToolCallUpdateFields, ToolKind,
46};
47use futures::StreamExt;
48use futures::future::BoxFuture;
49use serde::Deserialize;
50use serde_json::json;
51
52use crate::error::BoxError;
53use crate::event::AgentEvent;
54use crate::hooks::{HookEngine, NoopHookEngine};
55use crate::llm::{HostedCapabilities, MessageContent, ProviderRegistry, Role, SamplingParams};
56use crate::policy::{NonInteractivePolicy, SandboxPolicy};
57use crate::session::{
58    EventEmitter, History, PermissionGate, RequestAuditTracker, StaticToolRegistry, ToolRegistry,
59    TurnConfig, TurnRequestLimit, TurnRunner, VecHistory,
60};
61use crate::tool::{
62    SafetyClass, Tool, ToolCallDescription, ToolContext, ToolError, ToolEvent, ToolSchema,
63    ToolStream,
64};
65
/// Name under which the `spawn_agent` tool is advertised in its schema.
///
/// Kept as a constant so that every place that needs the name — the schema builder and
/// the allowlist depth gate in `run_subagent_core`, which decides whether a child agent
/// receives its own `spawn_agent` — compares against the same literal, preventing typos.
pub(crate) const SPAWN_AGENT_TOOL_NAME: &str = "spawn_agent";
69
/// A subagent profile that can be invoked by `spawn_agent` (agent-side representation).
///
/// `ProfileSpec` in `defect-config` is the source of truth on the config side; the CLI
/// projects it into this struct during assembly before handing it to the tool. The two
/// are kept separate because `defect-config` depends on `defect-agent` — the agent cannot
/// depend on config in the opposite direction, or a cycle would result.
#[derive(Clone)]
pub struct SubagentProfile {
    /// Selection-time description that goes into the tool schema's catalog, allowing the
    /// LLM to choose a profile based on it.
    pub description: String,
    /// Optional model override; `None` falls back to the parent session's currently
    /// selected model (`ctx.current_model`). A per-call `model` argument, when given,
    /// takes precedence over this value.
    pub model: Option<String>,
    /// The profile's own system prompt text. The child turn's effective system prompt is
    /// the inherited base prompt (if any) followed by this text; an empty string
    /// contributes nothing.
    pub system_prompt: String,
    /// Tool allowlist — the child agent can only see these tools. `spawn_agent` may be
    /// listed: it is installed for the child only while the remaining subagent depth
    /// permits another level, and is skipped once the depth is exhausted. Any other name
    /// that the parent tool set does not know makes the spawn fail.
    pub tool_allow: Vec<String>,
    /// Optional sampling overrides.
    pub sampling: Option<SamplingParams>,
    /// The hook engine for this profile — hooks that run when a sub-agent executes a
    /// turn.
    ///
    /// Consistent with the "inherit world, not identity" principle: hooks belong to the
    /// profile's identity and are declared by the profile's own configuration (the CLI
    /// assembles `ProfileSpec.hooks` into an engine at build time). They are **not**
    /// inherited from the parent session. `None` means the sub-agent has no hooks (falls
    /// back to [`NoopHookEngine`]), preserving exactly the same behavior as before —
    /// existing profiles without hooks are unaffected.
    pub hooks: Option<Arc<dyn HookEngine>>,
}
102
103// `Arc<dyn HookEngine>` is not `Debug`; manually implement `Debug` to skip it (only
104// indicate whether an engine is attached).
105impl std::fmt::Debug for SubagentProfile {
106    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
107        f.debug_struct("SubagentProfile")
108            .field("description", &self.description)
109            .field("model", &self.model)
110            .field("system_prompt", &self.system_prompt)
111            .field("tool_allow", &self.tool_allow)
112            .field("sampling", &self.sampling)
113            .field("hooks", &self.hooks.as_ref().map(|_| "<engine>"))
114            .finish()
115    }
116}
117
/// The `spawn_agent` tool. It is registered on `StaticToolRegistry` and shared across
/// sessions of the owning `AgentCore` via `process_tools` (it is **not** a process-global
/// singleton — a single process may host multiple `AgentCore` instances, each with its
/// own copy).
///
/// At construction time it captures what `execute` reads from `self` rather than from
/// [`ToolContext`] when running a nested turn: the profile table, the provider registry,
/// the parent tool set and the base prompt, plus a fallback policy. The per-call runtime
/// handles (cwd/fs/shell/http/cancel, current model and provider, remaining subagent
/// depth, and — when injected — the active policy) come from the context instead.
pub struct SpawnAgentTool {
    /// Schema advertised to the LLM; built once from `profiles` at construction.
    schema: ToolSchema,
    /// Profiles that can be spawned, keyed by profile name.
    profiles: Arc<BTreeMap<String, SubagentProfile>>,
    /// Provider registry used to resolve the child agent's model to a provider entry.
    registry: Arc<ProviderRegistry>,
    /// The parent agent's policy as captured at construction (shared by all sessions in
    /// this core). `execute` prefers the policy injected through [`ToolContext`] and
    /// falls back to this one only when the context carries none. The child turn wraps
    /// the chosen policy with [`NonInteractivePolicy`].
    policy: Arc<dyn SandboxPolicy>,
    /// Parent agent tool set — source for subsetting by profile allowlist.
    process_tools: Arc<dyn ToolRegistry>,
    /// The `base_prompt` text inherited by child agents (the "you are an agent that can
    /// use tools" boilerplate).
    base_prompt: Option<String>,
}
137
138impl SpawnAgentTool {
139    /// Constructs a `spawn_agent` tool. When `profiles` is empty, the caller **should
140    /// not** register this tool (the `profile` enum in the schema will be an empty set,
141    /// so calls will always fail) — see [`Self::has_profiles`].
142    pub fn new(
143        profiles: Arc<BTreeMap<String, SubagentProfile>>,
144        registry: Arc<ProviderRegistry>,
145        policy: Arc<dyn SandboxPolicy>,
146        process_tools: Arc<dyn ToolRegistry>,
147        base_prompt: Option<String>,
148    ) -> Self {
149        let schema = build_schema(&profiles);
150        Self {
151            schema,
152            profiles,
153            registry,
154            policy,
155            process_tools,
156            base_prompt,
157        }
158    }
159
160    /// Whether any profiles were discovered. The assembler uses this to decide whether to
161    /// register this tool.
162    pub fn has_profiles(profiles: &BTreeMap<String, SubagentProfile>) -> bool {
163        !profiles.is_empty()
164    }
165}
166
167/// Dynamically build the schema: `profile` is an enum of discovered profile names (hard
168/// constraint), and the tool description embeds a catalog of `- <name>: <description>`
169/// entries (soft guidance). Both are required: the enum alone gives no usage context,
170/// while the catalog alone risks name typos.
171fn build_schema(profiles: &BTreeMap<String, SubagentProfile>) -> ToolSchema {
172    let names: Vec<&str> = profiles.keys().map(String::as_str).collect();
173    let catalog = profiles
174        .iter()
175        .map(|(name, p)| format!("- {name}: {}", p.description))
176        .collect::<Vec<_>>()
177        .join("\n");
178    let description = format!(
179        "Delegate a task to a specialized subagent that runs in a fresh, isolated context. \
180         The subagent returns only its final summary, not its intermediate work. \
181         Pick the profile whose description best matches the task.\n\n\
182         When you have multiple independent pieces of work, emit several `spawn_agent` \
183         calls in a single message: they run concurrently (fanout), so the total wait is \
184         the slowest subagent rather than their sum. Only spawn one at a time when a later \
185         task genuinely depends on an earlier subagent's result.\n\n\
186         Available profiles:\n{catalog}"
187    );
188    ToolSchema {
189        name: SPAWN_AGENT_TOOL_NAME.to_string(),
190        description,
191        input_schema: json!({
192            "type": "object",
193            "properties": {
194                "profile": {
195                    "type": "string",
196                    "enum": names,
197                    "description": "Which subagent to spawn. See the tool description for what each profile does."
198                },
199                "task": {
200                    "type": "string",
201                    "description": "The complete task for the subagent, as a self-contained \
202                                    natural-language instruction. The subagent has none of this \
203                                    conversation's context — include everything it needs."
204                },
205                "model": {
206                    "type": "string",
207                    "description": "Optional model override for this subagent. When omitted, \
208                                    the profile's configured model is used, falling back to the \
209                                    parent session's current model. Only set this when a task \
210                                    needs a specifically more or less capable model than the default."
211                },
212                "run_in_background": {
213                    "type": "boolean",
214                    "description": "When true, spawn the subagent asynchronously and return \
215                                    immediately with a task id, without waiting for it to finish. \
216                                    The subagent's result is delivered back to you later, on a \
217                                    subsequent turn, so you can keep working in the meantime. \
218                                    Leave false (the default) when the next step depends on this \
219                                    subagent's result — then the call blocks until it completes."
220                }
221            },
222            "required": ["profile", "task"]
223        }),
224    }
225}
226
/// Arguments of a `spawn_agent` call, deserialized from the raw tool-call JSON. The
/// field names mirror the properties declared in the tool's input schema.
#[derive(Debug, Deserialize)]
struct SpawnArgs {
    /// Name of the profile to run; looked up in the tool's profile table, and an unknown
    /// name is rejected as invalid arguments.
    profile: String,
    /// The task instruction handed to the subagent.
    task: String,
    /// Optional per-call model override. Takes highest priority (overrides
    /// `profile.model` and parent model).
    #[serde(default)]
    model: Option<String>,
    /// Whether to run in the background. When `true` and the context supports it
    /// (`ToolContext::background` is `Some`), spawn returns the task id immediately
    /// without waiting for the child agent to finish. When `true` but the context has no
    /// background support, the call fails instead of silently running synchronously.
    /// Defaults to `false` (synchronous blocking).
    #[serde(default)]
    run_in_background: bool,
}
242
243impl Tool for SpawnAgentTool {
244    fn schema(&self) -> &ToolSchema {
245        &self.schema
246    }
247
248    fn safety_hint(&self, _args: &serde_json::Value) -> SafetyClass {
249        // Conservatively mark as Mutating: the "danger" of spawn itself is determined by
250        // the child agent's tool set (gate A) and `NonInteractivePolicy` (gate B), not
251        // subdivided at this layer.
252        SafetyClass::Mutating
253    }
254
255    fn describe<'a>(
256        &'a self,
257        args: &'a serde_json::Value,
258        _ctx: ToolContext<'a>,
259    ) -> BoxFuture<'a, ToolCallDescription> {
260        Box::pin(async move {
261            let profile = args.get("profile").and_then(|v| v.as_str()).unwrap_or("?");
262            let mut fields = ToolCallUpdateFields::default();
263            fields.title = Some(format!("Spawn subagent `{profile}`"));
264            fields.kind = Some(ToolKind::Think);
265            ToolCallDescription { fields }
266        })
267    }
268
269    fn execute(&self, args: serde_json::Value, ctx: ToolContext<'_>) -> ToolStream {
270        // Move captured dependencies from construction and runtime handles from `ctx`
271        // into a `'static` future — all borrows of the nested `TurnRunner` live inside
272        // this async block and do not escape.
273        let profiles = self.profiles.clone();
274        let registry = self.registry.clone();
275        // Prefer the active policy from the current turn's snapshot (injected via `ctx`),
276        // which reflects the session's current permission mode; fall back to the policy
277        // captured at construction time only when none was injected (e.g. in tests or
278        // when omitted).
279        let policy = ctx.policy.clone().unwrap_or_else(|| self.policy.clone());
280        let process_tools = self.process_tools.clone();
281        let base_prompt = self.base_prompt.clone();
282
283        let cwd = ctx.cwd.to_path_buf();
284        let fs = ctx.fs.clone();
285        let shell = ctx.shell.clone();
286        let http = ctx.http.clone();
287        let parent_model = ctx.current_model.to_string();
288        let parent_provider = ctx.current_provider.to_string();
289        let background = ctx.background.clone();
290        // Subagent event bridge: nest child-turn events back into the parent trace
291        // (observability).
292        let bridge = ctx.subagent_bridge.clone();
293        // Remaining subagent dispatch depth for this turn. Child turns receive `depth-1`;
294        // whether the child toolset includes `spawn_agent` is determined by `child_depth
295        // > 0` (see `run_subagent_core`).
296        let subagent_depth = ctx.subagent_depth;
297        // The synchronous path uses a turn child token (cancelled when the turn ends);
298        // the background path does not use it, instead using a session-level child token
299        // minted by `BackgroundTasks` at spawn time (see below).
300        let turn_cancel = ctx.cancel.child_token();
301
302        // First parse `run_in_background` and the profile name to decide whether to run
303        // synchronously or in the background. On parse failure, both paths treat it as
304        // `InvalidArgs`.
305        let parsed: Result<SpawnArgs, _> = serde_json::from_value(args.clone());
306
307        let fut = async move {
308            let parsed = match parsed {
309                Ok(p) => p,
310                Err(err) => return ToolEvent::Failed(ToolError::InvalidArgs(BoxError::new(err))),
311            };
312
313            // Depth guard: the remaining dispatch depth for this turn is exhausted (0),
314            // so the `spawn_agent` tool should never have been visible —
315            // `run_subagent_core` does not include it in the child tool set when
316            // `child_depth == 0`. Reaching this point indicates a malformed `ctx`; fail
317            // loudly, do not silently swallow. The top-level turn injects the configured
318            // maximum, which is always > 0 under normal conditions.
319            if subagent_depth == 0 {
320                return ToolEvent::Failed(ToolError::InvalidArgs(BoxError::new(io_err(
321                    "subagent recursion depth exhausted: this agent is not allowed to spawn \
322                     further subagents"
323                        .to_string(),
324                ))));
325            }
326
327            // Background path: requires `ctx` to support background (only injected at the
328            // top-level turn), and `run_in_background=true`.
329            if parsed.run_in_background {
330                let Some(bg) = background else {
331                    // Background context is unavailable (nested subagent / test) — fail
332                    // loud, do not silently fall back to synchronous execution, otherwise
333                    // the model believes it is running in the background while actually
334                    // blocking, contradicting the declared behavior.
335                    return ToolEvent::Failed(ToolError::InvalidArgs(BoxError::new(io_err(
336                        "run_in_background is not available in this context (nested subagents \
337                         cannot spawn background tasks)"
338                            .to_string(),
339                    ))));
340                };
341                let label = parsed.profile.clone();
342                let deps = SubagentDeps {
343                    profiles,
344                    registry,
345                    policy,
346                    process_tools,
347                    base_prompt,
348                    cwd,
349                    fs,
350                    shell,
351                    http,
352                    parent_model,
353                    parent_provider,
354                    subagent_depth,
355                    // The background path also uses the bridge — the same
356                    // `AgentEvent::Subagent` mechanism as the foreground. The
357                    // `spawn_agent` tool span that initiates it closes normally first
358                    // (the `ToolCallFinished` "started" below), then the child turn
359                    // events appear as an **adjacent** subagent span under the same
360                    // `parent_tool_call_id` anchor, remaining open until the child turn
361                    // truly ends. The projector naturally distinguishes foreground
362                    // (nested) from background (adjacent) by checking whether the tool
363                    // span is still in the table. The bridge's `parent_events` is a
364                    // session-level `EventEmitter` that stays alive while the background
365                    // task runs.
366                    bridge,
367                    // Only the background path exposes history — `task_handle` is
368                    // obtained inside the spawn closure and injected later (see below).
369                    task_handle: None,
370                };
371                // Spawn mints a session-level child token for the task, so the task's
372                // cancellation lifecycle is independent of the turn that spawned it —
373                // ending the turn does not kill it. Also obtains a `TaskHandle`, shares
374                // the child turn's `history` `Arc` into the task table, and lets the main
375                // agent inspect the child agent's **submitted-to-LLM message blocks**
376                // (not streaming deltas) via `inspect_background_task`.
377                let label_for_log = parsed.profile.clone();
378                let task_id = bg.spawn(label, move |task_cancel, task_handle| async move {
379                    let mut deps = deps;
380                    deps.task_handle = Some(task_handle);
381                    match run_subagent_core(parsed, deps, task_cancel).await {
382                        Ok(answer) => crate::session::BackgroundResult::Completed(answer),
383                        Err(err) => {
384                            // Log loudly: background failures were previously silently
385                            // reduced to a `Failed` string, with no Langfuse event or log
386                            // entry. This adds a `warn` with the task and error details.
387                            tracing::warn!(
388                                profile = %label_for_log,
389                                error = %err,
390                                "background subagent failed"
391                            );
392                            crate::session::BackgroundResult::Failed(err.to_string())
393                        }
394                    }
395                });
396                // Return synchronously with "started id=X" to satisfy the tool_use ↔
397                // tool_result pairing contract.
398                // Subagent profiles are indexed by source name at startup.
399                let msg = format!(
400                    "Started background subagent `{}`, task id `{}`. Its result will arrive on a \
401                     later turn.",
402                    parsed_profile_for_msg(&args),
403                    task_id
404                );
405                let mut fields = ToolCallUpdateFields::default();
406                fields.content = Some(vec![ToolCallContent::Content(Content::new(
407                    ContentBlock::Text(TextContent::new(msg.clone())),
408                ))]);
409                fields.raw_output = Some(serde_json::Value::String(msg));
410                return ToolEvent::Completed(fields);
411            }
412
413            // Synchronous path: original behavior — block until the sub-turn finishes,
414            // then use the final text as the result.
415            let deps = SubagentDeps {
416                profiles,
417                registry,
418                policy,
419                process_tools,
420                base_prompt,
421                cwd,
422                fs,
423                shell,
424                http,
425                parent_model,
426                parent_provider,
427                subagent_depth,
428                // Synchronous path: the parent `spawn_agent` tool span remains open for
429                // the entire duration (blocking until the child turn completes), allowing
430                // child events to be nested under it.
431                bridge,
432                // Synchronous path: no background task, no history exposed (parent call
433                // blocks entirely; no need to "peek while running").
434                task_handle: None,
435            };
436            match run_subagent_core(parsed, deps, turn_cancel).await {
437                Ok(answer) => {
438                    let mut fields = ToolCallUpdateFields::default();
439                    fields.content = Some(vec![ToolCallContent::Content(Content::new(
440                        ContentBlock::Text(TextContent::new(answer.clone())),
441                    ))]);
442                    fields.raw_output = Some(serde_json::Value::String(answer));
443                    ToolEvent::Completed(fields)
444                }
445                Err(err) => ToolEvent::Failed(err),
446            }
447        };
448        let s: Pin<Box<dyn futures::Stream<Item = ToolEvent> + Send>> =
449            Box::pin(futures::stream::once(fut));
450        s
451    }
452}
453
/// Dependency bundle for `run_subagent_core` — avoids a dozen positional parameters. All
/// construction-time and ctx handles are moved in, fully owned, so they can cross await
/// points or be sent to a background task.
struct SubagentDeps {
    /// Profiles that can be spawned, keyed by profile name.
    profiles: Arc<BTreeMap<String, SubagentProfile>>,
    /// Provider registry used to resolve the child agent's model to a provider entry.
    registry: Arc<ProviderRegistry>,
    /// Policy chosen by `execute`: the one injected through the tool context when
    /// present, otherwise the one the tool captured at construction.
    policy: Arc<dyn SandboxPolicy>,
    /// Parent tool set from which the child's tools are picked by the profile allowlist.
    process_tools: Arc<dyn ToolRegistry>,
    /// Base prompt text inherited by the child agent, if any.
    base_prompt: Option<String>,
    /// Working directory, filesystem, shell and HTTP handles copied from the initiating
    /// call's tool context.
    cwd: std::path::PathBuf,
    fs: Arc<dyn crate::fs::FsBackend>,
    shell: Arc<dyn crate::shell::ShellBackend>,
    http: Arc<dyn crate::http::HttpClient>,
    /// The model currently selected in the parent session; used when neither the call
    /// nor the profile overrides the model.
    parent_model: String,
    /// The provider vendor currently selected in the parent session. Together with
    /// `parent_model` this forms a `(vendor, model)` selection pair – when the child
    /// agent's model falls back to the parent's choice, the entry is resolved exactly by
    /// this pair. An empty string means the parent context did not inject a vendor
    /// (legacy/test path), in which case the fallback picks the first entry by bare model
    /// id.
    parent_provider: String,
    /// Remaining dispatch depth for this (initiator) turn. Child turns run at
    /// `subagent_depth - 1`; the child toolset includes `spawn_agent` only when that
    /// decremented value is `> 0` (see `run_subagent_core`).
    subagent_depth: u32,
    /// Subagent event bridge: when `Some`, nests child turn events back into the parent
    /// trace. Passed along on both the synchronous and the background path; `None` only
    /// when the initiating tool context carried no bridge.
    bridge: Option<crate::tool::SubagentBridge>,
    /// Background task handle: when `Some`, shares the child turn's history `Arc` into
    /// the task table so the main agent can inspect the child agent's **message chunks
    /// submitted to the LLM** via `inspect_background_task`. Only set in the background
    /// path — the synchronous path's parent `spawn_agent` call blocks entirely, so there
    /// is no need to "peek while running".
    task_handle: Option<crate::session::TaskHandle>,
}
489
490/// Extracts the profile name from the raw args (used only for the background-start
491/// confirmation message; falls back to a placeholder on failure).
492fn parsed_profile_for_msg(args: &serde_json::Value) -> String {
493    args.get("profile")
494        .and_then(|v| v.as_str())
495        .unwrap_or("?")
496        .to_string()
497}
498
499/// Runs a sub-agent turn, returning the final text (`Ok`) or an error description
500/// (`Err`).
501///
502/// Both the synchronous and background paths share this core: the synchronous path wraps
503/// `Ok/Err` into `ToolEvent::Completed/Failed`, while the background path wraps them into
504/// `BackgroundResult::Completed/Failed`. The caller determines the lifecycle of `cancel`
505/// — the synchronous path passes a turn-level child token, and the background path passes
506/// a session-level child token.
507async fn run_subagent_core(
508    parsed: SpawnArgs,
509    deps: SubagentDeps,
510    cancel: tokio_util::sync::CancellationToken,
511) -> Result<String, ToolError> {
512    let SubagentDeps {
513        profiles,
514        registry,
515        policy,
516        process_tools,
517        base_prompt,
518        cwd,
519        fs,
520        shell,
521        http,
522        parent_model,
523        parent_provider,
524        subagent_depth,
525        bridge,
526        task_handle,
527    } = deps;
528
529    let Some(profile) = profiles.get(&parsed.profile) else {
530        return Err(ToolError::InvalidArgs(BoxError::new(io_err(format!(
531            "unknown profile `{}`; available: {}",
532            parsed.profile,
533            profiles.keys().cloned().collect::<Vec<_>>().join(", ")
534        )))));
535    };
536
537    // Model priority: call argument > profile > parent session's current model.
538    // Only when the model falls back to the parent (no explicit override) do we also
539    // inherit the parent's provider vendor, resolving precisely by `(vendor, model)` pair
540    // (so multiple gateways with the same model won't pick the wrong provider). When the
541    // model is explicitly overridden, there is no provider dimension information — fall
542    // back to taking the first entry by bare model id.
543    let model_override = parsed.model.clone().or_else(|| profile.model.clone());
544    let inherits_parent = model_override.is_none();
545    let model = model_override.unwrap_or(parent_model);
546    let entry = if inherits_parent && !parent_provider.is_empty() {
547        registry.entry_for(&parent_provider, &model)
548    } else {
549        registry.first_entry_for_model(&model)
550    };
551    let Some(entry) = entry else {
552        return Err(ToolError::Execution(BoxError::new(io_err(format!(
553            "subagent model `{model}` is not declared by any provider entry"
554        )))));
555    };
556    let provider = entry.provider().clone();
557
558    // The remaining dispatch depth for the child turn is this layer minus one. By this
559    // point `subagent_depth >= 1` (execute already fails loud on 0), so the child depth
560    // is >= 0.
561    let child_depth = subagent_depth - 1;
562
563    // Gate A: subset the parent tool set by the allowlist. `spawn_agent` is no longer
564    // unconditionally excluded — instead, a **depth gate** decides: only when
565    // `child_depth > 0` (the child agent can dispatch at least one more level) and the
566    // profile allowlist explicitly permits it, the child agent receives a **freshly
567    // constructed** `spawn_agent` tool (which captures the same base `process_tools` as
568    // the subset source, enabling grandchildren to continue recursion). When depth is
569    // exhausted (`child_depth == 0`), `spawn_agent` is ignored even if listed in the
570    // allowlist — a structural closure (same effect as the old hardcoded behavior, but
571    // configurable). Unknown tool names hard-fail (fail loud, not silently ignored).
572    let mut builder = StaticToolRegistry::builder();
573    for name in &profile.tool_allow {
574        if name == SPAWN_AGENT_TOOL_NAME {
575            if child_depth > 0 {
576                let child_spawn = SpawnAgentTool::new(
577                    profiles.clone(),
578                    registry.clone(),
579                    // Pass the parent policy obtained at this layer to the child
580                    // `SpawnAgentTool`, which captures it as a fallback at construction
581                    // time; at runtime, the active policy injected via `ctx` still takes
582                    // precedence. The child turn is further wrapped in `NonInteractive`.
583                    policy.clone(),
584                    process_tools.clone(),
585                    base_prompt.clone(),
586                );
587                builder = builder.insert(Arc::new(child_spawn));
588            }
589            // child_depth == 0: depth exhausted, skip — structurally prevents further
590            // recursion.
591            continue;
592        }
593        match process_tools.get(name) {
594            Some(tool) => builder = builder.insert(tool),
595            None => {
596                return Err(ToolError::InvalidArgs(BoxError::new(io_err(format!(
597                    "profile `{}` allows unknown tool `{name}`",
598                    parsed.profile
599                )))));
600            }
601        }
602    }
603    let sub_tools = builder.build();
604
605    // System prompt: inherited `base_prompt` + profile's own `system.md`. Does not use
606    // `resolve_system_prompt` (to avoid crawling workspace `AGENTS.md` / provider·model
607    // overlay).
608    let mut sections = Vec::new();
609    if let Some(bp) = base_prompt.as_deref()
610        && !bp.is_empty()
611    {
612        sections.push(bp.to_string());
613    }
614    if !profile.system_prompt.is_empty() {
615        sections.push(profile.system_prompt.clone());
616    }
617    let system_prompt: Option<Arc<str>> =
618        (!sections.is_empty()).then(|| Arc::from(sections.join("\n\n").as_str()));
619
620    // All sub-turn state is local to this async block and dropped when it completes.
621    // `history` is wrapped in `Arc` so the background path can share the same history
622    // with the task table, allowing the control plane to peek at the message blocks the
623    // sub-agent submits to the LLM.
624    let history: Arc<dyn History> = Arc::new(VecHistory::new());
625    if let Some(handle) = &task_handle {
626        handle.attach_history(history.clone());
627    }
628    let events = Arc::new(EventEmitter::new());
629
630    // Observability bridge: wraps each event from the child turn into an
631    // `AgentEvent::Subagent` and forwards it back to the parent session's event stream,
632    // so that Langfuse can nest the child turn under the parent's `spawn_agent` tool
633    // span. This is observability-only — the isolation contract leaves `storage` / `wire`
    // / `REPL` unchanged (they ignore `Subagent`). The bridge task subscribes to the
    // child emitter; once the child turn finishes, the strong references to `events`
    // are dropped, which ends the child stream and lets the task exit. The task handle
    // is awaited after the child turn so that every forwarded event reaches the parent
    // emitter before this function returns.
638    let bridge_task = bridge.map(|b| {
639        let mut sub_events = events.subscribe();
640        let agent_type = parsed.profile.clone();
641        tokio::spawn(async move {
642            while let Some(ev) = sub_events.next().await {
643                // Recursive flattening: this bridge layer only prepends its own
644                // `tool_call_id`.
645                //
646                // - From a deeper layer that is **already** a `Subagent` (with a partial
647                //   ancestor chain) → insert this layer's id at the head of the chain,
648                //   keeping the deeper `agent_type` and leaf `inner` unchanged.
649                // - A **leaf** event from a child turn → wrap it as `Subagent{[this
650                //   layer's id], this layer's profile, leaf}`.
651                //
652                // After the event passes through N layers, `ancestor_path` is exactly the
653                // complete chain from the top layer to the leaf.
654                let forwarded = match ev {
655                    AgentEvent::Subagent {
656                        mut ancestor_path,
657                        agent_type: deeper,
658                        inner,
659                    } => {
660                        ancestor_path.insert(0, b.parent_tool_call_id.clone());
661                        AgentEvent::Subagent {
662                            ancestor_path,
663                            agent_type: deeper,
664                            inner,
665                        }
666                    }
667                    leaf => AgentEvent::Subagent {
668                        ancestor_path: vec![b.parent_tool_call_id.clone()],
669                        agent_type: agent_type.clone(),
670                        inner: Box::new(leaf),
671                    },
672                };
673                b.parent_events.emit(forwarded).await;
674            }
675        })
676    });
677
678    let permissions = PermissionGate::new();
679    let sub_policy: Arc<dyn SandboxPolicy> = Arc::new(NonInteractivePolicy::new(policy));
680    // Use the hook engine declared in the profile, or fall back to `NoopHookEngine` (same
681    // behavior as before the change).
682    let noop = NoopHookEngine;
683    let hooks: &dyn HookEngine = match &profile.hooks {
684        Some(engine) => engine.as_ref(),
685        None => &noop,
686    };
687    let session_id = SessionId::new(format!("subagent-{}", parsed.profile));
688    let audit = RequestAuditTracker::new();
689
690    let config = TurnConfig {
691        model: model.clone(),
692        sampling: profile.sampling.clone().unwrap_or_default(),
693        // Limit subagent to a fixed number of steps to prevent runaway nested loops.
694        request_limit: TurnRequestLimit::Fixed(32),
695        // Depth decreases by one per level: the child turn's tool driver uses this to
696        // decide whether grandchildren can be dispatched. When `child_depth == 0`, the
697        // child turn's tool set already lacks `spawn_agent` (gate A above is not
698        // installed), so redundantly setting it to 0 here is self-consistent.
699        subagent_max_depth: child_depth,
700        ..TurnConfig::default()
701    };
702
703    let runner = TurnRunner {
704        history: history.as_ref(),
705        tools: &sub_tools,
706        provider: provider.as_ref(),
707        policy: sub_policy,
708        events: events.clone(),
709        permissions: &permissions,
710        cancel: cancel.clone(),
711        config: &config,
712        system_prompt,
713        cwd: &cwd,
714        fs,
715        shell,
716        http,
717        hosted_capabilities: HostedCapabilities::default(),
718        hooks,
719        session_id: &session_id,
720        request_audit: &audit,
        // Sub-agent turns carry no background handle: this structurally prevents
        // background tasks from spawning further background tasks. Recursion through
        // `spawn_agent` itself is bounded separately by `subagent_max_depth`.
724        background: None,
        // Sub-agent does not participate in the parent's goal loop: the parent's
        // `goal_done` / `goal-gate` only apply at the top-level turn; the sub-agent has
        // its own finite step limit (`request_limit`) as a safety net.
728        goal: None,
729        // Sub-agent turns skip background compaction: the context is short and its
730        // lifetime ends with the tool call, so no cross-turn background summary is
731        // needed. It still benefits from the hard-watermark synchronous compaction
732        // fallback (the `compact_hard` path requires `provider_arc`), so we give it
733        // `provider_arc` and leave the other background compaction fields empty.
734        compaction_slot: None,
735        history_arc: None,
736        provider_arc: Some(provider.clone()),
737        session_cancel: None,
738        // The sub-agent's task is its "user input".
739        ingest_source: crate::hooks::step::IngestSource::User,
740    };
741
742    let prompt = vec![ContentBlock::Text(TextContent::new(parsed.task))];
743    let run_result = runner.run(prompt).await;
744
745    // End of sub-turn: drop `runner` and the local strong reference to `events`, allowing
746    // the child event stream to close. The bridge task flushes any buffered events to the
747    // parent emitter and then exits. Awaiting it ensures all child events arrive before
748    // the parent `spawn_agent` tool span finishes (this function returns →
749    // `ToolCallFinished`).
750    drop(runner);
751    drop(events);
752    if let Some(task) = bridge_task {
753        let _ = task.await;
754    }
755
756    if let Err(err) = run_result {
757        return Err(ToolError::Execution(BoxError::new(io_err(format!(
758            "subagent turn failed: {err}"
759        )))));
760    }
761
762    // Take the text of the last assistant message as the result.
763    Ok(last_assistant_text(&history.snapshot()))
764}
765
766/// Take the **last** [`Role::Assistant`] message from the history and concatenate all its
767/// `Text` segments (skipping thinking / tool_use). The tool-use loop may append multiple
768/// assistant messages; the last one corresponds to the "final answer".
769fn last_assistant_text(history: &[crate::llm::Message]) -> String {
770    history
771        .iter()
772        .rev()
773        .find(|m| m.role == Role::Assistant)
774        .map(|m| {
775            m.content
776                .iter()
777                .filter_map(|c| match c {
778                    MessageContent::Text { text } => Some(text.as_str()),
779                    _ => None,
780                })
781                .collect::<Vec<_>>()
782                .join("")
783        })
784        .unwrap_or_default()
785}
786
/// Wraps a plain message in a [`std::io::Error`] via [`std::io::Error::other`].
///
/// The call sites visible in this file box the result into a `ToolError` payload.
fn io_err(msg: String) -> std::io::Error {
    use std::io::Error as IoError;

    IoError::other(msg)
}
790
791#[cfg(test)]
792mod tests;
defect_agent/tool/spawn_agent.rs

defect_agent/tool/
spawn_agent.rs