Skip to main content

fluers_core/
subagent.rs

1//! Subagent delegation: the built-in `task` tool.
2//!
3//! Mirrors Flue's [Subagents](https://flue.dev/docs/guide/subagents/): an agent
4//! delegates a focused piece of work to a named subagent. The subagent runs in a
5//! fresh child session and its answer returns to the parent as the `task` tool
6//! result.
7//!
8//! See `docs/MVP4_SUBAGENTS_DESIGN.md` for the full design and scope.
9//!
10//! # Configuration inheritance (Flue-compatible)
11//!
12//! Capability fields (`instructions` / `tools` / `subagents`) are
13//! **profile-owned** — the parent's values never flow into the delegated
14//! session. Scalar defaults (`model` / `config`) inherit from the parent when
15//! the profile omits them.
16
17use std::sync::atomic::{AtomicUsize, Ordering};
18use std::sync::Arc;
19
20use async_trait::async_trait;
21use serde_json::Value;
22use tokio_util::sync::CancellationToken;
23use uuid::Uuid;
24
25use crate::error::{CoreError, Result as CoreResult};
26use crate::event::EventSink;
27use crate::message::{AgentMessage, ContentBlock, Role};
28use crate::model::{Model, ModelProvider};
29use crate::policy::ToolPolicy;
30use crate::runner::{run_agent, RunConfig, RunOutcome};
31use crate::tool::{InvokeContext, Tool, ToolDefinition, ToolResult};
32
/// Default ceiling on delegation depth (chain length).
///
/// Depth counts from the top: the top-level agent is depth 0, the children
/// its `task` calls spawn are depth 1, their children are depth 2, and so on.
/// Capping the depth keeps runaway recursive delegation bounded.
pub const DEFAULT_MAX_DEPTH: usize = 5;

/// Default ceiling on the **total** number of delegations in one tree.
///
/// A depth limit bounds how long a delegation chain can get, but not how wide
/// the tree can grow: one parent turn may issue many parallel `task` calls,
/// and every child may do the same, giving exponential fan-out (up to
/// `max_tool_calls_per_turn`^`max_depth` ≈ 10⁵ at defaults). A single budget
/// shared by the whole tree replaces that with a hard cap that holds no matter
/// how deep or wide the tree is.
pub const DEFAULT_MAX_DELEGATIONS: usize = 64;
45
46/// A named, declarable subagent profile.
47///
48/// Capability fields (`instructions` / `tools` / `subagents`) are
49/// **profile-owned** — the parent's values never flow into a delegated session,
50/// so a parent's bash tool never silently leaks into a reviewer subagent.
51/// Scalar defaults (`model` / `config`) inherit from the parent when `None`.
52#[derive(Clone)]
53pub struct SubagentProfile {
54    /// Machine name the parent model targets in `task({ agent: ... })`.
55    pub name: String,
56    /// Delegation guidance shown to the parent model alongside the name.
57    pub description: String,
58    /// The subagent's system message (the child session's first message).
59    pub instructions: String,
60    /// Profile-owned model. `None` ⇒ inherit the parent's model.
61    pub model: Option<Model>,
62    /// Profile-owned run config. `None` ⇒ inherit the parent's config.
63    pub config: Option<RunConfig>,
64    /// Profile-owned tools. The parent's tools do NOT flow into the child.
65    pub tools: Vec<Arc<dyn Tool>>,
66    /// Profile-owned subagents (enables recursive delegation). The parent's
67    /// subagents do NOT flow into the child.
68    pub subagents: Vec<SubagentProfile>,
69}
70
71impl SubagentProfile {
72    /// Build a minimal profile (name + instructions). Other fields default to
73    /// inherited / empty. `description` is left empty (callers can override
74    /// with `.with_description(...)`; an empty description renders as
75    /// "(no description provided)" in the tool listing).
76    #[must_use]
77    pub fn new(name: impl Into<String>, instructions: impl Into<String>) -> Self {
78        Self {
79            name: name.into(),
80            // Description defaults to a trimmed copy of the instructions; callers
81            // can override with `.with_description(...)`.
82            description: String::new(),
83            instructions: instructions.into(),
84            model: None,
85            config: None,
86            tools: Vec::new(),
87            subagents: Vec::new(),
88        }
89    }
90
91    /// Set the delegation-guidance description shown to the parent model.
92    #[must_use]
93    pub fn with_description(mut self, description: impl Into<String>) -> Self {
94        self.description = description.into();
95        self
96    }
97
98    /// Set the profile-owned model (overrides inheritance).
99    #[must_use]
100    pub fn with_model(mut self, model: Model) -> Self {
101        self.model = Some(model);
102        self
103    }
104
105    /// Set the profile-owned run config (overrides inheritance).
106    #[must_use]
107    pub fn with_config(mut self, config: RunConfig) -> Self {
108        self.config = Some(config);
109        self
110    }
111
112    /// Add a profile-owned tool.
113    #[must_use]
114    pub fn with_tool(mut self, tool: Arc<dyn Tool>) -> Self {
115        self.tools.push(tool);
116        self
117    }
118
119    /// Declare a nested subagent (enables recursive delegation).
120    #[must_use]
121    pub fn with_subagent(mut self, subagent: SubagentProfile) -> Self {
122        self.subagents.push(subagent);
123        self
124    }
125}
126
/// Options for the [`TaskTool`].
///
/// Plain data: two independent limits that together bound a delegation tree.
/// The type is `Copy` and comparable so callers and tests can pass it around
/// and check it by value.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct SubagentOptions {
    /// Maximum delegation **depth** (chain length). The top-level agent runs
    /// at depth 0; its `task` calls run children at depth 1; their `task` calls
    /// run at depth 2; etc. A `task` call at `depth >= max_depth` returns a
    /// depth-exceeded error result.
    pub max_depth: usize,
    /// Maximum **total** number of delegations across the whole tree (shared
    /// atomic counter). Bounds exponential fan-out: a parent issuing many
    /// parallel `task` calls, each spawning children that do the same, is
    /// capped regardless of depth or width. A `task` call that would exceed
    /// the remaining budget returns a budget-exceeded error result.
    pub max_delegations: usize,
}
142
143impl Default for SubagentOptions {
144    fn default() -> Self {
145        Self {
146            max_depth: DEFAULT_MAX_DEPTH,
147            max_delegations: DEFAULT_MAX_DELEGATIONS,
148        }
149    }
150}
151
152/// The built-in `task` tool, which also holds the delegation state.
153///
154/// Construct one and include it in the parent's tool list to enable delegation.
155/// Each nested run gets a new `TaskTool` with `depth + 1` and the child
156/// profile's own `subagents` (for recursion).
157///
158/// # Profile ownership
159///
160/// The parent's tool list (other than this `TaskTool`) never flows into a
161/// child. The child gets exactly: the profile's declared `tools`, plus a fresh
162/// child `TaskTool` when the profile declares its own `subagents`.
163pub struct TaskTool {
164    /// Shared model provider (one is reused across the delegation tree).
165    provider: Arc<dyn ModelProvider>,
166    /// Parent model — inherited when a profile omits its own.
167    parent_model: Model,
168    /// Parent config — inherited when a profile omits its own.
169    parent_config: RunConfig,
170    /// Subagents declared at this level.
171    subagents: Vec<SubagentProfile>,
172    /// Recursion limit.
173    max_depth: usize,
174    /// Current depth (0 for the top-level agent's `task` tool).
175    depth: usize,
176    /// Cancellation token shared across the delegation tree.
177    cancel: CancellationToken,
178    /// Optional event sink (children emit to the same sink with a new session
179    /// id, giving a nested trace without explicit span-parent linking).
180    event_sink: Option<Arc<dyn EventSink>>,
181    /// Optional tool policy (Fae deviation; see the README). Held as an owned
182    /// `Arc` because a `TaskTool` is `'static` (lives in the tool list); it is
183    /// handed to each child run's [`RunHooks`] as a borrow via `as_deref()`, so
184    /// a governance gate applies to delegated subagents too — the policy cannot
185    /// be bypassed by delegating.
186    ///
187    /// [`RunHooks`]: crate::event::RunHooks
188    policy: Option<Arc<dyn ToolPolicy>>,
189    /// Shared counter of **remaining** delegations across the whole tree.
190    /// Bounds exponential fan-out: each successful `task` call decrements it.
191    remaining_delegations: Arc<AtomicUsize>,
192}
193
194impl TaskTool {
195    /// Construct the top-level `task` tool (depth 0).
196    ///
197    /// Include the returned tool in the parent agent's tool list to enable
198    /// delegation to any of `subagents`.
199    // Each argument is a distinct per-run input (provider, model, config,
200    // subagents, options, cancel, event sink, policy); bundling them would only
201    // move the list into a struct without reducing what a caller must supply.
202    #[allow(clippy::too_many_arguments)]
203    #[must_use]
204    pub fn new(
205        provider: Arc<dyn ModelProvider>,
206        parent_model: Model,
207        parent_config: RunConfig,
208        subagents: Vec<SubagentProfile>,
209        options: SubagentOptions,
210        cancel: CancellationToken,
211        event_sink: Option<Arc<dyn EventSink>>,
212        policy: Option<Arc<dyn ToolPolicy>>,
213    ) -> Self {
214        Self {
215            provider,
216            parent_model,
217            parent_config,
218            subagents,
219            max_depth: options.max_depth,
220            depth: 0,
221            cancel,
222            event_sink,
223            policy,
224            remaining_delegations: Arc::new(AtomicUsize::new(options.max_delegations)),
225        }
226    }
227
228    /// Construct a child `task` tool at `depth + 1`.
229    fn child(
230        &self,
231        subagents: Vec<SubagentProfile>,
232        parent_model: Model,
233        parent_config: RunConfig,
234    ) -> Self {
235        Self {
236            provider: Arc::clone(&self.provider),
237            parent_model,
238            parent_config,
239            subagents,
240            max_depth: self.max_depth,
241            depth: self.depth + 1,
242            cancel: self.cancel.clone(),
243            event_sink: self.event_sink.as_ref().map(Arc::clone),
244            // The same policy governs every level of the tree.
245            policy: self.policy.as_ref().map(Arc::clone),
246            // Shared across the whole tree.
247            remaining_delegations: Arc::clone(&self.remaining_delegations),
248        }
249    }
250
251    /// Resolve a profile by name.
252    fn resolve(&self, name: &str) -> Option<&SubagentProfile> {
253        self.subagents.iter().find(|s| s.name == name)
254    }
255
256    /// The original delegation budget (for diagnostics). Stored implicitly as
257    /// `remaining + consumed`; since we only need it for error messages, we
258    /// approximate by reading `remaining` plus the depth index. This is best-
259    /// effort and used only in the budget-exceeded message.
260    fn max_delegations_hint(&self) -> usize {
261        // We don't store the original cap separately; approximate from the
262        // current remaining count. The message is guidance, not a contract.
263        self.remaining_delegations.load(Ordering::Relaxed) + 1
264    }
265
266    /// Delegate to the resolved subagent. Returns the child's final text.
267    async fn delegate(&self, profile: &SubagentProfile, prompt: String) -> CoreResult<RunOutcome> {
268        // Apply inheritance.
269        let child_model = profile
270            .model
271            .clone()
272            .unwrap_or_else(|| self.parent_model.clone());
273        let child_config = profile
274            .config
275            .clone()
276            .unwrap_or_else(|| self.parent_config.clone());
277
278        // Build the child's tool list. Profile-owned only; the parent's tools
279        // never flow in. Add a child TaskTool only if the profile declares its
280        // own subagents (recursion). The child TaskTool is PREPENDED so that,
281        // if a profile mistakenly/​maliciously declares a tool named "task",
282        // the depth-enforcing child TaskTool wins the name lookup (the runner
283        // matches the first tool by name) and depth limits are preserved.
284        let mut child_tools: Vec<Arc<dyn Tool>> = Vec::new();
285        if !profile.subagents.is_empty() {
286            let child_task = self.child(
287                profile.subagents.clone(),
288                // The child's TaskTool inherits from the *resolved* child
289                // model/config, so grandchildren inherit the right defaults.
290                child_model.clone(),
291                child_config.clone(),
292            );
293            child_tools.push(Arc::new(child_task));
294        }
295        child_tools.extend(profile.tools.clone());
296
297        // Fresh child session: new UUID, messages = [system, user].
298        let child_session = Uuid::new_v4();
299        let mut child_messages = vec![
300            AgentMessage {
301                role: Role::System,
302                content: vec![ContentBlock::Text {
303                    text: profile.instructions.clone(),
304                }],
305            },
306            AgentMessage {
307                role: Role::User,
308                content: vec![ContentBlock::Text { text: prompt }],
309            },
310        ];
311
312        // Child hooks: new session id, no turn sink (the parent's persistence
313        // records the task tool result — exact replay), same event sink, and
314        // the same tool policy (inherited so a governance gate applies to the
315        // delegated subagent too — it cannot be bypassed by delegating).
316        let child_hooks = crate::event::RunHooks {
317            session_id: Some(child_session),
318            turn_sink: None,
319            event_sink: self.event_sink.as_deref(),
320            policy: self.policy.as_deref(),
321        };
322
323        // Run the child to completion. Its events (SessionStarted → ... →
324        // TurnFinished / RunFailed) flow to the same event sink with the
325        // child's session id, giving a nested trace.
326        run_agent(
327            self.provider.as_ref(),
328            &child_tools,
329            &mut child_messages,
330            &child_model,
331            &child_config,
332            &self.cancel,
333            &child_hooks,
334        )
335        .await
336    }
337}
338
339#[async_trait]
340impl Tool for TaskTool {
341    fn definition(&self) -> ToolDefinition {
342        let mut desc = String::from(
343            "Delegate a focused subtask to a named subagent. The subagent runs \
344             in a fresh context and its answer is returned to you. Call this \
345             only when a declared subagent is well-suited to the work. \
346             Available subagents:",
347        );
348        if self.subagents.is_empty() {
349            desc.push_str(" (none declared)");
350        } else {
351            for s in &self.subagents {
352                let guidance = if s.description.trim().is_empty() {
353                    "(no description provided)"
354                } else {
355                    s.description.trim()
356                };
357                desc.push_str(&format!("\n  - \"{}\": {}", s.name, guidance));
358            }
359        }
360
361        // Schema: object requiring `agent` (string) and `prompt` (string).
362        let mut fields = serde_json::Map::new();
363        fields.insert("type".into(), Value::String("object".into()));
364        fields.insert(
365            "properties".into(),
366            serde_json::json!({
367                "agent": {
368                    "type": "string",
369                    "description": "The name of the declared subagent to delegate to."
370                },
371                "prompt": {
372                    "type": "string",
373                    "description": "The task to give the subagent (it sees this, not your conversation history)."
374                }
375            }),
376        );
377        fields.insert(
378            "required".into(),
379            Value::Array(vec![
380                Value::String("agent".into()),
381                Value::String("prompt".into()),
382            ]),
383        );
384
385        ToolDefinition {
386            name: "task".into(),
387            label: "Task".into(),
388            description: desc,
389            parameters: crate::tool::ParameterSchema {
390                fields: fields.into_iter().collect(),
391            },
392        }
393    }
394
395    async fn execute(&self, ctx: InvokeContext, input: Value) -> CoreResult<ToolResult> {
396        // Parse { agent, prompt }.
397        let obj = input.as_object().ok_or_else(|| {
398            CoreError::ToolInputValidation("task tool expects an object input".into())
399        })?;
400        let agent = obj.get("agent").and_then(Value::as_str).ok_or_else(|| {
401            CoreError::ToolInputValidation("task tool requires a string `agent`".into())
402        })?;
403        let prompt = obj.get("prompt").and_then(Value::as_str).ok_or_else(|| {
404            CoreError::ToolInputValidation("task tool requires a string `prompt`".into())
405        })?;
406
407        // Resolve the subagent (SubagentNotDeclared).
408        let profile = match self.resolve(agent) {
409            Some(p) => p,
410            None => {
411                let known: Vec<&str> = self.subagents.iter().map(|s| s.name.as_str()).collect();
412                return Err(CoreError::ToolInputValidation(format!(
413                    "subagent not declared: \"{agent}\" (known: {})",
414                    known.join(", ")
415                )));
416            }
417        };
418
419        // Enforce the shared delegation budget (bounds exponential fan-out).
420        // fetch_sub returns the PREVIOUS value; if it was 0, we're already at
421        // the cap and this call must be rejected. Otherwise we've claimed one
422        // slot.
423        let prev = self.remaining_delegations.fetch_sub(1, Ordering::Relaxed);
424        if prev == 0 {
425            // Restore the counter (we didn't consume a slot) and report.
426            self.remaining_delegations.fetch_add(1, Ordering::Relaxed);
427            return Err(CoreError::ToolInputValidation(format!(
428                "delegation budget exhausted (max {} total delegations across the tree)",
429                self.max_delegations_hint()
430            )));
431        }
432
433        // Enforce the depth limit (DelegationDepthExceeded).
434        if self.depth >= self.max_depth {
435            // We already decremented the budget; restore it since we're not
436            // actually delegating.
437            self.remaining_delegations.fetch_add(1, Ordering::Relaxed);
438            return Err(CoreError::ToolInputValidation(format!(
439                "delegation depth exceeded (depth {} >= max_depth {})",
440                self.depth, self.max_depth
441            )));
442        }
443
444        // Honor cancellation before spawning the child (the child run also
445        // checks cancellation, but failing fast avoids a needless child span).
446        if ctx.cancel.is_cancelled() {
447            return Err(CoreError::Cancelled("task delegation cancelled".into()));
448        }
449
450        // Delegate. Map a child-run failure into a bounded error result string
451        // (the runner turns any Err into a model-visible `Error:` tool result,
452        // so the parent can recover).
453        let outcome = self.delegate(profile, prompt.to_string()).await?;
454        Ok(ToolResult {
455            content: vec![serde_json::json!({
456                "type": "text",
457                "text": if outcome.final_text.trim().is_empty() {
458                    "(subagent returned no text)".to_string()
459                } else {
460                    outcome.final_text
461                },
462            })],
463            details: None,
464        })
465    }
466}
467
468#[cfg(test)]
469mod tests {
470    use super::*;
471    use crate::model::{ModelRequest, ModelResponse};
472
473    fn dummy_profile(name: &str) -> SubagentProfile {
474        SubagentProfile::new(name, "you are a helper")
475    }
476
477    fn top_level_tool(profiles: Vec<SubagentProfile>, max_depth: usize) -> TaskTool {
478        TaskTool::new(
479            // Provider is only touched inside `delegate`/`run_agent`, which the
480            // unit tests below do not exercise. A panic-on-call provider would
481            // be wrong here because `Arc::new(())` isn't a provider — so we use
482            // a dedicated test provider below where delegation actually runs.
483            test_provider(),
484            Model {
485                id: "test/model".into(),
486            },
487            RunConfig::default(),
488            profiles,
489            SubagentOptions {
490                max_depth,
491                max_delegations: DEFAULT_MAX_DELEGATIONS,
492            },
493            CancellationToken::new(),
494            None,
495            None,
496        )
497    }
498
499    // A minimal recording provider used by tests that exercise delegation.
500    fn test_provider() -> Arc<dyn ModelProvider> {
501        use async_trait::async_trait;
502        struct TestProvider;
503        #[async_trait]
504        impl ModelProvider for TestProvider {
505            async fn invoke(
506                &self,
507                _request: crate::model::ModelRequest,
508            ) -> CoreResult<crate::model::ModelResponse> {
509                // Return a single assistant text message with no tool calls.
510                Ok(crate::model::ModelResponse {
511                    messages: vec![crate::message::AgentMessage {
512                        role: crate::message::Role::Assistant,
513                        content: vec![crate::message::ContentBlock::Text {
514                            text: "child done".into(),
515                        }],
516                    }],
517                })
518            }
519        }
520        Arc::new(TestProvider)
521    }
522
523    #[test]
524    fn definition_lists_declared_subagents() {
525        let profiles = vec![
526            dummy_profile("reviewer").with_description("Review changes."),
527            dummy_profile("classifier").with_description("Classify issues."),
528        ];
529        let tool = top_level_tool(profiles, DEFAULT_MAX_DEPTH);
530        let def = tool.definition();
531        assert_eq!(def.name, "task");
532        assert_eq!(def.label, "Task");
533        assert!(def.description.contains("\"reviewer\""), "missing reviewer");
534        assert!(def.description.contains("Review changes."));
535        assert!(def.description.contains("\"classifier\""));
536        assert!(def.description.contains("Classify issues."));
537    }
538
539    #[test]
540    fn definition_handles_no_subagents() {
541        let tool = top_level_tool(vec![], DEFAULT_MAX_DEPTH);
542        let def = tool.definition();
543        assert!(def.description.contains("(none declared)"));
544    }
545
546    #[test]
547    fn definition_schema_requires_agent_and_prompt() {
548        let tool = top_level_tool(vec![dummy_profile("a")], DEFAULT_MAX_DEPTH);
549        let def = tool.definition();
550        let required = def
551            .parameters
552            .fields
553            .get("required")
554            .and_then(|v| v.as_array())
555            .expect("required array");
556        let names: Vec<&str> = required.iter().filter_map(Value::as_str).collect();
557        assert!(names.contains(&"agent"));
558        assert!(names.contains(&"prompt"));
559    }
560
561    #[tokio::test]
562    async fn unknown_agent_returns_error() {
563        let tool = top_level_tool(vec![dummy_profile("reviewer")], DEFAULT_MAX_DEPTH);
564        let ctx = InvokeContext {
565            tool_call_id: "c1".into(),
566            cancel: CancellationToken::new(),
567        };
568        let err = tool
569            .execute(ctx, serde_json::json!({ "agent": "ghost", "prompt": "hi" }))
570            .await
571            .expect_err("unknown agent should error");
572        let msg = err.to_string();
573        assert!(msg.contains("not declared"), "msg: {msg}");
574        assert!(msg.contains("ghost"));
575        // Helpful: lists the known subagents.
576        assert!(msg.contains("reviewer"));
577    }
578
579    #[tokio::test]
580    async fn depth_exceeded_at_max_zero() {
581        // max_depth = 0 means even the top-level task tool (depth 0) exceeds.
582        let tool = top_level_tool(vec![dummy_profile("a")], 0);
583        let ctx = InvokeContext {
584            tool_call_id: "c2".into(),
585            cancel: CancellationToken::new(),
586        };
587        let err = tool
588            .execute(ctx, serde_json::json!({ "agent": "a", "prompt": "hi" }))
589            .await
590            .expect_err("depth should exceed");
591        let msg = err.to_string();
592        assert!(msg.contains("depth exceeded"), "msg: {msg}");
593        assert!(msg.contains("max_depth 0"));
594    }
595
596    #[tokio::test]
597    async fn budget_exhaustion_blocks_delegation() {
598        // max_delegations = 1 allows ONE delegation; the second `task` call in
599        // the SAME run must be rejected with a budget-exceeded error.
600        let tool = TaskTool::new(
601            test_provider(),
602            Model {
603                id: "test/model".into(),
604            },
605            RunConfig::default(),
606            vec![dummy_profile("worker")],
607            SubagentOptions {
608                max_depth: DEFAULT_MAX_DEPTH,
609                max_delegations: 1,
610            },
611            CancellationToken::new(),
612            None,
613            None,
614        );
615        let ctx1 = InvokeContext {
616            tool_call_id: "b1".into(),
617            cancel: CancellationToken::new(),
618        };
619        // First delegation consumes the single budget slot and succeeds.
620        let r1 = tool
621            .execute(
622                ctx1,
623                serde_json::json!({ "agent": "worker", "prompt": "go" }),
624            )
625            .await
626            .expect("first delegation succeeds");
627        assert_eq!(r1.content.len(), 1);
628
629        // Second delegation in the same tree is rejected.
630        let ctx2 = InvokeContext {
631            tool_call_id: "b2".into(),
632            cancel: CancellationToken::new(),
633        };
634        let err = tool
635            .execute(
636                ctx2,
637                serde_json::json!({ "agent": "worker", "prompt": "again" }),
638            )
639            .await
640            .expect_err("budget should be exhausted");
641        let msg = err.to_string();
642        assert!(msg.contains("budget exhausted"), "msg: {msg}");
643    }
644
645    #[tokio::test]
646    async fn delegate_runs_child_and_returns_text() {
647        let tool = top_level_tool(vec![dummy_profile("worker")], DEFAULT_MAX_DEPTH);
648        let ctx = InvokeContext {
649            tool_call_id: "c3".into(),
650            cancel: CancellationToken::new(),
651        };
652        let result = tool
653            .execute(
654                ctx,
655                serde_json::json!({ "agent": "worker", "prompt": "do it" }),
656            )
657            .await
658            .expect("delegation should succeed");
659        assert_eq!(result.content.len(), 1);
660        let text = result.content[0]
661            .get("text")
662            .and_then(Value::as_str)
663            .expect("text");
664        assert_eq!(text, "child done");
665    }
666
667    #[tokio::test]
668    async fn cancellation_aborts_before_child_spawn() {
669        let tool = top_level_tool(vec![dummy_profile("a")], DEFAULT_MAX_DEPTH);
670        let cancel = CancellationToken::new();
671        let ctx = InvokeContext {
672            tool_call_id: "c4".into(),
673            cancel: cancel.clone(),
674        };
675        cancel.cancel();
676        let err = tool
677            .execute(ctx, serde_json::json!({ "agent": "a", "prompt": "hi" }))
678            .await
679            .expect_err("should be cancelled");
680        assert!(matches!(err, CoreError::Cancelled(_)), "err: {err}");
681    }
682
683    // ── Integration: run_agent-driven delegation ─────────────────────────
684
685    /// A scripted provider: returns a queue of canned responses in order.
686    struct ScriptedProvider {
687        responses: std::sync::Mutex<std::collections::VecDeque<ModelResponse>>,
688    }
689
690    impl ScriptedProvider {
691        fn new(responses: Vec<Vec<AgentMessage>>) -> Self {
692            let responses = responses
693                .into_iter()
694                .map(|msgs| ModelResponse { messages: msgs })
695                .collect();
696            Self {
697                responses: std::sync::Mutex::new(responses),
698            }
699        }
700    }
701
702    #[async_trait]
703    impl ModelProvider for ScriptedProvider {
704        async fn invoke(&self, _request: ModelRequest) -> CoreResult<ModelResponse> {
705            let next = self
706                .responses
707                .lock()
708                .unwrap()
709                .pop_front()
710                .unwrap_or(ModelResponse { messages: vec![] });
711            Ok(next)
712        }
713    }
714
715    fn assistant_text(t: &str) -> AgentMessage {
716        AgentMessage {
717            role: Role::Assistant,
718            content: vec![ContentBlock::Text { text: t.into() }],
719        }
720    }
721
722    /// A parent response that issues a `task` tool call.
723    fn parent_task_call(agent: &str, prompt: &str) -> AgentMessage {
724        AgentMessage {
725            role: Role::Assistant,
726            content: vec![ContentBlock::ToolUse {
727                id: "call_1".into(),
728                call: crate::tool::ToolCall {
729                    name: "task".into(),
730                    input: serde_json::json!({ "agent": agent, "prompt": prompt }),
731                },
732            }],
733        }
734    }
735
736    #[tokio::test]
737    async fn integration_parent_delegates_and_child_answers() {
738        // Parent: first response is a task tool call; after the tool result,
739        // it emits final text.
740        let provider: Arc<dyn ModelProvider> = Arc::new(ScriptedProvider::new(vec![
741            // Parent turn 1: delegate.
742            vec![parent_task_call("worker", "do the work")],
743            // Child turn 1 (fresh session): the child's own response.
744            vec![assistant_text("child done")],
745            // Parent turn 2: summarize the child's answer (returned as the
746            // task tool result).
747            vec![assistant_text("got: child done")],
748        ]));
749        let cancel = CancellationToken::new();
750        let task = Arc::new(TaskTool::new(
751            Arc::clone(&provider),
752            Model {
753                id: "test/m".into(),
754            },
755            RunConfig::default(),
756            vec![dummy_profile("worker")],
757            SubagentOptions::default(),
758            cancel.clone(),
759            None,
760            None,
761        ));
762        let tools: Vec<Arc<dyn Tool>> = vec![task];
763        let mut messages = vec![
764            AgentMessage {
765                role: Role::System,
766                content: vec![ContentBlock::Text {
767                    text: "be brief".into(),
768                }],
769            },
770            AgentMessage {
771                role: Role::User,
772                content: vec![ContentBlock::Text {
773                    text: "delegate the work".into(),
774                }],
775            },
776        ];
777        let outcome = run_agent(
778            provider.as_ref(),
779            &tools,
780            &mut messages,
781            &Model {
782                id: "test/m".into(),
783            },
784            &RunConfig::default(),
785            &cancel,
786            &crate::event::RunHooks::default(),
787        )
788        .await
789        .expect("parent run");
790        assert_eq!(outcome.turns, 2);
791        assert_eq!(outcome.final_text, "got: child done");
792    }
793
794    #[tokio::test]
795    async fn integration_nested_delegation_stops_at_max_depth() {
796        // Two-level profile: parent → child → grandchild. With max_depth = 1,
797        // the grandchild delegation must return a depth-exceeded error result.
798        let grandchild = dummy_profile("grandchild");
799        let child = SubagentProfile::new("child", "you delegate").with_subagent(grandchild);
800
801        // Responses: parent delegates to child; child delegates to grandchild;
802        // child then reports what it got back.
803        let provider: Arc<dyn ModelProvider> = Arc::new(ScriptedProvider::new(vec![
804            // Parent turn 1: delegate to child.
805            vec![parent_task_call("child", "sub-delegate")],
806            // Child turn 1 (fresh session): it tries to delegate to grandchild.
807            vec![AgentMessage {
808                role: Role::Assistant,
809                content: vec![ContentBlock::ToolUse {
810                    id: "cchild".into(),
811                    call: crate::tool::ToolCall {
812                        name: "task".into(),
813                        input: serde_json::json!({
814                            "agent": "grandchild",
815                            "prompt": "too deep"
816                        }),
817                    },
818                }],
819            }],
820            // Child turn 2: summarize the depth-exceeded error it received
821            // (the grandchild task call returned a tool error result).
822            vec![assistant_text("grandchild was unreachable")],
823            // Parent turn 2: summarize the child's report.
824            vec![assistant_text("done")],
825        ]));
826        let cancel = CancellationToken::new();
827        let task = Arc::new(TaskTool::new(
828            Arc::clone(&provider),
829            Model {
830                id: "test/m".into(),
831            },
832            RunConfig::default(),
833            vec![child],
834            // max_depth = 1: only ONE level of delegation allowed.
835            SubagentOptions {
836                max_depth: 1,
837                max_delegations: DEFAULT_MAX_DELEGATIONS,
838            },
839            cancel.clone(),
840            None,
841            None,
842        ));
843        let tools: Vec<Arc<dyn Tool>> = vec![task];
844        let mut messages = vec![AgentMessage {
845            role: Role::User,
846            content: vec![ContentBlock::Text { text: "go".into() }],
847        }];
848        let outcome = run_agent(
849            provider.as_ref(),
850            &tools,
851            &mut messages,
852            &Model {
853                id: "test/m".into(),
854            },
855            &RunConfig::default(),
856            &cancel,
857            &crate::event::RunHooks::default(),
858        )
859        .await
860        .expect("parent run");
861        // The parent ran two turns (delegate + summarize). The run completed
862        // despite the grandchild depth-exceeded error (tool errors are
863        // model-visible, not run-fatal).
864        assert_eq!(outcome.turns, 2);
865    }
866
867    // ── Policy inheritance: a governance gate applies to subagents ───────
868
869    /// A tool that records whether it executed (to prove it did/did not run).
870    struct FlagTool {
871        name: String,
872        ran: Arc<std::sync::atomic::AtomicBool>,
873    }
874
875    #[async_trait]
876    impl Tool for FlagTool {
877        fn definition(&self) -> ToolDefinition {
878            ToolDefinition {
879                name: self.name.clone(),
880                label: self.name.clone(),
881                description: "records execution".into(),
882                parameters: crate::tool::ParameterSchema {
883                    fields: std::collections::BTreeMap::new(),
884                },
885            }
886        }
887
888        async fn execute(&self, _ctx: InvokeContext, _input: Value) -> CoreResult<ToolResult> {
889            self.ran.store(true, Ordering::SeqCst);
890            Ok(ToolResult {
891                content: vec![serde_json::json!({ "type": "text", "text": "ran" })],
892                details: None,
893            })
894        }
895    }
896
897    /// A policy that denies exactly one tool by name; allows everything else.
898    struct DenyByName(String);
899
900    #[async_trait]
901    impl ToolPolicy for DenyByName {
902        async fn check(
903            &self,
904            tool: &str,
905            _input: &Value,
906            _ctx: &InvokeContext,
907        ) -> crate::policy::PolicyVerdict {
908            if tool == self.0 {
909                crate::policy::PolicyVerdict::Deny("blocked by test policy".into())
910            } else {
911                crate::policy::PolicyVerdict::Allow
912            }
913        }
914    }
915
916    #[tokio::test]
917    async fn policy_applies_to_delegated_subagent() {
918        // A policy set on the TaskTool must govern the CHILD run too: a
919        // subagent must not be able to bypass the gate. The child owns a
920        // "danger" tool; the policy denies it. We assert the tool never
921        // executed, proving the policy was inherited by the delegated run.
922        let ran = Arc::new(std::sync::atomic::AtomicBool::new(false));
923        let danger = Arc::new(FlagTool {
924            name: "danger".into(),
925            ran: Arc::clone(&ran),
926        });
927        let worker = SubagentProfile::new("worker", "you do work").with_tool(danger);
928
929        let provider: Arc<dyn ModelProvider> = Arc::new(ScriptedProvider::new(vec![
930            // Parent turn 1: delegate to the worker.
931            vec![parent_task_call("worker", "use the danger tool")],
932            // Child turn 1 (fresh session): call the denied tool.
933            vec![AgentMessage {
934                role: Role::Assistant,
935                content: vec![ContentBlock::ToolUse {
936                    id: "cdanger".into(),
937                    call: crate::tool::ToolCall {
938                        name: "danger".into(),
939                        input: serde_json::json!({}),
940                    },
941                }],
942            }],
943            // Child turn 2: recover from the denial and report.
944            vec![assistant_text("could not run danger")],
945            // Parent turn 2: summarize.
946            vec![assistant_text("done")],
947        ]));
948
949        let cancel = CancellationToken::new();
950        let policy: Arc<dyn ToolPolicy> = Arc::new(DenyByName("danger".into()));
951        let task = Arc::new(TaskTool::new(
952            Arc::clone(&provider),
953            Model {
954                id: "test/m".into(),
955            },
956            RunConfig::default(),
957            vec![worker],
958            SubagentOptions::default(),
959            cancel.clone(),
960            None,
961            Some(Arc::clone(&policy)),
962        ));
963        let tools: Vec<Arc<dyn Tool>> = vec![task];
964        let mut messages = vec![AgentMessage {
965            role: Role::User,
966            content: vec![ContentBlock::Text {
967                text: "delegate".into(),
968            }],
969        }];
970        // The parent run itself carries the same policy (borrowed).
971        let hooks = crate::event::RunHooks {
972            policy: Some(policy.as_ref()),
973            ..crate::event::RunHooks::default()
974        };
975        let outcome = run_agent(
976            provider.as_ref(),
977            &tools,
978            &mut messages,
979            &Model {
980                id: "test/m".into(),
981            },
982            &RunConfig::default(),
983            &cancel,
984            &hooks,
985        )
986        .await
987        .expect("parent run completes");
988
989        assert_eq!(outcome.final_text, "done");
990        assert!(
991            !ran.load(Ordering::SeqCst),
992            "policy must block the subagent's tool — it was inherited, not bypassed"
993        );
994    }
995}