heartbit_core/agent/
orchestrator.rs

1//! Multi-agent orchestrator for parallel and sequential sub-agent delegation.
2
3use std::future::Future;
4use std::pin::Pin;
5use std::sync::{Arc, Mutex};
6use std::time::Duration;
7
8use serde::{Deserialize, Serialize};
9use serde_json::json;
10use tracing::{info, info_span};
11
12use crate::error::Error;
13use crate::llm::types::{TokenUsage, ToolDefinition};
14use crate::llm::{BoxedProvider, LlmProvider};
15use crate::tool::{Tool, ToolOutput};
16use crate::types::DispatchMode;
17
18use crate::memory::Memory;
19
20use crate::knowledge::KnowledgeBase;
21
22use crate::tool::builtins::OnQuestion;
23
24use super::blackboard::{Blackboard, InMemoryBlackboard};
25use super::blackboard_tools::blackboard_tools;
26use super::context::ContextStrategy;
27use super::events::{AgentEvent, OnEvent};
28use super::guardrail::Guardrail;
29use super::{AgentOutput, AgentRunner};
30
/// A sub-agent definition registered with the orchestrator.
///
/// The definition is cloned for every delegated task and turned into a fresh
/// `AgentRunner`. `Option` fields are per-agent overrides: when they are
/// `None` the corresponding builder call is skipped (or, for `max_turns` /
/// `max_tokens`, the orchestrator default is used).
#[derive(Clone)]
pub(crate) struct SubAgentDef {
    /// Name used to look the agent up when a task is delegated; also passed
    /// to the sub-agent's runner as its name.
    pub(crate) name: String,
    /// Human-readable description of the agent.
    // NOTE(review): presumably surfaced to the orchestrator LLM so it can pick
    // an agent — confirm against the code that builds the tool definition.
    pub(crate) description: String,
    /// System prompt handed to the sub-agent's runner.
    pub(crate) system_prompt: String,
    /// Tools registered on the sub-agent's runner.
    pub(crate) tools: Vec<Arc<dyn Tool>>,
    /// Optional context strategy forwarded to the runner when set.
    pub(crate) context_strategy: Option<ContextStrategy>,
    /// Optional summarization threshold forwarded to the runner when set.
    pub(crate) summarize_threshold: Option<u32>,
    /// Optional tool timeout forwarded to the runner when set.
    pub(crate) tool_timeout: Option<Duration>,
    /// Optional cap on tool output size (bytes) forwarded to the runner when set.
    pub(crate) max_tool_output_bytes: Option<usize>,
    /// Per-agent turn limit. When `None`, uses orchestrator default.
    pub(crate) max_turns: Option<usize>,
    /// Per-agent token limit. When `None`, uses orchestrator default.
    pub(crate) max_tokens: Option<u32>,
    /// Optional JSON Schema for structured output.
    pub(crate) response_schema: Option<serde_json::Value>,
    /// Guardrails applied to this sub-agent's LLM calls and tool executions.
    pub(crate) guardrails: Vec<Arc<dyn Guardrail>>,
    /// Optional per-agent run timeout. When `None`, no timeout is applied.
    pub(crate) run_timeout: Option<Duration>,
    /// Optional per-agent LLM provider override. When `None`, the orchestrator's
    /// shared provider is used.
    pub(crate) provider_override: Option<Arc<BoxedProvider>>,
    /// Optional reasoning/thinking effort level for this sub-agent.
    pub(crate) reasoning_effort: Option<crate::llm::types::ReasoningEffort>,
    /// Enable reflection prompts after tool results for this sub-agent.
    pub(crate) enable_reflection: Option<bool>,
    /// Tool output compression threshold in bytes for this sub-agent.
    pub(crate) tool_output_compression_threshold: Option<usize>,
    /// Maximum tools per turn for this sub-agent.
    pub(crate) max_tools_per_turn: Option<usize>,
    /// Tool profile for pre-filtering tool definitions.
    pub(crate) tool_profile: Option<super::tool_filter::ToolProfile>,
    /// Maximum consecutive identical tool-call turns for doom loop detection.
    pub(crate) max_identical_tool_calls: Option<u32>,
    /// Maximum consecutive fuzzy-identical tool-call turns for doom loop detection.
    pub(crate) max_fuzzy_identical_tool_calls: Option<u32>,
    /// Maximum number of tool calls allowed in a single LLM turn (per-turn cap).
    pub(crate) max_tool_calls_per_turn: Option<u32>,
    /// Session pruning configuration.
    pub(crate) session_prune_config: Option<crate::agent::pruner::SessionPruneConfig>,
    /// Enable recursive summarization.
    pub(crate) enable_recursive_summarization: Option<bool>,
    /// Memory reflection threshold.
    pub(crate) reflection_threshold: Option<u32>,
    /// Run memory consolidation at session end.
    pub(crate) consolidate_on_exit: Option<bool>,
    /// Optional workspace root for this sub-agent.
    pub(crate) workspace: Option<std::path::PathBuf>,
    /// Hard limit on cumulative tokens (input + output) across all turns.
    pub(crate) max_total_tokens: Option<u64>,
    /// Optional audit trail for recording untruncated agent decisions.
    pub(crate) audit_trail: Option<Arc<dyn super::audit::AuditTrail>>,
    /// Optional user ID for multi-tenant audit enrichment.
    pub(crate) audit_user_id: Option<String>,
    /// Optional tenant ID for multi-tenant audit enrichment.
    pub(crate) audit_tenant_id: Option<String>,
    /// Delegation chain for audit records (propagated from orchestrator).
    pub(crate) audit_delegation_chain: Vec<String>,
}
92
93impl std::fmt::Debug for SubAgentDef {
94    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
95        f.debug_struct("SubAgentDef")
96            .field("name", &self.name)
97            .field("description", &self.description)
98            .field("tools_count", &self.tools.len())
99            .finish()
100    }
101}
102
103impl SubAgentDef {
104    /// Create a new sub-agent definition with required fields. All optional
105    /// fields default to `None`, empty vecs, or false.
106    pub(crate) fn new(
107        name: impl Into<String>,
108        description: impl Into<String>,
109        system_prompt: impl Into<String>,
110    ) -> Self {
111        Self {
112            name: name.into(),
113            description: description.into(),
114            system_prompt: system_prompt.into(),
115            tools: vec![],
116            context_strategy: None,
117            summarize_threshold: None,
118            tool_timeout: None,
119            max_tool_output_bytes: None,
120            max_turns: None,
121            max_tokens: None,
122            response_schema: None,
123            run_timeout: None,
124            guardrails: vec![],
125            provider_override: None,
126            reasoning_effort: None,
127            enable_reflection: None,
128            tool_output_compression_threshold: None,
129            max_tools_per_turn: None,
130            tool_profile: None,
131            max_identical_tool_calls: None,
132            max_fuzzy_identical_tool_calls: None,
133            max_tool_calls_per_turn: None,
134            session_prune_config: None,
135            enable_recursive_summarization: None,
136            reflection_threshold: None,
137            consolidate_on_exit: None,
138            workspace: None,
139            max_total_tokens: None,
140            audit_trail: None,
141            audit_user_id: None,
142            audit_tenant_id: None,
143            audit_delegation_chain: Vec::new(),
144        }
145    }
146}
147
148impl From<SubAgentConfig> for SubAgentDef {
149    fn from(def: SubAgentConfig) -> Self {
150        Self {
151            name: def.name,
152            description: def.description,
153            system_prompt: def.system_prompt,
154            tools: def.tools,
155            context_strategy: def.context_strategy,
156            summarize_threshold: def.summarize_threshold,
157            tool_timeout: def.tool_timeout,
158            max_tool_output_bytes: def.max_tool_output_bytes,
159            max_turns: def.max_turns,
160            max_tokens: def.max_tokens,
161            response_schema: def.response_schema,
162            run_timeout: def.run_timeout,
163            guardrails: def.guardrails,
164            provider_override: def.provider,
165            reasoning_effort: def.reasoning_effort,
166            enable_reflection: def.enable_reflection,
167            tool_output_compression_threshold: def.tool_output_compression_threshold,
168            max_tools_per_turn: def.max_tools_per_turn,
169            tool_profile: def.tool_profile,
170            max_identical_tool_calls: def.max_identical_tool_calls,
171            max_fuzzy_identical_tool_calls: def.max_fuzzy_identical_tool_calls,
172            max_tool_calls_per_turn: def.max_tool_calls_per_turn,
173            session_prune_config: def.session_prune_config,
174            enable_recursive_summarization: def.enable_recursive_summarization,
175            reflection_threshold: def.reflection_threshold,
176            consolidate_on_exit: def.consolidate_on_exit,
177            workspace: def.workspace,
178            max_total_tokens: def.max_total_tokens,
179            audit_trail: def.audit_trail,
180            audit_user_id: def.audit_user_id,
181            audit_tenant_id: def.audit_tenant_id,
182            audit_delegation_chain: def.audit_delegation_chain,
183        }
184    }
185}
186
/// A task delegated by the orchestrator to a sub-agent.
///
/// This is the per-entry shape of the delegate tool's `tasks` array; it is
/// deserialized directly from the tool input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct DelegatedTask {
    /// Name of the registered sub-agent that should run the task.
    pub(crate) agent: String,
    /// Task text handed to the sub-agent's runner.
    pub(crate) task: String,
}
193
/// Result from a sub-agent execution.
///
/// Failures are represented in-band: `success` is `false` and `result` holds
/// an `"Error: ..."` message instead of the agent's output.
#[derive(Debug, Clone)]
pub(crate) struct SubAgentResult {
    /// Name of the agent the task was addressed to.
    pub(crate) agent: String,
    /// The agent's output text, or an error description when `success` is `false`.
    pub(crate) result: String,
    /// Tokens consumed by the sub-agent (partial usage on failure, zero when
    /// the agent never ran).
    pub(crate) tokens_used: TokenUsage,
    /// Whether the sub-agent completed without error.
    pub(crate) success: bool,
}
202
/// Multi-agent orchestrator.
///
/// Refactored to use `AgentRunner` internally with a `DelegateTaskTool`.
/// No duplicated agent loop — the orchestrator IS an AgentRunner.
///
/// The `DelegateTaskTool` accumulates sub-agent token usage in a shared
/// `Arc<Mutex<TokenUsage>>`. Each `run()` call resets the accumulator
/// before starting, so sequential calls are safe. For concurrent use,
/// create separate `Orchestrator` instances.
pub struct Orchestrator<P: LlmProvider> {
    /// The orchestrator's own agent loop; `run()` delegates to `runner.execute`.
    runner: AgentRunner<P>,
    /// Shared accumulator for sub-agent token usage (populated by DelegateTaskTool).
    sub_agent_tokens: Arc<Mutex<TokenUsage>>,
}
217
218impl<P: LlmProvider + 'static> Orchestrator<P> {
219    /// Create a new [`OrchestratorBuilder`] with the given LLM provider.
220    pub fn builder(provider: Arc<P>) -> OrchestratorBuilder<P> {
221        OrchestratorBuilder {
222            provider,
223            sub_agents: vec![],
224            max_turns: 10,
225            max_tokens: 4096,
226            context_strategy: None,
227            summarize_threshold: None,
228            tool_timeout: None,
229            max_tool_output_bytes: None,
230            shared_memory: None,
231            memory_namespace_prefix: None,
232            blackboard: None,
233            knowledge_base: None,
234            on_text: None,
235            on_approval: None,
236            on_event: None,
237            guardrails: Vec::new(),
238            on_question: None,
239            run_timeout: None,
240            enable_squads: None,
241            reasoning_effort: None,
242            enable_reflection: false,
243            tool_output_compression_threshold: None,
244            max_tools_per_turn: None,
245            max_identical_tool_calls: None,
246            max_fuzzy_identical_tool_calls: None,
247            max_tool_calls_per_turn: None,
248            permission_rules: super::permission::PermissionRuleset::default(),
249            instruction_text: None,
250            learned_permissions: None,
251            lsp_manager: None,
252            observability_mode: None,
253            dispatch_mode: DispatchMode::Parallel,
254            workspace: None,
255            audit_trail: None,
256            audit_user_id: None,
257            audit_tenant_id: None,
258            audit_delegation_chain: Vec::new(),
259            allow_shared_write: true,
260            multi_agent_prompt: true,
261            spawn_config: None,
262            spawn_builtin_tools: Vec::new(),
263            tenant_tracker: None,
264        }
265    }
266
267    /// Run the orchestrator with a task. Returns the combined output from
268    /// the orchestrator and all sub-agents.
269    ///
270    /// # Concurrent use
271    ///
272    /// This method takes `&mut self` to prevent concurrent calls on the same
273    /// instance at compile time. The sub-agent token accumulator is reset at
274    /// the start of each call, so concurrent runs would produce incorrect
275    /// token counts. For concurrent use, create separate `Orchestrator`
276    /// instances.
277    pub async fn run(&mut self, task: &str) -> Result<AgentOutput, Error> {
278        // Reset sub-agent token accumulator so repeated calls don't inflate counts
279        {
280            let mut acc = self.sub_agent_tokens.lock().expect("token lock poisoned");
281            *acc = TokenUsage::default();
282        }
283        match self.runner.execute(task).await {
284            Ok(mut output) => {
285                // Add sub-agent tokens that were accumulated during delegation
286                let sub_tokens = *self.sub_agent_tokens.lock().expect("token lock poisoned");
287                output.tokens_used += sub_tokens;
288                Ok(output)
289            }
290            Err(e) => {
291                // Include sub-agent tokens in the error's partial usage so callers
292                // see the full token cost even when the orchestrator itself fails.
293                let sub_tokens = *self.sub_agent_tokens.lock().expect("token lock poisoned");
294                let mut usage = e.partial_usage();
295                usage += sub_tokens;
296                Err(e.with_partial_usage(usage))
297            }
298        }
299    }
300}
301
/// The orchestrator's primary tool: delegates tasks to sub-agents in parallel.
///
/// Implements `Tool` so it can be registered with `AgentRunner`.
/// Unknown agent names return an error result to the LLM instead of crashing.
///
/// Sub-agents always use `AgentRunner<BoxedProvider>` for type erasure. Each
/// sub-agent uses its `provider_override` if set, falling back to `shared_provider`.
struct DelegateTaskTool {
    /// Provider used by every sub-agent that has no `provider_override`.
    shared_provider: Arc<BoxedProvider>,
    /// Registered sub-agent definitions; delegated tasks are matched by name.
    sub_agents: Vec<SubAgentDef>,
    /// Default turn limit for sub-agents that do not set their own `max_turns`.
    max_turns: usize,
    /// Default token limit for sub-agents that do not set their own `max_tokens`.
    max_tokens: u32,
    /// Permission rules inherited from the orchestrator, forwarded to sub-agents.
    permission_rules: super::permission::PermissionRuleset,
    /// Shared accumulator for sub-agent token usage, read by Orchestrator::run.
    accumulated_tokens: Arc<Mutex<TokenUsage>>,
    /// Shared memory store for cross-agent memory (None if not configured).
    shared_memory: Option<Arc<dyn Memory>>,
    /// Optional prefix prepended to agent names when namespacing memory
    /// (e.g. `"tg:123"` → namespace becomes `"tg:123:assistant"` instead of `"assistant"`).
    memory_namespace_prefix: Option<String>,
    /// Shared blackboard for cross-agent coordination (None if not configured).
    blackboard: Option<Arc<dyn Blackboard>>,
    /// Shared knowledge base for document retrieval (None if not configured).
    knowledge_base: Option<Arc<dyn KnowledgeBase>>,
    /// Cached tool definition, computed at construction time to avoid calling
    /// `Tool::definition()` on every sub-agent tool every LLM turn.
    cached_definition: ToolDefinition,
    /// Optional event callback for sub-agent dispatch/completion events.
    on_event: Option<Arc<OnEvent>>,
    /// Optional streaming text callback, forwarded to sub-agents.
    on_text: Option<Arc<crate::llm::OnText>>,
    /// Optional LSP manager, forwarded to sub-agents.
    lsp_manager: Option<Arc<crate::lsp::LspManager>>,
    /// Observability mode inherited from the orchestrator, forwarded to sub-agents.
    observability_mode: super::observability::ObservabilityMode,
    /// Whether sub-agents may write to shared institutional memory.
    allow_shared_write: bool,
    /// Optional per-tenant token tracker propagated to all sub-agent runners.
    tenant_tracker: Option<Arc<crate::agent::tenant_tracker::TenantTokenTracker>>,
    /// Orchestrator-level guardrails propagated to delegated sub-agents.
    /// SECURITY (F-AGENT-2): without this, sub-agents would run without the
    /// defenses (PII, secret scanner, LLM judge) that the operator configured
    /// at the orchestrator level.
    guardrails: Vec<Arc<dyn Guardrail>>,
}
348
impl DelegateTaskTool {
    /// Run every delegated task on its own spawned sub-agent and return one
    /// combined text report.
    ///
    /// Each task is matched to a registered [`SubAgentDef`] by name, a fresh
    /// `AgentRunner` is built for it (orchestrator-level settings first, then
    /// the agent's own overrides), and all runners execute concurrently on a
    /// `JoinSet`.
    ///
    /// # Returns
    ///
    /// A string with one `=== Agent: <name> ===` section per task, in the same
    /// order as `tasks`, separated by blank lines.
    ///
    /// # Errors
    ///
    /// Returns `Error::Agent` only when `tasks` is empty. Per-task failures
    /// (unknown agent, runner build error, execution error, panicked task) do
    /// not fail the call; they appear as `Error: ...` text in that task's
    /// section with `success == false`.
    ///
    /// # Side effects
    ///
    /// * Emits `SubAgentsDispatched` before spawning and one
    ///   `SubAgentCompleted` per task afterwards, when `on_event` is set.
    /// * Adds every task's token usage to `accumulated_tokens`.
    /// * Writes each successful result to the blackboard under
    ///   `agent:<name>`, when a blackboard is configured.
    async fn delegate(&self, tasks: Vec<DelegatedTask>) -> Result<String, Error> {
        if tasks.is_empty() {
            return Err(Error::Agent(
                "delegate_task requires at least one task".into(),
            ));
        }
        let task_count = tasks.len();
        // Kept separately because `tasks` is consumed by the spawn loop; the
        // names are still needed for events and for panicked-task placeholders.
        let agent_names: Vec<String> = tasks.iter().map(|t| t.agent.clone()).collect();
        // NOTE(review): the span is held until the function returns but is never
        // entered, and the spawned tasks are not instrumented with it, so the
        // events below are not recorded inside it — confirm this is intended.
        let _delegate_span = info_span!(
            "heartbit.orchestrator.delegate",
            agent_count = task_count,
            agents = ?agent_names,
        );

        if let Some(ref cb) = self.on_event {
            cb(AgentEvent::SubAgentsDispatched {
                agent: "orchestrator".into(),
                agents: agent_names.clone(),
            });
        }

        let mut join_set = tokio::task::JoinSet::new();

        // Each spawned future yields `(idx, result)` so results can be put back
        // in request order regardless of completion order.
        for (idx, task) in tasks.into_iter().enumerate() {
            let agent_def = match self.sub_agents.iter().find(|a| a.name == task.agent) {
                Some(def) => def.clone(),
                None => {
                    // Unknown agent: we'll collect this as an error in the results
                    let agent_name = task.agent.clone();
                    join_set.spawn(async move {
                        (
                            idx,
                            SubAgentResult {
                                agent: agent_name.clone(),
                                result: format!("Error: unknown agent '{agent_name}'"),
                                tokens_used: TokenUsage::default(),
                                success: false,
                            },
                        )
                    });
                    continue;
                }
            };

            // Clone everything the spawned task needs: the future must own its
            // data because it is moved into the `JoinSet`.
            let provider = agent_def
                .provider_override
                .clone()
                .unwrap_or_else(|| self.shared_provider.clone());
            let max_turns = agent_def.max_turns.unwrap_or(self.max_turns);
            let max_tokens = agent_def.max_tokens.unwrap_or(self.max_tokens);
            let shared_memory = self.shared_memory.clone();
            let ns_prefix = self.memory_namespace_prefix.clone();
            let blackboard = self.blackboard.clone();
            let knowledge_base = self.knowledge_base.clone();
            let on_event = self.on_event.clone();
            let on_text = self.on_text.clone();
            let lsp_manager = self.lsp_manager.clone();
            let permission_rules = self.permission_rules.clone();
            let observability_mode = self.observability_mode;
            let allow_shared_write = self.allow_shared_write;
            let tenant_tracker = self.tenant_tracker.clone();
            // SECURITY (F-AGENT-2): propagate orchestrator guardrails to delegated
            // sub-agents. Without this, an operator who hardens the orchestrator
            // (PII, secret scanner, LLM judge) sees their defenses silently drop
            // the moment work is delegated. SpawnAgentTool already does this; the
            // delegate path used to skip it.
            let orchestrator_guardrails = self.guardrails.clone();

            info!(agent = %agent_def.name, task = %task.task, "spawning sub-agent");

            join_set.spawn(async move {
                let mut builder = AgentRunner::builder(provider)
                    .name(&agent_def.name)
                    .system_prompt(&agent_def.system_prompt)
                    .tools(agent_def.tools)
                    .max_turns(max_turns)
                    .max_tokens(max_tokens);

                // Per-agent overrides: each one is applied only when it is set
                // on the definition.
                if let Some(strategy) = agent_def.context_strategy {
                    builder = builder.context_strategy(strategy);
                }
                if let Some(threshold) = agent_def.summarize_threshold {
                    builder = builder.summarize_threshold(threshold);
                }
                if let Some(timeout) = agent_def.tool_timeout {
                    builder = builder.tool_timeout(timeout);
                }
                if let Some(max) = agent_def.max_tool_output_bytes {
                    builder = builder.max_tool_output_bytes(max);
                }
                if let Some(schema) = agent_def.response_schema {
                    builder = builder.structured_schema(schema);
                }
                // SECURITY (F-AGENT-2): combine orchestrator guardrails with the
                // sub-agent's own. Both lists are appended (orchestrator first so
                // global denies fire before agent-local ones). Either may be empty.
                let mut combined_guardrails = orchestrator_guardrails;
                combined_guardrails.extend(agent_def.guardrails);
                if !combined_guardrails.is_empty() {
                    builder = builder.guardrails(combined_guardrails);
                }
                if let Some(timeout) = agent_def.run_timeout {
                    builder = builder.run_timeout(timeout);
                }
                if let Some(effort) = agent_def.reasoning_effort {
                    builder = builder.reasoning_effort(effort);
                }
                // Boolean flags are forwarded only when explicitly `Some(true)`;
                // `Some(false)` and `None` both leave the builder untouched.
                if let Some(true) = agent_def.enable_reflection {
                    builder = builder.enable_reflection(true);
                }
                if let Some(threshold) = agent_def.tool_output_compression_threshold {
                    builder = builder.tool_output_compression_threshold(threshold);
                }
                if let Some(max) = agent_def.max_tools_per_turn {
                    builder = builder.max_tools_per_turn(max);
                }
                if let Some(profile) = agent_def.tool_profile {
                    builder = builder.tool_profile(profile);
                }
                if let Some(max) = agent_def.max_identical_tool_calls {
                    builder = builder.max_identical_tool_calls(max);
                }
                if let Some(max) = agent_def.max_fuzzy_identical_tool_calls {
                    builder = builder.max_fuzzy_identical_tool_calls(max);
                }
                if let Some(cap) = agent_def.max_tool_calls_per_turn {
                    builder = builder.max_tool_calls_per_turn(cap);
                }
                if let Some(ref config) = agent_def.session_prune_config {
                    builder = builder.session_prune_config(config.clone());
                }
                if let Some(true) = agent_def.enable_recursive_summarization {
                    builder = builder.enable_recursive_summarization(true);
                }
                if let Some(threshold) = agent_def.reflection_threshold {
                    builder = builder.reflection_threshold(threshold);
                }
                if let Some(true) = agent_def.consolidate_on_exit {
                    builder = builder.consolidate_on_exit(true);
                }
                if let Some(ref ws) = agent_def.workspace {
                    builder = builder.workspace(ws.clone());
                }
                if let Some(max) = agent_def.max_total_tokens {
                    builder = builder.max_total_tokens(max);
                }
                if let Some(trail) = agent_def.audit_trail {
                    builder = builder.audit_trail(trail);
                }
                // The audit user context is only set when BOTH ids are present.
                if let Some(uid) = &agent_def.audit_user_id
                    && let Some(tid) = &agent_def.audit_tenant_id
                {
                    builder = builder.audit_user_context(uid.clone(), tid.clone());
                }
                if !agent_def.audit_delegation_chain.is_empty() {
                    builder =
                        builder.audit_delegation_chain(agent_def.audit_delegation_chain.clone());
                }

                // Forward permission rules from orchestrator to sub-agents
                if !permission_rules.is_empty() {
                    builder = builder.permission_rules(permission_rules);
                }

                // Forward observability mode from orchestrator to sub-agents
                builder = builder.observability_mode(observability_mode);

                // Forward per-tenant token tracker so sub-agent usage counts toward
                // the same per-tenant cap as the orchestrator.
                if let Some(ref tracker) = tenant_tracker {
                    builder = builder.tenant_tracker(tracker.clone());
                }

                // Forward LSP manager to sub-agents
                if let Some(ref lsp) = lsp_manager {
                    builder = builder.lsp_manager(lsp.clone());
                }

                // Forward on_event so sub-agent events are visible
                if let Some(ref on_event) = on_event {
                    builder = builder.on_event(on_event.clone());
                }
                // Forward on_text so sub-agent streaming text is visible
                if let Some(ref on_text) = on_text {
                    builder = builder.on_text(on_text.clone());
                }

                // Add memory tools if shared memory is configured
                if let Some(ref memory) = shared_memory {
                    // Namespace is `<prefix>:<agent>` when a prefix is configured,
                    // otherwise just the agent name.
                    let agent_ns = match &ns_prefix {
                        Some(prefix) => format!("{prefix}:{}", agent_def.name),
                        None => agent_def.name.clone(),
                    };
                    let ns = Arc::new(crate::memory::namespaced::NamespacedMemory::new(
                        memory.clone(),
                        &agent_ns,
                    ));
                    builder = builder.memory(ns);
                    let mem_scope = crate::auth::TenantScope::from_audit_fields(
                        agent_def.audit_tenant_id.as_deref(),
                        agent_def.audit_user_id.as_deref(),
                    );
                    builder = builder.tools(crate::memory::shared_tools::shared_memory_tools(
                        memory.clone(),
                        &agent_ns,
                        mem_scope,
                        allow_shared_write,
                    ));
                }

                // Add blackboard tools if blackboard is configured.
                // SECURITY (F-AGENT-7): pass the sub-agent name so writes are
                // caller-namespaced (`caller:{name}/...`) and the reserved
                // `agent:` prefix is denied to sub-agents.
                if let Some(ref bb) = blackboard {
                    builder = builder.tools(blackboard_tools(bb.clone(), &agent_def.name));
                }

                // Add knowledge tools if knowledge base is configured
                if let Some(ref kb) = knowledge_base {
                    builder = builder.knowledge(kb.clone());
                }

                // A build failure is reported as this task's result rather than
                // aborting the whole delegation.
                let runner = match builder.build() {
                    Ok(r) => r,
                    Err(e) => {
                        return (
                            idx,
                            SubAgentResult {
                                agent: agent_def.name,
                                result: format!("Error building agent: {e}"),
                                tokens_used: TokenUsage::default(),
                                success: false,
                            },
                        );
                    }
                };

                let result = match runner.execute(&task.task).await {
                    Ok(output) => {
                        // Write successful result to blackboard (matching Restate path)
                        if let Some(ref bb) = blackboard {
                            let key = format!("agent:{}", agent_def.name);
                            // Best effort: a failed blackboard write is logged and
                            // does not turn the task into a failure.
                            if let Err(e) = bb
                                .write(&key, serde_json::Value::String(output.result.clone()))
                                .await
                            {
                                tracing::warn!(
                                    agent = %agent_def.name,
                                    error = %e,
                                    "failed to write result to blackboard"
                                );
                            }
                        }
                        SubAgentResult {
                            agent: agent_def.name,
                            result: output.result,
                            tokens_used: output.tokens_used,
                            success: true,
                        }
                    }
                    // Keep whatever tokens were spent before the failure.
                    Err(e) => SubAgentResult {
                        agent: agent_def.name,
                        result: format!("Error: {e}"),
                        tokens_used: e.partial_usage(),
                        success: false,
                    },
                };

                (idx, result)
            });
        }

        // One slot per task; a slot stays `None` if its task's join failed.
        let mut results: Vec<Option<(usize, SubAgentResult)>> = vec![None; task_count];
        while let Some(result) = join_set.join_next().await {
            match result {
                Ok((idx, sub_result)) => {
                    results[idx] = Some((idx, sub_result));
                }
                Err(e) => {
                    // The join error does not carry the task index, so the gap
                    // is filled in below from the slot position.
                    tracing::error!(error = %e, "sub-agent task panicked");
                }
            }
        }

        // Fill gaps (panicked tasks) with error results, then sort by index
        let mut results: Vec<(usize, SubAgentResult)> = results
            .into_iter()
            .enumerate()
            .map(|(idx, r)| {
                r.unwrap_or_else(|| {
                    (
                        idx,
                        SubAgentResult {
                            agent: agent_names[idx].clone(),
                            result: "Error: sub-agent task panicked".into(),
                            tokens_used: TokenUsage::default(),
                            success: false,
                        },
                    )
                })
            })
            .collect();
        // NOTE: every entry already sits at the position equal to its index, so
        // this sort does not change the order.
        results.sort_by_key(|(idx, _)| *idx);

        // Accumulate sub-agent tokens (lock scope kept minimal — no callbacks inside)
        {
            let mut acc = self.accumulated_tokens.lock().expect("token lock poisoned");
            for (_, r) in &results {
                *acc += r.tokens_used;
            }
        }

        // Emit completion events outside the lock
        if let Some(ref cb) = self.on_event {
            for (_, r) in &results {
                cb(AgentEvent::SubAgentCompleted {
                    agent: r.agent.clone(),
                    success: r.success,
                    usage: r.tokens_used,
                });
            }
        }

        // One labelled section per task, separated by a blank line.
        let formatted = results
            .iter()
            .map(|(_, r)| format!("=== Agent: {} ===\n{}", r.agent, r.result))
            .collect::<Vec<_>>()
            .join("\n\n");

        Ok(formatted)
    }
}
683
684impl Tool for DelegateTaskTool {
685    fn definition(&self) -> ToolDefinition {
686        self.cached_definition.clone()
687    }
688
689    fn execute(
690        &self,
691        _ctx: &crate::ExecutionContext,
692        input: serde_json::Value,
693    ) -> Pin<Box<dyn Future<Output = Result<ToolOutput, Error>> + Send + '_>> {
694        Box::pin(async move {
695            let delegate_input: DelegateInput = serde_json::from_value(input)
696                .map_err(|e| Error::Agent(format!("Invalid delegate_task input: {e}")))?;
697
698            let result = self.delegate(delegate_input.tasks).await?;
699            Ok(ToolOutput::success(result))
700        })
701    }
702}
703
/// Deserialized tool input shared by `delegate_task` and `form_squad`.
#[derive(Deserialize)]
struct DelegateInput {
    /// One entry per sub-agent invocation requested by the caller.
    tasks: Vec<DelegatedTask>,
}
708
/// The orchestrator's squad-formation tool: dispatches per-agent tasks with a
/// shared private blackboard for intra-squad coordination.
///
/// Unlike `DelegateTaskTool` which runs agents independently with the outer blackboard,
/// `FormSquadTool` creates a private `InMemoryBlackboard` so squad members can read
/// each other's intermediate results without polluting the outer blackboard. The final
/// formatted result is written to the outer blackboard under `"squad:{names}"`, where
/// `{names}` is the requested agent names joined with `+`.
///
/// Accepts the same `{tasks:[{agent, task}]}` format as `delegate_task`.
struct FormSquadTool {
    /// Provider used for squad members whose definition has no `provider_override`.
    shared_provider: Arc<BoxedProvider>,
    /// Registered sub-agent definitions; squad tasks are resolved against these by name.
    agent_pool: Vec<SubAgentDef>,
    /// Turn limit applied to members that do not set their own `max_turns`.
    default_max_turns: usize,
    /// Token limit applied to members that do not set their own `max_tokens`.
    default_max_tokens: u32,
    /// Permission rules inherited from the orchestrator, forwarded to squad members.
    permission_rules: super::permission::PermissionRuleset,
    /// Running token total; every squad member's usage is added once the squad finishes.
    accumulated_tokens: Arc<Mutex<TokenUsage>>,
    /// When set, each member gets a namespaced memory view plus the shared-memory tools.
    shared_memory: Option<Arc<dyn Memory>>,
    /// Optional prefix for member memory namespaces (`{prefix}:{agent}`); without it the
    /// namespace is just the agent name.
    memory_namespace_prefix: Option<String>,
    /// Outer blackboard for writing squad results. Squad members use a private one.
    blackboard: Option<Arc<dyn Blackboard>>,
    /// Optional knowledge base, forwarded to squad members.
    knowledge_base: Option<Arc<dyn KnowledgeBase>>,
    /// Optional event callback: receives dispatch/completion events and is also
    /// forwarded to squad members.
    on_event: Option<Arc<OnEvent>>,
    /// Optional streaming text callback, forwarded to squad members.
    on_text: Option<Arc<crate::llm::OnText>>,
    /// Optional LSP manager, forwarded to squad members.
    lsp_manager: Option<Arc<crate::lsp::LspManager>>,
    /// Tool definition returned (cloned) by `Tool::definition`.
    cached_definition: ToolDefinition,
    /// Observability mode inherited from the orchestrator, forwarded to squad members.
    observability_mode: super::observability::ObservabilityMode,
    /// Whether squad members may write to shared institutional memory.
    allow_shared_write: bool,
    /// Optional per-tenant token tracker propagated to all squad member runners.
    tenant_tracker: Option<Arc<crate::agent::tenant_tracker::TenantTokenTracker>>,
}
744
impl Tool for FormSquadTool {
    /// Returns a clone of the tool definition cached on this instance.
    fn definition(&self) -> ToolDefinition {
        self.cached_definition.clone()
    }

    /// Runs a squad: one concurrently spawned agent per requested task, all
    /// sharing a freshly created private blackboard.
    ///
    /// Steps, in order:
    /// 1. Parse `input` as a `DelegateInput`.
    /// 2. Validate the request (at least two tasks, no duplicate agent names,
    ///    every agent present in `agent_pool`). Validation failures are returned
    ///    as `Ok(ToolOutput::error(..))`, not as `Err`.
    /// 3. Spawn one task per squad member on a `JoinSet`, each building and
    ///    executing its own `AgentRunner`.
    /// 4. Collect results in request order, substituting an error result for
    ///    any task whose join failed.
    /// 5. Add token usage to `accumulated_tokens`, emit per-agent and aggregate
    ///    completion events, and write the formatted result to the outer
    ///    blackboard when one is configured.
    ///
    /// The returned output is `ToolOutput::success` even when individual members
    /// failed; their failures appear as `Error: ...` text in the member's section.
    ///
    /// # Errors
    ///
    /// Returns `Error::Agent` only when `input` cannot be deserialized.
    ///
    /// # Panics
    ///
    /// Panics if the `accumulated_tokens` mutex is poisoned.
    fn execute(
        &self,
        _ctx: &crate::ExecutionContext,
        input: serde_json::Value,
    ) -> Pin<Box<dyn Future<Output = Result<ToolOutput, Error>> + Send + '_>> {
        Box::pin(async move {
            let delegate_input: DelegateInput = serde_json::from_value(input)
                .map_err(|e| Error::Agent(format!("Invalid form_squad input: {e}")))?;

            let tasks = delegate_input.tasks;

            // Validate: at least 2 tasks (agents)
            if tasks.len() < 2 {
                return Ok(ToolOutput::error(
                    "form_squad requires at least 2 tasks. Use delegate_task for single-agent tasks."
                        .to_string(),
                ));
            }

            // Validate: no duplicate agent names
            {
                let mut seen = std::collections::HashSet::new();
                for t in &tasks {
                    // `insert` returns false when the name was already present.
                    if !seen.insert(&t.agent) {
                        return Ok(ToolOutput::error(format!(
                            "Duplicate agent name in squad: '{}'",
                            t.agent
                        )));
                    }
                }
            }

            // Validate all agents exist before spawning any
            for t in &tasks {
                if !self.agent_pool.iter().any(|a| a.name == t.agent) {
                    return Ok(ToolOutput::error(format!(
                        "Unknown agent '{}'. Available agents: {}",
                        t.agent,
                        self.agent_pool
                            .iter()
                            .map(|a| a.name.as_str())
                            .collect::<Vec<_>>()
                            .join(", ")
                    )));
                }
            }

            // Captured up front because `tasks` is consumed by the spawn loop below;
            // `agent_names[i]` is the agent requested by task `i`.
            let task_count = tasks.len();
            let agent_names: Vec<String> = tasks.iter().map(|t| t.agent.clone()).collect();

            // Create private blackboard for intra-squad coordination
            let private_bb: Arc<dyn Blackboard> = Arc::new(InMemoryBlackboard::new());

            // NOTE(review): this span is created but never entered or attached to the
            // spawned futures here, so the events logged below are not recorded inside
            // it — confirm that holding it only for its lifetime is intended.
            let _squad_span = info_span!(
                "heartbit.orchestrator.squad",
                agent_count = task_count,
                agents = ?agent_names,
            );

            if let Some(ref cb) = self.on_event {
                cb(AgentEvent::SubAgentsDispatched {
                    agent: "squad-leader".into(),
                    agents: agent_names.clone(),
                });
            }

            // Direct dispatch via JoinSet — no mini-orchestrator
            let mut join_set = tokio::task::JoinSet::new();

            for (idx, task) in tasks.into_iter().enumerate() {
                // Agent existence already validated above, but avoid panic in library code
                let agent_def = match self.agent_pool.iter().find(|a| a.name == task.agent) {
                    Some(def) => def.clone(),
                    None => {
                        return Ok(ToolOutput::error(format!(
                            "Internal error: agent '{}' not found after validation",
                            task.agent
                        )));
                    }
                };

                // Everything the spawned future needs is cloned/copied out of `self`
                // here, because the `async move` block below takes ownership of its
                // captures.
                let provider = agent_def
                    .provider_override
                    .clone()
                    .unwrap_or_else(|| self.shared_provider.clone());
                let max_turns = agent_def.max_turns.unwrap_or(self.default_max_turns);
                let max_tokens = agent_def.max_tokens.unwrap_or(self.default_max_tokens);
                let shared_memory = self.shared_memory.clone();
                let ns_prefix = self.memory_namespace_prefix.clone();
                let bb = private_bb.clone();
                let knowledge_base = self.knowledge_base.clone();
                let on_event = self.on_event.clone();
                let on_text = self.on_text.clone();
                let lsp_manager = self.lsp_manager.clone();
                let permission_rules = self.permission_rules.clone();
                let observability_mode = self.observability_mode;
                let allow_shared_write = self.allow_shared_write;
                let tenant_tracker = self.tenant_tracker.clone();

                info!(agent = %agent_def.name, task = %task.task, "spawning squad member");

                // Each member task resolves to `(idx, SubAgentResult)` so results can be
                // put back in request order regardless of completion order.
                join_set.spawn(async move {
                    let mut builder = AgentRunner::builder(provider)
                        .name(&agent_def.name)
                        .system_prompt(&agent_def.system_prompt)
                        .tools(agent_def.tools)
                        .max_turns(max_turns)
                        .max_tokens(max_tokens);

                    // Optional per-agent settings: each is applied only when the agent
                    // definition provides a value.
                    if let Some(strategy) = agent_def.context_strategy {
                        builder = builder.context_strategy(strategy);
                    }
                    if let Some(threshold) = agent_def.summarize_threshold {
                        builder = builder.summarize_threshold(threshold);
                    }
                    if let Some(timeout) = agent_def.tool_timeout {
                        builder = builder.tool_timeout(timeout);
                    }
                    if let Some(max) = agent_def.max_tool_output_bytes {
                        builder = builder.max_tool_output_bytes(max);
                    }
                    if let Some(schema) = agent_def.response_schema {
                        builder = builder.structured_schema(schema);
                    }
                    if !agent_def.guardrails.is_empty() {
                        builder = builder.guardrails(agent_def.guardrails);
                    }
                    if let Some(timeout) = agent_def.run_timeout {
                        builder = builder.run_timeout(timeout);
                    }
                    if let Some(effort) = agent_def.reasoning_effort {
                        builder = builder.reasoning_effort(effort);
                    }
                    if let Some(true) = agent_def.enable_reflection {
                        builder = builder.enable_reflection(true);
                    }
                    if let Some(threshold) = agent_def.tool_output_compression_threshold {
                        builder = builder.tool_output_compression_threshold(threshold);
                    }
                    if let Some(max) = agent_def.max_tools_per_turn {
                        builder = builder.max_tools_per_turn(max);
                    }
                    if let Some(profile) = agent_def.tool_profile {
                        builder = builder.tool_profile(profile);
                    }
                    if let Some(max) = agent_def.max_identical_tool_calls {
                        builder = builder.max_identical_tool_calls(max);
                    }
                    if let Some(max) = agent_def.max_fuzzy_identical_tool_calls {
                        builder = builder.max_fuzzy_identical_tool_calls(max);
                    }
                    if let Some(cap) = agent_def.max_tool_calls_per_turn {
                        builder = builder.max_tool_calls_per_turn(cap);
                    }
                    if let Some(ref config) = agent_def.session_prune_config {
                        builder = builder.session_prune_config(config.clone());
                    }
                    if let Some(true) = agent_def.enable_recursive_summarization {
                        builder = builder.enable_recursive_summarization(true);
                    }
                    if let Some(threshold) = agent_def.reflection_threshold {
                        builder = builder.reflection_threshold(threshold);
                    }
                    if let Some(true) = agent_def.consolidate_on_exit {
                        builder = builder.consolidate_on_exit(true);
                    }
                    if let Some(ref ws) = agent_def.workspace {
                        builder = builder.workspace(ws.clone());
                    }
                    if let Some(max) = agent_def.max_total_tokens {
                        builder = builder.max_total_tokens(max);
                    }
                    if let Some(trail) = agent_def.audit_trail {
                        builder = builder.audit_trail(trail);
                    }
                    // Audit user context is only set when both the user and tenant ids
                    // are present.
                    if let Some(uid) = &agent_def.audit_user_id
                        && let Some(tid) = &agent_def.audit_tenant_id
                    {
                        builder = builder.audit_user_context(uid.clone(), tid.clone());
                    }
                    if !agent_def.audit_delegation_chain.is_empty() {
                        builder = builder
                            .audit_delegation_chain(agent_def.audit_delegation_chain.clone());
                    }

                    // Forward permission rules from orchestrator to squad members
                    if !permission_rules.is_empty() {
                        builder = builder.permission_rules(permission_rules);
                    }

                    // Forward observability mode from orchestrator to squad members
                    builder = builder.observability_mode(observability_mode);

                    // Forward per-tenant token tracker so squad member usage counts
                    // toward the same per-tenant cap as the orchestrator.
                    if let Some(ref tracker) = tenant_tracker {
                        builder = builder.tenant_tracker(tracker.clone());
                    }

                    // Forward LSP manager to squad members
                    if let Some(ref lsp) = lsp_manager {
                        builder = builder.lsp_manager(lsp.clone());
                    }

                    // Forward on_event so sub-agent events are visible
                    if let Some(ref on_event) = on_event {
                        builder = builder.on_event(on_event.clone());
                    }
                    // Forward on_text so sub-agent streaming text is visible
                    if let Some(ref on_text) = on_text {
                        builder = builder.on_text(on_text.clone());
                    }

                    // Add memory tools if shared memory is configured
                    if let Some(ref memory) = shared_memory {
                        // Namespace is `{prefix}:{agent}` when a prefix is configured,
                        // otherwise just the agent name.
                        let agent_ns = match &ns_prefix {
                            Some(prefix) => format!("{prefix}:{}", agent_def.name),
                            None => agent_def.name.clone(),
                        };
                        let ns = Arc::new(crate::memory::namespaced::NamespacedMemory::new(
                            memory.clone(),
                            &agent_ns,
                        ));
                        builder = builder.memory(ns);
                        let mem_scope = crate::auth::TenantScope::from_audit_fields(
                            agent_def.audit_tenant_id.as_deref(),
                            agent_def.audit_user_id.as_deref(),
                        );
                        builder = builder.tools(crate::memory::shared_tools::shared_memory_tools(
                            memory.clone(),
                            &agent_ns,
                            mem_scope,
                            allow_shared_write,
                        ));
                    }

                    // Add blackboard tools using the PRIVATE blackboard.
                    // SECURITY (F-AGENT-7): caller-namespaced; reserved
                    // `agent:` prefix denied to sub-agents.
                    builder = builder.tools(blackboard_tools(bb.clone(), &agent_def.name));

                    // Add knowledge tools if knowledge base is configured
                    if let Some(ref kb) = knowledge_base {
                        builder = builder.knowledge(kb.clone());
                    }

                    // A build failure is reported as this member's (failed) result
                    // rather than aborting the whole squad.
                    let runner = match builder.build() {
                        Ok(r) => r,
                        Err(e) => {
                            return (
                                idx,
                                SubAgentResult {
                                    agent: agent_def.name,
                                    result: format!("Error building agent: {e}"),
                                    tokens_used: TokenUsage::default(),
                                    success: false,
                                },
                            );
                        }
                    };

                    let result = match runner.execute(&task.task).await {
                        Ok(output) => {
                            // Write successful result to private blackboard
                            let key = format!("agent:{}", agent_def.name);
                            // A failed blackboard write is logged and otherwise ignored;
                            // the member still counts as successful.
                            if let Err(e) = bb
                                .write(&key, serde_json::Value::String(output.result.clone()))
                                .await
                            {
                                tracing::warn!(
                                    agent = %agent_def.name,
                                    error = %e,
                                    "failed to write result to private blackboard"
                                );
                            }
                            SubAgentResult {
                                agent: agent_def.name,
                                result: output.result,
                                tokens_used: output.tokens_used,
                                success: true,
                            }
                        }
                        // On failure, keep whatever token usage the error reports so it
                        // is still accounted for below.
                        Err(e) => SubAgentResult {
                            agent: agent_def.name,
                            result: format!("Error: {e}"),
                            tokens_used: e.partial_usage(),
                            success: false,
                        },
                    };

                    (idx, result)
                });
            }

            // One slot per task, indexed by request position. `idx` comes from
            // enumerating `tasks`, so it is always below `task_count`.
            let mut results: Vec<Option<(usize, SubAgentResult)>> = vec![None; task_count];
            while let Some(result) = join_set.join_next().await {
                match result {
                    Ok((idx, sub_result)) => {
                        results[idx] = Some((idx, sub_result));
                    }
                    // A join error leaves the slot as `None`; it is filled in below.
                    Err(e) => {
                        tracing::error!(error = %e, "squad member task panicked");
                    }
                }
            }

            // Fill gaps (panicked tasks) with error results, then sort by index
            let mut results: Vec<(usize, SubAgentResult)> = results
                .into_iter()
                .enumerate()
                .map(|(idx, r)| {
                    r.unwrap_or_else(|| {
                        (
                            idx,
                            SubAgentResult {
                                agent: agent_names[idx].clone(),
                                result: "Error: squad member task panicked".into(),
                                tokens_used: TokenUsage::default(),
                                success: false,
                            },
                        )
                    })
                })
                .collect();
            results.sort_by_key(|(idx, _)| *idx);

            // `squad_label` names the aggregate completion event; `bb_key` is the
            // outer-blackboard key the formatted result is stored under.
            let squad_label = format!("squad[{}]", agent_names.join(","));
            let bb_key = format!("squad:{}", agent_names.join("+"));

            // Accumulate sub-agent tokens (lock scope kept minimal)
            let mut total_tokens = TokenUsage::default();
            {
                let mut acc = self.accumulated_tokens.lock().expect("token lock poisoned");
                for (_, r) in &results {
                    *acc += r.tokens_used;
                    total_tokens += r.tokens_used;
                }
            }

            // Emit per-agent completion events
            if let Some(ref cb) = self.on_event {
                for (_, r) in &results {
                    cb(AgentEvent::SubAgentCompleted {
                        agent: r.agent.clone(),
                        success: r.success,
                        usage: r.tokens_used,
                    });
                }
            }

            // The squad as a whole succeeded only if every member did.
            let all_success = results.iter().all(|(_, r)| r.success);

            // One `=== Agent: <name> ===` section per member, in request order.
            let formatted = results
                .iter()
                .map(|(_, r)| format!("=== Agent: {} ===\n{}", r.agent, r.result))
                .collect::<Vec<_>>()
                .join("\n\n");

            // Emit aggregate squad completion event
            if let Some(ref cb) = self.on_event {
                cb(AgentEvent::SubAgentCompleted {
                    agent: squad_label,
                    success: all_success,
                    usage: total_tokens,
                });
            }

            // Write squad result to outer blackboard
            // (best-effort: a write failure is logged and does not fail the tool call).
            if let Some(ref bb) = self.blackboard
                && let Err(e) = bb
                    .write(&bb_key, serde_json::Value::String(formatted.clone()))
                    .await
            {
                tracing::warn!(
                    key = %bb_key,
                    error = %e,
                    "failed to write squad result to outer blackboard"
                );
            }

            Ok(ToolOutput::success(formatted))
        })
    }
}
1134
/// The orchestrator's dynamic agent spawning tool: creates specialist agents at runtime.
///
/// Unlike `DelegateTaskTool` which dispatches to pre-configured agents, `SpawnAgentTool`
/// lets the LLM define new agents on-the-fly with a custom system prompt and tool subset.
/// Security: tool allowlist enforced, spawn count capped, token budget tracked, no recursion.
struct SpawnAgentTool {
    /// Provider used for every spawned agent.
    shared_provider: Arc<BoxedProvider>,
    /// Spawn limits and defaults (agent cap, token budget, per-agent turn/token limits,
    /// tool allowlist).
    spawn_config: crate::types::SpawnConfig,
    /// Pre-built tools from allowlist (validated at build time).
    tool_pool: std::collections::HashMap<String, Arc<dyn Tool>>,
    /// Tracks how many agents have been spawned this run.
    spawn_count: Arc<std::sync::atomic::AtomicU32>,
    /// Prevents name reuse within a single run.
    spawned_names: Arc<Mutex<std::collections::HashSet<String>>>,
    /// Tracks cumulative token usage across all spawned agents.
    accumulated_tokens: Arc<Mutex<TokenUsage>>,
    /// Permission rules inherited from the orchestrator; forwarded when non-empty.
    permission_rules: super::permission::PermissionRuleset,
    /// When set, spawned agents get a namespaced memory view and read-only shared-memory tools.
    shared_memory: Option<Arc<dyn Memory>>,
    /// Optional prefix for spawned-agent memory namespaces (`{prefix}:{spawned_name}`).
    memory_namespace_prefix: Option<String>,
    /// Optional event callback: receives spawn/completion events and is forwarded to
    /// spawned agents.
    on_event: Option<Arc<OnEvent>>,
    /// Optional streaming text callback, forwarded to spawned agents.
    on_text: Option<Arc<crate::llm::OnText>>,
    /// Optional LSP manager, forwarded to spawned agents.
    lsp_manager: Option<Arc<crate::lsp::LspManager>>,
    /// Observability mode applied to every spawned agent.
    observability_mode: super::observability::ObservabilityMode,
    /// Optional workspace path, forwarded to spawned agents.
    workspace: Option<std::path::PathBuf>,
    /// Guardrails inherited from the orchestrator; forwarded when non-empty.
    guardrails: Vec<Arc<dyn Guardrail>>,
    /// Optional audit trail, forwarded to spawned agents.
    audit_trail: Option<Arc<dyn super::audit::AuditTrail>>,
    /// Audit user id; the audit user context is forwarded only when the tenant id is also set.
    audit_user_id: Option<String>,
    /// Audit tenant id; the audit user context is forwarded only when the user id is also set.
    audit_tenant_id: Option<String>,
    /// Delegation chain so far; when non-empty, the spawned agent's name is appended
    /// before forwarding.
    audit_delegation_chain: Vec<String>,
    /// Pre-built tool definition for `spawn_agent`.
    cached_definition: ToolDefinition,
    /// Optional per-tenant token tracker propagated to each spawned agent runner.
    tenant_tracker: Option<Arc<crate::agent::tenant_tracker::TenantTokenTracker>>,
}
1168
/// Deserialized input of the `spawn_agent` tool.
#[derive(Deserialize)]
struct SpawnAgentInput {
    /// Requested agent identifier; must match `^[a-z][a-z0-9_]{0,63}$` and be unused
    /// in the current run.
    name: String,
    /// System prompt for the spawned agent; limited to `SPAWN_MAX_PROMPT_BYTES` bytes.
    system_prompt: String,
    /// Names of tools to give the agent; each must exist in the tool pool.
    /// Defaults to an empty list when omitted.
    #[serde(default)]
    tools: Vec<String>,
    /// The task the spawned agent is executed with.
    task: String,
}
1177
/// Maximum allowed system prompt length for spawned agents (32 KB).
///
/// Compared against the prompt's byte length (`str::len`), not its character count.
const SPAWN_MAX_PROMPT_BYTES: usize = 32 * 1024;
1180
1181impl SpawnAgentTool {
1182    fn build_definition(config: &crate::types::SpawnConfig) -> ToolDefinition {
1183        let allowlist = if config.tool_allowlist.is_empty() {
1184            "(none — reasoning-only agents)".to_string()
1185        } else {
1186            config.tool_allowlist.join(", ")
1187        };
1188        ToolDefinition {
1189            name: "spawn_agent".into(),
1190            description: format!(
1191                "Create a new specialist agent at runtime when no pre-configured agent fits the task. \
1192                 The spawned agent runs with the given system prompt and tool subset, then returns its result.\n\n\
1193                 Available tools for spawned agents: [{allowlist}]. Budget: {} agents max per run.",
1194                config.max_spawned_agents
1195            ),
1196            input_schema: json!({
1197                "type": "object",
1198                "required": ["name", "system_prompt", "task"],
1199                "properties": {
1200                    "name": {
1201                        "type": "string",
1202                        "description": "Lowercase identifier for the agent (a-z, 0-9, underscores). Must start with a letter. E.g. 'tax_specialist', 'csv_analyzer'."
1203                    },
1204                    "system_prompt": {
1205                        "type": "string",
1206                        "description": "The agent's role and behavior instructions. Be specific about expertise and constraints."
1207                    },
1208                    "tools": {
1209                        "type": "array",
1210                        "items": { "type": "string" },
1211                        "description": format!("Subset of available tools: [{allowlist}]. Empty array creates a reasoning-only agent.")
1212                    },
1213                    "task": {
1214                        "type": "string",
1215                        "description": "The specific task for this agent to accomplish."
1216                    }
1217                },
1218                "additionalProperties": false
1219            }),
1220        }
1221    }
1222
1223    async fn spawn(&self, input: SpawnAgentInput) -> Result<ToolOutput, Error> {
1224        // 1. Spawn count cap
1225        let current = self.spawn_count.load(std::sync::atomic::Ordering::Relaxed);
1226        if current >= self.spawn_config.max_spawned_agents {
1227            return Ok(ToolOutput::error(format!(
1228                "Spawn limit reached: {current}/{} agents already spawned this run.",
1229                self.spawn_config.max_spawned_agents
1230            )));
1231        }
1232
1233        // 2. Name format validation
1234        let name_re =
1235            regex::Regex::new(r"^[a-z][a-z0-9_]{0,63}$").expect("spawn agent name regex is valid");
1236        if !name_re.is_match(&input.name) {
1237            return Ok(ToolOutput::error(format!(
1238                "Invalid agent name '{}'. Must match ^[a-z][a-z0-9_]{{0,63}}$ \
1239                 (lowercase, starts with letter, alphanumeric + underscores, max 64 chars).",
1240                input.name
1241            )));
1242        }
1243
1244        // 3. Name uniqueness
1245        {
1246            let mut names = self.spawned_names.lock().expect("spawned names lock");
1247            if !names.insert(input.name.clone()) {
1248                return Ok(ToolOutput::error(format!(
1249                    "Agent name '{}' already used in this run. Choose a different name.",
1250                    input.name
1251                )));
1252            }
1253        }
1254
1255        // 4. Tool subset validation
1256        for tool_name in &input.tools {
1257            if !self.tool_pool.contains_key(tool_name) {
1258                let available: Vec<&str> = self.tool_pool.keys().map(|k| k.as_str()).collect();
1259                return Ok(ToolOutput::error(format!(
1260                    "Tool '{}' not in allowlist. Available: [{}]",
1261                    tool_name,
1262                    available.join(", ")
1263                )));
1264            }
1265        }
1266
1267        // 5. Token budget headroom check
1268        {
1269            let acc = self.accumulated_tokens.lock().expect("token lock");
1270            let used = acc.total();
1271            if used >= self.spawn_config.max_total_tokens {
1272                return Ok(ToolOutput::error(format!(
1273                    "Spawn token budget exhausted: {used}/{} tokens used across spawned agents.",
1274                    self.spawn_config.max_total_tokens
1275                )));
1276            }
1277        }
1278
1279        // 6. System prompt length check
1280        if input.system_prompt.len() > SPAWN_MAX_PROMPT_BYTES {
1281            return Ok(ToolOutput::error(format!(
1282                "System prompt too long: {} bytes (max {SPAWN_MAX_PROMPT_BYTES}).",
1283                input.system_prompt.len()
1284            )));
1285        }
1286
1287        let spawned_name = format!("spawn:{}", input.name);
1288
1289        // Emit AgentSpawned event
1290        if let Some(ref cb) = self.on_event {
1291            cb(AgentEvent::AgentSpawned {
1292                agent: "orchestrator".into(),
1293                spawned_name: spawned_name.clone(),
1294                tools: input.tools.clone(),
1295                task: input.task.clone(),
1296            });
1297        }
1298
1299        // Build tool set from requested subset
1300        let selected_tools: Vec<Arc<dyn Tool>> = input
1301            .tools
1302            .iter()
1303            .filter_map(|name| self.tool_pool.get(name).cloned())
1304            .collect();
1305
1306        // Build AgentRunner
1307        let mut builder = AgentRunner::builder(self.shared_provider.clone())
1308            .name(&spawned_name)
1309            .system_prompt(&input.system_prompt)
1310            .tools(selected_tools)
1311            .max_turns(self.spawn_config.max_turns)
1312            .max_tokens(self.spawn_config.max_tokens)
1313            .observability_mode(self.observability_mode);
1314
1315        // Inherit security state from orchestrator
1316        if !self.permission_rules.is_empty() {
1317            builder = builder.permission_rules(self.permission_rules.clone());
1318        }
1319        if !self.guardrails.is_empty() {
1320            builder = builder.guardrails(self.guardrails.clone());
1321        }
1322        if let Some(ref ws) = self.workspace {
1323            builder = builder.workspace(ws.clone());
1324        }
1325        if let Some(ref lsp) = self.lsp_manager {
1326            builder = builder.lsp_manager(lsp.clone());
1327        }
1328        if let Some(ref cb) = self.on_event {
1329            builder = builder.on_event(cb.clone());
1330        }
1331        if let Some(ref cb) = self.on_text {
1332            builder = builder.on_text(cb.clone());
1333        }
1334        if let Some(ref trail) = self.audit_trail {
1335            builder = builder.audit_trail(trail.clone());
1336        }
1337        if let (Some(uid), Some(tid)) = (&self.audit_user_id, &self.audit_tenant_id) {
1338            builder = builder.audit_user_context(uid.clone(), tid.clone());
1339        }
1340        if !self.audit_delegation_chain.is_empty() {
1341            let mut chain = self.audit_delegation_chain.clone();
1342            chain.push(spawned_name.clone());
1343            builder = builder.audit_delegation_chain(chain);
1344        }
1345
1346        // Forward per-tenant token tracker so spawned agent usage counts toward
1347        // the same per-tenant cap as the orchestrator.
1348        if let Some(ref tracker) = self.tenant_tracker {
1349            builder = builder.tenant_tracker(tracker.clone());
1350        }
1351
1352        // Memory: read-only shared access with namespaced isolation
1353        if let Some(ref memory) = self.shared_memory {
1354            let agent_ns = match &self.memory_namespace_prefix {
1355                Some(prefix) => format!("{prefix}:{spawned_name}"),
1356                None => spawned_name.clone(),
1357            };
1358            let ns = Arc::new(crate::memory::namespaced::NamespacedMemory::new(
1359                memory.clone(),
1360                &agent_ns,
1361            ));
1362            builder = builder.memory(ns);
1363            let mem_scope = crate::auth::TenantScope::from_audit_fields(
1364                self.audit_tenant_id.as_deref(),
1365                self.audit_user_id.as_deref(),
1366            );
1367            builder = builder.tools(crate::memory::shared_tools::shared_memory_tools(
1368                memory.clone(),
1369                &agent_ns,
1370                mem_scope,
1371                false, // read-only: spawned agents cannot write to shared memory
1372            ));
1373        }
1374
1375        let runner = builder.build()?;
1376
1377        info!(
1378            agent = %spawned_name,
1379            tools = ?input.tools,
1380            "spawning dynamic agent"
1381        );
1382
1383        // Execute
1384        match runner.execute(&input.task).await {
1385            Ok(output) => {
1386                // Post-execution bookkeeping
1387                self.spawn_count
1388                    .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1389                {
1390                    let mut acc = self.accumulated_tokens.lock().expect("token lock");
1391                    *acc += output.tokens_used;
1392                }
1393                if let Some(ref cb) = self.on_event {
1394                    cb(AgentEvent::SubAgentCompleted {
1395                        agent: spawned_name.clone(),
1396                        success: true,
1397                        usage: output.tokens_used,
1398                    });
1399                }
1400                Ok(ToolOutput::success(format!(
1401                    "=== Spawned Agent: {} ===\n{}",
1402                    spawned_name, output.result
1403                )))
1404            }
1405            Err(e) => {
1406                self.spawn_count
1407                    .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
1408                let partial = e.partial_usage();
1409                {
1410                    let mut acc = self.accumulated_tokens.lock().expect("token lock");
1411                    *acc += partial;
1412                }
1413                if let Some(ref cb) = self.on_event {
1414                    cb(AgentEvent::SubAgentCompleted {
1415                        agent: spawned_name.clone(),
1416                        success: false,
1417                        usage: partial,
1418                    });
1419                }
1420                Ok(ToolOutput::error(format!(
1421                    "Spawned agent '{spawned_name}' failed: {e}"
1422                )))
1423            }
1424        }
1425    }
1426}
1427
1428impl Tool for SpawnAgentTool {
1429    fn definition(&self) -> ToolDefinition {
1430        self.cached_definition.clone()
1431    }
1432
1433    fn execute(
1434        &self,
1435        _ctx: &crate::ExecutionContext,
1436        input: serde_json::Value,
1437    ) -> Pin<Box<dyn Future<Output = Result<ToolOutput, Error>> + Send + '_>> {
1438        Box::pin(async move {
1439            let spawn_input: SpawnAgentInput = serde_json::from_value(input)
1440                .map_err(|e| Error::Agent(format!("Invalid spawn_agent input: {e}")))?;
1441            self.spawn(spawn_input).await
1442        })
1443    }
1444}
1445
1446/// Build the orchestrator system prompt listing available agents.
1447///
1448/// Shared between standalone and Restate paths. Takes `(name, description, tool_names)` triples.
1449///
1450/// When `squads_enabled` is `true`, the prompt explains both `delegate_task` and `form_squad`
1451/// tools. When `false`, only `delegate_task` is mentioned (Restate path or opt-out).
1452///
1453/// **Note:** Only the agent's registered tools are listed. Runtime-injected tools
1454/// (memory, blackboard, knowledge) are shared infrastructure available to all agents
1455/// and are not shown here to avoid noise in the prompt.
1456pub fn build_system_prompt(
1457    agents: &[(&str, &str, &[String])],
1458    squads_enabled: bool,
1459    dispatch_mode: DispatchMode,
1460) -> String {
1461    let agent_list: String = agents
1462        .iter()
1463        .map(|(name, desc, tools)| {
1464            if tools.is_empty() {
1465                format!("- **{name}**: {desc}\n  Tools: (none)")
1466            } else {
1467                format!("- **{name}**: {desc}\n  Tools: {}", tools.join(", "))
1468            }
1469        })
1470        .collect::<Vec<_>>()
1471        .join("\n");
1472
1473    let delegation_instructions = match (squads_enabled, dispatch_mode) {
1474        (_, DispatchMode::Sequential) => {
1475            "## Delegation Tool\n\
1476             Delegate to ONE agent at a time using **delegate_task**. Wait for the result \
1477             before deciding the next agent. Do NOT batch multiple agents in a single call."
1478        }
1479        (true, DispatchMode::Parallel) => {
1480            "## Delegation Tools\n\
1481             You have two delegation tools:\n\n\
1482             1. **delegate_task** — Run independent subtasks in parallel. Each agent works in \
1483                isolation and cannot see other agents' output. Use when subtasks are independent.\n\n\
1484             2. **form_squad** — Run subtasks in parallel with a shared blackboard. \
1485                Unlike delegate_task, agents can read each other's results via the blackboard. \
1486                Agents run concurrently — use when they benefit from shared state, not when \
1487                strict ordering is needed.\n\n\
1488             After receiving results, synthesize them into a coherent response."
1489        }
1490        (false, DispatchMode::Parallel) => {
1491            "## Delegation Tool\n\
1492             Use the **delegate_task** tool to assign work to sub-agents. You can assign \
1493             multiple tasks at once for parallel execution. Each agent works in isolation. \
1494             After receiving results, synthesize them into a coherent response."
1495        }
1496    };
1497
1498    let choose_tool_step = match (squads_enabled, dispatch_mode) {
1499        (_, DispatchMode::Sequential) => {
1500            "3. DELEGATE: Use delegate_task with ONE agent at a time. Wait for results before \
1501                delegating to the next agent."
1502        }
1503        (true, DispatchMode::Parallel) => {
1504            "3. CHOOSE TOOL: Select delegate_task for independent parallel work, or form_squad \
1505                when agents benefit from shared state via a blackboard."
1506        }
1507        (false, DispatchMode::Parallel) => {
1508            "3. DELEGATE: Use delegate_task to assign subtasks to the best-fit agents."
1509        }
1510    };
1511
1512    format!(
1513        "You are an orchestrator agent. Analyze incoming tasks and delegate work to \
1514         specialized sub-agents.\n\n\
1515         ## Decision Process\n\
1516         1. DECOMPOSE: Break the task into distinct subtasks. Identify which require different expertise.\n\
1517         2. MATCH: For each subtask, pick the best-fit agent based on their description and tools.\n\
1518         {choose_tool_step}\n\n\
1519         ## Important\n\
1520         - ALWAYS delegate to a sub-agent using your delegation tools. You do NOT have any \
1521           direct capabilities — sub-agents have the tools. Never respond to the user directly \
1522           without delegating first.\n\n\
1523         ## Effort Scaling\n\
1524         - If only ONE agent is relevant, delegate a single task. Do NOT force-split across agents.\n\
1525         - If the task is simple enough for one agent, use one agent.\n\
1526         - Only use multiple agents when the task genuinely has multiple distinct parts \
1527           needing different expertise.\n\n\
1528         ## Task Quality\n\
1529         - Each delegated task must be self-contained: include all context the agent needs.\n\
1530         - Be specific: \"Read /path/to/file and extract X\" not \"look at the project\".\n\
1531         - Avoid overlapping tasks — no two agents should do the same work.\n\n\
1532         ## Available Sub-Agents\n\
1533         Choose agents based on their description and available tools:\n\
1534         {agent_list}\n\n\
1535         {delegation_instructions}"
1536    )
1537}
1538
1539/// Build the delegate_task tool definition.
1540///
1541/// Shared between standalone and Restate paths. Takes `(name, description, tool_names)` triples.
1542///
1543/// When `dispatch_mode` is `Sequential`, the schema adds `maxItems: 1` to the tasks array
1544/// so the LLM can only dispatch one agent at a time. This is a schema-level enforcement
1545/// that works even with weaker models that ignore prompt instructions.
1546pub fn build_delegate_tool_schema(
1547    agents: &[(&str, &str, &[String])],
1548    dispatch_mode: DispatchMode,
1549) -> ToolDefinition {
1550    let agent_descriptions: Vec<serde_json::Value> = agents
1551        .iter()
1552        .map(|(name, desc, tools)| json!({"name": name, "description": desc, "tools": tools}))
1553        .collect();
1554
1555    let (description, tasks_schema) = match dispatch_mode {
1556        DispatchMode::Sequential => (
1557            format!(
1558                "Delegate a task to ONE sub-agent at a time. Wait for the result before \
1559                 delegating to the next agent. Each task runs in isolation. \
1560                 Write clear, self-contained task descriptions with all necessary context. \
1561                 Available agents: {}",
1562                serde_json::to_string(&agent_descriptions)
1563                    .expect("agent list serialization is infallible")
1564            ),
1565            json!({
1566                "type": "array",
1567                "items": {
1568                    "type": "object",
1569                    "properties": {
1570                        "agent": {
1571                            "type": "string",
1572                            "description": "Name of the sub-agent"
1573                        },
1574                        "task": {
1575                            "type": "string",
1576                            "description": "Task instruction for the sub-agent"
1577                        }
1578                    },
1579                    "required": ["agent", "task"]
1580                },
1581                "minItems": 1,
1582                "maxItems": 1
1583            }),
1584        ),
1585        DispatchMode::Parallel => (
1586            format!(
1587                "Delegate independent tasks to sub-agents for parallel execution. \
1588                 Each task runs in isolation — agents cannot see each other's work. \
1589                 Write clear, self-contained task descriptions with all necessary context. \
1590                 Available agents: {}",
1591                serde_json::to_string(&agent_descriptions)
1592                    .expect("agent list serialization is infallible")
1593            ),
1594            json!({
1595                "type": "array",
1596                "items": {
1597                    "type": "object",
1598                    "properties": {
1599                        "agent": {
1600                            "type": "string",
1601                            "description": "Name of the sub-agent"
1602                        },
1603                        "task": {
1604                            "type": "string",
1605                            "description": "Task instruction for the sub-agent"
1606                        }
1607                    },
1608                    "required": ["agent", "task"]
1609                },
1610                "minItems": 1
1611            }),
1612        ),
1613    };
1614
1615    ToolDefinition {
1616        name: "delegate_task".into(),
1617        description,
1618        input_schema: json!({
1619            "type": "object",
1620            "properties": {
1621                "tasks": tasks_schema
1622            },
1623            "required": ["tasks"]
1624        }),
1625    }
1626}
1627
1628/// Build the form_squad tool definition.
1629///
1630/// Standalone path only. Takes `(name, description, tool_names)` triples so the LLM
1631/// knows which agents are available for squad formation.
1632///
1633/// Uses the same `{tasks:[{agent, task}]}` format as `delegate_task` for consistency.
1634pub(crate) fn build_form_squad_tool_schema(agents: &[(&str, &str, &[String])]) -> ToolDefinition {
1635    let agent_descriptions: Vec<serde_json::Value> = agents
1636        .iter()
1637        .map(|(name, desc, tools)| json!({"name": name, "description": desc, "tools": tools}))
1638        .collect();
1639
1640    ToolDefinition {
1641        name: "form_squad".into(),
1642        description: format!(
1643            "Dispatch per-agent tasks in parallel with a shared blackboard for intra-squad coordination. \
1644             Unlike delegate_task, squad agents can read each other's results via the blackboard. \
1645             Use this when agents benefit from shared state (e.g., building on each other's work, \
1646             coordinating on a shared artifact). Agents run concurrently. \
1647             Requires at least 2 tasks (one per agent). \
1648             Available agents: {}",
1649            serde_json::to_string(&agent_descriptions)
1650                .expect("agent list serialization is infallible")
1651        ),
1652        input_schema: json!({
1653            "type": "object",
1654            "properties": {
1655                "tasks": {
1656                    "type": "array",
1657                    "items": {
1658                        "type": "object",
1659                        "properties": {
1660                            "agent": {
1661                                "type": "string",
1662                                "description": "Name of the sub-agent"
1663                            },
1664                            "task": {
1665                                "type": "string",
1666                                "description": "Task instruction for the sub-agent"
1667                            }
1668                        },
1669                        "required": ["agent", "task"]
1670                    },
1671                    "minItems": 2,
1672                    "description": "Per-agent tasks for the squad (minimum 2)"
1673                }
1674            },
1675            "required": ["tasks"]
1676        }),
1677    }
1678}
1679
/// Configuration for adding a sub-agent to the orchestrator.
///
/// Used by `OrchestratorBuilder::sub_agent_full` to avoid a long parameter list.
/// The struct derives `Default`, so callers can set only the fields they care
/// about and fill the rest with `..Default::default()`.
///
/// `sub_agent_full` converts this config into a `SubAgentDef` and then fills in
/// `workspace`, `audit_trail`, `audit_user_id`, and `audit_tenant_id` from the
/// builder when the converted definition has them as `None`, and the delegation
/// chain when the converted definition's chain is empty.
#[derive(Default)]
pub struct SubAgentConfig {
    /// Unique name for this sub-agent.
    pub name: String,
    /// Human-readable description of what this sub-agent does.
    pub description: String,
    /// System prompt to give this sub-agent.
    pub system_prompt: String,
    /// Tools available to this sub-agent.
    pub tools: Vec<Arc<dyn Tool>>,
    /// Context window management strategy for this sub-agent.
    pub context_strategy: Option<ContextStrategy>,
    /// Summarize conversation when message count exceeds this threshold.
    pub summarize_threshold: Option<u32>,
    /// Per-tool execution timeout for this sub-agent.
    pub tool_timeout: Option<Duration>,
    /// Maximum tool output size in bytes for this sub-agent.
    pub max_tool_output_bytes: Option<usize>,
    /// Per-agent turn limit. When `None`, uses orchestrator default.
    pub max_turns: Option<usize>,
    /// Per-agent token limit. When `None`, uses orchestrator default.
    pub max_tokens: Option<u32>,
    /// Optional JSON Schema for structured output. When set, the sub-agent
    /// receives a synthetic `__respond__` tool and returns structured JSON.
    pub response_schema: Option<serde_json::Value>,
    /// Optional per-agent run timeout. When `None`, no timeout is applied
    /// to this sub-agent's run.
    pub run_timeout: Option<Duration>,
    /// Guardrails applied to this sub-agent's LLM calls and tool executions.
    pub guardrails: Vec<Arc<dyn Guardrail>>,
    /// Optional per-agent LLM provider override. When `None`, the sub-agent
    /// inherits the orchestrator's provider. Use this to route sub-agents to
    /// different models (e.g., Haiku for cheap tasks, Opus for complex ones).
    pub provider: Option<Arc<BoxedProvider>>,
    /// Optional reasoning/thinking effort level for this sub-agent.
    pub reasoning_effort: Option<crate::llm::types::ReasoningEffort>,
    /// Enable reflection prompts after tool results for this sub-agent.
    pub enable_reflection: Option<bool>,
    /// Tool output compression threshold in bytes for this sub-agent.
    pub tool_output_compression_threshold: Option<usize>,
    /// Maximum tools per turn for this sub-agent.
    pub max_tools_per_turn: Option<usize>,
    /// Tool profile for pre-filtering tool definitions for this sub-agent.
    pub tool_profile: Option<super::tool_filter::ToolProfile>,
    /// Maximum consecutive identical tool-call turns for doom loop detection.
    pub max_identical_tool_calls: Option<u32>,
    /// Maximum consecutive fuzzy-identical tool-call turns for doom loop detection.
    pub max_fuzzy_identical_tool_calls: Option<u32>,
    /// Maximum number of tool calls allowed in a single LLM turn (per-turn cap).
    pub max_tool_calls_per_turn: Option<u32>,
    /// Session pruning configuration for this sub-agent.
    pub session_prune_config: Option<crate::agent::pruner::SessionPruneConfig>,
    /// Enable recursive summarization for this sub-agent.
    pub enable_recursive_summarization: Option<bool>,
    /// Memory reflection threshold for this sub-agent.
    pub reflection_threshold: Option<u32>,
    /// Run memory consolidation at session end for this sub-agent.
    pub consolidate_on_exit: Option<bool>,
    /// Optional workspace root for this sub-agent's file tools and system prompt.
    pub workspace: Option<std::path::PathBuf>,
    /// Hard limit on cumulative tokens (input + output) across all turns.
    pub max_total_tokens: Option<u64>,
    /// Optional audit trail for recording untruncated agent decisions.
    pub audit_trail: Option<Arc<dyn super::audit::AuditTrail>>,
    /// Optional user ID for multi-tenant audit enrichment.
    pub audit_user_id: Option<String>,
    /// Optional tenant ID for multi-tenant audit enrichment.
    pub audit_tenant_id: Option<String>,
    /// Delegation chain for audit records (propagated to sub-agents).
    // NOTE(review): the reason for suppressing `dead_code` on this public field is
    // not visible from this part of the file — confirm the attribute is still needed.
    #[allow(dead_code)]
    pub audit_delegation_chain: Vec<String>,
}
1755
/// Builder for [`Orchestrator`].
///
/// Construct via [`Orchestrator::builder`], register sub-agents with
/// [`sub_agent`](OrchestratorBuilder::sub_agent), then call
/// [`build`](OrchestratorBuilder::build) to produce an `Orchestrator` ready to
/// run. The builder validates that all sub-agent names are unique and rejects
/// zero-valued turn/token limits. Sub-agent settings that are not explicitly set
/// on the sub-agent inherit the orchestrator-level defaults at build time.
pub struct OrchestratorBuilder<P: LlmProvider> {
    provider: Arc<P>,
    /// Sub-agent definitions, in the order they were registered via `sub_agent`,
    /// `sub_agent_with_tools`, or `sub_agent_full`.
    sub_agents: Vec<SubAgentDef>,
    /// Maximum number of turns for the orchestrator's own LLM loop.
    max_turns: usize,
    /// Maximum number of tokens for the orchestrator's own LLM calls.
    max_tokens: u32,
    /// Context management strategy for the orchestrator's own conversation.
    context_strategy: Option<ContextStrategy>,
    /// Token threshold for summarization of the orchestrator's own context.
    summarize_threshold: Option<u32>,
    /// Timeout for the orchestrator's own tool executions.
    tool_timeout: Option<Duration>,
    /// Maximum byte size for tool output on the orchestrator's own tools.
    max_tool_output_bytes: Option<usize>,
    /// Shared memory store set via `shared_memory`.
    shared_memory: Option<Arc<dyn Memory>>,
    /// Prefix for sub-agent memory namespaces (`"{prefix}:{agent_name}"`).
    memory_namespace_prefix: Option<String>,
    /// Shared blackboard for cross-agent coordination.
    blackboard: Option<Arc<dyn Blackboard>>,
    /// Shared knowledge base for document retrieval.
    knowledge_base: Option<Arc<dyn KnowledgeBase>>,
    /// Callback for streaming text output, set via `on_text`.
    on_text: Option<Arc<crate::llm::OnText>>,
    /// Callback for human-in-the-loop approval, set via `on_approval`.
    on_approval: Option<Arc<crate::llm::OnApproval>>,
    /// Callback for structured agent events, set via `on_event`.
    on_event: Option<Arc<OnEvent>>,
    /// Guardrails accumulated via `guardrail` and `guardrails`.
    guardrails: Vec<Arc<dyn Guardrail>>,
    /// Callback for structured questions to the user, set via `on_question`.
    on_question: Option<Arc<OnQuestion>>,
    /// Wall-clock deadline for the entire orchestrator run.
    run_timeout: Option<Duration>,
    enable_squads: Option<bool>,
    reasoning_effort: Option<crate::llm::types::ReasoningEffort>,
    enable_reflection: bool,
    tool_output_compression_threshold: Option<usize>,
    max_tools_per_turn: Option<usize>,
    max_identical_tool_calls: Option<u32>,
    max_fuzzy_identical_tool_calls: Option<u32>,
    max_tool_calls_per_turn: Option<u32>,
    permission_rules: super::permission::PermissionRuleset,
    instruction_text: Option<String>,
    /// Learned permissions for persisting AlwaysAllow/AlwaysDeny decisions.
    learned_permissions: Option<Arc<std::sync::Mutex<super::permission::LearnedPermissions>>>,
    /// LSP manager for collecting diagnostics after file-modifying tools.
    lsp_manager: Option<Arc<crate::lsp::LspManager>>,
    observability_mode: Option<super::observability::ObservabilityMode>,
    dispatch_mode: DispatchMode,
    /// Optional workspace root for all sub-agents. Propagated to sub-agent builders.
    workspace: Option<std::path::PathBuf>,
    /// Optional audit trail propagated to all sub-agents.
    audit_trail: Option<Arc<dyn super::audit::AuditTrail>>,
    /// Optional user ID for multi-tenant audit enrichment (propagated to all sub-agents).
    audit_user_id: Option<String>,
    /// Optional tenant ID for multi-tenant audit enrichment (propagated to all sub-agents).
    audit_tenant_id: Option<String>,
    /// Delegation chain for audit records (propagated to all sub-agents).
    audit_delegation_chain: Vec<String>,
    /// Whether sub-agents are allowed to write to shared institutional memory.
    /// Defaults to `true` for backward compatibility. Set to `false` to restrict
    /// write access based on user roles.
    allow_shared_write: bool,
    /// Whether to append the multi-agent collaboration prompt to each sub-agent's
    /// system prompt. Default: true.
    multi_agent_prompt: bool,
    /// Dynamic agent spawning configuration. When `Some`, the `spawn_agent` tool
    /// is registered on the orchestrator.
    spawn_config: Option<crate::types::SpawnConfig>,
    /// Pre-built builtin tools to use as the spawn tool pool.
    spawn_builtin_tools: Vec<Arc<dyn Tool>>,
    /// Optional per-tenant token tracker propagated to the orchestrator's own runner
    /// and to all sub-agent runners dispatched via DelegateTaskTool, FormSquadTool,
    /// and SpawnAgentTool. When set, multi-agent token usage is correctly accounted
    /// per tenant so the per-tenant cap applies to the combined run.
    tenant_tracker: Option<Arc<crate::agent::tenant_tracker::TenantTokenTracker>>,
}
1828
1829impl<P: LlmProvider + 'static> OrchestratorBuilder<P> {
1830    /// Add a sub-agent with the given name, description, and system prompt.
1831    pub fn sub_agent(
1832        mut self,
1833        name: impl Into<String>,
1834        description: impl Into<String>,
1835        system_prompt: impl Into<String>,
1836    ) -> Self {
1837        let mut def = SubAgentDef::new(name, description, system_prompt);
1838        def.workspace = self.workspace.clone();
1839        def.audit_trail = self.audit_trail.clone();
1840        def.audit_user_id = self.audit_user_id.clone();
1841        def.audit_tenant_id = self.audit_tenant_id.clone();
1842        def.audit_delegation_chain = self.audit_delegation_chain.clone();
1843        self.sub_agents.push(def);
1844        self
1845    }
1846
1847    /// Add a sub-agent with a predefined set of tools.
1848    pub fn sub_agent_with_tools(
1849        mut self,
1850        name: impl Into<String>,
1851        description: impl Into<String>,
1852        system_prompt: impl Into<String>,
1853        tools: Vec<Arc<dyn Tool>>,
1854    ) -> Self {
1855        let mut def = SubAgentDef::new(name, description, system_prompt);
1856        def.tools = tools;
1857        def.workspace = self.workspace.clone();
1858        def.audit_trail = self.audit_trail.clone();
1859        def.audit_user_id = self.audit_user_id.clone();
1860        def.audit_tenant_id = self.audit_tenant_id.clone();
1861        def.audit_delegation_chain = self.audit_delegation_chain.clone();
1862        self.sub_agents.push(def);
1863        self
1864    }
1865
1866    /// Add a sub-agent using a fully specified [`SubAgentConfig`].
1867    pub fn sub_agent_full(mut self, config: SubAgentConfig) -> Self {
1868        let mut def = SubAgentDef::from(config);
1869        if def.workspace.is_none() {
1870            def.workspace = self.workspace.clone();
1871        }
1872        if def.audit_trail.is_none() {
1873            def.audit_trail = self.audit_trail.clone();
1874        }
1875        if def.audit_user_id.is_none() {
1876            def.audit_user_id = self.audit_user_id.clone();
1877        }
1878        if def.audit_tenant_id.is_none() {
1879            def.audit_tenant_id = self.audit_tenant_id.clone();
1880        }
1881        if def.audit_delegation_chain.is_empty() {
1882            def.audit_delegation_chain = self.audit_delegation_chain.clone();
1883        }
1884        self.sub_agents.push(def);
1885        self
1886    }
1887
1888    /// Set the maximum number of turns for the orchestrator's own LLM loop.
1889    pub fn max_turns(mut self, max_turns: usize) -> Self {
1890        self.max_turns = max_turns;
1891        self
1892    }
1893
1894    /// Set the maximum number of tokens for the orchestrator's own LLM calls.
1895    pub fn max_tokens(mut self, max_tokens: u32) -> Self {
1896        self.max_tokens = max_tokens;
1897        self
1898    }
1899
1900    /// Set context management strategy for the orchestrator's own conversation.
1901    pub fn context_strategy(mut self, strategy: ContextStrategy) -> Self {
1902        self.context_strategy = Some(strategy);
1903        self
1904    }
1905
1906    /// Set token threshold for summarization of the orchestrator's own context.
1907    pub fn summarize_threshold(mut self, threshold: u32) -> Self {
1908        self.summarize_threshold = Some(threshold);
1909        self
1910    }
1911
1912    /// Set timeout for the orchestrator's own tool executions (i.e., delegate_task).
1913    pub fn tool_timeout(mut self, timeout: Duration) -> Self {
1914        self.tool_timeout = Some(timeout);
1915        self
1916    }
1917
1918    /// Set maximum byte size for tool output on the orchestrator's own tools.
1919    pub fn max_tool_output_bytes(mut self, max: usize) -> Self {
1920        self.max_tool_output_bytes = Some(max);
1921        self
1922    }
1923
1924    /// Attach a shared memory store. Each sub-agent gets:
1925    /// - Private memory tools (namespaced to the agent)
1926    /// - Shared memory tools (cross-agent read/write)
1927    pub fn shared_memory(mut self, memory: Arc<dyn Memory>) -> Self {
1928        self.shared_memory = Some(memory);
1929        self
1930    }
1931
1932    /// Set a prefix for memory namespacing. When set, sub-agent memory
1933    /// namespaces become `"{prefix}:{agent_name}"` instead of just `"{agent_name}"`.
1934    /// This enables per-user or per-story isolation without nesting `NamespacedMemory`.
1935    pub fn memory_namespace_prefix(mut self, prefix: impl Into<String>) -> Self {
1936        self.memory_namespace_prefix = Some(prefix.into());
1937        self
1938    }
1939
1940    /// Control whether sub-agents are allowed to write to shared institutional memory.
1941    ///
1942    /// When `false`, only `shared_memory_read` is provided — agents can read company
1943    /// knowledge but cannot write to it. Use this to enforce role-based write access.
1944    /// Defaults to `true` for backward compatibility.
1945    pub fn allow_shared_write(mut self, allow: bool) -> Self {
1946        self.allow_shared_write = allow;
1947        self
1948    }
1949
1950    /// Enable or disable the multi-agent collaboration prompt on sub-agent
1951    /// system prompts. Default: `true`.
1952    pub fn multi_agent_prompt(mut self, enabled: bool) -> Self {
1953        self.multi_agent_prompt = enabled;
1954        self
1955    }
1956
1957    /// Enable dynamic agent spawning with the given config.
1958    ///
1959    /// The `builtin_tools` are the pool from which spawned agents' tools are selected
1960    /// (filtered by `SpawnConfig::tool_allowlist`). Unknown tools in the allowlist
1961    /// cause a build error.
1962    pub fn spawn_config(
1963        mut self,
1964        config: crate::types::SpawnConfig,
1965        builtin_tools: Vec<Arc<dyn Tool>>,
1966    ) -> Self {
1967        self.spawn_config = Some(config);
1968        self.spawn_builtin_tools = builtin_tools;
1969        self
1970    }
1971
1972    /// Attach a per-tenant token tracker to the orchestrator and all sub-agents.
1973    ///
1974    /// When set, the orchestrator's own runner and every sub-agent runner dispatched
1975    /// via `delegate_task`, `form_squad`, or `spawn_agent` inherits the same tracker.
1976    /// This ensures multi-agent token usage is correctly accounted per tenant so the
1977    /// per-tenant cap applies to the combined run. Requires `audit_user_context` to
1978    /// also be set — the tracker only fires when both a tracker and a tenant ID are
1979    /// present.
1980    pub fn tenant_tracker(
1981        mut self,
1982        tracker: Arc<crate::agent::tenant_tracker::TenantTokenTracker>,
1983    ) -> Self {
1984        self.tenant_tracker = Some(tracker);
1985        self
1986    }
1987
1988    /// Attach a shared blackboard for cross-agent coordination.
1989    ///
1990    /// Each sub-agent receives `blackboard_read`, `blackboard_write`, and
1991    /// `blackboard_list` tools. After each sub-agent completes, its result
1992    /// is automatically written to the blackboard under the `"agent:{name}"` key.
1993    pub fn blackboard(mut self, blackboard: Arc<dyn Blackboard>) -> Self {
1994        self.blackboard = Some(blackboard);
1995        self
1996    }
1997
1998    /// Attach a shared knowledge base for document retrieval.
1999    ///
2000    /// Each sub-agent receives a `knowledge_search` tool to query the knowledge
2001    /// base at runtime.
2002    pub fn knowledge(mut self, kb: Arc<dyn KnowledgeBase>) -> Self {
2003        self.knowledge_base = Some(kb);
2004        self
2005    }
2006
2007    /// Set a callback for streaming text output on the orchestrator's LLM calls.
2008    /// Sub-agents do not stream — only the orchestrator's own reasoning and
2009    /// final synthesis are emitted incrementally.
2010    pub fn on_text(mut self, callback: Arc<crate::llm::OnText>) -> Self {
2011        self.on_text = Some(callback);
2012        self
2013    }
2014
2015    /// Set a callback for human-in-the-loop approval on the orchestrator's
2016    /// tool calls (i.e., delegate_task calls). Sub-agents do not use this
2017    /// callback — only the orchestrator's decisions are gated.
2018    pub fn on_approval(mut self, callback: Arc<crate::llm::OnApproval>) -> Self {
2019        self.on_approval = Some(callback);
2020        self
2021    }
2022
2023    /// Set learned permissions for persisting AlwaysAllow/AlwaysDeny decisions.
2024    pub fn learned_permissions(
2025        mut self,
2026        learned: Arc<std::sync::Mutex<super::permission::LearnedPermissions>>,
2027    ) -> Self {
2028        self.learned_permissions = Some(learned);
2029        self
2030    }
2031
2032    /// Set an LSP manager for collecting diagnostics after file-modifying tools.
2033    pub fn lsp_manager(mut self, manager: Arc<crate::lsp::LspManager>) -> Self {
2034        self.lsp_manager = Some(manager);
2035        self
2036    }
2037
2038    /// Set a callback for structured agent events.
2039    ///
2040    /// The callback receives events from the orchestrator's own agent loop **and**
2041    /// from all sub-agents (both `delegate_task` and `form_squad` paths). Sub-agent
2042    /// events carry the sub-agent name in their `agent` field for disambiguation.
2043    pub fn on_event(mut self, callback: Arc<OnEvent>) -> Self {
2044        self.on_event = Some(callback);
2045        self
2046    }
2047
2048    /// Add a single guardrail applied to the orchestrator's own agent loop.
2049    pub fn guardrail(mut self, guardrail: Arc<dyn Guardrail>) -> Self {
2050        self.guardrails.push(guardrail);
2051        self
2052    }
2053
2054    /// Add multiple guardrails to the orchestrator's own agent loop.
2055    pub fn guardrails(mut self, guardrails: Vec<Arc<dyn Guardrail>>) -> Self {
2056        self.guardrails.extend(guardrails);
2057        self
2058    }
2059
2060    /// Set a callback for structured questions from the orchestrator to the user.
2061    pub fn on_question(mut self, callback: Arc<OnQuestion>) -> Self {
2062        self.on_question = Some(callback);
2063        self
2064    }
2065
2066    /// Set a wall-clock deadline for the entire orchestrator run. If the run
2067    /// does not complete within this duration, `Error::RunTimeout` is returned.
2068    pub fn run_timeout(mut self, timeout: Duration) -> Self {
2069        self.run_timeout = Some(timeout);
2070        self
2071    }
2072
2073    /// Enable or disable the `form_squad` tool for dynamic agent squad formation.
2074    ///
2075    /// When `true`, the orchestrator can assemble temporary squads of agents to
2076    /// collaboratively solve complex subtasks. When `false`, only `delegate_task`
2077    /// is available.
2078    ///
2079    /// Default: auto-enabled when there are >= 2 agents.
2080    pub fn enable_squads(mut self, enable: bool) -> Self {
2081        self.enable_squads = Some(enable);
2082        self
2083    }
2084
2085    /// Set reasoning/thinking effort level for the orchestrator's own LLM calls.
2086    pub fn reasoning_effort(mut self, effort: crate::llm::types::ReasoningEffort) -> Self {
2087        self.reasoning_effort = Some(effort);
2088        self
2089    }
2090
2091    /// Enable or disable reflection prompts after tool results.
2092    pub fn enable_reflection(mut self, enabled: bool) -> Self {
2093        self.enable_reflection = enabled;
2094        self
2095    }
2096
2097    /// Compress tool outputs larger than `threshold` bytes before including in context.
2098    pub fn tool_output_compression_threshold(mut self, threshold: usize) -> Self {
2099        self.tool_output_compression_threshold = Some(threshold);
2100        self
2101    }
2102
2103    /// Limit the number of tools the LLM may call in a single turn.
2104    pub fn max_tools_per_turn(mut self, max: usize) -> Self {
2105        self.max_tools_per_turn = Some(max);
2106        self
2107    }
2108
2109    /// Set the doom-loop threshold: abort after this many consecutive identical tool-call batches.
2110    pub fn max_identical_tool_calls(mut self, max: u32) -> Self {
2111        self.max_identical_tool_calls = Some(max);
2112        self
2113    }
2114
2115    /// Set the fuzzy doom-loop threshold for near-duplicate tool-call batches.
2116    pub fn max_fuzzy_identical_tool_calls(mut self, max: u32) -> Self {
2117        self.max_fuzzy_identical_tool_calls = Some(max);
2118        self
2119    }
2120
2121    /// Cap the total number of tool calls the LLM may make in a single turn.
2122    pub fn max_tool_calls_per_turn(mut self, cap: u32) -> Self {
2123        self.max_tool_calls_per_turn = Some(cap);
2124        self
2125    }
2126
2127    /// Set the permission ruleset applied to tool calls made by the orchestrator's own runner.
2128    pub fn permission_rules(mut self, rules: super::permission::PermissionRuleset) -> Self {
2129        self.permission_rules = rules;
2130        self
2131    }
2132
2133    /// Provide pre-loaded instruction text to prepend to the orchestrator's system prompt.
2134    pub fn instruction_text(mut self, text: impl Into<String>) -> Self {
2135        let text = text.into();
2136        if !text.is_empty() {
2137            self.instruction_text = Some(text);
2138        }
2139        self
2140    }
2141
2142    /// Set the observability verbosity mode for the orchestrator and its sub-agents.
2143    pub fn observability_mode(mut self, mode: super::observability::ObservabilityMode) -> Self {
2144        self.observability_mode = Some(mode);
2145        self
2146    }
2147
2148    /// Set the dispatch mode for orchestrator delegation.
2149    ///
2150    /// When `Sequential`, the delegate_task schema constrains `maxItems: 1` so
2151    /// the LLM dispatches one agent at a time. This is enforced at the JSON schema
2152    /// level, which works even with weaker models that ignore prompt instructions.
2153    pub fn dispatch_mode(mut self, mode: DispatchMode) -> Self {
2154        self.dispatch_mode = mode;
2155        self
2156    }
2157
2158    /// Set the workspace directory for all sub-agents. Each sub-agent
2159    /// shares this workspace unless overridden via `SubAgentConfig.workspace`.
2160    pub fn workspace(mut self, path: impl Into<std::path::PathBuf>) -> Self {
2161        self.workspace = Some(path.into());
2162        self
2163    }
2164
2165    /// Attach an audit trail propagated to all sub-agents.
2166    ///
2167    /// Individual sub-agents can override via `SubAgentConfig.audit_trail`.
2168    pub fn audit_trail(mut self, trail: Arc<dyn super::audit::AuditTrail>) -> Self {
2169        self.audit_trail = Some(trail);
2170        self
2171    }
2172
2173    /// Set user context for multi-tenant audit enrichment.
2174    /// Propagated to all sub-agents and the orchestrator's own runner.
2175    pub fn audit_user_context(
2176        mut self,
2177        user_id: impl Into<String>,
2178        tenant_id: impl Into<String>,
2179    ) -> Self {
2180        self.audit_user_id = Some(user_id.into());
2181        self.audit_tenant_id = Some(tenant_id.into());
2182        self
2183    }
2184
2185    /// Set the delegation chain for all sub-agent audit records.
2186    pub fn audit_delegation_chain(mut self, chain: Vec<String>) -> Self {
2187        self.audit_delegation_chain = chain;
2188        self
2189    }
2190
2191    /// Build the [`Orchestrator`], validating all sub-agent definitions.
2192    pub fn build(mut self) -> Result<Orchestrator<P>, Error> {
2193        // Append multi-agent collaboration prompt to each sub-agent's system prompt
2194        if self.multi_agent_prompt {
2195            for agent in &mut self.sub_agents {
2196                agent
2197                    .system_prompt
2198                    .push_str(&crate::agent::prompts::render_collab_prompt(
2199                        &agent.name,
2200                        &agent.description,
2201                    ));
2202            }
2203        }
2204
2205        // Validate sub-agent definitions
2206        {
2207            let mut seen = std::collections::HashSet::new();
2208            for agent in &self.sub_agents {
2209                if agent.name.is_empty() {
2210                    return Err(Error::Config("sub-agent name must not be empty".into()));
2211                }
2212                if !seen.insert(&agent.name) {
2213                    return Err(Error::Config(format!(
2214                        "duplicate sub-agent name: '{}'",
2215                        agent.name
2216                    )));
2217                }
2218                if agent.max_turns == Some(0) {
2219                    return Err(Error::Config(format!(
2220                        "sub-agent '{}': max_turns must be > 0",
2221                        agent.name
2222                    )));
2223                }
2224                if agent.max_tokens == Some(0) {
2225                    return Err(Error::Config(format!(
2226                        "sub-agent '{}': max_tokens must be > 0",
2227                        agent.name
2228                    )));
2229                }
2230            }
2231        }
2232
2233        if self.sub_agents.is_empty() {
2234            tracing::warn!(
2235                "orchestrator built with no sub-agents — delegate_task tool will list no agents"
2236            );
2237        }
2238
2239        // Sequential dispatch and squads are incompatible — form_squad runs agents
2240        // in parallel, which defeats sequential ordering. Force squads off.
2241        let squads_enabled = if self.dispatch_mode == DispatchMode::Sequential {
2242            false
2243        } else {
2244            self.enable_squads.unwrap_or(self.sub_agents.len() >= 2)
2245        };
2246        if squads_enabled && self.sub_agents.len() < 2 {
2247            tracing::warn!(
2248                "enable_squads is true but fewer than 2 agents are registered — \
2249                 form_squad requires at least 2 agents to be useful"
2250            );
2251        }
2252
2253        let tool_names: Vec<Vec<String>> = self
2254            .sub_agents
2255            .iter()
2256            .map(|a| a.tools.iter().map(|t| t.definition().name).collect())
2257            .collect();
2258        let triples: Vec<(&str, &str, &[String])> = self
2259            .sub_agents
2260            .iter()
2261            .zip(tool_names.iter())
2262            .map(|(a, names)| (a.name.as_str(), a.description.as_str(), names.as_slice()))
2263            .collect();
2264        let mut system = build_system_prompt(&triples, squads_enabled, self.dispatch_mode);
2265        // Append user identity context so the orchestrator knows who it is serving.
2266        if let (Some(uid), Some(tid)) = (&self.audit_user_id, &self.audit_tenant_id) {
2267            system.push_str(&format!(
2268                "\n---\nYou are operating on behalf of **{uid}** in organization **{tid}**.\nKeep this user's information private. Do not share their data with other users."
2269            ));
2270        }
2271        let cached_definition = build_delegate_tool_schema(&triples, self.dispatch_mode);
2272        let form_squad_definition = if squads_enabled {
2273            Some(build_form_squad_tool_schema(&triples))
2274        } else {
2275            None
2276        };
2277        // Drop borrows on self.sub_agents so we can move it below
2278        drop(triples);
2279        drop(tool_names);
2280
2281        let sub_agent_tokens = Arc::new(Mutex::new(TokenUsage::default()));
2282
2283        let shared_provider = Arc::new(BoxedProvider::from_arc(self.provider.clone()));
2284
2285        // Clone agent pool for FormSquadTool before moving into DelegateTaskTool
2286        let agent_pool = if squads_enabled {
2287            Some(self.sub_agents.clone())
2288        } else {
2289            None
2290        };
2291
2292        let resolved_mode = self
2293            .observability_mode
2294            .unwrap_or(super::observability::ObservabilityMode::Production);
2295
2296        // Build spawn_agent tool pool and validate before moving shared state
2297        let spawn_tool_data = if let Some(spawn_cfg) = self.spawn_config.take() {
2298            let mut tool_pool = std::collections::HashMap::new();
2299            for tool in &self.spawn_builtin_tools {
2300                let name = tool.definition().name;
2301                if spawn_cfg.tool_allowlist.contains(&name) {
2302                    tool_pool.insert(name, tool.clone());
2303                }
2304            }
2305            // Validate all allowlist entries exist
2306            for allowed in &spawn_cfg.tool_allowlist {
2307                if !tool_pool.contains_key(allowed) {
2308                    return Err(Error::Config(format!(
2309                        "orchestrator.spawn.tool_allowlist: unknown tool '{}'. \
2310                         Available builtin tools: [{}]",
2311                        allowed,
2312                        self.spawn_builtin_tools
2313                            .iter()
2314                            .map(|t| t.definition().name)
2315                            .collect::<Vec<_>>()
2316                            .join(", ")
2317                    )));
2318                }
2319            }
2320            // Ensure no delegation tools leak into the pool
2321            tool_pool.remove("delegate_task");
2322            tool_pool.remove("form_squad");
2323            tool_pool.remove("spawn_agent");
2324
2325            let cached_definition = SpawnAgentTool::build_definition(&spawn_cfg);
2326
2327            system.push_str(
2328                "\n\n## Dynamic Agent Spawning\n\
2329                 You also have the **spawn_agent** tool to create specialist agents at runtime \
2330                 when no pre-configured agent fits the task. Use this as a secondary option — \
2331                 prefer delegating to existing agents when they match the need.",
2332            );
2333
2334            Some((spawn_cfg, tool_pool, cached_definition))
2335        } else {
2336            None
2337        };
2338
2339        let delegate_tool: Arc<dyn Tool> = Arc::new(DelegateTaskTool {
2340            shared_provider: shared_provider.clone(),
2341            sub_agents: self.sub_agents,
2342            max_turns: self.max_turns,
2343            max_tokens: self.max_tokens,
2344            permission_rules: self.permission_rules.clone(),
2345            accumulated_tokens: sub_agent_tokens.clone(),
2346            shared_memory: self.shared_memory.clone(),
2347            memory_namespace_prefix: self.memory_namespace_prefix.clone(),
2348            blackboard: self.blackboard.clone(),
2349            knowledge_base: self.knowledge_base.clone(),
2350            cached_definition,
2351            on_event: self.on_event.clone(),
2352            on_text: self.on_text.clone(),
2353            lsp_manager: self.lsp_manager.clone(),
2354            observability_mode: resolved_mode,
2355            allow_shared_write: self.allow_shared_write,
2356            tenant_tracker: self.tenant_tracker.clone(),
2357            guardrails: self.guardrails.clone(),
2358        });
2359
2360        let mut runner_builder = AgentRunner::builder(self.provider)
2361            .name("orchestrator")
2362            .system_prompt(system)
2363            .tool(delegate_tool)
2364            .max_turns(self.max_turns)
2365            .max_tokens(self.max_tokens);
2366
2367        // Register form_squad tool when enabled
2368        if let Some(agent_pool) = agent_pool {
2369            // SAFETY: form_squad_definition is always Some when agent_pool is Some
2370            let squad_def = form_squad_definition.expect("squad definition computed when enabled");
2371            let form_squad_tool: Arc<dyn Tool> = Arc::new(FormSquadTool {
2372                shared_provider: shared_provider.clone(),
2373                agent_pool,
2374                default_max_turns: self.max_turns,
2375                default_max_tokens: self.max_tokens,
2376                permission_rules: self.permission_rules.clone(),
2377                accumulated_tokens: sub_agent_tokens.clone(),
2378                shared_memory: self.shared_memory.clone(),
2379                memory_namespace_prefix: self.memory_namespace_prefix.clone(),
2380                blackboard: self.blackboard.clone(),
2381                knowledge_base: self.knowledge_base.clone(),
2382                on_event: self.on_event.clone(),
2383                on_text: self.on_text.clone(),
2384                lsp_manager: self.lsp_manager.clone(),
2385                cached_definition: squad_def,
2386                observability_mode: resolved_mode,
2387                allow_shared_write: self.allow_shared_write,
2388                tenant_tracker: self.tenant_tracker.clone(),
2389            });
2390            runner_builder = runner_builder.tool(form_squad_tool);
2391        }
2392
2393        // Register spawn_agent tool when configured
2394        if let Some((spawn_cfg, tool_pool, spawn_def)) = spawn_tool_data {
2395            let spawn_tool: Arc<dyn Tool> = Arc::new(SpawnAgentTool {
2396                shared_provider: shared_provider.clone(),
2397                spawn_config: spawn_cfg,
2398                tool_pool,
2399                spawn_count: Arc::new(std::sync::atomic::AtomicU32::new(0)),
2400                spawned_names: Arc::new(Mutex::new(std::collections::HashSet::new())),
2401                accumulated_tokens: sub_agent_tokens.clone(),
2402                permission_rules: self.permission_rules.clone(),
2403                shared_memory: self.shared_memory.clone(),
2404                memory_namespace_prefix: self.memory_namespace_prefix.clone(),
2405                on_event: self.on_event.clone(),
2406                on_text: self.on_text.clone(),
2407                lsp_manager: self.lsp_manager.clone(),
2408                observability_mode: resolved_mode,
2409                workspace: self.workspace.clone(),
2410                guardrails: self.guardrails.clone(),
2411                audit_trail: self.audit_trail.clone(),
2412                audit_user_id: self.audit_user_id.clone(),
2413                audit_tenant_id: self.audit_tenant_id.clone(),
2414                audit_delegation_chain: self.audit_delegation_chain.clone(),
2415                cached_definition: spawn_def,
2416                tenant_tracker: self.tenant_tracker.clone(),
2417            });
2418            runner_builder = runner_builder.tool(spawn_tool);
2419        }
2420
2421        // Give the orchestrator itself memory tools so it can recall/store memories
2422        // directly, without needing to delegate to a sub-agent.
2423        if let Some(ref memory) = self.shared_memory {
2424            let orch_ns = self
2425                .memory_namespace_prefix
2426                .as_deref()
2427                .unwrap_or("orchestrator");
2428            let mem_scope = crate::auth::TenantScope::from_audit_fields(
2429                self.audit_tenant_id.as_deref(),
2430                self.audit_user_id.as_deref(),
2431            );
2432            let mem_tools = crate::memory::shared_tools::shared_memory_tools(
2433                memory.clone(),
2434                orch_ns,
2435                mem_scope,
2436                self.allow_shared_write,
2437            );
2438            runner_builder = runner_builder.tools(mem_tools);
2439        }
2440
2441        if let Some(strategy) = self.context_strategy {
2442            runner_builder = runner_builder.context_strategy(strategy);
2443        }
2444        if let Some(threshold) = self.summarize_threshold {
2445            runner_builder = runner_builder.summarize_threshold(threshold);
2446        }
2447        if let Some(timeout) = self.tool_timeout {
2448            runner_builder = runner_builder.tool_timeout(timeout);
2449        }
2450        if let Some(max) = self.max_tool_output_bytes {
2451            runner_builder = runner_builder.max_tool_output_bytes(max);
2452        }
2453        if let Some(on_text) = self.on_text {
2454            runner_builder = runner_builder.on_text(on_text);
2455        }
2456        if let Some(on_approval) = self.on_approval {
2457            runner_builder = runner_builder.on_approval(on_approval);
2458        }
2459        if let Some(learned) = self.learned_permissions {
2460            runner_builder = runner_builder.learned_permissions(learned);
2461        }
2462        if let Some(lsp) = self.lsp_manager {
2463            runner_builder = runner_builder.lsp_manager(lsp);
2464        }
2465        if let Some(on_event) = self.on_event {
2466            runner_builder = runner_builder.on_event(on_event);
2467        }
2468        if !self.guardrails.is_empty() {
2469            runner_builder = runner_builder.guardrails(self.guardrails);
2470        }
2471        if let Some(on_question) = self.on_question {
2472            runner_builder = runner_builder.on_question(on_question);
2473        }
2474        if let Some(timeout) = self.run_timeout {
2475            runner_builder = runner_builder.run_timeout(timeout);
2476        }
2477        if let Some(effort) = self.reasoning_effort {
2478            runner_builder = runner_builder.reasoning_effort(effort);
2479        }
2480        if self.enable_reflection {
2481            runner_builder = runner_builder.enable_reflection(true);
2482        }
2483        if let Some(threshold) = self.tool_output_compression_threshold {
2484            runner_builder = runner_builder.tool_output_compression_threshold(threshold);
2485        }
2486        if let Some(max) = self.max_tools_per_turn {
2487            runner_builder = runner_builder.max_tools_per_turn(max);
2488        }
2489        if let Some(max) = self.max_identical_tool_calls {
2490            runner_builder = runner_builder.max_identical_tool_calls(max);
2491        }
2492        if let Some(max) = self.max_fuzzy_identical_tool_calls {
2493            runner_builder = runner_builder.max_fuzzy_identical_tool_calls(max);
2494        }
2495        if let Some(cap) = self.max_tool_calls_per_turn {
2496            runner_builder = runner_builder.max_tool_calls_per_turn(cap);
2497        }
2498        if !self.permission_rules.is_empty() {
2499            runner_builder = runner_builder.permission_rules(self.permission_rules);
2500        }
2501        if let Some(text) = self.instruction_text {
2502            runner_builder = runner_builder.instruction_text(text);
2503        }
2504        if let Some(mode) = self.observability_mode {
2505            runner_builder = runner_builder.observability_mode(mode);
2506        }
2507        if let Some(trail) = self.audit_trail {
2508            runner_builder = runner_builder.audit_trail(trail);
2509        }
2510        if let Some(uid) = self.audit_user_id
2511            && let Some(tid) = self.audit_tenant_id
2512        {
2513            runner_builder = runner_builder.audit_user_context(uid, tid);
2514        }
2515        if !self.audit_delegation_chain.is_empty() {
2516            runner_builder =
2517                runner_builder.audit_delegation_chain(self.audit_delegation_chain.clone());
2518        }
2519        if let Some(tracker) = self.tenant_tracker {
2520            runner_builder = runner_builder.tenant_tracker(tracker);
2521        }
2522
2523        let runner = runner_builder.build()?;
2524
2525        Ok(Orchestrator {
2526            runner,
2527            sub_agent_tokens,
2528        })
2529    }
2530}
2531
2532#[cfg(test)]
2533mod tests {
2534    use super::*;
2535    use crate::llm::types::{
2536        CompletionRequest, CompletionResponse, ContentBlock, StopReason, TokenUsage,
2537    };
2538    use crate::tool::ToolOutput;
2539    use std::sync::Mutex;
2540
2541    struct MockProvider {
2542        responses: Mutex<Vec<CompletionResponse>>,
2543    }
2544
2545    impl MockProvider {
2546        fn new(responses: Vec<CompletionResponse>) -> Self {
2547            Self {
2548                responses: Mutex::new(responses),
2549            }
2550        }
2551    }
2552
2553    impl LlmProvider for MockProvider {
2554        async fn complete(&self, _request: CompletionRequest) -> Result<CompletionResponse, Error> {
2555            let mut responses = self.responses.lock().expect("mock lock poisoned");
2556            if responses.is_empty() {
2557                return Err(Error::Agent("no more mock responses".into()));
2558            }
2559            Ok(responses.remove(0))
2560        }
2561
2562        fn model_name(&self) -> Option<&str> {
2563            Some("mock-model-v1")
2564        }
2565    }
2566
2567    /// A mock tool for orchestrator tests. Returns a fixed response.
2568    struct MockTool {
2569        def: crate::llm::types::ToolDefinition,
2570        response: String,
2571    }
2572
2573    impl MockTool {
2574        fn new(name: &str, response: &str) -> Self {
2575            Self {
2576                def: crate::llm::types::ToolDefinition {
2577                    name: name.into(),
2578                    description: format!("Mock {name}"),
2579                    input_schema: json!({"type": "object"}),
2580                },
2581                response: response.into(),
2582            }
2583        }
2584    }
2585
2586    impl crate::tool::Tool for MockTool {
2587        fn definition(&self) -> crate::llm::types::ToolDefinition {
2588            self.def.clone()
2589        }
2590
2591        fn execute(
2592            &self,
2593            _ctx: &crate::ExecutionContext,
2594            _input: serde_json::Value,
2595        ) -> std::pin::Pin<
2596            Box<dyn std::future::Future<Output = Result<ToolOutput, Error>> + Send + '_>,
2597        > {
2598            let response = self.response.clone();
2599            Box::pin(async move { Ok(ToolOutput::success(response)) })
2600        }
2601    }
2602
2603    #[test]
2604    fn system_prompt_includes_agents() {
2605        let tools_a = vec!["web_search".to_string(), "read_file".to_string()];
2606        let tools_b: Vec<String> = vec![];
2607        let agents: Vec<(&str, &str, &[String])> = vec![
2608            ("researcher", "Research specialist", tools_a.as_slice()),
2609            ("coder", "Coding expert", tools_b.as_slice()),
2610        ];
2611
2612        let prompt = build_system_prompt(&agents, false, DispatchMode::Parallel);
2613        assert!(prompt.contains("researcher"));
2614        assert!(prompt.contains("Research specialist"));
2615        assert!(prompt.contains("coder"));
2616        assert!(prompt.contains("Tools: web_search, read_file"));
2617        assert!(prompt.contains("Tools: (none)"));
2618        // New structured sections
2619        assert!(
2620            prompt.contains("Decision Process"),
2621            "prompt should contain Decision Process section: {prompt}"
2622        );
2623        assert!(
2624            prompt.contains("Effort Scaling"),
2625            "prompt should contain Effort Scaling section: {prompt}"
2626        );
2627        assert!(
2628            prompt.contains("Task Quality"),
2629            "prompt should contain Task Quality section: {prompt}"
2630        );
2631        assert!(
2632            prompt.contains("DECOMPOSE"),
2633            "prompt should contain decomposition guidance: {prompt}"
2634        );
2635    }
2636
2637    #[test]
2638    fn system_prompt_shows_tool_names() {
2639        let tools = vec![
2640            "web_search".to_string(),
2641            "read_file".to_string(),
2642            "knowledge_search".to_string(),
2643        ];
2644        let no_tools: Vec<String> = vec![];
2645        let agents: Vec<(&str, &str, &[String])> = vec![
2646            ("researcher", "Research specialist", tools.as_slice()),
2647            ("analyst", "Analytical thinker", no_tools.as_slice()),
2648        ];
2649
2650        let prompt = build_system_prompt(&agents, false, DispatchMode::Parallel);
2651        assert!(
2652            prompt.contains("Tools: web_search, read_file, knowledge_search"),
2653            "prompt should list tool names: {prompt}"
2654        );
2655        assert!(
2656            prompt.contains("Tools: (none)"),
2657            "prompt should show (none) for agents without tools: {prompt}"
2658        );
2659    }
2660
2661    #[test]
2662    fn system_prompt_sequential_says_one_at_a_time() {
2663        let tools: Vec<String> = vec![];
2664        let agents: Vec<(&str, &str, &[String])> =
2665            vec![("builder", "Builds stuff", tools.as_slice())];
2666        let prompt = build_system_prompt(&agents, false, DispatchMode::Sequential);
2667        assert!(
2668            prompt.contains("ONE agent at a time"),
2669            "sequential prompt should say one at a time: {prompt}"
2670        );
2671        assert!(
2672            !prompt.contains("parallel execution"),
2673            "sequential prompt should not mention parallel: {prompt}"
2674        );
2675    }
2676
2677    #[test]
2678    fn system_prompt_parallel_says_parallel() {
2679        let tools: Vec<String> = vec![];
2680        let agents: Vec<(&str, &str, &[String])> =
2681            vec![("builder", "Builds stuff", tools.as_slice())];
2682        let prompt = build_system_prompt(&agents, false, DispatchMode::Parallel);
2683        assert!(
2684            prompt.contains("parallel execution"),
2685            "parallel prompt should mention parallel: {prompt}"
2686        );
2687    }
2688
2689    #[test]
2690    fn delegate_schema_sequential_max_items_1() {
2691        let tools: Vec<String> = vec![];
2692        let agents: Vec<(&str, &str, &[String])> =
2693            vec![("builder", "Builds stuff", tools.as_slice())];
2694        let def = build_delegate_tool_schema(&agents, DispatchMode::Sequential);
2695        let tasks = &def.input_schema["properties"]["tasks"];
2696        assert_eq!(
2697            tasks["maxItems"], 1,
2698            "sequential schema should have maxItems=1: {tasks}"
2699        );
2700        assert!(
2701            def.description.contains("ONE sub-agent"),
2702            "sequential description should say ONE: {}",
2703            def.description
2704        );
2705    }
2706
2707    #[test]
2708    fn delegate_schema_parallel_no_max_items() {
2709        let tools: Vec<String> = vec![];
2710        let agents: Vec<(&str, &str, &[String])> =
2711            vec![("builder", "Builds stuff", tools.as_slice())];
2712        let def = build_delegate_tool_schema(&agents, DispatchMode::Parallel);
2713        let tasks = &def.input_schema["properties"]["tasks"];
2714        assert!(
2715            tasks.get("maxItems").is_none(),
2716            "parallel schema should not have maxItems: {tasks}"
2717        );
2718    }
2719
2720    #[tokio::test]
2721    async fn sequential_dispatch_disables_squads() {
2722        // With 2 agents, squads auto-enable. Sequential mode should force them off.
2723        // We verify by running the orchestrator and checking that the LLM request
2724        // only contains delegate_task (not form_squad).
2725        let provider = Arc::new(MockProvider::new(vec![CompletionResponse {
2726            content: vec![ContentBlock::Text {
2727                text: "done".into(),
2728            }],
2729            stop_reason: StopReason::EndTurn,
2730            usage: TokenUsage::default(),
2731            model: None,
2732        }]));
2733        let mut orch = Orchestrator::builder(provider)
2734            .sub_agent("a", "Agent A", "prompt a")
2735            .sub_agent("b", "Agent B", "prompt b")
2736            .dispatch_mode(DispatchMode::Sequential)
2737            .build()
2738            .unwrap();
2739        let output = orch.run("test").await.unwrap();
2740        assert_eq!(output.result, "done");
2741        // The system prompt should NOT mention form_squad
2742        // (indirectly verifies squads were disabled)
2743    }
2744
2745    #[test]
2746    fn sequential_dispatch_disables_squads_in_prompt() {
2747        let tools: Vec<String> = vec![];
2748        let agents: Vec<(&str, &str, &[String])> = vec![
2749            ("a", "Agent A", tools.as_slice()),
2750            ("b", "Agent B", tools.as_slice()),
2751        ];
2752        // Sequential + squads_enabled=true should still produce a prompt without form_squad
2753        let prompt = build_system_prompt(&agents, false, DispatchMode::Sequential);
2754        assert!(
2755            !prompt.contains("form_squad"),
2756            "sequential prompt should not mention form_squad: {prompt}"
2757        );
2758    }
2759
2760    #[test]
2761    fn delegate_tool_schema_includes_agents() {
2762        let tools = vec!["web_search".to_string()];
2763        let agents: Vec<(&str, &str, &[String])> =
2764            vec![("researcher", "Research", tools.as_slice())];
2765        let def = build_delegate_tool_schema(&agents, DispatchMode::Parallel);
2766        assert_eq!(def.name, "delegate_task");
2767        assert!(def.description.contains("researcher"));
2768        assert!(
2769            def.description.contains("web_search"),
2770            "delegate tool description should contain tool names: {}",
2771            def.description
2772        );
2773        assert!(
2774            def.description.contains("isolation"),
2775            "delegate tool description should mention isolation: {}",
2776            def.description
2777        );
2778        assert!(
2779            def.description.contains("self-contained"),
2780            "delegate tool description should mention self-contained tasks: {}",
2781            def.description
2782        );
2783    }
2784
2785    #[test]
2786    fn delegate_tool_definition_includes_agents() {
2787        let agents: Vec<(&str, &str, &[String])> = vec![("researcher", "Research", &[])];
2788        let cached_definition = build_delegate_tool_schema(&agents, DispatchMode::Parallel);
2789
2790        let tool = DelegateTaskTool {
2791            shared_provider: Arc::new(BoxedProvider::new(MockProvider::new(vec![]))),
2792            sub_agents: vec![SubAgentDef {
2793                name: "researcher".into(),
2794                description: "Research".into(),
2795                system_prompt: "prompt".into(),
2796                tools: vec![],
2797                context_strategy: None,
2798                summarize_threshold: None,
2799                tool_timeout: None,
2800                max_tool_output_bytes: None,
2801                max_turns: None,
2802                max_tokens: None,
2803                response_schema: None,
2804                run_timeout: None,
2805                guardrails: vec![],
2806                provider_override: None,
2807                reasoning_effort: None,
2808                enable_reflection: None,
2809                tool_output_compression_threshold: None,
2810                max_tools_per_turn: None,
2811                tool_profile: None,
2812                max_identical_tool_calls: None,
2813                max_fuzzy_identical_tool_calls: None,
2814                max_tool_calls_per_turn: None,
2815                session_prune_config: None,
2816                enable_recursive_summarization: None,
2817                reflection_threshold: None,
2818                consolidate_on_exit: None,
2819                workspace: None,
2820                max_total_tokens: None,
2821                audit_trail: None,
2822                audit_user_id: None,
2823                audit_tenant_id: None,
2824                audit_delegation_chain: Vec::new(),
2825            }],
2826            shared_memory: None,
2827            memory_namespace_prefix: None,
2828            blackboard: None,
2829            knowledge_base: None,
2830            max_turns: 10,
2831            max_tokens: 4096,
2832            permission_rules: crate::agent::permission::PermissionRuleset::default(),
2833            accumulated_tokens: Arc::new(Mutex::new(TokenUsage::default())),
2834            cached_definition,
2835            on_event: None,
2836            on_text: None,
2837            lsp_manager: None,
2838            observability_mode: crate::ObservabilityMode::Production,
2839            allow_shared_write: true,
2840            tenant_tracker: None,
2841            guardrails: vec![],
2842        };
2843
2844        let def = tool.definition();
2845        assert_eq!(def.name, "delegate_task");
2846        assert!(def.description.contains("researcher"));
2847        assert!(
2848            def.description.contains("tools"),
2849            "delegate tool description should contain 'tools' key: {}",
2850            def.description
2851        );
2852    }
2853
2854    #[test]
2855    fn build_errors_on_duplicate_sub_agent_names() {
2856        let provider = Arc::new(MockProvider::new(vec![]));
2857        let result = Orchestrator::builder(provider)
2858            .sub_agent("researcher", "Research 1", "prompt1")
2859            .sub_agent("researcher", "Research 2", "prompt2")
2860            .build();
2861        assert!(result.is_err());
2862        let err = result.err().unwrap();
2863        assert!(
2864            err.to_string()
2865                .contains("duplicate sub-agent name: 'researcher'"),
2866            "error: {err}"
2867        );
2868    }
2869
2870    #[tokio::test]
2871    async fn orchestrator_direct_response_no_delegation() {
2872        let provider = Arc::new(MockProvider::new(vec![CompletionResponse {
2873            content: vec![ContentBlock::Text {
2874                text: "Simple answer.".into(),
2875            }],
2876            stop_reason: StopReason::EndTurn,
2877            usage: TokenUsage {
2878                input_tokens: 10,
2879                output_tokens: 5,
2880                ..Default::default()
2881            },
2882            model: None,
2883        }]));
2884
2885        let mut orch = Orchestrator::builder(provider)
2886            .sub_agent("researcher", "Research", "prompt")
2887            .build()
2888            .unwrap();
2889
2890        let output = orch.run("simple question").await.unwrap();
2891        assert_eq!(output.result, "Simple answer.");
2892        assert_eq!(output.tool_calls_made, 0);
2893    }
2894
2895    #[tokio::test]
2896    async fn orchestrator_delegates_and_synthesizes() {
2897        // Responses consumed in order by provider. The orchestrator calls provider,
2898        // then sub-agents call provider (in spawn order under single-threaded tokio test runtime),
2899        // then orchestrator calls provider again for synthesis.
2900        let provider = Arc::new(MockProvider::new(vec![
2901            // 1: Orchestrator decides to delegate
2902            CompletionResponse {
2903                content: vec![ContentBlock::ToolUse {
2904                    id: "call-1".into(),
2905                    name: "delegate_task".into(),
2906                    input: json!({
2907                        "tasks": [
2908                            {"agent": "researcher", "task": "Research Rust"},
2909                            {"agent": "analyst", "task": "Analyze findings"}
2910                        ]
2911                    }),
2912                }],
2913                stop_reason: StopReason::ToolUse,
2914                usage: TokenUsage {
2915                    input_tokens: 50,
2916                    output_tokens: 20,
2917                    ..Default::default()
2918                },
2919                model: None,
2920            },
2921            // 2: Sub-agent "researcher" response
2922            CompletionResponse {
2923                content: vec![ContentBlock::Text {
2924                    text: "Rust is fast and safe.".into(),
2925                }],
2926                stop_reason: StopReason::EndTurn,
2927                usage: TokenUsage {
2928                    input_tokens: 10,
2929                    output_tokens: 8,
2930                    ..Default::default()
2931                },
2932                model: None,
2933            },
2934            // 3: Sub-agent "analyst" response
2935            CompletionResponse {
2936                content: vec![ContentBlock::Text {
2937                    text: "Strengths: memory safety, performance.".into(),
2938                }],
2939                stop_reason: StopReason::EndTurn,
2940                usage: TokenUsage {
2941                    input_tokens: 12,
2942                    output_tokens: 10,
2943                    ..Default::default()
2944                },
2945                model: None,
2946            },
2947            // 4: Orchestrator synthesis
2948            CompletionResponse {
2949                content: vec![ContentBlock::Text {
2950                    text: "Based on research: Rust is excellent.".into(),
2951                }],
2952                stop_reason: StopReason::EndTurn,
2953                usage: TokenUsage {
2954                    input_tokens: 80,
2955                    output_tokens: 30,
2956                    ..Default::default()
2957                },
2958                model: None,
2959            },
2960        ]));
2961
2962        let mut orch = Orchestrator::builder(provider)
2963            .sub_agent("researcher", "Research specialist", "You research.")
2964            .sub_agent("analyst", "Analysis expert", "You analyze.")
2965            .build()
2966            .unwrap();
2967
2968        let output = orch.run("Analyze Rust").await.unwrap();
2969        assert_eq!(output.result, "Based on research: Rust is excellent.");
2970        assert_eq!(output.tool_calls_made, 1); // one delegate_task call
2971        // Orchestrator tokens (50+80 in, 20+30 out) + sub-agent tokens (10+12 in, 8+10 out)
2972        assert_eq!(output.tokens_used.input_tokens, 50 + 80 + 10 + 12);
2973        assert_eq!(output.tokens_used.output_tokens, 20 + 30 + 8 + 10);
2974    }
2975
2976    #[tokio::test]
2977    async fn orchestrator_handles_unknown_agent_gracefully() {
2978        // Unknown agent now returns error in tool result, not a hard crash
2979        let provider = Arc::new(MockProvider::new(vec![
2980            CompletionResponse {
2981                content: vec![ContentBlock::ToolUse {
2982                    id: "call-1".into(),
2983                    name: "delegate_task".into(),
2984                    input: json!({
2985                        "tasks": [{"agent": "nonexistent", "task": "do stuff"}]
2986                    }),
2987                }],
2988                stop_reason: StopReason::ToolUse,
2989                usage: TokenUsage::default(),
2990                model: None,
2991            },
2992            // Orchestrator recovers after seeing the error
2993            CompletionResponse {
2994                content: vec![ContentBlock::Text {
2995                    text: "No such agent available.".into(),
2996                }],
2997                stop_reason: StopReason::EndTurn,
2998                usage: TokenUsage::default(),
2999                model: None,
3000            },
3001        ]));
3002
3003        let mut orch = Orchestrator::builder(provider)
3004            .sub_agent("researcher", "Research", "prompt")
3005            .build()
3006            .unwrap();
3007
3008        let output = orch.run("delegate to unknown").await.unwrap();
3009        assert_eq!(output.result, "No such agent available.");
3010    }
3011
3012    #[tokio::test]
3013    async fn orchestrator_handles_invalid_tool_name() {
3014        let provider = Arc::new(MockProvider::new(vec![
3015            CompletionResponse {
3016                content: vec![ContentBlock::ToolUse {
3017                    id: "call-1".into(),
3018                    name: "wrong_tool".into(),
3019                    input: json!({}),
3020                }],
3021                stop_reason: StopReason::ToolUse,
3022                usage: TokenUsage::default(),
3023                model: None,
3024            },
3025            CompletionResponse {
3026                content: vec![ContentBlock::Text {
3027                    text: "Sorry, let me respond directly.".into(),
3028                }],
3029                stop_reason: StopReason::EndTurn,
3030                usage: TokenUsage::default(),
3031                model: None,
3032            },
3033        ]));
3034
3035        let mut orch = Orchestrator::builder(provider)
3036            .sub_agent("researcher", "Research", "prompt")
3037            .build()
3038            .unwrap();
3039
3040        let output = orch.run("do something").await.unwrap();
3041        assert_eq!(output.result, "Sorry, let me respond directly.");
3042    }
3043
3044    #[tokio::test]
3045    async fn orchestrator_handles_empty_delegate_tasks() {
3046        let provider = Arc::new(MockProvider::new(vec![
3047            // 1: LLM sends delegate_task with empty tasks array
3048            CompletionResponse {
3049                content: vec![ContentBlock::ToolUse {
3050                    id: "call-1".into(),
3051                    name: "delegate_task".into(),
3052                    input: json!({"tasks": []}),
3053                }],
3054                stop_reason: StopReason::ToolUse,
3055                usage: TokenUsage::default(),
3056                model: None,
3057            },
3058            // 2: LLM recovers after seeing the error
3059            CompletionResponse {
3060                content: vec![ContentBlock::Text {
3061                    text: "Let me try again properly.".into(),
3062                }],
3063                stop_reason: StopReason::EndTurn,
3064                usage: TokenUsage::default(),
3065                model: None,
3066            },
3067        ]));
3068
3069        let mut orch = Orchestrator::builder(provider)
3070            .sub_agent("researcher", "Research", "prompt")
3071            .build()
3072            .unwrap();
3073
3074        let output = orch.run("do something").await.unwrap();
3075        assert_eq!(output.result, "Let me try again properly.");
3076    }
3077
3078    #[tokio::test]
3079    async fn orchestrator_handles_missing_tasks_field() {
3080        let provider = Arc::new(MockProvider::new(vec![
3081            // 1: LLM sends delegate_task without tasks field
3082            CompletionResponse {
3083                content: vec![ContentBlock::ToolUse {
3084                    id: "call-1".into(),
3085                    name: "delegate_task".into(),
3086                    input: json!({}),
3087                }],
3088                stop_reason: StopReason::ToolUse,
3089                usage: TokenUsage::default(),
3090                model: None,
3091            },
3092            // 2: LLM recovers after seeing the parse error
3093            CompletionResponse {
3094                content: vec![ContentBlock::Text {
3095                    text: "I need to format correctly.".into(),
3096                }],
3097                stop_reason: StopReason::EndTurn,
3098                usage: TokenUsage::default(),
3099                model: None,
3100            },
3101        ]));
3102
3103        let mut orch = Orchestrator::builder(provider)
3104            .sub_agent("researcher", "Research", "prompt")
3105            .build()
3106            .unwrap();
3107
3108        let output = orch.run("do something").await.unwrap();
3109        assert_eq!(output.result, "I need to format correctly.");
3110    }
3111
3112    #[tokio::test]
3113    async fn blackboard_populated_after_delegation() {
3114        use crate::agent::blackboard::InMemoryBlackboard;
3115
3116        let bb = Arc::new(InMemoryBlackboard::new());
3117
3118        let provider = Arc::new(MockProvider::new(vec![
3119            // 1: Orchestrator delegates to researcher
3120            CompletionResponse {
3121                content: vec![ContentBlock::ToolUse {
3122                    id: "call-1".into(),
3123                    name: "delegate_task".into(),
3124                    input: json!({
3125                        "tasks": [{"agent": "researcher", "task": "Find info"}]
3126                    }),
3127                }],
3128                stop_reason: StopReason::ToolUse,
3129                usage: TokenUsage::default(),
3130                model: None,
3131            },
3132            // 2: Sub-agent responds
3133            CompletionResponse {
3134                content: vec![ContentBlock::Text {
3135                    text: "Research result here.".into(),
3136                }],
3137                stop_reason: StopReason::EndTurn,
3138                usage: TokenUsage::default(),
3139                model: None,
3140            },
3141            // 3: Orchestrator synthesis
3142            CompletionResponse {
3143                content: vec![ContentBlock::Text {
3144                    text: "Done.".into(),
3145                }],
3146                stop_reason: StopReason::EndTurn,
3147                usage: TokenUsage::default(),
3148                model: None,
3149            },
3150        ]));
3151
3152        let mut orch = Orchestrator::builder(provider)
3153            .sub_agent("researcher", "Research specialist", "You research.")
3154            .blackboard(bb.clone())
3155            .build()
3156            .unwrap();
3157
3158        orch.run("research something").await.unwrap();
3159
3160        // Verify the blackboard has the agent result
3161        let val: Option<serde_json::Value> = bb.read("agent:researcher").await.unwrap();
3162        assert!(val.is_some(), "blackboard should have agent:researcher key");
3163        assert_eq!(
3164            val.unwrap(),
3165            serde_json::Value::String("Research result here.".into())
3166        );
3167    }
3168
3169    #[tokio::test]
3170    async fn sub_agents_receive_blackboard_tools() {
3171        use crate::agent::blackboard::InMemoryBlackboard;
3172        use crate::llm::types::CompletionRequest;
3173
3174        // Track tool definitions seen by the sub-agent
3175        struct ToolTrackingProvider {
3176            responses: Mutex<Vec<CompletionResponse>>,
3177            tool_names_seen: Mutex<Vec<Vec<String>>>,
3178        }
3179
3180        impl LlmProvider for ToolTrackingProvider {
3181            async fn complete(
3182                &self,
3183                request: CompletionRequest,
3184            ) -> Result<CompletionResponse, Error> {
3185                let names: Vec<String> = request.tools.iter().map(|t| t.name.clone()).collect();
3186                self.tool_names_seen.lock().expect("lock").push(names);
3187
3188                let mut responses = self.responses.lock().expect("lock");
3189                if responses.is_empty() {
3190                    return Err(Error::Agent("no more mock responses".into()));
3191                }
3192                Ok(responses.remove(0))
3193            }
3194        }
3195
3196        let bb = Arc::new(InMemoryBlackboard::new());
3197
3198        let provider = Arc::new(ToolTrackingProvider {
3199            responses: Mutex::new(vec![
3200                // 1: Orchestrator delegates
3201                CompletionResponse {
3202                    content: vec![ContentBlock::ToolUse {
3203                        id: "call-1".into(),
3204                        name: "delegate_task".into(),
3205                        input: json!({
3206                            "tasks": [{"agent": "worker", "task": "do work"}]
3207                        }),
3208                    }],
3209                    stop_reason: StopReason::ToolUse,
3210                    usage: TokenUsage::default(),
3211                    model: None,
3212                },
3213                // 2: Sub-agent responds
3214                CompletionResponse {
3215                    content: vec![ContentBlock::Text {
3216                        text: "Work done.".into(),
3217                    }],
3218                    stop_reason: StopReason::EndTurn,
3219                    usage: TokenUsage::default(),
3220                    model: None,
3221                },
3222                // 3: Orchestrator synthesis
3223                CompletionResponse {
3224                    content: vec![ContentBlock::Text {
3225                        text: "All done.".into(),
3226                    }],
3227                    stop_reason: StopReason::EndTurn,
3228                    usage: TokenUsage::default(),
3229                    model: None,
3230                },
3231            ]),
3232            tool_names_seen: Mutex::new(vec![]),
3233        });
3234
3235        let mut orch = Orchestrator::builder(provider.clone())
3236            .sub_agent("worker", "Worker agent", "You work.")
3237            .blackboard(bb)
3238            .build()
3239            .unwrap();
3240
3241        orch.run("do work").await.unwrap();
3242
3243        // The second LLM call is from the sub-agent — check its tools
3244        let all_tool_names = provider.tool_names_seen.lock().expect("lock");
3245        assert!(
3246            all_tool_names.len() >= 2,
3247            "expected at least 2 LLM calls, got {}",
3248            all_tool_names.len()
3249        );
3250        let sub_agent_tools = &all_tool_names[1];
3251        assert!(
3252            sub_agent_tools.contains(&"blackboard_read".to_string()),
3253            "sub-agent should have blackboard_read tool, got: {sub_agent_tools:?}"
3254        );
3255        assert!(
3256            sub_agent_tools.contains(&"blackboard_write".to_string()),
3257            "sub-agent should have blackboard_write tool, got: {sub_agent_tools:?}"
3258        );
3259        assert!(
3260            sub_agent_tools.contains(&"blackboard_list".to_string()),
3261            "sub-agent should have blackboard_list tool, got: {sub_agent_tools:?}"
3262        );
3263    }
3264
3265    #[test]
3266    fn blackboard_builder_method_works() {
3267        use crate::agent::blackboard::InMemoryBlackboard;
3268
3269        let bb = Arc::new(InMemoryBlackboard::new());
3270        let provider = Arc::new(MockProvider::new(vec![]));
3271
3272        // Should build successfully with blackboard
3273        let result = Orchestrator::builder(provider)
3274            .sub_agent("agent1", "Agent one", "You are agent 1.")
3275            .blackboard(bb)
3276            .build();
3277
3278        assert!(result.is_ok());
3279    }
3280
3281    #[test]
3282    fn knowledge_builder_method_works() {
3283        use crate::knowledge::in_memory::InMemoryKnowledgeBase;
3284
3285        let kb: Arc<dyn KnowledgeBase> = Arc::new(InMemoryKnowledgeBase::new());
3286        let provider = Arc::new(MockProvider::new(vec![]));
3287
3288        let result = Orchestrator::builder(provider)
3289            .sub_agent("agent1", "Agent one", "You are agent 1.")
3290            .knowledge(kb)
3291            .build();
3292
3293        assert!(result.is_ok());
3294    }
3295
3296    #[tokio::test]
3297    async fn sub_agents_receive_knowledge_tools() {
3298        use crate::knowledge::in_memory::InMemoryKnowledgeBase;
3299        use crate::llm::types::CompletionRequest;
3300
3301        struct ToolTrackingProvider {
3302            responses: Mutex<Vec<CompletionResponse>>,
3303            tool_names_seen: Mutex<Vec<Vec<String>>>,
3304        }
3305
3306        impl LlmProvider for ToolTrackingProvider {
3307            async fn complete(
3308                &self,
3309                request: CompletionRequest,
3310            ) -> Result<CompletionResponse, Error> {
3311                let names: Vec<String> = request.tools.iter().map(|t| t.name.clone()).collect();
3312                self.tool_names_seen.lock().expect("lock").push(names);
3313
3314                let mut responses = self.responses.lock().expect("lock");
3315                if responses.is_empty() {
3316                    return Err(Error::Agent("no more mock responses".into()));
3317                }
3318                Ok(responses.remove(0))
3319            }
3320        }
3321
3322        let kb: Arc<dyn KnowledgeBase> = Arc::new(InMemoryKnowledgeBase::new());
3323
3324        let provider = Arc::new(ToolTrackingProvider {
3325            responses: Mutex::new(vec![
3326                // 1: Orchestrator delegates
3327                CompletionResponse {
3328                    content: vec![ContentBlock::ToolUse {
3329                        id: "call-1".into(),
3330                        name: "delegate_task".into(),
3331                        input: json!({
3332                            "tasks": [{"agent": "worker", "task": "do work"}]
3333                        }),
3334                    }],
3335                    stop_reason: StopReason::ToolUse,
3336                    usage: TokenUsage::default(),
3337                    model: None,
3338                },
3339                // 2: Sub-agent responds
3340                CompletionResponse {
3341                    content: vec![ContentBlock::Text {
3342                        text: "Work done.".into(),
3343                    }],
3344                    stop_reason: StopReason::EndTurn,
3345                    usage: TokenUsage::default(),
3346                    model: None,
3347                },
3348                // 3: Orchestrator synthesis
3349                CompletionResponse {
3350                    content: vec![ContentBlock::Text {
3351                        text: "All done.".into(),
3352                    }],
3353                    stop_reason: StopReason::EndTurn,
3354                    usage: TokenUsage::default(),
3355                    model: None,
3356                },
3357            ]),
3358            tool_names_seen: Mutex::new(vec![]),
3359        });
3360
3361        let mut orch = Orchestrator::builder(provider.clone())
3362            .sub_agent("worker", "Worker agent", "You work.")
3363            .knowledge(kb)
3364            .build()
3365            .unwrap();
3366
3367        orch.run("do work").await.unwrap();
3368
3369        let all_tool_names = provider.tool_names_seen.lock().expect("lock");
3370        assert!(
3371            all_tool_names.len() >= 2,
3372            "expected at least 2 LLM calls, got {}",
3373            all_tool_names.len()
3374        );
3375        let sub_agent_tools = &all_tool_names[1];
3376        assert!(
3377            sub_agent_tools.contains(&"knowledge_search".to_string()),
3378            "sub-agent should have knowledge_search tool, got: {sub_agent_tools:?}"
3379        );
3380    }
3381
3382    #[tokio::test]
3383    async fn orchestrator_accumulates_cache_tokens_through_delegation() {
3384        let provider = Arc::new(MockProvider::new(vec![
3385            // 1: Orchestrator decides to delegate
3386            CompletionResponse {
3387                content: vec![ContentBlock::ToolUse {
3388                    id: "call-1".into(),
3389                    name: "delegate_task".into(),
3390                    input: json!({
3391                        "tasks": [{"agent": "researcher", "task": "Research Rust"}]
3392                    }),
3393                }],
3394                stop_reason: StopReason::ToolUse,
3395                usage: TokenUsage {
3396                    input_tokens: 50,
3397                    output_tokens: 20,
3398                    cache_creation_input_tokens: 100,
3399                    cache_read_input_tokens: 0,
3400                    reasoning_tokens: 0,
3401                },
3402                model: None,
3403            },
3404            // 2: Sub-agent "researcher" response (cache hit on second call)
3405            CompletionResponse {
3406                content: vec![ContentBlock::Text {
3407                    text: "Rust is fast.".into(),
3408                }],
3409                stop_reason: StopReason::EndTurn,
3410                usage: TokenUsage {
3411                    input_tokens: 10,
3412                    output_tokens: 8,
3413                    cache_creation_input_tokens: 0,
3414                    cache_read_input_tokens: 30,
3415                    reasoning_tokens: 0,
3416                },
3417                model: None,
3418            },
3419            // 3: Orchestrator synthesis (cache hit)
3420            CompletionResponse {
3421                content: vec![ContentBlock::Text {
3422                    text: "Rust is excellent.".into(),
3423                }],
3424                stop_reason: StopReason::EndTurn,
3425                usage: TokenUsage {
3426                    input_tokens: 80,
3427                    output_tokens: 30,
3428                    cache_creation_input_tokens: 0,
3429                    cache_read_input_tokens: 90,
3430                    reasoning_tokens: 0,
3431                },
3432                model: None,
3433            },
3434        ]));
3435
3436        let mut orch = Orchestrator::builder(provider)
3437            .sub_agent("researcher", "Research specialist", "You research.")
3438            .build()
3439            .unwrap();
3440
3441        let output = orch.run("Analyze Rust").await.unwrap();
3442        // Orchestrator: 50+80=130 in, 20+30=50 out, 100+0 cache_create, 0+90 cache_read
3443        // Sub-agent: 10 in, 8 out, 0 cache_create, 30 cache_read
3444        assert_eq!(output.tokens_used.input_tokens, 50 + 80 + 10);
3445        assert_eq!(output.tokens_used.output_tokens, 20 + 30 + 8);
3446        assert_eq!(output.tokens_used.cache_creation_input_tokens, 100);
3447        assert_eq!(output.tokens_used.cache_read_input_tokens, 90 + 30);
3448    }
3449
3450    #[tokio::test]
3451    async fn orchestrator_error_includes_sub_agent_tokens() {
3452        // Scenario: orchestrator delegates, sub-agent succeeds, but orchestrator
3453        // hits max turns before synthesizing. The error's partial_usage should
3454        // include both orchestrator AND sub-agent tokens.
3455        let provider = Arc::new(MockProvider::new(vec![
3456            // 1: Orchestrator delegates
3457            CompletionResponse {
3458                content: vec![ContentBlock::ToolUse {
3459                    id: "call-1".into(),
3460                    name: "delegate_task".into(),
3461                    input: json!({
3462                        "tasks": [{"agent": "researcher", "task": "Research Rust"}]
3463                    }),
3464                }],
3465                stop_reason: StopReason::ToolUse,
3466                usage: TokenUsage {
3467                    input_tokens: 50,
3468                    output_tokens: 20,
3469                    ..Default::default()
3470                },
3471                model: None,
3472            },
3473            // 2: Sub-agent responds (tokens we must NOT lose)
3474            CompletionResponse {
3475                content: vec![ContentBlock::Text {
3476                    text: "Rust is fast.".into(),
3477                }],
3478                stop_reason: StopReason::EndTurn,
3479                usage: TokenUsage {
3480                    input_tokens: 15,
3481                    output_tokens: 10,
3482                    ..Default::default()
3483                },
3484                model: None,
3485            },
3486            // 3: Orchestrator tries to delegate again (turn 2 = max_turns exceeded)
3487            CompletionResponse {
3488                content: vec![ContentBlock::ToolUse {
3489                    id: "call-2".into(),
3490                    name: "delegate_task".into(),
3491                    input: json!({
3492                        "tasks": [{"agent": "researcher", "task": "More research"}]
3493                    }),
3494                }],
3495                stop_reason: StopReason::ToolUse,
3496                usage: TokenUsage {
3497                    input_tokens: 80,
3498                    output_tokens: 25,
3499                    ..Default::default()
3500                },
3501                model: None,
3502            },
3503            // 4: Second sub-agent call
3504            CompletionResponse {
3505                content: vec![ContentBlock::Text {
3506                    text: "More info.".into(),
3507                }],
3508                stop_reason: StopReason::EndTurn,
3509                usage: TokenUsage {
3510                    input_tokens: 12,
3511                    output_tokens: 8,
3512                    ..Default::default()
3513                },
3514                model: None,
3515            },
3516        ]));
3517
3518        let mut orch = Orchestrator::builder(provider)
3519            .sub_agent("researcher", "Research", "prompt")
3520            .max_turns(2)
3521            .build()
3522            .unwrap();
3523
3524        let err = orch.run("research deeply").await.unwrap_err();
3525
3526        // Error should be exactly one layer of WithPartialUsage wrapping MaxTurnsExceeded
3527        match &err {
3528            Error::WithPartialUsage { source, .. } => {
3529                assert!(
3530                    matches!(**source, Error::MaxTurnsExceeded(2)),
3531                    "inner error should be MaxTurnsExceeded(2), got: {source}"
3532                );
3533            }
3534            other => panic!("expected WithPartialUsage, got: {other}"),
3535        }
3536
3537        let usage = err.partial_usage();
3538        // Orchestrator: 50+80 in, 20+25 out. Sub-agents: 15+12 in, 10+8 out.
3539        assert_eq!(
3540            usage.input_tokens,
3541            50 + 80 + 15 + 12,
3542            "input tokens: orchestrator(50+80) + sub-agent(15+12)"
3543        );
3544        assert_eq!(
3545            usage.output_tokens,
3546            20 + 25 + 10 + 8,
3547            "output tokens: orchestrator(20+25) + sub-agent(10+8)"
3548        );
3549    }
3550
3551    #[tokio::test]
3552    async fn on_event_emits_sub_agent_events() {
3553        use crate::agent::events::AgentEvent;
3554
3555        let events: Arc<std::sync::Mutex<Vec<AgentEvent>>> =
3556            Arc::new(std::sync::Mutex::new(vec![]));
3557        let events_clone = events.clone();
3558
3559        let provider = Arc::new(MockProvider::new(vec![
3560            // 1: Orchestrator delegates to one agent
3561            CompletionResponse {
3562                content: vec![ContentBlock::ToolUse {
3563                    id: "call-1".into(),
3564                    name: "delegate_task".into(),
3565                    input: json!({
3566                        "tasks": [{"agent": "researcher", "task": "Research Rust"}]
3567                    }),
3568                }],
3569                stop_reason: StopReason::ToolUse,
3570                usage: TokenUsage::default(),
3571                model: None,
3572            },
3573            // 2: Sub-agent responds
3574            CompletionResponse {
3575                content: vec![ContentBlock::Text {
3576                    text: "Rust is fast.".into(),
3577                }],
3578                stop_reason: StopReason::EndTurn,
3579                usage: TokenUsage {
3580                    input_tokens: 10,
3581                    output_tokens: 5,
3582                    ..Default::default()
3583                },
3584                model: None,
3585            },
3586            // 3: Orchestrator synthesizes
3587            CompletionResponse {
3588                content: vec![ContentBlock::Text {
3589                    text: "Summary: Rust is fast.".into(),
3590                }],
3591                stop_reason: StopReason::EndTurn,
3592                usage: TokenUsage::default(),
3593                model: None,
3594            },
3595        ]));
3596
3597        let mut orch = Orchestrator::builder(provider)
3598            .sub_agent("researcher", "Research", "prompt")
3599            .on_event(Arc::new(move |e| {
3600                events_clone.lock().unwrap().push(e);
3601            }))
3602            .build()
3603            .unwrap();
3604
3605        orch.run("research task").await.unwrap();
3606
3607        let events = events.lock().unwrap();
3608
3609        let dispatched: Vec<_> = events
3610            .iter()
3611            .filter(|e| matches!(e, AgentEvent::SubAgentsDispatched { .. }))
3612            .collect();
3613        assert_eq!(dispatched.len(), 1, "expected 1 SubAgentsDispatched");
3614        match &dispatched[0] {
3615            AgentEvent::SubAgentsDispatched { agent, agents } => {
3616                assert_eq!(agent, "orchestrator");
3617                assert_eq!(agents, &["researcher"]);
3618            }
3619            _ => unreachable!(),
3620        }
3621
3622        let completed: Vec<_> = events
3623            .iter()
3624            .filter(|e| matches!(e, AgentEvent::SubAgentCompleted { .. }))
3625            .collect();
3626        assert_eq!(completed.len(), 1, "expected 1 SubAgentCompleted");
3627        match &completed[0] {
3628            AgentEvent::SubAgentCompleted {
3629                agent,
3630                success,
3631                usage,
3632            } => {
3633                assert_eq!(agent, "researcher");
3634                assert!(success);
3635                assert_eq!(usage.input_tokens, 10);
3636            }
3637            _ => unreachable!(),
3638        }
3639    }
3640
3641    #[tokio::test]
3642    async fn sub_agent_receives_guardrails() {
3643        use crate::agent::guardrail::Guardrail;
3644        use crate::llm::types::CompletionRequest;
3645
3646        // A guardrail that injects a marker into system prompts
3647        struct MarkerGuardrail;
3648        impl Guardrail for MarkerGuardrail {
3649            fn pre_llm(
3650                &self,
3651                request: &mut CompletionRequest,
3652            ) -> std::pin::Pin<
3653                Box<dyn std::future::Future<Output = Result<(), crate::error::Error>> + Send + '_>,
3654            > {
3655                request.system = format!("{} [GUARDRAIL_ACTIVE]", request.system);
3656                Box::pin(async { Ok(()) })
3657            }
3658        }
3659
3660        struct CapturingProvider {
3661            responses: Mutex<Vec<CompletionResponse>>,
3662            systems_seen: Mutex<Vec<String>>,
3663        }
3664
3665        impl LlmProvider for CapturingProvider {
3666            async fn complete(
3667                &self,
3668                request: CompletionRequest,
3669            ) -> Result<CompletionResponse, crate::error::Error> {
3670                self.systems_seen
3671                    .lock()
3672                    .unwrap()
3673                    .push(request.system.clone());
3674                let mut responses = self.responses.lock().unwrap();
3675                if responses.is_empty() {
3676                    return Err(crate::error::Error::Agent("no more responses".into()));
3677                }
3678                Ok(responses.remove(0))
3679            }
3680        }
3681
3682        let guardrail: Arc<dyn Guardrail> = Arc::new(MarkerGuardrail);
3683
3684        let provider = Arc::new(CapturingProvider {
3685            responses: Mutex::new(vec![
3686                // 1: Orchestrator delegates
3687                CompletionResponse {
3688                    content: vec![ContentBlock::ToolUse {
3689                        id: "call-1".into(),
3690                        name: "delegate_task".into(),
3691                        input: json!({
3692                            "tasks": [{"agent": "worker", "task": "do work"}]
3693                        }),
3694                    }],
3695                    stop_reason: StopReason::ToolUse,
3696                    usage: TokenUsage::default(),
3697                    model: None,
3698                },
3699                // 2: Sub-agent responds
3700                CompletionResponse {
3701                    content: vec![ContentBlock::Text {
3702                        text: "Work done.".into(),
3703                    }],
3704                    stop_reason: StopReason::EndTurn,
3705                    usage: TokenUsage::default(),
3706                    model: None,
3707                },
3708                // 3: Orchestrator synthesis
3709                CompletionResponse {
3710                    content: vec![ContentBlock::Text {
3711                        text: "All done.".into(),
3712                    }],
3713                    stop_reason: StopReason::EndTurn,
3714                    usage: TokenUsage::default(),
3715                    model: None,
3716                },
3717            ]),
3718            systems_seen: Mutex::new(vec![]),
3719        });
3720
3721        let mut orch = Orchestrator::builder(provider.clone())
3722            .sub_agent_full(SubAgentConfig {
3723                name: "worker".into(),
3724                description: "Worker agent".into(),
3725                system_prompt: "You work.".into(),
3726                tools: vec![],
3727                context_strategy: None,
3728                summarize_threshold: None,
3729                tool_timeout: None,
3730                max_tool_output_bytes: None,
3731                max_turns: None,
3732                max_tokens: None,
3733                response_schema: None,
3734                run_timeout: None,
3735                guardrails: vec![guardrail],
3736                provider: None,
3737                reasoning_effort: None,
3738                enable_reflection: None,
3739                tool_output_compression_threshold: None,
3740                max_tools_per_turn: None,
3741                tool_profile: None,
3742                max_identical_tool_calls: None,
3743                max_fuzzy_identical_tool_calls: None,
3744                max_tool_calls_per_turn: None,
3745                session_prune_config: None,
3746                enable_recursive_summarization: None,
3747                reflection_threshold: None,
3748                consolidate_on_exit: None,
3749                workspace: None,
3750                max_total_tokens: None,
3751                audit_trail: None,
3752                audit_user_id: None,
3753                audit_tenant_id: None,
3754                audit_delegation_chain: Vec::new(),
3755            })
3756            .build()
3757            .unwrap();
3758
3759        orch.run("do work").await.unwrap();
3760
3761        // The sub-agent's system prompt (second LLM call) should contain the guardrail marker
3762        let systems = provider.systems_seen.lock().unwrap();
3763        assert!(
3764            systems.len() >= 2,
3765            "expected at least 2 LLM calls, got {}",
3766            systems.len()
3767        );
3768        // systems[1] is the sub-agent call
3769        assert!(
3770            systems[1].contains("[GUARDRAIL_ACTIVE]"),
3771            "sub-agent system prompt should contain guardrail marker: {}",
3772            systems[1]
3773        );
3774        // systems[0] is the orchestrator call (no guardrail on orchestrator)
3775        assert!(
3776            !systems[0].contains("[GUARDRAIL_ACTIVE]"),
3777            "orchestrator system prompt should NOT contain guardrail marker: {}",
3778            systems[0]
3779        );
3780    }
3781
3782    /// SECURITY (F-AGENT-2): orchestrator-level guardrails must propagate to
3783    /// sub-agents launched via the delegate_task path (not just via SpawnAgentTool).
3784    /// Before the fix, an opérator who hardened the orchestrator with PII /
3785    /// secret-scanner / LLM-judge guardrails saw their defenses silently drop the
3786    /// moment work was delegated to a sub-agent that had no guardrails of its own.
3787    #[tokio::test]
3788    async fn orchestrator_guardrails_propagate_to_delegated_sub_agents() {
3789        use crate::agent::guardrail::Guardrail;
3790        use crate::llm::types::CompletionRequest;
3791
3792        struct MarkerGuardrail;
3793        impl Guardrail for MarkerGuardrail {
3794            fn pre_llm(
3795                &self,
3796                request: &mut CompletionRequest,
3797            ) -> std::pin::Pin<
3798                Box<dyn std::future::Future<Output = Result<(), crate::error::Error>> + Send + '_>,
3799            > {
3800                request.system = format!("{} [ORCH_GUARD_ACTIVE]", request.system);
3801                Box::pin(async { Ok(()) })
3802            }
3803        }
3804
3805        struct CapturingProvider {
3806            responses: Mutex<Vec<CompletionResponse>>,
3807            systems_seen: Mutex<Vec<String>>,
3808        }
3809
3810        impl LlmProvider for CapturingProvider {
3811            async fn complete(
3812                &self,
3813                request: CompletionRequest,
3814            ) -> Result<CompletionResponse, crate::error::Error> {
3815                self.systems_seen
3816                    .lock()
3817                    .unwrap()
3818                    .push(request.system.clone());
3819                let mut responses = self.responses.lock().unwrap();
3820                if responses.is_empty() {
3821                    return Err(crate::error::Error::Agent("no more responses".into()));
3822                }
3823                Ok(responses.remove(0))
3824            }
3825        }
3826
3827        let guardrail: Arc<dyn Guardrail> = Arc::new(MarkerGuardrail);
3828
3829        let provider = Arc::new(CapturingProvider {
3830            responses: Mutex::new(vec![
3831                // 1: Orchestrator delegates
3832                CompletionResponse {
3833                    content: vec![ContentBlock::ToolUse {
3834                        id: "call-1".into(),
3835                        name: "delegate_task".into(),
3836                        input: json!({
3837                            "tasks": [{"agent": "worker", "task": "do work"}]
3838                        }),
3839                    }],
3840                    stop_reason: StopReason::ToolUse,
3841                    usage: TokenUsage::default(),
3842                    model: None,
3843                },
3844                // 2: Sub-agent responds (NO guardrail of its own)
3845                CompletionResponse {
3846                    content: vec![ContentBlock::Text {
3847                        text: "Work done.".into(),
3848                    }],
3849                    stop_reason: StopReason::EndTurn,
3850                    usage: TokenUsage::default(),
3851                    model: None,
3852                },
3853                // 3: Orchestrator synthesis
3854                CompletionResponse {
3855                    content: vec![ContentBlock::Text {
3856                        text: "Synthesized.".into(),
3857                    }],
3858                    stop_reason: StopReason::EndTurn,
3859                    usage: TokenUsage::default(),
3860                    model: None,
3861                },
3862            ]),
3863            systems_seen: Mutex::new(vec![]),
3864        });
3865
3866        // Sub-agent has NO guardrails of its own — the only protection comes
3867        // from the orchestrator-level guardrail being propagated.
3868        let mut orch = Orchestrator::builder(provider.clone())
3869            .guardrail(guardrail)
3870            .sub_agent_full(SubAgentConfig {
3871                name: "worker".into(),
3872                description: "Worker agent".into(),
3873                system_prompt: "You work.".into(),
3874                tools: vec![],
3875                context_strategy: None,
3876                summarize_threshold: None,
3877                tool_timeout: None,
3878                max_tool_output_bytes: None,
3879                max_turns: None,
3880                max_tokens: None,
3881                response_schema: None,
3882                run_timeout: None,
3883                guardrails: vec![],
3884                provider: None,
3885                reasoning_effort: None,
3886                enable_reflection: None,
3887                tool_output_compression_threshold: None,
3888                max_tools_per_turn: None,
3889                tool_profile: None,
3890                max_identical_tool_calls: None,
3891                max_fuzzy_identical_tool_calls: None,
3892                max_tool_calls_per_turn: None,
3893                session_prune_config: None,
3894                enable_recursive_summarization: None,
3895                reflection_threshold: None,
3896                consolidate_on_exit: None,
3897                workspace: None,
3898                max_total_tokens: None,
3899                audit_trail: None,
3900                audit_user_id: None,
3901                audit_tenant_id: None,
3902                audit_delegation_chain: Vec::new(),
3903            })
3904            .build()
3905            .unwrap();
3906
3907        orch.run("do work").await.unwrap();
3908
3909        let systems = provider.systems_seen.lock().unwrap();
3910        assert!(systems.len() >= 2, "expected at least 2 LLM calls");
3911        // systems[1] is the sub-agent call. The marker MUST be present —
3912        // proves the orchestrator's guardrail propagated to the delegated agent.
3913        assert!(
3914            systems[1].contains("[ORCH_GUARD_ACTIVE]"),
3915            "sub-agent system prompt should contain orchestrator guardrail marker; got: {}",
3916            systems[1]
3917        );
3918    }
3919
3920    #[test]
3921    fn build_rejects_sub_agent_with_zero_max_turns() {
3922        let provider = Arc::new(MockProvider::new(vec![]));
3923        let result = Orchestrator::builder(provider)
3924            .sub_agent_full(SubAgentConfig {
3925                name: "agent1".into(),
3926                description: "Test agent".into(),
3927                system_prompt: "prompt".into(),
3928                tools: vec![],
3929                context_strategy: None,
3930                summarize_threshold: None,
3931                tool_timeout: None,
3932                max_tool_output_bytes: None,
3933                max_turns: Some(0),
3934                max_tokens: None,
3935                response_schema: None,
3936                run_timeout: None,
3937                guardrails: vec![],
3938                provider: None,
3939                reasoning_effort: None,
3940                enable_reflection: None,
3941                tool_output_compression_threshold: None,
3942                max_tools_per_turn: None,
3943                tool_profile: None,
3944                max_identical_tool_calls: None,
3945                max_fuzzy_identical_tool_calls: None,
3946                max_tool_calls_per_turn: None,
3947                session_prune_config: None,
3948                enable_recursive_summarization: None,
3949                reflection_threshold: None,
3950                consolidate_on_exit: None,
3951                workspace: None,
3952                max_total_tokens: None,
3953                audit_trail: None,
3954                audit_user_id: None,
3955                audit_tenant_id: None,
3956                audit_delegation_chain: Vec::new(),
3957            })
3958            .build();
3959
3960        match result {
3961            Err(e) => assert!(
3962                e.to_string().contains("max_turns must be > 0"),
3963                "expected max_turns error, got: {e}"
3964            ),
3965            Ok(_) => panic!("expected build to fail with zero max_turns"),
3966        }
3967    }
3968
3969    #[test]
3970    fn build_rejects_sub_agent_with_zero_max_tokens() {
3971        let provider = Arc::new(MockProvider::new(vec![]));
3972        let result = Orchestrator::builder(provider)
3973            .sub_agent_full(SubAgentConfig {
3974                name: "agent1".into(),
3975                description: "Test agent".into(),
3976                system_prompt: "prompt".into(),
3977                tools: vec![],
3978                context_strategy: None,
3979                summarize_threshold: None,
3980                tool_timeout: None,
3981                max_tool_output_bytes: None,
3982                max_turns: None,
3983                max_tokens: Some(0),
3984                response_schema: None,
3985                run_timeout: None,
3986                guardrails: vec![],
3987                provider: None,
3988                reasoning_effort: None,
3989                enable_reflection: None,
3990                tool_output_compression_threshold: None,
3991                max_tools_per_turn: None,
3992                tool_profile: None,
3993                max_identical_tool_calls: None,
3994                max_fuzzy_identical_tool_calls: None,
3995                max_tool_calls_per_turn: None,
3996                session_prune_config: None,
3997                enable_recursive_summarization: None,
3998                reflection_threshold: None,
3999                consolidate_on_exit: None,
4000                workspace: None,
4001                max_total_tokens: None,
4002                audit_trail: None,
4003                audit_user_id: None,
4004                audit_tenant_id: None,
4005                audit_delegation_chain: Vec::new(),
4006            })
4007            .build();
4008
4009        match result {
4010            Err(e) => assert!(
4011                e.to_string().contains("max_tokens must be > 0"),
4012                "expected max_tokens error, got: {e}"
4013            ),
4014            Ok(_) => panic!("expected build to fail with zero max_tokens"),
4015        }
4016    }
4017
4018    #[tokio::test]
4019    async fn sub_agent_uses_override_provider() {
4020        use crate::llm::types::CompletionRequest;
4021
4022        // Provider that returns a model identifier in the response text
4023        struct IdentifiedProvider {
4024            id: String,
4025            responses: Mutex<Vec<CompletionResponse>>,
4026        }
4027
4028        impl LlmProvider for IdentifiedProvider {
4029            async fn complete(
4030                &self,
4031                _request: CompletionRequest,
4032            ) -> Result<CompletionResponse, Error> {
4033                let mut responses = self.responses.lock().expect("lock");
4034                if responses.is_empty() {
4035                    return Err(Error::Agent(format!("no more responses for {}", self.id)));
4036                }
4037                Ok(responses.remove(0))
4038            }
4039        }
4040
4041        // Orchestrator uses "opus" provider, sub-agent overrides with "haiku" provider
4042        let opus_provider = Arc::new(IdentifiedProvider {
4043            id: "opus".into(),
4044            responses: Mutex::new(vec![
4045                // 1: Orchestrator delegates
4046                CompletionResponse {
4047                    content: vec![ContentBlock::ToolUse {
4048                        id: "call-1".into(),
4049                        name: "delegate_task".into(),
4050                        input: json!({
4051                            "tasks": [{"agent": "cheap", "task": "do cheap work"}]
4052                        }),
4053                    }],
4054                    stop_reason: StopReason::ToolUse,
4055                    usage: TokenUsage::default(),
4056                    model: None,
4057                },
4058                // 3: Orchestrator synthesis
4059                CompletionResponse {
4060                    content: vec![ContentBlock::Text {
4061                        text: "Done.".into(),
4062                    }],
4063                    stop_reason: StopReason::EndTurn,
4064                    usage: TokenUsage::default(),
4065                    model: None,
4066                },
4067            ]),
4068        });
4069
4070        let haiku_provider: Arc<BoxedProvider> = Arc::new(BoxedProvider::new(IdentifiedProvider {
4071            id: "haiku".into(),
4072            responses: Mutex::new(vec![
4073                // 2: Sub-agent responds via haiku
4074                CompletionResponse {
4075                    content: vec![ContentBlock::Text {
4076                        text: "Cheap work done.".into(),
4077                    }],
4078                    stop_reason: StopReason::EndTurn,
4079                    usage: TokenUsage {
4080                        input_tokens: 5,
4081                        output_tokens: 3,
4082                        ..Default::default()
4083                    },
4084                    model: None,
4085                },
4086            ]),
4087        }));
4088
4089        let mut orch = Orchestrator::builder(opus_provider)
4090            .sub_agent_full(SubAgentConfig {
4091                name: "cheap".into(),
4092                description: "Cheap agent".into(),
4093                system_prompt: "You do cheap work.".into(),
4094                tools: vec![],
4095                context_strategy: None,
4096                summarize_threshold: None,
4097                tool_timeout: None,
4098                max_tool_output_bytes: None,
4099                max_turns: None,
4100                max_tokens: None,
4101                response_schema: None,
4102                run_timeout: None,
4103                guardrails: vec![],
4104                provider: Some(haiku_provider),
4105                reasoning_effort: None,
4106                enable_reflection: None,
4107                tool_output_compression_threshold: None,
4108                max_tools_per_turn: None,
4109                tool_profile: None,
4110                max_identical_tool_calls: None,
4111                max_fuzzy_identical_tool_calls: None,
4112                max_tool_calls_per_turn: None,
4113                session_prune_config: None,
4114                enable_recursive_summarization: None,
4115                reflection_threshold: None,
4116                consolidate_on_exit: None,
4117                workspace: None,
4118                max_total_tokens: None,
4119                audit_trail: None,
4120                audit_user_id: None,
4121                audit_tenant_id: None,
4122                audit_delegation_chain: Vec::new(),
4123            })
4124            .build()
4125            .unwrap();
4126
4127        let output = orch.run("do work cheaply").await.unwrap();
4128        assert_eq!(output.result, "Done.");
4129        // Sub-agent tokens should be accumulated from the haiku provider
4130        assert_eq!(output.tokens_used.input_tokens, 5);
4131    }
4132
4133    #[tokio::test]
4134    async fn sub_agent_inherits_default_provider() {
4135        // When no override is set, sub-agent uses the orchestrator's provider
4136        let provider = Arc::new(MockProvider::new(vec![
4137            // 1: Orchestrator delegates
4138            CompletionResponse {
4139                content: vec![ContentBlock::ToolUse {
4140                    id: "call-1".into(),
4141                    name: "delegate_task".into(),
4142                    input: json!({
4143                        "tasks": [{"agent": "worker", "task": "do work"}]
4144                    }),
4145                }],
4146                stop_reason: StopReason::ToolUse,
4147                usage: TokenUsage::default(),
4148                model: None,
4149            },
4150            // 2: Sub-agent responds (from shared provider)
4151            CompletionResponse {
4152                content: vec![ContentBlock::Text {
4153                    text: "Work done.".into(),
4154                }],
4155                stop_reason: StopReason::EndTurn,
4156                usage: TokenUsage::default(),
4157                model: None,
4158            },
4159            // 3: Orchestrator synthesis
4160            CompletionResponse {
4161                content: vec![ContentBlock::Text {
4162                    text: "All done.".into(),
4163                }],
4164                stop_reason: StopReason::EndTurn,
4165                usage: TokenUsage::default(),
4166                model: None,
4167            },
4168        ]));
4169
4170        let mut orch = Orchestrator::builder(provider)
4171            .sub_agent_full(SubAgentConfig {
4172                name: "worker".into(),
4173                description: "Worker".into(),
4174                system_prompt: "Work.".into(),
4175                tools: vec![],
4176                context_strategy: None,
4177                summarize_threshold: None,
4178                tool_timeout: None,
4179                max_tool_output_bytes: None,
4180                max_turns: None,
4181                max_tokens: None,
4182                response_schema: None,
4183                run_timeout: None,
4184                guardrails: vec![],
4185                provider: None,
4186                reasoning_effort: None,
4187                enable_reflection: None,
4188                tool_output_compression_threshold: None,
4189                max_tools_per_turn: None,
4190                tool_profile: None,
4191                max_identical_tool_calls: None,
4192                max_fuzzy_identical_tool_calls: None,
4193                max_tool_calls_per_turn: None,
4194                session_prune_config: None,
4195                enable_recursive_summarization: None,
4196                reflection_threshold: None,
4197                consolidate_on_exit: None,
4198                workspace: None,
4199                max_total_tokens: None,
4200                audit_trail: None,
4201                audit_user_id: None,
4202                audit_tenant_id: None,
4203                audit_delegation_chain: Vec::new(),
4204            })
4205            .build()
4206            .unwrap();
4207
4208        let output = orch.run("do work").await.unwrap();
4209        assert_eq!(output.result, "All done.");
4210    }
4211
4212    // --- FormSquadTool tests ---
4213
4214    #[test]
4215    fn form_squad_tool_definition_schema() {
4216        let tools = vec!["web_search".to_string()];
4217        let agents: Vec<(&str, &str, &[String])> = vec![
4218            ("researcher", "Research specialist", tools.as_slice()),
4219            ("analyst", "Analysis expert", &[]),
4220        ];
4221        let def = build_form_squad_tool_schema(&agents);
4222        assert_eq!(def.name, "form_squad");
4223        assert!(
4224            def.description.contains("researcher"),
4225            "description should list agents: {}",
4226            def.description
4227        );
4228        assert!(
4229            def.description.contains("analyst"),
4230            "description should list agents: {}",
4231            def.description
4232        );
4233        assert!(
4234            def.description.contains("blackboard"),
4235            "description should mention shared blackboard: {}",
4236            def.description
4237        );
4238        assert!(
4239            def.description.contains("Unlike delegate_task"),
4240            "description should contrast with delegate_task: {}",
4241            def.description
4242        );
4243        // Check input schema uses tasks array (same format as delegate_task)
4244        assert_eq!(
4245            def.input_schema["properties"]["tasks"]["type"], "array",
4246            "schema should have tasks array"
4247        );
4248        assert_eq!(
4249            def.input_schema["properties"]["tasks"]["items"]["properties"]["agent"]["type"],
4250            "string",
4251            "tasks items should have agent field"
4252        );
4253        assert_eq!(
4254            def.input_schema["properties"]["tasks"]["items"]["properties"]["task"]["type"],
4255            "string",
4256            "tasks items should have task field"
4257        );
4258        let required = def.input_schema["required"]
4259            .as_array()
4260            .expect("required should be array");
4261        assert!(
4262            required.contains(&json!("tasks")),
4263            "tasks should be required"
4264        );
4265    }
4266
4267    #[tokio::test]
4268    async fn form_squad_dispatches_directly() {
4269        // 3 agents: researcher, analyst, coder. Squad of researcher + analyst.
4270        // No squad-leader LLM calls — agents are dispatched directly.
4271        let provider = Arc::new(MockProvider::new(vec![
4272            // 1: Outer orchestrator calls form_squad with per-agent tasks
4273            CompletionResponse {
4274                content: vec![ContentBlock::ToolUse {
4275                    id: "call-1".into(),
4276                    name: "form_squad".into(),
4277                    input: json!({
4278                        "tasks": [
4279                            {"agent": "researcher", "task": "Research Rust"},
4280                            {"agent": "analyst", "task": "Analyze findings"}
4281                        ]
4282                    }),
4283                }],
4284                stop_reason: StopReason::ToolUse,
4285                usage: TokenUsage {
4286                    input_tokens: 50,
4287                    output_tokens: 20,
4288                    ..Default::default()
4289                },
4290                model: None,
4291            },
4292            // 2: Squad member "researcher" responds
4293            CompletionResponse {
4294                content: vec![ContentBlock::Text {
4295                    text: "Rust is fast and safe.".into(),
4296                }],
4297                stop_reason: StopReason::EndTurn,
4298                usage: TokenUsage {
4299                    input_tokens: 10,
4300                    output_tokens: 8,
4301                    ..Default::default()
4302                },
4303                model: None,
4304            },
4305            // 3: Squad member "analyst" responds
4306            CompletionResponse {
4307                content: vec![ContentBlock::Text {
4308                    text: "Strengths: memory safety.".into(),
4309                }],
4310                stop_reason: StopReason::EndTurn,
4311                usage: TokenUsage {
4312                    input_tokens: 12,
4313                    output_tokens: 10,
4314                    ..Default::default()
4315                },
4316                model: None,
4317            },
4318            // 4: Outer orchestrator synthesizes
4319            CompletionResponse {
4320                content: vec![ContentBlock::Text {
4321                    text: "Final: Rust is excellent.".into(),
4322                }],
4323                stop_reason: StopReason::EndTurn,
4324                usage: TokenUsage {
4325                    input_tokens: 60,
4326                    output_tokens: 25,
4327                    ..Default::default()
4328                },
4329                model: None,
4330            },
4331        ]));
4332
4333        let mut orch = Orchestrator::builder(provider)
4334            .sub_agent("researcher", "Research specialist", "You research.")
4335            .sub_agent("analyst", "Analysis expert", "You analyze.")
4336            .sub_agent("coder", "Coding expert", "You code.")
4337            .build()
4338            .unwrap();
4339
4340        let output = orch.run("Analyze Rust deeply").await.unwrap();
4341        assert_eq!(output.result, "Final: Rust is excellent.");
4342    }
4343
4344    #[tokio::test]
4345    async fn form_squad_tokens_roll_up() {
4346        let provider = Arc::new(MockProvider::new(vec![
4347            // 1: Outer orchestrator calls form_squad
4348            CompletionResponse {
4349                content: vec![ContentBlock::ToolUse {
4350                    id: "call-1".into(),
4351                    name: "form_squad".into(),
4352                    input: json!({
4353                        "tasks": [
4354                            {"agent": "agent_a", "task": "Task A"},
4355                            {"agent": "agent_b", "task": "Task B"}
4356                        ]
4357                    }),
4358                }],
4359                stop_reason: StopReason::ToolUse,
4360                usage: TokenUsage {
4361                    input_tokens: 50,
4362                    output_tokens: 20,
4363                    ..Default::default()
4364                },
4365                model: None,
4366            },
4367            // 2: Squad member agent_a responds
4368            CompletionResponse {
4369                content: vec![ContentBlock::Text {
4370                    text: "Done A.".into(),
4371                }],
4372                stop_reason: StopReason::EndTurn,
4373                usage: TokenUsage {
4374                    input_tokens: 10,
4375                    output_tokens: 5,
4376                    ..Default::default()
4377                },
4378                model: None,
4379            },
4380            // 3: Squad member agent_b responds
4381            CompletionResponse {
4382                content: vec![ContentBlock::Text {
4383                    text: "Done B.".into(),
4384                }],
4385                stop_reason: StopReason::EndTurn,
4386                usage: TokenUsage {
4387                    input_tokens: 12,
4388                    output_tokens: 6,
4389                    ..Default::default()
4390                },
4391                model: None,
4392            },
4393            // 4: Outer orchestrator synthesizes
4394            CompletionResponse {
4395                content: vec![ContentBlock::Text {
4396                    text: "All done.".into(),
4397                }],
4398                stop_reason: StopReason::EndTurn,
4399                usage: TokenUsage {
4400                    input_tokens: 60,
4401                    output_tokens: 25,
4402                    ..Default::default()
4403                },
4404                model: None,
4405            },
4406        ]));
4407
4408        let mut orch = Orchestrator::builder(provider)
4409            .sub_agent("agent_a", "Agent A", "You are A.")
4410            .sub_agent("agent_b", "Agent B", "You are B.")
4411            .build()
4412            .unwrap();
4413
4414        let output = orch.run("Collaborate").await.unwrap();
4415        // Outer orchestrator: 50+60 in, 20+25 out
4416        // Squad members: agent_a 10 in + 5 out, agent_b 12 in + 6 out
4417        // No squad-leader overhead
4418        assert_eq!(
4419            output.tokens_used.input_tokens,
4420            50 + 60 + 10 + 12,
4421            "all token levels should roll up"
4422        );
4423        assert_eq!(
4424            output.tokens_used.output_tokens,
4425            20 + 25 + 5 + 6,
4426            "all token levels should roll up"
4427        );
4428    }
4429
4430    #[tokio::test]
4431    async fn form_squad_returns_error_for_unknown_agent() {
4432        let provider = Arc::new(MockProvider::new(vec![
4433            // 1: Outer orchestrator calls form_squad with unknown agent
4434            CompletionResponse {
4435                content: vec![ContentBlock::ToolUse {
4436                    id: "call-1".into(),
4437                    name: "form_squad".into(),
4438                    input: json!({
4439                        "tasks": [
4440                            {"agent": "researcher", "task": "Do research"},
4441                            {"agent": "nonexistent", "task": "Do stuff"}
4442                        ]
4443                    }),
4444                }],
4445                stop_reason: StopReason::ToolUse,
4446                usage: TokenUsage::default(),
4447                model: None,
4448            },
4449            // 2: Orchestrator recovers
4450            CompletionResponse {
4451                content: vec![ContentBlock::Text {
4452                    text: "No such agent available.".into(),
4453                }],
4454                stop_reason: StopReason::EndTurn,
4455                usage: TokenUsage::default(),
4456                model: None,
4457            },
4458        ]));
4459
4460        let mut orch = Orchestrator::builder(provider)
4461            .sub_agent("researcher", "Research", "prompt")
4462            .sub_agent("analyst", "Analysis", "prompt")
4463            .build()
4464            .unwrap();
4465
4466        let output = orch.run("delegate to unknown squad").await.unwrap();
4467        assert_eq!(output.result, "No such agent available.");
4468    }
4469
4470    #[tokio::test]
4471    async fn form_squad_requires_at_least_two_agents() {
4472        let provider = Arc::new(MockProvider::new(vec![
4473            // 1: Outer orchestrator tries to form squad with 1 task
4474            CompletionResponse {
4475                content: vec![ContentBlock::ToolUse {
4476                    id: "call-1".into(),
4477                    name: "form_squad".into(),
4478                    input: json!({
4479                        "tasks": [
4480                            {"agent": "researcher", "task": "Solo task"}
4481                        ]
4482                    }),
4483                }],
4484                stop_reason: StopReason::ToolUse,
4485                usage: TokenUsage::default(),
4486                model: None,
4487            },
4488            // 2: Orchestrator recovers
4489            CompletionResponse {
4490                content: vec![ContentBlock::Text {
4491                    text: "Using delegate_task instead.".into(),
4492                }],
4493                stop_reason: StopReason::EndTurn,
4494                usage: TokenUsage::default(),
4495                model: None,
4496            },
4497        ]));
4498
4499        let mut orch = Orchestrator::builder(provider)
4500            .sub_agent("researcher", "Research", "prompt")
4501            .sub_agent("analyst", "Analysis", "prompt")
4502            .build()
4503            .unwrap();
4504
4505        let output = orch.run("form solo squad").await.unwrap();
4506        assert_eq!(output.result, "Using delegate_task instead.");
4507    }
4508
4509    #[tokio::test]
4510    async fn form_squad_rejects_duplicate_agents() {
4511        let provider = Arc::new(MockProvider::new(vec![
4512            // 1: Outer orchestrator sends duplicate agent names
4513            CompletionResponse {
4514                content: vec![ContentBlock::ToolUse {
4515                    id: "call-1".into(),
4516                    name: "form_squad".into(),
4517                    input: json!({
4518                        "tasks": [
4519                            {"agent": "researcher", "task": "Task 1"},
4520                            {"agent": "researcher", "task": "Task 2"}
4521                        ]
4522                    }),
4523                }],
4524                stop_reason: StopReason::ToolUse,
4525                usage: TokenUsage::default(),
4526                model: None,
4527            },
4528            // 2: Orchestrator recovers
4529            CompletionResponse {
4530                content: vec![ContentBlock::Text {
4531                    text: "Fixed duplicate issue.".into(),
4532                }],
4533                stop_reason: StopReason::EndTurn,
4534                usage: TokenUsage::default(),
4535                model: None,
4536            },
4537        ]));
4538
4539        let mut orch = Orchestrator::builder(provider)
4540            .sub_agent("researcher", "Research", "prompt")
4541            .sub_agent("analyst", "Analysis", "prompt")
4542            .build()
4543            .unwrap();
4544
4545        let output = orch.run("form squad with dupes").await.unwrap();
4546        assert_eq!(output.result, "Fixed duplicate issue.");
4547    }
4548
4549    #[tokio::test]
4550    async fn form_squad_private_blackboard() {
4551        use crate::agent::blackboard::InMemoryBlackboard;
4552
4553        let outer_bb = Arc::new(InMemoryBlackboard::new());
4554
4555        let provider = Arc::new(MockProvider::new(vec![
4556            // 1: Outer orchestrator calls form_squad
4557            CompletionResponse {
4558                content: vec![ContentBlock::ToolUse {
4559                    id: "call-1".into(),
4560                    name: "form_squad".into(),
4561                    input: json!({
4562                        "tasks": [
4563                            {"agent": "writer_a", "task": "Write something"},
4564                            {"agent": "writer_b", "task": "Write something else"}
4565                        ]
4566                    }),
4567                }],
4568                stop_reason: StopReason::ToolUse,
4569                usage: TokenUsage::default(),
4570                model: None,
4571            },
4572            // 2: Squad member writer_a responds
4573            CompletionResponse {
4574                content: vec![ContentBlock::Text {
4575                    text: "Written to squad blackboard.".into(),
4576                }],
4577                stop_reason: StopReason::EndTurn,
4578                usage: TokenUsage::default(),
4579                model: None,
4580            },
4581            // 3: Squad member writer_b responds
4582            CompletionResponse {
4583                content: vec![ContentBlock::Text {
4584                    text: "Also written.".into(),
4585                }],
4586                stop_reason: StopReason::EndTurn,
4587                usage: TokenUsage::default(),
4588                model: None,
4589            },
4590            // 4: Outer orchestrator synthesizes
4591            CompletionResponse {
4592                content: vec![ContentBlock::Text {
4593                    text: "Done.".into(),
4594                }],
4595                stop_reason: StopReason::EndTurn,
4596                usage: TokenUsage::default(),
4597                model: None,
4598            },
4599        ]));
4600
4601        let mut orch = Orchestrator::builder(provider)
4602            .sub_agent("writer_a", "Writer A", "You write.")
4603            .sub_agent("writer_b", "Writer B", "You write.")
4604            .blackboard(outer_bb.clone())
4605            .build()
4606            .unwrap();
4607
4608        orch.run("write to blackboard").await.unwrap();
4609
4610        // Squad result IS written to the outer blackboard under the "squad:" key.
4611        let squad_key = "squad:writer_a+writer_b";
4612        let val = outer_bb.read(squad_key).await.unwrap();
4613        assert!(
4614            val.is_some(),
4615            "outer blackboard should have squad result under '{squad_key}'"
4616        );
4617
4618        // The squad member's agent:writer_a key should NOT be in the outer blackboard
4619        // (it was written to the private blackboard inside the squad)
4620        let agent_key = "agent:writer_a";
4621        let val = outer_bb.read(agent_key).await.unwrap();
4622        assert!(
4623            val.is_none(),
4624            "outer blackboard should NOT have '{agent_key}' — that's on the private blackboard"
4625        );
4626    }
4627
4628    #[tokio::test]
4629    async fn form_squad_error_returns_tool_error_not_hard_error() {
4630        // One squad member's provider always fails → FormSquadTool returns
4631        // ToolOutput::error → outer orchestrator recovers gracefully.
4632        //
4633        // We use provider_override so agent_b gets a dedicated failing provider,
4634        // avoiding non-deterministic response ordering from a shared MockProvider.
4635
4636        let failing_provider = Arc::new(BoxedProvider::new(MockProvider::new(vec![])));
4637
4638        let provider = Arc::new(MockProvider::new(vec![
4639            // 1: Outer orchestrator calls form_squad
4640            CompletionResponse {
4641                content: vec![ContentBlock::ToolUse {
4642                    id: "call-1".into(),
4643                    name: "form_squad".into(),
4644                    input: json!({
4645                        "tasks": [
4646                            {"agent": "agent_a", "task": "Do A"},
4647                            {"agent": "agent_b", "task": "Do B"}
4648                        ]
4649                    }),
4650                }],
4651                stop_reason: StopReason::ToolUse,
4652                usage: TokenUsage {
4653                    input_tokens: 50,
4654                    output_tokens: 20,
4655                    ..Default::default()
4656                },
4657                model: None,
4658            },
4659            // 2: Squad member agent_a responds (agent_b uses its own failing provider)
4660            CompletionResponse {
4661                content: vec![ContentBlock::Text {
4662                    text: "Done A.".into(),
4663                }],
4664                stop_reason: StopReason::EndTurn,
4665                usage: TokenUsage {
4666                    input_tokens: 10,
4667                    output_tokens: 5,
4668                    ..Default::default()
4669                },
4670                model: None,
4671            },
4672            // 3: Outer orchestrator recovers from the squad error
4673            CompletionResponse {
4674                content: vec![ContentBlock::Text {
4675                    text: "Squad failed, falling back.".into(),
4676                }],
4677                stop_reason: StopReason::EndTurn,
4678                usage: TokenUsage {
4679                    input_tokens: 60,
4680                    output_tokens: 25,
4681                    ..Default::default()
4682                },
4683                model: None,
4684            },
4685        ]));
4686
4687        let mut orch = Orchestrator::builder(provider)
4688            .sub_agent("agent_a", "Agent A", "You are A.")
4689            .sub_agent_full(SubAgentConfig {
4690                name: "agent_b".into(),
4691                description: "Agent B".into(),
4692                system_prompt: "You are B.".into(),
4693                tools: vec![],
4694                context_strategy: None,
4695                summarize_threshold: None,
4696                tool_timeout: None,
4697                max_tool_output_bytes: None,
4698                max_turns: None,
4699                max_tokens: None,
4700                response_schema: None,
4701                run_timeout: None,
4702                guardrails: vec![],
4703                provider: Some(failing_provider),
4704                reasoning_effort: None,
4705                enable_reflection: None,
4706                tool_output_compression_threshold: None,
4707                max_tools_per_turn: None,
4708                tool_profile: None,
4709                max_identical_tool_calls: None,
4710                max_fuzzy_identical_tool_calls: None,
4711                max_tool_calls_per_turn: None,
4712                session_prune_config: None,
4713                enable_recursive_summarization: None,
4714                reflection_threshold: None,
4715                consolidate_on_exit: None,
4716                workspace: None,
4717                max_total_tokens: None,
4718                audit_trail: None,
4719                audit_user_id: None,
4720                audit_tenant_id: None,
4721                audit_delegation_chain: Vec::new(),
4722            })
4723            .build()
4724            .unwrap();
4725
4726        let output = orch.run("complex task").await.unwrap();
4727        assert_eq!(output.result, "Squad failed, falling back.");
4728        // Partial tokens from the successful squad member should be accumulated
4729        assert!(
4730            output.tokens_used.input_tokens > 50 + 60,
4731            "should include partial squad tokens: {}",
4732            output.tokens_used.input_tokens
4733        );
4734    }
4735
4736    #[tokio::test]
4737    async fn orchestrator_registers_both_tools() {
4738        use crate::llm::types::CompletionRequest;
4739
4740        struct ToolCapturingProvider {
4741            responses: Mutex<Vec<CompletionResponse>>,
4742            tool_names_seen: Mutex<Vec<Vec<String>>>,
4743        }
4744
4745        impl LlmProvider for ToolCapturingProvider {
4746            async fn complete(
4747                &self,
4748                request: CompletionRequest,
4749            ) -> Result<CompletionResponse, Error> {
4750                let names: Vec<String> = request.tools.iter().map(|t| t.name.clone()).collect();
4751                self.tool_names_seen.lock().expect("lock").push(names);
4752                let mut responses = self.responses.lock().expect("lock");
4753                if responses.is_empty() {
4754                    return Err(Error::Agent("no more responses".into()));
4755                }
4756                Ok(responses.remove(0))
4757            }
4758        }
4759
4760        let provider = Arc::new(ToolCapturingProvider {
4761            responses: Mutex::new(vec![CompletionResponse {
4762                content: vec![ContentBlock::Text {
4763                    text: "Direct answer.".into(),
4764                }],
4765                stop_reason: StopReason::EndTurn,
4766                usage: TokenUsage::default(),
4767                model: None,
4768            }]),
4769            tool_names_seen: Mutex::new(vec![]),
4770        });
4771
4772        // >= 2 agents → squads auto-enabled
4773        let mut orch = Orchestrator::builder(provider.clone())
4774            .sub_agent("researcher", "Research", "prompt")
4775            .sub_agent("analyst", "Analysis", "prompt")
4776            .build()
4777            .unwrap();
4778
4779        orch.run("test").await.unwrap();
4780
4781        let tool_names = provider.tool_names_seen.lock().unwrap();
4782        assert!(
4783            tool_names[0].contains(&"delegate_task".to_string()),
4784            "should have delegate_task: {:?}",
4785            tool_names[0]
4786        );
4787        assert!(
4788            tool_names[0].contains(&"form_squad".to_string()),
4789            "should have form_squad: {:?}",
4790            tool_names[0]
4791        );
4792    }
4793
4794    #[test]
4795    fn orchestrator_single_agent_no_squads() {
4796        let provider = Arc::new(MockProvider::new(vec![]));
4797
4798        // Only 1 agent → squads auto-disabled
4799        let result = Orchestrator::builder(provider)
4800            .sub_agent("researcher", "Research", "prompt")
4801            .build();
4802
4803        assert!(result.is_ok());
4804    }
4805
4806    #[tokio::test]
4807    async fn orchestrator_squads_disabled_explicitly() {
4808        use crate::llm::types::CompletionRequest;
4809
4810        struct ToolCapturingProvider {
4811            responses: Mutex<Vec<CompletionResponse>>,
4812            tool_names_seen: Mutex<Vec<Vec<String>>>,
4813        }
4814
4815        impl LlmProvider for ToolCapturingProvider {
4816            async fn complete(
4817                &self,
4818                request: CompletionRequest,
4819            ) -> Result<CompletionResponse, Error> {
4820                let names: Vec<String> = request.tools.iter().map(|t| t.name.clone()).collect();
4821                self.tool_names_seen.lock().expect("lock").push(names);
4822                let mut responses = self.responses.lock().expect("lock");
4823                if responses.is_empty() {
4824                    return Err(Error::Agent("no more responses".into()));
4825                }
4826                Ok(responses.remove(0))
4827            }
4828        }
4829
4830        let provider = Arc::new(ToolCapturingProvider {
4831            responses: Mutex::new(vec![CompletionResponse {
4832                content: vec![ContentBlock::Text {
4833                    text: "Direct answer.".into(),
4834                }],
4835                stop_reason: StopReason::EndTurn,
4836                usage: TokenUsage::default(),
4837                model: None,
4838            }]),
4839            tool_names_seen: Mutex::new(vec![]),
4840        });
4841
4842        // 2 agents BUT squads explicitly disabled
4843        let mut orch = Orchestrator::builder(provider.clone())
4844            .sub_agent("researcher", "Research", "prompt")
4845            .sub_agent("analyst", "Analysis", "prompt")
4846            .enable_squads(false)
4847            .build()
4848            .unwrap();
4849
4850        orch.run("test").await.unwrap();
4851
4852        let tool_names = provider.tool_names_seen.lock().unwrap();
4853        assert!(
4854            tool_names[0].contains(&"delegate_task".to_string()),
4855            "should have delegate_task: {:?}",
4856            tool_names[0]
4857        );
4858        assert!(
4859            !tool_names[0].contains(&"form_squad".to_string()),
4860            "should NOT have form_squad when disabled: {:?}",
4861            tool_names[0]
4862        );
4863    }
4864
4865    #[test]
4866    fn system_prompt_mentions_both_tools_when_squads_enabled() {
4867        let tools = vec!["web_search".to_string()];
4868        let agents: Vec<(&str, &str, &[String])> = vec![
4869            ("researcher", "Research specialist", tools.as_slice()),
4870            ("analyst", "Analysis expert", &[]),
4871        ];
4872
4873        let prompt = build_system_prompt(&agents, true, DispatchMode::Parallel);
4874        assert!(
4875            prompt.contains("delegate_task"),
4876            "prompt should mention delegate_task: {prompt}"
4877        );
4878        assert!(
4879            prompt.contains("form_squad"),
4880            "prompt should mention form_squad: {prompt}"
4881        );
4882        assert!(
4883            prompt.contains("two delegation tools"),
4884            "prompt should explain both tools: {prompt}"
4885        );
4886        // Squads-enabled prompt should distinguish isolation vs collaboration
4887        assert!(
4888            prompt.contains("isolation"),
4889            "prompt should mention isolation for delegate_task: {prompt}"
4890        );
4891        assert!(
4892            prompt.contains("blackboard"),
4893            "prompt should mention shared blackboard for form_squad: {prompt}"
4894        );
4895    }
4896
4897    #[test]
4898    fn system_prompt_only_delegate_when_squads_disabled() {
4899        let agents: Vec<(&str, &str, &[String])> = vec![("researcher", "Research specialist", &[])];
4900
4901        let prompt = build_system_prompt(&agents, false, DispatchMode::Parallel);
4902        assert!(
4903            prompt.contains("delegate_task"),
4904            "prompt should mention delegate_task: {prompt}"
4905        );
4906        assert!(
4907            !prompt.contains("form_squad"),
4908            "prompt should NOT mention form_squad: {prompt}"
4909        );
4910        // Still has decision framework
4911        assert!(
4912            prompt.contains("Decision Process"),
4913            "prompt should contain Decision Process even without squads: {prompt}"
4914        );
4915        assert!(
4916            prompt.contains("Effort Scaling"),
4917            "prompt should contain Effort Scaling even without squads: {prompt}"
4918        );
4919    }
4920
4921    #[tokio::test]
4922    async fn delegate_forwards_on_event_to_sub_agents() {
4923        // Verify that on_event receives events from sub-agents (not just orchestrator)
4924        let provider = Arc::new(MockProvider::new(vec![
4925            // 1: Orchestrator decides to delegate
4926            CompletionResponse {
4927                content: vec![ContentBlock::ToolUse {
4928                    id: "call-1".into(),
4929                    name: "delegate_task".into(),
4930                    input: json!({
4931                        "tasks": [
4932                            {"agent": "worker", "task": "do work"}
4933                        ]
4934                    }),
4935                }],
4936                stop_reason: StopReason::ToolUse,
4937                usage: TokenUsage::default(),
4938                model: None,
4939            },
4940            // 2: Sub-agent "worker" response
4941            CompletionResponse {
4942                content: vec![ContentBlock::Text {
4943                    text: "done".into(),
4944                }],
4945                stop_reason: StopReason::EndTurn,
4946                usage: TokenUsage::default(),
4947                model: None,
4948            },
4949            // 3: Orchestrator synthesis
4950            CompletionResponse {
4951                content: vec![ContentBlock::Text {
4952                    text: "All done.".into(),
4953                }],
4954                stop_reason: StopReason::EndTurn,
4955                usage: TokenUsage::default(),
4956                model: None,
4957            },
4958        ]));
4959
4960        let events = Arc::new(Mutex::new(Vec::<AgentEvent>::new()));
4961        let events_clone = events.clone();
4962        let on_event: Arc<OnEvent> = Arc::new(move |event: AgentEvent| {
4963            events_clone.lock().expect("test lock").push(event);
4964        });
4965
4966        let mut orch = Orchestrator::builder(provider)
4967            .sub_agent("worker", "Worker agent", "You do work.")
4968            .on_event(on_event)
4969            .build()
4970            .unwrap();
4971
4972        let _output = orch.run("delegate some work").await.unwrap();
4973
4974        let events = events.lock().expect("test lock");
4975
4976        // Should have events from both "orchestrator" and "worker"
4977        let orchestrator_events: Vec<_> = events
4978            .iter()
4979            .filter(|e| match e {
4980                AgentEvent::RunStarted { agent, .. }
4981                | AgentEvent::TurnStarted { agent, .. }
4982                | AgentEvent::LlmResponse { agent, .. }
4983                | AgentEvent::RunCompleted { agent, .. } => agent == "orchestrator",
4984                _ => false,
4985            })
4986            .collect();
4987        let worker_events: Vec<_> = events
4988            .iter()
4989            .filter(|e| match e {
4990                AgentEvent::RunStarted { agent, .. }
4991                | AgentEvent::TurnStarted { agent, .. }
4992                | AgentEvent::LlmResponse { agent, .. }
4993                | AgentEvent::RunCompleted { agent, .. } => agent == "worker",
4994                _ => false,
4995            })
4996            .collect();
4997
4998        assert!(
4999            !orchestrator_events.is_empty(),
5000            "should have orchestrator events"
5001        );
5002        assert!(
5003            !worker_events.is_empty(),
5004            "should have sub-agent worker events (forwarded via on_event)"
5005        );
5006
5007        // Verify worker had a RunStarted event
5008        let worker_run_started = events
5009            .iter()
5010            .any(|e| matches!(e, AgentEvent::RunStarted { agent, .. } if agent == "worker"));
5011        assert!(
5012            worker_run_started,
5013            "sub-agent should emit RunStarted via forwarded on_event"
5014        );
5015    }
5016
5017    /// Complex end-to-end audit trail test.
5018    ///
5019    /// Scenario:
5020    ///   1. Orchestrator delegates to 2 sub-agents: "researcher" (has web_search tool)
5021    ///      and "coder" (has read_file tool)
5022    ///   2. Each sub-agent calls its tool, produces output
5023    ///   3. Orchestrator synthesizes results
5024    ///
5025    /// Verifies:
5026    ///   - Complete event stream ordering
5027    ///   - Agent names on every event
5028    ///   - LlmResponse contains text, latency_ms > 0, model name
5029    ///   - ToolCallStarted contains input JSON
5030    ///   - ToolCallCompleted contains output content
5031    ///   - Sub-agent events are forwarded (not just orchestrator events)
5032    ///   - SubAgentsDispatched + SubAgentCompleted bracket sub-agent work
5033    ///   - Token usage rolls up correctly
5034    ///   - Truncation works for oversized tool output
5035    #[tokio::test]
5036    async fn full_audit_trail_end_to_end() {
5037        // Build a long tool output to test truncation
5038        let long_output = "x".repeat(70_000); // > EVENT_MAX_PAYLOAD_BYTES (65536)
5039
5040        let provider = Arc::new(MockProvider::new(vec![
5041            // 1: Orchestrator decides to delegate to both agents (with reasoning text)
5042            CompletionResponse {
5043                content: vec![
5044                    ContentBlock::Text {
5045                        text: "I'll delegate to the researcher and coder.".into(),
5046                    },
5047                    ContentBlock::ToolUse {
5048                        id: "orch-call-1".into(),
5049                        name: "delegate_task".into(),
5050                        input: json!({
5051                            "tasks": [
5052                                {"agent": "researcher", "task": "Search for Rust concurrency patterns"},
5053                                {"agent": "coder", "task": "Read the main.rs file"}
5054                            ]
5055                        }),
5056                    },
5057                ],
5058                stop_reason: StopReason::ToolUse,
5059                usage: TokenUsage {
5060                    input_tokens: 100,
5061                    output_tokens: 40,
5062                    ..Default::default()
5063                },
5064                model: None,
5065            },
5066            // 2: Sub-agent "researcher" LLM response: calls web_search tool
5067            CompletionResponse {
5068                content: vec![
5069                    ContentBlock::Text {
5070                        text: "Let me search for Rust concurrency info.".into(),
5071                    },
5072                    ContentBlock::ToolUse {
5073                        id: "res-call-1".into(),
5074                        name: "web_search".into(),
5075                        input: json!({"query": "rust async concurrency"}),
5076                    },
5077                ],
5078                stop_reason: StopReason::ToolUse,
5079                usage: TokenUsage {
5080                    input_tokens: 20,
5081                    output_tokens: 10,
5082                    ..Default::default()
5083                },
5084                model: None,
5085            },
5086            // 3: Sub-agent "researcher" final response after tool result
5087            CompletionResponse {
5088                content: vec![ContentBlock::Text {
5089                    text: "Rust uses async/await with tokio for concurrency.".into(),
5090                }],
5091                stop_reason: StopReason::EndTurn,
5092                usage: TokenUsage {
5093                    input_tokens: 30,
5094                    output_tokens: 15,
5095                    ..Default::default()
5096                },
5097                model: None,
5098            },
5099            // 4: Sub-agent "coder" LLM response: calls read_file tool
5100            CompletionResponse {
5101                content: vec![
5102                    ContentBlock::Text {
5103                        text: "I'll read the main.rs file.".into(),
5104                    },
5105                    ContentBlock::ToolUse {
5106                        id: "cod-call-1".into(),
5107                        name: "read_file".into(),
5108                        input: json!({"path": "/src/main.rs"}),
5109                    },
5110                ],
5111                stop_reason: StopReason::ToolUse,
5112                usage: TokenUsage {
5113                    input_tokens: 15,
5114                    output_tokens: 8,
5115                    ..Default::default()
5116                },
5117                model: None,
5118            },
5119            // 5: Sub-agent "coder" final response after tool result
5120            CompletionResponse {
5121                content: vec![ContentBlock::Text {
5122                    text: "The main.rs contains the entry point.".into(),
5123                }],
5124                stop_reason: StopReason::EndTurn,
5125                usage: TokenUsage {
5126                    input_tokens: 25,
5127                    output_tokens: 12,
5128                    ..Default::default()
5129                },
5130                model: None,
5131            },
5132            // 6: Orchestrator synthesis
5133            CompletionResponse {
5134                content: vec![ContentBlock::Text {
5135                    text: "Combined analysis: Rust async is great for concurrency.".into(),
5136                }],
5137                stop_reason: StopReason::EndTurn,
5138                usage: TokenUsage {
5139                    input_tokens: 200,
5140                    output_tokens: 50,
5141                    ..Default::default()
5142                },
5143                model: None,
5144            },
5145        ]));
5146
5147        let events = Arc::new(Mutex::new(Vec::<AgentEvent>::new()));
5148        let events_clone = events.clone();
5149        let on_event: Arc<OnEvent> = Arc::new(move |event: AgentEvent| {
5150            events_clone.lock().expect("test lock").push(event);
5151        });
5152
5153        // Both agents get both tools so the test is resilient to JoinSet ordering.
5154        // (Sub-agents share a MockProvider and run concurrently — response order is non-deterministic.)
5155        let long_output_clone = long_output.clone();
5156        let shared_tools: Vec<Arc<dyn Tool>> = vec![
5157            Arc::new(MockTool::new("web_search", &long_output_clone)),
5158            Arc::new(MockTool::new(
5159                "read_file",
5160                "fn main() { println!(\"hello\"); }",
5161            )),
5162        ];
5163        let mut orch = Orchestrator::builder(provider)
5164            .sub_agent_with_tools(
5165                "researcher",
5166                "Research specialist",
5167                "You research topics.",
5168                shared_tools.clone(),
5169            )
5170            .sub_agent_with_tools(
5171                "coder",
5172                "Code expert",
5173                "You read and analyze code.",
5174                shared_tools.clone(),
5175            )
5176            .on_event(on_event)
5177            .build()
5178            .unwrap();
5179
5180        let output = orch
5181            .run("Analyze Rust concurrency and the main.rs file")
5182            .await
5183            .unwrap();
5184
5185        // === Verify final output ===
5186        assert_eq!(
5187            output.result,
5188            "Combined analysis: Rust async is great for concurrency."
5189        );
5190        assert_eq!(output.tool_calls_made, 1); // orchestrator made 1 delegate_task call
5191
5192        // === Collect and categorize events ===
5193        let events = events.lock().expect("test lock");
5194
5195        // Helper to extract agent name from any event
5196        fn agent_of(e: &AgentEvent) -> &str {
5197            match e {
5198                AgentEvent::RunStarted { agent, .. }
5199                | AgentEvent::TurnStarted { agent, .. }
5200                | AgentEvent::LlmResponse { agent, .. }
5201                | AgentEvent::ToolCallStarted { agent, .. }
5202                | AgentEvent::ToolCallCompleted { agent, .. }
5203                | AgentEvent::RunCompleted { agent, .. }
5204                | AgentEvent::RunFailed { agent, .. }
5205                | AgentEvent::SubAgentsDispatched { agent, .. }
5206                | AgentEvent::SubAgentCompleted { agent, .. }
5207                | AgentEvent::ApprovalRequested { agent, .. }
5208                | AgentEvent::ApprovalDecision { agent, .. }
5209                | AgentEvent::ContextSummarized { agent, .. }
5210                | AgentEvent::GuardrailDenied { agent, .. }
5211                | AgentEvent::GuardrailWarned { agent, .. }
5212                | AgentEvent::RetryAttempt { agent, .. }
5213                | AgentEvent::DoomLoopDetected { agent, .. }
5214                | AgentEvent::FuzzyDoomLoopDetected { agent, .. }
5215                | AgentEvent::AutoCompactionTriggered { agent, .. }
5216                | AgentEvent::SessionPruned { agent, .. }
5217                | AgentEvent::ModelEscalated { agent, .. }
5218                | AgentEvent::BudgetExceeded { agent, .. }
5219                | AgentEvent::AgentSpawned { agent, .. }
5220                | AgentEvent::KillSwitchActivated { agent, .. }
5221                | AgentEvent::ToolNameRepaired { agent, .. } => agent,
5222                AgentEvent::SensorEventProcessed { sensor_name, .. } => sensor_name,
5223                AgentEvent::StoryUpdated { story_id, .. } => story_id,
5224                AgentEvent::TaskRouted { decision, .. } => decision,
5225                AgentEvent::WorkflowNodeStarted { node, .. }
5226                | AgentEvent::WorkflowNodeCompleted { node, .. }
5227                | AgentEvent::WorkflowNodeFailed { node, .. } => node,
5228            }
5229        }
5230
5231        // Print event stream for debugging
5232        let event_summary: Vec<String> = events
5233            .iter()
5234            .enumerate()
5235            .map(|(i, e)| format!("{i}: [{:>12}] {:?}", agent_of(e), std::mem::discriminant(e)))
5236            .collect();
5237
5238        // === 1. Verify we have events from all 3 agents ===
5239        let agents_seen: std::collections::HashSet<&str> = events.iter().map(agent_of).collect();
5240        assert!(
5241            agents_seen.contains("orchestrator"),
5242            "missing orchestrator events.\nEvent stream:\n{}",
5243            event_summary.join("\n")
5244        );
5245        assert!(
5246            agents_seen.contains("researcher"),
5247            "missing researcher events (should be forwarded).\nEvent stream:\n{}",
5248            event_summary.join("\n")
5249        );
5250        assert!(
5251            agents_seen.contains("coder"),
5252            "missing coder events (should be forwarded).\nEvent stream:\n{}",
5253            event_summary.join("\n")
5254        );
5255
5256        // === 2. Verify orchestrator event sequence ===
5257        let orch_events: Vec<&AgentEvent> = events
5258            .iter()
5259            .filter(|e| agent_of(e) == "orchestrator")
5260            .collect();
5261
5262        // First orchestrator event: RunStarted
5263        assert!(
5264            matches!(orch_events[0], AgentEvent::RunStarted { task, .. } if task.contains("Analyze Rust")),
5265            "first orch event should be RunStarted, got: {:?}",
5266            orch_events[0]
5267        );
5268        // Last orchestrator event: RunCompleted
5269        assert!(
5270            matches!(orch_events.last().unwrap(), AgentEvent::RunCompleted { .. }),
5271            "last orch event should be RunCompleted, got: {:?}",
5272            orch_events.last().unwrap()
5273        );
5274
5275        // === 3. Verify LlmResponse events have text, latency, and model ===
5276        let llm_responses: Vec<&AgentEvent> = events
5277            .iter()
5278            .filter(|e| matches!(e, AgentEvent::LlmResponse { .. }))
5279            .collect();
5280        assert!(
5281            llm_responses.len() >= 3,
5282            "expected >= 3 LlmResponse events (1 orch + at least 1 per sub-agent), got {}.\nEvents:\n{}",
5283            llm_responses.len(),
5284            event_summary.join("\n")
5285        );
5286
5287        for llm_event in &llm_responses {
5288            match llm_event {
5289                AgentEvent::LlmResponse {
5290                    agent, text, model, ..
5291                } => {
5292                    // model_name should always be present (MockProvider returns "mock-model-v1")
5293                    assert_eq!(
5294                        model.as_deref(),
5295                        Some("mock-model-v1"),
5296                        "LlmResponse for '{agent}' should have model name"
5297                    );
5298                    // text should be non-empty (all our mock responses produce content)
5299                    assert!(
5300                        !text.is_empty(),
5301                        "LlmResponse for '{agent}' should have non-empty text"
5302                    );
5303                }
5304                _ => unreachable!(),
5305            }
5306        }
5307
5308        // === 4. Verify ToolCallStarted events have input ===
5309        let tool_started: Vec<&AgentEvent> = events
5310            .iter()
5311            .filter(|e| matches!(e, AgentEvent::ToolCallStarted { .. }))
5312            .collect();
5313        // Expect 3 tool calls: 1 delegate_task (orch) + 1 web_search (researcher) + 1 read_file (coder)
5314        assert!(
5315            tool_started.len() >= 3,
5316            "expected >= 3 ToolCallStarted events, got {}.\nEvents:\n{}",
5317            tool_started.len(),
5318            event_summary.join("\n")
5319        );
5320
5321        // Find the web_search ToolCallStarted and verify input contains the query.
5322        // Note: agent name is NOT asserted because JoinSet ordering is non-deterministic.
5323        let web_search_started = tool_started.iter().find(|e| {
5324            matches!(e, AgentEvent::ToolCallStarted { tool_name, .. } if tool_name == "web_search")
5325        });
5326        assert!(
5327            web_search_started.is_some(),
5328            "should have a web_search ToolCallStarted"
5329        );
5330        match web_search_started.unwrap() {
5331            AgentEvent::ToolCallStarted { input, .. } => {
5332                assert!(
5333                    input.contains("rust async concurrency"),
5334                    "web_search input should contain query, got: {input}"
5335                );
5336            }
5337            _ => unreachable!(),
5338        }
5339
5340        // Find the read_file ToolCallStarted and verify input
5341        let read_file_started = tool_started.iter().find(|e| {
5342            matches!(e, AgentEvent::ToolCallStarted { tool_name, .. } if tool_name == "read_file")
5343        });
5344        assert!(
5345            read_file_started.is_some(),
5346            "should have a read_file ToolCallStarted"
5347        );
5348        match read_file_started.unwrap() {
5349            AgentEvent::ToolCallStarted { input, .. } => {
5350                assert!(
5351                    input.contains("/src/main.rs"),
5352                    "read_file input should contain path, got: {input}"
5353                );
5354            }
5355            _ => unreachable!(),
5356        }
5357
5358        // Find the delegate_task ToolCallStarted and verify input
5359        let delegate_started = tool_started.iter().find(|e| {
5360            matches!(e, AgentEvent::ToolCallStarted { tool_name, .. } if tool_name == "delegate_task")
5361        });
5362        assert!(
5363            delegate_started.is_some(),
5364            "should have a delegate_task ToolCallStarted"
5365        );
5366        match delegate_started.unwrap() {
5367            AgentEvent::ToolCallStarted { agent, input, .. } => {
5368                assert_eq!(agent, "orchestrator");
5369                assert!(
5370                    input.contains("researcher"),
5371                    "delegate_task input should contain agent names, got: {input}"
5372                );
5373            }
5374            _ => unreachable!(),
5375        }
5376
5377        // === 5. Verify ToolCallCompleted events have output ===
5378        let tool_completed: Vec<&AgentEvent> = events
5379            .iter()
5380            .filter(|e| matches!(e, AgentEvent::ToolCallCompleted { .. }))
5381            .collect();
5382        assert!(
5383            tool_completed.len() >= 3,
5384            "expected >= 3 ToolCallCompleted events, got {}",
5385            tool_completed.len()
5386        );
5387
5388        // Find the web_search completion and verify truncation
5389        let web_search_completed = tool_completed.iter().find(|e| {
5390            matches!(e, AgentEvent::ToolCallCompleted { tool_name, .. } if tool_name == "web_search")
5391        });
5392        assert!(
5393            web_search_completed.is_some(),
5394            "should have a web_search ToolCallCompleted"
5395        );
5396        match web_search_completed.unwrap() {
5397            AgentEvent::ToolCallCompleted {
5398                output, is_error, ..
5399            } => {
5400                assert!(!is_error);
5401                // The web_search tool returns 70_000 bytes, exceeds EVENT_MAX_PAYLOAD_BYTES (65536)
5402                assert!(
5403                    output.contains("[truncated:"),
5404                    "web_search output (70000 bytes) should be truncated in event, got {} bytes: {}",
5405                    output.len(),
5406                    &output[..output.len().min(100)]
5407                );
5408            }
5409            _ => unreachable!(),
5410        }
5411
5412        // Find the read_file completion — should NOT be truncated (short output)
5413        let read_file_completed = tool_completed.iter().find(|e| {
5414            matches!(e, AgentEvent::ToolCallCompleted { tool_name, .. } if tool_name == "read_file")
5415        });
5416        assert!(
5417            read_file_completed.is_some(),
5418            "should have a read_file ToolCallCompleted"
5419        );
5420        match read_file_completed.unwrap() {
5421            AgentEvent::ToolCallCompleted {
5422                output, is_error, ..
5423            } => {
5424                assert!(!is_error);
5425                assert!(
5426                    output.contains("fn main()"),
5427                    "read_file output should contain file content, got: {output}"
5428                );
5429                assert!(
5430                    !output.contains("[truncated:"),
5431                    "read_file output should NOT be truncated"
5432                );
5433            }
5434            _ => unreachable!(),
5435        }
5436
5437        // === 6. Verify SubAgentsDispatched and SubAgentCompleted events ===
5438        let dispatched: Vec<&AgentEvent> = events
5439            .iter()
5440            .filter(|e| matches!(e, AgentEvent::SubAgentsDispatched { .. }))
5441            .collect();
5442        assert_eq!(dispatched.len(), 1, "expected 1 SubAgentsDispatched event");
5443        match dispatched[0] {
5444            AgentEvent::SubAgentsDispatched { agents, .. } => {
5445                assert!(
5446                    agents.contains(&"researcher".to_string()),
5447                    "dispatched agents should include researcher"
5448                );
5449                assert!(
5450                    agents.contains(&"coder".to_string()),
5451                    "dispatched agents should include coder"
5452                );
5453            }
5454            _ => unreachable!(),
5455        }
5456
5457        let completed: Vec<&AgentEvent> = events
5458            .iter()
5459            .filter(|e| matches!(e, AgentEvent::SubAgentCompleted { .. }))
5460            .collect();
5461        assert_eq!(
5462            completed.len(),
5463            2,
5464            "expected 2 SubAgentCompleted events (one per sub-agent)"
5465        );
5466        for c in &completed {
5467            match c {
5468                AgentEvent::SubAgentCompleted { success, agent, .. } => {
5469                    assert!(success, "sub-agent '{agent}' should succeed");
5470                }
5471                _ => unreachable!(),
5472            }
5473        }
5474
5475        // === 7. Verify event ordering: RunStarted is always first for each agent ===
5476        for agent_name in &["orchestrator", "researcher", "coder"] {
5477            let agent_events: Vec<&AgentEvent> = events
5478                .iter()
5479                .filter(|e| agent_of(e) == *agent_name)
5480                .collect();
5481            if !agent_events.is_empty() {
5482                assert!(
5483                    matches!(agent_events[0], AgentEvent::RunStarted { .. }),
5484                    "first event for '{agent_name}' should be RunStarted, got: {:?}",
5485                    agent_events[0]
5486                );
5487            }
5488        }
5489
5490        // === 8. Verify token roll-up ===
5491        // Orchestrator: 100+200 input, 40+50 output
5492        // Researcher: 20+30 input, 10+15 output
5493        // Coder: 15+25 input, 8+12 output
5494        // Total: 390 input, 135 output
5495        assert_eq!(
5496            output.tokens_used.input_tokens,
5497            100 + 200 + 20 + 30 + 15 + 25,
5498            "total input tokens should include orchestrator + sub-agents"
5499        );
5500        assert_eq!(
5501            output.tokens_used.output_tokens,
5502            40 + 50 + 10 + 15 + 8 + 12,
5503            "total output tokens should include orchestrator + sub-agents"
5504        );
5505
5506        // === 9. Verify a sub-agent LlmResponse contains research text ===
5507        // (Agent name not asserted due to JoinSet ordering non-determinism)
5508        let sub_agent_llm = llm_responses.iter().find(|e| {
5509            matches!(e, AgentEvent::LlmResponse { text, .. }
5510                if text.contains("async/await"))
5511        });
5512        assert!(
5513            sub_agent_llm.is_some(),
5514            "should have a sub-agent LlmResponse with text about async/await"
5515        );
5516
5517        // === 10. Verify total event count is reasonable ===
5518        // Minimum: 3 agents × (RunStarted + TurnStarted + LlmResponse) + tool events + completion events
5519        // Orchestrator: RunStarted, TurnStarted, LlmResponse, ToolCallStarted(delegate), ToolCallCompleted(delegate),
5520        //               TurnStarted, LlmResponse, RunCompleted = ~8
5521        // Researcher:   RunStarted, TurnStarted, LlmResponse, ToolCallStarted, ToolCallCompleted,
5522        //               TurnStarted, LlmResponse, RunCompleted = ~8
5523        // Coder:        same = ~8
5524        // + SubAgentsDispatched + 2× SubAgentCompleted = 3
5525        // Total ~27 events
5526        assert!(
5527            events.len() >= 20,
5528            "expected at least 20 events for full audit trail, got {}.\nEvents:\n{}",
5529            events.len(),
5530            event_summary.join("\n")
5531        );
5532    }
5533
5534    #[tokio::test]
5535    async fn sub_agent_run_timeout_fires_when_configured() {
5536        // Provider that responds immediately for the orchestrator (delegate call),
5537        // then hangs forever for the sub-agent (simulating a slow LLM),
5538        // then synthesizes the timeout error result.
5539        let provider = Arc::new(MockProvider::new(vec![
5540            // 1: Orchestrator decides to delegate to "slow-agent"
5541            CompletionResponse {
5542                content: vec![ContentBlock::ToolUse {
5543                    id: "call-1".into(),
5544                    name: "delegate_task".into(),
5545                    input: json!({
5546                        "tasks": [{"agent": "slow-agent", "task": "do something"}]
5547                    }),
5548                }],
5549                stop_reason: StopReason::ToolUse,
5550                usage: TokenUsage::default(),
5551                model: None,
5552            },
5553            // 2: Sub-agent "slow-agent" — hangs forever (timeout will fire)
5554            // This response won't be consumed because the provider will run out
5555            // But we need a 3rd response for the orchestrator's synthesis turn.
5556        ]));
5557
5558        // Use a separate SlowProvider for the sub-agent via provider override
5559        struct SlowProvider;
5560        impl LlmProvider for SlowProvider {
5561            async fn complete(
5562                &self,
5563                _request: CompletionRequest,
5564            ) -> Result<CompletionResponse, Error> {
5565                tokio::time::sleep(Duration::from_secs(3600)).await;
5566                unreachable!()
5567            }
5568        }
5569        let slow_provider = Arc::new(BoxedProvider::new(SlowProvider));
5570
5571        let mut orch = Orchestrator::builder(provider)
5572            .sub_agent_full(SubAgentConfig {
5573                name: "slow-agent".into(),
5574                description: "A slow agent".into(),
5575                system_prompt: "sys".into(),
5576                tools: vec![],
5577                context_strategy: None,
5578                summarize_threshold: None,
5579                tool_timeout: None,
5580                max_tool_output_bytes: None,
5581                max_turns: None,
5582                max_tokens: None,
5583                response_schema: None,
5584                run_timeout: Some(Duration::from_millis(100)),
5585                guardrails: vec![],
5586                provider: Some(slow_provider),
5587                reasoning_effort: None,
5588                enable_reflection: None,
5589                tool_output_compression_threshold: None,
5590                max_tools_per_turn: None,
5591                tool_profile: None,
5592                max_identical_tool_calls: None,
5593                max_fuzzy_identical_tool_calls: None,
5594                max_tool_calls_per_turn: None,
5595                session_prune_config: None,
5596                enable_recursive_summarization: None,
5597                reflection_threshold: None,
5598                consolidate_on_exit: None,
5599                workspace: None,
5600                max_total_tokens: None,
5601                audit_trail: None,
5602                audit_user_id: None,
5603                audit_tenant_id: None,
5604                audit_delegation_chain: Vec::new(),
5605            })
5606            .build()
5607            .unwrap();
5608
5609        // The orchestrator will delegate, the sub-agent will timeout,
5610        // and the error propagates back as a delegate_task tool result.
5611        // The orchestrator then tries to synthesize but has no more responses.
5612        let result = orch.run("go").await;
5613        // The orchestrator either returns an error (no more mock responses for synthesis)
5614        // or the result mentions timeout. Either way, the sub-agent's run_timeout fired.
5615        match result {
5616            Ok(output) => {
5617                // If somehow we got a result, it should mention timeout
5618                assert!(
5619                    output.result.contains("timeout") || output.result.contains("Timeout"),
5620                    "expected timeout in result, got: {}",
5621                    output.result
5622                );
5623            }
5624            Err(e) => {
5625                // The orchestrator ran out of mock responses after the sub-agent timed out,
5626                // which is fine — it confirms the sub-agent timeout was wired.
5627                let msg = e.to_string();
5628                assert!(
5629                    msg.contains("no more mock responses")
5630                        || msg.contains("timeout")
5631                        || msg.contains("Timeout"),
5632                    "expected timeout-related error, got: {msg}"
5633                );
5634            }
5635        }
5636    }
5637
5638    /// Complex squad integration test exercising:
5639    /// - 3 squad agents with mixed capabilities (tools, plain, failing)
5640    /// - Private blackboard isolation from outer blackboard
5641    /// - Full event capture and audit trail analysis
5642    /// - Tool execution within a squad member (multi-turn agent)
5643    /// - Graceful error handling when one squad member fails
5644    /// - Token roll-up from successful + partial-failure squad members
5645    /// - Event ordering invariants (RunStarted first per agent, SubAgentsDispatched before SubAgentCompleted)
5646    #[tokio::test]
5647    async fn form_squad_complex_with_tools_events_and_failure() {
5648        use crate::agent::blackboard::InMemoryBlackboard;
5649
5650        let outer_bb = Arc::new(InMemoryBlackboard::new());
5651
5652        // The "reviewer" agent gets a dedicated failing provider (empty responses).
5653        let failing_provider = Arc::new(BoxedProvider::new(MockProvider::new(vec![])));
5654
5655        // Shared provider serves orchestrator + planner + worker (in call order).
5656        // worker is multi-turn: calls `compute` tool, then produces final text.
5657        let provider = Arc::new(MockProvider::new(vec![
5658            // 1: Orchestrator forms a squad of planner + worker + reviewer
5659            CompletionResponse {
5660                content: vec![ContentBlock::ToolUse {
5661                    id: "orch-call-1".into(),
5662                    name: "form_squad".into(),
5663                    input: json!({
5664                        "tasks": [
5665                            {"agent": "planner", "task": "Create a plan for the analysis"},
5666                            {"agent": "worker", "task": "Compute the metrics"},
5667                            {"agent": "reviewer", "task": "Review all findings"}
5668                        ]
5669                    }),
5670                }],
5671                stop_reason: StopReason::ToolUse,
5672                usage: TokenUsage {
5673                    input_tokens: 100,
5674                    output_tokens: 40,
5675                    cache_creation_input_tokens: 5,
5676                    cache_read_input_tokens: 3,
5677                    reasoning_tokens: 0,
5678                },
5679                model: None,
5680            },
5681            // 2: Squad member "planner" responds immediately (single turn)
5682            CompletionResponse {
5683                content: vec![ContentBlock::Text {
5684                    text: "Plan: Step 1 gather data, Step 2 compute, Step 3 review.".into(),
5685                }],
5686                stop_reason: StopReason::EndTurn,
5687                usage: TokenUsage {
5688                    input_tokens: 20,
5689                    output_tokens: 15,
5690                    reasoning_tokens: 8,
5691                    ..Default::default()
5692                },
5693                model: None,
5694            },
5695            // 3: Squad member "worker" calls the `compute` tool (turn 1)
5696            CompletionResponse {
5697                content: vec![
5698                    ContentBlock::Text {
5699                        text: "I'll compute the metrics now.".into(),
5700                    },
5701                    ContentBlock::ToolUse {
5702                        id: "worker-call-1".into(),
5703                        name: "compute".into(),
5704                        input: json!({"expression": "42 * 17"}),
5705                    },
5706                ],
5707                stop_reason: StopReason::ToolUse,
5708                usage: TokenUsage {
5709                    input_tokens: 25,
5710                    output_tokens: 12,
5711                    ..Default::default()
5712                },
5713                model: None,
5714            },
5715            // 4: Squad member "worker" produces final text after tool result (turn 2)
5716            CompletionResponse {
5717                content: vec![ContentBlock::Text {
5718                    text: "Computation result: 714. Analysis complete.".into(),
5719                }],
5720                stop_reason: StopReason::EndTurn,
5721                usage: TokenUsage {
5722                    input_tokens: 35,
5723                    output_tokens: 18,
5724                    ..Default::default()
5725                },
5726                model: None,
5727            },
5728            // 5: (reviewer uses failing_provider, not this queue)
5729            // 6: Orchestrator recovers — synthesizes from partial squad results
5730            CompletionResponse {
5731                content: vec![ContentBlock::Text {
5732                    text: "Squad partial success: plan and computation done, review failed.".into(),
5733                }],
5734                stop_reason: StopReason::EndTurn,
5735                usage: TokenUsage {
5736                    input_tokens: 200,
5737                    output_tokens: 60,
5738                    ..Default::default()
5739                },
5740                model: None,
5741            },
5742        ]));
5743
5744        // Capture all events
5745        let events = Arc::new(Mutex::new(Vec::<AgentEvent>::new()));
5746        let events_clone = events.clone();
5747        let on_event: Arc<OnEvent> = Arc::new(move |event: AgentEvent| {
5748            events_clone.lock().expect("test lock").push(event);
5749        });
5750
5751        // Both planner and worker get the compute tool so shared MockProvider
5752        // response ordering is resilient to JoinSet non-determinism.
5753        let compute_tool: Arc<dyn Tool> = Arc::new(MockTool::new("compute", "714"));
5754
5755        let mut orch = Orchestrator::builder(provider)
5756            .sub_agent_with_tools(
5757                "planner",
5758                "Planning specialist",
5759                "You create plans.",
5760                vec![compute_tool.clone()],
5761            )
5762            .sub_agent_with_tools(
5763                "worker",
5764                "Computation worker",
5765                "You compute metrics.",
5766                vec![compute_tool.clone()],
5767            )
5768            .sub_agent_full(SubAgentConfig {
5769                name: "reviewer".into(),
5770                description: "Review specialist".into(),
5771                system_prompt: "You review findings.".into(),
5772                tools: vec![],
5773                context_strategy: None,
5774                summarize_threshold: None,
5775                tool_timeout: None,
5776                max_tool_output_bytes: None,
5777                max_turns: None,
5778                max_tokens: None,
5779                response_schema: None,
5780                run_timeout: None,
5781                guardrails: vec![],
5782                provider: Some(failing_provider),
5783                reasoning_effort: None,
5784                enable_reflection: None,
5785                tool_output_compression_threshold: None,
5786                max_tools_per_turn: None,
5787                tool_profile: None,
5788                max_identical_tool_calls: None,
5789                max_fuzzy_identical_tool_calls: None,
5790                max_tool_calls_per_turn: None,
5791                session_prune_config: None,
5792                enable_recursive_summarization: None,
5793                reflection_threshold: None,
5794                consolidate_on_exit: None,
5795                workspace: None,
5796                max_total_tokens: None,
5797                audit_trail: None,
5798                audit_user_id: None,
5799                audit_tenant_id: None,
5800                audit_delegation_chain: Vec::new(),
5801            })
5802            .blackboard(outer_bb.clone())
5803            .on_event(on_event)
5804            .build()
5805            .unwrap();
5806
5807        let output = orch.run("Analyze the system performance").await.unwrap();
5808
5809        // === 1. Verify final output ===
5810        assert_eq!(
5811            output.result,
5812            "Squad partial success: plan and computation done, review failed."
5813        );
5814
5815        // === 2. Verify token roll-up ===
5816        // Orchestrator: 100+200 in, 40+60 out, 5 cache_create, 3 cache_read
5817        // Planner: 20 in, 15 out, 8 reasoning
5818        // Worker: 25+35 in, 12+18 out
5819        // Reviewer: 0 (failed before any response)
5820        let expected_input = 100 + 200 + 20 + 25 + 35;
5821        let expected_output = 40 + 60 + 15 + 12 + 18;
5822        assert_eq!(
5823            output.tokens_used.input_tokens, expected_input,
5824            "input tokens should sum orchestrator + planner + worker (reviewer failed)"
5825        );
5826        assert_eq!(
5827            output.tokens_used.output_tokens, expected_output,
5828            "output tokens should sum orchestrator + planner + worker"
5829        );
5830        assert_eq!(
5831            output.tokens_used.reasoning_tokens, 8,
5832            "reasoning tokens should come from planner"
5833        );
5834        assert_eq!(
5835            output.tokens_used.cache_creation_input_tokens, 5,
5836            "cache creation tokens from orchestrator"
5837        );
5838        assert_eq!(
5839            output.tokens_used.cache_read_input_tokens, 3,
5840            "cache read tokens from orchestrator"
5841        );
5842
5843        // === 3. Verify blackboard writes ===
5844        // Squad result should be on the outer blackboard under "squad:planner+worker+reviewer"
5845        let squad_key = "squad:planner+worker+reviewer";
5846        let squad_val = outer_bb.read(squad_key).await.unwrap();
5847        assert!(
5848            squad_val.is_some(),
5849            "outer blackboard should have squad result under '{squad_key}'"
5850        );
5851        let squad_text = squad_val.unwrap().to_string();
5852        // Planner's result should appear
5853        assert!(
5854            squad_text.contains("Plan: Step 1"),
5855            "squad result should include planner's output"
5856        );
5857        // Worker's result should appear
5858        assert!(
5859            squad_text.contains("Computation result: 714"),
5860            "squad result should include worker's output"
5861        );
5862        // Reviewer's error should appear
5863        assert!(
5864            squad_text.contains("Error"),
5865            "squad result should include reviewer's error"
5866        );
5867
5868        // Private blackboard keys should NOT be on the outer blackboard
5869        assert!(
5870            outer_bb.read("agent:planner").await.unwrap().is_none(),
5871            "outer blackboard should NOT have agent:planner"
5872        );
5873        assert!(
5874            outer_bb.read("agent:worker").await.unwrap().is_none(),
5875            "outer blackboard should NOT have agent:worker"
5876        );
5877
5878        // === 4. Analyze events ===
5879        let events = events.lock().expect("test lock");
5880
5881        fn agent_of(e: &AgentEvent) -> &str {
5882            match e {
5883                AgentEvent::RunStarted { agent, .. }
5884                | AgentEvent::TurnStarted { agent, .. }
5885                | AgentEvent::LlmResponse { agent, .. }
5886                | AgentEvent::ToolCallStarted { agent, .. }
5887                | AgentEvent::ToolCallCompleted { agent, .. }
5888                | AgentEvent::RunCompleted { agent, .. }
5889                | AgentEvent::RunFailed { agent, .. }
5890                | AgentEvent::SubAgentsDispatched { agent, .. }
5891                | AgentEvent::SubAgentCompleted { agent, .. }
5892                | AgentEvent::ApprovalRequested { agent, .. }
5893                | AgentEvent::ApprovalDecision { agent, .. }
5894                | AgentEvent::ContextSummarized { agent, .. }
5895                | AgentEvent::GuardrailDenied { agent, .. }
5896                | AgentEvent::GuardrailWarned { agent, .. }
5897                | AgentEvent::RetryAttempt { agent, .. }
5898                | AgentEvent::DoomLoopDetected { agent, .. }
5899                | AgentEvent::FuzzyDoomLoopDetected { agent, .. }
5900                | AgentEvent::AutoCompactionTriggered { agent, .. }
5901                | AgentEvent::SessionPruned { agent, .. }
5902                | AgentEvent::ModelEscalated { agent, .. }
5903                | AgentEvent::BudgetExceeded { agent, .. }
5904                | AgentEvent::AgentSpawned { agent, .. }
5905                | AgentEvent::KillSwitchActivated { agent, .. }
5906                | AgentEvent::ToolNameRepaired { agent, .. } => agent,
5907                AgentEvent::SensorEventProcessed { sensor_name, .. } => sensor_name,
5908                AgentEvent::StoryUpdated { story_id, .. } => story_id,
5909                AgentEvent::TaskRouted { decision, .. } => decision,
5910                AgentEvent::WorkflowNodeStarted { node, .. }
5911                | AgentEvent::WorkflowNodeCompleted { node, .. }
5912                | AgentEvent::WorkflowNodeFailed { node, .. } => node,
5913            }
5914        }
5915
5916        fn event_type(e: &AgentEvent) -> &'static str {
5917            match e {
5918                AgentEvent::RunStarted { .. } => "RunStarted",
5919                AgentEvent::TurnStarted { .. } => "TurnStarted",
5920                AgentEvent::LlmResponse { .. } => "LlmResponse",
5921                AgentEvent::ToolCallStarted { .. } => "ToolCallStarted",
5922                AgentEvent::ToolCallCompleted { .. } => "ToolCallCompleted",
5923                AgentEvent::RunCompleted { .. } => "RunCompleted",
5924                AgentEvent::RunFailed { .. } => "RunFailed",
5925                AgentEvent::SubAgentsDispatched { .. } => "SubAgentsDispatched",
5926                AgentEvent::SubAgentCompleted { .. } => "SubAgentCompleted",
5927                AgentEvent::ApprovalRequested { .. } => "ApprovalRequested",
5928                AgentEvent::ApprovalDecision { .. } => "ApprovalDecision",
5929                AgentEvent::ContextSummarized { .. } => "ContextSummarized",
5930                AgentEvent::GuardrailDenied { .. } => "GuardrailDenied",
5931                AgentEvent::GuardrailWarned { .. } => "GuardrailWarned",
5932                AgentEvent::RetryAttempt { .. } => "RetryAttempt",
5933                AgentEvent::DoomLoopDetected { .. } => "DoomLoopDetected",
5934                AgentEvent::FuzzyDoomLoopDetected { .. } => "FuzzyDoomLoopDetected",
5935                AgentEvent::AutoCompactionTriggered { .. } => "AutoCompactionTriggered",
5936                AgentEvent::SessionPruned { .. } => "SessionPruned",
5937                AgentEvent::SensorEventProcessed { .. } => "SensorEventProcessed",
5938                AgentEvent::StoryUpdated { .. } => "StoryUpdated",
5939                AgentEvent::TaskRouted { .. } => "TaskRouted",
5940                AgentEvent::ModelEscalated { .. } => "ModelEscalated",
5941                AgentEvent::BudgetExceeded { .. } => "BudgetExceeded",
5942                AgentEvent::AgentSpawned { .. } => "AgentSpawned",
5943                AgentEvent::KillSwitchActivated { .. } => "KillSwitchActivated",
5944                AgentEvent::WorkflowNodeStarted { .. } => "WorkflowNodeStarted",
5945                AgentEvent::WorkflowNodeCompleted { .. } => "WorkflowNodeCompleted",
5946                AgentEvent::WorkflowNodeFailed { .. } => "WorkflowNodeFailed",
5947                AgentEvent::ToolNameRepaired { .. } => "ToolNameRepaired",
5948            }
5949        }
5950
5951        let event_summary: Vec<String> = events
5952            .iter()
5953            .enumerate()
5954            .map(|(i, e)| format!("{i}: [{:>12}] {}", agent_of(e), event_type(e)))
5955            .collect();
5956        let event_log = event_summary.join("\n");
5957
5958        // 4a. Verify events from expected agents
5959        let agents_seen: std::collections::HashSet<&str> = events.iter().map(agent_of).collect();
5960        assert!(
5961            agents_seen.contains("orchestrator"),
5962            "missing orchestrator events.\n{event_log}"
5963        );
5964        // At least planner or worker should be present (JoinSet order non-deterministic)
5965        let has_planner = agents_seen.contains("planner");
5966        let has_worker = agents_seen.contains("worker");
5967        assert!(
5968            has_planner || has_worker,
5969            "should have events from at least one successful squad member.\n{event_log}"
5970        );
5971
5972        // 4b. SubAgentsDispatched should fire exactly once (from squad-leader)
5973        let dispatched: Vec<&AgentEvent> = events
5974            .iter()
5975            .filter(|e| matches!(e, AgentEvent::SubAgentsDispatched { .. }))
5976            .collect();
5977        assert_eq!(
5978            dispatched.len(),
5979            1,
5980            "expected exactly 1 SubAgentsDispatched event.\n{event_log}"
5981        );
5982        match dispatched[0] {
5983            AgentEvent::SubAgentsDispatched { agents, agent } => {
5984                assert_eq!(
5985                    agent, "squad-leader",
5986                    "form_squad uses 'squad-leader' label"
5987                );
5988                assert_eq!(agents.len(), 3, "should dispatch 3 squad members");
5989                assert!(agents.contains(&"planner".to_string()));
5990                assert!(agents.contains(&"worker".to_string()));
5991                assert!(agents.contains(&"reviewer".to_string()));
5992            }
5993            _ => unreachable!(),
5994        }
5995
5996        // 4c. SubAgentCompleted events: 3 per-agent + 1 aggregate = 4 total
5997        let completed: Vec<&AgentEvent> = events
5998            .iter()
5999            .filter(|e| matches!(e, AgentEvent::SubAgentCompleted { .. }))
6000            .collect();
6001        assert_eq!(
6002            completed.len(),
6003            4,
6004            "expected 4 SubAgentCompleted events (3 per-agent + 1 aggregate).\n{event_log}"
6005        );
6006
6007        // Per-agent completions: planner and worker succeed, reviewer fails
6008        let per_agent: Vec<&AgentEvent> = completed
6009            .iter()
6010            .filter(|e| {
6011                matches!(e, AgentEvent::SubAgentCompleted { agent, .. }
6012                    if !agent.starts_with("squad["))
6013            })
6014            .copied()
6015            .collect();
6016        assert_eq!(per_agent.len(), 3, "3 per-agent completion events");
6017
6018        // Find reviewer completion — should have success=false
6019        let reviewer_completed = per_agent.iter().find(
6020            |e| matches!(e, AgentEvent::SubAgentCompleted { agent, .. } if agent == "reviewer"),
6021        );
6022        assert!(
6023            reviewer_completed.is_some(),
6024            "should have reviewer SubAgentCompleted"
6025        );
6026        match reviewer_completed.unwrap() {
6027            AgentEvent::SubAgentCompleted { success, .. } => {
6028                assert!(!success, "reviewer should have failed");
6029            }
6030            _ => unreachable!(),
6031        }
6032
6033        // Find planner completion — should have success=true
6034        let planner_completed = per_agent.iter().find(
6035            |e| matches!(e, AgentEvent::SubAgentCompleted { agent, .. } if agent == "planner"),
6036        );
6037        assert!(
6038            planner_completed.is_some(),
6039            "should have planner SubAgentCompleted"
6040        );
6041        match planner_completed.unwrap() {
6042            AgentEvent::SubAgentCompleted { success, usage, .. } => {
6043                assert!(success, "planner should have succeeded");
6044                assert_eq!(usage.input_tokens, 20);
6045                assert_eq!(usage.output_tokens, 15);
6046                assert_eq!(usage.reasoning_tokens, 8);
6047            }
6048            _ => unreachable!(),
6049        }
6050
6051        // Aggregate squad completion event
6052        let squad_completed = completed.iter().find(|e| {
6053            matches!(e, AgentEvent::SubAgentCompleted { agent, .. }
6054                if agent.starts_with("squad["))
6055        });
6056        assert!(
6057            squad_completed.is_some(),
6058            "should have aggregate squad completion event.\n{event_log}"
6059        );
6060        match squad_completed.unwrap() {
6061            AgentEvent::SubAgentCompleted {
6062                agent,
6063                success,
6064                usage,
6065            } => {
6066                assert!(
6067                    agent.contains("planner")
6068                        && agent.contains("worker")
6069                        && agent.contains("reviewer"),
6070                    "aggregate label should list all agents: {agent}"
6071                );
6072                assert!(
6073                    !success,
6074                    "aggregate should be false because reviewer failed"
6075                );
6076                // Aggregate tokens = planner + worker + reviewer(0)
6077                assert_eq!(usage.input_tokens, 20 + 25 + 35, "aggregate input tokens");
6078                assert_eq!(usage.output_tokens, 15 + 12 + 18, "aggregate output tokens");
6079            }
6080            _ => unreachable!(),
6081        }
6082
6083        // 4d. Verify worker had tool events (ToolCallStarted + ToolCallCompleted for "compute")
6084        let tool_started: Vec<&AgentEvent> = events
6085            .iter()
6086            .filter(|e| {
6087                matches!(e, AgentEvent::ToolCallStarted { tool_name, .. } if tool_name == "compute")
6088            })
6089            .collect();
6090        // Due to JoinSet non-determinism, either planner or worker may call compute
6091        // (both have it). We just verify it was called.
6092        assert!(
6093            !tool_started.is_empty(),
6094            "should have at least one compute ToolCallStarted.\n{event_log}"
6095        );
6096
6097        let tool_completed: Vec<&AgentEvent> = events
6098            .iter()
6099            .filter(|e| {
6100                matches!(e, AgentEvent::ToolCallCompleted { tool_name, .. } if tool_name == "compute")
6101            })
6102            .collect();
6103        assert!(
6104            !tool_completed.is_empty(),
6105            "should have at least one compute ToolCallCompleted.\n{event_log}"
6106        );
6107        match tool_completed[0] {
6108            AgentEvent::ToolCallCompleted {
6109                output, is_error, ..
6110            } => {
6111                assert!(!is_error, "compute tool should succeed");
6112                assert!(
6113                    output.contains("714"),
6114                    "compute output should be '714', got: {output}"
6115                );
6116            }
6117            _ => unreachable!(),
6118        }
6119
6120        // 4e. RunStarted is first event for each agent
6121        for agent_name in &["orchestrator", "planner", "worker"] {
6122            let agent_events: Vec<&AgentEvent> = events
6123                .iter()
6124                .filter(|e| agent_of(e) == *agent_name)
6125                .collect();
6126            if !agent_events.is_empty() {
6127                assert!(
6128                    matches!(agent_events[0], AgentEvent::RunStarted { .. }),
6129                    "first event for '{agent_name}' should be RunStarted, got: {:?}\n{event_log}",
6130                    agent_events[0]
6131                );
6132            }
6133        }
6134
6135        // 4f. Reviewer should have RunStarted then RunFailed (provider error)
6136        let reviewer_events: Vec<&AgentEvent> = events
6137            .iter()
6138            .filter(|e| agent_of(e) == "reviewer")
6139            .collect();
6140        if !reviewer_events.is_empty() {
6141            assert!(
6142                matches!(reviewer_events[0], AgentEvent::RunStarted { .. }),
6143                "reviewer first event should be RunStarted"
6144            );
6145            // Check for RunFailed
6146            let has_failed = reviewer_events
6147                .iter()
6148                .any(|e| matches!(e, AgentEvent::RunFailed { .. }));
6149            assert!(
6150                has_failed,
6151                "reviewer should have a RunFailed event.\n{event_log}"
6152            );
6153        }
6154
6155        // 4g. SubAgentsDispatched appears before any SubAgentCompleted
6156        let dispatch_idx = events
6157            .iter()
6158            .position(|e| matches!(e, AgentEvent::SubAgentsDispatched { .. }));
6159        let first_completed_idx = events
6160            .iter()
6161            .position(|e| matches!(e, AgentEvent::SubAgentCompleted { .. }));
6162        if let (Some(d), Some(c)) = (dispatch_idx, first_completed_idx) {
6163            assert!(
6164                d < c,
6165                "SubAgentsDispatched (idx {d}) should precede SubAgentCompleted (idx {c})\n{event_log}"
6166            );
6167        }
6168
6169        // 4h. LlmResponse events should carry model info from MockProvider
6170        let llm_responses: Vec<&AgentEvent> = events
6171            .iter()
6172            .filter(|e| matches!(e, AgentEvent::LlmResponse { .. }))
6173            .collect();
6174        assert!(
6175            !llm_responses.is_empty(),
6176            "should have LlmResponse events.\n{event_log}"
6177        );
6178        for lr in &llm_responses {
6179            match lr {
6180                AgentEvent::LlmResponse { model, .. } => {
6181                    assert_eq!(
6182                        model.as_deref(),
6183                        Some("mock-model-v1"),
6184                        "LlmResponse should carry provider model name"
6185                    );
6186                }
6187                _ => unreachable!(),
6188            }
6189        }
6190
6191        // === 5. Verify total event count is reasonable ===
6192        // Orchestrator: RunStarted, TurnStarted, LlmResponse(squad call), ToolCallStarted(form_squad),
6193        //              ToolCallCompleted(form_squad), TurnStarted, LlmResponse(synthesis), RunCompleted = ~8
6194        // Planner: RunStarted, TurnStarted, LlmResponse, RunCompleted = ~4
6195        // Worker: RunStarted, TurnStarted, LlmResponse, ToolCallStarted(compute),
6196        //         ToolCallCompleted(compute), TurnStarted, LlmResponse, RunCompleted = ~8
6197        // Reviewer: RunStarted, TurnStarted(?), RunFailed = ~2-3
6198        // + SubAgentsDispatched + 3 SubAgentCompleted + 1 aggregate = 5
6199        // Total ~25-28 events
6200        assert!(
6201            events.len() >= 15,
6202            "expected at least 15 events for complex squad test, got {}.\n{event_log}",
6203            events.len(),
6204        );
6205    }
6206
6207    #[test]
6208    fn build_rejects_empty_sub_agent_name() {
6209        let provider = Arc::new(MockProvider::new(vec![]));
6210        let result = Orchestrator::builder(provider)
6211            .sub_agent("", "Empty name agent", "prompt")
6212            .build();
6213        match result {
6214            Err(Error::Config(msg)) => {
6215                assert!(
6216                    msg.contains("must not be empty"),
6217                    "expected empty name error, got: {msg}"
6218                );
6219            }
6220            Err(other) => panic!("expected Config error, got: {other:?}"),
6221            Ok(_) => panic!("expected error for empty sub-agent name"),
6222        }
6223    }
6224
6225    #[tokio::test]
6226    async fn instruction_text_wired_to_orchestrator_system_prompt() {
6227        // CapturingProvider records the system prompt from LLM calls.
6228        struct CapturingProvider {
6229            captured_systems: Mutex<Vec<String>>,
6230        }
6231        impl LlmProvider for CapturingProvider {
6232            async fn complete(
6233                &self,
6234                request: CompletionRequest,
6235            ) -> Result<CompletionResponse, Error> {
6236                self.captured_systems
6237                    .lock()
6238                    .expect("lock")
6239                    .push(request.system.clone());
6240                // Return end_turn immediately so the orchestrator finishes
6241                Ok(CompletionResponse {
6242                    content: vec![ContentBlock::Text {
6243                        text: "Task complete.".into(),
6244                    }],
6245                    stop_reason: StopReason::EndTurn,
6246                    usage: TokenUsage::default(),
6247                    model: None,
6248                })
6249            }
6250        }
6251
6252        let provider = Arc::new(CapturingProvider {
6253            captured_systems: Mutex::new(Vec::new()),
6254        });
6255        let mut orchestrator = Orchestrator::builder(provider.clone())
6256            .sub_agent("agent-a", "Does things", "You are agent A.")
6257            .instruction_text("Always verify your work.")
6258            .build()
6259            .unwrap();
6260
6261        let _output = orchestrator.run("test task").await.unwrap();
6262        let systems = provider.captured_systems.lock().expect("lock").clone();
6263        // The orchestrator's own LLM call should have instructions prepended
6264        assert!(!systems.is_empty(), "should have at least one LLM call");
6265        let orchestrator_system = &systems[0];
6266        assert!(
6267            orchestrator_system.contains("# Project Instructions"),
6268            "orchestrator system prompt should contain instruction header"
6269        );
6270        assert!(
6271            orchestrator_system.contains("Always verify your work."),
6272            "orchestrator system prompt should contain instruction text"
6273        );
6274    }
6275
6276    #[tokio::test]
6277    async fn permission_rules_propagate_to_sub_agents() {
6278        // Orchestrator-level permission rules should apply to sub-agent tool calls.
6279        // Here we deny "bash" at the orchestrator level and verify the worker's
6280        // bash call is rejected.
6281        let provider = Arc::new(MockProvider::new(vec![
6282            // 1: Orchestrator delegates to worker
6283            CompletionResponse {
6284                content: vec![ContentBlock::ToolUse {
6285                    id: "orch-1".into(),
6286                    name: "delegate_task".into(),
6287                    input: json!({
6288                        "tasks": [{"agent": "worker", "task": "run a bash command"}]
6289                    }),
6290                }],
6291                stop_reason: StopReason::ToolUse,
6292                usage: TokenUsage::default(),
6293                model: None,
6294            },
6295            // 2: Worker tries to call bash (will be denied by permission rules)
6296            CompletionResponse {
6297                content: vec![ContentBlock::ToolUse {
6298                    id: "worker-1".into(),
6299                    name: "bash".into(),
6300                    input: json!({"command": "echo hello"}),
6301                }],
6302                stop_reason: StopReason::ToolUse,
6303                usage: TokenUsage::default(),
6304                model: None,
6305            },
6306            // 3: Worker sees the denial and responds
6307            CompletionResponse {
6308                content: vec![ContentBlock::Text {
6309                    text: "Bash was denied.".into(),
6310                }],
6311                stop_reason: StopReason::EndTurn,
6312                usage: TokenUsage::default(),
6313                model: None,
6314            },
6315            // 4: Orchestrator synthesis
6316            CompletionResponse {
6317                content: vec![ContentBlock::Text {
6318                    text: "Worker reported bash was denied.".into(),
6319                }],
6320                stop_reason: StopReason::EndTurn,
6321                usage: TokenUsage::default(),
6322                model: None,
6323            },
6324        ]));
6325
6326        let events = Arc::new(Mutex::new(Vec::<AgentEvent>::new()));
6327        let events_clone = events.clone();
6328        let on_event: Arc<OnEvent> = Arc::new(move |event: AgentEvent| {
6329            events_clone.lock().expect("test lock").push(event);
6330        });
6331
6332        let deny_bash = crate::agent::permission::PermissionRuleset::new(vec![
6333            crate::agent::permission::PermissionRule {
6334                tool: "bash".into(),
6335                pattern: "*".into(),
6336                action: crate::agent::permission::PermissionAction::Deny,
6337            },
6338        ]);
6339
6340        let bash_tool: Arc<dyn Tool> = Arc::new(MockTool::new("bash", "executed"));
6341
6342        let mut orch = Orchestrator::builder(provider)
6343            .sub_agent_with_tools("worker", "Bash worker", "You run bash.", vec![bash_tool])
6344            .permission_rules(deny_bash)
6345            .on_event(on_event)
6346            .build()
6347            .unwrap();
6348
6349        let output = orch.run("run bash via worker").await.unwrap();
6350        assert_eq!(output.result, "Worker reported bash was denied.");
6351
6352        // The worker should NOT have ToolCallStarted/ToolCallCompleted for bash
6353        // because permission-denied calls skip event emission.
6354        let events = events.lock().expect("test lock");
6355        let worker_tool_events: Vec<_> = events
6356            .iter()
6357            .filter(|e| {
6358                matches!(
6359                    e,
6360                    AgentEvent::ToolCallStarted { agent, tool_name, .. }
6361                    | AgentEvent::ToolCallCompleted { agent, tool_name, .. }
6362                        if agent == "worker" && tool_name == "bash"
6363                )
6364            })
6365            .collect();
6366        assert!(
6367            worker_tool_events.is_empty(),
6368            "bash tool calls in worker should be denied (no events emitted), got: {worker_tool_events:?}"
6369        );
6370    }
6371
6372    #[tokio::test]
6373    async fn permission_rules_propagate_to_squad_members() {
6374        // Same test but via form_squad path.
6375        let provider = Arc::new(MockProvider::new(vec![
6376            // 1: Orchestrator forms a squad
6377            CompletionResponse {
6378                content: vec![ContentBlock::ToolUse {
6379                    id: "orch-1".into(),
6380                    name: "form_squad".into(),
6381                    input: json!({
6382                        "tasks": [
6383                            {"agent": "alpha", "task": "run bash"},
6384                            {"agent": "beta", "task": "say hello"}
6385                        ]
6386                    }),
6387                }],
6388                stop_reason: StopReason::ToolUse,
6389                usage: TokenUsage::default(),
6390                model: None,
6391            },
6392            // 2: alpha tries bash (denied)
6393            CompletionResponse {
6394                content: vec![ContentBlock::ToolUse {
6395                    id: "alpha-1".into(),
6396                    name: "bash".into(),
6397                    input: json!({"command": "ls"}),
6398                }],
6399                stop_reason: StopReason::ToolUse,
6400                usage: TokenUsage::default(),
6401                model: None,
6402            },
6403            // 3: alpha sees denial
6404            CompletionResponse {
6405                content: vec![ContentBlock::Text {
6406                    text: "Bash denied.".into(),
6407                }],
6408                stop_reason: StopReason::EndTurn,
6409                usage: TokenUsage::default(),
6410                model: None,
6411            },
6412            // 4: beta just responds (no bash)
6413            CompletionResponse {
6414                content: vec![ContentBlock::Text {
6415                    text: "Hello!".into(),
6416                }],
6417                stop_reason: StopReason::EndTurn,
6418                usage: TokenUsage::default(),
6419                model: None,
6420            },
6421            // 5: Orchestrator synthesis
6422            CompletionResponse {
6423                content: vec![ContentBlock::Text {
6424                    text: "Squad done.".into(),
6425                }],
6426                stop_reason: StopReason::EndTurn,
6427                usage: TokenUsage::default(),
6428                model: None,
6429            },
6430        ]));
6431
6432        let deny_bash = crate::agent::permission::PermissionRuleset::new(vec![
6433            crate::agent::permission::PermissionRule {
6434                tool: "bash".into(),
6435                pattern: "*".into(),
6436                action: crate::agent::permission::PermissionAction::Deny,
6437            },
6438        ]);
6439
6440        let bash_tool: Arc<dyn Tool> = Arc::new(MockTool::new("bash", "executed"));
6441
6442        let events = Arc::new(Mutex::new(Vec::<AgentEvent>::new()));
6443        let events_clone = events.clone();
6444        let on_event: Arc<OnEvent> = Arc::new(move |event: AgentEvent| {
6445            events_clone.lock().expect("test lock").push(event);
6446        });
6447
6448        let mut orch = Orchestrator::builder(provider)
6449            .sub_agent_with_tools(
6450                "alpha",
6451                "Alpha agent",
6452                "You run bash.",
6453                vec![bash_tool.clone()],
6454            )
6455            .sub_agent("beta", "Beta agent", "You say hello.")
6456            .permission_rules(deny_bash)
6457            .on_event(on_event)
6458            .build()
6459            .unwrap();
6460
6461        let output = orch.run("form a squad").await.unwrap();
6462        assert_eq!(output.result, "Squad done.");
6463
6464        // Alpha's bash call should be denied — no ToolCallStarted/Completed events for bash
6465        let events = events.lock().expect("test lock");
6466        let bash_events: Vec<_> = events
6467            .iter()
6468            .filter(|e| {
6469                matches!(
6470                    e,
6471                    AgentEvent::ToolCallStarted { tool_name, .. }
6472                    | AgentEvent::ToolCallCompleted { tool_name, .. }
6473                        if tool_name == "bash"
6474                )
6475            })
6476            .collect();
6477        assert!(
6478            bash_events.is_empty(),
6479            "bash tool calls in squad should be denied (no events), got: {bash_events:?}"
6480        );
6481    }
6482
6483    #[test]
6484    fn workspace_propagates_from_builder_to_sub_agents() {
6485        let provider = Arc::new(MockProvider::new(vec![]));
6486        let builder = Orchestrator::builder(provider)
6487            .workspace("/shared/workspace")
6488            .sub_agent_full(SubAgentConfig {
6489                name: "agent1".into(),
6490                description: "test".into(),
6491                workspace: None, // Should inherit from builder
6492                ..Default::default()
6493            });
6494
6495        let agent = &builder.sub_agents[0];
6496        assert_eq!(
6497            agent.workspace.as_deref(),
6498            Some(std::path::Path::new("/shared/workspace")),
6499            "sub-agent should inherit workspace from builder"
6500        );
6501    }
6502
6503    #[test]
6504    fn sub_agent_workspace_overrides_builder() {
6505        let provider = Arc::new(MockProvider::new(vec![]));
6506        let builder = Orchestrator::builder(provider)
6507            .workspace("/shared/workspace")
6508            .sub_agent_full(SubAgentConfig {
6509                name: "agent1".into(),
6510                description: "test".into(),
6511                workspace: Some("/custom/workspace".into()),
6512                ..Default::default()
6513            });
6514
6515        let agent = &builder.sub_agents[0];
6516        assert_eq!(
6517            agent.workspace.as_deref(),
6518            Some(std::path::Path::new("/custom/workspace")),
6519            "sub-agent should use its own workspace over builder's"
6520        );
6521    }
6522
6523    #[test]
6524    fn no_workspace_when_builder_has_none() {
6525        let provider = Arc::new(MockProvider::new(vec![]));
6526        let builder = Orchestrator::builder(provider).sub_agent_full(SubAgentConfig {
6527            name: "agent1".into(),
6528            description: "test".into(),
6529            workspace: None,
6530            ..Default::default()
6531        });
6532
6533        let agent = &builder.sub_agents[0];
6534        assert!(
6535            agent.workspace.is_none(),
6536            "sub-agent should have no workspace when builder has none"
6537        );
6538    }
6539
6540    #[test]
6541    fn multi_agent_prompt_enabled_by_default() {
6542        let provider = Arc::new(MockProvider::new(vec![]));
6543        let builder = Orchestrator::builder(provider);
6544        assert!(builder.multi_agent_prompt);
6545    }
6546
6547    #[test]
6548    fn multi_agent_prompt_can_be_disabled() {
6549        let provider = Arc::new(MockProvider::new(vec![]));
6550        let builder = Orchestrator::builder(provider).multi_agent_prompt(false);
6551        assert!(!builder.multi_agent_prompt);
6552    }
6553
6554    #[test]
6555    fn build_injects_collab_prompt_when_enabled() {
6556        let provider = Arc::new(MockProvider::new(vec![]));
6557        let mut builder = Orchestrator::builder(provider).sub_agent(
6558            "writer",
6559            "Writes content",
6560            "You are a writer.",
6561        );
6562        // Before build, prompt is not yet injected
6563        assert!(
6564            !builder.sub_agents[0]
6565                .system_prompt
6566                .contains("MULTI-AGENT COLLABORATION PROTOCOL")
6567        );
6568        // Manually apply the same logic as build() to inspect
6569        if builder.multi_agent_prompt {
6570            for agent in &mut builder.sub_agents {
6571                agent
6572                    .system_prompt
6573                    .push_str(&crate::agent::prompts::render_collab_prompt(
6574                        &agent.name,
6575                        &agent.description,
6576                    ));
6577            }
6578        }
6579        assert!(
6580            builder.sub_agents[0]
6581                .system_prompt
6582                .contains("MULTI-AGENT COLLABORATION PROTOCOL")
6583        );
6584        assert!(builder.sub_agents[0].system_prompt.contains("`writer`"));
6585        assert!(
6586            builder.sub_agents[0]
6587                .system_prompt
6588                .contains("Writes content")
6589        );
6590    }
6591
6592    #[test]
6593    fn build_omits_collab_prompt_when_disabled() {
6594        let provider = Arc::new(MockProvider::new(vec![]));
6595        let mut builder = Orchestrator::builder(provider)
6596            .multi_agent_prompt(false)
6597            .sub_agent("writer", "Writes content", "You are a writer.");
6598        // Manually apply the same logic as build()
6599        if builder.multi_agent_prompt {
6600            for agent in &mut builder.sub_agents {
6601                agent
6602                    .system_prompt
6603                    .push_str(&crate::agent::prompts::render_collab_prompt(
6604                        &agent.name,
6605                        &agent.description,
6606                    ));
6607            }
6608        }
6609        assert!(
6610            !builder.sub_agents[0]
6611                .system_prompt
6612                .contains("MULTI-AGENT COLLABORATION PROTOCOL")
6613        );
6614        assert_eq!(builder.sub_agents[0].system_prompt, "You are a writer.");
6615    }
6616
6617    // ── SpawnAgentTool tests ──
6618
6619    fn make_spawn_config() -> crate::types::SpawnConfig {
6620        crate::types::SpawnConfig {
6621            max_spawned_agents: 3,
6622            tool_allowlist: vec![],
6623            max_turns: 5,
6624            max_tokens: 1024,
6625            max_total_tokens: 10_000,
6626        }
6627    }
6628
6629    fn build_spawn_tool(
6630        provider: Arc<MockProvider>,
6631        config: crate::types::SpawnConfig,
6632        tools: Vec<Arc<dyn Tool>>,
6633    ) -> SpawnAgentTool {
6634        let mut tool_pool = std::collections::HashMap::new();
6635        for tool in &tools {
6636            let name = tool.definition().name;
6637            if config.tool_allowlist.contains(&name) {
6638                tool_pool.insert(name, tool.clone());
6639            }
6640        }
6641        let cached_definition = SpawnAgentTool::build_definition(&config);
6642        SpawnAgentTool {
6643            shared_provider: Arc::new(BoxedProvider::from_arc(provider)),
6644            spawn_config: config,
6645            tool_pool,
6646            spawn_count: Arc::new(std::sync::atomic::AtomicU32::new(0)),
6647            spawned_names: Arc::new(Mutex::new(std::collections::HashSet::new())),
6648            accumulated_tokens: Arc::new(Mutex::new(TokenUsage::default())),
6649            permission_rules: crate::agent::permission::PermissionRuleset::default(),
6650            shared_memory: None,
6651            memory_namespace_prefix: None,
6652            on_event: None,
6653            on_text: None,
6654            lsp_manager: None,
6655            observability_mode: crate::agent::observability::ObservabilityMode::Production,
6656            workspace: None,
6657            guardrails: vec![],
6658            audit_trail: None,
6659            audit_user_id: None,
6660            audit_tenant_id: None,
6661            audit_delegation_chain: vec![],
6662            cached_definition,
6663            tenant_tracker: None,
6664        }
6665    }
6666
6667    #[tokio::test]
6668    async fn spawn_agent_basic_execution() {
6669        let provider = Arc::new(MockProvider::new(vec![CompletionResponse {
6670            content: vec![ContentBlock::Text {
6671                text: "Tax analysis complete.".into(),
6672            }],
6673            usage: TokenUsage {
6674                input_tokens: 50,
6675                output_tokens: 20,
6676                ..Default::default()
6677            },
6678            stop_reason: StopReason::EndTurn,
6679            model: None,
6680        }]));
6681
6682        let tool = build_spawn_tool(provider, make_spawn_config(), vec![]);
6683
6684        let result = tool
6685            .spawn(SpawnAgentInput {
6686                name: "tax_specialist".into(),
6687                system_prompt: "You are a tax law expert.".into(),
6688                tools: vec![],
6689                task: "Analyze tax implications.".into(),
6690            })
6691            .await
6692            .unwrap();
6693
6694        assert!(!result.is_error);
6695        assert!(result.content.contains("Tax analysis complete."));
6696        assert!(result.content.contains("spawn:tax_specialist"));
6697        assert_eq!(
6698            tool.spawn_count.load(std::sync::atomic::Ordering::Relaxed),
6699            1
6700        );
6701    }
6702
6703    #[tokio::test]
6704    async fn spawn_agent_rejects_invalid_name() {
6705        let provider = Arc::new(MockProvider::new(vec![]));
6706        let tool = build_spawn_tool(provider, make_spawn_config(), vec![]);
6707
6708        let invalid_names = vec![
6709            "Tax-Specialist",
6710            "123abc",
6711            "",
6712            "has spaces",
6713            "../path",
6714            "a/b",
6715            "UPPER",
6716        ];
6717
6718        for name in invalid_names {
6719            let result = tool
6720                .spawn(SpawnAgentInput {
6721                    name: name.into(),
6722                    system_prompt: "test".into(),
6723                    tools: vec![],
6724                    task: "test".into(),
6725                })
6726                .await
6727                .unwrap();
6728            assert!(
6729                result.is_error,
6730                "expected error for name '{name}', got success: {}",
6731                result.content
6732            );
6733            assert!(
6734                result.content.contains("Invalid agent name"),
6735                "expected 'Invalid agent name' in error for name '{name}', got: {}",
6736                result.content
6737            );
6738        }
6739    }
6740
6741    #[tokio::test]
6742    async fn spawn_agent_rejects_duplicate_name() {
6743        let provider = Arc::new(MockProvider::new(vec![
6744            CompletionResponse {
6745                content: vec![ContentBlock::Text {
6746                    text: "done".into(),
6747                }],
6748                usage: TokenUsage::default(),
6749                stop_reason: StopReason::EndTurn,
6750                model: None,
6751            },
6752            // Second response shouldn't be needed
6753        ]));
6754        let tool = build_spawn_tool(provider, make_spawn_config(), vec![]);
6755
6756        // First spawn succeeds
6757        let r1 = tool
6758            .spawn(SpawnAgentInput {
6759                name: "helper".into(),
6760                system_prompt: "test".into(),
6761                tools: vec![],
6762                task: "test".into(),
6763            })
6764            .await
6765            .unwrap();
6766        assert!(!r1.is_error);
6767
6768        // Second spawn with same name fails
6769        let r2 = tool
6770            .spawn(SpawnAgentInput {
6771                name: "helper".into(),
6772                system_prompt: "test".into(),
6773                tools: vec![],
6774                task: "test".into(),
6775            })
6776            .await
6777            .unwrap();
6778        assert!(r2.is_error);
6779        assert!(r2.content.contains("already used"));
6780    }
6781
6782    #[tokio::test]
6783    async fn spawn_agent_enforces_tool_allowlist() {
6784        let provider = Arc::new(MockProvider::new(vec![]));
6785        let mut config = make_spawn_config();
6786        config.tool_allowlist = vec!["mock_read".into()];
6787
6788        let mock = MockTool::new("mock_read", "file content");
6789        let tool = build_spawn_tool(provider, config, vec![Arc::new(mock)]);
6790
6791        // Request a tool not in the allowlist
6792        let result = tool
6793            .spawn(SpawnAgentInput {
6794                name: "reader".into(),
6795                system_prompt: "test".into(),
6796                tools: vec!["bash".into()],
6797                task: "test".into(),
6798            })
6799            .await
6800            .unwrap();
6801        assert!(result.is_error);
6802        assert!(result.content.contains("not in allowlist"));
6803    }
6804
6805    #[tokio::test]
6806    async fn spawn_agent_count_cap() {
6807        let provider = Arc::new(MockProvider::new(vec![CompletionResponse {
6808            content: vec![ContentBlock::Text {
6809                text: "done".into(),
6810            }],
6811            usage: TokenUsage::default(),
6812            stop_reason: StopReason::EndTurn,
6813            model: None,
6814        }]));
6815        let mut config = make_spawn_config();
6816        config.max_spawned_agents = 1;
6817        let tool = build_spawn_tool(provider, config, vec![]);
6818
6819        // First spawn succeeds
6820        let r1 = tool
6821            .spawn(SpawnAgentInput {
6822                name: "first".into(),
6823                system_prompt: "test".into(),
6824                tools: vec![],
6825                task: "test".into(),
6826            })
6827            .await
6828            .unwrap();
6829        assert!(!r1.is_error);
6830
6831        // Second spawn hits the cap
6832        let r2 = tool
6833            .spawn(SpawnAgentInput {
6834                name: "second".into(),
6835                system_prompt: "test".into(),
6836                tools: vec![],
6837                task: "test".into(),
6838            })
6839            .await
6840            .unwrap();
6841        assert!(r2.is_error);
6842        assert!(r2.content.contains("Spawn limit reached"));
6843    }
6844
6845    #[tokio::test]
6846    async fn spawn_agent_token_budget_enforcement() {
6847        let provider = Arc::new(MockProvider::new(vec![]));
6848        let mut config = make_spawn_config();
6849        config.max_total_tokens = 100;
6850        let tool = build_spawn_tool(provider, config, vec![]);
6851
6852        // Pre-fill token accumulator near the limit
6853        {
6854            let mut acc = tool.accumulated_tokens.lock().unwrap();
6855            *acc = TokenUsage {
6856                input_tokens: 60,
6857                output_tokens: 50,
6858                ..Default::default()
6859            };
6860        }
6861
6862        let result = tool
6863            .spawn(SpawnAgentInput {
6864                name: "spender".into(),
6865                system_prompt: "test".into(),
6866                tools: vec![],
6867                task: "test".into(),
6868            })
6869            .await
6870            .unwrap();
6871        assert!(result.is_error);
6872        assert!(result.content.contains("budget exhausted"));
6873    }
6874
6875    #[tokio::test]
6876    async fn spawn_agent_no_delegation_tools() {
6877        // Verify that delegation tools are stripped from the pool even if
6878        // somehow included in the allowlist
6879        let provider = Arc::new(MockProvider::new(vec![]));
6880        let mut config = make_spawn_config();
6881        config.tool_allowlist = vec!["mock_read".into()];
6882
6883        let mock = MockTool::new("mock_read", "content");
6884        let tools: Vec<Arc<dyn Tool>> = vec![Arc::new(mock)];
6885
6886        let mut tool_pool = std::collections::HashMap::new();
6887        for t in &tools {
6888            let name = t.definition().name;
6889            if config.tool_allowlist.contains(&name) {
6890                tool_pool.insert(name, t.clone());
6891            }
6892        }
6893        // Manually inject a delegation tool to prove it gets stripped in build()
6894        tool_pool.insert(
6895            "delegate_task".into(),
6896            Arc::new(MockTool::new("delegate_task", "bad")),
6897        );
6898        tool_pool.remove("delegate_task");
6899        tool_pool.remove("form_squad");
6900        tool_pool.remove("spawn_agent");
6901
6902        // Pool should only have mock_read
6903        assert!(tool_pool.contains_key("mock_read"));
6904        assert!(!tool_pool.contains_key("delegate_task"));
6905        assert!(!tool_pool.contains_key("form_squad"));
6906        assert!(!tool_pool.contains_key("spawn_agent"));
6907        drop(provider);
6908    }
6909
6910    #[tokio::test]
6911    async fn spawn_agent_emits_events() {
6912        let events: Arc<Mutex<Vec<AgentEvent>>> = Arc::new(Mutex::new(vec![]));
6913        let events_clone = events.clone();
6914
6915        let provider = Arc::new(MockProvider::new(vec![CompletionResponse {
6916            content: vec![ContentBlock::Text {
6917                text: "result".into(),
6918            }],
6919            usage: TokenUsage::default(),
6920            stop_reason: StopReason::EndTurn,
6921            model: None,
6922        }]));
6923
6924        let mut tool = build_spawn_tool(provider, make_spawn_config(), vec![]);
6925        tool.on_event = Some(Arc::new(move |e: AgentEvent| {
6926            events_clone.lock().unwrap().push(e);
6927        }));
6928
6929        let _ = tool
6930            .spawn(SpawnAgentInput {
6931                name: "emitter".into(),
6932                system_prompt: "test".into(),
6933                tools: vec![],
6934                task: "test".into(),
6935            })
6936            .await;
6937
6938        let events = events.lock().unwrap();
6939        let spawned = events
6940            .iter()
6941            .any(|e| matches!(e, AgentEvent::AgentSpawned { spawned_name, .. } if spawned_name == "spawn:emitter"));
6942        assert!(spawned, "expected AgentSpawned event");
6943
6944        let completed = events
6945            .iter()
6946            .any(|e| matches!(e, AgentEvent::SubAgentCompleted { agent, success, .. } if agent == "spawn:emitter" && *success));
6947        assert!(completed, "expected SubAgentCompleted event");
6948    }
6949
6950    #[tokio::test]
6951    async fn spawn_agent_empty_tools() {
6952        // An empty tools array should create a reasoning-only agent that works
6953        let provider = Arc::new(MockProvider::new(vec![CompletionResponse {
6954            content: vec![ContentBlock::Text {
6955                text: "Pure reasoning response".into(),
6956            }],
6957            usage: TokenUsage::default(),
6958            stop_reason: StopReason::EndTurn,
6959            model: None,
6960        }]));
6961
6962        let tool = build_spawn_tool(provider, make_spawn_config(), vec![]);
6963
6964        let result = tool
6965            .spawn(SpawnAgentInput {
6966                name: "thinker".into(),
6967                system_prompt: "You are a reasoning agent.".into(),
6968                tools: vec![],
6969                task: "Think about this.".into(),
6970            })
6971            .await
6972            .unwrap();
6973        assert!(!result.is_error);
6974        assert!(result.content.contains("Pure reasoning response"));
6975    }
6976
6977    #[tokio::test]
6978    async fn spawn_agent_prompt_too_long() {
6979        let provider = Arc::new(MockProvider::new(vec![]));
6980        let tool = build_spawn_tool(provider, make_spawn_config(), vec![]);
6981
6982        let long_prompt = "x".repeat(SPAWN_MAX_PROMPT_BYTES + 1);
6983        let result = tool
6984            .spawn(SpawnAgentInput {
6985                name: "verbose".into(),
6986                system_prompt: long_prompt,
6987                tools: vec![],
6988                task: "test".into(),
6989            })
6990            .await
6991            .unwrap();
6992        assert!(result.is_error);
6993        assert!(result.content.contains("System prompt too long"));
6994    }
6995
6996    #[test]
6997    fn spawn_config_validation_rejects_zero_agents() {
6998        let toml_str = r#"
6999[provider]
7000name = "anthropic"
7001model = "claude-sonnet-4-20250514"
7002
7003[orchestrator.spawn]
7004max_spawned_agents = 0
7005"#;
7006        let err = crate::config::HeartbitConfig::from_toml(toml_str).unwrap_err();
7007        assert!(
7008            err.to_string()
7009                .contains("max_spawned_agents must be at least 1"),
7010            "err: {err}"
7011        );
7012    }
7013
7014    #[test]
7015    fn spawn_config_validation_rejects_zero_turns() {
7016        let toml_str = r#"
7017[provider]
7018name = "anthropic"
7019model = "claude-sonnet-4-20250514"
7020
7021[orchestrator.spawn]
7022max_turns = 0
7023"#;
7024        let err = crate::config::HeartbitConfig::from_toml(toml_str).unwrap_err();
7025        assert!(
7026            err.to_string().contains("max_turns must be at least 1"),
7027            "err: {err}"
7028        );
7029    }
7030
7031    #[test]
7032    fn spawn_config_from_toml() {
7033        let toml_str = r#"
7034[provider]
7035name = "anthropic"
7036model = "claude-sonnet-4-20250514"
7037
7038[orchestrator.spawn]
7039max_spawned_agents = 5
7040tool_allowlist = ["read", "grep", "bash"]
7041max_turns = 20
7042max_tokens = 8192
7043max_total_tokens = 100000
7044"#;
7045        let config: crate::config::HeartbitConfig = toml::from_str(toml_str).unwrap();
7046        let spawn = config.orchestrator.spawn.as_ref().unwrap();
7047        assert_eq!(spawn.max_spawned_agents, 5);
7048        assert_eq!(spawn.tool_allowlist, vec!["read", "grep", "bash"]);
7049        assert_eq!(spawn.max_turns, 20);
7050        assert_eq!(spawn.max_tokens, 8192);
7051        assert_eq!(spawn.max_total_tokens, 100_000);
7052    }
7053
7054    #[test]
7055    fn spawn_disabled_by_default() {
7056        let toml_str = r#"
7057[provider]
7058name = "anthropic"
7059model = "claude-sonnet-4-20250514"
7060"#;
7061        let config: crate::config::HeartbitConfig = toml::from_str(toml_str).unwrap();
7062        assert!(config.orchestrator.spawn.is_none());
7063    }
7064
7065    #[test]
7066    fn spawn_config_invalid_tool_rejected_at_build() {
7067        let provider = Arc::new(MockProvider::new(vec![]));
7068        let mut config = make_spawn_config();
7069        config.tool_allowlist = vec!["nonexistent_tool".into()];
7070
7071        let result = Orchestrator::builder(provider)
7072            .sub_agent("worker", "does work", "You work.")
7073            .spawn_config(config, vec![]) // empty builtins
7074            .build();
7075
7076        match result {
7077            Err(e) => {
7078                let msg = e.to_string();
7079                assert!(
7080                    msg.contains("nonexistent_tool"),
7081                    "expected error mentioning the bad tool, got: {msg}"
7082                );
7083            }
7084            Ok(_) => panic!("expected build error for invalid tool in allowlist"),
7085        }
7086    }
7087
7088    #[test]
7089    fn spawn_tool_definition_includes_allowlist() {
7090        let mut config = make_spawn_config();
7091        config.tool_allowlist = vec!["read".into(), "grep".into()];
7092        let def = SpawnAgentTool::build_definition(&config);
7093        assert_eq!(def.name, "spawn_agent");
7094        assert!(def.description.contains("read, grep"));
7095        assert!(def.description.contains("3 agents max"));
7096    }
7097
7098    #[tokio::test]
7099    async fn spawn_system_prompt_added_when_configured() {
7100        // When spawn is configured, the orchestrator should have spawn_agent in its tool list.
7101        // We verify by running the orchestrator and inspecting the LLM request.
7102        use std::sync::Arc;
7103
7104        let provider = Arc::new(MockProvider::new(vec![
7105            // Orchestrator response (EndTurn, no tool calls)
7106            CompletionResponse {
7107                content: vec![ContentBlock::Text {
7108                    text: "No delegation needed.".into(),
7109                }],
7110                usage: TokenUsage::default(),
7111                stop_reason: StopReason::EndTurn,
7112                model: None,
7113            },
7114        ]));
7115
7116        let config = make_spawn_config();
7117        let mut orchestrator = Orchestrator::builder(provider)
7118            .sub_agent("worker", "does work", "You work.")
7119            .spawn_config(config, vec![])
7120            .build()
7121            .unwrap();
7122
7123        let output = orchestrator.run("test task").await.unwrap();
7124        // If spawn_agent tool is present, the orchestrator prompt should mention it
7125        assert!(output.result.contains("No delegation needed."));
7126    }
7127
7128    /// Verify that when `OrchestratorBuilder::tenant_tracker` is set, both the
7129    /// orchestrator's own LLM calls AND sub-agent LLM calls are tracked by the
7130    /// same `TenantTokenTracker`.
7131    ///
7132    /// `adjust()` in the runner is a reconciliation call — it only mutates state
7133    /// for tenants that already have a map entry (created by a prior `reserve()`).
7134    /// We simulate the daemon's submit-time admission check by doing a
7135    /// `reserve()`+`drop()` before the run, which creates the entry. The `adjust()`
7136    /// calls during the run then accumulate into `in_flight` and `high_water`.
7137    ///
7138    /// ## Discriminating test design
7139    ///
7140    /// We make the sub-agent's token count **dominate** so `high_water` is only
7141    /// large when the sub-agent is wired to the tracker:
7142    ///
7143    ///   - Orchestrator turn 1 (delegates): 5 input + 5 output = 10 cumulative.
7144    ///   - Sub-agent turn   (completes):   400 input + 100 output = 500 total.
7145    ///   - Orchestrator turn 2 (synth):     2 input + 2 output — orch cumulative = 14.
7146    ///
7147    /// **With** sub-agent wired: peak in-flight = 10 (orch) + 500 (sub) = 510 → `high_water = 510`.
7148    /// **Without** sub-agent wired: sub-agent never adjusts → peak = 14 → `high_water = 14`.
7149    ///
7150    /// Asserting `high_water >= 500` is only true when the sub-agent is wired.
7151    #[tokio::test(flavor = "multi_thread")]
7152    async fn orchestrator_propagates_tenant_tracker_to_sub_agents() {
7153        use crate::agent::tenant_tracker::TenantTokenTracker;
7154        use crate::auth::TenantScope;
7155
7156        let tracker = Arc::new(TenantTokenTracker::new(1_000_000));
7157
7158        // Simulate the admission-check reservation (creates the map entry).
7159        // Drop immediately — this matches the daemon's submit-time semantics.
7160        let scope = TenantScope::new("tenant-abc");
7161        drop(tracker.reserve(&scope, 100_000).unwrap());
7162
7163        // Verify the entry now exists with in_flight=0.
7164        let initial_snap = tracker.snapshot();
7165        assert_eq!(initial_snap.len(), 1, "entry must exist after reserve+drop");
7166        assert_eq!(initial_snap[0].1.in_flight, 0);
7167
7168        // Orchestrator turn 1 tokens: tiny (so the sub-agent dominates high_water).
7169        // Sub-agent tokens: large (500) so high_water > 500 only when wired.
7170        // Orchestrator turn 3 tokens: tiny (stays below 500).
7171        let provider = Arc::new(MockProvider::new(vec![
7172            // Orchestrator turn 1: delegates to "worker" (5+5=10 tokens)
7173            CompletionResponse {
7174                content: vec![ContentBlock::ToolUse {
7175                    id: "tt-call-1".into(),
7176                    name: "delegate_task".into(),
7177                    input: json!({
7178                        "tasks": [{"agent": "worker", "task": "do work"}]
7179                    }),
7180                }],
7181                stop_reason: StopReason::ToolUse,
7182                usage: TokenUsage {
7183                    input_tokens: 5,
7184                    output_tokens: 5,
7185                    ..Default::default()
7186                },
7187                model: None,
7188            },
7189            // Sub-agent "worker": large token usage (400+100=500)
7190            CompletionResponse {
7191                content: vec![ContentBlock::Text {
7192                    text: "Work done.".into(),
7193                }],
7194                stop_reason: StopReason::EndTurn,
7195                usage: TokenUsage {
7196                    input_tokens: 400,
7197                    output_tokens: 100,
7198                    ..Default::default()
7199                },
7200                model: None,
7201            },
7202            // Orchestrator turn 2: synthesises (2+2=4 tokens; cumulative=14)
7203            CompletionResponse {
7204                content: vec![ContentBlock::Text {
7205                    text: "All done.".into(),
7206                }],
7207                stop_reason: StopReason::EndTurn,
7208                usage: TokenUsage {
7209                    input_tokens: 2,
7210                    output_tokens: 2,
7211                    ..Default::default()
7212                },
7213                model: None,
7214            },
7215        ]));
7216
7217        // audit_user_context MUST be set before sub_agent() so the tenant ID
7218        // propagates into the sub-agent def (the builder copies it at call time).
7219        let mut orch = Orchestrator::builder(provider)
7220            .audit_user_context("user-1", "tenant-abc")
7221            .sub_agent("worker", "Does work", "You work.")
7222            .tenant_tracker(tracker.clone())
7223            .build()
7224            .unwrap();
7225
7226        orch.run("do work").await.unwrap();
7227
7228        // high_water check: with wiring, peak = orch(10) + sub-agent(500) = 510.
7229        // Without wiring, peak = orch turn-2 cumulative (14) — well below 500.
7230        let snap_mid = tracker.snapshot();
7231        let state_mid = snap_mid
7232            .iter()
7233            .find(|(tid, _)| tid == "tenant-abc")
7234            .map(|(_, s)| s.clone())
7235            .expect("entry for 'tenant-abc' should still exist after the run");
7236
7237        assert!(
7238            state_mid.high_water >= 500,
7239            "high_water ({}) must be >= 500 (sub-agent's 500 tokens); \
7240             if it is < 500, the sub-agent runner is not propagating the tracker",
7241            state_mid.high_water
7242        );
7243
7244        // Drop orch to release the orchestrator runner's RAII hold, then confirm
7245        // in_flight returns to 0.
7246        drop(orch);
7247
7248        let snap_final = tracker.snapshot();
7249        let state_final = snap_final
7250            .iter()
7251            .find(|(tid, _)| tid == "tenant-abc")
7252            .map(|(_, s)| s.clone())
7253            .expect("entry should still exist after drop");
7254
7255        assert_eq!(
7256            state_final.in_flight, 0,
7257            "in_flight should return to 0 after all runners are dropped"
7258        );
7259    }
7260}
heartbit_core/agent/orchestrator.rs

heartbit_core/agent/
orchestrator.rs