// car-multi 0.32.0

//! Swarm — N agents working on the same problem.
//!
//! Modes:
//! - **Parallel**: all agents run concurrently, then a synthesizer combines results.
//! - **Sequential**: agents run one after another, each seeing prior agents' outputs.
//! - **Debate**: two parallel rounds — independent answers, then each agent critiques the
//!   others' answers — after which the outputs of both rounds are synthesized together.

use crate::error::MultiError;
use crate::mailbox::Mailbox;
use crate::runner::AgentRunner;
use crate::shared::SharedInfra;
use crate::types::{AgentOutput, AgentSpec};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use std::time::Instant;
use tracing::instrument;

/// Scheduling strategy a [`Swarm`] uses to run its agents.
///
/// Serialized with `snake_case` variant names.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum SwarmMode {
    /// All agents run concurrently on the same task.
    Parallel,
    /// Agents run one after another; each is shown the successful answers of
    /// the agents that ran before it.
    Sequential,
    /// A parallel answer round followed by a parallel critique round; the
    /// outputs of both rounds are then synthesized.
    Debate,
}

/// Outcome of a single [`Swarm::run`] call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SwarmResult {
    /// The task string the swarm was run with.
    pub task: String,
    /// Per-agent outputs, including agents that failed or were skipped. In
    /// debate mode this holds the outputs of both rounds.
    pub outputs: Vec<AgentOutput>,
    /// The combined answer produced by the synthesis step.
    pub final_summary: String,
}

/// A group of agents that work on the same task under one [`SwarmMode`].
///
/// Build one with [`Swarm::new`] and the `with_*` methods, then call
/// [`Swarm::run`].
pub struct Swarm {
    /// The participating agents; outputs are reported in this order.
    pub agents: Vec<AgentSpec>,
    /// How the agents are scheduled.
    pub mode: SwarmMode,
    /// Optional agent that combines the individual answers into the final
    /// summary. Without it, the successful answers are concatenated.
    pub synthesizer: Option<AgentSpec>,
    /// When true, each agent gets an isolated state overlay.
    /// Writes go to a per-agent local store; reads fall through to the shared parent.
    /// On success, local state is merged back to the parent.
    ///
    /// NOTE(review): within this file only the parallel path reads this flag.
    pub isolated: bool,
    /// When set (parallel mode), each agent gets an isolated filesystem
    /// workspace, advertised to the runner via `AgentSpec.metadata["workspace"]`.
    pub workspaces: Option<crate::workspace::WorkspaceConfig>,
}

impl Swarm {
    pub fn new(agents: Vec<AgentSpec>, mode: SwarmMode) -> Self {
        Self {
            agents,
            mode,
            synthesizer: None,
            isolated: false,
            workspaces: None,
        }
    }

    pub fn with_synthesizer(mut self, spec: AgentSpec) -> Self {
        self.synthesizer = Some(spec);
        self
    }

    /// Enable per-agent state isolation for this swarm.
    pub fn with_isolation(mut self) -> Self {
        self.isolated = true;
        self
    }

    /// Provision an isolated filesystem workspace per agent (parallel mode). Each
    /// agent's [`AgentSpec`] gets a `workspace` metadata entry with its directory;
    /// the runner is expected to run its file tools there. Workspaces are removed
    /// when the run completes. Prevents parallel file-mutating agents from
    /// clobbering one another.
    pub fn with_workspaces(mut self, config: crate::workspace::WorkspaceConfig) -> Self {
        self.workspaces = Some(config);
        self
    }

    /// Runs the swarm on `task` using the strategy selected by `self.mode`.
    ///
    /// `runner` executes each individual agent; `infra` supplies the shared
    /// state, budget and runtimes the agents operate on.
    ///
    /// The future is boxed because debate mode calls back into `run` for each
    /// of its rounds.
    ///
    /// # Errors
    /// Propagates whatever the selected mode returns as `Err`.
    // NOTE(review): the body is kept as a single trailing
    // `Box::pin(async move { .. })` expression; presumably `#[instrument]`
    // relies on that shape to attach the span to the future rather than only
    // to this call — confirm before restructuring.
    #[instrument(name = "multi.swarm", skip_all)]
    pub fn run<'a>(
        &'a self,
        task: &'a str,
        runner: &'a Arc<dyn AgentRunner>,
        infra: &'a SharedInfra,
    ) -> futures::future::BoxFuture<'a, Result<SwarmResult, MultiError>> {
        Box::pin(async move {
            match self.mode {
                SwarmMode::Parallel => self.run_parallel(task, runner, infra).await,
                SwarmMode::Sequential => self.run_sequential(task, runner, infra).await,
                SwarmMode::Debate => self.run_debate(task, runner, infra).await,
            }
        })
    }

    /// Runs every agent concurrently on `task` and combines their answers.
    ///
    /// The work happens in this order:
    /// 1. Per agent: provision its workspace (if configured), reserve a budget
    ///    slot, build a runtime with the agent's tools registered, and spawn
    ///    the agent as a task. An agent whose workspace cannot be provisioned,
    ///    or which the budget denies, becomes a skipped output instead.
    /// 2. Await all spawned tasks, then resolve each slot without committing
    ///    anything. Runner errors and join errors become error outputs.
    /// 3. When the swarm is isolated and `infra.concurrency` is set, run the
    ///    concurrency gate over the collected schedule. If the gate aborts, the
    ///    batch's reported spend is metered and the call returns an error.
    /// 4. For each surviving agent: meter its output, write its answer to
    ///    shared state under `agent.<name>.answer`, then merge isolated state
    ///    (ordered by agent name when the gate ran). Agents rejected by the
    ///    gate are metered but their answer and writes are withheld.
    /// 5. Synthesize the final summary from the collected outputs.
    ///
    /// Outputs are returned in agent-spec order regardless of completion order.
    ///
    /// # Errors
    /// Returns [`MultiError::ConcurrencyAbort`] when the concurrency gate
    /// aborts the batch. Individual agent failures are reported through
    /// `outputs`, not as `Err`.
    async fn run_parallel(
        &self,
        task: &str,
        runner: &Arc<dyn AgentRunner>,
        infra: &SharedInfra,
    ) -> Result<SwarmResult, MultiError> {
        let mailbox = Arc::new(Mailbox::default());

        // Concurrency-anomaly gating (A5) applies only to the isolated path,
        // where writes are deferred to a merge barrier we can inspect. In the
        // non-isolated path writes land in shared state during the run — there
        // is no barrier to gate. When enabled, snapshot the parent keys that
        // exist *before* the batch: an agent that writes a key which already
        // existed is doing a read-modify-write, which is what makes concurrent
        // overwrites a lost-update (stale-generation) hazard rather than a fresh
        // insert.
        let cc = if self.isolated {
            infra.concurrency.clone()
        } else {
            None
        };
        let parent_keys_before: std::collections::HashSet<String> = if cc.is_some() {
            infra.state.keys().into_iter().collect()
        } else {
            std::collections::HashSet::new()
        };

        // Each agent slot is either spawned (index into `handles`) or pre-empted
        // by the coordination budget. Keeping per-agent slots preserves output
        // order even when some agents are skipped.
        enum Slot {
            Spawned(usize),
            Skipped(AgentOutput),
        }

        // When isolated, each handle returns (Result, Option<AgentContext>) so we
        // can merge state back on success.  When not isolated, the context is None.
        // The trailing (read_at, commit_at) are the logical timestamps bounding
        // the agent's generate window, stamped on the shared concurrency clock
        // when gating is enabled (0/0 otherwise) — the schedule the gate reasons
        // over.
        let mut handles: Vec<
            tokio::task::JoinHandle<(
                Result<AgentOutput, MultiError>,
                Option<crate::task_context::AgentContext>,
                u64,
                u64,
            )>,
        > = Vec::new();
        let mut slots: Vec<Slot> = Vec::new();

        for spec in &self.agents {
            // Provision an isolated filesystem workspace if configured, and
            // advertise its path to the runner via the spec's metadata. Done
            // BEFORE the budget reservation so a provisioning failure doesn't
            // burn a (non-refundable) agent slot. On failure, fail this agent
            // closed rather than running it unisolated and risk clobbering a
            // sibling.
            let workspace = match &self.workspaces {
                Some(cfg) => match crate::workspace::AgentWorkspace::provision(cfg, &spec.name) {
                    Ok(ws) => Some(ws),
                    Err(e) => {
                        slots.push(Slot::Skipped(AgentOutput {
                            name: spec.name.clone(),
                            answer: String::new(),
                            turns: 0,
                            tool_calls: 0,
                            duration_ms: 0.0,
                            error: Some(format!("workspace provisioning failed: {e}")),
                            outcome: None,
                            tokens: None,
                            tools_used: Vec::new(),
                        }));
                        continue;
                    }
                },
                None => None,
            };

            // Budget pre-flight: a crossed token/cost ceiling or the agent cap
            // stops further spawns. The whole parallel batch is launched at once,
            // so this gates the batch rather than metering mid-batch. On denial
            // the just-provisioned `workspace` guard drops here and cleans up.
            if let Err(e) = infra.begin_agent() {
                slots.push(Slot::Skipped(crate::budget::budget_skipped_output(
                    &spec.name, &e,
                )));
                continue;
            }

            let runner = Arc::clone(runner);
            let mut spec = spec.clone();
            if let Some(ws) = &workspace {
                spec = ws.inject(spec);
            }
            let task = task.to_string();
            let mailbox = Arc::clone(&mailbox);

            let cc = cc.clone();
            if self.isolated {
                let (rt, ctx) = infra.make_isolated_runtime(&spec.name);
                for tool in &spec.tools {
                    rt.register_tool(tool).await;
                }
                let ctx_clone = ctx.clone();
                handles.push(tokio::spawn(async move {
                    // Hold the workspace guard for the agent's lifetime; dropped
                    // (cleaned up) when the task finishes.
                    let _workspace = workspace;
                    // Stamp the generate window on the shared logical clock:
                    // read_at before the agent runs, commit_at when it returns.
                    let read_at = cc.as_ref().map(|c| c.tick()).unwrap_or(0);
                    let result = crate::task_context::TaskScope::run(ctx_clone, async {
                        runner.run(&spec, &task, &rt, &mailbox).await
                    })
                    .await;
                    let commit_at = cc.as_ref().map(|c| c.tick()).unwrap_or(0);
                    (result, Some(ctx), read_at, commit_at)
                }));
            } else {
                let rt = infra.make_runtime();
                for tool in &spec.tools {
                    rt.register_tool(tool).await;
                }
                handles.push(tokio::spawn(async move {
                    let _workspace = workspace;
                    let read_at = cc.as_ref().map(|c| c.tick()).unwrap_or(0);
                    let result = runner.run(&spec, &task, &rt, &mailbox).await;
                    let commit_at = cc.as_ref().map(|c| c.tick()).unwrap_or(0);
                    (result, None, read_at, commit_at)
                }));
            }
            slots.push(Slot::Spawned(handles.len() - 1));
        }

        // Move owned join results out by handle index as each slot is visited.
        let mut results: Vec<Option<_>> = futures::future::join_all(handles)
            .await
            .into_iter()
            .map(Some)
            .collect();

        // --- Phase 1: resolve every slot without committing. A successful
        // isolated agent's writes stay pending in its `ctx` so the concurrency
        // gate below can veto the merge; meanwhile build the `AgentOp` schedule
        // the gate reasons over. Terminal outputs (skips, errors) pass through. ---
        enum Resolved {
            Pending {
                output: AgentOutput,
                ctx: Option<crate::task_context::AgentContext>,
            },
            Terminal(AgentOutput),
        }
        let mut resolved: Vec<Resolved> = Vec::new();
        let mut ops: Vec<car_verify::concurrency::AgentOp> = Vec::new();
        // `slots` holds exactly one entry per agent, so `i` is also the index of
        // the matching spec in `self.agents`.
        for (i, slot) in slots.into_iter().enumerate() {
            let handle_idx = match slot {
                Slot::Skipped(output) => {
                    resolved.push(Resolved::Terminal(output));
                    continue;
                }
                Slot::Spawned(idx) => idx,
            };
            match results.get_mut(handle_idx).and_then(Option::take) {
                Some(Ok((Ok(output), ctx, read_at, commit_at))) => {
                    // Instrument the agent as an AgentOp when gating is on. Its
                    // write_set is the overlay it would merge; a write to a key
                    // that already existed in the parent is a read-modify-write,
                    // which is what turns a concurrent overwrite into a
                    // lost-update (stale-generation) hazard rather than a fresh
                    // insert.
                    if cc.is_some() {
                        if let Some(ctx) = &ctx {
                            let write_set = ctx.local_state.keys();
                            let read_set: Vec<String> = write_set
                                .iter()
                                .filter(|k| parent_keys_before.contains(*k))
                                .cloned()
                                .collect();
                            ops.push(car_verify::concurrency::AgentOp {
                                id: output.name.clone(),
                                agent: output.name.clone(),
                                read_set,
                                write_set,
                                tools_read: output.tools_used.clone(),
                                tools_written: Vec::new(),
                                depends_on: Vec::new(),
                                read_at,
                                commit_at,
                            });
                        }
                    }
                    resolved.push(Resolved::Pending { output, ctx });
                }
                Some(Ok((Err(e), _ctx, _r, _c))) => {
                    // Note: an agent that spent tokens before returning Err has
                    // that spend dropped — the error path carries no token
                    // payload, so the budget can under-count failed work.
                    resolved.push(Resolved::Terminal(AgentOutput {
                        name: self.agents[i].name.clone(),
                        answer: String::new(),
                        turns: 0,
                        tool_calls: 0,
                        duration_ms: 0.0,
                        error: Some(e.to_string()),
                        outcome: None,
                        tokens: None,
                        tools_used: Vec::new(),
                    }));
                }
                Some(Err(e)) => {
                    resolved.push(Resolved::Terminal(AgentOutput {
                        name: self.agents[i].name.clone(),
                        answer: String::new(),
                        turns: 0,
                        tool_calls: 0,
                        duration_ms: 0.0,
                        error: Some(format!("join error: {}", e)),
                        outcome: None,
                        tokens: None,
                        tools_used: Vec::new(),
                    }));
                }
                None => {
                    resolved.push(Resolved::Terminal(AgentOutput {
                        name: self.agents[i].name.clone(),
                        answer: String::new(),
                        turns: 0,
                        tool_calls: 0,
                        duration_ms: 0.0,
                        error: Some("internal: missing join result".to_string()),
                        outcome: None,
                        tokens: None,
                        tools_used: Vec::new(),
                    }));
                }
            }
        }

        // --- Phase 2: gate the schedule, then commit the survivors. The gate
        // emits its audit event here. A causal-cascade aborts the whole batch
        // (nothing merges); a stale generation rejects only the offending
        // commit; a write reorder is auto-remediated by committing in a
        // deterministic order (below). ---
        let guard = match &cc {
            Some(control) => Some(control.guard(&ops, &infra.log).await),
            None => None,
        };
        if let Some(g) = &guard {
            if g.abort {
                // Meter the batch's REAL token spend before aborting (linus
                // review): the agents ran and billed regardless of the merge
                // outcome. Skipping this let a retrying budget-capped loop
                // exceed its cap without bound.
                for entry in &resolved {
                    if let Resolved::Pending { output, .. } = entry {
                        infra.record_output_metered(output).await;
                    }
                }
                return Err(MultiError::ConcurrencyAbort(g.anomaly_summary()));
            }
        }

        // Outputs preserve agent-spec order, but the isolated-state *merges* are
        // applied in a deterministic order (agent name ascending) so a reorder
        // hazard resolves the same way every run instead of by nondeterministic
        // completion order — the `SerializeWriters` remediation. Answer keys
        // (`agent.<name>.answer`) are unique per agent and don't contend, so
        // they're written inline.
        let mut outputs = Vec::new();
        let mut to_merge: Vec<crate::task_context::AgentContext> = Vec::new();
        for entry in resolved {
            match entry {
                Resolved::Terminal(o) => outputs.push(o),
                Resolved::Pending { output, ctx } => {
                    let committable = guard
                        .as_ref()
                        .map(|g| g.may_commit(&output.name))
                        .unwrap_or(true);
                    if committable {
                        // Defer the isolated-state merge to the ordered pass below.
                        if let Some(ctx) = ctx {
                            to_merge.push(ctx);
                        }
                        // Record reported spend against the coordination budget.
                        infra.record_output_metered(&output).await;
                        // Write to shared state
                        infra.state.set(
                            &format!("agent.{}.answer", output.name),
                            serde_json::Value::String(output.answer.clone()),
                            &format!("swarm.{}", output.name),
                        );
                        outputs.push(output);
                    } else {
                        // Concurrency gate rejected this commit (stale
                        // generation): drop its writes, surface the reason.
                        // Sibling commits still stand. The agent RAN and
                        // billed, so its spend is metered and its real
                        // turns/tokens are preserved on the output (linus
                        // review) — only the answer/writes are withheld.
                        infra.record_output_metered(&output).await;
                        let reason = guard
                            .as_ref()
                            .and_then(|g| g.rejection_reason(&output.name))
                            .unwrap_or_else(|| {
                                "concurrency gate rejected commit".to_string()
                            });
                        outputs.push(AgentOutput {
                            name: output.name.clone(),
                            answer: String::new(),
                            turns: output.turns,
                            tool_calls: output.tool_calls,
                            duration_ms: output.duration_ms,
                            error: Some(reason),
                            outcome: None,
                            tokens: output.tokens.clone(),
                            tools_used: output.tools_used.clone(),
                        });
                    }
                }
            }
        }
        // Apply the deferred merges. When gating is on, order them by agent name
        // so contended keys land last-writer-wins by a stable rule; without
        // gating, completion order is preserved (unchanged behavior).
        if guard.is_some() {
            to_merge.sort_by(|a, b| a.agent_name.cmp(&b.agent_name));
        }
        for ctx in &to_merge {
            ctx.merge_to_parent();
        }

        let summary = self.synthesize(task, &outputs, runner, infra).await;

        Ok(SwarmResult {
            task: task.to_string(),
            outputs,
            final_summary: summary,
        })
    }

    /// Runs the agents one at a time, handing each the successful answers of
    /// the agents that ran before it.
    ///
    /// Before every agent the coordination budget is consulted; a denied agent
    /// is recorded as a budget-skipped output and the loop moves on to the next
    /// spec. Each successful answer is metered and written to shared state
    /// under `agent.<name>.answer`.
    ///
    /// Prior successful answers (each truncated to 300 bytes) are appended to
    /// the task under a "Prior agents' findings" section. The section is added
    /// only when at least one earlier agent succeeded, so a chain whose earlier
    /// agents all failed or were skipped passes the plain task on instead of a
    /// dangling empty header.
    ///
    /// A runner failure does not abort the chain: it becomes an
    /// [`AgentOutput`] carrying the error text and the elapsed wall-clock time.
    /// This method itself currently always returns `Ok`.
    async fn run_sequential(
        &self,
        task: &str,
        runner: &Arc<dyn AgentRunner>,
        infra: &SharedInfra,
    ) -> Result<SwarmResult, MultiError> {
        let mailbox = Arc::new(Mailbox::default());
        let mut outputs: Vec<AgentOutput> = Vec::with_capacity(self.agents.len());

        for spec in &self.agents {
            // Budget gate before each agent. In a sequential chain this is real
            // between-agent enforcement: once a prior agent's reported spend
            // crosses a limit, the remaining agents are skipped.
            if let Err(e) = infra.begin_agent() {
                outputs.push(crate::budget::budget_skipped_output(&spec.name, &e));
                continue;
            }

            // Enrich the task with prior results. Only successful answers are
            // forwarded; failed and skipped agents contribute nothing.
            let prior: Vec<String> = outputs
                .iter()
                .filter(|o| o.succeeded())
                .map(|o| format!("- {}: {}", o.name, truncate(&o.answer, 300)))
                .collect();
            let enriched = if prior.is_empty() {
                task.to_string()
            } else {
                format!("{}\n\nPrior agents' findings:\n{}", task, prior.join("\n"))
            };

            let rt = infra.make_runtime();
            for tool in &spec.tools {
                rt.register_tool(tool).await;
            }

            let start = Instant::now();
            match runner.run(spec, &enriched, &rt, &mailbox).await {
                Ok(output) => {
                    infra.record_output_metered(&output).await;
                    infra.state.set(
                        &format!("agent.{}.answer", output.name),
                        serde_json::Value::String(output.answer.clone()),
                        &format!("swarm.{}", output.name),
                    );
                    outputs.push(output);
                }
                Err(e) => {
                    outputs.push(AgentOutput {
                        name: spec.name.clone(),
                        answer: String::new(),
                        turns: 0,
                        tool_calls: 0,
                        duration_ms: start.elapsed().as_secs_f64() * 1000.0,
                        error: Some(e.to_string()),
                        outcome: None,
                        tokens: None,
                        tools_used: Vec::new(),
                    });
                }
            }
        }

        let summary = self.synthesize(task, &outputs, runner, infra).await;

        Ok(SwarmResult {
            task: task.to_string(),
            outputs,
            final_summary: summary,
        })
    }

    async fn run_debate(
        &self,
        task: &str,
        runner: &Arc<dyn AgentRunner>,
        infra: &SharedInfra,
    ) -> Result<SwarmResult, MultiError> {
        // Round 1: independent answers
        let round1 = Swarm::new(self.agents.clone(), SwarmMode::Parallel)
            .run(task, runner, infra)
            .await?;

        // Round 2: each agent critiques the others
        let mut critique_specs = Vec::new();
        for spec in &self.agents {
            let others: Vec<String> = round1
                .outputs
                .iter()
                .filter(|o| o.name != spec.name && o.succeeded())
                .map(|o| format!("- {}: {}", o.name, truncate(&o.answer, 300)))
                .collect();

            let critique_prompt = format!(
                "{}\n\nOriginal task: {}\n\nOther agents' answers:\n{}\n\n\
                 Critique these answers and provide your improved response.",
                spec.system_prompt,
                task,
                others.join("\n")
            );

            let mut critique_spec = spec.clone();
            critique_spec.name = format!("{}_critique", spec.name);
            critique_spec.system_prompt = critique_prompt;
            critique_specs.push(critique_spec);
        }

        let round2 = Swarm::new(critique_specs, SwarmMode::Parallel)
            .run(task, runner, infra)
            .await?;

        // Combine both rounds
        let mut all_outputs = round1.outputs;
        all_outputs.extend(round2.outputs);

        let summary = self.synthesize(task, &all_outputs, runner, infra).await;

        Ok(SwarmResult {
            task: task.to_string(),
            outputs: all_outputs,
            final_summary: summary,
        })
    }

    /// Combines the agents' successful answers into one final summary.
    ///
    /// - No successful answer: returns the placeholder
    ///   `"[no agent produced an answer]"`.
    /// - Exactly one: returns that answer unchanged.
    /// - Several and a synthesizer is configured: runs the synthesizer over the
    ///   answers (each truncated to 500 bytes) and returns its answer.
    /// - Otherwise: concatenates the answers under `## <agent>` headers.
    ///
    /// The synthesizer is gated on the coordination budget like any other
    /// agent, and its tools are registered on its runtime the same way the
    /// other agent launches in this file do. Its reported spend is always
    /// metered. If the budget denies it, the runner errors, or the returned
    /// output did not succeed, the concatenation fallback is used rather than
    /// failing the run or returning an unusable summary.
    async fn synthesize(
        &self,
        task: &str,
        outputs: &[AgentOutput],
        runner: &Arc<dyn AgentRunner>,
        infra: &SharedInfra,
    ) -> String {
        let answers: Vec<&AgentOutput> = outputs.iter().filter(|o| o.succeeded()).collect();
        match answers.as_slice() {
            [] => return "[no agent produced an answer]".to_string(),
            [only] => return only.answer.clone(),
            _ => {}
        }

        if let Some(synth_spec) = &self.synthesizer {
            // Gate the synthesizer on the budget too; on denial fall through to
            // the default concatenation rather than failing the whole run. The
            // prompt is only built once the budget has admitted the run.
            if infra.begin_agent().is_ok() {
                let summaries: Vec<String> = answers
                    .iter()
                    .map(|o| format!("- {}: {}", o.name, truncate(&o.answer, 500)))
                    .collect();

                let synth_task = format!(
                    "Original task: {}\n\nAgent outputs:\n{}\n\nSynthesize these into a single coherent answer.",
                    task,
                    summaries.join("\n")
                );

                let mailbox = Mailbox::default();
                let rt = infra.make_runtime();
                // Without this a synthesizer spec that declares tools would run
                // against a runtime that does not know them.
                for tool in &synth_spec.tools {
                    rt.register_tool(tool).await;
                }

                if let Ok(output) = runner.run(synth_spec, &synth_task, &rt, &mailbox).await {
                    // The synthesizer ran and billed whether or not its answer
                    // is usable, so meter it before checking the outcome.
                    infra.record_output_metered(&output).await;
                    if output.succeeded() {
                        return output.answer;
                    }
                }
            }
        }

        // Default: concatenate with headers
        answers
            .iter()
            .map(|o| format!("## {}\n{}", o.name, o.answer))
            .collect::<Vec<_>>()
            .join("\n\n")
    }
}

/// Returns the longest prefix of `s` that is at most `max_len` bytes long and
/// ends on a UTF-8 character boundary.
///
/// Strings that already fit are returned unchanged. When `max_len` falls inside
/// a multi-byte character, the cut moves back to the start of that character,
/// so the result may be shorter than `max_len` (and empty in the extreme case).
fn truncate(s: &str, max_len: usize) -> &str {
    if max_len >= s.len() {
        return s;
    }
    // Offset 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=max_len)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::MultiError;
    use crate::mailbox::Mailbox;
    use crate::runner::AgentRunner;
    use crate::types::{AgentOutput, AgentSpec};
    use car_engine::Runtime;
    use std::sync::atomic::{AtomicU32, Ordering};

    struct MockRunner {
        call_count: AtomicU32,
    }

    #[async_trait::async_trait]
    impl AgentRunner for MockRunner {
        async fn run(
            &self,
            spec: &AgentSpec,
            task: &str,
            _runtime: &Runtime,
            _mailbox: &Mailbox,
        ) -> Result<AgentOutput, MultiError> {
            let _n = self.call_count.fetch_add(1, Ordering::SeqCst);
            Ok(AgentOutput {
                name: spec.name.clone(),
                answer: format!(
                    "answer from {} for: {}",
                    spec.name,
                    &task[..task.len().min(50)]
                ),
                turns: 1,
                tool_calls: 0,
                duration_ms: 10.0,
                error: None,
                outcome: None,
                tokens: None,
                tools_used: Vec::new(),
            })
        }
    }

    /// Parallel mode runs every agent and publishes each answer to shared state.
    #[tokio::test]
    async fn test_parallel_swarm() {
        let infra = SharedInfra::new();
        let runner: Arc<dyn AgentRunner> = Arc::new(MockRunner {
            call_count: AtomicU32::new(0),
        });
        let swarm = Swarm::new(
            vec![
                AgentSpec::new("alice", "You are Alice"),
                AgentSpec::new("bob", "You are Bob"),
            ],
            SwarmMode::Parallel,
        );

        let result = swarm.run("test task", &runner, &infra).await.unwrap();

        assert_eq!(result.outputs.len(), 2);
        assert!(result.outputs.iter().all(|o| o.succeeded()));

        // Each agent's answer must have landed in shared state.
        assert!(infra.state.get("agent.alice.answer").is_some());
        assert!(infra.state.get("agent.bob.answer").is_some());
    }

    /// G3: token/cost spend of a multi-agent run is attributed per agent,
    /// through the metered events emitted for each successful output.
    #[tokio::test]
    async fn per_agent_cost_is_attributed() {
        let infra = SharedInfra::new();
        let runner: Arc<dyn AgentRunner> = Arc::new(TokenRunner {
            per_call_total: 100,
        });
        let swarm = Swarm::new(
            vec![AgentSpec::new("researcher", ""), AgentSpec::new("coordinator", "")],
            SwarmMode::Sequential,
        );

        swarm.run("task", &runner, &infra).await.unwrap();

        let log = infra.log.lock().await;
        let report = log.cost_by_agent();
        assert_eq!(report.len(), 2, "one cost row per agent: {report:?}");

        // BTreeMap order: coordinator before researcher.
        let (first, second) = (&report[0], &report[1]);
        assert_eq!(first.agent, "coordinator");
        assert_eq!(first.calls, 1);
        assert_eq!(first.tokens_in, 100);
        assert_eq!(second.agent, "researcher");
        assert_eq!(second.tokens_in, 100);
    }

    /// Sequential mode forwards earlier answers to later agents.
    #[tokio::test]
    async fn test_sequential_swarm() {
        let infra = SharedInfra::new();
        let runner: Arc<dyn AgentRunner> = Arc::new(MockRunner {
            call_count: AtomicU32::new(0),
        });
        let swarm = Swarm::new(
            vec![
                AgentSpec::new("first", "Go first"),
                AgentSpec::new("second", "Go second"),
            ],
            SwarmMode::Sequential,
        );

        let result = swarm
            .run("sequential task", &runner, &infra)
            .await
            .unwrap();

        assert_eq!(result.outputs.len(), 2);
        // The mock echoes its task, so the second answer reveals the enriched
        // task that carried the first agent's findings.
        let second_answer = &result.outputs[1].answer;
        assert!(second_answer.contains("Prior agents"));
    }

    /// Test runner that reports the same token spend on every call, giving a
    /// budget something to meter.
    struct TokenRunner {
        /// First argument passed to `TokenAccounting::new` for every output.
        per_call_total: u64,
    }

    #[async_trait::async_trait]
    impl AgentRunner for TokenRunner {
        async fn run(
            &self,
            spec: &AgentSpec,
            _task: &str,
            _runtime: &Runtime,
            _mailbox: &Mailbox,
        ) -> Result<AgentOutput, MultiError> {
            let spend = crate::types::TokenAccounting::new(self.per_call_total, 0, 0.0);
            let output = AgentOutput {
                name: spec.name.clone(),
                answer: format!("answer from {}", spec.name),
                turns: 1,
                tool_calls: 0,
                duration_ms: 1.0,
                error: None,
                outcome: None,
                tokens: Some(spend),
                tools_used: Vec::new(),
            };
            Ok(output)
        }
    }

    #[tokio::test]
    async fn sequential_budget_stops_chain_when_tokens_exhausted() {
        // Three agents, each reporting 100 tokens; a 150-token ceiling lets the
        // first two run (cumulative 200 crosses 150 only after the second) and
        // denies the third.
        let agents = vec![
            AgentSpec::new("a", ""),
            AgentSpec::new("b", ""),
            AgentSpec::new("c", ""),
        ];
        let runner: Arc<dyn AgentRunner> = Arc::new(TokenRunner {
            per_call_total: 100,
        });
        let infra = SharedInfra::new().with_budget(crate::BudgetLimits {
            max_total_tokens: Some(150),
            ..Default::default()
        });

        let result = Swarm::new(agents, SwarmMode::Sequential)
            .run("task", &runner, &infra)
            .await
            .unwrap();

        assert_eq!(result.outputs.len(), 3);
        assert!(result.outputs[0].succeeded());
        assert!(result.outputs[1].succeeded());
        assert!(!result.outputs[2].succeeded());
        assert!(crate::is_budget_skipped(&result.outputs[2]));
        assert_eq!(infra.budget.snapshot().total_tokens, 200);
    }

    /// Test runner that records the `workspace` metadata value each agent was
    /// given and checks that the directory exists while the agent runs.
    struct WorkspaceProbeRunner {
        /// Workspace paths observed so far, in call order.
        seen: std::sync::Arc<std::sync::Mutex<Vec<String>>>,
    }

    #[async_trait::async_trait]
    impl AgentRunner for WorkspaceProbeRunner {
        async fn run(
            &self,
            spec: &AgentSpec,
            _task: &str,
            _runtime: &Runtime,
            _mailbox: &Mailbox,
        ) -> Result<AgentOutput, MultiError> {
            // A missing or non-string entry is recorded as the empty string.
            let advertised = spec
                .metadata
                .get(crate::workspace::WORKSPACE_METADATA_KEY)
                .and_then(|v| v.as_str());
            let ws: String = match advertised {
                Some(path) => path.to_string(),
                None => String::new(),
            };
            self.seen.lock().unwrap().push(ws.clone());
            // The directory must exist while the agent runs.
            assert!(!ws.is_empty() && std::path::Path::new(&ws).is_dir());

            let output = AgentOutput {
                name: spec.name.clone(),
                answer: "ok".into(),
                turns: 1,
                tool_calls: 0,
                duration_ms: 1.0,
                error: None,
                outcome: None,
                tokens: None,
                tools_used: Vec::new(),
            };
            Ok(output)
        }
    }

    /// A parallel swarm configured with directory workspaces hands every agent
    /// its own existing directory, and those directories are gone again once
    /// the run has finished.
    #[tokio::test]
    async fn parallel_workspaces_are_provisioned_and_distinct() {
        // Removes the scratch base directory when dropped. A failing assertion
        // unwinds past the end of the test, so cleanup has to live in a guard
        // rather than in a trailing statement that would never be reached.
        struct BaseDirCleanup(std::path::PathBuf);

        impl Drop for BaseDirCleanup {
            fn drop(&mut self) {
                // Best-effort: the directory may not exist (any more).
                let _ = std::fs::remove_dir_all(&self.0);
            }
        }

        // Per-process base directory so concurrent test binaries don't collide.
        let base = std::env::temp_dir().join(format!("car-swarm-ws-{}", std::process::id()));
        // Declared before everything else in the test so it is dropped last.
        let _cleanup = BaseDirCleanup(base.clone());

        let seen = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
        let runner: Arc<dyn AgentRunner> = Arc::new(WorkspaceProbeRunner { seen: seen.clone() });
        let infra = SharedInfra::new();

        let agents = vec![AgentSpec::new("alice", ""), AgentSpec::new("bob", "")];
        let result = Swarm::new(agents, SwarmMode::Parallel)
            .with_workspaces(crate::workspace::WorkspaceConfig::directory(&base))
            .run("task", &runner, &infra)
            .await
            .unwrap();

        assert_eq!(result.outputs.len(), 2);
        assert!(result.outputs.iter().all(|o| o.succeeded()));

        // The probe runner recorded one workspace path per agent.
        let paths = seen.lock().unwrap().clone();
        assert_eq!(paths.len(), 2);
        assert_ne!(paths[0], paths[1], "each agent gets a distinct workspace");

        // Cleaned up after the run.
        for p in &paths {
            assert!(
                !std::path::Path::new(p).exists(),
                "workspace removed on drop"
            );
        }
    }

    #[tokio::test]
    async fn parallel_budget_agent_cap_skips_excess() {
        // `max_agents` is capped at 2 while five agents are submitted: two of
        // them run and the remaining three come back as budget skips.
        let limits = crate::BudgetLimits {
            max_agents: Some(2),
            ..Default::default()
        };
        let infra = SharedInfra::new().with_budget(limits);
        let runner: Arc<dyn AgentRunner> = Arc::new(MockRunner {
            call_count: AtomicU32::new(0),
        });

        let mut agents: Vec<AgentSpec> = Vec::with_capacity(5);
        for i in 0..5 {
            agents.push(AgentSpec::new(&format!("a{}", i), ""));
        }

        let swarm = Swarm::new(agents, SwarmMode::Parallel);
        let result = swarm.run("task", &runner, &infra).await.unwrap();

        // Skipped agents still produce an output entry.
        assert_eq!(result.outputs.len(), 5);

        // Tally both categories in one pass over the outputs.
        let (mut ran, mut skipped) = (0usize, 0usize);
        for output in &result.outputs {
            if output.succeeded() {
                ran += 1;
            }
            if crate::is_budget_skipped(output) {
                skipped += 1;
            }
        }
        assert_eq!(ran, 2);
        assert_eq!(skipped, 3);
    }

    #[tokio::test]
    async fn test_debate_swarm() {
        let infra = SharedInfra::new();
        let runner: Arc<dyn AgentRunner> = Arc::new(MockRunner {
            call_count: AtomicU32::new(0),
        });
        let debaters = vec![
            AgentSpec::new("debater_a", "Argue for"),
            AgentSpec::new("debater_b", "Argue against"),
        ];

        let swarm = Swarm::new(debaters, SwarmMode::Debate);
        let result = swarm.run("debate topic", &runner, &infra).await.unwrap();

        // Two debaters, each producing one output in each of the two rounds.
        let expected_outputs = 2 * 2;
        assert_eq!(result.outputs.len(), expected_outputs);
    }

    // --- Real tool-callback seam (ToolExecutor) ---
    //
    // Every other test here uses a MockRunner that ignores the Runtime it is
    // handed and reports `tool_calls: 0`. This exercises the path that actually
    // runs: the per-agent `Runtime` from `infra.make_runtime()` carries NO
    // executor, so the runner installs one via `set_executor`, builds a
    // `ToolCall` proposal, and drives it through `Runtime::execute`. The
    // executor's hit counter proves the engine routed the action through the
    // caller-provided callback rather than a stub.

    /// `ToolExecutor` test double: bumps a shared hit counter on every call
    /// and replies with the tool name plus the `payload` parameter it was given.
    struct CountingExecutor {
        hits: Arc<AtomicU32>,
    }

    #[async_trait::async_trait]
    impl car_engine::ToolExecutor for CountingExecutor {
        /// Always succeeds. The reply is `{"tool": <tool>, "echo": <payload>}`,
        /// with `echo` set to JSON null when `params` has no `payload` entry.
        async fn execute(
            &self,
            tool: &str,
            params: &serde_json::Value,
        ) -> Result<serde_json::Value, String> {
            // Count the invocation before building the reply.
            self.hits.fetch_add(1, Ordering::SeqCst);

            let echoed = match params.get("payload") {
                Some(payload) => payload.clone(),
                None => serde_json::Value::Null,
            };
            let reply = serde_json::json!({
                "tool": tool,
                "echo": echoed,
            });
            Ok(reply)
        }
    }

    /// Test runner that exercises the real tool-dispatch path: it installs a
    /// `CountingExecutor` on the runtime it receives, executes a single
    /// `ToolCall` action against the `echo` tool, and reports the echoed
    /// payload as the agent's answer.
    struct ToolRunner {
        hits: Arc<AtomicU32>,
    }

    #[async_trait::async_trait]
    impl AgentRunner for ToolRunner {
        /// Runs one `echo` tool call carrying `ping-<agent name>` and returns
        /// an output whose `answer` is the string the executor echoed back
        /// (empty when the result has no string `echo` field).
        async fn run(
            &self,
            spec: &AgentSpec,
            _task: &str,
            runtime: &Runtime,
            _mailbox: &Mailbox,
        ) -> Result<AgentOutput, MultiError> {
            // The executor shares this runner's counter, so the test can see
            // how many tool calls were dispatched through it.
            let executor = Arc::new(CountingExecutor {
                hits: Arc::clone(&self.hits),
            });
            runtime.set_executor(executor).await;

            let payload = serde_json::Value::String(format!("ping-{}", spec.name));
            let echo_call = car_ir::Action {
                id: format!("act-{}", spec.name),
                action_type: car_ir::ActionType::ToolCall,
                tool: Some("echo".into()),
                parameters: [("payload".to_string(), payload)].into(),
                // No preconditions, dependencies or declared effects: the
                // action is a bare tool invocation.
                preconditions: vec![],
                assumptions: vec![],
                state_dependencies: vec![],
                read_set: vec![],
                write_set: vec![],
                expected_effects: std::collections::HashMap::new(),
                metadata: std::collections::HashMap::new(),
                invocation_mode: Default::default(),
                idempotent: false,
                max_retries: 0,
                timeout_ms: None,
                failure_behavior: car_ir::FailureBehavior::Abort,
            };
            let proposal = car_ir::ActionProposal {
                id: format!("p-{}", spec.name),
                source: "test".into(),
                timestamp: chrono::Utc::now(),
                context: std::collections::HashMap::new(),
                actions: vec![echo_call],
            };

            let result = runtime.execute(&proposal).await;
            assert!(
                result.all_succeeded(),
                "tool-call proposal must succeed via the installed executor"
            );

            // Pull the `echo` string out of the single action result.
            let echo_field = result.results[0]
                .output
                .as_ref()
                .and_then(|v| v.get("echo"));
            let echoed = match echo_field {
                Some(value) => value.as_str().unwrap_or_default().to_string(),
                None => String::new(),
            };

            Ok(AgentOutput {
                name: spec.name.clone(),
                answer: echoed,
                tools_used: vec!["echo".into()],
                tool_calls: 1,
                turns: 1,
                duration_ms: 1.0,
                tokens: None,
                outcome: None,
                error: None,
            })
        }
    }

    #[tokio::test]
    async fn parallel_swarm_routes_through_tool_executor() {
        let hits = Arc::new(AtomicU32::new(0));
        let runner: Arc<dyn AgentRunner> = Arc::new(ToolRunner {
            hits: Arc::clone(&hits),
        });
        let infra = SharedInfra::new();

        // Both specs declare `echo`: validation only admits a ToolCall for a
        // registered tool, the swarm pre-registers `spec.tools` on the
        // per-agent runtime, and the runner supplies the executor behind it.
        let alice = AgentSpec::new("alice", "You are Alice").with_tools(vec!["echo".into()]);
        let bob = AgentSpec::new("bob", "You are Bob").with_tools(vec!["echo".into()]);

        let result = Swarm::new(vec![alice, bob], SwarmMode::Parallel)
            .run("tool task", &runner, &infra)
            .await
            .unwrap();

        // The caller-provided executor saw exactly one dispatch per agent.
        assert_eq!(hits.load(Ordering::SeqCst), 2);
        assert_eq!(result.outputs.len(), 2);
        assert!(result.outputs.iter().all(|o| o.succeeded()));
        assert!(result.outputs.iter().all(|o| o.tool_calls == 1));

        // Answers are the echoed payloads; sort them because parallel
        // completion order is not fixed.
        let mut answers: Vec<&str> = Vec::with_capacity(result.outputs.len());
        for output in &result.outputs {
            answers.push(output.answer.as_str());
        }
        answers.sort_unstable();
        assert_eq!(answers, ["ping-alice", "ping-bob"]);
    }

    // --- A5: concurrency gate wired into the isolated parallel commit barrier ---
    //
    // A runner that writes one shared key via a `StateWrite` proposal on the
    // per-agent isolated runtime, synchronizing on a barrier so both agents'
    // generate windows provably overlap (otherwise fast mock agents could run
    // strictly sequentially and no anomaly would exist to detect).

    /// Test runner whose agents all write their own name to the same state
    /// key, after first meeting at a shared barrier.
    struct ContendedWriter {
        barrier: Arc<tokio::sync::Barrier>,
        key: String,
    }

    #[async_trait::async_trait]
    impl AgentRunner for ContendedWriter {
        /// Waits on the barrier, then executes a single `StateWrite` action
        /// setting `self.key` to the agent's name, declaring the key in both
        /// the read set and the write set.
        async fn run(
            &self,
            spec: &AgentSpec,
            _task: &str,
            runtime: &Runtime,
            _mailbox: &Mailbox,
        ) -> Result<AgentOutput, MultiError> {
            // Rendezvous first: the swarm stamps read_at just before this call
            // and the commit just after it, so holding every agent here until
            // all have arrived makes their windows overlap.
            self.barrier.wait().await;

            let key = &self.key;
            let write = car_ir::Action {
                id: format!("w-{}", spec.name),
                action_type: car_ir::ActionType::StateWrite,
                tool: None,
                parameters: [
                    ("key".to_string(), serde_json::Value::String(key.clone())),
                    (
                        "value".to_string(),
                        serde_json::Value::String(spec.name.clone()),
                    ),
                ]
                .into(),
                // The same key appears in both sets.
                read_set: vec![key.clone()],
                write_set: vec![key.clone()],
                preconditions: vec![],
                state_dependencies: vec![],
                assumptions: vec![],
                expected_effects: std::collections::HashMap::new(),
                metadata: std::collections::HashMap::new(),
                invocation_mode: Default::default(),
                idempotent: false,
                max_retries: 0,
                timeout_ms: None,
                failure_behavior: car_ir::FailureBehavior::Abort,
            };
            let proposal = car_ir::ActionProposal {
                id: format!("p-{}", spec.name),
                source: "test".into(),
                timestamp: chrono::Utc::now(),
                context: std::collections::HashMap::new(),
                actions: vec![write],
            };

            let result = runtime.execute(&proposal).await;
            assert!(result.all_succeeded(), "state write must succeed");

            let output = AgentOutput {
                name: spec.name.clone(),
                answer: format!("wrote {}", self.key),
                tools_used: Vec::new(),
                tool_calls: 0,
                turns: 1,
                duration_ms: 1.0,
                tokens: None,
                outcome: None,
                error: None,
            };
            Ok(output)
        }
    }

    /// Overlapping writes to a key that did **not** exist beforehand. With no
    /// read-modify-write involved this is a write reorder (L3), which the gate
    /// auto-remediates by committing in a deterministic order: both agents
    /// succeed and the surviving value is stable.
    #[tokio::test]
    async fn isolated_parallel_reorder_is_auto_remediated() {
        let infra = SharedInfra::new().with_concurrency_gating();
        let runner: Arc<dyn AgentRunner> = Arc::new(ContendedWriter {
            barrier: Arc::new(tokio::sync::Barrier::new(2)),
            key: String::from("fresh"),
        });
        let agents = vec![AgentSpec::new("alice", ""), AgentSpec::new("bob", "")];

        let swarm = Swarm::new(agents, SwarmMode::Parallel).with_isolation();
        let result = swarm.run("task", &runner, &infra).await.unwrap();

        // Auto-remediation means neither commit is rejected.
        assert_eq!(result.outputs.len(), 2);
        let all_committed = result.outputs.iter().all(|o| o.succeeded());
        assert!(
            all_committed,
            "reorder is auto-remediated, so both agents commit: {:?}",
            result.outputs
        );

        // Commits are serialized by agent name ascending, so bob's write lands last.
        let merged = infra.state.get("fresh");
        assert_eq!(merged, Some(serde_json::json!("bob")));

        // The decision must also show up in the event log.
        let log = infra.log.lock().await;
        let gate_event = log.events().iter().find(|e| {
            matches!(
                e.data.get("gate").and_then(|v| v.as_str()),
                Some("concurrency")
            )
        });
        let ev = gate_event.expect("a concurrency gate event was emitted");
        assert_eq!(ev.kind, car_eventlog::EventKind::AdmissionGateDecision);
    }

    /// Overlapping writes to a key that **already exists**. Each agent's write
    /// is then a read-modify-write, i.e. a stale generation (L1): the gate
    /// rejects the later-committing op, so exactly one write survives and the
    /// other agent is reported as errored.
    #[tokio::test]
    async fn isolated_parallel_stale_generation_rejects_one_commit() {
        let infra = SharedInfra::new().with_concurrency_gating();
        // Pre-populate the key; this is what turns both writes into
        // read-modify-writes.
        infra
            .state
            .set("counter", serde_json::json!("seed"), "test");

        let runner: Arc<dyn AgentRunner> = Arc::new(ContendedWriter {
            barrier: Arc::new(tokio::sync::Barrier::new(2)),
            key: String::from("counter"),
        });
        let agents = vec![AgentSpec::new("alice", ""), AgentSpec::new("bob", "")];

        let swarm = Swarm::new(agents, SwarmMode::Parallel).with_isolation();
        let result = swarm.run("task", &runner, &infra).await.unwrap();

        assert_eq!(result.outputs.len(), 2);

        // Classify every output: committed, or rejected by the gate.
        let (mut succeeded, mut rejected) = (0usize, 0usize);
        for output in &result.outputs {
            if output.succeeded() {
                succeeded += 1;
            }
            if matches!(output.error.as_deref(), Some(e) if e.contains("concurrency gate")) {
                rejected += 1;
            }
        }
        assert_eq!(succeeded, 1, "exactly one commit survives a lost update");
        assert_eq!(rejected, 1, "the stale writer is rejected");

        // Whichever agent won, the stored value is its write and not the seed.
        let final_val = infra.state.get("counter").unwrap();
        let winners = [serde_json::json!("alice"), serde_json::json!("bob")];
        assert!(
            winners.contains(&final_val),
            "the committed value is the surviving agent's write, got {final_val:?}"
        );

        // Fail-closed rejection is logged as a needs_approval decision.
        let log = infra.log.lock().await;
        let gate_event = log.events().iter().find(|e| {
            matches!(
                e.data.get("gate").and_then(|v| v.as_str()),
                Some("concurrency")
            )
        });
        let ev = gate_event.expect("a concurrency gate event was emitted");
        let decision = ev.data.get("decision").and_then(|v| v.as_str());
        assert_eq!(decision, Some("needs_approval"));
    }

    /// Without `with_concurrency_gating` the isolated swarm keeps its previous
    /// behaviour: both contended writes merge (last writer wins by completion
    /// order), no agent is rejected, and no gate event is logged.
    #[tokio::test]
    async fn gating_is_opt_in() {
        // Deliberately built without `with_concurrency_gating`.
        let infra = SharedInfra::new();
        infra
            .state
            .set("counter", serde_json::json!("seed"), "test");

        let runner: Arc<dyn AgentRunner> = Arc::new(ContendedWriter {
            barrier: Arc::new(tokio::sync::Barrier::new(2)),
            key: String::from("counter"),
        });
        let agents = vec![AgentSpec::new("alice", ""), AgentSpec::new("bob", "")];

        let swarm = Swarm::new(agents, SwarmMode::Parallel).with_isolation();
        let result = swarm.run("task", &runner, &infra).await.unwrap();

        let all_committed = result.outputs.iter().all(|o| o.succeeded());
        assert!(all_committed, "no gate → both commit");

        // Every logged event must be something other than a concurrency gate event.
        let log = infra.log.lock().await;
        let no_gate_events = log
            .events()
            .iter()
            .all(|e| e.data.get("gate").and_then(|v| v.as_str()) != Some("concurrency"));
        assert!(
            no_gate_events,
            "no concurrency gate event when gating is off"
        );
    }
}