car-multi 0.26.0

//! B2 — the farm-out harness.
//!
//! Provisions a git worktree per subtask, runs an agent in it, captures the
//! worktree's changes (both as [`FileChange`]s for the gate and as a git patch
//! for later integration), and feeds the changes to the B1 [`gate`](super::gate).
//!
//! Two verification scopes:
//! - **Per-worktree** ([`run_farm_out`]) — each subtask's worktree is gated
//!   against its own base. This catches a subtask that is broken *on its own*.
//! - **Union** ([`integrate_and_verify`]) — accepted subtasks' patches are
//!   applied into one fresh staging worktree, in level order, and the gate runs
//!   on the *integrated* tree. This is what catches the cross-subtask conflict
//!   class (two subtasks each fine alone, broken when merged) that per-worktree
//!   isolation is structurally blind to — the failure mode B3 must measure.
//!
//! This is still the *dumb* partitioner (file-disjoint leveling, no symbol-level
//! footprints — that is B4) and there is no replan yet (B5).
//!
//! `car-multi` stays free of a `car-external-agents` dependency: the agent is a
//! [`WorktreeAgent`] trait the caller implements. The daemon-side impl (B6)
//! wraps `car_external_agents::invoke`; tests provide a stub.

use std::collections::{BTreeSet, HashSet};
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::sync::Arc;

use async_trait::async_trait;

use super::gate::{
    verify_changes, BuildTestStatus, DeclaredFootprint, FileChange, GateConfig, MergeVerdict,
    NoVerifyWaiver,
};
use crate::shared::SharedInfra;
use crate::workspace::{AgentWorkspace, WorkspaceConfig};

/// A live per-subtask progress event from [`run_farm_out_with_progress`]. The
/// run-level stages (planning / planned / union_verified) are emitted by the
/// *caller* (e.g. the coder loop); this is the finer-grained per-subtask
/// lifecycle the batch return value (`FarmOutResult`) otherwise only exposes
/// after the whole run finishes. Used to drive a live UI (a foreman pane) that
/// shows each subtask's worktree advancing and its gate verdict as it lands.
///
/// Per subtask the sequence is `SubtaskStarted`, then (only if the agent ran and
/// its changes were captured) `SubtaskVerifying`, then exactly one
/// `SubtaskGated`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ForemanProgress {
    /// A subtask has begun. This is emitted *before* its worktree is
    /// provisioned, so a provisioning failure is reported by a following
    /// `SubtaskGated` with status `error`. `index` is its position in the
    /// original subtask list; `level` is its schedule level (subtasks in the
    /// same level run concurrently); `total` is the batch size.
    SubtaskStarted {
        subtask_id: String,
        index: usize,
        level: usize,
        total: usize,
    },
    /// The agent finished editing and its changes were captured; the gate's
    /// build/test is now running. This phase can dominate wall-clock (a cold
    /// `cargo check` / `swift build`), so it gets its own event — a live UI can
    /// show "verifying" distinctly from "agent still editing" instead of one
    /// undifferentiated spinner.
    SubtaskVerifying { subtask_id: String },
    /// A subtask reached a terminal state. `status` is one of `accepted`,
    /// `rejected`, `inconclusive`, or `error` (agent/workspace/capture failed
    /// before the gate could run). `accepted` mirrors `status == "accepted"` for
    /// callers that only need the boolean.
    SubtaskGated {
        subtask_id: String,
        accepted: bool,
        status: String,
    },
}

/// Callback that receives [`ForemanProgress`] events. It is shared (`Arc`) and
/// `Send + Sync` because subtasks within a schedule level fire concurrently from
/// `join_all` futures, so events from sibling subtasks may interleave.
pub type ForemanProgressSink = Arc<dyn Fn(ForemanProgress) + Send + Sync>;

/// Maps a gate verdict to the short label carried in the `status` field of
/// [`ForemanProgress::SubtaskGated`]. The match is exhaustive on purpose: a new
/// verdict variant must pick its label here before the crate compiles.
fn verdict_status(verdict: &MergeVerdict) -> &'static str {
    let label: &'static str = match *verdict {
        MergeVerdict::Inconclusive { .. } => "inconclusive",
        MergeVerdict::Rejected { .. } => "rejected",
        MergeVerdict::Accepted { .. } => "accepted",
    };
    label
}

/// Forwards `event` to the progress sink when one is attached; a run without a
/// sink simply drops the event.
#[inline]
fn emit(sink: &Option<ForemanProgressSink>, event: ForemanProgress) {
    let Some(deliver) = sink else {
        return;
    };
    deliver(event);
}

/// Errors from running the farm-out harness. Each variant carries a
/// human-readable message; the `Display` text prefixes it with the failing
/// stage.
#[derive(Debug, thiserror::Error)]
pub enum ForemanError {
    /// A worktree could not be provisioned for a subtask or for integration.
    #[error("workspace provisioning failed: {0}")]
    Workspace(String),
    /// The agent reported a failure while running in its worktree.
    #[error("agent execution failed: {0}")]
    Agent(String),
    /// A git operation failed, or the blocking task running it panicked.
    #[error("git error: {0}")]
    Git(String),
}

/// A single unit of farmed-out work. With the B2 dumb partitioner, `files` (the
/// repo-relative paths the subtask expects to touch) is the partition key; B4
/// swaps that for symbol-level footprints.
#[derive(Debug, Clone)]
pub struct Subtask {
    pub id: String,
    pub prompt: String,
    /// Repo-relative paths the subtask is expected to touch. This is the dumb
    /// partitioner's key and the fallback when no footprint is declared.
    pub files: Vec<String>,
    /// The DECLARED symbol footprint: what the subtask promised to write/read.
    /// If every subtask carries one, scheduling goes through the B4 footprint
    /// analyzer rather than the file partitioner, and gate containment is
    /// checked against the declared *writes*. This is deliberately separate
    /// from the scheduler's *expanded* (blast-radius) footprint: handing the
    /// expanded set to the gate would silently widen containment.
    pub footprint: Option<car_ast::SymbolFootprint>,
}

impl Subtask {
    /// Builds a subtask that declares files only. With no symbol footprint it is
    /// scheduled by the dumb partitioner and gate containment is disabled.
    pub fn files_only(
        id: impl Into<String>,
        prompt: impl Into<String>,
        files: Vec<String>,
    ) -> Self {
        let id: String = id.into();
        let prompt: String = prompt.into();
        Self {
            footprint: None,
            files,
            prompt,
            id,
        }
    }
}

/// What an agent reported after running in a worktree. The harness in this file
/// only checks whether the run succeeded; the summary itself is for the caller.
#[derive(Debug, Clone, Default)]
pub struct AgentRunSummary {
    /// The agent's free-form final answer text.
    pub answer: String,
}

/// Everything an agent needs to run one subtask in a worktree. Carrying the
/// governance surface (`allowed_tools`, `mcp_endpoint`) now means the
/// daemon-side impl that wraps `car_external_agents::invoke` does not force a
/// breaking trait change at B6.
#[derive(Debug)]
pub struct WorktreeAgentRequest<'a> {
    /// The subtask to execute (its prompt, declared files and footprint).
    pub subtask: &'a Subtask,
    /// Working directory the agent edits in place — the subtask's worktree.
    pub cwd: &'a Path,
    /// Tool allowlist, copied from [`FarmOutConfig::allowed_tools`].
    pub allowed_tools: Option<Vec<String>>,
    /// MCP governance endpoint, copied from [`FarmOutConfig::mcp_endpoint`].
    pub mcp_endpoint: Option<String>,
}

/// Runs an agent's prompt against a working tree, editing files in place. The
/// real impl (daemon-side, B6) wraps `car_external_agents::invoke`; tests stub
/// it. Keeping this a trait is what keeps `car-multi` independent of
/// `car-external-agents`.
///
/// Implementations must be `Send + Sync`: one agent value is shared by all the
/// subtasks of a schedule level, which run concurrently.
#[async_trait]
pub trait WorktreeAgent: Send + Sync {
    /// Execute `req.subtask` with `req.cwd` as the working directory.
    ///
    /// # Errors
    /// Returning `Err` marks the subtask as failed before the gate runs; the
    /// harness records the error on that subtask's outcome and continues with
    /// the rest of the batch.
    async fn run_in(
        &self,
        req: &WorktreeAgentRequest<'_>,
    ) -> Result<AgentRunSummary, ForemanError>;
}

/// Tuning for a farm-out run. `Default` leaves every option unset / off.
#[derive(Debug, Clone, Default)]
pub struct FarmOutConfig {
    /// Per-worktree gate command — a **regression** check ("does this one
    /// subtask's change compile / not break existing tests?"). A subtask
    /// legitimately implements only PART of the goal, so a goal-level test that
    /// needs every subtask must NOT run here (it would reject each subtask).
    pub verify_command: Option<Vec<String>>,
    /// Integrated-**union** gate command — the **goal** check ("does the merged
    /// result actually achieve the goal?"). Falls back to [`verify_command`]
    /// when `None`, so callers that want one command for both can leave it unset.
    ///
    /// [`verify_command`]: FarmOutConfig::verify_command
    pub union_verify_command: Option<Vec<String>>,
    /// Tool allowlist passed to each agent invocation.
    pub allowed_tools: Option<Vec<String>>,
    /// MCP governance endpoint passed to each agent invocation.
    pub mcp_endpoint: Option<String>,
    /// When the decomposed parallel path fails to integrate, recover by
    /// re-running the whole goal as one session (see `run_foreman`). **Off by
    /// default** because it spends a full extra (serial) session on top of the
    /// failed parallel spend — delivery-first callers (e.g. the daemon's
    /// `foreman.run`) opt in; cost-sensitive callers leave it off and inspect the
    /// `delivered()` flag / retained integration evidence instead.
    pub recover_via_single_session: bool,
    /// Base directory under which per-subtask git worktrees are created. When
    /// `None`, defaults to an **out-of-repo** location derived from `repo_root`
    /// (see [`default_worktree_base`]) so a worktree — and especially one
    /// *leaked* by a crash mid-run — never appears as an untracked entry inside
    /// the caller's checkout (which would make every later `git status` read
    /// dirty). Set this to pin worktrees to a specific scratch area, e.g. a
    /// daemon state dir.
    pub worktree_base: Option<PathBuf>,
    /// Explicit waiver letting the gate accept a change with **no** configured
    /// build/test command. Without it, a missing verify command yields
    /// [`MergeVerdict::Inconclusive`] (fail-closed), never `Accepted` — so a
    /// decomposed run over a project with no reliably-detectable build command
    /// can never deliver. Supply this when containment + apply-conflict +
    /// duplicate-declaration checks are the intended soundness boundary and a
    /// build leg genuinely can't be determined. Threaded into every gate
    /// (per-subtask, union, regional).
    pub no_verify_waiver: Option<NoVerifyWaiver>,
}

/// Computes the default **out-of-repo** directory for Foreman worktrees:
/// `<temp dir>/car-foreman-worktrees/<16-hex-digit hash of repo_root>`.
///
/// Keying on the repo path keeps distinct repos apart, and living outside the
/// checkout means a crashed run cannot leave worktree directories where
/// `git status` would report them as untracked (wedging any consumer that
/// guards on a clean tree). The same `repo_root` always maps to the same base.
fn default_worktree_base(repo_root: &Path) -> PathBuf {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    // Hash the path exactly as given; no canonicalization is performed.
    let digest = {
        let mut state = DefaultHasher::new();
        repo_root.hash(&mut state);
        state.finish()
    };

    let mut base = std::env::temp_dir();
    base.push("car-foreman-worktrees");
    base.push(format!("{digest:016x}"));
    base
}

/// Produces the [`WorkspaceConfig`] used for every worktree of a farm-out run:
/// a git worktree of `repo_root` rooted at [`FarmOutConfig::worktree_base`] when
/// the caller pinned one, otherwise at the out-of-repo default. Going through
/// [`WorkspaceConfig::git_worktree_at`] keeps the working directory outside the
/// repository.
fn worktree_workspace_config(repo_root: &Path, config: &FarmOutConfig) -> WorkspaceConfig {
    let base = match &config.worktree_base {
        Some(pinned) => pinned.clone(),
        None => default_worktree_base(repo_root),
    };
    WorkspaceConfig::git_worktree_at(repo_root, base)
}

/// Dumb partitioner: packs subtasks into levels of file-disjoint work. No two
/// members of a level declare the same file, so their worktrees cannot produce a
/// merge conflict with each other; subtasks sharing a file end up in different
/// levels and are therefore serialized. Placement is greedy first-fit in input
/// order (B4 supersedes this with footprint-aware scheduling).
///
/// A subtask declaring *no* files has an unknown blast radius. It is given a
/// sealed level of its own instead of being packed alongside work it might
/// collide with at runtime.
///
/// Returns the levels as lists of indices into `subtasks`.
pub fn partition_by_files(subtasks: &[Subtask]) -> Vec<Vec<usize>> {
    struct Bucket {
        members: Vec<usize>,
        /// Files claimed so far. `None` marks a sealed (isolated, no-files)
        /// level that never accepts further members.
        claimed: Option<HashSet<String>>,
    }

    let mut buckets: Vec<Bucket> = Vec::new();

    for (idx, subtask) in subtasks.iter().enumerate() {
        let wanted: HashSet<String> = subtask.files.iter().cloned().collect();

        if wanted.is_empty() {
            buckets.push(Bucket {
                members: vec![idx],
                claimed: None,
            });
            continue;
        }

        // First open level whose claimed files do not overlap this subtask's.
        let slot = buckets.iter().position(|bucket| {
            bucket
                .claimed
                .as_ref()
                .is_some_and(|claimed| claimed.is_disjoint(&wanted))
        });

        match slot {
            Some(at) => {
                let bucket = &mut buckets[at];
                bucket.members.push(idx);
                if let Some(claimed) = bucket.claimed.as_mut() {
                    claimed.extend(wanted);
                }
            }
            None => buckets.push(Bucket {
                members: vec![idx],
                claimed: Some(wanted),
            }),
        }
    }

    buckets.into_iter().map(|bucket| bucket.members).collect()
}

/// Blast-radius expansion depth for footprint scheduling: the depth argument
/// passed to `car_ast::expand_footprint` when building the schedule. Shared with
/// the planner's pre-check so the two cannot drift.
pub(crate) const FOOTPRINT_BLAST_DEPTH: usize = 3;

/// Choose the parallel schedule. When EVERY subtask declares a symbol footprint,
/// use the B4 footprint analyzer (build a `ProjectIndex`, expand each declared
/// footprint to its blast radius, and level by symbol conflicts — fail-closed on
/// uncertainty). Otherwise fall back to the dumb file partitioner. Returns levels
/// of indices into `subtasks`.
///
/// The analyzer's plan is only trusted when it is a true partition of the input:
/// every id it names must resolve to a subtask, and every subtask must be placed
/// exactly once. If the plan names an unknown id, repeats a subtask, or omits
/// one, the index-based file partitioner is used instead, so a subtask is never
/// silently dropped or run twice and an unexpected id cannot panic the run.
fn schedule(repo_root: &Path, subtasks: &[Subtask]) -> Vec<Vec<usize>> {
    if subtasks.is_empty() || !subtasks.iter().all(|s| s.footprint.is_some()) {
        return partition_by_files(subtasks);
    }
    // Duplicate ids would make the id→index resolution below ambiguous and
    // silently drop a subtask; the index-based file partitioner is safe in that
    // case.
    let mut seen = HashSet::new();
    if !subtasks.iter().all(|s| seen.insert(s.id.as_str())) {
        return partition_by_files(subtasks);
    }
    let index = car_ast::ProjectIndex::build(repo_root);
    let fsubs: Vec<car_ast::FootprintSubtask> = subtasks
        .iter()
        .map(|s| car_ast::FootprintSubtask {
            id: s.id.clone(),
            // Scheduler consumes the EXPANDED footprint (blast radius). The gate
            // separately consumes the DECLARED footprint — never this one.
            footprint: car_ast::expand_footprint(
                &index,
                s.footprint.as_ref().expect("all footprints present"),
                FOOTPRINT_BLAST_DEPTH,
            ),
        })
        .collect();
    let plan = car_ast::analyze(&fsubs);

    // Translate the plan's ids back to indices, tracking which subtasks have
    // been placed so an inconsistent plan is detected rather than executed.
    let mut placed = vec![false; subtasks.len()];
    let mut levels: Vec<Vec<usize>> = Vec::new();
    for level in plan.levels.iter() {
        let mut members: Vec<usize> = Vec::new();
        for id in level.iter() {
            match subtasks.iter().position(|s| &s.id == id) {
                Some(i) if !placed[i] => {
                    placed[i] = true;
                    members.push(i);
                }
                // Unknown id, or a subtask scheduled twice: distrust the plan.
                _ => return partition_by_files(subtasks),
            }
        }
        levels.push(members);
    }
    // A subtask the plan never mentioned would otherwise produce no outcome.
    if placed.contains(&false) {
        return partition_by_files(subtasks);
    }
    levels
}

/// What happened to one farmed-out subtask. `verdict` is `None` only when the
/// agent or workspace failed before the gate could run, in which case `error`
/// holds the reason; a single subtask failing never aborts the batch. `patch`,
/// when present, is the git patch of the subtask's changes, kept so that
/// [`integrate_and_verify`] can replay it into a staging tree once the worktree
/// itself is gone.
#[derive(Debug)]
pub struct SubtaskOutcome {
    pub subtask_id: String,
    pub verdict: Option<MergeVerdict>,
    pub changes: Vec<FileChange>,
    pub patch: Option<String>,
    pub error: Option<String>,
}

impl SubtaskOutcome {
    /// `true` only when the gate ran and its verdict is an acceptance.
    pub fn is_accepted(&self) -> bool {
        match &self.verdict {
            Some(verdict) => verdict.is_accepted(),
            None => false,
        }
    }
}

/// The result of farming out one batch of subtasks.
#[derive(Debug)]
pub struct FarmOutResult {
    /// The schedule levels that were produced (each a list of indices into the
    /// `subtasks` slice).
    pub levels: Vec<Vec<usize>>,
    pub outcomes: Vec<SubtaskOutcome>,
}

impl FarmOutResult {
    /// Number of outcomes whose gate verdict was an acceptance.
    pub fn accepted_count(&self) -> usize {
        self.outcomes
            .iter()
            .map(|outcome| usize::from(outcome.is_accepted()))
            .sum()
    }
}

/// Provision a worktree per subtask, run the agent in it, capture its changes
/// (as `FileChange`s and a retained patch), and run the B1 gate per worktree.
/// Levels run sequentially; subtasks within a level run concurrently. Each
/// worktree is cleaned up when its `AgentWorkspace` drops, after the gate has run
/// its build/test inside that tree — which is why the patch is captured first.
pub async fn run_farm_out(
    repo_root: &Path,
    subtasks: &[Subtask],
    agent: &dyn WorktreeAgent,
    config: &FarmOutConfig,
    infra: &SharedInfra,
) -> FarmOutResult {
    run_farm_out_inner(repo_root, subtasks, agent, config, infra, None).await
}

/// Same as [`run_farm_out`], except that [`ForemanProgress`] events are pushed
/// to `progress` as each subtask starts, verifies and gates. The returned value
/// is identical; the sink exists purely for live observation (a UI pane).
/// Subtasks of one schedule level run concurrently, so their events interleave.
pub async fn run_farm_out_with_progress(
    repo_root: &Path,
    subtasks: &[Subtask],
    agent: &dyn WorktreeAgent,
    config: &FarmOutConfig,
    infra: &SharedInfra,
    progress: ForemanProgressSink,
) -> FarmOutResult {
    let sink = Some(progress);
    run_farm_out_inner(repo_root, subtasks, agent, config, infra, sink).await
}

/// Shared body of [`run_farm_out`] and [`run_farm_out_with_progress`]: computes
/// the schedule, then walks it level by level, awaiting every subtask of a level
/// together before moving to the next. Outcomes are appended in schedule order
/// (level by level, members in their in-level order).
async fn run_farm_out_inner(
    repo_root: &Path,
    subtasks: &[Subtask],
    agent: &dyn WorktreeAgent,
    config: &FarmOutConfig,
    infra: &SharedInfra,
    progress: Option<ForemanProgressSink>,
) -> FarmOutResult {
    let total = subtasks.len();
    let levels = schedule(repo_root, subtasks);
    let mut outcomes = Vec::with_capacity(total);
    let sink = progress.as_ref();

    for (level_idx, members) in levels.iter().enumerate() {
        // Futures are lazy: nothing runs until `join_all` polls them together.
        let in_flight: Vec<_> = members
            .iter()
            .map(|&i| {
                run_one_subtask(
                    repo_root,
                    i,
                    &subtasks[i],
                    agent,
                    config,
                    infra,
                    level_idx,
                    total,
                    sink,
                )
            })
            .collect();
        let finished = futures::future::join_all(in_flight).await;
        outcomes.extend(finished);
    }

    FarmOutResult { levels, outcomes }
}

/// Runs one subtask end to end and returns its [`SubtaskOutcome`]; it never
/// returns an error — every failure is folded into the outcome's `error` field.
///
/// Steps, in order: emit `SubtaskStarted`; provision the worktree; run the
/// agent in it; capture the resulting file changes and patch on a blocking
/// thread; emit `SubtaskVerifying`; run the gate; emit `SubtaskGated`. Any
/// failure before the gate emits `SubtaskGated` with status `"error"` and
/// returns an outcome with `verdict: None`.
///
/// `index` is the subtask's position in the original list, `level` its schedule
/// level and `total` the batch size — all three are only used for progress
/// events (and `index` for the workspace name).
// The argument list mirrors the call site in `run_farm_out_inner`; bundling it
// into a struct would not simplify anything.
#[allow(clippy::too_many_arguments)]
async fn run_one_subtask(
    repo_root: &Path,
    index: usize,
    subtask: &Subtask,
    agent: &dyn WorktreeAgent,
    config: &FarmOutConfig,
    infra: &SharedInfra,
    level: usize,
    total: usize,
    progress: Option<&ForemanProgressSink>,
) -> SubtaskOutcome {
    // Clone so the helper closures hold an owned `Option<ForemanProgressSink>`
    // (the `&ForemanProgressSink` borrow can't outlive the early-return paths).
    let progress = progress.cloned();
    // Emitted before provisioning: a provisioning failure below is therefore
    // reported as Started followed by Gated("error").
    emit(
        &progress,
        ForemanProgress::SubtaskStarted {
            subtask_id: subtask.id.clone(),
            index,
            level,
            total,
        },
    );
    // Shared early-exit path: announce the terminal "error" state and build the
    // verdict-less outcome that carries the error text.
    let fail = |error: ForemanError| {
        emit(
            &progress,
            ForemanProgress::SubtaskGated {
                subtask_id: subtask.id.clone(),
                accepted: false,
                status: "error".into(),
            },
        );
        SubtaskOutcome {
            subtask_id: subtask.id.clone(),
            verdict: None,
            changes: Vec::new(),
            patch: None,
            error: Some(error.to_string()),
        }
    };

    // Index-prefix the workspace name so two subtask ids that sanitize to the
    // same directory cannot collide and self-heal-clobber each other's worktree.
    let ws_name = format!("{index:04}-{}", subtask.id);
    // `workspace` stays bound until this function returns, i.e. until after the
    // gate below has finished running inside the worktree.
    let workspace =
        match AgentWorkspace::provision(&worktree_workspace_config(repo_root, config), &ws_name) {
            Ok(ws) => ws,
            Err(e) => return fail(ForemanError::Workspace(e)),
        };
    let cwd = workspace.path().to_path_buf();

    let req = WorktreeAgentRequest {
        subtask,
        cwd: &cwd,
        allowed_tools: config.allowed_tools.clone(),
        mcp_endpoint: config.mcp_endpoint.clone(),
    };
    // The agent's summary is not used here; only success/failure matters.
    if let Err(e) = agent.run_in(&req).await {
        return fail(e);
    }

    // Change collection and patch capture run on a blocking thread, off the
    // async executor. The outer `Err` arm is a failed join of that blocking
    // task.
    let cwd_for_blocking = cwd.clone();
    let (changes, patch) = match tokio::task::spawn_blocking(move || {
        let changes = collect_file_changes(&cwd_for_blocking)?;
        let patch = capture_patch(&cwd_for_blocking)?;
        Ok::<_, ForemanError>((changes, patch))
    })
    .await
    {
        Ok(Ok(v)) => v,
        Ok(Err(e)) => return fail(e),
        Err(e) => return fail(ForemanError::Git(format!("collect task panicked: {e}"))),
    };

    emit(
        &progress,
        ForemanProgress::SubtaskVerifying {
            subtask_id: subtask.id.clone(),
        },
    );
    // Per-worktree gate: uses the regression command (`verify_command`), not the
    // union/goal command.
    let mut gate_config = GateConfig::new(subtask.id.clone(), &cwd);
    gate_config.verify_command = config.verify_command.clone();
    gate_config.no_verify_waiver = config.no_verify_waiver.clone();
    // The gate's containment checks the DECLARED writes only (never the
    // scheduler's expanded blast radius). No declaration ⇒ containment disabled.
    let footprint = match &subtask.footprint {
        Some(fp) => DeclaredFootprint::from_refs(fp.writes.iter().cloned()),
        None => DeclaredFootprint::unconstrained(),
    };
    let verdict = verify_changes(&gate_config, &changes, &footprint, infra).await;
    emit(
        &progress,
        ForemanProgress::SubtaskGated {
            subtask_id: subtask.id.clone(),
            accepted: verdict.is_accepted(),
            status: verdict_status(&verdict).into(),
        },
    );

    // The patch is retained regardless of the verdict; callers decide which
    // outcomes to integrate.
    SubtaskOutcome {
        subtask_id: subtask.id.clone(),
        verdict: Some(verdict),
        changes,
        patch: Some(patch),
        error: None,
    }
}

/// The outcome of integrating the accepted subtasks and gating their union.
#[derive(Debug)]
pub struct IntegrationResult {
    pub applied: usize,
    /// Patches that did not apply cleanly (textual integration conflicts), as
    /// human-readable `"{subtask_id}: {error}"` strings. The structured form
    /// lives in [`blame`].
    ///
    /// [`blame`]: IntegrationResult::blame
    pub apply_conflicts: Vec<String>,
    /// The gate's verdict on the integrated union. `None` when a patch failed
    /// to apply — that is itself an integration failure which the caller must
    /// treat as a reject.
    pub verdict: Option<MergeVerdict>,
    /// Structured attribution of *why* the union failed: the subtask / file /
    /// symbol / check implicated. `None` when the union integrated cleanly.
    pub blame: Option<IntegrationBlame>,
}

impl IntegrationResult {
    /// Sound integration requires BOTH that every patch applied and that the
    /// union gate accepted. A textual apply conflict can never count as success.
    pub fn integrated_cleanly(&self) -> bool {
        if !self.apply_conflicts.is_empty() {
            return false;
        }
        matches!(&self.verdict, Some(verdict) if verdict.is_accepted())
    }
}

/// A patch that failed to apply during union integration, attributed to its
/// subtask and the files its patch targets (parsed from the patch headers).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ApplyConflict {
    /// Id of the subtask whose patch did not apply.
    pub subtask_id: String,
    /// Repo-relative paths named in that patch's `diff --git` headers.
    pub files: Vec<String>,
    /// The apply error rendered as text.
    pub detail: String,
}

/// A duplicate declaration the union gate found. `candidate_subtask_ids` are the
/// subtasks whose patches touched the offending *file* — candidates, not proven
/// culprits: file-granularity can implicate a subtask that edited the file but
/// not the duplicated symbol. Authoritative symbol-level culpability would need
/// the declared footprints; advisory blame deliberately stays at file level.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DuplicateBlame {
    /// File in which the gate reported the duplicate.
    pub file: String,
    /// The duplicated symbol, as reported by the gate.
    pub symbol: String,
    /// Subtasks whose patches touched `file`; empty when no integrated patch's
    /// headers named that file.
    pub candidate_subtask_ids: Vec<String>,
}

/// The union build/test leg's failure (the goal check that rejected the merge).
/// `candidate_subtask_ids` is the localized region: the subtasks whose touched
/// files appear in the failure output (compilers print `file:line`), biased to
/// inclusion. It falls back to the WHOLE integrated set when the output names no
/// known file — we can't localize, so the caller redoes everything (whole-goal)
/// rather than risk dropping the real culprit. See [`localize_build_failure`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BuildTestFailure {
    /// Exit code reported by the gate for the failed command, when it has one.
    pub code: Option<i32>,
    /// The failure output as retained by the gate (already size-bounded there).
    pub output_tail: String,
    /// Localized region, or the whole integrated set when localization failed.
    pub candidate_subtask_ids: Vec<String>,
}

/// Structured account of *why* a union integration failed: the subtask, file,
/// symbol or check that is implicated. It is populated only on failure. A
/// regional replan uses it to retry just the failing region rather than the
/// whole goal, and a UI uses it to explain "why did this run fail" instead of
/// merely "it failed".
///
/// Explicitly not attempted: parsing the build/test *output* to map a failing
/// test back to a symbol, which would need a language-specific parser. The raw
/// (bounded) output tail is kept in [`build_test`] instead.
///
/// [`build_test`]: IntegrationBlame::build_test
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct IntegrationBlame {
    /// Patches that failed to apply (textual conflicts), attributed to subtask + files.
    pub apply_conflicts: Vec<ApplyConflict>,
    /// Duplicate declarations the union gate found, attributed to subtasks.
    pub duplicate_conflicts: Vec<DuplicateBlame>,
    /// The union build/test failure, when that is what rejected the merge.
    pub build_test: Option<BuildTestFailure>,
}

impl IntegrationBlame {
    /// `true` when no failure cause of any kind has been recorded.
    pub fn is_empty(&self) -> bool {
        let Self {
            apply_conflicts,
            duplicate_conflicts,
            build_test,
        } = self;
        build_test.is_none() && duplicate_conflicts.is_empty() && apply_conflicts.is_empty()
    }

    /// The set of subtasks a regional replan has to DROP and redo: everything
    /// implicated by any failure cause. A `build_test` failure contributes its
    /// *localized* region (subtasks whose files the failure output named). When
    /// localization was impossible it contributes the whole integrated set,
    /// which makes the implicated set cover everything ⇒ the caller's "clean"
    /// set comes out empty ⇒ the caller falls back to a whole-goal session
    /// instead of a regional one.
    pub fn implicated_subtasks(&self) -> BTreeSet<String> {
        let from_apply = self.apply_conflicts.iter().map(|conflict| &conflict.subtask_id);
        let from_duplicates = self
            .duplicate_conflicts
            .iter()
            .flat_map(|dup| dup.candidate_subtask_ids.iter());
        let from_build = self
            .build_test
            .iter()
            .flat_map(|failure| failure.candidate_subtask_ids.iter());

        from_apply
            .chain(from_duplicates)
            .chain(from_build)
            .cloned()
            .collect()
    }
}

/// Works out which subtasks a build/test failure implicates by checking, for
/// every file a subtask touched, whether that path occurs in the failure output
/// (compilers and panics print `file:line`). The result is sorted and
/// de-duplicated.
///
/// The match is biased toward INCLUSION: an over-broad region only makes the
/// regional replan redo more work (still gated), whereas under-inclusion would
/// reproduce the failure (caught by the gate → whole-goal fallback). An empty
/// result means the output named no known file, so the caller cannot localize
/// and keeps the whole integrated set.
fn localize_build_failure(
    output: &str,
    file_to_subtasks: &std::collections::HashMap<String, Vec<String>>,
) -> Vec<String> {
    // Plain substring test, deliberately NOT boundary-aware: a short name such
    // as `x.rs` also matches `xxx.rs` and over-implicates. That is the safe
    // direction (see the inclusion bias above). Do NOT tighten this into a
    // boundary check without preserving that bias.
    let implicated: BTreeSet<String> = file_to_subtasks
        .iter()
        .filter(|(file, _)| output.contains(file.as_str()))
        .flat_map(|(_, owners)| owners.iter().cloned())
        .collect();
    implicated.into_iter().collect()
}

/// Extracts the repo-relative paths a unified-diff patch targets by reading its
/// `diff --git a/<x> b/<y>` headers and taking the `b/` (post-image) side. Each
/// path is reported once, in order of first appearance. This lets an apply
/// conflict or duplicate be attributed to files — and from there to subtasks —
/// without re-running git.
///
/// Best-effort: the path is taken after the LAST `" b/"` in the header, so a
/// path that itself contains `" b/"` is truncated. Such paths are rare and only
/// degrade blame precision, never correctness elsewhere.
fn files_in_patch(patch: &str) -> Vec<String> {
    const HEADER: &str = "diff --git ";
    const POST_IMAGE: &str = " b/";

    let targets = patch
        .lines()
        .filter_map(|line| line.strip_prefix(HEADER))
        .filter_map(|header| {
            header
                .rfind(POST_IMAGE)
                .map(|at| &header[at + POST_IMAGE.len()..])
        })
        .filter(|path| !path.is_empty());

    let mut unique: Vec<String> = Vec::new();
    for path in targets {
        let already_listed = unique.iter().any(|known| known == path);
        if !already_listed {
            unique.push(path.to_owned());
        }
    }
    unique
}

/// Apply accepted subtasks' patches into one fresh staging worktree from the
/// common base, in the given order, and run the gate on the **integrated union**.
/// This is the integration-failure check the per-worktree gate cannot perform:
/// two subtasks each accepted in isolation can still produce duplicate
/// declarations, broken references, or a failing build once merged.
///
/// `subtask_label` labels the union for the gate's audit/subtask field (as
/// `union:{label}`) and names the staging workspace (`integrate-{label}`).
/// `accepted_patches` is a list of `(subtask_id, patch)` pairs; patches that are
/// empty after trimming are skipped.
///
/// # Errors
/// Returns [`ForemanError::Workspace`] when the staging worktree cannot be
/// provisioned, and [`ForemanError::Git`] when collecting the staged changes
/// fails or the blocking git task panics. A patch that fails to *apply* is not
/// an `Err`: it is reported in the returned [`IntegrationResult`] (with
/// `verdict: None`), and the gate is not run.
pub async fn integrate_and_verify(
    repo_root: &Path,
    subtask_label: &str,
    accepted_patches: &[(String, String)], // (subtask_id, patch)
    config: &FarmOutConfig,
    infra: &SharedInfra,
) -> Result<IntegrationResult, ForemanError> {
    // `staging` stays bound for the whole function, so the staging worktree is
    // still present while the gate runs in it below.
    let staging = AgentWorkspace::provision(
        &worktree_workspace_config(repo_root, config),
        &format!("integrate-{subtask_label}"),
    )
    .map_err(ForemanError::Workspace)?;
    let staging_path = staging.path().to_path_buf();
    // Owned copy: the patches are moved into the blocking task further down.
    let patches: Vec<(String, String)> = accepted_patches.to_vec();
    let patch_count = patches.len();
    // The union is graded by the GOAL check (union_verify_command), falling back
    // to the per-worktree command when the caller supplied only one.
    let verify_command = config
        .union_verify_command
        .clone()
        .or_else(|| config.verify_command.clone());
    let label = subtask_label.to_string();

    // Map file → subtasks whose patch touched it, for blame attribution. Built
    // from the patch headers, so a duplicate the union gate finds in a file can
    // name the subtasks that contributed it. `union_members` is the whole
    // integrated set (deterministic order) — the region for a build/test failure
    // that can't be localized further.
    let mut file_to_subtasks: std::collections::HashMap<String, Vec<String>> =
        std::collections::HashMap::new();
    let mut union_members: Vec<String> = Vec::new();
    for (id, patch) in &patches {
        if !union_members.contains(id) {
            union_members.push(id.clone());
        }
        for file in files_in_patch(patch) {
            file_to_subtasks.entry(file).or_default().push(id.clone());
        }
    }

    // Apply + collect is pure git I/O — keep it off the async executor.
    let staging_for_blocking = staging_path.clone();
    let (conflicts, changes) = tokio::task::spawn_blocking(move || {
        let mut conflicts: Vec<ApplyConflict> = Vec::new();
        for (id, patch) in &patches {
            if patch.trim().is_empty() {
                continue;
            }
            // A failed apply does not stop the loop: the remaining patches are
            // still attempted so every conflicting subtask is reported.
            if let Err(e) = git_apply(&staging_for_blocking, patch) {
                conflicts.push(ApplyConflict {
                    subtask_id: id.clone(),
                    files: files_in_patch(patch),
                    detail: e.to_string(),
                });
            }
        }
        let changes = collect_file_changes(&staging_for_blocking)?;
        Ok::<_, ForemanError>((conflicts, changes))
    })
    .await
    // First `?` surfaces a failed join of the blocking task, second `?` the
    // git error returned from inside the closure.
    .map_err(|e| ForemanError::Git(format!("integrate task panicked: {e}")))??;

    // A patch that didn't apply is a textual integration conflict — do not run
    // the gate on a partial tree; report it as the integration failure it is.
    if !conflicts.is_empty() {
        let apply_conflicts: Vec<String> = conflicts
            .iter()
            .map(|c| format!("{}: {}", c.subtask_id, c.detail))
            .collect();
        return Ok(IntegrationResult {
            // NOTE(review): `patch_count` includes patches skipped above for
            // being empty, so `applied` counts those as applied — confirm that
            // is the intended meaning for callers.
            applied: patch_count - conflicts.len(),
            apply_conflicts,
            verdict: None,
            blame: Some(IntegrationBlame {
                apply_conflicts: conflicts,
                ..Default::default()
            }),
        });
    }

    // Union gate: containment is unconstrained here — the declared footprints
    // were already enforced per subtask; this pass checks the merged tree.
    let mut gate_config = GateConfig::new(format!("union:{label}"), &staging_path);
    gate_config.verify_command = verify_command;
    gate_config.no_verify_waiver = config.no_verify_waiver.clone();
    let verdict =
        verify_changes(&gate_config, &changes, &DeclaredFootprint::unconstrained(), infra).await;

    // On a rejected/inconclusive union, attribute the gate's findings: each
    // duplicate declaration to the subtasks that touched its file, plus the
    // build/test failure (raw output tail; symbol-level test blame is deferred).
    let blame = if verdict.is_accepted() {
        None
    } else {
        let ev = verdict.evidence();
        let duplicate_conflicts = ev
            .semantic_conflicts
            .iter()
            .map(|d| DuplicateBlame {
                file: d.file.clone(),
                symbol: d.symbol.clone(),
                // Empty when no integrated patch's headers named this file.
                candidate_subtask_ids: file_to_subtasks.get(&d.file).cloned().unwrap_or_default(),
            })
            .collect();
        let build_test = match &ev.build_test {
            BuildTestStatus::Failed { code, output } => {
                // Localize by matching the failure output against the files each
                // subtask touched. When the output names no known file we can't
                // localize → keep the whole integrated set (the conservative
                // region, which routes the caller to a whole-goal recovery).
                let localized = localize_build_failure(output, &file_to_subtasks);
                let candidate_subtask_ids = if localized.is_empty() {
                    union_members.clone()
                } else {
                    localized
                };
                Some(BuildTestFailure {
                    code: *code,
                    output_tail: output.clone(), // already bounded by the gate's max_output_bytes
                    candidate_subtask_ids,
                })
            }
            // Any other build/test status carries no failure to attribute.
            _ => None,
        };
        Some(IntegrationBlame {
            apply_conflicts: Vec::new(),
            duplicate_conflicts,
            build_test,
        })
    };

    Ok(IntegrationResult {
        applied: patch_count,
        apply_conflicts: Vec::new(),
        verdict: Some(verdict),
        blame,
    })
}

/// Salvage a failed parallel union: replay the accepted patches that blame did
/// not implicate, then let a single agent session finish the ORIGINAL goal on
/// top of them, so only the failing region is redone.
///
/// Steps, in order:
/// 1. Provision a fresh worktree for the attempt.
/// 2. Apply every non-blank patch in `clean_patches` (uncommitted) with plain
///    `git apply`. The clean set is not assumed to be mutually consistent — the
///    first patch that fails to apply aborts the attempt.
/// 3. Run `agent` over `goal` inside that worktree, with no declared footprint.
/// 4. Capture the resulting file changes and patch, and gate the whole tree
///    with the goal check (`union_verify_command`, falling back to
///    `verify_command`).
///
/// Returns `None` when the attempt could not be staged or run at all — the
/// workspace could not be provisioned, a clean patch did not apply, a blocking
/// task failed to join, the agent errored, or capture failed — which tells the
/// caller to fall back to a whole-goal session. A `Some` outcome carries the
/// gate's verdict; a non-accepted one is likewise a signal to fall back.
pub async fn regional_replan(
    repo_root: &Path,
    goal: &str,
    clean_patches: &[(String, String)],
    agent: &dyn WorktreeAgent,
    config: &FarmOutConfig,
    infra: &SharedInfra,
) -> Option<SubtaskOutcome> {
    // Synthetic id shared by the subtask, the gate config and the outcome.
    const REGIONAL_ID: &str = "__regional_replan__";

    let workspace = {
        let workspace_config = worktree_workspace_config(repo_root, config);
        AgentWorkspace::provision(&workspace_config, "regional-replan").ok()?
    };
    let cwd = workspace.path().to_path_buf();

    // Checked precondition: replay the clean set and stop at the first patch
    // that refuses to apply. Blank patches are skipped, not applied.
    let stage_dir = cwd.clone();
    let owned_patches = clean_patches.to_vec();
    let all_applied = tokio::task::spawn_blocking(move || {
        owned_patches
            .iter()
            .filter(|(_, patch)| !patch.trim().is_empty())
            .all(|(_, patch)| git_apply(&stage_dir, patch).is_ok())
    })
    .await
    .ok()?;
    if !all_applied {
        return None;
    }

    // The agent resumes the original goal with the clean work already on disk.
    // A files-only subtask with an empty file list declares no footprint.
    let subtask = Subtask::files_only(REGIONAL_ID, goal.to_string(), Vec::new());
    let request = WorktreeAgentRequest {
        subtask: &subtask,
        cwd: &cwd,
        allowed_tools: config.allowed_tools.clone(),
        mcp_endpoint: config.mcp_endpoint.clone(),
    };
    agent.run_in(&request).await.ok()?;

    // Snapshot the tree off the async executor: file changes first, then the
    // replayable patch (same order as before — patch capture touches the index).
    let capture_dir = cwd.clone();
    let captured = tokio::task::spawn_blocking(move || -> Result<_, ForemanError> {
        let changes = collect_file_changes(&capture_dir)?;
        let patch = capture_patch(&capture_dir)?;
        Ok((changes, patch))
    })
    .await;
    let (changes, patch) = match captured {
        Ok(Ok(pair)) => pair,
        _ => return None,
    };

    // The combined clean+region tree must satisfy the goal check: prefer the
    // union command, otherwise the per-worktree one.
    let mut gate_config = GateConfig::new(REGIONAL_ID, &cwd);
    gate_config.verify_command = match &config.union_verify_command {
        Some(command) => Some(command.clone()),
        None => config.verify_command.clone(),
    };
    gate_config.no_verify_waiver = config.no_verify_waiver.clone();
    let verdict =
        verify_changes(&gate_config, &changes, &DeclaredFootprint::unconstrained(), infra).await;

    Some(SubtaskOutcome {
        subtask_id: REGIONAL_ID.into(),
        verdict: Some(verdict),
        changes,
        patch: Some(patch),
        error: None,
    })
}

/// Derive the uncommitted changes in a worktree (relative to its HEAD) as
/// [`FileChange`]s.
///
/// Parses `git status --porcelain -z --untracked-files=all`. Each primary
/// entry is `XY <path>` terminated by NUL; a rename/copy entry is followed by
/// one extra NUL-terminated field holding the SOURCE path (no status prefix).
/// Non-UTF8 content is kept as lossy text rather than silently dropped, so
/// policy and the gate's `unparsed` path still see it.
///
/// # Errors
///
/// Returns the [`ForemanError`] from [`git`] when `git status` cannot be
/// spawned or exits unsuccessfully.
fn collect_file_changes(worktree: &Path) -> Result<Vec<FileChange>, ForemanError> {
    let porcelain = git(
        worktree,
        &["status", "--porcelain", "-z", "--untracked-files=all"],
    )?;

    let is_rename_or_copy = |status: u8| status == b'R' || status == b'C';

    let mut changes = Vec::new();
    let mut fields = porcelain.split('\0');
    // `while let` (not `for`): the rename branch pulls an extra field from the
    // same iterator.
    while let Some(entry) = fields.next() {
        // A primary entry is "XY <path>" — 2 status columns, a space, the path.
        if entry.len() < 4 {
            continue;
        }
        // Checked slice: a field that is not a well-formed entry may not have a
        // char boundary at byte 3, and must not panic the harness.
        let path = match entry.get(3..) {
            Some(rest) => rest.to_string(),
            None => continue,
        };
        let columns = entry.as_bytes();
        let (x, y) = (columns[0], columns[1]);

        // Renames/copies put the SOURCE path in the *next* NUL field with no
        // status prefix. git reports R/C in the index column (X) for staged
        // renames and in the worktree column (Y) for renames detected against
        // intent-to-add entries; in both cases the source field follows and
        // must be consumed here, otherwise it would be misparsed as a primary
        // entry. Emit the source's deletion side so a rename's removed symbols
        // still reach the gate. (A copy's source isn't really deleted; modeling
        // it as a deletion is conservative — it can only make the gate
        // stricter, never miss a conflict.)
        if is_rename_or_copy(x) || is_rename_or_copy(y) {
            if let Some(old) = fields.next() {
                if !old.is_empty() {
                    changes.push(FileChange {
                        path: old.to_string(),
                        before: read_head(worktree, old),
                        after: None,
                    });
                }
            }
            changes.push(FileChange {
                path: path.clone(),
                before: None,
                after: read_worktree(worktree, &path),
            });
            continue;
        }

        let before = read_head(worktree, &path);
        let after = read_worktree(worktree, &path);
        // Neither side readable: nothing to hand to the gate.
        if before.is_none() && after.is_none() {
            continue;
        }
        changes.push(FileChange {
            path,
            before,
            after,
        });
    }
    Ok(changes)
}

/// Fetch the committed (HEAD) version of `path` via `git show`.
///
/// Yields `None` whenever the `git show` call fails — e.g. the path was not
/// present at HEAD. The object name is spelled `HEAD:./<path>` so a path that
/// starts with `:` (or otherwise resembles revision syntax) is still treated
/// as a plain path.
fn read_head(worktree: &Path, path: &str) -> Option<String> {
    let object = format!("HEAD:./{path}");
    match git(worktree, &["show", object.as_str()]) {
        Ok(content) => Some(content),
        Err(_) => None,
    }
}

/// Read the on-disk version of `path` inside `worktree`.
///
/// Bytes are decoded lossily (invalid UTF-8 becomes U+FFFD) so binary files
/// are surfaced to the gate instead of disappearing. Returns `None` when the
/// file cannot be read at all.
fn read_worktree(worktree: &Path, path: &str) -> Option<String> {
    let full_path = worktree.join(path);
    match std::fs::read(&full_path) {
        Ok(raw) => Some(String::from_utf8_lossy(&raw).into_owned()),
        Err(_) => None,
    }
}

/// Produce a replayable patch of everything changed in `worktree` relative to
/// HEAD — untracked and binary files included — for later application into a
/// staging tree.
///
/// # Errors
///
/// Propagates the [`ForemanError`] from either git invocation.
fn capture_patch(worktree: &Path) -> Result<String, ForemanError> {
    // Disabling textconv and autocrlf keeps the diff a faithful byte image of
    // the changes even in repos with textconv/clean filters or CRLF
    // normalization, so it re-applies identically elsewhere.
    const DIFF_ARGS: [&str; 6] = [
        "-c",
        "core.autocrlf=false",
        "diff",
        "HEAD",
        "--binary",
        "--no-textconv",
    ];

    // Intent-to-add makes untracked files visible to `diff` without staging
    // their content.
    git(worktree, &["add", "-AN"])?;
    git(worktree, &DIFF_ARGS)
}

/// Run `git <args>` in `cwd` and return its stdout, lossily decoded.
///
/// # Errors
///
/// Returns [`ForemanError::Git`] when the process cannot be spawned, or — with
/// git's trimmed stderr as the message — when it exits unsuccessfully.
fn git(cwd: &Path, args: &[&str]) -> Result<String, ForemanError> {
    let mut command = Command::new("git");
    command.args(args).current_dir(cwd);

    let output = match command.output() {
        Ok(output) => output,
        Err(e) => return Err(ForemanError::Git(format!("spawn git: {e}"))),
    };

    if output.status.success() {
        Ok(String::from_utf8_lossy(&output.stdout).into_owned())
    } else {
        let stderr = String::from_utf8_lossy(&output.stderr);
        Err(ForemanError::Git(stderr.trim().to_string()))
    }
}

/// Apply a patch into a worktree. Plain `git apply` (NOT `--3way`) on purpose:
/// a false-accept benchmark must surface overlapping/adjacent edits as loud
/// integration conflicts, not silently auto-merge them. A failure here is a
/// textual integration conflict the caller treats as an integration failure.
///
/// The patch is streamed to git's stdin. The child is ALWAYS waited on, even
/// when writing the patch fails, so a failed apply never leaves an unreaped
/// process behind and git's own diagnostic is not lost.
///
/// # Errors
///
/// Returns [`ForemanError::Git`] when git cannot be spawned or waited on, when
/// the patch could not be fully written to its stdin, or when `git apply`
/// exits unsuccessfully. The message is git's trimmed stderr when there is
/// one, otherwise the description of the write failure.
fn git_apply(cwd: &Path, patch: &str) -> Result<(), ForemanError> {
    let mut child = Command::new("git")
        .args(["apply", "--whitespace=nowarn"])
        .current_dir(cwd)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|e| ForemanError::Git(format!("spawn git apply: {e}")))?;

    // Feed the patch but do NOT return early on failure: the child still has
    // to be reaped below. The stdin handle is dropped at the end of the match
    // arm, which closes the pipe and signals EOF to git.
    let write_result = match child.stdin.take() {
        Some(mut stdin) => stdin
            .write_all(patch.as_bytes())
            .map_err(|e| format!("write patch: {e}")),
        None => Err("no stdin for git apply".to_string()),
    };

    let out = child
        .wait_with_output()
        .map_err(|e| ForemanError::Git(format!("git apply: {e}")))?;
    let stderr = String::from_utf8_lossy(&out.stderr).trim().to_string();

    match write_result {
        Ok(()) if out.status.success() => Ok(()),
        Ok(()) => Err(ForemanError::Git(stderr)),
        // The patch never fully reached git. Prefer git's own explanation
        // (e.g. it rejected the input and exited early) when it gave one.
        Err(write_err) if stderr.is_empty() => Err(ForemanError::Git(write_err)),
        Err(_) => Err(ForemanError::Git(stderr)),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn st(id: &str, files: &[&str]) -> Subtask {
        Subtask::files_only(id, format!("do {id}"), files.iter().map(|s| s.to_string()).collect())
    }

    // ---- partitioner ----

    #[test]
    fn disjoint_files_pack_into_one_level() {
        let levels = partition_by_files(&[st("a", &["src/a.rs"]), st("b", &["src/b.rs"])]);
        assert_eq!(levels.len(), 1);
        assert_eq!(levels[0].len(), 2);
    }

    #[test]
    fn shared_file_forces_separate_levels() {
        let levels =
            partition_by_files(&[st("a", &["src/shared.rs"]), st("b", &["src/shared.rs"])]);
        assert_eq!(levels.len(), 2);
    }

    #[test]
    fn no_files_subtask_gets_its_own_isolated_level() {
        // The no-files subtask must NOT pack with the file-declaring ones.
        let levels = partition_by_files(&[st("a", &["x.rs"]), st("nofiles", &[]), st("b", &["y.rs"])]);
        let nofiles_level = levels.iter().find(|l| l.contains(&1)).unwrap();
        assert_eq!(nofiles_level, &vec![1], "no-files subtask is isolated");
    }

    // ---- git plumbing through a real repo ----

    fn git_ok(cwd: &Path, args: &[&str]) {
        let out = Command::new("git").args(args).current_dir(cwd).output().unwrap();
        assert!(out.status.success(), "git {args:?}: {}", String::from_utf8_lossy(&out.stderr));
    }

    fn init_repo() -> tempfile::TempDir {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        git_ok(root, &["init", "-q", "-b", "main"]);
        git_ok(root, &["config", "user.email", "t@t.t"]);
        git_ok(root, &["config", "user.name", "t"]);
        std::fs::create_dir_all(root.join("src")).unwrap();
        std::fs::write(root.join("src/lib.rs"), "pub fn original() {}\n").unwrap();
        git_ok(root, &["add", "-A"]);
        git_ok(root, &["commit", "-q", "-m", "init"]);
        dir
    }

    #[test]
    fn collect_changes_handles_rename_with_spaces() {
        let repo = init_repo();
        let root = repo.path();
        std::fs::write(root.join("old name.rs"), "pub fn moved() {}\n").unwrap();
        git_ok(root, &["add", "-A"]);
        git_ok(root, &["commit", "-q", "-m", "add"]);
        // Rename it — this is the case the blind parser corrupted.
        git_ok(root, &["mv", "old name.rs", "new name.rs"]);

        let changes = collect_file_changes(root).unwrap();
        let paths: Vec<_> = changes.iter().map(|c| c.path.as_str()).collect();
        assert!(paths.contains(&"old name.rs"), "rename deletion side present: {paths:?}");
        assert!(paths.contains(&"new name.rs"), "rename addition side present: {paths:?}");
        let old = changes.iter().find(|c| c.path == "old name.rs").unwrap();
        assert!(old.before.is_some() && old.after.is_none(), "old path is a deletion");
    }

    struct WriteAgent {
        path: String,
        content: String,
    }

    #[async_trait]
    impl WorktreeAgent for WriteAgent {
        async fn run_in(
            &self,
            req: &WorktreeAgentRequest<'_>,
        ) -> Result<AgentRunSummary, ForemanError> {
            let target = req.cwd.join(&self.path);
            if let Some(parent) = target.parent() {
                std::fs::create_dir_all(parent).ok();
            }
            std::fs::write(target, &self.content).map_err(|e| ForemanError::Agent(e.to_string()))?;
            Ok(AgentRunSummary::default())
        }
    }

    fn cfg(verify: &[&str]) -> FarmOutConfig {
        FarmOutConfig {
            verify_command: Some(verify.iter().map(|s| s.to_string()).collect()),
            ..Default::default()
        }
    }

    #[tokio::test]
    async fn clean_edit_is_verified_through_harness() {
        let repo = init_repo();
        let agent = WriteAgent {
            path: "src/lib.rs".into(),
            content: "pub fn original() {}\npub fn added() {}\n".into(),
        };
        let infra = SharedInfra::new();
        let result = run_farm_out(
            repo.path(),
            &[st("edit", &["src/lib.rs"])],
            &agent,
            &cfg(&["true"]),
            &infra,
        )
        .await;
        let o = &result.outcomes[0];
        assert!(o.error.is_none(), "{o:?}");
        assert!(o.verdict.as_ref().unwrap().is_verified());
        assert!(o.patch.as_ref().unwrap().contains("added"), "patch retained");
    }

    #[test]
    fn default_worktree_base_is_outside_the_repo() {
        let repo = init_repo();
        let root = repo.path();
        let base = default_worktree_base(root);
        assert!(
            !base.starts_with(root),
            "worktree base {base:?} must not be inside repo {root:?}"
        );
        assert!(base.starts_with(std::env::temp_dir()));
        // Deterministic per repo path.
        assert_eq!(base, default_worktree_base(root));
        // git_worktree_at carries the repo so worktrees are of `root`, under `base`.
        let cfg = FarmOutConfig::default();
        let _ = worktree_workspace_config(root, &cfg); // smoke: builds without panic
    }

    #[tokio::test]
    async fn worktrees_are_provisioned_outside_the_repo() {
        // The agent records the worktree cwd it was handed; assert it lives
        // outside the repo so a crash can't leak dirs into the user's checkout.
        struct CwdProbe {
            seen: Arc<std::sync::Mutex<Option<PathBuf>>>,
        }
        #[async_trait]
        impl WorktreeAgent for CwdProbe {
            async fn run_in(
                &self,
                req: &WorktreeAgentRequest<'_>,
            ) -> Result<AgentRunSummary, ForemanError> {
                *self.seen.lock().unwrap() = Some(req.cwd.to_path_buf());
                std::fs::write(req.cwd.join("src/added.rs"), "pub fn a() {}\n")
                    .map_err(|e| ForemanError::Agent(e.to_string()))?;
                Ok(AgentRunSummary::default())
            }
        }

        let repo = init_repo();
        let repo_root = repo.path().canonicalize().unwrap();
        let seen = Arc::new(std::sync::Mutex::new(None));
        let agent = CwdProbe { seen: Arc::clone(&seen) };
        let infra = SharedInfra::new();
        let _ = run_farm_out(
            repo.path(),
            &[st("edit", &["src/added.rs"])],
            &agent,
            &cfg(&["true"]),
            &infra,
        )
        .await;

        let cwd = seen.lock().unwrap().clone().expect("agent ran");
        let cwd = cwd.canonicalize().unwrap_or(cwd);
        assert!(
            !cwd.starts_with(&repo_root),
            "worktree {cwd:?} must be OUTSIDE repo {repo_root:?}"
        );
    }

    #[tokio::test]
    async fn no_verify_waiver_accepts_when_no_build_command() {
        let repo = init_repo();
        let agent = WriteAgent {
            path: "src/added.rs".into(),
            content: "pub fn added() {}\n".into(),
        };
        let infra = SharedInfra::new();

        // No verify command and no waiver → the gate is fail-closed (Inconclusive),
        // so the subtask is NOT accepted.
        let r1 = run_farm_out(
            repo.path(),
            &[st("edit", &["src/added.rs"])],
            &agent,
            &FarmOutConfig::default(),
            &infra,
        )
        .await;
        assert!(
            !r1.outcomes[0].is_accepted(),
            "no command + no waiver must not be accepted: {:?}",
            r1.outcomes[0]
        );

        // Same change, with an explicit waiver → accepted on containment alone,
        // but NOT build-verified.
        let waived = FarmOutConfig {
            no_verify_waiver: Some(NoVerifyWaiver {
                class: "no-build-gate".into(),
                reason: "no reliable build command for this project".into(),
            }),
            ..Default::default()
        };
        let r2 = run_farm_out(
            repo.path(),
            &[st("edit", &["src/added.rs"])],
            &agent,
            &waived,
            &infra,
        )
        .await;
        let o = &r2.outcomes[0];
        assert!(o.is_accepted(), "waiver must yield acceptance: {o:?}");
        assert!(
            !o.verdict.as_ref().unwrap().is_verified(),
            "waiver-based acceptance is not build-verified"
        );
    }

    /// A single accepted subtask must emit exactly three progress events, in
    /// order: started, verifying, gated(accepted).
    #[tokio::test]
    async fn progress_streams_started_then_gated_per_subtask() {
        let repo = init_repo();
        // One files-only subtask whose agent writes a single new file. The
        // verify command is `true`, and the outcome is asserted accepted below.
        let agent = WriteAgent { path: "src/added.rs".into(), content: "pub fn a() {}\n".into() };

        // Sink that records every event so the sequence can be asserted after
        // the run completes.
        let events: Arc<std::sync::Mutex<Vec<ForemanProgress>>> = Arc::new(std::sync::Mutex::new(Vec::new()));
        let sink: ForemanProgressSink = {
            let events = Arc::clone(&events);
            Arc::new(move |ev| events.lock().unwrap().push(ev))
        };

        let infra = SharedInfra::new();
        let result = run_farm_out_with_progress(
            repo.path(),
            &[st("only", &["src/added.rs"])],
            &agent,
            &cfg(&["true"]),
            &infra,
            sink,
        )
        .await;
        assert!(result.outcomes[0].is_accepted());

        let events = events.lock().unwrap();
        assert_eq!(events.len(), 3, "started + verifying + gated: {events:?}");
        assert!(matches!(
            &events[0],
            ForemanProgress::SubtaskStarted { subtask_id, index: 0, level: 0, total: 1 } if subtask_id == "only"
        ), "first event is started: {:?}", events[0]);
        assert!(matches!(
            &events[1],
            ForemanProgress::SubtaskVerifying { subtask_id } if subtask_id == "only"
        ), "second event is verifying: {:?}", events[1]);
        assert!(matches!(
            &events[2],
            ForemanProgress::SubtaskGated { subtask_id, accepted: true, status } if subtask_id == "only" && status == "accepted"
        ), "third event is an accepted gate: {:?}", events[2]);
    }

    #[tokio::test]
    async fn progress_reports_error_status_when_agent_fails() {
        let repo = init_repo();
        // Agent that always errors before the gate — terminal status is `error`.
        struct FailAgent;
        #[async_trait]
        impl WorktreeAgent for FailAgent {
            async fn run_in(&self, _: &WorktreeAgentRequest<'_>) -> Result<AgentRunSummary, ForemanError> {
                Err(ForemanError::Agent("boom".into()))
            }
        }
        let events: Arc<std::sync::Mutex<Vec<ForemanProgress>>> = Arc::new(std::sync::Mutex::new(Vec::new()));
        let sink: ForemanProgressSink = {
            let events = Arc::clone(&events);
            Arc::new(move |ev| events.lock().unwrap().push(ev))
        };
        let infra = SharedInfra::new();
        let _ = run_farm_out_with_progress(
            repo.path(),
            &[st("boom", &["src/x.rs"])],
            &FailAgent,
            &cfg(&["true"]),
            &infra,
            sink,
        )
        .await;
        let events = events.lock().unwrap();
        assert_eq!(events.len(), 2, "started + gated(error): {events:?}");
        assert!(matches!(
            &events[1],
            ForemanProgress::SubtaskGated { accepted: false, status, .. } if status == "error"
        ), "agent failure surfaces as an error gate: {:?}", events[1]);
    }

    #[tokio::test]
    async fn declared_footprint_containment_rejects_out_of_scope_edit() {
        // Subtask declares it will only write `foo`, but the agent also edits
        // `other` — the gate's containment (fed the DECLARED footprint) rejects.
        let repo = init_repo(); // base src/lib.rs = "pub fn original() {}\n"
        // Seed a second symbol to escape to.
        std::fs::write(
            repo.path().join("src/lib.rs"),
            "pub fn foo() {}\npub fn other() {}\n",
        )
        .unwrap();
        git_ok(repo.path(), &["commit", "-qam", "two fns"]);

        let mut subtask = Subtask::files_only("a", "edit foo", vec!["src/lib.rs".into()]);
        subtask.footprint = Some(car_ast::SymbolFootprint::writing([car_ast::SymbolRef::new(
            "src/lib.rs",
            "foo",
        )]));

        // Agent edits BOTH foo (allowed) and other (NOT allowed).
        let agent = WriteAgent {
            path: "src/lib.rs".into(),
            content: "pub fn foo() -> u8 { 1 }\npub fn other() -> u8 { 2 }\n".into(),
        };
        let infra = SharedInfra::new();
        let result = run_farm_out(repo.path(), &[subtask], &agent, &cfg(&["true"]), &infra).await;
        let verdict = result.outcomes[0].verdict.as_ref().unwrap();
        assert!(
            matches!(verdict, MergeVerdict::Rejected { .. }),
            "out-of-footprint edit must be rejected: {verdict:?}"
        );
        assert!(verdict
            .evidence()
            .containment_violations
            .iter()
            .any(|v| v.changed.symbol == "other"));
    }

    #[tokio::test]
    async fn workspace_failure_is_captured_not_propagated() {
        let dir = tempfile::tempdir().unwrap(); // not a git repo
        let agent = WriteAgent { path: "x".into(), content: String::new() };
        let infra = SharedInfra::new();
        let result = run_farm_out(dir.path(), &[st("x", &["a.rs"])], &agent, &cfg(&["true"]), &infra).await;
        assert!(result.outcomes[0].verdict.is_none());
        assert!(result.outcomes[0].error.is_some());
    }

    /// Two subtasks that are each accepted alone but both add `fn foo` to the
    /// same file must not integrate cleanly as a union.
    #[tokio::test]
    async fn union_integration_catches_cross_subtask_duplicate() {
        // The integration-failure class per-worktree isolation is blind to:
        // both subtasks add `fn foo` to src/lib.rs at DIFFERENT positions. Each
        // worktree is accepted alone. Replaying both patches into one tree
        // would leave two `foo` in the union — a duplicate declaration only the
        // union gate can see. Integration uses plain `git apply` (not
        // `--3way`), so the second patch may instead be refused outright; this
        // test accepts either rejection path.
        let repo = init_repo(); // base src/lib.rs = "pub fn original() {}\n"
        let infra = SharedInfra::new();

        let agent_a = WriteAgent {
            path: "src/lib.rs".into(),
            content: "pub fn foo() {}\npub fn original() {}\n".into(), // foo before
        };
        let agent_b = WriteAgent {
            path: "src/lib.rs".into(),
            content: "pub fn original() {}\npub fn foo() {}\n".into(), // foo after
        };

        let a = run_farm_out(repo.path(), &[st("a", &["src/lib.rs"])], &agent_a, &cfg(&["true"]), &infra).await;
        let b = run_farm_out(repo.path(), &[st("b", &["src/lib.rs"])], &agent_b, &cfg(&["true"]), &infra).await;
        // Each is fine in isolation — neither worktree has a duplicate.
        assert!(a.outcomes[0].is_accepted(), "A alone: {:?}", a.outcomes[0].verdict);
        assert!(b.outcomes[0].is_accepted(), "B alone: {:?}", b.outcomes[0].verdict);

        let patches = vec![
            ("a".to_string(), a.outcomes[0].patch.clone().unwrap()),
            ("b".to_string(), b.outcomes[0].patch.clone().unwrap()),
        ];
        let integ = integrate_and_verify(repo.path(), "ab", &patches, &cfg(&["true"]), &infra)
            .await
            .unwrap();
        // The union must NOT integrate cleanly. (Caught either as an apply
        // conflict on the second patch or as a duplicate declaration by the
        // gate.)
        assert!(
            !integ.integrated_cleanly(),
            "union of two subtasks both adding foo must be rejected: {integ:?}"
        );
    }

    #[tokio::test]
    async fn union_surfaces_overlapping_edit_as_apply_conflict() {
        // Both subtasks rewrite the SAME line of src/lib.rs differently. Each is
        // accepted alone. Plain `git apply` (not --3way) must surface the second
        // as a loud apply conflict rather than silently auto-merging — exactly
        // the under-reporting a false-accept benchmark must avoid.
        let repo = init_repo();
        let infra = SharedInfra::new();
        let agent_a = WriteAgent { path: "src/lib.rs".into(), content: "pub fn original() -> u8 { 1 }\n".into() };
        let agent_b = WriteAgent { path: "src/lib.rs".into(), content: "pub fn original() -> u16 { 2 }\n".into() };
        let a = run_farm_out(repo.path(), &[st("a", &["src/lib.rs"])], &agent_a, &cfg(&["true"]), &infra).await;
        let b = run_farm_out(repo.path(), &[st("b", &["src/lib.rs"])], &agent_b, &cfg(&["true"]), &infra).await;
        let patches = vec![
            ("a".to_string(), a.outcomes[0].patch.clone().unwrap()),
            ("b".to_string(), b.outcomes[0].patch.clone().unwrap()),
        ];
        let integ = integrate_and_verify(repo.path(), "ab", &patches, &cfg(&["true"]), &infra)
            .await
            .unwrap();
        assert!(!integ.apply_conflicts.is_empty(), "overlapping edit must conflict loudly: {integ:?}");
        assert!(!integ.integrated_cleanly());
    }

    #[tokio::test]
    async fn union_of_disjoint_subtasks_integrates_cleanly() {
        let repo = init_repo();
        let infra = SharedInfra::new();
        let agent_a = WriteAgent { path: "a.rs".into(), content: "pub fn a() {}\n".into() };
        let agent_b = WriteAgent { path: "b.rs".into(), content: "pub fn b() {}\n".into() };
        let a = run_farm_out(repo.path(), &[st("a", &["a.rs"])], &agent_a, &cfg(&["true"]), &infra).await;
        let b = run_farm_out(repo.path(), &[st("b", &["b.rs"])], &agent_b, &cfg(&["true"]), &infra).await;
        let patches = vec![
            ("a".to_string(), a.outcomes[0].patch.clone().unwrap()),
            ("b".to_string(), b.outcomes[0].patch.clone().unwrap()),
        ];
        let integ = integrate_and_verify(repo.path(), "ab", &patches, &cfg(&["true"]), &infra)
            .await
            .unwrap();
        assert!(integ.integrated_cleanly(), "disjoint union integrates: {integ:?}");
    }

    #[tokio::test]
    async fn union_uses_union_verify_command_not_worktree_command() {
        // Per-worktree gate (verify_command) passes, but the union goal check
        // (union_verify_command) fails → the subtask is accepted in isolation yet
        // the union is rejected. Proves the two gates use different commands.
        let repo = init_repo();
        let infra = SharedInfra::new();
        let agent = WriteAgent {
            path: "src/lib.rs".into(),
            content: "pub fn original() {}\npub fn added() {}\n".into(),
        };
        let config = FarmOutConfig {
            verify_command: Some(vec!["true".into()]),        // per-worktree regression: pass
            union_verify_command: Some(vec!["false".into()]), // union goal: fail
            ..Default::default()
        };
        let r = run_farm_out(repo.path(), &[st("a", &["src/lib.rs"])], &agent, &config, &infra).await;
        assert!(r.outcomes[0].is_accepted(), "per-worktree (true) accepts");

        let patches = vec![("a".to_string(), r.outcomes[0].patch.clone().unwrap())];
        let integ = integrate_and_verify(repo.path(), "u", &patches, &config, &infra)
            .await
            .unwrap();
        assert!(
            !integ.integrated_cleanly(),
            "union must run union_verify_command (false) and reject: {integ:?}"
        );
    }

    #[tokio::test]
    async fn regional_replan_resumes_from_clean_and_delivers() {
        let repo = init_repo();
        let infra = SharedInfra::new();
        // A "clean" patch from a prior accepted subtask: it created keep.rs.
        let keeper = WriteAgent { path: "keep.rs".into(), content: "pub fn keep() {}\n".into() };
        let k = run_farm_out(repo.path(), &[st("keep", &["keep.rs"])], &keeper, &cfg(&["true"]), &infra).await;
        let clean = vec![("keep".to_string(), k.outcomes[0].patch.clone().unwrap())];

        // Regional resumes: applies keep.rs, the agent completes the goal (writes
        // good.txt), gated by a goal check requiring BOTH (clean preserved + done).
        let agent = WriteAgent { path: "good.txt".into(), content: "done".into() };
        let config = FarmOutConfig {
            union_verify_command: Some(vec![
                "sh".into(),
                "-c".into(),
                "test -f good.txt && test -f keep.rs".into(),
            ]),
            ..Default::default()
        };
        let outcome = regional_replan(repo.path(), "finish it", &clean, &agent, &config, &infra)
            .await
            .expect("regional ran");
        assert!(outcome.is_accepted(), "regional delivered clean+region: {outcome:?}");
        let patch = outcome.patch.unwrap();
        assert!(patch.contains("keep.rs"), "clean work preserved in result: {patch}");
        assert!(patch.contains("good.txt"), "region work present: {patch}");
    }

    #[tokio::test]
    async fn regional_replan_bails_when_a_clean_patch_does_not_apply() {
        let repo = init_repo();
        let infra = SharedInfra::new();
        let agent = WriteAgent { path: "good.txt".into(), content: "done".into() };
        // A clean patch that can't apply → the clean set isn't internally
        // consistent → bail (None) so the caller falls back to a whole-goal session.
        let clean = vec![("broken".to_string(), "this is not a valid patch\n".to_string())];
        let outcome =
            regional_replan(repo.path(), "finish it", &clean, &agent, &cfg(&["true"]), &infra).await;
        assert!(outcome.is_none(), "unappliable clean set bails to fallback");
    }

    #[test]
    fn localize_build_failure_picks_subtasks_whose_files_are_named() {
        let mut map = std::collections::HashMap::new();
        map.insert("src/a.rs".to_string(), vec!["a".to_string()]);
        map.insert("src/b.rs".to_string(), vec!["b".to_string()]);
        // Compiler output naming a.rs → only subtask a is implicated.
        let ids = localize_build_failure("error[E0277]: in src/a.rs:42:5\n", &map);
        assert_eq!(ids, vec!["a".to_string()], "localized to the named file's subtask");
        // Output naming no known file → empty (caller keeps the whole set).
        assert!(localize_build_failure("linker error, no file named\n", &map).is_empty());
    }

    #[test]
    fn files_in_patch_parses_target_paths() {
        let patch = "diff --git a/src/foo.rs b/src/foo.rs\n\
                     index e69de29..abc1234 100644\n\
                     --- a/src/foo.rs\n+++ b/src/foo.rs\n\
                     @@ -0,0 +1 @@\n+pub fn foo() {}\n\
                     diff --git a/bar.rs b/bar.rs\n--- a/bar.rs\n+++ b/bar.rs\n";
        assert_eq!(
            files_in_patch(patch),
            vec!["src/foo.rs".to_string(), "bar.rs".to_string()]
        );
    }

    #[tokio::test]
    async fn blame_attributes_apply_conflict_to_subtask_and_files() {
        let repo = init_repo();
        let infra = SharedInfra::new();
        let config = cfg(&["true"]);

        // Two stub agents target the same file, src/lib.rs, with different
        // contents; each is farmed out on its own against the same repo.
        let agent_a = WriteAgent {
            path: "src/lib.rs".into(),
            content: "pub fn original() -> u8 { 1 }\n".into(),
        };
        let agent_b = WriteAgent {
            path: "src/lib.rs".into(),
            content: "pub fn original() -> u16 { 2 }\n".into(),
        };
        let a = run_farm_out(repo.path(), &[st("a", &["src/lib.rs"])], &agent_a, &config, &infra).await;
        let b = run_farm_out(repo.path(), &[st("b", &["src/lib.rs"])], &agent_b, &config, &infra).await;

        // Hand both patches to the union step in a-then-b order.
        let patch_a = a.outcomes[0].patch.clone().unwrap();
        let patch_b = b.outcomes[0].patch.clone().unwrap();
        let patches = vec![("a".to_string(), patch_a), ("b".to_string(), patch_b)];
        let integrated = integrate_and_verify(repo.path(), "ab", &patches, &config, &infra)
            .await
            .unwrap();

        // Exactly one apply conflict is expected, pinned on `b` and naming the file.
        let blame = integrated.blame.expect("apply conflict produces blame");
        assert_eq!(blame.apply_conflicts.len(), 1, "{blame:?}");
        let c = &blame.apply_conflicts[0];
        assert_eq!(c.subtask_id, "b", "the second patch is the one that conflicts");
        let contested = "src/lib.rs".to_string();
        assert!(c.files.contains(&contested), "files attributed: {c:?}");
    }

    #[tokio::test]
    async fn blame_carries_union_build_test_failure() {
        let repo = init_repo();
        let infra = SharedInfra::new();
        let agent = WriteAgent {
            path: "src/lib.rs".into(),
            content: "pub fn original() {}\npub fn added() {}\n".into(),
        };

        // The per-subtask verify command is `true`; the union verify command is
        // `false`, so the union goal fails.
        let passing = vec!["true".into()];
        let failing = vec!["false".into()];
        let config = FarmOutConfig {
            verify_command: Some(passing),
            union_verify_command: Some(failing),
            ..Default::default()
        };

        let farmed = run_farm_out(repo.path(), &[st("a", &["src/lib.rs"])], &agent, &config, &infra).await;
        let patch = farmed.outcomes[0].patch.clone().unwrap();
        let patches = vec![("a".to_string(), patch)];
        let integrated = integrate_and_verify(repo.path(), "u", &patches, &config, &infra)
            .await
            .unwrap();

        // The blame must carry the failing exit code and name subtask `a` as
        // the candidate region.
        let blame = integrated.blame.expect("rejected union produces blame");
        let bt = blame.build_test.expect("union build/test failure recorded");
        assert_eq!(bt.code, Some(1), "`false` exits 1: {bt:?}");
        assert_eq!(bt.candidate_subtask_ids, vec!["a".to_string()], "region named: {bt:?}");
    }

    #[tokio::test]
    async fn blame_attributes_duplicate_declaration_to_both_subtasks() {
        // Subtasks `a` and `b` each introduce `fn dup` into src/lib.rs, one at
        // the top and one at the bottom, so the hunks are disjoint and both
        // patches apply. The union then holds a duplicate that the gate flags,
        // and blame should name BOTH subtasks whose patches touched the file.
        let repo = init_repo();

        // Commit a padded src/lib.rs first so the two insertions land far apart.
        let pad = "pub fn original() {}\npub fn p1() {}\npub fn p2() {}\npub fn p3() {}\n";
        let lib_rs = repo.path().join("src/lib.rs");
        std::fs::write(lib_rs, pad).unwrap();
        git_ok(repo.path(), &["commit", "-qam", "pad"]);

        let infra = SharedInfra::new();
        let config = cfg(&["true"]);
        let agent_a = WriteAgent {
            path: "src/lib.rs".into(),
            content: format!("pub fn dup() {{}}\n{pad}"),
        };
        let agent_b = WriteAgent {
            path: "src/lib.rs".into(),
            content: format!("{pad}pub fn dup() {{}}\n"),
        };
        let a = run_farm_out(repo.path(), &[st("a", &["src/lib.rs"])], &agent_a, &config, &infra).await;
        let b = run_farm_out(repo.path(), &[st("b", &["src/lib.rs"])], &agent_b, &config, &infra).await;

        let patch_a = a.outcomes[0].patch.clone().unwrap();
        let patch_b = b.outcomes[0].patch.clone().unwrap();
        let patches = vec![("a".to_string(), patch_a), ("b".to_string(), patch_b)];
        let integ = integrate_and_verify(repo.path(), "ab", &patches, &config, &infra)
            .await
            .unwrap();

        // Neither patch may conflict at apply time; the failure must come from
        // the duplicate check instead.
        assert!(integ.apply_conflicts.is_empty(), "disjoint hunks both apply: {integ:?}");
        let blame = integ.blame.expect("duplicate union produces blame");
        let dup = match blame.duplicate_conflicts.iter().find(|entry| entry.symbol == "dup") {
            Some(found) => found,
            None => panic!("duplicate `dup` attributed: {blame:?}"),
        };
        assert_eq!(dup.file, "src/lib.rs");

        // Candidate order is not asserted, so sort before comparing.
        let mut blamed = dup.candidate_subtask_ids.clone();
        blamed.sort();
        assert_eq!(blamed, vec!["a".to_string(), "b".to_string()], "both subtasks blamed");
    }
}