// car-multi 0.26.0

//! B5 — the decomposition planner.
//!
//! Turns a natural-language goal into footprint-annotated [`Subtask`]s and a
//! parallel schedule. Follows `car-builder`'s injected-generate pattern: the
//! caller supplies a `generate` closure (tests pass a fake; the daemon passes
//! inference), and the planner runs prompt → generate → parse → verify → repair.
//! The *verify* step is the B4 footprint analyzer: if the model double-assigned a
//! symbol (two subtasks declaring the same write), that surfaces as a conflict
//! and is fed back for repair.
//!
//! Planning never bypasses the merge-verify gate — a produced plan is only a
//! *proposal*; correctness is still established per-subtask and at the union by
//! the gate during [`run_farm_out`](super::harness::run_farm_out). The planner's
//! decomposability check is purely advisory: when no real parallelism is found
//! it recommends a single session instead.

use std::future::Future;
use std::path::Path;

use std::collections::HashSet;

use car_ast::{analyze, expand_footprint, FootprintSubtask, ProjectIndex, SymbolFootprint, SymbolRef};
use serde::Deserialize;

use super::harness::{Subtask, FOOTPRINT_BLAST_DEPTH};

/// Outcome of decomposing a goal.
///
/// Returned by [`decompose`] on success and on failure alike; call
/// [`DecomposeResult::is_valid`] to tell the two apart.
#[derive(Debug)]
pub struct DecomposeResult {
    /// Footprint-annotated subtasks (empty if no valid plan was produced).
    pub subtasks: Vec<Subtask>,
    /// The scheduled levels (subtask ids) the footprint analyzer produced.
    /// Empty when no valid plan was produced.
    pub levels: Vec<Vec<String>>,
    /// `true` when farming out buys no parallel speedup (≤1 subtask, or every
    /// level is a single subtask). Advisory only — a serialized multi-subtask
    /// plan can still be worth farming out (context isolation, per-subtask model
    /// choice, checkpointing), so the caller may override.
    /// Always `true` when no valid plan was produced.
    pub prefer_single_session: bool,
    /// How many times `generate` was invoked, including the call that led to
    /// this result. When every attempt failed this equals the attempt budget
    /// (`max_attempts`, raised to at least 1).
    pub attempts: u32,
    /// Parse / decomposition issues from the final attempt (empty on success).
    pub issues: Vec<String>,
}

impl DecomposeResult {
    /// Whether this result carries a usable plan: at least one subtask was
    /// produced and the final attempt left no outstanding issues.
    pub fn is_valid(&self) -> bool {
        let has_plan = !self.subtasks.is_empty();
        let clean = self.issues.is_empty();
        has_plan && clean
    }
}

// ---- wire shape the model emits ----

/// One `{"file": ..., "symbol": ...}` pair as the model emits it.
/// `parse_plan` turns each of these into a `SymbolRef`.
#[derive(Deserialize)]
struct WireRef {
    /// File path the symbol is declared in, as written by the model.
    file: String,
    /// Name of the symbol within `file`.
    symbol: String,
}

/// One subtask as the model emits it. Only `id` is mandatory; every other
/// field is marked `#[serde(default)]` and so falls back to an empty value
/// when the model leaves it out.
#[derive(Deserialize)]
struct WireSubtask {
    /// Subtask identifier; `parse_plan` rejects plans that repeat one.
    id: String,
    /// Instruction text for the subtask.
    #[serde(default)]
    prompt: String,
    /// Explicit file list. When empty, `parse_plan` derives it from the files
    /// named in `writes` and `reads`.
    #[serde(default)]
    files: Vec<String>,
    /// Symbols the subtask declares it writes.
    #[serde(default)]
    writes: Vec<WireRef>,
    /// Symbols the subtask declares it reads.
    #[serde(default)]
    reads: Vec<WireRef>,
}

/// Top-level JSON object the model is asked to emit: `{"subtasks": [...]}`.
#[derive(Deserialize)]
struct WirePlan {
    /// The proposed subtasks; `parse_plan` rejects an empty list.
    subtasks: Vec<WireSubtask>,
}

/// Parse a model response into footprint-annotated subtasks. Tolerant of
/// markdown fences / preamble: tries a direct parse, then the first `{...}`
/// block.
pub fn parse_plan(text: &str) -> Result<Vec<Subtask>, String> {
    let wire: WirePlan = serde_json::from_str(text)
        .or_else(|_| {
            let start = text.find('{').ok_or("no JSON object found")?;
            let end = text.rfind('}').ok_or("no closing brace")?;
            if end <= start {
                return Err("malformed JSON span".to_string());
            }
            serde_json::from_str(&text[start..=end]).map_err(|e| e.to_string())
        })
        .map_err(|e: String| format!("parse failed: {e}"))?;

    if wire.subtasks.is_empty() {
        return Err("plan has no subtasks".to_string());
    }
    let mut seen = HashSet::new();
    for w in &wire.subtasks {
        if !seen.insert(w.id.as_str()) {
            return Err(format!("duplicate subtask id '{}'", w.id));
        }
    }

    let subtasks = wire
        .subtasks
        .into_iter()
        .map(|w| {
            let footprint = if w.writes.is_empty() && w.reads.is_empty() {
                None
            } else {
                Some(SymbolFootprint {
                    writes: w
                        .writes
                        .iter()
                        .map(|r| SymbolRef::new(r.file.clone(), r.symbol.clone()))
                        .collect(),
                    reads: w
                        .reads
                        .iter()
                        .map(|r| SymbolRef::new(r.file.clone(), r.symbol.clone()))
                        .collect(),
                    uncertain: false,
                })
            };
            // Files default to the union of footprint file paths when omitted, so
            // the dumb partitioner still has a key if footprints are dropped.
            let files = if w.files.is_empty() {
                let mut fs: Vec<String> =
                    w.writes.iter().chain(&w.reads).map(|r| r.file.clone()).collect();
                fs.sort();
                fs.dedup();
                fs
            } else {
                w.files
            };
            Subtask {
                id: w.id,
                prompt: w.prompt,
                files,
                footprint,
            }
        })
        .collect();
    Ok(subtasks)
}

/// Schedule the plan and detect double-assigned symbols (two subtasks declaring
/// the same write — a planner mistake the gate would later catch, but cheaper to
/// fix here).
///
/// Each declared footprint is expanded against `index` to
/// `FOOTPRINT_BLAST_DEPTH` before being handed to the analyzer.
///
/// Returns `(levels, conflicts)`: the analyzer's scheduled levels (subtask ids)
/// and one human-readable message per conflicting pair. If any subtask has no
/// footprint the plan cannot be analyzed symbolically and both vectors come
/// back empty, leaving the harness to fall back to file partitioning.
fn evaluate(index: &ProjectIndex, subtasks: &[Subtask]) -> (Vec<Vec<String>>, Vec<String>) {
    // Collecting into `Option<Vec<_>>` stops at the first subtask without a
    // footprint, so no expansion work is spent on a plan that will not be
    // analyzed anyway.
    let expanded: Option<Vec<FootprintSubtask>> = subtasks
        .iter()
        .map(|s| {
            s.footprint.as_ref().map(|fp| FootprintSubtask {
                id: s.id.clone(),
                // `index` is already a reference; pass it through as-is.
                footprint: expand_footprint(index, fp, FOOTPRINT_BLAST_DEPTH),
            })
        })
        .collect();
    let fsubs = match expanded {
        Some(fsubs) => fsubs,
        // Some subtasks have no footprint — no conflicts surfaced here.
        None => return (Vec::new(), Vec::new()),
    };

    let plan = analyze(&fsubs);
    let conflicts = plan
        .conflicts
        .iter()
        .map(|(a, b)| format!("subtasks '{a}' and '{b}' write overlapping symbols — split, merge, or hoist a shared contract"))
        .collect();
    (plan.levels, conflicts)
}

/// Decompose `goal` into footprint-annotated subtasks.
///
/// `generate(prompt)` returns the model's raw response. Each attempt runs
/// prompt → generate → parse → footprint check → conflict check; the first
/// failing step's messages are fed into the next attempt's prompt. At most
/// `max_attempts` attempts are made (a value of 0 is treated as 1).
///
/// On success the result holds the subtasks and schedule with no issues. When
/// every attempt fails it holds no subtasks, the last attempt's issues, and
/// recommends a single session.
pub async fn decompose<F, Fut>(
    repo_root: &Path,
    goal: &str,
    max_attempts: u32,
    generate: F,
) -> DecomposeResult
where
    F: Fn(String) -> Fut,
    Fut: Future<Output = Result<String, String>>,
{
    // Always allow at least one attempt.
    let budget = max_attempts.max(1);
    // The symbol index is built a single time, before the loop: the repo does
    // not change between repair attempts, so rebuilding it per attempt would
    // only re-pay the indexing budget.
    let index = ProjectIndex::build(repo_root);

    let mut issues: Vec<String> = Vec::new();
    let mut attempts = 0;

    for attempt in 1..=budget {
        attempts = attempt;

        let reply = generate(build_prompt(goal, &issues)).await;

        // Each stage either passes the plan along or yields the issue list
        // that replaces the previous attempt's feedback.
        let vetted = reply
            .map_err(|e| vec![format!("generation failed: {e}")])
            .and_then(|raw| parse_plan(&raw).map_err(|e| vec![e]))
            .and_then(|subtasks| {
                // Every subtask must declare a footprint: a missing one would
                // silently disable symbolic scheduling and the conflict
                // pre-check, so it is sent back for repair instead.
                let missing: Vec<&str> = subtasks
                    .iter()
                    .filter(|s| s.footprint.is_none())
                    .map(|s| s.id.as_str())
                    .collect();
                if !missing.is_empty() {
                    return Err(vec![format!(
                        "subtasks {missing:?} declared no writes/reads; every subtask must declare its symbol footprint"
                    )]);
                }
                Ok(subtasks)
            })
            .and_then(|subtasks| {
                let (levels, conflicts) = evaluate(&index, &subtasks);
                if conflicts.is_empty() {
                    Ok((subtasks, levels))
                } else {
                    Err(conflicts)
                }
            });

        match vetted {
            Ok((subtasks, levels)) => {
                // No parallel speedup when there is at most one subtask, or
                // when a non-empty schedule never runs two subtasks together.
                let fully_serial =
                    !levels.is_empty() && levels.iter().all(|level| level.len() <= 1);
                let prefer_single_session = subtasks.len() <= 1 || fully_serial;
                return DecomposeResult {
                    subtasks,
                    levels,
                    prefer_single_session,
                    attempts,
                    issues: Vec::new(),
                };
            }
            Err(found) => issues = found,
        }
    }

    // Attempt budget exhausted: report the last attempt's issues.
    DecomposeResult {
        subtasks: Vec::new(),
        levels: Vec::new(),
        prefer_single_session: true,
        attempts,
        issues,
    }
}

/// Assemble the planner prompt for `goal`.
///
/// The prompt is a fixed preamble (output schema, rules, one worked example)
/// followed by the goal. When `prior_issues` is non-empty, a trailing section
/// lists each issue as a bullet so the model can repair its previous attempt.
fn build_prompt(goal: &str, prior_issues: &[String]) -> String {
    const PREAMBLE: &[&str] = &[
        "Decompose this coding goal into independent subtasks. Emit JSON:\n",
        "{\"subtasks\":[{\"id\":\"...\",\"prompt\":\"...\",\"writes\":[{\"file\":\"path\",\"symbol\":\"name\"}],\"reads\":[...]}]}\n",
        "Each subtask declares the symbols it WRITES (defines/modifies) and READS.\n",
        "Rules:\n",
        // Guard against over-decomposition: invented scaffolding/"registry"
        // subtasks (e.g. one only for `pub mod ...;` wiring) collide with the
        // real subtasks.
        "- Use the FEWEST subtasks that cover the goal. Do NOT add scaffolding/setup/registry subtasks — module declarations, imports, and wiring already exist or belong to the subtask that needs them.\n",
        // Conflict vs. dependency: a symbol created by one subtask and used by
        // another is a READ on the consumer, never a second WRITE.
        "- Exactly ONE subtask WRITES each symbol. If subtask B uses a symbol that subtask A defines, put that symbol in B's `reads` (NOT B's `writes`).\n",
        "- If two subtasks would have to modify the SAME symbol, they are not independent — merge them into one subtask.\n",
        "- One file per subtask is a good default.\n",
        // Worked example for the stubborn case where a *consumed* symbol gets
        // declared as a write: calling a function reads it.
        "Example — goal \"add `parse()` in util.rs, and `total()` in lib.rs that calls `parse()`\":\n",
        "  {\"subtasks\":[{\"id\":\"parse\",\"writes\":[{\"file\":\"util.rs\",\"symbol\":\"parse\"}]},{\"id\":\"total\",\"writes\":[{\"file\":\"lib.rs\",\"symbol\":\"total\"}],\"reads\":[{\"file\":\"util.rs\",\"symbol\":\"parse\"}]}]}\n",
        "  `total` calls `parse`, so `parse` is in `total`'s READS — NOT its writes. Only `parse`'s own subtask writes it.\n\nGOAL: ",
    ];

    let mut prompt = PREAMBLE.concat();
    prompt.push_str(goal);

    if prior_issues.is_empty() {
        return prompt;
    }

    // Repair section: one bullet per issue from the previous attempt.
    prompt.push_str("\n\nFix these problems from the previous attempt:\n");
    for issue in prior_issues {
        prompt.extend(["- ", issue.as_str(), "\n"]);
    }
    prompt
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::future::{ready, Ready};
    use std::process::Command;
    use std::sync::atomic::{AtomicU32, Ordering};

    /// What every fake generator resolves to.
    type Reply = Result<String, String>;

    /// Run one git command inside `root`. Only a failure to spawn panics; the
    /// command's exit status is not inspected.
    fn git(root: &Path, args: &[&str]) {
        Command::new("git").args(args).current_dir(root).output().unwrap();
    }

    /// Temp git repo on branch `main` with `src/a.rs` and `src/b.rs` committed.
    fn repo() -> tempfile::TempDir {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        git(root, &["init", "-q", "-b", "main"]);
        git(root, &["config", "user.email", "t@t.t"]);
        git(root, &["config", "user.name", "t"]);
        std::fs::create_dir_all(root.join("src")).unwrap();
        for (rel, body) in [("src/a.rs", "pub fn a() {}\n"), ("src/b.rs", "pub fn b() {}\n")] {
            std::fs::write(root.join(rel), body).unwrap();
        }
        git(root, &["add", "-A"]);
        git(root, &["commit", "-qm", "base"]);
        dir
    }

    /// Fake generator that answers every prompt with the same text.
    fn canned(reply: &'static str) -> impl Fn(String) -> Ready<Reply> {
        move |_prompt| ready(Ok(reply.to_string()))
    }

    #[test]
    fn parse_plan_extracts_footprints() {
        let text = r#"prose... {"subtasks":[
            {"id":"x","prompt":"do x","writes":[{"file":"src/a.rs","symbol":"a"}]},
            {"id":"y","prompt":"do y","reads":[{"file":"src/a.rs","symbol":"a"}]}
        ]} trailing"#;
        let plan = parse_plan(text).unwrap();
        assert_eq!(plan.len(), 2);
        let first = &plan[0];
        let footprint = first.footprint.as_ref().unwrap();
        assert!(footprint.writes.iter().any(|r| r.symbol == "a"));
        assert_eq!(first.files, vec!["src/a.rs".to_string()]);
    }

    #[tokio::test]
    async fn decompose_accepts_disjoint_plan() {
        let dir = repo();
        let json = r#"{"subtasks":[
            {"id":"x","prompt":"edit a","writes":[{"file":"src/a.rs","symbol":"a"}]},
            {"id":"y","prompt":"edit b","writes":[{"file":"src/b.rs","symbol":"b"}]}
        ]}"#;
        let result = decompose(dir.path(), "do both", 3, canned(json)).await;
        assert!(result.is_valid(), "{result:?}");
        assert_eq!(result.subtasks.len(), 2);
        // Disjoint writes share one level, so farming out is worthwhile.
        assert!(!result.prefer_single_session, "{:?}", result.levels);
    }

    #[tokio::test]
    async fn decompose_repairs_conflicting_plan_then_gives_up() {
        let dir = repo();
        // Both subtasks write the SAME symbol on every attempt, so each attempt
        // conflicts, the budget runs out, and a single session is recommended.
        let bad = r#"{"subtasks":[
            {"id":"x","prompt":"p","writes":[{"file":"src/a.rs","symbol":"a"}]},
            {"id":"y","prompt":"q","writes":[{"file":"src/a.rs","symbol":"a"}]}
        ]}"#;
        let result = decompose(dir.path(), "g", 3, canned(bad)).await;
        assert_eq!(result.attempts, 3, "retried on conflict");
        assert!(!result.is_valid());
        assert!(result.prefer_single_session);
        assert!(result.issues.iter().any(|i| i.contains("overlapping")));
    }

    #[test]
    fn parse_plan_rejects_duplicate_ids() {
        let text = r#"{"subtasks":[
            {"id":"x","writes":[{"file":"a.rs","symbol":"a"}]},
            {"id":"x","writes":[{"file":"b.rs","symbol":"b"}]}
        ]}"#;
        let err = parse_plan(text).unwrap_err();
        assert!(err.contains("duplicate subtask id"), "{err}");
    }

    #[tokio::test]
    async fn decompose_treats_missing_footprint_as_repairworthy() {
        let dir = repo();
        // Neither writes nor reads means no footprint, which must trigger a
        // repair rather than being accepted as a half-analyzed plan.
        let no_fp = r#"{"subtasks":[{"id":"x","prompt":"p"}]}"#;
        let result = decompose(dir.path(), "g", 2, canned(no_fp)).await;
        assert!(!result.is_valid());
        assert!(result.issues.iter().any(|i| i.contains("declared no writes/reads")));
    }

    #[tokio::test]
    async fn decompose_repairs_bad_json_then_succeeds() {
        let dir = repo();
        let calls = AtomicU32::new(0);
        let good = r#"{"subtasks":[{"id":"x","prompt":"p","writes":[{"file":"src/a.rs","symbol":"a"}]}]}"#;
        let result = decompose(dir.path(), "g", 3, |_prompt| {
            // Only the very first call gets the unparseable reply.
            let first_call = calls.fetch_add(1, Ordering::SeqCst) == 0;
            let reply = if first_call { "not json" } else { good };
            ready(Ok::<String, String>(reply.to_string()))
        })
        .await;
        assert!(result.is_valid(), "{result:?}");
        assert_eq!(result.attempts, 2, "first attempt bad json, second good");
        // A lone subtask means a single session is recommended.
        assert!(result.prefer_single_session);
    }
}