use std::future::Future;
use std::path::Path;
use std::collections::HashSet;
use car_ast::{analyze, expand_footprint, FootprintSubtask, ProjectIndex, SymbolFootprint, SymbolRef};
use serde::Deserialize;
use super::harness::{Subtask, FOOTPRINT_BLAST_DEPTH};
/// Outcome of a `decompose` run.
#[derive(Debug)]
pub struct DecomposeResult {
/// Subtasks of the accepted plan; empty when every attempt was rejected.
pub subtasks: Vec<Subtask>,
/// Levels returned by `analyze` for the accepted plan; empty when no plan
/// was accepted. (Entries are presumably subtask ids — confirm against
/// `car_ast::analyze`.)
pub levels: Vec<Vec<String>>,
/// Set by `decompose`: `true` when the plan has at most one subtask or when
/// every level holds at most one entry; also `true` when no plan was accepted.
pub prefer_single_session: bool,
/// Number of generation attempts that were made (always at least 1).
pub attempts: u32,
/// Problems reported for the last rejected attempt; empty on success.
pub issues: Vec<String>,
}
impl DecomposeResult {
    /// Reports whether this result carries a usable plan: at least one
    /// subtask and no outstanding issues.
    pub fn is_valid(&self) -> bool {
        let has_subtasks = !self.subtasks.is_empty();
        let has_no_issues = self.issues.is_empty();
        has_subtasks && has_no_issues
    }
}
/// JSON shape of one symbol reference inside a plan: `{"file": ..., "symbol": ...}`.
/// Both fields are required when deserializing.
#[derive(Deserialize)]
struct WireRef {
// File the symbol is declared to live in.
file: String,
// Name of the symbol.
symbol: String,
}
/// JSON shape of one subtask inside a plan.
///
/// Only `id` is required; every other field falls back to its empty default
/// when absent from the JSON.
#[derive(Deserialize)]
struct WireSubtask {
// Subtask identifier; `parse_plan` rejects plans that repeat one.
id: String,
// Prompt text for the subtask; empty string when omitted.
#[serde(default)]
prompt: String,
// Explicit file list; when empty, `parse_plan` derives it from `writes`/`reads`.
#[serde(default)]
files: Vec<String>,
// Symbols the subtask declares it writes.
#[serde(default)]
writes: Vec<WireRef>,
// Symbols the subtask declares it reads.
#[serde(default)]
reads: Vec<WireRef>,
}
/// Top-level JSON object of a plan: `{"subtasks": [...]}`.
#[derive(Deserialize)]
struct WirePlan {
// Required; `parse_plan` additionally rejects an empty list.
subtasks: Vec<WireSubtask>,
}
pub fn parse_plan(text: &str) -> Result<Vec<Subtask>, String> {
let wire: WirePlan = serde_json::from_str(text)
.or_else(|_| {
let start = text.find('{').ok_or("no JSON object found")?;
let end = text.rfind('}').ok_or("no closing brace")?;
if end <= start {
return Err("malformed JSON span".to_string());
}
serde_json::from_str(&text[start..=end]).map_err(|e| e.to_string())
})
.map_err(|e: String| format!("parse failed: {e}"))?;
if wire.subtasks.is_empty() {
return Err("plan has no subtasks".to_string());
}
let mut seen = HashSet::new();
for w in &wire.subtasks {
if !seen.insert(w.id.as_str()) {
return Err(format!("duplicate subtask id '{}'", w.id));
}
}
let subtasks = wire
.subtasks
.into_iter()
.map(|w| {
let footprint = if w.writes.is_empty() && w.reads.is_empty() {
None
} else {
Some(SymbolFootprint {
writes: w
.writes
.iter()
.map(|r| SymbolRef::new(r.file.clone(), r.symbol.clone()))
.collect(),
reads: w
.reads
.iter()
.map(|r| SymbolRef::new(r.file.clone(), r.symbol.clone()))
.collect(),
uncertain: false,
})
};
let files = if w.files.is_empty() {
let mut fs: Vec<String> =
w.writes.iter().chain(&w.reads).map(|r| r.file.clone()).collect();
fs.sort();
fs.dedup();
fs
} else {
w.files
};
Subtask {
id: w.id,
prompt: w.prompt,
files,
footprint,
}
})
.collect();
Ok(subtasks)
}
/// Expands every subtask's declared footprint and runs `analyze` on the result.
///
/// Returns `(levels, conflicts)`:
/// - `levels` is `analyze`'s level assignment for the plan;
/// - `conflicts` holds one human-readable message per conflicting pair that
///   `analyze` reports.
///
/// If any subtask has no footprint, both vectors are returned empty and
/// `analyze` is not called. Callers that need to tell that case apart from
/// "no conflicts" must check for missing footprints beforehand.
fn evaluate(index: &ProjectIndex, subtasks: &[Subtask]) -> (Vec<Vec<String>>, Vec<String>) {
    // All-or-nothing: collecting into `Option<Vec<_>>` yields `None` as soon as
    // one subtask lacks a footprint, without expanding the remaining ones.
    let expanded: Option<Vec<FootprintSubtask>> = subtasks
        .iter()
        .map(|s| {
            s.footprint.as_ref().map(|fp| FootprintSubtask {
                id: s.id.clone(),
                // `index` is already a reference; pass it through unchanged.
                footprint: expand_footprint(index, fp, FOOTPRINT_BLAST_DEPTH),
            })
        })
        .collect();
    let fsubs = match expanded {
        Some(fsubs) => fsubs,
        None => return (Vec::new(), Vec::new()),
    };

    let plan = analyze(&fsubs);
    let conflicts = plan
        .conflicts
        .iter()
        .map(|(a, b)| format!("subtasks '{a}' and '{b}' write overlapping symbols — split, merge, or hoist a shared contract"))
        .collect();
    (plan.levels, conflicts)
}
/// Asks `generate` for a decomposition of `goal` and retries with feedback
/// until a plan is accepted or the attempt budget runs out.
///
/// Each attempt builds a prompt from `goal` plus the issues of the previous
/// rejected attempt, awaits `generate`, parses the reply with `parse_plan`,
/// requires every subtask to declare a footprint, and finally rejects plans
/// for which `evaluate` reports conflicts.
///
/// # Parameters
///
/// - `repo_root`: directory the `ProjectIndex` is built from (once, up front).
/// - `goal`: the goal text embedded in every prompt.
/// - `max_attempts`: attempt budget; `0` is treated as `1`.
/// - `generate`: called with the prompt; `Err` counts as a failed attempt.
///
/// # Returns
///
/// On success, the accepted subtasks and levels with an empty `issues` list.
/// On exhaustion, empty subtasks/levels, `prefer_single_session: true`, and
/// the issues of the last attempt only.
pub async fn decompose<F, Fut>(
    repo_root: &Path,
    goal: &str,
    max_attempts: u32,
    generate: F,
) -> DecomposeResult
where
    F: Fn(String) -> Fut,
    Fut: Future<Output = Result<String, String>>,
{
    // The generator is always consulted at least once.
    let budget = max_attempts.max(1);
    let index = ProjectIndex::build(repo_root);
    // Feedback from the most recent rejected attempt; replaced, never accumulated.
    let mut feedback: Vec<String> = Vec::new();

    for attempt in 1..=budget {
        let request = build_prompt(goal, &feedback);

        let reply = match generate(request).await {
            Err(e) => {
                feedback = vec![format!("generation failed: {e}")];
                continue;
            }
            Ok(text) => text,
        };

        let subtasks = match parse_plan(&reply) {
            Err(e) => {
                feedback = vec![e];
                continue;
            }
            Ok(parsed) => parsed,
        };

        // Every subtask must declare a footprint before the plan can be evaluated.
        let missing: Vec<&str> = subtasks
            .iter()
            .filter_map(|s| match s.footprint {
                None => Some(s.id.as_str()),
                Some(_) => None,
            })
            .collect();
        if !missing.is_empty() {
            feedback = vec![format!(
                "subtasks {missing:?} declared no writes/reads; every subtask must declare its symbol footprint"
            )];
            continue;
        }

        let (levels, conflicts) = evaluate(&index, &subtasks);
        if !conflicts.is_empty() {
            feedback = conflicts;
            continue;
        }

        // A plan with no level wider than one entry gains nothing from being split.
        let strictly_serial = !levels.is_empty() && levels.iter().all(|level| level.len() <= 1);
        let prefer_single_session = subtasks.len() <= 1 || strictly_serial;

        return DecomposeResult {
            subtasks,
            levels,
            prefer_single_session,
            attempts: attempt,
            issues: Vec::new(),
        };
    }

    // Budget exhausted: every attempt in `1..=budget` was rejected.
    DecomposeResult {
        subtasks: Vec::new(),
        levels: Vec::new(),
        prefer_single_session: true,
        attempts: budget,
        issues: feedback,
    }
}
/// Builds the decomposition prompt for `goal`.
///
/// The prompt consists of fixed instructions (output format, rules, a worked
/// example), then `GOAL: ` followed by `goal`. When `prior_issues` is
/// non-empty, a repair section listing each issue as a `- ` bullet is appended.
fn build_prompt(goal: &str, prior_issues: &[String]) -> String {
    // Everything that precedes the goal text; ends with the `GOAL: ` label.
    const PREAMBLE: &str = concat!(
        "Decompose this coding goal into independent subtasks. Emit JSON:\n",
        r#"{"subtasks":[{"id":"...","prompt":"...","writes":[{"file":"path","symbol":"name"}],"reads":[...]}]}"#,
        "\n",
        "Each subtask declares the symbols it WRITES (defines/modifies) and READS.\n",
        "Rules:\n",
        "- Use the FEWEST subtasks that cover the goal. Do NOT add scaffolding/setup/registry subtasks — module declarations, imports, and wiring already exist or belong to the subtask that needs them.\n",
        "- Exactly ONE subtask WRITES each symbol. If subtask B uses a symbol that subtask A defines, put that symbol in B's `reads` (NOT B's `writes`).\n",
        "- If two subtasks would have to modify the SAME symbol, they are not independent — merge them into one subtask.\n",
        "- One file per subtask is a good default.\n",
        r#"Example — goal "add `parse()` in util.rs, and `total()` in lib.rs that calls `parse()`":"#,
        "\n",
        r#" {"subtasks":[{"id":"parse","writes":[{"file":"util.rs","symbol":"parse"}]},{"id":"total","writes":[{"file":"lib.rs","symbol":"total"}],"reads":[{"file":"util.rs","symbol":"parse"}]}]}"#,
        "\n",
        " `total` calls `parse`, so `parse` is in `total`'s READS — NOT its writes. Only `parse`'s own subtask writes it.\n\nGOAL: ",
    );
    // Header of the optional repair section.
    const REPAIR_HEADER: &str = "\n\nFix these problems from the previous attempt:\n";

    let mut prompt = String::from(PREAMBLE);
    prompt += goal;

    if prior_issues.is_empty() {
        return prompt;
    }

    prompt += REPAIR_HEADER;
    for issue in prior_issues {
        prompt += "- ";
        prompt += issue.as_str();
        prompt.push('\n');
    }
    prompt
}
// Unit tests for plan parsing and the decompose retry loop.
#[cfg(test)]
mod tests {
use super::*;
// Creates a temporary git repository (branch `main`) containing `src/a.rs`
// and `src/b.rs`, each with one public function, committed as "base".
// Only a failure to spawn `git` panics; the exit status of each command is
// not inspected.
fn repo() -> tempfile::TempDir {
let dir = tempfile::tempdir().unwrap();
let root = dir.path();
for args in [
vec!["init", "-q", "-b", "main"],
vec!["config", "user.email", "t@t.t"],
vec!["config", "user.name", "t"],
] {
std::process::Command::new("git").args(&args).current_dir(root).output().unwrap();
}
std::fs::create_dir_all(root.join("src")).unwrap();
std::fs::write(root.join("src/a.rs"), "pub fn a() {}\n").unwrap();
std::fs::write(root.join("src/b.rs"), "pub fn b() {}\n").unwrap();
std::process::Command::new("git").args(["add", "-A"]).current_dir(root).output().unwrap();
std::process::Command::new("git").args(["commit", "-qm", "base"]).current_dir(root).output().unwrap();
dir
}
// A plan wrapped in leading/trailing prose still parses, and a subtask with
// no explicit `files` gets them derived from its footprint.
#[test]
fn parse_plan_extracts_footprints() {
let text = r#"prose... {"subtasks":[
{"id":"x","prompt":"do x","writes":[{"file":"src/a.rs","symbol":"a"}]},
{"id":"y","prompt":"do y","reads":[{"file":"src/a.rs","symbol":"a"}]}
]} trailing"#;
let subs = parse_plan(text).unwrap();
assert_eq!(subs.len(), 2);
assert!(subs[0].footprint.as_ref().unwrap().writes.iter().any(|r| r.symbol == "a"));
assert_eq!(subs[0].files, vec!["src/a.rs".to_string()]);
}
// Two subtasks writing different symbols in different files are accepted,
// and the result does not prefer a single session.
#[tokio::test]
async fn decompose_accepts_disjoint_plan() {
let dir = repo();
let json = r#"{"subtasks":[
{"id":"x","prompt":"edit a","writes":[{"file":"src/a.rs","symbol":"a"}]},
{"id":"y","prompt":"edit b","writes":[{"file":"src/b.rs","symbol":"b"}]}
]}"#;
let result = decompose(dir.path(), "do both", 3, |_p| {
let j = json.to_string();
async move { Ok(j) }
})
.await;
assert!(result.is_valid(), "{result:?}");
assert_eq!(result.subtasks.len(), 2);
assert!(!result.prefer_single_session, "{:?}", result.levels);
}
// A generator that always returns a plan with two writers of the same symbol
// uses up all attempts and ends with an "overlapping" issue.
#[tokio::test]
async fn decompose_repairs_conflicting_plan_then_gives_up() {
let dir = repo();
let bad = r#"{"subtasks":[
{"id":"x","prompt":"p","writes":[{"file":"src/a.rs","symbol":"a"}]},
{"id":"y","prompt":"q","writes":[{"file":"src/a.rs","symbol":"a"}]}
]}"#;
let result = decompose(dir.path(), "g", 3, |_p| {
let j = bad.to_string();
async move { Ok(j) }
})
.await;
assert_eq!(result.attempts, 3, "retried on conflict");
assert!(!result.is_valid());
assert!(result.prefer_single_session);
assert!(result.issues.iter().any(|i| i.contains("overlapping")));
}
// Repeating a subtask id is a parse error.
#[test]
fn parse_plan_rejects_duplicate_ids() {
let text = r#"{"subtasks":[
{"id":"x","writes":[{"file":"a.rs","symbol":"a"}]},
{"id":"x","writes":[{"file":"b.rs","symbol":"b"}]}
]}"#;
let err = parse_plan(text).unwrap_err();
assert!(err.contains("duplicate subtask id"), "{err}");
}
// A subtask without writes/reads is rejected with feedback rather than accepted.
#[tokio::test]
async fn decompose_treats_missing_footprint_as_repairworthy() {
let dir = repo();
let no_fp = r#"{"subtasks":[{"id":"x","prompt":"p"}]}"#;
let result = decompose(dir.path(), "g", 2, |_p| {
let j = no_fp.to_string();
async move { Ok(j) }
})
.await;
assert!(!result.is_valid());
assert!(result.issues.iter().any(|i| i.contains("declared no writes/reads")));
}
// First reply is not JSON, second is a valid one-subtask plan: the run
// succeeds on attempt 2. The atomic counter lets the `Fn` closure track calls.
#[tokio::test]
async fn decompose_repairs_bad_json_then_succeeds() {
let dir = repo();
let calls = std::sync::atomic::AtomicU32::new(0);
let good = r#"{"subtasks":[{"id":"x","prompt":"p","writes":[{"file":"src/a.rs","symbol":"a"}]}]}"#;
let result = decompose(dir.path(), "g", 3, |_p| {
let n = calls.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
let good = good.to_string();
async move {
if n == 0 {
Ok("not json".to_string())
} else {
Ok(good)
}
}
})
.await;
assert!(result.is_valid(), "{result:?}");
assert_eq!(result.attempts, 2, "first attempt bad json, second good");
assert!(result.prefer_single_session);
}
}