use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::future::Future;
use super::session::{CoderEventKind, EventSink};
use super::shell_tool::WorktreeExecutor;
/// Serde default helper that always yields `true`.
///
/// Referenced by `#[serde(default = "default_true")]` on
/// `ContractCheck::expect_exit_zero`, so a check that omits the field expects
/// a zero exit status.
fn default_true() -> bool {
true
}
/// Serde default for `ContractCheck::timeout_secs` when the field is omitted.
fn default_check_timeout() -> u64 {
    /// Timeout, in seconds, applied to a check that does not specify one.
    const DEFAULT_TIMEOUT_SECS: u64 = 120;
    DEFAULT_TIMEOUT_SECS
}
/// A machine-checkable definition of done for a coding task: a description
/// plus the shell checks that verify it.
///
/// `derive_contract` deserializes this from generator output, `validate`
/// reports structural problems, and `evaluate_contract` runs the checks.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct OutcomeContract {
/// Definition of done; the generation prompt asks for a single sentence.
/// `render` prints it (trimmed) as the first line.
pub description: String,
/// Checks run in order by `evaluate_contract`; `validate` requires at least one.
pub checks: Vec<ContractCheck>,
}
/// One shell command together with the assertions made about its result.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ContractCheck {
/// Label for the check; `validate` rejects empty, duplicate (after trimming)
/// and placeholder names.
pub name: String,
/// Shell command handed to the executor's `run_shell`.
pub command: String,
/// When `true`, the check passes only if the reported exit code is 0.
/// Defaults to `true` when absent from the input.
#[serde(default = "default_true")]
pub expect_exit_zero: bool,
/// Optional substring that must occur in the command's captured output.
/// Omitted from serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub output_contains: Option<String>,
/// Timeout passed to `run_shell`; the generation prompt describes it as
/// seconds. Defaults to 120 when absent from the input.
#[serde(default = "default_check_timeout")]
pub timeout_secs: u64,
}
/// Outcome of running a single `ContractCheck`, as produced by `evaluate_contract`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CheckResult {
/// Name copied from the check that was run.
pub name: String,
/// `true` only if every assertion of the check held and the run did not time out.
pub passed: bool,
/// Exit code reported by the executor; `None` when the executor reported
/// none or the command could not be run at all.
pub exit_code: Option<i64>,
/// Trailing portion of the captured output (via `shell_tool::tail` with a
/// `4 * 1024` limit), or a "check failed to run" message when execution failed.
pub output_tail: String,
/// Elapsed time of the `run_shell` call, in milliseconds.
pub duration_ms: u64,
}
impl OutcomeContract {
    /// Inspects the contract for structural problems.
    ///
    /// Returns one human-readable message per problem found; an empty vector
    /// means the contract is acceptable. Messages are ordered check by check,
    /// and within a check in the order: name, command, duplicate name,
    /// missing assertion.
    pub fn validate(&self) -> Vec<String> {
        // The name used in the prompt's JSON example; a contract that keeps it
        // was copied from the example without being given a real label.
        const PLACEHOLDER: &str = "unique_snake_case_label";

        let mut problems: Vec<String> = Vec::new();
        if self.checks.is_empty() {
            problems.push("contract has no checks — at least one is required".to_string());
        }

        let mut names_seen = std::collections::HashSet::new();
        for (index, check) in self.checks.iter().enumerate() {
            let label = check.name.trim();
            let command = check.command.trim();

            // An empty label can never equal the placeholder, so these two
            // findings are mutually exclusive.
            if label.is_empty() {
                problems.push(format!("check #{index} has an empty name"));
            } else if label == PLACEHOLDER {
                problems.push(format!(
                    "check #{index} kept the literal placeholder name \
                     'unique_snake_case_label' — give it a real descriptive label"
                ));
            }

            if command.is_empty() {
                problems.push(format!("check '{}' has an empty command", check.name));
            } else if is_toolchain_only(command) {
                problems.push(format!(
                    "check '{}' runs a toolchain-only no-op (`{}`) that verifies the \
                     tool is installed, not the task — replace it with a command that \
                     exercises the actual change",
                    check.name, command
                ));
            }

            // Uniqueness is judged on the trimmed label, but the message shows
            // the name exactly as it was written.
            let first_use = names_seen.insert(label);
            if !first_use {
                problems.push(format!("duplicate check name '{}'", check.name));
            }

            let asserts_something = check.expect_exit_zero || check.output_contains.is_some();
            if !asserts_something {
                problems.push(format!(
                    "check '{}' asserts nothing (expect_exit_zero=false and no output_contains)",
                    check.name
                ));
            }
        }
        problems
    }

    /// Renders the contract as plain text: the trimmed description, a
    /// `Checks:` header, then one `- name: `command`` line per check followed
    /// by its expectations in parentheses (when it has any).
    pub fn render(&self) -> String {
        let mut text = format!("{}\nChecks:\n", self.description.trim());
        for check in self.checks.iter() {
            let expectation = match (check.expect_exit_zero, check.output_contains.as_ref()) {
                (true, Some(needle)) => {
                    format!(" (expects exit 0, output contains {needle:?})")
                }
                (true, None) => " (expects exit 0)".to_string(),
                (false, Some(needle)) => format!(" (expects output contains {needle:?})"),
                (false, None) => String::new(),
            };
            text += &format!("- {}: `{}`{}\n", check.name, check.command, expectation);
        }
        text
    }
}
/// Reports whether `command` is a bare toolchain probe such as
/// `cargo --version` or `java -version`.
///
/// Such a command only proves the tool is installed and says nothing about
/// the task, so `validate` rejects it. A command is flagged only when it
/// consists of exactly two whitespace-separated tokens: a known tool name
/// followed by a version/help flag. Anything compound (pipes, `&&`/`||`
/// chains, `;` sequences, multi-line scripts) or with further arguments is
/// never flagged.
fn is_toolchain_only(command: &str) -> bool {
    const TOOLS: &[&str] = &[
        "cargo", "rustc", "rustup", "node", "npm", "npx", "yarn", "pnpm", "python", "python3",
        "pip", "pip3", "go", "java", "javac", "ruby", "gem", "dotnet", "deno", "bun", "tsc", "gcc",
        "clang", "make", "cmake",
    ];
    // `-version` (single dash) is the classic spelling for `java`/`javac`;
    // `version` covers subcommand-style probes such as `go version`.
    const FLAGS: &[&str] = &[
        "--version",
        "-version",
        "-V",
        "-v",
        "--help",
        "-h",
        "version",
    ];

    // A single '|' test also covers "||".
    if command.contains("&&") || command.contains(['|', ';', '\n']) {
        return false;
    }

    match command.split_whitespace().collect::<Vec<_>>().as_slice() {
        [tool, flag] => TOOLS.contains(tool) && FLAGS.contains(flag),
        _ => false,
    }
}
/// Builds the prompt that asks a generator for an outcome contract.
///
/// `intent` and `repo_summary` are embedded verbatim. When `issues` is
/// non-empty (a previous attempt failed), a trailing section lists each
/// issue as a bullet so the generator can repair its answer.
fn build_contract_prompt(intent: &str, repo_summary: &str, issues: &[String]) -> String {
    let mut prompt = format!(
        "You are deriving an OUTCOME CONTRACT for a coding task: a small set of shell \
commands that objectively verify the task is done. The commands run at the root of a \
fresh git checkout of the repository, non-interactively, with no TTY.\n\n\
Task intent:\n{intent}\n\n\
Repository summary:\n{repo_summary}\n\n\
Respond with ONLY a JSON object, no prose, no markdown fences, in this shape:\n\
{{\n \"description\": \"one-sentence definition of done\",\n \"checks\": [\n \
{{\"name\": \"unique_snake_case_label\", \"command\": \"shell command\", \
\"expect_exit_zero\": true, \"output_contains\": null, \"timeout_secs\": 120}}\n ]\n}}\n\n\
Rules:\n\
- 1 to 5 checks. Each must verify THE TASK ITSELF, not just that the toolchain works \
(e.g. `rustc --version` or `cargo --version` prove nothing about the change).\n\
- At least one check should exercise the actual new behaviour the intent describes \
(run the program/test that the change affects).\n\
- `name` must be a real, descriptive snake_case label unique within the contract — \
never the literal placeholder `unique_snake_case_label`.\n\
- Every command must run non-interactively and deterministically (no prompts, no \
watchers, no servers that don't exit). Use the repo's own build/test commands when \
the summary reveals them — a build that must compile the change is a strong check.\n\
- `expect_exit_zero: true` (the default) is usually enough. Only set `output_contains` \
to a substring you are CERTAIN will appear verbatim in stdout/stderr; if unsure, \
leave it null. Do NOT invent example output or placeholder values.\n\
- Never use git push, network access, sudo, or anything destructive outside the \
checkout. Timeouts are in seconds; keep them realistic for a build.\n"
    );

    // First attempt: nothing to repair, so the base prompt is the whole prompt.
    if issues.is_empty() {
        return prompt;
    }

    prompt.push_str("\nYour previous attempt FAILED validation with these issues — fix them:\n");
    prompt.extend(issues.iter().map(|issue| format!("- {issue}\n")));
    prompt
}
/// Extracts a JSON object from free-form text such as chatty or fenced
/// generator output.
///
/// The span from the first `{` to the last `}` is tried first. If that span
/// is not valid JSON — typically because prose after the object contains a
/// stray `}` — the first complete JSON value starting at the first `{` is
/// parsed instead and any trailing text is ignored.
///
/// # Errors
///
/// Returns a message when the text has no `{`, no `}`, the last `}` precedes
/// the first `{`, or neither parsing strategy yields valid JSON. In the last
/// case the error reported is the one from the full-span attempt.
pub(crate) fn extract_json_object(text: &str) -> Result<Value, String> {
    let start = text.find('{').ok_or("no JSON object found in output")?;
    let end = text.rfind('}').ok_or("no closing brace found in output")?;
    if end < start {
        return Err("malformed JSON object in output".to_string());
    }

    // Both delimiters are single-byte ASCII, so these byte offsets always
    // fall on char boundaries.
    match serde_json::from_str::<Value>(&text[start..=end]) {
        Ok(value) => Ok(value),
        Err(span_err) => {
            // Fallback: a streaming deserializer stops after the first
            // complete value, so text following the object cannot break it.
            let mut stream =
                serde_json::Deserializer::from_str(&text[start..]).into_iter::<Value>();
            match stream.next() {
                Some(Ok(value)) => Ok(value),
                _ => Err(format!("invalid JSON: {span_err}")),
            }
        }
    }
}
/// Asks `generate` for an outcome contract, retrying with feedback until a
/// valid one is produced or the attempt budget is exhausted.
///
/// Each attempt builds a prompt from `intent`, `repo_summary` and the issues
/// recorded from the previous attempt, then parses, deserializes and
/// validates the response. A generation error consumes an attempt but leaves
/// the recorded issues untouched. `max_attempts` is clamped to at least 1.
///
/// # Errors
///
/// Returns a message naming the number of attempts and the most recent
/// failure when no attempt yields a valid contract.
pub async fn derive_contract<F, Fut>(
    generate: F,
    intent: &str,
    repo_summary: &str,
    max_attempts: u32,
) -> Result<OutcomeContract, String>
where
    F: Fn(String) -> Fut + Send + Sync,
    Fut: Future<Output = Result<String, String>> + Send,
{
    let attempts = std::cmp::max(max_attempts, 1);
    let mut feedback: Vec<String> = Vec::new();
    let mut failure = String::new();

    for _attempt in 0..attempts {
        let prompt = build_contract_prompt(intent, repo_summary, &feedback);
        let raw = match generate(prompt).await {
            Err(e) => {
                failure = format!("generation failed: {e}");
                continue;
            }
            Ok(raw) => raw,
        };

        // Parsing and schema matching each fail with a single-message issue.
        let parsed = extract_json_object(&raw)
            .map_err(|e| format!("output did not parse: {e}. Return ONLY the JSON object."))
            .and_then(|value| {
                serde_json::from_value::<OutcomeContract>(value)
                    .map_err(|e| format!("JSON did not match the contract schema: {e}"))
            });

        let problems = match parsed {
            Err(message) => vec![message],
            Ok(contract) => {
                let found = contract.validate();
                if found.is_empty() {
                    return Ok(contract);
                }
                found
            }
        };

        failure = problems.join("; ");
        feedback = problems;
    }

    Err(format!(
        "could not derive a valid outcome contract after {attempts} attempts: {failure}"
    ))
}
/// Runs every check of `contract` through `executor`, in order, and returns
/// one `CheckResult` per check.
///
/// A failing check does not stop later checks from running. For each check a
/// `CheckStarted` event is emitted on `sink` before the command runs and a
/// `CheckCompleted` event carrying the result afterwards.
///
/// A check passes when all of the following hold: the exit code is 0 (only
/// if `expect_exit_zero` is set), the output contains `output_contains`
/// (only if set), and the executor did not report `timed_out: true`. A
/// command that cannot be run at all yields a failed result with no exit
/// code.
pub async fn evaluate_contract(
    contract: &OutcomeContract,
    executor: &WorktreeExecutor,
    sink: &EventSink,
) -> Vec<CheckResult> {
    let mut outcomes = Vec::with_capacity(contract.checks.len());

    for check in contract.checks.iter() {
        let name = check.name.clone();
        sink.emit(CoderEventKind::CheckStarted { name: name.clone() });

        let clock = std::time::Instant::now();
        let run = executor
            .run_shell(&check.command, Some(check.timeout_secs))
            .await;
        let duration_ms = clock.elapsed().as_millis() as u64;

        let (passed, exit_code, output_tail) = match run {
            Err(e) => (false, None, format!("check failed to run: {e}")),
            Ok(report) => {
                let exit_code = report.get("exit_code").and_then(Value::as_i64);
                let output = report
                    .get("output")
                    .and_then(Value::as_str)
                    .unwrap_or_default();
                // A missing or non-boolean `timed_out` counts as "did not time out".
                let timed_out = matches!(
                    report.get("timed_out").and_then(Value::as_bool),
                    Some(true)
                );

                let exit_ok = exit_code == Some(0) || !check.expect_exit_zero;
                let contains_ok = match check.output_contains.as_deref() {
                    Some(needle) => output.contains(needle),
                    None => true,
                };

                (
                    exit_ok && contains_ok && !timed_out,
                    exit_code,
                    super::shell_tool::tail(output, 4 * 1024),
                )
            }
        };

        let result = CheckResult {
            name,
            passed,
            exit_code,
            output_tail,
            duration_ms,
        };
        sink.emit(CoderEventKind::CheckCompleted {
            result: result.clone(),
        });
        outcomes.push(result);
    }

    outcomes
}
// Unit tests for prompt construction, contract derivation and repair,
// validation rules, and check evaluation against a real temporary directory.
#[cfg(test)]
mod tests {
use super::*;
use std::sync::atomic::{AtomicUsize, Ordering};
// Minimal contract JSON: it omits `expect_exit_zero` and `timeout_secs`, so
// deserializing it exercises the serde defaults.
const VALID: &str = r#"{
"description": "file exists",
"checks": [{"name": "exists", "command": "test -f x.txt"}]
}"#;
// The base prompt embeds the inputs and contains each key steering rule.
#[test]
fn prompt_steers_toward_verifying_the_task_and_real_labels() {
let p = build_contract_prompt(
"add a --version flag",
"Top-level entries: Cargo.toml, src\nBuild systems detected: Rust (cargo)",
&[],
);
assert!(p.contains("add a --version flag"));
assert!(p.contains("Rust (cargo)"));
assert!(p.contains("verify THE TASK ITSELF"));
assert!(p.contains("rustc --version"), "names the toolchain-only anti-pattern");
assert!(p.contains("never the literal placeholder"));
assert!(p.contains("non-interactively"));
assert!(p.contains("CERTAIN will appear"), "output_contains caution present");
assert!(p.contains("no markdown fences"));
}
// Passing prior issues adds the repair section listing them.
#[test]
fn repair_prompt_appends_prior_issues() {
let p = build_contract_prompt("t", "r", &["check 'a' has an empty command".into()]);
assert!(p.contains("FAILED validation"));
assert!(p.contains("empty command"));
}
// A valid first response is accepted and the serde defaults are applied.
#[tokio::test]
async fn derives_on_first_valid_attempt() {
let c = derive_contract(|_p| async { Ok::<_, String>(VALID.into()) }, "make x", "repo", 3)
.await
.unwrap();
assert_eq!(c.checks.len(), 1);
assert!(c.checks[0].expect_exit_zero, "default applies");
assert_eq!(c.checks[0].timeout_secs, 120);
}
// JSON wrapped in prose and a markdown fence is still extracted.
#[tokio::test]
async fn repairs_fenced_and_chatty_output() {
let fenced = format!("Sure! Here is the contract:\n```json\n{VALID}\n```");
let c = derive_contract(
|_p| {
let text = fenced.clone();
async move { Ok::<_, String>(text) }
},
"x",
"r",
3,
)
.await
.unwrap();
assert_eq!(c.checks[0].name, "exists");
}
// An invalid first response triggers a second attempt whose prompt carries
// the validation issues.
#[tokio::test]
async fn invalid_then_repaired() {
let calls = AtomicUsize::new(0);
let c = derive_contract(
|prompt: String| {
let n = calls.fetch_add(1, Ordering::SeqCst) + 1;
async move {
if n == 1 {
Ok::<_, String>(r#"{"description": "no checks", "checks": []}"#.into())
} else {
assert!(prompt.contains("FAILED validation"), "repair prompt carries issues");
Ok(VALID.into())
}
}
},
"x",
"r",
3,
)
.await
.unwrap();
assert_eq!(c.checks.len(), 1);
}
// When every attempt fails, the error reports the number of attempts made.
#[tokio::test]
async fn gives_up_with_error_after_max() {
let err = derive_contract(
|_p| async { Ok::<_, String>("not json at all".into()) },
"x",
"r",
2,
)
.await
.unwrap_err();
assert!(err.contains("after 2 attempts"), "{err}");
}
// Only two-token "tool flag" probes are flagged; real commands, extra
// arguments and compound commands are not.
#[test]
fn is_toolchain_only_flags_bare_version_probes_only() {
for c in [
"cargo --version",
"cargo -V",
"rustc --version",
"node -v",
"npm --version",
"python3 --version",
"go version",
"make --help",
] {
assert!(is_toolchain_only(c), "should flag `{c}`");
}
for c in [
"cargo build",
"cargo test",
"cargo run -- --version",
"cargo run --release -- --version",
"./target/debug/greeter --version",
"cargo --version && cargo build",
"test -f src/main.rs",
"grep -q version Cargo.toml",
"rustc src/main.rs -o /tmp/x",
] {
assert!(!is_toolchain_only(c), "should NOT flag `{c}`");
}
}
// A check with the placeholder name and a toolchain probe command yields
// both corresponding validation issues.
#[test]
fn validate_rejects_toolchain_only_and_placeholder_name() {
let c = OutcomeContract {
description: "d".into(),
checks: vec![ContractCheck {
name: "unique_snake_case_label".into(),
command: "cargo --version".into(),
expect_exit_zero: true,
output_contains: None,
timeout_secs: 120,
}],
};
let issues = c.validate();
assert!(
issues.iter().any(|i| i.contains("placeholder name")),
"{issues:?}"
);
assert!(
issues.iter().any(|i| i.contains("toolchain-only no-op")),
"{issues:?}"
);
}
// End to end: a toolchain-only first answer is rejected, the repair prompt
// names both problems, and the second answer is accepted after exactly two calls.
#[tokio::test]
async fn derive_repairs_a_toolchain_only_first_attempt() {
let calls = AtomicUsize::new(0);
let toolchain_only = r#"{"description":"v","checks":[
{"name":"unique_snake_case_label","command":"cargo --version"}]}"#;
let real = r#"{"description":"v","checks":[
{"name":"version_flag_prints","command":"cargo run -- --version"}]}"#;
let c = derive_contract(
|prompt: String| {
let n = calls.fetch_add(1, Ordering::SeqCst) + 1;
async move {
if n == 1 {
Ok::<_, String>(toolchain_only.into())
} else {
assert!(prompt.contains("toolchain-only no-op"), "{prompt}");
assert!(prompt.contains("placeholder name"), "{prompt}");
Ok(real.into())
}
}
},
"add a --version flag",
"Rust (cargo)",
3,
)
.await
.unwrap();
assert_eq!(c.checks[0].command, "cargo run -- --version");
assert_eq!(calls.load(Ordering::SeqCst), 2, "took exactly one repair");
}
// Validation reports an assertion-less check, an empty command and a
// duplicate name within the same contract.
#[test]
fn validate_catches_empty_and_duplicate_and_assertless() {
let c = OutcomeContract {
description: "d".into(),
checks: vec![
ContractCheck {
name: "a".into(),
command: "true".into(),
expect_exit_zero: false,
output_contains: None,
timeout_secs: 5,
},
ContractCheck {
name: "a".into(),
command: "".into(),
expect_exit_zero: true,
output_contains: None,
timeout_secs: 5,
},
],
};
let issues = c.validate();
assert!(issues.iter().any(|i| i.contains("asserts nothing")));
assert!(issues.iter().any(|i| i.contains("empty command")));
assert!(issues.iter().any(|i| i.contains("duplicate")));
}
// Runs three checks in a temp directory: two pass, one fails, and the
// failure does not prevent the remaining results from being reported.
#[tokio::test]
async fn evaluate_passes_and_fails_checks_in_a_real_dir() {
let dir = tempfile::tempdir().unwrap();
std::fs::write(dir.path().join("present.txt"), "hello needle").unwrap();
let exec = WorktreeExecutor::new(dir.path());
let sink = EventSink::test_sink();
let contract = OutcomeContract {
description: "d".into(),
checks: vec![
ContractCheck {
name: "exists".into(),
command: "test -f present.txt".into(),
expect_exit_zero: true,
output_contains: None,
timeout_secs: 10,
},
ContractCheck {
name: "content".into(),
command: "cat present.txt".into(),
expect_exit_zero: true,
output_contains: Some("needle".into()),
timeout_secs: 10,
},
ContractCheck {
name: "missing".into(),
command: "test -f absent.txt".into(),
expect_exit_zero: true,
output_contains: None,
timeout_secs: 10,
},
],
};
let results = evaluate_contract(&contract, &exec, &sink).await;
assert_eq!(results.len(), 3, "all checks run even after a failure");
assert!(results[0].passed);
assert!(results[1].passed);
assert!(!results[2].passed);
assert_eq!(results[2].exit_code, Some(1));
}
// A zero exit code is not enough when the required substring is missing.
#[tokio::test]
async fn evaluate_fails_on_missing_substring() {
let dir = tempfile::tempdir().unwrap();
let exec = WorktreeExecutor::new(dir.path());
let sink = EventSink::test_sink();
let contract = OutcomeContract {
description: "d".into(),
checks: vec![ContractCheck {
name: "needle".into(),
command: "echo haystack".into(),
expect_exit_zero: true,
output_contains: Some("needle".into()),
timeout_secs: 10,
}],
};
let results = evaluate_contract(&contract, &exec, &sink).await;
assert!(!results[0].passed, "exit 0 but substring missing must fail");
assert_eq!(results[0].exit_code, Some(0));
}
}