use std::path::Path;
use serde::Serialize;
use crate::schemas::{RunResult, Task, TaskState};
#[derive(Debug, Clone, Serialize)]
pub struct Check {
pub name: String,
pub passed: bool,
pub fatal: bool,
pub note: String,
}
#[derive(Debug, Clone, Serialize)]
pub struct Evaluation {
pub run_id: String,
pub task_id: String,
pub status: String,
pub checks: Vec<Check>,
pub next_task_state: TaskState,
}
fn check(name: &str, passed: bool, note: impl Into<String>) -> Check {
Check {
name: name.into(),
passed,
fatal: true,
note: note.into(),
}
}
fn advisory(name: &str, passed: bool, note: impl Into<String>) -> Check {
Check {
name: name.into(),
passed,
fatal: false,
note: note.into(),
}
}
pub fn fatal_failure(name: &str, note: impl Into<String>) -> Check {
check(name, false, note)
}
pub fn evaluate(
run_dir: &Path,
run_id: &str,
task: &Task,
actual_changes: Option<&[String]>,
) -> Evaluation {
let mut checks = Vec::new();
let result_path = run_dir.join("result.json");
let result: Option<RunResult> = std::fs::read_to_string(&result_path)
.ok()
.and_then(|t| serde_json::from_str(&t).ok());
checks.push(check(
"result_file_present",
result_path.is_file(),
"result.json exists",
));
checks.push(check(
"result_schema_valid",
result.is_some(),
"result.json parses against the result schema",
));
checks.push(check(
"handoff_present",
run_dir.join("handoff.md").is_file(),
"handoff.md exists",
));
let mut reported_status = "failed".to_string();
if let Some(r) = &result {
checks.push(check(
"ids_match",
r.task_id == task.id && r.run_id == run_id,
format!("result ids match run {run_id} / task {}", task.id),
));
checks.push(check(
"no_uncontrolled_drift",
!r.intent_adherence.drift_detected,
if r.intent_adherence.drift_detected {
format!("worker reported drift: {}", r.intent_adherence.notes)
} else {
"worker reported no scope drift".to_string()
},
));
let reported: Vec<String> = r
.changes
.files_modified
.iter()
.chain(&r.changes.files_created)
.chain(&r.changes.files_deleted)
.cloned()
.collect();
let basis: &[String] = actual_changes.unwrap_or(&reported);
let forbidden = forbidden_in(basis.iter());
checks.push(check(
"forbidden_paths_untouched",
forbidden.is_empty(),
if forbidden.is_empty() {
"no sensitive or out-of-workspace paths changed".to_string()
} else {
format!("changed forbidden path(s): {}", forbidden.join(", "))
},
));
if let Some(actual) = actual_changes {
let norm = |p: &str| p.trim_start_matches("./").to_string();
let reported_norm: std::collections::HashSet<String> =
reported.iter().map(|p| norm(p)).collect();
let undisclosed: Vec<String> = actual
.iter()
.map(|p| norm(p))
.filter(|p| !reported_norm.contains(p))
.collect();
checks.push(advisory(
"diff_matches_report",
undisclosed.is_empty(),
if undisclosed.is_empty() {
"worker-reported changes match the actual diff".to_string()
} else {
format!("changed but not reported: {}", undisclosed.join(", "))
},
));
}
checks.push(advisory(
"validation_ran",
!r.validation.commands_run.is_empty(),
if r.validation.commands_run.is_empty() {
"no validation commands were run (may be fine for this task)".to_string()
} else {
format!(
"{} validation command(s) run",
r.validation.commands_run.len()
)
},
));
let is_review = matches!(crate::packet::role_for(&task.kind), "reviewer" | "security");
if is_review {
let failed: Vec<&str> = r
.verdict
.iter()
.filter(|v| !v.pass)
.map(|v| v.criterion_id.as_str())
.collect();
checks.push(check(
"review_verdict_present",
!r.verdict.is_empty(),
if r.verdict.is_empty() {
"review task wrote no structured verdict".to_string()
} else {
format!("{} criterion verdict(s)", r.verdict.len())
},
));
checks.push(check(
"review_criteria_pass",
failed.is_empty(),
if failed.is_empty() {
"all judged criteria pass".to_string()
} else {
format!("criteria failed: {}", failed.join(", "))
},
));
} else if !r.verdict.is_empty() {
let passed = r.verdict.iter().filter(|v| v.pass).count();
checks.push(advisory(
"self_verdict",
passed == r.verdict.len(),
format!("{}/{} self-checked criteria pass", passed, r.verdict.len()),
));
}
reported_status = r.status.clone();
}
let all_fatal_passed = checks.iter().filter(|c| c.fatal).all(|c| c.passed);
let next_task_state = decide_state(&reported_status, all_fatal_passed, result.as_ref());
Evaluation {
run_id: run_id.to_string(),
task_id: task.id.clone(),
status: reported_status,
checks,
next_task_state,
}
}
fn forbidden_in<'a>(paths: impl Iterator<Item = &'a String>) -> Vec<String> {
const SENSITIVE: &[&str] = &[
".env",
".ssh",
"credentials",
"secret",
".key",
".pem",
".p12",
];
let mut bad = Vec::new();
for f in paths {
let lower = f.to_lowercase();
let escapes = f.starts_with('/') || f.contains("..");
let sensitive = SENSITIVE.iter().any(|p| lower.contains(p));
if escapes || sensitive {
bad.push(f.clone());
}
}
bad
}
pub fn changed_paths(cwd: &Path) -> Vec<String> {
let out = std::process::Command::new("git")
.arg("-C")
.arg(cwd)
.args(["status", "--porcelain=v1", "--untracked-files=all"])
.output();
let Ok(out) = out else {
return Vec::new();
};
if !out.status.success() {
return Vec::new();
}
let mut paths = Vec::new();
for line in String::from_utf8_lossy(&out.stdout).lines() {
if line.len() < 4 {
continue;
}
let rest = line[3..].trim();
let path = rest.rsplit(" -> ").next().unwrap_or(rest).trim_matches('"');
if !path.is_empty() {
paths.push(path.to_string());
}
}
paths
}
fn decide_state(reported: &str, all_passed: bool, result: Option<&RunResult>) -> TaskState {
match reported {
"done" if all_passed => TaskState::Done,
"done" => TaskState::Failed, "partial" => TaskState::Partial,
"blocked" => TaskState::Blocked,
"needs_user" => TaskState::NeedsUser,
"failed" => TaskState::Failed,
_ => {
if result.is_none() {
TaskState::Failed
} else {
TaskState::Blocked
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::schemas::Changes;
fn dummy_result() -> RunResult {
RunResult {
schema_version: 1,
run_id: "r".into(),
task_id: "t".into(),
status: "partial".into(),
intent_adherence: Default::default(),
changes: Default::default(),
validation: Default::default(),
question_for_user: None,
compact_summary: String::new(),
verdict: vec![],
harness_suggestions: vec![],
}
}
#[test]
fn done_requires_all_checks_passing() {
assert_eq!(decide_state("done", true, None), TaskState::Done);
assert_eq!(decide_state("done", false, None), TaskState::Failed);
}
#[test]
fn non_done_states_map_safely() {
assert_eq!(decide_state("partial", true, None), TaskState::Partial);
assert_eq!(decide_state("blocked", true, None), TaskState::Blocked);
assert_eq!(decide_state("needs_user", true, None), TaskState::NeedsUser);
assert_eq!(decide_state("failed", true, None), TaskState::Failed);
}
#[test]
fn unknown_status_depends_on_evidence() {
assert_eq!(decide_state("weird", true, None), TaskState::Failed);
let r = dummy_result();
assert_eq!(decide_state("weird", true, Some(&r)), TaskState::Blocked);
}
#[test]
fn forbidden_paths_flagged() {
let c = Changes {
files_modified: vec!["src/main.rs".into(), "../outside.txt".into()],
files_created: vec![".env".into()],
files_deleted: vec!["/etc/hosts".into()],
};
let all: Vec<String> = c
.files_modified
.iter()
.chain(&c.files_created)
.chain(&c.files_deleted)
.cloned()
.collect();
let bad = forbidden_in(all.iter());
assert!(bad.contains(&"../outside.txt".to_string()));
assert!(bad.contains(&".env".to_string()));
assert!(bad.contains(&"/etc/hosts".to_string()));
assert!(!bad.contains(&"src/main.rs".to_string()));
}
fn eval_with(kind: &str, status: &str, verdict: Vec<crate::schemas::Verdict>) -> Evaluation {
let dir = std::env::temp_dir().join(format!(
"yard-eval-verdict-{}-{}",
std::process::id(),
kind.to_string() + status
));
let _ = std::fs::remove_dir_all(&dir);
std::fs::create_dir_all(&dir).unwrap();
std::fs::write(dir.join("handoff.md"), "h").unwrap();
let mut r = dummy_result();
r.run_id = "run-x".into();
r.task_id = "YARD-9".into();
r.status = status.into();
r.verdict = verdict;
std::fs::write(dir.join("result.json"), serde_json::to_string(&r).unwrap()).unwrap();
let mut t = crate::schemas::Task {
id: "YARD-9".into(),
title: "t".into(),
state: TaskState::Running,
priority: 0,
risk: String::new(),
kind: kind.into(),
preferred_worker: String::new(),
model: String::new(),
effort: String::new(),
depends_on: vec![],
skills: vec![],
allowed_scope: vec![],
acceptance: vec![],
validation: None,
approval: None,
interaction: None,
worker_rationale: None,
};
t.kind = kind.into();
let e = evaluate(&dir, "run-x", &t, None);
let _ = std::fs::remove_dir_all(&dir);
e
}
#[test]
fn forbidden_path_uses_actual_diff_over_worker_report() {
let dir = std::env::temp_dir().join(format!("yard-eval-diff-{}", std::process::id()));
let _ = std::fs::remove_dir_all(&dir);
std::fs::create_dir_all(&dir).unwrap();
std::fs::write(dir.join("handoff.md"), "h").unwrap();
let mut r = dummy_result();
r.run_id = "run-x".into();
r.task_id = "YARD-9".into();
r.status = "done".into();
r.changes.files_modified = vec!["src/main.rs".into()];
std::fs::write(dir.join("result.json"), serde_json::to_string(&r).unwrap()).unwrap();
let t = crate::schemas::Task {
id: "YARD-9".into(),
title: "t".into(),
state: TaskState::Running,
priority: 0,
risk: String::new(),
kind: "implementation".into(),
preferred_worker: String::new(),
model: String::new(),
effort: String::new(),
depends_on: vec![],
skills: vec![],
allowed_scope: vec![],
acceptance: vec![],
validation: None,
approval: None,
interaction: None,
worker_rationale: None,
};
let actual = vec!["src/main.rs".to_string(), ".env".to_string()];
let e = evaluate(&dir, "run-x", &t, Some(&actual));
let fb = e
.checks
.iter()
.find(|c| c.name == "forbidden_paths_untouched")
.unwrap();
assert!(
!fb.passed,
".env in the actual diff must fail the forbidden check"
);
assert_eq!(e.next_task_state, TaskState::Failed);
let _ = std::fs::remove_dir_all(&dir);
}
#[test]
fn review_task_needs_a_structured_verdict() {
use crate::schemas::Verdict;
let v = |id: &str, pass: bool| Verdict {
criterion_id: id.into(),
pass,
evidence: "e".into(),
};
let e = eval_with("review", "done", vec![v("AC-001", true), v("AC-002", true)]);
assert_eq!(e.next_task_state, TaskState::Done);
let e = eval_with("review", "done", vec![v("AC-001", false)]);
assert_eq!(e.next_task_state, TaskState::Failed);
assert!(e
.checks
.iter()
.any(|c| c.name == "review_criteria_pass" && !c.passed));
let e = eval_with("review", "needs_user", vec![v("AC-001", false)]);
assert_eq!(e.next_task_state, TaskState::NeedsUser);
let e = eval_with("review", "done", vec![]);
assert_eq!(e.next_task_state, TaskState::Failed);
assert!(e
.checks
.iter()
.any(|c| c.name == "review_verdict_present" && !c.passed));
let e = eval_with("implementation", "done", vec![]);
assert_eq!(e.next_task_state, TaskState::Done);
}
#[test]
fn done_with_no_validation_is_still_done() {
let dir =
std::env::temp_dir().join(format!("yard-eval-{}-{}", std::process::id(), "novalidate"));
std::fs::create_dir_all(&dir).unwrap();
std::fs::write(dir.join("handoff.md"), "h").unwrap();
let result = RunResult {
schema_version: 1,
run_id: "run-x".into(),
task_id: "YARD-9".into(),
status: "done".into(),
intent_adherence: Default::default(),
changes: Default::default(),
validation: Default::default(), question_for_user: None,
compact_summary: "ok".into(),
verdict: vec![],
harness_suggestions: vec![],
};
std::fs::write(
dir.join("result.json"),
serde_json::to_string(&result).unwrap(),
)
.unwrap();
let t = crate::schemas::Task {
id: "YARD-9".into(),
title: "t".into(),
state: TaskState::Running,
priority: 0,
risk: String::new(),
kind: String::new(),
preferred_worker: String::new(),
model: String::new(),
effort: String::new(),
depends_on: vec![],
skills: vec![],
allowed_scope: vec![],
acceptance: vec![],
validation: None,
approval: None,
interaction: None,
worker_rationale: None,
};
let eval = evaluate(&dir, "run-x", &t, None);
assert_eq!(eval.next_task_state, TaskState::Done);
let v = eval
.checks
.iter()
.find(|c| c.name == "validation_ran")
.unwrap();
assert!(!v.fatal && !v.passed); let _ = std::fs::remove_dir_all(&dir);
}
}