#[allow(unused_imports)]
use crate::sync_util::LockExt;
use std::sync::{Arc, Mutex};
use super::message::{LoopMessage, UserMessage};
use super::result::LoopToolResult;
/// Tag that prefixes both nudge messages defined below.
///
/// NOTE(review): exported, presumably so other modules can recognise an
/// injected nudge by its prefix — confirm against callers.
pub const VERIFY_TAG: &str = "[verify-before-done]";
/// Nudge returned when code was edited but no verification command was recorded.
const VERIFY_NUDGE: &str = "[verify-before-done] You changed code this run but didn't run the tests or build to check it. Verify it works before reporting done — or, if there's nothing to run or you verified another way, say so briefly and finish. Don't re-edit just to look busy.";
/// Nudge returned when the most recently recorded verification command failed.
const FAILED_NUDGE: &str = "[verify-before-done] Your last build or test command failed after you changed code. Don't report done on a red build — fix the failure. If it's pre-existing or expected, say so explicitly before finishing.";
/// Tracks whether code edited during a run was followed by a build/test
/// command, and hands out a one-time nudge message when it was not (or when
/// that command failed).
#[derive(Debug)]
pub struct VerifierGate {
// All state sits behind one mutex so the gate can be updated through `&self`.
inner: Mutex<Inner>,
}
/// Flags accumulated by [`VerifierGate`]; the derived `Default` starts them
/// all at `false`.
#[derive(Debug, Default)]
struct Inner {
/// Set when an edit-style tool call names at least one code file.
edited_code: bool,
/// Tracks whether a bash command recognised by `is_verification_command`
/// has been recorded.
ran_verification: bool,
/// Outcome of the most recently recorded verification command
/// (`true` means it failed).
verification_failed: bool,
/// Set once a nudge has been returned, so later checks stay silent.
fired: bool,
}
impl VerifierGate {
    /// Creates a gate with every flag cleared, already wrapped in an [`Arc`].
    pub fn new() -> Arc<Self> {
        Arc::new(Self {
            inner: Mutex::new(Inner::default()),
        })
    }

    /// Records the outcome of one tool call.
    ///
    /// * `write`, `edit`, `apply_patch` and `edit_minified` calls whose
    ///   arguments name at least one code file mark the run as having changed
    ///   code. They also invalidate any earlier verification, because that
    ///   verification ran against code that has since changed.
    /// * `bash` calls whose `command` argument looks like a build/test command
    ///   count as verification. The run is considered failed when `is_error`
    ///   is set or the result text signals a failure; each such command
    ///   overwrites the previous verdict, so a green rerun clears a red one.
    /// * Every other tool is ignored.
    ///
    /// `is_error` is only consulted for `bash` calls.
    pub fn record_outcome(
        &self,
        tool_name: &str,
        args: &serde_json::Value,
        result: &LoopToolResult,
        is_error: bool,
    ) {
        let mut inner = self.inner.lock_ignore_poison();
        match tool_name {
            "write" | "edit" | "apply_patch" | "edit_minified" => {
                if touches_code_file(args) {
                    inner.edited_code = true;
                    // A verification run only vouches for the code as it was
                    // when it ran. Without this reset, "edit, test, edit
                    // again, finish" would be treated as verified.
                    // `verification_failed` is deliberately left alone: a red
                    // build stays red until a later verification passes.
                    inner.ran_verification = false;
                }
            }
            "bash" => {
                // A missing or non-string `command` is treated as empty,
                // which never matches a verification marker.
                let command = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
                if is_verification_command(command) {
                    inner.ran_verification = true;
                    inner.verification_failed = is_error || result_indicates_failure(result);
                }
            }
            _ => {}
        }
    }

    /// Returns the messages to inject before the run is allowed to finish.
    ///
    /// The result is empty when no code was edited, when a nudge has already
    /// been handed out, or when the latest code edit was followed by a
    /// passing verification command. Otherwise it holds exactly one user
    /// message: the "fix the failure" nudge if the last verification failed,
    /// else the "verify first" nudge. Handing out a nudge marks the gate as
    /// fired, so at most one nudge is produced per gate.
    pub fn check_before_finalize(&self) -> Vec<LoopMessage> {
        let mut inner = self.inner.lock_ignore_poison();
        if inner.fired || !inner.edited_code {
            return Vec::new();
        }
        // A failed verification takes priority over a missing one.
        let text = if inner.verification_failed {
            FAILED_NUDGE
        } else if !inner.ran_verification {
            VERIFY_NUDGE
        } else {
            return Vec::new();
        };
        inner.fired = true;
        vec![LoopMessage::User(UserMessage {
            content: text.to_string(),
        })]
    }
}
/// Joins the string `text` field of every content block with newlines.
///
/// Blocks that have no `text` field, or whose `text` is not a string, are
/// skipped.
fn result_text(result: &LoopToolResult) -> String {
    let mut pieces: Vec<&str> = Vec::new();
    for block in result.content.iter() {
        if let Some(text) = block.get("text").and_then(|value| value.as_str()) {
            pieces.push(text);
        }
    }
    pieces.join("\n")
}
/// Reports whether the textual output of a tool result contains the
/// `Exit code:` marker.
///
/// NOTE(review): this treats any occurrence of the marker as a failure,
/// whatever number follows it — presumably the bash tool only prints the
/// marker for non-zero exits; confirm against the tool's output format.
fn result_indicates_failure(result: &LoopToolResult) -> bool {
    const EXIT_CODE_MARKER: &str = "Exit code:";
    let output = result_text(result);
    output.contains(EXIT_CODE_MARKER)
}
/// Heuristically decides whether a shell command builds, tests or lints code.
///
/// The command is lowercased (ASCII only) and accepted as soon as it contains
/// any marker as a plain substring. The match is not word-aware, so commands
/// such as `git checkout` (via `check`) are accepted too.
fn is_verification_command(command: &str) -> bool {
    const VERIFICATION_MARKERS: &[&str] = &[
        // Generic verbs.
        "test", "build", "check", "lint", "compile",
        // Tool and runner names.
        "cargo", "npm", "pnpm", "yarn", "pytest", "tox", "make", "gradle", "mvn", "ctest",
        "cmake", "rustc", "tsc", "jest", "vitest", "mocha", "clippy",
        // Two-word Go invocations.
        "go vet", "go run",
    ];
    let normalized = command.to_ascii_lowercase();
    for marker in VERIFICATION_MARKERS {
        if normalized.contains(marker) {
            return true;
        }
    }
    false
}
/// Reports whether a tool call's arguments name at least one code file.
///
/// Paths are read from the string-valued top-level keys `path`, `file_path`
/// and `file`, and from the string-valued `path` of every entry in an
/// `operations` array. Arguments that are not a JSON object never match.
fn touches_code_file(args: &serde_json::Value) -> bool {
    let Some(fields) = args.as_object() else {
        return false;
    };

    // Paths given directly on the argument object.
    let direct_paths = ["path", "file_path", "file"]
        .iter()
        .filter_map(|key| fields.get(*key).and_then(|value| value.as_str()));

    // Paths nested one level down, one per listed operation.
    let operation_paths = fields
        .get("operations")
        .and_then(|value| value.as_array())
        .into_iter()
        .flatten()
        .filter_map(|operation| operation.get("path").and_then(|value| value.as_str()));

    direct_paths
        .chain(operation_paths)
        .any(|candidate| is_code_path(candidate))
}
/// File extensions (lowercase, without the dot) that count as source code.
const CODE_EXTS: &[&str] = &[
    "rs", "py", "ts", "tsx", "js", "jsx", "mjs", "cjs", "go", "rb", "java", "kt", "kts", "c", "h",
    "cc", "cpp", "hpp", "cxx", "cs", "swift", "php", "scala", "clj", "cljs", "cljc", "ex", "exs",
    "sh", "bash", "lua", "pl", "hs", "ml", "sql", "vue", "svelte",
];

/// Reports whether `path` ends in one of the [`CODE_EXTS`] extensions.
///
/// The extension is whatever follows the last `.` anywhere in the string,
/// compared ASCII-case-insensitively. Paths without a `.` never match.
fn is_code_path(path: &str) -> bool {
    let Some((_stem, extension)) = path.rsplit_once('.') else {
        return false;
    };
    CODE_EXTS
        .iter()
        .any(|known| known.eq_ignore_ascii_case(extension))
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Builds a tool result holding a single text block.
    fn text_result(text: &str) -> LoopToolResult {
        LoopToolResult {
            content: vec![json!({"type": "text", "text": text})],
            details: json!(null),
            terminate: None,
        }
    }

    /// A result whose output carries no failure marker.
    fn ok_result() -> LoopToolResult {
        text_result("ok")
    }

    /// A result whose output carries an `Exit code:` marker.
    fn failed_result() -> LoopToolResult {
        text_result("test failed\nExit code: 101")
    }

    /// Records a successful call of `tool` whose only argument is `path`.
    fn record_path_tool(gate: &VerifierGate, tool: &str, path: &str) {
        gate.record_outcome(tool, &json!({"path": path}), &ok_result(), false);
    }

    /// Records a bash call running `command` with the given outcome.
    fn record_bash(gate: &VerifierGate, command: &str, result: &LoopToolResult, is_error: bool) {
        gate.record_outcome("bash", &json!({"command": command}), result, is_error);
    }

    /// Returns the text of the first message the gate emits, if any.
    fn nudge(gate: &VerifierGate) -> Option<String> {
        let first = gate.check_before_finalize().into_iter().next()?;
        match first {
            LoopMessage::User(user) => Some(user.content),
            _ => panic!("expected user message"),
        }
    }

    #[test]
    fn edited_code_without_running_nudges_to_verify() {
        let gate = VerifierGate::new();
        record_path_tool(&gate, "edit", "src/auth.rs");

        let n = nudge(&gate).expect("should nudge");
        assert!(n.contains("didn't run the tests"), "verify nudge: {n}");
    }

    #[test]
    fn edit_minified_counts_as_a_code_edit() {
        let gate = VerifierGate::new();
        record_path_tool(&gate, "edit_minified", "src/auth.rs");

        let n = nudge(&gate).expect("edit_minified should arm the verify nudge");
        assert!(n.contains("didn't run the tests"), "verify nudge: {n}");
    }

    #[test]
    fn edited_code_then_passing_test_is_silent() {
        let gate = VerifierGate::new();
        record_path_tool(&gate, "edit", "src/auth.rs");
        record_bash(&gate, "cargo test", &ok_result(), false);

        assert!(
            nudge(&gate).is_none(),
            "passing verification should stay silent"
        );
    }

    #[test]
    fn edited_code_then_failing_test_nudges_to_fix() {
        let gate = VerifierGate::new();
        record_path_tool(&gate, "edit", "src/auth.rs");
        record_bash(&gate, "cargo test", &failed_result(), false);

        let n = nudge(&gate).expect("should nudge on red build");
        assert!(n.contains("failed"), "fix-it nudge: {n}");
        assert!(
            n.contains("red build"),
            "should mention not finishing on red: {n}"
        );
    }

    #[test]
    fn rerun_green_after_failure_clears_the_nudge() {
        let gate = VerifierGate::new();
        record_path_tool(&gate, "edit", "src/auth.rs");
        record_bash(&gate, "cargo test", &failed_result(), false);
        record_bash(&gate, "cargo test", &ok_result(), false);

        assert!(
            nudge(&gate).is_none(),
            "a subsequent green run should clear the failure"
        );
    }

    #[test]
    fn non_verification_command_does_not_count_as_verified() {
        let gate = VerifierGate::new();
        record_path_tool(&gate, "edit", "src/auth.rs");
        record_bash(&gate, "ls -la", &ok_result(), false);

        let n = nudge(&gate).expect("ls is not verification");
        assert!(n.contains("didn't run the tests"));
    }

    #[test]
    fn tool_execution_error_counts_as_failure() {
        let gate = VerifierGate::new();
        record_path_tool(&gate, "edit", "src/auth.rs");
        // Clean-looking output, but the tool call itself is flagged as errored.
        record_bash(&gate, "make test", &ok_result(), true);

        let n = nudge(&gate).expect("errored verification is a failure");
        assert!(n.contains("failed"));
    }

    #[test]
    fn doc_only_edit_never_nudges() {
        let gate = VerifierGate::new();
        record_path_tool(&gate, "write", "README.md");

        assert!(nudge(&gate).is_none());
    }

    #[test]
    fn no_edits_never_nudges() {
        let gate = VerifierGate::new();
        record_path_tool(&gate, "read", "src/auth.rs");

        assert!(nudge(&gate).is_none());
    }

    #[test]
    fn nudge_fires_at_most_once() {
        let gate = VerifierGate::new();
        record_path_tool(&gate, "edit", "src/auth.rs");

        assert!(nudge(&gate).is_some());
        assert!(nudge(&gate).is_none(), "bounded to once per run");
    }

    #[test]
    fn apply_patch_with_code_operation_counts_as_edit() {
        let gate = VerifierGate::new();
        gate.record_outcome(
            "apply_patch",
            &json!({"operations": [{"type": "update", "path": "src/lib.rs"}]}),
            &ok_result(),
            false,
        );

        assert!(nudge(&gate).is_some());
    }

    #[test]
    fn is_code_path_recognizes_common_extensions() {
        // Recognised, including a differently-cased extension.
        assert!(is_code_path("src/main.rs"));
        assert!(is_code_path("app/Foo.TS"));
        // Not recognised: docs, and files with no extension.
        assert!(!is_code_path("README.md"));
        assert!(!is_code_path("Makefile"));
    }
}