//! Candidate safety pipeline.
//!
//! This module owns the acceptance-critical path:
//! hook checks, isolated validation, patched scoring, final landing,
//! final validation, and rollback.
use crate::hooks::{evaluate_builtin_hook, HookContext, HookDecision, HookPolicy, HookStage};
use crate::registry::{AgentContract, RegisteredAgent};
use crate::runner::AgentRunResult;
use mdx_rust_analysis::editing::{ProposedEdit, ValidationCommandRecord};
use std::path::Path;
use std::time::{Duration, Instant};
/// Settings shared by every candidate that goes through the safety pipeline.
#[derive(Debug, Clone, Copy)]
pub struct CandidateExecutionConfig<'a> {
/// Policy handed to each built-in hook evaluation.
pub hook_policy: &'a HookPolicy,
/// When true, a candidate that passes validation and scoring is reported
/// but not applied to the real agent source.
pub review_before_apply: bool,
/// When true, the pipeline prints no progress lines to stdout.
pub quiet: bool,
/// Time budget for one candidate. It is used as the outer async timeout and
/// is also checked between pipeline stages.
pub candidate_timeout: Duration,
}
/// Result of running one candidate edit through the safety pipeline.
#[derive(Debug, Clone)]
pub struct CandidateExecutionOutcome {
/// 1 once the edit passed validation in the isolated workspace, otherwise 0.
pub validated: u32,
/// 1 only when the edit was applied to the real source and kept, otherwise 0.
pub landed: u32,
/// 1 only when the edit was landed and final validation passed, otherwise 0.
pub accepted: u32,
/// Patch text of the accepted edit; `None` unless the edit was accepted.
pub accepted_diff: Option<String>,
/// Score measured for the patched code in an isolated workspace. The pipeline
/// substitutes the baseline score when that measurement cannot be taken.
pub patched_score: Option<f32>,
/// Score over the holdout inputs, measured after acceptance; `None` when there
/// are no holdout inputs or the evaluation failed.
pub holdout_score: Option<f32>,
/// `patched_score` minus the baseline score.
pub delta: Option<f32>,
/// Short status text; the notes produced in this file start with a space and
/// are wrapped in parentheses.
pub note: String,
/// Hook decisions in the order the hooks were evaluated.
pub hook_decisions: Vec<HookDecision>,
/// Command records from validation in the isolated workspace.
pub validation_commands: Vec<ValidationCommandRecord>,
/// Command records from the validation run after landing on the real source.
pub final_validation_commands: Vec<ValidationCommandRecord>,
/// `Some` when a rollback was attempted; the value says whether it succeeded.
pub rollback_succeeded: Option<bool>,
/// Error text of a failed rollback, if any.
pub rollback_error: Option<String>,
/// True when the candidate ran out of time or a validation command timed out.
pub timed_out: bool,
}
impl CandidateExecutionOutcome {
    /// Creates the "nothing happened" outcome: all counters are zero, every optional
    /// field is `None`, both command-record lists are empty and `timed_out` is false.
    ///
    /// Only the status `note` and the hook decisions gathered so far are carried over.
    /// Callers in this file use it as the base of struct-update expressions.
    fn empty(note: impl Into<String>, hook_decisions: Vec<HookDecision>) -> Self {
        let note: String = note.into();
        Self {
            note,
            hook_decisions,
            // Counters.
            validated: 0,
            landed: 0,
            accepted: 0,
            // Nothing was scored or accepted.
            accepted_diff: None,
            patched_score: None,
            holdout_score: None,
            delta: None,
            // No commands were run.
            validation_commands: Vec::new(),
            final_validation_commands: Vec::new(),
            // No rollback was attempted and no timeout occurred.
            rollback_succeeded: None,
            rollback_error: None,
            timed_out: false,
        }
    }
}
/// Everything the pipeline needs to evaluate a single candidate edit.
pub struct CandidateExecutionContext<'a> {
/// Agent whose source tree (`agent.path`) the edit targets.
pub agent: &'a RegisteredAgent,
/// Hook policy, review/quiet flags and the per-candidate time budget.
pub config: CandidateExecutionConfig<'a>,
/// Iteration number; used together with `candidate_index` to name the
/// isolated workspaces.
pub iteration: u32,
/// Index of this candidate; used together with `iteration` to name the
/// isolated workspaces.
pub candidate_index: usize,
/// The proposed edit (target file, description and patch text).
pub edit: &'a ProposedEdit,
/// Inputs used to score the patched code in isolation.
pub test_inputs: &'a [serde_json::Value],
/// Inputs used to score the landed code after acceptance; may be empty.
pub holdout_inputs: &'a [serde_json::Value],
/// Score the patched code has to beat.
pub baseline_score: f32,
/// Maps one agent run result to a score.
pub scorer: fn(&AgentRunResult) -> f32,
}
pub async fn execute_candidate_edit(
context: CandidateExecutionContext<'_>,
) -> CandidateExecutionOutcome {
let timeout = context.config.candidate_timeout;
match tokio::time::timeout(timeout, execute_candidate_edit_inner(context)).await {
Ok(outcome) => outcome,
Err(_) => CandidateExecutionOutcome {
timed_out: true,
..CandidateExecutionOutcome::empty(
format!(" (candidate timed out after {}s)", timeout.as_secs()),
Vec::new(),
)
},
}
}
async fn execute_candidate_edit_inner(
context: CandidateExecutionContext<'_>,
) -> CandidateExecutionOutcome {
let agent = context.agent;
let edit = context.edit;
let mut hook_decisions = Vec::new();
let deadline_start = Instant::now();
if let Err(err) = ensure_single_file_patch_scope(&agent.path, edit) {
return CandidateExecutionOutcome::empty(
format!(" (edit scope rejected: {err})"),
hook_decisions,
);
}
if deadline_start.elapsed() >= context.config.candidate_timeout {
return timed_out_outcome(context.config.candidate_timeout, hook_decisions);
}
let pre_edit = evaluate_builtin_hook(
context.config.hook_policy,
&HookContext {
stage: HookStage::PreEdit,
agent_name: agent.name.clone(),
edit_description: Some(edit.description.clone()),
patch_bytes: edit.patch.len(),
command: None,
validation_passed: None,
score_delta: None,
},
);
let denied = pre_edit.denied();
hook_decisions.push(pre_edit);
if denied {
return CandidateExecutionOutcome::empty(
" (pre-edit hook denied candidate)",
hook_decisions,
);
}
let pre_command = evaluate_builtin_hook(
context.config.hook_policy,
&HookContext {
stage: HookStage::PreCommand,
agent_name: agent.name.clone(),
edit_description: Some(edit.description.clone()),
patch_bytes: edit.patch.len(),
command: Some("cargo check && cargo clippy -- -D warnings".to_string()),
validation_passed: None,
score_delta: None,
},
);
let denied = pre_command.denied();
hook_decisions.push(pre_command);
if denied {
return CandidateExecutionOutcome::empty(
" (pre-command hook denied validation)",
hook_decisions,
);
}
let wt_name = format!("opt-{}-{}", context.iteration, context.candidate_index);
let Some(validation_budget) =
remaining_budget(deadline_start, context.config.candidate_timeout)
else {
return timed_out_outcome(context.config.candidate_timeout, hook_decisions);
};
let validation_result = mdx_rust_analysis::editing::apply_and_validate_with_budget(
&agent.path,
edit,
&wt_name,
validation_budget,
);
let Ok(validation) = validation_result else {
if !context.config.quiet {
println!(" [Safe Apply] Validation in isolated workspace failed to run.");
}
return CandidateExecutionOutcome::empty(" (validation failed to run)", hook_decisions);
};
if !validation.passed {
let validation_commands = validation.command_records;
let validation_timed_out = validation_commands.iter().any(|record| record.timed_out);
let decision = evaluate_builtin_hook(
context.config.hook_policy,
&HookContext {
stage: HookStage::PostValidation,
agent_name: agent.name.clone(),
edit_description: Some(edit.description.clone()),
patch_bytes: edit.patch.len(),
command: None,
validation_passed: Some(false),
score_delta: None,
},
);
hook_decisions.push(decision);
if !context.config.quiet {
println!(" [Safe Apply] Validation in isolated workspace failed.");
}
return CandidateExecutionOutcome {
validation_commands,
timed_out: validation_timed_out,
..CandidateExecutionOutcome::empty(
format!(
" (validation rejected candidate: {})",
validation
.cargo_check_output
.lines()
.last()
.unwrap_or("no output")
),
hook_decisions,
)
};
}
let validation_commands = validation.command_records;
if deadline_start.elapsed() >= context.config.candidate_timeout {
return CandidateExecutionOutcome {
validated: 1,
validation_commands,
..timed_out_outcome(context.config.candidate_timeout, hook_decisions)
};
}
let post_validation = evaluate_builtin_hook(
context.config.hook_policy,
&HookContext {
stage: HookStage::PostValidation,
agent_name: agent.name.clone(),
edit_description: Some(edit.description.clone()),
patch_bytes: edit.patch.len(),
command: None,
validation_passed: Some(true),
score_delta: None,
},
);
let denied = post_validation.denied();
hook_decisions.push(post_validation);
if denied {
return CandidateExecutionOutcome {
validated: 1,
validation_commands,
..CandidateExecutionOutcome::empty(
" (post-validation hook denied candidate)",
hook_decisions,
)
};
}
if !context.config.quiet {
println!(
" [Safe Apply] Edit validated in isolated workspace (cargo check + clippy OK)."
);
}
let patched_score = {
let score_name = format!("score-{}-{}", context.iteration, context.candidate_index);
match mdx_rust_analysis::editing::create_isolated_workspace(&agent.path, &score_name) {
Ok(isolated) => {
let score = if mdx_rust_analysis::editing::apply_edit(&agent.path, &isolated, edit)
.is_ok()
{
evaluate_workspace(&isolated, context.test_inputs, context.scorer)
.await
.unwrap_or(context.baseline_score)
} else {
context.baseline_score
};
mdx_rust_analysis::editing::cleanup_isolated_workspace(&agent.path, &isolated);
score
}
Err(_) => context.baseline_score,
}
};
if deadline_start.elapsed() >= context.config.candidate_timeout {
return CandidateExecutionOutcome {
validated: 1,
patched_score: Some(patched_score),
delta: Some(patched_score - context.baseline_score),
validation_commands,
..timed_out_outcome(context.config.candidate_timeout, hook_decisions)
};
}
let delta = patched_score - context.baseline_score;
let pre_accept = evaluate_builtin_hook(
context.config.hook_policy,
&HookContext {
stage: HookStage::PreAccept,
agent_name: agent.name.clone(),
edit_description: Some(edit.description.clone()),
patch_bytes: edit.patch.len(),
command: None,
validation_passed: Some(true),
score_delta: Some(delta),
},
);
let denied = pre_accept.denied();
hook_decisions.push(pre_accept);
if denied {
return CandidateExecutionOutcome {
validated: 1,
patched_score: Some(patched_score),
delta: Some(delta),
validation_commands,
..CandidateExecutionOutcome::empty(
format!(" (pre-accept hook denied delta {delta:.2})"),
hook_decisions,
)
};
}
if delta <= 0.0 {
if !context.config.quiet {
println!(
" [Net-Negative] Patched score {:.2} vs baseline {:.2} (delta {:.2}) - change rejected.",
patched_score, context.baseline_score, delta
);
}
return CandidateExecutionOutcome {
validated: 1,
patched_score: Some(patched_score),
delta: Some(delta),
validation_commands,
..CandidateExecutionOutcome::empty(
format!(
" (net-negative {:.2}->{:.2})",
context.baseline_score, patched_score
),
hook_decisions,
)
};
}
if context.config.review_before_apply {
if !context.config.quiet {
println!(" [Review] Change validated in isolation but not applied (--review).");
}
return CandidateExecutionOutcome {
validated: 1,
patched_score: Some(patched_score),
delta: Some(delta),
validation_commands,
..CandidateExecutionOutcome::empty(
" (review mode: validated in isolation, not applied)",
hook_decisions,
)
};
}
let snapshot = match mdx_rust_analysis::editing::snapshot_file(&edit.file) {
Ok(snapshot) => snapshot,
Err(err) => {
return CandidateExecutionOutcome {
validated: 1,
patched_score: Some(patched_score),
delta: Some(delta),
validation_commands,
..CandidateExecutionOutcome::empty(
format!(" (snapshot failed: {err})"),
hook_decisions,
)
};
}
};
if let Err(err) = mdx_rust_analysis::editing::apply_edit_to_agent(&agent.path, edit) {
if !context.config.quiet {
println!(
" [Land Failed] Could not apply validated patch to real source: {}",
err
);
}
return CandidateExecutionOutcome {
validated: 1,
patched_score: Some(patched_score),
delta: Some(delta),
validation_commands,
..CandidateExecutionOutcome::empty(" (landing failed)", hook_decisions)
};
}
let final_budget = remaining_budget(deadline_start, context.config.candidate_timeout)
.unwrap_or_else(|| Duration::from_secs(0));
let final_report =
mdx_rust_analysis::editing::validate_build_detailed_with_budget(&agent.path, final_budget);
let final_ok = final_report.passed;
let final_validation_commands = final_report.command_records;
let final_validation_timed_out = final_validation_commands
.iter()
.any(|record| record.timed_out);
if deadline_start.elapsed() >= context.config.candidate_timeout || final_validation_timed_out {
let rollback_result = mdx_rust_analysis::editing::restore_file(&snapshot);
let rollback_error = rollback_result.as_ref().err().map(ToString::to_string);
let rollback_succeeded = rollback_result.is_ok();
return CandidateExecutionOutcome {
validated: 1,
landed: 0,
accepted: 0,
accepted_diff: None,
patched_score: Some(patched_score),
holdout_score: None,
delta: Some(delta),
note: format!(
" (candidate timed out after {}s and was rolled back)",
context.config.candidate_timeout.as_secs()
),
hook_decisions,
validation_commands,
final_validation_commands,
rollback_succeeded: Some(rollback_succeeded),
rollback_error,
timed_out: true,
};
}
if final_ok {
let holdout_score = if context.holdout_inputs.is_empty() {
None
} else {
evaluate_workspace(&agent.path, context.holdout_inputs, context.scorer)
.await
.ok()
};
if !context.config.quiet {
println!(
" [Accepted] Landed + final validation OK (score {:.2} -> {:.2}, delta {:.2}).",
context.baseline_score, patched_score, delta
);
}
CandidateExecutionOutcome {
validated: 1,
landed: 1,
accepted: 1,
accepted_diff: Some(edit.patch.clone()),
patched_score: Some(patched_score),
holdout_score,
delta: Some(delta),
note: format!(" (accepted +{delta:.2})"),
hook_decisions,
validation_commands,
final_validation_commands,
rollback_succeeded: None,
rollback_error: None,
timed_out: false,
}
} else {
let rollback_result = mdx_rust_analysis::editing::restore_file(&snapshot);
let rollback_error = rollback_result.as_ref().err().map(ToString::to_string);
let rollback_succeeded = rollback_result.is_ok();
let _ = mdx_rust_analysis::editing::validate_build(&agent.path);
if !context.config.quiet {
println!(
" [Final Validation Failed] Change rolled back after re-validation failed."
);
}
CandidateExecutionOutcome {
validated: 1,
landed: 0,
accepted: 0,
accepted_diff: None,
patched_score: Some(patched_score),
holdout_score: None,
delta: Some(delta),
note: " (final validation failed and rolled back)".to_string(),
hook_decisions,
validation_commands,
final_validation_commands,
rollback_succeeded: Some(rollback_succeeded),
rollback_error,
timed_out: false,
}
}
}
/// Builds the outcome for a candidate that ran out of time before it was validated.
///
/// It is the empty outcome with `timed_out` set and a note that states the limit in
/// whole seconds. `hook_decisions` is passed through unchanged.
fn timed_out_outcome(
    timeout: Duration,
    hook_decisions: Vec<HookDecision>,
) -> CandidateExecutionOutcome {
    let note = format!(" (candidate timed out after {}s)", timeout.as_secs());
    let mut outcome = CandidateExecutionOutcome::empty(note, hook_decisions);
    outcome.timed_out = true;
    outcome
}
/// Returns how much of `total` is left since `start`, or `None` once nothing is left.
///
/// A budget that is exactly used up counts as exhausted, so a returned duration is
/// always greater than zero.
fn remaining_budget(start: Instant, total: Duration) -> Option<Duration> {
    let spent = start.elapsed();
    if spent >= total {
        None
    } else {
        Some(total - spent)
    }
}
fn ensure_single_file_patch_scope(agent_root: &Path, edit: &ProposedEdit) -> anyhow::Result<()> {
let expected = if edit.file.is_absolute() {
edit.file.strip_prefix(agent_root).map_err(|_| {
anyhow::anyhow!("edit file is outside agent root: {}", edit.file.display())
})?
} else {
edit.file.as_path()
};
for line in edit.patch.lines() {
for path in diff_paths_from_line(line) {
if path == "/dev/null" {
continue;
}
if Path::new(&path) != expected {
anyhow::bail!(
"patch touches {}, but ProposedEdit.file is {}",
path,
expected.display()
);
}
}
}
Ok(())
}
/// Extracts the file paths named by one line of patch text.
///
/// Recognised lines:
/// - `--- <path>` / `+++ <path>` and `rename from|to <path>` / `copy from|to <path>`:
///   one path, taken from the rest of the line.
/// - `diff --git <a> <b>`: one path per whitespace-separated token.
/// - `Binary files <a> and <b> differ`: both paths; the ` differ` suffix is optional.
///
/// Each path is passed through `normalize_diff_path`. Any other line yields no paths.
fn diff_paths_from_line(line: &str) -> Vec<String> {
    // Prefixes that are followed by exactly one path. No line can match two of them.
    const SINGLE_PATH_PREFIXES: [&str; 6] = [
        "+++ ",
        "--- ",
        "rename from ",
        "rename to ",
        "copy from ",
        "copy to ",
    ];

    let single = SINGLE_PATH_PREFIXES
        .iter()
        .find_map(|prefix| line.strip_prefix(*prefix));
    if let Some(path) = single {
        return normalize_diff_path(path).into_iter().collect();
    }

    if let Some(both) = line.strip_prefix("diff --git ") {
        let mut paths = Vec::new();
        for token in both.split_whitespace() {
            paths.extend(normalize_diff_path(token));
        }
        return paths;
    }

    let binary_pair = line
        .strip_prefix("Binary files ")
        .and_then(|rest| rest.split_once(" and "));
    match binary_pair {
        Some((old, new_with_suffix)) => {
            let new = new_with_suffix
                .strip_suffix(" differ")
                .unwrap_or(new_with_suffix);
            [old, new]
                .into_iter()
                .filter_map(normalize_diff_path)
                .collect()
        }
        None => Vec::new(),
    }
}
/// Cleans up a path token taken from a diff header.
///
/// Surrounding whitespace and then surrounding double quotes are removed. A single
/// leading `a/` or `b/` is dropped when present; `/dev/null` and every other path are
/// returned as they are. The result is always `Some`.
fn normalize_diff_path(raw: &str) -> Option<String> {
    let unquoted = raw.trim().trim_matches('"');
    let stripped = if unquoted == "/dev/null" {
        unquoted
    } else if let Some(rest) = unquoted.strip_prefix("a/") {
        rest
    } else if let Some(rest) = unquoted.strip_prefix("b/") {
        rest
    } else {
        unquoted
    };
    Some(stripped.to_owned())
}
/// Runs the agent found in `dir` once per input and returns the mean score.
///
/// A throwaway `RegisteredAgent` named `isolated-eval` with the `Process` contract is
/// pointed at `dir`. Inputs are run one after another; the first failing run aborts
/// the evaluation and its error is returned. With no inputs the score is `0.0`.
async fn evaluate_workspace(
    dir: &std::path::Path,
    inputs: &[serde_json::Value],
    scorer: fn(&AgentRunResult) -> f32,
) -> anyhow::Result<f32> {
    let probe = RegisteredAgent {
        name: "isolated-eval".to_string(),
        path: dir.to_path_buf(),
        contract: AgentContract::Process,
        registered_at: "".to_string(),
    };

    let mut scores = Vec::new();
    for input in inputs {
        let run = crate::runner::run_agent(&probe, input.clone()).await?;
        scores.push(scorer(&run));
    }

    match scores.len() {
        0 => Ok(0.0),
        count => Ok(scores.iter().sum::<f32>() / count as f32),
    }
}
// End-to-end tests for the candidate safety pipeline. Each test writes a small
// throwaway Cargo project to a temp directory and runs `execute_candidate_edit`
// against it.
#[cfg(test)]
mod tests {
use super::*;
use crate::optimizer::mechanical_score;
use tempfile::tempdir;
// Source of the throwaway agent: reads one line from stdin and prints a fixed JSON
// answer whose text ends with `answer_suffix`.
fn temp_agent_source(answer_suffix: &str) -> String {
r#"use std::io::BufRead;
fn main() {
let mut input = String::new();
std::io::stdin().lock().read_line(&mut input).unwrap();
println!("{{\"answer\":\"A stable useful answer __SUFFIX__\",\"confidence\":0.70,\"reasoning\":\"Think step by step.\"}}");
}
"#
.replace("__SUFFIX__", answer_suffix)
}
// Writes the throwaway agent project and returns it with its temp dir guard.
// With `with_final_failure_marker`, a build script is added that panics while
// `.mdx-rust/fail-final` exists, and that marker file is created.
// NOTE(review): presumably the marker is absent from isolated workspaces, so only
// the final validation in the real directory fails - confirm against the
// workspace-creation code.
fn write_temp_agent(with_final_failure_marker: bool) -> (tempfile::TempDir, RegisteredAgent) {
let dir = tempdir().unwrap();
std::fs::create_dir_all(dir.path().join("src")).unwrap();
std::fs::write(
dir.path().join("Cargo.toml"),
"[package]\nname=\"safety-agent\"\nversion=\"0.1.0\"\nedition=\"2021\"\n",
)
.unwrap();
std::fs::write(dir.path().join("src/main.rs"), temp_agent_source("before")).unwrap();
if with_final_failure_marker {
std::fs::write(
dir.path().join("build.rs"),
r#"
fn main() {
if std::path::Path::new(".mdx-rust/fail-final").exists() {
panic!("intentional final validation failure");
}
}
"#,
)
.unwrap();
std::fs::create_dir_all(dir.path().join(".mdx-rust")).unwrap();
std::fs::write(dir.path().join(".mdx-rust/fail-final"), "1").unwrap();
}
let agent = RegisteredAgent {
name: "safety-agent".to_string(),
path: dir.path().to_path_buf(),
contract: AgentContract::Process,
registered_at: "test".to_string(),
};
(dir, agent)
}
// Patch that only inserts a comment line into src/main.rs.
fn comment_patch() -> String {
"diff --git a/src/main.rs b/src/main.rs\n--- a/src/main.rs\n+++ b/src/main.rs\n@@ -1,5 +1,6 @@\n use std::io::BufRead;\n+// mdx safety invariant test\n \n fn main() {\n let mut input = String::new();\n std::io::stdin().lock().read_line(&mut input).unwrap();\n"
.to_string()
}
// Patch that replaces the printed answer in src/main.rs with a longer one.
fn improved_patch() -> String {
"diff --git a/src/main.rs b/src/main.rs\n--- a/src/main.rs\n+++ b/src/main.rs\n@@ -2,6 +2,6 @@ use std::io::BufRead;\n \n fn main() {\n let mut input = String::new();\n std::io::stdin().lock().read_line(&mut input).unwrap();\n- println!(\"{{\\\"answer\\\":\\\"A stable useful answer before\\\",\\\"confidence\\\":0.70,\\\"reasoning\\\":\\\"Think step by step.\\\"}}\");\n+ println!(\"{{\\\"answer\\\":\\\"A stable useful answer after with much more useful detail\\\",\\\"confidence\\\":0.70,\\\"reasoning\\\":\\\"Think step by step.\\\"}}\");\n }\n"
.to_string()
}
// Default test configuration: quiet, review mode off, 30 second budget.
fn execution_config<'a>(policy: &'a HookPolicy) -> CandidateExecutionConfig<'a> {
CandidateExecutionConfig {
hook_policy: policy,
review_before_apply: false,
quiet: true,
candidate_timeout: Duration::from_secs(30),
}
}
// A policy with a one-byte patch limit: the test expects a denying hook decision
// and a candidate that is never validated, landed or accepted.
#[tokio::test]
async fn deny_hook_cannot_accept_or_validate() {
let (_dir, agent) = write_temp_agent(false);
let policy = HookPolicy {
max_patch_bytes: 1,
require_positive_delta: true,
};
let edit = ProposedEdit {
file: agent.path.join("src/main.rs"),
description: "too large".to_string(),
patch: comment_patch(),
};
let outcome = execute_candidate_edit(CandidateExecutionContext {
agent: &agent,
config: execution_config(&policy),
iteration: 0,
candidate_index: 0,
edit: &edit,
test_inputs: &[serde_json::json!({"query":"hi"})],
holdout_inputs: &[],
baseline_score: 0.0,
scorer: mechanical_score,
})
.await;
assert_eq!(outcome.validated, 0);
assert_eq!(outcome.landed, 0);
assert_eq!(outcome.accepted, 0);
assert!(outcome
.hook_decisions
.iter()
.any(|decision| decision.denied()));
}
// A comment-only patch against a high baseline (0.95): the test expects it to
// validate but not land, with the real source left unchanged.
#[tokio::test]
async fn net_negative_candidate_is_rejected_before_landing() {
let (_dir, agent) = write_temp_agent(false);
let before = std::fs::read_to_string(agent.path.join("src/main.rs")).unwrap();
let policy = HookPolicy::default();
let edit = ProposedEdit {
file: agent.path.join("src/main.rs"),
description: "comment only".to_string(),
patch: comment_patch(),
};
let outcome = execute_candidate_edit(CandidateExecutionContext {
agent: &agent,
config: execution_config(&policy),
iteration: 0,
candidate_index: 0,
edit: &edit,
test_inputs: &[serde_json::json!({"query":"hi"})],
holdout_inputs: &[],
baseline_score: 0.95,
scorer: mechanical_score,
})
.await;
let after = std::fs::read_to_string(agent.path.join("src/main.rs")).unwrap();
assert!(
outcome.note.is_empty() || !outcome.note.contains("validation rejected"),
"{}",
outcome.note
);
assert_eq!(outcome.validated, 1, "{}", outcome.note);
assert_eq!(outcome.landed, 0);
assert_eq!(outcome.accepted, 0);
assert_eq!(before, after);
}
// Uses the agent with the failure marker: the test expects the edit to validate,
// then be rolled back (note contains "rolled back") with the source restored.
#[tokio::test]
async fn final_validation_failure_rolls_back_and_does_not_accept() {
let (_dir, agent) = write_temp_agent(true);
let before = std::fs::read_to_string(agent.path.join("src/main.rs")).unwrap();
let policy = HookPolicy::default();
let edit = ProposedEdit {
file: agent.path.join("src/main.rs"),
description: "improve answer".to_string(),
patch: improved_patch(),
};
let outcome = execute_candidate_edit(CandidateExecutionContext {
agent: &agent,
config: execution_config(&policy),
iteration: 0,
candidate_index: 0,
edit: &edit,
test_inputs: &[serde_json::json!({"query":"hi"})],
holdout_inputs: &[],
baseline_score: 0.40,
scorer: mechanical_score,
})
.await;
let after = std::fs::read_to_string(agent.path.join("src/main.rs")).unwrap();
assert!(
outcome.note.is_empty() || !outcome.note.contains("validation rejected"),
"{}",
outcome.note
);
assert_eq!(outcome.validated, 1, "{}", outcome.note);
assert_eq!(outcome.landed, 0);
assert_eq!(outcome.accepted, 0);
assert_eq!(before, after);
assert!(outcome.note.contains("rolled back"));
}
// The edit declares src/main.rs but the patch names src/lib.rs: the scope check
// must reject it and the source must stay unchanged.
#[tokio::test]
async fn patch_scope_mismatch_is_rejected_before_validation() {
let (_dir, agent) = write_temp_agent(false);
let policy = HookPolicy::default();
let before = std::fs::read_to_string(agent.path.join("src/main.rs")).unwrap();
let edit = ProposedEdit {
file: agent.path.join("src/main.rs"),
description: "bad multi-file patch".to_string(),
patch: "diff --git a/src/lib.rs b/src/lib.rs\n--- a/src/lib.rs\n+++ b/src/lib.rs\n@@ -1,1 +1,1 @@\n-a\n+b\n".to_string(),
};
let outcome = execute_candidate_edit(CandidateExecutionContext {
agent: &agent,
config: execution_config(&policy),
iteration: 0,
candidate_index: 0,
edit: &edit,
test_inputs: &[serde_json::json!({"query":"hi"})],
holdout_inputs: &[],
baseline_score: 0.40,
scorer: mechanical_score,
})
.await;
assert_eq!(outcome.validated, 0);
assert_eq!(outcome.landed, 0);
assert_eq!(outcome.accepted, 0);
assert!(outcome.note.contains("edit scope rejected"));
assert_eq!(
std::fs::read_to_string(agent.path.join("src/main.rs")).unwrap(),
before
);
}
// The old side of the patch is src/main.rs but the new side is src/lib.rs: the
// scope check must reject it.
#[tokio::test]
async fn diff_git_scope_mismatch_is_rejected_before_validation() {
let (_dir, agent) = write_temp_agent(false);
let policy = HookPolicy::default();
let edit = ProposedEdit {
file: agent.path.join("src/main.rs"),
description: "bad diff header".to_string(),
patch: "diff --git a/src/main.rs b/src/lib.rs\n--- a/src/main.rs\n+++ b/src/lib.rs\n@@ -1,1 +1,1 @@\n-a\n+b\n".to_string(),
};
let outcome = execute_candidate_edit(CandidateExecutionContext {
agent: &agent,
config: execution_config(&policy),
iteration: 0,
candidate_index: 0,
edit: &edit,
test_inputs: &[serde_json::json!({"query":"hi"})],
holdout_inputs: &[],
baseline_score: 0.40,
scorer: mechanical_score,
})
.await;
assert_eq!(outcome.validated, 0);
assert_eq!(outcome.landed, 0);
assert_eq!(outcome.accepted, 0);
assert!(outcome.note.contains("edit scope rejected"));
}
// A pure rename from src/main.rs to src/lib.rs: the scope check must reject it.
#[tokio::test]
async fn rename_scope_mismatch_is_rejected_before_validation() {
let (_dir, agent) = write_temp_agent(false);
let policy = HookPolicy::default();
let edit = ProposedEdit {
file: agent.path.join("src/main.rs"),
description: "bad rename".to_string(),
patch: "diff --git a/src/main.rs b/src/lib.rs\nsimilarity index 100%\nrename from src/main.rs\nrename to src/lib.rs\n".to_string(),
};
let outcome = execute_candidate_edit(CandidateExecutionContext {
agent: &agent,
config: execution_config(&policy),
iteration: 0,
candidate_index: 0,
edit: &edit,
test_inputs: &[serde_json::json!({"query":"hi"})],
holdout_inputs: &[],
baseline_score: 0.40,
scorer: mechanical_score,
})
.await;
assert_eq!(outcome.validated, 0);
assert_eq!(outcome.landed, 0);
assert_eq!(outcome.accepted, 0);
assert!(outcome.note.contains("edit scope rejected"));
}
// A zero-second budget: the candidate must be reported as timed out without
// being validated, landed or accepted.
#[tokio::test]
async fn exhausted_candidate_timeout_stops_before_validation() {
let (_dir, agent) = write_temp_agent(false);
let policy = HookPolicy::default();
let edit = ProposedEdit {
file: agent.path.join("src/main.rs"),
description: "comment only".to_string(),
patch: comment_patch(),
};
let config = CandidateExecutionConfig {
hook_policy: &policy,
review_before_apply: false,
quiet: true,
candidate_timeout: Duration::from_secs(0),
};
let outcome = execute_candidate_edit(CandidateExecutionContext {
agent: &agent,
config,
iteration: 0,
candidate_index: 0,
edit: &edit,
test_inputs: &[serde_json::json!({"query":"hi"})],
holdout_inputs: &[],
baseline_score: 0.40,
scorer: mechanical_score,
})
.await;
assert!(outcome.timed_out);
assert_eq!(outcome.validated, 0);
assert_eq!(outcome.landed, 0);
assert_eq!(outcome.accepted, 0);
}
}