use crate::registry::RegisteredAgent;
use crate::runner::AgentRunResult;
use crate::safety_pipeline::{
execute_candidate_edit, CandidateExecutionConfig, CandidateExecutionContext,
};
use crate::{
diagnose_run, split_dataset, EvaluationDataset, ExperimentLedger, FailureKind, HookDecision,
HookPolicy, OptimizationBudget, PromptVariantRecord, ScorerMetadata, TraceDiagnosis,
};
use mdx_rust_analysis::editing::ProposedEdit;
use mdx_rust_analysis::editing::ValidationCommandRecord;
use mdx_rust_analysis::AgentBundle;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use std::time::Duration;
/// Builds a small unified-diff style patch that swaps `old` for `new` in `source`.
///
/// The patch targets the first line of `source` that contains `old` and wraps it
/// with up to three lines of context on either side. When `old` cannot be found on
/// a single line (it is absent, or it spans several lines) a placeholder
/// `@@ -1,1 +1,1 @@` hunk carrying `old` and `new` verbatim is produced instead.
///
/// `file_path` is rendered lossily into the `a/` and `b/` header paths.
fn generate_preamble_patch(file_path: &Path, source: &str, old: &str, new: &str) -> String {
    const CONTEXT_LINES: usize = 3;

    let shown_path = file_path.to_string_lossy();
    let header = format!(
        "diff --git a/{shown_path} b/{shown_path}\n--- a/{shown_path}\n+++ b/{shown_path}"
    );

    // `old` is nowhere in the source: placeholder hunk, terminated by a newline.
    if !source.contains(old) {
        return format!("{header}\n@@ -1,1 +1,1 @@\n-{old}\n+{new}\n");
    }

    let source_lines: Vec<&str> = source.lines().collect();
    let Some(target) = source_lines.iter().position(|line| line.contains(old)) else {
        // `old` exists but not within one line: placeholder hunk, no trailing newline.
        return format!("{header}\n@@ -1,1 +1,1 @@\n-{old}\n+{new}");
    };

    let first = target.saturating_sub(CONTEXT_LINES);
    let last = (target + 1 + CONTEXT_LINES).min(source_lines.len());
    let leading = &source_lines[first..target];
    let trailing = &source_lines[target + 1..last];
    // One line is replaced by one line, so both sides of the hunk have equal length.
    let hunk_len = leading.len() + 1 + trailing.len();

    let mut rendered = vec![
        header,
        format!(
            "@@ -{},{} +{},{} @@",
            first + 1,
            hunk_len,
            first + 1,
            hunk_len
        ),
    ];
    rendered.extend(leading.iter().map(|line| format!(" {}", line)));
    rendered.push(format!("-{}", source_lines[target]));
    rendered.push(format!("+{}", source_lines[target].replace(old, new)));
    rendered.extend(trailing.iter().map(|line| format!(" {}", line)));
    rendered.join("\n")
}
// Settings consumed by `run_optimization`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct OptimizeConfig {
// Number of optimization iterations; the loop runs over `0..max_iterations`.
pub max_iterations: u32,
// Requested candidates per iteration; passed through `budget.candidate_limit(..)`
// to decide how many candidates are actually tried.
pub candidates_per_iteration: u32,
// NOTE(review): not read anywhere in the visible part of this file — confirm where it is consumed.
pub use_llm_judge: bool,
// Budget used for the dataset split, the experiment ledger and the candidate limit.
#[serde(default)]
pub budget: OptimizationBudget,
// Hook policy forwarded to candidate execution.
#[serde(default)]
pub hook_policy: HookPolicy,
// Forwarded to candidate execution as `review_before_apply`.
#[serde(default)]
pub review_before_apply: bool,
// Forwarded to candidate execution; also suppresses the final re-evaluation printout.
#[serde(default)]
pub quiet: bool,
// Per-candidate timeout. Skipped by serde and schemars; deserialized values get
// `default_candidate_timeout()`.
#[serde(skip, default = "default_candidate_timeout")]
#[schemars(skip)]
pub candidate_timeout: Duration,
}
// Record of a single optimization iteration, as produced by `run_optimization`
// and serialized into the experiment JSON file.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct OptimizationRun {
// Zero-based iteration index.
pub iteration: u32,
// Mechanical score of each train input, measured at the start of the iteration.
pub scores: Vec<f32>,
// Sum of `validated` counts reported by the candidate executions of this iteration.
pub validated_changes: u32,
// Sum of `landed` counts reported by the candidate executions of this iteration.
pub landed_changes: u32,
// Accepted count of the accepted candidate, or 0 when nothing was accepted.
pub accepted_changes: u32,
// Free-form log line accumulated while the iteration ran.
pub notes: String,
// Every candidate produced for this iteration (not only the ones that were tried).
pub candidates: Vec<Candidate>,
// Diff of the accepted edit, if any.
#[serde(default)]
pub diff: Option<String>,
// Hash of the policy file that was found, if any.
#[serde(default)]
pub policy_hash: Option<String>,
// Version and content hash of the evaluation dataset.
#[serde(default)]
pub dataset_version: Option<String>,
#[serde(default)]
pub dataset_hash: Option<String>,
// Score fields are populated only when a change was accepted.
#[serde(default)]
pub baseline_score: Option<f32>,
#[serde(default)]
pub patched_score: Option<f32>,
#[serde(default)]
pub score_delta: Option<f32>,
// Git/provenance fields below are populated only when a change was accepted.
#[serde(default)]
pub git_sha_before: Option<String>,
#[serde(default)]
pub git_sha_after: Option<String>,
// Stable hash of `diff`.
#[serde(default)]
pub diff_hash: Option<String>,
// `true` when `git status --porcelain` printed anything after the accepted change.
#[serde(default)]
pub working_tree_dirty_after: Option<bool>,
// Label of the scorer used.
#[serde(default)]
pub scorer: Option<String>,
// Human-readable names of the validation steps (a fixed list set by `run_optimization`).
#[serde(default)]
pub validation_commands: Option<Vec<String>>,
// Command records reported for the accepted candidate.
#[serde(default)]
pub validation_command_records: Vec<ValidationCommandRecord>,
#[serde(default)]
pub final_validation_command_records: Vec<ValidationCommandRecord>,
// One diagnosis per train input run in this iteration.
#[serde(default)]
pub trace_diagnosis: Vec<TraceDiagnosis>,
// Hook decisions collected from every candidate execution of this iteration.
#[serde(default)]
pub hook_decisions: Vec<HookDecision>,
// Snapshot (clone) of the experiment ledger at the end of this iteration.
#[serde(default)]
pub ledger: Option<ExperimentLedger>,
// Holdout score reported for the accepted candidate, if any.
#[serde(default)]
pub holdout_score: Option<f32>,
// Budget the run was configured with.
#[serde(default)]
pub budget: Option<OptimizationBudget>,
// Path of the policy file that was found, if any.
#[serde(default)]
pub policy_path: Option<String>,
// Provenance of the diagnosis model, including whether its diagnosis was used.
#[serde(default)]
pub model: Option<ModelProvenance>,
// Rollback status reported for the accepted candidate.
#[serde(default)]
pub rollback_succeeded: Option<bool>,
#[serde(default)]
pub rollback_error: Option<String>,
// `true` when any candidate execution of this iteration timed out.
#[serde(default)]
pub candidate_timed_out: bool,
// Audit packet built for the accepted change; `None` when nothing was accepted
// or the packet could not be assembled.
#[serde(default)]
pub audit_packet: Option<AuditPacket>,
}
// Describes the model involved in a run. In this file it is obtained from
// `LlmClient::provenance(..)` for the diagnosis step.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct ModelProvenance {
// Role the model played. NOTE(review): the value is set by the LLM client, not in this file.
pub role: String,
// Provider and model identifiers; printed as `provider:model` in the markdown report.
pub provider: String,
pub model: String,
// Whether the model's output was actually used (printed as `used=` in the report).
pub used: bool,
}
// Self-contained audit record for one accepted edit; assembled by
// `build_audit_packet` and written to its own JSON file by `run_optimization`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct AuditPacket {
// Set to "0.2" by `build_audit_packet`.
pub schema_version: String,
pub agent_name: String,
// Zero-based iteration in which the edit was accepted.
pub iteration: u32,
// Set to "single-file-v0.2" by `build_audit_packet`.
pub edit_scope_contract: String,
// What was changed, and the diff that changed it.
pub accepted_edit: AcceptedEditSummary,
// Git, policy, dataset, scorer and model provenance.
pub provenance: AuditProvenance,
// Baseline/patched/holdout scores for the edit.
pub scores: ScoreProvenance,
// Hook decisions collected during the iteration.
pub hook_decisions: Vec<HookDecision>,
// Command records reported for the accepted candidate.
pub validation_command_records: Vec<ValidationCommandRecord>,
pub final_validation_command_records: Vec<ValidationCommandRecord>,
// Rollback status reported for the accepted candidate.
pub rollback_succeeded: Option<bool>,
pub rollback_error: Option<String>,
// `true` when any candidate of the iteration timed out.
pub candidate_timed_out: bool,
}
// Summary of the accepted edit inside an `AuditPacket`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct AcceptedEditSummary {
// Edit description; `build_audit_packet` falls back to "accepted optimizer edit".
pub description: String,
// Path of the changed file; `build_audit_packet` falls back to "unknown".
pub changed_file: String,
// Stable hash of `diff`.
pub diff_hash: String,
// The accepted diff text.
pub diff: String,
}
// Provenance section of an `AuditPacket`: where the code, policy, dataset,
// scorer and model came from.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct AuditProvenance {
// Short HEAD SHAs before the run and after the accepted change (`None` if git failed).
pub git_sha_before: Option<String>,
pub git_sha_after: Option<String>,
// `true` when `git status --porcelain` printed anything after the change.
pub working_tree_dirty_after: Option<bool>,
// Path and hash of the policy file, when one was found.
pub policy_path: Option<String>,
pub policy_hash: Option<String>,
// Version and content hash of the evaluation dataset.
pub dataset_version: String,
pub dataset_hash: String,
// Copied from the scorer metadata's `id` and `version`.
pub scorer_id: String,
pub scorer_version: String,
// Provenance of the diagnosis model.
pub model: ModelProvenance,
}
// Score section of an `AuditPacket`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct ScoreProvenance {
// Baseline score computed once before the first iteration.
pub baseline_score: f32,
// Score and delta reported by candidate execution for the accepted edit.
pub patched_score: f32,
pub score_delta: f32,
// Holdout score reported by candidate execution, if any.
pub holdout_score: Option<f32>,
}
// A proposed improvement, coming either from the LLM diagnosis or from
// `fallback_candidates_from_trace`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct Candidate {
// `focus` is a short label (mapped to a strategy by `strategy_for_focus` when
// `strategy` is absent); `description` is the human-readable proposal.
pub focus: String, pub description: String,
// What the candidate is expected to improve; echoed in the markdown report.
pub expected_improvement: String,
// Explicit edit strategy; defaults to `None` when missing from serialized input.
#[serde(default)]
pub strategy: Option<EditStrategy>,
}
// Kind of edit a candidate asks for. `evolved_preamble_for_strategy` maps each
// variant to the sentence appended to the agent preamble.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
pub enum EditStrategy {
// Adds step-by-step reasoning instructions to the preamble.
SystemPrompt,
// Adds tool-usage guidance; only produces an edit when the bundle lists tools.
ToolDescription,
// Rewrites an "Echo: " fallback string when one is found, otherwise adds a
// "never echo" instruction to the preamble.
FallbackLogic,
// Adds an explicit answer/reasoning/confidence output contract to the preamble.
OutputSchema,
// Currently never produces a preamble edit.
ModelConfig,
}
/// Serde default for `OptimizeConfig::candidate_timeout`: five minutes.
fn default_candidate_timeout() -> Duration {
    const DEFAULT_TIMEOUT_SECS: u64 = 5 * 60;
    Duration::from_secs(DEFAULT_TIMEOUT_SECS)
}
pub async fn run_optimization(
agent: &RegisteredAgent,
config: &OptimizeConfig,
) -> anyhow::Result<Vec<OptimizationRun>> {
let mut runs = vec![];
let dataset = EvaluationDataset::synthetic_v1();
let split = split_dataset(&dataset, config.budget);
let mut ledger = ExperimentLedger::new(config.budget, &dataset, &split);
let dataset_hash = dataset.content_hash();
let scorer = ScorerMetadata::mechanical_v1();
let test_inputs: Vec<serde_json::Value> = split
.train
.iter()
.map(|sample| sample.input.clone())
.collect();
let holdout_inputs: Vec<serde_json::Value> = split
.holdout
.iter()
.map(|sample| sample.input.clone())
.collect();
let baseline_score: f32 = {
let mut total = 0.0f32;
for input in &test_inputs {
if let Ok(res) = crate::runner::run_agent(agent, input.clone()).await {
total += mechanical_score(&res);
}
}
if test_inputs.is_empty() {
0.0
} else {
total / test_inputs.len() as f32
}
};
let git_sha_before: Option<String> = std::process::Command::new("git")
.current_dir(&agent.path)
.args(["rev-parse", "--short", "HEAD"])
.output()
.ok()
.and_then(|o| {
if o.status.success() {
Some(String::from_utf8_lossy(&o.stdout).trim().to_string())
} else {
None
}
});
let policy_info = load_policy_info(&agent.name);
for iteration in 0..config.max_iterations {
let mut scores_this_iter = vec![];
let mut accepted_patched: Option<f32> = None;
let mut accepted_delta: Option<f32> = None;
let mut validated = 0;
let mut landed = 0;
let mut trace_diagnoses = Vec::new();
let mut hook_decisions = Vec::new();
let mut accepted_holdout_score = None;
let mut accepted_validation_commands = Vec::new();
let mut accepted_final_validation_commands = Vec::new();
let mut accepted_rollback_succeeded = None;
let mut accepted_rollback_error = None;
let mut accepted_edit_description: Option<String> = None;
let mut accepted_edit_file: Option<String> = None;
let mut any_candidate_timed_out = false;
for input in &test_inputs {
let run_result = crate::runner::run_agent(agent, input.clone()).await?;
trace_diagnoses.push(diagnose_run(&run_result));
let score = mechanical_score(&run_result);
scores_this_iter.push(score);
}
let avg_score: f32 = if scores_this_iter.is_empty() {
0.0
} else {
scores_this_iter.iter().sum::<f32>() / scores_this_iter.len() as f32
};
let rich_bundle = mdx_rust_analysis::analyze_agent(&agent.path, None).ok();
let file_count = rich_bundle
.as_ref()
.map(|b| b.scope.optimizable_paths.len())
.unwrap_or(0);
let bundle_summary = if let Some(ref b) = rich_bundle {
let mut s = format!(
"{} source files, Rig agent = {}",
file_count, b.is_rig_agent
);
if !b.preambles.is_empty() {
s.push_str(&format!(
", current preambles: {:?}",
b.preambles.iter().map(|p| &p.text).collect::<Vec<_>>()
));
}
if !b.tools.is_empty() {
s.push_str(&format!(
", tools: {:?}",
b.tools.iter().map(|t| &t.name).collect::<Vec<_>>()
));
}
s
} else {
format!("{} source files (limited analysis)", file_count)
};
let llm = crate::llm::LlmClient::default();
let diag_req = crate::llm::DiagnosisRequest {
policy: "Improve the agent so it gives high-quality, reasoned answers instead of echoing. Prefer explicit step-by-step reasoning in the system prompt.".to_string(),
bundle_summary,
traces_summary: summarize_trace_diagnoses(&trace_diagnoses),
scores: scores_this_iter.clone(),
};
let diagnosis_result = llm.diagnose(diag_req).await;
let diagnosis_model_used = diagnosis_result.is_ok();
let diagnosis = diagnosis_result.ok();
let mut candidates = vec![];
let mut accepted = 0;
let mut notes = format!(
"Avg score this iter: {:.2} ({} files in bundle)",
avg_score, file_count
);
let mut accepted_diff: Option<String> = None;
if let Some(d) = diagnosis {
notes.push_str(&format!(" → LLM: {}", d.summary));
for c in d.candidates {
let strategy = strategy_for_focus(&c.focus);
candidates.push(Candidate {
focus: c.focus,
description: c.description,
expected_improvement: c.expected_improvement,
strategy: Some(strategy),
});
}
} else {
candidates = fallback_candidates_from_trace(&trace_diagnoses);
}
if !candidates.is_empty() {
let candidate_limit = config
.budget
.candidate_limit(config.candidates_per_iteration);
for (candidate_index, candidate) in candidates.iter().take(candidate_limit).enumerate()
{
if accepted > 0 {
break;
}
let Some(edit) =
build_edit_for_candidate(&agent.path, rich_bundle.as_ref(), candidate)?
else {
notes.push_str(&format!(
" (candidate {} skipped: no safe edit plan for {:?})",
candidate.focus, candidate.strategy
));
continue;
};
notes.push_str(&format!(
" → Candidate {}: {} ({:?})",
candidate_index + 1,
candidate.focus,
candidate.strategy
));
ledger.record_variant(PromptVariantRecord::from_patch(
format!("{:?}", candidate.strategy),
edit.file.display().to_string(),
edit.description.clone(),
&edit.patch,
));
let outcome = execute_candidate_edit(CandidateExecutionContext {
agent,
config: CandidateExecutionConfig {
hook_policy: &config.hook_policy,
review_before_apply: config.review_before_apply,
quiet: config.quiet,
candidate_timeout: config.candidate_timeout,
},
iteration,
candidate_index,
edit: &edit,
test_inputs: &test_inputs,
holdout_inputs: &holdout_inputs,
baseline_score,
scorer: mechanical_score,
})
.await;
validated += outcome.validated;
landed += outcome.landed;
any_candidate_timed_out |= outcome.timed_out;
hook_decisions.extend(outcome.hook_decisions);
if outcome.accepted > 0 {
accepted = outcome.accepted;
accepted_diff = outcome.accepted_diff;
accepted_patched = outcome.patched_score;
accepted_delta = outcome.delta;
accepted_holdout_score = outcome.holdout_score;
accepted_validation_commands = outcome.validation_commands;
accepted_final_validation_commands = outcome.final_validation_commands;
accepted_rollback_succeeded = outcome.rollback_succeeded;
accepted_rollback_error = outcome.rollback_error;
accepted_edit_description = Some(edit.description.clone());
accepted_edit_file = Some(edit.file.display().to_string());
}
notes.push_str(&outcome.note);
}
} else {
accepted = 0; notes.push_str(" → No new candidates — current behavior is good (no change applied)");
}
let (run_baseline, run_patched, run_delta) = if accepted > 0 {
(Some(baseline_score), accepted_patched, accepted_delta)
} else {
(None, None, None)
};
let (prov_before, prov_after, prov_diff_hash, prov_dirty, prov_scorer, prov_cmds) =
if accepted > 0 {
let after = std::process::Command::new("git")
.current_dir(&agent.path)
.args(["rev-parse", "--short", "HEAD"])
.output()
.ok()
.and_then(|o| {
if o.status.success() {
Some(String::from_utf8_lossy(&o.stdout).trim().to_string())
} else {
None
}
});
let dirty_after = std::process::Command::new("git")
.current_dir(&agent.path)
.args(["status", "--porcelain"])
.output()
.ok()
.filter(|output| output.status.success())
.map(|output| !output.stdout.is_empty());
(
git_sha_before.clone(),
after,
accepted_diff
.as_ref()
.map(|diff| stable_hash_hex(diff.as_bytes())),
dirty_after,
Some(scorer.label()),
Some(vec![
"cargo check (isolated)".to_string(),
"cargo clippy -D warnings (isolated)".to_string(),
"final validate_build after land (real tree)".to_string(),
]),
)
} else {
(None, None, None, None, None, None)
};
let model_provenance = llm.provenance(diagnosis_model_used);
let audit_packet = if accepted > 0 {
build_audit_packet(AuditPacketInput {
agent_name: &agent.name,
iteration,
edit_description: accepted_edit_description.as_deref(),
edit_file: accepted_edit_file.as_deref(),
diff: accepted_diff.as_deref(),
diff_hash: prov_diff_hash.as_deref(),
git_sha_before: prov_before.clone(),
git_sha_after: prov_after.clone(),
working_tree_dirty_after: prov_dirty,
policy_path: policy_info
.as_ref()
.map(|policy| policy.path.display().to_string()),
policy_hash: policy_info.as_ref().map(|policy| policy.hash.clone()),
dataset_version: &dataset.version,
dataset_hash: &dataset_hash,
scorer: &scorer,
model: model_provenance.clone(),
baseline_score,
patched_score: accepted_patched,
score_delta: accepted_delta,
holdout_score: accepted_holdout_score,
hook_decisions: hook_decisions.clone(),
validation_command_records: accepted_validation_commands.clone(),
final_validation_command_records: accepted_final_validation_commands.clone(),
rollback_succeeded: accepted_rollback_succeeded,
rollback_error: accepted_rollback_error.clone(),
candidate_timed_out: any_candidate_timed_out,
})
} else {
None
};
runs.push(OptimizationRun {
iteration,
scores: scores_this_iter,
validated_changes: validated,
landed_changes: landed,
accepted_changes: accepted,
notes,
candidates,
diff: accepted_diff,
policy_hash: policy_info.as_ref().map(|policy| policy.hash.clone()),
dataset_version: Some(dataset.version.clone()),
dataset_hash: Some(dataset_hash.clone()),
baseline_score: run_baseline,
patched_score: run_patched,
score_delta: run_delta,
git_sha_before: prov_before,
git_sha_after: prov_after,
diff_hash: prov_diff_hash,
working_tree_dirty_after: prov_dirty,
scorer: prov_scorer,
validation_commands: prov_cmds,
validation_command_records: accepted_validation_commands,
final_validation_command_records: accepted_final_validation_commands,
trace_diagnosis: trace_diagnoses,
hook_decisions,
ledger: Some(ledger.clone()),
holdout_score: accepted_holdout_score,
budget: Some(config.budget),
policy_path: policy_info
.as_ref()
.map(|policy| policy.path.display().to_string()),
model: Some(model_provenance),
rollback_succeeded: accepted_rollback_succeeded,
rollback_error: accepted_rollback_error,
candidate_timed_out: any_candidate_timed_out,
audit_packet,
});
if accepted > 0 && iteration > 0 {
}
}
let experiment_dir = std::env::current_dir()?
.join(".mdx-rust")
.join("agents")
.join(&agent.name)
.join("experiments");
std::fs::create_dir_all(&experiment_dir).ok();
let timestamp = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_secs())
.unwrap_or(0);
let experiment_file = experiment_dir.join(format!("run-{}.json", timestamp));
if let Ok(content) = serde_json::to_string_pretty(&runs) {
let _ = std::fs::write(experiment_file, content);
}
for run in &runs {
if let Some(packet) = &run.audit_packet {
let audit_file = experiment_dir.join(format!(
"audit-packet-{}-iteration-{}.json",
timestamp, run.iteration
));
if let Ok(content) = serde_json::to_string_pretty(packet) {
let _ = std::fs::write(audit_file, content);
}
}
}
if runs.iter().any(|r| r.accepted_changes > 0) {
let git_sha = std::process::Command::new("git")
.args(["rev-parse", "--short", "HEAD"])
.output()
.ok()
.and_then(|o| String::from_utf8(o.stdout).ok())
.map(|s| s.trim().to_string())
.unwrap_or_else(|| "unknown".to_string());
let mut report = format!(
"# Optimization Report for '{}'\n\nTimestamp: {}\nGit SHA: {}\n\n## Summary\n\n",
agent.name, timestamp, git_sha
);
for run in &runs {
if run.accepted_changes > 0 {
report.push_str(&format!(
"- Iteration {}: Accepted {} change(s)\n Notes: {}\n",
run.iteration, run.accepted_changes, run.notes
));
if let Some(d) = &run.diff {
report.push_str(&format!("\n```diff\n{}\n```\n", d));
} else {
report.push_str(" (Change persisted to src/main.rs)\n");
}
if let Some(h) = &run.policy_hash {
report.push_str(&format!(" Policy hash: {}\n", h));
}
if let Some(v) = &run.dataset_version {
report.push_str(&format!(" Dataset version: {}\n", v));
}
if let Some(path) = &run.policy_path {
report.push_str(&format!(" Policy path: {}\n", path));
}
if let Some(model) = &run.model {
report.push_str(&format!(
" Diagnosis model: {}:{} (used={})\n",
model.provider, model.model, model.used
));
}
if !run.validation_command_records.is_empty() {
report.push_str(" Isolated validation commands:\n");
for command in &run.validation_command_records {
report.push_str(&format!(
" - {} | success={} | timeout={} | status={:?} | duration_ms={}\n",
command.command,
command.success,
command.timed_out,
command.status_code,
command.duration_ms
));
}
}
if !run.final_validation_command_records.is_empty() {
report.push_str(" Final validation commands:\n");
for command in &run.final_validation_command_records {
report.push_str(&format!(
" - {} | success={} | timeout={} | status={:?} | duration_ms={}\n",
command.command,
command.success,
command.timed_out,
command.status_code,
command.duration_ms
));
}
}
}
}
report.push_str("\n## Candidates Considered\n\n");
for run in &runs {
for (i, c) in run.candidates.iter().enumerate() {
report.push_str(&format!(
"- [{}] {}: {}\n Expected: {}\n\n",
i + 1,
c.focus,
c.description,
c.expected_improvement
));
}
}
let _ = std::fs::write(
experiment_dir.join(format!("report-{}.md", timestamp)),
report,
);
}
if runs.iter().any(|r| r.accepted_changes > 0) {
let mut final_scores = vec![];
for input in &test_inputs {
if let Ok(res) = crate::runner::run_agent(agent, input.clone()).await {
final_scores.push(mechanical_score(&res));
}
}
if !final_scores.is_empty() {
let final_avg = final_scores.iter().sum::<f32>() / final_scores.len() as f32;
if !config.quiet {
println!(
" Final re-evaluation after accepted changes: {:.2}",
final_avg
);
}
}
}
Ok(runs)
}
/// Location and content hash of the policy file found by `load_policy_info`.
#[derive(Debug, Clone)]
struct PolicyInfo {
/// Path of the policy file that was successfully read.
path: PathBuf,
/// Stable hash of the file's bytes.
hash: String,
}
/// Everything `build_audit_packet` needs to assemble an `AuditPacket`.
///
/// Optional fields mirror values that may be missing for an iteration;
/// `diff`, `patched_score` and `score_delta` must all be present for a packet
/// to be produced.
struct AuditPacketInput<'a> {
agent_name: &'a str,
/// Zero-based iteration in which the edit was accepted.
iteration: u32,
/// Description and file of the accepted edit; defaults are substituted when absent.
edit_description: Option<&'a str>,
edit_file: Option<&'a str>,
/// Accepted diff; required.
diff: Option<&'a str>,
/// Precomputed hash of `diff`; recomputed from `diff` when absent.
diff_hash: Option<&'a str>,
git_sha_before: Option<String>,
git_sha_after: Option<String>,
working_tree_dirty_after: Option<bool>,
policy_path: Option<String>,
policy_hash: Option<String>,
dataset_version: &'a str,
dataset_hash: &'a str,
/// Scorer metadata; its `id` and `version` are copied into the packet.
scorer: &'a ScorerMetadata,
model: ModelProvenance,
baseline_score: f32,
/// Required; the packet is not built without both values.
patched_score: Option<f32>,
score_delta: Option<f32>,
holdout_score: Option<f32>,
hook_decisions: Vec<HookDecision>,
validation_command_records: Vec<ValidationCommandRecord>,
final_validation_command_records: Vec<ValidationCommandRecord>,
rollback_succeeded: Option<bool>,
rollback_error: Option<String>,
candidate_timed_out: bool,
}
fn build_audit_packet(input: AuditPacketInput<'_>) -> Option<AuditPacket> {
let diff = input.diff?.to_string();
let diff_hash = input
.diff_hash
.map(str::to_string)
.unwrap_or_else(|| stable_hash_hex(diff.as_bytes()));
let patched_score = input.patched_score?;
let score_delta = input.score_delta?;
Some(AuditPacket {
schema_version: "0.2".to_string(),
agent_name: input.agent_name.to_string(),
iteration: input.iteration,
edit_scope_contract: "single-file-v0.2".to_string(),
accepted_edit: AcceptedEditSummary {
description: input
.edit_description
.unwrap_or("accepted optimizer edit")
.to_string(),
changed_file: input.edit_file.unwrap_or("unknown").to_string(),
diff_hash,
diff,
},
provenance: AuditProvenance {
git_sha_before: input.git_sha_before,
git_sha_after: input.git_sha_after,
working_tree_dirty_after: input.working_tree_dirty_after,
policy_path: input.policy_path,
policy_hash: input.policy_hash,
dataset_version: input.dataset_version.to_string(),
dataset_hash: input.dataset_hash.to_string(),
scorer_id: input.scorer.id.clone(),
scorer_version: input.scorer.version.clone(),
model: input.model,
},
scores: ScoreProvenance {
baseline_score: input.baseline_score,
patched_score,
score_delta,
holdout_score: input.holdout_score,
},
hook_decisions: input.hook_decisions,
validation_command_records: input.validation_command_records,
final_validation_command_records: input.final_validation_command_records,
rollback_succeeded: input.rollback_succeeded,
rollback_error: input.rollback_error,
candidate_timed_out: input.candidate_timed_out,
})
}
/// Finds the policy file that applies to `agent_name` and hashes its contents.
///
/// Looks, relative to the current working directory, first at
/// `.mdx-rust/agents/<agent_name>/policies.md` and then at
/// `.mdx-rust/policies.md`; the first file that can be read wins. Returns `None`
/// when the working directory is unavailable or neither file is readable.
fn load_policy_info(agent_name: &str) -> Option<PolicyInfo> {
    let config_root = std::env::current_dir().ok()?.join(".mdx-rust");
    let search_order = [
        config_root
            .join("agents")
            .join(agent_name)
            .join("policies.md"),
        config_root.join("policies.md"),
    ];
    for path in search_order {
        if let Ok(bytes) = std::fs::read(&path) {
            return Some(PolicyInfo {
                hash: stable_hash_hex(&bytes),
                path,
            });
        }
    }
    None
}
/// Local shorthand that forwards to `crate::eval::stable_hash_hex`.
fn stable_hash_hex(bytes: &[u8]) -> String {
crate::eval::stable_hash_hex(bytes)
}
/// Maps a free-text candidate focus onto an edit strategy by keyword.
///
/// Matching is case-insensitive and checked in a fixed order, so a focus that
/// mentions several keywords resolves to the earliest group:
/// tool → fallback/logic → schema/output → model/temperature. Anything else
/// is treated as a system-prompt change.
fn strategy_for_focus(focus: &str) -> EditStrategy {
    let lowered = focus.to_lowercase();
    let mentions = |keywords: &[&str]| keywords.iter().any(|keyword| lowered.contains(*keyword));

    if mentions(&["tool"]) {
        return EditStrategy::ToolDescription;
    }
    if mentions(&["fallback", "logic"]) {
        return EditStrategy::FallbackLogic;
    }
    if mentions(&["schema", "output"]) {
        return EditStrategy::OutputSchema;
    }
    if mentions(&["model", "temperature"]) {
        return EditStrategy::ModelConfig;
    }
    EditStrategy::SystemPrompt
}
fn fallback_candidates_from_trace(diagnoses: &[TraceDiagnosis]) -> Vec<Candidate> {
let mut candidates = Vec::new();
if diagnoses.iter().any(|diagnosis| {
diagnosis
.signals
.iter()
.any(|signal| signal.kind == FailureKind::EchoFallback)
}) {
candidates.push(Candidate {
focus: "fallback_logic".to_string(),
description: "Prevent echo fallback and require a useful best-effort answer."
.to_string(),
expected_improvement: "Reduce low-value echo responses.".to_string(),
strategy: Some(EditStrategy::FallbackLogic),
});
}
if diagnoses.iter().any(|diagnosis| {
diagnosis
.signals
.iter()
.any(|signal| signal.kind == FailureKind::InvalidJson)
}) {
candidates.push(Candidate {
focus: "output_schema".to_string(),
description: "Make the output contract explicit for answer, reasoning, and confidence."
.to_string(),
expected_improvement: "Improve parseability for agent callers.".to_string(),
strategy: Some(EditStrategy::OutputSchema),
});
}
if diagnoses.iter().any(|diagnosis| {
diagnosis.signals.iter().any(|signal| {
matches!(
signal.kind,
FailureKind::MissingReasoning | FailureKind::LowConfidence
)
})
}) {
candidates.push(Candidate {
focus: "system_prompt".to_string(),
description: "Strengthen the system prompt with explicit reasoning instructions."
.to_string(),
expected_improvement: "Increase reasoning quality and confidence.".to_string(),
strategy: Some(EditStrategy::SystemPrompt),
});
}
if candidates.is_empty() {
candidates.push(Candidate {
focus: "system_prompt".to_string(),
description: "Strengthen the system prompt with explicit reasoning instructions."
.to_string(),
expected_improvement: "Improve answer quality.".to_string(),
strategy: Some(EditStrategy::SystemPrompt),
});
}
candidates
}
/// Condenses trace diagnoses into one line for the LLM diagnosis request.
///
/// Only diagnoses that report failures contribute; their compact summaries are
/// joined with `" | "`. When nothing failed, a fixed "no failures" sentence is
/// returned.
fn summarize_trace_diagnoses(diagnoses: &[TraceDiagnosis]) -> String {
    let failing: Vec<_> = diagnoses
        .iter()
        .filter(|diagnosis| diagnosis.has_failures())
        .map(|diagnosis| diagnosis.compact_summary())
        .collect();

    if !failing.is_empty() {
        return format!("Trace failures: {}", failing.join(" | "));
    }
    "No obvious trace failures detected.".to_string()
}
/// Turns a candidate into a concrete single-file edit, if a safe one exists.
///
/// For the fallback-logic strategy an "Echo: " string rewrite is preferred; if
/// that is not possible (or for any other strategy) the agent preamble is
/// extended with a strategy-specific instruction. Returns `Ok(None)` when no
/// preamble can be located, the strategy yields no addition, or the preamble
/// would be unchanged after normalization.
///
/// # Errors
/// Fails if the echo-fallback search fails or the target file cannot be read.
fn build_edit_for_candidate(
    agent_root: &Path,
    bundle: Option<&AgentBundle>,
    candidate: &Candidate,
) -> anyhow::Result<Option<ProposedEdit>> {
    let strategy = match &candidate.strategy {
        Some(explicit) => explicit.clone(),
        None => strategy_for_focus(&candidate.focus),
    };
    let preamble_target = select_preamble_target(agent_root, bundle);

    if strategy == EditStrategy::FallbackLogic {
        let echo_edit = build_echo_fallback_edit(agent_root, bundle, &candidate.description)?;
        // Use the echo rewrite when found; without a preamble there is nothing else to try.
        if echo_edit.is_some() || preamble_target.is_none() {
            return Ok(echo_edit);
        }
    }
    let Some((target_file, old_preamble)) = preamble_target else {
        return Ok(None);
    };

    let new_preamble = match evolved_preamble_for_strategy(&old_preamble, &strategy, bundle) {
        // Only keep additions that change the prompt beyond whitespace and case.
        Some(text) if normalize_prompt(&text) != normalize_prompt(&old_preamble) => text,
        _ => return Ok(None),
    };

    let content = std::fs::read_to_string(&target_file)?;
    // Patch headers use the path relative to the agent root when possible.
    let shown_path = target_file
        .strip_prefix(agent_root)
        .unwrap_or(&target_file);
    let patch = generate_preamble_patch(shown_path, &content, &old_preamble, &new_preamble);
    let description = format!("{:?}: {}", strategy, candidate.description);
    Ok(Some(ProposedEdit {
        file: target_file,
        description,
        patch,
    }))
}
/// Looks for a hard-coded "Echo: " fallback string and proposes replacing it.
///
/// Searches the bundle's optimizable `.rs` files in order (or `src/main.rs`
/// under `agent_root` when the bundle offers none). In the first readable file
/// that contains one of the known echo strings, the first matching rewrite is
/// turned into an edit. Unreadable files are skipped. Returns `Ok(None)` when
/// no file contains an echo string.
fn build_echo_fallback_edit(
    agent_root: &Path,
    bundle: Option<&AgentBundle>,
    description: &str,
) -> anyhow::Result<Option<ProposedEdit>> {
    // (text to find, replacement, label used in the edit description);
    // the more specific format-string form is tried first.
    const ECHO_REWRITES: [(&str, &str, &str); 2] = [
        (
            "Echo: {}",
            "Best-effort answer after reasoning: {}",
            "replace echo fallback format string",
        ),
        (
            "Echo: ",
            "Best-effort answer after reasoning: ",
            "replace echo fallback prefix",
        ),
    ];

    let mut search_files: Vec<PathBuf> = match bundle {
        Some(bundle) => bundle
            .scope
            .optimizable_paths
            .iter()
            .filter(|path| path.extension().is_some_and(|extension| extension == "rs"))
            .cloned()
            .collect(),
        None => Vec::new(),
    };
    if search_files.is_empty() {
        search_files.push(agent_root.join("src/main.rs"));
    }

    for target_file in search_files {
        let Ok(content) = std::fs::read_to_string(&target_file) else {
            continue;
        };
        let Some((old, new, label)) = ECHO_REWRITES
            .iter()
            .copied()
            .find(|(old, _, _)| content.contains(*old))
        else {
            continue;
        };
        let shown_path = target_file
            .strip_prefix(agent_root)
            .unwrap_or(&target_file);
        let patch = generate_preamble_patch(shown_path, &content, old, new);
        return Ok(Some(ProposedEdit {
            file: target_file,
            description: format!("FallbackLogic: {description} ({label})"),
            patch,
        }));
    }
    Ok(None)
}
/// Picks the file and preamble text that a prompt edit should target.
///
/// The first preamble recorded in the bundle wins. Otherwise the first
/// optimizable path named `main.rs`, or whose `.rs` file name contains "agent",
/// is read (defaulting to `src/main.rs` under `agent_root`) and its first
/// `.preamble("...")` literal is extracted. Returns `None` when that file cannot
/// be read or holds no such literal.
fn select_preamble_target(
    agent_root: &Path,
    bundle: Option<&AgentBundle>,
) -> Option<(PathBuf, String)> {
    if let Some(bundle) = bundle {
        if let Some(prompt) = bundle.preambles.first() {
            return Some((PathBuf::from(&prompt.file), prompt.text.clone()));
        }
    }

    let discovered = bundle.and_then(|bundle| {
        bundle.scope.optimizable_paths.iter().find(|path| {
            let name = path.file_name().unwrap_or_default().to_string_lossy();
            name.ends_with(".rs") && (name == "main.rs" || name.contains("agent"))
        })
    });
    let target = match discovered {
        Some(path) => path.clone(),
        None => agent_root.join("src/main.rs"),
    };

    let content = std::fs::read_to_string(&target).ok()?;
    let prompt = extract_first_preamble_literal(&content)?;
    Some((target, prompt))
}
/// Extracts the text of the first `.preamble("...")` string literal in `content`.
///
/// The returned text is the raw source between the quotes: escape sequences such
/// as `\"` are kept verbatim and are not decoded, so the result can be matched
/// against the source file again. A quote preceded by a backslash escape does not
/// end the literal.
///
/// Returns `None` when the `.preamble("` marker is absent or the literal is never
/// closed. Raw strings and whitespace between `(` and `"` are not recognised.
fn extract_first_preamble_literal(content: &str) -> Option<String> {
    const MARKER: &str = ".preamble(\"";
    let start = content.find(MARKER)? + MARKER.len();
    let rest = &content[start..];

    // Walk the literal, skipping whatever character follows a backslash so that
    // `\"` and `\\` are not mistaken for the closing quote.
    let mut escaped = false;
    for (offset, ch) in rest.char_indices() {
        if escaped {
            escaped = false;
            continue;
        }
        match ch {
            '\\' => escaped = true,
            '"' => return Some(rest[..offset].to_string()),
            _ => {}
        }
    }
    None
}
/// Produces the preamble that results from applying `strategy` to `old`.
///
/// Each strategy contributes one fixed instruction sentence. Returns `None` when
/// the strategy has no preamble form: model-config always, and tool-description
/// unless the bundle lists at least one tool. If `old` already contains the
/// sentence (ignoring case and whitespace differences) it is returned unchanged.
/// Otherwise the sentence is appended after `old`, trimmed and stripped of
/// trailing periods; an empty `old` is replaced by a generic assistant preamble.
fn evolved_preamble_for_strategy(
    old: &str,
    strategy: &EditStrategy,
    bundle: Option<&AgentBundle>,
) -> Option<String> {
    let tools_discovered = bundle.is_some_and(|bundle| !bundle.tools.is_empty());

    let addition = match strategy {
        EditStrategy::SystemPrompt => {
            "Think step-by-step before answering. Always explain your reasoning in one sentence, then give the final answer."
        }
        EditStrategy::FallbackLogic => {
            "Never echo the user input as the final answer. If uncertain, state assumptions, reason briefly, and provide the best useful answer."
        }
        EditStrategy::OutputSchema => {
            "Always produce an answer, reasoning, and confidence from 0 to 1."
        }
        EditStrategy::ToolDescription if tools_discovered => {
            "Before answering, decide whether available tools improve factuality or completeness, and only use them when they add real value."
        }
        EditStrategy::ToolDescription | EditStrategy::ModelConfig => return None,
    };

    let already_present = normalize_prompt(old).contains(&normalize_prompt(addition));
    if already_present {
        return Some(old.to_string());
    }

    let stem = old.trim().trim_end_matches('.');
    let base = if stem.is_empty() {
        "You are a concise, helpful assistant"
    } else {
        stem
    };
    Some(format!("{base}. {addition}"))
}
/// Canonical form of a prompt for comparisons: whitespace runs collapsed to a
/// single space, leading and trailing whitespace dropped, everything lowercased.
fn normalize_prompt(value: &str) -> String {
    let mut collapsed = String::with_capacity(value.len());
    for word in value.split_whitespace() {
        // Words are never empty, so an empty buffer means this is the first word.
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.to_lowercase()
}
/// Heuristic quality score for a single agent run, based only on its output.
///
/// Reads the string fields `answer` and `reasoning` from the output (missing or
/// non-string fields count as empty):
/// - an answer starting with `Echo:` scores 0.4;
/// - otherwise the score starts at 0.75, gains 0.12 when the reasoning mentions
///   "think", "reason" or "step" (case-insensitive), gains 0.08 when the answer is
///   longer than 20 bytes, and is capped at 0.95.
pub fn mechanical_score(result: &AgentRunResult) -> f32 {
    let text_field = |key: &str| {
        result
            .output
            .get(key)
            .and_then(|value| value.as_str())
            .unwrap_or("")
    };
    let answer = text_field("answer");
    let reasoning = text_field("reasoning");

    if answer.starts_with("Echo:") {
        return 0.4;
    }

    let reasoning_lower = reasoning.to_lowercase();
    let shows_reasoning = ["think", "reason", "step"]
        .iter()
        .any(|cue| reasoning_lower.contains(*cue));

    let mut score = 0.75f32;
    if shows_reasoning {
        score += 0.12;
    }
    if answer.len() > 20 {
        score += 0.08;
    }
    score.min(0.95)
}
// Unit tests for the scoring heuristics, strategy mapping, fallback candidates
// and edit construction. File-based tests work inside a temporary directory.
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
// An echo answer must score below 0.5; a reasoned, longer answer above 0.8.
#[test]
fn test_mechanical_score_echo_vs_reasoned() {
let echo = AgentRunResult {
output: serde_json::json!({"answer": "Echo: hello", "reasoning": "no key"}),
duration_ms: 10,
success: true,
error: None,
traces: vec![],
};
let good = AgentRunResult {
output: serde_json::json!({"answer": "The answer is 42 because...", "reasoning": "Think step by step: 6*7"}),
duration_ms: 120,
success: true,
error: None,
traces: vec![],
};
assert!(mechanical_score(&echo) < 0.5);
assert!(mechanical_score(&good) > 0.8);
}
// Smoke test: the config can be built by hand with every field spelled out.
#[test]
fn test_optimize_config_defaults() {
let cfg = OptimizeConfig {
max_iterations: 1,
candidates_per_iteration: 1,
use_llm_judge: false,
budget: OptimizationBudget::Medium,
hook_policy: HookPolicy::default(),
review_before_apply: false,
quiet: false,
candidate_timeout: default_candidate_timeout(),
};
assert_eq!(cfg.max_iterations, 1);
}
// Each keyword group maps to its strategy; unknown text falls back to SystemPrompt.
#[test]
fn strategy_for_focus_maps_common_candidate_names() {
assert_eq!(
strategy_for_focus("improve tool descriptions"),
EditStrategy::ToolDescription
);
assert_eq!(
strategy_for_focus("fix fallback logic"),
EditStrategy::FallbackLogic
);
assert_eq!(
strategy_for_focus("tighten output schema"),
EditStrategy::OutputSchema
);
assert_eq!(
strategy_for_focus("lower model temperature"),
EditStrategy::ModelConfig
);
assert_eq!(strategy_for_focus("reasoning"), EditStrategy::SystemPrompt);
}
// Echo-fallback and invalid-JSON signals produce fallback-logic (first) and
// output-schema candidates.
#[test]
fn fallback_candidates_follow_trace_failures() {
let candidates = fallback_candidates_from_trace(&[TraceDiagnosis {
signals: vec![
crate::FailureSignal {
kind: FailureKind::EchoFallback,
severity: 2,
evidence: "Echo: hello".to_string(),
span_id: None,
},
crate::FailureSignal {
kind: FailureKind::InvalidJson,
severity: 2,
evidence: "raw stdout".to_string(),
span_id: None,
},
],
ranked_span_ids: vec![],
}]);
assert_eq!(candidates[0].strategy, Some(EditStrategy::FallbackLogic));
assert!(candidates
.iter()
.any(|candidate| candidate.strategy == Some(EditStrategy::OutputSchema)));
}
// Without a bundle, the preamble is read from src/main.rs and the output-schema
// sentence ends up in the patch.
#[test]
fn build_edit_for_candidate_creates_schema_preamble_patch() {
let dir = tempdir().unwrap();
let src = dir.path().join("src");
std::fs::create_dir_all(&src).unwrap();
let main = src.join("main.rs");
std::fs::write(
&main,
r#"fn main() { let _agent = client.agent("m").preamble("You are helpful.").build(); }"#,
)
.unwrap();
let candidate = Candidate {
focus: "output_schema".to_string(),
description: "make output contract explicit".to_string(),
expected_improvement: "more parseable output".to_string(),
strategy: Some(EditStrategy::OutputSchema),
};
let edit = build_edit_for_candidate(dir.path(), None, &candidate)
.unwrap()
.expect("schema strategy should produce a prompt edit");
assert_eq!(edit.file, main);
assert!(edit.patch.contains("answer, reasoning, and confidence"));
}
// The tool-description strategy yields no edit unless the bundle lists a tool.
#[test]
fn tool_strategy_requires_discovered_tools() {
let dir = tempdir().unwrap();
let src = dir.path().join("src");
std::fs::create_dir_all(&src).unwrap();
let main = src.join("main.rs");
std::fs::write(
&main,
r#"fn main() { let _agent = client.agent("m").preamble("You are helpful.").build(); }"#,
)
.unwrap();
let candidate = Candidate {
focus: "tool_description".to_string(),
description: "clarify tool use".to_string(),
expected_improvement: "better tool calls".to_string(),
strategy: Some(EditStrategy::ToolDescription),
};
let without_tools = build_edit_for_candidate(dir.path(), None, &candidate).unwrap();
assert!(without_tools.is_none());
// Same candidate, now with a bundle that reports one tool and no preambles.
let bundle = AgentBundle {
scope: mdx_rust_analysis::BundleScope {
optimizable_paths: vec![main],
read_only_paths: vec![],
},
preambles: vec![],
tools: vec![mdx_rust_analysis::ExtractedTool {
file: "src/main.rs".to_string(),
name: "search".to_string(),
description: None,
}],
is_rig_agent: true,
key_files: vec![],
};
let with_tools = build_edit_for_candidate(dir.path(), Some(&bundle), &candidate)
.unwrap()
.expect("tool strategy should produce a prompt edit when tools exist");
assert!(with_tools
.patch
.contains("available tools improve factuality"));
}
// With no preamble in the file, fallback logic rewrites the "Echo: {}" string instead.
#[test]
fn fallback_logic_strategy_can_patch_echo_fallback() {
let dir = tempdir().unwrap();
let src = dir.path().join("src");
std::fs::create_dir_all(&src).unwrap();
let main = src.join("main.rs");
std::fs::write(
&main,
r#"fn main() { println!("{}", format!("Echo: {}", "hello")); }"#,
)
.unwrap();
let candidate = Candidate {
focus: "fallback_logic".to_string(),
description: "avoid echo fallback".to_string(),
expected_improvement: "more useful fallback".to_string(),
strategy: Some(EditStrategy::FallbackLogic),
};
let edit = build_edit_for_candidate(dir.path(), None, &candidate)
.unwrap()
.expect("fallback logic should patch simple echo fallback");
assert_eq!(edit.file, main);
assert!(edit.patch.contains("Best-effort answer after reasoning"));
}
// The serialized record types expose JSON schemas titled after the Rust type.
#[test]
fn agent_facing_records_have_json_schemas() {
let audit_schema = schemars::schema_for!(AuditPacket);
let candidate_schema = schemars::schema_for!(Candidate);
let config_schema = schemars::schema_for!(OptimizeConfig);
assert_eq!(
audit_schema
.schema
.metadata
.as_ref()
.and_then(|m| m.title.as_deref()),
Some("AuditPacket")
);
assert_eq!(
candidate_schema
.schema
.metadata
.as_ref()
.and_then(|m| m.title.as_deref()),
Some("Candidate")
);
assert_eq!(
config_schema
.schema
.metadata
.as_ref()
.and_then(|m| m.title.as_deref()),
Some("OptimizeConfig")
);
}
}