1use crate::registry::RegisteredAgent;
17use crate::runner::AgentRunResult;
18use crate::safety_pipeline::{
19 execute_candidate_edit, CandidateExecutionConfig, CandidateExecutionContext,
20};
21use crate::{
22 diagnose_run, split_dataset, EvaluationDataset, ExperimentLedger, FailureKind, HookDecision,
23 HookPolicy, OptimizationBudget, PromptVariantRecord, ScorerMetadata, TraceDiagnosis,
24};
25use mdx_rust_analysis::editing::ProposedEdit;
26use mdx_rust_analysis::editing::ValidationCommandRecord;
27use mdx_rust_analysis::AgentBundle;
28use serde::{Deserialize, Serialize};
29use std::path::{Path, PathBuf};
30use std::time::Duration;
31
/// Builds a unified-diff style patch that replaces `old` with `new` in `source`.
///
/// The patch targets `file_path` (rendered lossily as UTF-8) and contains a
/// single hunk with up to three lines of context on either side of the change.
///
/// * `file_path` - path written into the `diff --git`, `---` and `+++` headers.
/// * `source` - current content of the file being patched.
/// * `old` - text to replace; may span several lines.
/// * `new` - replacement text; may span several lines.
///
/// Every occurrence of `old` inside the affected lines is replaced, and the
/// hunk is anchored at the first occurrence in `source`. The hunk header counts
/// the removed and added lines separately, so multi-line `old`/`new` values
/// produce a consistent header.
///
/// When `old` is empty or does not occur in `source`, a placeholder
/// `@@ -1,1 +1,1 @@` hunk containing just `-old` / `+new` is returned; that
/// form ends with a newline, whereas the located form does not.
fn generate_preamble_patch(file_path: &Path, source: &str, old: &str, new: &str) -> String {
    let diff_path = file_path.to_string_lossy();
    let file_header =
        format!("diff --git a/{diff_path} b/{diff_path}\n--- a/{diff_path}\n+++ b/{diff_path}");

    // An empty needle "matches" everywhere and cannot anchor a hunk, so it is
    // treated the same as text that is absent from the file.
    let located = if old.is_empty() {
        None
    } else {
        source.find(old)
    };
    let Some(match_start) = located else {
        return format!("{file_header}\n@@ -1,1 +1,1 @@\n-{old}\n+{new}\n");
    };
    let match_end = match_start + old.len();

    // Widen the match to whole lines: from the start of the line holding the
    // first matched byte to just past the newline that ends the last touched
    // line (or to the end of the file when there is no such newline).
    let region_start = source[..match_start].rfind('\n').map_or(0, |i| i + 1);
    let region_end = source[match_end..]
        .find('\n')
        .map_or(source.len(), |i| match_end + i + 1);
    let old_region = &source[region_start..region_end];
    let new_region = old_region.replace(old, new);

    let lines: Vec<&str> = source.lines().collect();
    let removed: Vec<&str> = old_region.lines().collect();
    let added: Vec<&str> = new_region.lines().collect();

    // Zero-based index of the first replaced line, then the context windows.
    let first_idx = source[..region_start].matches('\n').count();
    let hunk_start = first_idx.saturating_sub(3);
    let after_start = (first_idx + removed.len()).min(lines.len());
    let after_end = (after_start + 3).min(lines.len());
    let context_before = &lines[hunk_start..first_idx];
    let context_after = &lines[after_start..after_end];

    let hunk_header = format!(
        "@@ -{},{} +{},{} @@",
        hunk_start + 1,
        context_before.len() + removed.len() + context_after.len(),
        hunk_start + 1,
        context_before.len() + added.len() + context_after.len()
    );

    let mut patch_lines = vec![file_header, hunk_header];
    patch_lines.extend(context_before.iter().map(|line| format!(" {line}")));
    patch_lines.extend(removed.iter().map(|line| format!("-{line}")));
    patch_lines.extend(added.iter().map(|line| format!("+{line}")));
    patch_lines.extend(context_after.iter().map(|line| format!(" {line}")));

    patch_lines.join("\n")
}
97
/// Settings that control a single call to `run_optimization`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizeConfig {
    /// Number of optimization iterations to run.
    pub max_iterations: u32,
    /// Requested number of candidates per iteration; the effective limit is
    /// derived from it via `budget.candidate_limit(..)`.
    pub candidates_per_iteration: u32,
    /// NOTE(review): not read anywhere in this file — presumably consumed by a
    /// caller or reserved for an LLM-based scorer; confirm.
    pub use_llm_judge: bool,
    /// Budget used to split the dataset, seed the ledger and cap candidates.
    #[serde(default)]
    pub budget: OptimizationBudget,
    /// Hook policy forwarded to candidate execution.
    #[serde(default)]
    pub hook_policy: HookPolicy,
    /// Forwarded to candidate execution as `review_before_apply`.
    #[serde(default)]
    pub review_before_apply: bool,
    /// When set, suppresses the final re-evaluation line printed to stdout;
    /// also forwarded to candidate execution.
    #[serde(default)]
    pub quiet: bool,
    /// Timeout forwarded to candidate execution. Never (de)serialized; on
    /// deserialization it is filled in by `default_candidate_timeout`.
    #[serde(skip, default = "default_candidate_timeout")]
    pub candidate_timeout: Duration,
}
117
/// Record of one optimization iteration, as produced by `run_optimization`
/// and serialized into the experiment JSON file.
///
/// Most optional fields use `#[serde(default)]`, so JSON that omits them
/// still deserializes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationRun {
    /// Zero-based iteration index.
    pub iteration: u32,
    /// Mechanical score of each training input evaluated this iteration.
    pub scores: Vec<f32>,
    /// Sum of the `validated` counts reported by candidate executions.
    pub validated_changes: u32,
    /// Sum of the `landed` counts reported by candidate executions.
    pub landed_changes: u32,
    /// `accepted` count of the candidate that was accepted, or 0.
    pub accepted_changes: u32,
    /// Free-form log of what happened during the iteration.
    pub notes: String,
    /// All candidates proposed for this iteration (LLM or trace fallback).
    pub candidates: Vec<Candidate>,
    /// Diff of the accepted change, if any.
    #[serde(default)]
    pub diff: Option<String>,
    /// Hash of the policy file content, when a policy file was found.
    #[serde(default)]
    pub policy_hash: Option<String>,
    /// Version string of the evaluation dataset.
    #[serde(default)]
    pub dataset_version: Option<String>,
    /// Content hash of the evaluation dataset.
    #[serde(default)]
    pub dataset_hash: Option<String>,
    /// Baseline score; only populated when a change was accepted.
    #[serde(default)]
    pub baseline_score: Option<f32>,
    /// Score after the accepted change; only populated on acceptance.
    #[serde(default)]
    pub patched_score: Option<f32>,
    /// Score delta reported for the accepted change; only populated on acceptance.
    #[serde(default)]
    pub score_delta: Option<f32>,

    /// Short git `HEAD` of the agent directory captured before the loop;
    /// only populated on acceptance.
    #[serde(default)]
    pub git_sha_before: Option<String>,
    /// Short git `HEAD` of the agent directory after acceptance.
    #[serde(default)]
    pub git_sha_after: Option<String>,
    /// Stable hash of `diff`, when a diff is present on acceptance.
    #[serde(default)]
    pub diff_hash: Option<String>,
    /// Whether `git status --porcelain` printed anything after acceptance.
    #[serde(default)]
    pub working_tree_dirty_after: Option<bool>,
    /// Label of the scorer; only populated on acceptance.
    #[serde(default)]
    pub scorer: Option<String>,
    /// Fixed, human-readable list of validation steps; only populated on acceptance.
    #[serde(default)]
    pub validation_commands: Option<Vec<String>>,
    /// Validation command records reported by the accepted candidate.
    #[serde(default)]
    pub validation_command_records: Vec<ValidationCommandRecord>,
    /// Final validation command records reported by the accepted candidate.
    #[serde(default)]
    pub final_validation_command_records: Vec<ValidationCommandRecord>,
    /// One diagnosis per training input evaluated this iteration.
    #[serde(default)]
    pub trace_diagnosis: Vec<TraceDiagnosis>,
    /// Hook decisions collected from every executed candidate.
    #[serde(default)]
    pub hook_decisions: Vec<HookDecision>,
    /// Snapshot (clone) of the experiment ledger at the end of the iteration.
    #[serde(default)]
    pub ledger: Option<ExperimentLedger>,
    /// Holdout score reported by the accepted candidate.
    #[serde(default)]
    pub holdout_score: Option<f32>,
    /// Budget the run was configured with.
    #[serde(default)]
    pub budget: Option<OptimizationBudget>,
    /// Path of the policy file that was hashed, if one was found.
    #[serde(default)]
    pub policy_path: Option<String>,
    /// Provenance of the diagnosis model for this iteration.
    #[serde(default)]
    pub model: Option<ModelProvenance>,
    /// Rollback status reported by the accepted candidate.
    #[serde(default)]
    pub rollback_succeeded: Option<bool>,
    /// Rollback error reported by the accepted candidate.
    #[serde(default)]
    pub rollback_error: Option<String>,
    /// True when any executed candidate reported a timeout.
    #[serde(default)]
    pub candidate_timed_out: bool,
}
186
/// Describes which model backed a step of the run; stored on each
/// `OptimizationRun` and printed in the report as `provider:model (used=..)`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelProvenance {
    /// NOTE(review): presumably the purpose the model served (e.g. diagnosis);
    /// the value is set outside this file — confirm.
    pub role: String,
    /// Provider name, printed before the colon in the report.
    pub provider: String,
    /// Model name, printed after the colon in the report.
    pub model: String,
    /// Usage flag printed as `used=..` in the report. In `run_optimization`
    /// the provenance is built from whether the diagnosis call succeeded.
    pub used: bool,
}
194
/// A proposed improvement to try during an optimization iteration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Candidate {
    /// Short label of the area to improve; also used by `strategy_for_focus`
    /// to pick a strategy when `strategy` is absent.
    pub focus: String,
    /// Human-readable description of the change; copied into the edit description.
    pub description: String,
    /// What the change is expected to improve; shown in the report.
    pub expected_improvement: String,
    /// Explicit edit strategy; `None` (the serde default) means "derive from `focus`".
    #[serde(default)]
    pub strategy: Option<EditStrategy>,
}
204
/// Kind of edit a candidate maps to. `evolved_preamble_for_strategy` turns
/// each variant into a sentence appended to the preamble (or into no edit).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum EditStrategy {
    /// Append step-by-step reasoning instructions to the preamble.
    SystemPrompt,
    /// Append tool-usage guidance; only produces an edit when the bundle lists tools.
    ToolDescription,
    /// Replace an `Echo:` fallback string in source, or append anti-echo guidance.
    FallbackLogic,
    /// Append an explicit answer/reasoning/confidence output contract.
    OutputSchema,
    /// Recognized, but `evolved_preamble_for_strategy` produces no edit for it.
    ModelConfig,
}
213
/// Serde default for `OptimizeConfig::candidate_timeout`: five minutes.
fn default_candidate_timeout() -> Duration {
    const DEFAULT_TIMEOUT_SECS: u64 = 5 * 60;
    Duration::from_secs(DEFAULT_TIMEOUT_SECS)
}
217
218pub async fn run_optimization(
225 agent: &RegisteredAgent,
226 config: &OptimizeConfig,
227) -> anyhow::Result<Vec<OptimizationRun>> {
228 let mut runs = vec![];
229
230 let dataset = EvaluationDataset::synthetic_v1();
231 let split = split_dataset(&dataset, config.budget);
232 let mut ledger = ExperimentLedger::new(config.budget, &dataset, &split);
233 let dataset_hash = dataset.content_hash();
234 let scorer = ScorerMetadata::mechanical_v1();
235 let test_inputs: Vec<serde_json::Value> = split
236 .train
237 .iter()
238 .map(|sample| sample.input.clone())
239 .collect();
240 let holdout_inputs: Vec<serde_json::Value> = split
241 .holdout
242 .iter()
243 .map(|sample| sample.input.clone())
244 .collect();
245
246 let baseline_score: f32 = {
248 let mut total = 0.0f32;
249 for input in &test_inputs {
250 if let Ok(res) = crate::runner::run_agent(agent, input.clone()).await {
251 total += mechanical_score(&res);
252 }
253 }
254 if test_inputs.is_empty() {
255 0.0
256 } else {
257 total / test_inputs.len() as f32
258 }
259 };
260
261 let git_sha_before: Option<String> = std::process::Command::new("git")
263 .current_dir(&agent.path)
264 .args(["rev-parse", "--short", "HEAD"])
265 .output()
266 .ok()
267 .and_then(|o| {
268 if o.status.success() {
269 Some(String::from_utf8_lossy(&o.stdout).trim().to_string())
270 } else {
271 None
272 }
273 });
274 let policy_info = load_policy_info(&agent.name);
275
276 for iteration in 0..config.max_iterations {
277 let mut scores_this_iter = vec![];
278 let mut accepted_patched: Option<f32> = None;
279 let mut accepted_delta: Option<f32> = None;
280 let mut validated = 0;
281 let mut landed = 0;
282 let mut trace_diagnoses = Vec::new();
283 let mut hook_decisions = Vec::new();
284 let mut accepted_holdout_score = None;
285 let mut accepted_validation_commands = Vec::new();
286 let mut accepted_final_validation_commands = Vec::new();
287 let mut accepted_rollback_succeeded = None;
288 let mut accepted_rollback_error = None;
289 let mut any_candidate_timed_out = false;
290
291 for input in &test_inputs {
292 let run_result = crate::runner::run_agent(agent, input.clone()).await?;
293 trace_diagnoses.push(diagnose_run(&run_result));
294 let score = mechanical_score(&run_result);
295 scores_this_iter.push(score);
296 }
297
298 let avg_score: f32 = if scores_this_iter.is_empty() {
299 0.0
300 } else {
301 scores_this_iter.iter().sum::<f32>() / scores_this_iter.len() as f32
302 };
303
304 let rich_bundle = mdx_rust_analysis::analyze_agent(&agent.path, None).ok();
306 let file_count = rich_bundle
307 .as_ref()
308 .map(|b| b.scope.optimizable_paths.len())
309 .unwrap_or(0);
310
311 let bundle_summary = if let Some(ref b) = rich_bundle {
313 let mut s = format!(
314 "{} source files, Rig agent = {}",
315 file_count, b.is_rig_agent
316 );
317 if !b.preambles.is_empty() {
318 s.push_str(&format!(
319 ", current preambles: {:?}",
320 b.preambles.iter().map(|p| &p.text).collect::<Vec<_>>()
321 ));
322 }
323 if !b.tools.is_empty() {
324 s.push_str(&format!(
325 ", tools: {:?}",
326 b.tools.iter().map(|t| &t.name).collect::<Vec<_>>()
327 ));
328 }
329 s
330 } else {
331 format!("{} source files (limited analysis)", file_count)
332 };
333
334 let llm = crate::llm::LlmClient::default();
335 let diag_req = crate::llm::DiagnosisRequest {
336 policy: "Improve the agent so it gives high-quality, reasoned answers instead of echoing. Prefer explicit step-by-step reasoning in the system prompt.".to_string(),
337 bundle_summary,
338 traces_summary: summarize_trace_diagnoses(&trace_diagnoses),
339 scores: scores_this_iter.clone(),
340 };
341
342 let diagnosis_result = llm.diagnose(diag_req).await;
343 let diagnosis_model_used = diagnosis_result.is_ok();
344 let diagnosis = diagnosis_result.ok();
345
346 let mut candidates = vec![];
347 let mut accepted = 0;
348 let mut notes = format!(
349 "Avg score this iter: {:.2} ({} files in bundle)",
350 avg_score, file_count
351 );
352 let mut accepted_diff: Option<String> = None;
353
354 if let Some(d) = diagnosis {
355 notes.push_str(&format!(" → LLM: {}", d.summary));
356 for c in d.candidates {
357 let strategy = strategy_for_focus(&c.focus);
358 candidates.push(Candidate {
359 focus: c.focus,
360 description: c.description,
361 expected_improvement: c.expected_improvement,
362 strategy: Some(strategy),
363 });
364 }
365 } else {
366 candidates = fallback_candidates_from_trace(&trace_diagnoses);
367 }
368
369 if !candidates.is_empty() {
370 let candidate_limit = config
371 .budget
372 .candidate_limit(config.candidates_per_iteration);
373 for (candidate_index, candidate) in candidates.iter().take(candidate_limit).enumerate()
374 {
375 if accepted > 0 {
376 break;
377 }
378
379 let Some(edit) =
380 build_edit_for_candidate(&agent.path, rich_bundle.as_ref(), candidate)?
381 else {
382 notes.push_str(&format!(
383 " (candidate {} skipped: no safe edit plan for {:?})",
384 candidate.focus, candidate.strategy
385 ));
386 continue;
387 };
388
389 notes.push_str(&format!(
390 " → Candidate {}: {} ({:?})",
391 candidate_index + 1,
392 candidate.focus,
393 candidate.strategy
394 ));
395
396 ledger.record_variant(PromptVariantRecord::from_patch(
397 format!("{:?}", candidate.strategy),
398 edit.file.display().to_string(),
399 edit.description.clone(),
400 &edit.patch,
401 ));
402
403 let outcome = execute_candidate_edit(CandidateExecutionContext {
404 agent,
405 config: CandidateExecutionConfig {
406 hook_policy: &config.hook_policy,
407 review_before_apply: config.review_before_apply,
408 quiet: config.quiet,
409 candidate_timeout: config.candidate_timeout,
410 },
411 iteration,
412 candidate_index,
413 edit: &edit,
414 test_inputs: &test_inputs,
415 holdout_inputs: &holdout_inputs,
416 baseline_score,
417 scorer: mechanical_score,
418 })
419 .await;
420
421 validated += outcome.validated;
422 landed += outcome.landed;
423 any_candidate_timed_out |= outcome.timed_out;
424 hook_decisions.extend(outcome.hook_decisions);
425
426 if outcome.accepted > 0 {
427 accepted = outcome.accepted;
428 accepted_diff = outcome.accepted_diff;
429 accepted_patched = outcome.patched_score;
430 accepted_delta = outcome.delta;
431 accepted_holdout_score = outcome.holdout_score;
432 accepted_validation_commands = outcome.validation_commands;
433 accepted_final_validation_commands = outcome.final_validation_commands;
434 accepted_rollback_succeeded = outcome.rollback_succeeded;
435 accepted_rollback_error = outcome.rollback_error;
436 }
437
438 notes.push_str(&outcome.note);
439 }
440 } else {
441 accepted = 0; notes.push_str(" → No new candidates — current behavior is good (no change applied)");
443 }
444
445 let (run_baseline, run_patched, run_delta) = if accepted > 0 {
446 (Some(baseline_score), accepted_patched, accepted_delta)
447 } else {
448 (None, None, None)
449 };
450
451 let (prov_before, prov_after, prov_diff_hash, prov_dirty, prov_scorer, prov_cmds) =
453 if accepted > 0 {
454 let after = std::process::Command::new("git")
455 .current_dir(&agent.path)
456 .args(["rev-parse", "--short", "HEAD"])
457 .output()
458 .ok()
459 .and_then(|o| {
460 if o.status.success() {
461 Some(String::from_utf8_lossy(&o.stdout).trim().to_string())
462 } else {
463 None
464 }
465 });
466 let dirty_after = std::process::Command::new("git")
467 .current_dir(&agent.path)
468 .args(["status", "--porcelain"])
469 .output()
470 .ok()
471 .filter(|output| output.status.success())
472 .map(|output| !output.stdout.is_empty());
473
474 (
475 git_sha_before.clone(),
476 after,
477 accepted_diff
478 .as_ref()
479 .map(|diff| stable_hash_hex(diff.as_bytes())),
480 dirty_after,
481 Some(scorer.label()),
482 Some(vec![
483 "cargo check (isolated)".to_string(),
484 "cargo clippy -D warnings (isolated)".to_string(),
485 "final validate_build after land (real tree)".to_string(),
486 ]),
487 )
488 } else {
489 (None, None, None, None, None, None)
490 };
491
492 runs.push(OptimizationRun {
493 iteration,
494 scores: scores_this_iter,
495 validated_changes: validated,
496 landed_changes: landed,
497 accepted_changes: accepted,
498 notes,
499 candidates,
500 diff: accepted_diff,
501 policy_hash: policy_info.as_ref().map(|policy| policy.hash.clone()),
502 dataset_version: Some(dataset.version.clone()),
503 dataset_hash: Some(dataset_hash.clone()),
504 baseline_score: run_baseline,
505 patched_score: run_patched,
506 score_delta: run_delta,
507 git_sha_before: prov_before,
508 git_sha_after: prov_after,
509 diff_hash: prov_diff_hash,
510 working_tree_dirty_after: prov_dirty,
511 scorer: prov_scorer,
512 validation_commands: prov_cmds,
513 validation_command_records: accepted_validation_commands,
514 final_validation_command_records: accepted_final_validation_commands,
515 trace_diagnosis: trace_diagnoses,
516 hook_decisions,
517 ledger: Some(ledger.clone()),
518 holdout_score: accepted_holdout_score,
519 budget: Some(config.budget),
520 policy_path: policy_info
521 .as_ref()
522 .map(|policy| policy.path.display().to_string()),
523 model: Some(llm.provenance(diagnosis_model_used)),
524 rollback_succeeded: accepted_rollback_succeeded,
525 rollback_error: accepted_rollback_error,
526 candidate_timed_out: any_candidate_timed_out,
527 });
528
529 if accepted > 0 && iteration > 0 {
530 }
532 }
533
534 let experiment_dir = std::env::current_dir()?
536 .join(".mdx-rust")
537 .join("agents")
538 .join(&agent.name)
539 .join("experiments");
540
541 std::fs::create_dir_all(&experiment_dir).ok();
542
543 let timestamp = std::time::SystemTime::now()
544 .duration_since(std::time::UNIX_EPOCH)
545 .map(|d| d.as_secs())
546 .unwrap_or(0);
547
548 let experiment_file = experiment_dir.join(format!("run-{}.json", timestamp));
549 if let Ok(content) = serde_json::to_string_pretty(&runs) {
550 let _ = std::fs::write(experiment_file, content);
551 }
552
553 if runs.iter().any(|r| r.accepted_changes > 0) {
555 let git_sha = std::process::Command::new("git")
556 .args(["rev-parse", "--short", "HEAD"])
557 .output()
558 .ok()
559 .and_then(|o| String::from_utf8(o.stdout).ok())
560 .map(|s| s.trim().to_string())
561 .unwrap_or_else(|| "unknown".to_string());
562
563 let mut report = format!(
564 "# Optimization Report for '{}'\n\nTimestamp: {}\nGit SHA: {}\n\n## Summary\n\n",
565 agent.name, timestamp, git_sha
566 );
567
568 for run in &runs {
569 if run.accepted_changes > 0 {
570 report.push_str(&format!(
571 "- Iteration {}: Accepted {} change(s)\n Notes: {}\n",
572 run.iteration, run.accepted_changes, run.notes
573 ));
574
575 if let Some(d) = &run.diff {
576 report.push_str(&format!("\n```diff\n{}\n```\n", d));
577 } else {
578 report.push_str(" (Change persisted to src/main.rs)\n");
579 }
580
581 if let Some(h) = &run.policy_hash {
582 report.push_str(&format!(" Policy hash: {}\n", h));
583 }
584 if let Some(v) = &run.dataset_version {
585 report.push_str(&format!(" Dataset version: {}\n", v));
586 }
587 if let Some(path) = &run.policy_path {
588 report.push_str(&format!(" Policy path: {}\n", path));
589 }
590 if let Some(model) = &run.model {
591 report.push_str(&format!(
592 " Diagnosis model: {}:{} (used={})\n",
593 model.provider, model.model, model.used
594 ));
595 }
596 if !run.validation_command_records.is_empty() {
597 report.push_str(" Isolated validation commands:\n");
598 for command in &run.validation_command_records {
599 report.push_str(&format!(
600 " - {} | success={} | timeout={} | status={:?} | duration_ms={}\n",
601 command.command,
602 command.success,
603 command.timed_out,
604 command.status_code,
605 command.duration_ms
606 ));
607 }
608 }
609 if !run.final_validation_command_records.is_empty() {
610 report.push_str(" Final validation commands:\n");
611 for command in &run.final_validation_command_records {
612 report.push_str(&format!(
613 " - {} | success={} | timeout={} | status={:?} | duration_ms={}\n",
614 command.command,
615 command.success,
616 command.timed_out,
617 command.status_code,
618 command.duration_ms
619 ));
620 }
621 }
622 }
623 }
624
625 report.push_str("\n## Candidates Considered\n\n");
626 for run in &runs {
627 for (i, c) in run.candidates.iter().enumerate() {
628 report.push_str(&format!(
629 "- [{}] {}: {}\n Expected: {}\n\n",
630 i + 1,
631 c.focus,
632 c.description,
633 c.expected_improvement
634 ));
635 }
636 }
637
638 let _ = std::fs::write(
639 experiment_dir.join(format!("report-{}.md", timestamp)),
640 report,
641 );
642 }
643
644 if runs.iter().any(|r| r.accepted_changes > 0) {
646 let mut final_scores = vec![];
647 for input in &test_inputs {
648 if let Ok(res) = crate::runner::run_agent(agent, input.clone()).await {
649 final_scores.push(mechanical_score(&res));
650 }
651 }
652 if !final_scores.is_empty() {
653 let final_avg = final_scores.iter().sum::<f32>() / final_scores.len() as f32;
654 if !config.quiet {
655 println!(
656 " Final re-evaluation after accepted changes: {:.2}",
657 final_avg
658 );
659 }
660 }
661 }
662
663 Ok(runs)
664}
665
/// Location and content hash of the policy file found by `load_policy_info`.
#[derive(Debug, Clone)]
struct PolicyInfo {
    /// Path of the policy file that was read.
    path: PathBuf,
    /// `stable_hash_hex` of the file's raw bytes.
    hash: String,
}
671
672fn load_policy_info(agent_name: &str) -> Option<PolicyInfo> {
673 let cwd = std::env::current_dir().ok()?;
674 let candidates = [
675 cwd.join(".mdx-rust")
676 .join("agents")
677 .join(agent_name)
678 .join("policies.md"),
679 cwd.join(".mdx-rust").join("policies.md"),
680 ];
681
682 candidates
683 .iter()
684 .find_map(|path| std::fs::read(path).ok().map(|content| (path, content)))
685 .map(|(path, content)| PolicyInfo {
686 path: path.clone(),
687 hash: stable_hash_hex(&content),
688 })
689}
690
/// Local shorthand that forwards `bytes` to `crate::eval::stable_hash_hex`
/// and returns its result unchanged.
fn stable_hash_hex(bytes: &[u8]) -> String {
    crate::eval::stable_hash_hex(bytes)
}
694
695fn strategy_for_focus(focus: &str) -> EditStrategy {
696 let normalized = focus.to_lowercase();
697
698 if normalized.contains("tool") {
699 EditStrategy::ToolDescription
700 } else if normalized.contains("fallback") || normalized.contains("logic") {
701 EditStrategy::FallbackLogic
702 } else if normalized.contains("schema") || normalized.contains("output") {
703 EditStrategy::OutputSchema
704 } else if normalized.contains("model") || normalized.contains("temperature") {
705 EditStrategy::ModelConfig
706 } else {
707 EditStrategy::SystemPrompt
708 }
709}
710
711fn fallback_candidates_from_trace(diagnoses: &[TraceDiagnosis]) -> Vec<Candidate> {
712 let mut candidates = Vec::new();
713
714 if diagnoses.iter().any(|diagnosis| {
715 diagnosis
716 .signals
717 .iter()
718 .any(|signal| signal.kind == FailureKind::EchoFallback)
719 }) {
720 candidates.push(Candidate {
721 focus: "fallback_logic".to_string(),
722 description: "Prevent echo fallback and require a useful best-effort answer."
723 .to_string(),
724 expected_improvement: "Reduce low-value echo responses.".to_string(),
725 strategy: Some(EditStrategy::FallbackLogic),
726 });
727 }
728
729 if diagnoses.iter().any(|diagnosis| {
730 diagnosis
731 .signals
732 .iter()
733 .any(|signal| signal.kind == FailureKind::InvalidJson)
734 }) {
735 candidates.push(Candidate {
736 focus: "output_schema".to_string(),
737 description: "Make the output contract explicit for answer, reasoning, and confidence."
738 .to_string(),
739 expected_improvement: "Improve parseability for agent callers.".to_string(),
740 strategy: Some(EditStrategy::OutputSchema),
741 });
742 }
743
744 if diagnoses.iter().any(|diagnosis| {
745 diagnosis.signals.iter().any(|signal| {
746 matches!(
747 signal.kind,
748 FailureKind::MissingReasoning | FailureKind::LowConfidence
749 )
750 })
751 }) {
752 candidates.push(Candidate {
753 focus: "system_prompt".to_string(),
754 description: "Strengthen the system prompt with explicit reasoning instructions."
755 .to_string(),
756 expected_improvement: "Increase reasoning quality and confidence.".to_string(),
757 strategy: Some(EditStrategy::SystemPrompt),
758 });
759 }
760
761 if candidates.is_empty() {
762 candidates.push(Candidate {
763 focus: "system_prompt".to_string(),
764 description: "Strengthen the system prompt with explicit reasoning instructions."
765 .to_string(),
766 expected_improvement: "Improve answer quality.".to_string(),
767 strategy: Some(EditStrategy::SystemPrompt),
768 });
769 }
770
771 candidates
772}
773
774fn summarize_trace_diagnoses(diagnoses: &[TraceDiagnosis]) -> String {
775 let mut summaries = Vec::new();
776
777 for diagnosis in diagnoses {
778 if diagnosis.has_failures() {
779 summaries.push(diagnosis.compact_summary());
780 }
781 }
782
783 if summaries.is_empty() {
784 "No obvious trace failures detected.".to_string()
785 } else {
786 format!("Trace failures: {}", summaries.join(" | "))
787 }
788}
789
790fn build_edit_for_candidate(
791 agent_root: &Path,
792 bundle: Option<&AgentBundle>,
793 candidate: &Candidate,
794) -> anyhow::Result<Option<ProposedEdit>> {
795 let strategy = candidate
796 .strategy
797 .clone()
798 .unwrap_or_else(|| strategy_for_focus(&candidate.focus));
799
800 let Some((target_file, old_preamble)) = select_preamble_target(agent_root, bundle) else {
801 if strategy == EditStrategy::FallbackLogic {
802 return build_echo_fallback_edit(agent_root, bundle, &candidate.description);
803 }
804 return Ok(None);
805 };
806
807 if strategy == EditStrategy::FallbackLogic {
808 if let Some(edit) = build_echo_fallback_edit(agent_root, bundle, &candidate.description)? {
809 return Ok(Some(edit));
810 }
811 }
812
813 let Some(new_preamble) = evolved_preamble_for_strategy(&old_preamble, &strategy, bundle) else {
814 return Ok(None);
815 };
816
817 if normalize_prompt(&new_preamble) == normalize_prompt(&old_preamble) {
818 return Ok(None);
819 }
820
821 let content = std::fs::read_to_string(&target_file)?;
822 let relative_target = target_file
823 .strip_prefix(agent_root)
824 .unwrap_or(&target_file)
825 .to_path_buf();
826 let patch = generate_preamble_patch(&relative_target, &content, &old_preamble, &new_preamble);
827
828 Ok(Some(ProposedEdit {
829 file: target_file,
830 description: format!("{:?}: {}", strategy, candidate.description),
831 patch,
832 }))
833}
834
835fn build_echo_fallback_edit(
836 agent_root: &Path,
837 bundle: Option<&AgentBundle>,
838 description: &str,
839) -> anyhow::Result<Option<ProposedEdit>> {
840 let mut candidates: Vec<PathBuf> = bundle
841 .map(|bundle| {
842 bundle
843 .scope
844 .optimizable_paths
845 .iter()
846 .filter(|path| path.extension().is_some_and(|extension| extension == "rs"))
847 .cloned()
848 .collect()
849 })
850 .unwrap_or_default();
851
852 if candidates.is_empty() {
853 candidates.push(agent_root.join("src/main.rs"));
854 }
855
856 for target_file in candidates {
857 let Ok(content) = std::fs::read_to_string(&target_file) else {
858 continue;
859 };
860
861 let replacements = [
862 (
863 "Echo: {}",
864 "Best-effort answer after reasoning: {}",
865 "replace echo fallback format string",
866 ),
867 (
868 "Echo: ",
869 "Best-effort answer after reasoning: ",
870 "replace echo fallback prefix",
871 ),
872 ];
873
874 for (old, new, label) in replacements {
875 if !content.contains(old) {
876 continue;
877 }
878
879 let relative_target = target_file
880 .strip_prefix(agent_root)
881 .unwrap_or(&target_file)
882 .to_path_buf();
883 let patch = generate_preamble_patch(&relative_target, &content, old, new);
884
885 return Ok(Some(ProposedEdit {
886 file: target_file,
887 description: format!("FallbackLogic: {description} ({label})"),
888 patch,
889 }));
890 }
891 }
892
893 Ok(None)
894}
895
896fn select_preamble_target(
897 agent_root: &Path,
898 bundle: Option<&AgentBundle>,
899) -> Option<(PathBuf, String)> {
900 if let Some(prompt) = bundle.and_then(|bundle| bundle.preambles.first()) {
901 return Some((PathBuf::from(&prompt.file), prompt.text.clone()));
902 }
903
904 let target = bundle
905 .and_then(|bundle| {
906 bundle.scope.optimizable_paths.iter().find(|path| {
907 let name = path.file_name().unwrap_or_default().to_string_lossy();
908 name.ends_with(".rs") && (name == "main.rs" || name.contains("agent"))
909 })
910 })
911 .cloned()
912 .unwrap_or_else(|| agent_root.join("src/main.rs"));
913
914 let content = std::fs::read_to_string(&target).ok()?;
915 extract_first_preamble_literal(&content).map(|prompt| (target, prompt))
916}
917
/// Returns the raw source text of the first `.preamble("...")` string literal.
///
/// The text is returned exactly as written in the source, escape sequences
/// included, so it can be used for a textual search-and-replace on the same
/// file. A backslash-escaped character (such as `\"` or `\\`) never ends the
/// literal. Returns `None` when the marker is missing or the literal is not
/// closed.
fn extract_first_preamble_literal(content: &str) -> Option<String> {
    let marker = ".preamble(\"";
    let start = content.find(marker)? + marker.len();
    let rest = &content[start..];

    // Scan for the closing quote, skipping whatever follows a backslash.
    let mut escaped = false;
    for (index, ch) in rest.char_indices() {
        if escaped {
            escaped = false;
            continue;
        }
        match ch {
            '\\' => escaped = true,
            '"' => return Some(rest[..index].to_string()),
            _ => {}
        }
    }

    None
}
925
926fn evolved_preamble_for_strategy(
927 old: &str,
928 strategy: &EditStrategy,
929 bundle: Option<&AgentBundle>,
930) -> Option<String> {
931 let addition = match strategy {
932 EditStrategy::SystemPrompt => {
933 "Think step-by-step before answering. Always explain your reasoning in one sentence, then give the final answer."
934 }
935 EditStrategy::FallbackLogic => {
936 "Never echo the user input as the final answer. If uncertain, state assumptions, reason briefly, and provide the best useful answer."
937 }
938 EditStrategy::OutputSchema => {
939 "Always produce an answer, reasoning, and confidence from 0 to 1."
940 }
941 EditStrategy::ToolDescription => {
942 let has_tools = bundle.is_some_and(|bundle| !bundle.tools.is_empty());
943 if !has_tools {
944 return None;
945 }
946 "Before answering, decide whether available tools improve factuality or completeness, and only use them when they add real value."
947 }
948 EditStrategy::ModelConfig => return None,
949 };
950
951 if normalize_prompt(old).contains(&normalize_prompt(addition)) {
952 return Some(old.to_string());
953 }
954
955 let mut base = old.trim().trim_end_matches('.').to_string();
956 if base.is_empty() {
957 base = "You are a concise, helpful assistant".to_string();
958 }
959 Some(format!("{base}. {addition}"))
960}
961
/// Canonical form of a prompt for comparisons: whitespace runs collapsed to a
/// single space, leading and trailing whitespace dropped, everything lowercased.
fn normalize_prompt(value: &str) -> String {
    let mut collapsed = String::with_capacity(value.len());
    for word in value.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.to_lowercase()
}
969
970pub fn mechanical_score(result: &AgentRunResult) -> f32 {
973 let answer = result
974 .output
975 .get("answer")
976 .and_then(|v| v.as_str())
977 .unwrap_or("");
978 let reasoning = result
979 .output
980 .get("reasoning")
981 .and_then(|v| v.as_str())
982 .unwrap_or("");
983
984 if answer.starts_with("Echo:") {
985 return 0.4;
986 }
987
988 let mut score = 0.75f32;
989
990 if reasoning.to_lowercase().contains("think")
992 || reasoning.to_lowercase().contains("reason")
993 || reasoning.to_lowercase().contains("step")
994 {
995 score += 0.12;
996 }
997
998 if answer.len() > 20 {
1000 score += 0.08;
1001 }
1002
1003 score.min(0.95)
1004}
1005
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Creates a temporary agent directory whose `src/main.rs` holds `source`.
    /// The returned guard must stay alive for as long as the path is used.
    fn agent_with_main(source: &str) -> (tempfile::TempDir, PathBuf) {
        let root = tempdir().unwrap();
        let src_dir = root.path().join("src");
        std::fs::create_dir_all(&src_dir).unwrap();
        let main_rs = src_dir.join("main.rs");
        std::fs::write(&main_rs, source).unwrap();
        (root, main_rs)
    }

    /// Builds a candidate with an explicit strategy.
    fn candidate_for(
        focus: &str,
        description: &str,
        expected_improvement: &str,
        strategy: EditStrategy,
    ) -> Candidate {
        Candidate {
            focus: focus.to_string(),
            description: description.to_string(),
            expected_improvement: expected_improvement.to_string(),
            strategy: Some(strategy),
        }
    }

    /// Source of a minimal agent that declares a preamble.
    const PREAMBLE_AGENT: &str =
        r#"fn main() { let _agent = client.agent("m").preamble("You are helpful.").build(); }"#;

    #[test]
    fn test_mechanical_score_echo_vs_reasoned() {
        let echoing = AgentRunResult {
            output: serde_json::json!({"answer": "Echo: hello", "reasoning": "no key"}),
            duration_ms: 10,
            success: true,
            error: None,
            traces: vec![],
        };
        let reasoned = AgentRunResult {
            output: serde_json::json!({"answer": "The answer is 42 because...", "reasoning": "Think step by step: 6*7"}),
            duration_ms: 120,
            success: true,
            error: None,
            traces: vec![],
        };

        assert!(mechanical_score(&echoing) < 0.5);
        assert!(mechanical_score(&reasoned) > 0.8);
    }

    #[test]
    fn test_optimize_config_defaults() {
        let config = OptimizeConfig {
            max_iterations: 1,
            candidates_per_iteration: 1,
            use_llm_judge: false,
            budget: OptimizationBudget::Medium,
            hook_policy: HookPolicy::default(),
            review_before_apply: false,
            quiet: false,
            candidate_timeout: default_candidate_timeout(),
        };
        assert_eq!(config.max_iterations, 1);
    }

    #[test]
    fn strategy_for_focus_maps_common_candidate_names() {
        let expectations = [
            ("improve tool descriptions", EditStrategy::ToolDescription),
            ("fix fallback logic", EditStrategy::FallbackLogic),
            ("tighten output schema", EditStrategy::OutputSchema),
            ("lower model temperature", EditStrategy::ModelConfig),
            ("reasoning", EditStrategy::SystemPrompt),
        ];

        for (focus, expected) in expectations {
            assert_eq!(strategy_for_focus(focus), expected);
        }
    }

    #[test]
    fn fallback_candidates_follow_trace_failures() {
        let diagnosis = TraceDiagnosis {
            signals: vec![
                crate::FailureSignal {
                    kind: FailureKind::EchoFallback,
                    severity: 2,
                    evidence: "Echo: hello".to_string(),
                    span_id: None,
                },
                crate::FailureSignal {
                    kind: FailureKind::InvalidJson,
                    severity: 2,
                    evidence: "raw stdout".to_string(),
                    span_id: None,
                },
            ],
            ranked_span_ids: vec![],
        };

        let candidates = fallback_candidates_from_trace(&[diagnosis]);

        assert_eq!(candidates[0].strategy, Some(EditStrategy::FallbackLogic));
        assert!(candidates
            .iter()
            .any(|candidate| candidate.strategy == Some(EditStrategy::OutputSchema)));
    }

    #[test]
    fn build_edit_for_candidate_creates_schema_preamble_patch() {
        let (root, main_rs) = agent_with_main(PREAMBLE_AGENT);
        let candidate = candidate_for(
            "output_schema",
            "make output contract explicit",
            "more parseable output",
            EditStrategy::OutputSchema,
        );

        let edit = build_edit_for_candidate(root.path(), None, &candidate)
            .unwrap()
            .expect("schema strategy should produce a prompt edit");

        assert_eq!(edit.file, main_rs);
        assert!(edit.patch.contains("answer, reasoning, and confidence"));
    }

    #[test]
    fn tool_strategy_requires_discovered_tools() {
        let (root, main_rs) = agent_with_main(PREAMBLE_AGENT);
        let candidate = candidate_for(
            "tool_description",
            "clarify tool use",
            "better tool calls",
            EditStrategy::ToolDescription,
        );

        // Without a bundle there are no known tools, so no edit is proposed.
        let without_tools = build_edit_for_candidate(root.path(), None, &candidate).unwrap();
        assert!(without_tools.is_none());

        let bundle = AgentBundle {
            scope: mdx_rust_analysis::BundleScope {
                optimizable_paths: vec![main_rs],
                read_only_paths: vec![],
            },
            preambles: vec![],
            tools: vec![mdx_rust_analysis::ExtractedTool {
                file: "src/main.rs".to_string(),
                name: "search".to_string(),
                description: None,
            }],
            is_rig_agent: true,
            key_files: vec![],
        };

        let with_tools = build_edit_for_candidate(root.path(), Some(&bundle), &candidate)
            .unwrap()
            .expect("tool strategy should produce a prompt edit when tools exist");
        assert!(with_tools
            .patch
            .contains("available tools improve factuality"));
    }

    #[test]
    fn fallback_logic_strategy_can_patch_echo_fallback() {
        let (root, main_rs) =
            agent_with_main(r#"fn main() { println!("{}", format!("Echo: {}", "hello")); }"#);
        let candidate = candidate_for(
            "fallback_logic",
            "avoid echo fallback",
            "more useful fallback",
            EditStrategy::FallbackLogic,
        );

        let edit = build_edit_for_candidate(root.path(), None, &candidate)
            .unwrap()
            .expect("fallback logic should patch simple echo fallback");

        assert_eq!(edit.file, main_rs);
        assert!(edit.patch.contains("Best-effort answer after reasoning"));
    }
}