1use serde::{Deserialize, Serialize};
14use std::path::{Path, PathBuf};
15use std::time::Instant;
16
17#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct RunStats {
20 pub steps: usize,
22 pub tool_errors: usize,
24 pub loop_warnings: usize,
26 pub loop_aborts: usize,
28 pub patch_failures: usize,
30 pub successful_calls: usize,
32 pub completed: bool,
34 pub cost_chars: usize,
36}
37
38impl Default for RunStats {
39 #[allow(clippy::derivable_impls)]
40 fn default() -> Self {
41 Self {
42 steps: 0,
43 tool_errors: 0,
44 loop_warnings: 0,
45 loop_aborts: 0,
46 patch_failures: 0,
47 successful_calls: 0,
48 completed: false,
49 cost_chars: 0,
50 }
51 }
52}
53
54pub fn score(stats: &RunStats) -> f64 {
61 if stats.steps == 0 {
62 return 0.0;
63 }
64 let efficiency = stats.successful_calls as f64 / stats.steps as f64;
65 let completion_bonus = if stats.completed { 1.0 } else { 0.5 };
66 let loop_penalty = 1.0 - (stats.loop_warnings as f64 * 0.05).min(0.3);
67 (efficiency * completion_bonus * loop_penalty).clamp(0.0, 1.0)
68}
69
70#[derive(Debug, Clone, Serialize, Deserialize)]
76pub struct EvolutionEntry {
77 pub ts: String,
79 pub commit: String,
81 pub title: String,
83 pub score_before: f64,
85 pub score_after: f64,
87 pub status: String,
89 pub stats: RunStats,
91}
92
/// Returns the location of the evolution log: the file `evolution.jsonl`
/// directly inside `agent_home`.
pub fn evolution_log_path(agent_home: &str) -> PathBuf {
    let mut log_path = PathBuf::from(agent_home);
    log_path.push("evolution.jsonl");
    log_path
}
97
98pub fn log_evolution(agent_home: &str, entry: &EvolutionEntry) -> Result<(), String> {
100 let path = evolution_log_path(agent_home);
101 if let Some(parent) = path.parent() {
102 std::fs::create_dir_all(parent).map_err(|e| format!("mkdir: {}", e))?;
103 }
104 let line = serde_json::to_string(entry).map_err(|e| format!("serialize: {}", e))?;
105 use std::io::Write;
106 let mut f = std::fs::OpenOptions::new()
107 .create(true)
108 .append(true)
109 .open(&path)
110 .map_err(|e| format!("open: {}", e))?;
111 writeln!(f, "{}", line).map_err(|e| format!("write: {}", e))?;
112 Ok(())
113}
114
115pub fn load_evolution(agent_home: &str) -> Vec<EvolutionEntry> {
117 let path = evolution_log_path(agent_home);
118 let content = match std::fs::read_to_string(&path) {
119 Ok(c) => c,
120 Err(_) => return Vec::new(),
121 };
122 content
123 .lines()
124 .filter(|l| !l.trim().is_empty())
125 .filter_map(|l| serde_json::from_str(l).ok())
126 .collect()
127}
128
129pub fn baseline_score(agent_home: &str) -> f64 {
131 load_evolution(agent_home)
132 .last()
133 .map(|e| e.score_after)
134 .unwrap_or(0.0)
135}
136
137pub fn evolution_summary(agent_home: &str) -> (usize, usize, usize) {
139 let entries = load_evolution(agent_home);
140 let keep = entries.iter().filter(|e| e.status == "keep").count();
141 let discard = entries.iter().filter(|e| e.status == "discard").count();
142 let crash = entries.iter().filter(|e| e.status == "crash").count();
143 (keep, discard, crash)
144}
145
146#[derive(Debug, Clone, Serialize, Deserialize)]
152pub struct Improvement {
153 pub title: String,
155 pub reason: String,
157 pub approach: String,
159 pub priority: u8,
161 pub target_files: Vec<String>,
163}
164
165pub fn evaluate(stats: &RunStats) -> Vec<Improvement> {
167 let mut improvements = Vec::new();
168
169 if stats.tool_errors > 3 && stats.steps > 0 {
171 let error_rate = stats.tool_errors as f64 / stats.steps as f64;
172 if error_rate > 0.3 {
173 improvements.push(Improvement {
174 title: "Reduce tool error rate".into(),
175 reason: format!(
176 "{} errors in {} steps ({:.0}% error rate)",
177 stats.tool_errors,
178 stats.steps,
179 error_rate * 100.0
180 ),
181 approach: "Check error patterns in session log. Common fixes: better error messages, input validation, fallback strategies.".into(),
182 priority: 1,
183 target_files: vec!["crates/rc-cli/src/agent.rs".into()],
184 });
185 }
186 }
187
188 if stats.patch_failures > 2 {
190 improvements.push(Improvement {
191 title: "Fix apply_patch reliability".into(),
192 reason: format!("{} patch failures this run", stats.patch_failures),
193 approach: "Check apply_patch error messages. Improve context matching, quote handling, or whitespace tolerance.".into(),
194 priority: 1,
195 target_files: vec!["crates/sgr-agent/src/app_tools/apply_patch.rs".into()],
196 });
197 }
198
199 if stats.loop_warnings > 2 {
201 improvements.push(Improvement {
202 title: "Reduce agent loops".into(),
203 reason: format!(
204 "{} loop warnings, {} aborts",
205 stats.loop_warnings, stats.loop_aborts
206 ),
207 approach: "Analyze which actions loop. Add better error feedback, earlier detection, or alternative strategies in system prompt.".into(),
208 priority: 2,
209 target_files: vec![
210 "crates/rc-cli/src/agent.rs".into(),
211 "crates/sgr-agent/src/loop_detect.rs".into(),
212 ],
213 });
214 }
215
216 if stats.completed && stats.steps > 20 {
218 improvements.push(Improvement {
219 title: "Reduce step count".into(),
220 reason: format!(
221 "Task took {} steps (target: <15)",
222 stats.steps
223 ),
224 approach: "Use parallel actions more aggressively. Combine read+edit into fewer steps. Improve system prompt for directness.".into(),
225 priority: 3,
226 target_files: vec!["crates/rc-cli/src/agent.rs".into()],
227 });
228 }
229
230 if !stats.completed && stats.steps > 5 {
232 improvements.push(Improvement {
233 title: "Fix task completion".into(),
234 reason: format!(
235 "Task aborted after {} steps without completing",
236 stats.steps
237 ),
238 approach: "Check why agent couldn't finish. Missing tool? Wrong approach? Need better planning phase?".into(),
239 priority: 1,
240 target_files: vec!["crates/rc-cli/src/agent.rs".into()],
241 });
242 }
243
244 improvements.sort_by_key(|i| i.priority);
245 improvements
246}
247
248pub fn format_improvements(improvements: &[Improvement]) -> String {
250 if improvements.is_empty() {
251 return "No improvements needed — run was clean.".into();
252 }
253
254 let mut out = String::from("## Self-Improvement Proposals\n\n");
255 for (i, imp) in improvements.iter().enumerate() {
256 out.push_str(&format!(
257 "{}. **[P{}] {}**\n Reason: {}\n Approach: {}\n Files: {}\n\n",
258 i + 1,
259 imp.priority,
260 imp.title,
261 imp.reason,
262 imp.approach,
263 imp.target_files.join(", ")
264 ));
265 }
266 out
267}
268
269pub fn evolution_prompt(stats: &RunStats) -> Option<String> {
271 let improvements = evaluate(stats);
272 if improvements.is_empty() {
273 return None;
274 }
275
276 let report = format_improvements(&improvements);
277 Some(format!(
278 "## Self-Evolution Task\n\n\
279 Your last run stats: {} steps, {} errors, {} loops, completed={}\n\n\
280 {}\n\
281 Pick the highest-priority improvement. Read the target file(s), \
282 make the minimal change, write tests, run `make check`, commit, \
283 and finish with RESTART_AGENT if you modified agent code.",
284 stats.steps, stats.tool_errors, stats.loop_warnings, stats.completed, report,
285 ))
286}
287
288#[derive(Debug, Clone, Serialize, Deserialize)]
294pub struct SessionPattern {
295 pub pattern: String,
297 pub count: usize,
299 pub examples: Vec<String>,
301}
302
303pub fn analyze_sessions(agent_home: &str, max_sessions: usize) -> Vec<SessionPattern> {
306 let dir = PathBuf::from(agent_home);
307 let mut session_files: Vec<PathBuf> = std::fs::read_dir(&dir)
308 .map(|entries| {
309 entries
310 .flatten()
311 .map(|e| e.path())
312 .filter(|p| {
313 p.extension().map(|e| e == "jsonl").unwrap_or(false)
314 && p.file_name()
315 .map(|n| n.to_string_lossy().starts_with("session_"))
316 .unwrap_or(false)
317 })
318 .collect()
319 })
320 .unwrap_or_default();
321 session_files.sort();
322 session_files.reverse(); session_files.truncate(max_sessions);
324
325 let mut patch_errors: Vec<String> = Vec::new();
327 let mut loop_warnings: usize = 0;
328 let mut tool_errors: Vec<String> = Vec::new();
329 let mut reread_warnings: usize = 0;
330 let mut _total_messages: usize = 0;
331
332 for path in &session_files {
333 let content = match std::fs::read_to_string(path) {
334 Ok(c) => c,
335 Err(_) => continue,
336 };
337 for line in content.lines() {
338 let msg: serde_json::Value = match serde_json::from_str(line) {
339 Ok(v) => v,
340 Err(_) => continue,
341 };
342 let role = msg.get("role").and_then(|r| r.as_str()).unwrap_or("");
343 let text = msg.get("content").and_then(|c| c.as_str()).unwrap_or("");
344
345 if role == "tool" || role == "assistant" {
346 _total_messages += 1;
347
348 if text.contains("apply_patch error") || text.contains("Commit FAILED") {
350 let snippet: String = text.lines().take(2).collect::<Vec<_>>().join(" ");
351 patch_errors.push(truncate_string(&snippet, 100));
352 }
353
354 if text.contains("Loop detected") || text.contains("LOOP WARNING") {
356 loop_warnings += 1;
357 }
358
359 if text.contains("FAILED") || text.starts_with("Error") {
361 let snippet: String = text.lines().next().unwrap_or("").to_string();
362 tool_errors.push(truncate_string(&snippet, 100));
363 }
364
365 if text.contains("RE-READ") || text.contains("already read") {
367 reread_warnings += 1;
368 }
369 }
370 }
371 }
372
373 let mut patterns = Vec::new();
374
375 if patch_errors.len() > 2 {
376 patterns.push(SessionPattern {
377 pattern: format!(
378 "apply_patch failures ({} across {} sessions)",
379 patch_errors.len(),
380 session_files.len()
381 ),
382 count: patch_errors.len(),
383 examples: patch_errors.into_iter().take(3).collect(),
384 });
385 }
386
387 if loop_warnings > 3 {
388 patterns.push(SessionPattern {
389 pattern: format!(
390 "Loop warnings ({} across {} sessions)",
391 loop_warnings,
392 session_files.len()
393 ),
394 count: loop_warnings,
395 examples: vec![],
396 });
397 }
398
399 if tool_errors.len() > 5 {
400 let mut error_types: std::collections::HashMap<String, usize> =
402 std::collections::HashMap::new();
403 for err in &tool_errors {
404 let key = err.split_whitespace().take(3).collect::<Vec<_>>().join(" ");
405 *error_types.entry(key).or_insert(0) += 1;
406 }
407 let mut sorted: Vec<_> = error_types.into_iter().collect();
408 sorted.sort_by(|a, b| b.1.cmp(&a.1));
409
410 for (error_type, count) in sorted.into_iter().take(3) {
411 if count > 2 {
412 patterns.push(SessionPattern {
413 pattern: format!("Recurring error: '{}' ({}x)", error_type, count),
414 count,
415 examples: tool_errors
416 .iter()
417 .filter(|e| e.contains(error_type.split_whitespace().next().unwrap_or("")))
418 .take(2)
419 .cloned()
420 .collect(),
421 });
422 }
423 }
424 }
425
426 if reread_warnings > 3 {
427 patterns.push(SessionPattern {
428 pattern: format!(
429 "File re-reads ({} — agent wastes tokens re-reading)",
430 reread_warnings
431 ),
432 count: reread_warnings,
433 examples: vec![],
434 });
435 }
436
437 patterns.sort_by(|a, b| b.count.cmp(&a.count));
438 patterns
439}
440
/// Limits `s` to at most `max` characters.
///
/// A string of `max` characters or fewer is returned unchanged. A longer one
/// is cut after exactly `max` characters and `"..."` is appended. The cut is
/// always on a character boundary, so multi-byte text never causes a panic.
///
/// The previous version compared the *byte* length against `max` but cut by
/// *character* position, and kept only `max - 1` characters. Multi-byte
/// strings that already fit were therefore shortened anyway.
fn truncate_string(s: &str, max: usize) -> String {
    // Fast path: a string of at most `max` bytes has at most `max` chars.
    if s.len() <= max {
        return s.to_string();
    }
    match s.char_indices().nth(max) {
        // `cut` is the byte offset of the character at index `max`, so
        // `s[..cut]` holds exactly the first `max` characters.
        Some((cut, _)) => format!("{}...", &s[..cut]),
        // Fewer than `max + 1` characters: nothing to cut.
        None => s.to_string(),
    }
}
456
457pub fn evolution_prompt_with_history(stats: &RunStats, agent_home: &str) -> Option<String> {
459 let improvements = evaluate(stats);
460 let patterns = analyze_sessions(agent_home, 10);
461
462 if improvements.is_empty() && patterns.is_empty() {
463 return None;
464 }
465
466 let mut prompt = format!(
467 "## Self-Evolution Task\n\n\
468 Your last run stats: {} steps, {} errors, {} loops, completed={}\n\n",
469 stats.steps, stats.tool_errors, stats.loop_warnings, stats.completed,
470 );
471
472 if !patterns.is_empty() {
473 prompt.push_str("### Recurring Issues (from last 10 sessions)\n\n");
474 for p in &patterns {
475 prompt.push_str(&format!("- **{}**\n", p.pattern));
476 for ex in &p.examples {
477 prompt.push_str(&format!(" - `{}`\n", ex));
478 }
479 }
480 prompt.push('\n');
481 }
482
483 if !improvements.is_empty() {
484 prompt.push_str(&format_improvements(&improvements));
485 }
486
487 prompt.push_str(
488 "\nPick the highest-priority issue. Read the target file(s), \
489 make the minimal change, write tests, run `make check`, commit, \
490 and finish with RESTART_AGENT if you modified agent code.",
491 );
492
493 Some(prompt)
494}
495
/// Signal tag recognised in agent output by [`parse_signal`].
#[derive(Debug, Clone, PartialEq)]
pub enum SoloSignal {
    /// The output contains `<solo:done/>`.
    Done,
    /// The output contains `<solo:redo/>` (and no done tag).
    Redo,
    /// Neither tag is present. Not to be confused with `Option::None`.
    None,
}
509
510pub fn parse_signal(output: &str) -> SoloSignal {
512 if output.contains("<solo:done/>") {
513 SoloSignal::Done
514 } else if output.contains("<solo:redo/>") {
515 SoloSignal::Redo
516 } else {
517 SoloSignal::None
518 }
519}
520
/// Command read from the loop control file by [`check_control`].
#[derive(Debug, Clone, PartialEq)]
pub enum ControlAction {
    /// No file, an unreadable file, or unrecognised content.
    Continue,
    /// The file said `stop`.
    Stop,
    /// The file said `pause`.
    Pause,
    /// The file said `skip`.
    Skip,
}
530
531pub fn check_control(control_path: &Path) -> ControlAction {
533 let content = match std::fs::read_to_string(control_path) {
534 Ok(c) => c.trim().to_lowercase(),
535 Err(_) => return ControlAction::Continue,
536 };
537 match content.as_str() {
538 "stop" => {
539 let _ = std::fs::remove_file(control_path);
540 ControlAction::Stop
541 }
542 "pause" => ControlAction::Pause, "skip" => {
544 let _ = std::fs::remove_file(control_path);
545 ControlAction::Skip
546 }
547 _ => ControlAction::Continue,
548 }
549}
550
/// Trips after a configurable number of consecutive failures that share the
/// same fingerprint.
#[derive(Debug)]
pub struct CircuitBreaker {
    /// Fingerprint of the most recent failure; empty after a success.
    last_fingerprint: String,
    /// Length of the current streak of identical failures.
    consecutive: usize,
    /// Streak length at which the breaker reports a trip.
    limit: usize,
}

impl CircuitBreaker {
    /// Creates a breaker that trips once `limit` identical failures occur in
    /// a row.
    pub fn new(limit: usize) -> Self {
        CircuitBreaker {
            limit,
            consecutive: 0,
            last_fingerprint: String::new(),
        }
    }

    /// Records one outcome and returns `true` when the breaker has tripped.
    ///
    /// A success clears the streak and never trips. A failure extends the
    /// streak when `fingerprint` equals the previous failure's fingerprint,
    /// and otherwise starts a new streak of length one.
    pub fn record(&mut self, success: bool, fingerprint: &str) -> bool {
        if success {
            self.last_fingerprint.clear();
            self.consecutive = 0;
            return false;
        }

        if self.last_fingerprint != fingerprint {
            // A different failure: restart the streak before counting it.
            self.last_fingerprint = fingerprint.to_owned();
            self.consecutive = 0;
        }
        self.consecutive += 1;

        self.consecutive >= self.limit
    }

    /// Length of the current streak of identical failures.
    pub fn consecutive_failures(&self) -> usize {
        self.consecutive
    }
}
588
589#[derive(Debug, Clone)]
591pub struct LoopOptions {
592 pub max_iterations: usize,
594 pub max_hours: f64,
596 pub control_file: PathBuf,
598 pub circuit_breaker_limit: usize,
600 pub agent_home: String,
602 pub mode: LoopMode,
604}
605
/// Operating mode of the agent loop.
///
/// NOTE(review): this file only stores the mode; how the two modes differ is
/// decided by the caller — confirm there.
#[derive(Debug, Clone, PartialEq)]
pub enum LoopMode {
    /// Plain loop mode; the default in [`LoopOptions`].
    Loop,
    /// Evolve mode.
    Evolve,
}
613
614impl Default for LoopOptions {
615 fn default() -> Self {
616 Self {
617 max_iterations: 20,
618 max_hours: 0.0,
619 control_file: PathBuf::from(".rust-code/loop-control"),
620 circuit_breaker_limit: 3,
621 agent_home: ".rust-code".into(),
622 mode: LoopMode::Loop,
623 }
624 }
625}
626
627#[derive(Debug)]
629pub struct LoopState {
630 pub iteration: usize,
631 pub start_time: Instant,
632 pub breaker: CircuitBreaker,
633 pub options: LoopOptions,
634 pub total_score: f64,
635 pub keep_count: usize,
636 pub discard_count: usize,
637}
638
639impl LoopState {
640 pub fn new(options: LoopOptions) -> Self {
641 let limit = options.circuit_breaker_limit;
642 Self {
643 iteration: 0,
644 start_time: Instant::now(),
645 breaker: CircuitBreaker::new(limit),
646 options,
647 total_score: 0.0,
648 keep_count: 0,
649 discard_count: 0,
650 }
651 }
652
653 pub fn should_stop(&self) -> Option<String> {
655 if self.options.max_iterations > 0 && self.iteration >= self.options.max_iterations {
657 return Some(format!(
658 "Max iterations reached ({})",
659 self.options.max_iterations
660 ));
661 }
662 if self.options.max_hours > 0.0 {
664 let elapsed_hours = self.start_time.elapsed().as_secs_f64() / 3600.0;
665 if elapsed_hours >= self.options.max_hours {
666 return Some(format!("Timeout ({:.1}h)", self.options.max_hours));
667 }
668 }
669 match check_control(&self.options.control_file) {
671 ControlAction::Stop => return Some("Stop requested via control file".into()),
672 ControlAction::Pause => {
673 return Some("Paused via control file (delete to resume)".into())
674 }
675 _ => {}
676 }
677 None
678 }
679
680 pub fn record_iteration(&mut self, stats: &RunStats) -> bool {
682 self.iteration += 1;
683 let s = score(stats);
684 self.total_score += s;
685 let fingerprint = format!(
686 "errors:{},loops:{},patches:{}",
687 stats.tool_errors, stats.loop_warnings, stats.patch_failures
688 );
689 let success = stats.completed && stats.tool_errors == 0;
690 if success {
691 self.keep_count += 1;
692 } else {
693 self.discard_count += 1;
694 }
695 self.breaker.record(success, &fingerprint)
697 }
698
699 pub fn elapsed_display(&self) -> String {
701 let secs = self.start_time.elapsed().as_secs();
702 if secs < 60 {
703 format!("{}s", secs)
704 } else if secs < 3600 {
705 format!("{}m{}s", secs / 60, secs % 60)
706 } else {
707 format!("{}h{}m", secs / 3600, (secs % 3600) / 60)
708 }
709 }
710
711 pub fn summary(&self) -> String {
713 let avg = if self.iteration > 0 {
714 self.total_score / self.iteration as f64
715 } else {
716 0.0
717 };
718 format!(
719 "{} iterations in {} | keep:{} discard:{} | avg score:{:.3}",
720 self.iteration,
721 self.elapsed_display(),
722 self.keep_count,
723 self.discard_count,
724 avg,
725 )
726 }
727}
728
#[cfg(test)]
mod tests {
    use super::*;

    // ---- shared fixtures -------------------------------------------------

    /// Creates a scratch directory and returns it together with its path as
    /// a string. Keep the `TempDir` alive for as long as the path is in use.
    fn scratch_home() -> (tempfile::TempDir, String) {
        let dir = tempfile::tempdir().unwrap();
        let home = dir.path().to_str().unwrap().to_string();
        (dir, home)
    }

    /// One JSONL session line with the given role and content.
    fn message_line(role: &str, content: &str) -> String {
        format!(r#"{{"role":"{}","content":"{}"}}"#, role, content)
    }

    /// Writes `lines`, newline-joined, to `<home>/<file_name>`.
    fn write_session(home: &str, file_name: &str, lines: &[String]) {
        let target = std::path::Path::new(home).join(file_name);
        std::fs::write(target, lines.join("\n")).unwrap();
    }

    /// Log entry with default stats.
    fn entry(
        ts: &str,
        commit: &str,
        title: &str,
        score_before: f64,
        score_after: f64,
        status: &str,
    ) -> EvolutionEntry {
        EvolutionEntry {
            ts: ts.into(),
            commit: commit.into(),
            title: title.into(),
            score_before,
            score_after,
            status: status.into(),
            stats: RunStats::default(),
        }
    }

    // ---- evaluate / evolution_prompt -------------------------------------

    #[test]
    fn clean_run_no_improvements() {
        let stats = RunStats {
            steps: 5,
            successful_calls: 5,
            completed: true,
            cost_chars: 1000,
            ..Default::default()
        };
        assert!(evaluate(&stats).is_empty());
    }

    #[test]
    fn high_error_rate_triggers_improvement() {
        let stats = RunStats {
            steps: 10,
            tool_errors: 5,
            completed: true,
            ..Default::default()
        };
        let proposals = evaluate(&stats);
        assert!(!proposals.is_empty());
        assert!(proposals[0].title.contains("error rate"));
    }

    #[test]
    fn patch_failures_trigger_improvement() {
        let stats = RunStats {
            steps: 10,
            patch_failures: 4,
            completed: true,
            ..Default::default()
        };
        let proposals = evaluate(&stats);
        assert!(proposals.iter().any(|p| p.title.contains("apply_patch")));
    }

    #[test]
    fn loop_warnings_trigger_improvement() {
        let stats = RunStats {
            steps: 15,
            loop_warnings: 5,
            loop_aborts: 1,
            completed: true,
            ..Default::default()
        };
        let proposals = evaluate(&stats);
        assert!(proposals.iter().any(|p| p.title.contains("loop")));
    }

    #[test]
    fn too_many_steps_triggers_improvement() {
        let stats = RunStats {
            steps: 30,
            completed: true,
            ..Default::default()
        };
        let proposals = evaluate(&stats);
        assert!(proposals.iter().any(|p| p.title.contains("step count")));
    }

    #[test]
    fn incomplete_task_triggers_improvement() {
        let stats = RunStats {
            steps: 10,
            completed: false,
            ..Default::default()
        };
        let proposals = evaluate(&stats);
        assert!(proposals.iter().any(|p| p.title.contains("completion")));
    }

    #[test]
    fn improvements_sorted_by_priority() {
        let stats = RunStats {
            steps: 30,
            tool_errors: 5,
            loop_warnings: 3,
            patch_failures: 3,
            completed: true,
            ..Default::default()
        };
        let proposals = evaluate(&stats);
        assert!(proposals
            .windows(2)
            .all(|pair| pair[0].priority <= pair[1].priority));
    }

    #[test]
    fn evolution_prompt_none_when_clean() {
        let stats = RunStats {
            steps: 5,
            completed: true,
            ..Default::default()
        };
        assert!(evolution_prompt(&stats).is_none());
    }

    #[test]
    fn evolution_prompt_some_when_issues() {
        let stats = RunStats {
            steps: 10,
            tool_errors: 5,
            completed: false,
            ..Default::default()
        };
        let prompt = evolution_prompt(&stats).unwrap();
        assert!(prompt.contains("Self-Evolution"));
        assert!(prompt.contains("RESTART_AGENT"));
    }

    // ---- analyze_sessions ------------------------------------------------

    #[test]
    fn analyze_sessions_empty_dir() {
        let (_dir, home) = scratch_home();
        assert!(analyze_sessions(&home, 10).is_empty());
    }

    #[test]
    fn analyze_sessions_finds_patch_errors() {
        let (_dir, home) = scratch_home();
        let session = vec![
            message_line("user", "fix bug"),
            message_line("tool", "apply_patch error: failed to find match"),
            message_line("tool", "apply_patch error: invalid hunk"),
            message_line("tool", "apply_patch error: context mismatch"),
            message_line("tool", "done"),
        ];
        write_session(&home, "session_1000.jsonl", &session);

        let patterns = analyze_sessions(&home, 10);
        assert!(
            patterns.iter().any(|p| p.pattern.contains("apply_patch")),
            "should find patch errors, got: {:?}",
            patterns
        );
    }

    #[test]
    fn analyze_sessions_finds_loops() {
        let (_dir, home) = scratch_home();
        let mut session = vec![message_line("user", "task")];
        session.extend(
            std::iter::repeat(message_line(
                "tool",
                "LOOP WARNING: Loop detected — 5 repeats",
            ))
            .take(5),
        );
        write_session(&home, "session_2000.jsonl", &session);

        let patterns = analyze_sessions(&home, 10);
        assert!(patterns.iter().any(|p| p.pattern.contains("Loop")));
    }

    #[test]
    fn evolution_prompt_with_history_includes_patterns() {
        let (_dir, home) = scratch_home();
        let session: Vec<String> = ["x", "y", "z"]
            .iter()
            .map(|suffix| message_line("tool", &format!("apply_patch error: {}", suffix)))
            .collect();
        write_session(&home, "session_3000.jsonl", &session);

        let stats = RunStats {
            steps: 10,
            tool_errors: 5,
            completed: false,
            ..Default::default()
        };
        let prompt = evolution_prompt_with_history(&stats, &home).unwrap();
        assert!(prompt.contains("Recurring Issues"));
        assert!(prompt.contains("apply_patch"));
    }

    // ---- parse_signal ----------------------------------------------------

    #[test]
    fn parse_signal_done() {
        assert_eq!(parse_signal("result <solo:done/>"), SoloSignal::Done);
    }

    #[test]
    fn parse_signal_redo() {
        assert_eq!(parse_signal("needs fix <solo:redo/>"), SoloSignal::Redo);
    }

    #[test]
    fn parse_signal_none() {
        assert_eq!(parse_signal("just text"), SoloSignal::None);
    }

    // ---- check_control ---------------------------------------------------

    #[test]
    fn control_file_missing() {
        let (dir, _home) = scratch_home();
        let control = dir.path().join("control");
        assert_eq!(check_control(&control), ControlAction::Continue);
    }

    #[test]
    fn control_file_stop() {
        let (dir, _home) = scratch_home();
        let control = dir.path().join("control");
        std::fs::write(&control, "stop").unwrap();
        assert_eq!(check_control(&control), ControlAction::Stop);
        // `stop` is one-shot: the file is consumed.
        assert!(!control.exists());
    }

    #[test]
    fn control_file_pause() {
        let (dir, _home) = scratch_home();
        let control = dir.path().join("control");
        std::fs::write(&control, "pause").unwrap();
        assert_eq!(check_control(&control), ControlAction::Pause);
        // `pause` persists until the file is deleted by hand.
        assert!(control.exists());
    }

    // ---- CircuitBreaker --------------------------------------------------

    #[test]
    fn circuit_breaker_trips_on_consecutive() {
        let mut breaker = CircuitBreaker::new(3);
        assert!(!breaker.record(false, "err1"));
        assert!(!breaker.record(false, "err1"));
        // The third identical failure reaches the limit.
        assert!(breaker.record(false, "err1"));
    }

    #[test]
    fn circuit_breaker_resets_on_success() {
        let mut breaker = CircuitBreaker::new(3);
        breaker.record(false, "err1");
        breaker.record(false, "err1");
        breaker.record(true, "");
        assert_eq!(breaker.consecutive_failures(), 0);
        // The streak starts over after a success.
        assert!(!breaker.record(false, "err1"));
    }

    #[test]
    fn circuit_breaker_resets_on_different_error() {
        let mut breaker = CircuitBreaker::new(3);
        breaker.record(false, "err1");
        breaker.record(false, "err1");
        // A different fingerprint starts a new streak of one.
        assert!(!breaker.record(false, "err2"));
        assert_eq!(breaker.consecutive_failures(), 1);
    }

    // ---- LoopState -------------------------------------------------------

    #[test]
    fn loop_state_max_iterations() {
        let options = LoopOptions {
            max_iterations: 3,
            ..Default::default()
        };
        let mut state = LoopState::new(options);
        assert!(state.should_stop().is_none());
        state.iteration = 3;
        assert!(state.should_stop().is_some());
    }

    #[test]
    fn loop_state_summary() {
        let mut state = LoopState::new(LoopOptions::default());
        state.iteration = 5;
        state.keep_count = 3;
        state.discard_count = 2;
        state.total_score = 4.0;
        let summary = state.summary();
        for expected in ["5 iterations", "keep:3", "discard:2"].iter() {
            assert!(summary.contains(expected));
        }
    }

    // ---- score -----------------------------------------------------------

    #[test]
    fn score_perfect_run() {
        let stats = RunStats {
            steps: 5,
            successful_calls: 5,
            completed: true,
            ..Default::default()
        };
        let value = score(&stats);
        assert!(
            value > 0.9,
            "perfect run score should be >0.9, got {}",
            value
        );
    }

    #[test]
    fn score_zero_steps() {
        assert_eq!(score(&RunStats::default()), 0.0);
    }

    #[test]
    fn score_incomplete_penalized() {
        let complete = RunStats {
            steps: 10,
            successful_calls: 8,
            completed: true,
            ..Default::default()
        };
        let incomplete = RunStats {
            completed: false,
            ..complete.clone()
        };
        assert!(score(&complete) > score(&incomplete));
    }

    #[test]
    fn score_loops_penalized() {
        let clean = RunStats {
            steps: 10,
            successful_calls: 8,
            completed: true,
            ..Default::default()
        };
        let loopy = RunStats {
            loop_warnings: 5,
            ..clean.clone()
        };
        assert!(score(&clean) > score(&loopy));
    }

    #[test]
    fn score_clamped_to_01() {
        // More successful calls than steps must not push the score above 1.
        let stats = RunStats {
            steps: 1,
            successful_calls: 100,
            completed: true,
            ..Default::default()
        };
        assert!(score(&stats) <= 1.0);
    }

    // ---- evolution log ---------------------------------------------------

    #[test]
    fn log_and_load_evolution() {
        let (_dir, home) = scratch_home();

        let record = EvolutionEntry {
            stats: RunStats {
                steps: 10,
                successful_calls: 8,
                completed: true,
                ..Default::default()
            },
            ..entry(
                "2026-03-14T12:00:00Z",
                "abc1234",
                "test improvement",
                0.5,
                0.7,
                "keep",
            )
        };

        log_evolution(&home, &record).unwrap();
        log_evolution(&home, &record).unwrap();

        let history = load_evolution(&home);
        assert_eq!(history.len(), 2);
        assert_eq!(history[0].title, "test improvement");
        assert_eq!(history[0].score_after, 0.7);
    }

    #[test]
    fn baseline_score_empty() {
        let (_dir, home) = scratch_home();
        assert_eq!(baseline_score(&home), 0.0);
    }

    #[test]
    fn baseline_score_from_history() {
        let (_dir, home) = scratch_home();

        log_evolution(&home, &entry("t1", "a", "first", 0.0, 0.5, "keep")).unwrap();
        log_evolution(&home, &entry("t2", "b", "second", 0.5, 0.8, "keep")).unwrap();

        // The baseline is the `score_after` of the most recent entry.
        assert_eq!(baseline_score(&home), 0.8);
    }

    #[test]
    fn evolution_summary_counts() {
        let (_dir, home) = scratch_home();

        for status in ["keep", "discard", "crash", "keep"].iter() {
            log_evolution(&home, &entry("t", "x", "x", 0.0, 0.0, status)).unwrap();
        }

        let (keep, discard, crash) = evolution_summary(&home);
        assert_eq!(keep, 2);
        assert_eq!(discard, 1);
        assert_eq!(crash, 1);
    }
}