1use std::fs::{self, OpenOptions};
4use std::io::{BufRead, BufReader, Write};
5use std::path::{Path, PathBuf};
6use std::time::Instant;
7
8use anyhow::{Context, Result, bail};
9use serde::{Deserialize, Serialize};
10
11use super::parity::ParityReport;
12
/// Directory name joined under `<project_root>/.batty` to hold all research state.
const RESEARCH_DIR: &str = "research";
/// File name, inside the research directory, of the pointer to the active mission.
const CURRENT_MISSION_FILE: &str = "current.json";
/// File name, inside a mission's directory, of the serialized mission state.
const MISSION_STATE_FILE: &str = "mission.json";
/// File name, inside a mission's directory, of the line-delimited JSON ledger.
const LEDGER_FILE: &str = "ledger.jsonl";
17
/// How the outcome of an evaluator command is interpreted.
///
/// Variants are (de)serialized using snake_case names.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum EvaluatorFormat {
    /// The evaluator's trimmed stdout is parsed as a JSON object with optional
    /// `pass`, `score`, and `parity_pct` fields.
    Json,
    /// Only the process exit status decides pass/fail; no score is recorded.
    ExitCode,
}
24
/// Rule used by `run_research_iteration` to decide whether a candidate is kept.
///
/// Variants are (de)serialized using snake_case names.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum KeepPolicy {
    /// Keep the candidate whenever its evaluation passes.
    PassOnly,
    /// Keep the candidate only when its score is strictly greater than the
    /// baseline score; a missing score on either side is treated as `0.0`.
    ScoreImprovement,
    /// Keep the candidate only when its parity percentage is strictly greater
    /// than the baseline's; a missing percentage on either side is treated as `0`.
    ParityImprovement,
}
32
/// Captured outcome of one evaluator run.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct EvaluationResult {
    /// Whether the evaluation counts as a success. In JSON mode this is the
    /// reported `pass` field, falling back to the process exit status.
    pub pass: bool,
    /// Score reported by a JSON evaluator; always `None` in exit-code mode.
    pub score: Option<f64>,
    /// Parity percentage reported by a JSON evaluator or, when absent, taken
    /// from the parity report loaded from the worktree (if that load succeeds).
    pub parity_pct: Option<u32>,
    /// Process exit code, or `-1` when the exit status carries no code.
    pub exit_code: i32,
    /// Evaluator stdout, decoded lossily as UTF-8.
    pub stdout: String,
    /// Evaluator stderr, decoded lossily as UTF-8.
    pub stderr: String,
    /// Time, in seconds, measured around spawning the evaluator and collecting its output.
    pub duration_secs: f64,
}
43
/// Outcome recorded in the ledger for one evaluation.
///
/// Variants are (de)serialized using snake_case names.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ResearchDecision {
    /// The initial evaluation recorded by `start_research` as iteration 0.
    Baseline,
    /// The candidate satisfied the keep policy and was committed.
    Keep,
    /// The candidate was rejected and the worktree was reset to the last
    /// baseline or kept commit.
    Discard,
    /// NOTE(review): never produced by `start_research` or
    /// `run_research_iteration`; presumably reserved for callers that record
    /// failed iterations — confirm.
    Error,
}
52
/// Persisted description of a research mission.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResearchMission {
    /// Identifier derived from the hypothesis plus a Unix timestamp; also the
    /// name of the mission's state directory.
    pub id: String,
    /// Free-form statement of what the mission is trying to establish.
    pub hypothesis: String,
    /// Shell command run (via `sh -lc`) inside the worktree to evaluate a candidate.
    pub evaluator_command: String,
    /// How the evaluator's output is interpreted.
    pub evaluator_format: EvaluatorFormat,
    /// Rule deciding whether a candidate is kept or discarded.
    pub keep_policy: KeepPolicy,
    /// Iteration budget; `start_research` clamps it to at least 1.
    /// NOTE(review): not enforced by any code visible in this file — confirm
    /// the caller applies it.
    pub max_iterations: u32,
    /// Directory in which the evaluator and all git commands are executed.
    pub worktree_dir: PathBuf,
    /// Evaluation that candidates are compared against; replaced in memory
    /// whenever a candidate is kept.
    pub baseline: Option<EvaluationResult>,
}
64
/// One line of the mission ledger.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LedgerEntry {
    /// `0` for the baseline entry; later entries use the ledger length at the
    /// time they are recorded.
    pub iteration: u32,
    /// What was decided for this evaluation.
    pub decision: ResearchDecision,
    /// The evaluation the decision was based on.
    pub evaluation: EvaluationResult,
    /// Output of `git rev-parse HEAD` in the worktree after the decision was applied.
    pub commit: String,
    /// UTC time at which the entry was created.
    pub timestamp: chrono::DateTime<chrono::Utc>,
}
73
/// In-memory view of a mission ledger together with its backing file.
#[derive(Debug, Clone)]
pub struct ResearchLedger {
    /// Entries in the order they appear in the file (oldest first).
    pub entries: Vec<LedgerEntry>,
    /// Location of the line-delimited JSON file that entries are appended to.
    pub path: PathBuf,
}
79
/// Contents of the current-mission pointer file: names the active mission.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct MissionPointer {
    /// Identifier of the mission whose state directory should be loaded.
    mission_id: String,
}
84
/// Snapshot of the active mission as returned by `current_status`.
#[derive(Debug, Clone)]
pub struct ResearchStatus {
    /// The active mission as loaded from its state file.
    pub mission: ResearchMission,
    /// The most recent ledger entry, or `None` when the ledger is empty.
    pub latest_entry: Option<LedgerEntry>,
}
90
91impl ResearchLedger {
92 pub fn load(path: PathBuf) -> Result<Self> {
93 if !path.exists() {
94 return Ok(Self {
95 entries: Vec::new(),
96 path,
97 });
98 }
99
100 let file =
101 fs::File::open(&path).with_context(|| format!("failed to open {}", path.display()))?;
102 let reader = BufReader::new(file);
103 let mut entries = Vec::new();
104 for line in reader.lines() {
105 let line = line.with_context(|| format!("failed to read {}", path.display()))?;
106 let trimmed = line.trim();
107 if trimmed.is_empty() {
108 continue;
109 }
110 entries.push(
111 serde_json::from_str(trimmed)
112 .with_context(|| format!("failed to parse {}", path.display()))?,
113 );
114 }
115 Ok(Self { entries, path })
116 }
117
118 pub fn record(&mut self, entry: LedgerEntry) -> Result<()> {
119 if let Some(parent) = self.path.parent() {
120 fs::create_dir_all(parent)
121 .with_context(|| format!("failed to create {}", parent.display()))?;
122 }
123 let mut file = OpenOptions::new()
124 .create(true)
125 .append(true)
126 .open(&self.path)
127 .with_context(|| format!("failed to open {}", self.path.display()))?;
128 serde_json::to_writer(&mut file, &entry)
129 .with_context(|| format!("failed to write {}", self.path.display()))?;
130 writeln!(file).with_context(|| format!("failed to write {}", self.path.display()))?;
131 self.entries.push(entry);
132 Ok(())
133 }
134
135 pub fn len(&self) -> usize {
136 self.entries.len()
137 }
138
139 pub fn is_empty(&self) -> bool {
140 self.entries.is_empty()
141 }
142
143 pub fn last_kept_commit(&self) -> Option<&str> {
144 self.entries
145 .iter()
146 .rev()
147 .find(|entry| {
148 matches!(
149 entry.decision,
150 ResearchDecision::Baseline | ResearchDecision::Keep
151 )
152 })
153 .map(|entry| entry.commit.as_str())
154 }
155
156 pub fn latest(&self) -> Option<&LedgerEntry> {
157 self.entries.last()
158 }
159}
160
/// Inputs to `start_research`; each field is copied into the new mission.
#[derive(Debug, Clone)]
pub struct StartResearchOptions {
    /// Statement of what the mission tries to establish; also seeds the mission id.
    pub hypothesis: String,
    /// Shell command used to evaluate the worktree.
    pub evaluator_command: String,
    /// How the evaluator's output is interpreted.
    pub evaluator_format: EvaluatorFormat,
    /// Rule deciding whether later candidates are kept.
    pub keep_policy: KeepPolicy,
    /// Requested iteration budget; values below 1 are raised to 1.
    pub max_iterations: u32,
    /// Existing directory in which the evaluator and git commands run.
    pub worktree_dir: PathBuf,
}
170
171pub fn start_research(
172 project_root: &Path,
173 options: StartResearchOptions,
174) -> Result<ResearchMission> {
175 if !options.worktree_dir.exists() {
176 bail!(
177 "research worktree does not exist: {}",
178 options.worktree_dir.display()
179 );
180 }
181
182 let mission_id = mission_id(&options.hypothesis);
183 let mission_dir = mission_dir(project_root, &mission_id);
184 fs::create_dir_all(&mission_dir)
185 .with_context(|| format!("failed to create {}", mission_dir.display()))?;
186
187 let mut mission = ResearchMission {
188 id: mission_id.clone(),
189 hypothesis: options.hypothesis,
190 evaluator_command: options.evaluator_command,
191 evaluator_format: options.evaluator_format,
192 keep_policy: options.keep_policy,
193 max_iterations: options.max_iterations.max(1),
194 worktree_dir: options.worktree_dir,
195 baseline: None,
196 };
197
198 let baseline = run_evaluator(
199 &mission.evaluator_command,
200 &mission.worktree_dir,
201 &mission.evaluator_format,
202 )?;
203 mission.baseline = Some(baseline.clone());
204 let mut ledger = ResearchLedger::load(ledger_path(project_root, &mission.id))?;
205 ledger.record(LedgerEntry {
206 iteration: 0,
207 decision: ResearchDecision::Baseline,
208 evaluation: baseline,
209 commit: git_head(&mission.worktree_dir)?,
210 timestamp: chrono::Utc::now(),
211 })?;
212 save_mission(project_root, &mission)?;
213 set_current_mission(project_root, &mission.id)?;
214 Ok(mission)
215}
216
217pub fn run_research_iteration(
218 mission: &mut ResearchMission,
219 ledger: &mut ResearchLedger,
220) -> Result<ResearchDecision> {
221 let result = run_evaluator(
222 &mission.evaluator_command,
223 &mission.worktree_dir,
224 &mission.evaluator_format,
225 )?;
226
227 let decision = match mission.keep_policy {
228 KeepPolicy::PassOnly => {
229 if result.pass {
230 ResearchDecision::Keep
231 } else {
232 ResearchDecision::Discard
233 }
234 }
235 KeepPolicy::ScoreImprovement => {
236 let baseline_score = mission
237 .baseline
238 .as_ref()
239 .and_then(|baseline| baseline.score)
240 .unwrap_or(0.0);
241 if result.score.unwrap_or(0.0) > baseline_score {
242 ResearchDecision::Keep
243 } else {
244 ResearchDecision::Discard
245 }
246 }
247 KeepPolicy::ParityImprovement => {
248 let baseline_parity = mission
249 .baseline
250 .as_ref()
251 .and_then(|baseline| baseline.parity_pct)
252 .unwrap_or(0);
253 if result.parity_pct.unwrap_or(0) > baseline_parity {
254 ResearchDecision::Keep
255 } else {
256 ResearchDecision::Discard
257 }
258 }
259 };
260
261 match decision {
262 ResearchDecision::Keep => {
263 git_commit_all(
264 &mission.worktree_dir,
265 &format!("research: iteration {}", ledger.len()),
266 )?;
267 mission.baseline = Some(result.clone());
268 }
269 ResearchDecision::Discard => {
270 let Some(commit) = ledger.last_kept_commit() else {
271 bail!("cannot discard without a kept or baseline commit");
272 };
273 git_reset_hard(&mission.worktree_dir, commit)?;
274 }
275 ResearchDecision::Baseline | ResearchDecision::Error => {}
276 }
277
278 let commit = git_head(&mission.worktree_dir)?;
279 ledger.record(LedgerEntry {
280 iteration: ledger.len() as u32,
281 decision: decision.clone(),
282 evaluation: result,
283 commit,
284 timestamp: chrono::Utc::now(),
285 })?;
286
287 Ok(decision)
288}
289
290pub fn current_status(project_root: &Path) -> Result<Option<ResearchStatus>> {
291 let Some(mission) = load_current_mission(project_root)? else {
292 return Ok(None);
293 };
294 let ledger = ResearchLedger::load(ledger_path(project_root, &mission.id))?;
295 Ok(Some(ResearchStatus {
296 mission,
297 latest_entry: ledger.latest().cloned(),
298 }))
299}
300
301pub fn read_current_ledger(project_root: &Path) -> Result<Option<ResearchLedger>> {
302 let Some(mission) = load_current_mission(project_root)? else {
303 return Ok(None);
304 };
305 Ok(Some(ResearchLedger::load(ledger_path(
306 project_root,
307 &mission.id,
308 ))?))
309}
310
311pub fn stop_current_research(project_root: &Path) -> Result<Option<ResearchMission>> {
312 let mission = load_current_mission(project_root)?;
313 let path = current_mission_path(project_root);
314 if path.exists() {
315 fs::remove_file(&path).with_context(|| format!("failed to remove {}", path.display()))?;
316 }
317 Ok(mission)
318}
319
320pub fn print_status(project_root: &Path) -> Result<()> {
321 let Some(status) = current_status(project_root)? else {
322 println!("No active research mission.");
323 return Ok(());
324 };
325 println!("Mission: {}", status.mission.id);
326 println!("Hypothesis: {}", status.mission.hypothesis);
327 println!("Worktree: {}", status.mission.worktree_dir.display());
328 println!(
329 "Keep policy: {}",
330 keep_policy_name(&status.mission.keep_policy)
331 );
332 println!(
333 "Baseline: {}",
334 status
335 .mission
336 .baseline
337 .as_ref()
338 .map(summary_line)
339 .unwrap_or_else(|| "none".to_string())
340 );
341 if let Some(entry) = status.latest_entry {
342 println!(
343 "Latest: iteration={} decision={} commit={} {}",
344 entry.iteration,
345 decision_name(&entry.decision),
346 entry.commit,
347 summary_line(&entry.evaluation)
348 );
349 }
350 Ok(())
351}
352
353pub fn print_ledger(project_root: &Path) -> Result<()> {
354 let Some(ledger) = read_current_ledger(project_root)? else {
355 println!("No active research mission.");
356 return Ok(());
357 };
358 println!("iteration commit pass score parity decision");
359 for entry in ledger.entries {
360 println!(
361 "{:<10} {:<8} {:<5} {:<6} {:<7} {}",
362 entry.iteration,
363 shorten_commit(&entry.commit),
364 entry.evaluation.pass,
365 entry
366 .evaluation
367 .score
368 .map(|score| format!("{score:.2}"))
369 .unwrap_or_else(|| "-".to_string()),
370 entry
371 .evaluation
372 .parity_pct
373 .map(|pct| format!("{pct}%"))
374 .unwrap_or_else(|| "-".to_string()),
375 decision_name(&entry.decision),
376 );
377 }
378 Ok(())
379}
380
381fn summary_line(result: &EvaluationResult) -> String {
382 format!(
383 "pass={} score={} parity={} exit={}",
384 result.pass,
385 result
386 .score
387 .map(|score| format!("{score:.2}"))
388 .unwrap_or_else(|| "-".to_string()),
389 result
390 .parity_pct
391 .map(|pct| format!("{pct}%"))
392 .unwrap_or_else(|| "-".to_string()),
393 result.exit_code
394 )
395}
396
397fn run_evaluator(
398 command: &str,
399 worktree_dir: &Path,
400 format: &EvaluatorFormat,
401) -> Result<EvaluationResult> {
402 let started = Instant::now();
403 let output = std::process::Command::new("sh")
404 .args(["-lc", command])
405 .current_dir(worktree_dir)
406 .output()
407 .with_context(|| {
408 format!(
409 "failed to execute evaluator `{command}` in {}",
410 worktree_dir.display()
411 )
412 })?;
413 let duration_secs = started.elapsed().as_secs_f64();
414 let stdout = String::from_utf8_lossy(&output.stdout).to_string();
415 let stderr = String::from_utf8_lossy(&output.stderr).to_string();
416 let exit_code = output.status.code().unwrap_or(-1);
417
418 match format {
419 EvaluatorFormat::Json => {
420 #[derive(Deserialize)]
421 struct JsonEvaluation {
422 pass: Option<bool>,
423 score: Option<f64>,
424 parity_pct: Option<u32>,
425 }
426
427 let parsed: JsonEvaluation = serde_json::from_str(stdout.trim())
428 .with_context(|| format!("failed to parse evaluator JSON from `{command}`"))?;
429 Ok(EvaluationResult {
430 pass: parsed.pass.unwrap_or(output.status.success()),
431 score: parsed.score,
432 parity_pct: parsed
433 .parity_pct
434 .or_else(|| current_parity_pct(worktree_dir)),
435 exit_code,
436 stdout,
437 stderr,
438 duration_secs,
439 })
440 }
441 EvaluatorFormat::ExitCode => Ok(EvaluationResult {
442 pass: output.status.success(),
443 score: None,
444 parity_pct: current_parity_pct(worktree_dir),
445 exit_code,
446 stdout,
447 stderr,
448 duration_secs,
449 }),
450 }
451}
452
453fn current_parity_pct(project_root: &Path) -> Option<u32> {
454 ParityReport::load(project_root)
455 .ok()
456 .map(|report| report.summary().overall_parity_pct as u32)
457}
458
459fn git_head(worktree_dir: &Path) -> Result<String> {
460 let output = std::process::Command::new("git")
461 .args(["rev-parse", "HEAD"])
462 .current_dir(worktree_dir)
463 .output()
464 .with_context(|| format!("failed to read HEAD in {}", worktree_dir.display()))?;
465 if !output.status.success() {
466 bail!("failed to read HEAD in {}", worktree_dir.display());
467 }
468 Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
469}
470
471fn git_commit_all(worktree_dir: &Path, message: &str) -> Result<()> {
472 let add = std::process::Command::new("git")
473 .args(["add", "-A"])
474 .current_dir(worktree_dir)
475 .status()
476 .with_context(|| format!("failed to stage worktree {}", worktree_dir.display()))?;
477 if !add.success() {
478 bail!("failed to stage worktree {}", worktree_dir.display());
479 }
480
481 let commit = std::process::Command::new("git")
482 .args(["commit", "-m", message])
483 .current_dir(worktree_dir)
484 .status()
485 .with_context(|| format!("failed to commit worktree {}", worktree_dir.display()))?;
486 if !commit.success() {
487 bail!("failed to commit worktree {}", worktree_dir.display());
488 }
489 Ok(())
490}
491
492fn git_reset_hard(worktree_dir: &Path, target: &str) -> Result<()> {
493 let status = std::process::Command::new("git")
494 .args(["reset", "--hard", target])
495 .current_dir(worktree_dir)
496 .status()
497 .with_context(|| format!("failed to reset {}", worktree_dir.display()))?;
498 if !status.success() {
499 bail!("failed to reset {} to {}", worktree_dir.display(), target);
500 }
501 Ok(())
502}
503
504fn mission_dir(project_root: &Path, mission_id: &str) -> PathBuf {
505 project_root
506 .join(".batty")
507 .join(RESEARCH_DIR)
508 .join(mission_id)
509}
510
511fn ledger_path(project_root: &Path, mission_id: &str) -> PathBuf {
512 mission_dir(project_root, mission_id).join(LEDGER_FILE)
513}
514
515fn mission_state_path(project_root: &Path, mission_id: &str) -> PathBuf {
516 mission_dir(project_root, mission_id).join(MISSION_STATE_FILE)
517}
518
519fn current_mission_path(project_root: &Path) -> PathBuf {
520 project_root
521 .join(".batty")
522 .join(RESEARCH_DIR)
523 .join(CURRENT_MISSION_FILE)
524}
525
526fn save_mission(project_root: &Path, mission: &ResearchMission) -> Result<()> {
527 let path = mission_state_path(project_root, &mission.id);
528 if let Some(parent) = path.parent() {
529 fs::create_dir_all(parent)
530 .with_context(|| format!("failed to create {}", parent.display()))?;
531 }
532 let content = serde_json::to_vec_pretty(mission)
533 .with_context(|| format!("failed to serialize {}", mission.id))?;
534 fs::write(&path, content).with_context(|| format!("failed to write {}", path.display()))?;
535 Ok(())
536}
537
538fn load_current_mission(project_root: &Path) -> Result<Option<ResearchMission>> {
539 let current = current_mission_path(project_root);
540 if !current.exists() {
541 return Ok(None);
542 }
543 let pointer: MissionPointer = serde_json::from_slice(
544 &fs::read(¤t).with_context(|| format!("failed to read {}", current.display()))?,
545 )
546 .with_context(|| format!("failed to parse {}", current.display()))?;
547 let state_path = mission_state_path(project_root, &pointer.mission_id);
548 let mission = serde_json::from_slice(
549 &fs::read(&state_path)
550 .with_context(|| format!("failed to read {}", state_path.display()))?,
551 )
552 .with_context(|| format!("failed to parse {}", state_path.display()))?;
553 Ok(Some(mission))
554}
555
556fn set_current_mission(project_root: &Path, mission_id: &str) -> Result<()> {
557 let path = current_mission_path(project_root);
558 if let Some(parent) = path.parent() {
559 fs::create_dir_all(parent)
560 .with_context(|| format!("failed to create {}", parent.display()))?;
561 }
562 let content = serde_json::to_vec_pretty(&MissionPointer {
563 mission_id: mission_id.to_string(),
564 })?;
565 fs::write(&path, content).with_context(|| format!("failed to write {}", path.display()))?;
566 Ok(())
567}
568
569fn mission_id(hypothesis: &str) -> String {
570 let slug: String = hypothesis
571 .chars()
572 .map(|ch| {
573 if ch.is_ascii_alphanumeric() {
574 ch.to_ascii_lowercase()
575 } else {
576 '-'
577 }
578 })
579 .collect();
580 let compact = slug
581 .split('-')
582 .filter(|part| !part.is_empty())
583 .take(6)
584 .collect::<Vec<_>>()
585 .join("-");
586 format!("{}-{}", compact, chrono::Utc::now().timestamp())
587}
588
/// Returns at most the first seven characters of `commit`.
fn shorten_commit(commit: &str) -> String {
    // Byte offset where the eighth character starts, or the full length when
    // the input has seven characters or fewer.
    let end = commit
        .char_indices()
        .nth(7)
        .map_or(commit.len(), |(offset, _)| offset);
    commit[..end].to_string()
}
592
593fn decision_name(decision: &ResearchDecision) -> &'static str {
594 match decision {
595 ResearchDecision::Baseline => "baseline",
596 ResearchDecision::Keep => "keep",
597 ResearchDecision::Discard => "discard",
598 ResearchDecision::Error => "error",
599 }
600}
601
602fn keep_policy_name(policy: &KeepPolicy) -> &'static str {
603 match policy {
604 KeepPolicy::PassOnly => "pass-only",
605 KeepPolicy::ScoreImprovement => "score-improvement",
606 KeepPolicy::ParityImprovement => "parity-improvement",
607 }
608}
609
// Integration-style tests: each one builds a throwaway git repository in a
// temporary directory and drives the public entry points against it. They
// need the `git` and `sh` executables to be available.
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `git <args>` in `dir` and panics if it cannot be spawned or fails.
    fn git(dir: &Path, args: &[&str]) {
        let status = std::process::Command::new("git")
            .args(args)
            .current_dir(dir)
            .status()
            .unwrap();
        assert!(status.success(), "git {:?} failed", args);
    }

    /// Creates a temporary repository with a committer identity configured and
    /// a single commit containing `note.txt` with the text `baseline`.
    fn repo_with_file() -> tempfile::TempDir {
        let tmp = tempfile::tempdir().unwrap();
        git(tmp.path(), &["init"]);
        git(tmp.path(), &["config", "user.email", "test@example.com"]);
        git(tmp.path(), &["config", "user.name", "Test User"]);
        fs::write(tmp.path().join("note.txt"), "baseline\n").unwrap();
        git(tmp.path(), &["add", "note.txt"]);
        git(tmp.path(), &["commit", "-m", "baseline"]);
        tmp
    }

    /// Builds an evaluation result with empty output and zero duration; the
    /// exit code is 0 when `pass` is true and 1 otherwise.
    fn baseline_result(
        score: Option<f64>,
        parity_pct: Option<u32>,
        pass: bool,
    ) -> EvaluationResult {
        EvaluationResult {
            pass,
            score,
            parity_pct,
            exit_code: if pass { 0 } else { 1 },
            stdout: String::new(),
            stderr: String::new(),
            duration_secs: 0.0,
        }
    }

    /// Under `PassOnly`, a failing evaluation is discarded and HEAD stays on
    /// the baseline commit.
    #[test]
    fn pass_only_policy_discards_failures() {
        let tmp = repo_with_file();
        let baseline_commit = git_head(tmp.path()).unwrap();
        // Uncommitted candidate change that the discard should throw away.
        fs::write(tmp.path().join("note.txt"), "candidate\n").unwrap();
        let mut mission = ResearchMission {
            id: "mission".to_string(),
            hypothesis: "pass only".to_string(),
            evaluator_command: "printf '{\"pass\":false}' && exit 1".to_string(),
            evaluator_format: EvaluatorFormat::Json,
            keep_policy: KeepPolicy::PassOnly,
            max_iterations: 3,
            worktree_dir: tmp.path().to_path_buf(),
            baseline: Some(baseline_result(None, None, true)),
        };
        let mut ledger = ResearchLedger {
            entries: vec![LedgerEntry {
                iteration: 0,
                decision: ResearchDecision::Baseline,
                evaluation: baseline_result(None, None, true),
                commit: baseline_commit.clone(),
                timestamp: chrono::Utc::now(),
            }],
            path: tmp.path().join("ledger.jsonl"),
        };

        let decision = run_research_iteration(&mut mission, &mut ledger).unwrap();
        assert_eq!(decision, ResearchDecision::Discard);
        assert_eq!(git_head(tmp.path()).unwrap(), baseline_commit);
    }

    /// `start_research` marks the mission current and writes exactly one
    /// baseline ledger entry carrying the evaluator's score.
    #[test]
    fn start_research_records_baseline_entry() {
        // Mission state lives under `root`, separate from the git worktree.
        let root = tempfile::tempdir().unwrap();
        let worktree = repo_with_file();

        let mission = start_research(
            root.path(),
            StartResearchOptions {
                hypothesis: "record baseline".to_string(),
                evaluator_command: "printf '{\"pass\":true,\"score\":1.0}'".to_string(),
                evaluator_format: EvaluatorFormat::Json,
                keep_policy: KeepPolicy::ScoreImprovement,
                max_iterations: 10,
                worktree_dir: worktree.path().to_path_buf(),
            },
        )
        .unwrap();

        let current = current_status(root.path()).unwrap().unwrap();
        assert_eq!(current.mission.id, mission.id);
        assert_eq!(
            current
                .latest_entry
                .as_ref()
                .map(|entry| entry.decision.clone()),
            Some(ResearchDecision::Baseline)
        );

        let ledger = read_current_ledger(root.path()).unwrap().unwrap();
        assert_eq!(ledger.entries.len(), 1);
        assert_eq!(ledger.entries[0].evaluation.score, Some(1.0));
    }

    /// Under `ScoreImprovement`, a higher score is kept, becomes the new
    /// in-memory baseline, and adds a second ledger entry.
    #[test]
    fn score_improvement_keeps_and_commits() {
        let tmp = repo_with_file();
        // Uncommitted candidate change that the keep should commit.
        fs::write(tmp.path().join("note.txt"), "candidate\n").unwrap();
        let mut mission = ResearchMission {
            id: "mission".to_string(),
            hypothesis: "improve score".to_string(),
            evaluator_command: "printf '{\"pass\":true,\"score\":2.0}'".to_string(),
            evaluator_format: EvaluatorFormat::Json,
            keep_policy: KeepPolicy::ScoreImprovement,
            max_iterations: 3,
            worktree_dir: tmp.path().to_path_buf(),
            baseline: Some(baseline_result(Some(1.0), None, true)),
        };
        let mut ledger = ResearchLedger {
            entries: vec![LedgerEntry {
                iteration: 0,
                decision: ResearchDecision::Baseline,
                evaluation: baseline_result(Some(1.0), None, true),
                commit: git_head(tmp.path()).unwrap(),
                timestamp: chrono::Utc::now(),
            }],
            path: tmp.path().join("ledger.jsonl"),
        };

        let decision = run_research_iteration(&mut mission, &mut ledger).unwrap();
        assert_eq!(decision, ResearchDecision::Keep);
        assert_eq!(
            mission.baseline.as_ref().and_then(|result| result.score),
            Some(2.0)
        );
        assert_eq!(ledger.entries.len(), 2);
    }

    /// Under `ScoreImprovement`, a score below the baseline is discarded: the
    /// file content and HEAD both return to the baseline commit.
    #[test]
    fn discard_resets_to_last_kept_commit() {
        let tmp = repo_with_file();
        let baseline_commit = git_head(tmp.path()).unwrap();
        // Uncommitted candidate change that the hard reset should revert.
        fs::write(tmp.path().join("note.txt"), "bad candidate\n").unwrap();
        let mut mission = ResearchMission {
            id: "mission".to_string(),
            hypothesis: "avoid regression".to_string(),
            evaluator_command: "printf '{\"pass\":true,\"score\":1.0}'".to_string(),
            evaluator_format: EvaluatorFormat::Json,
            keep_policy: KeepPolicy::ScoreImprovement,
            max_iterations: 3,
            worktree_dir: tmp.path().to_path_buf(),
            baseline: Some(baseline_result(Some(2.0), None, true)),
        };
        let mut ledger = ResearchLedger {
            entries: vec![LedgerEntry {
                iteration: 0,
                decision: ResearchDecision::Baseline,
                evaluation: baseline_result(Some(2.0), None, true),
                commit: baseline_commit.clone(),
                timestamp: chrono::Utc::now(),
            }],
            path: tmp.path().join("ledger.jsonl"),
        };

        let decision = run_research_iteration(&mut mission, &mut ledger).unwrap();
        assert_eq!(decision, ResearchDecision::Discard);
        assert_eq!(
            fs::read_to_string(tmp.path().join("note.txt")).unwrap(),
            "baseline\n"
        );
        assert_eq!(git_head(tmp.path()).unwrap(), baseline_commit);
    }

    /// In exit-code mode the parity percentage comes from the worktree's
    /// `PARITY.md`.
    /// NOTE(review): how `ParityReport` arrives at 50 for this fixture is
    /// defined outside this file — confirm against its implementation.
    #[test]
    fn parity_improvement_uses_parity_report() {
        let tmp = repo_with_file();
        fs::write(
            tmp.path().join("PARITY.md"),
            concat!(
                "---\n",
                "project: trivial\n",
                "target: trivial.z80\n",
                "source_platform: zx-spectrum-z80\n",
                "target_language: rust\n",
                "last_verified: 2026-04-06\n",
                "overall_parity: 50%\n",
                "---\n\n",
                "| Behavior | Spec | Test | Implementation | Verified | Notes |\n",
                "| --- | --- | --- | --- | --- | --- |\n",
                "| Startup | complete | complete | complete | PASS | ok |\n",
                "| Errors | complete | complete | draft | -- | pending |\n",
            ),
        )
        .unwrap();

        let result = run_evaluator("true", tmp.path(), &EvaluatorFormat::ExitCode).unwrap();
        assert_eq!(result.parity_pct, Some(50));
    }
}