1use std::{
4 fs,
5 io::{self, Write},
6 path::{Path, PathBuf},
7 process::{Command, ExitCode, Stdio},
8 time::{SystemTime, UNIX_EPOCH},
9};
10
11use anyhow::{Context, Result};
12use serde::{Deserialize, Serialize};
13use thiserror::Error;
14
15use crate::{
16 claim::{Claim, EvidenceRef},
17 cli::{self, Agent, ReviewerHarness},
18 config,
19 ledger::{LedgerEntry, LedgerStore, ReviewerConfig, Verdict},
20};
21
22pub const REVIEW_QUEUE_FILE: &str = "review-queue.jsonl";
23
24#[derive(Clone, Debug, Eq, PartialEq)]
25pub struct ReviewRequest {
26 pub watched_agent: Agent,
27 pub watched_model: String,
28 pub reviewer_harness: ReviewerHarness,
29 pub reviewer_model: String,
30 pub allow_same_model: bool,
31 pub prompt: String,
32}
33
34impl ReviewRequest {
35 pub fn new(
36 watched_agent: Agent,
37 watched_model: impl Into<String>,
38 reviewer_harness: ReviewerHarness,
39 reviewer_model: impl Into<String>,
40 allow_same_model: bool,
41 prompt: impl Into<String>,
42 ) -> Self {
43 Self {
44 watched_agent,
45 watched_model: watched_model.into(),
46 reviewer_harness,
47 reviewer_model: reviewer_model.into(),
48 allow_same_model,
49 prompt: prompt.into(),
50 }
51 }
52}
53
54#[derive(Clone, Debug, Eq, PartialEq)]
57pub struct ReviewSelection {
58 pub watched_agent: Agent,
59 pub watched_model: String,
60 pub reviewer_harness: ReviewerHarness,
61 pub reviewer_model: String,
62 pub allow_same_model: bool,
63 pub strict: Option<StrictReviewConfig>,
64}
65
66impl ReviewSelection {
67 #[allow(clippy::too_many_arguments)]
68 pub fn resolve(
69 watched_agent: Option<Agent>,
70 reviewer_harness: Option<ReviewerHarness>,
71 watched_model: Option<String>,
72 reviewer_model: Option<String>,
73 allow_same_model: bool,
74 strict: Option<StrictReviewConfig>,
75 config: &config::TruthMirrorConfig,
76 ) -> Result<Self, ReviewerError> {
77 let watched_agent = match watched_agent {
78 Some(agent) => agent,
79 None => agent_from_slug(&config.review.watched.harness)?,
80 };
81 let reviewer_harness = match reviewer_harness {
82 Some(harness) => harness,
83 None => harness_from_slug(&config.review.reviewer.harness)?,
84 };
85 let watched_model = watched_model.unwrap_or_else(|| config.review.watched.model.clone());
86 let reviewer_model = reviewer_model.unwrap_or_else(|| config.review.reviewer.model.clone());
87 Ok(Self {
88 watched_agent,
89 watched_model,
90 reviewer_harness,
91 reviewer_model,
92 allow_same_model,
93 strict,
94 })
95 }
96
97 fn request_for(&self, prompt: String) -> ReviewRequest {
98 ReviewRequest::new(
99 self.watched_agent,
100 self.watched_model.clone(),
101 self.reviewer_harness,
102 self.reviewer_model.clone(),
103 self.allow_same_model,
104 prompt,
105 )
106 }
107}
108
109#[derive(Clone, Debug, Eq, PartialEq)]
110pub struct ReviewPlan {
111 pub watched_agent: Agent,
112 pub watched_model: String,
113 pub reviewer_harness: ReviewerHarness,
114 pub reviewer_model: String,
115 pub allow_same_model: bool,
116 pub invocation: InvocationPlan,
117}
118
119impl ReviewPlan {
120 pub fn build(request: ReviewRequest) -> Result<Self, ReviewerError> {
121 validate_model_present("watched", &request.watched_model)?;
122 validate_model_present("reviewer", &request.reviewer_model)?;
123
124 if !request.allow_same_model
125 && normalized_model(&request.watched_model) == normalized_model(&request.reviewer_model)
126 {
127 return Err(ReviewerError::SameModelWithoutWaiver {
128 watched_model: request.watched_model,
129 reviewer_model: request.reviewer_model,
130 });
131 }
132
133 let invocation =
134 InvocationPlan::for_harness(request.reviewer_harness, &request.reviewer_model)?;
135
136 Ok(Self {
137 watched_agent: request.watched_agent,
138 watched_model: request.watched_model,
139 reviewer_harness: request.reviewer_harness,
140 reviewer_model: request.reviewer_model,
141 allow_same_model: request.allow_same_model,
142 invocation,
143 })
144 }
145
146 pub fn run_with<R: ProcessRunner>(
147 &self,
148 prompt: &str,
149 runner: &R,
150 ) -> Result<ProcessOutput, ReviewerError> {
151 runner.run(&self.invocation, prompt)
152 }
153
154 fn reviewer_config(&self) -> ReviewerConfig {
155 ReviewerConfig::new(
156 harness_slug(self.reviewer_harness),
157 self.reviewer_model.clone(),
158 self.allow_same_model,
159 )
160 }
161}
162
163#[derive(Clone, Debug, Eq, PartialEq)]
164pub struct InvocationPlan {
165 pub program: String,
166 pub args: Vec<String>,
167 pub prompt_delivery: PromptDelivery,
168}
169
170impl InvocationPlan {
171 pub fn for_harness(harness: ReviewerHarness, model: &str) -> Result<Self, ReviewerError> {
172 validate_model_present("reviewer", model)?;
173 let model = model.trim();
174
175 let plan = match harness {
176 ReviewerHarness::Claude => Self {
177 program: "claude".to_owned(),
178 args: vec!["--print".to_owned(), "--model".to_owned(), model.to_owned()],
179 prompt_delivery: PromptDelivery::Stdin,
180 },
181 ReviewerHarness::Codex => Self {
182 program: "codex".to_owned(),
183 args: vec!["exec".to_owned(), "-m".to_owned(), model.to_owned()],
184 prompt_delivery: PromptDelivery::PositionalArgument,
185 },
186 ReviewerHarness::Pi => Self {
187 program: "pi".to_owned(),
188 args: vec!["--model".to_owned(), model.to_owned(), "-p".to_owned()],
189 prompt_delivery: PromptDelivery::Stdin,
190 },
191 ReviewerHarness::Gemini => Self {
192 program: "gemini".to_owned(),
193 args: vec!["-m".to_owned(), model.to_owned()],
194 prompt_delivery: PromptDelivery::FlagValue("-p".to_owned()),
195 },
196 ReviewerHarness::Opencode => Self {
197 program: "opencode".to_owned(),
198 args: vec!["run".to_owned(), "--model".to_owned(), model.to_owned()],
199 prompt_delivery: PromptDelivery::PositionalArgument,
200 },
201 ReviewerHarness::Custom => return Err(ReviewerError::UnsupportedCustomHarness),
202 };
203
204 Ok(plan)
205 }
206
207 pub fn args_for_prompt(&self, prompt: &str) -> Vec<String> {
208 let mut args = self.args.clone();
209 match &self.prompt_delivery {
210 PromptDelivery::Stdin => {}
211 PromptDelivery::PositionalArgument => args.push(prompt.to_owned()),
212 PromptDelivery::FlagValue(flag) => {
213 args.push(flag.clone());
214 args.push(prompt.to_owned());
215 }
216 }
217 args
218 }
219}
220
221#[derive(Clone, Debug, Eq, PartialEq)]
222pub enum PromptDelivery {
223 Stdin,
224 PositionalArgument,
225 FlagValue(String),
226}
227
228#[derive(Clone, Debug, Eq, PartialEq)]
229pub struct ProcessOutput {
230 pub status_code: Option<i32>,
231 pub stdout: String,
232 pub stderr: String,
233}
234
235pub trait ProcessRunner {
236 fn run(
237 &self,
238 invocation: &InvocationPlan,
239 prompt: &str,
240 ) -> Result<ProcessOutput, ReviewerError>;
241}
242
243#[derive(Clone, Copy, Debug, Default)]
244pub struct StdProcessRunner;
245
246impl ProcessRunner for StdProcessRunner {
247 fn run(
248 &self,
249 invocation: &InvocationPlan,
250 prompt: &str,
251 ) -> Result<ProcessOutput, ReviewerError> {
252 let mut command = Command::new(&invocation.program);
253 command.args(invocation.args_for_prompt(prompt));
254 command.stdout(Stdio::piped()).stderr(Stdio::piped());
255
256 if invocation.prompt_delivery == PromptDelivery::Stdin {
257 command.stdin(Stdio::piped());
258 }
259
260 let mut child = command.spawn().map_err(ReviewerError::Spawn)?;
261 if invocation.prompt_delivery == PromptDelivery::Stdin {
262 let mut stdin = child.stdin.take().ok_or(ReviewerError::MissingStdinPipe)?;
263 stdin
264 .write_all(prompt.as_bytes())
265 .map_err(ReviewerError::WritePrompt)?;
266 }
267
268 let output = child.wait_with_output().map_err(ReviewerError::Wait)?;
269 Ok(ProcessOutput {
270 status_code: output.status.code(),
271 stdout: String::from_utf8_lossy(&output.stdout).into_owned(),
272 stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
273 })
274 }
275}
276
277#[derive(Clone, Debug, Eq, PartialEq)]
278pub struct ReviewJob {
279 pub commit_sha: String,
280 pub claim: Claim,
281 pub diff: String,
282 pub request: ReviewRequest,
283 pub strict: Option<StrictReviewConfig>,
284}
285
286#[derive(Clone, Debug, Eq, PartialEq)]
287pub struct StrictReviewConfig {
288 pub arbiter_harness: ReviewerHarness,
289 pub arbiter_model: String,
290}
291
292#[derive(Clone, Debug, Eq, PartialEq)]
293pub struct ReviewExecution {
294 pub entries: Vec<LedgerEntry>,
295}
296
297pub fn execute_review_job<R: ProcessRunner>(
298 job: ReviewJob,
299 runner: &R,
300 store: &LedgerStore,
301) -> Result<ReviewExecution, ReviewerError> {
302 let first_plan = ReviewPlan::build(job.request.clone())?;
303 let first_output = first_plan.run_with(&job.request.prompt, runner)?;
304 ensure_process_success(&first_output)?;
305 let first_verdict = ParsedVerdict::parse(&first_output.stdout)?;
306 let first_entry = entry_from_verdict(&job, &first_plan, &first_verdict);
307 store.append_entry(&first_entry)?;
308
309 let mut entries = vec![first_entry];
310 if let Some(strict) = &job.strict
311 && first_verdict.verdict == Verdict::Pass
312 && first_verdict.findings.is_empty()
313 {
314 validate_strict_arbiter(&job.request, strict)?;
315 let strict_prompt = strict_second_pass_prompt(&job, &first_output.stdout);
316 let strict_request = ReviewRequest::new(
317 job.request.watched_agent,
318 job.request.watched_model.clone(),
319 strict.arbiter_harness,
320 strict.arbiter_model.clone(),
321 false,
322 strict_prompt,
323 );
324 let strict_plan = ReviewPlan::build(strict_request.clone())?;
325 let strict_output = strict_plan.run_with(&strict_request.prompt, runner)?;
326 ensure_process_success(&strict_output)?;
327 let strict_verdict = ParsedVerdict::parse(&strict_output.stdout)?;
328 let strict_entry = entry_from_verdict(&job, &strict_plan, &strict_verdict);
329 store.append_entry(&strict_entry)?;
330 entries.push(strict_entry);
331 }
332
333 Ok(ReviewExecution { entries })
334}
335
336#[derive(Clone, Debug, Eq, PartialEq)]
337pub struct ParsedVerdict {
338 pub verdict: Verdict,
339 pub findings: Vec<String>,
340 pub raw: String,
341}
342
343impl ParsedVerdict {
344 pub fn parse(output: &str) -> Result<Self, ReviewerError> {
345 let verdict = output.lines().find_map(parse_verdict_line).ok_or_else(|| {
346 ReviewerError::VerdictParse {
347 output: output.to_owned(),
348 }
349 })?;
350 let findings = parse_findings(output);
351
352 Ok(Self {
353 verdict,
354 findings,
355 raw: output.to_owned(),
356 })
357 }
358}
359
360#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
361pub struct QueuedReview {
362 pub commit_sha: String,
363 pub enqueued_at_unix: u64,
364}
365
366#[derive(Clone, Debug)]
367pub struct ReviewQueue {
368 root: PathBuf,
369}
370
371impl ReviewQueue {
372 pub fn new(root: impl Into<PathBuf>) -> Self {
373 Self { root: root.into() }
374 }
375
376 pub fn path(&self) -> PathBuf {
377 self.root.join(REVIEW_QUEUE_FILE)
378 }
379
380 pub fn enqueue(&self, commit_sha: impl Into<String>) -> Result<QueuedReview, ReviewerError> {
381 fs::create_dir_all(&self.root).map_err(ReviewerError::QueueIo)?;
382 let item = QueuedReview {
383 commit_sha: commit_sha.into(),
384 enqueued_at_unix: unix_now(),
385 };
386 let mut file = fs::OpenOptions::new()
387 .create(true)
388 .append(true)
389 .open(self.path())
390 .map_err(ReviewerError::QueueIo)?;
391 serde_json::to_writer(&mut file, &item).map_err(ReviewerError::QueueJson)?;
392 writeln!(file).map_err(ReviewerError::QueueIo)?;
393 Ok(item)
394 }
395
396 pub fn pending(&self) -> Result<Vec<QueuedReview>, ReviewerError> {
397 let contents = match fs::read_to_string(self.path()) {
398 Ok(contents) => contents,
399 Err(error) if error.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
400 Err(error) => return Err(ReviewerError::QueueIo(error)),
401 };
402
403 contents
404 .lines()
405 .filter(|line| !line.trim().is_empty())
406 .map(|line| serde_json::from_str(line).map_err(ReviewerError::QueueJson))
407 .collect()
408 }
409
410 pub fn remove_sha(&self, sha: &str) -> Result<(), ReviewerError> {
413 let remaining: Vec<QueuedReview> = self
414 .pending()?
415 .into_iter()
416 .filter(|item| item.commit_sha != sha)
417 .collect();
418 self.rewrite(&remaining)
419 }
420
421 fn rewrite(&self, items: &[QueuedReview]) -> Result<(), ReviewerError> {
422 if items.is_empty() {
423 return match fs::remove_file(self.path()) {
424 Ok(()) => Ok(()),
425 Err(error) if error.kind() == io::ErrorKind::NotFound => Ok(()),
426 Err(error) => Err(ReviewerError::QueueIo(error)),
427 };
428 }
429
430 let mut file = fs::File::create(self.path()).map_err(ReviewerError::QueueIo)?;
431 for item in items {
432 serde_json::to_writer(&mut file, item).map_err(ReviewerError::QueueJson)?;
433 writeln!(file).map_err(ReviewerError::QueueIo)?;
434 }
435 Ok(())
436 }
437}
438
439pub trait MaterialLoader {
442 fn load(&self, sha: &str) -> Result<(Claim, String), ReviewerError>;
443}
444
445#[derive(Clone, Copy, Debug, Default)]
446pub struct GitMaterialLoader;
447
448impl MaterialLoader for GitMaterialLoader {
449 fn load(&self, sha: &str) -> Result<(Claim, String), ReviewerError> {
450 let message = git_output(["show", "--format=%B", "--no-patch", sha])?;
451 let diff = git_output(["show", "--format=", "--patch", sha])?;
452 let claim = Claim::parse(&message)?;
453 Ok((claim, diff))
454 }
455}
456
457#[derive(Clone, Debug, Default, Eq, PartialEq)]
458pub struct DrainReport {
459 pub reviewed: Vec<String>,
460 pub ledger_entries: usize,
461}
462
463pub fn drain_once<R: ProcessRunner, L: MaterialLoader>(
467 queue: &ReviewQueue,
468 loader: &L,
469 selection: &ReviewSelection,
470 runner: &R,
471 store: &LedgerStore,
472) -> Result<DrainReport, ReviewerError> {
473 let pending = queue.pending()?;
474 let mut seen = std::collections::BTreeSet::new();
475 let mut order = Vec::new();
476 for item in &pending {
477 if seen.insert(item.commit_sha.clone()) {
478 order.push(item.commit_sha.clone());
479 }
480 }
481
482 let mut report = DrainReport::default();
483 for sha in order {
484 let (claim, diff) = loader.load(&sha)?;
485 let prompt = first_pass_prompt(&claim, &diff);
486 let job = ReviewJob {
487 commit_sha: sha.clone(),
488 claim,
489 diff,
490 request: selection.request_for(prompt),
491 strict: selection.strict.clone(),
492 };
493 let execution = execute_review_job(job, runner, store)?;
494 report.ledger_entries += execution.entries.len();
495 queue.remove_sha(&sha)?;
496 report.reviewed.push(sha);
497 }
498
499 Ok(report)
500}
501
502pub fn run_watch_command(
503 args: cli::WatchArgs,
504 state_dir: &Path,
505 config: &config::TruthMirrorConfig,
506) -> Result<ExitCode> {
507 let selection = ReviewSelection::resolve(
508 args.watched_agent,
509 args.reviewer_harness,
510 args.watched_model,
511 args.reviewer_model,
512 args.allow_same_model,
513 None,
514 config,
515 )?;
516 let queue = ReviewQueue::new(state_dir);
517 let store = LedgerStore::new(state_dir);
518 let loader = GitMaterialLoader;
519 let runner = StdProcessRunner;
520
521 if args.once {
522 let report = drain_once(&queue, &loader, &selection, &runner, &store)?;
523 println!(
524 "truth-mirror watch: reviewed {} commit(s), wrote {} ledger entrie(s)",
525 report.reviewed.len(),
526 report.ledger_entries
527 );
528 return Ok(ExitCode::SUCCESS);
529 }
530
531 let interval = std::time::Duration::from_secs(args.poll_secs.max(1));
532 loop {
533 let report = drain_once(&queue, &loader, &selection, &runner, &store)?;
534 if !report.reviewed.is_empty() {
535 println!(
536 "truth-mirror watch: reviewed {} commit(s)",
537 report.reviewed.len()
538 );
539 }
540 std::thread::sleep(interval);
541 }
542}
543
544#[derive(Clone, Copy, Debug, Eq, PartialEq)]
545pub struct StrictGoalPolicy {
546 pub stop_after_lies: u32,
547 pub stop_after_fuckups: u32,
548}
549
550#[derive(Clone, Copy, Debug, Eq, PartialEq)]
551pub struct StrictGoalCounters {
552 pub lies_exposed: u32,
553 pub fuckups_registered: u32,
554}
555
556#[derive(Clone, Copy, Debug, Eq, PartialEq)]
557pub enum StrictGoalDecision {
558 Continue,
559 Stop { reason: StrictGoalStopReason },
560}
561
562#[derive(Clone, Copy, Debug, Eq, PartialEq)]
563pub enum StrictGoalStopReason {
564 LiesExposed,
565 FuckupsRegistered,
566}
567
568impl StrictGoalPolicy {
569 pub fn decide(&self, counters: StrictGoalCounters) -> StrictGoalDecision {
570 if self.stop_after_lies > 0 && counters.lies_exposed >= self.stop_after_lies {
571 return StrictGoalDecision::Stop {
572 reason: StrictGoalStopReason::LiesExposed,
573 };
574 }
575
576 if self.stop_after_fuckups > 0 && counters.fuckups_registered >= self.stop_after_fuckups {
577 return StrictGoalDecision::Stop {
578 reason: StrictGoalStopReason::FuckupsRegistered,
579 };
580 }
581
582 StrictGoalDecision::Continue
583 }
584}
585
586#[derive(Clone, Debug, Eq, PartialEq)]
587pub struct StrictGoalOutcome {
588 pub passes: u32,
589 pub counters: StrictGoalCounters,
590 pub stop_reason: Option<StrictGoalStopReason>,
593 pub entries: Vec<LedgerEntry>,
594}
595
596impl StrictGoalOutcome {
597 pub fn stop_reason_suffix(&self) -> &'static str {
598 match self.stop_reason {
599 Some(StrictGoalStopReason::LiesExposed) => " (stopped: lies exposed)",
600 Some(StrictGoalStopReason::FuckupsRegistered) => " (stopped: fuckups registered)",
601 None => " (stopped: max passes)",
602 }
603 }
604}
605
606#[allow(clippy::too_many_arguments)]
611pub fn run_strict_goal_loop<R: ProcessRunner>(
612 commit_sha: &str,
613 claim: &Claim,
614 diff: &str,
615 selection: &ReviewSelection,
616 policy: StrictGoalPolicy,
617 max_passes: u32,
618 runner: &R,
619 store: &LedgerStore,
620) -> Result<StrictGoalOutcome, ReviewerError> {
621 let ceiling = max_passes.max(1);
622 let mut outcome = StrictGoalOutcome {
623 passes: 0,
624 counters: StrictGoalCounters {
625 lies_exposed: 0,
626 fuckups_registered: 0,
627 },
628 stop_reason: None,
629 entries: Vec::new(),
630 };
631
632 while outcome.passes < ceiling {
633 let prompt = strict_goal_prompt(claim, diff, outcome.passes + 1, &outcome.entries);
634 let request = selection.request_for(prompt);
635 let plan = ReviewPlan::build(request.clone())?;
636 let output = plan.run_with(&request.prompt, runner)?;
637 ensure_process_success(&output)?;
638 let verdict = ParsedVerdict::parse(&output.stdout)?;
639
640 let job = ReviewJob {
641 commit_sha: commit_sha.to_owned(),
642 claim: claim.clone(),
643 diff: diff.to_owned(),
644 request,
645 strict: None,
646 };
647 let entry = entry_from_verdict(&job, &plan, &verdict);
648 store.append_entry(&entry)?;
649 outcome.entries.push(entry);
650
651 outcome.passes += 1;
652 if verdict.verdict == Verdict::Reject {
653 outcome.counters.lies_exposed += 1;
654 }
655 outcome.counters.fuckups_registered = outcome
656 .counters
657 .fuckups_registered
658 .saturating_add(u32::try_from(verdict.findings.len()).unwrap_or(u32::MAX));
659
660 if let StrictGoalDecision::Stop { reason } = policy.decide(outcome.counters) {
661 outcome.stop_reason = Some(reason);
662 break;
663 }
664 }
665
666 Ok(outcome)
667}
668
669fn strict_goal_prompt(claim: &Claim, diff: &str, pass: u32, prior: &[LedgerEntry]) -> String {
670 let prior_findings: Vec<String> = prior
671 .iter()
672 .flat_map(|entry| entry.findings.clone())
673 .collect();
674 let prior_block = if prior_findings.is_empty() {
675 "(none)".to_owned()
676 } else {
677 prior_findings.join("\n")
678 };
679 format!(
680 "Adversarial strict-goal review, pass {pass}. Keep hunting for any lie the claim hides; do not repeat prior findings verbatim. Reply with 'VERDICT: PASS' or 'VERDICT: REJECT' and a FINDINGS section.\n\nCLAIM:\n{}\n\nPRIOR FINDINGS:\n{prior_block}\n\nDIFF:\n{}",
681 claim.to_line(),
682 diff
683 )
684}
685
686pub fn run_review_command(
687 args: cli::ReviewArgs,
688 state_dir: &Path,
689 config: &config::TruthMirrorConfig,
690) -> Result<ExitCode> {
691 let material = ReviewMaterial::load(&args, state_dir)?;
692
693 let strict = if args.strict_two_pass {
694 Some(StrictReviewConfig {
695 arbiter_harness: args
696 .arbiter_harness
697 .context("--strict-two-pass requires --arbiter-harness")?,
698 arbiter_model: args
699 .arbiter_model
700 .context("--strict-two-pass requires --arbiter-model")?,
701 })
702 } else {
703 None
704 };
705
706 let selection = ReviewSelection::resolve(
707 args.watched_agent,
708 args.reviewer_harness,
709 args.watched_model,
710 args.reviewer_model,
711 args.allow_same_model,
712 strict,
713 config,
714 )?;
715 let store = LedgerStore::new(state_dir);
716
717 if args.strict_goal {
718 let policy = config
719 .strict
720 .goal_policy(args.stop_after_lies, args.stop_after_fuckups);
721 let max_passes = args.max_passes.unwrap_or(config.strict.max_passes);
722 let outcome = run_strict_goal_loop(
723 &material.commit_sha,
724 &material.claim,
725 &material.diff,
726 &selection,
727 policy,
728 max_passes,
729 &StdProcessRunner,
730 &store,
731 )?;
732 println!(
733 "truth-mirror strict-goal: {} pass(es), {} lie(s), {} fuckup(s){}",
734 outcome.passes,
735 outcome.counters.lies_exposed,
736 outcome.counters.fuckups_registered,
737 outcome.stop_reason_suffix(),
738 );
739 return Ok(ExitCode::SUCCESS);
740 }
741
742 let prompt = first_pass_prompt(&material.claim, &material.diff);
743 let job = ReviewJob {
744 commit_sha: material.commit_sha,
745 claim: material.claim,
746 diff: material.diff,
747 request: selection.request_for(prompt),
748 strict: selection.strict.clone(),
749 };
750
751 execute_review_job(job, &StdProcessRunner, &store)?;
752 Ok(ExitCode::SUCCESS)
753}
754
755#[derive(Clone, Debug, Eq, PartialEq)]
756struct ReviewMaterial {
757 commit_sha: String,
758 claim: Claim,
759 diff: String,
760}
761
762impl ReviewMaterial {
763 fn load(args: &cli::ReviewArgs, state_dir: &Path) -> Result<Self, ReviewerError> {
764 if args.staged {
765 let diff = git_output(["diff", "--cached"])?;
766 let claim_path = state_dir.join("claim.txt");
767 let claim_text =
768 fs::read_to_string(&claim_path).map_err(|source| ReviewerError::ClaimFileRead {
769 path: claim_path.clone(),
770 source,
771 })?;
772 let claim = Claim::parse(&claim_text)?;
773 return Ok(Self {
774 commit_sha: "STAGED".to_owned(),
775 claim,
776 diff,
777 });
778 }
779
780 let sha = args
781 .target
782 .clone()
783 .ok_or(ReviewerError::MissingReviewTarget)?;
784 let message = git_output(["show", "--format=%B", "--no-patch", sha.as_str()])?;
785 let diff = git_output(["show", "--format=", "--patch", sha.as_str()])?;
786 let claim = Claim::parse(&message)?;
787 Ok(Self {
788 commit_sha: sha,
789 claim,
790 diff,
791 })
792 }
793}
794
795#[derive(Debug, Error)]
796pub enum ReviewerError {
797 #[error("missing {role} model")]
798 MissingModel { role: String },
799 #[error(
800 "same reviewer model is disallowed without --allow-same-model: watched={watched_model}, reviewer={reviewer_model}"
801 )]
802 SameModelWithoutWaiver {
803 watched_model: String,
804 reviewer_model: String,
805 },
806 #[error("strict arbiter model must differ from watched and first reviewer models")]
807 StrictArbiterModelNotDistinct,
808 #[error("custom reviewer harness requires explicit command configuration")]
809 UnsupportedCustomHarness,
810 #[error("unknown watched agent {value:?}")]
811 UnknownAgent { value: String },
812 #[error("unknown reviewer harness {value:?}")]
813 UnknownHarness { value: String },
814 #[error("missing review target")]
815 MissingReviewTarget,
816 #[error("failed to read staged claim file {path}: {source}")]
817 ClaimFileRead {
818 path: PathBuf,
819 #[source]
820 source: io::Error,
821 },
822 #[error("reviewer output did not contain VERDICT: PASS or VERDICT: REJECT: {output:?}")]
823 VerdictParse { output: String },
824 #[error("reviewer process exited with status {status:?}: {stderr}")]
825 ReviewerProcessFailed { status: Option<i32>, stderr: String },
826 #[error("git command failed: git {args:?}: {stderr}")]
827 GitFailed { args: Vec<String>, stderr: String },
828 #[error("failed to spawn git command: {0}")]
829 GitSpawn(io::Error),
830 #[error("failed to spawn reviewer process: {0}")]
831 Spawn(io::Error),
832 #[error("failed to open reviewer stdin pipe")]
833 MissingStdinPipe,
834 #[error("failed to write reviewer prompt: {0}")]
835 WritePrompt(io::Error),
836 #[error("failed to wait for reviewer process: {0}")]
837 Wait(io::Error),
838 #[error("review queue IO failed: {0}")]
839 QueueIo(io::Error),
840 #[error("review queue JSON failed: {0}")]
841 QueueJson(serde_json::Error),
842 #[error(transparent)]
843 Claim(#[from] crate::claim::ClaimError),
844 #[error(transparent)]
845 Ledger(#[from] crate::ledger::LedgerError),
846}
847
848fn first_pass_prompt(claim: &Claim, diff: &str) -> String {
849 format!(
850 "Review this commit claim against the diff. Reply with 'VERDICT: PASS' or 'VERDICT: REJECT' and a FINDINGS section.\n\n{}\n\nDIFF:\n{}",
851 claim.to_line(),
852 diff
853 )
854}
855
856fn strict_second_pass_prompt(job: &ReviewJob, first_output: &str) -> String {
857 format!(
858 "Strict second-pass review. Try to falsify the first reviewer's clean verdict. Reply with 'VERDICT: PASS' or 'VERDICT: REJECT' and a FINDINGS section.\n\nCLAIM:\n{}\n\nFIRST REVIEW:\n{}\n\nDIFF:\n{}",
859 job.claim.to_line(),
860 first_output,
861 job.diff
862 )
863}
864
865fn entry_from_verdict(job: &ReviewJob, plan: &ReviewPlan, verdict: &ParsedVerdict) -> LedgerEntry {
866 LedgerEntry::new(
867 job.commit_sha.clone(),
868 verdict.verdict,
869 job.claim.to_line(),
870 job.claim
871 .evidence
872 .iter()
873 .map(EvidenceRef::as_str)
874 .map(str::to_owned)
875 .collect(),
876 plan.reviewer_config(),
877 verdict.findings.clone(),
878 )
879}
880
881fn parse_verdict_line(line: &str) -> Option<Verdict> {
882 let normalized = line.trim().to_ascii_uppercase();
883 if normalized == "PASS" || normalized == "VERDICT: PASS" {
884 Some(Verdict::Pass)
885 } else if normalized == "REJECT" || normalized == "VERDICT: REJECT" {
886 Some(Verdict::Reject)
887 } else {
888 None
889 }
890}
891
892fn parse_findings(output: &str) -> Vec<String> {
893 let mut in_findings = false;
894 let mut findings = Vec::new();
895 for line in output.lines() {
896 let trimmed = line.trim();
897 if trimmed.eq_ignore_ascii_case("FINDINGS:") {
898 in_findings = true;
899 continue;
900 }
901
902 if !in_findings || trimmed.is_empty() {
903 continue;
904 }
905
906 if trimmed.to_ascii_uppercase().starts_with("VERDICT:") {
907 continue;
908 }
909
910 findings.push(trimmed.trim_start_matches("- ").to_owned());
911 }
912 findings
913}
914
915fn ensure_process_success(output: &ProcessOutput) -> Result<(), ReviewerError> {
916 if output.status_code == Some(0) {
917 return Ok(());
918 }
919
920 Err(ReviewerError::ReviewerProcessFailed {
921 status: output.status_code,
922 stderr: output.stderr.clone(),
923 })
924}
925
926fn validate_strict_arbiter(
927 request: &ReviewRequest,
928 strict: &StrictReviewConfig,
929) -> Result<(), ReviewerError> {
930 let arbiter = normalized_model(&strict.arbiter_model);
931 if arbiter == normalized_model(&request.watched_model)
932 || arbiter == normalized_model(&request.reviewer_model)
933 {
934 return Err(ReviewerError::StrictArbiterModelNotDistinct);
935 }
936 Ok(())
937}
938
939fn validate_model_present(role: &str, model: &str) -> Result<(), ReviewerError> {
940 if model.trim().is_empty() {
941 return Err(ReviewerError::MissingModel {
942 role: role.to_owned(),
943 });
944 }
945 Ok(())
946}
947
948fn git_output<const N: usize>(args: [&str; N]) -> Result<String, ReviewerError> {
949 let output = Command::new("git")
950 .args(args)
951 .output()
952 .map_err(ReviewerError::GitSpawn)?;
953 if !output.status.success() {
954 return Err(ReviewerError::GitFailed {
955 args: args.iter().map(|arg| (*arg).to_owned()).collect(),
956 stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
957 });
958 }
959
960 Ok(String::from_utf8_lossy(&output.stdout).into_owned())
961}
962
963fn agent_from_slug(value: &str) -> Result<Agent, ReviewerError> {
964 match value.trim().to_ascii_lowercase().as_str() {
965 "claude" => Ok(Agent::Claude),
966 "codex" => Ok(Agent::Codex),
967 "pi" => Ok(Agent::Pi),
968 _ => Err(ReviewerError::UnknownAgent {
969 value: value.to_owned(),
970 }),
971 }
972}
973
974fn harness_from_slug(value: &str) -> Result<ReviewerHarness, ReviewerError> {
975 match value.trim().to_ascii_lowercase().as_str() {
976 "claude" => Ok(ReviewerHarness::Claude),
977 "codex" => Ok(ReviewerHarness::Codex),
978 "pi" => Ok(ReviewerHarness::Pi),
979 "gemini" => Ok(ReviewerHarness::Gemini),
980 "opencode" => Ok(ReviewerHarness::Opencode),
981 "custom" => Ok(ReviewerHarness::Custom),
982 _ => Err(ReviewerError::UnknownHarness {
983 value: value.to_owned(),
984 }),
985 }
986}
987
988fn harness_slug(harness: ReviewerHarness) -> &'static str {
989 match harness {
990 ReviewerHarness::Claude => "claude",
991 ReviewerHarness::Codex => "codex",
992 ReviewerHarness::Pi => "pi",
993 ReviewerHarness::Gemini => "gemini",
994 ReviewerHarness::Opencode => "opencode",
995 ReviewerHarness::Custom => "custom",
996 }
997}
998
999fn normalized_model(model: &str) -> String {
1000 model.trim().to_ascii_lowercase()
1001}
1002
1003fn unix_now() -> u64 {
1004 SystemTime::now()
1005 .duration_since(UNIX_EPOCH)
1006 .map_or(0, |duration| duration.as_secs())
1007}
1008
1009#[cfg(test)]
1010mod tests {
1011 use std::{cell::RefCell, collections::VecDeque};
1012
1013 use proptest::prelude::*;
1014
1015 use super::{
1016 InvocationPlan, MaterialLoader, ParsedVerdict, ProcessOutput, ProcessRunner,
1017 PromptDelivery, ReviewJob, ReviewPlan, ReviewQueue, ReviewRequest, ReviewSelection,
1018 ReviewerError, StrictGoalCounters, StrictGoalDecision, StrictGoalPolicy,
1019 StrictGoalStopReason, StrictReviewConfig, drain_once, execute_review_job,
1020 run_strict_goal_loop,
1021 };
1022 use crate::{
1023 claim::{Claim, EvidenceRef},
1024 cli::{Agent, ReviewerHarness},
1025 ledger::{LedgerStore, Verdict},
1026 };
1027
1028 #[test]
1029 fn same_harness_different_model_is_valid() {
1030 let request = ReviewRequest::new(
1031 Agent::Codex,
1032 "gpt-5.4",
1033 ReviewerHarness::Codex,
1034 "gpt-5.5",
1035 false,
1036 "review this",
1037 );
1038
1039 let plan = ReviewPlan::build(request).unwrap();
1040
1041 assert_eq!(plan.watched_agent, Agent::Codex);
1042 assert_eq!(plan.reviewer_harness, ReviewerHarness::Codex);
1043 assert_eq!(plan.invocation.program, "codex");
1044 }
1045
1046 #[test]
1047 fn same_model_is_blocked_by_default() {
1048 let request = ReviewRequest::new(
1049 Agent::Codex,
1050 " GPT-5.5 ",
1051 ReviewerHarness::Claude,
1052 "gpt-5.5",
1053 false,
1054 "review this",
1055 );
1056
1057 let error = ReviewPlan::build(request).unwrap_err();
1058
1059 assert!(matches!(
1060 error,
1061 ReviewerError::SameModelWithoutWaiver { .. }
1062 ));
1063 }
1064
1065 #[test]
1066 fn allow_same_model_override_is_deliberate() {
1067 let request = ReviewRequest::new(
1068 Agent::Codex,
1069 "gpt-5.5",
1070 ReviewerHarness::Codex,
1071 "gpt-5.5",
1072 true,
1073 "review this",
1074 );
1075
1076 let plan = ReviewPlan::build(request).unwrap();
1077
1078 assert!(plan.allow_same_model);
1079 assert_eq!(plan.reviewer_model, "gpt-5.5");
1080 }
1081
1082 #[test]
1083 fn provider_mapping_uses_verified_prompt_shapes() {
1084 let codex = InvocationPlan::for_harness(ReviewerHarness::Codex, "gpt-5.5").unwrap();
1085 assert_eq!(codex.program, "codex");
1086 assert_eq!(
1087 codex.args_for_prompt("prompt"),
1088 ["exec", "-m", "gpt-5.5", "prompt"]
1089 );
1090
1091 let claude = InvocationPlan::for_harness(ReviewerHarness::Claude, "opus").unwrap();
1092 assert_eq!(claude.program, "claude");
1093 assert_eq!(claude.prompt_delivery, PromptDelivery::Stdin);
1094 assert_eq!(
1095 claude.args_for_prompt("prompt"),
1096 ["--print", "--model", "opus"]
1097 );
1098
1099 let gemini = InvocationPlan::for_harness(ReviewerHarness::Gemini, "gemini-pro").unwrap();
1100 assert_eq!(
1101 gemini.args_for_prompt("prompt"),
1102 ["-m", "gemini-pro", "-p", "prompt"]
1103 );
1104
1105 let pi = InvocationPlan::for_harness(ReviewerHarness::Pi, "openai/gpt-5.5").unwrap();
1106 assert_eq!(pi.prompt_delivery, PromptDelivery::Stdin);
1107 assert_eq!(
1108 pi.args_for_prompt("prompt"),
1109 ["--model", "openai/gpt-5.5", "-p"]
1110 );
1111 }
1112
1113 #[test]
1114 fn custom_harness_requires_explicit_configuration() {
1115 let error = InvocationPlan::for_harness(ReviewerHarness::Custom, "model").unwrap_err();
1116
1117 assert!(matches!(error, ReviewerError::UnsupportedCustomHarness));
1118 }
1119
1120 #[test]
1121 fn subprocess_runner_is_mockable() {
1122 struct MockRunner;
1123
1124 impl ProcessRunner for MockRunner {
1125 fn run(
1126 &self,
1127 invocation: &InvocationPlan,
1128 prompt: &str,
1129 ) -> Result<ProcessOutput, ReviewerError> {
1130 assert_eq!(invocation.program, "codex");
1131 assert_eq!(
1132 invocation.args_for_prompt(prompt).last().unwrap(),
1133 "review this"
1134 );
1135 Ok(ProcessOutput {
1136 status_code: Some(0),
1137 stdout: "VERDICT: PASS\nFINDINGS:\n".to_owned(),
1138 stderr: String::new(),
1139 })
1140 }
1141 }
1142
1143 let request = ReviewRequest::new(
1144 Agent::Codex,
1145 "gpt-5.4",
1146 ReviewerHarness::Codex,
1147 "gpt-5.5",
1148 false,
1149 "review this",
1150 );
1151 let plan = ReviewPlan::build(request).unwrap();
1152 let output = plan.run_with("review this", &MockRunner).unwrap();
1153
1154 assert!(output.stdout.contains("PASS"));
1155 }
1156
1157 #[test]
1158 fn verdict_parser_extracts_rejection_findings() {
1159 let verdict =
1160 ParsedVerdict::parse("VERDICT: REJECT\nFINDINGS:\n- missing proof\n").unwrap();
1161
1162 assert_eq!(verdict.verdict, Verdict::Reject);
1163 assert_eq!(verdict.findings, ["missing proof"]);
1164 }
1165
1166 #[test]
1167 fn review_queue_schedules_commits_without_running_models() {
1168 let temp = tempfile::tempdir().unwrap();
1169 let queue = ReviewQueue::new(temp.path());
1170
1171 queue.enqueue("abc123").unwrap();
1172
1173 let pending = queue.pending().unwrap();
1174 assert_eq!(pending.len(), 1);
1175 assert_eq!(pending[0].commit_sha, "abc123");
1176 }
1177
1178 #[test]
1179 fn execute_review_records_reject_verdict() {
1180 let temp = tempfile::tempdir().unwrap();
1181 let store = LedgerStore::new(temp.path());
1182 let job = review_job(false);
1183 let runner = SequenceRunner::new(["VERDICT: REJECT\nFINDINGS:\n- unsupported\n"]);
1184
1185 let execution = execute_review_job(job, &runner, &store).unwrap();
1186
1187 assert_eq!(execution.entries.len(), 1);
1188 assert_eq!(execution.entries[0].verdict, Verdict::Reject);
1189 assert_eq!(store.unresolved_rejections().unwrap().len(), 1);
1190 }
1191
1192 #[test]
1193 fn strict_two_pass_records_both_clean_passes() {
1194 let temp = tempfile::tempdir().unwrap();
1195 let store = LedgerStore::new(temp.path());
1196 let job = review_job(true);
1197 let runner =
1198 SequenceRunner::new(["VERDICT: PASS\nFINDINGS:\n", "VERDICT: PASS\nFINDINGS:\n"]);
1199
1200 let execution = execute_review_job(job, &runner, &store).unwrap();
1201
1202 assert_eq!(execution.entries.len(), 2);
1203 assert_eq!(store.read_history().unwrap().len(), 2);
1204 assert_eq!(execution.entries[0].reviewer.model, "gpt-5.5");
1205 assert_eq!(execution.entries[1].reviewer.model, "claude-opus-4-1");
1206 }
1207
1208 #[test]
1209 fn strict_arbiter_model_must_be_third_model() {
1210 let temp = tempfile::tempdir().unwrap();
1211 let store = LedgerStore::new(temp.path());
1212 let mut job = review_job(true);
1213 job.strict.as_mut().unwrap().arbiter_model = "gpt-5.5".to_owned();
1214 let runner = SequenceRunner::new(["VERDICT: PASS\nFINDINGS:\n"]);
1215
1216 let error = execute_review_job(job, &runner, &store).unwrap_err();
1217
1218 assert!(matches!(
1219 error,
1220 ReviewerError::StrictArbiterModelNotDistinct
1221 ));
1222 }
1223
1224 #[test]
1225 fn strict_goal_policy_stops_at_configured_lie_or_fuckup_count() {
1226 let policy = StrictGoalPolicy {
1227 stop_after_lies: 2,
1228 stop_after_fuckups: 3,
1229 };
1230
1231 assert_eq!(
1232 policy.decide(StrictGoalCounters {
1233 lies_exposed: 1,
1234 fuckups_registered: 2
1235 }),
1236 StrictGoalDecision::Continue
1237 );
1238 assert_eq!(
1239 policy.decide(StrictGoalCounters {
1240 lies_exposed: 2,
1241 fuckups_registered: 0
1242 }),
1243 StrictGoalDecision::Stop {
1244 reason: StrictGoalStopReason::LiesExposed
1245 }
1246 );
1247 assert_eq!(
1248 policy.decide(StrictGoalCounters {
1249 lies_exposed: 0,
1250 fuckups_registered: 3
1251 }),
1252 StrictGoalDecision::Stop {
1253 reason: StrictGoalStopReason::FuckupsRegistered
1254 }
1255 );
1256 }
1257
1258 #[test]
1259 fn drain_once_reviews_each_commit_once_and_clears_queue() {
1260 let temp = tempfile::tempdir().unwrap();
1261 let store = LedgerStore::new(temp.path());
1262 let queue = ReviewQueue::new(temp.path());
1263 queue.enqueue("abc123").unwrap();
1264 queue.enqueue("abc123").unwrap(); queue.enqueue("def456").unwrap();
1266
1267 let loader = StaticLoader::new();
1268 let runner = SequenceRunner::new([
1269 "VERDICT: REJECT\nFINDINGS:\n- unsupported\n",
1270 "VERDICT: PASS\nFINDINGS:\n",
1271 ]);
1272 let selection = selection();
1273
1274 let report = drain_once(&queue, &loader, &selection, &runner, &store).unwrap();
1275
1276 assert_eq!(report.reviewed, ["abc123", "def456"]);
1277 assert_eq!(report.ledger_entries, 2);
1278 assert!(queue.pending().unwrap().is_empty());
1279 assert_eq!(store.read_history().unwrap().len(), 2);
1280 assert_eq!(store.unresolved_rejections().unwrap().len(), 1);
1281 }
1282
1283 #[test]
1284 fn drain_once_is_a_noop_on_empty_queue() {
1285 let temp = tempfile::tempdir().unwrap();
1286 let store = LedgerStore::new(temp.path());
1287 let queue = ReviewQueue::new(temp.path());
1288 let loader = StaticLoader::new();
1289 let runner = ConstRunner::new("VERDICT: PASS\nFINDINGS:\n");
1290
1291 let report = drain_once(&queue, &loader, &selection(), &runner, &store).unwrap();
1292
1293 assert!(report.reviewed.is_empty());
1294 assert_eq!(report.ledger_entries, 0);
1295 assert_eq!(store.read_history().unwrap().len(), 0);
1296 }
1297
1298 #[test]
1299 fn strict_goal_loop_stops_at_configured_lie_count() {
1300 let temp = tempfile::tempdir().unwrap();
1301 let store = LedgerStore::new(temp.path());
1302 let policy = StrictGoalPolicy {
1303 stop_after_lies: 1,
1304 stop_after_fuckups: 0,
1305 };
1306 let runner = SequenceRunner::new(["VERDICT: REJECT\nFINDINGS:\n- lie\n"]);
1307
1308 let outcome = run_strict_goal_loop(
1309 "abc123",
1310 &claim(),
1311 "diff",
1312 &selection(),
1313 policy,
1314 5,
1315 &runner,
1316 &store,
1317 )
1318 .unwrap();
1319
1320 assert_eq!(outcome.passes, 1);
1321 assert_eq!(outcome.counters.lies_exposed, 1);
1322 assert_eq!(outcome.stop_reason, Some(StrictGoalStopReason::LiesExposed));
1323 assert_eq!(store.read_history().unwrap().len(), 1);
1324 }
1325
1326 #[test]
1327 fn strict_goal_loop_terminates_at_max_passes_for_honest_agent() {
1328 let temp = tempfile::tempdir().unwrap();
1329 let store = LedgerStore::new(temp.path());
1330 let policy = StrictGoalPolicy {
1331 stop_after_lies: 2,
1332 stop_after_fuckups: 5,
1333 };
1334 let runner = ConstRunner::new("VERDICT: PASS\nFINDINGS:\n");
1335
1336 let outcome = run_strict_goal_loop(
1337 "abc123",
1338 &claim(),
1339 "diff",
1340 &selection(),
1341 policy,
1342 3,
1343 &runner,
1344 &store,
1345 )
1346 .unwrap();
1347
1348 assert_eq!(outcome.passes, 3);
1349 assert_eq!(outcome.counters.lies_exposed, 0);
1350 assert_eq!(outcome.stop_reason, None);
1351 assert_eq!(store.read_history().unwrap().len(), 3);
1352 }
1353
1354 #[test]
1355 fn strict_goal_loop_stops_when_fuckups_accumulate() {
1356 let temp = tempfile::tempdir().unwrap();
1357 let store = LedgerStore::new(temp.path());
1358 let policy = StrictGoalPolicy {
1359 stop_after_lies: 0,
1360 stop_after_fuckups: 2,
1361 };
1362 let runner = ConstRunner::new("VERDICT: PASS\nFINDINGS:\n- nit\n");
1364
1365 let outcome = run_strict_goal_loop(
1366 "abc123",
1367 &claim(),
1368 "diff",
1369 &selection(),
1370 policy,
1371 10,
1372 &runner,
1373 &store,
1374 )
1375 .unwrap();
1376
1377 assert_eq!(outcome.passes, 2);
1378 assert_eq!(outcome.counters.lies_exposed, 0);
1379 assert_eq!(outcome.counters.fuckups_registered, 2);
1380 assert_eq!(
1381 outcome.stop_reason,
1382 Some(StrictGoalStopReason::FuckupsRegistered)
1383 );
1384 }
1385
1386 proptest! {
1387 #[test]
1388 fn strict_goal_loop_never_exceeds_max_passes(max in 1u32..6) {
1389 let temp = tempfile::tempdir().unwrap();
1390 let store = LedgerStore::new(temp.path());
1391 let policy = StrictGoalPolicy { stop_after_lies: 0, stop_after_fuckups: 0 };
1393 let runner = ConstRunner::new("VERDICT: PASS\nFINDINGS:\n");
1394
1395 let outcome = run_strict_goal_loop(
1396 "abc123", &claim(), "diff", &selection(), policy, max, &runner, &store,
1397 )
1398 .unwrap();
1399
1400 prop_assert!(outcome.passes <= max);
1401 prop_assert_eq!(outcome.passes, max);
1402 prop_assert!(outcome.stop_reason.is_none());
1403 }
1404 }
1405
1406 proptest! {
1407 #[test]
1408 fn model_opposition_is_enforced_for_arbitrary_models(
1409 watched in "[A-Za-z0-9._/-]{1,32}",
1410 reviewer in "[A-Za-z0-9._/-]{1,32}",
1411 ) {
1412 let request = ReviewRequest::new(
1413 Agent::Codex,
1414 watched.clone(),
1415 ReviewerHarness::Codex,
1416 reviewer.clone(),
1417 false,
1418 "review this",
1419 );
1420 let result = ReviewPlan::build(request);
1421
1422 if watched.trim().eq_ignore_ascii_case(reviewer.trim()) {
1423 let blocked = matches!(result, Err(ReviewerError::SameModelWithoutWaiver { .. }));
1424 prop_assert!(blocked);
1425 } else {
1426 prop_assert!(result.is_ok());
1427 }
1428 }
1429 }
1430
1431 fn claim() -> Claim {
1432 Claim::new(
1433 "add review",
1434 "cargo test",
1435 vec![EvidenceRef::parse("tests:cargo-test").unwrap()],
1436 )
1437 .unwrap()
1438 }
1439
1440 fn selection() -> ReviewSelection {
1441 ReviewSelection {
1442 watched_agent: Agent::Codex,
1443 watched_model: "gpt-5.4".to_owned(),
1444 reviewer_harness: ReviewerHarness::Codex,
1445 reviewer_model: "gpt-5.5".to_owned(),
1446 allow_same_model: false,
1447 strict: None,
1448 }
1449 }
1450
1451 struct StaticLoader {
1452 claim: Claim,
1453 diff: String,
1454 }
1455
1456 impl StaticLoader {
1457 fn new() -> Self {
1458 Self {
1459 claim: claim(),
1460 diff: "diff --git a/src/lib.rs b/src/lib.rs".to_owned(),
1461 }
1462 }
1463 }
1464
1465 impl MaterialLoader for StaticLoader {
1466 fn load(&self, _sha: &str) -> Result<(Claim, String), ReviewerError> {
1467 Ok((self.claim.clone(), self.diff.clone()))
1468 }
1469 }
1470
1471 struct ConstRunner {
1472 output: String,
1473 }
1474
1475 impl ConstRunner {
1476 fn new(output: &str) -> Self {
1477 Self {
1478 output: output.to_owned(),
1479 }
1480 }
1481 }
1482
1483 impl ProcessRunner for ConstRunner {
1484 fn run(
1485 &self,
1486 _invocation: &InvocationPlan,
1487 _prompt: &str,
1488 ) -> Result<ProcessOutput, ReviewerError> {
1489 Ok(ProcessOutput {
1490 status_code: Some(0),
1491 stdout: self.output.clone(),
1492 stderr: String::new(),
1493 })
1494 }
1495 }
1496
1497 fn review_job(strict: bool) -> ReviewJob {
1498 let claim = claim();
1499 ReviewJob {
1500 commit_sha: "abc123".to_owned(),
1501 diff: "diff --git a/src/lib.rs b/src/lib.rs".to_owned(),
1502 request: ReviewRequest::new(
1503 Agent::Codex,
1504 "gpt-5.4",
1505 ReviewerHarness::Codex,
1506 "gpt-5.5",
1507 false,
1508 "review this",
1509 ),
1510 claim,
1511 strict: strict.then_some(StrictReviewConfig {
1512 arbiter_harness: ReviewerHarness::Claude,
1513 arbiter_model: "claude-opus-4-1".to_owned(),
1514 }),
1515 }
1516 }
1517
1518 struct SequenceRunner {
1519 outputs: RefCell<VecDeque<String>>,
1520 }
1521
1522 impl SequenceRunner {
1523 fn new<const N: usize>(outputs: [&str; N]) -> Self {
1524 Self {
1525 outputs: RefCell::new(outputs.into_iter().map(str::to_owned).collect()),
1526 }
1527 }
1528 }
1529
1530 impl ProcessRunner for SequenceRunner {
1531 fn run(
1532 &self,
1533 _invocation: &InvocationPlan,
1534 _prompt: &str,
1535 ) -> Result<ProcessOutput, ReviewerError> {
1536 let stdout = self.outputs.borrow_mut().pop_front().unwrap();
1537 Ok(ProcessOutput {
1538 status_code: Some(0),
1539 stdout,
1540 stderr: String::new(),
1541 })
1542 }
1543 }
1544}