1use std::{
4 fs,
5 io::{self, Write},
6 path::{Path, PathBuf},
7 process::{Command, ExitCode, Stdio},
8 time::{SystemTime, UNIX_EPOCH},
9};
10
11use anyhow::Result;
12use serde::{Deserialize, Serialize};
13use thiserror::Error;
14
15use crate::{
16 claim::{Claim, EvidenceRef},
17 cli::{self, Agent, ReviewScope, ReviewerHarness},
18 config::{self, Effort},
19 ledger::{LedgerEntry, LedgerStore, ReviewerConfig, StructuredFinding, Verdict},
20 surface,
21};
22
/// File name of the JSONL review queue; `ReviewQueue::path` joins it onto the queue root.
pub const REVIEW_QUEUE_FILE: &str = "review-queue.jsonl";
/// Directory name under the store root that holds one `<id>.json` record per review run.
pub const REVIEW_RUNS_DIR: &str = "runs";
// NOTE(review): the three limits below are not referenced in the visible part of this
// file; the descriptions are inferred from the names only — confirm at the use sites.
/// Presumably the maximum number of files whose diff is inlined.
const MAX_INLINE_DIFF_FILES: usize = 2;
/// Presumably the maximum size, in bytes, of an inlined diff (256 KiB).
const MAX_INLINE_DIFF_BYTES: usize = 256 * 1024;
/// Presumably the maximum size, in bytes, of an untracked file that is still included (16 KiB).
const MAX_UNTRACKED_FILE_BYTES: u64 = 16 * 1024;
28
/// A fully specified request to run one reviewer pass over a prompt.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReviewRequest {
    /// Agent whose work is being reviewed.
    pub watched_agent: Agent,
    /// Model used by the watched agent; `ReviewPlan::build` skips the same-model
    /// check when this is blank.
    pub watched_model: String,
    /// Harness (CLI) that runs the reviewer.
    pub reviewer_harness: ReviewerHarness,
    /// Model the reviewer harness is asked to use.
    pub reviewer_model: String,
    /// Effort level passed to the reviewer harness.
    pub reviewer_effort: Effort,
    /// Waiver that lets the reviewer use the same model as the watched agent.
    pub allow_same_model: bool,
    /// Prompt text delivered to the reviewer process.
    pub prompt: String,
}
39
40impl ReviewRequest {
41 pub fn new(
42 watched_agent: Agent,
43 watched_model: impl Into<String>,
44 reviewer_harness: ReviewerHarness,
45 reviewer_model: impl Into<String>,
46 allow_same_model: bool,
47 prompt: impl Into<String>,
48 ) -> Self {
49 Self {
50 watched_agent,
51 watched_model: watched_model.into(),
52 reviewer_harness,
53 reviewer_model: reviewer_model.into(),
54 reviewer_effort: Effort::highest(),
55 allow_same_model,
56 prompt: prompt.into(),
57 }
58 }
59
60 pub fn with_effort(mut self, effort: Effort) -> Self {
61 self.reviewer_effort = effort;
62 self
63 }
64}
65
/// The resolved watched-agent / reviewer pairing used to build review requests.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReviewSelection {
    /// Agent whose work is being reviewed.
    pub watched_agent: Agent,
    /// Model of the watched agent; empty when none was supplied to `resolve`.
    pub watched_model: String,
    /// Harness that runs the reviewer.
    pub reviewer_harness: ReviewerHarness,
    /// Model the reviewer harness is asked to use.
    pub reviewer_model: String,
    /// Effort level passed to the reviewer harness.
    pub reviewer_effort: Effort,
    /// Whether reviewer and watched agent may share a model.
    pub allow_same_model: bool,
    /// Optional arbiter configuration for a strict second pass; `resolve` leaves it `None`.
    pub strict: Option<StrictReviewConfig>,
}
78
79impl ReviewSelection {
80 #[allow(clippy::too_many_arguments)]
83 pub fn resolve(
84 watched_agent: Option<Agent>,
85 watched_model: Option<String>,
86 reviewer_harness: Option<ReviewerHarness>,
87 reviewer_model: Option<String>,
88 reviewer_effort: Option<Effort>,
89 allow_same_model: bool,
90 config: &config::TruthMirrorConfig,
91 ) -> Result<Self, ReviewerError> {
92 let watched_agent = match watched_agent {
93 Some(agent) => agent,
94 None => agent_from_slug(&config.default_writer)?,
95 };
96 let writer_slug = surface::agent_slug(watched_agent);
97 let pair = config.pair_for(writer_slug);
98
99 let harness_from_cli = reviewer_harness.is_some();
100 let reviewer_harness = match reviewer_harness {
101 Some(harness) => harness,
102 None => {
103 let slug = pair
104 .map(|pair| pair.reviewer.harness.as_str())
105 .ok_or_else(|| ReviewerError::NoPairForWriter {
106 writer: writer_slug.to_owned(),
107 })?;
108 harness_from_slug(slug)?
109 }
110 };
111 let reviewer_model = match reviewer_model {
112 Some(model) => model,
113 None => {
114 let pair = pair.ok_or_else(|| ReviewerError::NoPairForWriter {
115 writer: writer_slug.to_owned(),
116 })?;
117 if harness_from_cli
120 && !pair
121 .reviewer
122 .harness
123 .eq_ignore_ascii_case(harness_slug(reviewer_harness))
124 {
125 return Err(ReviewerError::OverrideNeedsModel {
126 role: "reviewer".to_owned(),
127 harness: harness_slug(reviewer_harness).to_owned(),
128 });
129 }
130 pair.reviewer.model.clone()
131 }
132 };
133 let reviewer_effort = reviewer_effort
134 .or_else(|| pair.map(|pair| pair.reviewer.effort))
135 .unwrap_or_else(Effort::highest);
136
137 Ok(Self {
138 watched_agent,
139 watched_model: watched_model.unwrap_or_default(),
140 reviewer_harness,
141 reviewer_model,
142 reviewer_effort,
143 allow_same_model: allow_same_model || config.allow_same_model,
145 strict: None,
146 })
147 }
148
149 pub fn resolve_arbiter(
152 watched_agent: Agent,
153 arbiter_harness: Option<ReviewerHarness>,
154 arbiter_model: Option<String>,
155 arbiter_effort: Option<Effort>,
156 config: &config::TruthMirrorConfig,
157 ) -> Result<StrictReviewConfig, ReviewerError> {
158 let pair_arbiter = config
159 .pair_for(surface::agent_slug(watched_agent))
160 .and_then(|pair| pair.arbiter.clone());
161
162 let harness_from_cli = arbiter_harness.is_some();
163 let harness = match arbiter_harness {
164 Some(harness) => harness,
165 None => {
166 let slug = pair_arbiter
167 .as_ref()
168 .map(|arbiter| arbiter.harness.as_str())
169 .ok_or(ReviewerError::MissingArbiter)?;
170 harness_from_slug(slug)?
171 }
172 };
173 let model = match arbiter_model {
174 Some(model) => model,
175 None => {
176 let arbiter = pair_arbiter.as_ref().ok_or(ReviewerError::MissingArbiter)?;
177 if harness_from_cli && !arbiter.harness.eq_ignore_ascii_case(harness_slug(harness))
178 {
179 return Err(ReviewerError::OverrideNeedsModel {
180 role: "arbiter".to_owned(),
181 harness: harness_slug(harness).to_owned(),
182 });
183 }
184 arbiter.model.clone()
185 }
186 };
187 let effort = arbiter_effort
188 .or_else(|| pair_arbiter.as_ref().map(|arbiter| arbiter.effort))
189 .unwrap_or_else(Effort::highest);
190
191 Ok(StrictReviewConfig {
192 arbiter_harness: harness,
193 arbiter_model: model,
194 arbiter_effort: effort,
195 })
196 }
197
198 fn request_for(&self, prompt: String) -> ReviewRequest {
199 ReviewRequest::new(
200 self.watched_agent,
201 self.watched_model.clone(),
202 self.reviewer_harness,
203 self.reviewer_model.clone(),
204 self.allow_same_model,
205 prompt,
206 )
207 .with_effort(self.reviewer_effort)
208 }
209}
210
/// A validated review request together with the concrete process invocation
/// that will run the reviewer.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReviewPlan {
    /// Agent whose work is being reviewed.
    pub watched_agent: Agent,
    /// Model of the watched agent (may be blank).
    pub watched_model: String,
    /// Harness that runs the reviewer.
    pub reviewer_harness: ReviewerHarness,
    /// Model the reviewer harness is asked to use.
    pub reviewer_model: String,
    /// Whether the same-model waiver was granted for this plan.
    pub allow_same_model: bool,
    /// Program, arguments and prompt delivery used to launch the reviewer.
    pub invocation: InvocationPlan,
}
220
221impl ReviewPlan {
222 pub fn build(request: ReviewRequest) -> Result<Self, ReviewerError> {
223 validate_model_present("reviewer", &request.reviewer_model)?;
224
225 if !request.watched_model.trim().is_empty()
228 && !request.allow_same_model
229 && normalized_model(&request.watched_model) == normalized_model(&request.reviewer_model)
230 {
231 return Err(ReviewerError::SameModelWithoutWaiver {
232 watched_model: request.watched_model,
233 reviewer_model: request.reviewer_model,
234 });
235 }
236
237 let invocation = InvocationPlan::for_harness(
238 request.reviewer_harness,
239 &request.reviewer_model,
240 request.reviewer_effort,
241 )?;
242
243 Ok(Self {
244 watched_agent: request.watched_agent,
245 watched_model: request.watched_model,
246 reviewer_harness: request.reviewer_harness,
247 reviewer_model: request.reviewer_model,
248 allow_same_model: request.allow_same_model,
249 invocation,
250 })
251 }
252
253 pub fn run_with<R: ProcessRunner>(
254 &self,
255 prompt: &str,
256 runner: &R,
257 ) -> Result<ProcessOutput, ReviewerError> {
258 runner.run(&self.invocation, prompt)
259 }
260
261 fn reviewer_config(&self) -> ReviewerConfig {
262 ReviewerConfig::new(
263 harness_slug(self.reviewer_harness),
264 self.reviewer_model.clone(),
265 self.allow_same_model,
266 )
267 }
268}
269
/// Describes how to launch a reviewer process.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct InvocationPlan {
    /// Executable name passed to `Command::new`.
    pub program: String,
    /// Arguments that precede any prompt-carrying arguments.
    pub args: Vec<String>,
    /// How the prompt reaches the process (stdin, positional argument, or flag value).
    pub prompt_delivery: PromptDelivery,
}
276
277impl InvocationPlan {
278 pub fn for_harness(
279 harness: ReviewerHarness,
280 model: &str,
281 effort: Effort,
282 ) -> Result<Self, ReviewerError> {
283 validate_model_present("reviewer", model)?;
284 let model = model.trim();
285 let e = effort.as_str();
286
287 let plan = match harness {
291 ReviewerHarness::Claude => Self {
292 program: "claude".to_owned(),
293 args: vec![
294 "--print".to_owned(),
295 "--model".to_owned(),
296 model.to_owned(),
297 "--effort".to_owned(),
298 effort.claude_value().to_owned(),
300 ],
301 prompt_delivery: PromptDelivery::Stdin,
302 },
303 ReviewerHarness::Codex => Self {
304 program: "codex".to_owned(),
305 args: vec![
306 "exec".to_owned(),
307 "-m".to_owned(),
308 model.to_owned(),
309 "-c".to_owned(),
310 format!("model_reasoning_effort={e}"),
311 ],
312 prompt_delivery: PromptDelivery::PositionalArgument,
313 },
314 ReviewerHarness::Pi => Self {
315 program: "pi".to_owned(),
316 args: vec![
317 "--model".to_owned(),
318 model.to_owned(),
319 "--thinking".to_owned(),
320 e.to_owned(),
321 "--tools".to_owned(),
324 "read,grep,find,ls".to_owned(),
325 "-p".to_owned(),
326 ],
327 prompt_delivery: PromptDelivery::Stdin,
328 },
329 ReviewerHarness::Gemini => Self {
330 program: "gemini".to_owned(),
331 args: vec!["-m".to_owned(), model.to_owned()],
332 prompt_delivery: PromptDelivery::FlagValue("-p".to_owned()),
333 },
334 ReviewerHarness::Opencode => Self {
335 program: "opencode".to_owned(),
336 args: vec!["run".to_owned(), "--model".to_owned(), model.to_owned()],
337 prompt_delivery: PromptDelivery::PositionalArgument,
338 },
339 ReviewerHarness::Custom => return Err(ReviewerError::UnsupportedCustomHarness),
340 };
341
342 Ok(plan)
343 }
344
345 pub fn args_for_prompt(&self, prompt: &str) -> Vec<String> {
346 let mut args = self.args.clone();
347 match &self.prompt_delivery {
348 PromptDelivery::Stdin => {}
349 PromptDelivery::PositionalArgument => args.push(prompt.to_owned()),
350 PromptDelivery::FlagValue(flag) => {
351 args.push(flag.clone());
352 args.push(prompt.to_owned());
353 }
354 }
355 args
356 }
357}
358
/// How the prompt is handed to the reviewer process.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PromptDelivery {
    /// The prompt is written to the child's standard input.
    Stdin,
    /// The prompt is appended as the last command-line argument.
    PositionalArgument,
    /// The prompt is appended as the value following the given flag.
    FlagValue(String),
}
365
/// Captured result of a finished reviewer process.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ProcessOutput {
    /// Exit code as reported by `ExitStatus::code`; `None` when no code is available.
    pub status_code: Option<i32>,
    /// Standard output, decoded lossily as UTF-8 by `StdProcessRunner`.
    pub stdout: String,
    /// Standard error, decoded lossily as UTF-8 by `StdProcessRunner`.
    pub stderr: String,
}
372
/// Abstraction over launching a reviewer process, so callers can substitute
/// a different runner for the real one.
pub trait ProcessRunner {
    /// Runs `invocation` with `prompt` and returns the captured output, or a
    /// `ReviewerError` when the process could not be run.
    fn run(
        &self,
        invocation: &InvocationPlan,
        prompt: &str,
    ) -> Result<ProcessOutput, ReviewerError>;
}
380
/// `ProcessRunner` that spawns real child processes via `std::process::Command`.
#[derive(Clone, Copy, Debug, Default)]
pub struct StdProcessRunner;
383
384impl ProcessRunner for StdProcessRunner {
385 fn run(
386 &self,
387 invocation: &InvocationPlan,
388 prompt: &str,
389 ) -> Result<ProcessOutput, ReviewerError> {
390 let mut command = Command::new(&invocation.program);
391 command.args(invocation.args_for_prompt(prompt));
392 command.stdout(Stdio::piped()).stderr(Stdio::piped());
393
394 if invocation.prompt_delivery == PromptDelivery::Stdin {
395 command.stdin(Stdio::piped());
396 }
397
398 let mut child = command.spawn().map_err(ReviewerError::Spawn)?;
399 if invocation.prompt_delivery == PromptDelivery::Stdin {
400 let mut stdin = child.stdin.take().ok_or(ReviewerError::MissingStdinPipe)?;
401 stdin
402 .write_all(prompt.as_bytes())
403 .map_err(ReviewerError::WritePrompt)?;
404 }
405
406 let output = child.wait_with_output().map_err(ReviewerError::Wait)?;
407 Ok(ProcessOutput {
408 status_code: output.status.code(),
409 stdout: String::from_utf8_lossy(&output.stdout).into_owned(),
410 stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
411 })
412 }
413}
414
/// Everything needed to review one commit.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReviewJob {
    /// Commit under review.
    pub commit_sha: String,
    /// Claim associated with the commit (`drain_once` gets it from a `MaterialLoader`).
    pub claim: Claim,
    /// Diff of the commit.
    pub diff: String,
    /// Additional review context text.
    pub context: String,
    /// First-pass reviewer request, including its prompt.
    pub request: ReviewRequest,
    /// When set, a clean first-pass PASS triggers a second pass by this arbiter.
    pub strict: Option<StrictReviewConfig>,
}
425
/// Arbiter settings for the strict second review pass.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct StrictReviewConfig {
    /// Harness that runs the arbiter.
    pub arbiter_harness: ReviewerHarness,
    /// Model the arbiter harness is asked to use.
    pub arbiter_model: String,
    /// Effort level passed to the arbiter harness.
    pub arbiter_effort: Effort,
}
432
/// Outcome of `execute_review_job`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReviewExecution {
    /// Ledger entries appended during the job: one for the first pass, plus one
    /// for the strict pass when it ran.
    pub entries: Vec<LedgerEntry>,
}
437
438pub fn execute_review_job<R: ProcessRunner>(
439 job: ReviewJob,
440 runner: &R,
441 store: &LedgerStore,
442) -> Result<ReviewExecution, ReviewerError> {
443 let first_plan = ReviewPlan::build(job.request.clone())?;
444 let first_output = first_plan.run_with(&job.request.prompt, runner)?;
445 ensure_process_success(&first_output)?;
446 let first_verdict = ParsedVerdict::parse(&first_output.stdout)?;
447 let first_entry = entry_from_verdict(&job, &first_plan, &first_verdict);
448 store.append_entry(&first_entry)?;
449
450 let mut entries = vec![first_entry];
451 if let Some(strict) = &job.strict
452 && first_verdict.verdict == Verdict::Pass
453 && first_verdict.findings.is_empty()
454 {
455 validate_strict_arbiter(&job.request, strict)?;
456 let strict_prompt = strict_second_pass_prompt(&job, &first_output.stdout);
457 let strict_request = ReviewRequest::new(
458 job.request.watched_agent,
459 job.request.watched_model.clone(),
460 strict.arbiter_harness,
461 strict.arbiter_model.clone(),
462 false,
463 strict_prompt,
464 )
465 .with_effort(strict.arbiter_effort);
466 let strict_plan = ReviewPlan::build(strict_request.clone())?;
467 let strict_output = strict_plan.run_with(&strict_request.prompt, runner)?;
468 ensure_process_success(&strict_output)?;
469 let strict_verdict = ParsedVerdict::parse(&strict_output.stdout)?;
470 let strict_entry = entry_from_verdict(&job, &strict_plan, &strict_verdict);
471 store.append_entry(&strict_entry)?;
472 entries.push(strict_entry);
473 }
474
475 Ok(ReviewExecution { entries })
476}
477
/// A reviewer verdict parsed and validated from the reviewer's JSON output.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParsedVerdict {
    /// Overall verdict.
    pub verdict: Verdict,
    /// Reviewer's summary; validated to be non-blank.
    pub summary: String,
    /// One display line per structured finding (`StructuredFinding::display_line`).
    pub findings: Vec<String>,
    /// Findings exactly as parsed from the reviewer output.
    pub structured_findings: Vec<StructuredFinding>,
    /// Next steps from the reviewer output; empty when the field was absent.
    pub next_steps: Vec<String>,
    /// The untrimmed output text the verdict was parsed from.
    pub raw: String,
}
487
488impl ParsedVerdict {
489 pub fn parse(output: &str) -> Result<Self, ReviewerError> {
490 let parsed: ReviewerJsonOutput =
491 serde_json::from_str(output.trim()).map_err(|source| ReviewerError::VerdictJson {
492 source,
493 output: output.to_owned(),
494 })?;
495 parsed.validate()?;
496 let findings = parsed
497 .findings
498 .iter()
499 .map(StructuredFinding::display_line)
500 .collect();
501
502 Ok(Self {
503 verdict: parsed.verdict,
504 summary: parsed.summary,
505 findings,
506 structured_findings: parsed.findings,
507 next_steps: parsed.next_steps,
508 raw: output.to_owned(),
509 })
510 }
511}
512
/// Wire shape of the JSON document a reviewer is expected to print.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
struct ReviewerJsonOutput {
    /// Overall verdict (required).
    verdict: Verdict,
    /// Summary text (required).
    summary: String,
    /// Structured findings; defaults to empty when the field is absent.
    #[serde(default)]
    findings: Vec<StructuredFinding>,
    /// Suggested next steps; defaults to empty when the field is absent.
    #[serde(default)]
    next_steps: Vec<String>,
}
522
523impl ReviewerJsonOutput {
524 fn validate(&self) -> Result<(), ReviewerError> {
525 if self.summary.trim().is_empty() {
526 return Err(ReviewerError::VerdictSchema {
527 message: "summary must not be empty".to_owned(),
528 });
529 }
530
531 for finding in &self.findings {
532 if finding.title.trim().is_empty() {
533 return Err(ReviewerError::VerdictSchema {
534 message: "finding title must not be empty".to_owned(),
535 });
536 }
537 if finding.body.trim().is_empty() {
538 return Err(ReviewerError::VerdictSchema {
539 message: "finding body must not be empty".to_owned(),
540 });
541 }
542 if finding.file.trim().is_empty() {
543 return Err(ReviewerError::VerdictSchema {
544 message: "finding file must not be empty".to_owned(),
545 });
546 }
547 if finding.line_start == 0 || finding.line_end == 0 {
548 return Err(ReviewerError::VerdictSchema {
549 message: "finding lines must be one-based".to_owned(),
550 });
551 }
552 if finding.line_end < finding.line_start {
553 return Err(ReviewerError::VerdictSchema {
554 message: "finding line_end must be greater than or equal to line_start"
555 .to_owned(),
556 });
557 }
558 if finding.confidence > 100 {
559 return Err(ReviewerError::VerdictSchema {
560 message: "finding confidence must be between 0 and 100".to_owned(),
561 });
562 }
563 if finding.recommendation.trim().is_empty() {
564 return Err(ReviewerError::VerdictSchema {
565 message: "finding recommendation must not be empty".to_owned(),
566 });
567 }
568 }
569
570 if self.verdict == Verdict::Pass && !self.findings.is_empty() {
571 return Err(ReviewerError::VerdictSchema {
572 message: "PASS verdict must not include findings".to_owned(),
573 });
574 }
575 if self.verdict == Verdict::Reject && self.findings.is_empty() {
576 return Err(ReviewerError::VerdictSchema {
577 message: "REJECT verdict must include at least one finding".to_owned(),
578 });
579 }
580
581 Ok(())
582 }
583}
584
/// Lifecycle state of a review run; serialized in kebab-case.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[serde(rename_all = "kebab-case")]
pub enum ReviewRunStatus {
    /// Created but not yet picked up.
    Queued,
    /// Currently being reviewed by a worker.
    Running,
    /// Finished and recorded its ledger entries.
    Completed,
    /// Ended with an error (including a worker found to be dead).
    Failed,
    /// Cancelled before producing a result.
    Cancelled,
}
594
595impl std::fmt::Display for ReviewRunStatus {
596 fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
597 match self {
598 Self::Queued => formatter.write_str("queued"),
599 Self::Running => formatter.write_str("running"),
600 Self::Completed => formatter.write_str("completed"),
601 Self::Failed => formatter.write_str("failed"),
602 Self::Cancelled => formatter.write_str("cancelled"),
603 }
604 }
605}
606
/// Persistent record of one review run, stored as JSON by `ReviewRunStore`.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct ReviewRun {
    /// Run identifier; also the stem of the record's file name.
    pub id: String,
    /// Commit the run reviews.
    pub commit_sha: String,
    /// Kind of target under review (the queue code in this file uses `"commit"`).
    pub target: String,
    /// Current lifecycle state.
    pub status: ReviewRunStatus,
    /// Free-form phase label (e.g. `"queued"`, `"reviewing"`, `"completed"`).
    pub phase: String,
    /// Number of ledger entries written by the run; set on completion.
    pub ledger_entries: usize,
    /// Failure description; set only when the run is marked failed.
    pub error: Option<String>,
    /// PID of the process that marked the run running; cleared on terminal
    /// states and defaults to `None` when absent from stored JSON.
    #[serde(default)]
    pub worker_pid: Option<u32>,
    /// Creation timestamp from `unix_now()`.
    pub created_at_unix: u64,
    /// Timestamp of the latest state change.
    pub updated_at_unix: u64,
    /// Timestamp of the latest transition to running, if any.
    pub started_at_unix: Option<u64>,
    /// Timestamp of the transition to a terminal state, if any.
    pub completed_at_unix: Option<u64>,
}
627
628impl ReviewRun {
629 fn queued(
630 id: impl Into<String>,
631 commit_sha: impl Into<String>,
632 target: impl Into<String>,
633 ) -> Self {
634 let timestamp = unix_now();
635 Self {
636 id: id.into(),
637 commit_sha: commit_sha.into(),
638 target: target.into(),
639 status: ReviewRunStatus::Queued,
640 phase: "queued".to_owned(),
641 ledger_entries: 0,
642 error: None,
643 worker_pid: None,
644 created_at_unix: timestamp,
645 updated_at_unix: timestamp,
646 started_at_unix: None,
647 completed_at_unix: None,
648 }
649 }
650
651 fn mark_running(&mut self, phase: impl Into<String>) {
652 let timestamp = unix_now();
653 self.status = ReviewRunStatus::Running;
654 self.phase = phase.into();
655 self.error = None;
656 self.worker_pid = Some(std::process::id());
657 self.updated_at_unix = timestamp;
658 self.started_at_unix = Some(timestamp);
659 self.completed_at_unix = None;
660 }
661
662 fn mark_completed(&mut self, ledger_entries: usize) {
663 let timestamp = unix_now();
664 self.status = ReviewRunStatus::Completed;
665 self.phase = "completed".to_owned();
666 self.ledger_entries = ledger_entries;
667 self.error = None;
668 self.worker_pid = None;
669 self.updated_at_unix = timestamp;
670 self.completed_at_unix = Some(timestamp);
671 }
672
673 fn mark_failed(&mut self, error: impl Into<String>) {
674 let timestamp = unix_now();
675 self.status = ReviewRunStatus::Failed;
676 self.phase = "failed".to_owned();
677 self.error = Some(error.into());
678 self.worker_pid = None;
679 self.updated_at_unix = timestamp;
680 self.completed_at_unix = Some(timestamp);
681 }
682
683 fn mark_cancelled(&mut self) {
684 let timestamp = unix_now();
685 self.status = ReviewRunStatus::Cancelled;
686 self.phase = "cancelled".to_owned();
687 self.error = None;
688 self.worker_pid = None;
689 self.updated_at_unix = timestamp;
690 self.completed_at_unix = Some(timestamp);
691 }
692
693 fn reconcile_liveness(&mut self, is_alive: impl Fn(u32) -> bool) -> bool {
701 if self.status != ReviewRunStatus::Running {
702 return false;
703 }
704 match self.worker_pid {
705 Some(pid) if !is_alive(pid) => {
706 self.mark_failed(stale_worker_reason(pid));
707 true
708 }
709 _ => false,
710 }
711 }
712}
713
/// Builds the error text recorded on a run whose worker process `pid` is gone.
fn stale_worker_reason(pid: u32) -> String {
    let mut reason = String::from("worker process ");
    reason.push_str(&pid.to_string());
    reason.push_str(" exited without recording a verdict (stale run)");
    reason
}
718
/// Reports whether `kill -0 <pid>` succeeds, with the command's output discarded.
///
/// Returns `false` when the command exits unsuccessfully or cannot be run at all.
// NOTE(review): `kill -0` can also fail for a live process the caller may not
// signal, which this treats as "not alive" — confirm that is acceptable.
fn pid_is_alive(pid: u32) -> bool {
    let target = pid.to_string();
    let mut probe = Command::new("kill");
    probe
        .args(["-0", target.as_str()])
        .stdout(Stdio::null())
        .stderr(Stdio::null());
    match probe.status() {
        Ok(status) => status.success(),
        Err(_) => false,
    }
}
734
735fn kill_pid(pid: u32) -> Result<(), ReviewerError> {
738 let status = Command::new("kill")
739 .arg("-KILL")
740 .arg(pid.to_string())
741 .stdout(Stdio::null())
742 .stderr(Stdio::null())
743 .status()
744 .map_err(ReviewerError::KillWorker)?;
745 if status.success() || !pid_is_alive(pid) {
746 Ok(())
747 } else {
748 Err(ReviewerError::KillWorkerFailed { pid })
749 }
750}
751
/// File-backed store of `ReviewRun` records, one JSON file per run.
#[derive(Clone, Debug)]
pub struct ReviewRunStore {
    /// Root directory; run files live in the `REVIEW_RUNS_DIR` directory beneath it.
    root: PathBuf,
}
756
757impl ReviewRunStore {
758 pub fn new(root: impl Into<PathBuf>) -> Self {
759 Self { root: root.into() }
760 }
761
762 pub fn runs_dir(&self) -> PathBuf {
763 self.root.join(REVIEW_RUNS_DIR)
764 }
765
766 pub fn path(&self, id: &str) -> PathBuf {
767 self.runs_dir().join(format!("{id}.json"))
768 }
769
770 pub fn create_queued(
771 &self,
772 commit_sha: &str,
773 target: impl Into<String>,
774 ) -> Result<ReviewRun, ReviewerError> {
775 let run = ReviewRun::queued(generate_run_id(commit_sha), commit_sha, target);
776 self.write(&run)?;
777 Ok(run)
778 }
779
780 fn ensure_queued(
781 &self,
782 run_id: &str,
783 commit_sha: &str,
784 target: &str,
785 ) -> Result<ReviewRun, ReviewerError> {
786 match self.read(run_id) {
787 Ok(run) => Ok(run),
788 Err(ReviewerError::ReviewRunNotFound { .. }) => {
789 let run = ReviewRun::queued(run_id, commit_sha, target);
790 self.write(&run)?;
791 Ok(run)
792 }
793 Err(error) => Err(error),
794 }
795 }
796
797 pub fn read(&self, id: &str) -> Result<ReviewRun, ReviewerError> {
798 let path = self.path(id);
799 let contents = fs::read_to_string(&path).map_err(|source| match source.kind() {
800 io::ErrorKind::NotFound => ReviewerError::ReviewRunNotFound { id: id.to_owned() },
801 _ => ReviewerError::RunIo(source),
802 })?;
803 serde_json::from_str(&contents).map_err(ReviewerError::RunJson)
804 }
805
806 pub fn list(&self) -> Result<Vec<ReviewRun>, ReviewerError> {
807 let dir = self.runs_dir();
808 let entries = match fs::read_dir(&dir) {
809 Ok(entries) => entries,
810 Err(error) if error.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
811 Err(error) => return Err(ReviewerError::RunIo(error)),
812 };
813 let mut runs: Vec<ReviewRun> = Vec::new();
814 for entry in entries {
815 let entry = entry.map_err(ReviewerError::RunIo)?;
816 if entry
817 .path()
818 .extension()
819 .is_none_or(|extension| extension != "json")
820 {
821 continue;
822 }
823 let contents = fs::read_to_string(entry.path()).map_err(ReviewerError::RunIo)?;
824 runs.push(serde_json::from_str(&contents).map_err(ReviewerError::RunJson)?);
825 }
826 runs.sort_by(|left, right| {
827 right
828 .updated_at_unix
829 .cmp(&left.updated_at_unix)
830 .then_with(|| right.id.cmp(&left.id))
831 });
832 Ok(runs)
833 }
834
835 pub fn latest_result(&self) -> Result<ReviewRun, ReviewerError> {
836 self.list()?
837 .into_iter()
838 .find(|run| {
839 matches!(
840 run.status,
841 ReviewRunStatus::Completed
842 | ReviewRunStatus::Failed
843 | ReviewRunStatus::Cancelled
844 )
845 })
846 .ok_or(ReviewerError::NoReviewRuns)
847 }
848
849 pub fn mark_running(&self, id: &str, phase: &str) -> Result<ReviewRun, ReviewerError> {
850 let mut run = self.read(id)?;
851 run.mark_running(phase);
852 self.write(&run)?;
853 Ok(run)
854 }
855
856 pub fn mark_completed(
857 &self,
858 id: &str,
859 ledger_entries: usize,
860 ) -> Result<ReviewRun, ReviewerError> {
861 let mut run = self.read(id)?;
862 run.mark_completed(ledger_entries);
863 self.write(&run)?;
864 Ok(run)
865 }
866
867 pub fn mark_failed(
868 &self,
869 id: &str,
870 error: impl Into<String>,
871 ) -> Result<ReviewRun, ReviewerError> {
872 let mut run = self.read(id)?;
873 run.mark_failed(error);
874 self.write(&run)?;
875 Ok(run)
876 }
877
878 pub fn cancel_queued(&self, id: &str) -> Result<ReviewRun, ReviewerError> {
879 let mut run = self.read(id)?;
880 if run.status != ReviewRunStatus::Queued {
881 return Err(ReviewerError::CannotCancelReview {
882 id: id.to_owned(),
883 status: run.status,
884 });
885 }
886 run.mark_cancelled();
887 self.write(&run)?;
888 Ok(run)
889 }
890
891 pub fn read_reconciled(&self, id: &str) -> Result<ReviewRun, ReviewerError> {
895 let mut run = self.read(id)?;
896 if run.reconcile_liveness(pid_is_alive) {
897 self.write(&run)?;
898 }
899 Ok(run)
900 }
901
902 pub fn list_reconciled(&self) -> Result<Vec<ReviewRun>, ReviewerError> {
905 let mut runs = self.list()?;
906 for run in &mut runs {
907 if run.reconcile_liveness(pid_is_alive) {
908 self.write(run)?;
909 }
910 }
911 Ok(runs)
912 }
913
914 pub fn cancel(&self, id: &str, force: bool) -> Result<ReviewRun, ReviewerError> {
924 let mut run = self.read(id)?;
925 match run.status {
926 ReviewRunStatus::Queued => run.mark_cancelled(),
927 ReviewRunStatus::Running => match run.worker_pid {
928 Some(pid) if pid_is_alive(pid) => {
929 if !force {
930 return Err(ReviewerError::ReviewRunStillAlive {
931 id: id.to_owned(),
932 pid,
933 });
934 }
935 kill_pid(pid)?;
936 run.mark_cancelled();
937 }
938 Some(pid) => run.mark_failed(stale_worker_reason(pid)),
939 None => {
940 if !force {
941 return Err(ReviewerError::ReviewRunLivenessUnknown { id: id.to_owned() });
942 }
943 run.mark_failed("worker liveness could not be verified; force-cancelled");
944 }
945 },
946 terminal => {
947 return Err(ReviewerError::CannotCancelReview {
948 id: id.to_owned(),
949 status: terminal,
950 });
951 }
952 }
953 self.write(&run)?;
954 Ok(run)
955 }
956
957 fn write(&self, run: &ReviewRun) -> Result<(), ReviewerError> {
958 fs::create_dir_all(self.runs_dir()).map_err(ReviewerError::RunIo)?;
959 let bytes = serde_json::to_vec_pretty(run).map_err(ReviewerError::RunJson)?;
960 fs::write(self.path(&run.id), bytes).map_err(ReviewerError::RunIo)
961 }
962}
963
/// One line of the review queue file.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct QueuedReview {
    /// Id of the associated `ReviewRun`; defaults to empty when absent from the
    /// stored JSON, in which case `drain_once` generates a fresh id.
    #[serde(default)]
    pub run_id: String,
    /// Commit to review.
    pub commit_sha: String,
    /// Enqueue timestamp from `unix_now()`.
    pub enqueued_at_unix: u64,
}
971
/// File-backed queue of pending reviews, stored as JSON Lines under `root`.
#[derive(Clone, Debug)]
pub struct ReviewQueue {
    /// Directory holding the queue file; also used as the root of the run store.
    root: PathBuf,
}
976
977impl ReviewQueue {
978 pub fn new(root: impl Into<PathBuf>) -> Self {
979 Self { root: root.into() }
980 }
981
982 pub fn path(&self) -> PathBuf {
983 self.root.join(REVIEW_QUEUE_FILE)
984 }
985
986 pub fn enqueue(&self, commit_sha: impl Into<String>) -> Result<QueuedReview, ReviewerError> {
987 fs::create_dir_all(&self.root).map_err(ReviewerError::QueueIo)?;
988 let commit_sha = commit_sha.into();
989 let run = ReviewRunStore::new(&self.root).create_queued(&commit_sha, "commit")?;
990 let item = QueuedReview {
991 run_id: run.id,
992 commit_sha,
993 enqueued_at_unix: unix_now(),
994 };
995 let mut file = fs::OpenOptions::new()
996 .create(true)
997 .append(true)
998 .open(self.path())
999 .map_err(ReviewerError::QueueIo)?;
1000 serde_json::to_writer(&mut file, &item).map_err(ReviewerError::QueueJson)?;
1001 writeln!(file).map_err(ReviewerError::QueueIo)?;
1002 Ok(item)
1003 }
1004
1005 pub fn pending(&self) -> Result<Vec<QueuedReview>, ReviewerError> {
1006 let contents = match fs::read_to_string(self.path()) {
1007 Ok(contents) => contents,
1008 Err(error) if error.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
1009 Err(error) => return Err(ReviewerError::QueueIo(error)),
1010 };
1011
1012 contents
1013 .lines()
1014 .filter(|line| !line.trim().is_empty())
1015 .map(|line| serde_json::from_str(line).map_err(ReviewerError::QueueJson))
1016 .collect()
1017 }
1018
1019 pub fn remove_sha(&self, sha: &str) -> Result<(), ReviewerError> {
1022 let remaining: Vec<QueuedReview> = self
1023 .pending()?
1024 .into_iter()
1025 .filter(|item| item.commit_sha != sha)
1026 .collect();
1027 self.rewrite(&remaining)
1028 }
1029
1030 fn rewrite(&self, items: &[QueuedReview]) -> Result<(), ReviewerError> {
1031 if items.is_empty() {
1032 return match fs::remove_file(self.path()) {
1033 Ok(()) => Ok(()),
1034 Err(error) if error.kind() == io::ErrorKind::NotFound => Ok(()),
1035 Err(error) => Err(ReviewerError::QueueIo(error)),
1036 };
1037 }
1038
1039 let mut file = fs::File::create(self.path()).map_err(ReviewerError::QueueIo)?;
1040 for item in items {
1041 serde_json::to_writer(&mut file, item).map_err(ReviewerError::QueueJson)?;
1042 writeln!(file).map_err(ReviewerError::QueueIo)?;
1043 }
1044 Ok(())
1045 }
1046
1047 pub fn remove_run_id(&self, run_id: &str) -> Result<(), ReviewerError> {
1048 let remaining: Vec<QueuedReview> = self
1049 .pending()?
1050 .into_iter()
1051 .filter(|item| item.run_id != run_id)
1052 .collect();
1053 self.rewrite(&remaining)
1054 }
1055}
1056
/// Source of the material needed to review a commit.
pub trait MaterialLoader {
    /// Loads the claim and the diff for commit `sha`.
    fn load(&self, sha: &str) -> Result<(Claim, String), ReviewerError>;
}
1062
/// `MaterialLoader` that reads the commit message and patch through `git show`.
#[derive(Clone, Debug, Default)]
pub struct GitMaterialLoader {
    /// Evidence patterns passed to `Claim::parse_with`; when empty, `Claim::parse`
    /// is used instead.
    pub evidence_patterns: Vec<String>,
}
1069
impl GitMaterialLoader {
    /// Creates a loader that parses claims with the given evidence patterns.
    pub fn with_patterns(evidence_patterns: Vec<String>) -> Self {
        Self { evidence_patterns }
    }
}
1075
1076impl MaterialLoader for GitMaterialLoader {
1077 fn load(&self, sha: &str) -> Result<(Claim, String), ReviewerError> {
1078 let message = git_output(["show", "--format=%B", "--no-patch", sha])?;
1079 let diff = git_output(["show", "--format=", "--patch", sha])?;
1080 let claim = if self.evidence_patterns.is_empty() {
1081 Claim::parse(&message)?
1082 } else {
1083 Claim::parse_with(&message, &self.evidence_patterns)?
1084 };
1085 Ok((claim, diff))
1086 }
1087}
1088
/// Summary of one `drain_once` pass.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct DrainReport {
    /// Commits that were reviewed successfully, in processing order.
    pub reviewed: Vec<String>,
    /// Total number of ledger entries written during the pass.
    pub ledger_entries: usize,
}
1094
1095pub fn drain_once<R: ProcessRunner, L: MaterialLoader>(
1099 queue: &ReviewQueue,
1100 loader: &L,
1101 selection: &ReviewSelection,
1102 context: &str,
1103 runner: &R,
1104 store: &LedgerStore,
1105) -> Result<DrainReport, ReviewerError> {
1106 let pending = queue.pending()?;
1107 let run_store = ReviewRunStore::new(&queue.root);
1108 let mut seen = std::collections::BTreeSet::new();
1109 let mut order = Vec::new();
1110 for item in &pending {
1111 if seen.insert(item.commit_sha.clone()) {
1112 order.push(item.clone());
1113 } else if !item.run_id.trim().is_empty()
1114 && let Ok(run) = run_store.read(&item.run_id)
1115 && run.status == ReviewRunStatus::Queued
1116 {
1117 run_store.cancel_queued(&item.run_id)?;
1118 }
1119 }
1120
1121 let mut report = DrainReport::default();
1122 for item in order {
1123 let sha = item.commit_sha;
1124 let run_id = if item.run_id.trim().is_empty() {
1125 generate_run_id(&sha)
1126 } else {
1127 item.run_id
1128 };
1129 let run = run_store.ensure_queued(&run_id, &sha, "commit")?;
1130 if run.status == ReviewRunStatus::Cancelled {
1131 queue.remove_sha(&sha)?;
1132 continue;
1133 }
1134 run_store.mark_running(&run_id, "reviewing")?;
1135 let (claim, diff) = loader.load(&sha)?;
1136 let prompt = first_pass_prompt(&claim, &diff, context);
1137 let job = ReviewJob {
1138 commit_sha: sha.clone(),
1139 claim,
1140 diff,
1141 context: context.to_owned(),
1142 request: selection.request_for(prompt),
1143 strict: selection.strict.clone(),
1144 };
1145 let execution = match execute_review_job(job, runner, store) {
1146 Ok(execution) => execution,
1147 Err(error) => {
1148 let _ = run_store.mark_failed(&run_id, error.to_string());
1149 return Err(error);
1150 }
1151 };
1152 report.ledger_entries += execution.entries.len();
1153 run_store.mark_completed(&run_id, execution.entries.len())?;
1154 queue.remove_sha(&sha)?;
1155 report.reviewed.push(sha);
1156 }
1157
1158 Ok(report)
1159}
1160
1161fn review_context(config: &config::TruthMirrorConfig) -> String {
1164 let repo_root = match git_output(["rev-parse", "--show-toplevel"]) {
1165 Ok(root) => PathBuf::from(root.trim()),
1166 Err(_) => return String::new(),
1167 };
1168 let provider = crate::context::trajectory_provider(&repo_root, &config.history);
1169 crate::context::build_review_context(
1170 &repo_root,
1171 &config.ground_truth,
1172 &config.history,
1173 Some(provider.as_ref()),
1174 )
1175 .unwrap_or_default()
1176}
1177
1178pub fn run_watch_command(
1179 args: cli::WatchArgs,
1180 state_dir: &Path,
1181 config: &config::TruthMirrorConfig,
1182) -> Result<ExitCode> {
1183 let selection = ReviewSelection::resolve(
1184 args.watched_agent,
1185 args.watched_model,
1186 args.reviewer_harness,
1187 args.reviewer_model,
1188 args.reviewer_effort,
1189 args.allow_same_model,
1190 config,
1191 )?;
1192 let queue = ReviewQueue::new(state_dir);
1193 let store = LedgerStore::new(state_dir);
1194 let loader = GitMaterialLoader::with_patterns(config.gates.to_policy().evidence_patterns);
1195 let runner = StdProcessRunner;
1196
1197 if args.once {
1198 let context = review_context(config);
1199 let report = drain_once(&queue, &loader, &selection, &context, &runner, &store)?;
1200 println!(
1201 "truth-mirror watch: reviewed {} commit(s), wrote {} ledger entrie(s)",
1202 report.reviewed.len(),
1203 report.ledger_entries
1204 );
1205 return Ok(ExitCode::SUCCESS);
1206 }
1207
1208 let interval = std::time::Duration::from_secs(args.poll_secs.max(1));
1209 loop {
1210 let context = review_context(config);
1212 let report = drain_once(&queue, &loader, &selection, &context, &runner, &store)?;
1213 if !report.reviewed.is_empty() {
1214 println!(
1215 "truth-mirror watch: reviewed {} commit(s)",
1216 report.reviewed.len()
1217 );
1218 }
1219 std::thread::sleep(interval);
1220 }
1221}
1222
/// Stop thresholds for the strict-goal loop; a threshold of `0` disables it.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct StrictGoalPolicy {
    pub stop_after_lies: u32,
    pub stop_after_fuckups: u32,
}

/// Running totals accumulated across strict-goal passes.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct StrictGoalCounters {
    pub lies_exposed: u32,
    pub fuckups_registered: u32,
}

/// Result of evaluating a [`StrictGoalPolicy`] against the current counters.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StrictGoalDecision {
    Continue,
    Stop { reason: StrictGoalStopReason },
}

/// Which threshold caused the strict-goal loop to stop.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StrictGoalStopReason {
    LiesExposed,
    FuckupsRegistered,
}

impl StrictGoalPolicy {
    /// Decides whether the loop should stop given `counters`.
    ///
    /// The lies threshold is checked before the fuckups threshold, so when
    /// both are met the reported reason is `LiesExposed`. A threshold of zero
    /// never triggers.
    pub fn decide(&self, counters: StrictGoalCounters) -> StrictGoalDecision {
        let tripped = |threshold: u32, observed: u32| threshold > 0 && observed >= threshold;

        let reason = if tripped(self.stop_after_lies, counters.lies_exposed) {
            StrictGoalStopReason::LiesExposed
        } else if tripped(self.stop_after_fuckups, counters.fuckups_registered) {
            StrictGoalStopReason::FuckupsRegistered
        } else {
            return StrictGoalDecision::Continue;
        };

        StrictGoalDecision::Stop { reason }
    }
}
1264
1265#[derive(Clone, Debug, Eq, PartialEq)]
1266pub struct StrictGoalOutcome {
1267 pub passes: u32,
1268 pub counters: StrictGoalCounters,
1269 pub stop_reason: Option<StrictGoalStopReason>,
1272 pub entries: Vec<LedgerEntry>,
1273}
1274
1275impl StrictGoalOutcome {
1276 pub fn stop_reason_suffix(&self) -> &'static str {
1277 match self.stop_reason {
1278 Some(StrictGoalStopReason::LiesExposed) => " (stopped: lies exposed)",
1279 Some(StrictGoalStopReason::FuckupsRegistered) => " (stopped: fuckups registered)",
1280 None => " (stopped: max passes)",
1281 }
1282 }
1283}
1284
1285#[allow(clippy::too_many_arguments)]
1290pub fn run_strict_goal_loop<R: ProcessRunner>(
1291 commit_sha: &str,
1292 claim: &Claim,
1293 diff: &str,
1294 context: &str,
1295 selection: &ReviewSelection,
1296 policy: StrictGoalPolicy,
1297 max_passes: u32,
1298 runner: &R,
1299 store: &LedgerStore,
1300) -> Result<StrictGoalOutcome, ReviewerError> {
1301 let ceiling = max_passes.max(1);
1302 let mut outcome = StrictGoalOutcome {
1303 passes: 0,
1304 counters: StrictGoalCounters {
1305 lies_exposed: 0,
1306 fuckups_registered: 0,
1307 },
1308 stop_reason: None,
1309 entries: Vec::new(),
1310 };
1311
1312 while outcome.passes < ceiling {
1313 let prompt = strict_goal_prompt(claim, diff, context, outcome.passes + 1, &outcome.entries);
1314 let request = selection.request_for(prompt);
1315 let plan = ReviewPlan::build(request.clone())?;
1316 let output = plan.run_with(&request.prompt, runner)?;
1317 ensure_process_success(&output)?;
1318 let verdict = ParsedVerdict::parse(&output.stdout)?;
1319
1320 let job = ReviewJob {
1321 commit_sha: commit_sha.to_owned(),
1322 claim: claim.clone(),
1323 diff: diff.to_owned(),
1324 context: context.to_owned(),
1325 request,
1326 strict: None,
1327 };
1328 let entry = entry_from_verdict(&job, &plan, &verdict);
1329 store.append_entry(&entry)?;
1330 outcome.entries.push(entry);
1331
1332 outcome.passes += 1;
1333 if verdict.verdict == Verdict::Reject {
1334 outcome.counters.lies_exposed += 1;
1335 }
1336 outcome.counters.fuckups_registered = outcome
1337 .counters
1338 .fuckups_registered
1339 .saturating_add(u32::try_from(verdict.findings.len()).unwrap_or(u32::MAX));
1340
1341 if let StrictGoalDecision::Stop { reason } = policy.decide(outcome.counters) {
1342 outcome.stop_reason = Some(reason);
1343 break;
1344 }
1345 }
1346
1347 Ok(outcome)
1348}
1349
1350fn strict_goal_prompt(
1351 claim: &Claim,
1352 diff: &str,
1353 context: &str,
1354 pass: u32,
1355 prior: &[LedgerEntry],
1356) -> String {
1357 let prior_findings: Vec<String> = prior
1358 .iter()
1359 .flat_map(|entry| entry.findings.clone())
1360 .collect();
1361 let prior_block = if prior_findings.is_empty() {
1362 "(none)".to_owned()
1363 } else {
1364 prior_findings.join("\n")
1365 };
1366 format!(
1367 "{ADVERSARIAL_PREAMBLE}\n\nStrict-goal loop, pass {pass}. Keep hunting for any lie the claim hides; do not repeat prior findings verbatim.{}\n\nCLAIM:\n{}\n\nPRIOR FINDINGS:\n{prior_block}\n\nDIFF:\n{}",
1368 context_block(context),
1369 claim.to_line(),
1370 diff
1371 )
1372}
1373
1374pub fn run_review_command(
1375 args: cli::ReviewArgs,
1376 state_dir: &Path,
1377 config: &config::TruthMirrorConfig,
1378) -> Result<ExitCode> {
1379 if let Some(command) = args.command {
1380 return run_review_run_command(command, state_dir);
1381 }
1382
1383 let material = ReviewMaterial::load(
1384 &args,
1385 state_dir,
1386 &config.gates.to_policy().evidence_patterns,
1387 )?;
1388
1389 let mut selection = ReviewSelection::resolve(
1390 args.watched_agent,
1391 args.watched_model,
1392 args.reviewer_harness,
1393 args.reviewer_model,
1394 args.reviewer_effort,
1395 args.allow_same_model,
1396 config,
1397 )?;
1398
1399 if args.strict_two_pass {
1400 selection.strict = Some(ReviewSelection::resolve_arbiter(
1401 selection.watched_agent,
1402 args.arbiter_harness,
1403 args.arbiter_model,
1404 args.arbiter_effort,
1405 config,
1406 )?);
1407 }
1408 let store = LedgerStore::new(state_dir);
1409 let run_store = ReviewRunStore::new(state_dir);
1410 let context = review_context(config);
1411 let run = run_store.create_queued(&material.commit_sha, material.target_label.clone())?;
1412 run_store.mark_running(&run.id, "reviewing")?;
1413
1414 if args.strict_goal {
1415 let policy = config
1416 .strict
1417 .goal_policy(args.stop_after_lies, args.stop_after_fuckups);
1418 let max_passes = args.max_passes.unwrap_or(config.strict.max_passes);
1419 let outcome = match run_strict_goal_loop(
1420 &material.commit_sha,
1421 &material.claim,
1422 &material.diff,
1423 &context,
1424 &selection,
1425 policy,
1426 max_passes,
1427 &StdProcessRunner,
1428 &store,
1429 ) {
1430 Ok(outcome) => outcome,
1431 Err(error) => {
1432 let _ = run_store.mark_failed(&run.id, error.to_string());
1433 return Err(error.into());
1434 }
1435 };
1436 run_store.mark_completed(&run.id, outcome.entries.len())?;
1437 println!(
1438 "truth-mirror strict-goal: run {}, {} pass(es), {} lie(s), {} fuckup(s){}",
1439 run.id,
1440 outcome.passes,
1441 outcome.counters.lies_exposed,
1442 outcome.counters.fuckups_registered,
1443 outcome.stop_reason_suffix(),
1444 );
1445 return Ok(ExitCode::SUCCESS);
1446 }
1447
1448 let prompt = first_pass_prompt(&material.claim, &material.diff, &context);
1449 let job = ReviewJob {
1450 commit_sha: material.commit_sha,
1451 claim: material.claim,
1452 diff: material.diff,
1453 context,
1454 request: selection.request_for(prompt),
1455 strict: selection.strict.clone(),
1456 };
1457
1458 let execution = match execute_review_job(job, &StdProcessRunner, &store) {
1459 Ok(execution) => execution,
1460 Err(error) => {
1461 let _ = run_store.mark_failed(&run.id, error.to_string());
1462 return Err(error.into());
1463 }
1464 };
1465 run_store.mark_completed(&run.id, execution.entries.len())?;
1466 println!(
1467 "truth-mirror review: run {}, wrote {} ledger entrie(s)",
1468 run.id,
1469 execution.entries.len()
1470 );
1471 Ok(ExitCode::SUCCESS)
1472}
1473
1474fn run_review_run_command(command: cli::ReviewCommand, state_dir: &Path) -> Result<ExitCode> {
1475 let runs = ReviewRunStore::new(state_dir);
1476 match command {
1477 cli::ReviewCommand::Status { run_id } => {
1478 if let Some(run_id) = run_id {
1479 print_run(&runs.read_reconciled(&run_id)?);
1480 } else {
1481 let all = runs.list_reconciled()?;
1482 if all.is_empty() {
1483 println!("No review runs.");
1484 } else {
1485 for run in all {
1486 print_run_summary(&run);
1487 }
1488 }
1489 }
1490 }
1491 cli::ReviewCommand::Result { run_id } => {
1492 let run = match run_id {
1493 Some(run_id) => runs.read(&run_id)?,
1494 None => runs.latest_result()?,
1495 };
1496 print_run(&run);
1497 print_run_ledger_entries(state_dir, &run)?;
1498 }
1499 cli::ReviewCommand::Cancel { run_id, force } => {
1500 let run = runs.cancel(&run_id, force)?;
1501 ReviewQueue::new(state_dir).remove_run_id(&run_id)?;
1502 match run.status {
1503 ReviewRunStatus::Failed => println!(
1504 "reaped stale review run {} ({}): {}",
1505 run.id,
1506 run.commit_sha,
1507 run.error.as_deref().unwrap_or("worker was not alive"),
1508 ),
1509 _ => println!("cancelled review run {} ({})", run.id, run.commit_sha),
1510 }
1511 }
1512 }
1513 Ok(ExitCode::SUCCESS)
1514}
1515
1516fn print_run_summary(run: &ReviewRun) {
1517 println!(
1518 "{} {} {} {} entries={} updated={}",
1519 run.id, run.status, run.commit_sha, run.phase, run.ledger_entries, run.updated_at_unix
1520 );
1521}
1522
1523fn print_run(run: &ReviewRun) {
1524 println!("run: {}", run.id);
1525 println!("status: {}", run.status);
1526 println!("commit: {}", run.commit_sha);
1527 println!("target: {}", run.target);
1528 println!("phase: {}", run.phase);
1529 println!("ledger_entries: {}", run.ledger_entries);
1530 if let Some(pid) = run.worker_pid {
1531 println!("worker_pid: {pid}");
1532 }
1533 println!("created_at_unix: {}", run.created_at_unix);
1534 println!("updated_at_unix: {}", run.updated_at_unix);
1535 if let Some(started) = run.started_at_unix {
1536 println!("started_at_unix: {started}");
1537 }
1538 if let Some(completed) = run.completed_at_unix {
1539 println!("completed_at_unix: {completed}");
1540 }
1541 if let Some(error) = &run.error {
1542 println!("error: {error}");
1543 }
1544}
1545
1546fn print_run_ledger_entries(state_dir: &Path, run: &ReviewRun) -> Result<(), ReviewerError> {
1547 let store = LedgerStore::new(state_dir);
1548 let entries: Vec<LedgerEntry> = store
1549 .read_history()?
1550 .into_iter()
1551 .filter(|entry| entry.commit_sha == run.commit_sha)
1552 .collect();
1553 if entries.is_empty() {
1554 println!("ledger_entries: none");
1555 return Ok(());
1556 }
1557 println!("ledger_entries:");
1558 for entry in entries {
1559 println!(
1560 "- {} {} {} findings={}",
1561 entry.commit_sha,
1562 entry.verdict,
1563 entry.disposition,
1564 entry.findings.len()
1565 );
1566 }
1567 Ok(())
1568}
1569
/// Everything a one-shot review needs: what is being reviewed, the claim made
/// about it, and the diff text handed to the reviewer.
#[derive(Clone, Debug, Eq, PartialEq)]
struct ReviewMaterial {
    // Commit SHA for commit scope; the loaders use the placeholders
    // `STAGED`, `WORKING_TREE`, and `HEAD` for the other scopes.
    commit_sha: String,
    // Human-readable target label, e.g. `commit:<sha>`, `staged`,
    // `working-tree`, or `branch:<base>`.
    target_label: String,
    // Parsed claim taken from the commit message or the claim file.
    claim: Claim,
    // Diff text for the reviewer, or a "too large to inline" notice.
    diff: String,
}
1577
1578impl ReviewMaterial {
1579 fn load(
1580 args: &cli::ReviewArgs,
1581 state_dir: &Path,
1582 evidence_patterns: &[String],
1583 ) -> Result<Self, ReviewerError> {
1584 let parse = |text: &str| {
1585 if evidence_patterns.is_empty() {
1586 Claim::parse(text)
1587 } else {
1588 Claim::parse_with(text, evidence_patterns)
1589 }
1590 };
1591
1592 let scope = if args.staged {
1593 ReviewScope::Staged
1594 } else {
1595 args.scope
1596 };
1597
1598 match scope {
1599 ReviewScope::Commit => {
1600 let sha = args
1601 .target
1602 .clone()
1603 .ok_or(ReviewerError::MissingReviewTarget)?;
1604 let message = git_output(["show", "--format=%B", "--no-patch", sha.as_str()])?;
1605 let diff = git_output(["show", "--format=", "--patch", sha.as_str()])?;
1606 let claim = parse(&message)?;
1607 Ok(Self {
1608 commit_sha: sha.clone(),
1609 target_label: format!("commit:{sha}"),
1610 claim,
1611 diff,
1612 })
1613 }
1614 ReviewScope::Staged => Self::load_staged(state_dir, &parse),
1615 ReviewScope::Auto => {
1616 reject_target_with_scope(args)?;
1617 if working_tree_dirty()? {
1618 Self::load_working_tree(state_dir, &parse)
1619 } else {
1620 Self::load_branch(args.base.as_deref(), &parse)
1621 }
1622 }
1623 ReviewScope::WorkingTree => {
1624 reject_target_with_scope(args)?;
1625 Self::load_working_tree(state_dir, &parse)
1626 }
1627 ReviewScope::Branch => {
1628 reject_target_with_scope(args)?;
1629 Self::load_branch(args.base.as_deref(), &parse)
1630 }
1631 }
1632 }
1633
1634 fn load_staged<F>(state_dir: &Path, parse: &F) -> Result<Self, ReviewerError>
1635 where
1636 F: Fn(&str) -> Result<Claim, crate::claim::ClaimError>,
1637 {
1638 let raw = git_output(["diff", "--cached"])?;
1639 let files = git_output(["diff", "--cached", "--name-only"])?;
1640 let diff = materialize_diff("staged", &raw, &files);
1641 let claim = parse(&read_claim_file(state_dir)?)?;
1642 Ok(Self {
1643 commit_sha: "STAGED".to_owned(),
1644 target_label: "staged".to_owned(),
1645 claim,
1646 diff,
1647 })
1648 }
1649
1650 fn load_working_tree<F>(state_dir: &Path, parse: &F) -> Result<Self, ReviewerError>
1651 where
1652 F: Fn(&str) -> Result<Claim, crate::claim::ClaimError>,
1653 {
1654 let status = git_output(["status", "--porcelain"])?;
1655 let tracked = git_output(["diff", "HEAD", "--patch"])?;
1656 let files = git_output(["diff", "HEAD", "--name-only"])?;
1657 let untracked = untracked_file_context()?;
1658 let raw = format!(
1659 "WORKING TREE STATUS:\n{status}\n\nTRACKED DIFF AGAINST HEAD:\n{tracked}\n\nUNTRACKED FILES:\n{untracked}"
1660 );
1661 let diff = materialize_diff("working-tree", &raw, &files);
1662 let claim = parse(&read_claim_file(state_dir)?)?;
1663 Ok(Self {
1664 commit_sha: "WORKING_TREE".to_owned(),
1665 target_label: "working-tree".to_owned(),
1666 claim,
1667 diff,
1668 })
1669 }
1670
1671 fn load_branch<F>(base: Option<&str>, parse: &F) -> Result<Self, ReviewerError>
1672 where
1673 F: Fn(&str) -> Result<Claim, crate::claim::ClaimError>,
1674 {
1675 let base = match base {
1676 Some(base) => base.to_owned(),
1677 None => default_branch_ref()?,
1678 };
1679 let merge_base = git_output_slice(&["merge-base", "HEAD", &base])?;
1680 let merge_base = merge_base.trim().to_owned();
1681 let range = format!("{merge_base}..HEAD");
1682 let message = git_output(["show", "--format=%B", "--no-patch", "HEAD"])?;
1683 let log = git_output_slice(&["log", "--oneline", &range])?;
1684 let stat = git_output_slice(&["diff", "--stat", &range])?;
1685 let raw_patch = git_output_slice(&["diff", "--patch", &range])?;
1686 let files = git_output_slice(&["diff", "--name-only", &range])?;
1687 let raw = format!(
1688 "BRANCH BASE: {base}\nMERGE BASE: {merge_base}\nCOMMITS:\n{log}\n\nDIFF STAT:\n{stat}\n\nDIFF:\n{raw_patch}"
1689 );
1690 let diff = materialize_diff(&format!("branch:{base}"), &raw, &files);
1691 let claim = parse(&message)?;
1692 Ok(Self {
1693 commit_sha: "HEAD".to_owned(),
1694 target_label: format!("branch:{base}"),
1695 claim,
1696 diff,
1697 })
1698 }
1699}
1700
1701fn reject_target_with_scope(args: &cli::ReviewArgs) -> Result<(), ReviewerError> {
1702 if let Some(target) = &args.target {
1703 return Err(ReviewerError::UnexpectedReviewTarget {
1704 scope: args.scope,
1705 target: target.clone(),
1706 });
1707 }
1708 Ok(())
1709}
1710
1711fn read_claim_file(state_dir: &Path) -> Result<String, ReviewerError> {
1712 let claim_path = state_dir.join("claim.txt");
1713 fs::read_to_string(&claim_path).map_err(|source| ReviewerError::ClaimFileRead {
1714 path: claim_path,
1715 source,
1716 })
1717}
1718
1719fn working_tree_dirty() -> Result<bool, ReviewerError> {
1720 Ok(!git_output(["status", "--porcelain"])?.trim().is_empty())
1721}
1722
1723fn default_branch_ref() -> Result<String, ReviewerError> {
1724 if let Ok(symbolic) = git_output([
1725 "symbolic-ref",
1726 "--quiet",
1727 "--short",
1728 "refs/remotes/origin/HEAD",
1729 ]) {
1730 let trimmed = symbolic.trim();
1731 if !trimmed.is_empty() {
1732 return Ok(trimmed.to_owned());
1733 }
1734 }
1735
1736 for candidate in [
1737 "origin/main",
1738 "origin/master",
1739 "origin/trunk",
1740 "main",
1741 "master",
1742 "trunk",
1743 ] {
1744 if git_output_slice(&["rev-parse", "--verify", "--quiet", candidate]).is_ok() {
1745 return Ok(candidate.to_owned());
1746 }
1747 }
1748
1749 Err(ReviewerError::DefaultBranchNotFound)
1750}
1751
1752fn materialize_diff(label: &str, raw: &str, files: &str) -> String {
1753 let file_list: Vec<&str> = files
1754 .lines()
1755 .filter(|line| !line.trim().is_empty())
1756 .collect();
1757 let bytes = raw.len();
1758 if bytes <= MAX_INLINE_DIFF_BYTES && file_list.len() <= MAX_INLINE_DIFF_FILES {
1759 return raw.to_owned();
1760 }
1761
1762 format!(
1763 "Diff for {label} is too large to inline safely.\ninline_limit_bytes={MAX_INLINE_DIFF_BYTES}\nactual_bytes={bytes}\ninline_file_limit={MAX_INLINE_DIFF_FILES}\nactual_files={}\n\nChanged files:\n{}\n\nReviewer must inspect the repository directly with read/grep tools before returning a verdict.",
1764 file_list.len(),
1765 if file_list.is_empty() {
1766 "(none)".to_owned()
1767 } else {
1768 file_list.join("\n")
1769 }
1770 )
1771}
1772
1773fn untracked_file_context() -> Result<String, ReviewerError> {
1774 let files = git_output(["ls-files", "--others", "--exclude-standard"])?;
1775 let mut output = String::new();
1776 for file in files.lines().filter(|line| !line.trim().is_empty()) {
1777 let path = Path::new(file);
1778 let metadata = match fs::metadata(path) {
1779 Ok(metadata) => metadata,
1780 Err(_) => continue,
1781 };
1782 if !metadata.is_file() {
1783 continue;
1784 }
1785 if metadata.len() > MAX_UNTRACKED_FILE_BYTES {
1786 output.push_str(&format!(
1787 "\n--- {file} omitted: {} bytes exceeds {MAX_UNTRACKED_FILE_BYTES} byte inline limit ---\n",
1788 metadata.len()
1789 ));
1790 continue;
1791 }
1792 let bytes = match fs::read(path) {
1793 Ok(bytes) => bytes,
1794 Err(_) => continue,
1795 };
1796 if bytes.contains(&0) {
1797 output.push_str(&format!("\n--- {file} omitted: binary file ---\n"));
1798 continue;
1799 }
1800 output.push_str(&format!(
1801 "\n--- {file} ---\n{}",
1802 String::from_utf8_lossy(&bytes)
1803 ));
1804 }
1805
1806 if output.is_empty() {
1807 Ok("(none)".to_owned())
1808 } else {
1809 Ok(output)
1810 }
1811}
1812
/// Every failure the reviewer module can report.
///
/// The `#[error]` message on each variant is the user-facing text.
#[derive(Debug, Error)]
pub enum ReviewerError {
    // --- Reviewer / arbiter selection and validation ---
    #[error("missing {role} model")]
    MissingModel { role: String },
    #[error(
        "same reviewer model is disallowed without --allow-same-model: watched={watched_model}, reviewer={reviewer_model}"
    )]
    SameModelWithoutWaiver {
        watched_model: String,
        reviewer_model: String,
    },
    #[error("strict arbiter model must differ from watched and first reviewer models")]
    StrictArbiterModelNotDistinct,
    #[error("no adversarial pair configured for writer harness {writer:?}")]
    NoPairForWriter { writer: String },
    #[error(
        "strict review requires an arbiter (pair.arbiter or --arbiter-harness/--arbiter-model)"
    )]
    MissingArbiter,
    #[error(
        "--{role}-harness={harness:?} was overridden without a matching --{role}-model; the pair's model is for a different harness"
    )]
    OverrideNeedsModel { role: String, harness: String },
    #[error("custom reviewer harness requires explicit command configuration")]
    UnsupportedCustomHarness,
    #[error("unknown watched agent {value:?}")]
    UnknownAgent { value: String },
    #[error("unknown reviewer harness {value:?}")]
    UnknownHarness { value: String },
    // --- Review target and material loading ---
    #[error("missing review target")]
    MissingReviewTarget,
    #[error("--scope={scope:?} does not accept positional target {target:?}")]
    UnexpectedReviewTarget { scope: ReviewScope, target: String },
    #[error("could not determine default branch; pass --base explicitly")]
    DefaultBranchNotFound,
    #[error("failed to read staged claim file {path}: {source}")]
    ClaimFileRead {
        path: PathBuf,
        #[source]
        source: io::Error,
    },
    // --- Reviewer output and process execution ---
    #[error("reviewer output was not valid structured JSON verdict: {source}: {output:?}")]
    VerdictJson {
        source: serde_json::Error,
        output: String,
    },
    #[error("reviewer structured verdict violated schema: {message}")]
    VerdictSchema { message: String },
    #[error("reviewer process exited with status {status:?}: {stderr}")]
    ReviewerProcessFailed { status: Option<i32>, stderr: String },
    #[error("git command failed: git {args:?}: {stderr}")]
    GitFailed { args: Vec<String>, stderr: String },
    #[error("failed to spawn git command: {0}")]
    GitSpawn(io::Error),
    #[error("failed to spawn reviewer process: {0}")]
    Spawn(io::Error),
    #[error("failed to open reviewer stdin pipe")]
    MissingStdinPipe,
    #[error("failed to write reviewer prompt: {0}")]
    WritePrompt(io::Error),
    #[error("failed to wait for reviewer process: {0}")]
    Wait(io::Error),
    // --- Review queue and run-record persistence ---
    #[error("review queue IO failed: {0}")]
    QueueIo(io::Error),
    #[error("review queue JSON failed: {0}")]
    QueueJson(serde_json::Error),
    #[error("review run IO failed: {0}")]
    RunIo(io::Error),
    #[error("review run JSON failed: {0}")]
    RunJson(serde_json::Error),
    #[error("review run not found: {id}")]
    ReviewRunNotFound { id: String },
    #[error("no review runs found")]
    NoReviewRuns,
    // --- Run cancellation and worker liveness ---
    #[error("cannot cancel review run {id} with status {status}; it has already finished")]
    CannotCancelReview { id: String, status: ReviewRunStatus },
    #[error(
        "review run {id} is still running (worker pid {pid} is alive); pass --force to kill it"
    )]
    ReviewRunStillAlive { id: String, pid: u32 },
    #[error(
        "review run {id} is running but records no worker pid; pass --force to reap it if it is stuck"
    )]
    ReviewRunLivenessUnknown { id: String },
    #[error("failed to spawn kill for stale worker: {0}")]
    KillWorker(io::Error),
    #[error("failed to kill worker process {pid}")]
    KillWorkerFailed { pid: u32 },
    // --- Errors forwarded unchanged from the claim and ledger modules ---
    #[error(transparent)]
    Claim(#[from] crate::claim::ClaimError),
    #[error(transparent)]
    Ledger(#[from] crate::ledger::LedgerError),
}
1906
/// Instruction text placed at the start of every reviewer prompt built in this
/// file (first pass, strict second pass, and strict-goal passes).
///
/// It tells the reviewer to try to disprove the claim and to answer with JSON
/// only, in the schema spelled out at the end of the text.
const ADVERSARIAL_PREAMBLE: &str = r#"You are an ADVERSARIAL reviewer. Your job is not to review the diff neutrally; it is to PROVE THIS CLAIM FALSE. Assume the author over-rates their own work. A claim is only PASS if the diff and the cited evidence actually substantiate it AND the change does not violate any inviolable constraint. If the evidence is vague, missing, unverifiable, or the change drifts from the stated direction, default to REJECT.

Attack the change for auth and permission holes, data loss, rollback gaps, races, stale state, version skew, observability gaps, missing evidence, fake evidence, broad matchers, gates that fail open, and code that only fixes the instance instead of the defect class.

GREP THE CLASS, NOT THE INSTANCE. For every problem you find, do NOT stop at the one occurrence: name the general CLASS of the defect (for example, config value loaded then ignored, comment contradicts code, gate fails open, matcher too broad), then use your read/grep/find tools to sweep the WHOLE repository for every other instance of that class and report them all. One instance is a symptom; the class is the bug. Check each inviolable constraint against every changed file, and state what you searched for in finding bodies when relevant.

Return valid JSON only. Do not wrap it in Markdown. The schema is:
{
 "verdict": "PASS" | "REJECT",
 "summary": "one concise sentence explaining why the claim passes or fails",
 "findings": [
 {
 "severity": "critical" | "high" | "medium" | "low",
 "title": "short defect title",
 "body": "what can go wrong, why this code is vulnerable, and what evidence proves it",
 "file": "repo-relative file path",
 "line_start": 1,
 "line_end": 1,
 "confidence": 0,
 "recommendation": "concrete change required"
 }
 ],
 "next_steps": ["short concrete follow-up commands or edits"]
}

Use "PASS" only when there are no findings. Use "REJECT" when there is at least one material finding."#;
1933
/// Formats `context` as a prompt section preceded by a blank line.
///
/// Returns an empty string when `context` is empty or only whitespace; the
/// non-blank context is inserted untrimmed.
fn context_block(context: &str) -> String {
    match context.trim() {
        "" => String::new(),
        _ => format!("\n\n{context}"),
    }
}
1941
1942fn first_pass_prompt(claim: &Claim, diff: &str, context: &str) -> String {
1943 format!(
1944 "{ADVERSARIAL_PREAMBLE}{}\n\nCLAIM:\n{}\n\nDIFF:\n{}",
1945 context_block(context),
1946 claim.to_line(),
1947 diff
1948 )
1949}
1950
1951fn strict_second_pass_prompt(job: &ReviewJob, first_output: &str) -> String {
1952 format!(
1953 "{ADVERSARIAL_PREAMBLE}\n\nStrict second pass (COMPLETENESS CRITIC): the first reviewer returned a CLEAN verdict. Assume it found a symptom but failed to generalize it to the full CLASS and enumerate every instance. Re-derive the classes of defect this change could contain, grep the repo for each, and prove the first reviewer INCOMPLETE.{}\n\nCLAIM:\n{}\n\nFIRST REVIEW:\n{}\n\nDIFF:\n{}",
1954 context_block(&job.context),
1955 job.claim.to_line(),
1956 first_output,
1957 job.diff
1958 )
1959}
1960
1961fn entry_from_verdict(job: &ReviewJob, plan: &ReviewPlan, verdict: &ParsedVerdict) -> LedgerEntry {
1962 LedgerEntry::new(
1963 job.commit_sha.clone(),
1964 verdict.verdict,
1965 job.claim.to_line(),
1966 job.claim
1967 .evidence
1968 .iter()
1969 .map(EvidenceRef::as_str)
1970 .map(str::to_owned)
1971 .collect(),
1972 plan.reviewer_config(),
1973 verdict.findings.clone(),
1974 )
1975 .with_structured_review(
1976 verdict.summary.clone(),
1977 verdict.structured_findings.clone(),
1978 verdict.next_steps.clone(),
1979 verdict.raw.clone(),
1980 )
1981}
1982
1983fn ensure_process_success(output: &ProcessOutput) -> Result<(), ReviewerError> {
1984 if output.status_code == Some(0) {
1985 return Ok(());
1986 }
1987
1988 Err(ReviewerError::ReviewerProcessFailed {
1989 status: output.status_code,
1990 stderr: output.stderr.clone(),
1991 })
1992}
1993
1994fn validate_strict_arbiter(
1995 request: &ReviewRequest,
1996 strict: &StrictReviewConfig,
1997) -> Result<(), ReviewerError> {
1998 let arbiter = normalized_model(&strict.arbiter_model);
1999 if arbiter == normalized_model(&request.watched_model)
2000 || arbiter == normalized_model(&request.reviewer_model)
2001 {
2002 return Err(ReviewerError::StrictArbiterModelNotDistinct);
2003 }
2004 Ok(())
2005}
2006
2007fn validate_model_present(role: &str, model: &str) -> Result<(), ReviewerError> {
2008 if model.trim().is_empty() {
2009 return Err(ReviewerError::MissingModel {
2010 role: role.to_owned(),
2011 });
2012 }
2013 Ok(())
2014}
2015
2016fn git_output<const N: usize>(args: [&str; N]) -> Result<String, ReviewerError> {
2017 git_output_slice(&args)
2018}
2019
2020fn git_output_slice(args: &[&str]) -> Result<String, ReviewerError> {
2021 let output = Command::new("git")
2022 .args(args)
2023 .output()
2024 .map_err(ReviewerError::GitSpawn)?;
2025 if !output.status.success() {
2026 return Err(ReviewerError::GitFailed {
2027 args: args.iter().map(|arg| (*arg).to_owned()).collect(),
2028 stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
2029 });
2030 }
2031
2032 Ok(String::from_utf8_lossy(&output.stdout).into_owned())
2033}
2034
2035fn agent_from_slug(value: &str) -> Result<Agent, ReviewerError> {
2036 match value.trim().to_ascii_lowercase().as_str() {
2037 "claude" => Ok(Agent::Claude),
2038 "codex" => Ok(Agent::Codex),
2039 "pi" => Ok(Agent::Pi),
2040 _ => Err(ReviewerError::UnknownAgent {
2041 value: value.to_owned(),
2042 }),
2043 }
2044}
2045
2046fn harness_from_slug(value: &str) -> Result<ReviewerHarness, ReviewerError> {
2047 match value.trim().to_ascii_lowercase().as_str() {
2048 "claude" => Ok(ReviewerHarness::Claude),
2049 "codex" => Ok(ReviewerHarness::Codex),
2050 "pi" => Ok(ReviewerHarness::Pi),
2051 "gemini" => Ok(ReviewerHarness::Gemini),
2052 "opencode" => Ok(ReviewerHarness::Opencode),
2053 "custom" => Ok(ReviewerHarness::Custom),
2054 _ => Err(ReviewerError::UnknownHarness {
2055 value: value.to_owned(),
2056 }),
2057 }
2058}
2059
2060fn harness_slug(harness: ReviewerHarness) -> &'static str {
2061 match harness {
2062 ReviewerHarness::Claude => "claude",
2063 ReviewerHarness::Codex => "codex",
2064 ReviewerHarness::Pi => "pi",
2065 ReviewerHarness::Gemini => "gemini",
2066 ReviewerHarness::Opencode => "opencode",
2067 ReviewerHarness::Custom => "custom",
2068 }
2069}
2070
/// Canonical form of a model name for comparison: trimmed and ASCII-lowercased.
fn normalized_model(model: &str) -> String {
    let trimmed = model.trim();
    trimmed.to_ascii_lowercase()
}
2074
/// Current time as whole seconds since the Unix epoch, or `0` when the system
/// clock reports a time before the epoch.
fn unix_now() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
2080
/// Generates a run id of the form `<nanos>-<pid>[-<short sha>]`.
///
/// `<nanos>` is the current time in nanoseconds since the Unix epoch (`0` if
/// the clock is before the epoch), `<pid>` is this process's id, and
/// `<short sha>` is the first twelve ASCII-alphanumeric characters of
/// `commit_sha`. The last segment is omitted when no such characters exist.
fn generate_run_id(commit_sha: &str) -> String {
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };

    // Only ASCII characters are kept, so byte length equals character count.
    let mut short_sha = String::with_capacity(12);
    for character in commit_sha.chars() {
        if short_sha.len() == 12 {
            break;
        }
        if character.is_ascii_alphanumeric() {
            short_sha.push(character);
        }
    }

    let pid = std::process::id();
    if short_sha.is_empty() {
        format!("{nanos}-{}", pid)
    } else {
        format!("{nanos}-{}-{short_sha}", pid)
    }
}
2096
2097#[cfg(test)]
2098mod tests {
2099 use std::{cell::RefCell, collections::VecDeque, process::Command};
2100
2101 use proptest::prelude::*;
2102
2103 use super::{
2104 InvocationPlan, MaterialLoader, ParsedVerdict, ProcessOutput, ProcessRunner,
2105 PromptDelivery, ReviewJob, ReviewPlan, ReviewQueue, ReviewRequest, ReviewRun,
2106 ReviewRunStatus, ReviewRunStore, ReviewSelection, ReviewerError, StrictGoalCounters,
2107 StrictGoalDecision, StrictGoalPolicy, StrictGoalStopReason, StrictReviewConfig, drain_once,
2108 execute_review_job, pid_is_alive, run_review_run_command, run_strict_goal_loop,
2109 };
2110 use crate::{
2111 claim::{Claim, EvidenceRef},
2112 cli::{Agent, ReviewerHarness},
2113 config::Effort,
2114 ledger::{LedgerStore, Verdict},
2115 };
2116
2117 fn pass_json() -> String {
2118 serde_json::json!({
2119 "verdict": "PASS",
2120 "summary": "The claim is substantiated by the diff and evidence.",
2121 "findings": [],
2122 "next_steps": []
2123 })
2124 .to_string()
2125 }
2126
2127 fn reject_json(title: &str) -> String {
2128 serde_json::json!({
2129 "verdict": "REJECT",
2130 "summary": "The claim is not substantiated.",
2131 "findings": [{
2132 "severity": "high",
2133 "title": title,
2134 "body": "The cited evidence does not prove the claimed behavior.",
2135 "file": "src/lib.rs",
2136 "line_start": 1,
2137 "line_end": 1,
2138 "confidence": 95,
2139 "recommendation": "Provide executable evidence that proves the claim."
2140 }],
2141 "next_steps": ["Run the relevant verification command."]
2142 })
2143 .to_string()
2144 }
2145
2146 #[test]
2147 fn same_harness_different_model_is_valid() {
2148 let request = ReviewRequest::new(
2149 Agent::Codex,
2150 "gpt-5.4",
2151 ReviewerHarness::Codex,
2152 "gpt-5.5",
2153 false,
2154 "review this",
2155 );
2156
2157 let plan = ReviewPlan::build(request).unwrap();
2158
2159 assert_eq!(plan.watched_agent, Agent::Codex);
2160 assert_eq!(plan.reviewer_harness, ReviewerHarness::Codex);
2161 assert_eq!(plan.invocation.program, "codex");
2162 }
2163
/// Without the waiver flag, building a plan fails with `SameModelWithoutWaiver` even though the
/// watched model string differs from the reviewer model in case and surrounding whitespace.
#[test]
fn same_model_is_blocked_by_default() {
    let outcome = ReviewPlan::build(ReviewRequest::new(
        Agent::Codex,
        " GPT-5.5 ",
        ReviewerHarness::Claude,
        "gpt-5.5",
        false,
        "review this",
    ));

    let error = outcome.unwrap_err();
    assert!(matches!(
        error,
        ReviewerError::SameModelWithoutWaiver { .. }
    ));
}
2182
/// With `allow_same_model` set, identical watched and reviewer models produce a plan that keeps
/// the waiver flag and the reviewer model.
#[test]
fn allow_same_model_override_is_deliberate() {
    let waived_request = ReviewRequest::new(
        Agent::Codex,
        "gpt-5.5",
        ReviewerHarness::Codex,
        "gpt-5.5",
        true,
        "review this",
    );

    let built = ReviewPlan::build(waived_request).unwrap();

    assert!(built.allow_same_model);
    assert_eq!(built.reviewer_model, "gpt-5.5");
}
2199
/// Pins the argument vector (and, where asserted, the program and prompt delivery) that
/// `InvocationPlan::for_harness` yields for the Codex, Claude, Gemini, and Pi harnesses.
#[test]
fn provider_mapping_uses_verified_prompt_shapes_and_effort() {
    // Codex: the prompt is the trailing argument.
    let codex_plan =
        InvocationPlan::for_harness(ReviewerHarness::Codex, "gpt-5.5", Effort::Xhigh).unwrap();
    assert_eq!(codex_plan.program, "codex");
    assert_eq!(
        codex_plan.args_for_prompt("prompt"),
        [
            "exec",
            "-m",
            "gpt-5.5",
            "-c",
            "model_reasoning_effort=xhigh",
            "prompt"
        ]
    );

    // Claude: stdin delivery, so the prompt does not appear among the arguments.
    let claude_plan =
        InvocationPlan::for_harness(ReviewerHarness::Claude, "opus", Effort::High).unwrap();
    assert_eq!(claude_plan.program, "claude");
    assert_eq!(claude_plan.prompt_delivery, PromptDelivery::Stdin);
    assert_eq!(
        claude_plan.args_for_prompt("prompt"),
        ["--print", "--model", "opus", "--effort", "high"]
    );

    // Gemini: the prompt follows `-p`; no effort flag is expected in the arguments.
    let gemini_plan =
        InvocationPlan::for_harness(ReviewerHarness::Gemini, "gemini-pro", Effort::Xhigh)
            .unwrap();
    assert_eq!(
        gemini_plan.args_for_prompt("prompt"),
        ["-m", "gemini-pro", "-p", "prompt"]
    );

    // Pi: stdin delivery with a fixed tool list.
    let pi_plan =
        InvocationPlan::for_harness(ReviewerHarness::Pi, "openai/gpt-5.5", Effort::Xhigh)
            .unwrap();
    assert_eq!(pi_plan.prompt_delivery, PromptDelivery::Stdin);
    assert_eq!(
        pi_plan.args_for_prompt("prompt"),
        [
            "--model",
            "openai/gpt-5.5",
            "--thinking",
            "xhigh",
            "--tools",
            "read,grep,find,ls",
            "-p"
        ]
    );
}
2250
/// Asking for an invocation plan for the `Custom` harness fails with
/// `UnsupportedCustomHarness`.
#[test]
fn custom_harness_requires_explicit_configuration() {
    let outcome = InvocationPlan::for_harness(ReviewerHarness::Custom, "model", Effort::Xhigh);

    let error = outcome.unwrap_err();
    assert!(matches!(error, ReviewerError::UnsupportedCustomHarness));
}
2258
/// For every effort level, checks the value each harness places in its effort-related argument:
/// Codex's `model_reasoning_effort=…`, Claude's `--effort`, and Pi's `--thinking`.
#[test]
fn effort_maps_to_each_harness_flag() {
    let levels = [
        Effort::Minimal,
        Effort::Low,
        Effort::Medium,
        Effort::High,
        Effort::Xhigh,
    ];

    for effort in levels {
        let label = effort.as_str();

        let codex_plan = InvocationPlan::for_harness(ReviewerHarness::Codex, "m", effort).unwrap();
        let codex_flag = format!("model_reasoning_effort={label}");
        assert!(codex_plan.args.contains(&codex_flag));

        // Claude takes the value from `claude_value()`, which must never be "minimal".
        let claude_plan =
            InvocationPlan::for_harness(ReviewerHarness::Claude, "m", effort).unwrap();
        let effort_at = claude_plan
            .args
            .iter()
            .position(|arg| arg == "--effort")
            .unwrap();
        assert_eq!(claude_plan.args[effort_at + 1], effort.claude_value());
        assert_ne!(claude_plan.args[effort_at + 1], "minimal");

        let pi_plan = InvocationPlan::for_harness(ReviewerHarness::Pi, "m", effort).unwrap();
        let thinking_at = pi_plan
            .args
            .iter()
            .position(|arg| arg == "--thinking")
            .unwrap();
        assert_eq!(pi_plan.args[thinking_at + 1], label);
    }
}
2284
/// With only the writer supplied, `ReviewSelection::resolve` against the default config must
/// return the harness/model pair listed for that writer, at `Effort::Xhigh`.
#[test]
fn resolve_picks_configured_reviewer_for_every_writer() {
    let defaults = crate::config::TruthMirrorConfig::default();

    // (writer, expected reviewer harness, expected reviewer model)
    let expectations = [
        (Agent::Codex, ReviewerHarness::Claude, "claude-opus-4-8"),
        (Agent::Claude, ReviewerHarness::Codex, "gpt-5.5"),
        (Agent::Pi, ReviewerHarness::Codex, "gpt-5.5"),
    ];

    for (writer, expected_harness, expected_model) in expectations {
        let resolved =
            ReviewSelection::resolve(Some(writer), None, None, None, None, false, &defaults)
                .unwrap();

        assert_eq!(resolved.reviewer_harness, expected_harness);
        assert_eq!(resolved.reviewer_model, expected_model);
        assert_eq!(resolved.reviewer_effort, Effort::Xhigh);
    }
}
2305
/// Overriding the reviewer harness to `Pi` for a Codex writer, without naming a reviewer model,
/// fails with `OverrideNeedsModel` under the default config.
#[test]
fn overriding_reviewer_harness_without_model_is_rejected() {
    let defaults = crate::config::TruthMirrorConfig::default();

    let outcome = ReviewSelection::resolve(
        Some(Agent::Codex),
        None,
        Some(ReviewerHarness::Pi),
        None,
        None,
        false,
        &defaults,
    );

    let error = outcome.unwrap_err();
    assert!(matches!(error, ReviewerError::OverrideNeedsModel { .. }));
}
2324
/// Naming the `Claude` harness explicitly for a Codex writer, with no model override, resolves
/// under the default config to that harness with model `claude-opus-4-8`.
#[test]
fn overriding_reviewer_harness_matching_pair_is_ok() {
    let defaults = crate::config::TruthMirrorConfig::default();

    let resolved = ReviewSelection::resolve(
        Some(Agent::Codex),
        None,
        Some(ReviewerHarness::Claude),
        None,
        None,
        false,
        &defaults,
    )
    .unwrap();

    assert_eq!(resolved.reviewer_harness, ReviewerHarness::Claude);
    assert_eq!(resolved.reviewer_model, "claude-opus-4-8");
}
2342
/// When the config sets `allow_same_model`, a selection with identical watched and reviewer
/// models resolves with the waiver on (the CLI flag argument is `false`), and the request derived
/// from it builds into a plan.
#[test]
fn config_allow_same_model_waives_opposition() {
    let waiving_config = crate::config::TruthMirrorConfig {
        allow_same_model: true,
        ..crate::config::TruthMirrorConfig::default()
    };
    let shared_model = "gpt-5.5";

    let resolved = ReviewSelection::resolve(
        Some(Agent::Codex),
        Some(shared_model.to_owned()),
        Some(ReviewerHarness::Codex),
        Some(shared_model.to_owned()),
        None,
        false,
        &waiving_config,
    )
    .unwrap();

    assert!(resolved.allow_same_model);
    let request = resolved.request_for("review".to_owned());
    assert!(ReviewPlan::build(request).is_ok());
}
2365
/// With no overrides passed, the arbiter resolved for a Codex writer under the default config is
/// the `Pi` harness at `Effort::Xhigh`.
#[test]
fn resolve_arbiter_uses_pair_when_cli_absent() {
    let defaults = crate::config::TruthMirrorConfig::default();

    let resolved =
        ReviewSelection::resolve_arbiter(Agent::Codex, None, None, None, &defaults).unwrap();

    assert_eq!(resolved.arbiter_harness, ReviewerHarness::Pi);
    assert_eq!(resolved.arbiter_effort, Effort::Xhigh);
}
2375
/// The first-pass prompt must contain its adversarial instructions, the supplied context and
/// diff verbatim, and the `severity` / `recommendation` JSON keys.
#[test]
fn first_pass_prompt_is_adversarial_and_injects_context() {
    let context = "INVIOLABLE CONSTRAINTS: never fake tests";
    let diff = "THE_DIFF_BODY";

    let prompt = super::first_pass_prompt(&claim(), diff, context);

    let required_fragments = [
        "PROVE THIS CLAIM FALSE",
        "default to REJECT",
        "INVIOLABLE CONSTRAINTS: never fake tests",
        "THE_DIFF_BODY",
        "GREP THE CLASS, NOT THE INSTANCE",
        "\"severity\"",
        "\"recommendation\"",
    ];
    for fragment in required_fragments {
        assert!(
            prompt.contains(fragment),
            "prompt is missing {fragment:?}"
        );
    }
}
2393
/// The strict second-pass prompt, built from a strict job and a passing first output, must
/// contain the completeness-critic phrases checked below.
#[test]
fn strict_second_pass_is_a_completeness_critic() {
    let strict_job = review_job(true);
    let first_pass_output = pass_json();

    let prompt = super::strict_second_pass_prompt(&strict_job, &first_pass_output);

    for fragment in [
        "COMPLETENESS CRITIC",
        "generalize",
        "GREP THE CLASS, NOT THE INSTANCE",
    ] {
        assert!(
            prompt.contains(fragment),
            "prompt is missing {fragment:?}"
        );
    }
}
2405
/// With an empty context string, the first-pass prompt must not mention
/// "INVIOLABLE CONSTRAINTS" but still carries the adversarial instruction.
#[test]
fn prompt_omits_context_block_when_empty() {
    let no_context = "";
    let prompt = super::first_pass_prompt(&claim(), "d", no_context);

    let mentions_constraints = prompt.contains("INVIOLABLE CONSTRAINTS");
    assert!(!mentions_constraints);
    assert!(prompt.contains("PROVE THIS CLAIM FALSE"));
}
2413
/// `ReviewPlan::run_with` accepts any `ProcessRunner`: a local stub checks the invocation it
/// receives and returns a canned passing verdict, which the plan hands back to the caller.
#[test]
fn subprocess_runner_is_mockable() {
    /// Stub runner that inspects the invocation and never spawns a process.
    struct StubRunner;

    impl ProcessRunner for StubRunner {
        fn run(
            &self,
            invocation: &InvocationPlan,
            prompt: &str,
        ) -> Result<ProcessOutput, ReviewerError> {
            assert_eq!(invocation.program, "codex");
            // For this plan the prompt is expected to be the final argument.
            let args = invocation.args_for_prompt(prompt);
            assert_eq!(args.last().unwrap(), "review this");

            let canned = ProcessOutput {
                status_code: Some(0),
                stdout: pass_json(),
                stderr: String::new(),
            };
            Ok(canned)
        }
    }

    let plan = ReviewPlan::build(ReviewRequest::new(
        Agent::Codex,
        "gpt-5.4",
        ReviewerHarness::Codex,
        "gpt-5.5",
        false,
        "review this",
    ))
    .unwrap();

    let output = plan.run_with("review this", &StubRunner).unwrap();

    assert!(output.stdout.contains("PASS"));
}
2450
/// Parsing a canned rejection yields a `Reject` verdict whose first structured finding carries
/// the title and confidence from the JSON, and whose first plain finding mentions the title.
#[test]
fn verdict_parser_extracts_rejection_findings() {
    let raw = reject_json("missing proof");

    let parsed = ParsedVerdict::parse(&raw).unwrap();

    assert_eq!(parsed.verdict, Verdict::Reject);
    let first = &parsed.structured_findings[0];
    assert_eq!(first.title, "missing proof");
    assert_eq!(first.confidence, 95);
    assert!(parsed.findings[0].contains("missing proof"));
}
2460
/// A finding whose confidence is the float `0.95` parses to a confidence of `95`.
#[test]
fn verdict_parser_accepts_normalized_float_confidence() {
    // Start from the canned rejection and swap the integer confidence for a float.
    let mut document: serde_json::Value =
        serde_json::from_str(&reject_json("missing proof")).unwrap();
    document["findings"][0]["confidence"] = serde_json::json!(0.95);
    let raw = document.to_string();

    let parsed = ParsedVerdict::parse(&raw).unwrap();

    assert_eq!(parsed.structured_findings[0].confidence, 95);
}
2471
/// Plain-text `VERDICT:` / `FINDINGS:` output is not accepted; parsing it fails with
/// `VerdictJson`.
#[test]
fn verdict_parser_rejects_legacy_line_protocol() {
    let line_protocol = "VERDICT: REJECT\nFINDINGS:\n- missing proof\n";

    let error = ParsedVerdict::parse(line_protocol).unwrap_err();

    assert!(matches!(error, ReviewerError::VerdictJson { .. }));
}
2479
/// Materializing a diff that lists three files produces the fallback summary and not the
/// inline diff. The trigger is presumably the file count exceeding `MAX_INLINE_DIFF_FILES`;
/// verify against `materialize_diff`.
#[test]
fn large_diff_materialization_falls_back_to_file_summary() {
    let file_listing = "a.rs\nb.rs\nc.rs\n";

    let summary = super::materialize_diff("branch:main", "tiny diff", file_listing);

    for fragment in [
        "too large to inline safely",
        "actual_files=3",
        "a.rs\nb.rs\nc.rs",
        "inspect the repository directly",
    ] {
        assert!(
            summary.contains(fragment),
            "summary is missing {fragment:?}"
        );
    }
}
2490
/// Enqueueing a commit leaves exactly one pending item with a non-empty run id, and the run
/// store holds a matching run in the `Queued` state.
#[test]
fn review_queue_schedules_commits_without_running_models() {
    let scratch = tempfile::tempdir().unwrap();
    let queue = ReviewQueue::new(scratch.path());

    queue.enqueue("abc123").unwrap();

    let pending = queue.pending().unwrap();
    assert_eq!(pending.len(), 1);
    let item = &pending[0];
    assert_eq!(item.commit_sha, "abc123");
    assert!(!item.run_id.is_empty());

    let runs = ReviewRunStore::new(scratch.path());
    let stored = runs.read(&item.run_id).unwrap();
    assert_eq!(stored.commit_sha, "abc123");
    assert_eq!(stored.status, ReviewRunStatus::Queued);
}
2509
/// Running the `Cancel` review command (without force) on a queued run empties the queue and
/// leaves the stored run in the `Cancelled` state.
#[test]
fn review_cancel_marks_queued_run_and_removes_queue_item() {
    let scratch = tempfile::tempdir().unwrap();
    let queue = ReviewQueue::new(scratch.path());
    let enqueued = queue.enqueue("abc123").unwrap();

    let cancel = crate::cli::ReviewCommand::Cancel {
        run_id: enqueued.run_id.clone(),
        force: false,
    };
    run_review_run_command(cancel, scratch.path()).unwrap();

    assert!(queue.pending().unwrap().is_empty());
    let stored = ReviewRunStore::new(scratch.path())
        .read(&enqueued.run_id)
        .unwrap();
    assert_eq!(stored.status, ReviewRunStatus::Cancelled);
}
2531
/// Spawns the `true` command, waits for it to exit, and returns the pid it had, i.e. the pid
/// of a process that has already been reaped.
///
/// # Panics
/// Panics if `true` cannot be spawned or waited on.
fn reaped_pid() -> u32 {
    let mut short_lived = Command::new("true").spawn().expect("spawn `true`");
    let finished_pid = short_lived.id();
    short_lived.wait().expect("reap `true`");
    finished_pid
}
2539
2540 fn write_running_run(store: &ReviewRunStore, worker_pid: Option<u32>) -> ReviewRun {
2543 let mut run = store.create_queued("abc123", "commit").unwrap();
2544 run.status = ReviewRunStatus::Running;
2545 run.phase = "reviewing".to_owned();
2546 run.worker_pid = worker_pid;
2547 store.write(&run).unwrap();
2548 run
2549 }
2550
/// The probe reports the test process itself as alive and an already-reaped child as dead.
#[test]
fn pid_liveness_probe_tracks_real_processes() {
    let own_pid = std::process::id();
    assert!(pid_is_alive(own_pid));

    let dead_pid = reaped_pid();
    assert!(!pid_is_alive(dead_pid));
}
2556
/// Walks `reconcile_liveness` through four situations, using a stubbed probe: a queued run, a
/// running run whose worker is alive, a running run whose worker is dead, and a running run
/// with no recorded worker pid. Only the dead-worker case changes the run.
#[test]
fn reconcile_liveness_only_reaps_dead_running_runs() {
    let mut run = ReviewRun::queued("id", "abc123", "commit");

    // Queued: untouched even when the probe says "dead".
    assert!(!run.reconcile_liveness(|_| false));
    assert_eq!(run.status, ReviewRunStatus::Queued);

    // Running with a live worker: untouched.
    run.mark_running("reviewing");
    assert!(!run.reconcile_liveness(|_| true));
    assert_eq!(run.status, ReviewRunStatus::Running);

    // Running with a dead worker: failed as stale, pid cleared.
    assert!(run.reconcile_liveness(|_| false));
    assert_eq!(run.status, ReviewRunStatus::Failed);
    assert!(run.error.as_deref().unwrap().contains("stale run"));
    assert!(run.worker_pid.is_none());

    // Running without a recorded pid: left alone.
    let mut pidless = ReviewRun::queued("id2", "def456", "commit");
    pidless.status = ReviewRunStatus::Running;
    pidless.worker_pid = None;
    assert!(!pidless.reconcile_liveness(|_| false));
    assert_eq!(pidless.status, ReviewRunStatus::Running);
}
2581
/// A running run whose worker pid is already reaped is reported as `Failed` ("stale run") by
/// `read_reconciled`, and a plain `read` and `list_reconciled` both see that state afterwards.
#[test]
fn review_status_reaps_running_run_with_dead_worker_and_persists() {
    let scratch = tempfile::tempdir().unwrap();
    let runs = ReviewRunStore::new(scratch.path());
    let stale = write_running_run(&runs, Some(reaped_pid()));

    let reconciled = runs.read_reconciled(&stale.id).unwrap();
    assert_eq!(reconciled.status, ReviewRunStatus::Failed);
    assert!(reconciled.error.as_deref().unwrap().contains("stale run"));

    // A plain read afterwards sees the failed state too.
    let reread = runs.read(&stale.id).unwrap();
    assert_eq!(reread.status, ReviewRunStatus::Failed);

    let listing = runs.list_reconciled().unwrap();
    assert_eq!(listing.len(), 1);
    assert_eq!(listing[0].status, ReviewRunStatus::Failed);
}
2599
/// A running run whose worker pid is the test process itself stays `Running` after
/// `read_reconciled`.
#[test]
fn review_status_leaves_running_run_with_live_worker() {
    let scratch = tempfile::tempdir().unwrap();
    let runs = ReviewRunStore::new(scratch.path());
    let own_pid = std::process::id();
    let live = write_running_run(&runs, Some(own_pid));

    let reconciled = runs.read_reconciled(&live.id).unwrap();

    assert_eq!(reconciled.status, ReviewRunStatus::Running);
}
2609
/// Cancelling (without force) a running run whose worker is already reaped succeeds and
/// returns the run as `Failed` with a "stale run" error.
#[test]
fn cancel_reaps_running_run_with_dead_worker_without_force() {
    let scratch = tempfile::tempdir().unwrap();
    let runs = ReviewRunStore::new(scratch.path());
    let stale = write_running_run(&runs, Some(reaped_pid()));

    let outcome = runs.cancel(&stale.id, false).unwrap();

    assert_eq!(outcome.status, ReviewRunStatus::Failed);
    let message = outcome.error.as_deref().unwrap();
    assert!(message.contains("stale run"));
}
2620
/// Cancelling (without force) a running run whose worker is alive fails with
/// `ReviewRunStillAlive` and leaves the stored run `Running`.
#[test]
fn cancel_refuses_live_running_run_without_force() {
    let scratch = tempfile::tempdir().unwrap();
    let runs = ReviewRunStore::new(scratch.path());
    let live = write_running_run(&runs, Some(std::process::id()));

    let refusal = runs.cancel(&live.id, false).unwrap_err();
    assert!(matches!(refusal, ReviewerError::ReviewRunStillAlive { .. }));

    let stored = runs.read(&live.id).unwrap();
    assert_eq!(stored.status, ReviewRunStatus::Running);
}
2635
/// A forced cancel of a running run whose worker is a live `sleep 30` child must mark the run
/// `Cancelled` and actually terminate the worker.
///
/// Two safeguards beyond the plain status check:
/// - if `cancel` itself fails, the sleeper is killed here so a failing test does not leave an
///   orphaned process behind;
/// - after reaping, the child's exit status must not be a clean exit. Waiting alone cannot show
///   the worker was killed, because `wait()` also returns once `sleep` finishes on its own.
#[test]
fn cancel_force_kills_live_worker_and_cancels() {
    let temp = tempfile::tempdir().unwrap();
    let store = ReviewRunStore::new(temp.path());
    let mut child = Command::new("sleep")
        .arg("30")
        .spawn()
        .expect("spawn sleep");
    let pid = child.id();
    let run = write_running_run(&store, Some(pid));

    let outcome = store.cancel(&run.id, true);
    if outcome.is_err() {
        // Best effort: don't leak the sleeper when the cancel did not go through.
        let _ = child.kill();
    }

    // Reap the child so the pid probe below does not see a lingering zombie. The wait result is
    // tolerated as an error in case the worker was already reaped elsewhere.
    let exit = child.wait();

    let cancelled = outcome.unwrap();
    assert_eq!(cancelled.status, ReviewRunStatus::Cancelled);

    if let Ok(status) = exit {
        assert!(
            !status.success(),
            "worker exited cleanly; forced cancel did not terminate it"
        );
    }
    assert!(!pid_is_alive(pid));
}
2654
/// A running run with no recorded worker pid cannot be cancelled without force
/// (`ReviewRunLivenessUnknown`); with force, the cancel returns the run as `Failed`.
#[test]
fn cancel_legacy_running_run_requires_force_then_reaps() {
    let scratch = tempfile::tempdir().unwrap();
    let runs = ReviewRunStore::new(scratch.path());
    let pidless = write_running_run(&runs, None);

    let refusal = runs.cancel(&pidless.id, false).unwrap_err();
    assert!(matches!(
        refusal,
        ReviewerError::ReviewRunLivenessUnknown { .. }
    ));

    let forced = runs.cancel(&pidless.id, true).unwrap();
    assert_eq!(forced.status, ReviewRunStatus::Failed);
}
2670
/// Even a forced cancel is refused for a run already marked completed: the error is
/// `CannotCancelReview` carrying the `Completed` status.
#[test]
fn cancel_refuses_already_terminal_run() {
    let scratch = tempfile::tempdir().unwrap();
    let runs = ReviewRunStore::new(scratch.path());
    let finished = runs.create_queued("abc123", "commit").unwrap();
    runs.mark_completed(&finished.id, 0).unwrap();

    let refusal = runs.cancel(&finished.id, true).unwrap_err();

    assert!(matches!(
        refusal,
        ReviewerError::CannotCancelReview {
            status: ReviewRunStatus::Completed,
            ..
        }
    ));
}
2687
/// A non-strict job whose reviewer rejects produces one ledger entry with the `Reject` verdict,
/// the structured finding, and the raw reviewer output; the store then reports one unresolved
/// rejection.
#[test]
fn execute_review_records_reject_verdict() {
    let scratch = tempfile::tempdir().unwrap();
    let ledger = LedgerStore::new(scratch.path());
    let scripted = SequenceRunner::new([reject_json("unsupported")]);

    let execution = execute_review_job(review_job(false), &scripted, &ledger).unwrap();

    assert_eq!(execution.entries.len(), 1);
    let entry = &execution.entries[0];
    assert_eq!(entry.verdict, Verdict::Reject);
    assert_eq!(entry.structured_findings[0].title, "unsupported");
    assert!(entry.raw_reviewer_output.contains("\"REJECT\""));
    assert_eq!(ledger.unresolved_rejections().unwrap().len(), 1);
}
2710
/// A strict job with two passing outputs records two entries, both in the returned execution
/// and in the ledger history: the first attributed to `gpt-5.5`, the second to
/// `claude-opus-4-8`.
#[test]
fn strict_two_pass_records_both_clean_passes() {
    let scratch = tempfile::tempdir().unwrap();
    let ledger = LedgerStore::new(scratch.path());
    let scripted = SequenceRunner::new([pass_json(), pass_json()]);

    let execution = execute_review_job(review_job(true), &scripted, &ledger).unwrap();

    assert_eq!(execution.entries.len(), 2);
    assert_eq!(ledger.read_history().unwrap().len(), 2);

    let first_model = &execution.entries[0].reviewer.model;
    let second_model = &execution.entries[1].reviewer.model;
    assert_eq!(*first_model, "gpt-5.5");
    assert_eq!(*second_model, "claude-opus-4-8");
}
2725
/// A strict job whose arbiter model is set to `gpt-5.5`, the same as the job's reviewer model,
/// is rejected with `StrictArbiterModelNotDistinct`.
#[test]
fn strict_arbiter_model_must_be_third_model() {
    let scratch = tempfile::tempdir().unwrap();
    let ledger = LedgerStore::new(scratch.path());
    let scripted = SequenceRunner::new([pass_json()]);

    let mut clashing_job = review_job(true);
    clashing_job.strict.as_mut().unwrap().arbiter_model = "gpt-5.5".to_owned();

    let error = execute_review_job(clashing_job, &scripted, &ledger).unwrap_err();

    assert!(matches!(
        error,
        ReviewerError::StrictArbiterModelNotDistinct
    ));
}
2741
/// With thresholds of 2 lies and 3 fuckups, the policy continues while both counters are below
/// their thresholds and stops, with the matching reason, once either counter reaches its own.
#[test]
fn strict_goal_policy_stops_at_configured_lie_or_fuckup_count() {
    let policy = StrictGoalPolicy {
        stop_after_lies: 2,
        stop_after_fuckups: 3,
    };

    let below_both = StrictGoalCounters {
        lies_exposed: 1,
        fuckups_registered: 2,
    };
    assert_eq!(policy.decide(below_both), StrictGoalDecision::Continue);

    let lies_at_threshold = StrictGoalCounters {
        lies_exposed: 2,
        fuckups_registered: 0,
    };
    assert_eq!(
        policy.decide(lies_at_threshold),
        StrictGoalDecision::Stop {
            reason: StrictGoalStopReason::LiesExposed
        }
    );

    let fuckups_at_threshold = StrictGoalCounters {
        lies_exposed: 0,
        fuckups_registered: 3,
    };
    assert_eq!(
        policy.decide(fuckups_at_threshold),
        StrictGoalDecision::Stop {
            reason: StrictGoalStopReason::FuckupsRegistered
        }
    );
}
2775
/// Three enqueued items covering two distinct commits drain into two reviews: the queue ends
/// empty, the ledger holds two entries (one an unresolved rejection), and of the three stored
/// runs two are `Completed` and one is `Cancelled`.
#[test]
fn drain_once_reviews_each_commit_once_and_clears_queue() {
    let scratch = tempfile::tempdir().unwrap();
    let ledger = LedgerStore::new(scratch.path());
    let queue = ReviewQueue::new(scratch.path());

    // "abc123" is enqueued twice on purpose.
    queue.enqueue("abc123").unwrap();
    queue.enqueue("abc123").unwrap();
    queue.enqueue("def456").unwrap();

    let loader = StaticLoader::new();
    let scripted = SequenceRunner::new([reject_json("unsupported"), pass_json()]);
    let chosen = selection();

    let report = drain_once(&queue, &loader, &chosen, "", &scripted, &ledger).unwrap();

    assert_eq!(report.reviewed, ["abc123", "def456"]);
    assert_eq!(report.ledger_entries, 2);
    assert!(queue.pending().unwrap().is_empty());
    assert_eq!(ledger.read_history().unwrap().len(), 2);
    assert_eq!(ledger.unresolved_rejections().unwrap().len(), 1);

    let stored_runs = ReviewRunStore::new(scratch.path()).list().unwrap();
    assert_eq!(stored_runs.len(), 3);

    let completed = stored_runs
        .iter()
        .filter(|run| run.status == ReviewRunStatus::Completed)
        .count();
    let cancelled = stored_runs
        .iter()
        .filter(|run| run.status == ReviewRunStatus::Cancelled)
        .count();
    assert_eq!(completed, 2);
    assert_eq!(cancelled, 1);
}
2812
/// Draining a queue with nothing enqueued reviews nothing and writes no ledger entries.
#[test]
fn drain_once_is_a_noop_on_empty_queue() {
    let scratch = tempfile::tempdir().unwrap();
    let ledger = LedgerStore::new(scratch.path());
    let empty_queue = ReviewQueue::new(scratch.path());
    let loader = StaticLoader::new();
    let always_pass = ConstRunner::new(pass_json());
    let chosen = selection();

    let report = drain_once(&empty_queue, &loader, &chosen, "", &always_pass, &ledger).unwrap();

    assert!(report.reviewed.is_empty());
    assert_eq!(report.ledger_entries, 0);
    assert_eq!(ledger.read_history().unwrap().len(), 0);
}
2827
/// With a lie threshold of 1 and a reviewer that rejects on the first pass, the loop stops
/// after that single pass with reason `LiesExposed` and one ledger entry.
#[test]
fn strict_goal_loop_stops_at_configured_lie_count() {
    let scratch = tempfile::tempdir().unwrap();
    let ledger = LedgerStore::new(scratch.path());
    let scripted = SequenceRunner::new([reject_json("lie")]);
    let policy = StrictGoalPolicy {
        stop_after_lies: 1,
        stop_after_fuckups: 0,
    };
    let subject = claim();
    let chosen = selection();

    let outcome = run_strict_goal_loop(
        "abc123", &subject, "diff", "", &chosen, policy, 5, &scripted, &ledger,
    )
    .unwrap();

    assert_eq!(outcome.passes, 1);
    assert_eq!(outcome.counters.lies_exposed, 1);
    assert_eq!(outcome.stop_reason, Some(StrictGoalStopReason::LiesExposed));
    assert_eq!(ledger.read_history().unwrap().len(), 1);
}
2856
/// When every pass is clean, the loop runs exactly the pass limit (3), exposes no lies, reports
/// no stop reason, and writes one ledger entry per pass.
#[test]
fn strict_goal_loop_terminates_at_max_passes_for_honest_agent() {
    let scratch = tempfile::tempdir().unwrap();
    let ledger = LedgerStore::new(scratch.path());
    let always_pass = ConstRunner::new(pass_json());
    let policy = StrictGoalPolicy {
        stop_after_lies: 2,
        stop_after_fuckups: 5,
    };
    let subject = claim();
    let chosen = selection();

    let outcome = run_strict_goal_loop(
        "abc123",
        &subject,
        "diff",
        "",
        &chosen,
        policy,
        3,
        &always_pass,
        &ledger,
    )
    .unwrap();

    assert_eq!(outcome.passes, 3);
    assert_eq!(outcome.counters.lies_exposed, 0);
    assert_eq!(outcome.stop_reason, None);
    assert_eq!(ledger.read_history().unwrap().len(), 3);
}
2885
/// With a fuckup threshold of 2 (lie threshold set to 0) and a reviewer that always rejects,
/// the loop stops after two passes with reason `FuckupsRegistered`; both counters read 2.
#[test]
fn strict_goal_loop_stops_when_fuckups_accumulate() {
    let scratch = tempfile::tempdir().unwrap();
    let ledger = LedgerStore::new(scratch.path());
    let always_reject = ConstRunner::new(reject_json("nit"));
    let policy = StrictGoalPolicy {
        stop_after_lies: 0,
        stop_after_fuckups: 2,
    };
    let subject = claim();
    let chosen = selection();

    let outcome = run_strict_goal_loop(
        "abc123",
        &subject,
        "diff",
        "",
        &chosen,
        policy,
        10,
        &always_reject,
        &ledger,
    )
    .unwrap();

    assert_eq!(outcome.passes, 2);
    assert_eq!(outcome.counters.lies_exposed, 2);
    assert_eq!(outcome.counters.fuckups_registered, 2);
    let expected_reason = Some(StrictGoalStopReason::FuckupsRegistered);
    assert_eq!(outcome.stop_reason, expected_reason);
}
2918
2919 proptest! {
2920 #[test]
2921 fn strict_goal_loop_never_exceeds_max_passes(max in 1u32..6) {
2922 let temp = tempfile::tempdir().unwrap();
2923 let store = LedgerStore::new(temp.path());
2924 let policy = StrictGoalPolicy { stop_after_lies: 0, stop_after_fuckups: 0 };
2926 let runner = ConstRunner::new(pass_json());
2927
2928 let outcome = run_strict_goal_loop(
2929 "abc123", &claim(), "diff", "", &selection(), policy, max, &runner, &store,
2930 )
2931 .unwrap();
2932
2933 prop_assert!(outcome.passes <= max);
2934 prop_assert_eq!(outcome.passes, max);
2935 prop_assert!(outcome.stop_reason.is_none());
2936 }
2937 }
2938
2939 proptest! {
2940 #[test]
2941 fn model_opposition_is_enforced_for_arbitrary_models(
2942 watched in "[A-Za-z0-9._/-]{1,32}",
2943 reviewer in "[A-Za-z0-9._/-]{1,32}",
2944 ) {
2945 let request = ReviewRequest::new(
2946 Agent::Codex,
2947 watched.clone(),
2948 ReviewerHarness::Codex,
2949 reviewer.clone(),
2950 false,
2951 "review this",
2952 );
2953 let result = ReviewPlan::build(request);
2954
2955 if watched.trim().eq_ignore_ascii_case(reviewer.trim()) {
2956 let blocked = matches!(result, Err(ReviewerError::SameModelWithoutWaiver { .. }));
2957 prop_assert!(blocked);
2958 } else {
2959 prop_assert!(result.is_ok());
2960 }
2961 }
2962 }
2963
2964 fn claim() -> Claim {
2965 Claim::new(
2966 "add review",
2967 "cargo test",
2968 vec![EvidenceRef::parse("tests:cargo-test").unwrap()],
2969 )
2970 .unwrap()
2971 }
2972
2973 fn selection() -> ReviewSelection {
2974 ReviewSelection {
2975 watched_agent: Agent::Codex,
2976 watched_model: "gpt-5.4".to_owned(),
2977 reviewer_harness: ReviewerHarness::Codex,
2978 reviewer_model: "gpt-5.5".to_owned(),
2979 reviewer_effort: Effort::Xhigh,
2980 allow_same_model: false,
2981 strict: None,
2982 }
2983 }
2984
2985 struct StaticLoader {
2986 claim: Claim,
2987 diff: String,
2988 }
2989
2990 impl StaticLoader {
2991 fn new() -> Self {
2992 Self {
2993 claim: claim(),
2994 diff: "diff --git a/src/lib.rs b/src/lib.rs".to_owned(),
2995 }
2996 }
2997 }
2998
2999 impl MaterialLoader for StaticLoader {
3000 fn load(&self, _sha: &str) -> Result<(Claim, String), ReviewerError> {
3001 Ok((self.claim.clone(), self.diff.clone()))
3002 }
3003 }
3004
3005 struct ConstRunner {
3006 output: String,
3007 }
3008
3009 impl ConstRunner {
3010 fn new(output: impl Into<String>) -> Self {
3011 Self {
3012 output: output.into(),
3013 }
3014 }
3015 }
3016
3017 impl ProcessRunner for ConstRunner {
3018 fn run(
3019 &self,
3020 _invocation: &InvocationPlan,
3021 _prompt: &str,
3022 ) -> Result<ProcessOutput, ReviewerError> {
3023 Ok(ProcessOutput {
3024 status_code: Some(0),
3025 stdout: self.output.clone(),
3026 stderr: String::new(),
3027 })
3028 }
3029 }
3030
3031 fn review_job(strict: bool) -> ReviewJob {
3032 let claim = claim();
3033 ReviewJob {
3034 commit_sha: "abc123".to_owned(),
3035 diff: "diff --git a/src/lib.rs b/src/lib.rs".to_owned(),
3036 context: String::new(),
3037 request: ReviewRequest::new(
3038 Agent::Codex,
3039 "gpt-5.4",
3040 ReviewerHarness::Codex,
3041 "gpt-5.5",
3042 false,
3043 "review this",
3044 ),
3045 claim,
3046 strict: strict.then_some(StrictReviewConfig {
3047 arbiter_harness: ReviewerHarness::Claude,
3048 arbiter_model: "claude-opus-4-8".to_owned(),
3049 arbiter_effort: Effort::Xhigh,
3050 }),
3051 }
3052 }
3053
3054 struct SequenceRunner {
3055 outputs: RefCell<VecDeque<String>>,
3056 }
3057
3058 impl SequenceRunner {
3059 fn new<I, S>(outputs: I) -> Self
3060 where
3061 I: IntoIterator<Item = S>,
3062 S: Into<String>,
3063 {
3064 Self {
3065 outputs: RefCell::new(outputs.into_iter().map(Into::into).collect()),
3066 }
3067 }
3068 }
3069
3070 impl ProcessRunner for SequenceRunner {
3071 fn run(
3072 &self,
3073 _invocation: &InvocationPlan,
3074 _prompt: &str,
3075 ) -> Result<ProcessOutput, ReviewerError> {
3076 let stdout = self.outputs.borrow_mut().pop_front().unwrap();
3077 Ok(ProcessOutput {
3078 status_code: Some(0),
3079 stdout,
3080 stderr: String::new(),
3081 })
3082 }
3083 }
3084}