Skip to main content

truth_mirror/
reviewer.rs

1//! Reviewer process harness, model opposition, async queue, and verdict execution.
2
3use std::{
4    fs,
5    io::{self, Write},
6    path::{Path, PathBuf},
7    process::{Command, ExitCode, Stdio},
8    time::{SystemTime, UNIX_EPOCH},
9};
10
11use anyhow::Result;
12use serde::{Deserialize, Serialize};
13use thiserror::Error;
14
15use crate::{
16    claim::{Claim, EvidenceRef},
17    cli::{self, Agent, ReviewScope, ReviewerHarness},
18    config::{self, Effort},
19    ledger::{LedgerEntry, LedgerStore, ReviewerConfig, StructuredFinding, Verdict},
20    surface,
21};
22
/// File name of the JSONL review queue; `ReviewQueue::path` joins it onto the queue root.
pub const REVIEW_QUEUE_FILE: &str = "review-queue.jsonl";
/// Directory name, relative to the store root, under which `ReviewRunStore`
/// keeps one `<id>.json` file per review run.
pub const REVIEW_RUNS_DIR: &str = "runs";
// NOTE(review): the three limits below are not referenced in this part of the
// file; their names suggest caps on how much diff / untracked-file material is
// inlined into a prompt — confirm at the use sites.
const MAX_INLINE_DIFF_FILES: usize = 2;
const MAX_INLINE_DIFF_BYTES: usize = 256 * 1024;
const MAX_UNTRACKED_FILE_BYTES: u64 = 16 * 1024;
28
29#[derive(Clone, Debug, Eq, PartialEq)]
30pub struct ReviewRequest {
31    pub watched_agent: Agent,
32    pub watched_model: String,
33    pub reviewer_harness: ReviewerHarness,
34    pub reviewer_model: String,
35    pub reviewer_effort: Effort,
36    pub allow_same_model: bool,
37    pub prompt: String,
38}
39
40impl ReviewRequest {
41    pub fn new(
42        watched_agent: Agent,
43        watched_model: impl Into<String>,
44        reviewer_harness: ReviewerHarness,
45        reviewer_model: impl Into<String>,
46        allow_same_model: bool,
47        prompt: impl Into<String>,
48    ) -> Self {
49        Self {
50            watched_agent,
51            watched_model: watched_model.into(),
52            reviewer_harness,
53            reviewer_model: reviewer_model.into(),
54            reviewer_effort: Effort::highest(),
55            allow_same_model,
56            prompt: prompt.into(),
57        }
58    }
59
60    pub fn with_effort(mut self, effort: Effort) -> Self {
61        self.reviewer_effort = effort;
62        self
63    }
64}
65
66/// Resolved reviewer selection shared by `review` and `watch`. The reviewer is
67/// chosen from the writer harness's adversarial pair; explicit CLI values win.
68#[derive(Clone, Debug, Eq, PartialEq)]
69pub struct ReviewSelection {
70    pub watched_agent: Agent,
71    pub watched_model: String,
72    pub reviewer_harness: ReviewerHarness,
73    pub reviewer_model: String,
74    pub reviewer_effort: Effort,
75    pub allow_same_model: bool,
76    pub strict: Option<StrictReviewConfig>,
77}
78
79impl ReviewSelection {
80    /// Resolve the reviewer for a writer harness from its adversarial pair,
81    /// applying any explicit CLI overrides. `strict` is set separately.
82    #[allow(clippy::too_many_arguments)]
83    pub fn resolve(
84        watched_agent: Option<Agent>,
85        watched_model: Option<String>,
86        reviewer_harness: Option<ReviewerHarness>,
87        reviewer_model: Option<String>,
88        reviewer_effort: Option<Effort>,
89        allow_same_model: bool,
90        config: &config::TruthMirrorConfig,
91    ) -> Result<Self, ReviewerError> {
92        let watched_agent = match watched_agent {
93            Some(agent) => agent,
94            None => agent_from_slug(&config.default_writer)?,
95        };
96        let writer_slug = surface::agent_slug(watched_agent);
97        let pair = config.pair_for(writer_slug);
98
99        let harness_from_cli = reviewer_harness.is_some();
100        let reviewer_harness = match reviewer_harness {
101            Some(harness) => harness,
102            None => {
103                let slug = pair
104                    .map(|pair| pair.reviewer.harness.as_str())
105                    .ok_or_else(|| ReviewerError::NoPairForWriter {
106                        writer: writer_slug.to_owned(),
107                    })?;
108                harness_from_slug(slug)?
109            }
110        };
111        let reviewer_model = match reviewer_model {
112            Some(model) => model,
113            None => {
114                let pair = pair.ok_or_else(|| ReviewerError::NoPairForWriter {
115                    writer: writer_slug.to_owned(),
116                })?;
117                // Don't pair a CLI-overridden harness with a model string meant for
118                // a different harness — that silently reviews with the wrong tool.
119                if harness_from_cli
120                    && !pair
121                        .reviewer
122                        .harness
123                        .eq_ignore_ascii_case(harness_slug(reviewer_harness))
124                {
125                    return Err(ReviewerError::OverrideNeedsModel {
126                        role: "reviewer".to_owned(),
127                        harness: harness_slug(reviewer_harness).to_owned(),
128                    });
129                }
130                pair.reviewer.model.clone()
131            }
132        };
133        let reviewer_effort = reviewer_effort
134            .or_else(|| pair.map(|pair| pair.reviewer.effort))
135            .unwrap_or_else(Effort::highest);
136
137        Ok(Self {
138            watched_agent,
139            watched_model: watched_model.unwrap_or_default(),
140            reviewer_harness,
141            reviewer_model,
142            reviewer_effort,
143            // Either the CLI flag or config may waive model opposition.
144            allow_same_model: allow_same_model || config.allow_same_model,
145            strict: None,
146        })
147    }
148
149    /// Resolve the second-pass arbiter for the writer, preferring CLI overrides,
150    /// then the writer pair's `arbiter`.
151    pub fn resolve_arbiter(
152        watched_agent: Agent,
153        arbiter_harness: Option<ReviewerHarness>,
154        arbiter_model: Option<String>,
155        arbiter_effort: Option<Effort>,
156        config: &config::TruthMirrorConfig,
157    ) -> Result<StrictReviewConfig, ReviewerError> {
158        let pair_arbiter = config
159            .pair_for(surface::agent_slug(watched_agent))
160            .and_then(|pair| pair.arbiter.clone());
161
162        let harness_from_cli = arbiter_harness.is_some();
163        let harness = match arbiter_harness {
164            Some(harness) => harness,
165            None => {
166                let slug = pair_arbiter
167                    .as_ref()
168                    .map(|arbiter| arbiter.harness.as_str())
169                    .ok_or(ReviewerError::MissingArbiter)?;
170                harness_from_slug(slug)?
171            }
172        };
173        let model = match arbiter_model {
174            Some(model) => model,
175            None => {
176                let arbiter = pair_arbiter.as_ref().ok_or(ReviewerError::MissingArbiter)?;
177                if harness_from_cli && !arbiter.harness.eq_ignore_ascii_case(harness_slug(harness))
178                {
179                    return Err(ReviewerError::OverrideNeedsModel {
180                        role: "arbiter".to_owned(),
181                        harness: harness_slug(harness).to_owned(),
182                    });
183                }
184                arbiter.model.clone()
185            }
186        };
187        let effort = arbiter_effort
188            .or_else(|| pair_arbiter.as_ref().map(|arbiter| arbiter.effort))
189            .unwrap_or_else(Effort::highest);
190
191        Ok(StrictReviewConfig {
192            arbiter_harness: harness,
193            arbiter_model: model,
194            arbiter_effort: effort,
195        })
196    }
197
198    fn request_for(&self, prompt: String) -> ReviewRequest {
199        ReviewRequest::new(
200            self.watched_agent,
201            self.watched_model.clone(),
202            self.reviewer_harness,
203            self.reviewer_model.clone(),
204            self.allow_same_model,
205            prompt,
206        )
207        .with_effort(self.reviewer_effort)
208    }
209}
210
211#[derive(Clone, Debug, Eq, PartialEq)]
212pub struct ReviewPlan {
213    pub watched_agent: Agent,
214    pub watched_model: String,
215    pub reviewer_harness: ReviewerHarness,
216    pub reviewer_model: String,
217    pub allow_same_model: bool,
218    pub invocation: InvocationPlan,
219}
220
221impl ReviewPlan {
222    pub fn build(request: ReviewRequest) -> Result<Self, ReviewerError> {
223        validate_model_present("reviewer", &request.reviewer_model)?;
224
225        // The writer model may be unknown (pair-based selection doesn't require it);
226        // opposition is enforced only when the writer model is actually provided.
227        if !request.watched_model.trim().is_empty()
228            && !request.allow_same_model
229            && normalized_model(&request.watched_model) == normalized_model(&request.reviewer_model)
230        {
231            return Err(ReviewerError::SameModelWithoutWaiver {
232                watched_model: request.watched_model,
233                reviewer_model: request.reviewer_model,
234            });
235        }
236
237        let invocation = InvocationPlan::for_harness(
238            request.reviewer_harness,
239            &request.reviewer_model,
240            request.reviewer_effort,
241        )?;
242
243        Ok(Self {
244            watched_agent: request.watched_agent,
245            watched_model: request.watched_model,
246            reviewer_harness: request.reviewer_harness,
247            reviewer_model: request.reviewer_model,
248            allow_same_model: request.allow_same_model,
249            invocation,
250        })
251    }
252
253    pub fn run_with<R: ProcessRunner>(
254        &self,
255        prompt: &str,
256        runner: &R,
257    ) -> Result<ProcessOutput, ReviewerError> {
258        runner.run(&self.invocation, prompt)
259    }
260
261    fn reviewer_config(&self) -> ReviewerConfig {
262        ReviewerConfig::new(
263            harness_slug(self.reviewer_harness),
264            self.reviewer_model.clone(),
265            self.allow_same_model,
266        )
267    }
268}
269
270#[derive(Clone, Debug, Eq, PartialEq)]
271pub struct InvocationPlan {
272    pub program: String,
273    pub args: Vec<String>,
274    pub prompt_delivery: PromptDelivery,
275}
276
277impl InvocationPlan {
278    pub fn for_harness(
279        harness: ReviewerHarness,
280        model: &str,
281        effort: Effort,
282    ) -> Result<Self, ReviewerError> {
283        validate_model_present("reviewer", model)?;
284        let model = model.trim();
285        let e = effort.as_str();
286
287        // Reasoning-effort flags verified against source: codex ReasoningEffort
288        // (`-c model_reasoning_effort`), pi `--thinking` (cli/args.js:249), claude
289        // `--effort`. Gemini/OpenCode have no effort flag, so effort is omitted.
290        let plan = match harness {
291            ReviewerHarness::Claude => Self {
292                program: "claude".to_owned(),
293                args: vec![
294                    "--print".to_owned(),
295                    "--model".to_owned(),
296                    model.to_owned(),
297                    "--effort".to_owned(),
298                    // Claude has no `minimal`; clamp to a valid level.
299                    effort.claude_value().to_owned(),
300                ],
301                prompt_delivery: PromptDelivery::Stdin,
302            },
303            ReviewerHarness::Codex => Self {
304                program: "codex".to_owned(),
305                args: vec![
306                    "exec".to_owned(),
307                    "-m".to_owned(),
308                    model.to_owned(),
309                    "-c".to_owned(),
310                    format!("model_reasoning_effort={e}"),
311                ],
312                prompt_delivery: PromptDelivery::PositionalArgument,
313            },
314            ReviewerHarness::Pi => Self {
315                program: "pi".to_owned(),
316                args: vec![
317                    "--model".to_owned(),
318                    model.to_owned(),
319                    "--thinking".to_owned(),
320                    e.to_owned(),
321                    // Read-only tools so the reviewer can grep the repo for the whole
322                    // defect class (Codex `exec` / Claude `-p` have read tools already).
323                    "--tools".to_owned(),
324                    "read,grep,find,ls".to_owned(),
325                    "-p".to_owned(),
326                ],
327                prompt_delivery: PromptDelivery::Stdin,
328            },
329            ReviewerHarness::Gemini => Self {
330                program: "gemini".to_owned(),
331                args: vec!["-m".to_owned(), model.to_owned()],
332                prompt_delivery: PromptDelivery::FlagValue("-p".to_owned()),
333            },
334            ReviewerHarness::Opencode => Self {
335                program: "opencode".to_owned(),
336                args: vec!["run".to_owned(), "--model".to_owned(), model.to_owned()],
337                prompt_delivery: PromptDelivery::PositionalArgument,
338            },
339            ReviewerHarness::Custom => return Err(ReviewerError::UnsupportedCustomHarness),
340        };
341
342        Ok(plan)
343    }
344
345    pub fn args_for_prompt(&self, prompt: &str) -> Vec<String> {
346        let mut args = self.args.clone();
347        match &self.prompt_delivery {
348            PromptDelivery::Stdin => {}
349            PromptDelivery::PositionalArgument => args.push(prompt.to_owned()),
350            PromptDelivery::FlagValue(flag) => {
351                args.push(flag.clone());
352                args.push(prompt.to_owned());
353            }
354        }
355        args
356    }
357}
358
/// How the review prompt is handed to the reviewer process.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PromptDelivery {
    /// Written to the child's stdin; `InvocationPlan::args_for_prompt` adds no argument.
    Stdin,
    /// Appended as the final command-line argument.
    PositionalArgument,
    /// Appended as the given flag followed by the prompt as that flag's value.
    FlagValue(String),
}
365
/// Captured result of one reviewer process run.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ProcessOutput {
    /// Exit code as reported by `ExitStatus::code()`; `None` when the process
    /// ended without one.
    pub status_code: Option<i32>,
    /// Captured standard output (`StdProcessRunner` decodes it lossily as UTF-8).
    pub stdout: String,
    /// Captured standard error (`StdProcessRunner` decodes it lossily as UTF-8).
    pub stderr: String,
}
372
/// Abstraction over launching a reviewer process. `StdProcessRunner` is the
/// implementation in this file that spawns a real child process.
pub trait ProcessRunner {
    /// Runs `invocation`, delivering `prompt` according to the plan's
    /// `prompt_delivery`, and returns the captured output.
    ///
    /// NOTE(review): nothing here requires implementations to treat a non-zero
    /// exit as an error; `execute_review_job` calls `ensure_process_success`
    /// on the returned output itself.
    fn run(
        &self,
        invocation: &InvocationPlan,
        prompt: &str,
    ) -> Result<ProcessOutput, ReviewerError>;
}
380
381#[derive(Clone, Copy, Debug, Default)]
382pub struct StdProcessRunner;
383
384impl ProcessRunner for StdProcessRunner {
385    fn run(
386        &self,
387        invocation: &InvocationPlan,
388        prompt: &str,
389    ) -> Result<ProcessOutput, ReviewerError> {
390        let mut command = Command::new(&invocation.program);
391        command.args(invocation.args_for_prompt(prompt));
392        command.stdout(Stdio::piped()).stderr(Stdio::piped());
393
394        if invocation.prompt_delivery == PromptDelivery::Stdin {
395            command.stdin(Stdio::piped());
396        }
397
398        let mut child = command.spawn().map_err(ReviewerError::Spawn)?;
399        if invocation.prompt_delivery == PromptDelivery::Stdin {
400            let mut stdin = child.stdin.take().ok_or(ReviewerError::MissingStdinPipe)?;
401            stdin
402                .write_all(prompt.as_bytes())
403                .map_err(ReviewerError::WritePrompt)?;
404        }
405
406        let output = child.wait_with_output().map_err(ReviewerError::Wait)?;
407        Ok(ProcessOutput {
408            status_code: output.status.code(),
409            stdout: String::from_utf8_lossy(&output.stdout).into_owned(),
410            stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
411        })
412    }
413}
414
/// One unit of review work consumed by `execute_review_job`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReviewJob {
    /// Commit the review is about.
    pub commit_sha: String,
    /// Claim under review for that commit.
    pub claim: Claim,
    /// Diff text for the commit.
    pub diff: String,
    /// Ground-truth constraints + recent trajectory injected into review prompts.
    pub context: String,
    /// First-pass reviewer request; its `prompt` is what the first reviewer receives.
    pub request: ReviewRequest,
    /// When set, a first-pass PASS with no findings triggers a second pass by this arbiter.
    pub strict: Option<StrictReviewConfig>,
}
425
/// Arbiter used for the strict second pass of a review.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct StrictReviewConfig {
    /// Harness that runs the arbiter.
    pub arbiter_harness: ReviewerHarness,
    /// Model handed to the arbiter harness.
    pub arbiter_model: String,
    /// Reasoning effort requested from the arbiter.
    pub arbiter_effort: Effort,
}
432
/// Outcome of `execute_review_job`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReviewExecution {
    /// Ledger entries appended during the job, in order: the first-pass entry,
    /// then the strict second-pass entry when one ran.
    pub entries: Vec<LedgerEntry>,
}
437
438pub fn execute_review_job<R: ProcessRunner>(
439    job: ReviewJob,
440    runner: &R,
441    store: &LedgerStore,
442) -> Result<ReviewExecution, ReviewerError> {
443    let first_plan = ReviewPlan::build(job.request.clone())?;
444    let first_output = first_plan.run_with(&job.request.prompt, runner)?;
445    ensure_process_success(&first_output)?;
446    let first_verdict = ParsedVerdict::parse(&first_output.stdout)?;
447    let first_entry = entry_from_verdict(&job, &first_plan, &first_verdict);
448    store.append_entry(&first_entry)?;
449
450    let mut entries = vec![first_entry];
451    if let Some(strict) = &job.strict
452        && first_verdict.verdict == Verdict::Pass
453        && first_verdict.findings.is_empty()
454    {
455        validate_strict_arbiter(&job.request, strict)?;
456        let strict_prompt = strict_second_pass_prompt(&job, &first_output.stdout);
457        let strict_request = ReviewRequest::new(
458            job.request.watched_agent,
459            job.request.watched_model.clone(),
460            strict.arbiter_harness,
461            strict.arbiter_model.clone(),
462            false,
463            strict_prompt,
464        )
465        .with_effort(strict.arbiter_effort);
466        let strict_plan = ReviewPlan::build(strict_request.clone())?;
467        let strict_output = strict_plan.run_with(&strict_request.prompt, runner)?;
468        ensure_process_success(&strict_output)?;
469        let strict_verdict = ParsedVerdict::parse(&strict_output.stdout)?;
470        let strict_entry = entry_from_verdict(&job, &strict_plan, &strict_verdict);
471        store.append_entry(&strict_entry)?;
472        entries.push(strict_entry);
473    }
474
475    Ok(ReviewExecution { entries })
476}
477
478#[derive(Clone, Debug, Eq, PartialEq)]
479pub struct ParsedVerdict {
480    pub verdict: Verdict,
481    pub summary: String,
482    pub findings: Vec<String>,
483    pub structured_findings: Vec<StructuredFinding>,
484    pub next_steps: Vec<String>,
485    pub raw: String,
486}
487
488impl ParsedVerdict {
489    pub fn parse(output: &str) -> Result<Self, ReviewerError> {
490        let parsed: ReviewerJsonOutput =
491            serde_json::from_str(output.trim()).map_err(|source| ReviewerError::VerdictJson {
492                source,
493                output: output.to_owned(),
494            })?;
495        parsed.validate()?;
496        let findings = parsed
497            .findings
498            .iter()
499            .map(StructuredFinding::display_line)
500            .collect();
501
502        Ok(Self {
503            verdict: parsed.verdict,
504            summary: parsed.summary,
505            findings,
506            structured_findings: parsed.findings,
507            next_steps: parsed.next_steps,
508            raw: output.to_owned(),
509        })
510    }
511}
512
513#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
514struct ReviewerJsonOutput {
515    verdict: Verdict,
516    summary: String,
517    #[serde(default)]
518    findings: Vec<StructuredFinding>,
519    #[serde(default)]
520    next_steps: Vec<String>,
521}
522
523impl ReviewerJsonOutput {
524    fn validate(&self) -> Result<(), ReviewerError> {
525        if self.summary.trim().is_empty() {
526            return Err(ReviewerError::VerdictSchema {
527                message: "summary must not be empty".to_owned(),
528            });
529        }
530
531        for finding in &self.findings {
532            if finding.title.trim().is_empty() {
533                return Err(ReviewerError::VerdictSchema {
534                    message: "finding title must not be empty".to_owned(),
535                });
536            }
537            if finding.body.trim().is_empty() {
538                return Err(ReviewerError::VerdictSchema {
539                    message: "finding body must not be empty".to_owned(),
540                });
541            }
542            if finding.file.trim().is_empty() {
543                return Err(ReviewerError::VerdictSchema {
544                    message: "finding file must not be empty".to_owned(),
545                });
546            }
547            if finding.line_start == 0 || finding.line_end == 0 {
548                return Err(ReviewerError::VerdictSchema {
549                    message: "finding lines must be one-based".to_owned(),
550                });
551            }
552            if finding.line_end < finding.line_start {
553                return Err(ReviewerError::VerdictSchema {
554                    message: "finding line_end must be greater than or equal to line_start"
555                        .to_owned(),
556                });
557            }
558            if finding.confidence > 100 {
559                return Err(ReviewerError::VerdictSchema {
560                    message: "finding confidence must be between 0 and 100".to_owned(),
561                });
562            }
563            if finding.recommendation.trim().is_empty() {
564                return Err(ReviewerError::VerdictSchema {
565                    message: "finding recommendation must not be empty".to_owned(),
566                });
567            }
568        }
569
570        if self.verdict == Verdict::Pass && !self.findings.is_empty() {
571            return Err(ReviewerError::VerdictSchema {
572                message: "PASS verdict must not include findings".to_owned(),
573            });
574        }
575        if self.verdict == Verdict::Reject && self.findings.is_empty() {
576            return Err(ReviewerError::VerdictSchema {
577                message: "REJECT verdict must include at least one finding".to_owned(),
578            });
579        }
580
581        Ok(())
582    }
583}
584
585#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
586#[serde(rename_all = "kebab-case")]
587pub enum ReviewRunStatus {
588    Queued,
589    Running,
590    Completed,
591    Failed,
592    Cancelled,
593}
594
595impl std::fmt::Display for ReviewRunStatus {
596    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
597        match self {
598            Self::Queued => formatter.write_str("queued"),
599            Self::Running => formatter.write_str("running"),
600            Self::Completed => formatter.write_str("completed"),
601            Self::Failed => formatter.write_str("failed"),
602            Self::Cancelled => formatter.write_str("cancelled"),
603        }
604    }
605}
606
607#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
608pub struct ReviewRun {
609    pub id: String,
610    pub commit_sha: String,
611    pub target: String,
612    pub status: ReviewRunStatus,
613    pub phase: String,
614    pub ledger_entries: usize,
615    pub error: Option<String>,
616    pub created_at_unix: u64,
617    pub updated_at_unix: u64,
618    pub started_at_unix: Option<u64>,
619    pub completed_at_unix: Option<u64>,
620}
621
622impl ReviewRun {
623    fn queued(
624        id: impl Into<String>,
625        commit_sha: impl Into<String>,
626        target: impl Into<String>,
627    ) -> Self {
628        let timestamp = unix_now();
629        Self {
630            id: id.into(),
631            commit_sha: commit_sha.into(),
632            target: target.into(),
633            status: ReviewRunStatus::Queued,
634            phase: "queued".to_owned(),
635            ledger_entries: 0,
636            error: None,
637            created_at_unix: timestamp,
638            updated_at_unix: timestamp,
639            started_at_unix: None,
640            completed_at_unix: None,
641        }
642    }
643
644    fn mark_running(&mut self, phase: impl Into<String>) {
645        let timestamp = unix_now();
646        self.status = ReviewRunStatus::Running;
647        self.phase = phase.into();
648        self.error = None;
649        self.updated_at_unix = timestamp;
650        self.started_at_unix = Some(timestamp);
651        self.completed_at_unix = None;
652    }
653
654    fn mark_completed(&mut self, ledger_entries: usize) {
655        let timestamp = unix_now();
656        self.status = ReviewRunStatus::Completed;
657        self.phase = "completed".to_owned();
658        self.ledger_entries = ledger_entries;
659        self.error = None;
660        self.updated_at_unix = timestamp;
661        self.completed_at_unix = Some(timestamp);
662    }
663
664    fn mark_failed(&mut self, error: impl Into<String>) {
665        let timestamp = unix_now();
666        self.status = ReviewRunStatus::Failed;
667        self.phase = "failed".to_owned();
668        self.error = Some(error.into());
669        self.updated_at_unix = timestamp;
670        self.completed_at_unix = Some(timestamp);
671    }
672
673    fn mark_cancelled(&mut self) {
674        let timestamp = unix_now();
675        self.status = ReviewRunStatus::Cancelled;
676        self.phase = "cancelled".to_owned();
677        self.error = None;
678        self.updated_at_unix = timestamp;
679        self.completed_at_unix = Some(timestamp);
680    }
681}
682
683#[derive(Clone, Debug)]
684pub struct ReviewRunStore {
685    root: PathBuf,
686}
687
688impl ReviewRunStore {
689    pub fn new(root: impl Into<PathBuf>) -> Self {
690        Self { root: root.into() }
691    }
692
693    pub fn runs_dir(&self) -> PathBuf {
694        self.root.join(REVIEW_RUNS_DIR)
695    }
696
697    pub fn path(&self, id: &str) -> PathBuf {
698        self.runs_dir().join(format!("{id}.json"))
699    }
700
701    pub fn create_queued(
702        &self,
703        commit_sha: &str,
704        target: impl Into<String>,
705    ) -> Result<ReviewRun, ReviewerError> {
706        let run = ReviewRun::queued(generate_run_id(commit_sha), commit_sha, target);
707        self.write(&run)?;
708        Ok(run)
709    }
710
711    fn ensure_queued(
712        &self,
713        run_id: &str,
714        commit_sha: &str,
715        target: &str,
716    ) -> Result<ReviewRun, ReviewerError> {
717        match self.read(run_id) {
718            Ok(run) => Ok(run),
719            Err(ReviewerError::ReviewRunNotFound { .. }) => {
720                let run = ReviewRun::queued(run_id, commit_sha, target);
721                self.write(&run)?;
722                Ok(run)
723            }
724            Err(error) => Err(error),
725        }
726    }
727
728    pub fn read(&self, id: &str) -> Result<ReviewRun, ReviewerError> {
729        let path = self.path(id);
730        let contents = fs::read_to_string(&path).map_err(|source| match source.kind() {
731            io::ErrorKind::NotFound => ReviewerError::ReviewRunNotFound { id: id.to_owned() },
732            _ => ReviewerError::RunIo(source),
733        })?;
734        serde_json::from_str(&contents).map_err(ReviewerError::RunJson)
735    }
736
737    pub fn list(&self) -> Result<Vec<ReviewRun>, ReviewerError> {
738        let dir = self.runs_dir();
739        let entries = match fs::read_dir(&dir) {
740            Ok(entries) => entries,
741            Err(error) if error.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
742            Err(error) => return Err(ReviewerError::RunIo(error)),
743        };
744        let mut runs: Vec<ReviewRun> = Vec::new();
745        for entry in entries {
746            let entry = entry.map_err(ReviewerError::RunIo)?;
747            if entry
748                .path()
749                .extension()
750                .is_none_or(|extension| extension != "json")
751            {
752                continue;
753            }
754            let contents = fs::read_to_string(entry.path()).map_err(ReviewerError::RunIo)?;
755            runs.push(serde_json::from_str(&contents).map_err(ReviewerError::RunJson)?);
756        }
757        runs.sort_by(|left, right| {
758            right
759                .updated_at_unix
760                .cmp(&left.updated_at_unix)
761                .then_with(|| right.id.cmp(&left.id))
762        });
763        Ok(runs)
764    }
765
766    pub fn latest_result(&self) -> Result<ReviewRun, ReviewerError> {
767        self.list()?
768            .into_iter()
769            .find(|run| {
770                matches!(
771                    run.status,
772                    ReviewRunStatus::Completed
773                        | ReviewRunStatus::Failed
774                        | ReviewRunStatus::Cancelled
775                )
776            })
777            .ok_or(ReviewerError::NoReviewRuns)
778    }
779
780    pub fn mark_running(&self, id: &str, phase: &str) -> Result<ReviewRun, ReviewerError> {
781        let mut run = self.read(id)?;
782        run.mark_running(phase);
783        self.write(&run)?;
784        Ok(run)
785    }
786
787    pub fn mark_completed(
788        &self,
789        id: &str,
790        ledger_entries: usize,
791    ) -> Result<ReviewRun, ReviewerError> {
792        let mut run = self.read(id)?;
793        run.mark_completed(ledger_entries);
794        self.write(&run)?;
795        Ok(run)
796    }
797
798    pub fn mark_failed(
799        &self,
800        id: &str,
801        error: impl Into<String>,
802    ) -> Result<ReviewRun, ReviewerError> {
803        let mut run = self.read(id)?;
804        run.mark_failed(error);
805        self.write(&run)?;
806        Ok(run)
807    }
808
809    pub fn cancel_queued(&self, id: &str) -> Result<ReviewRun, ReviewerError> {
810        let mut run = self.read(id)?;
811        if run.status != ReviewRunStatus::Queued {
812            return Err(ReviewerError::CannotCancelReview {
813                id: id.to_owned(),
814                status: run.status,
815            });
816        }
817        run.mark_cancelled();
818        self.write(&run)?;
819        Ok(run)
820    }
821
822    fn write(&self, run: &ReviewRun) -> Result<(), ReviewerError> {
823        fs::create_dir_all(self.runs_dir()).map_err(ReviewerError::RunIo)?;
824        let bytes = serde_json::to_vec_pretty(run).map_err(ReviewerError::RunJson)?;
825        fs::write(self.path(&run.id), bytes).map_err(ReviewerError::RunIo)
826    }
827}
828
/// One line of the JSONL review queue.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct QueuedReview {
    /// Id of the run record created for this item. Deserializes to an empty
    /// string when the key is absent — presumably to accept queue lines
    /// written before run ids existed; confirm.
    #[serde(default)]
    pub run_id: String,
    /// Commit to review.
    pub commit_sha: String,
    /// Timestamp from `unix_now()` taken when the item was enqueued.
    pub enqueued_at_unix: u64,
}
836
837#[derive(Clone, Debug)]
838pub struct ReviewQueue {
839    root: PathBuf,
840}
841
842impl ReviewQueue {
843    pub fn new(root: impl Into<PathBuf>) -> Self {
844        Self { root: root.into() }
845    }
846
847    pub fn path(&self) -> PathBuf {
848        self.root.join(REVIEW_QUEUE_FILE)
849    }
850
851    pub fn enqueue(&self, commit_sha: impl Into<String>) -> Result<QueuedReview, ReviewerError> {
852        fs::create_dir_all(&self.root).map_err(ReviewerError::QueueIo)?;
853        let commit_sha = commit_sha.into();
854        let run = ReviewRunStore::new(&self.root).create_queued(&commit_sha, "commit")?;
855        let item = QueuedReview {
856            run_id: run.id,
857            commit_sha,
858            enqueued_at_unix: unix_now(),
859        };
860        let mut file = fs::OpenOptions::new()
861            .create(true)
862            .append(true)
863            .open(self.path())
864            .map_err(ReviewerError::QueueIo)?;
865        serde_json::to_writer(&mut file, &item).map_err(ReviewerError::QueueJson)?;
866        writeln!(file).map_err(ReviewerError::QueueIo)?;
867        Ok(item)
868    }
869
870    pub fn pending(&self) -> Result<Vec<QueuedReview>, ReviewerError> {
871        let contents = match fs::read_to_string(self.path()) {
872            Ok(contents) => contents,
873            Err(error) if error.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
874            Err(error) => return Err(ReviewerError::QueueIo(error)),
875        };
876
877        contents
878            .lines()
879            .filter(|line| !line.trim().is_empty())
880            .map(|line| serde_json::from_str(line).map_err(ReviewerError::QueueJson))
881            .collect()
882    }
883
884    /// Drop every queued item for `sha`, preserving any items appended for other
885    /// commits. Called after a commit is reviewed so a drain never repeats work.
886    pub fn remove_sha(&self, sha: &str) -> Result<(), ReviewerError> {
887        let remaining: Vec<QueuedReview> = self
888            .pending()?
889            .into_iter()
890            .filter(|item| item.commit_sha != sha)
891            .collect();
892        self.rewrite(&remaining)
893    }
894
895    fn rewrite(&self, items: &[QueuedReview]) -> Result<(), ReviewerError> {
896        if items.is_empty() {
897            return match fs::remove_file(self.path()) {
898                Ok(()) => Ok(()),
899                Err(error) if error.kind() == io::ErrorKind::NotFound => Ok(()),
900                Err(error) => Err(ReviewerError::QueueIo(error)),
901            };
902        }
903
904        let mut file = fs::File::create(self.path()).map_err(ReviewerError::QueueIo)?;
905        for item in items {
906            serde_json::to_writer(&mut file, item).map_err(ReviewerError::QueueJson)?;
907            writeln!(file).map_err(ReviewerError::QueueIo)?;
908        }
909        Ok(())
910    }
911
912    pub fn remove_run_id(&self, run_id: &str) -> Result<(), ReviewerError> {
913        let remaining: Vec<QueuedReview> = self
914            .pending()?
915            .into_iter()
916            .filter(|item| item.run_id != run_id)
917            .collect();
918        self.rewrite(&remaining)
919    }
920}
921
/// Loads the claim and diff for a commit so the reviewer can run against it.
/// Abstracted so `drain_once` can be unit-tested without a real git repository.
pub trait MaterialLoader {
    /// Return the parsed claim and the diff text for the commit `sha`.
    fn load(&self, sha: &str) -> Result<(Claim, String), ReviewerError>;
}
927
/// `MaterialLoader` that reads the commit message and patch through `git show`.
#[derive(Clone, Debug, Default)]
pub struct GitMaterialLoader {
    /// Evidence-pointer patterns from config so async review parses claims the
    /// same way the commit-msg gate did (a repo `jira:` pointer stays valid).
    /// When empty, `Claim::parse` is used instead of `Claim::parse_with`.
    pub evidence_patterns: Vec<String>,
}
934
935impl GitMaterialLoader {
936    pub fn with_patterns(evidence_patterns: Vec<String>) -> Self {
937        Self { evidence_patterns }
938    }
939}
940
941impl MaterialLoader for GitMaterialLoader {
942    fn load(&self, sha: &str) -> Result<(Claim, String), ReviewerError> {
943        let message = git_output(["show", "--format=%B", "--no-patch", sha])?;
944        let diff = git_output(["show", "--format=", "--patch", sha])?;
945        let claim = if self.evidence_patterns.is_empty() {
946            Claim::parse(&message)?
947        } else {
948            Claim::parse_with(&message, &self.evidence_patterns)?
949        };
950        Ok((claim, diff))
951    }
952}
953
/// Summary of what a single `drain_once` call accomplished.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct DrainReport {
    /// Commit shas whose review completed during this drain, in review order.
    pub reviewed: Vec<String>,
    /// Total number of ledger entries produced by those reviews.
    pub ledger_entries: usize,
}
959
960/// Review every distinct queued commit exactly once, record verdicts, and remove
961/// each commit from the queue as soon as its review lands. A commit whose review
962/// errors stays queued for the next drain.
963pub fn drain_once<R: ProcessRunner, L: MaterialLoader>(
964    queue: &ReviewQueue,
965    loader: &L,
966    selection: &ReviewSelection,
967    context: &str,
968    runner: &R,
969    store: &LedgerStore,
970) -> Result<DrainReport, ReviewerError> {
971    let pending = queue.pending()?;
972    let run_store = ReviewRunStore::new(&queue.root);
973    let mut seen = std::collections::BTreeSet::new();
974    let mut order = Vec::new();
975    for item in &pending {
976        if seen.insert(item.commit_sha.clone()) {
977            order.push(item.clone());
978        } else if !item.run_id.trim().is_empty()
979            && let Ok(run) = run_store.read(&item.run_id)
980            && run.status == ReviewRunStatus::Queued
981        {
982            run_store.cancel_queued(&item.run_id)?;
983        }
984    }
985
986    let mut report = DrainReport::default();
987    for item in order {
988        let sha = item.commit_sha;
989        let run_id = if item.run_id.trim().is_empty() {
990            generate_run_id(&sha)
991        } else {
992            item.run_id
993        };
994        let run = run_store.ensure_queued(&run_id, &sha, "commit")?;
995        if run.status == ReviewRunStatus::Cancelled {
996            queue.remove_sha(&sha)?;
997            continue;
998        }
999        run_store.mark_running(&run_id, "reviewing")?;
1000        let (claim, diff) = loader.load(&sha)?;
1001        let prompt = first_pass_prompt(&claim, &diff, context);
1002        let job = ReviewJob {
1003            commit_sha: sha.clone(),
1004            claim,
1005            diff,
1006            context: context.to_owned(),
1007            request: selection.request_for(prompt),
1008            strict: selection.strict.clone(),
1009        };
1010        let execution = match execute_review_job(job, runner, store) {
1011            Ok(execution) => execution,
1012            Err(error) => {
1013                let _ = run_store.mark_failed(&run_id, error.to_string());
1014                return Err(error);
1015            }
1016        };
1017        report.ledger_entries += execution.entries.len();
1018        run_store.mark_completed(&run_id, execution.entries.len())?;
1019        queue.remove_sha(&sha)?;
1020        report.reviewed.push(sha);
1021    }
1022
1023    Ok(report)
1024}
1025
1026/// Build the ground-truth + trajectory context block for review prompts.
1027/// Best-effort: an unavailable repo or provider yields an empty block.
1028fn review_context(config: &config::TruthMirrorConfig) -> String {
1029    let repo_root = match git_output(["rev-parse", "--show-toplevel"]) {
1030        Ok(root) => PathBuf::from(root.trim()),
1031        Err(_) => return String::new(),
1032    };
1033    let provider = crate::context::trajectory_provider(&repo_root, &config.history);
1034    crate::context::build_review_context(
1035        &repo_root,
1036        &config.ground_truth,
1037        &config.history,
1038        Some(provider.as_ref()),
1039    )
1040    .unwrap_or_default()
1041}
1042
1043pub fn run_watch_command(
1044    args: cli::WatchArgs,
1045    state_dir: &Path,
1046    config: &config::TruthMirrorConfig,
1047) -> Result<ExitCode> {
1048    let selection = ReviewSelection::resolve(
1049        args.watched_agent,
1050        args.watched_model,
1051        args.reviewer_harness,
1052        args.reviewer_model,
1053        args.reviewer_effort,
1054        args.allow_same_model,
1055        config,
1056    )?;
1057    let queue = ReviewQueue::new(state_dir);
1058    let store = LedgerStore::new(state_dir);
1059    let loader = GitMaterialLoader::with_patterns(config.gates.to_policy().evidence_patterns);
1060    let runner = StdProcessRunner;
1061
1062    if args.once {
1063        let context = review_context(config);
1064        let report = drain_once(&queue, &loader, &selection, &context, &runner, &store)?;
1065        println!(
1066            "truth-mirror watch: reviewed {} commit(s), wrote {} ledger entrie(s)",
1067            report.reviewed.len(),
1068            report.ledger_entries
1069        );
1070        return Ok(ExitCode::SUCCESS);
1071    }
1072
1073    let interval = std::time::Duration::from_secs(args.poll_secs.max(1));
1074    loop {
1075        // Rebuild context each poll so ground truth and trajectory stay current.
1076        let context = review_context(config);
1077        let report = drain_once(&queue, &loader, &selection, &context, &runner, &store)?;
1078        if !report.reviewed.is_empty() {
1079            println!(
1080                "truth-mirror watch: reviewed {} commit(s)",
1081                report.reviewed.len()
1082            );
1083        }
1084        std::thread::sleep(interval);
1085    }
1086}
1087
/// Thresholds that end a strict-goal review loop early.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct StrictGoalPolicy {
    /// Stop once this many lies have been exposed; `0` disables the threshold.
    pub stop_after_lies: u32,
    /// Stop once this many fuckups have been registered; `0` disables the threshold.
    pub stop_after_fuckups: u32,
}
1093
/// Running totals accumulated across the passes of a strict-goal loop.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct StrictGoalCounters {
    /// Number of passes whose verdict was `Verdict::Reject`.
    pub lies_exposed: u32,
    /// Sum of the findings reported by every pass (saturating at `u32::MAX`).
    pub fuckups_registered: u32,
}
1099
/// Result of checking the counters against a `StrictGoalPolicy`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StrictGoalDecision {
    /// No enabled threshold has been reached; run another pass.
    Continue,
    /// An enabled threshold has been reached; `reason` says which one.
    Stop { reason: StrictGoalStopReason },
}
1105
/// Which configured threshold stopped a strict-goal loop.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StrictGoalStopReason {
    /// `lies_exposed` reached `stop_after_lies`.
    LiesExposed,
    /// `fuckups_registered` reached `stop_after_fuckups`.
    FuckupsRegistered,
}
1111
1112impl StrictGoalPolicy {
1113    pub fn decide(&self, counters: StrictGoalCounters) -> StrictGoalDecision {
1114        if self.stop_after_lies > 0 && counters.lies_exposed >= self.stop_after_lies {
1115            return StrictGoalDecision::Stop {
1116                reason: StrictGoalStopReason::LiesExposed,
1117            };
1118        }
1119
1120        if self.stop_after_fuckups > 0 && counters.fuckups_registered >= self.stop_after_fuckups {
1121            return StrictGoalDecision::Stop {
1122                reason: StrictGoalStopReason::FuckupsRegistered,
1123            };
1124        }
1125
1126        StrictGoalDecision::Continue
1127    }
1128}
1129
/// Final state of a strict-goal review loop.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct StrictGoalOutcome {
    /// Number of review passes that completed and were recorded.
    pub passes: u32,
    /// Lies and fuckups accumulated over those passes.
    pub counters: StrictGoalCounters,
    /// `None` means the loop stopped at the `max_passes` ceiling rather than
    /// hitting a configured lie/fuckup threshold.
    pub stop_reason: Option<StrictGoalStopReason>,
    /// Ledger entries appended by the loop, one per pass, in pass order.
    pub entries: Vec<LedgerEntry>,
}
1139
1140impl StrictGoalOutcome {
1141    pub fn stop_reason_suffix(&self) -> &'static str {
1142        match self.stop_reason {
1143            Some(StrictGoalStopReason::LiesExposed) => " (stopped: lies exposed)",
1144            Some(StrictGoalStopReason::FuckupsRegistered) => " (stopped: fuckups registered)",
1145            None => " (stopped: max passes)",
1146        }
1147    }
1148}
1149
1150/// Sic the adversarial reviewer on a commit in a loop, accumulating exposed lies
1151/// (REJECT verdicts) and registered fuckups (individual findings). Every pass is
1152/// recorded in the ledger. The loop stops when `policy` says the configured `N`
1153/// is reached, or when `max_passes` is hit so an honest agent still terminates.
1154#[allow(clippy::too_many_arguments)]
1155pub fn run_strict_goal_loop<R: ProcessRunner>(
1156    commit_sha: &str,
1157    claim: &Claim,
1158    diff: &str,
1159    context: &str,
1160    selection: &ReviewSelection,
1161    policy: StrictGoalPolicy,
1162    max_passes: u32,
1163    runner: &R,
1164    store: &LedgerStore,
1165) -> Result<StrictGoalOutcome, ReviewerError> {
1166    let ceiling = max_passes.max(1);
1167    let mut outcome = StrictGoalOutcome {
1168        passes: 0,
1169        counters: StrictGoalCounters {
1170            lies_exposed: 0,
1171            fuckups_registered: 0,
1172        },
1173        stop_reason: None,
1174        entries: Vec::new(),
1175    };
1176
1177    while outcome.passes < ceiling {
1178        let prompt = strict_goal_prompt(claim, diff, context, outcome.passes + 1, &outcome.entries);
1179        let request = selection.request_for(prompt);
1180        let plan = ReviewPlan::build(request.clone())?;
1181        let output = plan.run_with(&request.prompt, runner)?;
1182        ensure_process_success(&output)?;
1183        let verdict = ParsedVerdict::parse(&output.stdout)?;
1184
1185        let job = ReviewJob {
1186            commit_sha: commit_sha.to_owned(),
1187            claim: claim.clone(),
1188            diff: diff.to_owned(),
1189            context: context.to_owned(),
1190            request,
1191            strict: None,
1192        };
1193        let entry = entry_from_verdict(&job, &plan, &verdict);
1194        store.append_entry(&entry)?;
1195        outcome.entries.push(entry);
1196
1197        outcome.passes += 1;
1198        if verdict.verdict == Verdict::Reject {
1199            outcome.counters.lies_exposed += 1;
1200        }
1201        outcome.counters.fuckups_registered = outcome
1202            .counters
1203            .fuckups_registered
1204            .saturating_add(u32::try_from(verdict.findings.len()).unwrap_or(u32::MAX));
1205
1206        if let StrictGoalDecision::Stop { reason } = policy.decide(outcome.counters) {
1207            outcome.stop_reason = Some(reason);
1208            break;
1209        }
1210    }
1211
1212    Ok(outcome)
1213}
1214
1215fn strict_goal_prompt(
1216    claim: &Claim,
1217    diff: &str,
1218    context: &str,
1219    pass: u32,
1220    prior: &[LedgerEntry],
1221) -> String {
1222    let prior_findings: Vec<String> = prior
1223        .iter()
1224        .flat_map(|entry| entry.findings.clone())
1225        .collect();
1226    let prior_block = if prior_findings.is_empty() {
1227        "(none)".to_owned()
1228    } else {
1229        prior_findings.join("\n")
1230    };
1231    format!(
1232        "{ADVERSARIAL_PREAMBLE}\n\nStrict-goal loop, pass {pass}. Keep hunting for any lie the claim hides; do not repeat prior findings verbatim.{}\n\nCLAIM:\n{}\n\nPRIOR FINDINGS:\n{prior_block}\n\nDIFF:\n{}",
1233        context_block(context),
1234        claim.to_line(),
1235        diff
1236    )
1237}
1238
1239pub fn run_review_command(
1240    args: cli::ReviewArgs,
1241    state_dir: &Path,
1242    config: &config::TruthMirrorConfig,
1243) -> Result<ExitCode> {
1244    if let Some(command) = args.command {
1245        return run_review_run_command(command, state_dir);
1246    }
1247
1248    let material = ReviewMaterial::load(
1249        &args,
1250        state_dir,
1251        &config.gates.to_policy().evidence_patterns,
1252    )?;
1253
1254    let mut selection = ReviewSelection::resolve(
1255        args.watched_agent,
1256        args.watched_model,
1257        args.reviewer_harness,
1258        args.reviewer_model,
1259        args.reviewer_effort,
1260        args.allow_same_model,
1261        config,
1262    )?;
1263
1264    if args.strict_two_pass {
1265        selection.strict = Some(ReviewSelection::resolve_arbiter(
1266            selection.watched_agent,
1267            args.arbiter_harness,
1268            args.arbiter_model,
1269            args.arbiter_effort,
1270            config,
1271        )?);
1272    }
1273    let store = LedgerStore::new(state_dir);
1274    let run_store = ReviewRunStore::new(state_dir);
1275    let context = review_context(config);
1276    let run = run_store.create_queued(&material.commit_sha, material.target_label.clone())?;
1277    run_store.mark_running(&run.id, "reviewing")?;
1278
1279    if args.strict_goal {
1280        let policy = config
1281            .strict
1282            .goal_policy(args.stop_after_lies, args.stop_after_fuckups);
1283        let max_passes = args.max_passes.unwrap_or(config.strict.max_passes);
1284        let outcome = match run_strict_goal_loop(
1285            &material.commit_sha,
1286            &material.claim,
1287            &material.diff,
1288            &context,
1289            &selection,
1290            policy,
1291            max_passes,
1292            &StdProcessRunner,
1293            &store,
1294        ) {
1295            Ok(outcome) => outcome,
1296            Err(error) => {
1297                let _ = run_store.mark_failed(&run.id, error.to_string());
1298                return Err(error.into());
1299            }
1300        };
1301        run_store.mark_completed(&run.id, outcome.entries.len())?;
1302        println!(
1303            "truth-mirror strict-goal: run {}, {} pass(es), {} lie(s), {} fuckup(s){}",
1304            run.id,
1305            outcome.passes,
1306            outcome.counters.lies_exposed,
1307            outcome.counters.fuckups_registered,
1308            outcome.stop_reason_suffix(),
1309        );
1310        return Ok(ExitCode::SUCCESS);
1311    }
1312
1313    let prompt = first_pass_prompt(&material.claim, &material.diff, &context);
1314    let job = ReviewJob {
1315        commit_sha: material.commit_sha,
1316        claim: material.claim,
1317        diff: material.diff,
1318        context,
1319        request: selection.request_for(prompt),
1320        strict: selection.strict.clone(),
1321    };
1322
1323    let execution = match execute_review_job(job, &StdProcessRunner, &store) {
1324        Ok(execution) => execution,
1325        Err(error) => {
1326            let _ = run_store.mark_failed(&run.id, error.to_string());
1327            return Err(error.into());
1328        }
1329    };
1330    run_store.mark_completed(&run.id, execution.entries.len())?;
1331    println!(
1332        "truth-mirror review: run {}, wrote {} ledger entrie(s)",
1333        run.id,
1334        execution.entries.len()
1335    );
1336    Ok(ExitCode::SUCCESS)
1337}
1338
1339fn run_review_run_command(command: cli::ReviewCommand, state_dir: &Path) -> Result<ExitCode> {
1340    let runs = ReviewRunStore::new(state_dir);
1341    match command {
1342        cli::ReviewCommand::Status { run_id } => {
1343            if let Some(run_id) = run_id {
1344                print_run(&runs.read(&run_id)?);
1345            } else {
1346                let all = runs.list()?;
1347                if all.is_empty() {
1348                    println!("No review runs.");
1349                } else {
1350                    for run in all {
1351                        print_run_summary(&run);
1352                    }
1353                }
1354            }
1355        }
1356        cli::ReviewCommand::Result { run_id } => {
1357            let run = match run_id {
1358                Some(run_id) => runs.read(&run_id)?,
1359                None => runs.latest_result()?,
1360            };
1361            print_run(&run);
1362            print_run_ledger_entries(state_dir, &run)?;
1363        }
1364        cli::ReviewCommand::Cancel { run_id } => {
1365            let run = runs.cancel_queued(&run_id)?;
1366            ReviewQueue::new(state_dir).remove_run_id(&run_id)?;
1367            println!("cancelled review run {} ({})", run.id, run.commit_sha);
1368        }
1369    }
1370    Ok(ExitCode::SUCCESS)
1371}
1372
1373fn print_run_summary(run: &ReviewRun) {
1374    println!(
1375        "{} {} {} {} entries={} updated={}",
1376        run.id, run.status, run.commit_sha, run.phase, run.ledger_entries, run.updated_at_unix
1377    );
1378}
1379
1380fn print_run(run: &ReviewRun) {
1381    println!("run: {}", run.id);
1382    println!("status: {}", run.status);
1383    println!("commit: {}", run.commit_sha);
1384    println!("target: {}", run.target);
1385    println!("phase: {}", run.phase);
1386    println!("ledger_entries: {}", run.ledger_entries);
1387    println!("created_at_unix: {}", run.created_at_unix);
1388    println!("updated_at_unix: {}", run.updated_at_unix);
1389    if let Some(started) = run.started_at_unix {
1390        println!("started_at_unix: {started}");
1391    }
1392    if let Some(completed) = run.completed_at_unix {
1393        println!("completed_at_unix: {completed}");
1394    }
1395    if let Some(error) = &run.error {
1396        println!("error: {error}");
1397    }
1398}
1399
1400fn print_run_ledger_entries(state_dir: &Path, run: &ReviewRun) -> Result<(), ReviewerError> {
1401    let store = LedgerStore::new(state_dir);
1402    let entries: Vec<LedgerEntry> = store
1403        .read_history()?
1404        .into_iter()
1405        .filter(|entry| entry.commit_sha == run.commit_sha)
1406        .collect();
1407    if entries.is_empty() {
1408        println!("ledger_entries: none");
1409        return Ok(());
1410    }
1411    println!("ledger_entries:");
1412    for entry in entries {
1413        println!(
1414            "- {} {} {} findings={}",
1415            entry.commit_sha,
1416            entry.verdict,
1417            entry.disposition,
1418            entry.findings.len()
1419        );
1420    }
1421    Ok(())
1422}
1423
/// Everything the `review` command needs about its target: what is being
/// reviewed, the claim made about it, and the diff to check the claim against.
#[derive(Clone, Debug, Eq, PartialEq)]
struct ReviewMaterial {
    /// The commit sha for commit scope; otherwise one of the fixed markers
    /// `STAGED`, `WORKING_TREE` or `HEAD` set by the matching loader.
    commit_sha: String,
    /// Label for the run record: `commit:<sha>`, `staged`, `working-tree` or
    /// `branch:<base>`.
    target_label: String,
    /// Claim parsed from the commit message or from the state directory's
    /// `claim.txt`, depending on scope.
    claim: Claim,
    /// Diff text handed to the reviewer; for non-commit scopes this may be the
    /// "too large to inline" summary produced by `materialize_diff`.
    diff: String,
}
1431
1432impl ReviewMaterial {
1433    fn load(
1434        args: &cli::ReviewArgs,
1435        state_dir: &Path,
1436        evidence_patterns: &[String],
1437    ) -> Result<Self, ReviewerError> {
1438        let parse = |text: &str| {
1439            if evidence_patterns.is_empty() {
1440                Claim::parse(text)
1441            } else {
1442                Claim::parse_with(text, evidence_patterns)
1443            }
1444        };
1445
1446        let scope = if args.staged {
1447            ReviewScope::Staged
1448        } else {
1449            args.scope
1450        };
1451
1452        match scope {
1453            ReviewScope::Commit => {
1454                let sha = args
1455                    .target
1456                    .clone()
1457                    .ok_or(ReviewerError::MissingReviewTarget)?;
1458                let message = git_output(["show", "--format=%B", "--no-patch", sha.as_str()])?;
1459                let diff = git_output(["show", "--format=", "--patch", sha.as_str()])?;
1460                let claim = parse(&message)?;
1461                Ok(Self {
1462                    commit_sha: sha.clone(),
1463                    target_label: format!("commit:{sha}"),
1464                    claim,
1465                    diff,
1466                })
1467            }
1468            ReviewScope::Staged => Self::load_staged(state_dir, &parse),
1469            ReviewScope::Auto => {
1470                reject_target_with_scope(args)?;
1471                if working_tree_dirty()? {
1472                    Self::load_working_tree(state_dir, &parse)
1473                } else {
1474                    Self::load_branch(args.base.as_deref(), &parse)
1475                }
1476            }
1477            ReviewScope::WorkingTree => {
1478                reject_target_with_scope(args)?;
1479                Self::load_working_tree(state_dir, &parse)
1480            }
1481            ReviewScope::Branch => {
1482                reject_target_with_scope(args)?;
1483                Self::load_branch(args.base.as_deref(), &parse)
1484            }
1485        }
1486    }
1487
1488    fn load_staged<F>(state_dir: &Path, parse: &F) -> Result<Self, ReviewerError>
1489    where
1490        F: Fn(&str) -> Result<Claim, crate::claim::ClaimError>,
1491    {
1492        let raw = git_output(["diff", "--cached"])?;
1493        let files = git_output(["diff", "--cached", "--name-only"])?;
1494        let diff = materialize_diff("staged", &raw, &files);
1495        let claim = parse(&read_claim_file(state_dir)?)?;
1496        Ok(Self {
1497            commit_sha: "STAGED".to_owned(),
1498            target_label: "staged".to_owned(),
1499            claim,
1500            diff,
1501        })
1502    }
1503
1504    fn load_working_tree<F>(state_dir: &Path, parse: &F) -> Result<Self, ReviewerError>
1505    where
1506        F: Fn(&str) -> Result<Claim, crate::claim::ClaimError>,
1507    {
1508        let status = git_output(["status", "--porcelain"])?;
1509        let tracked = git_output(["diff", "HEAD", "--patch"])?;
1510        let files = git_output(["diff", "HEAD", "--name-only"])?;
1511        let untracked = untracked_file_context()?;
1512        let raw = format!(
1513            "WORKING TREE STATUS:\n{status}\n\nTRACKED DIFF AGAINST HEAD:\n{tracked}\n\nUNTRACKED FILES:\n{untracked}"
1514        );
1515        let diff = materialize_diff("working-tree", &raw, &files);
1516        let claim = parse(&read_claim_file(state_dir)?)?;
1517        Ok(Self {
1518            commit_sha: "WORKING_TREE".to_owned(),
1519            target_label: "working-tree".to_owned(),
1520            claim,
1521            diff,
1522        })
1523    }
1524
1525    fn load_branch<F>(base: Option<&str>, parse: &F) -> Result<Self, ReviewerError>
1526    where
1527        F: Fn(&str) -> Result<Claim, crate::claim::ClaimError>,
1528    {
1529        let base = match base {
1530            Some(base) => base.to_owned(),
1531            None => default_branch_ref()?,
1532        };
1533        let merge_base = git_output_slice(&["merge-base", "HEAD", &base])?;
1534        let merge_base = merge_base.trim().to_owned();
1535        let range = format!("{merge_base}..HEAD");
1536        let message = git_output(["show", "--format=%B", "--no-patch", "HEAD"])?;
1537        let log = git_output_slice(&["log", "--oneline", &range])?;
1538        let stat = git_output_slice(&["diff", "--stat", &range])?;
1539        let raw_patch = git_output_slice(&["diff", "--patch", &range])?;
1540        let files = git_output_slice(&["diff", "--name-only", &range])?;
1541        let raw = format!(
1542            "BRANCH BASE: {base}\nMERGE BASE: {merge_base}\nCOMMITS:\n{log}\n\nDIFF STAT:\n{stat}\n\nDIFF:\n{raw_patch}"
1543        );
1544        let diff = materialize_diff(&format!("branch:{base}"), &raw, &files);
1545        let claim = parse(&message)?;
1546        Ok(Self {
1547            commit_sha: "HEAD".to_owned(),
1548            target_label: format!("branch:{base}"),
1549            claim,
1550            diff,
1551        })
1552    }
1553}
1554
1555fn reject_target_with_scope(args: &cli::ReviewArgs) -> Result<(), ReviewerError> {
1556    if let Some(target) = &args.target {
1557        return Err(ReviewerError::UnexpectedReviewTarget {
1558            scope: args.scope,
1559            target: target.clone(),
1560        });
1561    }
1562    Ok(())
1563}
1564
1565fn read_claim_file(state_dir: &Path) -> Result<String, ReviewerError> {
1566    let claim_path = state_dir.join("claim.txt");
1567    fs::read_to_string(&claim_path).map_err(|source| ReviewerError::ClaimFileRead {
1568        path: claim_path,
1569        source,
1570    })
1571}
1572
1573fn working_tree_dirty() -> Result<bool, ReviewerError> {
1574    Ok(!git_output(["status", "--porcelain"])?.trim().is_empty())
1575}
1576
1577fn default_branch_ref() -> Result<String, ReviewerError> {
1578    if let Ok(symbolic) = git_output([
1579        "symbolic-ref",
1580        "--quiet",
1581        "--short",
1582        "refs/remotes/origin/HEAD",
1583    ]) {
1584        let trimmed = symbolic.trim();
1585        if !trimmed.is_empty() {
1586            return Ok(trimmed.to_owned());
1587        }
1588    }
1589
1590    for candidate in [
1591        "origin/main",
1592        "origin/master",
1593        "origin/trunk",
1594        "main",
1595        "master",
1596        "trunk",
1597    ] {
1598        if git_output_slice(&["rev-parse", "--verify", "--quiet", candidate]).is_ok() {
1599            return Ok(candidate.to_owned());
1600        }
1601    }
1602
1603    Err(ReviewerError::DefaultBranchNotFound)
1604}
1605
1606fn materialize_diff(label: &str, raw: &str, files: &str) -> String {
1607    let file_list: Vec<&str> = files
1608        .lines()
1609        .filter(|line| !line.trim().is_empty())
1610        .collect();
1611    let bytes = raw.len();
1612    if bytes <= MAX_INLINE_DIFF_BYTES && file_list.len() <= MAX_INLINE_DIFF_FILES {
1613        return raw.to_owned();
1614    }
1615
1616    format!(
1617        "Diff for {label} is too large to inline safely.\ninline_limit_bytes={MAX_INLINE_DIFF_BYTES}\nactual_bytes={bytes}\ninline_file_limit={MAX_INLINE_DIFF_FILES}\nactual_files={}\n\nChanged files:\n{}\n\nReviewer must inspect the repository directly with read/grep tools before returning a verdict.",
1618        file_list.len(),
1619        if file_list.is_empty() {
1620            "(none)".to_owned()
1621        } else {
1622            file_list.join("\n")
1623        }
1624    )
1625}
1626
1627fn untracked_file_context() -> Result<String, ReviewerError> {
1628    let files = git_output(["ls-files", "--others", "--exclude-standard"])?;
1629    let mut output = String::new();
1630    for file in files.lines().filter(|line| !line.trim().is_empty()) {
1631        let path = Path::new(file);
1632        let metadata = match fs::metadata(path) {
1633            Ok(metadata) => metadata,
1634            Err(_) => continue,
1635        };
1636        if !metadata.is_file() {
1637            continue;
1638        }
1639        if metadata.len() > MAX_UNTRACKED_FILE_BYTES {
1640            output.push_str(&format!(
1641                "\n--- {file} omitted: {} bytes exceeds {MAX_UNTRACKED_FILE_BYTES} byte inline limit ---\n",
1642                metadata.len()
1643            ));
1644            continue;
1645        }
1646        let bytes = match fs::read(path) {
1647            Ok(bytes) => bytes,
1648            Err(_) => continue,
1649        };
1650        if bytes.contains(&0) {
1651            output.push_str(&format!("\n--- {file} omitted: binary file ---\n"));
1652            continue;
1653        }
1654        output.push_str(&format!(
1655            "\n--- {file} ---\n{}",
1656            String::from_utf8_lossy(&bytes)
1657        ));
1658    }
1659
1660    if output.is_empty() {
1661        Ok("(none)".to_owned())
1662    } else {
1663        Ok(output)
1664    }
1665}
1666
/// Every failure the reviewer module reports: reviewer selection problems,
/// review-target problems, reviewer and git process failures, verdict parsing,
/// and IO/JSON errors of the review queue and run store. Claim and ledger
/// errors are wrapped transparently.
#[derive(Debug, Error)]
pub enum ReviewerError {
    #[error("missing {role} model")]
    MissingModel { role: String },
    #[error(
        "same reviewer model is disallowed without --allow-same-model: watched={watched_model}, reviewer={reviewer_model}"
    )]
    SameModelWithoutWaiver {
        watched_model: String,
        reviewer_model: String,
    },
    #[error("strict arbiter model must differ from watched and first reviewer models")]
    StrictArbiterModelNotDistinct,
    #[error("no adversarial pair configured for writer harness {writer:?}")]
    NoPairForWriter { writer: String },
    #[error(
        "strict review requires an arbiter (pair.arbiter or --arbiter-harness/--arbiter-model)"
    )]
    MissingArbiter,
    #[error(
        "--{role}-harness={harness:?} was overridden without a matching --{role}-model; the pair's model is for a different harness"
    )]
    OverrideNeedsModel { role: String, harness: String },
    #[error("custom reviewer harness requires explicit command configuration")]
    UnsupportedCustomHarness,
    #[error("unknown watched agent {value:?}")]
    UnknownAgent { value: String },
    #[error("unknown reviewer harness {value:?}")]
    UnknownHarness { value: String },
    /// Commit scope was selected but no positional target was given.
    #[error("missing review target")]
    MissingReviewTarget,
    /// A positional target was given for a scope that does not take one.
    #[error("--scope={scope:?} does not accept positional target {target:?}")]
    UnexpectedReviewTarget { scope: ReviewScope, target: String },
    /// Neither `origin/HEAD` nor any of the fallback branch names resolved.
    #[error("could not determine default branch; pass --base explicitly")]
    DefaultBranchNotFound,
    /// The state directory's `claim.txt` could not be read.
    #[error("failed to read staged claim file {path}: {source}")]
    ClaimFileRead {
        path: PathBuf,
        #[source]
        source: io::Error,
    },
    #[error("reviewer output was not valid structured JSON verdict: {source}: {output:?}")]
    VerdictJson {
        source: serde_json::Error,
        output: String,
    },
    #[error("reviewer structured verdict violated schema: {message}")]
    VerdictSchema { message: String },
    #[error("reviewer process exited with status {status:?}: {stderr}")]
    ReviewerProcessFailed { status: Option<i32>, stderr: String },
    #[error("git command failed: git {args:?}: {stderr}")]
    GitFailed { args: Vec<String>, stderr: String },
    #[error("failed to spawn git command: {0}")]
    GitSpawn(io::Error),
    #[error("failed to spawn reviewer process: {0}")]
    Spawn(io::Error),
    #[error("failed to open reviewer stdin pipe")]
    MissingStdinPipe,
    #[error("failed to write reviewer prompt: {0}")]
    WritePrompt(io::Error),
    #[error("failed to wait for reviewer process: {0}")]
    Wait(io::Error),
    /// Reading, writing or deleting the review queue file failed.
    #[error("review queue IO failed: {0}")]
    QueueIo(io::Error),
    /// A review queue line could not be serialized or parsed.
    #[error("review queue JSON failed: {0}")]
    QueueJson(serde_json::Error),
    /// Reading or writing a review run record failed.
    #[error("review run IO failed: {0}")]
    RunIo(io::Error),
    /// A review run record could not be serialized or parsed.
    #[error("review run JSON failed: {0}")]
    RunJson(serde_json::Error),
    #[error("review run not found: {id}")]
    ReviewRunNotFound { id: String },
    #[error("no review runs found")]
    NoReviewRuns,
    #[error(
        "cannot cancel review run {id} with status {status}; only queued runs can be cancelled"
    )]
    CannotCancelReview { id: String, status: ReviewRunStatus },
    #[error(transparent)]
    Claim(#[from] crate::claim::ClaimError),
    #[error(transparent)]
    Ledger(#[from] crate::ledger::LedgerError),
}
1750
/// Shared preamble placed at the very start of the reviewer prompts built by
/// `first_pass_prompt` and `strict_second_pass_prompt`.
///
/// The text instructs the reviewer to try to falsify the claim, to sweep the
/// repository for every instance of a defect class, and to answer with JSON
/// only, following the schema spelled out at the end of the string.
///
/// NOTE(review): the schema here is presumably what the structured verdict
/// parser expects; keep the two in sync when editing either — confirm against
/// the parser.
const ADVERSARIAL_PREAMBLE: &str = r#"You are an ADVERSARIAL reviewer. Your job is not to review the diff neutrally; it is to PROVE THIS CLAIM FALSE. Assume the author over-rates their own work. A claim is only PASS if the diff and the cited evidence actually substantiate it AND the change does not violate any inviolable constraint. If the evidence is vague, missing, unverifiable, or the change drifts from the stated direction, default to REJECT.

Attack the change for auth and permission holes, data loss, rollback gaps, races, stale state, version skew, observability gaps, missing evidence, fake evidence, broad matchers, gates that fail open, and code that only fixes the instance instead of the defect class.

GREP THE CLASS, NOT THE INSTANCE. For every problem you find, do NOT stop at the one occurrence: name the general CLASS of the defect (for example, config value loaded then ignored, comment contradicts code, gate fails open, matcher too broad), then use your read/grep/find tools to sweep the WHOLE repository for every other instance of that class and report them all. One instance is a symptom; the class is the bug. Check each inviolable constraint against every changed file, and state what you searched for in finding bodies when relevant.

Return valid JSON only. Do not wrap it in Markdown. The schema is:
{
  "verdict": "PASS" | "REJECT",
  "summary": "one concise sentence explaining why the claim passes or fails",
  "findings": [
    {
      "severity": "critical" | "high" | "medium" | "low",
      "title": "short defect title",
      "body": "what can go wrong, why this code is vulnerable, and what evidence proves it",
      "file": "repo-relative file path",
      "line_start": 1,
      "line_end": 1,
      "confidence": 0,
      "recommendation": "concrete change required"
    }
  ],
  "next_steps": ["short concrete follow-up commands or edits"]
}

Use "PASS" only when there are no findings. Use "REJECT" when there is at least one material finding."#;
1777
/// Renders the optional context section of a reviewer prompt.
///
/// Returns an empty string when `context` is blank (empty or whitespace
/// only), so no dangling separator is emitted. Otherwise returns the context
/// verbatim (not trimmed), preceded by a blank-line separator.
fn context_block(context: &str) -> String {
    match context.trim() {
        // Blank context contributes nothing to the prompt.
        "" => String::new(),
        // The original, untrimmed text is embedded on purpose.
        _ => format!("\n\n{context}"),
    }
}
1785
1786fn first_pass_prompt(claim: &Claim, diff: &str, context: &str) -> String {
1787    format!(
1788        "{ADVERSARIAL_PREAMBLE}{}\n\nCLAIM:\n{}\n\nDIFF:\n{}",
1789        context_block(context),
1790        claim.to_line(),
1791        diff
1792    )
1793}
1794
1795fn strict_second_pass_prompt(job: &ReviewJob, first_output: &str) -> String {
1796    format!(
1797        "{ADVERSARIAL_PREAMBLE}\n\nStrict second pass (COMPLETENESS CRITIC): the first reviewer returned a CLEAN verdict. Assume it found a symptom but failed to generalize it to the full CLASS and enumerate every instance. Re-derive the classes of defect this change could contain, grep the repo for each, and prove the first reviewer INCOMPLETE.{}\n\nCLAIM:\n{}\n\nFIRST REVIEW:\n{}\n\nDIFF:\n{}",
1798        context_block(&job.context),
1799        job.claim.to_line(),
1800        first_output,
1801        job.diff
1802    )
1803}
1804
1805fn entry_from_verdict(job: &ReviewJob, plan: &ReviewPlan, verdict: &ParsedVerdict) -> LedgerEntry {
1806    LedgerEntry::new(
1807        job.commit_sha.clone(),
1808        verdict.verdict,
1809        job.claim.to_line(),
1810        job.claim
1811            .evidence
1812            .iter()
1813            .map(EvidenceRef::as_str)
1814            .map(str::to_owned)
1815            .collect(),
1816        plan.reviewer_config(),
1817        verdict.findings.clone(),
1818    )
1819    .with_structured_review(
1820        verdict.summary.clone(),
1821        verdict.structured_findings.clone(),
1822        verdict.next_steps.clone(),
1823        verdict.raw.clone(),
1824    )
1825}
1826
1827fn ensure_process_success(output: &ProcessOutput) -> Result<(), ReviewerError> {
1828    if output.status_code == Some(0) {
1829        return Ok(());
1830    }
1831
1832    Err(ReviewerError::ReviewerProcessFailed {
1833        status: output.status_code,
1834        stderr: output.stderr.clone(),
1835    })
1836}
1837
1838fn validate_strict_arbiter(
1839    request: &ReviewRequest,
1840    strict: &StrictReviewConfig,
1841) -> Result<(), ReviewerError> {
1842    let arbiter = normalized_model(&strict.arbiter_model);
1843    if arbiter == normalized_model(&request.watched_model)
1844        || arbiter == normalized_model(&request.reviewer_model)
1845    {
1846        return Err(ReviewerError::StrictArbiterModelNotDistinct);
1847    }
1848    Ok(())
1849}
1850
1851fn validate_model_present(role: &str, model: &str) -> Result<(), ReviewerError> {
1852    if model.trim().is_empty() {
1853        return Err(ReviewerError::MissingModel {
1854            role: role.to_owned(),
1855        });
1856    }
1857    Ok(())
1858}
1859
1860fn git_output<const N: usize>(args: [&str; N]) -> Result<String, ReviewerError> {
1861    git_output_slice(&args)
1862}
1863
1864fn git_output_slice(args: &[&str]) -> Result<String, ReviewerError> {
1865    let output = Command::new("git")
1866        .args(args)
1867        .output()
1868        .map_err(ReviewerError::GitSpawn)?;
1869    if !output.status.success() {
1870        return Err(ReviewerError::GitFailed {
1871            args: args.iter().map(|arg| (*arg).to_owned()).collect(),
1872            stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
1873        });
1874    }
1875
1876    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
1877}
1878
1879fn agent_from_slug(value: &str) -> Result<Agent, ReviewerError> {
1880    match value.trim().to_ascii_lowercase().as_str() {
1881        "claude" => Ok(Agent::Claude),
1882        "codex" => Ok(Agent::Codex),
1883        "pi" => Ok(Agent::Pi),
1884        _ => Err(ReviewerError::UnknownAgent {
1885            value: value.to_owned(),
1886        }),
1887    }
1888}
1889
1890fn harness_from_slug(value: &str) -> Result<ReviewerHarness, ReviewerError> {
1891    match value.trim().to_ascii_lowercase().as_str() {
1892        "claude" => Ok(ReviewerHarness::Claude),
1893        "codex" => Ok(ReviewerHarness::Codex),
1894        "pi" => Ok(ReviewerHarness::Pi),
1895        "gemini" => Ok(ReviewerHarness::Gemini),
1896        "opencode" => Ok(ReviewerHarness::Opencode),
1897        "custom" => Ok(ReviewerHarness::Custom),
1898        _ => Err(ReviewerError::UnknownHarness {
1899            value: value.to_owned(),
1900        }),
1901    }
1902}
1903
1904fn harness_slug(harness: ReviewerHarness) -> &'static str {
1905    match harness {
1906        ReviewerHarness::Claude => "claude",
1907        ReviewerHarness::Codex => "codex",
1908        ReviewerHarness::Pi => "pi",
1909        ReviewerHarness::Gemini => "gemini",
1910        ReviewerHarness::Opencode => "opencode",
1911        ReviewerHarness::Custom => "custom",
1912    }
1913}
1914
/// Canonical form of a model name used for comparisons: surrounding
/// whitespace removed and ASCII letters lowercased.
fn normalized_model(model: &str) -> String {
    let trimmed = model.trim();
    trimmed.to_ascii_lowercase()
}
1918
/// Current wall-clock time as whole seconds since the Unix epoch.
///
/// Falls back to `0` when the system clock reports a time before the epoch.
fn unix_now() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
1924
/// Builds an identifier for a review run.
///
/// The id is `<nanos>-<pid>` followed, when available, by `-<short sha>`:
/// * `nanos` — nanoseconds since the Unix epoch (`0` if the clock is before
///   the epoch),
/// * `pid` — the current process id,
/// * `short sha` — the first 12 ASCII-alphanumeric characters of
///   `commit_sha`; omitted entirely when there are none.
fn generate_run_id(commit_sha: &str) -> String {
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let pid = std::process::id();
    // Keep only characters that are safe in an id, capped at 12.
    let short_sha = commit_sha
        .chars()
        .filter(char::is_ascii_alphanumeric)
        .take(12)
        .collect::<String>();

    match short_sha.as_str() {
        "" => format!("{nanos}-{}", pid),
        _ => format!("{nanos}-{}-{short_sha}", pid),
    }
}
1940
1941#[cfg(test)]
1942mod tests {
1943    use std::{cell::RefCell, collections::VecDeque};
1944
1945    use proptest::prelude::*;
1946
1947    use super::{
1948        InvocationPlan, MaterialLoader, ParsedVerdict, ProcessOutput, ProcessRunner,
1949        PromptDelivery, ReviewJob, ReviewPlan, ReviewQueue, ReviewRequest, ReviewRunStatus,
1950        ReviewRunStore, ReviewSelection, ReviewerError, StrictGoalCounters, StrictGoalDecision,
1951        StrictGoalPolicy, StrictGoalStopReason, StrictReviewConfig, drain_once, execute_review_job,
1952        run_review_run_command, run_strict_goal_loop,
1953    };
1954    use crate::{
1955        claim::{Claim, EvidenceRef},
1956        cli::{Agent, ReviewerHarness},
1957        config::Effort,
1958        ledger::{LedgerStore, Verdict},
1959    };
1960
1961    fn pass_json() -> String {
1962        serde_json::json!({
1963            "verdict": "PASS",
1964            "summary": "The claim is substantiated by the diff and evidence.",
1965            "findings": [],
1966            "next_steps": []
1967        })
1968        .to_string()
1969    }
1970
1971    fn reject_json(title: &str) -> String {
1972        serde_json::json!({
1973            "verdict": "REJECT",
1974            "summary": "The claim is not substantiated.",
1975            "findings": [{
1976                "severity": "high",
1977                "title": title,
1978                "body": "The cited evidence does not prove the claimed behavior.",
1979                "file": "src/lib.rs",
1980                "line_start": 1,
1981                "line_end": 1,
1982                "confidence": 95,
1983                "recommendation": "Provide executable evidence that proves the claim."
1984            }],
1985            "next_steps": ["Run the relevant verification command."]
1986        })
1987        .to_string()
1988    }
1989
1990    #[test]
1991    fn same_harness_different_model_is_valid() {
1992        let request = ReviewRequest::new(
1993            Agent::Codex,
1994            "gpt-5.4",
1995            ReviewerHarness::Codex,
1996            "gpt-5.5",
1997            false,
1998            "review this",
1999        );
2000
2001        let plan = ReviewPlan::build(request).unwrap();
2002
2003        assert_eq!(plan.watched_agent, Agent::Codex);
2004        assert_eq!(plan.reviewer_harness, ReviewerHarness::Codex);
2005        assert_eq!(plan.invocation.program, "codex");
2006    }
2007
2008    #[test]
2009    fn same_model_is_blocked_by_default() {
2010        let request = ReviewRequest::new(
2011            Agent::Codex,
2012            " GPT-5.5 ",
2013            ReviewerHarness::Claude,
2014            "gpt-5.5",
2015            false,
2016            "review this",
2017        );
2018
2019        let error = ReviewPlan::build(request).unwrap_err();
2020
2021        assert!(matches!(
2022            error,
2023            ReviewerError::SameModelWithoutWaiver { .. }
2024        ));
2025    }
2026
2027    #[test]
2028    fn allow_same_model_override_is_deliberate() {
2029        let request = ReviewRequest::new(
2030            Agent::Codex,
2031            "gpt-5.5",
2032            ReviewerHarness::Codex,
2033            "gpt-5.5",
2034            true,
2035            "review this",
2036        );
2037
2038        let plan = ReviewPlan::build(request).unwrap();
2039
2040        assert!(plan.allow_same_model);
2041        assert_eq!(plan.reviewer_model, "gpt-5.5");
2042    }
2043
2044    #[test]
2045    fn provider_mapping_uses_verified_prompt_shapes_and_effort() {
2046        let codex =
2047            InvocationPlan::for_harness(ReviewerHarness::Codex, "gpt-5.5", Effort::Xhigh).unwrap();
2048        assert_eq!(codex.program, "codex");
2049        assert_eq!(
2050            codex.args_for_prompt("prompt"),
2051            [
2052                "exec",
2053                "-m",
2054                "gpt-5.5",
2055                "-c",
2056                "model_reasoning_effort=xhigh",
2057                "prompt"
2058            ]
2059        );
2060
2061        let claude =
2062            InvocationPlan::for_harness(ReviewerHarness::Claude, "opus", Effort::High).unwrap();
2063        assert_eq!(claude.program, "claude");
2064        assert_eq!(claude.prompt_delivery, PromptDelivery::Stdin);
2065        assert_eq!(
2066            claude.args_for_prompt("prompt"),
2067            ["--print", "--model", "opus", "--effort", "high"]
2068        );
2069
2070        let gemini =
2071            InvocationPlan::for_harness(ReviewerHarness::Gemini, "gemini-pro", Effort::Xhigh)
2072                .unwrap();
2073        assert_eq!(
2074            gemini.args_for_prompt("prompt"),
2075            ["-m", "gemini-pro", "-p", "prompt"]
2076        );
2077
2078        let pi = InvocationPlan::for_harness(ReviewerHarness::Pi, "openai/gpt-5.5", Effort::Xhigh)
2079            .unwrap();
2080        assert_eq!(pi.prompt_delivery, PromptDelivery::Stdin);
2081        assert_eq!(
2082            pi.args_for_prompt("prompt"),
2083            [
2084                "--model",
2085                "openai/gpt-5.5",
2086                "--thinking",
2087                "xhigh",
2088                "--tools",
2089                "read,grep,find,ls",
2090                "-p"
2091            ]
2092        );
2093    }
2094
2095    #[test]
2096    fn custom_harness_requires_explicit_configuration() {
2097        let error = InvocationPlan::for_harness(ReviewerHarness::Custom, "model", Effort::Xhigh)
2098            .unwrap_err();
2099
2100        assert!(matches!(error, ReviewerError::UnsupportedCustomHarness));
2101    }
2102
2103    #[test]
2104    fn effort_maps_to_each_harness_flag() {
2105        for effort in [
2106            Effort::Minimal,
2107            Effort::Low,
2108            Effort::Medium,
2109            Effort::High,
2110            Effort::Xhigh,
2111        ] {
2112            let e = effort.as_str();
2113
2114            let codex = InvocationPlan::for_harness(ReviewerHarness::Codex, "m", effort).unwrap();
2115            assert!(codex.args.contains(&format!("model_reasoning_effort={e}")));
2116
2117            let claude = InvocationPlan::for_harness(ReviewerHarness::Claude, "m", effort).unwrap();
2118            let claude_idx = claude.args.iter().position(|a| a == "--effort").unwrap();
2119            // Claude has no `minimal`; it clamps to a valid level (`low`).
2120            assert_eq!(claude.args[claude_idx + 1], effort.claude_value());
2121            assert_ne!(claude.args[claude_idx + 1], "minimal");
2122
2123            let pi = InvocationPlan::for_harness(ReviewerHarness::Pi, "m", effort).unwrap();
2124            let pi_idx = pi.args.iter().position(|a| a == "--thinking").unwrap();
2125            assert_eq!(pi.args[pi_idx + 1], e);
2126        }
2127    }
2128
2129    #[test]
2130    fn resolve_picks_configured_reviewer_for_every_writer() {
2131        let config = crate::config::TruthMirrorConfig::default();
2132
2133        let cases = [
2134            (Agent::Codex, ReviewerHarness::Claude, "claude-opus-4-8"),
2135            (Agent::Claude, ReviewerHarness::Codex, "gpt-5.5"),
2136            (Agent::Pi, ReviewerHarness::Codex, "gpt-5.5"),
2137        ];
2138
2139        for (writer, reviewer_harness, reviewer_model) in cases {
2140            let selection =
2141                ReviewSelection::resolve(Some(writer), None, None, None, None, false, &config)
2142                    .unwrap();
2143
2144            assert_eq!(selection.reviewer_harness, reviewer_harness);
2145            assert_eq!(selection.reviewer_model, reviewer_model);
2146            assert_eq!(selection.reviewer_effort, Effort::Xhigh);
2147        }
2148    }
2149
2150    #[test]
2151    fn overriding_reviewer_harness_without_model_is_rejected() {
2152        // codex's default pair reviewer is claude; overriding harness to pi with no
2153        // model would pair the pi harness with a claude model string.
2154        let config = crate::config::TruthMirrorConfig::default();
2155        let error = ReviewSelection::resolve(
2156            Some(Agent::Codex),
2157            None,
2158            Some(ReviewerHarness::Pi),
2159            None,
2160            None,
2161            false,
2162            &config,
2163        )
2164        .unwrap_err();
2165
2166        assert!(matches!(error, ReviewerError::OverrideNeedsModel { .. }));
2167    }
2168
2169    #[test]
2170    fn overriding_reviewer_harness_matching_pair_is_ok() {
2171        let config = crate::config::TruthMirrorConfig::default();
2172        let selection = ReviewSelection::resolve(
2173            Some(Agent::Codex),
2174            None,
2175            Some(ReviewerHarness::Claude),
2176            None,
2177            None,
2178            false,
2179            &config,
2180        )
2181        .unwrap();
2182
2183        assert_eq!(selection.reviewer_harness, ReviewerHarness::Claude);
2184        assert_eq!(selection.reviewer_model, "claude-opus-4-8");
2185    }
2186
2187    #[test]
2188    fn config_allow_same_model_waives_opposition() {
2189        let config = crate::config::TruthMirrorConfig {
2190            allow_same_model: true,
2191            ..crate::config::TruthMirrorConfig::default()
2192        };
2193
2194        let selection = ReviewSelection::resolve(
2195            Some(Agent::Codex),
2196            Some("gpt-5.5".to_owned()),
2197            Some(ReviewerHarness::Codex),
2198            Some("gpt-5.5".to_owned()),
2199            None,
2200            false, // CLI flag not set — the config waiver must carry it
2201            &config,
2202        )
2203        .unwrap();
2204
2205        assert!(selection.allow_same_model);
2206        // Same watched+reviewer model builds because the config waiver applies.
2207        assert!(ReviewPlan::build(selection.request_for("review".to_owned())).is_ok());
2208    }
2209
2210    #[test]
2211    fn resolve_arbiter_uses_pair_when_cli_absent() {
2212        let config = crate::config::TruthMirrorConfig::default();
2213        let arbiter =
2214            ReviewSelection::resolve_arbiter(Agent::Codex, None, None, None, &config).unwrap();
2215
2216        assert_eq!(arbiter.arbiter_harness, ReviewerHarness::Pi);
2217        assert_eq!(arbiter.arbiter_effort, Effort::Xhigh);
2218    }
2219
2220    #[test]
2221    fn first_pass_prompt_is_adversarial_and_injects_context() {
2222        let prompt = super::first_pass_prompt(
2223            &claim(),
2224            "THE_DIFF_BODY",
2225            "INVIOLABLE CONSTRAINTS: never fake tests",
2226        );
2227
2228        assert!(prompt.contains("PROVE THIS CLAIM FALSE"));
2229        assert!(prompt.contains("default to REJECT"));
2230        assert!(prompt.contains("INVIOLABLE CONSTRAINTS: never fake tests"));
2231        assert!(prompt.contains("THE_DIFF_BODY"));
2232        // Class-generalized review: grep the class, not the instance.
2233        assert!(prompt.contains("GREP THE CLASS, NOT THE INSTANCE"));
2234        assert!(prompt.contains("\"severity\""));
2235        assert!(prompt.contains("\"recommendation\""));
2236    }
2237
2238    #[test]
2239    fn strict_second_pass_is_a_completeness_critic() {
2240        let job = review_job(true);
2241        let first_output = pass_json();
2242        let prompt = super::strict_second_pass_prompt(&job, &first_output);
2243
2244        assert!(prompt.contains("COMPLETENESS CRITIC"));
2245        assert!(prompt.contains("generalize"));
2246        // Inherits the class-sweep preamble.
2247        assert!(prompt.contains("GREP THE CLASS, NOT THE INSTANCE"));
2248    }
2249
2250    #[test]
2251    fn prompt_omits_context_block_when_empty() {
2252        let prompt = super::first_pass_prompt(&claim(), "d", "");
2253        // No dangling empty context header.
2254        assert!(!prompt.contains("INVIOLABLE CONSTRAINTS"));
2255        assert!(prompt.contains("PROVE THIS CLAIM FALSE"));
2256    }
2257
2258    #[test]
2259    fn subprocess_runner_is_mockable() {
2260        struct MockRunner;
2261
2262        impl ProcessRunner for MockRunner {
2263            fn run(
2264                &self,
2265                invocation: &InvocationPlan,
2266                prompt: &str,
2267            ) -> Result<ProcessOutput, ReviewerError> {
2268                assert_eq!(invocation.program, "codex");
2269                assert_eq!(
2270                    invocation.args_for_prompt(prompt).last().unwrap(),
2271                    "review this"
2272                );
2273                Ok(ProcessOutput {
2274                    status_code: Some(0),
2275                    stdout: pass_json(),
2276                    stderr: String::new(),
2277                })
2278            }
2279        }
2280
2281        let request = ReviewRequest::new(
2282            Agent::Codex,
2283            "gpt-5.4",
2284            ReviewerHarness::Codex,
2285            "gpt-5.5",
2286            false,
2287            "review this",
2288        );
2289        let plan = ReviewPlan::build(request).unwrap();
2290        let output = plan.run_with("review this", &MockRunner).unwrap();
2291
2292        assert!(output.stdout.contains("PASS"));
2293    }
2294
2295    #[test]
2296    fn verdict_parser_extracts_rejection_findings() {
2297        let verdict = ParsedVerdict::parse(&reject_json("missing proof")).unwrap();
2298
2299        assert_eq!(verdict.verdict, Verdict::Reject);
2300        assert_eq!(verdict.structured_findings[0].title, "missing proof");
2301        assert!(verdict.findings[0].contains("missing proof"));
2302    }
2303
2304    #[test]
2305    fn verdict_parser_rejects_legacy_line_protocol() {
2306        let error =
2307            ParsedVerdict::parse("VERDICT: REJECT\nFINDINGS:\n- missing proof\n").unwrap_err();
2308
2309        assert!(matches!(error, ReviewerError::VerdictJson { .. }));
2310    }
2311
2312    #[test]
2313    fn large_diff_materialization_falls_back_to_file_summary() {
2314        let files = "a.rs\nb.rs\nc.rs\n";
2315        let materialized = super::materialize_diff("branch:main", "tiny diff", files);
2316
2317        assert!(materialized.contains("too large to inline safely"));
2318        assert!(materialized.contains("actual_files=3"));
2319        assert!(materialized.contains("a.rs\nb.rs\nc.rs"));
2320        assert!(materialized.contains("inspect the repository directly"));
2321    }
2322
2323    #[test]
2324    fn review_queue_schedules_commits_without_running_models() {
2325        let temp = tempfile::tempdir().unwrap();
2326        let queue = ReviewQueue::new(temp.path());
2327
2328        queue.enqueue("abc123").unwrap();
2329
2330        let pending = queue.pending().unwrap();
2331        assert_eq!(pending.len(), 1);
2332        assert_eq!(pending[0].commit_sha, "abc123");
2333        assert!(!pending[0].run_id.is_empty());
2334
2335        let run = ReviewRunStore::new(temp.path())
2336            .read(&pending[0].run_id)
2337            .unwrap();
2338        assert_eq!(run.commit_sha, "abc123");
2339        assert_eq!(run.status, ReviewRunStatus::Queued);
2340    }
2341
2342    #[test]
2343    fn review_cancel_marks_queued_run_and_removes_queue_item() {
2344        let temp = tempfile::tempdir().unwrap();
2345        let queue = ReviewQueue::new(temp.path());
2346        let queued = queue.enqueue("abc123").unwrap();
2347
2348        run_review_run_command(
2349            crate::cli::ReviewCommand::Cancel {
2350                run_id: queued.run_id.clone(),
2351            },
2352            temp.path(),
2353        )
2354        .unwrap();
2355
2356        assert!(queue.pending().unwrap().is_empty());
2357        let run = ReviewRunStore::new(temp.path())
2358            .read(&queued.run_id)
2359            .unwrap();
2360        assert_eq!(run.status, ReviewRunStatus::Cancelled);
2361    }
2362
2363    #[test]
2364    fn execute_review_records_reject_verdict() {
2365        let temp = tempfile::tempdir().unwrap();
2366        let store = LedgerStore::new(temp.path());
2367        let job = review_job(false);
2368        let runner = SequenceRunner::new([reject_json("unsupported")]);
2369
2370        let execution = execute_review_job(job, &runner, &store).unwrap();
2371
2372        assert_eq!(execution.entries.len(), 1);
2373        assert_eq!(execution.entries[0].verdict, Verdict::Reject);
2374        assert_eq!(
2375            execution.entries[0].structured_findings[0].title,
2376            "unsupported"
2377        );
2378        assert!(
2379            execution.entries[0]
2380                .raw_reviewer_output
2381                .contains("\"REJECT\"")
2382        );
2383        assert_eq!(store.unresolved_rejections().unwrap().len(), 1);
2384    }
2385
2386    #[test]
2387    fn strict_two_pass_records_both_clean_passes() {
2388        let temp = tempfile::tempdir().unwrap();
2389        let store = LedgerStore::new(temp.path());
2390        let job = review_job(true);
2391        let runner = SequenceRunner::new([pass_json(), pass_json()]);
2392
2393        let execution = execute_review_job(job, &runner, &store).unwrap();
2394
2395        assert_eq!(execution.entries.len(), 2);
2396        assert_eq!(store.read_history().unwrap().len(), 2);
2397        assert_eq!(execution.entries[0].reviewer.model, "gpt-5.5");
2398        assert_eq!(execution.entries[1].reviewer.model, "claude-opus-4-8");
2399    }
2400
2401    #[test]
2402    fn strict_arbiter_model_must_be_third_model() {
2403        let temp = tempfile::tempdir().unwrap();
2404        let store = LedgerStore::new(temp.path());
2405        let mut job = review_job(true);
2406        job.strict.as_mut().unwrap().arbiter_model = "gpt-5.5".to_owned();
2407        let runner = SequenceRunner::new([pass_json()]);
2408
2409        let error = execute_review_job(job, &runner, &store).unwrap_err();
2410
2411        assert!(matches!(
2412            error,
2413            ReviewerError::StrictArbiterModelNotDistinct
2414        ));
2415    }
2416
2417    #[test]
2418    fn strict_goal_policy_stops_at_configured_lie_or_fuckup_count() {
2419        let policy = StrictGoalPolicy {
2420            stop_after_lies: 2,
2421            stop_after_fuckups: 3,
2422        };
2423
2424        assert_eq!(
2425            policy.decide(StrictGoalCounters {
2426                lies_exposed: 1,
2427                fuckups_registered: 2
2428            }),
2429            StrictGoalDecision::Continue
2430        );
2431        assert_eq!(
2432            policy.decide(StrictGoalCounters {
2433                lies_exposed: 2,
2434                fuckups_registered: 0
2435            }),
2436            StrictGoalDecision::Stop {
2437                reason: StrictGoalStopReason::LiesExposed
2438            }
2439        );
2440        assert_eq!(
2441            policy.decide(StrictGoalCounters {
2442                lies_exposed: 0,
2443                fuckups_registered: 3
2444            }),
2445            StrictGoalDecision::Stop {
2446                reason: StrictGoalStopReason::FuckupsRegistered
2447            }
2448        );
2449    }
2450
2451    #[test]
2452    fn drain_once_reviews_each_commit_once_and_clears_queue() {
2453        let temp = tempfile::tempdir().unwrap();
2454        let store = LedgerStore::new(temp.path());
2455        let queue = ReviewQueue::new(temp.path());
2456        queue.enqueue("abc123").unwrap();
2457        queue.enqueue("abc123").unwrap(); // duplicate SHA reviewed only once
2458        queue.enqueue("def456").unwrap();
2459
2460        let loader = StaticLoader::new();
2461        let runner = SequenceRunner::new([reject_json("unsupported"), pass_json()]);
2462        let selection = selection();
2463
2464        let report = drain_once(&queue, &loader, &selection, "", &runner, &store).unwrap();
2465
2466        assert_eq!(report.reviewed, ["abc123", "def456"]);
2467        assert_eq!(report.ledger_entries, 2);
2468        assert!(queue.pending().unwrap().is_empty());
2469        assert_eq!(store.read_history().unwrap().len(), 2);
2470        assert_eq!(store.unresolved_rejections().unwrap().len(), 1);
2471
2472        let runs = ReviewRunStore::new(temp.path()).list().unwrap();
2473        assert_eq!(runs.len(), 3);
2474        assert_eq!(
2475            runs.iter()
2476                .filter(|run| run.status == ReviewRunStatus::Completed)
2477                .count(),
2478            2
2479        );
2480        assert_eq!(
2481            runs.iter()
2482                .filter(|run| run.status == ReviewRunStatus::Cancelled)
2483                .count(),
2484            1
2485        );
2486    }
2487
2488    #[test]
2489    fn drain_once_is_a_noop_on_empty_queue() {
2490        let temp = tempfile::tempdir().unwrap();
2491        let store = LedgerStore::new(temp.path());
2492        let queue = ReviewQueue::new(temp.path());
2493        let loader = StaticLoader::new();
2494        let runner = ConstRunner::new(pass_json());
2495
2496        let report = drain_once(&queue, &loader, &selection(), "", &runner, &store).unwrap();
2497
2498        assert!(report.reviewed.is_empty());
2499        assert_eq!(report.ledger_entries, 0);
2500        assert_eq!(store.read_history().unwrap().len(), 0);
2501    }
2502
2503    #[test]
2504    fn strict_goal_loop_stops_at_configured_lie_count() {
2505        let temp = tempfile::tempdir().unwrap();
2506        let store = LedgerStore::new(temp.path());
2507        let policy = StrictGoalPolicy {
2508            stop_after_lies: 1,
2509            stop_after_fuckups: 0,
2510        };
2511        let runner = SequenceRunner::new([reject_json("lie")]);
2512
2513        let outcome = run_strict_goal_loop(
2514            "abc123",
2515            &claim(),
2516            "diff",
2517            "",
2518            &selection(),
2519            policy,
2520            5,
2521            &runner,
2522            &store,
2523        )
2524        .unwrap();
2525
2526        assert_eq!(outcome.passes, 1);
2527        assert_eq!(outcome.counters.lies_exposed, 1);
2528        assert_eq!(outcome.stop_reason, Some(StrictGoalStopReason::LiesExposed));
2529        assert_eq!(store.read_history().unwrap().len(), 1);
2530    }
2531
2532    #[test]
2533    fn strict_goal_loop_terminates_at_max_passes_for_honest_agent() {
2534        let temp = tempfile::tempdir().unwrap();
2535        let store = LedgerStore::new(temp.path());
2536        let policy = StrictGoalPolicy {
2537            stop_after_lies: 2,
2538            stop_after_fuckups: 5,
2539        };
2540        let runner = ConstRunner::new(pass_json());
2541
2542        let outcome = run_strict_goal_loop(
2543            "abc123",
2544            &claim(),
2545            "diff",
2546            "",
2547            &selection(),
2548            policy,
2549            3,
2550            &runner,
2551            &store,
2552        )
2553        .unwrap();
2554
2555        assert_eq!(outcome.passes, 3);
2556        assert_eq!(outcome.counters.lies_exposed, 0);
2557        assert_eq!(outcome.stop_reason, None);
2558        assert_eq!(store.read_history().unwrap().len(), 3);
2559    }
2560
2561    #[test]
2562    fn strict_goal_loop_stops_when_fuckups_accumulate() {
2563        let temp = tempfile::tempdir().unwrap();
2564        let store = LedgerStore::new(temp.path());
2565        let policy = StrictGoalPolicy {
2566            stop_after_lies: 0,
2567            stop_after_fuckups: 2,
2568        };
2569        // Each structured finding registers one fuckup; two passes hit N=2.
2570        let runner = ConstRunner::new(reject_json("nit"));
2571
2572        let outcome = run_strict_goal_loop(
2573            "abc123",
2574            &claim(),
2575            "diff",
2576            "",
2577            &selection(),
2578            policy,
2579            10,
2580            &runner,
2581            &store,
2582        )
2583        .unwrap();
2584
2585        assert_eq!(outcome.passes, 2);
2586        assert_eq!(outcome.counters.lies_exposed, 2);
2587        assert_eq!(outcome.counters.fuckups_registered, 2);
2588        assert_eq!(
2589            outcome.stop_reason,
2590            Some(StrictGoalStopReason::FuckupsRegistered)
2591        );
2592    }
2593
2594    proptest! {
2595        #[test]
2596        fn strict_goal_loop_never_exceeds_max_passes(max in 1u32..6) {
2597            let temp = tempfile::tempdir().unwrap();
2598            let store = LedgerStore::new(temp.path());
2599            // Both thresholds disabled: only the ceiling can stop the loop.
2600            let policy = StrictGoalPolicy { stop_after_lies: 0, stop_after_fuckups: 0 };
2601            let runner = ConstRunner::new(pass_json());
2602
2603            let outcome = run_strict_goal_loop(
2604                "abc123", &claim(), "diff", "", &selection(), policy, max, &runner, &store,
2605            )
2606            .unwrap();
2607
2608            prop_assert!(outcome.passes <= max);
2609            prop_assert_eq!(outcome.passes, max);
2610            prop_assert!(outcome.stop_reason.is_none());
2611        }
2612    }
2613
2614    proptest! {
2615        #[test]
2616        fn model_opposition_is_enforced_for_arbitrary_models(
2617            watched in "[A-Za-z0-9._/-]{1,32}",
2618            reviewer in "[A-Za-z0-9._/-]{1,32}",
2619        ) {
2620            let request = ReviewRequest::new(
2621                Agent::Codex,
2622                watched.clone(),
2623                ReviewerHarness::Codex,
2624                reviewer.clone(),
2625                false,
2626                "review this",
2627            );
2628            let result = ReviewPlan::build(request);
2629
2630            if watched.trim().eq_ignore_ascii_case(reviewer.trim()) {
2631                let blocked = matches!(result, Err(ReviewerError::SameModelWithoutWaiver { .. }));
2632                prop_assert!(blocked);
2633            } else {
2634                prop_assert!(result.is_ok());
2635            }
2636        }
2637    }
2638
2639    fn claim() -> Claim {
2640        Claim::new(
2641            "add review",
2642            "cargo test",
2643            vec![EvidenceRef::parse("tests:cargo-test").unwrap()],
2644        )
2645        .unwrap()
2646    }
2647
2648    fn selection() -> ReviewSelection {
2649        ReviewSelection {
2650            watched_agent: Agent::Codex,
2651            watched_model: "gpt-5.4".to_owned(),
2652            reviewer_harness: ReviewerHarness::Codex,
2653            reviewer_model: "gpt-5.5".to_owned(),
2654            reviewer_effort: Effort::Xhigh,
2655            allow_same_model: false,
2656            strict: None,
2657        }
2658    }
2659
2660    struct StaticLoader {
2661        claim: Claim,
2662        diff: String,
2663    }
2664
2665    impl StaticLoader {
2666        fn new() -> Self {
2667            Self {
2668                claim: claim(),
2669                diff: "diff --git a/src/lib.rs b/src/lib.rs".to_owned(),
2670            }
2671        }
2672    }
2673
2674    impl MaterialLoader for StaticLoader {
2675        fn load(&self, _sha: &str) -> Result<(Claim, String), ReviewerError> {
2676            Ok((self.claim.clone(), self.diff.clone()))
2677        }
2678    }
2679
2680    struct ConstRunner {
2681        output: String,
2682    }
2683
2684    impl ConstRunner {
2685        fn new(output: impl Into<String>) -> Self {
2686            Self {
2687                output: output.into(),
2688            }
2689        }
2690    }
2691
2692    impl ProcessRunner for ConstRunner {
2693        fn run(
2694            &self,
2695            _invocation: &InvocationPlan,
2696            _prompt: &str,
2697        ) -> Result<ProcessOutput, ReviewerError> {
2698            Ok(ProcessOutput {
2699                status_code: Some(0),
2700                stdout: self.output.clone(),
2701                stderr: String::new(),
2702            })
2703        }
2704    }
2705
2706    fn review_job(strict: bool) -> ReviewJob {
2707        let claim = claim();
2708        ReviewJob {
2709            commit_sha: "abc123".to_owned(),
2710            diff: "diff --git a/src/lib.rs b/src/lib.rs".to_owned(),
2711            context: String::new(),
2712            request: ReviewRequest::new(
2713                Agent::Codex,
2714                "gpt-5.4",
2715                ReviewerHarness::Codex,
2716                "gpt-5.5",
2717                false,
2718                "review this",
2719            ),
2720            claim,
2721            strict: strict.then_some(StrictReviewConfig {
2722                arbiter_harness: ReviewerHarness::Claude,
2723                arbiter_model: "claude-opus-4-8".to_owned(),
2724                arbiter_effort: Effort::Xhigh,
2725            }),
2726        }
2727    }
2728
2729    struct SequenceRunner {
2730        outputs: RefCell<VecDeque<String>>,
2731    }
2732
2733    impl SequenceRunner {
2734        fn new<I, S>(outputs: I) -> Self
2735        where
2736            I: IntoIterator<Item = S>,
2737            S: Into<String>,
2738        {
2739            Self {
2740                outputs: RefCell::new(outputs.into_iter().map(Into::into).collect()),
2741            }
2742        }
2743    }
2744
2745    impl ProcessRunner for SequenceRunner {
2746        fn run(
2747            &self,
2748            _invocation: &InvocationPlan,
2749            _prompt: &str,
2750        ) -> Result<ProcessOutput, ReviewerError> {
2751            let stdout = self.outputs.borrow_mut().pop_front().unwrap();
2752            Ok(ProcessOutput {
2753                status_code: Some(0),
2754                stdout,
2755                stderr: String::new(),
2756            })
2757        }
2758    }
2759}