//! Reviewer process harness, model opposition, async queue, and verdict execution.
use std::{
fs,
io::{self, Write},
path::{Path, PathBuf},
process::{Command, ExitCode, Stdio},
time::{SystemTime, UNIX_EPOCH},
};
use anyhow::Result;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use crate::{
claim::{Claim, EvidenceRef},
cli::{self, Agent, ReviewScope, ReviewerHarness},
config::{self, Effort},
ledger::{LedgerEntry, LedgerStore, ReviewerConfig, StructuredFinding, Verdict},
surface,
};
/// File name of the review queue, joined onto the queue root directory.
/// The queue stores one JSON object per line.
pub const REVIEW_QUEUE_FILE: &str = "review-queue.jsonl";
/// Directory name, under the run-store root, that holds one `<id>.json` file per review run.
pub const REVIEW_RUNS_DIR: &str = "runs";
// NOTE(review): the three limits below are not referenced in the visible part of
// this file; presumably they cap how much diff / untracked-file content is
// inlined into review material — confirm at their use sites.
const MAX_INLINE_DIFF_FILES: usize = 2;
const MAX_INLINE_DIFF_BYTES: usize = 256 * 1024;
const MAX_UNTRACKED_FILE_BYTES: u64 = 16 * 1024;
/// Inputs for planning a single reviewer invocation: the watched writer, the
/// reviewer harness/model/effort, the same-model waiver, and the prompt text.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReviewRequest {
    /// Agent whose work is being reviewed.
    pub watched_agent: Agent,
    /// Model the watched agent ran on; may be empty when it is not known.
    pub watched_model: String,
    /// CLI harness used to run the reviewer.
    pub reviewer_harness: ReviewerHarness,
    /// Model name handed to the reviewer harness.
    pub reviewer_model: String,
    /// Reasoning effort requested from the reviewer.
    pub reviewer_effort: Effort,
    /// When `true`, writer and reviewer are allowed to share a model.
    pub allow_same_model: bool,
    /// Full prompt text delivered to the reviewer.
    pub prompt: String,
}

impl ReviewRequest {
    /// Create a request whose reviewer effort starts at [`Effort::highest`];
    /// use [`ReviewRequest::with_effort`] to pick a different level.
    pub fn new(
        watched_agent: Agent,
        watched_model: impl Into<String>,
        reviewer_harness: ReviewerHarness,
        reviewer_model: impl Into<String>,
        allow_same_model: bool,
        prompt: impl Into<String>,
    ) -> Self {
        let watched_model = watched_model.into();
        let reviewer_model = reviewer_model.into();
        let prompt = prompt.into();
        Self {
            watched_agent,
            watched_model,
            reviewer_harness,
            reviewer_model,
            reviewer_effort: Effort::highest(),
            allow_same_model,
            prompt,
        }
    }

    /// Return the same request with `reviewer_effort` replaced by `effort`.
    pub fn with_effort(self, effort: Effort) -> Self {
        Self {
            reviewer_effort: effort,
            ..self
        }
    }
}
/// Resolved reviewer selection shared by `review` and `watch`. The reviewer is
/// chosen from the writer harness's adversarial pair; explicit CLI values win.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReviewSelection {
    /// Writer agent under review.
    pub watched_agent: Agent,
    /// Writer model; empty when the caller did not supply one.
    pub watched_model: String,
    /// Harness that runs the reviewer.
    pub reviewer_harness: ReviewerHarness,
    /// Model handed to the reviewer harness.
    pub reviewer_model: String,
    /// Reasoning effort requested from the reviewer.
    pub reviewer_effort: Effort,
    /// Whether writer and reviewer may share a model.
    pub allow_same_model: bool,
    /// Arbiter settings for a strict second pass; `None` disables it.
    pub strict: Option<StrictReviewConfig>,
}

impl ReviewSelection {
    /// Resolve the reviewer for a writer harness from its adversarial pair,
    /// applying any explicit CLI overrides. `strict` is left as `None` and is
    /// set separately by the caller.
    ///
    /// # Errors
    /// - `NoPairForWriter` when a value must come from config but the writer
    ///   has no configured pair.
    /// - `OverrideNeedsModel` when the harness was overridden on the CLI to
    ///   something other than the pair's harness and no model was supplied.
    /// - Whatever `agent_from_slug` / `harness_from_slug` report for bad slugs.
    #[allow(clippy::too_many_arguments)]
    pub fn resolve(
        watched_agent: Option<Agent>,
        watched_model: Option<String>,
        reviewer_harness: Option<ReviewerHarness>,
        reviewer_model: Option<String>,
        reviewer_effort: Option<Effort>,
        allow_same_model: bool,
        config: &config::TruthMirrorConfig,
    ) -> Result<Self, ReviewerError> {
        let watched_agent = if let Some(agent) = watched_agent {
            agent
        } else {
            agent_from_slug(&config.default_writer)?
        };
        let writer_slug = surface::agent_slug(watched_agent);
        let pair = config.pair_for(writer_slug);
        let missing_pair = || ReviewerError::NoPairForWriter {
            writer: writer_slug.to_owned(),
        };

        let harness_from_cli = reviewer_harness.is_some();
        let reviewer_harness = match reviewer_harness {
            Some(harness) => harness,
            None => {
                let configured = pair.ok_or_else(missing_pair)?;
                harness_from_slug(configured.reviewer.harness.as_str())?
            }
        };

        let reviewer_model = if let Some(model) = reviewer_model {
            model
        } else {
            let configured = pair.ok_or_else(missing_pair)?;
            // Don't pair a CLI-overridden harness with a model string meant for
            // a different harness — that silently reviews with the wrong tool.
            if harness_from_cli {
                let cli_slug = harness_slug(reviewer_harness);
                if !configured.reviewer.harness.eq_ignore_ascii_case(cli_slug) {
                    return Err(ReviewerError::OverrideNeedsModel {
                        role: "reviewer".to_owned(),
                        harness: cli_slug.to_owned(),
                    });
                }
            }
            configured.reviewer.model.clone()
        };

        // Precedence: CLI value, then the pair's configured effort, then highest.
        let reviewer_effort = match (reviewer_effort, pair) {
            (Some(effort), _) => effort,
            (None, Some(configured)) => configured.reviewer.effort,
            (None, None) => Effort::highest(),
        };

        Ok(Self {
            watched_agent,
            watched_model: watched_model.unwrap_or_default(),
            reviewer_harness,
            reviewer_model,
            reviewer_effort,
            // Either the CLI flag or config may waive model opposition.
            allow_same_model: allow_same_model || config.allow_same_model,
            strict: None,
        })
    }

    /// Resolve the second-pass arbiter for the writer, preferring CLI overrides,
    /// then the writer pair's `arbiter`.
    ///
    /// # Errors
    /// - `MissingArbiter` when a value must come from config but the writer's
    ///   pair has no arbiter (or the writer has no pair).
    /// - `OverrideNeedsModel` when the harness was overridden on the CLI to
    ///   something other than the configured arbiter harness without a model.
    pub fn resolve_arbiter(
        watched_agent: Agent,
        arbiter_harness: Option<ReviewerHarness>,
        arbiter_model: Option<String>,
        arbiter_effort: Option<Effort>,
        config: &config::TruthMirrorConfig,
    ) -> Result<StrictReviewConfig, ReviewerError> {
        let pair_arbiter = config
            .pair_for(surface::agent_slug(watched_agent))
            .and_then(|pair| pair.arbiter.clone());
        let configured = pair_arbiter.as_ref();

        let harness_from_cli = arbiter_harness.is_some();
        let arbiter_harness = match arbiter_harness {
            Some(harness) => harness,
            None => {
                let arbiter = configured.ok_or(ReviewerError::MissingArbiter)?;
                harness_from_slug(arbiter.harness.as_str())?
            }
        };

        let arbiter_model = if let Some(model) = arbiter_model {
            model
        } else {
            let arbiter = configured.ok_or(ReviewerError::MissingArbiter)?;
            if harness_from_cli {
                let cli_slug = harness_slug(arbiter_harness);
                if !arbiter.harness.eq_ignore_ascii_case(cli_slug) {
                    return Err(ReviewerError::OverrideNeedsModel {
                        role: "arbiter".to_owned(),
                        harness: cli_slug.to_owned(),
                    });
                }
            }
            arbiter.model.clone()
        };

        let arbiter_effort = match (arbiter_effort, configured) {
            (Some(effort), _) => effort,
            (None, Some(arbiter)) => arbiter.effort,
            (None, None) => Effort::highest(),
        };

        Ok(StrictReviewConfig {
            arbiter_harness,
            arbiter_model,
            arbiter_effort,
        })
    }

    /// Build a [`ReviewRequest`] for `prompt` from this selection, carrying the
    /// selected reviewer effort.
    fn request_for(&self, prompt: String) -> ReviewRequest {
        let request = ReviewRequest::new(
            self.watched_agent,
            self.watched_model.clone(),
            self.reviewer_harness,
            self.reviewer_model.clone(),
            self.allow_same_model,
            prompt,
        );
        request.with_effort(self.reviewer_effort)
    }
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReviewPlan {
pub watched_agent: Agent,
pub watched_model: String,
pub reviewer_harness: ReviewerHarness,
pub reviewer_model: String,
pub allow_same_model: bool,
pub invocation: InvocationPlan,
}
impl ReviewPlan {
pub fn build(request: ReviewRequest) -> Result<Self, ReviewerError> {
validate_model_present("reviewer", &request.reviewer_model)?;
// The writer model may be unknown (pair-based selection doesn't require it);
// opposition is enforced only when the writer model is actually provided.
if !request.watched_model.trim().is_empty()
&& !request.allow_same_model
&& normalized_model(&request.watched_model) == normalized_model(&request.reviewer_model)
{
return Err(ReviewerError::SameModelWithoutWaiver {
watched_model: request.watched_model,
reviewer_model: request.reviewer_model,
});
}
let invocation = InvocationPlan::for_harness(
request.reviewer_harness,
&request.reviewer_model,
request.reviewer_effort,
)?;
Ok(Self {
watched_agent: request.watched_agent,
watched_model: request.watched_model,
reviewer_harness: request.reviewer_harness,
reviewer_model: request.reviewer_model,
allow_same_model: request.allow_same_model,
invocation,
})
}
pub fn run_with<R: ProcessRunner>(
&self,
prompt: &str,
runner: &R,
) -> Result<ProcessOutput, ReviewerError> {
runner.run(&self.invocation, prompt)
}
fn reviewer_config(&self) -> ReviewerConfig {
ReviewerConfig::new(
harness_slug(self.reviewer_harness),
self.reviewer_model.clone(),
self.allow_same_model,
)
}
}
/// Concrete command line for one reviewer harness, minus the prompt.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct InvocationPlan {
    /// Executable name to spawn.
    pub program: String,
    /// Arguments that precede any prompt-carrying arguments.
    pub args: Vec<String>,
    /// How the prompt reaches the process.
    pub prompt_delivery: PromptDelivery,
}

impl InvocationPlan {
    /// Build the invocation for `harness` with the trimmed `model` and the
    /// requested `effort`.
    ///
    /// # Errors
    /// Whatever `validate_model_present` reports for `model`, or
    /// `UnsupportedCustomHarness` for [`ReviewerHarness::Custom`].
    pub fn for_harness(
        harness: ReviewerHarness,
        model: &str,
        effort: Effort,
    ) -> Result<Self, ReviewerError> {
        validate_model_present("reviewer", model)?;
        let model = model.trim().to_owned();
        let effort_level = effort.as_str();
        let literal = |text: &str| text.to_owned();

        // Reasoning-effort flags verified against source: codex ReasoningEffort
        // (`-c model_reasoning_effort`), pi `--thinking` (cli/args.js:249), claude
        // `--effort`. Gemini/OpenCode have no effort flag, so effort is omitted.
        let (program, args, prompt_delivery) = match harness {
            ReviewerHarness::Claude => (
                "claude",
                vec![
                    literal("--print"),
                    literal("--model"),
                    model,
                    literal("--effort"),
                    // Claude has no `minimal`; clamp to a valid level.
                    effort.claude_value().to_owned(),
                ],
                PromptDelivery::Stdin,
            ),
            ReviewerHarness::Codex => (
                "codex",
                vec![
                    literal("exec"),
                    literal("-m"),
                    model,
                    literal("-c"),
                    format!("model_reasoning_effort={effort_level}"),
                ],
                PromptDelivery::PositionalArgument,
            ),
            ReviewerHarness::Pi => (
                "pi",
                vec![
                    literal("--model"),
                    model,
                    literal("--thinking"),
                    literal(effort_level),
                    // Read-only tools so the reviewer can grep the repo for the whole
                    // defect class (Codex `exec` / Claude `-p` have read tools already).
                    literal("--tools"),
                    literal("read,grep,find,ls"),
                    literal("-p"),
                ],
                PromptDelivery::Stdin,
            ),
            ReviewerHarness::Gemini => (
                "gemini",
                vec![literal("-m"), model],
                PromptDelivery::FlagValue(literal("-p")),
            ),
            ReviewerHarness::Opencode => (
                "opencode",
                vec![literal("run"), literal("--model"), model],
                PromptDelivery::PositionalArgument,
            ),
            ReviewerHarness::Custom => return Err(ReviewerError::UnsupportedCustomHarness),
        };

        Ok(Self {
            program: program.to_owned(),
            args,
            prompt_delivery,
        })
    }

    /// Full argument list for `prompt`: the base arguments plus whatever the
    /// delivery mode appends (nothing for stdin delivery).
    pub fn args_for_prompt(&self, prompt: &str) -> Vec<String> {
        let tail: Vec<String> = match &self.prompt_delivery {
            PromptDelivery::Stdin => Vec::new(),
            PromptDelivery::PositionalArgument => vec![prompt.to_owned()],
            PromptDelivery::FlagValue(flag) => vec![flag.clone(), prompt.to_owned()],
        };
        self.args.iter().cloned().chain(tail).collect()
    }
}
/// How the review prompt is handed to the reviewer process.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PromptDelivery {
/// Written to the child's standard input; no prompt argument is appended.
Stdin,
/// Appended as the last command-line argument.
PositionalArgument,
/// Appended as the contained flag followed by the prompt as a separate argument.
FlagValue(String),
}
/// Captured result of one reviewer process run.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ProcessOutput {
/// Exit code of the process, or `None` when the runner could not obtain one.
pub status_code: Option<i32>,
/// Captured standard output (`StdProcessRunner` decodes it as lossy UTF-8).
pub stdout: String,
/// Captured standard error (`StdProcessRunner` decodes it as lossy UTF-8).
pub stderr: String,
}
/// Abstraction over executing a reviewer invocation, so review logic can be
/// driven by something other than a real child process.
pub trait ProcessRunner {
/// Execute `invocation` with `prompt` and return the captured output.
fn run(
&self,
invocation: &InvocationPlan,
prompt: &str,
) -> Result<ProcessOutput, ReviewerError>;
}
#[derive(Clone, Copy, Debug, Default)]
pub struct StdProcessRunner;
impl ProcessRunner for StdProcessRunner {
fn run(
&self,
invocation: &InvocationPlan,
prompt: &str,
) -> Result<ProcessOutput, ReviewerError> {
let mut command = Command::new(&invocation.program);
command.args(invocation.args_for_prompt(prompt));
command.stdout(Stdio::piped()).stderr(Stdio::piped());
if invocation.prompt_delivery == PromptDelivery::Stdin {
command.stdin(Stdio::piped());
}
let mut child = command.spawn().map_err(ReviewerError::Spawn)?;
if invocation.prompt_delivery == PromptDelivery::Stdin {
let mut stdin = child.stdin.take().ok_or(ReviewerError::MissingStdinPipe)?;
stdin
.write_all(prompt.as_bytes())
.map_err(ReviewerError::WritePrompt)?;
}
let output = child.wait_with_output().map_err(ReviewerError::Wait)?;
Ok(ProcessOutput {
status_code: output.status.code(),
stdout: String::from_utf8_lossy(&output.stdout).into_owned(),
stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
})
}
}
/// One unit of review work: the commit material plus the reviewer request.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReviewJob {
/// Commit the review is about.
pub commit_sha: String,
/// Parsed claim being checked against the diff.
pub claim: Claim,
/// Diff text for the commit.
pub diff: String,
/// Ground-truth constraints + recent trajectory injected into review prompts.
pub context: String,
/// First-pass reviewer request; its `prompt` is what gets sent.
pub request: ReviewRequest,
/// Arbiter settings; when set, a clean first-pass PASS triggers a second pass.
pub strict: Option<StrictReviewConfig>,
}
/// Reviewer settings for the strict second pass (the "arbiter").
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct StrictReviewConfig {
/// Harness that runs the arbiter.
pub arbiter_harness: ReviewerHarness,
/// Model handed to the arbiter harness.
pub arbiter_model: String,
/// Reasoning effort requested from the arbiter.
pub arbiter_effort: Effort,
}
/// Result of `execute_review_job`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReviewExecution {
/// Ledger entries appended by the job, in order: the first pass, then the
/// strict second pass if it ran.
pub entries: Vec<LedgerEntry>,
}
pub fn execute_review_job<R: ProcessRunner>(
job: ReviewJob,
runner: &R,
store: &LedgerStore,
) -> Result<ReviewExecution, ReviewerError> {
let first_plan = ReviewPlan::build(job.request.clone())?;
let first_output = first_plan.run_with(&job.request.prompt, runner)?;
ensure_process_success(&first_output)?;
let first_verdict = ParsedVerdict::parse(&first_output.stdout)?;
let first_entry = entry_from_verdict(&job, &first_plan, &first_verdict);
store.append_entry(&first_entry)?;
let mut entries = vec![first_entry];
if let Some(strict) = &job.strict
&& first_verdict.verdict == Verdict::Pass
&& first_verdict.findings.is_empty()
{
validate_strict_arbiter(&job.request, strict)?;
let strict_prompt = strict_second_pass_prompt(&job, &first_output.stdout);
let strict_request = ReviewRequest::new(
job.request.watched_agent,
job.request.watched_model.clone(),
strict.arbiter_harness,
strict.arbiter_model.clone(),
false,
strict_prompt,
)
.with_effort(strict.arbiter_effort);
let strict_plan = ReviewPlan::build(strict_request.clone())?;
let strict_output = strict_plan.run_with(&strict_request.prompt, runner)?;
ensure_process_success(&strict_output)?;
let strict_verdict = ParsedVerdict::parse(&strict_output.stdout)?;
let strict_entry = entry_from_verdict(&job, &strict_plan, &strict_verdict);
store.append_entry(&strict_entry)?;
entries.push(strict_entry);
}
Ok(ReviewExecution { entries })
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParsedVerdict {
pub verdict: Verdict,
pub summary: String,
pub findings: Vec<String>,
pub structured_findings: Vec<StructuredFinding>,
pub next_steps: Vec<String>,
pub raw: String,
}
impl ParsedVerdict {
pub fn parse(output: &str) -> Result<Self, ReviewerError> {
let parsed: ReviewerJsonOutput =
serde_json::from_str(output.trim()).map_err(|source| ReviewerError::VerdictJson {
source,
output: output.to_owned(),
})?;
parsed.validate()?;
let findings = parsed
.findings
.iter()
.map(StructuredFinding::display_line)
.collect();
Ok(Self {
verdict: parsed.verdict,
summary: parsed.summary,
findings,
structured_findings: parsed.findings,
next_steps: parsed.next_steps,
raw: output.to_owned(),
})
}
}
/// Wire shape of the JSON a reviewer must print; `findings` and `next_steps`
/// default to empty when absent.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
struct ReviewerJsonOutput {
    verdict: Verdict,
    summary: String,
    #[serde(default)]
    findings: Vec<StructuredFinding>,
    #[serde(default)]
    next_steps: Vec<String>,
}

impl ReviewerJsonOutput {
    /// Check the decoded output against the verdict schema, reporting the
    /// first violation as `ReviewerError::VerdictSchema`.
    ///
    /// Checks run in order: summary, then each finding field by field, then
    /// the verdict/findings consistency rules.
    fn validate(&self) -> Result<(), ReviewerError> {
        let violation = |message: &str| -> Result<(), ReviewerError> {
            Err(ReviewerError::VerdictSchema {
                message: message.to_owned(),
            })
        };

        if self.summary.trim().is_empty() {
            return violation("summary must not be empty");
        }

        for finding in &self.findings {
            let problem = if finding.title.trim().is_empty() {
                Some("finding title must not be empty")
            } else if finding.body.trim().is_empty() {
                Some("finding body must not be empty")
            } else if finding.file.trim().is_empty() {
                Some("finding file must not be empty")
            } else if finding.line_start == 0 || finding.line_end == 0 {
                Some("finding lines must be one-based")
            } else if finding.line_end < finding.line_start {
                Some("finding line_end must be greater than or equal to line_start")
            } else if finding.confidence > 100 {
                Some("finding confidence must be between 0 and 100")
            } else if finding.recommendation.trim().is_empty() {
                Some("finding recommendation must not be empty")
            } else {
                None
            };
            if let Some(message) = problem {
                return violation(message);
            }
        }

        let has_findings = !self.findings.is_empty();
        if self.verdict == Verdict::Pass && has_findings {
            return violation("PASS verdict must not include findings");
        }
        if self.verdict == Verdict::Reject && !has_findings {
            return violation("REJECT verdict must include at least one finding");
        }
        Ok(())
    }
}
/// Lifecycle state of a review run; serialized in kebab-case.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[serde(rename_all = "kebab-case")]
pub enum ReviewRunStatus {
    /// Recorded but not started.
    Queued,
    /// Currently being reviewed.
    Running,
    /// Finished and verdicts recorded.
    Completed,
    /// Ended with an error.
    Failed,
    /// Cancelled before it started.
    Cancelled,
}

impl std::fmt::Display for ReviewRunStatus {
    /// Writes the lowercase status name.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Queued => "queued",
            Self::Running => "running",
            Self::Completed => "completed",
            Self::Failed => "failed",
            Self::Cancelled => "cancelled",
        };
        formatter.write_str(label)
    }
}
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct ReviewRun {
pub id: String,
pub commit_sha: String,
pub target: String,
pub status: ReviewRunStatus,
pub phase: String,
pub ledger_entries: usize,
pub error: Option<String>,
pub created_at_unix: u64,
pub updated_at_unix: u64,
pub started_at_unix: Option<u64>,
pub completed_at_unix: Option<u64>,
}
impl ReviewRun {
fn queued(
id: impl Into<String>,
commit_sha: impl Into<String>,
target: impl Into<String>,
) -> Self {
let timestamp = unix_now();
Self {
id: id.into(),
commit_sha: commit_sha.into(),
target: target.into(),
status: ReviewRunStatus::Queued,
phase: "queued".to_owned(),
ledger_entries: 0,
error: None,
created_at_unix: timestamp,
updated_at_unix: timestamp,
started_at_unix: None,
completed_at_unix: None,
}
}
fn mark_running(&mut self, phase: impl Into<String>) {
let timestamp = unix_now();
self.status = ReviewRunStatus::Running;
self.phase = phase.into();
self.error = None;
self.updated_at_unix = timestamp;
self.started_at_unix = Some(timestamp);
self.completed_at_unix = None;
}
fn mark_completed(&mut self, ledger_entries: usize) {
let timestamp = unix_now();
self.status = ReviewRunStatus::Completed;
self.phase = "completed".to_owned();
self.ledger_entries = ledger_entries;
self.error = None;
self.updated_at_unix = timestamp;
self.completed_at_unix = Some(timestamp);
}
fn mark_failed(&mut self, error: impl Into<String>) {
let timestamp = unix_now();
self.status = ReviewRunStatus::Failed;
self.phase = "failed".to_owned();
self.error = Some(error.into());
self.updated_at_unix = timestamp;
self.completed_at_unix = Some(timestamp);
}
fn mark_cancelled(&mut self) {
let timestamp = unix_now();
self.status = ReviewRunStatus::Cancelled;
self.phase = "cancelled".to_owned();
self.error = None;
self.updated_at_unix = timestamp;
self.completed_at_unix = Some(timestamp);
}
}
/// File-backed store of [`ReviewRun`] records, one pretty-printed JSON file per
/// run under `<root>/runs/`.
#[derive(Clone, Debug)]
pub struct ReviewRunStore {
    root: PathBuf,
}

impl ReviewRunStore {
    /// Create a store rooted at `root`; nothing is touched on disk yet.
    pub fn new(root: impl Into<PathBuf>) -> Self {
        let root = root.into();
        Self { root }
    }

    /// Directory that holds the run files.
    pub fn runs_dir(&self) -> PathBuf {
        self.root.join(REVIEW_RUNS_DIR)
    }

    /// Path of the JSON file for run `id`.
    pub fn path(&self, id: &str) -> PathBuf {
        let file_name = format!("{id}.json");
        self.runs_dir().join(file_name)
    }

    /// Create and persist a new queued run with a freshly generated id.
    pub fn create_queued(
        &self,
        commit_sha: &str,
        target: impl Into<String>,
    ) -> Result<ReviewRun, ReviewerError> {
        let id = generate_run_id(commit_sha);
        let run = ReviewRun::queued(id, commit_sha, target);
        self.write(&run)?;
        Ok(run)
    }

    /// Return the stored run `run_id` as it is, or create and persist a queued
    /// one when no such run exists. Other read errors are passed through.
    fn ensure_queued(
        &self,
        run_id: &str,
        commit_sha: &str,
        target: &str,
    ) -> Result<ReviewRun, ReviewerError> {
        let existing = self.read(run_id);
        if !matches!(existing, Err(ReviewerError::ReviewRunNotFound { .. })) {
            return existing;
        }
        let run = ReviewRun::queued(run_id, commit_sha, target);
        self.write(&run)?;
        Ok(run)
    }

    /// Load run `id`.
    ///
    /// # Errors
    /// `ReviewRunNotFound` when the file does not exist, `RunIo` for other
    /// read failures, `RunJson` when the contents do not decode.
    pub fn read(&self, id: &str) -> Result<ReviewRun, ReviewerError> {
        let contents = match fs::read_to_string(self.path(id)) {
            Ok(contents) => contents,
            Err(source) if source.kind() == io::ErrorKind::NotFound => {
                return Err(ReviewerError::ReviewRunNotFound { id: id.to_owned() });
            }
            Err(source) => return Err(ReviewerError::RunIo(source)),
        };
        serde_json::from_str(&contents).map_err(ReviewerError::RunJson)
    }

    /// Load every `*.json` run in the runs directory, most recently updated
    /// first (ties broken by descending id). A missing directory yields an
    /// empty list; any unreadable or undecodable run file fails the call.
    pub fn list(&self) -> Result<Vec<ReviewRun>, ReviewerError> {
        let entries = match fs::read_dir(self.runs_dir()) {
            Ok(entries) => entries,
            Err(error) if error.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
            Err(error) => return Err(ReviewerError::RunIo(error)),
        };

        let mut runs: Vec<ReviewRun> = Vec::new();
        for entry in entries {
            let path = entry.map_err(ReviewerError::RunIo)?.path();
            let is_run_file = path.extension().is_some_and(|extension| extension == "json");
            if !is_run_file {
                continue;
            }
            let contents = fs::read_to_string(&path).map_err(ReviewerError::RunIo)?;
            let run = serde_json::from_str(&contents).map_err(ReviewerError::RunJson)?;
            runs.push(run);
        }

        runs.sort_by(|left, right| {
            (right.updated_at_unix, &right.id).cmp(&(left.updated_at_unix, &left.id))
        });
        Ok(runs)
    }

    /// Most recently updated run that has reached a final state (completed,
    /// failed or cancelled), or `NoReviewRuns` when there is none.
    pub fn latest_result(&self) -> Result<ReviewRun, ReviewerError> {
        let is_final = |run: &ReviewRun| {
            matches!(
                run.status,
                ReviewRunStatus::Completed | ReviewRunStatus::Failed | ReviewRunStatus::Cancelled
            )
        };
        self.list()?
            .into_iter()
            .find(is_final)
            .ok_or(ReviewerError::NoReviewRuns)
    }

    /// Mark run `id` as running under `phase` and persist it.
    pub fn mark_running(&self, id: &str, phase: &str) -> Result<ReviewRun, ReviewerError> {
        self.modify_stored(id, |run| run.mark_running(phase))
    }

    /// Mark run `id` as completed with `ledger_entries` and persist it.
    pub fn mark_completed(
        &self,
        id: &str,
        ledger_entries: usize,
    ) -> Result<ReviewRun, ReviewerError> {
        self.modify_stored(id, |run| run.mark_completed(ledger_entries))
    }

    /// Mark run `id` as failed with `error` and persist it.
    pub fn mark_failed(
        &self,
        id: &str,
        error: impl Into<String>,
    ) -> Result<ReviewRun, ReviewerError> {
        self.modify_stored(id, |run| run.mark_failed(error))
    }

    /// Cancel run `id`, which must still be queued; otherwise
    /// `CannotCancelReview` reports the status it is actually in.
    pub fn cancel_queued(&self, id: &str) -> Result<ReviewRun, ReviewerError> {
        let mut run = self.read(id)?;
        if run.status == ReviewRunStatus::Queued {
            run.mark_cancelled();
            self.write(&run)?;
            Ok(run)
        } else {
            Err(ReviewerError::CannotCancelReview {
                id: id.to_owned(),
                status: run.status,
            })
        }
    }

    /// Read run `id`, apply `change`, write it back, and return the result.
    fn modify_stored(
        &self,
        id: &str,
        change: impl FnOnce(&mut ReviewRun),
    ) -> Result<ReviewRun, ReviewerError> {
        let mut run = self.read(id)?;
        change(&mut run);
        self.write(&run)?;
        Ok(run)
    }

    /// Persist `run` as pretty-printed JSON, creating the runs directory first.
    fn write(&self, run: &ReviewRun) -> Result<(), ReviewerError> {
        fs::create_dir_all(self.runs_dir()).map_err(ReviewerError::RunIo)?;
        let encoded = serde_json::to_vec_pretty(run).map_err(ReviewerError::RunJson)?;
        let destination = self.path(&run.id);
        fs::write(destination, encoded).map_err(ReviewerError::RunIo)
    }
}
/// One line of the review queue file.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct QueuedReview {
/// Id of the associated review run; decodes as an empty string when the
/// field is absent from the queue line.
#[serde(default)]
pub run_id: String,
/// Commit waiting to be reviewed.
pub commit_sha: String,
/// `unix_now()` value recorded when the item was enqueued.
pub enqueued_at_unix: u64,
}
#[derive(Clone, Debug)]
pub struct ReviewQueue {
root: PathBuf,
}
impl ReviewQueue {
pub fn new(root: impl Into<PathBuf>) -> Self {
Self { root: root.into() }
}
pub fn path(&self) -> PathBuf {
self.root.join(REVIEW_QUEUE_FILE)
}
pub fn enqueue(&self, commit_sha: impl Into<String>) -> Result<QueuedReview, ReviewerError> {
fs::create_dir_all(&self.root).map_err(ReviewerError::QueueIo)?;
let commit_sha = commit_sha.into();
let run = ReviewRunStore::new(&self.root).create_queued(&commit_sha, "commit")?;
let item = QueuedReview {
run_id: run.id,
commit_sha,
enqueued_at_unix: unix_now(),
};
let mut file = fs::OpenOptions::new()
.create(true)
.append(true)
.open(self.path())
.map_err(ReviewerError::QueueIo)?;
serde_json::to_writer(&mut file, &item).map_err(ReviewerError::QueueJson)?;
writeln!(file).map_err(ReviewerError::QueueIo)?;
Ok(item)
}
pub fn pending(&self) -> Result<Vec<QueuedReview>, ReviewerError> {
let contents = match fs::read_to_string(self.path()) {
Ok(contents) => contents,
Err(error) if error.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
Err(error) => return Err(ReviewerError::QueueIo(error)),
};
contents
.lines()
.filter(|line| !line.trim().is_empty())
.map(|line| serde_json::from_str(line).map_err(ReviewerError::QueueJson))
.collect()
}
/// Drop every queued item for `sha`, preserving any items appended for other
/// commits. Called after a commit is reviewed so a drain never repeats work.
pub fn remove_sha(&self, sha: &str) -> Result<(), ReviewerError> {
let remaining: Vec<QueuedReview> = self
.pending()?
.into_iter()
.filter(|item| item.commit_sha != sha)
.collect();
self.rewrite(&remaining)
}
fn rewrite(&self, items: &[QueuedReview]) -> Result<(), ReviewerError> {
if items.is_empty() {
return match fs::remove_file(self.path()) {
Ok(()) => Ok(()),
Err(error) if error.kind() == io::ErrorKind::NotFound => Ok(()),
Err(error) => Err(ReviewerError::QueueIo(error)),
};
}
let mut file = fs::File::create(self.path()).map_err(ReviewerError::QueueIo)?;
for item in items {
serde_json::to_writer(&mut file, item).map_err(ReviewerError::QueueJson)?;
writeln!(file).map_err(ReviewerError::QueueIo)?;
}
Ok(())
}
pub fn remove_run_id(&self, run_id: &str) -> Result<(), ReviewerError> {
let remaining: Vec<QueuedReview> = self
.pending()?
.into_iter()
.filter(|item| item.run_id != run_id)
.collect();
self.rewrite(&remaining)
}
}
/// Loads the claim and diff for a commit so the reviewer can run against it.
/// Abstracted so `drain_once` can be unit-tested without a real git repository.
pub trait MaterialLoader {
/// Return the parsed claim and the diff text for commit `sha`.
fn load(&self, sha: &str) -> Result<(Claim, String), ReviewerError>;
}
/// [`MaterialLoader`] backed by `git show`.
#[derive(Clone, Debug, Default)]
pub struct GitMaterialLoader {
    /// Evidence-pointer patterns from config so async review parses claims the
    /// same way the commit-msg gate did (a repo `jira:` pointer stays valid).
    pub evidence_patterns: Vec<String>,
}

impl GitMaterialLoader {
    /// Loader that parses claims with the given evidence patterns.
    pub fn with_patterns(evidence_patterns: Vec<String>) -> Self {
        let mut loader = Self::default();
        loader.evidence_patterns = evidence_patterns;
        loader
    }
}

impl MaterialLoader for GitMaterialLoader {
    /// Fetch the commit message and patch for `sha` through `git_output`, then
    /// parse the message into a claim. The configured evidence patterns are
    /// used when there are any; otherwise the plain `Claim::parse` is used.
    fn load(&self, sha: &str) -> Result<(Claim, String), ReviewerError> {
        let message = git_output(["show", "--format=%B", "--no-patch", sha])?;
        let diff = git_output(["show", "--format=", "--patch", sha])?;

        let use_patterns = !self.evidence_patterns.is_empty();
        let claim = if use_patterns {
            Claim::parse_with(&message, &self.evidence_patterns)?
        } else {
            Claim::parse(&message)?
        };
        Ok((claim, diff))
    }
}
/// Summary of one `drain_once` call.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct DrainReport {
/// Commit shas whose review completed during the drain, in processing order.
pub reviewed: Vec<String>,
/// Total number of ledger entries written across those reviews.
pub ledger_entries: usize,
}
/// Review every distinct queued commit exactly once, record verdicts, and remove
/// each commit from the queue as soon as its review lands. A commit whose review
/// errors stays queued for the next drain.
///
/// Steps:
/// 1. Deduplicate queue items by commit sha, keeping the first occurrence.
///    Later duplicates whose run record is still queued are cancelled.
/// 2. For each remaining item, make sure a run record exists (items without a
///    `run_id` get a generated one). Runs already cancelled are dropped from
///    the queue without review.
/// 3. Mark the run running, load the commit material, run the review, then
///    mark the run completed and remove the commit from the queue.
///
/// # Errors
/// The drain stops at the first failure. If loading the material or running
/// the review fails, the run record is marked failed (best effort) before the
/// error is returned, so no run is left stuck in the running state.
pub fn drain_once<R: ProcessRunner, L: MaterialLoader>(
    queue: &ReviewQueue,
    loader: &L,
    selection: &ReviewSelection,
    context: &str,
    runner: &R,
    store: &LedgerStore,
) -> Result<DrainReport, ReviewerError> {
    let pending = queue.pending()?;
    let run_store = ReviewRunStore::new(&queue.root);

    let mut seen = std::collections::BTreeSet::new();
    let mut order = Vec::new();
    for item in &pending {
        if seen.insert(item.commit_sha.clone()) {
            order.push(item.clone());
        } else if !item.run_id.trim().is_empty()
            && let Ok(run) = run_store.read(&item.run_id)
            && run.status == ReviewRunStatus::Queued
        {
            // A duplicate request for a commit that will be reviewed anyway.
            run_store.cancel_queued(&item.run_id)?;
        }
    }

    let mut report = DrainReport::default();
    for item in order {
        let sha = item.commit_sha;
        // Queue lines without a run id get a fresh one.
        let run_id = if item.run_id.trim().is_empty() {
            generate_run_id(&sha)
        } else {
            item.run_id
        };
        let run = run_store.ensure_queued(&run_id, &sha, "commit")?;
        if run.status == ReviewRunStatus::Cancelled {
            queue.remove_sha(&sha)?;
            continue;
        }
        run_store.mark_running(&run_id, "reviewing")?;

        // The run is now recorded as running: every failure from here on must
        // move it to `failed`, otherwise it would report `running` forever.
        let (claim, diff) = match loader.load(&sha) {
            Ok(material) => material,
            Err(error) => {
                let _ = run_store.mark_failed(&run_id, error.to_string());
                return Err(error);
            }
        };
        let prompt = first_pass_prompt(&claim, &diff, context);
        let job = ReviewJob {
            commit_sha: sha.clone(),
            claim,
            diff,
            context: context.to_owned(),
            request: selection.request_for(prompt),
            strict: selection.strict.clone(),
        };
        let execution = match execute_review_job(job, runner, store) {
            Ok(execution) => execution,
            Err(error) => {
                let _ = run_store.mark_failed(&run_id, error.to_string());
                return Err(error);
            }
        };

        report.ledger_entries += execution.entries.len();
        run_store.mark_completed(&run_id, execution.entries.len())?;
        queue.remove_sha(&sha)?;
        report.reviewed.push(sha);
    }
    Ok(report)
}
/// Build the ground-truth + trajectory context block for review prompts.
/// Best-effort: an unavailable repo or provider yields an empty block.
fn review_context(config: &config::TruthMirrorConfig) -> String {
    let Ok(toplevel) = git_output(["rev-parse", "--show-toplevel"]) else {
        return String::new();
    };
    let repo_root = PathBuf::from(toplevel.trim());
    let provider = crate::context::trajectory_provider(&repo_root, &config.history);
    let built = crate::context::build_review_context(
        &repo_root,
        &config.ground_truth,
        &config.history,
        Some(provider.as_ref()),
    );
    built.unwrap_or_default()
}
/// Entry point for the `watch` command: resolve the reviewer, then drain the
/// review queue once (`args.once`) or keep draining at a fixed poll interval.
///
/// In polling mode the function only returns through an error; the interval
/// is `args.poll_secs`, raised to at least one second.
pub fn run_watch_command(
    args: cli::WatchArgs,
    state_dir: &Path,
    config: &config::TruthMirrorConfig,
) -> Result<ExitCode> {
    let selection = ReviewSelection::resolve(
        args.watched_agent,
        args.watched_model,
        args.reviewer_harness,
        args.reviewer_model,
        args.reviewer_effort,
        args.allow_same_model,
        config,
    )?;
    let queue = ReviewQueue::new(state_dir);
    let store = LedgerStore::new(state_dir);
    let loader = GitMaterialLoader::with_patterns(config.gates.to_policy().evidence_patterns);
    let runner = StdProcessRunner;

    // Rebuild context on every drain so ground truth and trajectory stay current.
    let drain = || {
        let context = review_context(config);
        drain_once(&queue, &loader, &selection, &context, &runner, &store)
    };

    if args.once {
        let report = drain()?;
        println!(
            "truth-mirror watch: reviewed {} commit(s), wrote {} ledger entrie(s)",
            report.reviewed.len(),
            report.ledger_entries
        );
        return Ok(ExitCode::SUCCESS);
    }

    let interval = std::time::Duration::from_secs(args.poll_secs.max(1));
    loop {
        let report = drain()?;
        let reviewed = report.reviewed.len();
        if reviewed > 0 {
            println!("truth-mirror watch: reviewed {} commit(s)", reviewed);
        }
        std::thread::sleep(interval);
    }
}
/// Stop thresholds for the strict-goal loop; a threshold of `0` is disabled.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct StrictGoalPolicy {
    /// Stop once this many lies have been exposed (`0` = never).
    pub stop_after_lies: u32,
    /// Stop once this many fuckups have been registered (`0` = never).
    pub stop_after_fuckups: u32,
}

/// Running totals that a [`StrictGoalPolicy`] is evaluated against.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct StrictGoalCounters {
    /// Number of lies exposed so far.
    pub lies_exposed: u32,
    /// Number of fuckups registered so far.
    pub fuckups_registered: u32,
}

/// Outcome of evaluating a policy against the counters.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StrictGoalDecision {
    /// No enabled threshold has been reached.
    Continue,
    /// A threshold was reached; `reason` says which one.
    Stop { reason: StrictGoalStopReason },
}

/// Which threshold caused a stop.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StrictGoalStopReason {
    /// The lies threshold was reached.
    LiesExposed,
    /// The fuckups threshold was reached.
    FuckupsRegistered,
}

impl StrictGoalPolicy {
    /// Decide whether to stop. The lies threshold is checked first, so it wins
    /// when both thresholds are reached at once.
    pub fn decide(&self, counters: StrictGoalCounters) -> StrictGoalDecision {
        let reached = |threshold: u32, count: u32| threshold > 0 && count >= threshold;

        let reason = if reached(self.stop_after_lies, counters.lies_exposed) {
            Some(StrictGoalStopReason::LiesExposed)
        } else if reached(self.stop_after_fuckups, counters.fuckups_registered) {
            Some(StrictGoalStopReason::FuckupsRegistered)
        } else {
            None
        };

        match reason {
            Some(reason) => StrictGoalDecision::Stop { reason },
            None => StrictGoalDecision::Continue,
        }
    }
}
/// Result of a strict-goal loop.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct StrictGoalOutcome {
    /// Number of review passes that were run and recorded.
    pub passes: u32,
    /// Final lie / fuckup totals.
    pub counters: StrictGoalCounters,
    /// `None` means the loop stopped at the `max_passes` ceiling rather than
    /// hitting a configured lie/fuckup threshold.
    pub stop_reason: Option<StrictGoalStopReason>,
    /// Ledger entries written, one per pass, in order.
    pub entries: Vec<LedgerEntry>,
}

impl StrictGoalOutcome {
    /// Parenthesized suffix describing why the loop stopped, with a leading
    /// space so it can be appended to a status line as is.
    pub fn stop_reason_suffix(&self) -> &'static str {
        let Some(reason) = self.stop_reason else {
            return " (stopped: max passes)";
        };
        match reason {
            StrictGoalStopReason::LiesExposed => " (stopped: lies exposed)",
            StrictGoalStopReason::FuckupsRegistered => " (stopped: fuckups registered)",
        }
    }
}
/// Sic the adversarial reviewer on a commit in a loop, accumulating exposed lies
/// (REJECT verdicts) and registered fuckups (individual findings). Every pass is
/// recorded in the ledger. The loop stops when `policy` says the configured `N`
/// is reached, or when `max_passes` is hit so an honest agent still terminates.
#[allow(clippy::too_many_arguments)]
pub fn run_strict_goal_loop<R: ProcessRunner>(
commit_sha: &str,
claim: &Claim,
diff: &str,
context: &str,
selection: &ReviewSelection,
policy: StrictGoalPolicy,
max_passes: u32,
runner: &R,
store: &LedgerStore,
) -> Result<StrictGoalOutcome, ReviewerError> {
let ceiling = max_passes.max(1);
let mut outcome = StrictGoalOutcome {
passes: 0,
counters: StrictGoalCounters {
lies_exposed: 0,
fuckups_registered: 0,
},
stop_reason: None,
entries: Vec::new(),
};
while outcome.passes < ceiling {
let prompt = strict_goal_prompt(claim, diff, context, outcome.passes + 1, &outcome.entries);
let request = selection.request_for(prompt);
let plan = ReviewPlan::build(request.clone())?;
let output = plan.run_with(&request.prompt, runner)?;
ensure_process_success(&output)?;
let verdict = ParsedVerdict::parse(&output.stdout)?;
let job = ReviewJob {
commit_sha: commit_sha.to_owned(),
claim: claim.clone(),
diff: diff.to_owned(),
context: context.to_owned(),
request,
strict: None,
};
let entry = entry_from_verdict(&job, &plan, &verdict);
store.append_entry(&entry)?;
outcome.entries.push(entry);
outcome.passes += 1;
if verdict.verdict == Verdict::Reject {
outcome.counters.lies_exposed += 1;
}
outcome.counters.fuckups_registered = outcome
.counters
.fuckups_registered
.saturating_add(u32::try_from(verdict.findings.len()).unwrap_or(u32::MAX));
if let StrictGoalDecision::Stop { reason } = policy.decide(outcome.counters) {
outcome.stop_reason = Some(reason);
break;
}
}
Ok(outcome)
}
/// Build the reviewer prompt for one pass of the strict-goal loop.
///
/// The prompt carries the adversarial preamble, the pass number, the optional
/// context block, the claim line, every finding recorded by the `prior`
/// entries (or `(none)`), and the diff.
fn strict_goal_prompt(
    claim: &Claim,
    diff: &str,
    context: &str,
    pass: u32,
    prior: &[LedgerEntry],
) -> String {
    // Flatten the findings of all earlier passes, preserving their order.
    let mut prior_findings: Vec<String> = Vec::new();
    for entry in prior {
        prior_findings.extend(entry.findings.iter().cloned());
    }
    let prior_block = match prior_findings.as_slice() {
        [] => "(none)".to_owned(),
        findings => findings.join("\n"),
    };
    format!(
        "{ADVERSARIAL_PREAMBLE}\n\nStrict-goal loop, pass {pass}. Keep hunting for any lie the claim hides; do not repeat prior findings verbatim.{}\n\nCLAIM:\n{}\n\nPRIOR FINDINGS:\n{prior_block}\n\nDIFF:\n{}",
        context_block(context),
        claim.to_line(),
        diff
    )
}
pub fn run_review_command(
args: cli::ReviewArgs,
state_dir: &Path,
config: &config::TruthMirrorConfig,
) -> Result<ExitCode> {
if let Some(command) = args.command {
return run_review_run_command(command, state_dir);
}
let material = ReviewMaterial::load(
&args,
state_dir,
&config.gates.to_policy().evidence_patterns,
)?;
let mut selection = ReviewSelection::resolve(
args.watched_agent,
args.watched_model,
args.reviewer_harness,
args.reviewer_model,
args.reviewer_effort,
args.allow_same_model,
config,
)?;
if args.strict_two_pass {
selection.strict = Some(ReviewSelection::resolve_arbiter(
selection.watched_agent,
args.arbiter_harness,
args.arbiter_model,
args.arbiter_effort,
config,
)?);
}
let store = LedgerStore::new(state_dir);
let run_store = ReviewRunStore::new(state_dir);
let context = review_context(config);
let run = run_store.create_queued(&material.commit_sha, material.target_label.clone())?;
run_store.mark_running(&run.id, "reviewing")?;
if args.strict_goal {
let policy = config
.strict
.goal_policy(args.stop_after_lies, args.stop_after_fuckups);
let max_passes = args.max_passes.unwrap_or(config.strict.max_passes);
let outcome = match run_strict_goal_loop(
&material.commit_sha,
&material.claim,
&material.diff,
&context,
&selection,
policy,
max_passes,
&StdProcessRunner,
&store,
) {
Ok(outcome) => outcome,
Err(error) => {
let _ = run_store.mark_failed(&run.id, error.to_string());
return Err(error.into());
}
};
run_store.mark_completed(&run.id, outcome.entries.len())?;
println!(
"truth-mirror strict-goal: run {}, {} pass(es), {} lie(s), {} fuckup(s){}",
run.id,
outcome.passes,
outcome.counters.lies_exposed,
outcome.counters.fuckups_registered,
outcome.stop_reason_suffix(),
);
return Ok(ExitCode::SUCCESS);
}
let prompt = first_pass_prompt(&material.claim, &material.diff, &context);
let job = ReviewJob {
commit_sha: material.commit_sha,
claim: material.claim,
diff: material.diff,
context,
request: selection.request_for(prompt),
strict: selection.strict.clone(),
};
let execution = match execute_review_job(job, &StdProcessRunner, &store) {
Ok(execution) => execution,
Err(error) => {
let _ = run_store.mark_failed(&run.id, error.to_string());
return Err(error.into());
}
};
run_store.mark_completed(&run.id, execution.entries.len())?;
println!(
"truth-mirror review: run {}, wrote {} ledger entrie(s)",
run.id,
execution.entries.len()
);
Ok(ExitCode::SUCCESS)
}
/// Handle the `status`, `result` and `cancel` review subcommands against the
/// run store rooted at `state_dir`.
///
/// * `status` prints one run in full, or a one-line summary of every run.
/// * `result` prints the requested run (or the latest result) plus its ledger
///   entries.
/// * `cancel` cancels a queued run and removes it from the review queue.
fn run_review_run_command(command: cli::ReviewCommand, state_dir: &Path) -> Result<ExitCode> {
    let runs = ReviewRunStore::new(state_dir);
    match command {
        cli::ReviewCommand::Status {
            run_id: Some(run_id),
        } => {
            print_run(&runs.read(&run_id)?);
        }
        cli::ReviewCommand::Status { run_id: None } => {
            let listed = runs.list()?;
            if listed.is_empty() {
                println!("No review runs.");
            }
            // Iterating an empty listing prints nothing further.
            for run in listed {
                print_run_summary(&run);
            }
        }
        cli::ReviewCommand::Result { run_id } => {
            let run = if let Some(run_id) = run_id {
                runs.read(&run_id)?
            } else {
                runs.latest_result()?
            };
            print_run(&run);
            print_run_ledger_entries(state_dir, &run)?;
        }
        cli::ReviewCommand::Cancel { run_id } => {
            // Mark the run cancelled first, then drop its queue item.
            let cancelled = runs.cancel_queued(&run_id)?;
            ReviewQueue::new(state_dir).remove_run_id(&run_id)?;
            println!(
                "cancelled review run {} ({})",
                cancelled.id, cancelled.commit_sha
            );
        }
    }
    Ok(ExitCode::SUCCESS)
}
fn print_run_summary(run: &ReviewRun) {
println!(
"{} {} {} {} entries={} updated={}",
run.id, run.status, run.commit_sha, run.phase, run.ledger_entries, run.updated_at_unix
);
}
fn print_run(run: &ReviewRun) {
println!("run: {}", run.id);
println!("status: {}", run.status);
println!("commit: {}", run.commit_sha);
println!("target: {}", run.target);
println!("phase: {}", run.phase);
println!("ledger_entries: {}", run.ledger_entries);
println!("created_at_unix: {}", run.created_at_unix);
println!("updated_at_unix: {}", run.updated_at_unix);
if let Some(started) = run.started_at_unix {
println!("started_at_unix: {started}");
}
if let Some(completed) = run.completed_at_unix {
println!("completed_at_unix: {completed}");
}
if let Some(error) = &run.error {
println!("error: {error}");
}
}
/// Print the ledger entries whose commit SHA equals that of `run`.
///
/// Reads the full ledger history from `state_dir`; prints `ledger_entries: none`
/// when nothing matches, otherwise one bullet line per matching entry.
fn print_run_ledger_entries(state_dir: &Path, run: &ReviewRun) -> Result<(), ReviewerError> {
    let mut matching = LedgerStore::new(state_dir)
        .read_history()?
        .into_iter()
        .filter(|entry| entry.commit_sha == run.commit_sha)
        .peekable();
    if matching.peek().is_none() {
        println!("ledger_entries: none");
        return Ok(());
    }
    println!("ledger_entries:");
    for entry in matching {
        let finding_count = entry.findings.len();
        println!(
            "- {} {} {} findings={}",
            entry.commit_sha, entry.verdict, entry.disposition, finding_count
        );
    }
    Ok(())
}
/// Everything a review needs about its target: an identifier, a display label,
/// the parsed claim, and the diff text handed to the reviewer.
#[derive(Clone, Debug, Eq, PartialEq)]
struct ReviewMaterial {
// Commit SHA for commit scope; the loaders use the placeholders "STAGED",
// "WORKING_TREE" and "HEAD" for the other scopes.
commit_sha: String,
// Label such as "commit:<sha>", "staged", "working-tree" or "branch:<base>".
target_label: String,
// Claim parsed from the commit message or from the claim file.
claim: Claim,
// Diff text, or a file-list summary when the diff is too large to inline.
diff: String,
}
impl ReviewMaterial {
/// Load the review material for the scope selected by `args`.
///
/// `args.staged` forces the staged scope; otherwise `args.scope` is used.
/// Claims are parsed with `Claim::parse` when `evidence_patterns` is empty and
/// with `Claim::parse_with` otherwise.
///
/// # Errors
///
/// Returns `MissingReviewTarget` when commit scope has no target, an
/// `UnexpectedReviewTarget` when auto, working-tree or branch scope is given a
/// target, and any git, claim-file or claim-parse error from the loaders.
fn load(
args: &cli::ReviewArgs,
state_dir: &Path,
evidence_patterns: &[String],
) -> Result<Self, ReviewerError> {
// Shared claim parser handed to every scope-specific loader.
let parse = |text: &str| {
if evidence_patterns.is_empty() {
Claim::parse(text)
} else {
Claim::parse_with(text, evidence_patterns)
}
};
// The `staged` flag takes precedence over whatever scope was requested.
let scope = if args.staged {
ReviewScope::Staged
} else {
args.scope
};
match scope {
ReviewScope::Commit => {
let sha = args
.target
.clone()
.ok_or(ReviewerError::MissingReviewTarget)?;
// The commit message supplies the claim; the patch supplies the diff.
let message = git_output(["show", "--format=%B", "--no-patch", sha.as_str()])?;
let diff = git_output(["show", "--format=", "--patch", sha.as_str()])?;
let claim = parse(&message)?;
Ok(Self {
commit_sha: sha.clone(),
target_label: format!("commit:{sha}"),
claim,
diff,
})
}
ReviewScope::Staged => Self::load_staged(state_dir, &parse),
ReviewScope::Auto => {
reject_target_with_scope(args)?;
// Auto: review the working tree when it is dirty, the branch otherwise.
if working_tree_dirty()? {
Self::load_working_tree(state_dir, &parse)
} else {
Self::load_branch(args.base.as_deref(), &parse)
}
}
ReviewScope::WorkingTree => {
reject_target_with_scope(args)?;
Self::load_working_tree(state_dir, &parse)
}
ReviewScope::Branch => {
reject_target_with_scope(args)?;
Self::load_branch(args.base.as_deref(), &parse)
}
}
}
/// Load the staged changes (`git diff --cached`) together with the claim read
/// from the claim file in `state_dir`. The result is identified as "STAGED".
fn load_staged<F>(state_dir: &Path, parse: &F) -> Result<Self, ReviewerError>
where
F: Fn(&str) -> Result<Claim, crate::claim::ClaimError>,
{
let raw = git_output(["diff", "--cached"])?;
let files = git_output(["diff", "--cached", "--name-only"])?;
// Falls back to a changed-file summary when the diff exceeds the inline limits.
let diff = materialize_diff("staged", &raw, &files);
let claim = parse(&read_claim_file(state_dir)?)?;
Ok(Self {
commit_sha: "STAGED".to_owned(),
target_label: "staged".to_owned(),
claim,
diff,
})
}
/// Load the working-tree changes: porcelain status, tracked diff against HEAD,
/// and the untracked-file context, together with the claim read from the claim
/// file in `state_dir`. The result is identified as "WORKING_TREE".
fn load_working_tree<F>(state_dir: &Path, parse: &F) -> Result<Self, ReviewerError>
where
F: Fn(&str) -> Result<Claim, crate::claim::ClaimError>,
{
let status = git_output(["status", "--porcelain"])?;
let tracked = git_output(["diff", "HEAD", "--patch"])?;
// Only tracked files changed against HEAD count toward the inline file limit.
let files = git_output(["diff", "HEAD", "--name-only"])?;
let untracked = untracked_file_context()?;
let raw = format!(
"WORKING TREE STATUS:\n{status}\n\nTRACKED DIFF AGAINST HEAD:\n{tracked}\n\nUNTRACKED FILES:\n{untracked}"
);
let diff = materialize_diff("working-tree", &raw, &files);
let claim = parse(&read_claim_file(state_dir)?)?;
Ok(Self {
commit_sha: "WORKING_TREE".to_owned(),
target_label: "working-tree".to_owned(),
claim,
diff,
})
}
/// Load the changes between the merge base of HEAD and `base` and HEAD itself.
///
/// When `base` is `None` the default branch ref is looked up. The claim is
/// parsed from the HEAD commit message and the result is identified as "HEAD".
fn load_branch<F>(base: Option<&str>, parse: &F) -> Result<Self, ReviewerError>
where
F: Fn(&str) -> Result<Claim, crate::claim::ClaimError>,
{
let base = match base {
Some(base) => base.to_owned(),
None => default_branch_ref()?,
};
let merge_base = git_output_slice(&["merge-base", "HEAD", &base])?;
// git prints the SHA with a trailing newline; strip it before building the range.
let merge_base = merge_base.trim().to_owned();
let range = format!("{merge_base}..HEAD");
let message = git_output(["show", "--format=%B", "--no-patch", "HEAD"])?;
let log = git_output_slice(&["log", "--oneline", &range])?;
let stat = git_output_slice(&["diff", "--stat", &range])?;
let raw_patch = git_output_slice(&["diff", "--patch", &range])?;
let files = git_output_slice(&["diff", "--name-only", &range])?;
let raw = format!(
"BRANCH BASE: {base}\nMERGE BASE: {merge_base}\nCOMMITS:\n{log}\n\nDIFF STAT:\n{stat}\n\nDIFF:\n{raw_patch}"
);
let diff = materialize_diff(&format!("branch:{base}"), &raw, &files);
let claim = parse(&message)?;
Ok(Self {
commit_sha: "HEAD".to_owned(),
target_label: format!("branch:{base}"),
claim,
diff,
})
}
}
/// Fail with `UnexpectedReviewTarget` when a positional target was supplied;
/// used by scopes that do not take one. The error reports `args.scope`.
fn reject_target_with_scope(args: &cli::ReviewArgs) -> Result<(), ReviewerError> {
    match &args.target {
        None => Ok(()),
        Some(target) => Err(ReviewerError::UnexpectedReviewTarget {
            scope: args.scope,
            target: target.clone(),
        }),
    }
}
/// Read `claim.txt` from `state_dir`, wrapping any I/O failure together with
/// the attempted path in `ReviewerError::ClaimFileRead`.
fn read_claim_file(state_dir: &Path) -> Result<String, ReviewerError> {
    let claim_path = state_dir.join("claim.txt");
    match fs::read_to_string(&claim_path) {
        Ok(text) => Ok(text),
        Err(source) => Err(ReviewerError::ClaimFileRead {
            path: claim_path,
            source,
        }),
    }
}
/// Report whether `git status --porcelain` prints anything other than
/// whitespace.
fn working_tree_dirty() -> Result<bool, ReviewerError> {
    let porcelain = git_output(["status", "--porcelain"])?;
    let has_entries = !porcelain.trim().is_empty();
    Ok(has_entries)
}
/// Determine the ref to use as the branch base when none was given.
///
/// Prefers the short name `refs/remotes/origin/HEAD` points at; otherwise
/// returns the first of a fixed candidate list that `git rev-parse --verify`
/// accepts.
///
/// # Errors
///
/// Returns `DefaultBranchNotFound` when neither lookup yields a ref.
fn default_branch_ref() -> Result<String, ReviewerError> {
    // A failing or empty symbolic-ref lookup is not an error; fall through.
    let origin_head = git_output([
        "symbolic-ref",
        "--quiet",
        "--short",
        "refs/remotes/origin/HEAD",
    ])
    .ok()
    .map(|symbolic| symbolic.trim().to_owned())
    .filter(|trimmed| !trimmed.is_empty());
    if let Some(branch) = origin_head {
        return Ok(branch);
    }

    const CANDIDATES: [&str; 6] = [
        "origin/main",
        "origin/master",
        "origin/trunk",
        "main",
        "master",
        "trunk",
    ];
    CANDIDATES
        .iter()
        .copied()
        .find(|candidate| {
            git_output_slice(&["rev-parse", "--verify", "--quiet", *candidate]).is_ok()
        })
        .map(str::to_owned)
        .ok_or(ReviewerError::DefaultBranchNotFound)
}
/// Decide how a diff is handed to the reviewer.
///
/// `raw` is returned unchanged when both its byte length and the number of
/// non-blank lines in `files` are within the inline limits. Otherwise a summary
/// is returned that names `label`, states the limits and actual sizes, lists the
/// changed files, and tells the reviewer to inspect the repository directly.
fn materialize_diff(label: &str, raw: &str, files: &str) -> String {
    let changed: Vec<&str> = files
        .lines()
        .filter(|line| !line.trim().is_empty())
        .collect();
    let bytes = raw.len();
    let fits_inline = bytes <= MAX_INLINE_DIFF_BYTES && changed.len() <= MAX_INLINE_DIFF_FILES;
    if fits_inline {
        return raw.to_owned();
    }
    let listing = match changed.as_slice() {
        [] => "(none)".to_owned(),
        names => names.join("\n"),
    };
    format!(
        "Diff for {label} is too large to inline safely.\ninline_limit_bytes={MAX_INLINE_DIFF_BYTES}\nactual_bytes={bytes}\ninline_file_limit={MAX_INLINE_DIFF_FILES}\nactual_files={}\n\nChanged files:\n{}\n\nReviewer must inspect the repository directly with read/grep tools before returning a verdict.",
        changed.len(),
        listing
    )
}
/// Render the contents of untracked, non-ignored files for inclusion in the
/// working-tree review material.
///
/// Each small text file is emitted under a `--- <path> ---` header. Files that
/// cannot be inlined get an explicit `omitted` marker instead: files larger than
/// `MAX_UNTRACKED_FILE_BYTES`, files containing a NUL byte (treated as binary),
/// symbolic links, and entries that cannot be inspected or read. Entries that
/// are neither regular files nor symlinks are skipped. Returns `(none)` when
/// nothing was rendered.
///
/// # Errors
///
/// Returns the git error when `git ls-files` cannot be run or fails.
fn untracked_file_context() -> Result<String, ReviewerError> {
    // `-z` makes git emit NUL-terminated, unquoted paths. Without it, paths with
    // non-ASCII or special characters are C-quoted, the quoted text does not
    // exist on disk, and those files would silently drop out of the review.
    let listing = git_output(["ls-files", "--others", "--exclude-standard", "-z"])?;
    let mut output = String::new();
    for file in listing.split('\0').filter(|name| !name.is_empty()) {
        let path = Path::new(file);
        // Inspect the entry itself rather than what it points at: following an
        // untracked symlink could inline a file from outside the repository.
        let metadata = match fs::symlink_metadata(path) {
            Ok(metadata) => metadata,
            Err(error) => {
                output.push_str(&format!(
                    "\n--- {file} omitted: unreadable ({error}) ---\n"
                ));
                continue;
            }
        };
        if metadata.file_type().is_symlink() {
            output.push_str(&format!("\n--- {file} omitted: symbolic link ---\n"));
            continue;
        }
        if !metadata.is_file() {
            continue;
        }
        if metadata.len() > MAX_UNTRACKED_FILE_BYTES {
            output.push_str(&format!(
                "\n--- {file} omitted: {} bytes exceeds {MAX_UNTRACKED_FILE_BYTES} byte inline limit ---\n",
                metadata.len()
            ));
            continue;
        }
        let bytes = match fs::read(path) {
            Ok(bytes) => bytes,
            Err(error) => {
                output.push_str(&format!(
                    "\n--- {file} omitted: unreadable ({error}) ---\n"
                ));
                continue;
            }
        };
        // A NUL byte is the heuristic for "binary"; such content is not inlined.
        if bytes.contains(&0) {
            output.push_str(&format!("\n--- {file} omitted: binary file ---\n"));
            continue;
        }
        output.push_str(&format!(
            "\n--- {file} ---\n{}",
            String::from_utf8_lossy(&bytes)
        ));
    }
    if output.is_empty() {
        Ok("(none)".to_owned())
    } else {
        Ok(output)
    }
}
/// Every failure the reviewer module reports: reviewer/arbiter selection,
/// review-target loading, verdict parsing, reviewer and git subprocesses,
/// review queue and run persistence, plus wrapped claim and ledger errors.
#[derive(Debug, Error)]
pub enum ReviewerError {
// --- Reviewer / arbiter selection ---
#[error("missing {role} model")]
MissingModel { role: String },
#[error(
"same reviewer model is disallowed without --allow-same-model: watched={watched_model}, reviewer={reviewer_model}"
)]
SameModelWithoutWaiver {
watched_model: String,
reviewer_model: String,
},
#[error("strict arbiter model must differ from watched and first reviewer models")]
StrictArbiterModelNotDistinct,
#[error("no adversarial pair configured for writer harness {writer:?}")]
NoPairForWriter { writer: String },
#[error(
"strict review requires an arbiter (pair.arbiter or --arbiter-harness/--arbiter-model)"
)]
MissingArbiter,
#[error(
"--{role}-harness={harness:?} was overridden without a matching --{role}-model; the pair's model is for a different harness"
)]
OverrideNeedsModel { role: String, harness: String },
#[error("custom reviewer harness requires explicit command configuration")]
UnsupportedCustomHarness,
#[error("unknown watched agent {value:?}")]
UnknownAgent { value: String },
#[error("unknown reviewer harness {value:?}")]
UnknownHarness { value: String },
// --- Review target loading ---
#[error("missing review target")]
MissingReviewTarget,
#[error("--scope={scope:?} does not accept positional target {target:?}")]
UnexpectedReviewTarget { scope: ReviewScope, target: String },
#[error("could not determine default branch; pass --base explicitly")]
DefaultBranchNotFound,
// Raised by `read_claim_file`, which serves both the staged and the
// working-tree loaders.
#[error("failed to read staged claim file {path}: {source}")]
ClaimFileRead {
path: PathBuf,
#[source]
source: io::Error,
},
// --- Reviewer output ---
#[error("reviewer output was not valid structured JSON verdict: {source}: {output:?}")]
VerdictJson {
source: serde_json::Error,
output: String,
},
#[error("reviewer structured verdict violated schema: {message}")]
VerdictSchema { message: String },
// --- Subprocesses (reviewer and git) ---
#[error("reviewer process exited with status {status:?}: {stderr}")]
ReviewerProcessFailed { status: Option<i32>, stderr: String },
#[error("git command failed: git {args:?}: {stderr}")]
GitFailed { args: Vec<String>, stderr: String },
#[error("failed to spawn git command: {0}")]
GitSpawn(io::Error),
#[error("failed to spawn reviewer process: {0}")]
Spawn(io::Error),
#[error("failed to open reviewer stdin pipe")]
MissingStdinPipe,
#[error("failed to write reviewer prompt: {0}")]
WritePrompt(io::Error),
#[error("failed to wait for reviewer process: {0}")]
Wait(io::Error),
// --- Review queue and run persistence ---
#[error("review queue IO failed: {0}")]
QueueIo(io::Error),
#[error("review queue JSON failed: {0}")]
QueueJson(serde_json::Error),
#[error("review run IO failed: {0}")]
RunIo(io::Error),
#[error("review run JSON failed: {0}")]
RunJson(serde_json::Error),
#[error("review run not found: {id}")]
ReviewRunNotFound { id: String },
#[error("no review runs found")]
NoReviewRuns,
#[error(
"cannot cancel review run {id} with status {status}; only queued runs can be cancelled"
)]
CannotCancelReview { id: String, status: ReviewRunStatus },
// --- Errors passed through unchanged from other modules ---
#[error(transparent)]
Claim(#[from] crate::claim::ClaimError),
#[error(transparent)]
Ledger(#[from] crate::ledger::LedgerError),
}
/// Preamble placed at the start of the first-pass, strict second-pass and
/// strict-goal prompts. It sets the adversarial stance, instructs the reviewer
/// to sweep the repository for each defect class, and spells out the JSON
/// verdict schema the reviewer must return.
const ADVERSARIAL_PREAMBLE: &str = r#"You are an ADVERSARIAL reviewer. Your job is not to review the diff neutrally; it is to PROVE THIS CLAIM FALSE. Assume the author over-rates their own work. A claim is only PASS if the diff and the cited evidence actually substantiate it AND the change does not violate any inviolable constraint. If the evidence is vague, missing, unverifiable, or the change drifts from the stated direction, default to REJECT.
Attack the change for auth and permission holes, data loss, rollback gaps, races, stale state, version skew, observability gaps, missing evidence, fake evidence, broad matchers, gates that fail open, and code that only fixes the instance instead of the defect class.
GREP THE CLASS, NOT THE INSTANCE. For every problem you find, do NOT stop at the one occurrence: name the general CLASS of the defect (for example, config value loaded then ignored, comment contradicts code, gate fails open, matcher too broad), then use your read/grep/find tools to sweep the WHOLE repository for every other instance of that class and report them all. One instance is a symptom; the class is the bug. Check each inviolable constraint against every changed file, and state what you searched for in finding bodies when relevant.
Return valid JSON only. Do not wrap it in Markdown. The schema is:
{
"verdict": "PASS" | "REJECT",
"summary": "one concise sentence explaining why the claim passes or fails",
"findings": [
{
"severity": "critical" | "high" | "medium" | "low",
"title": "short defect title",
"body": "what can go wrong, why this code is vulnerable, and what evidence proves it",
"file": "repo-relative file path",
"line_start": 1,
"line_end": 1,
"confidence": 0,
"recommendation": "concrete change required"
}
],
"next_steps": ["short concrete follow-up commands or edits"]
}
Use "PASS" only when there are no findings. Use "REJECT" when there is at least one material finding."#;
/// Format the optional context section of a prompt: empty when `context` is
/// blank, otherwise the untrimmed context preceded by a blank line.
fn context_block(context: &str) -> String {
    match context.trim() {
        "" => String::new(),
        _ => format!("\n\n{context}"),
    }
}
/// Build the first-pass reviewer prompt: the adversarial preamble, the optional
/// context block, the claim line, and the diff.
fn first_pass_prompt(claim: &Claim, diff: &str, context: &str) -> String {
    let context_section = context_block(context);
    let claim_line = claim.to_line();
    format!(
        "{ADVERSARIAL_PREAMBLE}{}\n\nCLAIM:\n{}\n\nDIFF:\n{}",
        context_section, claim_line, diff
    )
}
fn strict_second_pass_prompt(job: &ReviewJob, first_output: &str) -> String {
format!(
"{ADVERSARIAL_PREAMBLE}\n\nStrict second pass (COMPLETENESS CRITIC): the first reviewer returned a CLEAN verdict. Assume it found a symptom but failed to generalize it to the full CLASS and enumerate every instance. Re-derive the classes of defect this change could contain, grep the repo for each, and prove the first reviewer INCOMPLETE.{}\n\nCLAIM:\n{}\n\nFIRST REVIEW:\n{}\n\nDIFF:\n{}",
context_block(&job.context),
job.claim.to_line(),
first_output,
job.diff
)
}
/// Assemble the ledger entry for one reviewer verdict: the job's commit and
/// claim (with its evidence references as owned strings), the reviewer
/// configuration from `plan`, and the verdict's findings, summary, structured
/// findings, next steps and raw output.
fn entry_from_verdict(job: &ReviewJob, plan: &ReviewPlan, verdict: &ParsedVerdict) -> LedgerEntry {
    let claim = &job.claim;
    let base = LedgerEntry::new(
        job.commit_sha.clone(),
        verdict.verdict,
        claim.to_line(),
        claim
            .evidence
            .iter()
            .map(|evidence| evidence.as_str().to_owned())
            .collect(),
        plan.reviewer_config(),
        verdict.findings.clone(),
    );
    base.with_structured_review(
        verdict.summary.clone(),
        verdict.structured_findings.clone(),
        verdict.next_steps.clone(),
        verdict.raw.clone(),
    )
}
/// Accept only a reviewer process that exited with status code 0; any other
/// status (including a missing one) becomes `ReviewerProcessFailed` carrying
/// the status and captured stderr.
fn ensure_process_success(output: &ProcessOutput) -> Result<(), ReviewerError> {
    match output.status_code {
        Some(0) => Ok(()),
        status => Err(ReviewerError::ReviewerProcessFailed {
            status,
            stderr: output.stderr.clone(),
        }),
    }
}
/// Require the strict arbiter model to differ from both the watched model and
/// the first reviewer model, comparing trimmed, ASCII-lowercased names.
fn validate_strict_arbiter(
    request: &ReviewRequest,
    strict: &StrictReviewConfig,
) -> Result<(), ReviewerError> {
    let arbiter = normalized_model(&strict.arbiter_model);
    let clashes = [&request.watched_model, &request.reviewer_model]
        .iter()
        .any(|other| normalized_model(other) == arbiter);
    if clashes {
        Err(ReviewerError::StrictArbiterModelNotDistinct)
    } else {
        Ok(())
    }
}
/// Reject a blank (empty or whitespace-only) model name with `MissingModel`
/// naming the given `role`.
fn validate_model_present(role: &str, model: &str) -> Result<(), ReviewerError> {
    match model.trim() {
        "" => Err(ReviewerError::MissingModel {
            role: role.to_owned(),
        }),
        _ => Ok(()),
    }
}
/// Array-argument convenience wrapper around `git_output_slice`.
fn git_output<const N: usize>(args: [&str; N]) -> Result<String, ReviewerError> {
    git_output_slice(args.as_slice())
}
/// Run `git` with `args` and return its stdout, decoded lossily as UTF-8.
///
/// # Errors
///
/// Returns `GitSpawn` when the process cannot be started and `GitFailed`
/// (with the arguments and lossily decoded stderr) when git exits unsuccessfully.
fn git_output_slice(args: &[&str]) -> Result<String, ReviewerError> {
    let finished = Command::new("git")
        .args(args)
        .output()
        .map_err(ReviewerError::GitSpawn)?;
    if finished.status.success() {
        return Ok(String::from_utf8_lossy(&finished.stdout).into_owned());
    }
    Err(ReviewerError::GitFailed {
        args: args.iter().copied().map(String::from).collect(),
        stderr: String::from_utf8_lossy(&finished.stderr).into_owned(),
    })
}
/// Parse a watched-agent slug (`claude`, `codex`, `pi`), ignoring surrounding
/// whitespace and ASCII case. Unknown slugs yield `UnknownAgent` carrying the
/// original, untrimmed value.
fn agent_from_slug(value: &str) -> Result<Agent, ReviewerError> {
    let slug = value.trim();
    if slug.eq_ignore_ascii_case("claude") {
        Ok(Agent::Claude)
    } else if slug.eq_ignore_ascii_case("codex") {
        Ok(Agent::Codex)
    } else if slug.eq_ignore_ascii_case("pi") {
        Ok(Agent::Pi)
    } else {
        Err(ReviewerError::UnknownAgent {
            value: value.to_owned(),
        })
    }
}
/// Parse a reviewer-harness slug (`claude`, `codex`, `pi`, `gemini`, `opencode`,
/// `custom`), ignoring surrounding whitespace and ASCII case. Unknown slugs
/// yield `UnknownHarness` carrying the original, untrimmed value.
fn harness_from_slug(value: &str) -> Result<ReviewerHarness, ReviewerError> {
    let slug = value.trim();
    if slug.eq_ignore_ascii_case("claude") {
        Ok(ReviewerHarness::Claude)
    } else if slug.eq_ignore_ascii_case("codex") {
        Ok(ReviewerHarness::Codex)
    } else if slug.eq_ignore_ascii_case("pi") {
        Ok(ReviewerHarness::Pi)
    } else if slug.eq_ignore_ascii_case("gemini") {
        Ok(ReviewerHarness::Gemini)
    } else if slug.eq_ignore_ascii_case("opencode") {
        Ok(ReviewerHarness::Opencode)
    } else if slug.eq_ignore_ascii_case("custom") {
        Ok(ReviewerHarness::Custom)
    } else {
        Err(ReviewerError::UnknownHarness {
            value: value.to_owned(),
        })
    }
}
fn harness_slug(harness: ReviewerHarness) -> &'static str {
match harness {
ReviewerHarness::Claude => "claude",
ReviewerHarness::Codex => "codex",
ReviewerHarness::Pi => "pi",
ReviewerHarness::Gemini => "gemini",
ReviewerHarness::Opencode => "opencode",
ReviewerHarness::Custom => "custom",
}
}
/// Canonical form of a model name for comparisons: surrounding whitespace
/// removed and ASCII letters lowercased.
fn normalized_model(model: &str) -> String {
    let mut normalized = model.trim().to_owned();
    normalized.make_ascii_lowercase();
    normalized
}
/// Current time as whole seconds since the Unix epoch; 0 when the system clock
/// reads earlier than the epoch.
fn unix_now() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
/// Build a run id of the form `<nanos>-<pid>[-<short_sha>]`.
///
/// `nanos` is the time since the Unix epoch in nanoseconds (0 when the clock
/// reads earlier than the epoch), `pid` is this process's id, and `short_sha`
/// is the first 12 ASCII-alphanumeric characters of `commit_sha`; the last
/// segment is left out when no such characters exist.
fn generate_run_id(commit_sha: &str) -> String {
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let pid = std::process::id();
    let mut run_id = format!("{nanos}-{pid}");

    let mut short_sha = String::new();
    for character in commit_sha.chars() {
        if character.is_ascii_alphanumeric() {
            short_sha.push(character);
            if short_sha.len() == 12 {
                break;
            }
        }
    }
    if !short_sha.is_empty() {
        run_id.push('-');
        run_id.push_str(&short_sha);
    }
    run_id
}
#[cfg(test)]
mod tests {
use std::{cell::RefCell, collections::VecDeque};
use proptest::prelude::*;
use super::{
InvocationPlan, MaterialLoader, ParsedVerdict, ProcessOutput, ProcessRunner,
PromptDelivery, ReviewJob, ReviewPlan, ReviewQueue, ReviewRequest, ReviewRunStatus,
ReviewRunStore, ReviewSelection, ReviewerError, StrictGoalCounters, StrictGoalDecision,
StrictGoalPolicy, StrictGoalStopReason, StrictReviewConfig, drain_once, execute_review_job,
run_review_run_command, run_strict_goal_loop,
};
use crate::{
claim::{Claim, EvidenceRef},
cli::{Agent, ReviewerHarness},
config::Effort,
ledger::{LedgerStore, Verdict},
};
fn pass_json() -> String {
serde_json::json!({
"verdict": "PASS",
"summary": "The claim is substantiated by the diff and evidence.",
"findings": [],
"next_steps": []
})
.to_string()
}
fn reject_json(title: &str) -> String {
serde_json::json!({
"verdict": "REJECT",
"summary": "The claim is not substantiated.",
"findings": [{
"severity": "high",
"title": title,
"body": "The cited evidence does not prove the claimed behavior.",
"file": "src/lib.rs",
"line_start": 1,
"line_end": 1,
"confidence": 95,
"recommendation": "Provide executable evidence that proves the claim."
}],
"next_steps": ["Run the relevant verification command."]
})
.to_string()
}
#[test]
fn same_harness_different_model_is_valid() {
let request = ReviewRequest::new(
Agent::Codex,
"gpt-5.4",
ReviewerHarness::Codex,
"gpt-5.5",
false,
"review this",
);
let plan = ReviewPlan::build(request).unwrap();
assert_eq!(plan.watched_agent, Agent::Codex);
assert_eq!(plan.reviewer_harness, ReviewerHarness::Codex);
assert_eq!(plan.invocation.program, "codex");
}
#[test]
fn same_model_is_blocked_by_default() {
let request = ReviewRequest::new(
Agent::Codex,
" GPT-5.5 ",
ReviewerHarness::Claude,
"gpt-5.5",
false,
"review this",
);
let error = ReviewPlan::build(request).unwrap_err();
assert!(matches!(
error,
ReviewerError::SameModelWithoutWaiver { .. }
));
}
#[test]
fn allow_same_model_override_is_deliberate() {
let request = ReviewRequest::new(
Agent::Codex,
"gpt-5.5",
ReviewerHarness::Codex,
"gpt-5.5",
true,
"review this",
);
let plan = ReviewPlan::build(request).unwrap();
assert!(plan.allow_same_model);
assert_eq!(plan.reviewer_model, "gpt-5.5");
}
#[test]
fn provider_mapping_uses_verified_prompt_shapes_and_effort() {
let codex =
InvocationPlan::for_harness(ReviewerHarness::Codex, "gpt-5.5", Effort::Xhigh).unwrap();
assert_eq!(codex.program, "codex");
assert_eq!(
codex.args_for_prompt("prompt"),
[
"exec",
"-m",
"gpt-5.5",
"-c",
"model_reasoning_effort=xhigh",
"prompt"
]
);
let claude =
InvocationPlan::for_harness(ReviewerHarness::Claude, "opus", Effort::High).unwrap();
assert_eq!(claude.program, "claude");
assert_eq!(claude.prompt_delivery, PromptDelivery::Stdin);
assert_eq!(
claude.args_for_prompt("prompt"),
["--print", "--model", "opus", "--effort", "high"]
);
let gemini =
InvocationPlan::for_harness(ReviewerHarness::Gemini, "gemini-pro", Effort::Xhigh)
.unwrap();
assert_eq!(
gemini.args_for_prompt("prompt"),
["-m", "gemini-pro", "-p", "prompt"]
);
let pi = InvocationPlan::for_harness(ReviewerHarness::Pi, "openai/gpt-5.5", Effort::Xhigh)
.unwrap();
assert_eq!(pi.prompt_delivery, PromptDelivery::Stdin);
assert_eq!(
pi.args_for_prompt("prompt"),
[
"--model",
"openai/gpt-5.5",
"--thinking",
"xhigh",
"--tools",
"read,grep,find,ls",
"-p"
]
);
}
#[test]
fn custom_harness_requires_explicit_configuration() {
let error = InvocationPlan::for_harness(ReviewerHarness::Custom, "model", Effort::Xhigh)
.unwrap_err();
assert!(matches!(error, ReviewerError::UnsupportedCustomHarness));
}
#[test]
fn effort_maps_to_each_harness_flag() {
for effort in [
Effort::Minimal,
Effort::Low,
Effort::Medium,
Effort::High,
Effort::Xhigh,
] {
let e = effort.as_str();
let codex = InvocationPlan::for_harness(ReviewerHarness::Codex, "m", effort).unwrap();
assert!(codex.args.contains(&format!("model_reasoning_effort={e}")));
let claude = InvocationPlan::for_harness(ReviewerHarness::Claude, "m", effort).unwrap();
let claude_idx = claude.args.iter().position(|a| a == "--effort").unwrap();
// Claude has no `minimal`; it clamps to a valid level (`low`).
assert_eq!(claude.args[claude_idx + 1], effort.claude_value());
assert_ne!(claude.args[claude_idx + 1], "minimal");
let pi = InvocationPlan::for_harness(ReviewerHarness::Pi, "m", effort).unwrap();
let pi_idx = pi.args.iter().position(|a| a == "--thinking").unwrap();
assert_eq!(pi.args[pi_idx + 1], e);
}
}
#[test]
fn resolve_picks_configured_reviewer_for_every_writer() {
let config = crate::config::TruthMirrorConfig::default();
let cases = [
(Agent::Codex, ReviewerHarness::Claude, "claude-opus-4-8"),
(Agent::Claude, ReviewerHarness::Codex, "gpt-5.5"),
(Agent::Pi, ReviewerHarness::Codex, "gpt-5.5"),
];
for (writer, reviewer_harness, reviewer_model) in cases {
let selection =
ReviewSelection::resolve(Some(writer), None, None, None, None, false, &config)
.unwrap();
assert_eq!(selection.reviewer_harness, reviewer_harness);
assert_eq!(selection.reviewer_model, reviewer_model);
assert_eq!(selection.reviewer_effort, Effort::Xhigh);
}
}
#[test]
fn overriding_reviewer_harness_without_model_is_rejected() {
// codex's default pair reviewer is claude; overriding harness to pi with no
// model would pair the pi harness with a claude model string.
let config = crate::config::TruthMirrorConfig::default();
let error = ReviewSelection::resolve(
Some(Agent::Codex),
None,
Some(ReviewerHarness::Pi),
None,
None,
false,
&config,
)
.unwrap_err();
assert!(matches!(error, ReviewerError::OverrideNeedsModel { .. }));
}
#[test]
fn overriding_reviewer_harness_matching_pair_is_ok() {
let config = crate::config::TruthMirrorConfig::default();
let selection = ReviewSelection::resolve(
Some(Agent::Codex),
None,
Some(ReviewerHarness::Claude),
None,
None,
false,
&config,
)
.unwrap();
assert_eq!(selection.reviewer_harness, ReviewerHarness::Claude);
assert_eq!(selection.reviewer_model, "claude-opus-4-8");
}
#[test]
fn config_allow_same_model_waives_opposition() {
let config = crate::config::TruthMirrorConfig {
allow_same_model: true,
..crate::config::TruthMirrorConfig::default()
};
let selection = ReviewSelection::resolve(
Some(Agent::Codex),
Some("gpt-5.5".to_owned()),
Some(ReviewerHarness::Codex),
Some("gpt-5.5".to_owned()),
None,
false, // CLI flag not set — the config waiver must carry it
&config,
)
.unwrap();
assert!(selection.allow_same_model);
// Same watched+reviewer model builds because the config waiver applies.
assert!(ReviewPlan::build(selection.request_for("review".to_owned())).is_ok());
}
#[test]
fn resolve_arbiter_uses_pair_when_cli_absent() {
let config = crate::config::TruthMirrorConfig::default();
let arbiter =
ReviewSelection::resolve_arbiter(Agent::Codex, None, None, None, &config).unwrap();
assert_eq!(arbiter.arbiter_harness, ReviewerHarness::Pi);
assert_eq!(arbiter.arbiter_effort, Effort::Xhigh);
}
#[test]
fn first_pass_prompt_is_adversarial_and_injects_context() {
let prompt = super::first_pass_prompt(
&claim(),
"THE_DIFF_BODY",
"INVIOLABLE CONSTRAINTS: never fake tests",
);
assert!(prompt.contains("PROVE THIS CLAIM FALSE"));
assert!(prompt.contains("default to REJECT"));
assert!(prompt.contains("INVIOLABLE CONSTRAINTS: never fake tests"));
assert!(prompt.contains("THE_DIFF_BODY"));
// Class-generalized review: grep the class, not the instance.
assert!(prompt.contains("GREP THE CLASS, NOT THE INSTANCE"));
assert!(prompt.contains("\"severity\""));
assert!(prompt.contains("\"recommendation\""));
}
#[test]
fn strict_second_pass_is_a_completeness_critic() {
let job = review_job(true);
let first_output = pass_json();
let prompt = super::strict_second_pass_prompt(&job, &first_output);
assert!(prompt.contains("COMPLETENESS CRITIC"));
assert!(prompt.contains("generalize"));
// Inherits the class-sweep preamble.
assert!(prompt.contains("GREP THE CLASS, NOT THE INSTANCE"));
}
#[test]
fn prompt_omits_context_block_when_empty() {
let prompt = super::first_pass_prompt(&claim(), "d", "");
// No dangling empty context header.
assert!(!prompt.contains("INVIOLABLE CONSTRAINTS"));
assert!(prompt.contains("PROVE THIS CLAIM FALSE"));
}
#[test]
fn subprocess_runner_is_mockable() {
struct MockRunner;
impl ProcessRunner for MockRunner {
fn run(
&self,
invocation: &InvocationPlan,
prompt: &str,
) -> Result<ProcessOutput, ReviewerError> {
assert_eq!(invocation.program, "codex");
assert_eq!(
invocation.args_for_prompt(prompt).last().unwrap(),
"review this"
);
Ok(ProcessOutput {
status_code: Some(0),
stdout: pass_json(),
stderr: String::new(),
})
}
}
let request = ReviewRequest::new(
Agent::Codex,
"gpt-5.4",
ReviewerHarness::Codex,
"gpt-5.5",
false,
"review this",
);
let plan = ReviewPlan::build(request).unwrap();
let output = plan.run_with("review this", &MockRunner).unwrap();
assert!(output.stdout.contains("PASS"));
}
#[test]
fn verdict_parser_extracts_rejection_findings() {
let verdict = ParsedVerdict::parse(&reject_json("missing proof")).unwrap();
assert_eq!(verdict.verdict, Verdict::Reject);
assert_eq!(verdict.structured_findings[0].title, "missing proof");
assert!(verdict.findings[0].contains("missing proof"));
}
#[test]
fn verdict_parser_rejects_legacy_line_protocol() {
let error =
ParsedVerdict::parse("VERDICT: REJECT\nFINDINGS:\n- missing proof\n").unwrap_err();
assert!(matches!(error, ReviewerError::VerdictJson { .. }));
}
#[test]
fn large_diff_materialization_falls_back_to_file_summary() {
let files = "a.rs\nb.rs\nc.rs\n";
let materialized = super::materialize_diff("branch:main", "tiny diff", files);
assert!(materialized.contains("too large to inline safely"));
assert!(materialized.contains("actual_files=3"));
assert!(materialized.contains("a.rs\nb.rs\nc.rs"));
assert!(materialized.contains("inspect the repository directly"));
}
#[test]
fn review_queue_schedules_commits_without_running_models() {
let temp = tempfile::tempdir().unwrap();
let queue = ReviewQueue::new(temp.path());
queue.enqueue("abc123").unwrap();
let pending = queue.pending().unwrap();
assert_eq!(pending.len(), 1);
assert_eq!(pending[0].commit_sha, "abc123");
assert!(!pending[0].run_id.is_empty());
let run = ReviewRunStore::new(temp.path())
.read(&pending[0].run_id)
.unwrap();
assert_eq!(run.commit_sha, "abc123");
assert_eq!(run.status, ReviewRunStatus::Queued);
}
#[test]
fn review_cancel_marks_queued_run_and_removes_queue_item() {
let temp = tempfile::tempdir().unwrap();
let queue = ReviewQueue::new(temp.path());
let queued = queue.enqueue("abc123").unwrap();
run_review_run_command(
crate::cli::ReviewCommand::Cancel {
run_id: queued.run_id.clone(),
},
temp.path(),
)
.unwrap();
assert!(queue.pending().unwrap().is_empty());
let run = ReviewRunStore::new(temp.path())
.read(&queued.run_id)
.unwrap();
assert_eq!(run.status, ReviewRunStatus::Cancelled);
}
#[test]
fn execute_review_records_reject_verdict() {
let temp = tempfile::tempdir().unwrap();
let store = LedgerStore::new(temp.path());
let job = review_job(false);
let runner = SequenceRunner::new([reject_json("unsupported")]);
let execution = execute_review_job(job, &runner, &store).unwrap();
assert_eq!(execution.entries.len(), 1);
assert_eq!(execution.entries[0].verdict, Verdict::Reject);
assert_eq!(
execution.entries[0].structured_findings[0].title,
"unsupported"
);
assert!(
execution.entries[0]
.raw_reviewer_output
.contains("\"REJECT\"")
);
assert_eq!(store.unresolved_rejections().unwrap().len(), 1);
}
/// With strict mode enabled and two passing outputs, both passes are recorded:
/// the first entry is attributed to the primary reviewer model and the second
/// to the arbiter model configured by `review_job`.
#[test]
fn strict_two_pass_records_both_clean_passes() {
    let workdir = tempfile::tempdir().unwrap();
    let ledger = LedgerStore::new(workdir.path());
    let runner = SequenceRunner::new([pass_json(), pass_json()]);

    let execution = execute_review_job(review_job(true), &runner, &ledger).unwrap();

    assert_eq!(execution.entries.len(), 2);
    assert_eq!(ledger.read_history().unwrap().len(), 2);
    let first = &execution.entries[0];
    let second = &execution.entries[1];
    assert_eq!(first.reviewer.model, "gpt-5.5");
    assert_eq!(second.reviewer.model, "claude-opus-4-8");
}
/// Setting the strict arbiter model to the same value as the primary reviewer
/// model makes `execute_review_job` fail with `StrictArbiterModelNotDistinct`.
#[test]
fn strict_arbiter_model_must_be_third_model() {
    let workdir = tempfile::tempdir().unwrap();
    let ledger = LedgerStore::new(workdir.path());
    let runner = SequenceRunner::new([pass_json()]);

    let mut job = review_job(true);
    // "gpt-5.5" is the reviewer model that `review_job` already requests.
    job.strict.as_mut().unwrap().arbiter_model = String::from("gpt-5.5");

    let failure = execute_review_job(job, &runner, &ledger).unwrap_err();
    assert!(matches!(
        failure,
        ReviewerError::StrictArbiterModelNotDistinct
    ));
}
/// Table-driven check of `StrictGoalPolicy::decide` with thresholds of 2 lies
/// and 3 fuckups: counters below both thresholds continue, and reaching either
/// threshold stops with the matching reason.
#[test]
fn strict_goal_policy_stops_at_configured_lie_or_fuckup_count() {
    let policy = StrictGoalPolicy {
        stop_after_lies: 2,
        stop_after_fuckups: 3,
    };

    // (lies_exposed, fuckups_registered, expected decision)
    let cases = [
        (1, 2, StrictGoalDecision::Continue),
        (
            2,
            0,
            StrictGoalDecision::Stop {
                reason: StrictGoalStopReason::LiesExposed,
            },
        ),
        (
            0,
            3,
            StrictGoalDecision::Stop {
                reason: StrictGoalStopReason::FuckupsRegistered,
            },
        ),
    ];

    for (lies_exposed, fuckups_registered, expected) in cases {
        let counters = StrictGoalCounters {
            lies_exposed,
            fuckups_registered,
        };
        assert_eq!(policy.decide(counters), expected);
    }
}
/// Draining a queue that holds a duplicated SHA reviews each distinct commit
/// once, empties the queue, writes one ledger entry per reviewed commit, and
/// leaves three run records: two `Completed` and one `Cancelled`.
#[test]
fn drain_once_reviews_each_commit_once_and_clears_queue() {
    let workdir = tempfile::tempdir().unwrap();
    let root = workdir.path();
    let ledger = LedgerStore::new(root);
    let queue = ReviewQueue::new(root);
    // "abc123" is enqueued twice; the duplicate SHA is reviewed only once.
    for sha in ["abc123", "abc123", "def456"] {
        queue.enqueue(sha).unwrap();
    }

    let loader = StaticLoader::new();
    let runner = SequenceRunner::new([reject_json("unsupported"), pass_json()]);
    let reviewer = selection();
    let report = drain_once(&queue, &loader, &reviewer, "", &runner, &ledger).unwrap();

    assert_eq!(report.reviewed, ["abc123", "def456"]);
    assert_eq!(report.ledger_entries, 2);
    assert!(queue.pending().unwrap().is_empty());
    assert_eq!(ledger.read_history().unwrap().len(), 2);
    assert_eq!(ledger.unresolved_rejections().unwrap().len(), 1);

    let runs = ReviewRunStore::new(root).list().unwrap();
    assert_eq!(runs.len(), 3);
    let count_with = |wanted: ReviewRunStatus| {
        runs.iter().filter(|run| run.status == wanted).count()
    };
    assert_eq!(count_with(ReviewRunStatus::Completed), 2);
    assert_eq!(count_with(ReviewRunStatus::Cancelled), 1);
}
/// Draining an empty queue reviews nothing and writes nothing to the ledger.
#[test]
fn drain_once_is_a_noop_on_empty_queue() {
    let workdir = tempfile::tempdir().unwrap();
    let root = workdir.path();
    let ledger = LedgerStore::new(root);
    let queue = ReviewQueue::new(root);
    let loader = StaticLoader::new();
    let runner = ConstRunner::new(pass_json());
    let reviewer = selection();

    let report = drain_once(&queue, &loader, &reviewer, "", &runner, &ledger).unwrap();

    assert!(report.reviewed.is_empty());
    assert_eq!(report.ledger_entries, 0);
    assert_eq!(ledger.read_history().unwrap().len(), 0);
}
/// With `stop_after_lies` set to 1, a single rejecting pass ends the loop with
/// the `LiesExposed` stop reason and exactly one ledger entry, even though the
/// pass ceiling is 5.
#[test]
fn strict_goal_loop_stops_at_configured_lie_count() {
    let workdir = tempfile::tempdir().unwrap();
    let ledger = LedgerStore::new(workdir.path());
    let runner = SequenceRunner::new([reject_json("lie")]);
    let policy = StrictGoalPolicy {
        stop_after_lies: 1,
        stop_after_fuckups: 0,
    };
    let reviewed_claim = claim();
    let reviewer = selection();
    let max_passes = 5;

    let outcome = run_strict_goal_loop(
        "abc123",
        &reviewed_claim,
        "diff",
        "",
        &reviewer,
        policy,
        max_passes,
        &runner,
        &ledger,
    )
    .unwrap();

    assert_eq!(outcome.passes, 1);
    assert_eq!(outcome.counters.lies_exposed, 1);
    assert_eq!(outcome.stop_reason, Some(StrictGoalStopReason::LiesExposed));
    assert_eq!(ledger.read_history().unwrap().len(), 1);
}
/// When every pass is clean, the loop runs up to the pass ceiling (3 here),
/// exposes no lies, reports no stop reason, and records one ledger entry per
/// pass.
#[test]
fn strict_goal_loop_terminates_at_max_passes_for_honest_agent() {
    let workdir = tempfile::tempdir().unwrap();
    let ledger = LedgerStore::new(workdir.path());
    let runner = ConstRunner::new(pass_json());
    let policy = StrictGoalPolicy {
        stop_after_lies: 2,
        stop_after_fuckups: 5,
    };
    let reviewed_claim = claim();
    let reviewer = selection();
    let max_passes = 3;

    let outcome = run_strict_goal_loop(
        "abc123",
        &reviewed_claim,
        "diff",
        "",
        &reviewer,
        policy,
        max_passes,
        &runner,
        &ledger,
    )
    .unwrap();

    assert_eq!(outcome.passes, 3);
    assert_eq!(outcome.counters.lies_exposed, 0);
    assert_eq!(outcome.stop_reason, None);
    assert_eq!(ledger.read_history().unwrap().len(), 3);
}
/// With `stop_after_fuckups` set to 2 and a runner that always rejects, the
/// loop ends after two passes with the `FuckupsRegistered` stop reason. Both
/// counters read 2 at that point; `stop_after_lies` is 0 in this policy and the
/// lie count does not end the loop here.
#[test]
fn strict_goal_loop_stops_when_fuckups_accumulate() {
    let workdir = tempfile::tempdir().unwrap();
    let ledger = LedgerStore::new(workdir.path());
    // Every pass returns the same rejection. Each structured finding registers
    // one fuckup, so two passes reach the threshold of 2.
    let runner = ConstRunner::new(reject_json("nit"));
    let policy = StrictGoalPolicy {
        stop_after_lies: 0,
        stop_after_fuckups: 2,
    };
    let reviewed_claim = claim();
    let reviewer = selection();
    let max_passes = 10;

    let outcome = run_strict_goal_loop(
        "abc123",
        &reviewed_claim,
        "diff",
        "",
        &reviewer,
        policy,
        max_passes,
        &runner,
        &ledger,
    )
    .unwrap();

    assert_eq!(outcome.passes, 2);
    assert_eq!(outcome.counters.lies_exposed, 2);
    assert_eq!(outcome.counters.fuckups_registered, 2);
    assert_eq!(
        outcome.stop_reason,
        Some(StrictGoalStopReason::FuckupsRegistered)
    );
}
// Property: for any pass ceiling in 1..6, a loop fed only clean passes runs
// exactly that many passes and finishes without a stop reason.
proptest! {
    #[test]
    fn strict_goal_loop_never_exceeds_max_passes(ceiling in 1u32..6) {
        let workdir = tempfile::tempdir().unwrap();
        let ledger = LedgerStore::new(workdir.path());
        let runner = ConstRunner::new(pass_json());
        // Both thresholds disabled: only the ceiling can stop the loop.
        let policy = StrictGoalPolicy {
            stop_after_lies: 0,
            stop_after_fuckups: 0,
        };
        let reviewed_claim = claim();
        let reviewer = selection();

        let outcome = run_strict_goal_loop(
            "abc123",
            &reviewed_claim,
            "diff",
            "",
            &reviewer,
            policy,
            ceiling,
            &runner,
            &ledger,
        )
        .unwrap();

        prop_assert!(outcome.passes <= ceiling);
        prop_assert_eq!(outcome.passes, ceiling);
        prop_assert!(outcome.stop_reason.is_none());
    }
}
// Property: `ReviewPlan::build` rejects a request without a waiver exactly when
// the watched and reviewer model names are equal after trimming and ignoring
// ASCII case; every other pairing builds successfully.
proptest! {
    #[test]
    fn model_opposition_is_enforced_for_arbitrary_models(
        watched in "[A-Za-z0-9._/-]{1,32}",
        reviewer in "[A-Za-z0-9._/-]{1,32}",
    ) {
        // Decide the expected outcome before the names are moved into the request.
        let same_model = watched.trim().eq_ignore_ascii_case(reviewer.trim());

        let request = ReviewRequest::new(
            Agent::Codex,
            watched,
            ReviewerHarness::Codex,
            reviewer,
            false,
            "review this",
        );
        let result = ReviewPlan::build(request);

        if same_model {
            // NOTE(review): kept as a local rather than inlined; `prop_assert!`
            // presumably turns its condition into a format string, where the
            // `{ .. }` pattern would be misparsed — confirm before inlining.
            let blocked = matches!(result, Err(ReviewerError::SameModelWithoutWaiver { .. }));
            prop_assert!(blocked);
        } else {
            prop_assert!(result.is_ok());
        }
    }
}
/// Builds the fixed claim shared by these tests: summary "add review", command
/// "cargo test", backed by a single `tests:cargo-test` evidence reference.
fn claim() -> Claim {
    let evidence = EvidenceRef::parse("tests:cargo-test").unwrap();
    Claim::new("add review", "cargo test", vec![evidence]).unwrap()
}
/// Builds the fixed reviewer selection shared by these tests: a Codex writer on
/// "gpt-5.4" reviewed by Codex on "gpt-5.5" at `Xhigh` effort, with no
/// same-model waiver and strict mode off.
fn selection() -> ReviewSelection {
    let watched_model = String::from("gpt-5.4");
    let reviewer_model = String::from("gpt-5.5");
    ReviewSelection {
        watched_agent: Agent::Codex,
        watched_model,
        reviewer_harness: ReviewerHarness::Codex,
        reviewer_model,
        reviewer_effort: Effort::Xhigh,
        allow_same_model: false,
        strict: None,
    }
}
/// Test double for `MaterialLoader` that returns the same claim and diff for
/// every commit SHA.
struct StaticLoader {
    /// Claim returned from every `load` call.
    claim: Claim,
    /// Diff text returned from every `load` call.
    diff: String,
}

impl StaticLoader {
    /// Creates a loader holding the shared test claim and a one-line diff header.
    fn new() -> Self {
        let diff = String::from("diff --git a/src/lib.rs b/src/lib.rs");
        Self {
            claim: claim(),
            diff,
        }
    }
}

impl MaterialLoader for StaticLoader {
    /// Ignores the SHA and returns clones of the stored claim and diff.
    fn load(&self, _sha: &str) -> Result<(Claim, String), ReviewerError> {
        let material = (self.claim.clone(), self.diff.clone());
        Ok(material)
    }
}
/// Test double for `ProcessRunner` that answers every invocation with the same
/// stdout text and a zero exit status.
struct ConstRunner {
    /// Text returned as stdout on every run.
    output: String,
}

impl ConstRunner {
    /// Creates a runner that always replies with `output`.
    fn new(output: impl Into<String>) -> Self {
        let output = output.into();
        Self { output }
    }
}

impl ProcessRunner for ConstRunner {
    /// Ignores the invocation and prompt and returns the stored output as
    /// stdout, with empty stderr and status code 0.
    fn run(
        &self,
        _invocation: &InvocationPlan,
        _prompt: &str,
    ) -> Result<ProcessOutput, ReviewerError> {
        let reply = ProcessOutput {
            status_code: Some(0),
            stdout: self.output.clone(),
            stderr: String::new(),
        };
        Ok(reply)
    }
}
/// Builds the fixed review job shared by these tests: commit "abc123", a
/// one-line diff header, empty context, and a Codex "gpt-5.4" writer reviewed
/// by Codex "gpt-5.5".
///
/// When `strict` is true the job also carries a strict configuration whose
/// arbiter is Claude "claude-opus-4-8" at `Xhigh` effort; otherwise `strict`
/// is `None`.
fn review_job(strict: bool) -> ReviewJob {
    let request = ReviewRequest::new(
        Agent::Codex,
        "gpt-5.4",
        ReviewerHarness::Codex,
        "gpt-5.5",
        false,
        "review this",
    );
    let strict = if strict {
        Some(StrictReviewConfig {
            arbiter_harness: ReviewerHarness::Claude,
            arbiter_model: String::from("claude-opus-4-8"),
            arbiter_effort: Effort::Xhigh,
        })
    } else {
        None
    };
    ReviewJob {
        commit_sha: String::from("abc123"),
        diff: String::from("diff --git a/src/lib.rs b/src/lib.rs"),
        context: String::new(),
        request,
        claim: claim(),
        strict,
    }
}
/// Test double for `ProcessRunner` that replays a scripted list of stdout
/// texts, one per invocation, in the order given.
struct SequenceRunner {
    /// Remaining scripted outputs. Kept in a `RefCell` because `run` takes
    /// `&self` but has to consume the front of the queue.
    outputs: RefCell<VecDeque<String>>,
}

impl SequenceRunner {
    /// Creates a runner that returns each item of `outputs` once, front to back.
    fn new<I, S>(outputs: I) -> Self
    where
        I: IntoIterator<Item = S>,
        S: Into<String>,
    {
        let scripted: VecDeque<String> = outputs.into_iter().map(Into::into).collect();
        Self {
            outputs: RefCell::new(scripted),
        }
    }
}

impl ProcessRunner for SequenceRunner {
    /// Ignores the invocation and prompt and returns the next scripted output
    /// as stdout, with empty stderr and status code 0.
    ///
    /// # Panics
    ///
    /// Panics when called more times than outputs were scripted. The message
    /// names the cause so an under-provisioned test is easy to diagnose.
    fn run(
        &self,
        _invocation: &InvocationPlan,
        _prompt: &str,
    ) -> Result<ProcessOutput, ReviewerError> {
        let stdout = self
            .outputs
            .borrow_mut()
            .pop_front()
            .expect("SequenceRunner exhausted: the test ran more reviewer invocations than it scripted outputs");
        Ok(ProcessOutput {
            status_code: Some(0),
            stdout,
            stderr: String::new(),
        })
    }
}
}