use std::{
fs,
io::{self, Write},
path::{Path, PathBuf},
process::{Command, ExitCode, Stdio},
time::{SystemTime, UNIX_EPOCH},
};
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use thiserror::Error;
use crate::{
claim::{Claim, EvidenceRef},
cli::{self, Agent, ReviewerHarness},
config,
ledger::{LedgerEntry, LedgerStore, ReviewerConfig, Verdict},
};
pub const REVIEW_QUEUE_FILE: &str = "review-queue.jsonl";
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReviewRequest {
pub watched_agent: Agent,
pub watched_model: String,
pub reviewer_harness: ReviewerHarness,
pub reviewer_model: String,
pub allow_same_model: bool,
pub prompt: String,
}
impl ReviewRequest {
pub fn new(
watched_agent: Agent,
watched_model: impl Into<String>,
reviewer_harness: ReviewerHarness,
reviewer_model: impl Into<String>,
allow_same_model: bool,
prompt: impl Into<String>,
) -> Self {
Self {
watched_agent,
watched_model: watched_model.into(),
reviewer_harness,
reviewer_model: reviewer_model.into(),
allow_same_model,
prompt: prompt.into(),
}
}
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReviewSelection {
pub watched_agent: Agent,
pub watched_model: String,
pub reviewer_harness: ReviewerHarness,
pub reviewer_model: String,
pub allow_same_model: bool,
pub strict: Option<StrictReviewConfig>,
}
impl ReviewSelection {
#[allow(clippy::too_many_arguments)]
pub fn resolve(
watched_agent: Option<Agent>,
reviewer_harness: Option<ReviewerHarness>,
watched_model: Option<String>,
reviewer_model: Option<String>,
allow_same_model: bool,
strict: Option<StrictReviewConfig>,
config: &config::TruthMirrorConfig,
) -> Result<Self, ReviewerError> {
let watched_agent = match watched_agent {
Some(agent) => agent,
None => agent_from_slug(&config.review.watched.harness)?,
};
let reviewer_harness = match reviewer_harness {
Some(harness) => harness,
None => harness_from_slug(&config.review.reviewer.harness)?,
};
let watched_model = watched_model.unwrap_or_else(|| config.review.watched.model.clone());
let reviewer_model = reviewer_model.unwrap_or_else(|| config.review.reviewer.model.clone());
Ok(Self {
watched_agent,
watched_model,
reviewer_harness,
reviewer_model,
allow_same_model,
strict,
})
}
fn request_for(&self, prompt: String) -> ReviewRequest {
ReviewRequest::new(
self.watched_agent,
self.watched_model.clone(),
self.reviewer_harness,
self.reviewer_model.clone(),
self.allow_same_model,
prompt,
)
}
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReviewPlan {
pub watched_agent: Agent,
pub watched_model: String,
pub reviewer_harness: ReviewerHarness,
pub reviewer_model: String,
pub allow_same_model: bool,
pub invocation: InvocationPlan,
}
impl ReviewPlan {
pub fn build(request: ReviewRequest) -> Result<Self, ReviewerError> {
validate_model_present("watched", &request.watched_model)?;
validate_model_present("reviewer", &request.reviewer_model)?;
if !request.allow_same_model
&& normalized_model(&request.watched_model) == normalized_model(&request.reviewer_model)
{
return Err(ReviewerError::SameModelWithoutWaiver {
watched_model: request.watched_model,
reviewer_model: request.reviewer_model,
});
}
let invocation =
InvocationPlan::for_harness(request.reviewer_harness, &request.reviewer_model)?;
Ok(Self {
watched_agent: request.watched_agent,
watched_model: request.watched_model,
reviewer_harness: request.reviewer_harness,
reviewer_model: request.reviewer_model,
allow_same_model: request.allow_same_model,
invocation,
})
}
pub fn run_with<R: ProcessRunner>(
&self,
prompt: &str,
runner: &R,
) -> Result<ProcessOutput, ReviewerError> {
runner.run(&self.invocation, prompt)
}
fn reviewer_config(&self) -> ReviewerConfig {
ReviewerConfig::new(
harness_slug(self.reviewer_harness),
self.reviewer_model.clone(),
self.allow_same_model,
)
}
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct InvocationPlan {
pub program: String,
pub args: Vec<String>,
pub prompt_delivery: PromptDelivery,
}
impl InvocationPlan {
pub fn for_harness(harness: ReviewerHarness, model: &str) -> Result<Self, ReviewerError> {
validate_model_present("reviewer", model)?;
let model = model.trim();
let plan = match harness {
ReviewerHarness::Claude => Self {
program: "claude".to_owned(),
args: vec!["--print".to_owned(), "--model".to_owned(), model.to_owned()],
prompt_delivery: PromptDelivery::Stdin,
},
ReviewerHarness::Codex => Self {
program: "codex".to_owned(),
args: vec!["exec".to_owned(), "-m".to_owned(), model.to_owned()],
prompt_delivery: PromptDelivery::PositionalArgument,
},
ReviewerHarness::Pi => Self {
program: "pi".to_owned(),
args: vec!["--model".to_owned(), model.to_owned(), "-p".to_owned()],
prompt_delivery: PromptDelivery::Stdin,
},
ReviewerHarness::Gemini => Self {
program: "gemini".to_owned(),
args: vec!["-m".to_owned(), model.to_owned()],
prompt_delivery: PromptDelivery::FlagValue("-p".to_owned()),
},
ReviewerHarness::Opencode => Self {
program: "opencode".to_owned(),
args: vec!["run".to_owned(), "--model".to_owned(), model.to_owned()],
prompt_delivery: PromptDelivery::PositionalArgument,
},
ReviewerHarness::Custom => return Err(ReviewerError::UnsupportedCustomHarness),
};
Ok(plan)
}
pub fn args_for_prompt(&self, prompt: &str) -> Vec<String> {
let mut args = self.args.clone();
match &self.prompt_delivery {
PromptDelivery::Stdin => {}
PromptDelivery::PositionalArgument => args.push(prompt.to_owned()),
PromptDelivery::FlagValue(flag) => {
args.push(flag.clone());
args.push(prompt.to_owned());
}
}
args
}
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PromptDelivery {
Stdin,
PositionalArgument,
FlagValue(String),
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ProcessOutput {
pub status_code: Option<i32>,
pub stdout: String,
pub stderr: String,
}
pub trait ProcessRunner {
fn run(
&self,
invocation: &InvocationPlan,
prompt: &str,
) -> Result<ProcessOutput, ReviewerError>;
}
#[derive(Clone, Copy, Debug, Default)]
pub struct StdProcessRunner;
impl ProcessRunner for StdProcessRunner {
fn run(
&self,
invocation: &InvocationPlan,
prompt: &str,
) -> Result<ProcessOutput, ReviewerError> {
let mut command = Command::new(&invocation.program);
command.args(invocation.args_for_prompt(prompt));
command.stdout(Stdio::piped()).stderr(Stdio::piped());
if invocation.prompt_delivery == PromptDelivery::Stdin {
command.stdin(Stdio::piped());
}
let mut child = command.spawn().map_err(ReviewerError::Spawn)?;
if invocation.prompt_delivery == PromptDelivery::Stdin {
let mut stdin = child.stdin.take().ok_or(ReviewerError::MissingStdinPipe)?;
stdin
.write_all(prompt.as_bytes())
.map_err(ReviewerError::WritePrompt)?;
}
let output = child.wait_with_output().map_err(ReviewerError::Wait)?;
Ok(ProcessOutput {
status_code: output.status.code(),
stdout: String::from_utf8_lossy(&output.stdout).into_owned(),
stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
})
}
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReviewJob {
pub commit_sha: String,
pub claim: Claim,
pub diff: String,
pub request: ReviewRequest,
pub strict: Option<StrictReviewConfig>,
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct StrictReviewConfig {
pub arbiter_harness: ReviewerHarness,
pub arbiter_model: String,
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReviewExecution {
pub entries: Vec<LedgerEntry>,
}
pub fn execute_review_job<R: ProcessRunner>(
job: ReviewJob,
runner: &R,
store: &LedgerStore,
) -> Result<ReviewExecution, ReviewerError> {
let first_plan = ReviewPlan::build(job.request.clone())?;
let first_output = first_plan.run_with(&job.request.prompt, runner)?;
ensure_process_success(&first_output)?;
let first_verdict = ParsedVerdict::parse(&first_output.stdout)?;
let first_entry = entry_from_verdict(&job, &first_plan, &first_verdict);
store.append_entry(&first_entry)?;
let mut entries = vec![first_entry];
if let Some(strict) = &job.strict
&& first_verdict.verdict == Verdict::Pass
&& first_verdict.findings.is_empty()
{
validate_strict_arbiter(&job.request, strict)?;
let strict_prompt = strict_second_pass_prompt(&job, &first_output.stdout);
let strict_request = ReviewRequest::new(
job.request.watched_agent,
job.request.watched_model.clone(),
strict.arbiter_harness,
strict.arbiter_model.clone(),
false,
strict_prompt,
);
let strict_plan = ReviewPlan::build(strict_request.clone())?;
let strict_output = strict_plan.run_with(&strict_request.prompt, runner)?;
ensure_process_success(&strict_output)?;
let strict_verdict = ParsedVerdict::parse(&strict_output.stdout)?;
let strict_entry = entry_from_verdict(&job, &strict_plan, &strict_verdict);
store.append_entry(&strict_entry)?;
entries.push(strict_entry);
}
Ok(ReviewExecution { entries })
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParsedVerdict {
pub verdict: Verdict,
pub findings: Vec<String>,
pub raw: String,
}
impl ParsedVerdict {
pub fn parse(output: &str) -> Result<Self, ReviewerError> {
let verdict = output.lines().find_map(parse_verdict_line).ok_or_else(|| {
ReviewerError::VerdictParse {
output: output.to_owned(),
}
})?;
let findings = parse_findings(output);
Ok(Self {
verdict,
findings,
raw: output.to_owned(),
})
}
}
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct QueuedReview {
pub commit_sha: String,
pub enqueued_at_unix: u64,
}
#[derive(Clone, Debug)]
pub struct ReviewQueue {
root: PathBuf,
}
impl ReviewQueue {
pub fn new(root: impl Into<PathBuf>) -> Self {
Self { root: root.into() }
}
pub fn path(&self) -> PathBuf {
self.root.join(REVIEW_QUEUE_FILE)
}
pub fn enqueue(&self, commit_sha: impl Into<String>) -> Result<QueuedReview, ReviewerError> {
fs::create_dir_all(&self.root).map_err(ReviewerError::QueueIo)?;
let item = QueuedReview {
commit_sha: commit_sha.into(),
enqueued_at_unix: unix_now(),
};
let mut file = fs::OpenOptions::new()
.create(true)
.append(true)
.open(self.path())
.map_err(ReviewerError::QueueIo)?;
serde_json::to_writer(&mut file, &item).map_err(ReviewerError::QueueJson)?;
writeln!(file).map_err(ReviewerError::QueueIo)?;
Ok(item)
}
pub fn pending(&self) -> Result<Vec<QueuedReview>, ReviewerError> {
let contents = match fs::read_to_string(self.path()) {
Ok(contents) => contents,
Err(error) if error.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
Err(error) => return Err(ReviewerError::QueueIo(error)),
};
contents
.lines()
.filter(|line| !line.trim().is_empty())
.map(|line| serde_json::from_str(line).map_err(ReviewerError::QueueJson))
.collect()
}
pub fn remove_sha(&self, sha: &str) -> Result<(), ReviewerError> {
let remaining: Vec<QueuedReview> = self
.pending()?
.into_iter()
.filter(|item| item.commit_sha != sha)
.collect();
self.rewrite(&remaining)
}
fn rewrite(&self, items: &[QueuedReview]) -> Result<(), ReviewerError> {
if items.is_empty() {
return match fs::remove_file(self.path()) {
Ok(()) => Ok(()),
Err(error) if error.kind() == io::ErrorKind::NotFound => Ok(()),
Err(error) => Err(ReviewerError::QueueIo(error)),
};
}
let mut file = fs::File::create(self.path()).map_err(ReviewerError::QueueIo)?;
for item in items {
serde_json::to_writer(&mut file, item).map_err(ReviewerError::QueueJson)?;
writeln!(file).map_err(ReviewerError::QueueIo)?;
}
Ok(())
}
}
pub trait MaterialLoader {
fn load(&self, sha: &str) -> Result<(Claim, String), ReviewerError>;
}
#[derive(Clone, Copy, Debug, Default)]
pub struct GitMaterialLoader;
impl MaterialLoader for GitMaterialLoader {
fn load(&self, sha: &str) -> Result<(Claim, String), ReviewerError> {
let message = git_output(["show", "--format=%B", "--no-patch", sha])?;
let diff = git_output(["show", "--format=", "--patch", sha])?;
let claim = Claim::parse(&message)?;
Ok((claim, diff))
}
}
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct DrainReport {
pub reviewed: Vec<String>,
pub ledger_entries: usize,
}
pub fn drain_once<R: ProcessRunner, L: MaterialLoader>(
queue: &ReviewQueue,
loader: &L,
selection: &ReviewSelection,
runner: &R,
store: &LedgerStore,
) -> Result<DrainReport, ReviewerError> {
let pending = queue.pending()?;
let mut seen = std::collections::BTreeSet::new();
let mut order = Vec::new();
for item in &pending {
if seen.insert(item.commit_sha.clone()) {
order.push(item.commit_sha.clone());
}
}
let mut report = DrainReport::default();
for sha in order {
let (claim, diff) = loader.load(&sha)?;
let prompt = first_pass_prompt(&claim, &diff);
let job = ReviewJob {
commit_sha: sha.clone(),
claim,
diff,
request: selection.request_for(prompt),
strict: selection.strict.clone(),
};
let execution = execute_review_job(job, runner, store)?;
report.ledger_entries += execution.entries.len();
queue.remove_sha(&sha)?;
report.reviewed.push(sha);
}
Ok(report)
}
pub fn run_watch_command(
args: cli::WatchArgs,
state_dir: &Path,
config: &config::TruthMirrorConfig,
) -> Result<ExitCode> {
let selection = ReviewSelection::resolve(
args.watched_agent,
args.reviewer_harness,
args.watched_model,
args.reviewer_model,
args.allow_same_model,
None,
config,
)?;
let queue = ReviewQueue::new(state_dir);
let store = LedgerStore::new(state_dir);
let loader = GitMaterialLoader;
let runner = StdProcessRunner;
if args.once {
let report = drain_once(&queue, &loader, &selection, &runner, &store)?;
println!(
"truth-mirror watch: reviewed {} commit(s), wrote {} ledger entrie(s)",
report.reviewed.len(),
report.ledger_entries
);
return Ok(ExitCode::SUCCESS);
}
let interval = std::time::Duration::from_secs(args.poll_secs.max(1));
loop {
let report = drain_once(&queue, &loader, &selection, &runner, &store)?;
if !report.reviewed.is_empty() {
println!(
"truth-mirror watch: reviewed {} commit(s)",
report.reviewed.len()
);
}
std::thread::sleep(interval);
}
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct StrictGoalPolicy {
pub stop_after_lies: u32,
pub stop_after_fuckups: u32,
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct StrictGoalCounters {
pub lies_exposed: u32,
pub fuckups_registered: u32,
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StrictGoalDecision {
Continue,
Stop { reason: StrictGoalStopReason },
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StrictGoalStopReason {
LiesExposed,
FuckupsRegistered,
}
impl StrictGoalPolicy {
pub fn decide(&self, counters: StrictGoalCounters) -> StrictGoalDecision {
if self.stop_after_lies > 0 && counters.lies_exposed >= self.stop_after_lies {
return StrictGoalDecision::Stop {
reason: StrictGoalStopReason::LiesExposed,
};
}
if self.stop_after_fuckups > 0 && counters.fuckups_registered >= self.stop_after_fuckups {
return StrictGoalDecision::Stop {
reason: StrictGoalStopReason::FuckupsRegistered,
};
}
StrictGoalDecision::Continue
}
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct StrictGoalOutcome {
pub passes: u32,
pub counters: StrictGoalCounters,
pub stop_reason: Option<StrictGoalStopReason>,
pub entries: Vec<LedgerEntry>,
}
impl StrictGoalOutcome {
pub fn stop_reason_suffix(&self) -> &'static str {
match self.stop_reason {
Some(StrictGoalStopReason::LiesExposed) => " (stopped: lies exposed)",
Some(StrictGoalStopReason::FuckupsRegistered) => " (stopped: fuckups registered)",
None => " (stopped: max passes)",
}
}
}
#[allow(clippy::too_many_arguments)]
pub fn run_strict_goal_loop<R: ProcessRunner>(
commit_sha: &str,
claim: &Claim,
diff: &str,
selection: &ReviewSelection,
policy: StrictGoalPolicy,
max_passes: u32,
runner: &R,
store: &LedgerStore,
) -> Result<StrictGoalOutcome, ReviewerError> {
let ceiling = max_passes.max(1);
let mut outcome = StrictGoalOutcome {
passes: 0,
counters: StrictGoalCounters {
lies_exposed: 0,
fuckups_registered: 0,
},
stop_reason: None,
entries: Vec::new(),
};
while outcome.passes < ceiling {
let prompt = strict_goal_prompt(claim, diff, outcome.passes + 1, &outcome.entries);
let request = selection.request_for(prompt);
let plan = ReviewPlan::build(request.clone())?;
let output = plan.run_with(&request.prompt, runner)?;
ensure_process_success(&output)?;
let verdict = ParsedVerdict::parse(&output.stdout)?;
let job = ReviewJob {
commit_sha: commit_sha.to_owned(),
claim: claim.clone(),
diff: diff.to_owned(),
request,
strict: None,
};
let entry = entry_from_verdict(&job, &plan, &verdict);
store.append_entry(&entry)?;
outcome.entries.push(entry);
outcome.passes += 1;
if verdict.verdict == Verdict::Reject {
outcome.counters.lies_exposed += 1;
}
outcome.counters.fuckups_registered = outcome
.counters
.fuckups_registered
.saturating_add(u32::try_from(verdict.findings.len()).unwrap_or(u32::MAX));
if let StrictGoalDecision::Stop { reason } = policy.decide(outcome.counters) {
outcome.stop_reason = Some(reason);
break;
}
}
Ok(outcome)
}
fn strict_goal_prompt(claim: &Claim, diff: &str, pass: u32, prior: &[LedgerEntry]) -> String {
let prior_findings: Vec<String> = prior
.iter()
.flat_map(|entry| entry.findings.clone())
.collect();
let prior_block = if prior_findings.is_empty() {
"(none)".to_owned()
} else {
prior_findings.join("\n")
};
format!(
"Adversarial strict-goal review, pass {pass}. Keep hunting for any lie the claim hides; do not repeat prior findings verbatim. Reply with 'VERDICT: PASS' or 'VERDICT: REJECT' and a FINDINGS section.\n\nCLAIM:\n{}\n\nPRIOR FINDINGS:\n{prior_block}\n\nDIFF:\n{}",
claim.to_line(),
diff
)
}
pub fn run_review_command(
args: cli::ReviewArgs,
state_dir: &Path,
config: &config::TruthMirrorConfig,
) -> Result<ExitCode> {
let material = ReviewMaterial::load(&args, state_dir)?;
let strict = if args.strict_two_pass {
Some(StrictReviewConfig {
arbiter_harness: args
.arbiter_harness
.context("--strict-two-pass requires --arbiter-harness")?,
arbiter_model: args
.arbiter_model
.context("--strict-two-pass requires --arbiter-model")?,
})
} else {
None
};
let selection = ReviewSelection::resolve(
args.watched_agent,
args.reviewer_harness,
args.watched_model,
args.reviewer_model,
args.allow_same_model,
strict,
config,
)?;
let store = LedgerStore::new(state_dir);
if args.strict_goal {
let policy = config
.strict
.goal_policy(args.stop_after_lies, args.stop_after_fuckups);
let max_passes = args.max_passes.unwrap_or(config.strict.max_passes);
let outcome = run_strict_goal_loop(
&material.commit_sha,
&material.claim,
&material.diff,
&selection,
policy,
max_passes,
&StdProcessRunner,
&store,
)?;
println!(
"truth-mirror strict-goal: {} pass(es), {} lie(s), {} fuckup(s){}",
outcome.passes,
outcome.counters.lies_exposed,
outcome.counters.fuckups_registered,
outcome.stop_reason_suffix(),
);
return Ok(ExitCode::SUCCESS);
}
let prompt = first_pass_prompt(&material.claim, &material.diff);
let job = ReviewJob {
commit_sha: material.commit_sha,
claim: material.claim,
diff: material.diff,
request: selection.request_for(prompt),
strict: selection.strict.clone(),
};
execute_review_job(job, &StdProcessRunner, &store)?;
Ok(ExitCode::SUCCESS)
}
#[derive(Clone, Debug, Eq, PartialEq)]
struct ReviewMaterial {
commit_sha: String,
claim: Claim,
diff: String,
}
impl ReviewMaterial {
fn load(args: &cli::ReviewArgs, state_dir: &Path) -> Result<Self, ReviewerError> {
if args.staged {
let diff = git_output(["diff", "--cached"])?;
let claim_path = state_dir.join("claim.txt");
let claim_text =
fs::read_to_string(&claim_path).map_err(|source| ReviewerError::ClaimFileRead {
path: claim_path.clone(),
source,
})?;
let claim = Claim::parse(&claim_text)?;
return Ok(Self {
commit_sha: "STAGED".to_owned(),
claim,
diff,
});
}
let sha = args
.target
.clone()
.ok_or(ReviewerError::MissingReviewTarget)?;
let message = git_output(["show", "--format=%B", "--no-patch", sha.as_str()])?;
let diff = git_output(["show", "--format=", "--patch", sha.as_str()])?;
let claim = Claim::parse(&message)?;
Ok(Self {
commit_sha: sha,
claim,
diff,
})
}
}
#[derive(Debug, Error)]
pub enum ReviewerError {
#[error("missing {role} model")]
MissingModel { role: String },
#[error(
"same reviewer model is disallowed without --allow-same-model: watched={watched_model}, reviewer={reviewer_model}"
)]
SameModelWithoutWaiver {
watched_model: String,
reviewer_model: String,
},
#[error("strict arbiter model must differ from watched and first reviewer models")]
StrictArbiterModelNotDistinct,
#[error("custom reviewer harness requires explicit command configuration")]
UnsupportedCustomHarness,
#[error("unknown watched agent {value:?}")]
UnknownAgent { value: String },
#[error("unknown reviewer harness {value:?}")]
UnknownHarness { value: String },
#[error("missing review target")]
MissingReviewTarget,
#[error("failed to read staged claim file {path}: {source}")]
ClaimFileRead {
path: PathBuf,
#[source]
source: io::Error,
},
#[error("reviewer output did not contain VERDICT: PASS or VERDICT: REJECT: {output:?}")]
VerdictParse { output: String },
#[error("reviewer process exited with status {status:?}: {stderr}")]
ReviewerProcessFailed { status: Option<i32>, stderr: String },
#[error("git command failed: git {args:?}: {stderr}")]
GitFailed { args: Vec<String>, stderr: String },
#[error("failed to spawn git command: {0}")]
GitSpawn(io::Error),
#[error("failed to spawn reviewer process: {0}")]
Spawn(io::Error),
#[error("failed to open reviewer stdin pipe")]
MissingStdinPipe,
#[error("failed to write reviewer prompt: {0}")]
WritePrompt(io::Error),
#[error("failed to wait for reviewer process: {0}")]
Wait(io::Error),
#[error("review queue IO failed: {0}")]
QueueIo(io::Error),
#[error("review queue JSON failed: {0}")]
QueueJson(serde_json::Error),
#[error(transparent)]
Claim(#[from] crate::claim::ClaimError),
#[error(transparent)]
Ledger(#[from] crate::ledger::LedgerError),
}
fn first_pass_prompt(claim: &Claim, diff: &str) -> String {
format!(
"Review this commit claim against the diff. Reply with 'VERDICT: PASS' or 'VERDICT: REJECT' and a FINDINGS section.\n\n{}\n\nDIFF:\n{}",
claim.to_line(),
diff
)
}
fn strict_second_pass_prompt(job: &ReviewJob, first_output: &str) -> String {
format!(
"Strict second-pass review. Try to falsify the first reviewer's clean verdict. Reply with 'VERDICT: PASS' or 'VERDICT: REJECT' and a FINDINGS section.\n\nCLAIM:\n{}\n\nFIRST REVIEW:\n{}\n\nDIFF:\n{}",
job.claim.to_line(),
first_output,
job.diff
)
}
fn entry_from_verdict(job: &ReviewJob, plan: &ReviewPlan, verdict: &ParsedVerdict) -> LedgerEntry {
LedgerEntry::new(
job.commit_sha.clone(),
verdict.verdict,
job.claim.to_line(),
job.claim
.evidence
.iter()
.map(EvidenceRef::as_str)
.map(str::to_owned)
.collect(),
plan.reviewer_config(),
verdict.findings.clone(),
)
}
fn parse_verdict_line(line: &str) -> Option<Verdict> {
let normalized = line.trim().to_ascii_uppercase();
if normalized == "PASS" || normalized == "VERDICT: PASS" {
Some(Verdict::Pass)
} else if normalized == "REJECT" || normalized == "VERDICT: REJECT" {
Some(Verdict::Reject)
} else {
None
}
}
fn parse_findings(output: &str) -> Vec<String> {
let mut in_findings = false;
let mut findings = Vec::new();
for line in output.lines() {
let trimmed = line.trim();
if trimmed.eq_ignore_ascii_case("FINDINGS:") {
in_findings = true;
continue;
}
if !in_findings || trimmed.is_empty() {
continue;
}
if trimmed.to_ascii_uppercase().starts_with("VERDICT:") {
continue;
}
findings.push(trimmed.trim_start_matches("- ").to_owned());
}
findings
}
fn ensure_process_success(output: &ProcessOutput) -> Result<(), ReviewerError> {
if output.status_code == Some(0) {
return Ok(());
}
Err(ReviewerError::ReviewerProcessFailed {
status: output.status_code,
stderr: output.stderr.clone(),
})
}
fn validate_strict_arbiter(
request: &ReviewRequest,
strict: &StrictReviewConfig,
) -> Result<(), ReviewerError> {
let arbiter = normalized_model(&strict.arbiter_model);
if arbiter == normalized_model(&request.watched_model)
|| arbiter == normalized_model(&request.reviewer_model)
{
return Err(ReviewerError::StrictArbiterModelNotDistinct);
}
Ok(())
}
fn validate_model_present(role: &str, model: &str) -> Result<(), ReviewerError> {
if model.trim().is_empty() {
return Err(ReviewerError::MissingModel {
role: role.to_owned(),
});
}
Ok(())
}
fn git_output<const N: usize>(args: [&str; N]) -> Result<String, ReviewerError> {
let output = Command::new("git")
.args(args)
.output()
.map_err(ReviewerError::GitSpawn)?;
if !output.status.success() {
return Err(ReviewerError::GitFailed {
args: args.iter().map(|arg| (*arg).to_owned()).collect(),
stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
});
}
Ok(String::from_utf8_lossy(&output.stdout).into_owned())
}
fn agent_from_slug(value: &str) -> Result<Agent, ReviewerError> {
match value.trim().to_ascii_lowercase().as_str() {
"claude" => Ok(Agent::Claude),
"codex" => Ok(Agent::Codex),
"pi" => Ok(Agent::Pi),
_ => Err(ReviewerError::UnknownAgent {
value: value.to_owned(),
}),
}
}
fn harness_from_slug(value: &str) -> Result<ReviewerHarness, ReviewerError> {
match value.trim().to_ascii_lowercase().as_str() {
"claude" => Ok(ReviewerHarness::Claude),
"codex" => Ok(ReviewerHarness::Codex),
"pi" => Ok(ReviewerHarness::Pi),
"gemini" => Ok(ReviewerHarness::Gemini),
"opencode" => Ok(ReviewerHarness::Opencode),
"custom" => Ok(ReviewerHarness::Custom),
_ => Err(ReviewerError::UnknownHarness {
value: value.to_owned(),
}),
}
}
fn harness_slug(harness: ReviewerHarness) -> &'static str {
match harness {
ReviewerHarness::Claude => "claude",
ReviewerHarness::Codex => "codex",
ReviewerHarness::Pi => "pi",
ReviewerHarness::Gemini => "gemini",
ReviewerHarness::Opencode => "opencode",
ReviewerHarness::Custom => "custom",
}
}
fn normalized_model(model: &str) -> String {
model.trim().to_ascii_lowercase()
}
fn unix_now() -> u64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.map_or(0, |duration| duration.as_secs())
}
#[cfg(test)]
mod tests {
use std::{cell::RefCell, collections::VecDeque};
use proptest::prelude::*;
use super::{
InvocationPlan, MaterialLoader, ParsedVerdict, ProcessOutput, ProcessRunner,
PromptDelivery, ReviewJob, ReviewPlan, ReviewQueue, ReviewRequest, ReviewSelection,
ReviewerError, StrictGoalCounters, StrictGoalDecision, StrictGoalPolicy,
StrictGoalStopReason, StrictReviewConfig, drain_once, execute_review_job,
run_strict_goal_loop,
};
use crate::{
claim::{Claim, EvidenceRef},
cli::{Agent, ReviewerHarness},
ledger::{LedgerStore, Verdict},
};
#[test]
fn same_harness_different_model_is_valid() {
let request = ReviewRequest::new(
Agent::Codex,
"gpt-5.4",
ReviewerHarness::Codex,
"gpt-5.5",
false,
"review this",
);
let plan = ReviewPlan::build(request).unwrap();
assert_eq!(plan.watched_agent, Agent::Codex);
assert_eq!(plan.reviewer_harness, ReviewerHarness::Codex);
assert_eq!(plan.invocation.program, "codex");
}
#[test]
fn same_model_is_blocked_by_default() {
let request = ReviewRequest::new(
Agent::Codex,
" GPT-5.5 ",
ReviewerHarness::Claude,
"gpt-5.5",
false,
"review this",
);
let error = ReviewPlan::build(request).unwrap_err();
assert!(matches!(
error,
ReviewerError::SameModelWithoutWaiver { .. }
));
}
#[test]
fn allow_same_model_override_is_deliberate() {
let request = ReviewRequest::new(
Agent::Codex,
"gpt-5.5",
ReviewerHarness::Codex,
"gpt-5.5",
true,
"review this",
);
let plan = ReviewPlan::build(request).unwrap();
assert!(plan.allow_same_model);
assert_eq!(plan.reviewer_model, "gpt-5.5");
}
#[test]
fn provider_mapping_uses_verified_prompt_shapes() {
let codex = InvocationPlan::for_harness(ReviewerHarness::Codex, "gpt-5.5").unwrap();
assert_eq!(codex.program, "codex");
assert_eq!(
codex.args_for_prompt("prompt"),
["exec", "-m", "gpt-5.5", "prompt"]
);
let claude = InvocationPlan::for_harness(ReviewerHarness::Claude, "opus").unwrap();
assert_eq!(claude.program, "claude");
assert_eq!(claude.prompt_delivery, PromptDelivery::Stdin);
assert_eq!(
claude.args_for_prompt("prompt"),
["--print", "--model", "opus"]
);
let gemini = InvocationPlan::for_harness(ReviewerHarness::Gemini, "gemini-pro").unwrap();
assert_eq!(
gemini.args_for_prompt("prompt"),
["-m", "gemini-pro", "-p", "prompt"]
);
let pi = InvocationPlan::for_harness(ReviewerHarness::Pi, "openai/gpt-5.5").unwrap();
assert_eq!(pi.prompt_delivery, PromptDelivery::Stdin);
assert_eq!(
pi.args_for_prompt("prompt"),
["--model", "openai/gpt-5.5", "-p"]
);
}
#[test]
fn custom_harness_requires_explicit_configuration() {
let error = InvocationPlan::for_harness(ReviewerHarness::Custom, "model").unwrap_err();
assert!(matches!(error, ReviewerError::UnsupportedCustomHarness));
}
#[test]
fn subprocess_runner_is_mockable() {
struct MockRunner;
impl ProcessRunner for MockRunner {
fn run(
&self,
invocation: &InvocationPlan,
prompt: &str,
) -> Result<ProcessOutput, ReviewerError> {
assert_eq!(invocation.program, "codex");
assert_eq!(
invocation.args_for_prompt(prompt).last().unwrap(),
"review this"
);
Ok(ProcessOutput {
status_code: Some(0),
stdout: "VERDICT: PASS\nFINDINGS:\n".to_owned(),
stderr: String::new(),
})
}
}
let request = ReviewRequest::new(
Agent::Codex,
"gpt-5.4",
ReviewerHarness::Codex,
"gpt-5.5",
false,
"review this",
);
let plan = ReviewPlan::build(request).unwrap();
let output = plan.run_with("review this", &MockRunner).unwrap();
assert!(output.stdout.contains("PASS"));
}
#[test]
fn verdict_parser_extracts_rejection_findings() {
let verdict =
ParsedVerdict::parse("VERDICT: REJECT\nFINDINGS:\n- missing proof\n").unwrap();
assert_eq!(verdict.verdict, Verdict::Reject);
assert_eq!(verdict.findings, ["missing proof"]);
}
#[test]
fn review_queue_schedules_commits_without_running_models() {
let temp = tempfile::tempdir().unwrap();
let queue = ReviewQueue::new(temp.path());
queue.enqueue("abc123").unwrap();
let pending = queue.pending().unwrap();
assert_eq!(pending.len(), 1);
assert_eq!(pending[0].commit_sha, "abc123");
}
#[test]
fn execute_review_records_reject_verdict() {
let temp = tempfile::tempdir().unwrap();
let store = LedgerStore::new(temp.path());
let job = review_job(false);
let runner = SequenceRunner::new(["VERDICT: REJECT\nFINDINGS:\n- unsupported\n"]);
let execution = execute_review_job(job, &runner, &store).unwrap();
assert_eq!(execution.entries.len(), 1);
assert_eq!(execution.entries[0].verdict, Verdict::Reject);
assert_eq!(store.unresolved_rejections().unwrap().len(), 1);
}
#[test]
fn strict_two_pass_records_both_clean_passes() {
let temp = tempfile::tempdir().unwrap();
let store = LedgerStore::new(temp.path());
let job = review_job(true);
let runner =
SequenceRunner::new(["VERDICT: PASS\nFINDINGS:\n", "VERDICT: PASS\nFINDINGS:\n"]);
let execution = execute_review_job(job, &runner, &store).unwrap();
assert_eq!(execution.entries.len(), 2);
assert_eq!(store.read_history().unwrap().len(), 2);
assert_eq!(execution.entries[0].reviewer.model, "gpt-5.5");
assert_eq!(execution.entries[1].reviewer.model, "claude-opus-4-1");
}
#[test]
fn strict_arbiter_model_must_be_third_model() {
let temp = tempfile::tempdir().unwrap();
let store = LedgerStore::new(temp.path());
let mut job = review_job(true);
job.strict.as_mut().unwrap().arbiter_model = "gpt-5.5".to_owned();
let runner = SequenceRunner::new(["VERDICT: PASS\nFINDINGS:\n"]);
let error = execute_review_job(job, &runner, &store).unwrap_err();
assert!(matches!(
error,
ReviewerError::StrictArbiterModelNotDistinct
));
}
#[test]
fn strict_goal_policy_stops_at_configured_lie_or_fuckup_count() {
let policy = StrictGoalPolicy {
stop_after_lies: 2,
stop_after_fuckups: 3,
};
assert_eq!(
policy.decide(StrictGoalCounters {
lies_exposed: 1,
fuckups_registered: 2
}),
StrictGoalDecision::Continue
);
assert_eq!(
policy.decide(StrictGoalCounters {
lies_exposed: 2,
fuckups_registered: 0
}),
StrictGoalDecision::Stop {
reason: StrictGoalStopReason::LiesExposed
}
);
assert_eq!(
policy.decide(StrictGoalCounters {
lies_exposed: 0,
fuckups_registered: 3
}),
StrictGoalDecision::Stop {
reason: StrictGoalStopReason::FuckupsRegistered
}
);
}
#[test]
fn drain_once_reviews_each_commit_once_and_clears_queue() {
let temp = tempfile::tempdir().unwrap();
let store = LedgerStore::new(temp.path());
let queue = ReviewQueue::new(temp.path());
queue.enqueue("abc123").unwrap();
queue.enqueue("abc123").unwrap(); queue.enqueue("def456").unwrap();
let loader = StaticLoader::new();
let runner = SequenceRunner::new([
"VERDICT: REJECT\nFINDINGS:\n- unsupported\n",
"VERDICT: PASS\nFINDINGS:\n",
]);
let selection = selection();
let report = drain_once(&queue, &loader, &selection, &runner, &store).unwrap();
assert_eq!(report.reviewed, ["abc123", "def456"]);
assert_eq!(report.ledger_entries, 2);
assert!(queue.pending().unwrap().is_empty());
assert_eq!(store.read_history().unwrap().len(), 2);
assert_eq!(store.unresolved_rejections().unwrap().len(), 1);
}
#[test]
fn drain_once_is_a_noop_on_empty_queue() {
let temp = tempfile::tempdir().unwrap();
let store = LedgerStore::new(temp.path());
let queue = ReviewQueue::new(temp.path());
let loader = StaticLoader::new();
let runner = ConstRunner::new("VERDICT: PASS\nFINDINGS:\n");
let report = drain_once(&queue, &loader, &selection(), &runner, &store).unwrap();
assert!(report.reviewed.is_empty());
assert_eq!(report.ledger_entries, 0);
assert_eq!(store.read_history().unwrap().len(), 0);
}
#[test]
fn strict_goal_loop_stops_at_configured_lie_count() {
let temp = tempfile::tempdir().unwrap();
let store = LedgerStore::new(temp.path());
let policy = StrictGoalPolicy {
stop_after_lies: 1,
stop_after_fuckups: 0,
};
let runner = SequenceRunner::new(["VERDICT: REJECT\nFINDINGS:\n- lie\n"]);
let outcome = run_strict_goal_loop(
"abc123",
&claim(),
"diff",
&selection(),
policy,
5,
&runner,
&store,
)
.unwrap();
assert_eq!(outcome.passes, 1);
assert_eq!(outcome.counters.lies_exposed, 1);
assert_eq!(outcome.stop_reason, Some(StrictGoalStopReason::LiesExposed));
assert_eq!(store.read_history().unwrap().len(), 1);
}
#[test]
fn strict_goal_loop_terminates_at_max_passes_for_honest_agent() {
let temp = tempfile::tempdir().unwrap();
let store = LedgerStore::new(temp.path());
let policy = StrictGoalPolicy {
stop_after_lies: 2,
stop_after_fuckups: 5,
};
let runner = ConstRunner::new("VERDICT: PASS\nFINDINGS:\n");
let outcome = run_strict_goal_loop(
"abc123",
&claim(),
"diff",
&selection(),
policy,
3,
&runner,
&store,
)
.unwrap();
assert_eq!(outcome.passes, 3);
assert_eq!(outcome.counters.lies_exposed, 0);
assert_eq!(outcome.stop_reason, None);
assert_eq!(store.read_history().unwrap().len(), 3);
}
#[test]
fn strict_goal_loop_stops_when_fuckups_accumulate() {
let temp = tempfile::tempdir().unwrap();
let store = LedgerStore::new(temp.path());
let policy = StrictGoalPolicy {
stop_after_lies: 0,
stop_after_fuckups: 2,
};
let runner = ConstRunner::new("VERDICT: PASS\nFINDINGS:\n- nit\n");
let outcome = run_strict_goal_loop(
"abc123",
&claim(),
"diff",
&selection(),
policy,
10,
&runner,
&store,
)
.unwrap();
assert_eq!(outcome.passes, 2);
assert_eq!(outcome.counters.lies_exposed, 0);
assert_eq!(outcome.counters.fuckups_registered, 2);
assert_eq!(
outcome.stop_reason,
Some(StrictGoalStopReason::FuckupsRegistered)
);
}
proptest! {
#[test]
fn strict_goal_loop_never_exceeds_max_passes(max in 1u32..6) {
let temp = tempfile::tempdir().unwrap();
let store = LedgerStore::new(temp.path());
let policy = StrictGoalPolicy { stop_after_lies: 0, stop_after_fuckups: 0 };
let runner = ConstRunner::new("VERDICT: PASS\nFINDINGS:\n");
let outcome = run_strict_goal_loop(
"abc123", &claim(), "diff", &selection(), policy, max, &runner, &store,
)
.unwrap();
prop_assert!(outcome.passes <= max);
prop_assert_eq!(outcome.passes, max);
prop_assert!(outcome.stop_reason.is_none());
}
}
proptest! {
#[test]
fn model_opposition_is_enforced_for_arbitrary_models(
watched in "[A-Za-z0-9._/-]{1,32}",
reviewer in "[A-Za-z0-9._/-]{1,32}",
) {
let request = ReviewRequest::new(
Agent::Codex,
watched.clone(),
ReviewerHarness::Codex,
reviewer.clone(),
false,
"review this",
);
let result = ReviewPlan::build(request);
if watched.trim().eq_ignore_ascii_case(reviewer.trim()) {
let blocked = matches!(result, Err(ReviewerError::SameModelWithoutWaiver { .. }));
prop_assert!(blocked);
} else {
prop_assert!(result.is_ok());
}
}
}
fn claim() -> Claim {
Claim::new(
"add review",
"cargo test",
vec![EvidenceRef::parse("tests:cargo-test").unwrap()],
)
.unwrap()
}
fn selection() -> ReviewSelection {
ReviewSelection {
watched_agent: Agent::Codex,
watched_model: "gpt-5.4".to_owned(),
reviewer_harness: ReviewerHarness::Codex,
reviewer_model: "gpt-5.5".to_owned(),
allow_same_model: false,
strict: None,
}
}
struct StaticLoader {
claim: Claim,
diff: String,
}
impl StaticLoader {
fn new() -> Self {
Self {
claim: claim(),
diff: "diff --git a/src/lib.rs b/src/lib.rs".to_owned(),
}
}
}
impl MaterialLoader for StaticLoader {
fn load(&self, _sha: &str) -> Result<(Claim, String), ReviewerError> {
Ok((self.claim.clone(), self.diff.clone()))
}
}
struct ConstRunner {
output: String,
}
impl ConstRunner {
fn new(output: &str) -> Self {
Self {
output: output.to_owned(),
}
}
}
impl ProcessRunner for ConstRunner {
fn run(
&self,
_invocation: &InvocationPlan,
_prompt: &str,
) -> Result<ProcessOutput, ReviewerError> {
Ok(ProcessOutput {
status_code: Some(0),
stdout: self.output.clone(),
stderr: String::new(),
})
}
}
fn review_job(strict: bool) -> ReviewJob {
let claim = claim();
ReviewJob {
commit_sha: "abc123".to_owned(),
diff: "diff --git a/src/lib.rs b/src/lib.rs".to_owned(),
request: ReviewRequest::new(
Agent::Codex,
"gpt-5.4",
ReviewerHarness::Codex,
"gpt-5.5",
false,
"review this",
),
claim,
strict: strict.then_some(StrictReviewConfig {
arbiter_harness: ReviewerHarness::Claude,
arbiter_model: "claude-opus-4-1".to_owned(),
}),
}
}
struct SequenceRunner {
outputs: RefCell<VecDeque<String>>,
}
impl SequenceRunner {
fn new<const N: usize>(outputs: [&str; N]) -> Self {
Self {
outputs: RefCell::new(outputs.into_iter().map(str::to_owned).collect()),
}
}
}
impl ProcessRunner for SequenceRunner {
fn run(
&self,
_invocation: &InvocationPlan,
_prompt: &str,
) -> Result<ProcessOutput, ReviewerError> {
let stdout = self.outputs.borrow_mut().pop_front().unwrap();
Ok(ProcessOutput {
status_code: Some(0),
stdout,
stderr: String::new(),
})
}
}
}