1use std::{
4 fs,
5 io::{self, Write},
6 path::{Path, PathBuf},
7 process::{Command, ExitCode, Stdio},
8 time::{SystemTime, UNIX_EPOCH},
9};
10
11use anyhow::Result;
12use serde::{Deserialize, Serialize};
13use thiserror::Error;
14
15use crate::{
16 claim::{Claim, EvidenceRef},
17 cli::{self, Agent, ReviewScope, ReviewerHarness},
18 config::{self, Effort},
19 ledger::{LedgerEntry, LedgerStore, ReviewerConfig, StructuredFinding, Verdict},
20 surface,
21};
22
/// File name of the JSON-lines review queue, joined onto the state root by `ReviewQueue::path`.
pub const REVIEW_QUEUE_FILE: &str = "review-queue.jsonl";
/// Directory name, joined onto the state root by `ReviewRunStore::runs_dir`, holding run records.
pub const REVIEW_RUNS_DIR: &str = "runs";
// NOTE(review): the three limits below are not referenced in this part of the file; presumably
// they cap how much diff / untracked-file content is inlined into prompts — confirm at use sites.
const MAX_INLINE_DIFF_FILES: usize = 2;
const MAX_INLINE_DIFF_BYTES: usize = 256 * 1024;
const MAX_UNTRACKED_FILE_BYTES: u64 = 16 * 1024;
28
29#[derive(Clone, Debug, Eq, PartialEq)]
30pub struct ReviewRequest {
31 pub watched_agent: Agent,
32 pub watched_model: String,
33 pub reviewer_harness: ReviewerHarness,
34 pub reviewer_model: String,
35 pub reviewer_effort: Effort,
36 pub allow_same_model: bool,
37 pub prompt: String,
38}
39
40impl ReviewRequest {
41 pub fn new(
42 watched_agent: Agent,
43 watched_model: impl Into<String>,
44 reviewer_harness: ReviewerHarness,
45 reviewer_model: impl Into<String>,
46 allow_same_model: bool,
47 prompt: impl Into<String>,
48 ) -> Self {
49 Self {
50 watched_agent,
51 watched_model: watched_model.into(),
52 reviewer_harness,
53 reviewer_model: reviewer_model.into(),
54 reviewer_effort: Effort::highest(),
55 allow_same_model,
56 prompt: prompt.into(),
57 }
58 }
59
60 pub fn with_effort(mut self, effort: Effort) -> Self {
61 self.reviewer_effort = effort;
62 self
63 }
64}
65
66#[derive(Clone, Debug, Eq, PartialEq)]
69pub struct ReviewSelection {
70 pub watched_agent: Agent,
71 pub watched_model: String,
72 pub reviewer_harness: ReviewerHarness,
73 pub reviewer_model: String,
74 pub reviewer_effort: Effort,
75 pub allow_same_model: bool,
76 pub strict: Option<StrictReviewConfig>,
77}
78
79impl ReviewSelection {
80 #[allow(clippy::too_many_arguments)]
83 pub fn resolve(
84 watched_agent: Option<Agent>,
85 watched_model: Option<String>,
86 reviewer_harness: Option<ReviewerHarness>,
87 reviewer_model: Option<String>,
88 reviewer_effort: Option<Effort>,
89 allow_same_model: bool,
90 config: &config::TruthMirrorConfig,
91 ) -> Result<Self, ReviewerError> {
92 let watched_agent = match watched_agent {
93 Some(agent) => agent,
94 None => agent_from_slug(&config.default_writer)?,
95 };
96 let writer_slug = surface::agent_slug(watched_agent);
97 let pair = config.pair_for(writer_slug);
98
99 let harness_from_cli = reviewer_harness.is_some();
100 let reviewer_harness = match reviewer_harness {
101 Some(harness) => harness,
102 None => {
103 let slug = pair
104 .map(|pair| pair.reviewer.harness.as_str())
105 .ok_or_else(|| ReviewerError::NoPairForWriter {
106 writer: writer_slug.to_owned(),
107 })?;
108 harness_from_slug(slug)?
109 }
110 };
111 let reviewer_model = match reviewer_model {
112 Some(model) => model,
113 None => {
114 let pair = pair.ok_or_else(|| ReviewerError::NoPairForWriter {
115 writer: writer_slug.to_owned(),
116 })?;
117 if harness_from_cli
120 && !pair
121 .reviewer
122 .harness
123 .eq_ignore_ascii_case(harness_slug(reviewer_harness))
124 {
125 return Err(ReviewerError::OverrideNeedsModel {
126 role: "reviewer".to_owned(),
127 harness: harness_slug(reviewer_harness).to_owned(),
128 });
129 }
130 pair.reviewer.model.clone()
131 }
132 };
133 let reviewer_effort = reviewer_effort
134 .or_else(|| pair.map(|pair| pair.reviewer.effort))
135 .unwrap_or_else(Effort::highest);
136
137 Ok(Self {
138 watched_agent,
139 watched_model: watched_model.unwrap_or_default(),
140 reviewer_harness,
141 reviewer_model,
142 reviewer_effort,
143 allow_same_model: allow_same_model || config.allow_same_model,
145 strict: None,
146 })
147 }
148
149 pub fn resolve_arbiter(
152 watched_agent: Agent,
153 arbiter_harness: Option<ReviewerHarness>,
154 arbiter_model: Option<String>,
155 arbiter_effort: Option<Effort>,
156 config: &config::TruthMirrorConfig,
157 ) -> Result<StrictReviewConfig, ReviewerError> {
158 let pair_arbiter = config
159 .pair_for(surface::agent_slug(watched_agent))
160 .and_then(|pair| pair.arbiter.clone());
161
162 let harness_from_cli = arbiter_harness.is_some();
163 let harness = match arbiter_harness {
164 Some(harness) => harness,
165 None => {
166 let slug = pair_arbiter
167 .as_ref()
168 .map(|arbiter| arbiter.harness.as_str())
169 .ok_or(ReviewerError::MissingArbiter)?;
170 harness_from_slug(slug)?
171 }
172 };
173 let model = match arbiter_model {
174 Some(model) => model,
175 None => {
176 let arbiter = pair_arbiter.as_ref().ok_or(ReviewerError::MissingArbiter)?;
177 if harness_from_cli && !arbiter.harness.eq_ignore_ascii_case(harness_slug(harness))
178 {
179 return Err(ReviewerError::OverrideNeedsModel {
180 role: "arbiter".to_owned(),
181 harness: harness_slug(harness).to_owned(),
182 });
183 }
184 arbiter.model.clone()
185 }
186 };
187 let effort = arbiter_effort
188 .or_else(|| pair_arbiter.as_ref().map(|arbiter| arbiter.effort))
189 .unwrap_or_else(Effort::highest);
190
191 Ok(StrictReviewConfig {
192 arbiter_harness: harness,
193 arbiter_model: model,
194 arbiter_effort: effort,
195 })
196 }
197
198 fn request_for(&self, prompt: String) -> ReviewRequest {
199 ReviewRequest::new(
200 self.watched_agent,
201 self.watched_model.clone(),
202 self.reviewer_harness,
203 self.reviewer_model.clone(),
204 self.allow_same_model,
205 prompt,
206 )
207 .with_effort(self.reviewer_effort)
208 }
209}
210
211#[derive(Clone, Debug, Eq, PartialEq)]
212pub struct ReviewPlan {
213 pub watched_agent: Agent,
214 pub watched_model: String,
215 pub reviewer_harness: ReviewerHarness,
216 pub reviewer_model: String,
217 pub allow_same_model: bool,
218 pub invocation: InvocationPlan,
219}
220
221impl ReviewPlan {
222 pub fn build(request: ReviewRequest) -> Result<Self, ReviewerError> {
223 validate_model_present("reviewer", &request.reviewer_model)?;
224
225 if !request.watched_model.trim().is_empty()
228 && !request.allow_same_model
229 && normalized_model(&request.watched_model) == normalized_model(&request.reviewer_model)
230 {
231 return Err(ReviewerError::SameModelWithoutWaiver {
232 watched_model: request.watched_model,
233 reviewer_model: request.reviewer_model,
234 });
235 }
236
237 let invocation = InvocationPlan::for_harness(
238 request.reviewer_harness,
239 &request.reviewer_model,
240 request.reviewer_effort,
241 )?;
242
243 Ok(Self {
244 watched_agent: request.watched_agent,
245 watched_model: request.watched_model,
246 reviewer_harness: request.reviewer_harness,
247 reviewer_model: request.reviewer_model,
248 allow_same_model: request.allow_same_model,
249 invocation,
250 })
251 }
252
253 pub fn run_with<R: ProcessRunner>(
254 &self,
255 prompt: &str,
256 runner: &R,
257 ) -> Result<ProcessOutput, ReviewerError> {
258 runner.run(&self.invocation, prompt)
259 }
260
261 fn reviewer_config(&self) -> ReviewerConfig {
262 ReviewerConfig::new(
263 harness_slug(self.reviewer_harness),
264 self.reviewer_model.clone(),
265 self.allow_same_model,
266 )
267 }
268}
269
270#[derive(Clone, Debug, Eq, PartialEq)]
271pub struct InvocationPlan {
272 pub program: String,
273 pub args: Vec<String>,
274 pub prompt_delivery: PromptDelivery,
275}
276
277impl InvocationPlan {
278 pub fn for_harness(
279 harness: ReviewerHarness,
280 model: &str,
281 effort: Effort,
282 ) -> Result<Self, ReviewerError> {
283 validate_model_present("reviewer", model)?;
284 let model = model.trim();
285 let e = effort.as_str();
286
287 let plan = match harness {
291 ReviewerHarness::Claude => Self {
292 program: "claude".to_owned(),
293 args: vec![
294 "--print".to_owned(),
295 "--model".to_owned(),
296 model.to_owned(),
297 "--effort".to_owned(),
298 effort.claude_value().to_owned(),
300 ],
301 prompt_delivery: PromptDelivery::Stdin,
302 },
303 ReviewerHarness::Codex => Self {
304 program: "codex".to_owned(),
305 args: vec![
306 "exec".to_owned(),
307 "-m".to_owned(),
308 model.to_owned(),
309 "-c".to_owned(),
310 format!("model_reasoning_effort={e}"),
311 ],
312 prompt_delivery: PromptDelivery::PositionalArgument,
313 },
314 ReviewerHarness::Pi => Self {
315 program: "pi".to_owned(),
316 args: vec![
317 "--model".to_owned(),
318 model.to_owned(),
319 "--thinking".to_owned(),
320 e.to_owned(),
321 "--tools".to_owned(),
324 "read,grep,find,ls".to_owned(),
325 "-p".to_owned(),
326 ],
327 prompt_delivery: PromptDelivery::Stdin,
328 },
329 ReviewerHarness::Gemini => Self {
330 program: "gemini".to_owned(),
331 args: vec!["-m".to_owned(), model.to_owned()],
332 prompt_delivery: PromptDelivery::FlagValue("-p".to_owned()),
333 },
334 ReviewerHarness::Opencode => Self {
335 program: "opencode".to_owned(),
336 args: vec!["run".to_owned(), "--model".to_owned(), model.to_owned()],
337 prompt_delivery: PromptDelivery::PositionalArgument,
338 },
339 ReviewerHarness::Custom => return Err(ReviewerError::UnsupportedCustomHarness),
340 };
341
342 Ok(plan)
343 }
344
345 pub fn args_for_prompt(&self, prompt: &str) -> Vec<String> {
346 let mut args = self.args.clone();
347 match &self.prompt_delivery {
348 PromptDelivery::Stdin => {}
349 PromptDelivery::PositionalArgument => args.push(prompt.to_owned()),
350 PromptDelivery::FlagValue(flag) => {
351 args.push(flag.clone());
352 args.push(prompt.to_owned());
353 }
354 }
355 args
356 }
357}
358
/// How a prompt is handed to the reviewer process.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PromptDelivery {
    /// Written to the child's standard input.
    Stdin,
    /// Appended as the last command-line argument.
    PositionalArgument,
    /// Appended as the value of the contained flag (flag first, prompt second).
    FlagValue(String),
}
365
/// Captured result of a finished reviewer process.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProcessOutput {
    /// Exit code, or `None` when the process did not report one.
    pub status_code: Option<i32>,
    /// Everything the process wrote to standard output.
    pub stdout: String,
    /// Everything the process wrote to standard error.
    pub stderr: String,
}
372
373pub trait ProcessRunner {
374 fn run(
375 &self,
376 invocation: &InvocationPlan,
377 prompt: &str,
378 ) -> Result<ProcessOutput, ReviewerError>;
379}
380
/// Stateless [`ProcessRunner`] backed by `std::process::Command`.
#[derive(Debug, Default, Clone, Copy)]
pub struct StdProcessRunner;
383
384impl ProcessRunner for StdProcessRunner {
385 fn run(
386 &self,
387 invocation: &InvocationPlan,
388 prompt: &str,
389 ) -> Result<ProcessOutput, ReviewerError> {
390 let mut command = Command::new(&invocation.program);
391 command.args(invocation.args_for_prompt(prompt));
392 command.stdout(Stdio::piped()).stderr(Stdio::piped());
393
394 if invocation.prompt_delivery == PromptDelivery::Stdin {
395 command.stdin(Stdio::piped());
396 }
397
398 let mut child = command.spawn().map_err(ReviewerError::Spawn)?;
399 if invocation.prompt_delivery == PromptDelivery::Stdin {
400 let mut stdin = child.stdin.take().ok_or(ReviewerError::MissingStdinPipe)?;
401 stdin
402 .write_all(prompt.as_bytes())
403 .map_err(ReviewerError::WritePrompt)?;
404 }
405
406 let output = child.wait_with_output().map_err(ReviewerError::Wait)?;
407 Ok(ProcessOutput {
408 status_code: output.status.code(),
409 stdout: String::from_utf8_lossy(&output.stdout).into_owned(),
410 stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
411 })
412 }
413}
414
415#[derive(Clone, Debug, Eq, PartialEq)]
416pub struct ReviewJob {
417 pub commit_sha: String,
418 pub claim: Claim,
419 pub diff: String,
420 pub context: String,
422 pub request: ReviewRequest,
423 pub strict: Option<StrictReviewConfig>,
424}
425
426#[derive(Clone, Debug, Eq, PartialEq)]
427pub struct StrictReviewConfig {
428 pub arbiter_harness: ReviewerHarness,
429 pub arbiter_model: String,
430 pub arbiter_effort: Effort,
431}
432
433#[derive(Clone, Debug, Eq, PartialEq)]
434pub struct ReviewExecution {
435 pub entries: Vec<LedgerEntry>,
436}
437
438pub fn execute_review_job<R: ProcessRunner>(
439 job: ReviewJob,
440 runner: &R,
441 store: &LedgerStore,
442) -> Result<ReviewExecution, ReviewerError> {
443 let first_plan = ReviewPlan::build(job.request.clone())?;
444 let first_output = first_plan.run_with(&job.request.prompt, runner)?;
445 ensure_process_success(&first_output)?;
446 let first_verdict = ParsedVerdict::parse(&first_output.stdout)?;
447 let first_entry = entry_from_verdict(&job, &first_plan, &first_verdict);
448 store.append_entry(&first_entry)?;
449
450 let mut entries = vec![first_entry];
451 if let Some(strict) = &job.strict
452 && first_verdict.verdict == Verdict::Pass
453 && first_verdict.findings.is_empty()
454 {
455 validate_strict_arbiter(&job.request, strict)?;
456 let strict_prompt = strict_second_pass_prompt(&job, &first_output.stdout);
457 let strict_request = ReviewRequest::new(
458 job.request.watched_agent,
459 job.request.watched_model.clone(),
460 strict.arbiter_harness,
461 strict.arbiter_model.clone(),
462 false,
463 strict_prompt,
464 )
465 .with_effort(strict.arbiter_effort);
466 let strict_plan = ReviewPlan::build(strict_request.clone())?;
467 let strict_output = strict_plan.run_with(&strict_request.prompt, runner)?;
468 ensure_process_success(&strict_output)?;
469 let strict_verdict = ParsedVerdict::parse(&strict_output.stdout)?;
470 let strict_entry = entry_from_verdict(&job, &strict_plan, &strict_verdict);
471 store.append_entry(&strict_entry)?;
472 entries.push(strict_entry);
473 }
474
475 Ok(ReviewExecution { entries })
476}
477
478#[derive(Clone, Debug, Eq, PartialEq)]
479pub struct ParsedVerdict {
480 pub verdict: Verdict,
481 pub summary: String,
482 pub findings: Vec<String>,
483 pub structured_findings: Vec<StructuredFinding>,
484 pub next_steps: Vec<String>,
485 pub raw: String,
486}
487
488impl ParsedVerdict {
489 pub fn parse(output: &str) -> Result<Self, ReviewerError> {
490 let parsed: ReviewerJsonOutput =
491 serde_json::from_str(output.trim()).map_err(|source| ReviewerError::VerdictJson {
492 source,
493 output: output.to_owned(),
494 })?;
495 parsed.validate()?;
496 let findings = parsed
497 .findings
498 .iter()
499 .map(StructuredFinding::display_line)
500 .collect();
501
502 Ok(Self {
503 verdict: parsed.verdict,
504 summary: parsed.summary,
505 findings,
506 structured_findings: parsed.findings,
507 next_steps: parsed.next_steps,
508 raw: output.to_owned(),
509 })
510 }
511}
512
513#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
514struct ReviewerJsonOutput {
515 verdict: Verdict,
516 summary: String,
517 #[serde(default)]
518 findings: Vec<StructuredFinding>,
519 #[serde(default)]
520 next_steps: Vec<String>,
521}
522
523impl ReviewerJsonOutput {
524 fn validate(&self) -> Result<(), ReviewerError> {
525 if self.summary.trim().is_empty() {
526 return Err(ReviewerError::VerdictSchema {
527 message: "summary must not be empty".to_owned(),
528 });
529 }
530
531 for finding in &self.findings {
532 if finding.title.trim().is_empty() {
533 return Err(ReviewerError::VerdictSchema {
534 message: "finding title must not be empty".to_owned(),
535 });
536 }
537 if finding.body.trim().is_empty() {
538 return Err(ReviewerError::VerdictSchema {
539 message: "finding body must not be empty".to_owned(),
540 });
541 }
542 if finding.file.trim().is_empty() {
543 return Err(ReviewerError::VerdictSchema {
544 message: "finding file must not be empty".to_owned(),
545 });
546 }
547 if finding.line_start == 0 || finding.line_end == 0 {
548 return Err(ReviewerError::VerdictSchema {
549 message: "finding lines must be one-based".to_owned(),
550 });
551 }
552 if finding.line_end < finding.line_start {
553 return Err(ReviewerError::VerdictSchema {
554 message: "finding line_end must be greater than or equal to line_start"
555 .to_owned(),
556 });
557 }
558 if finding.confidence > 100 {
559 return Err(ReviewerError::VerdictSchema {
560 message: "finding confidence must be between 0 and 100".to_owned(),
561 });
562 }
563 if finding.recommendation.trim().is_empty() {
564 return Err(ReviewerError::VerdictSchema {
565 message: "finding recommendation must not be empty".to_owned(),
566 });
567 }
568 }
569
570 if self.verdict == Verdict::Pass && !self.findings.is_empty() {
571 return Err(ReviewerError::VerdictSchema {
572 message: "PASS verdict must not include findings".to_owned(),
573 });
574 }
575 if self.verdict == Verdict::Reject && self.findings.is_empty() {
576 return Err(ReviewerError::VerdictSchema {
577 message: "REJECT verdict must include at least one finding".to_owned(),
578 });
579 }
580
581 Ok(())
582 }
583}
584
585#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
586#[serde(rename_all = "kebab-case")]
587pub enum ReviewRunStatus {
588 Queued,
589 Running,
590 Completed,
591 Failed,
592 Cancelled,
593}
594
595impl std::fmt::Display for ReviewRunStatus {
596 fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
597 match self {
598 Self::Queued => formatter.write_str("queued"),
599 Self::Running => formatter.write_str("running"),
600 Self::Completed => formatter.write_str("completed"),
601 Self::Failed => formatter.write_str("failed"),
602 Self::Cancelled => formatter.write_str("cancelled"),
603 }
604 }
605}
606
607#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
608pub struct ReviewRun {
609 pub id: String,
610 pub commit_sha: String,
611 pub target: String,
612 pub status: ReviewRunStatus,
613 pub phase: String,
614 pub ledger_entries: usize,
615 pub error: Option<String>,
616 pub created_at_unix: u64,
617 pub updated_at_unix: u64,
618 pub started_at_unix: Option<u64>,
619 pub completed_at_unix: Option<u64>,
620}
621
622impl ReviewRun {
623 fn queued(
624 id: impl Into<String>,
625 commit_sha: impl Into<String>,
626 target: impl Into<String>,
627 ) -> Self {
628 let timestamp = unix_now();
629 Self {
630 id: id.into(),
631 commit_sha: commit_sha.into(),
632 target: target.into(),
633 status: ReviewRunStatus::Queued,
634 phase: "queued".to_owned(),
635 ledger_entries: 0,
636 error: None,
637 created_at_unix: timestamp,
638 updated_at_unix: timestamp,
639 started_at_unix: None,
640 completed_at_unix: None,
641 }
642 }
643
644 fn mark_running(&mut self, phase: impl Into<String>) {
645 let timestamp = unix_now();
646 self.status = ReviewRunStatus::Running;
647 self.phase = phase.into();
648 self.error = None;
649 self.updated_at_unix = timestamp;
650 self.started_at_unix = Some(timestamp);
651 self.completed_at_unix = None;
652 }
653
654 fn mark_completed(&mut self, ledger_entries: usize) {
655 let timestamp = unix_now();
656 self.status = ReviewRunStatus::Completed;
657 self.phase = "completed".to_owned();
658 self.ledger_entries = ledger_entries;
659 self.error = None;
660 self.updated_at_unix = timestamp;
661 self.completed_at_unix = Some(timestamp);
662 }
663
664 fn mark_failed(&mut self, error: impl Into<String>) {
665 let timestamp = unix_now();
666 self.status = ReviewRunStatus::Failed;
667 self.phase = "failed".to_owned();
668 self.error = Some(error.into());
669 self.updated_at_unix = timestamp;
670 self.completed_at_unix = Some(timestamp);
671 }
672
673 fn mark_cancelled(&mut self) {
674 let timestamp = unix_now();
675 self.status = ReviewRunStatus::Cancelled;
676 self.phase = "cancelled".to_owned();
677 self.error = None;
678 self.updated_at_unix = timestamp;
679 self.completed_at_unix = Some(timestamp);
680 }
681}
682
/// File-backed store of [`ReviewRun`] records rooted at a state directory.
#[derive(Debug, Clone)]
pub struct ReviewRunStore {
    /// State directory; run records live in a subdirectory of it.
    root: PathBuf,
}
687
688impl ReviewRunStore {
689 pub fn new(root: impl Into<PathBuf>) -> Self {
690 Self { root: root.into() }
691 }
692
693 pub fn runs_dir(&self) -> PathBuf {
694 self.root.join(REVIEW_RUNS_DIR)
695 }
696
697 pub fn path(&self, id: &str) -> PathBuf {
698 self.runs_dir().join(format!("{id}.json"))
699 }
700
701 pub fn create_queued(
702 &self,
703 commit_sha: &str,
704 target: impl Into<String>,
705 ) -> Result<ReviewRun, ReviewerError> {
706 let run = ReviewRun::queued(generate_run_id(commit_sha), commit_sha, target);
707 self.write(&run)?;
708 Ok(run)
709 }
710
711 fn ensure_queued(
712 &self,
713 run_id: &str,
714 commit_sha: &str,
715 target: &str,
716 ) -> Result<ReviewRun, ReviewerError> {
717 match self.read(run_id) {
718 Ok(run) => Ok(run),
719 Err(ReviewerError::ReviewRunNotFound { .. }) => {
720 let run = ReviewRun::queued(run_id, commit_sha, target);
721 self.write(&run)?;
722 Ok(run)
723 }
724 Err(error) => Err(error),
725 }
726 }
727
728 pub fn read(&self, id: &str) -> Result<ReviewRun, ReviewerError> {
729 let path = self.path(id);
730 let contents = fs::read_to_string(&path).map_err(|source| match source.kind() {
731 io::ErrorKind::NotFound => ReviewerError::ReviewRunNotFound { id: id.to_owned() },
732 _ => ReviewerError::RunIo(source),
733 })?;
734 serde_json::from_str(&contents).map_err(ReviewerError::RunJson)
735 }
736
737 pub fn list(&self) -> Result<Vec<ReviewRun>, ReviewerError> {
738 let dir = self.runs_dir();
739 let entries = match fs::read_dir(&dir) {
740 Ok(entries) => entries,
741 Err(error) if error.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
742 Err(error) => return Err(ReviewerError::RunIo(error)),
743 };
744 let mut runs: Vec<ReviewRun> = Vec::new();
745 for entry in entries {
746 let entry = entry.map_err(ReviewerError::RunIo)?;
747 if entry
748 .path()
749 .extension()
750 .is_none_or(|extension| extension != "json")
751 {
752 continue;
753 }
754 let contents = fs::read_to_string(entry.path()).map_err(ReviewerError::RunIo)?;
755 runs.push(serde_json::from_str(&contents).map_err(ReviewerError::RunJson)?);
756 }
757 runs.sort_by(|left, right| {
758 right
759 .updated_at_unix
760 .cmp(&left.updated_at_unix)
761 .then_with(|| right.id.cmp(&left.id))
762 });
763 Ok(runs)
764 }
765
766 pub fn latest_result(&self) -> Result<ReviewRun, ReviewerError> {
767 self.list()?
768 .into_iter()
769 .find(|run| {
770 matches!(
771 run.status,
772 ReviewRunStatus::Completed
773 | ReviewRunStatus::Failed
774 | ReviewRunStatus::Cancelled
775 )
776 })
777 .ok_or(ReviewerError::NoReviewRuns)
778 }
779
780 pub fn mark_running(&self, id: &str, phase: &str) -> Result<ReviewRun, ReviewerError> {
781 let mut run = self.read(id)?;
782 run.mark_running(phase);
783 self.write(&run)?;
784 Ok(run)
785 }
786
787 pub fn mark_completed(
788 &self,
789 id: &str,
790 ledger_entries: usize,
791 ) -> Result<ReviewRun, ReviewerError> {
792 let mut run = self.read(id)?;
793 run.mark_completed(ledger_entries);
794 self.write(&run)?;
795 Ok(run)
796 }
797
798 pub fn mark_failed(
799 &self,
800 id: &str,
801 error: impl Into<String>,
802 ) -> Result<ReviewRun, ReviewerError> {
803 let mut run = self.read(id)?;
804 run.mark_failed(error);
805 self.write(&run)?;
806 Ok(run)
807 }
808
809 pub fn cancel_queued(&self, id: &str) -> Result<ReviewRun, ReviewerError> {
810 let mut run = self.read(id)?;
811 if run.status != ReviewRunStatus::Queued {
812 return Err(ReviewerError::CannotCancelReview {
813 id: id.to_owned(),
814 status: run.status,
815 });
816 }
817 run.mark_cancelled();
818 self.write(&run)?;
819 Ok(run)
820 }
821
822 fn write(&self, run: &ReviewRun) -> Result<(), ReviewerError> {
823 fs::create_dir_all(self.runs_dir()).map_err(ReviewerError::RunIo)?;
824 let bytes = serde_json::to_vec_pretty(run).map_err(ReviewerError::RunJson)?;
825 fs::write(self.path(&run.id), bytes).map_err(ReviewerError::RunIo)
826 }
827}
828
829#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
830pub struct QueuedReview {
831 #[serde(default)]
832 pub run_id: String,
833 pub commit_sha: String,
834 pub enqueued_at_unix: u64,
835}
836
/// JSON-lines queue of commits awaiting review, stored under a state directory.
#[derive(Debug, Clone)]
pub struct ReviewQueue {
    /// State directory containing the queue file.
    root: PathBuf,
}
841
842impl ReviewQueue {
843 pub fn new(root: impl Into<PathBuf>) -> Self {
844 Self { root: root.into() }
845 }
846
847 pub fn path(&self) -> PathBuf {
848 self.root.join(REVIEW_QUEUE_FILE)
849 }
850
851 pub fn enqueue(&self, commit_sha: impl Into<String>) -> Result<QueuedReview, ReviewerError> {
852 fs::create_dir_all(&self.root).map_err(ReviewerError::QueueIo)?;
853 let commit_sha = commit_sha.into();
854 let run = ReviewRunStore::new(&self.root).create_queued(&commit_sha, "commit")?;
855 let item = QueuedReview {
856 run_id: run.id,
857 commit_sha,
858 enqueued_at_unix: unix_now(),
859 };
860 let mut file = fs::OpenOptions::new()
861 .create(true)
862 .append(true)
863 .open(self.path())
864 .map_err(ReviewerError::QueueIo)?;
865 serde_json::to_writer(&mut file, &item).map_err(ReviewerError::QueueJson)?;
866 writeln!(file).map_err(ReviewerError::QueueIo)?;
867 Ok(item)
868 }
869
870 pub fn pending(&self) -> Result<Vec<QueuedReview>, ReviewerError> {
871 let contents = match fs::read_to_string(self.path()) {
872 Ok(contents) => contents,
873 Err(error) if error.kind() == io::ErrorKind::NotFound => return Ok(Vec::new()),
874 Err(error) => return Err(ReviewerError::QueueIo(error)),
875 };
876
877 contents
878 .lines()
879 .filter(|line| !line.trim().is_empty())
880 .map(|line| serde_json::from_str(line).map_err(ReviewerError::QueueJson))
881 .collect()
882 }
883
884 pub fn remove_sha(&self, sha: &str) -> Result<(), ReviewerError> {
887 let remaining: Vec<QueuedReview> = self
888 .pending()?
889 .into_iter()
890 .filter(|item| item.commit_sha != sha)
891 .collect();
892 self.rewrite(&remaining)
893 }
894
895 fn rewrite(&self, items: &[QueuedReview]) -> Result<(), ReviewerError> {
896 if items.is_empty() {
897 return match fs::remove_file(self.path()) {
898 Ok(()) => Ok(()),
899 Err(error) if error.kind() == io::ErrorKind::NotFound => Ok(()),
900 Err(error) => Err(ReviewerError::QueueIo(error)),
901 };
902 }
903
904 let mut file = fs::File::create(self.path()).map_err(ReviewerError::QueueIo)?;
905 for item in items {
906 serde_json::to_writer(&mut file, item).map_err(ReviewerError::QueueJson)?;
907 writeln!(file).map_err(ReviewerError::QueueIo)?;
908 }
909 Ok(())
910 }
911
912 pub fn remove_run_id(&self, run_id: &str) -> Result<(), ReviewerError> {
913 let remaining: Vec<QueuedReview> = self
914 .pending()?
915 .into_iter()
916 .filter(|item| item.run_id != run_id)
917 .collect();
918 self.rewrite(&remaining)
919 }
920}
921
922pub trait MaterialLoader {
925 fn load(&self, sha: &str) -> Result<(Claim, String), ReviewerError>;
926}
927
/// [`MaterialLoader`] that reads commit messages and diffs from git.
#[derive(Debug, Default, Clone)]
pub struct GitMaterialLoader {
    /// Patterns handed to `Claim::parse_with`; when empty, `Claim::parse` is used instead.
    pub evidence_patterns: Vec<String>,
}
934
935impl GitMaterialLoader {
936 pub fn with_patterns(evidence_patterns: Vec<String>) -> Self {
937 Self { evidence_patterns }
938 }
939}
940
941impl MaterialLoader for GitMaterialLoader {
942 fn load(&self, sha: &str) -> Result<(Claim, String), ReviewerError> {
943 let message = git_output(["show", "--format=%B", "--no-patch", sha])?;
944 let diff = git_output(["show", "--format=", "--patch", sha])?;
945 let claim = if self.evidence_patterns.is_empty() {
946 Claim::parse(&message)?
947 } else {
948 Claim::parse_with(&message, &self.evidence_patterns)?
949 };
950 Ok((claim, diff))
951 }
952}
953
/// Summary of one queue drain.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct DrainReport {
    /// Commits reviewed during the drain, in processing order.
    pub reviewed: Vec<String>,
    /// Total number of ledger entries written during the drain.
    pub ledger_entries: usize,
}
959
960pub fn drain_once<R: ProcessRunner, L: MaterialLoader>(
964 queue: &ReviewQueue,
965 loader: &L,
966 selection: &ReviewSelection,
967 context: &str,
968 runner: &R,
969 store: &LedgerStore,
970) -> Result<DrainReport, ReviewerError> {
971 let pending = queue.pending()?;
972 let run_store = ReviewRunStore::new(&queue.root);
973 let mut seen = std::collections::BTreeSet::new();
974 let mut order = Vec::new();
975 for item in &pending {
976 if seen.insert(item.commit_sha.clone()) {
977 order.push(item.clone());
978 } else if !item.run_id.trim().is_empty()
979 && let Ok(run) = run_store.read(&item.run_id)
980 && run.status == ReviewRunStatus::Queued
981 {
982 run_store.cancel_queued(&item.run_id)?;
983 }
984 }
985
986 let mut report = DrainReport::default();
987 for item in order {
988 let sha = item.commit_sha;
989 let run_id = if item.run_id.trim().is_empty() {
990 generate_run_id(&sha)
991 } else {
992 item.run_id
993 };
994 let run = run_store.ensure_queued(&run_id, &sha, "commit")?;
995 if run.status == ReviewRunStatus::Cancelled {
996 queue.remove_sha(&sha)?;
997 continue;
998 }
999 run_store.mark_running(&run_id, "reviewing")?;
1000 let (claim, diff) = loader.load(&sha)?;
1001 let prompt = first_pass_prompt(&claim, &diff, context);
1002 let job = ReviewJob {
1003 commit_sha: sha.clone(),
1004 claim,
1005 diff,
1006 context: context.to_owned(),
1007 request: selection.request_for(prompt),
1008 strict: selection.strict.clone(),
1009 };
1010 let execution = match execute_review_job(job, runner, store) {
1011 Ok(execution) => execution,
1012 Err(error) => {
1013 let _ = run_store.mark_failed(&run_id, error.to_string());
1014 return Err(error);
1015 }
1016 };
1017 report.ledger_entries += execution.entries.len();
1018 run_store.mark_completed(&run_id, execution.entries.len())?;
1019 queue.remove_sha(&sha)?;
1020 report.reviewed.push(sha);
1021 }
1022
1023 Ok(report)
1024}
1025
1026fn review_context(config: &config::TruthMirrorConfig) -> String {
1029 let repo_root = match git_output(["rev-parse", "--show-toplevel"]) {
1030 Ok(root) => PathBuf::from(root.trim()),
1031 Err(_) => return String::new(),
1032 };
1033 let provider = crate::context::trajectory_provider(&repo_root, &config.history);
1034 crate::context::build_review_context(
1035 &repo_root,
1036 &config.ground_truth,
1037 &config.history,
1038 Some(provider.as_ref()),
1039 )
1040 .unwrap_or_default()
1041}
1042
1043pub fn run_watch_command(
1044 args: cli::WatchArgs,
1045 state_dir: &Path,
1046 config: &config::TruthMirrorConfig,
1047) -> Result<ExitCode> {
1048 let selection = ReviewSelection::resolve(
1049 args.watched_agent,
1050 args.watched_model,
1051 args.reviewer_harness,
1052 args.reviewer_model,
1053 args.reviewer_effort,
1054 args.allow_same_model,
1055 config,
1056 )?;
1057 let queue = ReviewQueue::new(state_dir);
1058 let store = LedgerStore::new(state_dir);
1059 let loader = GitMaterialLoader::with_patterns(config.gates.to_policy().evidence_patterns);
1060 let runner = StdProcessRunner;
1061
1062 if args.once {
1063 let context = review_context(config);
1064 let report = drain_once(&queue, &loader, &selection, &context, &runner, &store)?;
1065 println!(
1066 "truth-mirror watch: reviewed {} commit(s), wrote {} ledger entrie(s)",
1067 report.reviewed.len(),
1068 report.ledger_entries
1069 );
1070 return Ok(ExitCode::SUCCESS);
1071 }
1072
1073 let interval = std::time::Duration::from_secs(args.poll_secs.max(1));
1074 loop {
1075 let context = review_context(config);
1077 let report = drain_once(&queue, &loader, &selection, &context, &runner, &store)?;
1078 if !report.reviewed.is_empty() {
1079 println!(
1080 "truth-mirror watch: reviewed {} commit(s)",
1081 report.reviewed.len()
1082 );
1083 }
1084 std::thread::sleep(interval);
1085 }
1086}
1087
/// Thresholds that end a strict goal loop early; a threshold of `0` is disabled.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct StrictGoalPolicy {
    /// Stop once this many lies were exposed (`0` = never stop for this reason).
    pub stop_after_lies: u32,
    /// Stop once this many fuckups were registered (`0` = never stop for this reason).
    pub stop_after_fuckups: u32,
}

/// Running totals a [`StrictGoalPolicy`] is evaluated against.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct StrictGoalCounters {
    /// Number of lies exposed so far.
    pub lies_exposed: u32,
    /// Number of fuckups registered so far.
    pub fuckups_registered: u32,
}

/// Outcome of evaluating a [`StrictGoalPolicy`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StrictGoalDecision {
    /// No threshold reached; keep going.
    Continue,
    /// A threshold was reached.
    Stop {
        /// Which threshold triggered the stop.
        reason: StrictGoalStopReason,
    },
}

/// Threshold that caused a strict goal loop to stop.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StrictGoalStopReason {
    /// The lies threshold was reached.
    LiesExposed,
    /// The fuckups threshold was reached.
    FuckupsRegistered,
}

impl StrictGoalPolicy {
    /// Decides whether the loop should stop for the given counters.
    ///
    /// The lies threshold is checked first, so it wins when both thresholds are reached.
    pub fn decide(&self, counters: StrictGoalCounters) -> StrictGoalDecision {
        // A limit of zero means "disabled", so it can never be reached.
        let reached = |limit: u32, count: u32| limit > 0 && count >= limit;

        let reason = if reached(self.stop_after_lies, counters.lies_exposed) {
            StrictGoalStopReason::LiesExposed
        } else if reached(self.stop_after_fuckups, counters.fuckups_registered) {
            StrictGoalStopReason::FuckupsRegistered
        } else {
            return StrictGoalDecision::Continue;
        };
        StrictGoalDecision::Stop { reason }
    }
}
1129
1130#[derive(Clone, Debug, Eq, PartialEq)]
1131pub struct StrictGoalOutcome {
1132 pub passes: u32,
1133 pub counters: StrictGoalCounters,
1134 pub stop_reason: Option<StrictGoalStopReason>,
1137 pub entries: Vec<LedgerEntry>,
1138}
1139
1140impl StrictGoalOutcome {
1141 pub fn stop_reason_suffix(&self) -> &'static str {
1142 match self.stop_reason {
1143 Some(StrictGoalStopReason::LiesExposed) => " (stopped: lies exposed)",
1144 Some(StrictGoalStopReason::FuckupsRegistered) => " (stopped: fuckups registered)",
1145 None => " (stopped: max passes)",
1146 }
1147 }
1148}
1149
1150#[allow(clippy::too_many_arguments)]
1155pub fn run_strict_goal_loop<R: ProcessRunner>(
1156 commit_sha: &str,
1157 claim: &Claim,
1158 diff: &str,
1159 context: &str,
1160 selection: &ReviewSelection,
1161 policy: StrictGoalPolicy,
1162 max_passes: u32,
1163 runner: &R,
1164 store: &LedgerStore,
1165) -> Result<StrictGoalOutcome, ReviewerError> {
1166 let ceiling = max_passes.max(1);
1167 let mut outcome = StrictGoalOutcome {
1168 passes: 0,
1169 counters: StrictGoalCounters {
1170 lies_exposed: 0,
1171 fuckups_registered: 0,
1172 },
1173 stop_reason: None,
1174 entries: Vec::new(),
1175 };
1176
1177 while outcome.passes < ceiling {
1178 let prompt = strict_goal_prompt(claim, diff, context, outcome.passes + 1, &outcome.entries);
1179 let request = selection.request_for(prompt);
1180 let plan = ReviewPlan::build(request.clone())?;
1181 let output = plan.run_with(&request.prompt, runner)?;
1182 ensure_process_success(&output)?;
1183 let verdict = ParsedVerdict::parse(&output.stdout)?;
1184
1185 let job = ReviewJob {
1186 commit_sha: commit_sha.to_owned(),
1187 claim: claim.clone(),
1188 diff: diff.to_owned(),
1189 context: context.to_owned(),
1190 request,
1191 strict: None,
1192 };
1193 let entry = entry_from_verdict(&job, &plan, &verdict);
1194 store.append_entry(&entry)?;
1195 outcome.entries.push(entry);
1196
1197 outcome.passes += 1;
1198 if verdict.verdict == Verdict::Reject {
1199 outcome.counters.lies_exposed += 1;
1200 }
1201 outcome.counters.fuckups_registered = outcome
1202 .counters
1203 .fuckups_registered
1204 .saturating_add(u32::try_from(verdict.findings.len()).unwrap_or(u32::MAX));
1205
1206 if let StrictGoalDecision::Stop { reason } = policy.decide(outcome.counters) {
1207 outcome.stop_reason = Some(reason);
1208 break;
1209 }
1210 }
1211
1212 Ok(outcome)
1213}
1214
1215fn strict_goal_prompt(
1216 claim: &Claim,
1217 diff: &str,
1218 context: &str,
1219 pass: u32,
1220 prior: &[LedgerEntry],
1221) -> String {
1222 let prior_findings: Vec<String> = prior
1223 .iter()
1224 .flat_map(|entry| entry.findings.clone())
1225 .collect();
1226 let prior_block = if prior_findings.is_empty() {
1227 "(none)".to_owned()
1228 } else {
1229 prior_findings.join("\n")
1230 };
1231 format!(
1232 "{ADVERSARIAL_PREAMBLE}\n\nStrict-goal loop, pass {pass}. Keep hunting for any lie the claim hides; do not repeat prior findings verbatim.{}\n\nCLAIM:\n{}\n\nPRIOR FINDINGS:\n{prior_block}\n\nDIFF:\n{}",
1233 context_block(context),
1234 claim.to_line(),
1235 diff
1236 )
1237}
1238
1239pub fn run_review_command(
1240 args: cli::ReviewArgs,
1241 state_dir: &Path,
1242 config: &config::TruthMirrorConfig,
1243) -> Result<ExitCode> {
1244 if let Some(command) = args.command {
1245 return run_review_run_command(command, state_dir);
1246 }
1247
1248 let material = ReviewMaterial::load(
1249 &args,
1250 state_dir,
1251 &config.gates.to_policy().evidence_patterns,
1252 )?;
1253
1254 let mut selection = ReviewSelection::resolve(
1255 args.watched_agent,
1256 args.watched_model,
1257 args.reviewer_harness,
1258 args.reviewer_model,
1259 args.reviewer_effort,
1260 args.allow_same_model,
1261 config,
1262 )?;
1263
1264 if args.strict_two_pass {
1265 selection.strict = Some(ReviewSelection::resolve_arbiter(
1266 selection.watched_agent,
1267 args.arbiter_harness,
1268 args.arbiter_model,
1269 args.arbiter_effort,
1270 config,
1271 )?);
1272 }
1273 let store = LedgerStore::new(state_dir);
1274 let run_store = ReviewRunStore::new(state_dir);
1275 let context = review_context(config);
1276 let run = run_store.create_queued(&material.commit_sha, material.target_label.clone())?;
1277 run_store.mark_running(&run.id, "reviewing")?;
1278
1279 if args.strict_goal {
1280 let policy = config
1281 .strict
1282 .goal_policy(args.stop_after_lies, args.stop_after_fuckups);
1283 let max_passes = args.max_passes.unwrap_or(config.strict.max_passes);
1284 let outcome = match run_strict_goal_loop(
1285 &material.commit_sha,
1286 &material.claim,
1287 &material.diff,
1288 &context,
1289 &selection,
1290 policy,
1291 max_passes,
1292 &StdProcessRunner,
1293 &store,
1294 ) {
1295 Ok(outcome) => outcome,
1296 Err(error) => {
1297 let _ = run_store.mark_failed(&run.id, error.to_string());
1298 return Err(error.into());
1299 }
1300 };
1301 run_store.mark_completed(&run.id, outcome.entries.len())?;
1302 println!(
1303 "truth-mirror strict-goal: run {}, {} pass(es), {} lie(s), {} fuckup(s){}",
1304 run.id,
1305 outcome.passes,
1306 outcome.counters.lies_exposed,
1307 outcome.counters.fuckups_registered,
1308 outcome.stop_reason_suffix(),
1309 );
1310 return Ok(ExitCode::SUCCESS);
1311 }
1312
1313 let prompt = first_pass_prompt(&material.claim, &material.diff, &context);
1314 let job = ReviewJob {
1315 commit_sha: material.commit_sha,
1316 claim: material.claim,
1317 diff: material.diff,
1318 context,
1319 request: selection.request_for(prompt),
1320 strict: selection.strict.clone(),
1321 };
1322
1323 let execution = match execute_review_job(job, &StdProcessRunner, &store) {
1324 Ok(execution) => execution,
1325 Err(error) => {
1326 let _ = run_store.mark_failed(&run.id, error.to_string());
1327 return Err(error.into());
1328 }
1329 };
1330 run_store.mark_completed(&run.id, execution.entries.len())?;
1331 println!(
1332 "truth-mirror review: run {}, wrote {} ledger entrie(s)",
1333 run.id,
1334 execution.entries.len()
1335 );
1336 Ok(ExitCode::SUCCESS)
1337}
1338
1339fn run_review_run_command(command: cli::ReviewCommand, state_dir: &Path) -> Result<ExitCode> {
1340 let runs = ReviewRunStore::new(state_dir);
1341 match command {
1342 cli::ReviewCommand::Status { run_id } => {
1343 if let Some(run_id) = run_id {
1344 print_run(&runs.read(&run_id)?);
1345 } else {
1346 let all = runs.list()?;
1347 if all.is_empty() {
1348 println!("No review runs.");
1349 } else {
1350 for run in all {
1351 print_run_summary(&run);
1352 }
1353 }
1354 }
1355 }
1356 cli::ReviewCommand::Result { run_id } => {
1357 let run = match run_id {
1358 Some(run_id) => runs.read(&run_id)?,
1359 None => runs.latest_result()?,
1360 };
1361 print_run(&run);
1362 print_run_ledger_entries(state_dir, &run)?;
1363 }
1364 cli::ReviewCommand::Cancel { run_id } => {
1365 let run = runs.cancel_queued(&run_id)?;
1366 ReviewQueue::new(state_dir).remove_run_id(&run_id)?;
1367 println!("cancelled review run {} ({})", run.id, run.commit_sha);
1368 }
1369 }
1370 Ok(ExitCode::SUCCESS)
1371}
1372
1373fn print_run_summary(run: &ReviewRun) {
1374 println!(
1375 "{} {} {} {} entries={} updated={}",
1376 run.id, run.status, run.commit_sha, run.phase, run.ledger_entries, run.updated_at_unix
1377 );
1378}
1379
1380fn print_run(run: &ReviewRun) {
1381 println!("run: {}", run.id);
1382 println!("status: {}", run.status);
1383 println!("commit: {}", run.commit_sha);
1384 println!("target: {}", run.target);
1385 println!("phase: {}", run.phase);
1386 println!("ledger_entries: {}", run.ledger_entries);
1387 println!("created_at_unix: {}", run.created_at_unix);
1388 println!("updated_at_unix: {}", run.updated_at_unix);
1389 if let Some(started) = run.started_at_unix {
1390 println!("started_at_unix: {started}");
1391 }
1392 if let Some(completed) = run.completed_at_unix {
1393 println!("completed_at_unix: {completed}");
1394 }
1395 if let Some(error) = &run.error {
1396 println!("error: {error}");
1397 }
1398}
1399
1400fn print_run_ledger_entries(state_dir: &Path, run: &ReviewRun) -> Result<(), ReviewerError> {
1401 let store = LedgerStore::new(state_dir);
1402 let entries: Vec<LedgerEntry> = store
1403 .read_history()?
1404 .into_iter()
1405 .filter(|entry| entry.commit_sha == run.commit_sha)
1406 .collect();
1407 if entries.is_empty() {
1408 println!("ledger_entries: none");
1409 return Ok(());
1410 }
1411 println!("ledger_entries:");
1412 for entry in entries {
1413 println!(
1414 "- {} {} {} findings={}",
1415 entry.commit_sha,
1416 entry.verdict,
1417 entry.disposition,
1418 entry.findings.len()
1419 );
1420 }
1421 Ok(())
1422}
1423
1424#[derive(Clone, Debug, Eq, PartialEq)]
1425struct ReviewMaterial {
1426 commit_sha: String,
1427 target_label: String,
1428 claim: Claim,
1429 diff: String,
1430}
1431
1432impl ReviewMaterial {
1433 fn load(
1434 args: &cli::ReviewArgs,
1435 state_dir: &Path,
1436 evidence_patterns: &[String],
1437 ) -> Result<Self, ReviewerError> {
1438 let parse = |text: &str| {
1439 if evidence_patterns.is_empty() {
1440 Claim::parse(text)
1441 } else {
1442 Claim::parse_with(text, evidence_patterns)
1443 }
1444 };
1445
1446 let scope = if args.staged {
1447 ReviewScope::Staged
1448 } else {
1449 args.scope
1450 };
1451
1452 match scope {
1453 ReviewScope::Commit => {
1454 let sha = args
1455 .target
1456 .clone()
1457 .ok_or(ReviewerError::MissingReviewTarget)?;
1458 let message = git_output(["show", "--format=%B", "--no-patch", sha.as_str()])?;
1459 let diff = git_output(["show", "--format=", "--patch", sha.as_str()])?;
1460 let claim = parse(&message)?;
1461 Ok(Self {
1462 commit_sha: sha.clone(),
1463 target_label: format!("commit:{sha}"),
1464 claim,
1465 diff,
1466 })
1467 }
1468 ReviewScope::Staged => Self::load_staged(state_dir, &parse),
1469 ReviewScope::Auto => {
1470 reject_target_with_scope(args)?;
1471 if working_tree_dirty()? {
1472 Self::load_working_tree(state_dir, &parse)
1473 } else {
1474 Self::load_branch(args.base.as_deref(), &parse)
1475 }
1476 }
1477 ReviewScope::WorkingTree => {
1478 reject_target_with_scope(args)?;
1479 Self::load_working_tree(state_dir, &parse)
1480 }
1481 ReviewScope::Branch => {
1482 reject_target_with_scope(args)?;
1483 Self::load_branch(args.base.as_deref(), &parse)
1484 }
1485 }
1486 }
1487
1488 fn load_staged<F>(state_dir: &Path, parse: &F) -> Result<Self, ReviewerError>
1489 where
1490 F: Fn(&str) -> Result<Claim, crate::claim::ClaimError>,
1491 {
1492 let raw = git_output(["diff", "--cached"])?;
1493 let files = git_output(["diff", "--cached", "--name-only"])?;
1494 let diff = materialize_diff("staged", &raw, &files);
1495 let claim = parse(&read_claim_file(state_dir)?)?;
1496 Ok(Self {
1497 commit_sha: "STAGED".to_owned(),
1498 target_label: "staged".to_owned(),
1499 claim,
1500 diff,
1501 })
1502 }
1503
1504 fn load_working_tree<F>(state_dir: &Path, parse: &F) -> Result<Self, ReviewerError>
1505 where
1506 F: Fn(&str) -> Result<Claim, crate::claim::ClaimError>,
1507 {
1508 let status = git_output(["status", "--porcelain"])?;
1509 let tracked = git_output(["diff", "HEAD", "--patch"])?;
1510 let files = git_output(["diff", "HEAD", "--name-only"])?;
1511 let untracked = untracked_file_context()?;
1512 let raw = format!(
1513 "WORKING TREE STATUS:\n{status}\n\nTRACKED DIFF AGAINST HEAD:\n{tracked}\n\nUNTRACKED FILES:\n{untracked}"
1514 );
1515 let diff = materialize_diff("working-tree", &raw, &files);
1516 let claim = parse(&read_claim_file(state_dir)?)?;
1517 Ok(Self {
1518 commit_sha: "WORKING_TREE".to_owned(),
1519 target_label: "working-tree".to_owned(),
1520 claim,
1521 diff,
1522 })
1523 }
1524
1525 fn load_branch<F>(base: Option<&str>, parse: &F) -> Result<Self, ReviewerError>
1526 where
1527 F: Fn(&str) -> Result<Claim, crate::claim::ClaimError>,
1528 {
1529 let base = match base {
1530 Some(base) => base.to_owned(),
1531 None => default_branch_ref()?,
1532 };
1533 let merge_base = git_output_slice(&["merge-base", "HEAD", &base])?;
1534 let merge_base = merge_base.trim().to_owned();
1535 let range = format!("{merge_base}..HEAD");
1536 let message = git_output(["show", "--format=%B", "--no-patch", "HEAD"])?;
1537 let log = git_output_slice(&["log", "--oneline", &range])?;
1538 let stat = git_output_slice(&["diff", "--stat", &range])?;
1539 let raw_patch = git_output_slice(&["diff", "--patch", &range])?;
1540 let files = git_output_slice(&["diff", "--name-only", &range])?;
1541 let raw = format!(
1542 "BRANCH BASE: {base}\nMERGE BASE: {merge_base}\nCOMMITS:\n{log}\n\nDIFF STAT:\n{stat}\n\nDIFF:\n{raw_patch}"
1543 );
1544 let diff = materialize_diff(&format!("branch:{base}"), &raw, &files);
1545 let claim = parse(&message)?;
1546 Ok(Self {
1547 commit_sha: "HEAD".to_owned(),
1548 target_label: format!("branch:{base}"),
1549 claim,
1550 diff,
1551 })
1552 }
1553}
1554
1555fn reject_target_with_scope(args: &cli::ReviewArgs) -> Result<(), ReviewerError> {
1556 if let Some(target) = &args.target {
1557 return Err(ReviewerError::UnexpectedReviewTarget {
1558 scope: args.scope,
1559 target: target.clone(),
1560 });
1561 }
1562 Ok(())
1563}
1564
1565fn read_claim_file(state_dir: &Path) -> Result<String, ReviewerError> {
1566 let claim_path = state_dir.join("claim.txt");
1567 fs::read_to_string(&claim_path).map_err(|source| ReviewerError::ClaimFileRead {
1568 path: claim_path,
1569 source,
1570 })
1571}
1572
1573fn working_tree_dirty() -> Result<bool, ReviewerError> {
1574 Ok(!git_output(["status", "--porcelain"])?.trim().is_empty())
1575}
1576
1577fn default_branch_ref() -> Result<String, ReviewerError> {
1578 if let Ok(symbolic) = git_output([
1579 "symbolic-ref",
1580 "--quiet",
1581 "--short",
1582 "refs/remotes/origin/HEAD",
1583 ]) {
1584 let trimmed = symbolic.trim();
1585 if !trimmed.is_empty() {
1586 return Ok(trimmed.to_owned());
1587 }
1588 }
1589
1590 for candidate in [
1591 "origin/main",
1592 "origin/master",
1593 "origin/trunk",
1594 "main",
1595 "master",
1596 "trunk",
1597 ] {
1598 if git_output_slice(&["rev-parse", "--verify", "--quiet", candidate]).is_ok() {
1599 return Ok(candidate.to_owned());
1600 }
1601 }
1602
1603 Err(ReviewerError::DefaultBranchNotFound)
1604}
1605
1606fn materialize_diff(label: &str, raw: &str, files: &str) -> String {
1607 let file_list: Vec<&str> = files
1608 .lines()
1609 .filter(|line| !line.trim().is_empty())
1610 .collect();
1611 let bytes = raw.len();
1612 if bytes <= MAX_INLINE_DIFF_BYTES && file_list.len() <= MAX_INLINE_DIFF_FILES {
1613 return raw.to_owned();
1614 }
1615
1616 format!(
1617 "Diff for {label} is too large to inline safely.\ninline_limit_bytes={MAX_INLINE_DIFF_BYTES}\nactual_bytes={bytes}\ninline_file_limit={MAX_INLINE_DIFF_FILES}\nactual_files={}\n\nChanged files:\n{}\n\nReviewer must inspect the repository directly with read/grep tools before returning a verdict.",
1618 file_list.len(),
1619 if file_list.is_empty() {
1620 "(none)".to_owned()
1621 } else {
1622 file_list.join("\n")
1623 }
1624 )
1625}
1626
1627fn untracked_file_context() -> Result<String, ReviewerError> {
1628 let files = git_output(["ls-files", "--others", "--exclude-standard"])?;
1629 let mut output = String::new();
1630 for file in files.lines().filter(|line| !line.trim().is_empty()) {
1631 let path = Path::new(file);
1632 let metadata = match fs::metadata(path) {
1633 Ok(metadata) => metadata,
1634 Err(_) => continue,
1635 };
1636 if !metadata.is_file() {
1637 continue;
1638 }
1639 if metadata.len() > MAX_UNTRACKED_FILE_BYTES {
1640 output.push_str(&format!(
1641 "\n--- {file} omitted: {} bytes exceeds {MAX_UNTRACKED_FILE_BYTES} byte inline limit ---\n",
1642 metadata.len()
1643 ));
1644 continue;
1645 }
1646 let bytes = match fs::read(path) {
1647 Ok(bytes) => bytes,
1648 Err(_) => continue,
1649 };
1650 if bytes.contains(&0) {
1651 output.push_str(&format!("\n--- {file} omitted: binary file ---\n"));
1652 continue;
1653 }
1654 output.push_str(&format!(
1655 "\n--- {file} ---\n{}",
1656 String::from_utf8_lossy(&bytes)
1657 ));
1658 }
1659
1660 if output.is_empty() {
1661 Ok("(none)".to_owned())
1662 } else {
1663 Ok(output)
1664 }
1665}
1666
/// Errors produced while selecting, running and recording reviews.
#[derive(Debug, Error)]
pub enum ReviewerError {
    /// The model name for `role` is blank.
    #[error("missing {role} model")]
    MissingModel { role: String },
    /// Watched and reviewer models are the same and no waiver was given.
    #[error(
        "same reviewer model is disallowed without --allow-same-model: watched={watched_model}, reviewer={reviewer_model}"
    )]
    SameModelWithoutWaiver {
        watched_model: String,
        reviewer_model: String,
    },
    /// The strict arbiter model equals the watched or the first reviewer model.
    #[error("strict arbiter model must differ from watched and first reviewer models")]
    StrictArbiterModelNotDistinct,
    /// No adversarial pair is configured for the writer harness.
    #[error("no adversarial pair configured for writer harness {writer:?}")]
    NoPairForWriter { writer: String },
    /// Strict review was requested but no arbiter is available.
    #[error(
        "strict review requires an arbiter (pair.arbiter or --arbiter-harness/--arbiter-model)"
    )]
    MissingArbiter,
    /// A harness was overridden without also overriding its model.
    #[error(
        "--{role}-harness={harness:?} was overridden without a matching --{role}-model; the pair's model is for a different harness"
    )]
    OverrideNeedsModel { role: String, harness: String },
    /// The custom harness was selected without an explicit command configuration.
    #[error("custom reviewer harness requires explicit command configuration")]
    UnsupportedCustomHarness,
    /// A watched-agent slug did not match any known agent.
    #[error("unknown watched agent {value:?}")]
    UnknownAgent { value: String },
    /// A reviewer-harness slug did not match any known harness.
    #[error("unknown reviewer harness {value:?}")]
    UnknownHarness { value: String },
    /// A commit review was requested without a target.
    #[error("missing review target")]
    MissingReviewTarget,
    /// A positional target was given to a scope that does not accept one.
    #[error("--scope={scope:?} does not accept positional target {target:?}")]
    UnexpectedReviewTarget { scope: ReviewScope, target: String },
    /// No default branch could be detected.
    #[error("could not determine default branch; pass --base explicitly")]
    DefaultBranchNotFound,
    /// The claim file at `path` could not be read.
    #[error("failed to read staged claim file {path}: {source}")]
    ClaimFileRead {
        path: PathBuf,
        #[source]
        source: io::Error,
    },
    /// Reviewer output was not parseable as the structured JSON verdict.
    #[error("reviewer output was not valid structured JSON verdict: {source}: {output:?}")]
    VerdictJson {
        source: serde_json::Error,
        output: String,
    },
    /// The structured verdict violated the expected schema.
    #[error("reviewer structured verdict violated schema: {message}")]
    VerdictSchema { message: String },
    /// The reviewer process did not exit with status 0.
    #[error("reviewer process exited with status {status:?}: {stderr}")]
    ReviewerProcessFailed { status: Option<i32>, stderr: String },
    /// A git command ran but exited unsuccessfully.
    #[error("git command failed: git {args:?}: {stderr}")]
    GitFailed { args: Vec<String>, stderr: String },
    /// The git command could not be started.
    #[error("failed to spawn git command: {0}")]
    GitSpawn(io::Error),
    /// The reviewer process could not be started.
    #[error("failed to spawn reviewer process: {0}")]
    Spawn(io::Error),
    /// The reviewer process had no stdin pipe to write the prompt to.
    #[error("failed to open reviewer stdin pipe")]
    MissingStdinPipe,
    /// Writing the prompt to the reviewer failed.
    #[error("failed to write reviewer prompt: {0}")]
    WritePrompt(io::Error),
    /// Waiting for the reviewer process failed.
    #[error("failed to wait for reviewer process: {0}")]
    Wait(io::Error),
    /// An I/O operation on the review queue failed.
    #[error("review queue IO failed: {0}")]
    QueueIo(io::Error),
    /// A JSON operation on the review queue failed.
    #[error("review queue JSON failed: {0}")]
    QueueJson(serde_json::Error),
    /// An I/O operation on a review run record failed.
    #[error("review run IO failed: {0}")]
    RunIo(io::Error),
    /// A JSON operation on a review run record failed.
    #[error("review run JSON failed: {0}")]
    RunJson(serde_json::Error),
    /// No review run exists with the given id.
    #[error("review run not found: {id}")]
    ReviewRunNotFound { id: String },
    /// No review runs exist.
    #[error("no review runs found")]
    NoReviewRuns,
    /// The run is not queued, so it cannot be cancelled.
    #[error(
        "cannot cancel review run {id} with status {status}; only queued runs can be cancelled"
    )]
    CannotCancelReview { id: String, status: ReviewRunStatus },
    /// A claim parsing error, passed through unchanged.
    #[error(transparent)]
    Claim(#[from] crate::claim::ClaimError),
    /// A ledger error, passed through unchanged.
    #[error(transparent)]
    Ledger(#[from] crate::ledger::LedgerError),
}
1750
/// Shared opening of every reviewer prompt: the adversarial instructions and
/// the JSON verdict schema the reviewer must return.
///
/// The text is sent to the reviewer verbatim; edit it with care.
const ADVERSARIAL_PREAMBLE: &str = r#"You are an ADVERSARIAL reviewer. Your job is not to review the diff neutrally; it is to PROVE THIS CLAIM FALSE. Assume the author over-rates their own work. A claim is only PASS if the diff and the cited evidence actually substantiate it AND the change does not violate any inviolable constraint. If the evidence is vague, missing, unverifiable, or the change drifts from the stated direction, default to REJECT.

Attack the change for auth and permission holes, data loss, rollback gaps, races, stale state, version skew, observability gaps, missing evidence, fake evidence, broad matchers, gates that fail open, and code that only fixes the instance instead of the defect class.

GREP THE CLASS, NOT THE INSTANCE. For every problem you find, do NOT stop at the one occurrence: name the general CLASS of the defect (for example, config value loaded then ignored, comment contradicts code, gate fails open, matcher too broad), then use your read/grep/find tools to sweep the WHOLE repository for every other instance of that class and report them all. One instance is a symptom; the class is the bug. Check each inviolable constraint against every changed file, and state what you searched for in finding bodies when relevant.

Return valid JSON only. Do not wrap it in Markdown. The schema is:
{
 "verdict": "PASS" | "REJECT",
 "summary": "one concise sentence explaining why the claim passes or fails",
 "findings": [
 {
 "severity": "critical" | "high" | "medium" | "low",
 "title": "short defect title",
 "body": "what can go wrong, why this code is vulnerable, and what evidence proves it",
 "file": "repo-relative file path",
 "line_start": 1,
 "line_end": 1,
 "confidence": 0,
 "recommendation": "concrete change required"
 }
 ],
 "next_steps": ["short concrete follow-up commands or edits"]
}

Use "PASS" only when there are no findings. Use "REJECT" when there is at least one material finding."#;
1777
/// Formats optional review context for insertion into a prompt.
///
/// Returns an empty string when `context` is blank, otherwise the untrimmed
/// context preceded by a blank line.
fn context_block(context: &str) -> String {
    match context.trim() {
        "" => String::new(),
        _ => format!("\n\n{context}"),
    }
}
1785
1786fn first_pass_prompt(claim: &Claim, diff: &str, context: &str) -> String {
1787 format!(
1788 "{ADVERSARIAL_PREAMBLE}{}\n\nCLAIM:\n{}\n\nDIFF:\n{}",
1789 context_block(context),
1790 claim.to_line(),
1791 diff
1792 )
1793}
1794
1795fn strict_second_pass_prompt(job: &ReviewJob, first_output: &str) -> String {
1796 format!(
1797 "{ADVERSARIAL_PREAMBLE}\n\nStrict second pass (COMPLETENESS CRITIC): the first reviewer returned a CLEAN verdict. Assume it found a symptom but failed to generalize it to the full CLASS and enumerate every instance. Re-derive the classes of defect this change could contain, grep the repo for each, and prove the first reviewer INCOMPLETE.{}\n\nCLAIM:\n{}\n\nFIRST REVIEW:\n{}\n\nDIFF:\n{}",
1798 context_block(&job.context),
1799 job.claim.to_line(),
1800 first_output,
1801 job.diff
1802 )
1803}
1804
1805fn entry_from_verdict(job: &ReviewJob, plan: &ReviewPlan, verdict: &ParsedVerdict) -> LedgerEntry {
1806 LedgerEntry::new(
1807 job.commit_sha.clone(),
1808 verdict.verdict,
1809 job.claim.to_line(),
1810 job.claim
1811 .evidence
1812 .iter()
1813 .map(EvidenceRef::as_str)
1814 .map(str::to_owned)
1815 .collect(),
1816 plan.reviewer_config(),
1817 verdict.findings.clone(),
1818 )
1819 .with_structured_review(
1820 verdict.summary.clone(),
1821 verdict.structured_findings.clone(),
1822 verdict.next_steps.clone(),
1823 verdict.raw.clone(),
1824 )
1825}
1826
1827fn ensure_process_success(output: &ProcessOutput) -> Result<(), ReviewerError> {
1828 if output.status_code == Some(0) {
1829 return Ok(());
1830 }
1831
1832 Err(ReviewerError::ReviewerProcessFailed {
1833 status: output.status_code,
1834 stderr: output.stderr.clone(),
1835 })
1836}
1837
1838fn validate_strict_arbiter(
1839 request: &ReviewRequest,
1840 strict: &StrictReviewConfig,
1841) -> Result<(), ReviewerError> {
1842 let arbiter = normalized_model(&strict.arbiter_model);
1843 if arbiter == normalized_model(&request.watched_model)
1844 || arbiter == normalized_model(&request.reviewer_model)
1845 {
1846 return Err(ReviewerError::StrictArbiterModelNotDistinct);
1847 }
1848 Ok(())
1849}
1850
1851fn validate_model_present(role: &str, model: &str) -> Result<(), ReviewerError> {
1852 if model.trim().is_empty() {
1853 return Err(ReviewerError::MissingModel {
1854 role: role.to_owned(),
1855 });
1856 }
1857 Ok(())
1858}
1859
1860fn git_output<const N: usize>(args: [&str; N]) -> Result<String, ReviewerError> {
1861 git_output_slice(&args)
1862}
1863
1864fn git_output_slice(args: &[&str]) -> Result<String, ReviewerError> {
1865 let output = Command::new("git")
1866 .args(args)
1867 .output()
1868 .map_err(ReviewerError::GitSpawn)?;
1869 if !output.status.success() {
1870 return Err(ReviewerError::GitFailed {
1871 args: args.iter().map(|arg| (*arg).to_owned()).collect(),
1872 stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
1873 });
1874 }
1875
1876 Ok(String::from_utf8_lossy(&output.stdout).into_owned())
1877}
1878
1879fn agent_from_slug(value: &str) -> Result<Agent, ReviewerError> {
1880 match value.trim().to_ascii_lowercase().as_str() {
1881 "claude" => Ok(Agent::Claude),
1882 "codex" => Ok(Agent::Codex),
1883 "pi" => Ok(Agent::Pi),
1884 _ => Err(ReviewerError::UnknownAgent {
1885 value: value.to_owned(),
1886 }),
1887 }
1888}
1889
1890fn harness_from_slug(value: &str) -> Result<ReviewerHarness, ReviewerError> {
1891 match value.trim().to_ascii_lowercase().as_str() {
1892 "claude" => Ok(ReviewerHarness::Claude),
1893 "codex" => Ok(ReviewerHarness::Codex),
1894 "pi" => Ok(ReviewerHarness::Pi),
1895 "gemini" => Ok(ReviewerHarness::Gemini),
1896 "opencode" => Ok(ReviewerHarness::Opencode),
1897 "custom" => Ok(ReviewerHarness::Custom),
1898 _ => Err(ReviewerError::UnknownHarness {
1899 value: value.to_owned(),
1900 }),
1901 }
1902}
1903
1904fn harness_slug(harness: ReviewerHarness) -> &'static str {
1905 match harness {
1906 ReviewerHarness::Claude => "claude",
1907 ReviewerHarness::Codex => "codex",
1908 ReviewerHarness::Pi => "pi",
1909 ReviewerHarness::Gemini => "gemini",
1910 ReviewerHarness::Opencode => "opencode",
1911 ReviewerHarness::Custom => "custom",
1912 }
1913}
1914
/// Normalizes a model name for comparison: trims surrounding whitespace and
/// lower-cases ASCII letters.
fn normalized_model(model: &str) -> String {
    let mut normalized = model.trim().to_owned();
    normalized.make_ascii_lowercase();
    normalized
}
1918
/// Returns the current time as whole seconds since the Unix epoch.
///
/// Falls back to `0` when the system clock reads earlier than the epoch.
fn unix_now() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
1924
/// Generates a review run id of the form `<nanos>-<pid>[-<short sha>]`.
///
/// `<nanos>` is the time since the Unix epoch in nanoseconds (`0` if the clock
/// reads earlier than the epoch) and `<pid>` is this process's id. The short
/// sha is the first 12 ASCII-alphanumeric characters of `commit_sha`; it is
/// left out, together with its dash, when there are none.
fn generate_run_id(commit_sha: &str) -> String {
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };

    let mut short_sha = String::with_capacity(12);
    for character in commit_sha
        .chars()
        .filter(char::is_ascii_alphanumeric)
        .take(12)
    {
        short_sha.push(character);
    }

    if short_sha.is_empty() {
        return format!("{nanos}-{}", std::process::id());
    }
    format!("{nanos}-{}-{short_sha}", std::process::id())
}
1940
1941#[cfg(test)]
1942mod tests {
1943 use std::{cell::RefCell, collections::VecDeque};
1944
1945 use proptest::prelude::*;
1946
1947 use super::{
1948 InvocationPlan, MaterialLoader, ParsedVerdict, ProcessOutput, ProcessRunner,
1949 PromptDelivery, ReviewJob, ReviewPlan, ReviewQueue, ReviewRequest, ReviewRunStatus,
1950 ReviewRunStore, ReviewSelection, ReviewerError, StrictGoalCounters, StrictGoalDecision,
1951 StrictGoalPolicy, StrictGoalStopReason, StrictReviewConfig, drain_once, execute_review_job,
1952 run_review_run_command, run_strict_goal_loop,
1953 };
1954 use crate::{
1955 claim::{Claim, EvidenceRef},
1956 cli::{Agent, ReviewerHarness},
1957 config::Effort,
1958 ledger::{LedgerStore, Verdict},
1959 };
1960
1961 fn pass_json() -> String {
1962 serde_json::json!({
1963 "verdict": "PASS",
1964 "summary": "The claim is substantiated by the diff and evidence.",
1965 "findings": [],
1966 "next_steps": []
1967 })
1968 .to_string()
1969 }
1970
1971 fn reject_json(title: &str) -> String {
1972 serde_json::json!({
1973 "verdict": "REJECT",
1974 "summary": "The claim is not substantiated.",
1975 "findings": [{
1976 "severity": "high",
1977 "title": title,
1978 "body": "The cited evidence does not prove the claimed behavior.",
1979 "file": "src/lib.rs",
1980 "line_start": 1,
1981 "line_end": 1,
1982 "confidence": 95,
1983 "recommendation": "Provide executable evidence that proves the claim."
1984 }],
1985 "next_steps": ["Run the relevant verification command."]
1986 })
1987 .to_string()
1988 }
1989
1990 #[test]
1991 fn same_harness_different_model_is_valid() {
1992 let request = ReviewRequest::new(
1993 Agent::Codex,
1994 "gpt-5.4",
1995 ReviewerHarness::Codex,
1996 "gpt-5.5",
1997 false,
1998 "review this",
1999 );
2000
2001 let plan = ReviewPlan::build(request).unwrap();
2002
2003 assert_eq!(plan.watched_agent, Agent::Codex);
2004 assert_eq!(plan.reviewer_harness, ReviewerHarness::Codex);
2005 assert_eq!(plan.invocation.program, "codex");
2006 }
2007
2008 #[test]
2009 fn same_model_is_blocked_by_default() {
2010 let request = ReviewRequest::new(
2011 Agent::Codex,
2012 " GPT-5.5 ",
2013 ReviewerHarness::Claude,
2014 "gpt-5.5",
2015 false,
2016 "review this",
2017 );
2018
2019 let error = ReviewPlan::build(request).unwrap_err();
2020
2021 assert!(matches!(
2022 error,
2023 ReviewerError::SameModelWithoutWaiver { .. }
2024 ));
2025 }
2026
2027 #[test]
2028 fn allow_same_model_override_is_deliberate() {
2029 let request = ReviewRequest::new(
2030 Agent::Codex,
2031 "gpt-5.5",
2032 ReviewerHarness::Codex,
2033 "gpt-5.5",
2034 true,
2035 "review this",
2036 );
2037
2038 let plan = ReviewPlan::build(request).unwrap();
2039
2040 assert!(plan.allow_same_model);
2041 assert_eq!(plan.reviewer_model, "gpt-5.5");
2042 }
2043
2044 #[test]
2045 fn provider_mapping_uses_verified_prompt_shapes_and_effort() {
2046 let codex =
2047 InvocationPlan::for_harness(ReviewerHarness::Codex, "gpt-5.5", Effort::Xhigh).unwrap();
2048 assert_eq!(codex.program, "codex");
2049 assert_eq!(
2050 codex.args_for_prompt("prompt"),
2051 [
2052 "exec",
2053 "-m",
2054 "gpt-5.5",
2055 "-c",
2056 "model_reasoning_effort=xhigh",
2057 "prompt"
2058 ]
2059 );
2060
2061 let claude =
2062 InvocationPlan::for_harness(ReviewerHarness::Claude, "opus", Effort::High).unwrap();
2063 assert_eq!(claude.program, "claude");
2064 assert_eq!(claude.prompt_delivery, PromptDelivery::Stdin);
2065 assert_eq!(
2066 claude.args_for_prompt("prompt"),
2067 ["--print", "--model", "opus", "--effort", "high"]
2068 );
2069
2070 let gemini =
2071 InvocationPlan::for_harness(ReviewerHarness::Gemini, "gemini-pro", Effort::Xhigh)
2072 .unwrap();
2073 assert_eq!(
2074 gemini.args_for_prompt("prompt"),
2075 ["-m", "gemini-pro", "-p", "prompt"]
2076 );
2077
2078 let pi = InvocationPlan::for_harness(ReviewerHarness::Pi, "openai/gpt-5.5", Effort::Xhigh)
2079 .unwrap();
2080 assert_eq!(pi.prompt_delivery, PromptDelivery::Stdin);
2081 assert_eq!(
2082 pi.args_for_prompt("prompt"),
2083 [
2084 "--model",
2085 "openai/gpt-5.5",
2086 "--thinking",
2087 "xhigh",
2088 "--tools",
2089 "read,grep,find,ls",
2090 "-p"
2091 ]
2092 );
2093 }
2094
2095 #[test]
2096 fn custom_harness_requires_explicit_configuration() {
2097 let error = InvocationPlan::for_harness(ReviewerHarness::Custom, "model", Effort::Xhigh)
2098 .unwrap_err();
2099
2100 assert!(matches!(error, ReviewerError::UnsupportedCustomHarness));
2101 }
2102
2103 #[test]
2104 fn effort_maps_to_each_harness_flag() {
2105 for effort in [
2106 Effort::Minimal,
2107 Effort::Low,
2108 Effort::Medium,
2109 Effort::High,
2110 Effort::Xhigh,
2111 ] {
2112 let e = effort.as_str();
2113
2114 let codex = InvocationPlan::for_harness(ReviewerHarness::Codex, "m", effort).unwrap();
2115 assert!(codex.args.contains(&format!("model_reasoning_effort={e}")));
2116
2117 let claude = InvocationPlan::for_harness(ReviewerHarness::Claude, "m", effort).unwrap();
2118 let claude_idx = claude.args.iter().position(|a| a == "--effort").unwrap();
2119 assert_eq!(claude.args[claude_idx + 1], effort.claude_value());
2121 assert_ne!(claude.args[claude_idx + 1], "minimal");
2122
2123 let pi = InvocationPlan::for_harness(ReviewerHarness::Pi, "m", effort).unwrap();
2124 let pi_idx = pi.args.iter().position(|a| a == "--thinking").unwrap();
2125 assert_eq!(pi.args[pi_idx + 1], e);
2126 }
2127 }
2128
/// With the default configuration and no overrides, each writer agent resolves
/// to the expected reviewer harness and model, always at `Effort::Xhigh`.
#[test]
fn resolve_picks_configured_reviewer_for_every_writer() {
    let defaults = crate::config::TruthMirrorConfig::default();

    // (writer, expected reviewer harness, expected reviewer model)
    let expectations = [
        (Agent::Codex, ReviewerHarness::Claude, "claude-opus-4-8"),
        (Agent::Claude, ReviewerHarness::Codex, "gpt-5.5"),
        (Agent::Pi, ReviewerHarness::Codex, "gpt-5.5"),
    ];

    for (writer, expected_harness, expected_model) in expectations {
        let resolved =
            ReviewSelection::resolve(Some(writer), None, None, None, None, false, &defaults);
        let picked = resolved.unwrap();

        assert_eq!(picked.reviewer_harness, expected_harness);
        assert_eq!(picked.reviewer_model, expected_model);
        assert_eq!(picked.reviewer_effort, Effort::Xhigh);
    }
}
2149
/// Overriding the reviewer harness (here `Pi` for a `Codex` writer) while
/// leaving the model arguments as `None` must yield
/// `ReviewerError::OverrideNeedsModel`.
#[test]
fn overriding_reviewer_harness_without_model_is_rejected() {
    let defaults = crate::config::TruthMirrorConfig::default();

    let outcome = ReviewSelection::resolve(
        Some(Agent::Codex),
        None,
        Some(ReviewerHarness::Pi),
        None,
        None,
        false,
        &defaults,
    );

    let needs_model = matches!(
        outcome.unwrap_err(),
        ReviewerError::OverrideNeedsModel { .. }
    );
    assert!(needs_model);
}
2168
/// Explicitly naming `Claude` as reviewer harness for a `Codex` writer, without
/// a model, resolves successfully to `Claude` with model `claude-opus-4-8`.
#[test]
fn overriding_reviewer_harness_matching_pair_is_ok() {
    let defaults = crate::config::TruthMirrorConfig::default();

    let outcome = ReviewSelection::resolve(
        Some(Agent::Codex),
        None,
        Some(ReviewerHarness::Claude),
        None,
        None,
        false,
        &defaults,
    );
    let picked = outcome.unwrap();

    assert_eq!(picked.reviewer_harness, ReviewerHarness::Claude);
    assert_eq!(picked.reviewer_model, "claude-opus-4-8");
}
2186
/// When the configuration sets `allow_same_model`, a writer and reviewer that
/// both use `gpt-5.5` resolve with the waiver set (the flag argument passed to
/// `resolve` is `false`), and the resulting request builds a `ReviewPlan`.
#[test]
fn config_allow_same_model_waives_opposition() {
    let permissive = crate::config::TruthMirrorConfig {
        allow_same_model: true,
        ..crate::config::TruthMirrorConfig::default()
    };
    let shared_model = "gpt-5.5";

    let picked = ReviewSelection::resolve(
        Some(Agent::Codex),
        Some(shared_model.to_owned()),
        Some(ReviewerHarness::Codex),
        Some(shared_model.to_owned()),
        None,
        false,
        &permissive,
    )
    .unwrap();

    assert!(picked.allow_same_model);

    let request = picked.request_for("review".to_owned());
    assert!(ReviewPlan::build(request).is_ok());
}
2209
/// With the three optional arguments left as `None`, the arbiter resolved for a
/// `Codex` writer under the default configuration is `Pi` at `Effort::Xhigh`.
#[test]
fn resolve_arbiter_uses_pair_when_cli_absent() {
    let defaults = crate::config::TruthMirrorConfig::default();

    let resolved = ReviewSelection::resolve_arbiter(Agent::Codex, None, None, None, &defaults);
    let arbiter = resolved.unwrap();

    assert_eq!(arbiter.arbiter_harness, ReviewerHarness::Pi);
    assert_eq!(arbiter.arbiter_effort, Effort::Xhigh);
}
2219
/// The first-pass prompt must contain the adversarial instructions, the
/// caller-supplied context and diff verbatim, and the `"severity"` /
/// `"recommendation"` keys.
#[test]
fn first_pass_prompt_is_adversarial_and_injects_context() {
    let diff = "THE_DIFF_BODY";
    let context = "INVIOLABLE CONSTRAINTS: never fake tests";

    let prompt = super::first_pass_prompt(&claim(), diff, context);

    // Every fragment below has to appear somewhere in the rendered prompt.
    let required = [
        "PROVE THIS CLAIM FALSE",
        "default to REJECT",
        "INVIOLABLE CONSTRAINTS: never fake tests",
        "THE_DIFF_BODY",
        "GREP THE CLASS, NOT THE INSTANCE",
        "\"severity\"",
        "\"recommendation\"",
    ];
    for needle in required {
        assert!(
            prompt.contains(needle),
            "first-pass prompt is missing {needle:?}"
        );
    }
}
2237
/// The strict second-pass prompt, built from a strict job and a passing first
/// output, must frame the reviewer as a completeness critic.
#[test]
fn strict_second_pass_is_a_completeness_critic() {
    let strict_job = review_job(true);
    let first_pass_output = pass_json();

    let prompt = super::strict_second_pass_prompt(&strict_job, &first_pass_output);

    let required = [
        "COMPLETENESS CRITIC",
        "generalize",
        "GREP THE CLASS, NOT THE INSTANCE",
    ];
    for needle in required {
        assert!(
            prompt.contains(needle),
            "second-pass prompt is missing {needle:?}"
        );
    }
}
2249
/// An empty context string must not leave an `INVIOLABLE CONSTRAINTS` marker
/// in the first-pass prompt, while the adversarial instruction is still there.
#[test]
fn prompt_omits_context_block_when_empty() {
    let no_context = "";
    let prompt = super::first_pass_prompt(&claim(), "d", no_context);

    let has_context_marker = prompt.contains("INVIOLABLE CONSTRAINTS");
    assert!(!has_context_marker);

    let has_core_instruction = prompt.contains("PROVE THIS CLAIM FALSE");
    assert!(has_core_instruction);
}
2257
/// `ReviewPlan::run_with` hands the invocation to the supplied `ProcessRunner`,
/// so a stub can inspect the call and answer with canned output.
#[test]
fn subprocess_runner_is_mockable() {
    /// Stub runner: checks what it is asked to run, then reports a clean exit
    /// with `pass_json()` on stdout.
    struct MockRunner;

    impl ProcessRunner for MockRunner {
        fn run(
            &self,
            invocation: &InvocationPlan,
            prompt: &str,
        ) -> Result<ProcessOutput, ReviewerError> {
            assert_eq!(invocation.program, "codex");

            // The prompt must be the final argument handed to the program.
            let argv = invocation.args_for_prompt(prompt);
            assert_eq!(argv.last().unwrap(), "review this");

            let canned = ProcessOutput {
                status_code: Some(0),
                stdout: pass_json(),
                stderr: String::new(),
            };
            Ok(canned)
        }
    }

    let prompt = "review this";
    let request = ReviewRequest::new(
        Agent::Codex,
        "gpt-5.4",
        ReviewerHarness::Codex,
        "gpt-5.5",
        false,
        prompt,
    );

    let plan = ReviewPlan::build(request).unwrap();
    let output = plan.run_with(prompt, &MockRunner).unwrap();

    assert!(output.stdout.contains("PASS"));
}
2294
/// Parsing a REJECT payload yields `Verdict::Reject`, a structured finding
/// titled with the supplied text, and a plain finding that mentions it.
#[test]
fn verdict_parser_extracts_rejection_findings() {
    let raw = reject_json("missing proof");

    let parsed = ParsedVerdict::parse(&raw).unwrap();

    assert_eq!(parsed.verdict, Verdict::Reject);
    assert_eq!(parsed.structured_findings[0].title, "missing proof");
    assert!(parsed.findings[0].contains("missing proof"));
}
2303
/// The old `VERDICT:` / `FINDINGS:` line format is not accepted: parsing it
/// must fail with `ReviewerError::VerdictJson`.
#[test]
fn verdict_parser_rejects_legacy_line_protocol() {
    let legacy_output = "VERDICT: REJECT\nFINDINGS:\n- missing proof\n";

    let outcome = ParsedVerdict::parse(legacy_output);

    let is_json_error = matches!(outcome.unwrap_err(), ReviewerError::VerdictJson { .. });
    assert!(is_json_error);
}
2311
/// A change touching three files is summarized instead of inlined: the
/// materialized text reports the file count, lists the files, and tells the
/// reviewer to inspect the repository.
///
/// NOTE(review): the diff body itself is tiny, so the fallback is presumably
/// triggered by the file count exceeding `MAX_INLINE_DIFF_FILES` — confirm
/// against `materialize_diff`.
#[test]
fn large_diff_materialization_falls_back_to_file_summary() {
    let changed_files = "a.rs\nb.rs\nc.rs\n";

    let summary = super::materialize_diff("branch:main", "tiny diff", changed_files);

    let required = [
        "too large to inline safely",
        "actual_files=3",
        "a.rs\nb.rs\nc.rs",
        "inspect the repository directly",
    ];
    for needle in required {
        assert!(
            summary.contains(needle),
            "materialized diff is missing {needle:?}"
        );
    }
}
2322
/// Enqueueing a commit produces exactly one pending item with a non-empty run
/// id, and a matching run record in `Queued` state.
#[test]
fn review_queue_schedules_commits_without_running_models() {
    let temp = tempfile::tempdir().unwrap();
    let root = temp.path();
    let queue = ReviewQueue::new(root);

    queue.enqueue("abc123").unwrap();

    let pending = queue.pending().unwrap();
    assert_eq!(pending.len(), 1);

    let item = &pending[0];
    assert_eq!(item.commit_sha, "abc123");
    assert!(!item.run_id.is_empty());

    // The run store under the same root must know the run the queue refers to.
    let runs = ReviewRunStore::new(root);
    let run = runs.read(&item.run_id).unwrap();
    assert_eq!(run.commit_sha, "abc123");
    assert_eq!(run.status, ReviewRunStatus::Queued);
}
2341
/// Cancelling a queued run through the review command empties the queue and
/// leaves the run record in `Cancelled` state.
#[test]
fn review_cancel_marks_queued_run_and_removes_queue_item() {
    let temp = tempfile::tempdir().unwrap();
    let queue = ReviewQueue::new(temp.path());
    let queued = queue.enqueue("abc123").unwrap();

    let cancel = crate::cli::ReviewCommand::Cancel {
        run_id: queued.run_id.clone(),
    };
    run_review_run_command(cancel, temp.path()).unwrap();

    let still_pending = queue.pending().unwrap();
    assert!(still_pending.is_empty());

    let runs = ReviewRunStore::new(temp.path());
    let run = runs.read(&queued.run_id).unwrap();
    assert_eq!(run.status, ReviewRunStatus::Cancelled);
}
2362
/// A non-strict job whose reviewer answers REJECT produces a single entry
/// carrying the verdict, the structured finding and the raw output, and the
/// store then reports one unresolved rejection.
#[test]
fn execute_review_records_reject_verdict() {
    let temp = tempfile::tempdir().unwrap();
    let store = LedgerStore::new(temp.path());
    let runner = SequenceRunner::new([reject_json("unsupported")]);

    let execution = execute_review_job(review_job(false), &runner, &store).unwrap();

    assert_eq!(execution.entries.len(), 1);

    let entry = &execution.entries[0];
    assert_eq!(entry.verdict, Verdict::Reject);
    assert_eq!(entry.structured_findings[0].title, "unsupported");
    assert!(entry.raw_reviewer_output.contains("\"REJECT\""));

    let unresolved = store.unresolved_rejections().unwrap();
    assert_eq!(unresolved.len(), 1);
}
2385
/// A strict job with two passing outputs records two entries: the first from
/// the reviewer model (`gpt-5.5`), the second from the arbiter model
/// (`claude-opus-4-8`), as configured by `review_job(true)`.
#[test]
fn strict_two_pass_records_both_clean_passes() {
    let temp = tempfile::tempdir().unwrap();
    let store = LedgerStore::new(temp.path());
    let runner = SequenceRunner::new([pass_json(), pass_json()]);

    let execution = execute_review_job(review_job(true), &runner, &store).unwrap();

    assert_eq!(execution.entries.len(), 2);
    let history = store.read_history().unwrap();
    assert_eq!(history.len(), 2);

    let first = &execution.entries[0];
    let second = &execution.entries[1];
    assert_eq!(first.reviewer.model, "gpt-5.5");
    assert_eq!(second.reviewer.model, "claude-opus-4-8");
}
2400
/// Setting the arbiter model to `gpt-5.5` — the same model `review_job` uses
/// for the reviewer — makes the job fail with
/// `ReviewerError::StrictArbiterModelNotDistinct`.
#[test]
fn strict_arbiter_model_must_be_third_model() {
    let temp = tempfile::tempdir().unwrap();
    let store = LedgerStore::new(temp.path());
    let runner = SequenceRunner::new([pass_json()]);

    let mut job = review_job(true);
    let strict = job.strict.as_mut().unwrap();
    strict.arbiter_model = "gpt-5.5".to_owned();

    let outcome = execute_review_job(job, &runner, &store);

    let not_distinct = matches!(
        outcome.unwrap_err(),
        ReviewerError::StrictArbiterModelNotDistinct
    );
    assert!(not_distinct);
}
2416
/// With thresholds of 2 lies and 3 fuckups, the policy continues below both
/// thresholds and stops with the matching reason once either one is reached.
#[test]
fn strict_goal_policy_stops_at_configured_lie_or_fuckup_count() {
    let policy = StrictGoalPolicy {
        stop_after_lies: 2,
        stop_after_fuckups: 3,
    };

    // (lies exposed, fuckups registered, expected decision)
    let scenarios = [
        (1, 2, StrictGoalDecision::Continue),
        (
            2,
            0,
            StrictGoalDecision::Stop {
                reason: StrictGoalStopReason::LiesExposed,
            },
        ),
        (
            0,
            3,
            StrictGoalDecision::Stop {
                reason: StrictGoalStopReason::FuckupsRegistered,
            },
        ),
    ];

    for (lies_exposed, fuckups_registered, expected) in scenarios {
        let counters = StrictGoalCounters {
            lies_exposed,
            fuckups_registered,
        };
        assert_eq!(policy.decide(counters), expected);
    }
}
2450
/// Draining a queue holding `abc123` twice and `def456` once reviews each
/// commit a single time, writes two ledger entries (one of them a rejection),
/// empties the queue, and leaves three run records: two `Completed`, one
/// `Cancelled`.
///
/// NOTE(review): the `Cancelled` run is presumably the duplicate `abc123`
/// enqueue — confirm against `drain_once`.
#[test]
fn drain_once_reviews_each_commit_once_and_clears_queue() {
    let temp = tempfile::tempdir().unwrap();
    let store = LedgerStore::new(temp.path());
    let queue = ReviewQueue::new(temp.path());

    // `abc123` is deliberately queued twice.
    for sha in ["abc123", "abc123", "def456"] {
        queue.enqueue(sha).unwrap();
    }

    let loader = StaticLoader::new();
    // One scripted output per distinct commit: a rejection, then a pass.
    let runner = SequenceRunner::new([reject_json("unsupported"), pass_json()]);

    let report = drain_once(&queue, &loader, &selection(), "", &runner, &store).unwrap();

    assert_eq!(report.reviewed, ["abc123", "def456"]);
    assert_eq!(report.ledger_entries, 2);
    assert!(queue.pending().unwrap().is_empty());
    assert_eq!(store.read_history().unwrap().len(), 2);
    assert_eq!(store.unresolved_rejections().unwrap().len(), 1);

    let runs = ReviewRunStore::new(temp.path()).list().unwrap();
    let count_with = |status: ReviewRunStatus| {
        runs.iter().filter(|run| run.status == status).count()
    };
    assert_eq!(runs.len(), 3);
    assert_eq!(count_with(ReviewRunStatus::Completed), 2);
    assert_eq!(count_with(ReviewRunStatus::Cancelled), 1);
}
2487
/// Draining an empty queue reviews nothing and writes nothing to the ledger.
#[test]
fn drain_once_is_a_noop_on_empty_queue() {
    let temp = tempfile::tempdir().unwrap();
    let root = temp.path();
    let store = LedgerStore::new(root);
    let queue = ReviewQueue::new(root);
    let loader = StaticLoader::new();
    let runner = ConstRunner::new(pass_json());
    let reviewer = selection();

    let report = drain_once(&queue, &loader, &reviewer, "", &runner, &store).unwrap();

    assert!(report.reviewed.is_empty());
    assert_eq!(report.ledger_entries, 0);

    let history = store.read_history().unwrap();
    assert_eq!(history.len(), 0);
}
2502
/// With `stop_after_lies` set to 1, a single rejecting pass ends the loop:
/// one pass, one lie exposed, stop reason `LiesExposed`, one ledger entry —
/// even though up to 5 passes were allowed.
#[test]
fn strict_goal_loop_stops_at_configured_lie_count() {
    let temp = tempfile::tempdir().unwrap();
    let store = LedgerStore::new(temp.path());
    let runner = SequenceRunner::new([reject_json("lie")]);
    let policy = StrictGoalPolicy {
        stop_after_lies: 1,
        stop_after_fuckups: 0,
    };
    let subject = claim();
    let reviewer = selection();
    let max_passes = 5;

    let outcome = run_strict_goal_loop(
        "abc123",
        &subject,
        "diff",
        "",
        &reviewer,
        policy,
        max_passes,
        &runner,
        &store,
    )
    .unwrap();

    assert_eq!(outcome.passes, 1);
    assert_eq!(outcome.counters.lies_exposed, 1);
    assert_eq!(outcome.stop_reason, Some(StrictGoalStopReason::LiesExposed));

    let history = store.read_history().unwrap();
    assert_eq!(history.len(), 1);
}
2531
/// When every pass comes back clean, the loop runs exactly the allowed number
/// of passes (3), exposes no lies, reports no stop reason, and records one
/// ledger entry per pass.
#[test]
fn strict_goal_loop_terminates_at_max_passes_for_honest_agent() {
    let temp = tempfile::tempdir().unwrap();
    let store = LedgerStore::new(temp.path());
    let runner = ConstRunner::new(pass_json());
    let policy = StrictGoalPolicy {
        stop_after_lies: 2,
        stop_after_fuckups: 5,
    };
    let subject = claim();
    let reviewer = selection();
    let max_passes = 3;

    let outcome = run_strict_goal_loop(
        "abc123",
        &subject,
        "diff",
        "",
        &reviewer,
        policy,
        max_passes,
        &runner,
        &store,
    )
    .unwrap();

    assert_eq!(outcome.passes, 3);
    assert_eq!(outcome.counters.lies_exposed, 0);
    assert_eq!(outcome.stop_reason, None);

    let history = store.read_history().unwrap();
    assert_eq!(history.len(), 3);
}
2560
/// With `stop_after_fuckups` set to 2 and a reviewer that always rejects, the
/// loop ends after two passes with reason `FuckupsRegistered`; both counters
/// read 2 at that point.
///
/// NOTE(review): `stop_after_lies: 0` evidently does not stop the loop on the
/// two exposed lies, so 0 presumably disables that threshold — confirm against
/// `StrictGoalPolicy::decide`.
#[test]
fn strict_goal_loop_stops_when_fuckups_accumulate() {
    let temp = tempfile::tempdir().unwrap();
    let store = LedgerStore::new(temp.path());
    let runner = ConstRunner::new(reject_json("nit"));
    let policy = StrictGoalPolicy {
        stop_after_lies: 0,
        stop_after_fuckups: 2,
    };
    let subject = claim();
    let reviewer = selection();
    let max_passes = 10;

    let outcome = run_strict_goal_loop(
        "abc123",
        &subject,
        "diff",
        "",
        &reviewer,
        policy,
        max_passes,
        &runner,
        &store,
    )
    .unwrap();

    assert_eq!(outcome.passes, 2);
    assert_eq!(outcome.counters.lies_exposed, 2);
    assert_eq!(outcome.counters.fuckups_registered, 2);

    let expected_reason = Some(StrictGoalStopReason::FuckupsRegistered);
    assert_eq!(outcome.stop_reason, expected_reason);
}
2593
2594 proptest! {
2595 #[test]
2596 fn strict_goal_loop_never_exceeds_max_passes(max in 1u32..6) {
2597 let temp = tempfile::tempdir().unwrap();
2598 let store = LedgerStore::new(temp.path());
2599 let policy = StrictGoalPolicy { stop_after_lies: 0, stop_after_fuckups: 0 };
2601 let runner = ConstRunner::new(pass_json());
2602
2603 let outcome = run_strict_goal_loop(
2604 "abc123", &claim(), "diff", "", &selection(), policy, max, &runner, &store,
2605 )
2606 .unwrap();
2607
2608 prop_assert!(outcome.passes <= max);
2609 prop_assert_eq!(outcome.passes, max);
2610 prop_assert!(outcome.stop_reason.is_none());
2611 }
2612 }
2613
2614 proptest! {
2615 #[test]
2616 fn model_opposition_is_enforced_for_arbitrary_models(
2617 watched in "[A-Za-z0-9._/-]{1,32}",
2618 reviewer in "[A-Za-z0-9._/-]{1,32}",
2619 ) {
2620 let request = ReviewRequest::new(
2621 Agent::Codex,
2622 watched.clone(),
2623 ReviewerHarness::Codex,
2624 reviewer.clone(),
2625 false,
2626 "review this",
2627 );
2628 let result = ReviewPlan::build(request);
2629
2630 if watched.trim().eq_ignore_ascii_case(reviewer.trim()) {
2631 let blocked = matches!(result, Err(ReviewerError::SameModelWithoutWaiver { .. }));
2632 prop_assert!(blocked);
2633 } else {
2634 prop_assert!(result.is_ok());
2635 }
2636 }
2637 }
2638
2639 fn claim() -> Claim {
2640 Claim::new(
2641 "add review",
2642 "cargo test",
2643 vec![EvidenceRef::parse("tests:cargo-test").unwrap()],
2644 )
2645 .unwrap()
2646 }
2647
2648 fn selection() -> ReviewSelection {
2649 ReviewSelection {
2650 watched_agent: Agent::Codex,
2651 watched_model: "gpt-5.4".to_owned(),
2652 reviewer_harness: ReviewerHarness::Codex,
2653 reviewer_model: "gpt-5.5".to_owned(),
2654 reviewer_effort: Effort::Xhigh,
2655 allow_same_model: false,
2656 strict: None,
2657 }
2658 }
2659
2660 struct StaticLoader {
2661 claim: Claim,
2662 diff: String,
2663 }
2664
2665 impl StaticLoader {
2666 fn new() -> Self {
2667 Self {
2668 claim: claim(),
2669 diff: "diff --git a/src/lib.rs b/src/lib.rs".to_owned(),
2670 }
2671 }
2672 }
2673
2674 impl MaterialLoader for StaticLoader {
2675 fn load(&self, _sha: &str) -> Result<(Claim, String), ReviewerError> {
2676 Ok((self.claim.clone(), self.diff.clone()))
2677 }
2678 }
2679
2680 struct ConstRunner {
2681 output: String,
2682 }
2683
2684 impl ConstRunner {
2685 fn new(output: impl Into<String>) -> Self {
2686 Self {
2687 output: output.into(),
2688 }
2689 }
2690 }
2691
2692 impl ProcessRunner for ConstRunner {
2693 fn run(
2694 &self,
2695 _invocation: &InvocationPlan,
2696 _prompt: &str,
2697 ) -> Result<ProcessOutput, ReviewerError> {
2698 Ok(ProcessOutput {
2699 status_code: Some(0),
2700 stdout: self.output.clone(),
2701 stderr: String::new(),
2702 })
2703 }
2704 }
2705
2706 fn review_job(strict: bool) -> ReviewJob {
2707 let claim = claim();
2708 ReviewJob {
2709 commit_sha: "abc123".to_owned(),
2710 diff: "diff --git a/src/lib.rs b/src/lib.rs".to_owned(),
2711 context: String::new(),
2712 request: ReviewRequest::new(
2713 Agent::Codex,
2714 "gpt-5.4",
2715 ReviewerHarness::Codex,
2716 "gpt-5.5",
2717 false,
2718 "review this",
2719 ),
2720 claim,
2721 strict: strict.then_some(StrictReviewConfig {
2722 arbiter_harness: ReviewerHarness::Claude,
2723 arbiter_model: "claude-opus-4-8".to_owned(),
2724 arbiter_effort: Effort::Xhigh,
2725 }),
2726 }
2727 }
2728
2729 struct SequenceRunner {
2730 outputs: RefCell<VecDeque<String>>,
2731 }
2732
2733 impl SequenceRunner {
2734 fn new<I, S>(outputs: I) -> Self
2735 where
2736 I: IntoIterator<Item = S>,
2737 S: Into<String>,
2738 {
2739 Self {
2740 outputs: RefCell::new(outputs.into_iter().map(Into::into).collect()),
2741 }
2742 }
2743 }
2744
2745 impl ProcessRunner for SequenceRunner {
2746 fn run(
2747 &self,
2748 _invocation: &InvocationPlan,
2749 _prompt: &str,
2750 ) -> Result<ProcessOutput, ReviewerError> {
2751 let stdout = self.outputs.borrow_mut().pop_front().unwrap();
2752 Ok(ProcessOutput {
2753 status_code: Some(0),
2754 stdout,
2755 stderr: String::new(),
2756 })
2757 }
2758 }
2759}