trusty-review 0.4.0

LLM-backed code review service — reviews GitHub PRs and unified diffs via AWS Bedrock or OpenRouter
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
//! Core data models for trusty-review.
//!
//! Why: defines the shared data shapes used by all pipeline stages and
//! stored in the review log.  Keeping them in a dedicated module ensures
//! a single authoritative definition and prevents type drift between the
//! pipeline, the LLM provider layer, and the store.
//! What: exposes `Verdict`, `Effort`, `VerifyOutcome`, `Finding`
//! (FixSuggestion), and `ReviewResult` — all serde-serialisable.
//! Test: `verdict_serde_roundtrip`, `review_result_serde_roundtrip`,
//! and `finding_confidence_clamping` in this module.

use serde::{Deserialize, Serialize};

pub mod status;
pub use status::ReviewStatus;

use crate::config::constants::REVIEW_VERSION;

// ─── Verdict ──────────────────────────────────────────────────────────────────

/// Review verdict tier aligned to the duetto-code-intelligence board grades.
///
/// Why: the pipeline maps LLM output to one of these tiers to drive the
/// GitHub review action (approve vs request-changes vs block) and the
/// notification payload.  Using the same string tokens as the calibration
/// board enables clean round-trips through JSON without any conversion.
/// What: serialises to EXACT board strings — `"APPROVE"`, `"APPROVE*"`,
/// `"REQUEST_CHANGES"`, `"BLOCK"`, `"UNKNOWN"`.  Deserialises the same.
/// Display prints the same strings.
///
/// # Fail-safe policy (fail-CLOSED since #1241)
/// When the pipeline encounters a genuine parse/transport error or a truncated
/// model response, the fail-safe default is `Unknown` (fail-CLOSED).  Ticket
/// #1241 supersedes spec REV-130's original fail-OPEN `Approve`: a silent APPROVE
/// on unparseable/truncated output posts a green check for a review that never
/// happened, which is a safety hole.  `Unknown` surfaces a clear "could not
/// review" state and is never treated as a merge-approval downstream.  The model
/// *itself* also emits `Unknown` when it judges the diff too truncated to assess,
/// so the board can distinguish "clean review" from "could not assess".
///
/// Test: `verdict_serde_roundtrip`, `verdict_display`,
/// `verdict_unknown_round_trip`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum Verdict {
    /// No significant concerns; the reviewer is satisfied.
    Approve,
    /// Approve with non-blocking advisory notes (a concern worth noting, but
    /// not blocking). Board grade: `APPROVE*`.
    #[serde(rename = "APPROVE*")]
    ApproveWithReservations,
    /// Reviewer requests at least one real correctness/logic change before merge.
    RequestChanges,
    /// Critical issue — build-breaking, data-corrupting, or auth-bypassing.
    /// Must not merge without explicit bypass.
    Block,
    /// The diff was too truncated or contained insufficient context for the
    /// model to assess.  Not a clean APPROVE — the reviewer could not form an
    /// opinion.  Board grade: `UNKNOWN`.
    Unknown,
}

impl Verdict {
    /// Ordinal severity for this verdict (higher = more severe).
    ///
    /// Why: the grade-derivation (`grade.rs`) and verification re-derivation
    /// (`verify.rs`) paths both need a total order over verdicts to compute the
    /// stricter-of / less-severe-of two verdicts.  Before #1357 each module
    /// carried its OWN private `verdict_ord` with an identical mapping — two
    /// copies of the same ordinal table is a maintenance hazard (drift between
    /// them would silently corrupt the floor/ceiling logic).  Centralising the
    /// mapping here makes `Verdict` the single source of truth.
    /// What: returns `APPROVE=0 < APPROVE*=1 < REQUEST_CHANGES=2 < BLOCK=3`.
    /// `Unknown=4` is a terminal state (the diff was unassessable); callers
    /// short-circuit it before any ordinal comparison, so it never participates
    /// in normal flow — the value is defined only so the match is exhaustive.
    /// Test: `verdict_ordinal_is_monotonic` in this module; exercised
    /// transitively by `grade.rs::stricter_of` and `verify.rs::verdict_min`.
    pub fn ordinal(&self) -> u8 {
        match self {
            Verdict::Approve => 0,
            Verdict::ApproveWithReservations => 1,
            Verdict::RequestChanges => 2,
            Verdict::Block => 3,
            Verdict::Unknown => 4,
        }
    }
}

impl std::fmt::Display for Verdict {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Verdict::Approve => write!(f, "APPROVE"),
            Verdict::ApproveWithReservations => write!(f, "APPROVE*"),
            Verdict::RequestChanges => write!(f, "REQUEST_CHANGES"),
            Verdict::Block => write!(f, "BLOCK"),
            Verdict::Unknown => write!(f, "UNKNOWN"),
        }
    }
}

// ─── Effort ───────────────────────────────────────────────────────────────────

/// Estimated remediation effort for a finding.
///
/// Why: only Low/Medium findings are eligible for tracker-issue filing
/// (spec §07 REV-605, source-analysis §2.3).
/// What: serialised as lowercase.
/// Test: `effort_serde_roundtrip`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Effort {
    /// Small change, typically under an hour.
    Low,
    /// Moderate change, typically a few hours.
    Medium,
    /// Large refactoring or cross-cutting change.
    High,
}

impl Effort {
    /// Returns `true` if this effort level is eligible for tracker-issue filing.
    ///
    /// Why: spec REV-605 restricts issue filing to Low/Medium; High efforts
    /// are noted in the review but not filed as issues automatically.
    /// What: returns `true` for `Low` and `Medium`.
    /// Test: `effort_issue_eligibility`.
    pub fn is_issue_eligible(&self) -> bool {
        matches!(self, Effort::Low | Effort::Medium)
    }
}

impl std::fmt::Display for Effort {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Effort::Low => write!(f, "low"),
            Effort::Medium => write!(f, "medium"),
            Effort::High => write!(f, "high"),
        }
    }
}

// ─── Verification outcome ─────────────────────────────────────────────────────

/// Outcome of the per-finding verification round.
///
/// Why: the pipeline needs to distinguish "verified true", "refuted by LLM",
/// "refuted because of error", etc., to compute the correct verdict and emit
/// the right alarm (spec §07 REV-606, source-analysis §12.1).
/// What: `ErrorRefuted` carries the error class string so config/lifecycle
/// failures are distinguishable in logs.
/// Test: `verify_outcome_serde_roundtrip`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum VerifyOutcome {
    /// Verifier LLM confirmed the finding is real.
    Confirmed,
    /// Verifier LLM refuted the finding.
    Refuted,
    /// Verifier call failed with a config/lifecycle error (see spec REV-340).
    /// The finding is treated as refuted but an alarm is emitted.
    ErrorRefuted {
        /// Human-readable error class (e.g. `"ModelNotFound"`, `"AccessDenied"`).
        error_class: String,
    },
    /// Verifier call failed due to truncation / context-length overrun.
    TruncationRefuted,
    /// The finding was below the verification confidence threshold; not sent
    /// to the verifier.
    Skipped,
}

// ─── Finding category ──────────────────────────────────────────────────────────

/// The axis a finding speaks to: code correctness vs. intent/method conformance.
///
/// Why: the intent/method-conformance back gate (DOC-15, `SPEC-CONFORMANCE-02`,
/// #1359) adds a second, distinct *kind* of finding — one that flags a diff
/// **contradicting an explicit method the ticket/spec stated**, separate from a
/// correctness bug.  The two must be distinguishable so the verdict floor can
/// treat them differently: a conformance divergence caps at `REQUEST_CHANGES`
/// and NEVER drives `BLOCK` (BLOCK is reserved for correctness/safety), whereas a
/// correctness finding floors normally.  Threading a category through the finding
/// model is the minimal way to carry that distinction from the LLM JSON all the
/// way to `grade::severity_floor`.
/// What: a two-variant enum serialised kebab-case (`"correctness"` /
/// `"method-conformance"`).  It is `#[serde(default)]` (→ `Correctness`) wherever
/// it appears so pre-#1359 fixtures and LLM responses that omit `category` still
/// deserialise unchanged (back-compat — every legacy finding is a correctness
/// finding).
/// Test: `finding_category_serde_roundtrip`,
/// `finding_defaults_category_correctness`, and the parser/grade tests that
/// assert the conformance cap.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "kebab-case")]
pub enum FindingCategory {
    /// A correctness / logic / safety finding (the default, legacy behaviour).
    #[default]
    Correctness,
    /// An intent/method-conformance divergence: the diff contradicts an explicit
    /// method the ticket or spec prescribed (`SPEC-CONFORMANCE-02`, M5).
    MethodConformance,
}

// ─── Finding ──────────────────────────────────────────────────────────────────

/// A single code-review finding / fix suggestion.
///
/// Why: the pipeline extracts findings from the LLM review body and attaches
/// metadata (confidence, effort, file location) so downstream stages (issue
/// filing, verification) can process them uniformly.
/// What: a direct port of `FixSuggestion` from spec §07 REV-602.  The
/// `verified` and `issue_eligible` fields are transient pipeline state; they
/// are serialised for the review log but not required on deserialisation.
/// Test: `finding_confidence_clamping`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Finding {
    /// Changed file path this finding refers to.
    pub file: String,
    /// Optional line number within the file.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub line: Option<u32>,
    /// Finding type / category (e.g. `"security"`, `"logic-error"`).
    pub kind: String,
    /// Human-readable description of the issue.
    pub description: String,
    /// Brief "what goes wrong if unaddressed" — the failure mechanism (#1416).
    ///
    /// Why: a finding that states the failure consequence (not just the issue) is
    /// far more actionable — it tells the author *why it matters* and helps them
    /// prioritise.  Kept as its own field so the renderer can place it distinctly
    /// (a `_Why it matters:_` line) rather than burying it in the description.
    /// `#[serde(default)]` (empty string) keeps every pre-#1416 finding parsing.
    #[serde(default)]
    pub consequence: String,
    /// Proposed fix or remediation suggestion (prose).
    pub suggestion: String,
    /// Exact replacement code for a committable GitHub `suggestion` block (#1415).
    ///
    /// Why: a one-click-appliable GitHub ```suggestion block must contain the
    /// *exact* replacement lines for the anchored location — not prose.  Keeping
    /// it in a dedicated field (separate from the prose `suggestion`) lets the
    /// renderer emit a committable block only when the model provides concrete
    /// replacement code, and degrade to prose otherwise.  `#[serde(default)]` +
    /// skip-if-none keeps every pre-#1415 finding deserialising with `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub suggested_replacement: Option<String>,
    /// Confidence score in `[0.0, 1.0]`.  Clamped at construction time.
    pub confidence: f32,
    /// Estimated remediation effort.
    pub effort: Effort,
    /// Which axis this finding speaks to (correctness vs. method-conformance).
    ///
    /// Why: the back gate (#1359) caps `MethodConformance` findings at
    /// `REQUEST_CHANGES` in the verdict floor.  `#[serde(default)]` keeps every
    /// pre-#1359 serialised finding deserialising as `Correctness`.
    #[serde(default)]
    pub category: FindingCategory,
    // ── Transient pipeline state ──────────────────────────────────────────
    /// Verification outcome; `None` before the verification round.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub verified: Option<VerifyOutcome>,
    /// Whether this finding is eligible for tracker-issue filing.
    #[serde(default)]
    pub issue_eligible: bool,
}

impl Finding {
    /// Construct a finding, clamping `confidence` to `[0.0, 1.0]`.
    ///
    /// Why: spec REV-607 requires clamping rather than erroring to defend
    /// against malformed LLM JSON.
    /// What: any out-of-range value is silently clamped.
    /// Test: `finding_confidence_clamping`.
    pub fn new(
        file: impl Into<String>,
        kind: impl Into<String>,
        description: impl Into<String>,
        suggestion: impl Into<String>,
        confidence: f32,
        effort: Effort,
    ) -> Self {
        Self {
            file: file.into(),
            line: None,
            kind: kind.into(),
            description: description.into(),
            consequence: String::new(),
            suggestion: suggestion.into(),
            suggested_replacement: None,
            confidence: confidence.clamp(0.0, 1.0),
            effort,
            category: FindingCategory::Correctness,
            verified: None,
            issue_eligible: false,
        }
    }

    /// Set this finding's category, returning the finding for chaining.
    ///
    /// Why: `Finding::new` defaults to `Correctness` so every existing call site
    /// is unchanged; the back-gate parser (#1359) needs to override the category
    /// for an LLM-emitted `method-conformance` finding without a new constructor
    /// arity.  A small builder keeps construction ergonomic and the default safe.
    /// What: overwrites `self.category` and returns `self`.
    /// Test: `finding_with_category_sets_method_conformance`.
    #[must_use]
    pub fn with_category(mut self, category: FindingCategory) -> Self {
        self.category = category;
        self
    }
}

// ─── InlineCommentOut ───────────────────────────────────────────────────────────

/// A serialisable inline review comment anchored to a PR diff line (#1414).
///
/// Why: the runner builds the inline-comment set from the findings + diff, and it
/// must be carried on the `ReviewResult` so (a) the live poster can attach it to
/// the GitHub review and (b) the dry-run / MCP response can preview exactly what
/// would be posted.  It is a flat, owned projection (no borrows, serde-friendly)
/// distinct from the posting-layer's internal `InlineComment` type.
/// What: `path` + `line` are the new-side anchor; `body` is the rendered markdown.
/// Test: `inline_comment_out_serde_roundtrip`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct InlineCommentOut {
    /// Changed file path (new side), e.g. `src/db.rs`.
    pub path: String,
    /// New-side line number the comment anchors to.
    pub line: u32,
    /// Rendered inline-comment markdown body.
    pub body: String,
}

// ─── ReviewResult ─────────────────────────────────────────────────────────────

/// The complete output of a PR review pass.
///
/// Why: every piece of review output is captured in one struct so it can be
/// serialised to the review log, compared between models, and diffed across
/// pipeline versions.
/// What: a subset of spec §07 REV-600 fields covering the MVP verdict loop.
/// The full field set (JIRA/APEX/Confluence context, multi-pass tokens, etc.)
/// will be added in later stages.
/// Test: `review_result_serde_roundtrip`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReviewResult {
    // ── PR identity ───────────────────────────────────────────────────────
    /// GitHub organisation.
    pub owner: String,
    /// GitHub repository name.
    pub repo: String,
    /// PR number.
    pub pr_number: u64,
    /// PR title.
    pub pr_title: String,
    /// GitHub PR URL.
    pub pr_url: String,

    // ── Review output ─────────────────────────────────────────────────────
    /// Full LLM review markdown.
    pub review_body: String,
    /// Normalised verdict.
    pub verdict: Verdict,
    /// Letter grade (e.g. "B+", "F"); `None` only for legacy pre-#732 results.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub grade: Option<String>,
    /// Extracted findings.
    pub findings: Vec<Finding>,
    /// Per-line inline review comments that were (or, in dry-run, would be)
    /// posted to the PR diff (#1414).
    ///
    /// Why: when posting findings as inline per-line comments, the dry-run /MCP
    /// response must show the exact set of would-be inline comments so a caller
    /// can preview what live mode would attach to the diff.  Storing them on the
    /// result is the single source of truth for both the live POST and the
    /// dry-run preview.  `#[serde(default)]` keeps every pre-#1414 serialised
    /// result deserialising with an empty list.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub inline_comments: Vec<InlineCommentOut>,
    /// Indices (into `findings`) of the findings that were placed inline (#1414).
    ///
    /// Why: the summary body must render exactly the findings that did NOT land
    /// inline.  Deriving that set by matching `(file, line)` against
    /// `inline_comments` is lossy — two distinct findings at the same anchor
    /// collide and one is silently omitted from *both* the inline set and the
    /// summary (data loss).  Carrying the authoritative inline index set from the
    /// `InlinePlan` lets the renderer partition by finding identity, so no finding
    /// is ever dropped.  `#[serde(default)]` + skip-if-empty keeps pre-existing
    /// serialised results deserialising unchanged.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub inline_finding_indices: Vec<usize>,
    /// Count of low-severity nits suppressed past the inline cap (#1420).
    ///
    /// Why: the nit-volume cap rolls overflow nits into one summary line rather
    /// than spamming inline comments; the count must reach the body renderer (and
    /// the dry-run / MCP response) so the suppression is acknowledged, not silent.
    /// `#[serde(default)]` + skip-if-zero keeps pre-#1420 results unchanged.
    #[serde(default, skip_serializing_if = "is_zero_usize")]
    pub suppressed_nits: usize,

    // ── Telemetry ─────────────────────────────────────────────────────────
    /// Model id used for the main reviewer call.
    pub model: String,
    /// Input token count for the reviewer call.
    pub input_tokens: u32,
    /// Output token count for the reviewer call.
    pub output_tokens: u32,
    /// Estimated USD cost for the reviewer call.
    pub cost_estimate_usd: f64,
    /// Wall-clock latency of the reviewer call in milliseconds.
    pub latency_ms: u64,

    // ── Pipeline flags ────────────────────────────────────────────────────
    /// Run status — `Completed` (authoritative), `Skipped` (required context
    /// dep down; no verdict), or `Degraded` (opted-in context-free, #590).
    #[serde(default)]
    pub status: ReviewStatus,
    /// True if this is a dry run (no GitHub comment posted).
    pub dry_run: bool,
    /// True if the review comment was posted to GitHub.
    pub posted: bool,
    /// ISO-8601 timestamp of the review.
    pub timestamp: String,
    /// Non-None if the pipeline encountered an error.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,

    // ── Dedup ─────────────────────────────────────────────────────────────
    /// Head commit SHA; used as the dedup key.
    pub head_sha: String,

    // ── Versioning ────────────────────────────────────────────────────────
    /// Pipeline version string (e.g. `"tr-0.1"`).
    pub review_version: String,
}

impl ReviewResult {
    /// Construct a `ReviewResult` skeleton with sensible defaults.
    ///
    /// Why: most fields are filled in by pipeline stages; a builder avoids
    /// long constructor signatures and eases struct evolution.
    /// What: sets `review_version`, `dry_run = true`, `posted = false`, timestamps.
    /// Test: `review_result_serde_roundtrip`.
    pub fn new(
        owner: impl Into<String>,
        repo: impl Into<String>,
        pr_number: u64,
        pr_title: impl Into<String>,
        pr_url: impl Into<String>,
    ) -> Self {
        Self {
            owner: owner.into(),
            repo: repo.into(),
            pr_number,
            pr_title: pr_title.into(),
            pr_url: pr_url.into(),
            review_body: String::new(),
            verdict: Verdict::Unknown,
            grade: None,
            findings: Vec::new(),
            inline_comments: Vec::new(),
            inline_finding_indices: Vec::new(),
            suppressed_nits: 0,
            model: String::new(),
            input_tokens: 0,
            output_tokens: 0,
            cost_estimate_usd: 0.0,
            latency_ms: 0,
            status: ReviewStatus::Completed,
            dry_run: true,
            posted: false,
            timestamp: chrono_now(),
            error: None,
            head_sha: String::new(),
            review_version: REVIEW_VERSION.to_string(),
        }
    }

    /// Copy telemetry fields from an `LlmResponse` onto this result.
    ///
    /// Why: the pipeline applies response fields after a successful LLM call.
    /// What: copies model, tokens, cost, latency, and the raw review body text.
    /// Test: covered transitively by pipeline tests.
    pub fn apply_llm_response(&mut self, resp: &crate::llm::LlmResponse) {
        self.model = resp.model.clone();
        self.input_tokens = resp.input_tokens;
        self.output_tokens = resp.output_tokens;
        self.cost_estimate_usd = resp.cost_usd;
        self.latency_ms = resp.latency_ms;
        self.review_body = resp.text.clone();
    }
}

/// Serde skip-predicate: true when a `usize` is zero (#1420).
///
/// Why: `#[serde(skip_serializing_if)]` needs a path to a predicate; this keeps
/// `suppressed_nits: 0` out of serialised results so pre-#1420 logs are unchanged.
/// What: returns `*n == 0`.
/// Test: covered transitively by `review_result_serde_roundtrip`.
fn is_zero_usize(n: &usize) -> bool {
    *n == 0
}

/// Return a simple ISO-8601 UTC timestamp without depending on chrono.
///
/// Why: we want `ReviewResult` to have a timestamp but don't want to add
/// `chrono` as an unconditional dep just for Stage 1.  This helper uses the
/// system clock and formats a basic ISO-8601 string.
/// What: formats as `YYYY-MM-DDTHH:MM:SSZ` using `std::time::SystemTime`.
/// Test: `timestamp_format_is_iso8601`.
fn chrono_now() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};
    let secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    // Convert epoch seconds to date-time components.
    let s = secs % 60;
    let m = (secs / 60) % 60;
    let h = (secs / 3600) % 24;
    let days = secs / 86400;
    // Simplified Gregorian calendar for the year 2000+ range.
    let (year, month, day) = epoch_days_to_ymd(days);
    format!("{year:04}-{month:02}-{day:02}T{h:02}:{m:02}:{s:02}Z")
}

/// Convert days since Unix epoch to (year, month, day).
fn epoch_days_to_ymd(days: u64) -> (u64, u64, u64) {
    // Reference: https://en.wikipedia.org/wiki/Julian_day (simplified)
    let z = days + 719468;
    let era = z / 146097;
    let doe = z % 146097;
    let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365;
    let y = yoe + era * 400;
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
    let mp = (5 * doy + 2) / 153;
    let d = doy - (153 * mp + 2) / 5 + 1;
    let m = if mp < 10 { mp + 3 } else { mp - 9 };
    let y = if m <= 2 { y + 1 } else { y };
    (y, m, d)
}

// ─── Unit tests ───────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    /// Verify serde round-trip for all five board grades including APPROVE*.
    ///
    /// Why: `APPROVE*` contains an asterisk which is unusual for JSON enum
    /// values; a regression would silently produce the wrong board grade.
    /// What: serialises each variant, asserts the exact board string, then
    /// deserialises and asserts equality.
    /// Test: this test itself; no network.
    #[test]
    fn verdict_serde_roundtrip() {
        let cases = [
            (Verdict::Approve, "\"APPROVE\""),
            (Verdict::ApproveWithReservations, "\"APPROVE*\""),
            (Verdict::RequestChanges, "\"REQUEST_CHANGES\""),
            (Verdict::Block, "\"BLOCK\""),
            (Verdict::Unknown, "\"UNKNOWN\""),
        ];
        for (v, expected_json) in cases {
            let json = serde_json::to_string(&v).unwrap();
            assert_eq!(json, expected_json, "serialise mismatch for {v:?}");
            let back: Verdict = serde_json::from_str(&json).unwrap();
            assert_eq!(back, v, "deserialise mismatch for {expected_json}");
        }
    }

    /// Verify Display prints the exact board strings.
    ///
    /// Why: the compare table and Markdown log use `verdict.to_string()`;
    /// any mismatch would show the wrong grade to users.
    /// What: asserts Display output for all five variants matches board strings.
    /// Test: this test itself.
    #[test]
    fn verdict_display() {
        assert_eq!(Verdict::Approve.to_string(), "APPROVE");
        assert_eq!(Verdict::ApproveWithReservations.to_string(), "APPROVE*");
        assert_eq!(Verdict::RequestChanges.to_string(), "REQUEST_CHANGES");
        assert_eq!(Verdict::Block.to_string(), "BLOCK");
        assert_eq!(Verdict::Unknown.to_string(), "UNKNOWN");
    }

    /// Verify UNKNOWN round-trips correctly (board-grade special case).
    ///
    /// Why: UNKNOWN is emitted when the diff is too truncated to assess; it
    /// must survive a serde round-trip so the calibration board sees the
    /// correct grade.
    /// What: serialises `Unknown`, asserts `"UNKNOWN"`, deserialises back.
    /// Test: this test itself.
    #[test]
    fn verdict_unknown_round_trip() {
        let json = serde_json::to_string(&Verdict::Unknown).unwrap();
        assert_eq!(json, "\"UNKNOWN\"");
        let back: Verdict = serde_json::from_str(&json).unwrap();
        assert_eq!(back, Verdict::Unknown);
    }

    /// Verify the single-source-of-truth verdict ordinal is strictly monotonic.
    ///
    /// Why: #1357 collapsed two duplicate `verdict_ord` tables (grade.rs +
    /// verify.rs) into `Verdict::ordinal`.  This test pins the ordering both
    /// call sites depend on so a future reorder can't silently invert the
    /// floor/ceiling comparisons.
    /// What: asserts APPROVE < APPROVE* < REQUEST_CHANGES < BLOCK < UNKNOWN.
    /// Test: this test itself.
    #[test]
    fn verdict_ordinal_is_monotonic() {
        assert!(Verdict::Approve.ordinal() < Verdict::ApproveWithReservations.ordinal());
        assert!(Verdict::ApproveWithReservations.ordinal() < Verdict::RequestChanges.ordinal());
        assert!(Verdict::RequestChanges.ordinal() < Verdict::Block.ordinal());
        assert!(Verdict::Block.ordinal() < Verdict::Unknown.ordinal());
        // Pin the exact values the comparison logic relies on.
        assert_eq!(Verdict::Approve.ordinal(), 0);
        assert_eq!(Verdict::Block.ordinal(), 3);
    }

    #[test]
    fn effort_serde_roundtrip() {
        let json = serde_json::to_string(&Effort::Low).unwrap();
        assert_eq!(json, "\"low\"");
        let back: Effort = serde_json::from_str(&json).unwrap();
        assert_eq!(back, Effort::Low);
    }

    #[test]
    fn effort_issue_eligibility() {
        assert!(Effort::Low.is_issue_eligible());
        assert!(Effort::Medium.is_issue_eligible());
        assert!(!Effort::High.is_issue_eligible());
    }

    #[test]
    fn finding_confidence_clamping() {
        let f_over = Finding::new("src/lib.rs", "bug", "desc", "fix", 1.5, Effort::Low);
        assert!(
            (f_over.confidence - 1.0_f32).abs() < f32::EPSILON,
            "over 1.0 should clamp to 1.0"
        );

        let f_under = Finding::new("src/lib.rs", "bug", "desc", "fix", -0.1, Effort::Low);
        assert!(
            (f_under.confidence - 0.0_f32).abs() < f32::EPSILON,
            "under 0.0 should clamp to 0.0"
        );

        let f_mid = Finding::new("src/lib.rs", "bug", "desc", "fix", 0.85, Effort::Medium);
        assert!((f_mid.confidence - 0.85_f32).abs() < f32::EPSILON);
    }

    /// `FindingCategory` round-trips via its kebab-case wire form.
    ///
    /// Why: the LLM emits and the review log persists the category as a string;
    /// the back gate (#1359) keys verdict-floor behaviour off it, so the wire
    /// shape must be stable.
    /// What: serialises both variants, asserts the exact tokens, deserialises back.
    /// Test: this test itself.
    #[test]
    fn finding_category_serde_roundtrip() {
        assert_eq!(
            serde_json::to_string(&FindingCategory::Correctness).unwrap(),
            "\"correctness\""
        );
        assert_eq!(
            serde_json::to_string(&FindingCategory::MethodConformance).unwrap(),
            "\"method-conformance\""
        );
        let back: FindingCategory = serde_json::from_str("\"method-conformance\"").unwrap();
        assert_eq!(back, FindingCategory::MethodConformance);
        assert_eq!(FindingCategory::default(), FindingCategory::Correctness);
    }

    /// A `Finding` constructed via `new` defaults to the `Correctness` category.
    ///
    /// Why: every existing call site must keep producing correctness findings
    /// (back-compat); only the back-gate parser opts into `MethodConformance`.
    /// What: builds a finding via `new`, asserts the category.
    /// Test: this test itself.
    #[test]
    fn finding_defaults_category_correctness() {
        let f = Finding::new("src/lib.rs", "bug", "desc", "fix", 0.9, Effort::Low);
        assert_eq!(f.category, FindingCategory::Correctness);
    }

    /// `with_category` overrides the default category.
    ///
    /// Why: the back-gate parser threads `MethodConformance` onto an LLM finding
    /// via this builder.
    /// What: chains `with_category`, asserts the override took effect.
    /// Test: this test itself.
    #[test]
    fn finding_with_category_sets_method_conformance() {
        let f = Finding::new("src/lib.rs", "bug", "desc", "fix", 0.9, Effort::Medium)
            .with_category(FindingCategory::MethodConformance);
        assert_eq!(f.category, FindingCategory::MethodConformance);
    }

    /// A serialised finding WITHOUT a `category` field still deserialises (the
    /// `#[serde(default)]` back-compat guarantee for pre-#1359 fixtures).
    ///
    /// Why: AC requires existing finding fixtures (no `category` key) to keep
    /// deserialising, defaulting to `Correctness`.
    /// What: deserialises a category-less JSON object, asserts the default.
    /// Test: this test itself.
    #[test]
    fn finding_without_category_field_defaults_correctness() {
        let json = r#"{
            "file": "src/main.rs",
            "kind": "logic-error",
            "description": "off-by-one",
            "suggestion": "use <=",
            "confidence": 0.7,
            "effort": "medium"
        }"#;
        let f: Finding = serde_json::from_str(json).expect("legacy finding must deserialise");
        assert_eq!(f.category, FindingCategory::Correctness);
        assert_eq!(f.kind, "logic-error");
    }

    /// `InlineCommentOut` survives a serde round-trip (#1414).
    ///
    /// Why: the dry-run / MCP response serialises `ReviewResult.inline_comments`;
    /// the projection must round-trip so callers can preview would-be comments.
    /// What: serialises an `InlineCommentOut`, deserialises it, asserts equality.
    /// Test: this test itself.
    #[test]
    fn inline_comment_out_serde_roundtrip() {
        let c = InlineCommentOut {
            path: "src/db.rs".to_string(),
            line: 42,
            body: "**security** — SQL injection".to_string(),
        };
        let json = serde_json::to_string(&c).expect("serialise");
        let back: InlineCommentOut = serde_json::from_str(&json).expect("deserialise");
        assert_eq!(back, c);
    }

    /// A pre-#1414 `ReviewResult` JSON (no `inline_comments`) still deserialises.
    ///
    /// Why: `inline_comments` is `#[serde(default)]` so older review logs keep
    /// loading with an empty inline-comment list.
    /// What: builds a result, serialises with no comments (skipped when empty),
    /// re-parses, asserts the field defaults to empty.
    /// Test: this test itself.
    #[test]
    fn review_result_without_inline_comments_defaults_empty() {
        let result = ReviewResult::new("o", "r", 1, "t", "u");
        let json = serde_json::to_string(&result).expect("serialise");
        assert!(
            !json.contains("inline_comments"),
            "empty inline_comments is skipped in serialisation"
        );
        let back: ReviewResult = serde_json::from_str(&json).expect("deserialise");
        assert!(back.inline_comments.is_empty());
    }

    #[test]
    fn review_result_serde_roundtrip() {
        let mut result = ReviewResult::new(
            "acme",
            "backend",
            42,
            "Add feature X",
            "https://github.com/acme/backend/pull/42",
        );
        result.verdict = Verdict::RequestChanges;
        result.review_version = "tr-0.1".to_string();
        result.findings.push(Finding::new(
            "src/main.rs",
            "security",
            "SQL injection risk",
            "Use parameterised query",
            0.92,
            Effort::Medium,
        ));

        let json = serde_json::to_string(&result).expect("serialise");
        let back: ReviewResult = serde_json::from_str(&json).expect("deserialise");

        assert_eq!(back.owner, "acme");
        assert_eq!(back.repo, "backend");
        assert_eq!(back.pr_number, 42);
        assert_eq!(back.verdict, Verdict::RequestChanges);
        assert_eq!(back.review_version, "tr-0.1");
        assert_eq!(back.findings.len(), 1);
        assert_eq!(back.findings[0].kind, "security");
        assert!((back.findings[0].confidence - 0.92_f32).abs() < f32::EPSILON);
        assert!(back.dry_run, "dry_run defaults to true");
        assert!(!back.posted, "posted defaults to false");
    }

    #[test]
    fn timestamp_format_is_iso8601() {
        let ts = chrono_now();
        // Basic format check: YYYY-MM-DDTHH:MM:SSZ
        assert_eq!(ts.len(), 20, "timestamp should be 20 chars: {ts}");
        assert_eq!(&ts[4..5], "-");
        assert_eq!(&ts[7..8], "-");
        assert_eq!(&ts[10..11], "T");
        assert_eq!(&ts[13..14], ":");
        assert_eq!(&ts[16..17], ":");
        assert_eq!(&ts[19..20], "Z");
    }

    #[test]
    fn verify_outcome_serde() {
        let confirmed = VerifyOutcome::Confirmed;
        let json = serde_json::to_string(&confirmed).unwrap();
        assert_eq!(json, "\"confirmed\"");

        let error_refuted = VerifyOutcome::ErrorRefuted {
            error_class: "ModelNotFound".to_string(),
        };
        let json = serde_json::to_string(&error_refuted).unwrap();
        let back: VerifyOutcome = serde_json::from_str(&json).unwrap();
        assert!(
            matches!(back, VerifyOutcome::ErrorRefuted { error_class } if error_class == "ModelNotFound")
        );
    }
}