trusty-review 0.4.0

LLM-backed code review service — reviews GitHub PRs and unified diffs via AWS Bedrock or OpenRouter
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
//! Unit tests for grade.rs — severity-floor derivation and grade-aware derivation.
//!
//! Why: extracted to a sibling file to keep `grade.rs` under the 500-line cap
//! while preserving full test coverage for both `derive_verdict` and
//! `derive_verdict_with_grade`.
//! What: covers all severity-floor tiers, UNKNOWN preservation, low-confidence
//! collapse, and the grade-aware derivation including the reconciliation test
//! that confirms a confirmed High-effort finding clamps a model "A" grade down
//! to a verdict-consistent band.
//! Test: this file is the test module.

use super::*;
use crate::models::{Finding, FindingCategory, VerifyOutcome};
use crate::pipeline::letter_grade::Grade;

/// Build a minimal fixture finding at the given effort and confidence.
///
/// Why: every test in this module needs findings that differ only in effort
/// and confidence, so the remaining constructor arguments are fixed here.
/// What: forwards `confidence` and `effort` to `Finding::new` alongside constant
/// placeholder strings (their meaning is defined by `Finding::new`, which is
/// not visible from this file).
/// Test: used by nearly every test below.
fn finding(effort: Effort, confidence: f32) -> Finding {
    Finding::new(
        "src/lib.rs",
        "test",
        "desc",
        "",
        confidence,
        effort,
    )
}

/// Build a fixture finding categorised as method-conformance (#1359).
///
/// Why: the back-gate verdict-floor tests need conformance-category findings to
/// assert the REQUEST_CHANGES cap (never BLOCK) and the 0.80 advisory gate.
/// What: starts from the plain `finding` fixture and re-tags its category as
/// `FindingCategory::MethodConformance` via `with_category`.
/// Test: used by the `conformance_*` tests below.
fn conformance_finding(effort: Effort, confidence: f32) -> Finding {
    let base = finding(effort, confidence);
    base.with_category(FindingCategory::MethodConformance)
}

/// Build a fixture finding whose `verified` field is already populated.
///
/// Why: the #1343 regression fixtures need findings tagged `Refuted` to assert
/// they are excluded from the verdict floor.
/// What: creates the plain `finding` fixture, stores `Some(outcome)` in its
/// `verified` field, and hands it back.
/// Test: used by `floor_excludes_refuted_and_low_confidence_findings` and
/// `approve_b_plus_survives_refuted_and_low_confidence_findings`.
fn verified_finding(effort: Effort, confidence: f32, outcome: VerifyOutcome) -> Finding {
    let mut tagged = finding(effort, confidence);
    tagged.verified = Some(outcome);
    tagged
}

// ── Tier 1: Critical / High ──────────────────────────────────────────────────

/// A single High-effort finding escalates the verdict to BLOCK.
///
/// Why: the calibration run showed 0% BLOCK detection; flooring High-effort
/// (critical/high severity) findings to BLOCK is the primary fix.
/// What: model proposes APPROVE* with one High@0.9 finding → BLOCK.
/// Test: this test itself.
#[test]
fn grade_critical_high_effort_yields_block() {
    let high_only = [finding(Effort::High, 0.9)];
    let derived = derive_verdict(Verdict::ApproveWithReservations, &high_only);
    assert_eq!(
        derived,
        Verdict::Block,
        "High-effort finding must floor to BLOCK"
    );
}

/// The High-effort floor outranks a model-proposed REQUEST_CHANGES.
///
/// Why: even when the model already escalated to REQUEST_CHANGES, a critical
/// finding must push the verdict further, to BLOCK.
/// What: model REQUEST_CHANGES + one High@0.85 → BLOCK.
/// Test: this test itself.
#[test]
fn grade_high_effort_beats_request_changes() {
    let high_only = [finding(Effort::High, 0.85)];
    let derived = derive_verdict(Verdict::RequestChanges, &high_only);
    assert_eq!(derived, Verdict::Block);
}

// ── Tier 2: ≥2 Medium ────────────────────────────────────────────────────────

/// Two Medium findings above the floor confidence escalate to REQUEST_CHANGES.
///
/// Why: the calibration run showed REQUEST_CHANGES only 36% — this tier closes
/// the gap for PRs with multiple well-grounded concerns.  Only findings with
/// confidence > FLOOR_MIN_CONFIDENCE (0.80) count toward the floor (#1015).
/// What: model APPROVE* + Medium@0.85 + Medium@0.82 → REQUEST_CHANGES.
/// Test: this test itself.
#[test]
fn grade_two_medium_yields_request_changes() {
    let mediums = [finding(Effort::Medium, 0.85), finding(Effort::Medium, 0.82)];
    let derived = derive_verdict(Verdict::ApproveWithReservations, &mediums);
    assert_eq!(derived, Verdict::RequestChanges);
}

/// Three high-confidence Mediums escalate a model APPROVE* to REQUEST_CHANGES.
///
/// Why: when the model's own verdict is APPROVE* (not a clean APPROVE), the
/// count-based floor is free to escalate to REQUEST_CHANGES — the model already
/// flagged reservations, so the floor is not contradicting an APPROVE review_body.
/// What: model APPROVE* + three Medium@0.85 → floor REQUEST_CHANGES (stricter wins).
/// Test: this test itself.
#[test]
fn grade_three_medium_yields_request_changes() {
    let mediums = [
        finding(Effort::Medium, 0.85),
        finding(Effort::Medium, 0.85),
        finding(Effort::Medium, 0.85),
    ];
    let derived = derive_verdict(Verdict::ApproveWithReservations, &mediums);
    assert_eq!(derived, Verdict::RequestChanges);
}

/// #1343: the Medium-count floor cannot push a clean model APPROVE past APPROVE*.
///
/// Why: this is the core calibration bug.  The model holistically judged the change
/// APPROVE; a count-based REQUEST_CHANGES floor (≥2 high-confidence Mediums) must
/// not contradict the model's own verdict.  The concern is still surfaced as an
/// advisory APPROVE* (not a silent APPROVE), but an APPROVE review_body is never
/// hardened to REQUEST_CHANGES.
/// What: model APPROVE + three Medium@0.85 → APPROVE* (capped, not REQUEST_CHANGES).
/// Test: this test itself.
#[test]
fn grade_model_approve_caps_medium_count_floor_at_approve_star() {
    let mediums = [
        finding(Effort::Medium, 0.85),
        finding(Effort::Medium, 0.85),
        finding(Effort::Medium, 0.85),
    ];
    let derived = derive_verdict(Verdict::Approve, &mediums);
    assert_eq!(
        derived,
        Verdict::ApproveWithReservations,
        "model APPROVE must cap the Medium-count floor at APPROVE* (#1343)"
    );
}

// ── Tier 3: Exactly 1 Medium ─────────────────────────────────────────────────

/// A single Medium finding above the floor confidence yields APPROVE*.
///
/// Why: one well-grounded concern should not block the PR but warrants
/// noting.  Only findings with confidence > FLOOR_MIN_CONFIDENCE (0.80) count
/// toward the floor (#1015).
/// What: model APPROVE + one Medium@0.85 → APPROVE*.
/// Test: this test itself.
#[test]
fn grade_one_medium_yields_approve_star() {
    let single_medium = [finding(Effort::Medium, 0.85)];
    let derived = derive_verdict(Verdict::Approve, &single_medium);
    assert_eq!(derived, Verdict::ApproveWithReservations);
}

// ── Tier 4: Only Low or no findings ─────────────────────────────────────────

/// An empty findings list leaves a model APPROVE untouched.
///
/// What: model APPROVE + no findings → APPROVE.
/// Test: this test itself.
#[test]
fn grade_no_findings_yields_approve() {
    let derived = derive_verdict(Verdict::Approve, &[]);
    assert_eq!(derived, Verdict::Approve);
}

/// Low-effort findings alone never raise the verdict above APPROVE.
///
/// What: model APPROVE + Low@0.9 + Low@0.7 → APPROVE.
/// Test: this test itself.
#[test]
fn grade_only_low_yields_approve() {
    let lows = [finding(Effort::Low, 0.9), finding(Effort::Low, 0.7)];
    let derived = derive_verdict(Verdict::Approve, &lows);
    assert_eq!(derived, Verdict::Approve);
}

// ── UNKNOWN preservation ─────────────────────────────────────────────────────

/// A model verdict of UNKNOWN passes through unchanged when findings exist.
///
/// Why: UNKNOWN signals "model could not assess", not "clean PR"; it must not
/// be collapsed to APPROVE.
/// What: model UNKNOWN + one Low@0.9 → UNKNOWN.
/// Test: this test itself.
#[test]
fn grade_unknown_is_preserved() {
    let lows = [finding(Effort::Low, 0.9)];
    let derived = derive_verdict(Verdict::Unknown, &lows);
    assert_eq!(derived, Verdict::Unknown, "UNKNOWN must be preserved");
}

/// A model verdict of UNKNOWN passes through unchanged with no findings.
///
/// What: model UNKNOWN + empty findings → UNKNOWN.
/// Test: this test itself.
#[test]
fn grade_unknown_preserved_with_no_findings() {
    let derived = derive_verdict(Verdict::Unknown, &[]);
    assert_eq!(derived, Verdict::Unknown);
}

// ── Floor takes the stricter ─────────────────────────────────────────────────

/// The severity floor wins over a model-proposed APPROVE when a finding is High.
///
/// Why: this is the core "stricter floor" invariant — the model cannot soften a
/// High finding by proposing APPROVE.
/// What: model APPROVE + one High@0.95 → BLOCK.
/// Test: this test itself.
#[test]
fn grade_floor_overrides_model_approve() {
    let high_only = [finding(Effort::High, 0.95)];
    let derived = derive_verdict(Verdict::Approve, &high_only);
    assert_eq!(
        derived,
        Verdict::Block,
        "severity floor must override model-proposed APPROVE"
    );
}

/// A model-proposed BLOCK survives even when no finding is High.
///
/// Why: the floor is a minimum; the model can still escalate beyond the floor.
/// What: model BLOCK + one Medium@0.9 → BLOCK.
/// Test: this test itself.
#[test]
fn grade_model_block_kept_when_no_critical_finding() {
    let single_medium = [finding(Effort::Medium, 0.9)];
    let derived = derive_verdict(Verdict::Block, &single_medium);
    assert_eq!(
        derived,
        Verdict::Block,
        "model BLOCK must not be downgraded by floor"
    );
}

/// A model-proposed REQUEST_CHANGES is kept when the findings are only Low.
///
/// What: model REQUEST_CHANGES + one Low@0.9 → REQUEST_CHANGES.
/// Test: this test itself.
#[test]
fn grade_model_request_changes_preserved_over_lower_floor() {
    let lows = [finding(Effort::Low, 0.9)];
    let derived = derive_verdict(Verdict::RequestChanges, &lows);
    assert_eq!(derived, Verdict::RequestChanges);
}

// ── Low-confidence collapse ──────────────────────────────────────────────────

/// A batch of Medium findings all at confidence ≤ 0.65 collapses APPROVE* to APPROVE.
///
/// Why: Fix 4 — curb APPROVE* over-fire on clean PRs.
/// What: model APPROVE* + Medium@0.6 + Medium@0.55 → APPROVE.
/// Test: this test itself.
#[test]
fn grade_low_confidence_all_medium_yields_approve() {
    let weak_mediums = [finding(Effort::Medium, 0.6), finding(Effort::Medium, 0.55)];
    let derived = derive_verdict(Verdict::ApproveWithReservations, &weak_mediums);
    assert_eq!(
        derived,
        Verdict::Approve,
        "all-low-confidence advisory batch must not fire APPROVE*"
    );
}

/// A Medium finding at exactly confidence 0.65 still collapses APPROVE* to APPROVE.
///
/// What: model APPROVE* + one Medium@0.65 → APPROVE.
/// Test: this test itself.
#[test]
fn grade_confidence_at_threshold_collapses() {
    let at_threshold = [finding(Effort::Medium, 0.65)];
    let derived = derive_verdict(Verdict::ApproveWithReservations, &at_threshold);
    assert_eq!(
        derived,
        Verdict::Approve,
        "confidence at threshold must collapse"
    );
}

/// A Medium finding between the two confidence thresholds leaves APPROVE intact.
///
/// Why: confidence 0.66 sits above the all-advisory-batch collapse threshold
/// (LOW_CONFIDENCE_THRESHOLD, 0.65), so the low-confidence override does not
/// fire.  It is also below FLOOR_MIN_CONFIDENCE (0.80), so it does NOT count
/// toward the REQUEST_CHANGES / APPROVE* floor — the floor is APPROVE.
/// What: model APPROVE + one Medium@0.66 → medium_count=0 → floor=APPROVE → APPROVE.
/// Test: this test itself.
#[test]
fn grade_high_confidence_medium_beats_low_confidence_check() {
    let between_thresholds = [finding(Effort::Medium, 0.66)];
    let derived = derive_verdict(Verdict::Approve, &between_thresholds);
    // Above 0.65: the all-low-confidence override stays off.
    // Not above 0.80: the finding is not floor-counting, so the floor is APPROVE.
    assert_eq!(derived, Verdict::Approve);
}

/// Of two Medium findings, only the one above FLOOR_MIN_CONFIDENCE counts.
///
/// Why: only findings with confidence > 0.80 count toward the floor (#1015).
/// One floor-counting Medium → APPROVE* (not REQUEST_CHANGES).  The old test
/// (confidence 0.8, 0.5 → REQUEST_CHANGES) encoded the over-aggressive behavior
/// that caused #1015; confidence 0.8 is NOT > 0.80.
/// What: model APPROVE + Medium@0.85 + Medium@0.5 → APPROVE*.
/// Test: this test itself.
#[test]
fn grade_mixed_confidence_two_medium_only_one_counts() {
    let mixed = [finding(Effort::Medium, 0.85), finding(Effort::Medium, 0.5)];
    let derived = derive_verdict(Verdict::Approve, &mixed);
    // The 0.5 finding is not floor-counting; a single counting Medium → APPROVE*.
    assert_eq!(derived, Verdict::ApproveWithReservations);
}

// ── Compile-break BLOCK rule ─────────────────────────────────────────────────

/// A compile-break, modelled here as a High-effort finding, escalates to BLOCK.
///
/// What: model APPROVE* + one High@0.95 → BLOCK.
/// Test: this test itself.
#[test]
fn grade_compile_break_high_effort_flows_to_block() {
    let compile_break = [finding(Effort::High, 0.95)];
    let derived = derive_verdict(Verdict::ApproveWithReservations, &compile_break);
    assert_eq!(
        derived,
        Verdict::Block,
        "compile-break (High effort) must escalate to BLOCK"
    );
}

// ── derive_verdict_with_grade — boundary tests (#732) ───────────────────────

/// Grade A with no findings and a model APPROVE stays (APPROVE, A).
///
/// Why: A is in the APPROVE band; with no high/medium findings no floor
/// applies, so APPROVE is returned and the grade is unchanged.
/// What: model APPROVE, grade A, no findings → (APPROVE, A).
/// Test: this test itself.
#[test]
fn derive_verdict_with_grade_grade_a_no_findings_approve() {
    let (verdict, grade) = derive_verdict_with_grade(Verdict::Approve, Grade::A, &[]);
    assert_eq!(verdict, Verdict::Approve);
    assert_eq!(grade, Grade::A);
}

/// Grade F with no findings turns a model APPROVE into (BLOCK, F).
///
/// Why: grade F implies BLOCK; although the severity floor on zero findings
/// is APPROVE, the grade takes the stricter — the effective model proposal is
/// BLOCK, and BLOCK with no findings stays BLOCK.
/// What: model APPROVE, grade F, no findings → (BLOCK, F).
/// Test: this test itself.
#[test]
fn derive_verdict_with_grade_grade_f_no_findings_block() {
    let (verdict, grade) = derive_verdict_with_grade(Verdict::Approve, Grade::F, &[]);
    assert_eq!(verdict, Verdict::Block);
    assert_eq!(grade, Grade::F);
}

/// One High-effort finding turns a model APPROVE with grade A into (BLOCK, F).
///
/// Why: the severity floor (High-effort finding → BLOCK) overrides grade A, and
/// the grade is then clamped to F to stay consistent with BLOCK.  This is the key
/// reconciliation test: a confirmed High-severity finding clamps a model "A"
/// grade down to F.
/// What: model APPROVE, grade A, one High@0.9 → (BLOCK, F).
/// Test: this test itself.
#[test]
fn derive_verdict_with_grade_severity_overrides_grade_a() {
    let high_only = [finding(Effort::High, 0.9)];
    let (verdict, grade) = derive_verdict_with_grade(Verdict::Approve, Grade::A, &high_only);
    assert_eq!(verdict, Verdict::Block, "severity floor must override grade A");
    assert_eq!(grade, Grade::F, "grade must be clamped to F when verdict=BLOCK");
}

/// Grade B- (the bottom of the APPROVE band) yields (APPROVE, B-).
///
/// Why: boundary test for the B- / C+ transition.
/// What: model APPROVE, grade B-, no findings → (APPROVE, B-).
/// Test: this test itself.
#[test]
fn derive_verdict_with_grade_b_minus_yields_approve() {
    let (verdict, grade) = derive_verdict_with_grade(Verdict::Approve, Grade::BMinus, &[]);
    assert_eq!(verdict, Verdict::Approve);
    assert_eq!(grade, Grade::BMinus);
}

/// Grade C+ (the ceiling of the APPROVE* band) yields (APPROVE*, C+).
///
/// Why: boundary test for the C+ / B- transition — C+ is the first grade below
/// the APPROVE band, so it must map to APPROVE* while the grade itself is
/// left unclamped.
/// What: model APPROVE, grade C+, no findings → (APPROVE*, C+).
/// Test: this test itself.
#[test]
fn derive_verdict_with_grade_c_plus_yields_approve_star() {
    let (v, g) = derive_verdict_with_grade(Verdict::Approve, Grade::CPlus, &[]);
    assert_eq!(v, Verdict::ApproveWithReservations);
    // CPlus is the ceiling of APPROVE*, no clamping needed.
    assert_eq!(g, Grade::CPlus);
}

/// Grade C- maps a model APPROVE to APPROVE*.
///
/// What: model APPROVE, grade C-, no findings → verdict APPROVE* (the returned
/// grade is not asserted here).
/// Test: this test itself.
#[test]
fn derive_verdict_with_grade_c_minus_yields_approve_star() {
    let (verdict, _grade) = derive_verdict_with_grade(Verdict::Approve, Grade::CMinus, &[]);
    assert_eq!(verdict, Verdict::ApproveWithReservations);
}

/// Grade D+ maps a model APPROVE to (REQUEST_CHANGES, D+).
///
/// What: model APPROVE, grade D+, no findings → (REQUEST_CHANGES, D+).
/// Test: this test itself.
#[test]
fn derive_verdict_with_grade_d_plus_yields_request_changes() {
    let (verdict, grade) = derive_verdict_with_grade(Verdict::Approve, Grade::DPlus, &[]);
    assert_eq!(verdict, Verdict::RequestChanges);
    assert_eq!(grade, Grade::DPlus);
}

/// Grade D- maps a model APPROVE to REQUEST_CHANGES.
///
/// What: model APPROVE, grade D-, no findings → verdict REQUEST_CHANGES (the
/// returned grade is not asserted here).
/// Test: this test itself.
#[test]
fn derive_verdict_with_grade_d_minus_yields_request_changes() {
    let (verdict, _grade) = derive_verdict_with_grade(Verdict::Approve, Grade::DMinus, &[]);
    assert_eq!(verdict, Verdict::RequestChanges);
}

/// A model APPROVE* outranks grade A; the grade is pulled down to C+.
///
/// Why: max(APPROVE from grade, APPROVE* from model) = APPROVE*.
/// The model may have used explicit advisory language; its escalation stands.
/// What: model APPROVE*, grade A, no findings → (APPROVE*, C+).
/// Test: this test itself.
#[test]
fn derive_verdict_with_grade_model_escalates_above_grade() {
    let (verdict, grade) =
        derive_verdict_with_grade(Verdict::ApproveWithReservations, Grade::A, &[]);
    assert_eq!(verdict, Verdict::ApproveWithReservations);
    // The verdict is APPROVE*, so grade A is clamped to C+, that band's ceiling.
    assert_eq!(grade, Grade::CPlus);
}

/// Two high-confidence Mediums push grade C- with a model APPROVE to (REQUEST_CHANGES, D+).
///
/// Why: grade C- → APPROVE*, model APPROVE → effective = APPROVE*.  Two Medium
/// findings with confidence > 0.80 floor to REQUEST_CHANGES (stricter than APPROVE*).
/// Grade C- must then clamp to D+ (ceiling of the REQUEST_CHANGES band).
/// Note: confidence must be > FLOOR_MIN_CONFIDENCE (0.80); findings at 0.80 no
/// longer count (#1015).
/// What: model APPROVE, grade C-, two Medium@0.85 → (REQUEST_CHANGES, D+).
/// Test: this test itself.
#[test]
fn derive_verdict_with_grade_floor_stricter_than_grade() {
    let mediums = [finding(Effort::Medium, 0.85), finding(Effort::Medium, 0.85)];
    let (verdict, grade) = derive_verdict_with_grade(Verdict::Approve, Grade::CMinus, &mediums);
    assert_eq!(verdict, Verdict::RequestChanges);
    assert_eq!(
        grade,
        Grade::DPlus,
        "grade must clamp to D+ (ceiling of REQUEST_CHANGES)"
    );
}

// ── #1015 regression: advisory Medium findings must not over-escalate ────────

/// #1015 primary regression: two Medium@0.70 findings must leave APPROVE/B+ alone.
///
/// Why: advisory-tier Medium findings (confidence ≤ FLOOR_MIN_CONFIDENCE = 0.80)
/// are speculative; the floor must not override the model's holistic APPROVE/B+
/// judgment.  This was the live bug: top-level REQUEST_CHANGES on PRs with only
/// advisory findings.
/// What: zero floor-counting Mediums (both 0.70 ≤ 0.80) → floor = APPROVE →
/// final = max(APPROVE, APPROVE) = APPROVE, grade stays B+.
/// Test: this test itself.
#[test]
fn grade_approve_b_plus_two_medium_advisory_stays_approve() {
    let advisory = [finding(Effort::Medium, 0.70), finding(Effort::Medium, 0.70)];
    let (verdict, grade) = derive_verdict_with_grade(Verdict::Approve, Grade::BPlus, &advisory);
    assert_eq!(
        verdict,
        Verdict::Approve,
        "advisory Medium@0.70 must not escalate APPROVE/B+ to REQUEST_CHANGES (#1015)"
    );
    // B+ already sits in the APPROVE band, so the grade passes through unclamped.
    assert_eq!(grade, Grade::BPlus);
}

/// Advisory Medium findings between LOW_CONFIDENCE_THRESHOLD and
/// FLOOR_MIN_CONFIDENCE do not count toward the floor, however many there are.
///
/// Why: confidences 0.70–0.75 are above LOW_CONFIDENCE_THRESHOLD (0.65) so the
/// all-low-confidence override does NOT fire, but they are below
/// FLOOR_MIN_CONFIDENCE (0.80) so the floor-count does not trigger either.  These
/// findings are neither "all advisory noise" nor "confirmed blocking concerns" —
/// and that is correct.
/// What: model APPROVE + three Mediums (@0.70, @0.72, @0.75) → floor = APPROVE →
/// APPROVE.
/// Test: this test itself.
#[test]
fn grade_advisory_medium_below_floor_threshold_does_not_escalate() {
    let findings = vec![
        finding(Effort::Medium, 0.70),
        finding(Effort::Medium, 0.72),
        finding(Effort::Medium, 0.75),
    ];
    let verdict = derive_verdict(Verdict::Approve, &findings);
    assert_eq!(
        verdict,
        Verdict::Approve,
        "Medium findings below FLOOR_MIN_CONFIDENCE must not force REQUEST_CHANGES"
    );
}

/// Two Mediums above the floor threshold DO escalate a model APPROVE*.
///
/// Why: confirms the complementary behavior — the fix is calibrated, not a
/// blanket suppression.  Well-grounded Medium findings (confidence > 0.80) still
/// trigger REQUEST_CHANGES when the model itself flagged reservations (APPROVE*).
/// What: model APPROVE* + two Medium@0.85 → both count → floor = REQUEST_CHANGES.
/// Test: this test itself.
#[test]
fn grade_high_confidence_medium_above_floor_threshold_escalates() {
    let mediums = [finding(Effort::Medium, 0.85), finding(Effort::Medium, 0.85)];
    let derived = derive_verdict(Verdict::ApproveWithReservations, &mediums);
    assert_eq!(
        derived,
        Verdict::RequestChanges,
        "Medium findings above FLOOR_MIN_CONFIDENCE must still trigger REQUEST_CHANGES \
         when the model did not give a clean APPROVE"
    );
}

// ── #1343 regression: structured verdict/grade must honor the model review_body ─

/// #1343: refuted and sub-0.50-confidence findings are excluded from the floor.
///
/// Why: the calibration bug surfaced REQUEST_CHANGES/D+ partly because
/// `verified:"refuted"` findings (demoted to 0.10) and raw `confidence:0.1`
/// findings were still fed into the severity floor.  They must be treated as noise.
/// What: model APPROVE + one refuted High@0.10 (the post-demotion confidence) +
/// one Medium@0.10 → APPROVE (no floor escalation, because neither finding is
/// substantive).
/// Test: this test itself.
#[test]
fn floor_excludes_refuted_and_low_confidence_findings() {
    let findings = vec![
        verified_finding(Effort::High, 0.10, VerifyOutcome::Refuted),
        finding(Effort::Medium, 0.10),
    ];
    let verdict = derive_verdict(Verdict::Approve, &findings);
    assert_eq!(
        verdict,
        Verdict::Approve,
        "refuted + sub-0.50-confidence findings must not harden the verdict (#1343)"
    );
}

/// #1343 end-to-end: APPROVE / B+ survives refuted and low-confidence findings.
///
/// Why: this is the exact PR #1342 evidence pattern — the inner reviewer said
/// APPROVE/B+ every round while refuted (confidence 0.10) and other low-confidence
/// findings were present.  The structured verdict/grade must reconcile to the
/// model's own APPROVE/B+ rather than hardening to REQUEST_CHANGES/D+.
/// What: model APPROVE, grade B+, findings = [refuted High@0.10, Medium@0.10,
/// Low@0.30] → (APPROVE, B+).  Grade is NOT clamped to D+.
/// Test: this test itself.
#[test]
fn approve_b_plus_survives_refuted_and_low_confidence_findings() {
    let noise = [
        verified_finding(Effort::High, 0.10, VerifyOutcome::Refuted),
        finding(Effort::Medium, 0.10),
        finding(Effort::Low, 0.30),
    ];
    let (verdict, grade) = derive_verdict_with_grade(Verdict::Approve, Grade::BPlus, &noise);
    assert_eq!(
        verdict,
        Verdict::Approve,
        "APPROVE review_body must not surface structured REQUEST_CHANGES (#1343)"
    );
    assert_eq!(
        grade,
        Grade::BPlus,
        "B+ grade must not be clamped down to D+ (#1343 footer/grade consistency)"
    );
}

/// #1343: two high-confidence Mediums cap a clean APPROVE/B+ at APPROVE* / C+.
///
/// Why: the source-of-truth reconciliation: a count-based REQUEST_CHANGES floor
/// must never contradict the model's own APPROVE verdict.  The concern is surfaced
/// as an advisory APPROVE* (grade clamped to C+, the APPROVE* ceiling), never as a
/// REQUEST_CHANGES that loops the PM merge workflow forever.
/// What: model APPROVE, grade B+, two Medium@0.85 → (APPROVE*, C+).
/// Test: this test itself.
#[test]
fn approve_b_plus_two_high_conf_medium_caps_at_approve_star() {
    let mediums = [finding(Effort::Medium, 0.85), finding(Effort::Medium, 0.85)];
    let (verdict, grade) = derive_verdict_with_grade(Verdict::Approve, Grade::BPlus, &mediums);
    assert_eq!(
        verdict,
        Verdict::ApproveWithReservations,
        "clean APPROVE must cap the Medium-count floor at APPROVE* (#1343)"
    );
    assert_eq!(
        grade,
        Grade::CPlus,
        "grade clamps to C+ (APPROVE* ceiling), never D+ (#1343)"
    );
}

/// #1343 guardrail: a genuine model REQUEST_CHANGES is still surfaced as such.
///
/// Why: the fix must not over-correct — the reconciliation only protects an
/// APPROVE review_body; when the model itself requests changes, the structured
/// verdict must honor that, not relax it.
/// What: model REQUEST_CHANGES, grade D+, no findings → REQUEST_CHANGES / D+.
/// Test: this test itself.
#[test]
fn model_request_changes_review_body_still_surfaces_request_changes() {
    let (verdict, grade) = derive_verdict_with_grade(Verdict::RequestChanges, Grade::DPlus, &[]);
    assert_eq!(
        verdict,
        Verdict::RequestChanges,
        "a genuine REQUEST_CHANGES review_body must still surface REQUEST_CHANGES (#1343)"
    );
    assert_eq!(grade, Grade::DPlus);
}

/// #1343: a substantive High finding still turns APPROVE/B+ into BLOCK/F.
///
/// Why: the source-of-truth reconciliation is count-only and must not disarm the
/// genuine safety net.  A High-effort (critical) finding floors to BLOCK regardless
/// of the model verdict, because BLOCK is grounded critical evidence, not a
/// Medium-count heuristic.
/// What: model APPROVE, grade B+, one High@0.95 (substantive, non-refuted) → BLOCK/F.
/// Test: this test itself.
#[test]
fn high_effort_finding_still_overrides_approve() {
    let high_only = [finding(Effort::High, 0.95)];
    let (verdict, grade) = derive_verdict_with_grade(Verdict::Approve, Grade::BPlus, &high_only);
    assert_eq!(
        verdict,
        Verdict::Block,
        "a substantive High-effort finding must still BLOCK an APPROVE (#1343)"
    );
    assert_eq!(grade, Grade::F);
}

// ── PR #1350 advisory fix A: High-effort findings keep their floor seat ──────

/// A non-refuted High-effort finding below 0.50 confidence still drives the floor
/// (PR #1350 safety-net restoration).
///
/// Why: the original #1343 `is_substantive` predicate dropped EVERY finding below
/// 0.50 confidence, including genuine High-effort criticals.  That silently
/// softened an uncertain-but-critical finding to APPROVE — exactly the safety net
/// PR #1350's review flagged.  A non-refuted High-effort finding must keep its seat
/// at the floor regardless of confidence, so it still escalates to BLOCK.
/// What: model APPROVE + one High@0.45 (non-refuted, below FLOOR_COUNT_MIN_CONFIDENCE)
/// → BLOCK.
/// Test: this test itself.
#[test]
fn low_confidence_high_effort_finding_still_drives_floor() {
    let uncertain_high = [finding(Effort::High, 0.45)];
    let derived = derive_verdict(Verdict::Approve, &uncertain_high);
    assert_eq!(
        derived,
        Verdict::Block,
        "a non-refuted High-effort finding below 0.50 confidence must still BLOCK (PR #1350)"
    );
}

/// End-to-end: a low-confidence, non-refuted High finding takes APPROVE/B+ to
/// BLOCK/F through the grade-aware entry point (PR #1350).
///
/// Why: confirms the restored safety net flows through `derive_verdict_with_grade`,
/// not just the bare `derive_verdict` — the uncertain critical hardens both verdict
/// and grade.
/// What: model APPROVE, grade B+, one High@0.40 → (BLOCK, F).
/// Test: this test itself.
#[test]
fn low_confidence_high_effort_clamps_grade_to_block() {
    let uncertain_high = [finding(Effort::High, 0.40)];
    let (verdict, grade) =
        derive_verdict_with_grade(Verdict::Approve, Grade::BPlus, &uncertain_high);
    assert_eq!(
        verdict,
        Verdict::Block,
        "uncertain critical (High@0.40) must still BLOCK through the grade pipeline (PR #1350)"
    );
    assert_eq!(grade, Grade::F, "grade must clamp to F when verdict=BLOCK");
}

/// A REFUTED High-effort finding stays excluded even at high confidence (PR #1350).
///
/// Why: advisory fix A widens the floor net for *uncertain* High-effort findings,
/// but a verifier-`Refuted` finding is disproven evidence and must never harden the
/// verdict — even when its effort is High.  This guards against the fix being
/// mis-read as "all High-effort findings always count".
/// What: model APPROVE + one refuted High@0.95 → APPROVE (the refuted critical is
/// excluded; no other substantive finding remains).
/// Test: this test itself.
#[test]
fn refuted_high_effort_finding_is_still_excluded() {
    let refuted = [verified_finding(Effort::High, 0.95, VerifyOutcome::Refuted)];
    let derived = derive_verdict(Verdict::Approve, &refuted);
    assert_eq!(
        derived,
        Verdict::Approve,
        "a REFUTED High-effort finding must not harden the verdict, even high-confidence (PR #1350)"
    );
}

/// #1352: `is_high_severity` selects exactly the High-effort tier and drives the
/// verdict floor accordingly.
///
/// Why: #1352 replaced the bare `f.effort == Effort::High` check in the floor
/// guard with a named `is_high_severity` predicate to make the *severity* intent
/// explicit.  This test pins (a) the predicate's own truth table and (b) that it
/// drives the floor for an uncertain (low-confidence) high-severity finding — the
/// #1350 safety-net path that depends on it.  Behaviour must stay equivalent to
/// the prior `Effort::High` check.
/// What: asserts `is_high_severity` is true only for `Effort::High`, then asserts
/// a low-confidence (0.30) High-effort finding still floors a model APPROVE to
/// BLOCK, while a low-confidence (0.30) Medium leaves it at APPROVE.
/// Test: this test itself.
#[test]
fn is_high_severity_matches_high_effort() {
    // (a) Truth table: only the High tier satisfies the predicate.
    let high = finding(Effort::High, 0.5);
    let medium = finding(Effort::Medium, 0.5);
    let low = finding(Effort::Low, 0.5);
    assert!(is_high_severity(&high));
    assert!(!is_high_severity(&medium));
    assert!(!is_high_severity(&low));

    // (b) Floor behaviour: an uncertain High finding still takes APPROVE to BLOCK
    // (the #1350 safety net that the predicate gates).
    let uncertain_high = [finding(Effort::High, 0.30)];
    let high_outcome = derive_verdict(Verdict::Approve, &uncertain_high);
    assert_eq!(
        high_outcome,
        Verdict::Block,
        "a low-confidence high-severity finding must still drive the BLOCK floor"
    );

    // An equally uncertain Medium is not high-severity, so nothing escalates.
    let uncertain_medium = [finding(Effort::Medium, 0.30)];
    let medium_outcome = derive_verdict(Verdict::Approve, &uncertain_medium);
    assert_eq!(
        medium_outcome,
        Verdict::Approve,
        "a low-confidence Medium is NOT high-severity and must not escalate"
    );
}

/// #1015 regression: a confident High finding yields BLOCK even when the
/// incoming grade is B+.
///
/// Why: the fix must not soften correctness blockers.  High-effort findings do
/// not depend on FLOOR_MIN_CONFIDENCE — they always floor the verdict to BLOCK.
/// What: model APPROVE, grade B+ (an APPROVE-level grade), one High@0.90
/// finding → verdict BLOCK and grade clamped to F.
/// Test: this test itself.
#[test]
fn grade_confirmed_high_still_blocks_despite_b_plus_grade() {
    let blocker = vec![finding(Effort::High, 0.90)];
    let (verdict, grade) = derive_verdict_with_grade(Verdict::Approve, Grade::BPlus, &blocker);

    // The verdict is hardened first ...
    assert_eq!(
        verdict,
        Verdict::Block,
        "High-effort finding must still BLOCK regardless of grade (#1015 regression)"
    );
    // ... and the grade is pulled down to match it.
    assert_eq!(grade, Grade::F, "grade must clamp to F when verdict=BLOCK");
}

// ── Method-conformance back gate (#1359, SPEC-CONFORMANCE-02 §5.2; AC-8..AC-12) ─

/// AC-8: a confident conformance divergence raises a model APPROVE to
/// REQUEST_CHANGES.
///
/// Why: a confirmed contradiction between the diff and an explicit ticket/spec
/// method (M5) has to surface as REQUEST_CHANGES.  Grounded conformance
/// evidence is exempt from the #1343 source-of-truth cap, mirroring the
/// High-effort exemption.
/// What: model APPROVE plus one Medium@0.90 conformance finding →
/// REQUEST_CHANGES.
/// Test: this test itself.
#[test]
fn conformance_finding_caps_at_request_changes() {
    let divergence = vec![conformance_finding(Effort::Medium, 0.90)];
    assert_eq!(
        derive_verdict(Verdict::Approve, &divergence),
        Verdict::RequestChanges,
        "a confident conformance divergence must floor to REQUEST_CHANGES (AC-8)"
    );
}

/// AC-8 (never-BLOCK): conformance tops out at REQUEST_CHANGES, even for a
/// HIGH-effort finding.
///
/// Why: BLOCK is reserved for correctness/safety (OQ-5).  A high-severity
/// conformance divergence must therefore be held at the conformance ceiling
/// instead of blocking.
/// What: model APPROVE plus one High@0.95 conformance finding →
/// REQUEST_CHANGES, and explicitly NOT the BLOCK that a High *correctness*
/// finding would produce.
/// Test: this test itself.
#[test]
fn conformance_high_effort_never_blocks() {
    let severe_divergence = vec![conformance_finding(Effort::High, 0.95)];
    let outcome = derive_verdict(Verdict::Approve, &severe_divergence);

    // Positive pin: the verdict lands exactly on the conformance ceiling.
    assert_eq!(
        outcome,
        Verdict::RequestChanges,
        "conformance must cap at REQUEST_CHANGES and NEVER drive BLOCK (AC-8)"
    );
    // Negative pin: spells out the never-BLOCK invariant on its own.
    assert_ne!(outcome, Verdict::Block, "conformance must never BLOCK");
}

/// AC-12: a conformance finding under FLOOR_MIN_CONFIDENCE (0.80) is advisory
/// only and leaves the verdict floor alone.
///
/// Why: the 0.80 gate is the primary false-positive guard (G3), so a
/// low-confidence conformance finding must not move the verdict.
/// What: model APPROVE plus one Medium@0.75 conformance finding → APPROVE.
/// Test: this test itself.
#[test]
fn conformance_below_floor_confidence_is_advisory() {
    // 0.75 sits just under the 0.80 gate.
    let advisory = vec![conformance_finding(Effort::Medium, 0.75)];
    assert_eq!(
        derive_verdict(Verdict::Approve, &advisory),
        Verdict::Approve,
        "a sub-0.80 conformance finding is advisory only and must not raise the floor (AC-12)"
    );
}

/// AC-12 (High-effort variant): a HIGH-effort conformance finding below 0.80
/// must not produce BLOCK.
///
/// Why: the never-BLOCK ceiling and the 0.80 advisory gate have to hold
/// together; a low-confidence, high-severity conformance finding must not
/// sneak to BLOCK through the correctness `has_high` path.
/// What: model APPROVE plus one High@0.60 conformance finding → any verdict
/// other than BLOCK.
/// NOTE(review): the intended outcome is presumably APPROVE (the finding stays
/// advisory), but only "not BLOCK" is pinned here — confirm whether the
/// assertion should be tightened to an equality check.
/// Test: this test itself.
#[test]
fn conformance_low_confidence_high_effort_never_blocks() {
    let uncertain_divergence = vec![conformance_finding(Effort::High, 0.60)];
    assert_ne!(
        derive_verdict(Verdict::Approve, &uncertain_divergence),
        Verdict::Block,
        "a conformance finding must never BLOCK regardless of effort/confidence (AC-8/AC-12)"
    );
}

/// AC-9: with no conformance finding present (a gap or a conforming diff), the
/// conformance axis does not alter the verdict.
///
/// Why: when intent is a gap (M3) or the diff conforms, the back gate emits no
/// conformance finding and so must not perturb the verdict.
/// What: model APPROVE plus a lone Low@0.95 correctness finding → APPROVE.
/// Test: this test itself.
#[test]
fn conformance_absent_leaves_verdict_unchanged() {
    // Only a Low correctness finding; nothing from the conformance gate.
    let correctness_only = vec![finding(Effort::Low, 0.95)];
    assert_eq!(
        derive_verdict(Verdict::Approve, &correctness_only),
        Verdict::Approve,
        "no conformance finding → unchanged (AC-9)"
    );
}

/// The conformance ceiling also holds through the grade-aware entry point: a
/// conformance finding never yields BLOCK there, given a grade that does not
/// itself imply F.
///
/// Why: REQUEST_CHANGES is the verdict ceiling for conformance at every entry
/// point, `derive_verdict_with_grade` included.
/// What: model APPROVE, grade B (an APPROVE-level grade), one High@0.90
/// conformance finding → REQUEST_CHANGES rather than BLOCK.
/// Test: this test itself.
#[test]
fn conformance_never_blocks_via_grade_entry_point() {
    let severe_divergence = vec![conformance_finding(Effort::High, 0.90)];
    // The returned grade is not under test here, only the verdict.
    let (verdict, _grade) =
        derive_verdict_with_grade(Verdict::Approve, Grade::B, &severe_divergence);

    assert_eq!(
        verdict,
        Verdict::RequestChanges,
        "conformance caps at REQUEST_CHANGES"
    );
    assert_ne!(verdict, Verdict::Block, "conformance never BLOCKs (AC-8)");
}

/// The conformance cap bounds only the conformance axis: a confident High
/// *correctness* finding in the same review still produces BLOCK.
///
/// Why: capping conformance at REQUEST_CHANGES must not weaken a real
/// correctness blocker that appears alongside it.
/// What: one High@0.90 correctness finding together with one Medium@0.90
/// conformance finding → BLOCK, i.e. stricter_of(BLOCK, REQUEST_CHANGES).
/// Test: this test itself.
#[test]
fn conformance_cap_does_not_weaken_correctness_block() {
    // Correctness blocker first, conformance divergence second.
    let correctness_blocker = finding(Effort::High, 0.90);
    let divergence = conformance_finding(Effort::Medium, 0.90);
    let mixed = vec![correctness_blocker, divergence];

    assert_eq!(
        derive_verdict(Verdict::Approve, &mixed),
        Verdict::Block,
        "a real correctness High finding still BLOCKs alongside a conformance finding"
    );
}