1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
//! Pipeline-tuning constants for trusty-review.
//!
//! Why: centralises all confidence-threshold constants so they have one
//! authoritative definition and are easy to audit, override in config files,
//! or extend. Magic numbers scattered across the pipeline lead to
//! inconsistent gate values (lesson learned §12.6).
//! What: every constant matches the Python predecessor's default and is
//! annotated with its spec reference.
//! Test: `constants_are_in_unit_interval` asserts that all confidence
//! thresholds are in `[0.0, 1.0]`.
// ─── Confidence thresholds (spec §06 REV-502, source-analysis §2.3) ──────────
/// Confidence floor a finding must reach to appear in the review as a fix
/// suggestion.
///
/// Why: low-confidence hunches are screened out before they can reach the
/// review body.
/// What: any finding whose confidence is below this value is omitted entirely.
pub const FIX_ISSUE_MIN_CONFIDENCE: f32 = 0.60;
/// Confidence floor a finding must reach before it is eligible to file a
/// tracker issue.
///
/// Why: opening a GitHub/JIRA issue is justified only for high-confidence
/// findings.
/// What: corresponds to `issue_threshold` in per-repo config, which may
/// override it.
pub const BLOCK_ISSUE_MIN_CONFIDENCE: f32 = 0.75;
/// Confidence floor a finding must reach to be a candidate for the BLOCK
/// verdict tier.
///
/// Why: BLOCK is the strongest verdict tier, so it demands very high
/// confidence.
/// What: corresponds to `block_threshold` in per-repo config.
pub const BLOCK_VERDICT_MIN_CONFIDENCE: f32 = 0.90;
/// Confidence floor above which the PR comment flags a finding as
/// high-confidence.
///
/// Why: lets the PR comment separate "FYI" findings from "definitely fix this"
/// findings.
/// What: corresponds to `pr_threshold` in per-repo config.
///
/// Naming history: this constant was formerly called
/// `VERIFY_CANDIDATE_MIN_CONFIDENCE` and was renamed in Phase 2 (#583). That
/// old name now belongs to the verification candidate-selection floor (0.50).
/// This constant always meant the PR high-confidence flag (`pr_threshold`),
/// never the verify gate.
pub const PR_HIGH_CONFIDENCE_THRESHOLD: f32 = 0.95;
/// Confidence floor a finding must reach to take part in the verification
/// round.
///
/// Why: a verifier LLM call adds latency, and low-confidence findings are not
/// worth that cost.
/// What: the verifier skips findings below this value and they are treated as
/// unverified.
pub const VERIFICATION_MIN_CONFIDENCE: f32 = 0.65;
/// Confidence floor for a finding to become a verification *candidate* when
/// the primary verdict is REQUEST_CHANGES / BLOCK (Phase 2, #583).
///
/// Why: once the reviewer already wants to block the merge, the verification
/// round has to cast a wide net. Even a moderate-confidence finding can be the
/// sole reason the verdict escalated, so it must be confirmed or refuted
/// before it is allowed to drive a blocking verdict. This floor is distinct
/// from, and deliberately lower than, `VERIFICATION_MIN_CONFIDENCE`, which
/// gates the *advisory* path; on a blocking verdict the candidate set is
/// widened down to this value so that a false-positive blocking finding cannot
/// slip past unverified.
/// What: on a REQUEST_CHANGES / BLOCK primary verdict, every finding with
/// `confidence >= VERIFY_CANDIDATE_MIN_CONFIDENCE` is sent to the verifier.
/// The default of 0.50 matches work item (b) of the Phase 2 ticket #583.
pub const VERIFY_CANDIDATE_MIN_CONFIDENCE: f32 = 0.50;
/// Confidence value a finding is demoted to once the verifier has REFUTED it
/// (Phase 2, #583).
///
/// Why: a refuted finding must neither surface nor drive a verdict, yet the
/// spec (REV-606) requires that it stay on the result for transparency instead
/// of being silently dropped. Pushing its confidence below every advisory /
/// block gate makes `derive_verdict` treat it as noise, while the `verified`
/// field records *why* it was demoted.
/// What: the value sits below `FIX_ISSUE_MIN_CONFIDENCE` (0.60),
/// `VERIFICATION_MIN_CONFIDENCE` (0.65), and `LOW_CONFIDENCE_THRESHOLD`
/// (0.65 in grade.rs), so a refuted finding is always treated as advisory-only
/// noise and collapses the floor.
/// Test: `refuted_finding_is_demoted_below_advisory_tier` in `verify_tests.rs`.
pub const VERIFY_REFUTED_CONFIDENCE: f32 = 0.10;
// ─── Suppression (spec §06 REV-530) ──────────────────────────────────────────
/// Jaccard overlap at which a suppression pattern is considered to match a
/// finding.
///
/// Why: substring matching on its own misses paraphrases, whereas word-overlap
/// matching catches them.
/// What: a finding is suppressed when the normalised word-set Jaccard
/// similarity between its description and a suppression pattern reaches this
/// value.
pub const SUPPRESS_OVERLAP_THRESHOLD: f32 = 0.70;
/// Similarity threshold applied by the related-finding dedup helper.
///
/// This is a separate knob from suppression matching (see the note in spec
/// §06 REV-530).
pub const FINDING_SIMILARITY_THRESHOLD: f32 = 0.60;
// ─── Dedup / pipeline ─────────────────────────────────────────────────────────
/// Age, in seconds, beyond which a dedup claim counts as stale and may be
/// purged.
///
/// Why: without an expiry, a reviewer that crashed would leave its claim in
/// the store forever.
/// What: a claim older than this value is ignored and overwritten by the next
/// claim attempt.
pub const DEDUP_STALE_SECS: u64 = 2 * 60 * 60; // 2 hours, i.e. 7200 seconds.
/// Upper bound, in characters, on the full diff text handed to the LLM.
///
/// Why: the reviewer model (Bedrock Claude Sonnet 4.6) offers a 200 K-token
/// context window. The previous 60 K-char cap (~15 K tokens) was overly
/// conservative and made real PRs with large fixture churn drop substantive
/// code changes. The cap was raised to 160 K chars (≈40 K tokens), which is
/// still ~5× under the 200 K window, so the DiffAnalyzer noise filter has
/// enough headroom to work.
/// What: both `truncate_diff` and `DiffAnalyzer::render_for_prompt` apply this
/// cap as their final safety net. Closes: #624.
pub const MAX_DIFF_CHARS: usize = 160_000;
/// Upper bound on how many context files a single review retrieves from
/// trusty-search.
pub const MAX_CONTEXT_FILES: usize = 20;
/// Upper bound on the number of additional enrichment rounds (spec REV-502).
pub const MAX_ENRICHMENT_ROUNDS: u32 = 3;
/// Upper bound on the number of tracker issues filed for a single PR.
pub const FIX_ISSUE_MAX_PER_PR: u32 = 3;
// ─── Effort gate (spec §07 REV-605) ──────────────────────────────────────────
/// Effort levels that are eligible for tracker-issue filing.
///
/// Why: HIGH-effort findings are unlikely to be actioned quickly; only
/// Low/Medium findings are issue-filed by default.
/// What: matches `FIX_ISSUE_ALLOWED_EFFORTS` in the Python predecessor
/// (source-analysis §2.3).
// NOTE(review): the type and initializer below read `& = &`, which is not
// valid Rust — the slice element type and its elements appear to have been
// lost. Presumably this was a slice holding the Low and Medium effort levels;
// TODO confirm against the repository and restore before building.
pub const FIX_ISSUE_ALLOWED_EFFORTS: & = &;
// ─── APEX/KB context (Phase 6 PR-B, #550, REV-420) ──────────────────────────
/// Upper bound on the APEX/KB results injected into the reviewer prompt for
/// one review.
///
/// Why: the reviewer context window is bounded, and more than 2 APEX snippets
/// would swamp the code context, which is the primary review signal. Two spec
/// excerpts suffice to anchor the reviewer to the relevant product intent.
/// What: `fetch_apex_context` truncates its results after this many entries.
/// Test: `apex_context_caps_at_max_results` in `integrations::apex_context`.
pub const MAX_APEX_RESULTS: usize = 2;
/// Upper bound, in characters, on a single APEX snippet embedded in the
/// prompt.
///
/// Why: an individual spec page can be large. Capping the excerpt keeps the
/// total prompt size predictable while still handing the reviewer meaningful
/// product context.
/// What: a snippet longer than this is truncated (UTF-8 safe) before it is
/// inserted.
/// Test: `apex_context_truncates_snippet` in `integrations::apex_context`.
pub const MAX_APEX_SNIPPET_CHARS: usize = 600;
/// Upper bound, in characters, on the APEX cross-query string.
///
/// Why: a PR title plus description can be very long, and the search daemon
/// benefits from a bounded query.
/// What: only the first `MAX_APEX_QUERY_CHARS` chars of the cross-query are
/// sent to trusty-search (UTF-8 safe).
/// Test: `apex_context_truncates_long_query` in `integrations::apex_context`.
pub const MAX_APEX_QUERY_CHARS: usize = 1_000;
// ─── Review version string ────────────────────────────────────────────────────
/// Identifier of the pipeline version, embedded in every `ReviewResult`.
///
/// Why: tooling can tell apart review logs produced by different pipeline
/// versions without having to parse the review body.
/// What: stored in `ReviewResult::review_version` on every review.
pub const REVIEW_VERSION: &str = "tr-0.1";
// ─── Unit tests ───────────────────────────────────────────────────────────────