marque_rules/confidence.rs
1// SPDX-FileCopyrightText: 2026 Knitli Inc.
2//
3// SPDX-License-Identifier: LicenseRef-MarqueLicense-1.0
4
//! Confidence — Phase D audit-provenance payload.
//!
//! Every [`FixProposal`](crate::FixProposal) carries a `Confidence`
//! record describing how the engine arrived at the proposal. The
//! record stores two primary scalar confidence axes —
//! `recognition` and `rule` — plus optional auxiliary fields
//! (`region` and `runner_up_ratio`) and a list of named feature
//! contributions. Together they reconstruct the decoder's scoring
//! path so an auditor can verify *why* a given fix was promoted.
//!
//! The engine's current threshold-facing combined score is
//! `recognition * rule` as exposed by [`Confidence::combined`].
//! `region` is recorded as additional audit/context metadata when
//! available, but it does not currently participate in that
//! combined score. `runner_up_ratio` likewise provides decoder
//! provenance rather than a direct multiplicative/additive input to
//! `combined()`.
//!
//! ## Precision: `f32` throughout
//!
//! All scores are `f32`. The decoder scores in `f64` internally
//! (log-priors and posteriors accumulate across many features), but
//! the emitted `Confidence` downcasts once at the boundary so the
//! audit record stays compact and byte-stable. This matches the
//! invariant stated on lines 739–757 of the foundational plan.
//!
//! ## `features` is closed
//!
//! [`FeatureId`] is a closed enum: it is deliberately *not* marked
//! `#[non_exhaustive]`. A new feature means a new variant and a
//! coordinated bump of the audit schema version
//! (`MARQUE_AUDIT_SCHEMA`) — silent additions would break the
//! auditability contract on already-emitted records.
37
38/// Multi-axis confidence attached to every [`FixProposal`](crate::FixProposal).
39///
40/// Fields:
41///
42/// - `recognition` — posterior from the [`Recognizer`](marque_scheme::Recognizer)
43/// that surfaced this candidate (0.0–1.0).
44/// - `rule` — confidence the emitting rule has in its own fix
45/// (0.0–1.0). Strict-path rules report 1.0 when the invariant is
46/// unambiguous.
47/// - `region` — optional region-level confidence (a page-context
48/// prior, for example).
49/// - `runner_up_ratio` — optional ratio of top candidate to runner-up
50/// posterior. Decoder-sourced fixes carry this; strict-path fixes
51/// leave it `None` because the strict grammar has no runner-up by
52/// construction.
53/// - `features` — the concrete evidence features that contributed to
54/// `recognition`. Used by the corpus-accuracy harness to break down
55/// where posterior mass came from.
56///
57/// Construction happens via [`Confidence::strict`] (for rules that
58/// bypass the decoder) or the decoder's scoring path (Phase 4 / task
59/// T061).
60#[derive(Debug, Clone, PartialEq)]
61pub struct Confidence {
62 /// Recognizer posterior in `[0.0, 1.0]`.
63 pub recognition: f32,
64 /// Rule-level confidence in `[0.0, 1.0]`.
65 pub rule: f32,
66 /// Region / page-context confidence, when a rule computes one.
67 pub region: Option<f32>,
68 /// Posterior ratio between top candidate and runner-up
69 /// (`None` for strict-path fixes; set by decoder-sourced fixes).
70 pub runner_up_ratio: Option<f32>,
71 /// Per-feature contributions to `recognition`.
72 pub features: Vec<FeatureContribution>,
73}
74
75impl Confidence {
76 /// Confidence record for a strict-path fix where recognition was
77 /// unambiguous.
78 ///
79 /// `rule_confidence` is the rule's own confidence in its proposed
80 /// fix (typically 1.0 for migrations, lower for heuristics). The
81 /// recognition axis is pinned at 1.0 because the strict grammar
82 /// has one unambiguous match by definition, and no feature
83 /// contributions are recorded — strict-path fixes do not traverse
84 /// the decoder's feature graph.
85 #[inline]
86 pub fn strict(rule_confidence: f32) -> Self {
87 assert!(
88 (0.0..=1.0).contains(&rule_confidence) && !rule_confidence.is_nan(),
89 "Confidence::strict rule confidence must be in [0.0, 1.0] and not NaN, got {rule_confidence}"
90 );
91 Self {
92 recognition: 1.0,
93 rule: rule_confidence,
94 region: None,
95 runner_up_ratio: None,
96 features: Vec::new(),
97 }
98 }
99
100 /// Product of `recognition` and `rule`. The engine's
101 /// confidence-threshold gate compares this combined score against
102 /// the configured threshold (FR-016).
103 #[inline]
104 pub fn combined(&self) -> f32 {
105 self.recognition * self.rule
106 }
107
108 /// Validate every axis of this `Confidence` record.
109 ///
110 /// Returns `Err(message)` naming the first invalid axis. Checks:
111 ///
112 /// - `recognition` and `rule` in `[0.0, 1.0]` and not NaN.
113 /// - `region`, when `Some`, in `[0.0, 1.0]` and not NaN.
114 /// - `runner_up_ratio`, when `Some`, finite and not NaN. No range
115 /// constraint — a well-behaved decoder returns `≥ 1.0` (top /
116 /// runner-up) but infinity (runner-up posterior = 0) and values
117 /// `< 1.0` are legal for debugging / inspection code.
118 /// - Every `features[i].delta` finite and not NaN. `delta` carries
119 /// signed log-posterior contributions so any finite value is
120 /// legal; `NaN` / infinity would poison downstream audit-sum
121 /// invariants silently.
122 ///
123 /// The zero-axis edge case (recognition = 0 or rule = 0) is valid
124 /// — `combined() = 0.0` is a legitimate below-threshold result,
125 /// not an invariant violation.
126 pub fn validate(&self) -> Result<(), String> {
127 let check_unit = |label: &str, v: f32| -> Result<(), String> {
128 if v.is_nan() || !(0.0..=1.0).contains(&v) {
129 Err(format!(
130 "Confidence.{label} must be in [0.0, 1.0] and not NaN, got {v}"
131 ))
132 } else {
133 Ok(())
134 }
135 };
136 let check_finite = |label: &str, v: f32| -> Result<(), String> {
137 if v.is_nan() || !v.is_finite() {
138 Err(format!(
139 "Confidence.{label} must be finite and not NaN, got {v}"
140 ))
141 } else {
142 Ok(())
143 }
144 };
145
146 check_unit("recognition", self.recognition)?;
147 check_unit("rule", self.rule)?;
148 if let Some(r) = self.region {
149 check_unit("region", r)?;
150 }
151 if let Some(r) = self.runner_up_ratio {
152 check_finite("runner_up_ratio", r)?;
153 }
154 for (i, feature) in self.features.iter().enumerate() {
155 if feature.delta.is_nan() || !feature.delta.is_finite() {
156 return Err(format!(
157 "Confidence.features[{i}].delta must be finite and not NaN, got {}",
158 feature.delta
159 ));
160 }
161 }
162 Ok(())
163 }
164}
165
166/// One named contribution to [`Confidence::recognition`].
167#[derive(Debug, Clone, Copy, PartialEq)]
168pub struct FeatureContribution {
169 /// Which feature.
170 pub id: FeatureId,
171 /// Signed delta added to the log-posterior by this feature.
172 pub delta: f32,
173}
174
/// The fixed set of features the decoder is able to record.
///
/// This enum is part of the on-the-wire audit contract: adding a
/// variant MUST be accompanied by a bump of the audit schema version
/// (see `MARQUE_AUDIT_SCHEMA` in `crates/engine/build.rs`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FeatureId {
    /// The observed form lies at edit distance 1 from a canonical
    /// token.
    EditDistance1,
    /// The observed form lies at edit distance 2 from a canonical
    /// token.
    EditDistance2,
    /// The observed form permutes the token order of a canonical
    /// banner/portion shape.
    TokenReorder,
    /// The observed form is a known CAPCO-2016-superseded token with
    /// an unambiguous replacement (e.g., `COMINT → SI`).
    SupersededToken,
    /// The posterior is dominated by the candidate's base rate in the
    /// target corpus (common-marking prior).
    BaseRateCommonMarking,
    /// The strict-context classification floor (FR-011) was applied —
    /// e.g., a banner at TOP SECRET forces a strict posterior for
    /// classification tokens at ≥ that level on the same page.
    StrictContextClassification,
    /// The posterior was shifted by corpus-override data (opt-in,
    /// non-WASM, non-server). Recorded so an auditor can tell fixes
    /// produced under organizational overrides from those produced
    /// with stock priors.
    CorpusOverrideInEffect,
}
204
205impl FeatureId {
206 /// Canonical on-the-wire string label for this feature.
207 ///
208 /// This is the **single source of truth** for `FeatureId →
209 /// audit-record-string` projection. Audit emitters (CLI, WASM,
210 /// server) and snapshot tests MUST call this method rather than
211 /// re-implementing the match. A new `FeatureId` variant added
212 /// without a matching `as_str` arm fails the exhaustiveness check
213 /// here at compile time, so the on-the-wire contract cannot drift
214 /// silently across emitters.
215 ///
216 /// Returns a `&'static str` so callers can embed the value in
217 /// zero-copy serialization paths (`Serialize` derives,
218 /// `serde_json::json!` etc.) without an allocation.
219 #[inline]
220 pub const fn as_str(self) -> &'static str {
221 match self {
222 FeatureId::EditDistance1 => "EditDistance1",
223 FeatureId::EditDistance2 => "EditDistance2",
224 FeatureId::TokenReorder => "TokenReorder",
225 FeatureId::SupersededToken => "SupersededToken",
226 FeatureId::BaseRateCommonMarking => "BaseRateCommonMarking",
227 FeatureId::StrictContextClassification => "StrictContextClassification",
228 FeatureId::CorpusOverrideInEffect => "CorpusOverrideInEffect",
229 }
230 }
231}
232
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod tests {
    use super::*;

    /// A valid mid-range record. Tests override individual fields with
    /// struct-update syntax so each case shows only what it varies.
    fn baseline() -> Confidence {
        Confidence {
            recognition: 0.5,
            rule: 0.5,
            region: None,
            runner_up_ratio: None,
            features: Vec::new(),
        }
    }

    /// An `EditDistance1` contribution carrying `delta`.
    fn contribution(delta: f32) -> FeatureContribution {
        FeatureContribution {
            id: FeatureId::EditDistance1,
            delta,
        }
    }

    #[test]
    fn strict_pins_recognition_at_one() {
        let c = Confidence::strict(0.85);
        assert_eq!(c.recognition, 1.0);
        assert_eq!(c.rule, 0.85);
        assert!(c.region.is_none());
        assert!(c.runner_up_ratio.is_none());
        assert!(c.features.is_empty());
    }

    #[test]
    fn strict_accepts_range_endpoints() {
        // Both ends of the closed interval are legal inputs.
        assert_eq!(Confidence::strict(0.0).rule, 0.0);
        assert_eq!(Confidence::strict(1.0).rule, 1.0);
    }

    #[test]
    fn combined_is_product_of_axes() {
        let c = Confidence::strict(0.9);
        assert!((c.combined() - 0.9).abs() < 1e-6);

        let c2 = Confidence {
            recognition: 0.8,
            rule: 0.5,
            ..baseline()
        };
        assert!((c2.combined() - 0.4).abs() < 1e-6);
    }

    #[test]
    fn combined_ignores_auxiliary_fields() {
        // `region` and `runner_up_ratio` are provenance only; they
        // must not move the threshold-facing score.
        let plain = Confidence {
            recognition: 0.8,
            rule: 0.5,
            ..baseline()
        };
        let decorated = Confidence {
            region: Some(0.1),
            runner_up_ratio: Some(9.0),
            features: vec![contribution(-2.0)],
            ..plain.clone()
        };
        assert_eq!(plain.combined(), decorated.combined());
    }

    #[test]
    #[should_panic(expected = "Confidence::strict rule confidence")]
    fn strict_panics_on_nan() {
        let _ = Confidence::strict(f32::NAN);
    }

    #[test]
    #[should_panic(expected = "Confidence::strict rule confidence")]
    fn strict_panics_above_one() {
        let _ = Confidence::strict(1.01);
    }

    #[test]
    #[should_panic(expected = "Confidence::strict rule confidence")]
    fn strict_panics_below_zero() {
        let _ = Confidence::strict(-0.1);
    }

    #[test]
    fn feature_id_as_str_matches_audit_contract() {
        // Pin the on-the-wire labels for `FeatureId`. These strings are
        // part of the audit-record contract (see
        // `contracts/audit-record-v2.md`); a future rename here MUST be
        // a deliberate audit-schema bump (`MARQUE_AUDIT_SCHEMA`), not an
        // accidental refactor. Kept as an explicit per-variant table
        // (rather than a round-trip) so a label drift is loud.
        let cases: &[(FeatureId, &str)] = &[
            (FeatureId::EditDistance1, "EditDistance1"),
            (FeatureId::EditDistance2, "EditDistance2"),
            (FeatureId::TokenReorder, "TokenReorder"),
            (FeatureId::SupersededToken, "SupersededToken"),
            (FeatureId::BaseRateCommonMarking, "BaseRateCommonMarking"),
            (
                FeatureId::StrictContextClassification,
                "StrictContextClassification",
            ),
            (FeatureId::CorpusOverrideInEffect, "CorpusOverrideInEffect"),
        ];
        for (id, expected) in cases {
            assert_eq!(id.as_str(), *expected, "label drift for {id:?}");
        }
    }

    #[test]
    fn feature_contribution_roundtrip() {
        let fc = contribution(-0.3);
        assert_eq!(fc.id, FeatureId::EditDistance1);
        assert!((fc.delta - (-0.3)).abs() < 1e-6);
    }

    #[test]
    fn validate_accepts_well_formed_record() {
        assert!(Confidence::strict(0.85).validate().is_ok());
        assert!(
            Confidence {
                recognition: 0.9,
                rule: 0.8,
                region: Some(0.5),
                runner_up_ratio: Some(2.7),
                features: vec![contribution(-0.5)],
            }
            .validate()
            .is_ok()
        );
    }

    #[test]
    fn validate_rejects_out_of_range_recognition() {
        let c = Confidence {
            recognition: 1.5,
            ..baseline()
        };
        let err = c.validate().unwrap_err();
        assert!(
            err.contains("recognition"),
            "error should name the offending axis, got: {err}"
        );
    }

    #[test]
    fn validate_rejects_out_of_range_rule() {
        let c = Confidence {
            rule: -0.1,
            ..baseline()
        };
        let err = c.validate().unwrap_err();
        assert!(err.contains("rule"), "got: {err}");
    }

    #[test]
    fn validate_rejects_out_of_range_region() {
        let c = Confidence {
            region: Some(1.5),
            ..baseline()
        };
        let err = c.validate().unwrap_err();
        assert!(err.contains("region"), "got: {err}");
    }

    #[test]
    fn validate_rejects_nan_on_unit_axes() {
        // NaN fails every range comparison, so each unit axis must
        // report it rather than let it through.
        let cases = [
            (
                "recognition",
                Confidence {
                    recognition: f32::NAN,
                    ..baseline()
                },
            ),
            (
                "rule",
                Confidence {
                    rule: f32::NAN,
                    ..baseline()
                },
            ),
            (
                "region",
                Confidence {
                    region: Some(f32::NAN),
                    ..baseline()
                },
            ),
        ];
        for (axis, record) in cases {
            let err = record.validate().unwrap_err();
            assert!(err.contains(axis), "expected {axis} in error, got: {err}");
        }
    }

    #[test]
    fn validate_reports_first_invalid_axis() {
        // Both primary axes are invalid; `recognition` is checked
        // first and is the one that must be named.
        let c = Confidence {
            recognition: 1.5,
            rule: -0.1,
            ..baseline()
        };
        let err = c.validate().unwrap_err();
        assert!(err.contains("Confidence.recognition"), "got: {err}");
        assert!(!err.contains("Confidence.rule"), "got: {err}");
    }

    #[test]
    fn validate_rejects_non_finite_runner_up_ratio() {
        for bad in [f32::NAN, f32::INFINITY, f32::NEG_INFINITY] {
            let c = Confidence {
                runner_up_ratio: Some(bad),
                ..baseline()
            };
            assert!(
                c.validate().is_err(),
                "runner_up_ratio = {bad:?} should fail validation"
            );
        }
    }

    #[test]
    fn validate_accepts_finite_runner_up_ratio_of_any_magnitude() {
        // No range constraint on the ratio — verify that small, unit
        // and very large finite values all pass.
        for ratio in [0.01, 1.0, 2.7, f32::MAX] {
            let c = Confidence {
                runner_up_ratio: Some(ratio),
                ..baseline()
            };
            assert!(
                c.validate().is_ok(),
                "runner_up_ratio = {ratio:?} should pass validation"
            );
        }
    }

    #[test]
    fn validate_rejects_non_finite_feature_delta() {
        for bad in [f32::NAN, f32::INFINITY, f32::NEG_INFINITY] {
            let c = Confidence {
                features: vec![contribution(bad)],
                ..baseline()
            };
            assert!(
                c.validate().is_err(),
                "feature delta = {bad:?} should fail validation"
            );
        }
    }

    #[test]
    fn validate_names_offending_feature_index() {
        // The first contribution is fine; the error must point at the
        // second one by index.
        let c = Confidence {
            features: vec![contribution(-0.5), contribution(f32::INFINITY)],
            ..baseline()
        };
        let err = c.validate().unwrap_err();
        assert!(err.contains("features[1].delta"), "got: {err}");
    }

    #[test]
    fn validate_accepts_zero_axes() {
        // Zero is a legal below-threshold value, not an invariant
        // violation — check that validate doesn't treat it specially.
        let c = Confidence {
            recognition: 0.0,
            rule: 0.0,
            region: Some(0.0),
            ..baseline()
        };
        assert!(c.validate().is_ok());
    }
}