marque_rules/
confidence.rs

1// SPDX-FileCopyrightText: 2026 Knitli Inc.
2//
3// SPDX-License-Identifier: LicenseRef-MarqueLicense-1.0
4
5//! Confidence — Phase D audit-provenance payload.
6//!
7//! Every [`FixProposal`](crate::FixProposal) carries a `Confidence`
8//! record describing how the engine arrived at the proposal. The
9//! record stores two primary scalar confidence axes —
10//! `recognition` and `rule` — plus optional auxiliary fields
11//! (`region` and `runner_up_ratio`) and a list of named feature
12//! contributions. Together they reconstruct the decoder's scoring
13//! path so an auditor can verify *why* a given fix was promoted.
14//!
15//! The engine's current threshold-facing combined score is
16//! `recognition * rule` as exposed by [`Confidence::combined`].
17//! `region` is recorded as additional audit/context metadata when
18//! available, but it does not currently participate in that
19//! combined score. `runner_up_ratio` likewise provides decoder
20//! provenance rather than a direct multiplicative/additive input to
21//! `combined()`.
22//!
23//! ## Precision: `f32` throughout
24//!
25//! All scores are `f32`. The decoder scores in `f64` internally
26//! (log-priors and posteriors accumulate across many features), but
27//! the emitted `Confidence` downcasts once at the boundary so the
28//! audit record stays compact and byte-stable. This matches the
29//! foundational-plan invariant line 739-757.
30//!
31//! ## `features` is closed
32//!
33//! [`FeatureId`] is a non-`#[non_exhaustive]` closed enum. A new
34//! feature means a new variant and a coordinated bump of the audit
35//! schema version (`MARQUE_AUDIT_SCHEMA`) — silent additions would
36//! break the auditability contract on already-emitted records.
37
38/// Multi-axis confidence attached to every [`FixProposal`](crate::FixProposal).
39///
40/// Fields:
41///
42/// - `recognition` — posterior from the [`Recognizer`](marque_scheme::Recognizer)
43///   that surfaced this candidate (0.0–1.0).
44/// - `rule` — confidence the emitting rule has in its own fix
45///   (0.0–1.0). Strict-path rules report 1.0 when the invariant is
46///   unambiguous.
47/// - `region` — optional region-level confidence (a page-context
48///   prior, for example).
49/// - `runner_up_ratio` — optional ratio of top candidate to runner-up
50///   posterior. Decoder-sourced fixes carry this; strict-path fixes
51///   leave it `None` because the strict grammar has no runner-up by
52///   construction.
53/// - `features` — the concrete evidence features that contributed to
54///   `recognition`. Used by the corpus-accuracy harness to break down
55///   where posterior mass came from.
56///
57/// Construction happens via [`Confidence::strict`] (for rules that
58/// bypass the decoder) or the decoder's scoring path (Phase 4 / task
59/// T061).
60#[derive(Debug, Clone, PartialEq)]
61pub struct Confidence {
62    /// Recognizer posterior in `[0.0, 1.0]`.
63    pub recognition: f32,
64    /// Rule-level confidence in `[0.0, 1.0]`.
65    pub rule: f32,
66    /// Region / page-context confidence, when a rule computes one.
67    pub region: Option<f32>,
68    /// Posterior ratio between top candidate and runner-up
69    /// (`None` for strict-path fixes; set by decoder-sourced fixes).
70    pub runner_up_ratio: Option<f32>,
71    /// Per-feature contributions to `recognition`.
72    pub features: Vec<FeatureContribution>,
73}
74
75impl Confidence {
76    /// Confidence record for a strict-path fix where recognition was
77    /// unambiguous.
78    ///
79    /// `rule_confidence` is the rule's own confidence in its proposed
80    /// fix (typically 1.0 for migrations, lower for heuristics). The
81    /// recognition axis is pinned at 1.0 because the strict grammar
82    /// has one unambiguous match by definition, and no feature
83    /// contributions are recorded — strict-path fixes do not traverse
84    /// the decoder's feature graph.
85    #[inline]
86    pub fn strict(rule_confidence: f32) -> Self {
87        assert!(
88            (0.0..=1.0).contains(&rule_confidence) && !rule_confidence.is_nan(),
89            "Confidence::strict rule confidence must be in [0.0, 1.0] and not NaN, got {rule_confidence}"
90        );
91        Self {
92            recognition: 1.0,
93            rule: rule_confidence,
94            region: None,
95            runner_up_ratio: None,
96            features: Vec::new(),
97        }
98    }
99
100    /// Product of `recognition` and `rule`. The engine's
101    /// confidence-threshold gate compares this combined score against
102    /// the configured threshold (FR-016).
103    #[inline]
104    pub fn combined(&self) -> f32 {
105        self.recognition * self.rule
106    }
107
108    /// Validate every axis of this `Confidence` record.
109    ///
110    /// Returns `Err(message)` naming the first invalid axis. Checks:
111    ///
112    /// - `recognition` and `rule` in `[0.0, 1.0]` and not NaN.
113    /// - `region`, when `Some`, in `[0.0, 1.0]` and not NaN.
114    /// - `runner_up_ratio`, when `Some`, finite and not NaN. No range
115    ///   constraint — a well-behaved decoder returns `≥ 1.0` (top /
116    ///   runner-up) but infinity (runner-up posterior = 0) and values
117    ///   `< 1.0` are legal for debugging / inspection code.
118    /// - Every `features[i].delta` finite and not NaN. `delta` carries
119    ///   signed log-posterior contributions so any finite value is
120    ///   legal; `NaN` / infinity would poison downstream audit-sum
121    ///   invariants silently.
122    ///
123    /// The zero-axis edge case (recognition = 0 or rule = 0) is valid
124    /// — `combined() = 0.0` is a legitimate below-threshold result,
125    /// not an invariant violation.
126    pub fn validate(&self) -> Result<(), String> {
127        let check_unit = |label: &str, v: f32| -> Result<(), String> {
128            if v.is_nan() || !(0.0..=1.0).contains(&v) {
129                Err(format!(
130                    "Confidence.{label} must be in [0.0, 1.0] and not NaN, got {v}"
131                ))
132            } else {
133                Ok(())
134            }
135        };
136        let check_finite = |label: &str, v: f32| -> Result<(), String> {
137            if v.is_nan() || !v.is_finite() {
138                Err(format!(
139                    "Confidence.{label} must be finite and not NaN, got {v}"
140                ))
141            } else {
142                Ok(())
143            }
144        };
145
146        check_unit("recognition", self.recognition)?;
147        check_unit("rule", self.rule)?;
148        if let Some(r) = self.region {
149            check_unit("region", r)?;
150        }
151        if let Some(r) = self.runner_up_ratio {
152            check_finite("runner_up_ratio", r)?;
153        }
154        for (i, feature) in self.features.iter().enumerate() {
155            if feature.delta.is_nan() || !feature.delta.is_finite() {
156                return Err(format!(
157                    "Confidence.features[{i}].delta must be finite and not NaN, got {}",
158                    feature.delta
159                ));
160            }
161        }
162        Ok(())
163    }
164}
165
166/// One named contribution to [`Confidence::recognition`].
167#[derive(Debug, Clone, Copy, PartialEq)]
168pub struct FeatureContribution {
169    /// Which feature.
170    pub id: FeatureId,
171    /// Signed delta added to the log-posterior by this feature.
172    pub delta: f32,
173}
174
/// The closed set of features the decoder is able to record.
///
/// This enum is part of the on-the-wire audit contract: adding a
/// variant MUST be accompanied by a bump of the audit schema version
/// (see `MARQUE_AUDIT_SCHEMA` in `crates/engine/build.rs`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum FeatureId {
    /// The observed form lies at edit-distance 1 from a canonical
    /// token.
    EditDistance1,
    /// The observed form lies at edit-distance 2 from a canonical
    /// token.
    EditDistance2,
    /// The observed form is a canonical banner/portion shape with
    /// its tokens in a different order.
    TokenReorder,
    /// The observed form is a known token superseded under
    /// CAPCO-2016 whose replacement is unambiguous (e.g.,
    /// `COMINT → SI`).
    SupersededToken,
    /// The posterior is dominated by the candidate's base rate in
    /// the target corpus (common-marking prior).
    BaseRateCommonMarking,
    /// The strict-context classification floor (FR-011) was applied
    /// — e.g., a banner at TOP SECRET forces a strict posterior for
    /// classification tokens at ≥ that level on the same page.
    StrictContextClassification,
    /// The posterior was shifted by corpus-override data (opt-in,
    /// non-WASM, non-server). Recorded so an auditor can tell fixes
    /// produced under organizational overrides apart from those
    /// produced under stock priors.
    CorpusOverrideInEffect,
}
204
205impl FeatureId {
206    /// Canonical on-the-wire string label for this feature.
207    ///
208    /// This is the **single source of truth** for `FeatureId →
209    /// audit-record-string` projection. Audit emitters (CLI, WASM,
210    /// server) and snapshot tests MUST call this method rather than
211    /// re-implementing the match. A new `FeatureId` variant added
212    /// without a matching `as_str` arm fails the exhaustiveness check
213    /// here at compile time, so the on-the-wire contract cannot drift
214    /// silently across emitters.
215    ///
216    /// Returns a `&'static str` so callers can embed the value in
217    /// zero-copy serialization paths (`Serialize` derives,
218    /// `serde_json::json!` etc.) without an allocation.
219    #[inline]
220    pub const fn as_str(self) -> &'static str {
221        match self {
222            FeatureId::EditDistance1 => "EditDistance1",
223            FeatureId::EditDistance2 => "EditDistance2",
224            FeatureId::TokenReorder => "TokenReorder",
225            FeatureId::SupersededToken => "SupersededToken",
226            FeatureId::BaseRateCommonMarking => "BaseRateCommonMarking",
227            FeatureId::StrictContextClassification => "StrictContextClassification",
228            FeatureId::CorpusOverrideInEffect => "CorpusOverrideInEffect",
229        }
230    }
231}
232
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod tests {
    use super::*;

    /// Record with the two primary axes set and every optional field
    /// left empty; tests override individual fields via struct-update
    /// syntax.
    fn bare(recognition: f32, rule: f32) -> Confidence {
        Confidence {
            recognition,
            rule,
            region: None,
            runner_up_ratio: None,
            features: Vec::new(),
        }
    }

    /// A single `EditDistance1` contribution carrying `delta`.
    fn edit1(delta: f32) -> FeatureContribution {
        FeatureContribution {
            id: FeatureId::EditDistance1,
            delta,
        }
    }

    #[test]
    fn strict_pins_recognition_at_one() {
        let Confidence {
            recognition,
            rule,
            region,
            runner_up_ratio,
            features,
        } = Confidence::strict(0.85);
        assert_eq!(recognition, 1.0);
        assert_eq!(rule, 0.85);
        assert!(region.is_none());
        assert!(runner_up_ratio.is_none());
        assert!(features.is_empty());
    }

    #[test]
    fn combined_is_product_of_axes() {
        let from_strict = Confidence::strict(0.9);
        assert!((from_strict.combined() - 0.9).abs() < 1e-6);

        let explicit = bare(0.8, 0.5);
        assert!((explicit.combined() - 0.4).abs() < 1e-6);
    }

    #[test]
    #[should_panic(expected = "Confidence::strict rule confidence")]
    fn strict_panics_on_nan() {
        let _ = Confidence::strict(f32::NAN);
    }

    #[test]
    #[should_panic(expected = "Confidence::strict rule confidence")]
    fn strict_panics_above_one() {
        let _ = Confidence::strict(1.01);
    }

    #[test]
    fn feature_id_as_str_matches_audit_contract() {
        // These labels belong to the audit-record contract (see
        // `contracts/audit-record-v2.md`). Renaming one MUST be a
        // deliberate audit-schema bump (`MARQUE_AUDIT_SCHEMA`), never
        // a side effect of a refactor. The table is spelled out per
        // variant (instead of a round-trip) so that label drift
        // fails loudly.
        let expected_labels: &[(FeatureId, &str)] = &[
            (FeatureId::EditDistance1, "EditDistance1"),
            (FeatureId::EditDistance2, "EditDistance2"),
            (FeatureId::TokenReorder, "TokenReorder"),
            (FeatureId::SupersededToken, "SupersededToken"),
            (FeatureId::BaseRateCommonMarking, "BaseRateCommonMarking"),
            (
                FeatureId::StrictContextClassification,
                "StrictContextClassification",
            ),
            (FeatureId::CorpusOverrideInEffect, "CorpusOverrideInEffect"),
        ];
        for (id, expected) in expected_labels {
            assert_eq!(id.as_str(), *expected, "label drift for {id:?}");
        }
    }

    #[test]
    fn feature_contribution_roundtrip() {
        let contribution = edit1(-0.3);
        assert_eq!(contribution.id, FeatureId::EditDistance1);
        assert!((contribution.delta - (-0.3)).abs() < 1e-6);
    }

    #[test]
    fn validate_accepts_well_formed_record() {
        assert!(Confidence::strict(0.85).validate().is_ok());

        let full = Confidence {
            region: Some(0.5),
            runner_up_ratio: Some(2.7),
            features: vec![edit1(-0.5)],
            ..bare(0.9, 0.8)
        };
        assert!(full.validate().is_ok());
    }

    #[test]
    fn validate_rejects_out_of_range_recognition() {
        let err = bare(1.5, 0.5).validate().unwrap_err();
        assert!(
            err.contains("recognition"),
            "error should name the offending axis, got: {err}"
        );
    }

    #[test]
    fn validate_rejects_out_of_range_rule() {
        let err = bare(0.5, -0.1).validate().unwrap_err();
        assert!(err.contains("rule"), "got: {err}");
    }

    #[test]
    fn validate_rejects_out_of_range_region() {
        let record = Confidence {
            region: Some(1.5),
            ..bare(0.5, 0.5)
        };
        let err = record.validate().unwrap_err();
        assert!(err.contains("region"), "got: {err}");
    }

    #[test]
    fn validate_rejects_non_finite_runner_up_ratio() {
        for bad in [f32::NAN, f32::INFINITY, f32::NEG_INFINITY] {
            let record = Confidence {
                runner_up_ratio: Some(bad),
                ..bare(0.5, 0.5)
            };
            assert!(
                record.validate().is_err(),
                "runner_up_ratio = {bad:?} should fail validation"
            );
        }
    }

    #[test]
    fn validate_accepts_finite_runner_up_ratio_of_any_magnitude() {
        // The ratio has no range constraint, so a small value must
        // pass.
        let record = Confidence {
            runner_up_ratio: Some(0.01),
            ..bare(0.5, 0.5)
        };
        assert!(record.validate().is_ok());
    }

    #[test]
    fn validate_rejects_non_finite_feature_delta() {
        for bad in [f32::NAN, f32::INFINITY, f32::NEG_INFINITY] {
            let record = Confidence {
                features: vec![edit1(bad)],
                ..bare(0.5, 0.5)
            };
            assert!(
                record.validate().is_err(),
                "feature delta = {bad:?} should fail validation"
            );
        }
    }

    #[test]
    fn validate_accepts_zero_axes() {
        // Zero is a legitimate below-threshold value rather than an
        // invariant violation, so `validate` must not special-case it.
        let record = Confidence {
            region: Some(0.0),
            ..bare(0.0, 0.0)
        };
        assert!(record.validate().is_ok());
    }
}
marque_rules/confidence.rs

marque_rules/
confidence.rs