marque_rules/
lib.rs

1// SPDX-FileCopyrightText: 2026 Knitli Inc.
2//
3// SPDX-License-Identifier: LicenseRef-MarqueLicense-1.0
4
5#![forbid(unsafe_code)]
6#![cfg_attr(coverage_nightly, feature(coverage_attribute))]
7
8//! marque-rules — trait definitions for the marque rule system.
9//!
10//! This crate defines the contract every rule crate must satisfy.
11//! It has no rule implementations — those live in `marque-capco` and future crates.
12//! The engine depends only on this crate, enabling rule crates to be swapped.
13//!
14//! # Type split: FixProposal vs AppliedFix
15//!
16//! `FixProposal` is pure data emitted by rules — deterministic, timestamp-free,
17//! classifier-free. `AppliedFix` wraps a proposal with runtime context (timestamp,
18//! classifier id, dry-run flag) and is constructed **only** by `Engine::fix`.
19//! This makes "suggested vs applied" a type-system invariant.
20
21pub mod confidence;
22
23use marque_ism::{IsmAttributes, Span};
24use std::collections::HashMap;
25use std::sync::Arc;
26use std::time::SystemTime;
27
28pub use confidence::{Confidence, FeatureContribution, FeatureId};
29pub use marque_ism::{DocumentPosition, MarkingType, Zone};
30
31// ---------------------------------------------------------------------------
32// RuleId
33// ---------------------------------------------------------------------------
34
/// Identifier for a single rule, e.g. `"E001"` or
/// `"capco/portion-mark-in-banner"`.
///
/// The wrapped `&'static str` is deliberately private: callers must go
/// through [`RuleId::new`], which keeps construction explicit at every
/// call site.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct RuleId(&'static str);

impl RuleId {
    /// Wrap a static string slice as a rule identifier.
    ///
    /// `const`, so identifiers can be declared as constants.
    #[inline]
    pub const fn new(id: &'static str) -> Self {
        RuleId(id)
    }

    /// Return the identifier text.
    ///
    /// The slice is `'static`, so it may outlive the `RuleId` it came from.
    #[inline]
    pub const fn as_str(&self) -> &'static str {
        // The field is `Copy`, so destructuring through `*self` copies the
        // slice out without moving the identifier.
        let Self(text) = *self;
        text
    }
}

impl std::fmt::Display for RuleId {
    /// Write the raw identifier text. Width / alignment flags on the
    /// formatter are not applied because the text goes straight through
    /// `write_str`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
61
62// ---------------------------------------------------------------------------
63// Severity
64// ---------------------------------------------------------------------------
65
/// Severity level attached to a rule. Each rule's level is configurable
/// in `.marque.toml`.
///
/// # Ordering
///
/// `Ord` is derived from declaration order, giving
/// `Off < Suggest < Info < Warn < Error < Fix`. The ordering is available
/// to consumers that want to compare severities ("is this at least
/// `Error`?"), but the config loader does **not** use it as a merge
/// operator today. `Suggest` is placed between `Off` and `Info` because
/// it is the lightest firing-but-non-actionable channel: louder than
/// `Off` (which never fires at all) and quieter than `Info` (which has no
/// candidate replacement attached).
///
/// # Exit-code semantics
///
/// `marque check` maps the severities present to an exit code:
///
/// | Severity counts present       | Exit code              |
/// |-------------------------------|------------------------|
/// | `Error` or `Fix`              | `1` (`EX_DIAG_ERROR`)  |
/// | `Warn` only                   | `2` (`EX_DIAG_WARN`)   |
/// | `Info` / `Suggest` only, none | `0` (`EX_OK`)          |
///
/// `Info` and `Suggest` are the only severities that emit diagnostics
/// *and* leave the exit code at zero; `Warn` still fails CI through
/// `EX_DIAG_WARN`. The difference in tone is advisory:
///
/// - `Warn` — "this might be wrong".
/// - `Info` — "FYI, probably intentional but worth surfacing". A rule
///   such as `W034 sci-custom-control-info` (reports unpublished SCI
///   control systems — legitimate per CAPCO but rare) is a natural
///   `Info` candidate.
/// - `Suggest` — "I have a candidate replacement but I'm not confident
///   enough to auto-apply it — eyes on it." A rule such as
///   `S004 rel-to-trigraph-suggest` (proposes a higher-prior trigraph
///   alternative for an ambiguous REL TO entry) emits at `Suggest`.
///
/// # `Suggest` channel semantics
///
/// `Suggest` is the firing-but-non-applying channel. A diagnostic at
/// `Suggest` carries a candidate `FixProposal` that the engine will
/// **never** auto-apply, whatever its `confidence`; the fix is purely
/// informational and shows what the rule would propose if confidence
/// were higher. `Suggest`-severity diagnostics arise in two ways:
///
/// 1. **Explicit emission**: the rule builds the diagnostic with
///    `Severity::Suggest` itself. `S004 rel-to-trigraph-suggest` is the
///    first rule to do so.
/// 2. **Engine rewrite**: in `lint`, the engine rewrites to
///    `Severity::Suggest` any diagnostic whose attached `FixProposal`
///    has `confidence.combined() < confidence_threshold`. This replaces
///    the earlier silent drop at threshold-gate time, so
///    below-threshold proposals remain observable.
///
/// Either way, `Engine::fix` excludes `Suggest` diagnostics from
/// auto-apply by construction. A `Suggest` diagnostic with `fix: None`
/// is also valid — an informational suggestion with no candidate
/// replacement, intended for future rules such as #206's REL TO
/// opaque-uncertain reduction, where the rule has signal to surface but
/// no specific replacement to offer. The renderer handles the
/// missing-fix case cleanly.
///
/// # Merge semantics (current: last-write-wins)
///
/// `marque-config` merges layers in strict precedence order: env vars
/// override `.marque.local.toml`, which overrides `.marque.toml`. The
/// highest-precedence layer that mentions a rule wins, downgrades
/// included — a local `"off"` suppresses a project-config `"error"`.
/// That is intentional: individual classifiers sometimes need to silence
/// a rule while iterating, and the audit log still records the
/// configured severity for every applied fix.
///
/// Should a future policy require strictness-only merging (a lower layer
/// may not downgrade a higher layer's severity), change the loader to
/// take `.max()` over `Severity::parse_config` values instead of using
/// `extend`. The derived `Ord` described above is already the right
/// operator for that.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Severity {
    /// The rule is switched off. FR-008: `Off` is unrepresentable on
    /// emitted diagnostics — a rule at `Off` never fires, so no
    /// `Diagnostic` is ever produced for it.
    Off,
    /// Advisory channel: the diagnostic carries a candidate fix that
    /// will **not** be auto-applied.
    ///
    /// This differs from `Info` (FYI, nothing actionable to replace) and
    /// from `Off` (never fires). The fix-bearing diagnostic stays visible
    /// in lint output, but the engine keeps it out of auto-apply
    /// regardless of `confidence`. It is the suggest-don't-fix channel:
    /// rules with low-confidence candidate corrections (e.g.,
    /// `S004 rel-to-trigraph-suggest`) can offer "did you mean?" hints
    /// without committing to the rewrite.
    ///
    /// Like `Info`, `Suggest` leaves the CLI exit code at `0`, so it is
    /// CI-silent.
    Suggest,
    /// Informational diagnostic that does not block the `check`-mode
    /// exit code. Meant for "audit-visible but probably intentional"
    /// signals, where the marking may well be correct but the user may
    /// want to verify it (e.g., unpublished SCI control systems).
    Info,
    /// Warning: not an error, yet still non-zero in `check` mode
    /// (`EX_DIAG_WARN` = 2). It differs from `Info` in both tone and
    /// exit-code impact — `Warn` says "this might be wrong" and is
    /// CI-visible, while `Info` says "FYI, probably intentional but
    /// worth surfacing" and is CI-silent (exit 0).
    Warn,
    /// Error; blocks the `--check` exit code.
    Error,
    /// Apply the fix automatically when the `--fix` flag is present.
    Fix,
}

impl Severity {
    /// Every variant in ascending `Ord` order; the lookup table behind
    /// [`Severity::parse_config`].
    const ALL: [Self; 6] = [
        Self::Off,
        Self::Suggest,
        Self::Info,
        Self::Warn,
        Self::Error,
        Self::Fix,
    ];

    /// Parse a severity from its config-file spelling.
    ///
    /// Matching is exact and case-sensitive against the lowercase names
    /// produced by [`Severity::as_str`]. Returns `None` for anything
    /// else; the config loader treats `None` as a hard error.
    pub fn parse_config(s: &str) -> Option<Self> {
        Self::ALL
            .iter()
            .copied()
            .find(|level| level.as_str() == s)
    }

    /// Canonical lowercase name, suitable for JSON output.
    ///
    /// This is the inverse of [`Severity::parse_config`] and the stable
    /// surface JSON consumers should depend on — never
    /// `format!("{:?}")`, which would expose Debug formatting as an
    /// unintended API.
    pub const fn as_str(self) -> &'static str {
        match self {
            Severity::Off => "off",
            Severity::Suggest => "suggest",
            Severity::Info => "info",
            Severity::Warn => "warn",
            Severity::Error => "error",
            Severity::Fix => "fix",
        }
    }
}

impl std::fmt::Display for Severity {
    /// Write the canonical lowercase name from [`Severity::as_str`].
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label: &'static str = (*self).as_str();
        f.write_str(label)
    }
}
215
216// ---------------------------------------------------------------------------
217// RuleContext
218// ---------------------------------------------------------------------------
219
220/// Document position context passed to rules alongside parsed markings.
221///
222/// Phase 3 made `zone` and `position` `Option`-typed: the scanner cannot
223/// reliably determine header/footer/body or document position from raw
224/// text alone, so a rule that reads either field must handle `None`.
225/// They will become populated in a future scanner pass that consumes
226/// document structural metadata (page count, line numbers, header/footer
227/// detection on extracted documents).
228///
229/// `page_context` is populated by the engine for every non-portion
230/// candidate (Banner, CAB) so banner-validation rules can compare the
231/// observed banner against the composite expected from all preceding
232/// portions. The engine resets it at scanner-emitted `MarkingType::PageBreak`
233/// candidates (form-feed `\f` and `\n\n\n+` heuristics) so the context
234/// reflects only the current page.
235#[derive(Debug, Clone)]
236pub struct RuleContext {
237    pub marking_type: MarkingType,
238    /// Document zone (header/footer/body/CAB) when known. `None` in Phase 3
239    /// — the scanner cannot prove header vs footer from raw text.
240    pub zone: Option<Zone>,
241    /// Coarse document position when known. `None` in Phase 3.
242    pub position: Option<DocumentPosition>,
243    /// Accumulated portion data for the current page, reset at every
244    /// scanner-emitted `MarkingType::PageBreak`.
245    pub page_context: Option<std::sync::Arc<marque_ism::PageContext>>,
246    /// Organization-specific corrections map from config `[corrections]`.
247    /// `None` when no corrections are configured.
248    pub corrections: Option<Arc<HashMap<String, String>>>,
249}
250
251// ---------------------------------------------------------------------------
252// FixSource
253// ---------------------------------------------------------------------------
254
/// Where a fix proposal came from — the origin of the recommendation.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum FixSource {
    /// A hand-written Layer 2 CAPCO rule.
    BuiltinRule,
    /// An entry in the user's `[corrections]` table (FR-009).
    CorrectionsMap,
    /// A deterministic conversion of a deprecated marking (FR-004a).
    MigrationTable,
    /// The probabilistic decoder derived this fix from the posterior of
    /// a recognition candidate (Phase D, see
    /// `docs/plans/2026-04-16-probabilistic-recognition.md`). It comes
    /// with a non-trivial `features` list in
    /// [`FixProposal::confidence`], which lets auditors reconstruct the
    /// scoring path.
    DecoderPosterior,
    /// The decoder derived this fix from a position-aware short-token
    /// classification heuristic: a keyboard-proximity table applied to
    /// the leading classification slot of a portion or banner marking
    /// when the token is too short for vocab-based fuzzy matching
    /// (e.g., `(YS//NF) → (TS//NF)`, `(W//NF) → (S//NF)`).
    /// See issue #133 PR 2.
    ///
    /// This is inherently less certain than a fuzzy-vocab match: the
    /// inference is "this token is keyboard-adjacent to a known
    /// classification", not "this token is edit-distance ≤ 2 from a
    /// known canonical token in a closed vocabulary." The engine
    /// therefore
    ///
    /// - (a) emits the diagnostic at [`Severity::Warn`] (the
    ///   fix-and-warn pattern — always visible, non-zero exit code in
    ///   `--check`), and
    /// - (b) caps [`Confidence::rule`] at `0.80`, so `combined ≤ 0.80`
    ///   stays under the default `confidence_threshold` of `0.95`.
    ///
    /// The fix auto-applies only once the user has explicitly lowered
    /// the threshold to opt into the heuristic's bar.
    DecoderClassificationHeuristic,
}
291
/// Citation string used by diagnostics whose authority is the user's
/// `[corrections]` config entry — that is, C001 and the engine's
/// pre-scanner text-scan path.
///
/// C001 is not a CAPCO rule: no CAPCO passage governs user-defined typo
/// replacements, so the citation points at the config rather than at a
/// §/page/line reference. Defining the string once keeps the
/// rule-pipeline emission site in `marque-capco` and the pre-scanner
/// emission site in `marque-engine` from drifting apart silently; both
/// paths yield the same audit-record shape.
pub const CORRECTIONS_MAP_CITATION: &str = "CONFIG:[corrections]";
301
302// ---------------------------------------------------------------------------
303// FixProposal
304// ---------------------------------------------------------------------------
305
306/// A proposed fix for a diagnostic violation.
307///
308/// Pure data — deterministic, timestamp-free, classifier-free, safe to snapshot
309/// in tests. A `FixProposal` is a *suggestion* until `Engine::fix` promotes it
310/// to an `AppliedFix` when `confidence.combined() >= configuration.confidence_threshold`.
311///
312/// # Phase D: Multi-axis confidence
313///
314/// `confidence` is a [`Confidence`] record rather than a scalar. Strict-path
315/// rules construct it via [`Confidence::strict`]; the Phase D decoder
316/// constructs a full record with `recognition`, `runner_up_ratio`, and
317/// feature contributions. The engine threshold gate uses
318/// [`Confidence::combined`] so a 0.95-recognition × 0.9-rule fix that
319/// previously would have been scalar-0.855 still gates the same way.
320#[non_exhaustive]
321#[derive(Debug, Clone)]
322pub struct FixProposal {
323    /// The rule that generated this proposal.
324    pub rule: RuleId,
325    /// Provenance: built-in rule, corrections map, migration table, or
326    /// decoder posterior.
327    pub source: FixSource,
328    /// Byte range in original source to replace.
329    pub span: Span,
330    /// The bytes currently occupying `span`.
331    pub original: Box<str>,
332    /// Replacement text.
333    pub replacement: Box<str>,
334    /// Multi-axis confidence for this fix.
335    pub confidence: Confidence,
336    /// Reference to the CAPCO rule or migration document justifying this fix.
337    pub migration_ref: Option<&'static str>,
338}
339
340impl FixProposal {
341    /// Create a new fix proposal with invariant checks.
342    ///
343    /// # Panics
344    ///
345    /// Panics if `confidence` fails [`Confidence::validate`] — i.e.,
346    /// any individual axis is out of range or `NaN` / non-finite. The
347    /// per-axis check is the load-bearing one: `combined() =
348    /// recognition × rule` can land in `[0.0, 1.0]` for individually-
349    /// invalid axes (e.g., `recognition = 2.0`, `rule = 0.4` ⇒
350    /// `combined = 0.8`), so validating only the product would let an
351    /// invalid record through. The check runs in release builds (not
352    /// just debug) because `NaN` silently fails every threshold
353    /// comparison and `INFINITY` silently bypasses every threshold —
354    /// both are correctness-impacting bugs in release.
355    pub fn new(
356        rule: RuleId,
357        source: FixSource,
358        span: Span,
359        original: impl Into<Box<str>>,
360        replacement: impl Into<Box<str>>,
361        confidence: Confidence,
362        migration_ref: Option<&'static str>,
363    ) -> Self {
364        if let Err(msg) = confidence.validate() {
365            panic!("FixProposal invalid confidence: {msg}");
366        }
367        Self {
368            rule,
369            source,
370            span,
371            original: original.into(),
372            replacement: replacement.into(),
373            confidence,
374            migration_ref,
375        }
376    }
377}
378
379// ---------------------------------------------------------------------------
380// AppliedFix (= Audit Record)
381// ---------------------------------------------------------------------------
382
383/// A promoted `FixProposal` with runtime context.
384///
385/// Constructed **only** by `Engine::fix` at the moment a `FixProposal` meets
386/// the confidence threshold. Never constructed by a rule or suggestion path.
387///
388/// Serves as the audit record: the NDJSON schemas at `contracts/audit-record*.json`
389/// serialize this type.
390///
391/// `classifier_id` is an `Arc<str>` so promoting many fixes from a single
392/// document only clones an atomic refcount, not the underlying string.
393///
394/// # v2 audit fields (`confidence`, `source`)
395///
396/// Phase D promotes the fix's [`Confidence`] and [`FixSource`] to
397/// **top-level** fields on `AppliedFix` so the v2 audit emitter doesn't
398/// need to descend into `.proposal` to find them. They are a snapshot
399/// at promotion time — the engine may (in future phases) adjust them
400/// for region context before promotion, so they can diverge from the
401/// original `proposal.confidence` / `proposal.source`. Today the
402/// engine promotes them unchanged from the proposal.
403///
404/// Both fields are redundant with the `proposal` sub-struct by design:
405/// the v1 schema reads them through `proposal`; the v2 schema reads
406/// the top-level fields. Keeping both paths live makes the v1→v2
407/// transition a pure emitter change rather than a data-model change.
408#[non_exhaustive]
409#[derive(Debug, Clone)]
410pub struct AppliedFix {
411    /// The original proposal that was applied.
412    pub proposal: FixProposal,
413    /// Snapshot of the fix's confidence at promotion time (v2 audit).
414    pub confidence: Confidence,
415    /// Snapshot of the fix's provenance at promotion time (v2 audit).
416    pub source: FixSource,
417    /// Timestamp of application (clock-injected).
418    pub timestamp: SystemTime,
419    /// Classifier identity from runtime config. `None` if not configured.
420    pub classifier_id: Option<Arc<str>>,
421    /// `true` if produced under `--dry-run` (FR-006).
422    pub dry_run: bool,
423    /// Caller-supplied input identifier (file path, "-" for stdin, `None` if N/A).
424    pub input: Option<Arc<str>>,
425}
426
427impl AppliedFix {
428    /// Promote a `FixProposal` to an `AppliedFix` with runtime context.
429    ///
430    /// # Engine-only contract (production code)
431    ///
432    /// This constructor exists in `marque-rules` for type co-location, but
433    /// in **production code** **must only be called from
434    /// `marque-engine::Engine::fix`**. Rule crates and CLI code must never
435    /// construct `AppliedFix` directly — they produce `FixProposal`
436    /// values and let the engine promote them.
437    ///
438    /// The engine snapshots `proposal.confidence` and `proposal.source`
439    /// into the top-level `confidence` / `source` fields at promotion
440    /// time. A future phase may adjust these per region-context before
441    /// snapshotting; Phase 2 copies them unchanged.
442    ///
443    /// # Type-level seal
444    ///
445    /// The `_token: EnginePromotionToken` parameter is the seal: an
446    /// instance can only be obtained via
447    /// [`EnginePromotionToken::__engine_construct`], whose
448    /// engine-only contract mirrors this one. Because
449    /// `EnginePromotionToken`'s sole field is private to
450    /// `marque-rules`, no external crate can brace-construct one — the
451    /// bypass surface collapses to a single named type. A grep for
452    /// `EnginePromotionToken` outside `marque-engine` (or test code
453    /// covered by the carve-out below) flags every Constitution V
454    /// violation in one pass.
455    ///
456    /// The seal is still convention-based at the cross-crate level
457    /// (Rust does not provide a way to scope `pub` to a specific
458    /// downstream crate without `cfg` features that any caller can
459    /// flip), but the convention is now load-bearing at the type
460    /// level: the named token threads the bypass through one
461    /// auditable choke point instead of leaving it as a single
462    /// generically-named function.
463    ///
464    /// # Test-fixture carve-out
465    ///
466    /// Test code MAY call `__engine_promote` directly (and mint a
467    /// token via [`EnginePromotionToken::__engine_construct`]) to
468    /// construct synthetic `AppliedFix` fixtures for unit-testing
469    /// audit-emission machinery (renderers, sentinel checks, NDJSON
470    /// serialization) without spinning up a full `Engine`. The
471    /// carve-out is scoped per Constitution V Principle V:
472    ///
473    /// - Call sites MUST live inside `#[cfg(test)]` modules, `tests/`
474    ///   integration files, or test-utility crates gated as
475    ///   `dev-dependencies`. Production code calling this constructor
476    ///   from `cfg(not(test))` violates the contract.
477    /// - Fabricated `AppliedFix` values MUST NOT be commingled with
478    ///   engine-promoted fixes (spliced into a real audit stream,
479    ///   etc.).
480    /// - The carve-out covers test-fixture *construction* only. CLI
481    ///   helpers, batch tooling, and benchmark drivers that want an
482    ///   `AppliedFix` for non-test purposes are not in scope.
483    ///
484    /// Each test call site SHOULD carry an inline comment naming the
485    /// carve-out so future reviewers don't have to re-derive the
486    /// policy.
487    #[doc(hidden)]
488    pub fn __engine_promote(
489        proposal: FixProposal,
490        timestamp: SystemTime,
491        classifier_id: Option<Arc<str>>,
492        dry_run: bool,
493        input: Option<Arc<str>>,
494        _token: EnginePromotionToken,
495    ) -> Self {
496        let confidence = proposal.confidence.clone();
497        let source = proposal.source;
498        Self {
499            proposal,
500            confidence,
501            source,
502            timestamp,
503            classifier_id,
504            dry_run,
505            input,
506        }
507    }
508}
509
/// Proof-of-construction token required by
/// [`AppliedFix::__engine_promote`]; engine-only.
///
/// `AppliedFix::__engine_promote` takes an `EnginePromotionToken`, and
/// [`EnginePromotionToken::__engine_construct`] is the only way to get
/// one. The token's sole field is private to `marque-rules`, so no
/// external crate can brace-construct it; the constructor is
/// `#[doc(hidden)]` and named so that the bypass intent is obvious at
/// the call site.
///
/// This is the type-level seal for Constitution V Principle V's
/// engine-only contract on audit-record promotion. The binding contract
/// and the test-fixture carve-out are documented on
/// [`AppliedFix::__engine_promote`].
///
/// # Compile-fail proof of the seal
///
/// The `_seal` field is private to `marque-rules`, so external crates
/// cannot brace-construct an `EnginePromotionToken`. Doctests compile as
/// separate crates against the library's public API, which means the
/// snippet below is rejected by the compiler — exactly what
/// `compile_fail` asserts:
///
/// ```compile_fail
/// // External crates see `EnginePromotionToken` but not `_seal`,
/// // so brace-construction is rejected. Bypass requires calling
/// // `EnginePromotionToken::__engine_construct()`, which is the
/// // single auditable bypass surface.
/// let _token = marque_rules::EnginePromotionToken { _seal: () };
/// ```
#[derive(Debug)]
pub struct EnginePromotionToken {
    _seal: (),
}

impl EnginePromotionToken {
    /// Mint an [`EnginePromotionToken`].
    ///
    /// # Engine-only contract (production code)
    ///
    /// In production code only `marque-engine` may call this. The
    /// three-constraint test-fixture carve-out of
    /// [`AppliedFix::__engine_promote`] applies here verbatim; that
    /// constructor's doc comment holds the binding definition. Anywhere
    /// outside the engine, a call from `cfg(not(test))` code violates
    /// Constitution V Principle V.
    #[doc(hidden)]
    #[inline]
    pub const fn __engine_construct() -> Self {
        EnginePromotionToken { _seal: () }
    }
}
561
562// ---------------------------------------------------------------------------
563// Diagnostic
564// ---------------------------------------------------------------------------
565
566/// A single diagnostic emitted by a rule check.
567#[non_exhaustive]
568#[derive(Debug, Clone)]
569pub struct Diagnostic {
570    pub rule: RuleId,
571    pub severity: Severity,
572    /// Byte span in the original source buffer.
573    pub span: Span,
574    /// Human-readable description of the violation.
575    pub message: Box<str>,
576    /// CAPCO section citation, e.g., "CAPCO-2016 §A.6"
577    /// (refers to the CAPCO Register and Manual, 2016).
578    pub citation: &'static str,
579    /// Proposed fix, if the rule can generate one.
580    pub fix: Option<FixProposal>,
581}
582
583impl Diagnostic {
584    /// Construct a new diagnostic.
585    pub fn new(
586        rule: RuleId,
587        severity: Severity,
588        span: Span,
589        message: impl Into<Box<str>>,
590        citation: &'static str,
591        fix: Option<FixProposal>,
592    ) -> Self {
593        Self {
594            rule,
595            severity,
596            span,
597            message: message.into(),
598            citation,
599            fix,
600        }
601    }
602}
603
604// ---------------------------------------------------------------------------
605// Rule trait
606// ---------------------------------------------------------------------------
607
608/// The core trait every rule implementation must satisfy.
609///
610/// Rules are stateless. All configuration (severity overrides, corrections map)
611/// is resolved by the engine before rule invocation and passed via context.
612pub trait Rule: Send + Sync {
613    fn id(&self) -> RuleId;
614    fn name(&self) -> &'static str;
615    /// Default severity — overridable per rule in `.marque.toml`.
616    fn default_severity(&self) -> Severity;
617    fn check(&self, attrs: &IsmAttributes, ctx: &RuleContext) -> Vec<Diagnostic>;
618}
619
620/// A collection of rules provided by a rule crate.
621/// Returned by the rule crate's entry point function.
622pub trait RuleSet: Send + Sync {
623    fn rules(&self) -> &[Box<dyn Rule>];
624    fn schema_version(&self) -> &'static str;
625}
626
627#[cfg(test)]
628#[cfg_attr(coverage_nightly, coverage(off))]
629mod tests {
630    use super::*;
631
632    #[test]
633    fn rule_id_round_trip() {
634        let r = RuleId::new("E001");
635        assert_eq!(r.as_str(), "E001");
636        assert_eq!(r.to_string(), "E001");
637    }
638
639    #[test]
640    fn severity_parse_config_accepts_known_values() {
641        assert_eq!(Severity::parse_config("off"), Some(Severity::Off));
642        assert_eq!(Severity::parse_config("suggest"), Some(Severity::Suggest));
643        assert_eq!(Severity::parse_config("info"), Some(Severity::Info));
644        assert_eq!(Severity::parse_config("warn"), Some(Severity::Warn));
645        assert_eq!(Severity::parse_config("error"), Some(Severity::Error));
646        assert_eq!(Severity::parse_config("fix"), Some(Severity::Fix));
647    }
648
649    #[test]
650    fn severity_parse_config_is_case_sensitive() {
651        assert_eq!(Severity::parse_config("OFF"), None);
652        assert_eq!(Severity::parse_config("Warn"), None);
653    }
654
655    #[test]
656    fn severity_parse_config_rejects_unknown_strings() {
657        assert_eq!(Severity::parse_config("err"), None);
658        assert_eq!(Severity::parse_config("disable"), None);
659        assert_eq!(Severity::parse_config(""), None);
660    }
661
662    #[test]
663    fn severity_display_round_trips() {
664        for s in [
665            Severity::Off,
666            Severity::Suggest,
667            Severity::Info,
668            Severity::Warn,
669            Severity::Error,
670            Severity::Fix,
671        ] {
672            assert_eq!(Severity::parse_config(s.as_str()), Some(s));
673            assert_eq!(s.to_string(), s.as_str());
674        }
675    }
676
677    #[test]
678    fn severity_ord_off_is_lowest() {
679        // Off < Suggest < Info < Warn < Error < Fix — see the doc comment
680        // on Severity for the intentional design rationale.
681        assert!(Severity::Off < Severity::Suggest);
682        assert!(Severity::Suggest < Severity::Info);
683        assert!(Severity::Info < Severity::Warn);
684        assert!(Severity::Warn < Severity::Error);
685        assert!(Severity::Error < Severity::Fix);
686    }
687
688    #[test]
689    fn severity_suggest_round_trips_through_config_string() {
690        // Issue #235 / #186 PR-3: the suggest-don't-fix channel must be
691        // a stable parse target. The config string "suggest" must round
692        // trip through both parse_config and as_str.
693        assert_eq!(Severity::parse_config("suggest"), Some(Severity::Suggest));
694        assert_eq!(Severity::Suggest.as_str(), "suggest");
695        assert_eq!(Severity::Suggest.to_string(), "suggest");
696    }
697
698    #[test]
699    fn severity_suggest_is_strictly_below_info_in_ord() {
700        // The renderer relies on Suggest sorting BELOW Info so that
701        // CI exit-code logic ("Info or none → exit 0") generalizes
702        // to ("Info-or-Suggest or none → exit 0") via the same
703        // strict-less-than comparison.
704        assert!(Severity::Suggest < Severity::Info);
705        assert!(Severity::Off < Severity::Suggest);
706    }
707
708    #[test]
709    fn fix_proposal_new_accepts_boundary_confidence() {
710        let _zero = FixProposal::new(
711            RuleId::new("E001"),
712            FixSource::BuiltinRule,
713            Span::new(0, 0),
714            "x",
715            "y",
716            Confidence::strict(0.0),
717            None,
718        );
719        let _one = FixProposal::new(
720            RuleId::new("E001"),
721            FixSource::BuiltinRule,
722            Span::new(0, 0),
723            "x",
724            "y",
725            Confidence::strict(1.0),
726            None,
727        );
728    }
729
730    #[test]
731    #[should_panic(expected = "Confidence::strict rule confidence")]
732    fn fix_proposal_new_panics_on_negative_confidence() {
733        let _ = FixProposal::new(
734            RuleId::new("E001"),
735            FixSource::BuiltinRule,
736            Span::new(0, 0),
737            "x",
738            "y",
739            Confidence::strict(-0.1),
740            None,
741        );
742    }
743
744    #[test]
745    #[should_panic(expected = "Confidence::strict rule confidence")]
746    fn fix_proposal_new_panics_on_above_one_confidence() {
747        let _ = FixProposal::new(
748            RuleId::new("E001"),
749            FixSource::BuiltinRule,
750            Span::new(0, 0),
751            "x",
752            "y",
753            Confidence::strict(1.5),
754            None,
755        );
756    }
757
758    #[test]
759    #[should_panic(expected = "Confidence::strict rule confidence")]
760    fn fix_proposal_new_panics_on_nan_confidence() {
761        let _ = FixProposal::new(
762            RuleId::new("E001"),
763            FixSource::BuiltinRule,
764            Span::new(0, 0),
765            "x",
766            "y",
767            Confidence::strict(f32::NAN),
768            None,
769        );
770    }
771
772    #[test]
773    fn fix_proposal_new_panics_when_axis_is_nan() {
774        // A directly-constructed Confidence can still have NaN axes
775        // that slip past the strict-path assert. Verify the
776        // FixProposal::new gate catches that case too.
777        let bad = Confidence {
778            recognition: f32::NAN,
779            rule: 1.0,
780            region: None,
781            runner_up_ratio: None,
782            features: Vec::new(),
783        };
784        let caught = std::panic::catch_unwind(|| {
785            FixProposal::new(
786                RuleId::new("E001"),
787                FixSource::BuiltinRule,
788                Span::new(0, 0),
789                "x",
790                "y",
791                bad,
792                None,
793            );
794        });
795        assert!(
796            caught.is_err(),
797            "expected FixProposal::new to panic on NaN recognition axis"
798        );
799    }
800
801    #[test]
802    fn fix_proposal_new_panics_when_axis_out_of_range() {
803        // combined() = recognition × rule can still land in [0, 1]
804        // even when an individual axis is out of range
805        // (e.g. recognition = 2.0, rule = 0.4 ⇒ combined = 0.8).
806        // Validating only the product would let this through; the
807        // per-axis check catches it.
808        let bad = Confidence {
809            recognition: 2.0,
810            rule: 0.4,
811            region: None,
812            runner_up_ratio: None,
813            features: Vec::new(),
814        };
815        // Sanity check: combined() IS in [0, 1] — that's the whole
816        // point of adding per-axis validation.
817        assert!((0.0..=1.0).contains(&bad.combined()));
818        let caught = std::panic::catch_unwind(|| {
819            FixProposal::new(
820                RuleId::new("E001"),
821                FixSource::BuiltinRule,
822                Span::new(0, 0),
823                "x",
824                "y",
825                bad,
826                None,
827            );
828        });
829        assert!(
830            caught.is_err(),
831            "expected FixProposal::new to panic on out-of-range recognition axis"
832        );
833    }
834
835    #[test]
836    fn fix_proposal_new_panics_when_feature_delta_is_nan() {
837        let bad = Confidence {
838            recognition: 0.9,
839            rule: 0.9,
840            region: None,
841            runner_up_ratio: None,
842            features: vec![FeatureContribution {
843                id: FeatureId::EditDistance1,
844                delta: f32::NAN,
845            }],
846        };
847        let caught = std::panic::catch_unwind(|| {
848            FixProposal::new(
849                RuleId::new("E001"),
850                FixSource::BuiltinRule,
851                Span::new(0, 0),
852                "x",
853                "y",
854                bad,
855                None,
856            );
857        });
858        assert!(
859            caught.is_err(),
860            "expected FixProposal::new to panic on NaN feature delta"
861        );
862    }
863
864    #[test]
865    fn fix_proposal_new_accepts_runner_up_ratio_above_one() {
866        // runner_up_ratio can legitimately be > 1.0 — it's a ratio,
867        // not a unit interval. Verify the per-axis validator doesn't
868        // over-constrain it.
869        let ok = Confidence {
870            recognition: 0.9,
871            rule: 0.9,
872            region: None,
873            runner_up_ratio: Some(3.5),
874            features: Vec::new(),
875        };
876        let _ = FixProposal::new(
877            RuleId::new("E001"),
878            FixSource::BuiltinRule,
879            Span::new(0, 0),
880            "x",
881            "y",
882            ok,
883            None,
884        );
885    }
886}
marque_rules/lib.rs

marque_rules/
lib.rs