marque_rules/lib.rs
1// SPDX-FileCopyrightText: 2026 Knitli Inc.
2//
3// SPDX-License-Identifier: LicenseRef-MarqueLicense-1.0
4
5#![forbid(unsafe_code)]
6#![cfg_attr(coverage_nightly, feature(coverage_attribute))]
7
8//! marque-rules — trait definitions for the marque rule system.
9//!
10//! This crate defines the contract every rule crate must satisfy.
11//! It has no rule implementations — those live in `marque-capco` and future crates.
12//! The engine depends only on this crate, enabling rule crates to be swapped.
13//!
14//! # Type split: FixProposal vs AppliedFix
15//!
16//! `FixProposal` is pure data emitted by rules — deterministic, timestamp-free,
17//! classifier-free. `AppliedFix` wraps a proposal with runtime context (timestamp,
18//! classifier id, dry-run flag) and is constructed **only** by `Engine::fix`.
19//! This makes "suggested vs applied" a type-system invariant.
20
21pub mod confidence;
22
23use marque_ism::{IsmAttributes, Span};
24use std::collections::HashMap;
25use std::sync::Arc;
26use std::time::SystemTime;
27
28pub use confidence::{Confidence, FeatureContribution, FeatureId};
29pub use marque_ism::{DocumentPosition, MarkingType, Zone};
30
31// ---------------------------------------------------------------------------
32// RuleId
33// ---------------------------------------------------------------------------
34
/// Unique rule identifier string (e.g., "E001", "capco/portion-mark-in-banner").
///
/// The wrapped `&'static str` is private, so every identifier is created
/// through [`RuleId::new`] and construction stays explicit at each call site.
///
/// Equality, ordering, and hashing are derived from the wrapped string, so
/// two identifiers compare exactly as their text does.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RuleId(&'static str);

impl RuleId {
    /// Construct a rule identifier from a static string slice.
    ///
    /// No validation is performed; the string is stored as given.
    #[inline]
    pub const fn new(id: &'static str) -> Self {
        Self(id)
    }

    /// Return the rule identifier as a string slice.
    ///
    /// The slice is `'static`, so it may outlive the `RuleId` it came from.
    #[inline]
    pub const fn as_str(&self) -> &'static str {
        self.0
    }
}

impl std::fmt::Display for RuleId {
    /// Write the identifier text.
    ///
    /// Goes through [`std::fmt::Formatter::pad`] rather than `write_str` so
    /// that width, fill, alignment, and precision flags (e.g. `{:<8}`) are
    /// honored when identifiers are laid out in columns. Output for a plain
    /// `{}` — and therefore `to_string()` — is the bare identifier.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.pad(self.0)
    }
}
61
62// ---------------------------------------------------------------------------
63// Severity
64// ---------------------------------------------------------------------------
65
/// Rule severity level. Configurable per rule in `.marque.toml`.
///
/// # Ordering
///
/// The derived `Ord` follows declaration order:
/// `Off < Suggest < Info < Warn < Error < Fix`.
/// The ordering is exposed for consumers that want to compare
/// severities (e.g., "is this at least `Error`?") but the config
/// loader does **not** use it as a merge operator today. `Suggest`
/// sits between `Off` and `Info` because it is the lightest
/// firing-but-non-actionable channel — quieter than `Info` (which
/// has no candidate replacement attached) and louder than `Off`
/// (which is non-firing entirely).
///
/// # Exit-code semantics
///
/// `marque check` maps severities to exit codes as follows:
///
/// | Severity counts present       | Exit code              |
/// |-------------------------------|------------------------|
/// | `Error` or `Fix`              | `1` (`EX_DIAG_ERROR`)  |
/// | `Warn` only                   | `2` (`EX_DIAG_WARN`)   |
/// | `Info` / `Suggest` only, none | `0` (`EX_OK`)          |
///
/// `Info` and `Suggest` are the only severities whose diagnostics are
/// emitted *and* keep the exit code at zero. `Warn` still fails CI
/// via `EX_DIAG_WARN`. The tonal distinction is advisory: `Warn`
/// means "this might be wrong"; `Info` means "FYI, probably
/// intentional but worth surfacing"; `Suggest` means "I have a
/// candidate replacement but I'm not confident enough to auto-apply
/// it — eyes on it." Rules like `W034 sci-custom-control-info`
/// (which reports unpublished SCI control systems — legitimate per
/// CAPCO but rare) are natural `Info` candidates; rules like `S004
/// rel-to-trigraph-suggest` (which proposes a higher-prior trigraph
/// alternative for an ambiguous REL TO entry) emit at `Suggest`.
///
/// # `Suggest` channel semantics
///
/// `Suggest` is the firing-but-non-applying channel: a diagnostic
/// emitted at `Suggest` carries a candidate `FixProposal` that the
/// engine will **never** auto-apply, regardless of `confidence`. The
/// fix is informational — it tells the user what the rule would
/// suggest if confidence were higher. Two paths produce
/// `Suggest`-severity diagnostics:
///
/// 1. **Explicit emission**: a rule constructs the diagnostic with
///    `Severity::Suggest` directly. `S004 rel-to-trigraph-suggest`
///    is the first such rule.
/// 2. **Engine rewrite**: any diagnostic whose attached `FixProposal`
///    has `confidence.combined() < confidence_threshold` is rewritten
///    to `Severity::Suggest` by the engine in `lint`. This subsumes
///    the prior silent-drop behavior at threshold-gate time so
///    below-threshold proposals stay observable.
///
/// In both cases, `Engine::fix` filters out `Suggest` diagnostics
/// from auto-apply by construction. `Suggest` diagnostics with
/// `fix: None` are also valid (informational suggestion with no
/// candidate replacement — used by future rules like #206's
/// REL TO opaque-uncertain reduction, where the rule has signal
/// to surface but no specific replacement to propose); the
/// renderer handles the missing-fix case cleanly.
///
/// # Merge semantics (current: last-write-wins)
///
/// `marque-config` merges layers in strict precedence order — env vars
/// override `.marque.local.toml` which overrides `.marque.toml`. Whatever
/// the highest-precedence layer says for a given rule wins, including
/// downgrades: a local override of `"off"` will suppress a project-config
/// `"error"`. This is intentional — individual classifiers sometimes need
/// to silence a rule while iterating, and the audit log still records the
/// configured severity for every applied fix.
///
/// If a future policy requires strictness-only merging (where no layer
/// may lower a severity that another layer has already set, so the
/// strictest configured value wins), change the loader to take `.max()`
/// over the `Severity::parse_config` values rather than `extend`.
/// The derived `Ord` above is already the correct operator for that case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Severity {
    /// Rule is disabled entirely. FR-008: severity=off is unrepresentable on emitted diagnostics
    /// — a rule at `Off` never fires, so no `Diagnostic` is produced.
    Off,
    /// Advisory channel — diagnostic carries a candidate fix that
    /// will **not** auto-apply.
    ///
    /// Distinct from `Info` (FYI, no actionable replacement) and
    /// from `Off` (non-firing). The fix-bearing diagnostic remains
    /// visible in lint output but the engine excludes it from
    /// auto-apply regardless of `confidence`. This is the
    /// suggest-don't-fix channel: rules with low-confidence
    /// candidate corrections (e.g., `S004 rel-to-trigraph-suggest`)
    /// can surface "did you mean?" hints without committing to the
    /// rewrite.
    ///
    /// `Suggest` keeps the CLI exit code at `0` (same as `Info`),
    /// so it is CI-silent.
    Suggest,
    /// Emit informational diagnostic; does not block `check`-mode exit
    /// code. Intended for "audit-visible but probably intentional"
    /// signals — cases where the marking may be correct but the user
    /// may want to verify (e.g., unpublished SCI control systems).
    Info,
    /// Emit warning; non-error, but still non-zero in `check` mode
    /// (produces `EX_DIAG_WARN` = 2). Different from `Info` in tone
    /// *and* exit-code impact: Warn is "this might be wrong" and
    /// CI-visible; Info is "FYI, probably intentional but worth
    /// surfacing" and CI-silent (exit 0).
    Warn,
    /// Emit error; blocks `--check` exit code.
    Error,
    /// Apply fix automatically when `--fix` flag is present.
    Fix,
}

impl Severity {
    /// Parse a severity level from a config string.
    ///
    /// Matching is exact and case-sensitive: only the lowercase names
    /// produced by [`Severity::as_str`] are accepted. Returns `None` for
    /// unrecognized values; the config loader treats `None` as a hard error.
    pub fn parse_config(s: &str) -> Option<Self> {
        match s {
            "off" => Some(Self::Off),
            "suggest" => Some(Self::Suggest),
            "info" => Some(Self::Info),
            "warn" => Some(Self::Warn),
            "error" => Some(Self::Error),
            "fix" => Some(Self::Fix),
            _ => None,
        }
    }

    /// Canonical lowercase string form, suitable for JSON output.
    ///
    /// This is the inverse of [`Severity::parse_config`] and is the stable
    /// surface that JSON consumers should depend on — never `format!("{:?}")`
    /// (which exposes Debug formatting as an unintended API).
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Off => "off",
            Self::Suggest => "suggest",
            Self::Info => "info",
            Self::Warn => "warn",
            Self::Error => "error",
            Self::Fix => "fix",
        }
    }
}

impl std::fmt::Display for Severity {
    /// Write the canonical lowercase name from [`Severity::as_str`].
    ///
    /// Goes through [`std::fmt::Formatter::pad`] rather than `write_str` so
    /// that width, fill, alignment, and precision flags (e.g. `{:<7}`) are
    /// honored when severities are rendered in aligned columns. Output for a
    /// plain `{}` — and therefore `to_string()` — is exactly `as_str()`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.pad(self.as_str())
    }
}
215
216// ---------------------------------------------------------------------------
217// RuleContext
218// ---------------------------------------------------------------------------
219
220/// Document position context passed to rules alongside parsed markings.
221///
222/// Phase 3 made `zone` and `position` `Option`-typed: the scanner cannot
223/// reliably determine header/footer/body or document position from raw
224/// text alone, so a rule that reads either field must handle `None`.
225/// They will become populated in a future scanner pass that consumes
226/// document structural metadata (page count, line numbers, header/footer
227/// detection on extracted documents).
228///
229/// `page_context` is populated by the engine for every non-portion
230/// candidate (Banner, CAB) so banner-validation rules can compare the
231/// observed banner against the composite expected from all preceding
232/// portions. The engine resets it at scanner-emitted `MarkingType::PageBreak`
233/// candidates (form-feed `\f` and `\n\n\n+` heuristics) so the context
234/// reflects only the current page.
235#[derive(Debug, Clone)]
236pub struct RuleContext {
237 pub marking_type: MarkingType,
238 /// Document zone (header/footer/body/CAB) when known. `None` in Phase 3
239 /// — the scanner cannot prove header vs footer from raw text.
240 pub zone: Option<Zone>,
241 /// Coarse document position when known. `None` in Phase 3.
242 pub position: Option<DocumentPosition>,
243 /// Accumulated portion data for the current page, reset at every
244 /// scanner-emitted `MarkingType::PageBreak`.
245 pub page_context: Option<std::sync::Arc<marque_ism::PageContext>>,
246 /// Organization-specific corrections map from config `[corrections]`.
247 /// `None` when no corrections are configured.
248 pub corrections: Option<Arc<HashMap<String, String>>>,
249}
250
251// ---------------------------------------------------------------------------
252// FixSource
253// ---------------------------------------------------------------------------
254
/// Where a fix recommendation came from.
///
/// Recorded on every `FixProposal` so the origin of a suggested change is
/// carried alongside the change itself.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FixSource {
    /// A hand-written Layer 2 CAPCO rule produced the fix.
    BuiltinRule,
    /// The fix comes from a user `[corrections]` entry (FR-009).
    CorrectionsMap,
    /// The fix is a deterministic conversion of a deprecated marking
    /// (FR-004a).
    MigrationTable,
    /// The probabilistic decoder derived the fix from the posterior of a
    /// recognition candidate (Phase D, see
    /// `docs/plans/2026-04-16-probabilistic-recognition.md`). Such fixes
    /// come with a non-trivial `features` list in
    /// [`FixProposal::confidence`], which lets auditors reconstruct how
    /// the score was reached.
    DecoderPosterior,
    /// The decoder derived the fix from a position-aware short-token
    /// classification heuristic: a keyboard-proximity table applied to
    /// the leading classification slot of a portion or banner marking,
    /// used when the token is too short for vocab-based fuzzy matching
    /// (e.g., `(YS//NF) → (TS//NF)`, `(W//NF) → (S//NF)`).
    /// See issue #133 PR 2.
    ///
    /// This inference is inherently weaker than a fuzzy-vocab match: it
    /// says "this token is keyboard-adjacent to a known classification",
    /// not "this token is edit-distance ≤ 2 from a known canonical token
    /// in a closed vocabulary." The engine therefore
    /// (a) emits the diagnostic at [`Severity::Warn`] (the fix-and-warn
    /// pattern — always visible, non-zero exit code in `--check`), and
    /// (b) caps [`Confidence::rule`] at `0.80`, so `combined ≤ 0.80`
    /// remains below the default `confidence_threshold` of `0.95`.
    /// As a result the fix auto-applies only when the user has explicitly
    /// lowered the threshold to opt into the heuristic's bar.
    DecoderClassificationHeuristic,
}
291
/// Citation string used by diagnostics whose authority is the user's
/// `[corrections]` config entry — that is, C001 and the engine's
/// pre-scanner text-scan path.
///
/// C001 is not a CAPCO rule: no CAPCO passage governs user-defined typo
/// replacements, so the citation points at the config section instead of
/// a §/page/line reference. Defining the string once keeps the
/// rule-pipeline emission site in `marque-capco` and the pre-scanner
/// emission site in `marque-engine` from drifting apart silently, so both
/// paths yield the same audit-record shape.
pub const CORRECTIONS_MAP_CITATION: &str = "CONFIG:[corrections]";
301
302// ---------------------------------------------------------------------------
303// FixProposal
304// ---------------------------------------------------------------------------
305
306/// A proposed fix for a diagnostic violation.
307///
308/// Pure data — deterministic, timestamp-free, classifier-free, safe to snapshot
309/// in tests. A `FixProposal` is a *suggestion* until `Engine::fix` promotes it
310/// to an `AppliedFix` when `confidence.combined() >= configuration.confidence_threshold`.
311///
312/// # Phase D: Multi-axis confidence
313///
314/// `confidence` is a [`Confidence`] record rather than a scalar. Strict-path
315/// rules construct it via [`Confidence::strict`]; the Phase D decoder
316/// constructs a full record with `recognition`, `runner_up_ratio`, and
317/// feature contributions. The engine threshold gate uses
318/// [`Confidence::combined`] so a 0.95-recognition × 0.9-rule fix that
319/// previously would have been scalar-0.855 still gates the same way.
320#[non_exhaustive]
321#[derive(Debug, Clone)]
322pub struct FixProposal {
323 /// The rule that generated this proposal.
324 pub rule: RuleId,
325 /// Provenance: built-in rule, corrections map, migration table, or
326 /// decoder posterior.
327 pub source: FixSource,
328 /// Byte range in original source to replace.
329 pub span: Span,
330 /// The bytes currently occupying `span`.
331 pub original: Box<str>,
332 /// Replacement text.
333 pub replacement: Box<str>,
334 /// Multi-axis confidence for this fix.
335 pub confidence: Confidence,
336 /// Reference to the CAPCO rule or migration document justifying this fix.
337 pub migration_ref: Option<&'static str>,
338}
339
340impl FixProposal {
341 /// Create a new fix proposal with invariant checks.
342 ///
343 /// # Panics
344 ///
345 /// Panics if `confidence` fails [`Confidence::validate`] — i.e.,
346 /// any individual axis is out of range or `NaN` / non-finite. The
347 /// per-axis check is the load-bearing one: `combined() =
348 /// recognition × rule` can land in `[0.0, 1.0]` for individually-
349 /// invalid axes (e.g., `recognition = 2.0`, `rule = 0.4` ⇒
350 /// `combined = 0.8`), so validating only the product would let an
351 /// invalid record through. The check runs in release builds (not
352 /// just debug) because `NaN` silently fails every threshold
353 /// comparison and `INFINITY` silently bypasses every threshold —
354 /// both are correctness-impacting bugs in release.
355 pub fn new(
356 rule: RuleId,
357 source: FixSource,
358 span: Span,
359 original: impl Into<Box<str>>,
360 replacement: impl Into<Box<str>>,
361 confidence: Confidence,
362 migration_ref: Option<&'static str>,
363 ) -> Self {
364 if let Err(msg) = confidence.validate() {
365 panic!("FixProposal invalid confidence: {msg}");
366 }
367 Self {
368 rule,
369 source,
370 span,
371 original: original.into(),
372 replacement: replacement.into(),
373 confidence,
374 migration_ref,
375 }
376 }
377}
378
379// ---------------------------------------------------------------------------
380// AppliedFix (= Audit Record)
381// ---------------------------------------------------------------------------
382
383/// A promoted `FixProposal` with runtime context.
384///
385/// Constructed **only** by `Engine::fix` at the moment a `FixProposal` meets
386/// the confidence threshold. Never constructed by a rule or suggestion path.
387///
388/// Serves as the audit record: the NDJSON schemas at `contracts/audit-record*.json`
389/// serialize this type.
390///
391/// `classifier_id` is an `Arc<str>` so promoting many fixes from a single
392/// document only clones an atomic refcount, not the underlying string.
393///
394/// # v2 audit fields (`confidence`, `source`)
395///
396/// Phase D promotes the fix's [`Confidence`] and [`FixSource`] to
397/// **top-level** fields on `AppliedFix` so the v2 audit emitter doesn't
398/// need to descend into `.proposal` to find them. They are a snapshot
399/// at promotion time — the engine may (in future phases) adjust them
400/// for region context before promotion, so they can diverge from the
401/// original `proposal.confidence` / `proposal.source`. Today the
402/// engine promotes them unchanged from the proposal.
403///
404/// Both fields are redundant with the `proposal` sub-struct by design:
405/// the v1 schema reads them through `proposal`; the v2 schema reads
406/// the top-level fields. Keeping both paths live makes the v1→v2
407/// transition a pure emitter change rather than a data-model change.
408#[non_exhaustive]
409#[derive(Debug, Clone)]
410pub struct AppliedFix {
411 /// The original proposal that was applied.
412 pub proposal: FixProposal,
413 /// Snapshot of the fix's confidence at promotion time (v2 audit).
414 pub confidence: Confidence,
415 /// Snapshot of the fix's provenance at promotion time (v2 audit).
416 pub source: FixSource,
417 /// Timestamp of application (clock-injected).
418 pub timestamp: SystemTime,
419 /// Classifier identity from runtime config. `None` if not configured.
420 pub classifier_id: Option<Arc<str>>,
421 /// `true` if produced under `--dry-run` (FR-006).
422 pub dry_run: bool,
423 /// Caller-supplied input identifier (file path, "-" for stdin, `None` if N/A).
424 pub input: Option<Arc<str>>,
425}
426
427impl AppliedFix {
428 /// Promote a `FixProposal` to an `AppliedFix` with runtime context.
429 ///
430 /// # Engine-only contract (production code)
431 ///
432 /// This constructor exists in `marque-rules` for type co-location, but
433 /// in **production code** **must only be called from
434 /// `marque-engine::Engine::fix`**. Rule crates and CLI code must never
435 /// construct `AppliedFix` directly — they produce `FixProposal`
436 /// values and let the engine promote them.
437 ///
438 /// The engine snapshots `proposal.confidence` and `proposal.source`
439 /// into the top-level `confidence` / `source` fields at promotion
440 /// time. A future phase may adjust these per region-context before
441 /// snapshotting; Phase 2 copies them unchanged.
442 ///
443 /// # Type-level seal
444 ///
445 /// The `_token: EnginePromotionToken` parameter is the seal: an
446 /// instance can only be obtained via
447 /// [`EnginePromotionToken::__engine_construct`], whose
448 /// engine-only contract mirrors this one. Because
449 /// `EnginePromotionToken`'s sole field is private to
450 /// `marque-rules`, no external crate can brace-construct one — the
451 /// bypass surface collapses to a single named type. A grep for
452 /// `EnginePromotionToken` outside `marque-engine` (or test code
453 /// covered by the carve-out below) flags every Constitution V
454 /// violation in one pass.
455 ///
456 /// The seal is still convention-based at the cross-crate level
457 /// (Rust does not provide a way to scope `pub` to a specific
458 /// downstream crate without `cfg` features that any caller can
459 /// flip), but the convention is now load-bearing at the type
460 /// level: the named token threads the bypass through one
461 /// auditable choke point instead of leaving it as a single
462 /// generically-named function.
463 ///
464 /// # Test-fixture carve-out
465 ///
466 /// Test code MAY call `__engine_promote` directly (and mint a
467 /// token via [`EnginePromotionToken::__engine_construct`]) to
468 /// construct synthetic `AppliedFix` fixtures for unit-testing
469 /// audit-emission machinery (renderers, sentinel checks, NDJSON
470 /// serialization) without spinning up a full `Engine`. The
471 /// carve-out is scoped per Constitution V Principle V:
472 ///
473 /// - Call sites MUST live inside `#[cfg(test)]` modules, `tests/`
474 /// integration files, or test-utility crates gated as
475 /// `dev-dependencies`. Production code calling this constructor
476 /// from `cfg(not(test))` violates the contract.
477 /// - Fabricated `AppliedFix` values MUST NOT be commingled with
478 /// engine-promoted fixes (spliced into a real audit stream,
479 /// etc.).
480 /// - The carve-out covers test-fixture *construction* only. CLI
481 /// helpers, batch tooling, and benchmark drivers that want an
482 /// `AppliedFix` for non-test purposes are not in scope.
483 ///
484 /// Each test call site SHOULD carry an inline comment naming the
485 /// carve-out so future reviewers don't have to re-derive the
486 /// policy.
487 #[doc(hidden)]
488 pub fn __engine_promote(
489 proposal: FixProposal,
490 timestamp: SystemTime,
491 classifier_id: Option<Arc<str>>,
492 dry_run: bool,
493 input: Option<Arc<str>>,
494 _token: EnginePromotionToken,
495 ) -> Self {
496 let confidence = proposal.confidence.clone();
497 let source = proposal.source;
498 Self {
499 proposal,
500 confidence,
501 source,
502 timestamp,
503 classifier_id,
504 dry_run,
505 input,
506 }
507 }
508}
509
/// Proof-of-construction token that only the engine is meant to mint, and
/// which [`AppliedFix::__engine_promote`] requires as an argument.
///
/// [`EnginePromotionToken::__engine_construct`] is the sole way to get
/// one. The token's single field is private to `marque-rules`, so no
/// external crate can brace-construct it; the constructor is
/// `#[doc(hidden)]` and deliberately named so that bypass intent is
/// obvious wherever it is called.
///
/// This type is the type-level seal for Constitution V Principle V's
/// engine-only contract on audit-record promotion. The binding contract
/// and the test-fixture carve-out are spelled out on
/// [`AppliedFix::__engine_promote`].
///
/// # Compile-fail proof of the seal
///
/// Because `_seal` is private to `marque-rules`, external crates cannot
/// brace-construct an `EnginePromotionToken`. Doctests are compiled as
/// separate crates against the library's public API, so the compiler
/// rejects the snippet below — which is exactly what `compile_fail`
/// asserts:
///
/// ```compile_fail
/// // External crates see `EnginePromotionToken` but not `_seal`,
/// // so brace-construction is rejected. Bypass requires calling
/// // `EnginePromotionToken::__engine_construct()`, which is the
/// // single auditable bypass surface.
/// let _token = marque_rules::EnginePromotionToken { _seal: () };
/// ```
#[derive(Debug)]
pub struct EnginePromotionToken {
    _seal: (),
}

impl EnginePromotionToken {
    /// Mint an [`EnginePromotionToken`].
    ///
    /// # Engine-only contract (production code)
    ///
    /// In production code only `marque-engine` may call this. The
    /// three-constraint test-fixture carve-out defined on
    /// [`AppliedFix::__engine_promote`] applies here verbatim; that
    /// constructor's doc comment is the binding definition. Any other
    /// call from `cfg(not(test))` code violates Constitution V
    /// Principle V.
    #[doc(hidden)]
    #[inline]
    pub const fn __engine_construct() -> Self {
        EnginePromotionToken { _seal: () }
    }
}
561
562// ---------------------------------------------------------------------------
563// Diagnostic
564// ---------------------------------------------------------------------------
565
566/// A single diagnostic emitted by a rule check.
567#[non_exhaustive]
568#[derive(Debug, Clone)]
569pub struct Diagnostic {
570 pub rule: RuleId,
571 pub severity: Severity,
572 /// Byte span in the original source buffer.
573 pub span: Span,
574 /// Human-readable description of the violation.
575 pub message: Box<str>,
576 /// CAPCO section citation, e.g., "CAPCO-2016 §A.6"
577 /// (refers to the CAPCO Register and Manual, 2016).
578 pub citation: &'static str,
579 /// Proposed fix, if the rule can generate one.
580 pub fix: Option<FixProposal>,
581}
582
583impl Diagnostic {
584 /// Construct a new diagnostic.
585 pub fn new(
586 rule: RuleId,
587 severity: Severity,
588 span: Span,
589 message: impl Into<Box<str>>,
590 citation: &'static str,
591 fix: Option<FixProposal>,
592 ) -> Self {
593 Self {
594 rule,
595 severity,
596 span,
597 message: message.into(),
598 citation,
599 fix,
600 }
601 }
602}
603
604// ---------------------------------------------------------------------------
605// Rule trait
606// ---------------------------------------------------------------------------
607
608/// The core trait every rule implementation must satisfy.
609///
610/// Rules are stateless. All configuration (severity overrides, corrections map)
611/// is resolved by the engine before rule invocation and passed via context.
612pub trait Rule: Send + Sync {
613 fn id(&self) -> RuleId;
614 fn name(&self) -> &'static str;
615 /// Default severity — overridable per rule in `.marque.toml`.
616 fn default_severity(&self) -> Severity;
617 fn check(&self, attrs: &IsmAttributes, ctx: &RuleContext) -> Vec<Diagnostic>;
618}
619
620/// A collection of rules provided by a rule crate.
621/// Returned by the rule crate's entry point function.
622pub trait RuleSet: Send + Sync {
623 fn rules(&self) -> &[Box<dyn Rule>];
624 fn schema_version(&self) -> &'static str;
625}
626
#[cfg(test)]
#[cfg_attr(coverage_nightly, coverage(off))]
mod tests {
    use super::*;

    /// Every severity, listed in the ascending order the derived `Ord`
    /// is documented to produce.
    const ASCENDING: [Severity; 6] = [
        Severity::Off,
        Severity::Suggest,
        Severity::Info,
        Severity::Warn,
        Severity::Error,
        Severity::Fix,
    ];

    /// Build a proposal in which only `confidence` varies; every other
    /// argument is a fixed placeholder.
    fn proposal_with(confidence: Confidence) -> FixProposal {
        FixProposal::new(
            RuleId::new("E001"),
            FixSource::BuiltinRule,
            Span::new(0, 0),
            "x",
            "y",
            confidence,
            None,
        )
    }

    /// Report whether `FixProposal::new` panics when given `confidence`.
    fn new_panics_with(confidence: Confidence) -> bool {
        std::panic::catch_unwind(move || {
            let _ = proposal_with(confidence);
        })
        .is_err()
    }

    #[test]
    fn rule_id_round_trip() {
        let id = RuleId::new("E001");
        assert_eq!(id.as_str(), "E001");
        assert_eq!(id.to_string(), "E001");
    }

    #[test]
    fn severity_parse_config_accepts_known_values() {
        let known = [
            ("off", Severity::Off),
            ("suggest", Severity::Suggest),
            ("info", Severity::Info),
            ("warn", Severity::Warn),
            ("error", Severity::Error),
            ("fix", Severity::Fix),
        ];
        for (text, expected) in known {
            assert_eq!(Severity::parse_config(text), Some(expected));
        }
    }

    #[test]
    fn severity_parse_config_is_case_sensitive() {
        for text in ["OFF", "Warn"] {
            assert_eq!(Severity::parse_config(text), None);
        }
    }

    #[test]
    fn severity_parse_config_rejects_unknown_strings() {
        for text in ["err", "disable", ""] {
            assert_eq!(Severity::parse_config(text), None);
        }
    }

    #[test]
    fn severity_display_round_trips() {
        for level in ASCENDING {
            assert_eq!(Severity::parse_config(level.as_str()), Some(level));
            assert_eq!(level.to_string(), level.as_str());
        }
    }

    #[test]
    fn severity_ord_off_is_lowest() {
        // Off < Suggest < Info < Warn < Error < Fix — the doc comment on
        // Severity gives the intentional design rationale. Each adjacent
        // pair must be strictly increasing.
        for pair in ASCENDING.windows(2) {
            assert!(pair[0] < pair[1]);
        }
    }

    #[test]
    fn severity_suggest_round_trips_through_config_string() {
        // Issue #235 / #186 PR-3: the suggest-don't-fix channel has to be
        // a stable parse target, so the config string "suggest" must
        // survive both parse_config and as_str.
        let suggest = Severity::Suggest;
        assert_eq!(Severity::parse_config("suggest"), Some(suggest));
        assert_eq!(suggest.as_str(), "suggest");
        assert_eq!(suggest.to_string(), "suggest");
    }

    #[test]
    fn severity_suggest_is_strictly_below_info_in_ord() {
        // The renderer depends on Suggest sorting BELOW Info, so that the
        // CI exit-code logic ("Info or none → exit 0") extends to
        // ("Info-or-Suggest or none → exit 0") through the same
        // strict-less-than comparison.
        assert!(Severity::Suggest < Severity::Info);
        assert!(Severity::Off < Severity::Suggest);
    }

    #[test]
    fn fix_proposal_new_accepts_boundary_confidence() {
        // Both ends of the unit interval are valid.
        let _zero = proposal_with(Confidence::strict(0.0));
        let _one = proposal_with(Confidence::strict(1.0));
    }

    #[test]
    #[should_panic(expected = "Confidence::strict rule confidence")]
    fn fix_proposal_new_panics_on_negative_confidence() {
        let _ = proposal_with(Confidence::strict(-0.1));
    }

    #[test]
    #[should_panic(expected = "Confidence::strict rule confidence")]
    fn fix_proposal_new_panics_on_above_one_confidence() {
        let _ = proposal_with(Confidence::strict(1.5));
    }

    #[test]
    #[should_panic(expected = "Confidence::strict rule confidence")]
    fn fix_proposal_new_panics_on_nan_confidence() {
        let _ = proposal_with(Confidence::strict(f32::NAN));
    }

    #[test]
    fn fix_proposal_new_panics_when_axis_is_nan() {
        // A Confidence built by hand bypasses the strict-path assert, so
        // it can still carry NaN axes. The FixProposal::new gate has to
        // catch that case as well.
        let bad = Confidence {
            recognition: f32::NAN,
            rule: 1.0,
            region: None,
            runner_up_ratio: None,
            features: Vec::new(),
        };
        assert!(
            new_panics_with(bad),
            "expected FixProposal::new to panic on NaN recognition axis"
        );
    }

    #[test]
    fn fix_proposal_new_panics_when_axis_out_of_range() {
        // combined() = recognition × rule may land in [0, 1] although one
        // axis is out of range (e.g. recognition = 2.0, rule = 0.4 ⇒
        // combined = 0.8). A product-only check would accept this; the
        // per-axis check rejects it.
        let bad = Confidence {
            recognition: 2.0,
            rule: 0.4,
            region: None,
            runner_up_ratio: None,
            features: Vec::new(),
        };
        // Sanity check: combined() really IS inside [0, 1] — which is the
        // whole reason per-axis validation exists.
        assert!((0.0..=1.0).contains(&bad.combined()));
        assert!(
            new_panics_with(bad),
            "expected FixProposal::new to panic on out-of-range recognition axis"
        );
    }

    #[test]
    fn fix_proposal_new_panics_when_feature_delta_is_nan() {
        let bad = Confidence {
            recognition: 0.9,
            rule: 0.9,
            region: None,
            runner_up_ratio: None,
            features: vec![FeatureContribution {
                id: FeatureId::EditDistance1,
                delta: f32::NAN,
            }],
        };
        assert!(
            new_panics_with(bad),
            "expected FixProposal::new to panic on NaN feature delta"
        );
    }

    #[test]
    fn fix_proposal_new_accepts_runner_up_ratio_above_one() {
        // runner_up_ratio is a ratio, not a unit interval, so values
        // above 1.0 are legitimate. The per-axis validator must not
        // over-constrain it.
        let ok = Confidence {
            recognition: 0.9,
            rule: 0.9,
            region: None,
            runner_up_ratio: Some(3.5),
            features: Vec::new(),
        };
        let _ = proposal_with(ok);
    }
}