Skip to main content

marque_rules/
lib.rs

1//! marque-rules — trait definitions for the marque rule system.
2//!
3//! This crate defines the contract every rule crate must satisfy.
4//! It has no rule implementations — those live in `marque-capco` and future crates.
5//! The engine depends only on this crate, enabling rule crates to be swapped.
6//!
7//! # Type split: FixProposal vs AppliedFix
8//!
9//! `FixProposal` is pure data emitted by rules — deterministic, timestamp-free,
10//! classifier-free. `AppliedFix` wraps a proposal with runtime context (timestamp,
11//! classifier id, dry-run flag) and is constructed **only** by `Engine::fix`.
12//! This makes "suggested vs applied" a type-system invariant.
13
14use marque_ism::{IsmAttributes, Span};
15use std::collections::HashMap;
16use std::sync::Arc;
17use std::time::SystemTime;
18
19pub use marque_ism::{DocumentPosition, MarkingType, Zone};
20
21// ---------------------------------------------------------------------------
22// RuleId
23// ---------------------------------------------------------------------------
24
/// Unique rule identifier string (e.g., "E001", "capco/banner-abbreviation").
///
/// The inner `&'static str` is private; construct via [`RuleId::new`] so that
/// construction is explicit at every call site.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RuleId(&'static str);

impl RuleId {
    /// Construct a rule identifier from a static string slice.
    #[inline]
    pub const fn new(id: &'static str) -> Self {
        Self(id)
    }

    /// Return the rule identifier as a string slice.
    ///
    /// The returned slice is the same `&'static str` that was passed to
    /// [`RuleId::new`]; no allocation or copying takes place.
    #[inline]
    pub const fn as_str(&self) -> &'static str {
        self.0
    }
}

impl std::fmt::Display for RuleId {
    /// Write the identifier text.
    ///
    /// Uses [`std::fmt::Formatter::pad`] rather than `write_str` so that
    /// width, fill, alignment, and precision specifiers (e.g. `{:<8}` when
    /// laying out a table of diagnostics) are honored. With a plain `{}` the
    /// output is exactly the identifier string.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.pad(self.0)
    }
}
51
52// ---------------------------------------------------------------------------
53// Severity
54// ---------------------------------------------------------------------------
55
/// Rule severity level. Configurable per rule in `.marque.toml`.
///
/// # Ordering
///
/// The derived `Ord` is `Off < Warn < Error < Fix`. The ordering is
/// exposed for consumers that want to compare severities (e.g.,
/// "is this at least `Error`?") but the config loader does **not** use it
/// as a merge operator today.
///
/// # Merge semantics (current: last-write-wins)
///
/// `marque-config` merges layers in strict precedence order — env vars
/// override `.marque.local.toml` which overrides `.marque.toml`. Whatever
/// the highest-precedence layer says for a given rule wins, including
/// downgrades: a local override of `"off"` will suppress a project-config
/// `"error"`. This is intentional — individual classifiers sometimes need
/// to silence a rule while iterating, and the audit log still records the
/// configured severity for every applied fix.
///
/// If a future policy requires strictness-only merging (where a lower
/// layer cannot downgrade a higher layer's severity), change the loader
/// to `.max()` over `Severity::parse_config` values rather than `extend`.
/// The derived `Ord` above is already the correct operator for that case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Severity {
    /// Rule is disabled entirely. FR-008: severity=off is unrepresentable on emitted diagnostics
    /// — a rule at `Off` never fires, so no `Diagnostic` is produced.
    Off,
    /// Emit warning; do not block.
    Warn,
    /// Emit error; blocks `--check` exit code.
    Error,
    /// Apply fix automatically when `--fix` flag is present.
    Fix,
}

impl Severity {
    /// Parse a severity level from a config string. Returns `None` for
    /// unrecognized values; the config loader treats `None` as a hard error.
    ///
    /// Matching is exact and case-sensitive: only the lowercase spellings
    /// `"off"`, `"warn"`, `"error"`, and `"fix"` are accepted.
    pub fn parse_config(s: &str) -> Option<Self> {
        match s {
            "off" => Some(Self::Off),
            "warn" => Some(Self::Warn),
            "error" => Some(Self::Error),
            "fix" => Some(Self::Fix),
            _ => None,
        }
    }

    /// Canonical lowercase string form, suitable for JSON output.
    ///
    /// This is the inverse of [`Severity::parse_config`] and is the stable
    /// surface that JSON consumers should depend on — never `format!("{:?}")`
    /// (which exposes Debug formatting as an unintended API).
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Off => "off",
            Self::Warn => "warn",
            Self::Error => "error",
            Self::Fix => "fix",
        }
    }
}

impl std::fmt::Display for Severity {
    /// Write the canonical lowercase form returned by [`Severity::as_str`].
    ///
    /// Uses [`std::fmt::Formatter::pad`] rather than `write_str` so that
    /// width, fill, alignment, and precision specifiers (e.g. `{:<5}` when
    /// aligning a severity column) are honored. With a plain `{}` the output
    /// is exactly `as_str()`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.pad(self.as_str())
    }
}
125
126// ---------------------------------------------------------------------------
127// RuleContext
128// ---------------------------------------------------------------------------
129
130/// Document position context passed to rules alongside parsed markings.
131///
132/// Phase 3 made `zone` and `position` `Option`-typed: the scanner cannot
133/// reliably determine header/footer/body or document position from raw
134/// text alone, so a rule that reads either field must handle `None`.
135/// They will become populated in a future scanner pass that consumes
136/// document structural metadata (page count, line numbers, header/footer
137/// detection on extracted documents).
138///
139/// `page_context` is populated by the engine for every non-portion
140/// candidate (Banner, CAB) so banner-validation rules can compare the
141/// observed banner against the composite expected from all preceding
142/// portions. The engine resets it at scanner-emitted `MarkingType::PageBreak`
143/// candidates (form-feed `\f` and `\n\n\n+` heuristics) so the context
144/// reflects only the current page.
145#[derive(Debug, Clone)]
146pub struct RuleContext {
147    pub marking_type: MarkingType,
148    /// Document zone (header/footer/body/CAB) when known. `None` in Phase 3
149    /// — the scanner cannot prove header vs footer from raw text.
150    pub zone: Option<Zone>,
151    /// Coarse document position when known. `None` in Phase 3.
152    pub position: Option<DocumentPosition>,
153    /// Accumulated portion data for the current page, reset at every
154    /// scanner-emitted `MarkingType::PageBreak`.
155    pub page_context: Option<std::sync::Arc<marque_ism::PageContext>>,
156    /// Organization-specific corrections map from config `[corrections]`.
157    /// `None` when no corrections are configured.
158    pub corrections: Option<Arc<HashMap<String, String>>>,
159}
160
161// ---------------------------------------------------------------------------
162// FixSource
163// ---------------------------------------------------------------------------
164
/// Where a fix recommendation came from.
///
/// Carried on every fix proposal so that downstream consumers can tell a
/// built-in rule apart from a user correction or a migration-table entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FixSource {
    /// A hand-written Layer 2 CAPCO rule produced the fix.
    BuiltinRule,
    /// The fix comes from a user-supplied `[corrections]` entry (FR-009).
    CorrectionsMap,
    /// The fix is a deterministic conversion of a deprecated marking (FR-004a).
    MigrationTable,
}
175
176// ---------------------------------------------------------------------------
177// FixProposal
178// ---------------------------------------------------------------------------
179
180/// A proposed fix for a diagnostic violation.
181///
182/// Pure data — deterministic, timestamp-free, classifier-free, safe to snapshot
183/// in tests. A `FixProposal` is a *suggestion* until `Engine::fix` promotes it
184/// to an `AppliedFix` when `confidence >= configuration.confidence_threshold`.
185#[non_exhaustive]
186#[derive(Debug, Clone)]
187pub struct FixProposal {
188    /// The rule that generated this proposal.
189    pub rule: RuleId,
190    /// Provenance: built-in rule, corrections map, or migration table.
191    pub source: FixSource,
192    /// Byte range in original source to replace.
193    pub span: Span,
194    /// The bytes currently occupying `span`.
195    pub original: Box<str>,
196    /// Replacement text.
197    pub replacement: Box<str>,
198    /// Confidence in this fix (0.0–1.0). Fixes below the configured threshold
199    /// are surfaced as suggestions rather than applied automatically.
200    pub confidence: f32,
201    /// Reference to the CAPCO rule or migration document justifying this fix.
202    pub migration_ref: Option<&'static str>,
203}
204
205impl FixProposal {
206    /// Create a new fix proposal with invariant checks.
207    ///
208    /// # Panics
209    ///
210    /// Panics if `confidence` is outside `[0.0, 1.0]` or is `NaN`. The check
211    /// runs in release builds (not just debug) because `NaN` silently fails
212    /// every threshold comparison and `INFINITY` silently bypasses every
213    /// threshold — both are correctness-impacting bugs in release.
214    pub fn new(
215        rule: RuleId,
216        source: FixSource,
217        span: Span,
218        original: impl Into<Box<str>>,
219        replacement: impl Into<Box<str>>,
220        confidence: f32,
221        migration_ref: Option<&'static str>,
222    ) -> Self {
223        assert!(
224            (0.0..=1.0).contains(&confidence) && !confidence.is_nan(),
225            "FixProposal confidence must be in [0.0, 1.0] and not NaN, got {confidence}"
226        );
227        Self {
228            rule,
229            source,
230            span,
231            original: original.into(),
232            replacement: replacement.into(),
233            confidence,
234            migration_ref,
235        }
236    }
237}
238
239// ---------------------------------------------------------------------------
240// AppliedFix (= Audit Record)
241// ---------------------------------------------------------------------------
242
243/// A promoted `FixProposal` with runtime context.
244///
245/// Constructed **only** by `Engine::fix` at the moment a `FixProposal` meets
246/// the confidence threshold. Never constructed by a rule or suggestion path.
247///
248/// Serves as the audit record: the NDJSON schema at `contracts/audit-record.json`
249/// serializes this type.
250///
251/// `classifier_id` is an `Arc<str>` so promoting many fixes from a single
252/// document only clones an atomic refcount, not the underlying string.
253#[non_exhaustive]
254#[derive(Debug, Clone)]
255pub struct AppliedFix {
256    /// The original proposal that was applied.
257    pub proposal: FixProposal,
258    /// Timestamp of application (clock-injected).
259    pub timestamp: SystemTime,
260    /// Classifier identity from runtime config. `None` if not configured.
261    pub classifier_id: Option<Arc<str>>,
262    /// `true` if produced under `--dry-run` (FR-006).
263    pub dry_run: bool,
264    /// Caller-supplied input identifier (file path, "-" for stdin, `None` if N/A).
265    pub input: Option<Arc<str>>,
266}
267
268impl AppliedFix {
269    /// Promote a `FixProposal` to an `AppliedFix` with runtime context.
270    ///
271    /// # Engine-only contract
272    ///
273    /// This constructor exists in `marque-rules` for type co-location, but
274    /// **must only be called from `marque-engine::Engine::fix`**. Rule crates
275    /// and CLI code must never construct `AppliedFix` directly — they produce
276    /// `FixProposal` values and let the engine promote them.
277    ///
278    /// This is enforced by convention and code review, not by the type system,
279    /// because `AppliedFix` must be defined in `marque-rules` (which the engine
280    /// depends on, not the reverse).
281    #[doc(hidden)]
282    pub fn __engine_promote(
283        proposal: FixProposal,
284        timestamp: SystemTime,
285        classifier_id: Option<Arc<str>>,
286        dry_run: bool,
287        input: Option<Arc<str>>,
288    ) -> Self {
289        Self {
290            proposal,
291            timestamp,
292            classifier_id,
293            dry_run,
294            input,
295        }
296    }
297}
298
299// ---------------------------------------------------------------------------
300// Diagnostic
301// ---------------------------------------------------------------------------
302
303/// A single diagnostic emitted by a rule check.
304#[non_exhaustive]
305#[derive(Debug, Clone)]
306pub struct Diagnostic {
307    pub rule: RuleId,
308    pub severity: Severity,
309    /// Byte span in the original source buffer.
310    pub span: Span,
311    /// Human-readable description of the violation.
312    pub message: Box<str>,
313    /// CAPCO section citation, e.g., "CAPCO-2023-§3.1".
314    pub citation: &'static str,
315    /// Proposed fix, if the rule can generate one.
316    pub fix: Option<FixProposal>,
317}
318
319impl Diagnostic {
320    /// Construct a new diagnostic.
321    pub fn new(
322        rule: RuleId,
323        severity: Severity,
324        span: Span,
325        message: impl Into<Box<str>>,
326        citation: &'static str,
327        fix: Option<FixProposal>,
328    ) -> Self {
329        Self {
330            rule,
331            severity,
332            span,
333            message: message.into(),
334            citation,
335            fix,
336        }
337    }
338}
339
340// ---------------------------------------------------------------------------
341// Rule trait
342// ---------------------------------------------------------------------------
343
344/// The core trait every rule implementation must satisfy.
345///
346/// Rules are stateless. All configuration (severity overrides, corrections map)
347/// is resolved by the engine before rule invocation and passed via context.
348pub trait Rule: Send + Sync {
349    fn id(&self) -> RuleId;
350    fn name(&self) -> &'static str;
351    /// Default severity — overridable per rule in `.marque.toml`.
352    fn default_severity(&self) -> Severity;
353    fn check(&self, attrs: &IsmAttributes, ctx: &RuleContext) -> Vec<Diagnostic>;
354}
355
356/// A collection of rules provided by a rule crate.
357/// Returned by the rule crate's entry point function.
358pub trait RuleSet: Send + Sync {
359    fn rules(&self) -> &[Box<dyn Rule>];
360    fn schema_version(&self) -> &'static str;
361}
362
#[cfg(test)]
mod tests {
    use super::*;

    /// Every severity variant, listed in declaration order.
    const ALL_SEVERITIES: [Severity; 4] = [
        Severity::Off,
        Severity::Warn,
        Severity::Error,
        Severity::Fix,
    ];

    /// Build a throwaway proposal in which only `confidence` varies.
    fn proposal_with_confidence(confidence: f32) -> FixProposal {
        FixProposal::new(
            RuleId::new("E001"),
            FixSource::BuiltinRule,
            Span::new(0, 0),
            "x",
            "y",
            confidence,
            None,
        )
    }

    #[test]
    fn rule_id_round_trip() {
        let id = RuleId::new("E001");
        assert_eq!(id.as_str(), "E001");
        assert_eq!(id.to_string(), "E001");
    }

    #[test]
    fn severity_parse_config_accepts_known_values() {
        let known = [
            ("off", Severity::Off),
            ("warn", Severity::Warn),
            ("error", Severity::Error),
            ("fix", Severity::Fix),
        ];
        for (text, expected) in known {
            assert_eq!(Severity::parse_config(text), Some(expected));
        }
    }

    #[test]
    fn severity_parse_config_is_case_sensitive() {
        for text in ["OFF", "Warn"] {
            assert_eq!(Severity::parse_config(text), None);
        }
    }

    #[test]
    fn severity_parse_config_rejects_unknown_strings() {
        for text in ["err", "disable", ""] {
            assert_eq!(Severity::parse_config(text), None);
        }
    }

    #[test]
    fn severity_display_round_trips() {
        for severity in ALL_SEVERITIES {
            let text = severity.as_str();
            assert_eq!(Severity::parse_config(text), Some(severity));
            assert_eq!(severity.to_string(), text);
        }
    }

    #[test]
    fn severity_ord_off_is_lowest() {
        // Off < Warn < Error < Fix — see the doc comment on Severity for the
        // intentional design rationale. Each adjacent pair must be strictly
        // increasing.
        for pair in ALL_SEVERITIES.windows(2) {
            assert!(pair[0] < pair[1]);
        }
    }

    #[test]
    fn fix_proposal_new_accepts_boundary_confidence() {
        let _zero = proposal_with_confidence(0.0);
        let _one = proposal_with_confidence(1.0);
    }

    #[test]
    #[should_panic(expected = "FixProposal confidence")]
    fn fix_proposal_new_panics_on_negative_confidence() {
        let _ = proposal_with_confidence(-0.1);
    }

    #[test]
    #[should_panic(expected = "FixProposal confidence")]
    fn fix_proposal_new_panics_on_above_one_confidence() {
        let _ = proposal_with_confidence(1.5);
    }

    #[test]
    #[should_panic(expected = "FixProposal confidence")]
    fn fix_proposal_new_panics_on_nan_confidence() {
        let _ = proposal_with_confidence(f32::NAN);
    }
}