marque_rules/lib.rs
1//! marque-rules — trait definitions for the marque rule system.
2//!
3//! This crate defines the contract every rule crate must satisfy.
4//! It has no rule implementations — those live in `marque-capco` and future crates.
5//! The engine depends only on this crate, enabling rule crates to be swapped.
6//!
7//! # Type split: FixProposal vs AppliedFix
8//!
9//! `FixProposal` is pure data emitted by rules — deterministic, timestamp-free,
10//! classifier-free. `AppliedFix` wraps a proposal with runtime context (timestamp,
11//! classifier id, dry-run flag) and is constructed **only** by `Engine::fix`.
12//! This makes "suggested vs applied" a type-system invariant.
13
14use marque_ism::{IsmAttributes, Span};
15use std::collections::HashMap;
16use std::sync::Arc;
17use std::time::SystemTime;
18
19pub use marque_ism::{DocumentPosition, MarkingType, Zone};
20
21// ---------------------------------------------------------------------------
22// RuleId
23// ---------------------------------------------------------------------------
24
/// Unique rule identifier string (e.g., "E001", "capco/banner-abbreviation").
///
/// A newtype over `&'static str`. The wrapped slice is a private field, so
/// every identifier is created through [`RuleId::new`] and each construction
/// site stays explicit.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RuleId(&'static str);

impl RuleId {
    /// Wrap a static string slice as a rule identifier.
    ///
    /// The slice is stored as given; no validation is performed here.
    #[inline]
    pub const fn new(id: &'static str) -> Self {
        RuleId(id)
    }

    /// Borrow the identifier text, keeping its `'static` lifetime.
    #[inline]
    pub const fn as_str(&self) -> &'static str {
        self.0
    }
}

impl std::fmt::Display for RuleId {
    /// Writes the identifier text verbatim, with no decoration.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = self.as_str();
        f.write_str(text)
    }
}
51
52// ---------------------------------------------------------------------------
53// Severity
54// ---------------------------------------------------------------------------
55
/// Rule severity level. Configurable per rule in `.marque.toml`.
///
/// # Ordering
///
/// Variants are declared in ascending order, so the derived `Ord` gives
/// `Off < Warn < Error < Fix`. Consumers may use it to compare severities
/// (e.g., "is this at least `Error`?"); the config loader does **not**
/// currently use it as a merge operator.
///
/// # Merge semantics (current: last-write-wins)
///
/// `marque-config` merges layers in strict precedence order: env vars
/// override `.marque.local.toml`, which overrides `.marque.toml`. The
/// highest-precedence layer that mentions a rule decides its severity,
/// downgrades included — a local `"off"` suppresses a project-level
/// `"error"`. This is intentional: individual classifiers sometimes need
/// to silence a rule while iterating, and the audit log still records the
/// configured severity for every applied fix.
///
/// Should a future policy require strictness-only merging (a lower layer
/// may not downgrade a higher layer's severity), the loader can take
/// `.max()` over [`Severity::parse_config`] values instead of using
/// `extend`; the derived `Ord` is already the right operator for that case.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Severity {
    /// Rule is disabled entirely. FR-008: a rule at `Off` never fires, so no
    /// `Diagnostic` is produced with this severity.
    Off,
    /// Emit warning; do not block.
    Warn,
    /// Emit error; blocks `--check` exit code.
    Error,
    /// Apply fix automatically when `--fix` flag is present.
    Fix,
}

impl Severity {
    /// Parse a severity level from a config string.
    ///
    /// Matching is exact and case-sensitive against the strings produced by
    /// [`Severity::as_str`]. Returns `None` for anything else; the config
    /// loader treats `None` as a hard error.
    pub fn parse_config(s: &str) -> Option<Self> {
        // Every variant, so the accepted spellings are exactly the `as_str` forms.
        const LEVELS: [Severity; 4] = [
            Severity::Off,
            Severity::Warn,
            Severity::Error,
            Severity::Fix,
        ];
        LEVELS.iter().copied().find(|level| level.as_str() == s)
    }

    /// Canonical lowercase string form, suitable for JSON output.
    ///
    /// Inverse of [`Severity::parse_config`]. This is the stable surface JSON
    /// consumers should depend on — never `format!("{:?}")`, which would
    /// expose Debug formatting as an unintended API.
    pub const fn as_str(self) -> &'static str {
        match self {
            Severity::Off => "off",
            Severity::Warn => "warn",
            Severity::Error => "error",
            Severity::Fix => "fix",
        }
    }
}

impl std::fmt::Display for Severity {
    /// Writes the same lowercase text that [`Severity::as_str`] returns.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = Severity::as_str(*self);
        f.write_str(text)
    }
}
125
126// ---------------------------------------------------------------------------
127// RuleContext
128// ---------------------------------------------------------------------------
129
130/// Document position context passed to rules alongside parsed markings.
131///
132/// Phase 3 made `zone` and `position` `Option`-typed: the scanner cannot
133/// reliably determine header/footer/body or document position from raw
134/// text alone, so a rule that reads either field must handle `None`.
135/// They will become populated in a future scanner pass that consumes
136/// document structural metadata (page count, line numbers, header/footer
137/// detection on extracted documents).
138///
139/// `page_context` is populated by the engine for every non-portion
140/// candidate (Banner, CAB) so banner-validation rules can compare the
141/// observed banner against the composite expected from all preceding
142/// portions. The engine resets it at scanner-emitted `MarkingType::PageBreak`
143/// candidates (form-feed `\f` and `\n\n\n+` heuristics) so the context
144/// reflects only the current page.
145#[derive(Debug, Clone)]
146pub struct RuleContext {
147 pub marking_type: MarkingType,
148 /// Document zone (header/footer/body/CAB) when known. `None` in Phase 3
149 /// — the scanner cannot prove header vs footer from raw text.
150 pub zone: Option<Zone>,
151 /// Coarse document position when known. `None` in Phase 3.
152 pub position: Option<DocumentPosition>,
153 /// Accumulated portion data for the current page, reset at every
154 /// scanner-emitted `MarkingType::PageBreak`.
155 pub page_context: Option<std::sync::Arc<marque_ism::PageContext>>,
156 /// Organization-specific corrections map from config `[corrections]`.
157 /// `None` when no corrections are configured.
158 pub corrections: Option<Arc<HashMap<String, String>>>,
159}
160
161// ---------------------------------------------------------------------------
162// FixSource
163// ---------------------------------------------------------------------------
164
/// Provenance of a fix proposal: which mechanism recommended the fix.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum FixSource {
    /// Produced by a hand-written Layer 2 CAPCO rule.
    BuiltinRule,
    /// Produced from a user `[corrections]` entry (FR-009).
    CorrectionsMap,
    /// Produced by a deterministic deprecated-marking conversion (FR-004a).
    MigrationTable,
}
175
176// ---------------------------------------------------------------------------
177// FixProposal
178// ---------------------------------------------------------------------------
179
180/// A proposed fix for a diagnostic violation.
181///
182/// Pure data — deterministic, timestamp-free, classifier-free, safe to snapshot
183/// in tests. A `FixProposal` is a *suggestion* until `Engine::fix` promotes it
184/// to an `AppliedFix` when `confidence >= configuration.confidence_threshold`.
185#[non_exhaustive]
186#[derive(Debug, Clone)]
187pub struct FixProposal {
188 /// The rule that generated this proposal.
189 pub rule: RuleId,
190 /// Provenance: built-in rule, corrections map, or migration table.
191 pub source: FixSource,
192 /// Byte range in original source to replace.
193 pub span: Span,
194 /// The bytes currently occupying `span`.
195 pub original: Box<str>,
196 /// Replacement text.
197 pub replacement: Box<str>,
198 /// Confidence in this fix (0.0–1.0). Fixes below the configured threshold
199 /// are surfaced as suggestions rather than applied automatically.
200 pub confidence: f32,
201 /// Reference to the CAPCO rule or migration document justifying this fix.
202 pub migration_ref: Option<&'static str>,
203}
204
205impl FixProposal {
206 /// Create a new fix proposal with invariant checks.
207 ///
208 /// # Panics
209 ///
210 /// Panics if `confidence` is outside `[0.0, 1.0]` or is `NaN`. The check
211 /// runs in release builds (not just debug) because `NaN` silently fails
212 /// every threshold comparison and `INFINITY` silently bypasses every
213 /// threshold — both are correctness-impacting bugs in release.
214 pub fn new(
215 rule: RuleId,
216 source: FixSource,
217 span: Span,
218 original: impl Into<Box<str>>,
219 replacement: impl Into<Box<str>>,
220 confidence: f32,
221 migration_ref: Option<&'static str>,
222 ) -> Self {
223 assert!(
224 (0.0..=1.0).contains(&confidence) && !confidence.is_nan(),
225 "FixProposal confidence must be in [0.0, 1.0] and not NaN, got {confidence}"
226 );
227 Self {
228 rule,
229 source,
230 span,
231 original: original.into(),
232 replacement: replacement.into(),
233 confidence,
234 migration_ref,
235 }
236 }
237}
238
239// ---------------------------------------------------------------------------
240// AppliedFix (= Audit Record)
241// ---------------------------------------------------------------------------
242
243/// A promoted `FixProposal` with runtime context.
244///
245/// Constructed **only** by `Engine::fix` at the moment a `FixProposal` meets
246/// the confidence threshold. Never constructed by a rule or suggestion path.
247///
248/// Serves as the audit record: the NDJSON schema at `contracts/audit-record.json`
249/// serializes this type.
250///
251/// `classifier_id` is an `Arc<str>` so promoting many fixes from a single
252/// document only clones an atomic refcount, not the underlying string.
253#[non_exhaustive]
254#[derive(Debug, Clone)]
255pub struct AppliedFix {
256 /// The original proposal that was applied.
257 pub proposal: FixProposal,
258 /// Timestamp of application (clock-injected).
259 pub timestamp: SystemTime,
260 /// Classifier identity from runtime config. `None` if not configured.
261 pub classifier_id: Option<Arc<str>>,
262 /// `true` if produced under `--dry-run` (FR-006).
263 pub dry_run: bool,
264 /// Caller-supplied input identifier (file path, "-" for stdin, `None` if N/A).
265 pub input: Option<Arc<str>>,
266}
267
268impl AppliedFix {
269 /// Promote a `FixProposal` to an `AppliedFix` with runtime context.
270 ///
271 /// # Engine-only contract
272 ///
273 /// This constructor exists in `marque-rules` for type co-location, but
274 /// **must only be called from `marque-engine::Engine::fix`**. Rule crates
275 /// and CLI code must never construct `AppliedFix` directly — they produce
276 /// `FixProposal` values and let the engine promote them.
277 ///
278 /// This is enforced by convention and code review, not by the type system,
279 /// because `AppliedFix` must be defined in `marque-rules` (which the engine
280 /// depends on, not the reverse).
281 #[doc(hidden)]
282 pub fn __engine_promote(
283 proposal: FixProposal,
284 timestamp: SystemTime,
285 classifier_id: Option<Arc<str>>,
286 dry_run: bool,
287 input: Option<Arc<str>>,
288 ) -> Self {
289 Self {
290 proposal,
291 timestamp,
292 classifier_id,
293 dry_run,
294 input,
295 }
296 }
297}
298
299// ---------------------------------------------------------------------------
300// Diagnostic
301// ---------------------------------------------------------------------------
302
303/// A single diagnostic emitted by a rule check.
304#[non_exhaustive]
305#[derive(Debug, Clone)]
306pub struct Diagnostic {
307 pub rule: RuleId,
308 pub severity: Severity,
309 /// Byte span in the original source buffer.
310 pub span: Span,
311 /// Human-readable description of the violation.
312 pub message: Box<str>,
313 /// CAPCO section citation, e.g., "CAPCO-2023-§3.1".
314 pub citation: &'static str,
315 /// Proposed fix, if the rule can generate one.
316 pub fix: Option<FixProposal>,
317}
318
319impl Diagnostic {
320 /// Construct a new diagnostic.
321 pub fn new(
322 rule: RuleId,
323 severity: Severity,
324 span: Span,
325 message: impl Into<Box<str>>,
326 citation: &'static str,
327 fix: Option<FixProposal>,
328 ) -> Self {
329 Self {
330 rule,
331 severity,
332 span,
333 message: message.into(),
334 citation,
335 fix,
336 }
337 }
338}
339
340// ---------------------------------------------------------------------------
341// Rule trait
342// ---------------------------------------------------------------------------
343
344/// The core trait every rule implementation must satisfy.
345///
346/// Rules are stateless. All configuration (severity overrides, corrections map)
347/// is resolved by the engine before rule invocation and passed via context.
348pub trait Rule: Send + Sync {
349 fn id(&self) -> RuleId;
350 fn name(&self) -> &'static str;
351 /// Default severity — overridable per rule in `.marque.toml`.
352 fn default_severity(&self) -> Severity;
353 fn check(&self, attrs: &IsmAttributes, ctx: &RuleContext) -> Vec<Diagnostic>;
354}
355
356/// A collection of rules provided by a rule crate.
357/// Returned by the rule crate's entry point function.
358pub trait RuleSet: Send + Sync {
359 fn rules(&self) -> &[Box<dyn Rule>];
360 fn schema_version(&self) -> &'static str;
361}
362
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a proposal whose only varying input is `confidence`, so each
    /// confidence test states just the value under test.
    fn proposal_with_confidence(confidence: f32) -> FixProposal {
        FixProposal::new(
            RuleId::new("E001"),
            FixSource::BuiltinRule,
            Span::new(0, 0),
            "x",
            "y",
            confidence,
            None,
        )
    }

    #[test]
    fn rule_id_round_trip() {
        let r = RuleId::new("E001");
        assert_eq!(r.as_str(), "E001");
        assert_eq!(r.to_string(), "E001");
    }

    #[test]
    fn severity_parse_config_accepts_known_values() {
        assert_eq!(Severity::parse_config("off"), Some(Severity::Off));
        assert_eq!(Severity::parse_config("warn"), Some(Severity::Warn));
        assert_eq!(Severity::parse_config("error"), Some(Severity::Error));
        assert_eq!(Severity::parse_config("fix"), Some(Severity::Fix));
    }

    #[test]
    fn severity_parse_config_is_case_sensitive() {
        assert_eq!(Severity::parse_config("OFF"), None);
        assert_eq!(Severity::parse_config("Warn"), None);
    }

    #[test]
    fn severity_parse_config_rejects_unknown_strings() {
        assert_eq!(Severity::parse_config("err"), None);
        assert_eq!(Severity::parse_config("disable"), None);
        assert_eq!(Severity::parse_config(""), None);
    }

    #[test]
    fn severity_display_round_trips() {
        for s in [
            Severity::Off,
            Severity::Warn,
            Severity::Error,
            Severity::Fix,
        ] {
            assert_eq!(Severity::parse_config(s.as_str()), Some(s));
            assert_eq!(s.to_string(), s.as_str());
        }
    }

    #[test]
    fn severity_ord_off_is_lowest() {
        // Off < Warn < Error < Fix — see the doc comment on Severity for the
        // intentional design rationale.
        assert!(Severity::Off < Severity::Warn);
        assert!(Severity::Warn < Severity::Error);
        assert!(Severity::Error < Severity::Fix);
    }

    #[test]
    fn fix_proposal_new_accepts_boundary_confidence() {
        // Both ends of the closed interval are valid and stored unchanged.
        assert_eq!(proposal_with_confidence(0.0).confidence, 0.0);
        assert_eq!(proposal_with_confidence(1.0).confidence, 1.0);
    }

    #[test]
    #[should_panic(expected = "FixProposal confidence")]
    fn fix_proposal_new_panics_on_negative_confidence() {
        let _ = proposal_with_confidence(-0.1);
    }

    #[test]
    #[should_panic(expected = "FixProposal confidence")]
    fn fix_proposal_new_panics_on_above_one_confidence() {
        let _ = proposal_with_confidence(1.5);
    }

    #[test]
    #[should_panic(expected = "FixProposal confidence")]
    fn fix_proposal_new_panics_on_nan_confidence() {
        let _ = proposal_with_confidence(f32::NAN);
    }

    // The constructor docs single out INFINITY as a value that would bypass
    // every threshold, so pin its rejection (and the negative counterpart).
    #[test]
    #[should_panic(expected = "FixProposal confidence")]
    fn fix_proposal_new_panics_on_infinite_confidence() {
        let _ = proposal_with_confidence(f32::INFINITY);
    }

    #[test]
    #[should_panic(expected = "FixProposal confidence")]
    fn fix_proposal_new_panics_on_negative_infinite_confidence() {
        let _ = proposal_with_confidence(f32::NEG_INFINITY);
    }
}