Skip to main content

rumdl_lib/
rule.rs

1//!
2//! This module defines the Rule trait and related types for implementing linting rules in rumdl.
3
4use dyn_clone::DynClone;
5use serde::{Deserialize, Serialize};
6use std::ops::Range;
7use thiserror::Error;
8
9use crate::lint_context::LintContext;
10
/// Generates an inherent `box_clone` method for a concrete rule type.
///
/// The expansion adds `fn box_clone(&self) -> Box<dyn Rule>` to `$ty`; the
/// method boxes a clone of `self`.
///
/// Requirements at the expansion site:
/// - `$ty` must implement `Clone`, because the body calls `self.clone()`.
/// - `$ty` must implement `Rule`, so the boxed clone can coerce to `Box<dyn Rule>`.
/// - `Rule` must be in scope, because the expansion names it unqualified.
///
/// The generated method has no visibility modifier, so it is private to the
/// module in which the macro is invoked.
#[macro_export]
macro_rules! impl_rule_clone {
    ($ty:ty) => {
        impl $ty {
            fn box_clone(&self) -> Box<dyn Rule> {
                Box::new(self.clone())
            }
        }
    };
}
22
/// Errors returned by rule checking and fixing.
///
/// This is the error half of [`LintResult`] and of the `Result` returned by
/// `Rule::fix`. The `Display` text of each variant comes from its
/// `#[error(...)]` attribute.
#[derive(Debug, Error)]
pub enum LintError {
    /// Displayed as `Invalid input: <message>`.
    #[error("Invalid input: {0}")]
    InvalidInput(String),
    /// Displayed as `Fix failed: <message>`.
    #[error("Fix failed: {0}")]
    FixFailed(String),
    /// Wraps a `std::io::Error`. The `#[from]` attribute generates
    /// `From<std::io::Error>`, so `?` and `.into()` convert I/O errors directly.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),
    /// Displayed as `Parsing error: <message>`.
    #[error("Parsing error: {0}")]
    ParsingError(String),
}
34
/// Result of a lint pass: the collected warnings on success, or a
/// [`LintError`] on failure. Returned by `Rule::check` and `Rule::cross_file_check`.
pub type LintResult = Result<Vec<LintWarning>, LintError>;
36
/// A single diagnostic produced by a rule.
///
/// All positions are 1-indexed. The type derives `Serialize`/`Deserialize`,
/// so the field names below are part of its serialized representation.
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
pub struct LintWarning {
    /// Text describing the problem.
    pub message: String,
    /// 1-indexed start line.
    pub line: usize,
    /// 1-indexed start column.
    pub column: usize,
    /// 1-indexed end line.
    pub end_line: usize,
    /// 1-indexed end column.
    pub end_column: usize,
    /// Severity assigned to this warning.
    pub severity: Severity,
    /// Edit attached to this warning; `None` when no fix is provided.
    pub fix: Option<Fix>,
    /// Rule identifier associated with the warning, if any (the tests in this
    /// file use `"MD001"`).
    pub rule_name: Option<String>,
}
48
/// One atomic fix attached to a `LintWarning`.
///
/// `range`/`replacement` describe the primary edit. `additional_edits`
/// carries any *paired* edits that must apply together with the primary one
/// for the result to be a valid document — for example, MD054's conversion
/// of an inline link to a reference style produces the in-place link rewrite
/// **and** a reference-definition append at end-of-file; applying only one
/// half would leave a dangling reference.
///
/// All fix consumers (the LSP code-action layer, CLI counters, the
/// `apply_warning_fixes` helper) treat the primary edit and the
/// `additional_edits` as a single unit. The field is empty by default so
/// rules that only need a single-location fix can keep using
/// `Fix::new(range, replacement)`; only rules that need multi-location
/// atomicity populate it via `Fix::with_additional_edits(...)`.
///
/// `additional_edits` is intentionally a flat `Vec<Fix>` — nesting beyond
/// one level isn't needed today and would complicate the apply contract.
/// Apply order is "primary first, then additional in their declared order"
/// when offsets are non-overlapping; consumers that batch multiple fixes
/// across warnings still sort by `range.start` descending so earlier offsets
/// remain valid as later edits mutate the buffer.
#[derive(Debug, PartialEq, Clone, Default, Serialize, Deserialize)]
pub struct Fix {
    /// Span of the primary edit, as offsets into the document being fixed.
    /// NOTE(review): presumably byte offsets — confirm against the fix consumers.
    pub range: Range<usize>,
    /// Text that replaces the content covered by `range`.
    pub replacement: String,
    /// Edits applied atomically with the primary `range`/`replacement` pair.
    /// Empty for the common single-edit case. See struct docs for semantics.
    /// Omitted from serialized output when empty and defaulted to empty when
    /// absent on input (see the `serde` attribute below).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub additional_edits: Vec<Fix>,
}
80
81impl Fix {
82    /// Construct a single-edit fix. Use this for the overwhelming common case
83    /// where a fix is one in-place replacement.
84    pub fn new(range: Range<usize>, replacement: String) -> Self {
85        Self {
86            range,
87            replacement,
88            additional_edits: Vec::new(),
89        }
90    }
91
92    /// Construct a multi-edit fix bundle. The primary edit is applied first,
93    /// followed by every entry in `additional_edits` as part of the same
94    /// atomic operation.
95    pub fn with_additional_edits(range: Range<usize>, replacement: String, additional_edits: Vec<Fix>) -> Self {
96        Self {
97            range,
98            replacement,
99            additional_edits,
100        }
101    }
102}
103
// Severity level attached to a `LintWarning`.
//
// NOTE: plain `//` comments are used here on purpose. This enum derives
// `schemars::JsonSchema`, and schemars copies `///` doc comments into the
// generated schema as descriptions, so doc comments on the enum or its
// variants would change the schema output.
//
// `rename_all = "lowercase"` makes the serialized names `error`, `warning`
// and `info`. `Deserialize` is implemented by hand for this type instead of
// being derived.
#[derive(Debug, PartialEq, Clone, Copy, Serialize, schemars::JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
    Error,
    Warning,
    Info,
}
111
112impl<'de> serde::Deserialize<'de> for Severity {
113    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
114    where
115        D: serde::Deserializer<'de>,
116    {
117        let s = String::deserialize(deserializer)?;
118        match s.to_lowercase().as_str() {
119            "error" => Ok(Severity::Error),
120            "warning" => Ok(Severity::Warning),
121            "info" => Ok(Severity::Info),
122            _ => Err(serde::de::Error::custom(format!(
123                "Invalid severity: '{s}'. Valid values: error, warning, info"
124            ))),
125        }
126    }
127}
128
/// Type of rule for selective processing.
///
/// Returned by `Rule::category`. Rules that do not override that method
/// report [`RuleCategory::Other`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleCategory {
    Heading,
    List,
    CodeBlock,
    Link,
    Image,
    Html,
    Emphasis,
    Whitespace,
    Blockquote,
    Table,
    FrontMatter,
    /// Fallback category; the value returned by the default `Rule::category`.
    Other,
}
145
/// Capability of a rule to fix issues.
///
/// Returned by `Rule::fix_capability`. Rules that do not override that method
/// report [`FixCapability::FullyFixable`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FixCapability {
    /// Rule can automatically fix all violations it detects
    FullyFixable,
    /// Rule can fix some violations based on context
    ConditionallyFixable,
    /// Rule cannot fix violations (by design)
    Unfixable,
}
156
/// Declares what cross-file data a rule needs.
///
/// Most rules only need single-file context and should use `None` (the default).
/// Rules that need to validate references across files (like MD051) should use `Workspace`.
///
/// Returned by `Rule::cross_file_scope`. The `#[default]` attribute makes
/// `CrossFileScope::default()` evaluate to `None`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CrossFileScope {
    /// Single-file only - no cross-file analysis needed (default for 99% of rules)
    #[default]
    None,
    /// Needs workspace-wide index for cross-file validation
    Workspace,
}
169
/// Interface implemented by every lint rule.
///
/// Implementors must provide `name`, `description`, `check`, `fix` and
/// `as_any`; every other method has a default implementation that can be
/// overridden.
///
/// The supertraits require rules to be `Send + Sync` and to implement
/// `DynClone`. Together with the `dyn_clone::clone_trait_object!(Rule)`
/// invocation in this file, `DynClone` is what makes `Box<dyn Rule>` cloneable.
// Remove marker /// TRAIT_MARKER_V1
pub trait Rule: DynClone + Send + Sync {
    /// Returns the rule's name as a static string.
    fn name(&self) -> &'static str;
    /// Returns a static description of the rule.
    fn description(&self) -> &'static str;
    /// Runs the rule against `ctx`, returning the warnings it produced or a
    /// [`LintError`].
    fn check(&self, ctx: &LintContext) -> LintResult;
    /// Applies the rule's fix using `ctx`.
    ///
    /// NOTE(review): the returned `String` is presumably the complete fixed
    /// document content — confirm against implementors.
    fn fix(&self, ctx: &LintContext) -> Result<String, LintError>;

    /// Check if this rule should quickly skip processing based on content
    ///
    /// The default implementation never skips (returns `false`).
    fn should_skip(&self, _ctx: &LintContext) -> bool {
        false
    }

    /// Get the category of this rule for selective processing
    fn category(&self) -> RuleCategory {
        RuleCategory::Other // Default implementation returns Other
    }

    /// Exposes the rule as `&dyn Any` so callers can attempt a downcast to the
    /// concrete rule type.
    fn as_any(&self) -> &dyn std::any::Any;

    // DocumentStructure has been merged into LintContext - this method is no longer used
    // fn as_maybe_document_structure(&self) -> Option<&dyn MaybeDocumentStructure> {
    //     None
    // }

    /// Returns the rule name and default config table if the rule has config.
    /// If a rule implements this, it MUST be defined on the `impl Rule for ...` block,
    /// not just the inherent impl.
    ///
    /// The default implementation returns `None`.
    fn default_config_section(&self) -> Option<(String, toml::Value)> {
        None
    }

    /// Returns config key aliases for this rule
    /// This allows rules to accept alternative config key names for backwards compatibility
    ///
    /// The default implementation returns `None`.
    fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
        None
    }

    /// Returns the list of config keys whose deserializer accepts more than one TOML
    /// type (e.g. either a scalar or a list). The schema is built from a serialized
    /// default that can only encode one variant, so the validator would reject the
    /// alternative form. The registry replaces the schema entry for each listed key
    /// with a polymorphic sentinel so type checking is skipped while the key name
    /// is still validated. Keep `default_config_section()` returning clean defaults
    /// — the sentinel is a schema concern and must not leak into user-facing output
    /// like `rumdl config --defaults`.
    ///
    /// The default implementation returns an empty slice.
    fn polymorphic_config_keys(&self) -> &'static [&'static str] {
        &[]
    }

    /// Declares the fix capability of this rule
    fn fix_capability(&self) -> FixCapability {
        FixCapability::FullyFixable // Safe default for backward compatibility
    }

    /// Declares cross-file analysis requirements for this rule
    ///
    /// Returns `CrossFileScope::None` by default, meaning the rule only needs
    /// single-file context. Rules that need workspace-wide data should override
    /// this to return `CrossFileScope::Workspace`.
    fn cross_file_scope(&self) -> CrossFileScope {
        CrossFileScope::None
    }

    /// Contribute data to the workspace index during linting
    ///
    /// Called during the single-file linting phase for rules that return
    /// `CrossFileScope::Workspace`. Rules should extract headings, links,
    /// and other data needed for cross-file validation.
    ///
    /// This is called as a side effect of linting, so LintContext is already
    /// created - no duplicate parsing required.
    fn contribute_to_index(&self, _ctx: &LintContext, _file_index: &mut crate::workspace_index::FileIndex) {
        // Default: no contribution
    }

    /// Perform cross-file validation after all files have been linted
    ///
    /// Called once per file after the entire workspace has been indexed.
    /// Rules receive the file_index (from contribute_to_index) and the full
    /// workspace_index for cross-file lookups.
    ///
    /// Note: This receives the FileIndex instead of LintContext to avoid re-parsing
    /// each file. The FileIndex was already populated during contribute_to_index.
    ///
    /// Rules can use workspace_index methods for cross-file validation:
    /// - `get_file(path)` - to look up headings in target files (for MD051)
    /// - `files()` - to iterate all indexed files
    ///
    /// Returns additional warnings for cross-file issues. These are appended
    /// to the single-file warnings.
    fn cross_file_check(
        &self,
        _file_path: &std::path::Path,
        _file_index: &crate::workspace_index::FileIndex,
        _workspace_index: &crate::workspace_index::WorkspaceIndex,
    ) -> LintResult {
        Ok(Vec::new()) // Default: no cross-file warnings
    }

    /// Factory: create a rule from config (if present), or use defaults.
    ///
    /// The `Self: Sized` bound keeps this associated function off the
    /// `dyn Rule` vtable, so it is only callable on concrete rule types.
    ///
    /// # Panics
    ///
    /// The default implementation always panics, naming the concrete type via
    /// `std::any::type_name`. Rule types whose `from_config` is called must
    /// override it.
    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
    where
        Self: Sized,
    {
        panic!(
            "from_config not implemented for rule: {}",
            std::any::type_name::<Self>()
        );
    }
}
280
// Implement the cloning logic for the Rule trait object: this generates
// `Clone` for `Box<dyn Rule>`, built on the `DynClone` supertrait of `Rule`.
dyn_clone::clone_trait_object!(Rule);
283
/// Extension trait to add downcasting capabilities to Rule
pub trait RuleExt {
    /// Attempts to view `self` as a `&T`.
    ///
    /// For the `Box<R>` implementation in this file, the result is `Some`
    /// only when `T` is exactly the boxed type `R`, and `None` otherwise.
    fn downcast_ref<T: 'static>(&self) -> Option<&T>;
}
288
289impl<R: Rule + 'static> RuleExt for Box<R> {
290    fn downcast_ref<T: 'static>(&self) -> Option<&T> {
291        if std::any::TypeId::of::<R>() == std::any::TypeId::of::<T>() {
292            unsafe { Some(&*std::ptr::from_ref(self.as_ref()).cast::<T>()) }
293        } else {
294            None
295        }
296    }
297}
298
299// Inline config parsing functions are in inline_config.rs.
300// Use InlineConfig::from_content() for the full inline configuration system,
301// or inline_config::parse_disable_comment/parse_enable_comment for low-level parsing.
302
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the warning fixture shared by the serialization tests; only the
    /// severity and the attached fix vary between cases.
    fn sample_warning(severity: Severity, fix: Option<Fix>) -> LintWarning {
        LintWarning {
            message: "Test warning".to_string(),
            line: 1,
            column: 1,
            end_line: 1,
            end_column: 10,
            severity,
            fix,
            rule_name: Some("MD001".to_string()),
        }
    }

    /// Severities serialize as their lowercase names.
    #[test]
    fn test_severity_serialization() {
        let cases = [
            (Severity::Warning, "\"severity\":\"warning\""),
            (Severity::Error, "\"severity\":\"error\""),
        ];

        for (severity, expected_fragment) in cases {
            let json = serde_json::to_string(&sample_warning(severity, None)).unwrap();
            assert!(json.contains(expected_fragment));
        }
    }

    /// An attached fix appears in the serialized warning with its replacement text.
    #[test]
    fn test_fix_serialization() {
        let with_fix = sample_warning(Severity::Warning, Some(Fix::new(0..10, "fixed text".to_string())));

        let json = serde_json::to_string(&with_fix).unwrap();
        for fragment in ["\"fix\"", "\"replacement\":\"fixed text\""] {
            assert!(json.contains(fragment));
        }
    }

    /// Every category equals itself and differs from all the others.
    #[test]
    fn test_rule_category_equality() {
        assert_eq!(RuleCategory::Heading, RuleCategory::Heading);
        assert_ne!(RuleCategory::Heading, RuleCategory::List);

        let all_categories = [
            RuleCategory::Heading,
            RuleCategory::List,
            RuleCategory::CodeBlock,
            RuleCategory::Link,
            RuleCategory::Image,
            RuleCategory::Html,
            RuleCategory::Emphasis,
            RuleCategory::Whitespace,
            RuleCategory::Blockquote,
            RuleCategory::Table,
            RuleCategory::FrontMatter,
            RuleCategory::Other,
        ];

        // Two entries may compare equal only when they sit at the same index.
        for (outer_idx, lhs) in all_categories.iter().enumerate() {
            for (inner_idx, rhs) in all_categories.iter().enumerate() {
                assert_eq!(lhs == rhs, outer_idx == inner_idx);
            }
        }
    }

    /// `From<io::Error>` and the `Display` output of the string-carrying variants.
    #[test]
    fn test_lint_error_conversions() {
        // Conversion provided by `#[from]` on `IoError`.
        let source = std::io::Error::new(std::io::ErrorKind::NotFound, "file not found");
        let converted = LintError::from(source);
        assert!(matches!(converted, LintError::IoError(_)), "Expected IoError variant");

        // Display text comes from the `#[error(...)]` attributes.
        let display_cases = [
            (
                LintError::InvalidInput("bad input".to_string()),
                "Invalid input: bad input",
            ),
            (
                LintError::FixFailed("couldn't fix".to_string()),
                "Fix failed: couldn't fix",
            ),
            (
                LintError::ParsingError("parse error".to_string()),
                "Parsing error: parse error",
            ),
        ];

        for (error, expected) in display_cases {
            assert_eq!(error.to_string(), expected);
        }
    }
}
406}