quickmark_core/
linter.rs

1use std::{cell::RefCell, collections::HashMap, fmt::Display, path::PathBuf, rc::Rc};
2use tree_sitter::{Node, Parser};
3use tree_sitter_md::LANGUAGE;
4
5use crate::{
6    config::{QuickmarkConfig, RuleSeverity},
7    rules::{Rule, ALL_RULES},
8    tree_sitter_walker::TreeSitterWalker,
9};
10
/// A position inside a document, expressed as a line and a column offset.
///
/// This type does not fix the numbering base or the unit of `character` by
/// itself; positions produced by `range_from_tree_sitter` copy tree-sitter's
/// row/column values unchanged.
///
/// Implements `PartialEq`/`Eq` so positions can be compared directly, e.g. in
/// assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CharPosition {
    /// Line index of the position.
    pub line: usize,
    /// Column offset within `line`.
    pub character: usize,
}
16
/// A span between two positions in a document.
#[derive(Debug, Clone)]
pub struct Range {
    /// Position where the span begins.
    pub start: CharPosition,
    /// Position where the span ends.
    // NOTE(review): whether `end` is inclusive or exclusive is not fixed by this
    // type; values coming from `range_from_tree_sitter` carry tree-sitter's end
    // point as-is.
    pub end: CharPosition,
}
/// Where a violation was found: the file plus the span inside that file.
#[derive(Debug)]
pub struct Location {
    /// Path of the file the violation belongs to, as handed to `RuleViolation::new`.
    pub file_path: PathBuf,
    /// Span inside the file that the violation refers to.
    pub range: Range,
}
27
/// A single finding reported by a rule linter.
///
/// Fields are private; use the `location()`, `message()` and `rule()` accessors.
#[derive(Debug)]
pub struct RuleViolation {
    /// File and span the violation refers to.
    location: Location,
    /// Human-readable description of the problem.
    message: String,
    /// Static descriptor of the rule that produced this violation.
    rule: &'static Rule,
}
34
35impl RuleViolation {
36    pub fn new(rule: &'static Rule, message: String, file_path: PathBuf, range: Range) -> Self {
37        Self {
38            rule,
39            message,
40            location: Location { file_path, range },
41        }
42    }
43
44    pub fn location(&self) -> &Location {
45        &self.location
46    }
47
48    pub fn message(&self) -> &str {
49        &self.message
50    }
51
52    pub fn rule(&self) -> &'static Rule {
53        self.rule
54    }
55}
56
57/// Convert from tree-sitter range to library range
58pub fn range_from_tree_sitter(ts_range: &tree_sitter::Range) -> Range {
59    Range {
60        start: CharPosition {
61            line: ts_range.start_point.row,
62            character: ts_range.start_point.column,
63        },
64        end: CharPosition {
65            line: ts_range.end_point.row,
66            character: ts_range.end_point.column,
67        },
68    }
69}
70
71impl Display for RuleViolation {
72    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
73        write!(
74            f,
75            "{}:{}:{} {}/{} {}",
76            self.location().file_path.to_string_lossy(),
77            self.location().range.start.line,
78            self.location().range.start.character,
79            self.rule().id,
80            self.rule().alias,
81            self.message()
82        )
83    }
84}
85
/// **SINGLE-USE CONTRACT**: Context instances are designed for one-time use only.
///
/// Each Context instance should be used to analyze exactly one source document.
/// All caches (`lines`, `node_cache`, `document_content`) are filled eagerly by
/// `Context::new`; the `Context` methods only read them afterwards.
#[derive(Debug)]
pub struct Context {
    /// Path of the document being analyzed.
    pub file_path: PathBuf,
    /// Linter configuration in effect for this document.
    pub config: QuickmarkConfig,
    /// Raw text lines for line-based rules (MD013, MD010, etc.) - initialized once per document
    pub lines: RefCell<Vec<String>>,
    /// Cached AST nodes grouped by node kind for efficient access - initialized once per document.
    /// Heading nodes are additionally listed under the `"*heading*"` key.
    pub node_cache: RefCell<HashMap<String, Vec<NodeInfo>>>,
    /// Original document content for byte-based access - initialized once per document
    pub document_content: RefCell<String>,
}
103
/// Lightweight node information for caching.
///
/// Holds only the line extent and kind of an AST node, so it can be stored and
/// cloned without keeping a borrow on the syntax tree.
#[derive(Debug, Clone)]
pub struct NodeInfo {
    /// Row of the node's start position as reported by tree-sitter.
    pub line_start: usize,
    /// Row of the node's end position as reported by tree-sitter.
    pub line_end: usize,
    /// Node kind name as reported by tree-sitter (e.g. the value of `node.kind()`).
    pub kind: String,
}
111
112impl Context {
113    pub fn new(
114        file_path: PathBuf,
115        config: QuickmarkConfig,
116        source: &str,
117        root_node: &Node,
118    ) -> Self {
119        // Parse lines in a way that's compatible with markdownlint's line counting
120        // markdownlint counts a trailing newline as creating an additional empty line
121        let mut lines: Vec<String> = source.lines().map(String::from).collect();
122
123        // If the source ends with a newline, add an empty line to match markdownlint's behavior
124        if source.ends_with('\n') {
125            lines.push(String::new());
126        }
127        let node_cache = Self::build_node_cache(root_node);
128
129        Self {
130            file_path,
131            config,
132            lines: RefCell::new(lines),
133            node_cache: RefCell::new(node_cache),
134            document_content: RefCell::new(source.to_string()),
135        }
136    }
137
138    /// Get the full document content as a string reference
139    /// Returns a reference to the original document content stored during initialization
140    pub fn get_document_content(&self) -> std::cell::Ref<'_, String> {
141        self.document_content.borrow()
142    }
143
144    /// Build cache of nodes filtered by type for efficient rule access
145    fn build_node_cache(root_node: &Node) -> HashMap<String, Vec<NodeInfo>> {
146        let mut cache = HashMap::new();
147        Self::collect_nodes_recursive(root_node, &mut cache);
148        cache
149    }
150
151    fn collect_nodes_recursive(node: &Node, cache: &mut HashMap<String, Vec<NodeInfo>>) {
152        let kind = node.kind();
153        let kind_string = kind.to_string();
154        let node_info = NodeInfo {
155            line_start: node.start_position().row,
156            line_end: node.end_position().row,
157            kind: kind_string.clone(),
158        };
159
160        // Add to cache for this node type
161        cache
162            .entry(kind_string)
163            .or_default()
164            .push(node_info.clone());
165
166        // Add to cache for pattern-based lookups (e.g., all heading types)
167        if kind.contains("heading") {
168            cache
169                .entry("*heading*".to_string())
170                .or_default()
171                .push(node_info);
172        }
173
174        // Recursively process children
175        for i in 0..node.child_count() {
176            if let Some(child) = node.child(i) {
177                Self::collect_nodes_recursive(&child, cache);
178            }
179        }
180    }
181
182    /// Get cached nodes of specific types - optimized equivalent of filterByTypesCached
183    pub fn get_nodes(&self, node_types: &[&str]) -> Vec<NodeInfo> {
184        let cache = self.node_cache.borrow();
185        let mut result = Vec::new();
186        for node_type in node_types {
187            if let Some(nodes) = cache.get(*node_type) {
188                result.extend(nodes.iter().cloned());
189            }
190        }
191        result
192    }
193
194    /// Get the most specific node type that contains a given line number
195    pub fn get_node_type_for_line(&self, line_number: usize) -> String {
196        let cache = self.node_cache.borrow();
197        // Find the most specific (smallest range) node that contains this line
198        let mut best_match: Option<&NodeInfo> = None;
199        let mut smallest_range = usize::MAX;
200
201        for nodes in cache.values() {
202            for node in nodes {
203                if line_number >= node.line_start && line_number <= node.line_end {
204                    let range_size = node.line_end - node.line_start;
205                    if range_size < smallest_range {
206                        smallest_range = range_size;
207                        best_match = Some(node);
208                    }
209                }
210            }
211        }
212
213        best_match
214            .map(|n| n.kind.clone())
215            .unwrap_or_else(|| "text".to_string())
216    }
217}
218
/// **SINGLE-USE CONTRACT**: RuleLinter instances are designed for one-time use only.
///
/// Each RuleLinter instance should be used to analyze exactly one source document
/// and then discarded. This eliminates the complexity of state management and cleanup:
///
/// - No reset/cleanup methods needed
/// - No state contamination between different documents
/// - Simpler, more predictable behavior
///
/// After calling `analyze()` on a `MultiRuleLinter`, the entire linter and all its
/// rule instances become invalid and should not be reused.
///
/// ## Usage Pattern
/// ```rust,no_run
/// # use quickmark_core::linter::MultiRuleLinter;
/// # use quickmark_core::config::QuickmarkConfig;
/// # use std::path::PathBuf;
/// # let path = PathBuf::new();
/// # let config: QuickmarkConfig = unimplemented!();
/// # let source1 = "";
/// # let source2 = "";
///
/// // Correct: Create fresh linter for each document
/// let mut linter1 = MultiRuleLinter::new_for_document(path.clone(), config.clone(), source1);
/// let violations1 = linter1.analyze(); // Use once, then discard
///
/// // Create new linter for next document
/// let mut linter2 = MultiRuleLinter::new_for_document(path, config, source2);
/// let violations2 = linter2.analyze(); // Fresh linter, no contamination
/// ```
pub trait RuleLinter {
    /// Process a single AST node and accumulate state for violation detection.
    ///
    /// `node` is the AST node currently being visited by the tree walk.
    ///
    /// **CONTRACT**: This method will be called exactly once per AST node
    /// for a single document analysis session. Rule linters have access to the
    /// document content and parsed data through their initialized Context.
    fn feed(&mut self, node: &Node);

    /// Called after all nodes have been processed to return all violations found.
    ///
    /// Returns every violation this linter collected for the document.
    ///
    /// **CONTRACT**: This method will be called exactly once at the end of document analysis.
    fn finalize(&mut self) -> Vec<RuleViolation>;
}
/// **SINGLE-USE CONTRACT**: MultiRuleLinter instances are designed for one-time use only.
///
/// Create a fresh MultiRuleLinter for each document you want to analyze using `new_for_document()`.
/// After calling `analyze()`, the linter and all its rule instances should be discarded.
pub struct MultiRuleLinter {
    /// One linter per active rule; empty when no rule is enabled.
    linters: Vec<Box<dyn RuleLinter>>,
    /// Syntax tree of the document; `None` when parsing was skipped because no rule is enabled.
    tree: Option<tree_sitter::Tree>,
}
270
271impl MultiRuleLinter {
272    /// **SINGLE-USE API ENFORCEMENT**: Create a MultiRuleLinter bound to a specific document.
273    ///
274    /// This constructor enforces the single-use contract by:
275    /// 1. Taking the document content immediately
276    /// 2. Parsing and initializing the context cache upfront
277    /// 3. Creating rule linters with pre-initialized context
278    /// 4. Making the linter ready for immediate use with `analyze()`
279    ///
280    /// After calling `analyze()`, this linter instance should be discarded.
281    pub fn new_for_document(file_path: PathBuf, config: QuickmarkConfig, document: &str) -> Self {
282        // Early exit optimization: Check if any rules are enabled before expensive operations
283        let active_rules: Vec<_> = ALL_RULES
284            .iter()
285            .filter(|r| {
286                config
287                    .linters
288                    .severity
289                    .get(r.alias)
290                    .map(|severity| *severity != RuleSeverity::Off)
291                    .unwrap_or(false)
292            })
293            .collect();
294
295        // If no rules are active, create minimal linter that does no work
296        if active_rules.is_empty() {
297            return Self {
298                linters: Vec::new(),
299                tree: None,
300            };
301        }
302
303        // Parse the document only when we have active rules
304        let mut parser = Parser::new();
305        parser
306            .set_language(&LANGUAGE.into())
307            .expect("Error loading Markdown grammar");
308        let tree = parser.parse(document, None).expect("Parse failed");
309
310        // Create context with pre-initialized cache only for active rules
311        let context = Rc::new(Context::new(file_path, config, document, &tree.root_node()));
312
313        // Create rule linters for active rules only
314        let linters = active_rules
315            .iter()
316            .map(|r| ((r.new_linter)(context.clone())))
317            .collect();
318
319        Self {
320            linters,
321            tree: Some(tree),
322        }
323    }
324
325    /// Analyze the document that was provided during construction.
326    ///
327    /// **SINGLE-USE CONTRACT**: This method should be called exactly once.
328    /// After calling this method, the linter instance should be discarded.
329    pub fn analyze(&mut self) -> Vec<RuleViolation> {
330        // Early exit optimization: If no linters are active, return immediately
331        if self.linters.is_empty() {
332            return Vec::new();
333        }
334
335        // If we have linters but no tree (shouldn't happen), return empty
336        let tree = match &self.tree {
337            Some(tree) => tree,
338            None => return Vec::new(),
339        };
340
341        let walker = TreeSitterWalker::new(tree);
342
343        // Feed all nodes to all linters
344        walker.walk(|node| {
345            for linter in &mut self.linters {
346                linter.feed(&node);
347            }
348        });
349
350        // Collect all violations from finalize
351        let mut violations = Vec::new();
352        for linter in &mut self.linters {
353            let linter_violations = linter.finalize();
354            violations.extend(linter_violations);
355        }
356
357        violations
358    }
359}
360
#[cfg(test)]
mod test {
    use std::{collections::HashMap, path::PathBuf};

    use crate::{
        config::{self, QuickmarkConfig, RuleSeverity},
        rules::{md001::MD001, md003::MD003, md013::MD013},
    };

    use super::MultiRuleLinter;

    /// Runs several rules over one document and checks that violations from
    /// different rules are all reported, in the expected order.
    #[test]
    fn test_multiple_violations() {
        // Enable three rules; only two of them are expected to fire on the input below.
        let severity: HashMap<_, _> = vec![
            (MD001.alias.to_string(), RuleSeverity::Error),
            (MD003.alias.to_string(), RuleSeverity::Error),
            (MD013.alias.to_string(), RuleSeverity::Error),
        ]
        .into_iter()
        .collect();

        let config = QuickmarkConfig {
            linters: config::LintersTable {
                severity,
                settings: config::LintersSettingsTable {
                    heading_style: config::MD003HeadingStyleTable {
                        style: config::HeadingStyle::ATX,
                    },
                    ..Default::default()
                },
            },
        };

        // The document starts with an empty line, so line indices below are:
        //   1: ATX heading, 2-3: setext heading, 4: "#### Fourth level".
        // Expected findings:
        // - MD003 on the setext heading (index 2), since ATX is the configured style
        // - MD001 on the "#### Fourth level" heading (index 4)
        let input = "
# First heading
Second heading
==============
#### Fourth level
";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(
            2,
            violations.len(),
            "Should find both MD001 and MD003 violations"
        );
        // Violations are grouped per linter, so MD001's finding precedes MD003's.
        assert_eq!(MD001.id, violations[0].rule().id);
        assert_eq!(4, violations[0].location().range.start.line);
        assert_eq!(MD003.id, violations[1].rule().id);
        assert_eq!(2, violations[1].location().range.start.line);
    }
}