rumdl/
lib.rs

1pub mod config;
2pub mod init;
3pub mod lint_context;
4pub mod markdownlint_config;
5pub mod profiling;
6pub mod rule;
7pub mod rules;
8pub mod utils;
9
10#[cfg(feature = "python")]
11pub mod python;
12
13pub use rules::heading_utils::{Heading, HeadingStyle};
14pub use rules::*;
15
16pub use crate::lint_context::LintContext;
17use crate::rule::{LintResult, Rule, RuleCategory};
18use crate::utils::document_structure::DocumentStructure;
19use std::time::Instant;
20
/// Content characteristics for efficient rule filtering.
///
/// Each flag is a cheap, heuristic "might be present" signal gathered by
/// scanning the raw text; a `true` value can be a false positive (for example
/// a `*` inside prose sets `has_emphasis`). The flags are used to decide which
/// rule categories can be skipped for a document.
#[derive(Debug, Default)]
struct ContentCharacteristics {
    /// ATX (`#`) or setext (underlined) headings may be present.
    has_headings: bool,
    /// List markers such as `*`, `-`, `+` or `1.` may be present.
    has_lists: bool,
    /// Link syntax such as `[text](url)` or `[text][ref]` may be present.
    has_links: bool,
    /// Code markers such as backticks or `~~~` fences may be present.
    has_code: bool,
    /// `*` or `_` emphasis markers may be present.
    has_emphasis: bool,
    /// A `<` character, and therefore possibly an HTML tag, is present.
    has_html: bool,
    /// A `|` character, and therefore possibly a table, is present.
    has_tables: bool,
    /// A `>` blockquote marker may be present.
    has_blockquotes: bool,
    /// Image syntax (`![alt](url)`) may be present.
    has_images: bool,
    /// Number of lines in the analyzed content.
    line_count: usize,
}
35
36impl ContentCharacteristics {
37    fn analyze(content: &str) -> Self {
38        let mut chars = Self::default();
39        chars.line_count = content.lines().count();
40
41        // Quick single-pass analysis
42        let mut has_atx_heading = false;
43        let mut has_setext_heading = false;
44
45        for line in content.lines() {
46            let trimmed = line.trim();
47
48            // Headings: ATX (#) or Setext (underlines)
49            if !has_atx_heading && trimmed.starts_with('#') {
50                has_atx_heading = true;
51            }
52            if !has_setext_heading && (trimmed.chars().all(|c| c == '=' || c == '-') && trimmed.len() > 1) {
53                has_setext_heading = true;
54            }
55
56            // Quick character-based detection (more efficient than regex)
57            if !chars.has_lists && (line.contains("* ") || line.contains("- ") || line.contains("+ ")) {
58                chars.has_lists = true;
59            }
60            if !chars.has_lists && line.chars().next().map_or(false, |c| c.is_ascii_digit()) && line.contains(". ") {
61                chars.has_lists = true;
62            }
63            if !chars.has_links && line.contains('[') {
64                chars.has_links = true;
65            }
66            if !chars.has_images && line.contains("![") {
67                chars.has_images = true;
68            }
69            if !chars.has_code && (line.contains('`') || line.contains("~~~")) {
70                chars.has_code = true;
71            }
72            if !chars.has_emphasis && (line.contains('*') || line.contains('_')) {
73                chars.has_emphasis = true;
74            }
75            if !chars.has_html && line.contains('<') {
76                chars.has_html = true;
77            }
78            if !chars.has_tables && line.contains('|') {
79                chars.has_tables = true;
80            }
81            if !chars.has_blockquotes && line.starts_with('>') {
82                chars.has_blockquotes = true;
83            }
84        }
85
86        chars.has_headings = has_atx_heading || has_setext_heading;
87        chars
88    }
89
90    /// Check if a rule should be skipped based on content characteristics
91    fn should_skip_rule(&self, rule: &dyn Rule) -> bool {
92        match rule.category() {
93            RuleCategory::Heading => !self.has_headings,
94            RuleCategory::List => !self.has_lists,
95            RuleCategory::Link => !self.has_links && !self.has_images,
96            RuleCategory::Image => !self.has_images,
97            RuleCategory::CodeBlock => !self.has_code,
98            RuleCategory::Html => !self.has_html,
99            RuleCategory::Emphasis => !self.has_emphasis,
100            RuleCategory::Blockquote => !self.has_blockquotes,
101            RuleCategory::Table => !self.has_tables,
102            // Always check these categories as they apply to all content
103            RuleCategory::Whitespace | RuleCategory::FrontMatter | RuleCategory::Other => false,
104        }
105    }
106}
107
108/// Lint a file against the given rules with intelligent rule filtering
109/// Assumes the provided `rules` vector contains the final,
110/// configured, and filtered set of rules to be executed.
111pub fn lint(content: &str, rules: &[Box<dyn Rule>], _verbose: bool) -> LintResult {
112    let mut warnings = Vec::new();
113    let _overall_start = Instant::now();
114
115    // Early return for empty content
116    if content.is_empty() {
117        return Ok(warnings);
118    }
119
120    // Analyze content characteristics for rule filtering
121    let characteristics = ContentCharacteristics::analyze(content);
122
123    // Filter rules based on content characteristics
124    let applicable_rules: Vec<_> = rules
125        .iter()
126        .filter(|rule| !characteristics.should_skip_rule(rule.as_ref()))
127        .collect();
128
129    // Calculate skipped rules count before consuming applicable_rules
130    let _total_rules = rules.len();
131    let _applicable_count = applicable_rules.len();
132
133    // Parse DocumentStructure once
134    let structure = DocumentStructure::new(content);
135
136    // Parse AST once for rules that can benefit from it
137    let ast_rules_count = applicable_rules.iter().filter(|rule| rule.uses_ast()).count();
138    let ast = if ast_rules_count > 0 {
139        Some(crate::utils::ast_utils::get_cached_ast(content))
140    } else {
141        None
142    };
143
144    // Parse LintContext once (migration step)
145    let lint_ctx = crate::lint_context::LintContext::new(content);
146
147    for rule in applicable_rules {
148        let _rule_start = Instant::now();
149
150        // Try optimized paths in order of preference
151        let result = if rule.uses_ast() && ast.is_some() {
152            // 1. AST-based path
153            rule.as_maybe_ast()
154                .and_then(|ext| ext.check_with_ast_opt(&lint_ctx, ast.as_ref().unwrap()))
155                .unwrap_or_else(|| rule.check_with_ast(&lint_ctx, ast.as_ref().unwrap()))
156        } else {
157            // 2. Document structure path
158            rule.as_maybe_document_structure()
159                .and_then(|ext| ext.check_with_structure_opt(&lint_ctx, &structure))
160                .unwrap_or_else(|| rule.check(&lint_ctx))
161        };
162
163        match result {
164            Ok(rule_warnings) => {
165                warnings.extend(rule_warnings);
166            }
167            Err(e) => {
168                log::error!("Error checking rule {}: {}", rule.name(), e);
169                return Err(e);
170            }
171        }
172
173        #[cfg(not(test))]
174        if _verbose {
175            let rule_duration = _rule_start.elapsed();
176            if rule_duration.as_millis() > 500 {
177                log::debug!("Rule {} took {:?}", rule.name(), rule_duration);
178            }
179        }
180    }
181
182    #[cfg(not(test))]
183    if _verbose {
184        let skipped_rules = _total_rules - _applicable_count;
185        if skipped_rules > 0 {
186            log::debug!("Skipped {} of {} rules based on content analysis", skipped_rules, _total_rules);
187        }
188        if ast.is_some() {
189            log::debug!("Used shared AST for {} rules", ast_rules_count);
190        }
191    }
192
193    Ok(warnings)
194}
195
196/// Get the profiling report
197pub fn get_profiling_report() -> String {
198    profiling::get_report()
199}
200
201/// Reset the profiling data
202pub fn reset_profiling() {
203    profiling::reset()
204}
205
206/// Get regex cache statistics for performance monitoring
207pub fn get_regex_cache_stats() -> std::collections::HashMap<String, u64> {
208    crate::utils::regex_cache::get_cache_stats()
209}
210
211/// Get AST cache statistics for performance monitoring
212pub fn get_ast_cache_stats() -> std::collections::HashMap<u64, u64> {
213    crate::utils::ast_utils::get_ast_cache_stats()
214}
215
216/// Clear all caches (useful for testing and memory management)
217pub fn clear_all_caches() {
218    crate::utils::ast_utils::clear_ast_cache();
219    // Note: Regex cache is intentionally not cleared as it's global and shared
220}
221
222/// Get comprehensive cache performance report
223pub fn get_cache_performance_report() -> String {
224    let regex_stats = get_regex_cache_stats();
225    let ast_stats = get_ast_cache_stats();
226
227    let mut report = String::new();
228
229    report.push_str("=== Cache Performance Report ===\n\n");
230
231    // Regex cache statistics
232    report.push_str("Regex Cache:\n");
233    if regex_stats.is_empty() {
234        report.push_str("  No regex patterns cached\n");
235    } else {
236        let total_usage: u64 = regex_stats.values().sum();
237        report.push_str(&format!("  Total patterns: {}\n", regex_stats.len()));
238        report.push_str(&format!("  Total usage: {}\n", total_usage));
239
240        // Show top 5 most used patterns
241        let mut sorted_patterns: Vec<_> = regex_stats.iter().collect();
242        sorted_patterns.sort_by(|a, b| b.1.cmp(a.1));
243
244        report.push_str("  Top patterns by usage:\n");
245        for (pattern, count) in sorted_patterns.iter().take(5) {
246            let truncated_pattern = if pattern.len() > 50 {
247                format!("{}...", &pattern[..47])
248            } else {
249                pattern.to_string()
250            };
251            report.push_str(&format!("    {} ({}x): {}\n", count, pattern.len().min(50), truncated_pattern));
252        }
253    }
254
255    report.push_str("\n");
256
257    // AST cache statistics
258    report.push_str("AST Cache:\n");
259    if ast_stats.is_empty() {
260        report.push_str("  No AST nodes cached\n");
261    } else {
262        let total_usage: u64 = ast_stats.values().sum();
263        report.push_str(&format!("  Total ASTs: {}\n", ast_stats.len()));
264        report.push_str(&format!("  Total usage: {}\n", total_usage));
265
266        if total_usage > ast_stats.len() as u64 {
267            let cache_hit_rate = ((total_usage - ast_stats.len() as u64) as f64 / total_usage as f64) * 100.0;
268            report.push_str(&format!("  Cache hit rate: {:.1}%\n", cache_hit_rate));
269        }
270    }
271
272    report
273}