rumdl_lib/
lib.rs

1pub mod config;
2pub mod exit_codes;
3pub mod filtered_lines;
4pub mod fix_coordinator;
5pub mod inline_config;
6pub mod lint_context;
7pub mod lsp;
8pub mod markdownlint_config;
9pub mod output;
10pub mod parallel;
11pub mod performance;
12pub mod profiling;
13pub mod rule;
14pub mod vscode;
15#[macro_use]
16pub mod rule_config;
17#[macro_use]
18pub mod rule_config_serde;
19pub mod rules;
20pub mod utils;
21
22pub use rules::heading_utils::{Heading, HeadingStyle};
23pub use rules::*;
24
25pub use crate::lint_context::{LineInfo, LintContext, ListItemInfo};
26use crate::rule::{LintResult, Rule, RuleCategory};
27use std::time::Instant;
28
/// Content characteristics for efficient rule filtering
///
/// Each flag records whether the line scan in `ContentCharacteristics::analyze`
/// saw a marker for that kind of Markdown construct. `should_skip_rule` consults
/// the flags to skip rules whose category has no matching marker in the content.
/// The flags come from cheap substring checks, not from a Markdown parse.
#[derive(Debug, Default)]
struct ContentCharacteristics {
    has_headings: bool,    // # (ATX) or setext underlines made of = / -
    has_lists: bool,       // *, -, +, 1. etc
    has_links: bool,       // any `[`, or an http://, https://, ftp:// URL
    has_code: bool,        // backticks or ~~~ (indented code is meant to count as well)
    has_emphasis: bool,    // * or _ for emphasis
    has_html: bool,        // any `<`
    has_tables: bool,      // | pipes
    has_blockquotes: bool, // > markers
    has_images: bool,      // ![alt](url)
}
42
43impl ContentCharacteristics {
44    fn analyze(content: &str) -> Self {
45        let mut chars = Self { ..Default::default() };
46
47        // Quick single-pass analysis
48        let mut has_atx_heading = false;
49        let mut has_setext_heading = false;
50
51        for line in content.lines() {
52            let trimmed = line.trim();
53
54            // Headings: ATX (#) or Setext (underlines)
55            if !has_atx_heading && trimmed.starts_with('#') {
56                has_atx_heading = true;
57            }
58            if !has_setext_heading && (trimmed.chars().all(|c| c == '=' || c == '-') && trimmed.len() > 1) {
59                has_setext_heading = true;
60            }
61
62            // Quick character-based detection (more efficient than regex)
63            if !chars.has_lists && (line.contains("* ") || line.contains("- ") || line.contains("+ ")) {
64                chars.has_lists = true;
65            }
66            if !chars.has_lists && line.chars().next().is_some_and(|c| c.is_ascii_digit()) && line.contains(". ") {
67                chars.has_lists = true;
68            }
69            if !chars.has_links
70                && (line.contains('[')
71                    || line.contains("http://")
72                    || line.contains("https://")
73                    || line.contains("ftp://"))
74            {
75                chars.has_links = true;
76            }
77            if !chars.has_images && line.contains("![") {
78                chars.has_images = true;
79            }
80            if !chars.has_code && (line.contains('`') || line.contains("~~~")) {
81                chars.has_code = true;
82            }
83            if !chars.has_emphasis && (line.contains('*') || line.contains('_')) {
84                chars.has_emphasis = true;
85            }
86            if !chars.has_html && line.contains('<') {
87                chars.has_html = true;
88            }
89            if !chars.has_tables && line.contains('|') {
90                chars.has_tables = true;
91            }
92            if !chars.has_blockquotes && line.starts_with('>') {
93                chars.has_blockquotes = true;
94            }
95        }
96
97        chars.has_headings = has_atx_heading || has_setext_heading;
98        chars
99    }
100
101    /// Check if a rule should be skipped based on content characteristics
102    fn should_skip_rule(&self, rule: &dyn Rule) -> bool {
103        match rule.category() {
104            RuleCategory::Heading => !self.has_headings,
105            RuleCategory::List => !self.has_lists,
106            RuleCategory::Link => !self.has_links && !self.has_images,
107            RuleCategory::Image => !self.has_images,
108            RuleCategory::CodeBlock => !self.has_code,
109            RuleCategory::Html => !self.has_html,
110            RuleCategory::Emphasis => !self.has_emphasis,
111            RuleCategory::Blockquote => !self.has_blockquotes,
112            RuleCategory::Table => !self.has_tables,
113            // Always check these categories as they apply to all content
114            RuleCategory::Whitespace | RuleCategory::FrontMatter | RuleCategory::Other => false,
115        }
116    }
117}
118
119/// Lint a file against the given rules with intelligent rule filtering
120/// Assumes the provided `rules` vector contains the final,
121/// configured, and filtered set of rules to be executed.
122pub fn lint(
123    content: &str,
124    rules: &[Box<dyn Rule>],
125    _verbose: bool,
126    flavor: crate::config::MarkdownFlavor,
127) -> LintResult {
128    let mut warnings = Vec::new();
129    let _overall_start = Instant::now();
130
131    // Early return for empty content
132    if content.is_empty() {
133        return Ok(warnings);
134    }
135
136    // Parse inline configuration comments once
137    let inline_config = crate::inline_config::InlineConfig::from_content(content);
138
139    // Analyze content characteristics for rule filtering
140    let characteristics = ContentCharacteristics::analyze(content);
141
142    // Filter rules based on content characteristics
143    let applicable_rules: Vec<_> = rules
144        .iter()
145        .filter(|rule| !characteristics.should_skip_rule(rule.as_ref()))
146        .collect();
147
148    // Calculate skipped rules count before consuming applicable_rules
149    let _total_rules = rules.len();
150    let _applicable_count = applicable_rules.len();
151
152    // Parse AST once for rules that can benefit from it
153    let ast_rules_count = applicable_rules.iter().filter(|rule| rule.uses_ast()).count();
154    let ast = if ast_rules_count > 0 {
155        Some(crate::utils::ast_utils::get_cached_ast(content))
156    } else {
157        None
158    };
159
160    // Parse LintContext once (migration step) with the provided flavor
161    let lint_ctx = crate::lint_context::LintContext::new(content, flavor);
162
163    for rule in applicable_rules {
164        let _rule_start = Instant::now();
165
166        // Try optimized paths in order of preference
167        let result = if rule.uses_ast() {
168            if let Some(ref ast_ref) = ast {
169                // 1. AST-based path
170                rule.as_maybe_ast()
171                    .and_then(|ext| ext.check_with_ast_opt(&lint_ctx, ast_ref))
172                    .unwrap_or_else(|| rule.check_with_ast(&lint_ctx, ast_ref))
173            } else {
174                // Fallback to regular check if no AST
175                rule.check(&lint_ctx)
176            }
177        } else {
178            // 2. Regular check path
179            rule.check(&lint_ctx)
180        };
181
182        match result {
183            Ok(rule_warnings) => {
184                // Filter out warnings for rules disabled via inline comments
185                let filtered_warnings: Vec<_> = rule_warnings
186                    .into_iter()
187                    .filter(|warning| {
188                        // Use the warning's rule_name if available, otherwise use the rule's name
189                        let rule_name_to_check = warning.rule_name.as_deref().unwrap_or(rule.name());
190
191                        // Extract the base rule name for sub-rules like "MD029-style" -> "MD029"
192                        let base_rule_name = if let Some(dash_pos) = rule_name_to_check.find('-') {
193                            &rule_name_to_check[..dash_pos]
194                        } else {
195                            rule_name_to_check
196                        };
197
198                        !inline_config.is_rule_disabled(
199                            base_rule_name,
200                            warning.line, // Already 1-indexed
201                        )
202                    })
203                    .collect();
204                warnings.extend(filtered_warnings);
205            }
206            Err(e) => {
207                log::error!("Error checking rule {}: {}", rule.name(), e);
208                return Err(e);
209            }
210        }
211
212        #[cfg(not(test))]
213        if _verbose {
214            let rule_duration = _rule_start.elapsed();
215            if rule_duration.as_millis() > 500 {
216                log::debug!("Rule {} took {:?}", rule.name(), rule_duration);
217            }
218        }
219    }
220
221    #[cfg(not(test))]
222    if _verbose {
223        let skipped_rules = _total_rules - _applicable_count;
224        if skipped_rules > 0 {
225            log::debug!("Skipped {skipped_rules} of {_total_rules} rules based on content analysis");
226        }
227        if ast.is_some() {
228            log::debug!("Used shared AST for {ast_rules_count} rules");
229        }
230    }
231
232    Ok(warnings)
233}
234
/// Get the profiling report
///
/// Returns the string produced by `profiling::get_report()` unchanged.
pub fn get_profiling_report() -> String {
    profiling::get_report()
}
239
/// Reset the profiling data
///
/// Delegates to `profiling::reset()`.
pub fn reset_profiling() {
    profiling::reset()
}
244
/// Get regex cache statistics for performance monitoring
///
/// Returns the map from `crate::utils::regex_cache::get_cache_stats()` unchanged.
/// `get_cache_performance_report` treats each key as a regex pattern and each
/// value as that pattern's usage count.
pub fn get_regex_cache_stats() -> std::collections::HashMap<String, u64> {
    crate::utils::regex_cache::get_cache_stats()
}
249
/// Get AST cache statistics for performance monitoring
///
/// Returns the map from `crate::utils::ast_utils::get_ast_cache_stats()` unchanged.
/// `get_cache_performance_report` treats each entry as one cached AST and each
/// value as its usage count.
/// NOTE(review): the `u64` keys are presumably content hashes — confirm in `ast_utils`.
pub fn get_ast_cache_stats() -> std::collections::HashMap<u64, u64> {
    crate::utils::ast_utils::get_ast_cache_stats()
}
254
/// Clear all caches (useful for testing and memory management)
///
/// Despite the name, only the AST cache is cleared here; see the note below
/// about the regex cache.
pub fn clear_all_caches() {
    crate::utils::ast_utils::clear_ast_cache();
    // Note: Regex cache is intentionally not cleared as it's global and shared
}
260
261/// Get comprehensive cache performance report
262pub fn get_cache_performance_report() -> String {
263    let regex_stats = get_regex_cache_stats();
264    let ast_stats = get_ast_cache_stats();
265
266    let mut report = String::new();
267
268    report.push_str("=== Cache Performance Report ===\n\n");
269
270    // Regex cache statistics
271    report.push_str("Regex Cache:\n");
272    if regex_stats.is_empty() {
273        report.push_str("  No regex patterns cached\n");
274    } else {
275        let total_usage: u64 = regex_stats.values().sum();
276        report.push_str(&format!("  Total patterns: {}\n", regex_stats.len()));
277        report.push_str(&format!("  Total usage: {total_usage}\n"));
278
279        // Show top 5 most used patterns
280        let mut sorted_patterns: Vec<_> = regex_stats.iter().collect();
281        sorted_patterns.sort_by(|a, b| b.1.cmp(a.1));
282
283        report.push_str("  Top patterns by usage:\n");
284        for (pattern, count) in sorted_patterns.iter().take(5) {
285            let truncated_pattern = if pattern.len() > 50 {
286                format!("{}...", &pattern[..47])
287            } else {
288                pattern.to_string()
289            };
290            report.push_str(&format!(
291                "    {} ({}x): {}\n",
292                count,
293                pattern.len().min(50),
294                truncated_pattern
295            ));
296        }
297    }
298
299    report.push('\n');
300
301    // AST cache statistics
302    report.push_str("AST Cache:\n");
303    if ast_stats.is_empty() {
304        report.push_str("  No ASTs cached\n");
305    } else {
306        let total_usage: u64 = ast_stats.values().sum();
307        report.push_str(&format!("  Total ASTs: {}\n", ast_stats.len()));
308        report.push_str(&format!("  Total usage: {total_usage}\n"));
309
310        if total_usage > ast_stats.len() as u64 {
311            let cache_hit_rate = ((total_usage - ast_stats.len() as u64) as f64 / total_usage as f64) * 100.0;
312            report.push_str(&format!("  Cache hit rate: {cache_hit_rate:.1}%\n"));
313        }
314    }
315
316    report
317}
318
/// Unit tests for content analysis, rule filtering, linting and the cache /
/// profiling report helpers defined in this file.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::rule::Rule;
    use crate::rules::{MD001HeadingIncrement, MD009TrailingSpaces, MD012NoMultipleBlanks};

    /// Each characteristic flag is set by its marker and stays unset for empty content.
    #[test]
    fn test_content_characteristics_analyze() {
        // Test empty content
        let chars = ContentCharacteristics::analyze("");
        assert!(!chars.has_headings);
        assert!(!chars.has_lists);
        assert!(!chars.has_links);
        assert!(!chars.has_code);
        assert!(!chars.has_emphasis);
        assert!(!chars.has_html);
        assert!(!chars.has_tables);
        assert!(!chars.has_blockquotes);
        assert!(!chars.has_images);

        // Test content with headings
        let chars = ContentCharacteristics::analyze("# Heading");
        assert!(chars.has_headings);

        // Test setext headings
        let chars = ContentCharacteristics::analyze("Heading\n=======");
        assert!(chars.has_headings);

        // Test lists
        let chars = ContentCharacteristics::analyze("* Item\n- Item 2\n+ Item 3");
        assert!(chars.has_lists);

        // Test ordered lists
        let chars = ContentCharacteristics::analyze("1. First\n2. Second");
        assert!(chars.has_lists);

        // Test links
        let chars = ContentCharacteristics::analyze("[link](url)");
        assert!(chars.has_links);

        // Test URLs
        let chars = ContentCharacteristics::analyze("Visit https://example.com");
        assert!(chars.has_links);

        // Test images
        let chars = ContentCharacteristics::analyze("![alt text](image.png)");
        assert!(chars.has_images);

        // Test code
        let chars = ContentCharacteristics::analyze("`inline code`");
        assert!(chars.has_code);

        let chars = ContentCharacteristics::analyze("~~~\ncode block\n~~~");
        assert!(chars.has_code);

        // Test emphasis
        let chars = ContentCharacteristics::analyze("*emphasis* and _more_");
        assert!(chars.has_emphasis);

        // Test HTML
        let chars = ContentCharacteristics::analyze("<div>HTML content</div>");
        assert!(chars.has_html);

        // Test tables
        let chars = ContentCharacteristics::analyze("| Header | Header |\n|--------|--------|");
        assert!(chars.has_tables);

        // Test blockquotes
        let chars = ContentCharacteristics::analyze("> Quote");
        assert!(chars.has_blockquotes);

        // Test mixed content
        let content = "# Heading\n* List item\n[link](url)\n`code`\n*emphasis*\n<p>html</p>\n| table |\n> quote\n![image](img.png)";
        let chars = ContentCharacteristics::analyze(content);
        assert!(chars.has_headings);
        assert!(chars.has_lists);
        assert!(chars.has_links);
        assert!(chars.has_code);
        assert!(chars.has_emphasis);
        assert!(chars.has_html);
        assert!(chars.has_tables);
        assert!(chars.has_blockquotes);
        assert!(chars.has_images);
    }

    /// Rules are skipped only when their category's flag is unset; whitespace rules always run.
    #[test]
    fn test_content_characteristics_should_skip_rule() {
        let chars = ContentCharacteristics {
            has_headings: true,
            has_lists: false,
            has_links: true,
            has_code: false,
            has_emphasis: true,
            has_html: false,
            has_tables: true,
            has_blockquotes: false,
            has_images: false,
        };

        // Create test rules for different categories
        let heading_rule = MD001HeadingIncrement;
        assert!(!chars.should_skip_rule(&heading_rule));

        let trailing_spaces_rule = MD009TrailingSpaces::new(2, false);
        assert!(!chars.should_skip_rule(&trailing_spaces_rule)); // Whitespace rules always run

        // Test skipping based on content
        let chars_no_headings = ContentCharacteristics {
            has_headings: false,
            ..Default::default()
        };
        assert!(chars_no_headings.should_skip_rule(&heading_rule));
    }

    /// Empty content short-circuits to an empty warning list.
    #[test]
    fn test_lint_empty_content() {
        let rules: Vec<Box<dyn Rule>> = vec![Box::new(MD001HeadingIncrement)];

        let result = lint("", &rules, false, crate::config::MarkdownFlavor::Standard);
        assert!(result.is_ok());
        assert!(result.unwrap().is_empty());
    }

    /// A skipped heading level is reported by MD001.
    #[test]
    fn test_lint_with_violations() {
        let content = "## Level 2\n#### Level 4"; // Skips level 3
        let rules: Vec<Box<dyn Rule>> = vec![Box::new(MD001HeadingIncrement)];

        let result = lint(content, &rules, false, crate::config::MarkdownFlavor::Standard);
        assert!(result.is_ok());
        let warnings = result.unwrap();
        assert!(!warnings.is_empty());
        // Check the rule field of LintWarning struct
        assert_eq!(warnings[0].rule_name.as_deref(), Some("MD001"));
    }

    /// An inline `rumdl-disable` comment suppresses the rule's warnings.
    #[test]
    fn test_lint_with_inline_disable() {
        let content = "<!-- rumdl-disable MD001 -->\n## Level 2\n#### Level 4";
        let rules: Vec<Box<dyn Rule>> = vec![Box::new(MD001HeadingIncrement)];

        let result = lint(content, &rules, false, crate::config::MarkdownFlavor::Standard);
        assert!(result.is_ok());
        let warnings = result.unwrap();
        assert!(warnings.is_empty()); // Should be disabled by inline comment
    }

    /// Linting succeeds on content that contains no lists.
    #[test]
    fn test_lint_rule_filtering() {
        // Content with no lists
        let content = "# Heading\nJust text";
        let rules: Vec<Box<dyn Rule>> = vec![
            Box::new(MD001HeadingIncrement),
            // A list-related rule would be skipped
        ];

        let result = lint(content, &rules, false, crate::config::MarkdownFlavor::Standard);
        assert!(result.is_ok());
    }

    /// The profiling report is a non-empty string mentioning "Profiling".
    #[test]
    fn test_get_profiling_report() {
        // Just test that it returns a string without panicking
        let report = get_profiling_report();
        assert!(!report.is_empty());
        assert!(report.contains("Profiling"));
    }

    /// Resetting profiling leaves a report with no measurements (or profiling disabled).
    #[test]
    fn test_reset_profiling() {
        // Test that reset_profiling doesn't panic
        reset_profiling();

        // After reset, report should indicate no measurements or profiling disabled
        let report = get_profiling_report();
        assert!(report.contains("disabled") || report.contains("no measurements"));
    }

    /// Every recorded regex usage count is positive (the map may be empty).
    #[test]
    fn test_get_regex_cache_stats() {
        let stats = get_regex_cache_stats();

        // If not empty, all values should be positive
        for count in stats.values() {
            assert!(*count > 0);
        }
    }

    /// Every recorded AST usage count is positive (the map may be empty).
    #[test]
    fn test_get_ast_cache_stats() {
        let stats = get_ast_cache_stats();

        // If not empty, all values should be positive
        for count in stats.values() {
            assert!(*count > 0);
        }
    }

    /// Clearing the caches does not panic.
    #[test]
    fn test_clear_all_caches() {
        // Test that clear_all_caches doesn't panic
        clear_all_caches();

        // Function completes successfully - cache state is process-global and may
        // be modified by other tests, so we don't assert on specific state
    }

    /// The cache report always contains its section headers and one line per section state.
    #[test]
    fn test_get_cache_performance_report() {
        // Test that the report generation works and has the correct structure
        let report = get_cache_performance_report();

        // Report should always contain expected section headers
        assert!(report.contains("Cache Performance Report"));
        assert!(report.contains("Regex Cache:"));
        assert!(report.contains("AST Cache:"));

        // Report should contain either usage stats or "no cache" messages
        // (depends on whether other tests have populated the cache)
        assert!(report.contains("Total patterns:") || report.contains("No regex patterns cached"));
        assert!(report.contains("Total ASTs:") || report.contains("No ASTs cached"));
    }

    /// Linting succeeds on content with headings, blank lines and emphasis.
    #[test]
    fn test_lint_with_ast_rules() {
        // Create content that would benefit from AST parsing
        let content = "# Heading\n\nParagraph with **bold** text.";
        let rules: Vec<Box<dyn Rule>> = vec![Box::new(MD012NoMultipleBlanks::new(1))];

        let result = lint(content, &rules, false, crate::config::MarkdownFlavor::Standard);
        assert!(result.is_ok());
    }

    /// Near-miss inputs must not set the heading, list or blockquote flags.
    #[test]
    fn test_content_characteristics_edge_cases() {
        // Test setext heading edge case
        let chars = ContentCharacteristics::analyze("-"); // Single dash, not a heading
        assert!(!chars.has_headings);

        let chars = ContentCharacteristics::analyze("--"); // Two dashes, valid setext
        assert!(chars.has_headings);

        // Test list detection edge cases
        let chars = ContentCharacteristics::analyze("*emphasis*"); // Not a list
        assert!(!chars.has_lists);

        let chars = ContentCharacteristics::analyze("1.Item"); // No space after period
        assert!(!chars.has_lists);

        // A `>` in the middle of a line is not a blockquote marker
        let chars = ContentCharacteristics::analyze("text > not a quote");
        assert!(!chars.has_blockquotes);
    }

    /// The cache report is non-empty and spans several lines.
    #[test]
    fn test_cache_performance_report_formatting() {
        // Add some data to caches to test formatting
        // (Would require actual usage of the caches, which happens during linting)

        let report = get_cache_performance_report();

        // Test truncation of long patterns
        // Since we can't easily add a long pattern to the cache in this test,
        // we'll just verify the report structure is correct
        assert!(!report.is_empty());
        assert!(report.lines().count() > 3); // Should have multiple lines
    }
}