rumdl/
lib.rs

1pub mod config;
2pub mod exit_codes;
3pub mod init;
4pub mod inline_config;
5pub mod lint_context;
6pub mod lsp;
7pub mod markdownlint_config;
8pub mod output;
9pub mod parallel;
10pub mod performance;
11pub mod profiling;
12pub mod rule;
13pub mod vscode;
14#[macro_use]
15pub mod rule_config;
16#[macro_use]
17pub mod rule_config_serde;
18pub mod rules;
19pub mod utils;
20
21#[cfg(feature = "python")]
22pub mod python;
23
24pub use rules::heading_utils::{Heading, HeadingStyle};
25pub use rules::*;
26
27pub use crate::lint_context::{LineInfo, LintContext, ListItemInfo};
28use crate::rule::{LintResult, Rule, RuleCategory};
29use crate::utils::document_structure::DocumentStructure;
30use std::time::Instant;
31
32/// Content characteristics for efficient rule filtering
33#[derive(Debug, Default)]
34struct ContentCharacteristics {
35    has_headings: bool,    // # or setext headings
36    has_lists: bool,       // *, -, +, 1. etc
37    has_links: bool,       // [text](url) or [text][ref]
38    has_code: bool,        // ``` or ~~~ or indented code
39    has_emphasis: bool,    // * or _ for emphasis
40    has_html: bool,        // < > tags
41    has_tables: bool,      // | pipes
42    has_blockquotes: bool, // > markers
43    has_images: bool,      // ![alt](url)
44}
45
46impl ContentCharacteristics {
47    fn analyze(content: &str) -> Self {
48        let mut chars = Self { ..Default::default() };
49
50        // Quick single-pass analysis
51        let mut has_atx_heading = false;
52        let mut has_setext_heading = false;
53
54        for line in content.lines() {
55            let trimmed = line.trim();
56
57            // Headings: ATX (#) or Setext (underlines)
58            if !has_atx_heading && trimmed.starts_with('#') {
59                has_atx_heading = true;
60            }
61            if !has_setext_heading && (trimmed.chars().all(|c| c == '=' || c == '-') && trimmed.len() > 1) {
62                has_setext_heading = true;
63            }
64
65            // Quick character-based detection (more efficient than regex)
66            if !chars.has_lists && (line.contains("* ") || line.contains("- ") || line.contains("+ ")) {
67                chars.has_lists = true;
68            }
69            if !chars.has_lists && line.chars().next().is_some_and(|c| c.is_ascii_digit()) && line.contains(". ") {
70                chars.has_lists = true;
71            }
72            if !chars.has_links
73                && (line.contains('[')
74                    || line.contains("http://")
75                    || line.contains("https://")
76                    || line.contains("ftp://"))
77            {
78                chars.has_links = true;
79            }
80            if !chars.has_images && line.contains("![") {
81                chars.has_images = true;
82            }
83            if !chars.has_code && (line.contains('`') || line.contains("~~~")) {
84                chars.has_code = true;
85            }
86            if !chars.has_emphasis && (line.contains('*') || line.contains('_')) {
87                chars.has_emphasis = true;
88            }
89            if !chars.has_html && line.contains('<') {
90                chars.has_html = true;
91            }
92            if !chars.has_tables && line.contains('|') {
93                chars.has_tables = true;
94            }
95            if !chars.has_blockquotes && line.starts_with('>') {
96                chars.has_blockquotes = true;
97            }
98        }
99
100        chars.has_headings = has_atx_heading || has_setext_heading;
101        chars
102    }
103
104    /// Check if a rule should be skipped based on content characteristics
105    fn should_skip_rule(&self, rule: &dyn Rule) -> bool {
106        match rule.category() {
107            RuleCategory::Heading => !self.has_headings,
108            RuleCategory::List => !self.has_lists,
109            RuleCategory::Link => !self.has_links && !self.has_images,
110            RuleCategory::Image => !self.has_images,
111            RuleCategory::CodeBlock => !self.has_code,
112            RuleCategory::Html => !self.has_html,
113            RuleCategory::Emphasis => !self.has_emphasis,
114            RuleCategory::Blockquote => !self.has_blockquotes,
115            RuleCategory::Table => !self.has_tables,
116            // Always check these categories as they apply to all content
117            RuleCategory::Whitespace | RuleCategory::FrontMatter | RuleCategory::Other => false,
118        }
119    }
120}
121
122/// Lint a file against the given rules with intelligent rule filtering
123/// Assumes the provided `rules` vector contains the final,
124/// configured, and filtered set of rules to be executed.
125pub fn lint(content: &str, rules: &[Box<dyn Rule>], _verbose: bool) -> LintResult {
126    let mut warnings = Vec::new();
127    let _overall_start = Instant::now();
128
129    // Early return for empty content
130    if content.is_empty() {
131        return Ok(warnings);
132    }
133
134    // Parse inline configuration comments once
135    let inline_config = crate::inline_config::InlineConfig::from_content(content);
136
137    // Analyze content characteristics for rule filtering
138    let characteristics = ContentCharacteristics::analyze(content);
139
140    // Filter rules based on content characteristics
141    let applicable_rules: Vec<_> = rules
142        .iter()
143        .filter(|rule| !characteristics.should_skip_rule(rule.as_ref()))
144        .collect();
145
146    // Calculate skipped rules count before consuming applicable_rules
147    let _total_rules = rules.len();
148    let _applicable_count = applicable_rules.len();
149
150    // Parse DocumentStructure once
151    let structure = DocumentStructure::new(content);
152
153    // Parse AST once for rules that can benefit from it
154    let ast_rules_count = applicable_rules.iter().filter(|rule| rule.uses_ast()).count();
155    let ast = if ast_rules_count > 0 {
156        Some(crate::utils::ast_utils::get_cached_ast(content))
157    } else {
158        None
159    };
160
161    // Parse LintContext once (migration step)
162    let lint_ctx = crate::lint_context::LintContext::new(content);
163
164    for rule in applicable_rules {
165        let _rule_start = Instant::now();
166
167        // Try optimized paths in order of preference
168        let result = if rule.uses_ast() {
169            if let Some(ref ast_ref) = ast {
170                // 1. AST-based path
171                rule.as_maybe_ast()
172                    .and_then(|ext| ext.check_with_ast_opt(&lint_ctx, ast_ref))
173                    .unwrap_or_else(|| rule.check_with_ast(&lint_ctx, ast_ref))
174            } else {
175                // Fallback to regular check if no AST
176                rule.as_maybe_document_structure()
177                    .and_then(|ext| ext.check_with_structure_opt(&lint_ctx, &structure))
178                    .unwrap_or_else(|| rule.check(&lint_ctx))
179            }
180        } else {
181            // 2. Document structure path
182            rule.as_maybe_document_structure()
183                .and_then(|ext| ext.check_with_structure_opt(&lint_ctx, &structure))
184                .unwrap_or_else(|| rule.check(&lint_ctx))
185        };
186
187        match result {
188            Ok(rule_warnings) => {
189                // Filter out warnings for rules disabled via inline comments
190                let filtered_warnings: Vec<_> = rule_warnings
191                    .into_iter()
192                    .filter(|warning| {
193                        !inline_config.is_rule_disabled(
194                            rule.name(),
195                            warning.line, // Already 1-indexed
196                        )
197                    })
198                    .collect();
199                warnings.extend(filtered_warnings);
200            }
201            Err(e) => {
202                log::error!("Error checking rule {}: {}", rule.name(), e);
203                return Err(e);
204            }
205        }
206
207        #[cfg(not(test))]
208        if _verbose {
209            let rule_duration = _rule_start.elapsed();
210            if rule_duration.as_millis() > 500 {
211                log::debug!("Rule {} took {:?}", rule.name(), rule_duration);
212            }
213        }
214    }
215
216    #[cfg(not(test))]
217    if _verbose {
218        let skipped_rules = _total_rules - _applicable_count;
219        if skipped_rules > 0 {
220            log::debug!("Skipped {skipped_rules} of {_total_rules} rules based on content analysis");
221        }
222        if ast.is_some() {
223            log::debug!("Used shared AST for {ast_rules_count} rules");
224        }
225    }
226
227    Ok(warnings)
228}
229
/// Get the profiling report.
///
/// Thin public wrapper that returns whatever `profiling::get_report()` produces.
pub fn get_profiling_report() -> String {
    profiling::get_report()
}
234
/// Reset the profiling data.
///
/// Thin public wrapper that delegates to `profiling::reset()`.
pub fn reset_profiling() {
    profiling::reset()
}
239
/// Get regex cache statistics for performance monitoring.
///
/// Returns the map produced by `utils::regex_cache::get_cache_stats()`.
/// `get_cache_performance_report` treats the keys as regex patterns and the
/// values as usage counts.
pub fn get_regex_cache_stats() -> std::collections::HashMap<String, u64> {
    crate::utils::regex_cache::get_cache_stats()
}
244
/// Get AST cache statistics for performance monitoring.
///
/// Returns the map produced by `utils::ast_utils::get_ast_cache_stats()`.
/// `get_cache_performance_report` treats each entry as one cached AST and the
/// value as its usage count.
/// NOTE(review): the `u64` keys are presumably content hashes — confirm in `ast_utils`.
pub fn get_ast_cache_stats() -> std::collections::HashMap<u64, u64> {
    crate::utils::ast_utils::get_ast_cache_stats()
}
249
/// Clear all caches (useful for testing and memory management).
///
/// Despite the name, only the AST cache is cleared (via
/// `utils::ast_utils::clear_ast_cache()`); the regex cache is left untouched.
pub fn clear_all_caches() {
    crate::utils::ast_utils::clear_ast_cache();
    // Note: Regex cache is intentionally not cleared as it's global and shared
}
255
256/// Get comprehensive cache performance report
257pub fn get_cache_performance_report() -> String {
258    let regex_stats = get_regex_cache_stats();
259    let ast_stats = get_ast_cache_stats();
260
261    let mut report = String::new();
262
263    report.push_str("=== Cache Performance Report ===\n\n");
264
265    // Regex cache statistics
266    report.push_str("Regex Cache:\n");
267    if regex_stats.is_empty() {
268        report.push_str("  No regex patterns cached\n");
269    } else {
270        let total_usage: u64 = regex_stats.values().sum();
271        report.push_str(&format!("  Total patterns: {}\n", regex_stats.len()));
272        report.push_str(&format!("  Total usage: {total_usage}\n"));
273
274        // Show top 5 most used patterns
275        let mut sorted_patterns: Vec<_> = regex_stats.iter().collect();
276        sorted_patterns.sort_by(|a, b| b.1.cmp(a.1));
277
278        report.push_str("  Top patterns by usage:\n");
279        for (pattern, count) in sorted_patterns.iter().take(5) {
280            let truncated_pattern = if pattern.len() > 50 {
281                format!("{}...", &pattern[..47])
282            } else {
283                pattern.to_string()
284            };
285            report.push_str(&format!(
286                "    {} ({}x): {}\n",
287                count,
288                pattern.len().min(50),
289                truncated_pattern
290            ));
291        }
292    }
293
294    report.push('\n');
295
296    // AST cache statistics
297    report.push_str("AST Cache:\n");
298    if ast_stats.is_empty() {
299        report.push_str("  No AST nodes cached\n");
300    } else {
301        let total_usage: u64 = ast_stats.values().sum();
302        report.push_str(&format!("  Total ASTs: {}\n", ast_stats.len()));
303        report.push_str(&format!("  Total usage: {total_usage}\n"));
304
305        if total_usage > ast_stats.len() as u64 {
306            let cache_hit_rate = ((total_usage - ast_stats.len() as u64) as f64 / total_usage as f64) * 100.0;
307            report.push_str(&format!("  Cache hit rate: {cache_hit_rate:.1}%\n"));
308        }
309    }
310
311    report
312}
313
#[cfg(test)]
mod tests {
    use super::*;
    use crate::rule::Rule;
    use crate::rules::{MD001HeadingIncrement, MD009TrailingSpaces, MD012NoMultipleBlanks};

    /// Each construct is detected on its own and all together; empty content sets no flag.
    #[test]
    fn test_content_characteristics_analyze() {
        // Test empty content
        let chars = ContentCharacteristics::analyze("");
        assert!(!chars.has_headings);
        assert!(!chars.has_lists);
        assert!(!chars.has_links);
        assert!(!chars.has_code);
        assert!(!chars.has_emphasis);
        assert!(!chars.has_html);
        assert!(!chars.has_tables);
        assert!(!chars.has_blockquotes);
        assert!(!chars.has_images);

        // Test content with headings
        let chars = ContentCharacteristics::analyze("# Heading");
        assert!(chars.has_headings);

        // Test setext headings
        let chars = ContentCharacteristics::analyze("Heading\n=======");
        assert!(chars.has_headings);

        // Test lists
        let chars = ContentCharacteristics::analyze("* Item\n- Item 2\n+ Item 3");
        assert!(chars.has_lists);

        // Test ordered lists
        let chars = ContentCharacteristics::analyze("1. First\n2. Second");
        assert!(chars.has_lists);

        // Test links
        let chars = ContentCharacteristics::analyze("[link](url)");
        assert!(chars.has_links);

        // Test URLs
        let chars = ContentCharacteristics::analyze("Visit https://example.com");
        assert!(chars.has_links);

        // Test images
        let chars = ContentCharacteristics::analyze("![alt text](image.png)");
        assert!(chars.has_images);

        // Test code
        let chars = ContentCharacteristics::analyze("`inline code`");
        assert!(chars.has_code);

        let chars = ContentCharacteristics::analyze("~~~\ncode block\n~~~");
        assert!(chars.has_code);

        // Test emphasis
        let chars = ContentCharacteristics::analyze("*emphasis* and _more_");
        assert!(chars.has_emphasis);

        // Test HTML
        let chars = ContentCharacteristics::analyze("<div>HTML content</div>");
        assert!(chars.has_html);

        // Test tables
        let chars = ContentCharacteristics::analyze("| Header | Header |\n|--------|--------|");
        assert!(chars.has_tables);

        // Test blockquotes
        let chars = ContentCharacteristics::analyze("> Quote");
        assert!(chars.has_blockquotes);

        // Test mixed content
        let content = "# Heading\n* List item\n[link](url)\n`code`\n*emphasis*\n<p>html</p>\n| table |\n> quote\n![image](img.png)";
        let chars = ContentCharacteristics::analyze(content);
        assert!(chars.has_headings);
        assert!(chars.has_lists);
        assert!(chars.has_links);
        assert!(chars.has_code);
        assert!(chars.has_emphasis);
        assert!(chars.has_html);
        assert!(chars.has_tables);
        assert!(chars.has_blockquotes);
        assert!(chars.has_images);
    }

    /// Rules are skipped only when their category's construct is absent.
    #[test]
    fn test_content_characteristics_should_skip_rule() {
        let chars = ContentCharacteristics {
            has_headings: true,
            has_lists: false,
            has_links: true,
            has_code: false,
            has_emphasis: true,
            has_html: false,
            has_tables: true,
            has_blockquotes: false,
            has_images: false,
        };

        // Create test rules for different categories
        let heading_rule = MD001HeadingIncrement;
        assert!(!chars.should_skip_rule(&heading_rule));

        let trailing_spaces_rule = MD009TrailingSpaces::new(2, false);
        assert!(!chars.should_skip_rule(&trailing_spaces_rule)); // Whitespace rules always run

        // Test skipping based on content
        let chars_no_headings = ContentCharacteristics {
            has_headings: false,
            ..Default::default()
        };
        assert!(chars_no_headings.should_skip_rule(&heading_rule));
    }

    /// Empty content short-circuits to an empty warning list.
    #[test]
    fn test_lint_empty_content() {
        let rules: Vec<Box<dyn Rule>> = vec![Box::new(MD001HeadingIncrement)];

        let result = lint("", &rules, false);
        assert!(result.is_ok());
        assert!(result.unwrap().is_empty());
    }

    /// A skipped heading level is reported by MD001.
    #[test]
    fn test_lint_with_violations() {
        let content = "## Level 2\n#### Level 4"; // Skips level 3
        let rules: Vec<Box<dyn Rule>> = vec![Box::new(MD001HeadingIncrement)];

        let result = lint(content, &rules, false);
        assert!(result.is_ok());
        let warnings = result.unwrap();
        assert!(!warnings.is_empty());
        // Check the rule field of LintWarning struct
        assert_eq!(warnings[0].rule_name, Some("MD001"));
    }

    /// An inline disable comment suppresses the rule's warnings.
    #[test]
    fn test_lint_with_inline_disable() {
        let content = "<!-- rumdl-disable MD001 -->\n## Level 2\n#### Level 4";
        let rules: Vec<Box<dyn Rule>> = vec![Box::new(MD001HeadingIncrement)];

        let result = lint(content, &rules, false);
        assert!(result.is_ok());
        let warnings = result.unwrap();
        assert!(warnings.is_empty()); // Should be disabled by inline comment
    }

    /// Linting succeeds when content-based filtering is in play.
    #[test]
    fn test_lint_rule_filtering() {
        // Content with no lists
        let content = "# Heading\nJust text";
        let rules: Vec<Box<dyn Rule>> = vec![
            Box::new(MD001HeadingIncrement),
            // A list-related rule would be skipped
        ];

        let result = lint(content, &rules, false);
        assert!(result.is_ok());
    }

    /// The profiling report is a non-empty string mentioning "Profiling".
    #[test]
    fn test_get_profiling_report() {
        // Just test that it returns a string without panicking
        let report = get_profiling_report();
        assert!(!report.is_empty());
        assert!(report.contains("Profiling"));
    }

    /// Resetting profiling leaves a report that signals no data.
    #[test]
    fn test_reset_profiling() {
        // Test that reset_profiling doesn't panic
        reset_profiling();

        // After reset, report should indicate no measurements or profiling disabled
        let report = get_profiling_report();
        assert!(report.contains("disabled") || report.contains("no measurements"));
    }

    /// Every recorded regex usage count is positive (the map may be empty).
    #[test]
    fn test_get_regex_cache_stats() {
        let stats = get_regex_cache_stats();

        assert!(stats.values().all(|&count| count > 0));
    }

    /// Every recorded AST usage count is positive (the map may be empty).
    #[test]
    fn test_get_ast_cache_stats() {
        let stats = get_ast_cache_stats();

        assert!(stats.values().all(|&count| count > 0));
    }

    /// Clearing caches empties the AST cache.
    #[test]
    fn test_clear_all_caches() {
        // Test that clear_all_caches doesn't panic
        clear_all_caches();

        // After clearing, AST cache should be empty
        let ast_stats = get_ast_cache_stats();
        assert!(ast_stats.is_empty());
    }

    /// The cache report always contains its section headers.
    #[test]
    fn test_get_cache_performance_report() {
        let report = get_cache_performance_report();

        // Report should contain expected sections
        assert!(report.contains("Cache Performance Report"));
        assert!(report.contains("Regex Cache:"));
        assert!(report.contains("AST Cache:"));

        // Test with empty caches
        clear_all_caches();
        let report_empty = get_cache_performance_report();
        assert!(report_empty.contains("No AST nodes cached"));
    }

    /// Linting succeeds on content with a heading and inline emphasis.
    #[test]
    fn test_lint_with_ast_rules() {
        // Create content that would benefit from AST parsing
        let content = "# Heading\n\nParagraph with **bold** text.";
        let rules: Vec<Box<dyn Rule>> = vec![Box::new(MD012NoMultipleBlanks::new(1))];

        let result = lint(content, &rules, false);
        assert!(result.is_ok());
    }

    /// Near-miss inputs must not trigger the heading, list or blockquote heuristics.
    #[test]
    fn test_content_characteristics_edge_cases() {
        // Test setext heading edge case
        let chars = ContentCharacteristics::analyze("-"); // Single dash, not a heading
        assert!(!chars.has_headings);

        let chars = ContentCharacteristics::analyze("--"); // Two dashes, valid setext
        assert!(chars.has_headings);

        // Test list detection edge cases
        let chars = ContentCharacteristics::analyze("*emphasis*"); // Not a list
        assert!(!chars.has_lists);

        let chars = ContentCharacteristics::analyze("1.Item"); // No space after period
        assert!(!chars.has_lists);

        // Test blockquote must be at start of line
        let chars = ContentCharacteristics::analyze("text > not a quote");
        assert!(!chars.has_blockquotes);
    }

    /// The cache report is non-empty and spans several lines.
    #[test]
    fn test_cache_performance_report_formatting() {
        // Add some data to caches to test formatting
        // (Would require actual usage of the caches, which happens during linting)

        let report = get_cache_performance_report();

        // Test truncation of long patterns
        // Since we can't easily add a long pattern to the cache in this test,
        // we'll just verify the report structure is correct
        assert!(!report.is_empty());
        assert!(report.lines().count() > 3); // Should have multiple lines
    }
}