rumdl_lib/
lib.rs

1pub mod config;
2pub mod exit_codes;
3pub mod fix_coordinator;
4pub mod inline_config;
5pub mod lint_context;
6pub mod lsp;
7pub mod markdownlint_config;
8pub mod output;
9pub mod parallel;
10pub mod performance;
11pub mod profiling;
12pub mod rule;
13pub mod vscode;
14#[macro_use]
15pub mod rule_config;
16#[macro_use]
17pub mod rule_config_serde;
18pub mod rules;
19pub mod utils;
20
21pub use rules::heading_utils::{Heading, HeadingStyle};
22pub use rules::*;
23
24pub use crate::lint_context::{LineInfo, LintContext, ListItemInfo};
25use crate::rule::{LintResult, Rule, RuleCategory};
26use std::time::Instant;
27
/// Coarse summary of which Markdown constructs appear in a document.
///
/// Each flag records whether a cheap textual scan found something that looks
/// like the construct; the flags are consulted to decide which rule
/// categories can be skipped for that document.
#[derive(Debug, Default)]
struct ContentCharacteristics {
    /// ATX (`#`) or setext headings
    has_headings: bool,
    /// List markers: `*`, `-`, `+`, `1.` etc
    has_lists: bool,
    /// `[text](url)` or `[text][ref]`
    has_links: bool,
    /// Backtick or `~~~` fences, or indented code
    has_code: bool,
    /// `*` or `_` used for emphasis
    has_emphasis: bool,
    /// `<` `>` tags
    has_html: bool,
    /// `|` pipes
    has_tables: bool,
    /// `>` markers
    has_blockquotes: bool,
    /// `![alt](url)`
    has_images: bool,
}
41
42impl ContentCharacteristics {
43    fn analyze(content: &str) -> Self {
44        let mut chars = Self { ..Default::default() };
45
46        // Quick single-pass analysis
47        let mut has_atx_heading = false;
48        let mut has_setext_heading = false;
49
50        for line in content.lines() {
51            let trimmed = line.trim();
52
53            // Headings: ATX (#) or Setext (underlines)
54            if !has_atx_heading && trimmed.starts_with('#') {
55                has_atx_heading = true;
56            }
57            if !has_setext_heading && (trimmed.chars().all(|c| c == '=' || c == '-') && trimmed.len() > 1) {
58                has_setext_heading = true;
59            }
60
61            // Quick character-based detection (more efficient than regex)
62            if !chars.has_lists && (line.contains("* ") || line.contains("- ") || line.contains("+ ")) {
63                chars.has_lists = true;
64            }
65            if !chars.has_lists && line.chars().next().is_some_and(|c| c.is_ascii_digit()) && line.contains(". ") {
66                chars.has_lists = true;
67            }
68            if !chars.has_links
69                && (line.contains('[')
70                    || line.contains("http://")
71                    || line.contains("https://")
72                    || line.contains("ftp://"))
73            {
74                chars.has_links = true;
75            }
76            if !chars.has_images && line.contains("![") {
77                chars.has_images = true;
78            }
79            if !chars.has_code && (line.contains('`') || line.contains("~~~")) {
80                chars.has_code = true;
81            }
82            if !chars.has_emphasis && (line.contains('*') || line.contains('_')) {
83                chars.has_emphasis = true;
84            }
85            if !chars.has_html && line.contains('<') {
86                chars.has_html = true;
87            }
88            if !chars.has_tables && line.contains('|') {
89                chars.has_tables = true;
90            }
91            if !chars.has_blockquotes && line.starts_with('>') {
92                chars.has_blockquotes = true;
93            }
94        }
95
96        chars.has_headings = has_atx_heading || has_setext_heading;
97        chars
98    }
99
100    /// Check if a rule should be skipped based on content characteristics
101    fn should_skip_rule(&self, rule: &dyn Rule) -> bool {
102        match rule.category() {
103            RuleCategory::Heading => !self.has_headings,
104            RuleCategory::List => !self.has_lists,
105            RuleCategory::Link => !self.has_links && !self.has_images,
106            RuleCategory::Image => !self.has_images,
107            RuleCategory::CodeBlock => !self.has_code,
108            RuleCategory::Html => !self.has_html,
109            RuleCategory::Emphasis => !self.has_emphasis,
110            RuleCategory::Blockquote => !self.has_blockquotes,
111            RuleCategory::Table => !self.has_tables,
112            // Always check these categories as they apply to all content
113            RuleCategory::Whitespace | RuleCategory::FrontMatter | RuleCategory::Other => false,
114        }
115    }
116}
117
118/// Lint a file against the given rules with intelligent rule filtering
119/// Assumes the provided `rules` vector contains the final,
120/// configured, and filtered set of rules to be executed.
121pub fn lint(
122    content: &str,
123    rules: &[Box<dyn Rule>],
124    _verbose: bool,
125    flavor: crate::config::MarkdownFlavor,
126) -> LintResult {
127    let mut warnings = Vec::new();
128    let _overall_start = Instant::now();
129
130    // Early return for empty content
131    if content.is_empty() {
132        return Ok(warnings);
133    }
134
135    // Parse inline configuration comments once
136    let inline_config = crate::inline_config::InlineConfig::from_content(content);
137
138    // Analyze content characteristics for rule filtering
139    let characteristics = ContentCharacteristics::analyze(content);
140
141    // Filter rules based on content characteristics
142    let applicable_rules: Vec<_> = rules
143        .iter()
144        .filter(|rule| !characteristics.should_skip_rule(rule.as_ref()))
145        .collect();
146
147    // Calculate skipped rules count before consuming applicable_rules
148    let _total_rules = rules.len();
149    let _applicable_count = applicable_rules.len();
150
151    // Parse AST once for rules that can benefit from it
152    let ast_rules_count = applicable_rules.iter().filter(|rule| rule.uses_ast()).count();
153    let ast = if ast_rules_count > 0 {
154        Some(crate::utils::ast_utils::get_cached_ast(content))
155    } else {
156        None
157    };
158
159    // Parse LintContext once (migration step) with the provided flavor
160    let lint_ctx = crate::lint_context::LintContext::new(content, flavor);
161
162    for rule in applicable_rules {
163        let _rule_start = Instant::now();
164
165        // Try optimized paths in order of preference
166        let result = if rule.uses_ast() {
167            if let Some(ref ast_ref) = ast {
168                // 1. AST-based path
169                rule.as_maybe_ast()
170                    .and_then(|ext| ext.check_with_ast_opt(&lint_ctx, ast_ref))
171                    .unwrap_or_else(|| rule.check_with_ast(&lint_ctx, ast_ref))
172            } else {
173                // Fallback to regular check if no AST
174                rule.check(&lint_ctx)
175            }
176        } else {
177            // 2. Regular check path
178            rule.check(&lint_ctx)
179        };
180
181        match result {
182            Ok(rule_warnings) => {
183                // Filter out warnings for rules disabled via inline comments
184                let filtered_warnings: Vec<_> = rule_warnings
185                    .into_iter()
186                    .filter(|warning| {
187                        // Use the warning's rule_name if available, otherwise use the rule's name
188                        let rule_name_to_check = warning.rule_name.unwrap_or(rule.name());
189
190                        // Extract the base rule name for sub-rules like "MD029-style" -> "MD029"
191                        let base_rule_name = if let Some(dash_pos) = rule_name_to_check.find('-') {
192                            &rule_name_to_check[..dash_pos]
193                        } else {
194                            rule_name_to_check
195                        };
196
197                        !inline_config.is_rule_disabled(
198                            base_rule_name,
199                            warning.line, // Already 1-indexed
200                        )
201                    })
202                    .collect();
203                warnings.extend(filtered_warnings);
204            }
205            Err(e) => {
206                log::error!("Error checking rule {}: {}", rule.name(), e);
207                return Err(e);
208            }
209        }
210
211        #[cfg(not(test))]
212        if _verbose {
213            let rule_duration = _rule_start.elapsed();
214            if rule_duration.as_millis() > 500 {
215                log::debug!("Rule {} took {:?}", rule.name(), rule_duration);
216            }
217        }
218    }
219
220    #[cfg(not(test))]
221    if _verbose {
222        let skipped_rules = _total_rules - _applicable_count;
223        if skipped_rules > 0 {
224            log::debug!("Skipped {skipped_rules} of {_total_rules} rules based on content analysis");
225        }
226        if ast.is_some() {
227            log::debug!("Used shared AST for {ast_rules_count} rules");
228        }
229    }
230
231    Ok(warnings)
232}
233
234/// Get the profiling report
235pub fn get_profiling_report() -> String {
236    profiling::get_report()
237}
238
239/// Reset the profiling data
240pub fn reset_profiling() {
241    profiling::reset()
242}
243
244/// Get regex cache statistics for performance monitoring
245pub fn get_regex_cache_stats() -> std::collections::HashMap<String, u64> {
246    crate::utils::regex_cache::get_cache_stats()
247}
248
249/// Get AST cache statistics for performance monitoring
250pub fn get_ast_cache_stats() -> std::collections::HashMap<u64, u64> {
251    crate::utils::ast_utils::get_ast_cache_stats()
252}
253
254/// Clear all caches (useful for testing and memory management)
255pub fn clear_all_caches() {
256    crate::utils::ast_utils::clear_ast_cache();
257    // Note: Regex cache is intentionally not cleared as it's global and shared
258}
259
260/// Get comprehensive cache performance report
261pub fn get_cache_performance_report() -> String {
262    let regex_stats = get_regex_cache_stats();
263    let ast_stats = get_ast_cache_stats();
264
265    let mut report = String::new();
266
267    report.push_str("=== Cache Performance Report ===\n\n");
268
269    // Regex cache statistics
270    report.push_str("Regex Cache:\n");
271    if regex_stats.is_empty() {
272        report.push_str("  No regex patterns cached\n");
273    } else {
274        let total_usage: u64 = regex_stats.values().sum();
275        report.push_str(&format!("  Total patterns: {}\n", regex_stats.len()));
276        report.push_str(&format!("  Total usage: {total_usage}\n"));
277
278        // Show top 5 most used patterns
279        let mut sorted_patterns: Vec<_> = regex_stats.iter().collect();
280        sorted_patterns.sort_by(|a, b| b.1.cmp(a.1));
281
282        report.push_str("  Top patterns by usage:\n");
283        for (pattern, count) in sorted_patterns.iter().take(5) {
284            let truncated_pattern = if pattern.len() > 50 {
285                format!("{}...", &pattern[..47])
286            } else {
287                pattern.to_string()
288            };
289            report.push_str(&format!(
290                "    {} ({}x): {}\n",
291                count,
292                pattern.len().min(50),
293                truncated_pattern
294            ));
295        }
296    }
297
298    report.push('\n');
299
300    // AST cache statistics
301    report.push_str("AST Cache:\n");
302    if ast_stats.is_empty() {
303        report.push_str("  No AST nodes cached\n");
304    } else {
305        let total_usage: u64 = ast_stats.values().sum();
306        report.push_str(&format!("  Total ASTs: {}\n", ast_stats.len()));
307        report.push_str(&format!("  Total usage: {total_usage}\n"));
308
309        if total_usage > ast_stats.len() as u64 {
310            let cache_hit_rate = ((total_usage - ast_stats.len() as u64) as f64 / total_usage as f64) * 100.0;
311            report.push_str(&format!("  Cache hit rate: {cache_hit_rate:.1}%\n"));
312        }
313    }
314
315    report
316}
317
/// Unit tests for content analysis, rule filtering, linting and the
/// cache/profiling report helpers defined in this file.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::rule::Rule;
    use crate::rules::{MD001HeadingIncrement, MD009TrailingSpaces, MD012NoMultipleBlanks};

    #[test]
    fn test_content_characteristics_analyze() {
        // Test empty content
        let chars = ContentCharacteristics::analyze("");
        assert!(!chars.has_headings);
        assert!(!chars.has_lists);
        assert!(!chars.has_links);
        assert!(!chars.has_code);
        assert!(!chars.has_emphasis);
        assert!(!chars.has_html);
        assert!(!chars.has_tables);
        assert!(!chars.has_blockquotes);
        assert!(!chars.has_images);

        // Test content with headings
        let chars = ContentCharacteristics::analyze("# Heading");
        assert!(chars.has_headings);

        // Test setext headings
        let chars = ContentCharacteristics::analyze("Heading\n=======");
        assert!(chars.has_headings);

        // Test lists
        let chars = ContentCharacteristics::analyze("* Item\n- Item 2\n+ Item 3");
        assert!(chars.has_lists);

        // Test ordered lists
        let chars = ContentCharacteristics::analyze("1. First\n2. Second");
        assert!(chars.has_lists);

        // Test links
        let chars = ContentCharacteristics::analyze("[link](url)");
        assert!(chars.has_links);

        // Test URLs
        let chars = ContentCharacteristics::analyze("Visit https://example.com");
        assert!(chars.has_links);

        // Test images
        let chars = ContentCharacteristics::analyze("![alt text](image.png)");
        assert!(chars.has_images);

        // Test code
        let chars = ContentCharacteristics::analyze("`inline code`");
        assert!(chars.has_code);

        let chars = ContentCharacteristics::analyze("~~~\ncode block\n~~~");
        assert!(chars.has_code);

        // Test emphasis
        let chars = ContentCharacteristics::analyze("*emphasis* and _more_");
        assert!(chars.has_emphasis);

        // Test HTML
        let chars = ContentCharacteristics::analyze("<div>HTML content</div>");
        assert!(chars.has_html);

        // Test tables
        let chars = ContentCharacteristics::analyze("| Header | Header |\n|--------|--------|");
        assert!(chars.has_tables);

        // Test blockquotes
        let chars = ContentCharacteristics::analyze("> Quote");
        assert!(chars.has_blockquotes);

        // Test mixed content
        let content = "# Heading\n* List item\n[link](url)\n`code`\n*emphasis*\n<p>html</p>\n| table |\n> quote\n![image](img.png)";
        let chars = ContentCharacteristics::analyze(content);
        assert!(chars.has_headings);
        assert!(chars.has_lists);
        assert!(chars.has_links);
        assert!(chars.has_code);
        assert!(chars.has_emphasis);
        assert!(chars.has_html);
        assert!(chars.has_tables);
        assert!(chars.has_blockquotes);
        assert!(chars.has_images);
    }

    #[test]
    fn test_content_characteristics_should_skip_rule() {
        let chars = ContentCharacteristics {
            has_headings: true,
            has_lists: false,
            has_links: true,
            has_code: false,
            has_emphasis: true,
            has_html: false,
            has_tables: true,
            has_blockquotes: false,
            has_images: false,
        };

        // Create test rules for different categories
        let heading_rule = MD001HeadingIncrement;
        assert!(!chars.should_skip_rule(&heading_rule));

        let trailing_spaces_rule = MD009TrailingSpaces::new(2, false);
        assert!(!chars.should_skip_rule(&trailing_spaces_rule)); // Whitespace rules always run

        // Test skipping based on content
        let chars_no_headings = ContentCharacteristics {
            has_headings: false,
            ..Default::default()
        };
        assert!(chars_no_headings.should_skip_rule(&heading_rule));
    }

    #[test]
    fn test_lint_empty_content() {
        let rules: Vec<Box<dyn Rule>> = vec![Box::new(MD001HeadingIncrement)];

        let result = lint("", &rules, false, crate::config::MarkdownFlavor::Standard);
        assert!(result.is_ok());
        assert!(result.unwrap().is_empty());
    }

    #[test]
    fn test_lint_with_violations() {
        let content = "## Level 2\n#### Level 4"; // Skips level 3
        let rules: Vec<Box<dyn Rule>> = vec![Box::new(MD001HeadingIncrement)];

        let result = lint(content, &rules, false, crate::config::MarkdownFlavor::Standard);
        assert!(result.is_ok());
        let warnings = result.unwrap();
        assert!(!warnings.is_empty());
        // Check the rule field of LintWarning struct
        assert_eq!(warnings[0].rule_name, Some("MD001"));
    }

    #[test]
    fn test_lint_with_inline_disable() {
        let content = "<!-- rumdl-disable MD001 -->\n## Level 2\n#### Level 4";
        let rules: Vec<Box<dyn Rule>> = vec![Box::new(MD001HeadingIncrement)];

        let result = lint(content, &rules, false, crate::config::MarkdownFlavor::Standard);
        assert!(result.is_ok());
        let warnings = result.unwrap();
        assert!(warnings.is_empty()); // Should be disabled by inline comment
    }

    #[test]
    fn test_lint_rule_filtering() {
        // Content with no lists
        let content = "# Heading\nJust text";
        let rules: Vec<Box<dyn Rule>> = vec![
            Box::new(MD001HeadingIncrement),
            // A list-related rule would be skipped
        ];

        let result = lint(content, &rules, false, crate::config::MarkdownFlavor::Standard);
        assert!(result.is_ok());
    }

    #[test]
    fn test_get_profiling_report() {
        // Just test that it returns a string without panicking
        let report = get_profiling_report();
        assert!(!report.is_empty());
        assert!(report.contains("Profiling"));
    }

    #[test]
    fn test_reset_profiling() {
        // Test that reset_profiling doesn't panic
        reset_profiling();

        // After reset, report should indicate no measurements or profiling disabled
        let report = get_profiling_report();
        assert!(report.contains("disabled") || report.contains("no measurements"));
    }

    #[test]
    fn test_get_regex_cache_stats() {
        // The map may legitimately be empty; any entry that is present must
        // have a positive count.
        let stats = get_regex_cache_stats();
        for count in stats.values() {
            assert!(*count > 0);
        }
    }

    #[test]
    fn test_get_ast_cache_stats() {
        // The map may legitimately be empty; any entry that is present must
        // have a positive count.
        let stats = get_ast_cache_stats();
        for count in stats.values() {
            assert!(*count > 0);
        }
    }

    #[test]
    fn test_clear_all_caches() {
        // Test that clear_all_caches doesn't panic
        clear_all_caches();

        // Function completes successfully - cache state is process-global and may
        // be modified by other tests, so we don't assert on specific state
    }

    #[test]
    fn test_get_cache_performance_report() {
        // Test that the report generation works and has the correct structure
        let report = get_cache_performance_report();

        // Report should always contain expected section headers
        assert!(report.contains("Cache Performance Report"));
        assert!(report.contains("Regex Cache:"));
        assert!(report.contains("AST Cache:"));

        // Report should contain either usage stats or "no cache" messages
        // (depends on whether other tests have populated the cache).
        // The populated AST section is labelled "Total ASTs:" by the report.
        assert!(report.contains("Total patterns:") || report.contains("No regex patterns cached"));
        assert!(report.contains("Total ASTs:") || report.contains("No AST nodes cached"));
    }

    #[test]
    fn test_lint_with_ast_rules() {
        // Create content that would benefit from AST parsing
        let content = "# Heading\n\nParagraph with **bold** text.";
        let rules: Vec<Box<dyn Rule>> = vec![Box::new(MD012NoMultipleBlanks::new(1))];

        let result = lint(content, &rules, false, crate::config::MarkdownFlavor::Standard);
        assert!(result.is_ok());
    }

    #[test]
    fn test_content_characteristics_edge_cases() {
        // Test setext heading edge case
        let chars = ContentCharacteristics::analyze("-"); // Single dash, not a heading
        assert!(!chars.has_headings);

        let chars = ContentCharacteristics::analyze("--"); // Two dashes, valid setext
        assert!(chars.has_headings);

        // Test list detection edge cases
        let chars = ContentCharacteristics::analyze("*emphasis*"); // Not a list
        assert!(!chars.has_lists);

        let chars = ContentCharacteristics::analyze("1.Item"); // No space after period
        assert!(!chars.has_lists);

        // Test blockquote must be at start of line
        let chars = ContentCharacteristics::analyze("text > not a quote");
        assert!(!chars.has_blockquotes);
    }

    #[test]
    fn test_cache_performance_report_formatting() {
        // Add some data to caches to test formatting
        // (Would require actual usage of the caches, which happens during linting)

        let report = get_cache_performance_report();

        // Test truncation of long patterns
        // Since we can't easily add a long pattern to the cache in this test,
        // we'll just verify the report structure is correct
        assert!(!report.is_empty());
        assert!(report.lines().count() > 3); // Should have multiple lines
    }
}
rumdl_lib/lib.rs

rumdl_lib/
lib.rs