rumdl/
lib.rs

1pub mod config;
2pub mod init;
3pub mod profiling;
4pub mod rule;
5pub mod rules;
6pub mod utils;
7
8#[cfg(feature = "python")]
9pub mod python;
10
11pub use rules::heading_utils::{Heading, HeadingStyle};
12pub use rules::*;
13
14use crate::rule::{LintResult, Rule};
15use globset::GlobBuilder;
16use std::path::{Path, PathBuf};
17
18/// Collect patterns from .gitignore files
19///
20/// This function reads the closest .gitignore file and returns a list of patterns
21/// that can be used to exclude files from linting.
22pub fn collect_gitignore_patterns(start_dir: &str) -> Vec<String> {
23    use std::fs;
24
25    let mut patterns = Vec::new();
26
27    // Start from the given directory and look for .gitignore files
28    // going up to parent directories
29    let path = Path::new(start_dir);
30    let mut current_dir = if path.is_file() {
31        path.parent().unwrap_or(Path::new(".")).to_path_buf()
32    } else {
33        path.to_path_buf()
34    };
35
36    // Track visited directories to avoid duplicates
37    let mut visited_dirs = std::collections::HashSet::new();
38
39    while visited_dirs.insert(current_dir.clone()) {
40        let gitignore_path = current_dir.join(".gitignore");
41
42        if gitignore_path.exists() && gitignore_path.is_file() {
43            // Read the .gitignore file and process each pattern
44            if let Ok(content) = fs::read_to_string(&gitignore_path) {
45                for line in content.lines() {
46                    // Skip comments and empty lines
47                    let trimmed = line.trim();
48                    if !trimmed.is_empty() && !trimmed.starts_with('#') {
49                        // Normalize pattern to fit our exclude format
50                        let pattern = normalize_gitignore_pattern(trimmed);
51                        if !pattern.is_empty() {
52                            patterns.push(pattern);
53                        }
54                    }
55                }
56            }
57        }
58
59        // Check for global gitignore in .git/info/exclude
60        let git_dir = current_dir.join(".git");
61        if git_dir.exists() && git_dir.is_dir() {
62            let exclude_path = git_dir.join("info/exclude");
63            if exclude_path.exists() && exclude_path.is_file() {
64                if let Ok(content) = fs::read_to_string(&exclude_path) {
65                    for line in content.lines() {
66                        // Skip comments and empty lines
67                        let trimmed = line.trim();
68                        if !trimmed.is_empty() && !trimmed.starts_with('#') {
69                            // Normalize pattern to fit our exclude format
70                            let pattern = normalize_gitignore_pattern(trimmed);
71                            if !pattern.is_empty() {
72                                patterns.push(pattern);
73                            }
74                        }
75                    }
76                }
77            }
78        }
79
80        // Go up to parent directory
81        match current_dir.parent() {
82            Some(parent) => current_dir = parent.to_path_buf(),
83            None => break,
84        }
85    }
86
87    patterns
88}
89
/// Normalize a gitignore pattern to fit our exclude format.
///
/// The pattern is trimmed, then:
///
/// * a pattern whose first character is `!` (a negation) yields an empty
///   string, because negated patterns are not supported;
/// * one leading `/` (the gitignore anchor) is removed;
/// * one trailing `/` (the gitignore directory marker) is removed, unless the
///   remaining pattern is just that single character.
///
/// Everything else, including `*` and `**` wildcards, is passed through
/// unchanged. An empty return value means "ignore this pattern".
fn normalize_gitignore_pattern(pattern: &str) -> String {
    let trimmed = pattern.trim();

    // `!` only negates when it is the very first character, so it is checked
    // before the anchor is stripped; `/!name` is a literal file name.
    if trimmed.starts_with('!') {
        return String::new();
    }

    let unanchored = trimmed.strip_prefix('/').unwrap_or(trimmed);

    let without_dir_marker = match unanchored.strip_suffix('/') {
        Some(stripped) if unanchored.len() > 1 => stripped,
        _ => unanchored,
    };

    without_dir_marker.to_string()
}
125
126/// Match a path against a gitignore pattern
127fn matches_gitignore_pattern(path: &str, pattern: &str) -> bool {
128    // Handle directory patterns (ending with / or no glob chars)
129    if pattern.ends_with('/') || !pattern.contains('*') {
130        let dir_pattern = pattern.trim_end_matches('/');
131        // For directory patterns, we want to match the entire path component
132        let path_components: Vec<&str> = path.split('/').collect();
133        let pattern_components: Vec<&str> = dir_pattern.split('/').collect();
134
135        // Check if any path component matches the pattern
136        path_components
137            .windows(pattern_components.len())
138            .any(|window| {
139                window
140                    .iter()
141                    .zip(pattern_components.iter())
142                    .all(|(p, pat)| p == pat)
143            })
144    } else {
145        // Use globset for glob patterns
146        if let Ok(glob_result) = GlobBuilder::new(pattern).literal_separator(true).build() {
147            let matcher = glob_result.compile_matcher();
148            matcher.is_match(path)
149        } else {
150            // If glob compilation fails, treat it as a literal string
151            path.contains(pattern)
152        }
153    }
154}
155
/// Normalize a file path for pattern matching.
///
/// Relative paths are first joined onto the current working directory. If the
/// resulting path lies under the working directory, that prefix is stripped
/// and the remainder is returned; otherwise the absolute path is returned as
/// is. Nothing is canonicalized, so symlinks and `..` segments are left alone.
///
/// If the working directory cannot be determined, relative paths are joined
/// onto `.` and returned without any prefix stripping. Non-UTF-8 path data is
/// converted lossily.
fn normalize_path_for_matching(file_path: &str) -> String {
    let path = Path::new(file_path);

    // Query the working directory once so both steps see the same value.
    let cwd = std::env::current_dir().ok();

    let absolute: PathBuf = if path.is_absolute() {
        path.to_path_buf()
    } else {
        match &cwd {
            Some(dir) => dir.join(path),
            None => Path::new(".").join(path),
        }
    };

    // Prefer the cwd-relative form; keep the absolute one when outside the cwd.
    let relative = cwd
        .as_deref()
        .and_then(|dir| absolute.strip_prefix(dir).ok())
        .unwrap_or(absolute.as_path());

    relative.to_string_lossy().into_owned()
}
184
185/// Should exclude a file based on patterns
186///
187/// This function checks if a file should be excluded based on a list of glob patterns.
188pub fn should_exclude(
189    file_path: &str,
190    exclude_patterns: &[String],
191    ignore_gitignore: bool,
192) -> bool {
193    // Skip empty patterns as an optimization
194    if exclude_patterns.is_empty() && ignore_gitignore {
195        return false;
196    }
197
198    // Get normalized path for pattern matching
199    let normalized_path_str = normalize_path_for_matching(file_path);
200
201    // Unless ignore_gitignore is true, check .gitignore patterns first
202    if !ignore_gitignore {
203        let gitignore_patterns = collect_gitignore_patterns(file_path);
204        for pattern in &gitignore_patterns {
205            let normalized_pattern = pattern.strip_prefix("./").unwrap_or(pattern);
206            if matches_gitignore_pattern(&normalized_path_str, normalized_pattern) {
207                return true;
208            }
209        }
210    }
211
212    // Then check explicit exclude patterns
213    for pattern in exclude_patterns {
214        // Normalize the pattern by removing leading ./ if present
215        let normalized_pattern = pattern.strip_prefix("./").unwrap_or(pattern);
216
217        // Special case: Handle patterns ending with slash (directory patterns)
218        if normalized_pattern.ends_with('/') {
219            // Convert "dir/" to "dir/**/*" to match all files in that directory and subdirectories
220            let dir_glob_pattern = format!("{}**/*", normalized_pattern);
221            
222            let glob_result = GlobBuilder::new(&dir_glob_pattern)
223                .literal_separator(false)
224                .build()
225                .map(|glob| glob.compile_matcher());
226                
227            if let Ok(matcher) = glob_result {
228                if matcher.is_match(&normalized_path_str) {
229                    return true;
230                }
231            } else {
232                // Fallback to prefix matching if glob fails
233                if normalized_path_str.starts_with(normalized_pattern) {
234                    return true;
235                }
236            }
237            
238            continue;
239        }
240
241        // Handle invalid glob-like patterns as literal strings
242        if pattern.contains('[') && !pattern.contains(']')
243            || pattern.contains('{') && !pattern.contains('}')
244        {
245            if normalized_path_str.contains(normalized_pattern) {
246                return true;
247            }
248            continue;
249        }
250
251        // Try to create a glob pattern
252        let glob_result = GlobBuilder::new(normalized_pattern)
253            .literal_separator(true) // Make sure * doesn't match /
254            .build()
255            .map(|glob| glob.compile_matcher());
256
257        match glob_result {
258            Ok(matcher) => {
259                if matcher.is_match(&normalized_path_str) {
260                    return true;
261                }
262            }
263            Err(_) => {
264                // If pattern is invalid as a glob, treat it as a literal string
265                if normalized_path_str.contains(normalized_pattern) {
266                    return true;
267                }
268            }
269        }
270    }
271
272    false
273}
274
275/// Determines if a file should be included based on patterns
276///
277/// This function checks if a file should be included based on a list of glob patterns.
278/// If include_patterns is empty, all files are included.
279pub fn should_include(file_path: &str, include_patterns: &[String]) -> bool {
280    // If no include patterns are specified, include everything
281    if include_patterns.is_empty() {
282        return true;
283    }
284
285    // Get normalized path for pattern matching
286    let normalized_path_str = normalize_path_for_matching(file_path);
287
288    for pattern in include_patterns {
289        // Special case: Treat invalid glob-like patterns as literal strings
290        if pattern.contains('[') && !pattern.contains(']')
291            || pattern.contains('{') && !pattern.contains('}')
292        {
293            if normalized_path_str.contains(pattern) {
294                return true;
295            }
296            continue;
297        }
298
299        // Normalize the pattern by removing leading ./ if present
300        let normalized_pattern = pattern.strip_prefix("./").unwrap_or(pattern);
301
302        // Special case: Handle patterns ending with slash (directory patterns)
303        if normalized_pattern.ends_with('/') {
304            // Convert "dir/" to "dir/**/*" to match all files in that directory and subdirectories
305            let dir_glob_pattern = format!("{}**/*", normalized_pattern);
306            
307            let glob_result = GlobBuilder::new(&dir_glob_pattern)
308                .literal_separator(false)
309                .build()
310                .map(|glob| glob.compile_matcher());
311                
312            if let Ok(matcher) = glob_result {
313                if matcher.is_match(&normalized_path_str) {
314                    return true;
315                }
316            } else {
317                // Fallback to prefix matching if glob fails
318                if normalized_path_str.starts_with(normalized_pattern) {
319                    return true;
320                }
321            }
322            
323            continue;
324        }
325
326        // Handle path traversal patterns (../ patterns)
327        if normalized_pattern.contains("../") {
328            // For path traversal patterns, we do a direct string comparison
329            // since these are explicitly addressing paths outside current directory
330            if normalized_path_str == normalized_pattern {
331                return true;
332            }
333
334            // Try to normalize both paths for comparison
335            // This handles cases like "./docs/../src/file.md" matching "src/file.md"
336            if let Ok(normalized_pattern_path) = Path::new(normalized_pattern).canonicalize() {
337                if let Ok(normalized_file_path) = Path::new(&normalized_path_str).canonicalize() {
338                    if normalized_pattern_path == normalized_file_path {
339                        return true;
340                    }
341                }
342            }
343
344            // Another approach: try to resolve the pattern using path logic
345            if let Some(resolved_pattern) = normalize_path(normalized_pattern) {
346                // Compare with the file path directly
347                if normalized_path_str == resolved_pattern {
348                    return true;
349                }
350
351                // Try as a glob pattern
352                let glob_result = GlobBuilder::new(&resolved_pattern)
353                    .literal_separator(true)
354                    .build()
355                    .map(|glob| glob.compile_matcher());
356
357                if let Ok(matcher) = glob_result {
358                    if matcher.is_match(&normalized_path_str) {
359                        return true;
360                    }
361                }
362            }
363
364            // Try to create a glob pattern for traversal
365            match GlobBuilder::new(normalized_pattern)
366                .literal_separator(false) // Allow matching across directory boundaries
367                .build()
368                .map(|glob| glob.compile_matcher())
369            {
370                Ok(matcher) => {
371                    if matcher.is_match(&normalized_path_str) {
372                        return true;
373                    }
374                }
375                Err(_) => {
376                    // Treat as literal string if it's not a valid glob
377                    if normalized_path_str.contains(normalized_pattern) {
378                        return true;
379                    }
380                }
381            }
382            continue;
383        }
384
385        // Special case for certain valid glob characters
386        // that we want to handle as exact-match globs
387
388        // For exact filename matches, we want to match only the filename component
389        if !normalized_pattern.contains('/') && !normalized_pattern.contains('*') {
390            // 1. Get just the filename part of the path
391            let file_name = Path::new(&normalized_path_str)
392                .file_name()
393                .map(|n| n.to_string_lossy().to_string())
394                .unwrap_or_default();
395
396            // 2. Check if the file is directly in the root (no directory component)
397            let _parent = Path::new(&normalized_path_str).parent();
398            
399            // 3. If it matches the file name directly, include it
400            if file_name == normalized_pattern {
401                return true;
402            }
403
404            // 4. Try with a specific glob pattern to match either the filename in any directory
405            // or a subdirectory with this name
406            let glob_pattern = format!("**/{}", normalized_pattern);
407            let glob_result = GlobBuilder::new(&glob_pattern)
408                .literal_separator(true)
409                .build()
410                .map(|glob| glob.compile_matcher());
411
412            if let Ok(matcher) = glob_result {
413                if matcher.is_match(&normalized_path_str) {
414                    return true;
415                }
416            }
417
418            // Skip further checks
419            continue;
420        }
421
422        // First, try with exact glob pattern
423        let glob_result = GlobBuilder::new(normalized_pattern)
424            .literal_separator(true)
425            .build()
426            .map(|glob| glob.compile_matcher());
427
428        match glob_result {
429            Ok(matcher) => {
430                if matcher.is_match(&normalized_path_str) {
431                    return true;
432                }
433
434                // If it doesn't match with exact pattern, try with a more flexible pattern
435                // This adds ** prefix if not already there
436                if !normalized_pattern.starts_with("**") {
437                    let flexible_pattern = format!("**/{}", normalized_pattern);
438                    let flexible_glob_result = GlobBuilder::new(&flexible_pattern)
439                        .literal_separator(true)
440                        .build()
441                        .map(|glob| glob.compile_matcher());
442
443                    if let Ok(flexible_matcher) = flexible_glob_result {
444                        if flexible_matcher.is_match(&normalized_path_str) {
445                            return true;
446                        }
447                    }
448                }
449            }
450            Err(_) => {
451                // If glob compilation fails, treat it as a literal string
452                if normalized_path_str.contains(normalized_pattern) {
453                    return true;
454                }
455            }
456        }
457    }
458
459    false
460}
461
/// Lexically resolve `.` and `..` segments of a `/`-separated path.
///
/// Empty segments (from repeated or leading slashes) and `.` segments are
/// dropped. A `..` segment removes the preceding normal segment; when there
/// is nothing left to remove, the `..` is kept so that a path escaping its
/// starting directory (`../x`) is not mistaken for one inside it (`x`).
///
/// The file system is never consulted. A leading `/` is not preserved. The
/// result is currently always `Some`.
fn normalize_path(path: &str) -> Option<String> {
    let mut stack: Vec<&str> = Vec::new();

    for part in path.split('/') {
        match part {
            // Current directory or empty part (from consecutive slashes)
            "" | "." => {}
            ".." => match stack.last() {
                // Go up one directory by cancelling the previous segment.
                Some(&previous) if previous != ".." => {
                    stack.pop();
                }
                // Nothing to cancel: the path really does point above its start.
                _ => stack.push(".."),
            },
            // Normal directory or file
            _ => stack.push(part),
        }
    }

    Some(stack.join("/"))
}
480
481/// Lint a file against the given rules
482pub fn lint(content: &str, rules: &[Box<dyn Rule>]) -> LintResult {
483    let mut warnings = Vec::new();
484
485    for rule in rules {
486        match rule.check(content) {
487            Ok(rule_warnings) => {
488                warnings.extend(rule_warnings);
489            }
490            Err(e) => {
491                // Only print errors in non-parallel mode and when not running tests
492                #[cfg(not(test))]
493                eprintln!("Error checking rule {}: {}", rule.name(), e);
494                return Err(e);
495            }
496        }
497    }
498
499    // Only print warning counts in debug mode and when not running tests
500    #[cfg(all(debug_assertions, not(test)))]
501    if !warnings.is_empty() {
502        eprintln!("Found {} warnings", warnings.len());
503    }
504
505    Ok(warnings)
506}
507
/// Get the profiling report
///
/// Thin crate-root wrapper: forwards to `profiling::get_report()` and returns
/// the `String` it produces unchanged.
pub fn get_profiling_report() -> String {
    profiling::get_report()
}
512
/// Reset the profiling data
///
/// Thin crate-root wrapper: forwards to `profiling::reset()`.
pub fn reset_profiling() {
    profiling::reset()
}
517
// The parallel processing functions below are commented out because they currently cause compilation errors.
519/*
520#[cfg(feature = "parallel")]
521pub fn lint_parallel(content: &str, rules: &[Box<dyn Rule>]) -> LintResult {
522    let warnings = Arc::new(Mutex::new(Vec::new()));
523    let errors = Arc::new(Mutex::new(Vec::new()));
524
525    rules.par_iter().for_each(|rule| {
526        let rule_result = rule.check(content);
527        match rule_result {
528            Ok(rule_warnings) => {
529                let mut warnings_lock = warnings.lock().unwrap();
530                warnings_lock.extend(rule_warnings);
531            }
532            Err(error) => {
533                let mut errors_lock = errors.lock().unwrap();
534                errors_lock.push(error);
535            }
536        }
537    });
538
539    // Don't print errors in parallel mode - previously: eprintln!("{}", error);
540    let errors_lock = errors.lock().unwrap();
541    if !errors_lock.is_empty() {
542        // In parallel mode, we just log that errors occurred without showing the full content
543        if !errors_lock.is_empty() {
544            // DEBUG LINE REMOVED: Previously showed error count
545        }
546    }
547
548    Ok(warnings.lock().unwrap().clone())
549}
550
551#[cfg(feature = "parallel")]
552pub fn lint_parallel_with_structure(content: &str, rules: &[Box<dyn Rule>]) -> LintResult {
553    let structure = match DocumentStructure::parse(content) {
554        Ok(s) => s,
555        Err(e) => return Err(LintError::new(&format!("Failed to parse document structure: {}", e))),
556    };
557
558    // Filter rules that can skip execution based on the content
559    let filtered_rules: Vec<_> = rules
560        .iter()
561        .filter(|&rule| {
562            if let Some(skippable) = rule.as_any().downcast_ref::<dyn RuleSkippable>() {
563                !skippable.should_skip(&structure)
564            } else {
565                true
566            }
567        })
568        .collect();
569
570    let warnings = Arc::new(Mutex::new(Vec::new()));
571    let errors = Arc::new(Mutex::new(Vec::new()));
572
573    filtered_rules.par_iter().for_each(|rule| {
574        let rule_result = rule.check(content);
575        match rule_result {
576            Ok(rule_warnings) => {
577                let mut warnings_lock = warnings.lock().unwrap();
578                warnings_lock.extend(rule_warnings);
579            }
580            Err(error) => {
581                let mut errors_lock = errors.lock().unwrap();
582                errors_lock.push(error);
583            }
584        }
585    });
586
587    // Don't print errors in parallel mode to avoid content leakage
588    let errors_lock = errors.lock().unwrap();
589    if !errors_lock.is_empty() {
590        // In parallel mode, we just log that errors occurred without showing the full content
591        // DEBUG LINE REMOVED: Previously showed error count and contents
592        // Previously: for error in errors_lock.iter() { eprintln!("{}", error); }
593    }
594
595    Ok(warnings.lock().unwrap().clone())
596}
597
598#[cfg(feature = "parallel")]
599pub fn lint_selective_parallel(content: &str, rules: &[Box<dyn Rule>]) -> LintResult {
600    let structure = match DocumentStructure::parse(content) {
601        Ok(s) => s,
602        Err(e) => return Err(LintError::new(&format!("Failed to parse document structure: {}", e))),
603    };
604
605    // Determine relevant rule categories for the content
606    let relevant_categories = determine_relevant_categories(&structure);
607
608    // Filter rules based on their categories and skippability
609    let filtered_rules: Vec<_> = rules
610        .iter()
611        .filter(|&rule| {
612            // First, check if the rule is in a relevant category
613            let rule_categories: Vec<RuleCategory> = if let Some(categorized) = rule.as_any().downcast_ref::<dyn RuleCategorized>() {
614                categorized.categories()
615            } else {
616                vec![RuleCategory::Uncategorized]
617            };
618
619            // If ANY of the rule's categories are relevant, include it
620            if !rule_categories.iter().any(|cat| relevant_categories.contains(cat)) {
621                return false;
622            }
623
624            // Then check if the rule should be skipped
625            if let Some(skippable) = rule.as_any().downcast_ref::<dyn RuleSkippable>() {
626                !skippable.should_skip(&structure)
627            } else {
628                true
629            }
630        })
631        .collect();
632
633    // If we have no rules left, return empty results
634    if filtered_rules.is_empty() {
635        return Ok(Vec::new());
636    }
637
638    let warnings = Arc::new(Mutex::new(Vec::new()));
639    let errors = Arc::new(Mutex::new(Vec::new()));
640
641    filtered_rules.par_iter().for_each(|rule| {
642        let rule_result = rule.check(content);
643        match rule_result {
644            Ok(rule_warnings) => {
645                let mut warnings_lock = warnings.lock().unwrap();
646                warnings_lock.extend(rule_warnings);
647            }
648            Err(error) => {
649                let mut errors_lock = errors.lock().unwrap();
650                errors_lock.push(error);
651            }
652        }
653    });
654
655    // Don't print errors in parallel mode to avoid content leakage
656    let errors_lock = errors.lock().unwrap();
657    if !errors_lock.is_empty() {
658        // In parallel mode, we just log that errors occurred without showing the full content
659        // DEBUG LINE REMOVED: Previously showed error count and contents
660        // Previously: for error in errors_lock.iter() { eprintln!("{}", error); }
661    }
662
663    Ok(warnings.lock().unwrap().clone())
664}
665
666#[cfg(feature = "parallel")]
667pub fn lint_optimized(content: &str, rules: &[Box<dyn Rule>], optimize_flags: OptimizeFlags) -> LintResult {
668    // Track our linter time
669    let _timer = profiling::ScopedTimer::new("lint_optimized");
670
671    // If parallel processing is enabled
672    if optimize_flags.enable_parallel {
673        // If document structure optimization is enabled
674        if optimize_flags.enable_document_structure {
675            // If selective linting is enabled
676            if optimize_flags.enable_selective_linting {
677                return lint_selective_parallel(content, rules);
678            } else {
679                return lint_parallel_with_structure(content, rules);
680            }
681        } else {
682            return lint_parallel(content, rules);
683        }
684    } else {
685        // Non-parallel processing
686        // If document structure optimization is enabled
687        if optimize_flags.enable_document_structure {
688            // If selective linting is enabled
689            if optimize_flags.enable_selective_linting {
690                return lint_selective(content, rules);
691            } else {
692                return lint_with_structure(content, rules);
693            }
694        } else {
695            return lint(content, rules);
696        }
697    }
698}
699*/