rumdl/
lib.rs

1pub mod config;
2pub mod init;
3pub mod profiling;
4pub mod rule;
5pub mod rules;
6pub mod utils;
7
8#[cfg(feature = "python")]
9pub mod python;
10
11// Re-export commonly used types
12pub use rules::heading_utils::{Heading, HeadingStyle};
13pub use rules::*;
14
15use globset::GlobBuilder;
16use std::path::{Path, PathBuf};
17
18/// Collect patterns from .gitignore files
19///
20/// This function reads the closest .gitignore file and returns a list of patterns
21/// that can be used to exclude files from linting.
22pub fn collect_gitignore_patterns(start_dir: &str) -> Vec<String> {
23    use std::fs;
24
25    let mut patterns = Vec::new();
26
27    // Start from the given directory and look for .gitignore files
28    // going up to parent directories
29    let path = Path::new(start_dir);
30    let mut current_dir = if path.is_file() {
31        path.parent().unwrap_or(Path::new(".")).to_path_buf()
32    } else {
33        path.to_path_buf()
34    };
35
36    // Track visited directories to avoid duplicates
37    let mut visited_dirs = std::collections::HashSet::new();
38
39    while visited_dirs.insert(current_dir.clone()) {
40        let gitignore_path = current_dir.join(".gitignore");
41
42        if gitignore_path.exists() && gitignore_path.is_file() {
43            // Read the .gitignore file and process each pattern
44            if let Ok(content) = fs::read_to_string(&gitignore_path) {
45                for line in content.lines() {
46                    // Skip comments and empty lines
47                    let trimmed = line.trim();
48                    if !trimmed.is_empty() && !trimmed.starts_with('#') {
49                        // Normalize pattern to fit our exclude format
50                        let pattern = normalize_gitignore_pattern(trimmed);
51                        if !pattern.is_empty() {
52                            patterns.push(pattern);
53                        }
54                    }
55                }
56            }
57        }
58
59        // Check for global gitignore in .git/info/exclude
60        let git_dir = current_dir.join(".git");
61        if git_dir.exists() && git_dir.is_dir() {
62            let exclude_path = git_dir.join("info/exclude");
63            if exclude_path.exists() && exclude_path.is_file() {
64                if let Ok(content) = fs::read_to_string(&exclude_path) {
65                    for line in content.lines() {
66                        // Skip comments and empty lines
67                        let trimmed = line.trim();
68                        if !trimmed.is_empty() && !trimmed.starts_with('#') {
69                            // Normalize pattern to fit our exclude format
70                            let pattern = normalize_gitignore_pattern(trimmed);
71                            if !pattern.is_empty() {
72                                patterns.push(pattern);
73                            }
74                        }
75                    }
76                }
77            }
78        }
79
80        // Go up to parent directory
81        match current_dir.parent() {
82            Some(parent) => current_dir = parent.to_path_buf(),
83            None => break,
84        }
85    }
86
87    // Add some common patterns that are usually in .gitignore files
88    // but might not be in the specific project's .gitignore
89    let common_patterns = vec![
90        "node_modules",
91        ".git",
92        ".github",
93        ".vscode",
94        ".idea",
95        "dist",
96        "build",
97        "target",
98    ];
99
100    for pattern in common_patterns {
101        if !patterns.iter().any(|p| p == pattern) {
102            patterns.push(pattern.to_string());
103        }
104    }
105
106    patterns
107}
108
/// Normalize a single gitignore line into the form used by the exclude matcher.
///
/// The steps, in order:
/// 1. surrounding whitespace is trimmed;
/// 2. one leading `/` (gitignore's "anchored to this directory" marker) is removed;
/// 3. one trailing `/` (gitignore's "directories only" marker) is removed,
///    unless that would leave nothing;
/// 4. if what remains starts with `!` (a negated pattern, which is not
///    supported) an empty string is returned so the caller can drop it.
///
/// Everything else, including `*` and `**` wildcards, is returned unchanged.
fn normalize_gitignore_pattern(pattern: &str) -> String {
    let trimmed = pattern.trim();

    let unanchored = trimmed.strip_prefix('/').unwrap_or(trimmed);

    let bare = match unanchored.strip_suffix('/') {
        // Keep a lone "/" as-is rather than turning it into an empty pattern.
        Some(rest) if !rest.is_empty() => rest,
        _ => unanchored,
    };

    if bare.starts_with('!') {
        String::new()
    } else {
        bare.to_string()
    }
}
144
145/// Match a path against a gitignore pattern
146fn matches_gitignore_pattern(path: &str, pattern: &str) -> bool {
147    // Handle directory patterns (ending with / or no glob chars)
148    if pattern.ends_with('/') || !pattern.contains('*') {
149        let dir_pattern = pattern.trim_end_matches('/');
150        // For directory patterns, we want to match the entire path component
151        let path_components: Vec<&str> = path.split('/').collect();
152        let pattern_components: Vec<&str> = dir_pattern.split('/').collect();
153
154        // Check if any path component matches the pattern
155        path_components.windows(pattern_components.len()).any(|window| {
156            window.iter().zip(pattern_components.iter()).all(|(p, pat)| {
157                p == pat
158            })
159        })
160    } else {
161        // Use globset for glob patterns
162        if let Ok(glob_result) = GlobBuilder::new(pattern)
163            .literal_separator(true)
164            .build()
165        {
166            let matcher = glob_result.compile_matcher();
167            matcher.is_match(path)
168        } else {
169            // If glob compilation fails, treat it as a literal string
170            path.contains(pattern)
171        }
172    }
173}
174
175/// Should exclude a file based on patterns
176///
177/// This function checks if a file should be excluded based on a list of glob patterns.
178pub fn should_exclude(file_path: &str, exclude_patterns: &[String], respect_gitignore: bool) -> bool {
179    // Convert to absolute path
180    let path = Path::new(file_path);
181    let absolute_path = if path.is_absolute() {
182        path.to_path_buf()
183    } else {
184        std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")).join(path)
185    };
186
187    // Get the path relative to the current directory
188    let relative_path = if let Ok(current_dir) = std::env::current_dir() {
189        if let Ok(stripped) = absolute_path.strip_prefix(&current_dir) {
190            stripped.to_path_buf()
191        } else {
192            absolute_path.clone()
193        }
194    } else {
195        absolute_path.clone()
196    };
197
198    // Convert to string for pattern matching
199    let normalized_path = relative_path.to_string_lossy();
200    let normalized_path_str = normalized_path.as_ref();
201
202    // If respect_gitignore is true, check .gitignore patterns first
203    if respect_gitignore {
204        let gitignore_patterns = collect_gitignore_patterns(file_path);
205        for pattern in &gitignore_patterns {
206            let normalized_pattern = pattern.strip_prefix("./").unwrap_or(pattern);
207            if matches_gitignore_pattern(normalized_path_str, normalized_pattern) {
208                return true;
209            }
210        }
211    }
212
213    // Then check explicit exclude patterns
214    for pattern in exclude_patterns {
215        // Normalize the pattern by removing leading ./ if present
216        let normalized_pattern = pattern.strip_prefix("./").unwrap_or(pattern);
217
218        // Handle directory patterns (ending with / or no glob chars)
219        if normalized_pattern.ends_with('/') || !normalized_pattern.contains('*') {
220            let dir_pattern = normalized_pattern.trim_end_matches('/');
221            // For directory patterns, we want to match the entire path component
222            let path_components: Vec<&str> = normalized_path_str.split('/').collect();
223            let pattern_components: Vec<&str> = dir_pattern.split('/').collect();
224
225            // Check if pattern components match at any position in the path
226            for i in 0..=path_components.len().saturating_sub(pattern_components.len()) {
227                let mut matches = true;
228                for (j, pattern_part) in pattern_components.iter().enumerate() {
229                    if path_components.get(i + j) != Some(pattern_part) {
230                        matches = false;
231                        break;
232                    }
233                }
234                if matches {
235                    return true;
236                }
237            }
238
239            // If it's not a directory pattern (no /), also try as a literal string
240            if !normalized_pattern.contains('/') {
241                if normalized_path_str.contains(dir_pattern) {
242                    return true;
243                }
244            }
245            continue;
246        }
247
248        // Try to create a glob pattern
249        let glob_result = GlobBuilder::new(normalized_pattern)
250            .literal_separator(true)  // Make sure * doesn't match /
251            .build()
252            .and_then(|glob| Ok(glob.compile_matcher()));
253
254        match glob_result {
255            Ok(matcher) => {
256                if matcher.is_match(normalized_path_str) {
257                    return true;
258                }
259            }
260            Err(_) => {
261                // If pattern is invalid as a glob, treat it as a literal string
262                if normalized_path_str.contains(normalized_pattern) {
263                    return true;
264                }
265            }
266        }
267    }
268
269    false
270}
271
272/// Determines if a file should be included based on patterns
273///
274/// This function checks if a file should be included based on a list of glob patterns.
275/// If include_patterns is empty, all files are included.
276pub fn should_include(file_path: &str, include_patterns: &[String]) -> bool {
277    // If no include patterns are specified, include everything
278    if include_patterns.is_empty() {
279        return true;
280    }
281
282    // Convert to absolute path
283    let path = Path::new(file_path);
284    let absolute_path = if path.is_absolute() {
285        path.to_path_buf()
286    } else {
287        std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")).join(path)
288    };
289
290    // Get the path relative to the current directory
291    let relative_path = if let Ok(current_dir) = std::env::current_dir() {
292        if let Ok(stripped) = absolute_path.strip_prefix(&current_dir) {
293            stripped.to_path_buf()
294        } else {
295            absolute_path.clone()
296        }
297    } else {
298        absolute_path.clone()
299    };
300
301    // Convert to string for pattern matching
302    let normalized_path = relative_path.to_string_lossy();
303    let normalized_path_str = normalized_path.as_ref();
304
305    for pattern in include_patterns {
306        // Normalize the pattern by removing leading ./ if present
307        let normalized_pattern = pattern.strip_prefix("./").unwrap_or(pattern);
308
309        // Handle directory patterns (ending with / or no glob chars)
310        if normalized_pattern.ends_with('/') || !normalized_pattern.contains('*') {
311            let dir_pattern = normalized_pattern.trim_end_matches('/');
312            // For directory patterns, we want to match the entire path component
313            let path_components: Vec<&str> = normalized_path_str.split('/').collect();
314            let pattern_components: Vec<&str> = dir_pattern.split('/').collect();
315
316            // Check if pattern components match at any position in the path
317            for i in 0..=path_components.len().saturating_sub(pattern_components.len()) {
318                let mut matches = true;
319                for (j, pattern_part) in pattern_components.iter().enumerate() {
320                    if path_components.get(i + j) != Some(pattern_part) {
321                        matches = false;
322                        break;
323                    }
324                }
325                if matches {
326                    return true;
327                }
328            }
329
330            // If it's not a directory pattern (no /), also try as a literal string
331            if !normalized_pattern.contains('/') {
332                if normalized_path_str.contains(dir_pattern) {
333                    return true;
334                }
335            }
336            continue;
337        }
338
339        // Try to create a glob pattern
340        let glob_result = GlobBuilder::new(normalized_pattern)
341            .literal_separator(true)  // Make sure * doesn't match /
342            .build()
343            .and_then(|glob| Ok(glob.compile_matcher()));
344
345        match glob_result {
346            Ok(matcher) => {
347                if matcher.is_match(normalized_path_str) {
348                    return true;
349                }
350            }
351            Err(_) => {
352                // If pattern is invalid as a glob, treat it as a literal string
353                if normalized_path_str.contains(normalized_pattern) {
354                    return true;
355                }
356            }
357        }
358    }
359
360    false
361}
362
363/// Lint a Markdown file
364pub fn lint(content: &str, rules: &[Box<dyn rule::Rule>]) -> rule::LintResult {
365    let _timer = profiling::ScopedTimer::new("lint_total");
366
367    let mut warnings = Vec::new();
368
369    for rule in rules {
370        let _rule_timer = profiling::ScopedTimer::new(&format!("rule:{}", rule.name()));
371
372        match rule.check(content) {
373            Ok(rule_warnings) => {
374                warnings.extend(rule_warnings);
375            }
376            Err(e) => {
377                eprintln!("Error checking rule {}: {}", rule.name(), e);
378            }
379        }
380    }
381
382    // Force profiling to be enabled in debug mode
383    #[cfg(debug_assertions)]
384    {
385        if !warnings.is_empty() {
386            eprintln!("Found {} warnings", warnings.len());
387        }
388    }
389
390    Ok(warnings)
391}
392
393/// Get the profiling report
394pub fn get_profiling_report() -> String {
395    profiling::get_report()
396}
397
398/// Reset the profiling data
399pub fn reset_profiling() {
400    profiling::reset()
401}