rumdl/lib.rs
1pub mod config;
2pub mod init;
3pub mod profiling;
4pub mod rule;
5pub mod rules;
6pub mod utils;
7
8#[cfg(feature = "python")]
9pub mod python;
10
11pub use rules::heading_utils::{Heading, HeadingStyle};
12pub use rules::*;
13
14use crate::rule::{LintResult, Rule};
15use globset::GlobBuilder;
16use std::path::{Path, PathBuf};
17
18/// Collect patterns from .gitignore files
19///
20/// This function reads the closest .gitignore file and returns a list of patterns
21/// that can be used to exclude files from linting.
22pub fn collect_gitignore_patterns(start_dir: &str) -> Vec<String> {
23 use std::fs;
24
25 let mut patterns = Vec::new();
26
27 // Start from the given directory and look for .gitignore files
28 // going up to parent directories
29 let path = Path::new(start_dir);
30 let mut current_dir = if path.is_file() {
31 path.parent().unwrap_or(Path::new(".")).to_path_buf()
32 } else {
33 path.to_path_buf()
34 };
35
36 // Track visited directories to avoid duplicates
37 let mut visited_dirs = std::collections::HashSet::new();
38
39 while visited_dirs.insert(current_dir.clone()) {
40 let gitignore_path = current_dir.join(".gitignore");
41
42 if gitignore_path.exists() && gitignore_path.is_file() {
43 // Read the .gitignore file and process each pattern
44 if let Ok(content) = fs::read_to_string(&gitignore_path) {
45 for line in content.lines() {
46 // Skip comments and empty lines
47 let trimmed = line.trim();
48 if !trimmed.is_empty() && !trimmed.starts_with('#') {
49 // Normalize pattern to fit our exclude format
50 let pattern = normalize_gitignore_pattern(trimmed);
51 if !pattern.is_empty() {
52 patterns.push(pattern);
53 }
54 }
55 }
56 }
57 }
58
59 // Check for global gitignore in .git/info/exclude
60 let git_dir = current_dir.join(".git");
61 if git_dir.exists() && git_dir.is_dir() {
62 let exclude_path = git_dir.join("info/exclude");
63 if exclude_path.exists() && exclude_path.is_file() {
64 if let Ok(content) = fs::read_to_string(&exclude_path) {
65 for line in content.lines() {
66 // Skip comments and empty lines
67 let trimmed = line.trim();
68 if !trimmed.is_empty() && !trimmed.starts_with('#') {
69 // Normalize pattern to fit our exclude format
70 let pattern = normalize_gitignore_pattern(trimmed);
71 if !pattern.is_empty() {
72 patterns.push(pattern);
73 }
74 }
75 }
76 }
77 }
78 }
79
80 // Go up to parent directory
81 match current_dir.parent() {
82 Some(parent) => current_dir = parent.to_path_buf(),
83 None => break,
84 }
85 }
86
87 patterns
88}
89
/// Normalize a gitignore pattern to fit our exclude format.
///
/// The pattern is trimmed, then:
///
/// * one leading `/` is removed (gitignore uses it to anchor a pattern to the
///   directory of the ignore file; that anchoring is not preserved here),
/// * one trailing `/` is removed (gitignore uses it to restrict a pattern to
///   directories), unless the slash is the only character left,
/// * negated patterns (starting with `!`) are not supported and yield an
///   empty string, which callers treat as "skip this pattern".
///
/// Everything else, including `*` and `**` wildcards, is passed through
/// unchanged.
fn normalize_gitignore_pattern(pattern: &str) -> String {
    let trimmed = pattern.trim();
    let unanchored = trimmed.strip_prefix('/').unwrap_or(trimmed);

    // Keep a lone "/" as-is rather than reducing it to an empty pattern.
    let candidate = match unanchored.strip_suffix('/') {
        Some(rest) if !rest.is_empty() => rest,
        _ => unanchored,
    };

    if candidate.starts_with('!') {
        String::new()
    } else {
        candidate.to_owned()
    }
}
125
126/// Match a path against a gitignore pattern
127fn matches_gitignore_pattern(path: &str, pattern: &str) -> bool {
128 // Handle directory patterns (ending with / or no glob chars)
129 if pattern.ends_with('/') || !pattern.contains('*') {
130 let dir_pattern = pattern.trim_end_matches('/');
131 // For directory patterns, we want to match the entire path component
132 let path_components: Vec<&str> = path.split('/').collect();
133 let pattern_components: Vec<&str> = dir_pattern.split('/').collect();
134
135 // Check if any path component matches the pattern
136 path_components
137 .windows(pattern_components.len())
138 .any(|window| {
139 window
140 .iter()
141 .zip(pattern_components.iter())
142 .all(|(p, pat)| p == pat)
143 })
144 } else {
145 // Use globset for glob patterns
146 if let Ok(glob_result) = GlobBuilder::new(pattern).literal_separator(true).build() {
147 let matcher = glob_result.compile_matcher();
148 matcher.is_match(path)
149 } else {
150 // If glob compilation fails, treat it as a literal string
151 path.contains(pattern)
152 }
153 }
154}
155
/// Normalize a file path for pattern matching.
///
/// Relative paths are resolved against the current working directory. If the
/// resulting absolute path lies below the working directory, that prefix is
/// removed again, so the result is relative to the working directory;
/// otherwise the absolute path is returned as is.
///
/// If the working directory cannot be determined, absolute paths are returned
/// unchanged and relative paths are returned prefixed with `./`.
///
/// The path is not canonicalized: symlinks and `..` components are left alone.
/// Non-UTF-8 sequences are replaced lossily in the returned string.
fn normalize_path_for_matching(file_path: &str) -> String {
    let path = Path::new(file_path);

    // Query the working directory once so both steps agree on its value.
    let relative = match std::env::current_dir() {
        Ok(current_dir) => {
            let absolute = if path.is_absolute() {
                path.to_path_buf()
            } else {
                current_dir.join(path)
            };

            absolute
                .strip_prefix(&current_dir)
                .map(Path::to_path_buf)
                .unwrap_or(absolute)
        }
        Err(_) if path.is_absolute() => path.to_path_buf(),
        Err(_) => PathBuf::from(".").join(path),
    };

    relative.to_string_lossy().into_owned()
}
184
185/// Should exclude a file based on patterns
186///
187/// This function checks if a file should be excluded based on a list of glob patterns.
188pub fn should_exclude(
189 file_path: &str,
190 exclude_patterns: &[String],
191 ignore_gitignore: bool,
192) -> bool {
193 // Skip empty patterns as an optimization
194 if exclude_patterns.is_empty() && ignore_gitignore {
195 return false;
196 }
197
198 // Get normalized path for pattern matching
199 let normalized_path_str = normalize_path_for_matching(file_path);
200
201 // Unless ignore_gitignore is true, check .gitignore patterns first
202 if !ignore_gitignore {
203 let gitignore_patterns = collect_gitignore_patterns(file_path);
204 for pattern in &gitignore_patterns {
205 let normalized_pattern = pattern.strip_prefix("./").unwrap_or(pattern);
206 if matches_gitignore_pattern(&normalized_path_str, normalized_pattern) {
207 return true;
208 }
209 }
210 }
211
212 // Then check explicit exclude patterns
213 for pattern in exclude_patterns {
214 // Normalize the pattern by removing leading ./ if present
215 let normalized_pattern = pattern.strip_prefix("./").unwrap_or(pattern);
216
217 // Special case: Handle patterns ending with slash (directory patterns)
218 if normalized_pattern.ends_with('/') {
219 // Convert "dir/" to "dir/**/*" to match all files in that directory and subdirectories
220 let dir_glob_pattern = format!("{}**/*", normalized_pattern);
221
222 let glob_result = GlobBuilder::new(&dir_glob_pattern)
223 .literal_separator(false)
224 .build()
225 .map(|glob| glob.compile_matcher());
226
227 if let Ok(matcher) = glob_result {
228 if matcher.is_match(&normalized_path_str) {
229 return true;
230 }
231 } else {
232 // Fallback to prefix matching if glob fails
233 if normalized_path_str.starts_with(normalized_pattern) {
234 return true;
235 }
236 }
237
238 continue;
239 }
240
241 // Handle invalid glob-like patterns as literal strings
242 if pattern.contains('[') && !pattern.contains(']')
243 || pattern.contains('{') && !pattern.contains('}')
244 {
245 if normalized_path_str.contains(normalized_pattern) {
246 return true;
247 }
248 continue;
249 }
250
251 // Try to create a glob pattern
252 let glob_result = GlobBuilder::new(normalized_pattern)
253 .literal_separator(true) // Make sure * doesn't match /
254 .build()
255 .map(|glob| glob.compile_matcher());
256
257 match glob_result {
258 Ok(matcher) => {
259 if matcher.is_match(&normalized_path_str) {
260 return true;
261 }
262 }
263 Err(_) => {
264 // If pattern is invalid as a glob, treat it as a literal string
265 if normalized_path_str.contains(normalized_pattern) {
266 return true;
267 }
268 }
269 }
270 }
271
272 false
273}
274
275/// Determines if a file should be included based on patterns
276///
277/// This function checks if a file should be included based on a list of glob patterns.
278/// If include_patterns is empty, all files are included.
279pub fn should_include(file_path: &str, include_patterns: &[String]) -> bool {
280 // If no include patterns are specified, include everything
281 if include_patterns.is_empty() {
282 return true;
283 }
284
285 // Get normalized path for pattern matching
286 let normalized_path_str = normalize_path_for_matching(file_path);
287
288 for pattern in include_patterns {
289 // Special case: Treat invalid glob-like patterns as literal strings
290 if pattern.contains('[') && !pattern.contains(']')
291 || pattern.contains('{') && !pattern.contains('}')
292 {
293 if normalized_path_str.contains(pattern) {
294 return true;
295 }
296 continue;
297 }
298
299 // Normalize the pattern by removing leading ./ if present
300 let normalized_pattern = pattern.strip_prefix("./").unwrap_or(pattern);
301
302 // Special case: Handle patterns ending with slash (directory patterns)
303 if normalized_pattern.ends_with('/') {
304 // Convert "dir/" to "dir/**/*" to match all files in that directory and subdirectories
305 let dir_glob_pattern = format!("{}**/*", normalized_pattern);
306
307 let glob_result = GlobBuilder::new(&dir_glob_pattern)
308 .literal_separator(false)
309 .build()
310 .map(|glob| glob.compile_matcher());
311
312 if let Ok(matcher) = glob_result {
313 if matcher.is_match(&normalized_path_str) {
314 return true;
315 }
316 } else {
317 // Fallback to prefix matching if glob fails
318 if normalized_path_str.starts_with(normalized_pattern) {
319 return true;
320 }
321 }
322
323 continue;
324 }
325
326 // Handle path traversal patterns (../ patterns)
327 if normalized_pattern.contains("../") {
328 // For path traversal patterns, we do a direct string comparison
329 // since these are explicitly addressing paths outside current directory
330 if normalized_path_str == normalized_pattern {
331 return true;
332 }
333
334 // Try to normalize both paths for comparison
335 // This handles cases like "./docs/../src/file.md" matching "src/file.md"
336 if let Ok(normalized_pattern_path) = Path::new(normalized_pattern).canonicalize() {
337 if let Ok(normalized_file_path) = Path::new(&normalized_path_str).canonicalize() {
338 if normalized_pattern_path == normalized_file_path {
339 return true;
340 }
341 }
342 }
343
344 // Another approach: try to resolve the pattern using path logic
345 if let Some(resolved_pattern) = normalize_path(normalized_pattern) {
346 // Compare with the file path directly
347 if normalized_path_str == resolved_pattern {
348 return true;
349 }
350
351 // Try as a glob pattern
352 let glob_result = GlobBuilder::new(&resolved_pattern)
353 .literal_separator(true)
354 .build()
355 .map(|glob| glob.compile_matcher());
356
357 if let Ok(matcher) = glob_result {
358 if matcher.is_match(&normalized_path_str) {
359 return true;
360 }
361 }
362 }
363
364 // Try to create a glob pattern for traversal
365 match GlobBuilder::new(normalized_pattern)
366 .literal_separator(false) // Allow matching across directory boundaries
367 .build()
368 .map(|glob| glob.compile_matcher())
369 {
370 Ok(matcher) => {
371 if matcher.is_match(&normalized_path_str) {
372 return true;
373 }
374 }
375 Err(_) => {
376 // Treat as literal string if it's not a valid glob
377 if normalized_path_str.contains(normalized_pattern) {
378 return true;
379 }
380 }
381 }
382 continue;
383 }
384
385 // Special case for certain valid glob characters
386 // that we want to handle as exact-match globs
387
388 // For exact filename matches, we want to match only the filename component
389 if !normalized_pattern.contains('/') && !normalized_pattern.contains('*') {
390 // 1. Get just the filename part of the path
391 let file_name = Path::new(&normalized_path_str)
392 .file_name()
393 .map(|n| n.to_string_lossy().to_string())
394 .unwrap_or_default();
395
396 // 2. Check if the file is directly in the root (no directory component)
397 let _parent = Path::new(&normalized_path_str).parent();
398
399 // 3. If it matches the file name directly, include it
400 if file_name == normalized_pattern {
401 return true;
402 }
403
404 // 4. Try with a specific glob pattern to match either the filename in any directory
405 // or a subdirectory with this name
406 let glob_pattern = format!("**/{}", normalized_pattern);
407 let glob_result = GlobBuilder::new(&glob_pattern)
408 .literal_separator(true)
409 .build()
410 .map(|glob| glob.compile_matcher());
411
412 if let Ok(matcher) = glob_result {
413 if matcher.is_match(&normalized_path_str) {
414 return true;
415 }
416 }
417
418 // Skip further checks
419 continue;
420 }
421
422 // First, try with exact glob pattern
423 let glob_result = GlobBuilder::new(normalized_pattern)
424 .literal_separator(true)
425 .build()
426 .map(|glob| glob.compile_matcher());
427
428 match glob_result {
429 Ok(matcher) => {
430 if matcher.is_match(&normalized_path_str) {
431 return true;
432 }
433
434 // If it doesn't match with exact pattern, try with a more flexible pattern
435 // This adds ** prefix if not already there
436 if !normalized_pattern.starts_with("**") {
437 let flexible_pattern = format!("**/{}", normalized_pattern);
438 let flexible_glob_result = GlobBuilder::new(&flexible_pattern)
439 .literal_separator(true)
440 .build()
441 .map(|glob| glob.compile_matcher());
442
443 if let Ok(flexible_matcher) = flexible_glob_result {
444 if flexible_matcher.is_match(&normalized_path_str) {
445 return true;
446 }
447 }
448 }
449 }
450 Err(_) => {
451 // If glob compilation fails, treat it as a literal string
452 if normalized_path_str.contains(normalized_pattern) {
453 return true;
454 }
455 }
456 }
457 }
458
459 false
460}
461
// Resolve `.`, `..` and empty segments of a `/`-separated path textually.
//
// `.` and empty segments (from repeated, leading or trailing slashes) are
// dropped, and `..` removes the segment before it. A `..` with nothing left to
// remove is dropped silently, so "../a" resolves to "a". The filesystem is
// never consulted, and the result is always `Some`.
fn normalize_path(path: &str) -> Option<String> {
    let kept_segments = path
        .split('/')
        .fold(Vec::new(), |mut kept: Vec<&str>, segment| {
            match segment {
                // Current directory or an empty segment: nothing to keep.
                "" | "." => {}
                // Parent directory: discard the most recent segment, if any.
                ".." => {
                    kept.pop();
                }
                name => kept.push(name),
            }
            kept
        });

    Some(kept_segments.join("/"))
}
480
481/// Lint a file against the given rules
482pub fn lint(content: &str, rules: &[Box<dyn Rule>]) -> LintResult {
483 let mut warnings = Vec::new();
484
485 for rule in rules {
486 match rule.check(content) {
487 Ok(rule_warnings) => {
488 warnings.extend(rule_warnings);
489 }
490 Err(e) => {
491 // Only print errors in non-parallel mode and when not running tests
492 #[cfg(not(test))]
493 eprintln!("Error checking rule {}: {}", rule.name(), e);
494 return Err(e);
495 }
496 }
497 }
498
499 // Only print warning counts in debug mode and when not running tests
500 #[cfg(all(debug_assertions, not(test)))]
501 if !warnings.is_empty() {
502 eprintln!("Found {} warnings", warnings.len());
503 }
504
505 Ok(warnings)
506}
507
/// Get the profiling report.
///
/// Returns the `String` produced by `profiling::get_report()`; the contents
/// and format of the report are defined by the `profiling` module.
pub fn get_profiling_report() -> String {
    profiling::get_report()
}
512
/// Reset the profiling data.
///
/// Delegates to `profiling::reset()`; exactly which data is cleared is defined
/// by the `profiling` module.
pub fn reset_profiling() {
    profiling::reset()
}
517
// NOTE: The parallel processing functions below are commented out because they currently cause compilation errors.
519/*
520#[cfg(feature = "parallel")]
521pub fn lint_parallel(content: &str, rules: &[Box<dyn Rule>]) -> LintResult {
522 let warnings = Arc::new(Mutex::new(Vec::new()));
523 let errors = Arc::new(Mutex::new(Vec::new()));
524
525 rules.par_iter().for_each(|rule| {
526 let rule_result = rule.check(content);
527 match rule_result {
528 Ok(rule_warnings) => {
529 let mut warnings_lock = warnings.lock().unwrap();
530 warnings_lock.extend(rule_warnings);
531 }
532 Err(error) => {
533 let mut errors_lock = errors.lock().unwrap();
534 errors_lock.push(error);
535 }
536 }
537 });
538
539 // Don't print errors in parallel mode - previously: eprintln!("{}", error);
540 let errors_lock = errors.lock().unwrap();
541 if !errors_lock.is_empty() {
542 // In parallel mode, we just log that errors occurred without showing the full content
543 if !errors_lock.is_empty() {
544 // DEBUG LINE REMOVED: Previously showed error count
545 }
546 }
547
548 Ok(warnings.lock().unwrap().clone())
549}
550
551#[cfg(feature = "parallel")]
552pub fn lint_parallel_with_structure(content: &str, rules: &[Box<dyn Rule>]) -> LintResult {
553 let structure = match DocumentStructure::parse(content) {
554 Ok(s) => s,
555 Err(e) => return Err(LintError::new(&format!("Failed to parse document structure: {}", e))),
556 };
557
558 // Filter rules that can skip execution based on the content
559 let filtered_rules: Vec<_> = rules
560 .iter()
561 .filter(|&rule| {
562 if let Some(skippable) = rule.as_any().downcast_ref::<dyn RuleSkippable>() {
563 !skippable.should_skip(&structure)
564 } else {
565 true
566 }
567 })
568 .collect();
569
570 let warnings = Arc::new(Mutex::new(Vec::new()));
571 let errors = Arc::new(Mutex::new(Vec::new()));
572
573 filtered_rules.par_iter().for_each(|rule| {
574 let rule_result = rule.check(content);
575 match rule_result {
576 Ok(rule_warnings) => {
577 let mut warnings_lock = warnings.lock().unwrap();
578 warnings_lock.extend(rule_warnings);
579 }
580 Err(error) => {
581 let mut errors_lock = errors.lock().unwrap();
582 errors_lock.push(error);
583 }
584 }
585 });
586
587 // Don't print errors in parallel mode to avoid content leakage
588 let errors_lock = errors.lock().unwrap();
589 if !errors_lock.is_empty() {
590 // In parallel mode, we just log that errors occurred without showing the full content
591 // DEBUG LINE REMOVED: Previously showed error count and contents
592 // Previously: for error in errors_lock.iter() { eprintln!("{}", error); }
593 }
594
595 Ok(warnings.lock().unwrap().clone())
596}
597
598#[cfg(feature = "parallel")]
599pub fn lint_selective_parallel(content: &str, rules: &[Box<dyn Rule>]) -> LintResult {
600 let structure = match DocumentStructure::parse(content) {
601 Ok(s) => s,
602 Err(e) => return Err(LintError::new(&format!("Failed to parse document structure: {}", e))),
603 };
604
605 // Determine relevant rule categories for the content
606 let relevant_categories = determine_relevant_categories(&structure);
607
608 // Filter rules based on their categories and skippability
609 let filtered_rules: Vec<_> = rules
610 .iter()
611 .filter(|&rule| {
612 // First, check if the rule is in a relevant category
613 let rule_categories: Vec<RuleCategory> = if let Some(categorized) = rule.as_any().downcast_ref::<dyn RuleCategorized>() {
614 categorized.categories()
615 } else {
616 vec![RuleCategory::Uncategorized]
617 };
618
619 // If ANY of the rule's categories are relevant, include it
620 if !rule_categories.iter().any(|cat| relevant_categories.contains(cat)) {
621 return false;
622 }
623
624 // Then check if the rule should be skipped
625 if let Some(skippable) = rule.as_any().downcast_ref::<dyn RuleSkippable>() {
626 !skippable.should_skip(&structure)
627 } else {
628 true
629 }
630 })
631 .collect();
632
633 // If we have no rules left, return empty results
634 if filtered_rules.is_empty() {
635 return Ok(Vec::new());
636 }
637
638 let warnings = Arc::new(Mutex::new(Vec::new()));
639 let errors = Arc::new(Mutex::new(Vec::new()));
640
641 filtered_rules.par_iter().for_each(|rule| {
642 let rule_result = rule.check(content);
643 match rule_result {
644 Ok(rule_warnings) => {
645 let mut warnings_lock = warnings.lock().unwrap();
646 warnings_lock.extend(rule_warnings);
647 }
648 Err(error) => {
649 let mut errors_lock = errors.lock().unwrap();
650 errors_lock.push(error);
651 }
652 }
653 });
654
655 // Don't print errors in parallel mode to avoid content leakage
656 let errors_lock = errors.lock().unwrap();
657 if !errors_lock.is_empty() {
658 // In parallel mode, we just log that errors occurred without showing the full content
659 // DEBUG LINE REMOVED: Previously showed error count and contents
660 // Previously: for error in errors_lock.iter() { eprintln!("{}", error); }
661 }
662
663 Ok(warnings.lock().unwrap().clone())
664}
665
666#[cfg(feature = "parallel")]
667pub fn lint_optimized(content: &str, rules: &[Box<dyn Rule>], optimize_flags: OptimizeFlags) -> LintResult {
668 // Track our linter time
669 let _timer = profiling::ScopedTimer::new("lint_optimized");
670
671 // If parallel processing is enabled
672 if optimize_flags.enable_parallel {
673 // If document structure optimization is enabled
674 if optimize_flags.enable_document_structure {
675 // If selective linting is enabled
676 if optimize_flags.enable_selective_linting {
677 return lint_selective_parallel(content, rules);
678 } else {
679 return lint_parallel_with_structure(content, rules);
680 }
681 } else {
682 return lint_parallel(content, rules);
683 }
684 } else {
685 // Non-parallel processing
686 // If document structure optimization is enabled
687 if optimize_flags.enable_document_structure {
688 // If selective linting is enabled
689 if optimize_flags.enable_selective_linting {
690 return lint_selective(content, rules);
691 } else {
692 return lint_with_structure(content, rules);
693 }
694 } else {
695 return lint(content, rules);
696 }
697 }
698}
699*/