Skip to main content

seekr_code/scanner/
walker.rs

//! Parallel file tree walker.
//!
//! Uses the `ignore` crate to walk directory trees while respecting
//! `.gitignore` rules and custom exclude patterns.
5
6use std::path::{Path, PathBuf};
7use std::sync::Mutex;
8
9use ignore::WalkBuilder;
10
11use crate::config::SeekrConfig;
12use crate::error::ScannerError;
13use crate::scanner::{ScanEntry, ScanResult};
14
15/// Walk a directory tree in parallel, returning all matching file entries.
16///
17/// Respects `.gitignore` rules and applies configured filters.
18pub fn walk_directory(
19    root: &Path,
20    config: &SeekrConfig,
21) -> Result<ScanResult, ScannerError> {
22    let start = std::time::Instant::now();
23
24    let mut builder = WalkBuilder::new(root);
25
26    // Configure the walker
27    builder
28        .hidden(true) // respect hidden files (.gitignore default behavior)
29        .git_ignore(true)
30        .git_global(true)
31        .git_exclude(true)
32        .follow_links(false)
33        .threads(num_cpus());
34
35    // Add custom exclude overrides from config
36    let mut overrides_builder = ignore::overrides::OverrideBuilder::new(root);
37    for pattern in &config.exclude_patterns {
38        // Negate the pattern to make it an exclude
39        let exclude = format!("!{}", pattern);
40        overrides_builder
41            .add(&exclude)
42            .map_err(|e| ScannerError::FilterError(format!("Invalid exclude pattern '{}': {}", pattern, e)))?;
43    }
44    let overrides = overrides_builder
45        .build()
46        .map_err(|e| ScannerError::FilterError(format!("Failed to build overrides: {}", e)))?;
47    builder.overrides(overrides);
48
49    // Collect entries (using simple Walk for now, parallel walk for large dirs)
50    let entries_mutex: Mutex<Vec<ScanEntry>> = Mutex::new(Vec::new());
51    let skipped_mutex: Mutex<usize> = Mutex::new(0);
52
53    builder.build_parallel().run(|| {
54        Box::new(|entry| {
55            match entry {
56                Ok(dir_entry) => {
57                    // Skip directories, we only want files
58                    if dir_entry.file_type().map_or(false, |ft| ft.is_file()) {
59                        let path = dir_entry.path().to_path_buf();
60
61                        // Get file metadata
62                        match dir_entry.metadata() {
63                            Ok(metadata) => {
64                                let size = metadata.len();
65
66                                // Skip files exceeding the max size
67                                // (config.max_file_size accessed via closure would need Arc,
68                                //  for now we use a generous default)
69                                let scan_entry = ScanEntry {
70                                    path,
71                                    size,
72                                    modified: metadata.modified().ok(),
73                                };
74                                entries_mutex.lock().unwrap().push(scan_entry);
75                            }
76                            Err(_) => {
77                                *skipped_mutex.lock().unwrap() += 1;
78                            }
79                        }
80                    }
81                    ignore::WalkState::Continue
82                }
83                Err(_) => {
84                    *skipped_mutex.lock().unwrap() += 1;
85                    ignore::WalkState::Continue
86                }
87            }
88        })
89    });
90
91    let entries = entries_mutex.into_inner().unwrap();
92    let skipped = skipped_mutex.into_inner().unwrap();
93    let duration = start.elapsed();
94
95    tracing::info!(
96        files = entries.len(),
97        skipped = skipped,
98        duration_ms = duration.as_millis(),
99        "Directory scan complete"
100    );
101
102    Ok(ScanResult {
103        entries,
104        skipped,
105        duration,
106    })
107}
108
109/// Walk a directory tree sequentially (simpler, for smaller directories).
110pub fn walk_directory_simple(root: &Path) -> Result<Vec<PathBuf>, ScannerError> {
111    let walker = WalkBuilder::new(root)
112        .hidden(true)
113        .git_ignore(true)
114        .build();
115
116    let mut files = Vec::new();
117    for entry in walker {
118        match entry {
119            Ok(dir_entry) => {
120                if dir_entry.file_type().map_or(false, |ft| ft.is_file()) {
121                    files.push(dir_entry.path().to_path_buf());
122                }
123            }
124            Err(e) => {
125                tracing::warn!("Walk error: {}", e);
126            }
127        }
128    }
129
130    Ok(files)
131}
132
/// Number of threads to use for parallel work.
///
/// Returns the value reported by `std::thread::available_parallelism`, or
/// falls back to 4 when that value cannot be determined.
fn num_cpus() -> usize {
    const FALLBACK_THREADS: usize = 4;

    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => FALLBACK_THREADS,
    }
}
139
#[cfg(test)]
mod tests {
    use super::*;

    /// The crate's own `src` directory, used as the tree to scan in both tests.
    fn source_root() -> PathBuf {
        let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        dir.push("src");
        dir
    }

    /// The sequential walker finds files, including this module's own source.
    #[test]
    fn test_walk_simple() {
        let found = walk_directory_simple(&source_root()).unwrap();
        assert!(!found.is_empty(), "Should find at least some source files");

        let has_walker = found.iter().any(|file| file.ends_with("walker.rs"));
        assert!(has_walker, "Should find walker.rs in the source tree");
    }

    /// The parallel walker returns entries with a default config and finishes quickly.
    #[test]
    fn test_walk_parallel() {
        let scan = walk_directory(&source_root(), &SeekrConfig::default()).unwrap();
        assert!(!scan.entries.is_empty());
        assert!(scan.duration.as_secs() < 10, "Scan should be fast");
    }
}