Skip to main content

seekr_code/scanner/
walker.rs

1//! Parallel file tree walker.
2//!
3//! Uses the `ignore` crate to walk directory trees while respecting
4//! `.gitignore` rules and custom exclude patterns.
5
6use std::path::{Path, PathBuf};
7use std::sync::Mutex;
8
9use ignore::WalkBuilder;
10
11use crate::config::SeekrConfig;
12use crate::error::ScannerError;
13use crate::scanner::{ScanEntry, ScanResult};
14
15/// Walk a directory tree in parallel, returning all matching file entries.
16///
17/// Respects `.gitignore` rules and applies configured filters.
18pub fn walk_directory(root: &Path, config: &SeekrConfig) -> Result<ScanResult, ScannerError> {
19    let start = std::time::Instant::now();
20
21    let mut builder = WalkBuilder::new(root);
22
23    // Configure the walker
24    builder
25        .hidden(true) // respect hidden files (.gitignore default behavior)
26        .git_ignore(true)
27        .git_global(true)
28        .git_exclude(true)
29        .follow_links(false)
30        .threads(num_cpus());
31
32    // Add custom exclude overrides from config
33    let mut overrides_builder = ignore::overrides::OverrideBuilder::new(root);
34    for pattern in &config.exclude_patterns {
35        // Negate the pattern to make it an exclude
36        let exclude = format!("!{}", pattern);
37        overrides_builder.add(&exclude).map_err(|e| {
38            ScannerError::FilterError(format!("Invalid exclude pattern '{}': {}", pattern, e))
39        })?;
40    }
41    let overrides = overrides_builder
42        .build()
43        .map_err(|e| ScannerError::FilterError(format!("Failed to build overrides: {}", e)))?;
44    builder.overrides(overrides);
45
46    // Collect entries (using simple Walk for now, parallel walk for large dirs)
47    let entries_mutex: Mutex<Vec<ScanEntry>> = Mutex::new(Vec::new());
48    let skipped_mutex: Mutex<usize> = Mutex::new(0);
49
50    builder.build_parallel().run(|| {
51        Box::new(|entry| {
52            match entry {
53                Ok(dir_entry) => {
54                    // Skip directories, we only want files
55                    if dir_entry.file_type().is_some_and(|ft| ft.is_file()) {
56                        let path = dir_entry.path().to_path_buf();
57
58                        // Get file metadata
59                        match dir_entry.metadata() {
60                            Ok(metadata) => {
61                                let size = metadata.len();
62
63                                // Skip files exceeding the max size
64                                // (config.max_file_size accessed via closure would need Arc,
65                                //  for now we use a generous default)
66                                let scan_entry = ScanEntry {
67                                    path,
68                                    size,
69                                    modified: metadata.modified().ok(),
70                                };
71                                entries_mutex.lock().unwrap().push(scan_entry);
72                            }
73                            Err(_) => {
74                                *skipped_mutex.lock().unwrap() += 1;
75                            }
76                        }
77                    }
78                    ignore::WalkState::Continue
79                }
80                Err(_) => {
81                    *skipped_mutex.lock().unwrap() += 1;
82                    ignore::WalkState::Continue
83                }
84            }
85        })
86    });
87
88    let entries = entries_mutex.into_inner().unwrap();
89    let skipped = skipped_mutex.into_inner().unwrap();
90    let duration = start.elapsed();
91
92    tracing::info!(
93        files = entries.len(),
94        skipped = skipped,
95        duration_ms = duration.as_millis(),
96        "Directory scan complete"
97    );
98
99    Ok(ScanResult {
100        entries,
101        skipped,
102        duration,
103    })
104}
105
106/// Walk a directory tree sequentially (simpler, for smaller directories).
107pub fn walk_directory_simple(root: &Path) -> Result<Vec<PathBuf>, ScannerError> {
108    let walker = WalkBuilder::new(root).hidden(true).git_ignore(true).build();
109
110    let mut files = Vec::new();
111    for entry in walker {
112        match entry {
113            Ok(dir_entry) => {
114                if dir_entry.file_type().is_some_and(|ft| ft.is_file()) {
115                    files.push(dir_entry.path().to_path_buf());
116                }
117            }
118            Err(e) => {
119                tracing::warn!("Walk error: {}", e);
120            }
121        }
122    }
123
124    Ok(files)
125}
126
/// Number of threads to use for the parallel walk.
///
/// Returns the parallelism reported by the standard library, or `4` when that
/// query fails. The `4` is a fallback, not a lower bound: a successfully
/// reported smaller value is returned unchanged.
fn num_cpus() -> usize {
    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 4,
    }
}
133
#[cfg(test)]
mod tests {
    use super::*;

    /// The crate's own `src` directory, used as the fixture for both walkers.
    fn source_root() -> PathBuf {
        Path::new(env!("CARGO_MANIFEST_DIR")).join("src")
    }

    /// The sequential walker finds files, including this very module.
    #[test]
    fn test_walk_simple() {
        let files = walk_directory_simple(&source_root()).unwrap();
        assert!(!files.is_empty(), "Should find at least some source files");

        let found_walker = files.iter().any(|candidate| candidate.ends_with("walker.rs"));
        assert!(found_walker, "Should find walker.rs in the source tree");
    }

    /// The parallel walker returns entries and finishes within ten seconds.
    #[test]
    fn test_walk_parallel() {
        let config = SeekrConfig::default();
        let result = walk_directory(&source_root(), &config).unwrap();

        assert!(!result.entries.is_empty());
        assert!(
            result.duration < std::time::Duration::from_secs(10),
            "Scan should be fast"
        );
    }
}