seekr_code/scanner/
walker.rs1use std::path::{Path, PathBuf};
7use std::sync::Mutex;
8
9use ignore::WalkBuilder;
10
11use crate::config::SeekrConfig;
12use crate::error::ScannerError;
13use crate::scanner::{ScanEntry, ScanResult};
14
15pub fn walk_directory(root: &Path, config: &SeekrConfig) -> Result<ScanResult, ScannerError> {
19 let start = std::time::Instant::now();
20
21 let mut builder = WalkBuilder::new(root);
22
23 builder
25 .hidden(true) .git_ignore(true)
27 .git_global(true)
28 .git_exclude(true)
29 .follow_links(false)
30 .threads(num_cpus());
31
32 let mut overrides_builder = ignore::overrides::OverrideBuilder::new(root);
34 for pattern in &config.exclude_patterns {
35 let exclude = format!("!{}", pattern);
37 overrides_builder.add(&exclude).map_err(|e| {
38 ScannerError::FilterError(format!("Invalid exclude pattern '{}': {}", pattern, e))
39 })?;
40 }
41 let overrides = overrides_builder
42 .build()
43 .map_err(|e| ScannerError::FilterError(format!("Failed to build overrides: {}", e)))?;
44 builder.overrides(overrides);
45
46 let entries_mutex: Mutex<Vec<ScanEntry>> = Mutex::new(Vec::new());
48 let skipped_mutex: Mutex<usize> = Mutex::new(0);
49
50 builder.build_parallel().run(|| {
51 Box::new(|entry| {
52 match entry {
53 Ok(dir_entry) => {
54 if dir_entry.file_type().is_some_and(|ft| ft.is_file()) {
56 let path = dir_entry.path().to_path_buf();
57
58 match dir_entry.metadata() {
60 Ok(metadata) => {
61 let size = metadata.len();
62
63 let scan_entry = ScanEntry {
67 path,
68 size,
69 modified: metadata.modified().ok(),
70 };
71 entries_mutex.lock().unwrap().push(scan_entry);
72 }
73 Err(_) => {
74 *skipped_mutex.lock().unwrap() += 1;
75 }
76 }
77 }
78 ignore::WalkState::Continue
79 }
80 Err(_) => {
81 *skipped_mutex.lock().unwrap() += 1;
82 ignore::WalkState::Continue
83 }
84 }
85 })
86 });
87
88 let entries = entries_mutex.into_inner().unwrap();
89 let skipped = skipped_mutex.into_inner().unwrap();
90 let duration = start.elapsed();
91
92 tracing::info!(
93 files = entries.len(),
94 skipped = skipped,
95 duration_ms = duration.as_millis(),
96 "Directory scan complete"
97 );
98
99 Ok(ScanResult {
100 entries,
101 skipped,
102 duration,
103 })
104}
105
106pub fn walk_directory_simple(root: &Path) -> Result<Vec<PathBuf>, ScannerError> {
108 let walker = WalkBuilder::new(root).hidden(true).git_ignore(true).build();
109
110 let mut files = Vec::new();
111 for entry in walker {
112 match entry {
113 Ok(dir_entry) => {
114 if dir_entry.file_type().is_some_and(|ft| ft.is_file()) {
115 files.push(dir_entry.path().to_path_buf());
116 }
117 }
118 Err(e) => {
119 tracing::warn!("Walk error: {}", e);
120 }
121 }
122 }
123
124 Ok(files)
125}
126
/// Returns the parallelism reported by the standard library, or 4 when that
/// query fails.
fn num_cpus() -> usize {
    /// Thread count used when the available parallelism cannot be determined.
    const FALLBACK_THREADS: usize = 4;

    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => FALLBACK_THREADS,
    }
}
133
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the crate's `src` directory, which both tests scan.
    fn source_root() -> PathBuf {
        Path::new(env!("CARGO_MANIFEST_DIR")).join("src")
    }

    /// The sequential walker should return a non-empty listing that contains
    /// a `walker.rs` file.
    #[test]
    fn test_walk_simple() {
        let found = walk_directory_simple(&source_root()).unwrap();
        assert!(!found.is_empty(), "Should find at least some source files");

        let has_walker = found.iter().any(|candidate| candidate.ends_with("walker.rs"));
        assert!(has_walker, "Should find walker.rs in the source tree");
    }

    /// The parallel walker with the default configuration should return at
    /// least one entry and finish in under ten seconds.
    #[test]
    fn test_walk_parallel() {
        let defaults = SeekrConfig::default();
        let scan = walk_directory(&source_root(), &defaults).unwrap();
        assert!(!scan.entries.is_empty());
        assert!(scan.duration.as_secs() < 10, "Scan should be fast");
    }
}