seekr_code/scanner/
walker.rs1use std::path::{Path, PathBuf};
7use std::sync::Mutex;
8
9use ignore::WalkBuilder;
10
11use crate::config::SeekrConfig;
12use crate::error::ScannerError;
13use crate::scanner::{ScanEntry, ScanResult};
14
15pub fn walk_directory(
19 root: &Path,
20 config: &SeekrConfig,
21) -> Result<ScanResult, ScannerError> {
22 let start = std::time::Instant::now();
23
24 let mut builder = WalkBuilder::new(root);
25
26 builder
28 .hidden(true) .git_ignore(true)
30 .git_global(true)
31 .git_exclude(true)
32 .follow_links(false)
33 .threads(num_cpus());
34
35 let mut overrides_builder = ignore::overrides::OverrideBuilder::new(root);
37 for pattern in &config.exclude_patterns {
38 let exclude = format!("!{}", pattern);
40 overrides_builder
41 .add(&exclude)
42 .map_err(|e| ScannerError::FilterError(format!("Invalid exclude pattern '{}': {}", pattern, e)))?;
43 }
44 let overrides = overrides_builder
45 .build()
46 .map_err(|e| ScannerError::FilterError(format!("Failed to build overrides: {}", e)))?;
47 builder.overrides(overrides);
48
49 let entries_mutex: Mutex<Vec<ScanEntry>> = Mutex::new(Vec::new());
51 let skipped_mutex: Mutex<usize> = Mutex::new(0);
52
53 builder.build_parallel().run(|| {
54 Box::new(|entry| {
55 match entry {
56 Ok(dir_entry) => {
57 if dir_entry.file_type().map_or(false, |ft| ft.is_file()) {
59 let path = dir_entry.path().to_path_buf();
60
61 match dir_entry.metadata() {
63 Ok(metadata) => {
64 let size = metadata.len();
65
66 let scan_entry = ScanEntry {
70 path,
71 size,
72 modified: metadata.modified().ok(),
73 };
74 entries_mutex.lock().unwrap().push(scan_entry);
75 }
76 Err(_) => {
77 *skipped_mutex.lock().unwrap() += 1;
78 }
79 }
80 }
81 ignore::WalkState::Continue
82 }
83 Err(_) => {
84 *skipped_mutex.lock().unwrap() += 1;
85 ignore::WalkState::Continue
86 }
87 }
88 })
89 });
90
91 let entries = entries_mutex.into_inner().unwrap();
92 let skipped = skipped_mutex.into_inner().unwrap();
93 let duration = start.elapsed();
94
95 tracing::info!(
96 files = entries.len(),
97 skipped = skipped,
98 duration_ms = duration.as_millis(),
99 "Directory scan complete"
100 );
101
102 Ok(ScanResult {
103 entries,
104 skipped,
105 duration,
106 })
107}
108
109pub fn walk_directory_simple(root: &Path) -> Result<Vec<PathBuf>, ScannerError> {
111 let walker = WalkBuilder::new(root)
112 .hidden(true)
113 .git_ignore(true)
114 .build();
115
116 let mut files = Vec::new();
117 for entry in walker {
118 match entry {
119 Ok(dir_entry) => {
120 if dir_entry.file_type().map_or(false, |ft| ft.is_file()) {
121 files.push(dir_entry.path().to_path_buf());
122 }
123 }
124 Err(e) => {
125 tracing::warn!("Walk error: {}", e);
126 }
127 }
128 }
129
130 Ok(files)
131}
132
/// Returns the parallelism reported by the standard library, or 4 when it
/// cannot be determined.
fn num_cpus() -> usize {
    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 4,
    }
}
139
#[cfg(test)]
mod tests {
    use super::*;

    /// The crate's own `src` directory serves as the fixture tree for both tests.
    fn source_root() -> PathBuf {
        Path::new(env!("CARGO_MANIFEST_DIR")).join("src")
    }

    /// The sequential walker should return a non-empty list that contains a
    /// `walker.rs` file.
    #[test]
    fn test_walk_simple() {
        let found = walk_directory_simple(&source_root()).unwrap();

        assert!(!found.is_empty(), "Should find at least some source files");

        let has_walker = found
            .iter()
            .any(|candidate| candidate.ends_with("walker.rs"));
        assert!(has_walker, "Should find walker.rs in the source tree");
    }

    /// The parallel walker with default configuration should find files and
    /// finish within the time bound.
    #[test]
    fn test_walk_parallel() {
        let defaults = SeekrConfig::default();
        let outcome = walk_directory(&source_root(), &defaults).unwrap();

        assert!(!outcome.entries.is_empty());
        assert!(outcome.duration.as_secs() < 10, "Scan should be fast");
    }
}