1use crate::error::{Error, Result};
8use crate::hasher::{HashedInput, VcsHasher};
9use async_trait::async_trait;
10use globset::{Glob, GlobSet, GlobSetBuilder};
11use sha2::{Digest, Sha256};
12use std::collections::BTreeSet;
13use std::fs;
14use std::io::Read;
15use std::path::{Component, Path, PathBuf};
16use tracing::{debug, trace};
17use walkdir::WalkDir;
18
/// Input hasher that resolves patterns by walking the filesystem beneath a
/// workspace root, without consulting any version-control metadata.
#[derive(Clone, Debug)]
pub struct WalkHasher {
    /// Directory that every input pattern is joined onto before it is resolved.
    workspace_root: PathBuf,
}
24
25impl WalkHasher {
26 #[must_use]
28 pub fn new(workspace_root: impl AsRef<Path>) -> Self {
29 Self {
30 workspace_root: workspace_root.as_ref().to_path_buf(),
31 }
32 }
33
34 #[must_use]
36 pub fn workspace_root(&self) -> &Path {
37 &self.workspace_root
38 }
39
40 fn hash_file(path: &Path) -> Result<(String, u64)> {
41 let mut file = fs::File::open(path).map_err(|e| Error::io(e, path, "open"))?;
42 let mut hasher = Sha256::new();
43 let mut buf: Box<[u8]> = vec![0u8; 64 * 1024].into_boxed_slice();
44 let mut size: u64 = 0;
45 loop {
46 let n = file
47 .read(&mut buf)
48 .map_err(|e| Error::io(e, path, "read"))?;
49 if n == 0 {
50 break;
51 }
52 hasher.update(&buf[..n]);
53 size += n as u64;
54 }
55 Ok((hex::encode(hasher.finalize()), size))
56 }
57
58 fn resolve_sync(&self, patterns: &[String]) -> Result<Vec<HashedInput>> {
59 let mut explicit_files: Vec<String> = Vec::new();
60 let mut dirs_to_walk: Vec<(String, GlobSet)> = Vec::new();
61
62 for pat in patterns {
63 let trimmed = pat.trim();
64 if trimmed.is_empty() {
65 continue;
66 }
67 let looks_like_glob = trimmed.contains('*')
68 || trimmed.contains('{')
69 || trimmed.contains('?')
70 || trimmed.contains('[');
71 let abs = self.workspace_root.join(trimmed);
72
73 if looks_like_glob {
74 let base_dir = extract_glob_base(trimmed);
75 let glob = Glob::new(trimmed).map_err(|e| {
76 Error::pattern(format!("invalid glob pattern `{trimmed}`: {e}"))
77 })?;
78 let set = GlobSetBuilder::new()
79 .add(glob)
80 .build()
81 .map_err(|e| Error::pattern(format!("failed to build globset: {e}")))?;
82 dirs_to_walk.push((base_dir, set));
83 } else if abs.is_dir() {
84 let glob_pat = format!("{}/**/*", trimmed.trim_end_matches('/'));
85 let glob = Glob::new(&glob_pat).map_err(|e| {
86 Error::pattern(format!("invalid glob pattern `{glob_pat}`: {e}"))
87 })?;
88 let set = GlobSetBuilder::new()
89 .add(glob)
90 .build()
91 .map_err(|e| Error::pattern(format!("failed to build globset: {e}")))?;
92 dirs_to_walk.push((trimmed.to_string(), set));
93 } else {
94 explicit_files.push(trimmed.to_string());
95 }
96 }
97
98 let mut seen: BTreeSet<PathBuf> = BTreeSet::new();
99 let mut results: Vec<HashedInput> = Vec::new();
100
101 for raw in &explicit_files {
102 let abs = self.workspace_root.join(raw);
103 if abs.is_file() {
104 let rel = normalize_rel_path(Path::new(raw));
105 if seen.insert(rel.clone()) {
106 let (hash, size) = Self::hash_file(&abs)?;
107 results.push(HashedInput {
108 relative_path: rel,
109 absolute_path: canonical_or_abs(&abs),
110 sha256: hash,
111 size,
112 is_executable: is_executable(&abs)?,
113 });
114 }
115 } else {
116 return Err(Error::io(
117 std::io::Error::new(
118 std::io::ErrorKind::NotFound,
119 format!("explicit input file '{raw}' not found"),
120 ),
121 &abs,
122 "open",
123 ));
124 }
125 }
126
127 for (base_dir, globset) in &dirs_to_walk {
128 let walk_root = self.workspace_root.join(base_dir);
129 if !walk_root.exists() {
130 debug!(dir = %base_dir, "Directory does not exist, skipping");
131 continue;
132 }
133 for entry in WalkDir::new(&walk_root).follow_links(true) {
134 let entry = entry.map_err(|e| {
135 let path = e.path().unwrap_or(walk_root.as_path());
136 Error::io(
137 std::io::Error::new(
138 e.io_error()
139 .map_or(std::io::ErrorKind::Other, std::io::Error::kind),
140 format!("walkdir error under {}: {e}", walk_root.display()),
141 ),
142 path,
143 "walkdir",
144 )
145 })?;
146 let path = entry.path();
147 if path.is_dir() {
148 continue;
149 }
150 let Ok(rel) = path.strip_prefix(&self.workspace_root) else {
151 continue;
152 };
153 let rel_norm = normalize_rel_path(rel);
154 if globset.is_match(rel_norm.as_path()) && seen.insert(rel_norm.clone()) {
155 let (hash, size) = Self::hash_file(path)?;
156 results.push(HashedInput {
157 relative_path: rel_norm,
158 absolute_path: canonical_or_abs(path),
159 sha256: hash,
160 size,
161 is_executable: is_executable(path)?,
162 });
163 }
164 }
165 }
166
167 results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
170 trace!(count = results.len(), "WalkHasher resolved inputs");
171 Ok(results)
172 }
173}
174
175#[async_trait]
176impl VcsHasher for WalkHasher {
177 async fn resolve_and_hash(&self, patterns: &[String]) -> Result<Vec<HashedInput>> {
178 self.resolve_sync(patterns)
182 }
183
184 fn name(&self) -> &'static str {
185 "walk"
186 }
187}
188
/// Lexically cleans `p` into a plain relative path without touching the filesystem.
///
/// Normal components are kept in order and each `..` removes the component
/// before it. A `..` with nothing left to remove is dropped, and `.`, root and
/// prefix components are discarded.
fn normalize_rel_path(p: &Path) -> PathBuf {
    p.components().fold(PathBuf::new(), |mut cleaned, part| {
        if let Component::Normal(name) = part {
            cleaned.push(name);
        } else if matches!(part, Component::ParentDir) {
            cleaned.pop();
        }
        cleaned
    })
}
204
/// Returns the canonical form of `p`, falling back to a best-effort absolute path.
///
/// If canonicalization fails, an already-absolute `p` is returned unchanged and
/// a relative `p` is joined onto the current directory (or onto `.` when the
/// current directory cannot be determined).
fn canonical_or_abs(p: &Path) -> PathBuf {
    match fs::canonicalize(p) {
        Ok(resolved) => resolved,
        Err(_) if p.is_absolute() => p.to_path_buf(),
        Err(_) => {
            let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
            cwd.join(p)
        }
    }
}
217
218#[cfg(unix)]
219fn is_executable(path: &Path) -> Result<bool> {
220 use std::os::unix::fs::PermissionsExt;
221
222 let metadata = fs::metadata(path).map_err(|e| Error::io(e, path, "metadata"))?;
223 Ok(metadata.permissions().mode() & 0o111 != 0)
224}
225
226#[cfg(not(unix))]
227fn is_executable(_path: &Path) -> Result<bool> {
228 Ok(false)
229}
230
/// Returns the literal directory prefix of a glob `pattern`.
///
/// Segments are taken from the left until the first one containing a glob
/// metacharacter (`*`, `{`, `?`, `[`). Empty segments are skipped and the
/// remainder is re-joined with `/`. The result is empty when the first
/// segment is already a glob.
fn extract_glob_base(pattern: &str) -> String {
    let is_glob_segment =
        |segment: &&str| segment.contains(|c: char| matches!(c, '*' | '{' | '?' | '['));

    pattern
        .split('/')
        .take_while(|segment| !is_glob_segment(segment))
        .filter(|segment| !segment.is_empty())
        .collect::<Vec<_>>()
        .join("/")
}
252
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// A directory, an explicit file and a glob can be mixed in one call.
    #[test]
    fn resolves_explicit_files_dirs_and_globs() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        fs::create_dir_all(root.join("src/sub")).unwrap();
        fs::write(root.join("src/a.ts"), "A").unwrap();
        fs::write(root.join("src/sub/b.ts"), "B").unwrap();
        fs::write(root.join("README.md"), "readme").unwrap();

        let hasher = WalkHasher::new(root);
        let inputs = hasher
            .resolve_sync(&["src".into(), "README.md".into(), "**/*.ts".into()])
            .unwrap();
        let rels: Vec<String> = inputs
            .iter()
            .map(|f| f.relative_path.to_string_lossy().into_owned())
            .collect();
        assert!(rels.contains(&"src/a.ts".to_string()));
        assert!(rels.contains(&"src/sub/b.ts".to_string()));
        assert!(rels.contains(&"README.md".to_string()));
    }

    /// A file matched by two patterns is reported once.
    #[test]
    fn deduplicates_overlapping_patterns() {
        let tmp = TempDir::new().unwrap();
        fs::write(tmp.path().join("a.txt"), "content").unwrap();
        let hasher = WalkHasher::new(tmp.path());
        let inputs = hasher
            .resolve_sync(&["a.txt".into(), "*.txt".into()])
            .unwrap();
        assert_eq!(inputs.len(), 1);
    }

    /// Blank patterns contribute nothing and do not error.
    #[test]
    fn empty_and_whitespace_patterns_are_ignored() {
        let tmp = TempDir::new().unwrap();
        fs::write(tmp.path().join("a.txt"), "content").unwrap();
        let hasher = WalkHasher::new(tmp.path());
        let inputs = hasher.resolve_sync(&[String::new(), "   ".into()]).unwrap();
        assert!(inputs.is_empty());
    }

    /// An explicit file that does not exist surfaces as a `NotFound` I/O error.
    #[test]
    fn missing_file_errors() {
        let tmp = TempDir::new().unwrap();
        let hasher = WalkHasher::new(tmp.path());
        let err = hasher
            .resolve_sync(&["nonexistent.txt".into()])
            .unwrap_err();
        assert!(matches!(
            err,
            Error::Io { source, .. } if source.kind() == std::io::ErrorKind::NotFound
        ));
    }

    /// Identical contents hash identically regardless of file name.
    #[test]
    fn same_content_yields_same_hash() {
        let tmp = TempDir::new().unwrap();
        fs::write(tmp.path().join("a.txt"), "payload").unwrap();
        fs::write(tmp.path().join("b.txt"), "payload").unwrap();
        let hasher = WalkHasher::new(tmp.path());
        let inputs = hasher.resolve_sync(&["*.txt".into()]).unwrap();
        assert_eq!(inputs.len(), 2);
        assert_eq!(inputs[0].sha256, inputs[1].sha256);
    }

    /// Different contents produce different digests.
    #[test]
    fn different_content_yields_different_hash() {
        let tmp = TempDir::new().unwrap();
        fs::write(tmp.path().join("a.txt"), "one").unwrap();
        fs::write(tmp.path().join("b.txt"), "two").unwrap();
        let hasher = WalkHasher::new(tmp.path());
        let inputs = hasher.resolve_sync(&["*.txt".into()]).unwrap();
        assert_eq!(inputs.len(), 2);
        assert_ne!(inputs[0].sha256, inputs[1].sha256);
    }

    /// Output order is by relative path, not creation or directory order.
    #[test]
    fn results_are_sorted_by_relative_path() {
        let tmp = TempDir::new().unwrap();
        for name in ["c.txt", "a.txt", "b.txt"] {
            fs::write(tmp.path().join(name), name).unwrap();
        }
        let hasher = WalkHasher::new(tmp.path());
        let inputs = hasher.resolve_sync(&["*.txt".into()]).unwrap();
        let names: Vec<String> = inputs
            .iter()
            .map(|i| i.relative_path.to_string_lossy().into_owned())
            .collect();
        assert_eq!(names, vec!["a.txt", "b.txt", "c.txt"]);
    }

    /// A directory pattern includes files at any depth beneath it.
    #[test]
    fn nested_directory_walks_recursively() {
        let tmp = TempDir::new().unwrap();
        fs::create_dir_all(tmp.path().join("a/b/c")).unwrap();
        fs::write(tmp.path().join("a/b/c/deep.txt"), "deep").unwrap();
        let hasher = WalkHasher::new(tmp.path());
        let inputs = hasher.resolve_sync(&["a".into()]).unwrap();
        assert_eq!(inputs.len(), 1);
        assert_eq!(inputs[0].relative_path, PathBuf::from("a/b/c/deep.txt"));
    }

    /// Character classes are treated as glob syntax.
    #[test]
    fn glob_brackets_work() {
        let tmp = TempDir::new().unwrap();
        for name in ["a1.txt", "a2.txt", "b1.txt"] {
            fs::write(tmp.path().join(name), name).unwrap();
        }
        let hasher = WalkHasher::new(tmp.path());
        let inputs = hasher.resolve_sync(&["a[12].txt".into()]).unwrap();
        assert_eq!(inputs.len(), 2);
    }

    /// An unreadable directory must surface as an error instead of being skipped.
    #[cfg(unix)]
    #[test]
    fn walkdir_errors_are_not_silently_dropped() {
        use std::os::unix::fs::PermissionsExt;

        let tmp = TempDir::new().unwrap();
        let unreadable = tmp.path().join("restricted");
        fs::create_dir_all(&unreadable).unwrap();
        fs::write(unreadable.join("secret.txt"), "secret").unwrap();

        let mut permissions = fs::metadata(&unreadable).unwrap().permissions();
        permissions.set_mode(0o000);
        fs::set_permissions(&unreadable, permissions).unwrap();

        // Privileged users (for example root inside a CI container) are not
        // subject to mode bits, so the walk would legitimately succeed there.
        let permissions_enforced = fs::read_dir(&unreadable).is_err();

        let hasher = WalkHasher::new(tmp.path());
        let outcome = hasher.resolve_sync(&["restricted".into()]);

        // Restore access before asserting, so a failing assertion cannot leave
        // behind a directory that the temp-dir cleanup is unable to remove.
        let mut cleanup_permissions = fs::metadata(&unreadable).unwrap().permissions();
        cleanup_permissions.set_mode(0o755);
        fs::set_permissions(&unreadable, cleanup_permissions).unwrap();

        if !permissions_enforced {
            return;
        }

        let err = outcome.unwrap_err();
        assert!(err.to_string().contains("walkdir"));
    }

    /// The trait-level name identifies this implementation.
    #[test]
    fn walker_name_is_walk() {
        let tmp = TempDir::new().unwrap();
        let hasher = WalkHasher::new(tmp.path());
        assert_eq!(hasher.name(), "walk");
    }

    /// The async trait entry point yields the same results as the sync path.
    #[tokio::test]
    async fn async_trait_method_works() {
        let tmp = TempDir::new().unwrap();
        fs::write(tmp.path().join("x.txt"), "x").unwrap();
        let hasher = WalkHasher::new(tmp.path());
        let inputs = hasher.resolve_and_hash(&["*.txt".into()]).await.unwrap();
        assert_eq!(inputs.len(), 1);
    }

    /// The literal prefix stops at the first segment containing glob syntax.
    #[test]
    fn extract_glob_base_handles_common_shapes() {
        assert_eq!(extract_glob_base("src/**/*.ts"), "src");
        assert_eq!(extract_glob_base("**/*.ts"), "");
        assert_eq!(extract_glob_base("foo/bar/*.rs"), "foo/bar");
        assert_eq!(extract_glob_base("*.txt"), "");
    }

    /// `.` segments vanish and `..` cancels the preceding segment.
    #[test]
    fn normalize_rel_path_strips_dots() {
        assert_eq!(normalize_rel_path(Path::new("./a/b")), PathBuf::from("a/b"));
        assert_eq!(normalize_rel_path(Path::new("a/../b")), PathBuf::from("b"));
    }
}