1use crate::{Error, Result};
2use globset::{Glob, GlobSet, GlobSetBuilder};
3use sha2::{Digest, Sha256};
4use std::collections::{BTreeMap, BTreeSet};
5use std::fs;
6use std::io::Read;
7use std::path::{Component, Path, PathBuf};
8use tracing;
9use walkdir::WalkDir;
10
/// One input file together with its content digest.
///
/// The field descriptions below reflect how `InputResolver::resolve` fills
/// them in; the fields are public, so other code may construct values freely.
#[derive(Debug, Clone)]
pub struct ResolvedInputFile {
    /// Path relative to the project root, after `normalize_rel_path`.
    pub rel_path: PathBuf,
    /// Where the file content is read from: the canonicalized path when
    /// canonicalization succeeds, otherwise an absolute form of the path.
    pub source_path: PathBuf,
    /// Hex-encoded SHA-256 digest of the file content (see `sha256_file`).
    pub sha256: String,
    /// Number of bytes that were hashed.
    pub size: u64,
}
18
/// The set of input files produced by one resolution pass.
#[derive(Debug, Clone)]
pub struct ResolvedInputs {
    /// Resolved files; `InputResolver::resolve` returns them de-duplicated and
    /// sorted by `rel_path`.
    pub files: Vec<ResolvedInputFile>,
}
23
24impl ResolvedInputs {
25 pub fn to_summary_map(&self) -> BTreeMap<String, String> {
26 let mut map = BTreeMap::new();
27 for f in &self.files {
28 map.insert(
29 normalize_rel_path(&f.rel_path)
30 .to_string_lossy()
31 .to_string(),
32 f.sha256.clone(),
33 );
34 }
35 map
36 }
37}
38
/// Lexically normalizes a path into a purely relative form.
///
/// `.` components are dropped, `..` removes the previously kept component (and
/// is a no-op when nothing is left to remove), and root/prefix components are
/// discarded. The filesystem is never consulted.
fn normalize_rel_path(p: &Path) -> PathBuf {
    p.components().fold(PathBuf::new(), |mut acc, part| {
        if let Component::Normal(name) = part {
            acc.push(name);
        } else if matches!(part, Component::ParentDir) {
            acc.pop();
        }
        acc
    })
}
53
54pub fn sha256_file(path: &Path) -> Result<(String, u64)> {
55 let _span = tracing::trace_span!("sha256_file", path = %path.display()).entered();
56 let mut file = fs::File::open(path).map_err(|e| Error::Io {
57 source: e,
58 path: Some(path.into()),
59 operation: "open".into(),
60 })?;
61 let mut hasher = Sha256::new();
62 let mut buf = [0u8; 1024 * 64];
63 let mut total: u64 = 0;
64 loop {
65 let n = file.read(&mut buf).map_err(|e| Error::Io {
66 source: e,
67 path: Some(path.into()),
68 operation: "read".into(),
69 })?;
70 if n == 0 {
71 break;
72 }
73 hasher.update(&buf[..n]);
74 total += n as u64;
75 }
76 let digest = hasher.finalize();
77 tracing::trace!(path = %path.display(), size = total, "Hashed file");
78 Ok((hex::encode(digest), total))
79}
80
/// Resolves input patterns (files, directories, globs) against a project root.
pub struct InputResolver {
    /// Directory that every pattern is joined onto and that relative paths are
    /// computed against.
    project_root: PathBuf,
}
84
85impl InputResolver {
86 pub fn new(project_root: impl AsRef<Path>) -> Self {
87 Self {
88 project_root: project_root.as_ref().to_path_buf(),
89 }
90 }
91
92 pub fn resolve(&self, patterns: &[String]) -> Result<ResolvedInputs> {
93 let resolve_span = tracing::info_span!(
94 "input_resolver.resolve",
95 root = %self.project_root.display(),
96 pattern_count = patterns.len()
97 );
98 let _resolve_guard = resolve_span.enter();
99
100 tracing::debug!(
101 patterns = ?patterns,
102 "Starting input resolution"
103 );
104
105 let mut explicit_files: Vec<String> = Vec::new();
107 let mut dirs_to_walk: Vec<(String, GlobSet)> = Vec::new(); let pattern_span = tracing::debug_span!("patterns.analyze");
110 {
111 let _g = pattern_span.enter();
112 for pat in patterns {
113 let p = pat.trim();
114 if p.is_empty() {
115 continue;
116 }
117
118 let looks_like_glob =
119 p.contains('*') || p.contains('{') || p.contains('?') || p.contains('[');
120 let abs = self.project_root.join(p);
121
122 if looks_like_glob {
123 let base_dir = extract_glob_base(p);
125 let glob_pat = p.to_string();
126 let glob = Glob::new(&glob_pat).map_err(|e| {
127 Error::configuration(format!("Invalid glob pattern '{glob_pat}': {e}"))
128 })?;
129 let set = GlobSetBuilder::new().add(glob).build().map_err(|e| {
130 Error::configuration(format!("Failed to build glob set: {e}"))
131 })?;
132 dirs_to_walk.push((base_dir, set));
133 } else if abs.is_dir() {
134 let glob_pat = format!("{}/**/*", p.trim_end_matches('/'));
136 let glob = Glob::new(&glob_pat).map_err(|e| {
137 Error::configuration(format!("Invalid glob pattern '{glob_pat}': {e}"))
138 })?;
139 let set = GlobSetBuilder::new().add(glob).build().map_err(|e| {
140 Error::configuration(format!("Failed to build glob set: {e}"))
141 })?;
142 dirs_to_walk.push((p.to_string(), set));
143 } else {
144 explicit_files.push(p.to_string());
146 }
147 }
148
149 tracing::debug!(
150 explicit_file_count = explicit_files.len(),
151 dirs_to_walk_count = dirs_to_walk.len(),
152 "Categorized input patterns"
153 );
154 }
155
156 let mut seen: BTreeSet<PathBuf> = BTreeSet::new();
157 let mut files: Vec<ResolvedInputFile> = Vec::new();
158
159 let explicit_span =
161 tracing::debug_span!("explicit_files.resolve", count = explicit_files.len());
162 {
163 let _g = explicit_span.enter();
164 for raw in &explicit_files {
165 let abs = self.project_root.join(raw);
166 if abs.is_file() {
167 let rel = normalize_rel_path(Path::new(raw));
168 if seen.insert(rel.clone()) {
169 let (hash, size) = sha256_file(&abs)?;
170 files.push(ResolvedInputFile {
171 rel_path: rel,
172 source_path: canonical_or_abs(&abs)?,
173 sha256: hash,
174 size,
175 });
176 }
177 } else {
178 tracing::warn!(path = %raw, "Explicit input file not found");
179 }
180 }
181 tracing::debug!(
182 explicit_files_found = files.len(),
183 "Explicit files resolved"
184 );
185 }
186
187 if !dirs_to_walk.is_empty() {
189 let walkdir_span =
190 tracing::info_span!("walkdir.traverse", dirs_count = dirs_to_walk.len());
191 let _g = walkdir_span.enter();
192
193 let mut total_entries_visited: u64 = 0;
194 let mut total_files_matched: u64 = 0;
195 let mut total_bytes_hashed: u64 = 0;
196
197 for (base_dir, globset) in &dirs_to_walk {
198 let walk_root = self.project_root.join(base_dir);
199 if !walk_root.exists() {
200 tracing::debug!(dir = %base_dir, "Directory does not exist, skipping");
201 continue;
202 }
203
204 tracing::debug!(dir = %base_dir, "Walking directory for glob matches");
205
206 for entry in WalkDir::new(&walk_root)
207 .follow_links(true)
208 .into_iter()
209 .filter_map(|e| e.ok())
210 {
211 total_entries_visited += 1;
212 let path = entry.path();
213 if path.is_dir() {
214 continue;
215 }
216
217 let rel = match path.strip_prefix(&self.project_root) {
219 Ok(p) => p,
220 Err(_) => continue,
221 };
222 let rel_norm = normalize_rel_path(rel);
223
224 if globset.is_match(rel_norm.as_path()) && seen.insert(rel_norm.clone()) {
226 total_files_matched += 1;
227 let src = canonical_or_abs(path)?;
228 let (hash, size) = sha256_file(&src)?;
229 total_bytes_hashed += size;
230 files.push(ResolvedInputFile {
231 rel_path: rel_norm,
232 source_path: src,
233 sha256: hash,
234 size,
235 });
236 }
237 }
238 }
239
240 tracing::info!(
241 entries_visited = total_entries_visited,
242 files_matched = total_files_matched,
243 total_bytes_hashed,
244 "WalkDir traversal complete"
245 );
246 } else {
247 tracing::debug!("No directories to walk, skipping WalkDir");
248 }
249
250 files.sort_by(|a, b| a.rel_path.cmp(&b.rel_path));
252
253 tracing::info!(total_files = files.len(), "Input resolution complete");
254
255 Ok(ResolvedInputs { files })
256 }
257}
258
/// Returns the literal directory prefix of a glob pattern.
///
/// The pattern is split on `/`; segments are kept up to (not including) the
/// first one containing a glob metacharacter (`*`, `{`, `?`, `[`). Empty
/// segments are dropped and the remainder is re-joined with `/`. The result is
/// empty when the very first segment is already a glob.
fn extract_glob_base(pattern: &str) -> String {
    let is_glob_segment =
        |segment: &str| segment.chars().any(|c| matches!(c, '*' | '{' | '?' | '['));

    pattern
        .split('/')
        .take_while(|segment| !is_glob_segment(segment))
        .filter(|segment| !segment.is_empty())
        .collect::<Vec<_>>()
        .join("/")
}
276
277fn canonical_or_abs(p: &Path) -> Result<PathBuf> {
278 match fs::canonicalize(p) {
280 Ok(c) => Ok(c),
281 Err(_) => Ok(p.absolutize()),
282 }
283}
284
/// Turns a path into an absolute one without touching the filesystem.
trait Absolutize {
    /// Returns `self` unchanged when already absolute, otherwise joined onto
    /// the current working directory.
    fn absolutize(&self) -> PathBuf;
}

impl Absolutize for &Path {
    fn absolutize(&self) -> PathBuf {
        if !self.is_absolute() {
            // If the working directory cannot be determined, fall back to ".".
            let cwd = match std::env::current_dir() {
                Ok(dir) => dir,
                Err(_) => PathBuf::from("."),
            };
            return cwd.join(self);
        }
        self.to_path_buf()
    }
}
299
300pub fn populate_hermetic_dir(resolved: &ResolvedInputs, hermetic_root: &Path) -> Result<()> {
301 for f in &resolved.files {
303 let dest = hermetic_root.join(&f.rel_path);
304 if let Some(parent) = dest.parent() {
305 fs::create_dir_all(parent).map_err(|e| Error::Io {
306 source: e,
307 path: Some(parent.into()),
308 operation: "create_dir_all".into(),
309 })?;
310 }
311 match fs::hard_link(&f.source_path, &dest) {
313 Ok(_) => {}
314 Err(_e) => {
315 fs::copy(&f.source_path, &dest).map_err(|e2| Error::Io {
317 source: e2,
318 path: Some(dest.into()),
319 operation: "copy".into(),
320 })?;
321 }
322 }
323 }
324 Ok(())
325}
326
327pub fn collect_outputs(hermetic_root: &Path, patterns: &[String]) -> Result<Vec<PathBuf>> {
328 if patterns.is_empty() {
329 return Ok(vec![]);
330 }
331 let mut builder = GlobSetBuilder::new();
332 for p in patterns {
333 let looks_like_glob =
334 p.contains('*') || p.contains('{') || p.contains('?') || p.contains('[');
335 let mut pat = p.clone();
336 let abs = hermetic_root.join(&pat);
337 if abs.is_dir() && !looks_like_glob {
338 pat = format!("{}/**/*", pat.trim_end_matches('/'));
339 }
340 let glob = Glob::new(&pat)
341 .map_err(|e| Error::configuration(format!("Invalid output glob '{pat}': {e}")))?;
342 builder.add(glob);
343 }
344 let set = builder
345 .build()
346 .map_err(|e| Error::configuration(format!("Failed to build output globset: {e}")))?;
347
348 let mut results = Vec::new();
349 for entry in WalkDir::new(hermetic_root)
350 .into_iter()
351 .filter_map(|e| e.ok())
352 {
353 let path = entry.path();
354 if path.is_dir() {
355 continue;
356 }
357 let rel = match path.strip_prefix(hermetic_root) {
358 Ok(p) => p,
359 Err(_) => continue,
360 };
361 if set.is_match(rel) {
362 results.push(rel.to_path_buf());
363 }
364 }
365 results.sort();
366 Ok(results)
367}
368
// Unit tests for input resolution, hashing, hermetic population and output
// collection. Filesystem tests work inside temporary directories.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    // A directory, an explicit file and a glob given together resolve to the
    // union of their files.
    #[test]
    fn resolves_files_dirs_and_globs() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        std::fs::create_dir_all(root.join("src/sub")).unwrap();
        std::fs::write(root.join("src/a.ts"), "A").unwrap();
        std::fs::write(root.join("src/sub/b.ts"), "B").unwrap();
        std::fs::write(root.join("README.md"), "readme").unwrap();

        let resolver = InputResolver::new(root);
        let inputs = resolver
            .resolve(&["src".into(), "README.md".into(), "**/*.ts".into()])
            .unwrap();
        let rels: Vec<String> = inputs
            .files
            .iter()
            .map(|f| f.rel_path.to_string_lossy().to_string())
            .collect();
        assert!(rels.contains(&"src/a.ts".to_string()));
        assert!(rels.contains(&"src/sub/b.ts".to_string()));
        assert!(rels.contains(&"README.md".to_string()));
    }

    // An explicit path that is a symlink reports the link target as its
    // source path.
    #[cfg(unix)]
    #[test]
    fn resolves_symlink_targets() {
        use std::os::unix::fs as unixfs;
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        std::fs::create_dir_all(root.join("data")).unwrap();
        std::fs::write(root.join("data/real.txt"), "hello").unwrap();
        unixfs::symlink("real.txt", root.join("data/link.txt")).unwrap();
        let resolver = InputResolver::new(root);
        let inputs = resolver.resolve(&["data/link.txt".into()]).unwrap();
        assert_eq!(inputs.files.len(), 1);
        assert!(inputs.files[0].source_path.ends_with("real.txt"));
    }

    // Resolved files appear under the hermetic root at their relative path.
    #[test]
    fn populates_hermetic_dir() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        std::fs::create_dir_all(root.join("dir")).unwrap();
        std::fs::write(root.join("dir/x.txt"), "x").unwrap();
        let resolver = InputResolver::new(root);
        let resolved = resolver.resolve(&["dir".into()]).unwrap();
        let herm = TempDir::new().unwrap();
        populate_hermetic_dir(&resolved, herm.path()).unwrap();
        assert!(herm.path().join("dir/x.txt").exists());
    }

    // The struct's public fields hold exactly what they were constructed with.
    #[test]
    fn test_resolved_input_file_fields() {
        let file = ResolvedInputFile {
            rel_path: PathBuf::from("src/main.rs"),
            source_path: PathBuf::from("/project/src/main.rs"),
            sha256: "abc123".to_string(),
            size: 1024,
        };
        assert_eq!(file.rel_path, PathBuf::from("src/main.rs"));
        assert_eq!(file.sha256, "abc123");
        assert_eq!(file.size, 1024);
    }

    // The summary map is keyed by relative path and carries each file's hash.
    #[test]
    fn test_resolved_inputs_to_summary_map() {
        let inputs = ResolvedInputs {
            files: vec![
                ResolvedInputFile {
                    rel_path: PathBuf::from("a.txt"),
                    source_path: PathBuf::from("/a.txt"),
                    sha256: "hash_a".to_string(),
                    size: 10,
                },
                ResolvedInputFile {
                    rel_path: PathBuf::from("b.txt"),
                    source_path: PathBuf::from("/b.txt"),
                    sha256: "hash_b".to_string(),
                    size: 20,
                },
            ],
        };
        let map = inputs.to_summary_map();
        assert_eq!(map.len(), 2);
        assert_eq!(map.get("a.txt"), Some(&"hash_a".to_string()));
        assert_eq!(map.get("b.txt"), Some(&"hash_b".to_string()));
    }

    // `.` segments vanish and `..` cancels the preceding segment.
    #[test]
    fn test_normalize_rel_path() {
        assert_eq!(normalize_rel_path(Path::new("./a/b")), PathBuf::from("a/b"));
        assert_eq!(normalize_rel_path(Path::new("a/../b")), PathBuf::from("b"));
        assert_eq!(
            normalize_rel_path(Path::new("./a/./b/../c")),
            PathBuf::from("a/c")
        );
        assert_eq!(
            normalize_rel_path(Path::new("a/b/c")),
            PathBuf::from("a/b/c")
        );
    }

    // Hashing a known payload yields its byte length and expected hex digest.
    #[test]
    fn test_sha256_file() {
        let tmp = TempDir::new().unwrap();
        let path = tmp.path().join("test.txt");
        std::fs::write(&path, "hello world").unwrap();

        let (hash, size) = sha256_file(&path).unwrap();
        assert!(!hash.is_empty());
        assert_eq!(size, 11);
        assert_eq!(
            hash,
            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
        );
    }

    // An empty file still produces a digest and reports zero bytes.
    #[test]
    fn test_sha256_file_empty() {
        let tmp = TempDir::new().unwrap();
        let path = tmp.path().join("empty.txt");
        std::fs::write(&path, "").unwrap();

        let (hash, size) = sha256_file(&path).unwrap();
        assert!(!hash.is_empty());
        assert_eq!(size, 0);
    }

    // Hashing a path that does not exist is an error.
    #[test]
    fn test_sha256_file_not_found() {
        let result = sha256_file(Path::new("/nonexistent/file.txt"));
        assert!(result.is_err());
    }

    // The base is everything before the first segment containing a glob
    // metacharacter.
    #[test]
    fn test_extract_glob_base() {
        assert_eq!(extract_glob_base("src/**/*.ts"), "src");
        assert_eq!(extract_glob_base("**/*.ts"), "");
        assert_eq!(extract_glob_base("foo/bar/*.rs"), "foo/bar");
        assert_eq!(extract_glob_base("*.txt"), "");
        assert_eq!(extract_glob_base("a/b/c/*.txt"), "a/b/c");
        assert_eq!(extract_glob_base("a/{b,c}/*.txt"), "a");
        assert_eq!(extract_glob_base("a/b?/*.txt"), "a");
    }

    // No patterns means no files.
    #[test]
    fn test_input_resolver_empty_patterns() {
        let tmp = TempDir::new().unwrap();
        let resolver = InputResolver::new(tmp.path());
        let inputs = resolver.resolve(&[]).unwrap();
        assert!(inputs.files.is_empty());
    }

    // Empty and whitespace-only patterns are ignored, even when files exist.
    #[test]
    fn test_input_resolver_whitespace_patterns() {
        let tmp = TempDir::new().unwrap();
        std::fs::write(tmp.path().join("a.txt"), "content").unwrap();
        let resolver = InputResolver::new(tmp.path());
        let inputs = resolver.resolve(&["".into(), "   ".into()]).unwrap();
        assert!(inputs.files.is_empty());
    }

    // A missing explicit file is not an error; it simply resolves to nothing.
    #[test]
    fn test_input_resolver_missing_file() {
        let tmp = TempDir::new().unwrap();
        let resolver = InputResolver::new(tmp.path());
        let inputs = resolver.resolve(&["nonexistent.txt".into()]).unwrap();
        assert!(inputs.files.is_empty());
    }

    // A glob whose base directory does not exist resolves to nothing.
    #[test]
    fn test_input_resolver_missing_directory() {
        let tmp = TempDir::new().unwrap();
        let resolver = InputResolver::new(tmp.path());
        let inputs = resolver.resolve(&["nonexistent/**/*.txt".into()]).unwrap();
        assert!(inputs.files.is_empty());
    }

    // A file selected both explicitly and by a glob is reported once.
    #[test]
    fn test_input_resolver_deduplicates() {
        let tmp = TempDir::new().unwrap();
        std::fs::write(tmp.path().join("a.txt"), "content").unwrap();
        let resolver = InputResolver::new(tmp.path());
        let inputs = resolver.resolve(&["a.txt".into(), "*.txt".into()]).unwrap();
        assert_eq!(inputs.files.len(), 1);
    }

    // No output patterns means no outputs.
    #[test]
    fn test_collect_outputs_empty() {
        let tmp = TempDir::new().unwrap();
        let outputs = collect_outputs(tmp.path(), &[]).unwrap();
        assert!(outputs.is_empty());
    }

    // Only files matching the glob are collected, as root-relative paths.
    #[test]
    fn test_collect_outputs_with_files() {
        let tmp = TempDir::new().unwrap();
        std::fs::create_dir_all(tmp.path().join("build")).unwrap();
        std::fs::write(tmp.path().join("build/output.js"), "code").unwrap();
        std::fs::write(tmp.path().join("build/output.css"), "styles").unwrap();

        let outputs = collect_outputs(tmp.path(), &["build/*.js".into()]).unwrap();
        assert_eq!(outputs.len(), 1);
        assert_eq!(outputs[0], PathBuf::from("build/output.js"));
    }

    // A plain directory name collects every file beneath it, recursively.
    #[test]
    fn test_collect_outputs_directory_pattern() {
        let tmp = TempDir::new().unwrap();
        std::fs::create_dir_all(tmp.path().join("dist/nested")).unwrap();
        std::fs::write(tmp.path().join("dist/a.txt"), "a").unwrap();
        std::fs::write(tmp.path().join("dist/nested/b.txt"), "b").unwrap();

        let outputs = collect_outputs(tmp.path(), &["dist".into()]).unwrap();
        assert_eq!(outputs.len(), 2);
    }

    // Results come back sorted regardless of creation order.
    #[test]
    fn test_collect_outputs_sorted() {
        let tmp = TempDir::new().unwrap();
        std::fs::write(tmp.path().join("c.txt"), "c").unwrap();
        std::fs::write(tmp.path().join("a.txt"), "a").unwrap();
        std::fs::write(tmp.path().join("b.txt"), "b").unwrap();

        let outputs = collect_outputs(tmp.path(), &["*.txt".into()]).unwrap();
        assert_eq!(outputs[0], PathBuf::from("a.txt"));
        assert_eq!(outputs[1], PathBuf::from("b.txt"));
        assert_eq!(outputs[2], PathBuf::from("c.txt"));
    }

    // Deeply nested files keep their directory structure and content.
    #[test]
    fn test_populate_hermetic_dir_nested() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        std::fs::create_dir_all(root.join("a/b/c")).unwrap();
        std::fs::write(root.join("a/b/c/deep.txt"), "deep content").unwrap();

        let resolver = InputResolver::new(root);
        let resolved = resolver.resolve(&["a".into()]).unwrap();

        let herm = TempDir::new().unwrap();
        populate_hermetic_dir(&resolved, herm.path()).unwrap();
        assert!(herm.path().join("a/b/c/deep.txt").exists());

        let content = std::fs::read_to_string(herm.path().join("a/b/c/deep.txt")).unwrap();
        assert_eq!(content, "deep content");
    }

    // A relative path becomes absolute.
    #[test]
    fn test_absolutize_relative_path() {
        let p = Path::new("relative/path");
        let abs = p.absolutize();
        assert!(abs.is_absolute());
    }

    // An absolute path is returned unchanged.
    #[test]
    fn test_absolutize_absolute_path() {
        let p = Path::new("/absolute/path");
        let abs = p.absolutize();
        assert_eq!(abs, PathBuf::from("/absolute/path"));
    }

    // Cloning a resolved file preserves its fields.
    #[test]
    fn test_resolved_input_file_clone() {
        let file = ResolvedInputFile {
            rel_path: PathBuf::from("test.rs"),
            source_path: PathBuf::from("/src/test.rs"),
            sha256: "hash".to_string(),
            size: 100,
        };
        let cloned = file.clone();
        assert_eq!(cloned.rel_path, file.rel_path);
        assert_eq!(cloned.sha256, file.sha256);
    }

    // Cloning the collection preserves its length.
    #[test]
    fn test_resolved_inputs_clone() {
        let inputs = ResolvedInputs {
            files: vec![ResolvedInputFile {
                rel_path: PathBuf::from("test.rs"),
                source_path: PathBuf::from("/src/test.rs"),
                sha256: "hash".to_string(),
                size: 100,
            }],
        };
        let cloned = inputs.clone();
        assert_eq!(cloned.files.len(), 1);
    }

    // A `[...]` character class is treated as a glob and matches per class.
    #[test]
    fn test_input_resolver_with_glob_brackets() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        std::fs::write(root.join("a1.txt"), "a1").unwrap();
        std::fs::write(root.join("a2.txt"), "a2").unwrap();
        std::fs::write(root.join("b1.txt"), "b1").unwrap();

        let resolver = InputResolver::new(root);
        let inputs = resolver.resolve(&["a[12].txt".into()]).unwrap();
        assert_eq!(inputs.files.len(), 2);
    }

    // `?` matches exactly one character, so only `ab.txt` qualifies.
    #[test]
    fn test_input_resolver_with_question_mark() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        std::fs::write(root.join("a.txt"), "a").unwrap();
        std::fs::write(root.join("ab.txt"), "ab").unwrap();
        std::fs::write(root.join("abc.txt"), "abc").unwrap();

        let resolver = InputResolver::new(root);
        let inputs = resolver.resolve(&["a?.txt".into()]).unwrap();
        assert_eq!(inputs.files.len(), 1);
        assert!(
            inputs.files[0]
                .rel_path
                .to_string_lossy()
                .contains("ab.txt")
        );
    }
}