1use crate::{Error, Result};
2use globset::{Glob, GlobSet, GlobSetBuilder};
3use sha2::{Digest, Sha256};
4use std::collections::{BTreeMap, BTreeSet};
5use std::fs;
6use std::io::Read;
7use std::path::{Component, Path, PathBuf};
8use tracing;
9use walkdir::WalkDir;
10
/// One file selected by input resolution, together with its content fingerprint.
#[derive(Debug, Clone)]
pub struct ResolvedInputFile {
    /// Normalized path of the file relative to the project root.
    pub rel_path: PathBuf,
    /// Path the file content is read from (canonicalized when possible,
    /// otherwise made absolute).
    pub source_path: PathBuf,
    /// Hex-encoded SHA-256 digest of the file contents.
    pub sha256: String,
    /// Number of bytes that were hashed.
    pub size: u64,
}
18
19#[derive(Debug, Clone)]
20pub struct ResolvedInputs {
21 pub files: Vec<ResolvedInputFile>,
22}
23
24impl ResolvedInputs {
25 pub fn to_summary_map(&self) -> BTreeMap<String, String> {
26 let mut map = BTreeMap::new();
27 for f in &self.files {
28 map.insert(
29 normalize_rel_path(&f.rel_path)
30 .to_string_lossy()
31 .to_string(),
32 f.sha256.clone(),
33 );
34 }
35 map
36 }
37}
38
/// Lexically normalizes `p` into a relative path without touching the filesystem.
///
/// `.` components are dropped, `..` removes the previously kept component (and
/// is ignored when nothing is left to remove), and root/prefix components are
/// discarded so the result is always relative.
fn normalize_rel_path(p: &Path) -> PathBuf {
    p.components().fold(PathBuf::new(), |mut acc, component| {
        match component {
            Component::Normal(segment) => acc.push(segment),
            Component::ParentDir => {
                // `pop` returns false on an empty path; that is intentionally ignored.
                acc.pop();
            }
            Component::CurDir | Component::RootDir | Component::Prefix(_) => {}
        }
        acc
    })
}
53
54pub fn sha256_file(path: &Path) -> Result<(String, u64)> {
55 let _span = tracing::trace_span!("sha256_file", path = %path.display()).entered();
56 let mut file = fs::File::open(path).map_err(|e| Error::Io {
57 source: e,
58 path: Some(path.into()),
59 operation: "open".into(),
60 })?;
61 let mut hasher = Sha256::new();
62 let mut buf = [0u8; 1024 * 64];
63 let mut total: u64 = 0;
64 loop {
65 let n = file.read(&mut buf).map_err(|e| Error::Io {
66 source: e,
67 path: Some(path.into()),
68 operation: "read".into(),
69 })?;
70 if n == 0 {
71 break;
72 }
73 hasher.update(&buf[..n]);
74 total += n as u64;
75 }
76 let digest = hasher.finalize();
77 tracing::trace!(path = %path.display(), size = total, "Hashed file");
78 Ok((hex::encode(digest), total))
79}
80
/// Resolves input patterns (explicit files, directories, globs) against a project root.
pub struct InputResolver {
    /// Directory that all patterns are joined onto and that resolved relative
    /// paths are expressed against.
    project_root: PathBuf,
}
84
85impl InputResolver {
86 pub fn new(project_root: impl AsRef<Path>) -> Self {
87 Self {
88 project_root: project_root.as_ref().to_path_buf(),
89 }
90 }
91
92 pub fn resolve(&self, patterns: &[String]) -> Result<ResolvedInputs> {
93 let resolve_span = tracing::info_span!(
94 "input_resolver.resolve",
95 root = %self.project_root.display(),
96 pattern_count = patterns.len()
97 );
98 let _resolve_guard = resolve_span.enter();
99
100 tracing::debug!(
101 patterns = ?patterns,
102 "Starting input resolution"
103 );
104
105 let mut explicit_files: Vec<String> = Vec::new();
107 let mut dirs_to_walk: Vec<(String, GlobSet)> = Vec::new(); let pattern_span = tracing::debug_span!("patterns.analyze");
110 {
111 let _g = pattern_span.enter();
112 for pat in patterns {
113 let p = pat.trim();
114 if p.is_empty() {
115 continue;
116 }
117
118 let looks_like_glob =
119 p.contains('*') || p.contains('{') || p.contains('?') || p.contains('[');
120 let abs = self.project_root.join(p);
121
122 if looks_like_glob {
123 let base_dir = extract_glob_base(p);
125 let glob_pat = p.to_string();
126 let glob = Glob::new(&glob_pat).map_err(|e| {
127 Error::configuration(format!("Invalid glob pattern '{glob_pat}': {e}"))
128 })?;
129 let set = GlobSetBuilder::new().add(glob).build().map_err(|e| {
130 Error::configuration(format!("Failed to build glob set: {e}"))
131 })?;
132 dirs_to_walk.push((base_dir, set));
133 } else if abs.is_dir() {
134 let glob_pat = format!("{}/**/*", p.trim_end_matches('/'));
136 let glob = Glob::new(&glob_pat).map_err(|e| {
137 Error::configuration(format!("Invalid glob pattern '{glob_pat}': {e}"))
138 })?;
139 let set = GlobSetBuilder::new().add(glob).build().map_err(|e| {
140 Error::configuration(format!("Failed to build glob set: {e}"))
141 })?;
142 dirs_to_walk.push((p.to_string(), set));
143 } else {
144 explicit_files.push(p.to_string());
146 }
147 }
148
149 tracing::debug!(
150 explicit_file_count = explicit_files.len(),
151 dirs_to_walk_count = dirs_to_walk.len(),
152 "Categorized input patterns"
153 );
154 }
155
156 let mut seen: BTreeSet<PathBuf> = BTreeSet::new();
157 let mut files: Vec<ResolvedInputFile> = Vec::new();
158
159 let explicit_span =
161 tracing::debug_span!("explicit_files.resolve", count = explicit_files.len());
162 {
163 let _g = explicit_span.enter();
164 for raw in &explicit_files {
165 let abs = self.project_root.join(raw);
166 if abs.is_file() {
167 let rel = normalize_rel_path(Path::new(raw));
168 if seen.insert(rel.clone()) {
169 let (hash, size) = sha256_file(&abs)?;
170 files.push(ResolvedInputFile {
171 rel_path: rel,
172 source_path: canonical_or_abs(&abs)?,
173 sha256: hash,
174 size,
175 });
176 }
177 } else {
178 tracing::warn!(path = %raw, "Explicit input file not found");
179 }
180 }
181 tracing::debug!(
182 explicit_files_found = files.len(),
183 "Explicit files resolved"
184 );
185 }
186
187 if !dirs_to_walk.is_empty() {
189 let walkdir_span =
190 tracing::info_span!("walkdir.traverse", dirs_count = dirs_to_walk.len());
191 let _g = walkdir_span.enter();
192
193 let mut total_entries_visited: u64 = 0;
194 let mut total_files_matched: u64 = 0;
195 let mut total_bytes_hashed: u64 = 0;
196
197 for (base_dir, globset) in &dirs_to_walk {
198 let walk_root = self.project_root.join(base_dir);
199 if !walk_root.exists() {
200 tracing::debug!(dir = %base_dir, "Directory does not exist, skipping");
201 continue;
202 }
203
204 tracing::debug!(dir = %base_dir, "Walking directory for glob matches");
205
206 for entry in WalkDir::new(&walk_root)
207 .follow_links(true)
208 .into_iter()
209 .filter_map(|e| e.ok())
210 {
211 total_entries_visited += 1;
212 let path = entry.path();
213 if path.is_dir() {
214 continue;
215 }
216
217 let rel = match path.strip_prefix(&self.project_root) {
219 Ok(p) => p,
220 Err(_) => continue,
221 };
222 let rel_norm = normalize_rel_path(rel);
223
224 if globset.is_match(rel_norm.as_path()) && seen.insert(rel_norm.clone()) {
226 total_files_matched += 1;
227 let src = canonical_or_abs(path)?;
228 let (hash, size) = sha256_file(&src)?;
229 total_bytes_hashed += size;
230 files.push(ResolvedInputFile {
231 rel_path: rel_norm,
232 source_path: src,
233 sha256: hash,
234 size,
235 });
236 }
237 }
238 }
239
240 tracing::info!(
241 entries_visited = total_entries_visited,
242 files_matched = total_files_matched,
243 total_bytes_hashed,
244 "WalkDir traversal complete"
245 );
246 } else {
247 tracing::debug!("No directories to walk, skipping WalkDir");
248 }
249
250 files.sort_by(|a, b| a.rel_path.cmp(&b.rel_path));
252
253 tracing::info!(total_files = files.len(), "Input resolution complete");
254
255 Ok(ResolvedInputs { files })
256 }
257}
258
/// Returns the leading directory portion of a glob pattern that contains no
/// glob syntax, joined with `/`.
///
/// Scanning stops at the first `/`-separated segment containing `*`, `{`, `?`
/// or `[`; empty segments (from leading or doubled slashes) are dropped. The
/// result is empty when the very first segment is already a glob.
fn extract_glob_base(pattern: &str) -> String {
    const GLOB_META: &[char] = &['*', '{', '?', '['];
    pattern
        .split('/')
        .take_while(|segment| !segment.contains(GLOB_META))
        .filter(|segment| !segment.is_empty())
        .collect::<Vec<_>>()
        .join("/")
}
276
277fn canonical_or_abs(p: &Path) -> Result<PathBuf> {
278 match fs::canonicalize(p) {
280 Ok(c) => Ok(c),
281 Err(_) => Ok(p.absolutize()),
282 }
283}
284
/// Turns a possibly relative path into an absolute one without resolving symlinks.
trait Absolutize {
    /// Returns the path unchanged when it is already absolute, otherwise joined
    /// onto the current working directory.
    fn absolutize(&self) -> PathBuf;
}

impl Absolutize for &Path {
    fn absolutize(&self) -> PathBuf {
        if self.is_absolute() {
            return self.to_path_buf();
        }
        // If the working directory cannot be determined, fall back to `.` so a
        // path is still produced.
        let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
        cwd.join(self)
    }
}
299
300pub fn populate_hermetic_dir(resolved: &ResolvedInputs, hermetic_root: &Path) -> Result<()> {
301 for f in &resolved.files {
303 let dest = hermetic_root.join(&f.rel_path);
304 if let Some(parent) = dest.parent() {
305 fs::create_dir_all(parent).map_err(|e| Error::Io {
306 source: e,
307 path: Some(parent.into()),
308 operation: "create_dir_all".into(),
309 })?;
310 }
311 match fs::hard_link(&f.source_path, &dest) {
313 Ok(_) => {}
314 Err(_e) => {
315 fs::copy(&f.source_path, &dest).map_err(|e2| Error::Io {
317 source: e2,
318 path: Some(dest.into()),
319 operation: "copy".into(),
320 })?;
321 }
322 }
323 }
324 Ok(())
325}
326
327pub fn collect_outputs(hermetic_root: &Path, patterns: &[String]) -> Result<Vec<PathBuf>> {
328 if patterns.is_empty() {
329 return Ok(vec![]);
330 }
331 let mut builder = GlobSetBuilder::new();
332 for p in patterns {
333 let looks_like_glob =
334 p.contains('*') || p.contains('{') || p.contains('?') || p.contains('[');
335 let mut pat = p.clone();
336 let abs = hermetic_root.join(&pat);
337 if abs.is_dir() && !looks_like_glob {
338 pat = format!("{}/**/*", pat.trim_end_matches('/'));
339 }
340 let glob = Glob::new(&pat)
341 .map_err(|e| Error::configuration(format!("Invalid output glob '{pat}': {e}")))?;
342 builder.add(glob);
343 }
344 let set = builder
345 .build()
346 .map_err(|e| Error::configuration(format!("Failed to build output globset: {e}")))?;
347
348 let mut results = Vec::new();
349 for entry in WalkDir::new(hermetic_root)
350 .into_iter()
351 .filter_map(|e| e.ok())
352 {
353 let path = entry.path();
354 if path.is_dir() {
355 continue;
356 }
357 let rel = match path.strip_prefix(hermetic_root) {
358 Ok(p) => p,
359 Err(_) => continue,
360 };
361 if set.is_match(rel) {
362 results.push(rel.to_path_buf());
363 }
364 }
365 results.sort();
366 Ok(results)
367}
368
369pub fn snapshot_workspace_tar_zst(src_root: &Path, dst_file: &Path) -> Result<()> {
370 let file = fs::File::create(dst_file).map_err(|e| Error::Io {
371 source: e,
372 path: Some(dst_file.into()),
373 operation: "create".into(),
374 })?;
375 let enc = zstd::Encoder::new(file, 3)
376 .map_err(|e| Error::configuration(format!("zstd encoder error: {e}")))?;
377 let mut builder = tar::Builder::new(enc);
378
379 match builder.append_dir_all(".", src_root) {
380 Ok(()) => {}
381 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
382 let _ = fs::remove_file(dst_file);
386 tracing::warn!(
387 root = %src_root.display(),
388 "Skipping workspace snapshot; files disappeared during archive: {e}"
389 );
390 return Ok(());
391 }
392 Err(e) => {
393 return Err(Error::configuration(format!("tar append failed: {e}")));
394 }
395 }
396
397 let enc = builder
398 .into_inner()
399 .map_err(|e| Error::configuration(format!("tar finalize failed: {e}")))?;
400 enc.finish()
401 .map_err(|e| Error::configuration(format!("zstd finish failed: {e}")))?;
402 Ok(())
403}
404
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// A directory, an explicit file and a glob passed together all contribute files.
    #[test]
    fn resolves_files_dirs_and_globs() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        std::fs::create_dir_all(root.join("src/sub")).unwrap();
        for (name, body) in [
            ("src/a.ts", "A"),
            ("src/sub/b.ts", "B"),
            ("README.md", "readme"),
        ] {
            std::fs::write(root.join(name), body).unwrap();
        }

        let patterns: Vec<String> = vec!["src".into(), "README.md".into(), "**/*.ts".into()];
        let resolved = InputResolver::new(root).resolve(&patterns).unwrap();
        let rels: Vec<String> = resolved
            .files
            .iter()
            .map(|f| f.rel_path.to_string_lossy().into_owned())
            .collect();

        for expected in ["src/a.ts", "src/sub/b.ts", "README.md"] {
            assert!(rels.iter().any(|r| r == expected));
        }
    }

    /// An explicit symlink input is reported with the path of its target.
    #[cfg(unix)]
    #[test]
    fn resolves_symlink_targets() {
        use std::os::unix::fs::symlink;

        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        std::fs::create_dir_all(root.join("data")).unwrap();
        std::fs::write(root.join("data/real.txt"), "hello").unwrap();
        symlink("real.txt", root.join("data/link.txt")).unwrap();

        let patterns: Vec<String> = vec!["data/link.txt".into()];
        let resolved = InputResolver::new(root).resolve(&patterns).unwrap();

        assert_eq!(resolved.files.len(), 1);
        assert!(resolved.files[0].source_path.ends_with("real.txt"));
    }

    /// Files of a resolved directory appear under the hermetic root at the same relative path.
    #[test]
    fn populates_hermetic_dir() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        std::fs::create_dir_all(root.join("dir")).unwrap();
        std::fs::write(root.join("dir/x.txt"), "x").unwrap();

        let patterns: Vec<String> = vec!["dir".into()];
        let resolved = InputResolver::new(root).resolve(&patterns).unwrap();

        let hermetic = TempDir::new().unwrap();
        populate_hermetic_dir(&resolved, hermetic.path()).unwrap();
        assert!(hermetic.path().join("dir/x.txt").exists());
    }
}