1use crate::{Error, Result};
2use globset::{Glob, GlobSet, GlobSetBuilder};
3use sha2::{Digest, Sha256};
4use std::collections::{BTreeMap, BTreeSet};
5use std::fs;
6use std::io::Read;
7use std::path::{Component, Path, PathBuf};
8use tracing;
9use walkdir::WalkDir;
10
/// One input file selected during resolution, together with the fingerprint
/// computed from its content.
#[derive(Debug, Clone)]
pub struct ResolvedInputFile {
    /// Path of the file relative to the project root, lexically normalized.
    pub rel_path: PathBuf,
    /// Location the content is read from (canonicalized when that succeeds).
    pub source_path: PathBuf,
    /// Hex-encoded SHA-256 digest of the file content.
    pub sha256: String,
    /// Number of bytes read while hashing the file.
    pub size: u64,
}
18
19#[derive(Debug, Clone)]
20pub struct ResolvedInputs {
21 pub files: Vec<ResolvedInputFile>,
22}
23
24impl ResolvedInputs {
25 pub fn to_summary_map(&self) -> BTreeMap<String, String> {
26 let mut map = BTreeMap::new();
27 for f in &self.files {
28 map.insert(
29 normalize_rel_path(&f.rel_path)
30 .to_string_lossy()
31 .to_string(),
32 f.sha256.clone(),
33 );
34 }
35 map
36 }
37}
38
/// Lexically normalizes `p` into a purely relative path.
///
/// `.` components are dropped, `..` removes the previously kept component (and
/// is ignored when nothing is left to remove), and root / prefix components are
/// discarded. The filesystem is never consulted.
fn normalize_rel_path(p: &Path) -> PathBuf {
    p.components().fold(PathBuf::new(), |mut acc, part| {
        match part {
            Component::Normal(segment) => acc.push(segment),
            Component::ParentDir => {
                // `pop` on an empty path is a no-op, so `..` can never climb
                // above the (implicit) root.
                acc.pop();
            }
            Component::CurDir | Component::RootDir | Component::Prefix(_) => {}
        }
        acc
    })
}
53
54pub fn sha256_file(path: &Path) -> Result<(String, u64)> {
55 let mut file = fs::File::open(path).map_err(|e| Error::Io {
56 source: e,
57 path: Some(path.into()),
58 operation: "open".into(),
59 })?;
60 let mut hasher = Sha256::new();
61 let mut buf = [0u8; 1024 * 64];
62 let mut total: u64 = 0;
63 loop {
64 let n = file.read(&mut buf).map_err(|e| Error::Io {
65 source: e,
66 path: Some(path.into()),
67 operation: "read".into(),
68 })?;
69 if n == 0 {
70 break;
71 }
72 hasher.update(&buf[..n]);
73 total += n as u64;
74 }
75 let digest = hasher.finalize();
76 Ok((hex::encode(digest), total))
77}
78
/// Resolves input patterns (files, directories and globs) against a project
/// root directory.
#[derive(Debug, Clone)]
pub struct InputResolver {
    /// Directory that all patterns are interpreted relative to.
    project_root: PathBuf,
}
82
83impl InputResolver {
84 pub fn new(project_root: impl AsRef<Path>) -> Self {
85 Self {
86 project_root: project_root.as_ref().to_path_buf(),
87 }
88 }
89
90 pub fn resolve(&self, patterns: &[String]) -> Result<ResolvedInputs> {
91 let mut builder = GlobSetBuilder::new();
93 let mut raw_patterns: Vec<(String, bool)> = Vec::new(); for pat in patterns {
96 let p = pat.trim();
97 if p.is_empty() {
98 continue;
99 }
100 let abs = self.project_root.join(p);
101 let is_dir_hint = abs.is_dir();
102 raw_patterns.push((p.to_string(), is_dir_hint));
103
104 let looks_like_glob =
106 p.contains('*') || p.contains('{') || p.contains('?') || p.contains('[');
107 let glob_pat = if looks_like_glob {
108 p.to_string()
109 } else if is_dir_hint {
110 format!("{}/**/*", p.trim_end_matches('/'))
112 } else {
113 p.to_string()
114 };
115 let glob = Glob::new(&glob_pat).map_err(|e| {
116 Error::configuration(format!("Invalid glob pattern '{glob_pat}': {e}"))
117 })?;
118 builder.add(glob);
119 }
120 let set: GlobSet = builder
121 .build()
122 .map_err(|e| Error::configuration(format!("Failed to build glob set: {e}")))?;
123
124 let mut seen: BTreeSet<PathBuf> = BTreeSet::new();
126 let mut files: Vec<ResolvedInputFile> = Vec::new();
127
128 for (raw, _is_dir) in &raw_patterns {
130 let abs = self.project_root.join(raw);
131 if abs.is_file() {
132 let rel = normalize_rel_path(Path::new(raw));
133 if seen.insert(rel.clone()) {
134 let (hash, size) = sha256_file(&abs)?;
135 files.push(ResolvedInputFile {
136 rel_path: rel,
137 source_path: canonical_or_abs(&abs)?,
138 sha256: hash,
139 size,
140 });
141 }
142 }
143 }
144
145 for entry in WalkDir::new(&self.project_root)
146 .follow_links(true)
147 .into_iter()
148 .filter_map(|e| e.ok())
149 {
150 let path = entry.path();
151 if path.is_dir() {
152 continue;
153 }
154 let rel = match path.strip_prefix(&self.project_root) {
156 Ok(p) => p,
157 Err(_) => continue,
158 };
159 let rel_norm = normalize_rel_path(rel);
160 if set.is_match(rel_norm.as_path()) && seen.insert(rel_norm.clone()) {
162 let src = canonical_or_abs(path)?;
163 let (hash, size) = sha256_file(&src)?;
164 files.push(ResolvedInputFile {
165 rel_path: rel_norm,
166 source_path: src,
167 sha256: hash,
168 size,
169 });
170 }
171 }
172
173 files.sort_by(|a, b| a.rel_path.cmp(&b.rel_path));
175 Ok(ResolvedInputs { files })
176 }
177}
178
179fn canonical_or_abs(p: &Path) -> Result<PathBuf> {
180 match fs::canonicalize(p) {
182 Ok(c) => Ok(c),
183 Err(_) => Ok(p.absolutize()),
184 }
185}
186
/// Turns a possibly relative path into an absolute one without consulting the
/// filesystem for the path itself.
trait Absolutize {
    /// Returns an absolute version of the receiver.
    fn absolutize(&self) -> PathBuf;
}

impl Absolutize for &Path {
    /// Absolute paths are returned unchanged; relative paths are joined onto the
    /// current working directory, or onto `.` when that cannot be determined.
    fn absolutize(&self) -> PathBuf {
        if !self.is_absolute() {
            let base = match std::env::current_dir() {
                Ok(dir) => dir,
                Err(_) => PathBuf::from("."),
            };
            return base.join(self);
        }
        self.to_path_buf()
    }
}
201
202pub fn populate_hermetic_dir(resolved: &ResolvedInputs, hermetic_root: &Path) -> Result<()> {
203 for f in &resolved.files {
205 let dest = hermetic_root.join(&f.rel_path);
206 if let Some(parent) = dest.parent() {
207 fs::create_dir_all(parent).map_err(|e| Error::Io {
208 source: e,
209 path: Some(parent.into()),
210 operation: "create_dir_all".into(),
211 })?;
212 }
213 match fs::hard_link(&f.source_path, &dest) {
215 Ok(_) => {}
216 Err(_e) => {
217 fs::copy(&f.source_path, &dest).map_err(|e2| Error::Io {
219 source: e2,
220 path: Some(dest.into()),
221 operation: "copy".into(),
222 })?;
223 }
224 }
225 }
226 Ok(())
227}
228
229pub fn collect_outputs(hermetic_root: &Path, patterns: &[String]) -> Result<Vec<PathBuf>> {
230 if patterns.is_empty() {
231 return Ok(vec![]);
232 }
233 let mut builder = GlobSetBuilder::new();
234 for p in patterns {
235 let looks_like_glob =
236 p.contains('*') || p.contains('{') || p.contains('?') || p.contains('[');
237 let mut pat = p.clone();
238 let abs = hermetic_root.join(&pat);
239 if abs.is_dir() && !looks_like_glob {
240 pat = format!("{}/**/*", pat.trim_end_matches('/'));
241 }
242 let glob = Glob::new(&pat)
243 .map_err(|e| Error::configuration(format!("Invalid output glob '{pat}': {e}")))?;
244 builder.add(glob);
245 }
246 let set = builder
247 .build()
248 .map_err(|e| Error::configuration(format!("Failed to build output globset: {e}")))?;
249
250 let mut results = Vec::new();
251 for entry in WalkDir::new(hermetic_root)
252 .into_iter()
253 .filter_map(|e| e.ok())
254 {
255 let path = entry.path();
256 if path.is_dir() {
257 continue;
258 }
259 let rel = match path.strip_prefix(hermetic_root) {
260 Ok(p) => p,
261 Err(_) => continue,
262 };
263 if set.is_match(rel) {
264 results.push(rel.to_path_buf());
265 }
266 }
267 results.sort();
268 Ok(results)
269}
270
271pub fn snapshot_workspace_tar_zst(src_root: &Path, dst_file: &Path) -> Result<()> {
272 let file = fs::File::create(dst_file).map_err(|e| Error::Io {
273 source: e,
274 path: Some(dst_file.into()),
275 operation: "create".into(),
276 })?;
277 let enc = zstd::Encoder::new(file, 3)
278 .map_err(|e| Error::configuration(format!("zstd encoder error: {e}")))?;
279 let mut builder = tar::Builder::new(enc);
280
281 match builder.append_dir_all(".", src_root) {
282 Ok(()) => {}
283 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
284 let _ = fs::remove_file(dst_file);
288 tracing::warn!(
289 root = %src_root.display(),
290 "Skipping workspace snapshot; files disappeared during archive: {e}"
291 );
292 return Ok(());
293 }
294 Err(e) => {
295 return Err(Error::configuration(format!("tar append failed: {e}")));
296 }
297 }
298
299 let enc = builder
300 .into_inner()
301 .map_err(|e| Error::configuration(format!("tar finalize failed: {e}")))?;
302 enc.finish()
303 .map_err(|e| Error::configuration(format!("zstd finish failed: {e}")))?;
304 Ok(())
305}
306
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Writes `contents` to `root/rel`, creating parent directories first.
    fn write_file(root: &std::path::Path, rel: &str, contents: &str) {
        let target = root.join(rel);
        if let Some(dir) = target.parent() {
            std::fs::create_dir_all(dir).unwrap();
        }
        std::fs::write(&target, contents).unwrap();
    }

    /// A directory, a literal file and a glob together select every fixture file.
    #[test]
    fn resolves_files_dirs_and_globs() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        write_file(root, "src/a.ts", "A");
        write_file(root, "src/sub/b.ts", "B");
        write_file(root, "README.md", "readme");

        let patterns = [
            "src".to_string(),
            "README.md".to_string(),
            "**/*.ts".to_string(),
        ];
        let resolved = InputResolver::new(root).resolve(&patterns).unwrap();
        let found: Vec<String> = resolved
            .files
            .iter()
            .map(|f| f.rel_path.to_string_lossy().into_owned())
            .collect();

        for expected in ["src/a.ts", "src/sub/b.ts", "README.md"] {
            assert!(found.iter().any(|p| p == expected));
        }
    }

    /// A symlinked input is recorded with the path of the file it points to.
    #[cfg(unix)]
    #[test]
    fn resolves_symlink_targets() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        write_file(root, "data/real.txt", "hello");
        std::os::unix::fs::symlink("real.txt", root.join("data/link.txt")).unwrap();

        let resolved = InputResolver::new(root)
            .resolve(&["data/link.txt".to_string()])
            .unwrap();

        assert_eq!(resolved.files.len(), 1);
        let only = &resolved.files[0];
        assert!(only.source_path.ends_with("real.txt"));
    }

    /// Resolved files appear under the hermetic root at their relative path.
    #[test]
    fn populates_hermetic_dir() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        write_file(root, "dir/x.txt", "x");

        let resolved = InputResolver::new(root)
            .resolve(&["dir".to_string()])
            .unwrap();
        let hermetic = TempDir::new().unwrap();
        populate_hermetic_dir(&resolved, hermetic.path()).unwrap();

        assert!(hermetic.path().join("dir/x.txt").exists());
    }
}