cuenv_core/tasks/
io.rs

1use crate::{Error, Result};
2use globset::{Glob, GlobSet, GlobSetBuilder};
3use sha2::{Digest, Sha256};
4use std::collections::{BTreeMap, BTreeSet};
5use std::fs;
6use std::io::Read;
7use std::path::{Component, Path, PathBuf};
8use tracing;
9use walkdir::WalkDir;
10
/// A single file selected as a task input, together with the content
/// fingerprint taken when it was resolved.
///
/// Equality compares every field, so two values are equal only when they refer
/// to the same relative path, the same source path and identical recorded
/// digest and size.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResolvedInputFile {
    /// Path of the file relative to the project root, lexically normalized.
    pub rel_path: PathBuf,
    /// Location the file is copied or linked from (canonicalized when possible).
    pub source_path: PathBuf,
    /// Hex-encoded SHA-256 digest of the file contents.
    pub sha256: String,
    /// Number of bytes hashed, i.e. the file size in bytes.
    pub size: u64,
}
18
19#[derive(Debug, Clone)]
20pub struct ResolvedInputs {
21    pub files: Vec<ResolvedInputFile>,
22}
23
24impl ResolvedInputs {
25    pub fn to_summary_map(&self) -> BTreeMap<String, String> {
26        let mut map = BTreeMap::new();
27        for f in &self.files {
28            map.insert(
29                normalize_rel_path(&f.rel_path)
30                    .to_string_lossy()
31                    .to_string(),
32                f.sha256.clone(),
33            );
34        }
35        map
36    }
37}
38
/// Lexically normalizes `p` into a purely relative path.
///
/// `.` segments are dropped, `..` removes the previously kept segment (and is
/// ignored when nothing is left to remove), and root/prefix components are
/// discarded. The file system is never consulted.
fn normalize_rel_path(p: &Path) -> PathBuf {
    p.components().fold(PathBuf::new(), |mut acc, part| {
        match part {
            Component::Normal(segment) => acc.push(segment),
            Component::ParentDir => {
                acc.pop();
            }
            Component::CurDir | Component::RootDir | Component::Prefix(_) => {}
        }
        acc
    })
}
53
54pub fn sha256_file(path: &Path) -> Result<(String, u64)> {
55    let mut file = fs::File::open(path).map_err(|e| Error::Io {
56        source: e,
57        path: Some(path.into()),
58        operation: "open".into(),
59    })?;
60    let mut hasher = Sha256::new();
61    let mut buf = [0u8; 1024 * 64];
62    let mut total: u64 = 0;
63    loop {
64        let n = file.read(&mut buf).map_err(|e| Error::Io {
65            source: e,
66            path: Some(path.into()),
67            operation: "read".into(),
68        })?;
69        if n == 0 {
70            break;
71        }
72        hasher.update(&buf[..n]);
73        total += n as u64;
74    }
75    let digest = hasher.finalize();
76    Ok((hex::encode(digest), total))
77}
78
79pub struct InputResolver {
80    project_root: PathBuf,
81}
82
83impl InputResolver {
84    pub fn new(project_root: impl AsRef<Path>) -> Self {
85        Self {
86            project_root: project_root.as_ref().to_path_buf(),
87        }
88    }
89
90    pub fn resolve(&self, patterns: &[String]) -> Result<ResolvedInputs> {
91        // Build a globset for all file patterns; directories are expanded via walk
92        let mut builder = GlobSetBuilder::new();
93        let mut raw_patterns: Vec<(String, bool)> = Vec::new(); // (pattern, is_dir_hint)
94
95        for pat in patterns {
96            let p = pat.trim();
97            if p.is_empty() {
98                continue;
99            }
100            let abs = self.project_root.join(p);
101            let is_dir_hint = abs.is_dir();
102            raw_patterns.push((p.to_string(), is_dir_hint));
103
104            // If it looks like a glob, add as-is; else if dir, add /**
105            let looks_like_glob =
106                p.contains('*') || p.contains('{') || p.contains('?') || p.contains('[');
107            let glob_pat = if looks_like_glob {
108                p.to_string()
109            } else if is_dir_hint {
110                // ensure trailing slash insensitive recursive
111                format!("{}/**/*", p.trim_end_matches('/'))
112            } else {
113                p.to_string()
114            };
115            let glob = Glob::new(&glob_pat).map_err(|e| {
116                Error::configuration(format!("Invalid glob pattern '{glob_pat}': {e}"))
117            })?;
118            builder.add(glob);
119        }
120        let set: GlobSet = builder
121            .build()
122            .map_err(|e| Error::configuration(format!("Failed to build glob set: {e}")))?;
123
124        // Walk project_root and pick files that match any pattern, plus explicit file paths
125        let mut seen: BTreeSet<PathBuf> = BTreeSet::new();
126        let mut files: Vec<ResolvedInputFile> = Vec::new();
127
128        // Fast path: also queue explicit file paths even if the globset wouldn't match due to being plain path
129        for (raw, _is_dir) in &raw_patterns {
130            let abs = self.project_root.join(raw);
131            if abs.is_file() {
132                let rel = normalize_rel_path(Path::new(raw));
133                if seen.insert(rel.clone()) {
134                    let (hash, size) = sha256_file(&abs)?;
135                    files.push(ResolvedInputFile {
136                        rel_path: rel,
137                        source_path: canonical_or_abs(&abs)?,
138                        sha256: hash,
139                        size,
140                    });
141                }
142            }
143        }
144
145        for entry in WalkDir::new(&self.project_root)
146            .follow_links(true)
147            .into_iter()
148            .filter_map(|e| e.ok())
149        {
150            let path = entry.path();
151            if path.is_dir() {
152                continue;
153            }
154            // Relative to root
155            let rel = match path.strip_prefix(&self.project_root) {
156                Ok(p) => p,
157                Err(_) => continue,
158            };
159            let rel_norm = normalize_rel_path(rel);
160            // Match globset relative path
161            if set.is_match(rel_norm.as_path()) && seen.insert(rel_norm.clone()) {
162                let src = canonical_or_abs(path)?;
163                let (hash, size) = sha256_file(&src)?;
164                files.push(ResolvedInputFile {
165                    rel_path: rel_norm,
166                    source_path: src,
167                    sha256: hash,
168                    size,
169                });
170            }
171        }
172
173        // Deterministic ordering
174        files.sort_by(|a, b| a.rel_path.cmp(&b.rel_path));
175        Ok(ResolvedInputs { files })
176    }
177}
178
179fn canonical_or_abs(p: &Path) -> Result<PathBuf> {
180    // Resolve symlinks to target content; fall back to absolute if canonicalize fails
181    match fs::canonicalize(p) {
182        Ok(c) => Ok(c),
183        Err(_) => Ok(p.absolutize()),
184    }
185}
186
/// Converts a path to an absolute one without touching the file system.
trait Absolutize {
    /// Returns `self` unchanged when already absolute, otherwise `self` joined
    /// onto the current working directory (or onto `.` if that is unavailable).
    fn absolutize(&self) -> PathBuf;
}

impl Absolutize for &Path {
    fn absolutize(&self) -> PathBuf {
        if !self.is_absolute() {
            let cwd = match std::env::current_dir() {
                Ok(dir) => dir,
                Err(_) => PathBuf::from("."),
            };
            return cwd.join(self);
        }
        self.to_path_buf()
    }
}
201
202pub fn populate_hermetic_dir(resolved: &ResolvedInputs, hermetic_root: &Path) -> Result<()> {
203    // Create directories and populate files preserving relative structure
204    for f in &resolved.files {
205        let dest = hermetic_root.join(&f.rel_path);
206        if let Some(parent) = dest.parent() {
207            fs::create_dir_all(parent).map_err(|e| Error::Io {
208                source: e,
209                path: Some(parent.into()),
210                operation: "create_dir_all".into(),
211            })?;
212        }
213        // Try hardlink first
214        match fs::hard_link(&f.source_path, &dest) {
215            Ok(_) => {}
216            Err(_e) => {
217                // Fall back to copy on any error creating hardlink
218                fs::copy(&f.source_path, &dest).map_err(|e2| Error::Io {
219                    source: e2,
220                    path: Some(dest.into()),
221                    operation: "copy".into(),
222                })?;
223            }
224        }
225    }
226    Ok(())
227}
228
229pub fn collect_outputs(hermetic_root: &Path, patterns: &[String]) -> Result<Vec<PathBuf>> {
230    if patterns.is_empty() {
231        return Ok(vec![]);
232    }
233    let mut builder = GlobSetBuilder::new();
234    for p in patterns {
235        let looks_like_glob =
236            p.contains('*') || p.contains('{') || p.contains('?') || p.contains('[');
237        let mut pat = p.clone();
238        let abs = hermetic_root.join(&pat);
239        if abs.is_dir() && !looks_like_glob {
240            pat = format!("{}/**/*", pat.trim_end_matches('/'));
241        }
242        let glob = Glob::new(&pat)
243            .map_err(|e| Error::configuration(format!("Invalid output glob '{pat}': {e}")))?;
244        builder.add(glob);
245    }
246    let set = builder
247        .build()
248        .map_err(|e| Error::configuration(format!("Failed to build output globset: {e}")))?;
249
250    let mut results = Vec::new();
251    for entry in WalkDir::new(hermetic_root)
252        .into_iter()
253        .filter_map(|e| e.ok())
254    {
255        let path = entry.path();
256        if path.is_dir() {
257            continue;
258        }
259        let rel = match path.strip_prefix(hermetic_root) {
260            Ok(p) => p,
261            Err(_) => continue,
262        };
263        if set.is_match(rel) {
264            results.push(rel.to_path_buf());
265        }
266    }
267    results.sort();
268    Ok(results)
269}
270
271pub fn snapshot_workspace_tar_zst(src_root: &Path, dst_file: &Path) -> Result<()> {
272    let file = fs::File::create(dst_file).map_err(|e| Error::Io {
273        source: e,
274        path: Some(dst_file.into()),
275        operation: "create".into(),
276    })?;
277    let enc = zstd::Encoder::new(file, 3)
278        .map_err(|e| Error::configuration(format!("zstd encoder error: {e}")))?;
279    let mut builder = tar::Builder::new(enc);
280
281    match builder.append_dir_all(".", src_root) {
282        Ok(()) => {}
283        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
284            // Workspace contents can legitimately disappear during a task (e.g.
285            // package managers removing temp files). Skip snapshotting instead
286            // of failing the whole task cache write.
287            let _ = fs::remove_file(dst_file);
288            tracing::warn!(
289                root = %src_root.display(),
290                "Skipping workspace snapshot; files disappeared during archive: {e}"
291            );
292            return Ok(());
293        }
294        Err(e) => {
295            return Err(Error::configuration(format!("tar append failed: {e}")));
296        }
297    }
298
299    let enc = builder
300        .into_inner()
301        .map_err(|e| Error::configuration(format!("tar finalize failed: {e}")))?;
302    enc.finish()
303        .map_err(|e| Error::configuration(format!("zstd finish failed: {e}")))?;
304    Ok(())
305}
306
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// A directory, an explicit file and a glob can be mixed in one call.
    #[test]
    fn resolves_files_dirs_and_globs() {
        let project = TempDir::new().unwrap();
        let base = project.path();
        // Layout: src/a.ts, src/sub/b.ts, README.md
        std::fs::create_dir_all(base.join("src/sub")).unwrap();
        std::fs::write(base.join("src/a.ts"), "A").unwrap();
        std::fs::write(base.join("src/sub/b.ts"), "B").unwrap();
        std::fs::write(base.join("README.md"), "readme").unwrap();

        let patterns = ["src".into(), "README.md".into(), "**/*.ts".into()];
        let resolved = InputResolver::new(base).resolve(&patterns).unwrap();

        let mut rels: Vec<String> = Vec::new();
        for file in &resolved.files {
            rels.push(file.rel_path.to_string_lossy().to_string());
        }
        assert!(rels.contains(&"src/a.ts".to_string()));
        assert!(rels.contains(&"src/sub/b.ts".to_string()));
        assert!(rels.contains(&"README.md".to_string()));
    }

    /// A symlinked input records the link target as its source path.
    #[cfg(unix)]
    #[test]
    fn resolves_symlink_targets() {
        use std::os::unix::fs as unixfs;

        let project = TempDir::new().unwrap();
        let base = project.path();
        std::fs::create_dir_all(base.join("data")).unwrap();
        std::fs::write(base.join("data/real.txt"), "hello").unwrap();
        unixfs::symlink("real.txt", base.join("data/link.txt")).unwrap();

        let resolved = InputResolver::new(base)
            .resolve(&["data/link.txt".into()])
            .unwrap();
        assert_eq!(resolved.files.len(), 1);
        assert!(resolved.files[0].source_path.ends_with("real.txt"));
    }

    /// Populating a hermetic directory recreates the relative layout.
    #[test]
    fn populates_hermetic_dir() {
        let project = TempDir::new().unwrap();
        let base = project.path();
        std::fs::create_dir_all(base.join("dir")).unwrap();
        std::fs::write(base.join("dir/x.txt"), "x").unwrap();

        let resolved = InputResolver::new(base).resolve(&["dir".into()]).unwrap();

        let sandbox = TempDir::new().unwrap();
        populate_hermetic_dir(&resolved, sandbox.path()).unwrap();
        assert!(sandbox.path().join("dir/x.txt").exists());
    }
}