cuenv_core/cache/
tasks.rs

1use crate::{Error, Result};
2use chrono::{DateTime, Utc};
3use dirs::{cache_dir, home_dir};
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6use std::collections::BTreeMap;
7use std::fs;
8use std::path::{Path, PathBuf};
9
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct OutputIndexEntry {
12    pub rel_path: String,
13    pub size: u64,
14    pub sha256: String,
15}
16
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct TaskResultMeta {
19    pub task_name: String,
20    pub command: String,
21    pub args: Vec<String>,
22    pub env_summary: BTreeMap<String, String>,
23    pub inputs_summary: BTreeMap<String, String>,
24    pub created_at: DateTime<Utc>,
25    pub cuenv_version: String,
26    pub platform: String,
27    pub duration_ms: u128,
28    pub exit_code: i32,
29    pub cache_key_envelope: serde_json::Value,
30    pub output_index: Vec<OutputIndexEntry>,
31}
32
/// A cache entry found on disk, as returned by `lookup`.
#[derive(Clone, Debug)]
pub struct CacheEntry {
    /// The cache key the entry was looked up with.
    pub key: String,
    /// Location of the entry on disk (`<cache root>/<key>`).
    pub path: PathBuf,
}
38
/// Candidate locations consumed by `cache_root_from_inputs`.
///
/// Collecting them in a struct lets the resolution logic be exercised without
/// touching the real process environment.
#[derive(Clone, Debug)]
struct CacheInputs {
    /// Explicit override; used as the cache root itself (no suffix appended).
    cuenv_cache_dir: Option<PathBuf>,
    /// Base directory to which `cuenv/tasks` is appended.
    xdg_cache_home: Option<PathBuf>,
    /// OS cache directory to which `cuenv/tasks` is appended.
    os_cache_dir: Option<PathBuf>,
    /// Home directory to which `.cuenv/cache/tasks` is appended.
    home_dir: Option<PathBuf>,
    /// Temp directory to which `cuenv/cache/tasks` is appended (always tried last).
    temp_dir: PathBuf,
}
47
48fn cache_root_from_inputs(inputs: CacheInputs) -> Result<PathBuf> {
49    // Resolution order (first writable wins):
50    // 1) CUENV_CACHE_DIR (explicit override)
51    // 2) XDG_CACHE_HOME/cuenv/tasks
52    // 3) OS cache dir/cuenv/tasks
53    // 4) ~/.cuenv/cache/tasks (legacy)
54    // 5) TMPDIR/cuenv/cache/tasks (fallback)
55    let mut candidates: Vec<PathBuf> = Vec::new();
56
57    if let Some(dir) = inputs.cuenv_cache_dir.filter(|p| !p.as_os_str().is_empty()) {
58        candidates.push(dir);
59    }
60    if let Some(xdg) = inputs.xdg_cache_home {
61        candidates.push(xdg.join("cuenv/tasks"));
62    }
63    if let Some(os_cache) = inputs.os_cache_dir {
64        candidates.push(os_cache.join("cuenv/tasks"));
65    }
66    if let Some(home) = inputs.home_dir {
67        candidates.push(home.join(".cuenv/cache/tasks"));
68    }
69    candidates.push(inputs.temp_dir.join("cuenv/cache/tasks"));
70
71    for path in candidates {
72        if path.starts_with("/homeless-shelter") {
73            continue;
74        }
75        // If the path already exists, ensure it is writable; some CI environments
76        // provide read‑only cache directories under $HOME.
77        if path.exists() {
78            let probe = path.join(".write_probe");
79            match std::fs::OpenOptions::new()
80                .create(true)
81                .truncate(true)
82                .write(true)
83                .open(&probe)
84            {
85                Ok(_) => {
86                    let _ = std::fs::remove_file(&probe);
87                    return Ok(path);
88                }
89                Err(_) => {
90                    // Not writable, try next candidate
91                    continue;
92                }
93            }
94        }
95        match std::fs::create_dir_all(&path) {
96            Ok(_) => return Ok(path),
97            Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => continue,
98            Err(_) => continue,
99        }
100    }
101    Err(Error::configuration(
102        "Failed to determine a writable cache directory",
103    ))
104}
105
106fn cache_root() -> Result<PathBuf> {
107    let inputs = CacheInputs {
108        cuenv_cache_dir: std::env::var("CUENV_CACHE_DIR")
109            .ok()
110            .filter(|s| !s.trim().is_empty())
111            .map(PathBuf::from),
112        xdg_cache_home: std::env::var("XDG_CACHE_HOME")
113            .ok()
114            .filter(|s| !s.trim().is_empty())
115            .map(PathBuf::from),
116        os_cache_dir: cache_dir(),
117        home_dir: home_dir(),
118        temp_dir: std::env::temp_dir(),
119    };
120    cache_root_from_inputs(inputs)
121}
122
123pub fn key_to_path(key: &str, root: Option<&Path>) -> Result<PathBuf> {
124    let base = if let Some(r) = root {
125        r.to_path_buf()
126    } else {
127        cache_root()?
128    };
129    Ok(base.join(key))
130}
131
132pub fn lookup(key: &str, root: Option<&Path>) -> Option<CacheEntry> {
133    let path = match key_to_path(key, root) {
134        Ok(p) => p,
135        Err(_) => return None,
136    };
137    if path.exists() {
138        Some(CacheEntry {
139            key: key.to_string(),
140            path,
141        })
142    } else {
143        None
144    }
145}
146
/// Captured task output handed to `save_result`.
///
/// Each stream that is `Some` is written to the cache entry's `logs/`
/// directory; a `None` stream produces no file.
pub struct TaskLogs {
    /// Contents written to `logs/stdout.log`.
    pub stdout: Option<String>,
    /// Contents written to `logs/stderr.log`.
    pub stderr: Option<String>,
}
151
152pub fn save_result(
153    key: &str,
154    meta: &TaskResultMeta,
155    outputs_root: &Path,
156    hermetic_root: &Path,
157    logs: TaskLogs,
158    root: Option<&Path>,
159) -> Result<()> {
160    let path = key_to_path(key, root)?;
161    fs::create_dir_all(&path).map_err(|e| Error::Io {
162        source: e,
163        path: Some(path.clone().into()),
164        operation: "create_dir_all".into(),
165    })?;
166
167    // metadata.json
168    let meta_path = path.join("metadata.json");
169    let json = serde_json::to_vec_pretty(meta)
170        .map_err(|e| Error::configuration(format!("Failed to serialize metadata: {e}")))?;
171    fs::write(&meta_path, json).map_err(|e| Error::Io {
172        source: e,
173        path: Some(meta_path.into()),
174        operation: "write".into(),
175    })?;
176
177    // outputs/
178    let out_dir = path.join("outputs");
179    fs::create_dir_all(&out_dir).map_err(|e| Error::Io {
180        source: e,
181        path: Some(out_dir.clone().into()),
182        operation: "create_dir_all".into(),
183    })?;
184    // Copy tree from outputs_root (already collected) if exists
185    if outputs_root.exists() {
186        for entry in walkdir::WalkDir::new(outputs_root)
187            .into_iter()
188            .filter_map(|e| e.ok())
189        {
190            let p = entry.path();
191            if p.is_dir() {
192                continue;
193            }
194            let rel = p
195                .strip_prefix(outputs_root)
196                .expect("WalkDir entry is under outputs_root");
197            let dst = out_dir.join(rel);
198            if let Some(parent) = dst.parent() {
199                fs::create_dir_all(parent).ok();
200            }
201            fs::copy(p, &dst).map_err(|e| Error::Io {
202                source: e,
203                path: Some(dst.into()),
204                operation: "copy".into(),
205            })?;
206        }
207    }
208
209    // logs/
210    let logs_dir = path.join("logs");
211    fs::create_dir_all(&logs_dir).ok();
212    if let Some(s) = logs.stdout.as_ref() {
213        let _ = fs::write(logs_dir.join("stdout.log"), s);
214    }
215    if let Some(s) = logs.stderr.as_ref() {
216        let _ = fs::write(logs_dir.join("stderr.log"), s);
217    }
218
219    // workspace snapshot
220    let snapshot = path.join("workspace.tar.zst");
221    crate::tasks::io::snapshot_workspace_tar_zst(hermetic_root, &snapshot)?;
222
223    Ok(())
224}
225
226pub fn materialize_outputs(key: &str, destination: &Path, root: Option<&Path>) -> Result<usize> {
227    let entry = lookup(key, root)
228        .ok_or_else(|| Error::configuration(format!("Cache key not found: {key}")))?;
229    let out_dir = entry.path.join("outputs");
230    if !out_dir.exists() {
231        return Ok(0);
232    }
233    let mut count = 0usize;
234    for e in walkdir::WalkDir::new(&out_dir)
235        .into_iter()
236        .filter_map(|e| e.ok())
237    {
238        let p = e.path();
239        if p.is_dir() {
240            continue;
241        }
242        let rel = p
243            .strip_prefix(&out_dir)
244            .expect("WalkDir entry is under out_dir");
245        let dst = destination.join(rel);
246        if let Some(parent) = dst.parent() {
247            fs::create_dir_all(parent).ok();
248        }
249        fs::copy(p, &dst).map_err(|e| Error::Io {
250            source: e,
251            path: Some(dst.into()),
252            operation: "copy".into(),
253        })?;
254        count += 1;
255    }
256    Ok(count)
257}
258
259/// Index mapping task names to their latest cache keys (per project)
260#[derive(Debug, Clone, Serialize, Deserialize, Default)]
261pub struct TaskLatestIndex {
262    /// Map of (project_root_hash, task_name) -> cache_key
263    pub entries: BTreeMap<String, BTreeMap<String, String>>,
264}
265
266fn latest_index_path(root: Option<&Path>) -> Result<PathBuf> {
267    let base = if let Some(r) = root {
268        r.to_path_buf()
269    } else {
270        cache_root()?
271    };
272    Ok(base.join("task-latest.json"))
273}
274
275fn project_hash(project_root: &Path) -> String {
276    let digest = Sha256::digest(project_root.to_string_lossy().as_bytes());
277    hex::encode(&digest[..8])
278}
279
280/// Record the latest cache key for a task in a project
281pub fn record_latest(
282    project_root: &Path,
283    task_name: &str,
284    cache_key: &str,
285    root: Option<&Path>,
286) -> Result<()> {
287    let path = latest_index_path(root)?;
288    let mut index: TaskLatestIndex = if path.exists() {
289        let content = fs::read_to_string(&path).unwrap_or_default();
290        serde_json::from_str(&content).unwrap_or_default()
291    } else {
292        TaskLatestIndex::default()
293    };
294
295    let proj_hash = project_hash(project_root);
296    index
297        .entries
298        .entry(proj_hash)
299        .or_default()
300        .insert(task_name.to_string(), cache_key.to_string());
301
302    let json = serde_json::to_string_pretty(&index)
303        .map_err(|e| Error::configuration(format!("Failed to serialize latest index: {e}")))?;
304    if let Some(parent) = path.parent() {
305        fs::create_dir_all(parent).ok();
306    }
307    fs::write(&path, json).map_err(|e| Error::Io {
308        source: e,
309        path: Some(path.into()),
310        operation: "write".into(),
311    })?;
312    Ok(())
313}
314
315/// Look up the latest cache key for a task in a project
316pub fn lookup_latest(project_root: &Path, task_name: &str, root: Option<&Path>) -> Option<String> {
317    let path = latest_index_path(root).ok()?;
318    if !path.exists() {
319        return None;
320    }
321    let content = fs::read_to_string(&path).ok()?;
322    let index: TaskLatestIndex = serde_json::from_str(&content).ok()?;
323    let proj_hash = project_hash(project_root);
324    index.entries.get(&proj_hash)?.get(task_name).cloned()
325}
326
327#[derive(Debug, Clone, Serialize, Deserialize)]
328pub struct CacheKeyEnvelope {
329    pub inputs: BTreeMap<String, String>,
330    pub command: String,
331    pub args: Vec<String>,
332    pub shell: Option<serde_json::Value>,
333    pub env: BTreeMap<String, String>,
334    pub cuenv_version: String,
335    pub platform: String,
336    /// Hashes of the workspace lockfiles (key = workspace name)
337    #[serde(skip_serializing_if = "Option::is_none")]
338    pub workspace_lockfile_hashes: Option<BTreeMap<String, String>>,
339    /// Hashes of workspace member packages (if relevant)
340    #[serde(skip_serializing_if = "Option::is_none")]
341    pub workspace_package_hashes: Option<BTreeMap<String, String>>,
342}
343
344pub fn compute_cache_key(envelope: &CacheKeyEnvelope) -> Result<(String, serde_json::Value)> {
345    // Canonical JSON with sorted keys (BTreeMap ensures deterministic ordering for maps)
346    let json = serde_json::to_value(envelope)
347        .map_err(|e| Error::configuration(format!("Failed to encode envelope: {e}")))?;
348    let bytes = serde_json::to_vec(&json)
349        .map_err(|e| Error::configuration(format!("Failed to serialize envelope: {e}")))?;
350    let digest = Sha256::digest(bytes);
351    Ok((hex::encode(digest), json))
352}
353
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    use tempfile::TempDir;

    /// Sets an environment variable and restores its previous state on drop.
    ///
    /// Environment variables are process-wide, not per-thread: a test using this
    /// guard must not run concurrently with other code that reads or writes the
    /// environment. Currently unused; the tests below inject paths through
    /// `CacheInputs` instead of mutating the environment.
    #[allow(dead_code)]
    struct EnvVarGuard {
        key: String,
        prev: Option<String>,
    }

    impl EnvVarGuard {
        #[allow(dead_code)]
        fn set<K: Into<String>, V: Into<String>>(key: K, value: V) -> Self {
            let key_s = key.into();
            let prev = std::env::var(&key_s).ok();
            // SAFETY: sound only while no other thread accesses the environment;
            // the caller must serialize tests that use this guard.
            unsafe {
                std::env::set_var(&key_s, value.into());
            }
            Self { key: key_s, prev }
        }
    }

    impl Drop for EnvVarGuard {
        fn drop(&mut self) {
            // SAFETY: same requirement as in `set` — no concurrent environment access.
            unsafe {
                match self.prev {
                    Some(ref v) => std::env::set_var(&self.key, v),
                    None => std::env::remove_var(&self.key),
                }
            }
        }
    }

    /// Hex-encoded SHA-256 of `bytes`.
    fn sha256_hex(bytes: &[u8]) -> String {
        hex::encode(Sha256::digest(bytes))
    }

    /// Builds an envelope whose only varying parts are `inputs` and `env`.
    fn envelope(
        inputs: BTreeMap<String, String>,
        env: BTreeMap<String, String>,
    ) -> CacheKeyEnvelope {
        CacheKeyEnvelope {
            inputs,
            command: "echo".into(),
            args: vec!["hi".into()],
            shell: None,
            env,
            cuenv_version: "0.1.1".into(),
            platform: "linux-x86_64".into(),
            workspace_lockfile_hashes: None,
            workspace_package_hashes: None,
        }
    }

    #[test]
    fn cache_key_is_deterministic_and_order_invariant() {
        let mut env_a = BTreeMap::new();
        env_a.insert("A".to_string(), "1".to_string());
        env_a.insert("B".to_string(), "2".to_string());
        let mut inputs1 = BTreeMap::new();
        inputs1.insert("b.txt".to_string(), "hashb".to_string());
        inputs1.insert("a.txt".to_string(), "hasha".to_string());
        let (k1, _) = compute_cache_key(&envelope(inputs1, env_a)).unwrap();

        // Same data but different insertion orders
        let mut env_b = BTreeMap::new();
        env_b.insert("B".to_string(), "2".to_string());
        env_b.insert("A".to_string(), "1".to_string());
        let mut inputs2 = BTreeMap::new();
        inputs2.insert("a.txt".to_string(), "hasha".to_string());
        inputs2.insert("b.txt".to_string(), "hashb".to_string());
        let (k2, _) = compute_cache_key(&envelope(inputs2, env_b)).unwrap();

        assert_eq!(k1, k2);
    }

    #[test]
    fn cache_root_skips_homeless_shelter() {
        let tmp = std::env::temp_dir();
        let inputs = CacheInputs {
            cuenv_cache_dir: None,
            xdg_cache_home: Some(PathBuf::from("/homeless-shelter/.cache")),
            os_cache_dir: None,
            home_dir: Some(PathBuf::from("/homeless-shelter")),
            temp_dir: tmp.clone(),
        };
        let dir =
            cache_root_from_inputs(inputs).expect("cache_root should choose a writable fallback");
        assert!(!dir.starts_with("/homeless-shelter"));
        assert!(dir.starts_with(&tmp));
    }

    #[test]
    fn cache_root_respects_override_env() {
        // A unique parent directory keeps this test isolated from concurrent
        // runs and is removed automatically. The override itself does not exist
        // yet, so the directory-creation path is exercised.
        let parent = TempDir::new().expect("tempdir");
        let override_dir = parent.path().join("cuenv-test-override");
        let inputs = CacheInputs {
            cuenv_cache_dir: Some(override_dir.clone()),
            xdg_cache_home: None,
            os_cache_dir: None,
            home_dir: None,
            temp_dir: std::env::temp_dir(),
        };
        let dir = cache_root_from_inputs(inputs).expect("cache_root should use override");
        assert_eq!(dir, override_dir);
        assert!(dir.is_dir());
    }

    #[test]
    fn save_and_materialize_outputs_roundtrip() {
        // Force cache root into a temp directory to avoid touching user dirs
        let cache_tmp = TempDir::new().expect("tempdir");

        // Prepare fake outputs
        let outputs = TempDir::new().expect("outputs tempdir");
        std::fs::create_dir_all(outputs.path().join("dir")).unwrap();
        std::fs::write(outputs.path().join("foo.txt"), b"foo").unwrap();
        std::fs::write(outputs.path().join("dir/bar.bin"), b"bar").unwrap();

        // Prepare hermetic workspace to snapshot
        let herm = TempDir::new().expect("hermetic tempdir");
        std::fs::create_dir_all(herm.path().join("work")).unwrap();
        std::fs::write(herm.path().join("work/a.txt"), b"a").unwrap();

        // Minimal metadata
        let mut env_summary = BTreeMap::new();
        env_summary.insert("FOO".to_string(), "1".to_string());
        let inputs_summary = BTreeMap::new();
        let output_index = vec![
            OutputIndexEntry {
                rel_path: "foo.txt".to_string(),
                size: 3,
                sha256: sha256_hex(b"foo"),
            },
            OutputIndexEntry {
                rel_path: "dir/bar.bin".to_string(),
                size: 3,
                sha256: sha256_hex(b"bar"),
            },
        ];

        let meta = TaskResultMeta {
            task_name: "unit".into(),
            command: "echo".into(),
            args: vec!["ok".into()],
            env_summary,
            inputs_summary,
            created_at: chrono::Utc::now(),
            cuenv_version: "0.0.0-test".into(),
            platform: std::env::consts::OS.to_string(),
            duration_ms: 1,
            exit_code: 0,
            cache_key_envelope: serde_json::json!({}),
            output_index,
        };

        let logs = TaskLogs {
            stdout: Some("hello".into()),
            stderr: Some("".into()),
        };

        let key = "roundtrip-key-123";
        save_result(
            key,
            &meta,
            outputs.path(),
            herm.path(),
            logs,
            Some(cache_tmp.path()),
        )
        .expect("save_result");

        // Verify cache layout
        let base = key_to_path(key, Some(cache_tmp.path())).expect("key_to_path");
        assert!(base.join("metadata.json").exists());
        assert!(base.join("outputs/foo.txt").exists());
        assert!(base.join("outputs/dir/bar.bin").exists());
        assert_eq!(
            std::fs::read_to_string(base.join("logs/stdout.log")).unwrap(),
            "hello"
        );
        assert!(base.join("logs/stderr.log").exists());
        let snapshot = base.join("workspace.tar.zst");
        let snap_meta = std::fs::metadata(&snapshot).unwrap();
        assert!(snap_meta.len() > 0);

        // Materialize into fresh destination
        let dest = TempDir::new().expect("dest tempdir");
        let copied = materialize_outputs(key, dest.path(), Some(cache_tmp.path()))
            .expect("materialize_outputs");
        assert_eq!(copied, 2);
        assert_eq!(std::fs::read(dest.path().join("foo.txt")).unwrap(), b"foo");
        assert_eq!(
            std::fs::read(dest.path().join("dir/bar.bin")).unwrap(),
            b"bar"
        );
    }
}
565}