cuenv_core/cache/
tasks.rs

1use crate::{Error, Result};
2use chrono::{DateTime, Utc};
3use dirs::{cache_dir, home_dir};
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6use std::collections::BTreeMap;
7use std::fs;
8use std::path::{Path, PathBuf};
9
/// One entry of the output index stored in [`TaskResultMeta::output_index`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputIndexEntry {
    /// Path of the output file. NOTE(review): the in-file test uses paths relative to the
    /// outputs root (e.g. `dir/bar.bin`) — confirm against the producer of this index.
    pub rel_path: String,
    /// Size of the output file. NOTE(review): presumably bytes (the test records 3 for `foo`).
    pub size: u64,
    /// SHA-256 digest of the file contents. NOTE(review): the in-file test stores it as a
    /// lowercase hex string — confirm producers do the same.
    pub sha256: String,
}
16
/// Metadata describing one cached task result.
///
/// `save_result` serializes this struct as pretty-printed JSON into `metadata.json` inside
/// the cache entry directory.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaskResultMeta {
    /// Name of the task this result belongs to.
    pub task_name: String,
    /// Command that was run.
    pub command: String,
    /// Arguments passed to `command`.
    pub args: Vec<String>,
    /// Summary of the environment as key/value pairs.
    /// NOTE(review): exact content (raw values vs. hashes) is decided by the caller — confirm.
    pub env_summary: BTreeMap<String, String>,
    /// Summary of the task inputs as key/value pairs.
    /// NOTE(review): presumably input path -> content hash — confirm with the caller.
    pub inputs_summary: BTreeMap<String, String>,
    /// UTC timestamp recorded for this result.
    pub created_at: DateTime<Utc>,
    /// cuenv version string recorded with the result.
    pub cuenv_version: String,
    /// Platform identifier recorded with the result.
    pub platform: String,
    /// Task duration; the field name indicates milliseconds.
    pub duration_ms: u128,
    /// Exit code of the task.
    pub exit_code: i32,
    /// JSON value of the cache-key envelope.
    /// NOTE(review): presumably the second element returned by `compute_cache_key` — confirm.
    pub cache_key_envelope: serde_json::Value,
    /// Index of the output files belonging to this result.
    pub output_index: Vec<OutputIndexEntry>,
}
32
/// A cache entry found on disk, as returned by `lookup`.
#[derive(Debug, Clone)]
pub struct CacheEntry {
    /// Cache key the entry was looked up with.
    pub key: String,
    /// Entry directory: the cache root joined with `key`.
    pub path: PathBuf,
}
38
/// Candidate base locations consumed by `cache_root_from_inputs`.
///
/// Keeping these as plain data (rather than reading the environment inside the resolver)
/// lets tests inject arbitrary paths.
#[derive(Debug, Clone)]
struct CacheInputs {
    /// Explicit override; `cache_root` fills it from `CUENV_CACHE_DIR`. Used as-is.
    cuenv_cache_dir: Option<PathBuf>,
    /// `cache_root` fills it from `XDG_CACHE_HOME`; `cuenv/tasks` is appended.
    xdg_cache_home: Option<PathBuf>,
    /// `cache_root` fills it from `dirs::cache_dir()`; `cuenv/tasks` is appended.
    os_cache_dir: Option<PathBuf>,
    /// `cache_root` fills it from `dirs::home_dir()`; `.cuenv/cache/tasks` is appended.
    home_dir: Option<PathBuf>,
    /// Last-resort base directory; `cuenv/cache/tasks` is appended.
    temp_dir: PathBuf,
}
47
48fn cache_root_from_inputs(inputs: CacheInputs) -> Result<PathBuf> {
49    // Resolution order (first writable wins):
50    // 1) CUENV_CACHE_DIR (explicit override)
51    // 2) XDG_CACHE_HOME/cuenv/tasks
52    // 3) OS cache dir/cuenv/tasks
53    // 4) ~/.cuenv/cache/tasks (legacy)
54    // 5) TMPDIR/cuenv/cache/tasks (fallback)
55    let mut candidates: Vec<PathBuf> = Vec::new();
56
57    if let Some(dir) = inputs.cuenv_cache_dir.filter(|p| !p.as_os_str().is_empty()) {
58        candidates.push(dir);
59    }
60    if let Some(xdg) = inputs.xdg_cache_home {
61        candidates.push(xdg.join("cuenv/tasks"));
62    }
63    if let Some(os_cache) = inputs.os_cache_dir {
64        candidates.push(os_cache.join("cuenv/tasks"));
65    }
66    if let Some(home) = inputs.home_dir {
67        candidates.push(home.join(".cuenv/cache/tasks"));
68    }
69    candidates.push(inputs.temp_dir.join("cuenv/cache/tasks"));
70
71    for path in candidates {
72        if path.starts_with("/homeless-shelter") {
73            continue;
74        }
75        // If the path already exists, ensure it is writable; some CI environments
76        // provide read‑only cache directories under $HOME.
77        if path.exists() {
78            let probe = path.join(".write_probe");
79            match std::fs::OpenOptions::new()
80                .create(true)
81                .truncate(true)
82                .write(true)
83                .open(&probe)
84            {
85                Ok(_) => {
86                    let _ = std::fs::remove_file(&probe);
87                    return Ok(path);
88                }
89                Err(_) => {
90                    // Not writable, try next candidate
91                    continue;
92                }
93            }
94        }
95        match std::fs::create_dir_all(&path) {
96            Ok(_) => return Ok(path),
97            Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => continue,
98            Err(_) => continue,
99        }
100    }
101    Err(Error::configuration(
102        "Failed to determine a writable cache directory",
103    ))
104}
105
106fn cache_root() -> Result<PathBuf> {
107    let inputs = CacheInputs {
108        cuenv_cache_dir: std::env::var("CUENV_CACHE_DIR")
109            .ok()
110            .filter(|s| !s.trim().is_empty())
111            .map(PathBuf::from),
112        xdg_cache_home: std::env::var("XDG_CACHE_HOME")
113            .ok()
114            .filter(|s| !s.trim().is_empty())
115            .map(PathBuf::from),
116        os_cache_dir: cache_dir(),
117        home_dir: home_dir(),
118        temp_dir: std::env::temp_dir(),
119    };
120    cache_root_from_inputs(inputs)
121}
122
123pub fn key_to_path(key: &str, root: Option<&Path>) -> Result<PathBuf> {
124    let base = if let Some(r) = root {
125        r.to_path_buf()
126    } else {
127        cache_root()?
128    };
129    Ok(base.join(key))
130}
131
132pub fn lookup(key: &str, root: Option<&Path>) -> Option<CacheEntry> {
133    let path = match key_to_path(key, root) {
134        Ok(p) => p,
135        Err(_) => return None,
136    };
137    if path.exists() {
138        Some(CacheEntry {
139            key: key.to_string(),
140            path,
141        })
142    } else {
143        None
144    }
145}
146
/// Captured task output handed to `save_result`.
///
/// Each stream is optional; `save_result` writes `logs/stdout.log` / `logs/stderr.log`
/// only for the streams that are present. `Default` yields no captured output at all.
#[derive(Debug, Clone, Default)]
pub struct TaskLogs {
    /// Captured standard output, if any.
    pub stdout: Option<String>,
    /// Captured standard error, if any.
    pub stderr: Option<String>,
}
151
152pub fn save_result(
153    key: &str,
154    meta: &TaskResultMeta,
155    outputs_root: &Path,
156    hermetic_root: &Path,
157    logs: TaskLogs,
158    root: Option<&Path>,
159) -> Result<()> {
160    let path = key_to_path(key, root)?;
161    fs::create_dir_all(&path).map_err(|e| Error::Io {
162        source: e,
163        path: Some(path.clone().into()),
164        operation: "create_dir_all".into(),
165    })?;
166
167    // metadata.json
168    let meta_path = path.join("metadata.json");
169    let json = serde_json::to_vec_pretty(meta)
170        .map_err(|e| Error::configuration(format!("Failed to serialize metadata: {e}")))?;
171    fs::write(&meta_path, json).map_err(|e| Error::Io {
172        source: e,
173        path: Some(meta_path.into()),
174        operation: "write".into(),
175    })?;
176
177    // outputs/
178    let out_dir = path.join("outputs");
179    fs::create_dir_all(&out_dir).map_err(|e| Error::Io {
180        source: e,
181        path: Some(out_dir.clone().into()),
182        operation: "create_dir_all".into(),
183    })?;
184    // Copy tree from outputs_root (already collected) if exists
185    if outputs_root.exists() {
186        for entry in walkdir::WalkDir::new(outputs_root)
187            .into_iter()
188            .filter_map(|e| e.ok())
189        {
190            let p = entry.path();
191            if p.is_dir() {
192                continue;
193            }
194            let rel = p.strip_prefix(outputs_root).unwrap();
195            let dst = out_dir.join(rel);
196            if let Some(parent) = dst.parent() {
197                fs::create_dir_all(parent).ok();
198            }
199            fs::copy(p, &dst).map_err(|e| Error::Io {
200                source: e,
201                path: Some(dst.into()),
202                operation: "copy".into(),
203            })?;
204        }
205    }
206
207    // logs/
208    let logs_dir = path.join("logs");
209    fs::create_dir_all(&logs_dir).ok();
210    if let Some(s) = logs.stdout.as_ref() {
211        let _ = fs::write(logs_dir.join("stdout.log"), s);
212    }
213    if let Some(s) = logs.stderr.as_ref() {
214        let _ = fs::write(logs_dir.join("stderr.log"), s);
215    }
216
217    // workspace snapshot
218    let snapshot = path.join("workspace.tar.zst");
219    crate::tasks::io::snapshot_workspace_tar_zst(hermetic_root, &snapshot)?;
220
221    Ok(())
222}
223
224pub fn materialize_outputs(key: &str, destination: &Path, root: Option<&Path>) -> Result<usize> {
225    let entry = lookup(key, root)
226        .ok_or_else(|| Error::configuration(format!("Cache key not found: {key}")))?;
227    let out_dir = entry.path.join("outputs");
228    if !out_dir.exists() {
229        return Ok(0);
230    }
231    let mut count = 0usize;
232    for e in walkdir::WalkDir::new(&out_dir)
233        .into_iter()
234        .filter_map(|e| e.ok())
235    {
236        let p = e.path();
237        if p.is_dir() {
238            continue;
239        }
240        let rel = p.strip_prefix(&out_dir).unwrap();
241        let dst = destination.join(rel);
242        if let Some(parent) = dst.parent() {
243            fs::create_dir_all(parent).ok();
244        }
245        fs::copy(p, &dst).map_err(|e| Error::Io {
246            source: e,
247            path: Some(dst.into()),
248            operation: "copy".into(),
249        })?;
250        count += 1;
251    }
252    Ok(count)
253}
254
/// Index mapping task names to their latest cache keys (per project).
///
/// Stored as JSON in `task-latest.json` under the cache root; written by `record_latest`
/// and read by `lookup_latest`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TaskLatestIndex {
    /// Nested map: project root hash (see `project_hash`) -> task name -> latest cache key.
    pub entries: BTreeMap<String, BTreeMap<String, String>>,
}
261
262fn latest_index_path(root: Option<&Path>) -> Result<PathBuf> {
263    let base = if let Some(r) = root {
264        r.to_path_buf()
265    } else {
266        cache_root()?
267    };
268    Ok(base.join("task-latest.json"))
269}
270
271fn project_hash(project_root: &Path) -> String {
272    let digest = Sha256::digest(project_root.to_string_lossy().as_bytes());
273    hex::encode(&digest[..8])
274}
275
276/// Record the latest cache key for a task in a project
277pub fn record_latest(
278    project_root: &Path,
279    task_name: &str,
280    cache_key: &str,
281    root: Option<&Path>,
282) -> Result<()> {
283    let path = latest_index_path(root)?;
284    let mut index: TaskLatestIndex = if path.exists() {
285        let content = fs::read_to_string(&path).unwrap_or_default();
286        serde_json::from_str(&content).unwrap_or_default()
287    } else {
288        TaskLatestIndex::default()
289    };
290
291    let proj_hash = project_hash(project_root);
292    index
293        .entries
294        .entry(proj_hash)
295        .or_default()
296        .insert(task_name.to_string(), cache_key.to_string());
297
298    let json = serde_json::to_string_pretty(&index)
299        .map_err(|e| Error::configuration(format!("Failed to serialize latest index: {e}")))?;
300    if let Some(parent) = path.parent() {
301        fs::create_dir_all(parent).ok();
302    }
303    fs::write(&path, json).map_err(|e| Error::Io {
304        source: e,
305        path: Some(path.into()),
306        operation: "write".into(),
307    })?;
308    Ok(())
309}
310
311/// Look up the latest cache key for a task in a project
312pub fn lookup_latest(project_root: &Path, task_name: &str, root: Option<&Path>) -> Option<String> {
313    let path = latest_index_path(root).ok()?;
314    if !path.exists() {
315        return None;
316    }
317    let content = fs::read_to_string(&path).ok()?;
318    let index: TaskLatestIndex = serde_json::from_str(&content).ok()?;
319    let proj_hash = project_hash(project_root);
320    index.entries.get(&proj_hash)?.get(task_name).cloned()
321}
322
/// Everything that contributes to a task's cache key.
///
/// `compute_cache_key` serializes this struct to JSON and hashes the bytes, so any change
/// to a field value changes the key. All maps are `BTreeMap`s, which keeps their
/// serialization independent of insertion order.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheKeyEnvelope {
    /// Task inputs. NOTE(review): the in-file test uses file name -> content hash — confirm.
    pub inputs: BTreeMap<String, String>,
    /// Command to run.
    pub command: String,
    /// Arguments passed to `command`.
    pub args: Vec<String>,
    /// Optional shell configuration as free-form JSON; serialized as `null` when absent.
    pub shell: Option<serde_json::Value>,
    /// Environment variables that take part in the key.
    pub env: BTreeMap<String, String>,
    /// cuenv version string.
    pub cuenv_version: String,
    /// Platform identifier (the in-file test uses `linux-x86_64`).
    pub platform: String,
    /// Hashes of the workspace lockfiles (key = workspace name)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub workspace_lockfile_hashes: Option<BTreeMap<String, String>>,
    /// Hashes of workspace member packages (if relevant)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub workspace_package_hashes: Option<BTreeMap<String, String>>,
}
339
340pub fn compute_cache_key(envelope: &CacheKeyEnvelope) -> Result<(String, serde_json::Value)> {
341    // Canonical JSON with sorted keys (BTreeMap ensures deterministic ordering for maps)
342    let json = serde_json::to_value(envelope)
343        .map_err(|e| Error::configuration(format!("Failed to encode envelope: {e}")))?;
344    let bytes = serde_json::to_vec(&json)
345        .map_err(|e| Error::configuration(format!("Failed to serialize envelope: {e}")))?;
346    let digest = Sha256::digest(bytes);
347    Ok((hex::encode(digest), json))
348}
349
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    use tempfile::TempDir;

    /// Sets an environment variable and restores the previous state when dropped.
    ///
    /// The process environment is global, not per-thread: a test using this guard can be
    /// observed by (and can race with) any other test that touches the environment.
    // Not used by the tests below (they inject paths instead of mutating the environment),
    // hence the `dead_code` allowance.
    #[allow(dead_code)]
    struct EnvVarGuard {
        key: String,
        prev: Option<String>,
    }

    impl EnvVarGuard {
        #[allow(dead_code)]
        fn set<K: Into<String>, V: Into<String>>(key: K, value: V) -> Self {
            let key_s = key.into();
            let prev = std::env::var(&key_s).ok();
            // SAFETY: Rust 2024 marks env mutation as unsafe because it is only sound while
            // no other thread reads or writes the environment concurrently. Callers must
            // ensure tests using this guard do not overlap with other environment access.
            unsafe {
                std::env::set_var(&key_s, value.into());
            }
            Self { key: key_s, prev }
        }
    }

    impl Drop for EnvVarGuard {
        fn drop(&mut self) {
            if let Some(ref v) = self.prev {
                // SAFETY: same requirement as in `set` — no concurrent environment access.
                unsafe {
                    std::env::set_var(&self.key, v);
                }
            } else {
                // SAFETY: same requirement as in `set` — no concurrent environment access.
                unsafe {
                    std::env::remove_var(&self.key);
                }
            }
        }
    }

    /// Hex-encoded SHA-256 of `bytes`.
    fn sha256_hex(bytes: &[u8]) -> String {
        hex::encode(Sha256::digest(bytes))
    }

    #[test]
    fn cache_key_is_deterministic_and_order_invariant() {
        let mut env_a = BTreeMap::new();
        env_a.insert("A".to_string(), "1".to_string());
        env_a.insert("B".to_string(), "2".to_string());
        let mut inputs1 = BTreeMap::new();
        inputs1.insert("b.txt".to_string(), "hashb".to_string());
        inputs1.insert("a.txt".to_string(), "hasha".to_string());
        let e1 = CacheKeyEnvelope {
            inputs: inputs1,
            command: "echo".into(),
            args: vec!["hi".into()],
            shell: None,
            env: env_a.clone(),
            cuenv_version: "0.1.1".into(),
            platform: "linux-x86_64".into(),
            workspace_lockfile_hashes: None,
            workspace_package_hashes: None,
        };
        let (k1, _) = compute_cache_key(&e1).unwrap();

        // Same data but different insertion orders
        let mut env_b = BTreeMap::new();
        env_b.insert("B".to_string(), "2".to_string());
        env_b.insert("A".to_string(), "1".to_string());
        let mut inputs2 = BTreeMap::new();
        inputs2.insert("a.txt".to_string(), "hasha".to_string());
        inputs2.insert("b.txt".to_string(), "hashb".to_string());
        let e2 = CacheKeyEnvelope {
            inputs: inputs2,
            command: "echo".into(),
            args: vec!["hi".into()],
            shell: None,
            env: env_b,
            cuenv_version: "0.1.1".into(),
            platform: "linux-x86_64".into(),
            workspace_lockfile_hashes: None,
            workspace_package_hashes: None,
        };
        let (k2, _) = compute_cache_key(&e2).unwrap();

        assert_eq!(k1, k2);
    }

    #[test]
    fn cache_root_skips_homeless_shelter() {
        let tmp = std::env::temp_dir();
        let inputs = CacheInputs {
            cuenv_cache_dir: None,
            xdg_cache_home: Some(PathBuf::from("/homeless-shelter/.cache")),
            os_cache_dir: None,
            home_dir: Some(PathBuf::from("/homeless-shelter")),
            temp_dir: tmp.clone(),
        };
        let dir =
            cache_root_from_inputs(inputs).expect("cache_root should choose a writable fallback");
        assert!(!dir.starts_with("/homeless-shelter"));
        assert!(dir.starts_with(&tmp));
    }

    #[test]
    fn cache_root_respects_override_env() {
        // A unique scratch directory avoids collisions between parallel or repeated runs;
        // the override itself does not exist yet, so the resolver has to create it.
        let scratch = TempDir::new().expect("tempdir");
        let override_dir = scratch.path().join("override");
        let inputs = CacheInputs {
            cuenv_cache_dir: Some(override_dir.clone()),
            xdg_cache_home: None,
            os_cache_dir: None,
            home_dir: None,
            temp_dir: std::env::temp_dir(),
        };
        let dir = cache_root_from_inputs(inputs).expect("cache_root should use override");
        assert_eq!(dir, override_dir);
        assert!(override_dir.is_dir());
    }

    #[test]
    fn save_and_materialize_outputs_roundtrip() {
        // Force cache root into a temp directory to avoid touching user dirs
        let cache_tmp = TempDir::new().expect("tempdir");

        // Prepare fake outputs
        let outputs = TempDir::new().expect("outputs tempdir");
        std::fs::create_dir_all(outputs.path().join("dir")).unwrap();
        std::fs::write(outputs.path().join("foo.txt"), b"foo").unwrap();
        std::fs::write(outputs.path().join("dir/bar.bin"), b"bar").unwrap();

        // Prepare hermetic workspace to snapshot
        let herm = TempDir::new().expect("hermetic tempdir");
        std::fs::create_dir_all(herm.path().join("work")).unwrap();
        std::fs::write(herm.path().join("work/a.txt"), b"a").unwrap();

        // Minimal metadata
        let mut env_summary = BTreeMap::new();
        env_summary.insert("FOO".to_string(), "1".to_string());
        let inputs_summary = BTreeMap::new();
        let output_index = vec![
            OutputIndexEntry {
                rel_path: "foo.txt".to_string(),
                size: 3,
                sha256: sha256_hex(b"foo"),
            },
            OutputIndexEntry {
                rel_path: "dir/bar.bin".to_string(),
                size: 3,
                sha256: sha256_hex(b"bar"),
            },
        ];

        let meta = TaskResultMeta {
            task_name: "unit".into(),
            command: "echo".into(),
            args: vec!["ok".into()],
            env_summary,
            inputs_summary,
            created_at: chrono::Utc::now(),
            cuenv_version: "0.0.0-test".into(),
            platform: std::env::consts::OS.to_string(),
            duration_ms: 1,
            exit_code: 0,
            cache_key_envelope: serde_json::json!({}),
            output_index,
        };

        let logs = TaskLogs {
            stdout: Some("hello".into()),
            stderr: Some("".into()),
        };

        let key = "roundtrip-key-123";
        save_result(
            key,
            &meta,
            outputs.path(),
            herm.path(),
            logs,
            Some(cache_tmp.path()),
        )
        .expect("save_result");

        // Verify cache layout
        let base = key_to_path(key, Some(cache_tmp.path())).expect("key_to_path");
        assert!(base.join("metadata.json").exists());
        assert!(base.join("outputs/foo.txt").exists());
        assert!(base.join("outputs/dir/bar.bin").exists());
        assert!(base.join("logs/stdout.log").exists());
        assert!(base.join("logs/stderr.log").exists());
        let snapshot = base.join("workspace.tar.zst");
        let snap_meta = std::fs::metadata(&snapshot).unwrap();
        assert!(snap_meta.len() > 0);

        // Materialize into fresh destination
        let dest = TempDir::new().expect("dest tempdir");
        let copied = materialize_outputs(key, dest.path(), Some(cache_tmp.path()))
            .expect("materialize_outputs");
        assert_eq!(copied, 2);
        assert_eq!(std::fs::read(dest.path().join("foo.txt")).unwrap(), b"foo");
        assert_eq!(
            std::fs::read(dest.path().join("dir/bar.bin")).unwrap(),
            b"bar"
        );
    }

    #[test]
    fn record_and_lookup_latest_roundtrip() {
        let cache_tmp = TempDir::new().expect("tempdir");
        let root = Some(cache_tmp.path());
        let project = PathBuf::from("/some/project");
        let other_project = PathBuf::from("/some/other-project");

        // Nothing recorded yet
        assert_eq!(lookup_latest(&project, "build", root), None);

        record_latest(&project, "build", "key-1", root).expect("record_latest build");
        record_latest(&project, "test", "key-2", root).expect("record_latest test");
        assert_eq!(
            lookup_latest(&project, "build", root).as_deref(),
            Some("key-1")
        );
        assert_eq!(
            lookup_latest(&project, "test", root).as_deref(),
            Some("key-2")
        );

        // A later record for the same task replaces the key and keeps the other tasks
        record_latest(&project, "build", "key-3", root).expect("record_latest overwrite");
        assert_eq!(
            lookup_latest(&project, "build", root).as_deref(),
            Some("key-3")
        );
        assert_eq!(
            lookup_latest(&project, "test", root).as_deref(),
            Some("key-2")
        );

        // Entries are scoped per project
        assert_eq!(lookup_latest(&other_project, "build", root), None);
    }
}