cuenv_core/cache/
tasks.rs

1use crate::{Error, Result};
2use chrono::{DateTime, Utc};
3use dirs::{cache_dir, home_dir};
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6use std::collections::BTreeMap;
7use std::fs;
8use std::path::{Path, PathBuf};
9
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct OutputIndexEntry {
12    pub rel_path: String,
13    pub size: u64,
14    pub sha256: String,
15}
16
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct TaskResultMeta {
19    pub task_name: String,
20    pub command: String,
21    pub args: Vec<String>,
22    pub env_summary: BTreeMap<String, String>,
23    pub inputs_summary: BTreeMap<String, String>,
24    pub created_at: DateTime<Utc>,
25    pub cuenv_version: String,
26    pub platform: String,
27    pub duration_ms: u128,
28    pub exit_code: i32,
29    pub cache_key_envelope: serde_json::Value,
30    pub output_index: Vec<OutputIndexEntry>,
31}
32
/// Handle to an existing cache entry, as returned by [`lookup`].
#[derive(Clone, Debug)]
pub struct CacheEntry {
    /// Cache key the entry was looked up with.
    pub key: String,
    /// Directory of the entry: the cache root joined with `key`.
    pub path: PathBuf,
}
38
/// Raw locations from which the cache root is resolved.
///
/// Keeping them in a plain struct lets `cache_root_from_inputs` be exercised
/// without touching the process environment; `cache_root` fills it from the
/// real environment.
#[derive(Clone, Debug)]
struct CacheInputs {
    /// Explicit override (`CUENV_CACHE_DIR`), used as-is when present.
    cuenv_cache_dir: Option<PathBuf>,
    /// Value of `XDG_CACHE_HOME`, if set.
    xdg_cache_home: Option<PathBuf>,
    /// Per-user cache directory reported by `dirs::cache_dir()`.
    os_cache_dir: Option<PathBuf>,
    /// Home directory reported by `dirs::home_dir()`.
    home_dir: Option<PathBuf>,
    /// Temporary directory used as the last-resort fallback.
    temp_dir: PathBuf,
}
47
48fn cache_root_from_inputs(inputs: CacheInputs) -> Result<PathBuf> {
49    // Resolution order (first writable wins):
50    // 1) CUENV_CACHE_DIR (explicit override)
51    // 2) XDG_CACHE_HOME/cuenv/tasks
52    // 3) OS cache dir/cuenv/tasks
53    // 4) ~/.cuenv/cache/tasks (legacy)
54    // 5) TMPDIR/cuenv/cache/tasks (fallback)
55    let mut candidates: Vec<PathBuf> = Vec::new();
56
57    if let Some(dir) = inputs.cuenv_cache_dir.filter(|p| !p.as_os_str().is_empty()) {
58        candidates.push(dir);
59    }
60    if let Some(xdg) = inputs.xdg_cache_home {
61        candidates.push(xdg.join("cuenv/tasks"));
62    }
63    if let Some(os_cache) = inputs.os_cache_dir {
64        candidates.push(os_cache.join("cuenv/tasks"));
65    }
66    if let Some(home) = inputs.home_dir {
67        candidates.push(home.join(".cuenv/cache/tasks"));
68    }
69    candidates.push(inputs.temp_dir.join("cuenv/cache/tasks"));
70
71    for path in candidates {
72        if path.starts_with("/homeless-shelter") {
73            continue;
74        }
75        // If the path already exists, ensure it is writable; some CI environments
76        // provide read‑only cache directories under $HOME.
77        if path.exists() {
78            let probe = path.join(".write_probe");
79            match std::fs::OpenOptions::new()
80                .create(true)
81                .truncate(true)
82                .write(true)
83                .open(&probe)
84            {
85                Ok(_) => {
86                    let _ = std::fs::remove_file(&probe);
87                    return Ok(path);
88                }
89                Err(_) => {
90                    // Not writable, try next candidate
91                    continue;
92                }
93            }
94        }
95        match std::fs::create_dir_all(&path) {
96            Ok(_) => return Ok(path),
97            Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => continue,
98            Err(_) => continue,
99        }
100    }
101    Err(Error::configuration(
102        "Failed to determine a writable cache directory",
103    ))
104}
105
106fn cache_root() -> Result<PathBuf> {
107    let inputs = CacheInputs {
108        cuenv_cache_dir: std::env::var("CUENV_CACHE_DIR")
109            .ok()
110            .filter(|s| !s.trim().is_empty())
111            .map(PathBuf::from),
112        xdg_cache_home: std::env::var("XDG_CACHE_HOME")
113            .ok()
114            .filter(|s| !s.trim().is_empty())
115            .map(PathBuf::from),
116        os_cache_dir: cache_dir(),
117        home_dir: home_dir(),
118        temp_dir: std::env::temp_dir(),
119    };
120    cache_root_from_inputs(inputs)
121}
122
123pub fn key_to_path(key: &str, root: Option<&Path>) -> Result<PathBuf> {
124    let base = if let Some(r) = root {
125        r.to_path_buf()
126    } else {
127        cache_root()?
128    };
129    Ok(base.join(key))
130}
131
132pub fn lookup(key: &str, root: Option<&Path>) -> Option<CacheEntry> {
133    let path = match key_to_path(key, root) {
134        Ok(p) => p,
135        Err(_) => return None,
136    };
137    if path.exists() {
138        Some(CacheEntry {
139            key: key.to_string(),
140            path,
141        })
142    } else {
143        None
144    }
145}
146
/// Captured task output handed to [`save_result`].
///
/// Each stream that is `Some` is written to `logs/stdout.log` or
/// `logs/stderr.log` in the cache entry; `None` streams produce no file.
pub struct TaskLogs {
    /// Captured standard output, if any.
    pub stdout: Option<String>,
    /// Captured standard error, if any.
    pub stderr: Option<String>,
}
151
152pub fn save_result(
153    key: &str,
154    meta: &TaskResultMeta,
155    outputs_root: &Path,
156    hermetic_root: &Path,
157    logs: TaskLogs,
158    root: Option<&Path>,
159) -> Result<()> {
160    let path = key_to_path(key, root)?;
161    fs::create_dir_all(&path).map_err(|e| Error::Io {
162        source: e,
163        path: Some(path.clone().into()),
164        operation: "create_dir_all".into(),
165    })?;
166
167    // metadata.json
168    let meta_path = path.join("metadata.json");
169    let json = serde_json::to_vec_pretty(meta)
170        .map_err(|e| Error::configuration(format!("Failed to serialize metadata: {e}")))?;
171    fs::write(&meta_path, json).map_err(|e| Error::Io {
172        source: e,
173        path: Some(meta_path.into()),
174        operation: "write".into(),
175    })?;
176
177    // outputs/
178    let out_dir = path.join("outputs");
179    fs::create_dir_all(&out_dir).map_err(|e| Error::Io {
180        source: e,
181        path: Some(out_dir.clone().into()),
182        operation: "create_dir_all".into(),
183    })?;
184    // Copy tree from outputs_root (already collected) if exists
185    if outputs_root.exists() {
186        for entry in walkdir::WalkDir::new(outputs_root)
187            .into_iter()
188            .filter_map(|e| e.ok())
189        {
190            let p = entry.path();
191            if p.is_dir() {
192                continue;
193            }
194            let rel = p.strip_prefix(outputs_root).map_err(|_| {
195                Error::configuration(format!(
196                    "path {} is not under outputs_root {}",
197                    p.display(),
198                    outputs_root.display()
199                ))
200            })?;
201            let dst = out_dir.join(rel);
202            if let Some(parent) = dst.parent() {
203                fs::create_dir_all(parent).ok();
204            }
205            fs::copy(p, &dst).map_err(|e| Error::Io {
206                source: e,
207                path: Some(dst.into()),
208                operation: "copy".into(),
209            })?;
210        }
211    }
212
213    // logs/
214    let logs_dir = path.join("logs");
215    fs::create_dir_all(&logs_dir).ok();
216    if let Some(s) = logs.stdout.as_ref() {
217        let _ = fs::write(logs_dir.join("stdout.log"), s);
218    }
219    if let Some(s) = logs.stderr.as_ref() {
220        let _ = fs::write(logs_dir.join("stderr.log"), s);
221    }
222
223    // workspace snapshot
224    let snapshot = path.join("workspace.tar.zst");
225    crate::tasks::io::snapshot_workspace_tar_zst(hermetic_root, &snapshot)?;
226
227    Ok(())
228}
229
230pub fn materialize_outputs(key: &str, destination: &Path, root: Option<&Path>) -> Result<usize> {
231    let entry = lookup(key, root)
232        .ok_or_else(|| Error::configuration(format!("Cache key not found: {key}")))?;
233    let out_dir = entry.path.join("outputs");
234    if !out_dir.exists() {
235        return Ok(0);
236    }
237    let mut count = 0usize;
238    for e in walkdir::WalkDir::new(&out_dir)
239        .into_iter()
240        .filter_map(|e| e.ok())
241    {
242        let p = e.path();
243        if p.is_dir() {
244            continue;
245        }
246        let rel = p.strip_prefix(&out_dir).map_err(|_| {
247            Error::configuration(format!(
248                "path {} is not under out_dir {}",
249                p.display(),
250                out_dir.display()
251            ))
252        })?;
253        let dst = destination.join(rel);
254        if let Some(parent) = dst.parent() {
255            fs::create_dir_all(parent).ok();
256        }
257        fs::copy(p, &dst).map_err(|e| Error::Io {
258            source: e,
259            path: Some(dst.into()),
260            operation: "copy".into(),
261        })?;
262        count += 1;
263    }
264    Ok(count)
265}
266
267/// Index mapping task names to their latest cache keys (per project)
268#[derive(Debug, Clone, Serialize, Deserialize, Default)]
269pub struct TaskLatestIndex {
270    /// Map of (project_root_hash, task_name) -> cache_key
271    pub entries: BTreeMap<String, BTreeMap<String, String>>,
272}
273
274fn latest_index_path(root: Option<&Path>) -> Result<PathBuf> {
275    let base = if let Some(r) = root {
276        r.to_path_buf()
277    } else {
278        cache_root()?
279    };
280    Ok(base.join("task-latest.json"))
281}
282
283fn project_hash(project_root: &Path) -> String {
284    let digest = Sha256::digest(project_root.to_string_lossy().as_bytes());
285    hex::encode(&digest[..8])
286}
287
288/// Record the latest cache key for a task in a project
289pub fn record_latest(
290    project_root: &Path,
291    task_name: &str,
292    cache_key: &str,
293    root: Option<&Path>,
294) -> Result<()> {
295    let path = latest_index_path(root)?;
296    let mut index: TaskLatestIndex = if path.exists() {
297        let content = fs::read_to_string(&path).unwrap_or_default();
298        serde_json::from_str(&content).unwrap_or_default()
299    } else {
300        TaskLatestIndex::default()
301    };
302
303    let proj_hash = project_hash(project_root);
304    index
305        .entries
306        .entry(proj_hash)
307        .or_default()
308        .insert(task_name.to_string(), cache_key.to_string());
309
310    let json = serde_json::to_string_pretty(&index)
311        .map_err(|e| Error::configuration(format!("Failed to serialize latest index: {e}")))?;
312    if let Some(parent) = path.parent() {
313        fs::create_dir_all(parent).ok();
314    }
315    fs::write(&path, json).map_err(|e| Error::Io {
316        source: e,
317        path: Some(path.into()),
318        operation: "write".into(),
319    })?;
320    Ok(())
321}
322
323/// Look up the latest cache key for a task in a project
324pub fn lookup_latest(project_root: &Path, task_name: &str, root: Option<&Path>) -> Option<String> {
325    let path = latest_index_path(root).ok()?;
326    if !path.exists() {
327        return None;
328    }
329    let content = fs::read_to_string(&path).ok()?;
330    let index: TaskLatestIndex = serde_json::from_str(&content).ok()?;
331    let proj_hash = project_hash(project_root);
332    index.entries.get(&proj_hash)?.get(task_name).cloned()
333}
334
335/// Retrieve all latest cache keys for a given project
336pub fn get_project_cache_keys(
337    project_root: &Path,
338    root: Option<&Path>,
339) -> Result<Option<BTreeMap<String, String>>> {
340    let path = latest_index_path(root)?;
341    if !path.exists() {
342        return Ok(None);
343    }
344    let content = fs::read_to_string(&path).map_err(|e| Error::Io {
345        source: e,
346        path: Some(path.clone().into()),
347        operation: "read".into(),
348    })?;
349    let index: TaskLatestIndex = serde_json::from_str(&content)
350        .map_err(|e| Error::configuration(format!("Failed to parse task index: {e}")))?;
351    let proj_hash = project_hash(project_root);
352    Ok(index.entries.get(&proj_hash).cloned())
353}
354
355#[derive(Debug, Clone, Serialize, Deserialize)]
356pub struct CacheKeyEnvelope {
357    pub inputs: BTreeMap<String, String>,
358    pub command: String,
359    pub args: Vec<String>,
360    pub shell: Option<serde_json::Value>,
361    pub env: BTreeMap<String, String>,
362    pub cuenv_version: String,
363    pub platform: String,
364    /// Hashes of the workspace lockfiles (key = workspace name)
365    #[serde(skip_serializing_if = "Option::is_none")]
366    pub workspace_lockfile_hashes: Option<BTreeMap<String, String>>,
367    /// Hashes of workspace member packages (if relevant)
368    #[serde(skip_serializing_if = "Option::is_none")]
369    pub workspace_package_hashes: Option<BTreeMap<String, String>>,
370}
371
372pub fn compute_cache_key(envelope: &CacheKeyEnvelope) -> Result<(String, serde_json::Value)> {
373    // Canonical JSON with sorted keys (BTreeMap ensures deterministic ordering for maps)
374    let json = serde_json::to_value(envelope)
375        .map_err(|e| Error::configuration(format!("Failed to encode envelope: {e}")))?;
376    let bytes = serde_json::to_vec(&json)
377        .map_err(|e| Error::configuration(format!("Failed to serialize envelope: {e}")))?;
378    let digest = Sha256::digest(bytes);
379    Ok((hex::encode(digest), json))
380}
381
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    use tempfile::TempDir;

    /// Builds an envelope that differs between tests only in `inputs`, `env` and `args`.
    fn envelope(
        inputs: BTreeMap<String, String>,
        env: BTreeMap<String, String>,
        args: &[&str],
    ) -> CacheKeyEnvelope {
        CacheKeyEnvelope {
            inputs,
            command: "echo".into(),
            args: args.iter().map(|a| (*a).to_string()).collect(),
            shell: None,
            env,
            cuenv_version: "0.1.1".into(),
            platform: "linux-x86_64".into(),
            workspace_lockfile_hashes: None,
            workspace_package_hashes: None,
        }
    }

    #[test]
    fn cache_key_is_deterministic_and_order_invariant() {
        let mut env_a = BTreeMap::new();
        env_a.insert("A".to_string(), "1".to_string());
        env_a.insert("B".to_string(), "2".to_string());
        let mut inputs1 = BTreeMap::new();
        inputs1.insert("b.txt".to_string(), "hashb".to_string());
        inputs1.insert("a.txt".to_string(), "hasha".to_string());
        let (k1, _) = compute_cache_key(&envelope(inputs1.clone(), env_a.clone(), &["hi"])).unwrap();

        // Same data but different insertion orders
        let mut env_b = BTreeMap::new();
        env_b.insert("B".to_string(), "2".to_string());
        env_b.insert("A".to_string(), "1".to_string());
        let mut inputs2 = BTreeMap::new();
        inputs2.insert("a.txt".to_string(), "hasha".to_string());
        inputs2.insert("b.txt".to_string(), "hashb".to_string());
        let (k2, _) = compute_cache_key(&envelope(inputs2, env_b, &["hi"])).unwrap();

        assert_eq!(k1, k2);

        // Different content must not collapse onto the same key.
        let (k3, _) = compute_cache_key(&envelope(inputs1, env_a, &["bye"])).unwrap();
        assert_ne!(k1, k3);
    }

    #[test]
    fn cache_root_skips_homeless_shelter() {
        // A private temp dir keeps the fallback out of the shared system temp dir.
        let tmp = TempDir::new().expect("tempdir");
        let inputs = CacheInputs {
            cuenv_cache_dir: None,
            xdg_cache_home: Some(PathBuf::from("/homeless-shelter/.cache")),
            os_cache_dir: None,
            home_dir: Some(PathBuf::from("/homeless-shelter")),
            temp_dir: tmp.path().to_path_buf(),
        };
        let dir =
            cache_root_from_inputs(inputs).expect("cache_root should choose a writable fallback");
        assert!(!dir.starts_with("/homeless-shelter"));
        assert_eq!(dir, tmp.path().join("cuenv/cache/tasks"));
        assert!(dir.is_dir());
    }

    #[test]
    fn cache_root_respects_override_env() {
        // A fresh temp dir per run avoids clashes between concurrent test runs.
        let tmp = TempDir::new().expect("tempdir");
        let override_dir = tmp.path().join("override");
        let fallback = tmp.path().join("fallback");
        let inputs = CacheInputs {
            cuenv_cache_dir: Some(override_dir.clone()),
            xdg_cache_home: None,
            os_cache_dir: None,
            home_dir: None,
            temp_dir: fallback.clone(),
        };
        let dir = cache_root_from_inputs(inputs).expect("cache_root should use override");
        assert_eq!(dir, override_dir);
        assert!(override_dir.is_dir());
        // Later candidates must not be touched once the override succeeded.
        assert!(!fallback.exists());
    }

    #[test]
    fn save_and_materialize_outputs_roundtrip() {
        // Force cache root into a temp directory to avoid touching user dirs
        let cache_tmp = TempDir::new().expect("tempdir");

        // Prepare fake outputs
        let outputs = TempDir::new().expect("outputs tempdir");
        std::fs::create_dir_all(outputs.path().join("dir")).unwrap();
        std::fs::write(outputs.path().join("foo.txt"), b"foo").unwrap();
        std::fs::write(outputs.path().join("dir/bar.bin"), b"bar").unwrap();

        // Prepare hermetic workspace to snapshot
        let herm = TempDir::new().expect("hermetic tempdir");
        std::fs::create_dir_all(herm.path().join("work")).unwrap();
        std::fs::write(herm.path().join("work/a.txt"), b"a").unwrap();

        // Minimal metadata
        let sha256_hex = |bytes: &[u8]| hex::encode(Sha256::digest(bytes));
        let mut env_summary = BTreeMap::new();
        env_summary.insert("FOO".to_string(), "1".to_string());
        let output_index = vec![
            OutputIndexEntry {
                rel_path: "foo.txt".to_string(),
                size: 3,
                sha256: sha256_hex(b"foo"),
            },
            OutputIndexEntry {
                rel_path: "dir/bar.bin".to_string(),
                size: 3,
                sha256: sha256_hex(b"bar"),
            },
        ];

        let meta = TaskResultMeta {
            task_name: "unit".into(),
            command: "echo".into(),
            args: vec!["ok".into()],
            env_summary,
            inputs_summary: BTreeMap::new(),
            created_at: chrono::Utc::now(),
            cuenv_version: "0.0.0-test".into(),
            platform: std::env::consts::OS.to_string(),
            duration_ms: 1,
            exit_code: 0,
            cache_key_envelope: serde_json::json!({}),
            output_index,
        };

        let logs = TaskLogs {
            stdout: Some("hello".into()),
            stderr: Some("".into()),
        };

        let key = "roundtrip-key-123";
        save_result(
            key,
            &meta,
            outputs.path(),
            herm.path(),
            logs,
            Some(cache_tmp.path()),
        )
        .expect("save_result");

        // Verify cache layout
        let base = key_to_path(key, Some(cache_tmp.path())).expect("key_to_path");
        assert!(base.join("metadata.json").exists());
        assert!(base.join("outputs/foo.txt").exists());
        assert!(base.join("outputs/dir/bar.bin").exists());
        assert_eq!(
            std::fs::read_to_string(base.join("logs/stdout.log")).unwrap(),
            "hello"
        );
        assert!(base.join("logs/stderr.log").exists());
        let snapshot = base.join("workspace.tar.zst");
        let snap_meta = std::fs::metadata(&snapshot).unwrap();
        assert!(snap_meta.len() > 0);

        // Materialize into fresh destination
        let dest = TempDir::new().expect("dest tempdir");
        let copied = materialize_outputs(key, dest.path(), Some(cache_tmp.path()))
            .expect("materialize_outputs");
        assert_eq!(copied, 2);
        assert_eq!(std::fs::read(dest.path().join("foo.txt")).unwrap(), b"foo");
        assert_eq!(
            std::fs::read(dest.path().join("dir/bar.bin")).unwrap(),
            b"bar"
        );
    }

    #[test]
    fn latest_index_roundtrip_is_scoped_per_project() {
        let cache_tmp = TempDir::new().expect("tempdir");
        let root = Some(cache_tmp.path());
        let project_a = Path::new("/projects/a");
        let project_b = Path::new("/projects/b");

        // Nothing recorded yet.
        assert_eq!(lookup_latest(project_a, "build", root), None);
        assert!(get_project_cache_keys(project_a, root)
            .expect("missing index is not an error")
            .is_none());

        record_latest(project_a, "build", "key-1", root).expect("record build");
        record_latest(project_a, "test", "key-2", root).expect("record test");
        // A later record for the same task replaces the earlier key.
        record_latest(project_a, "build", "key-3", root).expect("re-record build");

        assert_eq!(
            lookup_latest(project_a, "build", root).as_deref(),
            Some("key-3")
        );
        assert_eq!(
            lookup_latest(project_a, "test", root).as_deref(),
            Some("key-2")
        );
        assert_eq!(lookup_latest(project_b, "build", root), None);

        let keys = get_project_cache_keys(project_a, root)
            .expect("read index")
            .expect("project entry");
        assert_eq!(keys.len(), 2);
        assert_eq!(keys.get("build").map(String::as_str), Some("key-3"));
        assert!(get_project_cache_keys(project_b, root)
            .expect("read index")
            .is_none());
    }
}