cuenv_cache/
tasks.rs

1//! Task result caching with content-addressed storage
2
3use crate::{Error, Result};
4use chrono::{DateTime, Utc};
5use dirs::{cache_dir, home_dir};
6use serde::{Deserialize, Serialize};
7use sha2::{Digest, Sha256};
8use std::collections::BTreeMap;
9use std::fs;
10use std::path::{Path, PathBuf};
11
/// Entry in the output file index
///
/// One entry describes a single output file of a cached task result. Entries
/// are stored in [`TaskResultMeta::output_index`] and are serialized together
/// with the rest of the metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputIndexEntry {
    /// Relative path within output directory
    pub rel_path: String,
    /// File size in bytes
    pub size: u64,
    /// SHA256 hash of file contents
    /// (NOTE(review): presumably hex-encoded like the cache keys; confirm with the producer)
    pub sha256: String,
}
22
/// Metadata about a cached task result
///
/// [`save_result`] serializes this structure as pretty-printed JSON into the
/// `metadata.json` file of a cache entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaskResultMeta {
    /// Name of the task
    pub task_name: String,
    /// Command that was executed
    pub command: String,
    /// Arguments passed to the command
    pub args: Vec<String>,
    /// Summary of environment variables (non-secret)
    pub env_summary: BTreeMap<String, String>,
    /// Summary of input file hashes
    pub inputs_summary: BTreeMap<String, String>,
    /// When the result was created
    pub created_at: DateTime<Utc>,
    /// Version of cuenv that created this cache entry
    pub cuenv_version: String,
    /// Platform identifier
    pub platform: String,
    /// Execution duration in milliseconds
    pub duration_ms: u128,
    /// Exit code of the command
    pub exit_code: i32,
    /// Full cache key envelope for debugging
    /// (the JSON value returned alongside the key by [`compute_cache_key`] has this shape)
    pub cache_key_envelope: serde_json::Value,
    /// Index of output files
    pub output_index: Vec<OutputIndexEntry>,
}
51
/// A resolved cache entry
///
/// Returned by [`lookup`] when the storage path for a key exists on disk.
#[derive(Debug, Clone)]
pub struct CacheEntry {
    /// The cache key
    pub key: String,
    /// Path to the cache entry directory (the cache root joined with `key`)
    pub path: PathBuf,
}
60
/// Inputs for determining cache root directory
///
/// Collecting the environment-derived values in a plain struct lets
/// `cache_root_from_inputs` be driven without touching the real process
/// environment; `cache_root` fills it from the live environment.
#[derive(Debug, Clone)]
struct CacheInputs {
    /// Explicit override (`CUENV_CACHE_DIR`); used as-is, without a sub-directory
    cuenv_cache_dir: Option<PathBuf>,
    /// `XDG_CACHE_HOME`; the cache lives in `cuenv/tasks` beneath it
    xdg_cache_home: Option<PathBuf>,
    /// OS cache directory as reported by `dirs::cache_dir`; the cache lives in `cuenv/tasks` beneath it
    os_cache_dir: Option<PathBuf>,
    /// Home directory as reported by `dirs::home_dir`; the legacy cache lives in `.cuenv/cache/tasks` beneath it
    home_dir: Option<PathBuf>,
    /// Temporary directory; last-resort fallback using `cuenv/cache/tasks` beneath it
    temp_dir: PathBuf,
}
70
71fn cache_root_from_inputs(inputs: CacheInputs) -> Result<PathBuf> {
72    // Resolution order (first writable wins):
73    // 1) CUENV_CACHE_DIR (explicit override)
74    // 2) XDG_CACHE_HOME/cuenv/tasks
75    // 3) OS cache dir/cuenv/tasks
76    // 4) ~/.cuenv/cache/tasks (legacy)
77    // 5) TMPDIR/cuenv/cache/tasks (fallback)
78    let mut candidates: Vec<PathBuf> = Vec::new();
79
80    if let Some(dir) = inputs.cuenv_cache_dir.filter(|p| !p.as_os_str().is_empty()) {
81        candidates.push(dir);
82    }
83    if let Some(xdg) = inputs.xdg_cache_home {
84        candidates.push(xdg.join("cuenv/tasks"));
85    }
86    if let Some(os_cache) = inputs.os_cache_dir {
87        candidates.push(os_cache.join("cuenv/tasks"));
88    }
89    if let Some(home) = inputs.home_dir {
90        candidates.push(home.join(".cuenv/cache/tasks"));
91    }
92    candidates.push(inputs.temp_dir.join("cuenv/cache/tasks"));
93
94    for path in candidates {
95        if path.starts_with("/homeless-shelter") {
96            continue;
97        }
98        // If the path already exists, ensure it is writable; some CI environments
99        // provide read-only cache directories under $HOME.
100        if path.exists() {
101            let probe = path.join(".write_probe");
102            match std::fs::OpenOptions::new()
103                .create(true)
104                .truncate(true)
105                .write(true)
106                .open(&probe)
107            {
108                Ok(_) => {
109                    let _ = std::fs::remove_file(&probe);
110                    return Ok(path);
111                }
112                Err(_) => {
113                    // Not writable, try next candidate
114                    continue;
115                }
116            }
117        }
118        if std::fs::create_dir_all(&path).is_ok() {
119            return Ok(path);
120        }
121        // Permission denied or other errors - try next candidate
122    }
123    Err(Error::configuration(
124        "Failed to determine a writable cache directory",
125    ))
126}
127
128fn cache_root() -> Result<PathBuf> {
129    let inputs = CacheInputs {
130        cuenv_cache_dir: std::env::var("CUENV_CACHE_DIR")
131            .ok()
132            .filter(|s| !s.trim().is_empty())
133            .map(PathBuf::from),
134        xdg_cache_home: std::env::var("XDG_CACHE_HOME")
135            .ok()
136            .filter(|s| !s.trim().is_empty())
137            .map(PathBuf::from),
138        os_cache_dir: cache_dir(),
139        home_dir: home_dir(),
140        temp_dir: std::env::temp_dir(),
141    };
142    cache_root_from_inputs(inputs)
143}
144
145/// Convert a cache key to its storage path
146pub fn key_to_path(key: &str, root: Option<&Path>) -> Result<PathBuf> {
147    let base = if let Some(r) = root {
148        r.to_path_buf()
149    } else {
150        cache_root()?
151    };
152    Ok(base.join(key))
153}
154
155/// Look up a cache entry by key
156#[must_use]
157pub fn lookup(key: &str, root: Option<&Path>) -> Option<CacheEntry> {
158    let Ok(path) = key_to_path(key, root) else {
159        return None;
160    };
161    if path.exists() {
162        Some(CacheEntry {
163            key: key.to_string(),
164            path,
165        })
166    } else {
167        None
168    }
169}
170
/// Task execution logs
///
/// The strings are the raw captured streams; [`save_result`] passes them
/// through `cuenv_events::redact` before writing them to disk. A `None`
/// stream is simply not written.
pub struct TaskLogs {
    /// Standard output from task
    pub stdout: Option<String>,
    /// Standard error from task
    pub stderr: Option<String>,
}
178
179/// Save a task result to the cache
180#[allow(clippy::too_many_arguments)] // Task result caching requires multiple path parameters
181pub fn save_result(
182    key: &str,
183    meta: &TaskResultMeta,
184    outputs_root: &Path,
185    hermetic_root: &Path,
186    logs: &TaskLogs,
187    root: Option<&Path>,
188) -> Result<()> {
189    let path = key_to_path(key, root)?;
190    fs::create_dir_all(&path).map_err(|e| Error::io(e, &path, "create_dir_all"))?;
191
192    // metadata.json
193    let meta_path = path.join("metadata.json");
194    let json = serde_json::to_vec_pretty(meta)
195        .map_err(|e| Error::serialization(format!("Failed to serialize metadata: {e}")))?;
196    fs::write(&meta_path, json).map_err(|e| Error::io(e, &meta_path, "write"))?;
197
198    // outputs/
199    let out_dir = path.join("outputs");
200    fs::create_dir_all(&out_dir).map_err(|e| Error::io(e, &out_dir, "create_dir_all"))?;
201    // Copy tree from outputs_root (already collected) if exists
202    if outputs_root.exists() {
203        for entry in walkdir::WalkDir::new(outputs_root)
204            .into_iter()
205            .filter_map(|e| e.ok())
206        {
207            let p = entry.path();
208            if p.is_dir() {
209                continue;
210            }
211            let rel = p.strip_prefix(outputs_root).map_err(|_| {
212                Error::configuration(format!(
213                    "path {} is not under outputs_root {}",
214                    p.display(),
215                    outputs_root.display()
216                ))
217            })?;
218            let dst = out_dir.join(rel);
219            if let Some(parent) = dst.parent() {
220                fs::create_dir_all(parent).ok();
221            }
222            fs::copy(p, &dst).map_err(|e| Error::io(e, &dst, "copy"))?;
223        }
224    }
225
226    // logs/ - redact secrets before writing to disk
227    let logs_dir = path.join("logs");
228    fs::create_dir_all(&logs_dir).ok();
229    if let Some(s) = logs.stdout.as_ref() {
230        let redacted = cuenv_events::redact(s);
231        let _ = fs::write(logs_dir.join("stdout.log"), redacted);
232    }
233    if let Some(s) = logs.stderr.as_ref() {
234        let redacted = cuenv_events::redact(s);
235        let _ = fs::write(logs_dir.join("stderr.log"), redacted);
236    }
237
238    // workspace snapshot
239    let snapshot = path.join("workspace.tar.zst");
240    snapshot_workspace_tar_zst(hermetic_root, &snapshot)?;
241
242    Ok(())
243}
244
245/// Materialize cached outputs to a destination directory
246pub fn materialize_outputs(key: &str, destination: &Path, root: Option<&Path>) -> Result<usize> {
247    let entry = lookup(key, root).ok_or_else(|| Error::not_found(key))?;
248    let out_dir = entry.path.join("outputs");
249    if !out_dir.exists() {
250        return Ok(0);
251    }
252    let mut count = 0usize;
253    for e in walkdir::WalkDir::new(&out_dir)
254        .into_iter()
255        .filter_map(|e| e.ok())
256    {
257        let p = e.path();
258        if p.is_dir() {
259            continue;
260        }
261        let rel = p.strip_prefix(&out_dir).map_err(|_| {
262            Error::configuration(format!(
263                "path {} is not under out_dir {}",
264                p.display(),
265                out_dir.display()
266            ))
267        })?;
268        let dst = destination.join(rel);
269        if let Some(parent) = dst.parent() {
270            fs::create_dir_all(parent).ok();
271        }
272        fs::copy(p, &dst).map_err(|e| Error::io(e, &dst, "copy"))?;
273        count += 1;
274    }
275    Ok(count)
276}
277
/// Index mapping task names to their latest cache keys (per project)
///
/// Persisted as `task-latest.json` in the cache root; written by
/// [`record_latest`] and read by [`lookup_latest`] and
/// [`get_project_cache_keys`].
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct TaskLatestIndex {
    /// Nested map: project root hash -> (task name -> cache key).
    /// The outer key is a short hex digest derived from the project root path.
    pub entries: BTreeMap<String, BTreeMap<String, String>>,
}
284
285fn latest_index_path(root: Option<&Path>) -> Result<PathBuf> {
286    let base = if let Some(r) = root {
287        r.to_path_buf()
288    } else {
289        cache_root()?
290    };
291    Ok(base.join("task-latest.json"))
292}
293
294fn project_hash(project_root: &Path) -> String {
295    let digest = Sha256::digest(project_root.to_string_lossy().as_bytes());
296    hex::encode(&digest[..8])
297}
298
299/// Record the latest cache key for a task in a project
300pub fn record_latest(
301    project_root: &Path,
302    task_name: &str,
303    cache_key: &str,
304    root: Option<&Path>,
305) -> Result<()> {
306    let path = latest_index_path(root)?;
307    let mut index: TaskLatestIndex = if path.exists() {
308        let content = fs::read_to_string(&path).unwrap_or_default();
309        serde_json::from_str(&content).unwrap_or_default()
310    } else {
311        TaskLatestIndex::default()
312    };
313
314    let proj_hash = project_hash(project_root);
315    index
316        .entries
317        .entry(proj_hash)
318        .or_default()
319        .insert(task_name.to_string(), cache_key.to_string());
320
321    let json = serde_json::to_string_pretty(&index)
322        .map_err(|e| Error::serialization(format!("Failed to serialize latest index: {e}")))?;
323    if let Some(parent) = path.parent() {
324        fs::create_dir_all(parent).ok();
325    }
326    fs::write(&path, json).map_err(|e| Error::io(e, &path, "write"))?;
327    Ok(())
328}
329
330/// Look up the latest cache key for a task in a project
331#[must_use]
332pub fn lookup_latest(project_root: &Path, task_name: &str, root: Option<&Path>) -> Option<String> {
333    let path = latest_index_path(root).ok()?;
334    if !path.exists() {
335        return None;
336    }
337    let content = fs::read_to_string(&path).ok()?;
338    let index: TaskLatestIndex = serde_json::from_str(&content).ok()?;
339    let proj_hash = project_hash(project_root);
340    index.entries.get(&proj_hash)?.get(task_name).cloned()
341}
342
343/// Retrieve all latest cache keys for a given project
344pub fn get_project_cache_keys(
345    project_root: &Path,
346    root: Option<&Path>,
347) -> Result<Option<BTreeMap<String, String>>> {
348    let path = latest_index_path(root)?;
349    if !path.exists() {
350        return Ok(None);
351    }
352    let content = fs::read_to_string(&path).map_err(|e| Error::io(e, &path, "read"))?;
353    let index: TaskLatestIndex = serde_json::from_str(&content)
354        .map_err(|e| Error::serialization(format!("Failed to parse task index: {e}")))?;
355    let proj_hash = project_hash(project_root);
356    Ok(index.entries.get(&proj_hash).cloned())
357}
358
/// Cache key envelope for computing deterministic cache keys
///
/// [`compute_cache_key`] serializes this structure to JSON and hashes the
/// result with SHA-256, so every field that is serialized contributes to the
/// key. Map-typed fields use `BTreeMap`, which keeps their keys in sorted
/// order regardless of insertion order.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheKeyEnvelope {
    /// Input file hashes
    pub inputs: BTreeMap<String, String>,
    /// Command to execute
    pub command: String,
    /// Command arguments
    pub args: Vec<String>,
    /// Shell configuration
    pub shell: Option<serde_json::Value>,
    /// Environment variables
    pub env: BTreeMap<String, String>,
    /// cuenv version
    pub cuenv_version: String,
    /// Platform identifier
    pub platform: String,
    /// Hashes of the workspace lockfiles (key = workspace name).
    /// Omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub workspace_lockfile_hashes: Option<BTreeMap<String, String>>,
    /// Hashes of workspace member packages (if relevant).
    /// Omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub workspace_package_hashes: Option<BTreeMap<String, String>>,
}
383
384/// Compute a deterministic cache key from the envelope
385pub fn compute_cache_key(envelope: &CacheKeyEnvelope) -> Result<(String, serde_json::Value)> {
386    // Canonical JSON with sorted keys (BTreeMap ensures deterministic ordering for maps)
387    let json = serde_json::to_value(envelope)
388        .map_err(|e| Error::serialization(format!("Failed to encode envelope: {e}")))?;
389    let bytes = serde_json::to_vec(&json)
390        .map_err(|e| Error::serialization(format!("Failed to serialize envelope: {e}")))?;
391    let digest = Sha256::digest(bytes);
392    Ok((hex::encode(digest), json))
393}
394
395/// Create a compressed tar archive of a workspace directory
396pub fn snapshot_workspace_tar_zst(src_root: &Path, dst_file: &Path) -> Result<()> {
397    let file = fs::File::create(dst_file).map_err(|e| Error::io(e, dst_file, "create"))?;
398    let enc = zstd::Encoder::new(file, 3)
399        .map_err(|e| Error::configuration(format!("zstd encoder error: {e}")))?;
400    let mut builder = tar::Builder::new(enc);
401
402    match builder.append_dir_all(".", src_root) {
403        Ok(()) => {}
404        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
405            // Workspace contents can legitimately disappear during a task (e.g.
406            // package managers removing temp files). Skip snapshotting instead
407            // of failing the whole task cache write.
408            let _ = fs::remove_file(dst_file);
409            tracing::warn!(
410                root = %src_root.display(),
411                "Skipping workspace snapshot; files disappeared during archive: {e}"
412            );
413            return Ok(());
414        }
415        Err(e) => {
416            return Err(Error::configuration(format!("tar append failed: {e}")));
417        }
418    }
419
420    let enc = builder
421        .into_inner()
422        .map_err(|e| Error::configuration(format!("tar finalize failed: {e}")))?;
423    enc.finish()
424        .map_err(|e| Error::configuration(format!("zstd finish failed: {e}")))?;
425    Ok(())
426}
427
428#[cfg(test)]
429mod tests {
430    use super::*;
431    use std::path::PathBuf;
432    use tempfile::TempDir;
433
434    #[allow(dead_code, unsafe_code)]
435    struct EnvVarGuard {
436        key: String,
437        prev: Option<String>,
438    }
439
440    impl EnvVarGuard {
441        #[allow(dead_code, unsafe_code)]
442        fn set<K: Into<String>, V: Into<String>>(key: K, value: V) -> Self {
443            let key_s = key.into();
444            let prev = std::env::var(&key_s).ok();
445            // Rust 2024 makes env mutation unsafe; this test confines changes to the current thread
446            // and restores previous values via Drop.
447            unsafe {
448                std::env::set_var(&key_s, value.into());
449            }
450            Self { key: key_s, prev }
451        }
452    }
453
454    #[allow(unsafe_code)]
455    impl Drop for EnvVarGuard {
456        fn drop(&mut self) {
457            if let Some(ref v) = self.prev {
458                unsafe {
459                    std::env::set_var(&self.key, v);
460                }
461            } else {
462                unsafe {
463                    std::env::remove_var(&self.key);
464                }
465            }
466        }
467    }
468
469    // ==========================================================================
470    // OutputIndexEntry tests
471    // ==========================================================================
472
473    #[test]
474    fn test_output_index_entry_serde() {
475        let entry = OutputIndexEntry {
476            rel_path: "output/file.txt".to_string(),
477            size: 1024,
478            sha256: "abc123".to_string(),
479        };
480
481        let json = serde_json::to_string(&entry).unwrap();
482        let parsed: OutputIndexEntry = serde_json::from_str(&json).unwrap();
483
484        assert_eq!(parsed.rel_path, "output/file.txt");
485        assert_eq!(parsed.size, 1024);
486        assert_eq!(parsed.sha256, "abc123");
487    }
488
489    #[test]
490    fn test_output_index_entry_clone() {
491        let entry = OutputIndexEntry {
492            rel_path: "test.txt".to_string(),
493            size: 100,
494            sha256: "hash".to_string(),
495        };
496
497        let cloned = entry.clone();
498        assert_eq!(cloned.rel_path, "test.txt");
499    }
500
501    // ==========================================================================
502    // TaskResultMeta tests
503    // ==========================================================================
504
505    #[test]
506    fn test_task_result_meta_serde() {
507        let meta = TaskResultMeta {
508            task_name: "build".to_string(),
509            command: "cargo".to_string(),
510            args: vec!["build".to_string()],
511            env_summary: BTreeMap::new(),
512            inputs_summary: BTreeMap::new(),
513            created_at: chrono::Utc::now(),
514            cuenv_version: "0.1.0".to_string(),
515            platform: "linux-x86_64".to_string(),
516            duration_ms: 5000,
517            exit_code: 0,
518            cache_key_envelope: serde_json::json!({}),
519            output_index: vec![],
520        };
521
522        let json = serde_json::to_string(&meta).unwrap();
523        let parsed: TaskResultMeta = serde_json::from_str(&json).unwrap();
524
525        assert_eq!(parsed.task_name, "build");
526        assert_eq!(parsed.command, "cargo");
527        assert_eq!(parsed.exit_code, 0);
528    }
529
530    #[test]
531    fn test_task_result_meta_with_env() {
532        let mut env_summary = BTreeMap::new();
533        env_summary.insert("RUST_LOG".to_string(), "debug".to_string());
534
535        let meta = TaskResultMeta {
536            task_name: "test".to_string(),
537            command: "cargo".to_string(),
538            args: vec!["test".to_string()],
539            env_summary,
540            inputs_summary: BTreeMap::new(),
541            created_at: chrono::Utc::now(),
542            cuenv_version: "0.1.0".to_string(),
543            platform: "linux-x86_64".to_string(),
544            duration_ms: 10000,
545            exit_code: 0,
546            cache_key_envelope: serde_json::json!({}),
547            output_index: vec![],
548        };
549
550        assert_eq!(meta.env_summary.len(), 1);
551        assert_eq!(meta.env_summary.get("RUST_LOG"), Some(&"debug".to_string()));
552    }
553
554    // ==========================================================================
555    // CacheEntry tests
556    // ==========================================================================
557
558    #[test]
559    fn test_cache_entry_fields() {
560        let entry = CacheEntry {
561            key: "abc123".to_string(),
562            path: PathBuf::from("/cache/abc123"),
563        };
564
565        assert_eq!(entry.key, "abc123");
566        assert_eq!(entry.path, PathBuf::from("/cache/abc123"));
567    }
568
569    #[test]
570    fn test_cache_entry_clone() {
571        let entry = CacheEntry {
572            key: "key".to_string(),
573            path: PathBuf::from("/path"),
574        };
575
576        let cloned = entry.clone();
577        assert_eq!(cloned.key, "key");
578    }
579
580    // ==========================================================================
581    // TaskLatestIndex tests
582    // ==========================================================================
583
584    #[test]
585    fn test_task_latest_index_default() {
586        let index = TaskLatestIndex::default();
587        assert!(index.entries.is_empty());
588    }
589
590    #[test]
591    fn test_task_latest_index_serde() {
592        let mut index = TaskLatestIndex::default();
593        let mut tasks = BTreeMap::new();
594        tasks.insert("build".to_string(), "key123".to_string());
595        index.entries.insert("project_hash".to_string(), tasks);
596
597        let json = serde_json::to_string(&index).unwrap();
598        let parsed: TaskLatestIndex = serde_json::from_str(&json).unwrap();
599
600        assert!(parsed.entries.contains_key("project_hash"));
601    }
602
603    // ==========================================================================
604    // CacheKeyEnvelope tests
605    // ==========================================================================
606
607    #[test]
608    fn test_cache_key_envelope_serde() {
609        let envelope = CacheKeyEnvelope {
610            inputs: BTreeMap::from([("file.txt".to_string(), "hash1".to_string())]),
611            command: "echo".to_string(),
612            args: vec!["hello".to_string()],
613            shell: None,
614            env: BTreeMap::new(),
615            cuenv_version: "0.1.0".to_string(),
616            platform: "linux".to_string(),
617            workspace_lockfile_hashes: None,
618            workspace_package_hashes: None,
619        };
620
621        let json = serde_json::to_string(&envelope).unwrap();
622        let parsed: CacheKeyEnvelope = serde_json::from_str(&json).unwrap();
623
624        assert_eq!(parsed.command, "echo");
625    }
626
627    #[test]
628    fn test_cache_key_envelope_with_optional_fields() {
629        let envelope = CacheKeyEnvelope {
630            inputs: BTreeMap::new(),
631            command: "npm".to_string(),
632            args: vec!["install".to_string()],
633            shell: Some(serde_json::json!({"type": "bash"})),
634            env: BTreeMap::new(),
635            cuenv_version: "0.1.0".to_string(),
636            platform: "darwin".to_string(),
637            workspace_lockfile_hashes: Some(BTreeMap::from([(
638                "npm".to_string(),
639                "lockfile_hash".to_string(),
640            )])),
641            workspace_package_hashes: Some(BTreeMap::from([(
642                "pkg".to_string(),
643                "pkg_hash".to_string(),
644            )])),
645        };
646
647        let json = serde_json::to_string(&envelope).unwrap();
648        assert!(json.contains("workspace_lockfile_hashes"));
649        assert!(json.contains("workspace_package_hashes"));
650    }
651
652    // ==========================================================================
653    // key_to_path tests
654    // ==========================================================================
655
656    #[test]
657    fn test_key_to_path_with_root() {
658        let temp = TempDir::new().unwrap();
659        let path = key_to_path("mykey", Some(temp.path())).unwrap();
660        assert!(path.ends_with("mykey"));
661        assert!(path.starts_with(temp.path()));
662    }
663
664    // ==========================================================================
665    // lookup tests
666    // ==========================================================================
667
668    #[test]
669    fn test_lookup_not_found() {
670        let temp = TempDir::new().unwrap();
671        let result = lookup("nonexistent", Some(temp.path()));
672        assert!(result.is_none());
673    }
674
675    #[test]
676    fn test_lookup_found() {
677        let temp = TempDir::new().unwrap();
678        let key_dir = temp.path().join("mykey");
679        fs::create_dir_all(&key_dir).unwrap();
680
681        let result = lookup("mykey", Some(temp.path()));
682        assert!(result.is_some());
683        let entry = result.unwrap();
684        assert_eq!(entry.key, "mykey");
685    }
686
687    // ==========================================================================
688    // record_latest and lookup_latest tests
689    // ==========================================================================
690
691    #[test]
692    fn test_record_and_lookup_latest() {
693        let temp = TempDir::new().unwrap();
694        let project_root = temp.path().join("project");
695        fs::create_dir_all(&project_root).unwrap();
696
697        record_latest(&project_root, "build", "key123", Some(temp.path())).unwrap();
698
699        let result = lookup_latest(&project_root, "build", Some(temp.path()));
700        assert_eq!(result, Some("key123".to_string()));
701    }
702
703    #[test]
704    fn test_lookup_latest_not_found() {
705        let temp = TempDir::new().unwrap();
706        let project_root = temp.path().join("project");
707
708        let result = lookup_latest(&project_root, "nonexistent", Some(temp.path()));
709        assert!(result.is_none());
710    }
711
712    #[test]
713    fn test_record_latest_overwrites() {
714        let temp = TempDir::new().unwrap();
715        let project_root = temp.path().join("project");
716        fs::create_dir_all(&project_root).unwrap();
717
718        record_latest(&project_root, "build", "key1", Some(temp.path())).unwrap();
719        record_latest(&project_root, "build", "key2", Some(temp.path())).unwrap();
720
721        let result = lookup_latest(&project_root, "build", Some(temp.path()));
722        assert_eq!(result, Some("key2".to_string()));
723    }
724
725    // ==========================================================================
726    // get_project_cache_keys tests
727    // ==========================================================================
728
729    #[test]
730    fn test_get_project_cache_keys_empty() {
731        let temp = TempDir::new().unwrap();
732        let project_root = temp.path().join("project");
733
734        let result = get_project_cache_keys(&project_root, Some(temp.path())).unwrap();
735        assert!(result.is_none());
736    }
737
738    #[test]
739    fn test_get_project_cache_keys_with_data() {
740        let temp = TempDir::new().unwrap();
741        let project_root = temp.path().join("project");
742        fs::create_dir_all(&project_root).unwrap();
743
744        record_latest(&project_root, "build", "key1", Some(temp.path())).unwrap();
745        record_latest(&project_root, "test", "key2", Some(temp.path())).unwrap();
746
747        let result = get_project_cache_keys(&project_root, Some(temp.path()))
748            .unwrap()
749            .unwrap();
750        assert_eq!(result.len(), 2);
751        assert_eq!(result.get("build"), Some(&"key1".to_string()));
752        assert_eq!(result.get("test"), Some(&"key2".to_string()));
753    }
754
755    // ==========================================================================
756    // compute_cache_key tests
757    // ==========================================================================
758
759    #[test]
760    fn cache_key_is_deterministic_and_order_invariant() {
761        let mut env_a = BTreeMap::new();
762        env_a.insert("A".to_string(), "1".to_string());
763        env_a.insert("B".to_string(), "2".to_string());
764        let mut inputs1 = BTreeMap::new();
765        inputs1.insert("b.txt".to_string(), "hashb".to_string());
766        inputs1.insert("a.txt".to_string(), "hasha".to_string());
767        let e1 = CacheKeyEnvelope {
768            inputs: inputs1,
769            command: "echo".into(),
770            args: vec!["hi".into()],
771            shell: None,
772            env: env_a.clone(),
773            cuenv_version: "0.1.1".into(),
774            platform: "linux-x86_64".into(),
775            workspace_lockfile_hashes: None,
776            workspace_package_hashes: None,
777        };
778        let (k1, _) = compute_cache_key(&e1).unwrap();
779
780        // Same data but different insertion orders
781        let mut env_b = BTreeMap::new();
782        env_b.insert("B".to_string(), "2".to_string());
783        env_b.insert("A".to_string(), "1".to_string());
784        let mut inputs2 = BTreeMap::new();
785        inputs2.insert("a.txt".to_string(), "hasha".to_string());
786        inputs2.insert("b.txt".to_string(), "hashb".to_string());
787        let e2 = CacheKeyEnvelope {
788            inputs: inputs2,
789            command: "echo".into(),
790            args: vec!["hi".into()],
791            shell: None,
792            env: env_b,
793            cuenv_version: "0.1.1".into(),
794            platform: "linux-x86_64".into(),
795            workspace_lockfile_hashes: None,
796            workspace_package_hashes: None,
797        };
798        let (k2, _) = compute_cache_key(&e2).unwrap();
799
800        assert_eq!(k1, k2);
801    }
802
803    // ==========================================================================
804    // Cache Invalidation Behavioral Tests
805    // ==========================================================================
806    // These tests verify the behavioral contracts around cache invalidation:
807    // When any component of the cache key changes, the key MUST change.
808
809    /// Helper to create a baseline envelope for invalidation tests
810    fn baseline_envelope() -> CacheKeyEnvelope {
811        CacheKeyEnvelope {
812            inputs: BTreeMap::from([
813                ("src/main.rs".to_string(), "abc123".to_string()),
814                ("Cargo.toml".to_string(), "def456".to_string()),
815            ]),
816            command: "cargo".to_string(),
817            args: vec!["build".to_string(), "--release".to_string()],
818            shell: None,
819            env: BTreeMap::from([
820                ("RUST_LOG".to_string(), "debug".to_string()),
821                ("CC".to_string(), "clang".to_string()),
822            ]),
823            cuenv_version: "1.0.0".to_string(),
824            platform: "linux-x86_64".to_string(),
825            workspace_lockfile_hashes: None,
826            workspace_package_hashes: None,
827        }
828    }
829
830    #[test]
831    fn cache_invalidates_when_input_file_content_changes() {
832        // Given: A task with specific input file hashes
833        let base = baseline_envelope();
834        let (base_key, _) = compute_cache_key(&base).unwrap();
835
836        // When: An input file's content changes (different hash)
837        let mut modified = base.clone();
838        modified
839            .inputs
840            .insert("src/main.rs".to_string(), "changed_hash".to_string());
841        let (new_key, _) = compute_cache_key(&modified).unwrap();
842
843        // Then: Cache key must be different (cache is invalidated)
844        assert_ne!(
845            base_key, new_key,
846            "Cache must invalidate when input file content changes"
847        );
848    }
849
850    #[test]
851    fn cache_invalidates_when_new_input_file_added() {
852        // Given: A task with specific inputs
853        let base = baseline_envelope();
854        let (base_key, _) = compute_cache_key(&base).unwrap();
855
856        // When: A new input file is added
857        let mut modified = base.clone();
858        modified
859            .inputs
860            .insert("src/lib.rs".to_string(), "new_file_hash".to_string());
861        let (new_key, _) = compute_cache_key(&modified).unwrap();
862
863        // Then: Cache key must be different
864        assert_ne!(
865            base_key, new_key,
866            "Cache must invalidate when new input file is added"
867        );
868    }
869
870    #[test]
871    fn cache_invalidates_when_input_file_removed() {
872        // Given: A task with specific inputs
873        let base = baseline_envelope();
874        let (base_key, _) = compute_cache_key(&base).unwrap();
875
876        // When: An input file is removed
877        let mut modified = base.clone();
878        modified.inputs.remove("src/main.rs");
879        let (new_key, _) = compute_cache_key(&modified).unwrap();
880
881        // Then: Cache key must be different
882        assert_ne!(
883            base_key, new_key,
884            "Cache must invalidate when input file is removed"
885        );
886    }
887
888    #[test]
889    fn cache_invalidates_when_command_changes() {
890        // Given: A task with a specific command
891        let base = baseline_envelope();
892        let (base_key, _) = compute_cache_key(&base).unwrap();
893
894        // When: The command changes
895        let mut modified = base.clone();
896        modified.command = "rustc".to_string();
897        let (new_key, _) = compute_cache_key(&modified).unwrap();
898
899        // Then: Cache key must be different
900        assert_ne!(
901            base_key, new_key,
902            "Cache must invalidate when command changes"
903        );
904    }
905
906    #[test]
907    fn cache_invalidates_when_args_change() {
908        // Given: A task with specific arguments
909        let base = baseline_envelope();
910        let (base_key, _) = compute_cache_key(&base).unwrap();
911
912        // When: Arguments change
913        let mut modified = base.clone();
914        modified.args = vec!["build".to_string()]; // removed --release
915        let (new_key, _) = compute_cache_key(&modified).unwrap();
916
917        // Then: Cache key must be different
918        assert_ne!(
919            base_key, new_key,
920            "Cache must invalidate when command arguments change"
921        );
922    }
923
924    #[test]
925    fn cache_invalidates_when_env_var_value_changes() {
926        // Given: A task with specific environment variables
927        let base = baseline_envelope();
928        let (base_key, _) = compute_cache_key(&base).unwrap();
929
930        // When: An environment variable value changes
931        let mut modified = base.clone();
932        modified
933            .env
934            .insert("RUST_LOG".to_string(), "info".to_string());
935        let (new_key, _) = compute_cache_key(&modified).unwrap();
936
937        // Then: Cache key must be different
938        assert_ne!(
939            base_key, new_key,
940            "Cache must invalidate when environment variable value changes"
941        );
942    }
943
944    #[test]
945    fn cache_invalidates_when_env_var_added() {
946        // Given: A task with specific environment variables
947        let base = baseline_envelope();
948        let (base_key, _) = compute_cache_key(&base).unwrap();
949
950        // When: A new environment variable is added
951        let mut modified = base.clone();
952        modified
953            .env
954            .insert("NEW_VAR".to_string(), "value".to_string());
955        let (new_key, _) = compute_cache_key(&modified).unwrap();
956
957        // Then: Cache key must be different
958        assert_ne!(
959            base_key, new_key,
960            "Cache must invalidate when new environment variable is added"
961        );
962    }
963
964    #[test]
965    fn cache_invalidates_when_platform_changes() {
966        // Given: A task built for a specific platform
967        let base = baseline_envelope();
968        let (base_key, _) = compute_cache_key(&base).unwrap();
969
970        // When: The platform changes (cross-compilation or different machine)
971        let mut modified = base.clone();
972        modified.platform = "darwin-aarch64".to_string();
973        let (new_key, _) = compute_cache_key(&modified).unwrap();
974
975        // Then: Cache key must be different
976        assert_ne!(
977            base_key, new_key,
978            "Cache must invalidate when platform changes"
979        );
980    }
981
982    #[test]
983    fn cache_invalidates_when_cuenv_version_changes() {
984        // Given: A task built with a specific cuenv version
985        let base = baseline_envelope();
986        let (base_key, _) = compute_cache_key(&base).unwrap();
987
988        // When: cuenv version changes (may affect execution semantics)
989        let mut modified = base.clone();
990        modified.cuenv_version = "2.0.0".to_string();
991        let (new_key, _) = compute_cache_key(&modified).unwrap();
992
993        // Then: Cache key must be different
994        assert_ne!(
995            base_key, new_key,
996            "Cache must invalidate when cuenv version changes"
997        );
998    }
999
1000    #[test]
1001    fn cache_invalidates_when_workspace_lockfile_changes() {
1002        // Given: A task with no workspace lockfile hashes
1003        let base = baseline_envelope();
1004        let (base_key, _) = compute_cache_key(&base).unwrap();
1005
1006        // When: Workspace lockfile is added or changes
1007        let mut modified = base.clone();
1008        modified.workspace_lockfile_hashes = Some(BTreeMap::from([(
1009            "cargo".to_string(),
1010            "lockfile_hash_123".to_string(),
1011        )]));
1012        let (new_key, _) = compute_cache_key(&modified).unwrap();
1013
1014        // Then: Cache key must be different
1015        assert_ne!(
1016            base_key, new_key,
1017            "Cache must invalidate when workspace lockfile changes"
1018        );
1019    }
1020
1021    #[test]
1022    fn cache_stable_when_nothing_changes() {
1023        // Given: A task configuration
1024        let envelope = baseline_envelope();
1025
1026        // When: We compute the cache key multiple times
1027        let (key1, _) = compute_cache_key(&envelope).unwrap();
1028        let (key2, _) = compute_cache_key(&envelope).unwrap();
1029        let (key3, _) = compute_cache_key(&envelope).unwrap();
1030
1031        // Then: All keys should be identical (cache hits work correctly)
1032        assert_eq!(key1, key2, "Cache key must be stable across calls");
1033        assert_eq!(key2, key3, "Cache key must be stable across calls");
1034    }
1035
1036    #[test]
1037    fn cache_root_skips_homeless_shelter() {
1038        let tmp = std::env::temp_dir();
1039        let inputs = CacheInputs {
1040            cuenv_cache_dir: None,
1041            xdg_cache_home: Some(PathBuf::from("/homeless-shelter/.cache")),
1042            os_cache_dir: None,
1043            home_dir: Some(PathBuf::from("/homeless-shelter")),
1044            temp_dir: tmp.clone(),
1045        };
1046        let dir =
1047            cache_root_from_inputs(inputs).expect("cache_root should choose a writable fallback");
1048        assert!(!dir.starts_with("/homeless-shelter"));
1049        assert!(dir.starts_with(&tmp));
1050    }
1051
1052    #[test]
1053    fn cache_root_respects_override_env() {
1054        let tmp = std::env::temp_dir().join("cuenv-test-override");
1055        let _ = std::fs::remove_dir_all(&tmp);
1056        let inputs = CacheInputs {
1057            cuenv_cache_dir: Some(tmp.clone()),
1058            xdg_cache_home: None,
1059            os_cache_dir: None,
1060            home_dir: None,
1061            temp_dir: std::env::temp_dir(),
1062        };
1063        let dir = cache_root_from_inputs(inputs).expect("cache_root should use override");
1064        assert!(dir.starts_with(&tmp));
1065        let _ = std::fs::remove_dir_all(&tmp);
1066    }
1067
1068    #[test]
1069    fn save_and_materialize_outputs_roundtrip() {
1070        // Force cache root into a temp directory to avoid touching user dirs
1071        let cache_tmp = TempDir::new().expect("tempdir");
1072
1073        // Prepare fake outputs
1074        let outputs = TempDir::new().expect("outputs tempdir");
1075        std::fs::create_dir_all(outputs.path().join("dir")).unwrap();
1076        std::fs::write(outputs.path().join("foo.txt"), b"foo").unwrap();
1077        std::fs::write(outputs.path().join("dir/bar.bin"), b"bar").unwrap();
1078
1079        // Prepare hermetic workspace to snapshot
1080        let herm = TempDir::new().expect("hermetic tempdir");
1081        std::fs::create_dir_all(herm.path().join("work")).unwrap();
1082        std::fs::write(herm.path().join("work/a.txt"), b"a").unwrap();
1083
1084        // Minimal metadata
1085        let mut env_summary = BTreeMap::new();
1086        env_summary.insert("FOO".to_string(), "1".to_string());
1087        let inputs_summary = BTreeMap::new();
1088        let output_index = vec![
1089            OutputIndexEntry {
1090                rel_path: "foo.txt".to_string(),
1091                size: 3,
1092                sha256: {
1093                    use sha2::{Digest, Sha256};
1094                    let mut h = Sha256::new();
1095                    h.update(b"foo");
1096                    hex::encode(h.finalize())
1097                },
1098            },
1099            OutputIndexEntry {
1100                rel_path: "dir/bar.bin".to_string(),
1101                size: 3,
1102                sha256: {
1103                    use sha2::{Digest, Sha256};
1104                    let mut h = Sha256::new();
1105                    h.update(b"bar");
1106                    hex::encode(h.finalize())
1107                },
1108            },
1109        ];
1110
1111        let meta = TaskResultMeta {
1112            task_name: "unit".into(),
1113            command: "echo".into(),
1114            args: vec!["ok".into()],
1115            env_summary,
1116            inputs_summary,
1117            created_at: chrono::Utc::now(),
1118            cuenv_version: "0.0.0-test".into(),
1119            platform: std::env::consts::OS.to_string(),
1120            duration_ms: 1,
1121            exit_code: 0,
1122            cache_key_envelope: serde_json::json!({}),
1123            output_index,
1124        };
1125
1126        let logs = TaskLogs {
1127            stdout: Some("hello".into()),
1128            stderr: Some(String::new()),
1129        };
1130
1131        let key = "roundtrip-key-123";
1132        save_result(
1133            key,
1134            &meta,
1135            outputs.path(),
1136            herm.path(),
1137            &logs,
1138            Some(cache_tmp.path()),
1139        )
1140        .expect("save_result");
1141
1142        // Verify cache layout
1143        let base = key_to_path(key, Some(cache_tmp.path())).expect("key_to_path");
1144        assert!(base.join("metadata.json").exists());
1145        assert!(base.join("outputs/foo.txt").exists());
1146        assert!(base.join("outputs/dir/bar.bin").exists());
1147        assert!(base.join("logs/stdout.log").exists());
1148        let snapshot = base.join("workspace.tar.zst");
1149        let snap_meta = std::fs::metadata(&snapshot).unwrap();
1150        assert!(snap_meta.len() > 0);
1151
1152        // Materialize into fresh destination
1153        let dest = TempDir::new().expect("dest tempdir");
1154        let copied = materialize_outputs(key, dest.path(), Some(cache_tmp.path()))
1155            .expect("materialize_outputs");
1156        assert_eq!(copied, 2);
1157        assert_eq!(std::fs::read(dest.path().join("foo.txt")).unwrap(), b"foo");
1158        assert_eq!(
1159            std::fs::read(dest.path().join("dir/bar.bin")).unwrap(),
1160            b"bar"
1161        );
1162    }
1163
1164    #[test]
1165    fn test_snapshot_workspace_tar_zst() {
1166        let src = TempDir::new().unwrap();
1167        std::fs::create_dir_all(src.path().join("subdir")).unwrap();
1168        std::fs::write(src.path().join("file.txt"), "content").unwrap();
1169        std::fs::write(src.path().join("subdir/nested.txt"), "nested").unwrap();
1170
1171        let dst = TempDir::new().unwrap();
1172        let archive_path = dst.path().join("archive.tar.zst");
1173
1174        snapshot_workspace_tar_zst(src.path(), &archive_path).unwrap();
1175        assert!(archive_path.exists());
1176        // Verify the archive is non-empty
1177        let metadata = std::fs::metadata(&archive_path).unwrap();
1178        assert!(metadata.len() > 0);
1179    }
1180}