Skip to main content

cuenv_cache/
tasks.rs

1//! Task result caching with content-addressed storage
2
3use crate::{Error, Result};
4use chrono::{DateTime, Utc};
5use dirs::{cache_dir, home_dir};
6use serde::{Deserialize, Serialize};
7use sha2::{Digest, Sha256};
8use std::collections::BTreeMap;
9use std::fs;
10use std::path::{Path, PathBuf};
11
12/// Entry in the output file index
13#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct OutputIndexEntry {
15    /// Relative path within output directory
16    pub rel_path: String,
17    /// File size in bytes
18    pub size: u64,
19    /// SHA256 hash of file contents
20    pub sha256: String,
21}
22
23/// Metadata about a cached task result
24#[derive(Debug, Clone, Serialize, Deserialize)]
25pub struct TaskResultMeta {
26    /// Name of the task
27    pub task_name: String,
28    /// Command that was executed
29    pub command: String,
30    /// Arguments passed to the command
31    pub args: Vec<String>,
32    /// Summary of environment variables (non-secret)
33    pub env_summary: BTreeMap<String, String>,
34    /// Summary of input file hashes
35    pub inputs_summary: BTreeMap<String, String>,
36    /// When the result was created
37    pub created_at: DateTime<Utc>,
38    /// Version of cuenv that created this cache entry
39    pub cuenv_version: String,
40    /// Platform identifier
41    pub platform: String,
42    /// Execution duration in milliseconds
43    pub duration_ms: u128,
44    /// Exit code of the command
45    pub exit_code: i32,
46    /// Full cache key envelope for debugging
47    pub cache_key_envelope: serde_json::Value,
48    /// Index of output files
49    pub output_index: Vec<OutputIndexEntry>,
50}
51
/// A cache key together with the directory it resolved to.
///
/// Returned by [`lookup`] when the entry directory exists on disk.
#[derive(Debug, Clone)]
pub struct CacheEntry {
    /// Key the entry was looked up with.
    pub key: String,
    /// Directory holding the entry's files.
    pub path: PathBuf,
}
60
/// Everything `cache_root_from_inputs` needs to choose a cache root.
///
/// Keeping these as plain data (rather than reading the environment inside
/// the resolver) lets the resolution logic be driven with arbitrary values.
#[derive(Debug, Clone)]
struct CacheInputs {
    /// Explicit override; used verbatim as the cache root.
    cuenv_cache_dir: Option<PathBuf>,
    /// XDG cache home; `cuenv/tasks` is appended.
    xdg_cache_home: Option<PathBuf>,
    /// Operating-system cache directory; `cuenv/tasks` is appended.
    os_cache_dir: Option<PathBuf>,
    /// Home directory; `.cuenv/cache/tasks` is appended (legacy location).
    home_dir: Option<PathBuf>,
    /// Temporary directory; `cuenv/cache/tasks` is appended (last resort).
    temp_dir: PathBuf,
}
70
71fn cache_root_from_inputs(inputs: CacheInputs) -> Result<PathBuf> {
72    // Resolution order (first writable wins):
73    // 1) CUENV_CACHE_DIR (explicit override)
74    // 2) XDG_CACHE_HOME/cuenv/tasks
75    // 3) OS cache dir/cuenv/tasks
76    // 4) ~/.cuenv/cache/tasks (legacy)
77    // 5) TMPDIR/cuenv/cache/tasks (fallback)
78    let mut candidates: Vec<PathBuf> = Vec::new();
79
80    if let Some(dir) = inputs.cuenv_cache_dir.filter(|p| !p.as_os_str().is_empty()) {
81        candidates.push(dir);
82    }
83    if let Some(xdg) = inputs.xdg_cache_home {
84        candidates.push(xdg.join("cuenv/tasks"));
85    }
86    if let Some(os_cache) = inputs.os_cache_dir {
87        candidates.push(os_cache.join("cuenv/tasks"));
88    }
89    if let Some(home) = inputs.home_dir {
90        candidates.push(home.join(".cuenv/cache/tasks"));
91    }
92    candidates.push(inputs.temp_dir.join("cuenv/cache/tasks"));
93
94    for path in candidates {
95        if path.starts_with("/homeless-shelter") {
96            continue;
97        }
98        // If the path already exists, ensure it is writable; some CI environments
99        // provide read-only cache directories under $HOME.
100        if path.exists() {
101            let probe = path.join(".write_probe");
102            match std::fs::OpenOptions::new()
103                .create(true)
104                .truncate(true)
105                .write(true)
106                .open(&probe)
107            {
108                Ok(_) => {
109                    let _ = std::fs::remove_file(&probe);
110                    return Ok(path);
111                }
112                Err(_) => {
113                    // Not writable, try next candidate
114                    continue;
115                }
116            }
117        }
118        if std::fs::create_dir_all(&path).is_ok() {
119            return Ok(path);
120        }
121        // Permission denied or other errors - try next candidate
122    }
123    Err(Error::configuration(
124        "Failed to determine a writable cache directory",
125    ))
126}
127
128fn cache_root() -> Result<PathBuf> {
129    let inputs = CacheInputs {
130        cuenv_cache_dir: std::env::var("CUENV_CACHE_DIR")
131            .ok()
132            .filter(|s| !s.trim().is_empty())
133            .map(PathBuf::from),
134        xdg_cache_home: std::env::var("XDG_CACHE_HOME")
135            .ok()
136            .filter(|s| !s.trim().is_empty())
137            .map(PathBuf::from),
138        os_cache_dir: cache_dir(),
139        home_dir: home_dir(),
140        temp_dir: std::env::temp_dir(),
141    };
142    cache_root_from_inputs(inputs)
143}
144
145/// Convert a cache key to its storage path
146pub fn key_to_path(key: &str, root: Option<&Path>) -> Result<PathBuf> {
147    let base = if let Some(r) = root {
148        r.to_path_buf()
149    } else {
150        cache_root()?
151    };
152    Ok(base.join(key))
153}
154
155/// Look up a cache entry by key
156#[must_use]
157pub fn lookup(key: &str, root: Option<&Path>) -> Option<CacheEntry> {
158    let Ok(path) = key_to_path(key, root) else {
159        return None;
160    };
161    if path.exists() {
162        Some(CacheEntry {
163            key: key.to_string(),
164            path,
165        })
166    } else {
167        None
168    }
169}
170
/// Output streams captured while a task ran.
pub struct TaskLogs {
    /// Captured standard output, if any.
    pub stdout: Option<String>,
    /// Captured standard error, if any.
    pub stderr: Option<String>,
}
178
179/// All data needed to save a task result to the cache.
180pub struct SaveResultData<'a> {
181    /// Cache key for this result
182    pub key: &'a str,
183    /// Metadata about the task execution
184    pub meta: &'a TaskResultMeta,
185    /// Directory containing task output files
186    pub outputs_root: &'a Path,
187    /// Directory containing the hermetic workspace snapshot
188    pub hermetic_root: &'a Path,
189    /// Captured stdout/stderr logs
190    pub logs: &'a TaskLogs,
191    /// Optional override for the cache root directory
192    pub root: Option<&'a Path>,
193}
194
195/// Save a task result to the cache
196pub fn save_result(data: &SaveResultData<'_>) -> Result<()> {
197    let key = data.key;
198    let meta = data.meta;
199    let outputs_root = data.outputs_root;
200    let hermetic_root = data.hermetic_root;
201    let logs = data.logs;
202    let root = data.root;
203    let path = key_to_path(key, root)?;
204    fs::create_dir_all(&path).map_err(|e| Error::io(e, &path, "create_dir_all"))?;
205
206    // metadata.json
207    let meta_path = path.join("metadata.json");
208    let json = serde_json::to_vec_pretty(meta)
209        .map_err(|e| Error::serialization(format!("Failed to serialize metadata: {e}")))?;
210    fs::write(&meta_path, json).map_err(|e| Error::io(e, &meta_path, "write"))?;
211
212    // outputs/
213    let out_dir = path.join("outputs");
214    fs::create_dir_all(&out_dir).map_err(|e| Error::io(e, &out_dir, "create_dir_all"))?;
215    // Copy tree from outputs_root (already collected) if exists
216    if outputs_root.exists() {
217        for entry in walkdir::WalkDir::new(outputs_root)
218            .into_iter()
219            .filter_map(|e| e.ok())
220        {
221            let p = entry.path();
222            if p.is_dir() {
223                continue;
224            }
225            let rel = p.strip_prefix(outputs_root).map_err(|_| {
226                Error::configuration(format!(
227                    "path {} is not under outputs_root {}",
228                    p.display(),
229                    outputs_root.display()
230                ))
231            })?;
232            let dst = out_dir.join(rel);
233            if let Some(parent) = dst.parent() {
234                fs::create_dir_all(parent).ok();
235            }
236            fs::copy(p, &dst).map_err(|e| Error::io(e, &dst, "copy"))?;
237        }
238    }
239
240    // logs/ - redact secrets before writing to disk
241    let logs_dir = path.join("logs");
242    fs::create_dir_all(&logs_dir).ok();
243    if let Some(s) = logs.stdout.as_ref() {
244        let redacted = cuenv_events::redact(s);
245        let _ = fs::write(logs_dir.join("stdout.log"), redacted);
246    }
247    if let Some(s) = logs.stderr.as_ref() {
248        let redacted = cuenv_events::redact(s);
249        let _ = fs::write(logs_dir.join("stderr.log"), redacted);
250    }
251
252    // workspace snapshot
253    let snapshot = path.join("workspace.tar.zst");
254    snapshot_workspace_tar_zst(hermetic_root, &snapshot)?;
255
256    Ok(())
257}
258
259/// Materialize cached outputs to a destination directory
260pub fn materialize_outputs(key: &str, destination: &Path, root: Option<&Path>) -> Result<usize> {
261    let entry = lookup(key, root).ok_or_else(|| Error::not_found(key))?;
262    let out_dir = entry.path.join("outputs");
263    if !out_dir.exists() {
264        return Ok(0);
265    }
266    let mut count = 0usize;
267    for e in walkdir::WalkDir::new(&out_dir)
268        .into_iter()
269        .filter_map(|e| e.ok())
270    {
271        let p = e.path();
272        if p.is_dir() {
273            continue;
274        }
275        let rel = p.strip_prefix(&out_dir).map_err(|_| {
276            Error::configuration(format!(
277                "path {} is not under out_dir {}",
278                p.display(),
279                out_dir.display()
280            ))
281        })?;
282        let dst = destination.join(rel);
283        if let Some(parent) = dst.parent() {
284            fs::create_dir_all(parent).ok();
285        }
286        fs::copy(p, &dst).map_err(|e| Error::io(e, &dst, "copy"))?;
287        count += 1;
288    }
289    Ok(count)
290}
291
292/// Index mapping task names to their latest cache keys (per project)
293#[derive(Debug, Clone, Serialize, Deserialize, Default)]
294pub struct TaskLatestIndex {
295    /// Map of (project_root_hash, task_name) -> cache_key
296    pub entries: BTreeMap<String, BTreeMap<String, String>>,
297}
298
299fn latest_index_path(root: Option<&Path>) -> Result<PathBuf> {
300    let base = if let Some(r) = root {
301        r.to_path_buf()
302    } else {
303        cache_root()?
304    };
305    Ok(base.join("task-latest.json"))
306}
307
308fn project_hash(project_root: &Path) -> String {
309    let digest = Sha256::digest(project_root.to_string_lossy().as_bytes());
310    hex::encode(&digest[..8])
311}
312
313/// Record the latest cache key for a task in a project
314pub fn record_latest(
315    project_root: &Path,
316    task_name: &str,
317    cache_key: &str,
318    root: Option<&Path>,
319) -> Result<()> {
320    let path = latest_index_path(root)?;
321    let mut index: TaskLatestIndex = if path.exists() {
322        let content = fs::read_to_string(&path).unwrap_or_default();
323        serde_json::from_str(&content).unwrap_or_default()
324    } else {
325        TaskLatestIndex::default()
326    };
327
328    let proj_hash = project_hash(project_root);
329    index
330        .entries
331        .entry(proj_hash)
332        .or_default()
333        .insert(task_name.to_string(), cache_key.to_string());
334
335    let json = serde_json::to_string_pretty(&index)
336        .map_err(|e| Error::serialization(format!("Failed to serialize latest index: {e}")))?;
337    if let Some(parent) = path.parent() {
338        fs::create_dir_all(parent).ok();
339    }
340    fs::write(&path, json).map_err(|e| Error::io(e, &path, "write"))?;
341    Ok(())
342}
343
344/// Look up the latest cache key for a task in a project
345#[must_use]
346pub fn lookup_latest(project_root: &Path, task_name: &str, root: Option<&Path>) -> Option<String> {
347    let path = latest_index_path(root).ok()?;
348    if !path.exists() {
349        return None;
350    }
351    let content = fs::read_to_string(&path).ok()?;
352    let index: TaskLatestIndex = serde_json::from_str(&content).ok()?;
353    let proj_hash = project_hash(project_root);
354    index.entries.get(&proj_hash)?.get(task_name).cloned()
355}
356
357/// Retrieve all latest cache keys for a given project
358pub fn get_project_cache_keys(
359    project_root: &Path,
360    root: Option<&Path>,
361) -> Result<Option<BTreeMap<String, String>>> {
362    let path = latest_index_path(root)?;
363    if !path.exists() {
364        return Ok(None);
365    }
366    let content = fs::read_to_string(&path).map_err(|e| Error::io(e, &path, "read"))?;
367    let index: TaskLatestIndex = serde_json::from_str(&content)
368        .map_err(|e| Error::serialization(format!("Failed to parse task index: {e}")))?;
369    let proj_hash = project_hash(project_root);
370    Ok(index.entries.get(&proj_hash).cloned())
371}
372
373/// Cache key envelope for computing deterministic cache keys
374#[derive(Debug, Clone, Serialize, Deserialize)]
375pub struct CacheKeyEnvelope {
376    /// Input file hashes
377    pub inputs: BTreeMap<String, String>,
378    /// Command to execute
379    pub command: String,
380    /// Command arguments
381    pub args: Vec<String>,
382    /// Shell configuration
383    pub shell: Option<serde_json::Value>,
384    /// Environment variables
385    pub env: BTreeMap<String, String>,
386    /// cuenv version
387    pub cuenv_version: String,
388    /// Platform identifier
389    pub platform: String,
390    /// Hashes of the workspace lockfiles (key = workspace name)
391    #[serde(skip_serializing_if = "Option::is_none")]
392    pub workspace_lockfile_hashes: Option<BTreeMap<String, String>>,
393    /// Hashes of workspace member packages (if relevant)
394    #[serde(skip_serializing_if = "Option::is_none")]
395    pub workspace_package_hashes: Option<BTreeMap<String, String>>,
396}
397
398/// Compute a deterministic cache key from the envelope
399pub fn compute_cache_key(envelope: &CacheKeyEnvelope) -> Result<(String, serde_json::Value)> {
400    // Canonical JSON with sorted keys (BTreeMap ensures deterministic ordering for maps)
401    let json = serde_json::to_value(envelope)
402        .map_err(|e| Error::serialization(format!("Failed to encode envelope: {e}")))?;
403    let bytes = serde_json::to_vec(&json)
404        .map_err(|e| Error::serialization(format!("Failed to serialize envelope: {e}")))?;
405    let digest = Sha256::digest(bytes);
406    Ok((hex::encode(digest), json))
407}
408
409/// Create a compressed tar archive of a workspace directory
410pub fn snapshot_workspace_tar_zst(src_root: &Path, dst_file: &Path) -> Result<()> {
411    let file = fs::File::create(dst_file).map_err(|e| Error::io(e, dst_file, "create"))?;
412    let enc = zstd::Encoder::new(file, 3)
413        .map_err(|e| Error::configuration(format!("zstd encoder error: {e}")))?;
414    let mut builder = tar::Builder::new(enc);
415
416    match builder.append_dir_all(".", src_root) {
417        Ok(()) => {}
418        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
419            // Workspace contents can legitimately disappear during a task (e.g.
420            // package managers removing temp files). Skip snapshotting instead
421            // of failing the whole task cache write.
422            let _ = fs::remove_file(dst_file);
423            tracing::warn!(
424                root = %src_root.display(),
425                "Skipping workspace snapshot; files disappeared during archive: {e}"
426            );
427            return Ok(());
428        }
429        Err(e) => {
430            return Err(Error::configuration(format!("tar append failed: {e}")));
431        }
432    }
433
434    let enc = builder
435        .into_inner()
436        .map_err(|e| Error::configuration(format!("tar finalize failed: {e}")))?;
437    enc.finish()
438        .map_err(|e| Error::configuration(format!("zstd finish failed: {e}")))?;
439    Ok(())
440}
441
442#[cfg(test)]
443mod tests {
444    use super::*;
445    use std::path::PathBuf;
446    use tempfile::TempDir;
447
448    #[allow(dead_code, unsafe_code)]
449    struct EnvVarGuard {
450        key: String,
451        prev: Option<String>,
452    }
453
454    impl EnvVarGuard {
455        #[allow(dead_code, unsafe_code)]
456        fn set<K: Into<String>, V: Into<String>>(key: K, value: V) -> Self {
457            let key_s = key.into();
458            let prev = std::env::var(&key_s).ok();
459            // Rust 2024 makes env mutation unsafe; this test confines changes to the current thread
460            // and restores previous values via Drop.
461            unsafe {
462                std::env::set_var(&key_s, value.into());
463            }
464            Self { key: key_s, prev }
465        }
466    }
467
468    #[allow(unsafe_code)]
469    impl Drop for EnvVarGuard {
470        fn drop(&mut self) {
471            if let Some(ref v) = self.prev {
472                unsafe {
473                    std::env::set_var(&self.key, v);
474                }
475            } else {
476                unsafe {
477                    std::env::remove_var(&self.key);
478                }
479            }
480        }
481    }
482
483    // ==========================================================================
484    // OutputIndexEntry tests
485    // ==========================================================================
486
487    #[test]
488    fn test_output_index_entry_serde() {
489        let entry = OutputIndexEntry {
490            rel_path: "output/file.txt".to_string(),
491            size: 1024,
492            sha256: "abc123".to_string(),
493        };
494
495        let json = serde_json::to_string(&entry).unwrap();
496        let parsed: OutputIndexEntry = serde_json::from_str(&json).unwrap();
497
498        assert_eq!(parsed.rel_path, "output/file.txt");
499        assert_eq!(parsed.size, 1024);
500        assert_eq!(parsed.sha256, "abc123");
501    }
502
503    #[test]
504    fn test_output_index_entry_clone() {
505        let entry = OutputIndexEntry {
506            rel_path: "test.txt".to_string(),
507            size: 100,
508            sha256: "hash".to_string(),
509        };
510
511        let cloned = entry.clone();
512        assert_eq!(cloned.rel_path, "test.txt");
513    }
514
515    // ==========================================================================
516    // TaskResultMeta tests
517    // ==========================================================================
518
519    #[test]
520    fn test_task_result_meta_serde() {
521        let meta = TaskResultMeta {
522            task_name: "build".to_string(),
523            command: "cargo".to_string(),
524            args: vec!["build".to_string()],
525            env_summary: BTreeMap::new(),
526            inputs_summary: BTreeMap::new(),
527            created_at: chrono::Utc::now(),
528            cuenv_version: "0.1.0".to_string(),
529            platform: "linux-x86_64".to_string(),
530            duration_ms: 5000,
531            exit_code: 0,
532            cache_key_envelope: serde_json::json!({}),
533            output_index: vec![],
534        };
535
536        let json = serde_json::to_string(&meta).unwrap();
537        let parsed: TaskResultMeta = serde_json::from_str(&json).unwrap();
538
539        assert_eq!(parsed.task_name, "build");
540        assert_eq!(parsed.command, "cargo");
541        assert_eq!(parsed.exit_code, 0);
542    }
543
544    #[test]
545    fn test_task_result_meta_with_env() {
546        let mut env_summary = BTreeMap::new();
547        env_summary.insert("RUST_LOG".to_string(), "debug".to_string());
548
549        let meta = TaskResultMeta {
550            task_name: "test".to_string(),
551            command: "cargo".to_string(),
552            args: vec!["test".to_string()],
553            env_summary,
554            inputs_summary: BTreeMap::new(),
555            created_at: chrono::Utc::now(),
556            cuenv_version: "0.1.0".to_string(),
557            platform: "linux-x86_64".to_string(),
558            duration_ms: 10000,
559            exit_code: 0,
560            cache_key_envelope: serde_json::json!({}),
561            output_index: vec![],
562        };
563
564        assert_eq!(meta.env_summary.len(), 1);
565        assert_eq!(meta.env_summary.get("RUST_LOG"), Some(&"debug".to_string()));
566    }
567
568    // ==========================================================================
569    // CacheEntry tests
570    // ==========================================================================
571
572    #[test]
573    fn test_cache_entry_fields() {
574        let entry = CacheEntry {
575            key: "abc123".to_string(),
576            path: PathBuf::from("/cache/abc123"),
577        };
578
579        assert_eq!(entry.key, "abc123");
580        assert_eq!(entry.path, PathBuf::from("/cache/abc123"));
581    }
582
583    #[test]
584    fn test_cache_entry_clone() {
585        let entry = CacheEntry {
586            key: "key".to_string(),
587            path: PathBuf::from("/path"),
588        };
589
590        let cloned = entry.clone();
591        assert_eq!(cloned.key, "key");
592    }
593
594    // ==========================================================================
595    // TaskLatestIndex tests
596    // ==========================================================================
597
598    #[test]
599    fn test_task_latest_index_default() {
600        let index = TaskLatestIndex::default();
601        assert!(index.entries.is_empty());
602    }
603
604    #[test]
605    fn test_task_latest_index_serde() {
606        let mut index = TaskLatestIndex::default();
607        let mut tasks = BTreeMap::new();
608        tasks.insert("build".to_string(), "key123".to_string());
609        index.entries.insert("project_hash".to_string(), tasks);
610
611        let json = serde_json::to_string(&index).unwrap();
612        let parsed: TaskLatestIndex = serde_json::from_str(&json).unwrap();
613
614        assert!(parsed.entries.contains_key("project_hash"));
615    }
616
617    // ==========================================================================
618    // CacheKeyEnvelope tests
619    // ==========================================================================
620
621    #[test]
622    fn test_cache_key_envelope_serde() {
623        let envelope = CacheKeyEnvelope {
624            inputs: BTreeMap::from([("file.txt".to_string(), "hash1".to_string())]),
625            command: "echo".to_string(),
626            args: vec!["hello".to_string()],
627            shell: None,
628            env: BTreeMap::new(),
629            cuenv_version: "0.1.0".to_string(),
630            platform: "linux".to_string(),
631            workspace_lockfile_hashes: None,
632            workspace_package_hashes: None,
633        };
634
635        let json = serde_json::to_string(&envelope).unwrap();
636        let parsed: CacheKeyEnvelope = serde_json::from_str(&json).unwrap();
637
638        assert_eq!(parsed.command, "echo");
639    }
640
641    #[test]
642    fn test_cache_key_envelope_with_optional_fields() {
643        let envelope = CacheKeyEnvelope {
644            inputs: BTreeMap::new(),
645            command: "npm".to_string(),
646            args: vec!["install".to_string()],
647            shell: Some(serde_json::json!({"type": "bash"})),
648            env: BTreeMap::new(),
649            cuenv_version: "0.1.0".to_string(),
650            platform: "darwin".to_string(),
651            workspace_lockfile_hashes: Some(BTreeMap::from([(
652                "npm".to_string(),
653                "lockfile_hash".to_string(),
654            )])),
655            workspace_package_hashes: Some(BTreeMap::from([(
656                "pkg".to_string(),
657                "pkg_hash".to_string(),
658            )])),
659        };
660
661        let json = serde_json::to_string(&envelope).unwrap();
662        assert!(json.contains("workspace_lockfile_hashes"));
663        assert!(json.contains("workspace_package_hashes"));
664    }
665
666    // ==========================================================================
667    // key_to_path tests
668    // ==========================================================================
669
670    #[test]
671    fn test_key_to_path_with_root() {
672        let temp = TempDir::new().unwrap();
673        let path = key_to_path("mykey", Some(temp.path())).unwrap();
674        assert!(path.ends_with("mykey"));
675        assert!(path.starts_with(temp.path()));
676    }
677
678    // ==========================================================================
679    // lookup tests
680    // ==========================================================================
681
682    #[test]
683    fn test_lookup_not_found() {
684        let temp = TempDir::new().unwrap();
685        let result = lookup("nonexistent", Some(temp.path()));
686        assert!(result.is_none());
687    }
688
689    #[test]
690    fn test_lookup_found() {
691        let temp = TempDir::new().unwrap();
692        let key_dir = temp.path().join("mykey");
693        fs::create_dir_all(&key_dir).unwrap();
694
695        let result = lookup("mykey", Some(temp.path()));
696        assert!(result.is_some());
697        let entry = result.unwrap();
698        assert_eq!(entry.key, "mykey");
699    }
700
701    // ==========================================================================
702    // record_latest and lookup_latest tests
703    // ==========================================================================
704
705    #[test]
706    fn test_record_and_lookup_latest() {
707        let temp = TempDir::new().unwrap();
708        let project_root = temp.path().join("project");
709        fs::create_dir_all(&project_root).unwrap();
710
711        record_latest(&project_root, "build", "key123", Some(temp.path())).unwrap();
712
713        let result = lookup_latest(&project_root, "build", Some(temp.path()));
714        assert_eq!(result, Some("key123".to_string()));
715    }
716
717    #[test]
718    fn test_lookup_latest_not_found() {
719        let temp = TempDir::new().unwrap();
720        let project_root = temp.path().join("project");
721
722        let result = lookup_latest(&project_root, "nonexistent", Some(temp.path()));
723        assert!(result.is_none());
724    }
725
726    #[test]
727    fn test_record_latest_overwrites() {
728        let temp = TempDir::new().unwrap();
729        let project_root = temp.path().join("project");
730        fs::create_dir_all(&project_root).unwrap();
731
732        record_latest(&project_root, "build", "key1", Some(temp.path())).unwrap();
733        record_latest(&project_root, "build", "key2", Some(temp.path())).unwrap();
734
735        let result = lookup_latest(&project_root, "build", Some(temp.path()));
736        assert_eq!(result, Some("key2".to_string()));
737    }
738
739    // ==========================================================================
740    // get_project_cache_keys tests
741    // ==========================================================================
742
743    #[test]
744    fn test_get_project_cache_keys_empty() {
745        let temp = TempDir::new().unwrap();
746        let project_root = temp.path().join("project");
747
748        let result = get_project_cache_keys(&project_root, Some(temp.path())).unwrap();
749        assert!(result.is_none());
750    }
751
752    #[test]
753    fn test_get_project_cache_keys_with_data() {
754        let temp = TempDir::new().unwrap();
755        let project_root = temp.path().join("project");
756        fs::create_dir_all(&project_root).unwrap();
757
758        record_latest(&project_root, "build", "key1", Some(temp.path())).unwrap();
759        record_latest(&project_root, "test", "key2", Some(temp.path())).unwrap();
760
761        let result = get_project_cache_keys(&project_root, Some(temp.path()))
762            .unwrap()
763            .unwrap();
764        assert_eq!(result.len(), 2);
765        assert_eq!(result.get("build"), Some(&"key1".to_string()));
766        assert_eq!(result.get("test"), Some(&"key2".to_string()));
767    }
768
769    // ==========================================================================
770    // compute_cache_key tests
771    // ==========================================================================
772
773    #[test]
774    fn cache_key_is_deterministic_and_order_invariant() {
775        let mut env_a = BTreeMap::new();
776        env_a.insert("A".to_string(), "1".to_string());
777        env_a.insert("B".to_string(), "2".to_string());
778        let mut inputs1 = BTreeMap::new();
779        inputs1.insert("b.txt".to_string(), "hashb".to_string());
780        inputs1.insert("a.txt".to_string(), "hasha".to_string());
781        let e1 = CacheKeyEnvelope {
782            inputs: inputs1,
783            command: "echo".into(),
784            args: vec!["hi".into()],
785            shell: None,
786            env: env_a.clone(),
787            cuenv_version: "0.1.1".into(),
788            platform: "linux-x86_64".into(),
789            workspace_lockfile_hashes: None,
790            workspace_package_hashes: None,
791        };
792        let (k1, _) = compute_cache_key(&e1).unwrap();
793
794        // Same data but different insertion orders
795        let mut env_b = BTreeMap::new();
796        env_b.insert("B".to_string(), "2".to_string());
797        env_b.insert("A".to_string(), "1".to_string());
798        let mut inputs2 = BTreeMap::new();
799        inputs2.insert("a.txt".to_string(), "hasha".to_string());
800        inputs2.insert("b.txt".to_string(), "hashb".to_string());
801        let e2 = CacheKeyEnvelope {
802            inputs: inputs2,
803            command: "echo".into(),
804            args: vec!["hi".into()],
805            shell: None,
806            env: env_b,
807            cuenv_version: "0.1.1".into(),
808            platform: "linux-x86_64".into(),
809            workspace_lockfile_hashes: None,
810            workspace_package_hashes: None,
811        };
812        let (k2, _) = compute_cache_key(&e2).unwrap();
813
814        assert_eq!(k1, k2);
815    }
816
817    // ==========================================================================
818    // Cache Invalidation Behavioral Tests
819    // ==========================================================================
820    // These tests verify the behavioral contracts around cache invalidation:
821    // When any component of the cache key changes, the key MUST change.
822
823    /// Helper to create a baseline envelope for invalidation tests
824    fn baseline_envelope() -> CacheKeyEnvelope {
825        CacheKeyEnvelope {
826            inputs: BTreeMap::from([
827                ("src/main.rs".to_string(), "abc123".to_string()),
828                ("Cargo.toml".to_string(), "def456".to_string()),
829            ]),
830            command: "cargo".to_string(),
831            args: vec!["build".to_string(), "--release".to_string()],
832            shell: None,
833            env: BTreeMap::from([
834                ("RUST_LOG".to_string(), "debug".to_string()),
835                ("CC".to_string(), "clang".to_string()),
836            ]),
837            cuenv_version: "1.0.0".to_string(),
838            platform: "linux-x86_64".to_string(),
839            workspace_lockfile_hashes: None,
840            workspace_package_hashes: None,
841        }
842    }
843
844    #[test]
845    fn cache_invalidates_when_input_file_content_changes() {
846        // Given: A task with specific input file hashes
847        let base = baseline_envelope();
848        let (base_key, _) = compute_cache_key(&base).unwrap();
849
850        // When: An input file's content changes (different hash)
851        let mut modified = base.clone();
852        modified
853            .inputs
854            .insert("src/main.rs".to_string(), "changed_hash".to_string());
855        let (new_key, _) = compute_cache_key(&modified).unwrap();
856
857        // Then: Cache key must be different (cache is invalidated)
858        assert_ne!(
859            base_key, new_key,
860            "Cache must invalidate when input file content changes"
861        );
862    }
863
864    #[test]
865    fn cache_invalidates_when_new_input_file_added() {
866        // Given: A task with specific inputs
867        let base = baseline_envelope();
868        let (base_key, _) = compute_cache_key(&base).unwrap();
869
870        // When: A new input file is added
871        let mut modified = base.clone();
872        modified
873            .inputs
874            .insert("src/lib.rs".to_string(), "new_file_hash".to_string());
875        let (new_key, _) = compute_cache_key(&modified).unwrap();
876
877        // Then: Cache key must be different
878        assert_ne!(
879            base_key, new_key,
880            "Cache must invalidate when new input file is added"
881        );
882    }
883
884    #[test]
885    fn cache_invalidates_when_input_file_removed() {
886        // Given: A task with specific inputs
887        let base = baseline_envelope();
888        let (base_key, _) = compute_cache_key(&base).unwrap();
889
890        // When: An input file is removed
891        let mut modified = base.clone();
892        modified.inputs.remove("src/main.rs");
893        let (new_key, _) = compute_cache_key(&modified).unwrap();
894
895        // Then: Cache key must be different
896        assert_ne!(
897            base_key, new_key,
898            "Cache must invalidate when input file is removed"
899        );
900    }
901
902    #[test]
903    fn cache_invalidates_when_command_changes() {
904        // Given: A task with a specific command
905        let base = baseline_envelope();
906        let (base_key, _) = compute_cache_key(&base).unwrap();
907
908        // When: The command changes
909        let mut modified = base.clone();
910        modified.command = "rustc".to_string();
911        let (new_key, _) = compute_cache_key(&modified).unwrap();
912
913        // Then: Cache key must be different
914        assert_ne!(
915            base_key, new_key,
916            "Cache must invalidate when command changes"
917        );
918    }
919
920    #[test]
921    fn cache_invalidates_when_args_change() {
922        // Given: A task with specific arguments
923        let base = baseline_envelope();
924        let (base_key, _) = compute_cache_key(&base).unwrap();
925
926        // When: Arguments change
927        let mut modified = base.clone();
928        modified.args = vec!["build".to_string()]; // removed --release
929        let (new_key, _) = compute_cache_key(&modified).unwrap();
930
931        // Then: Cache key must be different
932        assert_ne!(
933            base_key, new_key,
934            "Cache must invalidate when command arguments change"
935        );
936    }
937
938    #[test]
939    fn cache_invalidates_when_env_var_value_changes() {
940        // Given: A task with specific environment variables
941        let base = baseline_envelope();
942        let (base_key, _) = compute_cache_key(&base).unwrap();
943
944        // When: An environment variable value changes
945        let mut modified = base.clone();
946        modified
947            .env
948            .insert("RUST_LOG".to_string(), "info".to_string());
949        let (new_key, _) = compute_cache_key(&modified).unwrap();
950
951        // Then: Cache key must be different
952        assert_ne!(
953            base_key, new_key,
954            "Cache must invalidate when environment variable value changes"
955        );
956    }
957
958    #[test]
959    fn cache_invalidates_when_env_var_added() {
960        // Given: A task with specific environment variables
961        let base = baseline_envelope();
962        let (base_key, _) = compute_cache_key(&base).unwrap();
963
964        // When: A new environment variable is added
965        let mut modified = base.clone();
966        modified
967            .env
968            .insert("NEW_VAR".to_string(), "value".to_string());
969        let (new_key, _) = compute_cache_key(&modified).unwrap();
970
971        // Then: Cache key must be different
972        assert_ne!(
973            base_key, new_key,
974            "Cache must invalidate when new environment variable is added"
975        );
976    }
977
978    #[test]
979    fn cache_invalidates_when_platform_changes() {
980        // Given: A task built for a specific platform
981        let base = baseline_envelope();
982        let (base_key, _) = compute_cache_key(&base).unwrap();
983
984        // When: The platform changes (cross-compilation or different machine)
985        let mut modified = base.clone();
986        modified.platform = "darwin-aarch64".to_string();
987        let (new_key, _) = compute_cache_key(&modified).unwrap();
988
989        // Then: Cache key must be different
990        assert_ne!(
991            base_key, new_key,
992            "Cache must invalidate when platform changes"
993        );
994    }
995
996    #[test]
997    fn cache_invalidates_when_cuenv_version_changes() {
998        // Given: A task built with a specific cuenv version
999        let base = baseline_envelope();
1000        let (base_key, _) = compute_cache_key(&base).unwrap();
1001
1002        // When: cuenv version changes (may affect execution semantics)
1003        let mut modified = base.clone();
1004        modified.cuenv_version = "2.0.0".to_string();
1005        let (new_key, _) = compute_cache_key(&modified).unwrap();
1006
1007        // Then: Cache key must be different
1008        assert_ne!(
1009            base_key, new_key,
1010            "Cache must invalidate when cuenv version changes"
1011        );
1012    }
1013
1014    #[test]
1015    fn cache_invalidates_when_workspace_lockfile_changes() {
1016        // Given: A task with no workspace lockfile hashes
1017        let base = baseline_envelope();
1018        let (base_key, _) = compute_cache_key(&base).unwrap();
1019
1020        // When: Workspace lockfile is added or changes
1021        let mut modified = base.clone();
1022        modified.workspace_lockfile_hashes = Some(BTreeMap::from([(
1023            "cargo".to_string(),
1024            "lockfile_hash_123".to_string(),
1025        )]));
1026        let (new_key, _) = compute_cache_key(&modified).unwrap();
1027
1028        // Then: Cache key must be different
1029        assert_ne!(
1030            base_key, new_key,
1031            "Cache must invalidate when workspace lockfile changes"
1032        );
1033    }
1034
1035    #[test]
1036    fn cache_stable_when_nothing_changes() {
1037        // Given: A task configuration
1038        let envelope = baseline_envelope();
1039
1040        // When: We compute the cache key multiple times
1041        let (key1, _) = compute_cache_key(&envelope).unwrap();
1042        let (key2, _) = compute_cache_key(&envelope).unwrap();
1043        let (key3, _) = compute_cache_key(&envelope).unwrap();
1044
1045        // Then: All keys should be identical (cache hits work correctly)
1046        assert_eq!(key1, key2, "Cache key must be stable across calls");
1047        assert_eq!(key2, key3, "Cache key must be stable across calls");
1048    }
1049
1050    #[test]
1051    fn cache_root_skips_homeless_shelter() {
1052        let tmp = std::env::temp_dir();
1053        let inputs = CacheInputs {
1054            cuenv_cache_dir: None,
1055            xdg_cache_home: Some(PathBuf::from("/homeless-shelter/.cache")),
1056            os_cache_dir: None,
1057            home_dir: Some(PathBuf::from("/homeless-shelter")),
1058            temp_dir: tmp.clone(),
1059        };
1060        let dir =
1061            cache_root_from_inputs(inputs).expect("cache_root should choose a writable fallback");
1062        assert!(!dir.starts_with("/homeless-shelter"));
1063        assert!(dir.starts_with(&tmp));
1064    }
1065
1066    #[test]
1067    fn cache_root_respects_override_env() {
1068        let tmp = std::env::temp_dir().join("cuenv-test-override");
1069        let _ = std::fs::remove_dir_all(&tmp);
1070        let inputs = CacheInputs {
1071            cuenv_cache_dir: Some(tmp.clone()),
1072            xdg_cache_home: None,
1073            os_cache_dir: None,
1074            home_dir: None,
1075            temp_dir: std::env::temp_dir(),
1076        };
1077        let dir = cache_root_from_inputs(inputs).expect("cache_root should use override");
1078        assert!(dir.starts_with(&tmp));
1079        let _ = std::fs::remove_dir_all(&tmp);
1080    }
1081
1082    #[test]
1083    fn save_and_materialize_outputs_roundtrip() {
1084        // Force cache root into a temp directory to avoid touching user dirs
1085        let cache_tmp = TempDir::new().expect("tempdir");
1086
1087        // Prepare fake outputs
1088        let outputs = TempDir::new().expect("outputs tempdir");
1089        std::fs::create_dir_all(outputs.path().join("dir")).unwrap();
1090        std::fs::write(outputs.path().join("foo.txt"), b"foo").unwrap();
1091        std::fs::write(outputs.path().join("dir/bar.bin"), b"bar").unwrap();
1092
1093        // Prepare hermetic workspace to snapshot
1094        let herm = TempDir::new().expect("hermetic tempdir");
1095        std::fs::create_dir_all(herm.path().join("work")).unwrap();
1096        std::fs::write(herm.path().join("work/a.txt"), b"a").unwrap();
1097
1098        // Minimal metadata
1099        let mut env_summary = BTreeMap::new();
1100        env_summary.insert("FOO".to_string(), "1".to_string());
1101        let inputs_summary = BTreeMap::new();
1102        let output_index = vec![
1103            OutputIndexEntry {
1104                rel_path: "foo.txt".to_string(),
1105                size: 3,
1106                sha256: {
1107                    use sha2::{Digest, Sha256};
1108                    let mut h = Sha256::new();
1109                    h.update(b"foo");
1110                    hex::encode(h.finalize())
1111                },
1112            },
1113            OutputIndexEntry {
1114                rel_path: "dir/bar.bin".to_string(),
1115                size: 3,
1116                sha256: {
1117                    use sha2::{Digest, Sha256};
1118                    let mut h = Sha256::new();
1119                    h.update(b"bar");
1120                    hex::encode(h.finalize())
1121                },
1122            },
1123        ];
1124
1125        let meta = TaskResultMeta {
1126            task_name: "unit".into(),
1127            command: "echo".into(),
1128            args: vec!["ok".into()],
1129            env_summary,
1130            inputs_summary,
1131            created_at: chrono::Utc::now(),
1132            cuenv_version: "0.0.0-test".into(),
1133            platform: std::env::consts::OS.to_string(),
1134            duration_ms: 1,
1135            exit_code: 0,
1136            cache_key_envelope: serde_json::json!({}),
1137            output_index,
1138        };
1139
1140        let logs = TaskLogs {
1141            stdout: Some("hello".into()),
1142            stderr: Some(String::new()),
1143        };
1144
1145        let key = "roundtrip-key-123";
1146        save_result(&SaveResultData {
1147            key,
1148            meta: &meta,
1149            outputs_root: outputs.path(),
1150            hermetic_root: herm.path(),
1151            logs: &logs,
1152            root: Some(cache_tmp.path()),
1153        })
1154        .expect("save_result");
1155
1156        // Verify cache layout
1157        let base = key_to_path(key, Some(cache_tmp.path())).expect("key_to_path");
1158        assert!(base.join("metadata.json").exists());
1159        assert!(base.join("outputs/foo.txt").exists());
1160        assert!(base.join("outputs/dir/bar.bin").exists());
1161        assert!(base.join("logs/stdout.log").exists());
1162        let snapshot = base.join("workspace.tar.zst");
1163        let snap_meta = std::fs::metadata(&snapshot).unwrap();
1164        assert!(snap_meta.len() > 0);
1165
1166        // Materialize into fresh destination
1167        let dest = TempDir::new().expect("dest tempdir");
1168        let copied = materialize_outputs(key, dest.path(), Some(cache_tmp.path()))
1169            .expect("materialize_outputs");
1170        assert_eq!(copied, 2);
1171        assert_eq!(std::fs::read(dest.path().join("foo.txt")).unwrap(), b"foo");
1172        assert_eq!(
1173            std::fs::read(dest.path().join("dir/bar.bin")).unwrap(),
1174            b"bar"
1175        );
1176    }
1177
1178    #[test]
1179    fn test_snapshot_workspace_tar_zst() {
1180        let src = TempDir::new().unwrap();
1181        std::fs::create_dir_all(src.path().join("subdir")).unwrap();
1182        std::fs::write(src.path().join("file.txt"), "content").unwrap();
1183        std::fs::write(src.path().join("subdir/nested.txt"), "nested").unwrap();
1184
1185        let dst = TempDir::new().unwrap();
1186        let archive_path = dst.path().join("archive.tar.zst");
1187
1188        snapshot_workspace_tar_zst(src.path(), &archive_path).unwrap();
1189        assert!(archive_path.exists());
1190        // Verify the archive is non-empty
1191        let metadata = std::fs::metadata(&archive_path).unwrap();
1192        assert!(metadata.len() > 0);
1193    }
1194}