Skip to main content

guild_cli/
cache.rs

1use std::collections::BTreeMap;
2use std::fs;
3use std::path::{Path, PathBuf};
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7use sha2::{Digest, Sha256};
8
9use crate::error::CacheError;
10use crate::graph::TaskId;
11
/// Path of the cache directory, relative to the workspace root.
///
/// `Cache::new` joins this onto the workspace root to obtain the directory
/// in which the per-task `.json` entry files are stored.
const CACHE_DIR: &str = ".guild/cache";
14
/// A cache entry stored for a task.
///
/// `Cache::write` serializes one of these as pretty-printed JSON per task;
/// `Cache::check` deserializes it again and only reports a hit when
/// `input_hash` matches the current hash and `success` is `true`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheEntry {
    /// Hash of the task inputs at the time the entry was written.
    pub input_hash: String,
    /// When the cache entry was created (set to `Utc::now()` by `Cache::write`).
    pub timestamp: DateTime<Utc>,
    /// Whether the task succeeded. Entries with `false` never count as hits.
    pub success: bool,
    /// The command that was executed.
    pub command: String,
}
27
/// Statistics about the cache.
///
/// `hits` and `misses` are counters kept in memory by `Cache::check`;
/// `entry_count` and `total_size` are filled in by `Cache::stats`, which
/// scans the cache directory for `.json` files each time it is called.
#[derive(Debug, Clone, Default)]
pub struct CacheStats {
    /// Total number of cache entries (`.json` files in the cache directory).
    pub entry_count: usize,
    /// Combined size in bytes of the `.json` entry files in the cache directory.
    pub total_size: u64,
    /// Number of cache hits during this run.
    pub hits: usize,
    /// Number of cache misses during this run.
    pub misses: usize,
}
40
/// Input-based cache for task outputs.
///
/// Each task gets one JSON file inside `cache_dir`; an entry is considered
/// valid while the hash of the task's inputs matches the hash stored in it.
#[derive(Debug)]
pub struct Cache {
    /// Root directory of the cache (workspace_root/.guild/cache).
    cache_dir: PathBuf,
    /// Hit/miss counters for the current run (updated by `check`).
    stats: CacheStats,
}
49
50impl Cache {
51    /// Create a new cache at the given workspace root.
52    pub fn new(workspace_root: &Path) -> Self {
53        Self {
54            cache_dir: workspace_root.join(CACHE_DIR),
55            stats: CacheStats::default(),
56        }
57    }
58
59    /// Get the path to a cache entry file for a task.
60    fn entry_path(&self, task_id: &TaskId) -> PathBuf {
61        let key = format!("{}:{}", task_id.project(), task_id.target());
62        let mut hasher = Sha256::new();
63        hasher.update(key.as_bytes());
64        let hash = format!("{:x}", hasher.finalize());
65        self.cache_dir.join(format!("{hash}.json"))
66    }
67
68    /// Compute the input hash for a task.
69    ///
70    /// The hash is computed from:
71    /// - The target command
72    /// - Sorted input file contents (matched by glob patterns)
73    /// - Dependency cache keys (for transitivity)
74    pub fn compute_input_hash(
75        &self,
76        command: &str,
77        project_root: &Path,
78        input_patterns: &[String],
79        dependency_hashes: &[String],
80    ) -> Result<String, CacheError> {
81        let mut hasher = Sha256::new();
82
83        // Hash the command
84        hasher.update(command.as_bytes());
85        hasher.update(b"\0");
86
87        // Hash input files sorted by path
88        let mut file_hashes: BTreeMap<PathBuf, String> = BTreeMap::new();
89
90        for pattern in input_patterns {
91            let full_pattern = project_root.join(pattern);
92            let pattern_str = full_pattern.to_string_lossy();
93
94            let entries = glob::glob(&pattern_str).map_err(|e| CacheError::GlobPattern {
95                pattern: pattern.clone(),
96                source: e,
97            })?;
98
99            for entry in entries {
100                let path = entry.map_err(|e| CacheError::GlobEntry { source: e })?;
101                if path.is_file() {
102                    let content = fs::read(&path).map_err(|e| CacheError::ReadFile {
103                        path: path.clone(),
104                        source: e,
105                    })?;
106                    let mut file_hasher = Sha256::new();
107                    file_hasher.update(&content);
108                    let file_hash = format!("{:x}", file_hasher.finalize());
109                    file_hashes.insert(path, file_hash);
110                }
111            }
112        }
113
114        // If no input patterns specified, use a default hash based on command only
115        // This allows caching for targets without explicit inputs
116        for (path, hash) in &file_hashes {
117            hasher.update(path.to_string_lossy().as_bytes());
118            hasher.update(b":");
119            hasher.update(hash.as_bytes());
120            hasher.update(b"\0");
121        }
122
123        // Hash dependency cache keys (sorted for determinism)
124        let mut sorted_deps: Vec<&String> = dependency_hashes.iter().collect();
125        sorted_deps.sort();
126        for dep_hash in sorted_deps {
127            hasher.update(dep_hash.as_bytes());
128            hasher.update(b"\0");
129        }
130
131        Ok(format!("{:x}", hasher.finalize()))
132    }
133
134    /// Check if a task has a valid cache entry.
135    ///
136    /// Returns `Some(entry)` if the cache is valid, `None` otherwise.
137    pub fn check(&mut self, task_id: &TaskId, current_hash: &str) -> Option<CacheEntry> {
138        let path = self.entry_path(task_id);
139
140        if !path.exists() {
141            self.stats.misses += 1;
142            return None;
143        }
144
145        match fs::read_to_string(&path) {
146            Ok(content) => match serde_json::from_str::<CacheEntry>(&content) {
147                Ok(entry) if entry.input_hash == current_hash && entry.success => {
148                    self.stats.hits += 1;
149                    Some(entry)
150                }
151                _ => {
152                    self.stats.misses += 1;
153                    None
154                }
155            },
156            Err(_) => {
157                self.stats.misses += 1;
158                None
159            }
160        }
161    }
162
163    /// Write a cache entry for a task.
164    pub fn write(
165        &self,
166        task_id: &TaskId,
167        input_hash: String,
168        success: bool,
169        command: String,
170    ) -> Result<(), CacheError> {
171        // Ensure cache directory exists
172        fs::create_dir_all(&self.cache_dir).map_err(|e| CacheError::CreateDir {
173            path: self.cache_dir.clone(),
174            source: e,
175        })?;
176
177        let entry = CacheEntry {
178            input_hash,
179            timestamp: Utc::now(),
180            success,
181            command,
182        };
183
184        let path = self.entry_path(task_id);
185        let content =
186            serde_json::to_string_pretty(&entry).map_err(|e| CacheError::SerializeEntry {
187                task: task_id.to_string(),
188                source: e,
189            })?;
190
191        fs::write(&path, content).map_err(|e| CacheError::WriteFile { path, source: e })?;
192
193        Ok(())
194    }
195
196    /// Get cache statistics.
197    pub fn stats(&self) -> Result<CacheStats, CacheError> {
198        let mut stats = self.stats.clone();
199
200        if self.cache_dir.exists() {
201            for entry in fs::read_dir(&self.cache_dir).map_err(|e| CacheError::ReadDir {
202                path: self.cache_dir.clone(),
203                source: e,
204            })? {
205                let entry = entry.map_err(|e| CacheError::ReadDir {
206                    path: self.cache_dir.clone(),
207                    source: e,
208                })?;
209
210                let path = entry.path();
211                if path.extension().is_some_and(|ext| ext == "json") {
212                    stats.entry_count += 1;
213                    if let Ok(metadata) = fs::metadata(&path) {
214                        stats.total_size += metadata.len();
215                    }
216                }
217            }
218        }
219
220        Ok(stats)
221    }
222
223    /// Clean the cache directory.
224    pub fn clean(&self) -> Result<usize, CacheError> {
225        if !self.cache_dir.exists() {
226            return Ok(0);
227        }
228
229        let mut removed = 0;
230
231        for entry in fs::read_dir(&self.cache_dir).map_err(|e| CacheError::ReadDir {
232            path: self.cache_dir.clone(),
233            source: e,
234        })? {
235            let entry = entry.map_err(|e| CacheError::ReadDir {
236                path: self.cache_dir.clone(),
237                source: e,
238            })?;
239
240            let path = entry.path();
241            if path.extension().is_some_and(|ext| ext == "json") {
242                fs::remove_file(&path).map_err(|e| CacheError::RemoveFile {
243                    path: path.clone(),
244                    source: e,
245                })?;
246                removed += 1;
247            }
248        }
249
250        // Try to remove the cache directory if empty
251        // Ignore errors since .guild might have other contents
252        let _ = fs::remove_dir(&self.cache_dir);
253        let _ = fs::remove_dir(self.cache_dir.parent().unwrap_or(&self.cache_dir));
254
255        Ok(removed)
256    }
257
258    /// Get the cache directory path.
259    pub fn cache_dir(&self) -> &Path {
260        &self.cache_dir
261    }
262}
263
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{ProjectName, TargetName};
    use tempfile::TempDir;

    /// Build a `TaskId` from plain strings, panicking on invalid names.
    fn task_id(project: &str, target: &str) -> TaskId {
        let project = project.parse::<ProjectName>().unwrap();
        let target = target.parse::<TargetName>().unwrap();
        TaskId::new(project, target)
    }

    /// Create a scratch workspace plus a cache rooted in it.
    ///
    /// The `TempDir` must stay alive for as long as the cache is used,
    /// because dropping it deletes the directory.
    fn workspace() -> (TempDir, Cache) {
        let temp = TempDir::new().unwrap();
        let cache = Cache::new(temp.path());
        (temp, cache)
    }

    /// Write `body` to `src/main.rs` below `root`, creating `src/` if needed.
    fn write_main_rs(root: &Path, body: &str) {
        let src_dir = root.join("src");
        fs::create_dir_all(&src_dir).unwrap();
        fs::write(src_dir.join("main.rs"), body).unwrap();
    }

    /// Hash `cargo build` over all Rust sources below `root/src`.
    fn build_hash(cache: &Cache, root: &Path) -> String {
        cache
            .compute_input_hash("cargo build", root, &["src/**/*.rs".to_string()], &[])
            .unwrap()
    }

    #[test]
    fn test_compute_input_hash_deterministic() {
        let (temp, cache) = workspace();
        write_main_rs(temp.path(), "fn main() {}");

        let first = build_hash(&cache, temp.path());
        let second = build_hash(&cache, temp.path());

        assert_eq!(first, second);
    }

    #[test]
    fn test_compute_input_hash_changes_with_file_content() {
        let (temp, cache) = workspace();

        write_main_rs(temp.path(), "fn main() {}");
        let before = build_hash(&cache, temp.path());

        // Same file, different content.
        write_main_rs(temp.path(), "fn main() { println!(\"hi\"); }");
        let after = build_hash(&cache, temp.path());

        assert_ne!(before, after);
    }

    #[test]
    fn test_compute_input_hash_changes_with_command() {
        let (temp, cache) = workspace();
        let hash_for = |command: &str| {
            cache
                .compute_input_hash(command, temp.path(), &[], &[])
                .unwrap()
        };

        let debug = hash_for("cargo build");
        let release = hash_for("cargo build --release");

        assert_ne!(debug, release);
    }

    #[test]
    fn test_cache_write_and_check() {
        let (_temp, mut cache) = workspace();
        let task = task_id("my-app", "build");
        let hash = "abc123".to_string();

        // Nothing has been written yet, so the first lookup misses.
        assert!(cache.check(&task, &hash).is_none());

        cache
            .write(&task, hash.clone(), true, "cargo build".to_string())
            .unwrap();

        // The freshly written entry is returned on the next lookup.
        let entry = cache.check(&task, &hash).unwrap();
        assert!(entry.success);
        assert_eq!(entry.input_hash, hash);
    }

    #[test]
    fn test_cache_miss_on_different_hash() {
        let (_temp, mut cache) = workspace();
        let task = task_id("my-app", "build");

        cache
            .write(&task, "hash1".to_string(), true, "cargo build".to_string())
            .unwrap();

        // A stored entry with another hash must not be reported as a hit.
        assert!(cache.check(&task, "hash2").is_none());
    }

    #[test]
    fn test_cache_miss_on_failed_entry() {
        let (_temp, mut cache) = workspace();
        let task = task_id("my-app", "build");
        let hash = "abc123".to_string();

        // Record a failed run under the matching hash.
        cache
            .write(&task, hash.clone(), false, "cargo build".to_string())
            .unwrap();

        // Failed entries don't count as cache hits
        assert!(cache.check(&task, &hash).is_none());
    }

    #[test]
    fn test_cache_clean() {
        let (_temp, cache) = workspace();
        let app_build = task_id("app", "build");
        let lib_build = task_id("lib", "build");

        cache
            .write(&app_build, "hash1".to_string(), true, "cmd1".to_string())
            .unwrap();
        cache
            .write(&lib_build, "hash2".to_string(), true, "cmd2".to_string())
            .unwrap();

        assert_eq!(cache.stats().unwrap().entry_count, 2);

        let removed = cache.clean().unwrap();
        assert_eq!(removed, 2);

        assert_eq!(cache.stats().unwrap().entry_count, 0);
    }

    #[test]
    fn test_cache_stats() {
        let (_temp, mut cache) = workspace();
        let task = task_id("my-app", "build");
        let hash = "abc123".to_string();

        // First lookup: miss.
        let _ = cache.check(&task, &hash);

        cache
            .write(&task, hash.clone(), true, "cargo build".to_string())
            .unwrap();

        // Second lookup: hit.
        let _ = cache.check(&task, &hash);

        let stats = cache.stats().unwrap();
        assert_eq!(stats.entry_count, 1);
        assert_eq!(stats.hits, 1);
        assert_eq!(stats.misses, 1);
        assert!(stats.total_size > 0);
    }

    #[test]
    fn test_dependency_hashes_affect_input_hash() {
        let (temp, cache) = workspace();
        let hash_with_dep = |dep: &str| {
            cache
                .compute_input_hash("cargo build", temp.path(), &[], &[dep.to_string()])
                .unwrap()
        };

        let first = hash_with_dep("dep_hash_1");
        let second = hash_with_dep("dep_hash_2");

        assert_ne!(first, second);
    }
}