cuenv_ci/
gc.rs

//! Garbage Collection
//!
//! LRU-based cleanup for the local cache and, optionally, Nix store closures.
4
5use std::fs::{self, Metadata};
6use std::path::{Path, PathBuf};
7use std::time::{Duration, SystemTime};
8use thiserror::Error;
9
/// Size budget applied to the cache when none is configured: 10 GiB, in bytes.
pub const DEFAULT_MAX_SIZE_BYTES: u64 = 10 * 1024_u64.pow(3);
12
/// Age limit, in days, applied to cache entries when none is configured.
pub const DEFAULT_MAX_AGE_DAYS: u32 = 30;
15
16/// Errors for garbage collection
17#[derive(Debug, Error)]
18pub enum GCError {
19    /// IO error
20    #[error("IO error: {0}")]
21    Io(#[from] std::io::Error),
22
23    /// Cache directory not found
24    #[error("Cache directory not found: {0}")]
25    CacheDirNotFound(PathBuf),
26
27    /// Nix garbage collection failed
28    #[error("Nix garbage collection failed: {0}")]
29    NixGCFailed(String),
30}
31
/// Summary of a single garbage-collection run.
///
/// All counters start at zero (see the `Default` derive).
#[derive(Clone, Debug, Default)]
pub struct GCStats {
    /// How many cache entries were found while scanning.
    pub entries_scanned: usize,
    /// How many cache entries were deleted.
    pub entries_removed: usize,
    /// Total size, in bytes, of the deleted entries.
    pub bytes_freed: u64,
    /// Size of the cache, in bytes, once the run has finished.
    pub current_size: u64,
    /// Wall-clock time the run took, in milliseconds.
    pub duration_ms: u64,
}
46
/// One file found in the cache, with the data needed to order entries by
/// recency and to account for their size.
#[derive(Debug)]
struct CacheEntry {
    /// Location of the file on disk.
    path: PathBuf,
    /// File size in bytes.
    size: u64,
    /// Timestamp used as the LRU sort key.
    last_accessed: SystemTime,
}
54
/// Settings that control a garbage-collection run.
#[derive(Clone, Debug)]
pub struct GCConfig {
    /// Directory whose contents are cleaned.
    pub cache_dir: PathBuf,
    /// Upper bound, in bytes, on the total size of the cache.
    pub max_size_bytes: u64,
    /// Upper bound, in days, on the age of a cache entry.
    pub max_age_days: u32,
    /// When `true`, Nix garbage collection is run as well.
    pub run_nix_gc: bool,
    /// When `true`, nothing is deleted (dry run).
    pub dry_run: bool,
}
69
70impl Default for GCConfig {
71    fn default() -> Self {
72        Self {
73            cache_dir: PathBuf::from(".cuenv/cache"),
74            max_size_bytes: DEFAULT_MAX_SIZE_BYTES,
75            max_age_days: DEFAULT_MAX_AGE_DAYS,
76            run_nix_gc: false,
77            dry_run: false,
78        }
79    }
80}
81
82/// Garbage collector for CI cache
83pub struct GarbageCollector {
84    config: GCConfig,
85}
86
87impl GarbageCollector {
88    /// Create a new garbage collector with default config
89    #[must_use]
90    pub fn new() -> Self {
91        Self {
92            config: GCConfig::default(),
93        }
94    }
95
96    /// Create with custom configuration
97    #[must_use]
98    pub fn with_config(config: GCConfig) -> Self {
99        Self { config }
100    }
101
102    /// Set the cache directory
103    #[must_use]
104    pub fn cache_dir(mut self, dir: impl Into<PathBuf>) -> Self {
105        self.config.cache_dir = dir.into();
106        self
107    }
108
109    /// Set max cache size
110    #[must_use]
111    pub fn max_size(mut self, bytes: u64) -> Self {
112        self.config.max_size_bytes = bytes;
113        self
114    }
115
116    /// Set max age in days
117    #[must_use]
118    pub fn max_age_days(mut self, days: u32) -> Self {
119        self.config.max_age_days = days;
120        self
121    }
122
123    /// Enable Nix garbage collection
124    #[must_use]
125    pub fn with_nix_gc(mut self) -> Self {
126        self.config.run_nix_gc = true;
127        self
128    }
129
130    /// Enable dry run mode
131    #[must_use]
132    pub fn dry_run(mut self) -> Self {
133        self.config.dry_run = true;
134        self
135    }
136
137    /// Run garbage collection
138    ///
139    /// # Errors
140    ///
141    /// Returns `GCError` if garbage collection fails.
142    pub fn run(&self) -> Result<GCStats, GCError> {
143        let start = std::time::Instant::now();
144        let mut stats = GCStats::default();
145
146        if !self.config.cache_dir.exists() {
147            tracing::debug!(
148                dir = %self.config.cache_dir.display(),
149                "Cache directory does not exist, nothing to clean"
150            );
151            return Ok(stats);
152        }
153
154        // Collect all cache entries
155        let mut entries = Self::scan_cache(&self.config.cache_dir)?;
156        stats.entries_scanned = entries.len();
157
158        // Calculate current size
159        let total_size: u64 = entries.iter().map(|e| e.size).sum();
160        tracing::info!(
161            entries = entries.len(),
162            size_mb = total_size / (1024 * 1024),
163            "Scanned cache"
164        );
165
166        // Sort by last accessed (oldest first)
167        entries.sort_by(|a, b| a.last_accessed.cmp(&b.last_accessed));
168
169        let now = SystemTime::now();
170        let max_age = Duration::from_secs(u64::from(self.config.max_age_days) * 24 * 60 * 60);
171        let mut current_size = total_size;
172
173        // Remove entries that are too old or exceed size limit
174        for entry in entries {
175            let age = now
176                .duration_since(entry.last_accessed)
177                .unwrap_or(Duration::ZERO);
178
179            let should_remove = age > max_age || current_size > self.config.max_size_bytes;
180
181            if should_remove {
182                if self.config.dry_run {
183                    tracing::info!(
184                        path = %entry.path.display(),
185                        size = entry.size,
186                        age_days = age.as_secs() / (24 * 60 * 60),
187                        "[dry-run] Would remove"
188                    );
189                } else {
190                    match Self::remove_entry(&entry.path) {
191                        Ok(()) => {
192                            tracing::debug!(
193                                path = %entry.path.display(),
194                                size = entry.size,
195                                "Removed cache entry"
196                            );
197                            stats.entries_removed += 1;
198                            stats.bytes_freed += entry.size;
199                            current_size = current_size.saturating_sub(entry.size);
200                        }
201                        Err(e) => {
202                            tracing::warn!(
203                                path = %entry.path.display(),
204                                error = %e,
205                                "Failed to remove cache entry"
206                            );
207                        }
208                    }
209                }
210            }
211
212            // Stop if we're under the size limit and past max age check
213            if current_size <= self.config.max_size_bytes && age <= max_age {
214                break;
215            }
216        }
217
218        stats.current_size = current_size;
219
220        // Run Nix GC if configured
221        if self.config.run_nix_gc
222            && !self.config.dry_run
223            && let Err(e) = Self::run_nix_gc()
224        {
225            tracing::warn!(error = %e, "Nix garbage collection failed");
226        }
227
228        stats.duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX);
229
230        tracing::info!(
231            removed = stats.entries_removed,
232            freed_mb = stats.bytes_freed / (1024 * 1024),
233            current_mb = stats.current_size / (1024 * 1024),
234            duration_ms = stats.duration_ms,
235            "Garbage collection complete"
236        );
237
238        Ok(stats)
239    }
240
241    fn scan_cache(dir: &Path) -> Result<Vec<CacheEntry>, GCError> {
242        let mut entries = Vec::new();
243        Self::scan_dir_recursive(dir, &mut entries)?;
244        Ok(entries)
245    }
246
247    fn scan_dir_recursive(dir: &Path, entries: &mut Vec<CacheEntry>) -> Result<(), GCError> {
248        for entry in fs::read_dir(dir)? {
249            let entry = entry?;
250            let path = entry.path();
251            let metadata = entry.metadata()?;
252
253            if metadata.is_dir() {
254                Self::scan_dir_recursive(&path, entries)?;
255            } else if metadata.is_file()
256                && let Some(cache_entry) = Self::create_entry(&path, &metadata)
257            {
258                entries.push(cache_entry);
259            }
260        }
261        Ok(())
262    }
263
264    fn create_entry(path: &Path, metadata: &Metadata) -> Option<CacheEntry> {
265        let size = metadata.len();
266        let last_accessed = metadata.accessed().or_else(|_| metadata.modified()).ok()?;
267
268        Some(CacheEntry {
269            path: path.to_path_buf(),
270            size,
271            last_accessed,
272        })
273    }
274
275    fn remove_entry(path: &Path) -> Result<(), GCError> {
276        if path.is_dir() {
277            fs::remove_dir_all(path)?;
278        } else {
279            fs::remove_file(path)?;
280        }
281        Ok(())
282    }
283
284    fn run_nix_gc() -> Result<(), GCError> {
285        tracing::info!("Running Nix garbage collection...");
286
287        let output = std::process::Command::new("nix-collect-garbage")
288            .arg("-d") // Delete old generations
289            .output()
290            .map_err(|e| GCError::NixGCFailed(e.to_string()))?;
291
292        if !output.status.success() {
293            let stderr = String::from_utf8_lossy(&output.stderr);
294            return Err(GCError::NixGCFailed(stderr.to_string()));
295        }
296
297        let stdout = String::from_utf8_lossy(&output.stdout);
298        tracing::info!(output = %stdout, "Nix garbage collection complete");
299
300        Ok(())
301    }
302}
303
304impl Default for GarbageCollector {
305    fn default() -> Self {
306        Self::new()
307    }
308}
309
310/// Convenience function to run GC with default settings
311///
312/// # Errors
313///
314/// Returns `GCError` if garbage collection fails.
315pub fn run_gc(cache_dir: &Path) -> Result<GCStats, GCError> {
316    GarbageCollector::new().cache_dir(cache_dir).run()
317}
318
319/// Run GC in dry-run mode to see what would be deleted
320///
321/// # Errors
322///
323/// Returns `GCError` if garbage collection preview fails.
324pub fn preview_gc(cache_dir: &Path) -> Result<GCStats, GCError> {
325    GarbageCollector::new().cache_dir(cache_dir).dry_run().run()
326}
327
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Writes a zero-filled file of `size` bytes called `name` into `dir` and
    /// returns its path.
    fn create_test_file(dir: &Path, name: &str, size: usize) -> PathBuf {
        let target = dir.join(name);
        fs::write(&target, vec![0u8; size]).unwrap();
        target
    }

    /// An existing but empty cache directory yields empty statistics.
    #[test]
    fn test_empty_cache() {
        let tmp = TempDir::new().unwrap();

        let stats = GarbageCollector::new()
            .cache_dir(tmp.path())
            .run()
            .unwrap();

        assert_eq!(stats.entries_scanned, 0);
        assert_eq!(stats.entries_removed, 0);
    }

    /// Fresh files that fit the size budget are all kept.
    #[test]
    fn test_cache_under_limit() {
        let tmp = TempDir::new().unwrap();
        create_test_file(tmp.path(), "file1.cache", 1000);
        create_test_file(tmp.path(), "file2.cache", 2000);

        // 1MB limit
        let stats = GarbageCollector::new()
            .cache_dir(tmp.path())
            .max_size(1024 * 1024)
            .run()
            .unwrap();

        assert_eq!(stats.entries_scanned, 2);
        assert_eq!(stats.entries_removed, 0); // Nothing removed, under limit
    }

    /// Entries are evicted until the cache fits the size budget.
    #[test]
    fn test_cache_over_limit() {
        let tmp = TempDir::new().unwrap();
        for name in ["file1.cache", "file2.cache", "file3.cache"] {
            create_test_file(tmp.path(), name, 500);
        }

        // Limit to 1000 bytes
        let stats = GarbageCollector::new()
            .cache_dir(tmp.path())
            .max_size(1000)
            .run()
            .unwrap();

        assert!(stats.entries_removed > 0);
        assert!(stats.current_size <= 1000);
    }

    /// Dry-run mode neither deletes files nor counts removals.
    #[test]
    fn test_dry_run() {
        let tmp = TempDir::new().unwrap();
        let file = create_test_file(tmp.path(), "file1.cache", 500);

        let stats = GarbageCollector::new()
            .cache_dir(tmp.path())
            .max_size(100) // Force removal
            .dry_run()
            .run()
            .unwrap();

        // File should still exist in dry run mode
        assert!(file.exists());
        assert_eq!(stats.entries_removed, 0); // Dry run doesn't count as removed
    }

    /// Files inside sub-directories are scanned too.
    #[test]
    fn test_nested_directories() {
        let tmp = TempDir::new().unwrap();
        let subdir = tmp.path().join("subdir");
        fs::create_dir(&subdir).unwrap();

        create_test_file(tmp.path(), "root.cache", 100);
        create_test_file(&subdir, "nested.cache", 100);

        let stats = GarbageCollector::new()
            .cache_dir(tmp.path())
            .run()
            .unwrap();

        assert_eq!(stats.entries_scanned, 2);
    }

    /// A missing cache directory is treated as "nothing to do".
    #[test]
    fn test_nonexistent_cache_dir() {
        let stats = GarbageCollector::new()
            .cache_dir("/nonexistent/path")
            .run()
            .unwrap();

        assert_eq!(stats.entries_scanned, 0);
    }

    /// Default statistics start with zeroed counters.
    #[test]
    fn test_gc_stats_defaults() {
        let stats = GCStats::default();
        assert_eq!(stats.entries_scanned, 0);
        assert_eq!(stats.entries_removed, 0);
        assert_eq!(stats.bytes_freed, 0);
    }
}