Skip to main content

cuenv_ci/
gc.rs

//! Garbage Collection
//!
//! LRU-based cleanup for the local cache and, optionally, Nix store closures.
4
5// GC involves complex file system traversal with LRU and size calculations
6#![allow(clippy::cognitive_complexity, clippy::too_many_lines)]
7
8use cuenv_core::DryRun;
9use std::fs::{self, Metadata};
10use std::path::{Path, PathBuf};
11use std::time::{Duration, SystemTime};
12use thiserror::Error;
13
/// Size budget applied to the cache when none is configured: 10 GiB, in bytes.
pub const DEFAULT_MAX_SIZE_BYTES: u64 = 10 * (1 << 30);

/// Age limit, in days, applied to cache entries when none is configured.
pub const DEFAULT_MAX_AGE_DAYS: u32 = 30;
19
20/// Errors for garbage collection
21#[derive(Debug, Error)]
22pub enum GCError {
23    /// IO error
24    #[error("IO error: {0}")]
25    Io(#[from] std::io::Error),
26
27    /// Cache directory not found
28    #[error("Cache directory not found: {0}")]
29    CacheDirNotFound(PathBuf),
30
31    /// Nix garbage collection failed
32    #[error("Nix garbage collection failed: {0}")]
33    NixGCFailed(String),
34}
35
/// Summary of a single garbage collection run.
///
/// All fields default to zero, which is also what a run reports when the
/// cache directory does not exist.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct GCStats {
    /// Number of cache files found while scanning.
    pub entries_scanned: usize,
    /// Number of cache files actually deleted (always zero in a dry run).
    pub entries_removed: usize,
    /// Total size, in bytes, of the files that were deleted.
    pub bytes_freed: u64,
    /// Size of the cache, in bytes, once the run has finished.
    pub current_size: u64,
    /// Wall-clock duration of the run, in milliseconds.
    pub duration_ms: u64,
}
50
/// One cached file together with the metadata the collector needs to decide
/// eviction order.
#[derive(Debug)]
struct CacheEntry {
    /// Location of the file on disk.
    path: PathBuf,
    /// File length in bytes.
    size: u64,
    /// Timestamp used for LRU ordering (older entries are evicted first).
    last_accessed: SystemTime,
}
58
59/// Garbage collector configuration
60#[derive(Debug, Clone)]
61pub struct GCConfig {
62    /// Cache directory to clean
63    pub cache_dir: PathBuf,
64    /// Maximum total cache size in bytes
65    pub max_size_bytes: u64,
66    /// Maximum age for cache entries in days
67    pub max_age_days: u32,
68    /// Whether to run Nix garbage collection
69    pub run_nix_gc: bool,
70    /// Dry run (don't actually delete)
71    pub dry_run: DryRun,
72}
73
74impl Default for GCConfig {
75    fn default() -> Self {
76        Self {
77            cache_dir: PathBuf::from(".cuenv/cache"),
78            max_size_bytes: DEFAULT_MAX_SIZE_BYTES,
79            max_age_days: DEFAULT_MAX_AGE_DAYS,
80            run_nix_gc: false,
81            dry_run: DryRun::No,
82        }
83    }
84}
85
86/// Garbage collector for CI cache
87pub struct GarbageCollector {
88    config: GCConfig,
89}
90
91impl GarbageCollector {
92    /// Create a new garbage collector with default config
93    #[must_use]
94    pub fn new() -> Self {
95        Self {
96            config: GCConfig::default(),
97        }
98    }
99
100    /// Create with custom configuration
101    #[must_use]
102    pub const fn with_config(config: GCConfig) -> Self {
103        Self { config }
104    }
105
106    /// Set the cache directory
107    #[must_use]
108    pub fn cache_dir(mut self, dir: impl Into<PathBuf>) -> Self {
109        self.config.cache_dir = dir.into();
110        self
111    }
112
113    /// Set max cache size
114    #[must_use]
115    pub const fn max_size(mut self, bytes: u64) -> Self {
116        self.config.max_size_bytes = bytes;
117        self
118    }
119
120    /// Set max age in days
121    #[must_use]
122    pub const fn max_age_days(mut self, days: u32) -> Self {
123        self.config.max_age_days = days;
124        self
125    }
126
127    /// Enable Nix garbage collection
128    #[must_use]
129    pub const fn with_nix_gc(mut self) -> Self {
130        self.config.run_nix_gc = true;
131        self
132    }
133
134    /// Enable dry run mode
135    #[must_use]
136    pub const fn dry_run(mut self) -> Self {
137        self.config.dry_run = DryRun::Yes;
138        self
139    }
140
141    /// Run garbage collection
142    ///
143    /// # Errors
144    ///
145    /// Returns `GCError` if garbage collection fails.
146    pub fn run(&self) -> Result<GCStats, GCError> {
147        let start = std::time::Instant::now();
148        let mut stats = GCStats::default();
149
150        if !self.config.cache_dir.exists() {
151            tracing::debug!(
152                dir = %self.config.cache_dir.display(),
153                "Cache directory does not exist, nothing to clean"
154            );
155            return Ok(stats);
156        }
157
158        // Collect all cache entries
159        let mut entries = Self::scan_cache(&self.config.cache_dir)?;
160        stats.entries_scanned = entries.len();
161
162        // Calculate current size
163        let total_size: u64 = entries.iter().map(|e| e.size).sum();
164        tracing::info!(
165            entries = entries.len(),
166            size_mb = total_size / (1024 * 1024),
167            "Scanned cache"
168        );
169
170        // Sort by last accessed (oldest first)
171        entries.sort_by(|a, b| a.last_accessed.cmp(&b.last_accessed));
172
173        let now = SystemTime::now();
174        let max_age = Duration::from_secs(u64::from(self.config.max_age_days) * 24 * 60 * 60);
175        let mut current_size = total_size;
176
177        // Remove entries that are too old or exceed size limit
178        for entry in entries {
179            let age = now
180                .duration_since(entry.last_accessed)
181                .unwrap_or(Duration::ZERO);
182
183            let should_remove = age > max_age || current_size > self.config.max_size_bytes;
184
185            if should_remove {
186                if self.config.dry_run.is_dry_run() {
187                    tracing::info!(
188                        path = %entry.path.display(),
189                        size = entry.size,
190                        age_days = age.as_secs() / (24 * 60 * 60),
191                        "[dry-run] Would remove"
192                    );
193                } else {
194                    match Self::remove_entry(&entry.path) {
195                        Ok(()) => {
196                            tracing::debug!(
197                                path = %entry.path.display(),
198                                size = entry.size,
199                                "Removed cache entry"
200                            );
201                            stats.entries_removed += 1;
202                            stats.bytes_freed += entry.size;
203                            current_size = current_size.saturating_sub(entry.size);
204                        }
205                        Err(e) => {
206                            tracing::warn!(
207                                path = %entry.path.display(),
208                                error = %e,
209                                "Failed to remove cache entry"
210                            );
211                        }
212                    }
213                }
214            }
215
216            // Stop if we're under the size limit and past max age check
217            if current_size <= self.config.max_size_bytes && age <= max_age {
218                break;
219            }
220        }
221
222        stats.current_size = current_size;
223
224        // Run Nix GC if configured
225        if self.config.run_nix_gc
226            && !self.config.dry_run.is_dry_run()
227            && let Err(e) = Self::run_nix_gc()
228        {
229            tracing::warn!(error = %e, "Nix garbage collection failed");
230        }
231
232        stats.duration_ms = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX);
233
234        tracing::info!(
235            removed = stats.entries_removed,
236            freed_mb = stats.bytes_freed / (1024 * 1024),
237            current_mb = stats.current_size / (1024 * 1024),
238            duration_ms = stats.duration_ms,
239            "Garbage collection complete"
240        );
241
242        Ok(stats)
243    }
244
245    /// Scan the cache directory and collect all cache entries with their metadata.
246    fn scan_cache(dir: &Path) -> Result<Vec<CacheEntry>, GCError> {
247        let mut entries = Vec::new();
248        Self::scan_dir_recursive(dir, &mut entries)?;
249        Ok(entries)
250    }
251
252    /// Recursively traverse a directory tree, collecting file entries.
253    fn scan_dir_recursive(dir: &Path, entries: &mut Vec<CacheEntry>) -> Result<(), GCError> {
254        for entry in fs::read_dir(dir)? {
255            let entry = entry?;
256            let path = entry.path();
257            let metadata = entry.metadata()?;
258
259            if metadata.is_dir() {
260                Self::scan_dir_recursive(&path, entries)?;
261            } else if metadata.is_file()
262                && let Some(cache_entry) = Self::create_entry(&path, &metadata)
263            {
264                entries.push(cache_entry);
265            }
266        }
267        Ok(())
268    }
269
270    /// Create a cache entry from file path and metadata.
271    ///
272    /// Returns `None` if the access/modification time cannot be determined.
273    fn create_entry(path: &Path, metadata: &Metadata) -> Option<CacheEntry> {
274        let size = metadata.len();
275        let last_accessed = metadata.accessed().or_else(|_| metadata.modified()).ok()?;
276
277        Some(CacheEntry {
278            path: path.to_path_buf(),
279            size,
280            last_accessed,
281        })
282    }
283
284    /// Remove a cache entry (file or directory).
285    fn remove_entry(path: &Path) -> Result<(), GCError> {
286        if path.is_dir() {
287            fs::remove_dir_all(path)?;
288        } else {
289            fs::remove_file(path)?;
290        }
291        Ok(())
292    }
293
294    fn run_nix_gc() -> Result<(), GCError> {
295        tracing::info!("Running Nix garbage collection...");
296
297        let output = std::process::Command::new("nix-collect-garbage")
298            .arg("-d") // Delete old generations
299            .output()
300            .map_err(|e| GCError::NixGCFailed(e.to_string()))?;
301
302        if !output.status.success() {
303            let stderr = String::from_utf8_lossy(&output.stderr);
304            return Err(GCError::NixGCFailed(stderr.to_string()));
305        }
306
307        let stdout = String::from_utf8_lossy(&output.stdout);
308        tracing::info!(output = %stdout, "Nix garbage collection complete");
309
310        Ok(())
311    }
312}
313
314impl Default for GarbageCollector {
315    fn default() -> Self {
316        Self::new()
317    }
318}
319
320/// Convenience function to run GC with default settings
321///
322/// # Errors
323///
324/// Returns `GCError` if garbage collection fails.
325pub fn run_gc(cache_dir: &Path) -> Result<GCStats, GCError> {
326    GarbageCollector::new().cache_dir(cache_dir).run()
327}
328
329/// Run GC in dry-run mode to see what would be deleted
330///
331/// # Errors
332///
333/// Returns `GCError` if garbage collection preview fails.
334pub fn preview_gc(cache_dir: &Path) -> Result<GCStats, GCError> {
335    GarbageCollector::new().cache_dir(cache_dir).dry_run().run()
336}
337
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Write a zero-filled file of `size` bytes called `name` inside `dir`
    /// and return its path.
    fn create_test_file(dir: &Path, name: &str, size: usize) -> PathBuf {
        let path = dir.join(name);
        fs::write(&path, vec![0u8; size]).unwrap();
        path
    }

    /// Default collector pointed at `dir`.
    fn collector_for(dir: &Path) -> GarbageCollector {
        GarbageCollector::new().cache_dir(dir)
    }

    #[test]
    fn test_empty_cache() {
        let tmp = TempDir::new().unwrap();
        let stats = collector_for(tmp.path()).run().unwrap();
        assert_eq!(stats.entries_scanned, 0);
        assert_eq!(stats.entries_removed, 0);
    }

    #[test]
    fn test_cache_under_limit() {
        let tmp = TempDir::new().unwrap();
        for (name, size) in [("file1.cache", 1000), ("file2.cache", 2000)] {
            create_test_file(tmp.path(), name, size);
        }

        // 1MB limit
        let stats = collector_for(tmp.path())
            .max_size(1024 * 1024)
            .run()
            .unwrap();

        assert_eq!(stats.entries_scanned, 2);
        // Nothing removed, under limit
        assert_eq!(stats.entries_removed, 0);
    }

    #[test]
    fn test_cache_over_limit() {
        let tmp = TempDir::new().unwrap();
        for name in ["file1.cache", "file2.cache", "file3.cache"] {
            create_test_file(tmp.path(), name, 500);
        }

        // Limit to 1000 bytes
        let stats = collector_for(tmp.path()).max_size(1000).run().unwrap();

        assert!(stats.entries_removed > 0);
        assert!(stats.current_size <= 1000);
    }

    #[test]
    fn test_dry_run() {
        let tmp = TempDir::new().unwrap();
        let file = create_test_file(tmp.path(), "file1.cache", 500);

        // A 100-byte limit forces the file to be selected for removal.
        let stats = collector_for(tmp.path())
            .max_size(100)
            .dry_run()
            .run()
            .unwrap();

        // File should still exist in dry run mode
        assert!(file.exists());
        // Dry run doesn't count as removed
        assert_eq!(stats.entries_removed, 0);
    }

    #[test]
    fn test_nested_directories() {
        let tmp = TempDir::new().unwrap();
        let subdir = tmp.path().join("subdir");
        fs::create_dir(&subdir).unwrap();

        create_test_file(tmp.path(), "root.cache", 100);
        create_test_file(&subdir, "nested.cache", 100);

        let stats = collector_for(tmp.path()).run().unwrap();

        assert_eq!(stats.entries_scanned, 2);
    }

    #[test]
    fn test_nonexistent_cache_dir() {
        let stats = GarbageCollector::new()
            .cache_dir("/nonexistent/path")
            .run()
            .unwrap();
        assert_eq!(stats.entries_scanned, 0);
    }

    #[test]
    fn test_gc_stats_defaults() {
        let stats = GCStats::default();
        assert_eq!(stats.entries_scanned, 0);
        assert_eq!(stats.entries_removed, 0);
        assert_eq!(stats.bytes_freed, 0);
    }

    #[test]
    fn test_gc_stats_clone() {
        let original = GCStats {
            entries_scanned: 10,
            entries_removed: 5,
            bytes_freed: 1024,
            current_size: 2048,
            duration_ms: 100,
        };
        let copy = original.clone();
        assert_eq!(copy.entries_scanned, 10);
        assert_eq!(copy.entries_removed, 5);
        assert_eq!(copy.bytes_freed, 1024);
    }

    #[test]
    fn test_gc_stats_debug() {
        let stats = GCStats::default();
        let rendered = format!("{stats:?}");
        assert!(rendered.contains("GCStats"));
    }

    #[test]
    fn test_gc_config_default() {
        let config = GCConfig::default();
        assert_eq!(config.max_size_bytes, DEFAULT_MAX_SIZE_BYTES);
        assert_eq!(config.max_age_days, DEFAULT_MAX_AGE_DAYS);
        assert!(!config.run_nix_gc);
        assert!(!config.dry_run.is_dry_run());
    }

    #[test]
    fn test_gc_config_clone() {
        let original = GCConfig {
            cache_dir: PathBuf::from("/test"),
            max_size_bytes: 1000,
            max_age_days: 7,
            run_nix_gc: true,
            dry_run: DryRun::Yes,
        };
        let copy = original.clone();
        assert_eq!(copy.cache_dir, PathBuf::from("/test"));
        assert_eq!(copy.max_size_bytes, 1000);
    }

    #[test]
    fn test_gc_config_debug() {
        let config = GCConfig::default();
        let rendered = format!("{config:?}");
        assert!(rendered.contains("GCConfig"));
    }

    #[test]
    fn test_garbage_collector_new() {
        // Just verify it can be created
        let gc = GarbageCollector::new();
        assert!(gc.config.cache_dir.to_string_lossy().contains("cache"));
    }

    #[test]
    fn test_garbage_collector_default() {
        let gc = GarbageCollector::default();
        assert!(gc.config.cache_dir.to_string_lossy().contains("cache"));
    }

    #[test]
    fn test_garbage_collector_with_config() {
        let gc = GarbageCollector::with_config(GCConfig {
            cache_dir: PathBuf::from("/custom/path"),
            max_size_bytes: 5000,
            max_age_days: 14,
            run_nix_gc: false,
            dry_run: DryRun::No,
        });
        assert_eq!(gc.config.cache_dir, PathBuf::from("/custom/path"));
        assert_eq!(gc.config.max_size_bytes, 5000);
    }

    #[test]
    fn test_garbage_collector_builder_cache_dir() {
        let gc = GarbageCollector::new().cache_dir("/test/cache");
        assert_eq!(gc.config.cache_dir, PathBuf::from("/test/cache"));
    }

    #[test]
    fn test_garbage_collector_builder_max_size() {
        let gc = GarbageCollector::new().max_size(12345);
        assert_eq!(gc.config.max_size_bytes, 12345);
    }

    #[test]
    fn test_garbage_collector_builder_max_age_days() {
        let gc = GarbageCollector::new().max_age_days(60);
        assert_eq!(gc.config.max_age_days, 60);
    }

    #[test]
    fn test_garbage_collector_builder_with_nix_gc() {
        let gc = GarbageCollector::new().with_nix_gc();
        assert!(gc.config.run_nix_gc);
    }

    #[test]
    fn test_garbage_collector_builder_dry_run() {
        let gc = GarbageCollector::new().dry_run();
        assert!(gc.config.dry_run.is_dry_run());
    }

    #[test]
    fn test_garbage_collector_builder_chained() {
        let GarbageCollector { config } = GarbageCollector::new()
            .cache_dir("/test")
            .max_size(1000)
            .max_age_days(7)
            .with_nix_gc()
            .dry_run();

        assert_eq!(config.cache_dir, PathBuf::from("/test"));
        assert_eq!(config.max_size_bytes, 1000);
        assert_eq!(config.max_age_days, 7);
        assert!(config.run_nix_gc);
        assert!(config.dry_run.is_dry_run());
    }

    #[test]
    fn test_run_gc_convenience() {
        let tmp = TempDir::new().unwrap();
        let stats = run_gc(tmp.path()).unwrap();
        assert_eq!(stats.entries_scanned, 0);
    }

    #[test]
    fn test_preview_gc_convenience() {
        let tmp = TempDir::new().unwrap();
        let file = create_test_file(tmp.path(), "file.cache", 100);

        let stats = preview_gc(tmp.path()).unwrap();
        assert_eq!(stats.entries_scanned, 1);
        // File should still exist after preview
        assert!(file.exists());
    }

    #[test]
    fn test_gc_error_io() {
        let err = GCError::Io(std::io::Error::from(std::io::ErrorKind::NotFound));
        assert!(err.to_string().contains("IO error"));
    }

    #[test]
    fn test_gc_error_cache_dir_not_found() {
        let err = GCError::CacheDirNotFound(PathBuf::from("/test"));
        assert!(err.to_string().contains("Cache directory not found"));
    }

    #[test]
    fn test_gc_error_nix_gc_failed() {
        let err = GCError::NixGCFailed("command failed".to_string());
        assert!(err.to_string().contains("Nix garbage collection failed"));
    }

    #[test]
    fn test_gc_error_debug() {
        let err = GCError::NixGCFailed("test".to_string());
        let rendered = format!("{err:?}");
        assert!(rendered.contains("NixGCFailed"));
    }

    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn test_constants() {
        // Verify the default constants are reasonable
        assert!(DEFAULT_MAX_SIZE_BYTES > 1024 * 1024); // > 1MB
        assert!(DEFAULT_MAX_AGE_DAYS > 0);
        assert!(DEFAULT_MAX_AGE_DAYS <= 365);
    }

    #[test]
    fn test_deeply_nested_directories() {
        let tmp = TempDir::new().unwrap();
        let deepest = tmp.path().join("level1").join("level2").join("level3");
        fs::create_dir_all(&deepest).unwrap();

        create_test_file(&deepest, "deep.cache", 100);

        let stats = collector_for(tmp.path()).run().unwrap();

        assert_eq!(stats.entries_scanned, 1);
    }

    #[test]
    fn test_gc_with_mixed_content() {
        let tmp = TempDir::new().unwrap();

        // Create files of various sizes
        for (name, size) in [
            ("small.cache", 10),
            ("medium.cache", 1000),
            ("large.cache", 10000),
        ] {
            create_test_file(tmp.path(), name, size);
        }

        let stats = collector_for(tmp.path()).max_size(5000).run().unwrap();

        assert_eq!(stats.entries_scanned, 3);
        // At least one file should be removed to get under limit
        assert!(stats.entries_removed >= 1);
    }
}