Skip to main content

sqry_core/cache/
prune.rs

//! Cache pruning engine for managing cache lifecycle.
2//!
3//! This module provides the core logic for cache pruning operations,
4//! allowing users to reclaim disk space by removing old or excessive entries.
5//!
6//! # Features
7//!
8//! - **Time-based retention**: Remove entries older than N days
9//! - **Size-based retention**: Cap cache to maximum size (oldest-first eviction)
10//! - **Dry-run mode**: Preview deletions without modifying cache
11//! - **Detailed reporting**: Summary of removed/remaining entries and bytes
12//!
13//! # Usage
14//!
15//! ```rust,ignore
16//! use sqry_core::cache::{CacheManager, PruneOptions};
17//! use std::time::Duration;
18//!
19//! let cache = CacheManager::default();
20//! let options = PruneOptions::new()
21//!     .with_max_age(Duration::from_secs(7 * 24 * 3600)) // 7 days
22//!     .with_dry_run(true);
23//!
24//! let report = cache.prune(&options)?;
25//! println!("Would remove {} entries ({} bytes)",
26//!          report.entries_removed, report.bytes_removed);
27//! ```
28
29use anyhow::Result;
30use serde::Serialize;
31use std::fs;
32use std::path::{Path, PathBuf};
33use std::time::{Duration, SystemTime};
34use walkdir::WalkDir;
35
36/// Options for cache pruning operations.
37#[derive(Debug, Clone)]
38pub struct PruneOptions {
39    /// Maximum age for cache entries (older entries will be removed)
40    pub max_age: Option<Duration>,
41    /// Maximum total cache size in bytes (oldest entries removed first)
42    pub max_size: Option<u64>,
43    /// Preview mode - don't actually delete files
44    pub dry_run: bool,
45    /// Output format for results
46    pub output_mode: PruneOutputMode,
47    /// Target cache directory (defaults to user cache dir)
48    pub target_dir: Option<PathBuf>,
49}
50
51impl PruneOptions {
52    /// Create new prune options with defaults
53    #[must_use]
54    pub fn new() -> Self {
55        Self {
56            max_age: None,
57            max_size: None,
58            dry_run: false,
59            output_mode: PruneOutputMode::Human,
60            target_dir: None,
61        }
62    }
63
64    /// Set maximum age for cache entries
65    #[must_use]
66    pub fn with_max_age(mut self, age: Duration) -> Self {
67        self.max_age = Some(age);
68        self
69    }
70
71    /// Set maximum cache size in bytes
72    #[must_use]
73    pub fn with_max_size(mut self, size: u64) -> Self {
74        self.max_size = Some(size);
75        self
76    }
77
78    /// Enable dry-run mode (no actual deletions)
79    #[must_use]
80    pub fn with_dry_run(mut self, enabled: bool) -> Self {
81        self.dry_run = enabled;
82        self
83    }
84
85    /// Set output mode
86    #[must_use]
87    pub fn with_output_mode(mut self, mode: PruneOutputMode) -> Self {
88        self.output_mode = mode;
89        self
90    }
91
92    /// Set target directory
93    #[must_use]
94    pub fn with_target_dir(mut self, dir: PathBuf) -> Self {
95        self.target_dir = Some(dir);
96        self
97    }
98
99    /// Validate options
100    ///
101    /// # Errors
102    ///
103    /// Returns [`anyhow::Error`] when no retention policy is specified.
104    pub fn validate(&self) -> Result<()> {
105        if self.max_age.is_none() && self.max_size.is_none() {
106            anyhow::bail!("At least one retention policy must be specified (--days or --size)");
107        }
108        Ok(())
109    }
110}
111
112impl Default for PruneOptions {
113    fn default() -> Self {
114        Self::new()
115    }
116}
117
/// Rendering format requested for prune results.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PruneOutputMode {
    /// Plain text intended for a person to read.
    Human,
    /// JSON intended for consumption by other programs.
    Json,
}
126
127/// Report summarizing a prune operation.
128#[derive(Debug, Clone, Serialize)]
129pub struct PruneReport {
130    /// Total number of entries examined
131    pub entries_considered: usize,
132    /// Number of entries removed (or would be removed in dry-run)
133    pub entries_removed: usize,
134    /// Bytes reclaimed (or would be reclaimed in dry-run)
135    pub bytes_removed: u64,
136    /// Number of entries remaining after prune
137    pub remaining_entries: usize,
138    /// Bytes remaining after prune
139    pub remaining_bytes: u64,
140    /// Individual operations performed
141    #[serde(skip_serializing_if = "Vec::is_empty")]
142    pub operations: Vec<PruneOperation>,
143}
144
145impl PruneReport {
146    /// Create a new empty report
147    #[must_use]
148    pub fn new() -> Self {
149        Self {
150            entries_considered: 0,
151            entries_removed: 0,
152            bytes_removed: 0,
153            remaining_entries: 0,
154            remaining_bytes: 0,
155            operations: Vec::new(),
156        }
157    }
158}
159
160impl Default for PruneReport {
161    fn default() -> Self {
162        Self::new()
163    }
164}
165
166/// Details of a single prune operation.
167#[derive(Debug, Clone, Serialize)]
168pub struct PruneOperation {
169    /// Path to the cache entry
170    pub path: PathBuf,
171    /// Size of the entry in bytes
172    pub size_bytes: u64,
173    /// Last modified time
174    pub last_modified: SystemTime,
175    /// Reason for pruning this entry
176    pub reason: PruneReason,
177    /// Whether this was a dry-run operation
178    pub dry_run: bool,
179}
180
181/// Reason why an entry was pruned.
182#[derive(Debug, Clone, Serialize)]
183pub enum PruneReason {
184    /// Entry older than max age threshold
185    OlderThan(#[serde(with = "duration_serde")] Duration),
186    /// Entry removed to enforce size cap
187    ExceedsSizeCap {
188        /// Maximum cache size in bytes
189        cap: u64,
190    },
191}
192
193/// Serde module for Duration serialization
194mod duration_serde {
195    use serde::{Serialize, Serializer};
196    use std::time::Duration;
197
198    pub fn serialize<S>(duration: &Duration, serializer: S) -> Result<S::Ok, S::Error>
199    where
200        S: Serializer,
201    {
202        duration.as_secs().serialize(serializer)
203    }
204}
205
/// One cache entry as seen by the prune engine: a `.bin` data file plus an
/// optional `.bin.lock` companion.
#[derive(Clone, Debug)]
struct CacheEntry {
    /// Location of the `.bin` data file.
    data_path: PathBuf,
    /// Location of the companion `.bin.lock` file, when one was found.
    lock_path: Option<PathBuf>,
    /// Combined size in bytes of the data file and the lock file.
    size_bytes: u64,
    /// Modification timestamp of the data file.
    last_modified: SystemTime,
}
218
219impl CacheEntry {
220    /// Total size including lock file
221    fn total_size(&self) -> u64 {
222        self.size_bytes
223    }
224}
225
226/// Engine for performing cache prune operations.
227pub struct PruneEngine {
228    options: PruneOptions,
229}
230
231impl PruneEngine {
232    /// Create a new prune engine with the given options
233    ///
234    /// # Errors
235    ///
236    /// Returns [`anyhow::Error`] when option validation fails.
237    pub fn new(options: PruneOptions) -> Result<Self> {
238        options.validate()?;
239        Ok(Self { options })
240    }
241
242    /// Execute the prune operation and return a report
243    ///
244    /// # Errors
245    ///
246    /// Returns [`anyhow::Error`] when determining a prune reason fails.
247    pub fn execute(&self, cache_dir: &Path) -> Result<PruneReport> {
248        if !cache_dir.exists() {
249            return Ok(PruneReport::new());
250        }
251
252        // Collect all cache entries
253        let entries = Self::collect_entries(cache_dir);
254
255        if entries.is_empty() {
256            return Ok(PruneReport::new());
257        }
258
259        let to_remove = self.select_entries_to_remove(&entries);
260        let mut report = Self::initialize_report(&entries, &to_remove);
261
262        self.perform_pruning(&entries, &to_remove, &mut report)?;
263
264        Ok(report)
265    }
266
267    fn initialize_report(
268        entries: &[CacheEntry],
269        to_remove: &std::collections::HashSet<PathBuf>,
270    ) -> PruneReport {
271        let mut report = PruneReport::new();
272        report.entries_considered = entries.len();
273
274        // Calculate statistics
275        for entry in entries {
276            if to_remove.contains(&entry.data_path) {
277                report.entries_removed += 1;
278                report.bytes_removed += entry.total_size();
279            } else {
280                report.remaining_entries += 1;
281                report.remaining_bytes += entry.total_size();
282            }
283        }
284        report
285    }
286
287    fn perform_pruning(
288        &self,
289        entries: &[CacheEntry],
290        to_remove: &std::collections::HashSet<PathBuf>,
291        report: &mut PruneReport,
292    ) -> Result<()> {
293        // Perform deletions (unless dry-run)
294        for entry in entries {
295            if to_remove.contains(&entry.data_path) {
296                let reason = self.determine_reason(entry)?;
297
298                if !self.options.dry_run {
299                    Self::delete_entry(entry);
300                }
301
302                report.operations.push(PruneOperation {
303                    path: entry.data_path.clone(),
304                    size_bytes: entry.total_size(),
305                    last_modified: entry.last_modified,
306                    reason,
307                    dry_run: self.options.dry_run,
308                });
309            }
310        }
311        Ok(())
312    }
313
314    /// Collect all cache entries from the cache directory
315    fn collect_entries(cache_dir: &Path) -> Vec<CacheEntry> {
316        let mut entries = Vec::new();
317
318        for entry in WalkDir::new(cache_dir)
319            .follow_links(false)
320            .into_iter()
321            .filter_map(std::result::Result::ok)
322        {
323            if let Some(cache_entry) = Self::process_dir_entry(entry.path()) {
324                entries.push(cache_entry);
325            }
326        }
327
328        entries
329    }
330
331    fn process_dir_entry(path: &Path) -> Option<CacheEntry> {
332        // Only process .bin files (not .bin.lock)
333        if !path.is_file() || path.extension().is_none_or(|ext| ext != "bin") {
334            return None;
335        }
336
337        // Skip if this is a .bin.lock file
338        if path
339            .file_name()
340            .and_then(|n| n.to_str())
341            .is_some_and(|n| n.ends_with(".bin.lock"))
342        {
343            return None;
344        }
345
346        // Get metadata
347        let metadata = match fs::metadata(path) {
348            Ok(m) => m,
349            Err(e) => {
350                log::warn!("Failed to read metadata for {}: {e}", path.display());
351                return None;
352            }
353        };
354
355        // Note: We use .ok() here instead of context() because we are in an Option context
356        let last_modified = metadata.modified().ok()?;
357
358        // Check for corresponding lock file
359        let mut lock_path_buf = path.to_path_buf();
360        lock_path_buf.set_extension("bin.lock");
361        let lock_path = if lock_path_buf.exists() {
362            Some(lock_path_buf)
363        } else {
364            None
365        };
366
367        // Calculate total size (data + lock)
368        let lock_size = if let Some(ref lp) = lock_path {
369            fs::metadata(lp).map(|m| m.len()).unwrap_or(0)
370        } else {
371            0
372        };
373
374        Some(CacheEntry {
375            data_path: path.to_path_buf(),
376            lock_path,
377            size_bytes: metadata.len() + lock_size,
378            last_modified,
379        })
380    }
381
382    /// Select which entries should be removed based on policies
383    fn select_entries_to_remove(
384        &self,
385        entries: &[CacheEntry],
386    ) -> std::collections::HashSet<PathBuf> {
387        let mut to_remove = std::collections::HashSet::new();
388        let now = SystemTime::now();
389
390        self.apply_age_policy(entries, now, &mut to_remove);
391        self.apply_size_policy(entries, &mut to_remove);
392
393        to_remove
394    }
395
396    fn apply_age_policy(
397        &self,
398        entries: &[CacheEntry],
399        now: SystemTime,
400        to_remove: &mut std::collections::HashSet<PathBuf>,
401    ) {
402        if let Some(max_age) = self.options.max_age {
403            let cutoff = now - max_age;
404            for entry in entries {
405                if entry.last_modified < cutoff {
406                    to_remove.insert(entry.data_path.clone());
407                }
408            }
409        }
410    }
411
412    fn apply_size_policy(
413        &self,
414        entries: &[CacheEntry],
415        to_remove: &mut std::collections::HashSet<PathBuf>,
416    ) {
417        let Some(max_size) = self.options.max_size else {
418            return;
419        };
420
421        let mut remaining = collect_remaining_entries(entries, to_remove);
422        remaining.sort_by_key(|e| e.last_modified);
423
424        let current_size = remaining_total_size(&remaining);
425        if current_size > max_size {
426            mark_entries_for_size_limit(&remaining, max_size, current_size, to_remove);
427        }
428    }
429
430    /// Determine the reason for removing an entry
431    fn determine_reason(&self, entry: &CacheEntry) -> Result<PruneReason> {
432        let now = SystemTime::now();
433
434        // Check age policy first
435        if let Some(max_age) = self.options.max_age {
436            let cutoff = now - max_age;
437            if entry.last_modified < cutoff {
438                return Ok(PruneReason::OlderThan(max_age));
439            }
440        }
441
442        // Otherwise it must be size policy
443        if let Some(max_size) = self.options.max_size {
444            return Ok(PruneReason::ExceedsSizeCap { cap: max_size });
445        }
446
447        // Should not reach here due to validation
448        anyhow::bail!("No valid prune reason found for entry");
449    }
450
451    /// Delete a cache entry and its associated lock file
452    fn delete_entry(entry: &CacheEntry) {
453        // Delete data file
454        if let Err(e) = fs::remove_file(&entry.data_path) {
455            log::warn!("Failed to delete {}: {}", entry.data_path.display(), e);
456        }
457
458        // Delete lock file if it exists
459        if let Some(ref lock_path) = entry.lock_path
460            && let Err(e) = fs::remove_file(lock_path)
461        {
462            log::warn!("Failed to delete lock file {}: {e}", lock_path.display());
463        }
464    }
465}
466
467fn collect_remaining_entries<'a>(
468    entries: &'a [CacheEntry],
469    to_remove: &std::collections::HashSet<PathBuf>,
470) -> Vec<&'a CacheEntry> {
471    entries
472        .iter()
473        .filter(|entry| !to_remove.contains(&entry.data_path))
474        .collect()
475}
476
477fn remaining_total_size(remaining: &[&CacheEntry]) -> u64 {
478    remaining.iter().map(|entry| entry.total_size()).sum()
479}
480
481fn mark_entries_for_size_limit(
482    remaining: &[&CacheEntry],
483    max_size: u64,
484    current_size: u64,
485    to_remove: &mut std::collections::HashSet<PathBuf>,
486) {
487    let mut cumulative_size = current_size;
488    for entry in remaining {
489        if cumulative_size <= max_size {
490            break;
491        }
492        to_remove.insert(entry.data_path.clone());
493        cumulative_size -= entry.total_size();
494    }
495}
496
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Number of seconds in one day, used to express ages and limits in days.
    const SECS_PER_DAY: u64 = 24 * 3600;

    /// Write a zero-filled file called `name` of `size` bytes into `dir` and
    /// backdate its modification time by `age_days` days.
    fn create_test_entry(dir: &Path, name: &str, size: u64, age_days: u64) -> PathBuf {
        let path = dir.join(name);

        // Sizes beyond `usize::MAX` (possible on 32-bit targets) are clamped.
        let byte_count: usize = size.try_into().unwrap_or(usize::MAX);
        fs::write(&path, vec![0u8; byte_count]).unwrap();

        let backdated = SystemTime::now() - Duration::from_secs(age_days * SECS_PER_DAY);
        let mtime = filetime::FileTime::from_system_time(backdated);
        filetime::set_file_mtime(&path, mtime).unwrap();

        path
    }

    /// Options without any policy are rejected; either policy alone is accepted.
    #[test]
    fn test_prune_options_validation() {
        let no_policy = PruneOptions::new();
        assert!(
            no_policy.validate().is_err(),
            "Should require at least one policy"
        );

        let age_only = PruneOptions::new().with_max_age(Duration::from_secs(86400));
        assert!(age_only.validate().is_ok());

        let size_only = PruneOptions::new().with_max_size(1024 * 1024);
        assert!(size_only.validate().is_ok());
    }

    /// With a 7-day limit, the 10-day-old entry is selected and the 2-day-old one kept.
    #[test]
    fn test_age_policy_filters_correctly() {
        let tmp_cache_dir = TempDir::new().unwrap();
        let cache_root = tmp_cache_dir.path();

        create_test_entry(cache_root, "old.bin", 100, 10);
        create_test_entry(cache_root, "recent.bin", 100, 2);

        let seven_days = Duration::from_secs(7 * SECS_PER_DAY);
        let opts = PruneOptions::new()
            .with_max_age(seven_days)
            .with_dry_run(true);

        let report = PruneEngine::new(opts)
            .unwrap()
            .execute(cache_root)
            .unwrap();

        assert_eq!(report.entries_considered, 2);
        assert_eq!(report.entries_removed, 1);
        assert_eq!(report.remaining_entries, 1);
    }

    /// A dry run must leave an expired entry on disk.
    #[test]
    fn test_dry_run_no_deletions() {
        let tmp_cache_dir = TempDir::new().unwrap();
        let cache_root = tmp_cache_dir.path();
        create_test_entry(cache_root, "old.bin", 100, 10);

        let seven_days = Duration::from_secs(7 * SECS_PER_DAY);
        let opts = PruneOptions::new()
            .with_max_age(seven_days)
            .with_dry_run(true);

        let engine = PruneEngine::new(opts).unwrap();
        let _report = engine.execute(cache_root).unwrap();

        assert!(cache_root.join("old.bin").exists());
    }

    /// Three 100-byte entries under a 200-byte cap: exactly one entry is selected.
    #[test]
    fn test_size_policy_culls_oldest_first() {
        let tmp_cache_dir = TempDir::new().unwrap();
        let cache_root = tmp_cache_dir.path();

        create_test_entry(cache_root, "oldest.bin", 100, 10);
        create_test_entry(cache_root, "middle.bin", 100, 5);
        create_test_entry(cache_root, "newest.bin", 100, 1);

        let opts = PruneOptions::new().with_max_size(200).with_dry_run(true);

        let report = PruneEngine::new(opts)
            .unwrap()
            .execute(cache_root)
            .unwrap();

        assert_eq!(report.entries_considered, 3);
        assert_eq!(report.entries_removed, 1);
        assert_eq!(report.remaining_entries, 2);
        assert!(report.remaining_bytes <= 200);
    }
}
588}