Skip to main content

devboy_assets/
rotation.rs

1//! LRU rotation / eviction policy for the asset cache.
2//!
3//! The rotator is stateless: every call evaluates the index and deletes
4//! whichever assets need to go based on the configured
5//! [`EvictionPolicy`] and size / age limits.
6//!
7//! Eviction priority for LRU:
8//!
9//! 1. Assets older than `max_file_age` (by `last_accessed`) are removed
10//!    unconditionally. This implements the TTL behaviour described in the
11//!    ADR.
12//! 2. If the remaining cache is still over `max_cache_size`, assets are
13//!    removed in LRU order (oldest `last_accessed` first) until the budget
14//!    is met. Within the same timestamp bucket, larger files go first so
15//!    one multi-GB video is preferred over many small logs.
16
17use std::time::Duration;
18
19use crate::cache::CacheManager;
20use crate::config::{EvictionPolicy, ResolvedAssetConfig};
21use crate::error::Result;
22use crate::index::{AssetIndex, CachedAsset, now_ms};
23
24/// Stats produced by a rotation pass — exposed so callers can log what
25/// happened and tests can assert on side-effects.
26#[derive(Debug, Clone, Default, PartialEq, Eq)]
27pub struct RotationStats {
28    /// Number of assets removed by age / TTL.
29    pub aged_out: usize,
30    /// Number of assets removed by cache-size enforcement (LRU or FIFO,
31    /// depending on the configured [`EvictionPolicy`]).
32    pub size_evicted: usize,
33    pub bytes_freed: u64,
34}
35
36impl RotationStats {
37    /// Total number of assets removed.
38    pub fn removed(&self) -> usize {
39        self.aged_out + self.size_evicted
40    }
41}
42
43/// Applies an [`EvictionPolicy`] to an [`AssetIndex`] + cache root.
44#[derive(Debug, Clone)]
45pub struct Rotator {
46    max_cache_size: u64,
47    max_file_age: Duration,
48    policy: EvictionPolicy,
49}
50
51impl Rotator {
52    /// Build a rotator from a resolved config.
53    pub fn new(config: &ResolvedAssetConfig) -> Self {
54        Self {
55            max_cache_size: config.max_cache_size,
56            max_file_age: config.max_file_age,
57            policy: config.eviction_policy,
58        }
59    }
60
61    /// Run a rotation pass in-place. Mutates the index and deletes files
62    /// from disk via the supplied [`CacheManager`].
63    ///
64    /// For [`EvictionPolicy::None`] this is a no-op.
65    pub fn rotate(&self, index: &mut AssetIndex, cache: &CacheManager) -> Result<RotationStats> {
66        let mut stats = RotationStats::default();
67        if matches!(self.policy, EvictionPolicy::None) {
68            return Ok(stats);
69        }
70
71        // Phase 1 — age / TTL.
72        //
73        // `Duration::as_millis()` returns `u128`, so very large values would
74        // silently truncate on the `as u64` cast. Clamp to `u64::MAX`
75        // explicitly — in practice that means "effectively infinite" and
76        // the saturating subtraction below yields a `cutoff_ms` of 0,
77        // disabling the TTL check.
78        let max_age_ms: u64 = self.max_file_age.as_millis().min(u128::from(u64::MAX)) as u64;
79        let cutoff_ms = now_ms().saturating_sub(max_age_ms);
80        let expired: Vec<String> = index
81            .assets
82            .iter()
83            .filter(|(_, a)| a.last_accessed_ms < cutoff_ms)
84            .map(|(id, _)| id.clone())
85            .collect();
86        for id in expired {
87            if let Some(asset) = index.remove(&id) {
88                remove_cached_file(cache, &asset, &mut stats.bytes_freed)?;
89                stats.aged_out += 1;
90            }
91        }
92
93        // Phase 2 — size budget. A budget of 0 is treated as "unlimited",
94        // matching the semantics enforced in `AssetManager::store`.
95        //
96        // We track a running `current_size` instead of recomputing
97        // `index.total_size()` on every iteration: the naive version is
98        // O(n²) because each call re-sums every remaining asset, and a
99        // large cache can pay that cost repeatedly during a single
100        // eviction pass. Subtracting removed sizes keeps the loop
101        // O(n log n) dominated by the sort.
102        let mut current_size = index.total_size();
103        if self.max_cache_size > 0 && current_size > self.max_cache_size {
104            let mut entries: Vec<CachedAsset> = index.assets.values().cloned().collect();
105            self.sort_for_eviction(&mut entries);
106
107            for asset in entries {
108                if current_size <= self.max_cache_size {
109                    break;
110                }
111                if let Some(removed) = index.remove(&asset.id) {
112                    current_size = current_size.saturating_sub(removed.size);
113                    remove_cached_file(cache, &removed, &mut stats.bytes_freed)?;
114                    stats.size_evicted += 1;
115                }
116            }
117        }
118
119        Ok(stats)
120    }
121
122    /// Sort entries so that the first element is the best eviction
123    /// candidate. A deterministic final tie-breaker on `asset.id` ensures
124    /// the order is fully reproducible even when timestamps and sizes are
125    /// equal (which can happen with rapid consecutive stores or in tests).
126    /// Without this, `HashMap::values()` iteration order would leak into
127    /// eviction decisions, making `store()` unreliable under tight budgets.
128    fn sort_for_eviction(&self, entries: &mut [CachedAsset]) {
129        match self.policy {
130            EvictionPolicy::Lru => {
131                entries.sort_by(|a, b| {
132                    a.last_accessed_ms
133                        .cmp(&b.last_accessed_ms)
134                        .then_with(|| b.size.cmp(&a.size))
135                        .then_with(|| a.id.cmp(&b.id))
136                });
137            }
138            EvictionPolicy::Fifo => {
139                entries.sort_by(|a, b| {
140                    a.downloaded_at_ms
141                        .cmp(&b.downloaded_at_ms)
142                        .then_with(|| a.id.cmp(&b.id))
143                });
144            }
145            EvictionPolicy::None => {}
146        }
147    }
148
149    /// Check whether the index is already within the configured budget.
150    ///
151    /// This is an index-only check: it compares the total tracked size
152    /// against the configured limit without touching the filesystem. A
153    /// budget of 0 is treated as "unlimited".
154    pub fn within_budget(&self, index: &AssetIndex) -> bool {
155        self.max_cache_size == 0 || index.total_size() <= self.max_cache_size
156    }
157}
158
159/// Resolve and delete a cached file belonging to an index entry, bumping
160/// `bytes_freed` **only** when a file that actually existed was removed
161/// from disk. Stale entries whose file has already been removed externally,
162/// or whose stored path resolves outside the cache root, are dropped from
163/// the index but their size is not counted as "bytes freed".
164///
165/// The resolution goes through [`crate::cache::resolve_under_root`], which
166/// validates that the target stays inside the cache directory — a safety
167/// guard against tampered `index.json` entries with absolute or traversing
168/// paths.
169fn remove_cached_file(
170    cache: &CacheManager,
171    asset: &CachedAsset,
172    bytes_freed: &mut u64,
173) -> Result<()> {
174    let Some(abs) = crate::cache::resolve_under_root(cache.root(), &asset.local_path) else {
175        tracing::warn!(
176            asset_id = asset.id.as_str(),
177            path = ?asset.local_path,
178            "skipping eviction — index entry points outside the cache root",
179        );
180        return Ok(());
181    };
182
183    // Only count bytes we actually freed. `CacheManager::delete` is
184    // idempotent and treats `NotFound` as success, so we need to check
185    // upfront whether the file is really there.
186    let existed = cache.exists(&abs);
187    cache.delete(&abs)?;
188    if existed {
189        *bytes_freed += asset.size;
190    }
191    Ok(())
192}
193
194#[cfg(test)]
195mod tests {
196    use super::*;
197    use crate::cache::CacheManager;
198    use crate::config::ResolvedAssetConfig;
199    use crate::index::{CachedAsset, NewCachedAsset};
200    use devboy_core::asset::AssetContext;
201    use std::path::PathBuf;
202    use tempfile::tempdir;
203
204    fn cfg(cache_dir: PathBuf, max_size: u64, max_age: Duration) -> ResolvedAssetConfig {
205        ResolvedAssetConfig {
206            cache_dir,
207            max_cache_size: max_size,
208            max_file_age: max_age,
209            eviction_policy: EvictionPolicy::Lru,
210        }
211    }
212
213    fn store_asset(cache: &CacheManager, id: &str, size: u64) -> CachedAsset {
214        let ctx = AssetContext::Issue {
215            key: "DEV-1".into(),
216        };
217        let data = vec![0u8; size as usize];
218        let stored = cache
219            .store(&ctx, id, &format!("{id}.bin"), &data)
220            .expect("store");
221        let rel = stored
222            .path
223            .strip_prefix(cache.root())
224            .unwrap()
225            .to_path_buf();
226        CachedAsset::new(NewCachedAsset {
227            id: id.into(),
228            filename: format!("{id}.bin"),
229            mime_type: None,
230            size: stored.size,
231            local_path: rel,
232            context: ctx,
233            checksum_sha256: stored.checksum_sha256,
234            remote_url: None,
235        })
236    }
237
238    #[test]
239    fn policy_none_is_noop() {
240        let tmp = tempdir().unwrap();
241        let cache = CacheManager::new(tmp.path().to_path_buf()).unwrap();
242        let mut index = AssetIndex::empty();
243
244        let asset = store_asset(&cache, "a1", 10);
245        index.upsert(asset);
246
247        let mut resolved = cfg(tmp.path().to_path_buf(), 0, Duration::from_secs(1));
248        resolved.eviction_policy = EvictionPolicy::None;
249        let rotator = Rotator::new(&resolved);
250
251        let stats = rotator.rotate(&mut index, &cache).unwrap();
252        assert_eq!(stats.removed(), 0);
253        assert_eq!(index.len(), 1);
254    }
255
256    #[test]
257    fn age_based_eviction() {
258        let tmp = tempdir().unwrap();
259        let cache = CacheManager::new(tmp.path().to_path_buf()).unwrap();
260        let mut index = AssetIndex::empty();
261
262        // old asset — last accessed 10 minutes ago
263        let mut old = store_asset(&cache, "old", 5);
264        old.last_accessed_ms = now_ms() - 600_000;
265        index.upsert(old);
266
267        // fresh asset — just accessed
268        index.upsert(store_asset(&cache, "fresh", 5));
269
270        let resolved = cfg(
271            tmp.path().to_path_buf(),
272            1024 * 1024,
273            Duration::from_secs(60),
274        );
275        let rotator = Rotator::new(&resolved);
276
277        let stats = rotator.rotate(&mut index, &cache).unwrap();
278        assert_eq!(stats.aged_out, 1);
279        assert_eq!(stats.size_evicted, 0);
280        assert!(index.get("old").is_none());
281        assert!(index.get("fresh").is_some());
282    }
283
284    #[test]
285    fn size_based_lru_eviction() {
286        let tmp = tempdir().unwrap();
287        let cache = CacheManager::new(tmp.path().to_path_buf()).unwrap();
288        let mut index = AssetIndex::empty();
289
290        let mut a = store_asset(&cache, "a", 100);
291        a.last_accessed_ms = 1_000;
292        index.upsert(a);
293
294        let mut b = store_asset(&cache, "b", 100);
295        b.last_accessed_ms = 2_000;
296        index.upsert(b);
297
298        let mut c = store_asset(&cache, "c", 100);
299        c.last_accessed_ms = 3_000;
300        index.upsert(c);
301
302        // Budget = 150 bytes. We expect `a` and `b` to be evicted (oldest),
303        // leaving `c` (100 bytes) which fits.
304        let resolved = cfg(
305            tmp.path().to_path_buf(),
306            150,
307            Duration::from_secs(100 * 365 * 86_400),
308        );
309        let rotator = Rotator::new(&resolved);
310
311        let stats = rotator.rotate(&mut index, &cache).unwrap();
312        assert_eq!(stats.aged_out, 0);
313        assert_eq!(stats.size_evicted, 2);
314        assert!(index.get("a").is_none());
315        assert!(index.get("b").is_none());
316        assert!(index.get("c").is_some());
317        assert!(index.total_size() <= 150);
318    }
319
320    #[test]
321    fn fifo_orders_by_download_time() {
322        let tmp = tempdir().unwrap();
323        let cache = CacheManager::new(tmp.path().to_path_buf()).unwrap();
324        let mut index = AssetIndex::empty();
325
326        let mut a = store_asset(&cache, "a", 100);
327        a.downloaded_at_ms = 1_000;
328        a.last_accessed_ms = 9_000; // FIFO should ignore this
329        index.upsert(a);
330
331        let mut b = store_asset(&cache, "b", 100);
332        b.downloaded_at_ms = 2_000;
333        b.last_accessed_ms = 1_000;
334        index.upsert(b);
335
336        let mut resolved = cfg(
337            tmp.path().to_path_buf(),
338            150,
339            Duration::from_secs(100 * 365 * 86_400),
340        );
341        resolved.eviction_policy = EvictionPolicy::Fifo;
342        let rotator = Rotator::new(&resolved);
343
344        rotator.rotate(&mut index, &cache).unwrap();
345        // FIFO keeps `b` (newer download), evicts `a` (older download).
346        assert!(index.get("a").is_none());
347        assert!(index.get("b").is_some());
348    }
349
350    #[test]
351    fn within_budget_fast_path() {
352        let tmp = tempdir().unwrap();
353        let mut index = AssetIndex::empty();
354        let resolved = cfg(
355            tmp.path().to_path_buf(),
356            1_000,
357            Duration::from_secs(100 * 365 * 86_400),
358        );
359        let rotator = Rotator::new(&resolved);
360        assert!(rotator.within_budget(&index));
361
362        index.upsert(CachedAsset::new(NewCachedAsset {
363            id: "a".into(),
364            filename: "a.bin".into(),
365            mime_type: None,
366            size: 500,
367            local_path: PathBuf::from("issues/x/a.bin"),
368            context: AssetContext::Issue { key: "x".into() },
369            checksum_sha256: "deadbeef".into(),
370            remote_url: None,
371        }));
372        assert!(rotator.within_budget(&index));
373    }
374
375    #[test]
376    fn age_ties_prefer_larger_files() {
377        let tmp = tempdir().unwrap();
378        let cache = CacheManager::new(tmp.path().to_path_buf()).unwrap();
379        let mut index = AssetIndex::empty();
380
381        let mut small = store_asset(&cache, "small", 10);
382        small.last_accessed_ms = 1_000;
383        index.upsert(small);
384
385        let mut big = store_asset(&cache, "big", 1_000);
386        big.last_accessed_ms = 1_000;
387        index.upsert(big);
388
389        // Budget tiny so at least one has to go; ties → larger first.
390        let resolved = cfg(
391            tmp.path().to_path_buf(),
392            100,
393            Duration::from_secs(100 * 365 * 86_400),
394        );
395        let rotator = Rotator::new(&resolved);
396        rotator.rotate(&mut index, &cache).unwrap();
397        // The big one should be evicted first.
398        assert!(index.get("big").is_none());
399        assert!(index.get("small").is_some());
400    }
401
402    #[test]
403    fn bytes_freed_only_counts_existing_files() {
404        let tmp = tempdir().unwrap();
405        let cache = CacheManager::new(tmp.path().to_path_buf()).unwrap();
406        let mut index = AssetIndex::empty();
407
408        // Real, on-disk asset — eviction should count its bytes.
409        let mut real = store_asset(&cache, "real", 100);
410        real.last_accessed_ms = 1_000;
411        index.upsert(real);
412
413        // Phantom asset — index entry exists but the file is gone. The
414        // rotator must still drop it from the index but must not count it
415        // as "freed" since there was nothing on disk to free.
416        let phantom = CachedAsset::new(NewCachedAsset {
417            id: "phantom".into(),
418            filename: "phantom.bin".into(),
419            mime_type: None,
420            size: 999,
421            local_path: PathBuf::from("issues/ghost/phantom.bin"),
422            context: AssetContext::Issue {
423                key: "ghost".into(),
424            },
425            checksum_sha256: "abcd".into(),
426            remote_url: None,
427        });
428        index.upsert(CachedAsset {
429            last_accessed_ms: 1_000,
430            ..phantom
431        });
432
433        // Budget tiny so both entries get considered; ages equal.
434        let resolved = cfg(
435            tmp.path().to_path_buf(),
436            50,
437            Duration::from_secs(100 * 365 * 86_400),
438        );
439        let rotator = Rotator::new(&resolved);
440        let stats = rotator.rotate(&mut index, &cache).unwrap();
441
442        // Both were dropped from the index…
443        assert!(index.get("real").is_none());
444        assert!(index.get("phantom").is_none());
445        // …but only the real file counted.
446        assert_eq!(stats.bytes_freed, 100);
447    }
448
449    #[test]
450    fn bytes_freed_skips_unsafe_paths() {
451        let tmp = tempdir().unwrap();
452        let cache = CacheManager::new(tmp.path().to_path_buf()).unwrap();
453        let mut index = AssetIndex::empty();
454
455        // Tampered index entry pointing outside the cache root.
456        index.upsert(CachedAsset {
457            last_accessed_ms: 1_000,
458            ..CachedAsset::new(NewCachedAsset {
459                id: "hostile".into(),
460                filename: "passwd".into(),
461                mime_type: None,
462                size: 4096,
463                local_path: PathBuf::from("../../etc/passwd"),
464                context: AssetContext::Issue { key: "x".into() },
465                checksum_sha256: "dead".into(),
466                remote_url: None,
467            })
468        });
469
470        let resolved = cfg(
471            tmp.path().to_path_buf(),
472            1,
473            Duration::from_secs(100 * 365 * 86_400),
474        );
475        let rotator = Rotator::new(&resolved);
476        let stats = rotator.rotate(&mut index, &cache).unwrap();
477
478        // Entry dropped from the index but bytes_freed stays at 0 — we
479        // never touched the outside path.
480        assert!(index.get("hostile").is_none());
481        assert_eq!(stats.bytes_freed, 0);
482    }
483}