Skip to main content

devboy_assets/
index.rs

1//! On-disk index of cached assets (`index.json`).
2//!
3//! The index is a plain JSON file persisted alongside the cache directory.
4//! It stores the metadata that must survive process restarts:
5//!
6//! - Map of `asset_id -> CachedAsset` (filename, size, checksum, context, ...)
7//! - `last_accessed` timestamps used by the LRU rotator
8//!
9//! Writes go through a temp file + rename for atomicity, so that a crash
10//! mid-write cannot leave a half-written index on disk.
11
12use devboy_core::asset::AssetContext;
13use serde::{Deserialize, Serialize};
14use std::collections::HashMap;
15use std::io::Write as _;
16use std::path::{Path, PathBuf};
17use std::time::{SystemTime, UNIX_EPOCH};
18
19use crate::error::{AssetError, Result};
20
/// Name of the index file; it is joined onto the cache root directory
/// whenever the index is loaded or saved.
pub const INDEX_FILENAME: &'static str = "index.json";

/// Schema version written into every index. An index whose stored version
/// differs from this value is discarded on load.
pub const INDEX_VERSION: u32 = 1;
26
27/// A single cached asset as persisted in the index.
28#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
29pub struct CachedAsset {
30    /// Stable identifier (UUID or provider id).
31    pub id: String,
32    pub filename: String,
33    /// MIME type if known.
34    #[serde(default, skip_serializing_if = "Option::is_none")]
35    pub mime_type: Option<String>,
36    pub size: u64,
37    /// Path relative to the cache root.
38    pub local_path: PathBuf,
39    /// Context the asset belongs to.
40    pub context: AssetContext,
41    /// SHA-256 checksum in hex.
42    pub checksum_sha256: String,
43    /// Remote URL at the provider, if available.
44    #[serde(default, skip_serializing_if = "Option::is_none")]
45    pub remote_url: Option<String>,
46    /// UNIX epoch milliseconds — when the file was first downloaded.
47    pub downloaded_at_ms: u64,
48    /// UNIX epoch milliseconds — when the file was last accessed.
49    pub last_accessed_ms: u64,
50}
51
52/// Parameters accepted by [`CachedAsset::new`].
53///
54/// Introduced to keep the constructor below the clippy
55/// `too_many_arguments` lint threshold.
56#[derive(Debug, Clone)]
57pub struct NewCachedAsset {
58    /// Stable identifier (UUID or provider id).
59    pub id: String,
60    pub filename: String,
61    /// MIME type if known.
62    pub mime_type: Option<String>,
63    pub size: u64,
64    /// Path relative to the cache root.
65    pub local_path: PathBuf,
66    /// Context the asset belongs to.
67    pub context: AssetContext,
68    /// SHA-256 checksum in hex.
69    pub checksum_sha256: String,
70    /// Remote URL at the provider if any.
71    pub remote_url: Option<String>,
72}
73
74impl CachedAsset {
75    /// Convenience constructor — wraps [`NewCachedAsset`] and stamps
76    /// `downloaded_at` / `last_accessed` to the current time.
77    pub fn new(params: NewCachedAsset) -> Self {
78        let now = now_ms();
79        Self {
80            id: params.id,
81            filename: params.filename,
82            mime_type: params.mime_type,
83            size: params.size,
84            local_path: params.local_path,
85            context: params.context,
86            checksum_sha256: params.checksum_sha256,
87            remote_url: params.remote_url,
88            downloaded_at_ms: now,
89            last_accessed_ms: now,
90        }
91    }
92}
93
94/// In-memory representation of the `index.json` file.
95#[derive(Debug, Clone, Serialize, Deserialize)]
96pub struct AssetIndex {
97    /// Schema version.
98    pub version: u32,
99    /// All known assets keyed by id.
100    #[serde(default)]
101    pub assets: HashMap<String, CachedAsset>,
102}
103
104impl Default for AssetIndex {
105    fn default() -> Self {
106        Self {
107            version: INDEX_VERSION,
108            assets: HashMap::new(),
109        }
110    }
111}
112
113impl AssetIndex {
114    /// Create an empty index at the current schema version.
115    pub fn empty() -> Self {
116        Self::default()
117    }
118
119    /// Load the index from `cache_dir/index.json`, returning an empty index
120    /// if the file does not exist.
121    ///
122    /// If the file exists but cannot be parsed (or has a version mismatch),
123    /// an empty index is returned and all non-index files under
124    /// `cache_dir` are purged so that blobs that are no longer tracked by
125    /// the index cannot accumulate indefinitely on disk.
126    pub fn load(cache_dir: &Path) -> Result<Self> {
127        let path = cache_dir.join(INDEX_FILENAME);
128        if !path.exists() {
129            return Ok(Self::empty());
130        }
131
132        let bytes = std::fs::read(&path)?;
133        match serde_json::from_slice::<Self>(&bytes) {
134            Ok(mut index) => {
135                if index.version != INDEX_VERSION {
136                    tracing::warn!(
137                        expected = INDEX_VERSION,
138                        found = index.version,
139                        "asset index version mismatch, purging cache and rebuilding"
140                    );
141                    purge_cache_blobs(cache_dir);
142                    index = Self::empty();
143                }
144                Ok(index)
145            }
146            Err(err) => {
147                tracing::warn!(
148                    ?err,
149                    "failed to parse asset index, purging cache and starting fresh"
150                );
151                purge_cache_blobs(cache_dir);
152                Ok(Self::empty())
153            }
154        }
155    }
156
157    /// Persist the index to `cache_dir/index.json` atomically via
158    /// temp file + rename.
159    pub fn save(&self, cache_dir: &Path) -> Result<()> {
160        std::fs::create_dir_all(cache_dir)?;
161        let path = cache_dir.join(INDEX_FILENAME);
162
163        let bytes = serde_json::to_vec_pretty(self)?;
164
165        // NamedTempFile in the same directory guarantees the final rename
166        // stays on the same filesystem.
167        let mut tmp = tempfile::NamedTempFile::new_in(cache_dir)
168            .map_err(|e| AssetError::cache_dir(format!("temp file: {e}")))?;
169        tmp.write_all(&bytes)?;
170        tmp.flush()?;
171        tmp.persist(&path)
172            .map_err(|e| AssetError::cache_dir(format!("persist index: {e}")))?;
173        Ok(())
174    }
175
176    /// Insert or replace an asset entry.
177    pub fn upsert(&mut self, asset: CachedAsset) {
178        self.assets.insert(asset.id.clone(), asset);
179    }
180
181    /// Remove an asset entry, returning the old value if any.
182    pub fn remove(&mut self, id: &str) -> Option<CachedAsset> {
183        self.assets.remove(id)
184    }
185
186    /// Look up an asset by id.
187    pub fn get(&self, id: &str) -> Option<&CachedAsset> {
188        self.assets.get(id)
189    }
190
191    /// Mutably look up an asset by id.
192    pub fn get_mut(&mut self, id: &str) -> Option<&mut CachedAsset> {
193        self.assets.get_mut(id)
194    }
195
196    /// Mark `last_accessed` on an asset as "now".
197    pub fn touch(&mut self, id: &str) -> bool {
198        if let Some(asset) = self.assets.get_mut(id) {
199            asset.last_accessed_ms = now_ms();
200            true
201        } else {
202            false
203        }
204    }
205
206    /// Total size in bytes of all tracked assets.
207    pub fn total_size(&self) -> u64 {
208        self.assets.values().map(|a| a.size).sum()
209    }
210
211    /// Number of tracked assets.
212    pub fn len(&self) -> usize {
213        self.assets.len()
214    }
215
216    /// Whether the index contains no assets.
217    pub fn is_empty(&self) -> bool {
218        self.assets.is_empty()
219    }
220}
221
222/// Remove all files and subdirectories under `cache_dir` except the index
223/// file itself. Called when the index is unrecoverable (corrupt or version
224/// mismatch) so that orphaned blobs don't accumulate on disk.
225///
226/// Best-effort: individual I/O errors are logged and skipped — we would
227/// rather start with a fresh (possibly partially cleaned) cache than fail
228/// to open the manager entirely.
229fn purge_cache_blobs(cache_dir: &Path) {
230    let entries = match std::fs::read_dir(cache_dir) {
231        Ok(entries) => entries,
232        Err(e) => {
233            tracing::warn!(?e, "failed to list cache directory for purge");
234            return;
235        }
236    };
237    for entry in entries.flatten() {
238        let path = entry.path();
239        // Keep the index file itself — it will be overwritten by the
240        // caller with an empty index.
241        if path.file_name().is_some_and(|n| n == INDEX_FILENAME) {
242            continue;
243        }
244        // Use `symlink_metadata` (lstat) instead of `is_dir()` so that
245        // symlinks are never followed. A symlink pointing outside the
246        // cache root must be unlinked with `remove_file`, not chased
247        // into with `remove_dir_all`.
248        let is_real_dir = match std::fs::symlink_metadata(&path) {
249            Ok(meta) => meta.is_dir(),
250            Err(e) => {
251                tracing::warn!(?e, path = ?path, "failed to stat cached entry");
252                continue;
253            }
254        };
255        let result = if is_real_dir {
256            std::fs::remove_dir_all(&path)
257        } else {
258            std::fs::remove_file(&path)
259        };
260        if let Err(e) = result {
261            tracing::warn!(?e, path = ?path, "failed to purge cached file");
262        }
263    }
264}
265
/// Current time as UNIX epoch milliseconds.
///
/// Returns `0` if the system clock reads earlier than the UNIX epoch.
/// The millisecond count is converted with a checked conversion and
/// saturates at `u64::MAX` rather than being silently truncated.
pub fn now_ms() -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| {
            // `as_millis` yields a `u128`; a plain `as u64` cast would
            // wrap on overflow, so convert explicitly and clamp instead.
            u64::try_from(elapsed.as_millis()).unwrap_or(u64::MAX)
        })
}
273
#[cfg(test)]
mod tests {
    use super::*;
    use devboy_core::asset::AssetContext;
    use tempfile::tempdir;

    /// Build a small text asset with the given id and size; all other
    /// fields are fixed placeholder values.
    fn make_asset(id: &str, size: u64) -> CachedAsset {
        CachedAsset::new(NewCachedAsset {
            id: id.into(),
            filename: format!("{id}.txt"),
            mime_type: Some("text/plain".into()),
            size,
            local_path: PathBuf::from(format!("files/{id}.txt")),
            context: AssetContext::Issue {
                key: "DEV-1".into(),
            },
            checksum_sha256: "abcd".into(),
            remote_url: None,
        })
    }

    #[test]
    fn upsert_get_remove() {
        let mut index = AssetIndex::empty();
        index.upsert(make_asset("a1", 10));
        index.upsert(make_asset("a2", 20));
        assert_eq!(index.len(), 2);
        assert_eq!(index.total_size(), 30);

        assert_eq!(index.get("a1").unwrap().size, 10);
        let removed = index.remove("a1").unwrap();
        assert_eq!(removed.id, "a1");
        assert_eq!(index.len(), 1);
        assert!(index.get("a1").is_none());
    }

    #[test]
    fn touch_updates_last_accessed() {
        let mut index = AssetIndex::empty();
        index.upsert(make_asset("a1", 10));

        // Reset the timestamp to a known-old value instead of sleeping:
        // the test then does not depend on the clock ticking between two
        // readings and cannot flake on a coarse or adjusted clock.
        index.get_mut("a1").unwrap().last_accessed_ms = 0;

        assert!(index.touch("a1"));
        assert!(index.get("a1").unwrap().last_accessed_ms > 0);
        assert!(!index.touch("missing"));
    }

    #[test]
    fn load_missing_returns_empty() {
        let tmp = tempdir().unwrap();
        let index = AssetIndex::load(tmp.path()).unwrap();
        assert!(index.is_empty());
        assert_eq!(index.version, INDEX_VERSION);
    }

    #[test]
    fn save_and_reload_roundtrip() {
        let tmp = tempdir().unwrap();
        let mut index = AssetIndex::empty();
        index.upsert(make_asset("a1", 42));
        index.save(tmp.path()).unwrap();

        let reloaded = AssetIndex::load(tmp.path()).unwrap();
        assert_eq!(reloaded.len(), 1);
        assert_eq!(reloaded.get("a1").unwrap().size, 42);
    }

    #[test]
    fn corrupt_index_falls_back_to_empty() {
        let tmp = tempdir().unwrap();
        std::fs::write(tmp.path().join(INDEX_FILENAME), b"not json").unwrap();

        // Orphaned content that the fallback path is expected to purge:
        // one loose file and one populated subdirectory.
        let orphan_file = tmp.path().join("orphan.bin");
        std::fs::write(&orphan_file, b"blob").unwrap();
        let orphan_dir = tmp.path().join("files");
        std::fs::create_dir_all(&orphan_dir).unwrap();
        std::fs::write(orphan_dir.join("a1.txt"), b"x").unwrap();

        let index = AssetIndex::load(tmp.path()).unwrap();
        assert!(index.is_empty(), "corrupt index should fall back to empty");
        assert!(!orphan_file.exists(), "orphaned file should be purged");
        assert!(!orphan_dir.exists(), "orphaned directory should be purged");
        assert!(
            tmp.path().join(INDEX_FILENAME).exists(),
            "index file itself must be kept"
        );
    }

    #[test]
    fn version_mismatch_falls_back_to_empty() {
        let tmp = tempdir().unwrap();
        std::fs::write(
            tmp.path().join(INDEX_FILENAME),
            br#"{"version":999,"assets":{}}"#,
        )
        .unwrap();
        let orphan_file = tmp.path().join("orphan.bin");
        std::fs::write(&orphan_file, b"blob").unwrap();

        let index = AssetIndex::load(tmp.path()).unwrap();
        assert_eq!(index.version, INDEX_VERSION);
        assert!(index.is_empty());
        assert!(!orphan_file.exists(), "orphaned file should be purged");
    }

    #[test]
    fn save_is_atomic_under_overwrite() {
        let tmp = tempdir().unwrap();
        let mut index = AssetIndex::empty();
        index.upsert(make_asset("a1", 1));
        index.save(tmp.path()).unwrap();

        // Overwrite with different content; no stale temp files should linger.
        index.upsert(make_asset("a2", 2));
        index.save(tmp.path()).unwrap();

        let reloaded = AssetIndex::load(tmp.path()).unwrap();
        assert_eq!(reloaded.len(), 2);

        // Check that no temp files are left behind.
        let stragglers: Vec<_> = std::fs::read_dir(tmp.path())
            .unwrap()
            .filter_map(|e| e.ok())
            .filter(|e| {
                let name = e.file_name();
                let name = name.to_string_lossy();
                name != INDEX_FILENAME
            })
            .collect();
        assert!(stragglers.is_empty(), "unexpected files: {stragglers:?}");
    }
}