fetch_source/
cache.rs

1// A BTree maintains key order
2use std::collections::BTreeMap;
3use std::path::{Path, PathBuf};
4
5use derive_more::Deref;
6
7use crate::{Artefact, Digest, Source};
8
9const CACHE_FILE_NAME: &str = "fetch-source-cache.json";
10
11/***
12NOTE: For the special path newtypes below, we derive `Deref` as this models these types as "subtypes" of
13`PathBuf` i.e. they should be able to do everything a `PathBuf` can do, and have additional semantics
14at certain places in the code. They model paths that are special to the `Cache` so are only
15constructed in specific places, and requiring them as arguments rather than any `PathBuf` indicates
16where their special meaning to the cache matters.
17***/
18
19/// The root directory of a cache
20#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Deref)]
21pub struct CacheRoot(PathBuf);
22
23/// The path of a cached artefact relative to the cache root
24#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Deref)]
25pub struct RelCacheDir(PathBuf);
26
27/// The absolute path to a cached artefact
28#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Deref)]
29pub struct CacheDir(PathBuf);
30
31impl CacheRoot {
32    /// Get the absolute path to an artefact
33    pub fn append(&self, relative: RelCacheDir) -> CacheDir {
34        CacheDir(self.0.join(relative.0))
35    }
36}
37
38/// Records data about the cached sources and where their artefacts are within a [`Cache`](Cache).
39///
40/// When a [`Source`] is fetched, insert its [`Artefact`] into a cache to avoid repeatedly fetching
41/// the same source definition.
42///
43/// When fetching a source, check the cache subdirectory to use with [`CacheItems::relative_path`].
44#[derive(Debug, Default, serde::Deserialize, serde::Serialize, PartialEq, Eq)]
45pub struct CacheItems {
46    #[serde(flatten)]
47    map: BTreeMap<Digest, Artefact>,
48}
49
50impl CacheItems {
51    /// Create an empty collection.
52    pub fn new() -> Self {
53        Self {
54            map: BTreeMap::new(),
55        }
56    }
57
58    /// Retrieves a cached artefact for the given source, if it exists.
59    pub fn get(&self, source: &Source) -> Option<&Artefact> {
60        self.map.get(&Source::digest(source))
61    }
62
63    /// Check whether the cache contains the given source.
64    pub fn contains(&self, source: &Source) -> bool {
65        self.map.contains_key(&Source::digest(source))
66    }
67
68    /// Cache an artefact and return the digest of the [`Source`] which created it. Replaces any
69    /// previous value for this source.
70    pub fn insert(&mut self, artefact: Artefact) {
71        self.map.insert(Source::digest(&artefact), artefact);
72    }
73
74    /// Removes a cached value for the given source, returning it if it existed.
75    pub fn remove(&mut self, source: &Source) -> Option<Artefact> {
76        self.map.remove(&Source::digest(source))
77    }
78
79    /// Returns an iterator over the cached values.
80    pub fn values(&self) -> impl Iterator<Item = &Artefact> {
81        self.map.values()
82    }
83
84    /// Checks if the cache is empty.
85    pub fn is_empty(&self) -> bool {
86        self.map.is_empty()
87    }
88
89    /// Returns the number of cached values.
90    pub fn len(&self) -> usize {
91        self.map.len()
92    }
93
94    /// Get the relative path for a source within a cache directory
95    pub fn relative_path<S: AsRef<Source>>(&self, source: S) -> RelCacheDir {
96        RelCacheDir(PathBuf::from(Source::digest(source).as_ref()))
97    }
98}
99
100/// Owns [`data`](CacheItems) about cached sources and is responsible for its persistence.
101#[derive(Debug)]
102pub struct Cache {
103    items: CacheItems,
104    cache_file: PathBuf,
105}
106
107impl Cache {
108    /// Normalise to the path of a cache file. The cache dir is required to be the absolute path to
109    /// the cache directory. We rely on `canonicalize` to error when the path doesn't exist.
110    ///
111    /// Returns an IO error if the directory doesn't exist
112    #[inline]
113    fn normalise_cache_file<P>(cache_dir: P) -> std::io::Result<std::path::PathBuf>
114    where
115        P: AsRef<Path>,
116    {
117        Ok(cache_dir
118            .as_ref()
119            .to_path_buf()
120            .canonicalize()?
121            .join(CACHE_FILE_NAME))
122    }
123
124    /// Create a new cache at the specified file path.
125    fn create_at(cache_file: PathBuf) -> Self {
126        Self {
127            items: CacheItems::new(),
128            cache_file,
129        }
130    }
131
132    /// Read the cache in the given directory.
133    ///
134    /// Error if the directory or cache file do not exist, of if a deserialisation error occurs
135    /// when reading the cache file
136    pub fn read<P>(cache_dir: P) -> Result<Self, crate::Error>
137    where
138        P: AsRef<Path>,
139    {
140        let cache_file = Self::normalise_cache_file(cache_dir)?;
141        let contents = std::fs::read_to_string(&cache_file)?;
142        let items: CacheItems = serde_json::from_str(&contents)?;
143        Ok(Self { items, cache_file })
144    }
145
146    /// Create a new cache in the given directory.
147    ///
148    /// Error if the directory doesn't exist, or if there is already a cache file in this directory.
149    pub fn new<P>(cache_dir: P) -> Result<Self, crate::Error>
150    where
151        P: AsRef<Path>,
152    {
153        let cache_file = Self::normalise_cache_file(&cache_dir)?;
154        if cache_file.is_file() {
155            return Err(std::io::Error::new(
156                std::io::ErrorKind::AlreadyExists,
157                "Cache file already exists",
158            )
159            .into());
160        }
161        Ok(Self::create_at(cache_file))
162    }
163
164    /// Loads the cache from a JSON file in the given directory, creating a new cache if the file
165    /// does not exist. Requires that `cache_dir` exists. Note that this function doesn't
166    /// actually create the cache file - this happens when the cache is saved.
167    ///
168    /// Returns an error if `cache_dir` doesn't exist, or if a deserialisation error occurs when
169    /// reading the cache file.
170    pub fn load_or_create<P>(cache_dir: P) -> Result<Self, crate::Error>
171    where
172        P: AsRef<Path>,
173    {
174        let cache_file = Self::normalise_cache_file(&cache_dir)?;
175        if cache_file.is_file() {
176            Self::read(cache_dir)
177        } else {
178            Ok(Self::create_at(cache_file))
179        }
180    }
181
182    /// Saves the cache in the directory where it was created.
183    ///
184    /// Returns an error if a serialisation or I/O error occurs.
185    pub fn save(&self) -> Result<(), crate::Error> {
186        let json = serde_json::to_string_pretty(&self.items)?;
187        Ok(std::fs::write(&self.cache_file, json)?)
188    }
189
190    /// Get the cache file path.
191    pub fn cache_file(&self) -> &Path {
192        &self.cache_file
193    }
194
195    /// Get the directory of the cache file
196    pub fn cache_dir(&self) -> CacheRoot {
197        CacheRoot(self.cache_file.parent().unwrap().to_path_buf())
198    }
199
200    /// Calculate the absolute path where a fetched source would be stored within the cache
201    pub fn cached_path(&self, source: &Source) -> CacheDir {
202        self.cache_dir().append(self.items.relative_path(source))
203    }
204
205    /// Get a reference to the cache items.
206    pub fn items(&self) -> &CacheItems {
207        &self.items
208    }
209
210    /// Get a mutable reference to the cache items.
211    pub fn items_mut(&mut self) -> &mut CacheItems {
212        &mut self.items
213    }
214
215    /// Check whether the cache file exists in the given directory.
216    pub fn cache_file_exists<P>(cache_dir: P) -> bool
217    where
218        P: AsRef<Path>,
219    {
220        cache_dir.as_ref().join(CACHE_FILE_NAME).is_file()
221    }
222}
223
224impl IntoIterator for CacheItems {
225    type Item = (Digest, Artefact);
226    type IntoIter = std::collections::btree_map::IntoIter<Digest, Artefact>;
227
228    fn into_iter(self) -> Self::IntoIter {
229        self.map.into_iter()
230    }
231}
232
233impl<'a> IntoIterator for &'a CacheItems {
234    type Item = (&'a Digest, &'a Artefact);
235    type IntoIter = std::collections::btree_map::Iter<'a, Digest, Artefact>;
236
237    fn into_iter(self) -> Self::IntoIter {
238        self.map.iter()
239    }
240}
241
242impl IntoIterator for Cache {
243    type Item = (Digest, Artefact);
244    type IntoIter = std::collections::btree_map::IntoIter<Digest, Artefact>;
245
246    fn into_iter(self) -> Self::IntoIter {
247        self.items.into_iter()
248    }
249}
250
251impl<'a> IntoIterator for &'a Cache {
252    type Item = (&'a Digest, &'a Artefact);
253    type IntoIter = std::collections::btree_map::Iter<'a, Digest, Artefact>;
254
255    fn into_iter(self) -> Self::IntoIter {
256        (&self.items).into_iter()
257    }
258}
259
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    // Helper macro for creating test caches. Takes the cache *directory*; the cache only exists
    // in memory, so the directory doesn't need to exist on disk.
    macro_rules! mock_cache_at {
        ($cache_dir:expr) => {{ Cache::create_at(PathBuf::from($cache_dir).join(CACHE_FILE_NAME)) }};
    }

    #[test]
    fn artefact_path_is_digest() {
        // The cache should determine the path to a cached artefact relative to the cache directory,
        // where the subdirectory is the digest of the source.
        let cache = mock_cache_at! {"/foo/bar"};
        let source: Source =
            crate::build_from_json! { "tar": "www.example.com/test.tar.gz" }.unwrap();
        assert_eq!(
            PathBuf::from("/foo/bar/").join(Source::digest(&source).as_ref()),
            *cache.cached_path(&source)
        );
    }

    #[test]
    fn same_artefact_with_multiple_names_exists_once() {
        // Two artefacts created from the same source share a digest, so the second insert
        // replaces the first.
        let mut cache = mock_cache_at! {"/foo/bar"};
        let artefact_1: crate::Artefact = crate::build_from_json! {
            "source": { "tar": "www.example.com/test.tar.gz" },
            "path": "AAAAAAAA",
        }
        .unwrap();
        let artefact_2: crate::Artefact = crate::build_from_json! {
            "source": { "tar": "www.example.com/test.tar.gz" },
            "path": "BBBBBBBB",
        }
        .unwrap();
        cache.items_mut().insert(artefact_1);
        cache.items_mut().insert(artefact_2);
        assert_eq!(cache.items().len(), 1);
    }

    #[test]
    fn cache_items_insert_and_get() {
        let mut items = CacheItems::new();
        let artefact: crate::Artefact = crate::build_from_json! {
            "source": { "tar": "www.example.com/test.tar.gz" },
            "path": "/some/path",
        }
        .unwrap();

        let source: Source =
            crate::build_from_json! { "tar": "www.example.com/test.tar.gz" }.unwrap();
        assert!(!items.contains(&source));

        items.insert(artefact);
        assert!(items.contains(&source));
        assert_eq!(items.len(), 1);

        let retrieved = items.get(&source).unwrap();
        assert_eq!(
            <crate::Artefact as AsRef<Path>>::as_ref(retrieved),
            Path::new("/some/path")
        );
    }

    #[test]
    fn cache_read_on_existing_dir_missing_file_fails() {
        let temp_dir = tempdir().unwrap();
        let cache_file = Cache::normalise_cache_file(&temp_dir).unwrap();
        // Check the precondition *before* exercising `read`, otherwise it proves nothing.
        assert!(!cache_file.exists(), "File shouldn't exist before test");
        let result = Cache::read(&temp_dir);
        assert!(result.is_err(), "Read should fail when file doesn't exist");
        assert!(
            !cache_file.exists(),
            "File shouldn't be created by `read` operation"
        );
    }

    #[test]
    fn cache_load_on_existing_dir_missing_file_gives_empty_cache() {
        let temp_dir = tempdir().unwrap();
        let cache_file = Cache::normalise_cache_file(&temp_dir).unwrap();
        assert!(!cache_file.exists(), "File shouldn't exist before test");
        let result = Cache::load_or_create(&temp_dir);
        assert!(
            result.is_ok(),
            "load_or_create should succeed when file doesn't exist"
        );
        assert!(
            !cache_file.exists(),
            "File shouldn't exist after test - only created when saved"
        );
        assert!(result.unwrap().items().is_empty());
    }

    #[test]
    fn cache_load_on_missing_dir_fails() {
        // A child of a fresh temporary directory is guaranteed not to exist, unlike a
        // hard-coded name in the shared system temp directory.
        let parent = tempdir().unwrap();
        let missing_dir = parent.path().join("doesnt-exist");
        assert!(
            !missing_dir.exists(),
            "The temporary directory shouldn't exist before test"
        );
        let result = Cache::load_or_create(&missing_dir);
        assert!(
            !missing_dir.exists(),
            "The temporary directory shouldn't exist after test"
        );
        assert!(
            result.is_err(),
            "load_or_create should fail when directory doesn't exist"
        );
        assert_eq!(result.unwrap_err().kind(), &crate::ErrorKind::Io);
    }

    #[test]
    fn cache_load_save_roundtrip() {
        // A unique temporary directory keeps concurrent test runs from sharing a cache file and
        // is removed on drop, even when an assertion below fails.
        let temp_dir = tempdir().unwrap();

        // Create and populate cache
        let mut cache = Cache::create_at(temp_dir.path().join(CACHE_FILE_NAME));
        let artefact: crate::Artefact = crate::build_from_json! {
            "source": { "tar": "www.example.com/test.tar.gz" },
            "path": "/some/path",
        }
        .unwrap();
        cache.items_mut().insert(artefact);

        // Save
        cache.save().unwrap();

        // Load
        let loaded_cache = Cache::load_or_create(&temp_dir).unwrap();
        assert_eq!(loaded_cache.items().len(), 1);

        let source: Source =
            crate::build_from_json! { "tar": "www.example.com/test.tar.gz" }.unwrap();
        let loaded_artefact = loaded_cache.items().get(&source).unwrap();
        assert_eq!(
            <crate::Artefact as AsRef<Path>>::as_ref(loaded_artefact),
            Path::new("/some/path")
        );
    }
}