Skip to main content

wordchipper_disk_cache/
disk_cache.rs

1//! # Wordchipper Disk Cache
2
3use crate::{WORDCHIPPER_CACHE_CONFIG, path_utils};
4use anyhow::Context;
5use downloader::{Download, Downloader};
6use std::fs;
7use std::path::{Path, PathBuf};
8
9/// Options for [`WordchipperDiskCache`].
10#[derive(Clone, Default, Debug)]
11pub struct WordchipperDiskCacheOptions {
12    /// Optional path to the cache directory.
13    pub cache_dir: Option<PathBuf>,
14
15    /// Optional path to the data directory.
16    pub data_dir: Option<PathBuf>,
17
18    /// Optional [`Downloader`] builder.
19    pub downloader: Option<fn() -> Downloader>,
20}
21
22impl WordchipperDiskCacheOptions {
23    /// Set the cache directory.
24    pub fn with_cache_dir<P: AsRef<Path>>(
25        mut self,
26        cache_dir: Option<P>,
27    ) -> Self {
28        self.cache_dir = cache_dir.map(|p| p.as_ref().to_path_buf());
29        self
30    }
31
32    /// Set the data directory.
33    pub fn with_data_dir<P: AsRef<Path>>(
34        mut self,
35        data_dir: Option<P>,
36    ) -> Self {
37        self.data_dir = data_dir.map(|p| p.as_ref().to_path_buf());
38        self
39    }
40
41    /// Set the downloader builder.
42    pub fn with_downloader(
43        mut self,
44        downloader: Option<fn() -> Downloader>,
45    ) -> Self {
46        self.downloader = downloader;
47        self
48    }
49}
50
51/// Disk cache for downloaded files.
52pub struct WordchipperDiskCache {
53    /// Cache directory.
54    cache_dir: PathBuf,
55
56    /// Data directory.
57    data_dir: PathBuf,
58
59    /// Connection pool for downloading files.
60    downloader: Downloader,
61}
62
63impl Default for WordchipperDiskCache {
64    fn default() -> Self {
65        Self::init(WordchipperDiskCacheOptions::default()).unwrap()
66    }
67}
68
69impl WordchipperDiskCache {
70    /// Construct a new [`WordchipperDiskCache`].
71    pub fn init(options: WordchipperDiskCacheOptions) -> anyhow::Result<Self> {
72        let cache_dir = WORDCHIPPER_CACHE_CONFIG
73            .resolve_cache_dir(options.cache_dir)
74            .context("failed to resolve cache directory")?;
75
76        let data_dir = WORDCHIPPER_CACHE_CONFIG
77            .resolve_data_dir(options.data_dir)
78            .context("failed to resolve data directory")?;
79
80        let downloader = match options.downloader {
81            Some(builder) => builder(),
82            None => Downloader::builder().build()?,
83        };
84
85        Ok(Self {
86            cache_dir,
87            data_dir,
88            downloader,
89        })
90    }
91
92    /// Get the cache directory.
93    pub fn cache_dir(&self) -> &Path {
94        &self.cache_dir
95    }
96
97    /// Get the data directory.
98    pub fn data_dir(&self) -> &Path {
99        &self.data_dir
100    }
101
102    /// Get the downloader.
103    pub fn downloader(&self) -> &Downloader {
104        &self.downloader
105    }
106
107    /// Get the cache path for the given key.
108    ///
109    /// * Does not check that the path exists.
110    /// * Does not initialize the containing directories.
111    ///
112    /// # Arguments
113    /// * `context` - prefix dirs, inserted between `self.cache_dir` and `file`.
114    /// * `file` - the final file name.
115    pub fn cache_path<C, F>(
116        &self,
117        context: &[C],
118        file: F,
119    ) -> PathBuf
120    where
121        C: AsRef<Path>,
122        F: AsRef<Path>,
123    {
124        path_utils::extend_path(&self.cache_dir, context, file)
125    }
126
127    /// Loads a cached file from a specified path or downloads it if it does not exist.
128    ///
129    /// # Arguments
130    /// * `context`: A slice of `C` containing path-related context used in determining the
131    ///   cache location. These paths are combined to build the cached file's location.
132    /// * `urls`: A slice of string references specifying the URLs to download the file from
133    ///   if it is not already cached.
134    /// * `download`: A boolean flag indicating whether to attempt downloading the file
135    ///   from the provided URLs if it does not already exist in the cache.
136    ///
137    /// # Returns
138    /// * Returns a [`PathBuf`] pointing to the cached file if it exists or is successfully downloaded.
139    /// * Returns an error if the file is not found in the cache and downloading is not allowed
140    ///   or fails.
141    ///
142    /// # Errors
143    /// * Returns an error if the cached file does not exist and `download` is `false`.
144    /// * Returns an error if the downloading process fails.
145    pub fn load_cached_path<C>(
146        &mut self,
147        context: &[C],
148        urls: &[&str],
149        download: bool,
150        /* TODO: hash: Option<&str>, */
151    ) -> anyhow::Result<PathBuf>
152    where
153        C: AsRef<Path>,
154    {
155        let mut dl = Download::new_mirrored(urls);
156        let file_name = dl.file_name.clone();
157        let path = self.cache_path(context, &file_name);
158        dl.file_name = path.clone();
159
160        if path.exists() {
161            return Ok(path);
162        }
163
164        if !download {
165            anyhow::bail!("cached file not found: {}", path.display());
166        }
167
168        fs::create_dir_all(path.parent().unwrap())?;
169
170        self.downloader.download(&[dl])?;
171
172        Ok(path)
173    }
174
175    /// Get the data path for the given key.
176    ///
177    /// * Does not check that the path exists.
178    /// * Does not initialize the containing directories.
179    ///
180    /// # Arguments
181    /// * `context` - prefix dirs, inserted between `self.cache_dir` and `file`.
182    /// * `file` - the final file name.
183    pub fn data_path<C, F>(
184        &self,
185        context: &[C],
186        file: F,
187    ) -> PathBuf
188    where
189        C: AsRef<Path>,
190        F: AsRef<Path>,
191    {
192        path_utils::extend_path(&self.data_dir, context, file)
193    }
194}
195
#[cfg(test)]
mod tests {
    use crate::disk_cache::{WordchipperDiskCache, WordchipperDiskCacheOptions};
    use crate::{WORDCHIPPER_CACHE_CONFIG, WORDCHIPPER_CACHE_DIR, WORDCHIPPER_DATA_DIR};
    use serial_test::serial;
    use std::env;
    use std::path::PathBuf;

    /// Checks directory resolution: explicit options are used when given,
    /// otherwise the environment variables, otherwise the project dirs.
    #[test]
    #[serial]
    fn test_resolve_dirs() {
        // Remember the caller's environment so it can be restored afterwards.
        // `var_os` keeps values that are not valid Unicode as well.
        let orig_cache_dir = env::var_os(WORDCHIPPER_CACHE_DIR);
        let orig_data_dir = env::var_os(WORDCHIPPER_DATA_DIR);

        let pds = WORDCHIPPER_CACHE_CONFIG
            .project_dirs()
            .expect("failed to get project dirs");

        let user_cache_dir = PathBuf::from("/tmp/wordchipper/cache");
        let user_data_dir = PathBuf::from("/tmp/wordchipper/data");

        let env_cache_dir = PathBuf::from("/tmp/wordchipper/env_cache");
        let env_data_dir = PathBuf::from("/tmp/wordchipper/env_data");

        // No env vars
        // SAFETY: every test in this module that touches these variables is
        // `#[serial]`, so none of them runs concurrently with this mutation.
        unsafe {
            env::remove_var(WORDCHIPPER_CACHE_DIR);
            env::remove_var(WORDCHIPPER_DATA_DIR);
        }

        let cache = WordchipperDiskCache::init(
            WordchipperDiskCacheOptions::default()
                .with_cache_dir(Some(user_cache_dir.clone()))
                .with_data_dir(Some(user_data_dir.clone())),
        )
        .unwrap();
        assert_eq!(&cache.cache_dir(), &user_cache_dir);
        assert_eq!(&cache.data_dir(), &user_data_dir);

        let cache = WordchipperDiskCache::init(WordchipperDiskCacheOptions::default()).unwrap();
        assert_eq!(&cache.cache_dir(), &pds.cache_dir().to_path_buf());
        assert_eq!(&cache.data_dir(), &pds.data_dir().to_path_buf());

        // With env var.
        // SAFETY: see above; the tests in this module are serialized.
        unsafe {
            env::set_var(WORDCHIPPER_CACHE_DIR, env_cache_dir.to_str().unwrap());
            env::set_var(WORDCHIPPER_DATA_DIR, env_data_dir.to_str().unwrap());
        }

        let cache = WordchipperDiskCache::init(
            WordchipperDiskCacheOptions::default()
                .with_cache_dir(Some(user_cache_dir.clone()))
                .with_data_dir(Some(user_data_dir.clone())),
        )
        .unwrap();
        assert_eq!(&cache.cache_dir(), &user_cache_dir);
        assert_eq!(&cache.data_dir(), &user_data_dir);

        let cache = WordchipperDiskCache::init(WordchipperDiskCacheOptions::default()).unwrap();
        assert_eq!(&cache.cache_dir(), &env_cache_dir);
        assert_eq!(&cache.data_dir(), &env_data_dir);

        // restore original env var.
        // SAFETY: see above; the tests in this module are serialized.
        match orig_cache_dir {
            Some(original) => unsafe { env::set_var(WORDCHIPPER_CACHE_DIR, original) },
            None => unsafe { env::remove_var(WORDCHIPPER_CACHE_DIR) },
        }
        match orig_data_dir {
            Some(original) => unsafe { env::set_var(WORDCHIPPER_DATA_DIR, original) },
            None => unsafe { env::remove_var(WORDCHIPPER_DATA_DIR) },
        }
    }

    /// `data_path` joins the prefix dirs and file name onto the data directory.
    // Serialized because `init` runs here while `test_resolve_dirs` mutates
    // the process environment.
    #[test]
    #[serial]
    fn test_data_path() {
        let cache = WordchipperDiskCache::init(WordchipperDiskCacheOptions::default()).unwrap();
        let path = cache.data_path(&["prefix"], "file.txt");
        assert_eq!(path, cache.data_dir.join("prefix").join("file.txt"));
    }

    /// `cache_path` joins the prefix dirs and file name onto the cache directory.
    // Serialized because `init` runs here while `test_resolve_dirs` mutates
    // the process environment.
    #[test]
    #[serial]
    fn test_cache_path() {
        let cache = WordchipperDiskCache::init(WordchipperDiskCacheOptions::default()).unwrap();
        let path = cache.cache_path(&["prefix"], "file.txt");
        assert_eq!(path, cache.cache_dir.join("prefix").join("file.txt"));
    }
}