Skip to main content

wordchipper_disk_cache/
disk_cache.rs

1//! # Wordchipper Disk Cache
2
3use std::{
4    fs,
5    path::{Path, PathBuf},
6};
7
8use anyhow::Context;
9use downloader::{Download, Downloader};
10
11use crate::{WORDCHIPPER_CACHE_CONFIG, path_utils};
12
13/// Options for [`WordchipperDiskCache`].
14#[derive(Clone, Default, Debug)]
15pub struct WordchipperDiskCacheOptions {
16    /// Optional path to the cache directory.
17    pub cache_dir: Option<PathBuf>,
18
19    /// Optional path to the data directory.
20    pub data_dir: Option<PathBuf>,
21
22    /// Optional [`Downloader`] builder.
23    pub downloader: Option<fn() -> Downloader>,
24}
25
26impl WordchipperDiskCacheOptions {
27    /// Set the cache directory.
28    pub fn with_cache_dir<P: AsRef<Path>>(
29        mut self,
30        cache_dir: Option<P>,
31    ) -> Self {
32        self.cache_dir = cache_dir.map(|p| p.as_ref().to_path_buf());
33        self
34    }
35
36    /// Set the data directory.
37    pub fn with_data_dir<P: AsRef<Path>>(
38        mut self,
39        data_dir: Option<P>,
40    ) -> Self {
41        self.data_dir = data_dir.map(|p| p.as_ref().to_path_buf());
42        self
43    }
44
45    /// Set the downloader builder.
46    pub fn with_downloader(
47        mut self,
48        downloader: Option<fn() -> Downloader>,
49    ) -> Self {
50        self.downloader = downloader;
51        self
52    }
53}
54
/// Disk cache for downloaded files.
///
/// Holds the resolved cache and data directories together with the
/// [`Downloader`] used to fetch files that are missing from the cache.
pub struct WordchipperDiskCache {
    /// Resolved cache directory; root of every path built by `cache_path`.
    cache_dir: PathBuf,

    /// Resolved data directory; root of every path built by `data_path`.
    data_dir: PathBuf,

    /// Downloader used by `load_cached_path` to fetch missing files.
    downloader: Downloader,
}
66
67impl Default for WordchipperDiskCache {
68    fn default() -> Self {
69        Self::new(WordchipperDiskCacheOptions::default()).unwrap()
70    }
71}
72
73impl WordchipperDiskCache {
74    /// Construct a new [`WordchipperDiskCache`].
75    pub fn new(options: WordchipperDiskCacheOptions) -> anyhow::Result<Self> {
76        let cache_dir = WORDCHIPPER_CACHE_CONFIG
77            .resolve_cache_dir(options.cache_dir)
78            .context("failed to resolve cache directory")?;
79
80        let data_dir = WORDCHIPPER_CACHE_CONFIG
81            .resolve_data_dir(options.data_dir)
82            .context("failed to resolve data directory")?;
83
84        let downloader = match options.downloader {
85            Some(builder) => builder(),
86            None => Downloader::builder().build()?,
87        };
88
89        Ok(Self {
90            cache_dir,
91            data_dir,
92            downloader,
93        })
94    }
95
96    /// Get the cache directory.
97    pub fn cache_dir(&self) -> &Path {
98        &self.cache_dir
99    }
100
101    /// Get the data directory.
102    pub fn data_dir(&self) -> &Path {
103        &self.data_dir
104    }
105
106    /// Get the downloader.
107    pub fn downloader(&self) -> &Downloader {
108        &self.downloader
109    }
110
111    /// Get the cache path for the given key.
112    ///
113    /// * Does not check that the path exists.
114    /// * Does not initialize the containing directories.
115    ///
116    /// # Arguments
117    /// * `context` - prefix dirs, inserted between `self.cache_dir` and `file`.
118    /// * `file` - the final file name.
119    pub fn cache_path<C, F>(
120        &self,
121        context: &[C],
122        file: F,
123    ) -> PathBuf
124    where
125        C: AsRef<Path>,
126        F: AsRef<Path>,
127    {
128        path_utils::extend_path(&self.cache_dir, context, file)
129    }
130
131    /// Loads a cached file from a specified path or downloads it if it does not exist.
132    ///
133    /// # Arguments
134    /// * `context`: A slice of `C` containing path-related context used in determining the
135    ///   cache location. These paths are combined to build the cached file's location.
136    /// * `urls`: A slice of string references specifying the URLs to download the file from
137    ///   if it is not already cached.
138    /// * `download`: A boolean flag indicating whether to attempt downloading the file
139    ///   from the provided URLs if it does not already exist in the cache.
140    ///
141    /// # Returns
142    /// * Returns a [`PathBuf`] pointing to the cached file if it exists or is successfully downloaded.
143    /// * Returns an error if the file is not found in the cache and downloading is not allowed
144    ///   or fails.
145    ///
146    /// # Errors
147    /// * Returns an error if the cached file does not exist and `download` is `false`.
148    /// * Returns an error if the downloading process fails.
149    pub fn load_cached_path<C>(
150        &mut self,
151        context: &[C],
152        urls: &[&str],
153        download: bool,
154        /* TODO: hash: Option<&str>, */
155    ) -> anyhow::Result<PathBuf>
156    where
157        C: AsRef<Path>,
158    {
159        let mut dl = Download::new_mirrored(urls);
160        let file_name = dl.file_name.clone();
161        let path = self.cache_path(context, &file_name);
162        dl.file_name = path.clone();
163
164        if path.exists() {
165            return Ok(path);
166        }
167
168        if !download {
169            anyhow::bail!("cached file not found: {}", path.display());
170        }
171
172        fs::create_dir_all(path.parent().unwrap())?;
173
174        self.downloader.download(&[dl])?;
175
176        Ok(path)
177    }
178
179    /// Get the data path for the given key.
180    ///
181    /// * Does not check that the path exists.
182    /// * Does not initialize the containing directories.
183    ///
184    /// # Arguments
185    /// * `context` - prefix dirs, inserted between `self.cache_dir` and `file`.
186    /// * `file` - the final file name.
187    pub fn data_path<C, F>(
188        &self,
189        context: &[C],
190        file: F,
191    ) -> PathBuf
192    where
193        C: AsRef<Path>,
194        F: AsRef<Path>,
195    {
196        path_utils::extend_path(&self.data_dir, context, file)
197    }
198}
199
#[cfg(test)]
mod tests {
    use std::{env, path::PathBuf};

    use serial_test::serial;

    use crate::{
        WORDCHIPPER_CACHE_CONFIG,
        WORDCHIPPER_CACHE_DIR,
        WORDCHIPPER_DATA_DIR,
        disk_cache::{WordchipperDiskCache, WordchipperDiskCacheOptions},
    };

    /// Checks directory resolution with and without the environment overrides:
    /// explicit options win in both cases; otherwise the environment variables
    /// are used when set, and the project directories when not.
    #[test]
    #[serial]
    fn test_resolve_dirs() {
        // Capture each variable under its own key so that both can be restored
        // faithfully at the end. `var_os` also preserves non-UTF-8 values.
        let orig_cache_dir = env::var_os(WORDCHIPPER_CACHE_DIR);
        let orig_data_dir = env::var_os(WORDCHIPPER_DATA_DIR);

        let pds = WORDCHIPPER_CACHE_CONFIG
            .project_dirs()
            .expect("failed to get project dirs");

        let user_cache_dir = PathBuf::from("/tmp/wordchipper/cache");
        let user_data_dir = PathBuf::from("/tmp/wordchipper/data");

        let env_cache_dir = PathBuf::from("/tmp/wordchipper/env_cache");
        let env_data_dir = PathBuf::from("/tmp/wordchipper/env_data");

        // No env vars
        // SAFETY: the tests in this module are `#[serial]`, so none of them
        // reads or writes the environment while it is being modified here.
        unsafe {
            env::remove_var(WORDCHIPPER_CACHE_DIR);
            env::remove_var(WORDCHIPPER_DATA_DIR);
        }

        let cache = WordchipperDiskCache::new(
            WordchipperDiskCacheOptions::default()
                .with_cache_dir(Some(user_cache_dir.clone()))
                .with_data_dir(Some(user_data_dir.clone())),
        )
        .unwrap();
        assert_eq!(&cache.cache_dir(), &user_cache_dir);
        assert_eq!(&cache.data_dir(), &user_data_dir);

        let cache = WordchipperDiskCache::new(WordchipperDiskCacheOptions::default()).unwrap();
        assert_eq!(&cache.cache_dir(), &pds.cache_dir().to_path_buf());
        assert_eq!(&cache.data_dir(), &pds.data_dir().to_path_buf());

        // With env var.
        // SAFETY: see above; environment access in this module is serialized.
        unsafe {
            env::set_var(WORDCHIPPER_CACHE_DIR, env_cache_dir.to_str().unwrap());
            env::set_var(WORDCHIPPER_DATA_DIR, env_data_dir.to_str().unwrap());
        }

        let cache = WordchipperDiskCache::new(
            WordchipperDiskCacheOptions::default()
                .with_cache_dir(Some(user_cache_dir.clone()))
                .with_data_dir(Some(user_data_dir.clone())),
        )
        .unwrap();
        assert_eq!(&cache.cache_dir(), &user_cache_dir);
        assert_eq!(&cache.data_dir(), &user_data_dir);

        let cache = WordchipperDiskCache::new(WordchipperDiskCacheOptions::default()).unwrap();
        assert_eq!(&cache.cache_dir(), &env_cache_dir);
        assert_eq!(&cache.data_dir(), &env_data_dir);

        // restore original env var.
        // SAFETY: see above; environment access in this module is serialized.
        match orig_cache_dir {
            Some(original) => unsafe { env::set_var(WORDCHIPPER_CACHE_DIR, original) },
            None => unsafe { env::remove_var(WORDCHIPPER_CACHE_DIR) },
        }
        match orig_data_dir {
            Some(original) => unsafe { env::set_var(WORDCHIPPER_DATA_DIR, original) },
            None => unsafe { env::remove_var(WORDCHIPPER_DATA_DIR) },
        }
    }

    /// `data_path` joins the context and file name onto the data directory.
    // Serialized because constructing the cache reads the environment that
    // `test_resolve_dirs` mutates.
    #[test]
    #[serial]
    fn test_data_path() {
        let cache = WordchipperDiskCache::new(WordchipperDiskCacheOptions::default()).unwrap();
        let path = cache.data_path(&["prefix"], "file.txt");
        assert_eq!(path, cache.data_dir.join("prefix").join("file.txt"));
    }

    /// `cache_path` joins the context and file name onto the cache directory.
    // Serialized because constructing the cache reads the environment that
    // `test_resolve_dirs` mutates.
    #[test]
    #[serial]
    fn test_cache_path() {
        let cache = WordchipperDiskCache::new(WordchipperDiskCacheOptions::default()).unwrap();
        let path = cache.cache_path(&["prefix"], "file.txt");
        assert_eq!(path, cache.cache_dir.join("prefix").join("file.txt"));
    }
}
291}