Skip to main content

lru_disk_cache/
lib.rs

1#![deny(rust_2018_idioms)]
2
3#[macro_use]
4extern crate log;
5//extern crate lru_cache;
6pub mod lru_cache;
7
8use std::borrow::Borrow;
9use std::boxed::Box;
10use std::collections::hash_map::RandomState;
11use std::error::Error as StdError;
12use std::ffi::{OsStr, OsString};
13use std::fmt;
14use std::fs::{self, File};
15use std::hash::BuildHasher;
16use std::io;
17use std::io::prelude::*;
18use std::path::{Path, PathBuf};
19
20pub use crate::lru_cache::{LruCache, Meter};
21use filetime::{set_file_times, FileTime};
22use walkdir::WalkDir;
23
24struct FileSize;
25
26/// Given a tuple of (path, filesize), use the filesize for measurement.
27impl<K> Meter<K, u64> for FileSize {
28    type Measure = usize;
29    fn measure<Q: ?Sized>(&self, _: &Q, v: &u64) -> usize
30    where
31        K: Borrow<Q>,
32    {
33        *v as usize
34    }
35}
36
37/// Return an iterator of `(path, size)` of files under `path` sorted by ascending last-modified
38/// time, such that the oldest modified file is returned first.
39fn get_all_files<P: AsRef<Path>>(path: P) -> Box<dyn Iterator<Item = (PathBuf, u64)>> {
40    let mut files: Vec<_> = WalkDir::new(path.as_ref())
41        .into_iter()
42        .filter_map(|e| {
43            e.ok().and_then(|f| {
44                // Only look at files
45                if f.file_type().is_file() {
46                    // Get the last-modified time, size, and the full path.
47                    f.metadata().ok().and_then(|m| {
48                        m.modified()
49                            .ok()
50                            .map(|mtime| (mtime, f.path().to_owned(), m.len()))
51                    })
52                } else {
53                    None
54                }
55            })
56        })
57        .collect();
58    // Sort by last-modified-time, so oldest file first.
59    files.sort_by_key(|k| k.0);
60    Box::new(files.into_iter().map(|(_mtime, path, size)| (path, size)))
61}
62
63/// An LRU cache of files on disk.
64pub struct LruDiskCache<S: BuildHasher = RandomState> {
65    lru: LruCache<OsString, u64, S, FileSize>,
66    root: PathBuf,
67}
68
69/// Errors returned by this crate.
70#[derive(Debug)]
71pub enum Error {
72    /// The file was too large to fit in the cache.
73    FileTooLarge,
74    /// The file was not in the cache.
75    FileNotInCache,
76    /// An IO Error occurred.
77    Io(io::Error),
78}
79
80impl fmt::Display for Error {
81    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
82        match self {
83            Error::FileTooLarge => write!(f, "File too large"),
84            Error::FileNotInCache => write!(f, "File not in cache"),
85            Error::Io(ref e) => write!(f, "{}", e),
86        }
87    }
88}
89
90impl StdError for Error {
91    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
92        match self {
93            Error::FileTooLarge => None,
94            Error::FileNotInCache => None,
95            Error::Io(ref e) => Some(e),
96        }
97    }
98}
99
100impl From<io::Error> for Error {
101    fn from(e: io::Error) -> Error {
102        Error::Io(e)
103    }
104}
105
106/// A convenience `Result` type
107pub type Result<T> = std::result::Result<T, Error>;
108
109/// Trait objects can't be bounded by more than one non-builtin trait.
110pub trait ReadSeek: Read + Seek + Send {}
111
112impl<T: Read + Seek + Send> ReadSeek for T {}
113
114enum AddFile<'a> {
115    AbsPath(PathBuf),
116    RelPath(&'a OsStr),
117}
118
119impl LruDiskCache {
120    /// Create an `LruDiskCache` that stores files in `path`, limited to `size` bytes.
121    ///
122    /// Existing files in `path` will be stored with their last-modified time from the filesystem
123    /// used as the order for the recency of their use. Any files that are individually larger
124    /// than `size` bytes will be removed.
125    ///
126    /// The cache is not observant of changes to files under `path` from external sources, it
127    /// expects to have sole maintence of the contents.
128    pub fn new<T>(path: T, size: u64) -> Result<Self>
129    where
130        PathBuf: From<T>,
131    {
132        LruDiskCache {
133            lru: LruCache::with_meter(size, FileSize),
134            root: PathBuf::from(path),
135        }
136        .init()
137    }
138
139    /// Return the current size of all the files in the cache.
140    pub fn size(&self) -> u64 {
141        self.lru.size()
142    }
143
144    /// Return the count of entries in the cache.
145    pub fn len(&self) -> usize {
146        self.lru.len()
147    }
148
149    pub fn is_empty(&self) -> bool {
150        self.lru.len() == 0
151    }
152
153    /// Return the maximum size of the cache.
154    pub fn capacity(&self) -> u64 {
155        self.lru.capacity()
156    }
157
158    /// Return the path in which the cache is stored.
159    pub fn path(&self) -> &Path {
160        self.root.as_path()
161    }
162
163    /// Return the path that `key` would be stored at.
164    fn rel_to_abs_path<K: AsRef<Path>>(&self, rel_path: K) -> PathBuf {
165        self.root.join(rel_path)
166    }
167
168    /// Scan `self.root` for existing files and store them.
169    fn init(mut self) -> Result<Self> {
170        fs::create_dir_all(&self.root)?;
171        for (file, size) in get_all_files(&self.root) {
172            if !self.can_store(size) {
173                fs::remove_file(file).unwrap_or_else(|e| {
174                    error!(
175                        "Error removing file `{}` which is too large for the cache ({} bytes)",
176                        e, size
177                    )
178                });
179            } else {
180                self.add_file(AddFile::AbsPath(file), size)
181                    .unwrap_or_else(|e| error!("Error adding file: {}", e));
182            }
183        }
184        Ok(self)
185    }
186
187    /// Returns `true` if the disk cache can store a file of `size` bytes.
188    pub fn can_store(&self, size: u64) -> bool {
189        size <= self.lru.capacity() as u64
190    }
191
192    /// Add the file at `path` of size `size` to the cache.
193    fn add_file(&mut self, addfile_path: AddFile<'_>, size: u64) -> Result<()> {
194        if !self.can_store(size) {
195            return Err(Error::FileTooLarge);
196        }
197        let rel_path = match addfile_path {
198            AddFile::AbsPath(ref p) => p.strip_prefix(&self.root).expect("Bad path?").as_os_str(),
199            AddFile::RelPath(p) => p,
200        };
201        //TODO: ideally LRUCache::insert would give us back the entries it had to remove.
202        while self.lru.size() as u64 + size > self.lru.capacity() as u64 {
203            let (rel_path, _) = self.lru.remove_lru().expect("Unexpectedly empty cache!");
204            let remove_path = self.rel_to_abs_path(rel_path);
205            //TODO: check that files are removable during `init`, so that this is only
206            // due to outside interference.
207            fs::remove_file(&remove_path).unwrap_or_else(|e| {
208                panic!("Error removing file from cache: `{:?}`: {}", remove_path, e)
209            });
210        }
211        self.lru.insert(rel_path.to_owned(), size);
212        Ok(())
213    }
214
215    fn insert_by<K: AsRef<OsStr>, F: FnOnce(&Path) -> io::Result<()>>(
216        &mut self,
217        key: K,
218        size: Option<u64>,
219        by: F,
220    ) -> Result<()> {
221        if let Some(size) = size {
222            if !self.can_store(size) {
223                return Err(Error::FileTooLarge);
224            }
225        }
226        let rel_path = key.as_ref();
227        let path = self.rel_to_abs_path(rel_path);
228        fs::create_dir_all(path.parent().expect("Bad path?"))?;
229        by(&path)?;
230        let size = match size {
231            Some(size) => size,
232            None => fs::metadata(path)?.len(),
233        };
234        self.add_file(AddFile::RelPath(rel_path), size)
235            .map_err(|e| {
236                error!(
237                    "Failed to insert file `{}`: {}",
238                    rel_path.to_string_lossy(),
239                    e
240                );
241                fs::remove_file(&self.rel_to_abs_path(rel_path))
242                    .expect("Failed to remove file we just created!");
243                e
244            })
245    }
246
247    /// Add a file by calling `with` with the open `File` corresponding to the cache at path `key`.
248    pub fn insert_with<K: AsRef<OsStr>, F: FnOnce(File) -> io::Result<()>>(
249        &mut self,
250        key: K,
251        with: F,
252    ) -> Result<()> {
253        self.insert_by(key, None, |path| with(File::create(&path)?))
254    }
255
256    /// Add a file with `bytes` as its contents to the cache at path `key`.
257    pub fn insert_bytes<K: AsRef<OsStr>>(&mut self, key: K, bytes: &[u8]) -> Result<()> {
258        self.insert_by(key, Some(bytes.len() as u64), |path| {
259            let mut f = File::create(&path)?;
260            f.write_all(bytes)?;
261            Ok(())
262        })
263    }
264
265    /// Add an existing file at `path` to the cache at path `key`.
266    pub fn insert_file<K: AsRef<OsStr>, P: AsRef<OsStr>>(&mut self, key: K, path: P) -> Result<()> {
267        let size = fs::metadata(path.as_ref())?.len();
268        self.insert_by(key, Some(size), |new_path| {
269            fs::rename(path.as_ref(), new_path).or_else(|_| {
270                warn!("fs::rename failed, falling back to copy!");
271                fs::copy(path.as_ref(), new_path)?;
272                fs::remove_file(path.as_ref()).unwrap_or_else(|e| {
273                    error!("Failed to remove original file in insert_file: {}", e)
274                });
275                Ok(())
276            })
277        })
278    }
279
280    /// Return `true` if a file with path `key` is in the cache.
281    pub fn contains_key<K: AsRef<OsStr>>(&self, key: K) -> bool {
282        self.lru.contains_key(key.as_ref())
283    }
284
285    /// Get an opened `File` for `key`, if one exists and can be opened. Updates the LRU state
286    /// of the file if present. Avoid using this method if at all possible, prefer `.get`.
287    pub fn get_file<K: AsRef<OsStr>>(&mut self, key: K) -> Result<File> {
288        let rel_path = key.as_ref();
289        let path = self.rel_to_abs_path(rel_path);
290        self.lru
291            .get(rel_path)
292            .ok_or(Error::FileNotInCache)
293            .and_then(|_| {
294                let t = FileTime::now();
295                set_file_times(&path, t, t)?;
296                File::open(path).map_err(Into::into)
297            })
298    }
299
300    /// Get an opened readable and seekable handle to the file at `key`, if one exists and can
301    /// be opened. Updates the LRU state of the file if present.
302    pub fn get<K: AsRef<OsStr>>(&mut self, key: K) -> Result<Box<dyn ReadSeek>> {
303        self.get_file(key).map(|f| Box::new(f) as Box<dyn ReadSeek>)
304    }
305
306    /// Remove the given key from the cache.
307    pub fn remove<K: AsRef<OsStr>>(&mut self, key: K) -> Result<()> {
308        match self.lru.remove(key.as_ref()) {
309            Some(_) => {
310                let path = self.rel_to_abs_path(key.as_ref());
311                fs::remove_file(&path).map_err(|e| {
312                    error!("Error removing file from cache: `{:?}`: {}", path, e);
313                    Into::into(e)
314                })
315            }
316            None => Ok(()),
317        }
318    }
319}
320
321#[cfg(test)]
322mod tests {
323    use super::{Error, LruDiskCache};
324
325    use filetime::{set_file_times, FileTime};
326    use std::fs::{self, File};
327    use std::io::{self, Read, Write};
328    use std::path::{Path, PathBuf};
329    use tempfile::TempDir;
330
331    struct TestFixture {
332        /// Temp directory.
333        pub tempdir: TempDir,
334    }
335
336    fn create_file<T: AsRef<Path>, F: FnOnce(File) -> io::Result<()>>(
337        dir: &Path,
338        path: T,
339        fill_contents: F,
340    ) -> io::Result<PathBuf> {
341        let b = dir.join(path);
342        fs::create_dir_all(b.parent().unwrap())?;
343        let f = fs::File::create(&b)?;
344        fill_contents(f)?;
345        b.canonicalize()
346    }
347
348    /// Set the last modified time of `path` backwards by `seconds` seconds.
349    fn set_mtime_back<T: AsRef<Path>>(path: T, seconds: usize) {
350        let m = fs::metadata(path.as_ref()).unwrap();
351        let t = FileTime::from_last_modification_time(&m);
352        let t = FileTime::from_unix_time(t.unix_seconds() - seconds as i64, t.nanoseconds());
353        set_file_times(path, t, t).unwrap();
354    }
355
356    fn read_all<R: Read>(r: &mut R) -> io::Result<Vec<u8>> {
357        let mut v = vec![];
358        r.read_to_end(&mut v)?;
359        Ok(v)
360    }
361
362    impl TestFixture {
363        pub fn new() -> TestFixture {
364            TestFixture {
365                tempdir: tempfile::Builder::new()
366                    .prefix("lru-disk-cache-test")
367                    .tempdir()
368                    .unwrap(),
369            }
370        }
371
372        pub fn tmp(&self) -> &Path {
373            self.tempdir.path()
374        }
375
376        pub fn create_file<T: AsRef<Path>>(&self, path: T, size: usize) -> PathBuf {
377            create_file(self.tempdir.path(), path, |mut f| {
378                f.write_all(&vec![0; size])
379            })
380            .unwrap()
381        }
382    }
383
384    #[test]
385    fn test_empty_dir() {
386        let f = TestFixture::new();
387        LruDiskCache::new(f.tmp(), 1024).unwrap();
388    }
389
390    #[test]
391    fn test_missing_root() {
392        let f = TestFixture::new();
393        LruDiskCache::new(f.tmp().join("not-here"), 1024).unwrap();
394    }
395
396    #[test]
397    fn test_some_existing_files() {
398        let f = TestFixture::new();
399        f.create_file("file1", 10);
400        f.create_file("file2", 10);
401        let c = LruDiskCache::new(f.tmp(), 20).unwrap();
402        assert_eq!(c.size(), 20);
403        assert_eq!(c.len(), 2);
404    }
405
406    #[test]
407    fn test_existing_file_too_large() {
408        let f = TestFixture::new();
409        // Create files explicitly in the past.
410        set_mtime_back(f.create_file("file1", 10), 10);
411        set_mtime_back(f.create_file("file2", 10), 5);
412        let c = LruDiskCache::new(f.tmp(), 15).unwrap();
413        assert_eq!(c.size(), 10);
414        assert_eq!(c.len(), 1);
415        assert!(!c.contains_key("file1"));
416        assert!(c.contains_key("file2"));
417    }
418
419    #[test]
420    fn test_existing_files_lru_mtime() {
421        let f = TestFixture::new();
422        // Create files explicitly in the past.
423        set_mtime_back(f.create_file("file1", 10), 5);
424        set_mtime_back(f.create_file("file2", 10), 10);
425        let mut c = LruDiskCache::new(f.tmp(), 25).unwrap();
426        assert_eq!(c.size(), 20);
427        c.insert_bytes("file3", &vec![0; 10]).unwrap();
428        assert_eq!(c.size(), 20);
429        // The oldest file on disk should have been removed.
430        assert!(!c.contains_key("file2"));
431        assert!(c.contains_key("file1"));
432    }
433
434    #[test]
435    fn test_insert_bytes() {
436        let f = TestFixture::new();
437        let mut c = LruDiskCache::new(f.tmp(), 25).unwrap();
438        c.insert_bytes("a/b/c", &vec![0; 10]).unwrap();
439        assert!(c.contains_key("a/b/c"));
440        c.insert_bytes("a/b/d", &vec![0; 10]).unwrap();
441        assert_eq!(c.size(), 20);
442        // Adding this third file should put the cache above the limit.
443        c.insert_bytes("x/y/z", &vec![0; 10]).unwrap();
444        assert_eq!(c.size(), 20);
445        // The least-recently-used file should have been removed.
446        assert!(!c.contains_key("a/b/c"));
447        assert!(!f.tmp().join("a/b/c").exists());
448    }
449
450    #[test]
451    fn test_insert_bytes_exact() {
452        // Test that files adding up to exactly the size limit works.
453        let f = TestFixture::new();
454        let mut c = LruDiskCache::new(f.tmp(), 20).unwrap();
455        c.insert_bytes("file1", &vec![1; 10]).unwrap();
456        c.insert_bytes("file2", &vec![2; 10]).unwrap();
457        assert_eq!(c.size(), 20);
458        c.insert_bytes("file3", &vec![3; 10]).unwrap();
459        assert_eq!(c.size(), 20);
460        assert!(!c.contains_key("file1"));
461    }
462
463    #[test]
464    fn test_add_get_lru() {
465        let f = TestFixture::new();
466        {
467            let mut c = LruDiskCache::new(f.tmp(), 25).unwrap();
468            c.insert_bytes("file1", &vec![1; 10]).unwrap();
469            c.insert_bytes("file2", &vec![2; 10]).unwrap();
470            // Get the file to bump its LRU status.
471            assert_eq!(
472                read_all(&mut c.get("file1").unwrap()).unwrap(),
473                vec![1u8; 10]
474            );
475            // Adding this third file should put the cache above the limit.
476            c.insert_bytes("file3", &vec![3; 10]).unwrap();
477            assert_eq!(c.size(), 20);
478            // The least-recently-used file should have been removed.
479            assert!(!c.contains_key("file2"));
480        }
481        // Get rid of the cache, to test that the LRU persists on-disk as mtimes.
482        // This is hacky, but mtime resolution on my mac with HFS+ is only 1 second, so we either
483        // need to have a 1 second sleep in the test (boo) or adjust the mtimes back a bit so
484        // that updating one file to the current time actually works to make it newer.
485        set_mtime_back(f.tmp().join("file1"), 5);
486        set_mtime_back(f.tmp().join("file3"), 5);
487        {
488            let mut c = LruDiskCache::new(f.tmp(), 25).unwrap();
489            // Bump file1 again.
490            c.get("file1").unwrap();
491        }
492        // Now check that the on-disk mtimes were updated and used.
493        {
494            let mut c = LruDiskCache::new(f.tmp(), 25).unwrap();
495            assert!(c.contains_key("file1"));
496            assert!(c.contains_key("file3"));
497            assert_eq!(c.size(), 20);
498            // Add another file to bump out the least-recently-used.
499            c.insert_bytes("file4", &vec![4; 10]).unwrap();
500            assert_eq!(c.size(), 20);
501            assert!(!c.contains_key("file3"));
502            assert!(c.contains_key("file1"));
503        }
504    }
505
506    #[test]
507    fn test_insert_bytes_too_large() {
508        let f = TestFixture::new();
509        let mut c = LruDiskCache::new(f.tmp(), 1).unwrap();
510        match c.insert_bytes("a/b/c", &vec![0; 2]) {
511            Err(Error::FileTooLarge) => assert!(true),
512            x @ _ => panic!("Unexpected result: {:?}", x),
513        }
514    }
515
516    #[test]
517    fn test_insert_file() {
518        let f = TestFixture::new();
519        let p1 = f.create_file("file1", 10);
520        let p2 = f.create_file("file2", 10);
521        let p3 = f.create_file("file3", 10);
522        let mut c = LruDiskCache::new(f.tmp().join("cache"), 25).unwrap();
523        c.insert_file("file1", &p1).unwrap();
524        assert_eq!(c.len(), 1);
525        c.insert_file("file2", &p2).unwrap();
526        assert_eq!(c.len(), 2);
527        // Get the file to bump its LRU status.
528        assert_eq!(
529            read_all(&mut c.get("file1").unwrap()).unwrap(),
530            vec![0u8; 10]
531        );
532        // Adding this third file should put the cache above the limit.
533        c.insert_file("file3", &p3).unwrap();
534        assert_eq!(c.len(), 2);
535        assert_eq!(c.size(), 20);
536        // The least-recently-used file should have been removed.
537        assert!(!c.contains_key("file2"));
538        assert!(!p1.exists());
539        assert!(!p2.exists());
540        assert!(!p3.exists());
541    }
542
543    #[test]
544    fn test_remove() {
545        let f = TestFixture::new();
546        let p1 = f.create_file("file1", 10);
547        let p2 = f.create_file("file2", 10);
548        let p3 = f.create_file("file3", 10);
549        let mut c = LruDiskCache::new(f.tmp().join("cache"), 25).unwrap();
550        c.insert_file("file1", &p1).unwrap();
551        c.insert_file("file2", &p2).unwrap();
552        c.remove("file1").unwrap();
553        c.insert_file("file3", &p3).unwrap();
554        assert_eq!(c.len(), 2);
555        assert_eq!(c.size(), 20);
556
557        // file1 should have been removed.
558        assert!(!c.contains_key("file1"));
559        assert!(!f.tmp().join("cache").join("file1").exists());
560        assert!(f.tmp().join("cache").join("file2").exists());
561        assert!(f.tmp().join("cache").join("file3").exists());
562        assert!(!p1.exists());
563        assert!(!p2.exists());
564        assert!(!p3.exists());
565
566        let p4 = f.create_file("file1", 10);
567        c.insert_file("file1", &p4).unwrap();
568        assert_eq!(c.len(), 2);
569        // file2 should have been removed.
570        assert!(c.contains_key("file1"));
571        assert!(!c.contains_key("file2"));
572        assert!(!f.tmp().join("cache").join("file2").exists());
573        assert!(!p4.exists());
574    }
575}