kismet_cache/
plain.rs

//! A [`crate::plain::Cache`] stores all cached files in a single
//! directory (there may also be a `.kismet_temp` subdirectory for
//! temporary files), and periodically scans for evictions with a
//! second chance strategy.  This implementation does not scale up to
//! more than a few hundred files per cache directory (a
//! [`crate::sharded::Cache`] can go higher), but interoperates
//! seamlessly with other file-based programs that store cache files
//! in flat directories.
//!
//! This module is useful for lower-level usage; in most cases, the
//! [`crate::Cache`] is more convenient and just as efficient.  In
//! particular, a `crate::plain::Cache` *does not* invoke
//! [`std::fs::File::sync_all`] or [`std::fs::File::sync_data`]: the
//! caller should sync files before letting Kismet persist them in a
//! directory, if necessary.
//!
//! The cache's contents will grow past its stated capacity, but
//! should rarely reach more than twice that capacity.
19use std::borrow::Cow;
20use std::fs::File;
21use std::io::Result;
22use std::path::Path;
23use std::path::PathBuf;
24
25use crate::cache_dir::CacheDir;
26use crate::trigger::PeriodicTrigger;
27use crate::KISMET_TEMPORARY_SUBDIRECTORY as TEMP_SUBDIR;
28
/// How many times we want to trigger maintenance per "capacity"
/// inserts.  For example, `MAINTENANCE_SCALE = 3` means we will
/// expect to trigger maintenance after inserting or updating
/// ~capacity / 3 files in the cache.
///
/// `Cache::new` divides the requested capacity by this value to
/// obtain the period it hands to `PeriodicTrigger::new`.
const MAINTENANCE_SCALE: usize = 3;
34
/// A "plain" cache is a single directory of files.  Given a capacity
/// of `k` files, we will trigger a second chance maintenance roughly
/// every `k / 3` (`k / 6` in the long run, given the way
/// `PeriodicTrigger` is implemented) insertions.
#[derive(Clone, Debug)]
pub struct Cache {
    // Path of the subdirectory for temporary files.  The cached
    // files are siblings of this directory, i.e., they live in its
    // parent directory.
    temp_dir: PathBuf,

    // Initialised to trigger a second chance maintenance roughly
    // every `capacity / MAINTENANCE_SCALE` cache writes.
    trigger: PeriodicTrigger,

    // The directory has a capacity of roughly this many files;
    // between maintenance, the actual file count may temporarily
    // exceed that capacity.
    capacity: usize,
}
54
55impl CacheDir for Cache {
56    #[inline]
57    fn temp_dir(&self) -> Cow<Path> {
58        Cow::from(&self.temp_dir)
59    }
60
61    #[inline]
62    fn base_dir(&self) -> Cow<Path> {
63        Cow::from(self.temp_dir.parent().unwrap_or(&self.temp_dir))
64    }
65
66    #[inline]
67    fn trigger(&self) -> &PeriodicTrigger {
68        &self.trigger
69    }
70
71    #[inline]
72    fn capacity(&self) -> usize {
73        self.capacity
74    }
75}
76
77impl Cache {
78    /// Returns a new cache for approximately `capacity` files in
79    /// `base_dir`.
80    pub fn new(base_dir: PathBuf, capacity: usize) -> Cache {
81        let mut temp_dir = base_dir;
82
83        temp_dir.push(TEMP_SUBDIR);
84        Cache {
85            temp_dir,
86            trigger: PeriodicTrigger::new((capacity / MAINTENANCE_SCALE) as u64),
87            capacity,
88        }
89    }
90
91    /// Returns a read-only file for `name` in the cache directory if
92    /// it exists, or None if there is no such file.  Fails with
93    /// `ErrorKind::InvalidInput` if `name` is invalid (empty, or
94    /// starts with a dot or a forward or back slash).
95    ///
96    ///
97    /// Implicitly "touches" the cached file `name` if it exists.
98    pub fn get(&self, name: &str) -> Result<Option<File>> {
99        CacheDir::get(self, name)
100    }
101
102    /// Returns a temporary directory suitable for temporary files
103    /// that will be published to the cache directory.
104    pub fn temp_dir(&self) -> Result<Cow<Path>> {
105        CacheDir::ensure_temp_dir(self)
106    }
107
108    /// Inserts or overwrites the file at `value` as `name` in the
109    /// cache directory.  Fails with `ErrorKind::InvalidInput` if
110    /// `name` is invalid (empty, or starts with a dot or a forward
111    /// or back slash).
112    ///
113    /// Always consumes the file at `value` on success; may consume it
114    /// on error.
115    pub fn set(&self, name: &str, value: &Path) -> Result<()> {
116        CacheDir::set(self, name, value)?;
117        Ok(())
118    }
119
120    /// Inserts the file at `value` as `name` in the cache directory
121    /// if there is no such cached entry already, or touches the
122    /// cached file if it already exists.  Fails with
123    /// `ErrorKind::InvalidInput` if `name` is invalid (empty, or
124    /// starts with a dot or a forward or back slash).
125    ///
126    /// Always consumes the file at `value` on success; may consume it
127    /// on error.
128    pub fn put(&self, name: &str, value: &Path) -> Result<()> {
129        CacheDir::put(self, name, value)?;
130        Ok(())
131    }
132
133    /// Marks the cached file `name` as newly used, if it exists.
134    /// Fails with `ErrorKind::InvalidInput` if `name` is invalid
135    /// (empty, or starts with a dot or a forward or back slash).
136    ///
137    /// Returns whether `name` exists.
138    pub fn touch(&self, name: &str) -> Result<bool> {
139        CacheDir::touch(self, name)
140    }
141}
142
/// Inserts 20 files in a cache with a capacity of 10.  At least 10
/// but fewer than 20 entries must remain, each with the expected
/// contents, and an old file left in the temporary subdirectory
/// must be gone by the end.
#[test]
fn smoke_test() {
    use std::io::Read;
    use tempfile::NamedTempFile;
    use test_dir::{DirBuilder, FileType, TestDir};

    // File `i` holds the decimal representation of
    // `PAYLOAD_MULTIPLIER * i`.
    const PAYLOAD_MULTIPLIER: usize = 13;

    // Seed the temporary subdirectory with a stray file; the final
    // assertion checks that it has been removed.
    let garbage = format!("{}/garbage", TEMP_SUBDIR);
    let temp = TestDir::temp()
        .create(TEMP_SUBDIR, FileType::Dir)
        .create(&garbage, FileType::ZeroFile(10));
    let garbage_path = temp.path(&garbage);

    // The stray file must exist to begin with.
    assert!(std::fs::metadata(&garbage_path).is_ok());

    // Let the stray file age enough to be eligible for deletion.
    std::thread::sleep(std::time::Duration::from_secs_f64(2.5));
    let cache = Cache::new(temp.path("."), 10);

    for i in 0..20usize {
        let staging_dir = cache.temp_dir().expect("temp_dir must succeed");
        let tmp = NamedTempFile::new_in(staging_dir).expect("new temp file must succeed");
        let payload = (PAYLOAD_MULTIPLIER * i).to_string();

        std::fs::write(tmp.path(), payload).expect("write must succeed");
        cache
            .put(&i.to_string(), tmp.path())
            .expect("put must succeed");
    }

    // Count the surviving entries, checking each one's contents
    // along the way.
    let present = (0..20usize)
        .filter(|&i| {
            match cache.get(&i.to_string()).expect("get must succeed") {
                None => false,
                Some(mut file) => {
                    let mut buf = Vec::new();

                    file.read_to_end(&mut buf).expect("read must succeed");
                    assert_eq!(buf, (PAYLOAD_MULTIPLIER * i).to_string().into_bytes());
                    true
                }
            }
        })
        .count();

    assert!(present >= 10);
    assert!(present < 20);

    // The stray temporary file must have been deleted by now.
    let garbage_status = std::fs::metadata(&garbage_path);
    assert!(matches!(
        &garbage_status,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound
    ));
}
199
/// Sets an entry and reads it back, then sets the same name again
/// and checks that readers observe the replacement contents.
#[test]
fn test_set() {
    use std::io::{Read, Write};
    use tempfile::NamedTempFile;
    use test_dir::{DirBuilder, TestDir};

    let temp = TestDir::temp();
    let cache = Cache::new(temp.path("."), 1);

    // Creates a temporary file in the cache's temporary directory
    // and fills it with `payload`.
    let stage = |payload: &[u8]| {
        let tmp = NamedTempFile::new_in(cache.temp_dir().expect("temp_dir must succeed"))
            .expect("new temp file must succeed");

        tmp.as_file()
            .write_all(payload)
            .expect("write must succeed");
        tmp
    };

    // Reads the full contents currently cached under "entry".
    let read_entry = || {
        let mut cached = cache
            .get("entry")
            .expect("must succeed")
            .expect("must be found");
        let mut dst = Vec::new();

        cached.read_to_end(&mut dst).expect("read must succeed");
        dst
    };

    let first = stage(b"v1");
    cache
        .set("entry", first.path())
        .expect("initial set must succeed");
    drop(first);

    assert_eq!(read_entry(), b"v1");

    // Now overwrite; it should take.
    let second = stage(b"v2");
    cache
        .set("entry", second.path())
        .expect("overwrite must succeed");
    drop(second);

    assert_eq!(read_entry(), b"v2");
}
252
/// Publish a file with `put`, make sure we can read it, and make
/// sure that a second `put` under the same name does not update its
/// contents.
#[test]
fn test_put() {
    use std::io::{Read, Write};
    use tempfile::NamedTempFile;
    use test_dir::{DirBuilder, TestDir};

    let temp = TestDir::temp();
    let cache = Cache::new(temp.path("."), 1);

    {
        let tmp = NamedTempFile::new_in(cache.temp_dir().expect("temp_dir must succeed"))
            .expect("new temp file must succeed");
        tmp.as_file().write_all(b"v1").expect("write must succeed");

        cache
            .put("entry", tmp.path())
            .expect("initial put must succeed");
    }

    {
        let mut cached = cache
            .get("entry")
            .expect("must succeed")
            .expect("must be found");
        let mut dst = Vec::new();
        cached.read_to_end(&mut dst).expect("read must succeed");
        assert_eq!(&dst, b"v1");
    }

    // Now put again; the call itself must succeed, but it must not
    // overwrite the entry that is already cached.
    {
        let tmp = NamedTempFile::new_in(cache.temp_dir().expect("temp_dir must succeed"))
            .expect("new temp file must succeed");
        tmp.as_file().write_all(b"v2").expect("write must succeed");

        cache
            .put("entry", tmp.path())
            .expect("second put must succeed");
    }

    // The contents from the first put must still be visible.
    {
        let mut cached = cache
            .get("entry")
            .expect("must succeed")
            .expect("must be found");
        let mut dst = Vec::new();
        cached.read_to_end(&mut dst).expect("read must succeed");
        assert_eq!(&dst, b"v1");
    }
}
305
/// Keep publishing new files, but also always touch the first.
/// That first file should never be deleted.
#[test]
fn test_touch() {
    use tempfile::NamedTempFile;
    use test_dir::{DirBuilder, TestDir};

    let temp = TestDir::temp();
    let cache = Cache::new(temp.path("."), 5);

    for i in 0..15 {
        let name = i.to_string();

        // Before the first write there is nothing to touch; after
        // it, touch should always find our file.
        assert_eq!(cache.touch("0").expect("touch must not fail"), i > 0);

        let tmp = NamedTempFile::new_in(cache.temp_dir().expect("temp_dir must succeed"))
            .expect("new temp file must succeed");
        cache.put(&name, tmp.path()).expect("put must succeed");

        // Make sure enough time elapses for the first file to get
        // an older timestamp than the rest.
        if i == 0 {
            std::thread::sleep(std::time::Duration::from_secs_f64(1.5));
        }
    }

    // We should still find "0": it's the oldest, but we also keep
    // touching it.
    cache
        .get("0")
        .expect("must succeed")
        .expect("must be found");
}
336
/// Runs enough insertions to trigger a cleanup while a freshly
/// created file sits in the temporary subdirectory.  That file is
/// too recent to be treated as garbage and must be left in place.
#[test]
fn test_recent_temp_file() {
    use tempfile::NamedTempFile;
    use test_dir::{DirBuilder, FileType, TestDir};

    // Leave a brand new file in the temporary subdirectory; the
    // final assertion checks that it is still there.
    let garbage = format!("{}/garbage", TEMP_SUBDIR);
    let temp = TestDir::temp()
        .create(TEMP_SUBDIR, FileType::Dir)
        .create(&garbage, FileType::ZeroFile(10));
    let garbage_path = temp.path(&garbage);

    // The file must exist to begin with.
    assert!(std::fs::metadata(&garbage_path).is_ok());

    let cache = Cache::new(temp.path("."), 1);

    for i in 0..2 {
        let name = i.to_string();
        let staging_dir = cache.temp_dir().expect("temp_dir must succeed");
        let tmp = NamedTempFile::new_in(staging_dir).expect("new temp file must succeed");

        cache.put(&name, tmp.path()).expect("put must succeed");
    }

    // The recent file must have survived the insertions.
    assert!(std::fs::metadata(&garbage_path).is_ok());
}
362    // The garbage file must still exist.
363    assert!(std::fs::metadata(temp.path(&format!("{}/garbage", TEMP_SUBDIR))).is_ok());
364}