kismet_cache/
readonly.rs

1//! A `ReadOnlyCache` wraps an arbitrary number of caches, and
2//! attempts to satisfy `get` and `touch` requests by hitting each
3//! cache in order.  For read-only usage, this should be a simple
4//! and easy-to-use interface that erases the difference between plain
5//! and sharded caches.
6use std::fs::File;
7#[allow(unused_imports)] // We refer to this enum in comments.
8use std::io::ErrorKind;
9use std::io::Result;
10use std::path::Path;
11use std::sync::Arc;
12
13use derivative::Derivative;
14
15use crate::plain::Cache as PlainCache;
16use crate::sharded::Cache as ShardedCache;
17use crate::Key;
18
/// Shared callback used to cross-check cache hits for a single key:
/// it receives the two files found in different caches, and must
/// return `Err` when their contents are incompatible.
type ConsistencyChecker = Arc<
    dyn Fn(&mut File, &mut File) -> Result<()>
        + Send
        + Sync
        + std::panic::UnwindSafe
        + std::panic::RefUnwindSafe,
>;
28
29/// The `ReadSide` trait offers `get` and `touch`, as implemented by
30/// both plain and sharded caches.
31trait ReadSide:
32    std::fmt::Debug + Sync + Send + std::panic::RefUnwindSafe + std::panic::UnwindSafe
33{
34    /// Returns a read-only file for `key` in the cache directory if
35    /// it exists, or None if there is no such file.
36    ///
37    /// Implicitly "touches" the cached file if it exists.
38    fn get(&self, key: Key) -> Result<Option<File>>;
39
40    /// Marks the cached file `key` as newly used, if it exists.
41    ///
42    /// Returns whether a file for `key` exists in the cache.
43    fn touch(&self, key: Key) -> Result<bool>;
44}
45
46impl ReadSide for PlainCache {
47    fn get(&self, key: Key) -> Result<Option<File>> {
48        PlainCache::get(self, key.name)
49    }
50
51    fn touch(&self, key: Key) -> Result<bool> {
52        PlainCache::touch(self, key.name)
53    }
54}
55
56impl ReadSide for ShardedCache {
57    fn get(&self, key: Key) -> Result<Option<File>> {
58        ShardedCache::get(self, key)
59    }
60
61    fn touch(&self, key: Key) -> Result<bool> {
62        ShardedCache::touch(self, key)
63    }
64}
65
66/// Construct a [`ReadOnlyCache`] with this builder.  The resulting
67/// cache will access each constituent cache directory in the order
68/// they were added.
69///
70/// The default builder is a fresh builder with no constituent cache
71/// and no consistency check function.
72#[derive(Default, Derivative)]
73#[derivative(Debug)]
74pub struct ReadOnlyCacheBuilder {
75    stack: Vec<Box<dyn ReadSide>>,
76
77    #[derivative(Debug = "ignore")]
78    consistency_checker: Option<ConsistencyChecker>,
79}
80
81/// A [`ReadOnlyCache`] wraps an arbitrary number of
82/// [`crate::plain::Cache`] and [`crate::sharded::Cache`], and attempts
83/// to satisfy [`ReadOnlyCache::get`] and [`ReadOnlyCache::touch`]
84/// requests by hitting each constituent cache in order.  This
85/// interface hides the difference between plain and sharded cache
86/// directories, and should be the first resort for read-only uses.
87///
88/// The default cache wraps an empty set of constituent caches and
89/// performs no consistency check.
90///
91/// [`ReadOnlyCache`] objects are stateless and cheap to clone; don't
92/// put an [`Arc`] on them.  Avoid creating multiple
93/// [`ReadOnlyCache`]s for the same stack of directories: there is no
94/// internal state to maintain, so multiple instances simply waste
95/// memory without any benefit.
96#[derive(Clone, Derivative)]
97#[derivative(Debug)]
98pub struct ReadOnlyCache {
99    stack: Arc<[Box<dyn ReadSide>]>,
100
101    /// When populated, the `ReadOnlyCache` keeps searching after the
102    /// first cache hit, and compares subsequent hits with the first one
103    /// by calling the `consistency_checker` function.  That function
104    /// should return `Ok(())` if the two files are compatible (identical),
105    /// and `Err` otherwise.
106    #[derivative(Debug = "ignore")]
107    consistency_checker: Option<ConsistencyChecker>,
108}
109
110impl ReadOnlyCacheBuilder {
111    /// Returns a fresh empty builder.
112    pub fn new() -> Self {
113        Self::default()
114    }
115
116    /// Sets the consistency checker function: when the function is
117    /// provided, the `ReadOnlyCache` will keep searching after the
118    /// first cache hit, and compare subsequent hits with the first
119    /// one by calling `checker`.  The `checker` function should
120    /// return `Ok(())` if the two files are compatible (identical),
121    /// and `Err` otherwise.
122    ///
123    /// Kismet will propagate the error on mismatch.
124    pub fn consistency_checker(
125        &mut self,
126        checker: impl Fn(&mut File, &mut File) -> Result<()>
127            + Sync
128            + Send
129            + std::panic::RefUnwindSafe
130            + std::panic::UnwindSafe
131            + Sized
132            + 'static,
133    ) -> &mut Self {
134        self.arc_consistency_checker(Some(Arc::new(checker)))
135    }
136
137    /// Sets the consistency checker function to
138    /// [`crate::byte_equality_checker`]: the contents of all cache
139    /// hits must be bytewise identical, without considering any
140    /// metadata.
141    pub fn byte_equality_checker(&mut self) -> &mut Self {
142        self.consistency_checker(crate::byte_equality_checker)
143    }
144
145    /// Sets the consistency checker function to
146    /// [`crate::panicking_byte_equality_checker`]: the contents of
147    /// all cache hits must be bytewise identical, without considering
148    /// any metadata, and the call will panic on mismatch.
149    pub fn panicking_byte_equality_checker(&mut self) -> &mut Self {
150        self.consistency_checker(crate::panicking_byte_equality_checker)
151    }
152
153    /// Removes the consistency checker function, if any.
154    pub fn clear_consistency_checker(&mut self) -> &mut Self {
155        self.arc_consistency_checker(None)
156    }
157
158    /// Sets the consistency checker function.  `None` clears the
159    /// checker function.  See
160    /// [`ReadOnlyCacheBuilder::consistency_checker`].
161    #[allow(clippy::type_complexity)] // We want the public type to be transparent
162    pub fn arc_consistency_checker(
163        &mut self,
164        checker: Option<
165            Arc<
166                dyn Fn(&mut File, &mut File) -> Result<()>
167                    + Sync
168                    + Send
169                    + std::panic::RefUnwindSafe
170                    + std::panic::UnwindSafe,
171            >,
172        >,
173    ) -> &mut Self {
174        self.consistency_checker = checker;
175        self
176    }
177
178    /// Adds a new cache directory at `path` to the end of the cache
179    /// builder's search list.
180    ///
181    /// Adds a plain cache directory if `num_shards <= 1`, and an
182    /// actual sharded directory otherwise.
183    pub fn cache(&mut self, path: impl AsRef<Path>, num_shards: usize) -> &mut Self {
184        if num_shards <= 1 {
185            self.plain(path)
186        } else {
187            self.sharded(path, num_shards)
188        }
189    }
190
191    /// Adds a new plain cache directory at `path` to the end of the
192    /// cache builder's search list.  A plain cache directory is
193    /// merely a directory of files where the files' names match their
194    /// key's name.
195    pub fn plain(&mut self, path: impl AsRef<Path>) -> &mut Self {
196        self.stack.push(Box::new(PlainCache::new(
197            path.as_ref().to_owned(),
198            usize::MAX,
199        )));
200
201        self
202    }
203
204    /// Adds a new plain cache directory for each path in `paths`.
205    /// The caches are appended in order to the end of the cache
206    /// builder's search list.
207    pub fn plain_caches<P>(&mut self, paths: impl IntoIterator<Item = P>) -> &mut Self
208    where
209        P: AsRef<Path>,
210    {
211        for path in paths {
212            self.plain(path);
213        }
214
215        self
216    }
217
218    /// Adds a new sharded cache directory at `path` to the end of the
219    /// cache builder's search list.
220    pub fn sharded(&mut self, path: impl AsRef<Path>, num_shards: usize) -> &mut Self {
221        self.stack.push(Box::new(ShardedCache::new(
222            path.as_ref().to_owned(),
223            num_shards,
224            usize::MAX,
225        )));
226        self
227    }
228
229    /// Returns the contents of `self` as a fresh value; `self` is
230    /// reset to the default empty builder state.  This makes it
231    /// possible to declare simple configurations in a single
232    /// expression, with `.take().build()`.
233    pub fn take(&mut self) -> Self {
234        std::mem::take(self)
235    }
236
237    /// Returns a fresh [`ReadOnlyCache`] for the builder's search list
238    /// of constituent cache directories.
239    pub fn build(self) -> ReadOnlyCache {
240        ReadOnlyCache::new(self.stack, self.consistency_checker)
241    }
242}
243
244impl Default for ReadOnlyCache {
245    fn default() -> ReadOnlyCache {
246        ReadOnlyCache::new(Default::default(), None)
247    }
248}
249
250impl ReadOnlyCache {
251    fn new(
252        stack: Vec<Box<dyn ReadSide>>,
253        consistency_checker: Option<ConsistencyChecker>,
254    ) -> ReadOnlyCache {
255        ReadOnlyCache {
256            stack: stack.into_boxed_slice().into(),
257            consistency_checker,
258        }
259    }
260
261    /// Attempts to open a read-only file for `key`.  The
262    /// [`ReadOnlyCache`] will query each constituent cache in order
263    /// of registration, and return a read-only file for the first
264    /// hit.
265    ///
266    /// Fails with [`ErrorKind::InvalidInput`] if `key.name` is
267    /// invalid (empty, or starts with a dot or a forward or back slash).
268    ///
269    /// Returns [`None`] if no file for `key` can be found in any of
270    /// the constituent caches, and bubbles up the first I/O error
271    /// encountered, if any.
272    ///
273    /// In the worst case, each call to `get` attempts to open two
274    /// files for each cache directory in the `ReadOnlyCache` stack.
275    pub fn get<'a>(&self, key: impl Into<Key<'a>>) -> Result<Option<File>> {
276        fn doit(
277            stack: &[Box<dyn ReadSide>],
278            checker: &Option<ConsistencyChecker>,
279            key: Key,
280        ) -> Result<Option<File>> {
281            use std::io::Seek;
282            use std::io::SeekFrom;
283
284            let mut ret = None;
285            for cache in stack.iter() {
286                let mut hit = match cache.get(key)? {
287                    Some(hit) => hit,
288                    None => continue,
289                };
290
291                match checker {
292                    None => return Ok(Some(hit)),
293                    Some(checker) => match ret.as_mut() {
294                        None => ret = Some(hit),
295                        Some(prev) => {
296                            checker(prev, &mut hit)?;
297                            prev.seek(SeekFrom::Start(0))?;
298                        }
299                    },
300                }
301            }
302
303            Ok(ret)
304        }
305
306        if self.stack.is_empty() {
307            return Ok(None);
308        }
309
310        doit(&self.stack, &self.consistency_checker, key.into())
311    }
312
313    /// Marks a cache entry for `key` as accessed (read).  The
314    /// [`ReadOnlyCache`] will touch the same file that would be
315    /// returned by `get`.
316    ///
317    /// Fails with [`ErrorKind::InvalidInput`] if `key.name` is
318    /// invalid (empty, or starts with a dot or a forward or back slash).
319    ///
320    /// Returns whether a file for `key` could be found, and bubbles
321    /// up the first I/O error encountered, if any.
322    ///
323    /// In the worst case, each call to `touch` attempts to update the
324    /// access time on two files for each cache directory in the
325    /// `ReadOnlyCache` stack.
326    pub fn touch<'a>(&self, key: impl Into<Key<'a>>) -> Result<bool> {
327        fn doit(stack: &[Box<dyn ReadSide>], key: Key) -> Result<bool> {
328            for cache in stack.iter() {
329                if cache.touch(key)? {
330                    return Ok(true);
331                }
332            }
333
334            Ok(false)
335        }
336
337        if self.stack.is_empty() {
338            return Ok(false);
339        }
340
341        doit(&self.stack, key.into())
342    }
343}
344
#[cfg(test)]
mod test {
    use std::fs::File;
    use std::io::{Read, Write};
    use std::sync::atomic::{AtomicU64, Ordering};
    use std::sync::Arc;

    use tempfile::NamedTempFile;
    use test_dir::{DirBuilder, FileType, TestDir};

    use crate::plain::Cache as PlainCache;
    use crate::sharded::Cache as ShardedCache;
    use crate::Key;
    use crate::ReadOnlyCache;
    use crate::ReadOnlyCacheBuilder;

    /// Owned key name that converts to a [`Key`] with fixed hashes.
    struct TestKey {
        key: String,
    }

    impl TestKey {
        fn new(key: &str) -> TestKey {
            let key = key.to_owned();

            TestKey { key }
        }
    }

    impl<'a> From<&'a TestKey> for Key<'a> {
        fn from(source: &'a TestKey) -> Key<'a> {
            Key::new(&source.key, 0, 1)
        }
    }

    /// Wraps [`crate::byte_equality_checker`] in a closure that also
    /// increments `calls` on every invocation.
    fn counting_checker(
        calls: Arc<AtomicU64>,
    ) -> impl 'static + Fn(&mut File, &mut File) -> std::io::Result<()> {
        move |left: &mut File, right: &mut File| {
            calls.fetch_add(1, Ordering::Relaxed);
            crate::byte_equality_checker(left, right)
        }
    }

    /// Looks up `name` in `ro`; panics on error and on cache miss.
    fn must_get(ro: &ReadOnlyCache, name: &str) -> File {
        ro.get(&TestKey::new(name))
            .expect("must succeed")
            .expect("must exist")
    }

    /// Reads `file` to the end and returns its bytes.
    fn read_all(mut file: File) -> Vec<u8> {
        let mut bytes = Vec::new();

        file.read_to_end(&mut bytes).expect("read must succeed");
        bytes
    }

    /// Creates a temporary directory with `first/0` as a
    /// `ZeroFile(2)` and `second/0` as a `ZeroFile(3)`: the same key
    /// maps to files of different sizes in the two subdirectories.
    fn mismatched_dirs() -> TestDir {
        TestDir::temp()
            .create("first", FileType::Dir)
            .create("second", FileType::Dir)
            .create("first/0", FileType::ZeroFile(2))
            .create("second/0", FileType::ZeroFile(3))
    }

    /// A stack of 0 caches should always succeed with a trivial result.
    #[test]
    fn empty() {
        let ro = ReadOnlyCache::default();
        let key = Key::new("foo", 1, 2);

        assert!(matches!(ro.get(key), Ok(None)));
        assert!(matches!(ro.touch(key), Ok(false)));
    }

    /// Populate two plain caches and set a consistency checker.  We
    /// should access both.
    #[test]
    fn consistency_checker_success() {
        let temp = TestDir::temp()
            .create("first", FileType::Dir)
            .create("second", FileType::Dir)
            .create("first/0", FileType::ZeroFile(2))
            .create("second/0", FileType::ZeroFile(2))
            .create("first/1", FileType::RandomFile(10))
            .create("second/2", FileType::RandomFile(10));

        let calls = Arc::new(AtomicU64::new(0));

        let ro = ReadOnlyCacheBuilder::new()
            .plain(temp.path("first"))
            .plain(temp.path("second"))
            .consistency_checker(counting_checker(Arc::clone(&calls)))
            .take()
            .build();

        // Key "0" lives in both caches: exactly one comparison.
        let mut hit = must_get(&ro, "0");
        assert_eq!(calls.load(Ordering::Relaxed), 1);

        let mut contents = Vec::new();
        hit.read_to_end(&mut contents).expect("read should succeed");
        assert_eq!(contents, "00".as_bytes());

        // Keys "1" and "2" are only found in one subcache each, so
        // there's nothing to check and the count must not move.
        for name in ["1", "2"].iter() {
            let _ = must_get(&ro, name);
            assert_eq!(calls.load(Ordering::Relaxed), 1);
        }
    }

    /// Populate two plain caches and set a consistency checker.  We
    /// should error on mismatch.
    #[test]
    fn consistency_checker_failure() {
        let temp = mismatched_dirs();
        let calls = Arc::new(AtomicU64::new(0));

        let ro = ReadOnlyCacheBuilder::new()
            .plain(temp.path("first"))
            .plain(temp.path("second"))
            .consistency_checker(counting_checker(calls))
            .take()
            .build();

        // This call should error.
        let outcome = ro.get(&TestKey::new("0"));
        assert!(outcome.is_err());
    }

    /// Populate two plain caches and unset the consistency checker.  We
    /// should not error.
    #[test]
    fn consistency_checker_silent_failure() {
        let temp = mismatched_dirs();
        let calls = Arc::new(AtomicU64::new(0));

        let ro = ReadOnlyCacheBuilder::new()
            .plain(temp.path("first"))
            .plain(temp.path("second"))
            .consistency_checker(counting_checker(Arc::clone(&calls)))
            .clear_consistency_checker()
            .take()
            .build();

        // This call should not error.
        let _ = must_get(&ro, "0");

        // There should be no call to the checker function.
        assert_eq!(calls.load(Ordering::Relaxed), 0);
    }

    /// Populate two plain caches.  We should read from both.
    #[test]
    fn two_plain_caches() {
        let temp = TestDir::temp()
            .create("first", FileType::Dir)
            .create("second", FileType::Dir)
            .create("first/0", FileType::ZeroFile(2))
            .create("second/1", FileType::ZeroFile(3));

        let ro = ReadOnlyCacheBuilder::new()
            .plain_caches(["first", "second"].iter().map(|dir| temp.path(dir)))
            .take()
            .build();

        // We should find 0 and 1.
        let _ = must_get(&ro, "0");
        let _ = must_get(&ro, "1");

        // But not 2.
        let missing = ro.get(&TestKey::new("2")).expect("must succeed");
        assert!(missing.is_none());
    }

    /// Use a byte equality checker with two different cache files for
    /// the same key.  We should find an error.
    #[test]
    fn test_byte_equality_checker() {
        let temp = mismatched_dirs();

        let ro = ReadOnlyCacheBuilder::new()
            .plain_caches(["first", "second"].iter().map(|dir| temp.path(dir)))
            .byte_equality_checker()
            .take()
            .build();

        let outcome = ro.get(&TestKey::new("0"));
        assert!(outcome.is_err());
    }

    /// Use a panicking byte equality checker with two different cache
    /// files for the same key.  The lookup should panic instead of
    /// returning.
    #[test]
    #[should_panic(expected = "file contents do not match")]
    fn test_panicking_byte_equality_checker() {
        let temp = mismatched_dirs();

        let ro = ReadOnlyCacheBuilder::new()
            .plain_caches(["first", "second"].iter().map(|dir| temp.path(dir)))
            .panicking_byte_equality_checker()
            .take()
            .build();

        // We should fail before returning Err.
        assert!(ro.get(&TestKey::new("0")).is_ok());
    }

    /// Populate a plain and a sharded cache. We should be able to access
    /// both.
    #[test]
    fn smoke_test() {
        let temp = TestDir::temp()
            .create("sharded", FileType::Dir)
            .create("plain", FileType::Dir);

        // Fill the sharded cache with keys `a` and `b`.
        {
            let sharded = ShardedCache::new(temp.path("sharded"), 10, 20);

            let first =
                NamedTempFile::new_in(sharded.temp_dir(None).expect("temp_dir must succeed"))
                    .expect("new temp file must succeed");
            first
                .as_file()
                .write_all(b"sharded")
                .expect("write must succeed");
            sharded
                .put(Key::new("a", 0, 1), first.path())
                .expect("put must succeed");

            let second =
                NamedTempFile::new_in(sharded.temp_dir(None).expect("temp_dir must succeed"))
                    .expect("new temp file must succeed");
            second
                .as_file()
                .write_all(b"sharded2")
                .expect("write must succeed");
            sharded
                .put(Key::new("b", 0, 1), second.path())
                .expect("put must succeed");
        }

        // Fill the plain cache with keys `b` and `c`.
        {
            let plain = PlainCache::new(temp.path("plain"), 10);

            let first = NamedTempFile::new_in(plain.temp_dir().expect("temp_dir must succeed"))
                .expect("new temp file must succeed");
            first
                .as_file()
                .write_all(b"plain")
                .expect("write must succeed");
            plain.put("b", first.path()).expect("put must succeed");

            let second = NamedTempFile::new_in(plain.temp_dir().expect("temp_dir must succeed"))
                .expect("new temp file must succeed");
            second
                .as_file()
                .write_all(b"plain2")
                .expect("write must succeed");
            plain.put("c", second.path()).expect("put must succeed");
        }

        // At this point:
        //   sharded.a => "sharded"
        //   sharded.b => "sharded2"
        //   plain.b => "plain"
        //   plain.c => "plain2"

        // Read from sharded, then plain.
        {
            let ro = ReadOnlyCacheBuilder::new()
                .sharded(temp.path("sharded"), 10)
                .plain(temp.path("plain"))
                .take()
                .build();

            assert!(matches!(ro.get(&TestKey::new("Missing")), Ok(None)));
            assert!(matches!(ro.touch(&TestKey::new("Missing")), Ok(false)));

            // We should be able to touch `a`.
            assert!(matches!(ro.touch(&TestKey::new("a")), Ok(true)));

            // And now check that we get the correct file contents;
            // `b` exists in both caches, and the sharded one wins.
            assert_eq!(read_all(must_get(&ro, "a")), b"sharded");
            assert_eq!(read_all(must_get(&ro, "b")), b"sharded2");
            assert_eq!(read_all(must_get(&ro, "c")), b"plain2");
        }

        // Read from plain then sharded.
        {
            let ro = ReadOnlyCacheBuilder::new()
                .cache(temp.path("plain"), 1)
                .cache(temp.path("sharded"), 10)
                .take()
                .build();

            // This time, the plain cache wins for `b`.
            assert_eq!(read_all(must_get(&ro, "a")), b"sharded");
            assert_eq!(read_all(must_get(&ro, "b")), b"plain");
            assert_eq!(read_all(must_get(&ro, "c")), b"plain2");
        }
    }
}