dyn_cache/
lib.rs

1#![forbid(unsafe_code)]
2#![deny(clippy::all, missing_docs)]
3
4//! Caches for storing the results of repeated function calls. The caches
5//! use minimal dynamic dispatch to store arbitrarily many
6//! types of query results in a single store.
7//!
8//! Cache storage is indexed by dynamic [scopes](#scopes):
9//!
10//! ```
11//! let storage = dyn_cache::local::SharedLocalCache::default();
12//!
13//! // scopes can be identified by ~anything Eq + Hash
14//! let a_scope = 'a';
15//! let b_scope = 'b';
16//!
17//! // we use interior mutability here to demonstrate query side effects
18//! let count = std::cell::Cell::new(0);
19//! let increment = |&to_add: &i32| -> i32 {
20//!     // let's pretend that there's some other interesting work happening here...
21//!     let new = count.get() + to_add;
22//!     count.set(new);
23//!     new
24//! };
25//!
26//! // now we'll define some "queries" to the cache
27//! let a_inc = |n| storage.cache(&a_scope, &n, &increment);
28//! let b_inc = |n| storage.cache(&b_scope, &n, &increment);
29//!
30//! assert_eq!(count.get(), 0, "haven't called any queries");
31//!
32//! assert_eq!(a_inc(1), 1);
33//! assert_eq!(count.get(), 1, "called 'a'(1) once");
34//!
35//! assert_eq!(a_inc(1), 1);
36//! assert_eq!(count.get(), 1, "called 'a'(1) twice, only ran once");
37//!
38//! assert_eq!(b_inc(2), 3);
39//! assert_eq!(count.get(), 3, "called 'a'(1) and 'b'(2)");
40//!
41//! assert_eq!(a_inc(1), 1, "retains cached value");
42//! assert_eq!(count.get(), 3, "queries only affect their own scope");
43//!
44//! assert_eq!(a_inc(2), 5);
45//! assert_eq!(count.get(), 5, "called 'a'(1), 'a'(2), 'b'(2)");
46//!
47//! assert_eq!(a_inc(1), 6, "only the most recent revision is cached");
48//! assert_eq!(count.get(), 6);
49//! ```
50//!
51//! A single cache instance can hold multiple types of [scope](#scopes):
52//!
53//! ```
54//! let storage = dyn_cache::local::SharedLocalCache::default();
55//! let count = std::cell::Cell::new(0);
56//! let increment = |&to_add: &i32| -> i32 {
57//!     // let's pretend that there's some other interesting work happening here...
58//!     let new = count.get() + to_add;
59//!     count.set(new);
60//!     new
61//! };
62//!
63//! let one_scope = 1u8;
64//! let two_scope = 2i32;
65//! let red_scope = b"red";
66//! let blue_scope = "blue";
67//!
68//! // each of these queries has a different type of scope
69//! // and while the inputs/outputs are the same they could also
70//! // vary without interfering with each other
71//! let one_inc = |n| storage.cache(&one_scope, &n, increment);
72//! let two_inc = |n| storage.cache(&two_scope, &n, increment);
73//! let red_inc = |n| storage.cache(&red_scope, &n, increment);
74//! let blue_inc = |n| storage.cache(&blue_scope, &n, increment);
75//!
76//! assert_eq!(one_inc(1), 1);
77//! assert_eq!(count.get(), 1);
78//!
79//! assert_eq!(two_inc(1), 2);
80//! assert_eq!(one_inc(1), 1, "still cached");
81//! assert_eq!(count.get(), 2, "only one of the queries ran");
82//!
83//! assert_eq!(red_inc(2), 4);
84//! assert_eq!(two_inc(1), 2, "still cached");
85//! assert_eq!(one_inc(1), 1, "still cached");
86//! assert_eq!(count.get(), 4, "only one of the queries ran");
87//!
88//! assert_eq!(blue_inc(3), 7);
89//! assert_eq!(red_inc(2), 4, "still cached");
90//! assert_eq!(two_inc(1), 2, "still cached");
91//! assert_eq!(one_inc(1), 1, "still cached");
92//! assert_eq!(count.get(), 7, "only one of the queries ran");
93//!
94//! // invalidation still happens once per scope (type)
95//! assert_eq!(blue_inc(5), 12, "blue has a different input");
96//! assert_eq!(red_inc(2), 4, "still cached");
97//! assert_eq!(two_inc(1), 2, "still cached");
98//! assert_eq!(one_inc(1), 1, "still cached");
99//! assert_eq!(count.get(), 12, "only one of the queries ran");
100//! ```
101//!
102//! # Cache types
103//!
104//! There are two main flavors of cache available for use in this crate:
105//!
106//! | Shared type                 | Synchronized? |
107//! |-----------------------------|---------------|
108//! | [`sync::SharedSendCache`]   | Mutex         |
109//! | [`local::SharedLocalCache`] | RefCell       |
110//!
111//! These variants are used by calling [`sync::SharedSendCache::cache_with`] or
112//! [`local::SharedLocalCache::cache`].
113//!
114//! The shared cache types above are implemented by wrapping these "inner"
115//! types:
116//!
117//! | Mutable type          | Requires `Send`? |
118//! |-----------------------|------------------|
119//! | [`sync::SendCache`]   | yes              |
120//! | [`local::LocalCache`] | no               |
121//!
122//! These "inner" caches require mutable access to call their functions like
123//! [`local::LocalCache::get`] which returns either a reference or a
124//! [`CacheMiss`] that can be passed back to the cache in
125//! [`local::LocalCache::store`] to initialize a value in the cache:
126//!
127//! ```
128//! let mut cache = dyn_cache::local::LocalCache::default();
129//! let scope = &'a';
130//! let arg = &1;
131//!
132//! let miss = cache.get(scope, arg).expect_err("first access will always be a miss");
133//! let (entry, result): (_, Vec<usize>) = miss.init(|&n| {
134//!     let v: Vec<usize> = vec![n; n];
135//!     (v.clone(), v)
136//! });
137//! cache.store(entry);
138//! assert_eq!(result, vec![1usize]);
139//!
140//! let result: &Vec<usize> = cache.get(scope, arg).unwrap();
141//! assert_eq!(result, &vec![1]);
142//! ```
143//!
144//! See [`sync::SendCache::get`] and [`sync::SendCache::store`] for the
145//! thread-safe equivalents.
146//!
147//! The shared variants are defined by wrapping these inner cache types in
148//! reference counting and synchronized mutability.
149//!
150//! # Query types
151//!
152//! Each query type maps to a typed "namespace" within the unityped cache
153//! storage, each query having a distinct type each for its scope, input, and
154//! output.
155//!
156//! ## Scopes
157//!
158//! The scope of a query is its identifier within cache storage for the given
159//! input & output types. Scopes must implement `Eq` and `Hash` so that results
160//! can be efficiently and uniquely indexed.
161//!
162//! Each scope identifies 0-1 `(Input, Output)` pairs in each namespace. The
163//! same type of scope can be used in multiple queries without collision if
164//! the types of inputs, outputs, or both differ.
165//!
166//! ## Inputs
167//!
168//! The input to a query determines when it is re-run. If a given query is
169//! present in the cache then the previous input is compared to the new input.
170//! If the input hasn't changed, the query can be skipped and its
171//! previously-stored output is returned.
172//!
173//! ## Outputs
174//!
175//! The only constraint on query outputs is that they are owned (`Output:
176//! 'static`). This imposes the inconvenient requirement that all access to
177//! stored values occurs during the scope of a closure (similar to thread-locals
178//! in the standard library).
179//!
180//! The most common way to work around this requirement is to choose output
181//! types that cheaply implement [`std::clone::Clone`].
182//!
183//! # Allocations
184//!
185//! In order to store distinct query results in the same container, allocations
186//! and indirection are required.
187//!
188//! ## Borrowed query parameters
189//!
190//! All of the cache functions accept a reference to a type `Key:
191//! ToOwned<Owned=Scope>` so that the scope is only cloned on the first
192//! insertion to its storage and all subsequent lookups can be with a borrowed
193//! type.
194//!
195//! Like the query scope, functions to get cache values accept a borrowed
196//! version of the input and only clone it when the input has changed.
197//!
198//! ## Causes
199//!
200//! There are three situations where these caches allocate:
201//!
202//! 1. caching new types which haven't been seen by that cache instance yet
203//! 2. storing the results of a new query
204//! 3. updating the results of a stored query
205//!
206//! There are several types of allocations performed by the caches in this
207//! crate:
208//!
209//! | Allocation                         | Causes        |
210//! |------------------------------------|---------------|
211//! | box a new, empty namespace         | (1)           |
212//! | resize a cache's map of namespaces | (1)           |
213//! | call `.to_owned()` on a scope/key  | (2)           |
214//! | resize a namespace's storage       | (2)           |
215//! | call `.to_owned()` on an input/arg | (2), (3)      |
216//! | update an output's dependents      | (1), (2), (3) |
217//!
218//! Outside of these, only user-defined functions should perform any allocation.
219//!
220//! # Garbage Collection
221//!
222//! All of the caches have a `gc()` method which retains only used values. A
223//! value is used if it or a value which depends on it has been used/rooted
224//! since the last call to `gc()`.
225//!
226//! ```
227//! let storage = dyn_cache::local::SharedLocalCache::default();
228//! let a_scope = 'a';
229//! let b_scope = 'b';
230//!
231//! // we use interior mutability here to demonstrate query side effects
232//! let count = std::cell::Cell::new(0);
233//! let increment = |&to_add: &i32| -> i32 {
234//!     // let's pretend that there's some other interesting work happening here...
235//!     let new = count.get() + to_add;
236//!     count.set(new);
237//!     new
238//! };
239//!
240//! // we'll define the same "queries" to the cache as in the previous example
241//! let a_inc = |n| storage.cache(&a_scope, &n, &increment);
242//! let b_inc = |n| storage.cache(&b_scope, &n, &increment);
243//!
244//! assert_eq!(a_inc(1), 1);
245//! assert_eq!(count.get(), 1, "called 'a'(1) once");
246//!
247//! assert_eq!(b_inc(2), 3);
248//! assert_eq!(count.get(), 3, "called 'a'(1) and 'b'(2)");
249//!
250//! // mark the end of this "revision" in the cache
251//! // this won't drop anything yet, just marks all cached values as unused
252//! storage.gc();
253//!
254//! // run only one of the queries to mark it live
255//! assert_eq!(a_inc(1), 1, "value is still cached");
256//! assert_eq!(count.get(), 3, "nothing has touched our side effect tracker");
257//!
258//! storage.gc(); // drops b_inc from storage
259//!
260//! assert_eq!(b_inc(2), 5, "b_inc was dropped from the cache, ran again");
261//! assert_eq!(count.get(), 5);
262//!
263//! assert_eq!(a_inc(1), 1, "value is still cached");
264//! assert_eq!(count.get(), 5);
265//! ```
266//!
267//! ## Nesting
268//!
269//! When a cache read *fails*, we expect that the value will be populated
270//! immediately after and a new node in the dependency graph is created. The new
271//! dependency node is marked as an incoming dependent on any cache values which
272//! are accessed during the initialization of the new value. The new node is
273//! then marked as a "root" for the garbage collector once it has
274//! been initialized and the cache populated. If in subsequent revisions the
275//! rooted value is accessed again it will be re-rooted and its dependents will
276//! be marked as live even if they were not directly accessed in that revision.
277//!
278//! When a cache read *succeeds*, its dependency node is marked as being
279//! depended upon by the node (if any) which was being initialized during the
280//! read, linking the two dependencies together.
281//!
282//! ```
283//! let storage = dyn_cache::local::SharedLocalCache::default();
284//! let a_scope = 'a';
285//! let b_scope = 'b';
286//!
287//! let count = std::cell::Cell::new(0);
288//! let increment = |&to_add: &i32| -> i32 {
289//!     // let's pretend that there's some other interesting work happening here...
290//!     let new = count.get() + to_add;
291//!     count.set(new);
292//!     new
293//! };
294//!
295//! let a_inc = |n| storage.cache(&a_scope, &n, &increment);
296//!
297//! // this new query "depends on" a_inc by calling it in its own init closure
298//! let b_inc = |n| storage.cache(&b_scope, &n, |&n| a_inc(n));
299//!
300//! assert_eq!(b_inc(2), 2);
301//! assert_eq!(count.get(), 2);
302//!
303//! // until now, we haven't called a_inc directly
304//! assert_eq!(a_inc(2), 2, "a_inc is indeed cached as a dep of b_inc");
305//! assert_eq!(count.get(), 2);
306//!
307//! storage.gc(); // mark both queries dead
308//!
309//! // in this revision we'll only call b_inc directly
310//! assert_eq!(b_inc(3), 5);
311//! assert_eq!(count.get(), 5);
312//!
313//! storage.gc(); // doesn't actually drop anything
314//!
315//! // both queries should still have their outputs for input=3 cached
316//! assert_eq!(b_inc(3), 5);
317//! assert_eq!(a_inc(3), 5);
318//! assert_eq!(count.get(), 5);
319//!
320//! // we can also check to make sure that neither query is touching the cell
321//! count.set(0);
322//! assert_eq!(b_inc(3), 5);
323//! assert_eq!(a_inc(3), 5);
324//! assert_eq!(count.get(), 0);
325//! ```
326
327use downcast_rs::{impl_downcast, Downcast};
328use hash_hasher::HashBuildHasher;
329use hashbrown::hash_map::DefaultHashBuilder;
330use std::{
331    any::TypeId,
332    fmt::{Debug, Formatter, Result as FmtResult},
333    hash::{BuildHasher, Hash, Hasher},
334    marker::PhantomData,
335};
336
337#[macro_use]
338mod definition;
339
340mod cache_cell;
341mod dep_node;
342mod namespace;
343
344use namespace::{KeyMiss, Namespace};
345
/// The result of a failed attempt to retrieve a value from the cache.
/// Initialize a full [`CacheEntry`] for storage with [`CacheMiss::init`].
///
/// ```
/// use dyn_cache::local::LocalCache;
/// let mut cache = LocalCache::default();
/// let (scope, arg) = (&'a', &1);
///
/// let miss = cache.get(scope, arg).expect_err("first access will always be a miss");
/// # let (entry, result): (_, Vec<usize>) = miss.init(|&n| {
/// #     let v: Vec<usize> = vec![n; n];
/// #     (v.clone(), v)
/// # });
/// # cache.store(entry);
/// # assert_eq!(result, vec![1usize]);
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct CacheMiss<'k, Key: ?Sized, Scope, Input, Output, H = DefaultHashBuilder> {
    // Identifies the typed (Scope, Input, Output) namespace this miss belongs to,
    // so the resulting `CacheEntry` is stored back into the correct shard.
    query: Query<Scope, Input, Output>,
    // The per-key miss details (borrowed key + input) produced by the namespace;
    // consumed by `CacheMiss::init` to build the output.
    key_miss: KeyMiss<'k, Key, Input, H>,
}
367
368impl<'k, Key: ?Sized, Scope, Input, Output, H> CacheMiss<'k, Key, Scope, Input, Output, H> {
369    /// Prepare the cache miss to be populated by running `query(arg)`,
370    /// returning a separate value. The value returned (`R`) is typically
371    /// derived in some way from the stored `Output`.
372    ///
373    /// ```
374    /// # use dyn_cache::local::LocalCache;
375    /// # let mut cache = LocalCache::default();
376    /// # let (scope, arg) = (&'a', &1);
377    /// # let miss = cache.get(scope, arg).expect_err("first access will always be a miss");
378    /// let (entry, result): (_, Vec<usize>) = miss.init(|&n| {
379    ///     let v: Vec<usize> = vec![n; n];
380    ///     (v.clone(), v)
381    /// });
382    /// cache.store(entry);
383    /// assert_eq!(result, vec![1usize]);
384    /// ```
385    pub fn init<R>(
386        self,
387        query: impl FnOnce(&Input) -> (Output, R),
388    ) -> (CacheEntry<'k, Key, Scope, Input, Output, H>, R) {
389        let (output, to_return) = self.key_miss.init(query);
390        (CacheEntry { output, miss: self }, to_return)
391    }
392}
393
394impl<'k, Key, Scope, Input, Output, H> Debug for CacheMiss<'k, Key, Scope, Input, Output, H>
395where
396    Key: Debug + ?Sized,
397    Scope: Debug,
398    Input: Debug,
399{
400    fn fmt(&self, f: &mut Formatter) -> FmtResult {
401        f.debug_struct("CacheMiss")
402            .field("query", &self.query)
403            .field("key_miss", &self.key_miss)
404            .finish()
405    }
406}
407
/// A fully-initialized input/output entry, ready to be written to the cache.
/// Obtained from [`CacheMiss::init`] and passed to [`local::LocalCache::store`]
/// or [`sync::SendCache::store`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct CacheEntry<'k, Key: ?Sized, Scope, Input, Output, H = DefaultHashBuilder> {
    // The original miss, carrying the query namespace and key/input needed
    // to index the value once stored.
    miss: CacheMiss<'k, Key, Scope, Input, Output, H>,
    // The query's computed output, produced by the closure given to
    // `CacheMiss::init`.
    output: Output,
}
416
/// A cache for types which are not thread-safe (`?Send`).
pub mod local {
    use std::{cell::RefCell, rc::Rc};

    // Expands to the `LocalCache` and `SharedLocalCache` types documented in
    // the crate root — presumably the shared variant wraps the inner cache in
    // `Rc<RefCell<...>>`, using `RefCell::borrow_mut` for interior mutability
    // (see `definition` module for the macro body).
    define_cache!(local, LocalCache, Rc, RefCell::borrow_mut);
}
423
/// A thread-safe cache which requires stored types implement `Send`.
pub mod sync {
    use parking_lot::Mutex;
    use std::sync::Arc;

    // Expands to the `SendCache` and `SharedSendCache` types documented in
    // the crate root — presumably the shared variant wraps the inner cache in
    // `Arc<Mutex<...>>`, locking via `Mutex::lock`; the `: Send` bound makes
    // the stored values thread-safe (see `definition` module for the macro body).
    define_cache!(sync, SendCache: Send, Arc, Mutex::lock);
}
431
/// A type which can contain values of varying liveness.
///
/// Implemented by the per-query namespaces so that a single cache can hold
/// them behind `dyn Storage` and garbage-collect them uniformly.
trait Storage: Downcast + Debug {
    /// Traverse stored values, identifying roots.
    fn mark(&mut self, revision: u64);

    /// Remove dead entries.
    fn sweep(&mut self);
}

// Allow `dyn Storage` trait objects to be downcast back to their concrete
// namespace types (required because storage is type-erased in the cache map).
impl_downcast!(Storage);
442
/// Describes the outcome of garbage collection for a cached value.
// `Eq` added alongside `PartialEq`: both variants are unit-like so equality is
// total, and sibling types in this file (`Query`, `CacheMiss`) already derive
// `Eq` (clippy: derive_partial_eq_without_eq).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Liveness {
    /// The value is still live.
    Live,
    /// The value should be dropped.
    Dead,
}
451
/// The type of a dynamic cache query, used to shard storage in a fashion
/// similar to `anymap` or `typemap`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
struct Query<Scope, Input, Output, H = HashBuildHasher> {
    // Zero-sized marker tying this query value to its (Scope, Input, Output)
    // type triple without storing any of them.
    ty: PhantomData<(Scope, Input, Output)>,
    // Marker for the hash builder type used to pre-compute `hash`.
    hasher: PhantomData<H>,
    // Hash of the triple's `TypeId`, computed once in `Query::new` and
    // returned cheaply from `Query::hash`.
    hash: u64,
}
460
461impl<Scope, Input, Output, H> Query<Scope, Input, Output, H>
462where
463    Scope: 'static,
464    Input: 'static,
465    Output: 'static,
466    H: BuildHasher,
467{
468    fn new(build: &H) -> Self {
469        // this is a bit unrustic but it lets us keep the typeid defined once
470        let mut new = Query { ty: PhantomData, hasher: PhantomData, hash: 0 };
471        let mut hasher = build.build_hasher();
472        new.ty().hash(&mut hasher);
473        new.hash = hasher.finish();
474        new
475    }
476
477    fn make_namespace(&self) -> Box<Namespace<Scope, Input, Output>> {
478        Box::new(Namespace::default())
479    }
480
481    fn hash(&self) -> u64 {
482        self.hash
483    }
484
485    fn ty(&self) -> TypeId {
486        TypeId::of::<(Scope, Input, Output)>()
487    }
488}
489
490impl<Scope, Input, Output, H> Debug for Query<Scope, Input, Output, H> {
491    fn fmt(&self, f: &mut Formatter) -> FmtResult {
492        f.debug_struct("Query")
493            .field("ty", &std::any::type_name::<(Scope, Input, Output)>())
494            .field("hasher", &std::any::type_name::<H>())
495            .field("hash", &self.hash)
496            .finish()
497    }
498}