evmap/
lib.rs

1//! A lock-free, eventually consistent, concurrent multi-value map.
2//!
3//! This map implementation allows reads and writes to execute entirely in parallel, with no
4//! implicit synchronization overhead. Reads never take locks on their critical path, and neither
5//! do writes assuming there is a single writer (multi-writer is possible using a `Mutex`), which
6//! significantly improves performance under contention. See the [`left-right` crate](left_right)
7//! for details on the underlying concurrency primitive.
8//!
9//! The trade-off exposed by this type is one of eventual consistency: writes are not visible to
10//! readers except following explicit synchronization. Specifically, readers only see the
11//! operations that preceeded the last call to `WriteHandle::refresh` by a writer. This lets
12//! writers decide how stale they are willing to let reads get. They can refresh the map after
13//! every write to emulate a regular concurrent `HashMap`, or they can refresh only occasionally to
14//! reduce the synchronization overhead at the cost of stale reads.
15//!
16//! For read-heavy workloads, the scheme used by this module is particularly useful. Writers can
17//! afford to refresh after every write, which provides up-to-date reads, and readers remain fast
18//! as they do not need to ever take locks.
19//!
20//! The map is multi-value, meaning that every key maps to a *collection* of values. This
21//! introduces some memory cost by adding a layer of indirection through a `Vec` for each value,
22//! but enables more advanced use. This choice was made as it would not be possible to emulate such
23//! functionality on top of the semantics of this map (think about it -- what would the operational
24//! log contain?).
25//!
26//! To faciliate more advanced use-cases, each of the two maps also carry some customizeable
27//! meta-information. The writers may update this at will, and when a refresh happens, the current
28//! meta will also be made visible to readers. This could be useful, for example, to indicate what
29//! time the refresh happened.
30//!
31//! # Features
32//!
33//!  - `eviction`: Gives you access to [`WriteHandle::empty_random`] to empty out randomly chosen
34//!    keys from the map.
35//!  - `amortize`: Amortizes the cost of resizes in the underlying data structures. See
36//!    [`griddle`](https://github.com/jonhoo/griddle/) and
37//!    [`atone`](https://github.com/jonhoo/atone/) for details. This requires a nightly compiler
38//!    [for the time being](https://docs.rs/indexmap-amortized/1.0/indexmap_amortized/#rust-version).
39//!
40//!
41//! # Examples
42//!
43//! Single-reader, single-writer
44//!
45//! ```
46//! // new will use the default HashMap hasher, and a meta of ()
47//! // note that we get separate read and write handles
48//! // the read handle can be cloned to have more readers
49//! let (mut book_reviews_w, book_reviews_r) = evmap::new();
50//!
51//! // review some books.
52//! book_reviews_w.insert("Adventures of Huckleberry Finn",    "My favorite book.");
53//! book_reviews_w.insert("Grimms' Fairy Tales",               "Masterpiece.");
54//! book_reviews_w.insert("Pride and Prejudice",               "Very enjoyable.");
55//! book_reviews_w.insert("The Adventures of Sherlock Holmes", "Eye lyked it alot.");
56//!
57//! // at this point, reads from book_reviews_r will not see any of the reviews!
58//! assert_eq!(book_reviews_r.len(), 0);
59//! // we need to refresh first to make the writes visible
60//! book_reviews_w.publish();
61//! assert_eq!(book_reviews_r.len(), 4);
62//! // reads will now return Some() because the map has been initialized
63//! assert_eq!(book_reviews_r.get("Grimms' Fairy Tales").map(|rs| rs.len()), Some(1));
64//!
65//! // remember, this is a multi-value map, so we can have many reviews
66//! book_reviews_w.insert("Grimms' Fairy Tales",               "Eh, the title seemed weird.");
67//! book_reviews_w.insert("Pride and Prejudice",               "Too many words.");
68//!
69//! // but again, new writes are not yet visible
70//! assert_eq!(book_reviews_r.get("Grimms' Fairy Tales").map(|rs| rs.len()), Some(1));
71//!
72//! // we need to refresh first
73//! book_reviews_w.publish();
74//! assert_eq!(book_reviews_r.get("Grimms' Fairy Tales").map(|rs| rs.len()), Some(2));
75//!
76//! // oops, this review has a lot of spelling mistakes, let's delete it.
77//! // remove_entry deletes *all* reviews (though in this case, just one)
78//! book_reviews_w.remove_entry("The Adventures of Sherlock Holmes");
79//! // but again, it's not visible to readers until we refresh
80//! assert_eq!(book_reviews_r.get("The Adventures of Sherlock Holmes").map(|rs| rs.len()), Some(1));
81//! book_reviews_w.publish();
82//! assert_eq!(book_reviews_r.get("The Adventures of Sherlock Holmes").map(|rs| rs.len()), None);
83//!
84//! // look up the values associated with some keys.
85//! let to_find = ["Pride and Prejudice", "Alice's Adventure in Wonderland"];
86//! for book in &to_find {
87//!     if let Some(reviews) = book_reviews_r.get(book) {
88//!         for review in &*reviews {
89//!             println!("{}: {}", book, review);
90//!         }
91//!     } else {
92//!         println!("{} is unreviewed.", book);
93//!     }
94//! }
95//!
96//! // iterate over everything.
97//! for (book, reviews) in &book_reviews_r.enter().unwrap() {
98//!     for review in reviews {
99//!         println!("{}: \"{}\"", book, review);
100//!     }
101//! }
102//! ```
103//!
104//! Reads from multiple threads are possible by cloning the `ReadHandle`.
105//!
106//! ```
107//! use std::thread;
108//! let (mut book_reviews_w, book_reviews_r) = evmap::new();
109//!
110//! // start some readers
111//! let readers: Vec<_> = (0..4).map(|_| {
112//!     let r = book_reviews_r.clone();
113//!     thread::spawn(move || {
114//!         loop {
115//!             let l = r.len();
116//!             if l == 0 {
117//!                 thread::yield_now();
118//!             } else {
119//!                 // the reader will either see all the reviews,
120//!                 // or none of them, since refresh() is atomic.
121//!                 assert_eq!(l, 4);
122//!                 break;
123//!             }
124//!         }
125//!     })
126//! }).collect();
127//!
128//! // do some writes
129//! book_reviews_w.insert("Adventures of Huckleberry Finn",    "My favorite book.");
130//! book_reviews_w.insert("Grimms' Fairy Tales",               "Masterpiece.");
131//! book_reviews_w.insert("Pride and Prejudice",               "Very enjoyable.");
132//! book_reviews_w.insert("The Adventures of Sherlock Holmes", "Eye lyked it alot.");
133//! // expose the writes
134//! book_reviews_w.publish();
135//!
136//! // you can read through the write handle
137//! assert_eq!(book_reviews_w.len(), 4);
138//!
139//! // the original read handle still works too
140//! assert_eq!(book_reviews_r.len(), 4);
141//!
142//! // all the threads should eventually see .len() == 4
143//! for r in readers.into_iter() {
144//!     assert!(r.join().is_ok());
145//! }
146//! ```
147//!
148//! If multiple writers are needed, the `WriteHandle` must be protected by a `Mutex`.
149//!
150//! ```
151//! use std::thread;
152//! use std::sync::{Arc, Mutex};
153//! let (mut book_reviews_w, book_reviews_r) = evmap::new();
154//!
155//! // start some writers.
156//! // since evmap does not support concurrent writes, we need
157//! // to protect the write handle by a mutex.
158//! let w = Arc::new(Mutex::new(book_reviews_w));
159//! let writers: Vec<_> = (0..4).map(|i| {
160//!     let w = w.clone();
161//!     thread::spawn(move || {
162//!         let mut w = w.lock().unwrap();
163//!         w.insert(i, true);
164//!         w.publish();
165//!     })
166//! }).collect();
167//!
168//! // eventually we should see all the writes
169//! while book_reviews_r.len() < 4 { thread::yield_now(); };
170//!
171//! // all the threads should eventually finish writing
172//! for w in writers.into_iter() {
173//!     assert!(w.join().is_ok());
174//! }
175//! ```
176//!
177//! [`ReadHandle`] is not `Sync` as sharing a single instance amongst threads would introduce a
178//! significant performance bottleneck. A fresh `ReadHandle` needs to be created for each thread
179//! either by cloning a [`ReadHandle`] or from a [`handles::ReadHandleFactory`]. For further
180//! information, see [`left_right::ReadHandle`].
181//!
182//! # Implementation
183//!
184//! Under the hood, the map is implemented using two regular `HashMap`s and some magic. Take a look
185//! at [`left-right`](left_right) for a much more in-depth discussion. Since the implementation
186//! uses regular `HashMap`s under the hood, table resizing is fully supported. It does, however,
187//! also mean that the memory usage of this implementation is approximately twice of that of a
188//! regular `HashMap`, and more if writers rarely refresh after writing.
189//!
190//! # Value storage
191//!
192//! The values for each key in the map are stored in [`refs::Values`]. Conceptually, each `Values`
193//! is a _bag_ or _multiset_; it can store multiple copies of the same value. `evmap` applies some
194//! cleverness in an attempt to reduce unnecessary allocations and keep the cost of operations on
195//! even large value-bags small. For small bags, `Values` uses the `smallvec` crate. This avoids
196//! allocation entirely for single-element bags, and uses a `Vec` if the bag is relatively small.
197//! For large bags, `Values` uses the `hashbag` crate, which enables `evmap` to efficiently look up
198//! and remove specific elements in the value bag. For bags larger than one element, but smaller
199//! than the threshold for moving to `hashbag`, we use `smallvec` to avoid unnecessary hashing.
200//! Operations such as `Fit` and `Replace` will automatically switch back to the inline storage if
201//! possible. This is ideal for maps that mostly use one element per key, as it can improvate
202//! memory locality with less indirection.
203#![warn(
204    missing_docs,
205    rust_2018_idioms,
206    missing_debug_implementations,
207    rustdoc::broken_intra_doc_links
208)]
209#![allow(clippy::type_complexity)]
210// This _should_ detect if we ever accidentally leak aliasing::NoDrop.
211// But, currently, it does not..
212#![deny(unreachable_pub)]
213#![cfg_attr(docsrs, feature(doc_cfg))]
214
215use crate::inner::Inner;
216use crate::read::ReadHandle;
217use crate::write::WriteHandle;
218use left_right::aliasing::Aliased;
219use std::collections::hash_map::RandomState;
220use std::fmt;
221use std::hash::{BuildHasher, Hash};
222
223mod inner;
224mod read;
225mod stable_hash_eq;
226mod values;
227mod write;
228
229pub use stable_hash_eq::StableHashEq;
230
231/// Handles to the read and write halves of an `evmap`.
232pub mod handles {
233    pub use crate::write::WriteHandle;
234
235    // These cannot use ::{..} syntax because of
236    // https://github.com/rust-lang/rust/issues/57411
237    pub use crate::read::ReadHandle;
238    pub use crate::read::ReadHandleFactory;
239}
240
241/// Helper types that give access to values inside the read half of an `evmap`.
242pub mod refs {
243    // Same here, ::{..} won't work.
244    pub use super::values::Values;
245    pub use crate::read::MapReadRef;
246    pub use crate::read::ReadGuardIter;
247
248    // Expose `ReadGuard` since it has useful methods the user will likely care about.
249    #[doc(inline)]
250    pub use left_right::ReadGuard;
251}
252
253// NOTE: It is _critical_ that this module is not public.
254mod aliasing;
255
256/// Options for how to initialize the map.
257///
258/// In particular, the options dictate the hashing function, meta type, and initial capacity of the
259/// map.
260pub struct Options<M, S>
261where
262    S: BuildHasher,
263{
264    meta: M,
265    hasher: S,
266    capacity: Option<usize>,
267}
268
269impl<M, S> fmt::Debug for Options<M, S>
270where
271    S: BuildHasher,
272    M: fmt::Debug,
273{
274    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
275        f.debug_struct("Options")
276            .field("meta", &self.meta)
277            .field("capacity", &self.capacity)
278            .finish()
279    }
280}
281
282impl Default for Options<(), RandomState> {
283    fn default() -> Self {
284        Options {
285            meta: (),
286            hasher: RandomState::default(),
287            capacity: None,
288        }
289    }
290}
291
292impl<M, S> Options<M, S>
293where
294    S: BuildHasher,
295{
296    /// Set the initial meta value for the map.
297    pub fn with_meta<M2>(self, meta: M2) -> Options<M2, S> {
298        Options {
299            meta,
300            hasher: self.hasher,
301            capacity: self.capacity,
302        }
303    }
304
305    /// Set the hasher used for the map.
306    ///
307    /// # Safety
308    ///
309    /// This method is safe to call as long as the given hasher is deterministic. That is, it must
310    /// yield the same hash if given the same sequence of inputs.
311    pub unsafe fn with_hasher<S2>(self, hash_builder: S2) -> Options<M, S2>
312    where
313        S2: BuildHasher + Clone,
314    {
315        Options {
316            meta: self.meta,
317            hasher: hash_builder,
318            capacity: self.capacity,
319        }
320    }
321
322    /// Set the initial capacity for the map.
323    pub fn with_capacity(self, capacity: usize) -> Options<M, S> {
324        Options {
325            meta: self.meta,
326            hasher: self.hasher,
327            capacity: Some(capacity),
328        }
329    }
330
331    /// Create the map, and construct the read and write handles used to access it.
332    ///
333    /// If you want to use arbitrary types for the keys and values, use [`assert_stable`][Options::assert_stable].
334    #[allow(clippy::type_complexity)]
335    pub fn construct<K, V>(self) -> (WriteHandle<K, V, M, S>, ReadHandle<K, V, M, S>)
336    where
337        K: StableHashEq + Clone,
338        S: BuildHasher + Clone,
339        V: StableHashEq,
340        M: 'static + Clone,
341    {
342        unsafe { self.assert_stable() }
343    }
344
345    /// Create the map, and construct the read and write handles used to access it.
346    ///
347    /// # Safety
348    ///
349    /// This method is safe to call as long as the implementation of `Hash` and `Eq` for both `K`
350    /// and `V` are deterministic. That is, they must always yield the same result if given the
351    /// same inputs. For keys of type `K`, the result must also be consistent between different clones
352    /// of the same key.
353    #[allow(clippy::type_complexity)]
354    pub unsafe fn assert_stable<K, V>(self) -> (WriteHandle<K, V, M, S>, ReadHandle<K, V, M, S>)
355    where
356        K: Eq + Hash + Clone,
357        S: BuildHasher + Clone,
358        V: Eq + Hash,
359        M: 'static + Clone,
360    {
361        let inner = if let Some(cap) = self.capacity {
362            Inner::with_capacity_and_hasher(self.meta, cap, self.hasher)
363        } else {
364            Inner::with_hasher(self.meta, self.hasher)
365        };
366
367        let (mut w, r) = left_right::new_from_empty(inner);
368        w.append(write::Operation::MarkReady);
369
370        (WriteHandle::new(w), ReadHandle::new(r))
371    }
372}
373
374/// Create an empty eventually consistent map.
375///
376/// Use the [`Options`](./struct.Options.html) builder for more control over initialization.
377///
378/// If you want to use arbitrary types for the keys and values, use [`new_assert_stable`].
379#[allow(clippy::type_complexity)]
380pub fn new<K, V>() -> (
381    WriteHandle<K, V, (), RandomState>,
382    ReadHandle<K, V, (), RandomState>,
383)
384where
385    K: StableHashEq + Clone,
386    V: StableHashEq,
387{
388    Options::default().construct()
389}
390
391/// Create an empty eventually consistent map.
392///
393/// Use the [`Options`](./struct.Options.html) builder for more control over initialization.
394///
395/// # Safety
396///
397/// This method is safe to call as long as the implementation of `Hash` and `Eq` for both `K` and
398/// `V` are deterministic. That is, they must always yield the same result if given the same
399/// inputs. For keys of type `K`, the result must also be consistent between different clones
400/// of the same key.
401#[allow(clippy::type_complexity)]
402pub unsafe fn new_assert_stable<K, V>() -> (
403    WriteHandle<K, V, (), RandomState>,
404    ReadHandle<K, V, (), RandomState>,
405)
406where
407    K: Eq + Hash + Clone,
408    V: Eq + Hash,
409{
410    Options::default().assert_stable()
411}
412
413/// Create an empty eventually consistent map with meta information and custom hasher.
414///
415/// Use the [`Options`](./struct.Options.html) builder for more control over initialization.
416///
417/// # Safety
418///
419/// This method is safe to call as long as the implementation of `Hash` and `Eq` for both `K` and
420/// `V`, and the implementation of `BuildHasher` for `S` and [`Hasher`][std::hash::Hasher]
421/// for <code>S::[Hasher][BuildHasher::Hasher]</code> are deterministic. That is, they must always
422/// yield the same result if given the same inputs. For keys of type `K` and hashers of type `S`,
423/// their behavior must also be consistent between different clones of the same value.
424#[allow(clippy::type_complexity)]
425pub unsafe fn with_hasher<K, V, M, S>(
426    meta: M,
427    hasher: S,
428) -> (WriteHandle<K, V, M, S>, ReadHandle<K, V, M, S>)
429where
430    K: Eq + Hash + Clone,
431    V: Eq + Hash,
432    M: 'static + Clone,
433    S: BuildHasher + Clone,
434{
435    Options::default()
436        .with_hasher(hasher)
437        .with_meta(meta)
438        .assert_stable()
439}