evmap/lib.rs
1//! A lock-free, eventually consistent, concurrent multi-value map.
2//!
3//! This map implementation allows reads and writes to execute entirely in parallel, with no
4//! implicit synchronization overhead. Reads never take locks on their critical path, and neither
5//! do writes assuming there is a single writer (multi-writer is possible using a `Mutex`), which
6//! significantly improves performance under contention. See the [`left-right` crate](left_right)
7//! for details on the underlying concurrency primitive.
8//!
9//! The trade-off exposed by this type is one of eventual consistency: writes are not visible to
10//! readers except following explicit synchronization. Specifically, readers only see the
11//! operations that preceeded the last call to `WriteHandle::refresh` by a writer. This lets
12//! writers decide how stale they are willing to let reads get. They can refresh the map after
13//! every write to emulate a regular concurrent `HashMap`, or they can refresh only occasionally to
14//! reduce the synchronization overhead at the cost of stale reads.
15//!
16//! For read-heavy workloads, the scheme used by this module is particularly useful. Writers can
17//! afford to refresh after every write, which provides up-to-date reads, and readers remain fast
18//! as they do not need to ever take locks.
19//!
20//! The map is multi-value, meaning that every key maps to a *collection* of values. This
21//! introduces some memory cost by adding a layer of indirection through a `Vec` for each value,
22//! but enables more advanced use. This choice was made as it would not be possible to emulate such
23//! functionality on top of the semantics of this map (think about it -- what would the operational
24//! log contain?).
25//!
26//! To faciliate more advanced use-cases, each of the two maps also carry some customizeable
27//! meta-information. The writers may update this at will, and when a refresh happens, the current
28//! meta will also be made visible to readers. This could be useful, for example, to indicate what
29//! time the refresh happened.
30//!
31//! # Features
32//!
33//! - `eviction`: Gives you access to [`WriteHandle::empty_random`] to empty out randomly chosen
34//! keys from the map.
35//! - `amortize`: Amortizes the cost of resizes in the underlying data structures. See
36//! [`griddle`](https://github.com/jonhoo/griddle/) and
37//! [`atone`](https://github.com/jonhoo/atone/) for details. This requires a nightly compiler
38//! [for the time being](https://docs.rs/indexmap-amortized/1.0/indexmap_amortized/#rust-version).
39//!
40//!
41//! # Examples
42//!
43//! Single-reader, single-writer
44//!
45//! ```
46//! // new will use the default HashMap hasher, and a meta of ()
47//! // note that we get separate read and write handles
48//! // the read handle can be cloned to have more readers
49//! let (mut book_reviews_w, book_reviews_r) = evmap::new();
50//!
51//! // review some books.
52//! book_reviews_w.insert("Adventures of Huckleberry Finn", "My favorite book.");
53//! book_reviews_w.insert("Grimms' Fairy Tales", "Masterpiece.");
54//! book_reviews_w.insert("Pride and Prejudice", "Very enjoyable.");
55//! book_reviews_w.insert("The Adventures of Sherlock Holmes", "Eye lyked it alot.");
56//!
57//! // at this point, reads from book_reviews_r will not see any of the reviews!
58//! assert_eq!(book_reviews_r.len(), 0);
59//! // we need to refresh first to make the writes visible
60//! book_reviews_w.publish();
61//! assert_eq!(book_reviews_r.len(), 4);
62//! // reads will now return Some() because the map has been initialized
63//! assert_eq!(book_reviews_r.get("Grimms' Fairy Tales").map(|rs| rs.len()), Some(1));
64//!
65//! // remember, this is a multi-value map, so we can have many reviews
66//! book_reviews_w.insert("Grimms' Fairy Tales", "Eh, the title seemed weird.");
67//! book_reviews_w.insert("Pride and Prejudice", "Too many words.");
68//!
69//! // but again, new writes are not yet visible
70//! assert_eq!(book_reviews_r.get("Grimms' Fairy Tales").map(|rs| rs.len()), Some(1));
71//!
72//! // we need to refresh first
73//! book_reviews_w.publish();
74//! assert_eq!(book_reviews_r.get("Grimms' Fairy Tales").map(|rs| rs.len()), Some(2));
75//!
76//! // oops, this review has a lot of spelling mistakes, let's delete it.
77//! // remove_entry deletes *all* reviews (though in this case, just one)
78//! book_reviews_w.remove_entry("The Adventures of Sherlock Holmes");
79//! // but again, it's not visible to readers until we refresh
80//! assert_eq!(book_reviews_r.get("The Adventures of Sherlock Holmes").map(|rs| rs.len()), Some(1));
81//! book_reviews_w.publish();
82//! assert_eq!(book_reviews_r.get("The Adventures of Sherlock Holmes").map(|rs| rs.len()), None);
83//!
84//! // look up the values associated with some keys.
85//! let to_find = ["Pride and Prejudice", "Alice's Adventure in Wonderland"];
86//! for book in &to_find {
87//! if let Some(reviews) = book_reviews_r.get(book) {
88//! for review in &*reviews {
89//! println!("{}: {}", book, review);
90//! }
91//! } else {
92//! println!("{} is unreviewed.", book);
93//! }
94//! }
95//!
96//! // iterate over everything.
97//! for (book, reviews) in &book_reviews_r.enter().unwrap() {
98//! for review in reviews {
99//! println!("{}: \"{}\"", book, review);
100//! }
101//! }
102//! ```
103//!
104//! Reads from multiple threads are possible by cloning the `ReadHandle`.
105//!
106//! ```
107//! use std::thread;
108//! let (mut book_reviews_w, book_reviews_r) = evmap::new();
109//!
110//! // start some readers
111//! let readers: Vec<_> = (0..4).map(|_| {
112//! let r = book_reviews_r.clone();
113//! thread::spawn(move || {
114//! loop {
115//! let l = r.len();
116//! if l == 0 {
117//! thread::yield_now();
118//! } else {
119//! // the reader will either see all the reviews,
120//! // or none of them, since refresh() is atomic.
121//! assert_eq!(l, 4);
122//! break;
123//! }
124//! }
125//! })
126//! }).collect();
127//!
128//! // do some writes
129//! book_reviews_w.insert("Adventures of Huckleberry Finn", "My favorite book.");
130//! book_reviews_w.insert("Grimms' Fairy Tales", "Masterpiece.");
131//! book_reviews_w.insert("Pride and Prejudice", "Very enjoyable.");
132//! book_reviews_w.insert("The Adventures of Sherlock Holmes", "Eye lyked it alot.");
133//! // expose the writes
134//! book_reviews_w.publish();
135//!
136//! // you can read through the write handle
137//! assert_eq!(book_reviews_w.len(), 4);
138//!
139//! // the original read handle still works too
140//! assert_eq!(book_reviews_r.len(), 4);
141//!
142//! // all the threads should eventually see .len() == 4
143//! for r in readers.into_iter() {
144//! assert!(r.join().is_ok());
145//! }
146//! ```
147//!
148//! If multiple writers are needed, the `WriteHandle` must be protected by a `Mutex`.
149//!
150//! ```
151//! use std::thread;
152//! use std::sync::{Arc, Mutex};
153//! let (mut book_reviews_w, book_reviews_r) = evmap::new();
154//!
155//! // start some writers.
156//! // since evmap does not support concurrent writes, we need
157//! // to protect the write handle by a mutex.
158//! let w = Arc::new(Mutex::new(book_reviews_w));
159//! let writers: Vec<_> = (0..4).map(|i| {
160//! let w = w.clone();
161//! thread::spawn(move || {
162//! let mut w = w.lock().unwrap();
163//! w.insert(i, true);
164//! w.publish();
165//! })
166//! }).collect();
167//!
168//! // eventually we should see all the writes
169//! while book_reviews_r.len() < 4 { thread::yield_now(); };
170//!
171//! // all the threads should eventually finish writing
172//! for w in writers.into_iter() {
173//! assert!(w.join().is_ok());
174//! }
175//! ```
176//!
177//! [`ReadHandle`] is not `Sync` as sharing a single instance amongst threads would introduce a
178//! significant performance bottleneck. A fresh `ReadHandle` needs to be created for each thread
179//! either by cloning a [`ReadHandle`] or from a [`handles::ReadHandleFactory`]. For further
180//! information, see [`left_right::ReadHandle`].
181//!
182//! # Implementation
183//!
184//! Under the hood, the map is implemented using two regular `HashMap`s and some magic. Take a look
185//! at [`left-right`](left_right) for a much more in-depth discussion. Since the implementation
186//! uses regular `HashMap`s under the hood, table resizing is fully supported. It does, however,
187//! also mean that the memory usage of this implementation is approximately twice of that of a
188//! regular `HashMap`, and more if writers rarely refresh after writing.
189//!
190//! # Value storage
191//!
192//! The values for each key in the map are stored in [`refs::Values`]. Conceptually, each `Values`
193//! is a _bag_ or _multiset_; it can store multiple copies of the same value. `evmap` applies some
194//! cleverness in an attempt to reduce unnecessary allocations and keep the cost of operations on
195//! even large value-bags small. For small bags, `Values` uses the `smallvec` crate. This avoids
196//! allocation entirely for single-element bags, and uses a `Vec` if the bag is relatively small.
197//! For large bags, `Values` uses the `hashbag` crate, which enables `evmap` to efficiently look up
198//! and remove specific elements in the value bag. For bags larger than one element, but smaller
199//! than the threshold for moving to `hashbag`, we use `smallvec` to avoid unnecessary hashing.
200//! Operations such as `Fit` and `Replace` will automatically switch back to the inline storage if
201//! possible. This is ideal for maps that mostly use one element per key, as it can improvate
202//! memory locality with less indirection.
203#![warn(
204 missing_docs,
205 rust_2018_idioms,
206 missing_debug_implementations,
207 rustdoc::broken_intra_doc_links
208)]
209#![allow(clippy::type_complexity)]
210// This _should_ detect if we ever accidentally leak aliasing::NoDrop.
211// But, currently, it does not..
212#![deny(unreachable_pub)]
213#![cfg_attr(docsrs, feature(doc_cfg))]
214
215use crate::inner::Inner;
216use crate::read::ReadHandle;
217use crate::write::WriteHandle;
218use left_right::aliasing::Aliased;
219use std::collections::hash_map::RandomState;
220use std::fmt;
221use std::hash::{BuildHasher, Hash};
222
223mod inner;
224mod read;
225mod stable_hash_eq;
226mod values;
227mod write;
228
229pub use stable_hash_eq::StableHashEq;
230
231/// Handles to the read and write halves of an `evmap`.
232pub mod handles {
233 pub use crate::write::WriteHandle;
234
235 // These cannot use ::{..} syntax because of
236 // https://github.com/rust-lang/rust/issues/57411
237 pub use crate::read::ReadHandle;
238 pub use crate::read::ReadHandleFactory;
239}
240
241/// Helper types that give access to values inside the read half of an `evmap`.
242pub mod refs {
243 // Same here, ::{..} won't work.
244 pub use super::values::Values;
245 pub use crate::read::MapReadRef;
246 pub use crate::read::ReadGuardIter;
247
248 // Expose `ReadGuard` since it has useful methods the user will likely care about.
249 #[doc(inline)]
250 pub use left_right::ReadGuard;
251}
252
253// NOTE: It is _critical_ that this module is not public.
254mod aliasing;
255
256/// Options for how to initialize the map.
257///
258/// In particular, the options dictate the hashing function, meta type, and initial capacity of the
259/// map.
260pub struct Options<M, S>
261where
262 S: BuildHasher,
263{
264 meta: M,
265 hasher: S,
266 capacity: Option<usize>,
267}
268
269impl<M, S> fmt::Debug for Options<M, S>
270where
271 S: BuildHasher,
272 M: fmt::Debug,
273{
274 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
275 f.debug_struct("Options")
276 .field("meta", &self.meta)
277 .field("capacity", &self.capacity)
278 .finish()
279 }
280}
281
282impl Default for Options<(), RandomState> {
283 fn default() -> Self {
284 Options {
285 meta: (),
286 hasher: RandomState::default(),
287 capacity: None,
288 }
289 }
290}
291
292impl<M, S> Options<M, S>
293where
294 S: BuildHasher,
295{
296 /// Set the initial meta value for the map.
297 pub fn with_meta<M2>(self, meta: M2) -> Options<M2, S> {
298 Options {
299 meta,
300 hasher: self.hasher,
301 capacity: self.capacity,
302 }
303 }
304
305 /// Set the hasher used for the map.
306 ///
307 /// # Safety
308 ///
309 /// This method is safe to call as long as the given hasher is deterministic. That is, it must
310 /// yield the same hash if given the same sequence of inputs.
311 pub unsafe fn with_hasher<S2>(self, hash_builder: S2) -> Options<M, S2>
312 where
313 S2: BuildHasher + Clone,
314 {
315 Options {
316 meta: self.meta,
317 hasher: hash_builder,
318 capacity: self.capacity,
319 }
320 }
321
322 /// Set the initial capacity for the map.
323 pub fn with_capacity(self, capacity: usize) -> Options<M, S> {
324 Options {
325 meta: self.meta,
326 hasher: self.hasher,
327 capacity: Some(capacity),
328 }
329 }
330
331 /// Create the map, and construct the read and write handles used to access it.
332 ///
333 /// If you want to use arbitrary types for the keys and values, use [`assert_stable`][Options::assert_stable].
334 #[allow(clippy::type_complexity)]
335 pub fn construct<K, V>(self) -> (WriteHandle<K, V, M, S>, ReadHandle<K, V, M, S>)
336 where
337 K: StableHashEq + Clone,
338 S: BuildHasher + Clone,
339 V: StableHashEq,
340 M: 'static + Clone,
341 {
342 unsafe { self.assert_stable() }
343 }
344
345 /// Create the map, and construct the read and write handles used to access it.
346 ///
347 /// # Safety
348 ///
349 /// This method is safe to call as long as the implementation of `Hash` and `Eq` for both `K`
350 /// and `V` are deterministic. That is, they must always yield the same result if given the
351 /// same inputs. For keys of type `K`, the result must also be consistent between different clones
352 /// of the same key.
353 #[allow(clippy::type_complexity)]
354 pub unsafe fn assert_stable<K, V>(self) -> (WriteHandle<K, V, M, S>, ReadHandle<K, V, M, S>)
355 where
356 K: Eq + Hash + Clone,
357 S: BuildHasher + Clone,
358 V: Eq + Hash,
359 M: 'static + Clone,
360 {
361 let inner = if let Some(cap) = self.capacity {
362 Inner::with_capacity_and_hasher(self.meta, cap, self.hasher)
363 } else {
364 Inner::with_hasher(self.meta, self.hasher)
365 };
366
367 let (mut w, r) = left_right::new_from_empty(inner);
368 w.append(write::Operation::MarkReady);
369
370 (WriteHandle::new(w), ReadHandle::new(r))
371 }
372}
373
374/// Create an empty eventually consistent map.
375///
376/// Use the [`Options`](./struct.Options.html) builder for more control over initialization.
377///
378/// If you want to use arbitrary types for the keys and values, use [`new_assert_stable`].
379#[allow(clippy::type_complexity)]
380pub fn new<K, V>() -> (
381 WriteHandle<K, V, (), RandomState>,
382 ReadHandle<K, V, (), RandomState>,
383)
384where
385 K: StableHashEq + Clone,
386 V: StableHashEq,
387{
388 Options::default().construct()
389}
390
391/// Create an empty eventually consistent map.
392///
393/// Use the [`Options`](./struct.Options.html) builder for more control over initialization.
394///
395/// # Safety
396///
397/// This method is safe to call as long as the implementation of `Hash` and `Eq` for both `K` and
398/// `V` are deterministic. That is, they must always yield the same result if given the same
399/// inputs. For keys of type `K`, the result must also be consistent between different clones
400/// of the same key.
401#[allow(clippy::type_complexity)]
402pub unsafe fn new_assert_stable<K, V>() -> (
403 WriteHandle<K, V, (), RandomState>,
404 ReadHandle<K, V, (), RandomState>,
405)
406where
407 K: Eq + Hash + Clone,
408 V: Eq + Hash,
409{
410 Options::default().assert_stable()
411}
412
413/// Create an empty eventually consistent map with meta information and custom hasher.
414///
415/// Use the [`Options`](./struct.Options.html) builder for more control over initialization.
416///
417/// # Safety
418///
419/// This method is safe to call as long as the implementation of `Hash` and `Eq` for both `K` and
420/// `V`, and the implementation of `BuildHasher` for `S` and [`Hasher`][std::hash::Hasher]
421/// for <code>S::[Hasher][BuildHasher::Hasher]</code> are deterministic. That is, they must always
422/// yield the same result if given the same inputs. For keys of type `K` and hashers of type `S`,
423/// their behavior must also be consistent between different clones of the same value.
424#[allow(clippy::type_complexity)]
425pub unsafe fn with_hasher<K, V, M, S>(
426 meta: M,
427 hasher: S,
428) -> (WriteHandle<K, V, M, S>, ReadHandle<K, V, M, S>)
429where
430 K: Eq + Hash + Clone,
431 V: Eq + Hash,
432 M: 'static + Clone,
433 S: BuildHasher + Clone,
434{
435 Options::default()
436 .with_hasher(hasher)
437 .with_meta(meta)
438 .assert_stable()
439}