chashmap_serde/
lib.rs

1//! Concurrent hash maps.
2//!
3//! This is a clone of https://gitlab.redox-os.org/redox-os/chashmap, which at time of writing seems largely unmaintained.
4//! <b>I take no credit for the original author's hard work.</b> <i>My contributions are to add serde support and any other features
5//! that I may personally need.</i>
6//!
7//! This crate implements concurrent hash maps, based on bucket-level multi-reader locks. It has
8//! excellent performance characteristics¹ and supports resizing, in-place mutation and more.
9//!
10//! The API derives directly from `std::collections::HashMap`, giving it a familiar feel.
11//!
12//! ¹Note that it heavily depends on the behavior of your program, but in most cases, it's really
13//!  good. In some (rare) cases you might want atomic hash maps instead.
14//!
15//! # How it works
16//!
17//! `chashmap` is not lockless, but it distributes locks across the map such that lock contentions
18//! (which is what could make accesses expensive) are very rare.
19//!
20//! Hash maps consists of so called "buckets", which each defines a potential entry in the table.
21//! The bucket of some key-value pair is determined by the hash of the key. By holding a read-write
22//! lock for each bucket, we ensure that you will generally be able to insert, read, modify, etc.
23//! with only one or two locking subroutines.
24//!
25//! There is a special-case: reallocation. When the table is filled up such that very few buckets
26//! are free (note that this is "very few" and not "no", since the load factor shouldn't get too
27//! high as it hurts performance), a global lock is obtained while rehashing the table. This is
28//! pretty inefficient, but it rarely happens, and due to the adaptive nature of the capacity, it
29//! will only happen a few times when the map has just been initialized.
30//!
31//! ## Collision resolution
32//!
33//! When two hashes collide, they cannot share the same bucket, so there must be an algorithm which
34//! can resolve collisions. In our case, we use linear probing, which means that we take the bucket
35//! following it, and repeat until we find a free bucket.
36//!
37//! This method is far from ideal, but superior methods like Robin-Hood hashing works poorly (if at
38//! all) in a concurrent structure.
39//!
40//! # The API
41//!
42//! The API should feel very familiar, if you are used to the libstd hash map implementation. They
43//! share many of the methods, and I've carefully made sure that all the items, which have similarly
44//! named items in libstd, matches in semantics and behavior.
45
46extern crate owning_ref;
47extern crate parking_lot;
48extern crate serde;
49
50#[cfg(test)]
51mod tests;
52
53use owning_ref::{OwningHandle, OwningRef};
54use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
55
56use serde::{
57    de::MapAccess, de::Visitor, ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer,
58};
59
60use std::marker::PhantomData;
61
62use std::borrow::Borrow;
63use std::collections::hash_map::RandomState;
64use std::hash::{BuildHasher, Hash, Hasher};
65use std::sync::atomic::{self, AtomicUsize};
66use std::{cmp, fmt, iter, mem, ops};
67
68/// The atomic ordering used throughout the code.
69const ORDERING: atomic::Ordering = atomic::Ordering::Relaxed;
70/// The length-to-capacity factor.
71const LENGTH_MULTIPLIER: usize = 4;
72/// The maximal load factor's numerator.
73const MAX_LOAD_FACTOR_NUM: usize = 100 - 15;
74/// The maximal load factor's denominator.
75const MAX_LOAD_FACTOR_DENOM: usize = 100;
76/// The default initial capacity.
77const DEFAULT_INITIAL_CAPACITY: usize = 64;
78/// The lowest capacity a table can have.
79const MINIMUM_CAPACITY: usize = 8;
80
81/// A bucket state.
82///
83/// Buckets are the bricks of hash tables. They represent a single entry into the table.
84#[derive(Clone)]
85enum Bucket<K, V> {
86    /// The bucket contains a key-value pair.
87    Contains(K, V),
88    /// The bucket is empty and has never been used.
89    ///
90    /// Since hash collisions are resolved by jumping to the next bucket, some buckets can cluster
91    /// together, meaning that they are potential candidates for lookups. Empty buckets can be seen
92    /// as the delimiter of such cluters.
93    Empty,
94    /// The bucket was removed.
95    ///
96    /// The technique of distincting between "empty" and "removed" was first described by Knuth.
97    /// The idea is that when you search for a key, you will probe over these buckets, since the
98    /// key could have been pushed behind the removed element:
99    ///```notest
100    ///     Contains(k1, v1) // hash = h
101    ///     Removed
102    ///     Contains(k2, v2) // hash = h
103    ///```
104    /// If we stopped at `Removed`, we won't be able to find the second KV pair. So `Removed` is
105    /// semantically different from `Empty`, as the search won't stop.
106    ///
107    /// However, we are still able to insert new pairs at the removed buckets.
108    Removed,
109}
110
111impl<K, V> Bucket<K, V> {
112    /// Is this bucket 'empty'?
113    fn is_empty(&self) -> bool {
114        if let Bucket::Empty = *self {
115            true
116        } else {
117            false
118        }
119    }
120
121    /// Is this bucket 'removed'?
122    fn is_removed(&self) -> bool {
123        if let Bucket::Removed = *self {
124            true
125        } else {
126            false
127        }
128    }
129
130    /// Is this bucket free?
131    ///
132    /// "Free" means that it can safely be replace by another bucket — namely that the bucket is
133    /// not occupied.
134    fn is_free(&self) -> bool {
135        match *self {
136            // The two replacable bucket types are removed buckets and empty buckets.
137            Bucket::Removed | Bucket::Empty => true,
138            // KV pairs can't be replaced as they contain data.
139            Bucket::Contains(..) => false,
140        }
141    }
142
143    /// Get the value (if any) of this bucket.
144    ///
145    /// This gets the value of the KV pair, if any. If the bucket is not a KV pair, `None` is
146    /// returned.
147    fn value(self) -> Option<V> {
148        if let Bucket::Contains(_, val) = self {
149            Some(val)
150        } else {
151            None
152        }
153    }
154
155    /// Get a reference to the value of the bucket (if any).
156    ///
157    /// This returns a reference to the value of the bucket, if it is a KV pair. If not, it will
158    /// return `None`.
159    ///
160    /// Rather than `Option`, it returns a `Result`, in order to make it easier to work with the
161    /// `owning_ref` crate (`try_new` and `try_map` of `OwningHandle` and `OwningRef`
162    /// respectively).
163    fn value_ref(&self) -> Result<&V, ()> {
164        if let Bucket::Contains(_, ref val) = *self {
165            Ok(val)
166        } else {
167            Err(())
168        }
169    }
170
171    /// Does the bucket match a given key?
172    ///
173    /// This returns `true` if the bucket is a KV pair with key `key`. If not, `false` is returned.
174    fn key_matches(&self, key: &K) -> bool
175    where
176        K: PartialEq,
177    {
178        if let Bucket::Contains(ref candidate_key, _) = *self {
179            // Check if the keys matches.
180            candidate_key == key
181        } else {
182            // The bucket isn't a KV pair, so we'll return false, since there is no key to test
183            // against.
184            false
185        }
186    }
187}
188
189/// The low-level representation of the hash table.
190///
191/// This is different from `CHashMap` in two ways:
192///
193/// 1. It is not wrapped in a lock, meaning that resizing and reallocation is not possible.
194/// 2. It does not track the number of occupied buckets, making it expensive to obtain the load
195///    factor.
196struct Table<K, V, S> {
197    /// The hash function builder.
198    ///
199    /// When a `Table` use the default hash builder, it randomly picks a hash function from
200    /// some family of functions in libstd. This effectively eliminates the issue of hash flooding.
201    hash_builder: S,
202    /// The bucket array.
203    ///
204    /// This vector stores the buckets. The order in which they're stored is far from arbitrary: A
205    /// KV pair `(key, val)`'s first priority location is at `self.hash(&key) % len`. If not
206    /// possible, the next bucket is used, and this process repeats until the bucket is free (or
207    /// the end is reached, in which we simply wrap around).
208    buckets: Vec<RwLock<Bucket<K, V>>>,
209}
210
211impl<K, V> Table<K, V, RandomState> {
212    /// Create a table with a certain number of buckets.
213    fn new(buckets: usize) -> Self {
214        // TODO: For some obscure reason `RwLock` doesn't implement `Clone`.
215
216        // Fill a vector with `buckets` of `Empty` buckets.
217        let mut vec = Vec::with_capacity(buckets);
218        for _ in 0..buckets {
219            vec.push(RwLock::new(Bucket::Empty));
220        }
221
222        Table {
223            // Generate a hash function.
224            hash_builder: RandomState::new(),
225            buckets: vec,
226        }
227    }
228
229    /// Create a table with at least some capacity.
230    fn with_capacity(cap: usize) -> Self {
231        // The + 1 is needed to avoid losing fractional bucket to integer division.
232        Table::new(cmp::max(
233            MINIMUM_CAPACITY,
234            cap * MAX_LOAD_FACTOR_DENOM / MAX_LOAD_FACTOR_NUM + 1,
235        ))
236    }
237}
238
239impl<K, V, S: BuildHasher> Table<K, V, S> {
240    /// Create a `Table` with the `BuildHasher`
241    fn with_hasher(buckets: usize, hash_builder: S) -> Self {
242        // TODO: For some obscure reason `RwLock` doesn't implement `Clone`.
243
244        // Fill a vector with `buckets` of `Empty` buckets.
245        let mut vec = Vec::with_capacity(buckets);
246        for _ in 0..buckets {
247            vec.push(RwLock::new(Bucket::Empty));
248        }
249
250        Table {
251            hash_builder,
252            buckets: vec,
253        }
254    }
255
256    /// Create a `Table` with a specific capacity and the `BuildHasher`
257    fn with_capacity_and_hasher(cap: usize, hash_builder: S) -> Self {
258        // The + 1 is needed to avoid losing fractional bucket to integer division.
259        Table::with_hasher(
260            cmp::max(
261                MINIMUM_CAPACITY,
262                cap * MAX_LOAD_FACTOR_DENOM / MAX_LOAD_FACTOR_NUM + 1,
263            ),
264            hash_builder,
265        )
266    }
267}
268
269impl<K: PartialEq + Hash, V, S: BuildHasher> Table<K, V, S> {
270    /// Hash some key through the internal hash function.
271    fn hash<T: ?Sized>(&self, key: &T) -> usize
272    where
273        T: Hash,
274    {
275        // Build the initial hash function state.
276        let mut hasher = self.hash_builder.build_hasher();
277        // Hash the key.
278        key.hash(&mut hasher);
279        // Cast to `usize`. Since the hash function returns `u64`, this cast won't ever cause
280        // entropy less than the output space.
281        hasher.finish() as usize
282    }
283
284    /// Scan from the first priority of a key until a match is found.
285    ///
286    /// This scans from the first priority of `key` (as defined by its hash), until a match is
287    /// found (will wrap on end), i.e. `matches` returns `true` with the bucket as argument.
288    ///
289    /// The read guard from the RW-lock of the bucket is returned.
290    fn scan<F, Q: ?Sized>(&self, key: &Q, matches: F) -> RwLockReadGuard<Bucket<K, V>>
291    where
292        F: Fn(&Bucket<K, V>) -> bool,
293        K: Borrow<Q>,
294        Q: Hash,
295    {
296        // Hash the key.
297        let hash = self.hash(key);
298
299        // Start at the first priority bucket, and then move upwards, searching for the matching
300        // bucket.
301        for i in 0..self.buckets.len() {
302            // Get the lock of the `i`'th bucket after the first priority bucket (wrap on end).
303            let lock = self.buckets[(hash + i) % self.buckets.len()].read();
304
305            // Check if it is a match.
306            if matches(&lock) {
307                // Yup. Return.
308                return lock;
309            }
310        }
311        panic!("`CHashMap` scan failed! No entry found.");
312    }
313
314    /// Scan from the first priority of a key until a match is found (mutable guard).
315    ///
316    /// This is similar to `scan`, but instead of an immutable lock guard, a mutable lock guard is
317    /// returned.
318    fn scan_mut<F, Q: ?Sized>(&self, key: &Q, matches: F) -> RwLockWriteGuard<Bucket<K, V>>
319    where
320        F: Fn(&Bucket<K, V>) -> bool,
321        K: Borrow<Q>,
322        Q: Hash,
323    {
324        // Hash the key.
325        let hash = self.hash(key);
326
327        // Start at the first priority bucket, and then move upwards, searching for the matching
328        // bucket.
329        for i in 0..self.buckets.len() {
330            // Get the lock of the `i`'th bucket after the first priority bucket (wrap on end).
331            let lock = self.buckets[(hash + i) % self.buckets.len()].write();
332
333            // Check if it is a match.
334            if matches(&lock) {
335                // Yup. Return.
336                return lock;
337            }
338        }
339        panic!("`CHashMap` scan_mut failed! No entry found.");
340    }
341
342    /// Scan from the first priority of a key until a match is found (bypass locks).
343    ///
344    /// This is similar to `scan_mut`, but it safely bypasses the locks by making use of the
345    /// aliasing invariants of `&mut`.
346    fn scan_mut_no_lock<F>(&mut self, key: &K, matches: F) -> &mut Bucket<K, V>
347    where
348        F: Fn(&Bucket<K, V>) -> bool,
349    {
350        // Hash the key.
351        let hash = self.hash(key);
352        // TODO: To tame the borrowchecker, we fetch this in advance.
353        let len = self.buckets.len();
354
355        // Start at the first priority bucket, and then move upwards, searching for the matching
356        // bucket.
357        for i in 0..self.buckets.len() {
358            // TODO: hacky hacky
359            let idx = (hash + i) % len;
360
361            // Get the lock of the `i`'th bucket after the first priority bucket (wrap on end).
362
363            // Check if it is a match.
364            if {
365                let bucket = self.buckets[idx].get_mut();
366                matches(&bucket)
367            } {
368                // Yup. Return.
369                return self.buckets[idx].get_mut();
370            }
371        }
372        panic!("`CHashMap` scan_mut_no_lock failed! No entry found.");
373    }
374
375    /// Find a bucket with some key, or a free bucket in same cluster.
376    ///
377    /// This scans for buckets with key `key`. If one is found, it will be returned. If none are
378    /// found, it will return a free bucket in the same cluster.
379    fn lookup_or_free(&self, key: &K) -> RwLockWriteGuard<Bucket<K, V>> {
380        // Hash the key.
381        let hash = self.hash(key);
382        // The encountered free bucket.
383        let mut free = None;
384
385        // Start at the first priority bucket, and then move upwards, searching for the matching
386        // bucket.
387        for i in 0..self.buckets.len() {
388            // Get the lock of the `i`'th bucket after the first priority bucket (wrap on end).
389            let lock = self.buckets[(hash + i) % self.buckets.len()].write();
390
391            if lock.key_matches(key) {
392                // We found a match.
393                return lock;
394            } else if lock.is_empty() {
395                // The cluster is over. Use the encountered free bucket, if any.
396                return free.unwrap_or(lock);
397            } else if lock.is_removed() && free.is_none() {
398                // We found a free bucket, so we can store it to later (if we don't already have
399                // one).
400                free = Some(lock)
401            }
402        }
403
404        free.expect("No free buckets found")
405    }
406
407    /// Lookup some key.
408    ///
409    /// This searches some key `key`, and returns a immutable lock guard to its bucket. If the key
410    /// couldn't be found, the returned value will be an `Empty` cluster.
411    fn lookup<Q: ?Sized>(&self, key: &Q) -> RwLockReadGuard<Bucket<K, V>>
412    where
413        K: Borrow<Q>,
414        Q: PartialEq + Hash,
415    {
416        self.scan(key, |x| match *x {
417            // We'll check that the keys does indeed match, as the chance of hash collisions
418            // happening is inevitable
419            Bucket::Contains(ref candidate_key, _) if key.eq(candidate_key.borrow()) => true,
420            // We reached an empty bucket, meaning that there are no more buckets, not even removed
421            // ones, to search.
422            Bucket::Empty => true,
423            _ => false,
424        })
425    }
426
427    /// Lookup some key, mutably.
428    ///
429    /// This is similar to `lookup`, but it returns a mutable guard.
430    ///
431    /// Replacing at this bucket is safe as the bucket will be in the same cluster of buckets as
432    /// the first priority cluster.
433    fn lookup_mut<Q: ?Sized>(&self, key: &Q) -> RwLockWriteGuard<Bucket<K, V>>
434    where
435        K: Borrow<Q>,
436        Q: PartialEq + Hash,
437    {
438        self.scan_mut(key, |x| match *x {
439            // We'll check that the keys does indeed match, as the chance of hash collisions
440            // happening is inevitable
441            Bucket::Contains(ref candidate_key, _) if key.eq(candidate_key.borrow()) => true,
442            // We reached an empty bucket, meaning that there are no more buckets, not even removed
443            // ones, to search.
444            Bucket::Empty => true,
445            _ => false,
446        })
447    }
448
449    /// Find a free bucket in the same cluster as some key.
450    ///
451    /// This means that the returned lock guard defines a valid, free bucket, where `key` can be
452    /// inserted.
453    fn find_free(&self, key: &K) -> RwLockWriteGuard<Bucket<K, V>> {
454        self.scan_mut(key, |x| x.is_free())
455    }
456
457    /// Find a free bucket in the same cluster as some key (bypassing locks).
458    ///
459    /// This is similar to `find_free`, except that it safely bypasses locks through the aliasing
460    /// guarantees of `&mut`.
461    fn find_free_no_lock(&mut self, key: &K) -> &mut Bucket<K, V> {
462        self.scan_mut_no_lock(key, |x| x.is_free())
463    }
464
465    /// Fill the table with data from another table.
466    ///
467    /// This is used to efficiently copy the data of `table` into `self`.
468    ///
469    /// # Important
470    ///
471    /// The table should be empty for this to work correctly/logically.
472    fn fill(&mut self, table: Self) {
473        // Run over all the buckets.
474        for i in table.buckets {
475            // We'll only transfer the bucket if it is a KV pair.
476            if let Bucket::Contains(key, val) = i.into_inner() {
477                // Find a bucket where the KV pair can be inserted.
478                let bucket = self.scan_mut_no_lock(&key, |x| match *x {
479                    // Halt on an empty bucket.
480                    Bucket::Empty => true,
481                    // We'll assume that the rest of the buckets either contains other KV pairs (in
482                    // particular, no buckets have been removed in the newly construct table).
483                    _ => false,
484                });
485
486                // Set the bucket to the KV pair.
487                *bucket = Bucket::Contains(key, val);
488            }
489        }
490    }
491}
492
493impl<K: Clone, V: Clone, S: Clone> Clone for Table<K, V, S> {
494    fn clone(&self) -> Self {
495        Table {
496            // Since we copy plainly without rehashing etc., it is important that we keep the same
497            // hash function.
498            hash_builder: self.hash_builder.clone(),
499            // Lock and clone every bucket individually.
500            buckets: self
501                .buckets
502                .iter()
503                .map(|x| RwLock::new(x.read().clone()))
504                .collect(),
505        }
506    }
507}
508
509impl<K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for Table<K, V, S> {
510    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
511        // create a debug map and fill with entries
512        let mut map = f.debug_map();
513        // We'll just run over all buckets and output one after one.
514        for i in &self.buckets {
515            // Acquire the lock.
516            let lock = i.read();
517            // Check if the bucket actually contains anything.
518            if let Bucket::Contains(ref key, ref val) = *lock {
519                // add this entry to the map
520                map.entry(key, val);
521            }
522        }
523        map.finish()
524    }
525}
526
527/// An iterator over the entries of some table.
528pub struct IntoIter<K, V, S> {
529    /// The inner table.
530    table: Table<K, V, S>,
531}
532
533impl<K, V, S> Iterator for IntoIter<K, V, S> {
534    type Item = (K, V);
535
536    fn next(&mut self) -> Option<(K, V)> {
537        // We own the table, and can thus do what we want with it. We'll simply pop from the
538        // buckets until we find a bucket containing data.
539        while let Some(bucket) = self.table.buckets.pop() {
540            // We can bypass dem ebil locks.
541            if let Bucket::Contains(key, val) = bucket.into_inner() {
542                // The bucket contained data, so we'll return the pair.
543                return Some((key, val));
544            }
545        }
546
547        // We've exhausted all the buckets, and no more data could be found.
548        None
549    }
550}
551
552impl<K, V, S> IntoIterator for Table<K, V, S> {
553    type Item = (K, V);
554    type IntoIter = IntoIter<K, V, S>;
555
556    fn into_iter(self) -> Self::IntoIter {
557        IntoIter { table: self }
558    }
559}
560
561/// A RAII guard for reading an entry of a hash map.
562///
563/// This is an access type dereferencing to the inner value of the entry. It will handle unlocking
564/// on drop.
565pub struct ReadGuard<'a, K: 'a, V: 'a, S> {
566    /// The inner hecking long type.
567    inner: OwningRef<
568        OwningHandle<RwLockReadGuard<'a, Table<K, V, S>>, RwLockReadGuard<'a, Bucket<K, V>>>,
569        V,
570    >,
571}
572
573impl<'a, K, V, S> ops::Deref for ReadGuard<'a, K, V, S> {
574    type Target = V;
575
576    fn deref(&self) -> &V {
577        &self.inner
578    }
579}
580
581impl<'a, K, V: PartialEq, S> cmp::PartialEq for ReadGuard<'a, K, V, S> {
582    fn eq(&self, other: &ReadGuard<'a, K, V, S>) -> bool {
583        self == other
584    }
585}
586impl<'a, K, V: Eq, S> cmp::Eq for ReadGuard<'a, K, V, S> {}
587
588impl<'a, K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for ReadGuard<'a, K, V, S> {
589    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
590        write!(f, "ReadGuard({:?})", &**self)
591    }
592}
593
594/// A mutable RAII guard for reading an entry of a hash map.
595///
596/// This is an access type dereferencing to the inner value of the entry. It will handle unlocking
597/// on drop.
598pub struct WriteGuard<'a, K: 'a, V: 'a, S> {
599    /// The inner hecking long type.
600    inner: OwningHandle<
601        OwningHandle<RwLockReadGuard<'a, Table<K, V, S>>, RwLockWriteGuard<'a, Bucket<K, V>>>,
602        &'a mut V,
603    >,
604}
605
606impl<'a, K, V, S> ops::Deref for WriteGuard<'a, K, V, S> {
607    type Target = V;
608
609    fn deref(&self) -> &V {
610        &self.inner
611    }
612}
613
614impl<'a, K, V, S> ops::DerefMut for WriteGuard<'a, K, V, S> {
615    fn deref_mut(&mut self) -> &mut V {
616        &mut self.inner
617    }
618}
619
620impl<'a, K, V: PartialEq, S> cmp::PartialEq for WriteGuard<'a, K, V, S> {
621    fn eq(&self, other: &Self) -> bool {
622        self == other
623    }
624}
625impl<'a, K, V: Eq, S> cmp::Eq for WriteGuard<'a, K, V, S> {}
626
627impl<'a, K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for WriteGuard<'a, K, V, S> {
628    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
629        write!(f, "WriteGuard({:?})", &**self)
630    }
631}
632
633/// A concurrent hash map.
634///
635/// This type defines a concurrent associative array, based on hash tables with linear probing and
636/// dynamic resizing.
637///
638/// The idea is to let each entry hold a multi-reader lock, effectively limiting lock contentions
639/// to writing simultaneously on the same entry, and resizing the table.
640///
641/// It is not an atomic or lockless hash table, since such construction is only useful in very few
642/// cases, due to limitations on in-place operations on values.
643pub struct CHashMap<K, V, S = RandomState> {
644    /// The inner table.
645    table: RwLock<Table<K, V, S>>,
646    /// The total number of KV pairs in the table.
647    ///
648    /// This is used to calculate the load factor.
649    len: AtomicUsize,
650}
651
652impl<K, V> CHashMap<K, V, RandomState> {
653    /// Create a new hash map with a certain capacity.
654    ///
655    /// "Capacity" means the amount of entries the hash map can hold before reallocating. This
656    /// function allocates a hash map with at least the capacity of `cap`.
657    pub fn with_capacity(cap: usize) -> Self {
658        CHashMap {
659            // Start at 0 KV pairs.
660            len: AtomicUsize::new(0),
661            // Make a new empty table. We will make sure that it is at least one.
662            table: RwLock::new(Table::with_capacity(cap)),
663        }
664    }
665
666    /// Create a new hash map.
667    ///
668    /// This creates a new hash map with some fixed initial capacity.
669    pub fn new() -> Self {
670        CHashMap::with_capacity(DEFAULT_INITIAL_CAPACITY)
671    }
672}
673
674impl<K, V, S> CHashMap<K, V, S> {
675    /// Get the number of entries in the hash table.
676    ///
677    /// This is entirely atomic, and will not acquire any locks.
678    ///
679    /// This is guaranteed to reflect the number of entries _at this particular moment.
680    pub fn len(&self) -> usize {
681        self.len.load(ORDERING)
682    }
683
684    /// Get the capacity of the hash table.
685    ///
686    /// The capacity is equal to the number of entries the table can hold before reallocating.
687    pub fn capacity(&self) -> usize {
688        cmp::max(MINIMUM_CAPACITY, self.buckets()) * MAX_LOAD_FACTOR_NUM / MAX_LOAD_FACTOR_DENOM
689    }
690
691    /// Get the number of buckets of the hash table.
692    ///
693    /// "Buckets" refers to the amount of potential entries in the inner table. It is different
694    /// from capacity, in the sense that the map cannot hold this number of entries, since it needs
695    /// to keep the load factor low.
696    pub fn buckets(&self) -> usize {
697        self.table.read().buckets.len()
698    }
699
700    /// Is the hash table empty?
701    pub fn is_empty(&self) -> bool {
702        self.len() == 0
703    }
704
705    /// Deprecated. Do not use.
706    #[deprecated]
707    pub fn filter<F>(&self, predicate: F)
708    where
709        F: Fn(&K, &V) -> bool,
710    {
711        // Following the naming conventions of the standard library...
712        self.retain(predicate)
713    }
714
715    /// Filter the map based on some predicate.
716    ///
717    /// This tests every entry in the hash map by closure `predicate`. If it returns `true`, the
718    /// map will retain the entry. If not, the entry will be removed.
719    ///
720    /// This won't lock the table. This can be a major performance trade-off, as it means that it
721    /// must lock on every table entry. However, it won't block other operations of the table,
722    /// while filtering.
723    pub fn retain<F>(&self, predicate: F)
724    where
725        F: Fn(&K, &V) -> bool,
726    {
727        // Acquire the read lock to the table.
728        let table = self.table.read();
729        // Run over every bucket and apply the filter.
730        for bucket in &table.buckets {
731            // Acquire the read lock, which we will upgrade if necessary.
732            // TODO: Use read lock and upgrade later.
733            let mut lock = bucket.write();
734            // Skip the free buckets.
735            // TODO: Fold the `if` into the `match` when the borrowck gets smarter.
736            if match *lock {
737                Bucket::Contains(ref key, ref val) => !predicate(key, val),
738                _ => false,
739            } {
740                // Predicate didn't match. Set the bucket to removed.
741                *lock = Bucket::Removed;
742                // Decrement the length to account for the removed bucket.
743                // TODO: Can we somehow bundle these up to reduce the overhead of atomic
744                //       operations? Storing in a local variable and then subtracting causes
745                //       issues with consistency.
746                self.len.fetch_sub(1, ORDERING);
747            }
748        }
749    }
750}
751
752impl<K, V, S: Default + BuildHasher> CHashMap<K, V, S> {
753    /// Creates an empty `CHashMap` with the specified capacity, using `hash_builder`
754    /// to hash the keys.
755    ///
756    /// The hash map will be able to hold at least `capacity` elements without
757    /// reallocating. If `capacity` is 0, the hash map will not allocate.
758    ///
759    /// Warning: `hash_builder` is normally randomly generated, and
760    /// is designed to allow HashMaps to be resistant to attacks that
761    /// cause many collisions and very poor performance. Setting it
762    /// manually using this function can expose a DoS attack vector.
763    ///
764    pub fn with_hasher_and_capacity(cap: usize, hash_builder: S) -> Self {
765        CHashMap {
766            // Start at 0 KV pairs.
767            len: AtomicUsize::new(0),
768            // Make a new empty table. We will make sure that it is at least one.
769            table: RwLock::new(Table::with_capacity_and_hasher(cap, hash_builder)),
770        }
771    }
772
773    /// Creates an empty `CHashMap` which will use the given hash builder to hash
774    /// keys.
775    ///
776    /// The created map has the default initial capacity.
777    ///
778    /// Warning: `hash_builder` is normally randomly generated, and
779    /// is designed to allow HashMaps to be resistant to attacks that
780    /// cause many collisions and very poor performance. Setting it
781    /// manually using this function can expose a DoS attack vector.
782    pub fn with_hasher(hash_builder: S) -> Self {
783        CHashMap::with_hasher_and_capacity(DEFAULT_INITIAL_CAPACITY, hash_builder)
784    }
785}
786
787impl<K, V, S> CHashMap<K, V, S>
788where
789    S: Default + BuildHasher,
790{
791    /// Clear the map.
792    ///
793    /// This clears the hash map and returns the previous version of the map.
794    ///
795    /// It is relatively efficient, although it needs to write lock a RW lock.
796    pub fn clear(&self) -> CHashMap<K, V, S> {
797        // Acquire a writable lock.
798        let mut lock = self.table.write();
799        CHashMap {
800            // Replace the old table with an empty initial table.
801            table: RwLock::new(mem::replace(
802                &mut *lock,
803                Table::with_hasher(DEFAULT_INITIAL_CAPACITY, S::default()),
804            )),
805            // Replace the length with 0 and use the old length.
806            len: AtomicUsize::new(self.len.swap(0, ORDERING)),
807        }
808    }
809}
810
811impl<K: PartialEq + Hash, V, S: BuildHasher> CHashMap<K, V, S> {
812    /// Get the value of some key.
813    ///
814    /// This will lookup the entry of some key `key`, and acquire the read-only lock. This means
815    /// that all other parties are blocked from _writing_ (not reading) this value while the guard
816    /// is held.
817    pub fn get<Q: ?Sized>(&self, key: &Q) -> Option<ReadGuard<K, V, S>>
818    where
819        K: Borrow<Q>,
820        Q: Hash + PartialEq,
821    {
822        // Acquire the read lock and lookup in the table.
823        if let Ok(inner) = OwningRef::new(OwningHandle::new_with_fn(self.table.read(), |x| {
824            unsafe { &*x }.lookup(key)
825        }))
826        .try_map(|x| x.value_ref())
827        {
828            // The bucket contains data.
829            Some(ReadGuard { inner: inner })
830        } else {
831            // The bucket is empty/removed.
832            None
833        }
834    }
835
836    /// Get the (mutable) value of some key.
837    ///
838    /// This will lookup the entry of some key `key`, and acquire the writable lock. This means
839    /// that all other parties are blocked from both reading and writing this value while the guard
840    /// is held.
841    pub fn get_mut<Q: ?Sized>(&self, key: &Q) -> Option<WriteGuard<K, V, S>>
842    where
843        K: Borrow<Q>,
844        Q: Hash + PartialEq,
845    {
846        // Acquire the write lock and lookup in the table.
847        if let Ok(inner) = OwningHandle::try_new(
848            OwningHandle::new_with_fn(self.table.read(), |x| unsafe { &*x }.lookup_mut(key)),
849            |x| {
850                if let &mut Bucket::Contains(_, ref mut val) =
851                    unsafe { &mut *(x as *mut Bucket<K, V>) }
852                {
853                    // The bucket contains data.
854                    Ok(val)
855                } else {
856                    // The bucket is empty/removed.
857                    Err(())
858                }
859            },
860        ) {
861            Some(WriteGuard { inner: inner })
862        } else {
863            None
864        }
865    }
866
867    /// Does the hash map contain this key?
868    pub fn contains_key<Q: ?Sized>(&self, key: &Q) -> bool
869    where
870        K: Borrow<Q>,
871        Q: Hash + PartialEq,
872    {
873        // Acquire the lock.
874        let lock = self.table.read();
875        // Look the key up in the table
876        let bucket = lock.lookup(key);
877        // Test if it is free or not.
878        !bucket.is_free()
879    }
880}
881
882impl<K, V, S> CHashMap<K, V, S>
883where
884    K: PartialEq + Hash,
885    S: BuildHasher + Default,
886{
887    /// Insert a **new** entry.
888    ///
889    /// This inserts an entry, which the map does not already contain, into the table. If the entry
890    /// exists, the old entry won't be replaced, nor will an error be returned. It will possibly
891    /// introduce silent bugs.
892    ///
893    /// To be more specific, it assumes that the entry does not already exist, and will simply skip
894    /// to the end of the cluster, even if it does exist.
895    ///
896    /// This is faster than e.g. `insert`, but should only be used, if you know that the entry
897    /// doesn't already exist.
898    ///
899    /// # Warning
900    ///
901    /// Only use this, if you know what you're doing. This can easily introduce very complex logic
902    /// errors.
903    ///
904    /// For most other purposes, use `insert`.
905    ///
906    /// # Panics
907    ///
908    /// This might perform checks in debug mode testing if the key exists already.
909    pub fn insert_new(&self, key: K, val: V) {
910        debug_assert!(
911            !self.contains_key(&key),
912            "Hash table contains already key, contrary to \
913                      the assumptions about `insert_new`'s arguments."
914        );
915
916        // Expand and lock the table. We need to expand to ensure the bounds on the load factor.
917        let lock = self.table.read();
918        {
919            // Find the free bucket.
920            let mut bucket = lock.find_free(&key);
921
922            // Set the bucket to the new KV pair.
923            *bucket = Bucket::Contains(key, val);
924        }
925        // Expand the table (we know beforehand that the entry didn't already exist).
926        self.expand(lock);
927    }
928
929    /// Replace an existing entry, or insert a new one.
930    ///
931    /// This will replace an existing entry and return the old entry, if any. If no entry exists,
932    /// it will simply insert the new entry and return `None`.
933    pub fn insert(&self, key: K, val: V) -> Option<V> {
934        let ret;
935        // Expand and lock the table. We need to expand to ensure the bounds on the load factor.
936        let lock = self.table.read();
937        {
938            // Lookup the key or a free bucket in the inner table.
939            let mut bucket = lock.lookup_or_free(&key);
940
941            // Replace the bucket.
942            ret = mem::replace(&mut *bucket, Bucket::Contains(key, val)).value();
943        }
944
945        // Expand the table if no bucket was overwritten (i.e. the entry is fresh).
946        if ret.is_none() {
947            self.expand(lock);
948        }
949
950        ret
951    }
952
953    /// Insert or update.
954    ///
955    /// This looks up `key`. If it exists, the reference to its value is passed through closure
956    /// `update`.  If it doesn't exist, the result of closure `insert` is inserted.
957    pub fn upsert<F, G>(&self, key: K, insert: F, update: G)
958    where
959        F: FnOnce() -> V,
960        G: FnOnce(&mut V),
961    {
962        // Expand and lock the table. We need to expand to ensure the bounds on the load factor.
963        let lock = self.table.read();
964        {
965            // Lookup the key or a free bucket in the inner table.
966            let mut bucket = lock.lookup_or_free(&key);
967
968            match *bucket {
969                // The bucket had KV pair!
970                Bucket::Contains(_, ref mut val) => {
971                    // Run it through the closure.
972                    update(val);
973                    // TODO: We return to stop the borrowck to yell at us. This prevents the control flow
974                    //       from reaching the expansion after the match if it has been right here.
975                    return;
976                }
977                // The bucket was empty, simply insert.
978                ref mut x => *x = Bucket::Contains(key, insert()),
979            }
980        }
981
982        // Expand the table (this will only happen if the function haven't returned yet).
983        self.expand(lock);
984    }
985
986    /// Map or insert an entry.
987    ///
988    /// This sets the value associated with key `key` to `f(Some(old_val))` (if it returns `None`,
989    /// the entry is removed) if it exists. If it does not exist, it inserts it with value
990    /// `f(None)`, unless the closure returns `None`.
991    ///
992    /// Note that if `f` returns `None`, the entry of key `key` is removed unconditionally.
993    pub fn alter<F>(&self, key: K, f: F)
994    where
995        F: FnOnce(Option<V>) -> Option<V>,
996    {
997        // Expand and lock the table. We need to expand to ensure the bounds on the load factor.
998        let lock = self.table.read();
999        {
1000            // Lookup the key or a free bucket in the inner table.
1001            let mut bucket = lock.lookup_or_free(&key);
1002
1003            match mem::replace(&mut *bucket, Bucket::Removed) {
1004                Bucket::Contains(_, val) => {
1005                    if let Some(new_val) = f(Some(val)) {
1006                        // Set the bucket to a KV pair with the new value.
1007                        *bucket = Bucket::Contains(key, new_val);
1008                        // No extension required, as the bucket already had a KV pair previously.
1009                        return;
1010                    } else {
1011                        // The old entry was removed, so we decrement the length of the map.
1012                        self.len.fetch_sub(1, ORDERING);
1013                        // TODO: We return as a hack to avoid the borrowchecker from thinking we moved a
1014                        //       referenced object. Namely, under this match arm the expansion after the match
1015                        //       statement won't ever be reached.
1016                        return;
1017                    }
1018                }
1019                _ => {
1020                    if let Some(new_val) = f(None) {
1021                        // The previously free cluster will get a KV pair with the new value.
1022                        *bucket = Bucket::Contains(key, new_val);
1023                    } else {
1024                        return;
1025                    }
1026                }
1027            }
1028        }
1029
1030        // A new entry was inserted, so naturally, we expand the table.
1031        self.expand(lock);
1032    }
1033
1034    /// Remove an entry.
1035    ///
1036    /// This removes and returns the entry with key `key`. If no entry with said key exists, it
1037    /// will simply return `None`.
1038    pub fn remove<Q: ?Sized>(&self, key: &Q) -> Option<V>
1039    where
1040        K: Borrow<Q>,
1041        Q: PartialEq + Hash,
1042    {
1043        // Acquire the read lock of the table.
1044        let lock = self.table.read();
1045
1046        // Lookup the table, mutably.
1047        let mut bucket = lock.lookup_mut(&key);
1048        // Remove the bucket.
1049        match &mut *bucket {
1050            // There was nothing to remove.
1051            &mut Bucket::Removed | &mut Bucket::Empty => None,
1052            // TODO: We know that this is a `Bucket::Contains` variant, but to bypass borrowck
1053            //       madness, we do weird weird stuff.
1054            bucket => {
1055                // Decrement the length of the map.
1056                self.len.fetch_sub(1, ORDERING);
1057
1058                // Set the bucket to "removed" and return its value.
1059                mem::replace(bucket, Bucket::Removed).value()
1060            }
1061        }
1062    }
1063
1064    /// Reserve additional space.
1065    ///
1066    /// This reserves additional `additional` buckets to the table. Note that it might reserve more
1067    /// in order make reallocation less common.
1068    pub fn reserve(&self, additional: usize) {
1069        // Get the new length.
1070        let len = (self.len() + additional) * LENGTH_MULTIPLIER;
1071        // Acquire the write lock (needed because we'll mess with the table).
1072        let mut lock = self.table.write();
1073        // Handle the case where another thread has resized the table while we were acquiring the
1074        // lock.
1075        if lock.buckets.len() < len {
1076            // Swap the table out with a new table of desired size (multiplied by some factor).
1077            let table = mem::replace(
1078                &mut *lock,
1079                Table::with_capacity_and_hasher(len, S::default()),
1080            );
1081            // Fill the new table with the data from the old table.
1082            lock.fill(table);
1083        }
1084    }
1085
1086    /// Shrink the capacity of the map to reduce space usage.
1087    ///
1088    /// This will shrink the capacity of the map to the needed amount (plus some additional space
1089    /// to avoid reallocations), effectively reducing memory usage in cases where there is
1090    /// excessive space.
1091    ///
1092    /// It is healthy to run this once in a while, if the size of your hash map changes a lot (e.g.
1093    /// has a high maximum case).
1094    pub fn shrink_to_fit(&self) {
1095        // Acquire the write lock (needed because we'll mess with the table).
1096        let mut lock = self.table.write();
1097        // Swap the table out with a new table of desired size (multiplied by some factor).
1098        let table = mem::replace(
1099            &mut *lock,
1100            Table::with_capacity_and_hasher(self.len(), S::default()),
1101        );
1102        // Fill the new table with the data from the old table.
1103        lock.fill(table);
1104    }
1105
1106    /// Increment the size of the hash map and expand it so one more entry can fit in.
1107    ///
1108    /// This returns the read lock, such that the caller won't have to acquire it twice.
1109    fn expand(&self, lock: RwLockReadGuard<Table<K, V, S>>) {
1110        // Increment the length to take the new element into account.
1111        let len = self.len.fetch_add(1, ORDERING) + 1;
1112
1113        // Extend if necessary. We multiply by some constant to adjust our load factor.
1114        if len * MAX_LOAD_FACTOR_DENOM > lock.buckets.len() * MAX_LOAD_FACTOR_NUM {
1115            // Drop the read lock to avoid deadlocks when acquiring the write lock.
1116            drop(lock);
1117            // Reserve 1 entry in space (the function will handle the excessive space logic).
1118            self.reserve(1);
1119        }
1120    }
1121}
1122
1123impl<K, V, S> CHashMap<K, V, S>
1124where
1125    K: Clone,
1126{
1127    /// returns a vec of keys. Note that this does a clone operation.
1128    pub fn keys(&self) -> Vec<K> {
1129        let mut result = Vec::<K>::with_capacity(self.len());
1130        let lock = self.table.read();
1131        {
1132            for bucket in lock.buckets.iter() {
1133                {
1134                    if let Bucket::Contains(ref key, _) = *bucket.read() {
1135                        result.push(key.clone());
1136                    }
1137                }
1138            }
1139        }
1140        result
1141    }
1142}
1143
1144impl<K: Serialize, V: Serialize, S> Serialize for CHashMap<K, V, S> {
1145    fn serialize<T>(&self, serializer: T) -> Result<T::Ok, T::Error>
1146    where
1147        T: Serializer,
1148    {
1149        let lock = self.table.read();
1150        let mut map_serializer = serializer.serialize_map(Some(self.len()))?;
1151        {
1152            for bucket in lock.buckets.iter() {
1153                {
1154                    if let Bucket::Contains(ref key, ref value) = *bucket.read() {
1155                        map_serializer.serialize_entry(key, value)?;
1156                    }
1157                }
1158            }
1159        }
1160        map_serializer.end()
1161    }
1162}
1163
1164impl<'de, K: Deserialize<'de> + PartialEq + Hash, V: Deserialize<'de>, S> Deserialize<'de>
1165    for CHashMap<K, V, S>
1166where
1167    S: Default + BuildHasher,
1168{
1169    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1170    where
1171        D: Deserializer<'de>,
1172    {
1173        deserializer.deserialize_map(ChashMapVisitor::new())
1174    }
1175}
1176
1177impl<K, V, S: Default + BuildHasher> Default for CHashMap<K, V, S> {
1178    fn default() -> Self {
1179        // Forward the call to `new`.
1180        CHashMap::with_hasher(S::default())
1181    }
1182}
1183
1184impl<K: Clone, V: Clone, S: Clone> Clone for CHashMap<K, V, S> {
1185    fn clone(&self) -> Self {
1186        CHashMap {
1187            table: RwLock::new(self.table.read().clone()),
1188            len: AtomicUsize::new(self.len.load(ORDERING)),
1189        }
1190    }
1191}
1192
1193impl<K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for CHashMap<K, V, S> {
1194    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1195        (*self.table.read()).fmt(f)
1196    }
1197}
1198
1199impl<K, V, S> IntoIterator for CHashMap<K, V, S> {
1200    type Item = (K, V);
1201    type IntoIter = IntoIter<K, V, S>;
1202
1203    fn into_iter(self) -> IntoIter<K, V, S> {
1204        self.table.into_inner().into_iter()
1205    }
1206}
1207
1208impl<K: PartialEq + Hash, V, S: Default + BuildHasher> iter::FromIterator<(K, V)>
1209    for CHashMap<K, V, S>
1210{
1211    fn from_iter<I: IntoIterator<Item = (K, V)>>(iter: I) -> Self {
1212        // TODO: This step is required to obtain the length of the iterator. Eliminate it.
1213        let vec: Vec<_> = iter.into_iter().collect();
1214        let len = vec.len();
1215
1216        // Start with an empty table.
1217        let mut table = Table::with_capacity_and_hasher(len, S::default());
1218        // Fill the table with the pairs from the iterator.
1219        for (key, val) in vec {
1220            // Insert the KV pair. This is fine, as we are ensured that there are no duplicates in
1221            // the iterator.
1222            let bucket = table.find_free_no_lock(&key);
1223            *bucket = Bucket::Contains(key, val);
1224        }
1225
1226        CHashMap {
1227            table: RwLock::new(table),
1228            len: AtomicUsize::new(len),
1229        }
1230    }
1231}
1232
1233struct ChashMapVisitor<K, V, S = RandomState> {
1234    marker: PhantomData<fn() -> CHashMap<K, V, S>>,
1235}
1236
1237impl<K, V, S> ChashMapVisitor<K, V, S> {
1238    fn new() -> Self {
1239        ChashMapVisitor {
1240            marker: PhantomData,
1241        }
1242    }
1243}
1244
1245impl<'de, K, V, S> Visitor<'de> for ChashMapVisitor<K, V, S>
1246where
1247    K: Deserialize<'de> + PartialEq + Hash,
1248    V: Deserialize<'de>,
1249    S: Default + BuildHasher,
1250{
1251    type Value = CHashMap<K, V, S>;
1252
1253    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
1254        formatter.write_str("CHashMap")
1255    }
1256
1257    fn visit_map<M>(self, mut access: M) -> Result<Self::Value, M::Error>
1258    where
1259        M: MapAccess<'de>,
1260    {
1261        let map = CHashMap::<K, V, S>::with_hasher_and_capacity(
1262            access.size_hint().unwrap_or(0),
1263            S::default(), // this should not be default but I don't know a way around it.
1264        );
1265
1266        while let Some((key, value)) = access.next_entry()? {
1267            map.insert(key, value);
1268        }
1269
1270        Ok(map)
1271    }
1272}
chashmap_serde/lib.rs

chashmap_serde/
lib.rs