// chashmap_next/lib.rs
1//! Concurrent hash maps.
2//!
3//! This crate implements concurrent hash maps, based on bucket-level multi-reader locks. It has
4//! excellent performance characteristics¹ and supports resizing, in-place mutation and more.
5//!
6//! The API derives directly from `std::collections::HashMap`, giving it a familiar feel.
7//!
8//! ¹Note that it heavily depends on the behavior of your program, but in most cases, it's really
9//! good. In some (rare) cases you might want atomic hash maps instead.
10//!
11//! # How it works
12//!
13//! `chashmap` is not lockless, but it distributes locks across the map such that lock contentions
14//! (which is what could make accesses expensive) are very rare.
15//!
//! Hash maps consist of so-called "buckets", each of which defines a potential entry in the table.
17//! The bucket of some key-value pair is determined by the hash of the key. By holding a read-write
18//! lock for each bucket, we ensure that you will generally be able to insert, read, modify, etc.
19//! with only one or two locking subroutines.
20//!
21//! There is a special-case: reallocation. When the table is filled up such that very few buckets
22//! are free (note that this is "very few" and not "no", since the load factor shouldn't get too
23//! high as it hurts performance), a global lock is obtained while rehashing the table. This is
24//! pretty inefficient, but it rarely happens, and due to the adaptive nature of the capacity, it
25//! will only happen a few times when the map has just been initialized.
26//!
27//! ## Collision resolution
28//!
29//! When two hashes collide, they cannot share the same bucket, so there must be an algorithm which
30//! can resolve collisions. In our case, we use linear probing, which means that we take the bucket
31//! following it, and repeat until we find a free bucket.
32//!
33//! This method is far from ideal, but superior methods like Robin-Hood hashing works poorly (if at
34//! all) in a concurrent structure.
35//!
36//! # The API
37//!
38//! The API should feel very familiar, if you are used to the libstd hash map implementation. They
39//! share many of the methods, and I've carefully made sure that all the items, which have similarly
//! named items in libstd, match in semantics and behavior.
41
42extern crate owning_ref;
43extern crate parking_lot;
44
45#[cfg(test)]
46mod tests;
47
48use owning_ref::{OwningHandle, OwningRef};
49use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
50use std::borrow::Borrow;
51use std::collections::hash_map::RandomState;
52use std::hash::{BuildHasher, Hash, Hasher};
53use std::sync::atomic::{self, AtomicUsize};
54use std::{cmp, fmt, iter, mem, ops};
55
/// The atomic ordering used throughout the code.
///
/// Only the `len` counter is atomic, so no cross-variable ordering is needed.
const ORDERING: atomic::Ordering = atomic::Ordering::Relaxed;
/// The length-to-capacity factor.
const LENGTH_MULTIPLIER: usize = 4;
/// The maximal load factor's numerator.
///
/// Together with the denominator below this caps the load factor at 85%.
const MAX_LOAD_FACTOR_NUM: usize = 100 - 15;
/// The maximal load factor's denominator.
const MAX_LOAD_FACTOR_DENOM: usize = 100;
/// The default initial capacity.
const DEFAULT_INITIAL_CAPACITY: usize = 64;
/// The lowest capacity a table can have.
const MINIMUM_CAPACITY: usize = 8;
68
69/// A bucket state.
70///
71/// Buckets are the bricks of hash tables. They represent a single entry into the table.
#[derive(Clone)]
enum Bucket<K, V> {
    /// The bucket contains a key-value pair.
    Contains(K, V),
    /// The bucket is empty and has never been used.
    ///
    /// Since hash collisions are resolved by jumping to the next bucket, some buckets can cluster
    /// together, meaning that they are potential candidates for lookups. Empty buckets can be seen
    /// as the delimiter of such clusters.
    Empty,
    /// The bucket was removed.
    ///
    /// The technique of distinguishing between "empty" and "removed" was first described by Knuth.
    /// The idea is that when you search for a key, you will probe over these buckets, since the
    /// key could have been pushed behind the removed element:
    ///```notest
    ///     Contains(k1, v1) // hash = h
    ///     Removed
    ///     Contains(k2, v2) // hash = h
    ///```
    /// If we stopped at `Removed`, we won't be able to find the second KV pair. So `Removed` is
    /// semantically different from `Empty`, as the search won't stop.
    ///
    /// However, we are still able to insert new pairs at the removed buckets.
    Removed,
}
98
99impl<K, V> Bucket<K, V> {
100 /// Is this bucket 'empty'?
101 fn is_empty(&self) -> bool {
102 if let Bucket::Empty = *self {
103 true
104 } else {
105 false
106 }
107 }
108
109 /// Is this bucket 'removed'?
110 fn is_removed(&self) -> bool {
111 if let Bucket::Removed = *self {
112 true
113 } else {
114 false
115 }
116 }
117
118 /// Is this bucket free?
119 ///
120 /// "Free" means that it can safely be replace by another bucket — namely that the bucket is
121 /// not occupied.
122 fn is_free(&self) -> bool {
123 match *self {
124 // The two replacable bucket types are removed buckets and empty buckets.
125 Bucket::Removed | Bucket::Empty => true,
126 // KV pairs can't be replaced as they contain data.
127 Bucket::Contains(..) => false,
128 }
129 }
130
131 /// Get the value (if any) of this bucket.
132 ///
133 /// This gets the value of the KV pair, if any. If the bucket is not a KV pair, `None` is
134 /// returned.
135 fn value(self) -> Option<V> {
136 if let Bucket::Contains(_, val) = self {
137 Some(val)
138 } else {
139 None
140 }
141 }
142
143 /// Get a reference to the value of the bucket (if any).
144 ///
145 /// This returns a reference to the value of the bucket, if it is a KV pair. If not, it will
146 /// return `None`.
147 ///
148 /// Rather than `Option`, it returns a `Result`, in order to make it easier to work with the
149 /// `owning_ref` crate (`try_new` and `try_map` of `OwningHandle` and `OwningRef`
150 /// respectively).
151 fn value_ref(&self) -> Result<&V, ()> {
152 if let Bucket::Contains(_, ref val) = *self {
153 Ok(val)
154 } else {
155 Err(())
156 }
157 }
158
159 /// Does the bucket match a given key?
160 ///
161 /// This returns `true` if the bucket is a KV pair with key `key`. If not, `false` is returned.
162 fn key_matches(&self, key: &K) -> bool
163 where
164 K: PartialEq,
165 {
166 if let Bucket::Contains(ref candidate_key, _) = *self {
167 // Check if the keys matches.
168 candidate_key == key
169 } else {
170 // The bucket isn't a KV pair, so we'll return false, since there is no key to test
171 // against.
172 false
173 }
174 }
175}
176
/// The low-level representation of the hash table.
///
/// This is different from `CHashMap` in two ways:
///
/// 1. It is not wrapped in a lock, meaning that resizing and reallocation is not possible.
/// 2. It does not track the number of occupied buckets, making it expensive to obtain the load
///    factor.
struct Table<K, V, S> {
    /// The hash function builder.
    ///
    /// When a `Table` uses the default hash builder, it randomly picks a hash function from
    /// some family of functions in libstd. This effectively eliminates the issue of hash
    /// flooding.
    hash_builder: S,
    /// The bucket array.
    ///
    /// This vector stores the buckets. The order in which they're stored is far from arbitrary: a
    /// KV pair `(key, val)`'s first-priority location is at `self.hash(&key) % len`. If that
    /// bucket is taken, the next bucket is tried, and this process repeats until a free bucket is
    /// found (wrapping around at the end of the array).
    buckets: Vec<RwLock<Bucket<K, V>>>,
}
198
199impl<K, V> Table<K, V, RandomState> {
200 /// Create a table with a certain number of buckets.
201 fn new(buckets: usize) -> Self {
202 // TODO: For some obscure reason `RwLock` doesn't implement `Clone`.
203
204 // Fill a vector with `buckets` of `Empty` buckets.
205 let mut vec = Vec::with_capacity(buckets);
206 for _ in 0..buckets {
207 vec.push(RwLock::new(Bucket::Empty));
208 }
209
210 Table {
211 // Generate a hash function.
212 hash_builder: RandomState::new(),
213 buckets: vec,
214 }
215 }
216
217 /// Create a table with at least some capacity.
218 fn with_capacity(cap: usize) -> Self {
219 // The + 1 is needed to avoid losing fractional bucket to integer division.
220 Table::new(cmp::max(
221 MINIMUM_CAPACITY,
222 cap * MAX_LOAD_FACTOR_DENOM / MAX_LOAD_FACTOR_NUM + 1,
223 ))
224 }
225}
226
227impl<K, V, S: BuildHasher> Table<K, V, S> {
228 /// Create a `Table` with the `BuildHasher`
229 fn with_hasher(buckets: usize, hash_builder: S) -> Self {
230 // TODO: For some obscure reason `RwLock` doesn't implement `Clone`.
231
232 // Fill a vector with `buckets` of `Empty` buckets.
233 let mut vec = Vec::with_capacity(buckets);
234 for _ in 0..buckets {
235 vec.push(RwLock::new(Bucket::Empty));
236 }
237
238 Table {
239 hash_builder,
240 buckets: vec,
241 }
242 }
243
244 /// Create a `Table` with a specific capacity and the `BuildHasher`
245 fn with_capacity_and_hasher(cap: usize, hash_builder: S) -> Self {
246 // The + 1 is needed to avoid losing fractional bucket to integer division.
247 Table::with_hasher(
248 cmp::max(
249 MINIMUM_CAPACITY,
250 cap * MAX_LOAD_FACTOR_DENOM / MAX_LOAD_FACTOR_NUM + 1,
251 ),
252 hash_builder,
253 )
254 }
255}
256
impl<K: PartialEq + Hash, V, S: BuildHasher> Table<K, V, S> {
    /// Hash some key through the internal hash function.
    ///
    /// The `u64` hash is truncated to `usize`; callers reduce it modulo the bucket count.
    fn hash<T: ?Sized>(&self, key: &T) -> usize
    where
        T: Hash,
    {
        // Build the initial hash function state.
        let mut hasher = self.hash_builder.build_hasher();
        // Hash the key.
        key.hash(&mut hasher);
        // Cast to `usize`. Since the hash function returns `u64`, this cast won't ever cause
        // entropy less than the output space.
        hasher.finish() as usize
    }

    /// Scan from the first priority of a key until a match is found.
    ///
    /// This scans from the first priority of `key` (as defined by its hash), until a match is
    /// found (will wrap on end), i.e. `matches` returns `true` with the bucket as argument.
    ///
    /// The read guard from the RW-lock of the bucket is returned.
    ///
    /// # Panics
    ///
    /// Panics if every bucket is probed without `matches` ever returning `true`.
    fn scan<F, Q: ?Sized>(&self, key: &Q, matches: F) -> RwLockReadGuard<Bucket<K, V>>
    where
        F: Fn(&Bucket<K, V>) -> bool,
        K: Borrow<Q>,
        Q: Hash,
    {
        // Hash the key.
        let hash = self.hash(key);

        // Start at the first priority bucket, and then move upwards, searching for the matching
        // bucket. At most one bucket lock is held at a time.
        for i in 0..self.buckets.len() {
            // Get the lock of the `i`'th bucket after the first priority bucket (wrap on end).
            let lock = self.buckets[(hash + i) % self.buckets.len()].read();

            // Check if it is a match.
            if matches(&lock) {
                // Yup. Return.
                return lock;
            }
        }
        panic!("`CHashMap` scan failed! No entry found.");
    }

    /// Scan from the first priority of a key until a match is found (mutable guard).
    ///
    /// This is similar to `scan`, but instead of an immutable lock guard, a mutable lock guard is
    /// returned.
    ///
    /// # Panics
    ///
    /// Panics if every bucket is probed without `matches` ever returning `true`.
    fn scan_mut<F, Q: ?Sized>(&self, key: &Q, matches: F) -> RwLockWriteGuard<Bucket<K, V>>
    where
        F: Fn(&Bucket<K, V>) -> bool,
        K: Borrow<Q>,
        Q: Hash,
    {
        // Hash the key.
        let hash = self.hash(key);

        // Start at the first priority bucket, and then move upwards, searching for the matching
        // bucket.
        for i in 0..self.buckets.len() {
            // Get the lock of the `i`'th bucket after the first priority bucket (wrap on end).
            let lock = self.buckets[(hash + i) % self.buckets.len()].write();

            // Check if it is a match.
            if matches(&lock) {
                // Yup. Return.
                return lock;
            }
        }
        panic!("`CHashMap` scan_mut failed! No entry found.");
    }

    /// Scan from the first priority of a key until a match is found (bypass locks).
    ///
    /// This is similar to `scan_mut`, but it safely bypasses the locks by making use of the
    /// aliasing invariants of `&mut`: exclusive access to the table means no lock can be
    /// contended, so `get_mut` is used instead of `write`.
    ///
    /// # Panics
    ///
    /// Panics if every bucket is probed without `matches` ever returning `true`.
    fn scan_mut_no_lock<F>(&mut self, key: &K, matches: F) -> &mut Bucket<K, V>
    where
        F: Fn(&Bucket<K, V>) -> bool,
    {
        // Hash the key.
        let hash = self.hash(key);
        // TODO: To tame the borrowchecker, we fetch this in advance.
        let len = self.buckets.len();

        // Start at the first priority bucket, and then move upwards, searching for the matching
        // bucket.
        for i in 0..self.buckets.len() {
            // TODO: hacky hacky
            let idx = (hash + i) % len;

            // Get the lock of the `i`'th bucket after the first priority bucket (wrap on end).

            // Check if it is a match. The inner block scopes the first `get_mut` borrow so the
            // bucket can be borrowed again for the return value.
            if {
                let bucket = self.buckets[idx].get_mut();
                matches(&bucket)
            } {
                // Yup. Return.
                return self.buckets[idx].get_mut();
            }
        }
        panic!("`CHashMap` scan_mut_no_lock failed! No entry found.");
    }

    /// Find a bucket with some key, or a free bucket in same cluster.
    ///
    /// This scans for buckets with key `key`. If one is found, it will be returned. If none are
    /// found, it will return a free bucket in the same cluster.
    ///
    /// NOTE(review): while probing, this can hold up to two write guards at once (the remembered
    /// `free` bucket plus the bucket currently being inspected) — confirm this cannot deadlock
    /// against concurrent scans.
    fn lookup_or_free(&self, key: &K) -> RwLockWriteGuard<Bucket<K, V>> {
        // Hash the key.
        let hash = self.hash(key);
        // The encountered free bucket.
        let mut free = None;

        // Start at the first priority bucket, and then move upwards, searching for the matching
        // bucket.
        for i in 0..self.buckets.len() {
            // Get the lock of the `i`'th bucket after the first priority bucket (wrap on end).
            let lock = self.buckets[(hash + i) % self.buckets.len()].write();

            if lock.key_matches(key) {
                // We found a match.
                return lock;
            } else if lock.is_empty() {
                // The cluster is over. Use the encountered free bucket, if any.
                return free.unwrap_or(lock);
            } else if lock.is_removed() && free.is_none() {
                // We found a free bucket, so we can store it to later (if we don't already have
                // one).
                free = Some(lock)
            }
        }

        free.expect("No free buckets found")
    }

    /// Lookup some key.
    ///
    /// This searches some key `key`, and returns an immutable lock guard to its bucket. If the
    /// key couldn't be found, the returned value will be an `Empty` cluster.
    fn lookup<Q: ?Sized>(&self, key: &Q) -> RwLockReadGuard<Bucket<K, V>>
    where
        K: Borrow<Q>,
        Q: PartialEq + Hash,
    {
        self.scan(key, |x| match *x {
            // We'll check that the keys do indeed match, as the chance of hash collisions
            // happening is inevitable
            Bucket::Contains(ref candidate_key, _) if key.eq(candidate_key.borrow()) => true,
            // We reached an empty bucket, meaning that there are no more buckets, not even removed
            // ones, to search.
            Bucket::Empty => true,
            _ => false,
        })
    }

    /// Lookup some key, mutably.
    ///
    /// This is similar to `lookup`, but it returns a mutable guard.
    ///
    /// Replacing at this bucket is safe as the bucket will be in the same cluster of buckets as
    /// the first priority cluster.
    fn lookup_mut<Q: ?Sized>(&self, key: &Q) -> RwLockWriteGuard<Bucket<K, V>>
    where
        K: Borrow<Q>,
        Q: PartialEq + Hash,
    {
        self.scan_mut(key, |x| match *x {
            // We'll check that the keys do indeed match, as the chance of hash collisions
            // happening is inevitable
            Bucket::Contains(ref candidate_key, _) if key.eq(candidate_key.borrow()) => true,
            // We reached an empty bucket, meaning that there are no more buckets, not even removed
            // ones, to search.
            Bucket::Empty => true,
            _ => false,
        })
    }

    /// Find a free bucket in the same cluster as some key.
    ///
    /// This means that the returned lock guard defines a valid, free bucket, where `key` can be
    /// inserted.
    fn find_free(&self, key: &K) -> RwLockWriteGuard<Bucket<K, V>> {
        self.scan_mut(key, |x| x.is_free())
    }

    /// Find a free bucket in the same cluster as some key (bypassing locks).
    ///
    /// This is similar to `find_free`, except that it safely bypasses locks through the aliasing
    /// guarantees of `&mut`.
    fn find_free_no_lock(&mut self, key: &K) -> &mut Bucket<K, V> {
        self.scan_mut_no_lock(key, |x| x.is_free())
    }

    /// Fill the table with data from another table.
    ///
    /// This is used to efficiently copy the data of `table` into `self`.
    ///
    /// # Important
    ///
    /// The table should be empty for this to work correctly/logically.
    fn fill(&mut self, table: Self) {
        // Run over all the buckets.
        for i in table.buckets {
            // We'll only transfer the bucket if it is a KV pair.
            if let Bucket::Contains(key, val) = i.into_inner() {
                // Find a bucket where the KV pair can be inserted.
                let mut bucket = self.scan_mut_no_lock(&key, |x| match *x {
                    // Halt on an empty bucket.
                    Bucket::Empty => true,
                    // We'll assume that the rest of the buckets either contains other KV pairs (in
                    // particular, no buckets have been removed in the newly construct table).
                    _ => false,
                });

                // Set the bucket to the KV pair.
                *bucket = Bucket::Contains(key, val);
            }
        }
    }
}
480
481impl<K: Clone, V: Clone, S: Clone> Clone for Table<K, V, S> {
482 fn clone(&self) -> Self {
483 Table {
484 // Since we copy plainly without rehashing etc., it is important that we keep the same
485 // hash function.
486 hash_builder: self.hash_builder.clone(),
487 // Lock and clone every bucket individually.
488 buckets: self
489 .buckets
490 .iter()
491 .map(|x| RwLock::new(x.read().clone()))
492 .collect(),
493 }
494 }
495}
496
497impl<K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for Table<K, V, S> {
498 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
499 // create a debug map and fill with entries
500 let mut map = f.debug_map();
501 // We'll just run over all buckets and output one after one.
502 for i in &self.buckets {
503 // Acquire the lock.
504 let lock = i.read();
505 // Check if the bucket actually contains anything.
506 if let Bucket::Contains(ref key, ref val) = *lock {
507 // add this entry to the map
508 map.entry(key, val);
509 }
510 }
511 map.finish()
512 }
513}
514
/// An owning iterator over the entries of some table.
pub struct IntoIter<K, V, S> {
    /// The inner table, whose buckets are drained as the iterator advances.
    table: Table<K, V, S>,
}
520
521impl<K, V, S> Iterator for IntoIter<K, V, S> {
522 type Item = (K, V);
523
524 fn next(&mut self) -> Option<(K, V)> {
525 // We own the table, and can thus do what we want with it. We'll simply pop from the
526 // buckets until we find a bucket containing data.
527 while let Some(bucket) = self.table.buckets.pop() {
528 // We can bypass dem ebil locks.
529 if let Bucket::Contains(key, val) = bucket.into_inner() {
530 // The bucket contained data, so we'll return the pair.
531 return Some((key, val));
532 }
533 }
534
535 // We've exhausted all the buckets, and no more data could be found.
536 None
537 }
538}
539
540impl<K, V, S> IntoIterator for Table<K, V, S> {
541 type Item = (K, V);
542 type IntoIter = IntoIter<K, V, S>;
543
544 fn into_iter(self) -> Self::IntoIter {
545 IntoIter { table: self }
546 }
547}
548
/// A RAII guard for reading an entry of a hash map.
///
/// This is an access type dereferencing to the inner value of the entry. It will handle unlocking
/// on drop.
pub struct ReadGuard<'a, K: 'a, V: 'a, S> {
    /// The composed guard: an `OwningRef` projecting to the value, built over an `OwningHandle`
    /// that keeps both the table's read guard and the bucket's read guard alive until drop.
    inner: OwningRef<
        OwningHandle<RwLockReadGuard<'a, Table<K, V, S>>, RwLockReadGuard<'a, Bucket<K, V>>>,
        V,
    >,
}
560
561impl<'a, K, V, S> ops::Deref for ReadGuard<'a, K, V, S> {
562 type Target = V;
563
564 fn deref(&self) -> &V {
565 &self.inner
566 }
567}
568
569impl<'a, K, V: PartialEq, S> cmp::PartialEq for ReadGuard<'a, K, V, S> {
570 fn eq(&self, other: &ReadGuard<'a, K, V, S>) -> bool {
571 self == other
572 }
573}
574impl<'a, K, V: Eq, S> cmp::Eq for ReadGuard<'a, K, V, S> {}
575
576impl<'a, K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for ReadGuard<'a, K, V, S> {
577 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
578 write!(f, "ReadGuard({:?})", &**self)
579 }
580}
581
/// A mutable RAII guard for reading an entry of a hash map.
///
/// This is an access type dereferencing to the inner value of the entry. It will handle unlocking
/// on drop.
pub struct WriteGuard<'a, K: 'a, V: 'a, S> {
    /// The composed guard: an outer `OwningHandle` projecting to `&mut V`, built over an inner
    /// `OwningHandle` that keeps the table's read guard and the bucket's write guard alive
    /// until drop.
    inner: OwningHandle<
        OwningHandle<RwLockReadGuard<'a, Table<K, V, S>>, RwLockWriteGuard<'a, Bucket<K, V>>>,
        &'a mut V,
    >,
}
593
594impl<'a, K, V, S> ops::Deref for WriteGuard<'a, K, V, S> {
595 type Target = V;
596
597 fn deref(&self) -> &V {
598 &self.inner
599 }
600}
601
602impl<'a, K, V, S> ops::DerefMut for WriteGuard<'a, K, V, S> {
603 fn deref_mut(&mut self) -> &mut V {
604 &mut self.inner
605 }
606}
607
608impl<'a, K, V: PartialEq, S> cmp::PartialEq for WriteGuard<'a, K, V, S> {
609 fn eq(&self, other: &Self) -> bool {
610 self == other
611 }
612}
613impl<'a, K, V: Eq, S> cmp::Eq for WriteGuard<'a, K, V, S> {}
614
615impl<'a, K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for WriteGuard<'a, K, V, S> {
616 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
617 write!(f, "WriteGuard({:?})", &**self)
618 }
619}
620
/// A concurrent hash map.
///
/// This type defines a concurrent associative array, based on hash tables with linear probing and
/// dynamic resizing.
///
/// The idea is to let each entry hold a multi-reader lock, effectively limiting lock contentions
/// to writing simultaneously on the same entry, and resizing the table.
///
/// It is not an atomic or lockless hash table, since such construction is only useful in very few
/// cases, due to limitations on in-place operations on values.
pub struct CHashMap<K, V, S = RandomState> {
    /// The inner table. The outer lock is only taken for writing when the whole table must be
    /// swapped (e.g. resizing); ordinary accesses take it for reading.
    table: RwLock<Table<K, V, S>>,
    /// The total number of KV pairs in the table.
    ///
    /// This is used to calculate the load factor.
    len: AtomicUsize,
}
639
640impl<K, V> CHashMap<K, V, RandomState> {
641 /// Create a new hash map with a certain capacity.
642 ///
643 /// "Capacity" means the amount of entries the hash map can hold before reallocating. This
644 /// function allocates a hash map with at least the capacity of `cap`.
645 pub fn with_capacity(cap: usize) -> Self {
646 CHashMap {
647 // Start at 0 KV pairs.
648 len: AtomicUsize::new(0),
649 // Make a new empty table. We will make sure that it is at least one.
650 table: RwLock::new(Table::with_capacity(cap)),
651 }
652 }
653
654 /// Create a new hash map.
655 ///
656 /// This creates a new hash map with some fixed initial capacity.
657 pub fn new() -> Self {
658 CHashMap::with_capacity(DEFAULT_INITIAL_CAPACITY)
659 }
660}
661
662impl<K, V, S> CHashMap<K, V, S> {
663 /// Get the number of entries in the hash table.
664 ///
665 /// This is entirely atomic, and will not acquire any locks.
666 ///
667 /// This is guaranteed to reflect the number of entries _at this particular moment.
668 pub fn len(&self) -> usize {
669 self.len.load(ORDERING)
670 }
671
672 /// Get the capacity of the hash table.
673 ///
674 /// The capacity is equal to the number of entries the table can hold before reallocating.
675 pub fn capacity(&self) -> usize {
676 cmp::max(MINIMUM_CAPACITY, self.buckets()) * MAX_LOAD_FACTOR_NUM / MAX_LOAD_FACTOR_DENOM
677 }
678
679 /// Get the number of buckets of the hash table.
680 ///
681 /// "Buckets" refers to the amount of potential entries in the inner table. It is different
682 /// from capacity, in the sense that the map cannot hold this number of entries, since it needs
683 /// to keep the load factor low.
684 pub fn buckets(&self) -> usize {
685 self.table.read().buckets.len()
686 }
687
688 /// Is the hash table empty?
689 pub fn is_empty(&self) -> bool {
690 self.len() == 0
691 }
692
693 /// Deprecated. Do not use.
694 #[deprecated]
695 pub fn filter<F>(&self, predicate: F)
696 where
697 F: Fn(&K, &V) -> bool,
698 {
699 // Following the naming conventions of the standard library...
700 self.retain(predicate)
701 }
702
703 /// Filter the map based on some predicate.
704 ///
705 /// This tests every entry in the hash map by closure `predicate`. If it returns `true`, the
706 /// map will retain the entry. If not, the entry will be removed.
707 ///
708 /// This won't lock the table. This can be a major performance trade-off, as it means that it
709 /// must lock on every table entry. However, it won't block other operations of the table,
710 /// while filtering.
711 pub fn retain<F>(&self, predicate: F)
712 where
713 F: Fn(&K, &V) -> bool,
714 {
715 // Acquire the read lock to the table.
716 let table = self.table.read();
717 // Run over every bucket and apply the filter.
718 for bucket in &table.buckets {
719 // Acquire the read lock, which we will upgrade if necessary.
720 // TODO: Use read lock and upgrade later.
721 let mut lock = bucket.write();
722 // Skip the free buckets.
723 // TODO: Fold the `if` into the `match` when the borrowck gets smarter.
724 if match *lock {
725 Bucket::Contains(ref key, ref val) => !predicate(key, val),
726 _ => false,
727 } {
728 // Predicate didn't match. Set the bucket to removed.
729 *lock = Bucket::Removed;
730 // Decrement the length to account for the removed bucket.
731 // TODO: Can we somehow bundle these up to reduce the overhead of atomic
732 // operations? Storing in a local variable and then subtracting causes
733 // issues with consistency.
734 self.len.fetch_sub(1, ORDERING);
735 }
736 }
737 }
738}
739
740impl<K, V, S: Default + BuildHasher> CHashMap<K, V, S> {
741 /// Creates an empty `CHashMap` with the specified capacity, using `hash_builder`
742 /// to hash the keys.
743 ///
744 /// The hash map will be able to hold at least `capacity` elements without
745 /// reallocating. If `capacity` is 0, the hash map will not allocate.
746 ///
747 /// Warning: `hash_builder` is normally randomly generated, and
748 /// is designed to allow HashMaps to be resistant to attacks that
749 /// cause many collisions and very poor performance. Setting it
750 /// manually using this function can expose a DoS attack vector.
751 ///
752 pub fn with_hasher_and_capacity(cap: usize, hash_builder: S) -> Self {
753 CHashMap {
754 // Start at 0 KV pairs.
755 len: AtomicUsize::new(0),
756 // Make a new empty table. We will make sure that it is at least one.
757 table: RwLock::new(Table::with_capacity_and_hasher(cap, hash_builder)),
758 }
759 }
760
761 /// Creates an empty `CHashMap` which will use the given hash builder to hash
762 /// keys.
763 ///
764 /// The created map has the default initial capacity.
765 ///
766 /// Warning: `hash_builder` is normally randomly generated, and
767 /// is designed to allow HashMaps to be resistant to attacks that
768 /// cause many collisions and very poor performance. Setting it
769 /// manually using this function can expose a DoS attack vector.
770 pub fn with_hasher(hash_builder: S) -> Self {
771 CHashMap::with_hasher_and_capacity(DEFAULT_INITIAL_CAPACITY, hash_builder)
772 }
773}
774
775impl<K, V, S> CHashMap<K, V, S>
776where
777 S: Default + BuildHasher,
778{
779 /// Clear the map.
780 ///
781 /// This clears the hash map and returns the previous version of the map.
782 ///
783 /// It is relatively efficient, although it needs to write lock a RW lock.
784 pub fn clear(&self) -> CHashMap<K, V, S> {
785 // Acquire a writable lock.
786 let mut lock = self.table.write();
787 CHashMap {
788 // Replace the old table with an empty initial table.
789 table: RwLock::new(mem::replace(
790 &mut *lock,
791 Table::with_hasher(DEFAULT_INITIAL_CAPACITY, S::default()),
792 )),
793 // Replace the length with 0 and use the old length.
794 len: AtomicUsize::new(self.len.swap(0, ORDERING)),
795 }
796 }
797}
798
impl<K: PartialEq + Hash, V, S: BuildHasher> CHashMap<K, V, S> {
    /// Get the value of some key.
    ///
    /// This will lookup the entry of some key `key`, and acquire the read-only lock. This means
    /// that all other parties are blocked from _writing_ (not reading) this value while the guard
    /// is held.
    pub fn get<Q: ?Sized>(&self, key: &Q) -> Option<ReadGuard<K, V, S>>
    where
        K: Borrow<Q>,
        Q: Hash + PartialEq,
    {
        // Acquire the read lock and lookup in the table. The `OwningHandle` ties the bucket
        // guard's lifetime to the table guard so both unlock together when the `ReadGuard`
        // drops. The `unsafe` raw-pointer dereference is the `owning_ref` pattern for
        // borrowing from the owner inside the closure.
        if let Ok(inner) = OwningRef::new(OwningHandle::new_with_fn(self.table.read(), |x| {
            unsafe { &*x }.lookup(key)
        }))
        .try_map(|x| x.value_ref())
        {
            // The bucket contains data.
            Some(ReadGuard { inner: inner })
        } else {
            // The bucket is empty/removed.
            None
        }
    }

    /// Get the (mutable) value of some key.
    ///
    /// This will lookup the entry of some key `key`, and acquire the writable lock. This means
    /// that all other parties are blocked from both reading and writing this value while the guard
    /// is held.
    pub fn get_mut<Q: ?Sized>(&self, key: &Q) -> Option<WriteGuard<K, V, S>>
    where
        K: Borrow<Q>,
        Q: Hash + PartialEq,
    {
        // Acquire the write lock and lookup in the table. As in `get`, the nested
        // `OwningHandle`s keep the table guard, the bucket guard, and the projected `&mut V`
        // alive together; the raw-pointer casts are the `owning_ref` borrowing pattern.
        if let Ok(inner) = OwningHandle::try_new(
            OwningHandle::new_with_fn(self.table.read(), |x| unsafe { &*x }.lookup_mut(key)),
            |x| {
                if let &mut Bucket::Contains(_, ref mut val) =
                    unsafe { &mut *(x as *mut Bucket<K, V>) }
                {
                    // The bucket contains data.
                    Ok(val)
                } else {
                    // The bucket is empty/removed.
                    Err(())
                }
            },
        ) {
            Some(WriteGuard { inner: inner })
        } else {
            None
        }
    }

    /// Does the hash map contain this key?
    pub fn contains_key<Q: ?Sized>(&self, key: &Q) -> bool
    where
        K: Borrow<Q>,
        Q: Hash + PartialEq,
    {
        // Acquire the lock.
        let lock = self.table.read();
        // Look the key up in the table
        let bucket = lock.lookup(key);
        // Test if it is free or not. `lookup` returns the key's bucket or an `Empty` one, so
        // a non-free bucket means the key is present.
        !bucket.is_free()
    }
}
871
872impl<K, V, S> CHashMap<K, V, S>
873where
874 K: PartialEq + Hash,
875 S: BuildHasher + Default,
876{
    /// Insert a **new** entry.
    ///
    /// This inserts an entry, which the map does not already contain, into the table. If the entry
    /// exists, the old entry won't be replaced, nor will an error be returned. It will possibly
    /// introduce silent bugs.
    ///
    /// To be more specific, it assumes that the entry does not already exist, and will simply skip
    /// to the end of the cluster, even if it does exist.
    ///
    /// This is faster than e.g. `insert`, but should only be used, if you know that the entry
    /// doesn't already exist.
    ///
    /// # Warning
    ///
    /// Only use this, if you know what you're doing. This can easily introduce very complex logic
    /// errors.
    ///
    /// For most other purposes, use `insert`.
    ///
    /// # Panics
    ///
    /// This might perform checks in debug mode testing if the key exists already.
    pub fn insert_new(&self, key: K, val: V) {
        // In debug builds, catch misuse early: the caller promised the key is absent.
        debug_assert!(
            !self.contains_key(&key),
            "Hash table contains already key, contrary to \
             the assumptions about `insert_new`'s arguments."
        );

        // Expand and lock the table. We need to expand to ensure the bounds on the load factor.
        let lock = self.table.read();
        {
            // Find the free bucket. The inner scope drops the bucket guard before `expand`
            // consumes the table lock.
            let mut bucket = lock.find_free(&key);

            // Set the bucket to the new KV pair.
            *bucket = Bucket::Contains(key, val);
        }
        // Expand the table (we know beforehand that the entry didn't already exist).
        self.expand(lock);
    }
918
919 /// Replace an existing entry, or insert a new one.
920 ///
921 /// This will replace an existing entry and return the old entry, if any. If no entry exists,
922 /// it will simply insert the new entry and return `None`.
923 pub fn insert(&self, key: K, val: V) -> Option<V> {
924 let ret;
925 // Expand and lock the table. We need to expand to ensure the bounds on the load factor.
926 let lock = self.table.read();
927 {
928 // Lookup the key or a free bucket in the inner table.
929 let mut bucket = lock.lookup_or_free(&key);
930
931 // Replace the bucket.
932 ret = mem::replace(&mut *bucket, Bucket::Contains(key, val)).value();
933 }
934
935 // Expand the table if no bucket was overwritten (i.e. the entry is fresh).
936 if ret.is_none() {
937 self.expand(lock);
938 }
939
940 ret
941 }
942
943 /// Insert or update.
944 ///
945 /// This looks up `key`. If it exists, the reference to its value is passed through closure
946 /// `update`. If it doesn't exist, the result of closure `insert` is inserted.
947 pub fn upsert<F, G>(&self, key: K, insert: F, update: G)
948 where
949 F: FnOnce() -> V,
950 G: FnOnce(&mut V),
951 {
952 // Expand and lock the table. We need to expand to ensure the bounds on the load factor.
953 let lock = self.table.read();
954 {
955 // Lookup the key or a free bucket in the inner table.
956 let mut bucket = lock.lookup_or_free(&key);
957
958 match *bucket {
959 // The bucket had KV pair!
960 Bucket::Contains(_, ref mut val) => {
961 // Run it through the closure.
962 update(val);
963 // TODO: We return to stop the borrowck to yell at us. This prevents the control flow
964 // from reaching the expansion after the match if it has been right here.
965 return;
966 }
967 // The bucket was empty, simply insert.
968 ref mut x => *x = Bucket::Contains(key, insert()),
969 }
970 }
971
972 // Expand the table (this will only happen if the function haven't returned yet).
973 self.expand(lock);
974 }
975
976 /// Map or insert an entry.
977 ///
978 /// This sets the value associated with key `key` to `f(Some(old_val))` (if it returns `None`,
979 /// the entry is removed) if it exists. If it does not exist, it inserts it with value
980 /// `f(None)`, unless the closure returns `None`.
981 ///
982 /// Note that if `f` returns `None`, the entry of key `key` is removed unconditionally.
983 pub fn alter<F>(&self, key: K, f: F)
984 where
985 F: FnOnce(Option<V>) -> Option<V>,
986 {
987 // Expand and lock the table. We need to expand to ensure the bounds on the load factor.
988 let lock = self.table.read();
989 {
990 // Lookup the key or a free bucket in the inner table.
991 let mut bucket = lock.lookup_or_free(&key);
992
993 match mem::replace(&mut *bucket, Bucket::Removed) {
994 Bucket::Contains(_, val) => {
995 if let Some(new_val) = f(Some(val)) {
996 // Set the bucket to a KV pair with the new value.
997 *bucket = Bucket::Contains(key, new_val);
998 // No extension required, as the bucket already had a KV pair previously.
999 return;
1000 } else {
1001 // The old entry was removed, so we decrement the length of the map.
1002 self.len.fetch_sub(1, ORDERING);
1003 // TODO: We return as a hack to avoid the borrowchecker from thinking we moved a
1004 // referenced object. Namely, under this match arm the expansion after the match
1005 // statement won't ever be reached.
1006 return;
1007 }
1008 }
1009 _ => {
1010 if let Some(new_val) = f(None) {
1011 // The previously free cluster will get a KV pair with the new value.
1012 *bucket = Bucket::Contains(key, new_val);
1013 } else {
1014 return;
1015 }
1016 }
1017 }
1018 }
1019
1020 // A new entry was inserted, so naturally, we expand the table.
1021 self.expand(lock);
1022 }
1023
1024 /// Remove an entry.
1025 ///
1026 /// This removes and returns the entry with key `key`. If no entry with said key exists, it
1027 /// will simply return `None`.
1028 pub fn remove<Q: ?Sized>(&self, key: &Q) -> Option<V>
1029 where
1030 K: Borrow<Q>,
1031 Q: PartialEq + Hash,
1032 {
1033 // Acquire the read lock of the table.
1034 let lock = self.table.read();
1035
1036 // Lookup the table, mutably.
1037 let mut bucket = lock.lookup_mut(&key);
1038 // Remove the bucket.
1039 match &mut *bucket {
1040 // There was nothing to remove.
1041 &mut Bucket::Removed | &mut Bucket::Empty => None,
1042 // TODO: We know that this is a `Bucket::Contains` variant, but to bypass borrowck
1043 // madness, we do weird weird stuff.
1044 bucket => {
1045 // Decrement the length of the map.
1046 self.len.fetch_sub(1, ORDERING);
1047
1048 // Set the bucket to "removed" and return its value.
1049 mem::replace(bucket, Bucket::Removed).value()
1050 }
1051 }
1052 }
1053
1054 /// Reserve additional space.
1055 ///
1056 /// This reserves additional `additional` buckets to the table. Note that it might reserve more
1057 /// in order make reallocation less common.
1058 pub fn reserve(&self, additional: usize) {
1059 // Get the new length.
1060 let len = (self.len() + additional) * LENGTH_MULTIPLIER;
1061 // Acquire the write lock (needed because we'll mess with the table).
1062 let mut lock = self.table.write();
1063 // Handle the case where another thread has resized the table while we were acquiring the
1064 // lock.
1065 if lock.buckets.len() < len {
1066 // Swap the table out with a new table of desired size (multiplied by some factor).
1067 let table = mem::replace(
1068 &mut *lock,
1069 Table::with_capacity_and_hasher(len, S::default()),
1070 );
1071 // Fill the new table with the data from the old table.
1072 lock.fill(table);
1073 }
1074 }
1075
1076 /// Shrink the capacity of the map to reduce space usage.
1077 ///
1078 /// This will shrink the capacity of the map to the needed amount (plus some additional space
1079 /// to avoid reallocations), effectively reducing memory usage in cases where there is
1080 /// excessive space.
1081 ///
1082 /// It is healthy to run this once in a while, if the size of your hash map changes a lot (e.g.
1083 /// has a high maximum case).
1084 pub fn shrink_to_fit(&self) {
1085 // Acquire the write lock (needed because we'll mess with the table).
1086 let mut lock = self.table.write();
1087 // Swap the table out with a new table of desired size (multiplied by some factor).
1088 let table = mem::replace(
1089 &mut *lock,
1090 Table::with_capacity_and_hasher(self.len(), S::default()),
1091 );
1092 // Fill the new table with the data from the old table.
1093 lock.fill(table);
1094 }
1095
    /// Increment the size of the hash map and expand it so one more entry can fit in.
    ///
    /// This consumes the read lock the caller already holds (rather than re-acquiring it), and
    /// drops it before any resize so the write lock taken in `reserve` cannot deadlock against it.
    /// Callers must invoke this exactly once per newly inserted entry, since it owns the length
    /// increment.
    fn expand(&self, lock: RwLockReadGuard<Table<K, V, S>>) {
        // Increment the length to take the new element into account.
        let len = self.len.fetch_add(1, ORDERING) + 1;

        // Extend if necessary. We multiply by some constant to adjust our load factor.
        // (Written multiplicatively to avoid division: len/buckets > NUM/DENOM.)
        if len * MAX_LOAD_FACTOR_DENOM > lock.buckets.len() * MAX_LOAD_FACTOR_NUM {
            // Drop the read lock to avoid deadlocks when acquiring the write lock.
            drop(lock);
            // Reserve 1 entry in space (the function will handle the excessive space logic).
            self.reserve(1);
        }
    }
1111}
1112
1113impl<K, V, S: Default + BuildHasher> Default for CHashMap<K, V, S> {
1114 fn default() -> Self {
1115 // Forward the call to `new`.
1116 CHashMap::with_hasher(S::default())
1117 }
1118}
1119
1120impl<K: Clone, V: Clone, S: Clone> Clone for CHashMap<K, V, S> {
1121 fn clone(&self) -> Self {
1122 CHashMap {
1123 table: RwLock::new(self.table.read().clone()),
1124 len: AtomicUsize::new(self.len.load(ORDERING)),
1125 }
1126 }
1127}
1128
1129impl<K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for CHashMap<K, V, S> {
1130 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1131 (*self.table.read()).fmt(f)
1132 }
1133}
1134
1135impl<K, V, S> IntoIterator for CHashMap<K, V, S> {
1136 type Item = (K, V);
1137 type IntoIter = IntoIter<K, V, S>;
1138
1139 fn into_iter(self) -> IntoIter<K, V, S> {
1140 self.table.into_inner().into_iter()
1141 }
1142}
1143
1144impl<K: PartialEq + Hash, V, S: Default + BuildHasher> iter::FromIterator<(K, V)>
1145 for CHashMap<K, V, S>
1146{
1147 fn from_iter<I: IntoIterator<Item = (K, V)>>(iter: I) -> Self {
1148 // TODO: This step is required to obtain the length of the iterator. Eliminate it.
1149 let vec: Vec<_> = iter.into_iter().collect();
1150 let len = vec.len();
1151
1152 // Start with an empty table.
1153 let mut table = Table::with_capacity_and_hasher(len, S::default());
1154 // Fill the table with the pairs from the iterator.
1155 for (key, val) in vec {
1156 // Insert the KV pair. This is fine, as we are ensured that there are no duplicates in
1157 // the iterator.
1158 let bucket = table.find_free_no_lock(&key);
1159 *bucket = Bucket::Contains(key, val);
1160 }
1161
1162 CHashMap {
1163 table: RwLock::new(table),
1164 len: AtomicUsize::new(len),
1165 }
1166 }
1167}