// chashmap_next/lib.rs
1//! Concurrent hash maps.
2//!
3//! This crate implements concurrent hash maps, based on bucket-level multi-reader locks. It has
4//! excellent performance characteristics¹ and supports resizing, in-place mutation and more.
5//!
6//! The API derives directly from `std::collections::HashMap`, giving it a familiar feel.
7//!
8//! ¹Note that it heavily depends on the behavior of your program, but in most cases, it's really
9//! good. In some (rare) cases you might want atomic hash maps instead.
10//!
11//! # How it works
12//!
13//! `chashmap` is not lockless, but it distributes locks across the map such that lock contentions
14//! (which is what could make accesses expensive) are very rare.
15//!
//! Hash maps consist of so-called "buckets", each of which defines a potential entry in the table.
17//! The bucket of some key-value pair is determined by the hash of the key. By holding a read-write
18//! lock for each bucket, we ensure that you will generally be able to insert, read, modify, etc.
19//! with only one or two locking subroutines.
20//!
21//! There is a special-case: reallocation. When the table is filled up such that very few buckets
22//! are free (note that this is "very few" and not "no", since the load factor shouldn't get too
23//! high as it hurts performance), a global lock is obtained while rehashing the table. This is
24//! pretty inefficient, but it rarely happens, and due to the adaptive nature of the capacity, it
25//! will only happen a few times when the map has just been initialized.
26//!
27//! ## Collision resolution
28//!
29//! When two hashes collide, they cannot share the same bucket, so there must be an algorithm which
30//! can resolve collisions. In our case, we use linear probing, which means that we take the bucket
31//! following it, and repeat until we find a free bucket.
32//!
33//! This method is far from ideal, but superior methods like Robin-Hood hashing works poorly (if at
34//! all) in a concurrent structure.
35//!
36//! # The API
37//!
38//! The API should feel very familiar, if you are used to the libstd hash map implementation. They
39//! share many of the methods, and I've carefully made sure that all the items, which have similarly
//! named items in libstd, match in semantics and behavior.
41
42extern crate owning_ref;
43extern crate parking_lot;
44
45#[cfg(test)]
46mod tests;
47
48use owning_ref::{OwningHandle, OwningRef};
49use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
50use std::borrow::Borrow;
51use std::collections::hash_map::RandomState;
52use std::hash::{BuildHasher, Hash, Hasher};
53use std::sync::atomic::{self, AtomicUsize};
54use std::{cmp, fmt, iter, mem, ops};
55
/// The atomic ordering used throughout the code.
///
/// Only the `len` counter is atomic, so no cross-variable ordering is needed.
const ORDERING: atomic::Ordering = atomic::Ordering::Relaxed;
/// The length-to-capacity factor.
const LENGTH_MULTIPLIER: usize = 4;
/// The maximal load factor's numerator.
///
/// Together with the denominator below this caps the load factor at 85%.
const MAX_LOAD_FACTOR_NUM: usize = 100 - 15;
/// The maximal load factor's denominator.
const MAX_LOAD_FACTOR_DENOM: usize = 100;
/// The default initial capacity.
const DEFAULT_INITIAL_CAPACITY: usize = 64;
/// The lowest capacity a table can have.
const MINIMUM_CAPACITY: usize = 8;
68
69/// A bucket state.
70///
71/// Buckets are the bricks of hash tables. They represent a single entry into the table.
#[derive(Clone)]
enum Bucket<K, V> {
    /// The bucket contains a key-value pair.
    Contains(K, V),
    /// The bucket is empty and has never been used.
    ///
    /// Since hash collisions are resolved by jumping to the next bucket, some buckets can cluster
    /// together, meaning that they are potential candidates for lookups. Empty buckets can be seen
    /// as the delimiter of such clusters.
    Empty,
    /// The bucket was removed.
    ///
    /// The technique of distinguishing between "empty" and "removed" was first described by Knuth.
    /// The idea is that when you search for a key, you will probe over these buckets, since the
    /// key could have been pushed behind the removed element:
    ///```notest
    ///     Contains(k1, v1) // hash = h
    ///     Removed
    ///     Contains(k2, v2) // hash = h
    ///```
    /// If we stopped at `Removed`, we won't be able to find the second KV pair. So `Removed` is
    /// semantically different from `Empty`, as the search won't stop.
    ///
    /// However, we are still able to insert new pairs at the removed buckets.
    Removed,
}
98
99impl<K, V> Bucket<K, V> {
100 /// Is this bucket 'empty'?
101 fn is_empty(&self) -> bool {
102 if let Bucket::Empty = *self {
103 true
104 } else {
105 false
106 }
107 }
108
109 /// Is this bucket 'removed'?
110 fn is_removed(&self) -> bool {
111 if let Bucket::Removed = *self {
112 true
113 } else {
114 false
115 }
116 }
117
118 /// Is this bucket free?
119 ///
120 /// "Free" means that it can safely be replace by another bucket — namely that the bucket is
121 /// not occupied.
122 fn is_free(&self) -> bool {
123 match *self {
124 // The two replacable bucket types are removed buckets and empty buckets.
125 Bucket::Removed | Bucket::Empty => true,
126 // KV pairs can't be replaced as they contain data.
127 Bucket::Contains(..) => false,
128 }
129 }
130
131 /// Get the value (if any) of this bucket.
132 ///
133 /// This gets the value of the KV pair, if any. If the bucket is not a KV pair, `None` is
134 /// returned.
135 fn value(self) -> Option<V> {
136 if let Bucket::Contains(_, val) = self {
137 Some(val)
138 } else {
139 None
140 }
141 }
142
143 /// Get a reference to the value of the bucket (if any).
144 ///
145 /// This returns a reference to the value of the bucket, if it is a KV pair. If not, it will
146 /// return `None`.
147 ///
148 /// Rather than `Option`, it returns a `Result`, in order to make it easier to work with the
149 /// `owning_ref` crate (`try_new` and `try_map` of `OwningHandle` and `OwningRef`
150 /// respectively).
151 fn value_ref(&self) -> Result<&V, ()> {
152 if let Bucket::Contains(_, ref val) = *self {
153 Ok(val)
154 } else {
155 Err(())
156 }
157 }
158
159 /// Does the bucket match a given key?
160 ///
161 /// This returns `true` if the bucket is a KV pair with key `key`. If not, `false` is returned.
162 fn key_matches(&self, key: &K) -> bool
163 where
164 K: PartialEq,
165 {
166 if let Bucket::Contains(ref candidate_key, _) = *self {
167 // Check if the keys matches.
168 candidate_key == key
169 } else {
170 // The bucket isn't a KV pair, so we'll return false, since there is no key to test
171 // against.
172 false
173 }
174 }
175}
176
/// The low-level representation of the hash table.
///
/// This is different from `CHashMap` in two ways:
///
/// 1. It is not wrapped in a lock, meaning that resizing and reallocation is not possible.
/// 2. It does not track the number of occupied buckets, making it expensive to obtain the load
///    factor.
struct Table<K, V, S> {
    /// The hash function builder.
    ///
    /// When a `Table` uses the default hash builder, it randomly picks a hash function from
    /// some family of functions in libstd. This effectively eliminates the issue of hash
    /// flooding.
    hash_builder: S,
    /// The bucket array.
    ///
    /// This vector stores the buckets. The order in which they're stored is far from arbitrary: a
    /// KV pair `(key, val)`'s first-priority location is at `self.hash(&key) % len`. If that
    /// bucket is taken, the next bucket is tried, and this process repeats until a free bucket is
    /// found (wrapping around at the end of the array).
    buckets: Vec<RwLock<Bucket<K, V>>>,
}
198
199impl<K, V> Table<K, V, RandomState> {
200 /// Create a table with a certain number of buckets.
201 fn new(buckets: usize) -> Self {
202 // TODO: For some obscure reason `RwLock` doesn't implement `Clone`.
203
204 // Fill a vector with `buckets` of `Empty` buckets.
205 let mut vec = Vec::with_capacity(buckets);
206 for _ in 0..buckets {
207 vec.push(RwLock::new(Bucket::Empty));
208 }
209
210 Table {
211 // Generate a hash function.
212 hash_builder: RandomState::new(),
213 buckets: vec,
214 }
215 }
216
217 /// Create a table with at least some capacity.
218 fn with_capacity(cap: usize) -> Self {
219 // The + 1 is needed to avoid losing fractional bucket to integer division.
220 Table::new(cmp::max(
221 MINIMUM_CAPACITY,
222 cap * MAX_LOAD_FACTOR_DENOM / MAX_LOAD_FACTOR_NUM + 1,
223 ))
224 }
225}
226
227impl<K, V, S: BuildHasher> Table<K, V, S> {
228 /// Create a `Table` with the `BuildHasher`
229 fn with_hasher(buckets: usize, hash_builder: S) -> Self {
230 // TODO: For some obscure reason `RwLock` doesn't implement `Clone`.
231
232 // Fill a vector with `buckets` of `Empty` buckets.
233 let mut vec = Vec::with_capacity(buckets);
234 for _ in 0..buckets {
235 vec.push(RwLock::new(Bucket::Empty));
236 }
237
238 Table {
239 hash_builder,
240 buckets: vec,
241 }
242 }
243
244 /// Create a `Table` with a specific capacity and the `BuildHasher`
245 fn with_capacity_and_hasher(cap: usize, hash_builder: S) -> Self {
246 // The + 1 is needed to avoid losing fractional bucket to integer division.
247 Table::with_hasher(
248 cmp::max(
249 MINIMUM_CAPACITY,
250 cap * MAX_LOAD_FACTOR_DENOM / MAX_LOAD_FACTOR_NUM + 1,
251 ),
252 hash_builder,
253 )
254 }
255}
256
impl<K: PartialEq + Hash, V, S: BuildHasher> Table<K, V, S> {
    /// Hash some key through the internal hash function.
    ///
    /// The `u64` hash is truncated to `usize`; callers reduce it modulo the bucket count.
    fn hash<T: ?Sized>(&self, key: &T) -> usize
    where
        T: Hash,
    {
        // Build the initial hash function state.
        let mut hasher = self.hash_builder.build_hasher();
        // Hash the key.
        key.hash(&mut hasher);
        // Cast to `usize`. Since the hash function returns `u64`, this cast won't ever cause
        // entropy less than the output space.
        hasher.finish() as usize
    }

    /// Scan from the first priority of a key until a match is found.
    ///
    /// This scans from the first priority of `key` (as defined by its hash), until a match is
    /// found (will wrap on end), i.e. `matches` returns `true` with the bucket as argument.
    ///
    /// The read guard from the RW-lock of the bucket is returned.
    ///
    /// # Panics
    ///
    /// Panics if every bucket is probed without `matches` ever returning `true`.
    fn scan<F, Q: ?Sized>(&self, key: &Q, matches: F) -> RwLockReadGuard<Bucket<K, V>>
    where
        F: Fn(&Bucket<K, V>) -> bool,
        K: Borrow<Q>,
        Q: Hash,
    {
        // Hash the key.
        let hash = self.hash(key);

        // Start at the first priority bucket, and then move upwards, searching for the matching
        // bucket. At most one bucket lock is held at a time.
        for i in 0..self.buckets.len() {
            // Get the lock of the `i`'th bucket after the first priority bucket (wrap on end).
            let lock = self.buckets[(hash + i) % self.buckets.len()].read();

            // Check if it is a match.
            if matches(&lock) {
                // Yup. Return.
                return lock;
            }
        }
        panic!("`CHashMap` scan failed! No entry found.");
    }

    /// Scan from the first priority of a key until a match is found (mutable guard).
    ///
    /// This is similar to `scan`, but instead of an immutable lock guard, a mutable lock guard is
    /// returned.
    ///
    /// # Panics
    ///
    /// Panics if every bucket is probed without `matches` ever returning `true`.
    fn scan_mut<F, Q: ?Sized>(&self, key: &Q, matches: F) -> RwLockWriteGuard<Bucket<K, V>>
    where
        F: Fn(&Bucket<K, V>) -> bool,
        K: Borrow<Q>,
        Q: Hash,
    {
        // Hash the key.
        let hash = self.hash(key);

        // Start at the first priority bucket, and then move upwards, searching for the matching
        // bucket.
        for i in 0..self.buckets.len() {
            // Get the lock of the `i`'th bucket after the first priority bucket (wrap on end).
            let lock = self.buckets[(hash + i) % self.buckets.len()].write();

            // Check if it is a match.
            if matches(&lock) {
                // Yup. Return.
                return lock;
            }
        }
        panic!("`CHashMap` scan_mut failed! No entry found.");
    }

    /// Scan from the first priority of a key until a match is found (bypass locks).
    ///
    /// This is similar to `scan_mut`, but it safely bypasses the locks by making use of the
    /// aliasing invariants of `&mut`: exclusive access to the table means no lock can be
    /// contended, so `get_mut` is used instead of `write`.
    ///
    /// # Panics
    ///
    /// Panics if every bucket is probed without `matches` ever returning `true`.
    fn scan_mut_no_lock<F>(&mut self, key: &K, matches: F) -> &mut Bucket<K, V>
    where
        F: Fn(&Bucket<K, V>) -> bool,
    {
        // Hash the key.
        let hash = self.hash(key);
        // TODO: To tame the borrowchecker, we fetch this in advance.
        let len = self.buckets.len();

        // Start at the first priority bucket, and then move upwards, searching for the matching
        // bucket.
        for i in 0..self.buckets.len() {
            // TODO: hacky hacky
            let idx = (hash + i) % len;

            // Get the lock of the `i`'th bucket after the first priority bucket (wrap on end).

            // Check if it is a match. The inner block scopes the first `get_mut` borrow so the
            // bucket can be borrowed again for the return value.
            if {
                let bucket = self.buckets[idx].get_mut();
                matches(&bucket)
            } {
                // Yup. Return.
                return self.buckets[idx].get_mut();
            }
        }
        panic!("`CHashMap` scan_mut_no_lock failed! No entry found.");
    }

    /// Find a bucket with some key, or a free bucket in same cluster.
    ///
    /// This scans for buckets with key `key`. If one is found, it will be returned. If none are
    /// found, it will return a free bucket in the same cluster.
    ///
    /// NOTE(review): while probing, this can hold up to two write guards at once (the remembered
    /// `free` bucket plus the bucket currently being inspected) — confirm this cannot deadlock
    /// against concurrent scans.
    fn lookup_or_free(&self, key: &K) -> RwLockWriteGuard<Bucket<K, V>> {
        // Hash the key.
        let hash = self.hash(key);
        // The encountered free bucket.
        let mut free = None;

        // Start at the first priority bucket, and then move upwards, searching for the matching
        // bucket.
        for i in 0..self.buckets.len() {
            // Get the lock of the `i`'th bucket after the first priority bucket (wrap on end).
            let lock = self.buckets[(hash + i) % self.buckets.len()].write();

            if lock.key_matches(key) {
                // We found a match.
                return lock;
            } else if lock.is_empty() {
                // The cluster is over. Use the encountered free bucket, if any.
                return free.unwrap_or(lock);
            } else if lock.is_removed() && free.is_none() {
                // We found a free bucket, so we can store it to later (if we don't already have
                // one).
                free = Some(lock)
            }
        }

        free.expect("No free buckets found")
    }

    /// Lookup some key.
    ///
    /// This searches some key `key`, and returns an immutable lock guard to its bucket. If the
    /// key couldn't be found, the returned value will be an `Empty` cluster.
    fn lookup<Q: ?Sized>(&self, key: &Q) -> RwLockReadGuard<Bucket<K, V>>
    where
        K: Borrow<Q>,
        Q: PartialEq + Hash,
    {
        self.scan(key, |x| match *x {
            // We'll check that the keys do indeed match, as the chance of hash collisions
            // happening is inevitable
            Bucket::Contains(ref candidate_key, _) if key.eq(candidate_key.borrow()) => true,
            // We reached an empty bucket, meaning that there are no more buckets, not even removed
            // ones, to search.
            Bucket::Empty => true,
            _ => false,
        })
    }

    /// Lookup some key, mutably.
    ///
    /// This is similar to `lookup`, but it returns a mutable guard.
    ///
    /// Replacing at this bucket is safe as the bucket will be in the same cluster of buckets as
    /// the first priority cluster.
    fn lookup_mut<Q: ?Sized>(&self, key: &Q) -> RwLockWriteGuard<Bucket<K, V>>
    where
        K: Borrow<Q>,
        Q: PartialEq + Hash,
    {
        self.scan_mut(key, |x| match *x {
            // We'll check that the keys do indeed match, as the chance of hash collisions
            // happening is inevitable
            Bucket::Contains(ref candidate_key, _) if key.eq(candidate_key.borrow()) => true,
            // We reached an empty bucket, meaning that there are no more buckets, not even removed
            // ones, to search.
            Bucket::Empty => true,
            _ => false,
        })
    }

    /// Find a free bucket in the same cluster as some key.
    ///
    /// This means that the returned lock guard defines a valid, free bucket, where `key` can be
    /// inserted.
    fn find_free(&self, key: &K) -> RwLockWriteGuard<Bucket<K, V>> {
        self.scan_mut(key, |x| x.is_free())
    }

    /// Find a free bucket in the same cluster as some key (bypassing locks).
    ///
    /// This is similar to `find_free`, except that it safely bypasses locks through the aliasing
    /// guarantees of `&mut`.
    fn find_free_no_lock(&mut self, key: &K) -> &mut Bucket<K, V> {
        self.scan_mut_no_lock(key, |x| x.is_free())
    }

    /// Fill the table with data from another table.
    ///
    /// This is used to efficiently copy the data of `table` into `self`.
    ///
    /// # Important
    ///
    /// The table should be empty for this to work correctly/logically.
    fn fill(&mut self, table: Self) {
        // Run over all the buckets.
        for i in table.buckets {
            // We'll only transfer the bucket if it is a KV pair.
            if let Bucket::Contains(key, val) = i.into_inner() {
                // Find a bucket where the KV pair can be inserted.
                let mut bucket = self.scan_mut_no_lock(&key, |x| match *x {
                    // Halt on an empty bucket.
                    Bucket::Empty => true,
                    // We'll assume that the rest of the buckets either contains other KV pairs (in
                    // particular, no buckets have been removed in the newly construct table).
                    _ => false,
                });

                // Set the bucket to the KV pair.
                *bucket = Bucket::Contains(key, val);
            }
        }
    }
}
480
481impl<K: Clone, V: Clone, S: Clone> Clone for Table<K, V, S> {
482 fn clone(&self) -> Self {
483 Table {
484 // Since we copy plainly without rehashing etc., it is important that we keep the same
485 // hash function.
486 hash_builder: self.hash_builder.clone(),
487 // Lock and clone every bucket individually.
488 buckets: self
489 .buckets
490 .iter()
491 .map(|x| RwLock::new(x.read().clone()))
492 .collect(),
493 }
494 }
495}
496
497impl<K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for Table<K, V, S> {
498 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
499 // create a debug map and fill with entries
500 let mut map = f.debug_map();
501 // We'll just run over all buckets and output one after one.
502 for i in &self.buckets {
503 // Acquire the lock.
504 let lock = i.read();
505 // Check if the bucket actually contains anything.
506 if let Bucket::Contains(ref key, ref val) = *lock {
507 // add this entry to the map
508 map.entry(key, val);
509 }
510 }
511 map.finish()
512 }
513}
514
/// An owning iterator over the entries of some table.
pub struct IntoIter<K, V, S> {
    /// The inner table, whose buckets are drained as the iterator advances.
    table: Table<K, V, S>,
}
520
521impl<K, V, S> Iterator for IntoIter<K, V, S> {
522 type Item = (K, V);
523
524 fn next(&mut self) -> Option<(K, V)> {
525 // We own the table, and can thus do what we want with it. We'll simply pop from the
526 // buckets until we find a bucket containing data.
527 while let Some(bucket) = self.table.buckets.pop() {
528 // We can bypass dem ebil locks.
529 if let Bucket::Contains(key, val) = bucket.into_inner() {
530 // The bucket contained data, so we'll return the pair.
531 return Some((key, val));
532 }
533 }
534
535 // We've exhausted all the buckets, and no more data could be found.
536 None
537 }
538}
539
540impl<K, V, S> IntoIterator for Table<K, V, S> {
541 type Item = (K, V);
542 type IntoIter = IntoIter<K, V, S>;
543
544 fn into_iter(self) -> Self::IntoIter {
545 IntoIter { table: self }
546 }
547}
548
/// A RAII guard for reading an entry of a hash map.
///
/// This is an access type dereferencing to the inner value of the entry. It will handle unlocking
/// on drop.
pub struct ReadGuard<'a, K: 'a, V: 'a, S> {
    /// The composed guard: an `OwningRef` projecting to the value, built over an `OwningHandle`
    /// that keeps both the table's read guard and the bucket's read guard alive until drop.
    inner: OwningRef<
        OwningHandle<RwLockReadGuard<'a, Table<K, V, S>>, RwLockReadGuard<'a, Bucket<K, V>>>,
        V,
    >,
}
560
561impl<'a, K, V, S> ops::Deref for ReadGuard<'a, K, V, S> {
562 type Target = V;
563
564 fn deref(&self) -> &V {
565 &self.inner
566 }
567}
568
569impl<'a, K, V: PartialEq, S> cmp::PartialEq for ReadGuard<'a, K, V, S> {
570 fn eq(&self, other: &ReadGuard<'a, K, V, S>) -> bool {
571 self == other
572 }
573}
574impl<'a, K, V: Eq, S> cmp::Eq for ReadGuard<'a, K, V, S> {}
575
576impl<'a, K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for ReadGuard<'a, K, V, S> {
577 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
578 write!(f, "ReadGuard({:?})", &**self)
579 }
580}
581
/// A mutable RAII guard for reading an entry of a hash map.
///
/// This is an access type dereferencing to the inner value of the entry. It will handle unlocking
/// on drop.
pub struct WriteGuard<'a, K: 'a, V: 'a, S> {
    /// The composed guard: an outer `OwningHandle` projecting to `&mut V`, built over an inner
    /// `OwningHandle` that keeps the table's read guard and the bucket's write guard alive
    /// until drop.
    inner: OwningHandle<
        OwningHandle<RwLockReadGuard<'a, Table<K, V, S>>, RwLockWriteGuard<'a, Bucket<K, V>>>,
        &'a mut V,
    >,
}
593
594impl<'a, K, V, S> ops::Deref for WriteGuard<'a, K, V, S> {
595 type Target = V;
596
597 fn deref(&self) -> &V {
598 &self.inner
599 }
600}
601
602impl<'a, K, V, S> ops::DerefMut for WriteGuard<'a, K, V, S> {
603 fn deref_mut(&mut self) -> &mut V {
604 &mut self.inner
605 }
606}
607
608impl<'a, K, V: PartialEq, S> cmp::PartialEq for WriteGuard<'a, K, V, S> {
609 fn eq(&self, other: &Self) -> bool {
610 self == other
611 }
612}
613impl<'a, K, V: Eq, S> cmp::Eq for WriteGuard<'a, K, V, S> {}
614
615impl<'a, K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for WriteGuard<'a, K, V, S> {
616 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
617 write!(f, "WriteGuard({:?})", &**self)
618 }
619}
620
/// A concurrent hash map.
///
/// This type defines a concurrent associative array, based on hash tables with linear probing and
/// dynamic resizing.
///
/// The idea is to let each entry hold a multi-reader lock, effectively limiting lock contentions
/// to writing simultaneously on the same entry, and resizing the table.
///
/// It is not an atomic or lockless hash table, since such construction is only useful in very few
/// cases, due to limitations on in-place operations on values.
pub struct CHashMap<K, V, S = RandomState> {
    /// The inner table. The outer lock is only taken for writing when the whole table must be
    /// swapped (e.g. resizing); ordinary accesses take it for reading.
    table: RwLock<Table<K, V, S>>,
    /// The total number of KV pairs in the table.
    ///
    /// This is used to calculate the load factor.
    len: AtomicUsize,
}
639
640impl<K, V> CHashMap<K, V, RandomState> {
641 /// Create a new hash map with a certain capacity.
642 ///
643 /// "Capacity" means the amount of entries the hash map can hold before reallocating. This
644 /// function allocates a hash map with at least the capacity of `cap`.
645 pub fn with_capacity(cap: usize) -> Self {
646 CHashMap {
647 // Start at 0 KV pairs.
648 len: AtomicUsize::new(0),
649 // Make a new empty table. We will make sure that it is at least one.
650 table: RwLock::new(Table::with_capacity(cap)),
651 }
652 }
653
654 /// Create a new hash map.
655 ///
656 /// This creates a new hash map with some fixed initial capacity.
657 pub fn new() -> Self {
658 CHashMap::with_capacity(DEFAULT_INITIAL_CAPACITY)
659 }
660}
661
662impl<K, V, S> CHashMap<K, V, S> {
663 /// Get the number of entries in the hash table.
664 ///
665 /// This is entirely atomic, and will not acquire any locks.
666 ///
667 /// This is guaranteed to reflect the number of entries _at this particular moment.
668 pub fn len(&self) -> usize {
669 self.len.load(ORDERING)
670 }
671
672 /// Get the capacity of the hash table.
673 ///
674 /// The capacity is equal to the number of entries the table can hold before reallocating.
675 pub fn capacity(&self) -> usize {
676 cmp::max(MINIMUM_CAPACITY, self.buckets()) * MAX_LOAD_FACTOR_NUM / MAX_LOAD_FACTOR_DENOM
677 }
678
679 /// Get the number of buckets of the hash table.
680 ///
681 /// "Buckets" refers to the amount of potential entries in the inner table. It is different
682 /// from capacity, in the sense that the map cannot hold this number of entries, since it needs
683 /// to keep the load factor low.
684 pub fn buckets(&self) -> usize {
685 self.table.read().buckets.len()
686 }
687
688 /// Is the hash table empty?
689 pub fn is_empty(&self) -> bool {
690 self.len() == 0
691 }
692
693 /// Deprecated. Do not use.
694 #[deprecated]
695 pub fn filter<F>(&self, predicate: F)
696 where
697 F: Fn(&K, &V) -> bool,
698 {
699 // Following the naming conventions of the standard library...
700 self.retain(predicate)
701 }
702
703 /// Filter the map based on some predicate.
704 ///
705 /// This tests every entry in the hash map by closure `predicate`. If it returns `true`, the
706 /// map will retain the entry. If not, the entry will be removed.
707 ///
708 /// This won't lock the table. This can be a major performance trade-off, as it means that it
709 /// must lock on every table entry. However, it won't block other operations of the table,
710 /// while filtering.
711 pub fn retain<F>(&self, predicate: F)
712 where
713 F: Fn(&K, &V) -> bool,
714 {
715 // Acquire the read lock to the table.
716 let table = self.table.read();
717 // Run over every bucket and apply the filter.
718 for bucket in &table.buckets {
719 // Acquire the read lock, which we will upgrade if necessary.
720 // TODO: Use read lock and upgrade later.
721 let mut lock = bucket.write();
722 // Skip the free buckets.
723 // TODO: Fold the `if` into the `match` when the borrowck gets smarter.
724 if match *lock {
725 Bucket::Contains(ref key, ref val) => !predicate(key, val),
726 _ => false,
727 } {
728 // Predicate didn't match. Set the bucket to removed.
729 *lock = Bucket::Removed;
730 // Decrement the length to account for the removed bucket.
731 // TODO: Can we somehow bundle these up to reduce the overhead of atomic
732 // operations? Storing in a local variable and then subtracting causes
733 // issues with consistency.
734 self.len.fetch_sub(1, ORDERING);
735 }
736 }
737 }
738}
739
740impl<K, V, S: Default + BuildHasher> CHashMap<K, V, S> {
741 /// Creates an empty `CHashMap` with the specified capacity, using `hash_builder`
742 /// to hash the keys.
743 ///
744 /// The hash map will be able to hold at least `capacity` elements without
745 /// reallocating. If `capacity` is 0, the hash map will not allocate.
746 ///
747 /// Warning: `hash_builder` is normally randomly generated, and
748 /// is designed to allow HashMaps to be resistant to attacks that
749 /// cause many collisions and very poor performance. Setting it
750 /// manually using this function can expose a DoS attack vector.
751 ///
752 pub fn with_hasher_and_capacity(cap: usize, hash_builder: S) -> Self {
753 CHashMap {
754 // Start at 0 KV pairs.
755 len: AtomicUsize::new(0),
756 // Make a new empty table. We will make sure that it is at least one.
757 table: RwLock::new(Table::with_capacity_and_hasher(cap, hash_builder)),
758 }
759 }
760
761 /// Creates an empty `CHashMap` which will use the given hash builder to hash
762 /// keys.
763 ///
764 /// The created map has the default initial capacity.
765 ///
766 /// Warning: `hash_builder` is normally randomly generated, and
767 /// is designed to allow HashMaps to be resistant to attacks that
768 /// cause many collisions and very poor performance. Setting it
769 /// manually using this function can expose a DoS attack vector.
770 pub fn with_hasher(hash_builder: S) -> Self {
771 CHashMap::with_hasher_and_capacity(DEFAULT_INITIAL_CAPACITY, hash_builder)
772 }
773}
774
775impl<K, V, S> CHashMap<K, V, S>
776where
777 S: Default + BuildHasher,
778{
779 /// Clear the map.
780 ///
781 /// This clears the hash map and returns the previous version of the map.
782 ///
783 /// It is relatively efficient, although it needs to write lock a RW lock.
784 pub fn clear(&self) -> CHashMap<K, V, S> {
785 // Acquire a writable lock.
786 let mut lock = self.table.write();
787 CHashMap {
788 // Replace the old table with an empty initial table.
789 table: RwLock::new(mem::replace(
790 &mut *lock,
791 Table::with_hasher(DEFAULT_INITIAL_CAPACITY, S::default()),
792 )),
793 // Replace the length with 0 and use the old length.
794 len: AtomicUsize::new(self.len.swap(0, ORDERING)),
795 }
796 }
797}
798
impl<K: PartialEq + Hash, V, S: BuildHasher> CHashMap<K, V, S> {
    /// Get the value of some key.
    ///
    /// This will lookup the entry of some key `key`, and acquire the read-only lock. This means
    /// that all other parties are blocked from _writing_ (not reading) this value while the guard
    /// is held.
    pub fn get<Q: ?Sized>(&self, key: &Q) -> Option<ReadGuard<K, V, S>>
    where
        K: Borrow<Q>,
        Q: Hash + PartialEq,
    {
        // Acquire the read lock and lookup in the table. The `OwningHandle` ties the bucket
        // guard's lifetime to the table guard so both unlock together when the `ReadGuard`
        // drops. The `unsafe` raw-pointer dereference is the `owning_ref` pattern for
        // borrowing from the owner inside the closure.
        if let Ok(inner) = OwningRef::new(OwningHandle::new_with_fn(self.table.read(), |x| {
            unsafe { &*x }.lookup(key)
        }))
        .try_map(|x| x.value_ref())
        {
            // The bucket contains data.
            Some(ReadGuard { inner: inner })
        } else {
            // The bucket is empty/removed.
            None
        }
    }

    /// Get the (mutable) value of some key.
    ///
    /// This will lookup the entry of some key `key`, and acquire the writable lock. This means
    /// that all other parties are blocked from both reading and writing this value while the guard
    /// is held.
    pub fn get_mut<Q: ?Sized>(&self, key: &Q) -> Option<WriteGuard<K, V, S>>
    where
        K: Borrow<Q>,
        Q: Hash + PartialEq,
    {
        // Acquire the write lock and lookup in the table. As in `get`, the nested
        // `OwningHandle`s keep the table guard, the bucket guard, and the projected `&mut V`
        // alive together; the raw-pointer casts are the `owning_ref` borrowing pattern.
        if let Ok(inner) = OwningHandle::try_new(
            OwningHandle::new_with_fn(self.table.read(), |x| unsafe { &*x }.lookup_mut(key)),
            |x| {
                if let &mut Bucket::Contains(_, ref mut val) =
                    unsafe { &mut *(x as *mut Bucket<K, V>) }
                {
                    // The bucket contains data.
                    Ok(val)
                } else {
                    // The bucket is empty/removed.
                    Err(())
                }
            },
        ) {
            Some(WriteGuard { inner: inner })
        } else {
            None
        }
    }

    /// Does the hash map contain this key?
    pub fn contains_key<Q: ?Sized>(&self, key: &Q) -> bool
    where
        K: Borrow<Q>,
        Q: Hash + PartialEq,
    {
        // Acquire the lock.
        let lock = self.table.read();
        // Look the key up in the table
        let bucket = lock.lookup(key);
        // Test if it is free or not. `lookup` returns the key's bucket or an `Empty` one, so
        // a non-free bucket means the key is present.
        !bucket.is_free()
    }
}
871
872impl<K, V, S> CHashMap<K, V, S>
873where
874 K: PartialEq + Hash,
875 S: BuildHasher + Default,
876{
    /// Insert a **new** entry.
    ///
    /// This inserts an entry, which the map does not already contain, into the table. If the entry
    /// exists, the old entry won't be replaced, nor will an error be returned. It will possibly
    /// introduce silent bugs.
    ///
    /// To be more specific, it assumes that the entry does not already exist, and will simply skip
    /// to the end of the cluster, even if it does exist.
    ///
    /// This is faster than e.g. `insert`, but should only be used, if you know that the entry
    /// doesn't already exist.
    ///
    /// # Warning
    ///
    /// Only use this, if you know what you're doing. This can easily introduce very complex logic
    /// errors.
    ///
    /// For most other purposes, use `insert`.
    ///
    /// # Panics
    ///
    /// This might perform checks in debug mode testing if the key exists already.
    pub fn insert_new(&self, key: K, val: V) {
        // In debug builds, catch misuse early: the caller promised the key is absent.
        debug_assert!(
            !self.contains_key(&key),
            "Hash table contains already key, contrary to \
             the assumptions about `insert_new`'s arguments."
        );

        // Expand and lock the table. We need to expand to ensure the bounds on the load factor.
        let lock = self.table.read();
        {
            // Find the free bucket. The inner scope drops the bucket guard before `expand`
            // consumes the table lock.
            let mut bucket = lock.find_free(&key);

            // Set the bucket to the new KV pair.
            *bucket = Bucket::Contains(key, val);
        }
        // Expand the table (we know beforehand that the entry didn't already exist).
        self.expand(lock);
    }
918
919 /// Replace an existing entry, or insert a new one.
920 ///
921 /// This will replace an existing entry and return the old entry, if any. If no entry exists,
922 /// it will simply insert the new entry and return `None`.
923 pub fn insert(&self, key: K, val: V) -> Option<V> {
924 let ret;
925 // Expand and lock the table. We need to expand to ensure the bounds on the load factor.
926 let lock = self.table.read();
927 {
928 // Lookup the key or a free bucket in the inner table.
929 let mut bucket = lock.lookup_or_free(&key);
930
931 // Replace the bucket.
932 ret = mem::replace(&mut *bucket, Bucket::Contains(key, val)).value();
933 }
934
935 // Expand the table if no bucket was overwritten (i.e. the entry is fresh).
936 if ret.is_none() {
937 self.expand(lock);
938 }
939
940 ret
941 }
942
943 /// Insert or update.
944 ///
945 /// This looks up `key`. If it exists, the reference to its value is passed through closure
946 /// `update`. If it doesn't exist, the result of closure `insert` is inserted.
947 pub fn upsert<F, G>(&self, key: K, insert: F, update: G)
948 where
949 F: FnOnce() -> V,
950 G: FnOnce(&mut V),
951 {
952 // Expand and lock the table. We need to expand to ensure the bounds on the load factor.
953 let lock = self.table.read();
954 {
955 // Lookup the key or a free bucket in the inner table.
956 let mut bucket = lock.lookup_or_free(&key);
957
958 match *bucket {
959 // The bucket had KV pair!
960 Bucket::Contains(_, ref mut val) => {
961 // Run it through the closure.
962 update(val);
963 // TODO: We return to stop the borrowck to yell at us. This prevents the control flow
964 // from reaching the expansion after the match if it has been right here.
965 return;
966 }
967 // The bucket was empty, simply insert.
968 ref mut x => *x = Bucket::Contains(key, insert()),
969 }
970 }
971
972 // Expand the table (this will only happen if the function haven't returned yet).
973 self.expand(lock);
974 }
975
976 /// Map or insert an entry.
977 ///
978 /// This sets the value associated with key `key` to `f(Some(old_val))` (if it returns `None`,
979 /// the entry is removed) if it exists. If it does not exist, it inserts it with value
980 /// `f(None)`, unless the closure returns `None`.
981 ///
982 /// Note that if `f` returns `None`, the entry of key `key` is removed unconditionally.
983 pub fn alter<F>(&self, key: K, f: F)
984 where
985 F: FnOnce(Option<V>) -> Option<V>,
986 {
987 // Expand and lock the table. We need to expand to ensure the bounds on the load factor.
988 let lock = self.table.read();
989 {
990 // Lookup the key or a free bucket in the inner table.
991 let mut bucket = lock.lookup_or_free(&key);
992
993 match mem::replace(&mut *bucket, Bucket::Removed) {
994 Bucket::Contains(_, val) => {
995 if let Some(new_val) = f(Some(val)) {
996 // Set the bucket to a KV pair with the new value.
997 *bucket = Bucket::Contains(key, new_val);
998 // No extension required, as the bucket already had a KV pair previously.
999 return;
1000 } else {
1001 // The old entry was removed, so we decrement the length of the map.
1002 self.len.fetch_sub(1, ORDERING);
1003 // TODO: We return as a hack to avoid the borrowchecker from thinking we moved a
1004 // referenced object. Namely, under this match arm the expansion after the match
1005 // statement won't ever be reached.
1006 return;
1007 }
1008 }
1009 _ => {
1010 if let Some(new_val) = f(None) {
1011 // The previously free cluster will get a KV pair with the new value.
1012 *bucket = Bucket::Contains(key, new_val);
1013 } else {
1014 return;
1015 }
1016 }
1017 }
1018 }
1019
1020 // A new entry was inserted, so naturally, we expand the table.
1021 self.expand(lock);
1022 }
1023
1024 /// Remove an entry.
1025 ///
1026 /// This removes and returns the entry with key `key`. If no entry with said key exists, it
1027 /// will simply return `None`.
1028 pub fn remove<Q: ?Sized>(&self, key: &Q) -> Option<V>
1029 where
1030 K: Borrow<Q>,
1031 Q: PartialEq + Hash,
1032 {
1033 // Acquire the read lock of the table.
1034 let lock = self.table.read();
1035
1036 // Lookup the table, mutably.
1037 let mut bucket = lock.lookup_mut(&key);
1038 // Remove the bucket.
1039 match &mut *bucket {
1040 // There was nothing to remove.
1041 &mut Bucket::Removed | &mut Bucket::Empty => None,
1042 // TODO: We know that this is a `Bucket::Contains` variant, but to bypass borrowck
1043 // madness, we do weird weird stuff.
1044 bucket => {
1045 // Decrement the length of the map.
1046 self.len.fetch_sub(1, ORDERING);
1047
1048 // Set the bucket to "removed" and return its value.
1049 mem::replace(bucket, Bucket::Removed).value()
1050 }
1051 }
1052 }
1053
1054 /// Reserve additional space.
1055 ///
1056 /// This reserves additional `additional` buckets to the table. Note that it might reserve more
1057 /// in order make reallocation less common.
1058 pub fn reserve(&self, additional: usize) {
1059 // Get the new length.
1060 let len = (self.len() + additional) * LENGTH_MULTIPLIER;
1061 // Acquire the write lock (needed because we'll mess with the table).
1062 let mut lock = self.table.write();
1063 // Handle the case where another thread has resized the table while we were acquiring the
1064 // lock.
1065 if lock.buckets.len() < len {
1066 // Swap the table out with a new table of desired size (multiplied by some factor).
1067 let table = mem::replace(
1068 &mut *lock,
1069 Table::with_capacity_and_hasher(len, S::default()),
1070 );
1071 // Fill the new table with the data from the old table.
1072 lock.fill(table);
1073 }
1074 }
1075
1076 /// Shrink the capacity of the map to reduce space usage.
1077 ///
1078 /// This will shrink the capacity of the map to the needed amount (plus some additional space
1079 /// to avoid reallocations), effectively reducing memory usage in cases where there is
1080 /// excessive space.
1081 ///
1082 /// It is healthy to run this once in a while, if the size of your hash map changes a lot (e.g.
1083 /// has a high maximum case).
1084 pub fn shrink_to_fit(&self) {
1085 // Acquire the write lock (needed because we'll mess with the table).
1086 let mut lock = self.table.write();
1087 // Swap the table out with a new table of desired size (multiplied by some factor).
1088 let table = mem::replace(
1089 &mut *lock,
1090 Table::with_capacity_and_hasher(self.len(), S::default()),
1091 );
1092 // Fill the new table with the data from the old table.
1093 lock.fill(table);
1094 }
1095
    /// Increment the size of the hash map and expand it so one more entry can fit in.
    ///
    /// This consumes the read lock the caller already holds (rather than re-acquiring it), and
    /// drops it before any resize so the write lock taken in `reserve` cannot deadlock against it.
    /// Callers must invoke this exactly once per newly inserted entry, since it owns the length
    /// increment.
    fn expand(&self, lock: RwLockReadGuard<Table<K, V, S>>) {
        // Increment the length to take the new element into account.
        let len = self.len.fetch_add(1, ORDERING) + 1;

        // Extend if necessary. We multiply by some constant to adjust our load factor.
        // (Written multiplicatively to avoid division: len/buckets > NUM/DENOM.)
        if len * MAX_LOAD_FACTOR_DENOM > lock.buckets.len() * MAX_LOAD_FACTOR_NUM {
            // Drop the read lock to avoid deadlocks when acquiring the write lock.
            drop(lock);
            // Reserve 1 entry in space (the function will handle the excessive space logic).
            self.reserve(1);
        }
    }
1111}
1112
1113impl<K, V, S: Default + BuildHasher> Default for CHashMap<K, V, S> {
1114 fn default() -> Self {
1115 // Forward the call to `new`.
1116 CHashMap::with_hasher(S::default())
1117 }
1118}
1119
1120impl<K: Clone, V: Clone, S: Clone> Clone for CHashMap<K, V, S> {
1121 fn clone(&self) -> Self {
1122 CHashMap {
1123 table: RwLock::new(self.table.read().clone()),
1124 len: AtomicUsize::new(self.len.load(ORDERING)),
1125 }
1126 }
1127}
1128
1129impl<K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for CHashMap<K, V, S> {
1130 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1131 (*self.table.read()).fmt(f)
1132 }
1133}
1134
1135impl<K, V, S> IntoIterator for CHashMap<K, V, S> {
1136 type Item = (K, V);
1137 type IntoIter = IntoIter<K, V, S>;
1138
1139 fn into_iter(self) -> IntoIter<K, V, S> {
1140 self.table.into_inner().into_iter()
1141 }
1142}
1143
1144impl<K: PartialEq + Hash, V, S: Default + BuildHasher> iter::FromIterator<(K, V)>
1145 for CHashMap<K, V, S>
1146{
1147 fn from_iter<I: IntoIterator<Item = (K, V)>>(iter: I) -> Self {
1148 // TODO: This step is required to obtain the length of the iterator. Eliminate it.
1149 let vec: Vec<_> = iter.into_iter().collect();
1150 let len = vec.len();
1151
1152 // Start with an empty table.
1153 let mut table = Table::with_capacity_and_hasher(len, S::default());
1154 // Fill the table with the pairs from the iterator.
1155 for (key, val) in vec {
1156 // Insert the KV pair. This is fine, as we are ensured that there are no duplicates in
1157 // the iterator.
1158 let bucket = table.find_free_no_lock(&key);
1159 *bucket = Bucket::Contains(key, val);
1160 }
1161
1162 CHashMap {
1163 table: RwLock::new(table),
1164 len: AtomicUsize::new(len),
1165 }
1166 }
1167}