chashmap_serde/lib.rs
1//! Concurrent hash maps.
2//!
3//! This is a clone of https://gitlab.redox-os.org/redox-os/chashmap, which at time of writing seems largely unmaintained.
4//! <b>I take no credit for the original author's hard work.</b> <i>My contributions are to add serde support and any other features
5//! that I may personally need.</i>
6//!
7//! This crate implements concurrent hash maps, based on bucket-level multi-reader locks. It has
8//! excellent performance characteristics¹ and supports resizing, in-place mutation and more.
9//!
10//! The API derives directly from `std::collections::HashMap`, giving it a familiar feel.
11//!
12//! ¹Note that it heavily depends on the behavior of your program, but in most cases, it's really
13//! good. In some (rare) cases you might want atomic hash maps instead.
14//!
15//! # How it works
16//!
17//! `chashmap` is not lockless, but it distributes locks across the map such that lock contentions
18//! (which is what could make accesses expensive) are very rare.
19//!
20//! Hash maps consists of so called "buckets", which each defines a potential entry in the table.
21//! The bucket of some key-value pair is determined by the hash of the key. By holding a read-write
22//! lock for each bucket, we ensure that you will generally be able to insert, read, modify, etc.
23//! with only one or two locking subroutines.
24//!
25//! There is a special-case: reallocation. When the table is filled up such that very few buckets
26//! are free (note that this is "very few" and not "no", since the load factor shouldn't get too
27//! high as it hurts performance), a global lock is obtained while rehashing the table. This is
28//! pretty inefficient, but it rarely happens, and due to the adaptive nature of the capacity, it
29//! will only happen a few times when the map has just been initialized.
30//!
31//! ## Collision resolution
32//!
33//! When two hashes collide, they cannot share the same bucket, so there must be an algorithm which
34//! can resolve collisions. In our case, we use linear probing, which means that we take the bucket
35//! following it, and repeat until we find a free bucket.
36//!
37//! This method is far from ideal, but superior methods like Robin-Hood hashing works poorly (if at
38//! all) in a concurrent structure.
39//!
40//! # The API
41//!
42//! The API should feel very familiar, if you are used to the libstd hash map implementation. They
43//! share many of the methods, and I've carefully made sure that all the items, which have similarly
44//! named items in libstd, matches in semantics and behavior.
45
46extern crate owning_ref;
47extern crate parking_lot;
48extern crate serde;
49
50#[cfg(test)]
51mod tests;
52
53use owning_ref::{OwningHandle, OwningRef};
54use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
55
56use serde::{
57 de::MapAccess, de::Visitor, ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer,
58};
59
60use std::marker::PhantomData;
61
62use std::borrow::Borrow;
63use std::collections::hash_map::RandomState;
64use std::hash::{BuildHasher, Hash, Hasher};
65use std::sync::atomic::{self, AtomicUsize};
66use std::{cmp, fmt, iter, mem, ops};
67
68/// The atomic ordering used throughout the code.
69const ORDERING: atomic::Ordering = atomic::Ordering::Relaxed;
70/// The length-to-capacity factor.
71const LENGTH_MULTIPLIER: usize = 4;
72/// The maximal load factor's numerator.
73const MAX_LOAD_FACTOR_NUM: usize = 100 - 15;
74/// The maximal load factor's denominator.
75const MAX_LOAD_FACTOR_DENOM: usize = 100;
76/// The default initial capacity.
77const DEFAULT_INITIAL_CAPACITY: usize = 64;
78/// The lowest capacity a table can have.
79const MINIMUM_CAPACITY: usize = 8;
80
81/// A bucket state.
82///
83/// Buckets are the bricks of hash tables. They represent a single entry into the table.
84#[derive(Clone)]
85enum Bucket<K, V> {
86 /// The bucket contains a key-value pair.
87 Contains(K, V),
88 /// The bucket is empty and has never been used.
89 ///
90 /// Since hash collisions are resolved by jumping to the next bucket, some buckets can cluster
91 /// together, meaning that they are potential candidates for lookups. Empty buckets can be seen
92 /// as the delimiter of such cluters.
93 Empty,
94 /// The bucket was removed.
95 ///
96 /// The technique of distincting between "empty" and "removed" was first described by Knuth.
97 /// The idea is that when you search for a key, you will probe over these buckets, since the
98 /// key could have been pushed behind the removed element:
99 ///```notest
100 /// Contains(k1, v1) // hash = h
101 /// Removed
102 /// Contains(k2, v2) // hash = h
103 ///```
104 /// If we stopped at `Removed`, we won't be able to find the second KV pair. So `Removed` is
105 /// semantically different from `Empty`, as the search won't stop.
106 ///
107 /// However, we are still able to insert new pairs at the removed buckets.
108 Removed,
109}
110
111impl<K, V> Bucket<K, V> {
112 /// Is this bucket 'empty'?
113 fn is_empty(&self) -> bool {
114 if let Bucket::Empty = *self {
115 true
116 } else {
117 false
118 }
119 }
120
121 /// Is this bucket 'removed'?
122 fn is_removed(&self) -> bool {
123 if let Bucket::Removed = *self {
124 true
125 } else {
126 false
127 }
128 }
129
130 /// Is this bucket free?
131 ///
132 /// "Free" means that it can safely be replace by another bucket — namely that the bucket is
133 /// not occupied.
134 fn is_free(&self) -> bool {
135 match *self {
136 // The two replacable bucket types are removed buckets and empty buckets.
137 Bucket::Removed | Bucket::Empty => true,
138 // KV pairs can't be replaced as they contain data.
139 Bucket::Contains(..) => false,
140 }
141 }
142
143 /// Get the value (if any) of this bucket.
144 ///
145 /// This gets the value of the KV pair, if any. If the bucket is not a KV pair, `None` is
146 /// returned.
147 fn value(self) -> Option<V> {
148 if let Bucket::Contains(_, val) = self {
149 Some(val)
150 } else {
151 None
152 }
153 }
154
155 /// Get a reference to the value of the bucket (if any).
156 ///
157 /// This returns a reference to the value of the bucket, if it is a KV pair. If not, it will
158 /// return `None`.
159 ///
160 /// Rather than `Option`, it returns a `Result`, in order to make it easier to work with the
161 /// `owning_ref` crate (`try_new` and `try_map` of `OwningHandle` and `OwningRef`
162 /// respectively).
163 fn value_ref(&self) -> Result<&V, ()> {
164 if let Bucket::Contains(_, ref val) = *self {
165 Ok(val)
166 } else {
167 Err(())
168 }
169 }
170
171 /// Does the bucket match a given key?
172 ///
173 /// This returns `true` if the bucket is a KV pair with key `key`. If not, `false` is returned.
174 fn key_matches(&self, key: &K) -> bool
175 where
176 K: PartialEq,
177 {
178 if let Bucket::Contains(ref candidate_key, _) = *self {
179 // Check if the keys matches.
180 candidate_key == key
181 } else {
182 // The bucket isn't a KV pair, so we'll return false, since there is no key to test
183 // against.
184 false
185 }
186 }
187}
188
189/// The low-level representation of the hash table.
190///
191/// This is different from `CHashMap` in two ways:
192///
193/// 1. It is not wrapped in a lock, meaning that resizing and reallocation is not possible.
194/// 2. It does not track the number of occupied buckets, making it expensive to obtain the load
195/// factor.
196struct Table<K, V, S> {
197 /// The hash function builder.
198 ///
199 /// When a `Table` use the default hash builder, it randomly picks a hash function from
200 /// some family of functions in libstd. This effectively eliminates the issue of hash flooding.
201 hash_builder: S,
202 /// The bucket array.
203 ///
204 /// This vector stores the buckets. The order in which they're stored is far from arbitrary: A
205 /// KV pair `(key, val)`'s first priority location is at `self.hash(&key) % len`. If not
206 /// possible, the next bucket is used, and this process repeats until the bucket is free (or
207 /// the end is reached, in which we simply wrap around).
208 buckets: Vec<RwLock<Bucket<K, V>>>,
209}
210
211impl<K, V> Table<K, V, RandomState> {
212 /// Create a table with a certain number of buckets.
213 fn new(buckets: usize) -> Self {
214 // TODO: For some obscure reason `RwLock` doesn't implement `Clone`.
215
216 // Fill a vector with `buckets` of `Empty` buckets.
217 let mut vec = Vec::with_capacity(buckets);
218 for _ in 0..buckets {
219 vec.push(RwLock::new(Bucket::Empty));
220 }
221
222 Table {
223 // Generate a hash function.
224 hash_builder: RandomState::new(),
225 buckets: vec,
226 }
227 }
228
229 /// Create a table with at least some capacity.
230 fn with_capacity(cap: usize) -> Self {
231 // The + 1 is needed to avoid losing fractional bucket to integer division.
232 Table::new(cmp::max(
233 MINIMUM_CAPACITY,
234 cap * MAX_LOAD_FACTOR_DENOM / MAX_LOAD_FACTOR_NUM + 1,
235 ))
236 }
237}
238
239impl<K, V, S: BuildHasher> Table<K, V, S> {
240 /// Create a `Table` with the `BuildHasher`
241 fn with_hasher(buckets: usize, hash_builder: S) -> Self {
242 // TODO: For some obscure reason `RwLock` doesn't implement `Clone`.
243
244 // Fill a vector with `buckets` of `Empty` buckets.
245 let mut vec = Vec::with_capacity(buckets);
246 for _ in 0..buckets {
247 vec.push(RwLock::new(Bucket::Empty));
248 }
249
250 Table {
251 hash_builder,
252 buckets: vec,
253 }
254 }
255
256 /// Create a `Table` with a specific capacity and the `BuildHasher`
257 fn with_capacity_and_hasher(cap: usize, hash_builder: S) -> Self {
258 // The + 1 is needed to avoid losing fractional bucket to integer division.
259 Table::with_hasher(
260 cmp::max(
261 MINIMUM_CAPACITY,
262 cap * MAX_LOAD_FACTOR_DENOM / MAX_LOAD_FACTOR_NUM + 1,
263 ),
264 hash_builder,
265 )
266 }
267}
268
269impl<K: PartialEq + Hash, V, S: BuildHasher> Table<K, V, S> {
270 /// Hash some key through the internal hash function.
271 fn hash<T: ?Sized>(&self, key: &T) -> usize
272 where
273 T: Hash,
274 {
275 // Build the initial hash function state.
276 let mut hasher = self.hash_builder.build_hasher();
277 // Hash the key.
278 key.hash(&mut hasher);
279 // Cast to `usize`. Since the hash function returns `u64`, this cast won't ever cause
280 // entropy less than the output space.
281 hasher.finish() as usize
282 }
283
284 /// Scan from the first priority of a key until a match is found.
285 ///
286 /// This scans from the first priority of `key` (as defined by its hash), until a match is
287 /// found (will wrap on end), i.e. `matches` returns `true` with the bucket as argument.
288 ///
289 /// The read guard from the RW-lock of the bucket is returned.
290 fn scan<F, Q: ?Sized>(&self, key: &Q, matches: F) -> RwLockReadGuard<Bucket<K, V>>
291 where
292 F: Fn(&Bucket<K, V>) -> bool,
293 K: Borrow<Q>,
294 Q: Hash,
295 {
296 // Hash the key.
297 let hash = self.hash(key);
298
299 // Start at the first priority bucket, and then move upwards, searching for the matching
300 // bucket.
301 for i in 0..self.buckets.len() {
302 // Get the lock of the `i`'th bucket after the first priority bucket (wrap on end).
303 let lock = self.buckets[(hash + i) % self.buckets.len()].read();
304
305 // Check if it is a match.
306 if matches(&lock) {
307 // Yup. Return.
308 return lock;
309 }
310 }
311 panic!("`CHashMap` scan failed! No entry found.");
312 }
313
314 /// Scan from the first priority of a key until a match is found (mutable guard).
315 ///
316 /// This is similar to `scan`, but instead of an immutable lock guard, a mutable lock guard is
317 /// returned.
318 fn scan_mut<F, Q: ?Sized>(&self, key: &Q, matches: F) -> RwLockWriteGuard<Bucket<K, V>>
319 where
320 F: Fn(&Bucket<K, V>) -> bool,
321 K: Borrow<Q>,
322 Q: Hash,
323 {
324 // Hash the key.
325 let hash = self.hash(key);
326
327 // Start at the first priority bucket, and then move upwards, searching for the matching
328 // bucket.
329 for i in 0..self.buckets.len() {
330 // Get the lock of the `i`'th bucket after the first priority bucket (wrap on end).
331 let lock = self.buckets[(hash + i) % self.buckets.len()].write();
332
333 // Check if it is a match.
334 if matches(&lock) {
335 // Yup. Return.
336 return lock;
337 }
338 }
339 panic!("`CHashMap` scan_mut failed! No entry found.");
340 }
341
342 /// Scan from the first priority of a key until a match is found (bypass locks).
343 ///
344 /// This is similar to `scan_mut`, but it safely bypasses the locks by making use of the
345 /// aliasing invariants of `&mut`.
346 fn scan_mut_no_lock<F>(&mut self, key: &K, matches: F) -> &mut Bucket<K, V>
347 where
348 F: Fn(&Bucket<K, V>) -> bool,
349 {
350 // Hash the key.
351 let hash = self.hash(key);
352 // TODO: To tame the borrowchecker, we fetch this in advance.
353 let len = self.buckets.len();
354
355 // Start at the first priority bucket, and then move upwards, searching for the matching
356 // bucket.
357 for i in 0..self.buckets.len() {
358 // TODO: hacky hacky
359 let idx = (hash + i) % len;
360
361 // Get the lock of the `i`'th bucket after the first priority bucket (wrap on end).
362
363 // Check if it is a match.
364 if {
365 let bucket = self.buckets[idx].get_mut();
366 matches(&bucket)
367 } {
368 // Yup. Return.
369 return self.buckets[idx].get_mut();
370 }
371 }
372 panic!("`CHashMap` scan_mut_no_lock failed! No entry found.");
373 }
374
375 /// Find a bucket with some key, or a free bucket in same cluster.
376 ///
377 /// This scans for buckets with key `key`. If one is found, it will be returned. If none are
378 /// found, it will return a free bucket in the same cluster.
379 fn lookup_or_free(&self, key: &K) -> RwLockWriteGuard<Bucket<K, V>> {
380 // Hash the key.
381 let hash = self.hash(key);
382 // The encountered free bucket.
383 let mut free = None;
384
385 // Start at the first priority bucket, and then move upwards, searching for the matching
386 // bucket.
387 for i in 0..self.buckets.len() {
388 // Get the lock of the `i`'th bucket after the first priority bucket (wrap on end).
389 let lock = self.buckets[(hash + i) % self.buckets.len()].write();
390
391 if lock.key_matches(key) {
392 // We found a match.
393 return lock;
394 } else if lock.is_empty() {
395 // The cluster is over. Use the encountered free bucket, if any.
396 return free.unwrap_or(lock);
397 } else if lock.is_removed() && free.is_none() {
398 // We found a free bucket, so we can store it to later (if we don't already have
399 // one).
400 free = Some(lock)
401 }
402 }
403
404 free.expect("No free buckets found")
405 }
406
407 /// Lookup some key.
408 ///
409 /// This searches some key `key`, and returns a immutable lock guard to its bucket. If the key
410 /// couldn't be found, the returned value will be an `Empty` cluster.
411 fn lookup<Q: ?Sized>(&self, key: &Q) -> RwLockReadGuard<Bucket<K, V>>
412 where
413 K: Borrow<Q>,
414 Q: PartialEq + Hash,
415 {
416 self.scan(key, |x| match *x {
417 // We'll check that the keys does indeed match, as the chance of hash collisions
418 // happening is inevitable
419 Bucket::Contains(ref candidate_key, _) if key.eq(candidate_key.borrow()) => true,
420 // We reached an empty bucket, meaning that there are no more buckets, not even removed
421 // ones, to search.
422 Bucket::Empty => true,
423 _ => false,
424 })
425 }
426
427 /// Lookup some key, mutably.
428 ///
429 /// This is similar to `lookup`, but it returns a mutable guard.
430 ///
431 /// Replacing at this bucket is safe as the bucket will be in the same cluster of buckets as
432 /// the first priority cluster.
433 fn lookup_mut<Q: ?Sized>(&self, key: &Q) -> RwLockWriteGuard<Bucket<K, V>>
434 where
435 K: Borrow<Q>,
436 Q: PartialEq + Hash,
437 {
438 self.scan_mut(key, |x| match *x {
439 // We'll check that the keys does indeed match, as the chance of hash collisions
440 // happening is inevitable
441 Bucket::Contains(ref candidate_key, _) if key.eq(candidate_key.borrow()) => true,
442 // We reached an empty bucket, meaning that there are no more buckets, not even removed
443 // ones, to search.
444 Bucket::Empty => true,
445 _ => false,
446 })
447 }
448
449 /// Find a free bucket in the same cluster as some key.
450 ///
451 /// This means that the returned lock guard defines a valid, free bucket, where `key` can be
452 /// inserted.
453 fn find_free(&self, key: &K) -> RwLockWriteGuard<Bucket<K, V>> {
454 self.scan_mut(key, |x| x.is_free())
455 }
456
457 /// Find a free bucket in the same cluster as some key (bypassing locks).
458 ///
459 /// This is similar to `find_free`, except that it safely bypasses locks through the aliasing
460 /// guarantees of `&mut`.
461 fn find_free_no_lock(&mut self, key: &K) -> &mut Bucket<K, V> {
462 self.scan_mut_no_lock(key, |x| x.is_free())
463 }
464
465 /// Fill the table with data from another table.
466 ///
467 /// This is used to efficiently copy the data of `table` into `self`.
468 ///
469 /// # Important
470 ///
471 /// The table should be empty for this to work correctly/logically.
472 fn fill(&mut self, table: Self) {
473 // Run over all the buckets.
474 for i in table.buckets {
475 // We'll only transfer the bucket if it is a KV pair.
476 if let Bucket::Contains(key, val) = i.into_inner() {
477 // Find a bucket where the KV pair can be inserted.
478 let bucket = self.scan_mut_no_lock(&key, |x| match *x {
479 // Halt on an empty bucket.
480 Bucket::Empty => true,
481 // We'll assume that the rest of the buckets either contains other KV pairs (in
482 // particular, no buckets have been removed in the newly construct table).
483 _ => false,
484 });
485
486 // Set the bucket to the KV pair.
487 *bucket = Bucket::Contains(key, val);
488 }
489 }
490 }
491}
492
493impl<K: Clone, V: Clone, S: Clone> Clone for Table<K, V, S> {
494 fn clone(&self) -> Self {
495 Table {
496 // Since we copy plainly without rehashing etc., it is important that we keep the same
497 // hash function.
498 hash_builder: self.hash_builder.clone(),
499 // Lock and clone every bucket individually.
500 buckets: self
501 .buckets
502 .iter()
503 .map(|x| RwLock::new(x.read().clone()))
504 .collect(),
505 }
506 }
507}
508
509impl<K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for Table<K, V, S> {
510 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
511 // create a debug map and fill with entries
512 let mut map = f.debug_map();
513 // We'll just run over all buckets and output one after one.
514 for i in &self.buckets {
515 // Acquire the lock.
516 let lock = i.read();
517 // Check if the bucket actually contains anything.
518 if let Bucket::Contains(ref key, ref val) = *lock {
519 // add this entry to the map
520 map.entry(key, val);
521 }
522 }
523 map.finish()
524 }
525}
526
527/// An iterator over the entries of some table.
528pub struct IntoIter<K, V, S> {
529 /// The inner table.
530 table: Table<K, V, S>,
531}
532
533impl<K, V, S> Iterator for IntoIter<K, V, S> {
534 type Item = (K, V);
535
536 fn next(&mut self) -> Option<(K, V)> {
537 // We own the table, and can thus do what we want with it. We'll simply pop from the
538 // buckets until we find a bucket containing data.
539 while let Some(bucket) = self.table.buckets.pop() {
540 // We can bypass dem ebil locks.
541 if let Bucket::Contains(key, val) = bucket.into_inner() {
542 // The bucket contained data, so we'll return the pair.
543 return Some((key, val));
544 }
545 }
546
547 // We've exhausted all the buckets, and no more data could be found.
548 None
549 }
550}
551
552impl<K, V, S> IntoIterator for Table<K, V, S> {
553 type Item = (K, V);
554 type IntoIter = IntoIter<K, V, S>;
555
556 fn into_iter(self) -> Self::IntoIter {
557 IntoIter { table: self }
558 }
559}
560
561/// A RAII guard for reading an entry of a hash map.
562///
563/// This is an access type dereferencing to the inner value of the entry. It will handle unlocking
564/// on drop.
565pub struct ReadGuard<'a, K: 'a, V: 'a, S> {
566 /// The inner hecking long type.
567 inner: OwningRef<
568 OwningHandle<RwLockReadGuard<'a, Table<K, V, S>>, RwLockReadGuard<'a, Bucket<K, V>>>,
569 V,
570 >,
571}
572
573impl<'a, K, V, S> ops::Deref for ReadGuard<'a, K, V, S> {
574 type Target = V;
575
576 fn deref(&self) -> &V {
577 &self.inner
578 }
579}
580
581impl<'a, K, V: PartialEq, S> cmp::PartialEq for ReadGuard<'a, K, V, S> {
582 fn eq(&self, other: &ReadGuard<'a, K, V, S>) -> bool {
583 self == other
584 }
585}
586impl<'a, K, V: Eq, S> cmp::Eq for ReadGuard<'a, K, V, S> {}
587
588impl<'a, K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for ReadGuard<'a, K, V, S> {
589 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
590 write!(f, "ReadGuard({:?})", &**self)
591 }
592}
593
594/// A mutable RAII guard for reading an entry of a hash map.
595///
596/// This is an access type dereferencing to the inner value of the entry. It will handle unlocking
597/// on drop.
598pub struct WriteGuard<'a, K: 'a, V: 'a, S> {
599 /// The inner hecking long type.
600 inner: OwningHandle<
601 OwningHandle<RwLockReadGuard<'a, Table<K, V, S>>, RwLockWriteGuard<'a, Bucket<K, V>>>,
602 &'a mut V,
603 >,
604}
605
606impl<'a, K, V, S> ops::Deref for WriteGuard<'a, K, V, S> {
607 type Target = V;
608
609 fn deref(&self) -> &V {
610 &self.inner
611 }
612}
613
614impl<'a, K, V, S> ops::DerefMut for WriteGuard<'a, K, V, S> {
615 fn deref_mut(&mut self) -> &mut V {
616 &mut self.inner
617 }
618}
619
620impl<'a, K, V: PartialEq, S> cmp::PartialEq for WriteGuard<'a, K, V, S> {
621 fn eq(&self, other: &Self) -> bool {
622 self == other
623 }
624}
625impl<'a, K, V: Eq, S> cmp::Eq for WriteGuard<'a, K, V, S> {}
626
627impl<'a, K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for WriteGuard<'a, K, V, S> {
628 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
629 write!(f, "WriteGuard({:?})", &**self)
630 }
631}
632
633/// A concurrent hash map.
634///
635/// This type defines a concurrent associative array, based on hash tables with linear probing and
636/// dynamic resizing.
637///
638/// The idea is to let each entry hold a multi-reader lock, effectively limiting lock contentions
639/// to writing simultaneously on the same entry, and resizing the table.
640///
641/// It is not an atomic or lockless hash table, since such construction is only useful in very few
642/// cases, due to limitations on in-place operations on values.
643pub struct CHashMap<K, V, S = RandomState> {
644 /// The inner table.
645 table: RwLock<Table<K, V, S>>,
646 /// The total number of KV pairs in the table.
647 ///
648 /// This is used to calculate the load factor.
649 len: AtomicUsize,
650}
651
652impl<K, V> CHashMap<K, V, RandomState> {
653 /// Create a new hash map with a certain capacity.
654 ///
655 /// "Capacity" means the amount of entries the hash map can hold before reallocating. This
656 /// function allocates a hash map with at least the capacity of `cap`.
657 pub fn with_capacity(cap: usize) -> Self {
658 CHashMap {
659 // Start at 0 KV pairs.
660 len: AtomicUsize::new(0),
661 // Make a new empty table. We will make sure that it is at least one.
662 table: RwLock::new(Table::with_capacity(cap)),
663 }
664 }
665
666 /// Create a new hash map.
667 ///
668 /// This creates a new hash map with some fixed initial capacity.
669 pub fn new() -> Self {
670 CHashMap::with_capacity(DEFAULT_INITIAL_CAPACITY)
671 }
672}
673
674impl<K, V, S> CHashMap<K, V, S> {
675 /// Get the number of entries in the hash table.
676 ///
677 /// This is entirely atomic, and will not acquire any locks.
678 ///
679 /// This is guaranteed to reflect the number of entries _at this particular moment.
680 pub fn len(&self) -> usize {
681 self.len.load(ORDERING)
682 }
683
684 /// Get the capacity of the hash table.
685 ///
686 /// The capacity is equal to the number of entries the table can hold before reallocating.
687 pub fn capacity(&self) -> usize {
688 cmp::max(MINIMUM_CAPACITY, self.buckets()) * MAX_LOAD_FACTOR_NUM / MAX_LOAD_FACTOR_DENOM
689 }
690
691 /// Get the number of buckets of the hash table.
692 ///
693 /// "Buckets" refers to the amount of potential entries in the inner table. It is different
694 /// from capacity, in the sense that the map cannot hold this number of entries, since it needs
695 /// to keep the load factor low.
696 pub fn buckets(&self) -> usize {
697 self.table.read().buckets.len()
698 }
699
700 /// Is the hash table empty?
701 pub fn is_empty(&self) -> bool {
702 self.len() == 0
703 }
704
705 /// Deprecated. Do not use.
706 #[deprecated]
707 pub fn filter<F>(&self, predicate: F)
708 where
709 F: Fn(&K, &V) -> bool,
710 {
711 // Following the naming conventions of the standard library...
712 self.retain(predicate)
713 }
714
715 /// Filter the map based on some predicate.
716 ///
717 /// This tests every entry in the hash map by closure `predicate`. If it returns `true`, the
718 /// map will retain the entry. If not, the entry will be removed.
719 ///
720 /// This won't lock the table. This can be a major performance trade-off, as it means that it
721 /// must lock on every table entry. However, it won't block other operations of the table,
722 /// while filtering.
723 pub fn retain<F>(&self, predicate: F)
724 where
725 F: Fn(&K, &V) -> bool,
726 {
727 // Acquire the read lock to the table.
728 let table = self.table.read();
729 // Run over every bucket and apply the filter.
730 for bucket in &table.buckets {
731 // Acquire the read lock, which we will upgrade if necessary.
732 // TODO: Use read lock and upgrade later.
733 let mut lock = bucket.write();
734 // Skip the free buckets.
735 // TODO: Fold the `if` into the `match` when the borrowck gets smarter.
736 if match *lock {
737 Bucket::Contains(ref key, ref val) => !predicate(key, val),
738 _ => false,
739 } {
740 // Predicate didn't match. Set the bucket to removed.
741 *lock = Bucket::Removed;
742 // Decrement the length to account for the removed bucket.
743 // TODO: Can we somehow bundle these up to reduce the overhead of atomic
744 // operations? Storing in a local variable and then subtracting causes
745 // issues with consistency.
746 self.len.fetch_sub(1, ORDERING);
747 }
748 }
749 }
750}
751
752impl<K, V, S: Default + BuildHasher> CHashMap<K, V, S> {
753 /// Creates an empty `CHashMap` with the specified capacity, using `hash_builder`
754 /// to hash the keys.
755 ///
756 /// The hash map will be able to hold at least `capacity` elements without
757 /// reallocating. If `capacity` is 0, the hash map will not allocate.
758 ///
759 /// Warning: `hash_builder` is normally randomly generated, and
760 /// is designed to allow HashMaps to be resistant to attacks that
761 /// cause many collisions and very poor performance. Setting it
762 /// manually using this function can expose a DoS attack vector.
763 ///
764 pub fn with_hasher_and_capacity(cap: usize, hash_builder: S) -> Self {
765 CHashMap {
766 // Start at 0 KV pairs.
767 len: AtomicUsize::new(0),
768 // Make a new empty table. We will make sure that it is at least one.
769 table: RwLock::new(Table::with_capacity_and_hasher(cap, hash_builder)),
770 }
771 }
772
773 /// Creates an empty `CHashMap` which will use the given hash builder to hash
774 /// keys.
775 ///
776 /// The created map has the default initial capacity.
777 ///
778 /// Warning: `hash_builder` is normally randomly generated, and
779 /// is designed to allow HashMaps to be resistant to attacks that
780 /// cause many collisions and very poor performance. Setting it
781 /// manually using this function can expose a DoS attack vector.
782 pub fn with_hasher(hash_builder: S) -> Self {
783 CHashMap::with_hasher_and_capacity(DEFAULT_INITIAL_CAPACITY, hash_builder)
784 }
785}
786
787impl<K, V, S> CHashMap<K, V, S>
788where
789 S: Default + BuildHasher,
790{
791 /// Clear the map.
792 ///
793 /// This clears the hash map and returns the previous version of the map.
794 ///
795 /// It is relatively efficient, although it needs to write lock a RW lock.
796 pub fn clear(&self) -> CHashMap<K, V, S> {
797 // Acquire a writable lock.
798 let mut lock = self.table.write();
799 CHashMap {
800 // Replace the old table with an empty initial table.
801 table: RwLock::new(mem::replace(
802 &mut *lock,
803 Table::with_hasher(DEFAULT_INITIAL_CAPACITY, S::default()),
804 )),
805 // Replace the length with 0 and use the old length.
806 len: AtomicUsize::new(self.len.swap(0, ORDERING)),
807 }
808 }
809}
810
811impl<K: PartialEq + Hash, V, S: BuildHasher> CHashMap<K, V, S> {
812 /// Get the value of some key.
813 ///
814 /// This will lookup the entry of some key `key`, and acquire the read-only lock. This means
815 /// that all other parties are blocked from _writing_ (not reading) this value while the guard
816 /// is held.
817 pub fn get<Q: ?Sized>(&self, key: &Q) -> Option<ReadGuard<K, V, S>>
818 where
819 K: Borrow<Q>,
820 Q: Hash + PartialEq,
821 {
822 // Acquire the read lock and lookup in the table.
823 if let Ok(inner) = OwningRef::new(OwningHandle::new_with_fn(self.table.read(), |x| {
824 unsafe { &*x }.lookup(key)
825 }))
826 .try_map(|x| x.value_ref())
827 {
828 // The bucket contains data.
829 Some(ReadGuard { inner: inner })
830 } else {
831 // The bucket is empty/removed.
832 None
833 }
834 }
835
836 /// Get the (mutable) value of some key.
837 ///
838 /// This will lookup the entry of some key `key`, and acquire the writable lock. This means
839 /// that all other parties are blocked from both reading and writing this value while the guard
840 /// is held.
841 pub fn get_mut<Q: ?Sized>(&self, key: &Q) -> Option<WriteGuard<K, V, S>>
842 where
843 K: Borrow<Q>,
844 Q: Hash + PartialEq,
845 {
846 // Acquire the write lock and lookup in the table.
847 if let Ok(inner) = OwningHandle::try_new(
848 OwningHandle::new_with_fn(self.table.read(), |x| unsafe { &*x }.lookup_mut(key)),
849 |x| {
850 if let &mut Bucket::Contains(_, ref mut val) =
851 unsafe { &mut *(x as *mut Bucket<K, V>) }
852 {
853 // The bucket contains data.
854 Ok(val)
855 } else {
856 // The bucket is empty/removed.
857 Err(())
858 }
859 },
860 ) {
861 Some(WriteGuard { inner: inner })
862 } else {
863 None
864 }
865 }
866
867 /// Does the hash map contain this key?
868 pub fn contains_key<Q: ?Sized>(&self, key: &Q) -> bool
869 where
870 K: Borrow<Q>,
871 Q: Hash + PartialEq,
872 {
873 // Acquire the lock.
874 let lock = self.table.read();
875 // Look the key up in the table
876 let bucket = lock.lookup(key);
877 // Test if it is free or not.
878 !bucket.is_free()
879 }
880}
881
882impl<K, V, S> CHashMap<K, V, S>
883where
884 K: PartialEq + Hash,
885 S: BuildHasher + Default,
886{
887 /// Insert a **new** entry.
888 ///
889 /// This inserts an entry, which the map does not already contain, into the table. If the entry
890 /// exists, the old entry won't be replaced, nor will an error be returned. It will possibly
891 /// introduce silent bugs.
892 ///
893 /// To be more specific, it assumes that the entry does not already exist, and will simply skip
894 /// to the end of the cluster, even if it does exist.
895 ///
896 /// This is faster than e.g. `insert`, but should only be used, if you know that the entry
897 /// doesn't already exist.
898 ///
899 /// # Warning
900 ///
901 /// Only use this, if you know what you're doing. This can easily introduce very complex logic
902 /// errors.
903 ///
904 /// For most other purposes, use `insert`.
905 ///
906 /// # Panics
907 ///
908 /// This might perform checks in debug mode testing if the key exists already.
909 pub fn insert_new(&self, key: K, val: V) {
910 debug_assert!(
911 !self.contains_key(&key),
912 "Hash table contains already key, contrary to \
913 the assumptions about `insert_new`'s arguments."
914 );
915
916 // Expand and lock the table. We need to expand to ensure the bounds on the load factor.
917 let lock = self.table.read();
918 {
919 // Find the free bucket.
920 let mut bucket = lock.find_free(&key);
921
922 // Set the bucket to the new KV pair.
923 *bucket = Bucket::Contains(key, val);
924 }
925 // Expand the table (we know beforehand that the entry didn't already exist).
926 self.expand(lock);
927 }
928
929 /// Replace an existing entry, or insert a new one.
930 ///
931 /// This will replace an existing entry and return the old entry, if any. If no entry exists,
932 /// it will simply insert the new entry and return `None`.
933 pub fn insert(&self, key: K, val: V) -> Option<V> {
934 let ret;
935 // Expand and lock the table. We need to expand to ensure the bounds on the load factor.
936 let lock = self.table.read();
937 {
938 // Lookup the key or a free bucket in the inner table.
939 let mut bucket = lock.lookup_or_free(&key);
940
941 // Replace the bucket.
942 ret = mem::replace(&mut *bucket, Bucket::Contains(key, val)).value();
943 }
944
945 // Expand the table if no bucket was overwritten (i.e. the entry is fresh).
946 if ret.is_none() {
947 self.expand(lock);
948 }
949
950 ret
951 }
952
953 /// Insert or update.
954 ///
955 /// This looks up `key`. If it exists, the reference to its value is passed through closure
956 /// `update`. If it doesn't exist, the result of closure `insert` is inserted.
957 pub fn upsert<F, G>(&self, key: K, insert: F, update: G)
958 where
959 F: FnOnce() -> V,
960 G: FnOnce(&mut V),
961 {
962 // Expand and lock the table. We need to expand to ensure the bounds on the load factor.
963 let lock = self.table.read();
964 {
965 // Lookup the key or a free bucket in the inner table.
966 let mut bucket = lock.lookup_or_free(&key);
967
968 match *bucket {
969 // The bucket had KV pair!
970 Bucket::Contains(_, ref mut val) => {
971 // Run it through the closure.
972 update(val);
973 // TODO: We return to stop the borrowck to yell at us. This prevents the control flow
974 // from reaching the expansion after the match if it has been right here.
975 return;
976 }
977 // The bucket was empty, simply insert.
978 ref mut x => *x = Bucket::Contains(key, insert()),
979 }
980 }
981
982 // Expand the table (this will only happen if the function haven't returned yet).
983 self.expand(lock);
984 }
985
986 /// Map or insert an entry.
987 ///
988 /// This sets the value associated with key `key` to `f(Some(old_val))` (if it returns `None`,
989 /// the entry is removed) if it exists. If it does not exist, it inserts it with value
990 /// `f(None)`, unless the closure returns `None`.
991 ///
992 /// Note that if `f` returns `None`, the entry of key `key` is removed unconditionally.
993 pub fn alter<F>(&self, key: K, f: F)
994 where
995 F: FnOnce(Option<V>) -> Option<V>,
996 {
997 // Expand and lock the table. We need to expand to ensure the bounds on the load factor.
998 let lock = self.table.read();
999 {
1000 // Lookup the key or a free bucket in the inner table.
1001 let mut bucket = lock.lookup_or_free(&key);
1002
1003 match mem::replace(&mut *bucket, Bucket::Removed) {
1004 Bucket::Contains(_, val) => {
1005 if let Some(new_val) = f(Some(val)) {
1006 // Set the bucket to a KV pair with the new value.
1007 *bucket = Bucket::Contains(key, new_val);
1008 // No extension required, as the bucket already had a KV pair previously.
1009 return;
1010 } else {
1011 // The old entry was removed, so we decrement the length of the map.
1012 self.len.fetch_sub(1, ORDERING);
1013 // TODO: We return as a hack to avoid the borrowchecker from thinking we moved a
1014 // referenced object. Namely, under this match arm the expansion after the match
1015 // statement won't ever be reached.
1016 return;
1017 }
1018 }
1019 _ => {
1020 if let Some(new_val) = f(None) {
1021 // The previously free cluster will get a KV pair with the new value.
1022 *bucket = Bucket::Contains(key, new_val);
1023 } else {
1024 return;
1025 }
1026 }
1027 }
1028 }
1029
1030 // A new entry was inserted, so naturally, we expand the table.
1031 self.expand(lock);
1032 }
1033
1034 /// Remove an entry.
1035 ///
1036 /// This removes and returns the entry with key `key`. If no entry with said key exists, it
1037 /// will simply return `None`.
1038 pub fn remove<Q: ?Sized>(&self, key: &Q) -> Option<V>
1039 where
1040 K: Borrow<Q>,
1041 Q: PartialEq + Hash,
1042 {
1043 // Acquire the read lock of the table.
1044 let lock = self.table.read();
1045
1046 // Lookup the table, mutably.
1047 let mut bucket = lock.lookup_mut(&key);
1048 // Remove the bucket.
1049 match &mut *bucket {
1050 // There was nothing to remove.
1051 &mut Bucket::Removed | &mut Bucket::Empty => None,
1052 // TODO: We know that this is a `Bucket::Contains` variant, but to bypass borrowck
1053 // madness, we do weird weird stuff.
1054 bucket => {
1055 // Decrement the length of the map.
1056 self.len.fetch_sub(1, ORDERING);
1057
1058 // Set the bucket to "removed" and return its value.
1059 mem::replace(bucket, Bucket::Removed).value()
1060 }
1061 }
1062 }
1063
1064 /// Reserve additional space.
1065 ///
1066 /// This reserves additional `additional` buckets to the table. Note that it might reserve more
1067 /// in order make reallocation less common.
1068 pub fn reserve(&self, additional: usize) {
1069 // Get the new length.
1070 let len = (self.len() + additional) * LENGTH_MULTIPLIER;
1071 // Acquire the write lock (needed because we'll mess with the table).
1072 let mut lock = self.table.write();
1073 // Handle the case where another thread has resized the table while we were acquiring the
1074 // lock.
1075 if lock.buckets.len() < len {
1076 // Swap the table out with a new table of desired size (multiplied by some factor).
1077 let table = mem::replace(
1078 &mut *lock,
1079 Table::with_capacity_and_hasher(len, S::default()),
1080 );
1081 // Fill the new table with the data from the old table.
1082 lock.fill(table);
1083 }
1084 }
1085
1086 /// Shrink the capacity of the map to reduce space usage.
1087 ///
1088 /// This will shrink the capacity of the map to the needed amount (plus some additional space
1089 /// to avoid reallocations), effectively reducing memory usage in cases where there is
1090 /// excessive space.
1091 ///
1092 /// It is healthy to run this once in a while, if the size of your hash map changes a lot (e.g.
1093 /// has a high maximum case).
1094 pub fn shrink_to_fit(&self) {
1095 // Acquire the write lock (needed because we'll mess with the table).
1096 let mut lock = self.table.write();
1097 // Swap the table out with a new table of desired size (multiplied by some factor).
1098 let table = mem::replace(
1099 &mut *lock,
1100 Table::with_capacity_and_hasher(self.len(), S::default()),
1101 );
1102 // Fill the new table with the data from the old table.
1103 lock.fill(table);
1104 }
1105
1106 /// Increment the size of the hash map and expand it so one more entry can fit in.
1107 ///
1108 /// This returns the read lock, such that the caller won't have to acquire it twice.
1109 fn expand(&self, lock: RwLockReadGuard<Table<K, V, S>>) {
1110 // Increment the length to take the new element into account.
1111 let len = self.len.fetch_add(1, ORDERING) + 1;
1112
1113 // Extend if necessary. We multiply by some constant to adjust our load factor.
1114 if len * MAX_LOAD_FACTOR_DENOM > lock.buckets.len() * MAX_LOAD_FACTOR_NUM {
1115 // Drop the read lock to avoid deadlocks when acquiring the write lock.
1116 drop(lock);
1117 // Reserve 1 entry in space (the function will handle the excessive space logic).
1118 self.reserve(1);
1119 }
1120 }
1121}
1122
1123impl<K, V, S> CHashMap<K, V, S>
1124where
1125 K: Clone,
1126{
1127 /// returns a vec of keys. Note that this does a clone operation.
1128 pub fn keys(&self) -> Vec<K> {
1129 let mut result = Vec::<K>::with_capacity(self.len());
1130 let lock = self.table.read();
1131 {
1132 for bucket in lock.buckets.iter() {
1133 {
1134 if let Bucket::Contains(ref key, _) = *bucket.read() {
1135 result.push(key.clone());
1136 }
1137 }
1138 }
1139 }
1140 result
1141 }
1142}
1143
1144impl<K: Serialize, V: Serialize, S> Serialize for CHashMap<K, V, S> {
1145 fn serialize<T>(&self, serializer: T) -> Result<T::Ok, T::Error>
1146 where
1147 T: Serializer,
1148 {
1149 let lock = self.table.read();
1150 let mut map_serializer = serializer.serialize_map(Some(self.len()))?;
1151 {
1152 for bucket in lock.buckets.iter() {
1153 {
1154 if let Bucket::Contains(ref key, ref value) = *bucket.read() {
1155 map_serializer.serialize_entry(key, value)?;
1156 }
1157 }
1158 }
1159 }
1160 map_serializer.end()
1161 }
1162}
1163
1164impl<'de, K: Deserialize<'de> + PartialEq + Hash, V: Deserialize<'de>, S> Deserialize<'de>
1165 for CHashMap<K, V, S>
1166where
1167 S: Default + BuildHasher,
1168{
1169 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1170 where
1171 D: Deserializer<'de>,
1172 {
1173 deserializer.deserialize_map(ChashMapVisitor::new())
1174 }
1175}
1176
1177impl<K, V, S: Default + BuildHasher> Default for CHashMap<K, V, S> {
1178 fn default() -> Self {
1179 // Forward the call to `new`.
1180 CHashMap::with_hasher(S::default())
1181 }
1182}
1183
1184impl<K: Clone, V: Clone, S: Clone> Clone for CHashMap<K, V, S> {
1185 fn clone(&self) -> Self {
1186 CHashMap {
1187 table: RwLock::new(self.table.read().clone()),
1188 len: AtomicUsize::new(self.len.load(ORDERING)),
1189 }
1190 }
1191}
1192
1193impl<K: fmt::Debug, V: fmt::Debug, S> fmt::Debug for CHashMap<K, V, S> {
1194 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1195 (*self.table.read()).fmt(f)
1196 }
1197}
1198
1199impl<K, V, S> IntoIterator for CHashMap<K, V, S> {
1200 type Item = (K, V);
1201 type IntoIter = IntoIter<K, V, S>;
1202
1203 fn into_iter(self) -> IntoIter<K, V, S> {
1204 self.table.into_inner().into_iter()
1205 }
1206}
1207
1208impl<K: PartialEq + Hash, V, S: Default + BuildHasher> iter::FromIterator<(K, V)>
1209 for CHashMap<K, V, S>
1210{
1211 fn from_iter<I: IntoIterator<Item = (K, V)>>(iter: I) -> Self {
1212 // TODO: This step is required to obtain the length of the iterator. Eliminate it.
1213 let vec: Vec<_> = iter.into_iter().collect();
1214 let len = vec.len();
1215
1216 // Start with an empty table.
1217 let mut table = Table::with_capacity_and_hasher(len, S::default());
1218 // Fill the table with the pairs from the iterator.
1219 for (key, val) in vec {
1220 // Insert the KV pair. This is fine, as we are ensured that there are no duplicates in
1221 // the iterator.
1222 let bucket = table.find_free_no_lock(&key);
1223 *bucket = Bucket::Contains(key, val);
1224 }
1225
1226 CHashMap {
1227 table: RwLock::new(table),
1228 len: AtomicUsize::new(len),
1229 }
1230 }
1231}
1232
1233struct ChashMapVisitor<K, V, S = RandomState> {
1234 marker: PhantomData<fn() -> CHashMap<K, V, S>>,
1235}
1236
1237impl<K, V, S> ChashMapVisitor<K, V, S> {
1238 fn new() -> Self {
1239 ChashMapVisitor {
1240 marker: PhantomData,
1241 }
1242 }
1243}
1244
1245impl<'de, K, V, S> Visitor<'de> for ChashMapVisitor<K, V, S>
1246where
1247 K: Deserialize<'de> + PartialEq + Hash,
1248 V: Deserialize<'de>,
1249 S: Default + BuildHasher,
1250{
1251 type Value = CHashMap<K, V, S>;
1252
1253 fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
1254 formatter.write_str("CHashMap")
1255 }
1256
1257 fn visit_map<M>(self, mut access: M) -> Result<Self::Value, M::Error>
1258 where
1259 M: MapAccess<'de>,
1260 {
1261 let map = CHashMap::<K, V, S>::with_hasher_and_capacity(
1262 access.size_hint().unwrap_or(0),
1263 S::default(), // this should not be default but I don't know a way around it.
1264 );
1265
1266 while let Some((key, value)) = access.next_entry()? {
1267 map.insert(key, value);
1268 }
1269
1270 Ok(map)
1271 }
1272}