ahtable/lib.rs
1//! An implementation of ArrayHash
2//!
3//! ArrayHash is a data structure where the index is determined by hash and
4//! each entry in array is a `Vec` that store data and all it collision.
5//!
6//! Oritinal paper can be found [here](Askitis, N. & Zobel, J. (2005), Cache-conscious collision resolution for string hash tables, in ‘Proc. SPIRE String Processing and Information Retrieval Symp.’, Springer-Verlag, pp. 92–104)
7//!
8//! This implementation try to use generic wherever possible.
9//! It end up with ArrayHash that take anything that is clonable as value and anything that
10//! implement `Hash` and `PartialEq` as key.
11//! It let you choose whichever `Hasher` that you want. The only constraint is that
12//! `Hasher` must implement `Clone` trait.
13//!
14//! It supports read only iteration, mutably iteration, and owned iteration.
15//!
16//! To create [ArrayHash](struct.ArrayHash.html) use [ArrayHashBuilder](struct.ArrayHashBuilder.html).
17//! The default `Hasher` is `XxHasher64`.
18
19use twox_hash::{RandomXxHashBuilder64, XxHash64};
20use core::hash::{BuildHasher, Hash, Hasher};
21use core::borrow::Borrow;
22
23const MAX_LOAD_FACTOR: usize = 100_000; // Number of element before resize the table
24
25// Make each bucket fit into single memory page
26const DEFAULT_BUCKETS_SIZE: usize = 4096 / std::mem::size_of::<usize>();
27const DEFAULT_SLOT_SIZE: usize = 8;
28
29/// A builder that use for build an [ArrayHash](struct.ArrayHash.html).
30#[derive(Clone, Hash, PartialEq)]
31pub struct ArrayHashBuilder<H> {
32 hasher: H,
33 buckets_size: usize,
34 max_load_factor: usize,
35 slot_size: usize
36}
37
38/// Create new ArrayHashBuilder with default hasher and size.
39/// As currently is, the default allocated number of slot per bucket is (4096 / size of usize) slots.
40/// Each slot has 8 elements. It will use `XxHash64` as default hasher
41///
42/// Since all slots are Vec, it will be re-allocate if it grow larger than this default.
43/// The number of slots per bucket will be held until the number of entry grew pass `max_load_factor`.
44/// When it reach the `max_load_factor`, it will double the bucket size.
45impl Default for ArrayHashBuilder<XxHash64> {
46 fn default() -> ArrayHashBuilder<XxHash64> {
47 ArrayHashBuilder {
48 hasher: RandomXxHashBuilder64::default().build_hasher(),
49 buckets_size: DEFAULT_BUCKETS_SIZE,
50 max_load_factor: MAX_LOAD_FACTOR,
51 slot_size: DEFAULT_SLOT_SIZE
52 }
53 }
54}
55
56/// Make [ArrayHashBuilder](struct.ArrayHashBuilder.html) with spec from existing [ArrayHash](struct.ArrayHash.html).
57///
58/// This is useful for creating an object of array hash that may be later compare to baseline array for equality.
59impl<'a, H, K, V> From<&'a ArrayHash<H, K, V>> for ArrayHashBuilder<H> where H: Clone + Hasher, K: Hash + PartialEq {
60 fn from(ah: &'a ArrayHash<H, K, V>) -> ArrayHashBuilder<H> {
61 let buckets = ah.buckets.as_ref().unwrap();
62
63 debug_assert!(buckets.len() > 0);
64
65 ArrayHashBuilder {
66 buckets_size: buckets.len(),
67 hasher: ah.hasher.clone(),
68 max_load_factor: ah.max_load_factor,
69 slot_size: buckets[0].len()
70 }
71 }
72}
73
74impl<H> ArrayHashBuilder<H> where H: core::hash::Hasher {
75 /// Create new ArrayHashBuilder by using given hasher.
76 /// As currently is, the default allocated number of slot per bucket is (4096 / size of usize) slots.
77 ///
78 /// Since all slots are Vec, it will be re-allocate if it grow larger than this default.
79 #[inline]
80 pub fn with_hasher(hasher: H) -> ArrayHashBuilder<H> {
81 ArrayHashBuilder {
82 hasher: hasher,
83 buckets_size: DEFAULT_BUCKETS_SIZE,
84 max_load_factor: MAX_LOAD_FACTOR,
85 slot_size: DEFAULT_SLOT_SIZE
86 }
87 }
88
89 /// Switch hasher to other hasher. This will consume current builder and
90 /// return a new one with new builder
91 #[inline]
92 pub fn hasher<H2>(self, hasher: H2) -> ArrayHashBuilder<H2> {
93 ArrayHashBuilder {
94 hasher,
95 buckets_size: self.buckets_size,
96 max_load_factor: self.max_load_factor,
97 slot_size: self.slot_size
98 }
99 }
100
101 /// Change buckets size of [ArrayHasher](struct.ArrayHasher.html).
102 /// Buckets size scale once max_load_factor is reached.
103 /// The new size after it scaled is double of old size.
104 ///
105 /// # Parameter
106 /// `size` - A number of buckets in this table. It must be greater than 0.
107 pub fn buckets_size(mut self, size: usize) -> Self {
108 debug_assert!(size > 0);
109 self.buckets_size = size;
110 self
111 }
112
113 /// Change max number of entry before double the buckets size.
114 ///
115 /// # Parameter
116 /// `factor` - A number of item before it double the bucket size.
117 pub fn max_load_factor(mut self, factor: usize) -> Self {
118 debug_assert!(factor > 0);
119 self.max_load_factor = factor;
120 self
121 }
122
123 /// Default initialized slot size. Every slot in bucket will be
124 /// allocated by given size.
125 /// Keep in mind that each slot is a `vec`. It can grow pass this
126 /// number. It'd be best to give a proper estimation to prevent unnecessary
127 /// re-allocation.
128 ///
129 /// # Parameter
130 /// `size` - A default size for each slot.
131 pub fn slot_size(mut self, size: usize) -> Self {
132 self.slot_size = size;
133 self
134 }
135
136 /// Consume this builder and construct a new [ArrayHash](struct.ArrayHash.html)
137 pub fn build<K, V>(self) -> ArrayHash<H, K, V> where H: core::hash::Hasher + Clone, K: core::hash::Hash + core::cmp::PartialEq {
138 ArrayHash {
139 buckets: Some((0..self.buckets_size).map(|_| Vec::with_capacity(self.slot_size)).collect()),
140 hasher: self.hasher,
141 capacity: self.buckets_size,
142 max_load_factor: self.max_load_factor,
143 size: 0
144 }
145 }
146}
147
148/// An implementation of ArrayHash in pure Rust.
149///
150/// ArrayHash is a data structure where the index is determined by hash and
151/// each entry in array is a `Vec` that store data and all it collision.
152///
153/// Oritinal paper can be found [here](Askitis, N. & Zobel, J. (2005), Cache-conscious collision resolution for string hash tables, in ‘Proc. SPIRE String Processing and Information Retrieval Symp.’, Springer-Verlag, pp. 92–104)
154///
155/// In this implementation, user can supplied their own choice of hasher but it need to implement `Clone`.
156///
157/// The data can be anything that implement `Hash`.
158///
159/// The default `Hasher`, if not provided, will be `XxHash64`.
160#[derive(Clone, Debug)]
161pub struct ArrayHash<H, K, V> where H: core::hash::Hasher + Clone, K: core::hash::Hash + PartialEq {
162 buckets: Option<Box<[Vec<(K, V)>]>>,
163 hasher: H,
164 capacity: usize,
165 max_load_factor: usize,
166 size: usize
167}
168
169/// Generalize implementation that let two array hash of different type of key and value to be
170/// comparable.
171/// It requires that both side must use the same hasher with exactly same seed.
172/// It rely on a contract that if any `K1 == K2` then it mean those two hash is also
173/// equals.
174impl<H, K1, V1, K2, V2> PartialEq<ArrayHash<H, K1, V1>> for ArrayHash<H, K2, V2>
175where H: Clone + Hasher + PartialEq,
176 K1: Hash + PartialEq + PartialEq<K2>,
177 V1: PartialEq<V2>,
178 K2: Hash + PartialEq
179{
180 fn eq(&self, rhs: &ArrayHash<H, K1, V1>) -> bool {
181 self.is_hasher_eq(rhs) &&
182 self.capacity == rhs.capacity &&
183 self.size == rhs.size &&
184 rhs.buckets.as_deref().unwrap().iter().zip(self.buckets.as_deref().unwrap().iter()).all(|(rhs, lhs)| {
185 rhs.len() == lhs.len() && // Guard case where one side is prefix array of another
186 rhs.iter().zip(lhs.iter()).all(|((k1, v1), (k2, v2))| {k1 == k2 && v1 == v2})
187 })
188 }
189}
190
191/// Implement hash by using only `K` and `V` pair to calculate hash.
192/// It will still conform to following contract:
193/// For any `AH1 == AH2`, `hash(AH1) == hash(AH2)`.
194///
195/// This is because the `PartialEq` implementation check if both side have
196/// the same size, same hasher, same number of items, and exactly the same
197/// key and value pair returned by each yield of both AH1's and AH2's iterator.
198/// However, hasher contract need no symmetric property.
199/// It mean that `hash(AH1) == hash(AH2) ` doesn't imply that `AH1 == AH2`.
200/// Thus, we will have the same hash if we iterate through array and hash both key and value
201/// for each yield on both `AH1` and `AH2`.
202impl<H, K, V> Hash for ArrayHash<H, K, V> where H: Hasher + Clone, K: Hash + PartialEq, V: Hash {
203 fn hash<H2: Hasher>(&self, hasher: &mut H2) {
204 // Since rust iterate on slice to hash and `Box<T>` is hash by deref into `T`,
205 // we can simply hash on `self.buckets`. It is equals to iterate on each
206 // inner element then hash each individual of it.
207 self.buckets.hash(hasher);
208 }
209}
210
211impl<H, K, V> ArrayHash<H, K, V> where H: core::hash::Hasher + Clone, K: core::hash::Hash + PartialEq {
212
213 /// Make a builder with default specification equals to current specification of this
214 /// array.
215 ///
216 /// Note: The current specification of array may be different from the spec used to create
217 /// the array. For example, if original `max_load_factor` is `2` but 3 elements were put
218 /// into array, the `max_load_factor` will be `4` and the `bucket_size` will be double of
219 /// the original `bucket_size`.
220 #[inline]
221 pub fn to_builder(&self) -> ArrayHashBuilder<H> {
222 ArrayHashBuilder::from(self)
223 }
224
225 /// Check if two array use the same hasher with exactly same seed.
226 /// This mean that value `A == B` on these two array will have exactly the same hash value.
227 #[inline]
228 pub fn is_hasher_eq<K2, V2>(&self, rhs: &ArrayHash<H, K2, V2>) -> bool
229 where H: PartialEq,
230 K2: Hash + PartialEq
231 {
232 self.hasher == rhs.hasher
233 }
234
235 /// Add or replace entry into this `HashMap`.
236 /// If entry is replaced, it will be return in `Option`.
237 /// Otherwise, it return `None`
238 ///
239 /// # Parameter
240 /// - `entry` - A tuple to be add to this.
241 ///
242 /// # Return
243 /// Option that contains tuple of (key, value) that got replaced or `None` if it is
244 /// new entry
245 pub fn put(&mut self, key: K, value: V) -> Option<(K, V)> {
246 let mut index = self.make_key(&key);
247 let result;
248
249 if let Some(i) = self.buckets.as_mut().unwrap()[index].iter().position(|(k, _)| *k == key) {
250 result = Some(self.buckets.as_mut().unwrap()[index].swap_remove(i));
251 } else {
252 self.size += 1;
253
254 if self.maybe_expand() {
255 index = self.make_key(&key);
256 }
257
258 result = None
259 }
260
261 self.buckets.as_mut().unwrap()[index].push((key, value));
262
263 result
264 }
265
266 /// Try to put value into this `ArrayHash`.
267 /// If the given key is already in used, leave old entry as is
268 /// and return key/value along with current reference to value associated with the key.
269 /// Otherwise, add entry to this `ArrayHash` and return reference to current value.
270 ///
271 /// # Parameter
272 /// - `entry` - A tuple of (key, value) to be add to this.
273 /// # Return
274 /// It return `Ok(&V)` if key/value were put into this collection.
275 /// Otherwise, it return `Err((K, V, &V))` where `K` is given key,
276 /// `V` is given value, and `&V` is reference to current value associated
277 /// with given key
278 pub fn try_put(&mut self, key: K, value: V) -> Result<&V, (K, V, &V)> {
279 let mut index = self.make_key(&key);
280
281 if let Some(i) = self.buckets.as_ref().unwrap()[index].iter().position(|(k, _)| *k == key) {
282 Err((key, value, &self.buckets.as_ref().unwrap()[index][i].1))
283 } else {
284 self.size += 1;
285 if self.maybe_expand() {
286 index = self.make_key(&key);
287 }
288 let bucket = &mut self.buckets.as_mut().unwrap()[index];
289 bucket.push((key, value));
290 Ok(&bucket[bucket.len() - 1].1)
291 }
292 }
293
294 /// Get a value of given key from this `ArrayHash` relying on contractual
295 /// implementation of `PartialEq` and `Hash` where following contract applied:
296 /// - for any `A == B` then `B == A` then hash of `A` == hash of `B`
297 /// - for any `A == B` and `B == C` then `A == C` then hash of `A` == hash of `B` == hash of `C`
298 ///
299 /// # Parameter
300 /// - `key` - A key to look for.
301 ///
302 /// # Return
303 /// An `Option` contains a value or `None` if it is not found.
304 pub fn get<T>(&self, key: &T) -> Option<&V> where T: core::hash::Hash + PartialEq<K> {
305 let index = self.make_key(&key);
306 let slot = &self.buckets.as_ref().unwrap()[index];
307 return slot.iter().find(|(k, _)| {key == k}).map(|(_, v)| v)
308
309 // for (ref k, ref v) in slot.iter() {
310 // if *k == *key {
311 // return Some(v)
312 // }
313 // }
314 // None
315 }
316
317 /// Get a value using deref type.
318 ///
319 /// This is usable only if the key is a type of smart pointer that can be deref into another type
320 /// which implement `Hash` and `PartialEq`.
321 ///
322 /// For example, if K is `Box<[u8]>`, you can use `&[u8]` to query for a value
323 ///
324 /// # Parameter
325 /// `key` - Any type that implement `Deref` where type after `Deref` is that same type
326 /// as actual type of `key` beneath type `K`.
327 ///
328 /// # Return
329 /// `Some(&V)` if key exist in this table. Otherwise None.
330 pub fn smart_get<T, Q>(&self, key: Q) -> Option<&V> where Q: core::ops::Deref<Target=T>, K: core::ops::Deref<Target=T>, T: core::hash::Hash + core::cmp::PartialEq {
331 let index = self.make_key(&*key);
332
333 let slot = &self.buckets.as_ref().unwrap()[index];
334 slot.iter().find(|(k, _)| **k == *key).map(|(_, v)| v)
335
336 // for (ref k, ref v) in slot.iter() {
337 // if **k == *key {
338 // return Some(v)
339 // }
340 // }
341 // None
342 }
343
344 /// Get a value of given key from this `ArrayHash` relying on contractual
345 /// implementation of `PartialEq` and `Hash` where following contract applied:
346 /// - `B` can be borrowed as type `A`
347 /// - for any `A == B` then `B == A` then hash of `A` == hash of `B`
348 /// - for any `A == B` and `B == C` then `A == C` then hash of `A` == hash of `B` == hash of `C`
349 /// - for any `&A == &B` then `A == B`
350 ///
351 /// It is useful for case where the stored key and query key is different type but the stored key
352 /// can be borrow into the same type as query. For example, stored `Vec<T>` but query with `&[T]`.
353 /// It isn't possible to use [get](struct.ArrayHash.html#method.get) method as `[T]`
354 /// isn't implement `PartialEq<Vec<T>>`.
355 ///
356 /// # Parameter
357 /// - `key` - A key to look for.
358 ///
359 /// # Return
360 /// An `Option` contains a value or `None` if it is not found.
361 pub fn coerce_get<T>(&self, key: &T) -> Option<&V> where T: core::hash::Hash + PartialEq + ?Sized, K: Borrow<T> {
362 let index = self.make_key(key);
363 let slot = &self.buckets.as_ref().unwrap()[index];
364 return slot.iter().find(|(k, _)| {key == k.borrow()}).map(|(_, v)| v)
365
366 // for (ref k, ref v) in slot.iter() {
367 // if *k == *key {
368 // return Some(v)
369 // }
370 // }
371 // None
372 }
373
374 /// Attempt to remove entry with given key from this `ArrayHash` relying on contractual
375 /// implementation of `PartialEq` and `Hash` where following contract applied:
376 /// - for any `A == B` then `B == A` then hash of `A` == hash of `B`
377 /// - for any `A == B` and `B == C` then `A == C` then hash of `A` == hash of `B` == hash of `C`
378 ///
379 /// # Parameter
380 /// - `key` - A key of entry to be remove.
381 ///
382 /// # Return
383 /// Option that contain tuple of (key, value) or `None` of key is not found
384 pub fn remove<T>(&mut self, key: &T) -> Option<(K, V)> where T: core::hash::Hash + PartialEq<K> {
385 let slot_idx = self.make_key(key);
386 let slot = self.buckets.as_mut().unwrap();
387 let entry_idx = slot[slot_idx].iter().position(|(k, _)| {key == k});
388 if let Some(i) = entry_idx {
389 self.size -= 1;
390 Some(slot[slot_idx].swap_remove(i))
391 } else {
392 None
393 }
394 }
395
396 /// Attempt to remove entry with given key from this `ArrayHash`.
397 ///
398 /// This is usable only if the key is a type of smart pointer that can be deref into another type
399 /// which implement `Hash` and `PartialEq`.
400 ///
401 /// For example, if K is `Box<[u8]>`, you can use `&[u8]` to remove it.
402 ///
403 /// # Parameter
404 /// - `key` - A key of entry to be remove.
405 ///
406 /// # Return
407 /// Option that contain tuple of (key, value) or `None` of key is not found
408 pub fn smart_remove<Q, T>(&mut self, key: Q) -> Option<(K, V)> where Q: core::ops::Deref<Target=T>, K: core::ops::Deref<Target=T>, T: core::hash::Hash + core::cmp::PartialEq {
409 let slot_idx = self.make_key(&*key);
410 let slot = self.buckets.as_mut().unwrap();
411 let entry_idx = slot[slot_idx].iter().position(|(k, _)| {*key == **k});
412 if let Some(i) = entry_idx {
413 self.size -= 1;
414 Some(slot[slot_idx].swap_remove(i))
415 } else {
416 None
417 }
418 }
419
420 /// Attempt to remove entry with given key from this `ArrayHash` relying on contractual
421 /// implementation of `PartialEq` and `Hash` where following contract applied:
422 /// - `B` can be borrowed as type `A`
423 /// - for any `A == B` then `B == A` then hash of `A` == hash of `B`
424 /// - for any `A == B` and `B == C` then `A == C` then hash of `A` == hash of `B` == hash of `C`
425 /// - for any `&A == &B` then `A == B`
426 ///
427 /// It is useful for case where the stored key and query key is different type but the stored key
428 /// can be borrow into the same type as query. For example, stored `Vec<T>` but query with `&[T]`.
429 /// It isn't possible to use [remove](struct.ArrayHash.html#method.remove) method as `[T]`
430 /// isn't implement `PartialEq<Vec<T>>`.
431 ///
432 /// # Parameter
433 /// - `key` - A key of entry to be remove.
434 ///
435 /// # Return
436 /// Option that contain tuple of (key, value) or `None` of key is not found
437 pub fn coerce_remove<T>(&mut self, key: &T) -> Option<(K, V)> where T: core::hash::Hash + PartialEq + ?Sized, K: Borrow<T> {
438 let slot_idx = self.make_key(key);
439 let slot = self.buckets.as_mut().unwrap();
440 let entry_idx = slot[slot_idx].iter().position(|(k, _)| {key == k.borrow()});
441 if let Some(i) = entry_idx {
442 self.size -= 1;
443 Some(slot[slot_idx].swap_remove(i))
444 } else {
445 None
446 }
447 }
448
449 /// Current number of entry in this `ArrayHash`
450 #[inline]
451 pub fn len(&self) -> usize {
452 self.size
453 }
454
455 /// Check if this array hash contains every entry found in given other iterator that yield
456 /// `&(K, V)` and `V` implements `PartialEq`.
457 ///
458 /// # Parameter
459 /// - `other` - A type that implement `IntoIterator<Item=&(K, V)>`
460 ///
461 /// # Return
462 /// true if this array hash contains every entry that other iterator yield. Otherwise, false.
463 pub fn contains_iter<'a, I>(&self, other: I) -> bool where I: IntoIterator<Item=&'a (K, V)>, K: 'a, V: 'a + PartialEq {
464 for (key, value) in other.into_iter() {
465 if let Some(v) = self.get(key) {
466 if v != value {
467 return false
468 }
469 } else {
470 return false
471 }
472 }
473
474 true
475 }
476
477 /// Get an iterator over this `ArrayHash`.
478 ///
479 /// # Return
480 /// [ArrayHashIterator](struct.ArrayHashIterator.html) that return reference
481 /// to each entry in this `ArrayHash`
482 pub fn iter(&self) -> ArrayHashIterator<'_, K, V> {
483 let slots = self.buckets.as_ref().unwrap();
484 let mut buckets = slots.iter();
485 let first_iter = buckets.next().unwrap().iter();
486 ArrayHashIterator {
487 buckets: buckets,
488 current_iterator: first_iter,
489 size: self.size
490 }
491 }
492
493 /// Get a mutable iterator over this `ArrayHash`.
494 ///
495 /// Warning, you shall not modify the key part of entry. If you do, it might end up
496 /// accessible only by iterator but not with [get](struct.ArrayHash.html#method.get).
497 ///
498 /// # Return
499 /// [ArrayHashIterMut](struct.ArrayHashIterMut.html) that return mutably reference
500 /// to each entry in this `ArrayHash`
501 pub fn iter_mut(&mut self) -> ArrayHashIterMut<'_, K, V> {
502 if self.size > 0 {
503 let buckets: Box<[core::slice::IterMut<(K, V)>]> = self.buckets.as_mut().unwrap().iter_mut().filter_map(|slot| {
504 if slot.len() > 0 {Some(slot.iter_mut())} else {None}
505 }).collect();
506 let remain_slots = buckets.len() - 1;
507 ArrayHashIterMut {
508 // Only get iter_mut from entry with some element
509 buckets,
510 remain_slots, // similar to immutable iter, 0 index is already in process
511 slot_cursor: 0,
512 size: self.size
513 }
514 } else {
515 ArrayHashIterMut {
516 // Create an empty iterator so it will be called only once then finish.
517 // We cannot use iter::empty() as the type is incompatible.
518 buckets: vec![[].iter_mut()].into_boxed_slice(),
519 remain_slots: 0,
520 slot_cursor: 0,
521 size: self.size
522 }
523 }
524 }
525
526 /// Return an iterator that drain all entry out of this [ArrayHash](struct.ArrayHash.html).
527 ///
528 /// After the iterator is done, this [ArrayHash](struct.ArrayHash.html) will become empty.
529 ///
530 /// This method will immediately set size to 0.
531 ///
532 /// # Return
533 /// [DrainIter](struct.DrainIter.html) - An iterator that will drain all element
534 /// out of this [ArrayHash](struct.ArrayHash.html).
535 pub fn drain(&mut self) -> DrainIter<K, V> {
536 let mut bucket_iter = self.buckets.as_mut().unwrap().iter_mut();
537 let current_slot = bucket_iter.next();
538 self.size = 0;
539
540 DrainIter {
541 bucket_iter,
542 current_slot,
543 size: self.size
544 }
545 }
546
547 /// Return an iterator that drain some entry out of this [ArrayHash](struct.ArrayHash.html).
548 ///
549 /// After the iterator is done, this [ArrayHash](struct.ArrayHash.html) size will be shrink.
550 ///
551 /// This method will return an iterator where each element it drain will cause a size deduction
552 /// on this [ArrayHash](struct.ArrayHash.html).
553 ///
554 /// # Return
555 /// [DrainWithIter](struct.DrainWithIter.html) - An iterator that will drain all element
556 /// out of this [ArrayHash](struct.ArrayHash.html).
557 pub fn drain_with<F>(&mut self, pred: F) -> DrainWithIter<F, K, V> where F: Fn(&(K, V)) -> bool {
558 let mut bucket_iter = self.buckets.as_mut().unwrap().iter_mut();
559 let current_slot = bucket_iter.next();
560 let size = self.size; // Max size of iterator
561
562 DrainWithIter {
563 bucket_iter,
564 cur_size: &mut self.size,
565 current_slot,
566 predicate: pred,
567 size
568 }
569 }
570
571 /// Split this [ArrayHash](struct.ArrayHash.html) by given predicate closure.
572 /// Every element that closure evaluate to true will be remove from this [ArrayHash](struct.ArrayHash.html)
573 /// and return in new instance of [ArrayHash](struct.ArrayHash.html).
574 ///
575 /// This is different from using [drain_with](struct.ArrayHash.html#method.drain_with) to drain
576 /// some element into another [ArrayHash](struct.ArrayHash.html) by this method will return
577 /// [ArrayHash](struct.ArrayHash.html) with exactly identical property, i.e. Hasher, buckets_size,
578 /// and max_load_factor, whereas [drain_with](struct.ArrayHash.html#method.drain_with) let
579 /// caller instantiate [ArrayHash](struct.ArrayHash.html) yourself.
580 ///
581 /// Since the instant it returns, use the same Hasher. It is safe to assume that all elements shall
582 /// reside in the same bucket number thus this method speed up the split by ignore hashing altogether and
583 /// store the entry directly into the same bucket number as in this [ArrayHash](struct.ArrayHash.html)
584 ///
585 /// # Parameter
586 /// `pred` - A closure that evaluate an entry. If it return true, the entry shall be moved into a new
587 /// [ArrayHash](struct.ArrayHash.html).
588 ///
589 /// # Return
590 /// An [ArrayHash](struct.ArrayHash.html) that contains all entry that `pred` evaluate to true.
591 pub fn split_by<F>(&mut self, pred: F) -> ArrayHash<H, K, V> where F: Fn(&(K, V)) -> bool {
592 let mut other = self.to_builder().build();
593 let buckets = self.buckets.as_mut().unwrap();
594 for i in 0..buckets.len() {
595 let mut j = 0;
596
597 loop {
598 if j >= buckets[i].len() {
599 break;
600 }
601 if pred(&buckets[i][j]) {
602 other.buckets.as_mut().unwrap()[i].push(buckets[i].swap_remove(j));
603 other.size += 1;
604 self.size -= 1;
605 } else {
606 j += 1;
607 }
608 }
609 }
610
611 other
612 }
613
614 /// Since version 0.1.3, any type is acceptable as long as it implements `Hash`.
615 #[inline(always)]
616 fn make_key<T>(&self, key: &T) -> usize where T: core::hash::Hash + ?Sized {
617 let mut local_hasher = self.hasher.clone();
618 key.hash(&mut local_hasher);
619 local_hasher.finish() as usize % self.capacity
620 }
621
622 /// Check if it over scaling threshold. If it is, expand the bucket, rehash all the key, and
623 /// put everything to it new place.
624 /// # Return
625 /// true if it was expanded, false if it doesn't need to expand
626 #[inline(always)]
627 fn maybe_expand(&mut self) -> bool {
628 if self.size < self.max_load_factor {
629 return false
630 }
631
632 let old_buckets = self.buckets.take().unwrap().into_vec();
633 let new_capacity = self.capacity * 2;
634 self.capacity = new_capacity;
635 self.max_load_factor *= 2;
636 // Assume hash is evenly distribute entry in bucket, the new slot size shall be <= old slot size.
637 // This is because the bucket size is doubled.
638 let mut buckets: Vec<Vec<(K, V)>> = (0..new_capacity).map(|_| Vec::with_capacity(old_buckets[0].len())).collect();
639 old_buckets.into_iter().for_each(|slot| {
640 for (key, value) in slot {
641 let index = self.make_key(&key);
642 buckets[index % new_capacity].push((key, value));
643 }
644 });
645 self.buckets = Some(buckets.into_boxed_slice());
646
647 true
648 }
649}
650
651/// An iterator that return a reference to each entry in `ArrayHash`.
652/// It is useful for scanning entire `ArrayHash`.
653#[derive(Debug)]
654pub struct ArrayHashIterator<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {
655 buckets: core::slice::Iter<'a, Vec<(K, V)>>,
656 current_iterator: core::slice::Iter<'a, (K, V)>,
657 size: usize
658}
659
660impl<'a, K, V> Iterator for ArrayHashIterator<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {
661 type Item=&'a (K, V);
662
663 fn next(&mut self) -> Option<Self::Item> {
664 let mut result = self.current_iterator.next();
665
666 while result.is_none() {
667 if let Some(slot) = self.buckets.next() {
668 self.current_iterator = slot.iter();
669 result = self.current_iterator.next();
670 } else {
671 break
672 }
673 }
674
675 result
676 }
677}
678
679impl<'a, K, V> core::iter::FusedIterator for ArrayHashIterator<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {}
680
681impl<'a, K, V> core::iter::ExactSizeIterator for ArrayHashIterator<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {
682 #[inline]
683 fn len(&self) -> usize {
684 self.size
685 }
686}
687
688/// An iterator that return a mutably reference to each entry in `ArrayHash`.
689/// It is useful for scanning entire `ArrayHash` to manipulate it value.
690///
691/// It can cause undesired behavior if user alter the key in place as the slot position is
692/// calculated by hashed value of the key. It might endup having duplicate key on different slot and
693/// anytime caller use [get method](struct.ArrayHash.html#method.get), it will always return that value instead
694/// of this modified key.
695///
696/// If you need to modify key, consider [remove](struct.ArrayHash.html#method.remove) old key first then
697/// [put](struct.ArrayHash.html#method.put) the new key back in.
698#[derive(Debug)]
699pub struct ArrayHashIterMut<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {
700 buckets: Box<[core::slice::IterMut<'a, (K, V)>]>,
701 remain_slots: usize,
702 slot_cursor: usize,
703 size: usize
704}
705
706impl<'a, K, V> Iterator for ArrayHashIterMut<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {
707 type Item=&'a mut (K, V);
708
709 fn next(&mut self) -> Option<Self::Item> {
710 let mut result = self.buckets[self.slot_cursor].next();
711
712 while result.is_none() {
713 if self.slot_cursor < self.remain_slots {
714 self.slot_cursor += 1;
715 result = self.buckets[self.slot_cursor].next();
716 } else {
717 break;
718 }
719 }
720
721 result
722 }
723}
724
725impl<'a, K, V> core::iter::FusedIterator for ArrayHashIterMut<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {}
726
727impl<'a, K, V> core::iter::ExactSizeIterator for ArrayHashIterMut<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {
728 #[inline]
729 fn len(&self) -> usize {
730 self.size
731 }
732}
733
734#[derive(Debug)]
735pub struct ArrayHashIntoIter<K, V> where K: core::hash::Hash + core::cmp::PartialEq {
736 buckets: std::vec::IntoIter<Vec<(K, V)>>,
737 current_iterator: std::vec::IntoIter<(K, V)>,
738 size: usize
739}
740
741impl<K, V> Iterator for ArrayHashIntoIter<K, V> where K: core::hash::Hash + core::cmp::PartialEq {
742 type Item=(K, V);
743
744 fn next(&mut self) -> Option<Self::Item> {
745 let mut result = self.current_iterator.next();
746
747 while result.is_none() {
748 if let Some(slot) = self.buckets.next() {
749 if slot.len() > 0 { // skip those slot that have 0 entry
750 self.current_iterator = slot.into_iter();
751 result = self.current_iterator.next();
752 }
753 } else {
754 // entire ArrayHash is exhausted
755 break
756 }
757 }
758
759 result
760 }
761}
762
763impl<K, V> core::iter::FusedIterator for ArrayHashIntoIter<K, V> where K: core::hash::Hash + core::cmp::PartialEq {}
764
765impl<K, V> core::iter::ExactSizeIterator for ArrayHashIntoIter<K, V> where K: core::hash::Hash + core::cmp::PartialEq {
766 #[inline]
767 fn len(&self) -> usize {
768 self.size
769 }
770}
771
772impl<H, K, V> IntoIterator for ArrayHash<H, K, V> where H: core::hash::Hasher + Clone, K: core::hash::Hash + core::cmp::PartialEq {
773 type Item=(K, V);
774 type IntoIter=ArrayHashIntoIter<K, V>;
775
776 fn into_iter(self) -> Self::IntoIter {
777 if self.size >= 1 {
778 let mut buckets = self.buckets.unwrap().into_vec().into_iter();
779 let current_iterator = buckets.next().unwrap().into_iter();
780 ArrayHashIntoIter {
781 buckets,
782 current_iterator,
783 size: self.size
784 }
785 } else {
786 let mut emptied_bucket = vec![vec![]].into_iter();
787 let emptied_iterator = emptied_bucket.next().unwrap().into_iter();
788 ArrayHashIntoIter {
789 buckets: emptied_bucket,
790 current_iterator: emptied_iterator,
791 size: 0
792 }
793 }
794 }
795}
796
797impl<'a, H, K, V> IntoIterator for &'a ArrayHash<H, K, V> where H: core::hash::Hasher + Clone, K: core::hash::Hash + core::cmp::PartialEq {
798 type Item=&'a(K, V);
799 type IntoIter=ArrayHashIterator<'a, K, V>;
800
801 #[inline]
802 fn into_iter(self) -> Self::IntoIter {
803 self.iter()
804 }
805}
806
807impl<'a, H, K, V> IntoIterator for &'a mut ArrayHash<H, K, V> where H: core::hash::Hasher + Clone, K: core::hash::Hash + core::cmp::PartialEq {
808 type Item=&'a mut (K, V);
809 type IntoIter=ArrayHashIterMut<'a, K, V>;
810
811 #[inline]
812 fn into_iter(self) -> Self::IntoIter {
813 self.iter_mut()
814 }
815}
816
817/// An iterator that will drain it underlying [ArrayHash](struct.ArrayHash.html).
818#[derive(Debug)]
819pub struct DrainIter<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {
820 bucket_iter: core::slice::IterMut<'a, Vec<(K, V)>>,
821 current_slot: Option<&'a mut Vec<(K, V)>>,
822 size: usize,
823}
824
825impl<'a, K, V> Iterator for DrainIter<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {
826 type Item=(K, V);
827
828 fn next(&mut self) -> Option<Self::Item> {
829 let mut result = self.current_slot.as_mut().unwrap().pop();
830
831 while result.is_none() {
832 self.current_slot = self.bucket_iter.next();
833 if self.current_slot.is_some() {
834 result = self.current_slot.as_mut().unwrap().pop();
835 } else {
836 break;
837 }
838 }
839
840 result
841 }
842}
843
844impl<'a, K, V> core::iter::FusedIterator for DrainIter<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {}
845impl<'a, K, V> core::iter::ExactSizeIterator for DrainIter<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {
846 #[inline]
847 fn len(&self) -> usize {
848 self.size
849 }
850}
851
852/// An iterator that remove and return element that satisfy predicate.
853/// It will also update the size of borrowed [ArrayHash](struct.ArrayHash.html) on each
854/// iteration.
855#[derive(Debug)]
856pub struct DrainWithIter<'a, F, K, V> where F: for<'r> Fn(&'r (K, V)) -> bool, K: core::hash::Hash + core::cmp::PartialEq {
857 bucket_iter: core::slice::IterMut<'a, Vec<(K, V)>>,
858 cur_size: &'a mut usize,
859 current_slot: Option<&'a mut Vec<(K, V)>>,
860 predicate: F,
861 size: usize
862}
863
864impl<'a, F, K, V> Iterator for DrainWithIter<'a, F, K, V> where F: for<'r> Fn(&'r (K, V)) -> bool, K: core::hash::Hash + core::cmp::PartialEq {
865 type Item=(K, V);
866
867 fn next(&mut self) -> Option<Self::Item> {
868 while let Some(ref mut v) = self.current_slot {
869 for i in 0..v.len() {
870 if (self.predicate)(&v[i]) {
871 // Found match
872 *self.cur_size -= 1;
873 return Some(v.swap_remove(i))
874 }
875 }
876
877 loop {
878 self.current_slot = self.bucket_iter.next();
879 if self.current_slot.is_some() {
880 if self.current_slot.as_ref().unwrap().len() == 0 {
881 // Keep iterating until non-empty slot is found
882 continue;
883 } else {
884 // Found bucket that has some slot to evaluate
885 break;
886 }
887 } else {
888 // All slot in every buckets are evaulated now
889 return None
890 }
891 }
892 }
893
894 None
895 }
896}
897
898impl<'a, F, K, V> core::iter::FusedIterator for DrainWithIter<'a, F, K, V> where F: for<'r> Fn(&'r (K, V)) -> bool, K: core::hash::Hash + core::cmp::PartialEq {}
899impl<'a, F, K, V> core::iter::ExactSizeIterator for DrainWithIter<'a, F, K, V> where F: for<'r> Fn(&'r (K, V)) -> bool, K: core::hash::Hash + core::cmp::PartialEq {
900 #[inline]
901 fn len(&self) -> usize {
902 self.size
903 }
904}
905
906#[cfg(test)]
907mod tests;