Skip to main content

ahtable/
lib.rs

1//! An implementation of ArrayHash
2//! 
3//! ArrayHash is a data structure where the index is determined by hash and
4//! each entry in array is a `Vec` that store data and all it collision.
5//! 
6//! Oritinal paper can be found [here](Askitis, N. & Zobel, J. (2005), Cache-conscious collision resolution for string hash tables, in ‘Proc. SPIRE String Processing and Information Retrieval Symp.’, Springer-Verlag, pp. 92–104)
7//! 
8//! This implementation try to use generic wherever possible.
9//! It end up with ArrayHash that take anything that is clonable as value and anything that
10//! implement `Hash` and `PartialEq` as key.
11//! It let you choose whichever `Hasher` that you want. The only constraint is that
12//! `Hasher` must implement `Clone` trait.
13//! 
14//! It supports read only iteration, mutably iteration, and owned iteration.
15//! 
16//! To create [ArrayHash](struct.ArrayHash.html) use [ArrayHashBuilder](struct.ArrayHashBuilder.html).
17//! The default `Hasher` is `XxHasher64`.
18
19use twox_hash::{RandomXxHashBuilder64, XxHash64};
20use core::hash::{BuildHasher, Hash, Hasher};
21use core::borrow::Borrow;
22
23const MAX_LOAD_FACTOR: usize = 100_000; // Number of element before resize the table
24
25// Make each bucket fit into single memory page
26const DEFAULT_BUCKETS_SIZE: usize = 4096 / std::mem::size_of::<usize>(); 
27const DEFAULT_SLOT_SIZE: usize = 8;
28
29/// A builder that use for build an [ArrayHash](struct.ArrayHash.html).
30#[derive(Clone, Hash, PartialEq)]
31pub struct ArrayHashBuilder<H> {
32    hasher: H,
33    buckets_size: usize,
34    max_load_factor: usize,
35    slot_size: usize
36}
37
38/// Create new ArrayHashBuilder with default hasher and size.
39/// As currently is, the default allocated number of slot per bucket is (4096 / size of usize) slots.
40/// Each slot has 8 elements. It will use `XxHash64` as default hasher
41/// 
42/// Since all slots are Vec, it will be re-allocate if it grow larger than this default.
43/// The number of slots per bucket will be held until the number of entry grew pass `max_load_factor`.
44/// When it reach the `max_load_factor`, it will double the bucket size.
45impl Default for ArrayHashBuilder<XxHash64> {
46    fn default() -> ArrayHashBuilder<XxHash64> {
47        ArrayHashBuilder {
48            hasher: RandomXxHashBuilder64::default().build_hasher(),
49            buckets_size: DEFAULT_BUCKETS_SIZE,
50            max_load_factor: MAX_LOAD_FACTOR,
51            slot_size: DEFAULT_SLOT_SIZE
52        }
53    }
54}
55
56/// Make [ArrayHashBuilder](struct.ArrayHashBuilder.html) with spec from existing [ArrayHash](struct.ArrayHash.html).
57/// 
58/// This is useful for creating an object of array hash that may be later compare to baseline array for equality.
59impl<'a, H, K, V> From<&'a ArrayHash<H, K, V>> for ArrayHashBuilder<H> where H: Clone + Hasher, K: Hash + PartialEq {
60    fn from(ah: &'a ArrayHash<H, K, V>) -> ArrayHashBuilder<H> {
61        let buckets = ah.buckets.as_ref().unwrap();
62        
63        debug_assert!(buckets.len() > 0);
64
65        ArrayHashBuilder {
66            buckets_size: buckets.len(),
67            hasher: ah.hasher.clone(),
68            max_load_factor: ah.max_load_factor,
69            slot_size: buckets[0].len()
70        }
71    }
72}
73
74impl<H> ArrayHashBuilder<H> where H: core::hash::Hasher {
75    /// Create new ArrayHashBuilder by using given hasher.
76    /// As currently is, the default allocated number of slot per bucket is (4096 / size of usize) slots.
77    /// 
78    /// Since all slots are Vec, it will be re-allocate if it grow larger than this default.
79    #[inline]
80    pub fn with_hasher(hasher: H) -> ArrayHashBuilder<H> {
81        ArrayHashBuilder {
82            hasher: hasher,
83            buckets_size: DEFAULT_BUCKETS_SIZE,
84            max_load_factor: MAX_LOAD_FACTOR,
85            slot_size: DEFAULT_SLOT_SIZE
86        }
87    }
88
89    /// Switch hasher to other hasher. This will consume current builder and
90    /// return a new one with new builder
91    #[inline]
92    pub fn hasher<H2>(self, hasher: H2) -> ArrayHashBuilder<H2> {
93        ArrayHashBuilder {
94            hasher,
95            buckets_size: self.buckets_size,
96            max_load_factor: self.max_load_factor,
97            slot_size: self.slot_size
98        }
99    }
100
101    /// Change buckets size of [ArrayHasher](struct.ArrayHasher.html).
102    /// Buckets size scale once max_load_factor is reached.
103    /// The new size after it scaled is double of old size.
104    /// 
105    /// # Parameter
106    /// `size` - A number of buckets in this table. It must be greater than 0.
107    pub fn buckets_size(mut self, size: usize) -> Self {
108        debug_assert!(size > 0);
109        self.buckets_size = size;
110        self
111    }
112
113    /// Change max number of entry before double the buckets size.
114    /// 
115    /// # Parameter
116    /// `factor` - A number of item before it double the bucket size.
117    pub fn max_load_factor(mut self, factor: usize) -> Self {
118        debug_assert!(factor > 0);
119        self.max_load_factor = factor;
120        self
121    }
122
123    /// Default initialized slot size. Every slot in bucket will be 
124    /// allocated by given size. 
125    /// Keep in mind that each slot is a `vec`. It can grow pass this
126    /// number. It'd be best to give a proper estimation to prevent unnecessary
127    /// re-allocation.
128    /// 
129    /// # Parameter
130    /// `size` - A default size for each slot. 
131    pub fn slot_size(mut self, size: usize) -> Self {
132        self.slot_size = size;
133        self
134    }
135
136    /// Consume this builder and construct a new [ArrayHash](struct.ArrayHash.html)
137    pub fn build<K, V>(self) -> ArrayHash<H, K, V> where H: core::hash::Hasher + Clone, K: core::hash::Hash + core::cmp::PartialEq {
138        ArrayHash {
139            buckets: Some((0..self.buckets_size).map(|_| Vec::with_capacity(self.slot_size)).collect()),
140            hasher: self.hasher,
141            capacity: self.buckets_size,
142            max_load_factor: self.max_load_factor,
143            size: 0
144        }
145    }
146}
147
148/// An implementation of ArrayHash in pure Rust.
149/// 
150/// ArrayHash is a data structure where the index is determined by hash and
151/// each entry in array is a `Vec` that store data and all it collision.
152/// 
153/// Oritinal paper can be found [here](Askitis, N. & Zobel, J. (2005), Cache-conscious collision resolution for string hash tables, in ‘Proc. SPIRE String Processing and Information Retrieval Symp.’, Springer-Verlag, pp. 92–104)
154/// 
155/// In this implementation, user can supplied their own choice of hasher but it need to implement `Clone`.
156/// 
157/// The data can be anything that implement `Hash`. 
158/// 
159/// The default `Hasher`, if not provided, will be `XxHash64`.
160#[derive(Clone, Debug)]
161pub struct ArrayHash<H, K, V> where H: core::hash::Hasher + Clone, K: core::hash::Hash + PartialEq {
162    buckets: Option<Box<[Vec<(K, V)>]>>,
163    hasher: H,
164    capacity: usize,
165    max_load_factor: usize,
166    size: usize
167}
168
169/// Generalize implementation that let two array hash of different type of key and value to be
170/// comparable. 
171/// It requires that both side must use the same hasher with exactly same seed.
172/// It rely on a contract that if any `K1 == K2` then it mean those two hash is also
173/// equals.
174impl<H, K1, V1, K2, V2> PartialEq<ArrayHash<H, K1, V1>> for ArrayHash<H, K2, V2>
175where H: Clone + Hasher + PartialEq,
176      K1: Hash + PartialEq + PartialEq<K2>,
177      V1: PartialEq<V2>,
178      K2: Hash + PartialEq
179{
180    fn eq(&self, rhs: &ArrayHash<H, K1, V1>) -> bool {
181        self.is_hasher_eq(rhs) && 
182        self.capacity == rhs.capacity &&
183        self.size == rhs.size &&
184        rhs.buckets.as_deref().unwrap().iter().zip(self.buckets.as_deref().unwrap().iter()).all(|(rhs, lhs)| {
185            rhs.len() == lhs.len() && // Guard case where one side is prefix array of another
186            rhs.iter().zip(lhs.iter()).all(|((k1, v1), (k2, v2))| {k1 == k2 && v1 == v2})
187        })
188    }
189}
190
191/// Implement hash by using only `K` and `V` pair to calculate hash.
192/// It will still conform to following contract:
193/// For any `AH1 ==  AH2`, `hash(AH1) == hash(AH2)`.
194/// 
195/// This is because the `PartialEq` implementation check if both side have
196/// the same size, same hasher, same number of items, and exactly the same
197/// key and value pair returned by each yield of both AH1's and AH2's iterator. 
198/// However, hasher contract need no symmetric property.
199/// It mean that `hash(AH1) == hash(AH2) ` doesn't imply that `AH1 == AH2`.
200/// Thus, we will have the same hash if we iterate through array and hash both key and value
201/// for each yield on both `AH1` and `AH2`.
202impl<H, K, V> Hash for ArrayHash<H, K, V> where H: Hasher + Clone, K: Hash + PartialEq, V: Hash {
203    fn hash<H2: Hasher>(&self, hasher: &mut H2) {
204        // Since rust iterate on slice to hash and `Box<T>` is hash by deref into `T`,
205        // we can simply hash on `self.buckets`. It is equals to iterate on each
206        // inner element then hash each individual of it.
207        self.buckets.hash(hasher);
208    }
209}
210
211impl<H, K, V> ArrayHash<H, K, V> where H: core::hash::Hasher + Clone, K: core::hash::Hash + PartialEq {
212
213    /// Make a builder with default specification equals to current specification of this
214    /// array. 
215    /// 
216    /// Note: The current specification of array may be different from the spec used to create
217    /// the array. For example, if original `max_load_factor` is `2` but 3 elements were put
218    /// into array, the `max_load_factor` will be `4` and the `bucket_size` will be double of 
219    /// the original `bucket_size`.
220    #[inline]
221    pub fn to_builder(&self) -> ArrayHashBuilder<H> {
222        ArrayHashBuilder::from(self)
223    }
224
225    /// Check if two array use the same hasher with exactly same seed.
226    /// This mean that value `A == B` on these two array will have exactly the same hash value.
227    #[inline]
228    pub fn is_hasher_eq<K2, V2>(&self, rhs: &ArrayHash<H, K2, V2>) -> bool
229    where H: PartialEq,
230          K2: Hash + PartialEq
231    {
232        self.hasher == rhs.hasher
233    }
234
235    /// Add or replace entry into this `HashMap`.
236    /// If entry is replaced, it will be return in `Option`.
237    /// Otherwise, it return `None`
238    /// 
239    /// # Parameter
240    /// - `entry` - A tuple to be add to this.
241    /// 
242    /// # Return
243    /// Option that contains tuple of (key, value) that got replaced or `None` if it is
244    /// new entry
245    pub fn put(&mut self, key: K, value: V) -> Option<(K, V)> {
246        let mut index = self.make_key(&key);
247        let result;
248
249        if let Some(i) = self.buckets.as_mut().unwrap()[index].iter().position(|(k, _)| *k == key) {
250            result = Some(self.buckets.as_mut().unwrap()[index].swap_remove(i));
251        } else {
252            self.size += 1;
253
254            if self.maybe_expand() {
255                index = self.make_key(&key);
256            }
257
258            result = None
259        }
260
261        self.buckets.as_mut().unwrap()[index].push((key, value));
262
263        result
264    }
265
266    /// Try to put value into this `ArrayHash`.
267    /// If the given key is already in used, leave old entry as is 
268    /// and return key/value along with current reference to value associated with the key.
269    /// Otherwise, add entry to this `ArrayHash` and return reference to current value.
270    /// 
271    /// # Parameter
272    /// - `entry` - A tuple of (key, value) to be add to this.
273    /// # Return
274    /// It return `Ok(&V)` if key/value were put into this collection.
275    /// Otherwise, it return `Err((K, V, &V))` where `K` is given key,
276    /// `V` is given value, and `&V` is reference to current value associated
277    /// with given key
278    pub fn try_put(&mut self, key: K, value: V) -> Result<&V, (K, V, &V)> {
279        let mut index = self.make_key(&key);
280
281        if let Some(i) = self.buckets.as_ref().unwrap()[index].iter().position(|(k, _)| *k == key) {
282            Err((key, value, &self.buckets.as_ref().unwrap()[index][i].1))
283        } else {
284            self.size += 1;
285            if self.maybe_expand() {
286                index = self.make_key(&key);
287            }
288            let bucket = &mut self.buckets.as_mut().unwrap()[index];
289            bucket.push((key, value));
290            Ok(&bucket[bucket.len() - 1].1)
291        }
292    }
293
294    /// Get a value of given key from this `ArrayHash` relying on contractual
295    /// implementation of `PartialEq` and `Hash` where following contract applied:
296    /// - for any `A == B` then `B == A` then hash of `A` == hash of `B`
297    /// - for any `A == B` and `B == C` then `A == C` then hash of `A` == hash of `B` == hash of `C`
298    /// 
299    /// # Parameter
300    /// - `key` - A key to look for. 
301    /// 
302    /// # Return
303    /// An `Option` contains a value or `None` if it is not found.
304    pub fn get<T>(&self, key: &T) -> Option<&V> where T: core::hash::Hash + PartialEq<K> {
305        let index = self.make_key(&key);
306        let slot = &self.buckets.as_ref().unwrap()[index];
307        return slot.iter().find(|(k, _)| {key == k}).map(|(_, v)| v)
308
309        // for (ref k, ref v) in slot.iter() {
310        //     if *k == *key {
311        //         return Some(v)
312        //     }
313        // }
314        // None
315    }
316
317    /// Get a value using deref type.
318    /// 
319    /// This is usable only if the key is a type of smart pointer that can be deref into another type
320    /// which implement `Hash` and `PartialEq`.
321    /// 
322    /// For example, if K is `Box<[u8]>`, you can use `&[u8]` to query for a value
323    /// 
324    /// # Parameter
325    /// `key` - Any type that implement `Deref` where type after `Deref` is that same type
326    /// as actual type of `key` beneath type `K`.
327    /// 
328    /// # Return
329    /// `Some(&V)` if key exist in this table. Otherwise None.
330    pub fn smart_get<T, Q>(&self, key: Q) -> Option<&V> where Q: core::ops::Deref<Target=T>, K: core::ops::Deref<Target=T>, T: core::hash::Hash + core::cmp::PartialEq {
331        let index = self.make_key(&*key);
332
333        let slot = &self.buckets.as_ref().unwrap()[index];
334        slot.iter().find(|(k, _)| **k == *key).map(|(_, v)| v)
335
336        // for (ref k, ref v) in slot.iter() {
337        //     if **k == *key {
338        //         return Some(v)
339        //     }
340        // }
341        // None
342    }
343
344    /// Get a value of given key from this `ArrayHash` relying on contractual
345    /// implementation of `PartialEq` and `Hash` where following contract applied:
346    /// - `B` can be borrowed as type `A`
347    /// - for any `A == B` then `B == A` then hash of `A` == hash of `B`
348    /// - for any `A == B` and `B == C` then `A == C` then hash of `A` == hash of `B` == hash of `C`
349    /// - for any `&A == &B` then `A == B`
350    /// 
351    /// It is useful for case where the stored key and query key is different type but the stored key
352    /// can be borrow into the same type as query. For example, stored `Vec<T>` but query with `&[T]`.
353    /// It isn't possible to use [get](struct.ArrayHash.html#method.get) method as `[T]` 
354    /// isn't implement `PartialEq<Vec<T>>`.
355    /// 
356    /// # Parameter
357    /// - `key` - A key to look for. 
358    /// 
359    /// # Return
360    /// An `Option` contains a value or `None` if it is not found.
361    pub fn coerce_get<T>(&self, key: &T) -> Option<&V> where T: core::hash::Hash + PartialEq + ?Sized, K: Borrow<T> {
362        let index = self.make_key(key);
363        let slot = &self.buckets.as_ref().unwrap()[index];
364        return slot.iter().find(|(k, _)| {key == k.borrow()}).map(|(_, v)| v)
365
366        // for (ref k, ref v) in slot.iter() {
367        //     if *k == *key {
368        //         return Some(v)
369        //     }
370        // }
371        // None
372    }
373
374    /// Attempt to remove entry with given key from this `ArrayHash` relying on contractual
375    /// implementation of `PartialEq` and `Hash` where following contract applied:
376    /// - for any `A == B` then `B == A` then hash of `A` == hash of `B`
377    /// - for any `A == B` and `B == C` then `A == C` then hash of `A` == hash of `B` == hash of `C`
378    /// 
379    /// # Parameter
380    /// - `key` - A key of entry to be remove.
381    /// 
382    /// # Return
383    /// Option that contain tuple of (key, value) or `None` of key is not found
384    pub fn remove<T>(&mut self, key: &T) -> Option<(K, V)> where T: core::hash::Hash + PartialEq<K> {
385        let slot_idx = self.make_key(key);
386        let slot = self.buckets.as_mut().unwrap();
387        let entry_idx = slot[slot_idx].iter().position(|(k, _)| {key == k});
388        if let Some(i) = entry_idx {
389            self.size -= 1;
390            Some(slot[slot_idx].swap_remove(i))
391        } else {
392            None
393        }
394    }
395
396    /// Attempt to remove entry with given key from this `ArrayHash`.
397    /// 
398    /// This is usable only if the key is a type of smart pointer that can be deref into another type
399    /// which implement `Hash` and `PartialEq`.
400    /// 
401    /// For example, if K is `Box<[u8]>`, you can use `&[u8]` to remove it.
402    /// 
403    /// # Parameter
404    /// - `key` - A key of entry to be remove.
405    /// 
406    /// # Return
407    /// Option that contain tuple of (key, value) or `None` of key is not found
408    pub fn smart_remove<Q, T>(&mut self, key: Q) -> Option<(K, V)>  where Q: core::ops::Deref<Target=T>, K: core::ops::Deref<Target=T>, T: core::hash::Hash + core::cmp::PartialEq {
409        let slot_idx = self.make_key(&*key);
410        let slot = self.buckets.as_mut().unwrap();
411        let entry_idx = slot[slot_idx].iter().position(|(k, _)| {*key == **k});
412        if let Some(i) = entry_idx {
413            self.size -= 1;
414            Some(slot[slot_idx].swap_remove(i))
415        } else {
416            None
417        }
418    }
419
420    /// Attempt to remove entry with given key from this `ArrayHash` relying on contractual
421    /// implementation of `PartialEq` and `Hash` where following contract applied:
422    /// - `B` can be borrowed as type `A`
423    /// - for any `A == B` then `B == A` then hash of `A` == hash of `B`
424    /// - for any `A == B` and `B == C` then `A == C` then hash of `A` == hash of `B` == hash of `C`
425    /// - for any `&A == &B` then `A == B`
426    /// 
427    /// It is useful for case where the stored key and query key is different type but the stored key
428    /// can be borrow into the same type as query. For example, stored `Vec<T>` but query with `&[T]`.
429    /// It isn't possible to use [remove](struct.ArrayHash.html#method.remove) method as `[T]` 
430    /// isn't implement `PartialEq<Vec<T>>`.
431    /// 
432    /// # Parameter
433    /// - `key` - A key of entry to be remove.
434    /// 
435    /// # Return
436    /// Option that contain tuple of (key, value) or `None` of key is not found
437    pub fn coerce_remove<T>(&mut self, key: &T) -> Option<(K, V)> where T: core::hash::Hash + PartialEq + ?Sized, K: Borrow<T> {
438        let slot_idx = self.make_key(key);
439        let slot = self.buckets.as_mut().unwrap();
440        let entry_idx = slot[slot_idx].iter().position(|(k, _)| {key == k.borrow()});
441        if let Some(i) = entry_idx {
442            self.size -= 1;
443            Some(slot[slot_idx].swap_remove(i))
444        } else {
445            None
446        }
447    }
448
449    /// Current number of entry in this `ArrayHash`
450    #[inline]
451    pub fn len(&self) -> usize {
452        self.size
453    }
454
455    /// Check if this array hash contains every entry found in given other iterator that yield
456    /// `&(K, V)` and `V` implements `PartialEq`.
457    /// 
458    /// # Parameter
459    /// - `other` - A type that implement `IntoIterator<Item=&(K, V)>`
460    /// 
461    /// # Return
462    /// true if this array hash contains every entry that other iterator yield. Otherwise, false.
463    pub fn contains_iter<'a, I>(&self, other: I) -> bool where I: IntoIterator<Item=&'a (K, V)>, K: 'a, V: 'a + PartialEq {
464        for (key, value) in other.into_iter() {
465            if let Some(v) = self.get(key) {
466                if v != value {
467                    return false
468                }
469            } else {
470                return false
471            }
472        }
473
474        true
475    }
476
477    /// Get an iterator over this `ArrayHash`. 
478    /// 
479    /// # Return
480    /// [ArrayHashIterator](struct.ArrayHashIterator.html) that return reference
481    /// to each entry in this `ArrayHash`
482    pub fn iter(&self) -> ArrayHashIterator<'_, K, V> {
483        let slots = self.buckets.as_ref().unwrap();
484        let mut buckets = slots.iter();
485        let first_iter = buckets.next().unwrap().iter();
486        ArrayHashIterator {
487            buckets: buckets,
488            current_iterator: first_iter,
489            size: self.size
490        }
491    }
492
493    /// Get a mutable iterator over this `ArrayHash`.
494    /// 
495    /// Warning, you shall not modify the key part of entry. If you do, it might end up
496    /// accessible only by iterator but not with [get](struct.ArrayHash.html#method.get).
497    /// 
498    /// # Return
499    /// [ArrayHashIterMut](struct.ArrayHashIterMut.html) that return mutably reference
500    /// to each entry in this `ArrayHash`
501    pub fn iter_mut(&mut self) -> ArrayHashIterMut<'_, K, V> {
502        if self.size > 0 {
503            let buckets: Box<[core::slice::IterMut<(K, V)>]> = self.buckets.as_mut().unwrap().iter_mut().filter_map(|slot| {
504                if slot.len() > 0 {Some(slot.iter_mut())} else {None} 
505            }).collect();
506            let remain_slots = buckets.len() - 1;
507            ArrayHashIterMut {
508                // Only get iter_mut from entry with some element
509                buckets,
510                remain_slots, // similar to immutable iter, 0 index is already in process
511                slot_cursor: 0,
512                size: self.size
513            }
514        } else {
515            ArrayHashIterMut {
516                // Create an empty iterator so it will be called only once then finish.
517                // We cannot use iter::empty() as the type is incompatible.
518                buckets: vec![[].iter_mut()].into_boxed_slice(),
519                remain_slots: 0,
520                slot_cursor: 0,
521                size: self.size
522            }
523        }
524    }
525
526    /// Return an iterator that drain all entry out of this [ArrayHash](struct.ArrayHash.html).
527    /// 
528    /// After the iterator is done, this [ArrayHash](struct.ArrayHash.html) will become empty.
529    /// 
530    /// This method will immediately set size to 0.
531    /// 
532    /// # Return
533    /// [DrainIter](struct.DrainIter.html) - An iterator that will drain all element
534    /// out of this [ArrayHash](struct.ArrayHash.html).
535    pub fn drain(&mut self) -> DrainIter<K, V> {
536        let mut bucket_iter = self.buckets.as_mut().unwrap().iter_mut();
537        let current_slot = bucket_iter.next();
538        self.size = 0;
539
540        DrainIter {
541            bucket_iter,
542            current_slot,
543            size: self.size
544        }
545    }
546
547    /// Return an iterator that drain some entry out of this [ArrayHash](struct.ArrayHash.html).
548    /// 
549    /// After the iterator is done, this [ArrayHash](struct.ArrayHash.html) size will be shrink.
550    /// 
551    /// This method will return an iterator where each element it drain will cause a size deduction
552    /// on this [ArrayHash](struct.ArrayHash.html).
553    /// 
554    /// # Return
555    /// [DrainWithIter](struct.DrainWithIter.html) - An iterator that will drain all element
556    /// out of this [ArrayHash](struct.ArrayHash.html).
557    pub fn drain_with<F>(&mut self, pred: F) -> DrainWithIter<F, K, V> where F: Fn(&(K, V)) -> bool {
558        let mut bucket_iter = self.buckets.as_mut().unwrap().iter_mut();
559        let current_slot = bucket_iter.next();
560        let size = self.size; // Max size of iterator
561
562        DrainWithIter {
563            bucket_iter,
564            cur_size: &mut self.size,
565            current_slot,
566            predicate: pred,
567            size
568        }
569    }
570
571    /// Split this [ArrayHash](struct.ArrayHash.html) by given predicate closure. 
572    /// Every element that closure evaluate to true will be remove from this [ArrayHash](struct.ArrayHash.html)
573    /// and return in new instance of [ArrayHash](struct.ArrayHash.html).
574    /// 
575    /// This is different from using [drain_with](struct.ArrayHash.html#method.drain_with) to drain
576    /// some element into another [ArrayHash](struct.ArrayHash.html) by this method will return 
577    /// [ArrayHash](struct.ArrayHash.html) with exactly identical property, i.e. Hasher, buckets_size,
578    /// and max_load_factor, whereas [drain_with](struct.ArrayHash.html#method.drain_with) let
579    /// caller instantiate [ArrayHash](struct.ArrayHash.html) yourself.
580    /// 
581    /// Since the instant it returns, use the same Hasher. It is safe to assume that all elements shall
582    /// reside in the same bucket number thus this method speed up the split by ignore hashing altogether and
583    /// store the entry directly into the same bucket number as in this [ArrayHash](struct.ArrayHash.html)
584    /// 
585    /// # Parameter
586    /// `pred` - A closure that evaluate an entry. If it return true, the entry shall be moved into a new 
587    /// [ArrayHash](struct.ArrayHash.html).
588    /// 
589    /// # Return
590    /// An [ArrayHash](struct.ArrayHash.html) that contains all entry that `pred` evaluate to true.
591    pub fn split_by<F>(&mut self, pred: F) -> ArrayHash<H, K, V> where F: Fn(&(K, V)) -> bool {
592        let mut other = self.to_builder().build();
593        let buckets = self.buckets.as_mut().unwrap();
594        for i in 0..buckets.len() {
595            let mut j = 0;
596
597            loop {
598                if j >= buckets[i].len() {
599                    break;
600                }
601                if pred(&buckets[i][j]) {
602                    other.buckets.as_mut().unwrap()[i].push(buckets[i].swap_remove(j));
603                    other.size += 1;
604                    self.size -= 1;
605                } else {
606                    j += 1;
607                }
608            }
609        }
610
611        other
612    }
613
614    /// Since version 0.1.3, any type is acceptable as long as it implements `Hash`.
615    #[inline(always)]
616    fn make_key<T>(&self, key: &T) -> usize where T: core::hash::Hash + ?Sized {
617        let mut local_hasher = self.hasher.clone();
618        key.hash(&mut local_hasher);
619        local_hasher.finish() as usize % self.capacity
620    }
621
622    /// Check if it over scaling threshold. If it is, expand the bucket, rehash all the key, and
623    /// put everything to it new place.
624    /// # Return
625    /// true if it was expanded, false if it doesn't need to expand
626    #[inline(always)]
627    fn maybe_expand(&mut self) -> bool {
628        if self.size < self.max_load_factor {
629            return false
630        }
631
632        let old_buckets = self.buckets.take().unwrap().into_vec();
633        let new_capacity = self.capacity * 2;
634        self.capacity = new_capacity;
635        self.max_load_factor *= 2;
636        // Assume hash is evenly distribute entry in bucket, the new slot size shall be <= old slot size.
637        // This is because the bucket size is doubled.
638        let mut buckets: Vec<Vec<(K, V)>> = (0..new_capacity).map(|_| Vec::with_capacity(old_buckets[0].len())).collect();
639        old_buckets.into_iter().for_each(|slot| {
640            for (key, value) in slot {
641                let index = self.make_key(&key);
642                buckets[index % new_capacity].push((key, value));
643            }
644        });
645        self.buckets = Some(buckets.into_boxed_slice());
646
647        true
648    }
649}
650
651/// An iterator that return a reference to each entry in `ArrayHash`.
652/// It is useful for scanning entire `ArrayHash`.
653#[derive(Debug)]
654pub struct ArrayHashIterator<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {
655    buckets: core::slice::Iter<'a, Vec<(K, V)>>,
656    current_iterator: core::slice::Iter<'a, (K, V)>,
657    size: usize
658}
659
660impl<'a, K, V> Iterator for ArrayHashIterator<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {
661    type Item=&'a (K, V);
662
663    fn next(&mut self) -> Option<Self::Item> {
664        let mut result = self.current_iterator.next();
665
666        while result.is_none() {
667            if let Some(slot) = self.buckets.next() {
668                self.current_iterator = slot.iter();
669                result = self.current_iterator.next();
670            } else {
671                break
672            }
673        }
674
675        result
676    }
677}
678
679impl<'a, K, V> core::iter::FusedIterator for ArrayHashIterator<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {}
680
681impl<'a, K, V> core::iter::ExactSizeIterator for ArrayHashIterator<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {
682    #[inline]
683    fn len(&self) -> usize {
684        self.size
685    }
686}
687
688/// An iterator that return a mutably reference to each entry in `ArrayHash`.
689/// It is useful for scanning entire `ArrayHash` to manipulate it value.
690/// 
691/// It can cause undesired behavior if user alter the key in place as the slot position is
692/// calculated by hashed value of the key. It might endup having duplicate key on different slot and
693/// anytime caller use [get method](struct.ArrayHash.html#method.get), it will always return that value instead
694/// of this modified key.
695/// 
696/// If you need to modify key, consider [remove](struct.ArrayHash.html#method.remove) old key first then
697/// [put](struct.ArrayHash.html#method.put) the new key back in.
698#[derive(Debug)]
699pub struct ArrayHashIterMut<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {
700    buckets: Box<[core::slice::IterMut<'a, (K, V)>]>,
701    remain_slots: usize,
702    slot_cursor: usize,
703    size: usize
704}
705
706impl<'a, K, V> Iterator for ArrayHashIterMut<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {
707    type Item=&'a mut (K, V);
708
709    fn next(&mut self) -> Option<Self::Item> {
710        let mut result = self.buckets[self.slot_cursor].next();
711        
712        while result.is_none() {
713            if self.slot_cursor < self.remain_slots {
714                self.slot_cursor += 1;
715                result = self.buckets[self.slot_cursor].next();
716            } else {
717                break;
718            }
719        }
720
721        result
722    }
723}
724
725impl<'a, K, V> core::iter::FusedIterator for ArrayHashIterMut<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {}
726
727impl<'a, K, V> core::iter::ExactSizeIterator for ArrayHashIterMut<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {
728    #[inline]
729    fn len(&self) -> usize {
730        self.size
731    }
732}
733
734#[derive(Debug)]
735pub struct ArrayHashIntoIter<K, V> where K: core::hash::Hash + core::cmp::PartialEq {
736    buckets: std::vec::IntoIter<Vec<(K, V)>>,
737    current_iterator: std::vec::IntoIter<(K, V)>,
738    size: usize
739}
740
741impl<K, V> Iterator for ArrayHashIntoIter<K, V> where K: core::hash::Hash + core::cmp::PartialEq {
742    type Item=(K, V);
743
744    fn next(&mut self) -> Option<Self::Item> {
745        let mut result = self.current_iterator.next();
746
747        while result.is_none() {
748            if let Some(slot) = self.buckets.next() {
749                if slot.len() > 0 { // skip those slot that have 0 entry
750                    self.current_iterator = slot.into_iter();
751                    result = self.current_iterator.next();
752                }
753            } else {
754                // entire ArrayHash is exhausted
755                break
756            }
757        }
758
759        result
760    }
761}
762
763impl<K, V> core::iter::FusedIterator for ArrayHashIntoIter<K, V> where K: core::hash::Hash + core::cmp::PartialEq {}
764
765impl<K, V> core::iter::ExactSizeIterator for ArrayHashIntoIter<K, V> where K: core::hash::Hash + core::cmp::PartialEq {
766    #[inline]
767    fn len(&self) -> usize {
768        self.size
769    }
770}
771
772impl<H, K, V> IntoIterator for ArrayHash<H, K, V> where H: core::hash::Hasher + Clone, K: core::hash::Hash + core::cmp::PartialEq {
773    type Item=(K, V);
774    type IntoIter=ArrayHashIntoIter<K, V>;
775
776    fn into_iter(self) -> Self::IntoIter {
777        if self.size >= 1 {
778            let mut buckets = self.buckets.unwrap().into_vec().into_iter();
779            let current_iterator = buckets.next().unwrap().into_iter();
780            ArrayHashIntoIter {
781                buckets,
782                current_iterator,
783                size: self.size
784            }
785        } else {
786            let mut emptied_bucket = vec![vec![]].into_iter();
787            let emptied_iterator = emptied_bucket.next().unwrap().into_iter();
788            ArrayHashIntoIter {
789                buckets: emptied_bucket,
790                current_iterator: emptied_iterator,
791                size: 0
792            }
793        }
794    }
795}
796
797impl<'a, H, K, V> IntoIterator for &'a ArrayHash<H, K, V> where H: core::hash::Hasher + Clone, K: core::hash::Hash + core::cmp::PartialEq {
798    type Item=&'a(K, V);
799    type IntoIter=ArrayHashIterator<'a, K, V>;
800
801    #[inline]
802    fn into_iter(self) -> Self::IntoIter {
803        self.iter()
804    }
805}
806
807impl<'a, H, K, V> IntoIterator for &'a mut ArrayHash<H, K, V> where H: core::hash::Hasher + Clone, K: core::hash::Hash + core::cmp::PartialEq {
808    type Item=&'a mut (K, V);
809    type IntoIter=ArrayHashIterMut<'a, K, V>;
810
811    #[inline]
812    fn into_iter(self) -> Self::IntoIter {
813        self.iter_mut()
814    }
815}
816
817/// An iterator that will drain it underlying [ArrayHash](struct.ArrayHash.html).
818#[derive(Debug)]
819pub struct DrainIter<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {
820    bucket_iter: core::slice::IterMut<'a, Vec<(K, V)>>,
821    current_slot: Option<&'a mut Vec<(K, V)>>,
822    size: usize,
823}
824
825impl<'a, K, V> Iterator for DrainIter<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {
826    type Item=(K, V);
827
828    fn next(&mut self) -> Option<Self::Item> {
829        let mut result = self.current_slot.as_mut().unwrap().pop();
830
831        while result.is_none() {
832            self.current_slot = self.bucket_iter.next();
833            if self.current_slot.is_some() {
834                result = self.current_slot.as_mut().unwrap().pop();
835            } else {
836                break;
837            }
838        }
839
840        result
841    }
842}
843
844impl<'a, K, V> core::iter::FusedIterator for DrainIter<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {}
845impl<'a, K, V> core::iter::ExactSizeIterator for DrainIter<'a, K, V> where K: core::hash::Hash + core::cmp::PartialEq {
846    #[inline]
847    fn len(&self) -> usize {
848        self.size
849    }
850}
851
852/// An iterator that remove and return element that satisfy predicate.
853/// It will also update the size of borrowed [ArrayHash](struct.ArrayHash.html) on each
854/// iteration.
855#[derive(Debug)]
856pub struct DrainWithIter<'a, F, K, V> where F: for<'r> Fn(&'r (K, V)) -> bool, K: core::hash::Hash + core::cmp::PartialEq {
857    bucket_iter: core::slice::IterMut<'a, Vec<(K, V)>>,
858    cur_size: &'a mut usize,
859    current_slot: Option<&'a mut Vec<(K, V)>>,
860    predicate: F,
861    size: usize
862}
863
864impl<'a, F, K, V> Iterator for DrainWithIter<'a, F, K, V> where F: for<'r> Fn(&'r (K, V)) -> bool, K: core::hash::Hash + core::cmp::PartialEq {
865    type Item=(K, V);
866
867    fn next(&mut self) -> Option<Self::Item> {
868        while let Some(ref mut v) = self.current_slot {
869            for i in 0..v.len() {
870                if (self.predicate)(&v[i]) {
871                    // Found match
872                    *self.cur_size -= 1;
873                    return Some(v.swap_remove(i))
874                }
875            }
876
877            loop {
878                self.current_slot = self.bucket_iter.next();
879                if self.current_slot.is_some() {
880                    if self.current_slot.as_ref().unwrap().len() == 0 {
881                        // Keep iterating until non-empty slot is found
882                        continue;
883                    } else {
884                        // Found bucket that has some slot to evaluate
885                        break;
886                    }
887                } else {
888                    // All slot in every buckets are evaulated now
889                    return None
890                }
891            }
892        }
893
894        None
895    }
896}
897
898impl<'a, F, K, V> core::iter::FusedIterator for DrainWithIter<'a, F, K, V> where F: for<'r> Fn(&'r (K, V)) -> bool, K: core::hash::Hash + core::cmp::PartialEq {}
899impl<'a, F, K, V> core::iter::ExactSizeIterator for DrainWithIter<'a, F, K, V> where F: for<'r> Fn(&'r (K, V)) -> bool, K: core::hash::Hash + core::cmp::PartialEq {
900    #[inline]
901    fn len(&self) -> usize {
902        self.size
903    }
904}
905
906#[cfg(test)]
907mod tests;