hash_table/
lib.rs

1#![no_std]
2
3#![feature(maybe_uninit_ref)]
4#![feature(non_ascii_idents)]
5
6extern crate siphasher as sip;
7
8#[cfg(test)] extern crate quickcheck;
9#[cfg(test)] #[macro_use]
10             extern crate quickcheck_macros;
11#[cfg(test)] #[macro_use]
12             extern crate std;
13
14use core::borrow::Borrow;
15use core::hash::{Hash, Hasher};
16use core::marker::PhantomData;
17use core::ops::{Index, IndexMut, RangeFull};
18use core::{mem, ptr};
19use core::mem::MaybeUninit as Slot;
20
21#[derive(Clone, Default)]
22pub struct DefaultHasher(::sip::sip::SipHasher);
23
24impl Hasher for DefaultHasher {
25    #[inline] fn finish(&self) -> u64 { self.0.finish() }
26    #[inline] fn write(&mut self, bs: &[u8]) { self.0.write(bs) }
27}
28
29pub struct HashTable<K: Eq + Hash, T,
30                     Hs: IndexMut<usize, Output = usize> + Index<RangeFull, Output = [usize]>,
31                     Es: IndexMut<usize, Output = Slot<(K, T)>>, H: Clone + Hasher = DefaultHasher> {
32    hashes: Hs,
33    elems : Es,
34    free_n: usize,
35    hasher: H,
36}
37
38#[inline]
39fn log2(n: usize) -> u32 { 0usize.count_zeros() - n.leading_zeros() - 1 }
40
41impl<K: Eq + Hash, T, Hs: IndexMut<usize, Output = usize> + Index<RangeFull, Output = [usize]>,
42     Es: IndexMut<usize, Output = Slot<(K, T)>>, H: Clone + Hasher> HashTable<K, T, Hs, Es, H> {
43    #[inline]
44    pub fn from_parts(mut hashes: Hs, elems: Es, hasher: H) -> Self {
45        #[allow(clippy::needless_range_loop)]
46        for k in 0..hashes[..].len() { hashes[k] = 0; }
47        let free_n = 1 << log2(hashes[..].len());
48        HashTable { hashes, elems, hasher, free_n }
49    }
50
51    fn log_cap(&self) -> u32 { log2(self.hashes[..].len()) }
52
53    fn hash<Q: ?Sized>(&self, k: &Q) -> usize where Q: Hash {
54        let mut h = self.hasher.clone();
55        k.hash(&mut h);
56        (h.finish() as usize | hash_flag) & !dead_flag
57    }
58
59    fn find_ix<Q: ?Sized>(&self, k: &Q) -> Option<usize> where K: Borrow<Q>, Q: Eq + Hash {
60        debug_assert!(self.free_n >= 1);
61        let wrap_mask = (1 << self.log_cap()) - 1;
62        let h = self.hash(k);
63        let mut i = h & wrap_mask;
64        let mut psl = 0;
65        loop {
66            if self.hashes[i] == 0 || psl > compute_psl(&self.hashes[..], i) { return None };
67            if self.hashes[i] == h && unsafe { self.elems[i].assume_init_ref().0.borrow() == k } { return Some(i); }
68            i = (i+1)&wrap_mask;
69            debug_assert_ne!(h & wrap_mask, i);
70            psl += 1;
71        }
72    }
73
74    #[inline]
75    pub fn find_with_ix<Q: ?Sized>(&self, k: &Q) -> Option<(usize, &K, &T)> where K: Borrow<Q>, Q: Eq + Hash {
76        self.find_ix(k).map(move |i| unsafe { (i, &self.elems[i].assume_init_ref().0, &self.elems[i].assume_init_ref().1) })
77    }
78
79    #[inline]
80    pub fn find_mut_with_ix<Q: ?Sized>(&mut self, k: &Q) -> Option<(usize, &K, &mut T)> where K: Borrow<Q>, Q: Eq + Hash {
81        self.find_ix(k).map(move |i| unsafe { let &mut (ref k, ref mut v) = self.elems[i].assume_init_mut(); (i, k, v) })
82    }
83
84    #[inline]
85    pub fn find<Q: ?Sized>(&self, k: &Q) -> Option<(&K, &T)> where K: Borrow<Q>, Q: Eq + Hash {
86        self.find_with_ix(k).map(|(_, k, x)| (k, x))
87    }
88
89    #[inline]
90    pub fn find_mut<Q: ?Sized>(&mut self, k: &Q) -> Option<(&K, &mut T)> where K: Borrow<Q>, Q: Eq + Hash {
91        self.find_mut_with_ix(k).map(|(_, k, x)| (k, x))
92    }
93
94    #[inline]
95    pub fn insert_with<F: FnOnce(Option<T>) -> T>(&mut self, k: K, f: F) -> Result<(usize, &K, &mut T), (K, F)> {
96        if 1 >= self.free_n { return Err((k, f)); }
97        self.free_n -= 1;
98
99        let cap = 1 << self.log_cap();
100        let mut h = self.hash(&k);
101        let mut i = h&(cap-1);
102        let mut psl = 0;
103        loop { unsafe {
104            if self.hashes[i] == 0 {
105                self.hashes[i] = h;
106                ptr::write(self.elems[i].assume_init_mut(), (k, f(None)));
107                let &mut (ref k, ref mut v) = self.elems[i].assume_init_mut();
108                return Ok((i, k, v))
109            }
110
111            if self.hashes[i] == h && self.elems[i].assume_init_ref().0 == k {
112                self.hashes[i] |=  dead_flag;
113                let x = ptr::read(&self.elems[i].assume_init_ref().1);
114                ptr::write(&mut self.elems[i].assume_init_mut().1, f(Some(x)));
115                self.hashes[i] &= !dead_flag;
116                let &mut (ref k, ref mut v) = self.elems[i].assume_init_mut();
117                return Ok((i, k, v))
118            }
119
120            if psl > compute_psl(&self.hashes[..], i) {
121                let mut e = (k, f(None));
122                loop {
123                    mem::swap(&mut h, &mut self.hashes[i]);
124                    mem::swap(&mut e, self.elems[i].assume_init_mut());
125                    if h == 0 || is_dead(h) {
126                        mem::forget(e);
127                        let &mut (ref k, ref mut v) = self.elems[i].assume_init_mut();
128                        return Ok((i, k, v));
129                    };
130                    i = (i+1)&(cap-1);
131                }
132            }
133
134            i = (i+1)&(cap-1);
135            debug_assert_ne!(h&(cap-1), i);
136            psl += 1;
137        } }
138    }
139
140    #[inline]
141    pub fn insert_with_ix(&mut self, k: K, x: T) -> Result<(usize, Option<T>), (K, T)> {
142        let mut opt_y = None;
143        self.insert_with(k, |opt_x| { opt_y = opt_x; x })
144            .map_err(|(k, f)| (k, f(None))).map(|(k, _, _)| (k, opt_y))
145    }
146
147    #[inline]
148    pub fn insert(&mut self, k: K, x: T) -> Result<Option<T>, (K, T)> {
149        self.insert_with_ix(k, x).map(|(_, opt_y)| opt_y)
150    }
151
152    #[inline]
153    pub fn delete<Q: ?Sized>(&mut self, k: &Q) -> Option<T> where K: Borrow<Q>, Q: Eq + Hash {
154        self.find_ix(k).map(move |i| unsafe {
155            self.free_n += 1;
156            debug_assert!(1 << self.log_cap() >= self.free_n);
157            let (_, x) = ptr::read(self.elems[i].assume_init_ref());
158            self.hashes[i] |= dead_flag;
159            x
160        })
161    }
162
163    #[inline]
164    pub fn drain(&mut self) -> Drain<K, T> {
165        DrainFilter {
166            φ: PhantomData,
167            hash_ptr: &mut self.hashes[0],
168            elms_ptr: &mut self.elems[0] as *mut Slot<_> as *mut _,
169            hash_end: (&mut self.hashes[0] as *mut usize).wrapping_add(self.hashes[..].len()),
170            f: |_, _| true,
171        }
172    }
173
174    #[inline]
175    pub fn drain_filter<F: FnMut(&K, &mut T) -> bool>(&mut self, f: F) -> DrainFilter<K, T, F> {
176        DrainFilter {
177            φ: PhantomData,
178            hash_ptr: &mut self.hashes[0],
179            elms_ptr: &mut self.elems[0] as *mut Slot<_> as *mut _,
180            hash_end: (&mut self.hashes[0] as *mut usize).wrapping_add(self.hashes[..].len()),
181            f,
182        }
183    }
184
185    #[inline]
186    pub fn iter_with_ix(&self) -> IterWithIx<K, T> {
187        IterWithIx {
188            φ: PhantomData,
189            hash_ptr: &self.hashes[0],
190            elms_ptr: &self.elems[0] as *const Slot<_> as *const _,
191            hash_top: &self.hashes[0],
192            hash_end: self.hashes[..].as_ptr().wrapping_add(self.hashes[..].len()),
193        }
194    }
195
196    #[inline]
197    pub fn iter_mut_with_ix(&mut self) -> IterMutWithIx<K, T> {
198        IterMutWithIx {
199            φ: PhantomData,
200            hash_ptr: &self.hashes[0],
201            elms_ptr: &mut self.elems[0] as *mut Slot<_> as *mut _,
202            hash_top: &self.hashes[0],
203            hash_end: self.hashes[..].as_ptr().wrapping_add(self.hashes[..].len()),
204        }
205    }
206
207    #[inline]
208    pub fn hasher(&self) -> &H { &self.hasher }
209}
210
211#[derive(Clone, Copy)]
212pub struct IterWithIx<'a, K, T> {
213    φ: PhantomData<&'a ()>,
214    hash_ptr: *const usize,
215    elms_ptr: *const (K, T),
216    hash_top: *const usize,
217    hash_end: *const usize,
218}
219
220impl<'a, K: 'a, T: 'a> Iterator for IterWithIx<'a, K, T> {
221    type Item = (usize, &'a K, &'a T);
222
223    #[inline]
224    fn next(&mut self) -> Option<Self::Item> {
225        let mut r = None;
226        while r.is_none() && self.hash_ptr != self.hash_end { unsafe {
227            if 0 != ptr::read(self.hash_ptr) { r = Some((ptr_diff(self.hash_ptr, self.hash_top),
228                                                         &(*self.elms_ptr).0,
229                                                         &(*self.elms_ptr).1)); }
230            self.hash_ptr = self.hash_ptr.wrapping_offset(1);
231            self.elms_ptr = self.elms_ptr.offset(1);
232        } }
233        r
234    }
235}
236
237unsafe impl<'a, K: Sync, T: Sync> Send for IterWithIx<'a, K, T> {}
238unsafe impl<'a, K: Sync, T: Sync> Sync for IterWithIx<'a, K, T> {}
239
240pub struct IterMutWithIx<'a, K, T> {
241    φ: PhantomData<&'a mut ()>,
242    hash_ptr: *const usize,
243    elms_ptr: *mut (K, T),
244    hash_top: *const usize,
245    hash_end: *const usize,
246}
247
248unsafe impl<'a, K: Sync, T: Send> Send for IterMutWithIx<'a, K, T> {}
249unsafe impl<'a, K: Sync, T: Sync> Sync for IterMutWithIx<'a, K, T> {}
250
251impl<'a, K: 'a, T: 'a> Iterator for IterMutWithIx<'a, K, T> {
252    type Item = (usize, &'a K, &'a mut T);
253
254    #[inline]
255    fn next(&mut self) -> Option<Self::Item> {
256        let mut r = None;
257        while r.is_none() && self.hash_ptr != self.hash_end { unsafe {
258            if 0 != ptr::read(self.hash_ptr) { r = Some((ptr_diff(self.hash_ptr, self.hash_top),
259                                                         &    (*self.elms_ptr).0,
260                                                         &mut (*self.elms_ptr).1)); }
261            self.hash_ptr = self.hash_ptr.wrapping_offset(1);
262            self.elms_ptr = self.elms_ptr.offset(1);
263        } }
264        r
265    }
266}
267
268pub type Drain<'a, K, T> = DrainFilter<'a, K, T, fn(&K, &mut T) -> bool>;
269
270pub struct DrainFilter<'a, K, T, F: FnMut(&K, &mut T) -> bool> {
271    φ: PhantomData<&'a mut ()>,
272    hash_ptr: *mut usize,
273    elms_ptr: *mut (K, T),
274    hash_end: *mut usize,
275    f: F
276}
277
278unsafe impl<'a, K: Sync, T: Send, F: FnMut(&K, &mut T) -> bool + Send> Send for DrainFilter<'a, K, T, F> {}
279unsafe impl<'a, K: Sync, T: Sync, F: FnMut(&K, &mut T) -> bool + Sync> Sync for DrainFilter<'a, K, T, F> {}
280
281impl<'a, K, T, F: FnMut(&K, &mut T) -> bool> Iterator for DrainFilter<'a, K, T, F> {
282    type Item = (K, T);
283
284    #[inline]
285    fn next(&mut self) -> Option<Self::Item> {
286        let mut r = None;
287        while r.is_none() && self.hash_ptr != self.hash_end { unsafe {
288            if 0 != *self.hash_ptr {
289                let (ref k, ref mut v) = &mut *self.elms_ptr;
290                if (self.f)(k, v) {
291                    *self.hash_ptr = 0;
292                    r = Some(ptr::read(self.elms_ptr));
293                }
294            }
295            self.hash_ptr = self.hash_ptr.wrapping_offset(1);
296            self.elms_ptr = self.elms_ptr.offset(1);
297        } }
298        r
299    }
300}
301
302impl<'a, K, T, F: FnMut(&K, &mut T) -> bool> Drop for DrainFilter<'a, K, T, F> {
303    #[inline]
304    fn drop(&mut self) { for _ in self {} }
305}
306
307#[inline] fn compute_psl(hs: &[usize], i: usize) -> usize { usize::wrapping_sub(i, hs[i])&(hs.len()-1) }
308
309#[inline] fn is_dead(h: usize) -> bool { 0 != h & dead_flag }
310
311const dead_flag: usize = !(!0>>1);
312const hash_flag: usize = dead_flag>>1;
313
314impl<K: Eq + Hash, T,
315     Hs: IndexMut<usize, Output = usize> + Index<RangeFull, Output = [usize]>,
316     Es: IndexMut<usize, Output = Slot<(K, T)>>, H: Clone + Hasher> Drop for HashTable<K, T, Hs, Es, H> {
317    #[inline] fn drop(&mut self) { unsafe {
318        for i in 0..self.hashes[..].len() {
319            if self.hashes[i] != 0 && !is_dead(self.hashes[i]) { ptr::drop_in_place(self.elems[i].assume_init_mut()); }
320        }
321    } }
322}
323
324#[inline] pub fn ptr_diff<T>(p: *const T, q: *const T) -> usize {
325    use ::core::num::Wrapping as w;
326    (w(p as usize) - w(q as usize)).0/mem::size_of::<T>()
327}
328
329#[cfg(test)] mod tests {
330    use quickcheck::{ Arbitrary, Gen };
331    use std::hash::*;
332    use std::vec::Vec;
333
334    use super::*;
335
336    fn nub_by_0<S: Ord, T>(v: &mut Vec<(S, T)>) {
337        // Only last element of test input vector with each key is kept in table, so we must delete the others.
338        // We can not merely sort by reverse comparison rather than sort and reverse as we rely on stability.
339        v.sort_by(|&(ref i, _), &(ref j, _)| Ord::cmp(i, j));
340        v.reverse();
341        let mut i = 1;
342        while i < v.len() {
343            while i < v.len() && v[i-1].0 == v[i].0 { v.remove(i); }
344            i += 1;
345        }
346    }
347
348    fn mk_ht<K: Eq + Hash, T, H: Clone + Hasher>(cap: usize, h: H) -> HashTable<K, T, Vec<usize>, Vec<Slot<(K, T)>>, H> {
349        let mut es = Vec::with_capacity(cap);
350        unsafe { es.set_len(cap) };
351        HashTable::from_parts(vec![0; cap], es, h)
352    }
353
354    #[derive(Clone)]
355    struct XorBytesHasher(u64);
356    impl Hasher for XorBytesHasher {
357        fn finish(&self) -> u64 { match self { &XorBytesHasher(h) => h } }
358        fn write(&mut self, bs: &[u8]) {
359            for &b in bs { self.0 ^= b as u64; }
360        }
361    }
362
363    #[derive(Clone)]
364    struct NullHasher;
365    impl Hasher for NullHasher {
366        fn finish(&self) -> u64 { 0 }
367        fn write(&mut self, _: &[u8]) {}
368    }
369
370    // ¬([_; 0x100]: Copy + Clone). Grr. *irritation*
371    #[derive(Copy, Clone, Debug)]
372    struct ArrayOf0x100<T: Copy>([[T; 0x10]; 0x10]);
373    impl<T: Arbitrary + Copy> Arbitrary for ArrayOf0x100<T> {
374        fn arbitrary<G: Gen>(g: &mut G) -> Self {
375            unsafe {
376                let mut a: [[T; 0x10]; 0x10] = mem::uninitialized();
377                for i in 0..0x10 { for j in 0..0x10 { ptr::write(&mut a[i][j], T::arbitrary(g)); } }
378                ArrayOf0x100(a)
379            }
380        }
381    }
382
383    #[derive(Clone)]
384    struct ArrayOf0x100Hasher([[u64; 0x10]; 0x10], u64);
385    impl Hasher for ArrayOf0x100Hasher {
386        fn finish(&self) -> u64 { match self { &ArrayOf0x100Hasher(_, h) => h } }
387        fn write(&mut self, bs: &[u8]) {
388            for &b in bs { self.1 ^= self.0[b as usize >> 4][b as usize & 0x0F]; }
389        }
390    }
391
392    #[quickcheck] fn insertion_sans_collision(mut v: Vec<u64>) -> bool {
393        v.truncate(0x100);
394        let log_cap = (v.len() + 1).next_power_of_two().trailing_zeros();
395        let mut t = mk_ht::<u8, u64, _>(1 << log_cap, XorBytesHasher(0));
396        for (k, &x) in v.iter().enumerate() { t.insert(k as u8, x).unwrap(); }
397        v.iter().enumerate().all(|(k, x)| t.find(&(k as u8)) == Some((&(k as u8), &x)))
398    }
399
400    #[quickcheck] fn insertion_with_collision(mut v: Vec<(u8, u64)>) -> bool {
401        let log_cap = (v.len() + 1).next_power_of_two().trailing_zeros();
402        let mut t = mk_ht::<u8, u64, _>(1 << log_cap, NullHasher);
403        for (k, x) in v.clone() { t.insert(k, x).unwrap(); }
404
405        nub_by_0(&mut v);
406        v.iter().all(|&(k, x)| t.find(&k) == Some((&k, &x)))
407    }
408
409    #[quickcheck] fn insertion_with_random_hash(a: ArrayOf0x100<u64>, mut v: Vec<(u8, u64)>) -> bool {
410        let ArrayOf0x100(a) = a;
411
412        let log_cap = (v.len() + 1).next_power_of_two().trailing_zeros();
413        let mut t = mk_ht::<u8, u64, ArrayOf0x100Hasher>(1 << log_cap, ArrayOf0x100Hasher(a, 0));
414        for (k, x) in v.clone() { t.insert(k, x).unwrap(); }
415
416        nub_by_0(&mut v);
417        v.iter().all(|&(k, x)| t.find(&k) == Some((&k, &x)))
418    }
419
420    #[quickcheck] fn deletion_sans_collision(mut v: Vec<u64>) -> bool {
421        v.truncate(0x100);
422        let log_cap = (v.len() + 1).next_power_of_two().trailing_zeros();
423        let mut t = mk_ht::<u8, u64, _>(1 << log_cap, XorBytesHasher(0));
424        for (k, &x) in v.iter().enumerate() { t.insert(k as u8, x).unwrap(); }
425        v.iter().enumerate().all(|(k, &x)| t.find(&(k as u8)) == Some((&(k as u8), &x)) && t.delete(&(k as u8)) == Some(x) && t.find(&(k as u8)) == None)
426    }
427
428    #[quickcheck] fn deletion_with_collision(mut v: Vec<(u8, u64)>) -> bool {
429        v.truncate(8);
430        let log_cap = (v.len() + 1).next_power_of_two().trailing_zeros();
431        let mut t = mk_ht::<u8, u64, _>(1 << log_cap, NullHasher);
432        for (k, x) in v.clone() { t.insert(k, x).unwrap(); }
433
434        nub_by_0(&mut v);
435        v.iter().all(|&(k, x)| t.find(&(k as u8)) == Some((&k, &x)) && t.delete(&k) == Some(x) && t.find(&k) == None)
436    }
437
438    #[quickcheck] fn deletion_with_random_hash(a: ArrayOf0x100<u64>, mut v: Vec<(u8, u64)>) -> bool {
439        let ArrayOf0x100(a) = a;
440
441        let log_cap = (v.len() + 1).next_power_of_two().trailing_zeros();
442        let mut t = mk_ht::<u8, u64, _>(1 << log_cap, ArrayOf0x100Hasher(a, 0));
443        for (k, x) in v.clone() { t.insert(k, x).unwrap(); }
444
445        nub_by_0(&mut v);
446        v.iter().all(|&(k, x)| t.find(&k) == Some((&k, &x)) && t.delete(&k) == Some(x) && t.find(&k) == None)
447    }
448
449    #[quickcheck] fn iter(v: Vec<(u8, u64)>) -> bool {
450        let log_cap = (v.len() + 1).next_power_of_two().trailing_zeros();
451        let mut t = mk_ht::<u8, u64, _>(1 << log_cap, DefaultHasher::default());
452        for (k, x) in v.clone() { t.insert(k, x).unwrap(); }
453
454        t.iter_with_ix().all(|(k, &i, &x)| k < 1 << log_cap &&
455                                           v.iter().any(|&(j, y)| (i, x) == (j, y)))
456    }
457
458    #[test] fn full_table_forbidden() {
459        let mut t = mk_ht::<u8, (), _>(2, DefaultHasher::default());
460        t.insert(0, ()).unwrap();
461        assert!(t.insert(1, ()).is_err());
462    }
463}