pfds/
hashmap.rs

1use std::marker::PhantomData;
2//
3// Copyright 2021-Present (c) Raja Lehtihet & Wael El Oraiby
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7//
8// 1. Redistributions of source code must retain the above copyright notice,
9// this list of conditions and the following disclaimer.
10//
11// 2. Redistributions in binary form must reproduce the above copyright notice,
12// this list of conditions and the following disclaimer in the documentation
13// and/or other materials provided with the distribution.
14//
15// 3. Neither the name of the copyright holder nor the names of its contributors
16// may be used to endorse or promote products derived from this software without
17// specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29// POSSIBILITY OF SUCH DAMAGE.
30//
31use crate::{Hashable, TRIE_BITS, TRIE_MASK, TRIE_SIZE};
32use std::mem::*;
33use std::sync::Arc;
34
35#[derive(Clone)]
36enum HashMapNode<K: Hashable + Eq + Clone, V: Clone> {
37    Empty,
38    One(usize, K, V),
39    Node(usize, Arc<[N<K, V>; TRIE_SIZE]>),
40}
41
42use HashMapNode::*;
43
44type N<K, V> = HashMapNode<K, V>;
45type H<K, V> = Arc<HashMapNode<K, V>>;
46
47impl<K: Hashable + Eq + Clone, V: Clone> HashMapNode<K, V> {
48    fn empty() -> H<K, V> {
49        H::new(Empty)
50    }
51
52    fn new_empty_slice() -> [N<K, V>; TRIE_SIZE] {
53        let mut s: [MaybeUninit<N<K, V>>; TRIE_SIZE] = unsafe { MaybeUninit::uninit().assume_init() };
54        for i in s.iter_mut().take(TRIE_SIZE) {
55            *i = MaybeUninit::new(N::Empty);
56        }
57
58        // TODO: issue: https://github.com/rust-lang/rust/issues/61956
59        // use transmute
60        // let ptr = &mut s as *mut _ as *mut [N<K, V>; TRIE_SIZE];
61        // let res = unsafe { ptr.read() };
62        // forget(s);
63        // res
64
65        unsafe { (&*(&MaybeUninit::new(s) as *const _ as *const MaybeUninit<_>)).assume_init_read() }
66    }
67
68    fn insert(h: &N<K, V>, l: u32, k: K, v: V) -> Option<N<K, V>> {
69        let kh = k.hash() as usize;
70        let idx = kh.wrapping_shr(l) & TRIE_MASK;
71
72        match h {
73            Empty => Some(N::One(kh, k, v)),
74            One(hh, k2, _) if kh == *hh && k == *k2 =>
75            /* (1) */
76            {
77                None
78            }
79            One(kh2, k2, v2) => {
80                let mut slice = N::new_empty_slice();
81                slice[idx] = N::One(kh, k, v);
82                let idx2 = kh2.wrapping_shr(l) & TRIE_MASK;
83                if idx2 != idx {
84                    slice[idx2] = N::One(*kh2, k2.clone(), v2.clone());
85                    let n = Node(2, Arc::new(slice));
86                    Some(n)
87                } else {
88                    let n = Node(1, Arc::new(slice));
89                    match N::insert(&n, l, k2.clone(), v2.clone()) {
90                        Some(n2) => Some(n2), // return the new one
91                        None => Some(n),      // this case should never be exausted: look at (1)
92                    }
93                }
94            }
95            Node(size, slice) => match N::insert(&slice[idx], l + TRIE_BITS, k, v) {
96                None => None,
97                Some(n) => {
98                    let mut slice2 = slice.as_ref().clone();
99                    slice2[idx] = n;
100                    Some(Node(size + 1, Arc::new(slice2)))
101                }
102            },
103        }
104    }
105
106    fn exist(h: &N<K, V>, l: u32, k: &K) -> bool {
107        let kh = k.hash() as usize;
108        let idx = kh.wrapping_shr(l) & TRIE_MASK;
109
110        match h {
111            Empty => false,
112            One(hh, k2, _) => kh == *hh && k == k2,
113            Node(_, slice) => N::exist(&slice[idx], l + TRIE_BITS, k),
114        }
115    }
116
117    fn find(&self, l: u32, k: &K) -> Option<&V> {
118        let kh = k.hash() as usize;
119        let idx = kh.wrapping_shr(l) & TRIE_MASK;
120
121        match self {
122            Empty => None,
123            One(hh, k2, v) if kh == *hh && k == k2 => Some(v),
124            One(_, _, _) => None,
125            Node(_, slice) => slice[idx].find(l + TRIE_BITS, k),
126        }
127    }
128
129    fn remove(h: &N<K, V>, l: u32, k: K) -> Option<N<K, V>> {
130        let kh = k.hash() as usize;
131        let idx = kh.wrapping_shr(l) & TRIE_MASK;
132        match h {
133            Empty => None,
134            One(hh, k2, _) if kh == *hh && k == *k2 =>
135            /* (1) */
136            {
137                Some(Empty)
138            }
139            One(_, _, _) => None,
140            Node(size, slice) => match N::remove(&slice[idx], l + TRIE_BITS, k) {
141                None => None,
142                Some(n) if matches!(n, Empty) && *size == 1 => Some(Empty),
143                Some(n) => {
144                    let new_size = match n {
145                        Empty => size - 1,
146                        _ => *size,
147                    };
148                    let mut slice2 = slice.as_ref().clone();
149                    slice2[idx] = n;
150                    Some(Node(new_size, Arc::new(slice2)))
151                }
152            },
153        }
154    }
155
156    fn to_vec_internal(&self, v: &mut Vec<(K, V)>) {
157        match self {
158            Empty => (),
159            One(_, k, vv) => v.push((k.clone(), vv.clone())),
160            Node(_, slice) => {
161                for n in slice.as_ref() {
162                    n.to_vec_internal(v);
163                }
164            }
165        }
166    }
167
168    fn to_vec(&self) -> Vec<(K, V)> {
169        let mut v = Vec::new();
170        self.to_vec_internal(&mut v);
171        v
172    }
173}
174
175#[derive(Clone)]
176pub struct HashMap<K: Hashable + Eq + Clone, V: Clone> {
177    n: H<K, V>,
178    count: usize,
179}
180
181impl<K: Hashable + Eq + Clone, V: Clone> HashMap<K, V> {
182    ///
183    /// create and return a new empty map
184    ///
185    pub fn empty() -> Self {
186        Self { n: N::empty(), count: 0 }
187    }
188
189    ///
190    /// create and return a new map containing the new key, value pair
191    ///
192    pub fn insert(&self, k: K, v: V) -> Self {
193        let n = N::insert(self.n.as_ref(), 0, k.clone(), v.clone());
194        match n {
195            Some(n) => Self {
196                n: H::new(n),
197                count: self.count + 1,
198            },
199            None => {
200                // the key is already found, overwrite it
201                let n = N::insert(self.remove(k.clone()).n.as_ref(), 0, k, v).unwrap();
202                Self {
203                    n: H::new(n),
204                    count: self.count,
205                }
206            }
207        }
208    }
209
210    ///
211    /// create and return a new map with the key, value pair removed
212    ///
213    pub fn remove(&self, k: K) -> Self {
214        let n = N::remove(self.n.as_ref(), 0, k);
215        match n {
216            Some(n) => Self {
217                n: H::new(n),
218                count: self.count - 1,
219            },
220            None => Self {
221                n: self.n.clone(),
222                count: self.count,
223            },
224        }
225    }
226
227    ///
228    /// search for a key and return true if the key exist, false otherwise
229    ///
230    pub fn exist(&self, k: &K) -> bool {
231        N::exist(self.n.as_ref(), 0, k)
232    }
233
234    ///
235    /// search for a key and return a pointer to the value if the key exists, None otherwise
236    ///
237    pub fn find(&self, k: &K) -> Option<&V> {
238        self.n.as_ref().find(0, k)
239    }
240
241    ///
242    /// walk the list/stack and build a vector of keys and return it
243    ///
244    pub fn to_vec(&self) -> Vec<(K, V)> {
245        self.n.to_vec()
246    }
247
248    ///
249    /// return the number of elements in the set
250    ///
251    pub fn is_empty(&self) -> bool {
252        self.count == 0
253    }
254
255    ///
256    /// return the number of elements in the set
257    ///
258    pub fn len(&self) -> usize {
259        self.count
260    }
261
262    ///
263    /// returns an iterator
264    ///
265    pub fn iter<'a>(&self) -> Iter<'a, K, V> {
266        Iter {
267            stack: Vec::new(),
268            current: Pointer { node: self.n.clone(), idx: 0 },
269            _phantom: PhantomData::default(),
270        }
271    }
272}
273
274#[derive(Clone)]
275struct Pointer<K: Clone + Eq + Hashable, V: Clone> {
276    idx: usize,
277    node: H<K, V>,
278}
279
280pub struct Iter<'a, K: Clone + Eq + Hashable, V: Clone> {
281    stack: Vec<Pointer<K, V>>,
282    current: Pointer<K, V>,
283    _phantom: PhantomData<&'a (K, V)>,
284}
285
286impl<'a, K: Clone + Eq + Hashable, V: Clone> Iter<'a, K, V> {
287    fn pop(&mut self) {
288        match self.stack.pop() {
289            Some(Pointer { idx: i, node: n }) => self.current = Pointer { idx: i + 1, node: n },
290
291            None => {
292                self.current = Pointer {
293                    idx: 0,
294                    node: Arc::new(HashMapNode::Empty),
295                }
296            }
297        }
298    }
299}
300
301impl<'a, K: Clone + Eq + Hashable, V: Clone> std::iter::Iterator for Iter<'a, K, V> {
302    type Item = (K, V);
303
304    fn next(&mut self) -> Option<Self::Item> {
305        let nc = self.current.clone(); // needless, but required for the borrow checker
306        let n = nc.node.as_ref();
307        match n {
308            HashMapNode::Empty => {
309                // we only enter this one if the root can be empty!
310                None
311            }
312
313            HashMapNode::One(_s, k, v) => {
314                // we only enter this one if it's in the root!
315                if self.current.idx == 0 {
316                    self.current.idx += 1;
317                    Some((k.clone(), v.clone()))
318                } else {
319                    None
320                }
321            }
322
323            HashMapNode::Node(size, entries) => {
324                while self.current.idx < TRIE_SIZE {
325                    match &entries[self.current.idx] {
326                        HashMapNode::Empty => self.current.idx += 1,
327                        HashMapNode::One(_s, k, v) => {
328                            self.current.idx += 1;
329                            return Some((k.clone(), v.clone()));
330                        }
331                        HashMapNode::Node(new_size, new_entries) => {
332                            self.stack.push(Pointer {
333                                idx: self.current.idx,
334                                node: Arc::new(HashMapNode::Node(*size, entries.clone())),
335                            });
336                            self.current = Pointer {
337                                idx: 0,
338                                node: Arc::new(HashMapNode::Node(*new_size, new_entries.clone())),
339                            };
340                            return self.next();
341                        }
342                    }
343                }
344                self.pop();
345                self.next()
346            }
347        }
348    }
349}
350
351#[cfg(test)]
352mod tests {
353    use crate::hashmap::*;
354
355    static mut SEED: usize = 777;
356
357    fn rand() -> usize {
358        unsafe {
359            SEED = SEED.wrapping_mul(1664525).wrapping_add(1013904223);
360            (SEED >> 24) as i32 as usize
361        }
362    }
363
364    #[test]
365    fn insert() {
366        let numbers = [3, 3, 0x13, 120, 4, 9, 27, 1, 45];
367        let mut n = HashMap::empty();
368        for i in numbers {
369            n = n.insert(i, i * i);
370        }
371
372        assert_eq!(n.len(), 8);
373
374        for i in 0..numbers.len() {
375            assert_eq!(n.exist(&numbers[i]), true);
376        }
377    }
378
379    #[test]
380    fn insert_redundant_key_different_values() {
381        let numbers = [3, 3, 0x13, 120, 4, 9, 27, 1, 45];
382        let mut n = HashMap::empty();
383        for i in numbers {
384            n = n.insert(i, i * i);
385        }
386
387        for i in numbers {
388            n = n.insert(i, 2 * i * i);
389        }
390
391        assert_eq!(n.len(), 8);
392
393        for i in 0..numbers.len() {
394            let num = numbers[i];
395            assert!(n.exist(&num));
396            assert_eq!(*n.find(&num).unwrap(), 2 * num * num);
397        }
398    }
399
400    #[test]
401    fn remove() {
402        let numbers = [3, 3, 0x13, 120, 4, 9, 27, 1, 45];
403        let mut n = HashMap::empty();
404        for i in numbers {
405            n = n.insert(i, i * i);
406        }
407
408        assert_eq!(n.len(), 8);
409
410        for i in 0..numbers.len() {
411            assert_eq!(n.exist(&numbers[i]), true);
412        }
413
414        for i in numbers {
415            n = n.remove(i);
416            assert_eq!(n.exist(&i), false);
417        }
418    }
419
420    #[test]
421    fn insert_1000000() {
422        let mut numbers = Vec::new();
423        let mut n = HashMap::empty();
424        for _ in 0..1000000 {
425            let r = rand() % 100000;
426            n = n.insert(r, r * r);
427            numbers.push(r);
428        }
429
430        let mut sorted = numbers.clone();
431        sorted.sort();
432        sorted.dedup();
433
434        assert_eq!(n.len(), sorted.len());
435
436        for i in 0..numbers.len() {
437            assert_eq!(n.exist(&numbers[i]), true);
438            let k = numbers[i];
439
440            assert_eq!(n.find(&k).is_some(), true);
441            assert_eq!(*n.find(&k).unwrap(), k * k);
442        }
443
444        let mut v = n.to_vec();
445        v.sort();
446        assert_eq!(v.len(), sorted.len());
447        for i in 0..sorted.len() {
448            assert_eq!(sorted[i], v[i].0);
449        }
450    }
451
452    #[test]
453    fn remove_1000000() {
454        let mut numbers = Vec::new();
455        let mut n = HashMap::empty();
456        for _ in 0..1000000 {
457            let r = rand() % 100000;
458            n = n.insert(r, r * r);
459            numbers.push(r);
460        }
461
462        let mut sorted = numbers.clone();
463        sorted.sort();
464        sorted.dedup();
465
466        assert_eq!(n.len(), sorted.len());
467
468        for i in 0..numbers.len() {
469            assert_eq!(n.exist(&numbers[i]), true);
470        }
471
472        let mut v = n.to_vec();
473        v.sort();
474        assert_eq!(v.len(), sorted.len());
475        for i in sorted {
476            n = n.remove(i);
477            assert_eq!(n.exist(&i), false);
478        }
479
480        assert_eq!(n.len(), 0);
481    }
482
483    #[test]
484    fn iter_1000000() {
485        let mut numbers = Vec::new();
486        let mut n = HashMap::empty();
487        for _ in 0..1000000 {
488            let r = rand() % 100000;
489            n = n.insert(r, r * r);
490            numbers.push(r);
491        }
492
493        let mut sorted = numbers.clone();
494        sorted.sort();
495        sorted.dedup();
496
497        assert_eq!(n.len(), sorted.len());
498
499        for i in 0..numbers.len() {
500            assert_eq!(n.exist(&numbers[i]), true);
501            let k = numbers[i];
502
503            assert_eq!(n.find(&k).is_some(), true);
504            assert_eq!(*n.find(&k).unwrap(), k * k);
505        }
506
507        let mut v = n.iter().collect::<Vec<_>>();
508        v.sort();
509        assert_eq!(v.len(), sorted.len());
510        for i in 0..sorted.len() {
511            assert_eq!(sorted[i], v[i].0);
512        }
513    }
514
515    #[test]
516    fn iter_1() {
517        let mut n = HashMap::empty();
518        n = n.insert(1, 1);
519        for (k, v) in n.iter() {
520            assert_eq!(k, 1);
521            assert_eq!(v, 1);
522        }
523    }
524}