pfds/
hashset.rs

1use std::hash::Hash;
2use std::marker::PhantomData;
3//
4// Copyright 2021-Present (c) Raja Lehtihet & Wael El Oraiby
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are met:
8//
9// 1. Redistributions of source code must retain the above copyright notice,
10// this list of conditions and the following disclaimer.
11//
12// 2. Redistributions in binary form must reproduce the above copyright notice,
13// this list of conditions and the following disclaimer in the documentation
14// and/or other materials provided with the distribution.
15//
16// 3. Neither the name of the copyright holder nor the names of its contributors
17// may be used to endorse or promote products derived from this software without
18// specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30// POSSIBILITY OF SUCH DAMAGE.
31//
32use crate::{Hashable, TRIE_BITS, TRIE_MASK, TRIE_SIZE};
33use std::mem::*;
34use std::sync::Arc;
35
36#[derive(Clone)]
37enum HashSetNode<K: Hashable + Eq + Clone> {
38    Empty,
39    One(usize, K),
40    Node(usize, Arc<[N<K>; TRIE_SIZE]>),
41}
42
43use HashSetNode::*;
44
45type N<K> = HashSetNode<K>;
46type H<K> = Arc<HashSetNode<K>>;
47
48impl<K: Hashable + Eq + Clone> HashSetNode<K> {
49    fn empty() -> H<K> {
50        H::new(Empty)
51    }
52
53    fn new_empty_slice() -> [N<K>; TRIE_SIZE] {
54        let mut s: [MaybeUninit<N<K>>; TRIE_SIZE] = unsafe { MaybeUninit::uninit().assume_init() };
55        for i in s.iter_mut().take(TRIE_SIZE) {
56            *i = MaybeUninit::new(N::Empty);
57        }
58        //
59        // // TODO: issue: https://github.com/rust-lang/rust/issues/61956
60        // // use transmute
61        //
62        // let ptr = &mut s as *mut _ as *mut [N<K>; TRIE_SIZE];
63        // let res = unsafe { ptr.read() };
64        // forget(s);
65        // res
66
67        unsafe { (&*(&MaybeUninit::new(s) as *const _ as *const MaybeUninit<_>)).assume_init_read() }
68    }
69
70    fn insert(h: &N<K>, l: u32, k: K) -> Option<N<K>> {
71        let kh = k.hash() as usize;
72        let idx = kh.wrapping_shr(l) & TRIE_MASK;
73
74        match h {
75            Empty => Some(N::One(kh, k)),
76            One(hh, k2) if kh == *hh && k == *k2 =>
77            /* (1) */
78            {
79                None
80            }
81            One(kh2, k2) => {
82                let mut slice = N::new_empty_slice();
83                slice[idx] = N::One(kh, k);
84                let idx2 = kh2.wrapping_shr(l) & TRIE_MASK;
85                if idx2 != idx {
86                    slice[idx2] = N::One(*kh2, k2.clone());
87                    let n = Node(2, Arc::new(slice));
88                    Some(n)
89                } else {
90                    let n = Node(1, Arc::new(slice));
91                    match N::insert(&n, l, k2.clone()) {
92                        Some(n2) => Some(n2), // return the new one
93                        None => Some(n),      // this case should never be exhausted: look at (1)
94                    }
95                }
96            }
97            Node(size, slice) => match N::insert(&slice[idx], l + TRIE_BITS, k) {
98                None => None,
99                Some(n) => {
100                    let mut slice2 = slice.as_ref().clone();
101                    slice2[idx] = n;
102                    Some(Node(size + 1, Arc::new(slice2)))
103                }
104            },
105        }
106    }
107
108    fn exist(h: &N<K>, l: u32, k: K) -> bool {
109        let kh = k.hash() as usize;
110        let idx = kh.wrapping_shr(l) & TRIE_MASK;
111
112        match h {
113            Empty => false,
114            One(hh, k2) => kh == *hh && k == *k2,
115            Node(_, slice) => N::exist(&slice[idx], l + TRIE_BITS, k),
116        }
117    }
118
119    fn remove(h: &N<K>, l: u32, k: K) -> Option<N<K>> {
120        let kh = k.hash() as usize;
121        let idx = kh.wrapping_shr(l) & TRIE_MASK;
122        match h {
123            Empty => None,
124            One(hh, k2) if kh == *hh && k == *k2 =>
125            /* (1) */
126            {
127                Some(Empty)
128            }
129            One(_, _) => None,
130            Node(size, slice) => match N::remove(&slice[idx], l + TRIE_BITS, k) {
131                None => None,
132                Some(n) if matches!(n, Empty) && *size == 1 => Some(Empty),
133                Some(n) => {
134                    let new_size = match n {
135                        Empty => size - 1,
136                        _ => *size,
137                    };
138                    let mut slice2 = slice.as_ref().clone();
139                    slice2[idx] = n;
140                    Some(Node(new_size, Arc::new(slice2)))
141                }
142            },
143        }
144    }
145
146    fn to_vec_internal(&self, v: &mut Vec<K>) {
147        match self {
148            Empty => (),
149            One(_, k) => v.push(k.clone()),
150            Node(_, slice) => {
151                for n in slice.as_ref() {
152                    n.to_vec_internal(v);
153                }
154            }
155        }
156    }
157
158    fn to_vec(&self) -> Vec<K> {
159        let mut v = Vec::new();
160        self.to_vec_internal(&mut v);
161        v
162    }
163}
164
165#[derive(Clone)]
166pub struct HashSet<K: Hashable + Eq + Clone> {
167    n: H<K>,
168    count: usize,
169}
170
171impl<K: Hashable + Eq + Clone> HashSet<K> {
172    ///
173    /// create and return a new empty set
174    ///
175    pub fn empty() -> Self {
176        Self { n: N::empty(), count: 0 }
177    }
178
179    ///
180    /// insert a new key and return a new set with the new element added to it
181    ///
182    pub fn insert(&self, k: K) -> Self {
183        let n = N::insert(self.n.as_ref(), 0, k.clone());
184        match n {
185            Some(n) => Self {
186                n: H::new(n),
187                count: self.count + 1,
188            },
189            None => {
190                // the key is already found, overwrite it
191                let n = N::insert(self.remove(k.clone()).n.as_ref(), 0, k).unwrap();
192                Self {
193                    n: H::new(n),
194                    count: self.count,
195                }
196            }
197        }
198    }
199
200    ///
201    /// remove a key and return a new set with the element removed to it
202    ///
203    pub fn remove(&self, k: K) -> Self {
204        let n = N::remove(self.n.as_ref(), 0, k);
205        match n {
206            Some(n) => Self {
207                n: H::new(n),
208                count: self.count - 1,
209            },
210            None => Self {
211                n: self.n.clone(),
212                count: self.count,
213            },
214        }
215    }
216
217    ///
218    /// walk the list/stack and build a vector of keys and return it
219    ///
220    pub fn exist(&self, k: K) -> bool {
221        N::exist(self.n.as_ref(), 0, k)
222    }
223
224    pub fn to_vec(&self) -> Vec<K> {
225        self.n.to_vec()
226    }
227
228    ///
229    /// return true if the set is empty
230    ///
231    pub fn is_true(&self) -> bool {
232        self.count == 0
233    }
234
235    ///
236    /// return true if the set is empty
237    ///
238    pub fn is_empty(&self) -> bool {
239        self.count == 0
240    }
241
242    ///
243    /// return the number of elements in the set
244    ///
245    pub fn len(&self) -> usize {
246        self.count
247    }
248
249    ///
250    /// returns an iterator
251    ///
252    pub fn iter<'a>(&self) -> Iter<'a, K> {
253        Iter {
254            stack: Vec::new(),
255            current: Pointer { node: self.n.clone(), idx: 0 },
256            _phantom: PhantomData::default(),
257        }
258    }
259}
260
261#[derive(Clone)]
262struct Pointer<E: Clone + Eq + Hashable> {
263    idx: usize,
264    node: H<E>,
265}
266
267pub struct Iter<'a, E: Clone + Eq + Hashable> {
268    stack: Vec<Pointer<E>>,
269    current: Pointer<E>,
270    _phantom: PhantomData<&'a E>,
271}
272
273impl<'a, E: Clone + Eq + Hashable> Iter<'a, E> {
274    fn pop(&mut self) {
275        match self.stack.pop() {
276            Some(Pointer { idx: i, node: n }) => self.current = Pointer { idx: i + 1, node: n },
277
278            None => {
279                self.current = Pointer {
280                    idx: 0,
281                    node: Arc::new(HashSetNode::Empty),
282                }
283            }
284        }
285    }
286}
287
288impl<'a, E: Clone + Eq + Hashable> std::iter::Iterator for Iter<'a, E> {
289    type Item = E;
290
291    fn next(&mut self) -> Option<Self::Item> {
292        let nc = self.current.clone(); // needless, but required for the borrow checker
293        let n = nc.node.as_ref();
294        match n {
295            HashSetNode::Empty => {
296                // we only enter this one if the root can be empty!
297                None
298            }
299
300            HashSetNode::One(_k, v) => {
301                // we only enter this one if it's in the root!
302                if self.current.idx == 0 {
303                    self.current.idx += 1;
304                    Some(v.clone())
305                } else {
306                    None
307                }
308            }
309
310            HashSetNode::Node(size, entries) => {
311                while self.current.idx < TRIE_SIZE {
312                    match &entries[self.current.idx] {
313                        HashSetNode::Empty => self.current.idx += 1,
314                        HashSetNode::One(_k, v) => {
315                            self.current.idx += 1;
316                            return Some(v.clone());
317                        }
318                        HashSetNode::Node(new_size, new_entries) => {
319                            self.stack.push(Pointer {
320                                idx: self.current.idx,
321                                node: Arc::new(HashSetNode::Node(*size, entries.clone())),
322                            });
323                            self.current = Pointer {
324                                idx: 0,
325                                node: Arc::new(HashSetNode::Node(*new_size, new_entries.clone())),
326                            };
327                            return self.next();
328                        }
329                    }
330                }
331                self.pop();
332                self.next()
333            }
334        }
335    }
336}
337
338#[cfg(test)]
339mod tests {
340    use crate::hashset::*;
341
342    static mut SEED: usize = 777;
343
344    fn rand() -> usize {
345        unsafe {
346            SEED = SEED.wrapping_mul(1664525).wrapping_add(1013904223);
347            (SEED >> 24) as i32 as usize
348        }
349    }
350
351    #[test]
352    fn insert() {
353        let numbers = [3, 3, 0x13, 120, 4, 9, 27, 1, 45];
354        let mut n = HashSet::empty();
355        for i in numbers {
356            n = n.insert(i);
357        }
358
359        assert_eq!(n.len(), 8);
360
361        for i in 0..numbers.len() {
362            assert_eq!(n.exist(numbers[i]), true);
363        }
364    }
365
366    #[test]
367    fn remove() {
368        let numbers = [3, 3, 0x13, 120, 4, 9, 27, 1, 45];
369        let mut n = HashSet::empty();
370        for i in numbers {
371            n = n.insert(i);
372        }
373
374        assert_eq!(n.len(), 8);
375
376        for i in 0..numbers.len() {
377            assert_eq!(n.exist(numbers[i]), true);
378        }
379
380        for i in numbers {
381            n = n.remove(i);
382            assert_eq!(n.exist(i), false);
383        }
384    }
385
386    #[test]
387    fn insert_1000000() {
388        let mut numbers = Vec::new();
389        let mut n = HashSet::empty();
390        for _ in 0..1000000 {
391            let r = rand() % 100000;
392            n = n.insert(r);
393            numbers.push(r);
394        }
395
396        let mut sorted = numbers.clone();
397        sorted.sort();
398        sorted.dedup();
399
400        assert_eq!(n.len(), sorted.len());
401
402        for i in 0..numbers.len() {
403            assert_eq!(n.exist(numbers[i]), true);
404        }
405
406        let mut v = n.to_vec();
407        v.sort();
408        assert_eq!(v.len(), sorted.len());
409        for i in 0..sorted.len() {
410            assert_eq!(sorted[i], v[i]);
411        }
412    }
413
414    #[test]
415    fn remove_1000000() {
416        let mut numbers = Vec::new();
417        let mut n = HashSet::empty();
418        for _ in 0..1000000 {
419            let r = rand() % 100000;
420            n = n.insert(r);
421            numbers.push(r);
422        }
423
424        let mut sorted = numbers.clone();
425        sorted.sort();
426        sorted.dedup();
427
428        assert_eq!(n.len(), sorted.len());
429
430        for i in 0..numbers.len() {
431            assert_eq!(n.exist(numbers[i]), true);
432        }
433
434        let mut v = n.to_vec();
435        v.sort();
436        assert_eq!(v.len(), sorted.len());
437        for i in sorted {
438            n = n.remove(i);
439            assert_eq!(n.exist(i), false);
440        }
441
442        assert_eq!(n.len(), 0);
443    }
444
445    #[test]
446    fn iter_1000000() {
447        let mut numbers = Vec::new();
448        let mut n = HashSet::empty();
449        for _ in 0..1000000 {
450            let r = rand() % 100000;
451            n = n.insert(r);
452            numbers.push(r);
453        }
454
455        let mut sorted = numbers.clone();
456        sorted.sort();
457        sorted.dedup();
458
459        assert_eq!(n.len(), sorted.len());
460
461        for i in 0..numbers.len() {
462            assert_eq!(n.exist(numbers[i]), true);
463        }
464
465        let mut v = n.iter().collect::<Vec<_>>();
466        v.sort();
467        assert_eq!(v.len(), sorted.len());
468        for i in 0..sorted.len() {
469            assert_eq!(sorted[i], v[i]);
470        }
471    }
472
473    #[test]
474    fn iter_1() {
475        let mut n = HashSet::empty();
476        n = n.insert(1);
477        for c in n.iter() {
478            assert_eq!(c, 1);
479        }
480    }
481}