david_set/
castset.rs

1use std;
2
3/// Trait for any type that can be converted to a `usize`.  This could
4/// actually be a hash function, but we will assume that it is *fast*,
5/// so I'm not calling it `Hash`.
6pub trait Cast : Copy+Eq {
7    /// Convert to a `usize`.
8    fn cast(self) -> usize;
9    /// A unique invalid value for this type.  If you cannot identify
10    /// an invalid value, then you don't get to use CastSet, since we
11    /// would need to store an `Option<T>` which would probably double
12    /// the size of the set.
13    fn invalid() -> Self;
14}
15
16impl Cast for usize {
17    fn cast(self) -> usize { self }
18    fn invalid() -> Self { (-1 as i64) as Self }
19}
20impl Cast for u64 {
21    fn cast(self) -> usize { self as usize }
22    fn invalid() -> Self { (-1 as i64) as Self }
23}
24impl Cast for u32 {
25    fn cast(self) -> usize { self as usize }
26    fn invalid() -> Self { (-1 as i32) as Self }
27}
28impl Cast for u16 {
29    fn cast(self) -> usize { self as usize }
30    fn invalid() -> Self { (-1 as i16) as Self }
31}
32impl Cast for u8 {
33    fn cast(self) -> usize { self as usize }
34    fn invalid() -> Self { (-1 as i8) as Self }
35}
36
37enum SearchResult {
38    Present(usize),
39    Empty(usize),
40    /// The element is not present, but there is someone richer than
41    /// us we could steal from!
42    Richer(usize),
43}
44
45/// A set implemented for types that can be cast to usize
46#[derive(Debug,Clone)]
47pub struct CastSet<T: Cast> {
48    v: Data<T>,
49}
50
51#[derive(Debug, Clone)]
52enum Data<T: Cast> {
53    Sm(u32, [usize; 2]),
54    V(u32, Box<[T]>)
55}
56impl<T: Cast> Data<T> {
57    fn cutoff() -> usize {
58        std::mem::size_of::<[usize;2]>()/std::mem::size_of::<T>()
59    }
60    fn new() -> Data<T> {
61        let num = Data::<T>::cutoff();
62        let mut v = Data::Sm(0,[0;2]);
63        for i in 0..num {
64            v.mu()[i] = T::invalid();
65        }
66        v
67    }
68    fn len(&self) -> usize {
69        match self {
70            &Data::Sm(_,_) => {
71                Data::<T>::cutoff()
72            },
73            &Data::V(_,ref v) => v.len(),
74        }
75    }
76    fn sl(&self) -> &[T] {
77        match self {
78            &Data::Sm(_,ref v) => {
79                let num = Data::<T>::cutoff();
80                match num {
81                    1 => unsafe { std::mem::transmute::<&[usize;2],&[T;1]>(v) },
82                    2 => unsafe { std::mem::transmute::<&[usize;2],&[T;2]>(v) },
83                    4 => unsafe { std::mem::transmute::<&[usize;2],&[T;4]>(v) },
84                    8 => unsafe { std::mem::transmute::<&[usize;2],&[T;8]>(v) },
85                    16 => unsafe { std::mem::transmute::<&[usize;2],&[T;16]>(v) },
86                    _ => unreachable!(),
87                }
88            },
89            &Data::V(_,ref v) => v,
90        }
91    }
92    fn mu(&mut self) -> &mut [T] {
93        match self {
94            &mut Data::Sm(_,ref mut v) => {
95                let num = Data::<T>::cutoff();
96                match num {
97                    1 => unsafe { std::mem::transmute::<&mut [usize;2],&mut [T;1]>(v) },
98                    2 => unsafe { std::mem::transmute::<&mut [usize;2],&mut [T;2]>(v) },
99                    4 => unsafe { std::mem::transmute::<&mut [usize;2],&mut [T;4]>(v) },
100                    8 => unsafe { std::mem::transmute::<&mut [usize;2],&mut [T;8]>(v) },
101                    16 => unsafe { std::mem::transmute::<&mut [usize;2],&mut [T;16]>(v) },
102                    _ => unreachable!(),
103                }
104            },
105            &mut Data::V(_,ref mut v) => v,
106        }
107    }
108}
109
110fn capacity_to_rawcapacity(cap: usize) -> usize {
111    (1+cap*11/10).next_power_of_two()
112}
113
114impl<T: Cast> CastSet<T> {
115    fn mut_sz(&mut self) -> &mut u32 {
116        match &mut self.v {
117            &mut Data::Sm(ref mut sz,_) => sz,
118            &mut Data::V(ref mut sz,_) => sz,
119        }
120    }
121    /// Creates an empty set..
122    pub fn new() -> CastSet<T> {
123        CastSet::with_capacity(0)
124    }
125    /// Creates an empty set with the specified capacity.
126    pub fn with_capacity(cap: usize) -> CastSet<T> {
127        if cap <= Data::<T>::cutoff() {
128            CastSet { v: Data::new() }
129        } else {
130            let cap = capacity_to_rawcapacity(cap);
131            CastSet {
132                v: Data::V(0, vec![T::invalid(); cap].into_boxed_slice()),
133            }
134        }
135    }
136    /// Returns the number of elements in the set.
137    pub fn len(&self) -> usize {
138        match &self.v {
139            &Data::Sm(sz,_) => sz as usize,
140            &Data::V(sz,_) => sz as usize,
141        }
142    }
143    /// Reserves capacity for at least `additional` more elements to be
144    /// inserted in the set. The collection may reserve more space
145    /// to avoid frequent reallocations.
146    pub fn reserve(&mut self, additional: usize) {
147        let cap = capacity_to_rawcapacity(self.len() + additional);
148        if cap > self.v.sl().len() {
149            let oldv = std::mem::replace(&mut self.v,
150                                         Data::V(0,vec![T::invalid(); cap]
151                                                     .into_boxed_slice()));
152            let invalid = T::invalid();
153            for &e in oldv.sl().iter() {
154                if e != invalid {
155                    self.insert_unchecked(e);
156                }
157            }
158        }
159    }
160    /// Adds a value to the set.
161    ///
162    /// If the set did not have this value present, `true` is returned.
163    ///
164    /// If the set did have this value present, `false` is returned.
165    pub fn insert(&mut self, elem: T) -> bool {
166        self.reserve(1);
167        self.insert_unchecked(elem)
168    }
169    fn insert_unchecked(&mut self, mut elem: T) -> bool {
170        match self.search(elem) {
171            SearchResult::Present(_) => false,
172            SearchResult::Empty(i) => {
173                self.v.mu()[i] = elem;
174                *self.mut_sz() += 1;
175                true
176            },
177            SearchResult::Richer(i) => {
178                std::mem::swap(&mut elem, &mut self.v.mu()[i]);
179                self.steal(i, elem);
180                *self.mut_sz() += 1;
181                true
182            },
183        }
184    }
185    /// Returns true if the set contains a value.
186    pub fn contains(&self, value: &T) -> bool {
187        match self.search(*value) {
188            SearchResult::Present(_) => true,
189            SearchResult::Empty(_) => false,
190            SearchResult::Richer(_) => false,
191        }
192    }
193    /// Removes an element, and returns true if that element was present.
194    pub fn remove(&mut self, value: &T) -> bool {
195        match self.search(*value) {
196            SearchResult::Present(mut i) => {
197                *self.mut_sz() -= 1;
198                let mut v = self.v.mu();
199                let mask = v.len() - 1;
200                let invalid = T::invalid();
201                loop {
202                    let iplus1 = (i+1) & mask;
203                    if v[iplus1] == invalid ||
204                        (v[iplus1].cast().wrapping_sub(iplus1) & mask) == 0
205                    {
206                        v[i] = invalid;
207                        return true;
208                    }
209                    v[i] = v[iplus1];
210                    i = iplus1;
211                }
212            },
213            SearchResult::Empty(_) => false,
214            SearchResult::Richer(_) => false,
215        }
216    }
217    fn steal(&mut self, mut i: usize, mut elem: T) {
218        loop {
219            match self.search_from(i, elem) {
220                SearchResult::Present(_) => return,
221                SearchResult::Empty(i) => {
222                    self.v.mu()[i] = elem;
223                    return;
224                },
225                SearchResult::Richer(inew) => {
226                    std::mem::swap(&mut elem, &mut self.v.mu()[inew]);
227                    i = inew;
228                },
229        }
230        }
231    }
232    fn search(&self, elem: T) -> SearchResult {
233        let h = elem.cast();
234        let mask = self.v.len() - 1;
235        let invalid = T::invalid();
236        let mut dist = 0;
237        let v = self.v.sl();
238        loop {
239            let i = h+dist & mask;
240            if v[i] == invalid {
241                return SearchResult::Empty(i);
242            } else if v[i] == elem {
243                return SearchResult::Present(i);
244            }
245            // the following is a bit contorted, to compute distance
246            // when wrapped.
247            let his_dist = i.wrapping_sub(v[i].cast()) & mask;
248            if his_dist < dist {
249                return SearchResult::Richer(i);
250            }
251            dist += 1;
252            assert!(dist < v.len());
253        }
254    }
255    fn search_from(&self, i_start: usize, elem: T) -> SearchResult {
256        let h = elem.cast();
257        let mask = self.v.len() - 1;
258        let invalid = T::invalid();
259        let mut dist = i_start.wrapping_sub(h.cast()) & mask;
260        let v = self.v.sl();
261        loop {
262            let i = h+dist & mask;
263            if v[i] == invalid {
264                return SearchResult::Empty(i);
265            } else if v[i] == elem {
266                return SearchResult::Present(i);
267            }
268            // the following is a bit contorted, to compute distance
269            // when wrapped.
270            let his_dist = i.wrapping_sub(v[i].cast()) & mask;
271            if his_dist < dist {
272                return SearchResult::Richer(i);
273            }
274            dist += 1;
275            assert!(dist < v.len());
276        }
277    }
278    /// Returns an iterator over the set.
279    pub fn iter(&self) -> Iter<T> {
280        Iter {
281            slice: self.v.sl(),
282            nleft: self.len(),
283        }
284    }
285    /// Clears the set, returning all elements in an iterator.
286    pub fn drain(&mut self) -> IntoIter<T> {
287        let set = std::mem::replace(self, CastSet::new());
288        let sz = set.len();
289        IntoIter { set: set, nleft: sz }
290    }
291}
292
293pub struct Iter<'a, T: 'a+Cast> {
294    slice: &'a [T],
295    nleft: usize,
296}
297
298impl<'a, T: 'a+Cast> Iterator for Iter<'a, T> {
299    type Item = &'a T;
300    fn next(&mut self) -> Option<&'a T> {
301        if self.nleft == 0 {
302            None
303        } else {
304            assert!(self.slice.len() >= self.nleft as usize);
305            while self.slice[0] == T::invalid() {
306                self.slice = self.slice.split_first().unwrap().1;
307            }
308            let val = &self.slice[0];
309            self.slice = self.slice.split_first().unwrap().1;
310            self.nleft -= 1;
311            Some(val)
312        }
313    }
314    fn size_hint(&self) -> (usize, Option<usize>) {
315        (self.nleft, Some(self.nleft))
316    }
317}
318
319impl<'a, T: Cast> IntoIterator for &'a CastSet<T> {
320    type Item = &'a T;
321    type IntoIter = Iter<'a, T>;
322
323    fn into_iter(self) -> Iter<'a, T> {
324        self.iter()
325    }
326}
327
328pub struct IntoIter<T: Cast> {
329    set: CastSet<T>,
330    nleft: usize,
331}
332
333impl<T: Cast> Iterator for IntoIter<T> {
334    type Item = T;
335    fn next(&mut self) -> Option<T> {
336        if self.nleft == 0 {
337            None
338        } else {
339            self.nleft -= 1;
340            let mut i = self.nleft;
341            loop {
342                let val = std::mem::replace(&mut self.set.v.mu()[i], T::invalid());
343                if val != T::invalid() {
344                    return Some(val);
345                }
346                i -= 1;
347            }
348        }
349    }
350    fn size_hint(&self) -> (usize, Option<usize>) {
351        (self.nleft, Some(self.nleft))
352    }
353}
354
355
356#[cfg(test)]
357mod tests {
358    use super::*;
359    use std::collections::HashSet;
360    use rand::{XorShiftRng, SeedableRng, Rand};
361    #[test]
362    fn it_works() {
363        let mut ss: CastSet<usize> = CastSet::new();
364        println!("inserting 5");
365        ss.insert(5);
366        println!("contains 5");
367        assert!(ss.contains(&5));
368        println!("contains 4");
369        assert!(!ss.contains(&4));
370        println!("inserting 3");
371        ss.insert(3);
372        println!("now {:?}", &ss);
373        assert!(ss.contains(&3));
374        assert!(ss.contains(&5));
375        assert_eq!(ss.len(), 2);
376        for num in ss.iter() {
377            println!("num is {}", num);
378            assert!(ss.contains(num));
379        }
380        assert!(!ss.remove(&2));
381        assert!(ss.remove(&3));
382        assert!(!ss.contains(&3));
383        assert_eq!(ss.len(), 1);
384    }
385    #[test]
386    fn size_unwasted() {
387        println!("small size: {}", std::mem::size_of::<CastSet<usize>>());
388        println!(" hash size: {}", std::mem::size_of::<HashSet<usize>>());
389        assert!(std::mem::size_of::<CastSet<usize>>() <=
390                2*std::mem::size_of::<HashSet<usize>>());
391        assert!(std::mem::size_of::<CastSet<usize>>() <= 24);
392    }
393
394    macro_rules! initialize {
395        ($set: ident, $item: ident, $num: expr) => {{
396            let mut rng = XorShiftRng::from_seed([$num as u32,$num as u32,3,4]);
397            let mut set = $set::<$item>::new();
398            let mut refset = HashSet::<$item>::new();
399            if $num > 0 {
400                while set.len() < $num {
401                    let ins = $item::rand(&mut rng) % (2*$num as $item);
402                    let rem = $item::rand(&mut rng) % (2*$num as $item);
403                    set.insert(ins);
404                    if !set.contains(&ins) {
405                        println!("oops insert");
406                    }
407                    set.remove(&rem);
408                    if set.contains(&rem) {
409                        println!("oops remove");
410                    }
411                    refset.insert(ins);
412                    refset.remove(&rem);
413                    println!("inserting {}, removing {} => {}", ins, rem, set.len());
414                    println!("set: {:?}", set);
415                    println!("refset: {:?}", refset);
416                    let mut fails = false;
417                    for i in 0..255 {
418                        fails = fails || set.contains(&i) != refset.contains(&i);
419                    }
420                    if fails {
421                        for i in 0..255 {
422                            println!("i {}", i);
423                            assert_eq!(set.contains(&i), refset.contains(&i));
424                        }
425                    }
426                }
427            }
428            set
429        }};
430    }
431
432    #[test]
433    fn random_inserts_and_removals_u8() {
434        for sz in 0..50 {
435            println!("\nCastSet {}\n", sz);
436            let myset = initialize!(CastSet, u8, sz);
437            println!("\nHashSet {}\n", sz);
438            let refset = initialize!(HashSet, u8, sz);
439            for i in 0..255 {
440                assert_eq!(myset.contains(&i), refset.contains(&i));
441            }
442        }
443    }
444
445    #[test]
446    fn random_inserts_and_removals_u16() {
447        for sz in 0..20 {
448            println!("\nCastSet {}\n", sz);
449            let myset = initialize!(CastSet, u16, sz);
450            println!("\nHashSet {}\n", sz);
451            let refset = initialize!(HashSet, u16, sz);
452            for i in 0..50 {
453                assert_eq!(myset.contains(&i), refset.contains(&i));
454            }
455        }
456    }
457
458    #[test]
459    fn test_matches_u8() {
460        let mut steps: Vec<Result<u8,u8>> = vec![Err(8), Ok(0), Ok(16), Ok(1), Ok(8)];
461        let mut set = CastSet::<u8>::new();
462        let mut refset = HashSet::<u8>::new();
463        loop {
464            match steps.pop() {
465                Some(Ok(v)) => {
466                    println!("\ninserting {}", v);
467                    set.insert(v); refset.insert(v);
468                },
469                Some(Err(v)) => {
470                    println!("\nremoving {}", v);
471                    set.remove(&v); refset.remove(&v);
472                },
473                None => return,
474            }
475            println!("set: {:?}", set);
476            println!("refset: {:?}", refset);
477            assert_eq!(set.len(), refset.len());
478            for i in 0..255 {
479                if set.contains(&i) != refset.contains(&i) {
480                    println!("trouble at {}", i);
481                    assert_eq!(set.contains(&i), refset.contains(&i));
482                }
483            }
484        }
485    }
486
487    #[cfg(test)]
488    quickcheck! {
489        fn prop_matches_u8(steps: Vec<Result<u8,u8>>) -> bool {
490            let mut steps = steps;
491            let mut set = CastSet::<u8>::new();
492            let mut refset = HashSet::<u8>::new();
493            loop {
494                match steps.pop() {
495                    Some(Ok(v)) => {
496                        set.insert(v); refset.insert(v);
497                    },
498                    Some(Err(v)) => {
499                        set.remove(&v); refset.remove(&v);
500                    },
501                    None => return true,
502                }
503                if set.len() != refset.len() { return false; }
504                for i in 0..255 {
505                    if set.contains(&i) != refset.contains(&i) { return false; }
506                }
507            }
508        }
509    }
510
511    #[cfg(test)]
512    quickcheck! {
513        fn prop_matches_usize(steps: Vec<Result<usize,usize>>) -> bool {
514            let mut steps = steps;
515            let mut set = CastSet::<usize>::new();
516            let mut refset = HashSet::<usize>::new();
517            loop {
518                match steps.pop() {
519                    Some(Ok(v)) => {
520                        set.insert(v); refset.insert(v);
521                    },
522                    Some(Err(v)) => {
523                        set.remove(&v); refset.remove(&v);
524                    },
525                    None => return true,
526                }
527                if set.len() != refset.len() { return false; }
528                for i in 0..2550 {
529                    if set.contains(&i) != refset.contains(&i) { return false; }
530                }
531            }
532        }
533    }
534}
535