david_set/
copyset.rs

1
2use std;
3use std::collections::HashSet;
4use std::hash::Hash;
5use std::borrow::Borrow;
6
7/// The number of elements stored in an array before moving up to the
8/// `HashSet` implementation.
9pub const CAPACITY: usize = 8;
10
11/// A set that is a `HashSet` when it has many elements, but is just
12/// an array for small set sizes.
13///
14/// As with the `HashSet` type, a `Set` requires that the
15/// elements implement the Eq and Hash traits.  This can frequently be
16/// achieved by using #[derive(PartialEq, Eq, Hash)]. In addition,
17/// `Set` requires that the elements implement the `Copy` trait,
18/// and really they should be pretty small, since Set always
19/// stores room for `CAPACITY` elements.
20#[derive(Debug, Clone)]
21pub struct Set<T: Copy + Eq + Hash> {
22    inner: SS<T>,
23}
24
25#[derive(Debug, Clone)]
26enum SS<T: Copy+Eq+Hash> {
27    Small(usize, [T;CAPACITY]),
28    Large(HashSet<T>),
29}
30
31/// An iterator for consuming sets.
32pub struct IntoIter<T: Copy+Eq+Hash> {
33    inner: IntoIt<T>,
34}
35
36enum IntoIt<T: Copy+Eq+Hash> {
37    Small(std::vec::IntoIter<T>),
38    Large(std::collections::hash_set::IntoIter<T>),
39}
40
41/// An iterator for sets.
42pub struct Iter<'a, T: 'a+Copy+Eq+Hash> {
43    inner: It<'a, T>,
44}
45
46enum It<'a, T: 'a+Copy+Eq+Hash> {
47    Small(std::slice::Iter<'a, T>),
48    Large(std::collections::hash_set::Iter<'a, T>),
49}
50
51impl<T: Copy+Eq+Hash> Set<T> {
52    /// Creates an empty set..
53    pub fn new() -> Set<T> {
54        Set { inner: SS::Small(0, unsafe { std::mem::uninitialized() }) }
55    }
56    /// Creates an empty set with the specified capacity.
57    pub fn with_capacity(cap: usize) -> Set<T> {
58        if cap > CAPACITY {
59            Set { inner: SS::Large(HashSet::with_capacity(cap)) }
60        } else {
61            Set::new()
62        }
63    }
64    /// Returns the number of elements in the set.
65    pub fn len(&self) -> usize {
66        match self.inner {
67            SS::Large(ref s) => s.len(),
68            SS::Small(len, _) => len,
69        }
70    }
71    /// Reserves capacity for at least `additional` more elements to be
72    /// inserted in the set. The collection may reserve more space
73    /// to avoid frequent reallocations.
74    pub fn reserve(&mut self, additional: usize) {
75        match self.inner {
76            SS::Large(ref mut s) => {
77                s.reserve(additional);
78                return;
79            },
80            SS::Small(len, arr) => {
81                let mut s = HashSet::with_capacity(additional+CAPACITY);
82                for i in 0..len {
83                    s.insert(arr[i]);
84                }
85                *self = Set { inner: SS::Large(s) }
86            },
87        }
88    }
89    /// Adds a value to the set.
90    ///
91    /// If the set did not have this value present, `true` is returned.
92    ///
93    /// If the set did have this value present, `false` is returned.
94    pub fn insert(&mut self, elem: T) -> bool {
95        match self.inner {
96            SS::Large(ref mut s) => {
97                return s.insert(elem);
98            },
99            SS::Small(ref mut len, ref mut arr) => {
100                for i in 0 .. *len {
101                    if arr[i] == elem {
102                        return false;
103                    }
104                }
105                if *len < CAPACITY {
106                    arr[*len] = elem;
107                    *len += 1;
108                    return true;
109                }
110            },
111        }
112        match self.inner {
113            SS::Large(_) => unreachable!(),
114            SS::Small(len, arr) => {
115                let mut s = HashSet::with_capacity(1+CAPACITY);
116                for i in 0..len {
117                    s.insert(arr[i]);
118                }
119                s.insert(elem);
120                *self = Set { inner: SS::Large(s) };
121                true
122            },
123        }
124    }
125    /// Removes an element, and returns true if that element was present.
126    pub fn remove<Q: ?Sized>(&mut self, value: &Q) -> bool
127        where
128        T: Borrow<Q>, Q: Hash + Eq,
129    {
130        match self.inner {
131            SS::Large(ref mut s) => s.remove(value),
132            SS::Small(ref mut len, ref mut arr) => {
133                for i in 0..*len {
134                    if arr[i].borrow() == value {
135                        *len -= 1;
136                        for j in i..*len {
137                            arr[j] = arr[j+1];
138                        }
139                        return true;
140                    }
141                }
142                false
143            },
144        }
145    }
146    /// Returns true if the set contains a value.
147    pub fn contains<Q: ?Sized>(&self, value: &Q) -> bool
148        where
149        T: Borrow<Q>, Q: Hash + Eq,
150    {
151        match self.inner {
152            SS::Large(ref s) => {
153                s.contains(value)
154            },
155            SS::Small(len, ref arr) => {
156                for i in 0 .. len {
157                    if arr[i].borrow() == value {
158                        return true;
159                    }
160                }
161                false
162            },
163        }
164    }
165    /// Returns an iterator over the set.
166    pub fn iter(&self) -> Iter<T> {
167        Iter {
168            inner:
169            match self.inner {
170                SS::Large(ref s) => {
171                    It::Large(s.iter())
172                },
173                SS::Small(len, ref arr) => {
174                    It::Small(arr[0..len].iter())
175                },
176            }
177        }
178    }
179    /// Clears the set, returning all elements in an iterator.
180    pub fn drain(&mut self) -> IntoIter<T> {
181        let mut s = Set::new();
182        std::mem::swap(&mut s, self);
183        IntoIter {
184            inner:
185            match s.inner {
186                SS::Large(s) => {
187                    IntoIt::Large(s.into_iter())
188                },
189                SS::Small(len, ref arr) => {
190                    IntoIt::Small(Vec::from(&arr[0..len]).into_iter())
191                },
192            }
193        }
194    }
195}
196
197impl<T: Hash+Copy+Eq> std::iter::FromIterator<T> for Set<T> {
198    fn from_iter<I: IntoIterator<Item=T>>(iter: I) -> Self {
199        let iter = iter.into_iter();
200        let (sz,_) = iter.size_hint();
201        let mut c = Set::with_capacity(sz);
202        for i in iter {
203            c.insert(i);
204        }
205        c
206    }
207}
208
209impl<'a, T: 'a+Eq+Hash+Copy> Iterator for Iter<'a, T> {
210    type Item = &'a T;
211    fn next(&mut self) -> Option<&'a T> {
212        match self.inner {
213            It::Large(ref mut it) => it.next(),
214            It::Small(ref mut it) => it.next(),
215        }
216    }
217    fn size_hint(&self) -> (usize, Option<usize>) {
218        match self.inner {
219            It::Large(ref it) => it.size_hint(),
220            It::Small(ref it) => it.size_hint(),
221        }
222    }
223}
224
225impl<T: Eq+Hash+Copy> Iterator for IntoIter<T> {
226    type Item = T;
227    fn next(&mut self) -> Option<T> {
228        match self.inner {
229            IntoIt::Large(ref mut it) => it.next(),
230            IntoIt::Small(ref mut it) => it.next(),
231        }
232    }
233    fn size_hint(&self) -> (usize, Option<usize>) {
234        match self.inner {
235            IntoIt::Large(ref it) => it.size_hint(),
236            IntoIt::Small(ref it) => it.size_hint(),
237        }
238    }
239}
240
241impl<'a, T: Eq+Hash+Copy> IntoIterator for &'a Set<T> {
242    type Item = &'a T;
243    type IntoIter = Iter<'a, T>;
244
245    fn into_iter(self) -> Iter<'a, T> {
246        self.iter()
247    }
248}
249
250impl<T: Eq+Hash+Copy> IntoIterator for Set<T> {
251    type Item = T;
252    type IntoIter = IntoIter<T>;
253
254    /// Creates a consuming iterator, that is, one that moves each value out
255    /// of the set in arbitrary order. The set cannot be used after calling
256    /// this.
257    ///
258    /// # Examples
259    ///
260    /// ```
261    /// use david_set::Set;
262    /// let mut set: Set<u32> = Set::new();
263    /// set.insert(2);
264    /// set.insert(5);
265    ///
266    /// // Not possible to collect to a Vec<String> with a regular `.iter()`.
267    /// let v: Vec<_> = set.into_iter().collect();
268    ///
269    /// // Will print in an arbitrary order.
270    /// for x in &v {
271    ///     println!("{}", x);
272    /// }
273    /// ```
274    fn into_iter(self) -> IntoIter<T> {
275        IntoIter {
276            inner:
277            match self.inner {
278                SS::Large(s) => {
279                    IntoIt::Large(s.into_iter())
280                },
281                SS::Small(len, arr) => {
282                    IntoIt::Small(Vec::from(&arr[0..len]).into_iter())
283                },
284            }
285        }
286    }
287}
288
289impl<'a, 'b, T: Eq+Hash+Copy> std::ops::Sub<&'b Set<T>> for &'a Set<T> {
290    type Output = Set<T>;
291
292    /// Returns the difference of `self` and `rhs` as a new `Set<T>`.
293    ///
294    /// # Examples
295    ///
296    /// ```
297    /// use david_set::Set;
298    ///
299    /// let a: Set<u32> = vec![1, 2, 3].into_iter().collect();
300    /// let b: Set<u32> = vec![3, 4, 5].into_iter().collect();
301    ///
302    /// let set = &a - &b;
303    ///
304    /// let mut i = 0;
305    /// let expected = [1, 2];
306    /// for x in &set {
307    ///     assert!(expected.contains(x));
308    ///     i += 1;
309    /// }
310    /// assert_eq!(i, expected.len());
311    /// ```
312    fn sub(self, rhs: &Set<T>) -> Set<T> {
313        let mut s = Set::with_capacity(self.len());
314        for v in self.iter() {
315            if !rhs.contains(v) {
316                s.insert(*v);
317            }
318        }
319        s
320    }
321}
322
323impl<T: Eq+Hash+Copy> Extend<T> for Set<T> {
324    /// Adds a bunch of elements to the set
325    ///
326    /// # Examples
327    ///
328    /// ```
329    /// use david_set::Set;
330    ///
331    /// let mut a: Set<u32> = vec![1, 2, 3].into_iter().collect();
332    /// a.extend(vec![3, 4, 5]);
333    ///
334    /// let mut i = 0;
335    /// let expected = [1, 2, 3, 4, 5];
336    /// for x in &a {
337    ///     assert!(expected.contains(x));
338    ///     i += 1;
339    /// }
340    /// assert_eq!(i, expected.len());
341    /// ```
342    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
343        let iter = iter.into_iter();
344        let (sz,_) = iter.size_hint();
345        self.reserve(sz);
346        for i in iter {
347            self.insert(i);
348        }
349    }
350}
351
352impl<'a, 'b, T: Eq+Hash+Copy> std::ops::BitOr<&'b Set<T>> for &'a Set<T> {
353    type Output = Set<T>;
354
355    /// Returns the union of `self` and `rhs` as a new `Set<T>`.
356    ///
357    /// # Examples
358    ///
359    /// ```
360    /// use david_set::Set;
361    ///
362    /// let a: Set<u32> = vec![1, 2, 3].into_iter().collect();
363    /// let b: Set<u32> = vec![3, 4, 5].into_iter().collect();
364    ///
365    /// let set = &a | &b;
366    ///
367    /// let mut i = 0;
368    /// let expected = [1, 2, 3, 4, 5];
369    /// for x in &set {
370    ///     assert!(expected.contains(x));
371    ///     i += 1;
372    /// }
373    /// assert_eq!(i, expected.len());
374    /// ```
375    fn bitor(self, rhs: &Set<T>) -> Set<T> {
376        let mut s: Set<T> = Set::with_capacity(self.len() + rhs.len());
377        for &x in self.iter() {
378            s.insert(x);
379        }
380        for &x in rhs.iter() {
381            s.insert(x);
382        }
383        s
384    }
385}
386
387#[cfg(test)]
388mod tests {
389    use super::*;
390    #[test]
391    fn it_works() {
392        let mut ss: Set<usize> = Set::new();
393        ss.insert(5);
394        assert!(ss.contains(&5));
395        assert!(!ss.contains(&4));
396        ss.insert(3);
397        println!("now {:?}", &ss);
398        assert!(ss.contains(&3));
399        assert!(ss.contains(&5));
400        assert!(ss.len() == 2);
401        for num in ss.iter() {
402            assert!(ss.contains(num));
403        }
404    }
405    #[test]
406    fn size_unwasted() {
407        println!("small size: {}", std::mem::size_of::<Set<usize>>());
408        println!(" hash size: {}", std::mem::size_of::<HashSet<usize>>());
409        assert!(std::mem::size_of::<Set<usize>>() <=
410                2*std::mem::size_of::<HashSet<usize>>());
411    }
412}