indexed_hash_set/
lib.rs

1//! This crate provides a bidirectional set with the following properties:
2//! - Each entry is distinct (no double elements).
3//! - Entries can be accessed by reference, like in a standard `HashSet`.
4//! - Entries can be accessed via indices avoiding the cost of hashing the element.
5//! - Indices are reference-counted. This means when no external index is around
6//!   an entry is considered _unused_ and will be dropped on [`drop_unused()`].
7//! - Internal a [generational arena] is used to allow for effective mutation
8//!   of the set.
9//!
10//! [`drop_unused()`]: ./struct.IndexedHashSet.html#method.drop_unused
11//! [generational arena]: https://docs.rs/generational-arena/latest/
12
13#![deny(missing_docs)]
14
15use generational_arena::{Arena, Index as AIndex};
16use std::borrow::Borrow;
17use std::cell::RefCell;
18use std::collections::HashMap;
19use std::hash::Hash;
20use std::rc::Rc;
21
22mod internal_ref;
23use self::internal_ref::{InternalRef, Wrap as _};
24
25/// An entry in the set.
26#[derive(Debug)]
27struct Entry<T> {
28    /// Elements are boxed to allow correct self-references in the
29    /// element-to-index-map. Otherwise a re-allocation of the arena due to
30    /// growth could invalidate the supporting map.
31    elem: Box<T>,
32    /// Count of existing indices referencing this entry. If this is zero the
33    /// entry can be dropped.
34    usage_cnt: Rc<RefCell<usize>>,
35}
36
37impl<T> Entry<T> {
38    /// A new entry with a `usage_cnt` of zero.
39    fn new(elem: T) -> Self {
40        Entry {
41            elem: Box::new(elem),
42            usage_cnt: Default::default(),
43        }
44    }
45    fn cnt_handle(&self) -> Rc<RefCell<usize>> {
46        self.usage_cnt.clone()
47    }
48    fn cnt(&self) -> usize {
49        *(*self.usage_cnt).borrow()
50    }
51    fn elem(&self) -> &T {
52        self.elem.as_ref()
53    }
54}
55
56/// An indexed hash set. Can be accessed either by index of hashing.
57#[derive(Debug)]
58pub struct IndexedHashSet<T>
59where
60    T: 'static,
61{
62    /// The actual store of entries.
63    arena: Arena<Entry<T>>,
64    /// Map from elements to indices that ultimately retrieve the entries.
65    ///
66    /// The keys are fake `'static`. Actually they **self-reference** the
67    /// entries in the arena.
68    map: HashMap<InternalRef<T>, AIndex>,
69}
70
71impl<T> IndexedHashSet<T>
72where
73    T: 'static + Eq + Hash,
74{
75    /// A new, empty set.
76    pub fn new() -> Self {
77        Default::default()
78    }
79    /// Number of elements in the set, including the unused ones.
80    pub fn len(&self) -> usize {
81        self.arena.len()
82    }
83    /// Get the usage count of an element by hash.
84    pub fn get_cnt<Q>(&self, elem: &Q) -> Option<usize>
85    where
86        T: Borrow<Q>,
87        Q: ?Sized + Hash + Eq,
88    {
89        let idx = self.map.get_key_value(elem.wrap()).map(|(_, idx)| *idx)?;
90        let entry = &self.arena[idx];
91        Some(entry.cnt())
92    }
93    /// Get a reference to the stored element by hash.
94    pub fn get_ref_by_hash<'a, Q>(&'a self, elem: &Q) -> Option<&'a T>
95    where
96        T: Borrow<Q>,
97        Q: ?Sized + Hash + Eq,
98    {
99        // points to the same entry, no need for a lookup in the arena
100        self.map.get_key_value(elem.wrap()).map(|(k, _)| k.as_ref())
101    }
102    /// Get the index of the stored element by hash.
103    pub fn get_index_by_hash<'a, Q>(&'a self, elem: &Q) -> Option<RcIndex>
104    where
105        T: Borrow<Q>,
106        Q: ?Sized + Hash + Eq,
107    {
108        let a_idx = self.map.get(elem.wrap())?;
109        Some(self.aidx_to_rcidx(*a_idx))
110    }
111    /// Get a reference to the stored element by index.
112    ///
113    /// As the index can be from another `IndexedHashSet` this operation is
114    /// fallible.
115    ///
116    /// Alternatively, the [index notation](struct.IndexedHashSet.html#impl-Index<%26'a RcIndex>)
117    /// can be used, e.g. `set[&rc_idx]`. However, this may panic with a
118    /// foreign `RcIndex`.
119    //#impl-Index<%26'a RcIndex>
120    pub fn get_ref_by_index<'a>(&'a self, idx: &RcIndex) -> Option<&'a T> {
121        let entry = self.arena.get(idx.inner)?;
122        Some(entry.elem.as_ref())
123    }
124    /// Insert a new element into the set.
125    ///
126    /// If the element is already in the set `None` is returned else the index
127    /// of the new entry is returned.
128    ///
129    /// _Note:_ The returned `RcIndex` is the initial usage of the entry. If it
130    /// is dropped without cloning the `usage_cnt` goes to zero and the new
131    /// element is dropped on the next [`drop_unused()`](#method.drop_unused)!
132    #[must_use = "If not stored usage count of the new element goes to zero."]
133    pub fn insert(&mut self, elem: T) -> Option<RcIndex> {
134        if self.map.get(elem.wrap()).is_some() {
135            return None;
136        }
137
138        Some(self.insert_unchecked(elem))
139    }
140    /// Gets the index of the element in the set if present. If not the element
141    /// is inserted and the new index is returned.
142    pub fn get_or_insert(&mut self, elem: &T) -> RcIndex
143    where
144        T: Clone,
145    {
146        if let Some(a_idx) = self.map.get(elem.wrap()) {
147            self.aidx_to_rcidx(*a_idx)
148        } else {
149            self.insert_unchecked(elem.clone())
150        }
151    }
152    /// Unconditionally inserts the element.
153    ///
154    /// If not checked carefully this may violate the `IndexedHashSet`'s
155    /// contract that elements are distinct as the arena doesn't have the
156    /// properties of a set.
157    fn insert_unchecked(&mut self, elem: T) -> RcIndex {
158        let entry = Entry::new(elem);
159        let cnt_handle = entry.cnt_handle();
160        let inner_ref = InternalRef::from_ref(entry.elem());
161
162        let a_idx = self.arena.insert(entry);
163        self.map.insert(inner_ref, a_idx);
164
165        RcIndex::new(a_idx, cnt_handle)
166    }
167    /// Drop all entries whose `usage_cnt` is zero.
168    pub fn drop_unused(&mut self) -> usize {
169        // tell Rust that both mutable borrows are distinct.
170        let arena = &mut self.arena;
171        let map = &mut self.map;
172
173        let before = arena.len();
174
175        arena.retain(|_, entry| {
176            if entry.cnt() == 0 {
177                map.remove(entry.elem().wrap());
178                false
179            } else {
180                true
181            }
182        });
183
184        before - arena.len()
185    }
186    /// Iterates over all elements in the set with `usage_cnt != 0`.
187    pub fn iter(&self) -> impl Iterator<Item = &T> {
188        self.arena
189            .iter()
190            .filter_map(|(_, e)| if e.cnt() != 0 { Some(e.elem()) } else { None })
191    }
192    /// Returns the respective `RcIndex` for an index of the arena.
193    ///
194    /// # Panics
195    ///
196    /// This panics if the arena index is not present. However, since these
197    /// kind of indices are only used internally this should never be the case.
198    fn aidx_to_rcidx(&self, a_idx: AIndex) -> RcIndex {
199        let entry = &self.arena[a_idx];
200        let handle = entry.cnt_handle();
201        RcIndex::new(a_idx, handle)
202    }
203}
204
205impl<T: 'static> Default for IndexedHashSet<T> {
206    fn default() -> Self {
207        Self {
208            arena: Default::default(),
209            map: Default::default(),
210        }
211    }
212}
213
214/// Allows to access the set like `set[&rc_idx]`.
215///
216/// This panics if the `RcIndex` used is not from this `IndexedHashSet`.
217impl<'a, T> std::ops::Index<&'a RcIndex> for IndexedHashSet<T>
218where
219    T: 'static + Eq + Hash,
220{
221    type Output = T;
222
223    fn index(&self, index: &'a RcIndex) -> &Self::Output {
224        self.get_ref_by_index(index).unwrap()
225    }
226}
227
228/// The `!Send` internal references are only used internally. Therefore, this
229/// type is safe to be `Send`.
230unsafe impl<T> Send for IndexedHashSet<T> {}
231
232/// A reference-counted index to an entry of the set.
233#[derive(Debug)]
234pub struct RcIndex {
235    /// Original index into the arena.
236    inner: AIndex,
237    /// Usage count. Incremented at index construction and decremented at drop.
238    cnt: Rc<RefCell<usize>>,
239}
240
241impl RcIndex {
242    /// Creates a new reference-counted index.
243    ///
244    /// On creation the `usage_cnt` is incremented.
245    fn new(idx: AIndex, cnt_handle: Rc<RefCell<usize>>) -> Self {
246        {
247            let mut cnt = cnt_handle.borrow_mut();
248            *cnt += 1;
249        }
250        Self {
251            inner: idx,
252            cnt: cnt_handle,
253        }
254    }
255    /// Get the usage count of the element.
256    pub fn cnt(&self) -> usize {
257        *(*self.cnt).borrow()
258    }
259}
260
261impl Clone for RcIndex {
262    fn clone(&self) -> Self {
263        let mut cnt = self.cnt.borrow_mut();
264        *cnt += 1;
265        Self {
266            inner: self.inner,
267            cnt: self.cnt.clone(),
268        }
269    }
270}
271
272impl Drop for RcIndex {
273    fn drop(&mut self) {
274        let mut cnt = self.cnt.borrow_mut();
275        *cnt -= 1;
276    }
277}
278
279#[cfg(test)]
280mod tests {
281    use super::*;
282
283    /// Set with three entries with each usage count equal to zero.
284    fn standard_set() -> IndexedHashSet<String> {
285        let mut set = IndexedHashSet::new();
286        set.insert("Olaf".to_owned()).unwrap();
287        set.insert("Eijnar".to_owned()).unwrap();
288        set.insert("Harald".to_owned()).unwrap();
289        set
290    }
291
292    #[test]
293    fn unused_entries() {
294        let mut set = standard_set();
295        assert_eq!(set.len(), 3);
296        assert_eq!(set.drop_unused(), 3);
297        assert_eq!(set.len(), 0);
298    }
299
300    #[test]
301    fn usage_cnt() {
302        let mut set = IndexedHashSet::new();
303        let o1 = set.insert("Olaf".to_owned()).unwrap();
304        assert_eq!(o1.cnt(), 1);
305        let _o2 = set.get_index_by_hash("Olaf").unwrap();
306        assert_eq!(o1.cnt(), 2);
307        {
308            let _o3 = o1.clone();
309            assert_eq!(o1.cnt(), 3);
310        }
311        assert_eq!(o1.cnt(), 2);
312    }
313}