set_trie/
lib.rs

1#![warn(
2    clippy::all,
3    clippy::pedantic,
4    clippy::nursery,
5    clippy::cargo,
6    missing_docs
7)]
8//! Fast subset and superset queries based on tries.
9//!
10//! ```rust
11//! use set_trie::SetTrie;
12//!
13//! let mut employees = SetTrie::new();
14//! employees.insert(&["accounting", "banking"], "Daniels");
15//! employees.insert(&["accounting", "banking", "crime"], "Stevens");
16//!
17//! assert_eq!(employees.subsets(&[&"accounting", &"banking", &"crime"]).collect::<Vec<_>>(), vec![&"Daniels", &"Stevens"]);
18//! assert_eq!(employees.subsets(&[&"accounting", &"banking"]).collect::<Vec<_>>(), vec![&"Daniels"]);
19//! assert_eq!(employees.supersets(&[&"accounting"]).collect::<Vec<_>>(), vec![&"Daniels", &"Stevens"]);
20//! ```
21
22use crate::subset::Subset;
23use crate::superset::SuperSet;
24use crate::values::Values;
25use std::iter::FromIterator;
26
27mod entry;
28mod subset;
29mod superset;
30mod values;
31
32pub use entry::{CreatedEntry, Entry, EntryBuilder, ExistingEntry};
33
34#[derive(Debug, Default, Eq, PartialEq)]
35struct Node<K, T> {
36    children: Vec<(K, Node<K, T>)>,
37    leaves: Vec<T>,
38}
39
40impl<K, T> Node<K, T> {
41    pub const fn new() -> Self {
42        Self {
43            children: vec![],
44            leaves: vec![],
45        }
46    }
47}
48
49/// Due to the recursive nature of the implementation of Drop, large `SetTries` cause a stack overflow
50/// during deallocation. Our own implementation uses an iterative algorithm to deallocate.
51impl<K, T> Drop for Node<K, T> {
52    fn drop(&mut self) {
53        let mut stack = Vec::with_capacity(self.children.len());
54        while let Some((_, child)) = self.children.pop() {
55            stack.push(child);
56            while let Some(mut current) = stack.pop() {
57                while let Some((_, child)) = current.children.pop() {
58                    stack.push(child)
59                }
60            }
61        }
62    }
63}
64
65impl<K, T> Node<K, T>
66where
67    K: Ord,
68{
69    fn has_descendant(&self, key: &K) -> bool {
70        println!("here");
71        if self.children.binary_search_by(|(k, _)| k.cmp(key)).is_ok() {
72            return true;
73        }
74        self.children
75            .iter()
76            .take_while(|(k, _)| k < key)
77            .any(|(_, n)| n.has_descendant(key))
78    }
79
80    fn between_inclusive(&self, from: &K, to: &K) -> &[(K, Self)] {
81        match (
82            self.children.binary_search_by(|(k, _)| k.cmp(from)),
83            self.children.binary_search_by(|(k, _)| k.cmp(to)),
84        ) {
85            (Ok(from), Ok(to)) | (Err(from), Ok(to)) => &self.children[from..=to],
86            (Ok(from), Err(to)) | (Err(from), Err(to)) => &self.children[from..to],
87        }
88    }
89}
90
91/// `SetTries` allow for efficient subset and superset queries. Think of it as a
92/// [`HashMap`](std::collections::HashMap), where you want the key to be within or containing a range.
93///
94/// ```rust
95/// let mut trie = set_trie::SetTrie::new();
96///
97/// trie.insert(&[1, 3, 5], "foo");
98/// trie.insert(&[3], "bar");
99///
100/// assert_eq!(trie.subsets(&[&1, &3, &5, &6]).collect::<Vec<_>>(), vec![&"foo", &"bar"]);
101/// assert_eq!(trie.supersets(&[&5]).collect::<Vec<_>>(), vec![&"foo"])
102/// ```
103///
104/// # Restrictions
105///
106/// Keys are required to be Ord, as the trie stores the nodes in sorted order by key. This means
107/// that the caller must ensure that provided keys are in sorted order, lest nonsensical results be
108/// returned.
109///
110/// # Performance
111///
112/// Subsets and Supersets are lazily evaluated. Note that superset queries are far more expensive
113/// than subset queries, so attempt to structure your problem around subsets.
114#[derive(Debug, Default)]
115pub struct SetTrie<K, T>(Node<K, T>);
116
117impl<K, T> SetTrie<K, T> {
118    /// Create a new, empty `SetTrie`, without allocating any space for the nodes.
119    #[must_use]
120    pub const fn new() -> Self {
121        Self(Node::new())
122    }
123}
124
125impl<K, T> SetTrie<K, T>
126where
127    K: Ord,
128{
129    /// A view into a single node in the trie; which must either be created or already exists.
130    #[must_use]
131    pub fn entry<IK: IntoIterator<Item = K>>(
132        &mut self,
133        keys: IK,
134    ) -> EntryBuilder<K, T, IK::IntoIter> {
135        EntryBuilder::new(self, keys.into_iter())
136    }
137
138    /// Insert the item in the given node. Will create the node if needed.
139    pub fn insert(&mut self, keys: impl IntoIterator<Item = K>, item: T) {
140        self.entry(keys.into_iter()).and_insert(item);
141    }
142
143    /// Inserts multiple items in the given node. More performant that repeatedly calling insert.
144    pub fn insert_many<IK: IntoIterator<Item = K>, IT: IntoIterator<Item = T>>(
145        &mut self,
146        keys: IK,
147        item: IT,
148    ) {
149        self.entry(keys.into_iter()).and_extend(item);
150    }
151
152    /// Iterates over all subsets of `keys` using DFS, meaning that the keys are visited
153    /// in order of the query:
154    ///
155    /// ```rust
156    /// let mut trie = set_trie::SetTrie::new();
157    /// trie.insert(&[1], "foo");
158    /// trie.insert(&[1, 2], "bar");
159    /// trie.insert(&[1, 2, 3], "baz");
160    ///
161    /// assert_eq!(trie.subsets(&[&1, &2, &3]).collect::<Vec<_>>(), vec![&"foo", &"bar", &"baz"]);
162    /// ```
163    #[must_use]
164    pub fn subsets<'a, 'b>(&'a self, keys: &'b [K]) -> Subset<'a, 'b, K, T> {
165        Subset::new(self, keys)
166    }
167
168    /// Iterates over all values in the trie using DFS, meaning that values are visited in order
169    /// of the keys stored in the trie.
170    ///
171    ///
172    /// ```rust
173    /// let mut trie = set_trie::SetTrie::new();
174    /// trie.insert(&[1], "foo");
175    /// trie.insert(&[1, 2], "bar");
176    /// trie.insert(&[1, 2, 3], "baz");
177    ///
178    /// assert_eq!(trie.values().collect::<Vec<_>>(), vec![&"foo", &"bar", &"baz"]);
179    /// ```
180    #[must_use]
181    pub fn values(&self) -> Values<K, T> {
182        Values::new(self)
183    }
184
185    /// Iterates over all supersets of `keys` in the trie using DFS, meaning that values are visited
186    /// in order of the query.
187    ///
188    ///
189    /// ```rust
190    /// let mut trie = set_trie::SetTrie::new();
191    /// trie.insert(&[1], "foo");
192    /// trie.insert(&[1, 2], "bar");
193    /// trie.insert(&[1, 2, 3], "baz");
194    ///
195    /// assert_eq!(trie.supersets(&[&1]).collect::<Vec<_>>(), vec![&"foo", &"bar", &"baz"]);
196    /// ```
197    ///
198    /// # Remarks
199    ///
200    /// Note that the empty set will provide the same result as values. There is currently no fast
201    /// path in the trie, so if you know that your query contains no keys, use [`SetTrie::values`]
202    /// instead.
203    #[must_use]
204    pub fn supersets<'a, 'b>(&'a self, keys: &'b [K]) -> SuperSet<'a, 'b, K, T> {
205        SuperSet::new(self, keys)
206    }
207}
208
209impl<I, K, T> Extend<(I, T)> for SetTrie<K, T>
210where
211    I: IntoIterator<Item = K>,
212    K: Ord,
213{
214    fn extend<F: IntoIterator<Item = (I, T)>>(&mut self, iter: F) {
215        for (k, t) in iter {
216            self.insert(k, t)
217        }
218    }
219}
220
221impl<I, K, T> FromIterator<(I, T)> for SetTrie<K, T>
222where
223    I: IntoIterator<Item = K>,
224    K: Ord,
225{
226    fn from_iter<F: IntoIterator<Item = (I, T)>>(iter: F) -> Self {
227        let mut trie = Self::new();
228        trie.extend(iter);
229        trie
230    }
231}
232
233#[cfg(test)]
234mod tests {
235    use super::*;
236
237    mod doctests {
238        include!(concat!(env!("OUT_DIR"), "/skeptic-tests.rs"));
239    }
240
241    #[test]
242    fn insert() {
243        let mut trie = SetTrie::new();
244        trie.insert(&[1], "c");
245        trie.insert(&[1, 2], "c");
246        trie.insert(&[1, 2, 3], "a");
247        trie.insert(&[1, 2, 3], "b");
248        assert_eq!(trie.entry(&[1, 2, 3]).items(), Some(&vec!["a", "b"]))
249    }
250
251    /// Due to the recursive structure; the default Drop implementation actually causes a stack
252    /// overflow.
253    #[test]
254    fn test_stack_overflow() {
255        let seed = 2000000;
256        let mut trie = SetTrie::new();
257
258        let mut current = trie.entry(0..1).or_insert(0);
259        for i in 1..seed {
260            current = current.entry(i - 1..i).or_insert(i)
261        }
262    }
263}