set_trie/lib.rs
1#![warn(
2 clippy::all,
3 clippy::pedantic,
4 clippy::nursery,
5 clippy::cargo,
6 missing_docs
7)]
8//! Fast subset and superset queries based on tries.
9//!
10//! ```rust
11//! use set_trie::SetTrie;
12//!
13//! let mut employees = SetTrie::new();
14//! employees.insert(&["accounting", "banking"], "Daniels");
15//! employees.insert(&["accounting", "banking", "crime"], "Stevens");
16//!
17//! assert_eq!(employees.subsets(&[&"accounting", &"banking", &"crime"]).collect::<Vec<_>>(), vec![&"Daniels", &"Stevens"]);
18//! assert_eq!(employees.subsets(&[&"accounting", &"banking"]).collect::<Vec<_>>(), vec![&"Daniels"]);
19//! assert_eq!(employees.supersets(&[&"accounting"]).collect::<Vec<_>>(), vec![&"Daniels", &"Stevens"]);
20//! ```
21
22use crate::subset::Subset;
23use crate::superset::SuperSet;
24use crate::values::Values;
25use std::iter::FromIterator;
26
27mod entry;
28mod subset;
29mod superset;
30mod values;
31
32pub use entry::{CreatedEntry, Entry, EntryBuilder, ExistingEntry};
33
34#[derive(Debug, Default, Eq, PartialEq)]
35struct Node<K, T> {
36 children: Vec<(K, Node<K, T>)>,
37 leaves: Vec<T>,
38}
39
40impl<K, T> Node<K, T> {
41 pub const fn new() -> Self {
42 Self {
43 children: vec![],
44 leaves: vec![],
45 }
46 }
47}
48
49/// Due to the recursive nature of the implementation of Drop, large `SetTries` cause a stack overflow
50/// during deallocation. Our own implementation uses an iterative algorithm to deallocate.
51impl<K, T> Drop for Node<K, T> {
52 fn drop(&mut self) {
53 let mut stack = Vec::with_capacity(self.children.len());
54 while let Some((_, child)) = self.children.pop() {
55 stack.push(child);
56 while let Some(mut current) = stack.pop() {
57 while let Some((_, child)) = current.children.pop() {
58 stack.push(child)
59 }
60 }
61 }
62 }
63}
64
65impl<K, T> Node<K, T>
66where
67 K: Ord,
68{
69 fn has_descendant(&self, key: &K) -> bool {
70 println!("here");
71 if self.children.binary_search_by(|(k, _)| k.cmp(key)).is_ok() {
72 return true;
73 }
74 self.children
75 .iter()
76 .take_while(|(k, _)| k < key)
77 .any(|(_, n)| n.has_descendant(key))
78 }
79
80 fn between_inclusive(&self, from: &K, to: &K) -> &[(K, Self)] {
81 match (
82 self.children.binary_search_by(|(k, _)| k.cmp(from)),
83 self.children.binary_search_by(|(k, _)| k.cmp(to)),
84 ) {
85 (Ok(from), Ok(to)) | (Err(from), Ok(to)) => &self.children[from..=to],
86 (Ok(from), Err(to)) | (Err(from), Err(to)) => &self.children[from..to],
87 }
88 }
89}
90
91/// `SetTries` allow for efficient subset and superset queries. Think of it as a
92/// [`HashMap`](std::collections::HashMap), where you want the key to be within or containing a range.
93///
94/// ```rust
95/// let mut trie = set_trie::SetTrie::new();
96///
97/// trie.insert(&[1, 3, 5], "foo");
98/// trie.insert(&[3], "bar");
99///
100/// assert_eq!(trie.subsets(&[&1, &3, &5, &6]).collect::<Vec<_>>(), vec![&"foo", &"bar"]);
101/// assert_eq!(trie.supersets(&[&5]).collect::<Vec<_>>(), vec![&"foo"])
102/// ```
103///
104/// # Restrictions
105///
106/// Keys are required to be Ord, as the trie stores the nodes in sorted order by key. This means
107/// that the caller must ensure that provided keys are in sorted order, lest nonsensical results be
108/// returned.
109///
110/// # Performance
111///
112/// Subsets and Supersets are lazily evaluated. Note that superset queries are far more expensive
113/// than subset queries, so attempt to structure your problem around subsets.
114#[derive(Debug, Default)]
115pub struct SetTrie<K, T>(Node<K, T>);
116
117impl<K, T> SetTrie<K, T> {
118 /// Create a new, empty `SetTrie`, without allocating any space for the nodes.
119 #[must_use]
120 pub const fn new() -> Self {
121 Self(Node::new())
122 }
123}
124
125impl<K, T> SetTrie<K, T>
126where
127 K: Ord,
128{
129 /// A view into a single node in the trie; which must either be created or already exists.
130 #[must_use]
131 pub fn entry<IK: IntoIterator<Item = K>>(
132 &mut self,
133 keys: IK,
134 ) -> EntryBuilder<K, T, IK::IntoIter> {
135 EntryBuilder::new(self, keys.into_iter())
136 }
137
138 /// Insert the item in the given node. Will create the node if needed.
139 pub fn insert(&mut self, keys: impl IntoIterator<Item = K>, item: T) {
140 self.entry(keys.into_iter()).and_insert(item);
141 }
142
143 /// Inserts multiple items in the given node. More performant that repeatedly calling insert.
144 pub fn insert_many<IK: IntoIterator<Item = K>, IT: IntoIterator<Item = T>>(
145 &mut self,
146 keys: IK,
147 item: IT,
148 ) {
149 self.entry(keys.into_iter()).and_extend(item);
150 }
151
152 /// Iterates over all subsets of `keys` using DFS, meaning that the keys are visited
153 /// in order of the query:
154 ///
155 /// ```rust
156 /// let mut trie = set_trie::SetTrie::new();
157 /// trie.insert(&[1], "foo");
158 /// trie.insert(&[1, 2], "bar");
159 /// trie.insert(&[1, 2, 3], "baz");
160 ///
161 /// assert_eq!(trie.subsets(&[&1, &2, &3]).collect::<Vec<_>>(), vec![&"foo", &"bar", &"baz"]);
162 /// ```
163 #[must_use]
164 pub fn subsets<'a, 'b>(&'a self, keys: &'b [K]) -> Subset<'a, 'b, K, T> {
165 Subset::new(self, keys)
166 }
167
168 /// Iterates over all values in the trie using DFS, meaning that values are visited in order
169 /// of the keys stored in the trie.
170 ///
171 ///
172 /// ```rust
173 /// let mut trie = set_trie::SetTrie::new();
174 /// trie.insert(&[1], "foo");
175 /// trie.insert(&[1, 2], "bar");
176 /// trie.insert(&[1, 2, 3], "baz");
177 ///
178 /// assert_eq!(trie.values().collect::<Vec<_>>(), vec![&"foo", &"bar", &"baz"]);
179 /// ```
180 #[must_use]
181 pub fn values(&self) -> Values<K, T> {
182 Values::new(self)
183 }
184
185 /// Iterates over all supersets of `keys` in the trie using DFS, meaning that values are visited
186 /// in order of the query.
187 ///
188 ///
189 /// ```rust
190 /// let mut trie = set_trie::SetTrie::new();
191 /// trie.insert(&[1], "foo");
192 /// trie.insert(&[1, 2], "bar");
193 /// trie.insert(&[1, 2, 3], "baz");
194 ///
195 /// assert_eq!(trie.supersets(&[&1]).collect::<Vec<_>>(), vec![&"foo", &"bar", &"baz"]);
196 /// ```
197 ///
198 /// # Remarks
199 ///
200 /// Note that the empty set will provide the same result as values. There is currently no fast
201 /// path in the trie, so if you know that your query contains no keys, use [`SetTrie::values`]
202 /// instead.
203 #[must_use]
204 pub fn supersets<'a, 'b>(&'a self, keys: &'b [K]) -> SuperSet<'a, 'b, K, T> {
205 SuperSet::new(self, keys)
206 }
207}
208
209impl<I, K, T> Extend<(I, T)> for SetTrie<K, T>
210where
211 I: IntoIterator<Item = K>,
212 K: Ord,
213{
214 fn extend<F: IntoIterator<Item = (I, T)>>(&mut self, iter: F) {
215 for (k, t) in iter {
216 self.insert(k, t)
217 }
218 }
219}
220
221impl<I, K, T> FromIterator<(I, T)> for SetTrie<K, T>
222where
223 I: IntoIterator<Item = K>,
224 K: Ord,
225{
226 fn from_iter<F: IntoIterator<Item = (I, T)>>(iter: F) -> Self {
227 let mut trie = Self::new();
228 trie.extend(iter);
229 trie
230 }
231}
232
233#[cfg(test)]
234mod tests {
235 use super::*;
236
237 mod doctests {
238 include!(concat!(env!("OUT_DIR"), "/skeptic-tests.rs"));
239 }
240
241 #[test]
242 fn insert() {
243 let mut trie = SetTrie::new();
244 trie.insert(&[1], "c");
245 trie.insert(&[1, 2], "c");
246 trie.insert(&[1, 2, 3], "a");
247 trie.insert(&[1, 2, 3], "b");
248 assert_eq!(trie.entry(&[1, 2, 3]).items(), Some(&vec!["a", "b"]))
249 }
250
251 /// Due to the recursive structure; the default Drop implementation actually causes a stack
252 /// overflow.
253 #[test]
254 fn test_stack_overflow() {
255 let seed = 2000000;
256 let mut trie = SetTrie::new();
257
258 let mut current = trie.entry(0..1).or_insert(0);
259 for i in 1..seed {
260 current = current.entry(i - 1..i).or_insert(i)
261 }
262 }
263}