prefix_array/set.rs
1//! A Set API based on a prefix array, this module contains the [`PrefixArraySet`] type.
2
3#[cfg(any(test, feature = "std"))]
4extern crate std;
5
6extern crate alloc;
7
8use alloc::{borrow::ToOwned, vec::Vec};
9use core::{borrow::Borrow, fmt, ops::Deref};
10
11mod iter;
12pub use iter::{Drain, IntoIter, Iter};
13
14use crate::shared::{PrefixBorrowed, PrefixOwned, ScratchSpace};
15
16/// A generic search-by-prefix array designed to find strings with common prefixes in `O(log n)` time, and easily search on subslices to refine a previous search.
17///
18/// The generic parameter is mainly in place so that `&'a str`, `String`, and `&'static str` may all be used for the backing storage.
19/// It is a logical error for an implementer of `AsRef<str>` to return different data across multiple calls while it remains in this container.
20/// Doing so renders the datastructure useless (but will NOT cause UB).
21///
22/// The main downside of a [`PrefixArraySet`] over a trie type datastructure is that insertions have a significant `O(n)` cost,
23/// so if you are adding multiple values over the lifetime of the [`PrefixArraySet`] it may become less efficient overall than a traditional tree.
24#[derive(PartialEq, Eq)]
25pub struct PrefixArraySet<K: Borrow<str>>(pub(crate) Vec<K>);
26
27impl<K: Borrow<str> + fmt::Debug> fmt::Debug for PrefixArraySet<K> {
28 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
29 write!(f, "PrefixArraySet")?;
30 f.debug_set().entries(self.iter()).finish()
31 }
32}
33
34// Manually impl to get clone_from
35impl<K: Borrow<str> + Clone> Clone for PrefixArraySet<K> {
36 fn clone(&self) -> Self {
37 Self(self.0.clone())
38 }
39
40 fn clone_from(&mut self, other: &Self) {
41 self.0.clone_from(&other.0);
42 }
43}
44
45impl<K: Borrow<str>> Default for PrefixArraySet<K> {
46 fn default() -> Self {
47 PrefixArraySet::new()
48 }
49}
50
51impl<K: Borrow<str>> PrefixArraySet<K> {
52 /// Create a new empty [`PrefixArraySet`].
53 ///
54 /// This function will not allocate anything.
55 #[must_use]
56 pub const fn new() -> Self {
57 Self(Vec::new())
58 }
59
60 /// Creates a new empty [`PrefixArraySet`] with space for at least `capacity` elements.
61 ///
62 /// See [`Vec::with_capacity`] for additional notes.
63 ///
64 /// # Panics:
65 /// Panics if the new capacity exceeds `isize::MAX` bytes.
66 #[must_use]
67 pub fn with_capacity(capacity: usize) -> Self {
68 Self(Vec::with_capacity(capacity))
69 }
70
71 /// Reserves capacity for at least `additional` more elements to be inserted, the collection may reserve additional space as a speculative optimization.
72 /// Does nothing if capacity is already sufficient.
73 ///
74 /// See [`Vec::reserve`] for additional notes.
75 ///
76 /// # Panics:
77 /// Panics if the new capacity exceeds `isize::MAX` bytes.
78 pub fn reserve(&mut self, additional: usize) {
79 self.0.reserve(additional);
80 }
81
82 /// Reserves the minimum capacity to append `additional` more elements. Or, will not speculatively over-allocate like [`reserve`][PrefixArraySet::reserve].
83 /// Does nothing if capacity is already sufficient.
84 ///
85 /// See [`Vec::reserve_exact`] for additional notes.
86 ///
87 /// # Panics:
88 /// Panics if the new capacity exceeds `isize::MAX` bytes.
89 pub fn reserve_exact(&mut self, additional: usize) {
90 self.0.reserve_exact(additional);
91 }
92
93 /// Creates a new [`PrefixArraySet`] from a `Vec<K>`, removing any duplicate keys.
94 ///
95 /// This operation is `O(n log n)`.
96 #[must_use]
97 pub fn from_vec_lossy(v: Vec<K>) -> Self {
98 Self::from_vec_lossy_impl(v)
99 }
100
101 /// Inserts the given K into the [`PrefixArraySet`], returning true if the key was not already in the set
102 ///
103 /// This operation is `O(n)`.
104 pub fn insert(&mut self, key: K) -> bool {
105 self.insert_impl(key).is_none()
106 }
107
108 /// Adds a value to the set, replacing the existing value, if any, that is equal to the given one.
109 /// Returns the replaced value.
110 pub fn replace(&mut self, key: K) -> Option<K> {
111 self.insert_replace_impl(key)
112 }
113
114 /// Removes all values with the prefix provided, shifting the array in the process to account for the empty space.
115 ///
116 /// This operation is `O(n)`.
117 ///
118 /// # Examples
119 /// ```rust
120 /// # use prefix_array::PrefixArraySet;
121 /// let mut set = PrefixArraySet::from_iter(["a", "b", "c"]);
122 ///
123 /// set.drain_all_with_prefix("b");
124 ///
125 /// assert_eq!(set.to_vec(), &["a", "c"]);
126 /// ```
127 pub fn drain_all_with_prefix<'a>(&'a mut self, prefix: &str) -> Drain<'a, K> {
128 let range = self.find_all_with_prefix_idx_impl(prefix);
129
130 Drain(self.0.drain(range))
131 }
132
133 /// Drains all elements of the [`PrefixArraySet`], returning them in an iterator.
134 /// Keeps the backing allocation intact, unlike [`IntoIter`].
135 ///
136 /// When this iterator is dropped it drops all remaining elements.
137 pub fn drain(&mut self) -> Drain<'_, K> {
138 Drain(self.0.drain(..))
139 }
140
141 /// Removes the value that matches the given key, returning true if it was present in the set
142 ///
143 /// This operation is `O(log n)` if the key was not found, and `O(n)` if it was.
144 pub fn remove(&mut self, key: &str) -> bool {
145 self.remove_entry_impl(key).is_some()
146 }
147
148 /// Returns the total capacity that the [`PrefixArraySet`] has.
149 #[must_use]
150 pub fn capacity(&self) -> usize {
151 self.0.capacity()
152 }
153
154 /// Clears the [`PrefixArraySet`], removing all values.
155 ///
156 /// Capacity will not be freed.
157 pub fn clear(&mut self) {
158 self.0.clear();
159 }
160
161 /// Shrinks the capacity of this collection as much as possible.
162 ///
163 /// Additional capacity may still be left over after this operation.
164 pub fn shrink_to_fit(&mut self) {
165 self.0.shrink_to_fit();
166 }
167
168 /// Shrinks the capacity of this collection with a lower limit. It will drop down no lower than the supplied limit.
169 ///
170 /// If the current capacity is less than the lower limit, this is a no-op.
171 pub fn shrink_to(&mut self, min_capacity: usize) {
172 self.0.shrink_to(min_capacity);
173 }
174
175 /// Extends the collection with items from an iterator, this is functionally equivalent to the
176 /// `Extend` implementation and carries the same edge cases, but it allows passing a scratch
177 /// space to potentially avoid reallocations when calling `extend_with` multiple times.
178 pub fn extend_with<I>(&mut self, scratch: &mut ScratchSpace<Self>, iter: I)
179 where
180 I: IntoIterator<Item = K>,
181 {
182 self.extend_with_impl(&mut scratch.0, iter);
183 }
184
185 /// Makes a `PrefixArraySet` from an iterator in which all key items are unique
186 fn from_unique_iter<T: IntoIterator<Item = K>>(v: T) -> Self {
187 Self::from_unique_iter_impl(v)
188 }
189}
190
191impl<K: Borrow<str>> Extend<K> for PrefixArraySet<K> {
192 /// Extends the [`PrefixArraySet`] with more values, skipping updating any duplicates.
193 ///
194 /// It is currently unspecified if two identical values are given, who are not already in the set, which value will be kept.
195 ///
196 /// This operation is `O(n + k log k)` where k is the number of elements in the iterator.
197 fn extend<T>(&mut self, iter: T)
198 where
199 T: IntoIterator<Item = K>,
200 {
201 self.extend_with(&mut ScratchSpace::new(), iter);
202 }
203}
204
205#[cfg(feature = "std")]
206impl<K: Borrow<str>, H> From<std::collections::HashSet<K, H>> for PrefixArraySet<K> {
207 /// Performs a lossless conversion from a `HashSet<K>` to a `PrefixArraySet<K>` in `O(n log n)` time.
208 fn from(v: std::collections::HashSet<K, H>) -> Self {
209 Self::from_unique_iter(v)
210 }
211}
212
213impl<K: Borrow<str>> From<alloc::collections::BTreeSet<K>> for PrefixArraySet<K> {
214 /// Performs a lossless conversion from a `BTreeSet<K>` to a `PrefixArraySet<K>` in `O(n log n)` time.
215 fn from(v: alloc::collections::BTreeSet<K>) -> Self {
216 Self::from_unique_iter(v)
217 }
218}
219
220impl<K: Borrow<str>> From<PrefixArraySet<K>> for Vec<K> {
221 fn from(v: PrefixArraySet<K>) -> Vec<K> {
222 v.0
223 }
224}
225
226impl<K: Borrow<str>> Deref for PrefixArraySet<K> {
227 type Target = SetSubSlice<K>;
228
229 fn deref(&self) -> &Self::Target {
230 SetSubSlice::cast_from_slice_core(&self.0)
231 }
232}
233
234impl<K: Borrow<str>> core::borrow::Borrow<SetSubSlice<K>> for PrefixArraySet<K> {
235 fn borrow(&self) -> &SetSubSlice<K> {
236 self
237 }
238}
239
240impl<K: Borrow<str> + Clone> ToOwned for SetSubSlice<K> {
241 type Owned = PrefixArraySet<K>;
242
243 fn to_owned(&self) -> PrefixArraySet<K> {
244 // here we can assert the invariants were upheld
245 PrefixArraySet(self.to_vec())
246 }
247
248 fn clone_into(&self, target: &mut PrefixArraySet<K>) {
249 self.0.clone_into(&mut target.0);
250 }
251}
252
253impl<K: Borrow<str> + fmt::Debug> fmt::Debug for SetSubSlice<K> {
254 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
255 write!(f, "SetSubSlice")?;
256 f.debug_set().entries(self.iter()).finish()
257 }
258}
259
260/// A subslice of a [`PrefixArraySet`] in which all items contain a common prefix (which may be the unit prefix `""`).
261///
262/// The [`SetSubSlice`] does not store what that common prefix is for performance reasons (but it can be computed, see: [`SetSubSlice::common_prefix`]), it is up to the user to keep track of.
263// SAFETY: this type must remain repr(transparent) to [K] for cast_from_slice(_mut)_core invariants
264#[repr(transparent)]
265#[derive(PartialEq, Eq)]
266pub struct SetSubSlice<K: Borrow<str>>(pub(crate) [K]);
267
268impl<K: Borrow<str>> SetSubSlice<K> {
269 /// creates a shared ref to Self from a ref to backing storage
270 pub(crate) fn cast_from_slice_core(v: &[K]) -> &Self {
271 // SAFETY: this type is repr(transparent), and the lifetimes are both &'a
272 unsafe { &*(v as *const [K] as *const Self) }
273 }
274
275 /// creates an owned ref to Self from a ref to backing storage
276 pub(crate) fn cast_from_slice_mut_core(v: &mut [K]) -> &mut Self {
277 // SAFETY: this type is repr(transparent), and the lifetimes are both &'a
278 unsafe { &mut *(v as *mut [K] as *mut Self) }
279 }
280
281 /// Returns an iterator over all of the elements of this [`SetSubSlice`]
282 pub fn iter(&self) -> Iter<'_, K> {
283 Iter(self.0.iter())
284 }
285
286 /// Creates an owned copy of this [`SetSubSlice`] as a [`Vec`].
287 /// If you wish to preserve [`PrefixArraySet`] semantics consider using [`ToOwned`] instead.
288 pub fn to_vec(&self) -> Vec<K>
289 where
290 K: Clone,
291 {
292 self.0.to_vec()
293 }
294
295 /// Returns the `SetSubSlice` where all `K` have the same prefix `prefix`.
296 ///
297 /// Will return an empty array if there are no matches.
298 ///
299 /// This operation is `O(log n)`
300 ///
301 /// # Examples
302 /// ```rust
303 /// # use prefix_array::PrefixArraySet;
304 /// let set = PrefixArraySet::from_iter(["foo", "bar", "baz"]);
305 ///
306 /// assert_eq!(set.find_all_with_prefix("b").to_vec(), vec!["bar", "baz"]);
307 /// ```
308 pub fn find_all_with_prefix<'a>(&'a self, prefix: &str) -> &'a Self {
309 let range = self.find_all_with_prefix_idx_impl(prefix);
310 self.reslice(range)
311 }
312
313 /// Compute the common prefix of this [`SetSubSlice`] from the data.
314 /// Will return an empty string if this subslice is empty.
315 ///
316 /// Note that this may be more specific than what was searched for, i/e:
317 /// ```rust
318 /// # use prefix_array::PrefixArraySet;
319 /// let arr = PrefixArraySet::from_iter(["12346", "12345", "12341"]);
320 /// // Common prefix is *computed*, so even though we only
321 /// // searched for "12" we got something more specific
322 /// assert_eq!(arr.find_all_with_prefix("12").common_prefix(), "1234");
323 /// ```
324 ///
325 /// This operation is `O(1)`, but it is not computationally free.
326 pub fn common_prefix(&self) -> &str {
327 self.common_prefix_impl()
328 }
329
330 /// Returns whether this [`SetSubSlice`] contains the given key
331 ///
332 /// This operation is `O(log n)`.
333 pub fn contains(&self, key: &str) -> bool {
334 self.contains_key_impl(key)
335 }
336
337 /// Returns whether this [`SetSubSlice`] is empty
338 pub const fn is_empty(&self) -> bool {
339 self.0.is_empty()
340 }
341
342 /// Returns the length of this [`SetSubSlice`]
343 pub const fn len(&self) -> usize {
344 self.0.len()
345 }
346}
347
348#[cfg(test)]
349mod tests {
350 use super::*;
351
352 use alloc::vec;
353
354 #[test]
355 fn replace_replaces() {
356 #[derive(Debug)]
357 struct TrackerStr<'a>(&'a str, u64);
358
359 impl core::borrow::Borrow<str> for TrackerStr<'_> {
360 fn borrow(&self) -> &str {
361 self.0
362 }
363 }
364
365 let mut parray = PrefixArraySet::from_iter([TrackerStr("abc", 0)]);
366
367 assert!(matches!(
368 parray.replace(TrackerStr("abc", 1)),
369 Some(TrackerStr("abc", 0))
370 ));
371
372 assert!(parray.contains("abc"));
373 }
374
375 #[test]
376 fn submatches() {
377 let parray = PrefixArraySet::from_vec_lossy(vec![
378 "abcde", "234234", "among", "we", "weed", "who", "what", "why", "abde", "abch",
379 "america",
380 ]);
381
382 assert_eq!(
383 &["abcde", "abch", "abde"],
384 &*parray.find_all_with_prefix("ab").to_vec()
385 );
386
387 assert_eq!("ab", parray.find_all_with_prefix("ab").common_prefix());
388
389 let mut parraysingle = PrefixArraySet::from_vec_lossy(vec!["abcde"]);
390
391 assert_eq!("abcde", parraysingle.to_vec()[0]);
392 assert_eq!(
393 &["abcde"],
394 &*parraysingle.find_all_with_prefix("a").to_vec()
395 );
396
397 assert!(parraysingle.find_all_with_prefix("b").is_empty());
398
399 _ = parraysingle.drain_all_with_prefix("a");
400
401 assert!(parraysingle.is_empty());
402 }
403
404 #[test]
405 fn is_eq() {
406 let arr1 = PrefixArraySet::from_iter(["abcde", "among"]);
407 let arr2 = PrefixArraySet::from_iter(["abcde", "among"]);
408
409 assert_eq!(arr1, arr2);
410
411 let arr3 = PrefixArraySet::new();
412
413 assert_ne!(&*arr3, &*arr2);
414 }
415}