prefix_array/
set.rs

1//! A Set API based on a prefix array, this module contains the [`PrefixArraySet`] type.
2
3#[cfg(any(test, feature = "std"))]
4extern crate std;
5
6extern crate alloc;
7
8use alloc::{borrow::ToOwned, vec::Vec};
9use core::{borrow::Borrow, fmt, ops::Deref};
10
11mod iter;
12pub use iter::{Drain, IntoIter, Iter};
13
14use crate::shared::{PrefixBorrowed, PrefixOwned, ScratchSpace};
15
16/// A generic search-by-prefix array designed to find strings with common prefixes in `O(log n)` time, and easily search on subslices to refine a previous search.
17///
18/// The generic parameter is mainly in place so that `&'a str`, `String`, and `&'static str` may all be used for the backing storage.
19///  It is a logical error for an implementer of `AsRef<str>` to return different data across multiple calls while it remains in this container.
20///  Doing so renders the datastructure useless (but will NOT cause UB).
21///
22/// The main downside of a [`PrefixArraySet`] over a trie type datastructure is that insertions have a significant `O(n)` cost,
23/// so if you are adding multiple values over the lifetime of the [`PrefixArraySet`] it may become less efficient overall than a traditional tree.
24#[derive(PartialEq, Eq)]
25pub struct PrefixArraySet<K: Borrow<str>>(pub(crate) Vec<K>);
26
27impl<K: Borrow<str> + fmt::Debug> fmt::Debug for PrefixArraySet<K> {
28    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
29        write!(f, "PrefixArraySet")?;
30        f.debug_set().entries(self.iter()).finish()
31    }
32}
33
34// Manually impl to get clone_from
35impl<K: Borrow<str> + Clone> Clone for PrefixArraySet<K> {
36    fn clone(&self) -> Self {
37        Self(self.0.clone())
38    }
39
40    fn clone_from(&mut self, other: &Self) {
41        self.0.clone_from(&other.0);
42    }
43}
44
45impl<K: Borrow<str>> Default for PrefixArraySet<K> {
46    fn default() -> Self {
47        PrefixArraySet::new()
48    }
49}
50
51impl<K: Borrow<str>> PrefixArraySet<K> {
52    /// Create a new empty [`PrefixArraySet`].
53    ///
54    /// This function will not allocate anything.
55    #[must_use]
56    pub const fn new() -> Self {
57        Self(Vec::new())
58    }
59
60    /// Creates a new empty [`PrefixArraySet`] with space for at least `capacity` elements.
61    ///
62    /// See [`Vec::with_capacity`] for additional notes.
63    ///
64    /// # Panics:
65    /// Panics if the new capacity exceeds `isize::MAX` bytes.
66    #[must_use]
67    pub fn with_capacity(capacity: usize) -> Self {
68        Self(Vec::with_capacity(capacity))
69    }
70
71    /// Reserves capacity for at least `additional` more elements to be inserted, the collection may reserve additional space as a speculative optimization.
72    /// Does nothing if capacity is already sufficient.
73    ///
74    /// See [`Vec::reserve`] for additional notes.
75    ///
76    /// # Panics:
77    /// Panics if the new capacity exceeds `isize::MAX` bytes.
78    pub fn reserve(&mut self, additional: usize) {
79        self.0.reserve(additional);
80    }
81
82    /// Reserves the minimum capacity to append `additional` more elements. Or, will not speculatively over-allocate like [`reserve`][PrefixArraySet::reserve].
83    /// Does nothing if capacity is already sufficient.
84    ///
85    /// See [`Vec::reserve_exact`] for additional notes.
86    ///
87    /// # Panics:
88    /// Panics if the new capacity exceeds `isize::MAX` bytes.
89    pub fn reserve_exact(&mut self, additional: usize) {
90        self.0.reserve_exact(additional);
91    }
92
93    /// Creates a new [`PrefixArraySet`] from a `Vec<K>`, removing any duplicate keys.
94    ///
95    /// This operation is `O(n log n)`.
96    #[must_use]
97    pub fn from_vec_lossy(v: Vec<K>) -> Self {
98        Self::from_vec_lossy_impl(v)
99    }
100
101    /// Inserts the given K into the [`PrefixArraySet`], returning true if the key was not already in the set
102    ///
103    /// This operation is `O(n)`.
104    pub fn insert(&mut self, key: K) -> bool {
105        self.insert_impl(key).is_none()
106    }
107
108    /// Adds a value to the set, replacing the existing value, if any, that is equal to the given one.
109    /// Returns the replaced value.
110    pub fn replace(&mut self, key: K) -> Option<K> {
111        self.insert_replace_impl(key)
112    }
113
114    /// Removes all values with the prefix provided, shifting the array in the process to account for the empty space.
115    ///
116    /// This operation is `O(n)`.
117    ///
118    /// # Examples
119    /// ```rust
120    /// # use prefix_array::PrefixArraySet;
121    /// let mut set = PrefixArraySet::from_iter(["a", "b", "c"]);
122    ///
123    /// set.drain_all_with_prefix("b");
124    ///
125    /// assert_eq!(set.to_vec(), &["a", "c"]);
126    /// ```
127    pub fn drain_all_with_prefix<'a>(&'a mut self, prefix: &str) -> Drain<'a, K> {
128        let range = self.find_all_with_prefix_idx_impl(prefix);
129
130        Drain(self.0.drain(range))
131    }
132
133    /// Drains all elements of the [`PrefixArraySet`], returning them in an iterator.
134    /// Keeps the backing allocation intact, unlike [`IntoIter`].
135    ///
136    /// When this iterator is dropped it drops all remaining elements.
137    pub fn drain(&mut self) -> Drain<'_, K> {
138        Drain(self.0.drain(..))
139    }
140
141    /// Removes the value that matches the given key, returning true if it was present in the set
142    ///
143    /// This operation is `O(log n)` if the key was not found, and `O(n)` if it was.
144    pub fn remove(&mut self, key: &str) -> bool {
145        self.remove_entry_impl(key).is_some()
146    }
147
148    /// Returns the total capacity that the [`PrefixArraySet`] has.
149    #[must_use]
150    pub fn capacity(&self) -> usize {
151        self.0.capacity()
152    }
153
154    /// Clears the [`PrefixArraySet`], removing all values.
155    ///
156    /// Capacity will not be freed.
157    pub fn clear(&mut self) {
158        self.0.clear();
159    }
160
161    /// Shrinks the capacity of this collection as much as possible.
162    ///
163    /// Additional capacity may still be left over after this operation.
164    pub fn shrink_to_fit(&mut self) {
165        self.0.shrink_to_fit();
166    }
167
168    /// Shrinks the capacity of this collection with a lower limit. It will drop down no lower than the supplied limit.
169    ///
170    /// If the current capacity is less than the lower limit, this is a no-op.
171    pub fn shrink_to(&mut self, min_capacity: usize) {
172        self.0.shrink_to(min_capacity);
173    }
174
175    /// Extends the collection with items from an iterator, this is functionally equivalent to the
176    /// `Extend` implementation and carries the same edge cases, but it allows passing a scratch
177    /// space to potentially avoid reallocations when calling `extend_with` multiple times.
178    pub fn extend_with<I>(&mut self, scratch: &mut ScratchSpace<Self>, iter: I)
179    where
180        I: IntoIterator<Item = K>,
181    {
182        self.extend_with_impl(&mut scratch.0, iter);
183    }
184
185    /// Makes a `PrefixArraySet` from an iterator in which all key items are unique
186    fn from_unique_iter<T: IntoIterator<Item = K>>(v: T) -> Self {
187        Self::from_unique_iter_impl(v)
188    }
189}
190
191impl<K: Borrow<str>> Extend<K> for PrefixArraySet<K> {
192    /// Extends the [`PrefixArraySet`] with more values, skipping updating any duplicates.
193    ///
194    /// It is currently unspecified if two identical values are given, who are not already in the set, which value will be kept.
195    ///
196    /// This operation is `O(n + k log k)` where k is the number of elements in the iterator.
197    fn extend<T>(&mut self, iter: T)
198    where
199        T: IntoIterator<Item = K>,
200    {
201        self.extend_with(&mut ScratchSpace::new(), iter);
202    }
203}
204
205#[cfg(feature = "std")]
206impl<K: Borrow<str>, H> From<std::collections::HashSet<K, H>> for PrefixArraySet<K> {
207    /// Performs a lossless conversion from a `HashSet<K>` to a `PrefixArraySet<K>` in `O(n log n)` time.
208    fn from(v: std::collections::HashSet<K, H>) -> Self {
209        Self::from_unique_iter(v)
210    }
211}
212
213impl<K: Borrow<str>> From<alloc::collections::BTreeSet<K>> for PrefixArraySet<K> {
214    /// Performs a lossless conversion from a `BTreeSet<K>` to a `PrefixArraySet<K>` in `O(n log n)` time.
215    fn from(v: alloc::collections::BTreeSet<K>) -> Self {
216        Self::from_unique_iter(v)
217    }
218}
219
220impl<K: Borrow<str>> From<PrefixArraySet<K>> for Vec<K> {
221    fn from(v: PrefixArraySet<K>) -> Vec<K> {
222        v.0
223    }
224}
225
226impl<K: Borrow<str>> Deref for PrefixArraySet<K> {
227    type Target = SetSubSlice<K>;
228
229    fn deref(&self) -> &Self::Target {
230        SetSubSlice::cast_from_slice_core(&self.0)
231    }
232}
233
234impl<K: Borrow<str>> core::borrow::Borrow<SetSubSlice<K>> for PrefixArraySet<K> {
235    fn borrow(&self) -> &SetSubSlice<K> {
236        self
237    }
238}
239
240impl<K: Borrow<str> + Clone> ToOwned for SetSubSlice<K> {
241    type Owned = PrefixArraySet<K>;
242
243    fn to_owned(&self) -> PrefixArraySet<K> {
244        // here we can assert the invariants were upheld
245        PrefixArraySet(self.to_vec())
246    }
247
248    fn clone_into(&self, target: &mut PrefixArraySet<K>) {
249        self.0.clone_into(&mut target.0);
250    }
251}
252
253impl<K: Borrow<str> + fmt::Debug> fmt::Debug for SetSubSlice<K> {
254    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
255        write!(f, "SetSubSlice")?;
256        f.debug_set().entries(self.iter()).finish()
257    }
258}
259
260/// A subslice of a [`PrefixArraySet`] in which all items contain a common prefix (which may be the unit prefix `""`).
261///
262/// The [`SetSubSlice`] does not store what that common prefix is for performance reasons (but it can be computed, see: [`SetSubSlice::common_prefix`]), it is up to the user to keep track of.
263// SAFETY: this type must remain repr(transparent) to [K] for cast_from_slice(_mut)_core invariants
264#[repr(transparent)]
265#[derive(PartialEq, Eq)]
266pub struct SetSubSlice<K: Borrow<str>>(pub(crate) [K]);
267
268impl<K: Borrow<str>> SetSubSlice<K> {
269    /// creates a shared ref to Self from a ref to backing storage
270    pub(crate) fn cast_from_slice_core(v: &[K]) -> &Self {
271        // SAFETY: this type is repr(transparent), and the lifetimes are both &'a
272        unsafe { &*(v as *const [K] as *const Self) }
273    }
274
275    /// creates an owned ref to Self from a ref to backing storage
276    pub(crate) fn cast_from_slice_mut_core(v: &mut [K]) -> &mut Self {
277        // SAFETY: this type is repr(transparent), and the lifetimes are both &'a
278        unsafe { &mut *(v as *mut [K] as *mut Self) }
279    }
280
281    /// Returns an iterator over all of the elements of this [`SetSubSlice`]
282    pub fn iter(&self) -> Iter<'_, K> {
283        Iter(self.0.iter())
284    }
285
286    /// Creates an owned copy of this [`SetSubSlice`] as a [`Vec`].
287    /// If you wish to preserve [`PrefixArraySet`] semantics consider using [`ToOwned`] instead.
288    pub fn to_vec(&self) -> Vec<K>
289    where
290        K: Clone,
291    {
292        self.0.to_vec()
293    }
294
295    /// Returns the `SetSubSlice` where all `K` have the same prefix `prefix`.
296    ///
297    /// Will return an empty array if there are no matches.
298    ///
299    /// This operation is `O(log n)`
300    ///
301    /// # Examples
302    /// ```rust
303    /// # use prefix_array::PrefixArraySet;
304    /// let set = PrefixArraySet::from_iter(["foo", "bar", "baz"]);
305    ///
306    /// assert_eq!(set.find_all_with_prefix("b").to_vec(), vec!["bar", "baz"]);
307    /// ```
308    pub fn find_all_with_prefix<'a>(&'a self, prefix: &str) -> &'a Self {
309        let range = self.find_all_with_prefix_idx_impl(prefix);
310        self.reslice(range)
311    }
312
313    /// Compute the common prefix of this [`SetSubSlice`] from the data.
314    /// Will return an empty string if this subslice is empty.
315    ///
316    /// Note that this may be more specific than what was searched for, i/e:
317    /// ```rust
318    /// # use prefix_array::PrefixArraySet;
319    /// let arr = PrefixArraySet::from_iter(["12346", "12345", "12341"]);
320    /// // Common prefix is *computed*, so even though we only
321    /// //  searched for "12" we got something more specific
322    /// assert_eq!(arr.find_all_with_prefix("12").common_prefix(), "1234");
323    /// ```
324    ///
325    /// This operation is `O(1)`, but it is not computationally free.
326    pub fn common_prefix(&self) -> &str {
327        self.common_prefix_impl()
328    }
329
330    /// Returns whether this [`SetSubSlice`] contains the given key
331    ///
332    /// This operation is `O(log n)`.
333    pub fn contains(&self, key: &str) -> bool {
334        self.contains_key_impl(key)
335    }
336
337    /// Returns whether this [`SetSubSlice`] is empty
338    pub const fn is_empty(&self) -> bool {
339        self.0.is_empty()
340    }
341
342    /// Returns the length of this [`SetSubSlice`]
343    pub const fn len(&self) -> usize {
344        self.0.len()
345    }
346}
347
348#[cfg(test)]
349mod tests {
350    use super::*;
351
352    use alloc::vec;
353
354    #[test]
355    fn replace_replaces() {
356        #[derive(Debug)]
357        struct TrackerStr<'a>(&'a str, u64);
358
359        impl core::borrow::Borrow<str> for TrackerStr<'_> {
360            fn borrow(&self) -> &str {
361                self.0
362            }
363        }
364
365        let mut parray = PrefixArraySet::from_iter([TrackerStr("abc", 0)]);
366
367        assert!(matches!(
368            parray.replace(TrackerStr("abc", 1)),
369            Some(TrackerStr("abc", 0))
370        ));
371
372        assert!(parray.contains("abc"));
373    }
374
375    #[test]
376    fn submatches() {
377        let parray = PrefixArraySet::from_vec_lossy(vec![
378            "abcde", "234234", "among", "we", "weed", "who", "what", "why", "abde", "abch",
379            "america",
380        ]);
381
382        assert_eq!(
383            &["abcde", "abch", "abde"],
384            &*parray.find_all_with_prefix("ab").to_vec()
385        );
386
387        assert_eq!("ab", parray.find_all_with_prefix("ab").common_prefix());
388
389        let mut parraysingle = PrefixArraySet::from_vec_lossy(vec!["abcde"]);
390
391        assert_eq!("abcde", parraysingle.to_vec()[0]);
392        assert_eq!(
393            &["abcde"],
394            &*parraysingle.find_all_with_prefix("a").to_vec()
395        );
396
397        assert!(parraysingle.find_all_with_prefix("b").is_empty());
398
399        _ = parraysingle.drain_all_with_prefix("a");
400
401        assert!(parraysingle.is_empty());
402    }
403
404    #[test]
405    fn is_eq() {
406        let arr1 = PrefixArraySet::from_iter(["abcde", "among"]);
407        let arr2 = PrefixArraySet::from_iter(["abcde", "among"]);
408
409        assert_eq!(arr1, arr2);
410
411        let arr3 = PrefixArraySet::new();
412
413        assert_ne!(&*arr3, &*arr2);
414    }
415}