composable_indexes/index/
suffix_tree.rs

1use alloc::collections::BTreeMap;
2use alloc::rc::Rc;
3use alloc::string::{String, ToString};
4use alloc::vec::Vec;
5use hashbrown::HashSet;
6
7use crate::{
8    Key,
9    core::{Index, Seal},
10    index::generic::{DefaultKeySet, KeySet},
11};
12
13/// An index for contains_string queries backed by a suffix tree.
14pub struct SuffixTree<KeySet_ = DefaultKeySet> {
15    suffix_tree: BTreeMap<Suffix<'static>, KeySet_>,
16}
17
18impl<KeySet_: KeySet> Default for SuffixTree<KeySet_> {
19    fn default() -> Self {
20        SuffixTree {
21            suffix_tree: BTreeMap::new(),
22        }
23    }
24}
25
26impl SuffixTree<DefaultKeySet> {
27    pub fn new() -> Self {
28        Self::default()
29    }
30}
31
32impl<KeySet_> SuffixTree<KeySet_>
33where
34    KeySet_: KeySet,
35{
36    pub fn with_keyset() -> Self {
37        SuffixTree {
38            suffix_tree: BTreeMap::new(),
39        }
40    }
41}
42
43impl<KeySet_> Index<String> for SuffixTree<KeySet_>
44where
45    KeySet_: crate::index::generic::KeySet,
46{
47    #[inline]
48    fn insert(&mut self, _seal: Seal, op: &crate::core::Insert<String>) {
49        let suffixes = Suffix::all_suffixes(op.new);
50        for suffix in suffixes {
51            self.suffix_tree.entry(suffix).or_default().insert(op.key);
52        }
53    }
54
55    #[inline]
56    fn remove(&mut self, _seal: Seal, op: &crate::core::Remove<String>) {
57        let suffixes = Suffix::all_suffixes(op.existing);
58        for suffix in suffixes {
59            let key_set = self.suffix_tree.get_mut(&suffix).unwrap();
60            key_set.remove(&op.key);
61            if key_set.is_empty() {
62                self.suffix_tree.remove(&suffix);
63            }
64        }
65    }
66}
67
68impl<KeySet_> SuffixTree<KeySet_>
69where
70    KeySet_: KeySet,
71{
72    pub fn contains_get_all(&self, pattern: &str) -> HashSet<Key> {
73        let suffix = Suffix::Ref { suffix: pattern };
74        self.suffix_tree
75            .range(suffix..)
76            .next()
77            .and_then(|(suffix, key_set)| {
78                if suffix.as_ref().starts_with(pattern) {
79                    Some(key_set.iter().collect())
80                } else {
81                    None
82                }
83            })
84            .unwrap_or_default()
85    }
86
87    pub fn contains_get_one(&self, pattern: &str) -> Option<Key> {
88        let suffix = Suffix::Ref { suffix: pattern };
89        self.suffix_tree
90            .range(suffix..)
91            .next()
92            .and_then(|(suffix, key_set)| {
93                if suffix.as_ref().starts_with(pattern) {
94                    key_set.iter().next()
95                } else {
96                    None
97                }
98            })
99    }
100
101    pub fn ends_with_get_one(&self, pattern: &str) -> Option<Key> {
102        let suffix = Suffix::Ref { suffix: pattern };
103        self.suffix_tree
104            .get(&suffix)
105            .and_then(|key_set| key_set.iter().next())
106    }
107
108    pub fn ends_with_get_all(&self, pattern: &str) -> HashSet<Key> {
109        let suffix = Suffix::Ref { suffix: pattern };
110        self.suffix_tree
111            .get(&suffix)
112            .map(|key_set| key_set.iter().collect())
113            .unwrap_or_default()
114    }
115
116    pub fn count_distinct_suffixes(&self) -> usize {
117        self.suffix_tree.len()
118    }
119}
120
121enum Suffix<'a> {
122    Owned { base: Rc<String>, index: usize },
123    Ref { suffix: &'a str },
124}
125
126impl AsRef<str> for Suffix<'_> {
127    fn as_ref(&self) -> &str {
128        match self {
129            Suffix::Owned { base, index } => &base[*index..],
130            Suffix::Ref { suffix } => suffix,
131        }
132    }
133}
134
135impl Suffix<'_> {
136    fn all_suffixes(s: &str) -> Vec<Suffix<'static>> {
137        let mut suffixes = Vec::new();
138
139        let base = Rc::new(s.to_string());
140        for (i, _) in s.char_indices() {
141            suffixes.push(Suffix::Owned {
142                base: Rc::clone(&base),
143                index: i,
144            });
145        }
146        suffixes
147    }
148}
149
150impl PartialEq for Suffix<'_> {
151    fn eq(&self, other: &Self) -> bool {
152        self.as_ref() == other.as_ref()
153    }
154}
155
156impl Eq for Suffix<'_> {}
157
158impl PartialOrd for Suffix<'_> {
159    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
160        Some(self.cmp(other))
161    }
162}
163
164impl Ord for Suffix<'_> {
165    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
166        self.as_ref().cmp(other.as_ref())
167    }
168}
169
170#[cfg(test)]
171mod tests {
172    use crate::testutils::{SortedVec, prop_assert_reference};
173
174    use super::*;
175
176    #[test]
177    fn test_contains_ref() {
178        prop_assert_reference(
179            SuffixTree::new,
180            |db| {
181                db.query(|ix| ix.contains_get_all("aaa"))
182                    .into_iter()
183                    .cloned()
184                    .collect::<SortedVec<_>>()
185            },
186            |data| {
187                data.iter()
188                    .filter(|s| s.contains("aaa"))
189                    .cloned()
190                    .collect::<SortedVec<_>>()
191            },
192            None,
193        );
194    }
195}