arbor_graph/
search_index.rs1use crate::graph::NodeId;
7use std::collections::{HashMap, HashSet};
8
9const MIN_NGRAM_LEN: usize = 2;
11
12const MAX_NGRAM_LEN: usize = 4;
14
15#[derive(Debug, Default, Clone)]
21pub struct SearchIndex {
22 exact_index: HashMap<String, Vec<NodeId>>,
24 ngram_index: HashMap<String, HashSet<NodeId>>,
26}
27
28impl SearchIndex {
29 pub fn new() -> Self {
31 Self::default()
32 }
33
34 pub fn insert(&mut self, name: &str, id: NodeId) {
36 let lower = name.to_lowercase();
37
38 self.exact_index.entry(lower.clone()).or_default().push(id);
40
41 for ngram in self.generate_ngrams(&lower) {
43 self.ngram_index.entry(ngram).or_default().insert(id);
44 }
45 }
46
47 pub fn remove(&mut self, name: &str, id: NodeId) {
49 let lower = name.to_lowercase();
50
51 if let Some(ids) = self.exact_index.get_mut(&lower) {
53 ids.retain(|&x| x != id);
54 if ids.is_empty() {
55 self.exact_index.remove(&lower);
56 }
57 }
58
59 for ngram in self.generate_ngrams(&lower) {
61 if let Some(ids) = self.ngram_index.get_mut(&ngram) {
62 ids.remove(&id);
63 if ids.is_empty() {
64 self.ngram_index.remove(&ngram);
65 }
66 }
67 }
68 }
69
70 pub fn search(&self, query: &str) -> Vec<NodeId> {
74 if query.is_empty() {
75 return Vec::new();
76 }
77
78 let query_lower = query.to_lowercase();
79
80 if query_lower.len() < MIN_NGRAM_LEN {
82 let mut results: Vec<NodeId> = self
83 .exact_index
84 .iter()
85 .filter(|(name, _)| name.starts_with(&query_lower))
86 .flat_map(|(_, ids)| ids.iter().copied())
87 .collect();
88 results.sort();
89 results.dedup();
90 return results;
91 }
92
93 let query_ngrams: Vec<String> = self.generate_ngrams(&query_lower);
95
96 if query_ngrams.is_empty() {
97 return Vec::new();
98 }
99
100 let mut candidates: Option<HashSet<NodeId>> = None;
102
103 for ngram in &query_ngrams {
104 if let Some(ids) = self.ngram_index.get(ngram) {
105 match &mut candidates {
106 None => candidates = Some(ids.clone()),
107 Some(c) => {
108 c.retain(|id| ids.contains(id));
109 }
110 }
111 } else {
112 return Vec::new();
114 }
115 }
116
117 let mut results: Vec<NodeId> = candidates
120 .unwrap_or_default()
121 .into_iter()
122 .filter(|id| {
123 self.exact_index
124 .iter()
125 .any(|(name, ids)| ids.contains(id) && name.contains(&query_lower))
126 })
127 .collect();
128
129 results.sort();
130 results
131 }
132
133 fn generate_ngrams(&self, s: &str) -> Vec<String> {
135 let chars: Vec<char> = s.chars().collect();
136 let mut ngrams = Vec::new();
137
138 for n in MIN_NGRAM_LEN..=MAX_NGRAM_LEN {
139 if chars.len() >= n {
140 for i in 0..=(chars.len() - n) {
141 ngrams.push(chars[i..i + n].iter().collect());
142 }
143 }
144 }
145
146 ngrams
147 }
148
149 pub fn len(&self) -> usize {
151 self.exact_index.len()
152 }
153
154 pub fn is_empty(&self) -> bool {
156 self.exact_index.is_empty()
157 }
158}
159
160#[cfg(test)]
161mod tests {
162 use super::*;
163 use petgraph::graph::NodeIndex;
164
165 fn node_id(n: u32) -> NodeId {
166 NodeIndex::new(n as usize)
167 }
168
169 #[test]
170 fn test_insert_and_search_exact() {
171 let mut index = SearchIndex::new();
172 index.insert("validate_user", node_id(0));
173 index.insert("validate_email", node_id(1));
174 index.insert("send_email", node_id(2));
175
176 let results = index.search("validate_user");
177 assert_eq!(results, vec![node_id(0)]);
178 }
179
180 #[test]
181 fn test_search_substring() {
182 let mut index = SearchIndex::new();
183 index.insert("validate_user", node_id(0));
184 index.insert("validate_email", node_id(1));
185 index.insert("send_email", node_id(2));
186
187 let results = index.search("validate");
188 assert!(results.contains(&node_id(0)));
189 assert!(results.contains(&node_id(1)));
190 assert!(!results.contains(&node_id(2)));
191 }
192
193 #[test]
194 fn test_search_case_insensitive() {
195 let mut index = SearchIndex::new();
196 index.insert("ValidateUser", node_id(0));
197
198 let results = index.search("validateuser");
199 assert_eq!(results, vec![node_id(0)]);
200
201 let results = index.search("VALIDATEUSER");
202 assert_eq!(results, vec![node_id(0)]);
203 }
204
205 #[test]
206 fn test_search_middle_substring() {
207 let mut index = SearchIndex::new();
208 index.insert("get_user_profile", node_id(0));
209
210 let results = index.search("user");
211 assert_eq!(results, vec![node_id(0)]);
212
213 let results = index.search("_user_");
214 assert_eq!(results, vec![node_id(0)]);
215 }
216
217 #[test]
218 fn test_remove_from_index() {
219 let mut index = SearchIndex::new();
220 index.insert("foo", node_id(0));
221 index.insert("foobar", node_id(1));
222
223 index.remove("foo", node_id(0));
224
225 let results = index.search("foo");
226 assert!(!results.contains(&node_id(0)));
227 assert!(results.contains(&node_id(1)));
228 }
229
230 #[test]
231 fn test_search_no_match() {
232 let mut index = SearchIndex::new();
233 index.insert("hello", node_id(0));
234
235 let results = index.search("world");
236 assert!(results.is_empty());
237 }
238
239 #[test]
240 fn test_short_query() {
241 let mut index = SearchIndex::new();
242 index.insert("ab", node_id(0));
243 index.insert("abc", node_id(1));
244 index.insert("xyz", node_id(2));
245
246 let results = index.search("a");
248 assert!(results.contains(&node_id(0)));
249 assert!(results.contains(&node_id(1)));
250 assert!(!results.contains(&node_id(2)));
251 }
252
253 #[test]
254 fn test_index_len_and_is_empty() {
255 let mut index = SearchIndex::new();
256 assert!(index.is_empty());
257 assert_eq!(index.len(), 0);
258
259 index.insert("foo", node_id(0));
260 assert!(!index.is_empty());
261 assert_eq!(index.len(), 1);
262
263 index.insert("bar", node_id(1));
264 assert_eq!(index.len(), 2);
265 }
266
267 #[test]
268 fn test_duplicate_name_different_ids() {
269 let mut index = SearchIndex::new();
270 index.insert("process", node_id(0));
272 index.insert("process", node_id(1));
273
274 let results = index.search("process");
275 assert!(results.contains(&node_id(0)));
276 assert!(results.contains(&node_id(1)));
277 assert_eq!(index.len(), 1);
279 }
280
281 #[test]
282 fn test_remove_nonexistent_does_not_panic() {
283 let mut index = SearchIndex::new();
284 index.remove("nonexistent", node_id(99));
286 assert!(index.is_empty());
287 }
288
289 #[test]
290 fn test_search_empty_query() {
291 let mut index = SearchIndex::new();
292 index.insert("hello", node_id(0));
293
294 let results = index.search("");
296 assert!(results.is_empty());
297 }
298}