Skip to main content

mcp_memory/
search.rs

1use crate::intern::StrId;
2
3/// Inverted word index for fast entity search.
4///
5/// For each entity, we tokenize its name, type, and observations,
6/// store each token → set of matching entity indices.
7///
8/// Uses a flat `Vec<(StrId, u32)>` sorted by (token, entity_idx)
9/// for cache-friendly lookups via binary search.
10pub struct SearchIndex {
11    // Sorted by (token, entity_idx), no duplicates.
12    entries: Vec<(StrId, u32)>,
13    lower_buf: Vec<u8>,
14}
15
16impl SearchIndex {
17    pub fn new() -> Self {
18        Self {
19            entries: Vec::new(),
20            lower_buf: Vec::with_capacity(256),
21        }
22    }
23
24    pub fn clear(&mut self) {
25        self.entries.clear();
26    }
27
28    pub const fn len(&self) -> usize {
29        self.entries.len()
30    }
31
32    pub const fn is_empty(&self) -> bool {
33        self.entries.is_empty()
34    }
35
36    /// Index a single entity by its name, type, and observations.
37    /// All strings must already be interned.
38    /// `entity_idx` is the position in the entity storage vec.
39    pub fn index_entity(
40        &mut self,
41        interner: &mut crate::intern::StringInterner,
42        entity_idx: u32,
43        name: StrId,
44        entity_type: StrId,
45        observations: &[StrId],
46    ) {
47        self.tokenize_and_insert(interner, entity_idx, name);
48        self.tokenize_and_insert(interner, entity_idx, entity_type);
49        for &obs in observations {
50            self.tokenize_and_insert(interner, entity_idx, obs);
51        }
52    }
53
54    /// Remove all entries for a given entity (before re-indexing).
55    pub fn remove_entity(&mut self, entity_idx: u32) {
56        self.entries.retain(|&(_, idx)| idx != entity_idx);
57    }
58
59    /// Search for entities matching the query.
60    /// Uses binary search for exact token matches (O(log n + matches))
61    /// and falls back to prefix matching for partial queries.
62    pub fn search(&self, query: &str, interner: &crate::intern::StringInterner) -> Vec<u32> {
63        if query.is_empty() || self.entries.is_empty() {
64            return Vec::new();
65        }
66
67        let lower_query: String = query.to_ascii_lowercase();
68
69        // Fast path: exact token match via binary search
70        let mut matched = if let Some(token_id) = interner.get_optional(&lower_query) {
71            let range_begin = self.entries.binary_search_by(|(t, _)| t.cmp(&token_id));
72            let range_end = self.entries.binary_search_by(|(t, _)| {
73                if *t <= token_id { std::cmp::Ordering::Less } else { std::cmp::Ordering::Greater }
74            });
75            if let (Ok(begin), Err(end)) = (range_begin, range_end) {
76                self.entries[begin..end].iter().map(|&(_, idx)| idx).collect()
77            } else {
78                Vec::new()
79            }
80        } else {
81            Vec::new()
82        };
83
84        // Prefix match scan for tokens starting with the query
85        for &(token_id, entity_idx) in &self.entries {
86            if matched.last().is_none_or(|&last| last != entity_idx) {
87                let token = interner.lookup(token_id);
88                if token.len() >= lower_query.len()
89                    && token.as_bytes().starts_with(lower_query.as_bytes())
90                {
91                    matched.push(entity_idx);
92                }
93            }
94        }
95
96        matched.sort_unstable();
97        matched.dedup();
98        matched
99    }
100
101    fn tokenize_and_insert(
102        &mut self,
103        interner: &mut crate::intern::StringInterner,
104        entity_idx: u32,
105        text: StrId,
106    ) {
107        let s = interner.lookup(text);
108        if s.is_empty() {
109            return;
110        }
111
112        self.lower_buf.clear();
113        self.lower_buf.extend(s.bytes().map(|b| b.to_ascii_lowercase()));
114        let lowered = unsafe { std::str::from_utf8_unchecked(&self.lower_buf) };
115
116        let tokens: Vec<&str> = lowered.split_whitespace().filter(|t| !t.is_empty()).collect();
117        let interned: Vec<StrId> = tokens.iter().map(|t| interner.intern(t)).collect();
118
119        for token_id in &interned {
120            self.insert_entry(*token_id, entity_idx);
121        }
122    }
123
124    fn insert_entry(&mut self, token: StrId, entity_idx: u32) {
125        let entry = (token, entity_idx);
126        let pos = match self.entries.binary_search(&entry) {
127            Ok(_) => return,
128            Err(pos) => pos,
129        };
130        self.entries.insert(pos, entry);
131    }
132}
133
134impl Default for SearchIndex {
135    fn default() -> Self {
136        Self::new()
137    }
138}
139
140#[cfg(test)]
141mod tests {
142    use super::*;
143    use crate::intern::StringInterner;
144
145    #[test]
146    fn test_index_and_search() {
147        let mut interner = StringInterner::new();
148        let mut index = SearchIndex::new();
149
150        let alice_name = interner.intern("Alice");
151        let alice_type = interner.intern("person");
152        let alice_obs = interner.intern("likes coffee");
153
154        index.index_entity(&mut interner, 0, alice_name, alice_type, &[alice_obs]);
155
156        let bob_name = interner.intern("Bob");
157        let bob_type = interner.intern("person");
158        let bob_obs = interner.intern("drinks tea");
159
160        index.index_entity(&mut interner, 1, bob_name, bob_type, &[bob_obs]);
161
162        let results = index.search("coffee", &interner);
163        assert_eq!(results, vec![0]);
164
165        let results = index.search("person", &interner);
166        assert_eq!(results.len(), 2);
167    }
168
169    #[test]
170    fn test_remove_entity() {
171        let mut interner = StringInterner::new();
172        let mut index = SearchIndex::new();
173
174        let name = interner.intern("Test");
175        let typ = interner.intern("type");
176
177        index.index_entity(&mut interner, 0, name, typ, &[]);
178        assert!(!index.is_empty());
179
180        index.remove_entity(0);
181        assert!(index.entries.iter().all(|&(_, idx)| idx != 0));
182    }
183
184    #[test]
185    fn test_search_empty_query() {
186        let interner = StringInterner::new();
187        let index = SearchIndex::new();
188        assert!(index.search("", &interner).is_empty());
189    }
190
191    #[test]
192    fn test_search_no_match() {
193        let mut interner = StringInterner::new();
194        let mut index = SearchIndex::new();
195        let name = interner.intern("Alice");
196        let typ = interner.intern("person");
197        index.index_entity(&mut interner, 0, name, typ, &[]);
198        assert!(index.search("zzzzzz", &interner).is_empty());
199    }
200
201    #[test]
202    fn test_search_case_insensitive() {
203        let mut interner = StringInterner::new();
204        let mut index = SearchIndex::new();
205        let name = interner.intern("Alice");
206        let typ = interner.intern("person");
207        index.index_entity(&mut interner, 0, name, typ, &[]);
208        let results = index.search("ALICE", &interner);
209        assert_eq!(results, vec![0]);
210    }
211
212    #[test]
213    fn test_search_partial_substring() {
214        let mut interner = StringInterner::new();
215        let mut index = SearchIndex::new();
216        let name = interner.intern("Alice");
217        let typ = interner.intern("person");
218        index.index_entity(&mut interner, 0, name, typ, &[]);
219        let results = index.search("Ali", &interner);
220        assert_eq!(results, vec![0]);
221    }
222
223    #[test]
224    fn test_multi_token_search() {
225        let mut interner = StringInterner::new();
226        let mut index = SearchIndex::new();
227        let obs = interner.intern("likes drinking coffee");
228        let alice = interner.intern("Alice");
229        let person = interner.intern("person");
230        index.index_entity(
231            &mut interner,
232            0,
233            alice,
234            person,
235            &[obs],
236        );
237        assert_eq!(index.search("likes", &interner), vec![0]);
238        assert_eq!(index.search("drinking", &interner), vec![0]);
239        assert_eq!(index.search("coffee", &interner), vec![0]);
240    }
241
242    #[test]
243    fn test_remove_then_reindex() {
244        let mut interner = StringInterner::new();
245        let mut index = SearchIndex::new();
246        let name = interner.intern("Alice");
247        let typ = interner.intern("person");
248        index.index_entity(&mut interner, 0, name, typ, &[]);
249
250        assert_eq!(index.search("Alice", &interner).len(), 1);
251        index.remove_entity(0);
252        assert!(index.search("Alice", &interner).is_empty());
253
254        index.index_entity(&mut interner, 0, name, typ, &[]);
255        assert_eq!(index.search("Alice", &interner).len(), 1);
256    }
257
258    #[test]
259    fn test_query_longer_than_token() {
260        let mut interner = StringInterner::new();
261        let mut index = SearchIndex::new();
262        let name = interner.intern("Alice");
263        let person = interner.intern("person");
264        index.index_entity(&mut interner, 0, name, person, &[]);
265        assert!(index.search("AliceInWonderland", &interner).is_empty());
266    }
267
268    #[test]
269    fn test_empty_index() {
270        let interner = StringInterner::new();
271        let index = SearchIndex::new();
272        assert!(index.search("anything", &interner).is_empty());
273    }
274
275    #[test]
276    fn test_clear_index() {
277        let mut interner = StringInterner::new();
278        let mut index = SearchIndex::new();
279        let name = interner.intern("Alice");
280        let person = interner.intern("person");
281        index.index_entity(&mut interner, 0, name, person, &[]);
282        assert!(!index.is_empty());
283        index.clear();
284        assert!(index.is_empty());
285        assert!(index.search("Alice", &interner).is_empty());
286    }
287}