// oxihuman_core/search_index.rs
#![allow(dead_code)]

use std::collections::HashMap;
/// A single indexed document: a stable id plus the raw title and body text.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct SearchDoc {
    /// Identifier assigned by `SearchIndex::insert` (monotonically increasing).
    pub id: u64,
    /// Original (untokenized) title text.
    pub title: String,
    /// Original (untokenized) body text.
    pub body: String,
}
17
/// An in-memory inverted index supporting case-insensitive AND-queries
/// over tokenized document text.
#[allow(dead_code)]
pub struct SearchIndex {
    /// Stored documents, keyed by their assigned id.
    docs: HashMap<u64, SearchDoc>,
    /// Inverted index: lowercase token -> ids of documents containing it.
    index: HashMap<String, Vec<u64>>,
    /// Next id to hand out from `insert`; never reused.
    next_id: u64,
    /// Running count of token occurrences indexed via `insert`.
    total_indexed: usize,
}
26
27#[allow(dead_code)]
28impl SearchIndex {
29 pub fn new() -> Self {
30 Self {
31 docs: HashMap::new(),
32 index: HashMap::new(),
33 next_id: 0,
34 total_indexed: 0,
35 }
36 }
37
38 fn tokenize(text: &str) -> Vec<String> {
40 text.split(|c: char| !c.is_alphanumeric())
41 .filter(|s| !s.is_empty())
42 .map(|s| s.to_lowercase())
43 .collect()
44 }
45
46 pub fn insert(&mut self, title: &str, body: &str) -> u64 {
48 let id = self.next_id;
49 self.next_id += 1;
50 let doc = SearchDoc {
51 id,
52 title: title.to_string(),
53 body: body.to_string(),
54 };
55 let tokens = Self::tokenize(&format!("{} {}", title, body));
56 self.total_indexed += tokens.len();
57 for tok in tokens {
58 self.index.entry(tok).or_default().push(id);
59 }
60 self.docs.insert(id, doc);
61 id
62 }
63
64 pub fn remove(&mut self, id: u64) -> bool {
66 if self.docs.remove(&id).is_none() {
67 return false;
68 }
69 for ids in self.index.values_mut() {
70 ids.retain(|&i| i != id);
71 }
72 true
73 }
74
75 pub fn search(&self, query: &str) -> Vec<u64> {
77 let tokens = Self::tokenize(query);
78 if tokens.is_empty() {
79 return Vec::new();
80 }
81 let mut result: Option<Vec<u64>> = None;
82 for tok in &tokens {
83 let ids: Vec<u64> = self.index.get(tok).cloned().unwrap_or_default();
84 result = Some(match result {
85 None => ids,
86 Some(prev) => {
87 let mut set: Vec<u64> = prev;
88 set.retain(|id| ids.contains(id));
89 set
90 }
91 });
92 }
93 let mut out = result.unwrap_or_default();
94 out.sort_unstable();
95 out.dedup();
96 out
97 }
98
99 pub fn get(&self, id: u64) -> Option<&SearchDoc> {
101 self.docs.get(&id)
102 }
103
104 pub fn doc_count(&self) -> usize {
106 self.docs.len()
107 }
108
109 pub fn token_count(&self) -> usize {
111 self.index.len()
112 }
113
114 pub fn total_indexed(&self) -> usize {
116 self.total_indexed
117 }
118
119 pub fn is_empty(&self) -> bool {
120 self.docs.is_empty()
121 }
122
123 pub fn clear(&mut self) {
124 self.docs.clear();
125 self.index.clear();
126 self.total_indexed = 0;
127 }
128}
129
130impl Default for SearchIndex {
131 fn default() -> Self {
132 Self::new()
133 }
134}
135
136pub fn new_search_index() -> SearchIndex {
137 SearchIndex::new()
138}
139
#[cfg(test)]
mod tests {
    use super::*;

    // Single document, single-token query finds exactly one hit.
    #[test]
    fn insert_and_search() {
        let mut index = new_search_index();
        index.insert("hello world", "foo bar");
        assert_eq!(index.search("hello").len(), 1);
    }

    // Multi-token queries require every token (AND semantics).
    #[test]
    fn multi_token_and() {
        let mut index = new_search_index();
        index.insert("alpha beta", "body");
        index.insert("alpha only", "stuff");
        assert_eq!(index.search("alpha beta").len(), 1);
    }

    // A token absent from every document yields no hits.
    #[test]
    fn no_match_returns_empty() {
        let mut index = new_search_index();
        index.insert("rust programming", "systems");
        assert!(index.search("java").is_empty());
    }

    // Queries and documents are matched case-insensitively.
    #[test]
    fn case_insensitive() {
        let mut index = new_search_index();
        index.insert("Hello World", "body");
        assert_eq!(index.search("HELLO").len(), 1);
    }

    // Removed documents no longer appear in search results.
    #[test]
    fn remove_document() {
        let mut index = new_search_index();
        let doc_id = index.insert("test doc", "");
        assert!(index.remove(doc_id));
        assert!(index.search("test").is_empty());
    }

    // doc_count reflects the number of inserted documents.
    #[test]
    fn doc_count() {
        let mut index = new_search_index();
        index.insert("a", "");
        index.insert("b", "");
        assert_eq!(index.doc_count(), 2);
    }

    // get() returns the stored document for a valid id.
    #[test]
    fn get_doc() {
        let mut index = new_search_index();
        let doc_id = index.insert("title", "content");
        let stored = index.get(doc_id).expect("should succeed");
        assert_eq!(stored.title, "title");
    }

    // A query that tokenizes to nothing returns an empty result.
    #[test]
    fn empty_query_returns_empty() {
        let mut index = new_search_index();
        index.insert("something", "");
        assert!(index.search("").is_empty());
    }

    // clear() leaves the index with no documents.
    #[test]
    fn clear_index() {
        let mut index = new_search_index();
        index.insert("doc", "text");
        index.clear();
        assert!(index.is_empty());
    }

    // Inserting text registers at least one distinct token.
    #[test]
    fn token_count_nonzero_after_insert() {
        let mut index = new_search_index();
        index.insert("unique token here", "");
        assert!(index.token_count() > 0);
    }
}