mnemo_core/search/
tantivy_index.rs1use std::path::Path;
2use std::sync::Mutex;
3
4use tantivy::collector::TopDocs;
5use tantivy::query::QueryParser;
6use tantivy::schema::Value;
7use tantivy::schema::{STORED, STRING, Schema, TEXT};
8use tantivy::{Index, IndexReader, IndexWriter, ReloadPolicy, TantivyDocument};
9
10use crate::error::{Error, Result};
11use crate::search::FullTextIndex;
12use uuid::Uuid;
13
14pub struct TantivyFullTextIndex {
15 index: Index,
16 writer: Mutex<IndexWriter>,
17 reader: IndexReader,
18 id_field: tantivy::schema::Field,
19 content_field: tantivy::schema::Field,
20}
21
22fn build_schema() -> (Schema, tantivy::schema::Field, tantivy::schema::Field) {
23 let mut schema_builder = Schema::builder();
24 let id_field = schema_builder.add_text_field("id", STRING | STORED);
25 let content_field = schema_builder.add_text_field("content", TEXT);
26 (schema_builder.build(), id_field, content_field)
27}
28
29impl TantivyFullTextIndex {
30 pub fn new(path: &Path) -> Result<Self> {
31 let (schema, id_field, content_field) = build_schema();
32
33 std::fs::create_dir_all(path).map_err(|e| Error::Index(e.to_string()))?;
34
35 let dir = tantivy::directory::MmapDirectory::open(path)
36 .map_err(|e| Error::Index(e.to_string()))?;
37
38 let index = if Index::exists(&dir).map_err(|e| Error::Index(e.to_string()))? {
39 Index::open(dir).map_err(|e| Error::Index(e.to_string()))?
40 } else {
41 Index::create(dir, schema, tantivy::IndexSettings::default())
42 .map_err(|e| Error::Index(e.to_string()))?
43 };
44
45 let writer = index
46 .writer(50_000_000) .map_err(|e| Error::Index(e.to_string()))?;
48
49 let reader = index
50 .reader_builder()
51 .reload_policy(ReloadPolicy::OnCommitWithDelay)
52 .try_into()
53 .map_err(|e| Error::Index(e.to_string()))?;
54
55 Ok(Self {
56 index,
57 writer: Mutex::new(writer),
58 reader,
59 id_field,
60 content_field,
61 })
62 }
63
64 pub fn open_in_memory() -> Result<Self> {
65 let (schema, id_field, content_field) = build_schema();
66
67 let index = Index::create_in_ram(schema);
68
69 let writer = index
70 .writer(50_000_000)
71 .map_err(|e| Error::Index(e.to_string()))?;
72
73 let reader = index
74 .reader_builder()
75 .reload_policy(ReloadPolicy::OnCommitWithDelay)
76 .try_into()
77 .map_err(|e| Error::Index(e.to_string()))?;
78
79 Ok(Self {
80 index,
81 writer: Mutex::new(writer),
82 reader,
83 id_field,
84 content_field,
85 })
86 }
87}
88
89impl FullTextIndex for TantivyFullTextIndex {
90 fn add(&self, id: Uuid, content: &str) -> Result<()> {
91 let writer = self
92 .writer
93 .lock()
94 .map_err(|e| Error::Index(e.to_string()))?;
95
96 let id_term = tantivy::Term::from_field_text(self.id_field, &id.to_string());
98 writer.delete_term(id_term);
99
100 let mut doc = TantivyDocument::default();
101 doc.add_text(self.id_field, id.to_string());
102 doc.add_text(self.content_field, content);
103 writer
104 .add_document(doc)
105 .map_err(|e| Error::Index(e.to_string()))?;
106 Ok(())
107 }
108
109 fn remove(&self, id: Uuid) -> Result<()> {
110 let writer = self
111 .writer
112 .lock()
113 .map_err(|e| Error::Index(e.to_string()))?;
114 let id_term = tantivy::Term::from_field_text(self.id_field, &id.to_string());
115 writer.delete_term(id_term);
116 Ok(())
117 }
118
119 fn search(&self, query: &str, limit: usize) -> Result<Vec<(Uuid, f32)>> {
120 let searcher = self.reader.searcher();
121 let query_parser = QueryParser::for_index(&self.index, vec![self.content_field]);
122 let parsed_query = query_parser
123 .parse_query(query)
124 .map_err(|e| Error::Index(e.to_string()))?;
125
126 let top_docs = searcher
130 .search(&parsed_query, &TopDocs::with_limit(limit).order_by_score())
131 .map_err(|e| Error::Index(e.to_string()))?;
132
133 let mut results = Vec::new();
134 for (score, doc_address) in top_docs {
135 let doc: TantivyDocument = searcher
136 .doc(doc_address)
137 .map_err(|e| Error::Index(e.to_string()))?;
138 if let Some(id_value) = doc.get_first(self.id_field)
139 && let Some(id_str) = id_value.as_str()
140 && let Ok(uuid) = Uuid::parse_str(id_str)
141 {
142 results.push((uuid, score));
143 }
144 }
145 Ok(results)
146 }
147
148 fn commit(&self) -> Result<()> {
149 let mut writer = self
150 .writer
151 .lock()
152 .map_err(|e| Error::Index(e.to_string()))?;
153 writer.commit().map_err(|e| Error::Index(e.to_string()))?;
154 self.reader
155 .reload()
156 .map_err(|e| Error::Index(e.to_string()))?;
157 Ok(())
158 }
159
160 fn save(&self) -> Result<()> {
161 self.commit()
162 }
163
164 fn len(&self) -> usize {
165 let searcher = self.reader.searcher();
166 searcher.num_docs() as usize
167 }
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// Committed documents are counted and each is the top hit for its own words.
    #[test]
    fn test_tantivy_add_and_search() {
        let index = TantivyFullTextIndex::open_in_memory().unwrap();

        let dark_mode_id = Uuid::now_v7();
        let rust_id = Uuid::now_v7();
        let python_id = Uuid::now_v7();

        let corpus = [
            (dark_mode_id, "The user prefers dark mode for all applications"),
            (rust_id, "Rust programming language is fast and safe"),
            (python_id, "Python is great for data science"),
        ];
        for (id, text) in corpus {
            index.add(id, text).unwrap();
        }
        index.commit().unwrap();

        assert_eq!(index.len(), 3);

        let dark_hits = index.search("dark mode", 10).unwrap();
        assert!(!dark_hits.is_empty());
        assert_eq!(dark_hits[0].0, dark_mode_id);

        let rust_hits = index.search("Rust programming", 10).unwrap();
        assert!(!rust_hits.is_empty());
        assert_eq!(rust_hits[0].0, rust_id);
    }

    /// A removed document is neither counted nor returned once committed.
    #[test]
    fn test_tantivy_remove() {
        let index = TantivyFullTextIndex::open_in_memory().unwrap();

        let doomed_id = Uuid::now_v7();
        index.add(doomed_id, "test content to remove").unwrap();
        index.commit().unwrap();
        assert_eq!(index.len(), 1);

        index.remove(doomed_id).unwrap();
        index.commit().unwrap();
        assert_eq!(index.len(), 0);

        let hits = index.search("test content", 10).unwrap();
        assert!(hits.is_empty());
    }

    /// Content written through one handle is visible after reopening the same directory.
    #[test]
    fn test_tantivy_save_and_load() {
        let index_dir = std::env::temp_dir().join(format!("tantivy_test_{}", Uuid::now_v7()));
        let stored_id = Uuid::now_v7();

        // First handle: write, commit and save, then drop it at the end of the scope.
        {
            let writer_side = TantivyFullTextIndex::new(&index_dir).unwrap();
            writer_side
                .add(stored_id, "persistent test content")
                .unwrap();
            writer_side.commit().unwrap();
            writer_side.save().unwrap();
        }

        // Second handle: reopen the same directory and read the document back.
        {
            let reader_side = TantivyFullTextIndex::new(&index_dir).unwrap();
            assert_eq!(reader_side.len(), 1);

            let hits = reader_side.search("persistent", 10).unwrap();
            assert!(!hits.is_empty());
            assert_eq!(hits[0].0, stored_id);
        }

        // Best-effort cleanup; a failure to delete is ignored.
        std::fs::remove_dir_all(&index_dir).ok();
    }
}