1use std::{collections::HashMap, fs, marker::PhantomData, path::Path};
2
3use tantivy::{
4 DocAddress, Index, IndexWriter, ReloadPolicy, Searcher, TantivyDocument, Term,
5 collector::TopDocs,
6 directory::MmapDirectory,
7 query::{
8 BooleanQuery, FuzzyTermQuery, Occur, PhrasePrefixQuery, Query, QueryParser, RegexQuery,
9 },
10 schema::Schema,
11};
12use text_search_core::Indexable;
13
14pub struct Indexer<T: Indexable> {
15 index: Index,
16 schema: Schema,
17 index_writer: Option<IndexWriter>,
18 _marker: PhantomData<T>,
19}
20
21impl<T: Indexable> Indexer<T> {
22 pub fn new(path: &Path) -> Self {
23 if !path.exists() {
24 let _ = fs::create_dir(path);
25 }
26
27 let dir = MmapDirectory::open(&path).expect("Error while opening directory");
28 let schema = T::get_struct_info().generate_schema();
29 let index = Index::open_or_create(dir, schema.clone())
30 .expect("Error while opening or creating index. If schema has been updated, remove the old data.");
31
32 Self {
33 index,
34 schema,
35 index_writer: None,
36 _marker: PhantomData,
37 }
38 }
39
40 fn create_index_writer(&mut self) {
41 if self.index_writer.is_none() {
42 self.index_writer = Some(
43 self.index
44 .writer(50_000_000)
45 .expect("Error while creating index writer."),
46 );
47 }
48 }
49
50 pub fn index(&mut self, data: T) {
51 self.create_index_writer();
52
53 let doc = data.as_document();
54 self.index_writer
55 .as_ref()
56 .unwrap()
57 .add_document(doc)
58 .expect("Error while adding document.");
59 }
60
61 pub fn delete(&mut self, data: T) {
62 self.create_index_writer();
63 self.index_writer
64 .as_ref()
65 .unwrap()
66 .delete_term(data.get_id_term());
67 }
68
69 pub fn delete_using_term(&mut self, term: tantivy::Term) {
70 self.create_index_writer();
71 self.index_writer.as_ref().unwrap().delete_term(term);
72 }
73
74 pub fn delete_using_filters(&mut self, filters: HashMap<&str, &str>) {
75 self.create_index_writer();
76 let query = BooleanQuery::from(self.new_boolean_query_filters(filters));
77 let _ = self
78 .index_writer
79 .as_ref()
80 .unwrap()
81 .delete_query(Box::new(query));
82 }
83
84 pub fn update(&mut self, data: T) {
85 self.delete(data.clone());
86 self.index(data);
87 }
88
89 pub fn commit(&mut self) {
90 if self.index_writer.is_some() {
91 self.index_writer
92 .as_mut()
93 .unwrap()
94 .commit()
95 .expect("Error while commiting index data.");
96 }
97
98 self.index_writer = None;
99 }
100
101 pub fn search(
102 &self,
103 filter: HashMap<&str, &str>,
104 field_name: &str,
105 query: &str,
106 result_count: usize,
107 ) -> Vec<T> {
108 let field = self
109 .schema
110 .get_field(field_name)
111 .expect("Field with provided field name does not exsit in schema.");
112
113 let search_query = QueryParser::for_index(&self.index, vec![field])
114 .parse_query(query)
115 .expect("Error while parsing query.");
116
117 self._search(filter, search_query, result_count)
118 }
119
120 pub fn fuzzy_search(
121 &self,
122 filter: HashMap<&str, &str>,
123 field_name: &str,
124 query: &str,
125 result_count: usize,
126 ) -> Vec<T> {
127 let field = self
128 .schema
129 .get_field(field_name)
130 .expect("Field with provided field name does not exsit in schema.");
131
132 let term: Term = Term::from_field_text(field, query);
133 let query = FuzzyTermQuery::new(term, 2, true);
134
135 self._search(filter, Box::new(query), result_count)
136 }
137
138 pub fn regex_search(
139 &self,
140 filter: HashMap<&str, &str>,
141 field_name: &str,
142 query: &str,
143 result_count: usize,
144 ) -> Vec<T> {
145 let field = self
146 .schema
147 .get_field(field_name)
148 .expect("Field with provided field name does not exsit in schema.");
149
150 let query =
151 RegexQuery::from_pattern(query, field).expect("Error while building regex query.");
152
153 self._search(filter, Box::new(query), result_count)
154 }
155
156 pub fn hybrid_search(
159 &self,
160 filter: HashMap<&str, &str>,
161 field_name: &str,
162 query: &str,
163 result_count: usize,
164 ) -> Vec<T> {
165 let field = self
166 .schema
167 .get_field(field_name)
168 .expect("Field with provided field name does not exsit in schema.");
169
170 let terms: Vec<Term> = query
171 .to_lowercase()
172 .split(" ")
173 .map(|term| Term::from_field_text(field, term))
174 .collect();
175
176 let fuzzy_queries: Vec<(Occur, Box<dyn Query>)> = terms
177 .iter()
178 .map(|term| {
179 (
180 Occur::Should,
181 Box::new(FuzzyTermQuery::new(term.clone(), 2, true)) as Box<dyn Query>,
182 )
183 })
184 .collect();
185
186 let phrase_prefix_query: (Occur, Box<dyn Query>) = (
187 Occur::Should,
188 Box::new(PhrasePrefixQuery::new(terms)) as Box<dyn Query>,
189 );
190
191 let mut boolean_quries: Vec<(Occur, Box<dyn Query>)> = vec![phrase_prefix_query];
192 boolean_quries.extend(fuzzy_queries);
193
194 let query = BooleanQuery::new(boolean_quries);
195 self._search(filter, Box::new(query), result_count)
196 }
197
198 fn filter_query(&self, filters: HashMap<&str, &str>, query: Box<dyn Query>) -> Box<dyn Query> {
199 let filter_query = if filters.is_empty() {
200 None
201 } else {
202 Some(self.new_boolean_query_filters(filters))
203 };
204
205 match filter_query {
206 Some(mut x) => {
207 x.push((Occur::Must, Box::new(query)));
208 Box::new(BooleanQuery::from(x))
209 }
210 None => query,
211 }
212 }
213
214 fn new_boolean_query_filters(
215 &self,
216 filters: HashMap<&str, &str>,
217 ) -> Vec<(Occur, Box<dyn Query>)> {
218 filters
219 .iter()
220 .map(|x| {
221 let field = self.schema.get_field(x.0).expect(&format!(
222 "Field with provided field name `{}` does not exists in schema.",
223 x.0
224 ));
225 let phrase = format!("\"{}\"", x.1);
226
227 let filter_query = QueryParser::for_index(&self.index, vec![field])
228 .parse_query(&phrase)
229 .expect("Error while parsing query.");
230 (Occur::Must, filter_query)
231 })
232 .collect()
233 }
234
235 fn _search(
236 &self,
237 filter: HashMap<&str, &str>,
238 query: Box<dyn Query>,
239 result_count: usize,
240 ) -> Vec<T> {
241 let reader = self
242 .index
243 .reader_builder()
244 .reload_policy(ReloadPolicy::OnCommitWithDelay)
245 .try_into()
246 .expect("Error while constructing reader for search operation.");
247 let searcher = reader.searcher();
248
249 let query = self.filter_query(filter, query);
250
251 let top_docs = searcher
252 .search(&query, &TopDocs::with_limit(result_count))
253 .expect("Error while performing search operation.");
254
255 Self::docs_to_t(top_docs, &searcher)
256 }
257
258 fn docs_to_t(top_docs: Vec<(f32, DocAddress)>, searcher: &Searcher) -> Vec<T> {
259 let mut result: Vec<T> = vec![];
260 for (_score, doc_address) in top_docs {
261 let doc: TantivyDocument = searcher
262 .doc(doc_address)
263 .expect("Error while trying to find search document.");
264 result.push(T::from_doc(doc));
265 }
266 result
267 }
268}