1use std::{collections::HashMap, fs, marker::PhantomData, path::Path};
2
3use tantivy::{
4 DocAddress, Index, IndexWriter, ReloadPolicy, Searcher, TantivyDocument, Term,
5 collector::TopDocs,
6 directory::MmapDirectory,
7 query::{
8 BooleanQuery, FuzzyTermQuery, Occur, PhrasePrefixQuery, Query, QueryParser, RegexQuery,
9 },
10 schema::Schema,
11};
12use text_search_core::Indexable;
13
14pub struct Indexer<T: Indexable> {
15 index: Index,
16 schema: Schema,
17 index_writer: Option<IndexWriter>,
18 _marker: PhantomData<T>,
19}
20
21impl<T: Indexable> Indexer<T> {
22 pub fn new(path: &Path) -> Self {
23 if !path.exists() {
24 let _ = fs::create_dir(path);
25 }
26
27 let dir = MmapDirectory::open(&path).expect("Error while opening directory");
28 let schema = T::get_struct_info().generate_schema();
29 let index = Index::open_or_create(dir, schema.clone())
30 .expect("Error while opening or creating index. If schema has been updated, remove the old data.");
31
32 Self {
33 index,
34 schema,
35 index_writer: None,
36 _marker: PhantomData,
37 }
38 }
39
40 fn create_index_writer(&mut self) {
41 if self.index_writer.is_none() {
42 self.index_writer = Some(
43 self.index
44 .writer(50_000_000)
45 .expect("Error while creating index writer."),
46 );
47 }
48 }
49
50 pub fn index(&mut self, data: T) {
51 self.create_index_writer();
52
53 let doc = data.as_document();
54 self.index_writer
55 .as_ref()
56 .unwrap()
57 .add_document(doc)
58 .expect("Error while adding document.");
59 }
60
61 pub fn delete(&mut self, data: T) {
62 self.create_index_writer();
63 self.index_writer
64 .as_ref()
65 .unwrap()
66 .delete_term(data.get_id_term());
67 }
68
69 pub fn delete_using_term(&mut self, term: tantivy::Term) {
70 self.create_index_writer();
71 self.index_writer.as_ref().unwrap().delete_term(term);
72 }
73
74 pub fn delete_using_filters(&mut self, filters: HashMap<&str, &str>) {
75 self.create_index_writer();
76 let query = BooleanQuery::from(self.new_boolean_query_filters(filters));
77 println!("query : {:?}", query);
78 let _ = self
79 .index_writer
80 .as_ref()
81 .unwrap()
82 .delete_query(Box::new(query));
83 }
84
85 pub fn update(&mut self, data: T) {
86 self.delete(data.clone());
87 self.index(data);
88 }
89
90 pub fn commit(&mut self) {
91 if self.index_writer.is_some() {
92 self.index_writer
93 .as_mut()
94 .unwrap()
95 .commit()
96 .expect("Error while commiting index data.");
97 }
98
99 self.index_writer = None;
100 }
101
102 pub fn search(
103 &self,
104 filter: HashMap<&str, &str>,
105 field_name: &str,
106 query: &str,
107 result_count: usize,
108 ) -> Vec<T> {
109 let field = self
110 .schema
111 .get_field(field_name)
112 .expect("Field with provided field name does not exsit in schema.");
113
114 let search_query = QueryParser::for_index(&self.index, vec![field])
115 .parse_query(query)
116 .expect("Error while parsing query.");
117
118 self._search(filter, search_query, result_count)
119 }
120
121 pub fn fuzzy_search(
122 &self,
123 filter: HashMap<&str, &str>,
124 field_name: &str,
125 query: &str,
126 result_count: usize,
127 ) -> Vec<T> {
128 let field = self
129 .schema
130 .get_field(field_name)
131 .expect("Field with provided field name does not exsit in schema.");
132
133 let term: Term = Term::from_field_text(field, query);
134 let query = FuzzyTermQuery::new(term, 2, true);
135
136 self._search(filter, Box::new(query), result_count)
137 }
138
139 pub fn regex_search(
140 &self,
141 filter: HashMap<&str, &str>,
142 field_name: &str,
143 query: &str,
144 result_count: usize,
145 ) -> Vec<T> {
146 let field = self
147 .schema
148 .get_field(field_name)
149 .expect("Field with provided field name does not exsit in schema.");
150
151 let query =
152 RegexQuery::from_pattern(query, field).expect("Error while building regex query.");
153
154 self._search(filter, Box::new(query), result_count)
155 }
156
157 pub fn hybrid_search(
160 &self,
161 filter: HashMap<&str, &str>,
162 field_name: &str,
163 query: &str,
164 result_count: usize,
165 ) -> Vec<T> {
166 let field = self
167 .schema
168 .get_field(field_name)
169 .expect("Field with provided field name does not exsit in schema.");
170
171 let terms: Vec<Term> = query
172 .to_lowercase()
173 .split(" ")
174 .map(|term| Term::from_field_text(field, term))
175 .collect();
176
177 let fuzzy_queries: Vec<(Occur, Box<dyn Query>)> = terms
178 .iter()
179 .map(|term| {
180 (
181 Occur::Should,
182 Box::new(FuzzyTermQuery::new(term.clone(), 2, true)) as Box<dyn Query>,
183 )
184 })
185 .collect();
186
187 let phrase_prefix_query: (Occur, Box<dyn Query>) = (
188 Occur::Should,
189 Box::new(PhrasePrefixQuery::new(terms)) as Box<dyn Query>,
190 );
191
192 let mut boolean_quries: Vec<(Occur, Box<dyn Query>)> = vec![phrase_prefix_query];
193 boolean_quries.extend(fuzzy_queries);
194
195 let query = BooleanQuery::new(boolean_quries);
196 self._search(filter, Box::new(query), result_count)
197 }
198
199 fn filter_query(&self, filters: HashMap<&str, &str>, query: Box<dyn Query>) -> Box<dyn Query> {
200 let filter_query = if filters.is_empty() {
201 None
202 } else {
203 Some(self.new_boolean_query_filters(filters))
204 };
205
206 match filter_query {
207 Some(mut x) => {
208 x.push((Occur::Should, Box::new(query)));
209 Box::new(BooleanQuery::from(x))
210 }
211 None => query,
212 }
213 }
214
215 fn new_boolean_query_filters(
216 &self,
217 filters: HashMap<&str, &str>,
218 ) -> Vec<(Occur, Box<dyn Query>)> {
219 filters
220 .iter()
221 .map(|x| {
222 let field = self.schema.get_field(x.0).expect(&format!(
223 "Field with provided field name `{}` does not exists in schema.",
224 x.0
225 ));
226 let phrase = format!("\"{}\"", x.1);
227
228 let filter_query = QueryParser::for_index(&self.index, vec![field])
229 .parse_query(&phrase)
230 .expect("Error while parsing query.");
231 (Occur::Must, filter_query)
232 })
233 .collect()
234 }
235
236 fn _search(
237 &self,
238 filter: HashMap<&str, &str>,
239 query: Box<dyn Query>,
240 result_count: usize,
241 ) -> Vec<T> {
242 let reader = self
243 .index
244 .reader_builder()
245 .reload_policy(ReloadPolicy::OnCommitWithDelay)
246 .try_into()
247 .expect("Error while constructing reader for search operation.");
248 let searcher = reader.searcher();
249
250 let query = self.filter_query(filter, query);
251
252 let top_docs = searcher
253 .search(&query, &TopDocs::with_limit(result_count))
254 .expect("Error while performing search operation.");
255
256 Self::docs_to_t(top_docs, &searcher)
257 }
258
259 fn docs_to_t(top_docs: Vec<(f32, DocAddress)>, searcher: &Searcher) -> Vec<T> {
260 let mut result: Vec<T> = vec![];
261 for (_score, doc_address) in top_docs {
262 let doc: TantivyDocument = searcher
263 .doc(doc_address)
264 .expect("Error while trying to find search document.");
265 result.push(T::from_doc(doc));
266 }
267 result
268 }
269}