1use std::cmp::Ordering;
2use std::collections::{HashMap, HashSet};
3use std::hash::Hash;
4
5use crate::error::*;
6use crate::index::*;
7use crate::query::*;
8use crate::query_lexer::*;
9
10pub struct SearchEngine<P> {
19 indices: HashMap<String, Box<dyn SearchIndex<P>>>,
20}
21
22impl<P: Eq + Hash + Clone> Default for SearchEngine<P> {
23 fn default() -> Self {
24 Self::new()
25 }
26}
27
28impl<P: Eq + Hash + Clone> SearchEngine<P> {
29 pub fn new() -> Self {
38 Self {
39 indices: HashMap::new(),
40 }
41 }
42
43 pub fn add_index<T: SearchIndex<P> + 'static>(&mut self, name: &str, index: T) {
56 self.indices.insert(name.into(), Box::new(index));
57 }
58
59 pub fn search(&self, query: &Query) -> Result<HashSet<P>> {
64 match query {
65 Query::Exact(attr, _)
66 | Query::Prefix(attr, _)
67 | Query::InRange(attr, _, _)
68 | Query::OutRange(attr, _, _)
69 | Query::Minimum(attr, _)
70 | Query::Maximum(attr, _) => {
71 let index = self
72 .indices
73 .get(attr)
74 .ok_or(SearchEngineError::UnknownAttribute)?;
75 index.search(query)
76 }
77 Query::Or(vec) => {
78 let mut result_set = HashSet::<P>::new();
79 for pred in vec.iter() {
80 let attribute_set = self.search(pred)?;
81 result_set = result_set.union(&attribute_set).cloned().collect();
82 }
83 Ok(result_set)
84 }
85 Query::And(vec) => {
86 let mut result_set = HashSet::<P>::new();
87 for (i, pred) in vec.iter().enumerate() {
88 let attribute_set = self.search(pred)?;
89 if i == 0 {
90 result_set = attribute_set;
91 } else {
92 result_set = result_set.intersection(&attribute_set).cloned().collect();
93 }
94 if result_set.is_empty() {
95 return Ok(result_set);
96 }
97 }
98 Ok(result_set)
99 }
100 Query::Exclude(base, exclude) => {
101 let mut result_set = self.search(base)?;
102 for pred in exclude.iter() {
103 let attribute_set = self.search(pred)?;
104 result_set = result_set.difference(&attribute_set).cloned().collect();
105 if result_set.is_empty() {
106 return Ok(result_set);
107 }
108 }
109 Ok(result_set)
110 }
111 }
112 }
113
114 pub fn query_from_str<'a>(&self, query_str: &'a str) -> Result<(Query, Vec<&'a str>)> {
181 let mut include = vec![];
182 let mut exclude = vec![];
183 let mut freetexts = vec![];
184
185 let lexer = QueryLexer::new(query_str);
186 for subquery in lexer {
187 match subquery {
188 QueryToken::Attribute(is_include, attribute, values) => {
189 let index = self
190 .indices
191 .get(attribute)
192 .ok_or(SearchEngineError::UnknownAttribute)?;
193 let supported = index.supported_queries();
194
195 let mut qs: Vec<_> = values
196 .iter()
197 .map(|&v| {
198 let attr = attribute.to_owned();
199 if (supported & SUPPORTS_MINIMUM) != 0 && v.starts_with('>') {
200 return Query::Minimum(attr, v[1..].to_owned());
201 }
202 if (supported & SUPPORTS_MAXIMUM) != 0 && v.starts_with('<') {
203 return Query::Maximum(attr, v[1..].to_owned());
204 }
205 if (supported & SUPPORTS_EXACT) != 0 && v.starts_with('=') {
206 return Query::Exact(attr, v[1..].to_owned());
207 }
208 if (supported & SUPPORTS_INRANGE) != 0 && v.contains('-') {
209 let parts = v.split('-').collect::<Vec<_>>();
210 if parts.len() == 2 {
211 return Query::InRange(
212 attr,
213 parts[0].to_owned(),
214 parts[1].to_owned(),
215 );
216 }
217 }
218
219 if (supported & SUPPORTS_PREFIX) != 0 {
222 return Query::Prefix(attr, v.to_owned());
223 }
224 Query::Exact(attr, v.to_owned())
225 })
226 .collect();
227 let q = match qs.len().cmp(&1) {
228 Ordering::Equal => qs.swap_remove(0),
229 Ordering::Greater => Query::Or(qs),
230 Ordering::Less => continue,
231 };
232 if is_include {
233 include.push(q);
234 } else {
235 exclude.push(q);
236 }
237 }
238 QueryToken::Freetext(text) => {
239 freetexts.push(text);
240 }
241 }
242 }
243
244 let base_query = Query::And(include);
245 if !exclude.is_empty() {
246 Ok((Query::Exclude(base_query.into(), exclude), freetexts))
247 } else {
248 Ok((base_query, freetexts))
249 }
250 }
251}
252
253#[cfg(test)]
254mod tests {
255 use super::*;
256
257 struct DummyIndex {
258 fixed_values: HashSet<usize>,
259 supported_queries: SupportedQueries,
260 }
261
262 impl DummyIndex {
263 fn new(vals: Vec<usize>) -> Self {
264 Self {
265 fixed_values: HashSet::from_iter(vals),
266 supported_queries: SUPPORTS_EXACT,
267 }
268 }
269
270 fn supports(sup: SupportedQueries) -> Self {
271 Self {
272 fixed_values: HashSet::new(),
273 supported_queries: sup,
274 }
275 }
276 }
277
278 impl SearchIndex<usize> for DummyIndex {
279 fn search(&self, _query: &Query) -> Result<HashSet<usize>> {
280 Ok(self.fixed_values.clone())
281 }
282
283 fn supported_queries(&self) -> SupportedQueries {
284 self.supported_queries
285 }
286 }
287
288 #[test]
289 fn search_or() {
290 let mut engine = SearchEngine::<usize>::new();
291 engine.add_index("a", DummyIndex::new(vec![1, 2]));
292 engine.add_index("b", DummyIndex::new(vec![3, 4]));
293 engine.add_index("c", DummyIndex::new(vec![2, 5, 6]));
294 let result = engine.search(&Query::Or(vec![
295 Query::Exact("a".into(), "DUMMY".into()),
296 Query::Exact("c".into(), "DUMMY".into()),
297 ]));
298 assert_eq!(result, Ok(HashSet::from_iter(vec![1, 2, 5, 6])));
299 }
300
301 #[test]
302 fn search_and() {
303 let mut engine = SearchEngine::<usize>::new();
304 engine.add_index("a", DummyIndex::new(vec![1, 2]));
305 engine.add_index("b", DummyIndex::new(vec![3, 4]));
306 engine.add_index("c", DummyIndex::new(vec![2, 5, 6]));
307 let result = engine.search(&Query::And(vec![
308 Query::Exact("a".into(), "DUMMY".into()),
309 Query::Exact("c".into(), "DUMMY".into()),
310 ]));
311 assert_eq!(result, Ok(HashSet::from_iter(vec![2])));
312 }
313
314 #[test]
315 fn search_exclude() {
316 let mut engine = SearchEngine::<usize>::new();
317 engine.add_index("a", DummyIndex::new(vec![1, 2]));
318 engine.add_index("b", DummyIndex::new(vec![3, 4]));
319 engine.add_index("c", DummyIndex::new(vec![2, 5, 6]));
320 let result = engine.search(&Query::Exclude(
321 Box::new(Query::Exact("c".into(), "DUMMY".into())),
322 vec![Query::Exact("a".into(), "DUMMY".into())],
323 ));
324 assert_eq!(result, Ok(HashSet::from_iter(vec![5, 6])));
325 }
326
327 fn create_parser_engine() -> SearchEngine<usize> {
328 let mut engine = SearchEngine::<usize>::new();
329 engine.add_index(
330 "zipcode",
331 DummyIndex::supports(
332 SUPPORTS_EXACT | SUPPORTS_MINIMUM | SUPPORTS_MAXIMUM | SUPPORTS_INRANGE,
333 ),
334 );
335 engine.add_index("pet", DummyIndex::supports(SUPPORTS_EXACT));
336 engine.add_index(
337 "name",
338 DummyIndex::supports(SUPPORTS_EXACT | SUPPORTS_PREFIX),
339 );
340 engine
341 }
342
343 #[test]
344 fn query_parser_empty() {
345 let engine = create_parser_engine();
346 let (q, freetext) = engine.query_from_str("").unwrap();
347 assert_eq!(q, Query::And(vec![]));
348 assert_eq!(freetext, vec![] as Vec<&str>);
349 }
350
351 #[test]
352 fn query_parser_basic() {
353 let engine = create_parser_engine();
354 let (q, freetext) = engine
355 .query_from_str("+zipcode:12345 +pet:Dog -name:Hans freetext")
356 .unwrap();
357 assert_eq!(
358 q,
359 Query::Exclude(
360 Box::new(Query::And(vec![
361 Query::Exact("zipcode".into(), "12345".into()),
362 Query::Exact("pet".into(), "Dog".into())
363 ])),
364 vec![Query::Prefix("name".into(), "Hans".into())]
365 )
366 );
367 assert_eq!(freetext, vec!["freetext"]);
368 }
369
370 #[test]
371 fn query_parser_modificators() {
372 let engine = create_parser_engine();
373 let (q, freetext) = engine
374 .query_from_str(
375 "abc +zipcode:>12345 +zipcode:<99999 +zipcode:50000-60000 +name:=Hans def",
376 )
377 .unwrap();
378 assert_eq!(
379 q,
380 Query::And(vec![
381 Query::Minimum("zipcode".into(), "12345".into()),
382 Query::Maximum("zipcode".into(), "99999".into()),
383 Query::InRange("zipcode".into(), "50000".into(), "60000".into()),
384 Query::Exact("name".into(), "Hans".into()),
385 ])
386 );
387 assert_eq!(freetext, vec!["abc", "def"]);
388 }
389
390 #[test]
391 fn query_parser_alternatives() {
392 let engine = create_parser_engine();
393 let (q, freetext) = engine
394 .query_from_str(
395 "start +pet:Cat,Dog +name:Alex,=Hans middle +zipcode:=10000,<500,50000-60000 end",
396 )
397 .unwrap();
398 assert_eq!(
399 q,
400 Query::And(vec![
401 Query::Or(vec![
402 Query::Exact("pet".into(), "Cat".into()),
403 Query::Exact("pet".into(), "Dog".into()),
404 ]),
405 Query::Or(vec![
406 Query::Prefix("name".into(), "Alex".into()),
407 Query::Exact("name".into(), "Hans".into()),
408 ]),
409 Query::Or(vec![
410 Query::Exact("zipcode".into(), "10000".into()),
411 Query::Maximum("zipcode".into(), "500".into()),
412 Query::InRange("zipcode".into(), "50000".into(), "60000".into()),
413 ]),
414 ])
415 );
416 assert_eq!(freetext, vec!["start", "middle", "end"]);
417 }
418}