attribute_search_engine/
engine.rs

1use std::cmp::Ordering;
2use std::collections::{HashMap, HashSet};
3use std::hash::Hash;
4
5use crate::error::*;
6use crate::index::*;
7use crate::query::*;
8use crate::query_lexer::*;
9
10/// A SearchEngine is a wrapper around a collection of [search indices](SearchIndex)
11/// that can process complex [queries](Query) involving multiple indices.
12///
13/// It can also create [queries](Query) from strings that are tailored to the
14/// existing [indices](SearchIndex).
15///
16/// # Example
17/// A complete example can be found on the [front page of this crate](crate).
18pub struct SearchEngine<P> {
19    indices: HashMap<String, Box<dyn SearchIndex<P>>>,
20}
21
22impl<P: Eq + Hash + Clone> Default for SearchEngine<P> {
23    fn default() -> Self {
24        Self::new()
25    }
26}
27
28impl<P: Eq + Hash + Clone> SearchEngine<P> {
29    /// Creates a new `SearchEngine`.
30    ///
31    /// # Example
32    /// ```rust
33    /// use attribute_search_engine::SearchEngine;
34    ///
35    /// let engine = SearchEngine::<usize>::new();
36    /// ```
37    pub fn new() -> Self {
38        Self {
39            indices: HashMap::new(),
40        }
41    }
42
43    /// Add a new index to this search engine.
44    ///
45    /// # Example
46    /// ```rust
47    /// use attribute_search_engine::{SearchEngine, SearchIndexHashMap};
48    ///
49    /// let mut index = SearchIndexHashMap::<_, String>::new();
50    /// // Fill index here...
51    ///
52    /// let mut engine = SearchEngine::<usize>::new();
53    /// engine.add_index("attribute", index);
54    /// ```
55    pub fn add_index<T: SearchIndex<P> + 'static>(&mut self, name: &str, index: T) {
56        self.indices.insert(name.into(), Box::new(index));
57    }
58
59    /// Run a query on the search engine.
60    ///
61    /// The result is a HashSet of all row ids / primary ids
62    /// with rows that matched the query.
63    pub fn search(&self, query: &Query) -> Result<HashSet<P>> {
64        match query {
65            Query::Exact(attr, _)
66            | Query::Prefix(attr, _)
67            | Query::InRange(attr, _, _)
68            | Query::OutRange(attr, _, _)
69            | Query::Minimum(attr, _)
70            | Query::Maximum(attr, _) => {
71                let index = self
72                    .indices
73                    .get(attr)
74                    .ok_or(SearchEngineError::UnknownAttribute)?;
75                index.search(query)
76            }
77            Query::Or(vec) => {
78                let mut result_set = HashSet::<P>::new();
79                for pred in vec.iter() {
80                    let attribute_set = self.search(pred)?;
81                    result_set = result_set.union(&attribute_set).cloned().collect();
82                }
83                Ok(result_set)
84            }
85            Query::And(vec) => {
86                let mut result_set = HashSet::<P>::new();
87                for (i, pred) in vec.iter().enumerate() {
88                    let attribute_set = self.search(pred)?;
89                    if i == 0 {
90                        result_set = attribute_set;
91                    } else {
92                        result_set = result_set.intersection(&attribute_set).cloned().collect();
93                    }
94                    if result_set.is_empty() {
95                        return Ok(result_set);
96                    }
97                }
98                Ok(result_set)
99            }
100            Query::Exclude(base, exclude) => {
101                let mut result_set = self.search(base)?;
102                for pred in exclude.iter() {
103                    let attribute_set = self.search(pred)?;
104                    result_set = result_set.difference(&attribute_set).cloned().collect();
105                    if result_set.is_empty() {
106                        return Ok(result_set);
107                    }
108                }
109                Ok(result_set)
110            }
111        }
112    }
113
114    /// Build a [Query] from a string slice.
115    ///
116    /// This function can return an error if an unknown index is referenced.
117    /// On success this function returns a fully owned Query and a vector of string
118    /// slices into the input query, that contains all parts of the query, that are not
119    /// following the query syntax. They are called "Freetext". They will never contain
120    /// whitespace.
121    ///
122    /// # Basic Syntax
123    /// The following text is an example for a query:
124    /// ```text
125    /// +attr1:foo,bar +attr2:=baz +attr3:<42,>84 -attr4:69-121
126    /// ```
127    /// As a boolean expression it will mean something like this:
128    /// ```text
129    ///    (attr1==foo || attr1==bar)
130    /// && (attr2==baz)
131    /// && (attr3 <= 42 || attr3 >= 84)
132    /// && !(69 <= attr4 <= 121)
133    /// ```
134    ///
135    /// # In-depth Syntax description
136    /// A string query consists of multiple whitespace seperated attribute selectors.
137    /// Each of them starts with a `+` or `-` sign, indicating if the rows matching this
138    /// selector should be included or excluded from the result. This is followed by the
139    /// name of the attribute/index and a single `:` char. Next comes a list of comma seperated
140    /// values that describe the basic queries that are used to select rows. There are
141    /// special operator symbols that can change the meaning of a value if the index
142    /// supports the matching query type. For example, if the Index supports Maximum queries,
143    /// the following value will return a Maximum query instead of an Exact query: `<123`.
144    ///
145    /// The following operator symbols are currently used **if the index supports it**:
146    /// - `>val` - forces a Minimum query
147    /// - `<val` - forces a Maximum query
148    /// - `=val` - forces a Exact query
149    /// - `minval-maxval` - forces a InRange query
150    ///
151    /// If no operator symbol is found, a Prefix query will be used if it is supported by the index.
152    /// Otherwise a Exact query is used, even if the index may not support it (all official indices
153    /// currently implement them).
154    ///
155    /// All non-whitespace substrings in the query, that are not valid attribute selectors are
156    /// considered "Freetext". All of these are returned on success and can be used or
157    /// ignored by the caller. For example they can be used to filter the results further.
158    ///
159    /// # Limits
160    /// - OutRange queries don't have an operator symbol and are currently not supported.
161    ///   But it is possible to build a functionally equivalent query if the index supports
162    ///   Minimum and Maximum queries: `+attr:<10,>20`
163    /// - InRange does not support negative values because only one `-` char is allowed.
164    /// - There is no way to force a Prefix query. It will be automatically used if no
165    ///   operator symbol is found and the index supports them.
166    ///
167    /// # Example
168    /// ```rust
169    /// use attribute_search_engine::{SearchEngine, SearchIndexHashMap, Query};
170    ///
171    /// let mut index = SearchIndexHashMap::<_, String>::new();
172    /// // Fill index here...
173    ///
174    /// let mut engine = SearchEngine::<usize>::new();
175    /// engine.add_index("attribute", index);
176    /// let (q, freetext) = engine.query_from_str("+attribute:foo bar").expect("no error");
177    /// assert_eq!(q, Query::And(vec![Query::Exact("attribute".into(), "foo".into())]));
178    /// assert_eq!(freetext, vec!["bar"]);
179    /// ```
180    pub fn query_from_str<'a>(&self, query_str: &'a str) -> Result<(Query, Vec<&'a str>)> {
181        let mut include = vec![];
182        let mut exclude = vec![];
183        let mut freetexts = vec![];
184
185        let lexer = QueryLexer::new(query_str);
186        for subquery in lexer {
187            match subquery {
188                QueryToken::Attribute(is_include, attribute, values) => {
189                    let index = self
190                        .indices
191                        .get(attribute)
192                        .ok_or(SearchEngineError::UnknownAttribute)?;
193                    let supported = index.supported_queries();
194
195                    let mut qs: Vec<_> = values
196                        .iter()
197                        .map(|&v| {
198                            let attr = attribute.to_owned();
199                            if (supported & SUPPORTS_MINIMUM) != 0 && v.starts_with('>') {
200                                return Query::Minimum(attr, v[1..].to_owned());
201                            }
202                            if (supported & SUPPORTS_MAXIMUM) != 0 && v.starts_with('<') {
203                                return Query::Maximum(attr, v[1..].to_owned());
204                            }
205                            if (supported & SUPPORTS_EXACT) != 0 && v.starts_with('=') {
206                                return Query::Exact(attr, v[1..].to_owned());
207                            }
208                            if (supported & SUPPORTS_INRANGE) != 0 && v.contains('-') {
209                                let parts = v.split('-').collect::<Vec<_>>();
210                                if parts.len() == 2 {
211                                    return Query::InRange(
212                                        attr,
213                                        parts[0].to_owned(),
214                                        parts[1].to_owned(),
215                                    );
216                                }
217                            }
218
219                            // Fallback, if nothing is found we use prefix if we can
220                            // and exact otherwise.
221                            if (supported & SUPPORTS_PREFIX) != 0 {
222                                return Query::Prefix(attr, v.to_owned());
223                            }
224                            Query::Exact(attr, v.to_owned())
225                        })
226                        .collect();
227                    let q = match qs.len().cmp(&1) {
228                        Ordering::Equal => qs.swap_remove(0),
229                        Ordering::Greater => Query::Or(qs),
230                        Ordering::Less => continue,
231                    };
232                    if is_include {
233                        include.push(q);
234                    } else {
235                        exclude.push(q);
236                    }
237                }
238                QueryToken::Freetext(text) => {
239                    freetexts.push(text);
240                }
241            }
242        }
243
244        let base_query = Query::And(include);
245        if !exclude.is_empty() {
246            Ok((Query::Exclude(base_query.into(), exclude), freetexts))
247        } else {
248            Ok((base_query, freetexts))
249        }
250    }
251}
252
253#[cfg(test)]
254mod tests {
255    use super::*;
256
257    struct DummyIndex {
258        fixed_values: HashSet<usize>,
259        supported_queries: SupportedQueries,
260    }
261
262    impl DummyIndex {
263        fn new(vals: Vec<usize>) -> Self {
264            Self {
265                fixed_values: HashSet::from_iter(vals),
266                supported_queries: SUPPORTS_EXACT,
267            }
268        }
269
270        fn supports(sup: SupportedQueries) -> Self {
271            Self {
272                fixed_values: HashSet::new(),
273                supported_queries: sup,
274            }
275        }
276    }
277
278    impl SearchIndex<usize> for DummyIndex {
279        fn search(&self, _query: &Query) -> Result<HashSet<usize>> {
280            Ok(self.fixed_values.clone())
281        }
282
283        fn supported_queries(&self) -> SupportedQueries {
284            self.supported_queries
285        }
286    }
287
288    #[test]
289    fn search_or() {
290        let mut engine = SearchEngine::<usize>::new();
291        engine.add_index("a", DummyIndex::new(vec![1, 2]));
292        engine.add_index("b", DummyIndex::new(vec![3, 4]));
293        engine.add_index("c", DummyIndex::new(vec![2, 5, 6]));
294        let result = engine.search(&Query::Or(vec![
295            Query::Exact("a".into(), "DUMMY".into()),
296            Query::Exact("c".into(), "DUMMY".into()),
297        ]));
298        assert_eq!(result, Ok(HashSet::from_iter(vec![1, 2, 5, 6])));
299    }
300
301    #[test]
302    fn search_and() {
303        let mut engine = SearchEngine::<usize>::new();
304        engine.add_index("a", DummyIndex::new(vec![1, 2]));
305        engine.add_index("b", DummyIndex::new(vec![3, 4]));
306        engine.add_index("c", DummyIndex::new(vec![2, 5, 6]));
307        let result = engine.search(&Query::And(vec![
308            Query::Exact("a".into(), "DUMMY".into()),
309            Query::Exact("c".into(), "DUMMY".into()),
310        ]));
311        assert_eq!(result, Ok(HashSet::from_iter(vec![2])));
312    }
313
314    #[test]
315    fn search_exclude() {
316        let mut engine = SearchEngine::<usize>::new();
317        engine.add_index("a", DummyIndex::new(vec![1, 2]));
318        engine.add_index("b", DummyIndex::new(vec![3, 4]));
319        engine.add_index("c", DummyIndex::new(vec![2, 5, 6]));
320        let result = engine.search(&Query::Exclude(
321            Box::new(Query::Exact("c".into(), "DUMMY".into())),
322            vec![Query::Exact("a".into(), "DUMMY".into())],
323        ));
324        assert_eq!(result, Ok(HashSet::from_iter(vec![5, 6])));
325    }
326
327    fn create_parser_engine() -> SearchEngine<usize> {
328        let mut engine = SearchEngine::<usize>::new();
329        engine.add_index(
330            "zipcode",
331            DummyIndex::supports(
332                SUPPORTS_EXACT | SUPPORTS_MINIMUM | SUPPORTS_MAXIMUM | SUPPORTS_INRANGE,
333            ),
334        );
335        engine.add_index("pet", DummyIndex::supports(SUPPORTS_EXACT));
336        engine.add_index(
337            "name",
338            DummyIndex::supports(SUPPORTS_EXACT | SUPPORTS_PREFIX),
339        );
340        engine
341    }
342
343    #[test]
344    fn query_parser_empty() {
345        let engine = create_parser_engine();
346        let (q, freetext) = engine.query_from_str("").unwrap();
347        assert_eq!(q, Query::And(vec![]));
348        assert_eq!(freetext, vec![] as Vec<&str>);
349    }
350
351    #[test]
352    fn query_parser_basic() {
353        let engine = create_parser_engine();
354        let (q, freetext) = engine
355            .query_from_str("+zipcode:12345 +pet:Dog -name:Hans freetext")
356            .unwrap();
357        assert_eq!(
358            q,
359            Query::Exclude(
360                Box::new(Query::And(vec![
361                    Query::Exact("zipcode".into(), "12345".into()),
362                    Query::Exact("pet".into(), "Dog".into())
363                ])),
364                vec![Query::Prefix("name".into(), "Hans".into())]
365            )
366        );
367        assert_eq!(freetext, vec!["freetext"]);
368    }
369
370    #[test]
371    fn query_parser_modificators() {
372        let engine = create_parser_engine();
373        let (q, freetext) = engine
374            .query_from_str(
375                "abc +zipcode:>12345 +zipcode:<99999 +zipcode:50000-60000 +name:=Hans def",
376            )
377            .unwrap();
378        assert_eq!(
379            q,
380            Query::And(vec![
381                Query::Minimum("zipcode".into(), "12345".into()),
382                Query::Maximum("zipcode".into(), "99999".into()),
383                Query::InRange("zipcode".into(), "50000".into(), "60000".into()),
384                Query::Exact("name".into(), "Hans".into()),
385            ])
386        );
387        assert_eq!(freetext, vec!["abc", "def"]);
388    }
389
390    #[test]
391    fn query_parser_alternatives() {
392        let engine = create_parser_engine();
393        let (q, freetext) = engine
394            .query_from_str(
395                "start +pet:Cat,Dog +name:Alex,=Hans middle +zipcode:=10000,<500,50000-60000 end",
396            )
397            .unwrap();
398        assert_eq!(
399            q,
400            Query::And(vec![
401                Query::Or(vec![
402                    Query::Exact("pet".into(), "Cat".into()),
403                    Query::Exact("pet".into(), "Dog".into()),
404                ]),
405                Query::Or(vec![
406                    Query::Prefix("name".into(), "Alex".into()),
407                    Query::Exact("name".into(), "Hans".into()),
408                ]),
409                Query::Or(vec![
410                    Query::Exact("zipcode".into(), "10000".into()),
411                    Query::Maximum("zipcode".into(), "500".into()),
412                    Query::InRange("zipcode".into(), "50000".into(), "60000".into()),
413                ]),
414            ])
415        );
416        assert_eq!(freetext, vec!["start", "middle", "end"]);
417    }
418}