Skip to main content

lance_index/scalar/inverted/
query.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use crate::scalar::inverted::document_tokenizer::DocType;
5use crate::scalar::inverted::tokenizer::document_tokenizer::LanceTokenizer;
6use lance_core::{Error, Result};
7use serde::ser::SerializeMap;
8use serde::{Deserialize, Serialize};
9use std::collections::{HashMap, HashSet};
10
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct FtsSearchParams {
13    pub limit: Option<usize>,
14    pub wand_factor: f32,
15    pub fuzziness: Option<u32>,
16    pub max_expansions: usize,
17    // None means not a phrase query
18    // Some(n) means a phrase query with slop n
19    pub phrase_slop: Option<u32>,
20    /// The number of beginning characters being unchanged for fuzzy matching.
21    pub prefix_length: u32,
22}
23
24impl FtsSearchParams {
25    pub fn new() -> Self {
26        Self {
27            limit: None,
28            wand_factor: 1.0,
29            fuzziness: Some(0),
30            max_expansions: 50,
31            phrase_slop: None,
32            prefix_length: 0,
33        }
34    }
35
36    pub fn with_limit(mut self, limit: Option<usize>) -> Self {
37        self.limit = limit;
38        self
39    }
40
41    pub fn with_wand_factor(mut self, factor: f32) -> Self {
42        self.wand_factor = factor;
43        self
44    }
45
46    pub fn with_fuzziness(mut self, fuzziness: Option<u32>) -> Self {
47        self.fuzziness = fuzziness;
48        self
49    }
50
51    pub fn with_max_expansions(mut self, max_expansions: usize) -> Self {
52        self.max_expansions = max_expansions;
53        self
54    }
55
56    pub fn with_phrase_slop(mut self, phrase_slop: Option<u32>) -> Self {
57        self.phrase_slop = phrase_slop;
58        self
59    }
60
61    pub fn with_prefix_length(mut self, prefix_length: u32) -> Self {
62        self.prefix_length = prefix_length;
63        self
64    }
65}
66
67impl Default for FtsSearchParams {
68    fn default() -> Self {
69        Self::new()
70    }
71}
72
73#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Default)]
74pub enum Operator {
75    And,
76    #[default]
77    Or,
78}
79
80impl TryFrom<&str> for Operator {
81    type Error = Error;
82    fn try_from(value: &str) -> Result<Self> {
83        match value.to_ascii_uppercase().as_str() {
84            "AND" => Ok(Self::And),
85            "OR" => Ok(Self::Or),
86            _ => Err(Error::invalid_input(format!("Invalid operator: {}", value))),
87        }
88    }
89}
90
91impl From<Operator> for &'static str {
92    fn from(operator: Operator) -> Self {
93        match operator {
94            Operator::And => "AND",
95            Operator::Or => "OR",
96        }
97    }
98}
99
/// Behaviour shared by every node of a full text search query tree.
pub trait FtsQueryNode {
    /// Returns the set of column names referenced by this node.
    fn columns(&self) -> HashSet<String>;
}
103
104#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
105#[serde(rename_all = "snake_case")]
106pub enum FtsQuery {
107    // leaf queries
108    Match(MatchQuery),
109    Phrase(PhraseQuery),
110
111    // compound queries
112    Boost(BoostQuery),
113    MultiMatch(MultiMatchQuery),
114    Boolean(BooleanQuery),
115}
116
117impl std::fmt::Display for FtsQuery {
118    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
119        match self {
120            Self::Match(query) => write!(f, "Match({:?})", query),
121            Self::Phrase(query) => write!(f, "Phrase({:?})", query),
122            Self::Boost(query) => write!(
123                f,
124                "Boosting(positive={}, negative={}, negative_boost={})",
125                query.positive, query.negative, query.negative_boost
126            ),
127            Self::MultiMatch(query) => write!(f, "MultiMatch({:?})", query),
128            Self::Boolean(query) => {
129                write!(
130                    f,
131                    "Boolean(must={:?}, should={:?})",
132                    query.must, query.should
133                )
134            }
135        }
136    }
137}
138
139impl FtsQueryNode for FtsQuery {
140    fn columns(&self) -> HashSet<String> {
141        match self {
142            Self::Match(query) => query.columns(),
143            Self::Phrase(query) => query.columns(),
144            Self::Boost(query) => {
145                let mut columns = query.positive.columns();
146                columns.extend(query.negative.columns());
147                columns
148            }
149            Self::MultiMatch(query) => {
150                let mut columns = HashSet::new();
151                for match_query in &query.match_queries {
152                    columns.extend(match_query.columns());
153                }
154                columns
155            }
156            Self::Boolean(query) => {
157                let mut columns = HashSet::new();
158                for query in &query.must {
159                    columns.extend(query.columns());
160                }
161                for query in &query.should {
162                    columns.extend(query.columns());
163                }
164                columns
165            }
166        }
167    }
168}
169
170impl FtsQuery {
171    pub fn query(&self) -> String {
172        match self {
173            Self::Match(query) => query.terms.clone(),
174            Self::Phrase(query) => format!("\"{}\"", query.terms), // Phrase queries are quoted
175            Self::Boost(query) => query.positive.query(),
176            Self::MultiMatch(query) => query.match_queries[0].terms.clone(),
177            Self::Boolean(_) => {
178                // Bool queries don't have a single query string, they are composed of multiple queries
179                String::new()
180            }
181        }
182    }
183
184    pub fn is_missing_column(&self) -> bool {
185        match self {
186            Self::Match(query) => query.column.is_none(),
187            Self::Phrase(query) => query.column.is_none(),
188            Self::Boost(query) => {
189                query.positive.is_missing_column() || query.negative.is_missing_column()
190            }
191            Self::MultiMatch(query) => query.match_queries.iter().any(|q| q.column.is_none()),
192            Self::Boolean(query) => {
193                query.must.iter().any(|q| q.is_missing_column())
194                    || query.should.iter().any(|q| q.is_missing_column())
195            }
196        }
197    }
198
199    pub fn with_column(self, column: String) -> Self {
200        match self {
201            Self::Match(query) => Self::Match(query.with_column(Some(column))),
202            Self::Phrase(query) => Self::Phrase(query.with_column(Some(column))),
203            Self::Boost(query) => {
204                let positive = query.positive.with_column(column.clone());
205                let negative = query.negative.with_column(column);
206                Self::Boost(BoostQuery {
207                    positive: Box::new(positive),
208                    negative: Box::new(negative),
209                    negative_boost: query.negative_boost,
210                })
211            }
212            Self::MultiMatch(query) => {
213                let match_queries = query
214                    .match_queries
215                    .into_iter()
216                    .map(|q| q.with_column(Some(column.clone())))
217                    .collect();
218                Self::MultiMatch(MultiMatchQuery { match_queries })
219            }
220            Self::Boolean(query) => {
221                let must = query
222                    .must
223                    .into_iter()
224                    .map(|q| q.with_column(column.clone()))
225                    .collect();
226                let should = query
227                    .should
228                    .into_iter()
229                    .map(|q| q.with_column(column.clone()))
230                    .collect();
231                let must_not = query
232                    .must_not
233                    .into_iter()
234                    .map(|q| q.with_column(column.clone()))
235                    .collect();
236                Self::Boolean(BooleanQuery {
237                    must,
238                    should,
239                    must_not,
240                })
241            }
242        }
243    }
244}
245
246impl From<MatchQuery> for FtsQuery {
247    fn from(query: MatchQuery) -> Self {
248        Self::Match(query)
249    }
250}
251
252impl From<PhraseQuery> for FtsQuery {
253    fn from(query: PhraseQuery) -> Self {
254        Self::Phrase(query)
255    }
256}
257
258impl From<BoostQuery> for FtsQuery {
259    fn from(query: BoostQuery) -> Self {
260        Self::Boost(query)
261    }
262}
263
264impl From<MultiMatchQuery> for FtsQuery {
265    fn from(query: MultiMatchQuery) -> Self {
266        Self::MultiMatch(query)
267    }
268}
269
270impl From<BooleanQuery> for FtsQuery {
271    fn from(query: BooleanQuery) -> Self {
272        Self::Boolean(query)
273    }
274}
275
276#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
277pub struct MatchQuery {
278    // The column to search in.
279    // If None, it will be determined at query time.
280    pub column: Option<String>,
281    pub terms: String,
282
283    // literal default is not supported so we set it by function
284    #[serde(default = "MatchQuery::default_boost")]
285    pub boost: f32,
286
287    // The max edit distance for fuzzy matching.
288    // If Some(0), it will be exact match.
289    // If None, it will be determined automatically by the rules:
290    // - 0 for terms with length <= 2
291    // - 1 for terms with length <= 5
292    // - 2 for terms with length > 5
293    pub fuzziness: Option<u32>,
294
295    /// The maximum number of terms to expand for fuzzy matching.
296    /// Default to 50.
297    #[serde(default = "MatchQuery::default_max_expansions")]
298    pub max_expansions: usize,
299
300    /// The operator to use for combining terms.
301    /// This can be either `And` or `Or`, it's 'Or' by default.
302    /// - `And`: All terms must match.
303    /// - `Or`: At least one term must match.
304    #[serde(default)]
305    pub operator: Operator,
306
307    /// The number of beginning characters being unchanged for fuzzy matching.
308    /// Default to 0.
309    #[serde(default)]
310    pub prefix_length: u32,
311}
312
313impl MatchQuery {
314    pub fn new(terms: String) -> Self {
315        Self {
316            column: None,
317            terms,
318            boost: 1.0,
319            fuzziness: Some(0),
320            max_expansions: 50,
321            operator: Operator::Or,
322            prefix_length: 0,
323        }
324    }
325
326    pub(crate) fn default_boost() -> f32 {
327        1.0
328    }
329
330    pub(crate) fn default_max_expansions() -> usize {
331        50
332    }
333
334    pub fn with_column(mut self, column: Option<String>) -> Self {
335        self.column = column;
336        self
337    }
338
339    pub fn with_boost(mut self, boost: f32) -> Self {
340        self.boost = boost;
341        self
342    }
343
344    pub fn with_fuzziness(mut self, fuzziness: Option<u32>) -> Self {
345        self.fuzziness = fuzziness;
346        self
347    }
348
349    pub fn with_max_expansions(mut self, max_expansions: usize) -> Self {
350        self.max_expansions = max_expansions;
351        self
352    }
353
354    pub fn with_operator(mut self, operator: Operator) -> Self {
355        self.operator = operator;
356        self
357    }
358
359    pub fn with_prefix_length(mut self, prefix_length: u32) -> Self {
360        self.prefix_length = prefix_length;
361        self
362    }
363
364    pub fn auto_fuzziness(token: &str) -> u32 {
365        match token.len() {
366            0..=2 => 0,
367            3..=5 => 1,
368            _ => 2,
369        }
370    }
371}
372
373impl FtsQueryNode for MatchQuery {
374    fn columns(&self) -> HashSet<String> {
375        let mut columns = HashSet::new();
376        if let Some(column) = &self.column {
377            columns.insert(column.clone());
378        }
379        columns
380    }
381}
382
383#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
384pub struct PhraseQuery {
385    // The column to search in.
386    // If None, it will be determined at query time.
387    pub column: Option<String>,
388    pub terms: String,
389    #[serde(default = "u32::default")]
390    pub slop: u32,
391}
392
393impl PhraseQuery {
394    pub fn new(terms: String) -> Self {
395        Self {
396            column: None,
397            terms,
398            slop: 0,
399        }
400    }
401
402    pub fn with_column(mut self, column: Option<String>) -> Self {
403        self.column = column;
404        self
405    }
406
407    pub fn with_slop(mut self, slop: u32) -> Self {
408        self.slop = slop;
409        self
410    }
411}
412
413impl FtsQueryNode for PhraseQuery {
414    fn columns(&self) -> HashSet<String> {
415        let mut columns = HashSet::new();
416        if let Some(column) = &self.column {
417            columns.insert(column.clone());
418        }
419        columns
420    }
421}
422
423#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
424pub struct BoostQuery {
425    pub positive: Box<FtsQuery>,
426    pub negative: Box<FtsQuery>,
427    #[serde(default = "BoostQuery::default_negative_boost")]
428    pub negative_boost: f32,
429}
430
431impl BoostQuery {
432    pub fn new(positive: FtsQuery, negative: FtsQuery, negative_boost: Option<f32>) -> Self {
433        Self {
434            positive: Box::new(positive),
435            negative: Box::new(negative),
436            negative_boost: negative_boost.unwrap_or(0.5),
437        }
438    }
439
440    fn default_negative_boost() -> f32 {
441        0.5
442    }
443}
444
445impl FtsQueryNode for BoostQuery {
446    fn columns(&self) -> HashSet<String> {
447        let mut columns = self.positive.columns();
448        columns.extend(self.negative.columns());
449        columns
450    }
451}
452
453#[derive(Debug, Clone, PartialEq)]
454pub struct MultiMatchQuery {
455    // each query must be a match query with specified column
456    pub match_queries: Vec<MatchQuery>,
457}
458
459impl Serialize for MultiMatchQuery {
460    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
461    where
462        S: serde::Serializer,
463    {
464        let mut map = serializer.serialize_map(Some(3))?;
465
466        let query = self.match_queries.first().ok_or(serde::ser::Error::custom(
467            "MultiMatchQuery must have at least one MatchQuery".to_string(),
468        ))?;
469        map.serialize_entry("query", &query.terms)?;
470        let columns = self
471            .match_queries
472            .iter()
473            .map(|q| q.column.as_ref().unwrap().clone())
474            .collect::<Vec<String>>();
475        map.serialize_entry("columns", &columns)?;
476        let boosts = self
477            .match_queries
478            .iter()
479            .map(|q| q.boost)
480            .collect::<Vec<f32>>();
481        map.serialize_entry("boost", &boosts)?;
482        map.end()
483    }
484}
485
486impl<'de> Deserialize<'de> for MultiMatchQuery {
487    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
488    where
489        D: serde::Deserializer<'de>,
490    {
491        #[derive(Deserialize)]
492        struct MultiMatchQueryData {
493            query: String,
494            columns: Vec<String>,
495            boost: Option<Vec<f32>>,
496        }
497
498        let data = MultiMatchQueryData::deserialize(deserializer)?;
499        let boosts = data.boost.unwrap_or(vec![1.0; data.columns.len()]);
500
501        Self::try_new(data.query, data.columns)
502            .map_err(serde::de::Error::custom)?
503            .try_with_boosts(boosts)
504            .map_err(serde::de::Error::custom)
505    }
506}
507
508impl MultiMatchQuery {
509    pub fn try_new(query: String, columns: Vec<String>) -> Result<Self> {
510        if columns.is_empty() {
511            return Err(Error::invalid_input(
512                "Cannot create MultiMatchQuery with no columns".to_string(),
513            ));
514        }
515
516        let match_queries = columns
517            .into_iter()
518            .map(|column| MatchQuery::new(query.clone()).with_column(Some(column)))
519            .collect();
520        Ok(Self { match_queries })
521    }
522
523    pub fn try_with_boosts(mut self, boosts: Vec<f32>) -> Result<Self> {
524        if boosts.len() != self.match_queries.len() {
525            return Err(Error::invalid_input(
526                "The number of boosts must match the number of queries".to_string(),
527            ));
528        }
529
530        for (query, boost) in self.match_queries.iter_mut().zip(boosts) {
531            query.boost = boost;
532        }
533        Ok(self)
534    }
535
536    pub fn with_operator(mut self, operator: Operator) -> Self {
537        for query in &mut self.match_queries {
538            query.operator = operator;
539        }
540        self
541    }
542}
543
544impl FtsQueryNode for MultiMatchQuery {
545    fn columns(&self) -> HashSet<String> {
546        let mut columns = HashSet::with_capacity(self.match_queries.len());
547        for query in &self.match_queries {
548            columns.extend(query.columns());
549        }
550        columns
551    }
552}
553
/// The role a clause plays inside a [`BooleanQuery`]; it selects which of the
/// `should`, `must` or `must_not` lists the clause is added to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Occur {
    /// The clause goes into the `should` list.
    Should,
    /// The clause goes into the `must` list.
    Must,
    /// The clause goes into the `must_not` list.
    MustNot,
}
559
560impl TryFrom<&str> for Occur {
561    type Error = Error;
562    fn try_from(value: &str) -> Result<Self> {
563        match value.to_ascii_uppercase().as_str() {
564            "SHOULD" => Ok(Self::Should),
565            "MUST" => Ok(Self::Must),
566            "MUST_NOT" => Ok(Self::MustNot),
567            _ => Err(Error::invalid_input(format!(
568                "Invalid occur value: {}",
569                value
570            ))),
571        }
572    }
573}
574
575impl From<Occur> for &'static str {
576    fn from(occur: Occur) -> Self {
577        match occur {
578            Occur::Should => "SHOULD",
579            Occur::Must => "MUST",
580            Occur::MustNot => "MUST_NOT",
581        }
582    }
583}
584
585#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
586pub struct BooleanQuery {
587    pub should: Vec<FtsQuery>,
588    pub must: Vec<FtsQuery>,
589    pub must_not: Vec<FtsQuery>,
590}
591
592impl BooleanQuery {
593    pub fn new(iter: impl IntoIterator<Item = (Occur, FtsQuery)>) -> Self {
594        let mut should = Vec::new();
595        let mut must = Vec::new();
596        let mut must_not = Vec::new();
597        for (occur, query) in iter {
598            match occur {
599                Occur::Should => should.push(query),
600                Occur::Must => must.push(query),
601                Occur::MustNot => must_not.push(query),
602            }
603        }
604        Self {
605            should,
606            must,
607            must_not,
608        }
609    }
610
611    pub fn with_should(mut self, query: FtsQuery) -> Self {
612        self.should.push(query);
613        self
614    }
615
616    pub fn with_must(mut self, query: FtsQuery) -> Self {
617        self.must.push(query);
618        self
619    }
620
621    pub fn with_must_not(mut self, query: FtsQuery) -> Self {
622        self.must_not.push(query);
623        self
624    }
625}
626
/// Test-only flattened view of a query whose clauses are all match queries on
/// one and the same column.
#[cfg(test)]
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct BooleanMatchPlan {
    /// The single column shared by every match query in the plan.
    pub column: String,
    /// Match queries taken from the `should` clauses.
    pub should: Vec<MatchQuery>,
    /// Match queries taken from the `must` clauses.
    pub must: Vec<MatchQuery>,
    /// Match queries taken from the `must_not` clauses.
    pub must_not: Vec<MatchQuery>,
}
635
#[cfg(test)]
impl BooleanMatchPlan {
    /// Tries to flatten `query` into a plan.
    ///
    /// * A `Match` query becomes a plan with that query as its only `should`
    ///   entry.
    /// * A `Boolean` query is accepted when every clause is a `Match` query,
    ///   all of them name the same column, and at least one `should` or
    ///   `must` clause exists.
    ///
    /// Returns `None` for anything else, including match queries without a
    /// column.
    pub(crate) fn try_build(query: &FtsQuery) -> Option<Self> {
        // Column shared by the whole plan; fixed by the first match query seen.
        let mut shared_column: Option<String> = None;

        match query {
            FtsQuery::Match(match_query) => {
                let mut should = Vec::new();
                Self::push_match(&mut should, &mut shared_column, match_query)?;
                let column = shared_column?;
                Some(Self {
                    column,
                    should,
                    must: Vec::new(),
                    must_not: Vec::new(),
                })
            }
            FtsQuery::Boolean(bool_query) => {
                let should = Self::collect_matches(&bool_query.should, &mut shared_column)?;
                let must = Self::collect_matches(&bool_query.must, &mut shared_column)?;
                let must_not = Self::collect_matches(&bool_query.must_not, &mut shared_column)?;

                // A plan made only of `must_not` clauses (or of nothing) is rejected.
                if should.is_empty() && must.is_empty() {
                    return None;
                }
                let column = shared_column?;
                Some(Self {
                    column,
                    should,
                    must,
                    must_not,
                })
            }
            _ => None,
        }
    }

    /// Appends a clone of `query` to `dest`, provided it names a column that
    /// agrees with `column`. The first accepted query fixes `column`.
    fn push_match(
        dest: &mut Vec<MatchQuery>,
        column: &mut Option<String>,
        query: &MatchQuery,
    ) -> Option<()> {
        let query_column = query.column.as_ref()?;
        // Adopt the query's column if none is fixed yet, then require equality.
        let fixed = column.get_or_insert_with(|| query_column.clone());
        if *fixed != *query_column {
            return None;
        }
        dest.push(query.clone());
        Some(())
    }

    /// Converts `queries` into match queries via [`Self::push_match`];
    /// returns `None` as soon as a clause is not a `Match` query or is
    /// rejected by `push_match`.
    fn collect_matches(
        queries: &[FtsQuery],
        column: &mut Option<String>,
    ) -> Option<Vec<MatchQuery>> {
        let mut matches = Vec::with_capacity(queries.len());
        for query in queries {
            match query {
                FtsQuery::Match(match_query) => {
                    Self::push_match(&mut matches, column, match_query)?
                }
                _ => return None,
            }
        }
        Some(matches)
    }
}
702
703impl FtsQueryNode for BooleanQuery {
704    fn columns(&self) -> HashSet<String> {
705        let mut columns = HashSet::new();
706        for query in &self.should {
707            columns.extend(query.columns());
708        }
709        for query in &self.must {
710            columns.extend(query.columns());
711        }
712        for query in &self.must_not {
713            columns.extend(query.columns());
714        }
715        columns
716    }
717}
718
719#[derive(Clone)]
720pub struct Tokens {
721    tokens: Vec<String>,
722    positions: Vec<u32>,
723    tokens_map: HashMap<String, usize>,
724    token_type: DocType,
725}
726
727impl Tokens {
728    pub fn new(tokens: Vec<String>, token_type: DocType) -> Self {
729        let positions = (0..tokens.len() as u32).collect();
730        Self::with_positions(tokens, positions, token_type)
731    }
732
733    pub fn with_positions(tokens: Vec<String>, positions: Vec<u32>, token_type: DocType) -> Self {
734        debug_assert_eq!(tokens.len(), positions.len());
735        let mut tokens_vec = vec![];
736        let mut tokens_map = HashMap::new();
737        for (idx, token) in tokens.into_iter().enumerate() {
738            tokens_vec.push(token.clone());
739            tokens_map.insert(token, idx);
740        }
741
742        Self {
743            tokens: tokens_vec,
744            positions,
745            tokens_map,
746            token_type,
747        }
748    }
749
750    pub fn len(&self) -> usize {
751        self.tokens.len()
752    }
753
754    pub fn is_empty(&self) -> bool {
755        self.tokens.is_empty()
756    }
757
758    pub fn token_type(&self) -> &DocType {
759        &self.token_type
760    }
761
762    pub fn contains(&self, token: &str) -> bool {
763        self.tokens_map.contains_key(token)
764    }
765
766    pub fn token_index(&self, token: &str) -> Option<usize> {
767        self.tokens_map.get(token).copied()
768    }
769
770    pub fn get_token(&self, index: usize) -> &str {
771        &self.tokens[index]
772    }
773
774    pub fn position(&self, index: usize) -> u32 {
775        self.positions[index]
776    }
777}
778
779impl IntoIterator for Tokens {
780    type Item = String;
781    type IntoIter = std::vec::IntoIter<String>;
782
783    fn into_iter(self) -> Self::IntoIter {
784        self.tokens.into_iter()
785    }
786}
787
788impl<'a> IntoIterator for &'a Tokens {
789    type Item = &'a String;
790    type IntoIter = std::slice::Iter<'a, String>;
791
792    fn into_iter(self) -> Self::IntoIter {
793        self.tokens.iter()
794    }
795}
796
797pub fn collect_query_tokens(text: &str, tokenizer: &mut Box<dyn LanceTokenizer>) -> Tokens {
798    let token_type = tokenizer.doc_type();
799    let mut stream = tokenizer.token_stream_for_search(text);
800    let mut tokens = Vec::new();
801    let mut positions = Vec::new();
802    while let Some(token) = stream.next() {
803        tokens.push(token.text.clone());
804        positions.push(token.position as u32);
805    }
806    Tokens::with_positions(tokens, positions, token_type)
807}
808
809pub fn has_query_token(
810    text: &str,
811    tokenizer: &mut Box<dyn LanceTokenizer>,
812    query_tokens: &Tokens,
813) -> bool {
814    let mut stream = tokenizer.token_stream_for_doc(text);
815    while let Some(token) = stream.next() {
816        if query_tokens.contains(&token.text) {
817            return true;
818        }
819    }
820    false
821}
822
823pub fn fill_fts_query_column(
824    query: &FtsQuery,
825    columns: &[String],
826    replace: bool,
827) -> Result<FtsQuery> {
828    if !query.is_missing_column() && !replace {
829        return Ok(query.clone());
830    }
831    match query {
832        FtsQuery::Match(match_query) => {
833            match columns.len() {
834                0 => {
835                    Err(Error::invalid_input("Cannot perform full text search unless an INVERTED index has been created on at least one column".to_string()))
836                }
837                1 => {
838                    let column = columns[0].clone();
839                    let query = match_query.clone().with_column(Some(column));
840                    Ok(FtsQuery::Match(query))
841                }
842                _ => {
843                    // if there are multiple columns, we need to create a MultiMatch query
844                    let multi_match_query =
845                        MultiMatchQuery::try_new(match_query.terms.clone(), columns.to_vec())?;
846                    Ok(FtsQuery::MultiMatch(multi_match_query))
847                }
848            }
849        }
850        FtsQuery::Phrase(phrase_query) => {
851            match columns.len() {
852                0 => {
853                    Err(Error::invalid_input("Cannot perform full text search unless an INVERTED index has been created on at least one column".to_string()))
854                }
855                1 => {
856                    let column = columns[0].clone();
857                    let query = phrase_query.clone().with_column(Some(column));
858                    Ok(FtsQuery::Phrase(query))
859                }
860                _ => {
861                    Err(Error::invalid_input("the column must be specified in the query".to_string()))
862                }
863            }
864        }
865       FtsQuery::Boost(boost_query) => {
866            let positive = fill_fts_query_column(&boost_query.positive, columns, replace)?;
867            let negative = fill_fts_query_column(&boost_query.negative, columns, replace)?;
868            Ok(FtsQuery::Boost(BoostQuery {
869                positive: Box::new(positive),
870                negative: Box::new(negative),
871                negative_boost: boost_query.negative_boost,
872            }))
873        }
874        FtsQuery::MultiMatch(multi_match_query) => {
875            let match_queries = multi_match_query
876                .match_queries
877                .iter()
878                .map(|query| fill_fts_query_column(&FtsQuery::Match(query.clone()), columns, replace))
879                .map(|result| {
880                    result.map(|query| {
881                        if let FtsQuery::Match(match_query) = query {
882                            match_query
883                        } else {
884                            unreachable!("Expected MatchQuery")
885                        }
886                    })
887                })
888                .collect::<Result<Vec<_>>>()?;
889            Ok(FtsQuery::MultiMatch(MultiMatchQuery { match_queries }))
890       }
891        FtsQuery::Boolean(bool_query) => {
892            let must = bool_query
893                .must
894                .iter()
895                .map(|query| fill_fts_query_column(query, columns, replace))
896                .collect::<Result<Vec<_>>>()?;
897            let should = bool_query
898                .should
899                .iter()
900                .map(|query| fill_fts_query_column(query, columns, replace))
901                .collect::<Result<Vec<_>>>()?;
902            let must_not = bool_query
903                .must_not
904                .iter()
905                .map(|query| fill_fts_query_column(query, columns, replace))
906                .collect::<Result<Vec<_>>>()?;
907            Ok(FtsQuery::Boolean(BooleanQuery { must, should, must_not }))
908        }
909    }
910}
911
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Shorthand for a match query on `terms` bound to `column`.
    fn match_on(terms: &str, column: &str) -> MatchQuery {
        MatchQuery::new(terms.to_string()).with_column(Some(column.to_string()))
    }

    // Serialization writes every field; deserialization falls back to the
    // documented defaults for the omitted ones.
    #[test]
    fn test_match_query_serde() {
        let query = match_on("hello world", "text")
            .with_boost(2.0)
            .with_fuzziness(Some(1))
            .with_max_expansions(10)
            .with_operator(Operator::And);

        let serialized = serde_json::to_value(&query).unwrap();
        let expected = json!({
            "column": "text",
            "terms": "hello world",
            "boost": 2.0,
            "fuzziness": 1,
            "max_expansions": 10,
            "operator": "And",
            "prefix_length": 0,
        });
        assert_eq!(serialized, expected);

        let partial = json!({
            "column": "text",
            "terms": "hello world",
            "fuzziness": 0,
        });
        let query = serde_json::from_str::<MatchQuery>(&partial.to_string()).unwrap();
        assert_eq!(query.column, Some("text".to_owned()));
        assert_eq!(query.terms, "hello world");
        assert_eq!(query.boost, 1.0);
        assert_eq!(query.fuzziness, Some(0));
        assert_eq!(query.max_expansions, 50);
        assert_eq!(query.operator, Operator::Or);
        assert_eq!(query.prefix_length, 0);
    }

    #[test]
    fn test_phrase_query_serde() {
        // Only `terms` given: column and slop take their defaults.
        let minimal = json!({
            "terms": "hello world",
        });
        let expected = PhraseQuery::new("hello world".to_string());
        let parsed: PhraseQuery = serde_json::from_value(minimal).unwrap();
        assert_eq!(parsed, expected);

        // Every field given.
        let full = json!({
            "terms": "hello world",
            "column": "text",
            "slop": 2,
        });
        let expected = PhraseQuery::new("hello world".to_string())
            .with_column(Some("text".to_string()))
            .with_slop(2);
        let parsed: PhraseQuery = serde_json::from_value(full).unwrap();
        assert_eq!(parsed, expected);
    }

    #[test]
    fn test_boolean_match_plan_match_query() {
        let query = match_on("hello", "text");
        let plan = BooleanMatchPlan::try_build(&FtsQuery::Match(query.clone())).unwrap();
        assert_eq!(plan.column, "text");
        assert_eq!(plan.should, vec![query]);
        assert!(plan.must.is_empty());
        assert!(plan.must_not.is_empty());
    }

    #[test]
    fn test_boolean_match_plan_boolean_query() {
        let should = match_on("a", "text");
        let must = match_on("b", "text");
        let must_not = match_on("c", "text");
        let query = BooleanQuery::new(vec![
            (Occur::Should, should.clone().into()),
            (Occur::Must, must.clone().into()),
            (Occur::MustNot, must_not.clone().into()),
        ]);
        let plan = BooleanMatchPlan::try_build(&FtsQuery::Boolean(query)).unwrap();
        assert_eq!(plan.column, "text");
        assert_eq!(plan.should, vec![should]);
        assert_eq!(plan.must, vec![must]);
        assert_eq!(plan.must_not, vec![must_not]);
    }

    #[test]
    fn test_boolean_match_plan_rejects_mixed_columns() {
        let should = match_on("a", "text");
        let must = match_on("b", "title");
        let query = BooleanQuery::new(vec![
            (Occur::Should, should.into()),
            (Occur::Must, must.into()),
        ]);
        assert!(BooleanMatchPlan::try_build(&FtsQuery::Boolean(query)).is_none());
    }

    #[test]
    fn test_boolean_match_plan_rejects_non_match_queries() {
        let phrase =
            PhraseQuery::new("hello world".to_string()).with_column(Some("text".to_string()));
        let query = BooleanQuery::new(vec![(Occur::Should, phrase.into())]);
        assert!(BooleanMatchPlan::try_build(&FtsQuery::Boolean(query)).is_none());
    }

    #[test]
    fn test_boolean_match_plan_rejects_only_must_not() {
        let must_not = match_on("c", "text");
        let query = BooleanQuery::new(vec![(Occur::MustNot, must_not.into())]);
        assert!(BooleanMatchPlan::try_build(&FtsQuery::Boolean(query)).is_none());
    }

    #[test]
    fn test_boolean_match_plan_rejects_missing_column() {
        let query = MatchQuery::new("hello".to_string());
        assert!(BooleanMatchPlan::try_build(&FtsQuery::Match(query)).is_none());
    }
}