Skip to main content

lance_index/scalar/inverted/
query.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use crate::scalar::inverted::lance_tokenizer::DocType;
5use crate::scalar::inverted::tokenizer::lance_tokenizer::LanceTokenizer;
6use lance_core::{Error, Result};
7use serde::ser::SerializeMap;
8use serde::{Deserialize, Serialize};
9use snafu::location;
10use std::collections::HashSet;
11
/// Search-time parameters for a full text search.
///
/// Build one with [`FtsSearchParams::new`] (or `Default`) and adjust it with
/// the chainable `with_*` methods.
#[derive(Debug, Clone)]
pub struct FtsSearchParams {
    /// Optional limit; unset by default.
    /// NOTE(review): presumably the maximum number of results — confirm
    /// against the code that consumes these parameters.
    pub limit: Option<usize>,
    /// WAND factor; `1.0` by default. Its exact effect is defined by the
    /// search implementation, not by this type.
    pub wand_factor: f32,
    /// Fuzziness setting; `Some(0)` by default.
    pub fuzziness: Option<u32>,
    /// Expansion cap; `50` by default.
    pub max_expansions: usize,
    /// `None` means not a phrase query;
    /// `Some(n)` means a phrase query with slop `n`.
    pub phrase_slop: Option<u32>,
    /// The number of beginning characters being unchanged for fuzzy matching.
    pub prefix_length: u32,
}

impl FtsSearchParams {
    /// Creates the default parameter set (identical to `Default::default()`).
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns `self` with `limit` replaced.
    pub fn with_limit(self, limit: Option<usize>) -> Self {
        Self { limit, ..self }
    }

    /// Returns `self` with `wand_factor` replaced by `factor`.
    pub fn with_wand_factor(self, factor: f32) -> Self {
        Self {
            wand_factor: factor,
            ..self
        }
    }

    /// Returns `self` with `fuzziness` replaced.
    pub fn with_fuzziness(self, fuzziness: Option<u32>) -> Self {
        Self { fuzziness, ..self }
    }

    /// Returns `self` with `max_expansions` replaced.
    pub fn with_max_expansions(self, max_expansions: usize) -> Self {
        Self {
            max_expansions,
            ..self
        }
    }

    /// Returns `self` with `phrase_slop` replaced.
    pub fn with_phrase_slop(self, phrase_slop: Option<u32>) -> Self {
        Self {
            phrase_slop,
            ..self
        }
    }

    /// Returns `self` with `prefix_length` replaced.
    pub fn with_prefix_length(self, prefix_length: u32) -> Self {
        Self {
            prefix_length,
            ..self
        }
    }
}

impl Default for FtsSearchParams {
    /// No limit, WAND factor 1.0, fuzziness `Some(0)`, 50 expansions,
    /// no phrase slop and a prefix length of 0.
    fn default() -> Self {
        Self {
            limit: None,
            wand_factor: 1.0,
            fuzziness: Some(0),
            max_expansions: 50,
            phrase_slop: None,
            prefix_length: 0,
        }
    }
}
73
/// How the terms of a match query are combined.
///
/// `Or` is the default variant.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Default)]
pub enum Operator {
    /// All terms must match.
    And,
    /// At least one term must match.
    #[default]
    Or,
}
80
81impl TryFrom<&str> for Operator {
82    type Error = Error;
83    fn try_from(value: &str) -> Result<Self> {
84        match value.to_ascii_uppercase().as_str() {
85            "AND" => Ok(Self::And),
86            "OR" => Ok(Self::Or),
87            _ => Err(Error::invalid_input(
88                format!("Invalid operator: {}", value),
89                location!(),
90            )),
91        }
92    }
93}
94
/// Converts an operator to its upper-case name, the same spelling that
/// `TryFrom<&str>` accepts.
impl From<Operator> for &'static str {
    fn from(operator: Operator) -> Self {
        match operator {
            Operator::And => "AND",
            Operator::Or => "OR",
        }
    }
}
103
/// Common behaviour of every node in a full text search query tree.
pub trait FtsQueryNode {
    /// Returns the set of column names this node (and any nested nodes) refers to.
    /// Nodes whose column is not set contribute nothing.
    fn columns(&self) -> HashSet<String>;
}
107
/// A full text search query, either a leaf query or a composition of queries.
///
/// Variant names are (de)serialized in snake_case
/// (`match`, `phrase`, `boost`, `multi_match`, `boolean`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FtsQuery {
    // leaf queries
    /// Term matching on a single column.
    Match(MatchQuery),
    /// Phrase matching on a single column.
    Phrase(PhraseQuery),

    // compound queries
    /// A positive query combined with a negative query and a negative boost.
    Boost(BoostQuery),
    /// The same match query applied to several columns.
    MultiMatch(MultiMatchQuery),
    /// Combination of `should` / `must` / `must_not` sub-queries.
    Boolean(BooleanQuery),
}
120
121impl std::fmt::Display for FtsQuery {
122    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
123        match self {
124            Self::Match(query) => write!(f, "Match({:?})", query),
125            Self::Phrase(query) => write!(f, "Phrase({:?})", query),
126            Self::Boost(query) => write!(
127                f,
128                "Boosting(positive={}, negative={}, negative_boost={})",
129                query.positive, query.negative, query.negative_boost
130            ),
131            Self::MultiMatch(query) => write!(f, "MultiMatch({:?})", query),
132            Self::Boolean(query) => {
133                write!(
134                    f,
135                    "Boolean(must={:?}, should={:?})",
136                    query.must, query.should
137                )
138            }
139        }
140    }
141}
142
143impl FtsQueryNode for FtsQuery {
144    fn columns(&self) -> HashSet<String> {
145        match self {
146            Self::Match(query) => query.columns(),
147            Self::Phrase(query) => query.columns(),
148            Self::Boost(query) => {
149                let mut columns = query.positive.columns();
150                columns.extend(query.negative.columns());
151                columns
152            }
153            Self::MultiMatch(query) => {
154                let mut columns = HashSet::new();
155                for match_query in &query.match_queries {
156                    columns.extend(match_query.columns());
157                }
158                columns
159            }
160            Self::Boolean(query) => {
161                let mut columns = HashSet::new();
162                for query in &query.must {
163                    columns.extend(query.columns());
164                }
165                for query in &query.should {
166                    columns.extend(query.columns());
167                }
168                columns
169            }
170        }
171    }
172}
173
174impl FtsQuery {
175    pub fn query(&self) -> String {
176        match self {
177            Self::Match(query) => query.terms.clone(),
178            Self::Phrase(query) => format!("\"{}\"", query.terms), // Phrase queries are quoted
179            Self::Boost(query) => query.positive.query(),
180            Self::MultiMatch(query) => query.match_queries[0].terms.clone(),
181            Self::Boolean(_) => {
182                // Bool queries don't have a single query string, they are composed of multiple queries
183                String::new()
184            }
185        }
186    }
187
188    pub fn is_missing_column(&self) -> bool {
189        match self {
190            Self::Match(query) => query.column.is_none(),
191            Self::Phrase(query) => query.column.is_none(),
192            Self::Boost(query) => {
193                query.positive.is_missing_column() || query.negative.is_missing_column()
194            }
195            Self::MultiMatch(query) => query.match_queries.iter().any(|q| q.column.is_none()),
196            Self::Boolean(query) => {
197                query.must.iter().any(|q| q.is_missing_column())
198                    || query.should.iter().any(|q| q.is_missing_column())
199            }
200        }
201    }
202
203    pub fn with_column(self, column: String) -> Self {
204        match self {
205            Self::Match(query) => Self::Match(query.with_column(Some(column))),
206            Self::Phrase(query) => Self::Phrase(query.with_column(Some(column))),
207            Self::Boost(query) => {
208                let positive = query.positive.with_column(column.clone());
209                let negative = query.negative.with_column(column);
210                Self::Boost(BoostQuery {
211                    positive: Box::new(positive),
212                    negative: Box::new(negative),
213                    negative_boost: query.negative_boost,
214                })
215            }
216            Self::MultiMatch(query) => {
217                let match_queries = query
218                    .match_queries
219                    .into_iter()
220                    .map(|q| q.with_column(Some(column.clone())))
221                    .collect();
222                Self::MultiMatch(MultiMatchQuery { match_queries })
223            }
224            Self::Boolean(query) => {
225                let must = query
226                    .must
227                    .into_iter()
228                    .map(|q| q.with_column(column.clone()))
229                    .collect();
230                let should = query
231                    .should
232                    .into_iter()
233                    .map(|q| q.with_column(column.clone()))
234                    .collect();
235                let must_not = query
236                    .must_not
237                    .into_iter()
238                    .map(|q| q.with_column(column.clone()))
239                    .collect();
240                Self::Boolean(BooleanQuery {
241                    must,
242                    should,
243                    must_not,
244                })
245            }
246        }
247    }
248}
249
/// Wraps a [`MatchQuery`] in [`FtsQuery::Match`].
impl From<MatchQuery> for FtsQuery {
    fn from(query: MatchQuery) -> Self {
        Self::Match(query)
    }
}

/// Wraps a [`PhraseQuery`] in [`FtsQuery::Phrase`].
impl From<PhraseQuery> for FtsQuery {
    fn from(query: PhraseQuery) -> Self {
        Self::Phrase(query)
    }
}

/// Wraps a [`BoostQuery`] in [`FtsQuery::Boost`].
impl From<BoostQuery> for FtsQuery {
    fn from(query: BoostQuery) -> Self {
        Self::Boost(query)
    }
}

/// Wraps a [`MultiMatchQuery`] in [`FtsQuery::MultiMatch`].
impl From<MultiMatchQuery> for FtsQuery {
    fn from(query: MultiMatchQuery) -> Self {
        Self::MultiMatch(query)
    }
}

/// Wraps a [`BooleanQuery`] in [`FtsQuery::Boolean`].
impl From<BooleanQuery> for FtsQuery {
    fn from(query: BooleanQuery) -> Self {
        Self::Boolean(query)
    }
}
279
/// A term-matching query against a single column.
///
/// When deserialized, `boost`, `max_expansions`, `operator` and
/// `prefix_length` fall back to their defaults if absent.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MatchQuery {
    /// The column to search in.
    /// If None, it will be determined at query time.
    pub column: Option<String>,
    /// The text to search for.
    pub terms: String,

    /// Boost of this query; defaults to 1.0.
    // literal default is not supported so we set it by function
    #[serde(default = "MatchQuery::default_boost")]
    pub boost: f32,

    /// The max edit distance for fuzzy matching.
    /// If Some(0), it will be exact match.
    /// If None, it will be determined automatically by the rules:
    /// - 0 for terms with length <= 2
    /// - 1 for terms with length <= 5
    /// - 2 for terms with length > 5
    pub fuzziness: Option<u32>,

    /// The maximum number of terms to expand for fuzzy matching.
    /// Default to 50.
    #[serde(default = "MatchQuery::default_max_expansions")]
    pub max_expansions: usize,

    /// The operator to use for combining terms.
    /// This can be either `And` or `Or`, it's 'Or' by default.
    /// - `And`: All terms must match.
    /// - `Or`: At least one term must match.
    #[serde(default)]
    pub operator: Operator,

    /// The number of beginning characters being unchanged for fuzzy matching.
    /// Default to 0.
    #[serde(default)]
    pub prefix_length: u32,
}
316
317impl MatchQuery {
318    pub fn new(terms: String) -> Self {
319        Self {
320            column: None,
321            terms,
322            boost: 1.0,
323            fuzziness: Some(0),
324            max_expansions: 50,
325            operator: Operator::Or,
326            prefix_length: 0,
327        }
328    }
329
330    pub(crate) fn default_boost() -> f32 {
331        1.0
332    }
333
334    pub(crate) fn default_max_expansions() -> usize {
335        50
336    }
337
338    pub fn with_column(mut self, column: Option<String>) -> Self {
339        self.column = column;
340        self
341    }
342
343    pub fn with_boost(mut self, boost: f32) -> Self {
344        self.boost = boost;
345        self
346    }
347
348    pub fn with_fuzziness(mut self, fuzziness: Option<u32>) -> Self {
349        self.fuzziness = fuzziness;
350        self
351    }
352
353    pub fn with_max_expansions(mut self, max_expansions: usize) -> Self {
354        self.max_expansions = max_expansions;
355        self
356    }
357
358    pub fn with_operator(mut self, operator: Operator) -> Self {
359        self.operator = operator;
360        self
361    }
362
363    pub fn with_prefix_length(mut self, prefix_length: u32) -> Self {
364        self.prefix_length = prefix_length;
365        self
366    }
367
368    pub fn auto_fuzziness(token: &str) -> u32 {
369        match token.len() {
370            0..=2 => 0,
371            3..=5 => 1,
372            _ => 2,
373        }
374    }
375}
376
377impl FtsQueryNode for MatchQuery {
378    fn columns(&self) -> HashSet<String> {
379        let mut columns = HashSet::new();
380        if let Some(column) = &self.column {
381            columns.insert(column.clone());
382        }
383        columns
384    }
385}
386
/// A phrase query against a single column.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PhraseQuery {
    /// The column to search in.
    /// If None, it will be determined at query time.
    pub column: Option<String>,
    /// The phrase to search for.
    pub terms: String,
    /// Phrase slop; defaults to 0 when absent on deserialization.
    #[serde(default = "u32::default")]
    pub slop: u32,
}
396
397impl PhraseQuery {
398    pub fn new(terms: String) -> Self {
399        Self {
400            column: None,
401            terms,
402            slop: 0,
403        }
404    }
405
406    pub fn with_column(mut self, column: Option<String>) -> Self {
407        self.column = column;
408        self
409    }
410
411    pub fn with_slop(mut self, slop: u32) -> Self {
412        self.slop = slop;
413        self
414    }
415}
416
417impl FtsQueryNode for PhraseQuery {
418    fn columns(&self) -> HashSet<String> {
419        let mut columns = HashSet::new();
420        if let Some(column) = &self.column {
421            columns.insert(column.clone());
422        }
423        columns
424    }
425}
426
/// A boosting query: a positive query paired with a negative query and the
/// boost associated with the negative side.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BoostQuery {
    /// The positive sub-query.
    pub positive: Box<FtsQuery>,
    /// The negative sub-query.
    pub negative: Box<FtsQuery>,
    /// Boost associated with the negative query; defaults to 0.5 when absent
    /// on deserialization.
    #[serde(default = "BoostQuery::default_negative_boost")]
    pub negative_boost: f32,
}
434
435impl BoostQuery {
436    pub fn new(positive: FtsQuery, negative: FtsQuery, negative_boost: Option<f32>) -> Self {
437        Self {
438            positive: Box::new(positive),
439            negative: Box::new(negative),
440            negative_boost: negative_boost.unwrap_or(0.5),
441        }
442    }
443
444    fn default_negative_boost() -> f32 {
445        0.5
446    }
447}
448
449impl FtsQueryNode for BoostQuery {
450    fn columns(&self) -> HashSet<String> {
451        let mut columns = self.positive.columns();
452        columns.extend(self.negative.columns());
453        columns
454    }
455}
456
/// The same match query applied to several columns.
///
/// `Serialize` / `Deserialize` are implemented by hand for this type rather
/// than derived.
#[derive(Debug, Clone, PartialEq)]
pub struct MultiMatchQuery {
    /// One match query per column.
    // each query must be a match query with specified column
    pub match_queries: Vec<MatchQuery>,
}
462
463impl Serialize for MultiMatchQuery {
464    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
465    where
466        S: serde::Serializer,
467    {
468        let mut map = serializer.serialize_map(Some(3))?;
469
470        let query = self.match_queries.first().ok_or(serde::ser::Error::custom(
471            "MultiMatchQuery must have at least one MatchQuery".to_string(),
472        ))?;
473        map.serialize_entry("query", &query.terms)?;
474        let columns = self
475            .match_queries
476            .iter()
477            .map(|q| q.column.as_ref().unwrap().clone())
478            .collect::<Vec<String>>();
479        map.serialize_entry("columns", &columns)?;
480        let boosts = self
481            .match_queries
482            .iter()
483            .map(|q| q.boost)
484            .collect::<Vec<f32>>();
485        map.serialize_entry("boost", &boosts)?;
486        map.end()
487    }
488}
489
490impl<'de> Deserialize<'de> for MultiMatchQuery {
491    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
492    where
493        D: serde::Deserializer<'de>,
494    {
495        #[derive(Deserialize)]
496        struct MultiMatchQueryData {
497            query: String,
498            columns: Vec<String>,
499            boost: Option<Vec<f32>>,
500        }
501
502        let data = MultiMatchQueryData::deserialize(deserializer)?;
503        let boosts = data.boost.unwrap_or(vec![1.0; data.columns.len()]);
504
505        Self::try_new(data.query, data.columns)
506            .map_err(serde::de::Error::custom)?
507            .try_with_boosts(boosts)
508            .map_err(serde::de::Error::custom)
509    }
510}
511
512impl MultiMatchQuery {
513    pub fn try_new(query: String, columns: Vec<String>) -> Result<Self> {
514        if columns.is_empty() {
515            return Err(Error::invalid_input(
516                "Cannot create MultiMatchQuery with no columns".to_string(),
517                location!(),
518            ));
519        }
520
521        let match_queries = columns
522            .into_iter()
523            .map(|column| MatchQuery::new(query.clone()).with_column(Some(column)))
524            .collect();
525        Ok(Self { match_queries })
526    }
527
528    pub fn try_with_boosts(mut self, boosts: Vec<f32>) -> Result<Self> {
529        if boosts.len() != self.match_queries.len() {
530            return Err(Error::invalid_input(
531                "The number of boosts must match the number of queries".to_string(),
532                location!(),
533            ));
534        }
535
536        for (query, boost) in self.match_queries.iter_mut().zip(boosts) {
537            query.boost = boost;
538        }
539        Ok(self)
540    }
541
542    pub fn with_operator(mut self, operator: Operator) -> Self {
543        for query in &mut self.match_queries {
544            query.operator = operator;
545        }
546        self
547    }
548}
549
550impl FtsQueryNode for MultiMatchQuery {
551    fn columns(&self) -> HashSet<String> {
552        let mut columns = HashSet::with_capacity(self.match_queries.len());
553        for query in &self.match_queries {
554            columns.extend(query.columns());
555        }
556        columns
557    }
558}
559
/// The role a sub-query plays inside a boolean query.
///
/// Derives the usual value-type traits so it can be logged, copied and
/// compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Occur {
    /// The sub-query goes into the `should` list.
    Should,
    /// The sub-query goes into the `must` list.
    Must,
    /// The sub-query goes into the `must_not` list.
    MustNot,
}
565
566impl TryFrom<&str> for Occur {
567    type Error = Error;
568    fn try_from(value: &str) -> Result<Self> {
569        match value.to_ascii_uppercase().as_str() {
570            "SHOULD" => Ok(Self::Should),
571            "MUST" => Ok(Self::Must),
572            "MUST_NOT" => Ok(Self::MustNot),
573            _ => Err(Error::invalid_input(
574                format!("Invalid occur value: {}", value),
575                location!(),
576            )),
577        }
578    }
579}
580
/// Converts an occur value to its upper-case name, the same spelling that
/// `TryFrom<&str>` accepts.
impl From<Occur> for &'static str {
    fn from(occur: Occur) -> Self {
        match occur {
            Occur::Should => "SHOULD",
            Occur::Must => "MUST",
            Occur::MustNot => "MUST_NOT",
        }
    }
}
590
591#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
592pub struct BooleanQuery {
593    pub should: Vec<FtsQuery>,
594    pub must: Vec<FtsQuery>,
595    pub must_not: Vec<FtsQuery>,
596}
597
598impl BooleanQuery {
599    pub fn new(iter: impl IntoIterator<Item = (Occur, FtsQuery)>) -> Self {
600        let mut should = Vec::new();
601        let mut must = Vec::new();
602        let mut must_not = Vec::new();
603        for (occur, query) in iter {
604            match occur {
605                Occur::Should => should.push(query),
606                Occur::Must => must.push(query),
607                Occur::MustNot => must_not.push(query),
608            }
609        }
610        Self {
611            should,
612            must,
613            must_not,
614        }
615    }
616
617    pub fn with_should(mut self, query: FtsQuery) -> Self {
618        self.should.push(query);
619        self
620    }
621
622    pub fn with_must(mut self, query: FtsQuery) -> Self {
623        self.must.push(query);
624        self
625    }
626
627    pub fn with_must_not(mut self, query: FtsQuery) -> Self {
628        self.must_not.push(query);
629        self
630    }
631}
632
/// A flattened boolean query in which every clause is a match query on one
/// and the same column. Built by `BooleanMatchPlan::try_build`.
#[derive(Debug, Clone, PartialEq)]
#[allow(dead_code)]
pub(crate) struct BooleanMatchPlan {
    /// The single column shared by every clause.
    pub column: String,
    /// Match queries taken from the `should` list (or the lone match query).
    pub should: Vec<MatchQuery>,
    /// Match queries taken from the `must` list.
    pub must: Vec<MatchQuery>,
    /// Match queries taken from the `must_not` list.
    pub must_not: Vec<MatchQuery>,
}
641
642#[allow(dead_code)]
643impl BooleanMatchPlan {
644    pub(crate) fn try_build(query: &FtsQuery) -> Option<Self> {
645        match query {
646            FtsQuery::Match(match_query) => {
647                let mut column = None;
648                let mut should = Vec::new();
649                Self::push_match(&mut should, &mut column, match_query)?;
650                Some(Self {
651                    column: column?,
652                    should,
653                    must: Vec::new(),
654                    must_not: Vec::new(),
655                })
656            }
657            FtsQuery::Boolean(bool_query) => {
658                let mut column = None;
659                let should = Self::collect_matches(&bool_query.should, &mut column)?;
660                let must = Self::collect_matches(&bool_query.must, &mut column)?;
661                let must_not = Self::collect_matches(&bool_query.must_not, &mut column)?;
662
663                if should.is_empty() && must.is_empty() {
664                    return None;
665                }
666                Some(Self {
667                    column: column?,
668                    should,
669                    must,
670                    must_not,
671                })
672            }
673            _ => None,
674        }
675    }
676
677    fn push_match(
678        dest: &mut Vec<MatchQuery>,
679        column: &mut Option<String>,
680        query: &MatchQuery,
681    ) -> Option<()> {
682        let query_column = query.column.as_ref()?;
683        if let Some(existing) = column.as_ref() {
684            if existing != query_column {
685                return None;
686            }
687        } else {
688            *column = Some(query_column.clone());
689        }
690        dest.push(query.clone());
691        Some(())
692    }
693
694    fn collect_matches(
695        queries: &[FtsQuery],
696        column: &mut Option<String>,
697    ) -> Option<Vec<MatchQuery>> {
698        let mut matches = Vec::with_capacity(queries.len());
699        for query in queries {
700            let FtsQuery::Match(match_query) = query else {
701                return None;
702            };
703            Self::push_match(&mut matches, column, match_query)?;
704        }
705        Some(matches)
706    }
707}
708
709impl FtsQueryNode for BooleanQuery {
710    fn columns(&self) -> HashSet<String> {
711        let mut columns = HashSet::new();
712        for query in &self.should {
713            columns.extend(query.columns());
714        }
715        for query in &self.must {
716            columns.extend(query.columns());
717        }
718        for query in &self.must_not {
719            columns.extend(query.columns());
720        }
721        columns
722    }
723}
724
/// A sequence of tokens together with a set of the same tokens for
/// membership tests.
#[derive(Clone)]
pub struct Tokens {
    // Tokens in the order they were supplied; duplicates are kept.
    tokens: Vec<String>,
    // The distinct tokens, backing `contains`.
    tokens_set: HashSet<String>,
    // The doc type supplied at construction.
    token_type: DocType,
}
731
732impl Tokens {
733    pub fn new(tokens: Vec<String>, token_type: DocType) -> Self {
734        let mut tokens_vec = vec![];
735        let mut tokens_set = HashSet::new();
736        for token in tokens.into_iter() {
737            tokens_vec.push(token.clone());
738            tokens_set.insert(token);
739        }
740
741        Self {
742            tokens: tokens_vec,
743            tokens_set,
744            token_type,
745        }
746    }
747
748    pub fn len(&self) -> usize {
749        self.tokens.len()
750    }
751
752    pub fn is_empty(&self) -> bool {
753        self.tokens.is_empty()
754    }
755
756    pub fn token_type(&self) -> &DocType {
757        &self.token_type
758    }
759
760    pub fn contains(&self, token: &str) -> bool {
761        self.tokens_set.contains(token)
762    }
763}
764
/// Consumes the tokens and yields them by value, in their stored order
/// (duplicates included).
impl IntoIterator for Tokens {
    type Item = String;
    type IntoIter = std::vec::IntoIter<String>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}
773
/// Iterates over the tokens by reference, in their stored order
/// (duplicates included).
impl<'a> IntoIterator for &'a Tokens {
    type Item = &'a String;
    type IntoIter = std::slice::Iter<'a, String>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.iter()
    }
}
782
783pub fn collect_query_tokens(
784    text: &str,
785    tokenizer: &mut Box<dyn LanceTokenizer>,
786    inclusive: Option<&HashSet<String>>,
787) -> Tokens {
788    let token_type = tokenizer.doc_type();
789    let mut stream = tokenizer.token_stream_for_search(text);
790    let mut tokens = Vec::new();
791    while let Some(token) = stream.next() {
792        if let Some(inclusive) = inclusive {
793            if !inclusive.contains(&token.text) {
794                continue;
795            }
796        }
797        tokens.push(token.text.clone());
798    }
799    Tokens::new(tokens, token_type)
800}
801
802pub fn collect_doc_tokens(
803    text: &str,
804    tokenizer: &mut Box<dyn LanceTokenizer>,
805    inclusive: Option<&Tokens>,
806) -> Tokens {
807    let token_type = tokenizer.doc_type();
808    let mut stream = tokenizer.token_stream_for_doc(text);
809    let mut tokens = Vec::new();
810    while let Some(token) = stream.next() {
811        if let Some(inclusive) = inclusive {
812            if !inclusive.contains(&token.text) {
813                continue;
814            }
815        }
816        tokens.push(token.text.clone());
817    }
818    Tokens::new(tokens, token_type)
819}
820
821pub fn fill_fts_query_column(
822    query: &FtsQuery,
823    columns: &[String],
824    replace: bool,
825) -> Result<FtsQuery> {
826    if !query.is_missing_column() && !replace {
827        return Ok(query.clone());
828    }
829    match query {
830        FtsQuery::Match(match_query) => {
831            match columns.len() {
832                0 => {
833                    Err(Error::invalid_input(
834                        "Cannot perform full text search unless an INVERTED index has been created on at least one column".to_string(),
835                        location!(),
836                    ))
837                }
838                1 => {
839                    let column = columns[0].clone();
840                    let query = match_query.clone().with_column(Some(column));
841                    Ok(FtsQuery::Match(query))
842                }
843                _ => {
844                    // if there are multiple columns, we need to create a MultiMatch query
845                    let multi_match_query =
846                        MultiMatchQuery::try_new(match_query.terms.clone(), columns.to_vec())?;
847                    Ok(FtsQuery::MultiMatch(multi_match_query))
848                }
849            }
850        }
851        FtsQuery::Phrase(phrase_query) => {
852            match columns.len() {
853                0 => {
854                    Err(Error::invalid_input(
855                        "Cannot perform full text search unless an INVERTED index has been created on at least one column".to_string(),
856                        location!(),
857                    ))
858                }
859                1 => {
860                    let column = columns[0].clone();
861                    let query = phrase_query.clone().with_column(Some(column));
862                    Ok(FtsQuery::Phrase(query))
863                }
864                _ => {
865                    Err(Error::invalid_input(
866                        "the column must be specified in the query".to_string(),
867                        location!(),
868                    ))
869                }
870            }
871        }
872       FtsQuery::Boost(boost_query) => {
873            let positive = fill_fts_query_column(&boost_query.positive, columns, replace)?;
874            let negative = fill_fts_query_column(&boost_query.negative, columns, replace)?;
875            Ok(FtsQuery::Boost(BoostQuery {
876                positive: Box::new(positive),
877                negative: Box::new(negative),
878                negative_boost: boost_query.negative_boost,
879            }))
880        }
881        FtsQuery::MultiMatch(multi_match_query) => {
882            let match_queries = multi_match_query
883                .match_queries
884                .iter()
885                .map(|query| fill_fts_query_column(&FtsQuery::Match(query.clone()), columns, replace))
886                .map(|result| {
887                    result.map(|query| {
888                        if let FtsQuery::Match(match_query) = query {
889                            match_query
890                        } else {
891                            unreachable!("Expected MatchQuery")
892                        }
893                    })
894                })
895                .collect::<Result<Vec<_>>>()?;
896            Ok(FtsQuery::MultiMatch(MultiMatchQuery { match_queries }))
897       }
898        FtsQuery::Boolean(bool_query) => {
899            let must = bool_query
900                .must
901                .iter()
902                .map(|query| fill_fts_query_column(query, columns, replace))
903                .collect::<Result<Vec<_>>>()?;
904            let should = bool_query
905                .should
906                .iter()
907                .map(|query| fill_fts_query_column(query, columns, replace))
908                .collect::<Result<Vec<_>>>()?;
909            let must_not = bool_query
910                .must_not
911                .iter()
912                .map(|query| fill_fts_query_column(query, columns, replace))
913                .collect::<Result<Vec<_>>>()?;
914            Ok(FtsQuery::Boolean(BooleanQuery { must, should, must_not }))
915        }
916    }
917}
918
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Builds a default match query for `terms` bound to `column`.
    fn match_on(column: &str, terms: &str) -> MatchQuery {
        MatchQuery::new(terms.to_string()).with_column(Some(column.to_string()))
    }

    /// Serialization writes every field; deserialization fills in defaults
    /// for the fields that are absent.
    #[test]
    fn test_match_query_serde() {
        let query = match_on("text", "hello world")
            .with_boost(2.0)
            .with_fuzziness(Some(1))
            .with_max_expansions(10)
            .with_operator(Operator::And);

        let serialized = serde_json::to_value(&query).unwrap();
        let expected = json!({
            "column": "text",
            "terms": "hello world",
            "boost": 2.0,
            "fuzziness": 1,
            "max_expansions": 10,
            "operator": "And",
            "prefix_length": 0,
        });
        assert_eq!(serialized, expected);

        let minimal = json!({
            "column": "text",
            "terms": "hello world",
            "fuzziness": 0,
        });
        let parsed = serde_json::from_str::<MatchQuery>(&minimal.to_string()).unwrap();
        assert_eq!(parsed.column, Some("text".to_owned()));
        assert_eq!(parsed.terms, "hello world");
        assert_eq!(parsed.boost, 1.0);
        assert_eq!(parsed.fuzziness, Some(0));
        assert_eq!(parsed.max_expansions, 50);
        assert_eq!(parsed.operator, Operator::Or);
        assert_eq!(parsed.prefix_length, 0);
    }

    /// Phrase queries deserialize with and without the optional fields.
    #[test]
    fn test_phrase_query_serde() {
        let parsed: PhraseQuery = serde_json::from_value(json!({
            "terms": "hello world",
        }))
        .unwrap();
        assert_eq!(parsed, PhraseQuery::new("hello world".to_string()));

        let parsed: PhraseQuery = serde_json::from_value(json!({
            "terms": "hello world",
            "column": "text",
            "slop": 2,
        }))
        .unwrap();
        let expected = PhraseQuery::new("hello world".to_string())
            .with_column(Some("text".to_string()))
            .with_slop(2);
        assert_eq!(parsed, expected);
    }

    /// A lone match query becomes a plan with a single `should` clause.
    #[test]
    fn test_boolean_match_plan_match_query() {
        let query = match_on("text", "hello");
        let plan = BooleanMatchPlan::try_build(&FtsQuery::Match(query.clone())).unwrap();
        assert_eq!(plan.column, "text");
        assert_eq!(plan.should, vec![query]);
        assert!(plan.must.is_empty());
        assert!(plan.must_not.is_empty());
    }

    /// Clauses of a boolean query land in the matching plan lists.
    #[test]
    fn test_boolean_match_plan_boolean_query() {
        let should = match_on("text", "a");
        let must = match_on("text", "b");
        let must_not = match_on("text", "c");
        let query = BooleanQuery::new(vec![
            (Occur::Should, should.clone().into()),
            (Occur::Must, must.clone().into()),
            (Occur::MustNot, must_not.clone().into()),
        ]);
        let plan = BooleanMatchPlan::try_build(&FtsQuery::Boolean(query)).unwrap();
        assert_eq!(plan.column, "text");
        assert_eq!(plan.should, vec![should]);
        assert_eq!(plan.must, vec![must]);
        assert_eq!(plan.must_not, vec![must_not]);
    }

    /// Clauses on different columns cannot share a plan.
    #[test]
    fn test_boolean_match_plan_rejects_mixed_columns() {
        let query = BooleanQuery::new(vec![
            (Occur::Should, match_on("text", "a").into()),
            (Occur::Must, match_on("title", "b").into()),
        ]);
        assert!(BooleanMatchPlan::try_build(&FtsQuery::Boolean(query)).is_none());
    }

    /// Only match queries are accepted as clauses.
    #[test]
    fn test_boolean_match_plan_rejects_non_match_queries() {
        let phrase =
            PhraseQuery::new("hello world".to_string()).with_column(Some("text".to_string()));
        let query = BooleanQuery::new(vec![(Occur::Should, phrase.into())]);
        assert!(BooleanMatchPlan::try_build(&FtsQuery::Boolean(query)).is_none());
    }

    /// A query consisting solely of exclusions yields no plan.
    #[test]
    fn test_boolean_match_plan_rejects_only_must_not() {
        let query = BooleanQuery::new(vec![(Occur::MustNot, match_on("text", "c").into())]);
        assert!(BooleanMatchPlan::try_build(&FtsQuery::Boolean(query)).is_none());
    }

    /// A match query without a column yields no plan.
    #[test]
    fn test_boolean_match_plan_rejects_missing_column() {
        let query = MatchQuery::new("hello".to_string());
        assert!(BooleanMatchPlan::try_build(&FtsQuery::Match(query)).is_none());
    }
}