1use crate::scalar::inverted::document_tokenizer::DocType;
5use crate::scalar::inverted::tokenizer::document_tokenizer::LanceTokenizer;
6use lance_core::{Error, Result};
7use serde::ser::SerializeMap;
8use serde::{Deserialize, Serialize};
9use std::collections::{HashMap, HashSet};
10
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct FtsSearchParams {
13 pub limit: Option<usize>,
14 pub wand_factor: f32,
15 pub fuzziness: Option<u32>,
16 pub max_expansions: usize,
17 pub phrase_slop: Option<u32>,
20 pub prefix_length: u32,
22}
23
24impl FtsSearchParams {
25 pub fn new() -> Self {
26 Self {
27 limit: None,
28 wand_factor: 1.0,
29 fuzziness: Some(0),
30 max_expansions: 50,
31 phrase_slop: None,
32 prefix_length: 0,
33 }
34 }
35
36 pub fn with_limit(mut self, limit: Option<usize>) -> Self {
37 self.limit = limit;
38 self
39 }
40
41 pub fn with_wand_factor(mut self, factor: f32) -> Self {
42 self.wand_factor = factor;
43 self
44 }
45
46 pub fn with_fuzziness(mut self, fuzziness: Option<u32>) -> Self {
47 self.fuzziness = fuzziness;
48 self
49 }
50
51 pub fn with_max_expansions(mut self, max_expansions: usize) -> Self {
52 self.max_expansions = max_expansions;
53 self
54 }
55
56 pub fn with_phrase_slop(mut self, phrase_slop: Option<u32>) -> Self {
57 self.phrase_slop = phrase_slop;
58 self
59 }
60
61 pub fn with_prefix_length(mut self, prefix_length: u32) -> Self {
62 self.prefix_length = prefix_length;
63 self
64 }
65}
66
67impl Default for FtsSearchParams {
68 fn default() -> Self {
69 Self::new()
70 }
71}
72
73#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Default)]
74pub enum Operator {
75 And,
76 #[default]
77 Or,
78}
79
80impl TryFrom<&str> for Operator {
81 type Error = Error;
82 fn try_from(value: &str) -> Result<Self> {
83 match value.to_ascii_uppercase().as_str() {
84 "AND" => Ok(Self::And),
85 "OR" => Ok(Self::Or),
86 _ => Err(Error::invalid_input(format!("Invalid operator: {}", value))),
87 }
88 }
89}
90
91impl From<Operator> for &'static str {
92 fn from(operator: Operator) -> Self {
93 match operator {
94 Operator::And => "AND",
95 Operator::Or => "OR",
96 }
97 }
98}
99
/// Common behaviour of every node in a full-text query tree.
pub trait FtsQueryNode {
    /// Returns the set of column names this node (and any children) refers to.
    fn columns(&self) -> HashSet<String>;
}
103
104#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
105#[serde(rename_all = "snake_case")]
106pub enum FtsQuery {
107 Match(MatchQuery),
109 Phrase(PhraseQuery),
110
111 Boost(BoostQuery),
113 MultiMatch(MultiMatchQuery),
114 Boolean(BooleanQuery),
115}
116
117impl std::fmt::Display for FtsQuery {
118 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
119 match self {
120 Self::Match(query) => write!(f, "Match({:?})", query),
121 Self::Phrase(query) => write!(f, "Phrase({:?})", query),
122 Self::Boost(query) => write!(
123 f,
124 "Boosting(positive={}, negative={}, negative_boost={})",
125 query.positive, query.negative, query.negative_boost
126 ),
127 Self::MultiMatch(query) => write!(f, "MultiMatch({:?})", query),
128 Self::Boolean(query) => {
129 write!(
130 f,
131 "Boolean(must={:?}, should={:?})",
132 query.must, query.should
133 )
134 }
135 }
136 }
137}
138
139impl FtsQueryNode for FtsQuery {
140 fn columns(&self) -> HashSet<String> {
141 match self {
142 Self::Match(query) => query.columns(),
143 Self::Phrase(query) => query.columns(),
144 Self::Boost(query) => {
145 let mut columns = query.positive.columns();
146 columns.extend(query.negative.columns());
147 columns
148 }
149 Self::MultiMatch(query) => {
150 let mut columns = HashSet::new();
151 for match_query in &query.match_queries {
152 columns.extend(match_query.columns());
153 }
154 columns
155 }
156 Self::Boolean(query) => {
157 let mut columns = HashSet::new();
158 for query in &query.must {
159 columns.extend(query.columns());
160 }
161 for query in &query.should {
162 columns.extend(query.columns());
163 }
164 columns
165 }
166 }
167 }
168}
169
170impl FtsQuery {
171 pub fn query(&self) -> String {
172 match self {
173 Self::Match(query) => query.terms.clone(),
174 Self::Phrase(query) => format!("\"{}\"", query.terms), Self::Boost(query) => query.positive.query(),
176 Self::MultiMatch(query) => query.match_queries[0].terms.clone(),
177 Self::Boolean(_) => {
178 String::new()
180 }
181 }
182 }
183
184 pub fn is_missing_column(&self) -> bool {
185 match self {
186 Self::Match(query) => query.column.is_none(),
187 Self::Phrase(query) => query.column.is_none(),
188 Self::Boost(query) => {
189 query.positive.is_missing_column() || query.negative.is_missing_column()
190 }
191 Self::MultiMatch(query) => query.match_queries.iter().any(|q| q.column.is_none()),
192 Self::Boolean(query) => {
193 query.must.iter().any(|q| q.is_missing_column())
194 || query.should.iter().any(|q| q.is_missing_column())
195 }
196 }
197 }
198
199 pub fn with_column(self, column: String) -> Self {
200 match self {
201 Self::Match(query) => Self::Match(query.with_column(Some(column))),
202 Self::Phrase(query) => Self::Phrase(query.with_column(Some(column))),
203 Self::Boost(query) => {
204 let positive = query.positive.with_column(column.clone());
205 let negative = query.negative.with_column(column);
206 Self::Boost(BoostQuery {
207 positive: Box::new(positive),
208 negative: Box::new(negative),
209 negative_boost: query.negative_boost,
210 })
211 }
212 Self::MultiMatch(query) => {
213 let match_queries = query
214 .match_queries
215 .into_iter()
216 .map(|q| q.with_column(Some(column.clone())))
217 .collect();
218 Self::MultiMatch(MultiMatchQuery { match_queries })
219 }
220 Self::Boolean(query) => {
221 let must = query
222 .must
223 .into_iter()
224 .map(|q| q.with_column(column.clone()))
225 .collect();
226 let should = query
227 .should
228 .into_iter()
229 .map(|q| q.with_column(column.clone()))
230 .collect();
231 let must_not = query
232 .must_not
233 .into_iter()
234 .map(|q| q.with_column(column.clone()))
235 .collect();
236 Self::Boolean(BooleanQuery {
237 must,
238 should,
239 must_not,
240 })
241 }
242 }
243 }
244}
245
246impl From<MatchQuery> for FtsQuery {
247 fn from(query: MatchQuery) -> Self {
248 Self::Match(query)
249 }
250}
251
252impl From<PhraseQuery> for FtsQuery {
253 fn from(query: PhraseQuery) -> Self {
254 Self::Phrase(query)
255 }
256}
257
258impl From<BoostQuery> for FtsQuery {
259 fn from(query: BoostQuery) -> Self {
260 Self::Boost(query)
261 }
262}
263
264impl From<MultiMatchQuery> for FtsQuery {
265 fn from(query: MultiMatchQuery) -> Self {
266 Self::MultiMatch(query)
267 }
268}
269
270impl From<BooleanQuery> for FtsQuery {
271 fn from(query: BooleanQuery) -> Self {
272 Self::Boolean(query)
273 }
274}
275
276#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
277pub struct MatchQuery {
278 pub column: Option<String>,
281 pub terms: String,
282
283 #[serde(default = "MatchQuery::default_boost")]
285 pub boost: f32,
286
287 pub fuzziness: Option<u32>,
294
295 #[serde(default = "MatchQuery::default_max_expansions")]
298 pub max_expansions: usize,
299
300 #[serde(default)]
305 pub operator: Operator,
306
307 #[serde(default)]
310 pub prefix_length: u32,
311}
312
313impl MatchQuery {
314 pub fn new(terms: String) -> Self {
315 Self {
316 column: None,
317 terms,
318 boost: 1.0,
319 fuzziness: Some(0),
320 max_expansions: 50,
321 operator: Operator::Or,
322 prefix_length: 0,
323 }
324 }
325
326 pub(crate) fn default_boost() -> f32 {
327 1.0
328 }
329
330 pub(crate) fn default_max_expansions() -> usize {
331 50
332 }
333
334 pub fn with_column(mut self, column: Option<String>) -> Self {
335 self.column = column;
336 self
337 }
338
339 pub fn with_boost(mut self, boost: f32) -> Self {
340 self.boost = boost;
341 self
342 }
343
344 pub fn with_fuzziness(mut self, fuzziness: Option<u32>) -> Self {
345 self.fuzziness = fuzziness;
346 self
347 }
348
349 pub fn with_max_expansions(mut self, max_expansions: usize) -> Self {
350 self.max_expansions = max_expansions;
351 self
352 }
353
354 pub fn with_operator(mut self, operator: Operator) -> Self {
355 self.operator = operator;
356 self
357 }
358
359 pub fn with_prefix_length(mut self, prefix_length: u32) -> Self {
360 self.prefix_length = prefix_length;
361 self
362 }
363
364 pub fn auto_fuzziness(token: &str) -> u32 {
365 match token.len() {
366 0..=2 => 0,
367 3..=5 => 1,
368 _ => 2,
369 }
370 }
371}
372
373impl FtsQueryNode for MatchQuery {
374 fn columns(&self) -> HashSet<String> {
375 let mut columns = HashSet::new();
376 if let Some(column) = &self.column {
377 columns.insert(column.clone());
378 }
379 columns
380 }
381}
382
383#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
384pub struct PhraseQuery {
385 pub column: Option<String>,
388 pub terms: String,
389 #[serde(default = "u32::default")]
390 pub slop: u32,
391}
392
393impl PhraseQuery {
394 pub fn new(terms: String) -> Self {
395 Self {
396 column: None,
397 terms,
398 slop: 0,
399 }
400 }
401
402 pub fn with_column(mut self, column: Option<String>) -> Self {
403 self.column = column;
404 self
405 }
406
407 pub fn with_slop(mut self, slop: u32) -> Self {
408 self.slop = slop;
409 self
410 }
411}
412
413impl FtsQueryNode for PhraseQuery {
414 fn columns(&self) -> HashSet<String> {
415 let mut columns = HashSet::new();
416 if let Some(column) = &self.column {
417 columns.insert(column.clone());
418 }
419 columns
420 }
421}
422
423#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
424pub struct BoostQuery {
425 pub positive: Box<FtsQuery>,
426 pub negative: Box<FtsQuery>,
427 #[serde(default = "BoostQuery::default_negative_boost")]
428 pub negative_boost: f32,
429}
430
431impl BoostQuery {
432 pub fn new(positive: FtsQuery, negative: FtsQuery, negative_boost: Option<f32>) -> Self {
433 Self {
434 positive: Box::new(positive),
435 negative: Box::new(negative),
436 negative_boost: negative_boost.unwrap_or(0.5),
437 }
438 }
439
440 fn default_negative_boost() -> f32 {
441 0.5
442 }
443}
444
445impl FtsQueryNode for BoostQuery {
446 fn columns(&self) -> HashSet<String> {
447 let mut columns = self.positive.columns();
448 columns.extend(self.negative.columns());
449 columns
450 }
451}
452
453#[derive(Debug, Clone, PartialEq)]
454pub struct MultiMatchQuery {
455 pub match_queries: Vec<MatchQuery>,
457}
458
459impl Serialize for MultiMatchQuery {
460 fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
461 where
462 S: serde::Serializer,
463 {
464 let mut map = serializer.serialize_map(Some(3))?;
465
466 let query = self.match_queries.first().ok_or(serde::ser::Error::custom(
467 "MultiMatchQuery must have at least one MatchQuery".to_string(),
468 ))?;
469 map.serialize_entry("query", &query.terms)?;
470 let columns = self
471 .match_queries
472 .iter()
473 .map(|q| q.column.as_ref().unwrap().clone())
474 .collect::<Vec<String>>();
475 map.serialize_entry("columns", &columns)?;
476 let boosts = self
477 .match_queries
478 .iter()
479 .map(|q| q.boost)
480 .collect::<Vec<f32>>();
481 map.serialize_entry("boost", &boosts)?;
482 map.end()
483 }
484}
485
486impl<'de> Deserialize<'de> for MultiMatchQuery {
487 fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
488 where
489 D: serde::Deserializer<'de>,
490 {
491 #[derive(Deserialize)]
492 struct MultiMatchQueryData {
493 query: String,
494 columns: Vec<String>,
495 boost: Option<Vec<f32>>,
496 }
497
498 let data = MultiMatchQueryData::deserialize(deserializer)?;
499 let boosts = data.boost.unwrap_or(vec![1.0; data.columns.len()]);
500
501 Self::try_new(data.query, data.columns)
502 .map_err(serde::de::Error::custom)?
503 .try_with_boosts(boosts)
504 .map_err(serde::de::Error::custom)
505 }
506}
507
508impl MultiMatchQuery {
509 pub fn try_new(query: String, columns: Vec<String>) -> Result<Self> {
510 if columns.is_empty() {
511 return Err(Error::invalid_input(
512 "Cannot create MultiMatchQuery with no columns".to_string(),
513 ));
514 }
515
516 let match_queries = columns
517 .into_iter()
518 .map(|column| MatchQuery::new(query.clone()).with_column(Some(column)))
519 .collect();
520 Ok(Self { match_queries })
521 }
522
523 pub fn try_with_boosts(mut self, boosts: Vec<f32>) -> Result<Self> {
524 if boosts.len() != self.match_queries.len() {
525 return Err(Error::invalid_input(
526 "The number of boosts must match the number of queries".to_string(),
527 ));
528 }
529
530 for (query, boost) in self.match_queries.iter_mut().zip(boosts) {
531 query.boost = boost;
532 }
533 Ok(self)
534 }
535
536 pub fn with_operator(mut self, operator: Operator) -> Self {
537 for query in &mut self.match_queries {
538 query.operator = operator;
539 }
540 self
541 }
542}
543
544impl FtsQueryNode for MultiMatchQuery {
545 fn columns(&self) -> HashSet<String> {
546 let mut columns = HashSet::with_capacity(self.match_queries.len());
547 for query in &self.match_queries {
548 columns.extend(query.columns());
549 }
550 columns
551 }
552}
553
/// How a clause participates in a [`BooleanQuery`]: it selects which of the
/// `should`, `must` or `must_not` lists the clause is placed in.
///
/// The derives mirror those on `Operator` so the type can be printed,
/// copied and compared like the other small enums in this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Occur {
    /// Clause goes into the `should` list.
    Should,
    /// Clause goes into the `must` list.
    Must,
    /// Clause goes into the `must_not` list.
    MustNot,
}
559
560impl TryFrom<&str> for Occur {
561 type Error = Error;
562 fn try_from(value: &str) -> Result<Self> {
563 match value.to_ascii_uppercase().as_str() {
564 "SHOULD" => Ok(Self::Should),
565 "MUST" => Ok(Self::Must),
566 "MUST_NOT" => Ok(Self::MustNot),
567 _ => Err(Error::invalid_input(format!(
568 "Invalid occur value: {}",
569 value
570 ))),
571 }
572 }
573}
574
575impl From<Occur> for &'static str {
576 fn from(occur: Occur) -> Self {
577 match occur {
578 Occur::Should => "SHOULD",
579 Occur::Must => "MUST",
580 Occur::MustNot => "MUST_NOT",
581 }
582 }
583}
584
585#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
586pub struct BooleanQuery {
587 pub should: Vec<FtsQuery>,
588 pub must: Vec<FtsQuery>,
589 pub must_not: Vec<FtsQuery>,
590}
591
592impl BooleanQuery {
593 pub fn new(iter: impl IntoIterator<Item = (Occur, FtsQuery)>) -> Self {
594 let mut should = Vec::new();
595 let mut must = Vec::new();
596 let mut must_not = Vec::new();
597 for (occur, query) in iter {
598 match occur {
599 Occur::Should => should.push(query),
600 Occur::Must => must.push(query),
601 Occur::MustNot => must_not.push(query),
602 }
603 }
604 Self {
605 should,
606 must,
607 must_not,
608 }
609 }
610
611 pub fn with_should(mut self, query: FtsQuery) -> Self {
612 self.should.push(query);
613 self
614 }
615
616 pub fn with_must(mut self, query: FtsQuery) -> Self {
617 self.must.push(query);
618 self
619 }
620
621 pub fn with_must_not(mut self, query: FtsQuery) -> Self {
622 self.must_not.push(query);
623 self
624 }
625}
626
/// Test-only flattening of a query into match clauses over a single column.
#[cfg(test)]
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct BooleanMatchPlan {
    /// The one column shared by every clause.
    pub column: String,
    /// Match queries taken from the `should` list.
    pub should: Vec<MatchQuery>,
    /// Match queries taken from the `must` list.
    pub must: Vec<MatchQuery>,
    /// Match queries taken from the `must_not` list.
    pub must_not: Vec<MatchQuery>,
}
635
#[cfg(test)]
impl BooleanMatchPlan {
    /// Tries to flatten `query` into a plan.
    ///
    /// Returns `None` unless `query` is a `Match`, or a `Boolean` made only of
    /// `Match` clauses; every clause must name the same column, and a boolean
    /// query needs at least one `should` or `must` clause.
    pub(crate) fn try_build(query: &FtsQuery) -> Option<Self> {
        let mut column: Option<String> = None;
        match query {
            FtsQuery::Match(single) => {
                let mut should = Vec::with_capacity(1);
                Self::push_match(&mut should, &mut column, single)?;
                let column = column?;
                Some(Self {
                    column,
                    should,
                    must: Vec::new(),
                    must_not: Vec::new(),
                })
            }
            FtsQuery::Boolean(boolean) => {
                let should = Self::collect_matches(&boolean.should, &mut column)?;
                let must = Self::collect_matches(&boolean.must, &mut column)?;
                let must_not = Self::collect_matches(&boolean.must_not, &mut column)?;

                // A plan made only of exclusions is rejected.
                let has_positive_clause = !(should.is_empty() && must.is_empty());
                if !has_positive_clause {
                    return None;
                }
                let column = column?;
                Some(Self {
                    column,
                    should,
                    must,
                    must_not,
                })
            }
            _ => None,
        }
    }

    /// Appends a clone of `query` to `dest`, recording its column in `column`
    /// on first use. Returns `None` if the query has no column or its column
    /// differs from the one already recorded.
    fn push_match(
        dest: &mut Vec<MatchQuery>,
        column: &mut Option<String>,
        query: &MatchQuery,
    ) -> Option<()> {
        let query_column = query.column.as_ref()?;
        let expected = column.get_or_insert_with(|| query_column.clone());
        if *expected != *query_column {
            return None;
        }
        dest.push(query.clone());
        Some(())
    }

    /// Collects all of `queries` as match queries, or `None` if any of them
    /// is not a `Match` or fails the column check in `push_match`.
    fn collect_matches(
        queries: &[FtsQuery],
        column: &mut Option<String>,
    ) -> Option<Vec<MatchQuery>> {
        let mut matches = Vec::with_capacity(queries.len());
        for query in queries {
            match query {
                FtsQuery::Match(match_query) => {
                    Self::push_match(&mut matches, column, match_query)?
                }
                _ => return None,
            }
        }
        Some(matches)
    }
}
702
703impl FtsQueryNode for BooleanQuery {
704 fn columns(&self) -> HashSet<String> {
705 let mut columns = HashSet::new();
706 for query in &self.should {
707 columns.extend(query.columns());
708 }
709 for query in &self.must {
710 columns.extend(query.columns());
711 }
712 for query in &self.must_not {
713 columns.extend(query.columns());
714 }
715 columns
716 }
717}
718
719#[derive(Clone)]
720pub struct Tokens {
721 tokens: Vec<String>,
722 positions: Vec<u32>,
723 tokens_map: HashMap<String, usize>,
724 token_type: DocType,
725}
726
727impl Tokens {
728 pub fn new(tokens: Vec<String>, token_type: DocType) -> Self {
729 let positions = (0..tokens.len() as u32).collect();
730 Self::with_positions(tokens, positions, token_type)
731 }
732
733 pub fn with_positions(tokens: Vec<String>, positions: Vec<u32>, token_type: DocType) -> Self {
734 debug_assert_eq!(tokens.len(), positions.len());
735 let mut tokens_vec = vec![];
736 let mut tokens_map = HashMap::new();
737 for (idx, token) in tokens.into_iter().enumerate() {
738 tokens_vec.push(token.clone());
739 tokens_map.insert(token, idx);
740 }
741
742 Self {
743 tokens: tokens_vec,
744 positions,
745 tokens_map,
746 token_type,
747 }
748 }
749
750 pub fn len(&self) -> usize {
751 self.tokens.len()
752 }
753
754 pub fn is_empty(&self) -> bool {
755 self.tokens.is_empty()
756 }
757
758 pub fn token_type(&self) -> &DocType {
759 &self.token_type
760 }
761
762 pub fn contains(&self, token: &str) -> bool {
763 self.tokens_map.contains_key(token)
764 }
765
766 pub fn token_index(&self, token: &str) -> Option<usize> {
767 self.tokens_map.get(token).copied()
768 }
769
770 pub fn get_token(&self, index: usize) -> &str {
771 &self.tokens[index]
772 }
773
774 pub fn position(&self, index: usize) -> u32 {
775 self.positions[index]
776 }
777}
778
779impl IntoIterator for Tokens {
780 type Item = String;
781 type IntoIter = std::vec::IntoIter<String>;
782
783 fn into_iter(self) -> Self::IntoIter {
784 self.tokens.into_iter()
785 }
786}
787
788impl<'a> IntoIterator for &'a Tokens {
789 type Item = &'a String;
790 type IntoIter = std::slice::Iter<'a, String>;
791
792 fn into_iter(self) -> Self::IntoIter {
793 self.tokens.iter()
794 }
795}
796
797pub fn collect_query_tokens(text: &str, tokenizer: &mut Box<dyn LanceTokenizer>) -> Tokens {
798 let token_type = tokenizer.doc_type();
799 let mut stream = tokenizer.token_stream_for_search(text);
800 let mut tokens = Vec::new();
801 let mut positions = Vec::new();
802 while let Some(token) = stream.next() {
803 tokens.push(token.text.clone());
804 positions.push(token.position as u32);
805 }
806 Tokens::with_positions(tokens, positions, token_type)
807}
808
809pub fn has_query_token(
810 text: &str,
811 tokenizer: &mut Box<dyn LanceTokenizer>,
812 query_tokens: &Tokens,
813) -> bool {
814 let mut stream = tokenizer.token_stream_for_doc(text);
815 while let Some(token) = stream.next() {
816 if query_tokens.contains(&token.text) {
817 return true;
818 }
819 }
820 false
821}
822
823pub fn fill_fts_query_column(
824 query: &FtsQuery,
825 columns: &[String],
826 replace: bool,
827) -> Result<FtsQuery> {
828 if !query.is_missing_column() && !replace {
829 return Ok(query.clone());
830 }
831 match query {
832 FtsQuery::Match(match_query) => {
833 match columns.len() {
834 0 => {
835 Err(Error::invalid_input("Cannot perform full text search unless an INVERTED index has been created on at least one column".to_string()))
836 }
837 1 => {
838 let column = columns[0].clone();
839 let query = match_query.clone().with_column(Some(column));
840 Ok(FtsQuery::Match(query))
841 }
842 _ => {
843 let multi_match_query =
845 MultiMatchQuery::try_new(match_query.terms.clone(), columns.to_vec())?;
846 Ok(FtsQuery::MultiMatch(multi_match_query))
847 }
848 }
849 }
850 FtsQuery::Phrase(phrase_query) => {
851 match columns.len() {
852 0 => {
853 Err(Error::invalid_input("Cannot perform full text search unless an INVERTED index has been created on at least one column".to_string()))
854 }
855 1 => {
856 let column = columns[0].clone();
857 let query = phrase_query.clone().with_column(Some(column));
858 Ok(FtsQuery::Phrase(query))
859 }
860 _ => {
861 Err(Error::invalid_input("the column must be specified in the query".to_string()))
862 }
863 }
864 }
865 FtsQuery::Boost(boost_query) => {
866 let positive = fill_fts_query_column(&boost_query.positive, columns, replace)?;
867 let negative = fill_fts_query_column(&boost_query.negative, columns, replace)?;
868 Ok(FtsQuery::Boost(BoostQuery {
869 positive: Box::new(positive),
870 negative: Box::new(negative),
871 negative_boost: boost_query.negative_boost,
872 }))
873 }
874 FtsQuery::MultiMatch(multi_match_query) => {
875 let match_queries = multi_match_query
876 .match_queries
877 .iter()
878 .map(|query| fill_fts_query_column(&FtsQuery::Match(query.clone()), columns, replace))
879 .map(|result| {
880 result.map(|query| {
881 if let FtsQuery::Match(match_query) = query {
882 match_query
883 } else {
884 unreachable!("Expected MatchQuery")
885 }
886 })
887 })
888 .collect::<Result<Vec<_>>>()?;
889 Ok(FtsQuery::MultiMatch(MultiMatchQuery { match_queries }))
890 }
891 FtsQuery::Boolean(bool_query) => {
892 let must = bool_query
893 .must
894 .iter()
895 .map(|query| fill_fts_query_column(query, columns, replace))
896 .collect::<Result<Vec<_>>>()?;
897 let should = bool_query
898 .should
899 .iter()
900 .map(|query| fill_fts_query_column(query, columns, replace))
901 .collect::<Result<Vec<_>>>()?;
902 let must_not = bool_query
903 .must_not
904 .iter()
905 .map(|query| fill_fts_query_column(query, columns, replace))
906 .collect::<Result<Vec<_>>>()?;
907 Ok(FtsQuery::Boolean(BooleanQuery { must, should, must_not }))
908 }
909 }
910}
911
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Shorthand for a match query bound to `column`.
    fn match_on(terms: &str, column: &str) -> MatchQuery {
        MatchQuery::new(terms.to_string()).with_column(Some(column.to_string()))
    }

    // Serialization writes every field; deserialization fills in defaults.
    #[test]
    fn test_match_query_serde() {
        let query = match_on("hello world", "text")
            .with_boost(2.0)
            .with_fuzziness(Some(1))
            .with_max_expansions(10)
            .with_operator(Operator::And);

        let serialized = serde_json::to_value(&query).unwrap();
        let expected = json!({
            "column": "text",
            "terms": "hello world",
            "boost": 2.0,
            "fuzziness": 1,
            "max_expansions": 10,
            "operator": "And",
            "prefix_length": 0,
        });
        assert_eq!(serialized, expected);

        let minimal = json!({
            "column": "text",
            "terms": "hello world",
            "fuzziness": 0,
        });
        let parsed = serde_json::from_str::<MatchQuery>(&minimal.to_string()).unwrap();
        assert_eq!(parsed.column, Some("text".to_owned()));
        assert_eq!(parsed.terms, "hello world");
        assert_eq!(parsed.boost, 1.0);
        assert_eq!(parsed.fuzziness, Some(0));
        assert_eq!(parsed.max_expansions, 50);
        assert_eq!(parsed.operator, Operator::Or);
        assert_eq!(parsed.prefix_length, 0);
    }

    // `column` and `slop` are optional in the serialized form.
    #[test]
    fn test_phrase_query_serde() {
        let parsed: PhraseQuery = serde_json::from_value(json!({
            "terms": "hello world",
        }))
        .unwrap();
        assert_eq!(parsed, PhraseQuery::new("hello world".to_string()));

        let parsed: PhraseQuery = serde_json::from_value(json!({
            "terms": "hello world",
            "column": "text",
            "slop": 2,
        }))
        .unwrap();
        let expected = PhraseQuery::new("hello world".to_string())
            .with_column(Some("text".to_string()))
            .with_slop(2);
        assert_eq!(parsed, expected);
    }

    // A lone match query becomes a plan with a single `should` clause.
    #[test]
    fn test_boolean_match_plan_match_query() {
        let query = match_on("hello", "text");
        let plan = BooleanMatchPlan::try_build(&FtsQuery::Match(query.clone())).unwrap();
        assert_eq!(plan.column, "text");
        assert_eq!(plan.should, vec![query]);
        assert!(plan.must.is_empty());
        assert!(plan.must_not.is_empty());
    }

    // Each clause of a boolean query lands in the matching plan list.
    #[test]
    fn test_boolean_match_plan_boolean_query() {
        let should = match_on("a", "text");
        let must = match_on("b", "text");
        let must_not = match_on("c", "text");
        let query = BooleanQuery::new(vec![
            (Occur::Should, should.clone().into()),
            (Occur::Must, must.clone().into()),
            (Occur::MustNot, must_not.clone().into()),
        ]);
        let plan = BooleanMatchPlan::try_build(&FtsQuery::Boolean(query)).unwrap();
        assert_eq!(plan.column, "text");
        assert_eq!(plan.should, vec![should]);
        assert_eq!(plan.must, vec![must]);
        assert_eq!(plan.must_not, vec![must_not]);
    }

    // Clauses on different columns cannot share a plan.
    #[test]
    fn test_boolean_match_plan_rejects_mixed_columns() {
        let should = match_on("a", "text");
        let must = match_on("b", "title");
        let query = BooleanQuery::new(vec![
            (Occur::Should, should.into()),
            (Occur::Must, must.into()),
        ]);
        assert!(BooleanMatchPlan::try_build(&FtsQuery::Boolean(query)).is_none());
    }

    // Only match queries are accepted as clauses.
    #[test]
    fn test_boolean_match_plan_rejects_non_match_queries() {
        let phrase =
            PhraseQuery::new("hello world".to_string()).with_column(Some("text".to_string()));
        let query = BooleanQuery::new(vec![(Occur::Should, phrase.into())]);
        assert!(BooleanMatchPlan::try_build(&FtsQuery::Boolean(query)).is_none());
    }

    // A plan needs at least one `should` or `must` clause.
    #[test]
    fn test_boolean_match_plan_rejects_only_must_not() {
        let must_not = match_on("c", "text");
        let query = BooleanQuery::new(vec![(Occur::MustNot, must_not.into())]);
        assert!(BooleanMatchPlan::try_build(&FtsQuery::Boolean(query)).is_none());
    }

    // A match query without a column cannot be planned.
    #[test]
    fn test_boolean_match_plan_rejects_missing_column() {
        let query = MatchQuery::new("hello".to_string());
        assert!(BooleanMatchPlan::try_build(&FtsQuery::Match(query)).is_none());
    }
}