1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
use crate::query::{BooleanWeight, DisjunctionMaxCombiner, EnableScoring, Occur, Query, Weight};
use crate::{Score, Term};
/// The disjunction max query returns documents matching one or more wrapped queries,
/// called query clauses or clauses.
///
/// If a returned document matches multiple query clauses,
/// the `DisjunctionMaxQuery` assigns the document the highest relevance score from any matching
/// clause, plus a tie breaking increment for any additional matching subqueries.
///
/// ```rust
/// use tantivy::collector::TopDocs;
/// use tantivy::doc;
/// use tantivy::query::{DisjunctionMaxQuery, Query, QueryClone, TermQuery};
/// use tantivy::schema::{IndexRecordOption, Schema, TEXT};
/// use tantivy::Term;
/// use tantivy::Index;
/// use tantivy::IndexWriter;
///
/// fn main() -> tantivy::Result<()> {
/// let mut schema_builder = Schema::builder();
/// let title = schema_builder.add_text_field("title", TEXT);
/// let body = schema_builder.add_text_field("body", TEXT);
/// let schema = schema_builder.build();
/// let index = Index::create_in_ram(schema);
/// {
/// let mut index_writer: IndexWriter = index.writer(15_000_000)?;
/// index_writer.add_document(doc!(
/// title => "The Name of Girl",
/// ))?;
/// index_writer.add_document(doc!(
/// title => "The Diary of Muadib",
/// ))?;
/// index_writer.add_document(doc!(
/// title => "The Diary of Girl",
/// ))?;
/// index_writer.commit()?;
/// }
///
/// let reader = index.reader()?;
/// let searcher = reader.searcher();
///
/// // Build a `TermQuery` for each of "girl" and "diary" on the title field
/// let girl_term_query: Box<dyn Query> = Box::new(TermQuery::new(
/// Term::from_field_text(title, "girl"),
/// IndexRecordOption::Basic,
/// ));
/// let diary_term_query: Box<dyn Query> = Box::new(TermQuery::new(
/// Term::from_field_text(title, "diary"),
/// IndexRecordOption::Basic,
/// ));
///
/// // With no tie breaker, a document matching both "diary" and "girl" is scored by its
/// // best-scoring clause only, so all three documents end up with the same score.
/// let queries1 = vec![diary_term_query.box_clone(), girl_term_query.box_clone()];
/// let diary_and_girl = DisjunctionMaxQuery::new(queries1);
/// let documents = searcher.search(&diary_and_girl, &TopDocs::with_limit(3))?;
/// assert_eq!(documents[0].0, documents[1].0);
/// assert_eq!(documents[1].0, documents[2].0);
///
/// // With a tie breaker of 0.7, a document matching both "diary" and "girl" gets its
/// // best clause score plus 0.7 times the score of the other matching clause.
/// let queries2 = vec![diary_term_query.box_clone(), girl_term_query.box_clone()];
/// let tie_breaker = 0.7;
/// let diary_and_girl_with_tie_breaker = DisjunctionMaxQuery::with_tie_breaker(queries2, tie_breaker);
/// let documents = searcher.search(&diary_and_girl_with_tie_breaker, &TopDocs::with_limit(3))?;
/// assert_eq!(documents[1].0, documents[2].0);
/// // In this test every term yields the same score, so the math is simple: the score of a
/// // `DisjunctionMaxQuery` with a tie breaker equals
/// // term1 score + `tie_breaker` * term2 score, i.e. (1.0 + tie_breaker) * term score.
/// assert!(f32::abs(documents[0].0 - documents[1].0 * (1.0 + tie_breaker)) < 0.001);
/// Ok(())
/// }
/// ```
#[derive(Debug)]
pub struct DisjunctionMaxQuery {
    /// Wrapped query clauses; `Query::weight` registers each of them as an `Occur::Should`
    /// clause.
    disjuncts: Vec<Box<dyn Query>>,
    /// Tie breaker handed to `DisjunctionMaxCombiner::with_tie_breaker` when the weight is
    /// built; `DisjunctionMaxQuery::new` sets it to `0.0`.
    tie_breaker: Score,
}
impl Clone for DisjunctionMaxQuery {
    /// Produces an independent copy of the query: every clause is duplicated through
    /// `box_clone` and the tie breaker is carried over unchanged.
    fn clone(&self) -> Self {
        let mut cloned_clauses = Vec::with_capacity(self.disjuncts.len());
        for clause in &self.disjuncts {
            cloned_clauses.push(clause.box_clone());
        }
        DisjunctionMaxQuery::with_tie_breaker(cloned_clauses, self.tie_breaker)
    }
}
impl Query for DisjunctionMaxQuery {
    /// Builds the weight for this query.
    ///
    /// The weight of every clause is computed and registered as an `Occur::Should` clause of
    /// a `BooleanWeight`, which is given a factory producing a `DisjunctionMaxCombiner`
    /// configured with this query's tie breaker.
    ///
    /// # Errors
    ///
    /// Returns the first error produced while building the weight of a clause.
    fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
        let mut sub_weights = Vec::with_capacity(self.disjuncts.len());
        for clause in &self.disjuncts {
            let clause_weight = clause.weight(enable_scoring)?;
            sub_weights.push((Occur::Should, clause_weight));
        }
        // Copy the tie breaker out of `self` so the `move` closure owns its own value.
        let tie_breaker = self.tie_breaker;
        let scoring_enabled = enable_scoring.is_scoring_enabled();
        Ok(Box::new(BooleanWeight::new(
            sub_weights,
            scoring_enabled,
            Box::new(move || DisjunctionMaxCombiner::with_tie_breaker(tie_breaker)),
        )))
    }

    /// Forwards `visitor` to every clause, in clause order, by calling each clause's own
    /// `query_terms`.
    fn query_terms<'a>(&'a self, visitor: &mut dyn FnMut(&'a Term, bool)) {
        for clause in self.disjuncts.iter() {
            clause.query_terms(visitor);
        }
    }
}
impl DisjunctionMaxQuery {
    /// Creates a new `DisjunctionMaxQuery` wrapping `disjuncts`, using `tie_breaker` as the
    /// tie breaker.
    pub fn with_tie_breaker(
        disjuncts: Vec<Box<dyn Query>>,
        tie_breaker: Score,
    ) -> DisjunctionMaxQuery {
        Self {
            disjuncts,
            tie_breaker,
        }
    }

    /// Creates a new `DisjunctionMaxQuery` with no tie breaker, i.e. a tie breaker of `0.0`.
    pub fn new(disjuncts: Vec<Box<dyn Query>>) -> DisjunctionMaxQuery {
        Self {
            disjuncts,
            tie_breaker: 0.0,
        }
    }
}