Skip to main content

tantivy/query/
exclude.rs

1use crate::docset::{DocSet, SeekDangerResult, TERMINATED};
2use crate::query::Scorer;
3use crate::{DocId, Score};
4
5/// An exclusion set is a set of documents
6/// that should be excluded from a given DocSet.
7///
8/// It can be a single DocSet, or a Vec of DocSets.
9pub trait ExclusionSet: Send {
10    /// Returns `true` if the given `doc` is in the exclusion set.
11    fn contains(&mut self, doc: DocId) -> bool;
12}
13
14impl<TDocSet: DocSet> ExclusionSet for TDocSet {
15    #[inline]
16    fn contains(&mut self, doc: DocId) -> bool {
17        self.seek_danger(doc) == SeekDangerResult::Found
18    }
19}
20
21impl<TDocSet: DocSet> ExclusionSet for Vec<TDocSet> {
22    #[inline]
23    fn contains(&mut self, doc: DocId) -> bool {
24        for docset in self.iter_mut() {
25            if docset.seek_danger(doc) == SeekDangerResult::Found {
26                return true;
27            }
28        }
29        false
30    }
31}
32
/// A `DocSet` wrapper that hides every document found in an exclusion set.
///
/// Scores come from the wrapped docset only; the exclusion set never
/// contributes to scoring.
pub struct Exclude<TDocSet, TExclusionSet> {
    // Source of the candidate documents (and of the scores when it is a `Scorer`).
    underlying_docset: TDocSet,
    // Documents that must not be emitted.
    exclusion_set: TExclusionSet,
}
40
41impl<TDocSet, TExclusionSet> Exclude<TDocSet, TExclusionSet>
42where
43    TDocSet: DocSet,
44    TExclusionSet: ExclusionSet,
45{
46    /// Creates a new `ExcludeScorer`
47    pub fn new(
48        mut underlying_docset: TDocSet,
49        mut exclusion_set: TExclusionSet,
50    ) -> Exclude<TDocSet, TExclusionSet> {
51        while underlying_docset.doc() != TERMINATED {
52            let target = underlying_docset.doc();
53            if !exclusion_set.contains(target) {
54                break;
55            }
56            underlying_docset.advance();
57        }
58        Exclude {
59            underlying_docset,
60            exclusion_set,
61        }
62    }
63}
64
65impl<TDocSet, TExclusionSet> DocSet for Exclude<TDocSet, TExclusionSet>
66where
67    TDocSet: DocSet,
68    TExclusionSet: ExclusionSet,
69{
70    fn advance(&mut self) -> DocId {
71        loop {
72            let candidate = self.underlying_docset.advance();
73            if candidate == TERMINATED {
74                return TERMINATED;
75            }
76            if !self.exclusion_set.contains(candidate) {
77                return candidate;
78            }
79        }
80    }
81
82    fn seek(&mut self, target: DocId) -> DocId {
83        let candidate = self.underlying_docset.seek(target);
84        if candidate == TERMINATED {
85            return TERMINATED;
86        }
87        if !self.exclusion_set.contains(candidate) {
88            return candidate;
89        }
90        self.advance()
91    }
92
93    fn doc(&self) -> DocId {
94        self.underlying_docset.doc()
95    }
96
97    /// `.size_hint()` directly returns the size
98    /// of the underlying docset without taking in account
99    /// the fact that docs might be deleted.
100    fn size_hint(&self) -> u32 {
101        self.underlying_docset.size_hint()
102    }
103}
104
105impl<TScorer, TExclusionSet> Scorer for Exclude<TScorer, TExclusionSet>
106where
107    TScorer: Scorer,
108    TExclusionSet: ExclusionSet + 'static,
109{
110    #[inline]
111    fn score(&mut self) -> Score {
112        self.underlying_docset.score()
113    }
114}
115
#[cfg(test)]
mod tests {

    use super::*;
    use crate::postings::tests::test_skip_against_unoptimized;
    use crate::query::VecDocSet;
    use crate::tests::sample_with_seed;

    /// Fixture shared by the deterministic tests: 5, 8 and 15 are the only
    /// included docs that are missing from the excluded list.
    fn small_exclude() -> Exclude<VecDocSet, VecDocSet> {
        Exclude::new(
            VecDocSet::from(vec![1, 2, 5, 8, 10, 15, 24]),
            VecDocSet::from(vec![1, 2, 3, 10, 16, 24]),
        )
    }

    /// Draining the filtered docset yields exactly the non-excluded docs.
    #[test]
    fn test_exclude() {
        let mut exclude_scorer = small_exclude();
        let mut surviving = Vec::new();
        loop {
            let doc = exclude_scorer.doc();
            if doc == TERMINATED {
                break;
            }
            surviving.push(doc);
            exclude_scorer.advance();
        }
        assert_eq!(surviving, vec![5, 8, 15]);
    }

    /// Runs the shared skip helper on the small fixture with hand-picked targets.
    #[test]
    fn test_exclude_skip() {
        test_skip_against_unoptimized(|| Box::new(small_exclude()), vec![5, 8, 10, 15, 24]);
    }

    /// Same check as `test_exclude_skip`, on seeded random samples.
    #[test]
    fn test_exclude_skip_random() {
        let included = sample_with_seed(10_000, 0.1, 1);
        let excluded = sample_with_seed(10_000, 0.05, 2);
        let targets = sample_with_seed(10_000, 0.005, 3);
        test_skip_against_unoptimized(
            || {
                let docs = VecDocSet::from(included.clone());
                let banned = VecDocSet::from(excluded.clone());
                Box::new(Exclude::new(docs, banned))
            },
            targets,
        );
    }
}