summavy/query/
exclude.rs

1use crate::docset::{DocSet, TERMINATED};
2use crate::query::Scorer;
3use crate::{DocId, Score};
4
5#[inline]
6fn is_within<TDocSetExclude: DocSet>(docset: &mut TDocSetExclude, doc: DocId) -> bool {
7    docset.doc() <= doc && docset.seek(doc) == doc
8}
9
/// A `DocSet` wrapper that yields the documents of one docset minus the
/// documents of another.
///
/// Only the underlying docset contributes to scoring; the excluding docset
/// is used purely as a filter.
pub struct Exclude<TDocSet, TDocSetExclude> {
    /// The docset whose documents are emitted (unless excluded).
    underlying_docset: TDocSet,
    /// The docset whose documents are removed from the output.
    excluding_docset: TDocSetExclude,
}
17
18impl<TDocSet, TDocSetExclude> Exclude<TDocSet, TDocSetExclude>
19where
20    TDocSet: DocSet,
21    TDocSetExclude: DocSet,
22{
23    /// Creates a new `ExcludeScorer`
24    pub fn new(
25        mut underlying_docset: TDocSet,
26        mut excluding_docset: TDocSetExclude,
27    ) -> Exclude<TDocSet, TDocSetExclude> {
28        while underlying_docset.doc() != TERMINATED {
29            let target = underlying_docset.doc();
30            if !is_within(&mut excluding_docset, target) {
31                break;
32            }
33            underlying_docset.advance();
34        }
35        Exclude {
36            underlying_docset,
37            excluding_docset,
38        }
39    }
40}
41
42impl<TDocSet, TDocSetExclude> DocSet for Exclude<TDocSet, TDocSetExclude>
43where
44    TDocSet: DocSet,
45    TDocSetExclude: DocSet,
46{
47    fn advance(&mut self) -> DocId {
48        loop {
49            let candidate = self.underlying_docset.advance();
50            if candidate == TERMINATED {
51                return TERMINATED;
52            }
53            if !is_within(&mut self.excluding_docset, candidate) {
54                return candidate;
55            }
56        }
57    }
58
59    fn seek(&mut self, target: DocId) -> DocId {
60        let candidate = self.underlying_docset.seek(target);
61        if candidate == TERMINATED {
62            return TERMINATED;
63        }
64        if !is_within(&mut self.excluding_docset, candidate) {
65            return candidate;
66        }
67        self.advance()
68    }
69
70    fn doc(&self) -> DocId {
71        self.underlying_docset.doc()
72    }
73
74    /// `.size_hint()` directly returns the size
75    /// of the underlying docset without taking in account
76    /// the fact that docs might be deleted.
77    fn size_hint(&self) -> u32 {
78        self.underlying_docset.size_hint()
79    }
80}
81
82impl<TScorer, TDocSetExclude> Scorer for Exclude<TScorer, TDocSetExclude>
83where
84    TScorer: Scorer,
85    TDocSetExclude: DocSet + 'static,
86{
87    fn score(&mut self) -> Score {
88        self.underlying_docset.score()
89    }
90}
91
#[cfg(test)]
mod tests {

    use super::*;
    use crate::postings::tests::test_skip_against_unoptimized;
    use crate::query::VecDocSet;
    use crate::tests::sample_with_seed;

    /// Builds the small hand-written fixture used by the deterministic tests.
    fn small_exclude() -> Exclude<VecDocSet, VecDocSet> {
        Exclude::new(
            VecDocSet::from(vec![1, 2, 5, 8, 10, 15, 24]),
            VecDocSet::from(vec![1, 2, 3, 10, 16, 24]),
        )
    }

    /// Iterating with `advance()` yields exactly the non-excluded documents.
    #[test]
    fn test_exclude() {
        let mut exclude_scorer = small_exclude();
        let mut surviving_docs = Vec::new();
        loop {
            let current = exclude_scorer.doc();
            if current == TERMINATED {
                break;
            }
            surviving_docs.push(current);
            exclude_scorer.advance();
        }
        assert_eq!(surviving_docs, vec![5, 8, 15]);
    }

    /// Seeking on the small fixture is checked by the shared skip-test helper.
    #[test]
    fn test_exclude_skip() {
        let seek_targets = vec![5, 8, 10, 15, 24];
        test_skip_against_unoptimized(|| Box::new(small_exclude()), seek_targets);
    }

    /// Same check as `test_exclude_skip`, on seeded random samples.
    #[test]
    fn test_exclude_skip_random() {
        let included_docs = sample_with_seed(10_000, 0.1, 1);
        let excluded_docs = sample_with_seed(10_000, 0.05, 2);
        let seek_targets = sample_with_seed(10_000, 0.005, 3);
        test_skip_against_unoptimized(
            || {
                let underlying = VecDocSet::from(included_docs.clone());
                let excluding = VecDocSet::from(excluded_docs.clone());
                Box::new(Exclude::new(underlying, excluding))
            },
            seek_targets,
        );
    }
}