Skip to main content

iqdb_filter/
strategy.rs

1//! [`FilterStrategy`] — how an index applies a [`Filter`], and the selector
2//! that chooses it.
3//!
4//! [`FilterStrategy`] names the four shapes; [`StrategySelector`] and the
5//! Tier-1 [`choose_strategy`] resolve a concrete `PreFilter` / `PostFilter`
6//! from a selectivity estimate. `PreFilter` and `PostFilter` are realised by
7//! [`crate::FilterEvaluator::prefilter`] / [`crate::FilterEvaluator::postfilter`];
8//! `InFilter` (graph-traversal pushdown) remains reserved for a future
9//! approximate-index consumer (see `dev/ROADMAP.md`).
10//!
11//! [`Filter`]: iqdb_types::Filter
12
13/// How an index plans to apply a metadata [`Filter`](iqdb_types::Filter)
14/// relative to its distance scan.
15///
16/// `#[non_exhaustive]` because selection logic will add variants (and likely
17/// adapter parameters) as approximate indexes start honouring filters.
18/// Callers should not exhaustively match on this enum and should not rely on
19/// the variant set being closed.
20///
21/// # Variants in plain terms
22///
23/// - [`FilterStrategy::PreFilter`] — apply the predicate **before** the
24///   distance computation; only matching candidates enter the scan. Cheap
25///   when the predicate is selective, wasteful when it is broad.
26/// - [`FilterStrategy::PostFilter`] — run the distance scan over every
27///   candidate, then drop hits that fail the predicate. Cheap when the
28///   predicate is broad, defeats top-`k` truncation when the predicate is
29///   selective (you may have to scan far past `k` to refill the result set).
30/// - [`FilterStrategy::InFilter`] — interleave predicate evaluation with the
31///   distance walk so a graph index can prune branches it knows can't
32///   produce surviving candidates. Requires `MetadataIndex` co-design.
33/// - [`FilterStrategy::Auto`] — let the index pick from the above based on
34///   estimated selectivity. Requires the selectivity machinery that doesn't
35///   exist yet; documented here so future configs can name it.
36///
37/// # Examples
38///
39/// ```
40/// use iqdb_filter::FilterStrategy;
41///
42/// let chosen = FilterStrategy::PreFilter;
43/// // Callers do not branch on this yet; the enum is vocabulary for later.
44/// assert_ne!(chosen, FilterStrategy::PostFilter);
45/// ```
46#[non_exhaustive]
47#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
48pub enum FilterStrategy {
49    /// Apply the predicate before the distance scan.
50    PreFilter,
51    /// Apply the predicate after the distance scan.
52    PostFilter,
53    /// Interleave the predicate with a graph traversal.
54    InFilter,
55    /// Let the index pick the strategy based on estimated selectivity.
56    Auto,
57}
58
59/// Default cutoff [`StrategySelector`] uses to split `PreFilter` from
60/// `PostFilter`: a filter whose estimated selectivity is at or below this value
61/// is treated as narrow enough to pre-filter.
62///
63/// `0.5` means "pre-filter when the predicate is expected to drop at least half
64/// the corpus". It is a deliberately neutral default; tune it for a specific
65/// index with [`StrategySelector::with_prefilter_threshold`].
66pub const DEFAULT_PREFILTER_THRESHOLD: f64 = 0.5;
67
68/// Picks a concrete [`FilterStrategy`] for a validated filter from its
69/// estimated selectivity — the Tier-2, tunable counterpart to
70/// [`choose_strategy`].
71///
72/// The rule is simple and monotone: a **narrow** predicate (low
73/// [`estimate_selectivity`](crate::estimate_selectivity), at or below the
74/// threshold) resolves to [`FilterStrategy::PreFilter`], because evaluating it
75/// up front skips the distance computation for the rows it rejects; a **broad**
76/// predicate resolves to [`FilterStrategy::PostFilter`], because pre-filtering
77/// would materialise nearly the whole corpus for little gain. The selector
78/// never returns [`FilterStrategy::Auto`] (it is the thing that resolves it) or
79/// [`FilterStrategy::InFilter`] (which needs graph-traversal co-design).
80///
81/// The type is immutable: [`with_prefilter_threshold`](Self::with_prefilter_threshold)
82/// returns a new selector rather than mutating in place.
83///
84/// # Examples
85///
86/// ```
87/// use iqdb_filter::{FilterEvaluator, FilterStrategy, StrategySelector};
88/// use iqdb_types::{Filter, Value};
89///
90/// # fn main() -> iqdb_types::Result<()> {
91/// let selector = StrategySelector::new().with_prefilter_threshold(0.3);
92///
93/// let narrow = FilterEvaluator::new(Filter::eq("id", Value::Int(7)))?;
94/// let broad = FilterEvaluator::new(Filter::neq("id", Value::Int(7)))?;
95///
96/// assert_eq!(selector.choose(&narrow), FilterStrategy::PreFilter);
97/// assert_eq!(selector.choose(&broad), FilterStrategy::PostFilter);
98/// # Ok(())
99/// # }
100/// ```
101#[derive(Debug, Clone, Copy)]
102pub struct StrategySelector {
103    prefilter_threshold: f64,
104}
105
106impl Default for StrategySelector {
107    fn default() -> Self {
108        Self {
109            prefilter_threshold: DEFAULT_PREFILTER_THRESHOLD,
110        }
111    }
112}
113
114impl StrategySelector {
115    /// Creates a selector with the [`DEFAULT_PREFILTER_THRESHOLD`].
116    ///
117    /// # Examples
118    ///
119    /// ```
120    /// use iqdb_filter::{StrategySelector, DEFAULT_PREFILTER_THRESHOLD};
121    ///
122    /// let selector = StrategySelector::new();
123    /// assert_eq!(selector.prefilter_threshold(), DEFAULT_PREFILTER_THRESHOLD);
124    /// ```
125    #[must_use]
126    pub fn new() -> Self {
127        Self::default()
128    }
129
130    /// Returns a new selector that pre-filters when estimated selectivity is at
131    /// or below `threshold`.
132    ///
133    /// `threshold` is clamped to `[0.0, 1.0]`: `0.0` pre-filters only the most
134    /// extreme predicates (effectively always post-filter), `1.0` always
135    /// pre-filters.
136    ///
137    /// # Examples
138    ///
139    /// ```
140    /// use iqdb_filter::StrategySelector;
141    ///
142    /// let always_pre = StrategySelector::new().with_prefilter_threshold(1.0);
143    /// assert_eq!(always_pre.prefilter_threshold(), 1.0);
144    ///
145    /// // Out-of-range values are clamped, never panic.
146    /// let clamped = StrategySelector::new().with_prefilter_threshold(2.5);
147    /// assert_eq!(clamped.prefilter_threshold(), 1.0);
148    /// ```
149    #[must_use]
150    pub fn with_prefilter_threshold(self, threshold: f64) -> Self {
151        Self {
152            prefilter_threshold: threshold.clamp(0.0, 1.0),
153        }
154    }
155
156    /// The selectivity cutoff this selector uses.
157    #[must_use]
158    pub fn prefilter_threshold(&self) -> f64 {
159        self.prefilter_threshold
160    }
161
162    /// Resolves the strategy for `evaluator`'s filter.
163    ///
164    /// Returns [`FilterStrategy::PreFilter`] when the estimated selectivity is
165    /// at or below [`prefilter_threshold`](Self::prefilter_threshold), and
166    /// [`FilterStrategy::PostFilter`] otherwise.
167    ///
168    /// # Examples
169    ///
170    /// ```
171    /// use iqdb_filter::{FilterEvaluator, FilterStrategy, StrategySelector};
172    /// use iqdb_types::{Filter, Value};
173    ///
174    /// # fn main() -> iqdb_types::Result<()> {
175    /// let evaluator = FilterEvaluator::new(Filter::eq("k", Value::Int(1)))?;
176    /// assert_eq!(StrategySelector::new().choose(&evaluator), FilterStrategy::PreFilter);
177    /// # Ok(())
178    /// # }
179    /// ```
180    #[must_use]
181    pub fn choose(&self, evaluator: &crate::FilterEvaluator) -> FilterStrategy {
182        self.resolve(crate::estimate_selectivity(evaluator))
183    }
184
185    /// Resolves the strategy using the **index-backed** selectivity estimate
186    /// from `index` ([`MetadataIndex::estimate_selectivity`](crate::MetadataIndex::estimate_selectivity)),
187    /// which uses real posting counts where it can.
188    ///
189    /// Prefer this over [`choose`](Self::choose) when an index is available:
190    /// the count-based estimate is sharper than the structural one, so the
191    /// pre/post decision is better informed.
192    ///
193    /// # Examples
194    ///
195    /// ```
196    /// use iqdb_filter::{FilterEvaluator, FilterStrategy, MetadataIndex, StrategySelector};
197    /// use iqdb_types::{Filter, Metadata, Value};
198    ///
199    /// # fn main() -> iqdb_types::Result<()> {
200    /// // 1 of 4 rows matches: a genuinely narrow predicate the index can see.
201    /// let rows = [
202    ///     (0_usize, [("tier".to_string(), Value::Int(1))].into_iter().collect::<Metadata>()),
203    ///     (1, [("tier".to_string(), Value::Int(2))].into_iter().collect::<Metadata>()),
204    ///     (2, [("tier".to_string(), Value::Int(2))].into_iter().collect::<Metadata>()),
205    ///     (3, [("tier".to_string(), Value::Int(2))].into_iter().collect::<Metadata>()),
206    /// ];
207    /// let index = MetadataIndex::build(&["tier"], rows.iter().map(|(k, m)| (*k, Some(m))));
208    ///
209    /// let evaluator = FilterEvaluator::new(Filter::eq("tier", Value::Int(1)))?;
210    /// let selector = StrategySelector::new();
211    /// assert_eq!(selector.choose_with_index(&evaluator, &index), FilterStrategy::PreFilter);
212    /// # Ok(())
213    /// # }
214    /// ```
215    #[must_use]
216    pub fn choose_with_index<K>(
217        &self,
218        evaluator: &crate::FilterEvaluator,
219        index: &crate::MetadataIndex<K>,
220    ) -> FilterStrategy
221    where
222        K: Clone + Eq + core::hash::Hash,
223    {
224        self.resolve(index.estimate_selectivity(evaluator))
225    }
226
227    fn resolve(&self, selectivity: f64) -> FilterStrategy {
228        if selectivity <= self.prefilter_threshold {
229            FilterStrategy::PreFilter
230        } else {
231            FilterStrategy::PostFilter
232        }
233    }
234}
235
236/// Picks a concrete [`FilterStrategy`] for a validated filter using the
237/// [`DEFAULT_PREFILTER_THRESHOLD`] — the Tier-1 shortcut for
238/// [`StrategySelector::new().choose(..)`](StrategySelector::choose).
239///
240/// Returns [`FilterStrategy::PreFilter`] for narrow predicates and
241/// [`FilterStrategy::PostFilter`] for broad ones; never `Auto` or `InFilter`.
242///
243/// # Examples
244///
245/// ```
246/// use iqdb_filter::{FilterEvaluator, FilterStrategy, choose_strategy};
247/// use iqdb_types::{Filter, Value};
248///
249/// # fn main() -> iqdb_types::Result<()> {
250/// let evaluator = FilterEvaluator::new(Filter::eq("k", Value::Int(1)))?;
251/// assert_eq!(choose_strategy(&evaluator), FilterStrategy::PreFilter);
252/// # Ok(())
253/// # }
254/// ```
255#[must_use]
256pub fn choose_strategy(evaluator: &crate::FilterEvaluator) -> FilterStrategy {
257    StrategySelector::new().choose(evaluator)
258}
259
260#[cfg(test)]
261mod tests {
262    #![allow(clippy::unwrap_used)]
263
264    use super::*;
265    use crate::FilterEvaluator;
266    use iqdb_types::{Filter, Value};
267
268    #[test]
269    fn variants_compare_by_value() {
270        assert_eq!(FilterStrategy::PreFilter, FilterStrategy::PreFilter);
271        assert_ne!(FilterStrategy::PreFilter, FilterStrategy::Auto);
272    }
273
274    #[test]
275    fn variants_are_copy() {
276        let a = FilterStrategy::InFilter;
277        let b = a;
278        assert_eq!(a, b);
279    }
280
281    #[test]
282    fn default_threshold_is_the_constant() {
283        assert_eq!(
284            StrategySelector::new().prefilter_threshold(),
285            DEFAULT_PREFILTER_THRESHOLD
286        );
287    }
288
289    #[test]
290    fn threshold_is_clamped() {
291        assert_eq!(
292            StrategySelector::new()
293                .with_prefilter_threshold(-1.0)
294                .prefilter_threshold(),
295            0.0
296        );
297        assert_eq!(
298            StrategySelector::new()
299                .with_prefilter_threshold(9.0)
300                .prefilter_threshold(),
301            1.0
302        );
303    }
304
305    #[test]
306    fn narrow_prefilters_broad_postfilters() {
307        let narrow = FilterEvaluator::new(Filter::eq("k", Value::Int(1))).unwrap();
308        let broad = FilterEvaluator::new(Filter::neq("k", Value::Int(1))).unwrap();
309        assert_eq!(choose_strategy(&narrow), FilterStrategy::PreFilter);
310        assert_eq!(choose_strategy(&broad), FilterStrategy::PostFilter);
311    }
312
313    #[test]
314    fn threshold_extremes_force_one_strategy() {
315        let broad = FilterEvaluator::new(Filter::neq("k", Value::Int(1))).unwrap();
316        let always_pre = StrategySelector::new().with_prefilter_threshold(1.0);
317        let always_post = StrategySelector::new().with_prefilter_threshold(0.0);
318        assert_eq!(always_pre.choose(&broad), FilterStrategy::PreFilter);
319        assert_eq!(always_post.choose(&broad), FilterStrategy::PostFilter);
320    }
321}