Skip to main content

iqdb_filter/
strategy.rs

1//! [`FilterStrategy`] — vocabulary for how an index applies a [`Filter`].
2//!
3//! This release ships the vocabulary only. The four named strategies describe
4//! the shapes a future selector will choose between; the selector itself, and
5//! the `MetadataIndex` / cardinality machinery it needs, land in a later
6//! release (see `dev/ROADMAP.md`). Today every consumer applies pre-filtering
7//! through [`crate::FilterEvaluator`] and ignores this enum — the enum exists
8//! so `MetadataIndex`-driven indexes can adopt it without a breaking change.
9//!
10//! [`Filter`]: iqdb_types::Filter
11
12/// How an index plans to apply a metadata [`Filter`](iqdb_types::Filter)
13/// relative to its distance scan.
14///
15/// `#[non_exhaustive]` because selection logic will add variants (and likely
16/// adapter parameters) as approximate indexes start honouring filters.
17/// Callers should not exhaustively match on this enum and should not rely on
18/// the variant set being closed.
19///
20/// # Variants in plain terms
21///
22/// - [`FilterStrategy::PreFilter`] — apply the predicate **before** the
23///   distance computation; only matching candidates enter the scan. Cheap
24///   when the predicate is selective, wasteful when it is broad.
25/// - [`FilterStrategy::PostFilter`] — run the distance scan over every
26///   candidate, then drop hits that fail the predicate. Cheap when the
27///   predicate is broad, defeats top-`k` truncation when the predicate is
28///   selective (you may have to scan far past `k` to refill the result set).
29/// - [`FilterStrategy::InFilter`] — interleave predicate evaluation with the
30///   distance walk so a graph index can prune branches it knows can't
31///   produce surviving candidates. Requires `MetadataIndex` co-design.
32/// - [`FilterStrategy::Auto`] — let the index pick from the above based on
33///   estimated selectivity. Requires the selectivity machinery that doesn't
34///   exist yet; documented here so future configs can name it.
35///
36/// # Examples
37///
38/// ```
39/// use iqdb_filter::FilterStrategy;
40///
41/// let chosen = FilterStrategy::PreFilter;
42/// // Callers do not branch on this yet; the enum is vocabulary for later.
43/// assert_ne!(chosen, FilterStrategy::PostFilter);
44/// ```
45#[non_exhaustive]
46#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
47pub enum FilterStrategy {
48    /// Apply the predicate before the distance scan.
49    PreFilter,
50    /// Apply the predicate after the distance scan.
51    PostFilter,
52    /// Interleave the predicate with a graph traversal.
53    InFilter,
54    /// Let the index pick the strategy based on estimated selectivity.
55    Auto,
56}
57
58/// Default cutoff [`StrategySelector`] uses to split `PreFilter` from
59/// `PostFilter`: a filter whose estimated selectivity is at or below this value
60/// is treated as narrow enough to pre-filter.
61///
62/// `0.5` means "pre-filter when the predicate is expected to drop at least half
63/// the corpus". It is a deliberately neutral default; tune it for a specific
64/// index with [`StrategySelector::with_prefilter_threshold`].
65pub const DEFAULT_PREFILTER_THRESHOLD: f64 = 0.5;
66
67/// Picks a concrete [`FilterStrategy`] for a validated filter from its
68/// estimated selectivity — the Tier-2, tunable counterpart to
69/// [`choose_strategy`].
70///
71/// The rule is simple and monotone: a **narrow** predicate (low
72/// [`estimate_selectivity`](crate::estimate_selectivity), at or below the
73/// threshold) resolves to [`FilterStrategy::PreFilter`], because evaluating it
74/// up front skips the distance computation for the rows it rejects; a **broad**
75/// predicate resolves to [`FilterStrategy::PostFilter`], because pre-filtering
76/// would materialise nearly the whole corpus for little gain. The selector
77/// never returns [`FilterStrategy::Auto`] (it is the thing that resolves it) or
78/// [`FilterStrategy::InFilter`] (which needs graph-traversal co-design).
79///
80/// The type is immutable: [`with_prefilter_threshold`](Self::with_prefilter_threshold)
81/// returns a new selector rather than mutating in place.
82///
83/// # Examples
84///
85/// ```
86/// use iqdb_filter::{FilterEvaluator, FilterStrategy, StrategySelector};
87/// use iqdb_types::{Filter, Value};
88///
89/// # fn main() -> iqdb_types::Result<()> {
90/// let selector = StrategySelector::new().with_prefilter_threshold(0.3);
91///
92/// let narrow = FilterEvaluator::new(Filter::eq("id", Value::Int(7)))?;
93/// let broad = FilterEvaluator::new(Filter::neq("id", Value::Int(7)))?;
94///
95/// assert_eq!(selector.choose(&narrow), FilterStrategy::PreFilter);
96/// assert_eq!(selector.choose(&broad), FilterStrategy::PostFilter);
97/// # Ok(())
98/// # }
99/// ```
100#[derive(Debug, Clone, Copy)]
101pub struct StrategySelector {
102    prefilter_threshold: f64,
103}
104
105impl Default for StrategySelector {
106    fn default() -> Self {
107        Self {
108            prefilter_threshold: DEFAULT_PREFILTER_THRESHOLD,
109        }
110    }
111}
112
113impl StrategySelector {
114    /// Creates a selector with the [`DEFAULT_PREFILTER_THRESHOLD`].
115    ///
116    /// # Examples
117    ///
118    /// ```
119    /// use iqdb_filter::{StrategySelector, DEFAULT_PREFILTER_THRESHOLD};
120    ///
121    /// let selector = StrategySelector::new();
122    /// assert_eq!(selector.prefilter_threshold(), DEFAULT_PREFILTER_THRESHOLD);
123    /// ```
124    #[must_use]
125    pub fn new() -> Self {
126        Self::default()
127    }
128
129    /// Returns a new selector that pre-filters when estimated selectivity is at
130    /// or below `threshold`.
131    ///
132    /// `threshold` is clamped to `[0.0, 1.0]`: `0.0` pre-filters only the most
133    /// extreme predicates (effectively always post-filter), `1.0` always
134    /// pre-filters.
135    ///
136    /// # Examples
137    ///
138    /// ```
139    /// use iqdb_filter::StrategySelector;
140    ///
141    /// let always_pre = StrategySelector::new().with_prefilter_threshold(1.0);
142    /// assert_eq!(always_pre.prefilter_threshold(), 1.0);
143    ///
144    /// // Out-of-range values are clamped, never panic.
145    /// let clamped = StrategySelector::new().with_prefilter_threshold(2.5);
146    /// assert_eq!(clamped.prefilter_threshold(), 1.0);
147    /// ```
148    #[must_use]
149    pub fn with_prefilter_threshold(self, threshold: f64) -> Self {
150        Self {
151            prefilter_threshold: threshold.clamp(0.0, 1.0),
152        }
153    }
154
155    /// The selectivity cutoff this selector uses.
156    #[must_use]
157    pub fn prefilter_threshold(&self) -> f64 {
158        self.prefilter_threshold
159    }
160
161    /// Resolves the strategy for `evaluator`'s filter.
162    ///
163    /// Returns [`FilterStrategy::PreFilter`] when the estimated selectivity is
164    /// at or below [`prefilter_threshold`](Self::prefilter_threshold), and
165    /// [`FilterStrategy::PostFilter`] otherwise.
166    ///
167    /// # Examples
168    ///
169    /// ```
170    /// use iqdb_filter::{FilterEvaluator, FilterStrategy, StrategySelector};
171    /// use iqdb_types::{Filter, Value};
172    ///
173    /// # fn main() -> iqdb_types::Result<()> {
174    /// let evaluator = FilterEvaluator::new(Filter::eq("k", Value::Int(1)))?;
175    /// assert_eq!(StrategySelector::new().choose(&evaluator), FilterStrategy::PreFilter);
176    /// # Ok(())
177    /// # }
178    /// ```
179    #[must_use]
180    pub fn choose(&self, evaluator: &crate::FilterEvaluator) -> FilterStrategy {
181        self.resolve(crate::estimate_selectivity(evaluator))
182    }
183
184    /// Resolves the strategy using the **index-backed** selectivity estimate
185    /// from `index` ([`MetadataIndex::estimate_selectivity`](crate::MetadataIndex::estimate_selectivity)),
186    /// which uses real posting counts where it can.
187    ///
188    /// Prefer this over [`choose`](Self::choose) when an index is available:
189    /// the count-based estimate is sharper than the structural one, so the
190    /// pre/post decision is better informed.
191    ///
192    /// # Examples
193    ///
194    /// ```
195    /// use iqdb_filter::{FilterEvaluator, FilterStrategy, MetadataIndex, StrategySelector};
196    /// use iqdb_types::{Filter, Metadata, Value};
197    ///
198    /// # fn main() -> iqdb_types::Result<()> {
199    /// // 1 of 4 rows matches: a genuinely narrow predicate the index can see.
200    /// let rows = [
201    ///     (0_usize, [("tier".to_string(), Value::Int(1))].into_iter().collect::<Metadata>()),
202    ///     (1, [("tier".to_string(), Value::Int(2))].into_iter().collect::<Metadata>()),
203    ///     (2, [("tier".to_string(), Value::Int(2))].into_iter().collect::<Metadata>()),
204    ///     (3, [("tier".to_string(), Value::Int(2))].into_iter().collect::<Metadata>()),
205    /// ];
206    /// let index = MetadataIndex::build(&["tier"], rows.iter().map(|(k, m)| (*k, Some(m))));
207    ///
208    /// let evaluator = FilterEvaluator::new(Filter::eq("tier", Value::Int(1)))?;
209    /// let selector = StrategySelector::new();
210    /// assert_eq!(selector.choose_with_index(&evaluator, &index), FilterStrategy::PreFilter);
211    /// # Ok(())
212    /// # }
213    /// ```
214    #[must_use]
215    pub fn choose_with_index<K>(
216        &self,
217        evaluator: &crate::FilterEvaluator,
218        index: &crate::MetadataIndex<K>,
219    ) -> FilterStrategy
220    where
221        K: Clone + Eq + core::hash::Hash,
222    {
223        self.resolve(index.estimate_selectivity(evaluator))
224    }
225
226    fn resolve(&self, selectivity: f64) -> FilterStrategy {
227        if selectivity <= self.prefilter_threshold {
228            FilterStrategy::PreFilter
229        } else {
230            FilterStrategy::PostFilter
231        }
232    }
233}
234
235/// Picks a concrete [`FilterStrategy`] for a validated filter using the
236/// [`DEFAULT_PREFILTER_THRESHOLD`] — the Tier-1 shortcut for
237/// [`StrategySelector::new().choose(..)`](StrategySelector::choose).
238///
239/// Returns [`FilterStrategy::PreFilter`] for narrow predicates and
240/// [`FilterStrategy::PostFilter`] for broad ones; never `Auto` or `InFilter`.
241///
242/// # Examples
243///
244/// ```
245/// use iqdb_filter::{FilterEvaluator, FilterStrategy, choose_strategy};
246/// use iqdb_types::{Filter, Value};
247///
248/// # fn main() -> iqdb_types::Result<()> {
249/// let evaluator = FilterEvaluator::new(Filter::eq("k", Value::Int(1)))?;
250/// assert_eq!(choose_strategy(&evaluator), FilterStrategy::PreFilter);
251/// # Ok(())
252/// # }
253/// ```
254#[must_use]
255pub fn choose_strategy(evaluator: &crate::FilterEvaluator) -> FilterStrategy {
256    StrategySelector::new().choose(evaluator)
257}
258
259#[cfg(test)]
260mod tests {
261    #![allow(clippy::unwrap_used)]
262
263    use super::*;
264    use crate::FilterEvaluator;
265    use iqdb_types::{Filter, Value};
266
267    #[test]
268    fn variants_compare_by_value() {
269        assert_eq!(FilterStrategy::PreFilter, FilterStrategy::PreFilter);
270        assert_ne!(FilterStrategy::PreFilter, FilterStrategy::Auto);
271    }
272
273    #[test]
274    fn variants_are_copy() {
275        let a = FilterStrategy::InFilter;
276        let b = a;
277        assert_eq!(a, b);
278    }
279
280    #[test]
281    fn default_threshold_is_the_constant() {
282        assert_eq!(
283            StrategySelector::new().prefilter_threshold(),
284            DEFAULT_PREFILTER_THRESHOLD
285        );
286    }
287
288    #[test]
289    fn threshold_is_clamped() {
290        assert_eq!(
291            StrategySelector::new()
292                .with_prefilter_threshold(-1.0)
293                .prefilter_threshold(),
294            0.0
295        );
296        assert_eq!(
297            StrategySelector::new()
298                .with_prefilter_threshold(9.0)
299                .prefilter_threshold(),
300            1.0
301        );
302    }
303
304    #[test]
305    fn narrow_prefilters_broad_postfilters() {
306        let narrow = FilterEvaluator::new(Filter::eq("k", Value::Int(1))).unwrap();
307        let broad = FilterEvaluator::new(Filter::neq("k", Value::Int(1))).unwrap();
308        assert_eq!(choose_strategy(&narrow), FilterStrategy::PreFilter);
309        assert_eq!(choose_strategy(&broad), FilterStrategy::PostFilter);
310    }
311
312    #[test]
313    fn threshold_extremes_force_one_strategy() {
314        let broad = FilterEvaluator::new(Filter::neq("k", Value::Int(1))).unwrap();
315        let always_pre = StrategySelector::new().with_prefilter_threshold(1.0);
316        let always_post = StrategySelector::new().with_prefilter_threshold(0.0);
317        assert_eq!(always_pre.choose(&broad), FilterStrategy::PreFilter);
318        assert_eq!(always_post.choose(&broad), FilterStrategy::PostFilter);
319    }
320}