iqdb_filter/strategy.rs
1//! [`FilterStrategy`] — vocabulary for how an index applies a [`Filter`].
2//!
3//! This release ships the vocabulary only. The four named strategies describe
4//! the shapes a future selector will choose between; the selector itself, and
5//! the `MetadataIndex` / cardinality machinery it needs, land in a later
6//! release (see `dev/ROADMAP.md`). Today every consumer applies pre-filtering
7//! through [`crate::FilterEvaluator`] and ignores this enum — the enum exists
8//! so `MetadataIndex`-driven indexes can adopt it without a breaking change.
9//!
10//! [`Filter`]: iqdb_types::Filter
11
12/// How an index plans to apply a metadata [`Filter`](iqdb_types::Filter)
13/// relative to its distance scan.
14///
15/// `#[non_exhaustive]` because selection logic will add variants (and likely
16/// adapter parameters) as approximate indexes start honouring filters.
17/// Callers should not exhaustively match on this enum and should not rely on
18/// the variant set being closed.
19///
20/// # Variants in plain terms
21///
22/// - [`FilterStrategy::PreFilter`] — apply the predicate **before** the
23/// distance computation; only matching candidates enter the scan. Cheap
24/// when the predicate is selective, wasteful when it is broad.
25/// - [`FilterStrategy::PostFilter`] — run the distance scan over every
26/// candidate, then drop hits that fail the predicate. Cheap when the
27/// predicate is broad, defeats top-`k` truncation when the predicate is
28/// selective (you may have to scan far past `k` to refill the result set).
29/// - [`FilterStrategy::InFilter`] — interleave predicate evaluation with the
30/// distance walk so a graph index can prune branches it knows can't
31/// produce surviving candidates. Requires `MetadataIndex` co-design.
32/// - [`FilterStrategy::Auto`] — let the index pick from the above based on
33/// estimated selectivity. Requires the selectivity machinery that doesn't
34/// exist yet; documented here so future configs can name it.
35///
36/// # Examples
37///
38/// ```
39/// use iqdb_filter::FilterStrategy;
40///
41/// let chosen = FilterStrategy::PreFilter;
42/// // Callers do not branch on this yet; the enum is vocabulary for later.
43/// assert_ne!(chosen, FilterStrategy::PostFilter);
44/// ```
45#[non_exhaustive]
46#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
47pub enum FilterStrategy {
48 /// Apply the predicate before the distance scan.
49 PreFilter,
50 /// Apply the predicate after the distance scan.
51 PostFilter,
52 /// Interleave the predicate with a graph traversal.
53 InFilter,
54 /// Let the index pick the strategy based on estimated selectivity.
55 Auto,
56}
57
58/// Default cutoff [`StrategySelector`] uses to split `PreFilter` from
59/// `PostFilter`: a filter whose estimated selectivity is at or below this value
60/// is treated as narrow enough to pre-filter.
61///
62/// `0.5` means "pre-filter when the predicate is expected to drop at least half
63/// the corpus". It is a deliberately neutral default; tune it for a specific
64/// index with [`StrategySelector::with_prefilter_threshold`].
65pub const DEFAULT_PREFILTER_THRESHOLD: f64 = 0.5;
66
67/// Picks a concrete [`FilterStrategy`] for a validated filter from its
68/// estimated selectivity — the Tier-2, tunable counterpart to
69/// [`choose_strategy`].
70///
71/// The rule is simple and monotone: a **narrow** predicate (low
72/// [`estimate_selectivity`](crate::estimate_selectivity), at or below the
73/// threshold) resolves to [`FilterStrategy::PreFilter`], because evaluating it
74/// up front skips the distance computation for the rows it rejects; a **broad**
75/// predicate resolves to [`FilterStrategy::PostFilter`], because pre-filtering
76/// would materialise nearly the whole corpus for little gain. The selector
77/// never returns [`FilterStrategy::Auto`] (it is the thing that resolves it) or
78/// [`FilterStrategy::InFilter`] (which needs graph-traversal co-design).
79///
80/// The type is immutable: [`with_prefilter_threshold`](Self::with_prefilter_threshold)
81/// returns a new selector rather than mutating in place.
82///
83/// # Examples
84///
85/// ```
86/// use iqdb_filter::{FilterEvaluator, FilterStrategy, StrategySelector};
87/// use iqdb_types::{Filter, Value};
88///
89/// # fn main() -> iqdb_types::Result<()> {
90/// let selector = StrategySelector::new().with_prefilter_threshold(0.3);
91///
92/// let narrow = FilterEvaluator::new(Filter::eq("id", Value::Int(7)))?;
93/// let broad = FilterEvaluator::new(Filter::neq("id", Value::Int(7)))?;
94///
95/// assert_eq!(selector.choose(&narrow), FilterStrategy::PreFilter);
96/// assert_eq!(selector.choose(&broad), FilterStrategy::PostFilter);
97/// # Ok(())
98/// # }
99/// ```
100#[derive(Debug, Clone, Copy)]
101pub struct StrategySelector {
102 prefilter_threshold: f64,
103}
104
105impl Default for StrategySelector {
106 fn default() -> Self {
107 Self {
108 prefilter_threshold: DEFAULT_PREFILTER_THRESHOLD,
109 }
110 }
111}
112
113impl StrategySelector {
114 /// Creates a selector with the [`DEFAULT_PREFILTER_THRESHOLD`].
115 ///
116 /// # Examples
117 ///
118 /// ```
119 /// use iqdb_filter::{StrategySelector, DEFAULT_PREFILTER_THRESHOLD};
120 ///
121 /// let selector = StrategySelector::new();
122 /// assert_eq!(selector.prefilter_threshold(), DEFAULT_PREFILTER_THRESHOLD);
123 /// ```
124 #[must_use]
125 pub fn new() -> Self {
126 Self::default()
127 }
128
129 /// Returns a new selector that pre-filters when estimated selectivity is at
130 /// or below `threshold`.
131 ///
132 /// `threshold` is clamped to `[0.0, 1.0]`: `0.0` pre-filters only the most
133 /// extreme predicates (effectively always post-filter), `1.0` always
134 /// pre-filters.
135 ///
136 /// # Examples
137 ///
138 /// ```
139 /// use iqdb_filter::StrategySelector;
140 ///
141 /// let always_pre = StrategySelector::new().with_prefilter_threshold(1.0);
142 /// assert_eq!(always_pre.prefilter_threshold(), 1.0);
143 ///
144 /// // Out-of-range values are clamped, never panic.
145 /// let clamped = StrategySelector::new().with_prefilter_threshold(2.5);
146 /// assert_eq!(clamped.prefilter_threshold(), 1.0);
147 /// ```
148 #[must_use]
149 pub fn with_prefilter_threshold(self, threshold: f64) -> Self {
150 Self {
151 prefilter_threshold: threshold.clamp(0.0, 1.0),
152 }
153 }
154
155 /// The selectivity cutoff this selector uses.
156 #[must_use]
157 pub fn prefilter_threshold(&self) -> f64 {
158 self.prefilter_threshold
159 }
160
161 /// Resolves the strategy for `evaluator`'s filter.
162 ///
163 /// Returns [`FilterStrategy::PreFilter`] when the estimated selectivity is
164 /// at or below [`prefilter_threshold`](Self::prefilter_threshold), and
165 /// [`FilterStrategy::PostFilter`] otherwise.
166 ///
167 /// # Examples
168 ///
169 /// ```
170 /// use iqdb_filter::{FilterEvaluator, FilterStrategy, StrategySelector};
171 /// use iqdb_types::{Filter, Value};
172 ///
173 /// # fn main() -> iqdb_types::Result<()> {
174 /// let evaluator = FilterEvaluator::new(Filter::eq("k", Value::Int(1)))?;
175 /// assert_eq!(StrategySelector::new().choose(&evaluator), FilterStrategy::PreFilter);
176 /// # Ok(())
177 /// # }
178 /// ```
179 #[must_use]
180 pub fn choose(&self, evaluator: &crate::FilterEvaluator) -> FilterStrategy {
181 if crate::estimate_selectivity(evaluator) <= self.prefilter_threshold {
182 FilterStrategy::PreFilter
183 } else {
184 FilterStrategy::PostFilter
185 }
186 }
187}
188
189/// Picks a concrete [`FilterStrategy`] for a validated filter using the
190/// [`DEFAULT_PREFILTER_THRESHOLD`] — the Tier-1 shortcut for
191/// [`StrategySelector::new().choose(..)`](StrategySelector::choose).
192///
193/// Returns [`FilterStrategy::PreFilter`] for narrow predicates and
194/// [`FilterStrategy::PostFilter`] for broad ones; never `Auto` or `InFilter`.
195///
196/// # Examples
197///
198/// ```
199/// use iqdb_filter::{FilterEvaluator, FilterStrategy, choose_strategy};
200/// use iqdb_types::{Filter, Value};
201///
202/// # fn main() -> iqdb_types::Result<()> {
203/// let evaluator = FilterEvaluator::new(Filter::eq("k", Value::Int(1)))?;
204/// assert_eq!(choose_strategy(&evaluator), FilterStrategy::PreFilter);
205/// # Ok(())
206/// # }
207/// ```
208#[must_use]
209pub fn choose_strategy(evaluator: &crate::FilterEvaluator) -> FilterStrategy {
210 StrategySelector::new().choose(evaluator)
211}
212
213#[cfg(test)]
214mod tests {
215 #![allow(clippy::unwrap_used)]
216
217 use super::*;
218 use crate::FilterEvaluator;
219 use iqdb_types::{Filter, Value};
220
221 #[test]
222 fn variants_compare_by_value() {
223 assert_eq!(FilterStrategy::PreFilter, FilterStrategy::PreFilter);
224 assert_ne!(FilterStrategy::PreFilter, FilterStrategy::Auto);
225 }
226
227 #[test]
228 fn variants_are_copy() {
229 let a = FilterStrategy::InFilter;
230 let b = a;
231 assert_eq!(a, b);
232 }
233
234 #[test]
235 fn default_threshold_is_the_constant() {
236 assert_eq!(
237 StrategySelector::new().prefilter_threshold(),
238 DEFAULT_PREFILTER_THRESHOLD
239 );
240 }
241
242 #[test]
243 fn threshold_is_clamped() {
244 assert_eq!(
245 StrategySelector::new()
246 .with_prefilter_threshold(-1.0)
247 .prefilter_threshold(),
248 0.0
249 );
250 assert_eq!(
251 StrategySelector::new()
252 .with_prefilter_threshold(9.0)
253 .prefilter_threshold(),
254 1.0
255 );
256 }
257
258 #[test]
259 fn narrow_prefilters_broad_postfilters() {
260 let narrow = FilterEvaluator::new(Filter::eq("k", Value::Int(1))).unwrap();
261 let broad = FilterEvaluator::new(Filter::neq("k", Value::Int(1))).unwrap();
262 assert_eq!(choose_strategy(&narrow), FilterStrategy::PreFilter);
263 assert_eq!(choose_strategy(&broad), FilterStrategy::PostFilter);
264 }
265
266 #[test]
267 fn threshold_extremes_force_one_strategy() {
268 let broad = FilterEvaluator::new(Filter::neq("k", Value::Int(1))).unwrap();
269 let always_pre = StrategySelector::new().with_prefilter_threshold(1.0);
270 let always_post = StrategySelector::new().with_prefilter_threshold(0.0);
271 assert_eq!(always_pre.choose(&broad), FilterStrategy::PreFilter);
272 assert_eq!(always_post.choose(&broad), FilterStrategy::PostFilter);
273 }
274}