iqdb_filter/strategy.rs
1//! [`FilterStrategy`] — how an index applies a [`Filter`], and the selector
2//! that chooses it.
3//!
4//! [`FilterStrategy`] names the four shapes; [`StrategySelector`] and the
5//! Tier-1 [`choose_strategy`] resolve a concrete `PreFilter` / `PostFilter`
6//! from a selectivity estimate. `PreFilter` and `PostFilter` are realised by
7//! [`crate::FilterEvaluator::prefilter`] / [`crate::FilterEvaluator::postfilter`];
8//! `InFilter` (graph-traversal pushdown) remains reserved for a future
9//! approximate-index consumer (see `dev/ROADMAP.md`).
10//!
11//! [`Filter`]: iqdb_types::Filter
12
13/// How an index plans to apply a metadata [`Filter`](iqdb_types::Filter)
14/// relative to its distance scan.
15///
16/// `#[non_exhaustive]` because selection logic will add variants (and likely
17/// adapter parameters) as approximate indexes start honouring filters.
18/// Callers should not exhaustively match on this enum and should not rely on
19/// the variant set being closed.
20///
21/// # Variants in plain terms
22///
23/// - [`FilterStrategy::PreFilter`] — apply the predicate **before** the
24/// distance computation; only matching candidates enter the scan. Cheap
25/// when the predicate is selective, wasteful when it is broad.
26/// - [`FilterStrategy::PostFilter`] — run the distance scan over every
27/// candidate, then drop hits that fail the predicate. Cheap when the
28/// predicate is broad, defeats top-`k` truncation when the predicate is
29/// selective (you may have to scan far past `k` to refill the result set).
30/// - [`FilterStrategy::InFilter`] — interleave predicate evaluation with the
31/// distance walk so a graph index can prune branches it knows can't
32/// produce surviving candidates. Requires `MetadataIndex` co-design.
33/// - [`FilterStrategy::Auto`] — let the index pick from the above based on
34/// estimated selectivity. Requires the selectivity machinery that doesn't
35/// exist yet; documented here so future configs can name it.
36///
37/// # Examples
38///
39/// ```
40/// use iqdb_filter::FilterStrategy;
41///
42/// let chosen = FilterStrategy::PreFilter;
43/// // Callers do not branch on this yet; the enum is vocabulary for later.
44/// assert_ne!(chosen, FilterStrategy::PostFilter);
45/// ```
46#[non_exhaustive]
47#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
48pub enum FilterStrategy {
49 /// Apply the predicate before the distance scan.
50 PreFilter,
51 /// Apply the predicate after the distance scan.
52 PostFilter,
53 /// Interleave the predicate with a graph traversal.
54 InFilter,
55 /// Let the index pick the strategy based on estimated selectivity.
56 Auto,
57}
58
59/// Default cutoff [`StrategySelector`] uses to split `PreFilter` from
60/// `PostFilter`: a filter whose estimated selectivity is at or below this value
61/// is treated as narrow enough to pre-filter.
62///
63/// `0.5` means "pre-filter when the predicate is expected to drop at least half
64/// the corpus". It is a deliberately neutral default; tune it for a specific
65/// index with [`StrategySelector::with_prefilter_threshold`].
66pub const DEFAULT_PREFILTER_THRESHOLD: f64 = 0.5;
67
68/// Picks a concrete [`FilterStrategy`] for a validated filter from its
69/// estimated selectivity — the Tier-2, tunable counterpart to
70/// [`choose_strategy`].
71///
72/// The rule is simple and monotone: a **narrow** predicate (low
73/// [`estimate_selectivity`](crate::estimate_selectivity), at or below the
74/// threshold) resolves to [`FilterStrategy::PreFilter`], because evaluating it
75/// up front skips the distance computation for the rows it rejects; a **broad**
76/// predicate resolves to [`FilterStrategy::PostFilter`], because pre-filtering
77/// would materialise nearly the whole corpus for little gain. The selector
78/// never returns [`FilterStrategy::Auto`] (it is the thing that resolves it) or
79/// [`FilterStrategy::InFilter`] (which needs graph-traversal co-design).
80///
81/// The type is immutable: [`with_prefilter_threshold`](Self::with_prefilter_threshold)
82/// returns a new selector rather than mutating in place.
83///
84/// # Examples
85///
86/// ```
87/// use iqdb_filter::{FilterEvaluator, FilterStrategy, StrategySelector};
88/// use iqdb_types::{Filter, Value};
89///
90/// # fn main() -> iqdb_types::Result<()> {
91/// let selector = StrategySelector::new().with_prefilter_threshold(0.3);
92///
93/// let narrow = FilterEvaluator::new(Filter::eq("id", Value::Int(7)))?;
94/// let broad = FilterEvaluator::new(Filter::neq("id", Value::Int(7)))?;
95///
96/// assert_eq!(selector.choose(&narrow), FilterStrategy::PreFilter);
97/// assert_eq!(selector.choose(&broad), FilterStrategy::PostFilter);
98/// # Ok(())
99/// # }
100/// ```
101#[derive(Debug, Clone, Copy)]
102pub struct StrategySelector {
103 prefilter_threshold: f64,
104}
105
106impl Default for StrategySelector {
107 fn default() -> Self {
108 Self {
109 prefilter_threshold: DEFAULT_PREFILTER_THRESHOLD,
110 }
111 }
112}
113
114impl StrategySelector {
115 /// Creates a selector with the [`DEFAULT_PREFILTER_THRESHOLD`].
116 ///
117 /// # Examples
118 ///
119 /// ```
120 /// use iqdb_filter::{StrategySelector, DEFAULT_PREFILTER_THRESHOLD};
121 ///
122 /// let selector = StrategySelector::new();
123 /// assert_eq!(selector.prefilter_threshold(), DEFAULT_PREFILTER_THRESHOLD);
124 /// ```
125 #[must_use]
126 pub fn new() -> Self {
127 Self::default()
128 }
129
130 /// Returns a new selector that pre-filters when estimated selectivity is at
131 /// or below `threshold`.
132 ///
133 /// `threshold` is clamped to `[0.0, 1.0]`: `0.0` pre-filters only the most
134 /// extreme predicates (effectively always post-filter), `1.0` always
135 /// pre-filters.
136 ///
137 /// # Examples
138 ///
139 /// ```
140 /// use iqdb_filter::StrategySelector;
141 ///
142 /// let always_pre = StrategySelector::new().with_prefilter_threshold(1.0);
143 /// assert_eq!(always_pre.prefilter_threshold(), 1.0);
144 ///
145 /// // Out-of-range values are clamped, never panic.
146 /// let clamped = StrategySelector::new().with_prefilter_threshold(2.5);
147 /// assert_eq!(clamped.prefilter_threshold(), 1.0);
148 /// ```
149 #[must_use]
150 pub fn with_prefilter_threshold(self, threshold: f64) -> Self {
151 Self {
152 prefilter_threshold: threshold.clamp(0.0, 1.0),
153 }
154 }
155
156 /// The selectivity cutoff this selector uses.
157 #[must_use]
158 pub fn prefilter_threshold(&self) -> f64 {
159 self.prefilter_threshold
160 }
161
162 /// Resolves the strategy for `evaluator`'s filter.
163 ///
164 /// Returns [`FilterStrategy::PreFilter`] when the estimated selectivity is
165 /// at or below [`prefilter_threshold`](Self::prefilter_threshold), and
166 /// [`FilterStrategy::PostFilter`] otherwise.
167 ///
168 /// # Examples
169 ///
170 /// ```
171 /// use iqdb_filter::{FilterEvaluator, FilterStrategy, StrategySelector};
172 /// use iqdb_types::{Filter, Value};
173 ///
174 /// # fn main() -> iqdb_types::Result<()> {
175 /// let evaluator = FilterEvaluator::new(Filter::eq("k", Value::Int(1)))?;
176 /// assert_eq!(StrategySelector::new().choose(&evaluator), FilterStrategy::PreFilter);
177 /// # Ok(())
178 /// # }
179 /// ```
180 #[must_use]
181 pub fn choose(&self, evaluator: &crate::FilterEvaluator) -> FilterStrategy {
182 self.resolve(crate::estimate_selectivity(evaluator))
183 }
184
185 /// Resolves the strategy using the **index-backed** selectivity estimate
186 /// from `index` ([`MetadataIndex::estimate_selectivity`](crate::MetadataIndex::estimate_selectivity)),
187 /// which uses real posting counts where it can.
188 ///
189 /// Prefer this over [`choose`](Self::choose) when an index is available:
190 /// the count-based estimate is sharper than the structural one, so the
191 /// pre/post decision is better informed.
192 ///
193 /// # Examples
194 ///
195 /// ```
196 /// use iqdb_filter::{FilterEvaluator, FilterStrategy, MetadataIndex, StrategySelector};
197 /// use iqdb_types::{Filter, Metadata, Value};
198 ///
199 /// # fn main() -> iqdb_types::Result<()> {
200 /// // 1 of 4 rows matches: a genuinely narrow predicate the index can see.
201 /// let rows = [
202 /// (0_usize, [("tier".to_string(), Value::Int(1))].into_iter().collect::<Metadata>()),
203 /// (1, [("tier".to_string(), Value::Int(2))].into_iter().collect::<Metadata>()),
204 /// (2, [("tier".to_string(), Value::Int(2))].into_iter().collect::<Metadata>()),
205 /// (3, [("tier".to_string(), Value::Int(2))].into_iter().collect::<Metadata>()),
206 /// ];
207 /// let index = MetadataIndex::build(&["tier"], rows.iter().map(|(k, m)| (*k, Some(m))));
208 ///
209 /// let evaluator = FilterEvaluator::new(Filter::eq("tier", Value::Int(1)))?;
210 /// let selector = StrategySelector::new();
211 /// assert_eq!(selector.choose_with_index(&evaluator, &index), FilterStrategy::PreFilter);
212 /// # Ok(())
213 /// # }
214 /// ```
215 #[must_use]
216 pub fn choose_with_index<K>(
217 &self,
218 evaluator: &crate::FilterEvaluator,
219 index: &crate::MetadataIndex<K>,
220 ) -> FilterStrategy
221 where
222 K: Clone + Eq + core::hash::Hash,
223 {
224 self.resolve(index.estimate_selectivity(evaluator))
225 }
226
227 fn resolve(&self, selectivity: f64) -> FilterStrategy {
228 if selectivity <= self.prefilter_threshold {
229 FilterStrategy::PreFilter
230 } else {
231 FilterStrategy::PostFilter
232 }
233 }
234}
235
236/// Picks a concrete [`FilterStrategy`] for a validated filter using the
237/// [`DEFAULT_PREFILTER_THRESHOLD`] — the Tier-1 shortcut for
238/// [`StrategySelector::new().choose(..)`](StrategySelector::choose).
239///
240/// Returns [`FilterStrategy::PreFilter`] for narrow predicates and
241/// [`FilterStrategy::PostFilter`] for broad ones; never `Auto` or `InFilter`.
242///
243/// # Examples
244///
245/// ```
246/// use iqdb_filter::{FilterEvaluator, FilterStrategy, choose_strategy};
247/// use iqdb_types::{Filter, Value};
248///
249/// # fn main() -> iqdb_types::Result<()> {
250/// let evaluator = FilterEvaluator::new(Filter::eq("k", Value::Int(1)))?;
251/// assert_eq!(choose_strategy(&evaluator), FilterStrategy::PreFilter);
252/// # Ok(())
253/// # }
254/// ```
255#[must_use]
256pub fn choose_strategy(evaluator: &crate::FilterEvaluator) -> FilterStrategy {
257 StrategySelector::new().choose(evaluator)
258}
259
260#[cfg(test)]
261mod tests {
262 #![allow(clippy::unwrap_used)]
263
264 use super::*;
265 use crate::FilterEvaluator;
266 use iqdb_types::{Filter, Value};
267
268 #[test]
269 fn variants_compare_by_value() {
270 assert_eq!(FilterStrategy::PreFilter, FilterStrategy::PreFilter);
271 assert_ne!(FilterStrategy::PreFilter, FilterStrategy::Auto);
272 }
273
274 #[test]
275 fn variants_are_copy() {
276 let a = FilterStrategy::InFilter;
277 let b = a;
278 assert_eq!(a, b);
279 }
280
281 #[test]
282 fn default_threshold_is_the_constant() {
283 assert_eq!(
284 StrategySelector::new().prefilter_threshold(),
285 DEFAULT_PREFILTER_THRESHOLD
286 );
287 }
288
289 #[test]
290 fn threshold_is_clamped() {
291 assert_eq!(
292 StrategySelector::new()
293 .with_prefilter_threshold(-1.0)
294 .prefilter_threshold(),
295 0.0
296 );
297 assert_eq!(
298 StrategySelector::new()
299 .with_prefilter_threshold(9.0)
300 .prefilter_threshold(),
301 1.0
302 );
303 }
304
305 #[test]
306 fn narrow_prefilters_broad_postfilters() {
307 let narrow = FilterEvaluator::new(Filter::eq("k", Value::Int(1))).unwrap();
308 let broad = FilterEvaluator::new(Filter::neq("k", Value::Int(1))).unwrap();
309 assert_eq!(choose_strategy(&narrow), FilterStrategy::PreFilter);
310 assert_eq!(choose_strategy(&broad), FilterStrategy::PostFilter);
311 }
312
313 #[test]
314 fn threshold_extremes_force_one_strategy() {
315 let broad = FilterEvaluator::new(Filter::neq("k", Value::Int(1))).unwrap();
316 let always_pre = StrategySelector::new().with_prefilter_threshold(1.0);
317 let always_post = StrategySelector::new().with_prefilter_threshold(0.0);
318 assert_eq!(always_pre.choose(&broad), FilterStrategy::PreFilter);
319 assert_eq!(always_post.choose(&broad), FilterStrategy::PostFilter);
320 }
321}