Skip to main content

iqdb_filter/
evaluator.rs

1//! [`FilterEvaluator`] — the public, validated face of the evaluator.
2//!
3//! Construction is the validation gate. After [`FilterEvaluator::new`]
4//! returns `Ok`, the filter is known to satisfy the depth and `In`-cardinality
5//! caps documented on [`MAX_FILTER_DEPTH`] and [`MAX_IN_VALUES`], and the
6//! recursive [`FilterEvaluator::evaluate`] hot path can run without checking
7//! either bound again.
8
9use iqdb_types::{Filter, IqdbError, Metadata, Result};
10
11use crate::eval;
12
13/// Maximum allowed nesting depth of a [`Filter`] passed to
14/// [`FilterEvaluator::new`].
15///
16/// Each `And`/`Or`/`Not` node adds one level of nesting; leaf comparisons
17/// (`Eq`, `Neq`, `Lt`, `Lte`, `Gt`, `Gte`, `In`) do not. A depth of `64`
18/// already exceeds anything a hand-written or generated query is expected to
19/// produce, while sitting well below the recursion limit of every supported
20/// target's default thread stack. Filters that exceed the cap are rejected
21/// with [`IqdbError::InvalidFilter`] so [`FilterEvaluator::evaluate`] cannot
22/// stack-overflow on adversarial input.
23///
24/// Exposed as `pub const` so higher-level validation layers (request parsers,
25/// query builders) can quote the same number in their own error messages.
26///
27/// # Examples
28///
29/// ```
30/// assert!(iqdb_filter::MAX_FILTER_DEPTH >= 32);
31/// ```
32pub const MAX_FILTER_DEPTH: usize = 64;
33
34/// Maximum allowed number of values in a single [`Filter::In`] node.
35///
36/// `Filter::In` is `O(|values|)` per candidate per query; without a cap, an
37/// attacker-supplied predicate of a million values turns every search into a
38/// denial-of-service. `1024` covers realistic "tag in this set" queries while
39/// keeping per-row evaluation cheap. Filters exceeding the cap are rejected
40/// with [`IqdbError::InvalidFilter`].
41///
42/// Exposed as `pub const` so higher layers can pre-check before reaching the
43/// evaluator.
44///
45/// # Examples
46///
47/// ```
48/// assert!(iqdb_filter::MAX_IN_VALUES >= 256);
49/// ```
50pub const MAX_IN_VALUES: usize = 1024;
51
52/// A validated [`Filter`] paired with the canonical evaluator.
53///
54/// Build one with [`FilterEvaluator::new`]; the filter is walked once at
55/// construction to enforce [`MAX_FILTER_DEPTH`] and [`MAX_IN_VALUES`]. After
56/// that, [`FilterEvaluator::evaluate`] is infallible and may be called per
57/// row inside a search loop without revalidation.
58///
59/// # Examples
60///
61/// ```
62/// use iqdb_filter::FilterEvaluator;
63/// use iqdb_types::{Filter, Metadata, Value};
64///
65/// # fn main() -> iqdb_types::Result<()> {
66/// let evaluator = FilterEvaluator::new(Filter::eq("year", Value::Int(2026)))?;
67///
68/// let meta: Metadata =
69///     [("year".to_string(), Value::Int(2026))].into_iter().collect();
70///
71/// assert!(evaluator.evaluate(Some(&meta)));
72/// assert!(!evaluator.evaluate(None));
73/// # Ok(())
74/// # }
75/// ```
76#[derive(Debug, Clone)]
77pub struct FilterEvaluator {
78    filter: Filter,
79}
80
81impl FilterEvaluator {
82    /// Validates `filter` and wraps it for evaluation.
83    ///
84    /// The validation walk is iterative — an explicit work-list, not
85    /// recursion — so `new` itself cannot stack-overflow on a pathological
86    /// input. It returns [`IqdbError::InvalidFilter`] when:
87    ///
88    /// - the filter's nested-boolean depth exceeds [`MAX_FILTER_DEPTH`]; or
89    /// - any [`Filter::In`] node carries more than [`MAX_IN_VALUES`] values.
90    ///
91    /// # Errors
92    ///
93    /// Returns [`IqdbError::InvalidFilter`] for filters that violate either
94    /// cap. The variant carries no extra context — callers that need to
95    /// distinguish "too deep" from "`In` too wide" can re-walk the filter
96    /// themselves or pre-validate against the public consts.
97    ///
98    /// # Examples
99    ///
100    /// ```
101    /// use iqdb_filter::FilterEvaluator;
102    /// use iqdb_types::{Filter, IqdbError, Value};
103    ///
104    /// // Accepted: a small, well-formed filter.
105    /// let ok = FilterEvaluator::new(Filter::eq("k", Value::Int(1)));
106    /// assert!(ok.is_ok());
107    ///
108    /// // Rejected: an oversized `In`.
109    /// let huge = vec![Value::Int(0); iqdb_filter::MAX_IN_VALUES + 1];
110    /// let err = FilterEvaluator::new(Filter::is_in("tag", huge)).unwrap_err();
111    /// assert_eq!(err, IqdbError::InvalidFilter);
112    /// ```
113    pub fn new(filter: Filter) -> Result<Self> {
114        validate(&filter)?;
115        Ok(Self { filter })
116    }
117
118    /// Evaluates the validated filter against `metadata`.
119    ///
120    /// `None` means the record has no metadata at all — distinct from an
121    /// empty `Metadata` only at the API boundary; semantically every leaf
122    /// over `None` and every leaf over an empty `Metadata` evaluates to
123    /// `false` (and `Not` over either evaluates to `true`).
124    ///
125    /// This call is infallible: validation happened in
126    /// [`FilterEvaluator::new`], so the recursive descent is bounded by
127    /// [`MAX_FILTER_DEPTH`] and cannot stack-overflow.
128    ///
129    /// # Examples
130    ///
131    /// ```
132    /// use iqdb_filter::FilterEvaluator;
133    /// use iqdb_types::{Filter, Metadata, Value};
134    ///
135    /// # fn main() -> iqdb_types::Result<()> {
136    /// let evaluator =
137    ///     FilterEvaluator::new(Filter::not(Filter::eq("author", Value::String("ada".into()))))?;
138    ///
139    /// // No metadata → leaf is false → Not flips it to true. This is the
140    /// // documented "records without this field" idiom.
141    /// assert!(evaluator.evaluate(None));
142    ///
143    /// let meta: Metadata = [(
144    ///     "author".to_string(),
145    ///     Value::String("ada".into()),
146    /// )]
147    /// .into_iter()
148    /// .collect();
149    /// assert!(!evaluator.evaluate(Some(&meta)));
150    /// # Ok(())
151    /// # }
152    /// ```
153    #[must_use]
154    pub fn evaluate(&self, metadata: Option<&Metadata>) -> bool {
155        eval::eval(&self.filter, metadata)
156    }
157
158    /// Pre-filter a stream of candidates: yield the key of each candidate whose
159    /// metadata matches, **before** any distance is computed.
160    ///
161    /// This is the [`FilterStrategy::PreFilter`](crate::FilterStrategy::PreFilter)
162    /// shape — reduce the candidate set first, then score only the survivors.
163    /// It is the pattern an exact index uses to skip the distance computation
164    /// for rows the predicate already rejects.
165    ///
166    /// The adapter is lazy and allocation-free: it borrows each candidate's
167    /// metadata and forwards the key untouched. `key` is whatever a caller uses
168    /// to identify a row — a storage index, a [`iqdb_types::VectorId`], a tuple.
169    ///
170    /// # Examples
171    ///
172    /// ```
173    /// use iqdb_filter::FilterEvaluator;
174    /// use iqdb_types::{Filter, Metadata, Value};
175    ///
176    /// # fn main() -> iqdb_types::Result<()> {
177    /// let evaluator = FilterEvaluator::new(Filter::gt("year", Value::Int(2000)))?;
178    ///
179    /// let m2026: Metadata = [("year".to_string(), Value::Int(2026))].into_iter().collect();
180    /// let m1999: Metadata = [("year".to_string(), Value::Int(1999))].into_iter().collect();
181    /// let rows = [(0_usize, Some(&m2026)), (1, Some(&m1999)), (2, None)];
182    ///
183    /// let kept: Vec<usize> = evaluator.prefilter(rows).collect();
184    /// assert_eq!(kept, [0]); // only the 2026 row survives
185    /// # Ok(())
186    /// # }
187    /// ```
188    pub fn prefilter<'a, K, I>(&'a self, candidates: I) -> impl Iterator<Item = K> + 'a
189    where
190        I: IntoIterator<Item = (K, Option<&'a Metadata>)>,
191        I::IntoIter: 'a,
192        K: 'a,
193    {
194        candidates
195            .into_iter()
196            .filter_map(move |(key, metadata)| self.evaluate(metadata).then_some(key))
197    }
198
199    /// Post-filter a stream of already-scored results: yield each hit whose
200    /// metadata matches, **after** the distance scan has ranked candidates.
201    ///
202    /// This is the [`FilterStrategy::PostFilter`](crate::FilterStrategy::PostFilter)
203    /// shape — score everything, then drop the hits the predicate rejects. It
204    /// shares the per-row test with [`prefilter`](Self::prefilter); the
205    /// difference is purely where in the pipeline it runs. Because it is lazy,
206    /// a caller refilling a top-`k` result set can chain `.take(k)` and stop as
207    /// soon as `k` survivors are found.
208    ///
209    /// # Examples
210    ///
211    /// ```
212    /// use iqdb_filter::FilterEvaluator;
213    /// use iqdb_types::{Filter, Metadata, Value};
214    ///
215    /// # fn main() -> iqdb_types::Result<()> {
216    /// let evaluator = FilterEvaluator::new(Filter::eq("lang", Value::String("rust".into())))?;
217    ///
218    /// let rust: Metadata = [("lang".to_string(), Value::String("rust".into()))]
219    ///     .into_iter()
220    ///     .collect();
221    /// let go: Metadata = [("lang".to_string(), Value::String("go".into()))]
222    ///     .into_iter()
223    ///     .collect();
224    ///
225    /// // Hits arrive sorted by distance; keep the first matching one.
226    /// let scored = [("hit-a", Some(&go)), ("hit-b", Some(&rust))];
227    /// let best: Vec<&str> = evaluator.postfilter(scored).take(1).collect();
228    /// assert_eq!(best, ["hit-b"]);
229    /// # Ok(())
230    /// # }
231    /// ```
232    ///
233    /// [`prefilter`]: Self::prefilter
234    pub fn postfilter<'a, H, I>(&'a self, scored: I) -> impl Iterator<Item = H> + 'a
235    where
236        I: IntoIterator<Item = (H, Option<&'a Metadata>)>,
237        I::IntoIter: 'a,
238        H: 'a,
239    {
240        scored
241            .into_iter()
242            .filter_map(move |(hit, metadata)| self.evaluate(metadata).then_some(hit))
243    }
244
245    /// Borrows the inner validated filter.
246    ///
247    /// Useful for adapters that want to introspect the predicate (for
248    /// logging, pushdown, or statistics) without rebuilding it.
249    ///
250    /// # Examples
251    ///
252    /// ```
253    /// use iqdb_filter::FilterEvaluator;
254    /// use iqdb_types::{Filter, Value};
255    ///
256    /// # fn main() -> iqdb_types::Result<()> {
257    /// let evaluator = FilterEvaluator::new(Filter::eq("k", Value::Int(1)))?;
258    /// assert!(matches!(evaluator.filter(), Filter::Eq { .. }));
259    /// # Ok(())
260    /// # }
261    /// ```
262    #[must_use]
263    pub fn filter(&self) -> &Filter {
264        &self.filter
265    }
266}
267
268// One work-list entry: a borrowed sub-filter plus the depth of its parent.
269// The node itself counts toward depth only if it is an And/Or/Not (handled
270// inside `validate`).
271struct Frame<'a> {
272    node: &'a Filter,
273    parent_depth: usize,
274}
275
276fn validate(root: &Filter) -> Result<()> {
277    let mut stack: Vec<Frame<'_>> = Vec::new();
278    stack.push(Frame {
279        node: root,
280        parent_depth: 0,
281    });
282
283    while let Some(Frame { node, parent_depth }) = stack.pop() {
284        match node {
285            Filter::In { values, .. } => {
286                if values.len() > MAX_IN_VALUES {
287                    return Err(IqdbError::InvalidFilter);
288                }
289            }
290            Filter::Eq { .. }
291            | Filter::Neq { .. }
292            | Filter::Lt { .. }
293            | Filter::Lte { .. }
294            | Filter::Gt { .. }
295            | Filter::Gte { .. } => {}
296            Filter::And(children) | Filter::Or(children) => {
297                let depth = parent_depth.saturating_add(1);
298                if depth > MAX_FILTER_DEPTH {
299                    return Err(IqdbError::InvalidFilter);
300                }
301                for child in children {
302                    stack.push(Frame {
303                        node: child,
304                        parent_depth: depth,
305                    });
306                }
307            }
308            Filter::Not(inner) => {
309                let depth = parent_depth.saturating_add(1);
310                if depth > MAX_FILTER_DEPTH {
311                    return Err(IqdbError::InvalidFilter);
312                }
313                stack.push(Frame {
314                    node: inner,
315                    parent_depth: depth,
316                });
317            }
318        }
319    }
320    Ok(())
321}
322
323#[cfg(test)]
324mod tests {
325    #![allow(clippy::unwrap_used)]
326
327    use super::*;
328    use iqdb_types::Value;
329
330    fn nested_not(depth: usize) -> Filter {
331        let mut current = Filter::eq("k", Value::Int(0));
332        for _ in 0..depth {
333            current = Filter::not(current);
334        }
335        current
336    }
337
338    #[test]
339    fn new_accepts_a_leaf() {
340        let f = Filter::eq("k", Value::Int(1));
341        assert!(FilterEvaluator::new(f).is_ok());
342    }
343
344    #[test]
345    fn new_accepts_at_max_depth() {
346        // `MAX_FILTER_DEPTH` Not nodes wrapping a leaf — depth equals the cap.
347        let f = nested_not(MAX_FILTER_DEPTH);
348        assert!(FilterEvaluator::new(f).is_ok());
349    }
350
351    #[test]
352    fn new_rejects_just_over_max_depth() {
353        let f = nested_not(MAX_FILTER_DEPTH + 1);
354        let err = FilterEvaluator::new(f).unwrap_err();
355        assert_eq!(err, IqdbError::InvalidFilter);
356    }
357
358    #[test]
359    fn new_accepts_in_at_cap() {
360        let f = Filter::is_in("tag", vec![Value::Int(0); MAX_IN_VALUES]);
361        assert!(FilterEvaluator::new(f).is_ok());
362    }
363
364    #[test]
365    fn new_rejects_in_just_over_cap() {
366        let f = Filter::is_in("tag", vec![Value::Int(0); MAX_IN_VALUES + 1]);
367        let err = FilterEvaluator::new(f).unwrap_err();
368        assert_eq!(err, IqdbError::InvalidFilter);
369    }
370
371    #[test]
372    fn evaluate_matches_validated_filter() {
373        let evaluator = FilterEvaluator::new(Filter::eq("k", Value::Int(1))).unwrap();
374        let meta: Metadata = [("k".to_string(), Value::Int(1))].into_iter().collect();
375        assert!(evaluator.evaluate(Some(&meta)));
376        assert!(!evaluator.evaluate(None));
377    }
378
379    #[test]
380    fn evaluator_clone_preserves_filter() {
381        let evaluator = FilterEvaluator::new(Filter::eq("k", Value::Int(1))).unwrap();
382        let copy = evaluator.clone();
383        let meta: Metadata = [("k".to_string(), Value::Int(1))].into_iter().collect();
384        assert_eq!(evaluator.evaluate(Some(&meta)), copy.evaluate(Some(&meta)));
385    }
386
387    fn meta(field: &str, value: Value) -> Metadata {
388        [(field.to_string(), value)].into_iter().collect()
389    }
390
391    #[test]
392    fn prefilter_keeps_only_matching_keys() {
393        let evaluator = FilterEvaluator::new(Filter::gt("year", Value::Int(2000))).unwrap();
394        let m2026 = meta("year", Value::Int(2026));
395        let m1999 = meta("year", Value::Int(1999));
396        let rows = [(10_usize, Some(&m2026)), (20, Some(&m1999)), (30, None)];
397
398        let kept: Vec<usize> = evaluator.prefilter(rows).collect();
399        assert_eq!(kept, [10]);
400    }
401
402    #[test]
403    fn postfilter_keeps_only_matching_hits_and_is_lazy() {
404        let evaluator =
405            FilterEvaluator::new(Filter::eq("lang", Value::String("rust".into()))).unwrap();
406        let rust = meta("lang", Value::String("rust".into()));
407        let go = meta("lang", Value::String("go".into()));
408        let scored = [("a", Some(&go)), ("b", Some(&rust)), ("c", Some(&rust))];
409
410        let first: Vec<&str> = evaluator.postfilter(scored).take(1).collect();
411        assert_eq!(first, ["b"]);
412    }
413
414    #[test]
415    fn prefilter_empty_input_yields_nothing() {
416        let evaluator = FilterEvaluator::new(Filter::eq("k", Value::Int(1))).unwrap();
417        let rows: [(usize, Option<&Metadata>); 0] = [];
418        assert_eq!(evaluator.prefilter(rows).count(), 0);
419    }
420}