iqdb_filter/evaluator.rs
1//! [`FilterEvaluator`] — the public, validated face of the evaluator.
2//!
3//! Construction is the validation gate. After [`FilterEvaluator::new`]
4//! returns `Ok`, the filter is known to satisfy the depth and `In`-cardinality
5//! caps documented on [`MAX_FILTER_DEPTH`] and [`MAX_IN_VALUES`], and the
6//! recursive [`FilterEvaluator::evaluate`] hot path can run without checking
7//! either bound again.
8
9use iqdb_types::{Filter, IqdbError, Metadata, Result};
10
11use crate::eval;
12
13/// Maximum allowed nesting depth of a [`Filter`] passed to
14/// [`FilterEvaluator::new`].
15///
16/// Each `And`/`Or`/`Not` node adds one level of nesting; leaf comparisons
17/// (`Eq`, `Neq`, `Lt`, `Lte`, `Gt`, `Gte`, `In`) do not. A depth of `64`
18/// already exceeds anything a hand-written or generated query is expected to
19/// produce, while sitting well below the recursion limit of every supported
20/// target's default thread stack. Filters that exceed the cap are rejected
21/// with [`IqdbError::InvalidFilter`] so [`FilterEvaluator::evaluate`] cannot
22/// stack-overflow on adversarial input.
23///
24/// Exposed as `pub const` so higher-level validation layers (request parsers,
25/// query builders) can quote the same number in their own error messages.
26///
27/// # Examples
28///
29/// ```
30/// assert!(iqdb_filter::MAX_FILTER_DEPTH >= 32);
31/// ```
32pub const MAX_FILTER_DEPTH: usize = 64;
33
34/// Maximum allowed number of values in a single [`Filter::In`] node.
35///
36/// `Filter::In` is `O(|values|)` per candidate per query; without a cap, an
37/// attacker-supplied predicate of a million values turns every search into a
38/// denial-of-service. `1024` covers realistic "tag in this set" queries while
39/// keeping per-row evaluation cheap. Filters exceeding the cap are rejected
40/// with [`IqdbError::InvalidFilter`].
41///
42/// Exposed as `pub const` so higher layers can pre-check before reaching the
43/// evaluator.
44///
45/// # Examples
46///
47/// ```
48/// assert!(iqdb_filter::MAX_IN_VALUES >= 256);
49/// ```
50pub const MAX_IN_VALUES: usize = 1024;
51
52/// A validated [`Filter`] paired with the canonical evaluator.
53///
54/// Build one with [`FilterEvaluator::new`]; the filter is walked once at
55/// construction to enforce [`MAX_FILTER_DEPTH`] and [`MAX_IN_VALUES`]. After
56/// that, [`FilterEvaluator::evaluate`] is infallible and may be called per
57/// row inside a search loop without revalidation.
58///
59/// # Examples
60///
61/// ```
62/// use iqdb_filter::FilterEvaluator;
63/// use iqdb_types::{Filter, Metadata, Value};
64///
65/// # fn main() -> iqdb_types::Result<()> {
66/// let evaluator = FilterEvaluator::new(Filter::eq("year", Value::Int(2026)))?;
67///
68/// let meta: Metadata =
69/// [("year".to_string(), Value::Int(2026))].into_iter().collect();
70///
71/// assert!(evaluator.evaluate(Some(&meta)));
72/// assert!(!evaluator.evaluate(None));
73/// # Ok(())
74/// # }
75/// ```
76#[derive(Debug, Clone)]
77pub struct FilterEvaluator {
78 filter: Filter,
79}
80
81impl FilterEvaluator {
82 /// Validates `filter` and wraps it for evaluation.
83 ///
84 /// The validation walk is iterative — an explicit work-list, not
85 /// recursion — so `new` itself cannot stack-overflow on a pathological
86 /// input. It returns [`IqdbError::InvalidFilter`] when:
87 ///
88 /// - the filter's nested-boolean depth exceeds [`MAX_FILTER_DEPTH`]; or
89 /// - any [`Filter::In`] node carries more than [`MAX_IN_VALUES`] values.
90 ///
91 /// # Errors
92 ///
93 /// Returns [`IqdbError::InvalidFilter`] for filters that violate either
94 /// cap. The variant carries no extra context — callers that need to
95 /// distinguish "too deep" from "`In` too wide" can re-walk the filter
96 /// themselves or pre-validate against the public consts.
97 ///
98 /// # Examples
99 ///
100 /// ```
101 /// use iqdb_filter::FilterEvaluator;
102 /// use iqdb_types::{Filter, IqdbError, Value};
103 ///
104 /// // Accepted: a small, well-formed filter.
105 /// let ok = FilterEvaluator::new(Filter::eq("k", Value::Int(1)));
106 /// assert!(ok.is_ok());
107 ///
108 /// // Rejected: an oversized `In`.
109 /// let huge = vec![Value::Int(0); iqdb_filter::MAX_IN_VALUES + 1];
110 /// let err = FilterEvaluator::new(Filter::is_in("tag", huge)).unwrap_err();
111 /// assert_eq!(err, IqdbError::InvalidFilter);
112 /// ```
113 pub fn new(filter: Filter) -> Result<Self> {
114 validate(&filter)?;
115 Ok(Self { filter })
116 }
117
118 /// Evaluates the validated filter against `metadata`.
119 ///
120 /// `None` means the record has no metadata at all — distinct from an
121 /// empty `Metadata` only at the API boundary; semantically every leaf
122 /// over `None` and every leaf over an empty `Metadata` evaluates to
123 /// `false` (and `Not` over either evaluates to `true`).
124 ///
125 /// This call is infallible: validation happened in
126 /// [`FilterEvaluator::new`], so the recursive descent is bounded by
127 /// [`MAX_FILTER_DEPTH`] and cannot stack-overflow.
128 ///
129 /// # Examples
130 ///
131 /// ```
132 /// use iqdb_filter::FilterEvaluator;
133 /// use iqdb_types::{Filter, Metadata, Value};
134 ///
135 /// # fn main() -> iqdb_types::Result<()> {
136 /// let evaluator =
137 /// FilterEvaluator::new(Filter::not(Filter::eq("author", Value::String("ada".into()))))?;
138 ///
139 /// // No metadata → leaf is false → Not flips it to true. This is the
140 /// // documented "records without this field" idiom.
141 /// assert!(evaluator.evaluate(None));
142 ///
143 /// let meta: Metadata = [(
144 /// "author".to_string(),
145 /// Value::String("ada".into()),
146 /// )]
147 /// .into_iter()
148 /// .collect();
149 /// assert!(!evaluator.evaluate(Some(&meta)));
150 /// # Ok(())
151 /// # }
152 /// ```
153 #[must_use]
154 pub fn evaluate(&self, metadata: Option<&Metadata>) -> bool {
155 eval::eval(&self.filter, metadata)
156 }
157
158 /// Pre-filter a stream of candidates: yield the key of each candidate whose
159 /// metadata matches, **before** any distance is computed.
160 ///
161 /// This is the [`FilterStrategy::PreFilter`](crate::FilterStrategy::PreFilter)
162 /// shape — reduce the candidate set first, then score only the survivors.
163 /// It is the pattern an exact index uses to skip the distance computation
164 /// for rows the predicate already rejects.
165 ///
166 /// The adapter is lazy and allocation-free: it borrows each candidate's
167 /// metadata and forwards the key untouched. `key` is whatever a caller uses
168 /// to identify a row — a storage index, a [`iqdb_types::VectorId`], a tuple.
169 ///
170 /// # Examples
171 ///
172 /// ```
173 /// use iqdb_filter::FilterEvaluator;
174 /// use iqdb_types::{Filter, Metadata, Value};
175 ///
176 /// # fn main() -> iqdb_types::Result<()> {
177 /// let evaluator = FilterEvaluator::new(Filter::gt("year", Value::Int(2000)))?;
178 ///
179 /// let m2026: Metadata = [("year".to_string(), Value::Int(2026))].into_iter().collect();
180 /// let m1999: Metadata = [("year".to_string(), Value::Int(1999))].into_iter().collect();
181 /// let rows = [(0_usize, Some(&m2026)), (1, Some(&m1999)), (2, None)];
182 ///
183 /// let kept: Vec<usize> = evaluator.prefilter(rows).collect();
184 /// assert_eq!(kept, [0]); // only the 2026 row survives
185 /// # Ok(())
186 /// # }
187 /// ```
188 pub fn prefilter<'a, K, I>(&'a self, candidates: I) -> impl Iterator<Item = K> + 'a
189 where
190 I: IntoIterator<Item = (K, Option<&'a Metadata>)>,
191 I::IntoIter: 'a,
192 K: 'a,
193 {
194 candidates
195 .into_iter()
196 .filter_map(move |(key, metadata)| self.evaluate(metadata).then_some(key))
197 }
198
199 /// Post-filter a stream of already-scored results: yield each hit whose
200 /// metadata matches, **after** the distance scan has ranked candidates.
201 ///
202 /// This is the [`FilterStrategy::PostFilter`](crate::FilterStrategy::PostFilter)
203 /// shape — score everything, then drop the hits the predicate rejects. It
204 /// shares the per-row test with [`prefilter`](Self::prefilter); the
205 /// difference is purely where in the pipeline it runs. Because it is lazy,
206 /// a caller refilling a top-`k` result set can chain `.take(k)` and stop as
207 /// soon as `k` survivors are found.
208 ///
209 /// # Examples
210 ///
211 /// ```
212 /// use iqdb_filter::FilterEvaluator;
213 /// use iqdb_types::{Filter, Metadata, Value};
214 ///
215 /// # fn main() -> iqdb_types::Result<()> {
216 /// let evaluator = FilterEvaluator::new(Filter::eq("lang", Value::String("rust".into())))?;
217 ///
218 /// let rust: Metadata = [("lang".to_string(), Value::String("rust".into()))]
219 /// .into_iter()
220 /// .collect();
221 /// let go: Metadata = [("lang".to_string(), Value::String("go".into()))]
222 /// .into_iter()
223 /// .collect();
224 ///
225 /// // Hits arrive sorted by distance; keep the first matching one.
226 /// let scored = [("hit-a", Some(&go)), ("hit-b", Some(&rust))];
227 /// let best: Vec<&str> = evaluator.postfilter(scored).take(1).collect();
228 /// assert_eq!(best, ["hit-b"]);
229 /// # Ok(())
230 /// # }
231 /// ```
232 ///
233 /// [`prefilter`]: Self::prefilter
234 pub fn postfilter<'a, H, I>(&'a self, scored: I) -> impl Iterator<Item = H> + 'a
235 where
236 I: IntoIterator<Item = (H, Option<&'a Metadata>)>,
237 I::IntoIter: 'a,
238 H: 'a,
239 {
240 scored
241 .into_iter()
242 .filter_map(move |(hit, metadata)| self.evaluate(metadata).then_some(hit))
243 }
244
245 /// Borrows the inner validated filter.
246 ///
247 /// Useful for adapters that want to introspect the predicate (for
248 /// logging, pushdown, or statistics) without rebuilding it.
249 ///
250 /// # Examples
251 ///
252 /// ```
253 /// use iqdb_filter::FilterEvaluator;
254 /// use iqdb_types::{Filter, Value};
255 ///
256 /// # fn main() -> iqdb_types::Result<()> {
257 /// let evaluator = FilterEvaluator::new(Filter::eq("k", Value::Int(1)))?;
258 /// assert!(matches!(evaluator.filter(), Filter::Eq { .. }));
259 /// # Ok(())
260 /// # }
261 /// ```
262 #[must_use]
263 pub fn filter(&self) -> &Filter {
264 &self.filter
265 }
266}
267
268// One work-list entry: a borrowed sub-filter plus the depth of its parent.
269// The node itself counts toward depth only if it is an And/Or/Not (handled
270// inside `validate`).
271struct Frame<'a> {
272 node: &'a Filter,
273 parent_depth: usize,
274}
275
276fn validate(root: &Filter) -> Result<()> {
277 let mut stack: Vec<Frame<'_>> = Vec::new();
278 stack.push(Frame {
279 node: root,
280 parent_depth: 0,
281 });
282
283 while let Some(Frame { node, parent_depth }) = stack.pop() {
284 match node {
285 Filter::In { values, .. } => {
286 if values.len() > MAX_IN_VALUES {
287 return Err(IqdbError::InvalidFilter);
288 }
289 }
290 Filter::Eq { .. }
291 | Filter::Neq { .. }
292 | Filter::Lt { .. }
293 | Filter::Lte { .. }
294 | Filter::Gt { .. }
295 | Filter::Gte { .. } => {}
296 Filter::And(children) | Filter::Or(children) => {
297 let depth = parent_depth.saturating_add(1);
298 if depth > MAX_FILTER_DEPTH {
299 return Err(IqdbError::InvalidFilter);
300 }
301 for child in children {
302 stack.push(Frame {
303 node: child,
304 parent_depth: depth,
305 });
306 }
307 }
308 Filter::Not(inner) => {
309 let depth = parent_depth.saturating_add(1);
310 if depth > MAX_FILTER_DEPTH {
311 return Err(IqdbError::InvalidFilter);
312 }
313 stack.push(Frame {
314 node: inner,
315 parent_depth: depth,
316 });
317 }
318 }
319 }
320 Ok(())
321}
322
323#[cfg(test)]
324mod tests {
325 #![allow(clippy::unwrap_used)]
326
327 use super::*;
328 use iqdb_types::Value;
329
330 fn nested_not(depth: usize) -> Filter {
331 let mut current = Filter::eq("k", Value::Int(0));
332 for _ in 0..depth {
333 current = Filter::not(current);
334 }
335 current
336 }
337
338 #[test]
339 fn new_accepts_a_leaf() {
340 let f = Filter::eq("k", Value::Int(1));
341 assert!(FilterEvaluator::new(f).is_ok());
342 }
343
344 #[test]
345 fn new_accepts_at_max_depth() {
346 // `MAX_FILTER_DEPTH` Not nodes wrapping a leaf — depth equals the cap.
347 let f = nested_not(MAX_FILTER_DEPTH);
348 assert!(FilterEvaluator::new(f).is_ok());
349 }
350
351 #[test]
352 fn new_rejects_just_over_max_depth() {
353 let f = nested_not(MAX_FILTER_DEPTH + 1);
354 let err = FilterEvaluator::new(f).unwrap_err();
355 assert_eq!(err, IqdbError::InvalidFilter);
356 }
357
358 #[test]
359 fn new_accepts_in_at_cap() {
360 let f = Filter::is_in("tag", vec![Value::Int(0); MAX_IN_VALUES]);
361 assert!(FilterEvaluator::new(f).is_ok());
362 }
363
364 #[test]
365 fn new_rejects_in_just_over_cap() {
366 let f = Filter::is_in("tag", vec![Value::Int(0); MAX_IN_VALUES + 1]);
367 let err = FilterEvaluator::new(f).unwrap_err();
368 assert_eq!(err, IqdbError::InvalidFilter);
369 }
370
371 #[test]
372 fn evaluate_matches_validated_filter() {
373 let evaluator = FilterEvaluator::new(Filter::eq("k", Value::Int(1))).unwrap();
374 let meta: Metadata = [("k".to_string(), Value::Int(1))].into_iter().collect();
375 assert!(evaluator.evaluate(Some(&meta)));
376 assert!(!evaluator.evaluate(None));
377 }
378
379 #[test]
380 fn evaluator_clone_preserves_filter() {
381 let evaluator = FilterEvaluator::new(Filter::eq("k", Value::Int(1))).unwrap();
382 let copy = evaluator.clone();
383 let meta: Metadata = [("k".to_string(), Value::Int(1))].into_iter().collect();
384 assert_eq!(evaluator.evaluate(Some(&meta)), copy.evaluate(Some(&meta)));
385 }
386
387 fn meta(field: &str, value: Value) -> Metadata {
388 [(field.to_string(), value)].into_iter().collect()
389 }
390
391 #[test]
392 fn prefilter_keeps_only_matching_keys() {
393 let evaluator = FilterEvaluator::new(Filter::gt("year", Value::Int(2000))).unwrap();
394 let m2026 = meta("year", Value::Int(2026));
395 let m1999 = meta("year", Value::Int(1999));
396 let rows = [(10_usize, Some(&m2026)), (20, Some(&m1999)), (30, None)];
397
398 let kept: Vec<usize> = evaluator.prefilter(rows).collect();
399 assert_eq!(kept, [10]);
400 }
401
402 #[test]
403 fn postfilter_keeps_only_matching_hits_and_is_lazy() {
404 let evaluator =
405 FilterEvaluator::new(Filter::eq("lang", Value::String("rust".into()))).unwrap();
406 let rust = meta("lang", Value::String("rust".into()));
407 let go = meta("lang", Value::String("go".into()));
408 let scored = [("a", Some(&go)), ("b", Some(&rust)), ("c", Some(&rust))];
409
410 let first: Vec<&str> = evaluator.postfilter(scored).take(1).collect();
411 assert_eq!(first, ["b"]);
412 }
413
414 #[test]
415 fn prefilter_empty_input_yields_nothing() {
416 let evaluator = FilterEvaluator::new(Filter::eq("k", Value::Int(1))).unwrap();
417 let rows: [(usize, Option<&Metadata>); 0] = [];
418 assert_eq!(evaluator.prefilter(rows).count(), 0);
419 }
420}