Skip to main content

quiver_query/
lib.rs

1// SPDX-License-Identifier: AGPL-3.0-only
2//! Query-side filtering: a typed predicate tree over a point's JSON payload.
3//!
4//! How a filter runs is chosen by the embeddable database's planner: when the
5//! filter is selective on secondary-indexed fields it pre-filters to an exact
6//! candidate scan, and otherwise it post-filters the vector-search candidates
7//! (see `quiver-embed` and `docs/index/design.md`). Either way the [`Filter`]
8//! tree is the stable wire shape and is re-checked on every surviving candidate,
9//! so results are exact.
10//!
11//! Field references are dot-paths into the payload object (`"user.age"`).
12
13use std::cmp::Ordering;
14
15use serde::{Deserialize, Serialize};
16use serde_json::Value;
17
18pub mod sparse;
19pub mod sparse_index;
20pub mod tokenize;
21pub use sparse::{DEFAULT_RRF_K0, SPARSE_KEY, SparseVector, rrf_fuse};
22pub use sparse_index::{BM25_B, BM25_K1, SparseInvertedIndex};
23pub use tokenize::{TEXT_KEY, query_term_ids, term_id, text_to_sparse, tokens};
24
/// A predicate over a point's JSON payload.
///
/// Variants are (de)serialized under their snake_case names (`and`, `or`,
/// `not`, `eq`, `ne`, `in`, `lt`, `lte`, `gt`, `gte`, `exists`), as set by the
/// `rename_all` attribute below. Evaluation is done by [`Filter::matches`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Filter {
    /// All sub-filters must match. An empty list matches every payload.
    And(Vec<Filter>),
    /// At least one sub-filter must match. An empty list matches no payload.
    Or(Vec<Filter>),
    /// The sub-filter must not match.
    Not(Box<Filter>),
    /// Field is present and equals value (numbers compared numerically across
    /// int/float). A missing field never matches.
    Eq {
        /// Dot-path to the field.
        field: String,
        /// Value to compare against.
        value: Value,
    },
    /// Field is absent or not equal to value; the exact negation of `Eq` on
    /// the same field and value.
    Ne {
        /// Dot-path to the field.
        field: String,
        /// Value to compare against.
        value: Value,
    },
    /// Field is present and equals one of the values, using the same equality
    /// as `Eq`. An empty list never matches.
    In {
        /// Dot-path to the field.
        field: String,
        /// Allowed values.
        values: Vec<Value>,
    },
    /// Field is strictly less than value (numbers or strings). Strings are
    /// ordered lexicographically by byte value. False when the field is
    /// missing or the two sides are not both numbers or both strings; the same
    /// rule applies to `Lte`, `Gt` and `Gte`.
    Lt {
        /// Dot-path to the field.
        field: String,
        /// Value to compare against.
        value: Value,
    },
    /// Field is less than or equal to value.
    Lte {
        /// Dot-path to the field.
        field: String,
        /// Value to compare against.
        value: Value,
    },
    /// Field is strictly greater than value.
    Gt {
        /// Dot-path to the field.
        field: String,
        /// Value to compare against.
        value: Value,
    },
    /// Field is greater than or equal to value.
    Gte {
        /// Dot-path to the field.
        field: String,
        /// Value to compare against.
        value: Value,
    },
    /// Field is present (any value, including null).
    Exists {
        /// Dot-path to the field.
        field: String,
    },
}
90
91impl Filter {
92    /// Evaluate the predicate against a payload. A missing field makes
93    /// value-comparisons (`Eq`/`In`/`Lt`/…) false; `Ne` is true for a missing
94    /// field; `Exists` reports presence.
95    #[must_use]
96    pub fn matches(&self, payload: &Value) -> bool {
97        match self {
98            Filter::And(subs) => subs.iter().all(|f| f.matches(payload)),
99            Filter::Or(subs) => subs.iter().any(|f| f.matches(payload)),
100            Filter::Not(sub) => !sub.matches(payload),
101            Filter::Eq { field, value } => {
102                field_value(payload, field).is_some_and(|v| values_eq(v, value))
103            }
104            Filter::Ne { field, value } => {
105                !field_value(payload, field).is_some_and(|v| values_eq(v, value))
106            }
107            Filter::In { field, values } => field_value(payload, field)
108                .is_some_and(|v| values.iter().any(|candidate| values_eq(v, candidate))),
109            Filter::Lt { field, value } => compares(payload, field, value, |o| o == Ordering::Less),
110            Filter::Lte { field, value } => {
111                compares(payload, field, value, |o| o != Ordering::Greater)
112            }
113            Filter::Gt { field, value } => {
114                compares(payload, field, value, |o| o == Ordering::Greater)
115            }
116            Filter::Gte { field, value } => {
117                compares(payload, field, value, |o| o != Ordering::Less)
118            }
119            Filter::Exists { field } => field_value(payload, field).is_some(),
120        }
121    }
122}
123
124// Resolve a dot-path into a payload object.
125fn field_value<'a>(payload: &'a Value, field: &str) -> Option<&'a Value> {
126    let mut current = payload;
127    for part in field.split('.') {
128        current = current.get(part)?;
129    }
130    Some(current)
131}
132
133// Equality with numeric coercion: 1 and 1.0 compare equal.
134fn values_eq(a: &Value, b: &Value) -> bool {
135    match (a, b) {
136        (Value::Number(x), Value::Number(y)) => match (x.as_f64(), y.as_f64()) {
137            (Some(x), Some(y)) => x == y,
138            _ => false,
139        },
140        _ => a == b,
141    }
142}
143
144// Order two values when comparable (number/number or string/string).
145fn order(a: &Value, b: &Value) -> Option<Ordering> {
146    match (a, b) {
147        (Value::Number(x), Value::Number(y)) => x.as_f64()?.partial_cmp(&y.as_f64()?),
148        (Value::String(x), Value::String(y)) => Some(x.cmp(y)),
149        _ => None,
150    }
151}
152
153fn compares(payload: &Value, field: &str, value: &Value, pred: impl Fn(Ordering) -> bool) -> bool {
154    field_value(payload, field)
155        .and_then(|v| order(v, value))
156        .is_some_and(pred)
157}
158
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // Shared fixture: a string, an integer, a float, an array and a nested object.
    fn sample() -> Value {
        json!({"city": "paris", "age": 30, "score": 4.5, "tags": ["a", "b"], "user": {"vip": true}})
    }

    // Small constructors so each assertion fits on one line.
    fn eq(field: &str, value: Value) -> Filter {
        Filter::Eq {
            field: field.into(),
            value,
        }
    }

    fn ne(field: &str, value: Value) -> Filter {
        Filter::Ne {
            field: field.into(),
            value,
        }
    }

    fn lt(field: &str, value: Value) -> Filter {
        Filter::Lt {
            field: field.into(),
            value,
        }
    }

    fn lte(field: &str, value: Value) -> Filter {
        Filter::Lte {
            field: field.into(),
            value,
        }
    }

    fn gt(field: &str, value: Value) -> Filter {
        Filter::Gt {
            field: field.into(),
            value,
        }
    }

    fn exists(field: &str) -> Filter {
        Filter::Exists {
            field: field.into(),
        }
    }

    #[test]
    fn eq_and_ne() {
        let payload = sample();

        assert!(eq("city", json!("paris")).matches(&payload));
        assert!(!eq("city", json!("lyon")).matches(&payload));
        // numeric coercion: 30 == 30.0
        assert!(eq("age", json!(30.0)).matches(&payload));
        assert!(ne("city", json!("lyon")).matches(&payload));
        // Ne on a missing field is true.
        assert!(ne("missing", json!(1)).matches(&payload));
    }

    #[test]
    fn ranges_and_in_and_exists() {
        let payload = sample();

        assert!(gt("age", json!(18)).matches(&payload));
        assert!(lte("score", json!(4.5)).matches(&payload));
        assert!(!lt("age", json!(30)).matches(&payload));

        let city_in = Filter::In {
            field: "city".into(),
            values: vec![json!("paris"), json!("lyon")],
        };
        assert!(city_in.matches(&payload));

        assert!(exists("user.vip").matches(&payload));
        assert!(!exists("user.nope").matches(&payload));
        // a comparison against a missing field is false
        assert!(!gt("missing", json!(0)).matches(&payload));
    }

    #[test]
    fn boolean_composition_and_nested_paths() {
        let old_or_vip = Filter::Or(vec![gt("age", json!(100)), eq("user.vip", json!(true))]);
        let not_lyon = Filter::Not(Box::new(eq("city", json!("lyon"))));
        let combined = Filter::And(vec![eq("city", json!("paris")), old_or_vip, not_lyon]);

        assert!(combined.matches(&sample()));
    }

    #[test]
    fn filter_roundtrips_through_json() {
        let original = Filter::And(vec![eq("a", json!(1)), exists("b")]);

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Filter = serde_json::from_str(&encoded).unwrap();

        assert_eq!(original, decoded);
    }
}