1use std::cmp::Ordering;
14
15use serde::{Deserialize, Serialize};
16use serde_json::Value;
17
18pub mod sparse;
19pub mod sparse_index;
20pub mod tokenize;
21pub use sparse::{DEFAULT_RRF_K0, SPARSE_KEY, SparseVector, rrf_fuse};
22pub use sparse_index::{BM25_B, BM25_K1, SparseInvertedIndex};
23pub use tokenize::{TEXT_KEY, query_term_ids, term_id, text_to_sparse, tokens};
24
25#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
27#[serde(rename_all = "snake_case")]
28pub enum Filter {
29 And(Vec<Filter>),
31 Or(Vec<Filter>),
33 Not(Box<Filter>),
35 Eq {
37 field: String,
39 value: Value,
41 },
42 Ne {
44 field: String,
46 value: Value,
48 },
49 In {
51 field: String,
53 values: Vec<Value>,
55 },
56 Lt {
58 field: String,
60 value: Value,
62 },
63 Lte {
65 field: String,
67 value: Value,
69 },
70 Gt {
72 field: String,
74 value: Value,
76 },
77 Gte {
79 field: String,
81 value: Value,
83 },
84 Exists {
86 field: String,
88 },
89}
90
91impl Filter {
92 #[must_use]
96 pub fn matches(&self, payload: &Value) -> bool {
97 match self {
98 Filter::And(subs) => subs.iter().all(|f| f.matches(payload)),
99 Filter::Or(subs) => subs.iter().any(|f| f.matches(payload)),
100 Filter::Not(sub) => !sub.matches(payload),
101 Filter::Eq { field, value } => {
102 field_value(payload, field).is_some_and(|v| values_eq(v, value))
103 }
104 Filter::Ne { field, value } => {
105 !field_value(payload, field).is_some_and(|v| values_eq(v, value))
106 }
107 Filter::In { field, values } => field_value(payload, field)
108 .is_some_and(|v| values.iter().any(|candidate| values_eq(v, candidate))),
109 Filter::Lt { field, value } => compares(payload, field, value, |o| o == Ordering::Less),
110 Filter::Lte { field, value } => {
111 compares(payload, field, value, |o| o != Ordering::Greater)
112 }
113 Filter::Gt { field, value } => {
114 compares(payload, field, value, |o| o == Ordering::Greater)
115 }
116 Filter::Gte { field, value } => {
117 compares(payload, field, value, |o| o != Ordering::Less)
118 }
119 Filter::Exists { field } => field_value(payload, field).is_some(),
120 }
121 }
122}
123
124fn field_value<'a>(payload: &'a Value, field: &str) -> Option<&'a Value> {
126 let mut current = payload;
127 for part in field.split('.') {
128 current = current.get(part)?;
129 }
130 Some(current)
131}
132
133fn values_eq(a: &Value, b: &Value) -> bool {
135 match (a, b) {
136 (Value::Number(x), Value::Number(y)) => match (x.as_f64(), y.as_f64()) {
137 (Some(x), Some(y)) => x == y,
138 _ => false,
139 },
140 _ => a == b,
141 }
142}
143
144fn order(a: &Value, b: &Value) -> Option<Ordering> {
146 match (a, b) {
147 (Value::Number(x), Value::Number(y)) => x.as_f64()?.partial_cmp(&y.as_f64()?),
148 (Value::String(x), Value::String(y)) => Some(x.cmp(y)),
149 _ => None,
150 }
151}
152
153fn compares(payload: &Value, field: &str, value: &Value, pred: impl Fn(Ordering) -> bool) -> bool {
154 field_value(payload, field)
155 .and_then(|v| order(v, value))
156 .is_some_and(pred)
157}
158
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Payload every test evaluates its filters against.
    fn sample() -> Value {
        json!({"city": "paris", "age": 30, "score": 4.5, "tags": ["a", "b"], "user": {"vip": true}})
    }

    // Small constructors so each assertion reads as a single line.

    fn eq(field: &str, value: Value) -> Filter {
        Filter::Eq {
            field: field.into(),
            value,
        }
    }

    fn ne(field: &str, value: Value) -> Filter {
        Filter::Ne {
            field: field.into(),
            value,
        }
    }

    fn lt(field: &str, value: Value) -> Filter {
        Filter::Lt {
            field: field.into(),
            value,
        }
    }

    fn lte(field: &str, value: Value) -> Filter {
        Filter::Lte {
            field: field.into(),
            value,
        }
    }

    fn gt(field: &str, value: Value) -> Filter {
        Filter::Gt {
            field: field.into(),
            value,
        }
    }

    fn one_of(field: &str, values: Vec<Value>) -> Filter {
        Filter::In {
            field: field.into(),
            values,
        }
    }

    fn exists(field: &str) -> Filter {
        Filter::Exists {
            field: field.into(),
        }
    }

    #[test]
    fn eq_and_ne() {
        let payload = sample();

        assert!(eq("city", json!("paris")).matches(&payload));
        assert!(!eq("city", json!("lyon")).matches(&payload));
        // The payload stores the integer 30; the float literal must still be equal.
        assert!(eq("age", json!(30.0)).matches(&payload));
        assert!(ne("city", json!("lyon")).matches(&payload));
        // A missing field is "not equal" to anything.
        assert!(ne("missing", json!(1)).matches(&payload));
    }

    #[test]
    fn ranges_and_in_and_exists() {
        let payload = sample();

        assert!(gt("age", json!(18)).matches(&payload));
        assert!(lte("score", json!(4.5)).matches(&payload));
        assert!(!lt("age", json!(30)).matches(&payload));
        assert!(one_of("city", vec![json!("paris"), json!("lyon")]).matches(&payload));
        assert!(exists("user.vip").matches(&payload));
        assert!(!exists("user.nope").matches(&payload));
        // Range filters never match a field that is absent.
        assert!(!gt("missing", json!(0)).matches(&payload));
    }

    #[test]
    fn boolean_composition_and_nested_paths() {
        let vip_or_centenarian = Filter::Or(vec![
            gt("age", json!(100)),
            eq("user.vip", json!(true)),
        ]);
        let not_lyon = Filter::Not(Box::new(eq("city", json!("lyon"))));
        let combined = Filter::And(vec![
            eq("city", json!("paris")),
            vip_or_centenarian,
            not_lyon,
        ]);

        assert!(combined.matches(&sample()));
    }

    #[test]
    fn filter_roundtrips_through_json() {
        let original = Filter::And(vec![eq("a", json!(1)), exists("b")]);

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Filter = serde_json::from_str(&encoded).unwrap();

        assert_eq!(original, decoded);
    }
}