Skip to main content

engine/
filter.rs

1//! Metadata filter evaluation
2//!
3//! Evaluates filter expressions against JSON metadata values.
4
5use common::{FilterCondition, FilterExpression, FilterValue};
6use regex::Regex;
7use serde_json::Value;
8
9/// Evaluates a filter expression against a metadata JSON value
10pub fn evaluate_filter(filter: &FilterExpression, metadata: Option<&Value>) -> bool {
11    match filter {
12        FilterExpression::And { conditions } => {
13            conditions.iter().all(|c| evaluate_filter(c, metadata))
14        }
15        FilterExpression::Or { conditions } => {
16            conditions.iter().any(|c| evaluate_filter(c, metadata))
17        }
18        FilterExpression::Field { field } => {
19            // Evaluate each field condition (typically just one)
20            for (field_name, condition) in field {
21                if !evaluate_field_condition(field_name, condition, metadata) {
22                    return false;
23                }
24            }
25            true
26        }
27    }
28}
29
30/// Evaluates a single field condition against metadata
31fn evaluate_field_condition(
32    field_name: &str,
33    condition: &FilterCondition,
34    metadata: Option<&Value>,
35) -> bool {
36    // Handle nested field access with dot notation (e.g., "user.name")
37    let field_value = get_nested_field(metadata, field_name);
38
39    match condition {
40        FilterCondition::Exists(should_exist) => field_value.is_some() == *should_exist,
41        FilterCondition::Eq(filter_val) => {
42            field_value.is_some_and(|v| compare_values_eq(v, filter_val))
43        }
44        FilterCondition::Ne(filter_val) => {
45            field_value.is_none_or(|v| !compare_values_eq(v, filter_val))
46        }
47        FilterCondition::Gt(filter_val) => field_value.is_some_and(|v| {
48            compare_values_ord(v, filter_val) == Some(std::cmp::Ordering::Greater)
49        }),
50        FilterCondition::Gte(filter_val) => field_value.is_some_and(|v| {
51            matches!(
52                compare_values_ord(v, filter_val),
53                Some(std::cmp::Ordering::Greater | std::cmp::Ordering::Equal)
54            )
55        }),
56        FilterCondition::Lt(filter_val) => field_value
57            .is_some_and(|v| compare_values_ord(v, filter_val) == Some(std::cmp::Ordering::Less)),
58        FilterCondition::Lte(filter_val) => field_value.is_some_and(|v| {
59            matches!(
60                compare_values_ord(v, filter_val),
61                Some(std::cmp::Ordering::Less | std::cmp::Ordering::Equal)
62            )
63        }),
64        FilterCondition::In(values) => {
65            field_value.is_some_and(|v| values.iter().any(|fv| compare_values_eq(v, fv)))
66        }
67        FilterCondition::NotIn(values) => {
68            field_value.is_none_or(|v| !values.iter().any(|fv| compare_values_eq(v, fv)))
69        }
70        // Enhanced string operators (Turbopuffer-inspired)
71        FilterCondition::Contains(substring) => field_value.is_some_and(|v| {
72            if let Value::String(s) = v {
73                s.contains(substring.as_str())
74            } else {
75                false
76            }
77        }),
78        FilterCondition::IContains(substring) => field_value.is_some_and(|v| {
79            if let Value::String(s) = v {
80                s.to_lowercase().contains(&substring.to_lowercase())
81            } else {
82                false
83            }
84        }),
85        FilterCondition::StartsWith(prefix) => field_value.is_some_and(|v| {
86            if let Value::String(s) = v {
87                s.starts_with(prefix.as_str())
88            } else {
89                false
90            }
91        }),
92        FilterCondition::EndsWith(suffix) => field_value.is_some_and(|v| {
93            if let Value::String(s) = v {
94                s.ends_with(suffix.as_str())
95            } else {
96                false
97            }
98        }),
99        FilterCondition::Glob(pattern) => field_value.is_some_and(|v| {
100            if let Value::String(s) = v {
101                glob_match(pattern, s)
102            } else {
103                false
104            }
105        }),
106        FilterCondition::Regex(pattern) => field_value.is_some_and(|v| {
107            if let Value::String(s) = v {
108                Regex::new(pattern)
109                    .map(|re| re.is_match(s))
110                    .unwrap_or(false)
111            } else {
112                false
113            }
114        }),
115    }
116}
117
/// Simple glob pattern matching supporting `*` (any run of characters,
/// including none) and `?` (exactly one character).
///
/// Every other pattern character must match the text literally. The whole
/// `text` has to be consumed by the pattern. Matching works on Unicode scalar
/// values (`char`), so `?` matches one `char`, not one byte.
///
/// The matcher is iterative: it remembers only the most recent `*` and, on a
/// mismatch, lets that star swallow one more character before retrying. This
/// bounds the work at `O(pattern.len() * text.len())` and uses constant stack,
/// so neither patterns with many stars nor very long texts can cause
/// exponential backtracking or deep recursion.
fn glob_match(pattern: &str, text: &str) -> bool {
    let pattern: Vec<char> = pattern.chars().collect();
    let text: Vec<char> = text.chars().collect();

    let mut pi = 0; // next pattern position to match
    let mut ti = 0; // next text position to match
    // (pattern index of the last `*`, text index up to which that star has been extended)
    let mut last_star: Option<(usize, usize)> = None;

    while ti < text.len() {
        match pattern.get(pi).copied() {
            Some('*') => {
                // Start by letting the star match nothing; remember where to resume.
                last_star = Some((pi, ti));
                pi += 1;
            }
            Some(pc) if pc == '?' || pc == text[ti] => {
                pi += 1;
                ti += 1;
            }
            _ => match last_star {
                // Mismatch (or pattern exhausted): grow the last star by one character.
                Some((star_pi, star_ti)) => {
                    pi = star_pi + 1;
                    ti = star_ti + 1;
                    last_star = Some((star_pi, ti));
                }
                // No star to fall back on: the match is impossible.
                None => return false,
            },
        }
    }

    // Text is consumed; only trailing stars (which match the empty string) may remain.
    pattern[pi..].iter().all(|&c| c == '*')
}
170
171/// Get a potentially nested field from metadata using dot notation
172fn get_nested_field<'a>(metadata: Option<&'a Value>, field_path: &str) -> Option<&'a Value> {
173    let metadata = metadata?;
174
175    let parts: Vec<&str> = field_path.split('.').collect();
176    let mut current = metadata;
177
178    for part in parts {
179        match current {
180            Value::Object(map) => {
181                current = map.get(part)?;
182            }
183            _ => return None,
184        }
185    }
186
187    Some(current)
188}
189
190/// Compare a JSON value with a filter value for equality
191fn compare_values_eq(json_val: &Value, filter_val: &FilterValue) -> bool {
192    match (json_val, filter_val) {
193        (Value::String(s), FilterValue::String(fs)) => s == fs,
194        (Value::Number(n), FilterValue::Number(fn_)) => {
195            n.as_f64().is_some_and(|nf| (nf - fn_).abs() < f64::EPSILON)
196        }
197        (Value::Number(n), FilterValue::Integer(fi)) => n.as_i64() == Some(*fi),
198        (Value::Bool(b), FilterValue::Boolean(fb)) => b == fb,
199        // Check if json value is in string array
200        (Value::String(s), FilterValue::StringArray(arr)) => arr.contains(s),
201        // Check if json value is in number array
202        (Value::Number(n), FilterValue::NumberArray(arr)) => n
203            .as_f64()
204            .is_some_and(|nf| arr.iter().any(|&af| (nf - af).abs() < f64::EPSILON)),
205        _ => false,
206    }
207}
208
209/// Compare a JSON value with a filter value for ordering
210fn compare_values_ord(json_val: &Value, filter_val: &FilterValue) -> Option<std::cmp::Ordering> {
211    match (json_val, filter_val) {
212        (Value::String(s), FilterValue::String(fs)) => Some(s.cmp(fs)),
213        (Value::Number(n), FilterValue::Number(fn_)) => {
214            n.as_f64().and_then(|nf| nf.partial_cmp(fn_))
215        }
216        (Value::Number(n), FilterValue::Integer(fi)) => {
217            n.as_f64().and_then(|nf| nf.partial_cmp(&(*fi as f64)))
218        }
219        _ => None,
220    }
221}
222
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use std::collections::HashMap;

    /// Builds a `Field` expression holding a single `(field, condition)` entry.
    fn make_field_filter(field: &str, condition: FilterCondition) -> FilterExpression {
        FilterExpression::Field {
            field: HashMap::from([(field.to_string(), condition)]),
        }
    }

    /// Evaluates a single-field filter against `metadata`.
    fn field_matches(metadata: &Value, field: &str, condition: FilterCondition) -> bool {
        evaluate_filter(&make_field_filter(field, condition), Some(metadata))
    }

    /// Shorthand for a string filter value.
    fn text(value: &str) -> FilterValue {
        FilterValue::String(value.to_string())
    }

    #[test]
    fn test_eq_string() {
        let metadata = json!({"category": "electronics"});

        assert!(field_matches(&metadata, "category", FilterCondition::Eq(text("electronics"))));
        assert!(!field_matches(&metadata, "category", FilterCondition::Eq(text("books"))));
    }

    #[test]
    fn test_eq_number() {
        let metadata = json!({"price": 99.99});

        assert!(field_matches(&metadata, "price", FilterCondition::Eq(FilterValue::Number(99.99))));
        assert!(!field_matches(&metadata, "price", FilterCondition::Eq(FilterValue::Number(100.0))));
    }

    #[test]
    fn test_ne() {
        let metadata = json!({"status": "active"});

        assert!(field_matches(&metadata, "status", FilterCondition::Ne(text("inactive"))));
        assert!(!field_matches(&metadata, "status", FilterCondition::Ne(text("active"))));
    }

    #[test]
    fn test_gt_lt() {
        let metadata = json!({"price": 50.0});

        // Greater than
        assert!(field_matches(&metadata, "price", FilterCondition::Gt(FilterValue::Number(40.0))));
        assert!(!field_matches(&metadata, "price", FilterCondition::Gt(FilterValue::Number(50.0))));

        // Less than
        assert!(field_matches(&metadata, "price", FilterCondition::Lt(FilterValue::Number(60.0))));
        assert!(!field_matches(&metadata, "price", FilterCondition::Lt(FilterValue::Number(50.0))));
    }

    #[test]
    fn test_gte_lte() {
        let metadata = json!({"count": 10});

        // Greater than or equal
        assert!(field_matches(&metadata, "count", FilterCondition::Gte(FilterValue::Integer(10))));
        assert!(!field_matches(&metadata, "count", FilterCondition::Gte(FilterValue::Integer(11))));

        // Less than or equal
        assert!(field_matches(&metadata, "count", FilterCondition::Lte(FilterValue::Integer(10))));
        assert!(!field_matches(&metadata, "count", FilterCondition::Lte(FilterValue::Integer(9))));
    }

    #[test]
    fn test_in() {
        let metadata = json!({"category": "electronics"});

        let hit = FilterCondition::In(vec![text("electronics"), text("computers")]);
        assert!(field_matches(&metadata, "category", hit));

        let miss = FilterCondition::In(vec![text("books"), text("clothing")]);
        assert!(!field_matches(&metadata, "category", miss));
    }

    #[test]
    fn test_not_in() {
        let metadata = json!({"status": "active"});

        let hit = FilterCondition::NotIn(vec![text("deleted"), text("archived")]);
        assert!(field_matches(&metadata, "status", hit));

        let miss = FilterCondition::NotIn(vec![text("active"), text("pending")]);
        assert!(!field_matches(&metadata, "status", miss));
    }

    #[test]
    fn test_exists() {
        let metadata = json!({"name": "test", "value": null});

        assert!(field_matches(&metadata, "name", FilterCondition::Exists(true)));
        assert!(!field_matches(&metadata, "missing", FilterCondition::Exists(true)));
        assert!(field_matches(&metadata, "missing", FilterCondition::Exists(false)));
    }

    #[test]
    fn test_nested_field() {
        let metadata = json!({
            "user": {
                "name": "Alice",
                "profile": {
                    "age": 30
                }
            }
        });

        assert!(field_matches(&metadata, "user.name", FilterCondition::Eq(text("Alice"))));
        assert!(field_matches(
            &metadata,
            "user.profile.age",
            FilterCondition::Gte(FilterValue::Integer(18)),
        ));
    }

    #[test]
    fn test_and_combinator() {
        let metadata = json!({"category": "electronics", "price": 50.0});
        let is_electronics =
            || make_field_filter("category", FilterCondition::Eq(text("electronics")));

        let both_pass = FilterExpression::And {
            conditions: vec![
                is_electronics(),
                make_field_filter("price", FilterCondition::Lt(FilterValue::Number(100.0))),
            ],
        };
        assert!(evaluate_filter(&both_pass, Some(&metadata)));

        // One condition fails
        let one_fails = FilterExpression::And {
            conditions: vec![
                is_electronics(),
                make_field_filter("price", FilterCondition::Gt(FilterValue::Number(100.0))),
            ],
        };
        assert!(!evaluate_filter(&one_fails, Some(&metadata)));
    }

    #[test]
    fn test_or_combinator() {
        let metadata = json!({"category": "electronics", "price": 150.0});
        let is_books = || make_field_filter("category", FilterCondition::Eq(text("books")));

        // One condition passes
        let one_passes = FilterExpression::Or {
            conditions: vec![
                is_books(),
                make_field_filter("price", FilterCondition::Gt(FilterValue::Number(100.0))),
            ],
        };
        assert!(evaluate_filter(&one_passes, Some(&metadata)));

        // Both conditions fail
        let both_fail = FilterExpression::Or {
            conditions: vec![
                is_books(),
                make_field_filter("price", FilterCondition::Lt(FilterValue::Number(100.0))),
            ],
        };
        assert!(!evaluate_filter(&both_fail, Some(&metadata)));
    }

    #[test]
    fn test_no_metadata() {
        let eq_filter = make_field_filter("category", FilterCondition::Eq(text("test")));
        assert!(!evaluate_filter(&eq_filter, None));

        // $exists false should pass when no metadata
        let absent_filter = make_field_filter("anything", FilterCondition::Exists(false));
        assert!(evaluate_filter(&absent_filter, None));
    }

    #[test]
    fn test_boolean() {
        let metadata = json!({"active": true, "verified": false});

        assert!(field_matches(
            &metadata,
            "active",
            FilterCondition::Eq(FilterValue::Boolean(true)),
        ));
        assert!(field_matches(
            &metadata,
            "verified",
            FilterCondition::Eq(FilterValue::Boolean(false)),
        ));
    }

    #[test]
    fn test_contains() {
        let metadata = json!({"description": "Hello World Example"});
        let contains = |needle: &str| {
            field_matches(&metadata, "description", FilterCondition::Contains(needle.to_string()))
        };

        // Case-sensitive contains
        assert!(contains("World"));
        assert!(!contains("world")); // Case mismatch
        assert!(!contains("NotFound"));
    }

    #[test]
    fn test_icontains() {
        let metadata = json!({"description": "Hello World Example"});
        let icontains = |needle: &str| {
            field_matches(&metadata, "description", FilterCondition::IContains(needle.to_string()))
        };

        // Case-insensitive contains
        assert!(icontains("world"));
        assert!(icontains("WORLD"));
        assert!(!icontains("notfound"));
    }

    #[test]
    fn test_starts_with() {
        let metadata = json!({"filename": "document.pdf"});
        let starts_with = |prefix: &str| {
            field_matches(&metadata, "filename", FilterCondition::StartsWith(prefix.to_string()))
        };

        assert!(starts_with("document"));
        assert!(starts_with("doc"));
        assert!(!starts_with("pdf"));
    }

    #[test]
    fn test_ends_with() {
        let metadata = json!({"filename": "document.pdf"});
        let ends_with = |suffix: &str| {
            field_matches(&metadata, "filename", FilterCondition::EndsWith(suffix.to_string()))
        };

        assert!(ends_with(".pdf"));
        assert!(ends_with("pdf"));
        assert!(!ends_with(".txt"));
    }

    #[test]
    fn test_glob() {
        let metadata = json!({"path": "src/main/java/App.java"});
        let path_glob = |pattern: &str| {
            field_matches(&metadata, "path", FilterCondition::Glob(pattern.to_string()))
        };

        // Test * wildcard (any characters)
        assert!(path_glob("*.java"));
        assert!(path_glob("src/*"));
        assert!(path_glob("*App*"));

        // Test ? wildcard (single character)
        let metadata2 = json!({"code": "A1B2"});
        let code_glob = |pattern: &str| {
            field_matches(&metadata2, "code", FilterCondition::Glob(pattern.to_string()))
        };
        assert!(code_glob("A?B?"));
        assert!(!code_glob("A?B")); // Too short

        // No match
        assert!(!path_glob("*.rs"));
    }

    #[test]
    fn test_regex() {
        let metadata = json!({"email": "user@example.com"});
        let email_regex = |pattern: &str| {
            field_matches(&metadata, "email", FilterCondition::Regex(pattern.to_string()))
        };

        // Valid regex match
        assert!(email_regex(r"^[\w.]+@[\w.]+\.\w+$"));
        assert!(email_regex(r"@example\.com$"));

        // No match
        assert!(!email_regex(r"^admin@"));

        // Invalid regex should return false, not panic
        assert!(!email_regex(r"[invalid"));
    }

    #[test]
    fn test_string_operators_on_non_string() {
        let metadata = json!({"count": 42});

        // String operators on non-string fields should return false
        assert!(!field_matches(&metadata, "count", FilterCondition::Contains("4".to_string())));
        assert!(!field_matches(&metadata, "count", FilterCondition::StartsWith("4".to_string())));
        assert!(!field_matches(&metadata, "count", FilterCondition::Glob("*".to_string())));
    }
}