Skip to main content

rvf_runtime/
filter.rs

1//! Filter expression evaluation for metadata-based vector filtering.
2//!
3//! Filter expressions are boolean predicate trees evaluated against
4//! per-vector metadata. The runtime selects a strategy (pre-filter,
5//! intra-filter, or post-filter) based on estimated selectivity.
6
7use crate::options::MetadataValue;
8
9/// A filter expression for metadata-based vector filtering.
10///
11/// Leaf nodes compare a metadata field against a literal value.
12/// Internal nodes combine sub-expressions with boolean logic.
13#[derive(Clone, Debug)]
14pub enum FilterExpr {
15    /// field == value
16    Eq(u16, FilterValue),
17    /// field != value
18    Ne(u16, FilterValue),
19    /// field < value
20    Lt(u16, FilterValue),
21    /// field <= value
22    Le(u16, FilterValue),
23    /// field > value
24    Gt(u16, FilterValue),
25    /// field >= value
26    Ge(u16, FilterValue),
27    /// field in [values]
28    In(u16, Vec<FilterValue>),
29    /// field in [low, high)
30    Range(u16, FilterValue, FilterValue),
31    /// All sub-expressions must match.
32    And(Vec<FilterExpr>),
33    /// Any sub-expression must match.
34    Or(Vec<FilterExpr>),
35    /// Negate the sub-expression.
36    Not(Box<FilterExpr>),
37}
38
39/// A typed value used in filter comparisons.
40#[derive(Clone, Debug, PartialEq)]
41pub enum FilterValue {
42    U64(u64),
43    I64(i64),
44    F64(f64),
45    String(String),
46    Bool(bool),
47}
48
49impl FilterValue {
50    /// Compare two filter values. Returns None if types are incompatible.
51    fn partial_cmp_value(&self, other: &Self) -> Option<std::cmp::Ordering> {
52        match (self, other) {
53            (FilterValue::U64(a), FilterValue::U64(b)) => a.partial_cmp(b),
54            (FilterValue::I64(a), FilterValue::I64(b)) => a.partial_cmp(b),
55            (FilterValue::F64(a), FilterValue::F64(b)) => a.partial_cmp(b),
56            (FilterValue::String(a), FilterValue::String(b)) => a.partial_cmp(b),
57            (FilterValue::Bool(a), FilterValue::Bool(b)) => a.partial_cmp(b),
58            _ => None,
59        }
60    }
61}
62
63/// In-memory metadata store for filter evaluation.
64/// Maps (vector_id, field_id) -> MetadataValue.
65pub(crate) struct MetadataStore {
66    /// Entries indexed by vector position.
67    entries: Vec<Vec<(u16, FilterValue)>>,
68    /// Mapping from vector_id to position index.
69    id_to_pos: std::collections::HashMap<u64, usize>,
70}
71
72impl MetadataStore {
73    pub(crate) fn new() -> Self {
74        Self {
75            entries: Vec::new(),
76            id_to_pos: std::collections::HashMap::new(),
77        }
78    }
79
80    /// Add metadata for a vector. `fields` are (field_id, value) pairs.
81    pub(crate) fn insert(&mut self, vector_id: u64, fields: Vec<(u16, FilterValue)>) {
82        let pos = self.entries.len();
83        self.id_to_pos.insert(vector_id, pos);
84        self.entries.push(fields);
85    }
86
87    /// Get a field value for a vector.
88    pub(crate) fn get_field(&self, vector_id: u64, field_id: u16) -> Option<&FilterValue> {
89        let pos = self.id_to_pos.get(&vector_id)?;
90        self.entries.get(*pos)?.iter().find(|(fid, _)| *fid == field_id).map(|(_, v)| v)
91    }
92
93    /// Remove all metadata for the given vector IDs.
94    pub(crate) fn remove_ids(&mut self, ids: &[u64]) {
95        for id in ids {
96            self.id_to_pos.remove(id);
97        }
98    }
99
100    /// Return vector count tracked by the metadata store.
101    #[allow(dead_code)]
102    pub(crate) fn len(&self) -> usize {
103        self.id_to_pos.len()
104    }
105}
106
107/// Evaluate a filter expression against a single vector's metadata.
108pub(crate) fn evaluate(expr: &FilterExpr, vector_id: u64, meta: &MetadataStore) -> bool {
109    match expr {
110        FilterExpr::Eq(field_id, val) => {
111            meta.get_field(vector_id, *field_id)
112                .map(|v| v == val)
113                .unwrap_or(false)
114        }
115        FilterExpr::Ne(field_id, val) => {
116            meta.get_field(vector_id, *field_id)
117                .map(|v| v != val)
118                .unwrap_or(true)
119        }
120        FilterExpr::Lt(field_id, val) => {
121            meta.get_field(vector_id, *field_id)
122                .and_then(|v| v.partial_cmp_value(val))
123                .map(|ord| ord == std::cmp::Ordering::Less)
124                .unwrap_or(false)
125        }
126        FilterExpr::Le(field_id, val) => {
127            meta.get_field(vector_id, *field_id)
128                .and_then(|v| v.partial_cmp_value(val))
129                .map(|ord| ord != std::cmp::Ordering::Greater)
130                .unwrap_or(false)
131        }
132        FilterExpr::Gt(field_id, val) => {
133            meta.get_field(vector_id, *field_id)
134                .and_then(|v| v.partial_cmp_value(val))
135                .map(|ord| ord == std::cmp::Ordering::Greater)
136                .unwrap_or(false)
137        }
138        FilterExpr::Ge(field_id, val) => {
139            meta.get_field(vector_id, *field_id)
140                .and_then(|v| v.partial_cmp_value(val))
141                .map(|ord| ord != std::cmp::Ordering::Less)
142                .unwrap_or(false)
143        }
144        FilterExpr::In(field_id, vals) => {
145            meta.get_field(vector_id, *field_id)
146                .map(|v| vals.contains(v))
147                .unwrap_or(false)
148        }
149        FilterExpr::Range(field_id, low, high) => {
150            meta.get_field(vector_id, *field_id)
151                .and_then(|v| {
152                    let ge_low = v.partial_cmp_value(low)
153                        .map(|o| o != std::cmp::Ordering::Less)?;
154                    let lt_high = v.partial_cmp_value(high)
155                        .map(|o| o == std::cmp::Ordering::Less)?;
156                    Some(ge_low && lt_high)
157                })
158                .unwrap_or(false)
159        }
160        FilterExpr::And(exprs) => exprs.iter().all(|e| evaluate(e, vector_id, meta)),
161        FilterExpr::Or(exprs) => exprs.iter().any(|e| evaluate(e, vector_id, meta)),
162        FilterExpr::Not(expr) => !evaluate(expr, vector_id, meta),
163    }
164}
165
166/// Convert a MetadataValue (options module) to a FilterValue for evaluation.
167pub(crate) fn metadata_value_to_filter(mv: &MetadataValue) -> FilterValue {
168    match mv {
169        MetadataValue::U64(v) => FilterValue::U64(*v),
170        MetadataValue::I64(v) => FilterValue::I64(*v),
171        MetadataValue::F64(v) => FilterValue::F64(*v),
172        MetadataValue::String(v) => FilterValue::String(v.clone()),
173        MetadataValue::Bytes(_) => FilterValue::String(String::new()),
174    }
175}
176
177#[cfg(test)]
178mod tests {
179    use super::*;
180
181    fn make_store() -> MetadataStore {
182        let mut store = MetadataStore::new();
183        store.insert(0, vec![
184            (0, FilterValue::String("apple".into())),
185            (1, FilterValue::U64(100)),
186        ]);
187        store.insert(1, vec![
188            (0, FilterValue::String("banana".into())),
189            (1, FilterValue::U64(200)),
190        ]);
191        store.insert(2, vec![
192            (0, FilterValue::String("apple".into())),
193            (1, FilterValue::U64(300)),
194        ]);
195        store
196    }
197
198    #[test]
199    fn filter_eq() {
200        let store = make_store();
201        let expr = FilterExpr::Eq(0, FilterValue::String("apple".into()));
202        assert!(evaluate(&expr, 0, &store));
203        assert!(!evaluate(&expr, 1, &store));
204        assert!(evaluate(&expr, 2, &store));
205    }
206
207    #[test]
208    fn filter_ne() {
209        let store = make_store();
210        let expr = FilterExpr::Ne(0, FilterValue::String("apple".into()));
211        assert!(!evaluate(&expr, 0, &store));
212        assert!(evaluate(&expr, 1, &store));
213    }
214
215    #[test]
216    fn filter_range() {
217        let store = make_store();
218        let expr = FilterExpr::Range(1, FilterValue::U64(150), FilterValue::U64(250));
219        assert!(!evaluate(&expr, 0, &store)); // 100 < 150
220        assert!(evaluate(&expr, 1, &store));  // 200 in [150, 250)
221        assert!(!evaluate(&expr, 2, &store)); // 300 >= 250
222    }
223
224    #[test]
225    fn filter_and_or() {
226        let store = make_store();
227        let expr = FilterExpr::And(vec![
228            FilterExpr::Eq(0, FilterValue::String("apple".into())),
229            FilterExpr::Gt(1, FilterValue::U64(150)),
230        ]);
231        assert!(!evaluate(&expr, 0, &store)); // apple but 100 <= 150
232        assert!(!evaluate(&expr, 1, &store)); // banana
233        assert!(evaluate(&expr, 2, &store));  // apple and 300 > 150
234    }
235
236    #[test]
237    fn filter_not() {
238        let store = make_store();
239        let expr = FilterExpr::Not(Box::new(
240            FilterExpr::Eq(0, FilterValue::String("apple".into())),
241        ));
242        assert!(!evaluate(&expr, 0, &store));
243        assert!(evaluate(&expr, 1, &store));
244    }
245
246    #[test]
247    fn filter_in() {
248        let store = make_store();
249        let expr = FilterExpr::In(1, vec![FilterValue::U64(100), FilterValue::U64(300)]);
250        assert!(evaluate(&expr, 0, &store));
251        assert!(!evaluate(&expr, 1, &store));
252        assert!(evaluate(&expr, 2, &store));
253    }
254}