Skip to main content

nodedb_query/scan_filter/
mod.rs

1//! Post-scan filter evaluation.
2//!
3//! `ScanFilter` represents a single filter predicate. `compare_json_values`
4//! provides total ordering for JSON values used in sort and range comparisons.
5//!
6//! Shared between Origin (Control Plane + Data Plane) and Lite.
7
8pub mod like;
9pub mod parse;
10
11pub use like::sql_like_match;
12pub use parse::parse_simple_predicates;
13
/// Operator of a scan-filter predicate.
///
/// Operators are exchanged as short strings (see `FilterOp::parse_op` and
/// `FilterOp::as_str`). Holding them as an enum lets evaluation branch with a
/// `match` instead of comparing strings for every document.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum FilterOp {
    /// `"eq"`: the field and the filter value are equal under `eq_coerced`.
    Eq,
    /// `"ne"` / `"neq"`: negation of [`FilterOp::Eq`].
    Ne,
    /// `"gt"`: the field orders after the filter value under `cmp_coerced`.
    Gt,
    /// `"gte"` / `"ge"`: the field orders after, or equal to, the filter value.
    Gte,
    /// `"lt"`: the field orders before the filter value under `cmp_coerced`.
    Lt,
    /// `"lte"` / `"le"`: the field orders before, or equal to, the filter value.
    Lte,
    /// `"contains"`: substring test; both operands must be strings.
    Contains,
    /// `"like"`: `sql_like_match` with the case-insensitive flag off.
    Like,
    /// `"not_like"`: negated [`FilterOp::Like`]; still requires string operands.
    NotLike,
    /// `"ilike"`: `sql_like_match` with the case-insensitive flag on.
    Ilike,
    /// `"not_ilike"`: negated [`FilterOp::Ilike`]; still requires string operands.
    NotIlike,
    /// `"in"`: the field equals at least one element of the filter array.
    In,
    /// `"not_in"`: the field equals no element of the filter array.
    NotIn,
    /// `"is_null"`: the field is absent or null.
    IsNull,
    /// `"is_not_null"`: the field is present and not null.
    IsNotNull,
    /// `"array_contains"`: the field is an array holding the filter value.
    ArrayContains,
    /// `"array_contains_all"`: the field array holds every element of the filter array.
    ArrayContainsAll,
    /// `"array_overlap"`: the field array shares at least one element with the filter array.
    ArrayOverlap,
    /// `"match_all"`: accepts every document. This is the `Default` value and
    /// also what `FilterOp::parse_op` returns for unrecognised names.
    #[default]
    MatchAll,
    /// `"exists"`: `ScanFilter::matches_value` accepts every document for this
    /// operator. NOTE(review): presumably evaluated by another layer; confirm.
    Exists,
    /// `"not_exists"`: treated like [`FilterOp::Exists`] by
    /// `ScanFilter::matches_value`. NOTE(review): confirm where it is enforced.
    NotExists,
    /// `"or"`: disjunction over `ScanFilter::clauses`.
    Or,
    /// `"gt_col"`: column-vs-column comparison. For all six `*Column`
    /// operators, `value` is a `Value::String` naming the other column, and
    /// both columns are read from the same document row.
    GtColumn,
    /// `"gte_col"`: field is greater than or equal to the other column.
    GteColumn,
    /// `"lt_col"`: field is less than the other column.
    LtColumn,
    /// `"lte_col"`: field is less than or equal to the other column.
    LteColumn,
    /// `"eq_col"`: field equals the other column under `eq_coerced`.
    EqColumn,
    /// `"ne_col"`: negation of [`FilterOp::EqColumn`].
    NeColumn,
}
50
51impl FilterOp {
52    pub fn parse_op(s: &str) -> Self {
53        match s {
54            "eq" => Self::Eq,
55            "ne" | "neq" => Self::Ne,
56            "gt" => Self::Gt,
57            "gte" | "ge" => Self::Gte,
58            "lt" => Self::Lt,
59            "lte" | "le" => Self::Lte,
60            "contains" => Self::Contains,
61            "like" => Self::Like,
62            "not_like" => Self::NotLike,
63            "ilike" => Self::Ilike,
64            "not_ilike" => Self::NotIlike,
65            "in" => Self::In,
66            "not_in" => Self::NotIn,
67            "is_null" => Self::IsNull,
68            "is_not_null" => Self::IsNotNull,
69            "array_contains" => Self::ArrayContains,
70            "array_contains_all" => Self::ArrayContainsAll,
71            "array_overlap" => Self::ArrayOverlap,
72            "match_all" => Self::MatchAll,
73            "exists" => Self::Exists,
74            "not_exists" => Self::NotExists,
75            "or" => Self::Or,
76            "gt_col" => Self::GtColumn,
77            "gte_col" => Self::GteColumn,
78            "lt_col" => Self::LtColumn,
79            "lte_col" => Self::LteColumn,
80            "eq_col" => Self::EqColumn,
81            "ne_col" => Self::NeColumn,
82            _ => Self::MatchAll,
83        }
84    }
85
86    pub fn as_str(&self) -> &'static str {
87        match self {
88            Self::Eq => "eq",
89            Self::Ne => "ne",
90            Self::Gt => "gt",
91            Self::Gte => "gte",
92            Self::Lt => "lt",
93            Self::Lte => "lte",
94            Self::Contains => "contains",
95            Self::Like => "like",
96            Self::NotLike => "not_like",
97            Self::Ilike => "ilike",
98            Self::NotIlike => "not_ilike",
99            Self::In => "in",
100            Self::NotIn => "not_in",
101            Self::IsNull => "is_null",
102            Self::IsNotNull => "is_not_null",
103            Self::ArrayContains => "array_contains",
104            Self::ArrayContainsAll => "array_contains_all",
105            Self::ArrayOverlap => "array_overlap",
106            Self::MatchAll => "match_all",
107            Self::Exists => "exists",
108            Self::NotExists => "not_exists",
109            Self::Or => "or",
110            Self::GtColumn => "gt_col",
111            Self::GteColumn => "gte_col",
112            Self::LtColumn => "lt_col",
113            Self::LteColumn => "lte_col",
114            Self::EqColumn => "eq_col",
115            Self::NeColumn => "ne_col",
116        }
117    }
118}
119
120impl From<&str> for FilterOp {
121    fn from(s: &str) -> Self {
122        Self::parse_op(s)
123    }
124}
125
126impl From<String> for FilterOp {
127    fn from(s: String) -> Self {
128        Self::parse_op(&s)
129    }
130}
131
132impl serde::Serialize for FilterOp {
133    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
134        serializer.serialize_str(self.as_str())
135    }
136}
137
138impl<'de> serde::Deserialize<'de> for FilterOp {
139    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
140        let s = String::deserialize(deserializer)?;
141        Ok(FilterOp::parse_op(&s))
142    }
143}
144
145/// A single filter predicate for document scan evaluation.
146///
147/// Supports simple comparison operators (eq, ne, gt, gte, lt, lte, contains,
148/// is_null, is_not_null) and disjunctive groups via the `"or"` operator.
149///
150/// OR representation: `{"op": "or", "clauses": [[filter1, filter2], [filter3]]}`
151/// means `(filter1 AND filter2) OR filter3`. Each clause is an AND-group;
152/// the document matches if ANY clause group fully matches.
153#[derive(Clone, serde::Serialize, serde::Deserialize, Default)]
154pub struct ScanFilter {
155    #[serde(default)]
156    pub field: String,
157    pub op: FilterOp,
158    #[serde(default)]
159    pub value: nodedb_types::Value,
160    /// Disjunctive clause groups for OR predicates.
161    /// Each inner Vec is an AND-group. The document matches if ANY group matches.
162    #[serde(default)]
163    pub clauses: Vec<Vec<ScanFilter>>,
164}
165
166impl zerompk::ToMessagePack for ScanFilter {
167    fn write<W: zerompk::Write>(&self, writer: &mut W) -> zerompk::Result<()> {
168        writer.write_array_len(4)?;
169        self.field.write(writer)?;
170        writer.write_string(self.op.as_str())?;
171        // Convert nodedb_types::Value → serde_json::Value for wire compat.
172        let json_val: serde_json::Value = self.value.clone().into();
173        nodedb_types::JsonValue(json_val).write(writer)?;
174        self.clauses.write(writer)
175    }
176}
177
178impl<'a> zerompk::FromMessagePack<'a> for ScanFilter {
179    fn read<R: zerompk::Read<'a>>(reader: &mut R) -> zerompk::Result<Self> {
180        reader.check_array_len(4)?;
181        let field = String::read(reader)?;
182        let op_str = String::read(reader)?;
183        let jv = nodedb_types::JsonValue::read(reader)?;
184        let clauses = Vec::<Vec<ScanFilter>>::read(reader)?;
185        Ok(Self {
186            field,
187            op: FilterOp::parse_op(&op_str),
188            // Convert serde_json::Value → nodedb_types::Value at wire boundary.
189            value: nodedb_types::Value::from(jv.0),
190            clauses,
191        })
192    }
193}
194
195impl ScanFilter {
196    /// Evaluate this filter against a `nodedb_types::Value` document.
197    ///
198    /// Same semantics as `matches()` but operates on the native Value type
199    /// instead of serde_json::Value, avoiding lossy JSON roundtrips.
200    pub fn matches_value(&self, doc: &nodedb_types::Value) -> bool {
201        match self.op {
202            FilterOp::MatchAll | FilterOp::Exists | FilterOp::NotExists => return true,
203            FilterOp::Or => {
204                return self
205                    .clauses
206                    .iter()
207                    .any(|clause| clause.iter().all(|f| f.matches_value(doc)));
208            }
209            _ => {}
210        }
211
212        let field_val = match doc.get(&self.field) {
213            Some(v) => v,
214            None => return self.op == FilterOp::IsNull,
215        };
216
217        match self.op {
218            FilterOp::Eq => self.value.eq_coerced(field_val),
219            FilterOp::Ne => !self.value.eq_coerced(field_val),
220            FilterOp::Gt => self.value.cmp_coerced(field_val) == std::cmp::Ordering::Less,
221            FilterOp::Gte => {
222                let cmp = self.value.cmp_coerced(field_val);
223                cmp == std::cmp::Ordering::Less || cmp == std::cmp::Ordering::Equal
224            }
225            FilterOp::Lt => self.value.cmp_coerced(field_val) == std::cmp::Ordering::Greater,
226            FilterOp::Lte => {
227                let cmp = self.value.cmp_coerced(field_val);
228                cmp == std::cmp::Ordering::Greater || cmp == std::cmp::Ordering::Equal
229            }
230            FilterOp::Contains => {
231                if let (Some(s), Some(pattern)) = (field_val.as_str(), self.value.as_str()) {
232                    s.contains(pattern)
233                } else {
234                    false
235                }
236            }
237            FilterOp::Like => {
238                if let (Some(s), Some(pattern)) = (field_val.as_str(), self.value.as_str()) {
239                    like::sql_like_match(s, pattern, false)
240                } else {
241                    false
242                }
243            }
244            FilterOp::NotLike => {
245                if let (Some(s), Some(pattern)) = (field_val.as_str(), self.value.as_str()) {
246                    !like::sql_like_match(s, pattern, false)
247                } else {
248                    false
249                }
250            }
251            FilterOp::Ilike => {
252                if let (Some(s), Some(pattern)) = (field_val.as_str(), self.value.as_str()) {
253                    like::sql_like_match(s, pattern, true)
254                } else {
255                    false
256                }
257            }
258            FilterOp::NotIlike => {
259                if let (Some(s), Some(pattern)) = (field_val.as_str(), self.value.as_str()) {
260                    !like::sql_like_match(s, pattern, true)
261                } else {
262                    false
263                }
264            }
265            FilterOp::In => {
266                if let Some(mut iter) = self.value.as_array_iter() {
267                    iter.any(|v| v.eq_coerced(field_val))
268                } else {
269                    false
270                }
271            }
272            FilterOp::NotIn => {
273                if let Some(mut iter) = self.value.as_array_iter() {
274                    !iter.any(|v| v.eq_coerced(field_val))
275                } else {
276                    true
277                }
278            }
279            FilterOp::IsNull => field_val.is_null(),
280            FilterOp::IsNotNull => !field_val.is_null(),
281            FilterOp::ArrayContains => {
282                if let Some(arr) = field_val.as_array() {
283                    arr.iter().any(|v| self.value.eq_coerced(v))
284                } else {
285                    false
286                }
287            }
288            FilterOp::ArrayContainsAll => {
289                if let (Some(field_arr), Some(mut needles)) =
290                    (field_val.as_array(), self.value.as_array_iter())
291                {
292                    needles.all(|needle| field_arr.iter().any(|v| needle.eq_coerced(v)))
293                } else {
294                    false
295                }
296            }
297            FilterOp::ArrayOverlap => {
298                if let (Some(field_arr), Some(mut needles)) =
299                    (field_val.as_array(), self.value.as_array_iter())
300                {
301                    needles.any(|needle| field_arr.iter().any(|v| needle.eq_coerced(v)))
302                } else {
303                    false
304                }
305            }
306            FilterOp::GtColumn
307            | FilterOp::GteColumn
308            | FilterOp::LtColumn
309            | FilterOp::LteColumn
310            | FilterOp::EqColumn
311            | FilterOp::NeColumn => {
312                let other_col = match &self.value {
313                    nodedb_types::Value::String(s) => s.as_str(),
314                    _ => return false,
315                };
316                let other_val = match doc.get(other_col) {
317                    Some(v) => v,
318                    None => return false,
319                };
320                match self.op {
321                    FilterOp::GtColumn => {
322                        field_val.cmp_coerced(other_val) == std::cmp::Ordering::Greater
323                    }
324                    FilterOp::GteColumn => {
325                        field_val.cmp_coerced(other_val) != std::cmp::Ordering::Less
326                    }
327                    FilterOp::LtColumn => {
328                        field_val.cmp_coerced(other_val) == std::cmp::Ordering::Less
329                    }
330                    FilterOp::LteColumn => {
331                        field_val.cmp_coerced(other_val) != std::cmp::Ordering::Greater
332                    }
333                    FilterOp::EqColumn => field_val.eq_coerced(other_val),
334                    FilterOp::NeColumn => !field_val.eq_coerced(other_val),
335                    _ => false,
336                }
337            }
338            _ => false,
339        }
340    }
341}
342
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// `eq` with a string operand against an integer field.
    #[test]
    fn filter_eq_coercion() {
        let encoded =
            nodedb_types::json_msgpack::json_to_msgpack(&json!({"age": 25})).unwrap();
        let filter = ScanFilter {
            field: String::from("age"),
            op: FilterOp::from("eq"),
            value: nodedb_types::Value::String("25".into()),
            ..ScanFilter::default()
        };
        assert!(filter.matches_binary(&encoded));
    }

    /// `gt` with an integer operand against a string field.
    #[test]
    fn filter_gt_coercion() {
        let encoded =
            nodedb_types::json_msgpack::json_to_msgpack(&json!({"score": "90"})).unwrap();
        let filter = ScanFilter {
            field: String::from("score"),
            op: FilterOp::from("gt"),
            value: nodedb_types::Value::Integer(80),
            ..ScanFilter::default()
        };
        assert!(filter.matches_binary(&encoded));
    }

    /// `%` wildcard at either end of the pattern, case-sensitive.
    #[test]
    fn like_basic() {
        let subject = "hello world";
        assert!(sql_like_match(subject, "%world", false));
        assert!(sql_like_match(subject, "hello%", false));
        assert!(!sql_like_match(subject, "xyz%", false));
    }

    /// The case-insensitive flag ignores letter case on both sides.
    #[test]
    fn ilike_case_insensitive() {
        assert!(sql_like_match("Hello", "hello", true));
        assert!(sql_like_match("WORLD", "%world%", true));
    }
}