Skip to main content

sqlly_datatable/
filter.rs

1//! Rich per-column filter model plus the pure matching pipeline.
2//!
3//! A [`ColumnFilter`] combines two independent, AND-composed mechanisms that
4//! mirror the Numbers-style filter popover:
5//!
6//! * an optional operator **predicate** (the "Choose One" rule) — a text
7//!   "like" operation (contains / begins with / regex …) for string columns,
8//!   or a numeric/date comparison (greater than / between …) for numeric and
9//!   date columns; and
10//! * an optional **value set** (the searchable checkbox list) — the exact set
11//!   of *formatted* values that are allowed through.
12//!
13//! Either half is inert when unset ([`FilterPredicate::None`] and
14//! `values == None` respectively), so an empty [`ColumnFilter`] passes every
15//! row. This module is intentionally GPUI-free: it operates on
16//! [`CellValue`]/[`ResolvedColumnFormat`] and is reusable from exports, tests,
17//! and server-side previews.
18
19use std::cmp::Ordering;
20use std::collections::HashSet;
21
22use crate::config::ResolvedColumnFormat;
23use crate::data::{CellValue, ColumnKind};
24use crate::format::format_cell;
25
/// The "like"-style operators available on string-like columns
/// ([`ColumnKind::Text`], [`ColumnKind::Boolean`], [`ColumnKind::None`]).
///
/// Every operator is evaluated against the *formatted* cell text. Apart from
/// [`TextOp::Matches`], both sides are lowercased and compared as plain
/// strings. [`TextOp::Matches`] instead treats the operand as a
/// case-insensitive regular expression; a pattern that fails to compile
/// matches no rows.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TextOp {
    /// Passes when the formatted value contains the operand.
    Contains,
    /// Passes when the formatted value lacks the operand.
    DoesNotContain,
    /// Passes when the formatted value has the operand as a prefix.
    BeginsWith,
    /// Passes when the formatted value has the operand as a suffix.
    EndsWith,
    /// Passes when the formatted value equals the operand (ignoring case).
    Is,
    /// Passes when the formatted value differs from the operand (ignoring case).
    IsNot,
    /// Passes when the operand, read as a regex, matches the formatted value.
    Matches,
}
50
/// Comparison operators available on [`ColumnKind::Integer`],
/// [`ColumnKind::Decimal`], and [`ColumnKind::Date`] columns.
///
/// Both operands are carried as `f64`. For dates the operand is the parsed
/// calendar date expressed in Unix seconds. Ordering is decided with
/// [`f64::total_cmp`], which gives `NaN` a fixed position and avoids any
/// float-equality lint.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum NumberOp {
    /// Passes when `value == a`.
    Eq,
    /// Passes when `value != a`.
    Ne,
    /// Passes when `value > a`.
    Gt,
    /// Passes when `value >= a`.
    Ge,
    /// Passes when `value < a`.
    Lt,
    /// Passes when `value <= a`.
    Le,
    /// Passes when `min(a,b) <= value <= max(a,b)`.
    Between,
    /// Passes when `value < min(a,b) || value > max(a,b)`.
    NotBetween,
}
76
77/// The operator-rule half of a [`ColumnFilter`].
78#[derive(Clone, Debug, Default, PartialEq)]
79pub enum FilterPredicate {
80    /// No operator rule; this half is inert.
81    #[default]
82    None,
83    /// A text "like" rule against the formatted value.
84    Text {
85        /// Which text operation to apply.
86        op: TextOp,
87        /// The right-hand operand (a substring, exact value, or regex).
88        operand: String,
89    },
90    /// A numeric/date comparison. `b` is only used by
91    /// [`NumberOp::Between`]/[`NumberOp::NotBetween`].
92    Number {
93        /// Which comparison to apply.
94        op: NumberOp,
95        /// Primary operand.
96        a: f64,
97        /// Secondary operand (range upper bound); ignored by single-operand ops.
98        b: f64,
99    },
100}
101
102/// A single column's committed filter: an optional operator predicate ANDed
103/// with an optional allow-list of formatted values.
104#[derive(Clone, Debug, Default, PartialEq)]
105pub struct ColumnFilter {
106    /// Operator rule ("Choose One"). [`FilterPredicate::None`] => inert.
107    pub predicate: FilterPredicate,
108    /// Allowed *formatted* values (checkbox list). `None` => every value
109    /// passes; `Some(set)` => only values whose formatted text is in `set`.
110    pub values: Option<HashSet<String>>,
111}
112
113impl ColumnFilter {
114    /// `true` when either half constrains rows (used to paint the
115    /// filtered-column marker and to decide whether "Clear filter" applies).
116    #[must_use]
117    pub fn is_active(&self) -> bool {
118        !matches!(self.predicate, FilterPredicate::None) || self.values.is_some()
119    }
120}
121
122/// `true` when a column of `kind` uses numeric/date operators rather than the
123/// text "like" operator set.
124#[must_use]
125pub fn uses_number_ops(kind: ColumnKind) -> bool {
126    matches!(
127        kind,
128        ColumnKind::Integer | ColumnKind::Decimal | ColumnKind::Date
129    )
130}
131
132/// Evaluate a cell against a column filter. Empty/inert filters pass.
133#[must_use]
134pub fn cell_passes_filter(
135    value: &CellValue,
136    fmt: &ResolvedColumnFormat,
137    filter: &ColumnFilter,
138) -> bool {
139    if !predicate_matches(value, fmt, &filter.predicate) {
140        return false;
141    }
142    if let Some(allowed) = &filter.values {
143        let (formatted, _) = format_cell(value, fmt);
144        if !allowed.contains(&formatted) {
145            return false;
146        }
147    }
148    true
149}
150
151fn predicate_matches(
152    value: &CellValue,
153    fmt: &ResolvedColumnFormat,
154    predicate: &FilterPredicate,
155) -> bool {
156    match predicate {
157        FilterPredicate::None => true,
158        FilterPredicate::Text { op, operand } => text_matches(value, fmt, *op, operand),
159        FilterPredicate::Number { op, a, b } => number_matches(value, *op, *a, *b),
160    }
161}
162
163fn text_matches(value: &CellValue, fmt: &ResolvedColumnFormat, op: TextOp, operand: &str) -> bool {
164    let (formatted, _) = format_cell(value, fmt);
165    if op == TextOp::Matches {
166        return regex_matches(&formatted, operand);
167    }
168    let hay = formatted.to_lowercase();
169    let needle = operand.to_lowercase();
170    match op {
171        TextOp::Contains => hay.contains(&needle),
172        TextOp::DoesNotContain => !hay.contains(&needle),
173        TextOp::BeginsWith => hay.starts_with(&needle),
174        TextOp::EndsWith => hay.ends_with(&needle),
175        TextOp::Is => hay == needle,
176        TextOp::IsNot => hay != needle,
177        TextOp::Matches => unreachable!("handled above"),
178    }
179}
180
181fn regex_matches(hay: &str, pattern: &str) -> bool {
182    if pattern.is_empty() {
183        return true;
184    }
185    match regex::RegexBuilder::new(pattern)
186        .case_insensitive(true)
187        .build()
188    {
189        Ok(re) => re.is_match(hay),
190        // An invalid pattern matches nothing rather than erroring the grid.
191        Err(_) => false,
192    }
193}
194
195/// Numeric projection used by number/date predicates. Non-numeric cells
196/// (`Text`, `Boolean`, `None`) have no numeric value and never satisfy a
197/// numeric predicate.
198fn cell_number(value: &CellValue) -> Option<f64> {
199    match value {
200        CellValue::Integer(i) => Some(*i as f64),
201        CellValue::Decimal(d) => Some(*d),
202        CellValue::Date(t) => Some(*t as f64),
203        CellValue::Text(_) | CellValue::Boolean(_) | CellValue::None => None,
204    }
205}
206
207fn number_matches(value: &CellValue, op: NumberOp, a: f64, b: f64) -> bool {
208    let Some(v) = cell_number(value) else {
209        return false;
210    };
211    // `total_cmp` keeps the comparison total (NaN-safe) and sidesteps the
212    // float-equality lint entirely.
213    let ord = v.total_cmp(&a);
214    match op {
215        NumberOp::Eq => ord == Ordering::Equal,
216        NumberOp::Ne => ord != Ordering::Equal,
217        NumberOp::Gt => ord == Ordering::Greater,
218        NumberOp::Ge => ord != Ordering::Less,
219        NumberOp::Lt => ord == Ordering::Less,
220        NumberOp::Le => ord != Ordering::Greater,
221        NumberOp::Between => in_range(v, a, b),
222        NumberOp::NotBetween => !in_range(v, a, b),
223    }
224}
225
/// Inclusive range test: `min(a, b) <= v <= max(a, b)`.
///
/// The bounds may be supplied in either order. Comparisons use
/// [`f64::total_cmp`], so `NaN` has a fixed position in the ordering.
fn in_range(v: f64, a: f64, b: f64) -> bool {
    // `total_cmp` orders `-0.0` strictly below `+0.0`; treat the two zeros as
    // the same number so a `-0.0` value is not excluded from a range that
    // starts at `0.0` (and vice versa).
    let unsigned_zero = |x: f64| {
        if x.total_cmp(&-0.0) == Ordering::Equal {
            0.0
        } else {
            x
        }
    };
    let (v, a, b) = (unsigned_zero(v), unsigned_zero(a), unsigned_zero(b));
    let (lo, hi) = if a.total_cmp(&b) == Ordering::Greater {
        (b, a)
    } else {
        (a, b)
    };
    v.total_cmp(&lo) != Ordering::Less && v.total_cmp(&hi) != Ordering::Greater
}
234
/// Parse a `YYYY-MM-DD` calendar date into Unix seconds (UTC midnight).
///
/// Surrounding whitespace is ignored. Returns `None` for malformed input:
/// anything other than exactly three `-`-separated integers, a month outside
/// `1..=12`, a day that does not exist in that month (leap years are
/// honoured, so `2024-02-29` is valid and `2023-02-29` is not), or a year so
/// large that the result would not fit in an `i64`. Used to interpret date
/// operands typed into the filter panel's range fields.
#[must_use]
pub fn parse_ymd_to_unix(s: &str) -> Option<i64> {
    // A year never has more than 366 days, so any year up to this bound has a
    // second count that fits in `i64`; it also keeps the intermediate
    // arithmetic in `days_from_civil` far from overflow.
    const MAX_YEAR: i64 = i64::MAX / (366 * 86_400);

    let mut parts = s.trim().split('-');
    let y: i64 = parts.next()?.parse().ok()?;
    let m: i64 = parts.next()?.parse().ok()?;
    let d: i64 = parts.next()?.parse().ok()?;
    if parts.next().is_some() {
        return None;
    }
    // Splitting on '-' means no component can carry a minus sign, so the
    // lower bound only documents the precondition of the overflow argument.
    if !(0..=MAX_YEAR).contains(&y) || !(1..=12).contains(&m) {
        return None;
    }
    // Reject days such as Feb 30 instead of letting them roll over into the
    // following month.
    if !(1..=days_in_month(y, m)).contains(&d) {
        return None;
    }
    days_from_civil(y, m, d).checked_mul(86_400)
}

/// Number of days in month `m` (1-based) of proleptic Gregorian year `y`.
fn days_in_month(y: i64, m: i64) -> i64 {
    let leap = y % 4 == 0 && (y % 100 != 0 || y % 400 == 0);
    match m {
        4 | 6 | 9 | 11 => 30,
        2 if leap => 29,
        2 => 28,
        _ => 31,
    }
}

/// Howard Hinnant's `days_from_civil`: days since the Unix epoch for a proleptic
/// Gregorian calendar date. Inverse of `format::days_to_ymd`.
///
/// Expects `m` in `1..=12` and `d` to be a real day of that month. No overflow
/// checking is done here; callers must keep `y` small enough.
fn days_from_civil(y: i64, m: i64, d: i64) -> i64 {
    // Shift the year so it starts in March; the leap day then falls at the
    // end of the shifted year.
    let y = if m <= 2 { y - 1 } else { y };
    // 400-year eras, floored for negative years.
    let era = if y >= 0 { y } else { y - 399 } / 400;
    let yoe = y - era * 400; // year of era, 0..=399
    let mp = if m > 2 { m - 3 } else { m + 9 }; // month index with March = 0
    let doy = (153 * mp + 2) / 5 + d - 1; // day of the shifted year
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; // day of era
    era * 146_097 + doe - 719_468
}
262
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use crate::config::{BooleanFormat, DateFormat, NumberFormat, ReplacementTiming, StringFormat};

    /// Default formatting for a column of `kind`, with no replacements.
    fn resolved(kind: ColumnKind) -> ResolvedColumnFormat {
        ResolvedColumnFormat {
            kind,
            number: NumberFormat::default(),
            date: DateFormat::default(),
            boolean: BooleanFormat::default(),
            string: StringFormat::default(),
            replacements: vec![],
            replacement_timing: ReplacementTiming::AfterFormat,
        }
    }

    /// Filter made of a text predicate only.
    fn text_filter(op: TextOp, operand: &str) -> ColumnFilter {
        let predicate = FilterPredicate::Text {
            op,
            operand: operand.to_owned(),
        };
        ColumnFilter {
            predicate,
            values: None,
        }
    }

    /// Filter made of a numeric predicate only.
    fn number_filter(op: NumberOp, a: f64, b: f64) -> ColumnFilter {
        let predicate = FilterPredicate::Number { op, a, b };
        ColumnFilter {
            predicate,
            values: None,
        }
    }

    /// Filter made of a value-set allow-list plus the given predicate.
    fn allow_list_filter(predicate: FilterPredicate, allowed: &[&str]) -> ColumnFilter {
        let values: HashSet<String> = allowed.iter().map(|s| (*s).to_owned()).collect();
        ColumnFilter {
            predicate,
            values: Some(values),
        }
    }

    #[test]
    fn empty_filter_passes_everything() {
        let filter = ColumnFilter::default();
        assert!(!filter.is_active());

        let cell = CellValue::Text("anything".into());
        assert!(cell_passes_filter(
            &cell,
            &resolved(ColumnKind::Text),
            &filter
        ));
    }

    #[test]
    fn text_ops_are_case_insensitive() {
        let fmt = resolved(ColumnKind::Text);
        let cell = CellValue::Text("Hello World".into());
        let cases = [
            (TextOp::Contains, "LO W", true),
            (TextOp::BeginsWith, "hell", true),
            (TextOp::EndsWith, "RLD", true),
            (TextOp::Is, "hello world", true),
            (TextOp::IsNot, "hello world", false),
            (TextOp::DoesNotContain, "zzz", true),
        ];
        for (op, operand, expected) in cases {
            let passed = cell_passes_filter(&cell, &fmt, &text_filter(op, operand));
            assert_eq!(passed, expected, "{op:?} with operand {operand:?}");
        }
    }

    #[test]
    fn text_matches_regex_and_bad_regex_matches_nothing() {
        let fmt = resolved(ColumnKind::Text);
        let cell = CellValue::Text("abc123".into());
        let cases = [
            (r"^abc\d+$", true),
            (r"^\d+$", false),
            // Unbalanced group => invalid regex => matches nothing.
            ("(", false),
        ];
        for (pattern, expected) in cases {
            let passed = cell_passes_filter(&cell, &fmt, &text_filter(TextOp::Matches, pattern));
            assert_eq!(passed, expected, "pattern {pattern:?}");
        }
    }

    #[test]
    fn number_ops_cover_comparisons_and_ranges() {
        let fmt = resolved(ColumnKind::Integer);
        let cell = CellValue::Integer(50);
        let cases = [
            (NumberOp::Eq, 50.0, 0.0, true),
            (NumberOp::Ne, 51.0, 0.0, true),
            (NumberOp::Gt, 49.0, 0.0, true),
            (NumberOp::Ge, 50.0, 0.0, true),
            (NumberOp::Lt, 51.0, 0.0, true),
            (NumberOp::Le, 50.0, 0.0, true),
            // Between is order-insensitive on its bounds.
            (NumberOp::Between, 100.0, 10.0, true),
            (NumberOp::NotBetween, 10.0, 100.0, false),
        ];
        for (op, a, b, expected) in cases {
            let passed = cell_passes_filter(&cell, &fmt, &number_filter(op, a, b));
            assert_eq!(passed, expected, "{op:?} with a={a}, b={b}");
        }
    }

    #[test]
    fn number_predicate_rejects_non_numeric_cells() {
        let fmt = resolved(ColumnKind::Integer);
        let filter = number_filter(NumberOp::Ge, 0.0, 0.0);
        assert!(!cell_passes_filter(&CellValue::None, &fmt, &filter));
    }

    #[test]
    fn value_set_allow_list_filters() {
        let fmt = resolved(ColumnKind::Text);
        let filter = allow_list_filter(FilterPredicate::None, &["keep"]);
        assert!(filter.is_active());

        let kept = CellValue::Text("keep".into());
        let dropped = CellValue::Text("drop".into());
        assert!(cell_passes_filter(&kept, &fmt, &filter));
        assert!(!cell_passes_filter(&dropped, &fmt, &filter));
    }

    #[test]
    fn predicate_and_value_set_compose_with_and() {
        let fmt = resolved(ColumnKind::Text);
        let begins_with_al = FilterPredicate::Text {
            op: TextOp::BeginsWith,
            operand: "al".into(),
        };
        let filter = allow_list_filter(begins_with_al, &["alpha", "apex"]);

        // In the allow-list AND matches the predicate.
        let alpha = CellValue::Text("alpha".into());
        assert!(cell_passes_filter(&alpha, &fmt, &filter));

        // In the allow-list but fails the predicate.
        let apex = CellValue::Text("apex".into());
        assert!(!cell_passes_filter(&apex, &fmt, &filter));
    }

    #[test]
    fn date_range_via_parsed_operands() {
        let fmt = resolved(ColumnKind::Date);
        // 2024-01-01 UTC == 1_704_067_200.
        let jan1 = parse_ymd_to_unix("2024-01-01").expect("valid date");
        assert_eq!(jan1, 1_704_067_200);
        let feb1 = parse_ymd_to_unix("2024-02-01").expect("valid date");

        // Late January 2024 lies between the two bounds.
        let cell = CellValue::Date(1_706_000_000);
        let filter = number_filter(NumberOp::Between, jan1 as f64, feb1 as f64);
        assert!(cell_passes_filter(&cell, &fmt, &filter));
    }

    #[test]
    fn parse_ymd_rejects_garbage() {
        let rejected = ["not-a-date", "2024-13-01", "2024-01-32", "2024-01-01-01"];
        for input in rejected {
            assert!(parse_ymd_to_unix(input).is_none(), "input {input:?}");
        }
    }

    #[test]
    fn uses_number_ops_matches_numeric_kinds() {
        let numeric = [ColumnKind::Integer, ColumnKind::Decimal, ColumnKind::Date];
        for kind in numeric {
            assert!(uses_number_ops(kind));
        }
        let textual = [ColumnKind::Text, ColumnKind::Boolean, ColumnKind::None];
        for kind in textual {
            assert!(!uses_number_ops(kind));
        }
    }
}