use std::cmp::Ordering;
use std::collections::HashSet;
use crate::config::ResolvedColumnFormat;
use crate::data::{CellValue, ColumnKind};
use crate::format::format_cell;
/// Comparison a text filter applies to a cell's formatted text.
///
/// Every operator except `Matches` compares lowercased copies of the cell text
/// and the operand, so those comparisons ignore case.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TextOp {
/// The cell text contains the operand.
Contains,
/// The cell text does not contain the operand.
DoesNotContain,
/// The cell text starts with the operand.
BeginsWith,
/// The cell text ends with the operand.
EndsWith,
/// The cell text equals the operand.
Is,
/// The cell text differs from the operand.
IsNot,
/// The operand is a case-insensitive regular expression tested against the
/// cell text. An empty pattern matches everything; a pattern that fails to
/// compile matches nothing.
Matches,
}
/// Comparison a numeric filter applies to a cell's numeric value.
///
/// The single-operand operators compare against operand `a`; the range
/// operators use both `a` and `b`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum NumberOp {
/// Value equals `a`.
Eq,
/// Value differs from `a`.
Ne,
/// Value is greater than `a`.
Gt,
/// Value is greater than or equal to `a`.
Ge,
/// Value is less than `a`.
Lt,
/// Value is less than or equal to `a`.
Le,
/// Value lies between `a` and `b`, bounds included; the bounds may be given
/// in either order.
Between,
/// Negation of `Between`.
NotBetween,
}
/// The comparison half of a column filter.
#[derive(Clone, Debug, Default, PartialEq)]
pub enum FilterPredicate {
/// No predicate: every cell matches. This is the default.
#[default]
None,
/// Compare the cell's formatted text with `operand` using `op`.
Text {
op: TextOp,
operand: String,
},
/// Compare the cell's numeric value using `op`. `a` is the operand of the
/// single-operand operators; `a` and `b` are the bounds of the range
/// operators, which are the only ones that read `b`.
Number {
op: NumberOp,
a: f64,
b: f64,
},
}
/// Filter attached to one column: a predicate plus an optional allow-list.
///
/// A cell passes only when it satisfies both parts.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct ColumnFilter {
/// Comparison every cell must satisfy.
pub predicate: FilterPredicate,
/// When `Some`, the set of formatted cell strings that are allowed through;
/// `None` means no allow-list is applied.
pub values: Option<HashSet<String>>,
}
impl ColumnFilter {
    /// Reports whether this filter constrains anything at all.
    ///
    /// A filter is active when it carries a predicate other than
    /// `FilterPredicate::None`, or when an allow-list is present.
    #[must_use]
    pub fn is_active(&self) -> bool {
        match self.predicate {
            // Without a predicate, only an allow-list can make the filter active.
            FilterPredicate::None => self.values.is_some(),
            FilterPredicate::Text { .. } | FilterPredicate::Number { .. } => true,
        }
    }
}
/// Reports whether columns of this kind are filtered with numeric operators.
///
/// True for the integer, decimal and date kinds; false for every other kind.
#[must_use]
pub fn uses_number_ops(kind: ColumnKind) -> bool {
    matches!(kind, ColumnKind::Date | ColumnKind::Decimal | ColumnKind::Integer)
}
/// Decides whether a cell survives a column filter.
///
/// The cell must satisfy the filter's predicate and, when an allow-list is
/// present, its formatted text (as produced by `format_cell` with `fmt`) must
/// be a member of that list.
#[must_use]
pub fn cell_passes_filter(
    value: &CellValue,
    fmt: &ResolvedColumnFormat,
    filter: &ColumnFilter,
) -> bool {
    // The predicate is checked first; the cell is only formatted for the
    // allow-list lookup when the predicate has already accepted it.
    predicate_matches(value, fmt, &filter.predicate)
        && match &filter.values {
            None => true,
            Some(allowed) => allowed.contains(&format_cell(value, fmt).0),
        }
}
/// Dispatches a predicate to the text or numeric matcher.
///
/// `FilterPredicate::None` accepts every cell.
fn predicate_matches(
    value: &CellValue,
    fmt: &ResolvedColumnFormat,
    predicate: &FilterPredicate,
) -> bool {
    match *predicate {
        FilterPredicate::None => true,
        FilterPredicate::Number { op, a, b } => number_matches(value, op, a, b),
        FilterPredicate::Text { op, ref operand } => text_matches(value, fmt, op, operand),
    }
}
/// Evaluates a text operator against the cell's formatted text.
///
/// `TextOp::Matches` hands the formatted text and the operand to
/// `regex_matches` unchanged. Every other operator compares lowercased copies
/// of both sides.
fn text_matches(value: &CellValue, fmt: &ResolvedColumnFormat, op: TextOp, operand: &str) -> bool {
    let rendered = format_cell(value, fmt).0;
    if matches!(op, TextOp::Matches) {
        return regex_matches(&rendered, operand);
    }

    let haystack = rendered.to_lowercase();
    let needle = operand.to_lowercase();
    let (haystack, needle) = (haystack.as_str(), needle.as_str());
    match op {
        TextOp::Is => haystack == needle,
        TextOp::IsNot => haystack != needle,
        TextOp::BeginsWith => haystack.starts_with(needle),
        TextOp::EndsWith => haystack.ends_with(needle),
        TextOp::Contains => haystack.contains(needle),
        TextOp::DoesNotContain => !haystack.contains(needle),
        TextOp::Matches => unreachable!("handled above"),
    }
}
/// Tests `hay` against `pattern` as a case-insensitive regular expression.
///
/// An empty pattern matches everything; a pattern that fails to compile
/// matches nothing. The pattern is compiled on every call.
fn regex_matches(hay: &str, pattern: &str) -> bool {
    if pattern.is_empty() {
        return true;
    }
    let mut builder = regex::RegexBuilder::new(pattern);
    builder.case_insensitive(true);
    let Ok(compiled) = builder.build() else {
        return false;
    };
    compiled.is_match(hay)
}
/// Extracts the numeric value used by number predicates.
///
/// Integer, decimal and date cells yield their value as `f64`; text, boolean
/// and empty cells yield `None`.
fn cell_number(value: &CellValue) -> Option<f64> {
    let number = match value {
        CellValue::Decimal(d) => *d,
        CellValue::Integer(i) => *i as f64,
        CellValue::Date(t) => *t as f64,
        CellValue::Text(_) | CellValue::Boolean(_) | CellValue::None => return None,
    };
    Some(number)
}
/// Evaluates a numeric operator against a cell.
///
/// Cells without a numeric value (see `cell_number`) never match, whatever the
/// operator. The single-operand operators compare the cell value with `a`;
/// `Between` and `NotBetween` delegate to `in_range` with both `a` and `b`.
///
/// Comparisons are numeric rather than bitwise: `-0.0` equals `0.0`, and a NaN
/// on either side satisfies none of the single-operand operators except `Ne`.
fn number_matches(value: &CellValue, op: NumberOp, a: f64, b: f64) -> bool {
    let Some(v) = cell_number(value) else {
        return false;
    };
    // `partial_cmp` instead of `total_cmp`: the IEEE total order ranks -0.0
    // below +0.0 and NaN above every number, which is not what "equals 0" or
    // "greater than 5" means to someone filtering a column. `None` here means
    // one side is NaN.
    let ord = v.partial_cmp(&a);
    match op {
        NumberOp::Eq => ord == Some(Ordering::Equal),
        NumberOp::Ne => ord != Some(Ordering::Equal),
        NumberOp::Gt => ord == Some(Ordering::Greater),
        NumberOp::Ge => matches!(ord, Some(Ordering::Greater | Ordering::Equal)),
        NumberOp::Lt => ord == Some(Ordering::Less),
        NumberOp::Le => matches!(ord, Some(Ordering::Less | Ordering::Equal)),
        NumberOp::Between => in_range(v, a, b),
        NumberOp::NotBetween => !in_range(v, a, b),
    }
}
/// Reports whether `v` lies in the closed interval spanned by `a` and `b`.
///
/// The bounds may be given in either order. The comparison is numeric, so
/// `-0.0` and `0.0` are the same point; if `v` or either bound is NaN the
/// result is `false`.
fn in_range(v: f64, a: f64, b: f64) -> bool {
    // With a NaN bound `a > b` is false and the NaN stays in the range, where
    // every comparison against it fails, so nothing is considered in range.
    let (lo, hi) = if a > b { (b, a) } else { (a, b) };
    (lo..=hi).contains(&v)
}
/// Parses a `YYYY-MM-DD` date into seconds since 1970-01-01 at midnight of
/// that day (no time-zone adjustment is applied).
///
/// Surrounding whitespace is ignored. Returns `None` when:
/// - the text is not exactly three `-`-separated integers,
/// - the month is outside `1..=12`,
/// - the day does not exist in that month (February has 29 days only in
///   Gregorian leap years), or
/// - the year is so large that the timestamp would not fit in an `i64`.
#[must_use]
pub fn parse_ymd_to_unix(s: &str) -> Option<i64> {
    const SECONDS_PER_DAY: i64 = 86_400;
    // A year never has more than 366 days, so any year up to this bound keeps
    // both the day count and the final multiplication inside `i64`.
    const MAX_YEAR: i64 = i64::MAX / (366 * SECONDS_PER_DAY);

    let mut parts = s.trim().split('-');
    let y: i64 = parts.next()?.parse().ok()?;
    let m: i64 = parts.next()?.parse().ok()?;
    let d: i64 = parts.next()?.parse().ok()?;
    if parts.next().is_some() {
        return None;
    }
    // No lower bound on the year is needed: `-` is the separator, so a part
    // can never carry a minus sign and `y` is never negative.
    if y > MAX_YEAR || !(1..=12).contains(&m) {
        return None;
    }
    if !(1..=days_in_month(y, m)).contains(&d) {
        return None;
    }
    Some(days_from_civil(y, m, d) * SECONDS_PER_DAY)
}

/// Number of days in month `m` (1-based) of year `y`, using the Gregorian
/// leap-year rule.
fn days_in_month(y: i64, m: i64) -> i64 {
    let leap = y % 4 == 0 && (y % 100 != 0 || y % 400 == 0);
    match m {
        2 if leap => 29,
        2 => 28,
        4 | 6 | 9 | 11 => 30,
        _ => 31,
    }
}

/// Days from 1970-01-01 to the civil date `y`-`m`-`d` (negative before it).
///
/// The caller is responsible for passing a month in `1..=12` and a day that
/// exists in that month.
fn days_from_civil(y: i64, m: i64, d: i64) -> i64 {
    // Treat March as the first month of the year so that the leap day, when
    // there is one, is the last day of the shifted year.
    let y = if m <= 2 { y - 1 } else { y };
    // 400-year cycle containing `y` (floor division) and the year within it.
    let era = y.div_euclid(400);
    let yoe = y.rem_euclid(400);
    // Month index counted from March, then day-of-year within the shifted year.
    let mp = if m > 2 { m - 3 } else { m + 9 };
    let doy = (153 * mp + 2) / 5 + d - 1;
    // Day within the 400-year cycle, which is always 146 097 days long.
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
    // 719 468 is the day count from 0000-03-01 to 1970-01-01.
    era * 146_097 + doe - 719_468
}
// Unit tests for the filter predicates, the allow-list and the date parser.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
use super::*;
use crate::config::{BooleanFormat, DateFormat, NumberFormat, ReplacementTiming, StringFormat};
// Builds a column format of the given kind with default sub-formats and no
// replacements.
fn resolved(kind: ColumnKind) -> ResolvedColumnFormat {
ResolvedColumnFormat {
kind,
number: NumberFormat::default(),
date: DateFormat::default(),
boolean: BooleanFormat::default(),
string: StringFormat::default(),
replacements: vec![],
replacement_timing: ReplacementTiming::AfterFormat,
}
}
// Builds a filter that has only a text predicate (no allow-list).
fn text_filter(op: TextOp, operand: &str) -> ColumnFilter {
ColumnFilter {
predicate: FilterPredicate::Text {
op,
operand: operand.to_owned(),
},
values: None,
}
}
// Builds a filter that has only a number predicate (no allow-list).
fn number_filter(op: NumberOp, a: f64, b: f64) -> ColumnFilter {
ColumnFilter {
predicate: FilterPredicate::Number { op, a, b },
values: None,
}
}
// The default filter is inactive and lets any cell through.
#[test]
fn empty_filter_passes_everything() {
let f = ColumnFilter::default();
assert!(!f.is_active());
assert!(cell_passes_filter(
&CellValue::Text("anything".into()),
&resolved(ColumnKind::Text),
&f
));
}
// Each non-regex text operator is exercised with an operand whose case
// differs from the cell text.
#[test]
fn text_ops_are_case_insensitive() {
let fmt = resolved(ColumnKind::Text);
let v = CellValue::Text("Hello World".into());
assert!(cell_passes_filter(
&v,
&fmt,
&text_filter(TextOp::Contains, "LO W")
));
assert!(cell_passes_filter(
&v,
&fmt,
&text_filter(TextOp::BeginsWith, "hell")
));
assert!(cell_passes_filter(
&v,
&fmt,
&text_filter(TextOp::EndsWith, "RLD")
));
assert!(cell_passes_filter(
&v,
&fmt,
&text_filter(TextOp::Is, "hello world")
));
assert!(!cell_passes_filter(
&v,
&fmt,
&text_filter(TextOp::IsNot, "hello world")
));
assert!(cell_passes_filter(
&v,
&fmt,
&text_filter(TextOp::DoesNotContain, "zzz")
));
}
// A matching pattern passes, a non-matching pattern fails, and a pattern that
// does not compile ("(") is expected to match nothing.
#[test]
fn text_matches_regex_and_bad_regex_matches_nothing() {
let fmt = resolved(ColumnKind::Text);
let v = CellValue::Text("abc123".into());
assert!(cell_passes_filter(
&v,
&fmt,
&text_filter(TextOp::Matches, r"^abc\d+$")
));
assert!(!cell_passes_filter(
&v,
&fmt,
&text_filter(TextOp::Matches, r"^\d+$")
));
assert!(!cell_passes_filter(
&v,
&fmt,
&text_filter(TextOp::Matches, "(")
));
}
// Every numeric operator is checked against the integer cell 50. The
// `Between` case passes its bounds in descending order on purpose.
#[test]
fn number_ops_cover_comparisons_and_ranges() {
let fmt = resolved(ColumnKind::Integer);
let v = CellValue::Integer(50);
assert!(cell_passes_filter(
&v,
&fmt,
&number_filter(NumberOp::Eq, 50.0, 0.0)
));
assert!(cell_passes_filter(
&v,
&fmt,
&number_filter(NumberOp::Ne, 51.0, 0.0)
));
assert!(cell_passes_filter(
&v,
&fmt,
&number_filter(NumberOp::Gt, 49.0, 0.0)
));
assert!(cell_passes_filter(
&v,
&fmt,
&number_filter(NumberOp::Ge, 50.0, 0.0)
));
assert!(cell_passes_filter(
&v,
&fmt,
&number_filter(NumberOp::Lt, 51.0, 0.0)
));
assert!(cell_passes_filter(
&v,
&fmt,
&number_filter(NumberOp::Le, 50.0, 0.0)
));
assert!(cell_passes_filter(
&v,
&fmt,
&number_filter(NumberOp::Between, 100.0, 10.0)
));
assert!(!cell_passes_filter(
&v,
&fmt,
&number_filter(NumberOp::NotBetween, 10.0, 100.0)
));
}
// An empty cell has no numeric value, so a number predicate rejects it.
#[test]
fn number_predicate_rejects_non_numeric_cells() {
let fmt = resolved(ColumnKind::Integer);
assert!(!cell_passes_filter(
&CellValue::None,
&fmt,
&number_filter(NumberOp::Ge, 0.0, 0.0)
));
}
// With no predicate, the allow-list alone makes the filter active and decides
// which cells pass.
#[test]
fn value_set_allow_list_filters() {
let fmt = resolved(ColumnKind::Text);
let mut allowed = HashSet::new();
allowed.insert("keep".to_owned());
let f = ColumnFilter {
predicate: FilterPredicate::None,
values: Some(allowed),
};
assert!(f.is_active());
assert!(cell_passes_filter(
&CellValue::Text("keep".into()),
&fmt,
&f
));
assert!(!cell_passes_filter(
&CellValue::Text("drop".into()),
&fmt,
&f
));
}
// "apex" is on the allow-list but fails the "begins with al" predicate, so it
// is rejected: both parts of the filter must accept a cell.
#[test]
fn predicate_and_value_set_compose_with_and() {
let fmt = resolved(ColumnKind::Text);
let mut allowed = HashSet::new();
allowed.insert("alpha".to_owned());
allowed.insert("apex".to_owned());
let f = ColumnFilter {
predicate: FilterPredicate::Text {
op: TextOp::BeginsWith,
operand: "al".into(),
},
values: Some(allowed),
};
assert!(cell_passes_filter(
&CellValue::Text("alpha".into()),
&fmt,
&f
));
assert!(!cell_passes_filter(
&CellValue::Text("apex".into()),
&fmt,
&f
));
}
// Parsed dates serve as the bounds of a `Between` filter on a date cell whose
// timestamp (1_706_000_000) falls between 2024-01-01 and 2024-02-01.
#[test]
fn date_range_via_parsed_operands() {
let fmt = resolved(ColumnKind::Date);
let jan1 = parse_ymd_to_unix("2024-01-01").expect("valid date");
assert_eq!(jan1, 1_704_067_200);
let feb1 = parse_ymd_to_unix("2024-02-01").expect("valid date");
let v = CellValue::Date(1_706_000_000); let f = number_filter(NumberOp::Between, jan1 as f64, feb1 as f64);
assert!(cell_passes_filter(&v, &fmt, &f));
}
// Non-numeric parts, month 13, day 32 and a fourth component are all rejected.
#[test]
fn parse_ymd_rejects_garbage() {
assert!(parse_ymd_to_unix("not-a-date").is_none());
assert!(parse_ymd_to_unix("2024-13-01").is_none());
assert!(parse_ymd_to_unix("2024-01-32").is_none());
assert!(parse_ymd_to_unix("2024-01-01-01").is_none());
}
// Integer, decimal and date columns use number operators; text, boolean and
// untyped columns do not.
#[test]
fn uses_number_ops_matches_numeric_kinds() {
assert!(uses_number_ops(ColumnKind::Integer));
assert!(uses_number_ops(ColumnKind::Decimal));
assert!(uses_number_ops(ColumnKind::Date));
assert!(!uses_number_ops(ColumnKind::Text));
assert!(!uses_number_ops(ColumnKind::Boolean));
assert!(!uses_number_ops(ColumnKind::None));
}
}