use super::date::parse_date_to_days;
use crate::error::{Error, Result};
use crate::expr::{ComparisonOp, Datum, Predicate};
pub fn parse_filter(input: &str) -> Result<Predicate> {
let input = input.trim();
if input.is_empty() {
return Ok(Predicate::AlwaysTrue);
}
if let Some(pred) = try_parse_or(input)? {
return Ok(pred);
}
Err(Error::invalid_input(format!(
"Failed to parse filter expression: {}",
input
)))
}
/// Attempts to parse `input` as a disjunction of ` OR `-separated clauses.
///
/// The input is split with `split_by_keyword`. When that yields a single
/// piece, the whole input is delegated to `try_parse_and`. Otherwise every
/// trimmed piece is parsed with `try_parse_and` and the results are combined
/// through `Predicate::or`.
///
/// Returns `Ok(None)` as soon as a piece is not recognised; errors from
/// parsing a piece are propagated.
fn try_parse_or(input: &str) -> Result<Option<Predicate>> {
    let clauses = split_by_keyword(input, " OR ");
    if clauses.len() <= 1 {
        return try_parse_and(input);
    }

    // Collecting into `Result<Option<Vec<_>>>` stops at the first error or at
    // the first unrecognised clause, whichever is encountered first.
    let parsed: Result<Option<Vec<Predicate>>> = clauses
        .into_iter()
        .map(|clause| try_parse_and(clause.trim()))
        .collect();

    Ok(parsed?.map(|alternatives| Predicate::or(alternatives)))
}
/// Attempts to parse `input` as a conjunction of ` AND `-separated clauses.
///
/// The input is split with `split_by_keyword`. When that yields a single
/// piece, the whole input is delegated to `try_parse_comparison`. Otherwise
/// every trimmed piece is parsed with `try_parse_comparison` and the results
/// are combined through `Predicate::and`.
///
/// Returns `Ok(None)` as soon as a piece is not recognised; errors from
/// parsing a piece are propagated.
fn try_parse_and(input: &str) -> Result<Option<Predicate>> {
    let clauses = split_by_keyword(input, " AND ");
    if clauses.len() <= 1 {
        return try_parse_comparison(input);
    }

    // Collecting into `Result<Option<Vec<_>>>` stops at the first error or at
    // the first unrecognised clause, whichever is encountered first.
    let parsed: Result<Option<Vec<Predicate>>> = clauses
        .into_iter()
        .map(|clause| try_parse_comparison(clause.trim()))
        .collect();

    Ok(parsed?.map(|terms| Predicate::and(terms)))
}
fn try_parse_comparison(input: &str) -> Result<Option<Predicate>> {
let input = input.trim();
if let Some(col) = input
.strip_suffix(" IS NOT NULL")
.or_else(|| input.strip_suffix(" is not null"))
{
return Ok(Some(Predicate::is_not_null(col.trim())));
}
if let Some(col) = input
.strip_suffix(" IS NULL")
.or_else(|| input.strip_suffix(" is null"))
{
return Ok(Some(Predicate::is_null(col.trim())));
}
if let Some((col, values)) = try_parse_in(input)? {
return Ok(Some(Predicate::is_in(col, values)));
}
for (op_str, op) in [
("!=", ComparisonOp::NotEq),
("<>", ComparisonOp::NotEq),
(">=", ComparisonOp::GtEq),
("<=", ComparisonOp::LtEq),
("=", ComparisonOp::Eq),
(">", ComparisonOp::Gt),
("<", ComparisonOp::Lt),
] {
if let Some(idx) = input.find(op_str) {
let col = input[..idx].trim();
let val_str = input[idx + op_str.len()..].trim();
if col.is_empty() || val_str.is_empty() {
continue;
}
let datum = parse_value(val_str)?;
return Ok(Some(Predicate::Comparison {
column: col.into(),
op,
value: datum,
}));
}
}
Ok(None)
}
/// Attempts to parse `input` as `<column> IN (<value>, <value>, ...)`.
///
/// The ` IN (` keyword is located ASCII case-insensitively in the original
/// text (not in an upper-cased copy), so the byte offset is always valid for
/// slicing `input` even when it contains non-ASCII characters. Values are
/// separated by commas that appear outside quotes, which keeps quoted values
/// such as `'a,b'` in one piece. Each value is parsed with `parse_value`.
///
/// Returns `Ok(None)` when the keyword is absent or the text following
/// ` IN ` is not wrapped in parentheses. Errors from value parsing are
/// propagated.
fn try_parse_in(input: &str) -> Result<Option<(String, Vec<Datum>)>> {
    const KEYWORD: &[u8] = b" IN (";

    let Some(in_pos) = input
        .as_bytes()
        .windows(KEYWORD.len())
        .position(|window| window.eq_ignore_ascii_case(KEYWORD))
    else {
        return Ok(None);
    };

    // The keyword starts and ends with ASCII bytes, so both offsets below are
    // character boundaries. The opening parenthesis is kept in `rest`.
    let col = input[..in_pos].trim();
    let rest = input[in_pos + KEYWORD.len() - 1..].trim();
    if !rest.starts_with('(') || !rest.ends_with(')') {
        return Ok(None);
    }
    let values_str = &rest[1..rest.len() - 1];

    let mut values: Vec<Datum> = Vec::new();
    let mut open_quote: Option<char> = None;
    let mut start = 0;
    for (idx, ch) in values_str.char_indices() {
        match open_quote {
            Some(quote) => {
                if ch == quote {
                    open_quote = None;
                }
            }
            None if ch == '\'' || ch == '"' => open_quote = Some(ch),
            None if ch == ',' => {
                values.push(parse_value(values_str[start..idx].trim())?);
                // ',' is one byte long.
                start = idx + 1;
            }
            None => {}
        }
    }
    values.push(parse_value(values_str[start..].trim())?);

    Ok(Some((col.to_string(), values)))
}
/// Parses a literal from a filter expression into a [`Datum`].
///
/// After trimming, the literal is interpreted as follows:
///
/// * Wrapped in matching single or double quotes: the quotes are removed. If
///   the inner text is 10 bytes long with `-` as its 5th and 8th character
///   and `parse_date_to_days` accepts it, the result is [`Datum::Date`];
///   otherwise it is [`Datum::String`].
/// * Parses as `i64`: [`Datum::Int`] when the value fits in `i32`, otherwise
///   [`Datum::Long`].
/// * Parses as `f64`: [`Datum::Double`].
///
/// # Errors
///
/// Returns an invalid-input error when none of the above applies. This
/// includes a lone quote character, which is not a complete quoted string.
fn parse_value(s: &str) -> Result<Datum> {
    let s = s.trim();

    // A quoted literal needs both an opening and a closing quote; without the
    // length check a single `'` or `"` would satisfy both tests and the slice
    // below would panic.
    let is_quoted = s.len() >= 2
        && ((s.starts_with('\'') && s.ends_with('\''))
            || (s.starts_with('"') && s.ends_with('"')));
    if is_quoted {
        let inner = &s[1..s.len() - 1];
        // Cheap shape check for `YYYY-MM-DD` before attempting a date parse.
        let looks_like_date = inner.len() == 10
            && inner.chars().nth(4) == Some('-')
            && inner.chars().nth(7) == Some('-');
        if looks_like_date {
            if let Some(days) = parse_date_to_days(inner) {
                return Ok(Datum::Date(days));
            }
        }
        return Ok(Datum::String(inner.to_string()));
    }

    if let Ok(n) = s.parse::<i64>() {
        if n >= i32::MIN as i64 && n <= i32::MAX as i64 {
            // Lossless: the range check above guarantees `n` fits in `i32`.
            return Ok(Datum::Int(n as i32));
        }
        return Ok(Datum::Long(n));
    }

    if let Ok(n) = s.parse::<f64>() {
        return Ok(Datum::Double(n));
    }

    Err(Error::invalid_input(format!(
        "Invalid value in filter expression: {}",
        s
    )))
}
/// Splits `input` on every occurrence of `keyword` that lies outside quotes.
///
/// The keyword is matched ASCII case-insensitively. Text between a `'` or
/// `"` and the next occurrence of the same quote character is never split.
/// The returned slices borrow from `input` and exclude the keyword itself;
/// when the keyword does not occur the result is a single slice covering the
/// whole input.
///
/// Matching is done on the bytes of `input` directly, so every offset used
/// for slicing refers to `input` itself and non-ASCII text is handled
/// without panicking. An empty `keyword` never matches.
fn split_by_keyword<'a>(input: &'a str, keyword: &str) -> Vec<&'a str> {
    // An empty keyword would match at every position without consuming any
    // input, so treat it as "no separator".
    if keyword.is_empty() {
        return vec![input];
    }

    let bytes = input.as_bytes();
    let needle = keyword.as_bytes();
    let mut parts = Vec::new();
    let mut start = 0;
    let mut open_quote: Option<u8> = None;
    let mut i = 0;

    while i < bytes.len() {
        let byte = bytes[i];
        match open_quote {
            Some(quote) => {
                if byte == quote {
                    open_quote = None;
                }
            }
            // Quote characters are ASCII and therefore never occur inside a
            // multi-byte UTF-8 sequence.
            None if byte == b'\'' || byte == b'"' => open_quote = Some(byte),
            None => {
                let matches_here = bytes
                    .get(i..i + needle.len())
                    .map_or(false, |window| window.eq_ignore_ascii_case(needle));
                if matches_here {
                    // A match starts with the keyword's first byte (ASCII or
                    // a UTF-8 lead byte) and spans whole characters, so both
                    // ends are character boundaries.
                    parts.push(&input[start..i]);
                    i += needle.len();
                    start = i;
                    continue;
                }
            }
        }
        i += 1;
    }

    parts.push(&input[start..]);
    parts
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `expr`, failing the calling test if the parser reports an error.
    fn parse_ok(expr: &str) -> Predicate {
        parse_filter(expr).unwrap()
    }

    /// `=` against a quoted string produces an `Eq` comparison.
    #[test]
    fn test_parse_simple_eq() {
        let parsed = parse_ok("status = 'active'");
        let is_eq = matches!(
            parsed,
            Predicate::Comparison {
                op: ComparisonOp::Eq,
                ..
            }
        );
        assert!(is_eq);
    }

    /// `>` against an integer produces a `Gt` comparison.
    #[test]
    fn test_parse_gt() {
        let parsed = parse_ok("age > 18");
        let is_gt = matches!(
            parsed,
            Predicate::Comparison {
                op: ComparisonOp::Gt,
                ..
            }
        );
        assert!(is_gt);
    }

    /// Two clauses joined by `AND` produce an `And` predicate.
    #[test]
    fn test_parse_and() {
        let parsed = parse_ok("status = 'active' AND age > 18");
        assert!(matches!(parsed, Predicate::And(_)));
    }

    /// Two clauses joined by `OR` produce an `Or` predicate.
    #[test]
    fn test_parse_or() {
        let parsed = parse_ok("region = 'us' OR region = 'eu'");
        assert!(matches!(parsed, Predicate::Or(_)));
    }

    /// `IS NULL` produces an `IsNull` predicate.
    #[test]
    fn test_parse_is_null() {
        let parsed = parse_ok("email IS NULL");
        assert!(matches!(parsed, Predicate::IsNull(_)));
    }

    /// `IS NOT NULL` produces an `IsNotNull` predicate.
    #[test]
    fn test_parse_is_not_null() {
        let parsed = parse_ok("email IS NOT NULL");
        assert!(matches!(parsed, Predicate::IsNotNull(_)));
    }

    /// A quoted `YYYY-MM-DD` literal is parsed as a date value.
    #[test]
    fn test_parse_date() {
        match parse_ok("date >= '2024-01-01'") {
            Predicate::Comparison { value, .. } => {
                assert!(matches!(value, Datum::Date(_)));
            }
            _ => panic!("Expected comparison predicate"),
        }
    }

    /// `IN (...)` with several quoted values produces an `In` predicate.
    #[test]
    fn test_parse_in() {
        let parsed = parse_ok("region IN ('us', 'eu', 'asia')");
        assert!(matches!(parsed, Predicate::In { .. }));
    }

    /// A mix of comparison and `IN` clauses joined by `AND` produces `And`.
    #[test]
    fn test_parse_complex() {
        let expr = "date >= '2024-01-01' AND status = 'active' AND region IN ('us', 'eu')";
        let parsed = parse_ok(expr);
        assert!(matches!(parsed, Predicate::And(_)));
    }
}