use std::str::FromStr;
use data_value::DataValue;
use pest::{iterators::Pair, Parser};
use regex::Regex;
use tracing::trace;
pub mod error;
pub mod filtering;
pub use filtering::*;
type Result<T> = std::result::Result<T, error::Error>;
// Parser type whose implementation (and the accompanying `Rule` enum used
// throughout this file) is generated by `pest_derive` from the grammar file
// named in the `grammar` attribute below.
#[derive(pest_derive::Parser)]
#[grammar = "filter/grammar/data.pest"]
struct DataParser;
/// Operations a data source exposes so that a single [`Expression`] can be
/// evaluated against it.
///
/// NOTE(review): no implementor is visible in this file. The returned `usize`
/// values are presumably indices of the entries matching the expression —
/// confirm against the implementations.
pub trait Filtering {
/// Evaluates `expression` and returns the resulting list of `usize` values,
/// or an error when the expression cannot be evaluated.
fn prepare_indicies(&self, expression: &Expression) -> Result<Vec<usize>>;
/// Evaluates `expression` and returns the resulting list of `usize` values.
///
/// NOTE(review): presumably used when the left side is a
/// `DataInput::Function` / `DataInput::Mod` — verify against the callers.
fn apply_function(&self, expression: &Expression) -> Result<Vec<usize>>;
}
/// Comparison operator placed between the left and right side of an
/// [`Expression`]. The textual form of each variant is the one accepted by the
/// `FromStr` implementation of this type.
#[derive(Debug, Clone, PartialEq, Copy)]
pub enum FilterOperator {
/// `==`
Equal,
/// `!=`
NotEqual,
/// `<`
Less,
/// `>`
Greater,
/// `<=`
LeOrEq,
/// `>=`
GrOrEq,
/// `~=` — the right side is compiled as a regular expression.
Regex,
/// `in` — the right side must be a vector.
In,
/// `notIn` — the right side must be a vector.
NotIn,
}
/// Logical connective between two filter expressions.
#[derive(Debug, Clone, PartialEq, Copy)]
pub enum FilterJoin {
/// Parsed from `&&`.
And,
/// Parsed from `||`.
Or,
}
/// A single comparison of the form `left operator right`.
#[derive(Debug, Clone, PartialEq)]
pub struct Expression {
/// Left operand: a key, optionally followed by a function call or a modulo
/// operation (see `parse_left`).
pub left: DataInput,
/// Operator applied between `left` and `right`.
pub operator: FilterOperator,
/// Right operand, parsed as an atom (literal, key or array).
pub right: DataInput,
}
/// Right-hand side of an [`Expression`] prepared for evaluation; produced by
/// `Expression::filter_argument` according to the expression's operator.
#[derive(Debug)]
pub enum FilterArgument {
/// Plain value, used by the comparison operators.
Value(DataValue),
/// Compiled regular expression, used by `FilterOperator::Regex`.
Regex(regex::Regex),
/// List of values, used by `FilterOperator::In` / `FilterOperator::NotIn`.
Vec(Vec<DataValue>),
}
impl FilterArgument {
    /// Returns the wrapped value of a `Value` argument.
    ///
    /// The `Regex` and `Vec` variants carry no single value and yield a
    /// reference to `DataValue::Null` instead.
    pub fn value(&self) -> &DataValue {
        if let FilterArgument::Value(inner) = self {
            inner
        } else {
            &DataValue::Null
        }
    }

    /// Returns the list of values carried by this argument, if any.
    ///
    /// Both a `Vec` argument and a `Value` argument wrapping a
    /// `DataValue::Vec` expose their elements; everything else is `None`.
    pub fn vec(&self) -> Option<&Vec<DataValue>> {
        match self {
            FilterArgument::Vec(items) | FilterArgument::Value(DataValue::Vec(items)) => {
                Some(items)
            }
            FilterArgument::Value(_) | FilterArgument::Regex(_) => None,
        }
    }

    /// Returns the compiled regular expression of a `Regex` argument, or
    /// `None` for every other variant.
    pub fn regex(&self) -> Option<&Regex> {
        if let FilterArgument::Regex(pattern) = self {
            Some(pattern)
        } else {
            None
        }
    }
}
impl Expression {
pub fn filter_argument(&self) -> Result<FilterArgument> {
match self.operator {
FilterOperator::Equal
| FilterOperator::NotEqual
| FilterOperator::Less
| FilterOperator::Greater
| FilterOperator::LeOrEq
| FilterOperator::GrOrEq => Ok(FilterArgument::Value(self.right.value())),
FilterOperator::Regex => {
if let DataValue::String(ref regex) = self.right.value() {
Ok(FilterArgument::Regex(regex::Regex::new(regex)?))
} else {
Err(error::parser_error(
"Expected a regex string for Regex operator",
))
}
}
FilterOperator::In | FilterOperator::NotIn => {
if let DataValue::Vec(ref vec) = self.right.value() {
Ok(FilterArgument::Vec(vec.clone()))
} else {
Err(error::parser_error(
"Expected a vector for In/NotIn operator",
))
}
}
}
}
}
/// A filter rule: either a single expression or an expression joined to a
/// following combination.
#[derive(Debug, Clone, PartialEq)]
pub enum FilterCombinantion {
/// A single expression without any connective.
Simple(Expression),
/// An expression joined with `&&` to the combination that follows it.
And(Expression, Box<FilterCombinantion>),
/// An expression joined with `||` to the combination that follows it.
Or(Expression, Box<FilterCombinantion>),
/// A list of combinations. The parser functions in this file never
/// construct this variant.
Grouped(Vec<FilterCombinantion>),
}
/// Function that can be applied to a key on the left side of an expression.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Function {
/// Parsed from the suffix `.len()`.
Len,
/// Parsed from the suffix `.to_datetime_us()`.
ToDateTimeUs,
}
/// One operand of an [`Expression`].
#[derive(Debug, Clone, PartialEq)]
pub enum DataInput {
/// A literal value (number, quoted string, boolean, null or array).
Value(DataValue),
/// A bare key name.
Key(String),
/// A key with a function applied to it, e.g. `key.len()`.
Function(String, Function),
/// A key with a modulo operation; the second field is the operand that
/// follows `%`.
Mod(String, DataValue),
}
impl DataInput {
    /// Returns the key name this input refers to.
    ///
    /// `Key`, `Function` and `Mod` all carry a key; a literal `Value` does
    /// not and yields `None`.
    pub fn as_key(&self) -> Option<&str> {
        match self {
            DataInput::Value(_) => None,
            DataInput::Key(key) | DataInput::Function(key, _) | DataInput::Mod(key, _) => {
                Some(key.as_str())
            }
        }
    }

    /// Returns this input as an owned [`DataValue`].
    ///
    /// A literal is cloned, a key becomes a string value holding the key
    /// name, and `Function` / `Mod` inputs yield `DataValue::Null`.
    pub fn value(&self) -> DataValue {
        match self {
            DataInput::Function(..) | DataInput::Mod(..) => DataValue::Null,
            DataInput::Key(key) => DataValue::String(key.into()),
            DataInput::Value(inner) => inner.clone(),
        }
    }

    /// Returns `true` when this input is a `Function` variant.
    pub fn is_function(&self) -> bool {
        matches!(self, DataInput::Function(..))
    }

    /// Returns `true` when this input is a `Mod` variant.
    pub fn is_mod(&self) -> bool {
        matches!(self, DataInput::Mod(..))
    }
}
/// Result of parsing a complete filter string (see the `TryFrom<&str>`
/// implementation of this type).
#[derive(Debug, Clone, PartialEq)]
pub struct FilterRules {
/// The parsed filter combinations.
pub rules: Vec<FilterCombinantion>,
}
impl TryFrom<&str> for FilterRules {
type Error = error::Error;
fn try_from(value: &str) -> std::result::Result<Self, Self::Error> {
DataParser::parse(Rule::full_expression, value)
.map_err(|e| error::parser_error(format!("Failed to parse DataInput: {e}")))?
.next()
.ok_or(error::parser_error(
"Expected a Rule::atom but found nothing",
))
.and_then(parse_full_expression)
}
}
fn parse_left(rule: Pair<Rule>) -> Result<DataInput> {
trace!("Parsing left expression: {rule:?}");
let mut inner = rule.into_inner();
trace!("Parsing left inner: {inner:?}");
let key = inner
.next()
.ok_or(error::parser_error("Expected a key in left expression"))?
.as_str()
.to_string();
if let Some(function) = inner.next() {
let function_name = function.as_str();
if function_name.contains("%") {
let mut inn = function.into_inner();
let atom = inn
.next()
.ok_or(error::parser_error("Expected a key in left expression"))?;
trace!("Atom {atom:?}");
return Ok(DataInput::Mod(key, parse_atom(atom)?.value()));
}
let function = match function_name {
".len()" => Function::Len,
".to_datetime_us()" => Function::ToDateTimeUs,
_ => return Err(error::parser_error("Unknown function: {function_name}")),
};
return Ok(DataInput::Function(key, function));
}
Ok(DataInput::Key(key)) }
fn parse_expression(pair: Pair<Rule>) -> Result<Expression> {
trace!("Parsing expression: {pair:?}");
match pair.as_rule() {
Rule::expression => {
let mut pairs = pair.into_inner();
trace!("Parsing expression pairs: {pairs:?}");
let left = parse_left(
pairs
.next()
.ok_or(error::parser_error("Expected a left expression"))?,
)?;
trace!("Parsing expression left: {left:?}");
let operator = pairs
.next()
.and_then(|s| s.as_str().parse::<FilterOperator>().ok())
.ok_or(error::parser_error("Expected a valid filter operator"))?;
trace!("Parsing expression operator: {operator:?}");
let right = parse_atom(
pairs
.next()
.ok_or(error::parser_error("Expected a right expression"))?,
)?;
trace!("Parsing expression right: {right:?}");
Ok(Expression {
left,
operator,
right,
})
}
e => Err(error::parser_error(format!(
"Unexpected rule in expression {e:?}"
))),
}
}
fn parse_operator(pair: Pair<Rule>) -> Result<FilterJoin> {
match pair.as_str() {
"&&" => Ok(FilterJoin::And),
"||" => Ok(FilterJoin::Or),
_ => Err(error::parser_error(format!(
"Unknown operator: {}",
pair.as_str()
))),
}
}
fn parse_filter_combination(pair: Pair<Rule>) -> Result<FilterCombinantion> {
if pair.as_rule() == Rule::expression {
return Ok(FilterCombinantion::Simple(parse_expression(pair)?));
}
let mut pairs = pair.into_inner();
trace!("Parsing filter combo expression pairs: {pairs:?}");
let first = parse_expression(pairs.next().ok_or(error::parser_error(
"Expected at least one expression in the pair",
))?)?;
if let Some(op) = pairs.next() {
trace!("Parsing filter combo expression: {op:?} vs pairs {pairs:?}");
let op = parse_operator(op)?;
match op {
FilterJoin::And => {
return Ok(FilterCombinantion::And(
first,
Box::new(parse_filter_combination(pairs.next().ok_or(
error::parser_error("Expected a next expression after '&&'"),
)?)?),
));
}
FilterJoin::Or => {
return Ok(FilterCombinantion::Or(
first,
Box::new(parse_filter_combination(pairs.next().ok_or(
error::parser_error("Expected a next expression after '||'"),
)?)?),
));
}
}
}
Ok(FilterCombinantion::Simple(first))
}
fn parse_full_expression(pair: Pair<Rule>) -> Result<FilterRules> {
let mut rules = Vec::new();
trace!("Parsing full expression: {pair:?}");
match pair.as_rule() {
Rule::full_expression => {
let mut pairs = pair.into_inner();
trace!("Parsing full expression pairs: {pairs:?}");
let left = parse_expression(pairs.next().ok_or(error::parser_error(
"Expected at least one expression in the pair",
))?)?;
if let Some(op) = pairs.next() {
trace!("Parsing operator: {op:?}");
let op = parse_operator(op)?;
let right = pairs.next().ok_or(error::parser_error(
"Expected a next expression after operator",
))?;
let ops = |op: FilterJoin,
right: FilterCombinantion,
rules: &mut Vec<FilterCombinantion>|
-> Result<()> {
match op {
FilterJoin::And => {
rules.push(FilterCombinantion::And(left, Box::new(right)));
}
FilterJoin::Or => {
rules.push(FilterCombinantion::Or(left, Box::new(right)));
}
}
Ok(())
};
match right.as_rule() {
Rule::expression => {
let right_expr = parse_expression(right)?;
ops(op, FilterCombinantion::Simple(right_expr), &mut rules)?;
}
Rule::grouped_expression => {
let grouped_expr = parse_filter_combination(right)?;
ops(op, grouped_expr, &mut rules)?;
}
_ => return Err(error::parser_error("Expected an expression after operator")),
}
} else {
rules.push(FilterCombinantion::Simple(left));
}
}
_ => return Err(error::parser_error("Expected a full expression rule")),
}
Ok(FilterRules { rules })
}
impl TryFrom<&str> for DataInput {
type Error = error::Error;
fn try_from(value: &str) -> std::result::Result<Self, Self::Error> {
DataParser::parse(Rule::atom, value)
.map_err(|e| error::parser_error(format!("Failed to parse DataInput: {e}")))?
.next()
.ok_or(error::parser_error(
"Expected a Rule::atom but found nothing",
))
.and_then(parse_atom)
}
}
/// Parses a numeric literal that may carry a type suffix such as `u32`.
///
/// `post_fix` is removed from the end of `number` when present; a literal
/// without the suffix is parsed as it is.
///
/// # Errors
///
/// Returns a parser error when the remaining text is not a valid `T`.
fn number_to_value<T: FromStr>(number: &str, post_fix: &str) -> Result<T> {
    // `strip_suffix` states the intent directly. The previous
    // `split(..).next()` could never yield `None`, so its error branch was
    // dead code.
    num_to_value(number.strip_suffix(post_fix).unwrap_or(number))
}
fn num_to_value<T: FromStr>(number: &str) -> Result<T> {
match number.parse::<T>() {
Ok(value) => Ok(value),
Err(_e) => Err(error::parser_error(format!(
"Failed to parse number {number}"
))),
}
}
fn parse_atom(rule: Pair<Rule>) -> Result<DataInput> {
match rule.as_rule() {
Rule::atom => {
let inner = rule.into_inner().next().ok_or(error::parser_error(
"Expected a Rule::atom but found nothing",
))?;
parse_atom(inner)
}
Rule::u32 => number_to_value::<u32>(rule.as_str(), "u32")
.map(|value| DataInput::Value(DataValue::from(value))),
Rule::i32 => number_to_value::<i32>(rule.as_str(), "i32")
.map(|value| DataInput::Value(DataValue::from(value))),
Rule::u64 => number_to_value::<u64>(rule.as_str(), "u64")
.map(|value| DataInput::Value(DataValue::from(value))),
Rule::i64 => {
let str_rule = rule.as_str();
if str_rule.contains("i64") {
number_to_value::<i64>(str_rule, "i64")
.map(|value| DataInput::Value(DataValue::from(value)))
} else {
num_to_value::<i64>(str_rule).map(|val| DataInput::Value(DataValue::from(val)))
}
}
Rule::f32 => number_to_value::<f32>(rule.as_str(), "f32")
.map(|value| DataInput::Value(DataValue::from(value))),
Rule::f64 => number_to_value::<f64>(rule.as_str(), "f64")
.map(|value| DataInput::Value(DataValue::from(value))),
Rule::float => number_to_value::<f64>(rule.as_str(), "f64")
.map(|value| DataInput::Value(DataValue::from(value))),
Rule::string_qt => {
let value = rule.as_str().trim_matches('\'');
Ok(DataInput::Value(DataValue::String(value.into())))
}
Rule::boolean => {
let value = rule.as_str();
match value {
"true" => Ok(DataInput::Value(DataValue::Bool(true))),
"false" => Ok(DataInput::Value(DataValue::Bool(false))),
_ => Err(error::parser_error(
"Expected boolean value but found: {value}",
)),
}
}
Rule::null => Ok(DataInput::Value(DataValue::Null)),
Rule::key => Ok(DataInput::Key(rule.as_str().to_string())),
Rule::array => {
let mut values = Vec::new();
for pair in rule.into_inner() {
match parse_atom(pair)? {
DataInput::Value(value) => values.push(value),
DataInput::Key(key) => {
values.push(DataValue::String(key.into()));
}
DataInput::Function(_, _) => {
return Err(error::parser_error("Function in array is not supported"));
}
DataInput::Mod(_, _) => {
return Err(error::parser_error("Function in array is not supported"));
}
}
}
Ok(DataInput::Value(DataValue::Vec(values)))
}
Rule::left => parse_left(rule),
_ => Err(error::parser_error("{rule} did not match any 'Rule' ")),
}
}
impl std::str::FromStr for FilterOperator {
type Err = error::Error;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
match s {
"==" => Ok(FilterOperator::Equal),
"!=" => Ok(FilterOperator::NotEqual),
"<" => Ok(FilterOperator::Less),
">" => Ok(FilterOperator::Greater),
"<=" => Ok(FilterOperator::LeOrEq),
">=" => Ok(FilterOperator::GrOrEq),
"~=" => Ok(FilterOperator::Regex),
"in" => Ok(FilterOperator::In),
"notIn" => Ok(FilterOperator::NotIn),
_ => Err(error::parser_error(format!("Unknown filter operator: {s}"))),
}
}
}
// Parameterised (`rstest`) tests for the parser: single atoms, complete filter
// strings, and left-hand functions / modulo.
#[cfg(test)]
mod test {
use super::*;
use rstest::*;
// Each case parses one atom via `DataInput::try_from` and compares it with
// the expected `DataInput`. Covers keys, quoted strings, typed and untyped
// numbers, `null`, booleans and arrays (where a bare key becomes a string).
#[rstest]
#[case("abc", DataInput::Key("abc".to_string()))]
#[case("'abc'", DataInput::Value(DataValue::from("abc")))]
#[case("1u32", DataInput::Value(DataValue::from(1u32)))]
#[case("1i32", DataInput::Value(DataValue::from(1i32)))]
#[case("1u64", DataInput::Value(DataValue::from(1u64)))]
#[case("1i64", DataInput::Value(DataValue::from(1i64)))]
#[case("1f64", DataInput::Value(DataValue::from(1f64)))]
#[case("null", DataInput::Value(DataValue::Null))]
#[case("true", DataInput::Value(DataValue::from(true)))]
#[case("false", DataInput::Value(DataValue::from(false)))]
#[case("1.0", DataInput::Value(DataValue::from(1f64)))]
#[case("[1u32, 1f64, 'abc', notakey]", DataInput::Value(DataValue::Vec(vec![
DataValue::from(1u32),
DataValue::from(1f64),
DataValue::from("abc"),
DataValue::from("notakey"),
])))]
#[case("1.0f32", DataInput::Value(DataValue::from(1f32)))]
#[case("1", DataInput::Value(DataValue::from(1i64)))]
fn test_parser(#[case] input: &str, #[case] expected: DataInput) {
let result = DataInput::try_from(input);
assert!(result.is_ok(), "Failed to parse '{input}' {result:?}");
assert_eq!(result.unwrap(), expected);
}
// Each case parses a complete filter string via `FilterRules::try_from`.
// Covers a single expression, `&&`, `||`, and a grouped right-hand side.
#[rstest]
#[case("abc > 1u32", FilterRules{ rules: vec![FilterCombinantion::Simple(Expression {
left: DataInput::Key("abc".to_string()),
operator: FilterOperator::Greater,
right: DataInput::Value(DataValue::from(1u32)),
})] })]
#[case("abc > 1u32 && c == 'a'", FilterRules{ rules: vec![FilterCombinantion::And(Expression {
left: DataInput::Key("abc".to_string()),
operator: FilterOperator::Greater,
right: DataInput::Value(DataValue::from(1u32)),
}, Box::new(
FilterCombinantion::Simple(Expression {
left: DataInput::Key("c".to_string()),
operator: FilterOperator::Equal,
right: DataInput::Value(DataValue::from("a")),
}),
))] })]
#[case("abc > 1u32 || c <= 12.0f64", FilterRules{ rules: vec![FilterCombinantion::Or(Expression {
left: DataInput::Key("abc".to_string()),
operator: FilterOperator::Greater,
right: DataInput::Value(DataValue::from(1u32)),
}, Box::new(
FilterCombinantion::Simple(Expression {
left: DataInput::Key("c".to_string()),
operator: FilterOperator::LeOrEq,
right: DataInput::Value(DataValue::from(12f64)),
}),
))] })]
#[case("abc in [1i32] && (g >= 1u64 || c ~= '.*')", FilterRules{ rules: vec![FilterCombinantion::And(Expression {
left: DataInput::Key("abc".to_string()),
operator: FilterOperator::In,
right: DataInput::Value(DataValue::Vec(vec![1i32.into()])),
}, Box::new(
FilterCombinantion::Or(Expression {
left: DataInput::Key("g".to_string()),
operator: FilterOperator::GrOrEq,
right: DataInput::Value(DataValue::from(1u64)),
}, Box::new(
FilterCombinantion::Simple(Expression {
left: DataInput::Key("c".to_string()),
operator: FilterOperator::Regex,
right: DataInput::Value(DataValue::from(".*")),
}),
)),
))] })]
fn test_parser_filter(#[case] input: &str, #[case] expected: FilterRules) {
let result = FilterRules::try_from(input);
assert!(result.is_ok(), "Failed to parse '{input}' {result:?}");
assert_eq!(result.unwrap(), expected);
}
// Each case parses a filter whose left side is a function call (`.len()`,
// `.to_datetime_us()`) or a modulo operation (`key % value`).
#[rstest]
#[case("abc.len() > 1u32", FilterRules{ rules: vec![FilterCombinantion::Simple(Expression {
left: DataInput::Function("abc".to_string(), Function::Len),
operator: FilterOperator::Greater,
right: DataInput::Value(DataValue::from(1u32)),
})] })]
#[case("abc.to_datetime_us() > '2025-07-01 00:00:00' && c == 'a'", FilterRules{ rules: vec![FilterCombinantion::And(Expression {
left: DataInput::Function("abc".to_string(), Function::ToDateTimeUs),
operator: FilterOperator::Greater,
right: DataInput::Value(DataValue::from("2025-07-01 00:00:00")),
}, Box::new(
FilterCombinantion::Simple(Expression {
left: DataInput::Key("c".to_string()),
operator: FilterOperator::Equal,
right: DataInput::Value(DataValue::from("a")),
}),
))] })]
#[case("abc % 1u32 == 1u32", FilterRules{ rules: vec![FilterCombinantion::Simple(Expression {
left: DataInput::Mod("abc".to_string(), DataValue::U32(1)),
operator: FilterOperator::Equal,
right: DataInput::Value(DataValue::from(1u32)),
})] })]
fn test_functions(#[case] input: &str, #[case] expected: FilterRules) {
let result = FilterRules::try_from(input);
assert!(result.is_ok(), "Failed to parse '{input}' {result:?}");
assert_eq!(result.unwrap(), expected);
}
}