trs_dataframe/filter/
mod.rs

1use std::str::FromStr;
2
3use data_value::DataValue;
4use pest::{iterators::Pair, Parser};
5use regex::Regex;
6use tracing::trace;
7pub mod error;
8
9type Result<T> = std::result::Result<T, error::Error>;
10
/// Pest parser generated from the grammar file referenced below.
///
/// The derive also produces the `Rule` enum that the parsing functions in
/// this module match on.
#[derive(pest_derive::Parser)]
#[grammar = "filter/grammar/data.pest"]
struct DataParser;
14
/// Comparison operators that can sit between the two sides of a filter
/// [`Expression`].
///
/// The textual spelling of each operator is defined by the `FromStr`
/// implementation for this type.
#[derive(Debug, Clone, PartialEq, Copy)]
pub enum FilterOperator {
    /// `==`
    Equal,
    /// `!=`
    NotEqual,
    /// `<`
    Less,
    /// `>`
    Greater,
    /// `<=`
    LeOrEq,
    /// `>=`
    GrOrEq,
    /// `~=` — the right-hand side is compiled as a regular expression.
    Regex,
    /// `in` — the right-hand side must be a vector.
    In,
    /// `notIn` — the right-hand side must be a vector.
    NotIn,
}
28
/// Logical connectives used to join two filter expressions.
#[derive(Debug, Clone, PartialEq, Copy)]
pub enum FilterJoin {
    /// `&&`
    And,
    /// `||`
    Or,
}
/// A single comparison of the form `left operator right`.
#[derive(Debug, Clone, PartialEq)]
pub struct Expression {
    /// Left-hand side: a key, optionally with a function applied to it.
    pub left: DataInput,
    /// The comparison to perform.
    pub operator: FilterOperator,
    /// Right-hand side: any atom accepted by the grammar.
    pub right: DataInput,
}
41
/// The right-hand side of an [`Expression`], resolved into the shape its
/// operator needs (see `Expression::filter_argument`).
#[derive(Debug)]
pub enum FilterArgument {
    /// A plain value, used by the comparison operators.
    Value(DataValue),
    /// A compiled regular expression, used by `FilterOperator::Regex`.
    Regex(regex::Regex),
    /// A list of values, used by `FilterOperator::In` / `FilterOperator::NotIn`.
    Vec(Vec<DataValue>),
}
48
49impl FilterArgument {
50    pub fn value(&self) -> &DataValue {
51        match self {
52            FilterArgument::Value(value) => value,
53            FilterArgument::Regex(_) => &DataValue::Null, // Regex does not have a value
54            FilterArgument::Vec(_vec) => &DataValue::Null,
55        }
56    }
57
58    pub fn vec(&self) -> Option<&Vec<DataValue>> {
59        match self {
60            FilterArgument::Value(value) => {
61                if let DataValue::Vec(vec) = value {
62                    Some(vec)
63                } else {
64                    None
65                }
66            }
67            FilterArgument::Regex(_) => None, // Regex does not have a value
68            FilterArgument::Vec(vec) => Some(vec),
69        }
70    }
71
72    pub fn regex(&self) -> Option<&Regex> {
73        match self {
74            FilterArgument::Value(_value) => None,
75            FilterArgument::Regex(regex) => Some(regex),
76            FilterArgument::Vec(_) => None, // Vec does not have a regex
77        }
78    }
79}
80
81impl Expression {
82    pub fn filter_argument(&self) -> Result<FilterArgument> {
83        match self.operator {
84            FilterOperator::Equal
85            | FilterOperator::NotEqual
86            | FilterOperator::Less
87            | FilterOperator::Greater
88            | FilterOperator::LeOrEq
89            | FilterOperator::GrOrEq => Ok(FilterArgument::Value(self.right.value())),
90            FilterOperator::Regex => {
91                if let DataValue::String(ref regex) = self.right.value() {
92                    Ok(FilterArgument::Regex(regex::Regex::new(regex)?))
93                } else {
94                    Err(error::parser_error(
95                        "Expected a regex string for Regex operator",
96                    ))
97                }
98            }
99            FilterOperator::In | FilterOperator::NotIn => {
100                if let DataValue::Vec(ref vec) = self.right.value() {
101                    Ok(FilterArgument::Vec(vec.clone()))
102                } else {
103                    Err(error::parser_error(
104                        "Expected a vector for In/NotIn operator",
105                    ))
106                }
107            }
108        }
109    }
110}
111
/// A tree of filter expressions joined by logical connectives.
#[derive(Debug, Clone, PartialEq)]
pub enum FilterCombinantion {
    /// A single expression with nothing joined to it.
    Simple(Expression),
    /// `expression && rest`
    And(Expression, Box<FilterCombinantion>),
    /// `expression || rest`
    Or(Expression, Box<FilterCombinantion>),
    /// A list of combinations.
    // NOTE(review): none of the parsing functions visible in this module
    // construct this variant — confirm whether it is used elsewhere.
    Grouped(Vec<FilterCombinantion>),
}
121
/// Functions that can be applied to a key on the left-hand side of an
/// expression.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Function {
    /// Written as `.len()` in the filter text.
    Len,
    /// Written as `.to_datetime_us()` in the filter text.
    ToDateTimeUs,
}
127
/// One side of an [`Expression`] as produced by the parser.
#[derive(Debug, Clone, PartialEq)]
pub enum DataInput {
    /// A literal value (number, string, boolean, null or array).
    Value(DataValue),
    /// A bare identifier naming a key.
    Key(String),
    /// A key together with the function applied to it.
    Function(String, Function),
}
134
135impl DataInput {
136    pub fn as_key(&self) -> Option<&str> {
137        match self {
138            DataInput::Key(key) => Some(key),
139            DataInput::Value(_) => None,
140            DataInput::Function(key, _) => Some(key), // Functions do not have a key
141        }
142    }
143
144    pub fn value(&self) -> DataValue {
145        match self {
146            DataInput::Value(value) => value.clone(),
147            DataInput::Key(key) => DataValue::String(key.into()),
148            DataInput::Function(_, _) => DataValue::Null, // Functions do not have a value
149        }
150    }
151    pub fn is_function(&self) -> bool {
152        matches!(self, DataInput::Function(_, _))
153    }
154}
155
/// The parsed form of a complete filter string; built via
/// `FilterRules::try_from(&str)`.
#[derive(Debug, Clone, PartialEq)]
pub struct FilterRules {
    /// The parsed filter combinations.
    pub rules: Vec<FilterCombinantion>,
}
160
161impl TryFrom<&str> for FilterRules {
162    type Error = error::Error;
163
164    fn try_from(value: &str) -> std::result::Result<Self, Self::Error> {
165        DataParser::parse(Rule::full_expression, value)
166            .map_err(|e| error::parser_error(format!("Failed to parse DataInput: {e}")))?
167            .next()
168            .ok_or(error::parser_error(
169                "Expected a Rule::atom but found nothing",
170            ))
171            .and_then(parse_full_expression)
172    }
173}
174
175fn parse_left(rule: Pair<Rule>) -> Result<DataInput> {
176    trace!("Parsing left expression: {rule:?}");
177    let mut inner = rule.into_inner();
178    trace!("Parsing left inner: {inner:?}");
179    let key = inner
180        .next()
181        .ok_or(error::parser_error("Expected a key in left expression"))?
182        .as_str()
183        .to_string();
184    if let Some(function) = inner.next() {
185        let function_name = function.as_str();
186        let function = match function_name {
187            ".len()" => Function::Len,
188            ".to_datetime_us()" => Function::ToDateTimeUs,
189            _ => return Err(error::parser_error("Unknown function: {function_name}")),
190        };
191        return Ok(DataInput::Function(key, function));
192    }
193    Ok(DataInput::Key(key)) // Placeholder for Function
194}
195
196fn parse_expression(pair: Pair<Rule>) -> Result<Expression> {
197    trace!("Parsing expression: {pair:?}");
198    match pair.as_rule() {
199        Rule::expression => {
200            let mut pairs = pair.into_inner();
201            trace!("Parsing expression pairs: {pairs:?}");
202            let left = parse_left(
203                pairs
204                    .next()
205                    .ok_or(error::parser_error("Expected a left expression"))?,
206            )?;
207            trace!("Parsing expression left: {left:?}");
208
209            let operator = pairs
210                .next()
211                .and_then(|s| s.as_str().parse::<FilterOperator>().ok())
212                .ok_or(error::parser_error("Expected a valid filter operator"))?;
213            trace!("Parsing expression operator: {operator:?}");
214            let right = parse_atom(
215                pairs
216                    .next()
217                    .ok_or(error::parser_error("Expected a right expression"))?,
218            )?;
219
220            trace!("Parsing expression right: {right:?}");
221            Ok(Expression {
222                left,
223                operator,
224                right,
225            })
226        }
227        e => Err(error::parser_error(format!(
228            "Unexpected rule in expression {e:?}"
229        ))),
230    }
231}
232fn parse_operator(pair: Pair<Rule>) -> Result<FilterJoin> {
233    match pair.as_str() {
234        "&&" => Ok(FilterJoin::And),
235        "||" => Ok(FilterJoin::Or),
236        _ => Err(error::parser_error(format!(
237            "Unknown operator: {}",
238            pair.as_str()
239        ))),
240    }
241}
242fn parse_filter_combination(pair: Pair<Rule>) -> Result<FilterCombinantion> {
243    if pair.as_rule() == Rule::expression {
244        return Ok(FilterCombinantion::Simple(parse_expression(pair)?));
245    }
246    let mut pairs = pair.into_inner();
247    trace!("Parsing filter combo expression pairs: {pairs:?}");
248    let first = parse_expression(pairs.next().ok_or(error::parser_error(
249        "Expected at least one expression in the pair",
250    ))?)?;
251    if let Some(op) = pairs.next() {
252        trace!("Parsing filter combo expression: {op:?} vs pairs {pairs:?}");
253        let op = parse_operator(op)?;
254        match op {
255            FilterJoin::And => {
256                return Ok(FilterCombinantion::And(
257                    first,
258                    Box::new(parse_filter_combination(pairs.next().ok_or(
259                        error::parser_error("Expected a next expression after '&&'"),
260                    )?)?),
261                ));
262            }
263            FilterJoin::Or => {
264                return Ok(FilterCombinantion::Or(
265                    first,
266                    Box::new(parse_filter_combination(pairs.next().ok_or(
267                        error::parser_error("Expected a next expression after '||'"),
268                    )?)?),
269                ));
270            }
271        }
272    }
273    Ok(FilterCombinantion::Simple(first))
274}
275fn parse_full_expression(pair: Pair<Rule>) -> Result<FilterRules> {
276    let mut rules = Vec::new();
277    trace!("Parsing full expression: {pair:?}");
278    match pair.as_rule() {
279        Rule::full_expression => {
280            let mut pairs = pair.into_inner();
281            trace!("Parsing full expression pairs: {pairs:?}");
282            let left = parse_expression(pairs.next().ok_or(error::parser_error(
283                "Expected at least one expression in the pair",
284            ))?)?;
285
286            if let Some(op) = pairs.next() {
287                trace!("Parsing operator: {op:?}");
288                let op = parse_operator(op)?;
289                let right = pairs.next().ok_or(error::parser_error(
290                    "Expected a next expression after operator",
291                ))?;
292                let ops = |op: FilterJoin,
293                           right: FilterCombinantion,
294                           rules: &mut Vec<FilterCombinantion>|
295                 -> Result<()> {
296                    match op {
297                        FilterJoin::And => {
298                            rules.push(FilterCombinantion::And(left, Box::new(right)));
299                        }
300                        FilterJoin::Or => {
301                            rules.push(FilterCombinantion::Or(left, Box::new(right)));
302                        }
303                    }
304                    Ok(())
305                };
306                match right.as_rule() {
307                    Rule::expression => {
308                        let right_expr = parse_expression(right)?;
309                        ops(op, FilterCombinantion::Simple(right_expr), &mut rules)?;
310                    }
311                    Rule::grouped_expression => {
312                        let grouped_expr = parse_filter_combination(right)?;
313                        ops(op, grouped_expr, &mut rules)?;
314                    }
315                    _ => return Err(error::parser_error("Expected an expression after operator")),
316                }
317            } else {
318                rules.push(FilterCombinantion::Simple(left));
319            }
320        }
321        _ => return Err(error::parser_error("Expected a full expression rule")),
322    }
323
324    Ok(FilterRules { rules })
325}
326
327impl TryFrom<&str> for DataInput {
328    type Error = error::Error;
329
330    fn try_from(value: &str) -> std::result::Result<Self, Self::Error> {
331        DataParser::parse(Rule::atom, value)
332            .map_err(|e| error::parser_error(format!("Failed to parse DataInput: {e}")))?
333            .next()
334            .ok_or(error::parser_error(
335                "Expected a Rule::atom but found nothing",
336            ))
337            .and_then(parse_atom)
338    }
339}
340
341fn number_to_value<T: FromStr>(number: &str, post_fix: &str) -> Result<T> {
342    num_to_value(number.split(post_fix).next().ok_or_else(|| {
343        error::parser_error("Expected a number with postfix '{post_fix}' but found: {number}")
344    })?)
345}
346
347fn num_to_value<T: FromStr>(number: &str) -> Result<T> {
348    match number.parse::<T>() {
349        Ok(value) => Ok(value),
350        Err(_e) => Err(error::parser_error(format!(
351            "Failed to parse number {number}"
352        ))),
353    }
354}
355
356fn parse_atom(rule: Pair<Rule>) -> Result<DataInput> {
357    match rule.as_rule() {
358        Rule::atom => {
359            let inner = rule.into_inner().next().ok_or(error::parser_error(
360                "Expected a Rule::atom but found nothing",
361            ))?;
362            parse_atom(inner)
363        }
364        Rule::u32 => number_to_value::<u32>(rule.as_str(), "u32")
365            .map(|value| DataInput::Value(DataValue::from(value))),
366        Rule::i32 => number_to_value::<i32>(rule.as_str(), "i32")
367            .map(|value| DataInput::Value(DataValue::from(value))),
368        Rule::u64 => number_to_value::<u64>(rule.as_str(), "u64")
369            .map(|value| DataInput::Value(DataValue::from(value))),
370        Rule::i64 => {
371            let str_rule = rule.as_str();
372            if str_rule.contains("i64") {
373                number_to_value::<i64>(str_rule, "i64")
374                    .map(|value| DataInput::Value(DataValue::from(value)))
375            } else {
376                num_to_value::<i64>(str_rule).map(|val| DataInput::Value(DataValue::from(val)))
377            }
378        }
379        Rule::f32 => number_to_value::<f32>(rule.as_str(), "f32")
380            .map(|value| DataInput::Value(DataValue::from(value))),
381        Rule::f64 => number_to_value::<f64>(rule.as_str(), "f64")
382            .map(|value| DataInput::Value(DataValue::from(value))),
383        Rule::float => number_to_value::<f64>(rule.as_str(), "f64")
384            .map(|value| DataInput::Value(DataValue::from(value))),
385        Rule::string_qt => {
386            let value = rule.as_str().trim_matches('\'');
387            Ok(DataInput::Value(DataValue::String(value.into())))
388        }
389        Rule::boolean => {
390            let value = rule.as_str();
391            match value {
392                "true" => Ok(DataInput::Value(DataValue::Bool(true))),
393                "false" => Ok(DataInput::Value(DataValue::Bool(false))),
394                _ => Err(error::parser_error(
395                    "Expected boolean value but found: {value}",
396                )),
397            }
398        }
399        Rule::null => Ok(DataInput::Value(DataValue::Null)),
400        Rule::key => Ok(DataInput::Key(rule.as_str().to_string())),
401        Rule::array => {
402            let mut values = Vec::new();
403            for pair in rule.into_inner() {
404                match parse_atom(pair)? {
405                    DataInput::Value(value) => values.push(value),
406                    DataInput::Key(key) => {
407                        values.push(DataValue::String(key.into()));
408                    }
409                    DataInput::Function(_, _) => {
410                        return Err(error::parser_error("Function in array is not supported"));
411                    }
412                }
413            }
414            Ok(DataInput::Value(DataValue::Vec(values)))
415        }
416        Rule::left => parse_left(rule),
417        _ => Err(error::parser_error("{rule} did not match any 'Rule' ")),
418    }
419}
420
421impl std::str::FromStr for FilterOperator {
422    type Err = error::Error;
423
424    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
425        match s {
426            "==" => Ok(FilterOperator::Equal),
427            "!=" => Ok(FilterOperator::NotEqual),
428            "<" => Ok(FilterOperator::Less),
429            ">" => Ok(FilterOperator::Greater),
430            "<=" => Ok(FilterOperator::LeOrEq),
431            ">=" => Ok(FilterOperator::GrOrEq),
432            "~=" => Ok(FilterOperator::Regex),
433            "in" => Ok(FilterOperator::In),
434            "notIn" => Ok(FilterOperator::NotIn),
435            _ => Err(error::parser_error(format!("Unknown filter operator: {s}"))),
436        }
437    }
438}
439
// Table-driven tests (rstest) for the atom parser and the filter parser.
#[cfg(test)]
mod test {
    use super::*;
    use rstest::*;

    // Each case parses a single atom and compares it with the expected
    // `DataInput`: keys, quoted strings, suffixed and bare numbers, null,
    // booleans and arrays (a bare identifier inside an array becomes a string).
    #[rstest]
    #[case("abc", DataInput::Key("abc".to_string()))]
    #[case("'abc'", DataInput::Value(DataValue::from("abc")))]
    #[case("1u32", DataInput::Value(DataValue::from(1u32)))]
    #[case("1i32", DataInput::Value(DataValue::from(1i32)))]
    #[case("1u64", DataInput::Value(DataValue::from(1u64)))]
    #[case("1i64", DataInput::Value(DataValue::from(1i64)))]
    #[case("1f64", DataInput::Value(DataValue::from(1f64)))]
    #[case("null", DataInput::Value(DataValue::Null))]
    #[case("true", DataInput::Value(DataValue::from(true)))]
    #[case("false", DataInput::Value(DataValue::from(false)))]
    #[case("1.0", DataInput::Value(DataValue::from(1f64)))]
    #[case("[1u32, 1f64, 'abc', notakey]", DataInput::Value(DataValue::Vec(vec![
        DataValue::from(1u32),
        DataValue::from(1f64),
        DataValue::from("abc"),
        DataValue::from("notakey"),
    ])))]
    #[case("1.0f32", DataInput::Value(DataValue::from(1f32)))]
    #[case("1", DataInput::Value(DataValue::from(1i64)))]
    fn test_parser(#[case] input: &str, #[case] expected: DataInput) {
        let result = DataInput::try_from(input);
        assert!(result.is_ok(), "Failed to parse '{input}' {result:?}");
        assert_eq!(result.unwrap(), expected);
    }

    // Each case parses a full filter string: a single expression, `&&` and
    // `||` joins, and an `&&` followed by a parenthesised group.
    #[rstest]
    #[case("abc > 1u32", FilterRules{ rules: vec![FilterCombinantion::Simple(Expression {
        left: DataInput::Key("abc".to_string()),
        operator: FilterOperator::Greater,
        right: DataInput::Value(DataValue::from(1u32)),
    })] })]
    #[case("abc > 1u32 && c == 'a'", FilterRules{ rules: vec![FilterCombinantion::And(Expression {
        left: DataInput::Key("abc".to_string()),
        operator: FilterOperator::Greater,
        right: DataInput::Value(DataValue::from(1u32)),
    }, Box::new(
        FilterCombinantion::Simple(Expression {
            left: DataInput::Key("c".to_string()),
            operator: FilterOperator::Equal,
            right: DataInput::Value(DataValue::from("a")),
        }),
    ))] })]
    #[case("abc > 1u32 || c <= 12.0f64", FilterRules{ rules: vec![FilterCombinantion::Or(Expression {
        left: DataInput::Key("abc".to_string()),
        operator: FilterOperator::Greater,
        right: DataInput::Value(DataValue::from(1u32)),
    }, Box::new(
        FilterCombinantion::Simple(Expression {
            left: DataInput::Key("c".to_string()),
            operator: FilterOperator::LeOrEq,
            right: DataInput::Value(DataValue::from(12f64)),
        }),
    ))] })]
    #[case("abc in [1i32] && (g >= 1u64 || c ~= '.*')", FilterRules{ rules: vec![FilterCombinantion::And(Expression {
        left: DataInput::Key("abc".to_string()),
        operator: FilterOperator::In,
        right: DataInput::Value(DataValue::Vec(vec![1i32.into()])),
    }, Box::new(
        FilterCombinantion::Or(Expression {
            left: DataInput::Key("g".to_string()),
            operator: FilterOperator::GrOrEq,
            right: DataInput::Value(DataValue::from(1u64)),
        }, Box::new(
            FilterCombinantion::Simple(Expression {
                left: DataInput::Key("c".to_string()),
                operator: FilterOperator::Regex,
                right: DataInput::Value(DataValue::from(".*")),
            }),
        )),
    ))] })]
    fn test_parser_filter(#[case] input: &str, #[case] expected: FilterRules) {
        let result = FilterRules::try_from(input);
        assert!(result.is_ok(), "Failed to parse '{input}' {result:?}");
        assert_eq!(result.unwrap(), expected);
    }

    // Each case checks that a function suffix on the left-hand key is parsed
    // into `DataInput::Function`.
    #[rstest]
    #[case("abc.len() > 1u32", FilterRules{ rules: vec![FilterCombinantion::Simple(Expression {
        left: DataInput::Function("abc".to_string(), Function::Len),
        operator: FilterOperator::Greater,
        right: DataInput::Value(DataValue::from(1u32)),
    })] })]
    #[case("abc.to_datetime_us() > '2025-07-01 00:00:00' && c == 'a'", FilterRules{ rules: vec![FilterCombinantion::And(Expression {
        left: DataInput::Function("abc".to_string(), Function::ToDateTimeUs),
        operator: FilterOperator::Greater,
        right: DataInput::Value(DataValue::from("2025-07-01 00:00:00")),
    }, Box::new(
        FilterCombinantion::Simple(Expression {
            left: DataInput::Key("c".to_string()),
            operator: FilterOperator::Equal,
            right: DataInput::Value(DataValue::from("a")),
        }),
    ))] })]

    fn test_functions(#[case] input: &str, #[case] expected: FilterRules) {
        let result = FilterRules::try_from(input);
        assert!(result.is_ok(), "Failed to parse '{input}' {result:?}");
        assert_eq!(result.unwrap(), expected);
    }
}
545}