trs_dataframe/filter/
mod.rs

1use std::str::FromStr;
2
3use data_value::DataValue;
4use pest::{iterators::Pair, Parser};
5use regex::Regex;
6use tracing::trace;
7pub mod error;
8pub mod filtering;
9pub use filtering::*;
/// Module-local result alias whose error type is this module's [`error::Error`].
type Result<T> = std::result::Result<T, error::Error>;
11
/// pest parser generated from the grammar in `filter/grammar/data.pest`.
///
/// The derive also generates the `Rule` enum used throughout this module.
#[derive(pest_derive::Parser)]
#[grammar = "filter/grammar/data.pest"]
struct DataParser;
15
/// Evaluates a single filter [`Expression`] against the implementing type.
///
/// Both methods return a list of `usize` indices; presumably these identify
/// the entries that satisfy the expression — confirm against the implementors.
pub trait Filtering {
    /// Evaluates `expression` and returns the resulting indices.
    fn prepare_indicies(&self, expression: &Expression) -> Result<Vec<usize>>;
    /// Evaluates an `expression` and returns the resulting indices.
    ///
    /// NOTE(review): judging by the name this handles expressions whose left
    /// side is a function call (see [`DataInput::Function`]) — confirm.
    fn apply_function(&self, expression: &Expression) -> Result<Vec<usize>>;
}
20
/// The operators for filtering functions
///
/// Each variant is parsed from the textual token listed on it
/// (see the `FromStr` implementation).
#[derive(Debug, Clone, PartialEq, Copy)]
pub enum FilterOperator {
    /// `==`
    Equal,
    /// `!=`
    NotEqual,
    /// `<`
    Less,
    /// `>`
    Greater,
    /// `<=`
    LeOrEq,
    /// `>=`
    GrOrEq,
    /// `~=` — the right-hand side must be a string holding a regular expression.
    Regex,
    /// `in` — the right-hand side must be an array.
    In,
    /// `notIn` — the right-hand side must be an array.
    NotIn,
}
34
/// The logical connectives that join two filter expressions.
#[derive(Debug, Clone, PartialEq, Copy)]
pub enum FilterJoin {
    /// Parsed from `&&`.
    And,
    /// Parsed from `||`.
    Or,
}
/// A single comparison of the form `left operator right`, e.g. `abc > 1u32`.
#[derive(Debug, Clone, PartialEq)]
pub struct Expression {
    /// Left operand: a key, optionally followed by a function call or a modulo.
    pub left: DataInput,
    /// Comparison operator applied between `left` and `right`.
    pub operator: FilterOperator,
    /// Right operand, parsed as an atom (literal, array or key).
    pub right: DataInput,
}
47
/// The right-hand side of an [`Expression`], prepared for its operator
/// (see [`Expression::filter_argument`]).
#[derive(Debug)]
pub enum FilterArgument {
    /// Plain value, produced for the comparison operators.
    Value(DataValue),
    /// Compiled regular expression, produced for [`FilterOperator::Regex`].
    Regex(regex::Regex),
    /// List of values, produced for [`FilterOperator::In`] / [`FilterOperator::NotIn`].
    Vec(Vec<DataValue>),
}
54
55impl FilterArgument {
56    pub fn value(&self) -> &DataValue {
57        match self {
58            FilterArgument::Value(value) => value,
59            FilterArgument::Regex(_) => &DataValue::Null, // Regex does not have a value
60            FilterArgument::Vec(_vec) => &DataValue::Null,
61        }
62    }
63
64    pub fn vec(&self) -> Option<&Vec<DataValue>> {
65        match self {
66            FilterArgument::Value(value) => {
67                if let DataValue::Vec(vec) = value {
68                    Some(vec)
69                } else {
70                    None
71                }
72            }
73            FilterArgument::Regex(_) => None, // Regex does not have a value
74            FilterArgument::Vec(vec) => Some(vec),
75        }
76    }
77
78    pub fn regex(&self) -> Option<&Regex> {
79        match self {
80            FilterArgument::Value(_value) => None,
81            FilterArgument::Regex(regex) => Some(regex),
82            FilterArgument::Vec(_) => None, // Vec does not have a regex
83        }
84    }
85}
86
87impl Expression {
88    pub fn filter_argument(&self) -> Result<FilterArgument> {
89        match self.operator {
90            FilterOperator::Equal
91            | FilterOperator::NotEqual
92            | FilterOperator::Less
93            | FilterOperator::Greater
94            | FilterOperator::LeOrEq
95            | FilterOperator::GrOrEq => Ok(FilterArgument::Value(self.right.value())),
96            FilterOperator::Regex => {
97                if let DataValue::String(ref regex) = self.right.value() {
98                    Ok(FilterArgument::Regex(regex::Regex::new(regex)?))
99                } else {
100                    Err(error::parser_error(
101                        "Expected a regex string for Regex operator",
102                    ))
103                }
104            }
105            FilterOperator::In | FilterOperator::NotIn => {
106                if let DataValue::Vec(ref vec) = self.right.value() {
107                    Ok(FilterArgument::Vec(vec.clone()))
108                } else {
109                    Err(error::parser_error(
110                        "Expected a vector for In/NotIn operator",
111                    ))
112                }
113            }
114        }
115    }
116}
117
/// One or more [`Expression`]s combined with logical connectives.
///
/// `And` / `Or` form a right-nested chain: the first expression is joined
/// with the combination that follows it.
#[derive(Debug, Clone, PartialEq)]
pub enum FilterCombinantion {
    /// A single expression without any connective.
    Simple(Expression),
    /// and with &&
    And(Expression, Box<FilterCombinantion>),
    /// or with ||
    Or(Expression, Box<FilterCombinantion>),
    /// A list of combinations.
    ///
    /// NOTE(review): the parser in this module never constructs this variant.
    Grouped(Vec<FilterCombinantion>),
}
127
/// Functions that may be applied to a key on the left side of an expression.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Function {
    /// Written as `key.len()`.
    Len,
    /// Written as `key.to_datetime_us()`.
    ToDateTimeUs,
}
133
/// An operand of a filter expression.
#[derive(Debug, Clone, PartialEq)]
pub enum DataInput {
    /// A literal value (number, quoted string, boolean, null or array).
    Value(DataValue),
    /// A bare identifier naming a key.
    Key(String),
    /// A function applied to a key, e.g. `abc.len()`; holds the key name and the function.
    Function(String, Function),
    /// A modulo applied to a key, e.g. `abc % 2u32`; holds the key name and the operand.
    Mod(String, DataValue),
}
141
142impl DataInput {
143    pub fn as_key(&self) -> Option<&str> {
144        match self {
145            DataInput::Key(key) => Some(key),
146            DataInput::Value(_) => None,
147            DataInput::Function(key, _) => Some(key), // Functions do not have a key
148            DataInput::Mod(key, _) => Some(key),
149        }
150    }
151
152    pub fn value(&self) -> DataValue {
153        match self {
154            DataInput::Value(value) => value.clone(),
155            DataInput::Key(key) => DataValue::String(key.into()),
156            DataInput::Function(_, _) => DataValue::Null, // Functions do not have a value
157            DataInput::Mod(..) => DataValue::Null,
158        }
159    }
160    pub fn is_function(&self) -> bool {
161        matches!(self, DataInput::Function(_, _))
162    }
163
164    pub fn is_mod(&self) -> bool {
165        matches!(self, DataInput::Mod(_, _))
166    }
167}
168
/// The result of parsing a complete filter string (see the `TryFrom<&str>` impl).
#[derive(Debug, Clone, PartialEq)]
pub struct FilterRules {
    /// The parsed combinations.
    pub rules: Vec<FilterCombinantion>,
}
173
174impl TryFrom<&str> for FilterRules {
175    type Error = error::Error;
176
177    fn try_from(value: &str) -> std::result::Result<Self, Self::Error> {
178        DataParser::parse(Rule::full_expression, value)
179            .map_err(|e| error::parser_error(format!("Failed to parse DataInput: {e}")))?
180            .next()
181            .ok_or(error::parser_error(
182                "Expected a Rule::atom but found nothing",
183            ))
184            .and_then(parse_full_expression)
185    }
186}
187
188fn parse_left(rule: Pair<Rule>) -> Result<DataInput> {
189    trace!("Parsing left expression: {rule:?}");
190    let mut inner = rule.into_inner();
191    trace!("Parsing left inner: {inner:?}");
192    let key = inner
193        .next()
194        .ok_or(error::parser_error("Expected a key in left expression"))?
195        .as_str()
196        .to_string();
197    if let Some(function) = inner.next() {
198        let function_name = function.as_str();
199
200        if function_name.contains("%") {
201            let mut inn = function.into_inner();
202            // let _ = inn
203            //     .next()
204            //     .ok_or(error::parser_error("Expected a key in left expression"))?;
205            let atom = inn
206                .next()
207                .ok_or(error::parser_error("Expected a key in left expression"))?;
208            trace!("Atom {atom:?}");
209            return Ok(DataInput::Mod(key, parse_atom(atom)?.value()));
210        }
211        let function = match function_name {
212            ".len()" => Function::Len,
213            ".to_datetime_us()" => Function::ToDateTimeUs,
214            _ => return Err(error::parser_error("Unknown function: {function_name}")),
215        };
216        return Ok(DataInput::Function(key, function));
217    }
218    Ok(DataInput::Key(key)) // Placeholder for Function
219}
220
221fn parse_expression(pair: Pair<Rule>) -> Result<Expression> {
222    trace!("Parsing expression: {pair:?}");
223    match pair.as_rule() {
224        Rule::expression => {
225            let mut pairs = pair.into_inner();
226            trace!("Parsing expression pairs: {pairs:?}");
227            let left = parse_left(
228                pairs
229                    .next()
230                    .ok_or(error::parser_error("Expected a left expression"))?,
231            )?;
232            trace!("Parsing expression left: {left:?}");
233
234            let operator = pairs
235                .next()
236                .and_then(|s| s.as_str().parse::<FilterOperator>().ok())
237                .ok_or(error::parser_error("Expected a valid filter operator"))?;
238            trace!("Parsing expression operator: {operator:?}");
239            let right = parse_atom(
240                pairs
241                    .next()
242                    .ok_or(error::parser_error("Expected a right expression"))?,
243            )?;
244
245            trace!("Parsing expression right: {right:?}");
246            Ok(Expression {
247                left,
248                operator,
249                right,
250            })
251        }
252        e => Err(error::parser_error(format!(
253            "Unexpected rule in expression {e:?}"
254        ))),
255    }
256}
257fn parse_operator(pair: Pair<Rule>) -> Result<FilterJoin> {
258    match pair.as_str() {
259        "&&" => Ok(FilterJoin::And),
260        "||" => Ok(FilterJoin::Or),
261        _ => Err(error::parser_error(format!(
262            "Unknown operator: {}",
263            pair.as_str()
264        ))),
265    }
266}
267fn parse_filter_combination(pair: Pair<Rule>) -> Result<FilterCombinantion> {
268    if pair.as_rule() == Rule::expression {
269        return Ok(FilterCombinantion::Simple(parse_expression(pair)?));
270    }
271    let mut pairs = pair.into_inner();
272    trace!("Parsing filter combo expression pairs: {pairs:?}");
273    let first = parse_expression(pairs.next().ok_or(error::parser_error(
274        "Expected at least one expression in the pair",
275    ))?)?;
276    if let Some(op) = pairs.next() {
277        trace!("Parsing filter combo expression: {op:?} vs pairs {pairs:?}");
278        let op = parse_operator(op)?;
279        match op {
280            FilterJoin::And => {
281                return Ok(FilterCombinantion::And(
282                    first,
283                    Box::new(parse_filter_combination(pairs.next().ok_or(
284                        error::parser_error("Expected a next expression after '&&'"),
285                    )?)?),
286                ));
287            }
288            FilterJoin::Or => {
289                return Ok(FilterCombinantion::Or(
290                    first,
291                    Box::new(parse_filter_combination(pairs.next().ok_or(
292                        error::parser_error("Expected a next expression after '||'"),
293                    )?)?),
294                ));
295            }
296        }
297    }
298    Ok(FilterCombinantion::Simple(first))
299}
300fn parse_full_expression(pair: Pair<Rule>) -> Result<FilterRules> {
301    let mut rules = Vec::new();
302    trace!("Parsing full expression: {pair:?}");
303    match pair.as_rule() {
304        Rule::full_expression => {
305            let mut pairs = pair.into_inner();
306            trace!("Parsing full expression pairs: {pairs:?}");
307            let left = parse_expression(pairs.next().ok_or(error::parser_error(
308                "Expected at least one expression in the pair",
309            ))?)?;
310
311            if let Some(op) = pairs.next() {
312                trace!("Parsing operator: {op:?}");
313                let op = parse_operator(op)?;
314                let right = pairs.next().ok_or(error::parser_error(
315                    "Expected a next expression after operator",
316                ))?;
317                let ops = |op: FilterJoin,
318                           right: FilterCombinantion,
319                           rules: &mut Vec<FilterCombinantion>|
320                 -> Result<()> {
321                    match op {
322                        FilterJoin::And => {
323                            rules.push(FilterCombinantion::And(left, Box::new(right)));
324                        }
325                        FilterJoin::Or => {
326                            rules.push(FilterCombinantion::Or(left, Box::new(right)));
327                        }
328                    }
329                    Ok(())
330                };
331                match right.as_rule() {
332                    Rule::expression => {
333                        let right_expr = parse_expression(right)?;
334                        ops(op, FilterCombinantion::Simple(right_expr), &mut rules)?;
335                    }
336                    Rule::grouped_expression => {
337                        let grouped_expr = parse_filter_combination(right)?;
338                        ops(op, grouped_expr, &mut rules)?;
339                    }
340                    _ => return Err(error::parser_error("Expected an expression after operator")),
341                }
342            } else {
343                rules.push(FilterCombinantion::Simple(left));
344            }
345        }
346        _ => return Err(error::parser_error("Expected a full expression rule")),
347    }
348
349    Ok(FilterRules { rules })
350}
351
352impl TryFrom<&str> for DataInput {
353    type Error = error::Error;
354
355    fn try_from(value: &str) -> std::result::Result<Self, Self::Error> {
356        DataParser::parse(Rule::atom, value)
357            .map_err(|e| error::parser_error(format!("Failed to parse DataInput: {e}")))?
358            .next()
359            .ok_or(error::parser_error(
360                "Expected a Rule::atom but found nothing",
361            ))
362            .and_then(parse_atom)
363    }
364}
365
366fn number_to_value<T: FromStr>(number: &str, post_fix: &str) -> Result<T> {
367    num_to_value(number.split(post_fix).next().ok_or_else(|| {
368        error::parser_error("Expected a number with postfix '{post_fix}' but found: {number}")
369    })?)
370}
371
372fn num_to_value<T: FromStr>(number: &str) -> Result<T> {
373    match number.parse::<T>() {
374        Ok(value) => Ok(value),
375        Err(_e) => Err(error::parser_error(format!(
376            "Failed to parse number {number}"
377        ))),
378    }
379}
380
381fn parse_atom(rule: Pair<Rule>) -> Result<DataInput> {
382    match rule.as_rule() {
383        Rule::atom => {
384            let inner = rule.into_inner().next().ok_or(error::parser_error(
385                "Expected a Rule::atom but found nothing",
386            ))?;
387            parse_atom(inner)
388        }
389        Rule::u32 => number_to_value::<u32>(rule.as_str(), "u32")
390            .map(|value| DataInput::Value(DataValue::from(value))),
391        Rule::i32 => number_to_value::<i32>(rule.as_str(), "i32")
392            .map(|value| DataInput::Value(DataValue::from(value))),
393        Rule::u64 => number_to_value::<u64>(rule.as_str(), "u64")
394            .map(|value| DataInput::Value(DataValue::from(value))),
395        Rule::i64 => {
396            let str_rule = rule.as_str();
397            if str_rule.contains("i64") {
398                number_to_value::<i64>(str_rule, "i64")
399                    .map(|value| DataInput::Value(DataValue::from(value)))
400            } else {
401                num_to_value::<i64>(str_rule).map(|val| DataInput::Value(DataValue::from(val)))
402            }
403        }
404        Rule::f32 => number_to_value::<f32>(rule.as_str(), "f32")
405            .map(|value| DataInput::Value(DataValue::from(value))),
406        Rule::f64 => number_to_value::<f64>(rule.as_str(), "f64")
407            .map(|value| DataInput::Value(DataValue::from(value))),
408        Rule::float => number_to_value::<f64>(rule.as_str(), "f64")
409            .map(|value| DataInput::Value(DataValue::from(value))),
410        Rule::string_qt => {
411            let value = rule.as_str().trim_matches('\'');
412            Ok(DataInput::Value(DataValue::String(value.into())))
413        }
414        Rule::boolean => {
415            let value = rule.as_str();
416            match value {
417                "true" => Ok(DataInput::Value(DataValue::Bool(true))),
418                "false" => Ok(DataInput::Value(DataValue::Bool(false))),
419                _ => Err(error::parser_error(
420                    "Expected boolean value but found: {value}",
421                )),
422            }
423        }
424        Rule::null => Ok(DataInput::Value(DataValue::Null)),
425        Rule::key => Ok(DataInput::Key(rule.as_str().to_string())),
426        Rule::array => {
427            let mut values = Vec::new();
428            for pair in rule.into_inner() {
429                match parse_atom(pair)? {
430                    DataInput::Value(value) => values.push(value),
431                    DataInput::Key(key) => {
432                        values.push(DataValue::String(key.into()));
433                    }
434                    DataInput::Function(_, _) => {
435                        return Err(error::parser_error("Function in array is not supported"));
436                    }
437                    DataInput::Mod(_, _) => {
438                        return Err(error::parser_error("Function in array is not supported"));
439                    }
440                }
441            }
442            Ok(DataInput::Value(DataValue::Vec(values)))
443        }
444        Rule::left => parse_left(rule),
445        _ => Err(error::parser_error("{rule} did not match any 'Rule' ")),
446    }
447}
448
449impl std::str::FromStr for FilterOperator {
450    type Err = error::Error;
451
452    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
453        match s {
454            "==" => Ok(FilterOperator::Equal),
455            "!=" => Ok(FilterOperator::NotEqual),
456            "<" => Ok(FilterOperator::Less),
457            ">" => Ok(FilterOperator::Greater),
458            "<=" => Ok(FilterOperator::LeOrEq),
459            ">=" => Ok(FilterOperator::GrOrEq),
460            "~=" => Ok(FilterOperator::Regex),
461            "in" => Ok(FilterOperator::In),
462            "notIn" => Ok(FilterOperator::NotIn),
463            _ => Err(error::parser_error(format!("Unknown filter operator: {s}"))),
464        }
465    }
466}
467
// Parser tests: each `#[case]` pairs an input string with the exact value
// the parser is expected to produce.
#[cfg(test)]
mod test {
    use super::*;
    use rstest::*;

    // Single atoms: keys, quoted strings, typed and untyped numbers, null,
    // booleans and arrays. A bare identifier inside an array becomes a string.
    #[rstest]
    #[case("abc", DataInput::Key("abc".to_string()))]
    #[case("'abc'", DataInput::Value(DataValue::from("abc")))]
    #[case("1u32", DataInput::Value(DataValue::from(1u32)))]
    #[case("1i32", DataInput::Value(DataValue::from(1i32)))]
    #[case("1u64", DataInput::Value(DataValue::from(1u64)))]
    #[case("1i64", DataInput::Value(DataValue::from(1i64)))]
    #[case("1f64", DataInput::Value(DataValue::from(1f64)))]
    #[case("null", DataInput::Value(DataValue::Null))]
    #[case("true", DataInput::Value(DataValue::from(true)))]
    #[case("false", DataInput::Value(DataValue::from(false)))]
    #[case("1.0", DataInput::Value(DataValue::from(1f64)))]
    #[case("[1u32, 1f64, 'abc', notakey]", DataInput::Value(DataValue::Vec(vec![
        DataValue::from(1u32),
        DataValue::from(1f64),
        DataValue::from("abc"),
        DataValue::from("notakey"),
    ])))]
    #[case("1.0f32", DataInput::Value(DataValue::from(1f32)))]
    #[case("1", DataInput::Value(DataValue::from(1i64)))]
    fn test_parser(#[case] input: &str, #[case] expected: DataInput) {
        let result = DataInput::try_from(input);
        assert!(result.is_ok(), "Failed to parse '{input}' {result:?}");
        assert_eq!(result.unwrap(), expected);
    }

    // Full filter strings: a single expression, `&&` / `||` joins, and a
    // parenthesised group on the right-hand side of a join.
    #[rstest]
    #[case("abc > 1u32", FilterRules{ rules: vec![FilterCombinantion::Simple(Expression {
        left: DataInput::Key("abc".to_string()),
        operator: FilterOperator::Greater,
        right: DataInput::Value(DataValue::from(1u32)),
    })] })]
    #[case("abc > 1u32 && c == 'a'", FilterRules{ rules: vec![FilterCombinantion::And(Expression {
        left: DataInput::Key("abc".to_string()),
        operator: FilterOperator::Greater,
        right: DataInput::Value(DataValue::from(1u32)),
    }, Box::new(
        FilterCombinantion::Simple(Expression {
            left: DataInput::Key("c".to_string()),
            operator: FilterOperator::Equal,
            right: DataInput::Value(DataValue::from("a")),
        }),
    ))] })]
    #[case("abc > 1u32 || c <= 12.0f64", FilterRules{ rules: vec![FilterCombinantion::Or(Expression {
        left: DataInput::Key("abc".to_string()),
        operator: FilterOperator::Greater,
        right: DataInput::Value(DataValue::from(1u32)),
    }, Box::new(
        FilterCombinantion::Simple(Expression {
            left: DataInput::Key("c".to_string()),
            operator: FilterOperator::LeOrEq,
            right: DataInput::Value(DataValue::from(12f64)),
        }),
    ))] })]
    #[case("abc in [1i32] && (g >= 1u64 || c ~= '.*')", FilterRules{ rules: vec![FilterCombinantion::And(Expression {
        left: DataInput::Key("abc".to_string()),
        operator: FilterOperator::In,
        right: DataInput::Value(DataValue::Vec(vec![1i32.into()])),
    }, Box::new(
        FilterCombinantion::Or(Expression {
            left: DataInput::Key("g".to_string()),
            operator: FilterOperator::GrOrEq,
            right: DataInput::Value(DataValue::from(1u64)),
        }, Box::new(
            FilterCombinantion::Simple(Expression {
                left: DataInput::Key("c".to_string()),
                operator: FilterOperator::Regex,
                right: DataInput::Value(DataValue::from(".*")),
            }),
        )),
    ))] })]
    fn test_parser_filter(#[case] input: &str, #[case] expected: FilterRules) {
        let result = FilterRules::try_from(input);
        assert!(result.is_ok(), "Failed to parse '{input}' {result:?}");
        assert_eq!(result.unwrap(), expected);
    }

    // Left operands with a function call (`.len()`, `.to_datetime_us()`)
    // or a modulo (`%`).
    #[rstest]
    #[case("abc.len() > 1u32", FilterRules{ rules: vec![FilterCombinantion::Simple(Expression {
        left: DataInput::Function("abc".to_string(), Function::Len),
        operator: FilterOperator::Greater,
        right: DataInput::Value(DataValue::from(1u32)),
    })] })]
    #[case("abc.to_datetime_us() > '2025-07-01 00:00:00' && c == 'a'", FilterRules{ rules: vec![FilterCombinantion::And(Expression {
        left: DataInput::Function("abc".to_string(), Function::ToDateTimeUs),
        operator: FilterOperator::Greater,
        right: DataInput::Value(DataValue::from("2025-07-01 00:00:00")),
    }, Box::new(
        FilterCombinantion::Simple(Expression {
            left: DataInput::Key("c".to_string()),
            operator: FilterOperator::Equal,
            right: DataInput::Value(DataValue::from("a")),
        }),
    ))] })]
    #[case("abc % 1u32 == 1u32", FilterRules{ rules: vec![FilterCombinantion::Simple(Expression {
        left: DataInput::Mod("abc".to_string(), DataValue::U32(1)),
        operator: FilterOperator::Equal,
        right: DataInput::Value(DataValue::from(1u32)),
    })] })]

    fn test_functions(#[case] input: &str, #[case] expected: FilterRules) {
        let result = FilterRules::try_from(input);
        assert!(result.is_ok(), "Failed to parse '{input}' {result:?}");
        assert_eq!(result.unwrap(), expected);
    }
}