1use std::str::FromStr;
2
3use data_value::DataValue;
4use pest::{iterators::Pair, Parser};
5use regex::Regex;
6use tracing::trace;
7pub mod error;
8pub mod filtering;
9pub use filtering::*;
10type Result<T> = std::result::Result<T, error::Error>;
11
/// Pest parser for the filter expression language.
///
/// The grammar is read from `filter/grammar/data.pest`. The `Rule` enum that
/// the rest of this module matches on is produced by this derive.
#[derive(pest_derive::Parser)]
#[grammar = "filter/grammar/data.pest"]
struct DataParser;
15
/// Interface for types that can evaluate a parsed [`Expression`].
///
/// Both methods take the expression by reference and return a list of
/// `usize` values.
// NOTE(review): presumably the returned values are indices of the entries
// that match the expression — confirm against the implementors.
pub trait Filtering {
    /// Evaluates a plain (non-function) expression.
    // NOTE(review): inferred from the name only; verify with implementors.
    fn prepare_indicies(&self, expression: &Expression) -> Result<Vec<usize>>;
    /// Evaluates an expression whose left side is a function call.
    // NOTE(review): inferred from the name only; verify with implementors.
    fn apply_function(&self, expression: &Expression) -> Result<Vec<usize>>;
}
20
/// Comparison operator of a single filter [`Expression`].
///
/// The textual form of every variant is defined by the `FromStr`
/// implementation for this type.
#[derive(Debug, Clone, PartialEq, Copy)]
pub enum FilterOperator {
    /// Parsed from `==`.
    Equal,
    /// Parsed from `!=`.
    NotEqual,
    /// Parsed from `<`.
    Less,
    /// Parsed from `>`.
    Greater,
    /// Parsed from `<=`.
    LeOrEq,
    /// Parsed from `>=`.
    GrOrEq,
    /// Parsed from `~=`; the right side must be a regex string.
    Regex,
    /// Parsed from `in`; the right side must be an array.
    In,
    /// Parsed from `notIn`; the right side must be an array.
    NotIn,
}
34
/// Logical connective between two filter expressions.
#[derive(Debug, Clone, PartialEq, Copy)]
pub enum FilterJoin {
    /// Parsed from `&&`.
    And,
    /// Parsed from `||`.
    Or,
}
/// A single comparison of the form `left operator right`.
#[derive(Debug, Clone, PartialEq)]
pub struct Expression {
    /// Left operand: a key, optionally with a function call or modulo suffix.
    pub left: DataInput,
    /// Comparison operator applied between `left` and `right`.
    pub operator: FilterOperator,
    /// Right operand, parsed as an atom (literal, key or array).
    pub right: DataInput,
}
47
/// Right-hand argument of an [`Expression`], prepared for evaluation.
///
/// Produced by [`Expression::filter_argument`]; the variant depends on the
/// expression's operator.
#[derive(Debug)]
pub enum FilterArgument {
    /// Plain value used by the comparison operators.
    Value(DataValue),
    /// Compiled pattern used by the regex operator.
    Regex(regex::Regex),
    /// List of candidates used by the `in` / `notIn` operators.
    Vec(Vec<DataValue>),
}
54
55impl FilterArgument {
56 pub fn value(&self) -> &DataValue {
57 match self {
58 FilterArgument::Value(value) => value,
59 FilterArgument::Regex(_) => &DataValue::Null, FilterArgument::Vec(_vec) => &DataValue::Null,
61 }
62 }
63
64 pub fn vec(&self) -> Option<&Vec<DataValue>> {
65 match self {
66 FilterArgument::Value(value) => {
67 if let DataValue::Vec(vec) = value {
68 Some(vec)
69 } else {
70 None
71 }
72 }
73 FilterArgument::Regex(_) => None, FilterArgument::Vec(vec) => Some(vec),
75 }
76 }
77
78 pub fn regex(&self) -> Option<&Regex> {
79 match self {
80 FilterArgument::Value(_value) => None,
81 FilterArgument::Regex(regex) => Some(regex),
82 FilterArgument::Vec(_) => None, }
84 }
85}
86
87impl Expression {
88 pub fn filter_argument(&self) -> Result<FilterArgument> {
89 match self.operator {
90 FilterOperator::Equal
91 | FilterOperator::NotEqual
92 | FilterOperator::Less
93 | FilterOperator::Greater
94 | FilterOperator::LeOrEq
95 | FilterOperator::GrOrEq => Ok(FilterArgument::Value(self.right.value())),
96 FilterOperator::Regex => {
97 if let DataValue::String(ref regex) = self.right.value() {
98 Ok(FilterArgument::Regex(regex::Regex::new(regex)?))
99 } else {
100 Err(error::parser_error(
101 "Expected a regex string for Regex operator",
102 ))
103 }
104 }
105 FilterOperator::In | FilterOperator::NotIn => {
106 if let DataValue::Vec(ref vec) = self.right.value() {
107 Ok(FilterArgument::Vec(vec.clone()))
108 } else {
109 Err(error::parser_error(
110 "Expected a vector for In/NotIn operator",
111 ))
112 }
113 }
114 }
115 }
116}
117
/// Tree of filter expressions joined by logical connectives.
// NOTE(review): the type name is misspelled ("Combinantion"); it is public,
// so renaming it would break callers.
#[derive(Debug, Clone, PartialEq)]
pub enum FilterCombinantion {
    /// A single expression with nothing joined to it.
    Simple(Expression),
    /// `expression && rest`.
    And(Expression, Box<FilterCombinantion>),
    /// `expression || rest`.
    Or(Expression, Box<FilterCombinantion>),
    /// A list of combinations; the parser in this file never constructs it.
    Grouped(Vec<FilterCombinantion>),
}
127
/// Function that can be applied to a key on the left side of an expression.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Function {
    /// Written as `key.len()`.
    Len,
    /// Written as `key.to_datetime_us()`.
    ToDateTimeUs,
}
133
/// One operand of a filter expression.
#[derive(Debug, Clone, PartialEq)]
pub enum DataInput {
    /// A literal value (number, string, boolean, null or array).
    Value(DataValue),
    /// A bare identifier naming a key.
    Key(String),
    /// A function applied to a key, e.g. `abc.len()`.
    Function(String, Function),
    /// A key followed by `%` and a value, e.g. `abc % 1u32`.
    Mod(String, DataValue),
}
141
142impl DataInput {
143 pub fn as_key(&self) -> Option<&str> {
144 match self {
145 DataInput::Key(key) => Some(key),
146 DataInput::Value(_) => None,
147 DataInput::Function(key, _) => Some(key), DataInput::Mod(key, _) => Some(key),
149 }
150 }
151
152 pub fn value(&self) -> DataValue {
153 match self {
154 DataInput::Value(value) => value.clone(),
155 DataInput::Key(key) => DataValue::String(key.into()),
156 DataInput::Function(_, _) => DataValue::Null, DataInput::Mod(..) => DataValue::Null,
158 }
159 }
160 pub fn is_function(&self) -> bool {
161 matches!(self, DataInput::Function(_, _))
162 }
163
164 pub fn is_mod(&self) -> bool {
165 matches!(self, DataInput::Mod(_, _))
166 }
167}
168
/// Result of parsing a complete filter string.
#[derive(Debug, Clone, PartialEq)]
pub struct FilterRules {
    /// Parsed combinations; the parser in this file always produces exactly one.
    pub rules: Vec<FilterCombinantion>,
}
173
174impl TryFrom<&str> for FilterRules {
175 type Error = error::Error;
176
177 fn try_from(value: &str) -> std::result::Result<Self, Self::Error> {
178 DataParser::parse(Rule::full_expression, value)
179 .map_err(|e| error::parser_error(format!("Failed to parse DataInput: {e}")))?
180 .next()
181 .ok_or(error::parser_error(
182 "Expected a Rule::atom but found nothing",
183 ))
184 .and_then(parse_full_expression)
185 }
186}
187
188fn parse_left(rule: Pair<Rule>) -> Result<DataInput> {
189 trace!("Parsing left expression: {rule:?}");
190 let mut inner = rule.into_inner();
191 trace!("Parsing left inner: {inner:?}");
192 let key = inner
193 .next()
194 .ok_or(error::parser_error("Expected a key in left expression"))?
195 .as_str()
196 .to_string();
197 if let Some(function) = inner.next() {
198 let function_name = function.as_str();
199
200 if function_name.contains("%") {
201 let mut inn = function.into_inner();
202 let atom = inn
206 .next()
207 .ok_or(error::parser_error("Expected a key in left expression"))?;
208 trace!("Atom {atom:?}");
209 return Ok(DataInput::Mod(key, parse_atom(atom)?.value()));
210 }
211 let function = match function_name {
212 ".len()" => Function::Len,
213 ".to_datetime_us()" => Function::ToDateTimeUs,
214 _ => return Err(error::parser_error("Unknown function: {function_name}")),
215 };
216 return Ok(DataInput::Function(key, function));
217 }
218 Ok(DataInput::Key(key)) }
220
221fn parse_expression(pair: Pair<Rule>) -> Result<Expression> {
222 trace!("Parsing expression: {pair:?}");
223 match pair.as_rule() {
224 Rule::expression => {
225 let mut pairs = pair.into_inner();
226 trace!("Parsing expression pairs: {pairs:?}");
227 let left = parse_left(
228 pairs
229 .next()
230 .ok_or(error::parser_error("Expected a left expression"))?,
231 )?;
232 trace!("Parsing expression left: {left:?}");
233
234 let operator = pairs
235 .next()
236 .and_then(|s| s.as_str().parse::<FilterOperator>().ok())
237 .ok_or(error::parser_error("Expected a valid filter operator"))?;
238 trace!("Parsing expression operator: {operator:?}");
239 let right = parse_atom(
240 pairs
241 .next()
242 .ok_or(error::parser_error("Expected a right expression"))?,
243 )?;
244
245 trace!("Parsing expression right: {right:?}");
246 Ok(Expression {
247 left,
248 operator,
249 right,
250 })
251 }
252 e => Err(error::parser_error(format!(
253 "Unexpected rule in expression {e:?}"
254 ))),
255 }
256}
257fn parse_operator(pair: Pair<Rule>) -> Result<FilterJoin> {
258 match pair.as_str() {
259 "&&" => Ok(FilterJoin::And),
260 "||" => Ok(FilterJoin::Or),
261 _ => Err(error::parser_error(format!(
262 "Unknown operator: {}",
263 pair.as_str()
264 ))),
265 }
266}
267fn parse_filter_combination(pair: Pair<Rule>) -> Result<FilterCombinantion> {
268 if pair.as_rule() == Rule::expression {
269 return Ok(FilterCombinantion::Simple(parse_expression(pair)?));
270 }
271 let mut pairs = pair.into_inner();
272 trace!("Parsing filter combo expression pairs: {pairs:?}");
273 let first = parse_expression(pairs.next().ok_or(error::parser_error(
274 "Expected at least one expression in the pair",
275 ))?)?;
276 if let Some(op) = pairs.next() {
277 trace!("Parsing filter combo expression: {op:?} vs pairs {pairs:?}");
278 let op = parse_operator(op)?;
279 match op {
280 FilterJoin::And => {
281 return Ok(FilterCombinantion::And(
282 first,
283 Box::new(parse_filter_combination(pairs.next().ok_or(
284 error::parser_error("Expected a next expression after '&&'"),
285 )?)?),
286 ));
287 }
288 FilterJoin::Or => {
289 return Ok(FilterCombinantion::Or(
290 first,
291 Box::new(parse_filter_combination(pairs.next().ok_or(
292 error::parser_error("Expected a next expression after '||'"),
293 )?)?),
294 ));
295 }
296 }
297 }
298 Ok(FilterCombinantion::Simple(first))
299}
300fn parse_full_expression(pair: Pair<Rule>) -> Result<FilterRules> {
301 let mut rules = Vec::new();
302 trace!("Parsing full expression: {pair:?}");
303 match pair.as_rule() {
304 Rule::full_expression => {
305 let mut pairs = pair.into_inner();
306 trace!("Parsing full expression pairs: {pairs:?}");
307 let left = parse_expression(pairs.next().ok_or(error::parser_error(
308 "Expected at least one expression in the pair",
309 ))?)?;
310
311 if let Some(op) = pairs.next() {
312 trace!("Parsing operator: {op:?}");
313 let op = parse_operator(op)?;
314 let right = pairs.next().ok_or(error::parser_error(
315 "Expected a next expression after operator",
316 ))?;
317 let ops = |op: FilterJoin,
318 right: FilterCombinantion,
319 rules: &mut Vec<FilterCombinantion>|
320 -> Result<()> {
321 match op {
322 FilterJoin::And => {
323 rules.push(FilterCombinantion::And(left, Box::new(right)));
324 }
325 FilterJoin::Or => {
326 rules.push(FilterCombinantion::Or(left, Box::new(right)));
327 }
328 }
329 Ok(())
330 };
331 match right.as_rule() {
332 Rule::expression => {
333 let right_expr = parse_expression(right)?;
334 ops(op, FilterCombinantion::Simple(right_expr), &mut rules)?;
335 }
336 Rule::grouped_expression => {
337 let grouped_expr = parse_filter_combination(right)?;
338 ops(op, grouped_expr, &mut rules)?;
339 }
340 _ => return Err(error::parser_error("Expected an expression after operator")),
341 }
342 } else {
343 rules.push(FilterCombinantion::Simple(left));
344 }
345 }
346 _ => return Err(error::parser_error("Expected a full expression rule")),
347 }
348
349 Ok(FilterRules { rules })
350}
351
352impl TryFrom<&str> for DataInput {
353 type Error = error::Error;
354
355 fn try_from(value: &str) -> std::result::Result<Self, Self::Error> {
356 DataParser::parse(Rule::atom, value)
357 .map_err(|e| error::parser_error(format!("Failed to parse DataInput: {e}")))?
358 .next()
359 .ok_or(error::parser_error(
360 "Expected a Rule::atom but found nothing",
361 ))
362 .and_then(parse_atom)
363 }
364}
365
366fn number_to_value<T: FromStr>(number: &str, post_fix: &str) -> Result<T> {
367 num_to_value(number.split(post_fix).next().ok_or_else(|| {
368 error::parser_error("Expected a number with postfix '{post_fix}' but found: {number}")
369 })?)
370}
371
372fn num_to_value<T: FromStr>(number: &str) -> Result<T> {
373 match number.parse::<T>() {
374 Ok(value) => Ok(value),
375 Err(_e) => Err(error::parser_error(format!(
376 "Failed to parse number {number}"
377 ))),
378 }
379}
380
381fn parse_atom(rule: Pair<Rule>) -> Result<DataInput> {
382 match rule.as_rule() {
383 Rule::atom => {
384 let inner = rule.into_inner().next().ok_or(error::parser_error(
385 "Expected a Rule::atom but found nothing",
386 ))?;
387 parse_atom(inner)
388 }
389 Rule::u32 => number_to_value::<u32>(rule.as_str(), "u32")
390 .map(|value| DataInput::Value(DataValue::from(value))),
391 Rule::i32 => number_to_value::<i32>(rule.as_str(), "i32")
392 .map(|value| DataInput::Value(DataValue::from(value))),
393 Rule::u64 => number_to_value::<u64>(rule.as_str(), "u64")
394 .map(|value| DataInput::Value(DataValue::from(value))),
395 Rule::i64 => {
396 let str_rule = rule.as_str();
397 if str_rule.contains("i64") {
398 number_to_value::<i64>(str_rule, "i64")
399 .map(|value| DataInput::Value(DataValue::from(value)))
400 } else {
401 num_to_value::<i64>(str_rule).map(|val| DataInput::Value(DataValue::from(val)))
402 }
403 }
404 Rule::f32 => number_to_value::<f32>(rule.as_str(), "f32")
405 .map(|value| DataInput::Value(DataValue::from(value))),
406 Rule::f64 => number_to_value::<f64>(rule.as_str(), "f64")
407 .map(|value| DataInput::Value(DataValue::from(value))),
408 Rule::float => number_to_value::<f64>(rule.as_str(), "f64")
409 .map(|value| DataInput::Value(DataValue::from(value))),
410 Rule::string_qt => {
411 let value = rule.as_str().trim_matches('\'');
412 Ok(DataInput::Value(DataValue::String(value.into())))
413 }
414 Rule::boolean => {
415 let value = rule.as_str();
416 match value {
417 "true" => Ok(DataInput::Value(DataValue::Bool(true))),
418 "false" => Ok(DataInput::Value(DataValue::Bool(false))),
419 _ => Err(error::parser_error(
420 "Expected boolean value but found: {value}",
421 )),
422 }
423 }
424 Rule::null => Ok(DataInput::Value(DataValue::Null)),
425 Rule::key => Ok(DataInput::Key(rule.as_str().to_string())),
426 Rule::array => {
427 let mut values = Vec::new();
428 for pair in rule.into_inner() {
429 match parse_atom(pair)? {
430 DataInput::Value(value) => values.push(value),
431 DataInput::Key(key) => {
432 values.push(DataValue::String(key.into()));
433 }
434 DataInput::Function(_, _) => {
435 return Err(error::parser_error("Function in array is not supported"));
436 }
437 DataInput::Mod(_, _) => {
438 return Err(error::parser_error("Function in array is not supported"));
439 }
440 }
441 }
442 Ok(DataInput::Value(DataValue::Vec(values)))
443 }
444 Rule::left => parse_left(rule),
445 _ => Err(error::parser_error("{rule} did not match any 'Rule' ")),
446 }
447}
448
449impl std::str::FromStr for FilterOperator {
450 type Err = error::Error;
451
452 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
453 match s {
454 "==" => Ok(FilterOperator::Equal),
455 "!=" => Ok(FilterOperator::NotEqual),
456 "<" => Ok(FilterOperator::Less),
457 ">" => Ok(FilterOperator::Greater),
458 "<=" => Ok(FilterOperator::LeOrEq),
459 ">=" => Ok(FilterOperator::GrOrEq),
460 "~=" => Ok(FilterOperator::Regex),
461 "in" => Ok(FilterOperator::In),
462 "notIn" => Ok(FilterOperator::NotIn),
463 _ => Err(error::parser_error(format!("Unknown filter operator: {s}"))),
464 }
465 }
466}
467
// Parser tests: single atoms, joined filter expressions, and left-hand
// function / modulo forms.
#[cfg(test)]
mod test {
    use super::*;
    use rstest::*;

    // Each case parses one atom and checks the resulting `DataInput`.
    // Covers keys, quoted strings, typed and untyped numbers, null, booleans
    // and arrays (where a bare identifier is stored as a string).
    #[rstest]
    #[case("abc", DataInput::Key("abc".to_string()))]
    #[case("'abc'", DataInput::Value(DataValue::from("abc")))]
    #[case("1u32", DataInput::Value(DataValue::from(1u32)))]
    #[case("1i32", DataInput::Value(DataValue::from(1i32)))]
    #[case("1u64", DataInput::Value(DataValue::from(1u64)))]
    #[case("1i64", DataInput::Value(DataValue::from(1i64)))]
    #[case("1f64", DataInput::Value(DataValue::from(1f64)))]
    #[case("null", DataInput::Value(DataValue::Null))]
    #[case("true", DataInput::Value(DataValue::from(true)))]
    #[case("false", DataInput::Value(DataValue::from(false)))]
    #[case("1.0", DataInput::Value(DataValue::from(1f64)))]
    #[case("[1u32, 1f64, 'abc', notakey]", DataInput::Value(DataValue::Vec(vec![
        DataValue::from(1u32),
        DataValue::from(1f64),
        DataValue::from("abc"),
        DataValue::from("notakey"),
    ])))]
    #[case("1.0f32", DataInput::Value(DataValue::from(1f32)))]
    #[case("1", DataInput::Value(DataValue::from(1i64)))]
    fn test_parser(#[case] input: &str, #[case] expected: DataInput) {
        let result = DataInput::try_from(input);
        assert!(result.is_ok(), "Failed to parse '{input}' {result:?}");
        assert_eq!(result.unwrap(), expected);
    }

    // Each case parses a full filter string and checks the resulting rule
    // tree: a single expression, `&&`, `||`, and `&&` with a grouped `||`.
    #[rstest]
    #[case("abc > 1u32", FilterRules{ rules: vec![FilterCombinantion::Simple(Expression {
        left: DataInput::Key("abc".to_string()),
        operator: FilterOperator::Greater,
        right: DataInput::Value(DataValue::from(1u32)),
    })] })]
    #[case("abc > 1u32 && c == 'a'", FilterRules{ rules: vec![FilterCombinantion::And(Expression {
        left: DataInput::Key("abc".to_string()),
        operator: FilterOperator::Greater,
        right: DataInput::Value(DataValue::from(1u32)),
    }, Box::new(
        FilterCombinantion::Simple(Expression {
            left: DataInput::Key("c".to_string()),
            operator: FilterOperator::Equal,
            right: DataInput::Value(DataValue::from("a")),
        }),
    ))] })]
    #[case("abc > 1u32 || c <= 12.0f64", FilterRules{ rules: vec![FilterCombinantion::Or(Expression {
        left: DataInput::Key("abc".to_string()),
        operator: FilterOperator::Greater,
        right: DataInput::Value(DataValue::from(1u32)),
    }, Box::new(
        FilterCombinantion::Simple(Expression {
            left: DataInput::Key("c".to_string()),
            operator: FilterOperator::LeOrEq,
            right: DataInput::Value(DataValue::from(12f64)),
        }),
    ))] })]
    #[case("abc in [1i32] && (g >= 1u64 || c ~= '.*')", FilterRules{ rules: vec![FilterCombinantion::And(Expression {
        left: DataInput::Key("abc".to_string()),
        operator: FilterOperator::In,
        right: DataInput::Value(DataValue::Vec(vec![1i32.into()])),
    }, Box::new(
        FilterCombinantion::Or(Expression {
            left: DataInput::Key("g".to_string()),
            operator: FilterOperator::GrOrEq,
            right: DataInput::Value(DataValue::from(1u64)),
        }, Box::new(
            FilterCombinantion::Simple(Expression {
                left: DataInput::Key("c".to_string()),
                operator: FilterOperator::Regex,
                right: DataInput::Value(DataValue::from(".*")),
            }),
        )),
    ))] })]
    fn test_parser_filter(#[case] input: &str, #[case] expected: FilterRules) {
        let result = FilterRules::try_from(input);
        assert!(result.is_ok(), "Failed to parse '{input}' {result:?}");
        assert_eq!(result.unwrap(), expected);
    }

    // Each case checks a left-hand side carrying a function call
    // (`.len()`, `.to_datetime_us()`) or a modulo suffix (`% value`).
    #[rstest]
    #[case("abc.len() > 1u32", FilterRules{ rules: vec![FilterCombinantion::Simple(Expression {
        left: DataInput::Function("abc".to_string(), Function::Len),
        operator: FilterOperator::Greater,
        right: DataInput::Value(DataValue::from(1u32)),
    })] })]
    #[case("abc.to_datetime_us() > '2025-07-01 00:00:00' && c == 'a'", FilterRules{ rules: vec![FilterCombinantion::And(Expression {
        left: DataInput::Function("abc".to_string(), Function::ToDateTimeUs),
        operator: FilterOperator::Greater,
        right: DataInput::Value(DataValue::from("2025-07-01 00:00:00")),
    }, Box::new(
        FilterCombinantion::Simple(Expression {
            left: DataInput::Key("c".to_string()),
            operator: FilterOperator::Equal,
            right: DataInput::Value(DataValue::from("a")),
        }),
    ))] })]
    #[case("abc % 1u32 == 1u32", FilterRules{ rules: vec![FilterCombinantion::Simple(Expression {
        left: DataInput::Mod("abc".to_string(), DataValue::U32(1)),
        operator: FilterOperator::Equal,
        right: DataInput::Value(DataValue::from(1u32)),
    })] })]

    fn test_functions(#[case] input: &str, #[case] expected: FilterRules) {
        let result = FilterRules::try_from(input);
        assert!(result.is_ok(), "Failed to parse '{input}' {result:?}");
        assert_eq!(result.unwrap(), expected);
    }
}