use gettextrs::gettext;
use nom::{
branch::alt,
bytes::complete::{escaped, is_not, tag, take, take_while, take_while1},
character::{is_alphanumeric, is_digit},
combinator::{complete, map, opt, peek, recognize, value},
error::{ErrorKind, ParseError},
sequence::{delimited, separated_pair, terminated, tuple},
IResult, Offset, Parser,
};
use regex_rs::Regex;
use std::cell::OnceCell;
use std::vec::Vec;
use strprintf::fmt;
/// Comparison operators of the filter language.
///
/// Each variant is produced by the `operators` parser from the token(s) noted on it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Operator {
/// Written as `==` or `=`.
Equals,
/// Written as `!=`.
NotEquals,
/// Written as `=~`.
RegexMatches,
/// Written as `!~`.
NotRegexMatches,
/// Written as `<`.
LessThan,
/// Written as `>`.
GreaterThan,
/// Written as `<=`.
LessThanOrEquals,
/// Written as `>=`.
GreaterThanOrEquals,
/// Written as `between`.
Between,
/// Written as `#`.
Contains,
/// Written as `!#`.
NotContains,
}
/// Right-hand side of a comparison, stored as text.
pub struct Value {
/// The value's text as handed to `Value::new`.
literal: String,
/// Outcome of compiling `literal` as a regex; populated by the first call to `as_regex`.
regex: OnceCell<Result<Regex, String>>,
}
impl Value {
    /// Wraps `literal` in a `Value` whose regex has not been compiled yet.
    fn new(literal: String) -> Self {
        let regex = OnceCell::new();
        Self { literal, regex }
    }

    /// Returns the value's text.
    pub fn literal(&self) -> &str {
        self.literal.as_str()
    }

    /// Returns the literal compiled as a regex, or the message produced when compilation failed.
    ///
    /// Compilation is attempted on the first call only. Its outcome, success or failure, is
    /// stored in the value and handed out again on every later call.
    pub fn as_regex(&self) -> Result<&Regex, &str> {
        use regex_rs::CompFlags;

        let compile = || {
            let flags = CompFlags::EXTENDED | CompFlags::IGNORE_CASE | CompFlags::NO_SUB;
            Regex::new(&self.literal, flags)
        };
        self.regex
            .get_or_init(compile)
            .as_ref()
            .map_err(String::as_str)
    }
}
impl core::fmt::Debug for Value {
    /// Prints the `literal` field only; the `regex` field is left out of the output.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let mut repr = f.debug_struct("Value");
        repr.field("literal", &self.literal);
        repr.finish()
    }
}
impl Clone for Value {
fn clone(&self) -> Self {
Self {
literal: self.literal.clone(),
regex: OnceCell::new(),
}
}
}
impl PartialEq for Value {
    /// Two values are equal when their literals are equal; the regex cell is not compared.
    fn eq(&self, other: &Value) -> bool {
        let (lhs, rhs) = (self.literal(), other.literal());
        lhs == rhs
    }
}

impl Eq for Value {}
/// Parsed filter expression tree.
#[derive(Debug, Clone, PartialEq)]
pub enum Expression {
/// Two sub-expressions joined by `and` (left operand, right operand).
And(Box<Expression>, Box<Expression>),
/// Two sub-expressions joined by `or` (left operand, right operand).
Or(Box<Expression>, Box<Expression>),
/// A single `attribute op value` test.
Comparison {
/// Attribute name as written in the filter.
attribute: String,
/// Operator placed between the attribute and the value.
op: Operator,
/// Right-hand side of the comparison.
value: Value,
},
}
/// What the parser was looking for at the point where it gave up.
///
/// Turned into user-facing text by `translate_expected`.
#[derive(PartialEq, Clone, Copy, Debug)]
enum Expected {
/// An attribute name (left-hand side of a comparison).
AttributeName,
/// One of the comparison operators.
Operators,
/// A value: quoted string, range, or number.
Value,
}
/// Returns the description of `expected` used in error messages, passed through `gettext`.
fn translate_expected(expected: Expected) -> String {
// Each message is a literal argument of `gettext`.
// NOTE(review): presumably required so message-extraction tools can find them; confirm before refactoring.
match expected {
Expected::AttributeName => gettext("attribute name"),
Expected::Operators => gettext("one of: =~, ==, =, !~, !=, <=, >=, <, >, between, #, !#"),
Expected::Value => gettext("one of: quoted string, range, number"),
}
}
/// Structured parse failure returned by `internal_parse`.
#[derive(PartialEq, Debug)]
enum Error<'a> {
/// Parsing succeeded but input was left over: offset of the leftovers in the original
/// string, and the leftovers themselves.
TrailingCharacters(usize, &'a str),
/// Parsing failed: offset in the original string, and what was expected there.
AtPos(usize, Expected),
/// Parsing failed and none of the recorded errors carried an `Expected` marker.
Internal,
}
/// Error type handed to nom; accumulates every error recorded while a parse attempt fails.
#[derive(PartialEq, Debug)]
struct FilterParserError<'a> {
/// Recorded errors in the order they were pushed, each paired with the input slice it was
/// recorded at.
pub errors: Vec<(&'a str, FilterParserErrorKind)>,
}
/// One entry of `FilterParserError::errors`.
#[derive(PartialEq, Debug)]
enum FilterParserErrorKind {
/// Error kind reported by one of nom's own combinators.
Nom(ErrorKind),
/// Marker added by the `expect` wrapper, naming what was being parsed.
Unexpected(Expected),
}
impl<'a> ParseError<&'a str> for FilterParserError<'a> {
    /// Starts a new error list holding a single nom error recorded at `input`.
    fn from_error_kind(input: &'a str, kind: ErrorKind) -> Self {
        let entry = (input, FilterParserErrorKind::Nom(kind));
        Self {
            errors: vec![entry],
        }
    }

    /// Adds a nom error recorded at `input` to the end of `other`'s list.
    fn append(input: &'a str, kind: ErrorKind, other: Self) -> Self {
        let Self { mut errors } = other;
        errors.push((input, FilterParserErrorKind::Nom(kind)));
        Self { errors }
    }
}
/// Error types that can record what the parser expected at a given input position.
trait ExpectativeError<I>: Sized {
/// Returns `other` extended with the note that `expected` was wanted at `input`.
fn add_expectative(input: I, expected: Expected, other: Self) -> Self;
}
impl<'a> ExpectativeError<&'a str> for FilterParserError<'a> {
    /// Adds an `Unexpected(expected)` entry recorded at `input` to the end of `other`'s list.
    fn add_expectative(input: &'a str, expected: Expected, other: Self) -> Self {
        let Self { mut errors } = other;
        errors.push((input, FilterParserErrorKind::Unexpected(expected)));
        Self { errors }
    }
}
fn expect<I: Clone, E: ExpectativeError<I>, F, O>(
expected: Expected,
mut f: F,
) -> impl FnMut(I) -> IResult<I, O, E>
where
F: Parser<I, O, E>,
{
move |i: I| match f.parse(i.clone()) {
Ok(o) => Ok(o),
Err(nom::Err::Incomplete(i)) => Err(nom::Err::Incomplete(i)),
Err(nom::Err::Error(e)) => Err(nom::Err::Error(E::add_expectative(i, expected, e))),
Err(nom::Err::Failure(e)) => Err(nom::Err::Failure(E::add_expectative(i, expected, e))),
}
}
/// Parses a single comparison operator at the start of `input`.
///
/// On failure the error is tagged with `Expected::Operators`.
fn operators<'a, E: ParseError<&'a str> + ExpectativeError<&'a str>>(
input: &'a str,
) -> IResult<&'a str, Operator, E> {
expect(
Expected::Operators,
// `alt` settles on the first alternative that matches, so every longer token is listed
// before the shorter token that is its prefix: `=~` and `==` before `=`, `<=` before `<`,
// `>=` before `>`.
alt((
value(Operator::RegexMatches, tag("=~")),
value(Operator::Equals, alt((tag("=="), tag("=")))),
value(Operator::NotRegexMatches, tag("!~")),
value(Operator::NotEquals, tag("!=")),
value(Operator::LessThanOrEquals, tag("<=")),
value(Operator::GreaterThanOrEquals, tag(">=")),
value(Operator::LessThan, tag("<")),
value(Operator::GreaterThan, tag(">")),
value(Operator::Between, tag("between")),
value(Operator::Contains, tag("#")),
value(Operator::NotContains, tag("!#")),
)),
)(input)
}
/// Parses a double-quoted string literal into a `Value`.
///
/// Inside the quotes a backslash escapes the single character that follows it. The value's
/// literal is a copy of the text matched between the quotes.
fn quoted_string<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Value, E> {
// `""` gets a branch of its own.
// NOTE(review): presumably because the `escaped`-based branch below does not accept an empty
// body; confirm against nom's documentation.
let empty_string = value(String::new(), tag("\"\""));
let nonempty_string = |input| {
let (leftovers, chr) = delimited(
tag("\""),
// Body: runs of characters other than `\` and `"`, or `\` followed by any one character.
escaped(is_not("\\\""), '\\', take(1usize)),
tag("\""),
)(input)?;
Ok((leftovers, String::from(chr)))
};
map(alt((nonempty_string, empty_string)), Value::new)(input)
}
/// Parses an integer literal: an optional `-` followed by one or more ASCII digits.
///
/// On success returns the matched slice of `input` together with the remaining input.
fn number<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, &'a str, E> {
    // `char as u8` keeps only the low eight bits, so a character such as U+0130 would be
    // narrowed to b'0' and wrongly accepted as a digit. Rule out non-ASCII characters before
    // narrowing.
    let is_ascii_digit = |c: char| c.is_ascii() && is_digit(c as u8);
    recognize(tuple((opt(tag("-")), take_while1(is_ascii_digit))))(input)
}
/// Parses a range written as `<number>:<number>` into a `Value` whose literal is the two numbers
/// joined by `:`.
fn range<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, Value, E> {
    let (leftovers, (low, high)) = separated_pair(number, tag(":"), number)(input)?;
    Ok((leftovers, Value::new(format!("{low}:{high}"))))
}
/// Consumes zero or more leading `' '` characters; no other character counts as whitespace.
fn space0<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, &'a str, E> {
    let is_space = |chr: char| chr == ' ';
    take_while(is_space)(input)
}
/// Consumes one or more leading `' '` characters; fails when `input` does not start with one.
fn space1<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, &'a str, E> {
    let is_space = |chr: char| chr == ' ';
    take_while1(is_space)(input)
}
/// Parses a single comparison: `<attribute> <operator> <value>`, with optional spaces around the
/// operator.
///
/// Attribute names consist of ASCII letters and digits, `_`, `-` and `.`. The value is a quoted
/// string, a range or a number. Failures are tagged with `Expected::AttributeName`,
/// `Expected::Operators` or `Expected::Value` depending on which part could not be parsed.
fn comparison<'a, E: ParseError<&'a str> + ExpectativeError<&'a str>>(
    input: &'a str,
) -> IResult<&'a str, Expression, E> {
    // `char as u8` keeps only the low eight bits, so a character such as U+0141 would be narrowed
    // to b'A' and wrongly accepted in an attribute name. Rule out non-ASCII characters before
    // narrowing.
    let is_attribute_char = |c: char| {
        (c.is_ascii() && is_alphanumeric(c as u8)) || c == '_' || c == '-' || c == '.'
    };
    let mut attribute_name = expect(Expected::AttributeName, take_while1(is_attribute_char));

    let (input, attr) = attribute_name(input)?;
    let attribute = attr.to_string();
    let (input, _) = space0(input)?;
    let (input, op) = operators(input)?;
    let (input, _) = space0(input)?;
    let (leftovers, value) = expect(
        Expected::Value,
        // `range` is tried before `number` so that `0:5` is not cut short after the `0`.
        alt((
            quoted_string,
            range,
            map(number, |n| Value::new(n.to_string())),
        )),
    )(input)?;

    Ok((
        leftovers,
        Expression::Comparison {
            attribute,
            op,
            value,
        },
    ))
}
fn parens<'a, E: ParseError<&'a str> + ExpectativeError<&'a str>>(
input: &'a str,
) -> IResult<&'a str, Expression, E> {
let (input, _) = tag("(")(input)?;
let (input, _) = space0(input)?;
let (input, result) = alt((expression, parens, comparison))(input)?;
let (input, _) = space0(input)?;
let (leftovers, _) = tag(")")(input)?;
Ok((leftovers, result))
}
/// Checks, without consuming anything, that `input` starts with either optional spaces followed
/// by `(`, or at least one space.
fn space_after_logop<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, &'a str, E> {
    peek(alt((recognize(tuple((space0, tag("(")))), space1)))(input)
}
/// Parses `<operand> and|or <expression>`.
///
/// The left operand is a parenthesised expression or a comparison. The right-hand side may be a
/// further `and`/`or` expression, so chains nest to the right. The logical operator has to be
/// followed by a space or an opening parenthesis.
fn expression<'a, E: ParseError<&'a str> + ExpectativeError<&'a str>>(
    input: &'a str,
) -> IResult<&'a str, Expression, E> {
    // Which logical operator was found between the operands.
    #[derive(Clone)]
    enum Connective {
        And,
        Or,
    }

    let (rest, lhs) = alt((parens, comparison))(input)?;
    let (rest, _) = space0(rest)?;

    let keyword = alt((
        value(Connective::And, tag("and")),
        value(Connective::Or, tag("or")),
    ));
    let (rest, connective) = terminated(keyword, space_after_logop)(rest)?;
    let (rest, _) = space0(rest)?;

    let (leftovers, rhs) = alt((expression, parens, comparison))(rest)?;

    let build: fn(Box<Expression>, Box<Expression>) -> Expression = match connective {
        Connective::And => Expression::And,
        Connective::Or => Expression::Or,
    };
    Ok((leftovers, build(Box::new(lhs), Box::new(rhs))))
}
/// Top-level parser: an expression, parenthesised expression or comparison, surrounded by
/// optional spaces. Wrapped in `complete` so that nom's `Incomplete` is not returned.
fn parser<'a, E: ParseError<&'a str> + ExpectativeError<&'a str>>(
    input: &'a str,
) -> IResult<&'a str, Expression, E> {
    let body = alt((expression, parens, comparison));
    complete(delimited(space0, body, space0))(input)
}
fn internal_parse(expr: &str) -> Result<Expression, Error> {
match parser::<FilterParserError>(expr) {
Ok((leftovers, expression)) => {
if leftovers.is_empty() {
Ok(expression)
} else {
Err(Error::TrailingCharacters(expr.offset(leftovers), leftovers))
}
}
Err(error) => {
let handler = |e: FilterParserError| -> Error {
for (chunk, err) in e.errors {
let pos = expr.offset(chunk);
match err {
FilterParserErrorKind::Unexpected(expected) => {
return Error::AtPos(pos, expected)
}
_ => continue,
}
}
Error::Internal
};
match error {
nom::Err::Incomplete(_) => {
panic!("Got nom::Err::Incomplete despite wrapping the parser into `complete()`")
}
nom::Err::Error(e) => Err(handler(e)),
nom::Err::Failure(e) => Err(handler(e)),
}
}
}
}
pub fn parse(expr: &str) -> Result<Expression, String> {
match internal_parse(expr) {
Ok(expression) => Ok(expression),
Err(error) => {
let err = match error {
Error::TrailingCharacters(pos, tail) => fmt!(
&gettext("Parse error: trailing characters after position %s: %s"),
&pos.to_string(),
tail
),
Error::AtPos(pos, expected) => fmt!(
&gettext("Parse error at position %s: expected %s"),
&pos.to_string(),
&translate_expected(expected)
),
Error::Internal => fmt!(&gettext("Internal parse error")),
};
Err(err)
}
}
}
#[cfg(test)]
mod tests {
use super::{Expression::*, *};
// Malformed filters are rejected with the expected error kind and offset.
#[test]
fn t_error_on_invalid_queries() {
// `=` is accepted as the operator; what follows it is not a value.
assert_eq!(
internal_parse("title =¯ \"foo\""),
Err(Error::AtPos(7, Expected::Value))
);
// Unterminated string literal.
assert_eq!(
internal_parse("a = \"b"),
Err(Error::AtPos(4, Expected::Value))
);
// Unquoted word in value position.
assert_eq!(
internal_parse("a = b"),
Err(Error::AtPos(4, Expected::Value))
);
// `!!` is not an operator.
assert_eq!(
internal_parse("a !! \"b\""),
Err(Error::AtPos(2, Expected::Operators))
);
// One closing parenthesis too many.
assert_eq!(
internal_parse("((a=\"b\")))"),
Err(Error::TrailingCharacters(9, ")"))
);
// A range has exactly two parts.
assert_eq!(
internal_parse("AAAA between 0:15:30"),
Err(Error::TrailingCharacters(17, ":30"))
);
// `and` directly followed by an identifier character is not a logical operator.
assert_eq!(
internal_parse("x = 42andy=0"),
Err(Error::TrailingCharacters(6, "andy=0"))
);
assert_eq!(
internal_parse("x = 42 andy=0"),
Err(Error::TrailingCharacters(7, "andy=0"))
);
// Input starts with an operator instead of an attribute name.
assert_eq!(
internal_parse("=!"),
Err(Error::AtPos(0, Expected::AttributeName))
);
}
// Well-formed filters, with and without parentheses, parse without error.
#[test]
fn t_no_error_on_valid_queries() {
assert!(internal_parse("a = \"b\"").is_ok());
assert!(internal_parse("(a=\"b\")").is_ok());
assert!(internal_parse("((a=\"b\"))").is_ok());
assert!(internal_parse("a != \"b\"").is_ok());
assert!(internal_parse("a =~ \"b\"").is_ok());
assert!(internal_parse("a !~ \"b\"").is_ok());
// Several and/or operators mixed with nested parentheses.
assert!(internal_parse(
"( a = \"b\") and ( b = \"c\" ) or ( ( c != \"d\" ) and ( c !~ \"asdf\" )) or c != \"xx\"").is_ok());
}
// `=` and `==` both produce `Operator::Equals`.
#[test]
fn t_both_equals_and_double_equals_are_accepted() {
let expected = Ok(Expression::Comparison {
attribute: "a".to_string(),
op: Operator::Equals,
value: Value::new("abc".to_string()),
});
assert_eq!(internal_parse("a = \"abc\""), expected);
assert_eq!(internal_parse("a == \"abc\""), expected);
}
// A NUL byte is rejected everywhere except inside a quoted string.
#[test]
fn t_filterparser_disallows_nul_byte() {
assert!(internal_parse("attri\0bute = 0").is_err());
assert!(internal_parse("attribute\0= 0").is_err());
assert!(internal_parse("attribute = \0").is_err());
assert!(internal_parse("attribute = \\\"\0\\\"").is_err());
// Inside a quoted string the NUL byte is kept as part of the value.
assert_eq!(
internal_parse("attribute = \"hello\0world\""),
Ok(Expression::Comparison {
attribute: "attribute".to_string(),
op: Operator::Equals,
value: Value::new("hello\0world".to_string()),
})
);
}
// `""` parses to a value with an empty literal.
#[test]
fn t_parses_empty_string_literals() {
let expected = Ok(Expression::Comparison {
attribute: "title".to_string(),
op: Operator::Equals,
value: Value::new(String::new()),
});
assert_eq!(internal_parse("title==\"\""), expected);
}
// `and` must be followed by a space or `(`; otherwise the filter is rejected.
#[test]
fn t_and_operator_requires_space_or_paren_after_it() {
assert!(internal_parse("a=42andy=0").is_err());
assert!(internal_parse("(a=42)andy=0").is_err());
assert!(internal_parse("a=42 andy=0").is_err());
let expected_tree = And(
Box::new(Comparison {
attribute: "a".to_string(),
op: Operator::Equals,
value: Value::new("42".to_string()),
}),
Box::new(Comparison {
attribute: "y".to_string(),
op: Operator::Equals,
value: Value::new("0".to_string()),
}),
);
// All three spellings produce the same tree.
assert_eq!(internal_parse("a=42and(y=0)"), Ok(expected_tree.clone()));
assert_eq!(internal_parse("(a=42)and(y=0)"), Ok(expected_tree.clone()));
assert_eq!(internal_parse("a=42and y=0"), Ok(expected_tree));
}
// `or` must be followed by a space or `(`; otherwise the filter is rejected.
#[test]
fn t_or_operator_requires_space_or_paren_after_it() {
assert!(internal_parse("a=42ory=0").is_err());
assert!(internal_parse("(a=42)ory=0").is_err());
assert!(internal_parse("a=42 ory=0").is_err());
let expected_tree = Or(
Box::new(Comparison {
attribute: "a".to_string(),
op: Operator::Equals,
value: Value::new("42".to_string()),
}),
Box::new(Comparison {
attribute: "y".to_string(),
op: Operator::Equals,
value: Value::new("0".to_string()),
}),
);
// All three spellings produce the same tree.
assert_eq!(internal_parse("a=42or(y=0)"), Ok(expected_tree.clone()));
assert_eq!(internal_parse("(a=42)or(y=0)"), Ok(expected_tree.clone()));
assert_eq!(internal_parse("a=42or y=0"), Ok(expected_tree));
}
// Spaces before, after and inside a comparison do not change the parse result.
#[test]
fn t_space_chars_in_filter_expr_dont_affect_parsing() {
let expected = Comparison {
attribute: "array".to_string(),
op: Operator::Contains,
value: Value::new("bar".to_string()),
};
assert_eq!(internal_parse("array # \"bar\""), Ok(expected.clone()));
assert_eq!(internal_parse(" array # \"bar\""), Ok(expected.clone()));
assert_eq!(internal_parse("array # \"bar\""), Ok(expected.clone()));
assert_eq!(internal_parse("array # \"bar\""), Ok(expected.clone()));
assert_eq!(internal_parse("array # \"bar\" "), Ok(expected.clone()));
assert_eq!(internal_parse("array# \"bar\" "), Ok(expected.clone()));
assert_eq!(
internal_parse(" array # \"bar\" "),
Ok(expected)
);
}
// Tab, newline, vertical tab and carriage return are not skipped as whitespace.
#[test]
fn t_only_space_characters_are_considered_whitespace_by_filter_parser() {
// Tab between attribute and operator.
assert_eq!(
internal_parse("attr\t= \"value\""),
Err(Error::AtPos(4, Expected::Operators))
);
// Tab between operator and value.
assert_eq!(
internal_parse("attr =\t\"value\""),
Err(Error::AtPos(6, Expected::Value))
);
// Newline after the attribute.
assert_eq!(
internal_parse("attr\n=\t\"value\""),
Err(Error::AtPos(4, Expected::Operators))
);
// Vertical tab after the attribute.
assert_eq!(
internal_parse("attr\u{b}=\"value\""),
Err(Error::AtPos(4, Expected::Operators))
);
// Trailing CR LF is reported as leftover input.
assert_eq!(
internal_parse("attr=\"value\"\r\n"),
Err(Error::TrailingCharacters(12, "\r\n"))
);
}
// `and`/`or` may directly follow the left operand's value, with no space in between.
#[test]
fn t_whitespace_before_and_or_operators_is_not_required() {
// Number immediately followed by `and`.
assert_eq!(
internal_parse("x = 42and y=0"),
Ok(And(
Box::new(Comparison {
attribute: "x".to_string(),
op: Operator::Equals,
value: Value::new("42".to_string())
}),
Box::new(Comparison {
attribute: "y".to_string(),
op: Operator::Equals,
value: Value::new("0".to_string())
})
))
);
// Quoted string immediately followed by `and`.
assert_eq!(
internal_parse("x = \"42\"and y=0"),
Ok(And(
Box::new(Comparison {
attribute: "x".to_string(),
op: Operator::Equals,
value: Value::new("42".to_string())
}),
Box::new(Comparison {
attribute: "y".to_string(),
op: Operator::Equals,
value: Value::new("0".to_string())
})
))
);
// Quoted string immediately followed by `or`.
assert_eq!(
internal_parse("x = \"42\"or y=42"),
Ok(Or(
Box::new(Comparison {
attribute: "x".to_string(),
op: Operator::Equals,
value: Value::new("42".to_string())
}),
Box::new(Comparison {
attribute: "y".to_string(),
op: Operator::Equals,
value: Value::new("42".to_string())
})
))
);
}
// Each operator, and each kind of value, parses into the matching `Comparison`.
#[test]
fn t_parses_simple_queries() {
assert_eq!(
internal_parse("a = \"b\""),
Ok(Comparison {
attribute: "a".to_string(),
op: Operator::Equals,
value: Value::new("b".to_string())
})
);
// Parentheses around a comparison leave no trace in the result.
assert_eq!(
internal_parse("(a!=\"b\")"),
Ok(Comparison {
attribute: "a".to_string(),
op: Operator::NotEquals,
value: Value::new("b".to_string())
})
);
assert_eq!(
internal_parse("((a=~\"b\"))"),
Ok(Comparison {
attribute: "a".to_string(),
op: Operator::RegexMatches,
value: Value::new("b".to_string())
})
);
assert_eq!(
internal_parse("a !~ \"b\""),
Ok(Comparison {
attribute: "a".to_string(),
op: Operator::NotRegexMatches,
value: Value::new("b".to_string())
})
);
assert_eq!(
internal_parse("a < \"b\""),
Ok(Comparison {
attribute: "a".to_string(),
op: Operator::LessThan,
value: Value::new("b".to_string())
})
);
assert_eq!(
internal_parse("a <= \"b\""),
Ok(Comparison {
attribute: "a".to_string(),
op: Operator::LessThanOrEquals,
value: Value::new("b".to_string())
})
);
assert_eq!(
internal_parse("a > \"abc\""),
Ok(Comparison {
attribute: "a".to_string(),
op: Operator::GreaterThan,
value: Value::new("abc".to_string())
})
);
assert_eq!(
internal_parse("a == \"abc\""),
Ok(Comparison {
attribute: "a".to_string(),
op: Operator::Equals,
value: Value::new("abc".to_string())
})
);
// Bare number as value.
assert_eq!(
internal_parse("a >= 3"),
Ok(Comparison {
attribute: "a".to_string(),
op: Operator::GreaterThanOrEquals,
value: Value::new("3".to_string())
})
);
// Range as value; the literal keeps the `low:high` form.
assert_eq!(
internal_parse("some_value between 0:-1"),
Ok(Comparison {
attribute: "some_value".to_string(),
op: Operator::Between,
value: Value::new("0:-1".to_string())
})
);
// The parser does not tie `between` to a range value.
assert_eq!(
internal_parse("other_string between \"impossible\""),
Ok(Comparison {
attribute: "other_string".to_string(),
op: Operator::Between,
value: Value::new("impossible".to_string())
})
);
assert_eq!(
internal_parse("array # \"name\""),
Ok(Comparison {
attribute: "array".to_string(),
op: Operator::Contains,
value: Value::new("name".to_string())
})
);
assert_eq!(
internal_parse("answers !# 42"),
Ok(Comparison {
attribute: "answers".to_string(),
op: Operator::NotContains,
value: Value::new("42".to_string())
})
);
// The backslash of an escape sequence stays in the literal.
assert_eq!(
internal_parse("author =~ \"\\s*Doe$\""),
Ok(Comparison {
attribute: "author".to_string(),
op: Operator::RegexMatches,
value: Value::new("\\s*Doe$".to_string())
})
);
}
// Chains of `and`/`or` nest to the right regardless of which operator comes first;
// parentheses override that grouping.
#[test]
fn t_parses_complex_queries() {
// `a and b or c` groups as `a and (b or c)`.
assert_eq!(
internal_parse("a = \"b\" and b = \"c\" or c = \"d\"").unwrap(),
And(
Box::new(Comparison {
attribute: "a".to_string(),
op: Operator::Equals,
value: Value::new("b".to_string())
}),
Box::new(Or(
Box::new(Comparison {
attribute: "b".to_string(),
op: Operator::Equals,
value: Value::new("c".to_string())
}),
Box::new(Comparison {
attribute: "c".to_string(),
op: Operator::Equals,
value: Value::new("d".to_string())
}),
))
)
);
// `a or b and c` groups as `a or (b and c)`.
assert_eq!(
internal_parse("a = \"b\" or b = \"c\" and c = \"d\"").unwrap(),
Or(
Box::new(Comparison {
attribute: "a".to_string(),
op: Operator::Equals,
value: Value::new("b".to_string())
}),
Box::new(And(
Box::new(Comparison {
attribute: "b".to_string(),
op: Operator::Equals,
value: Value::new("c".to_string())
}),
Box::new(Comparison {
attribute: "c".to_string(),
op: Operator::Equals,
value: Value::new("d".to_string())
}),
))
)
);
// Explicit parentheses group the left-hand side.
assert_eq!(
internal_parse("(a = \"b\" or b = \"c\") and c = \"d\"").unwrap(),
And(
Box::new(Or(
Box::new(Comparison {
attribute: "a".to_string(),
op: Operator::Equals,
value: Value::new("b".to_string())
}),
Box::new(Comparison {
attribute: "b".to_string(),
op: Operator::Equals,
value: Value::new("c".to_string())
}),
)),
Box::new(Comparison {
attribute: "c".to_string(),
op: Operator::Equals,
value: Value::new("d".to_string())
})
)
);
assert!(internal_parse(
"( a = \"b\") and ( b = \"c\" ) or ( ( c != \"d\" ) and ( c !~ \"asdf\" )) or c != \"xx\""
).is_ok());
}
// Either end of a range, or both, may be negative.
#[test]
fn t_ranges_accept_negative_numbers() {
assert_eq!(
internal_parse("value between -100:-1"),
Ok(Comparison {
attribute: "value".to_string(),
op: Operator::Between,
value: Value::new("-100:-1".to_string())
})
);
assert_eq!(
internal_parse("value between -100:100500"),
Ok(Comparison {
attribute: "value".to_string(),
op: Operator::Between,
value: Value::new("-100:100500".to_string())
})
);
assert_eq!(
internal_parse("value between 123:-10"),
Ok(Comparison {
attribute: "value".to_string(),
op: Operator::Between,
value: Value::new("123:-10".to_string())
})
);
}
// Property-based tests; each input is generated from the regex given in the test's signature.
proptest::proptest! {
// Arbitrary input may be rejected, but parsing it must not panic.
#[test]
fn does_not_crash_on_any_input(ref input in "\\PC*") {
let _ = internal_parse(input);
}
// Any number of spaces around the parts of a comparison gives the same result.
#[test]
fn whitespace_doesnt_affect_results_1(ref input in r#" *a *!= *"b" *"#) {
assert_eq!(
internal_parse(input),
Ok(Comparison {
attribute: "a".to_string(),
op: Operator::NotEquals,
value: Value::new("b".to_string())
})
);
}
// Same as above, with the comparison wrapped in parentheses.
#[test]
fn whitespace_doesnt_affect_results_2(ref input in r#" *( *a *!= *"b" *) *"#) {
assert_eq!(
internal_parse(input),
Ok(Comparison {
attribute: "a".to_string(),
op: Operator::NotEquals,
value: Value::new("b".to_string())
})
);
}
#[test]
fn attribute_names_can_contain_alphanumerics_underscore_dash_and_dot(ref input in r#"[-A-Za-z0-9_.]+ == 0"#) {
assert!(
internal_parse(input).is_ok(),
);
}
// Every failure must be reported as `AtPos` or `TrailingCharacters`, never `Internal`.
#[test]
fn no_internal_parsing_errors(ref input in "\\PC*") {
assert_ne!(internal_parse(input), Err(Error::Internal));
}
}
}