use std::char::CharTryFromError;
use std::sync::LazyLock;
use pest::iterators::Pair;
use pest::pratt_parser::PrattParser;
use pest::Parser;
use thiserror::Error;
use tytanic_utils::fmt::Separators;
mod atom;
mod expr;
mod func;
mod glob;
mod id;
mod num;
mod parser;
mod pat;
mod regex;
mod str;
use parser::Rule;
pub use self::atom::Atom;
pub use self::expr::{Expr, InfixOp, PrefixOp};
pub use self::func::Func;
pub use self::glob::Glob;
pub use self::id::Id;
pub use self::num::Num;
pub use self::pat::Pat;
pub use self::regex::Regex;
pub use self::str::Str;
pub(super) static PRATT_PARSER: LazyLock<PrattParser<Rule>> = LazyLock::new(|| {
use pest::pratt_parser::{Assoc, Op};
PrattParser::new()
.op(Op::infix(Rule::infix_op_pipe, Assoc::Left) | Op::infix(Rule::infix_op_or, Assoc::Left))
.op(Op::infix(Rule::infix_op_amper, Assoc::Left)
| Op::infix(Rule::infix_op_and, Assoc::Left))
.op(Op::infix(Rule::infix_op_tilde, Assoc::Left)
| Op::infix(Rule::infix_op_diff, Assoc::Left))
.op(Op::infix(Rule::infix_op_caret, Assoc::Left)
| Op::infix(Rule::infix_op_xor, Assoc::Left))
.op(Op::prefix(Rule::prefix_op_excl) | Op::prefix(Rule::prefix_op_not))
});
#[tracing::instrument(ret)]
pub fn parse(input: &str) -> Result<Expr, Error> {
let root_expr = parser::ExpressionParser::parse(Rule::main, input)
.map_err(|err| {
Box::new(err.renamed_rules(|r| r.token().to_owned()))
as Box<dyn std::error::Error + Send + Sync + 'static>
})?
.next()
.unwrap()
.into_inner()
.next()
.unwrap();
Expr::parse(root_expr, &PRATT_PARSER)
}
#[derive(Debug, Error)]
pub enum Error {
#[error(
"expected one of {}, found nothing",
Separators::comma_or().with(expected),
)]
UnexpectedEOI {
expected: Vec<&'static str>,
},
#[error("expected no further pairs, found {found}")]
ExpectedEOI {
found: &'static str,
},
#[error(
"expected one of {}, found {found}",
Separators::comma_or().with(expected),
)]
UnexpectedRules {
expected: Vec<&'static str>,
found: &'static str,
},
#[error("a string escape did not describe a valid unicode code point")]
UnicodeEscape(#[from] CharTryFromError),
#[error("a regex pattern could not be parsed")]
Regex(#[from] ::regex::Error),
#[error("a glob pattern could not be parsed")]
Glob(#[from] ::glob::PatternError),
#[error("the expression could not be parsed")]
Other(#[from] Box<dyn std::error::Error + Send + Sync + 'static>),
}
pub trait PairsExt<'a> {
fn try_expect_pair(&mut self, rules: &[Rule]) -> Result<Option<Pair<'a, Rule>>, Error>;
fn expect_pair(&mut self, rules: &[Rule]) -> Result<Pair<'a, Rule>, Error>;
fn expect_end(&mut self) -> Result<(), Error>;
}
impl<'a, I> PairsExt<'a> for I
where
I: Iterator<Item = Pair<'a, Rule>>,
{
fn try_expect_pair(&mut self, rules: &[Rule]) -> Result<Option<Pair<'a, Rule>>, Error> {
self.next()
.map(|pair| pair.expect_rules(rules).map(|_| pair))
.transpose()
}
fn expect_pair(&mut self, rules: &[Rule]) -> Result<Pair<'a, Rule>, Error> {
self.next()
.ok_or_else(|| Error::UnexpectedEOI {
expected: rules.iter().map(|r| r.name()).collect(),
})
.and_then(|pair| pair.expect_rules(rules).map(|_| pair))
}
fn expect_end(&mut self) -> Result<(), Error> {
if let Some(pair) = self.next() {
return Err(Error::ExpectedEOI {
found: pair.as_rule().name(),
});
}
Ok(())
}
}
pub trait PairExt<'a> {
fn expect_rules(&self, rule: &[Rule]) -> Result<(), Error>;
}
impl<'a> PairExt<'a> for Pair<'a, Rule> {
fn expect_rules(&self, rules: &[Rule]) -> Result<(), Error> {
if !rules.contains(&self.as_rule()) {
return Err(Error::UnexpectedRules {
expected: rules.iter().map(|r| r.name()).collect(),
found: self.as_rule().name(),
});
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use ecow::eco_vec;
use super::*;
#[test]
fn test_parse_single_string() {
assert_eq!(
parse(r#"'a string \'"#).unwrap(),
Expr::Atom(Atom::Str(r#"a string \"#.into()))
);
}
#[test]
fn test_parse_double_string() {
assert_eq!(
parse(r#""a string \" \u{30}""#).unwrap(),
Expr::Atom(Atom::Str(r#"a string " 0"#.into()))
);
}
#[test]
fn test_parse_identifier() {
assert_eq!(
parse("abc").unwrap(),
Expr::Atom(Atom::Id(Id("abc".into())))
);
assert_eq!(
parse("a-bc").unwrap(),
Expr::Atom(Atom::Id(Id("a-bc".into())))
);
assert_eq!(
parse("a__bc-").unwrap(),
Expr::Atom(Atom::Id(Id("a__bc-".into())))
);
}
#[test]
fn test_parse_number() {
assert_eq!(parse("1234").unwrap(), Expr::Atom(Atom::Num(1234.into())));
assert_eq!(parse("1_000").unwrap(), Expr::Atom(Atom::Num(1000.into())));
}
#[test]
fn test_parse_pattern_string() {
assert_eq!(
parse("r:'^abc*$'").unwrap(),
Expr::Atom(Atom::Pat(Pat::Regex(Regex::new("^abc*$").unwrap())))
);
assert_eq!(
parse(r#"glob:"a/**/b""#).unwrap(),
Expr::Atom(Atom::Pat(Pat::Glob(Glob::new("a/**/b").unwrap())))
);
}
#[test]
fn test_parse_pattern_raw() {
assert_eq!(
parse("g:a/**/b").unwrap(),
Expr::Atom(Atom::Pat(Pat::Glob(Glob::new("a/**/b").unwrap())))
);
assert_eq!(
parse("e:a/b").unwrap(),
Expr::Atom(Atom::Pat(Pat::Exact("a/b".into())))
);
}
#[test]
fn test_parse_func_no_args() {
assert_eq!(
parse("func()").unwrap(),
Expr::Func(Func {
id: Id("func".into()),
args: eco_vec![],
})
);
assert_eq!(
parse("func( )").unwrap(),
Expr::Func(Func {
id: Id("func".into()),
args: eco_vec![],
})
);
}
#[test]
fn test_parse_func_simple_args() {
assert_eq!(
parse("func( 1 , e:'a/b')").unwrap(),
Expr::Func(Func {
id: Id("func".into()),
args: eco_vec![
Expr::Atom(Atom::Num(1.into())),
Expr::Atom(Atom::Pat(Pat::Exact("a/b".into())))
],
})
);
}
#[test]
fn test_parse_prefix_expression() {
assert_eq!(
parse("! not 0").unwrap(),
Expr::Prefix {
op: PrefixOp::Not,
expr: Arc::new(Expr::Prefix {
op: PrefixOp::Not,
expr: Arc::new(Expr::Atom(Atom::Num(Num(0)))),
}),
}
);
}
#[test]
fn test_parse_infix_expression() {
assert_eq!(
parse("0 and 1 or 2").unwrap(),
Expr::Infix {
op: InfixOp::Union,
lhs: Arc::new(Expr::Infix {
op: InfixOp::Inter,
lhs: Arc::new(Expr::Atom(Atom::Num(Num(0)))),
rhs: Arc::new(Expr::Atom(Atom::Num(Num(1)))),
}),
rhs: Arc::new(Expr::Atom(Atom::Num(Num(2)))),
}
);
assert_eq!(
parse("0 and (1 or 2)").unwrap(),
Expr::Infix {
op: InfixOp::Inter,
lhs: Arc::new(Expr::Atom(Atom::Num(Num(0)))),
rhs: Arc::new(Expr::Infix {
op: InfixOp::Union,
lhs: Arc::new(Expr::Atom(Atom::Num(Num(1)))),
rhs: Arc::new(Expr::Atom(Atom::Num(Num(2)))),
}),
}
);
}
#[test]
fn test_parse_expression() {
assert_eq!(
parse("regex:'abc' and not (4_2 | func(0))").unwrap(),
Expr::Infix {
op: InfixOp::Inter,
lhs: Arc::new(Expr::Atom(Atom::Pat(Pat::Regex(
Regex::new("abc").unwrap()
)))),
rhs: Arc::new(Expr::Prefix {
op: PrefixOp::Not,
expr: Arc::new(Expr::Infix {
op: InfixOp::Union,
lhs: Arc::new(Expr::Atom(Atom::Num(Num(42)))),
rhs: Arc::new(Expr::Func(Func {
id: Id("func".into()),
args: eco_vec![Expr::Atom(Atom::Num(Num(0)))]
})),
}),
}),
}
);
}
}