tytanic_filter/ast/
mod.rs

1//! Test set AST types.
2
3use std::char::CharTryFromError;
4use std::sync::LazyLock;
5
6use pest::iterators::Pair;
7use pest::pratt_parser::PrattParser;
8use pest::Parser;
9use thiserror::Error;
10use tytanic_utils::fmt::Separators;
11
12mod atom;
13mod expr;
14mod func;
15mod glob;
16mod id;
17mod num;
18mod parser;
19mod pat;
20mod regex;
21mod str;
22
23// This is an internal re-export and should _never_ leak outside this module.
24use parser::Rule;
25
26pub use self::atom::Atom;
27pub use self::expr::{Expr, InfixOp, PrefixOp};
28pub use self::func::Func;
29pub use self::glob::Glob;
30pub use self::id::Id;
31pub use self::num::Num;
32pub use self::pat::Pat;
33pub use self::regex::Regex;
34pub use self::str::Str;
35
36/// The pratt parser defining the operator precedence.
37pub(super) static PRATT_PARSER: LazyLock<PrattParser<Rule>> = LazyLock::new(|| {
38    use pest::pratt_parser::{Assoc, Op};
39
40    PrattParser::new()
41        .op(Op::infix(Rule::infix_op_pipe, Assoc::Left) | Op::infix(Rule::infix_op_or, Assoc::Left))
42        .op(Op::infix(Rule::infix_op_amper, Assoc::Left)
43            | Op::infix(Rule::infix_op_and, Assoc::Left))
44        .op(Op::infix(Rule::infix_op_tilde, Assoc::Left)
45            | Op::infix(Rule::infix_op_diff, Assoc::Left))
46        .op(Op::infix(Rule::infix_op_caret, Assoc::Left)
47            | Op::infix(Rule::infix_op_xor, Assoc::Left))
48        .op(Op::prefix(Rule::prefix_op_excl) | Op::prefix(Rule::prefix_op_not))
49});
50
51/// Parse the given input into a test set expression.
52#[tracing::instrument(ret)]
53pub fn parse(input: &str) -> Result<Expr, Error> {
54    // unwrap main into its root level expr, removing the EOI pair
55    let root_expr = parser::ExpressionParser::parse(Rule::main, input)
56        .map_err(|err| {
57            Box::new(err.renamed_rules(|r| r.token().to_owned()))
58                as Box<dyn std::error::Error + Send + Sync + 'static>
59        })?
60        .next()
61        .unwrap()
62        .into_inner()
63        .next()
64        .unwrap();
65
66    Expr::parse(root_expr, &PRATT_PARSER)
67}
68
/// An error for parsing failures.
///
/// The rule-related variants carry rule names as static strings, so the
/// grammar's internal rule type does not appear in this public type.
#[derive(Debug, Error)]
pub enum Error {
    /// The input ended unexpectedly, i.e. a pair was required but none was
    /// left.
    #[error(
        "expected one of {}, found nothing",
        Separators::comma_or().with(expected),
    )]
    UnexpectedEOI {
        /// The names of the rules that would have been accepted.
        expected: Vec<&'static str>,
    },

    /// Expected no further input, but found some.
    #[error("expected no further pairs, found {found}")]
    ExpectedEOI {
        /// The name of the rule that was found.
        found: &'static str,
    },

    /// Expected a certain set of rules, but found a different rule.
    #[error(
        "expected one of {}, found {found}",
        Separators::comma_or().with(expected),
    )]
    UnexpectedRules {
        /// The names of the rules that would have been accepted.
        expected: Vec<&'static str>,

        /// The name of the rule that was found.
        found: &'static str,
    },

    /// A string escape did not describe a valid unicode code point.
    #[error("a string escape did not describe a valid unicode code point")]
    UnicodeEscape(#[from] CharTryFromError),

    /// A regex pattern could not be parsed.
    #[error("a regex pattern could not be parsed")]
    Regex(#[from] ::regex::Error),

    /// A glob pattern could not be parsed.
    #[error("a glob pattern could not be parsed")]
    Glob(#[from] ::glob::PatternError),

    /// Some other error occurred.
    ///
    /// [`parse`] uses this variant to carry the boxed grammar error when the
    /// input does not match the grammar.
    #[error("the expression could not be parsed")]
    Other(#[from] Box<dyn std::error::Error + Send + Sync + 'static>),
}
118
/// An extension trait for pest iterators and its adapters.
///
/// It is implemented for every iterator that yields [`Pair`]s of this
/// module's grammar rules. Every method consumes at most one pair from the
/// iterator, including when it returns an error.
pub trait PairsExt<'a> {
    /// If there is another pair ensure it is of the expected rules.
    ///
    /// Returns `Ok(None)` if the iterator is exhausted and `Ok(Some(pair))` if
    /// the next pair matches one of `rules`.
    ///
    /// # Errors
    ///
    /// Returns [`Error::UnexpectedRules`] if the next pair has a rule that is
    /// not contained in `rules`.
    fn try_expect_pair(&mut self, rules: &[Rule]) -> Result<Option<Pair<'a, Rule>>, Error>;

    /// Ensure there is a pair of one of the expected rules.
    ///
    /// # Errors
    ///
    /// Returns [`Error::UnexpectedEOI`] if the iterator is exhausted, or
    /// [`Error::UnexpectedRules`] if the next pair has a rule that is not
    /// contained in `rules`.
    fn expect_pair(&mut self, rules: &[Rule]) -> Result<Pair<'a, Rule>, Error>;

    /// Ensure there are no further pairs.
    ///
    /// # Errors
    ///
    /// Returns [`Error::ExpectedEOI`] naming the rule of the next pair if the
    /// iterator is not exhausted.
    fn expect_end(&mut self) -> Result<(), Error>;
}
130
131impl<'a, I> PairsExt<'a> for I
132where
133    I: Iterator<Item = Pair<'a, Rule>>,
134{
135    fn try_expect_pair(&mut self, rules: &[Rule]) -> Result<Option<Pair<'a, Rule>>, Error> {
136        self.next()
137            .map(|pair| pair.expect_rules(rules).map(|_| pair))
138            .transpose()
139    }
140
141    fn expect_pair(&mut self, rules: &[Rule]) -> Result<Pair<'a, Rule>, Error> {
142        self.next()
143            .ok_or_else(|| Error::UnexpectedEOI {
144                expected: rules.iter().map(|r| r.name()).collect(),
145            })
146            .and_then(|pair| pair.expect_rules(rules).map(|_| pair))
147    }
148
149    fn expect_end(&mut self) -> Result<(), Error> {
150        if let Some(pair) = self.next() {
151            return Err(Error::ExpectedEOI {
152                found: pair.as_rule().name(),
153            });
154        }
155
156        Ok(())
157    }
158}
159
/// An extension trait for the [`Pair`] type.
pub trait PairExt<'a> {
    /// Ensure the rule of this pair is one of the given rules.
    ///
    /// # Errors
    ///
    /// Returns [`Error::UnexpectedRules`] with the names of the accepted rules
    /// and of the rule that was found if this pair's rule is not among them.
    fn expect_rules(&self, rule: &[Rule]) -> Result<(), Error>;
}
164
165impl<'a> PairExt<'a> for Pair<'a, Rule> {
166    fn expect_rules(&self, rules: &[Rule]) -> Result<(), Error> {
167        if !rules.contains(&self.as_rule()) {
168            return Err(Error::UnexpectedRules {
169                expected: rules.iter().map(|r| r.name()).collect(),
170                found: self.as_rule().name(),
171            });
172        }
173
174        Ok(())
175    }
176}
177
// Unit tests for `parse`, covering only successful parses so far.
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use ecow::eco_vec;

    use super::*;

    // TODO(tinger): test failures

    // A backslash inside a single-quoted string is kept literally.
    #[test]
    fn test_parse_single_string() {
        assert_eq!(
            parse(r#"'a string \'"#).unwrap(),
            Expr::Atom(Atom::Str(r#"a string \"#.into()))
        );
    }

    // Double-quoted strings resolve escapes: `\"` becomes `"` and `\u{30}`
    // becomes `0`.
    #[test]
    fn test_parse_double_string() {
        assert_eq!(
            parse(r#""a string \" \u{30}""#).unwrap(),
            Expr::Atom(Atom::Str(r#"a string " 0"#.into()))
        );
    }

    // Identifiers may contain `-` and `_`, including a trailing `-`.
    #[test]
    fn test_parse_identifier() {
        assert_eq!(
            parse("abc").unwrap(),
            Expr::Atom(Atom::Id(Id("abc".into())))
        );
        assert_eq!(
            parse("a-bc").unwrap(),
            Expr::Atom(Atom::Id(Id("a-bc".into())))
        );
        assert_eq!(
            parse("a__bc-").unwrap(),
            Expr::Atom(Atom::Id(Id("a__bc-".into())))
        );
    }

    // `_` separators inside a number do not contribute to its value.
    #[test]
    fn test_parse_number() {
        assert_eq!(parse("1234").unwrap(), Expr::Atom(Atom::Num(1234.into())));
        assert_eq!(parse("1_000").unwrap(), Expr::Atom(Atom::Num(1000.into())));
    }

    // Patterns whose value is given as a quoted string, using the short `r:`
    // prefix for a regex and the long `glob:` prefix for a glob.
    #[test]
    fn test_parse_pattern_string() {
        assert_eq!(
            parse("r:'^abc*$'").unwrap(),
            Expr::Atom(Atom::Pat(Pat::Regex(Regex::new("^abc*$").unwrap())))
        );
        assert_eq!(
            parse(r#"glob:"a/**/b""#).unwrap(),
            Expr::Atom(Atom::Pat(Pat::Glob(Glob::new("a/**/b").unwrap())))
        );
    }

    // Patterns whose value is written unquoted directly after the prefix.
    #[test]
    fn test_parse_pattern_raw() {
        assert_eq!(
            parse("g:a/**/b").unwrap(),
            Expr::Atom(Atom::Pat(Pat::Glob(Glob::new("a/**/b").unwrap())))
        );
        assert_eq!(
            parse("e:a/b").unwrap(),
            Expr::Atom(Atom::Pat(Pat::Exact("a/b".into())))
        );
    }

    // An unquoted pattern used as a function argument ends at the closing
    // parenthesis or at the comma before the next argument.
    #[test]
    fn test_parse_pattern_in_args() {
        assert_eq!(
            parse("func(e:foo)").unwrap(),
            Expr::Func(Func {
                id: Id("func".into()),
                args: eco_vec![Expr::Atom(Atom::Pat(Pat::Exact(Str("foo".into()))))],
            })
        );
        assert_eq!(
            parse("func(e:foo, bar)").unwrap(),
            Expr::Func(Func {
                id: Id("func".into()),
                args: eco_vec![
                    Expr::Atom(Atom::Pat(Pat::Exact(Str("foo".into())))),
                    Expr::Atom(Atom::Id(Id("bar".into()))),
                ],
            })
        );
    }

    // An empty argument list parses to no arguments, with or without
    // whitespace between the parentheses.
    #[test]
    fn test_parse_func_no_args() {
        assert_eq!(
            parse("func()").unwrap(),
            Expr::Func(Func {
                id: Id("func".into()),
                args: eco_vec![],
            })
        );
        assert_eq!(
            parse("func(  )").unwrap(),
            Expr::Func(Func {
                id: Id("func".into()),
                args: eco_vec![],
            })
        );
    }

    // Whitespace around arguments and separators is ignored.
    #[test]
    fn test_parse_func_simple_args() {
        assert_eq!(
            parse("func( 1  , e:'a/b')").unwrap(),
            Expr::Func(Func {
                id: Id("func".into()),
                args: eco_vec![
                    Expr::Atom(Atom::Num(1.into())),
                    Expr::Atom(Atom::Pat(Pat::Exact("a/b".into())))
                ],
            })
        );
    }

    // `!` and `not` both parse to `PrefixOp::Not`, and prefix operators nest.
    #[test]
    fn test_parse_prefix_expression() {
        assert_eq!(
            parse("! not 0").unwrap(),
            Expr::Prefix {
                op: PrefixOp::Not,
                expr: Arc::new(Expr::Prefix {
                    op: PrefixOp::Not,
                    expr: Arc::new(Expr::Atom(Atom::Num(Num(0)))),
                }),
            }
        );
    }

    // `and` binds tighter than `or`; parentheses override that grouping.
    #[test]
    fn test_parse_infix_expression() {
        assert_eq!(
            parse("0 and 1 or 2").unwrap(),
            Expr::Infix {
                op: InfixOp::Union,
                lhs: Arc::new(Expr::Infix {
                    op: InfixOp::Inter,
                    lhs: Arc::new(Expr::Atom(Atom::Num(Num(0)))),
                    rhs: Arc::new(Expr::Atom(Atom::Num(Num(1)))),
                }),
                rhs: Arc::new(Expr::Atom(Atom::Num(Num(2)))),
            }
        );

        assert_eq!(
            parse("0 and (1 or 2)").unwrap(),
            Expr::Infix {
                op: InfixOp::Inter,
                lhs: Arc::new(Expr::Atom(Atom::Num(Num(0)))),
                rhs: Arc::new(Expr::Infix {
                    op: InfixOp::Union,
                    lhs: Arc::new(Expr::Atom(Atom::Num(Num(1)))),
                    rhs: Arc::new(Expr::Atom(Atom::Num(Num(2)))),
                }),
            }
        );
    }

    // A combined expression mixing a pattern, keyword and symbolic operators,
    // grouping, a number with a separator and a function call.
    #[test]
    fn test_parse_expression() {
        assert_eq!(
            parse("regex:'abc' and not (4_2 | func(0))").unwrap(),
            Expr::Infix {
                op: InfixOp::Inter,
                lhs: Arc::new(Expr::Atom(Atom::Pat(Pat::Regex(
                    Regex::new("abc").unwrap()
                )))),
                rhs: Arc::new(Expr::Prefix {
                    op: PrefixOp::Not,
                    expr: Arc::new(Expr::Infix {
                        op: InfixOp::Union,
                        lhs: Arc::new(Expr::Atom(Atom::Num(Num(42)))),
                        rhs: Arc::new(Expr::Func(Func {
                            id: Id("func".into()),
                            args: eco_vec![Expr::Atom(Atom::Num(Num(0)))]
                        })),
                    }),
                }),
            }
        );
    }
}