tytanic_filter/ast/
mod.rs

1//! Test set AST types.
2
3use std::char::CharTryFromError;
4use std::sync::LazyLock;
5
6use pest::Parser;
7use pest::iterators::Pair;
8use pest::pratt_parser::PrattParser;
9use thiserror::Error;
10use tytanic_utils::fmt::Separators;
11
12mod atom;
13mod expr;
14mod func;
15mod glob;
16mod id;
17mod num;
18mod parser;
19mod pat;
20mod regex;
21mod str;
22
23// This is an internal re-export and should _never_ leak outside this module.
24use parser::Rule;
25
26pub use self::atom::Atom;
27pub use self::expr::Expr;
28pub use self::expr::InfixOp;
29pub use self::expr::PrefixOp;
30pub use self::func::Func;
31pub use self::glob::Glob;
32pub use self::id::Id;
33pub use self::num::Num;
34pub use self::pat::Pat;
35pub use self::regex::Regex;
36pub use self::str::Str;
37
38/// The pratt-parser defining the operator precedence.
39pub(super) static PRATT_PARSER: LazyLock<PrattParser<Rule>> = LazyLock::new(|| {
40    use pest::pratt_parser::Assoc;
41    use pest::pratt_parser::Op;
42
43    PrattParser::new()
44        .op(Op::infix(Rule::infix_op_pipe, Assoc::Left) | Op::infix(Rule::infix_op_or, Assoc::Left))
45        .op(Op::infix(Rule::infix_op_amper, Assoc::Left)
46            | Op::infix(Rule::infix_op_and, Assoc::Left))
47        .op(Op::infix(Rule::infix_op_tilde, Assoc::Left)
48            | Op::infix(Rule::infix_op_diff, Assoc::Left))
49        .op(Op::infix(Rule::infix_op_caret, Assoc::Left)
50            | Op::infix(Rule::infix_op_xor, Assoc::Left))
51        .op(Op::prefix(Rule::prefix_op_excl) | Op::prefix(Rule::prefix_op_not))
52});
53
54/// Parse the given input into a test set expression.
55#[tracing::instrument(ret)]
56pub fn parse(input: &str) -> Result<Expr, Error> {
57    // Unwrap main into its root level expr, removing the EOI pair.
58    let root_expr = parser::ExpressionParser::parse(Rule::main, input)
59        .map_err(|err| {
60            Box::new(err.renamed_rules(|r| r.token().to_owned()))
61                as Box<dyn std::error::Error + Send + Sync + 'static>
62        })?
63        .next()
64        .unwrap()
65        .into_inner()
66        .next()
67        .unwrap();
68
69    Expr::parse(root_expr, &PRATT_PARSER)
70}
71
/// An error for parsing failures.
///
/// This is the error type returned by [`parse`] as well as by the pair helper traits
/// [`PairsExt`] and [`PairExt`].
#[derive(Debug, Error)]
pub enum Error {
    /// The input ended unexpectedly: a pair was required, but none was left.
    #[error(
        "expected one of {}, found nothing",
        Separators::comma_or().with(expected),
    )]
    UnexpectedEOI {
        /// The names of the rules that were expected.
        expected: Vec<&'static str>,
    },

    /// Expected no further input, but found some.
    #[error("expected no further pairs, found {found}")]
    ExpectedEOI {
        /// The name of the rule that was found.
        found: &'static str,
    },

    /// Expected one of a certain set of rules, but found a different rule.
    #[error(
        "expected one of {}, found {found}",
        Separators::comma_or().with(expected),
    )]
    UnexpectedRules {
        /// The names of the rules that were expected.
        expected: Vec<&'static str>,

        /// The name of the rule that was found.
        found: &'static str,
    },

    /// A string escape did not describe a valid Unicode code point.
    #[error("a string escape did not describe a valid unicode code point")]
    UnicodeEscape(#[from] CharTryFromError),

    /// A regex pattern could not be parsed.
    #[error("a regex pattern could not be parsed")]
    Regex(#[from] ::regex::Error),

    /// A glob pattern could not be parsed.
    #[error("a glob pattern could not be parsed")]
    Glob(#[from] ::glob::PatternError),

    /// Some other error occurred.
    ///
    /// [`parse`] uses this variant to carry the boxed grammar error when the input does
    /// not match the grammar.
    #[error("the expression could not be parsed")]
    Other(#[from] Box<dyn std::error::Error + Send + Sync + 'static>),
}
121
/// An extension trait for pest iterators and its adapters.
///
/// It is implemented for every iterator that yields [`Pair`]s of this grammar's rules.
/// Each method advances the iterator by at most one pair.
pub trait PairsExt<'a> {
    /// If there is another pair ensure it is of the expected rules.
    ///
    /// Returns `Ok(None)` if the iterator is exhausted and `Ok(Some(pair))` if the next
    /// pair matches one of `rules`.
    ///
    /// # Errors
    ///
    /// Returns [`Error::UnexpectedRules`] if the next pair has a rule that is not in
    /// `rules`. The offending pair has been consumed from the iterator at that point.
    fn try_expect_pair(&mut self, rules: &[Rule]) -> Result<Option<Pair<'a, Rule>>, Error>;

    /// Ensure there is a pair of one of the expected rules.
    ///
    /// # Errors
    ///
    /// Returns [`Error::UnexpectedEOI`] if the iterator is exhausted and
    /// [`Error::UnexpectedRules`] if the next pair has a rule that is not in `rules`.
    fn expect_pair(&mut self, rules: &[Rule]) -> Result<Pair<'a, Rule>, Error>;

    /// Ensure there are no further pairs.
    ///
    /// # Errors
    ///
    /// Returns [`Error::ExpectedEOI`] naming the rule of the next pair if there is one.
    fn expect_end(&mut self) -> Result<(), Error>;
}
133
134impl<'a, I> PairsExt<'a> for I
135where
136    I: Iterator<Item = Pair<'a, Rule>>,
137{
138    fn try_expect_pair(&mut self, rules: &[Rule]) -> Result<Option<Pair<'a, Rule>>, Error> {
139        self.next()
140            .map(|pair| pair.expect_rules(rules).map(|_| pair))
141            .transpose()
142    }
143
144    fn expect_pair(&mut self, rules: &[Rule]) -> Result<Pair<'a, Rule>, Error> {
145        self.next()
146            .ok_or_else(|| Error::UnexpectedEOI {
147                expected: rules.iter().map(|r| r.name()).collect(),
148            })
149            .and_then(|pair| pair.expect_rules(rules).map(|_| pair))
150    }
151
152    fn expect_end(&mut self) -> Result<(), Error> {
153        if let Some(pair) = self.next() {
154            return Err(Error::ExpectedEOI {
155                found: pair.as_rule().name(),
156            });
157        }
158
159        Ok(())
160    }
161}
162
163/// An extension trait for the [`Pair`] type.
164pub trait PairExt<'a> {
165    fn expect_rules(&self, rule: &[Rule]) -> Result<(), Error>;
166}
167
168impl<'a> PairExt<'a> for Pair<'a, Rule> {
169    fn expect_rules(&self, rules: &[Rule]) -> Result<(), Error> {
170        if !rules.contains(&self.as_rule()) {
171            return Err(Error::UnexpectedRules {
172                expected: rules.iter().map(|r| r.name()).collect(),
173                found: self.as_rule().name(),
174            });
175        }
176
177        Ok(())
178    }
179}
180
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use ecow::eco_vec;

    use super::*;

    // TODO(tinger): Test failures.

    /// Parses `input` and asserts that the result is equal to `expected`.
    ///
    /// Panics if the input fails to parse or if the parsed expression differs.
    #[track_caller]
    fn assert_parses_to(input: &str, expected: Expr) {
        assert_eq!(parse(input).unwrap(), expected);
    }

    #[test]
    fn test_parse_single_string() {
        // The backslash is expected to end up in the string verbatim.
        assert_parses_to(
            r#"'a string \'"#,
            Expr::Atom(Atom::Str(r#"a string \"#.into())),
        );
    }

    #[test]
    fn test_parse_double_string() {
        // Both the quote escape and the unicode escape are expected to be resolved.
        assert_parses_to(
            r#""a string \" \u{30}""#,
            Expr::Atom(Atom::Str(r#"a string " 0"#.into())),
        );
    }

    #[test]
    fn test_parse_identifier() {
        for name in ["abc", "a-bc", "a__bc-"] {
            assert_parses_to(name, Expr::Atom(Atom::Id(Id(name.into()))));
        }
    }

    #[test]
    fn test_parse_number() {
        assert_parses_to("1234", Expr::Atom(Atom::Num(1234.into())));
        assert_parses_to("1_000", Expr::Atom(Atom::Num(1000.into())));
    }

    #[test]
    fn test_parse_pattern_string() {
        assert_parses_to(
            "r:'^abc*$'",
            Expr::Atom(Atom::Pat(Pat::Regex(Regex::new("^abc*$").unwrap()))),
        );
        assert_parses_to(
            r#"glob:"a/**/b""#,
            Expr::Atom(Atom::Pat(Pat::Glob(Glob::new("a/**/b").unwrap()))),
        );
    }

    #[test]
    fn test_parse_pattern_raw() {
        assert_parses_to(
            "g:a/**/b",
            Expr::Atom(Atom::Pat(Pat::Glob(Glob::new("a/**/b").unwrap()))),
        );
        assert_parses_to("e:a/b", Expr::Atom(Atom::Pat(Pat::Exact("a/b".into()))));
        assert_parses_to(
            "r:(a/b\\))",
            Expr::Atom(Atom::Pat(Pat::Regex(Regex::new("(a/b\\))").unwrap()))),
        );
        assert_parses_to(
            "r:(a/b{3,4})",
            Expr::Atom(Atom::Pat(Pat::Regex(Regex::new("(a/b{3,4})").unwrap()))),
        );
    }

    #[test]
    fn test_parse_pattern_raw_termination() {
        // A raw pattern must stop before the closing parenthesis of the call.
        assert_parses_to(
            "foo(e:bar)",
            Expr::Func(Func {
                id: Id("foo".into()),
                args: eco_vec![Expr::Atom(Atom::Pat(Pat::Exact("bar".into())))],
            }),
        );

        // Balanced parentheses inside of the raw pattern belong to the pattern.
        assert_parses_to(
            "foo(e:bar, r:qux(quuz))",
            Expr::Func(Func {
                id: Id("foo".into()),
                args: eco_vec![
                    Expr::Atom(Atom::Pat(Pat::Exact("bar".into()))),
                    Expr::Atom(Atom::Pat(Pat::Regex(Regex::new("qux(quuz)").unwrap()))),
                ],
            }),
        );

        // The comma inside of the braces must not split the argument.
        assert_parses_to(
            "foo(e:bar, r:qux(quuz{3,4}))",
            Expr::Func(Func {
                id: Id("foo".into()),
                args: eco_vec![
                    Expr::Atom(Atom::Pat(Pat::Exact("bar".into()))),
                    Expr::Atom(Atom::Pat(Pat::Regex(Regex::new("qux(quuz{3,4})").unwrap()))),
                ],
            }),
        );
    }

    #[test]
    fn test_parse_pattern_in_args() {
        assert_parses_to(
            "func(e:foo)",
            Expr::Func(Func {
                id: Id("func".into()),
                args: eco_vec![Expr::Atom(Atom::Pat(Pat::Exact(Str("foo".into()))))],
            }),
        );
        assert_parses_to(
            "func(e:foo, bar)",
            Expr::Func(Func {
                id: Id("func".into()),
                args: eco_vec![
                    Expr::Atom(Atom::Pat(Pat::Exact(Str("foo".into())))),
                    Expr::Atom(Atom::Id(Id("bar".into()))),
                ],
            }),
        );
    }

    #[test]
    fn test_parse_func_no_args() {
        // Whitespace between the parentheses must not produce an argument.
        for input in ["func()", "func(  )"] {
            assert_parses_to(
                input,
                Expr::Func(Func {
                    id: Id("func".into()),
                    args: eco_vec![],
                }),
            );
        }
    }

    #[test]
    fn test_parse_func_simple_args() {
        assert_parses_to(
            "func( 1  , e:'a/b')",
            Expr::Func(Func {
                id: Id("func".into()),
                args: eco_vec![
                    Expr::Atom(Atom::Num(1.into())),
                    Expr::Atom(Atom::Pat(Pat::Exact("a/b".into()))),
                ],
            }),
        );
    }

    #[test]
    fn test_parse_prefix_expression() {
        // Both spellings of the prefix operator map to the same operator.
        let inner = Expr::Prefix {
            op: PrefixOp::Not,
            expr: Arc::new(Expr::Atom(Atom::Num(Num(0)))),
        };

        assert_parses_to(
            "! not 0",
            Expr::Prefix {
                op: PrefixOp::Not,
                expr: Arc::new(inner),
            },
        );
    }

    #[test]
    fn test_parse_infix_expression() {
        let num = |value| Arc::new(Expr::Atom(Atom::Num(Num(value))));

        // Without parentheses `and` binds tighter than `or`.
        assert_parses_to(
            "0 and 1 or 2",
            Expr::Infix {
                op: InfixOp::Union,
                lhs: Arc::new(Expr::Infix {
                    op: InfixOp::Inter,
                    lhs: num(0),
                    rhs: num(1),
                }),
                rhs: num(2),
            },
        );

        // Parentheses override the default precedence.
        assert_parses_to(
            "0 and (1 or 2)",
            Expr::Infix {
                op: InfixOp::Inter,
                lhs: num(0),
                rhs: Arc::new(Expr::Infix {
                    op: InfixOp::Union,
                    lhs: num(1),
                    rhs: num(2),
                }),
            },
        );
    }

    #[test]
    fn test_parse_expression() {
        let pattern = Expr::Atom(Atom::Pat(Pat::Regex(Regex::new("abc").unwrap())));
        let call = Expr::Func(Func {
            id: Id("func".into()),
            args: eco_vec![Expr::Atom(Atom::Num(Num(0)))],
        });
        let union = Expr::Infix {
            op: InfixOp::Union,
            lhs: Arc::new(Expr::Atom(Atom::Num(Num(42)))),
            rhs: Arc::new(call),
        };

        assert_parses_to(
            "regex:'abc' and not (4_2 | func(0))",
            Expr::Infix {
                op: InfixOp::Inter,
                lhs: Arc::new(pattern),
                rhs: Arc::new(Expr::Prefix {
                    op: PrefixOp::Not,
                    expr: Arc::new(union),
                }),
            },
        );
    }
}