// Source: dhttp_access/expr/parse.rs

1use std::{fmt::Display, str::FromStr};
2
3use peg::{error::ParseError, str::LineCol};
4use snafu::ResultExt;
5
6use crate::{
7    expr::{atomics::*, eval::*, exprs::*},
8    pattern::{ClientNamePattern, NormalPattern, ParsePatternError},
9};
10
/// A single lexical token of an access-rule expression.
///
/// The lifetime `'t` ties unquoted tokens to the source text they were
/// sliced from; quoted tokens own their text because escape sequences are
/// resolved while lexing.
#[derive(Debug, Clone)]
pub enum Token<'t> {
    /// A bare word, borrowed verbatim from the source text.
    Unquoted(&'t str),
    /// The content of a double-quoted string with escapes already resolved
    /// (the surrounding quotes are not included).
    Quoted(String),
    /// A single punctuation character (the lexer emits `(`, `)` and `:`).
    Symbol(char),
}
16
17impl<'t> Token<'t> {
18    pub fn len(&self) -> usize {
19        match self {
20            Token::Unquoted(s) => s.len(),
21            Token::Quoted(cow) => cow.len(),
22            Token::Symbol(c) => c.len_utf8(),
23        }
24    }
25
26    pub fn is_empty(&self) -> bool {
27        self.len() == 0
28    }
29}
30
31impl Display for Token<'_> {
32    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
33        match self {
34            Token::Unquoted(s) => s.fmt(f),
35            Token::Quoted(cow) => cow.fmt(f),
36            Token::Symbol(c) => c.fmt(f),
37        }
38    }
39}
40
41pub struct PositionedToken<'t> {
42    token: Token<'t>,
43    position: usize,
44}
45
46impl PartialEq<str> for Token<'_> {
47    fn eq(&self, other: &str) -> bool {
48        match self {
49            Token::Unquoted(lit) => lit.eq_ignore_ascii_case(other),
50            Token::Quoted(lit) => lit.eq_ignore_ascii_case(other),
51            Token::Symbol(c) => c.len_utf8() == other.len() && other.starts_with(*c),
52        }
53    }
54}
55
56pub struct TokenStream<'t> {
57    source: &'t str,
58    tokens: Vec<PositionedToken<'t>>,
59}
60
61impl peg::Parse for TokenStream<'_> {
62    type PositionRepr = LineCol;
63
64    fn start(&self) -> usize {
65        0
66    }
67
68    fn is_eof(&self, p: usize) -> bool {
69        p >= self.tokens.len()
70    }
71
72    fn position_repr(&self, p: usize) -> Self::PositionRepr {
73        let start = LineCol {
74            line: 1,
75            column: 1,
76            offset: 0,
77        };
78        if p >= self.tokens.len() {
79            self.tokens.last().map_or(start, |t| {
80                str::position_repr(self.source, t.position + t.token.len())
81            })
82        } else {
83            str::position_repr(self.source, self.tokens[p].position)
84        }
85    }
86}
87
88impl<'input> peg::ParseElem<'input> for TokenStream<'input> {
89    type Element = &'input Token<'input>;
90
91    fn parse_elem(&'input self, pos: usize) -> peg::RuleResult<Self::Element> {
92        match pos < self.tokens.len() {
93            true => peg::RuleResult::Matched(pos + 1, &self.tokens[pos].token),
94            false => peg::RuleResult::Failed,
95        }
96    }
97}
98
99impl<'input> peg::ParseLiteral for TokenStream<'input> {
100    fn parse_string_literal(&self, pos: usize, literal: &str) -> peg::RuleResult<()> {
101        match self.tokens.get(pos) {
102            Some(PositionedToken { token, .. }) if token == literal => {
103                peg::RuleResult::Matched(pos + 1, ())
104            }
105            _ => peg::RuleResult::Failed,
106        }
107    }
108}
109
110impl<'input> peg::ParseSlice<'input> for TokenStream<'input> {
111    type Slice = &'input [PositionedToken<'input>];
112
113    fn parse_slice(&'input self, p1: usize, p2: usize) -> Self::Slice {
114        &self.tokens[p1..p2]
115    }
116}
117
118peg::parser! {
119    grammar lexer() for str {
120        rule quoted_char() -> char =
121            quiet! {
122                // escape
123                "\\\"" { '"' } /
124                "\\\\" { '\\' } /
125                // end with "
126                c:[^ '"'] { c }
127            } / expected!("any character")
128
129        rule quoted_string() -> String =
130            "\"" chars:quoted_char()* ( quiet! { "\"" } / expected!("end quote") ) { chars.into_iter().collect() }
131
132        rule unquoted_string() -> &'input str =
133            s:$([^c if c.is_whitespace() || matches!(c, ':' | '(' | ')' | '"')]+) { s }
134
135        rule string() -> PositionedToken<'input> =
136            position:position!() string:quoted_string()  {
137                PositionedToken { token: Token::Quoted(string), position }
138            } /
139            position:position!() string:unquoted_string() {
140                PositionedToken { token: Token::Unquoted(string), position }
141            }
142
143        rule symbol() -> PositionedToken<'input> =
144            position:position!() c:$(['(' | ')' | ':']) {
145                PositionedToken { token: Token::Symbol(c.chars().next().unwrap()), position }
146            }
147
148        rule token() -> PositionedToken<'input> =
149            string() / symbol()
150
151        rule _() = [c if c.is_whitespace()]*
152
153        pub rule tokens() -> Vec<PositionedToken<'input>> =
154            _ tokens:(token() ** _) _ { tokens }
155    }
156}
157
158impl<'t> TokenStream<'t> {
159    pub fn new(source: &'t str) -> Result<Self, ParseError<LineCol>> {
160        let tokens = lexer::tokens(source)?;
161        Ok(TokenStream { source, tokens })
162    }
163}
164
165mod utils {
166
167    use super::*;
168
169    #[derive(snafu::Snafu, Debug)]
170    #[snafu(display("invalid value for `{expr}`: invalid pattern"))]
171    #[snafu(visibility(pub))]
172    pub struct InvalidPatternExpr {
173        expr: &'static str,
174        source: ParsePatternError,
175    }
176
177    pub type Result<T, E = InvalidPatternExpr> = core::result::Result<T, E>;
178
179    pub fn chain2<C, T>(c1: impl IntoIterator<Item = T>, c2: impl IntoIterator<Item = T>) -> C
180    where
181        C: FromIterator<T>,
182    {
183        c1.into_iter().chain(c2).collect()
184    }
185
186    pub fn chain3<C, T>(
187        c1: impl IntoIterator<Item = T>,
188        c2: impl IntoIterator<Item = T>,
189        c3: impl IntoIterator<Item = T>,
190    ) -> C
191    where
192        C: FromIterator<T>,
193    {
194        c1.into_iter().chain(c2).chain(c3).collect()
195    }
196}
197
198pub use utils::InvalidPatternExpr;
199
200peg::parser! {
201    grammar parser<'t>() for TokenStream<'t> {
202        use utils::*;
203        use BooleanOperator::*;
204        use Part::*;
205        use Token::*;
206        use peg::error::ErrorState;
207        use peg::RuleResult;
208
209        rule i(literal: &'static str) =
210            quiet!{
211                [Unquoted(l) if l.eq_ignore_ascii_case(literal)] /
212                [Quoted(l)   if l.eq_ignore_ascii_case(literal)]
213            } / expected!(literal)
214
215        rule ikeyword(literal: &'static str) =
216            quiet!{ [Unquoted(l) if l.eq_ignore_ascii_case(literal)] } /
217            expected!(literal)
218
219        // 由于expected的限制,只能使用str
220        rule s(symbol: &'static str) =
221            quiet!{ [token @ Symbol(..) if token == symbol ] } / expected!(symbol)
222
223        rule bracketed<T>(r: rule<T>) -> T = s("(") x:r() s(")") { x }
224
225        // pub rule source() -> Source =
226        //     i("wan") { Source::Wan } /
227        //     i("lan") { Source::Lan } /
228        //     expected!("`lan` or `wan`")
229
230        rule any() = ikeyword("*?")
231
232        // rule append_suffix_pattern(suffix: &'static str) -> Result<NormalPattern, ParsePatternError> =
233        //     token:( quiet!{ [Quoted(l)] } / quiet!{ [Unquoted(l)] } / expected!("pattern")) { match token {
234        //         Quoted(lit) if suffix.is_empty() => NormalPattern::from_str(lit),
235        //         Quoted(lit) => NormalPattern::from_str(format!("{lit}{suffix}").as_str()),
236        //         Unquoted(lit) if suffix.is_empty() => NormalPattern::from_str(lit),
237        //         Unquoted(lit) => NormalPattern::from_str(format!("{lit}{suffix}").as_str()),
238        //         _ => unreachable!(),
239        //     } }
240
241        rule pattern() -> Result<NormalPattern, ParsePatternError> =
242            token:( quiet!{ [Quoted(l)] } / quiet!{ [Unquoted(l)] } / expected!("pattern")) { match token {
243                Quoted(lit) => NormalPattern::from_str(lit),
244                Unquoted(lit) => NormalPattern::from_str(lit),
245                _ => unreachable!(),
246            } }
247
248        rule pattern_expr(expr: &'static str) -> Result<NormalPattern> =
249            pattern:pattern() { pattern.context(InvalidPatternExprSnafu { expr }) }
250
251        rule client_name_pattern() -> Result<ClientNamePattern> =
252            pattern:pattern_expr("client_name_pattern") { pattern.map(ClientNamePattern::from) }
253
254        rule client_name() -> Result<ClientName> = pattern:client_name_pattern() {
255            pattern.map(ClientName::from)
256        }
257
258        rule and() = ikeyword("and")
259        rule or()  = ikeyword("or")
260        rule not() = ikeyword("not")
261
262        rule method_pattern() -> Result<Method> =
263            pattern:pattern_expr("method_pattern") { pattern.map(Method::from) }
264
265        rule method() -> Result<AtomicLocationRuleExpr> =
266            ikeyword("method") method:method_pattern() { method.map(AtomicLocationRuleExpr::Method) }
267
268        rule kv_pattern() -> Result<KVPattern, ParsePatternError> =
269            key:pattern() s(":") value:pattern() {
270                Ok(KVPattern { key: key?, value: value? })
271            } /
272            key:pattern() {
273                Ok(KVPattern { key: key?, value: NormalPattern::new("*")? })
274            }
275
276        rule kv_pattern_expr(key: &'static str, value: &'static str) -> Result<KVPattern> =
277            key:pattern_expr(key) ":" value:pattern_expr(value) {
278                Ok(KVPattern { key: key?, value: value? })
279            } /
280            key:pattern_expr(key) {
281                Ok(KVPattern {
282                    key: key?,
283                    value: NormalPattern::new("*").context(InvalidPatternExprSnafu { expr: value })?,
284                })
285            }
286
287        rule header_pattern() -> Result<Header> =
288            pattern:kv_pattern_expr("header_key", "header_value") { pattern.map(Header::from) }
289
290        rule header() -> Result<AtomicLocationRuleExpr> =
291            i("header") header:header_pattern() { header.map(AtomicLocationRuleExpr::Header) }
292
293        rule query_pattern() -> Result<Query> =
294            pattern:kv_pattern_expr("query_key", "query_value") { pattern.map(Query::from) }
295
296        rule query() -> Result<AtomicLocationRuleExpr> =
297            ikeyword("query") query:query_pattern() { query.map(AtomicLocationRuleExpr::Query) }
298
299        pub rule atomic_location_rule_expr() -> Result<AtomicLocationRuleExpr> =
300            any() { Ok(AtomicLocationRuleExpr::Any(AnyClient)) } /
301            ikeyword("with") with:( header() / query() / method()) { with } /
302            pattern:client_name(){ Ok(AtomicLocationRuleExpr::ClientName(pattern?)) }
303
304        rule location_rule_expr_part() -> Result<Part<BooleanOperator, AtomicLocationRuleExpr>> =
305            not() { Ok(Operator(Not)) } /
306            and() { Ok(Operator(And)) } /
307            or()  { Ok(Operator(Or )) } /
308            atomic:atomic_location_rule_expr() { Ok(Expr(atomic?)) }
309
310        pub rule polish_location_rule_exprs() -> Result<Exprs<BooleanOperator, AtomicLocationRuleExpr>> =
311            exprs:(location_rule_expr_part())* { exprs.into_iter().collect() }
312
313        rule composite<E, T, R>(r: R) -> Result<Exprs<BooleanOperator, E>>
314        where
315            Exprs<BooleanOperator, E>: From<T>,
316            R: Clone + Fn(&'input TokenStream<'t>, &mut ParseState<'input, 't>, &mut ErrorState, usize) -> RuleResult<Result<T>>
317        = precedence! {
318            x:(@) and() y:@ { Ok(chain3([Operator(And)], x?, y?)) }
319            x:(@) or()  y:@ { Ok(chain3([Operator(Or )], x?, y?)) }
320            --
321            not() x:(@) { Ok(chain2([Operator(Not)], x?)) }
322            --
323            exprs:r() { Ok(exprs?.into()) }
324            exprs:bracketed(<composite(r.clone())>) { exprs }
325        }
326
327        rule bracketed_composite_or_atomic<E, T, R>(r: R) -> Result<Exprs<BooleanOperator, E>>
328        where
329            Exprs<BooleanOperator, E>: From<T>,
330            R: Clone + Fn(&'input TokenStream<'t>, &mut ParseState<'input, 't>, &mut ErrorState, usize) -> RuleResult<Result<T>>
331        = exprs:bracketed(<composite(<r()>)>) { exprs } / atomic:r() { Ok(atomic?.into()) }
332
333        rule possiable_negative_expr<E, T>(r: rule<Result<T>>) -> Result<Exprs<BooleanOperator, E>>
334        where
335            Exprs<BooleanOperator, E>: From<T>,
336        = not() exprs:r() { Ok(chain2([Operator(Not)], Exprs::from(exprs?))) } / exprs:r() { Ok(exprs?.into()) }
337
338        rule location_patterns() -> Result<Exprs<BooleanOperator, AtomicLocationRuleExpr>> =
339            ikeyword("header") exprs:bracketed_composite_or_atomic(<pat: header_pattern() { pat.map(AtomicLocationRuleExpr::Header).map(Expr) }>) { exprs } /
340            ikeyword("query")  exprs:bracketed_composite_or_atomic(<pat: query_pattern()  { pat.map(AtomicLocationRuleExpr::Query).map(Expr)  }>) { exprs } /
341            // method not 很合理
342            ikeyword("method") exprs:bracketed_composite_or_atomic(<possiable_negative_expr(<pat: method_pattern() { pat.map(AtomicLocationRuleExpr::Method).map(Expr) }>)>) { exprs }
343
344        rule with<T>(r: rule<Result<Exprs<BooleanOperator, T>>>) -> Result<Exprs<BooleanOperator, T>> =
345            ikeyword("with")    exprs:r() { exprs } /
346            ikeyword("without") exprs:r() { Ok(chain2([Operator(Not)], exprs?)) }
347
348        rule location_profile() -> Result<AtomicLocationRuleExpr> =
349            any() { Ok(AtomicLocationRuleExpr::Any(AnyClient)) } /
350            pattern:client_name() { Ok(AtomicLocationRuleExpr::ClientName(pattern?)) }
351
352        pub rule infix_location_rule_exprs() -> Result<Exprs<BooleanOperator, AtomicLocationRuleExpr>> =
353            profile:location_profile() patterns:with(<composite(<location_patterns()>)>)? {
354                match patterns {
355                   Some(patterns) => Ok(chain3([Operator(And)], [profile.map(Expr)?], patterns?)),
356                   None => Ok(profile.map(Expr)?.into()),
357                }
358            }
359    }
360}
361
362pub use parser::*;
363
#[cfg(test)]
mod tests {
    use Part::*;

    use super::*;

    /// Lexes `source`, prints the tokens, and panics with a
    /// `Lex error ...` message when lexing fails.
    fn lex(source: &str) -> TokenStream<'_> {
        let tokens = match lexer::tokens(source) {
            Ok(tokens) => tokens,
            Err(e) => panic!("Lex error for `{source}`: {e}"),
        };
        let rendered: Vec<String> = tokens.iter().map(|t| t.token.to_string()).collect();
        println!("Tokens: {:?}", rendered);
        TokenStream { tokens, source }
    }

    #[test]
    #[should_panic(expected = "Lex error")]
    fn incomplete_quote() {
        lex(r#" " "#);
    }

    /// Parses `source` with the infix grammar and checks that the result
    /// survives a JSON serialize/deserialize round trip unchanged.
    fn location_invariant(source: &str) -> Exprs<BooleanOperator, AtomicLocationRuleExpr> {
        let stream = lex(source);
        let exprs = match infix_location_rule_exprs(&stream) {
            Ok(Ok(exprs)) => exprs,
            Ok(Err(e)) => panic!("Invalid value for `{source}`: {e}"),
            Err(e) => panic!("Parse error for `{source}`: {e}"),
        };

        println!("Location Exprs: {exprs:?}");

        let json = serde_json::to_string(&exprs).unwrap();
        let exprs2 = match serde_json::from_str::<Exprs<_, _>>(&json) {
            Ok(round_tripped) => round_tripped,
            Err(e) => panic!("(Invariant)Parse error for `{json}`: {e}"),
        };
        assert!(
            exprs2 == exprs,
            "Invariant test failed for `{json}`: got {}",
            serde_json::to_string(&exprs2).unwrap()
        );

        exprs
    }

    #[test]
    fn escape() {
        let plain = location_invariant( r#" "*.remote" "#);
        assert!(matches!(
            &plain[0],
            Expr(AtomicLocationRuleExpr::ClientName(pattern)) if pattern.as_ref().as_str() == "*.remote"
        ));

        let escaped = location_invariant(r#" "\"*.remote" "#);
        assert!(matches!(
            &escaped[0],
            Expr(AtomicLocationRuleExpr::ClientName(pattern)) if pattern.as_ref().as_str() == r#""*.remote"#
        ));
    }

    #[test]
    fn any() {
        let exprs = location_invariant("*?");
        assert!(matches!(
            &exprs[0],
            Expr(AtomicLocationRuleExpr::Any(AnyClient))
        ));
    }

    #[test]
    fn or() {
        location_invariant(r#" "*.remote" with method ( GET or POST )"#);
    }

    #[test]
    fn not() {
        location_invariant(r#" *? without header H"#);
    }

    #[test]
    fn combine_not() {
        location_invariant(r#" *? with method GET or not header rebot "#);
    }

    #[test]
    fn bracket() {
        location_invariant(r#" *? with method (G*) "#);
    }

    #[test]
    fn combine_patterns() {
        let sources = [
            r#" "*.example.com" with (header X-User:admin and method LOGIN) or not method "~ GET|PUT|POST|DELETE|CONNECT" "#,
            r#" "*.example.com" without (header X-User:admin and method LOGIN) or not method "~ GET|PUT|POST|DELETE|CONNECT" "#,
            r#" *? with (header X-User:admin and method LOGIN) or not method "~ GET|PUT|POST|DELETE|CONNECT" "#,
            r#" *? without (header X-User:admin and method LOGIN) or not method "~ GET|PUT|POST|DELETE|CONNECT" "#,
        ];
        for source in sources {
            location_invariant(source);
        }
    }

    // Quoted keywords are ordinary values.
    #[test]
    fn keyword() {
        location_invariant(r#" *? with method "not" "#);
        location_invariant(r#" *? with method "method" "#);
    }

    // A quoted "and" is not an operator, so this input must be rejected.
    #[test]
    #[should_panic]
    fn keyword_panic() {
        location_invariant(r#" *? with header "X-Pasword" "and" X-User "#);
    }
}
472    #[should_panic]
473    fn keyword_panic() {
474        location_invariant(r#" *? with header "X-Pasword" "and" X-User "#);
475    }
476}