parse_it_codegen/
syntax.rs

1use std::rc::Rc;
2
3use syn::parse::discouraged::Speculative;
4use syn::punctuated::Punctuated;
5use syn::Token;
6
/// Options collected from `#[parse_it(...)]` attributes placed on the module.
#[derive(Debug)]
pub struct ParseItConfig {
    /// Path supplied through `#[parse_it(crate = "...")]`; `None` when the
    /// option is not given.
    pub crate_name: Option<syn::Path>,
    /// List of macro paths, replaceable through
    /// `#[parse_it(parse_macros = "a, b, ...")]`.
    // NOTE(review): how the code generator uses this list is not visible in
    // this file — confirm against the consumer.
    pub parse_macros: Rc<Vec<syn::Path>>,
}
12
13impl Default for ParseItConfig {
14    fn default() -> Self {
15        Self {
16            crate_name: None,
17            parse_macros: Rc::new(vec![
18                syn::parse_quote! { print },
19                syn::parse_quote! { println },
20                syn::parse_quote! { eprint },
21                syn::parse_quote! { eprintln },
22                syn::parse_quote! { format },
23                syn::parse_quote! { dbg },
24            ]),
25        }
26    }
27}
28
/// Root of the parsed input: an attributed `mod <name> { ... }` whose body
/// mixes parser definitions with ordinary Rust items.
#[derive(Debug)]
pub struct ParseIt {
    /// Outer attributes on the module, excluding the `#[parse_it(...)]` ones
    /// (those are folded into `config`).
    pub attrs: Vec<syn::Attribute>,
    /// Identifier following the `mod` keyword.
    pub mod_name: syn::Ident,
    /// Module contents that did not parse as a [`Parser`] and were parsed as
    /// regular `syn::Item`s instead, in source order.
    pub items: Vec<syn::Item>,
    /// Parser definitions found in the module body, in source order.
    pub parsers: Vec<Parser>,
    /// Options gathered from the `#[parse_it(...)]` attributes.
    pub config: ParseItConfig,
}
37
38impl syn::parse::Parse for ParseIt {
39    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
40        let mut config = ParseItConfig::default();
41        let mut attrs = vec![];
42        for attr in input.call(syn::Attribute::parse_outer)? {
43            if attr.path().is_ident("parse_it") {
44                attr.parse_nested_meta(|meta| {
45                    if meta.path.is_ident("crate") {
46                        let value = meta.value()?;
47                        let value = value.parse::<syn::LitStr>()?;
48                        config.crate_name = Some(value.parse().map_err(|_| {
49                            syn::Error::new_spanned(value, "expected a valid path")
50                        })?);
51                    } else if meta.path.is_ident("parse_macros") {
52                        let value = meta.value()?;
53                        let value = value.parse::<syn::LitStr>()?;
54                        config.parse_macros = Rc::new(
55                            value
56                                .parse_with(Punctuated::<syn::Path, Token![,]>::parse_terminated)
57                                .map_err(|_| {
58                                    syn::Error::new_spanned(
59                                        value,
60                                        "expected a list of paths separated by commas",
61                                    )
62                                })?
63                                .into_iter()
64                                .collect(),
65                        );
66                    } else {
67                        Err(syn::Error::new_spanned(meta.path, "unknown attribute"))?
68                    }
69                    Ok(())
70                })?;
71            } else {
72                attrs.push(attr);
73            }
74        }
75
76        input.parse::<Token![mod]>()?;
77        let mod_name = input.parse::<syn::Ident>()?;
78
79        let content;
80        syn::braced!(content in input);
81        let mut parsers = vec![];
82        let mut items = vec![];
83        while !content.is_empty() {
84            let fork = content.fork();
85            if let Ok(parser) = fork.parse::<Parser>() {
86                content.advance_to(&fork);
87                parsers.push(parser);
88            } else {
89                let item = content.parse::<syn::Item>()?;
90                items.push(item);
91            }
92        }
93        Ok(ParseIt {
94            attrs,
95            items,
96            mod_name,
97            parsers,
98            config,
99        })
100    }
101}
102
/// A single parser definition inside the module body.
///
/// ```text
/// Parser ::= Vis Name '->' Type '{' Rule+ '}'
/// ```
#[derive(Debug)]
pub struct Parser {
    /// Visibility written before the parser name (may be inherited/empty).
    pub vis: syn::Visibility,
    /// Name of the parser.
    pub name: syn::Ident,
    /// Type written after `->`.
    pub ty: syn::Type,
    /// non-empty: (first, rest)
    pub rules: (Rule, Vec<Rule>),
}
113
114impl Parser {
115    pub fn rules(&self) -> impl Iterator<Item = &Rule> {
116        std::iter::once(&self.rules.0).chain(self.rules.1.iter())
117    }
118}
119
120impl syn::parse::Parse for Parser {
121    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
122        let vis = input.parse::<syn::Visibility>()?;
123        let name = input.parse::<syn::Ident>()?;
124        input.parse::<Token![->]>()?;
125        let ty = input.parse::<syn::Type>()?;
126
127        let content;
128        syn::braced!(content in input);
129
130        let first_rule = content.parse::<Rule>()?;
131        let mut rules = vec![];
132        while !content.is_empty() {
133            let rule = content.parse::<Rule>()?;
134            rules.push(rule);
135        }
136        let rules = (first_rule, rules);
137
138        Ok(Parser {
139            vis,
140            name,
141            ty,
142            rules,
143        })
144    }
145}
146
/// One alternative of a parser: a production and the expression evaluated
/// for it.
///
/// ```text
/// Rule ::= Production '=>' Expr
/// ```
///
/// When parsed, a rule may additionally be followed by a `,` separator (see
/// the `Parse` implementation for when it is mandatory).
#[derive(Debug)]
pub struct Rule {
    /// Left-hand side of `=>`.
    pub production: Production,
    /// Right-hand side of `=>`.
    pub action: syn::Expr,
}
155
156impl syn::parse::Parse for Rule {
157    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
158        let production = input.parse::<Production>()?;
159        input.parse::<Token![=>]>()?;
160        let action = input.parse::<syn::Expr>()?;
161        if (requires_comma_to_be_match_arm(&action) && !input.is_empty()) || input.peek(Token![,]) {
162            input.parse::<Token![,]>()?;
163        }
164        Ok(Rule { production, action })
165    }
166}
167
/// A non-empty sequence of parts.
///
/// ```text
/// Production ::= Part+
/// ```
#[derive(Debug)]
pub struct Production {
    /// non-empty: (first, rest)
    pub parts: (Part, Vec<Part>),
}
176
177impl Production {
178    pub fn parts(&self) -> impl Iterator<Item = &Part> {
179        std::iter::once(&self.parts.0).chain(self.parts.1.iter())
180    }
181}
182
183impl syn::parse::Parse for Production {
184    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
185        let first_part = input.parse::<Part>()?;
186        let mut rest_parts = Vec::new();
187        while !input.peek(Token![=>]) && !input.peek(Token![|]) && !input.is_empty() {
188            // Production ::= Part+
189            rest_parts.push(input.parse::<Part>()?);
190        }
191
192        let parts = (first_part, rest_parts);
193        Ok(Production { parts })
194    }
195}
196
/// How the value of a [`Part`] is captured, as written in the grammar.
#[derive(Debug)]
pub enum Capture {
    /// The part was prefixed with `Pat ':'`; holds that pattern. Takes
    /// precedence over `@` when both are written.
    Named(Box<syn::Pat>),
    /// The part was prefixed with `@` (and has no `Pat ':'` prefix).
    Loud,
    /// Neither a pattern nor `@` was written.
    NotSpecified,
}
203
/// One element of a production: an atom with optional capture and operators.
///
/// ```text
/// Part ::= (Pat ':')? '@'? ('&' | '!')? Atom ('*' | '+' | '?')?
/// ```
#[derive(Debug)]
pub struct Part {
    /// Capture mode derived from the optional `Pat ':'` / `@` prefixes.
    pub capture: Capture,
    /// The atom of this part, already wrapped in the [`Atom`] variant that
    /// corresponds to any operator written around it.
    pub part: Atom,
}
212
213impl syn::parse::Parse for Part {
214    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
215        let fork = input.fork();
216        let capture = if let Ok(pat) = fork
217            .call(syn::Pat::parse_single)
218            .and_then(|pat| fork.parse::<Token![:]>().map(|_| pat))
219        {
220            // Choice ::= Pat ':' Atom ...
221            input.advance_to(&fork);
222            Some(pat)
223        } else {
224            None
225        };
226
227        let non_slient = if input.peek(Token![@]) {
228            // Choice ::= ... '@' ...
229            input.parse::<Token![@]>()?;
230            true
231        } else {
232            false
233        };
234
235        let atom = input.parse::<Atom>()?;
236        let part = if input.peek(Token![*]) {
237            // Choice ::= ... Atom '*'
238            input.parse::<Token![*]>()?;
239            Atom::Repeat(Box::new(atom))
240        } else if input.peek(Token![+]) {
241            // Choice ::= ... Atom '+'
242            input.parse::<Token![+]>()?;
243            Atom::Repeat1(Box::new(atom))
244        } else if input.peek(Token![?]) {
245            // Choice ::= ... Atom '?'
246            input.parse::<Token![?]>()?;
247            Atom::Optional(Box::new(atom))
248        } else {
249            atom
250        };
251
252        let capture = if let Some(capture) = capture {
253            Capture::Named(Box::new(capture))
254        } else if non_slient {
255            Capture::Loud
256        } else {
257            Capture::NotSpecified
258        };
259
260        Ok(Part { capture, part })
261    }
262}
263
/// The building blocks of a production, plus the wrappers produced by the
/// operators of the `Part` grammar.
///
/// ```text
/// Atom ::= '(' Production ')'
///        | '[' Production ('|' Production)* ']'
///        | Terminal
///        | NonTerminal
/// ```
#[derive(Debug)]
pub enum Atom {
    /// A literal token.
    Terminal(syn::Lit),
    /// An identifier.
    // NOTE(review): presumably the name of another parser — confirm against
    // the code generator.
    NonTerminal(syn::Ident),
    /// A parenthesised sub-production: `'(' Production ')'`.
    Sub(Box<Production>),
    /// A bracketed, `|`-separated list of alternatives; non-empty:
    /// (first, rest).
    Choice(Box<Production>, Vec<Production>),
    /// An atom followed by `*`.
    Repeat(Box<Atom>),
    /// An atom followed by `+`.
    Repeat1(Box<Atom>),
    /// An atom followed by `?`.
    Optional(Box<Atom>),
    /// Look-ahead wrapper; the `Part` grammar lists `&` and `!` prefixes,
    /// presumably `&` maps to this variant.
    LookAhead(Box<Atom>),
    /// Negated look-ahead wrapper; presumably the `!` prefix of the `Part`
    /// grammar.
    LookAheadNot(Box<Atom>),
}
282
283impl syn::parse::Parse for Atom {
284    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
285        let lookahead = input.lookahead1();
286        let atom = if lookahead.peek(syn::token::Paren) {
287            // Atom ::= '(' Production ')'
288            let content;
289            syn::parenthesized!(content in input);
290            Atom::Sub(Box::new(content.parse()?))
291        } else if lookahead.peek(syn::token::Bracket) {
292            // Atom ::= '[' Production ('|' Production)* ']'
293            let content;
294            syn::bracketed!(content in input);
295            let mut choices = content
296                .parse_terminated(Production::parse, Token![|])?
297                .into_iter();
298            let first_choice = choices
299                .next()
300                .ok_or_else(|| content.error("expected at least one choice"))?;
301            Atom::Choice(Box::new(first_choice), choices.collect())
302        } else if lookahead.peek(syn::Lit) {
303            // Atom ::= Terminal
304            Atom::Terminal(input.parse()?)
305        } else if lookahead.peek(syn::Ident) {
306            // Atom ::= NonTerminal
307            Atom::NonTerminal(input.parse()?)
308        } else {
309            return Err(lookahead.error());
310        };
311
312        Ok(atom)
313    }
314}
315
316fn requires_comma_to_be_match_arm(expr: &syn::Expr) -> bool {
317    use syn::Expr;
318    !matches!(
319        expr,
320        Expr::If(_)
321            | Expr::Match(_)
322            | Expr::Block(_)
323            | Expr::Unsafe(_)
324            | Expr::While(_)
325            | Expr::Loop(_)
326            | Expr::ForLoop(_)
327            | Expr::TryBlock(_)
328            | Expr::Const(_)
329    )
330}