parse_it_codegen/
syntax.rs

1use std::rc::Rc;
2
3use syn::{parse::discouraged::Speculative, punctuated::Punctuated, Token};
4
5#[derive(Debug)]
6pub struct ParseItConfig {
7    pub crate_name: Option<syn::Path>,
8    pub parse_macros: Rc<Vec<syn::Path>>,
9    pub debug: bool,
10}
11
12impl Default for ParseItConfig {
13    fn default() -> Self {
14        Self {
15            crate_name: None,
16            parse_macros: Rc::new(vec![
17                syn::parse_quote! { print },
18                syn::parse_quote! { println },
19                syn::parse_quote! { eprint },
20                syn::parse_quote! { eprintln },
21                syn::parse_quote! { format },
22                syn::parse_quote! { dbg },
23            ]),
24            debug: false,
25        }
26    }
27}
28
29#[derive(Debug)]
30pub struct ParseIt {
31    pub attrs: Vec<syn::Attribute>,
32    pub mod_name: syn::Ident,
33    pub items: Vec<syn::Item>,
34    pub parsers: Vec<Parser>,
35    pub config: ParseItConfig,
36}
37
38impl syn::parse::Parse for ParseIt {
39    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
40        let mut config = ParseItConfig::default();
41        let mut attrs = vec![];
42        for attr in input.call(syn::Attribute::parse_outer)? {
43            if attr.path().is_ident("parse_it") {
44                attr.parse_nested_meta(|meta| {
45                    if meta.path.is_ident("crate") {
46                        let value = meta.value()?;
47                        let value = value.parse::<syn::LitStr>()?;
48                        config.crate_name = Some(value.parse().map_err(|_| {
49                            syn::Error::new_spanned(value, "expected a valid path")
50                        })?);
51                    } else if meta.path.is_ident("parse_macros") {
52                        let value = meta.value()?;
53                        let value = value.parse::<syn::LitStr>()?;
54                        config.parse_macros = Rc::new(
55                            value
56                                .parse_with(Punctuated::<syn::Path, Token![,]>::parse_terminated)
57                                .map_err(|_| {
58                                    syn::Error::new_spanned(
59                                        value,
60                                        "expected a list of paths separated by commas",
61                                    )
62                                })?
63                                .into_iter()
64                                .collect(),
65                        );
66                    } else if meta.path.is_ident("debug") {
67                        let value = meta.value()?;
68                        let value = value.parse::<syn::LitBool>()?;
69                        config.debug = value.value;
70                    } else {
71                        Err(syn::Error::new_spanned(meta.path, "unknown attribute"))?
72                    }
73                    Ok(())
74                })?;
75            } else {
76                attrs.push(attr);
77            }
78        }
79
80        input.parse::<Token![mod]>()?;
81        let mod_name = input.parse::<syn::Ident>()?;
82
83        let content;
84        syn::braced!(content in input);
85        let mut parsers = vec![];
86        let mut items = vec![];
87        while !content.is_empty() {
88            let fork = content.fork();
89            if let Ok(parser) = fork.parse::<Parser>() {
90                content.advance_to(&fork);
91                parsers.push(parser);
92            } else {
93                let item = content.parse::<syn::Item>()?;
94                items.push(item);
95            }
96        }
97        Ok(ParseIt {
98            attrs,
99            items,
100            mod_name,
101            parsers,
102            config,
103        })
104    }
105}
106
107/// ```text
108/// Parser ::= Vis Name '->' Type '{' Rule+ '}'
109/// ```
110#[derive(Debug)]
111pub struct Parser {
112    pub vis: syn::Visibility,
113    pub name: syn::Ident,
114    pub ty: syn::Type,
115    pub rules: (Rule, Vec<Rule>),
116}
117
118impl Parser {
119    pub fn rules(&self) -> impl Iterator<Item = &Rule> {
120        std::iter::once(&self.rules.0).chain(self.rules.1.iter())
121    }
122}
123
124impl syn::parse::Parse for Parser {
125    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
126        let vis = input.parse::<syn::Visibility>()?;
127        let name = input.parse::<syn::Ident>()?;
128        input.parse::<Token![->]>()?;
129        let ty = input.parse::<syn::Type>()?;
130
131        let content;
132        syn::braced!(content in input);
133
134        let first_rule = content.parse::<Rule>()?;
135        let mut rules = vec![];
136        while !content.is_empty() {
137            let rule = content.parse::<Rule>()?;
138            rules.push(rule);
139        }
140        let rules = (first_rule, rules);
141
142        Ok(Parser {
143            vis,
144            name,
145            ty,
146            rules,
147        })
148    }
149}
150
151/// ```text
152/// Rule ::= Production '=>' Expr
153/// ```
154#[derive(Debug)]
155pub struct Rule {
156    pub production: Production,
157    pub action: syn::Expr,
158}
159
160impl syn::parse::Parse for Rule {
161    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
162        let production = input.parse::<Production>()?;
163        input.parse::<Token![=>]>()?;
164        let action = input.parse::<syn::Expr>()?;
165        if (requires_comma_to_be_match_arm(&action) && !input.is_empty()) || input.peek(Token![,]) {
166            input.parse::<Token![,]>()?;
167        }
168        Ok(Rule { production, action })
169    }
170}
171
172/// ```text
173/// Production ::= Part+
174/// ```
175#[derive(Debug)]
176pub struct Production {
177    /// non-empty: (first, rest)
178    pub parts: (Part, Vec<Part>),
179}
180
181impl Production {
182    pub fn parts(&self) -> impl Iterator<Item = &Part> {
183        std::iter::once(&self.parts.0).chain(self.parts.1.iter())
184    }
185}
186
187impl syn::parse::Parse for Production {
188    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
189        let first_part = input.parse::<Part>()?;
190        let mut rest_parts = Vec::new();
191        while !input.peek(Token![=>]) && !input.peek(Token![|]) && !input.is_empty() {
192            // Production ::= Part+
193            rest_parts.push(input.parse::<Part>()?);
194        }
195
196        let parts = (first_part, rest_parts);
197        Ok(Production { parts })
198    }
199}
200
201#[derive(Debug)]
202pub enum Capture {
203    Named(Box<syn::Pat>),
204    Loud,
205    NotSpecified,
206}
207
208/// ```text
209/// Part ::= (Pat ':')? '@'? ('&' | '!')? Atom ('*' | '+' | '?')?
210/// ```
211#[derive(Debug)]
212pub struct Part {
213    pub capture: Capture,
214    pub part: Atom,
215}
216
217impl syn::parse::Parse for Part {
218    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
219        let fork = input.fork();
220        let capture = if let Ok(pat) = fork
221            .call(syn::Pat::parse_single)
222            .and_then(|pat| fork.parse::<Token![:]>().map(|_| pat))
223        {
224            // Choice ::= Pat ':' Atom ...
225            input.advance_to(&fork);
226            Some(pat)
227        } else {
228            None
229        };
230
231        let non_slient = if input.peek(Token![@]) {
232            // Choice ::= ... '@' ...
233            input.parse::<Token![@]>()?;
234            true
235        } else {
236            false
237        };
238
239        let atom = input.parse::<Atom>()?;
240        let part = if input.peek(Token![*]) {
241            // Choice ::= ... Atom '*'
242            input.parse::<Token![*]>()?;
243            Atom::Repeat(Box::new(atom))
244        } else if input.peek(Token![+]) {
245            // Choice ::= ... Atom '+'
246            input.parse::<Token![+]>()?;
247            Atom::Repeat1(Box::new(atom))
248        } else if input.peek(Token![?]) {
249            // Choice ::= ... Atom '?'
250            input.parse::<Token![?]>()?;
251            Atom::Optional(Box::new(atom))
252        } else {
253            atom
254        };
255
256        let capture = if let Some(capture) = capture {
257            Capture::Named(Box::new(capture))
258        } else if non_slient {
259            Capture::Loud
260        } else {
261            Capture::NotSpecified
262        };
263
264        Ok(Part { capture, part })
265    }
266}
267
268/// ```text
269/// Atom ::= '(' Production ')'
270///        | '[' Production ('|' Production)* ']'
271///        | Terminal
272///        | NonTerminal
273/// ```
274#[derive(Debug)]
275pub enum Atom {
276    Terminal(syn::Lit),
277    PatTerminal(syn::Pat),
278    NonTerminal(syn::Ident),
279    Sub(Box<Production>),
280    Choice(Box<Production>, Vec<Production>),
281    Repeat(Box<Atom>),
282    Repeat1(Box<Atom>),
283    Optional(Box<Atom>),
284    LookAhead(Box<Atom>),
285    LookAheadNot(Box<Atom>),
286}
287
288impl syn::parse::Parse for Atom {
289    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
290        let lookahead = input.lookahead1();
291        let atom = if lookahead.peek(syn::token::Paren) {
292            // Atom ::= '(' Production ')'
293            let content;
294            syn::parenthesized!(content in input);
295            Atom::Sub(Box::new(content.parse()?))
296        } else if lookahead.peek(syn::token::Bracket) {
297            // Atom ::= '[' Production ('|' Production)* ']'
298            let content;
299            syn::bracketed!(content in input);
300            let mut choices = content
301                .parse_terminated(Production::parse, Token![|])?
302                .into_iter();
303            let first_choice = choices
304                .next()
305                .ok_or_else(|| content.error("expected at least one choice"))?;
306            Atom::Choice(Box::new(first_choice), choices.collect())
307        } else if lookahead.peek(syn::Lit) {
308            // Atom ::= Terminal
309            Atom::Terminal(input.parse()?)
310        } else if lookahead.peek(syn::Ident) {
311            let fork = input.fork();
312            if let Ok(pat) = fork.call(syn::Pat::parse_single) {
313                if matches!(&pat, syn::Pat::Ident(_)) {
314                    // Atom ::= NonTerminal
315                    Atom::NonTerminal(input.parse()?)
316                } else {
317                    // Atom ::= PatTerminal
318                    input.advance_to(&fork);
319                    Atom::PatTerminal(pat)
320                }
321            } else {
322                Err(lookahead.error())?
323            }
324        } else {
325            Err(lookahead.error())?
326        };
327
328        Ok(atom)
329    }
330}
331
332fn requires_comma_to_be_match_arm(expr: &syn::Expr) -> bool {
333    use syn::Expr;
334    !matches!(
335        expr,
336        Expr::If(_)
337            | Expr::Match(_)
338            | Expr::Block(_)
339            | Expr::Unsafe(_)
340            | Expr::While(_)
341            | Expr::Loop(_)
342            | Expr::ForLoop(_)
343            | Expr::TryBlock(_)
344            | Expr::Const(_)
345    )
346}