parse_it_codegen/
syntax.rs

1use std::rc::Rc;
2
3use syn::{parse::discouraged::Speculative, punctuated::Punctuated, Attribute, Token};
4
5#[derive(Debug)]
6pub struct ParseIt {
7    pub mods: Vec<Mod>,
8}
9
10impl syn::parse::Parse for ParseIt {
11    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
12        let mut mods = vec![];
13        while !input.is_empty() {
14            let mut attrs = input.call(syn::Attribute::parse_outer)?;
15
16            input.parse::<Token![mod]>()?;
17            let mod_name = input.parse::<syn::Ident>()?;
18
19            let content;
20            syn::braced!(content in input);
21
22            #[derive(Clone, Copy, PartialEq, Eq)]
23            enum ModType {
24                Parser,
25                Lexer,
26            }
27            let mut mod_types = vec![];
28            attrs.retain(|attr| {
29                if attr.path().is_ident("parser") {
30                    mod_types.push(ModType::Parser);
31                    return false;
32                } else if attr.path().is_ident("lexer") {
33                    mod_types.push(ModType::Lexer);
34                    return false;
35                }
36                true
37            });
38            let mod_type = if mod_types.is_empty() {
39                return Err(syn::Error::new_spanned(
40                    mod_name,
41                    "module must be marked as parser or lexer",
42                ));
43            } else if mod_types.len() == 1 {
44                mod_types[0]
45            } else {
46                return Err(syn::Error::new_spanned(
47                    mod_name,
48                    "module can only be marked as parser or lexer, not both",
49                ));
50            };
51            match mod_type {
52                ModType::Parser => {
53                    let parser_mod = ParserMod::parse(attrs, mod_name, &content)?;
54                    mods.push(Mod::Parser(parser_mod));
55                }
56                ModType::Lexer => {
57                    let lexer_mod = LexerMod::parse(attrs, mod_name, &content)?;
58                    mods.push(Mod::Lexer(lexer_mod));
59                }
60            }
61        }
62        Ok(Self { mods })
63    }
64}
65
66#[derive(Debug)]
67pub enum Mod {
68    Parser(ParserMod),
69    Lexer(LexerMod),
70}
71
72#[derive(Debug)]
73pub struct ParserConfig {
74    pub crate_name: Option<syn::Path>,
75    pub parse_macros: Rc<Vec<syn::Path>>,
76    pub debug: bool,
77}
78
79impl Default for ParserConfig {
80    fn default() -> Self {
81        Self {
82            crate_name: None,
83            parse_macros: Rc::new(vec![
84                syn::parse_quote! { print },
85                syn::parse_quote! { println },
86                syn::parse_quote! { eprint },
87                syn::parse_quote! { eprintln },
88                syn::parse_quote! { format },
89                syn::parse_quote! { dbg },
90            ]),
91            debug: false,
92        }
93    }
94}
95
96#[derive(Debug)]
97pub struct ParserMod {
98    pub attrs: Vec<syn::Attribute>,
99    pub mod_name: syn::Ident,
100    pub items: Vec<syn::Item>,
101    pub parsers: Vec<Parser>,
102    pub config: ParserConfig,
103}
104
105impl ParserMod {
106    fn parse(
107        attrs: Vec<Attribute>,
108        mod_name: syn::Ident,
109        content: syn::parse::ParseStream,
110    ) -> syn::Result<Self> {
111        let mut config = ParserConfig::default();
112        let mut common_attrs = vec![];
113        for attr in attrs {
114            if attr.path().is_ident("parse_it") {
115                attr.parse_nested_meta(|meta| {
116                    if meta.path.is_ident("crate") {
117                        let value = meta.value()?;
118                        let value = value.parse::<syn::LitStr>()?;
119                        config.crate_name = Some(value.parse().map_err(|_| {
120                            syn::Error::new_spanned(value, "expected a valid path")
121                        })?);
122                    } else if meta.path.is_ident("parse_macros") {
123                        let value = meta.value()?;
124                        let value = value.parse::<syn::LitStr>()?;
125                        config.parse_macros = Rc::new(
126                            value
127                                .parse_with(Punctuated::<syn::Path, Token![,]>::parse_terminated)
128                                .map_err(|_| {
129                                    syn::Error::new_spanned(
130                                        value,
131                                        "expected a list of paths separated by commas",
132                                    )
133                                })?
134                                .into_iter()
135                                .collect(),
136                        );
137                    } else if meta.path.is_ident("debug") {
138                        let value = meta.value()?;
139                        let value = value.parse::<syn::LitBool>()?;
140                        config.debug = value.value;
141                    } else {
142                        Err(syn::Error::new_spanned(meta.path, "unknown attribute"))?
143                    }
144                    Ok(())
145                })?;
146            } else {
147                common_attrs.push(attr);
148            }
149        }
150
151        let mut parsers = vec![];
152        let mut items = vec![];
153        while !content.is_empty() {
154            let fork = content.fork();
155            if let Ok(parser) = fork.parse::<Parser>() {
156                content.advance_to(&fork);
157                parsers.push(parser);
158            } else {
159                let item = content.parse::<syn::Item>()?;
160                items.push(item);
161            }
162        }
163        Ok(Self {
164            attrs: common_attrs,
165            items,
166            mod_name,
167            parsers,
168            config,
169        })
170    }
171}
172
173/// ```text
174/// Parser ::= Vis Name '->' Type '{' Rule* '}'
175/// ```
176#[derive(Debug)]
177pub struct Parser {
178    pub vis: syn::Visibility,
179    pub name: syn::Ident,
180    pub ty: syn::Type,
181    pub rules: Vec<Rule>,
182}
183
184impl syn::parse::Parse for Parser {
185    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
186        let vis = input.parse::<syn::Visibility>()?;
187        let name = input.parse::<syn::Ident>()?;
188        input.parse::<Token![->]>()?;
189        let ty = input.parse::<syn::Type>()?;
190
191        let content;
192        syn::braced!(content in input);
193
194        let mut rules = vec![];
195        while !content.is_empty() {
196            let rule = content.parse::<Rule>()?;
197            rules.push(rule);
198        }
199
200        Ok(Parser {
201            vis,
202            name,
203            ty,
204            rules,
205        })
206    }
207}
208
209/// ```text
210/// Rule ::= Production '=>' Expr
211/// ```
212#[derive(Debug)]
213pub struct Rule {
214    pub production: Production,
215    pub action: syn::Expr,
216}
217
218impl syn::parse::Parse for Rule {
219    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
220        let production = input.parse::<Production>()?;
221        input.parse::<Token![=>]>()?;
222        let action = input.parse::<syn::Expr>()?;
223        if (requires_comma_to_be_match_arm(&action) && !input.is_empty()) || input.peek(Token![,]) {
224            input.parse::<Token![,]>()?;
225        }
226        Ok(Rule { production, action })
227    }
228}
229
230/// ```text
231/// Production ::= Part+
232/// ```
233#[derive(Debug)]
234pub struct Production {
235    /// non-empty: (first, rest)
236    pub parts: (Part, Vec<Part>),
237}
238
239impl Production {
240    pub fn parts(&self) -> impl Iterator<Item = &Part> {
241        std::iter::once(&self.parts.0).chain(self.parts.1.iter())
242    }
243}
244
245impl syn::parse::Parse for Production {
246    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
247        let first_part = input.parse::<Part>()?;
248        let mut rest_parts = Vec::new();
249        while !input.peek(Token![=>]) && !input.peek(Token![|]) && !input.is_empty() {
250            // Production ::= Part+
251            rest_parts.push(input.parse::<Part>()?);
252        }
253
254        let parts = (first_part, rest_parts);
255        Ok(Production { parts })
256    }
257}
258
259#[derive(Debug)]
260pub enum Capture {
261    Named(Box<syn::Pat>),
262    Loud,
263    NotSpecified,
264}
265
266/// ```text
267/// Part ::= (Pat ':')? '@'? ('&' | '!')? Atom ('*' | '+' | '?')?
268/// ```
269#[derive(Debug)]
270pub struct Part {
271    pub capture: Capture,
272    pub part: Atom,
273}
274
275impl syn::parse::Parse for Part {
276    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
277        let fork = input.fork();
278        let capture = if let Ok(pat) = fork
279            .call(syn::Pat::parse_single)
280            .and_then(|pat| fork.parse::<Token![:]>().map(|_| pat))
281        {
282            // Choice ::= Pat ':' Atom ...
283            input.advance_to(&fork);
284            Some(pat)
285        } else {
286            None
287        };
288
289        let non_slient = if input.peek(Token![@]) {
290            // Choice ::= ... '@' ...
291            input.parse::<Token![@]>()?;
292            true
293        } else {
294            false
295        };
296
297        let lookahead = if input.peek(Token![&]) {
298            // Choice ::= ... '&' Atom ...
299            input.parse::<Token![&]>()?;
300            Some(true)
301        } else if input.peek(Token![!]) {
302            // Choice ::= ... '!' Atom ...
303            input.parse::<Token![!]>()?;
304            Some(false)
305        } else {
306            None
307        };
308
309        let atom = input.parse::<Atom>()?;
310        let part = if input.peek(Token![*]) {
311            // Choice ::= ... Atom '*'
312            input.parse::<Token![*]>()?;
313            Atom::Repeat(Box::new(atom))
314        } else if input.peek(Token![+]) {
315            // Choice ::= ... Atom '+'
316            input.parse::<Token![+]>()?;
317            Atom::Repeat1(Box::new(atom))
318        } else if input.peek(Token![?]) {
319            // Choice ::= ... Atom '?'
320            input.parse::<Token![?]>()?;
321            Atom::Optional(Box::new(atom))
322        } else {
323            atom
324        };
325
326        let part = if let Some(lookahead) = lookahead {
327            if lookahead {
328                Atom::LookAhead(Box::new(part))
329            } else {
330                Atom::LookAheadNot(Box::new(part))
331            }
332        } else {
333            part
334        };
335
336        let capture = if let Some(capture) = capture {
337            Capture::Named(Box::new(capture))
338        } else if non_slient {
339            Capture::Loud
340        } else {
341            Capture::NotSpecified
342        };
343
344        Ok(Part { capture, part })
345    }
346}
347
348/// ```text
349/// Atom ::= '(' Production ')'
350///        | '[' Production ('|' Production)* ']'
351///        | Terminal
352///        | NonTerminal
353/// Terminal ::= Literal | Pat | '<' Type '>'
354/// ```
355#[derive(Debug)]
356pub enum Atom {
357    Terminal(syn::Lit),
358    PatTerminal(syn::Pat),
359    TypePterminal(syn::Type),
360    NonTerminal(syn::Ident),
361    Sub(Box<Production>),
362    Choice(Box<Production>, Vec<Production>),
363    Repeat(Box<Atom>),
364    Repeat1(Box<Atom>),
365    Optional(Box<Atom>),
366    LookAhead(Box<Atom>),
367    LookAheadNot(Box<Atom>),
368}
369
370impl syn::parse::Parse for Atom {
371    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
372        let lookahead = input.lookahead1();
373        let atom = if lookahead.peek(syn::token::Paren) {
374            // Atom ::= '(' Production ')'
375            let content;
376            syn::parenthesized!(content in input);
377            Atom::Sub(Box::new(content.parse()?))
378        } else if lookahead.peek(syn::token::Bracket) {
379            // Atom ::= '[' Production ('|' Production)* ']'
380            let content;
381            syn::bracketed!(content in input);
382            let mut choices = content
383                .parse_terminated(Production::parse, Token![|])?
384                .into_iter();
385            let first_choice = choices
386                .next()
387                .ok_or_else(|| content.error("expected at least one choice"))?;
388            Atom::Choice(Box::new(first_choice), choices.collect())
389        } else if lookahead.peek(syn::Lit) {
390            // Atom ::= Terminal
391            Atom::Terminal(input.parse()?)
392        } else if lookahead.peek(Token![<]) {
393            // Atom ::= '<' Type '>'
394            input.parse::<Token![<]>()?;
395            let ty = input.parse::<syn::Type>()?;
396            input.parse::<Token![>]>()?;
397            Atom::TypePterminal(ty)
398        } else if lookahead.peek(syn::Ident) {
399            let fork = input.fork();
400            if let Ok(pat) = fork.call(syn::Pat::parse_single) {
401                if matches!(&pat, syn::Pat::Ident(_)) {
402                    // Atom ::= NonTerminal
403                    Atom::NonTerminal(input.parse()?)
404                } else {
405                    // Atom ::= PatTerminal
406                    input.advance_to(&fork);
407                    Atom::PatTerminal(pat)
408                }
409            } else {
410                Err(lookahead.error())?
411            }
412        } else {
413            Err(lookahead.error())?
414        };
415
416        Ok(atom)
417    }
418}
419
420#[derive(Debug)]
421pub struct LexerConfig {
422    pub crate_name: Option<syn::Path>,
423    pub parse_macros: Rc<Vec<syn::Path>>,
424    pub debug: bool,
425}
426
427impl Default for LexerConfig {
428    fn default() -> Self {
429        Self {
430            crate_name: None,
431            parse_macros: Rc::new(vec![
432                syn::parse_quote! { print },
433                syn::parse_quote! { println },
434                syn::parse_quote! { eprint },
435                syn::parse_quote! { eprintln },
436                syn::parse_quote! { format },
437                syn::parse_quote! { dbg },
438            ]),
439            debug: false,
440        }
441    }
442}
443
444#[derive(Debug)]
445pub struct LexerMod {
446    pub attrs: Vec<syn::Attribute>,
447    pub mod_name: syn::Ident,
448    pub items: Vec<syn::Item>,
449    pub lexers: Vec<Lexer>,
450    pub config: ParserConfig,
451}
452
453impl LexerMod {
454    pub fn parse(
455        attrs: Vec<Attribute>,
456        mod_name: syn::Ident,
457        content: syn::parse::ParseStream,
458    ) -> syn::Result<Self> {
459        let mut common_attrs = vec![];
460        for attr in attrs {
461            if attr.path().is_ident("parse_it") {
462                attr.parse_nested_meta(|_meta| todo!())?;
463            } else {
464                common_attrs.push(attr);
465            }
466        }
467
468        let mut lexers = vec![];
469        let mut items = vec![];
470        while !content.is_empty() {
471            let fork = content.fork();
472            if let Ok(lexer) = fork.parse::<Lexer>() {
473                content.advance_to(&fork);
474                lexers.push(lexer);
475            } else {
476                let item = content.parse::<syn::Item>()?;
477                items.push(item);
478            }
479        }
480
481        Ok(Self {
482            attrs: common_attrs,
483            mod_name,
484            items,
485            lexers,
486            config: Default::default(),
487        })
488    }
489}
490
491/// ```text
492/// Lexer ::= Vis Name ('(' Parameter* ')')? ('->' Type)? '{' LexerRule+ '}'
493/// ```
494#[derive(Debug)]
495pub struct Lexer {
496    pub vis: syn::Visibility,
497    pub name: syn::Ident,
498    pub ty: Option<syn::Type>,
499    pub inputs: Punctuated<syn::PatType, Token![,]>,
500    pub rules: Vec<LexerRule>,
501}
502
503impl syn::parse::Parse for Lexer {
504    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
505        let vis = input.parse()?;
506        let name = input.parse()?;
507        let ty = if input.peek(Token![->]) {
508            input.parse::<Token![->]>()?;
509            Some(input.parse()?)
510        } else {
511            None
512        };
513
514        let inputs = if input.peek(syn::token::Paren) {
515            // Lexer ::= Vis Name '(' Parameter* ')'
516            let content;
517            syn::parenthesized!(content in input);
518            Punctuated::<syn::PatType, Token![,]>::parse_terminated(&content)?
519        } else {
520            Punctuated::new()
521        };
522
523        let content;
524        syn::braced!(content in input);
525
526        let mut rules = vec![];
527        while !content.is_empty() {
528            let rule = content.parse::<LexerRule>()?;
529            rules.push(rule);
530        }
531
532        Ok(Self {
533            vis,
534            name,
535            ty,
536            inputs,
537            rules,
538        })
539    }
540}
541
542/// ```text
543/// LexerRule ::= LexerPattern '=>' Expr
544/// ```
545#[derive(Debug)]
546pub struct LexerRule {
547    pub pattern: LexerPattern,
548    pub action: syn::Expr,
549}
550
551impl syn::parse::Parse for LexerRule {
552    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
553        let pattern = input.parse::<LexerPattern>()?;
554        input.parse::<Token![=>]>()?;
555        let action = input.parse::<syn::Expr>()?;
556        if (requires_comma_to_be_match_arm(&action) && !input.is_empty()) || input.peek(Token![,]) {
557            input.parse::<Token![,]>()?;
558        }
559        Ok(LexerRule { pattern, action })
560    }
561}
562
563/// ```text
564/// LexerPattern ::= Regex | Name
565/// ```
566#[derive(Debug)]
567pub enum LexerPattern {
568    Regex(syn::LitStr),
569    Name(syn::Ident),
570}
571
572impl syn::parse::Parse for LexerPattern {
573    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
574        let lookahead = input.lookahead1();
575        if lookahead.peek(syn::Ident) {
576            let ident = input.parse()?;
577            Ok(Self::Name(ident))
578        } else if lookahead.peek(syn::LitStr) {
579            let regex = input.parse()?;
580            Ok(Self::Regex(regex))
581        } else {
582            Err(lookahead.error())
583        }
584    }
585}
586
587fn requires_comma_to_be_match_arm(expr: &syn::Expr) -> bool {
588    use syn::Expr;
589    !matches!(
590        expr,
591        Expr::If(_)
592            | Expr::Match(_)
593            | Expr::Block(_)
594            | Expr::Unsafe(_)
595            | Expr::While(_)
596            | Expr::Loop(_)
597            | Expr::ForLoop(_)
598            | Expr::TryBlock(_)
599            | Expr::Const(_)
600    )
601}