parse_it_codegen/
frontend.rs

1use std::{rc::Rc, vec};
2
3use proc_macro2::{Span, TokenStream};
4use quote::{format_ident, quote, quote_spanned, ToTokens};
5use syn::{punctuated::Punctuated, visit_mut::VisitMut, Token};
6
7use crate::{
8    hash::{HashMap, HashSet, OrderedMap, OrderedSet},
9    middle::{Capture, MemoKind, Middle, ParserImpl, ParserRef, Parsing},
10    syntax::{Atom, ParseIt, Parser, Part, Production, Rule},
11};
12
13#[derive(Default)]
14struct Context {
15    pub parse_macros: Rc<Vec<syn::Path>>,
16    pub left_calls: HashMap<syn::Ident, HashSet<syn::Ident>>,
17    pub left_recursion: HashSet<syn::Ident>,
18    pub direct_depends: HashMap<syn::Ident, OrderedMap<syn::Ident, ParserRef>>,
19    pub depends: HashMap<syn::Ident, OrderedMap<syn::Ident, ParserRef>>,
20}
21
22struct ExprVisitor {
23    pub parse_macros: Rc<Vec<syn::Path>>,
24    /// replace `self` with this ident
25    pub self_ident: syn::Ident,
26    /// whether `self` is referred
27    pub referred_self: bool,
28}
29
30impl ExprVisitor {
31    pub fn new(parse_macros: Rc<Vec<syn::Path>>) -> Self {
32        Self {
33            parse_macros,
34            self_ident: format_ident!("r#__self", span = Span::call_site()),
35            referred_self: false,
36        }
37    }
38}
39
40impl VisitMut for ExprVisitor {
41    fn visit_ident_mut(&mut self, i: &mut proc_macro2::Ident) {
42        if i == "self" {
43            let span = i.span();
44            *i = self.self_ident.clone();
45            i.set_span(span);
46            self.referred_self = true;
47        }
48    }
49
50    fn visit_macro_mut(&mut self, m: &mut syn::Macro) {
51        if self.parse_macros.contains(&m.path) {
52            struct MacroArgs(pub Vec<syn::Expr>);
53            impl syn::parse::Parse for MacroArgs {
54                fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
55                    let args = Punctuated::<syn::Expr, Token![,]>::parse_terminated(input)?;
56                    Ok(Self(args.into_iter().collect()))
57                }
58            }
59
60            if let Ok(MacroArgs(mut args)) = syn::parse2::<MacroArgs>(m.tokens.clone()) {
61                for expr in args.iter_mut() {
62                    self.visit_expr_mut(expr);
63                }
64                m.tokens = TokenStream::new();
65                for expr in args {
66                    match expr {
67                        syn::Expr::Lit(syn::ExprLit { attrs, lit }) if attrs.is_empty() => {
68                            m.tokens.extend(lit.to_token_stream());
69                        }
70                        _ => {
71                            m.tokens.extend(quote! { #expr });
72                        }
73                    }
74                    m.tokens.extend(quote! {,});
75                }
76            }
77        }
78    }
79}
80
81impl ParseIt {
82    pub fn compile(self) -> Result<Middle, TokenStream> {
83        let mut ctx = Context {
84            parse_macros: self.config.parse_macros.clone(),
85            ..Default::default()
86        };
87
88        self.analyze_left_recursion(&mut ctx);
89        self.analyze_depends(&mut ctx);
90
91        let crate_name = match &self.config.crate_name {
92            Some(crate_name) => quote! { #crate_name },
93            None => quote! { ::parse_it },
94        };
95
96        let mut parsers = Vec::with_capacity(self.parsers.len());
97        for parser in self.parsers {
98            let parser = parser.compile(&mut ctx)?;
99            parsers.push(parser);
100        }
101
102        let mut items = self.items;
103        if !items.iter().any(|item| {
104            if let syn::Item::Type(ty) = item {
105                ty.ident == "Lexer"
106            } else {
107                false
108            }
109        }) {
110            items.push(syn::parse_quote! {
111                type Lexer<'a> = ::parse_it::CharLexer<'a>;
112            });
113        }
114
115        let middle = Middle {
116            attrs: self.attrs,
117            crate_name,
118            mod_name: self.mod_name,
119            items,
120            parsers,
121            debug: self.config.debug,
122        };
123        Ok(middle)
124    }
125
126    fn analyze_left_recursion(&self, ctx: &mut Context) {
127        for parser in &self.parsers {
128            parser.analyze_left_calls(ctx);
129        }
130
131        // left recursion is a FVS in the left_calls graph
132        for name in ctx.left_calls.keys() {
133            if ctx.left_recursion.contains(name) {
134                continue;
135            }
136            let mut stack = OrderedSet::default();
137            stack.insert(name);
138            while let Some(name) = stack.pop_back() {
139                for dep in &ctx.left_calls[name] {
140                    if ctx.left_recursion.contains(dep) {
141                        continue;
142                    }
143                    if !stack.insert(dep) || dep == name {
144                        ctx.left_recursion.insert(name.clone());
145                        break;
146                    }
147                }
148            }
149        }
150    }
151
152    fn analyze_depends(&self, ctx: &mut Context) {
153        for parser in &self.parsers {
154            parser.analyze_direct_depends(ctx);
155        }
156
157        // full dependencies are transitive closure of direct dependencies
158        for name in ctx.direct_depends.keys() {
159            let mut depends = OrderedMap::default();
160            let mut stack = vec![name];
161            while let Some(name) = stack.pop() {
162                if depends.contains_key(name) {
163                    continue;
164                }
165                depends.insert(name.clone(), ParserRef::new(name));
166                stack.extend(ctx.direct_depends[name].keys());
167            }
168            depends.remove(name);
169            ctx.depends.insert(name.clone(), depends);
170        }
171    }
172}
173
174impl Parser {
175    fn compile(self, ctx: &mut Context) -> Result<ParserImpl, TokenStream> {
176        let curr = ParserRef::new(&self.name);
177        let depends = ctx.depends[&self.name]
178            .iter()
179            .map(|(p, i)| (i.clone(), p.clone()))
180            .collect();
181        let mut parser = self.rules.0.compile(ctx)?;
182        if !self.rules.1.is_empty() {
183            parser = parser.choice_nocap(self.rules.1.into_iter().map(|rule| rule.compile(ctx)))?;
184        }
185
186        let memo = if ctx.left_recursion.contains(&self.name) {
187            MemoKind::LeftRec
188        } else {
189            MemoKind::Memorize
190        };
191
192        Ok(ParserImpl {
193            name: self.name,
194            curr,
195            parser,
196            memo,
197            vis: self.vis,
198            ret_ty: self.ty,
199            depends,
200        })
201    }
202
203    fn analyze_left_calls<'a>(&self, ctx: &'a mut Context) -> &'a HashSet<syn::Ident> {
204        ctx.left_calls
205            .entry(self.name.clone())
206            .or_insert_with(move || {
207                let mut set = HashSet::default();
208                for rule in self.rules() {
209                    set.extend(rule.left_calls());
210                }
211                set
212            })
213    }
214
215    fn analyze_direct_depends<'a>(
216        &self,
217        ctx: &'a mut Context,
218    ) -> &'a OrderedMap<syn::Ident, ParserRef> {
219        ctx.direct_depends
220            .entry(self.name.clone())
221            .or_insert_with(move || {
222                let mut depends = OrderedMap::default();
223                for rule in self.rules() {
224                    rule.production
225                        .analyze_direct_depends(&mut depends, &self.name);
226                }
227                depends
228            })
229    }
230}
231
232impl Rule {
233    fn compile(mut self, ctx: &mut Context) -> Result<Parsing, TokenStream> {
234        let mut parser = self.production.compile(ctx)?;
235
236        let mut visitor = ExprVisitor::new(ctx.parse_macros.clone());
237        visitor.visit_expr_mut(&mut self.action);
238        if visitor.referred_self {
239            parser.capture = Capture::Named(
240                Box::new(syn::Pat::Ident(syn::PatIdent {
241                    attrs: Vec::new(),
242                    by_ref: None,
243                    mutability: None,
244                    ident: visitor.self_ident,
245                    subpat: None,
246                })),
247                Box::new(parser.capture),
248            );
249        }
250
251        Ok(parser.map(self.action))
252    }
253
254    fn left_calls(&self) -> impl Iterator<Item = syn::Ident> + '_ {
255        self.production
256            .first_progress()
257            .filter_map(|part| match &part.part {
258                Atom::NonTerminal(p) => Some(p.clone()),
259                _ => None,
260            })
261    }
262}
263
264impl Production {
265    fn compile(self, ctx: &mut Context) -> Result<Parsing, TokenStream> {
266        let mut result = self.parts.0.compile(ctx)?;
267        for part in self.parts.1 {
268            let part = part.compile(ctx)?;
269            result = result.then(Box::new(part));
270        }
271        Ok(result)
272    }
273
274    /// Iterate over the parts that may "make first progress" when parsing.
275    fn first_progress(&self) -> impl Iterator<Item = &Part> {
276        let mut iter = self.parts();
277        let mut finished = false;
278        std::iter::from_fn(move || {
279            if finished {
280                return None;
281            }
282            for part in iter.by_ref() {
283                if part.part.must_progress() {
284                    finished = true;
285                    return Some(part);
286                } else if part.part.may_progress() {
287                    return Some(part);
288                }
289            }
290            finished = true;
291            None
292        })
293    }
294
295    fn analyze_direct_depends(
296        &self,
297        depends: &mut OrderedMap<syn::Ident, ParserRef>,
298        curr: &syn::Ident,
299    ) {
300        for part in self.parts() {
301            part.part.analyze_direct_depends(depends, curr);
302        }
303    }
304
305    /// Whether this production must make progress when parsing.
306    fn must_progress(&self) -> bool {
307        self.first_progress().any(|p| p.part.must_progress())
308    }
309
310    /// Whether this production may make progress when parsing.
311    fn may_progress(&self) -> bool {
312        self.first_progress().any(|p| p.part.may_progress())
313    }
314}
315
316impl Part {
317    fn compile(self, ctx: &mut Context) -> Result<Parsing, TokenStream> {
318        let mut parser = self.part.compile(ctx)?;
319        match self.capture {
320            crate::syntax::Capture::Named(name) => {
321                parser.capture = Capture::Named(name, Box::new(parser.capture));
322            }
323            crate::syntax::Capture::Loud => {
324                if !parser.capture.is_loud() {
325                    parser.capture = Capture::Loud;
326                }
327            }
328            crate::syntax::Capture::NotSpecified => {}
329        }
330        Ok(parser)
331    }
332}
333
334impl Atom {
335    fn compile(self, ctx: &mut Context) -> Result<Parsing, TokenStream> {
336        match self {
337            Atom::Terminal(lit) => Ok(Parsing::just(lit)),
338            Atom::PatTerminal(pat) => Ok(Parsing::just_pat(pat)),
339            Atom::NonTerminal(name) => {
340                let depends = ctx.depends.get(&name).ok_or_else(|| {
341                    quote_spanned! { name.span() => compile_error!("use of undeclared parser") }
342                })?;
343                let depends = depends.iter().map(|(_, p)| p.clone());
344                Ok(Parsing::call(name, depends))
345            }
346            Atom::Sub(p) => p.compile(ctx),
347            Atom::Choice(first, rest) => first
348                .compile(ctx)?
349                .choice(rest.into_iter().map(|p| p.compile(ctx))),
350            Atom::Repeat(p) => Ok(p.compile(ctx)?.repeat(0)),
351            Atom::Repeat1(p) => Ok(p.compile(ctx)?.repeat(1)),
352            Atom::Optional(p) => Ok(p.compile(ctx)?.optional()),
353            Atom::LookAhead(p) => Ok(p.compile(ctx)?.look_ahead()),
354            Atom::LookAheadNot(p) => Ok(p.compile(ctx)?.look_ahead_not()),
355        }
356    }
357
358    fn analyze_direct_depends(
359        &self,
360        depends: &mut OrderedMap<syn::Ident, ParserRef>,
361        curr: &syn::Ident,
362    ) {
363        match self {
364            Atom::NonTerminal(name) if name != curr => {
365                depends.insert(name.clone(), ParserRef::new(name));
366            }
367            Atom::Sub(p) => p.analyze_direct_depends(depends, curr),
368            Atom::Choice(first, rest) => {
369                first.analyze_direct_depends(depends, curr);
370                for p in rest {
371                    p.analyze_direct_depends(depends, curr);
372                }
373            }
374            Atom::Repeat(p)
375            | Atom::Repeat1(p)
376            | Atom::Optional(p)
377            | Atom::LookAhead(p)
378            | Atom::LookAheadNot(p) => p.analyze_direct_depends(depends, curr),
379            _ => {}
380        }
381    }
382
383    /// Whether this atom must make progress when parsing.
384    fn must_progress(&self) -> bool {
385        match self {
386            Atom::Terminal(_) | Atom::PatTerminal(_) | Atom::NonTerminal(_) => true,
387            Atom::Repeat(_) | Atom::Optional(_) | Atom::LookAhead(_) | Atom::LookAheadNot(_) => {
388                false
389            }
390            Atom::Sub(p) => p.must_progress(),
391            Atom::Choice(first, rest) => {
392                first.must_progress() && rest.iter().all(|p| p.must_progress())
393            }
394            Atom::Repeat1(p) => p.must_progress(),
395        }
396    }
397
398    /// Whether this atom may make progress when parsing.
399    fn may_progress(&self) -> bool {
400        match self {
401            Atom::Terminal(_) | Atom::PatTerminal(_) | Atom::NonTerminal(_) => true,
402            Atom::LookAhead(_) | Atom::LookAheadNot(_) => false,
403            Atom::Sub(p) => p.may_progress(),
404            Atom::Choice(first, rest) => {
405                first.may_progress() || rest.iter().any(|p| p.may_progress())
406            }
407            Atom::Repeat(p) | Atom::Repeat1(p) | Atom::Optional(p) => p.may_progress(),
408        }
409    }
410}