parse_it_codegen/parser/
frontend.rs

1use std::{rc::Rc, vec};
2
3use proc_macro2::{Span, TokenStream};
4use quote::{quote, quote_spanned};
5use syn::{spanned::Spanned, visit_mut::VisitMut};
6
7use crate::{
8    hash::{HashMap, HashSet, OrderedMap, OrderedSet},
9    parser::middle::{Capture, MemoKind, Middle, ParserImpl, ParserRef, Parsing},
10    syntax::{Atom, Parser, ParserMod, Part, Production, Rule},
11    utils::RewriteSelfVisitor,
12};
13
14#[derive(Default)]
15struct Context {
16    pub parse_macros: Rc<Vec<syn::Path>>,
17    pub left_calls: HashMap<syn::Ident, HashSet<syn::Ident>>,
18    pub left_recursion: HashSet<syn::Ident>,
19    pub direct_depends: HashMap<syn::Ident, OrderedMap<syn::Ident, ParserRef>>,
20    pub depends: HashMap<syn::Ident, OrderedMap<syn::Ident, ParserRef>>,
21}
22
23impl ParserMod {
24    pub fn compile(self) -> Result<Middle, TokenStream> {
25        let mut ctx = Context {
26            parse_macros: self.config.parse_macros.clone(),
27            ..Default::default()
28        };
29
30        self.check_missing_items(&mut ctx)?;
31        self.analyze_left_recursion(&mut ctx);
32        self.analyze_depends(&mut ctx);
33
34        let crate_name = match &self.config.crate_name {
35            Some(crate_name) => quote! { #crate_name },
36            None => quote! { ::parse_it },
37        };
38
39        let mut parsers = Vec::with_capacity(self.parsers.len());
40        for parser in self.parsers {
41            let parser = parser.compile(&mut ctx)?;
42            parsers.push(parser);
43        }
44
45        let middle = Middle {
46            attrs: self.attrs,
47            crate_name,
48            mod_name: self.mod_name,
49            items: self.items,
50            parsers,
51            debug: self.config.debug,
52        };
53        Ok(middle)
54    }
55
56    fn check_missing_items(&self, ctx: &mut Context) -> Result<(), TokenStream> {
57        let parsers = self
58            .parsers
59            .iter()
60            .map(|p| p.name.clone())
61            .collect::<HashSet<_>>();
62        for parser in &self.parsers {
63            for depend in parser.analyze_direct_depends(ctx).keys() {
64                if !parsers.contains(depend) {
65                    let e = format!("Parser `{depend}` not found");
66                    return Err(quote_spanned! { depend.span() => compile_error!(#e); });
67                }
68            }
69        }
70        Ok(())
71    }
72
73    fn analyze_left_recursion(&self, ctx: &mut Context) {
74        for parser in &self.parsers {
75            parser.analyze_left_calls(ctx);
76        }
77
78        // left recursion is a FVS in the left_calls graph
79        for name in ctx.left_calls.keys() {
80            if ctx.left_recursion.contains(name) {
81                continue;
82            }
83            let mut stack = OrderedSet::default();
84            stack.insert(name);
85            while let Some(name) = stack.pop_back() {
86                for dep in &ctx.left_calls[name] {
87                    if ctx.left_recursion.contains(dep) {
88                        continue;
89                    }
90                    if !stack.insert(dep) || dep == name {
91                        ctx.left_recursion.insert(name.clone());
92                        break;
93                    }
94                }
95            }
96        }
97    }
98
99    fn analyze_depends(&self, ctx: &mut Context) {
100        for parser in &self.parsers {
101            parser.analyze_direct_depends(ctx);
102        }
103
104        // full dependencies are transitive closure of direct dependencies
105        for name in ctx.direct_depends.keys() {
106            let mut depends = OrderedMap::default();
107            let mut stack = vec![name];
108            while let Some(name) = stack.pop() {
109                if depends.contains_key(name) {
110                    continue;
111                }
112                depends.insert(name.clone(), ParserRef::new(name));
113                stack.extend(ctx.direct_depends[name].keys());
114            }
115            depends.remove(name);
116            ctx.depends.insert(name.clone(), depends);
117        }
118    }
119}
120
121impl Parser {
122    fn compile(self, ctx: &mut Context) -> Result<ParserImpl, TokenStream> {
123        let curr = ParserRef::new(&self.name);
124        let depends = ctx.depends[&self.name]
125            .iter()
126            .map(|(p, i)| (i.clone(), p.clone()))
127            .collect();
128        if self.rules.is_empty() {
129            return Err(
130                quote_spanned! { self.name.span() => compile_error!("parser must have at least one rule"); },
131            );
132        }
133        let parser = Parsing::choice_nocap(
134            self.rules.into_iter().map(|rule| rule.compile(ctx)),
135            self.name.span(),
136        )?;
137
138        let memo = if ctx.left_recursion.contains(&self.name) {
139            MemoKind::LeftRec
140        } else {
141            MemoKind::Memorize
142        };
143
144        Ok(ParserImpl {
145            name: self.name,
146            curr,
147            parser,
148            memo,
149            vis: self.vis,
150            ret_ty: self.ty,
151            depends,
152        })
153    }
154
155    fn analyze_left_calls<'a>(&self, ctx: &'a mut Context) -> &'a HashSet<syn::Ident> {
156        ctx.left_calls
157            .entry(self.name.clone())
158            .or_insert_with(move || {
159                let mut set = HashSet::default();
160                for rule in &self.rules {
161                    set.extend(rule.left_calls());
162                }
163                set
164            })
165    }
166
167    fn analyze_direct_depends<'a>(
168        &self,
169        ctx: &'a mut Context,
170    ) -> &'a OrderedMap<syn::Ident, ParserRef> {
171        ctx.direct_depends
172            .entry(self.name.clone())
173            .or_insert_with(move || {
174                let mut depends = OrderedMap::default();
175                for rule in &self.rules {
176                    rule.production
177                        .analyze_direct_depends(&mut depends, &self.name);
178                }
179                depends
180            })
181    }
182}
183
184impl Rule {
185    fn compile(mut self, ctx: &mut Context) -> Result<Parsing, TokenStream> {
186        let span = self.action.span();
187        let mut parser = self.production.compile(ctx, span)?;
188
189        let mut visitor = RewriteSelfVisitor::new(ctx.parse_macros.clone());
190        visitor.visit_expr_mut(&mut self.action);
191        if visitor.referred_self {
192            parser.capture = Capture::Named(
193                Box::new(syn::Pat::Ident(syn::PatIdent {
194                    attrs: Vec::new(),
195                    by_ref: None,
196                    mutability: None,
197                    ident: visitor.self_ident,
198                    subpat: None,
199                })),
200                Box::new(parser.capture),
201            );
202        }
203
204        Ok(parser.map(self.action))
205    }
206
207    fn left_calls(&self) -> impl Iterator<Item = syn::Ident> + '_ {
208        self.production
209            .first_progress()
210            .filter_map(|part| match &part.part {
211                Atom::NonTerminal(p) => Some(p.clone()),
212                _ => None,
213            })
214    }
215}
216
217impl Production {
218    fn compile(self, ctx: &mut Context, span: Span) -> Result<Parsing, TokenStream> {
219        let mut result = self.parts.0.compile(ctx, span)?;
220        for part in self.parts.1 {
221            let part = part.compile(ctx, span)?;
222            result = result.then(Box::new(part));
223        }
224        Ok(result)
225    }
226
227    /// Iterate over the parts that may "make first progress" when parsing.
228    fn first_progress(&self) -> impl Iterator<Item = &Part> {
229        let mut iter = self.parts();
230        let mut finished = false;
231        std::iter::from_fn(move || {
232            if finished {
233                return None;
234            }
235            for part in iter.by_ref() {
236                if part.part.must_progress() {
237                    finished = true;
238                    return Some(part);
239                } else if part.part.may_progress() {
240                    return Some(part);
241                }
242            }
243            finished = true;
244            None
245        })
246    }
247
248    fn analyze_direct_depends(
249        &self,
250        depends: &mut OrderedMap<syn::Ident, ParserRef>,
251        curr: &syn::Ident,
252    ) {
253        for part in self.parts() {
254            part.part.analyze_direct_depends(depends, curr);
255        }
256    }
257
258    /// Whether this production must make progress when parsing.
259    fn must_progress(&self) -> bool {
260        self.first_progress().any(|p| p.part.must_progress())
261    }
262
263    /// Whether this production may make progress when parsing.
264    fn may_progress(&self) -> bool {
265        self.first_progress().any(|p| p.part.may_progress())
266    }
267}
268
269impl Part {
270    fn compile(self, ctx: &mut Context, span: Span) -> Result<Parsing, TokenStream> {
271        let mut parser = self.part.compile(ctx, span)?;
272        match self.capture {
273            crate::syntax::Capture::Named(name) => {
274                parser.capture = Capture::Named(name, Box::new(parser.capture));
275            }
276            crate::syntax::Capture::Loud => {
277                if !parser.capture.is_loud() {
278                    parser.capture = Capture::Loud;
279                }
280            }
281            crate::syntax::Capture::NotSpecified => {}
282        }
283        Ok(parser)
284    }
285}
286
287impl Atom {
288    fn compile(self, ctx: &mut Context, span: Span) -> Result<Parsing, TokenStream> {
289        match self {
290            Atom::Terminal(lit) => Ok(Parsing::just(lit, span)),
291            Atom::PatTerminal(pat) => Ok(Parsing::just_pat(pat, span)),
292            Atom::TypePterminal(ty) => Ok(Parsing::just_type(ty, span)),
293            Atom::NonTerminal(name) => {
294                let depends = ctx.depends.get(&name).ok_or_else(|| {
295                    quote_spanned! { name.span() => compile_error!("use of undeclared parser"); }
296                })?;
297                let depends = depends.iter().map(|(_, p)| p.clone()).collect();
298                Ok(Parsing::call(name, depends, span))
299            }
300            Atom::Sub(p) => p.compile(ctx, span),
301            Atom::Choice(first, rest) => first
302                .compile(ctx, span)?
303                .choice(rest.into_iter().map(|p| p.compile(ctx, span))),
304            Atom::Repeat(p) => Ok(p.compile(ctx, span)?.repeat(0)),
305            Atom::Repeat1(p) => Ok(p.compile(ctx, span)?.repeat(1)),
306            Atom::Optional(p) => Ok(p.compile(ctx, span)?.optional()),
307            Atom::LookAhead(p) => Ok(p.compile(ctx, span)?.look_ahead()),
308            Atom::LookAheadNot(p) => Ok(p.compile(ctx, span)?.look_ahead_not()),
309        }
310    }
311
312    fn analyze_direct_depends(
313        &self,
314        depends: &mut OrderedMap<syn::Ident, ParserRef>,
315        curr: &syn::Ident,
316    ) {
317        match self {
318            Atom::NonTerminal(name) if name != curr => {
319                depends.insert(name.clone(), ParserRef::new(name));
320            }
321            Atom::Sub(p) => p.analyze_direct_depends(depends, curr),
322            Atom::Choice(first, rest) => {
323                first.analyze_direct_depends(depends, curr);
324                for p in rest {
325                    p.analyze_direct_depends(depends, curr);
326                }
327            }
328            Atom::Repeat(p)
329            | Atom::Repeat1(p)
330            | Atom::Optional(p)
331            | Atom::LookAhead(p)
332            | Atom::LookAheadNot(p) => p.analyze_direct_depends(depends, curr),
333            _ => {}
334        }
335    }
336
337    /// Whether this atom must make progress when parsing.
338    fn must_progress(&self) -> bool {
339        match self {
340            Atom::Terminal(_)
341            | Atom::PatTerminal(_)
342            | Atom::TypePterminal(_)
343            | Atom::NonTerminal(_) => true,
344            Atom::Repeat(_) | Atom::Optional(_) | Atom::LookAhead(_) | Atom::LookAheadNot(_) => {
345                false
346            }
347            Atom::Sub(p) => p.must_progress(),
348            Atom::Choice(first, rest) => {
349                first.must_progress() && rest.iter().all(|p| p.must_progress())
350            }
351            Atom::Repeat1(p) => p.must_progress(),
352        }
353    }
354
355    /// Whether this atom may make progress when parsing.
356    fn may_progress(&self) -> bool {
357        match self {
358            Atom::Terminal(_)
359            | Atom::PatTerminal(_)
360            | Atom::TypePterminal(_)
361            | Atom::NonTerminal(_) => true,
362            Atom::LookAhead(_) | Atom::LookAheadNot(_) => false,
363            Atom::Sub(p) => p.may_progress(),
364            Atom::Choice(first, rest) => {
365                first.may_progress() || rest.iter().any(|p| p.may_progress())
366            }
367            Atom::Repeat(p) | Atom::Repeat1(p) | Atom::Optional(p) => p.may_progress(),
368        }
369    }
370}