parse_it_codegen/
frontend.rs

1use std::rc::Rc;
2use std::vec;
3
4use proc_macro2::{Span, TokenStream};
5use quote::{format_ident, quote, quote_spanned, ToTokens};
6use syn::punctuated::Punctuated;
7use syn::visit_mut::VisitMut;
8use syn::Token;
9
10use crate::hash::{HashMap, HashSet, OrderedMap, OrderedSet};
11use crate::middle::{Capture, MemoKind, Middle, ParserImpl, ParserRef, Parsing};
12use crate::syntax::{Atom, ParseIt, Parser, Part, Production, Rule};
13
14#[derive(Default)]
15struct Context {
16    pub parse_macros: Rc<Vec<syn::Path>>,
17    pub left_calls: HashMap<syn::Ident, HashSet<syn::Ident>>,
18    pub left_recursion: HashSet<syn::Ident>,
19    pub direct_depends: HashMap<syn::Ident, OrderedMap<syn::Ident, ParserRef>>,
20    pub depends: HashMap<syn::Ident, OrderedMap<syn::Ident, ParserRef>>,
21}
22
23struct ExprVisitor {
24    pub parse_macros: Rc<Vec<syn::Path>>,
25    /// replace `self` with this ident
26    pub self_ident: syn::Ident,
27    /// whether `self` is referred
28    pub referred_self: bool,
29}
30
31impl ExprVisitor {
32    pub fn new(parse_macros: Rc<Vec<syn::Path>>) -> Self {
33        Self {
34            parse_macros,
35            self_ident: format_ident!("r#__self", span = Span::call_site()),
36            referred_self: false,
37        }
38    }
39}
40
41impl VisitMut for ExprVisitor {
42    fn visit_ident_mut(&mut self, i: &mut proc_macro2::Ident) {
43        if i == "self" {
44            let span = i.span();
45            *i = self.self_ident.clone();
46            i.set_span(span);
47            self.referred_self = true;
48        }
49    }
50
51    fn visit_macro_mut(&mut self, m: &mut syn::Macro) {
52        if self.parse_macros.contains(&m.path) {
53            struct MacroArgs(pub Vec<syn::Expr>);
54            impl syn::parse::Parse for MacroArgs {
55                fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
56                    let args = Punctuated::<syn::Expr, Token![,]>::parse_terminated(input)?;
57                    Ok(Self(args.into_iter().collect()))
58                }
59            }
60
61            if let Ok(MacroArgs(mut args)) = syn::parse2::<MacroArgs>(m.tokens.clone()) {
62                for expr in args.iter_mut() {
63                    self.visit_expr_mut(expr);
64                }
65                m.tokens = TokenStream::new();
66                for expr in args {
67                    match expr {
68                        syn::Expr::Lit(syn::ExprLit { attrs, lit }) if attrs.is_empty() => {
69                            m.tokens.extend(lit.to_token_stream());
70                        }
71                        _ => {
72                            m.tokens.extend(quote! { #expr });
73                        }
74                    }
75                    m.tokens.extend(quote! {,});
76                }
77            }
78        }
79    }
80}
81
82impl ParseIt {
83    pub fn compile(self) -> Result<Middle, TokenStream> {
84        let mut ctx = Context {
85            parse_macros: self.config.parse_macros.clone(),
86            ..Default::default()
87        };
88
89        self.analyze_left_recursion(&mut ctx);
90        self.analyze_depends(&mut ctx);
91
92        let crate_name = match &self.config.crate_name {
93            Some(crate_name) => quote! { #crate_name },
94            None => quote! { ::parse_it },
95        };
96        let mut middle = Middle {
97            attrs: self.attrs,
98            crate_name,
99            mod_name: self.mod_name,
100            items: self.items,
101            parsers: vec![],
102        };
103        for parser in self.parsers {
104            let parser = parser.compile(&mut ctx)?;
105            middle.parsers.push(parser);
106        }
107
108        Ok(middle)
109    }
110
111    fn analyze_left_recursion(&self, ctx: &mut Context) {
112        for parser in &self.parsers {
113            parser.analyze_left_calls(ctx);
114        }
115
116        // left recursion is a FVS in the left_calls graph
117        for name in ctx.left_calls.keys() {
118            if ctx.left_recursion.contains(name) {
119                continue;
120            }
121            let mut stack = OrderedSet::default();
122            stack.insert(name);
123            while let Some(name) = stack.pop_back() {
124                for dep in &ctx.left_calls[name] {
125                    if ctx.left_recursion.contains(dep) {
126                        continue;
127                    }
128                    if !stack.insert(dep) || dep == name {
129                        ctx.left_recursion.insert(name.clone());
130                        break;
131                    }
132                }
133            }
134        }
135    }
136
137    fn analyze_depends(&self, ctx: &mut Context) {
138        for parser in &self.parsers {
139            parser.analyze_direct_depends(ctx);
140        }
141
142        // full dependencies are transitive closure of direct dependencies
143        for name in ctx.direct_depends.keys() {
144            let mut depends = OrderedMap::default();
145            let mut stack = vec![name];
146            while let Some(name) = stack.pop() {
147                if depends.contains_key(name) {
148                    continue;
149                }
150                depends.insert(name.clone(), ParserRef::new(name));
151                stack.extend(ctx.direct_depends[name].keys());
152            }
153            depends.remove(name);
154            ctx.depends.insert(name.clone(), depends);
155        }
156    }
157}
158
159impl Parser {
160    fn compile(self, ctx: &mut Context) -> Result<ParserImpl, TokenStream> {
161        let curr = ParserRef::new(&self.name);
162        let depends = ctx.depends[&self.name]
163            .iter()
164            .map(|(p, i)| (i.clone(), p.clone()))
165            .collect();
166        let mut parser = self.rules.0.compile(ctx)?;
167        if !self.rules.1.is_empty() {
168            parser = parser.choice_nocap(self.rules.1.into_iter().map(|rule| rule.compile(ctx)))?;
169        }
170
171        let memo = if ctx.left_recursion.contains(&self.name) {
172            MemoKind::LeftRec
173        } else {
174            MemoKind::Memorize
175        };
176
177        Ok(ParserImpl {
178            name: self.name,
179            curr,
180            parser,
181            memo,
182            vis: self.vis,
183            ret_ty: self.ty,
184            depends,
185        })
186    }
187
188    fn analyze_left_calls<'a>(&self, ctx: &'a mut Context) -> &'a HashSet<syn::Ident> {
189        ctx.left_calls
190            .entry(self.name.clone())
191            .or_insert_with(move || {
192                let mut set = HashSet::default();
193                for rule in self.rules() {
194                    set.extend(rule.left_calls());
195                }
196                set
197            })
198    }
199
200    fn analyze_direct_depends<'a>(
201        &self,
202        ctx: &'a mut Context,
203    ) -> &'a OrderedMap<syn::Ident, ParserRef> {
204        ctx.direct_depends
205            .entry(self.name.clone())
206            .or_insert_with(move || {
207                let mut depends = OrderedMap::default();
208                for rule in self.rules() {
209                    rule.production
210                        .analyze_direct_depends(&mut depends, &self.name);
211                }
212                depends
213            })
214    }
215}
216
217impl Rule {
218    fn compile(mut self, ctx: &mut Context) -> Result<Parsing, TokenStream> {
219        let mut parser = self.production.compile(ctx)?;
220
221        let mut visitor = ExprVisitor::new(ctx.parse_macros.clone());
222        visitor.visit_expr_mut(&mut self.action);
223        if visitor.referred_self {
224            parser.capture = Capture::Named(
225                Box::new(syn::Pat::Ident(syn::PatIdent {
226                    attrs: Vec::new(),
227                    by_ref: None,
228                    mutability: None,
229                    ident: visitor.self_ident,
230                    subpat: None,
231                })),
232                Box::new(parser.capture),
233            );
234        }
235
236        Ok(parser.map(self.action))
237    }
238
239    fn left_calls(&self) -> impl Iterator<Item = syn::Ident> + '_ {
240        self.production
241            .first_progress()
242            .filter_map(|part| match &part.part {
243                Atom::NonTerminal(p) => Some(p.clone()),
244                _ => None,
245            })
246    }
247}
248
249impl Production {
250    fn compile(self, ctx: &mut Context) -> Result<Parsing, TokenStream> {
251        let mut result = self.parts.0.compile(ctx)?;
252        for part in self.parts.1 {
253            let part = part.compile(ctx)?;
254            result = result.then(Box::new(part));
255        }
256        Ok(result)
257    }
258
259    /// Iterate over the parts that may "make first progress" when parsing.
260    fn first_progress(&self) -> impl Iterator<Item = &Part> {
261        let mut iter = self.parts();
262        let mut finished = false;
263        std::iter::from_fn(move || {
264            if finished {
265                return None;
266            }
267            for part in iter.by_ref() {
268                if part.part.must_progress() {
269                    finished = true;
270                    return Some(part);
271                } else if part.part.may_progress() {
272                    return Some(part);
273                }
274            }
275            finished = true;
276            None
277        })
278    }
279
280    fn analyze_direct_depends(
281        &self,
282        depends: &mut OrderedMap<syn::Ident, ParserRef>,
283        curr: &syn::Ident,
284    ) {
285        for part in self.parts() {
286            part.part.analyze_direct_depends(depends, curr);
287        }
288    }
289
290    /// Whether this production must make progress when parsing.
291    fn must_progress(&self) -> bool {
292        self.first_progress().any(|p| p.part.must_progress())
293    }
294
295    /// Whether this production may make progress when parsing.
296    fn may_progress(&self) -> bool {
297        self.first_progress().any(|p| p.part.may_progress())
298    }
299}
300
301impl Part {
302    fn compile(self, ctx: &mut Context) -> Result<Parsing, TokenStream> {
303        let mut parser = self.part.compile(ctx)?;
304        match self.capture {
305            crate::syntax::Capture::Named(name) => {
306                parser.capture = Capture::Named(name, Box::new(parser.capture));
307            }
308            crate::syntax::Capture::Loud => {
309                if !parser.capture.is_loud() {
310                    parser.capture = Capture::Loud;
311                }
312            }
313            crate::syntax::Capture::NotSpecified => {}
314        }
315        Ok(parser)
316    }
317}
318
319impl Atom {
320    fn compile(self, ctx: &mut Context) -> Result<Parsing, TokenStream> {
321        match self {
322            Atom::Terminal(lit) => match lit {
323                syn::Lit::Char(c) => Ok(Parsing::just(c.value())),
324                _ => Err(quote_spanned! { lit.span() => compile_error!("unsupported literal") })?,
325            },
326            Atom::NonTerminal(name) => {
327                let depends = ctx.depends.get(&name).ok_or_else(|| {
328                    quote_spanned! { name.span() => compile_error!("use of undeclared parser") }
329                })?;
330                let depends = depends.iter().map(|(_, p)| p.clone());
331                Ok(Parsing::call(name, depends))
332            }
333            Atom::Sub(p) => p.compile(ctx),
334            Atom::Choice(first, rest) => first
335                .compile(ctx)?
336                .choice(rest.into_iter().map(|p| p.compile(ctx))),
337            Atom::Repeat(p) => Ok(p.compile(ctx)?.repeat(0)),
338            Atom::Repeat1(p) => Ok(p.compile(ctx)?.repeat(1)),
339            Atom::Optional(p) => Ok(p.compile(ctx)?.optional()),
340            Atom::LookAhead(p) => Ok(p.compile(ctx)?.look_ahead()),
341            Atom::LookAheadNot(p) => Ok(p.compile(ctx)?.look_ahead_not()),
342        }
343    }
344
345    fn analyze_direct_depends(
346        &self,
347        depends: &mut OrderedMap<syn::Ident, ParserRef>,
348        curr: &syn::Ident,
349    ) {
350        match self {
351            Atom::NonTerminal(name) if name != curr => {
352                depends.insert(name.clone(), ParserRef::new(name));
353            }
354            Atom::Sub(p) => p.analyze_direct_depends(depends, curr),
355            Atom::Choice(first, rest) => {
356                first.analyze_direct_depends(depends, curr);
357                for p in rest {
358                    p.analyze_direct_depends(depends, curr);
359                }
360            }
361            Atom::Repeat(p)
362            | Atom::Repeat1(p)
363            | Atom::Optional(p)
364            | Atom::LookAhead(p)
365            | Atom::LookAheadNot(p) => p.analyze_direct_depends(depends, curr),
366            _ => {}
367        }
368    }
369
370    /// Whether this atom must make progress when parsing.
371    fn must_progress(&self) -> bool {
372        match self {
373            Atom::Terminal(_) | Atom::NonTerminal(_) => true,
374            Atom::Repeat(_) | Atom::Optional(_) | Atom::LookAhead(_) | Atom::LookAheadNot(_) => {
375                false
376            }
377            Atom::Sub(p) => p.must_progress(),
378            Atom::Choice(first, rest) => {
379                first.must_progress() && rest.iter().all(|p| p.must_progress())
380            }
381            Atom::Repeat1(p) => p.must_progress(),
382        }
383    }
384
385    /// Whether this atom may make progress when parsing.
386    fn may_progress(&self) -> bool {
387        match self {
388            Atom::Terminal(_) | Atom::NonTerminal(_) => true,
389            Atom::LookAhead(_) | Atom::LookAheadNot(_) => false,
390            Atom::Sub(p) => p.may_progress(),
391            Atom::Choice(first, rest) => {
392                first.may_progress() || rest.iter().any(|p| p.may_progress())
393            }
394            Atom::Repeat(p) | Atom::Repeat1(p) | Atom::Optional(p) => p.may_progress(),
395        }
396    }
397}