1use std::{rc::Rc, vec};
2
3use proc_macro2::{Span, TokenStream};
4use quote::{quote, quote_spanned};
5use syn::{spanned::Spanned, visit_mut::VisitMut};
6
7use crate::{
8 hash::{HashMap, HashSet, OrderedMap, OrderedSet},
9 parser::middle::{Capture, MemoKind, Middle, ParserImpl, ParserRef, Parsing},
10 syntax::{Atom, Parser, ParserMod, Part, Production, Rule},
11 utils::RewriteSelfVisitor,
12};
13
14#[derive(Default)]
15struct Context {
16 pub parse_macros: Rc<Vec<syn::Path>>,
17 pub left_calls: HashMap<syn::Ident, HashSet<syn::Ident>>,
18 pub left_recursion: HashSet<syn::Ident>,
19 pub direct_depends: HashMap<syn::Ident, OrderedMap<syn::Ident, ParserRef>>,
20 pub depends: HashMap<syn::Ident, OrderedMap<syn::Ident, ParserRef>>,
21}
22
23impl ParserMod {
24 pub fn compile(self) -> Result<Middle, TokenStream> {
25 let mut ctx = Context {
26 parse_macros: self.config.parse_macros.clone(),
27 ..Default::default()
28 };
29
30 self.check_missing_items(&mut ctx)?;
31 self.analyze_left_recursion(&mut ctx);
32 self.analyze_depends(&mut ctx);
33
34 let crate_name = match &self.config.crate_name {
35 Some(crate_name) => quote! { #crate_name },
36 None => quote! { ::parse_it },
37 };
38
39 let mut parsers = Vec::with_capacity(self.parsers.len());
40 for parser in self.parsers {
41 let parser = parser.compile(&mut ctx)?;
42 parsers.push(parser);
43 }
44
45 let middle = Middle {
46 attrs: self.attrs,
47 crate_name,
48 mod_name: self.mod_name,
49 items: self.items,
50 parsers,
51 debug: self.config.debug,
52 };
53 Ok(middle)
54 }
55
56 fn check_missing_items(&self, ctx: &mut Context) -> Result<(), TokenStream> {
57 let parsers = self
58 .parsers
59 .iter()
60 .map(|p| p.name.clone())
61 .collect::<HashSet<_>>();
62 for parser in &self.parsers {
63 for depend in parser.analyze_direct_depends(ctx).keys() {
64 if !parsers.contains(depend) {
65 let e = format!("Parser `{depend}` not found");
66 return Err(quote_spanned! { depend.span() => compile_error!(#e); });
67 }
68 }
69 }
70 Ok(())
71 }
72
73 fn analyze_left_recursion(&self, ctx: &mut Context) {
74 for parser in &self.parsers {
75 parser.analyze_left_calls(ctx);
76 }
77
78 for name in ctx.left_calls.keys() {
80 if ctx.left_recursion.contains(name) {
81 continue;
82 }
83 let mut stack = OrderedSet::default();
84 stack.insert(name);
85 while let Some(name) = stack.pop_back() {
86 for dep in &ctx.left_calls[name] {
87 if ctx.left_recursion.contains(dep) {
88 continue;
89 }
90 if !stack.insert(dep) || dep == name {
91 ctx.left_recursion.insert(name.clone());
92 break;
93 }
94 }
95 }
96 }
97 }
98
99 fn analyze_depends(&self, ctx: &mut Context) {
100 for parser in &self.parsers {
101 parser.analyze_direct_depends(ctx);
102 }
103
104 for name in ctx.direct_depends.keys() {
106 let mut depends = OrderedMap::default();
107 let mut stack = vec![name];
108 while let Some(name) = stack.pop() {
109 if depends.contains_key(name) {
110 continue;
111 }
112 depends.insert(name.clone(), ParserRef::new(name));
113 stack.extend(ctx.direct_depends[name].keys());
114 }
115 depends.remove(name);
116 ctx.depends.insert(name.clone(), depends);
117 }
118 }
119}
120
121impl Parser {
122 fn compile(self, ctx: &mut Context) -> Result<ParserImpl, TokenStream> {
123 let curr = ParserRef::new(&self.name);
124 let depends = ctx.depends[&self.name]
125 .iter()
126 .map(|(p, i)| (i.clone(), p.clone()))
127 .collect();
128 if self.rules.is_empty() {
129 return Err(
130 quote_spanned! { self.name.span() => compile_error!("parser must have at least one rule"); },
131 );
132 }
133 let parser = Parsing::choice_nocap(
134 self.rules.into_iter().map(|rule| rule.compile(ctx)),
135 self.name.span(),
136 )?;
137
138 let memo = if ctx.left_recursion.contains(&self.name) {
139 MemoKind::LeftRec
140 } else {
141 MemoKind::Memorize
142 };
143
144 Ok(ParserImpl {
145 name: self.name,
146 curr,
147 parser,
148 memo,
149 vis: self.vis,
150 ret_ty: self.ty,
151 depends,
152 })
153 }
154
155 fn analyze_left_calls<'a>(&self, ctx: &'a mut Context) -> &'a HashSet<syn::Ident> {
156 ctx.left_calls
157 .entry(self.name.clone())
158 .or_insert_with(move || {
159 let mut set = HashSet::default();
160 for rule in &self.rules {
161 set.extend(rule.left_calls());
162 }
163 set
164 })
165 }
166
167 fn analyze_direct_depends<'a>(
168 &self,
169 ctx: &'a mut Context,
170 ) -> &'a OrderedMap<syn::Ident, ParserRef> {
171 ctx.direct_depends
172 .entry(self.name.clone())
173 .or_insert_with(move || {
174 let mut depends = OrderedMap::default();
175 for rule in &self.rules {
176 rule.production
177 .analyze_direct_depends(&mut depends, &self.name);
178 }
179 depends
180 })
181 }
182}
183
184impl Rule {
185 fn compile(mut self, ctx: &mut Context) -> Result<Parsing, TokenStream> {
186 let span = self.action.span();
187 let mut parser = self.production.compile(ctx, span)?;
188
189 let mut visitor = RewriteSelfVisitor::new(ctx.parse_macros.clone());
190 visitor.visit_expr_mut(&mut self.action);
191 if visitor.referred_self {
192 parser.capture = Capture::Named(
193 Box::new(syn::Pat::Ident(syn::PatIdent {
194 attrs: Vec::new(),
195 by_ref: None,
196 mutability: None,
197 ident: visitor.self_ident,
198 subpat: None,
199 })),
200 Box::new(parser.capture),
201 );
202 }
203
204 Ok(parser.map(self.action))
205 }
206
207 fn left_calls(&self) -> impl Iterator<Item = syn::Ident> + '_ {
208 self.production
209 .first_progress()
210 .filter_map(|part| match &part.part {
211 Atom::NonTerminal(p) => Some(p.clone()),
212 _ => None,
213 })
214 }
215}
216
217impl Production {
218 fn compile(self, ctx: &mut Context, span: Span) -> Result<Parsing, TokenStream> {
219 let mut result = self.parts.0.compile(ctx, span)?;
220 for part in self.parts.1 {
221 let part = part.compile(ctx, span)?;
222 result = result.then(Box::new(part));
223 }
224 Ok(result)
225 }
226
227 fn first_progress(&self) -> impl Iterator<Item = &Part> {
229 let mut iter = self.parts();
230 let mut finished = false;
231 std::iter::from_fn(move || {
232 if finished {
233 return None;
234 }
235 for part in iter.by_ref() {
236 if part.part.must_progress() {
237 finished = true;
238 return Some(part);
239 } else if part.part.may_progress() {
240 return Some(part);
241 }
242 }
243 finished = true;
244 None
245 })
246 }
247
248 fn analyze_direct_depends(
249 &self,
250 depends: &mut OrderedMap<syn::Ident, ParserRef>,
251 curr: &syn::Ident,
252 ) {
253 for part in self.parts() {
254 part.part.analyze_direct_depends(depends, curr);
255 }
256 }
257
258 fn must_progress(&self) -> bool {
260 self.first_progress().any(|p| p.part.must_progress())
261 }
262
263 fn may_progress(&self) -> bool {
265 self.first_progress().any(|p| p.part.may_progress())
266 }
267}
268
269impl Part {
270 fn compile(self, ctx: &mut Context, span: Span) -> Result<Parsing, TokenStream> {
271 let mut parser = self.part.compile(ctx, span)?;
272 match self.capture {
273 crate::syntax::Capture::Named(name) => {
274 parser.capture = Capture::Named(name, Box::new(parser.capture));
275 }
276 crate::syntax::Capture::Loud => {
277 if !parser.capture.is_loud() {
278 parser.capture = Capture::Loud;
279 }
280 }
281 crate::syntax::Capture::NotSpecified => {}
282 }
283 Ok(parser)
284 }
285}
286
287impl Atom {
288 fn compile(self, ctx: &mut Context, span: Span) -> Result<Parsing, TokenStream> {
289 match self {
290 Atom::Terminal(lit) => Ok(Parsing::just(lit, span)),
291 Atom::PatTerminal(pat) => Ok(Parsing::just_pat(pat, span)),
292 Atom::TypePterminal(ty) => Ok(Parsing::just_type(ty, span)),
293 Atom::NonTerminal(name) => {
294 let depends = ctx.depends.get(&name).ok_or_else(|| {
295 quote_spanned! { name.span() => compile_error!("use of undeclared parser"); }
296 })?;
297 let depends = depends.iter().map(|(_, p)| p.clone()).collect();
298 Ok(Parsing::call(name, depends, span))
299 }
300 Atom::Sub(p) => p.compile(ctx, span),
301 Atom::Choice(first, rest) => first
302 .compile(ctx, span)?
303 .choice(rest.into_iter().map(|p| p.compile(ctx, span))),
304 Atom::Repeat(p) => Ok(p.compile(ctx, span)?.repeat(0)),
305 Atom::Repeat1(p) => Ok(p.compile(ctx, span)?.repeat(1)),
306 Atom::Optional(p) => Ok(p.compile(ctx, span)?.optional()),
307 Atom::LookAhead(p) => Ok(p.compile(ctx, span)?.look_ahead()),
308 Atom::LookAheadNot(p) => Ok(p.compile(ctx, span)?.look_ahead_not()),
309 }
310 }
311
312 fn analyze_direct_depends(
313 &self,
314 depends: &mut OrderedMap<syn::Ident, ParserRef>,
315 curr: &syn::Ident,
316 ) {
317 match self {
318 Atom::NonTerminal(name) if name != curr => {
319 depends.insert(name.clone(), ParserRef::new(name));
320 }
321 Atom::Sub(p) => p.analyze_direct_depends(depends, curr),
322 Atom::Choice(first, rest) => {
323 first.analyze_direct_depends(depends, curr);
324 for p in rest {
325 p.analyze_direct_depends(depends, curr);
326 }
327 }
328 Atom::Repeat(p)
329 | Atom::Repeat1(p)
330 | Atom::Optional(p)
331 | Atom::LookAhead(p)
332 | Atom::LookAheadNot(p) => p.analyze_direct_depends(depends, curr),
333 _ => {}
334 }
335 }
336
337 fn must_progress(&self) -> bool {
339 match self {
340 Atom::Terminal(_)
341 | Atom::PatTerminal(_)
342 | Atom::TypePterminal(_)
343 | Atom::NonTerminal(_) => true,
344 Atom::Repeat(_) | Atom::Optional(_) | Atom::LookAhead(_) | Atom::LookAheadNot(_) => {
345 false
346 }
347 Atom::Sub(p) => p.must_progress(),
348 Atom::Choice(first, rest) => {
349 first.must_progress() && rest.iter().all(|p| p.must_progress())
350 }
351 Atom::Repeat1(p) => p.must_progress(),
352 }
353 }
354
355 fn may_progress(&self) -> bool {
357 match self {
358 Atom::Terminal(_)
359 | Atom::PatTerminal(_)
360 | Atom::TypePterminal(_)
361 | Atom::NonTerminal(_) => true,
362 Atom::LookAhead(_) | Atom::LookAheadNot(_) => false,
363 Atom::Sub(p) => p.may_progress(),
364 Atom::Choice(first, rest) => {
365 first.may_progress() || rest.iter().any(|p| p.may_progress())
366 }
367 Atom::Repeat(p) | Atom::Repeat1(p) | Atom::Optional(p) => p.may_progress(),
368 }
369 }
370}