1use std::{rc::Rc, vec};
2
3use proc_macro2::{Span, TokenStream};
4use quote::{format_ident, quote, quote_spanned, ToTokens};
5use syn::{punctuated::Punctuated, visit_mut::VisitMut, Token};
6
7use crate::{
8 hash::{HashMap, HashSet, OrderedMap, OrderedSet},
9 middle::{Capture, MemoKind, Middle, ParserImpl, ParserRef, Parsing},
10 syntax::{Atom, ParseIt, Parser, Part, Production, Rule},
11};
12
13#[derive(Default)]
14struct Context {
15 pub parse_macros: Rc<Vec<syn::Path>>,
16 pub left_calls: HashMap<syn::Ident, HashSet<syn::Ident>>,
17 pub left_recursion: HashSet<syn::Ident>,
18 pub direct_depends: HashMap<syn::Ident, OrderedMap<syn::Ident, ParserRef>>,
19 pub depends: HashMap<syn::Ident, OrderedMap<syn::Ident, ParserRef>>,
20}
21
22struct ExprVisitor {
23 pub parse_macros: Rc<Vec<syn::Path>>,
24 pub self_ident: syn::Ident,
26 pub referred_self: bool,
28}
29
30impl ExprVisitor {
31 pub fn new(parse_macros: Rc<Vec<syn::Path>>) -> Self {
32 Self {
33 parse_macros,
34 self_ident: format_ident!("r#__self", span = Span::call_site()),
35 referred_self: false,
36 }
37 }
38}
39
40impl VisitMut for ExprVisitor {
41 fn visit_ident_mut(&mut self, i: &mut proc_macro2::Ident) {
42 if i == "self" {
43 let span = i.span();
44 *i = self.self_ident.clone();
45 i.set_span(span);
46 self.referred_self = true;
47 }
48 }
49
50 fn visit_macro_mut(&mut self, m: &mut syn::Macro) {
51 if self.parse_macros.contains(&m.path) {
52 struct MacroArgs(pub Vec<syn::Expr>);
53 impl syn::parse::Parse for MacroArgs {
54 fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
55 let args = Punctuated::<syn::Expr, Token![,]>::parse_terminated(input)?;
56 Ok(Self(args.into_iter().collect()))
57 }
58 }
59
60 if let Ok(MacroArgs(mut args)) = syn::parse2::<MacroArgs>(m.tokens.clone()) {
61 for expr in args.iter_mut() {
62 self.visit_expr_mut(expr);
63 }
64 m.tokens = TokenStream::new();
65 for expr in args {
66 match expr {
67 syn::Expr::Lit(syn::ExprLit { attrs, lit }) if attrs.is_empty() => {
68 m.tokens.extend(lit.to_token_stream());
69 }
70 _ => {
71 m.tokens.extend(quote! { #expr });
72 }
73 }
74 m.tokens.extend(quote! {,});
75 }
76 }
77 }
78 }
79}
80
81impl ParseIt {
82 pub fn compile(self) -> Result<Middle, TokenStream> {
83 let mut ctx = Context {
84 parse_macros: self.config.parse_macros.clone(),
85 ..Default::default()
86 };
87
88 self.analyze_left_recursion(&mut ctx);
89 self.analyze_depends(&mut ctx);
90
91 let crate_name = match &self.config.crate_name {
92 Some(crate_name) => quote! { #crate_name },
93 None => quote! { ::parse_it },
94 };
95
96 let mut parsers = Vec::with_capacity(self.parsers.len());
97 for parser in self.parsers {
98 let parser = parser.compile(&mut ctx)?;
99 parsers.push(parser);
100 }
101
102 let mut items = self.items;
103 if !items.iter().any(|item| {
104 if let syn::Item::Type(ty) = item {
105 ty.ident == "Lexer"
106 } else {
107 false
108 }
109 }) {
110 items.push(syn::parse_quote! {
111 type Lexer<'a> = ::parse_it::CharLexer<'a>;
112 });
113 }
114
115 let middle = Middle {
116 attrs: self.attrs,
117 crate_name,
118 mod_name: self.mod_name,
119 items,
120 parsers,
121 debug: self.config.debug,
122 };
123 Ok(middle)
124 }
125
126 fn analyze_left_recursion(&self, ctx: &mut Context) {
127 for parser in &self.parsers {
128 parser.analyze_left_calls(ctx);
129 }
130
131 for name in ctx.left_calls.keys() {
133 if ctx.left_recursion.contains(name) {
134 continue;
135 }
136 let mut stack = OrderedSet::default();
137 stack.insert(name);
138 while let Some(name) = stack.pop_back() {
139 for dep in &ctx.left_calls[name] {
140 if ctx.left_recursion.contains(dep) {
141 continue;
142 }
143 if !stack.insert(dep) || dep == name {
144 ctx.left_recursion.insert(name.clone());
145 break;
146 }
147 }
148 }
149 }
150 }
151
152 fn analyze_depends(&self, ctx: &mut Context) {
153 for parser in &self.parsers {
154 parser.analyze_direct_depends(ctx);
155 }
156
157 for name in ctx.direct_depends.keys() {
159 let mut depends = OrderedMap::default();
160 let mut stack = vec![name];
161 while let Some(name) = stack.pop() {
162 if depends.contains_key(name) {
163 continue;
164 }
165 depends.insert(name.clone(), ParserRef::new(name));
166 stack.extend(ctx.direct_depends[name].keys());
167 }
168 depends.remove(name);
169 ctx.depends.insert(name.clone(), depends);
170 }
171 }
172}
173
174impl Parser {
175 fn compile(self, ctx: &mut Context) -> Result<ParserImpl, TokenStream> {
176 let curr = ParserRef::new(&self.name);
177 let depends = ctx.depends[&self.name]
178 .iter()
179 .map(|(p, i)| (i.clone(), p.clone()))
180 .collect();
181 let mut parser = self.rules.0.compile(ctx)?;
182 if !self.rules.1.is_empty() {
183 parser = parser.choice_nocap(self.rules.1.into_iter().map(|rule| rule.compile(ctx)))?;
184 }
185
186 let memo = if ctx.left_recursion.contains(&self.name) {
187 MemoKind::LeftRec
188 } else {
189 MemoKind::Memorize
190 };
191
192 Ok(ParserImpl {
193 name: self.name,
194 curr,
195 parser,
196 memo,
197 vis: self.vis,
198 ret_ty: self.ty,
199 depends,
200 })
201 }
202
203 fn analyze_left_calls<'a>(&self, ctx: &'a mut Context) -> &'a HashSet<syn::Ident> {
204 ctx.left_calls
205 .entry(self.name.clone())
206 .or_insert_with(move || {
207 let mut set = HashSet::default();
208 for rule in self.rules() {
209 set.extend(rule.left_calls());
210 }
211 set
212 })
213 }
214
215 fn analyze_direct_depends<'a>(
216 &self,
217 ctx: &'a mut Context,
218 ) -> &'a OrderedMap<syn::Ident, ParserRef> {
219 ctx.direct_depends
220 .entry(self.name.clone())
221 .or_insert_with(move || {
222 let mut depends = OrderedMap::default();
223 for rule in self.rules() {
224 rule.production
225 .analyze_direct_depends(&mut depends, &self.name);
226 }
227 depends
228 })
229 }
230}
231
232impl Rule {
233 fn compile(mut self, ctx: &mut Context) -> Result<Parsing, TokenStream> {
234 let mut parser = self.production.compile(ctx)?;
235
236 let mut visitor = ExprVisitor::new(ctx.parse_macros.clone());
237 visitor.visit_expr_mut(&mut self.action);
238 if visitor.referred_self {
239 parser.capture = Capture::Named(
240 Box::new(syn::Pat::Ident(syn::PatIdent {
241 attrs: Vec::new(),
242 by_ref: None,
243 mutability: None,
244 ident: visitor.self_ident,
245 subpat: None,
246 })),
247 Box::new(parser.capture),
248 );
249 }
250
251 Ok(parser.map(self.action))
252 }
253
254 fn left_calls(&self) -> impl Iterator<Item = syn::Ident> + '_ {
255 self.production
256 .first_progress()
257 .filter_map(|part| match &part.part {
258 Atom::NonTerminal(p) => Some(p.clone()),
259 _ => None,
260 })
261 }
262}
263
264impl Production {
265 fn compile(self, ctx: &mut Context) -> Result<Parsing, TokenStream> {
266 let mut result = self.parts.0.compile(ctx)?;
267 for part in self.parts.1 {
268 let part = part.compile(ctx)?;
269 result = result.then(Box::new(part));
270 }
271 Ok(result)
272 }
273
274 fn first_progress(&self) -> impl Iterator<Item = &Part> {
276 let mut iter = self.parts();
277 let mut finished = false;
278 std::iter::from_fn(move || {
279 if finished {
280 return None;
281 }
282 for part in iter.by_ref() {
283 if part.part.must_progress() {
284 finished = true;
285 return Some(part);
286 } else if part.part.may_progress() {
287 return Some(part);
288 }
289 }
290 finished = true;
291 None
292 })
293 }
294
295 fn analyze_direct_depends(
296 &self,
297 depends: &mut OrderedMap<syn::Ident, ParserRef>,
298 curr: &syn::Ident,
299 ) {
300 for part in self.parts() {
301 part.part.analyze_direct_depends(depends, curr);
302 }
303 }
304
305 fn must_progress(&self) -> bool {
307 self.first_progress().any(|p| p.part.must_progress())
308 }
309
310 fn may_progress(&self) -> bool {
312 self.first_progress().any(|p| p.part.may_progress())
313 }
314}
315
316impl Part {
317 fn compile(self, ctx: &mut Context) -> Result<Parsing, TokenStream> {
318 let mut parser = self.part.compile(ctx)?;
319 match self.capture {
320 crate::syntax::Capture::Named(name) => {
321 parser.capture = Capture::Named(name, Box::new(parser.capture));
322 }
323 crate::syntax::Capture::Loud => {
324 if !parser.capture.is_loud() {
325 parser.capture = Capture::Loud;
326 }
327 }
328 crate::syntax::Capture::NotSpecified => {}
329 }
330 Ok(parser)
331 }
332}
333
334impl Atom {
335 fn compile(self, ctx: &mut Context) -> Result<Parsing, TokenStream> {
336 match self {
337 Atom::Terminal(lit) => Ok(Parsing::just(lit)),
338 Atom::PatTerminal(pat) => Ok(Parsing::just_pat(pat)),
339 Atom::NonTerminal(name) => {
340 let depends = ctx.depends.get(&name).ok_or_else(|| {
341 quote_spanned! { name.span() => compile_error!("use of undeclared parser") }
342 })?;
343 let depends = depends.iter().map(|(_, p)| p.clone());
344 Ok(Parsing::call(name, depends))
345 }
346 Atom::Sub(p) => p.compile(ctx),
347 Atom::Choice(first, rest) => first
348 .compile(ctx)?
349 .choice(rest.into_iter().map(|p| p.compile(ctx))),
350 Atom::Repeat(p) => Ok(p.compile(ctx)?.repeat(0)),
351 Atom::Repeat1(p) => Ok(p.compile(ctx)?.repeat(1)),
352 Atom::Optional(p) => Ok(p.compile(ctx)?.optional()),
353 Atom::LookAhead(p) => Ok(p.compile(ctx)?.look_ahead()),
354 Atom::LookAheadNot(p) => Ok(p.compile(ctx)?.look_ahead_not()),
355 }
356 }
357
358 fn analyze_direct_depends(
359 &self,
360 depends: &mut OrderedMap<syn::Ident, ParserRef>,
361 curr: &syn::Ident,
362 ) {
363 match self {
364 Atom::NonTerminal(name) if name != curr => {
365 depends.insert(name.clone(), ParserRef::new(name));
366 }
367 Atom::Sub(p) => p.analyze_direct_depends(depends, curr),
368 Atom::Choice(first, rest) => {
369 first.analyze_direct_depends(depends, curr);
370 for p in rest {
371 p.analyze_direct_depends(depends, curr);
372 }
373 }
374 Atom::Repeat(p)
375 | Atom::Repeat1(p)
376 | Atom::Optional(p)
377 | Atom::LookAhead(p)
378 | Atom::LookAheadNot(p) => p.analyze_direct_depends(depends, curr),
379 _ => {}
380 }
381 }
382
383 fn must_progress(&self) -> bool {
385 match self {
386 Atom::Terminal(_) | Atom::PatTerminal(_) | Atom::NonTerminal(_) => true,
387 Atom::Repeat(_) | Atom::Optional(_) | Atom::LookAhead(_) | Atom::LookAheadNot(_) => {
388 false
389 }
390 Atom::Sub(p) => p.must_progress(),
391 Atom::Choice(first, rest) => {
392 first.must_progress() && rest.iter().all(|p| p.must_progress())
393 }
394 Atom::Repeat1(p) => p.must_progress(),
395 }
396 }
397
398 fn may_progress(&self) -> bool {
400 match self {
401 Atom::Terminal(_) | Atom::PatTerminal(_) | Atom::NonTerminal(_) => true,
402 Atom::LookAhead(_) | Atom::LookAheadNot(_) => false,
403 Atom::Sub(p) => p.may_progress(),
404 Atom::Choice(first, rest) => {
405 first.may_progress() || rest.iter().any(|p| p.may_progress())
406 }
407 Atom::Repeat(p) | Atom::Repeat1(p) | Atom::Optional(p) => p.may_progress(),
408 }
409 }
410}