1use std::rc::Rc;
2use std::vec;
3
4use proc_macro2::{Span, TokenStream};
5use quote::{format_ident, quote, quote_spanned, ToTokens};
6use syn::punctuated::Punctuated;
7use syn::visit_mut::VisitMut;
8use syn::Token;
9
10use crate::hash::{HashMap, HashSet, OrderedMap, OrderedSet};
11use crate::middle::{Capture, MemoKind, Middle, ParserImpl, ParserRef, Parsing};
12use crate::syntax::{Atom, ParseIt, Parser, Part, Production, Rule};
13
14#[derive(Default)]
15struct Context {
16 pub parse_macros: Rc<Vec<syn::Path>>,
17 pub left_calls: HashMap<syn::Ident, HashSet<syn::Ident>>,
18 pub left_recursion: HashSet<syn::Ident>,
19 pub direct_depends: HashMap<syn::Ident, OrderedMap<syn::Ident, ParserRef>>,
20 pub depends: HashMap<syn::Ident, OrderedMap<syn::Ident, ParserRef>>,
21}
22
23struct ExprVisitor {
24 pub parse_macros: Rc<Vec<syn::Path>>,
25 pub self_ident: syn::Ident,
27 pub referred_self: bool,
29}
30
31impl ExprVisitor {
32 pub fn new(parse_macros: Rc<Vec<syn::Path>>) -> Self {
33 Self {
34 parse_macros,
35 self_ident: format_ident!("r#__self", span = Span::call_site()),
36 referred_self: false,
37 }
38 }
39}
40
41impl VisitMut for ExprVisitor {
42 fn visit_ident_mut(&mut self, i: &mut proc_macro2::Ident) {
43 if i == "self" {
44 let span = i.span();
45 *i = self.self_ident.clone();
46 i.set_span(span);
47 self.referred_self = true;
48 }
49 }
50
51 fn visit_macro_mut(&mut self, m: &mut syn::Macro) {
52 if self.parse_macros.contains(&m.path) {
53 struct MacroArgs(pub Vec<syn::Expr>);
54 impl syn::parse::Parse for MacroArgs {
55 fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
56 let args = Punctuated::<syn::Expr, Token![,]>::parse_terminated(input)?;
57 Ok(Self(args.into_iter().collect()))
58 }
59 }
60
61 if let Ok(MacroArgs(mut args)) = syn::parse2::<MacroArgs>(m.tokens.clone()) {
62 for expr in args.iter_mut() {
63 self.visit_expr_mut(expr);
64 }
65 m.tokens = TokenStream::new();
66 for expr in args {
67 match expr {
68 syn::Expr::Lit(syn::ExprLit { attrs, lit }) if attrs.is_empty() => {
69 m.tokens.extend(lit.to_token_stream());
70 }
71 _ => {
72 m.tokens.extend(quote! { #expr });
73 }
74 }
75 m.tokens.extend(quote! {,});
76 }
77 }
78 }
79 }
80}
81
82impl ParseIt {
83 pub fn compile(self) -> Result<Middle, TokenStream> {
84 let mut ctx = Context {
85 parse_macros: self.config.parse_macros.clone(),
86 ..Default::default()
87 };
88
89 self.analyze_left_recursion(&mut ctx);
90 self.analyze_depends(&mut ctx);
91
92 let crate_name = match &self.config.crate_name {
93 Some(crate_name) => quote! { #crate_name },
94 None => quote! { ::parse_it },
95 };
96 let mut middle = Middle {
97 attrs: self.attrs,
98 crate_name,
99 mod_name: self.mod_name,
100 items: self.items,
101 parsers: vec![],
102 };
103 for parser in self.parsers {
104 let parser = parser.compile(&mut ctx)?;
105 middle.parsers.push(parser);
106 }
107
108 Ok(middle)
109 }
110
111 fn analyze_left_recursion(&self, ctx: &mut Context) {
112 for parser in &self.parsers {
113 parser.analyze_left_calls(ctx);
114 }
115
116 for name in ctx.left_calls.keys() {
118 if ctx.left_recursion.contains(name) {
119 continue;
120 }
121 let mut stack = OrderedSet::default();
122 stack.insert(name);
123 while let Some(name) = stack.pop_back() {
124 for dep in &ctx.left_calls[name] {
125 if ctx.left_recursion.contains(dep) {
126 continue;
127 }
128 if !stack.insert(dep) || dep == name {
129 ctx.left_recursion.insert(name.clone());
130 break;
131 }
132 }
133 }
134 }
135 }
136
137 fn analyze_depends(&self, ctx: &mut Context) {
138 for parser in &self.parsers {
139 parser.analyze_direct_depends(ctx);
140 }
141
142 for name in ctx.direct_depends.keys() {
144 let mut depends = OrderedMap::default();
145 let mut stack = vec![name];
146 while let Some(name) = stack.pop() {
147 if depends.contains_key(name) {
148 continue;
149 }
150 depends.insert(name.clone(), ParserRef::new(name));
151 stack.extend(ctx.direct_depends[name].keys());
152 }
153 depends.remove(name);
154 ctx.depends.insert(name.clone(), depends);
155 }
156 }
157}
158
159impl Parser {
160 fn compile(self, ctx: &mut Context) -> Result<ParserImpl, TokenStream> {
161 let curr = ParserRef::new(&self.name);
162 let depends = ctx.depends[&self.name]
163 .iter()
164 .map(|(p, i)| (i.clone(), p.clone()))
165 .collect();
166 let mut parser = self.rules.0.compile(ctx)?;
167 if !self.rules.1.is_empty() {
168 parser = parser.choice_nocap(self.rules.1.into_iter().map(|rule| rule.compile(ctx)))?;
169 }
170
171 let memo = if ctx.left_recursion.contains(&self.name) {
172 MemoKind::LeftRec
173 } else {
174 MemoKind::Memorize
175 };
176
177 Ok(ParserImpl {
178 name: self.name,
179 curr,
180 parser,
181 memo,
182 vis: self.vis,
183 ret_ty: self.ty,
184 depends,
185 })
186 }
187
188 fn analyze_left_calls<'a>(&self, ctx: &'a mut Context) -> &'a HashSet<syn::Ident> {
189 ctx.left_calls
190 .entry(self.name.clone())
191 .or_insert_with(move || {
192 let mut set = HashSet::default();
193 for rule in self.rules() {
194 set.extend(rule.left_calls());
195 }
196 set
197 })
198 }
199
200 fn analyze_direct_depends<'a>(
201 &self,
202 ctx: &'a mut Context,
203 ) -> &'a OrderedMap<syn::Ident, ParserRef> {
204 ctx.direct_depends
205 .entry(self.name.clone())
206 .or_insert_with(move || {
207 let mut depends = OrderedMap::default();
208 for rule in self.rules() {
209 rule.production
210 .analyze_direct_depends(&mut depends, &self.name);
211 }
212 depends
213 })
214 }
215}
216
217impl Rule {
218 fn compile(mut self, ctx: &mut Context) -> Result<Parsing, TokenStream> {
219 let mut parser = self.production.compile(ctx)?;
220
221 let mut visitor = ExprVisitor::new(ctx.parse_macros.clone());
222 visitor.visit_expr_mut(&mut self.action);
223 if visitor.referred_self {
224 parser.capture = Capture::Named(
225 Box::new(syn::Pat::Ident(syn::PatIdent {
226 attrs: Vec::new(),
227 by_ref: None,
228 mutability: None,
229 ident: visitor.self_ident,
230 subpat: None,
231 })),
232 Box::new(parser.capture),
233 );
234 }
235
236 Ok(parser.map(self.action))
237 }
238
239 fn left_calls(&self) -> impl Iterator<Item = syn::Ident> + '_ {
240 self.production
241 .first_progress()
242 .filter_map(|part| match &part.part {
243 Atom::NonTerminal(p) => Some(p.clone()),
244 _ => None,
245 })
246 }
247}
248
249impl Production {
250 fn compile(self, ctx: &mut Context) -> Result<Parsing, TokenStream> {
251 let mut result = self.parts.0.compile(ctx)?;
252 for part in self.parts.1 {
253 let part = part.compile(ctx)?;
254 result = result.then(Box::new(part));
255 }
256 Ok(result)
257 }
258
259 fn first_progress(&self) -> impl Iterator<Item = &Part> {
261 let mut iter = self.parts();
262 let mut finished = false;
263 std::iter::from_fn(move || {
264 if finished {
265 return None;
266 }
267 for part in iter.by_ref() {
268 if part.part.must_progress() {
269 finished = true;
270 return Some(part);
271 } else if part.part.may_progress() {
272 return Some(part);
273 }
274 }
275 finished = true;
276 None
277 })
278 }
279
280 fn analyze_direct_depends(
281 &self,
282 depends: &mut OrderedMap<syn::Ident, ParserRef>,
283 curr: &syn::Ident,
284 ) {
285 for part in self.parts() {
286 part.part.analyze_direct_depends(depends, curr);
287 }
288 }
289
290 fn must_progress(&self) -> bool {
292 self.first_progress().any(|p| p.part.must_progress())
293 }
294
295 fn may_progress(&self) -> bool {
297 self.first_progress().any(|p| p.part.may_progress())
298 }
299}
300
301impl Part {
302 fn compile(self, ctx: &mut Context) -> Result<Parsing, TokenStream> {
303 let mut parser = self.part.compile(ctx)?;
304 match self.capture {
305 crate::syntax::Capture::Named(name) => {
306 parser.capture = Capture::Named(name, Box::new(parser.capture));
307 }
308 crate::syntax::Capture::Loud => {
309 if !parser.capture.is_loud() {
310 parser.capture = Capture::Loud;
311 }
312 }
313 crate::syntax::Capture::NotSpecified => {}
314 }
315 Ok(parser)
316 }
317}
318
319impl Atom {
320 fn compile(self, ctx: &mut Context) -> Result<Parsing, TokenStream> {
321 match self {
322 Atom::Terminal(lit) => match lit {
323 syn::Lit::Char(c) => Ok(Parsing::just(c.value())),
324 _ => Err(quote_spanned! { lit.span() => compile_error!("unsupported literal") })?,
325 },
326 Atom::NonTerminal(name) => {
327 let depends = ctx.depends.get(&name).ok_or_else(|| {
328 quote_spanned! { name.span() => compile_error!("use of undeclared parser") }
329 })?;
330 let depends = depends.iter().map(|(_, p)| p.clone());
331 Ok(Parsing::call(name, depends))
332 }
333 Atom::Sub(p) => p.compile(ctx),
334 Atom::Choice(first, rest) => first
335 .compile(ctx)?
336 .choice(rest.into_iter().map(|p| p.compile(ctx))),
337 Atom::Repeat(p) => Ok(p.compile(ctx)?.repeat(0)),
338 Atom::Repeat1(p) => Ok(p.compile(ctx)?.repeat(1)),
339 Atom::Optional(p) => Ok(p.compile(ctx)?.optional()),
340 Atom::LookAhead(p) => Ok(p.compile(ctx)?.look_ahead()),
341 Atom::LookAheadNot(p) => Ok(p.compile(ctx)?.look_ahead_not()),
342 }
343 }
344
345 fn analyze_direct_depends(
346 &self,
347 depends: &mut OrderedMap<syn::Ident, ParserRef>,
348 curr: &syn::Ident,
349 ) {
350 match self {
351 Atom::NonTerminal(name) if name != curr => {
352 depends.insert(name.clone(), ParserRef::new(name));
353 }
354 Atom::Sub(p) => p.analyze_direct_depends(depends, curr),
355 Atom::Choice(first, rest) => {
356 first.analyze_direct_depends(depends, curr);
357 for p in rest {
358 p.analyze_direct_depends(depends, curr);
359 }
360 }
361 Atom::Repeat(p)
362 | Atom::Repeat1(p)
363 | Atom::Optional(p)
364 | Atom::LookAhead(p)
365 | Atom::LookAheadNot(p) => p.analyze_direct_depends(depends, curr),
366 _ => {}
367 }
368 }
369
370 fn must_progress(&self) -> bool {
372 match self {
373 Atom::Terminal(_) | Atom::NonTerminal(_) => true,
374 Atom::Repeat(_) | Atom::Optional(_) | Atom::LookAhead(_) | Atom::LookAheadNot(_) => {
375 false
376 }
377 Atom::Sub(p) => p.must_progress(),
378 Atom::Choice(first, rest) => {
379 first.must_progress() && rest.iter().all(|p| p.must_progress())
380 }
381 Atom::Repeat1(p) => p.must_progress(),
382 }
383 }
384
385 fn may_progress(&self) -> bool {
387 match self {
388 Atom::Terminal(_) | Atom::NonTerminal(_) => true,
389 Atom::LookAhead(_) | Atom::LookAheadNot(_) => false,
390 Atom::Sub(p) => p.may_progress(),
391 Atom::Choice(first, rest) => {
392 first.may_progress() || rest.iter().any(|p| p.may_progress())
393 }
394 Atom::Repeat(p) | Atom::Repeat1(p) | Atom::Optional(p) => p.may_progress(),
395 }
396 }
397}