1use std::rc::Rc;
2
3use syn::{parse::discouraged::Speculative, punctuated::Punctuated, Attribute, Token};
4
5#[derive(Debug)]
6pub struct ParseIt {
7 pub mods: Vec<Mod>,
8}
9
10impl syn::parse::Parse for ParseIt {
11 fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
12 let mut mods = vec![];
13 while !input.is_empty() {
14 let mut attrs = input.call(syn::Attribute::parse_outer)?;
15
16 input.parse::<Token![mod]>()?;
17 let mod_name = input.parse::<syn::Ident>()?;
18
19 let content;
20 syn::braced!(content in input);
21
22 #[derive(Clone, Copy, PartialEq, Eq)]
23 enum ModType {
24 Parser,
25 Lexer,
26 }
27 let mut mod_types = vec![];
28 attrs.retain(|attr| {
29 if attr.path().is_ident("parser") {
30 mod_types.push(ModType::Parser);
31 return false;
32 } else if attr.path().is_ident("lexer") {
33 mod_types.push(ModType::Lexer);
34 return false;
35 }
36 true
37 });
38 let mod_type = if mod_types.is_empty() {
39 return Err(syn::Error::new_spanned(
40 mod_name,
41 "module must be marked as parser or lexer",
42 ));
43 } else if mod_types.len() == 1 {
44 mod_types[0]
45 } else {
46 return Err(syn::Error::new_spanned(
47 mod_name,
48 "module can only be marked as parser or lexer, not both",
49 ));
50 };
51 match mod_type {
52 ModType::Parser => {
53 let parser_mod = ParserMod::parse(attrs, mod_name, &content)?;
54 mods.push(Mod::Parser(parser_mod));
55 }
56 ModType::Lexer => {
57 let lexer_mod = LexerMod::parse(attrs, mod_name, &content)?;
58 mods.push(Mod::Lexer(lexer_mod));
59 }
60 }
61 }
62 Ok(Self { mods })
63 }
64}
65
/// One module of the macro input, classified by its marker attribute.
#[derive(Debug)]
pub enum Mod {
    /// A module marked `#[parser]`.
    Parser(ParserMod),
    /// A module marked `#[lexer]`.
    Lexer(LexerMod),
}
71
72#[derive(Debug)]
73pub struct ParserConfig {
74 pub crate_name: Option<syn::Path>,
75 pub parse_macros: Rc<Vec<syn::Path>>,
76 pub debug: bool,
77}
78
79impl Default for ParserConfig {
80 fn default() -> Self {
81 Self {
82 crate_name: None,
83 parse_macros: Rc::new(vec![
84 syn::parse_quote! { print },
85 syn::parse_quote! { println },
86 syn::parse_quote! { eprint },
87 syn::parse_quote! { eprintln },
88 syn::parse_quote! { format },
89 syn::parse_quote! { dbg },
90 ]),
91 debug: false,
92 }
93 }
94}
95
96#[derive(Debug)]
97pub struct ParserMod {
98 pub attrs: Vec<syn::Attribute>,
99 pub mod_name: syn::Ident,
100 pub items: Vec<syn::Item>,
101 pub parsers: Vec<Parser>,
102 pub config: ParserConfig,
103}
104
105impl ParserMod {
106 fn parse(
107 attrs: Vec<Attribute>,
108 mod_name: syn::Ident,
109 content: syn::parse::ParseStream,
110 ) -> syn::Result<Self> {
111 let mut config = ParserConfig::default();
112 let mut common_attrs = vec![];
113 for attr in attrs {
114 if attr.path().is_ident("parse_it") {
115 attr.parse_nested_meta(|meta| {
116 if meta.path.is_ident("crate") {
117 let value = meta.value()?;
118 let value = value.parse::<syn::LitStr>()?;
119 config.crate_name = Some(value.parse().map_err(|_| {
120 syn::Error::new_spanned(value, "expected a valid path")
121 })?);
122 } else if meta.path.is_ident("parse_macros") {
123 let value = meta.value()?;
124 let value = value.parse::<syn::LitStr>()?;
125 config.parse_macros = Rc::new(
126 value
127 .parse_with(Punctuated::<syn::Path, Token![,]>::parse_terminated)
128 .map_err(|_| {
129 syn::Error::new_spanned(
130 value,
131 "expected a list of paths separated by commas",
132 )
133 })?
134 .into_iter()
135 .collect(),
136 );
137 } else if meta.path.is_ident("debug") {
138 let value = meta.value()?;
139 let value = value.parse::<syn::LitBool>()?;
140 config.debug = value.value;
141 } else {
142 Err(syn::Error::new_spanned(meta.path, "unknown attribute"))?
143 }
144 Ok(())
145 })?;
146 } else {
147 common_attrs.push(attr);
148 }
149 }
150
151 let mut parsers = vec![];
152 let mut items = vec![];
153 while !content.is_empty() {
154 let fork = content.fork();
155 if let Ok(parser) = fork.parse::<Parser>() {
156 content.advance_to(&fork);
157 parsers.push(parser);
158 } else {
159 let item = content.parse::<syn::Item>()?;
160 items.push(item);
161 }
162 }
163 Ok(Self {
164 attrs: common_attrs,
165 items,
166 mod_name,
167 parsers,
168 config,
169 })
170 }
171}
172
173#[derive(Debug)]
177pub struct Parser {
178 pub vis: syn::Visibility,
179 pub name: syn::Ident,
180 pub ty: syn::Type,
181 pub rules: Vec<Rule>,
182}
183
184impl syn::parse::Parse for Parser {
185 fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
186 let vis = input.parse::<syn::Visibility>()?;
187 let name = input.parse::<syn::Ident>()?;
188 input.parse::<Token![->]>()?;
189 let ty = input.parse::<syn::Type>()?;
190
191 let content;
192 syn::braced!(content in input);
193
194 let mut rules = vec![];
195 while !content.is_empty() {
196 let rule = content.parse::<Rule>()?;
197 rules.push(rule);
198 }
199
200 Ok(Parser {
201 vis,
202 name,
203 ty,
204 rules,
205 })
206 }
207}
208
209#[derive(Debug)]
213pub struct Rule {
214 pub production: Production,
215 pub action: syn::Expr,
216}
217
218impl syn::parse::Parse for Rule {
219 fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
220 let production = input.parse::<Production>()?;
221 input.parse::<Token![=>]>()?;
222 let action = input.parse::<syn::Expr>()?;
223 if (requires_comma_to_be_match_arm(&action) && !input.is_empty()) || input.peek(Token![,]) {
224 input.parse::<Token![,]>()?;
225 }
226 Ok(Rule { production, action })
227 }
228}
229
230#[derive(Debug)]
234pub struct Production {
235 pub parts: (Part, Vec<Part>),
237}
238
239impl Production {
240 pub fn parts(&self) -> impl Iterator<Item = &Part> {
241 std::iter::once(&self.parts.0).chain(self.parts.1.iter())
242 }
243}
244
245impl syn::parse::Parse for Production {
246 fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
247 let first_part = input.parse::<Part>()?;
248 let mut rest_parts = Vec::new();
249 while !input.peek(Token![=>]) && !input.peek(Token![|]) && !input.is_empty() {
250 rest_parts.push(input.parse::<Part>()?);
252 }
253
254 let parts = (first_part, rest_parts);
255 Ok(Production { parts })
256 }
257}
258
/// How a [`Part`] was annotated for capturing, as determined by `Part::parse`.
#[derive(Debug)]
pub enum Capture {
    /// The part was written as `pattern: ...`; holds that pattern.
    Named(Box<syn::Pat>),
    /// The part was prefixed with `@` and has no named pattern.
    Loud,
    /// Neither a pattern nor `@` was given.
    NotSpecified,
}
265
266#[derive(Debug)]
270pub struct Part {
271 pub capture: Capture,
272 pub part: Atom,
273}
274
275impl syn::parse::Parse for Part {
276 fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
277 let fork = input.fork();
278 let capture = if let Ok(pat) = fork
279 .call(syn::Pat::parse_single)
280 .and_then(|pat| fork.parse::<Token![:]>().map(|_| pat))
281 {
282 input.advance_to(&fork);
284 Some(pat)
285 } else {
286 None
287 };
288
289 let non_slient = if input.peek(Token![@]) {
290 input.parse::<Token![@]>()?;
292 true
293 } else {
294 false
295 };
296
297 let lookahead = if input.peek(Token![&]) {
298 input.parse::<Token![&]>()?;
300 Some(true)
301 } else if input.peek(Token![!]) {
302 input.parse::<Token![!]>()?;
304 Some(false)
305 } else {
306 None
307 };
308
309 let atom = input.parse::<Atom>()?;
310 let part = if input.peek(Token![*]) {
311 input.parse::<Token![*]>()?;
313 Atom::Repeat(Box::new(atom))
314 } else if input.peek(Token![+]) {
315 input.parse::<Token![+]>()?;
317 Atom::Repeat1(Box::new(atom))
318 } else if input.peek(Token![?]) {
319 input.parse::<Token![?]>()?;
321 Atom::Optional(Box::new(atom))
322 } else {
323 atom
324 };
325
326 let part = if let Some(lookahead) = lookahead {
327 if lookahead {
328 Atom::LookAhead(Box::new(part))
329 } else {
330 Atom::LookAheadNot(Box::new(part))
331 }
332 } else {
333 part
334 };
335
336 let capture = if let Some(capture) = capture {
337 Capture::Named(Box::new(capture))
338 } else if non_slient {
339 Capture::Loud
340 } else {
341 Capture::NotSpecified
342 };
343
344 Ok(Part { capture, part })
345 }
346}
347
348#[derive(Debug)]
356pub enum Atom {
357 Terminal(syn::Lit),
358 PatTerminal(syn::Pat),
359 TypePterminal(syn::Type),
360 NonTerminal(syn::Ident),
361 Sub(Box<Production>),
362 Choice(Box<Production>, Vec<Production>),
363 Repeat(Box<Atom>),
364 Repeat1(Box<Atom>),
365 Optional(Box<Atom>),
366 LookAhead(Box<Atom>),
367 LookAheadNot(Box<Atom>),
368}
369
370impl syn::parse::Parse for Atom {
371 fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
372 let lookahead = input.lookahead1();
373 let atom = if lookahead.peek(syn::token::Paren) {
374 let content;
376 syn::parenthesized!(content in input);
377 Atom::Sub(Box::new(content.parse()?))
378 } else if lookahead.peek(syn::token::Bracket) {
379 let content;
381 syn::bracketed!(content in input);
382 let mut choices = content
383 .parse_terminated(Production::parse, Token![|])?
384 .into_iter();
385 let first_choice = choices
386 .next()
387 .ok_or_else(|| content.error("expected at least one choice"))?;
388 Atom::Choice(Box::new(first_choice), choices.collect())
389 } else if lookahead.peek(syn::Lit) {
390 Atom::Terminal(input.parse()?)
392 } else if lookahead.peek(Token![<]) {
393 input.parse::<Token![<]>()?;
395 let ty = input.parse::<syn::Type>()?;
396 input.parse::<Token![>]>()?;
397 Atom::TypePterminal(ty)
398 } else if lookahead.peek(syn::Ident) {
399 let fork = input.fork();
400 if let Ok(pat) = fork.call(syn::Pat::parse_single) {
401 if matches!(&pat, syn::Pat::Ident(_)) {
402 Atom::NonTerminal(input.parse()?)
404 } else {
405 input.advance_to(&fork);
407 Atom::PatTerminal(pat)
408 }
409 } else {
410 Err(lookahead.error())?
411 }
412 } else {
413 Err(lookahead.error())?
414 };
415
416 Ok(atom)
417 }
418}
419
420#[derive(Debug)]
421pub struct LexerConfig {
422 pub crate_name: Option<syn::Path>,
423 pub parse_macros: Rc<Vec<syn::Path>>,
424 pub debug: bool,
425}
426
427impl Default for LexerConfig {
428 fn default() -> Self {
429 Self {
430 crate_name: None,
431 parse_macros: Rc::new(vec![
432 syn::parse_quote! { print },
433 syn::parse_quote! { println },
434 syn::parse_quote! { eprint },
435 syn::parse_quote! { eprintln },
436 syn::parse_quote! { format },
437 syn::parse_quote! { dbg },
438 ]),
439 debug: false,
440 }
441 }
442}
443
444#[derive(Debug)]
445pub struct LexerMod {
446 pub attrs: Vec<syn::Attribute>,
447 pub mod_name: syn::Ident,
448 pub items: Vec<syn::Item>,
449 pub lexers: Vec<Lexer>,
450 pub config: ParserConfig,
451}
452
453impl LexerMod {
454 pub fn parse(
455 attrs: Vec<Attribute>,
456 mod_name: syn::Ident,
457 content: syn::parse::ParseStream,
458 ) -> syn::Result<Self> {
459 let mut common_attrs = vec![];
460 for attr in attrs {
461 if attr.path().is_ident("parse_it") {
462 attr.parse_nested_meta(|_meta| todo!())?;
463 } else {
464 common_attrs.push(attr);
465 }
466 }
467
468 let mut lexers = vec![];
469 let mut items = vec![];
470 while !content.is_empty() {
471 let fork = content.fork();
472 if let Ok(lexer) = fork.parse::<Lexer>() {
473 content.advance_to(&fork);
474 lexers.push(lexer);
475 } else {
476 let item = content.parse::<syn::Item>()?;
477 items.push(item);
478 }
479 }
480
481 Ok(Self {
482 attrs: common_attrs,
483 mod_name,
484 items,
485 lexers,
486 config: Default::default(),
487 })
488 }
489}
490
491#[derive(Debug)]
495pub struct Lexer {
496 pub vis: syn::Visibility,
497 pub name: syn::Ident,
498 pub ty: Option<syn::Type>,
499 pub inputs: Punctuated<syn::PatType, Token![,]>,
500 pub rules: Vec<LexerRule>,
501}
502
503impl syn::parse::Parse for Lexer {
504 fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
505 let vis = input.parse()?;
506 let name = input.parse()?;
507 let ty = if input.peek(Token![->]) {
508 input.parse::<Token![->]>()?;
509 Some(input.parse()?)
510 } else {
511 None
512 };
513
514 let inputs = if input.peek(syn::token::Paren) {
515 let content;
517 syn::parenthesized!(content in input);
518 Punctuated::<syn::PatType, Token![,]>::parse_terminated(&content)?
519 } else {
520 Punctuated::new()
521 };
522
523 let content;
524 syn::braced!(content in input);
525
526 let mut rules = vec![];
527 while !content.is_empty() {
528 let rule = content.parse::<LexerRule>()?;
529 rules.push(rule);
530 }
531
532 Ok(Self {
533 vis,
534 name,
535 ty,
536 inputs,
537 rules,
538 })
539 }
540}
541
542#[derive(Debug)]
546pub struct LexerRule {
547 pub pattern: LexerPattern,
548 pub action: syn::Expr,
549}
550
551impl syn::parse::Parse for LexerRule {
552 fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
553 let pattern = input.parse::<LexerPattern>()?;
554 input.parse::<Token![=>]>()?;
555 let action = input.parse::<syn::Expr>()?;
556 if (requires_comma_to_be_match_arm(&action) && !input.is_empty()) || input.peek(Token![,]) {
557 input.parse::<Token![,]>()?;
558 }
559 Ok(LexerRule { pattern, action })
560 }
561}
562
563#[derive(Debug)]
567pub enum LexerPattern {
568 Regex(syn::LitStr),
569 Name(syn::Ident),
570}
571
572impl syn::parse::Parse for LexerPattern {
573 fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
574 let lookahead = input.lookahead1();
575 if lookahead.peek(syn::Ident) {
576 let ident = input.parse()?;
577 Ok(Self::Name(ident))
578 } else if lookahead.peek(syn::LitStr) {
579 let regex = input.parse()?;
580 Ok(Self::Regex(regex))
581 } else {
582 Err(lookahead.error())
583 }
584 }
585}
586
587fn requires_comma_to_be_match_arm(expr: &syn::Expr) -> bool {
588 use syn::Expr;
589 !matches!(
590 expr,
591 Expr::If(_)
592 | Expr::Match(_)
593 | Expr::Block(_)
594 | Expr::Unsafe(_)
595 | Expr::While(_)
596 | Expr::Loop(_)
597 | Expr::ForLoop(_)
598 | Expr::TryBlock(_)
599 | Expr::Const(_)
600 )
601}