regex_bnf_macro/
lib.rs

1use proc_macro::TokenStream;
2use quote::quote;
3use syn::{
4    bracketed, parenthesized,
5    parse::Parse,
6    parse_macro_input,
7    punctuated::Punctuated,
8    token::{Bracket, Paren},
9    Ident, LitStr, Token,
10};
11
12struct FullBnf {
13    lines: Vec<BnfLine>,
14}
15
16impl Parse for FullBnf {
17    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
18        let mut lines = Vec::new();
19
20        while !input.is_empty() {
21            let line = input.parse()?;
22            let _: Token![;] = input.parse()?;
23            lines.push(line);
24        }
25
26        Ok(Self { lines })
27    }
28}
29
30enum BnfLine {
31    Tag(BnfTag),
32    Enum(BnfEnum),
33}
34
35impl Parse for BnfLine {
36    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
37        if input.peek(Token![enum]) {
38            let tag = input.parse()?;
39            Ok(Self::Enum(tag))
40        } else {
41            let tag = input.parse()?;
42            Ok(Self::Tag(tag))
43        }
44    }
45}
46
47struct BnfTag {
48    name: Ident,
49    args: BnfTagArgs,
50}
51
52impl Parse for BnfTag {
53    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
54        let name = input.parse()?;
55
56        let _: Token![=] = input.parse()?;
57
58        let args = input.parse()?;
59
60        Ok(Self { name, args })
61    }
62}
63
64impl BnfTag {
65    fn as_parse_quote(&self, input: &Ident) -> proc_macro2::TokenStream {
66        let tagname = &self.name;
67        let parse = self.args.args.iter().map(|arg| {
68            if let Some(name) = &arg.name {
69                let parse_kind = arg.parse_kind.as_parse_quote(input);
70                quote! {
71                    let #name = { #parse_kind }
72                        .map_err(|err| ParseError::with_child(
73                            concat!("Failed to parse ", stringify!(#tagname)),
74                            #input.pos,
75                            err,
76                        ))?;
77                }
78            } else {
79                let parse_kind = arg.parse_kind.as_parse_quote(input);
80                quote! {
81                    { #parse_kind }
82                        .map_err(|err| ParseError::with_child(
83                            concat!("Failed to parse ", stringify!(#tagname)),
84                            #input.pos,
85                            err,
86                        ))?;
87                }
88            }
89        });
90
91        let assign = self.args.args.iter().map(|arg| {
92            if let Some(name) = &arg.name {
93                quote! { #name, }
94            } else {
95                quote! {}
96            }
97        });
98
99        let name = &self.name;
100
101        quote! { {
102            #(#parse)*
103            #name { #(#assign)* __lt: Default::default() }
104        } }
105    }
106}
107
108struct BnfEnum {
109    name: Ident,
110    args: Vec<Ident>,
111}
112
113impl BnfEnum {
114    fn as_def_quote(&self) -> proc_macro2::TokenStream {
115        let args = &self.args;
116
117        let args = args.iter().map(|arg| {
118            quote! { #arg(#arg<'a>) }
119        });
120
121        quote! {
122            {
123                #(#args),*
124            }
125        }
126    }
127
128    fn as_parse_quote(&self, input: &Ident) -> proc_macro2::TokenStream {
129        let args = &self.args;
130        let errors = Ident::new("errors", self.name.span());
131
132        let args = args.iter().map(|arg| {
133            quote! {
134                let parsed = #arg::parse(#input);
135                match parsed {
136                    Ok((parsed, input)) => {
137                        return Ok((Self::#arg(parsed), input))
138                    }
139                    Err(e) => {
140                        #errors.push(e);
141                    }
142                }
143            }
144        });
145
146        let name = &self.name;
147
148        quote! {
149            let mut #errors = Vec::new();
150            #(#args)*
151            Err(ParseError::with_children(concat!("Couldn't parse ", stringify!(#name)), #input.pos, #errors))
152        }
153    }
154}
155
156impl Parse for BnfEnum {
157    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
158        let _: Token![enum] = input.parse()?;
159
160        let name = input.parse()?;
161
162        let _: Token![=] = input.parse()?;
163
164        // Parse idents punctuated with | until ;
165        let content;
166        let _ = bracketed!(content in input);
167        let idents: Punctuated<Ident, Token![|]> = content.parse_terminated(Ident::parse)?;
168
169        Ok(Self {
170            name,
171            args: idents.into_iter().collect(),
172        })
173    }
174}
175
176struct BnfTagArgs {
177    args: Vec<BnfTagNamedArg>,
178}
179
180impl BnfTagArgs {
181    fn as_def_quote(&self) -> proc_macro2::TokenStream {
182        let args = self.args.iter().map(|arg| {
183            if let Some(name) = arg.name.as_ref() {
184                let parse_kind = arg.parse_kind.as_def_quote();
185                quote! { pub #name: #parse_kind, }
186            } else {
187                quote! {}
188            }
189        });
190
191        quote! { { #(#args)* __lt: std::marker::PhantomData<&'a ()> } }
192    }
193}
194
195impl Parse for BnfTagArgs {
196    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
197        let mut args = Vec::new();
198        while !input.peek(Token![;]) {
199            args.push(input.parse()?);
200        }
201
202        Ok(Self { args })
203    }
204}
205
206struct BnfTagNamedArg {
207    // is none when ident is _
208    name: Option<Ident>,
209    parse_kind: BnfTagArgKind,
210}
211
212impl Parse for BnfTagNamedArg {
213    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
214        let name = if input.peek(Ident) && input.peek2(Token![:]) {
215            let name = input.parse()?;
216            let _: Token![:] = input.parse()?;
217            Some(name)
218        } else {
219            None
220        };
221
222        let parse_kind = input.parse()?;
223
224        Ok(Self { name, parse_kind })
225    }
226}
227
/// Minimum length demanded by a `<[Name]...>` repetition.
enum VecRepeatKind {
    /// `<[Name]*>` — an empty result is accepted.
    ZeroOrMore,
    /// `<[Name]+>` — an empty result is a parse error.
    OneOrMore,
}
232
233enum BnfTermArgModifier {
234    Box,
235    Vec(VecRepeatKind),
236    VecUntilEof,
237    Lookahead,
238    Not,
239    Optional,
240}
241
242enum BnfTagArgKind {
243    Term {
244        name: Ident,
245        modifier: Option<BnfTermArgModifier>,
246    },
247    String(LitStr),
248    Regex(LitStr),
249    CustomFn(Ident),
250    Eof,
251}
252
253impl BnfTagArgKind {
254    fn as_def_quote(&self) -> proc_macro2::TokenStream {
255        match self {
256            Self::Term { name, modifier } => match modifier {
257                Some(BnfTermArgModifier::Box) => {
258                    quote! { Box<#name<'a>> }
259                }
260                Some(BnfTermArgModifier::Vec(_)) => {
261                    quote! { Vec<#name<'a>> }
262                }
263                Some(BnfTermArgModifier::VecUntilEof) => {
264                    quote! { Vec<#name<'a>> }
265                }
266                Some(BnfTermArgModifier::Optional) => {
267                    quote! { Option<#name<'a>> }
268                }
269                Some(BnfTermArgModifier::Lookahead) => {
270                    quote! { () }
271                }
272                Some(BnfTermArgModifier::Not) => {
273                    quote! { () }
274                }
275                None => {
276                    quote! { #name<'a> }
277                }
278            },
279            Self::String(_) => {
280                quote! { TextSlice<'a> }
281            }
282            Self::Regex(_) => {
283                quote! { TextSlice<'a> }
284            }
285            Self::CustomFn(_) => {
286                quote! { TextSlice<'a> }
287            }
288            Self::Eof => {
289                quote! { () }
290            }
291        }
292    }
293
294    fn as_parse_quote(&self, input: &Ident) -> proc_macro2::TokenStream {
295        let inner = match self {
296            Self::Term { name, modifier } => match modifier {
297                Some(BnfTermArgModifier::Box) => {
298                    quote! {
299                        match #name::parse(#input) {
300                            Ok((result, rest)) => Ok((Box::new(result), rest)),
301                            Err(err) => Err(err),
302                        }
303                    }
304                }
305                Some(BnfTermArgModifier::Vec(len_kind)) => {
306                    let length = quote! { length };
307
308                    let length_check = match len_kind {
309                        VecRepeatKind::ZeroOrMore => quote! {},
310                        VecRepeatKind::OneOrMore => quote! {
311                            if #length == 0 {
312                                return Err(ParseError::new(concat!(stringify!(#name), " expected at least one element"), #input.pos));
313                            }
314                        },
315                    };
316
317                    quote! { {
318                        let mut inp = #input;
319                        let mut vec = Vec::new();
320                        let mut #length = 0;
321                        while let Ok((result, rest)) = #name::parse(inp) {
322                            vec.push(result);
323                            inp = rest;
324                            #length += 1;
325                        }
326                        #length_check;
327                        Ok((vec, inp))
328                    } }
329                }
330                Some(BnfTermArgModifier::VecUntilEof) => {
331                    quote! { {
332                        let mut inp = #input;
333                        let mut vec = Vec::new();
334                        loop {
335                            match #name::parse(inp) {
336                                Ok((result, rest)) => {
337                                    vec.push(result);
338                                    inp = rest;
339                                }
340                                Err(err) => {
341                                    if inp.is_empty() {
342                                        break;
343                                    } else {
344                                        return Err(err);
345                                    }
346                                },
347                            }
348                        }
349                        Ok((vec, inp))
350                    } }
351                }
352                Some(BnfTermArgModifier::Optional) => {
353                    quote! {
354                        match #name::parse(#input) {
355                            Ok((result, rest)) => Ok((Some(result), rest)),
356                            Err(err) => (Ok((None, #input))),
357                        }
358                    }
359                }
360                Some(BnfTermArgModifier::Lookahead) => {
361                    quote! { {
362                        match #name::parse(#input) {
363                            Ok((result, rest)) => Ok(((), #input)),
364                            Err(err) => Err(err),
365                        }
366                    } }
367                }
368                Some(BnfTermArgModifier::Not) => {
369                    quote! { {
370                        match #name::parse(#input) {
371                            Ok((result, rest)) => Err(ParseError::new(concat!(stringify!(#name), " not allowed here"), #input.pos)),
372                            Err(err) => Ok(((), #input)),
373                        }
374                    } }
375                }
376                None => {
377                    quote! { #name::parse(#input) }
378                }
379            },
380            Self::String(lit) => {
381                quote! {
382                    parse_string_lit(#input, #lit)
383                        .map_err(|_| ParseError::new(concat!("Expected \"", #lit, "\""), #input.pos))
384                }
385            }
386            Self::Regex(lit) => {
387                quote! {
388                    parse_string_regex(#input, #lit)
389                        .map_err(|_| ParseError::new(concat!("Expected regex \"", #lit, "\""), #input.pos))
390                }
391            }
392            Self::CustomFn(ident) => {
393                quote! {
394                    #ident(#input)
395                }
396            }
397            Self::Eof => {
398                quote! {
399                    if #input.is_empty() {
400                        Ok(((), #input))
401                    } else {
402                        Err(ParseError::new("Expected end of file", #input.pos))
403                    }
404                }
405            }
406        };
407
408        quote! {
409            {
410                let r = #inner;
411                match r {
412                    Ok((result, rest)) => {
413                        #input = rest;
414                        Ok(result)
415                    }
416                    Err(err) => Err(err),
417                }
418            }
419        }
420    }
421}
422
423impl Parse for BnfTagArgKind {
424    fn parse(input: syn::parse::ParseStream) -> syn::Result<Self> {
425        if input.peek(Token![<]) {
426            let _: Token![<] = input.parse()?;
427
428            let name: Ident;
429            let modifier: Option<BnfTermArgModifier>;
430
431            if input.peek(Bracket) {
432                let content;
433                let _ = bracketed!(content in input);
434
435                name = content.parse()?;
436
437                if input.peek(Token![^]) {
438                    let _: Token![^] = input.parse()?;
439                    modifier = Some(BnfTermArgModifier::VecUntilEof)
440                } else if input.peek(Token![+]) {
441                    let _: Token![+] = input.parse()?;
442                    modifier = Some(BnfTermArgModifier::Vec(VecRepeatKind::OneOrMore))
443                } else if input.peek(Token![*]) {
444                    let _: Token![*] = input.parse()?;
445                    modifier = Some(BnfTermArgModifier::Vec(VecRepeatKind::ZeroOrMore))
446                } else {
447                    return Err(input.error("Expected +, *, or ^ after [token]"));
448                }
449            } else if input.peek(Paren) {
450                let content;
451                let _ = parenthesized!(content in input);
452
453                name = content.parse()?;
454                modifier = Some(BnfTermArgModifier::Lookahead)
455            } else if input.peek(Token![*]) {
456                let _: Token![*] = input.parse()?;
457
458                name = input.parse()?;
459                modifier = Some(BnfTermArgModifier::Box)
460            } else if input.peek(Token![?]) {
461                let _: Token![?] = input.parse()?;
462
463                name = input.parse()?;
464                modifier = Some(BnfTermArgModifier::Optional)
465            } else if input.peek(Token![!]) {
466                let _: Token![!] = input.parse()?;
467
468                name = input.parse()?;
469                modifier = Some(BnfTermArgModifier::Not)
470            } else {
471                name = input.parse()?;
472                modifier = None;
473            }
474
475            let _: Token![>] = input.parse()?;
476            Ok(Self::Term { name, modifier })
477        } else if input.peek(Paren) {
478            let content;
479            let _ = parenthesized!(content in input);
480
481            let ident = content.parse()?;
482            Ok(Self::CustomFn(ident))
483        } else if input.peek(LitStr) {
484            let string: LitStr = input.parse()?;
485
486            // I don't know if there's a better way to detect raw strings
487            if string.token().to_string().starts_with('r') {
488                Ok(Self::Regex(string))
489            } else {
490                Ok(Self::String(string))
491            }
492        } else if input.peek(Token![^]) {
493            let _: Token![^] = input.parse()?;
494
495            Ok(Self::Eof)
496        } else {
497            Err(syn::Error::new(input.span(), "invalid argument"))
498        }
499    }
500}
501
502#[proc_macro]
503pub fn bnf(input: TokenStream) -> TokenStream {
504    // Parse the input tokens into a syntax tree
505    let input = parse_macro_input!(input as FullBnf);
506
507    let mut definitions = Vec::new();
508
509    for line in input.lines {
510        let definition = match &line {
511            BnfLine::Tag(tag) => {
512                let name = &tag.name;
513                let args_quote = tag.args.as_def_quote();
514                quote! {
515                    #[derive(Debug, Clone, Hash, PartialEq)]
516                    pub struct #name <'a> #args_quote
517                }
518            }
519            BnfLine::Enum(en) => {
520                let name = &en.name;
521                let args_quote = en.as_def_quote();
522                quote! {
523                    #[derive(Debug, Clone, Hash, PartialEq)]
524                    pub enum #name <'a> #args_quote
525                }
526            }
527        };
528
529        definitions.push(definition);
530
531        let input_ident = Ident::new("input", proc_macro2::Span::call_site());
532        let definition = match &line {
533            BnfLine::Tag(tag) => {
534                let name = &tag.name;
535                let parse_quote = tag.as_parse_quote(&input_ident);
536                quote! {
537                    impl<'a> #name<'a> {
538                        pub fn parse(mut #input_ident: StringParser<'a>) -> Result<(Self, StringParser<'a>), ParseError> {
539                            Ok((#parse_quote, #input_ident))
540                        }
541
542                        pub fn parse_str(string: &'a str) -> Result<(Self, StringParser<'a>), ParseError> {
543                            let mut #input_ident = StringParser::new(string);
544                            Self::parse(#input_ident)
545                        }
546                    }
547                }
548            }
549            BnfLine::Enum(en) => {
550                let name = &en.name;
551                let parse_quote = en.as_parse_quote(&input_ident);
552                quote! {
553                    impl<'a> #name<'a> {
554                        pub fn parse(mut #input_ident: StringParser<'a>) -> Result<(Self, StringParser<'a>), ParseError> {
555                            #parse_quote
556                        }
557
558                        pub fn parse_str(string: &'a str) -> Result<(Self, StringParser<'a>), ParseError> {
559                            let mut #input_ident = StringParser::new(string);
560                            Self::parse(#input_ident)
561                        }
562                    }
563                }
564            }
565        };
566
567        definitions.push(definition);
568    }
569
570    let extras = quote! {};
571
572    let expanded = quote! {
573        #extras
574        #(#definitions)*
575    };
576
577    // eprintln!("\n\n{}\n\n", &expanded.to_string());
578
579    TokenStream::from(expanded)
580}