Skip to main content

syn_grammar_model/
parser.rs

1// Moved from macros/src/parser.rs
2use proc_macro2::TokenStream;
3use syn::parse::{Parse, ParseStream};
4use syn::{token, Attribute, Generics, Ident, ItemUse, Lit, Result, Token, Type};
5
6mod rt {
7    use syn::ext::IdentExt;
8    use syn::parse::discouraged::Speculative;
9    use syn::parse::ParseStream;
10    use syn::Result;
11
12    pub fn attempt<T>(
13        input: ParseStream,
14        parser: impl FnOnce(ParseStream) -> Result<T>,
15    ) -> Result<Option<T>> {
16        let fork = input.fork();
17        match parser(&fork) {
18            Ok(res) => {
19                input.advance_to(&fork);
20                Ok(Some(res))
21            }
22            Err(_) => Ok(None),
23        }
24    }
25
26    pub fn parse_ident(input: ParseStream) -> Result<syn::Ident> {
27        input.call(syn::Ident::parse_any)
28    }
29}
30
/// Custom keywords of the grammar DSL, declared with `syn::custom_keyword!`.
pub mod kw {
    // `grammar`: first token consumed by `GrammarDefinition::parse`.
    syn::custom_keyword!(grammar);
    // `rule`: introduces each rule (see `Rule::parse`).
    syn::custom_keyword!(rule);
    // `paren`: prefix of the `paren( ... )` pattern (see `parse_atom`).
    syn::custom_keyword!(paren);
    // `recover`: prefix of the `recover(body, sync)` pattern.
    syn::custom_keyword!(recover);
    // `peek`: prefix of the `peek(pattern)` pattern.
    syn::custom_keyword!(peek);
    // `not`: prefix of the `not(pattern)` pattern.
    syn::custom_keyword!(not);
}
39
/// A parsed `grammar Name [: Parent] { ... }` definition.
pub struct GrammarDefinition {
    /// Identifier that follows the `grammar` keyword.
    pub name: Ident,
    /// Optional `: Parent` clause written after the name.
    pub inherits: Option<InheritanceSpec>,
    /// `use` items found at the start of the braced body, in source order.
    pub uses: Vec<ItemUse>,
    /// Every rule that follows the `use` items, in source order.
    pub rules: Vec<Rule>,
}
46
47impl Parse for GrammarDefinition {
48    fn parse(input: ParseStream) -> Result<Self> {
49        let _ = input.parse::<kw::grammar>()?;
50        let name = rt::parse_ident(input)?;
51
52        let inherits = if input.peek(Token![:]) {
53            Some(input.parse::<InheritanceSpec>()?)
54        } else {
55            None
56        };
57
58        let content;
59        let _ = syn::braced!(content in input);
60
61        let mut uses = Vec::new();
62        while content.peek(Token![use]) {
63            uses.push(content.parse()?);
64        }
65
66        let rules = Rule::parse_all(&content)?;
67
68        Ok(GrammarDefinition {
69            name,
70            inherits,
71            uses,
72            rules,
73        })
74    }
75}
76
/// The `: Parent` clause of a grammar definition.
pub struct InheritanceSpec {
    /// Identifier written after the colon.
    pub name: Ident,
}
80
81impl Parse for InheritanceSpec {
82    fn parse(input: ParseStream) -> Result<Self> {
83        let _ = input.parse::<Token![:]>()?;
84        let name = rt::parse_ident(input)?;
85        Ok(InheritanceSpec { name })
86    }
87}
88
/// One entry of a rule's parenthesized parameter list: `name` or `name: Type`.
pub struct RuleParameter {
    /// Parameter name.
    pub name: Ident,
    /// Type written after `:`, or `None` when no type annotation was given.
    pub ty: Option<Type>,
}
93
94impl Parse for RuleParameter {
95    fn parse(input: ParseStream) -> Result<Self> {
96        let name: Ident = input.parse()?;
97        let ty = if input.peek(Token![:]) {
98            let _ = input.parse::<Token![:]>()?;
99            Some(input.parse()?)
100        } else {
101            None
102        };
103        Ok(RuleParameter { name, ty })
104    }
105}
106
/// A single parsed rule:
/// `[#[attr]]* [pub] rule name[<generics>][(params)] -> Type = variants`.
pub struct Rule {
    /// Outer attributes written before the rule.
    pub attrs: Vec<Attribute>,
    /// The `pub` token, when the rule was declared with one.
    pub is_pub: Option<Token![pub]>,
    /// Rule name, i.e. the identifier after the `rule` keyword.
    pub name: Ident,
    /// Generic parameters following the name.
    pub generics: Generics,
    /// Parameters from the optional parenthesized list; empty when the list is absent.
    pub params: Vec<RuleParameter>,
    /// Type written after `->`.
    pub return_type: Type,
    /// Alternatives of the rule body, separated by `|` in the source.
    pub variants: Vec<RuleVariant>,
}
116
117impl Parse for Rule {
118    fn parse(input: ParseStream) -> Result<Self> {
119        let attrs = Attribute::parse_outer(input)?;
120
121        let is_pub = if input.peek(Token![pub]) {
122            Some(input.parse()?)
123        } else {
124            None
125        };
126
127        let _ = input.parse::<kw::rule>()?;
128        let name = rt::parse_ident(input)?;
129
130        // Parse generics if present (e.g., <T, U>)
131        let generics: Generics = input.parse()?;
132
133        let params = if input.peek(token::Paren) {
134            let content;
135            syn::parenthesized!(content in input);
136            let mut params = Vec::new();
137            while !content.is_empty() {
138                params.push(content.parse()?);
139                if content.peek(Token![,]) {
140                    let _ = content.parse::<Token![,]>()?;
141                }
142            }
143            params
144        } else {
145            Vec::new()
146        };
147
148        let _ = input.parse::<Token![->]>()?;
149        let return_type = input.parse::<Type>()?;
150        let _ = input.parse::<Token![=]>()?;
151
152        let variants = RuleVariant::parse_list(input)?;
153
154        Ok(Rule {
155            attrs,
156            is_pub,
157            name,
158            generics,
159            params,
160            return_type,
161            variants,
162        })
163    }
164}
165
166impl Rule {
167    pub fn parse_all(input: ParseStream) -> Result<Vec<Self>> {
168        let mut rules = Vec::new();
169        while !input.is_empty() {
170            rules.push(input.parse()?);
171        }
172        Ok(rules)
173    }
174}
175
/// One alternative of a rule body: `pattern... -> { action }`.
pub struct RuleVariant {
    /// Patterns written before the `->`, in source order (may be empty).
    pub pattern: Vec<Pattern>,
    /// Raw tokens found between the braces after `->`.
    pub action: TokenStream,
}
180
181impl RuleVariant {
182    pub fn parse_list(input: ParseStream) -> Result<Vec<Self>> {
183        let mut variants = Vec::new();
184        loop {
185            let mut pattern = Vec::new();
186            while !input.peek(Token![->]) && !input.peek(Token![|]) {
187                pattern.push(input.parse()?);
188            }
189
190            let _ = input.parse::<Token![->]>()?;
191
192            let content;
193            syn::braced!(content in input);
194            let action = content.parse()?;
195
196            variants.push(RuleVariant { pattern, action });
197
198            if input.peek(Token![|]) {
199                let _ = input.parse::<Token![|]>()?;
200            } else {
201                break;
202            }
203        }
204        Ok(variants)
205    }
206}
207
/// A single element of a rule variant's pattern sequence.
///
/// Atoms are produced by `parse_atom`; the postfix forms (`*`, `+`, `?`, `@ name`)
/// are wrapped around an atom by `Pattern::parse`.
#[derive(Debug, Clone)]
pub enum Pattern {
    /// The `=>` cut operator; it cannot carry a binding.
    Cut(Token![=>]),
    /// A literal, optionally bound as `name: "lit"`.
    Lit {
        binding: Option<Ident>,
        lit: Lit,
    },
    /// A call to another rule, `[name:] rule_name[(args, ...)]`.
    RuleCall {
        binding: Option<Ident>,
        rule_name: Ident,
        // Empty when no parenthesized argument list follows the rule name.
        args: Vec<Pattern>,
    },
    /// A plain `( ... )` group: `|`-separated alternatives, each a sequence of patterns.
    Group(Vec<Vec<Pattern>>, token::Paren),
    /// A `[ ... ]` group holding a sequence of patterns.
    Bracketed(Vec<Pattern>, token::Bracket),
    /// A `{ ... }` group holding a sequence of patterns.
    Braced(Vec<Pattern>, token::Brace),
    /// A `paren( ... )` group holding a sequence of patterns.
    Parenthesized(Vec<Pattern>, kw::paren, token::Paren),
    /// Postfix `pattern?`.
    Optional(Box<Pattern>, Token![?]),
    /// Postfix `pattern*`.
    Repeat(Box<Pattern>, Token![*]),
    /// Postfix `pattern+`.
    Plus(Box<Pattern>, Token![+]),
    /// Postfix `pattern @ name`.
    // NOTE(review): presumably binds the matched span to `name` — confirm in codegen.
    SpanBinding(Box<Pattern>, Ident, Token![@]),
    /// `[name:] recover(body, sync)`.
    Recover {
        binding: Option<Ident>,
        // First pattern inside the parentheses.
        body: Box<Pattern>,
        // Second pattern, written after the comma.
        sync: Box<Pattern>,
        kw_token: kw::recover,
    },
    /// `peek(pattern)`; it cannot carry a binding.
    Peek(Box<Pattern>, kw::peek),
    /// `not(pattern)`; it cannot carry a binding.
    Not(Box<Pattern>, kw::not),
}
237
238impl Parse for Pattern {
239    fn parse(input: ParseStream) -> Result<Self> {
240        let mut pat = parse_atom(input)?;
241
242        loop {
243            if input.peek(Token![*]) {
244                let token = input.parse::<Token![*]>()?;
245                pat = Pattern::Repeat(Box::new(pat), token);
246            } else if input.peek(Token![+]) {
247                let token = input.parse::<Token![+]>()?;
248                pat = Pattern::Plus(Box::new(pat), token);
249            } else if input.peek(Token![?]) {
250                let token = input.parse::<Token![?]>()?;
251                pat = Pattern::Optional(Box::new(pat), token);
252            } else if input.peek(Token![@]) {
253                let token = input.parse::<Token![@]>()?;
254                let ident = input.parse::<Ident>()?;
255                pat = Pattern::SpanBinding(Box::new(pat), ident, token);
256            } else {
257                break;
258            }
259        }
260        Ok(pat)
261    }
262}
263
264fn parse_atom(input: ParseStream) -> Result<Pattern> {
265    // 1. Check for binding
266    let binding = rt::attempt(input, |input| {
267        let id: Ident = input.parse()?;
268        let _ = input.parse::<Token![:]>()?;
269        Ok(id)
270    })?;
271
272    if input.peek(Token![=>]) {
273        if binding.is_some() {
274            return Err(input.error("Cut operator cannot be bound."));
275        }
276        let token = input.parse::<Token![=>]>()?;
277        Ok(Pattern::Cut(token))
278    } else if input.peek(Lit) {
279        Ok(Pattern::Lit {
280            binding,
281            lit: input.parse()?,
282        })
283    } else if input.peek(token::Bracket) {
284        if binding.is_some() {
285            return Err(input.error("Bracketed groups cannot be bound directly."));
286        }
287        let content;
288        let token = syn::bracketed!(content in input);
289        Ok(Pattern::Bracketed(parse_pattern_list(&content)?, token))
290    } else if input.peek(token::Brace) {
291        if binding.is_some() {
292            return Err(input.error("Braced groups cannot be bound directly."));
293        }
294        let content;
295        let token = syn::braced!(content in input);
296        Ok(Pattern::Braced(parse_pattern_list(&content)?, token))
297    } else if input.peek(kw::paren) {
298        if binding.is_some() {
299            return Err(input.error("Parenthesized groups cannot be bound directly."));
300        }
301        let kw = input.parse::<kw::paren>()?;
302        let content;
303        let token = syn::parenthesized!(content in input);
304        Ok(Pattern::Parenthesized(
305            parse_pattern_list(&content)?,
306            kw,
307            token,
308        ))
309    } else if input.peek(token::Paren) {
310        if binding.is_some() {
311            return Err(input.error("Groups cannot be bound directly."));
312        }
313        let content;
314        let token = syn::parenthesized!(content in input);
315        Ok(Pattern::Group(parse_group_content(&content)?, token))
316    } else if input.peek(kw::recover) {
317        let kw_token = input.parse::<kw::recover>()?;
318        let content;
319        syn::parenthesized!(content in input);
320        let body = content.parse()?;
321        let _ = content.parse::<Token![,]>()?;
322        let sync = content.parse()?;
323        Ok(Pattern::Recover {
324            binding,
325            body: Box::new(body),
326            sync: Box::new(sync),
327            kw_token,
328        })
329    } else if input.peek(kw::peek) {
330        if binding.is_some() {
331            return Err(input.error("Peek cannot be bound."));
332        }
333        let kw_token = input.parse::<kw::peek>()?;
334        let content;
335        syn::parenthesized!(content in input);
336        let inner = content.parse()?;
337        Ok(Pattern::Peek(Box::new(inner), kw_token))
338    } else if input.peek(kw::not) {
339        if binding.is_some() {
340            return Err(input.error("Not cannot be bound."));
341        }
342        let kw_token = input.parse::<kw::not>()?;
343        let content;
344        syn::parenthesized!(content in input);
345        let inner = content.parse()?;
346        Ok(Pattern::Not(Box::new(inner), kw_token))
347    } else {
348        let rule_name: Ident = rt::parse_ident(input)?;
349        let args = parse_args(input)?;
350        Ok(Pattern::RuleCall {
351            binding,
352            rule_name,
353            args,
354        })
355    }
356}
357
358fn parse_args(input: ParseStream) -> Result<Vec<Pattern>> {
359    let mut args = Vec::new();
360    if input.peek(token::Paren) {
361        let content;
362        syn::parenthesized!(content in input);
363        while !content.is_empty() {
364            args.push(content.parse()?);
365            if content.peek(Token![,]) {
366                let _ = content.parse::<Token![,]>()?;
367            }
368        }
369    }
370    Ok(args)
371}
372
373fn parse_pattern_list(input: ParseStream) -> Result<Vec<Pattern>> {
374    let mut list = Vec::new();
375    while !input.is_empty() {
376        list.push(input.parse()?);
377    }
378    Ok(list)
379}
380
381fn parse_group_content(input: ParseStream) -> Result<Vec<Vec<Pattern>>> {
382    let mut alts = Vec::new();
383    loop {
384        let mut seq = Vec::new();
385        while !input.is_empty() && !input.peek(Token![|]) {
386            seq.push(input.parse()?);
387        }
388        alts.push(seq);
389        if input.peek(Token![|]) {
390            let _ = input.parse::<Token![|]>()?;
391        } else {
392            break;
393        }
394    }
395    Ok(alts)
396}