rust_sitter_macro/
lib.rs

1use quote::ToTokens;
2use syn::{parse_macro_input, ItemMod};
3
4mod errors;
5mod expansion;
6use expansion::*;
7
8#[proc_macro_attribute]
9/// Marks the top level AST node where parsing should start.
10///
11/// ## Example
12/// ```ignore
13/// #[rust_sitter::language]
14/// pub struct Code {
15///     ...
16/// }
17/// ```
18pub fn language(
19    _attr: proc_macro::TokenStream,
20    item: proc_macro::TokenStream,
21) -> proc_macro::TokenStream {
22    item
23}
24
25#[proc_macro_attribute]
26/// This annotation marks a node as extra, which can safely be skipped while parsing.
27/// This is useful for handling whitespace/newlines/comments.
28///
29/// ## Example
30/// ```ignore
31/// #[rust_sitter::extra]
32/// struct Whitespace {
33///     #[rust_sitter::leaf(pattern = r"\s")]
34///     _whitespace: (),
35/// }
36/// ```
37pub fn extra(
38    _attr: proc_macro::TokenStream,
39    item: proc_macro::TokenStream,
40) -> proc_macro::TokenStream {
41    item
42}
43
44#[proc_macro_attribute]
45/// Defines a field which matches a specific token in the source string.
46/// The token can be defined by passing one of two arguments
47/// - `text`: a string literal that will be exactly matched
48/// - `pattern`: a regular expression that will be matched against the source string
49///
50/// If the resulting token needs to be converted into a richer type at runtime,
51/// such as a number, then the `transform` argument can be used to specify a function
52/// that will be called with the token's text.
53///
54/// The attribute can also be applied to a struct or enum variant with no fields.
55///
56/// ## Examples
57///
58/// Using the `leaf` attribute on a field:
59/// ```ignore
60/// Number(
61///     #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
62///     u32
63/// )
64/// ```
65///
66/// Using the attribute on a unit struct or unit enum variant:
67/// ```ignore
68/// #[rust_sitter::leaf(text = "9")]
69/// struct BigDigit;
70///
71/// enum SmallDigit {
72///     #[rust_sitter::leaf(text = "0")]
73///     Zero,
74///     #[rust_sitter::leaf(text = "1")]
75///     One,
76/// }
77/// ```
78///
79pub fn leaf(
80    _attr: proc_macro::TokenStream,
81    item: proc_macro::TokenStream,
82) -> proc_macro::TokenStream {
83    item
84}
85
86#[proc_macro_attribute]
87/// Defines a field that does not correspond to anything in the input string,
88/// such as some metadata. Takes a single, unnamed argument, which is the value
89/// used to populate the field at runtime.
90///
91/// ## Example
92/// ```ignore
93/// struct MyNode {
94///    ...,
95///    #[rust_sitter::skip(false)]
96///    node_visited: bool
97/// }
98/// ```
99pub fn skip(
100    _attr: proc_macro::TokenStream,
101    item: proc_macro::TokenStream,
102) -> proc_macro::TokenStream {
103    item
104}
105
106#[proc_macro_attribute]
107/// Defines a precedence level for a non-terminal that has no associativity.
108///
109/// This annotation takes a single, unnamed parameter, which specifies the precedence level.
110/// This is used to resolve conflicts with other non-terminals, so that the one with the higher
111/// precedence will bind more tightly (appear lower in the parse tree).
112///
113/// ## Example
114/// ```ignore
115/// #[rust_sitter::prec(1)]
116/// PriorityExpr(Box<Expr>, Box<Expr>)
117/// ```
118pub fn prec(
119    _attr: proc_macro::TokenStream,
120    item: proc_macro::TokenStream,
121) -> proc_macro::TokenStream {
122    item
123}
124
125#[proc_macro_attribute]
126/// Defines a precedence level for a non-terminal that should be left-associative.
127/// For example, with subtraction we expect 1 - 2 - 3 to be parsed as (1 - 2) - 3,
128/// which corresponds to a left-associativity.
129///
130/// This annotation takes a single, unnamed parameter, which specifies the precedence level.
131/// This is used to resolve conflicts with other non-terminals, so that the one with the higher
132/// precedence will bind more tightly (appear lower in the parse tree).
133///
134/// ## Example
135/// ```ignore
136/// #[rust_sitter::prec_left(1)]
137/// Subtract(Box<Expr>, Box<Expr>)
138/// ```
139pub fn prec_left(
140    _attr: proc_macro::TokenStream,
141    item: proc_macro::TokenStream,
142) -> proc_macro::TokenStream {
143    item
144}
145
146#[proc_macro_attribute]
147/// Defines a precedence level for a non-terminal that should be right-associative.
148/// For example, with cons we could have 1 :: 2 :: 3 to be parsed as 1 :: (2 :: 3),
149/// which corresponds to a right-associativity.
150///
151/// This annotation takes a single, unnamed parameter, which specifies the precedence level.
152/// This is used to resolve conflicts with other non-terminals, so that the one with the higher
153/// precedence will bind more tightly (appear lower in the parse tree).
154///
155/// ## Example
156/// ```ignore
157/// #[rust_sitter::prec_right(1)]
158/// Cons(Box<Expr>, Box<Expr>)
159/// ```
160pub fn prec_right(
161    _attr: proc_macro::TokenStream,
162    item: proc_macro::TokenStream,
163) -> proc_macro::TokenStream {
164    item
165}
166
167#[proc_macro_attribute]
168/// On `Vec<_>` typed fields, specifies a non-terminal that should be parsed in between the elements.
169/// The [`rust_sitter::repeat`] annotation must be used on the field as well.
170///
171/// This annotation takes a single, unnamed argument, which specifies a field type to parse. This can
172/// either be a reference to another type, or can be defined as a `leaf` field. Generally, the argument
173/// is parsed using the same rules as an unnamed field of an enum variant.
174///
175/// ## Example
176/// ```ignore
177/// #[rust_sitter::delimited(
178///     #[rust_sitter::leaf(text = ",")]
179///     ()
180/// )]
181/// numbers: Vec<Number>
182/// ```
183pub fn delimited(
184    _attr: proc_macro::TokenStream,
185    item: proc_macro::TokenStream,
186) -> proc_macro::TokenStream {
187    item
188}
189
190#[proc_macro_attribute]
191/// On `Vec<_>` typed fields, specifies additional config for how the repeated elements should
192/// be parsed. In particular, this annotation takes the following named arguments:
193/// - `non_empty` - if this argument is `true`, then there must be at least one element parsed
194///
195/// ## Example
196/// ```ignore
197/// #[rust_sitter::repeat(non_empty = true)]
198/// numbers: Vec<Number>
199/// ```
200pub fn repeat(
201    _attr: proc_macro::TokenStream,
202    item: proc_macro::TokenStream,
203) -> proc_macro::TokenStream {
204    item
205}
206
207/// Mark a module to be analyzed for a Rust Sitter grammar. Takes a single, unnamed argument, which
208/// specifies the name of the grammar. This name must be unique across all Rust Sitter grammars within
209/// a compilation unit.
210#[proc_macro_attribute]
211pub fn grammar(
212    attr: proc_macro::TokenStream,
213    input: proc_macro::TokenStream,
214) -> proc_macro::TokenStream {
215    let attr_tokens: proc_macro2::TokenStream = attr.into();
216    let module: ItemMod = parse_macro_input!(input);
217    let expanded = expand_grammar(syn::parse_quote! {
218        #[rust_sitter::grammar[#attr_tokens]]
219        #module
220    })
221    .map(ToTokens::into_token_stream)
222    .unwrap_or_else(syn::Error::into_compile_error);
223    proc_macro::TokenStream::from(expanded)
224}
225
226#[cfg(test)]
227mod tests {
228    use std::fs::File;
229    use std::io::{Read, Write};
230    use std::process::Command;
231
232    use quote::ToTokens;
233    use syn::{parse_quote, Result};
234    use tempfile::tempdir;
235
236    use super::expand_grammar;
237
238    fn rustfmt_code(code: &str) -> String {
239        let dir = tempdir().unwrap();
240        let file_path = dir.path().join("temp.rs");
241        let mut file = File::create(file_path.clone()).unwrap();
242
243        writeln!(file, "{code}").unwrap();
244        drop(file);
245
246        Command::new("rustfmt")
247            .arg(file_path.to_str().unwrap())
248            .spawn()
249            .unwrap()
250            .wait()
251            .unwrap();
252
253        let mut file = File::open(file_path).unwrap();
254        let mut data = String::new();
255        file.read_to_string(&mut data).unwrap();
256        drop(file);
257        dir.close().unwrap();
258        data
259    }
260
261    #[test]
262    fn enum_transformed_fields() -> Result<()> {
263        insta::assert_snapshot!(rustfmt_code(
264            &expand_grammar(parse_quote! {
265                #[rust_sitter::grammar("test")]
266                mod grammar {
267                    #[rust_sitter::language]
268                    pub enum Expression {
269                        Number(
270                            #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse::<i32>().unwrap())]
271                            i32
272                        ),
273                    }
274                }
275            })?
276            .to_token_stream()
277            .to_string()
278        ));
279
280        Ok(())
281    }
282
283    #[test]
284    fn enum_recursive() -> Result<()> {
285        insta::assert_snapshot!(rustfmt_code(
286            &expand_grammar(parse_quote! {
287                #[rust_sitter::grammar("test")]
288                mod grammar {
289                    #[rust_sitter::language]
290                    pub enum Expression {
291                        Number(
292                            #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
293                            i32
294                        ),
295                        Neg(
296                            #[rust_sitter::leaf(text = "-")]
297                            (),
298                            Box<Expression>
299                        ),
300                    }
301                }
302            })?
303            .to_token_stream()
304            .to_string()
305        ));
306
307        Ok(())
308    }
309
310    #[test]
311    fn enum_prec_left() -> Result<()> {
312        insta::assert_snapshot!(rustfmt_code(
313            &expand_grammar(parse_quote! {
314                #[rust_sitter::grammar("test")]
315                mod grammar {
316                    #[rust_sitter::language]
317                    pub enum Expression {
318                        Number(
319                            #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
320                            i32
321                        ),
322                        #[rust_sitter::prec_left(1)]
323                        Sub(
324                            Box<Expression>,
325                            #[rust_sitter::leaf(text = "-")]
326                            (),
327                            Box<Expression>
328                        ),
329                    }
330                }
331            })?
332            .to_token_stream()
333            .to_string()
334        ));
335
336        Ok(())
337    }
338
339    #[test]
340    fn struct_extra() -> Result<()> {
341        insta::assert_snapshot!(rustfmt_code(
342            &expand_grammar(parse_quote! {
343                #[rust_sitter::grammar("test")]
344                mod grammar {
345                    #[rust_sitter::language]
346                    pub enum Expression {
347                        Number(
348                            #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] i32,
349                        ),
350                    }
351
352                    #[rust_sitter::extra]
353                    struct Whitespace {
354                        #[rust_sitter::leaf(pattern = r"\s")]
355                        _whitespace: (),
356                    }
357                }
358            })?
359            .to_token_stream()
360            .to_string()
361        ));
362
363        Ok(())
364    }
365
366    #[test]
367    fn grammar_unboxed_field() -> Result<()> {
368        insta::assert_snapshot!(rustfmt_code(
369            &expand_grammar(parse_quote! {
370                #[rust_sitter::grammar("test")]
371                mod grammar {
372                    #[rust_sitter::language]
373                    pub struct Language {
374                        e: Expression,
375                    }
376
377                    pub enum Expression {
378                        Number(
379                            #[rust_sitter::leaf(pattern = r"\d+", transform = |v: &str| v.parse::<i32>().unwrap())]
380                            i32
381                        ),
382                    }
383                }
384            })?
385            .to_token_stream()
386            .to_string()
387        ));
388
389        Ok(())
390    }
391
392    #[test]
393    fn struct_repeat() -> Result<()> {
394        insta::assert_snapshot!(rustfmt_code(
395            &expand_grammar(parse_quote! {
396                #[rust_sitter::grammar("test")]
397                mod grammar {
398                    #[rust_sitter::language]
399                    pub struct NumberList {
400                        numbers: Vec<Number>,
401                    }
402
403                    pub struct Number {
404                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
405                        v: i32
406                    }
407
408                    #[rust_sitter::extra]
409                    struct Whitespace {
410                        #[rust_sitter::leaf(pattern = r"\s")]
411                        _whitespace: (),
412                    }
413                }
414            })?
415            .to_token_stream()
416            .to_string()
417        ));
418
419        Ok(())
420    }
421
422    #[test]
423    fn struct_optional() -> Result<()> {
424        insta::assert_snapshot!(rustfmt_code(
425            &expand_grammar(parse_quote! {
426                #[rust_sitter::grammar("test")]
427                mod grammar {
428                    #[rust_sitter::language]
429                    pub struct Language {
430                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
431                        v: Option<i32>,
432                        t: Option<Number>,
433                    }
434
435                    pub struct Number {
436                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
437                        v: i32
438                    }
439                }
440            })?
441            .to_token_stream()
442            .to_string()
443        ));
444
445        Ok(())
446    }
447
448    #[test]
449    fn enum_with_unamed_vector() -> Result<()> {
450        insta::assert_snapshot!(rustfmt_code(
451            &expand_grammar(parse_quote! {
452                #[rust_sitter::grammar("test")]
453                mod grammar {
454                    pub struct Number {
455                            #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
456                            value: u32
457                    }
458
459                    #[rust_sitter::language]
460                    pub enum Expr {
461                        Numbers(
462                            #[rust_sitter::repeat(non_empty = true)]
463                            Vec<Number>
464                        )
465                    }
466                }
467            })?
468            .to_token_stream()
469            .to_string()
470        ));
471
472        Ok(())
473    }
474
475    #[test]
476    fn enum_with_named_field() -> Result<()> {
477        insta::assert_snapshot!(rustfmt_code(
478            &expand_grammar(parse_quote! {
479                #[rust_sitter::grammar("test")]
480                mod grammar {
481                    #[rust_sitter::language]
482                    pub enum Expr {
483                        Number(
484                                #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
485                                u32
486                        ),
487                        Neg {
488                            #[rust_sitter::leaf(text = "!")]
489                            _bang: (),
490                            value: Box<Expr>,
491                        }
492                    }
493                }
494            })?
495            .to_token_stream()
496            .to_string()
497        ));
498
499        Ok(())
500    }
501
502    #[test]
503    fn spanned_in_vec() -> Result<()> {
504        insta::assert_snapshot!(rustfmt_code(
505            &expand_grammar(parse_quote! {
506                #[rust_sitter::grammar("test")]
507                mod grammar {
508                    use rust_sitter::Spanned;
509
510                    #[rust_sitter::language]
511                    pub struct NumberList {
512                        numbers: Vec<Spanned<Number>>,
513                    }
514
515                    pub struct Number {
516                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
517                        v: i32
518                    }
519
520                    #[rust_sitter::extra]
521                    struct Whitespace {
522                        #[rust_sitter::leaf(pattern = r"\s")]
523                        _whitespace: (),
524                    }
525                }
526            })?
527            .to_token_stream()
528            .to_string()
529        ));
530
531        Ok(())
532    }
533}