rust_sitter_macro/lib.rs
1use quote::ToTokens;
2use syn::{parse_macro_input, ItemMod};
3
4mod errors;
5mod expansion;
6use expansion::*;
7
8#[proc_macro_attribute]
9/// Marks the top level AST node where parsing should start.
10///
11/// ## Example
12/// ```ignore
13/// #[rust_sitter::language]
14/// pub struct Code {
15/// ...
16/// }
17/// ```
18pub fn language(
19 _attr: proc_macro::TokenStream,
20 item: proc_macro::TokenStream,
21) -> proc_macro::TokenStream {
22 item
23}
24
25#[proc_macro_attribute]
26/// This annotation marks a node as extra, which can safely be skipped while parsing.
27/// This is useful for handling whitespace/newlines/comments.
28///
29/// ## Example
30/// ```ignore
31/// #[rust_sitter::extra]
32/// struct Whitespace {
33/// #[rust_sitter::leaf(pattern = r"\s")]
34/// _whitespace: (),
35/// }
36/// ```
37pub fn extra(
38 _attr: proc_macro::TokenStream,
39 item: proc_macro::TokenStream,
40) -> proc_macro::TokenStream {
41 item
42}
43
44#[proc_macro_attribute]
45/// Defines a field which matches a specific token in the source string.
46/// The token can be defined by passing one of two arguments
47/// - `text`: a string literal that will be exactly matched
48/// - `pattern`: a regular expression that will be matched against the source string
49///
50/// If the resulting token needs to be converted into a richer type at runtime,
51/// such as a number, then the `transform` argument can be used to specify a function
52/// that will be called with the token's text.
53///
54/// The attribute can also be applied to a struct or enum variant with no fields.
55///
56/// ## Examples
57///
58/// Using the `leaf` attribute on a field:
59/// ```ignore
60/// Number(
61/// #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
62/// u32
63/// )
64/// ```
65///
66/// Using the attribute on a unit struct or unit enum variant:
67/// ```ignore
68/// #[rust_sitter::leaf(text = "9")]
69/// struct BigDigit;
70///
71/// enum SmallDigit {
72/// #[rust_sitter::leaf(text = "0")]
73/// Zero,
74/// #[rust_sitter::leaf(text = "1")]
75/// One,
76/// }
77/// ```
78///
79pub fn leaf(
80 _attr: proc_macro::TokenStream,
81 item: proc_macro::TokenStream,
82) -> proc_macro::TokenStream {
83 item
84}
85
86#[proc_macro_attribute]
87/// Defines a field that does not correspond to anything in the input string,
88/// such as some metadata. Takes a single, unnamed argument, which is the value
89/// used to populate the field at runtime.
90///
91/// ## Example
92/// ```ignore
93/// struct MyNode {
94/// ...,
95/// #[rust_sitter::skip(false)]
96/// node_visited: bool
97/// }
98/// ```
99pub fn skip(
100 _attr: proc_macro::TokenStream,
101 item: proc_macro::TokenStream,
102) -> proc_macro::TokenStream {
103 item
104}
105
106#[proc_macro_attribute]
107/// Defines a precedence level for a non-terminal that has no associativity.
108///
109/// This annotation takes a single, unnamed parameter, which specifies the precedence level.
110/// This is used to resolve conflicts with other non-terminals, so that the one with the higher
111/// precedence will bind more tightly (appear lower in the parse tree).
112///
113/// ## Example
114/// ```ignore
115/// #[rust_sitter::prec(1)]
116/// PriorityExpr(Box<Expr>, Box<Expr>)
117/// ```
118pub fn prec(
119 _attr: proc_macro::TokenStream,
120 item: proc_macro::TokenStream,
121) -> proc_macro::TokenStream {
122 item
123}
124
125#[proc_macro_attribute]
126/// Defines a precedence level for a non-terminal that should be left-associative.
127/// For example, with subtraction we expect 1 - 2 - 3 to be parsed as (1 - 2) - 3,
128/// which corresponds to a left-associativity.
129///
130/// This annotation takes a single, unnamed parameter, which specifies the precedence level.
131/// This is used to resolve conflicts with other non-terminals, so that the one with the higher
132/// precedence will bind more tightly (appear lower in the parse tree).
133///
134/// ## Example
135/// ```ignore
136/// #[rust_sitter::prec_left(1)]
137/// Subtract(Box<Expr>, Box<Expr>)
138/// ```
139pub fn prec_left(
140 _attr: proc_macro::TokenStream,
141 item: proc_macro::TokenStream,
142) -> proc_macro::TokenStream {
143 item
144}
145
146#[proc_macro_attribute]
147/// Defines a precedence level for a non-terminal that should be right-associative.
148/// For example, with cons we could have 1 :: 2 :: 3 to be parsed as 1 :: (2 :: 3),
149/// which corresponds to a right-associativity.
150///
151/// This annotation takes a single, unnamed parameter, which specifies the precedence level.
152/// This is used to resolve conflicts with other non-terminals, so that the one with the higher
153/// precedence will bind more tightly (appear lower in the parse tree).
154///
155/// ## Example
156/// ```ignore
157/// #[rust_sitter::prec_right(1)]
158/// Cons(Box<Expr>, Box<Expr>)
159/// ```
160pub fn prec_right(
161 _attr: proc_macro::TokenStream,
162 item: proc_macro::TokenStream,
163) -> proc_macro::TokenStream {
164 item
165}
166
167#[proc_macro_attribute]
168/// On `Vec<_>` typed fields, specifies a non-terminal that should be parsed in between the elements.
169/// The [`rust_sitter::repeat`] annotation must be used on the field as well.
170///
171/// This annotation takes a single, unnamed argument, which specifies a field type to parse. This can
172/// either be a reference to another type, or can be defined as a `leaf` field. Generally, the argument
173/// is parsed using the same rules as an unnamed field of an enum variant.
174///
175/// ## Example
176/// ```ignore
177/// #[rust_sitter::delimited(
178/// #[rust_sitter::leaf(text = ",")]
179/// ()
180/// )]
181/// numbers: Vec<Number>
182/// ```
183pub fn delimited(
184 _attr: proc_macro::TokenStream,
185 item: proc_macro::TokenStream,
186) -> proc_macro::TokenStream {
187 item
188}
189
190#[proc_macro_attribute]
191/// On `Vec<_>` typed fields, specifies additional config for how the repeated elements should
192/// be parsed. In particular, this annotation takes the following named arguments:
193/// - `non_empty` - if this argument is `true`, then there must be at least one element parsed
194///
195/// ## Example
196/// ```ignore
197/// #[rust_sitter::repeat(non_empty = true)]
198/// numbers: Vec<Number>
199/// ```
200pub fn repeat(
201 _attr: proc_macro::TokenStream,
202 item: proc_macro::TokenStream,
203) -> proc_macro::TokenStream {
204 item
205}
206
207/// Mark a module to be analyzed for a Rust Sitter grammar. Takes a single, unnamed argument, which
208/// specifies the name of the grammar. This name must be unique across all Rust Sitter grammars within
209/// a compilation unit.
210#[proc_macro_attribute]
211pub fn grammar(
212 attr: proc_macro::TokenStream,
213 input: proc_macro::TokenStream,
214) -> proc_macro::TokenStream {
215 let attr_tokens: proc_macro2::TokenStream = attr.into();
216 let module: ItemMod = parse_macro_input!(input);
217 let expanded = expand_grammar(syn::parse_quote! {
218 #[rust_sitter::grammar[#attr_tokens]]
219 #module
220 })
221 .map(ToTokens::into_token_stream)
222 .unwrap_or_else(syn::Error::into_compile_error);
223 proc_macro::TokenStream::from(expanded)
224}
225
#[cfg(test)]
mod tests {
    use std::fs::{self, File};
    use std::io::Write;
    use std::process::Command;

    use quote::ToTokens;
    use syn::{parse_quote, Result};
    use tempfile::tempdir;

    use super::expand_grammar;

    /// Runs the `rustfmt` executable over `code` and returns the formatted text.
    ///
    /// The source is written to `temp.rs` inside a fresh temporary directory,
    /// formatted in place, read back, and the directory is then removed.
    ///
    /// # Panics
    /// Panics if any file operation fails, if `rustfmt` cannot be launched, or
    /// if `rustfmt` exits unsuccessfully. The last case matters because the
    /// file would otherwise be returned unformatted and snapshotted as-is.
    fn rustfmt_code(code: &str) -> String {
        let dir = tempdir().unwrap();
        let file_path = dir.path().join("temp.rs");

        // Scope the handle so the file is closed before rustfmt rewrites it.
        {
            let mut file = File::create(&file_path).unwrap();
            writeln!(file, "{code}").unwrap();
        }

        // Passing the path directly avoids a lossy/failing `to_str` conversion.
        let status = Command::new("rustfmt")
            .arg(&file_path)
            .status()
            .expect("failed to launch rustfmt");
        assert!(status.success(), "rustfmt exited with {status}");

        let data = fs::read_to_string(&file_path).unwrap();
        dir.close().unwrap();
        data
    }

    /// Snapshots the expansion of an enum whose single variant is a `leaf`
    /// field with a `transform` closure that names its target type explicitly.
    #[test]
    fn enum_transformed_fields() -> Result<()> {
        insta::assert_snapshot!(rustfmt_code(
            &expand_grammar(parse_quote! {
                #[rust_sitter::grammar("test")]
                mod grammar {
                    #[rust_sitter::language]
                    pub enum Expression {
                        Number(
                            #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse::<i32>().unwrap())]
                            i32
                        ),
                    }
                }
            })?
            .to_token_stream()
            .to_string()
        ));

        Ok(())
    }

    /// Snapshots the expansion of an enum with a variant that refers back to
    /// the enum itself through a `Box`.
    #[test]
    fn enum_recursive() -> Result<()> {
        insta::assert_snapshot!(rustfmt_code(
            &expand_grammar(parse_quote! {
                #[rust_sitter::grammar("test")]
                mod grammar {
                    #[rust_sitter::language]
                    pub enum Expression {
                        Number(
                            #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                            i32
                        ),
                        Neg(
                            #[rust_sitter::leaf(text = "-")]
                            (),
                            Box<Expression>
                        ),
                    }
                }
            })?
            .to_token_stream()
            .to_string()
        ));

        Ok(())
    }

    /// Snapshots the expansion of an enum with a `prec_left` binary variant.
    #[test]
    fn enum_prec_left() -> Result<()> {
        insta::assert_snapshot!(rustfmt_code(
            &expand_grammar(parse_quote! {
                #[rust_sitter::grammar("test")]
                mod grammar {
                    #[rust_sitter::language]
                    pub enum Expression {
                        Number(
                            #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                            i32
                        ),
                        #[rust_sitter::prec_left(1)]
                        Sub(
                            Box<Expression>,
                            #[rust_sitter::leaf(text = "-")]
                            (),
                            Box<Expression>
                        ),
                    }
                }
            })?
            .to_token_stream()
            .to_string()
        ));

        Ok(())
    }

    /// Snapshots the expansion of a grammar that also declares an `extra`
    /// whitespace struct.
    #[test]
    fn struct_extra() -> Result<()> {
        insta::assert_snapshot!(rustfmt_code(
            &expand_grammar(parse_quote! {
                #[rust_sitter::grammar("test")]
                mod grammar {
                    #[rust_sitter::language]
                    pub enum Expression {
                        Number(
                            #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] i32,
                        ),
                    }

                    #[rust_sitter::extra]
                    struct Whitespace {
                        #[rust_sitter::leaf(pattern = r"\s")]
                        _whitespace: (),
                    }
                }
            })?
            .to_token_stream()
            .to_string()
        ));

        Ok(())
    }

    /// Snapshots the expansion of a root struct that holds another grammar
    /// type directly (not wrapped in a `Box`).
    #[test]
    fn grammar_unboxed_field() -> Result<()> {
        insta::assert_snapshot!(rustfmt_code(
            &expand_grammar(parse_quote! {
                #[rust_sitter::grammar("test")]
                mod grammar {
                    #[rust_sitter::language]
                    pub struct Language {
                        e: Expression,
                    }

                    pub enum Expression {
                        Number(
                            #[rust_sitter::leaf(pattern = r"\d+", transform = |v: &str| v.parse::<i32>().unwrap())]
                            i32
                        ),
                    }
                }
            })?
            .to_token_stream()
            .to_string()
        ));

        Ok(())
    }

    /// Snapshots the expansion of a root struct with a `Vec` field of another
    /// grammar type, alongside an `extra` whitespace struct.
    #[test]
    fn struct_repeat() -> Result<()> {
        insta::assert_snapshot!(rustfmt_code(
            &expand_grammar(parse_quote! {
                #[rust_sitter::grammar("test")]
                mod grammar {
                    #[rust_sitter::language]
                    pub struct NumberList {
                        numbers: Vec<Number>,
                    }

                    pub struct Number {
                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                        v: i32
                    }

                    #[rust_sitter::extra]
                    struct Whitespace {
                        #[rust_sitter::leaf(pattern = r"\s")]
                        _whitespace: (),
                    }
                }
            })?
            .to_token_stream()
            .to_string()
        ));

        Ok(())
    }

    /// Snapshots the expansion of a root struct with `Option` fields: one a
    /// `leaf`, the other a reference to another grammar type.
    #[test]
    fn struct_optional() -> Result<()> {
        insta::assert_snapshot!(rustfmt_code(
            &expand_grammar(parse_quote! {
                #[rust_sitter::grammar("test")]
                mod grammar {
                    #[rust_sitter::language]
                    pub struct Language {
                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                        v: Option<i32>,
                        t: Option<Number>,
                    }

                    pub struct Number {
                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                        v: i32
                    }
                }
            })?
            .to_token_stream()
            .to_string()
        ));

        Ok(())
    }

    /// Snapshots the expansion of an enum variant whose unnamed field is a
    /// `Vec` annotated with `repeat(non_empty = true)`.
    #[test]
    fn enum_with_unamed_vector() -> Result<()> {
        insta::assert_snapshot!(rustfmt_code(
            &expand_grammar(parse_quote! {
                #[rust_sitter::grammar("test")]
                mod grammar {
                    pub struct Number {
                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                        value: u32
                    }

                    #[rust_sitter::language]
                    pub enum Expr {
                        Numbers(
                            #[rust_sitter::repeat(non_empty = true)]
                            Vec<Number>
                        )
                    }
                }
            })?
            .to_token_stream()
            .to_string()
        ));

        Ok(())
    }

    /// Snapshots the expansion of an enum that mixes a tuple variant with a
    /// variant that uses named fields.
    #[test]
    fn enum_with_named_field() -> Result<()> {
        insta::assert_snapshot!(rustfmt_code(
            &expand_grammar(parse_quote! {
                #[rust_sitter::grammar("test")]
                mod grammar {
                    #[rust_sitter::language]
                    pub enum Expr {
                        Number(
                            #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                            u32
                        ),
                        Neg {
                            #[rust_sitter::leaf(text = "!")]
                            _bang: (),
                            value: Box<Expr>,
                        }
                    }
                }
            })?
            .to_token_stream()
            .to_string()
        ));

        Ok(())
    }

    /// Snapshots the expansion of a root struct whose `Vec` elements are
    /// wrapped in `Spanned`.
    #[test]
    fn spanned_in_vec() -> Result<()> {
        insta::assert_snapshot!(rustfmt_code(
            &expand_grammar(parse_quote! {
                #[rust_sitter::grammar("test")]
                mod grammar {
                    use rust_sitter::Spanned;

                    #[rust_sitter::language]
                    pub struct NumberList {
                        numbers: Vec<Spanned<Number>>,
                    }

                    pub struct Number {
                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                        v: i32
                    }

                    #[rust_sitter::extra]
                    struct Whitespace {
                        #[rust_sitter::leaf(pattern = r"\s")]
                        _whitespace: (),
                    }
                }
            })?
            .to_token_stream()
            .to_string()
        ));

        Ok(())
    }
}