rust_sitter_tool/
lib.rs

1use serde_json::Value;
2use syn::{parse_quote, Item};
3
4mod expansion;
5use expansion::*;
6
/// Semantic version triple handed to `generate_parser_for_grammar` each time a
/// C parser is generated from a grammar's JSON definition.
const GENERATED_SEMANTIC_VERSION: Option<(u8, u8, u8)> = Some((0, 25, 2));
8
9/// Generates JSON strings defining Tree Sitter grammars for every Rust Sitter
10/// grammar found in the given module and recursive submodules.
11pub fn generate_grammars(root_file: &Path) -> Vec<Value> {
12    let root_file = syn_inline_mod::parse_and_inline_modules(root_file).items;
13    let mut out = vec![];
14    root_file
15        .iter()
16        .for_each(|i| generate_all_grammars(i, &mut out));
17    out
18}
19
20fn generate_all_grammars(item: &Item, out: &mut Vec<Value>) {
21    if let Item::Mod(m) = item {
22        m.content
23            .iter()
24            .for_each(|(_, items)| items.iter().for_each(|i| generate_all_grammars(i, out)));
25
26        if m.attrs
27            .iter()
28            .any(|a| a.path() == &parse_quote!(rust_sitter::grammar))
29        {
30            out.push(generate_grammar(m))
31        }
32    }
33}
34
35#[cfg(feature = "build_parsers")]
36use std::io::Write;
37use std::path::Path;
38
39#[cfg(feature = "build_parsers")]
40use tree_sitter_generate::generate_parser_for_grammar;
41
#[cfg(feature = "build_parsers")]
/// Using the `cc` crate, generates and compiles a C parser with Tree Sitter
/// for every Rust Sitter grammar found in the given module and recursive
/// submodules.
///
/// For each grammar the generated `parser.c`, the pretty-printed grammar JSON
/// and the `tree_sitter/parser.h` header are written into a working
/// directory, which is then compiled into a library named after the grammar.
///
/// Environment variables read:
/// * `OUT_DIR` - base directory for emitted artifacts.
/// * `TARGET` - when it starts with `wasm32`, a minimal C sysroot
///   (`stdint.h`, `stdlib.h`, `stdio.h`, `stdbool.h`) is written next to the
///   parser sources.
/// * `RUST_SITTER_EMIT_ARTIFACTS` - when it parses as `true`, the working
///   directory is `$OUT_DIR/grammar_<name>` (cleared and recreated on every
///   run); otherwise a temporary directory is used and discarded afterwards.
///
/// # Panics
///
/// Panics if `OUT_DIR` or `TARGET` is unset, if parser generation fails, or
/// if any of the filesystem operations fail.
pub fn build_parsers(root_file: &Path) {
    use std::env;

    let out_dir = env::var("OUT_DIR").unwrap();
    let emit_artifacts: bool = env::var("RUST_SITTER_EMIT_ARTIFACTS")
        .map(|s| s.parse().unwrap_or(false))
        .unwrap_or(false);

    generate_grammars(root_file).iter().for_each(|grammar| {
        let (grammar_name, grammar_c) =
            generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap();

        // Only allocate a temporary directory when artifacts are not kept.
        // The handle is held until the end of this iteration so the directory
        // survives until compilation has finished.
        let scratch_dir = (!emit_artifacts).then(|| {
            tempfile::Builder::new()
                .prefix("grammar")
                .tempdir()
                .unwrap()
        });

        let dir = match &scratch_dir {
            Some(scratch) => scratch.path().to_path_buf(),
            None => {
                let grammar_dir = Path::new(&out_dir).join(format!("grammar_{grammar_name}"));
                // On the first build there is nothing to clear yet, so a
                // missing directory is not an error.
                match std::fs::remove_dir_all(&grammar_dir) {
                    Ok(()) => {}
                    Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
                    Err(e) => panic!("Couldn't clear old artifacts: {e:?}"),
                }
                std::fs::DirBuilder::new()
                    .recursive(true)
                    .create(&grammar_dir)
                    .expect("Couldn't create grammar JSON directory");
                grammar_dir
            }
        };

        write_artifact(&dir.join("parser.c"), grammar_c.as_bytes());

        // Emit the grammar JSON alongside the generated parser.
        write_artifact(
            &dir.join(format!("{grammar_name}.json")),
            serde_json::to_string_pretty(grammar).unwrap().as_bytes(),
        );

        let header_dir = dir.join("tree_sitter");
        std::fs::create_dir(&header_dir).unwrap();
        write_artifact(
            &header_dir.join("parser.h"),
            tree_sitter::PARSER_HEADER.as_bytes(),
        );

        let sysroot_dir = dir.join("sysroot");
        if env::var("TARGET").unwrap().starts_with("wasm32") {
            std::fs::create_dir(&sysroot_dir).unwrap();

            let sysroot_headers: [(&str, &[u8]); 4] = [
                ("stdint.h", &include_bytes!("wasm-sysroot/stdint.h")[..]),
                ("stdlib.h", &include_bytes!("wasm-sysroot/stdlib.h")[..]),
                ("stdio.h", &include_bytes!("wasm-sysroot/stdio.h")[..]),
                ("stdbool.h", &include_bytes!("wasm-sysroot/stdbool.h")[..]),
            ];
            for &(file_name, contents) in &sysroot_headers {
                write_artifact(&sysroot_dir.join(file_name), contents);
            }
        }

        let mut c_config = cc::Build::new();
        // The sysroot include path is always added; the directory itself is
        // only populated for wasm32 targets.
        c_config.std("c11").include(&dir).include(&sysroot_dir);
        c_config
            .flag_if_supported("-Wno-unused-label")
            .flag_if_supported("-Wno-unused-parameter")
            .flag_if_supported("-Wno-unused-but-set-variable")
            .flag_if_supported("-Wno-trigraphs")
            .flag_if_supported("-Wno-everything");
        c_config.file(dir.join("parser.c"));

        c_config.compile(&grammar_name);
    });
}

#[cfg(feature = "build_parsers")]
/// Creates (or truncates) the file at `path` and writes `contents` to it,
/// panicking with the offending path if either step fails.
fn write_artifact(path: &Path, contents: &[u8]) {
    let mut file = std::fs::File::create(path)
        .unwrap_or_else(|e| panic!("Couldn't create {}: {e:?}", path.display()));
    file.write_all(contents)
        .unwrap_or_else(|e| panic!("Couldn't write {}: {e:?}", path.display()));
}
135
#[cfg(test)]
mod tests {
    use syn::parse_quote;

    use super::{generate_grammar, GENERATED_SEMANTIC_VERSION};
    use tree_sitter_generate::generate_parser_for_grammar;

    /// Extracts the module from a parsed item, panicking if the item is
    /// anything other than a module.
    fn unwrap_mod(item: syn::Item) -> syn::ItemMod {
        match item {
            syn::Item::Mod(module) => module,
            _ => panic!(),
        }
    }

    /// Checks that a parser can be generated from the given grammar JSON,
    /// panicking if generation fails.
    fn assert_parser_generates(grammar: &serde_json::Value) {
        generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap();
    }

    // Every test follows the same recipe: expand a grammar module, snapshot
    // the resulting JSON, then confirm a parser can be generated from it.
    // The snapshot assertion stays inside each test function.

    #[test]
    fn enum_with_named_field() {
        let module = unwrap_mod(parse_quote! {
            #[rust_sitter::grammar("test")]
            mod grammar {
                #[rust_sitter::language]
                pub enum Expr {
                    Number(
                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                        u32
                    ),
                    Neg {
                        #[rust_sitter::leaf(text = "!")]
                        _bang: (),
                        value: Box<Expr>,
                    }
                }
            }
        });

        let grammar = generate_grammar(&module);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    #[test]
    fn enum_transformed_fields() {
        let module = unwrap_mod(parse_quote! {
            #[rust_sitter::grammar("test")]
            mod grammar {
                #[rust_sitter::language]
                pub enum Expression {
                    Number(
                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v: &str| v.parse::<i32>().unwrap())]
                        i32
                    ),
                }
            }
        });

        let grammar = generate_grammar(&module);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    #[test]
    fn enum_recursive() {
        let module = unwrap_mod(parse_quote! {
            #[rust_sitter::grammar("test")]
            mod grammar {
                #[rust_sitter::language]
                pub enum Expression {
                    Number(
                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v: &str| v.parse::<i32>().unwrap())]
                        i32
                    ),
                    Neg(
                        #[rust_sitter::leaf(text = "-", transform = |v| ())]
                        (),
                        Box<Expression>
                    ),
                }
            }
        });

        let grammar = generate_grammar(&module);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    #[test]
    fn enum_prec_left() {
        let module = unwrap_mod(parse_quote! {
            #[rust_sitter::grammar("test")]
            mod grammar {
                #[rust_sitter::language]
                pub enum Expression {
                    Number(
                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v: &str| v.parse::<i32>().unwrap())]
                        i32
                    ),
                    #[rust_sitter::prec_left(1)]
                    Sub(
                        Box<Expression>,
                        #[rust_sitter::leaf(text = "-", transform = |v| ())]
                        (),
                        Box<Expression>
                    ),
                }
            }
        });

        let grammar = generate_grammar(&module);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    #[test]
    fn grammar_with_extras() {
        let module = unwrap_mod(parse_quote! {
            #[rust_sitter::grammar("test")]
            mod grammar {
                #[rust_sitter::language]
                pub enum Expression {
                    Number(
                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v: &str| v.parse::<i32>().unwrap())]
                        i32
                    ),
                }

                #[rust_sitter::extra]
                struct Whitespace {
                    #[rust_sitter::leaf(pattern = r"\s", transform = |_v| ())]
                    _whitespace: (),
                }
            }
        });

        let grammar = generate_grammar(&module);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    #[test]
    fn grammar_unboxed_field() {
        let module = unwrap_mod(parse_quote! {
            #[rust_sitter::grammar("test")]
            mod grammar {
                #[rust_sitter::language]
                pub struct Language {
                    e: Expression,
                }

                pub enum Expression {
                    Number(
                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v: &str| v.parse::<i32>().unwrap())]
                        i32
                    ),
                }
            }
        });

        let grammar = generate_grammar(&module);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    #[test]
    fn grammar_repeat() {
        let module = unwrap_mod(parse_quote! {
            #[rust_sitter::grammar("test")]
            pub mod grammar {
                #[rust_sitter::language]
                pub struct NumberList {
                    #[rust_sitter::delimited(
                        #[rust_sitter::leaf(text = ",")]
                        ()
                    )]
                    numbers: Vec<Number>,
                }

                pub struct Number {
                    #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                    v: i32,
                }

                #[rust_sitter::extra]
                struct Whitespace {
                    #[rust_sitter::leaf(pattern = r"\s")]
                    _whitespace: (),
                }
            }
        });

        let grammar = generate_grammar(&module);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    #[test]
    fn grammar_repeat_no_delimiter() {
        let module = unwrap_mod(parse_quote! {
            #[rust_sitter::grammar("test")]
            pub mod grammar {
                #[rust_sitter::language]
                pub struct NumberList {
                    numbers: Vec<Number>,
                }

                pub struct Number {
                    #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                    v: i32,
                }

                #[rust_sitter::extra]
                struct Whitespace {
                    #[rust_sitter::leaf(pattern = r"\s")]
                    _whitespace: (),
                }
            }
        });

        let grammar = generate_grammar(&module);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    #[test]
    fn grammar_repeat1() {
        let module = unwrap_mod(parse_quote! {
            #[rust_sitter::grammar("test")]
            pub mod grammar {
                #[rust_sitter::language]
                pub struct NumberList {
                    #[rust_sitter::repeat(non_empty = true)]
                    #[rust_sitter::delimited(
                        #[rust_sitter::leaf(text = ",")]
                        ()
                    )]
                    numbers: Vec<Number>,
                }

                pub struct Number {
                    #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                    v: i32,
                }

                #[rust_sitter::extra]
                struct Whitespace {
                    #[rust_sitter::leaf(pattern = r"\s")]
                    _whitespace: (),
                }
            }
        });

        let grammar = generate_grammar(&module);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    #[test]
    fn struct_optional() {
        let module = unwrap_mod(parse_quote! {
            #[rust_sitter::grammar("test")]
            mod grammar {
                #[rust_sitter::language]
                pub struct Language {
                    #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                    v: Option<i32>,
                    #[rust_sitter::leaf(pattern = r" ", transform = |v| ())]
                    space: (),
                    t: Option<Number>,
                }

                pub struct Number {
                    #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                    v: i32
                }
            }
        });

        let grammar = generate_grammar(&module);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    #[test]
    fn enum_with_unamed_vector() {
        let module = unwrap_mod(parse_quote! {
            #[rust_sitter::grammar("test")]
            mod grammar {
                pub struct Number {
                    #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                    value: u32
                }

                #[rust_sitter::language]
                pub enum Expr {
                    Numbers(
                        #[rust_sitter::repeat(non_empty = true)]
                        Vec<Number>
                    )
                }
            }
        });

        let grammar = generate_grammar(&module);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    #[test]
    fn spanned_in_vec() {
        let module = unwrap_mod(parse_quote! {
            #[rust_sitter::grammar("test")]
            mod grammar {
                use rust_sitter::Spanned;

                #[rust_sitter::language]
                pub struct NumberList {
                    #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                    numbers: Vec<Spanned<i32>>,
                }

                #[rust_sitter::extra]
                struct Whitespace {
                    #[rust_sitter::leaf(pattern = r"\s")]
                    _whitespace: (),
                }
            }
        });

        let grammar = generate_grammar(&module);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }
}