1use serde_json::Value;
2use syn::{parse_quote, Item};
3
4mod expansion;
5use expansion::*;
6
7const GENERATED_SEMANTIC_VERSION: Option<(u8, u8, u8)> = Some((0, 25, 2));
8
9pub fn generate_grammars(root_file: &Path) -> Vec<Value> {
12 let root_file = syn_inline_mod::parse_and_inline_modules(root_file).items;
13 let mut out = vec![];
14 root_file
15 .iter()
16 .for_each(|i| generate_all_grammars(i, &mut out));
17 out
18}
19
20fn generate_all_grammars(item: &Item, out: &mut Vec<Value>) {
21 if let Item::Mod(m) = item {
22 m.content
23 .iter()
24 .for_each(|(_, items)| items.iter().for_each(|i| generate_all_grammars(i, out)));
25
26 if m.attrs
27 .iter()
28 .any(|a| a.path() == &parse_quote!(rust_sitter::grammar))
29 {
30 out.push(generate_grammar(m))
31 }
32 }
33}
34
35#[cfg(feature = "build_parsers")]
36use std::io::Write;
37use std::path::Path;
38
39#[cfg(feature = "build_parsers")]
40use tree_sitter_generate::generate_parser_for_grammar;
41
/// Creates (or truncates) the file at `path` and writes `contents` to it.
///
/// Panics if the file cannot be created or written. The file handle is closed
/// when this function returns.
#[cfg(feature = "build_parsers")]
fn write_artifact(path: &Path, contents: &[u8]) {
    let mut file = std::fs::File::create(path).unwrap();
    file.write_all(contents).unwrap();
}

/// Generates and compiles a tree-sitter parser for every grammar found in `root_file`.
///
/// For each grammar returned by [`generate_grammars`] this:
///
/// 1. generates the parser C source with `generate_parser_for_grammar`,
/// 2. writes `parser.c`, `<grammar name>.json` and `tree_sitter/parser.h` into a
///    working directory,
/// 3. when the `TARGET` environment variable starts with `wasm32`, also writes the
///    headers bundled under `wasm-sysroot/` into a `sysroot` sub-directory,
/// 4. compiles `parser.c` with `cc` into a library named after the grammar.
///
/// The working directory is `$OUT_DIR/grammar_<grammar name>` when the
/// `RUST_SITTER_EMIT_ARTIFACTS` environment variable parses as `true`; any previous
/// contents of that directory are removed first. Otherwise a fresh temporary
/// directory is used.
///
/// # Panics
///
/// Panics if `OUT_DIR` or `TARGET` is not set, if parser generation fails, or if
/// any of the file-system operations fail. A missing artifact directory is *not*
/// an error: on the first build there is simply nothing to clear.
#[cfg(feature = "build_parsers")]
pub fn build_parsers(root_file: &Path) {
    use std::env;

    let out_dir = env::var("OUT_DIR").unwrap();
    // Unset or unparsable values both mean "do not emit artifacts".
    let emit_artifacts: bool = env::var("RUST_SITTER_EMIT_ARTIFACTS")
        .map(|s| s.parse().unwrap_or(false))
        .unwrap_or(false);

    let grammars = generate_grammars(root_file);
    for grammar in &grammars {
        let (grammar_name, grammar_c) =
            generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap();

        // Bound to a local so the guard lives until the end of this iteration;
        // per the `tempfile` documentation the directory is removed when it drops.
        let tempfile = tempfile::Builder::new()
            .prefix("grammar")
            .tempdir()
            .unwrap();

        let dir = if emit_artifacts {
            let grammar_dir = Path::new(out_dir.as_str()).join(format!("grammar_{grammar_name}"));
            // Clear artifacts from a previous build. The directory does not exist
            // on the first build, so `NotFound` must not be treated as a failure.
            match std::fs::remove_dir_all(&grammar_dir) {
                Ok(()) => {}
                Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
                Err(err) => panic!("Couldn't clear old artifacts: {err:?}"),
            }
            std::fs::DirBuilder::new()
                .recursive(true)
                .create(grammar_dir.clone())
                .expect("Couldn't create grammar JSON directory");
            grammar_dir
        } else {
            tempfile.path().into()
        };

        // Generated parser source and the grammar it was generated from.
        write_artifact(&dir.join("parser.c"), grammar_c.as_bytes());
        write_artifact(
            &dir.join(format!("{grammar_name}.json")),
            serde_json::to_string_pretty(grammar).unwrap().as_bytes(),
        );

        // `parser.h` is written under `tree_sitter/`, and `dir` is added to the
        // include path below.
        let header_dir = dir.join("tree_sitter");
        std::fs::create_dir(&header_dir).unwrap();
        write_artifact(
            &header_dir.join("parser.h"),
            tree_sitter::PARSER_HEADER.as_bytes(),
        );

        let sysroot_dir = dir.join("sysroot");
        if env::var("TARGET").unwrap().starts_with("wasm32") {
            std::fs::create_dir(&sysroot_dir).unwrap();
            write_artifact(
                &sysroot_dir.join("stdint.h"),
                &include_bytes!("wasm-sysroot/stdint.h")[..],
            );
            write_artifact(
                &sysroot_dir.join("stdlib.h"),
                &include_bytes!("wasm-sysroot/stdlib.h")[..],
            );
            write_artifact(
                &sysroot_dir.join("stdio.h"),
                &include_bytes!("wasm-sysroot/stdio.h")[..],
            );
            write_artifact(
                &sysroot_dir.join("stdbool.h"),
                &include_bytes!("wasm-sysroot/stdbool.h")[..],
            );
        }

        let mut c_config = cc::Build::new();
        c_config.std("c11").include(&dir).include(&sysroot_dir);
        // Warning-suppression flags for the generated C source.
        c_config
            .flag_if_supported("-Wno-unused-label")
            .flag_if_supported("-Wno-unused-parameter")
            .flag_if_supported("-Wno-unused-but-set-variable")
            .flag_if_supported("-Wno-trigraphs")
            .flag_if_supported("-Wno-everything");
        c_config.file(dir.join("parser.c"));

        c_config.compile(&grammar_name);
    }
}
135
/// Snapshot tests for grammar generation.
///
/// Every test parses an inline grammar module, snapshots the generated grammar
/// JSON with `insta`, and then checks that tree-sitter can generate a parser
/// from that JSON.
#[cfg(test)]
mod tests {
    use syn::parse_quote;

    use super::{generate_grammar, GENERATED_SEMANTIC_VERSION};
    use tree_sitter_generate::generate_parser_for_grammar;

    /// Panics unless tree-sitter can generate a parser from `grammar`.
    fn assert_parser_generates(grammar: &serde_json::Value) {
        generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap();
    }

    /// Enum language with one tuple variant and one variant with named fields.
    #[test]
    fn enum_with_named_field() {
        let m: syn::ItemMod = parse_quote! {
            #[rust_sitter::grammar("test")]
            mod grammar {
                #[rust_sitter::language]
                pub enum Expr {
                    Number(
                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                        u32
                    ),
                    Neg {
                        #[rust_sitter::leaf(text = "!")]
                        _bang: (),
                        value: Box<Expr>,
                    }
                }
            }
        };

        let grammar = generate_grammar(&m);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    /// Enum language whose only variant is a leaf with an explicitly typed transform.
    #[test]
    fn enum_transformed_fields() {
        let m: syn::ItemMod = parse_quote! {
            #[rust_sitter::grammar("test")]
            mod grammar {
                #[rust_sitter::language]
                pub enum Expression {
                    Number(
                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v: &str| v.parse::<i32>().unwrap())]
                        i32
                    ),
                }
            }
        };

        let grammar = generate_grammar(&m);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    /// Enum language with a variant that refers back to the enum through a `Box`.
    #[test]
    fn enum_recursive() {
        let m: syn::ItemMod = parse_quote! {
            #[rust_sitter::grammar("test")]
            mod grammar {
                #[rust_sitter::language]
                pub enum Expression {
                    Number(
                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v: &str| v.parse::<i32>().unwrap())]
                        i32
                    ),
                    Neg(
                        #[rust_sitter::leaf(text = "-", transform = |v| ())]
                        (),
                        Box<Expression>
                    ),
                }
            }
        };

        let grammar = generate_grammar(&m);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    /// Binary variant annotated with `prec_left(1)`.
    #[test]
    fn enum_prec_left() {
        let m: syn::ItemMod = parse_quote! {
            #[rust_sitter::grammar("test")]
            mod grammar {
                #[rust_sitter::language]
                pub enum Expression {
                    Number(
                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v: &str| v.parse::<i32>().unwrap())]
                        i32
                    ),
                    #[rust_sitter::prec_left(1)]
                    Sub(
                        Box<Expression>,
                        #[rust_sitter::leaf(text = "-", transform = |v| ())]
                        (),
                        Box<Expression>
                    ),
                }
            }
        };

        let grammar = generate_grammar(&m);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    /// Grammar that declares a whitespace struct marked as `extra`.
    #[test]
    fn grammar_with_extras() {
        let m: syn::ItemMod = parse_quote! {
            #[rust_sitter::grammar("test")]
            mod grammar {
                #[rust_sitter::language]
                pub enum Expression {
                    Number(
                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v: &str| v.parse::<i32>().unwrap())]
                        i32
                    ),
                }

                #[rust_sitter::extra]
                struct Whitespace {
                    #[rust_sitter::leaf(pattern = r"\s", transform = |_v| ())]
                    _whitespace: (),
                }
            }
        };

        let grammar = generate_grammar(&m);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    /// Struct language holding another grammar type directly, without a `Box`.
    #[test]
    fn grammar_unboxed_field() {
        let m: syn::ItemMod = parse_quote! {
            #[rust_sitter::grammar("test")]
            mod grammar {
                #[rust_sitter::language]
                pub struct Language {
                    e: Expression,
                }

                pub enum Expression {
                    Number(
                        #[rust_sitter::leaf(pattern = r"\d+", transform = |v: &str| v.parse::<i32>().unwrap())]
                        i32
                    ),
                }
            }
        };

        let grammar = generate_grammar(&m);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    /// `Vec` field with a `delimited` attribute using a `,` leaf.
    #[test]
    fn grammar_repeat() {
        let m: syn::ItemMod = parse_quote! {
            #[rust_sitter::grammar("test")]
            pub mod grammar {
                #[rust_sitter::language]
                pub struct NumberList {
                    #[rust_sitter::delimited(
                        #[rust_sitter::leaf(text = ",")]
                        ()
                    )]
                    numbers: Vec<Number>,
                }

                pub struct Number {
                    #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                    v: i32,
                }

                #[rust_sitter::extra]
                struct Whitespace {
                    #[rust_sitter::leaf(pattern = r"\s")]
                    _whitespace: (),
                }
            }
        };

        let grammar = generate_grammar(&m);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    /// `Vec` field without any `delimited` attribute.
    #[test]
    fn grammar_repeat_no_delimiter() {
        let m: syn::ItemMod = parse_quote! {
            #[rust_sitter::grammar("test")]
            pub mod grammar {
                #[rust_sitter::language]
                pub struct NumberList {
                    numbers: Vec<Number>,
                }

                pub struct Number {
                    #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                    v: i32,
                }

                #[rust_sitter::extra]
                struct Whitespace {
                    #[rust_sitter::leaf(pattern = r"\s")]
                    _whitespace: (),
                }
            }
        };

        let grammar = generate_grammar(&m);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    /// Delimited `Vec` field additionally marked `repeat(non_empty = true)`.
    #[test]
    fn grammar_repeat1() {
        let m: syn::ItemMod = parse_quote! {
            #[rust_sitter::grammar("test")]
            pub mod grammar {
                #[rust_sitter::language]
                pub struct NumberList {
                    #[rust_sitter::repeat(non_empty = true)]
                    #[rust_sitter::delimited(
                        #[rust_sitter::leaf(text = ",")]
                        ()
                    )]
                    numbers: Vec<Number>,
                }

                pub struct Number {
                    #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                    v: i32,
                }

                #[rust_sitter::extra]
                struct Whitespace {
                    #[rust_sitter::leaf(pattern = r"\s")]
                    _whitespace: (),
                }
            }
        };

        let grammar = generate_grammar(&m);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    /// Struct language with `Option` fields: one leaf and one nested struct.
    #[test]
    fn struct_optional() {
        let m: syn::ItemMod = parse_quote! {
            #[rust_sitter::grammar("test")]
            mod grammar {
                #[rust_sitter::language]
                pub struct Language {
                    #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                    v: Option<i32>,
                    #[rust_sitter::leaf(pattern = r" ", transform = |v| ())]
                    space: (),
                    t: Option<Number>,
                }

                pub struct Number {
                    #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                    v: i32
                }
            }
        };

        let grammar = generate_grammar(&m);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    /// Enum tuple variant whose unnamed field is a non-empty `Vec`.
    #[test]
    fn enum_with_unamed_vector() {
        let m: syn::ItemMod = parse_quote! {
            #[rust_sitter::grammar("test")]
            mod grammar {
                pub struct Number {
                    #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                    value: u32
                }

                #[rust_sitter::language]
                pub enum Expr {
                    Numbers(
                        #[rust_sitter::repeat(non_empty = true)]
                        Vec<Number>
                    )
                }
            }
        };

        let grammar = generate_grammar(&m);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }

    /// Leaf field of type `Vec<Spanned<i32>>`.
    #[test]
    fn spanned_in_vec() {
        let m: syn::ItemMod = parse_quote! {
            #[rust_sitter::grammar("test")]
            mod grammar {
                use rust_sitter::Spanned;

                #[rust_sitter::language]
                pub struct NumberList {
                    #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())]
                    numbers: Vec<Spanned<i32>>,
                }

                #[rust_sitter::extra]
                struct Whitespace {
                    #[rust_sitter::leaf(pattern = r"\s")]
                    _whitespace: (),
                }
            }
        };

        let grammar = generate_grammar(&m);
        insta::assert_snapshot!(grammar);
        assert_parser_generates(&grammar);
    }
}