1use std::{collections::BTreeMap, sync::LazyLock};
2#[cfg(feature = "load")]
3use std::{
4 env, fs,
5 io::Write,
6 path::{Path, PathBuf},
7 process::{Command, Stdio},
8};
9
10use bitflags::bitflags;
11use log::warn;
12use node_types::VariableInfo;
13use regex::{Regex, RegexBuilder};
14use rules::{Alias, Symbol};
15#[cfg(feature = "load")]
16use semver::Version;
17#[cfg(feature = "load")]
18use serde::Deserialize;
19use serde::Serialize;
20use thiserror::Error;
21
22mod build_tables;
23mod dedup;
24mod grammars;
25mod nfa;
26mod node_types;
27pub mod parse_grammar;
28mod prepare_grammar;
29#[cfg(feature = "qjs-rt")]
30mod quickjs;
31mod render;
32mod rules;
33mod tables;
34
35use build_tables::build_tables;
36pub use build_tables::ParseTableBuilderError;
37use grammars::{InlinedProductionMap, InputGrammar, LexicalGrammar, SyntaxGrammar};
38pub use node_types::{SuperTypeCycleError, VariableInfoError};
39use parse_grammar::parse_grammar;
40pub use parse_grammar::ParseGrammarError;
41use prepare_grammar::prepare_grammar;
42pub use prepare_grammar::PrepareGrammarError;
43use render::render_c_code;
44pub use render::{ABI_VERSION_MAX, ABI_VERSION_MIN};
45
/// Matches a `//` comment that is preceded only by whitespace since the start
/// of a line, through the end of that line. Multi-line mode lets `^` match at
/// every line start. `generate_parser_for_grammar` replaces each match with a
/// newline before the grammar JSON is parsed.
static JSON_COMMENT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    RegexBuilder::new("^\\s*//.*")
        .multi_line(true)
        .build()
        .unwrap()
});
52
/// Everything produced by `generate_node_types_from_grammar`: the prepared
/// grammars plus the data derived from them.
struct JSONOutput {
    /// Pretty-printed node types JSON (only built with the `load` feature).
    #[cfg(feature = "load")]
    node_types_json: String,
    /// Syntax grammar returned by `prepare_grammar`.
    syntax_grammar: SyntaxGrammar,
    /// Lexical grammar returned by `prepare_grammar`.
    lexical_grammar: LexicalGrammar,
    /// Inlined productions returned by `prepare_grammar`.
    inlines: InlinedProductionMap,
    /// Simple aliases returned by `prepare_grammar`, keyed by symbol.
    simple_aliases: BTreeMap<Symbol, Alias>,
    /// Variable info computed by `node_types::get_variable_info`.
    variable_info: Vec<VariableInfo>,
}
62
/// Result of `generate_parser_for_grammar_with_opts`.
struct GeneratedParser {
    /// C source produced by `render_c_code`.
    c_code: String,
    /// Pretty-printed node types JSON (only built with the `load` feature).
    #[cfg(feature = "load")]
    node_types_json: String,
}
68
// NOTE: This constant must be kept in sync with the definition of
// `TREE_SITTER_LANGUAGE_VERSION` in `lib/include/tree_sitter/api.h`
// (checked by `test_language_versions_are_in_sync`). It is the ABI version
// used by `generate_parser_for_grammar`.
const LANGUAGE_VERSION: usize = 15;
72
/// Contents of `templates/alloc.h`, written as `tree_sitter/alloc.h` next to a generated parser.
pub const ALLOC_HEADER: &str = include_str!("templates/alloc.h");
/// Contents of `templates/array.h`, written as `tree_sitter/array.h` next to a generated parser.
pub const ARRAY_HEADER: &str = include_str!("templates/array.h");
/// Contents of `parser.h.inc`, written as `tree_sitter/parser.h` next to a generated parser.
/// Must match `lib/src/parser.h` (checked by `test_parser_header_in_sync`).
pub const PARSER_HEADER: &str = include_str!("parser.h.inc");
76
/// Result alias for operations that fail with a [`GenerateError`].
pub type GenerateResult<T> = Result<T, GenerateError>;
78
/// Top-level error for parser generation; most variants wrap the error of one
/// generation stage and display it transparently.
#[derive(Debug, Error, Serialize)]
pub enum GenerateError {
    /// The existence check on the user-supplied grammar path failed.
    #[error("Error with specified path -- {0}")]
    GrammarPath(String),
    /// A filesystem operation (creating a directory, writing an output file) failed.
    #[error(transparent)]
    IO(IoError),
    /// The grammar file could not be loaded.
    #[cfg(feature = "load")]
    #[error(transparent)]
    LoadGrammarFile(#[from] LoadGrammarError),
    /// The grammar JSON could not be parsed.
    #[error(transparent)]
    ParseGrammar(#[from] ParseGrammarError),
    /// `prepare_grammar` rejected the grammar.
    #[error(transparent)]
    Prepare(#[from] PrepareGrammarError),
    /// Computing variable info / node types failed.
    #[error(transparent)]
    VariableInfo(#[from] VariableInfoError),
    /// Building the parse tables failed.
    #[error(transparent)]
    BuildTables(#[from] ParseTableBuilderError),
    /// `tree-sitter.json` could not be read, parsed, or its version was not valid semver.
    #[cfg(feature = "load")]
    #[error(transparent)]
    ParseVersion(#[from] ParseVersionError),
    /// Error type re-exported from `node_types` as [`SuperTypeCycleError`].
    #[error(transparent)]
    SuperTypeCycle(#[from] SuperTypeCycleError),
}
102
/// Serializable stand-in for a `std::io::Error`, optionally tagged with the
/// path that was being accessed.
#[derive(Debug, Error, Serialize)]
pub struct IoError {
    /// The underlying I/O error rendered as a string.
    pub error: String,
    /// The path involved, lossily converted to a string, if one is known.
    pub path: Option<String>,
}
108
109impl IoError {
110 fn new(error: &std::io::Error, path: Option<&Path>) -> Self {
111 Self {
112 error: error.to_string(),
113 path: path.map(|p| p.to_string_lossy().to_string()),
114 }
115 }
116}
117
118impl std::fmt::Display for IoError {
119 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
120 write!(f, "{}", self.error)?;
121 if let Some(ref path) = self.path {
122 write!(f, " ({path})")?;
123 }
124 Ok(())
125 }
126}
127
/// Result alias for operations that fail with a [`LoadGrammarError`].
#[cfg(feature = "load")]
pub type LoadGrammarFileResult<T> = Result<T, LoadGrammarError>;
130
/// Errors returned by [`load_grammar_file`].
#[cfg(feature = "load")]
#[derive(Debug, Error, Serialize)]
pub enum LoadGrammarError {
    /// The given grammar path is a directory rather than a file.
    #[error("Path to a grammar file with `.js` or `.json` extension is required")]
    InvalidPath,
    /// Evaluating a `.js` grammar failed.
    #[error("Failed to load grammar.js -- {0}")]
    LoadJSGrammarFile(#[from] JSError),
    /// Reading a `.json` grammar from disk failed.
    #[error("Failed to load grammar.json -- {0}")]
    IO(IoError),
    /// The grammar file's extension is neither `js` nor `json` (or is missing).
    #[error("Unknown grammar file extension: {0:?}")]
    FileExtension(PathBuf),
}
143
/// Errors raised while reading the grammar version from `tree-sitter.json`.
#[cfg(feature = "load")]
#[derive(Debug, Error, Serialize)]
pub enum ParseVersionError {
    /// The `metadata.version` string is not valid semver.
    #[error("{0}")]
    Version(String),
    /// The file is not valid JSON of the expected shape.
    #[error("{0}")]
    JSON(String),
    /// The file exists but could not be read.
    #[error(transparent)]
    IO(IoError),
}
154
/// Result alias for operations that fail with a [`JSError`].
#[cfg(feature = "load")]
pub type JSResult<T> = Result<T, JSError>;
157
/// Errors raised while evaluating a `grammar.js` file with a JavaScript runtime.
#[cfg(feature = "load")]
#[derive(Debug, Error, Serialize)]
pub enum JSError {
    /// The runtime process could not be started.
    #[error("Failed to run `{runtime}` -- {error}")]
    JSRuntimeSpawn { runtime: String, error: String },
    /// The runtime's stdout was not valid UTF-8.
    #[error("Got invalid UTF8 from `{runtime}` -- {error}")]
    JSRuntimeUtf8 { runtime: String, error: String },
    /// The runtime exited with a non-zero status (`-1` when no exit code was available).
    #[error("`{runtime}` process exited with status {code}")]
    JSRuntimeExit { runtime: String, code: i32 },
    /// The runtime's stdin pipe was not available.
    #[error("Failed to open stdin for `{runtime}`")]
    JSRuntimeStdin { runtime: String },
    /// Writing `item` to the runtime's stdin failed.
    #[error("Failed to write {item} to `{runtime}`'s stdin -- {error}")]
    JSRuntimeWrite {
        runtime: String,
        item: String,
        error: String,
    },
    /// Waiting for the runtime and collecting its output failed.
    #[error("Failed to read output from `{runtime}` -- {error}")]
    JSRuntimeRead { runtime: String, error: String },
    /// A local I/O operation (path canonicalization, echoing output) failed.
    #[error(transparent)]
    IO(IoError),
    #[cfg(feature = "qjs-rt")]
    #[error("Failed to get relative path")]
    RelativePath,
    /// This crate's own `CARGO_PKG_VERSION` was not valid semver.
    #[error("Could not parse this package's version as semver -- {0}")]
    Semver(String),
    /// A `serde_json` error while handling the grammar JSON.
    ///
    /// The variant name keeps its historical spelling because it is part of
    /// the public (and serialized) API; only the message is spelled correctly.
    #[error("Failed to serialize grammar JSON -- {0}")]
    Serialzation(String),
    /// An error reported by the embedded QuickJS runtime.
    #[cfg(feature = "qjs-rt")]
    #[error("QuickJS error: {0}")]
    QuickJS(String),
}
190
#[cfg(feature = "load")]
impl From<serde_json::Error> for JSError {
    /// Stores the `serde_json` error's message in the `Serialzation` variant.
    fn from(err: serde_json::Error) -> Self {
        let message = err.to_string();
        Self::Serialzation(message)
    }
}
197
#[cfg(feature = "load")]
impl From<semver::Error> for JSError {
    /// Stores the semver error's message in the `Semver` variant.
    fn from(err: semver::Error) -> Self {
        let message = err.to_string();
        Self::Semver(message)
    }
}
204
#[cfg(feature = "qjs-rt")]
impl From<rquickjs::Error> for JSError {
    /// Stores the QuickJS error's message in the `QuickJS` variant.
    fn from(err: rquickjs::Error) -> Self {
        let message = err.to_string();
        Self::QuickJS(message)
    }
}
211
bitflags! {
    /// Bit set of optional optimizations, forwarded to `build_tables`.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    pub struct OptLevel: u32 {
        /// NOTE(review): presumably enables merging of compatible parse states
        /// during table construction — confirm in `build_tables`.
        const MergeStates = 1 << 0;
    }
}
218
impl Default for OptLevel {
    /// The default optimization set is `MergeStates`.
    fn default() -> Self {
        Self::MergeStates
    }
}
224
/// Loads a grammar and writes the generated artifacts to disk.
///
/// * `repo_path` – root of the grammar repository.
/// * `out_path` – output directory; defaults to `<repo_path>/src`.
/// * `grammar_path` – grammar file to load; defaults to
///   `<repo_path>/grammar.js`. If the given path does not exist it is created
///   as a directory, becomes the new `repo_path`, and `grammar.js` inside it
///   is used instead.
/// * `abi_version` – requested ABI version. It is lowered to
///   [`ABI_VERSION_MIN`] (with a warning) when no `tree-sitter.json` is found.
/// * `report_symbol_name`, `optimizations` – forwarded to table construction.
/// * `js_runtime` – JavaScript runtime used to evaluate a `.js` grammar.
/// * `generate_parser` – when `false`, only `grammar.json` and
///   `node-types.json` are written.
///
/// Unless the input file is itself named `grammar.json`, the loaded grammar is
/// written to `<out>/grammar.json`. A full run also writes `parser.c`,
/// `node-types.json`, and the `tree_sitter/{alloc,array,parser}.h` headers.
///
/// # Errors
///
/// Returns a [`GenerateError`] if the grammar cannot be loaded or parsed, if
/// any generation stage fails, or if an output file or directory cannot be
/// written.
#[cfg(feature = "load")]
#[allow(clippy::too_many_arguments)]
pub fn generate_parser_in_directory<T, U, V>(
    repo_path: T,
    out_path: Option<U>,
    grammar_path: Option<V>,
    mut abi_version: usize,
    report_symbol_name: Option<&str>,
    js_runtime: Option<&str>,
    generate_parser: bool,
    optimizations: OptLevel,
) -> GenerateResult<()>
where
    T: Into<PathBuf>,
    U: Into<PathBuf>,
    V: Into<PathBuf>,
{
    let mut repo_path: PathBuf = repo_path.into();

    let grammar_path = if let Some(path) = grammar_path {
        let path_buf: PathBuf = path.into();
        if !path_buf
            .try_exists()
            .map_err(|e| GenerateError::GrammarPath(e.to_string()))?
        {
            // A non-existent path is treated as a new grammar directory.
            fs::create_dir_all(&path_buf)
                .map_err(|e| GenerateError::IO(IoError::new(&e, Some(path_buf.as_path()))))?;
            repo_path = path_buf;
            repo_path.join("grammar.js")
        } else {
            path_buf
        }
    } else {
        repo_path.join("grammar.js")
    };

    // Read the grammar file.
    let grammar_json = load_grammar_file(&grammar_path, js_runtime)?;

    let src_path = out_path.map_or_else(|| repo_path.join("src"), |p| p.into());
    let header_path = src_path.join("tree_sitter");

    // Ensure that the output directory exists.
    fs::create_dir_all(&src_path)
        .map_err(|e| GenerateError::IO(IoError::new(&e, Some(src_path.as_path()))))?;

    // Persist the loaded grammar unless the input already is `grammar.json`.
    // Going through `write_file` makes a failure report the file that could
    // not be written rather than its parent directory.
    let input_is_grammar_json = grammar_path
        .file_name()
        .is_some_and(|name| name == "grammar.json");
    if !input_is_grammar_json {
        write_file(&src_path.join("grammar.json"), &grammar_json)?;
    }

    // Parse and preprocess the grammar.
    let input_grammar = parse_grammar(&grammar_json)?;

    if !generate_parser {
        let node_types_json = generate_node_types_from_grammar(&input_grammar)?.node_types_json;
        write_file(&src_path.join("node-types.json"), node_types_json)?;
        return Ok(());
    }

    let semantic_version = read_grammar_version(&repo_path)?;

    if semantic_version.is_none() && abi_version > ABI_VERSION_MIN {
        warn!(
            concat!(
                "No `tree-sitter.json` file found in your grammar, ",
                "this file is required to generate with ABI {}. ",
                "Using ABI version {} instead.\n",
                "This file can be set up with `tree-sitter init`. ",
                "For more information, see https://tree-sitter.github.io/tree-sitter/cli/init."
            ),
            abi_version, ABI_VERSION_MIN
        );
        abi_version = ABI_VERSION_MIN;
    }

    // Generate the parser and related files.
    let GeneratedParser {
        c_code,
        node_types_json,
    } = generate_parser_for_grammar_with_opts(
        &input_grammar,
        abi_version,
        // NOTE(review): `as u8` wraps version components above 255.
        semantic_version.map(|v| (v.major as u8, v.minor as u8, v.patch as u8)),
        report_symbol_name,
        optimizations,
    )?;

    write_file(&src_path.join("parser.c"), c_code)?;
    write_file(&src_path.join("node-types.json"), node_types_json)?;
    fs::create_dir_all(&header_path)
        .map_err(|e| GenerateError::IO(IoError::new(&e, Some(header_path.as_path()))))?;
    write_file(&header_path.join("alloc.h"), ALLOC_HEADER)?;
    write_file(&header_path.join("array.h"), ARRAY_HEADER)?;
    write_file(&header_path.join("parser.h"), PARSER_HEADER)?;

    Ok(())
}
324
325pub fn generate_parser_for_grammar(
326 grammar_json: &str,
327 semantic_version: Option<(u8, u8, u8)>,
328) -> GenerateResult<(String, String)> {
329 let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
330 let input_grammar = parse_grammar(&grammar_json)?;
331 let parser = generate_parser_for_grammar_with_opts(
332 &input_grammar,
333 LANGUAGE_VERSION,
334 semantic_version,
335 None,
336 OptLevel::empty(),
337 )?;
338 Ok((input_grammar.name, parser.c_code))
339}
340
341fn generate_node_types_from_grammar(input_grammar: &InputGrammar) -> GenerateResult<JSONOutput> {
342 let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
343 prepare_grammar(input_grammar)?;
344 let variable_info =
345 node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?;
346
347 #[cfg(feature = "load")]
348 let node_types_json = node_types::generate_node_types_json(
349 &syntax_grammar,
350 &lexical_grammar,
351 &simple_aliases,
352 &variable_info,
353 )?;
354 Ok(JSONOutput {
355 #[cfg(feature = "load")]
356 node_types_json: serde_json::to_string_pretty(&node_types_json).unwrap(),
357 syntax_grammar,
358 lexical_grammar,
359 inlines,
360 simple_aliases,
361 variable_info,
362 })
363}
364
365fn generate_parser_for_grammar_with_opts(
366 input_grammar: &InputGrammar,
367 abi_version: usize,
368 semantic_version: Option<(u8, u8, u8)>,
369 report_symbol_name: Option<&str>,
370 optimizations: OptLevel,
371) -> GenerateResult<GeneratedParser> {
372 let JSONOutput {
373 syntax_grammar,
374 lexical_grammar,
375 inlines,
376 simple_aliases,
377 variable_info,
378 #[cfg(feature = "load")]
379 node_types_json,
380 } = generate_node_types_from_grammar(input_grammar)?;
381 let supertype_symbol_map =
382 node_types::get_supertype_symbol_map(&syntax_grammar, &simple_aliases, &variable_info);
383 let tables = build_tables(
384 &syntax_grammar,
385 &lexical_grammar,
386 &simple_aliases,
387 &variable_info,
388 &inlines,
389 report_symbol_name,
390 optimizations,
391 )?;
392 let c_code = render_c_code(
393 &input_grammar.name,
394 tables,
395 syntax_grammar,
396 lexical_grammar,
397 simple_aliases,
398 abi_version,
399 semantic_version,
400 supertype_symbol_map,
401 );
402 Ok(GeneratedParser {
403 c_code,
404 #[cfg(feature = "load")]
405 node_types_json,
406 })
407}
408
/// Looks for `tree-sitter.json` in `repo_path` and then in each of its parent
/// directories, returning the `metadata.version` of the first one found.
///
/// Returns `Ok(None)` when no such file exists along that chain.
///
/// # Errors
///
/// Fails if the first file found cannot be read, is not JSON of the expected
/// shape, or carries a version that is not valid semver.
#[cfg(feature = "load")]
fn read_grammar_version(repo_path: &Path) -> Result<Option<Version>, ParseVersionError> {
    #[derive(Deserialize)]
    struct TreeSitterJson {
        metadata: Metadata,
    }

    #[derive(Deserialize)]
    struct Metadata {
        version: String,
    }

    let filename = "tree-sitter.json";

    // `ancestors` yields `repo_path` itself first, then each successive parent.
    for dir in repo_path.ancestors() {
        let candidate = dir.join(filename);
        if !candidate.exists() {
            continue;
        }

        let contents = fs::read_to_string(candidate.as_path())
            .map_err(|e| ParseVersionError::IO(IoError::new(&e, Some(candidate.as_path()))))?;
        let parsed = serde_json::from_str::<TreeSitterJson>(&contents).map_err(|e| {
            ParseVersionError::JSON(format!(
                "Failed to parse `{}` -- {e}",
                candidate.display()
            ))
        })?;

        return match Version::parse(&parsed.metadata.version) {
            Ok(version) => Ok(Some(version)),
            Err(e) => Err(ParseVersionError::Version(format!(
                "Failed to parse `{}` version as semver -- {e}",
                candidate.display()
            ))),
        };
    }

    Ok(None)
}
457
/// Loads a grammar as JSON text from a `.js` or `.json` file.
///
/// A `.js` grammar is evaluated with `js_runtime`; a `.json` grammar is read
/// verbatim.
///
/// # Errors
///
/// Returns [`LoadGrammarError::InvalidPath`] for a directory,
/// [`LoadGrammarError::FileExtension`] for any other extension, and the
/// wrapped failure if evaluating or reading the file fails.
#[cfg(feature = "load")]
pub fn load_grammar_file(
    grammar_path: &Path,
    js_runtime: Option<&str>,
) -> LoadGrammarFileResult<String> {
    if grammar_path.is_dir() {
        return Err(LoadGrammarError::InvalidPath);
    }

    let extension = grammar_path.extension().and_then(|ext| ext.to_str());
    match extension {
        Some("js") => load_js_grammar_file(grammar_path, js_runtime)
            .map_err(LoadGrammarError::LoadJSGrammarFile),
        Some("json") => fs::read_to_string(grammar_path)
            .map_err(|e| LoadGrammarError::IO(IoError::new(&e, Some(grammar_path)))),
        _ => Err(LoadGrammarError::FileExtension(grammar_path.to_owned())),
    }
}
473
/// Evaluates a `grammar.js` file with a JavaScript runtime and returns the
/// resulting grammar as pretty-printed JSON.
///
/// The runtime (`node` by default) receives the canonicalized grammar path in
/// the `TREE_SITTER_GRAMMAR_PATH` environment variable. Its stdin is fed the
/// CLI version globals followed by the bundled `dsl.js`. With the `qjs-rt`
/// feature, a runtime of `"native"` runs the embedded QuickJS runtime instead
/// of spawning a process.
///
/// Only the last line of the runtime's stdout is treated as the grammar JSON;
/// everything before the final newline is echoed to this process's stdout.
///
/// # Errors
///
/// Returns a [`JSError`] if the path cannot be canonicalized, the runtime
/// cannot be spawned, written to or read from, it exits unsuccessfully, or its
/// output is not valid UTF-8 / JSON.
#[cfg(feature = "load")]
fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> JSResult<String> {
    let grammar_path = dunce::canonicalize(grammar_path)
        .map_err(|e| JSError::IO(IoError::new(&e, Some(grammar_path))))?;

    #[cfg(feature = "qjs-rt")]
    if js_runtime == Some("native") {
        return quickjs::execute_native_runtime(&grammar_path);
    }

    // Applied only after the native-runtime branch above, which receives the
    // plain filesystem path.
    #[cfg(windows)]
    let grammar_path = PathBuf::from(format!("file:///{}", grammar_path.display()));

    let js_runtime = js_runtime.unwrap_or("node");

    // Each known runtime needs its own flags to read the script from stdin;
    // unknown runtimes are invoked without extra arguments.
    let mut js_command = Command::new(js_runtime);
    match js_runtime {
        "node" => {
            js_command.args(["--input-type=module", "-"]);
        }
        "bun" => {
            js_command.arg("-");
        }
        "deno" => {
            js_command.args(["run", "--allow-all", "-"]);
        }
        _ => {}
    }

    let mut js_process = js_command
        .env("TREE_SITTER_GRAMMAR_PATH", grammar_path)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()
        .map_err(|e| JSError::JSRuntimeSpawn {
            runtime: js_runtime.to_string(),
            error: e.to_string(),
        })?;

    let mut js_stdin = js_process
        .stdin
        .take()
        .ok_or_else(|| JSError::JSRuntimeStdin {
            runtime: js_runtime.to_string(),
        })?;

    let cli_version = Version::parse(env!("CARGO_PKG_VERSION"))?;
    write!(
        js_stdin,
        "globalThis.TREE_SITTER_CLI_VERSION_MAJOR = {};
        globalThis.TREE_SITTER_CLI_VERSION_MINOR = {};
        globalThis.TREE_SITTER_CLI_VERSION_PATCH = {};",
        cli_version.major, cli_version.minor, cli_version.patch,
    )
    .map_err(|e| JSError::JSRuntimeWrite {
        runtime: js_runtime.to_string(),
        item: "tree-sitter version".to_string(),
        error: e.to_string(),
    })?;
    // `write` may accept only part of the buffer and return early;
    // `write_all` keeps writing until the whole DSL has been sent.
    js_stdin
        .write_all(include_bytes!("./dsl.js"))
        .map_err(|e| JSError::JSRuntimeWrite {
            runtime: js_runtime.to_string(),
            item: "grammar dsl".to_string(),
            error: e.to_string(),
        })?;
    // Close stdin so the runtime sees end-of-input and starts evaluating.
    drop(js_stdin);

    let output = js_process
        .wait_with_output()
        .map_err(|e| JSError::JSRuntimeRead {
            runtime: js_runtime.to_string(),
            error: e.to_string(),
        })?;
    match output.status.code() {
        Some(0) => {
            let stdout = String::from_utf8(output.stdout).map_err(|e| JSError::JSRuntimeUtf8 {
                runtime: js_runtime.to_string(),
                error: e.to_string(),
            })?;

            let mut grammar_json = &stdout[..];

            if let Some(pos) = stdout.rfind('\n') {
                // If there's a newline, split the last line from the rest of
                // the output and forward that earlier output to our stdout.
                let node_output = &stdout[..pos];
                grammar_json = &stdout[pos + 1..];

                let mut stdout = std::io::stdout().lock();
                stdout
                    .write_all(node_output.as_bytes())
                    .map_err(|e| JSError::IO(IoError::new(&e, None)))?;
                stdout
                    .write_all(b"\n")
                    .map_err(|e| JSError::IO(IoError::new(&e, None)))?;
                stdout
                    .flush()
                    .map_err(|e| JSError::IO(IoError::new(&e, None)))?;
            }

            // Round-trip through `serde_json::Value` to validate and pretty-print.
            Ok(serde_json::to_string_pretty(&serde_json::from_str::<
                serde_json::Value,
            >(grammar_json)?)?)
        }
        Some(code) => Err(JSError::JSRuntimeExit {
            runtime: js_runtime.to_string(),
            code,
        }),
        // No exit code is available; report it as -1.
        None => Err(JSError::JSRuntimeExit {
            runtime: js_runtime.to_string(),
            code: -1,
        }),
    }
}
590
/// Writes `body` to `path`, replacing any existing contents.
///
/// # Errors
///
/// Returns [`GenerateError::IO`] carrying `path` if the write fails.
#[cfg(feature = "load")]
pub fn write_file(path: &Path, body: impl AsRef<[u8]>) -> GenerateResult<()> {
    if let Err(io_err) = fs::write(path, body) {
        return Err(GenerateError::IO(IoError::new(&io_err, Some(path))));
    }
    Ok(())
}
595
#[cfg(test)]
mod tests {
    use super::{LANGUAGE_VERSION, PARSER_HEADER};

    /// `LANGUAGE_VERSION` must equal `TREE_SITTER_LANGUAGE_VERSION` in the public C header.
    #[test]
    fn test_language_versions_are_in_sync() {
        let api_h = include_str!("../../../lib/include/tree_sitter/api.h");
        let api_language_version = api_h
            .lines()
            .filter_map(|line| {
                line.trim()
                    .strip_prefix("#define TREE_SITTER_LANGUAGE_VERSION ")
            })
            .find_map(|value| value.parse::<usize>().ok())
            .expect("Failed to find TREE_SITTER_LANGUAGE_VERSION definition in api.h");
        assert_eq!(LANGUAGE_VERSION, api_language_version);
    }

    /// The bundled `parser.h.inc` must be an exact copy of `lib/src/parser.h`.
    #[test]
    fn test_parser_header_in_sync() {
        let parser_h = include_str!("../../../lib/src/parser.h");
        let headers_match = parser_h == PARSER_HEADER;
        assert!(
            headers_match,
            "parser.h.inc is out of sync with lib/src/parser.h. Run: cp lib/src/parser.h crates/generate/src/parser.h.inc"
        );
    }
}