1use std::{collections::BTreeMap, sync::LazyLock};
2#[cfg(feature = "load")]
3use std::{
4 env, fs,
5 io::Write,
6 path::{Path, PathBuf},
7 process::{Command, Stdio},
8};
9
10use bitflags::bitflags;
11use log::warn;
12use node_types::VariableInfo;
13use regex::{Regex, RegexBuilder};
14use rules::{Alias, Symbol};
15#[cfg(feature = "load")]
16use semver::Version;
17#[cfg(feature = "load")]
18use serde::Deserialize;
19use serde::Serialize;
20use thiserror::Error;
21
22mod build_tables;
23mod dedup;
24mod grammars;
25mod nfa;
26mod node_types;
27pub mod parse_grammar;
28mod prepare_grammar;
29#[cfg(feature = "qjs-rt")]
30mod quickjs;
31mod render;
32mod rules;
33mod tables;
34
35use build_tables::build_tables;
36pub use build_tables::ParseTableBuilderError;
37use grammars::{InlinedProductionMap, InputGrammar, LexicalGrammar, SyntaxGrammar};
38pub use node_types::{SuperTypeCycleError, VariableInfoError};
39use parse_grammar::parse_grammar;
40pub use parse_grammar::ParseGrammarError;
41use prepare_grammar::prepare_grammar;
42pub use prepare_grammar::PrepareGrammarError;
43use render::render_c_code;
44pub use render::{RenderError, ABI_VERSION_MAX, ABI_VERSION_MIN};
45
46static JSON_COMMENT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
47 RegexBuilder::new("^\\s*//.*")
48 .multi_line(true)
49 .build()
50 .unwrap()
51});
52
/// Intermediate results produced by `generate_node_types_from_grammar`: the
/// prepared grammar pieces, the per-variable info, and (with the `load`
/// feature) the rendered node-types JSON.
struct JSONOutput {
    /// Pretty-printed node-types JSON; only available with the `load` feature.
    #[cfg(feature = "load")]
    node_types_json: String,
    /// Syntax grammar returned by `prepare_grammar`.
    syntax_grammar: SyntaxGrammar,
    /// Lexical grammar returned by `prepare_grammar`.
    lexical_grammar: LexicalGrammar,
    /// Inlined productions returned by `prepare_grammar`.
    inlines: InlinedProductionMap,
    /// Simple aliases returned by `prepare_grammar`, keyed by symbol.
    simple_aliases: BTreeMap<Symbol, Alias>,
    /// Variable info computed by `node_types::get_variable_info`.
    variable_info: Vec<VariableInfo>,
}
62
/// Output of `generate_parser_for_grammar_with_opts`.
struct GeneratedParser {
    /// Rendered C source of the parser.
    c_code: String,
    /// Pretty-printed node-types JSON; only available with the `load` feature.
    #[cfg(feature = "load")]
    node_types_json: String,
}
68
/// ABI version used by `generate_parser_for_grammar`. The test suite checks
/// that it matches `TREE_SITTER_LANGUAGE_VERSION` in the library's `api.h`.
const LANGUAGE_VERSION: usize = 15;

/// Contents of the bundled `alloc.h` template.
pub const ALLOC_HEADER: &str = include_str!("templates/alloc.h");
/// Contents of the bundled `array.h` template.
pub const ARRAY_HEADER: &str = include_str!("templates/array.h");
/// Contents of the bundled copy of `parser.h`; the test suite checks that it
/// stays identical to `lib/src/parser.h`.
pub const PARSER_HEADER: &str = include_str!("parser.h.inc");
76
/// Result alias whose error type is [`GenerateError`].
pub type GenerateResult<T> = Result<T, GenerateError>;
78
/// Top-level error returned by the parser-generation entry points.
///
/// Most variants transparently wrap the error of one generation stage.
#[derive(Debug, Error, Serialize)]
pub enum GenerateError {
    /// The supplied grammar path could not be checked for existence.
    #[error("Error with specified path -- {0}")]
    GrammarPath(String),
    /// A filesystem operation failed.
    #[error(transparent)]
    IO(IoError),
    /// The grammar file could not be loaded.
    #[cfg(feature = "load")]
    #[error(transparent)]
    LoadGrammarFile(#[from] LoadGrammarError),
    /// The grammar JSON could not be parsed.
    #[error(transparent)]
    ParseGrammar(#[from] ParseGrammarError),
    /// Grammar preparation failed.
    #[error(transparent)]
    Prepare(#[from] PrepareGrammarError),
    /// Computing variable info failed.
    #[error(transparent)]
    VariableInfo(#[from] VariableInfoError),
    /// Building the parse tables failed.
    #[error(transparent)]
    BuildTables(#[from] ParseTableBuilderError),
    /// Rendering the C code failed.
    #[error(transparent)]
    Render(#[from] RenderError),
    /// Reading the grammar version from `tree-sitter.json` failed.
    #[cfg(feature = "load")]
    #[error(transparent)]
    ParseVersion(#[from] ParseVersionError),
    /// Wraps a [`SuperTypeCycleError`].
    #[error(transparent)]
    SuperTypeCycle(#[from] SuperTypeCycleError),
}
104
/// Serializable description of an I/O failure and, when known, the path it
/// concerns.
#[derive(Debug, Error, Serialize)]
pub struct IoError {
    /// Display text of the underlying `std::io::Error`.
    pub error: String,
    /// Lossy string form of the path involved, if one was supplied.
    pub path: Option<String>,
}
110
111impl IoError {
112 fn new(error: &std::io::Error, path: Option<&Path>) -> Self {
113 Self {
114 error: error.to_string(),
115 path: path.map(|p| p.to_string_lossy().to_string()),
116 }
117 }
118}
119
120impl std::fmt::Display for IoError {
121 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
122 write!(f, "{}", self.error)?;
123 if let Some(ref path) = self.path {
124 write!(f, " ({path})")?;
125 }
126 Ok(())
127 }
128}
129
/// Result alias whose error type is [`LoadGrammarError`].
#[cfg(feature = "load")]
pub type LoadGrammarFileResult<T> = Result<T, LoadGrammarError>;
132
/// Errors produced by [`load_grammar_file`].
#[cfg(feature = "load")]
#[derive(Debug, Error, Serialize)]
pub enum LoadGrammarError {
    /// The grammar path points at a directory rather than a file.
    #[error("Path to a grammar file with `.js` or `.json` extension is required")]
    InvalidPath,
    /// Evaluating a `.js` grammar failed.
    #[error("Failed to load grammar.js -- {0}")]
    LoadJSGrammarFile(#[from] JSError),
    /// Reading a `.json` grammar from disk failed.
    #[error("Failed to load grammar.json -- {0}")]
    IO(IoError),
    /// The grammar file's extension is neither `js` nor `json`.
    #[error("Unknown grammar file extension: {0:?}")]
    FileExtension(PathBuf),
}
145
/// Errors produced while reading the grammar version from `tree-sitter.json`.
#[cfg(feature = "load")]
#[derive(Debug, Error, Serialize)]
pub enum ParseVersionError {
    /// The version string is not valid semver.
    #[error("{0}")]
    Version(String),
    /// The file could not be deserialized as JSON with the expected fields.
    #[error("{0}")]
    JSON(String),
    /// The file could not be read.
    #[error(transparent)]
    IO(IoError),
}
156
/// Result alias whose error type is [`JSError`].
#[cfg(feature = "load")]
pub type JSResult<T> = Result<T, JSError>;
159
/// Errors produced while evaluating a JavaScript grammar file.
#[cfg(feature = "load")]
#[derive(Debug, Error, Serialize)]
pub enum JSError {
    /// The JavaScript runtime process could not be spawned.
    #[error("Failed to run `{runtime}` -- {error}")]
    JSRuntimeSpawn { runtime: String, error: String },
    /// The runtime's standard output was not valid UTF-8.
    #[error("Got invalid UTF8 from `{runtime}` -- {error}")]
    JSRuntimeUtf8 { runtime: String, error: String },
    /// The runtime exited with a non-zero status (`-1` when no exit code is available).
    #[error("`{runtime}` process exited with status {code}")]
    JSRuntimeExit { runtime: String, code: i32 },
    /// The runtime's stdin handle was not available.
    #[error("Failed to open stdin for `{runtime}`")]
    JSRuntimeStdin { runtime: String },
    /// Writing `item` to the runtime's stdin failed.
    #[error("Failed to write {item} to `{runtime}`'s stdin -- {error}")]
    JSRuntimeWrite {
        runtime: String,
        item: String,
        error: String,
    },
    /// Waiting for the runtime and collecting its output failed.
    #[error("Failed to read output from `{runtime}` -- {error}")]
    JSRuntimeRead { runtime: String, error: String },
    /// A filesystem or standard-stream operation failed.
    #[error(transparent)]
    IO(IoError),
    /// A relative path could not be computed.
    #[cfg(feature = "qjs-rt")]
    #[error("Failed to get relative path")]
    RelativePath,
    /// This package's own version is not valid semver.
    #[error("Could not parse this package's version as semver -- {0}")]
    Semver(String),
    /// A `serde_json` operation on the grammar JSON failed.
    ///
    /// The variant name keeps its historical spelling for API compatibility;
    /// only the user-facing message is spelled correctly.
    #[error("Failed to serialize grammar JSON -- {0}")]
    Serialzation(String),
    /// An error reported by the embedded QuickJS runtime.
    #[cfg(feature = "qjs-rt")]
    #[error("QuickJS error: {0}")]
    QuickJS(String),
}
192
#[cfg(feature = "load")]
impl From<serde_json::Error> for JSError {
    /// Wraps a `serde_json` error, keeping only its display text.
    fn from(err: serde_json::Error) -> Self {
        let message = err.to_string();
        Self::Serialzation(message)
    }
}
199
#[cfg(feature = "load")]
impl From<semver::Error> for JSError {
    /// Wraps a semver parse error, keeping only its display text.
    fn from(err: semver::Error) -> Self {
        let message = err.to_string();
        Self::Semver(message)
    }
}
206
#[cfg(feature = "qjs-rt")]
impl From<rquickjs::Error> for JSError {
    /// Wraps a QuickJS error, keeping only its display text.
    fn from(err: rquickjs::Error) -> Self {
        let message = err.to_string();
        Self::QuickJS(message)
    }
}
213
bitflags! {
    /// Set of optimization flags forwarded to `build_tables`.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    pub struct OptLevel: u32 {
        /// NOTE(review): presumably enables merging of compatible parse
        /// states while building tables; confirm against `build_tables`.
        const MergeStates = 1 << 0;
    }
}
220
221impl Default for OptLevel {
222 fn default() -> Self {
223 Self::MergeStates
224 }
225}
226
/// Loads a grammar and writes the generated artifacts to disk.
///
/// * `repo_path` - grammar repository root; the default location of
///   `grammar.js` and of the `src` output directory.
/// * `out_path` - output directory; defaults to `<repo_path>/src`.
/// * `grammar_path` - path to the grammar file. If it is given but does not
///   exist, it is created as a directory, becomes the repository root, and
///   `grammar.js` inside it is used.
/// * `abi_version` - requested ABI version; lowered to [`ABI_VERSION_MIN`]
///   (with a warning) when no `tree-sitter.json` is found.
/// * `report_symbol_name` - forwarded to the table builder.
/// * `js_runtime` - forwarded to [`load_grammar_file`].
/// * `generate_parser` - when `false`, only `grammar.json` and
///   `node-types.json` are written.
/// * `optimizations` - forwarded to the table builder.
///
/// # Errors
///
/// Returns a [`GenerateError`] if the grammar cannot be loaded or parsed, if
/// any generation stage fails, or if an output file or directory cannot be
/// written.
#[cfg(feature = "load")]
#[allow(clippy::too_many_arguments)]
pub fn generate_parser_in_directory<T, U, V>(
    repo_path: T,
    out_path: Option<U>,
    grammar_path: Option<V>,
    mut abi_version: usize,
    report_symbol_name: Option<&str>,
    js_runtime: Option<&str>,
    generate_parser: bool,
    optimizations: OptLevel,
) -> GenerateResult<()>
where
    T: Into<PathBuf>,
    U: Into<PathBuf>,
    V: Into<PathBuf>,
{
    let mut repo_path: PathBuf = repo_path.into();

    let grammar_path = if let Some(path) = grammar_path {
        let path_buf: PathBuf = path.into();
        let exists = path_buf
            .try_exists()
            .map_err(|e| GenerateError::GrammarPath(e.to_string()))?;
        if exists {
            path_buf
        } else {
            // A missing path is treated as a new grammar directory.
            fs::create_dir_all(&path_buf)
                .map_err(|e| GenerateError::IO(IoError::new(&e, Some(path_buf.as_path()))))?;
            repo_path = path_buf;
            repo_path.join("grammar.js")
        }
    } else {
        repo_path.join("grammar.js")
    };

    let grammar_json = load_grammar_file(&grammar_path, js_runtime)?;

    let src_path = out_path.map_or_else(|| repo_path.join("src"), Into::into);
    let header_path = src_path.join("tree_sitter");

    fs::create_dir_all(&src_path)
        .map_err(|e| GenerateError::IO(IoError::new(&e, Some(src_path.as_path()))))?;

    // `load_grammar_file` only succeeds for paths with a `js` or `json`
    // extension, so the path is guaranteed to have a file name here.
    if grammar_path.file_name().unwrap() != "grammar.json" {
        // Go through `write_file` so a failure reports the file itself rather
        // than its parent directory.
        write_file(&src_path.join("grammar.json"), &grammar_json)?;
    }

    let input_grammar = parse_grammar(&grammar_json)?;

    if !generate_parser {
        let node_types_json = generate_node_types_from_grammar(&input_grammar)?.node_types_json;
        write_file(&src_path.join("node-types.json"), node_types_json)?;
        return Ok(());
    }

    let semantic_version = read_grammar_version(&repo_path)?;

    if semantic_version.is_none() && abi_version > ABI_VERSION_MIN {
        warn!(
            concat!(
                "No `tree-sitter.json` file found in your grammar, ",
                "this file is required to generate with ABI {}. ",
                "Using ABI version {} instead.\n",
                "This file can be set up with `tree-sitter init`. ",
                "For more information, see https://tree-sitter.github.io/tree-sitter/cli/init."
            ),
            abi_version, ABI_VERSION_MIN
        );
        abi_version = ABI_VERSION_MIN;
    }

    let GeneratedParser {
        c_code,
        node_types_json,
    } = generate_parser_for_grammar_with_opts(
        &input_grammar,
        abi_version,
        // NOTE(review): these casts silently truncate version components
        // above 255; confirm whether such versions should be rejected instead.
        semantic_version.map(|v| (v.major as u8, v.minor as u8, v.patch as u8)),
        report_symbol_name,
        optimizations,
    )?;

    write_file(&src_path.join("parser.c"), c_code)?;
    write_file(&src_path.join("node-types.json"), node_types_json)?;
    fs::create_dir_all(&header_path)
        .map_err(|e| GenerateError::IO(IoError::new(&e, Some(header_path.as_path()))))?;
    write_file(&header_path.join("alloc.h"), ALLOC_HEADER)?;
    write_file(&header_path.join("array.h"), ARRAY_HEADER)?;
    write_file(&header_path.join("parser.h"), PARSER_HEADER)?;

    Ok(())
}
326
327pub fn generate_parser_for_grammar(
328 grammar_json: &str,
329 semantic_version: Option<(u8, u8, u8)>,
330) -> GenerateResult<(String, String)> {
331 let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
332 let input_grammar = parse_grammar(&grammar_json)?;
333 let parser = generate_parser_for_grammar_with_opts(
334 &input_grammar,
335 LANGUAGE_VERSION,
336 semantic_version,
337 None,
338 OptLevel::empty(),
339 )?;
340 Ok((input_grammar.name, parser.c_code))
341}
342
343fn generate_node_types_from_grammar(input_grammar: &InputGrammar) -> GenerateResult<JSONOutput> {
344 let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
345 prepare_grammar(input_grammar)?;
346 let variable_info =
347 node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?;
348
349 #[cfg(feature = "load")]
350 let node_types_json = node_types::generate_node_types_json(
351 &syntax_grammar,
352 &lexical_grammar,
353 &simple_aliases,
354 &variable_info,
355 )?;
356 Ok(JSONOutput {
357 #[cfg(feature = "load")]
358 node_types_json: serde_json::to_string_pretty(&node_types_json).unwrap(),
359 syntax_grammar,
360 lexical_grammar,
361 inlines,
362 simple_aliases,
363 variable_info,
364 })
365}
366
367fn generate_parser_for_grammar_with_opts(
368 input_grammar: &InputGrammar,
369 abi_version: usize,
370 semantic_version: Option<(u8, u8, u8)>,
371 report_symbol_name: Option<&str>,
372 optimizations: OptLevel,
373) -> GenerateResult<GeneratedParser> {
374 let JSONOutput {
375 syntax_grammar,
376 lexical_grammar,
377 inlines,
378 simple_aliases,
379 variable_info,
380 #[cfg(feature = "load")]
381 node_types_json,
382 } = generate_node_types_from_grammar(input_grammar)?;
383 let supertype_symbol_map =
384 node_types::get_supertype_symbol_map(&syntax_grammar, &simple_aliases, &variable_info);
385 let tables = build_tables(
386 &syntax_grammar,
387 &lexical_grammar,
388 &simple_aliases,
389 &variable_info,
390 &inlines,
391 report_symbol_name,
392 optimizations,
393 )?;
394 let c_code = render_c_code(
395 &input_grammar.name,
396 tables,
397 syntax_grammar,
398 lexical_grammar,
399 simple_aliases,
400 abi_version,
401 semantic_version,
402 supertype_symbol_map,
403 )?;
404 Ok(GeneratedParser {
405 c_code,
406 #[cfg(feature = "load")]
407 node_types_json,
408 })
409}
410
/// Looks for `tree-sitter.json` in `repo_path` and then in each successive
/// parent of that path, returning the `metadata.version` of the first one found.
///
/// Returns `Ok(None)` when no such file is found.
///
/// # Errors
///
/// Returns a [`ParseVersionError`] if a located file cannot be read, cannot be
/// deserialized, or holds a version that is not valid semver.
#[cfg(feature = "load")]
fn read_grammar_version(repo_path: &Path) -> Result<Option<Version>, ParseVersionError> {
    #[derive(Deserialize)]
    struct TreeSitterJson {
        metadata: Metadata,
    }

    #[derive(Deserialize)]
    struct Metadata {
        version: String,
    }

    let filename = "tree-sitter.json";
    let mut path = repo_path.join(filename);

    loop {
        if path.exists() {
            let contents = fs::read_to_string(&path)
                .map_err(|e| ParseVersionError::IO(IoError::new(&e, Some(path.as_path()))))?;
            let parsed: TreeSitterJson = serde_json::from_str(&contents).map_err(|e| {
                ParseVersionError::JSON(format!("Failed to parse `{}` -- {e}", path.display()))
            })?;
            return Version::parse(&parsed.metadata.version)
                .map(Some)
                .map_err(|e| {
                    ParseVersionError::Version(format!(
                        "Failed to parse `{}` version as semver -- {e}",
                        path.display()
                    ))
                });
        }

        // Drop the file name, then step up one directory; stop once there is
        // no parent left.
        path.pop();
        if !path.pop() {
            return Ok(None);
        }
        path.push(filename);
    }
}
459
/// Loads a grammar file and returns its JSON text.
///
/// `.js` files are evaluated with `js_runtime`; `.json` files are read as-is.
///
/// # Errors
///
/// Returns a [`LoadGrammarError`] if the path is a directory, has any other
/// extension, or if loading fails.
#[cfg(feature = "load")]
pub fn load_grammar_file(
    grammar_path: &Path,
    js_runtime: Option<&str>,
) -> LoadGrammarFileResult<String> {
    if grammar_path.is_dir() {
        return Err(LoadGrammarError::InvalidPath);
    }

    let extension = grammar_path.extension().and_then(|ext| ext.to_str());
    match extension {
        Some("js") => {
            load_js_grammar_file(grammar_path, js_runtime).map_err(LoadGrammarError::from)
        }
        Some("json") => fs::read_to_string(grammar_path)
            .map_err(|e| LoadGrammarError::IO(IoError::new(&e, Some(grammar_path)))),
        _ => Err(LoadGrammarError::FileExtension(grammar_path.to_owned())),
    }
}
475
/// Evaluates a JavaScript grammar file and returns the grammar as
/// pretty-printed JSON.
///
/// With the `qjs-rt` feature and `js_runtime == Some("native")`, the embedded
/// QuickJS runtime is used. Otherwise `js_runtime` (default `node`) is spawned
/// with the grammar path in `TREE_SITTER_GRAMMAR_PATH`, and the CLI version
/// globals plus the bundled `dsl.js` are piped to its stdin.
///
/// Everything the runtime prints before the last newline is forwarded to this
/// process's stdout; the text after it is parsed as the grammar JSON.
///
/// # Errors
///
/// Returns a [`JSError`] if the path cannot be canonicalized, the runtime
/// cannot be spawned or written to, it exits with a non-zero status, or its
/// output is not valid UTF-8 / JSON.
#[cfg(feature = "load")]
fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> JSResult<String> {
    let grammar_path = dunce::canonicalize(grammar_path)
        .map_err(|e| JSError::IO(IoError::new(&e, Some(grammar_path))))?;

    #[cfg(feature = "qjs-rt")]
    if js_runtime == Some("native") {
        return quickjs::execute_native_runtime(&grammar_path);
    }

    #[cfg(windows)]
    let grammar_path = PathBuf::from(format!("file:///{}", grammar_path.display()));

    let js_runtime = js_runtime.unwrap_or("node");

    // Known runtimes need specific flags to read the script from stdin.
    let mut js_command = Command::new(js_runtime);
    match js_runtime {
        "node" => {
            js_command.args(["--input-type=module", "-"]);
        }
        "bun" => {
            js_command.arg("-");
        }
        "deno" => {
            js_command.args(["run", "--allow-all", "-"]);
        }
        _ => {}
    }

    let mut js_process = js_command
        .env("TREE_SITTER_GRAMMAR_PATH", grammar_path)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()
        .map_err(|e| JSError::JSRuntimeSpawn {
            runtime: js_runtime.to_string(),
            error: e.to_string(),
        })?;

    let mut js_stdin = js_process
        .stdin
        .take()
        .ok_or_else(|| JSError::JSRuntimeStdin {
            runtime: js_runtime.to_string(),
        })?;

    let cli_version = Version::parse(env!("CARGO_PKG_VERSION"))?;
    write!(
        js_stdin,
        "globalThis.TREE_SITTER_CLI_VERSION_MAJOR = {};
        globalThis.TREE_SITTER_CLI_VERSION_MINOR = {};
        globalThis.TREE_SITTER_CLI_VERSION_PATCH = {};",
        cli_version.major, cli_version.minor, cli_version.patch,
    )
    .map_err(|e| JSError::JSRuntimeWrite {
        runtime: js_runtime.to_string(),
        item: "tree-sitter version".to_string(),
        error: e.to_string(),
    })?;
    // `write_all` is required here: a plain `write` may accept only part of
    // the buffer, which would hand the runtime a truncated script.
    js_stdin
        .write_all(include_bytes!("./dsl.js"))
        .map_err(|e| JSError::JSRuntimeWrite {
            runtime: js_runtime.to_string(),
            item: "grammar dsl".to_string(),
            error: e.to_string(),
        })?;
    // Close stdin so the runtime sees end-of-input and starts executing.
    drop(js_stdin);

    let output = js_process
        .wait_with_output()
        .map_err(|e| JSError::JSRuntimeRead {
            runtime: js_runtime.to_string(),
            error: e.to_string(),
        })?;

    match output.status.code() {
        Some(0) => {
            let stdout = String::from_utf8(output.stdout).map_err(|e| JSError::JSRuntimeUtf8 {
                runtime: js_runtime.to_string(),
                error: e.to_string(),
            })?;

            let grammar_json = match stdout.rfind('\n') {
                Some(pos) => {
                    // Everything before the last newline is forwarded to our
                    // own stdout; only the text after it is the grammar JSON.
                    let runtime_output = &stdout[..pos];

                    let mut out = std::io::stdout().lock();
                    out.write_all(runtime_output.as_bytes())
                        .map_err(|e| JSError::IO(IoError::new(&e, None)))?;
                    out.write_all(b"\n")
                        .map_err(|e| JSError::IO(IoError::new(&e, None)))?;
                    out.flush()
                        .map_err(|e| JSError::IO(IoError::new(&e, None)))?;

                    &stdout[pos + 1..]
                }
                None => &stdout[..],
            };

            let grammar_value = serde_json::from_str::<serde_json::Value>(grammar_json)?;
            Ok(serde_json::to_string_pretty(&grammar_value)?)
        }
        // A missing exit code is reported as `-1`.
        code => Err(JSError::JSRuntimeExit {
            runtime: js_runtime.to_string(),
            code: code.unwrap_or(-1),
        }),
    }
}
592
/// Writes `body` to `path`, replacing any existing file.
///
/// # Errors
///
/// Returns [`GenerateError::IO`] carrying `path` if the write fails.
#[cfg(feature = "load")]
pub fn write_file(path: &Path, body: impl AsRef<[u8]>) -> GenerateResult<()> {
    match fs::write(path, body) {
        Ok(()) => Ok(()),
        Err(e) => Err(GenerateError::IO(IoError::new(&e, Some(path)))),
    }
}
597
#[cfg(test)]
mod tests {
    use super::{LANGUAGE_VERSION, PARSER_HEADER};

    /// `LANGUAGE_VERSION` must match `TREE_SITTER_LANGUAGE_VERSION` in `api.h`.
    #[test]
    fn test_language_versions_are_in_sync() {
        let api_h = include_str!("../../../lib/include/tree_sitter/api.h");
        let api_language_version = api_h
            .lines()
            .find_map(|line| {
                let value = line
                    .trim()
                    .strip_prefix("#define TREE_SITTER_LANGUAGE_VERSION ")?;
                value.parse::<usize>().ok()
            })
            .expect("Failed to find TREE_SITTER_LANGUAGE_VERSION definition in api.h");
        assert_eq!(LANGUAGE_VERSION, api_language_version);
    }

    /// The bundled `parser.h.inc` must be identical to `lib/src/parser.h`.
    #[test]
    fn test_parser_header_in_sync() {
        let parser_h = include_str!("../../../lib/src/parser.h");
        // A plain `assert!` keeps a mismatch from dumping both headers.
        assert!(
            parser_h == PARSER_HEADER,
            "parser.h.inc is out of sync with lib/src/parser.h. Run: cp lib/src/parser.h crates/generate/src/parser.h.inc"
        );
    }
}