1#![cfg_attr(not(any(test, doctest)), doc = include_str!("../README.md"))]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3
4#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
5use std::ops::Range;
6#[cfg(feature = "tree-sitter-highlight")]
7use std::sync::Mutex;
8use std::{
9 collections::HashMap,
10 env, fs,
11 io::{BufRead, BufReader},
12 marker::PhantomData,
13 mem,
14 path::{Path, PathBuf},
15 process::Command,
16 sync::LazyLock,
17 time::{SystemTime, SystemTimeError},
18};
19
20use etcetera::BaseStrategy as _;
21use fs4::fs_std::FileExt;
22use libloading::{Library, Symbol};
23use log::{error, info, warn};
24use once_cell::unsync::OnceCell;
25use regex::{Regex, RegexBuilder};
26use semver::Version;
27use serde::{Deserialize, Deserializer, Serialize};
28use thiserror::Error;
29use tree_sitter::Language;
30#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
31use tree_sitter::QueryError;
32#[cfg(feature = "tree-sitter-highlight")]
33use tree_sitter::QueryErrorKind;
34#[cfg(feature = "wasm")]
35use tree_sitter::WasmError;
36#[cfg(feature = "tree-sitter-highlight")]
37use tree_sitter_highlight::HighlightConfiguration;
38#[cfg(feature = "tree-sitter-tags")]
39use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
40
/// Lazily compiled regex that matches a `"name": "<value>"` pair and captures the value
/// (non-greedily) in group 1.
// NOTE(review): presumably applied to the text of `grammar.json` -- confirm at the use site.
static GRAMMAR_NAME_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#""name":\s*"(.*?)""#).unwrap());

/// Contents of the `wasi-sdk-version` file one directory above this source file, embedded at
/// compile time with leading and trailing ASCII whitespace removed.
const WASI_SDK_VERSION: &str = include_str!("../wasi-sdk-version").trim_ascii();

/// Result alias used throughout the loader; the error type is always [`LoaderError`].
pub type LoaderResult<T> = Result<T, LoaderError>;
47
/// Every error the loader reports.
///
/// Variants marked `#[error(transparent)]` forward their message to the wrapped error.
/// Variants holding a `Box<Self>` wrap another `LoaderError` and prepend context to it.
#[derive(Debug, Error)]
pub enum LoaderError {
    /// The C compiler process could not be executed.
    #[error(transparent)]
    Compiler(CompilerError),
    /// The compiler ran but failed; carries its captured stdout and stderr.
    #[error("Parser compilation failed.\nStdout: {0}\nStderr: {1}")]
    Compilation(String, String),
    #[error("Failed to execute curl for {0} -- {1}")]
    Curl(String, std::io::Error),
    #[error("Failed to load language in current directory:\n{0}")]
    CurrentDirectoryLoad(Box<Self>),
    #[error("External file path {0} is outside of parser directory {1}")]
    ExternalFile(String, String),
    #[error("Failed to extract archive {0} to {1}")]
    Extraction(String, String),
    #[error("Failed to load language for file name {0}:\n{1}")]
    FileNameLoad(String, Box<Self>),
    #[error("Failed to parse the language name from grammar.json at {0}")]
    GrammarJSON(String),
    #[error(transparent)]
    HomeDir(#[from] etcetera::HomeDirError),
    /// An I/O failure, optionally annotated with the path involved.
    #[error(transparent)]
    IO(IoError),
    /// A dynamic library could not be opened.
    #[error(transparent)]
    Library(LibraryError),
    #[error("Failed to compare binary and source timestamps:\n{0}")]
    ModifiedTime(Box<Self>),
    #[error("No language found")]
    NoLanguage,
    #[error(transparent)]
    Query(LoaderQueryError),
    #[error(transparent)]
    ScannerSymbols(ScannerSymbolError),
    #[error("Failed to load language for scope '{0}':\n{1}")]
    ScopeLoad(String, Box<Self>),
    #[error(transparent)]
    Serialization(#[from] serde_json::Error),
    /// A symbol lookup inside an opened dynamic library failed.
    #[error(transparent)]
    Symbol(SymbolError),
    // NOTE(review): `TagsError` is imported only under the `tree-sitter-tags` feature (see the
    // `use` items at the top of the file) while this variant is unconditional -- confirm that
    // builds without that feature are not expected to compile.
    #[error(transparent)]
    Tags(#[from] TagsError),
    #[error("Failed to execute tar for {0} -- {1}")]
    Tar(String, std::io::Error),
    #[error(transparent)]
    Time(#[from] SystemTimeError),
    #[error("Unknown scope '{0}'")]
    UnknownScope(String),
    #[error("Failed to download wasi-sdk from {0}")]
    WasiSDKDownload(String),
    #[error(transparent)]
    WasiSDKClang(#[from] WasiSDKClangError),
    #[error("Unsupported platform for wasi-sdk")]
    WasiSDKPlatform,
    /// Only present when the `wasm` feature is enabled.
    #[cfg(feature = "wasm")]
    #[error(transparent)]
    Wasm(#[from] WasmError),
    #[error("Failed to run wasi-sdk clang -- {0}")]
    WasmCompiler(std::io::Error),
    #[error("wasi-sdk clang command failed: {0}")]
    WasmCompilation(String),
}
108
/// Failure to execute the C compiler.
#[derive(Debug, Error)]
pub struct CompilerError {
    /// The underlying I/O error.
    pub error: std::io::Error,
    /// The command that was attempted; printed with `{:?}` by the `Display` impl.
    pub command: Box<Command>,
}
114
115impl std::fmt::Display for CompilerError {
116 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
117 write!(
118 f,
119 "Failed to execute the C compiler with the following command:\n{:?}\nError: {}",
120 *self.command, self.error
121 )?;
122 Ok(())
123 }
124}
125
/// An I/O error, optionally tagged with the path it relates to.
#[derive(Debug, Error)]
pub struct IoError {
    /// The underlying I/O error.
    pub error: std::io::Error,
    /// Lossy string form of the path involved, when one was supplied.
    pub path: Option<String>,
}
131
132impl IoError {
133 fn new(error: std::io::Error, path: Option<&Path>) -> Self {
134 Self {
135 error,
136 path: path.map(|p| p.to_string_lossy().to_string()),
137 }
138 }
139}
140
141impl std::fmt::Display for IoError {
142 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
143 write!(f, "{}", self.error)?;
144 if let Some(ref path) = self.path {
145 write!(f, " ({path})")?;
146 }
147 Ok(())
148 }
149}
150
/// Failure to open a dynamic library.
#[derive(Debug, Error)]
pub struct LibraryError {
    /// The error reported by `libloading`.
    pub error: libloading::Error,
    /// Path of the library that could not be opened.
    pub path: String,
}
156
157impl std::fmt::Display for LibraryError {
158 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
159 write!(
160 f,
161 "Error opening dynamic library {} -- {}",
162 self.path, self.error
163 )?;
164 Ok(())
165 }
166}
167
/// A tree-sitter query error, optionally tagged with the query file it came from.
// NOTE(review): `QueryError` is imported only when the `tree-sitter-highlight` or
// `tree-sitter-tags` feature is enabled, but this struct is unconditional -- confirm.
#[derive(Debug, Error)]
pub struct LoaderQueryError {
    /// The underlying query error.
    pub error: QueryError,
    /// Name of the query file, when known; printed as a header line by `Display`.
    pub file: Option<String>,
}
173
174impl std::fmt::Display for LoaderQueryError {
175 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
176 if let Some(ref path) = self.file {
177 writeln!(f, "Error in query file {path}:")?;
178 }
179 write!(f, "{}", self.error)?;
180 Ok(())
181 }
182}
183
/// Failure to look up a symbol in an already-opened dynamic library.
#[derive(Debug, Error)]
pub struct SymbolError {
    /// The error reported by `libloading`.
    pub error: libloading::Error,
    /// Name of the symbol that was requested.
    pub symbol_name: String,
    /// Path of the library that was searched.
    pub path: String,
}
190
191impl std::fmt::Display for SymbolError {
192 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
193 write!(
194 f,
195 "Failed to load symbol {} from {} -- {}",
196 self.symbol_name, self.path, self.error
197 )?;
198 Ok(())
199 }
200}
201
/// Reports functions that an external scanner is required to provide but does not.
#[derive(Debug, Error)]
pub struct ScannerSymbolError {
    /// Names of the missing functions; each is printed on its own line by `Display`.
    pub missing: Vec<String>,
}
206
207impl std::fmt::Display for ScannerSymbolError {
208 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
209 writeln!(
210 f,
211 "Missing required functions in the external scanner, parsing won't work without these!\n"
212 )?;
213 for symbol in &self.missing {
214 writeln!(f, " `{symbol}`")?;
215 }
216 writeln!(
217 f,
218 "You can read more about this at https://tree-sitter.github.io/tree-sitter/creating-parsers/4-external-scanners\n"
219 )?;
220 Ok(())
221 }
222}
223
/// No clang executable could be found inside a wasi-sdk directory.
#[derive(Debug, Error)]
pub struct WasiSDKClangError {
    /// The wasi-sdk directory that was searched.
    pub wasi_sdk_dir: String,
    /// Executable names that were looked for; listed in the error message.
    pub possible_executables: Vec<&'static str>,
    /// Selects the message wording: `true` for a downloaded wasi-sdk, `false` for one given
    /// through `TREE_SITTER_WASI_SDK_PATH`.
    pub download: bool,
}
230
231impl std::fmt::Display for WasiSDKClangError {
232 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
233 if self.download {
234 write!(
235 f,
236 "Failed to find clang executable in downloaded wasi-sdk at '{}'.",
237 self.wasi_sdk_dir
238 )?;
239 } else {
240 write!(f, "TREE_SITTER_WASI_SDK_PATH is set to '{}', but no clang executable found in 'bin/' directory.", self.wasi_sdk_dir)?;
241 }
242
243 let possible_exes = self.possible_executables.join(", ");
244 write!(f, " Looked for: {possible_exes}.")?;
245
246 Ok(())
247 }
248}
249
/// Default file name for a highlights query.
pub const DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME: &str = "highlights.scm";

/// Default file name for an injections query.
pub const DEFAULT_INJECTIONS_QUERY_FILE_NAME: &str = "injections.scm";

/// Default file name for a locals query.
pub const DEFAULT_LOCALS_QUERY_FILE_NAME: &str = "locals.scm";

/// Default file name for a tags query.
pub const DEFAULT_TAGS_QUERY_FILE_NAME: &str = "tags.scm";
257
/// Loader configuration.
#[derive(Default, Deserialize, Serialize)]
pub struct Config {
    /// Directories scanned by `Loader::find_all_languages` for `tree-sitter-*` subdirectories.
    ///
    /// Serialized as `parser-directories`. On deserialization a leading `~` or `$HOME`
    /// component is replaced with the home directory (see `deserialize_parser_directories`).
    #[serde(default)]
    #[serde(
        rename = "parser-directories",
        deserialize_with = "deserialize_parser_directories"
    )]
    pub parser_directories: Vec<PathBuf>,
}
267
/// A JSON value that may be absent, a single path, or a list of paths.
///
/// `#[serde(untagged)]` means the variant is chosen from the shape of the JSON value.
#[derive(Serialize, Deserialize, Clone, Default)]
#[serde(untagged)]
pub enum PathsJSON {
    /// No paths given (the default).
    #[default]
    Empty,
    /// Exactly one path.
    Single(PathBuf),
    /// A list of paths.
    Multiple(Vec<PathBuf>),
}
276
277impl PathsJSON {
278 fn into_vec(self) -> Option<Vec<PathBuf>> {
279 match self {
280 Self::Empty => None,
281 Self::Single(s) => Some(vec![s]),
282 Self::Multiple(s) => Some(s),
283 }
284 }
285
286 const fn is_empty(&self) -> bool {
287 matches!(self, Self::Empty)
288 }
289
290 #[must_use]
292 pub fn to_variable_value<'a>(&'a self, default: &'a PathBuf) -> &'a str {
293 match self {
294 Self::Empty => Some(default),
295 Self::Single(path_buf) => Some(path_buf),
296 Self::Multiple(paths) => paths.first(),
297 }
298 .map_or("", |path| path.as_os_str().to_str().unwrap_or(""))
299 }
300}
301
/// An author entry that is either a plain string or an object with `name`, `email` and `url`.
///
/// `#[serde(untagged)]` means the variant is chosen from the shape of the JSON value.
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONAuthor {
    /// Author given as a single string.
    String(String),
    /// Author given as an object; only `name` is required.
    Object {
        name: String,
        email: Option<String>,
        url: Option<String>,
    },
}
312
/// A repository entry that is either a plain string or an object with a `url` field.
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONRepository {
    /// Repository given as a single string.
    String(String),
    /// Repository given as an object.
    Object { url: String },
}
319
/// Serde model of a package manifest.
// NOTE(review): presumably the npm `package.json` of a grammar repository -- confirm.
#[derive(Serialize, Deserialize)]
pub struct PackageJSON {
    pub name: String,
    /// Parsed as a semantic version; deserialization fails if it is not one.
    pub version: Version,
    pub description: Option<String>,
    pub author: Option<PackageJSONAuthor>,
    pub maintainers: Option<Vec<PackageJSONAuthor>>,
    pub license: Option<String>,
    pub repository: Option<PackageJSONRepository>,
    /// Language configurations stored under the `tree-sitter` key; omitted from the output
    /// when `None`.
    #[serde(default)]
    #[serde(rename = "tree-sitter", skip_serializing_if = "Option::is_none")]
    pub tree_sitter: Option<Vec<LanguageConfigurationJSON>>,
}
333
/// Serde default for `LanguageConfigurationJSON::path`: the current directory, `"."`.
fn default_path() -> PathBuf {
    ".".into()
}
337
/// Serde model of one language configuration entry; keys are kebab-case in JSON.
#[derive(Serialize, Deserialize, Clone)]
#[serde(rename_all = "kebab-case")]
pub struct LanguageConfigurationJSON {
    /// Defaults to `"."` when absent.
    #[serde(default = "default_path")]
    pub path: PathBuf,
    pub scope: Option<String>,
    pub file_types: Option<Vec<String>>,
    pub content_regex: Option<String>,
    pub first_line_regex: Option<String>,
    pub injection_regex: Option<String>,
    // The query-path fields below default to `PathsJSON::Empty` and are left out of the
    // serialized output while empty.
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
}
359
/// Serde model of a `tree-sitter.json` file (the file name read by `from_file`).
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct TreeSitterJSON {
    /// Value of the `$schema` key, if present.
    #[serde(rename = "$schema")]
    pub schema: Option<String>,
    /// The grammars declared in the file.
    pub grammars: Vec<Grammar>,
    pub metadata: Metadata,
    /// Falls back to `Bindings::default()` when the key is absent.
    #[serde(default)]
    pub bindings: Bindings,
}
370
371impl TreeSitterJSON {
372 pub fn from_file(path: &Path) -> LoaderResult<Self> {
373 let path = path.join("tree-sitter.json");
374 Ok(serde_json::from_str(&fs::read_to_string(&path).map_err(
375 |e| LoaderError::IO(IoError::new(e, Some(path.as_path()))),
376 )?)?)
377 }
378
379 #[must_use]
380 pub fn has_multiple_language_configs(&self) -> bool {
381 self.grammars.len() > 1
382 }
383}
384
/// Serde model of one entry of `TreeSitterJSON::grammars`; keys are kebab-case in JSON.
///
/// Optional fields tagged `skip_serializing_if` are omitted from the output when `None`
/// (or, for `PathsJSON` fields, when empty).
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Grammar {
    pub name: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub camelcase: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    pub scope: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub path: Option<PathBuf>,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
    // Unlike the other optional fields, this one is serialized even when `None`.
    pub file_types: Option<Vec<String>>,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub injection_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub first_line_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub class_name: Option<String>,
}
416
/// Serde model of the `metadata` section of `TreeSitterJSON`.
#[derive(Serialize, Deserialize)]
pub struct Metadata {
    /// Parsed as a semantic version.
    pub version: Version,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub authors: Option<Vec<Author>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub links: Option<Links>,
    /// Never read from or written to JSON (`#[serde(skip)]`); it is `None` after
    /// deserialization.
    #[serde(skip)]
    pub namespace: Option<String>,
}
431
/// One entry of `Metadata::authors`; only `name` is required.
#[derive(Serialize, Deserialize)]
pub struct Author {
    pub name: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub email: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub url: Option<String>,
}
440
/// The `Metadata::links` section; `repository` is required, `funding` is optional.
#[derive(Serialize, Deserialize)]
pub struct Links {
    pub repository: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub funding: Option<String>,
}
447
/// One on/off flag per language binding.
///
/// `#[serde(default)]` on the struct means keys missing from the JSON take their value from
/// `Bindings::default()`.
#[derive(Serialize, Deserialize, Clone)]
#[serde(default)]
pub struct Bindings {
    pub c: bool,
    pub go: bool,
    pub java: bool,
    /// Excluded from (de)serialization; also absent from `languages()` and rejected by
    /// `with_enabled_languages`.
    #[serde(skip)]
    pub kotlin: bool,
    pub node: bool,
    pub python: bool,
    pub rust: bool,
    pub swift: bool,
    pub zig: bool,
}
462
463impl Bindings {
464 #[must_use]
466 pub const fn languages(&self) -> [(&'static str, bool); 8] {
467 [
468 ("c", true),
469 ("go", true),
470 ("java", false),
471 ("node", true),
474 ("python", true),
475 ("rust", true),
476 ("swift", true),
477 ("zig", false),
478 ]
479 }
480
481 pub fn with_enabled_languages<'a, I>(languages: I) -> Result<Self, &'a str>
483 where
484 I: Iterator<Item = &'a str>,
485 {
486 let mut out = Self {
487 c: false,
488 go: false,
489 java: false,
490 kotlin: false,
491 node: false,
492 python: false,
493 rust: false,
494 swift: false,
495 zig: false,
496 };
497
498 for v in languages {
499 match v {
500 "c" => out.c = true,
501 "go" => out.go = true,
502 "java" => out.java = true,
503 "node" => out.node = true,
506 "python" => out.python = true,
507 "rust" => out.rust = true,
508 "swift" => out.swift = true,
509 "zig" => out.zig = true,
510 unsupported => return Err(unsupported),
511 }
512 }
513
514 Ok(out)
515 }
516}
517
518impl Default for Bindings {
519 fn default() -> Self {
520 Self {
521 c: true,
522 go: true,
523 java: false,
524 kotlin: false,
525 node: true,
526 python: true,
527 rust: true,
528 swift: true,
529 zig: false,
530 }
531 }
532}
533
534fn deserialize_parser_directories<'de, D>(deserializer: D) -> Result<Vec<PathBuf>, D::Error>
538where
539 D: Deserializer<'de>,
540{
541 let paths = Vec::<PathBuf>::deserialize(deserializer)?;
542 let Ok(home) = etcetera::home_dir() else {
543 return Ok(paths);
544 };
545 let standardized = paths
546 .into_iter()
547 .map(|path| standardize_path(path, &home))
548 .collect();
549 Ok(standardized)
550}
551
/// Replaces a leading `~` or `$HOME` path component with `home`.
///
/// Matching is per component, so `~user/x` is left untouched. Paths with neither prefix are
/// returned unchanged.
fn standardize_path(path: PathBuf, home: &Path) -> PathBuf {
    let expanded = ["~", "$HOME"]
        .iter()
        .find_map(|prefix| path.strip_prefix(prefix).ok())
        .map(|rest| home.join(rest));
    expanded.unwrap_or(path)
}
561
562impl Config {
563 #[must_use]
564 pub fn initial() -> Self {
565 let home_dir = etcetera::home_dir().expect("Cannot determine home directory");
566 Self {
567 parser_directories: vec![
568 home_dir.join("github"),
569 home_dir.join("src"),
570 home_dir.join("source"),
571 home_dir.join("projects"),
572 home_dir.join("dev"),
573 home_dir.join("git"),
574 ],
575 }
576 }
577}
578
/// Value of the `BUILD_TARGET` environment variable captured at compile time; passed to
/// `cc::Build` as both target and host in `compile_parser_to_dylib`.
// NOTE(review): presumably set by the crate's build script -- confirm.
const BUILD_TARGET: &str = env!("BUILD_TARGET");
580
/// One resolved language configuration held by a `Loader`.
pub struct LanguageConfiguration<'a> {
    /// Scope string; compared for equality by `Loader::language_configuration_for_scope`.
    pub scope: Option<String>,
    /// Used to score candidates when several configurations match a file name.
    pub content_regex: Option<Regex>,
    pub first_line_regex: Option<Regex>,
    /// Matched against injection strings by
    /// `Loader::language_configuration_for_injection_string`.
    pub injection_regex: Option<Regex>,
    pub file_types: Vec<String>,
    pub root_path: PathBuf,
    pub highlights_filenames: Option<Vec<PathBuf>>,
    pub injections_filenames: Option<Vec<PathBuf>>,
    pub locals_filenames: Option<Vec<PathBuf>>,
    pub tags_filenames: Option<Vec<PathBuf>>,
    pub language_name: String,
    /// Index into the loader's `languages_by_id` table.
    language_id: usize,
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_config: OnceCell<Option<HighlightConfiguration>>,
    #[cfg(feature = "tree-sitter-tags")]
    tags_config: OnceCell<Option<TagsConfiguration>>,
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_names: &'a Mutex<Vec<String>>,
    #[cfg(feature = "tree-sitter-highlight")]
    use_all_highlight_names: bool,
    // Keeps `'a` in use when the `tree-sitter-highlight` feature (and with it the only other
    // field that mentions `'a`) is compiled out.
    _phantom: PhantomData<&'a ()>,
}
604
/// Discovers, compiles and loads tree-sitter languages.
pub struct Loader {
    /// Directory where compiled parser libraries are written when no explicit output path is
    /// requested.
    pub parser_lib_path: PathBuf,
    /// Per-language entry: the grammar directory (its `src` subdirectory is compiled), the
    /// lazily loaded `Language`, and optional external files.
    languages_by_id: Vec<(PathBuf, OnceCell<Language>, Option<Vec<PathBuf>>)>,
    language_configurations: Vec<LanguageConfiguration<'static>>,
    /// Maps a file name or extension string to indices into `language_configurations`.
    language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
    language_configuration_in_current_path: Option<usize>,
    /// Maps a first-line regex source string to indices into `language_configurations`.
    language_configuration_ids_by_first_line_regex: HashMap<String, Vec<usize>>,
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_names: Box<Mutex<Vec<String>>>,
    /// Starts as `true`; `configure_highlights` sets it to `false`.
    #[cfg(feature = "tree-sitter-highlight")]
    use_all_highlight_names: bool,
    /// Appends `.debug._` to the library name and selects an unoptimized build.
    debug_build: bool,
    /// Appends `.sanitize._` to the library name and sets `CompileConfig::sanitize`.
    sanitize_build: bool,
    /// Forces recompilation regardless of timestamps.
    force_rebuild: bool,

    /// When set, parsers are compiled to and loaded from `.wasm` files instead of native
    /// libraries.
    #[cfg(feature = "wasm")]
    wasm_store: Mutex<Option<tree_sitter::WasmStore>>,
}
623
/// Inputs for compiling one parser.
pub struct CompileConfig<'a> {
    /// Directory containing `parser.c` and `grammar.json`.
    pub src_path: &'a Path,
    /// Include directories handed to the C compiler.
    pub header_paths: Vec<&'a Path>,
    /// Path of the generated `parser.c`.
    pub parser_path: PathBuf,
    /// Path of the external scanner source, if one exists.
    pub scanner_path: Option<PathBuf>,
    /// Extra files, relative to `src_path`, whose timestamps also trigger recompilation.
    pub external_files: Option<&'a [PathBuf]>,
    /// Explicit output location; when set, the parser is always recompiled.
    pub output_path: Option<PathBuf>,
    /// Preprocessor symbols to define (each is passed to `cc::Build::define` with no value).
    pub flags: &'a [&'a str],
    pub sanitize: bool,
    /// Grammar name; used to form the library name and the `tree_sitter_<name>` symbol.
    pub name: String,
}
635
636impl<'a> CompileConfig<'a> {
637 #[must_use]
638 pub fn new(
639 src_path: &'a Path,
640 externals: Option<&'a [PathBuf]>,
641 output_path: Option<PathBuf>,
642 ) -> Self {
643 Self {
644 src_path,
645 header_paths: vec![src_path],
646 parser_path: src_path.join("parser.c"),
647 scanner_path: None,
648 external_files: externals,
649 output_path,
650 flags: &[],
651 sanitize: false,
652 name: String::new(),
653 }
654 }
655}
656
// NOTE(review): `Loader` stores `once_cell::unsync::OnceCell` values (in `languages_by_id`),
// which are not `Sync` by themselves. This impl asserts that sharing `&Loader` across threads
// is nevertheless sound; the justification is not visible here -- confirm and record it as a
// `SAFETY:` comment.
unsafe impl Sync for Loader {}
658
659impl Loader {
660 pub fn new() -> LoaderResult<Self> {
661 let parser_lib_path = if let Ok(path) = env::var("TREE_SITTER_LIBDIR") {
662 PathBuf::from(path)
663 } else {
664 if cfg!(target_os = "macos") {
665 let legacy_apple_path = etcetera::base_strategy::Apple::new()?
666 .cache_dir() .join("tree-sitter");
668 if legacy_apple_path.exists() && legacy_apple_path.is_dir() {
669 std::fs::remove_dir_all(&legacy_apple_path).map_err(|e| {
670 LoaderError::IO(IoError::new(e, Some(legacy_apple_path.as_path())))
671 })?;
672 }
673 }
674
675 etcetera::choose_base_strategy()?
676 .cache_dir()
677 .join("tree-sitter")
678 .join("lib")
679 };
680 Ok(Self::with_parser_lib_path(parser_lib_path))
681 }
682
683 #[must_use]
684 pub fn with_parser_lib_path(parser_lib_path: PathBuf) -> Self {
685 Self {
686 parser_lib_path,
687 languages_by_id: Vec::new(),
688 language_configurations: Vec::new(),
689 language_configuration_ids_by_file_type: HashMap::new(),
690 language_configuration_in_current_path: None,
691 language_configuration_ids_by_first_line_regex: HashMap::new(),
692 #[cfg(feature = "tree-sitter-highlight")]
693 highlight_names: Box::new(Mutex::new(Vec::new())),
694 #[cfg(feature = "tree-sitter-highlight")]
695 use_all_highlight_names: true,
696 debug_build: false,
697 sanitize_build: false,
698 force_rebuild: false,
699
700 #[cfg(feature = "wasm")]
701 wasm_store: Mutex::default(),
702 }
703 }
704
705 #[cfg(feature = "tree-sitter-highlight")]
706 #[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
707 pub fn configure_highlights(&mut self, names: &[String]) {
708 self.use_all_highlight_names = false;
709 let mut highlights = self.highlight_names.lock().unwrap();
710 highlights.clear();
711 highlights.extend(names.iter().cloned());
712 }
713
714 #[must_use]
715 #[cfg(feature = "tree-sitter-highlight")]
716 #[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
717 pub fn highlight_names(&self) -> Vec<String> {
718 self.highlight_names.lock().unwrap().clone()
719 }
720
721 pub fn find_all_languages(&mut self, config: &Config) -> LoaderResult<()> {
722 if config.parser_directories.is_empty() {
723 warn!(concat!(
724 "You have not configured any parser directories!\n",
725 "Please run `tree-sitter init-config` and edit the resulting\n",
726 "configuration file to indicate where we should look for\n",
727 "language grammars.\n"
728 ));
729 }
730 for parser_container_dir in &config.parser_directories {
731 if let Ok(entries) = fs::read_dir(parser_container_dir) {
732 for entry in entries {
733 let entry = entry.map_err(|e| LoaderError::IO(IoError::new(e, None)))?;
734 if let Some(parser_dir_name) = entry.file_name().to_str() {
735 if parser_dir_name.starts_with("tree-sitter-") {
736 self.find_language_configurations_at_path(
737 &parser_container_dir.join(parser_dir_name),
738 false,
739 )
740 .ok();
741 }
742 }
743 }
744 }
745 }
746 Ok(())
747 }
748
749 pub fn languages_at_path(&mut self, path: &Path) -> LoaderResult<Vec<(Language, String)>> {
750 if let Ok(configurations) = self.find_language_configurations_at_path(path, true) {
751 let mut language_ids = configurations
752 .iter()
753 .map(|c| (c.language_id, c.language_name.clone()))
754 .collect::<Vec<_>>();
755 language_ids.sort_unstable();
756 language_ids.dedup();
757 language_ids
758 .into_iter()
759 .map(|(id, name)| Ok((self.language_for_id(id)?, name)))
760 .collect::<LoaderResult<Vec<_>>>()
761 } else {
762 Ok(Vec::new())
763 }
764 }
765
766 #[must_use]
767 pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> {
768 self.language_configurations
769 .iter()
770 .map(|c| (c, self.languages_by_id[c.language_id].0.as_ref()))
771 .collect()
772 }
773
774 pub fn language_configuration_for_scope(
775 &self,
776 scope: &str,
777 ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
778 for configuration in &self.language_configurations {
779 if configuration.scope.as_ref().is_some_and(|s| s == scope) {
780 let language = self.language_for_id(configuration.language_id)?;
781 return Ok(Some((language, configuration)));
782 }
783 }
784 Ok(None)
785 }
786
787 pub fn language_configuration_for_first_line_regex(
788 &self,
789 path: &Path,
790 ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
791 self.language_configuration_ids_by_first_line_regex
792 .iter()
793 .try_fold(None, |_, (regex, ids)| {
794 if let Some(regex) = Self::regex(Some(regex)) {
795 let file = fs::File::open(path)
796 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
797 let reader = BufReader::new(file);
798 let first_line = reader
799 .lines()
800 .next()
801 .transpose()
802 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
803 if let Some(first_line) = first_line {
804 if regex.is_match(&first_line) && !ids.is_empty() {
805 let configuration = &self.language_configurations[ids[0]];
806 let language = self.language_for_id(configuration.language_id)?;
807 return Ok(Some((language, configuration)));
808 }
809 }
810 }
811
812 Ok(None)
813 })
814 }
815
816 pub fn language_configuration_for_file_name(
817 &self,
818 path: &Path,
819 ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
820 let configuration_ids = path
823 .file_name()
824 .and_then(|n| n.to_str())
825 .and_then(|file_name| self.language_configuration_ids_by_file_type.get(file_name))
826 .or_else(|| {
827 let mut path = path.to_owned();
828 let mut extensions = Vec::with_capacity(2);
829 while let Some(extension) = path.extension() {
830 extensions.push(extension.to_str()?.to_string());
831 path = PathBuf::from(path.file_stem()?.to_os_string());
832 }
833 extensions.reverse();
834 self.language_configuration_ids_by_file_type
835 .get(&extensions.join("."))
836 });
837
838 if let Some(configuration_ids) = configuration_ids {
839 if !configuration_ids.is_empty() {
840 let configuration = if configuration_ids.len() == 1 {
841 &self.language_configurations[configuration_ids[0]]
842 }
843 else {
846 let file_contents =
847 fs::read(path).map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
848 let file_contents = String::from_utf8_lossy(&file_contents);
849 let mut best_score = -2isize;
850 let mut best_configuration_id = None;
851 for configuration_id in configuration_ids {
852 let config = &self.language_configurations[*configuration_id];
853
854 let score;
857 if let Some(content_regex) = &config.content_regex {
858 if let Some(mat) = content_regex.find(&file_contents) {
859 score = (mat.end() - mat.start()) as isize;
860 }
861 else {
866 score = -1;
867 }
868 } else {
869 score = 0;
870 }
871 if score > best_score {
872 best_configuration_id = Some(*configuration_id);
873 best_score = score;
874 }
875 }
876
877 &self.language_configurations[best_configuration_id.unwrap()]
878 };
879
880 let language = self.language_for_id(configuration.language_id)?;
881 return Ok(Some((language, configuration)));
882 }
883 }
884
885 Ok(None)
886 }
887
888 pub fn language_configuration_for_injection_string(
889 &self,
890 string: &str,
891 ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
892 let mut best_match_length = 0;
893 let mut best_match_position = None;
894 for (i, configuration) in self.language_configurations.iter().enumerate() {
895 if let Some(injection_regex) = &configuration.injection_regex {
896 if let Some(mat) = injection_regex.find(string) {
897 let length = mat.end() - mat.start();
898 if length > best_match_length {
899 best_match_position = Some(i);
900 best_match_length = length;
901 }
902 }
903 }
904 }
905
906 if let Some(i) = best_match_position {
907 let configuration = &self.language_configurations[i];
908 let language = self.language_for_id(configuration.language_id)?;
909 Ok(Some((language, configuration)))
910 } else {
911 Ok(None)
912 }
913 }
914
915 pub fn language_for_configuration(
916 &self,
917 configuration: &LanguageConfiguration,
918 ) -> LoaderResult<Language> {
919 self.language_for_id(configuration.language_id)
920 }
921
922 fn language_for_id(&self, id: usize) -> LoaderResult<Language> {
923 let (path, language, externals) = &self.languages_by_id[id];
924 language
925 .get_or_try_init(|| {
926 let src_path = path.join("src");
927 self.load_language_at_path(CompileConfig::new(
928 &src_path,
929 externals.as_deref(),
930 None,
931 ))
932 })
933 .cloned()
934 }
935
936 pub fn compile_parser_at_path(
937 &self,
938 grammar_path: &Path,
939 output_path: PathBuf,
940 flags: &[&str],
941 ) -> LoaderResult<()> {
942 let src_path = grammar_path.join("src");
943 let mut config = CompileConfig::new(&src_path, None, Some(output_path));
944 config.flags = flags;
945 self.load_language_at_path(config).map(|_| ())
946 }
947
948 pub fn load_language_at_path(&self, mut config: CompileConfig) -> LoaderResult<Language> {
949 let grammar_path = config.src_path.join("grammar.json");
950 config.name = Self::grammar_json_name(&grammar_path)?;
951 self.load_language_at_path_with_name(config)
952 }
953
    /// Compiles the parser described by `config` when necessary and loads the resulting
    /// language. `config.name` must already be set.
    ///
    /// Steps, in order:
    /// 1. Derive the library name (with `.debug._` / `.sanitize._` suffixes) and the
    ///    `tree_sitter_<name>` symbol name, with `-` replaced by `_`.
    /// 2. Decide whether to recompile: always when `force_rebuild` is set or an explicit
    ///    output path was given, otherwise according to `needs_recompile`.
    /// 3. With the `wasm` feature and a wasm store present, compile to wasm if needed, load
    ///    from the `.wasm` file and return.
    /// 4. Otherwise coordinate with other processes through a per-grammar lock file, compile
    ///    the dynamic library if still needed, and load the language from it.
    ///
    /// # Errors
    ///
    /// Returns I/O, compilation, scanner-symbol, library or symbol errors from the steps
    /// above.
    ///
    /// # Panics
    ///
    /// Panics if `CROSS_RUNNER` is set and a temporary directory cannot be created, or if the
    /// wasm-store mutex is poisoned.
    pub fn load_language_at_path_with_name(
        &self,
        mut config: CompileConfig,
    ) -> LoaderResult<Language> {
        let mut lib_name = config.name.clone();
        let language_fn_name = format!("tree_sitter_{}", config.name.replace('-', "_"));
        // The trailing `._` is a placeholder extension: `set_extension` below replaces only
        // the part after the last dot, so `.debug` / `.sanitize` survive in the file name.
        if self.debug_build {
            lib_name.push_str(".debug._");
        }

        if self.sanitize_build {
            lib_name.push_str(".sanitize._");
            config.sanitize = true;
        }

        // The shared library directory is only needed when no explicit output was requested.
        if config.output_path.is_none() {
            fs::create_dir_all(&self.parser_lib_path).map_err(|e| {
                LoaderError::IO(IoError::new(e, Some(self.parser_lib_path.as_path())))
            })?;
        }

        // An explicitly requested output path always triggers a recompile.
        let mut recompile = self.force_rebuild || config.output_path.is_some();
        let output_path = config.output_path.unwrap_or_else(|| {
            let mut path = self.parser_lib_path.join(lib_name);
            path.set_extension(env::consts::DLL_EXTENSION);
            #[cfg(feature = "wasm")]
            if self.wasm_store.lock().unwrap().is_some() {
                path.set_extension("wasm");
            }
            path
        });
        config.output_path = Some(output_path.clone());

        let parser_path = config.src_path.join("parser.c");
        config.scanner_path = self.get_scanner_path(config.src_path);

        // Sources whose timestamps are compared against the compiled output.
        let mut paths_to_check = vec![parser_path];

        if let Some(scanner_path) = config.scanner_path.as_ref() {
            paths_to_check.push(scanner_path.clone());
        }

        paths_to_check.extend(
            config
                .external_files
                .unwrap_or_default()
                .iter()
                .map(|p| config.src_path.join(p)),
        );

        if !recompile {
            recompile = needs_recompile(&output_path, &paths_to_check)?;
        }

        // Wasm path: compile if needed, load from the `.wasm` bytes, and return early; the
        // lock-file logic below applies to native libraries only.
        #[cfg(feature = "wasm")]
        if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() {
            if recompile {
                self.compile_parser_to_wasm(
                    &config.name,
                    config.src_path,
                    config
                        .scanner_path
                        .as_ref()
                        .and_then(|p| p.strip_prefix(config.src_path).ok()),
                    &output_path,
                )?;
            }

            let wasm_bytes = fs::read(&output_path)
                .map_err(|e| LoaderError::IO(IoError::new(e, Some(output_path.as_path()))))?;
            return Ok(wasm_store.load_language(&config.name, &wasm_bytes)?);
        }

        // Lock file location: under a temporary directory when `CROSS_RUNNER` is set,
        // otherwise `<cache dir>/tree-sitter/lock/<name>.lock`.
        // NOTE(review): the `TempDir` value is dropped at the end of this expression, which
        // (per tempfile's documented behavior) removes the directory; only the path is kept
        // and the parent is recreated before compiling -- confirm this is intended.
        let lock_path = if env::var("CROSS_RUNNER").is_ok() {
            tempfile::tempdir()
                .unwrap()
                .path()
                .join("tree-sitter")
                .join("lock")
                .join(format!("{}.lock", config.name))
        } else {
            etcetera::choose_base_strategy()?
                .cache_dir()
                .join("tree-sitter")
                .join("lock")
                .join(format!("{}.lock", config.name))
        };

        // An existing lock file resets `recompile` to false, whatever was decided above.
        if let Ok(lock_file) = fs::OpenOptions::new().write(true).open(&lock_path) {
            recompile = false;
            if lock_file.try_lock_exclusive().is_err() {
                // The lock is held elsewhere: block until it is released and do not
                // recompile.
                lock_file
                    .lock_exclusive()
                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
                recompile = false;
            } else {
                // Nobody holds the lock. A lock file last modified more than 30 seconds ago
                // is removed and the parser is recompiled.
                let time = lock_file
                    .metadata()
                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?
                    .modified()
                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?
                    .elapsed()?
                    .as_secs();
                if time > 30 {
                    fs::remove_file(&lock_path)
                        .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
                    recompile = true;
                }
            }
        }

        if recompile {
            // Create (or truncate) the lock file and hold it exclusively while compiling.
            let parent_path = lock_path.parent().unwrap();
            fs::create_dir_all(parent_path)
                .map_err(|e| LoaderError::IO(IoError::new(e, Some(parent_path))))?;
            let lock_file = fs::OpenOptions::new()
                .create(true)
                .truncate(true)
                .write(true)
                .open(&lock_path)
                .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
            lock_file
                .lock_exclusive()
                .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;

            self.compile_parser_to_dylib(&config, &lock_file, &lock_path)?;

            // Only grammars with an external scanner have scanner symbols to verify.
            if config.scanner_path.is_some() {
                self.check_external_scanner(&config.name, &output_path)?;
            }
        }

        Self::load_language(&output_path, &language_fn_name)
    }
1094
1095 pub fn load_language(path: &Path, function_name: &str) -> LoaderResult<Language> {
1096 let library = unsafe { Library::new(path) }.map_err(|e| {
1097 LoaderError::Library(LibraryError {
1098 error: e,
1099 path: path.to_string_lossy().to_string(),
1100 })
1101 })?;
1102 let language = unsafe {
1103 let language_fn = library
1104 .get::<Symbol<unsafe extern "C" fn() -> Language>>(function_name.as_bytes())
1105 .map_err(|e| {
1106 LoaderError::Symbol(SymbolError {
1107 error: e,
1108 symbol_name: function_name.to_string(),
1109 path: path.to_string_lossy().to_string(),
1110 })
1111 })?;
1112 language_fn()
1113 };
1114 mem::forget(library);
1115 Ok(language)
1116 }
1117
1118 fn compile_parser_to_dylib(
1119 &self,
1120 config: &CompileConfig,
1121 lock_file: &fs::File,
1122 lock_path: &Path,
1123 ) -> LoaderResult<()> {
1124 let mut cc_config = cc::Build::new();
1125 cc_config
1126 .cargo_metadata(false)
1127 .cargo_warnings(false)
1128 .target(BUILD_TARGET)
1129 .host(BUILD_TARGET)
1133 .debug(self.debug_build)
1134 .file(&config.parser_path)
1135 .includes(&config.header_paths)
1136 .std("c11");
1137
1138 if let Some(scanner_path) = config.scanner_path.as_ref() {
1139 cc_config.file(scanner_path);
1140 }
1141
1142 if self.debug_build {
1143 cc_config.opt_level(0).extra_warnings(true);
1144 } else {
1145 cc_config.opt_level(2).extra_warnings(false);
1146 }
1147
1148 for flag in config.flags {
1149 cc_config.define(flag, None);
1150 }
1151
1152 let compiler = cc_config.get_compiler();
1153 let mut command = Command::new(compiler.path());
1154 command.args(compiler.args());
1155 for (key, value) in compiler.env() {
1156 command.env(key, value);
1157 }
1158
1159 let output_path = config.output_path.as_ref().unwrap();
1160
1161 let temp_dir = if compiler.is_like_msvc() {
1162 let out = format!("-out:{}", output_path.to_str().unwrap());
1163 command.arg(if self.debug_build { "-LDd" } else { "-LD" });
1164 command.arg("-utf-8");
1165
1166 let temp_dir = output_path.parent().unwrap().join(format!(
1170 "tmp_{}_{:?}",
1171 std::process::id(),
1172 std::thread::current().id()
1173 ));
1174 std::fs::create_dir_all(&temp_dir).unwrap();
1175
1176 command.arg(format!("/Fo{}\\", temp_dir.display()));
1177 command.args(cc_config.get_files());
1178 command.arg("-link").arg(out);
1179 command.arg(format!("/IMPLIB:{}.lib", temp_dir.join("temp").display()));
1180
1181 Some(temp_dir)
1182 } else {
1183 command.arg("-Werror=implicit-function-declaration");
1184 if cfg!(any(target_os = "macos", target_os = "ios")) {
1185 command.arg("-dynamiclib");
1186 command.arg("-UTREE_SITTER_REUSE_ALLOCATOR");
1188 } else {
1189 command.arg("-shared");
1190 }
1191 command.args(cc_config.get_files());
1192 command.arg("-o").arg(output_path);
1193
1194 None
1195 };
1196
1197 let output = command.output().map_err(|e| {
1198 LoaderError::Compiler(CompilerError {
1199 error: e,
1200 command: Box::new(command),
1201 })
1202 })?;
1203
1204 if let Some(temp_dir) = temp_dir {
1205 let _ = fs::remove_dir_all(temp_dir);
1206 }
1207
1208 FileExt::unlock(lock_file)
1209 .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path))))?;
1210 fs::remove_file(lock_path)
1211 .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path))))?;
1212
1213 if output.status.success() {
1214 Ok(())
1215 } else {
1216 Err(LoaderError::Compilation(
1217 String::from_utf8_lossy(&output.stdout).to_string(),
1218 String::from_utf8_lossy(&output.stderr).to_string(),
1219 ))
1220 }
1221 }
1222
1223 #[cfg(unix)]
1224 fn check_external_scanner(&self, name: &str, library_path: &Path) -> LoaderResult<()> {
1225 let prefix = if cfg!(any(target_os = "macos", target_os = "ios")) {
1226 "_"
1227 } else {
1228 ""
1229 };
1230 let section = if cfg!(all(target_arch = "powerpc64", target_os = "linux")) {
1231 " D "
1232 } else {
1233 " T "
1234 };
1235 let mut must_have = vec![
1236 format!("{prefix}tree_sitter_{name}_external_scanner_create"),
1237 format!("{prefix}tree_sitter_{name}_external_scanner_destroy"),
1238 format!("{prefix}tree_sitter_{name}_external_scanner_serialize"),
1239 format!("{prefix}tree_sitter_{name}_external_scanner_deserialize"),
1240 format!("{prefix}tree_sitter_{name}_external_scanner_scan"),
1241 ];
1242
1243 let nm_cmd = env::var("NM").unwrap_or_else(|_| "nm".to_owned());
1244 let command = Command::new(nm_cmd)
1245 .arg("--defined-only")
1246 .arg(library_path)
1247 .output();
1248 if let Ok(output) = command {
1249 if output.status.success() {
1250 let mut found_non_static = false;
1251 for line in String::from_utf8_lossy(&output.stdout).lines() {
1252 if line.contains(section) {
1253 if let Some(function_name) =
1254 line.split_whitespace().collect::<Vec<_>>().get(2)
1255 {
1256 if !line.contains("tree_sitter_") {
1257 if !found_non_static {
1258 found_non_static = true;
1259 warn!("Found non-static non-tree-sitter functions in the external scanner");
1260 }
1261 warn!(" `{function_name}`");
1262 } else {
1263 must_have.retain(|f| f != function_name);
1264 }
1265 }
1266 }
1267 }
1268 if found_non_static {
1269 warn!(concat!(
1270 "Consider making these functions static, they can cause conflicts ",
1271 "when another tree-sitter project uses the same function name."
1272 ));
1273 }
1274
1275 if !must_have.is_empty() {
1276 return Err(LoaderError::ScannerSymbols(ScannerSymbolError {
1277 missing: must_have,
1278 }));
1279 }
1280 }
1281 }
1282
1283 Ok(())
1284 }
1285
    /// Windows variant of the external-scanner symbol check.
    ///
    /// It performs no inspection of the library and always returns `Ok(())`; both
    /// arguments are ignored.
    #[cfg(windows)]
    fn check_external_scanner(&self, _name: &str, _library_path: &Path) -> LoaderResult<()> {
        Ok(())
    }
1300
1301 pub fn compile_parser_to_wasm(
1302 &self,
1303 language_name: &str,
1304 src_path: &Path,
1305 scanner_filename: Option<&Path>,
1306 output_path: &Path,
1307 ) -> LoaderResult<()> {
1308 let clang_executable = self.ensure_wasi_sdk_exists()?;
1309
1310 let mut command = Command::new(&clang_executable);
1311 command.current_dir(src_path).args([
1312 "-o",
1313 output_path.to_str().unwrap(),
1314 "-fPIC",
1315 "-shared",
1316 if self.debug_build { "-g" } else { "-Os" },
1317 format!("-Wl,--export=tree_sitter_{language_name}").as_str(),
1318 "-Wl,--allow-undefined",
1319 "-Wl,--no-entry",
1320 "-nostdlib",
1321 "-fno-exceptions",
1322 "-fvisibility=hidden",
1323 "-I",
1324 ".",
1325 "parser.c",
1326 ]);
1327
1328 if let Some(scanner_filename) = scanner_filename {
1329 command.arg(scanner_filename);
1330 }
1331
1332 let output = command.output().map_err(LoaderError::WasmCompiler)?;
1333
1334 if !output.status.success() {
1335 return Err(LoaderError::WasmCompilation(
1336 String::from_utf8_lossy(&output.stderr).to_string(),
1337 ));
1338 }
1339
1340 Ok(())
1341 }
1342
1343 fn extract_tar_gz_with_strip(
1345 &self,
1346 archive_path: &Path,
1347 destination: &Path,
1348 ) -> LoaderResult<()> {
1349 let status = Command::new("tar")
1350 .arg("-xzf")
1351 .arg(archive_path)
1352 .arg("--strip-components=1")
1353 .arg("-C")
1354 .arg(destination)
1355 .status()
1356 .map_err(|e| LoaderError::Tar(archive_path.to_string_lossy().to_string(), e))?;
1357
1358 if !status.success() {
1359 return Err(LoaderError::Extraction(
1360 archive_path.to_string_lossy().to_string(),
1361 destination.to_string_lossy().to_string(),
1362 ));
1363 }
1364
1365 Ok(())
1366 }
1367
1368 fn ensure_wasi_sdk_exists(&self) -> LoaderResult<PathBuf> {
1373 let possible_executables = if cfg!(windows) {
1374 vec![
1375 "clang.exe",
1376 "wasm32-unknown-wasi-clang.exe",
1377 "wasm32-wasi-clang.exe",
1378 ]
1379 } else {
1380 vec!["clang", "wasm32-unknown-wasi-clang", "wasm32-wasi-clang"]
1381 };
1382
1383 if let Ok(wasi_sdk_path) = std::env::var("TREE_SITTER_WASI_SDK_PATH") {
1384 let wasi_sdk_dir = PathBuf::from(wasi_sdk_path);
1385
1386 for exe in &possible_executables {
1387 let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1388 if clang_exe.exists() {
1389 return Ok(clang_exe);
1390 }
1391 }
1392
1393 return Err(LoaderError::WasiSDKClang(WasiSDKClangError {
1394 wasi_sdk_dir: wasi_sdk_dir.to_string_lossy().to_string(),
1395 possible_executables,
1396 download: false,
1397 }));
1398 }
1399
1400 let cache_dir = etcetera::choose_base_strategy()?
1401 .cache_dir()
1402 .join("tree-sitter");
1403 fs::create_dir_all(&cache_dir)
1404 .map_err(|e| LoaderError::IO(IoError::new(e, Some(cache_dir.as_path()))))?;
1405
1406 let wasi_sdk_dir = cache_dir.join("wasi-sdk");
1407
1408 for exe in &possible_executables {
1409 let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1410 if clang_exe.exists() {
1411 return Ok(clang_exe);
1412 }
1413 }
1414
1415 fs::create_dir_all(&wasi_sdk_dir)
1416 .map_err(|e| LoaderError::IO(IoError::new(e, Some(wasi_sdk_dir.as_path()))))?;
1417
1418 let arch_os = if cfg!(target_os = "macos") {
1419 if cfg!(target_arch = "aarch64") {
1420 "arm64-macos"
1421 } else {
1422 "x86_64-macos"
1423 }
1424 } else if cfg!(target_os = "windows") {
1425 if cfg!(target_arch = "aarch64") {
1426 "arm64-windows"
1427 } else {
1428 "x86_64-windows"
1429 }
1430 } else if cfg!(target_os = "linux") {
1431 if cfg!(target_arch = "aarch64") {
1432 "arm64-linux"
1433 } else {
1434 "x86_64-linux"
1435 }
1436 } else {
1437 return Err(LoaderError::WasiSDKPlatform);
1438 };
1439
1440 let sdk_filename = format!("wasi-sdk-{WASI_SDK_VERSION}-{arch_os}.tar.gz");
1441 let wasi_sdk_major_version = WASI_SDK_VERSION
1442 .trim_end_matches(char::is_numeric) .trim_end_matches('.'); let sdk_url = format!(
1445 "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-{wasi_sdk_major_version}/{sdk_filename}",
1446 );
1447
1448 info!("Downloading wasi-sdk from {sdk_url}...");
1449 let temp_tar_path = cache_dir.join(sdk_filename);
1450
1451 let status = Command::new("curl")
1452 .arg("-f")
1453 .arg("-L")
1454 .arg("-o")
1455 .arg(&temp_tar_path)
1456 .arg(&sdk_url)
1457 .status()
1458 .map_err(|e| LoaderError::Curl(sdk_url.clone(), e))?;
1459
1460 if !status.success() {
1461 return Err(LoaderError::WasiSDKDownload(sdk_url));
1462 }
1463
1464 info!("Extracting wasi-sdk to {}...", wasi_sdk_dir.display());
1465 self.extract_tar_gz_with_strip(&temp_tar_path, &wasi_sdk_dir)?;
1466
1467 fs::remove_file(temp_tar_path).ok();
1468 for exe in &possible_executables {
1469 let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1470 if clang_exe.exists() {
1471 return Ok(clang_exe);
1472 }
1473 }
1474
1475 Err(LoaderError::WasiSDKClang(WasiSDKClangError {
1476 wasi_sdk_dir: wasi_sdk_dir.to_string_lossy().to_string(),
1477 possible_executables,
1478 download: true,
1479 }))
1480 }
1481
1482 #[must_use]
1483 #[cfg(feature = "tree-sitter-highlight")]
1484 pub fn highlight_config_for_injection_string<'a>(
1485 &'a self,
1486 string: &str,
1487 ) -> Option<&'a HighlightConfiguration> {
1488 match self.language_configuration_for_injection_string(string) {
1489 Err(e) => {
1490 error!("Failed to load language for injection string '{string}': {e}",);
1491 None
1492 }
1493 Ok(None) => None,
1494 Ok(Some((language, configuration))) => {
1495 match configuration.highlight_config(language, None) {
1496 Err(e) => {
1497 error!(
1498 "Failed to load higlight config for injection string '{string}': {e}"
1499 );
1500 None
1501 }
1502 Ok(None) => None,
1503 Ok(Some(config)) => Some(config),
1504 }
1505 }
1506 }
1507 }
1508
1509 #[must_use]
1510 pub fn get_language_configuration_in_current_path(&self) -> Option<&LanguageConfiguration> {
1511 self.language_configuration_in_current_path
1512 .map(|i| &self.language_configurations[i])
1513 }
1514
    /// Registers the language configurations found at `parser_path` and returns the slice
    /// of configurations added by this call.
    ///
    /// The grammars listed by `TreeSitterJSON::from_file(parser_path)` are registered
    /// first. If that yields no configuration and `parser_path/src/grammar.json` exists, a
    /// single bare configuration is registered using the name read from that file.
    ///
    /// When `set_current_path_config` is `true` and no current-path configuration has been
    /// recorded yet, the first configuration registered from the JSON file is recorded as
    /// the current-path configuration.
    ///
    /// # Errors
    ///
    /// * [`LoaderError::ExternalFile`] if an external file of a grammar, once joined onto
    ///   `parser_path`, does not start with `parser_path`.
    /// * Any error from reading the grammar name out of `src/grammar.json`.
    ///
    /// A [`LoaderError::Serialization`] from loading the JSON file is only logged with
    /// `warn!`; every other error from loading it is ignored.
    pub fn find_language_configurations_at_path(
        &mut self,
        parser_path: &Path,
        set_current_path_config: bool,
    ) -> LoaderResult<&[LanguageConfiguration]> {
        // Remember where this call's configurations start so they can be returned as a slice.
        let initial_language_configuration_count = self.language_configurations.len();

        match TreeSitterJSON::from_file(parser_path) {
            Ok(config) => {
                // Only language entries added during this call are candidates for reuse below.
                let language_count = self.languages_by_id.len();
                for grammar in config.grammars {
                    // A grammar without an explicit path lives directly in `parser_path`.
                    let language_path =
                        parser_path.join(grammar.path.unwrap_or(PathBuf::from(".")));

                    // Look for an entry with the same path that an earlier grammar of this
                    // same file already added; the last match wins.
                    let mut language_id = None;
                    for (id, (path, _, _)) in
                        self.languages_by_id.iter().enumerate().skip(language_count)
                    {
                        if language_path == *path {
                            language_id = Some(id);
                        }
                    }

                    let language_id = if let Some(language_id) = language_id {
                        language_id
                    } else {
                        // No entry yet: add one with an empty language cell and the
                        // grammar's external files resolved against `parser_path`.
                        self.languages_by_id.push((
                            language_path,
                            OnceCell::new(),
                            grammar
                                .external_files
                                .clone()
                                .into_vec()
                                .map(|files| {
                                    files
                                        .into_iter()
                                        .map(|path| {
                                            let path = parser_path.join(path);
                                            // Reject files whose joined path does not
                                            // start with the parser directory.
                                            if path.starts_with(parser_path) {
                                                Ok(path)
                                            } else {
                                                Err(LoaderError::ExternalFile(
                                                    path.to_string_lossy().to_string(),
                                                    parser_path.to_string_lossy().to_string(),
                                                ))
                                            }
                                        })
                                        .collect::<LoaderResult<Vec<_>>>()
                                })
                                .transpose()?,
                        ));
                        self.languages_by_id.len() - 1
                    };

                    let configuration = LanguageConfiguration {
                        root_path: parser_path.to_path_buf(),
                        language_name: grammar.name,
                        scope: Some(grammar.scope),
                        language_id,
                        file_types: grammar.file_types.unwrap_or_default(),
                        // A pattern that fails to compile is treated as absent (see `regex`).
                        content_regex: Self::regex(grammar.content_regex.as_deref()),
                        first_line_regex: Self::regex(grammar.first_line_regex.as_deref()),
                        injection_regex: Self::regex(grammar.injection_regex.as_deref()),
                        injections_filenames: grammar.injections.into_vec(),
                        locals_filenames: grammar.locals.into_vec(),
                        tags_filenames: grammar.tags.into_vec(),
                        highlights_filenames: grammar.highlights.into_vec(),
                        #[cfg(feature = "tree-sitter-highlight")]
                        highlight_config: OnceCell::new(),
                        #[cfg(feature = "tree-sitter-tags")]
                        tags_config: OnceCell::new(),
                        #[cfg(feature = "tree-sitter-highlight")]
                        highlight_names: &self.highlight_names,
                        #[cfg(feature = "tree-sitter-highlight")]
                        use_all_highlight_names: self.use_all_highlight_names,
                        _phantom: PhantomData,
                    };

                    // Index the configuration about to be pushed: its position will be the
                    // current length of `language_configurations`.
                    for file_type in &configuration.file_types {
                        self.language_configuration_ids_by_file_type
                            .entry(file_type.clone())
                            .or_default()
                            .push(self.language_configurations.len());
                    }
                    if let Some(first_line_regex) = &configuration.first_line_regex {
                        self.language_configuration_ids_by_first_line_regex
                            .entry(first_line_regex.to_string())
                            .or_default()
                            .push(self.language_configurations.len());
                    }

                    // The configuration may borrow `self.highlight_names`; the transmute
                    // widens that borrow to `'static` so the value can be stored in `self`.
                    // NOTE(review): soundness depends on how `highlight_names` is owned by
                    // the loader, which is not visible here -- confirm.
                    self.language_configurations.push(unsafe {
                        mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
                            configuration,
                        )
                    });

                    if set_current_path_config
                        && self.language_configuration_in_current_path.is_none()
                    {
                        self.language_configuration_in_current_path =
                            Some(self.language_configurations.len() - 1);
                    }
                }
            }
            Err(LoaderError::Serialization(e)) => {
                warn!(
                    "Failed to parse {} -- {e}",
                    parser_path.join("tree-sitter.json").display()
                );
            }
            // Every other failure to load the JSON file is ignored.
            _ => {}
        }

        // Fallback: nothing was registered above, but a generated grammar is present.
        if self.language_configurations.len() == initial_language_configuration_count
            && parser_path.join("src").join("grammar.json").exists()
        {
            let grammar_path = parser_path.join("src").join("grammar.json");
            let language_name = Self::grammar_json_name(&grammar_path)?;
            let configuration = LanguageConfiguration {
                root_path: parser_path.to_owned(),
                language_name,
                // The matching `languages_by_id` entry is pushed below, so its index is
                // the current length.
                language_id: self.languages_by_id.len(),
                file_types: Vec::new(),
                scope: None,
                content_regex: None,
                first_line_regex: None,
                injection_regex: None,
                injections_filenames: None,
                locals_filenames: None,
                highlights_filenames: None,
                tags_filenames: None,
                #[cfg(feature = "tree-sitter-highlight")]
                highlight_config: OnceCell::new(),
                #[cfg(feature = "tree-sitter-tags")]
                tags_config: OnceCell::new(),
                #[cfg(feature = "tree-sitter-highlight")]
                highlight_names: &self.highlight_names,
                #[cfg(feature = "tree-sitter-highlight")]
                use_all_highlight_names: self.use_all_highlight_names,
                _phantom: PhantomData,
            };
            // Same lifetime widening as for the configurations registered from the JSON file.
            self.language_configurations.push(unsafe {
                mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
                    configuration,
                )
            });
            self.languages_by_id
                .push((parser_path.to_owned(), OnceCell::new(), None));
        }

        Ok(&self.language_configurations[initial_language_configuration_count..])
    }
1678
1679 fn regex(pattern: Option<&str>) -> Option<Regex> {
1680 pattern.and_then(|r| RegexBuilder::new(r).multi_line(true).build().ok())
1681 }
1682
1683 fn grammar_json_name(grammar_path: &Path) -> LoaderResult<String> {
1684 let file = fs::File::open(grammar_path)
1685 .map_err(|e| LoaderError::IO(IoError::new(e, Some(grammar_path))))?;
1686
1687 let first_three_lines = BufReader::new(file)
1688 .lines()
1689 .take(3)
1690 .collect::<Result<Vec<_>, std::io::Error>>()
1691 .map_err(|_| LoaderError::GrammarJSON(grammar_path.to_string_lossy().to_string()))?
1692 .join("\n");
1693
1694 let name = GRAMMAR_NAME_REGEX
1695 .captures(&first_three_lines)
1696 .and_then(|c| c.get(1))
1697 .ok_or_else(|| LoaderError::GrammarJSON(grammar_path.to_string_lossy().to_string()))?;
1698
1699 Ok(name.as_str().to_string())
1700 }
1701
1702 pub fn select_language(
1703 &mut self,
1704 path: Option<&Path>,
1705 current_dir: &Path,
1706 scope: Option<&str>,
1707 lib_info: Option<&(PathBuf, &str)>,
1709 ) -> LoaderResult<Language> {
1710 if let Some((ref lib_path, language_name)) = lib_info {
1711 let language_fn_name = format!("tree_sitter_{}", language_name.replace('-', "_"));
1712 Self::load_language(lib_path, &language_fn_name)
1713 } else if let Some(scope) = scope {
1714 if let Some(config) = self
1715 .language_configuration_for_scope(scope)
1716 .map_err(|e| LoaderError::ScopeLoad(scope.to_string(), Box::new(e)))?
1717 {
1718 Ok(config.0)
1719 } else {
1720 Err(LoaderError::UnknownScope(scope.to_string()))
1721 }
1722 } else if let Some((lang, _)) = if let Some(path) = path {
1723 self.language_configuration_for_file_name(path)
1724 .map_err(|e| {
1725 LoaderError::FileNameLoad(
1726 path.file_name().unwrap().to_string_lossy().to_string(),
1727 Box::new(e),
1728 )
1729 })?
1730 } else {
1731 None
1732 } {
1733 Ok(lang)
1734 } else if let Some(id) = self.language_configuration_in_current_path {
1735 Ok(self.language_for_id(self.language_configurations[id].language_id)?)
1736 } else if let Some(lang) = self
1737 .languages_at_path(current_dir)
1738 .map_err(|e| LoaderError::CurrentDirectoryLoad(Box::new(e)))?
1739 .first()
1740 .cloned()
1741 {
1742 Ok(lang.0)
1743 } else if let Some(lang) = if let Some(path) = path {
1744 self.language_configuration_for_first_line_regex(path)?
1745 } else {
1746 None
1747 } {
1748 Ok(lang.0)
1749 } else {
1750 Err(LoaderError::NoLanguage)
1751 }
1752 }
1753
    /// Sets whether parsers are compiled as debug builds.
    ///
    /// The native compile step reads this flag to enable debug info and choose the
    /// optimization level; the Wasm compile step reads it to pick `-g` over `-Os`.
    pub const fn debug_build(&mut self, flag: bool) {
        self.debug_build = flag;
    }
1757
    /// Stores `flag` in the loader's `sanitize_build` setting.
    ///
    /// NOTE(review): the code that consumes this setting is not visible in this part of the
    /// file; presumably it enables sanitizers for compiled parsers -- confirm.
    pub const fn sanitize_build(&mut self, flag: bool) {
        self.sanitize_build = flag;
    }
1761
    /// Stores `rebuild` in the loader's `force_rebuild` setting.
    ///
    /// NOTE(review): the code that consumes this setting is not visible in this part of the
    /// file; presumably it forces recompilation even when the library is up to date --
    /// confirm.
    pub const fn force_rebuild(&mut self, rebuild: bool) {
        self.force_rebuild = rebuild;
    }
1765
1766 #[cfg(feature = "wasm")]
1767 #[cfg_attr(docsrs, doc(cfg(feature = "wasm")))]
1768 pub fn use_wasm(&mut self, engine: &tree_sitter::wasmtime::Engine) {
1769 *self.wasm_store.lock().unwrap() = Some(tree_sitter::WasmStore::new(engine).unwrap());
1770 }
1771
1772 #[must_use]
1773 pub fn get_scanner_path(&self, src_path: &Path) -> Option<PathBuf> {
1774 let path = src_path.join("scanner.c");
1775 path.exists().then_some(path)
1776 }
1777}
1778
1779impl LanguageConfiguration<'_> {
1780 #[cfg(feature = "tree-sitter-highlight")]
1781 pub fn highlight_config(
1782 &self,
1783 language: Language,
1784 paths: Option<&[PathBuf]>,
1785 ) -> LoaderResult<Option<&HighlightConfiguration>> {
1786 let (highlights_filenames, injections_filenames, locals_filenames) = match paths {
1787 Some(paths) => (
1788 Some(
1789 paths
1790 .iter()
1791 .filter(|p| p.ends_with(DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME))
1792 .cloned()
1793 .collect::<Vec<_>>(),
1794 ),
1795 Some(
1796 paths
1797 .iter()
1798 .filter(|p| p.ends_with(DEFAULT_TAGS_QUERY_FILE_NAME))
1799 .cloned()
1800 .collect::<Vec<_>>(),
1801 ),
1802 Some(
1803 paths
1804 .iter()
1805 .filter(|p| p.ends_with(DEFAULT_LOCALS_QUERY_FILE_NAME))
1806 .cloned()
1807 .collect::<Vec<_>>(),
1808 ),
1809 ),
1810 None => (None, None, None),
1811 };
1812 self.highlight_config
1813 .get_or_try_init(|| {
1814 let (highlights_query, highlight_ranges) = self.read_queries(
1815 if highlights_filenames.is_some() {
1816 highlights_filenames.as_deref()
1817 } else {
1818 self.highlights_filenames.as_deref()
1819 },
1820 DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME,
1821 )?;
1822 let (injections_query, injection_ranges) = self.read_queries(
1823 if injections_filenames.is_some() {
1824 injections_filenames.as_deref()
1825 } else {
1826 self.injections_filenames.as_deref()
1827 },
1828 DEFAULT_INJECTIONS_QUERY_FILE_NAME,
1829 )?;
1830 let (locals_query, locals_ranges) = self.read_queries(
1831 if locals_filenames.is_some() {
1832 locals_filenames.as_deref()
1833 } else {
1834 self.locals_filenames.as_deref()
1835 },
1836 DEFAULT_LOCALS_QUERY_FILE_NAME,
1837 )?;
1838
1839 if highlights_query.is_empty() {
1840 Ok(None)
1841 } else {
1842 let mut result = HighlightConfiguration::new(
1843 language,
1844 &self.language_name,
1845 &highlights_query,
1846 &injections_query,
1847 &locals_query,
1848 )
1849 .map_err(|error| match error.kind {
1850 QueryErrorKind::Language => {
1851 LoaderError::Query(LoaderQueryError { error, file: None })
1852 }
1853 _ => {
1854 if error.offset < injections_query.len() {
1855 Self::include_path_in_query_error(
1856 error,
1857 &injection_ranges,
1858 &injections_query,
1859 0,
1860 )
1861 } else if error.offset < injections_query.len() + locals_query.len() {
1862 Self::include_path_in_query_error(
1863 error,
1864 &locals_ranges,
1865 &locals_query,
1866 injections_query.len(),
1867 )
1868 } else {
1869 Self::include_path_in_query_error(
1870 error,
1871 &highlight_ranges,
1872 &highlights_query,
1873 injections_query.len() + locals_query.len(),
1874 )
1875 }
1876 }
1877 })?;
1878 let mut all_highlight_names = self.highlight_names.lock().unwrap();
1879 if self.use_all_highlight_names {
1880 for capture_name in result.query.capture_names() {
1881 if !all_highlight_names.iter().any(|x| x == capture_name) {
1882 all_highlight_names.push((*capture_name).to_string());
1883 }
1884 }
1885 }
1886 result.configure(all_highlight_names.as_slice());
1887 drop(all_highlight_names);
1888 Ok(Some(result))
1889 }
1890 })
1891 .map(Option::as_ref)
1892 }
1893
1894 #[cfg(feature = "tree-sitter-tags")]
1895 pub fn tags_config(&self, language: Language) -> LoaderResult<Option<&TagsConfiguration>> {
1896 self.tags_config
1897 .get_or_try_init(|| {
1898 let (tags_query, tags_ranges) = self
1899 .read_queries(self.tags_filenames.as_deref(), DEFAULT_TAGS_QUERY_FILE_NAME)?;
1900 let (locals_query, locals_ranges) = self.read_queries(
1901 self.locals_filenames.as_deref(),
1902 DEFAULT_LOCALS_QUERY_FILE_NAME,
1903 )?;
1904 if tags_query.is_empty() {
1905 Ok(None)
1906 } else {
1907 TagsConfiguration::new(language, &tags_query, &locals_query)
1908 .map(Some)
1909 .map_err(|error| {
1910 if let TagsError::Query(error) = error {
1911 if error.offset < locals_query.len() {
1912 Self::include_path_in_query_error(
1913 error,
1914 &locals_ranges,
1915 &locals_query,
1916 0,
1917 )
1918 } else {
1919 Self::include_path_in_query_error(
1920 error,
1921 &tags_ranges,
1922 &tags_query,
1923 locals_query.len(),
1924 )
1925 }
1926 } else {
1927 error.into()
1928 }
1929 })
1930 }
1931 })
1932 .map(Option::as_ref)
1933 }
1934
1935 #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1936 fn include_path_in_query_error(
1937 mut error: QueryError,
1938 ranges: &[(PathBuf, Range<usize>)],
1939 source: &str,
1940 start_offset: usize,
1941 ) -> LoaderError {
1942 let offset_within_section = error.offset - start_offset;
1943 let (path, range) = ranges
1944 .iter()
1945 .find(|(_, range)| range.contains(&offset_within_section))
1946 .unwrap_or_else(|| ranges.last().unwrap());
1947 error.offset = offset_within_section - range.start;
1948 error.row = source[range.start..offset_within_section]
1949 .matches('\n')
1950 .count();
1951 LoaderError::Query(LoaderQueryError {
1952 error,
1953 file: Some(path.to_string_lossy().to_string()),
1954 })
1955 }
1956
1957 #[allow(clippy::type_complexity)]
1958 #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1959 fn read_queries(
1960 &self,
1961 paths: Option<&[PathBuf]>,
1962 default_path: &str,
1963 ) -> LoaderResult<(String, Vec<(PathBuf, Range<usize>)>)> {
1964 let mut query = String::new();
1965 let mut path_ranges = Vec::new();
1966 if let Some(paths) = paths {
1967 for path in paths {
1968 let abs_path = self.root_path.join(path);
1969 let prev_query_len = query.len();
1970 query += &fs::read_to_string(&abs_path)
1971 .map_err(|e| LoaderError::IO(IoError::new(e, Some(abs_path.as_path()))))?;
1972 path_ranges.push((path.clone(), prev_query_len..query.len()));
1973 }
1974 } else {
1975 if default_path == DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME
1977 || default_path == DEFAULT_TAGS_QUERY_FILE_NAME
1978 {
1979 warn!(
1980 concat!(
1981 "You should add a `{}` entry pointing to the {} path in the `tree-sitter` ",
1982 "object in the grammar's tree-sitter.json file. See more here: ",
1983 "https://tree-sitter.github.io/tree-sitter/3-syntax-highlighting#query-paths"
1984 ),
1985 default_path.replace(".scm", ""),
1986 default_path
1987 );
1988 }
1989 let queries_path = self.root_path.join("queries");
1990 let path = queries_path.join(default_path);
1991 if path.exists() {
1992 query = fs::read_to_string(&path)
1993 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path.as_path()))))?;
1994 path_ranges.push((PathBuf::from(default_path), 0..query.len()));
1995 }
1996 }
1997
1998 Ok((query, path_ranges))
1999 }
2000}
2001
2002fn needs_recompile(lib_path: &Path, paths_to_check: &[PathBuf]) -> LoaderResult<bool> {
2003 if !lib_path.exists() {
2004 return Ok(true);
2005 }
2006 let lib_mtime = mtime(lib_path).map_err(|e| LoaderError::ModifiedTime(Box::new(e)))?;
2007 for path in paths_to_check {
2008 if mtime(path).map_err(|e| LoaderError::ModifiedTime(Box::new(e)))? > lib_mtime {
2009 return Ok(true);
2010 }
2011 }
2012 Ok(false)
2013}
2014
2015fn mtime(path: &Path) -> LoaderResult<SystemTime> {
2016 fs::metadata(path)
2017 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?
2018 .modified()
2019 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))
2020}