tree_sitter_loader/
loader.rs

1#![cfg_attr(not(any(test, doctest)), doc = include_str!("../README.md"))]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3
4#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
5use std::ops::Range;
6#[cfg(feature = "tree-sitter-highlight")]
7use std::sync::Mutex;
8use std::{
9    collections::HashMap,
10    env, fs,
11    io::{BufRead, BufReader},
12    marker::PhantomData,
13    mem,
14    path::{Path, PathBuf},
15    process::Command,
16    sync::LazyLock,
17    time::{SystemTime, SystemTimeError},
18};
19
20use etcetera::BaseStrategy as _;
21use fs4::fs_std::FileExt;
22use libloading::{Library, Symbol};
23use log::{error, info, warn};
24use once_cell::unsync::OnceCell;
25use regex::{Regex, RegexBuilder};
26use semver::Version;
27use serde::{Deserialize, Deserializer, Serialize};
28use thiserror::Error;
29use tree_sitter::Language;
30#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
31use tree_sitter::QueryError;
32#[cfg(feature = "tree-sitter-highlight")]
33use tree_sitter::QueryErrorKind;
34#[cfg(feature = "wasm")]
35use tree_sitter::WasmError;
36#[cfg(feature = "tree-sitter-highlight")]
37use tree_sitter_highlight::HighlightConfiguration;
38#[cfg(feature = "tree-sitter-tags")]
39use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
40
/// Lazily-compiled regex that matches a JSON `"name": "<value>"` pair and captures
/// the value non-greedily in group 1.
// NOTE(review): judging by the name, this is presumably applied to the contents of
// `grammar.json` — confirm at the use site.
static GRAMMAR_NAME_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#""name":\s*"(.*?)""#).unwrap());
43
/// Contents of the `../wasi-sdk-version` file, embedded at compile time with leading
/// and trailing ASCII whitespace removed.
const WASI_SDK_VERSION: &str = include_str!("../wasi-sdk-version").trim_ascii();
45
/// Convenience alias for a [`Result`] whose error type is [`LoaderError`].
pub type LoaderResult<T> = Result<T, LoaderError>;
47
48#[derive(Debug, Error)]
49pub enum LoaderError {
50    #[error(transparent)]
51    Compiler(CompilerError),
52    #[error("Parser compilation failed.\nStdout: {0}\nStderr: {1}")]
53    Compilation(String, String),
54    #[error("Failed to execute curl for {0} -- {1}")]
55    Curl(String, std::io::Error),
56    #[error("Failed to load language in current directory:\n{0}")]
57    CurrentDirectoryLoad(Box<Self>),
58    #[error("External file path {0} is outside of parser directory {1}")]
59    ExternalFile(String, String),
60    #[error("Failed to extract archive {0} to {1}")]
61    Extraction(String, String),
62    #[error("Failed to load language for file name {0}:\n{1}")]
63    FileNameLoad(String, Box<Self>),
64    #[error("Failed to parse the language name from grammar.json at {0}")]
65    GrammarJSON(String),
66    #[error(transparent)]
67    HomeDir(#[from] etcetera::HomeDirError),
68    #[error(transparent)]
69    IO(IoError),
70    #[error(transparent)]
71    Library(LibraryError),
72    #[error("Failed to compare binary and source timestamps:\n{0}")]
73    ModifiedTime(Box<Self>),
74    #[error("No language found")]
75    NoLanguage,
76    #[error(transparent)]
77    Query(LoaderQueryError),
78    #[error(transparent)]
79    ScannerSymbols(ScannerSymbolError),
80    #[error("Failed to load language for scope '{0}':\n{1}")]
81    ScopeLoad(String, Box<Self>),
82    #[error(transparent)]
83    Serialization(#[from] serde_json::Error),
84    #[error(transparent)]
85    Symbol(SymbolError),
86    #[error(transparent)]
87    Tags(#[from] TagsError),
88    #[error("Failed to execute tar for {0} -- {1}")]
89    Tar(String, std::io::Error),
90    #[error(transparent)]
91    Time(#[from] SystemTimeError),
92    #[error("Unknown scope '{0}'")]
93    UnknownScope(String),
94    #[error("Failed to download wasi-sdk from {0}")]
95    WasiSDKDownload(String),
96    #[error(transparent)]
97    WasiSDKClang(#[from] WasiSDKClangError),
98    #[error("Unsupported platform for wasi-sdk")]
99    WasiSDKPlatform,
100    #[cfg(feature = "wasm")]
101    #[error(transparent)]
102    Wasm(#[from] WasmError),
103    #[error("Failed to run wasi-sdk clang -- {0}")]
104    WasmCompiler(std::io::Error),
105    #[error("wasi-sdk clang command failed: {0}")]
106    WasmCompilation(String),
107}
108
/// Error payload pairing an I/O error with the command it relates to.
///
/// The `Display` implementation reports it as a failure to execute the C compiler.
#[derive(Debug, Error)]
pub struct CompilerError {
    /// The underlying I/O error.
    pub error: std::io::Error,
    /// The command concerned (boxed).
    pub command: Box<Command>,
}
114
115impl std::fmt::Display for CompilerError {
116    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
117        write!(
118            f,
119            "Failed to execute the C compiler with the following command:\n{:?}\nError: {}",
120            *self.command, self.error
121        )?;
122        Ok(())
123    }
124}
125
/// An I/O error together with the (optional) path it relates to.
#[derive(Debug, Error)]
pub struct IoError {
    /// The underlying I/O error.
    pub error: std::io::Error,
    /// The path involved, stored as a string; `None` when no path was supplied.
    pub path: Option<String>,
}
131
132impl IoError {
133    fn new(error: std::io::Error, path: Option<&Path>) -> Self {
134        Self {
135            error,
136            path: path.map(|p| p.to_string_lossy().to_string()),
137        }
138    }
139}
140
141impl std::fmt::Display for IoError {
142    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
143        write!(f, "{}", self.error)?;
144        if let Some(ref path) = self.path {
145            write!(f, " ({path})")?;
146        }
147        Ok(())
148    }
149}
150
/// A `libloading` error together with the path of the library concerned.
///
/// The `Display` implementation reports it as a failure to open a dynamic library.
#[derive(Debug, Error)]
pub struct LibraryError {
    /// The underlying `libloading` error.
    pub error: libloading::Error,
    /// Path of the dynamic library, as a string.
    pub path: String,
}
156
157impl std::fmt::Display for LibraryError {
158    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
159        write!(
160            f,
161            "Error opening dynamic library {} -- {}",
162            self.path, self.error
163        )?;
164        Ok(())
165    }
166}
167
168#[derive(Debug, Error)]
169pub struct LoaderQueryError {
170    pub error: QueryError,
171    pub file: Option<String>,
172}
173
174impl std::fmt::Display for LoaderQueryError {
175    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
176        if let Some(ref path) = self.file {
177            writeln!(f, "Error in query file {path}:")?;
178        }
179        write!(f, "{}", self.error)?;
180        Ok(())
181    }
182}
183
/// A `libloading` error together with the symbol name and library path concerned.
///
/// The `Display` implementation reports it as a failure to load the symbol.
#[derive(Debug, Error)]
pub struct SymbolError {
    /// The underlying `libloading` error.
    pub error: libloading::Error,
    /// Name of the symbol that could not be loaded.
    pub symbol_name: String,
    /// Path of the library the symbol was looked up in, as a string.
    pub path: String,
}
190
191impl std::fmt::Display for SymbolError {
192    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
193        write!(
194            f,
195            "Failed to load symbol {} from {} -- {}",
196            self.symbol_name, self.path, self.error
197        )?;
198        Ok(())
199    }
200}
201
/// Lists functions that are required in an external scanner but were found missing.
#[derive(Debug, Error)]
pub struct ScannerSymbolError {
    /// Names of the missing functions, printed one per line by `Display`.
    pub missing: Vec<String>,
}
206
207impl std::fmt::Display for ScannerSymbolError {
208    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
209        writeln!(
210            f,
211            "Missing required functions in the external scanner, parsing won't work without these!\n"
212        )?;
213        for symbol in &self.missing {
214            writeln!(f, "  `{symbol}`")?;
215        }
216        writeln!(
217            f,
218            "You can read more about this at https://tree-sitter.github.io/tree-sitter/creating-parsers/4-external-scanners\n"
219        )?;
220        Ok(())
221    }
222}
223
/// Reports that no clang executable was found inside a wasi-sdk directory.
#[derive(Debug, Error)]
pub struct WasiSDKClangError {
    /// The wasi-sdk directory that was searched, as a string.
    pub wasi_sdk_dir: String,
    /// The executable names that were looked for.
    pub possible_executables: Vec<&'static str>,
    /// Selects the message: `true` refers to a downloaded wasi-sdk, `false` to the
    /// directory named by `TREE_SITTER_WASI_SDK_PATH`.
    pub download: bool,
}
230
231impl std::fmt::Display for WasiSDKClangError {
232    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
233        if self.download {
234            write!(
235                f,
236                "Failed to find clang executable in downloaded wasi-sdk at '{}'.",
237                self.wasi_sdk_dir
238            )?;
239        } else {
240            write!(f, "TREE_SITTER_WASI_SDK_PATH is set to '{}', but no clang executable found in 'bin/' directory.", self.wasi_sdk_dir)?;
241        }
242
243        let possible_exes = self.possible_executables.join(", ");
244        write!(f, " Looked for: {possible_exes}.")?;
245
246        Ok(())
247    }
248}
249
/// Default file name for the highlights query.
pub const DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME: &str = "highlights.scm";

/// Default file name for the injections query.
pub const DEFAULT_INJECTIONS_QUERY_FILE_NAME: &str = "injections.scm";

/// Default file name for the locals query.
pub const DEFAULT_LOCALS_QUERY_FILE_NAME: &str = "locals.scm";

/// Default file name for the tags query.
pub const DEFAULT_TAGS_QUERY_FILE_NAME: &str = "tags.scm";
257
/// Loader configuration, (de)serializable with serde.
#[derive(Default, Deserialize, Serialize)]
pub struct Config {
    /// Directories scanned for `tree-sitter-*` grammar directories.
    ///
    /// Serialized under the key `parser-directories`; on deserialization a leading
    /// `~` or `$HOME` component is replaced with the home directory when it can be
    /// determined.
    #[serde(default)]
    #[serde(
        rename = "parser-directories",
        deserialize_with = "deserialize_parser_directories"
    )]
    pub parser_directories: Vec<PathBuf>,
}
267
/// A JSON value holding zero, one, or several paths.
///
/// Being `#[serde(untagged)]`, it (de)serializes as the bare inner value rather than
/// as a tagged object.
#[derive(Serialize, Deserialize, Clone, Default)]
#[serde(untagged)]
pub enum PathsJSON {
    /// No paths; this is the default.
    #[default]
    Empty,
    /// Exactly one path.
    Single(PathBuf),
    /// A list of paths.
    Multiple(Vec<PathBuf>),
}
276
277impl PathsJSON {
278    fn into_vec(self) -> Option<Vec<PathBuf>> {
279        match self {
280            Self::Empty => None,
281            Self::Single(s) => Some(vec![s]),
282            Self::Multiple(s) => Some(s),
283        }
284    }
285
286    const fn is_empty(&self) -> bool {
287        matches!(self, Self::Empty)
288    }
289
290    /// Represent this set of paths as a string that can be included in templates
291    #[must_use]
292    pub fn to_variable_value<'a>(&'a self, default: &'a PathBuf) -> &'a str {
293        match self {
294            Self::Empty => Some(default),
295            Self::Single(path_buf) => Some(path_buf),
296            Self::Multiple(paths) => paths.first(),
297        }
298        .map_or("", |path| path.as_os_str().to_str().unwrap_or(""))
299    }
300}
301
/// An author entry that is either a plain string or an object with named fields.
///
/// `#[serde(untagged)]`: the JSON shape decides which variant is used.
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONAuthor {
    /// Author given as a single string.
    String(String),
    /// Author given as an object.
    Object {
        /// Author name (required).
        name: String,
        /// Optional e-mail address.
        email: Option<String>,
        /// Optional URL.
        url: Option<String>,
    },
}
312
/// A repository entry that is either a plain string or an object with a `url` field.
///
/// `#[serde(untagged)]`: the JSON shape decides which variant is used.
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONRepository {
    /// Repository given as a single string.
    String(String),
    /// Repository given as an object carrying its URL.
    Object { url: String },
}
319
/// Serde model of a package manifest.
// NOTE(review): going by the type name this models `package.json` — confirm at the use site.
#[derive(Serialize, Deserialize)]
pub struct PackageJSON {
    /// Package name.
    pub name: String,
    /// Package version, parsed as a semver [`Version`].
    pub version: Version,
    /// Optional description.
    pub description: Option<String>,
    /// Optional author.
    pub author: Option<PackageJSONAuthor>,
    /// Optional list of maintainers.
    pub maintainers: Option<Vec<PackageJSONAuthor>>,
    /// Optional license string.
    pub license: Option<String>,
    /// Optional repository.
    pub repository: Option<PackageJSONRepository>,
    /// Language configurations stored under the `tree-sitter` key; defaults to
    /// `None` when absent and is omitted from the output when `None`.
    #[serde(default)]
    #[serde(rename = "tree-sitter", skip_serializing_if = "Option::is_none")]
    pub tree_sitter: Option<Vec<LanguageConfigurationJSON>>,
}
333
/// Serde default for `LanguageConfigurationJSON::path`: the relative path `.`.
fn default_path() -> PathBuf {
    let mut path = PathBuf::new();
    path.push(".");
    path
}
337
/// Serde model of one language configuration entry; keys are kebab-case in JSON.
///
/// The `PathsJSON` fields default to `Empty` when absent and are omitted from the
/// output when empty.
#[derive(Serialize, Deserialize, Clone)]
#[serde(rename_all = "kebab-case")]
pub struct LanguageConfigurationJSON {
    /// Path of the grammar; defaults to `.` when absent.
    #[serde(default = "default_path")]
    pub path: PathBuf,
    /// Optional scope string.
    pub scope: Option<String>,
    /// Optional list of file types.
    pub file_types: Option<Vec<String>>,
    /// Optional content regex source.
    pub content_regex: Option<String>,
    /// Optional first-line regex source.
    pub first_line_regex: Option<String>,
    /// Optional injection regex source.
    pub injection_regex: Option<String>,
    /// Highlights query path(s).
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    /// Injections query path(s).
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    /// Locals query path(s).
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    /// Tags query path(s).
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    /// External file path(s).
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
}
359
/// Serde model of a `tree-sitter.json` file (see [`TreeSitterJSON::from_file`]).
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct TreeSitterJSON {
    /// Optional value of the `$schema` key.
    #[serde(rename = "$schema")]
    pub schema: Option<String>,
    /// The grammars declared in the file.
    pub grammars: Vec<Grammar>,
    /// Package-level metadata.
    pub metadata: Metadata,
    /// Binding switches; defaults to [`Bindings::default`] when absent.
    #[serde(default)]
    pub bindings: Bindings,
}
370
371impl TreeSitterJSON {
372    pub fn from_file(path: &Path) -> LoaderResult<Self> {
373        let path = path.join("tree-sitter.json");
374        Ok(serde_json::from_str(&fs::read_to_string(&path).map_err(
375            |e| LoaderError::IO(IoError::new(e, Some(path.as_path()))),
376        )?)?)
377    }
378
379    #[must_use]
380    pub fn has_multiple_language_configs(&self) -> bool {
381        self.grammars.len() > 1
382    }
383}
384
/// Serde model of one grammar entry; keys are kebab-case in JSON.
///
/// Optional fields marked `skip_serializing_if` are omitted from the output when
/// `None`, and `PathsJSON` fields default to `Empty` and are omitted when empty.
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Grammar {
    /// Grammar name.
    pub name: String,
    /// Optional camel-case form of the name.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub camelcase: Option<String>,
    /// Optional title.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    /// Scope string (required).
    pub scope: String,
    /// Optional grammar path.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub path: Option<PathBuf>,
    /// External file path(s).
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
    /// Optional list of file types.
    pub file_types: Option<Vec<String>>,
    /// Highlights query path(s).
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    /// Injections query path(s).
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    /// Locals query path(s).
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    /// Tags query path(s).
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    /// Optional injection regex source.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub injection_regex: Option<String>,
    /// Optional first-line regex source.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub first_line_regex: Option<String>,
    /// Optional content regex source.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_regex: Option<String>,
    /// Optional class name.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub class_name: Option<String>,
}
416
/// Package-level metadata stored in [`TreeSitterJSON`].
#[derive(Serialize, Deserialize)]
pub struct Metadata {
    /// Version, parsed as a semver [`Version`].
    pub version: Version,
    /// Optional license string; omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    /// Optional description; omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// Optional list of authors; omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub authors: Option<Vec<Author>>,
    /// Optional links; omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub links: Option<Links>,
    /// Never read from or written to JSON (`#[serde(skip)]`), so it is `None` after
    /// deserialization.
    #[serde(skip)]
    pub namespace: Option<String>,
}
431
/// An author entry in [`Metadata`].
#[derive(Serialize, Deserialize)]
pub struct Author {
    /// Author name (required).
    pub name: String,
    /// Optional e-mail address; omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub email: Option<String>,
    /// Optional URL; omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub url: Option<String>,
}
440
/// Links stored in [`Metadata`].
#[derive(Serialize, Deserialize)]
pub struct Links {
    /// Repository link (required).
    pub repository: String,
    /// Optional funding link; omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub funding: Option<String>,
}
447
/// One on/off switch per language binding.
///
/// `#[serde(default)]` on the struct means any key missing from the input takes its
/// value from [`Bindings::default`].
#[derive(Serialize, Deserialize, Clone)]
#[serde(default)]
pub struct Bindings {
    pub c: bool,
    pub go: bool,
    pub java: bool,
    /// Excluded from (de)serialization; elsewhere in this file Kotlin is commented
    /// out "until the bindings are actually available".
    #[serde(skip)]
    pub kotlin: bool,
    pub node: bool,
    pub python: bool,
    pub rust: bool,
    pub swift: bool,
    pub zig: bool,
}
462
463impl Bindings {
464    /// return available languages and its default enabled state.
465    #[must_use]
466    pub const fn languages(&self) -> [(&'static str, bool); 8] {
467        [
468            ("c", true),
469            ("go", true),
470            ("java", false),
471            // Comment out Kotlin until the bindings are actually available.
472            // ("kotlin", false),
473            ("node", true),
474            ("python", true),
475            ("rust", true),
476            ("swift", true),
477            ("zig", false),
478        ]
479    }
480
481    /// construct Bindings from a language list. If a language isn't supported, its name will be put on the error part.
482    pub fn with_enabled_languages<'a, I>(languages: I) -> Result<Self, &'a str>
483    where
484        I: Iterator<Item = &'a str>,
485    {
486        let mut out = Self {
487            c: false,
488            go: false,
489            java: false,
490            kotlin: false,
491            node: false,
492            python: false,
493            rust: false,
494            swift: false,
495            zig: false,
496        };
497
498        for v in languages {
499            match v {
500                "c" => out.c = true,
501                "go" => out.go = true,
502                "java" => out.java = true,
503                // Comment out Kotlin until the bindings are actually available.
504                // "kotlin" => out.kotlin = true,
505                "node" => out.node = true,
506                "python" => out.python = true,
507                "rust" => out.rust = true,
508                "swift" => out.swift = true,
509                "zig" => out.zig = true,
510                unsupported => return Err(unsupported),
511            }
512        }
513
514        Ok(out)
515    }
516}
517
518impl Default for Bindings {
519    fn default() -> Self {
520        Self {
521            c: true,
522            go: true,
523            java: false,
524            kotlin: false,
525            node: true,
526            python: true,
527            rust: true,
528            swift: true,
529            zig: false,
530        }
531    }
532}
533
534// Replace `~` or `$HOME` with home path string.
535// (While paths like "~/.tree-sitter/config.json" can be deserialized,
536// they're not valid path for I/O modules.)
537fn deserialize_parser_directories<'de, D>(deserializer: D) -> Result<Vec<PathBuf>, D::Error>
538where
539    D: Deserializer<'de>,
540{
541    let paths = Vec::<PathBuf>::deserialize(deserializer)?;
542    let Ok(home) = etcetera::home_dir() else {
543        return Ok(paths);
544    };
545    let standardized = paths
546        .into_iter()
547        .map(|path| standardize_path(path, &home))
548        .collect();
549    Ok(standardized)
550}
551
/// Replaces a leading `~` or `$HOME` path component with `home`.
///
/// Only a whole first component is replaced (`~user/x` is left untouched); any
/// other path is returned unchanged.
fn standardize_path(path: PathBuf, home: &Path) -> PathBuf {
    for prefix in ["~", "$HOME"] {
        if let Ok(rest) = path.strip_prefix(prefix) {
            return home.join(rest);
        }
    }
    path
}
561
562impl Config {
563    #[must_use]
564    pub fn initial() -> Self {
565        let home_dir = etcetera::home_dir().expect("Cannot determine home directory");
566        Self {
567            parser_directories: vec![
568                home_dir.join("github"),
569                home_dir.join("src"),
570                home_dir.join("source"),
571                home_dir.join("projects"),
572                home_dir.join("dev"),
573                home_dir.join("git"),
574            ],
575        }
576    }
577}
578
/// Value of the `BUILD_TARGET` environment variable, captured at compile time.
// NOTE(review): presumably set by the crate's build script to the target triple — confirm.
const BUILD_TARGET: &str = env!("BUILD_TARGET");
580
/// One loaded language configuration, as stored in [`Loader`].
pub struct LanguageConfiguration<'a> {
    /// Scope string, matched by `Loader::language_configuration_for_scope`.
    pub scope: Option<String>,
    /// Regex applied to file contents to choose between configurations that share a
    /// file type.
    pub content_regex: Option<Regex>,
    /// Compiled first-line regex, if any.
    pub first_line_regex: Option<Regex>,
    /// Regex matched against injection strings.
    pub injection_regex: Option<Regex>,
    /// File types associated with this configuration.
    pub file_types: Vec<String>,
    /// Root path of this configuration.
    pub root_path: PathBuf,
    /// Highlights query file names, if configured.
    pub highlights_filenames: Option<Vec<PathBuf>>,
    /// Injections query file names, if configured.
    pub injections_filenames: Option<Vec<PathBuf>>,
    /// Locals query file names, if configured.
    pub locals_filenames: Option<Vec<PathBuf>>,
    /// Tags query file names, if configured.
    pub tags_filenames: Option<Vec<PathBuf>>,
    /// Name of the language.
    pub language_name: String,
    /// Index into the loader's `languages_by_id` table.
    language_id: usize,
    /// Write-once cell for the highlight configuration.
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_config: OnceCell<Option<HighlightConfiguration>>,
    /// Write-once cell for the tags configuration.
    #[cfg(feature = "tree-sitter-tags")]
    tags_config: OnceCell<Option<TagsConfiguration>>,
    /// Borrowed list of highlight names, behind a mutex.
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_names: &'a Mutex<Vec<String>>,
    #[cfg(feature = "tree-sitter-highlight")]
    use_all_highlight_names: bool,
    /// Keeps `'a` in use when the feature-gated `highlight_names` field is compiled out.
    _phantom: PhantomData<&'a ()>,
}
604
/// Finds language configurations on disk and loads the corresponding languages.
pub struct Loader {
    /// Directory for parser libraries; set by [`Loader::new`] or
    /// [`Loader::with_parser_lib_path`].
    pub parser_lib_path: PathBuf,
    /// Per-language entry: grammar path (its `src` subdirectory is what gets loaded),
    /// a write-once cell caching the loaded `Language`, and optional external files.
    languages_by_id: Vec<(PathBuf, OnceCell<Language>, Option<Vec<PathBuf>>)>,
    /// All known language configurations; the maps below hold indices into this list.
    language_configurations: Vec<LanguageConfiguration<'static>>,
    /// Configuration indices keyed by file name or (possibly multi-part) extension.
    language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
    language_configuration_in_current_path: Option<usize>,
    /// Configuration indices keyed by first-line regex source.
    language_configuration_ids_by_first_line_regex: HashMap<String, Vec<usize>>,
    /// Highlight names; replaced by `configure_highlights`.
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_names: Box<Mutex<Vec<String>>>,
    /// Starts as `true`; set to `false` by `configure_highlights`.
    #[cfg(feature = "tree-sitter-highlight")]
    use_all_highlight_names: bool,
    debug_build: bool,
    sanitize_build: bool,
    force_rebuild: bool,

    #[cfg(feature = "wasm")]
    wasm_store: Mutex<Option<tree_sitter::WasmStore>>,
}
623
/// Inputs for compiling a parser; see [`CompileConfig::new`] for the defaults.
pub struct CompileConfig<'a> {
    /// Directory containing the parser sources.
    pub src_path: &'a Path,
    /// Header search paths; `new` initializes this to `[src_path]`.
    pub header_paths: Vec<&'a Path>,
    /// Path of the parser source; `new` initializes this to `src_path/parser.c`.
    pub parser_path: PathBuf,
    /// Optional scanner source path; `None` after `new`.
    pub scanner_path: Option<PathBuf>,
    /// Optional external files.
    pub external_files: Option<&'a [PathBuf]>,
    /// Optional output path.
    pub output_path: Option<PathBuf>,
    /// Extra flags; empty after `new`.
    pub flags: &'a [&'a str],
    /// `false` after `new`.
    pub sanitize: bool,
    /// Empty after `new`.
    pub name: String,
}
635
636impl<'a> CompileConfig<'a> {
637    #[must_use]
638    pub fn new(
639        src_path: &'a Path,
640        externals: Option<&'a [PathBuf]>,
641        output_path: Option<PathBuf>,
642    ) -> Self {
643        Self {
644            src_path,
645            header_paths: vec![src_path],
646            parser_path: src_path.join("parser.c"),
647            scanner_path: None,
648            external_files: externals,
649            output_path,
650            flags: &[],
651            sanitize: false,
652            name: String::new(),
653        }
654    }
655}
656
// NOTE(review): no SAFETY justification is given for this impl. `Loader` stores
// `once_cell::unsync::OnceCell` values (in `languages_by_id`), which are not `Sync`
// on their own, so this presumably relies on callers never initializing languages
// from several threads at once — confirm and record the invariant here.
unsafe impl Sync for Loader {}
658
659impl Loader {
660    pub fn new() -> LoaderResult<Self> {
661        let parser_lib_path = if let Ok(path) = env::var("TREE_SITTER_LIBDIR") {
662            PathBuf::from(path)
663        } else {
664            if cfg!(target_os = "macos") {
665                let legacy_apple_path = etcetera::base_strategy::Apple::new()?
666                    .cache_dir() // `$HOME/Library/Caches/`
667                    .join("tree-sitter");
668                if legacy_apple_path.exists() && legacy_apple_path.is_dir() {
669                    std::fs::remove_dir_all(&legacy_apple_path).map_err(|e| {
670                        LoaderError::IO(IoError::new(e, Some(legacy_apple_path.as_path())))
671                    })?;
672                }
673            }
674
675            etcetera::choose_base_strategy()?
676                .cache_dir()
677                .join("tree-sitter")
678                .join("lib")
679        };
680        Ok(Self::with_parser_lib_path(parser_lib_path))
681    }
682
683    #[must_use]
684    pub fn with_parser_lib_path(parser_lib_path: PathBuf) -> Self {
685        Self {
686            parser_lib_path,
687            languages_by_id: Vec::new(),
688            language_configurations: Vec::new(),
689            language_configuration_ids_by_file_type: HashMap::new(),
690            language_configuration_in_current_path: None,
691            language_configuration_ids_by_first_line_regex: HashMap::new(),
692            #[cfg(feature = "tree-sitter-highlight")]
693            highlight_names: Box::new(Mutex::new(Vec::new())),
694            #[cfg(feature = "tree-sitter-highlight")]
695            use_all_highlight_names: true,
696            debug_build: false,
697            sanitize_build: false,
698            force_rebuild: false,
699
700            #[cfg(feature = "wasm")]
701            wasm_store: Mutex::default(),
702        }
703    }
704
705    #[cfg(feature = "tree-sitter-highlight")]
706    #[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
707    pub fn configure_highlights(&mut self, names: &[String]) {
708        self.use_all_highlight_names = false;
709        let mut highlights = self.highlight_names.lock().unwrap();
710        highlights.clear();
711        highlights.extend(names.iter().cloned());
712    }
713
714    #[must_use]
715    #[cfg(feature = "tree-sitter-highlight")]
716    #[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
717    pub fn highlight_names(&self) -> Vec<String> {
718        self.highlight_names.lock().unwrap().clone()
719    }
720
721    pub fn find_all_languages(&mut self, config: &Config) -> LoaderResult<()> {
722        if config.parser_directories.is_empty() {
723            warn!(concat!(
724                "You have not configured any parser directories!\n",
725                "Please run `tree-sitter init-config` and edit the resulting\n",
726                "configuration file to indicate where we should look for\n",
727                "language grammars.\n"
728            ));
729        }
730        for parser_container_dir in &config.parser_directories {
731            if let Ok(entries) = fs::read_dir(parser_container_dir) {
732                for entry in entries {
733                    let entry = entry.map_err(|e| LoaderError::IO(IoError::new(e, None)))?;
734                    if let Some(parser_dir_name) = entry.file_name().to_str() {
735                        if parser_dir_name.starts_with("tree-sitter-") {
736                            self.find_language_configurations_at_path(
737                                &parser_container_dir.join(parser_dir_name),
738                                false,
739                            )
740                            .ok();
741                        }
742                    }
743                }
744            }
745        }
746        Ok(())
747    }
748
749    pub fn languages_at_path(&mut self, path: &Path) -> LoaderResult<Vec<(Language, String)>> {
750        if let Ok(configurations) = self.find_language_configurations_at_path(path, true) {
751            let mut language_ids = configurations
752                .iter()
753                .map(|c| (c.language_id, c.language_name.clone()))
754                .collect::<Vec<_>>();
755            language_ids.sort_unstable();
756            language_ids.dedup();
757            language_ids
758                .into_iter()
759                .map(|(id, name)| Ok((self.language_for_id(id)?, name)))
760                .collect::<LoaderResult<Vec<_>>>()
761        } else {
762            Ok(Vec::new())
763        }
764    }
765
766    #[must_use]
767    pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> {
768        self.language_configurations
769            .iter()
770            .map(|c| (c, self.languages_by_id[c.language_id].0.as_ref()))
771            .collect()
772    }
773
774    pub fn language_configuration_for_scope(
775        &self,
776        scope: &str,
777    ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
778        for configuration in &self.language_configurations {
779            if configuration.scope.as_ref().is_some_and(|s| s == scope) {
780                let language = self.language_for_id(configuration.language_id)?;
781                return Ok(Some((language, configuration)));
782            }
783        }
784        Ok(None)
785    }
786
787    pub fn language_configuration_for_first_line_regex(
788        &self,
789        path: &Path,
790    ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
791        self.language_configuration_ids_by_first_line_regex
792            .iter()
793            .try_fold(None, |_, (regex, ids)| {
794                if let Some(regex) = Self::regex(Some(regex)) {
795                    let file = fs::File::open(path)
796                        .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
797                    let reader = BufReader::new(file);
798                    let first_line = reader
799                        .lines()
800                        .next()
801                        .transpose()
802                        .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
803                    if let Some(first_line) = first_line {
804                        if regex.is_match(&first_line) && !ids.is_empty() {
805                            let configuration = &self.language_configurations[ids[0]];
806                            let language = self.language_for_id(configuration.language_id)?;
807                            return Ok(Some((language, configuration)));
808                        }
809                    }
810                }
811
812                Ok(None)
813            })
814    }
815
816    pub fn language_configuration_for_file_name(
817        &self,
818        path: &Path,
819    ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
820        // Find all the language configurations that match this file name
821        // or a suffix of the file name.
822        let configuration_ids = path
823            .file_name()
824            .and_then(|n| n.to_str())
825            .and_then(|file_name| self.language_configuration_ids_by_file_type.get(file_name))
826            .or_else(|| {
827                let mut path = path.to_owned();
828                let mut extensions = Vec::with_capacity(2);
829                while let Some(extension) = path.extension() {
830                    extensions.push(extension.to_str()?.to_string());
831                    path = PathBuf::from(path.file_stem()?.to_os_string());
832                }
833                extensions.reverse();
834                self.language_configuration_ids_by_file_type
835                    .get(&extensions.join("."))
836            });
837
838        if let Some(configuration_ids) = configuration_ids {
839            if !configuration_ids.is_empty() {
840                let configuration = if configuration_ids.len() == 1 {
841                    &self.language_configurations[configuration_ids[0]]
842                }
843                // If multiple language configurations match, then determine which
844                // one to use by applying the configurations' content regexes.
845                else {
846                    let file_contents =
847                        fs::read(path).map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
848                    let file_contents = String::from_utf8_lossy(&file_contents);
849                    let mut best_score = -2isize;
850                    let mut best_configuration_id = None;
851                    for configuration_id in configuration_ids {
852                        let config = &self.language_configurations[*configuration_id];
853
854                        // If the language configuration has a content regex, assign
855                        // a score based on the length of the first match.
856                        let score;
857                        if let Some(content_regex) = &config.content_regex {
858                            if let Some(mat) = content_regex.find(&file_contents) {
859                                score = (mat.end() - mat.start()) as isize;
860                            }
861                            // If the content regex does not match, then *penalize* this
862                            // language configuration, so that language configurations
863                            // without content regexes are preferred over those with
864                            // non-matching content regexes.
865                            else {
866                                score = -1;
867                            }
868                        } else {
869                            score = 0;
870                        }
871                        if score > best_score {
872                            best_configuration_id = Some(*configuration_id);
873                            best_score = score;
874                        }
875                    }
876
877                    &self.language_configurations[best_configuration_id.unwrap()]
878                };
879
880                let language = self.language_for_id(configuration.language_id)?;
881                return Ok(Some((language, configuration)));
882            }
883        }
884
885        Ok(None)
886    }
887
888    pub fn language_configuration_for_injection_string(
889        &self,
890        string: &str,
891    ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
892        let mut best_match_length = 0;
893        let mut best_match_position = None;
894        for (i, configuration) in self.language_configurations.iter().enumerate() {
895            if let Some(injection_regex) = &configuration.injection_regex {
896                if let Some(mat) = injection_regex.find(string) {
897                    let length = mat.end() - mat.start();
898                    if length > best_match_length {
899                        best_match_position = Some(i);
900                        best_match_length = length;
901                    }
902                }
903            }
904        }
905
906        if let Some(i) = best_match_position {
907            let configuration = &self.language_configurations[i];
908            let language = self.language_for_id(configuration.language_id)?;
909            Ok(Some((language, configuration)))
910        } else {
911            Ok(None)
912        }
913    }
914
915    pub fn language_for_configuration(
916        &self,
917        configuration: &LanguageConfiguration,
918    ) -> LoaderResult<Language> {
919        self.language_for_id(configuration.language_id)
920    }
921
922    fn language_for_id(&self, id: usize) -> LoaderResult<Language> {
923        let (path, language, externals) = &self.languages_by_id[id];
924        language
925            .get_or_try_init(|| {
926                let src_path = path.join("src");
927                self.load_language_at_path(CompileConfig::new(
928                    &src_path,
929                    externals.as_deref(),
930                    None,
931                ))
932            })
933            .cloned()
934    }
935
936    pub fn compile_parser_at_path(
937        &self,
938        grammar_path: &Path,
939        output_path: PathBuf,
940        flags: &[&str],
941    ) -> LoaderResult<()> {
942        let src_path = grammar_path.join("src");
943        let mut config = CompileConfig::new(&src_path, None, Some(output_path));
944        config.flags = flags;
945        self.load_language_at_path(config).map(|_| ())
946    }
947
948    pub fn load_language_at_path(&self, mut config: CompileConfig) -> LoaderResult<Language> {
949        let grammar_path = config.src_path.join("grammar.json");
950        config.name = Self::grammar_json_name(&grammar_path)?;
951        self.load_language_at_path_with_name(config)
952    }
953
954    pub fn load_language_at_path_with_name(
955        &self,
956        mut config: CompileConfig,
957    ) -> LoaderResult<Language> {
958        let mut lib_name = config.name.clone();
959        let language_fn_name = format!("tree_sitter_{}", config.name.replace('-', "_"));
960        if self.debug_build {
961            lib_name.push_str(".debug._");
962        }
963
964        if self.sanitize_build {
965            lib_name.push_str(".sanitize._");
966            config.sanitize = true;
967        }
968
969        if config.output_path.is_none() {
970            fs::create_dir_all(&self.parser_lib_path).map_err(|e| {
971                LoaderError::IO(IoError::new(e, Some(self.parser_lib_path.as_path())))
972            })?;
973        }
974
975        let mut recompile = self.force_rebuild || config.output_path.is_some(); // if specified, always recompile
976
977        let output_path = config.output_path.unwrap_or_else(|| {
978            let mut path = self.parser_lib_path.join(lib_name);
979            path.set_extension(env::consts::DLL_EXTENSION);
980            #[cfg(feature = "wasm")]
981            if self.wasm_store.lock().unwrap().is_some() {
982                path.set_extension("wasm");
983            }
984            path
985        });
986        config.output_path = Some(output_path.clone());
987
988        let parser_path = config.src_path.join("parser.c");
989        config.scanner_path = self.get_scanner_path(config.src_path);
990
991        let mut paths_to_check = vec![parser_path];
992
993        if let Some(scanner_path) = config.scanner_path.as_ref() {
994            paths_to_check.push(scanner_path.clone());
995        }
996
997        paths_to_check.extend(
998            config
999                .external_files
1000                .unwrap_or_default()
1001                .iter()
1002                .map(|p| config.src_path.join(p)),
1003        );
1004
1005        if !recompile {
1006            recompile = needs_recompile(&output_path, &paths_to_check)?;
1007        }
1008
1009        #[cfg(feature = "wasm")]
1010        if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() {
1011            if recompile {
1012                self.compile_parser_to_wasm(
1013                    &config.name,
1014                    config.src_path,
1015                    config
1016                        .scanner_path
1017                        .as_ref()
1018                        .and_then(|p| p.strip_prefix(config.src_path).ok()),
1019                    &output_path,
1020                )?;
1021            }
1022
1023            let wasm_bytes = fs::read(&output_path)
1024                .map_err(|e| LoaderError::IO(IoError::new(e, Some(output_path.as_path()))))?;
1025            return Ok(wasm_store.load_language(&config.name, &wasm_bytes)?);
1026        }
1027
1028        let lock_path = if env::var("CROSS_RUNNER").is_ok() {
1029            tempfile::tempdir()
1030                .unwrap()
1031                .path()
1032                .join("tree-sitter")
1033                .join("lock")
1034                .join(format!("{}.lock", config.name))
1035        } else {
1036            etcetera::choose_base_strategy()?
1037                .cache_dir()
1038                .join("tree-sitter")
1039                .join("lock")
1040                .join(format!("{}.lock", config.name))
1041        };
1042
1043        if let Ok(lock_file) = fs::OpenOptions::new().write(true).open(&lock_path) {
1044            recompile = false;
1045            if lock_file.try_lock_exclusive().is_err() {
1046                // if we can't acquire the lock, another process is compiling the parser, wait for
1047                // it and don't recompile
1048                lock_file
1049                    .lock_exclusive()
1050                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
1051                recompile = false;
1052            } else {
1053                // if we can acquire the lock, check if the lock file is older than 30 seconds, a
1054                // run that was interrupted and left the lock file behind should not block
1055                // subsequent runs
1056                let time = lock_file
1057                    .metadata()
1058                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?
1059                    .modified()
1060                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?
1061                    .elapsed()?
1062                    .as_secs();
1063                if time > 30 {
1064                    fs::remove_file(&lock_path)
1065                        .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
1066                    recompile = true;
1067                }
1068            }
1069        }
1070
1071        if recompile {
1072            let parent_path = lock_path.parent().unwrap();
1073            fs::create_dir_all(parent_path)
1074                .map_err(|e| LoaderError::IO(IoError::new(e, Some(parent_path))))?;
1075            let lock_file = fs::OpenOptions::new()
1076                .create(true)
1077                .truncate(true)
1078                .write(true)
1079                .open(&lock_path)
1080                .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
1081            lock_file
1082                .lock_exclusive()
1083                .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
1084
1085            self.compile_parser_to_dylib(&config, &lock_file, &lock_path)?;
1086
1087            if config.scanner_path.is_some() {
1088                self.check_external_scanner(&config.name, &output_path)?;
1089            }
1090        }
1091
1092        Self::load_language(&output_path, &language_fn_name)
1093    }
1094
1095    pub fn load_language(path: &Path, function_name: &str) -> LoaderResult<Language> {
1096        let library = unsafe { Library::new(path) }.map_err(|e| {
1097            LoaderError::Library(LibraryError {
1098                error: e,
1099                path: path.to_string_lossy().to_string(),
1100            })
1101        })?;
1102        let language = unsafe {
1103            let language_fn = library
1104                .get::<Symbol<unsafe extern "C" fn() -> Language>>(function_name.as_bytes())
1105                .map_err(|e| {
1106                    LoaderError::Symbol(SymbolError {
1107                        error: e,
1108                        symbol_name: function_name.to_string(),
1109                        path: path.to_string_lossy().to_string(),
1110                    })
1111                })?;
1112            language_fn()
1113        };
1114        mem::forget(library);
1115        Ok(language)
1116    }
1117
1118    fn compile_parser_to_dylib(
1119        &self,
1120        config: &CompileConfig,
1121        lock_file: &fs::File,
1122        lock_path: &Path,
1123    ) -> LoaderResult<()> {
1124        let mut cc_config = cc::Build::new();
1125        cc_config
1126            .cargo_metadata(false)
1127            .cargo_warnings(false)
1128            .target(BUILD_TARGET)
1129            // BUILD_TARGET from the build environment becomes a runtime host for cc.
1130            // Otherwise, when cross compiled, cc will keep looking for a cross-compiler
1131            // on the target system instead of the native compiler.
1132            .host(BUILD_TARGET)
1133            .debug(self.debug_build)
1134            .file(&config.parser_path)
1135            .includes(&config.header_paths)
1136            .std("c11");
1137
1138        if let Some(scanner_path) = config.scanner_path.as_ref() {
1139            cc_config.file(scanner_path);
1140        }
1141
1142        if self.debug_build {
1143            cc_config.opt_level(0).extra_warnings(true);
1144        } else {
1145            cc_config.opt_level(2).extra_warnings(false);
1146        }
1147
1148        for flag in config.flags {
1149            cc_config.define(flag, None);
1150        }
1151
1152        let compiler = cc_config.get_compiler();
1153        let mut command = Command::new(compiler.path());
1154        command.args(compiler.args());
1155        for (key, value) in compiler.env() {
1156            command.env(key, value);
1157        }
1158
1159        let output_path = config.output_path.as_ref().unwrap();
1160
1161        let temp_dir = if compiler.is_like_msvc() {
1162            let out = format!("-out:{}", output_path.to_str().unwrap());
1163            command.arg(if self.debug_build { "-LDd" } else { "-LD" });
1164            command.arg("-utf-8");
1165
1166            // Windows creates intermediate files when compiling (.exp, .lib, .obj), which causes
1167            // issues when multiple processes are compiling in the same directory. This creates a
1168            // temporary directory for those files to go into, which is deleted after compilation.
1169            let temp_dir = output_path.parent().unwrap().join(format!(
1170                "tmp_{}_{:?}",
1171                std::process::id(),
1172                std::thread::current().id()
1173            ));
1174            std::fs::create_dir_all(&temp_dir).unwrap();
1175
1176            command.arg(format!("/Fo{}\\", temp_dir.display()));
1177            command.args(cc_config.get_files());
1178            command.arg("-link").arg(out);
1179            command.arg(format!("/IMPLIB:{}.lib", temp_dir.join("temp").display()));
1180
1181            Some(temp_dir)
1182        } else {
1183            command.arg("-Werror=implicit-function-declaration");
1184            if cfg!(any(target_os = "macos", target_os = "ios")) {
1185                command.arg("-dynamiclib");
1186                // TODO: remove when supported
1187                command.arg("-UTREE_SITTER_REUSE_ALLOCATOR");
1188            } else {
1189                command.arg("-shared");
1190            }
1191            command.args(cc_config.get_files());
1192            command.arg("-o").arg(output_path);
1193
1194            None
1195        };
1196
1197        let output = command.output().map_err(|e| {
1198            LoaderError::Compiler(CompilerError {
1199                error: e,
1200                command: Box::new(command),
1201            })
1202        })?;
1203
1204        if let Some(temp_dir) = temp_dir {
1205            let _ = fs::remove_dir_all(temp_dir);
1206        }
1207
1208        FileExt::unlock(lock_file)
1209            .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path))))?;
1210        fs::remove_file(lock_path)
1211            .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path))))?;
1212
1213        if output.status.success() {
1214            Ok(())
1215        } else {
1216            Err(LoaderError::Compilation(
1217                String::from_utf8_lossy(&output.stdout).to_string(),
1218                String::from_utf8_lossy(&output.stderr).to_string(),
1219            ))
1220        }
1221    }
1222
1223    #[cfg(unix)]
1224    fn check_external_scanner(&self, name: &str, library_path: &Path) -> LoaderResult<()> {
1225        let prefix = if cfg!(any(target_os = "macos", target_os = "ios")) {
1226            "_"
1227        } else {
1228            ""
1229        };
1230        let section = if cfg!(all(target_arch = "powerpc64", target_os = "linux")) {
1231            " D "
1232        } else {
1233            " T "
1234        };
1235        let mut must_have = vec![
1236            format!("{prefix}tree_sitter_{name}_external_scanner_create"),
1237            format!("{prefix}tree_sitter_{name}_external_scanner_destroy"),
1238            format!("{prefix}tree_sitter_{name}_external_scanner_serialize"),
1239            format!("{prefix}tree_sitter_{name}_external_scanner_deserialize"),
1240            format!("{prefix}tree_sitter_{name}_external_scanner_scan"),
1241        ];
1242
1243        let nm_cmd = env::var("NM").unwrap_or_else(|_| "nm".to_owned());
1244        let command = Command::new(nm_cmd)
1245            .arg("--defined-only")
1246            .arg(library_path)
1247            .output();
1248        if let Ok(output) = command {
1249            if output.status.success() {
1250                let mut found_non_static = false;
1251                for line in String::from_utf8_lossy(&output.stdout).lines() {
1252                    if line.contains(section) {
1253                        if let Some(function_name) =
1254                            line.split_whitespace().collect::<Vec<_>>().get(2)
1255                        {
1256                            if !line.contains("tree_sitter_") {
1257                                if !found_non_static {
1258                                    found_non_static = true;
1259                                    warn!("Found non-static non-tree-sitter functions in the external scanner");
1260                                }
1261                                warn!("  `{function_name}`");
1262                            } else {
1263                                must_have.retain(|f| f != function_name);
1264                            }
1265                        }
1266                    }
1267                }
1268                if found_non_static {
1269                    warn!(concat!(
1270                        "Consider making these functions static, they can cause conflicts ",
1271                        "when another tree-sitter project uses the same function name."
1272                    ));
1273                }
1274
1275                if !must_have.is_empty() {
1276                    return Err(LoaderError::ScannerSymbols(ScannerSymbolError {
1277                        missing: must_have,
1278                    }));
1279                }
1280            }
1281        }
1282
1283        Ok(())
1284    }
1285
1286    #[cfg(windows)]
1287    fn check_external_scanner(&self, _name: &str, _library_path: &Path) -> LoaderResult<()> {
1288        // TODO: there's no nm command on windows, whoever wants to implement this can and should :)
1289
1290        // let mut must_have = vec![
1291        //     format!("tree_sitter_{name}_external_scanner_create"),
1292        //     format!("tree_sitter_{name}_external_scanner_destroy"),
1293        //     format!("tree_sitter_{name}_external_scanner_serialize"),
1294        //     format!("tree_sitter_{name}_external_scanner_deserialize"),
1295        //     format!("tree_sitter_{name}_external_scanner_scan"),
1296        // ];
1297
1298        Ok(())
1299    }
1300
1301    pub fn compile_parser_to_wasm(
1302        &self,
1303        language_name: &str,
1304        src_path: &Path,
1305        scanner_filename: Option<&Path>,
1306        output_path: &Path,
1307    ) -> LoaderResult<()> {
1308        let clang_executable = self.ensure_wasi_sdk_exists()?;
1309
1310        let mut command = Command::new(&clang_executable);
1311        command.current_dir(src_path).args([
1312            "-o",
1313            output_path.to_str().unwrap(),
1314            "-fPIC",
1315            "-shared",
1316            if self.debug_build { "-g" } else { "-Os" },
1317            format!("-Wl,--export=tree_sitter_{language_name}").as_str(),
1318            "-Wl,--allow-undefined",
1319            "-Wl,--no-entry",
1320            "-nostdlib",
1321            "-fno-exceptions",
1322            "-fvisibility=hidden",
1323            "-I",
1324            ".",
1325            "parser.c",
1326        ]);
1327
1328        if let Some(scanner_filename) = scanner_filename {
1329            command.arg(scanner_filename);
1330        }
1331
1332        let output = command.output().map_err(LoaderError::WasmCompiler)?;
1333
1334        if !output.status.success() {
1335            return Err(LoaderError::WasmCompilation(
1336                String::from_utf8_lossy(&output.stderr).to_string(),
1337            ));
1338        }
1339
1340        Ok(())
1341    }
1342
1343    /// Extracts a tar.gz archive with `tar`, stripping the first path component.
1344    fn extract_tar_gz_with_strip(
1345        &self,
1346        archive_path: &Path,
1347        destination: &Path,
1348    ) -> LoaderResult<()> {
1349        let status = Command::new("tar")
1350            .arg("-xzf")
1351            .arg(archive_path)
1352            .arg("--strip-components=1")
1353            .arg("-C")
1354            .arg(destination)
1355            .status()
1356            .map_err(|e| LoaderError::Tar(archive_path.to_string_lossy().to_string(), e))?;
1357
1358        if !status.success() {
1359            return Err(LoaderError::Extraction(
1360                archive_path.to_string_lossy().to_string(),
1361                destination.to_string_lossy().to_string(),
1362            ));
1363        }
1364
1365        Ok(())
1366    }
1367
1368    /// This ensures that the wasi-sdk is available, downloading and extracting it if necessary,
1369    /// and returns the path to the `clang` executable.
1370    ///
1371    /// If `TREE_SITTER_WASI_SDK_PATH` is set, it will use that path to look for the clang executable.
1372    fn ensure_wasi_sdk_exists(&self) -> LoaderResult<PathBuf> {
1373        let possible_executables = if cfg!(windows) {
1374            vec![
1375                "clang.exe",
1376                "wasm32-unknown-wasi-clang.exe",
1377                "wasm32-wasi-clang.exe",
1378            ]
1379        } else {
1380            vec!["clang", "wasm32-unknown-wasi-clang", "wasm32-wasi-clang"]
1381        };
1382
1383        if let Ok(wasi_sdk_path) = std::env::var("TREE_SITTER_WASI_SDK_PATH") {
1384            let wasi_sdk_dir = PathBuf::from(wasi_sdk_path);
1385
1386            for exe in &possible_executables {
1387                let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1388                if clang_exe.exists() {
1389                    return Ok(clang_exe);
1390                }
1391            }
1392
1393            return Err(LoaderError::WasiSDKClang(WasiSDKClangError {
1394                wasi_sdk_dir: wasi_sdk_dir.to_string_lossy().to_string(),
1395                possible_executables,
1396                download: false,
1397            }));
1398        }
1399
1400        let cache_dir = etcetera::choose_base_strategy()?
1401            .cache_dir()
1402            .join("tree-sitter");
1403        fs::create_dir_all(&cache_dir)
1404            .map_err(|e| LoaderError::IO(IoError::new(e, Some(cache_dir.as_path()))))?;
1405
1406        let wasi_sdk_dir = cache_dir.join("wasi-sdk");
1407
1408        for exe in &possible_executables {
1409            let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1410            if clang_exe.exists() {
1411                return Ok(clang_exe);
1412            }
1413        }
1414
1415        fs::create_dir_all(&wasi_sdk_dir)
1416            .map_err(|e| LoaderError::IO(IoError::new(e, Some(wasi_sdk_dir.as_path()))))?;
1417
1418        let arch_os = if cfg!(target_os = "macos") {
1419            if cfg!(target_arch = "aarch64") {
1420                "arm64-macos"
1421            } else {
1422                "x86_64-macos"
1423            }
1424        } else if cfg!(target_os = "windows") {
1425            if cfg!(target_arch = "aarch64") {
1426                "arm64-windows"
1427            } else {
1428                "x86_64-windows"
1429            }
1430        } else if cfg!(target_os = "linux") {
1431            if cfg!(target_arch = "aarch64") {
1432                "arm64-linux"
1433            } else {
1434                "x86_64-linux"
1435            }
1436        } else {
1437            return Err(LoaderError::WasiSDKPlatform);
1438        };
1439
1440        let sdk_filename = format!("wasi-sdk-{WASI_SDK_VERSION}-{arch_os}.tar.gz");
1441        let wasi_sdk_major_version = WASI_SDK_VERSION
1442            .trim_end_matches(char::is_numeric) // trim minor version...
1443            .trim_end_matches('.'); // ...and '.' separator
1444        let sdk_url = format!(
1445            "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-{wasi_sdk_major_version}/{sdk_filename}",
1446        );
1447
1448        info!("Downloading wasi-sdk from {sdk_url}...");
1449        let temp_tar_path = cache_dir.join(sdk_filename);
1450
1451        let status = Command::new("curl")
1452            .arg("-f")
1453            .arg("-L")
1454            .arg("-o")
1455            .arg(&temp_tar_path)
1456            .arg(&sdk_url)
1457            .status()
1458            .map_err(|e| LoaderError::Curl(sdk_url.clone(), e))?;
1459
1460        if !status.success() {
1461            return Err(LoaderError::WasiSDKDownload(sdk_url));
1462        }
1463
1464        info!("Extracting wasi-sdk to {}...", wasi_sdk_dir.display());
1465        self.extract_tar_gz_with_strip(&temp_tar_path, &wasi_sdk_dir)?;
1466
1467        fs::remove_file(temp_tar_path).ok();
1468        for exe in &possible_executables {
1469            let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1470            if clang_exe.exists() {
1471                return Ok(clang_exe);
1472            }
1473        }
1474
1475        Err(LoaderError::WasiSDKClang(WasiSDKClangError {
1476            wasi_sdk_dir: wasi_sdk_dir.to_string_lossy().to_string(),
1477            possible_executables,
1478            download: true,
1479        }))
1480    }
1481
1482    #[must_use]
1483    #[cfg(feature = "tree-sitter-highlight")]
1484    pub fn highlight_config_for_injection_string<'a>(
1485        &'a self,
1486        string: &str,
1487    ) -> Option<&'a HighlightConfiguration> {
1488        match self.language_configuration_for_injection_string(string) {
1489            Err(e) => {
1490                error!("Failed to load language for injection string '{string}': {e}",);
1491                None
1492            }
1493            Ok(None) => None,
1494            Ok(Some((language, configuration))) => {
1495                match configuration.highlight_config(language, None) {
1496                    Err(e) => {
1497                        error!(
1498                            "Failed to load higlight config for injection string '{string}': {e}"
1499                        );
1500                        None
1501                    }
1502                    Ok(None) => None,
1503                    Ok(Some(config)) => Some(config),
1504                }
1505            }
1506        }
1507    }
1508
1509    #[must_use]
1510    pub fn get_language_configuration_in_current_path(&self) -> Option<&LanguageConfiguration> {
1511        self.language_configuration_in_current_path
1512            .map(|i| &self.language_configurations[i])
1513    }
1514
1515    pub fn find_language_configurations_at_path(
1516        &mut self,
1517        parser_path: &Path,
1518        set_current_path_config: bool,
1519    ) -> LoaderResult<&[LanguageConfiguration]> {
1520        let initial_language_configuration_count = self.language_configurations.len();
1521
1522        match TreeSitterJSON::from_file(parser_path) {
1523            Ok(config) => {
1524                let language_count = self.languages_by_id.len();
1525                for grammar in config.grammars {
1526                    // Determine the path to the parser directory. This can be specified in
1527                    // the tree-sitter.json, but defaults to the directory containing the
1528                    // tree-sitter.json.
1529                    let language_path =
1530                        parser_path.join(grammar.path.unwrap_or(PathBuf::from(".")));
1531
1532                    // Determine if a previous language configuration in this package.json file
1533                    // already uses the same language.
1534                    let mut language_id = None;
1535                    for (id, (path, _, _)) in
1536                        self.languages_by_id.iter().enumerate().skip(language_count)
1537                    {
1538                        if language_path == *path {
1539                            language_id = Some(id);
1540                        }
1541                    }
1542
1543                    // If not, add a new language path to the list.
1544                    let language_id = if let Some(language_id) = language_id {
1545                        language_id
1546                    } else {
1547                        self.languages_by_id.push((
1548                            language_path,
1549                            OnceCell::new(),
1550                            grammar
1551                                .external_files
1552                                .clone()
1553                                .into_vec()
1554                                .map(|files| {
1555                                    files
1556                                        .into_iter()
1557                                        .map(|path| {
1558                                            let path = parser_path.join(path);
1559                                            // prevent p being above/outside of parser_path
1560                                            if path.starts_with(parser_path) {
1561                                                Ok(path)
1562                                            } else {
1563                                                Err(LoaderError::ExternalFile(
1564                                                    path.to_string_lossy().to_string(),
1565                                                    parser_path.to_string_lossy().to_string(),
1566                                                ))
1567                                            }
1568                                        })
1569                                        .collect::<LoaderResult<Vec<_>>>()
1570                                })
1571                                .transpose()?,
1572                        ));
1573                        self.languages_by_id.len() - 1
1574                    };
1575
1576                    let configuration = LanguageConfiguration {
1577                        root_path: parser_path.to_path_buf(),
1578                        language_name: grammar.name,
1579                        scope: Some(grammar.scope),
1580                        language_id,
1581                        file_types: grammar.file_types.unwrap_or_default(),
1582                        content_regex: Self::regex(grammar.content_regex.as_deref()),
1583                        first_line_regex: Self::regex(grammar.first_line_regex.as_deref()),
1584                        injection_regex: Self::regex(grammar.injection_regex.as_deref()),
1585                        injections_filenames: grammar.injections.into_vec(),
1586                        locals_filenames: grammar.locals.into_vec(),
1587                        tags_filenames: grammar.tags.into_vec(),
1588                        highlights_filenames: grammar.highlights.into_vec(),
1589                        #[cfg(feature = "tree-sitter-highlight")]
1590                        highlight_config: OnceCell::new(),
1591                        #[cfg(feature = "tree-sitter-tags")]
1592                        tags_config: OnceCell::new(),
1593                        #[cfg(feature = "tree-sitter-highlight")]
1594                        highlight_names: &self.highlight_names,
1595                        #[cfg(feature = "tree-sitter-highlight")]
1596                        use_all_highlight_names: self.use_all_highlight_names,
1597                        _phantom: PhantomData,
1598                    };
1599
1600                    for file_type in &configuration.file_types {
1601                        self.language_configuration_ids_by_file_type
1602                            .entry(file_type.clone())
1603                            .or_default()
1604                            .push(self.language_configurations.len());
1605                    }
1606                    if let Some(first_line_regex) = &configuration.first_line_regex {
1607                        self.language_configuration_ids_by_first_line_regex
1608                            .entry(first_line_regex.to_string())
1609                            .or_default()
1610                            .push(self.language_configurations.len());
1611                    }
1612
1613                    self.language_configurations.push(unsafe {
1614                        mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
1615                            configuration,
1616                        )
1617                    });
1618
1619                    if set_current_path_config
1620                        && self.language_configuration_in_current_path.is_none()
1621                    {
1622                        self.language_configuration_in_current_path =
1623                            Some(self.language_configurations.len() - 1);
1624                    }
1625                }
1626            }
1627            Err(LoaderError::Serialization(e)) => {
1628                warn!(
1629                    "Failed to parse {} -- {e}",
1630                    parser_path.join("tree-sitter.json").display()
1631                );
1632            }
1633            _ => {}
1634        }
1635
1636        // If we didn't find any language configurations in the tree-sitter.json file,
1637        // but there is a grammar.json file, then use the grammar file to form a simple
1638        // language configuration.
1639        if self.language_configurations.len() == initial_language_configuration_count
1640            && parser_path.join("src").join("grammar.json").exists()
1641        {
1642            let grammar_path = parser_path.join("src").join("grammar.json");
1643            let language_name = Self::grammar_json_name(&grammar_path)?;
1644            let configuration = LanguageConfiguration {
1645                root_path: parser_path.to_owned(),
1646                language_name,
1647                language_id: self.languages_by_id.len(),
1648                file_types: Vec::new(),
1649                scope: None,
1650                content_regex: None,
1651                first_line_regex: None,
1652                injection_regex: None,
1653                injections_filenames: None,
1654                locals_filenames: None,
1655                highlights_filenames: None,
1656                tags_filenames: None,
1657                #[cfg(feature = "tree-sitter-highlight")]
1658                highlight_config: OnceCell::new(),
1659                #[cfg(feature = "tree-sitter-tags")]
1660                tags_config: OnceCell::new(),
1661                #[cfg(feature = "tree-sitter-highlight")]
1662                highlight_names: &self.highlight_names,
1663                #[cfg(feature = "tree-sitter-highlight")]
1664                use_all_highlight_names: self.use_all_highlight_names,
1665                _phantom: PhantomData,
1666            };
1667            self.language_configurations.push(unsafe {
1668                mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
1669                    configuration,
1670                )
1671            });
1672            self.languages_by_id
1673                .push((parser_path.to_owned(), OnceCell::new(), None));
1674        }
1675
1676        Ok(&self.language_configurations[initial_language_configuration_count..])
1677    }
1678
1679    fn regex(pattern: Option<&str>) -> Option<Regex> {
1680        pattern.and_then(|r| RegexBuilder::new(r).multi_line(true).build().ok())
1681    }
1682
1683    fn grammar_json_name(grammar_path: &Path) -> LoaderResult<String> {
1684        let file = fs::File::open(grammar_path)
1685            .map_err(|e| LoaderError::IO(IoError::new(e, Some(grammar_path))))?;
1686
1687        let first_three_lines = BufReader::new(file)
1688            .lines()
1689            .take(3)
1690            .collect::<Result<Vec<_>, std::io::Error>>()
1691            .map_err(|_| LoaderError::GrammarJSON(grammar_path.to_string_lossy().to_string()))?
1692            .join("\n");
1693
1694        let name = GRAMMAR_NAME_REGEX
1695            .captures(&first_three_lines)
1696            .and_then(|c| c.get(1))
1697            .ok_or_else(|| LoaderError::GrammarJSON(grammar_path.to_string_lossy().to_string()))?;
1698
1699        Ok(name.as_str().to_string())
1700    }
1701
1702    pub fn select_language(
1703        &mut self,
1704        path: Option<&Path>,
1705        current_dir: &Path,
1706        scope: Option<&str>,
1707        // path to dynamic library, name of language
1708        lib_info: Option<&(PathBuf, &str)>,
1709    ) -> LoaderResult<Language> {
1710        if let Some((ref lib_path, language_name)) = lib_info {
1711            let language_fn_name = format!("tree_sitter_{}", language_name.replace('-', "_"));
1712            Self::load_language(lib_path, &language_fn_name)
1713        } else if let Some(scope) = scope {
1714            if let Some(config) = self
1715                .language_configuration_for_scope(scope)
1716                .map_err(|e| LoaderError::ScopeLoad(scope.to_string(), Box::new(e)))?
1717            {
1718                Ok(config.0)
1719            } else {
1720                Err(LoaderError::UnknownScope(scope.to_string()))
1721            }
1722        } else if let Some((lang, _)) = if let Some(path) = path {
1723            self.language_configuration_for_file_name(path)
1724                .map_err(|e| {
1725                    LoaderError::FileNameLoad(
1726                        path.file_name().unwrap().to_string_lossy().to_string(),
1727                        Box::new(e),
1728                    )
1729                })?
1730        } else {
1731            None
1732        } {
1733            Ok(lang)
1734        } else if let Some(id) = self.language_configuration_in_current_path {
1735            Ok(self.language_for_id(self.language_configurations[id].language_id)?)
1736        } else if let Some(lang) = self
1737            .languages_at_path(current_dir)
1738            .map_err(|e| LoaderError::CurrentDirectoryLoad(Box::new(e)))?
1739            .first()
1740            .cloned()
1741        {
1742            Ok(lang.0)
1743        } else if let Some(lang) = if let Some(path) = path {
1744            self.language_configuration_for_first_line_regex(path)?
1745        } else {
1746            None
1747        } {
1748            Ok(lang.0)
1749        } else {
1750            Err(LoaderError::NoLanguage)
1751        }
1752    }
1753
1754    pub const fn debug_build(&mut self, flag: bool) {
1755        self.debug_build = flag;
1756    }
1757
1758    pub const fn sanitize_build(&mut self, flag: bool) {
1759        self.sanitize_build = flag;
1760    }
1761
1762    pub const fn force_rebuild(&mut self, rebuild: bool) {
1763        self.force_rebuild = rebuild;
1764    }
1765
/// Creates a [`tree_sitter::WasmStore`] for `engine` and installs it as this loader's
/// Wasm store, replacing any store that was set before.
///
/// # Panics
///
/// Panics if the store cannot be created for `engine`, or if the mutex guarding the
/// loader's Wasm store is poisoned.
#[cfg(feature = "wasm")]
#[cfg_attr(docsrs, doc(cfg(feature = "wasm")))]
pub fn use_wasm(&mut self, engine: &tree_sitter::wasmtime::Engine) {
    // Build the store before taking the lock, matching the evaluation order of a plain
    // assignment (value first, then the place).
    let store = tree_sitter::WasmStore::new(engine).unwrap();
    let mut slot = self.wasm_store.lock().unwrap();
    *slot = Some(store);
}
1771
1772    #[must_use]
1773    pub fn get_scanner_path(&self, src_path: &Path) -> Option<PathBuf> {
1774        let path = src_path.join("scanner.c");
1775        path.exists().then_some(path)
1776    }
1777}
1778
1779impl LanguageConfiguration<'_> {
1780    #[cfg(feature = "tree-sitter-highlight")]
1781    pub fn highlight_config(
1782        &self,
1783        language: Language,
1784        paths: Option<&[PathBuf]>,
1785    ) -> LoaderResult<Option<&HighlightConfiguration>> {
1786        let (highlights_filenames, injections_filenames, locals_filenames) = match paths {
1787            Some(paths) => (
1788                Some(
1789                    paths
1790                        .iter()
1791                        .filter(|p| p.ends_with(DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME))
1792                        .cloned()
1793                        .collect::<Vec<_>>(),
1794                ),
1795                Some(
1796                    paths
1797                        .iter()
1798                        .filter(|p| p.ends_with(DEFAULT_TAGS_QUERY_FILE_NAME))
1799                        .cloned()
1800                        .collect::<Vec<_>>(),
1801                ),
1802                Some(
1803                    paths
1804                        .iter()
1805                        .filter(|p| p.ends_with(DEFAULT_LOCALS_QUERY_FILE_NAME))
1806                        .cloned()
1807                        .collect::<Vec<_>>(),
1808                ),
1809            ),
1810            None => (None, None, None),
1811        };
1812        self.highlight_config
1813            .get_or_try_init(|| {
1814                let (highlights_query, highlight_ranges) = self.read_queries(
1815                    if highlights_filenames.is_some() {
1816                        highlights_filenames.as_deref()
1817                    } else {
1818                        self.highlights_filenames.as_deref()
1819                    },
1820                    DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME,
1821                )?;
1822                let (injections_query, injection_ranges) = self.read_queries(
1823                    if injections_filenames.is_some() {
1824                        injections_filenames.as_deref()
1825                    } else {
1826                        self.injections_filenames.as_deref()
1827                    },
1828                    DEFAULT_INJECTIONS_QUERY_FILE_NAME,
1829                )?;
1830                let (locals_query, locals_ranges) = self.read_queries(
1831                    if locals_filenames.is_some() {
1832                        locals_filenames.as_deref()
1833                    } else {
1834                        self.locals_filenames.as_deref()
1835                    },
1836                    DEFAULT_LOCALS_QUERY_FILE_NAME,
1837                )?;
1838
1839                if highlights_query.is_empty() {
1840                    Ok(None)
1841                } else {
1842                    let mut result = HighlightConfiguration::new(
1843                        language,
1844                        &self.language_name,
1845                        &highlights_query,
1846                        &injections_query,
1847                        &locals_query,
1848                    )
1849                    .map_err(|error| match error.kind {
1850                        QueryErrorKind::Language => {
1851                            LoaderError::Query(LoaderQueryError { error, file: None })
1852                        }
1853                        _ => {
1854                            if error.offset < injections_query.len() {
1855                                Self::include_path_in_query_error(
1856                                    error,
1857                                    &injection_ranges,
1858                                    &injections_query,
1859                                    0,
1860                                )
1861                            } else if error.offset < injections_query.len() + locals_query.len() {
1862                                Self::include_path_in_query_error(
1863                                    error,
1864                                    &locals_ranges,
1865                                    &locals_query,
1866                                    injections_query.len(),
1867                                )
1868                            } else {
1869                                Self::include_path_in_query_error(
1870                                    error,
1871                                    &highlight_ranges,
1872                                    &highlights_query,
1873                                    injections_query.len() + locals_query.len(),
1874                                )
1875                            }
1876                        }
1877                    })?;
1878                    let mut all_highlight_names = self.highlight_names.lock().unwrap();
1879                    if self.use_all_highlight_names {
1880                        for capture_name in result.query.capture_names() {
1881                            if !all_highlight_names.iter().any(|x| x == capture_name) {
1882                                all_highlight_names.push((*capture_name).to_string());
1883                            }
1884                        }
1885                    }
1886                    result.configure(all_highlight_names.as_slice());
1887                    drop(all_highlight_names);
1888                    Ok(Some(result))
1889                }
1890            })
1891            .map(Option::as_ref)
1892    }
1893
1894    #[cfg(feature = "tree-sitter-tags")]
1895    pub fn tags_config(&self, language: Language) -> LoaderResult<Option<&TagsConfiguration>> {
1896        self.tags_config
1897            .get_or_try_init(|| {
1898                let (tags_query, tags_ranges) = self
1899                    .read_queries(self.tags_filenames.as_deref(), DEFAULT_TAGS_QUERY_FILE_NAME)?;
1900                let (locals_query, locals_ranges) = self.read_queries(
1901                    self.locals_filenames.as_deref(),
1902                    DEFAULT_LOCALS_QUERY_FILE_NAME,
1903                )?;
1904                if tags_query.is_empty() {
1905                    Ok(None)
1906                } else {
1907                    TagsConfiguration::new(language, &tags_query, &locals_query)
1908                        .map(Some)
1909                        .map_err(|error| {
1910                            if let TagsError::Query(error) = error {
1911                                if error.offset < locals_query.len() {
1912                                    Self::include_path_in_query_error(
1913                                        error,
1914                                        &locals_ranges,
1915                                        &locals_query,
1916                                        0,
1917                                    )
1918                                } else {
1919                                    Self::include_path_in_query_error(
1920                                        error,
1921                                        &tags_ranges,
1922                                        &tags_query,
1923                                        locals_query.len(),
1924                                    )
1925                                }
1926                            } else {
1927                                error.into()
1928                            }
1929                        })
1930                }
1931            })
1932            .map(Option::as_ref)
1933    }
1934
1935    #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1936    fn include_path_in_query_error(
1937        mut error: QueryError,
1938        ranges: &[(PathBuf, Range<usize>)],
1939        source: &str,
1940        start_offset: usize,
1941    ) -> LoaderError {
1942        let offset_within_section = error.offset - start_offset;
1943        let (path, range) = ranges
1944            .iter()
1945            .find(|(_, range)| range.contains(&offset_within_section))
1946            .unwrap_or_else(|| ranges.last().unwrap());
1947        error.offset = offset_within_section - range.start;
1948        error.row = source[range.start..offset_within_section]
1949            .matches('\n')
1950            .count();
1951        LoaderError::Query(LoaderQueryError {
1952            error,
1953            file: Some(path.to_string_lossy().to_string()),
1954        })
1955    }
1956
1957    #[allow(clippy::type_complexity)]
1958    #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1959    fn read_queries(
1960        &self,
1961        paths: Option<&[PathBuf]>,
1962        default_path: &str,
1963    ) -> LoaderResult<(String, Vec<(PathBuf, Range<usize>)>)> {
1964        let mut query = String::new();
1965        let mut path_ranges = Vec::new();
1966        if let Some(paths) = paths {
1967            for path in paths {
1968                let abs_path = self.root_path.join(path);
1969                let prev_query_len = query.len();
1970                query += &fs::read_to_string(&abs_path)
1971                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(abs_path.as_path()))))?;
1972                path_ranges.push((path.clone(), prev_query_len..query.len()));
1973            }
1974        } else {
1975            // highlights.scm is needed to test highlights, and tags.scm to test tags
1976            if default_path == DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME
1977                || default_path == DEFAULT_TAGS_QUERY_FILE_NAME
1978            {
1979                warn!(
1980                    concat!(
1981                        "You should add a `{}` entry pointing to the {} path in the `tree-sitter` ",
1982                        "object in the grammar's tree-sitter.json file. See more here: ",
1983                        "https://tree-sitter.github.io/tree-sitter/3-syntax-highlighting#query-paths"
1984                    ),
1985                    default_path.replace(".scm", ""),
1986                    default_path
1987                );
1988            }
1989            let queries_path = self.root_path.join("queries");
1990            let path = queries_path.join(default_path);
1991            if path.exists() {
1992                query = fs::read_to_string(&path)
1993                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(path.as_path()))))?;
1994                path_ranges.push((PathBuf::from(default_path), 0..query.len()));
1995            }
1996        }
1997
1998        Ok((query, path_ranges))
1999    }
2000}
2001
2002fn needs_recompile(lib_path: &Path, paths_to_check: &[PathBuf]) -> LoaderResult<bool> {
2003    if !lib_path.exists() {
2004        return Ok(true);
2005    }
2006    let lib_mtime = mtime(lib_path).map_err(|e| LoaderError::ModifiedTime(Box::new(e)))?;
2007    for path in paths_to_check {
2008        if mtime(path).map_err(|e| LoaderError::ModifiedTime(Box::new(e)))? > lib_mtime {
2009            return Ok(true);
2010        }
2011    }
2012    Ok(false)
2013}
2014
2015fn mtime(path: &Path) -> LoaderResult<SystemTime> {
2016    fs::metadata(path)
2017        .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?
2018        .modified()
2019        .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))
2020}