Skip to main content

tree_sitter_loader/
loader.rs

1#![cfg_attr(not(any(test, doctest)), doc = include_str!("../README.md"))]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3
4#[cfg(unix)]
5use std::fmt::Write as _;
6#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
7use std::ops::Range;
8#[cfg(feature = "tree-sitter-highlight")]
9use std::sync::Mutex;
10use std::{
11    collections::HashMap,
12    env, fs,
13    hash::{Hash as _, Hasher as _},
14    io::{BufRead, BufReader},
15    marker::PhantomData,
16    mem,
17    path::{Path, PathBuf},
18    process::Command,
19    sync::LazyLock,
20    time::{SystemTime, SystemTimeError},
21};
22
23use etcetera::BaseStrategy as _;
24use fs4::fs_std::FileExt;
25use libloading::{Library, Symbol};
26use log::{error, info, warn};
27use once_cell::unsync::OnceCell;
28use regex::{Regex, RegexBuilder};
29use semver::Version;
30use serde::{Deserialize, Deserializer, Serialize};
31use thiserror::Error;
32use tree_sitter::Language;
33#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
34use tree_sitter::QueryError;
35#[cfg(feature = "tree-sitter-highlight")]
36use tree_sitter::QueryErrorKind;
37#[cfg(feature = "wasm")]
38use tree_sitter::WasmError;
39#[cfg(feature = "tree-sitter-highlight")]
40use tree_sitter_highlight::HighlightConfiguration;
41#[cfg(feature = "tree-sitter-tags")]
42use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
43
44static GRAMMAR_NAME_REGEX: LazyLock<Regex> =
45    LazyLock::new(|| Regex::new(r#""name":\s*"(.*?)""#).unwrap());
46
47const WASI_SDK_VERSION: &str = include_str!("../wasi-sdk-version").trim_ascii();
48
49pub type LoaderResult<T> = Result<T, LoaderError>;
50
51#[derive(Debug, Error)]
52pub enum LoaderError {
53    #[error(transparent)]
54    Compiler(CompilerError),
55    #[error("Parser compilation failed.\nStdout: {0}\nStderr: {1}")]
56    Compilation(String, String),
57    #[error("Failed to execute curl for {0} -- {1}")]
58    Curl(String, std::io::Error),
59    #[error("Failed to load language in current directory:\n{0}")]
60    CurrentDirectoryLoad(Box<Self>),
61    #[error("External file path {0} is outside of parser directory {1}")]
62    ExternalFile(String, String),
63    #[error("Failed to extract archive {0} to {1}")]
64    Extraction(String, String),
65    #[error("Failed to load language for file name {0}:\n{1}")]
66    FileNameLoad(String, Box<Self>),
67    #[error("Failed to parse the language name from grammar.json at {0}")]
68    GrammarJSON(String),
69    #[error(transparent)]
70    HomeDir(#[from] etcetera::HomeDirError),
71    #[error(transparent)]
72    IO(IoError),
73    #[error(transparent)]
74    Library(LibraryError),
75    #[error("Failed to compare binary and source timestamps:\n{0}")]
76    ModifiedTime(Box<Self>),
77    #[error("No language found")]
78    NoLanguage,
79    #[error(transparent)]
80    Query(LoaderQueryError),
81    #[error("Failed to load language for scope '{0}':\n{1}")]
82    ScopeLoad(String, Box<Self>),
83    #[error(transparent)]
84    Serialization(#[from] serde_json::Error),
85    #[error(transparent)]
86    Symbol(SymbolError),
87    #[error(transparent)]
88    Tags(#[from] TagsError),
89    #[error("Failed to execute tar for {0} -- {1}")]
90    Tar(String, std::io::Error),
91    #[error(transparent)]
92    Time(#[from] SystemTimeError),
93    #[error("Unknown scope '{0}'")]
94    UnknownScope(String),
95    #[error("Failed to download wasi-sdk from {0}")]
96    WasiSDKDownload(String),
97    #[error(transparent)]
98    WasiSDKClang(#[from] WasiSDKClangError),
99    #[error("Unsupported platform for wasi-sdk")]
100    WasiSDKPlatform,
101    #[cfg(feature = "wasm")]
102    #[error(transparent)]
103    Wasm(#[from] WasmError),
104    #[error("Failed to run wasi-sdk clang -- {0}")]
105    WasmCompiler(std::io::Error),
106    #[error("wasi-sdk clang command failed: {0}")]
107    WasmCompilation(String),
108}
109
110#[derive(Debug, Error)]
111pub struct CompilerError {
112    pub error: std::io::Error,
113    pub command: Box<Command>,
114}
115
116impl std::fmt::Display for CompilerError {
117    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
118        write!(
119            f,
120            "Failed to execute the C compiler with the following command:\n{:?}\nError: {}",
121            *self.command, self.error
122        )?;
123        Ok(())
124    }
125}
126
127#[derive(Debug, Error)]
128pub struct IoError {
129    pub error: std::io::Error,
130    pub path: Option<String>,
131}
132
133impl IoError {
134    fn new(error: std::io::Error, path: Option<&Path>) -> Self {
135        Self {
136            error,
137            path: path.map(|p| p.to_string_lossy().to_string()),
138        }
139    }
140}
141
142impl std::fmt::Display for IoError {
143    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
144        write!(f, "{}", self.error)?;
145        if let Some(ref path) = self.path {
146            write!(f, " ({path})")?;
147        }
148        Ok(())
149    }
150}
151
152#[derive(Debug, Error)]
153pub struct LibraryError {
154    pub error: libloading::Error,
155    pub path: String,
156}
157
158impl std::fmt::Display for LibraryError {
159    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
160        write!(
161            f,
162            "Error opening dynamic library {} -- {}",
163            self.path, self.error
164        )?;
165        Ok(())
166    }
167}
168
169#[derive(Debug, Error)]
170pub struct LoaderQueryError {
171    pub error: QueryError,
172    pub file: Option<String>,
173}
174
175impl std::fmt::Display for LoaderQueryError {
176    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
177        if let Some(ref path) = self.file {
178            writeln!(f, "Error in query file {path}:")?;
179        }
180        write!(f, "{}", self.error)?;
181        Ok(())
182    }
183}
184
185#[derive(Debug, Error)]
186pub struct SymbolError {
187    pub error: libloading::Error,
188    pub symbol_name: String,
189    pub path: String,
190}
191
192impl std::fmt::Display for SymbolError {
193    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
194        write!(
195            f,
196            "Failed to load symbol {} from {} -- {}",
197            self.symbol_name, self.path, self.error
198        )?;
199        Ok(())
200    }
201}
202
203#[derive(Debug, Error)]
204pub struct WasiSDKClangError {
205    pub wasi_sdk_dir: String,
206    pub possible_executables: Vec<&'static str>,
207    pub download: bool,
208}
209
210impl std::fmt::Display for WasiSDKClangError {
211    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
212        if self.download {
213            write!(
214                f,
215                "Failed to find clang executable in downloaded wasi-sdk at '{}'.",
216                self.wasi_sdk_dir
217            )?;
218        } else {
219            write!(f, "TREE_SITTER_WASI_SDK_PATH is set to '{}', but no clang executable found in 'bin/' directory.", self.wasi_sdk_dir)?;
220        }
221
222        let possible_exes = self.possible_executables.join(", ");
223        write!(f, " Looked for: {possible_exes}.")?;
224
225        Ok(())
226    }
227}
228
/// Default file name for highlights queries.
pub const DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME: &str = "highlights.scm";

/// Default file name for injections queries.
pub const DEFAULT_INJECTIONS_QUERY_FILE_NAME: &str = "injections.scm";

/// Default file name for locals queries.
pub const DEFAULT_LOCALS_QUERY_FILE_NAME: &str = "locals.scm";

/// Default file name for tags queries.
pub const DEFAULT_TAGS_QUERY_FILE_NAME: &str = "tags.scm";
236
237#[derive(Default, Deserialize, Serialize)]
238pub struct Config {
239    #[serde(default)]
240    #[serde(
241        rename = "parser-directories",
242        deserialize_with = "deserialize_parser_directories"
243    )]
244    pub parser_directories: Vec<PathBuf>,
245}
246
247#[derive(Serialize, Deserialize, Clone, Default)]
248#[serde(untagged)]
249pub enum PathsJSON {
250    #[default]
251    Empty,
252    Single(PathBuf),
253    Multiple(Vec<PathBuf>),
254}
255
256impl PathsJSON {
257    fn into_vec(self) -> Option<Vec<PathBuf>> {
258        match self {
259            Self::Empty => None,
260            Self::Single(s) => Some(vec![s]),
261            Self::Multiple(s) => Some(s),
262        }
263    }
264
265    const fn is_empty(&self) -> bool {
266        matches!(self, Self::Empty)
267    }
268
269    /// Represent this set of paths as a string that can be included in templates
270    #[must_use]
271    pub fn to_variable_value<'a>(&'a self, default: &'a PathBuf) -> &'a str {
272        match self {
273            Self::Empty => Some(default),
274            Self::Single(path_buf) => Some(path_buf),
275            Self::Multiple(paths) => paths.first(),
276        }
277        .map_or("", |path| path.as_os_str().to_str().unwrap_or(""))
278    }
279}
280
281#[derive(Serialize, Deserialize, Clone)]
282#[serde(untagged)]
283pub enum PackageJSONAuthor {
284    String(String),
285    Object {
286        name: String,
287        email: Option<String>,
288        url: Option<String>,
289    },
290}
291
292#[derive(Serialize, Deserialize, Clone)]
293#[serde(untagged)]
294pub enum PackageJSONRepository {
295    String(String),
296    Object { url: String },
297}
298
299#[derive(Serialize, Deserialize)]
300pub struct PackageJSON {
301    pub name: String,
302    pub version: Version,
303    pub description: Option<String>,
304    pub author: Option<PackageJSONAuthor>,
305    pub maintainers: Option<Vec<PackageJSONAuthor>>,
306    pub license: Option<String>,
307    pub repository: Option<PackageJSONRepository>,
308    #[serde(default)]
309    #[serde(rename = "tree-sitter", skip_serializing_if = "Option::is_none")]
310    pub tree_sitter: Option<Vec<LanguageConfigurationJSON>>,
311}
312
/// Serde default for `LanguageConfigurationJSON::path`: the current directory, `.`.
fn default_path() -> PathBuf {
    ".".into()
}
316
317#[derive(Serialize, Deserialize, Clone)]
318#[serde(rename_all = "kebab-case")]
319pub struct LanguageConfigurationJSON {
320    #[serde(default = "default_path")]
321    pub path: PathBuf,
322    pub scope: Option<String>,
323    pub file_types: Option<Vec<String>>,
324    pub content_regex: Option<String>,
325    pub first_line_regex: Option<String>,
326    pub injection_regex: Option<String>,
327    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
328    pub highlights: PathsJSON,
329    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
330    pub injections: PathsJSON,
331    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
332    pub locals: PathsJSON,
333    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
334    pub tags: PathsJSON,
335    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
336    pub external_files: PathsJSON,
337}
338
339#[derive(Serialize, Deserialize)]
340#[serde(rename_all = "kebab-case")]
341pub struct TreeSitterJSON {
342    #[serde(rename = "$schema")]
343    pub schema: Option<String>,
344    pub grammars: Vec<Grammar>,
345    pub metadata: Metadata,
346    #[serde(default)]
347    pub bindings: Bindings,
348}
349
350impl TreeSitterJSON {
351    pub fn from_file(path: &Path) -> LoaderResult<Self> {
352        let path = path.join("tree-sitter.json");
353        Ok(serde_json::from_str(&fs::read_to_string(&path).map_err(
354            |e| LoaderError::IO(IoError::new(e, Some(path.as_path()))),
355        )?)?)
356    }
357
358    #[must_use]
359    pub fn has_multiple_language_configs(&self) -> bool {
360        self.grammars.len() > 1
361    }
362}
363
364#[derive(Serialize, Deserialize)]
365#[serde(rename_all = "kebab-case")]
366pub struct Grammar {
367    pub name: String,
368    #[serde(skip_serializing_if = "Option::is_none")]
369    pub camelcase: Option<String>,
370    #[serde(skip_serializing_if = "Option::is_none")]
371    pub title: Option<String>,
372    pub scope: String,
373    #[serde(skip_serializing_if = "Option::is_none")]
374    pub path: Option<PathBuf>,
375    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
376    pub external_files: PathsJSON,
377    pub file_types: Option<Vec<String>>,
378    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
379    pub highlights: PathsJSON,
380    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
381    pub injections: PathsJSON,
382    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
383    pub locals: PathsJSON,
384    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
385    pub tags: PathsJSON,
386    #[serde(skip_serializing_if = "Option::is_none")]
387    pub injection_regex: Option<String>,
388    #[serde(skip_serializing_if = "Option::is_none")]
389    pub first_line_regex: Option<String>,
390    #[serde(skip_serializing_if = "Option::is_none")]
391    pub content_regex: Option<String>,
392    #[serde(skip_serializing_if = "Option::is_none")]
393    pub class_name: Option<String>,
394}
395
396#[derive(Serialize, Deserialize)]
397pub struct Metadata {
398    pub version: Version,
399    #[serde(skip_serializing_if = "Option::is_none")]
400    pub license: Option<String>,
401    #[serde(skip_serializing_if = "Option::is_none")]
402    pub description: Option<String>,
403    #[serde(skip_serializing_if = "Option::is_none")]
404    pub authors: Option<Vec<Author>>,
405    #[serde(skip_serializing_if = "Option::is_none")]
406    pub links: Option<Links>,
407    #[serde(skip)]
408    pub namespace: Option<String>,
409}
410
411#[derive(Serialize, Deserialize)]
412pub struct Author {
413    pub name: String,
414    #[serde(skip_serializing_if = "Option::is_none")]
415    pub email: Option<String>,
416    #[serde(skip_serializing_if = "Option::is_none")]
417    pub url: Option<String>,
418}
419
420#[derive(Serialize, Deserialize)]
421pub struct Links {
422    pub repository: String,
423    #[serde(skip_serializing_if = "Option::is_none")]
424    pub funding: Option<String>,
425}
426
427#[derive(Serialize, Deserialize, Clone)]
428#[serde(default)]
429pub struct Bindings {
430    pub c: bool,
431    pub go: bool,
432    pub java: bool,
433    #[serde(skip)]
434    pub kotlin: bool,
435    pub node: bool,
436    pub python: bool,
437    pub rust: bool,
438    pub swift: bool,
439    pub zig: bool,
440}
441
442impl Bindings {
443    /// return available languages and its default enabled state.
444    #[must_use]
445    pub const fn languages(&self) -> [(&'static str, bool); 8] {
446        [
447            ("c", true),
448            ("go", true),
449            ("java", false),
450            // Comment out Kotlin until the bindings are actually available.
451            // ("kotlin", false),
452            ("node", true),
453            ("python", true),
454            ("rust", true),
455            ("swift", true),
456            ("zig", false),
457        ]
458    }
459
460    /// construct Bindings from a language list. If a language isn't supported, its name will be put on the error part.
461    pub fn with_enabled_languages<'a, I>(languages: I) -> Result<Self, &'a str>
462    where
463        I: Iterator<Item = &'a str>,
464    {
465        let mut out = Self {
466            c: false,
467            go: false,
468            java: false,
469            kotlin: false,
470            node: false,
471            python: false,
472            rust: false,
473            swift: false,
474            zig: false,
475        };
476
477        for v in languages {
478            match v {
479                "c" => out.c = true,
480                "go" => out.go = true,
481                "java" => out.java = true,
482                // Comment out Kotlin until the bindings are actually available.
483                // "kotlin" => out.kotlin = true,
484                "node" => out.node = true,
485                "python" => out.python = true,
486                "rust" => out.rust = true,
487                "swift" => out.swift = true,
488                "zig" => out.zig = true,
489                unsupported => return Err(unsupported),
490            }
491        }
492
493        Ok(out)
494    }
495}
496
497impl Default for Bindings {
498    fn default() -> Self {
499        Self {
500            c: true,
501            go: true,
502            java: false,
503            kotlin: false,
504            node: true,
505            python: true,
506            rust: true,
507            swift: true,
508            zig: false,
509        }
510    }
511}
512
513// Replace `~` or `$HOME` with home path string.
514// (While paths like "~/.tree-sitter/config.json" can be deserialized,
515// they're not valid path for I/O modules.)
516fn deserialize_parser_directories<'de, D>(deserializer: D) -> Result<Vec<PathBuf>, D::Error>
517where
518    D: Deserializer<'de>,
519{
520    let paths = Vec::<PathBuf>::deserialize(deserializer)?;
521    let Ok(home) = etcetera::home_dir() else {
522        return Ok(paths);
523    };
524    let standardized = paths
525        .into_iter()
526        .map(|path| standardize_path(path, &home))
527        .collect();
528    Ok(standardized)
529}
530
/// Replaces a leading `~` or `$HOME` path component with `home`.
///
/// Only a whole first component counts: `~/x` is expanded, `~user/x` is not. Paths without
/// such a prefix are returned unchanged.
fn standardize_path(path: PathBuf, home: &Path) -> PathBuf {
    for marker in &["~", "$HOME"] {
        if let Ok(rest) = path.strip_prefix(marker) {
            return home.join(rest);
        }
    }
    path
}
540
541impl Config {
542    #[must_use]
543    pub fn initial() -> Self {
544        let home_dir = etcetera::home_dir().expect("Cannot determine home directory");
545        Self {
546            parser_directories: vec![
547                home_dir.join("github"),
548                home_dir.join("src"),
549                home_dir.join("source"),
550                home_dir.join("projects"),
551                home_dir.join("dev"),
552                home_dir.join("git"),
553            ],
554        }
555    }
556}
557
558const BUILD_TARGET: &str = env!("BUILD_TARGET");
559
560pub struct LanguageConfiguration<'a> {
561    pub scope: Option<String>,
562    pub content_regex: Option<Regex>,
563    pub first_line_regex: Option<Regex>,
564    pub injection_regex: Option<Regex>,
565    pub file_types: Vec<String>,
566    pub root_path: PathBuf,
567    pub highlights_filenames: Option<Vec<PathBuf>>,
568    pub injections_filenames: Option<Vec<PathBuf>>,
569    pub locals_filenames: Option<Vec<PathBuf>>,
570    pub tags_filenames: Option<Vec<PathBuf>>,
571    pub language_name: String,
572    language_id: usize,
573    #[cfg(feature = "tree-sitter-highlight")]
574    highlight_config: OnceCell<Option<HighlightConfiguration>>,
575    #[cfg(feature = "tree-sitter-tags")]
576    tags_config: OnceCell<Option<TagsConfiguration>>,
577    #[cfg(feature = "tree-sitter-highlight")]
578    highlight_names: &'a Mutex<Vec<String>>,
579    #[cfg(feature = "tree-sitter-highlight")]
580    use_all_highlight_names: bool,
581    _phantom: PhantomData<&'a ()>,
582}
583
584pub struct Loader {
585    pub parser_lib_path: PathBuf,
586    languages_by_id: Vec<(PathBuf, OnceCell<Language>, Option<Vec<PathBuf>>)>,
587    language_configurations: Vec<LanguageConfiguration<'static>>,
588    language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
589    language_configuration_in_current_path: Option<usize>,
590    language_configuration_ids_by_first_line_regex: HashMap<String, Vec<usize>>,
591    #[cfg(feature = "tree-sitter-highlight")]
592    highlight_names: Box<Mutex<Vec<String>>>,
593    #[cfg(feature = "tree-sitter-highlight")]
594    use_all_highlight_names: bool,
595    debug_build: bool,
596    sanitize_build: bool,
597    force_rebuild: bool,
598
599    #[cfg(feature = "wasm")]
600    wasm_store: Mutex<Option<tree_sitter::WasmStore>>,
601}
602
/// Inputs for compiling a single parser.
pub struct CompileConfig<'a> {
    /// Directory holding the parser sources.
    pub src_path: &'a Path,
    /// Header search directories.
    pub header_paths: Vec<&'a Path>,
    /// Path of the parser source file.
    pub parser_path: PathBuf,
    /// Path of the scanner source file, if there is one.
    pub scanner_path: Option<PathBuf>,
    pub external_files: Option<&'a [PathBuf]>,
    pub output_path: Option<PathBuf>,
    pub flags: &'a [&'a str],
    pub sanitize: bool,
    /// Grammar name; `CompileConfig::new` leaves it empty.
    pub name: String,
}
614
615impl<'a> CompileConfig<'a> {
616    #[must_use]
617    pub fn new(
618        src_path: &'a Path,
619        externals: Option<&'a [PathBuf]>,
620        output_path: Option<PathBuf>,
621    ) -> Self {
622        Self {
623            src_path,
624            header_paths: vec![src_path],
625            parser_path: src_path.join("parser.c"),
626            scanner_path: None,
627            external_files: externals,
628            output_path,
629            flags: &[],
630            sanitize: false,
631            name: String::new(),
632        }
633    }
634}
635
636unsafe impl Sync for Loader {}
637
638impl Loader {
639    pub fn new() -> LoaderResult<Self> {
640        let parser_lib_path = if let Ok(path) = env::var("TREE_SITTER_LIBDIR") {
641            PathBuf::from(path)
642        } else {
643            if cfg!(target_os = "macos") {
644                let legacy_apple_path = etcetera::base_strategy::Apple::new()?
645                    .cache_dir() // `$HOME/Library/Caches/`
646                    .join("tree-sitter");
647                if legacy_apple_path.exists() && legacy_apple_path.is_dir() {
648                    std::fs::remove_dir_all(&legacy_apple_path).map_err(|e| {
649                        LoaderError::IO(IoError::new(e, Some(legacy_apple_path.as_path())))
650                    })?;
651                }
652            }
653
654            etcetera::choose_base_strategy()?
655                .cache_dir()
656                .join("tree-sitter")
657                .join("lib")
658        };
659        Ok(Self::with_parser_lib_path(parser_lib_path))
660    }
661
662    #[must_use]
663    pub fn with_parser_lib_path(parser_lib_path: PathBuf) -> Self {
664        Self {
665            parser_lib_path,
666            languages_by_id: Vec::new(),
667            language_configurations: Vec::new(),
668            language_configuration_ids_by_file_type: HashMap::new(),
669            language_configuration_in_current_path: None,
670            language_configuration_ids_by_first_line_regex: HashMap::new(),
671            #[cfg(feature = "tree-sitter-highlight")]
672            highlight_names: Box::new(Mutex::new(Vec::new())),
673            #[cfg(feature = "tree-sitter-highlight")]
674            use_all_highlight_names: true,
675            debug_build: false,
676            sanitize_build: false,
677            force_rebuild: false,
678
679            #[cfg(feature = "wasm")]
680            wasm_store: Mutex::default(),
681        }
682    }
683
/// Replaces the stored highlight names with `names` and stops using all highlight names.
///
/// # Panics
///
/// Panics if the highlight-names mutex is poisoned.
#[cfg(feature = "tree-sitter-highlight")]
#[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
pub fn configure_highlights(&mut self, names: &[String]) {
    self.use_all_highlight_names = false;
    let mut stored = self.highlight_names.lock().unwrap();
    stored.clear();
    stored.extend_from_slice(names);
}
692
/// Returns a copy of the stored highlight names.
///
/// # Panics
///
/// Panics if the highlight-names mutex is poisoned.
#[must_use]
#[cfg(feature = "tree-sitter-highlight")]
#[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
pub fn highlight_names(&self) -> Vec<String> {
    let names = self.highlight_names.lock().unwrap();
    names.clone()
}
699
700    pub fn find_all_languages(&mut self, config: &Config) -> LoaderResult<()> {
701        if config.parser_directories.is_empty() {
702            warn!(concat!(
703                "You have not configured any parser directories!\n",
704                "Please run `tree-sitter init-config` and edit the resulting\n",
705                "configuration file to indicate where we should look for\n",
706                "language grammars.\n"
707            ));
708        }
709        for parser_container_dir in &config.parser_directories {
710            if let Ok(entries) = fs::read_dir(parser_container_dir) {
711                for entry in entries {
712                    let entry = entry.map_err(|e| LoaderError::IO(IoError::new(e, None)))?;
713                    if let Some(parser_dir_name) = entry.file_name().to_str() {
714                        if parser_dir_name.starts_with("tree-sitter-") {
715                            self.find_language_configurations_at_path(
716                                &parser_container_dir.join(parser_dir_name),
717                                false,
718                            )
719                            .ok();
720                        }
721                    }
722                }
723            }
724        }
725        Ok(())
726    }
727
728    pub fn languages_at_path(&mut self, path: &Path) -> LoaderResult<Vec<(Language, String)>> {
729        if let Ok(configurations) = self.find_language_configurations_at_path(path, true) {
730            let mut language_ids = configurations
731                .iter()
732                .map(|c| (c.language_id, c.language_name.clone()))
733                .collect::<Vec<_>>();
734            language_ids.sort_unstable();
735            language_ids.dedup();
736            language_ids
737                .into_iter()
738                .map(|(id, name)| Ok((self.language_for_id(id)?, name)))
739                .collect::<LoaderResult<Vec<_>>>()
740        } else {
741            Ok(Vec::new())
742        }
743    }
744
745    #[must_use]
746    pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> {
747        self.language_configurations
748            .iter()
749            .map(|c| (c, self.languages_by_id[c.language_id].0.as_ref()))
750            .collect()
751    }
752
753    pub fn language_configuration_for_scope(
754        &self,
755        scope: &str,
756    ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
757        for configuration in &self.language_configurations {
758            if configuration.scope.as_ref().is_some_and(|s| s == scope) {
759                let language = self.language_for_id(configuration.language_id)?;
760                return Ok(Some((language, configuration)));
761            }
762        }
763        Ok(None)
764    }
765
766    pub fn language_configuration_for_first_line_regex(
767        &self,
768        path: &Path,
769    ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
770        self.language_configuration_ids_by_first_line_regex
771            .iter()
772            .try_fold(None, |_, (regex, ids)| {
773                if let Some(regex) = Self::regex(Some(regex)) {
774                    let file = fs::File::open(path)
775                        .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
776                    let reader = BufReader::new(file);
777                    let first_line = reader
778                        .lines()
779                        .next()
780                        .transpose()
781                        .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
782                    if let Some(first_line) = first_line {
783                        if regex.is_match(&first_line) && !ids.is_empty() {
784                            let configuration = &self.language_configurations[ids[0]];
785                            let language = self.language_for_id(configuration.language_id)?;
786                            return Ok(Some((language, configuration)));
787                        }
788                    }
789                }
790
791                Ok(None)
792            })
793    }
794
795    pub fn language_configuration_for_file_name(
796        &self,
797        path: &Path,
798    ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
799        // Find all the language configurations that match this file name
800        // or a suffix of the file name.
801        let configuration_ids = path
802            .file_name()
803            .and_then(|n| n.to_str())
804            .and_then(|file_name| self.language_configuration_ids_by_file_type.get(file_name))
805            .or_else(|| {
806                let mut path = path.to_owned();
807                let mut extensions = Vec::with_capacity(2);
808                while let Some(extension) = path.extension() {
809                    extensions.push(extension.to_str()?.to_string());
810                    path = PathBuf::from(path.file_stem()?.to_os_string());
811                }
812                extensions.reverse();
813                // Try longest extension suffixs first (e.g. "foo.bar.baz"->"bar.baz"->"baz"),
814                // stopping at the first match.
815                (0..extensions.len())
816                    .map(|i| extensions[i..].join("."))
817                    .find_map(|key| self.language_configuration_ids_by_file_type.get(&key))
818            });
819
820        if let Some(configuration_ids) = configuration_ids {
821            if !configuration_ids.is_empty() {
822                let configuration = if configuration_ids.len() == 1 {
823                    &self.language_configurations[configuration_ids[0]]
824                }
825                // If multiple language configurations match, then determine which
826                // one to use by applying the configurations' content regexes.
827                else {
828                    let file_contents =
829                        fs::read(path).map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
830                    let file_contents = String::from_utf8_lossy(&file_contents);
831                    let mut best_score = -2isize;
832                    let mut best_configuration_id = None;
833                    for configuration_id in configuration_ids {
834                        let config = &self.language_configurations[*configuration_id];
835
836                        // If the language configuration has a content regex, assign
837                        // a score based on the length of the first match.
838                        let score;
839                        if let Some(content_regex) = &config.content_regex {
840                            if let Some(mat) = content_regex.find(&file_contents) {
841                                score = (mat.end() - mat.start()) as isize;
842                            }
843                            // If the content regex does not match, then *penalize* this
844                            // language configuration, so that language configurations
845                            // without content regexes are preferred over those with
846                            // non-matching content regexes.
847                            else {
848                                score = -1;
849                            }
850                        } else {
851                            score = 0;
852                        }
853                        if score > best_score {
854                            best_configuration_id = Some(*configuration_id);
855                            best_score = score;
856                        }
857                    }
858
859                    &self.language_configurations[best_configuration_id.unwrap()]
860                };
861
862                let language = self.language_for_id(configuration.language_id)?;
863                return Ok(Some((language, configuration)));
864            }
865        }
866
867        Ok(None)
868    }
869
870    pub fn language_configuration_for_injection_string(
871        &self,
872        string: &str,
873    ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
874        let mut best_match_length = 0;
875        let mut best_match_position = None;
876        for (i, configuration) in self.language_configurations.iter().enumerate() {
877            if let Some(injection_regex) = &configuration.injection_regex {
878                if let Some(mat) = injection_regex.find(string) {
879                    let length = mat.end() - mat.start();
880                    if length > best_match_length {
881                        best_match_position = Some(i);
882                        best_match_length = length;
883                    }
884                }
885            }
886        }
887
888        if let Some(i) = best_match_position {
889            let configuration = &self.language_configurations[i];
890            let language = self.language_for_id(configuration.language_id)?;
891            Ok(Some((language, configuration)))
892        } else {
893            Ok(None)
894        }
895    }
896
897    pub fn language_for_configuration(
898        &self,
899        configuration: &LanguageConfiguration,
900    ) -> LoaderResult<Language> {
901        self.language_for_id(configuration.language_id)
902    }
903
904    fn language_for_id(&self, id: usize) -> LoaderResult<Language> {
905        let (path, language, externals) = &self.languages_by_id[id];
906        language
907            .get_or_try_init(|| {
908                let src_path = path.join("src");
909                self.load_language_at_path(CompileConfig::new(
910                    &src_path,
911                    externals.as_deref(),
912                    None,
913                ))
914            })
915            .cloned()
916    }
917
918    pub fn compile_parser_at_path(
919        &self,
920        grammar_path: &Path,
921        output_path: PathBuf,
922        flags: &[&str],
923    ) -> LoaderResult<()> {
924        let src_path = grammar_path.join("src");
925        let mut config = CompileConfig::new(&src_path, None, Some(output_path));
926        config.flags = flags;
927        self.load_language_at_path(config).map(|_| ())
928    }
929
930    pub fn load_language_at_path(&self, mut config: CompileConfig) -> LoaderResult<Language> {
931        let grammar_path = config.src_path.join("grammar.json");
932        config.name = Self::grammar_json_name(&grammar_path)?;
933        self.load_language_at_path_with_name(config)
934    }
935
936    pub fn load_language_at_path_with_name(
937        &self,
938        mut config: CompileConfig,
939    ) -> LoaderResult<Language> {
940        let mut lib_name = config.name.clone();
941        let language_fn_name = format!("tree_sitter_{}", config.name.replace('-', "_"));
942        if self.debug_build {
943            lib_name.push_str(".debug._");
944        }
945
946        if self.sanitize_build {
947            lib_name.push_str(".sanitize._");
948            config.sanitize = true;
949        }
950
951        if config.output_path.is_none() {
952            fs::create_dir_all(&self.parser_lib_path).map_err(|e| {
953                LoaderError::IO(IoError::new(e, Some(self.parser_lib_path.as_path())))
954            })?;
955        }
956
957        let mut recompile = self.force_rebuild || config.output_path.is_some(); // if specified, always recompile
958
959        let output_path = config.output_path.unwrap_or_else(|| {
960            let mut path = self.parser_lib_path.join(lib_name);
961            path.set_extension(env::consts::DLL_EXTENSION);
962            #[cfg(feature = "wasm")]
963            if self.wasm_store.lock().unwrap().is_some() {
964                path.set_extension("wasm");
965            }
966            path
967        });
968        config.output_path = Some(output_path.clone());
969
970        let parser_path = config.src_path.join("parser.c");
971        config.scanner_path = self.get_scanner_path(config.src_path);
972
973        let mut paths_to_check = vec![parser_path];
974
975        if let Some(scanner_path) = config.scanner_path.as_ref() {
976            paths_to_check.push(scanner_path.clone());
977        }
978
979        paths_to_check.extend(
980            config
981                .external_files
982                .unwrap_or_default()
983                .iter()
984                .map(|p| config.src_path.join(p)),
985        );
986
987        if !recompile {
988            recompile = needs_recompile(&output_path, &paths_to_check)?;
989        }
990
991        #[cfg(feature = "wasm")]
992        if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() {
993            if recompile {
994                self.compile_parser_to_wasm(
995                    &config.name,
996                    config.src_path,
997                    config
998                        .scanner_path
999                        .as_ref()
1000                        .and_then(|p| p.strip_prefix(config.src_path).ok()),
1001                    &output_path,
1002                )?;
1003            }
1004
1005            let wasm_bytes = fs::read(&output_path)
1006                .map_err(|e| LoaderError::IO(IoError::new(e, Some(output_path.as_path()))))?;
1007            return Ok(wasm_store.load_language(&config.name, &wasm_bytes)?);
1008        }
1009
1010        // Create a unique lock path based on the output path hash to prevent
1011        // interference when multiple processes build the same grammar (by name)
1012        // to different output locations
1013        let lock_hash = {
1014            let mut hasher = std::hash::DefaultHasher::new();
1015            output_path.hash(&mut hasher);
1016            format!("{:x}", hasher.finish())
1017        };
1018
1019        let lock_path = if env::var("CROSS_RUNNER").is_ok() {
1020            tempfile::tempdir()
1021                .expect("create a temp dir")
1022                .path()
1023                .to_path_buf()
1024        } else {
1025            etcetera::choose_base_strategy()?.cache_dir()
1026        }
1027        .join("tree-sitter")
1028        .join("lock")
1029        .join(format!("{}-{lock_hash}.lock", config.name));
1030
1031        if let Ok(lock_file) = fs::OpenOptions::new().write(true).open(&lock_path) {
1032            recompile = false;
1033            if lock_file.try_lock_exclusive().is_err() {
1034                // if we can't acquire the lock, another process is compiling the parser, wait for
1035                // it and don't recompile
1036                lock_file
1037                    .lock_exclusive()
1038                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
1039                recompile = false;
1040            } else {
1041                // if we can acquire the lock, check if the lock file is older than 30 seconds, a
1042                // run that was interrupted and left the lock file behind should not block
1043                // subsequent runs
1044                let time = lock_file
1045                    .metadata()
1046                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?
1047                    .modified()
1048                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?
1049                    .elapsed()?
1050                    .as_secs();
1051                if time > 30 {
1052                    fs::remove_file(&lock_path)
1053                        .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
1054                    recompile = true;
1055                }
1056            }
1057        }
1058
1059        if recompile {
1060            let parent_path = lock_path.parent().unwrap();
1061            fs::create_dir_all(parent_path)
1062                .map_err(|e| LoaderError::IO(IoError::new(e, Some(parent_path))))?;
1063            let lock_file = fs::OpenOptions::new()
1064                .create(true)
1065                .truncate(true)
1066                .write(true)
1067                .open(&lock_path)
1068                .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
1069            lock_file
1070                .lock_exclusive()
1071                .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
1072
1073            self.compile_parser_to_dylib(&config, &lock_file, &lock_path)?;
1074
1075            if config.scanner_path.is_some() {
1076                self.check_external_scanner(&output_path)?;
1077            }
1078        }
1079
1080        // Ensure the dynamic library exists before trying to load it. This can
1081        // happen in race conditions where we couldn't acquire the lock because
1082        // another process was compiling but it still hasn't finished by the
1083        // time we reach this point, so the output file still doesn't exist.
1084        //
1085        // Instead of allowing the `load_language` call below to fail, return a
1086        // clearer error to the user here.
1087        if !output_path.exists() {
1088            let msg = format!(
1089                "Dynamic library `{}` not found after build attempt. \
1090                Are you running multiple processes building to the same output location?",
1091                output_path.display()
1092            );
1093
1094            Err(LoaderError::IO(IoError::new(
1095                std::io::Error::new(std::io::ErrorKind::NotFound, msg),
1096                Some(output_path.as_path()),
1097            )))?;
1098        }
1099
1100        Self::load_language(&output_path, &language_fn_name)
1101    }
1102
1103    pub fn load_language(path: &Path, function_name: &str) -> LoaderResult<Language> {
1104        let library = unsafe { Library::new(path) }.map_err(|e| {
1105            LoaderError::Library(LibraryError {
1106                error: e,
1107                path: path.to_string_lossy().to_string(),
1108            })
1109        })?;
1110        let language = unsafe {
1111            let language_fn = library
1112                .get::<Symbol<unsafe extern "C" fn() -> Language>>(function_name.as_bytes())
1113                .map_err(|e| {
1114                    LoaderError::Symbol(SymbolError {
1115                        error: e,
1116                        symbol_name: function_name.to_string(),
1117                        path: path.to_string_lossy().to_string(),
1118                    })
1119                })?;
1120            language_fn()
1121        };
1122        mem::forget(library);
1123        Ok(language)
1124    }
1125
1126    fn compile_parser_to_dylib(
1127        &self,
1128        config: &CompileConfig,
1129        lock_file: &fs::File,
1130        lock_path: &Path,
1131    ) -> LoaderResult<()> {
1132        let mut cc_config = cc::Build::new();
1133        cc_config
1134            .cargo_metadata(false)
1135            .cargo_warnings(false)
1136            .target(BUILD_TARGET)
1137            // BUILD_TARGET from the build environment becomes a runtime host for cc.
1138            // Otherwise, when cross compiled, cc will keep looking for a cross-compiler
1139            // on the target system instead of the native compiler.
1140            .host(BUILD_TARGET)
1141            .debug(self.debug_build)
1142            .file(&config.parser_path)
1143            .includes(&config.header_paths)
1144            .std("c11");
1145
1146        if let Some(scanner_path) = config.scanner_path.as_ref() {
1147            cc_config.file(scanner_path);
1148        }
1149
1150        if self.debug_build {
1151            cc_config.opt_level(0).extra_warnings(true);
1152        } else {
1153            cc_config.opt_level(2).extra_warnings(false);
1154        }
1155
1156        for flag in config.flags {
1157            cc_config.define(flag, None);
1158        }
1159
1160        let compiler = cc_config.get_compiler();
1161        let mut command = Command::new(compiler.path());
1162        command.args(compiler.args());
1163        for (key, value) in compiler.env() {
1164            command.env(key, value);
1165        }
1166
1167        let output_path = config.output_path.as_ref().unwrap();
1168
1169        let temp_dir = if compiler.is_like_msvc() {
1170            let out = format!("-out:{}", output_path.to_str().unwrap());
1171            command.arg(if self.debug_build { "-LDd" } else { "-LD" });
1172            command.arg("-utf-8");
1173
1174            // Windows creates intermediate files when compiling (.exp, .lib, .obj), which causes
1175            // issues when multiple processes are compiling in the same directory. This creates a
1176            // temporary directory for those files to go into, which is deleted after compilation.
1177            let temp_dir = output_path.parent().unwrap().join(format!(
1178                "tmp_{}_{:?}",
1179                std::process::id(),
1180                std::thread::current().id()
1181            ));
1182            std::fs::create_dir_all(&temp_dir).unwrap();
1183
1184            command.arg(format!("/Fo{}\\", temp_dir.display()));
1185            command.args(cc_config.get_files());
1186            command.arg("-link").arg(out);
1187            command.arg(format!("/IMPLIB:{}.lib", temp_dir.join("temp").display()));
1188
1189            Some(temp_dir)
1190        } else {
1191            command.arg("-Werror=implicit-function-declaration");
1192            if cfg!(any(target_os = "macos", target_os = "ios")) {
1193                command.arg("-dynamiclib");
1194                // TODO: remove when supported
1195                command.arg("-UTREE_SITTER_REUSE_ALLOCATOR");
1196            } else {
1197                command.arg("-shared");
1198                command.arg("-Wl,--no-undefined");
1199                #[cfg(target_os = "openbsd")]
1200                command.arg("-lc");
1201            }
1202            command.args(cc_config.get_files());
1203            command.arg("-o").arg(output_path);
1204
1205            None
1206        };
1207
1208        let output = command.output().map_err(|e| {
1209            LoaderError::Compiler(CompilerError {
1210                error: e,
1211                command: Box::new(command),
1212            })
1213        })?;
1214
1215        if let Some(temp_dir) = temp_dir {
1216            let _ = fs::remove_dir_all(temp_dir);
1217        }
1218
1219        FileExt::unlock(lock_file)
1220            .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path))))?;
1221        fs::remove_file(lock_path)
1222            .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path))))?;
1223
1224        if output.status.success() {
1225            Ok(())
1226        } else {
1227            Err(LoaderError::Compilation(
1228                String::from_utf8_lossy(&output.stdout).to_string(),
1229                String::from_utf8_lossy(&output.stderr).to_string(),
1230            ))
1231        }
1232    }
1233
1234    #[cfg(unix)]
1235    fn check_external_scanner(&self, library_path: &Path) -> LoaderResult<()> {
1236        let section = " T ";
1237        // Older ppc toolchains incorrectly report functions in the Data section. This bug has been
1238        // fixed, but we still need to account for older systems.
1239        let old_ppc_section = if cfg!(all(target_arch = "powerpc64", target_os = "linux")) {
1240            Some(" D ")
1241        } else {
1242            None
1243        };
1244        let nm_cmd = env::var("NM").unwrap_or_else(|_| "nm".to_owned());
1245        let command = Command::new(nm_cmd)
1246            .arg("--defined-only")
1247            .arg(library_path)
1248            .output();
1249        if let Ok(output) = command {
1250            if output.status.success() {
1251                let mut non_static_symbols = String::new();
1252                for line in String::from_utf8_lossy(&output.stdout).lines() {
1253                    if line.contains(section) || old_ppc_section.is_some_and(|s| line.contains(s)) {
1254                        if let Some(function_name) =
1255                            line.split_whitespace().collect::<Vec<_>>().get(2)
1256                        {
1257                            if !line.contains("tree_sitter_") {
1258                                writeln!(&mut non_static_symbols, "  `{function_name}`").unwrap();
1259                            }
1260                        }
1261                    }
1262                }
1263                if !non_static_symbols.is_empty() {
1264                    warn!(
1265                        "Found non-static non-tree-sitter functions in the external scanner\n{non_static_symbols}\n{}",
1266                        concat!(
1267                            "Consider making these functions static, they can cause conflicts ",
1268                            "when another tree-sitter project uses the same function name."
1269                        )
1270                    );
1271                }
1272            }
1273        } else {
1274            warn!(
1275                "Failed to run `nm` to verify symbols in {}",
1276                library_path.display()
1277            );
1278        }
1279
1280        Ok(())
1281    }
1282
    /// Windows variant of the external-scanner symbol check.
    ///
    /// Currently a no-op: it ignores `_library_path` and always returns
    /// `Ok(())`.
    #[cfg(windows)]
    fn check_external_scanner(&self, _library_path: &Path) -> LoaderResult<()> {
        // TODO: there's no nm command on windows, whoever wants to implement this can and should :)
        Ok(())
    }
1288
1289    pub fn compile_parser_to_wasm(
1290        &self,
1291        language_name: &str,
1292        src_path: &Path,
1293        scanner_filename: Option<&Path>,
1294        output_path: &Path,
1295    ) -> LoaderResult<()> {
1296        let clang_executable = self.ensure_wasi_sdk_exists()?;
1297
1298        let mut command = Command::new(&clang_executable);
1299        command.current_dir(src_path).args([
1300            "--target=wasm32-unknown-wasi",
1301            "-o",
1302            output_path.to_str().unwrap(),
1303            "-fPIC",
1304            "-shared",
1305            if self.debug_build { "-g" } else { "-Os" },
1306            format!("-Wl,--export=tree_sitter_{language_name}").as_str(),
1307            "-Wl,--allow-undefined",
1308            "-Wl,--no-entry",
1309            "-nostdlib",
1310            "-fno-exceptions",
1311            "-fvisibility=hidden",
1312            "-I",
1313            ".",
1314            "parser.c",
1315        ]);
1316
1317        if let Some(scanner_filename) = scanner_filename {
1318            command.arg(scanner_filename);
1319        }
1320
1321        let output = command.output().map_err(LoaderError::WasmCompiler)?;
1322
1323        if !output.status.success() {
1324            return Err(LoaderError::WasmCompilation(
1325                String::from_utf8_lossy(&output.stderr).to_string(),
1326            ));
1327        }
1328
1329        Ok(())
1330    }
1331
1332    /// Extracts a tar.gz archive with `tar`, stripping the first path component.
1333    fn extract_tar_gz_with_strip(
1334        &self,
1335        archive_path: &Path,
1336        destination: &Path,
1337    ) -> LoaderResult<()> {
1338        let status = Command::new("tar")
1339            .arg("-xzf")
1340            .arg(archive_path)
1341            .arg("--strip-components=1")
1342            .arg("-C")
1343            .arg(destination)
1344            .status()
1345            .map_err(|e| LoaderError::Tar(archive_path.to_string_lossy().to_string(), e))?;
1346
1347        if !status.success() {
1348            return Err(LoaderError::Extraction(
1349                archive_path.to_string_lossy().to_string(),
1350                destination.to_string_lossy().to_string(),
1351            ));
1352        }
1353
1354        Ok(())
1355    }
1356
1357    /// This ensures that the wasi-sdk is available, downloading and extracting it if necessary,
1358    /// and returns the path to the `clang` executable.
1359    ///
1360    /// If `TREE_SITTER_WASI_SDK_PATH` is set, it will use that path to look for the clang executable.
1361    fn ensure_wasi_sdk_exists(&self) -> LoaderResult<PathBuf> {
1362        let possible_executables = if cfg!(windows) {
1363            vec![
1364                "clang.exe",
1365                "wasm32-unknown-wasi-clang.exe",
1366                "wasm32-wasi-clang.exe",
1367            ]
1368        } else {
1369            vec!["clang", "wasm32-unknown-wasi-clang", "wasm32-wasi-clang"]
1370        };
1371
1372        if let Ok(wasi_sdk_path) = std::env::var("TREE_SITTER_WASI_SDK_PATH") {
1373            let wasi_sdk_dir = PathBuf::from(wasi_sdk_path);
1374
1375            for exe in &possible_executables {
1376                let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1377                if clang_exe.exists() {
1378                    return Ok(clang_exe);
1379                }
1380            }
1381
1382            return Err(LoaderError::WasiSDKClang(WasiSDKClangError {
1383                wasi_sdk_dir: wasi_sdk_dir.to_string_lossy().to_string(),
1384                possible_executables,
1385                download: false,
1386            }));
1387        }
1388
1389        let cache_dir = etcetera::choose_base_strategy()?
1390            .cache_dir()
1391            .join("tree-sitter");
1392        fs::create_dir_all(&cache_dir)
1393            .map_err(|e| LoaderError::IO(IoError::new(e, Some(cache_dir.as_path()))))?;
1394
1395        let wasi_sdk_dir = cache_dir.join("wasi-sdk");
1396
1397        for exe in &possible_executables {
1398            let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1399            if clang_exe.exists() {
1400                return Ok(clang_exe);
1401            }
1402        }
1403
1404        fs::create_dir_all(&wasi_sdk_dir)
1405            .map_err(|e| LoaderError::IO(IoError::new(e, Some(wasi_sdk_dir.as_path()))))?;
1406
1407        let arch_os = if cfg!(target_os = "macos") {
1408            if cfg!(target_arch = "aarch64") {
1409                "arm64-macos"
1410            } else {
1411                "x86_64-macos"
1412            }
1413        } else if cfg!(target_os = "windows") {
1414            if cfg!(target_arch = "aarch64") {
1415                "arm64-windows"
1416            } else {
1417                "x86_64-windows"
1418            }
1419        } else if cfg!(target_os = "linux") {
1420            if cfg!(target_arch = "aarch64") {
1421                "arm64-linux"
1422            } else {
1423                "x86_64-linux"
1424            }
1425        } else {
1426            return Err(LoaderError::WasiSDKPlatform);
1427        };
1428
1429        let sdk_filename = format!("wasi-sdk-{WASI_SDK_VERSION}-{arch_os}.tar.gz");
1430        let wasi_sdk_major_version = WASI_SDK_VERSION
1431            .trim_end_matches(char::is_numeric) // trim minor version...
1432            .trim_end_matches('.'); // ...and '.' separator
1433        let sdk_url = format!(
1434            "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-{wasi_sdk_major_version}/{sdk_filename}",
1435        );
1436
1437        info!("Downloading wasi-sdk from {sdk_url}...");
1438        let temp_tar_path = cache_dir.join(sdk_filename);
1439
1440        let status = Command::new("curl")
1441            .arg("-f")
1442            .arg("-L")
1443            .arg("-o")
1444            .arg(&temp_tar_path)
1445            .arg(&sdk_url)
1446            .status()
1447            .map_err(|e| LoaderError::Curl(sdk_url.clone(), e))?;
1448
1449        if !status.success() {
1450            return Err(LoaderError::WasiSDKDownload(sdk_url));
1451        }
1452
1453        info!("Extracting wasi-sdk to {}...", wasi_sdk_dir.display());
1454        self.extract_tar_gz_with_strip(&temp_tar_path, &wasi_sdk_dir)?;
1455
1456        fs::remove_file(temp_tar_path).ok();
1457        for exe in &possible_executables {
1458            let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1459            if clang_exe.exists() {
1460                return Ok(clang_exe);
1461            }
1462        }
1463
1464        Err(LoaderError::WasiSDKClang(WasiSDKClangError {
1465            wasi_sdk_dir: wasi_sdk_dir.to_string_lossy().to_string(),
1466            possible_executables,
1467            download: true,
1468        }))
1469    }
1470
1471    #[must_use]
1472    #[cfg(feature = "tree-sitter-highlight")]
1473    pub fn highlight_config_for_injection_string<'a>(
1474        &'a self,
1475        string: &str,
1476    ) -> Option<&'a HighlightConfiguration> {
1477        match self.language_configuration_for_injection_string(string) {
1478            Err(e) => {
1479                error!("Failed to load language for injection string '{string}': {e}");
1480                None
1481            }
1482            Ok(None) => None,
1483            Ok(Some((language, configuration))) => {
1484                match configuration.highlight_config(language, None) {
1485                    Err(e) => {
1486                        error!(
1487                            "Failed to load higlight config for injection string '{string}': {e}"
1488                        );
1489                        None
1490                    }
1491                    Ok(None) => None,
1492                    Ok(Some(config)) => Some(config),
1493                }
1494            }
1495        }
1496    }
1497
1498    #[must_use]
1499    pub fn get_language_configuration_in_current_path(&self) -> Option<&LanguageConfiguration> {
1500        self.language_configuration_in_current_path
1501            .map(|i| &self.language_configurations[i])
1502    }
1503
1504    pub fn find_language_configurations_at_path(
1505        &mut self,
1506        parser_path: &Path,
1507        set_current_path_config: bool,
1508    ) -> LoaderResult<&[LanguageConfiguration]> {
1509        let initial_language_configuration_count = self.language_configurations.len();
1510
1511        match TreeSitterJSON::from_file(parser_path) {
1512            Ok(config) => {
1513                let language_count = self.languages_by_id.len();
1514                for grammar in config.grammars {
1515                    // Determine the path to the parser directory. This can be specified in
1516                    // the tree-sitter.json, but defaults to the directory containing the
1517                    // tree-sitter.json.
1518                    let language_path =
1519                        parser_path.join(grammar.path.unwrap_or(PathBuf::from(".")));
1520
1521                    // Determine if a previous language configuration in this package.json file
1522                    // already uses the same language.
1523                    let mut language_id = None;
1524                    for (id, (path, _, _)) in
1525                        self.languages_by_id.iter().enumerate().skip(language_count)
1526                    {
1527                        if language_path == *path {
1528                            language_id = Some(id);
1529                        }
1530                    }
1531
1532                    // If not, add a new language path to the list.
1533                    let language_id = if let Some(language_id) = language_id {
1534                        language_id
1535                    } else {
1536                        self.languages_by_id.push((
1537                            language_path,
1538                            OnceCell::new(),
1539                            grammar
1540                                .external_files
1541                                .clone()
1542                                .into_vec()
1543                                .map(|files| {
1544                                    files
1545                                        .into_iter()
1546                                        .map(|path| {
1547                                            let path = parser_path.join(path);
1548                                            // prevent p being above/outside of parser_path
1549                                            if path.starts_with(parser_path) {
1550                                                Ok(path)
1551                                            } else {
1552                                                Err(LoaderError::ExternalFile(
1553                                                    path.to_string_lossy().to_string(),
1554                                                    parser_path.to_string_lossy().to_string(),
1555                                                ))
1556                                            }
1557                                        })
1558                                        .collect::<LoaderResult<Vec<_>>>()
1559                                })
1560                                .transpose()?,
1561                        ));
1562                        self.languages_by_id.len() - 1
1563                    };
1564
1565                    let configuration = LanguageConfiguration {
1566                        root_path: parser_path.to_path_buf(),
1567                        language_name: grammar.name,
1568                        scope: Some(grammar.scope),
1569                        language_id,
1570                        file_types: grammar.file_types.unwrap_or_default(),
1571                        content_regex: Self::regex(grammar.content_regex.as_deref()),
1572                        first_line_regex: Self::regex(grammar.first_line_regex.as_deref()),
1573                        injection_regex: Self::regex(grammar.injection_regex.as_deref()),
1574                        injections_filenames: grammar.injections.into_vec(),
1575                        locals_filenames: grammar.locals.into_vec(),
1576                        tags_filenames: grammar.tags.into_vec(),
1577                        highlights_filenames: grammar.highlights.into_vec(),
1578                        #[cfg(feature = "tree-sitter-highlight")]
1579                        highlight_config: OnceCell::new(),
1580                        #[cfg(feature = "tree-sitter-tags")]
1581                        tags_config: OnceCell::new(),
1582                        #[cfg(feature = "tree-sitter-highlight")]
1583                        highlight_names: &self.highlight_names,
1584                        #[cfg(feature = "tree-sitter-highlight")]
1585                        use_all_highlight_names: self.use_all_highlight_names,
1586                        _phantom: PhantomData,
1587                    };
1588
1589                    for file_type in &configuration.file_types {
1590                        self.language_configuration_ids_by_file_type
1591                            .entry(file_type.clone())
1592                            .or_default()
1593                            .push(self.language_configurations.len());
1594                    }
1595                    if let Some(first_line_regex) = &configuration.first_line_regex {
1596                        self.language_configuration_ids_by_first_line_regex
1597                            .entry(first_line_regex.to_string())
1598                            .or_default()
1599                            .push(self.language_configurations.len());
1600                    }
1601
1602                    self.language_configurations.push(unsafe {
1603                        mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
1604                            configuration,
1605                        )
1606                    });
1607
1608                    if set_current_path_config
1609                        && self.language_configuration_in_current_path.is_none()
1610                    {
1611                        self.language_configuration_in_current_path =
1612                            Some(self.language_configurations.len() - 1);
1613                    }
1614                }
1615            }
1616            Err(LoaderError::Serialization(e)) => {
1617                warn!(
1618                    "Failed to parse {} -- {e}",
1619                    parser_path.join("tree-sitter.json").display()
1620                );
1621            }
1622            _ => {}
1623        }
1624
1625        // If we didn't find any language configurations in the tree-sitter.json file,
1626        // but there is a grammar.json file, then use the grammar file to form a simple
1627        // language configuration.
1628        if self.language_configurations.len() == initial_language_configuration_count
1629            && parser_path.join("src").join("grammar.json").exists()
1630        {
1631            let grammar_path = parser_path.join("src").join("grammar.json");
1632            let language_name = Self::grammar_json_name(&grammar_path)?;
1633            let configuration = LanguageConfiguration {
1634                root_path: parser_path.to_owned(),
1635                language_name,
1636                language_id: self.languages_by_id.len(),
1637                file_types: Vec::new(),
1638                scope: None,
1639                content_regex: None,
1640                first_line_regex: None,
1641                injection_regex: None,
1642                injections_filenames: None,
1643                locals_filenames: None,
1644                highlights_filenames: None,
1645                tags_filenames: None,
1646                #[cfg(feature = "tree-sitter-highlight")]
1647                highlight_config: OnceCell::new(),
1648                #[cfg(feature = "tree-sitter-tags")]
1649                tags_config: OnceCell::new(),
1650                #[cfg(feature = "tree-sitter-highlight")]
1651                highlight_names: &self.highlight_names,
1652                #[cfg(feature = "tree-sitter-highlight")]
1653                use_all_highlight_names: self.use_all_highlight_names,
1654                _phantom: PhantomData,
1655            };
1656            self.language_configurations.push(unsafe {
1657                mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
1658                    configuration,
1659                )
1660            });
1661            self.languages_by_id
1662                .push((parser_path.to_owned(), OnceCell::new(), None));
1663        }
1664
1665        Ok(&self.language_configurations[initial_language_configuration_count..])
1666    }
1667
1668    fn regex(pattern: Option<&str>) -> Option<Regex> {
1669        pattern.and_then(|r| RegexBuilder::new(r).multi_line(true).build().ok())
1670    }
1671
1672    fn grammar_json_name(grammar_path: &Path) -> LoaderResult<String> {
1673        let file = fs::File::open(grammar_path)
1674            .map_err(|e| LoaderError::IO(IoError::new(e, Some(grammar_path))))?;
1675
1676        let first_three_lines = BufReader::new(file)
1677            .lines()
1678            .take(3)
1679            .collect::<Result<Vec<_>, std::io::Error>>()
1680            .map_err(|_| LoaderError::GrammarJSON(grammar_path.to_string_lossy().to_string()))?
1681            .join("\n");
1682
1683        let name = GRAMMAR_NAME_REGEX
1684            .captures(&first_three_lines)
1685            .and_then(|c| c.get(1))
1686            .ok_or_else(|| LoaderError::GrammarJSON(grammar_path.to_string_lossy().to_string()))?;
1687
1688        Ok(name.as_str().to_string())
1689    }
1690
1691    pub fn select_language(
1692        &mut self,
1693        path: Option<&Path>,
1694        current_dir: &Path,
1695        scope: Option<&str>,
1696        // path to dynamic library, name of language
1697        lib_info: Option<&(PathBuf, &str)>,
1698    ) -> LoaderResult<Language> {
1699        if let Some((ref lib_path, language_name)) = lib_info {
1700            let language_fn_name = format!("tree_sitter_{}", language_name.replace('-', "_"));
1701            Self::load_language(lib_path, &language_fn_name)
1702        } else if let Some(scope) = scope {
1703            if let Some(config) = self
1704                .language_configuration_for_scope(scope)
1705                .map_err(|e| LoaderError::ScopeLoad(scope.to_string(), Box::new(e)))?
1706            {
1707                Ok(config.0)
1708            } else {
1709                Err(LoaderError::UnknownScope(scope.to_string()))
1710            }
1711        } else if let Some((lang, _)) = if let Some(path) = path {
1712            self.language_configuration_for_file_name(path)
1713                .map_err(|e| {
1714                    LoaderError::FileNameLoad(
1715                        path.file_name().unwrap().to_string_lossy().to_string(),
1716                        Box::new(e),
1717                    )
1718                })?
1719        } else {
1720            None
1721        } {
1722            Ok(lang)
1723        } else if let Some(id) = self.language_configuration_in_current_path {
1724            Ok(self.language_for_id(self.language_configurations[id].language_id)?)
1725        } else if let Some(lang) = self
1726            .languages_at_path(current_dir)
1727            .map_err(|e| LoaderError::CurrentDirectoryLoad(Box::new(e)))?
1728            .first()
1729            .cloned()
1730        {
1731            Ok(lang.0)
1732        } else if let Some(lang) = if let Some(path) = path {
1733            self.language_configuration_for_first_line_regex(path)?
1734        } else {
1735            None
1736        } {
1737            Ok(lang.0)
1738        } else {
1739            Err(LoaderError::NoLanguage)
1740        }
1741    }
1742
1743    pub const fn debug_build(&mut self, flag: bool) {
1744        self.debug_build = flag;
1745    }
1746
1747    pub const fn sanitize_build(&mut self, flag: bool) {
1748        self.sanitize_build = flag;
1749    }
1750
1751    pub const fn force_rebuild(&mut self, rebuild: bool) {
1752        self.force_rebuild = rebuild;
1753    }
1754
1755    #[cfg(feature = "wasm")]
1756    #[cfg_attr(docsrs, doc(cfg(feature = "wasm")))]
1757    pub fn use_wasm(&mut self, engine: &tree_sitter::wasmtime::Engine) {
1758        *self.wasm_store.lock().unwrap() = Some(tree_sitter::WasmStore::new(engine).unwrap());
1759    }
1760
1761    #[must_use]
1762    pub fn get_scanner_path(&self, src_path: &Path) -> Option<PathBuf> {
1763        let path = src_path.join("scanner.c");
1764        path.exists().then_some(path)
1765    }
1766}
1767
1768impl LanguageConfiguration<'_> {
1769    #[cfg(feature = "tree-sitter-highlight")]
1770    pub fn highlight_config(
1771        &self,
1772        language: Language,
1773        paths: Option<&[PathBuf]>,
1774    ) -> LoaderResult<Option<&HighlightConfiguration>> {
1775        let (highlights_filenames, injections_filenames, locals_filenames) = match paths {
1776            Some(paths) => (
1777                Some(
1778                    paths
1779                        .iter()
1780                        .filter(|p| p.ends_with(DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME))
1781                        .cloned()
1782                        .collect::<Vec<_>>(),
1783                ),
1784                Some(
1785                    paths
1786                        .iter()
1787                        .filter(|p| p.ends_with(DEFAULT_TAGS_QUERY_FILE_NAME))
1788                        .cloned()
1789                        .collect::<Vec<_>>(),
1790                ),
1791                Some(
1792                    paths
1793                        .iter()
1794                        .filter(|p| p.ends_with(DEFAULT_LOCALS_QUERY_FILE_NAME))
1795                        .cloned()
1796                        .collect::<Vec<_>>(),
1797                ),
1798            ),
1799            None => (None, None, None),
1800        };
1801        self.highlight_config
1802            .get_or_try_init(|| {
1803                let (highlights_query, highlight_ranges) = self.read_queries(
1804                    if highlights_filenames.is_some() {
1805                        highlights_filenames.as_deref()
1806                    } else {
1807                        self.highlights_filenames.as_deref()
1808                    },
1809                    DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME,
1810                )?;
1811                let (injections_query, injection_ranges) = self.read_queries(
1812                    if injections_filenames.is_some() {
1813                        injections_filenames.as_deref()
1814                    } else {
1815                        self.injections_filenames.as_deref()
1816                    },
1817                    DEFAULT_INJECTIONS_QUERY_FILE_NAME,
1818                )?;
1819                let (locals_query, locals_ranges) = self.read_queries(
1820                    if locals_filenames.is_some() {
1821                        locals_filenames.as_deref()
1822                    } else {
1823                        self.locals_filenames.as_deref()
1824                    },
1825                    DEFAULT_LOCALS_QUERY_FILE_NAME,
1826                )?;
1827
1828                if highlights_query.is_empty() {
1829                    Ok(None)
1830                } else {
1831                    let mut result = HighlightConfiguration::new(
1832                        language,
1833                        &self.language_name,
1834                        &highlights_query,
1835                        &injections_query,
1836                        &locals_query,
1837                    )
1838                    .map_err(|error| match error.kind {
1839                        QueryErrorKind::Language => {
1840                            LoaderError::Query(LoaderQueryError { error, file: None })
1841                        }
1842                        _ => {
1843                            if error.offset < injections_query.len() {
1844                                Self::include_path_in_query_error(
1845                                    error,
1846                                    &injection_ranges,
1847                                    &injections_query,
1848                                    0,
1849                                )
1850                            } else if error.offset < injections_query.len() + locals_query.len() {
1851                                Self::include_path_in_query_error(
1852                                    error,
1853                                    &locals_ranges,
1854                                    &locals_query,
1855                                    injections_query.len(),
1856                                )
1857                            } else {
1858                                Self::include_path_in_query_error(
1859                                    error,
1860                                    &highlight_ranges,
1861                                    &highlights_query,
1862                                    injections_query.len() + locals_query.len(),
1863                                )
1864                            }
1865                        }
1866                    })?;
1867                    let mut all_highlight_names = self.highlight_names.lock().unwrap();
1868                    if self.use_all_highlight_names {
1869                        for capture_name in result.query.capture_names() {
1870                            if !all_highlight_names.iter().any(|x| x == capture_name) {
1871                                all_highlight_names.push((*capture_name).to_string());
1872                            }
1873                        }
1874                    }
1875                    result.configure(all_highlight_names.as_slice());
1876                    drop(all_highlight_names);
1877                    Ok(Some(result))
1878                }
1879            })
1880            .map(Option::as_ref)
1881    }
1882
1883    #[cfg(feature = "tree-sitter-tags")]
1884    pub fn tags_config(&self, language: Language) -> LoaderResult<Option<&TagsConfiguration>> {
1885        self.tags_config
1886            .get_or_try_init(|| {
1887                let (tags_query, tags_ranges) = self
1888                    .read_queries(self.tags_filenames.as_deref(), DEFAULT_TAGS_QUERY_FILE_NAME)?;
1889                let (locals_query, locals_ranges) = self.read_queries(
1890                    self.locals_filenames.as_deref(),
1891                    DEFAULT_LOCALS_QUERY_FILE_NAME,
1892                )?;
1893                if tags_query.is_empty() {
1894                    Ok(None)
1895                } else {
1896                    TagsConfiguration::new(language, &tags_query, &locals_query)
1897                        .map(Some)
1898                        .map_err(|error| {
1899                            if let TagsError::Query(error) = error {
1900                                if error.offset < locals_query.len() {
1901                                    Self::include_path_in_query_error(
1902                                        error,
1903                                        &locals_ranges,
1904                                        &locals_query,
1905                                        0,
1906                                    )
1907                                } else {
1908                                    Self::include_path_in_query_error(
1909                                        error,
1910                                        &tags_ranges,
1911                                        &tags_query,
1912                                        locals_query.len(),
1913                                    )
1914                                }
1915                            } else {
1916                                error.into()
1917                            }
1918                        })
1919                }
1920            })
1921            .map(Option::as_ref)
1922    }
1923
1924    #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1925    fn include_path_in_query_error(
1926        mut error: QueryError,
1927        ranges: &[(PathBuf, Range<usize>)],
1928        source: &str,
1929        start_offset: usize,
1930    ) -> LoaderError {
1931        let offset_within_section = error.offset - start_offset;
1932        let (path, range) = ranges
1933            .iter()
1934            .find(|(_, range)| range.contains(&offset_within_section))
1935            .unwrap_or_else(|| ranges.last().unwrap());
1936        error.offset = offset_within_section - range.start;
1937        error.row = source[range.start..offset_within_section]
1938            .matches('\n')
1939            .count();
1940        LoaderError::Query(LoaderQueryError {
1941            error,
1942            file: Some(path.to_string_lossy().to_string()),
1943        })
1944    }
1945
1946    #[allow(clippy::type_complexity)]
1947    #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1948    fn read_queries(
1949        &self,
1950        paths: Option<&[PathBuf]>,
1951        default_path: &str,
1952    ) -> LoaderResult<(String, Vec<(PathBuf, Range<usize>)>)> {
1953        let mut query = String::new();
1954        let mut path_ranges = Vec::new();
1955        if let Some(paths) = paths {
1956            for path in paths {
1957                let abs_path = self.root_path.join(path);
1958                let prev_query_len = query.len();
1959                query += &fs::read_to_string(&abs_path)
1960                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(abs_path.as_path()))))?;
1961                path_ranges.push((path.clone(), prev_query_len..query.len()));
1962            }
1963        } else {
1964            // highlights.scm is needed to test highlights, and tags.scm to test tags
1965            if default_path == DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME
1966                || default_path == DEFAULT_TAGS_QUERY_FILE_NAME
1967            {
1968                warn!(
1969                    concat!(
1970                        "You should add a `{}` entry pointing to the {} path in the `tree-sitter` ",
1971                        "object in the grammar's tree-sitter.json file. See more here: ",
1972                        "https://tree-sitter.github.io/tree-sitter/3-syntax-highlighting#query-paths"
1973                    ),
1974                    default_path.replace(".scm", ""),
1975                    default_path
1976                );
1977            }
1978            let queries_path = self.root_path.join("queries");
1979            let path = queries_path.join(default_path);
1980            if path.exists() {
1981                query = fs::read_to_string(&path)
1982                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(path.as_path()))))?;
1983                path_ranges.push((PathBuf::from(default_path), 0..query.len()));
1984            }
1985        }
1986
1987        Ok((query, path_ranges))
1988    }
1989}
1990
1991fn needs_recompile(lib_path: &Path, paths_to_check: &[PathBuf]) -> LoaderResult<bool> {
1992    if !lib_path.exists() {
1993        return Ok(true);
1994    }
1995    let lib_mtime = mtime(lib_path).map_err(|e| LoaderError::ModifiedTime(Box::new(e)))?;
1996    for path in paths_to_check {
1997        if mtime(path).map_err(|e| LoaderError::ModifiedTime(Box::new(e)))? > lib_mtime {
1998            return Ok(true);
1999        }
2000    }
2001    Ok(false)
2002}
2003
2004fn mtime(path: &Path) -> LoaderResult<SystemTime> {
2005    fs::metadata(path)
2006        .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?
2007        .modified()
2008        .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))
2009}