Skip to main content

tree_sitter_loader/
loader.rs

1#![cfg_attr(not(any(test, doctest)), doc = include_str!("../README.md"))]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3
4#[cfg(unix)]
5use std::fmt::Write as _;
6#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
7use std::ops::Range;
8#[cfg(feature = "tree-sitter-highlight")]
9use std::sync::Mutex;
10use std::{
11    collections::HashMap,
12    env, fs,
13    hash::{Hash as _, Hasher as _},
14    io::{BufRead, BufReader},
15    marker::PhantomData,
16    mem,
17    path::{Path, PathBuf},
18    process::Command,
19    sync::LazyLock,
20    time::{SystemTime, SystemTimeError},
21};
22
23use etcetera::BaseStrategy as _;
24use fs4::fs_std::FileExt;
25use libloading::{Library, Symbol};
26use log::{error, info, warn};
27use once_cell::unsync::OnceCell;
28use regex::{Regex, RegexBuilder};
29use semver::Version;
30use serde::{Deserialize, Deserializer, Serialize};
31use thiserror::Error;
32use tree_sitter::Language;
33#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
34use tree_sitter::QueryError;
35#[cfg(feature = "tree-sitter-highlight")]
36use tree_sitter::QueryErrorKind;
37#[cfg(feature = "wasm")]
38use tree_sitter::WasmError;
39#[cfg(feature = "tree-sitter-highlight")]
40use tree_sitter_highlight::HighlightConfiguration;
41#[cfg(feature = "tree-sitter-tags")]
42use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
43
/// Matches a `"name": "<value>"` pair (optional whitespace after the colon) and
/// captures the value, non-greedily up to the next double quote, in group 1.
static GRAMMAR_NAME_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#""name":\s*"(.*?)""#).unwrap());

/// Contents of the `../wasi-sdk-version` file, embedded at compile time with
/// leading and trailing ASCII whitespace removed.
const WASI_SDK_VERSION: &str = include_str!("../wasi-sdk-version").trim_ascii();

/// `Result` alias whose error type is [`LoaderError`].
pub type LoaderResult<T> = Result<T, LoaderError>;
50
51#[derive(Debug, Error)]
52pub enum LoaderError {
53    #[error(transparent)]
54    Compiler(CompilerError),
55    #[error("Parser compilation failed.\nStdout: {0}\nStderr: {1}")]
56    Compilation(String, String),
57    #[error("Failed to execute curl for {0} -- {1}")]
58    Curl(String, std::io::Error),
59    #[error("Failed to load language in current directory:\n{0}")]
60    CurrentDirectoryLoad(Box<Self>),
61    #[error("External file path {0} is outside of parser directory {1}")]
62    ExternalFile(String, String),
63    #[error("Failed to extract archive {0} to {1}")]
64    Extraction(String, String),
65    #[error("Failed to load language for file name {0}:\n{1}")]
66    FileNameLoad(String, Box<Self>),
67    #[error("Failed to parse the language name from grammar.json at {0}")]
68    GrammarJSON(String),
69    #[error(transparent)]
70    HomeDir(#[from] etcetera::HomeDirError),
71    #[error(transparent)]
72    IO(IoError),
73    #[error(transparent)]
74    Library(LibraryError),
75    #[error("Failed to compare binary and source timestamps:\n{0}")]
76    ModifiedTime(Box<Self>),
77    #[error("No language found")]
78    NoLanguage,
79    #[error(transparent)]
80    Query(LoaderQueryError),
81    #[error("Failed to load language for scope '{0}':\n{1}")]
82    ScopeLoad(String, Box<Self>),
83    #[error(transparent)]
84    Serialization(#[from] serde_json::Error),
85    #[error(transparent)]
86    Symbol(SymbolError),
87    #[error(transparent)]
88    Tags(#[from] TagsError),
89    #[error("Failed to execute tar for {0} -- {1}")]
90    Tar(String, std::io::Error),
91    #[error(transparent)]
92    Time(#[from] SystemTimeError),
93    #[error("Unknown scope '{0}'")]
94    UnknownScope(String),
95    #[error("Failed to download wasi-sdk from {0}")]
96    WasiSDKDownload(String),
97    #[error(transparent)]
98    WasiSDKClang(#[from] WasiSDKClangError),
99    #[error("Unsupported platform for wasi-sdk")]
100    WasiSDKPlatform,
101    #[cfg(feature = "wasm")]
102    #[error(transparent)]
103    Wasm(#[from] WasmError),
104    #[error("Failed to run wasi-sdk clang -- {0}")]
105    WasmCompiler(std::io::Error),
106    #[error("wasi-sdk clang command failed: {0}")]
107    WasmCompilation(String),
108}
109
110#[derive(Debug, Error)]
111pub struct CompilerError {
112    pub error: std::io::Error,
113    pub command: Box<Command>,
114}
115
116impl std::fmt::Display for CompilerError {
117    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
118        write!(
119            f,
120            "Failed to execute the C compiler with the following command:\n{:?}\nError: {}",
121            *self.command, self.error
122        )?;
123        Ok(())
124    }
125}
126
127#[derive(Debug, Error)]
128pub struct IoError {
129    pub error: std::io::Error,
130    pub path: Option<String>,
131}
132
133impl IoError {
134    fn new(error: std::io::Error, path: Option<&Path>) -> Self {
135        Self {
136            error,
137            path: path.map(|p| p.to_string_lossy().to_string()),
138        }
139    }
140}
141
142impl std::fmt::Display for IoError {
143    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
144        write!(f, "{}", self.error)?;
145        if let Some(ref path) = self.path {
146            write!(f, " ({path})")?;
147        }
148        Ok(())
149    }
150}
151
152#[derive(Debug, Error)]
153pub struct LibraryError {
154    pub error: libloading::Error,
155    pub path: String,
156}
157
158impl std::fmt::Display for LibraryError {
159    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
160        write!(
161            f,
162            "Error opening dynamic library {} -- {}",
163            self.path, self.error
164        )?;
165        Ok(())
166    }
167}
168
169#[derive(Debug, Error)]
170pub struct LoaderQueryError {
171    pub error: QueryError,
172    pub file: Option<String>,
173}
174
175impl std::fmt::Display for LoaderQueryError {
176    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
177        if let Some(ref path) = self.file {
178            writeln!(f, "Error in query file {path}:")?;
179        }
180        write!(f, "{}", self.error)?;
181        Ok(())
182    }
183}
184
185#[derive(Debug, Error)]
186pub struct SymbolError {
187    pub error: libloading::Error,
188    pub symbol_name: String,
189    pub path: String,
190}
191
192impl std::fmt::Display for SymbolError {
193    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
194        write!(
195            f,
196            "Failed to load symbol {} from {} -- {}",
197            self.symbol_name, self.path, self.error
198        )?;
199        Ok(())
200    }
201}
202
203#[derive(Debug, Error)]
204pub struct WasiSDKClangError {
205    pub wasi_sdk_dir: String,
206    pub possible_executables: Vec<&'static str>,
207    pub download: bool,
208}
209
210impl std::fmt::Display for WasiSDKClangError {
211    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
212        if self.download {
213            write!(
214                f,
215                "Failed to find clang executable in downloaded wasi-sdk at '{}'.",
216                self.wasi_sdk_dir
217            )?;
218        } else {
219            write!(f, "TREE_SITTER_WASI_SDK_PATH is set to '{}', but no clang executable found in 'bin/' directory.", self.wasi_sdk_dir)?;
220        }
221
222        let possible_exes = self.possible_executables.join(", ");
223        write!(f, " Looked for: {possible_exes}.")?;
224
225        Ok(())
226    }
227}
228
/// Default file name for the highlights query.
pub const DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME: &str = "highlights.scm";

/// Default file name for the injections query.
pub const DEFAULT_INJECTIONS_QUERY_FILE_NAME: &str = "injections.scm";

/// Default file name for the locals query.
pub const DEFAULT_LOCALS_QUERY_FILE_NAME: &str = "locals.scm";

/// Default file name for the tags query.
pub const DEFAULT_TAGS_QUERY_FILE_NAME: &str = "tags.scm";
236
/// Loader configuration, (de)serializable with serde.
#[derive(Default, Deserialize, Serialize)]
pub struct Config {
    /// Directories scanned by `Loader::find_all_languages`.
    ///
    /// Stored under the `parser-directories` key and empty when the key is absent.
    /// Deserialization goes through `deserialize_parser_directories`, which expands
    /// a leading `~` or `$HOME`.
    #[serde(default)]
    #[serde(
        rename = "parser-directories",
        deserialize_with = "deserialize_parser_directories"
    )]
    pub parser_directories: Vec<PathBuf>,
}
246
247#[derive(Serialize, Deserialize, Clone, Default)]
248#[serde(untagged)]
249pub enum PathsJSON {
250    #[default]
251    Empty,
252    Single(PathBuf),
253    Multiple(Vec<PathBuf>),
254}
255
256impl PathsJSON {
257    fn into_vec(self) -> Option<Vec<PathBuf>> {
258        match self {
259            Self::Empty => None,
260            Self::Single(s) => Some(vec![s]),
261            Self::Multiple(s) => Some(s),
262        }
263    }
264
265    const fn is_empty(&self) -> bool {
266        matches!(self, Self::Empty)
267    }
268
269    /// Represent this set of paths as a string that can be included in templates
270    #[must_use]
271    pub fn to_variable_value<'a>(&'a self, default: &'a PathBuf) -> &'a str {
272        match self {
273            Self::Empty => Some(default),
274            Self::Single(path_buf) => Some(path_buf),
275            Self::Multiple(paths) => paths.first(),
276        }
277        .map_or("", |path| path.as_os_str().to_str().unwrap_or(""))
278    }
279}
280
/// An author entry: either a plain string or an object with a `name` and optional
/// `email` / `url`. Being `untagged`, the serialized form is the bare payload.
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONAuthor {
    String(String),
    Object {
        name: String,
        email: Option<String>,
        url: Option<String>,
    },
}
291
/// A repository entry: either a plain string or an object carrying a `url`.
/// Being `untagged`, the serialized form is the bare payload.
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONRepository {
    String(String),
    Object { url: String },
}
298
/// Serde model of a package description with an optional `tree-sitter` section.
#[derive(Serialize, Deserialize)]
pub struct PackageJSON {
    pub name: String,
    pub version: Version,
    pub description: Option<String>,
    pub author: Option<PackageJSONAuthor>,
    pub maintainers: Option<Vec<PackageJSONAuthor>>,
    pub license: Option<String>,
    pub repository: Option<PackageJSONRepository>,
    /// Language configurations stored under the `tree-sitter` key; `None` when the
    /// key is absent, and omitted from the output when `None`.
    #[serde(default)]
    #[serde(rename = "tree-sitter", skip_serializing_if = "Option::is_none")]
    pub tree_sitter: Option<Vec<LanguageConfigurationJSON>>,
}
312
/// Serde default for `LanguageConfigurationJSON::path`: the relative path `.`.
fn default_path() -> PathBuf {
    Path::new(".").to_path_buf()
}
316
/// Serde model of one language configuration entry; keys are kebab-case.
#[derive(Serialize, Deserialize, Clone)]
#[serde(rename_all = "kebab-case")]
pub struct LanguageConfigurationJSON {
    /// Defaults to `.` (see `default_path`) when the key is absent.
    #[serde(default = "default_path")]
    pub path: PathBuf,
    pub scope: Option<String>,
    pub file_types: Option<Vec<String>>,
    pub content_regex: Option<String>,
    pub first_line_regex: Option<String>,
    pub injection_regex: Option<String>,
    // The path lists below default to `PathsJSON::Empty` when absent and are left
    // out of the serialized output while empty.
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
}
338
/// Serde model of a `tree-sitter.json` file (see `TreeSitterJSON::from_file`).
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct TreeSitterJSON {
    /// Stored under the `$schema` key.
    #[serde(rename = "$schema")]
    pub schema: Option<String>,
    pub grammars: Vec<Grammar>,
    pub metadata: Metadata,
    /// Falls back to `Bindings::default()` when the key is absent.
    #[serde(default)]
    pub bindings: Bindings,
}
349
350impl TreeSitterJSON {
351    pub fn from_file(path: &Path) -> LoaderResult<Self> {
352        let path = path.join("tree-sitter.json");
353        Ok(serde_json::from_str(&fs::read_to_string(&path).map_err(
354            |e| LoaderError::IO(IoError::new(e, Some(path.as_path()))),
355        )?)?)
356    }
357
358    #[must_use]
359    pub fn has_multiple_language_configs(&self) -> bool {
360        self.grammars.len() > 1
361    }
362}
363
/// Serde model of one entry of `TreeSitterJSON::grammars`; keys are kebab-case.
///
/// Fields marked `skip_serializing_if` are left out of the output when they are
/// `None` (or, for `PathsJSON`, empty).
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Grammar {
    pub name: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub camelcase: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    pub scope: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub path: Option<PathBuf>,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
    pub file_types: Option<Vec<String>>,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub injection_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub first_line_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub class_name: Option<String>,
}
395
/// Serde model of the `metadata` section of `TreeSitterJSON`.
#[derive(Serialize, Deserialize)]
pub struct Metadata {
    pub version: Version,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub authors: Option<Vec<Author>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub links: Option<Links>,
    /// Never read from or written to the serialized form (`serde(skip)`).
    #[serde(skip)]
    pub namespace: Option<String>,
}
410
/// Serde model of one entry of `Metadata::authors`; `email` and `url` are omitted
/// from the output when `None`.
#[derive(Serialize, Deserialize)]
pub struct Author {
    pub name: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub email: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub url: Option<String>,
}
419
/// Serde model of `Metadata::links`; `funding` is omitted from the output when `None`.
#[derive(Serialize, Deserialize)]
pub struct Links {
    pub repository: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub funding: Option<String>,
}
426
/// One on/off flag per language binding.
///
/// With the container-level `serde(default)`, keys missing from the input take
/// their value from the `Default` implementation.
#[derive(Serialize, Deserialize, Clone)]
#[serde(default)]
pub struct Bindings {
    pub c: bool,
    pub go: bool,
    pub java: bool,
    /// Never read from or written to the serialized form (`serde(skip)`).
    #[serde(skip)]
    pub kotlin: bool,
    pub node: bool,
    pub python: bool,
    pub rust: bool,
    pub swift: bool,
    pub zig: bool,
}
441
442impl Bindings {
443    /// return available languages and its default enabled state.
444    #[must_use]
445    pub const fn languages(&self) -> [(&'static str, bool); 8] {
446        [
447            ("c", true),
448            ("go", true),
449            ("java", false),
450            // Comment out Kotlin until the bindings are actually available.
451            // ("kotlin", false),
452            ("node", true),
453            ("python", true),
454            ("rust", true),
455            ("swift", true),
456            ("zig", false),
457        ]
458    }
459
460    /// construct Bindings from a language list. If a language isn't supported, its name will be put on the error part.
461    pub fn with_enabled_languages<'a, I>(languages: I) -> Result<Self, &'a str>
462    where
463        I: Iterator<Item = &'a str>,
464    {
465        let mut out = Self {
466            c: false,
467            go: false,
468            java: false,
469            kotlin: false,
470            node: false,
471            python: false,
472            rust: false,
473            swift: false,
474            zig: false,
475        };
476
477        for v in languages {
478            match v {
479                "c" => out.c = true,
480                "go" => out.go = true,
481                "java" => out.java = true,
482                // Comment out Kotlin until the bindings are actually available.
483                // "kotlin" => out.kotlin = true,
484                "node" => out.node = true,
485                "python" => out.python = true,
486                "rust" => out.rust = true,
487                "swift" => out.swift = true,
488                "zig" => out.zig = true,
489                unsupported => return Err(unsupported),
490            }
491        }
492
493        Ok(out)
494    }
495}
496
497impl Default for Bindings {
498    fn default() -> Self {
499        Self {
500            c: true,
501            go: true,
502            java: false,
503            kotlin: false,
504            node: true,
505            python: true,
506            rust: true,
507            swift: true,
508            zig: false,
509        }
510    }
511}
512
513// Replace `~` or `$HOME` with home path string.
514// (While paths like "~/.tree-sitter/config.json" can be deserialized,
515// they're not valid path for I/O modules.)
516fn deserialize_parser_directories<'de, D>(deserializer: D) -> Result<Vec<PathBuf>, D::Error>
517where
518    D: Deserializer<'de>,
519{
520    let paths = Vec::<PathBuf>::deserialize(deserializer)?;
521    let Ok(home) = etcetera::home_dir() else {
522        return Ok(paths);
523    };
524    let standardized = paths
525        .into_iter()
526        .map(|path| standardize_path(path, &home))
527        .collect();
528    Ok(standardized)
529}
530
/// Rebases `path` onto `home` when its first component is `~` or `$HOME`;
/// any other path is returned unchanged.
fn standardize_path(path: PathBuf, home: &Path) -> PathBuf {
    for prefix in ["~", "$HOME"] {
        if let Ok(rest) = path.strip_prefix(prefix) {
            return home.join(rest);
        }
    }
    path
}
540
541impl Config {
542    #[must_use]
543    pub fn initial() -> Self {
544        let home_dir = etcetera::home_dir().expect("Cannot determine home directory");
545        Self {
546            parser_directories: vec![
547                home_dir.join("github"),
548                home_dir.join("src"),
549                home_dir.join("source"),
550                home_dir.join("projects"),
551                home_dir.join("dev"),
552                home_dir.join("git"),
553            ],
554        }
555    }
556}
557
/// Value of the `BUILD_TARGET` environment variable at compile time.
const BUILD_TARGET: &str = env!("BUILD_TARGET");
559
/// One language configuration held by a [`Loader`].
pub struct LanguageConfiguration<'a> {
    pub scope: Option<String>,
    /// Used by `Loader::language_configuration_for_file_name` to choose between
    /// several configurations registered for the same file type.
    pub content_regex: Option<Regex>,
    pub first_line_regex: Option<Regex>,
    /// Matched against the injection string in
    /// `Loader::language_configuration_for_injection_string`.
    pub injection_regex: Option<Regex>,
    pub file_types: Vec<String>,
    pub root_path: PathBuf,
    pub highlights_filenames: Option<Vec<PathBuf>>,
    pub injections_filenames: Option<Vec<PathBuf>>,
    pub locals_filenames: Option<Vec<PathBuf>>,
    pub tags_filenames: Option<Vec<PathBuf>>,
    pub language_name: String,
    /// Index into the loader's `languages_by_id` list.
    language_id: usize,
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_config: OnceCell<Option<HighlightConfiguration>>,
    #[cfg(feature = "tree-sitter-tags")]
    tags_config: OnceCell<Option<TagsConfiguration>>,
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_names: &'a Mutex<Vec<String>>,
    #[cfg(feature = "tree-sitter-highlight")]
    use_all_highlight_names: bool,
    // Keeps `'a` in use when the feature-gated `highlight_names` field is compiled out.
    _phantom: PhantomData<&'a ()>,
}
583
584pub struct Loader {
585    pub parser_lib_path: PathBuf,
586    languages_by_id: Vec<(PathBuf, OnceCell<Language>, Option<Vec<PathBuf>>)>,
587    language_configurations: Vec<LanguageConfiguration<'static>>,
588    language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
589    language_configuration_in_current_path: Option<usize>,
590    language_configuration_ids_by_first_line_regex: HashMap<String, Vec<usize>>,
591    #[cfg(feature = "tree-sitter-highlight")]
592    highlight_names: Box<Mutex<Vec<String>>>,
593    #[cfg(feature = "tree-sitter-highlight")]
594    use_all_highlight_names: bool,
595    debug_build: bool,
596    sanitize_build: bool,
597    force_rebuild: bool,
598
599    #[cfg(feature = "wasm")]
600    wasm_store: Mutex<Option<tree_sitter::WasmStore>>,
601}
602
/// Inputs for compiling one parser.
pub struct CompileConfig<'a> {
    /// Directory containing the sources.
    pub src_path: &'a Path,
    /// Header paths; starts out as just `src_path`.
    pub header_paths: Vec<&'a Path>,
    /// Path of the parser source; starts out as `<src_path>/parser.c`.
    pub parser_path: PathBuf,
    pub scanner_path: Option<PathBuf>,
    pub external_files: Option<&'a [PathBuf]>,
    pub output_path: Option<PathBuf>,
    pub flags: &'a [&'a str],
    pub sanitize: bool,
    pub name: String,
}

impl<'a> CompileConfig<'a> {
    /// Creates a configuration for the sources in `src_path`.
    ///
    /// `externals` and `output_path` are stored as given. Everything else starts
    /// from a neutral value: no scanner, no flags, `sanitize` off and an empty name.
    #[must_use]
    pub fn new(
        src_path: &'a Path,
        externals: Option<&'a [PathBuf]>,
        output_path: Option<PathBuf>,
    ) -> Self {
        let parser_path = src_path.join("parser.c");
        let header_paths = Vec::from([src_path]);
        Self {
            src_path,
            header_paths,
            parser_path,
            scanner_path: None,
            external_files: externals,
            output_path,
            flags: &[],
            sanitize: false,
            name: String::new(),
        }
    }
}
635
// NOTE(review): no safety argument is recorded for this impl. `Loader` stores
// `once_cell::unsync::OnceCell` values in `languages_by_id`, and that cell type is
// not `Sync` on its own, so this presumably relies on callers never initializing
// languages from several threads at once — TODO confirm and state it as `// SAFETY:`.
unsafe impl Sync for Loader {}
637
638impl Loader {
639    pub fn new() -> LoaderResult<Self> {
640        let parser_lib_path = if let Ok(path) = env::var("TREE_SITTER_LIBDIR") {
641            PathBuf::from(path)
642        } else {
643            if cfg!(target_os = "macos") {
644                let legacy_apple_path = etcetera::base_strategy::Apple::new()?
645                    .cache_dir() // `$HOME/Library/Caches/`
646                    .join("tree-sitter");
647                if legacy_apple_path.exists() && legacy_apple_path.is_dir() {
648                    std::fs::remove_dir_all(&legacy_apple_path).map_err(|e| {
649                        LoaderError::IO(IoError::new(e, Some(legacy_apple_path.as_path())))
650                    })?;
651                }
652            }
653
654            etcetera::choose_base_strategy()?
655                .cache_dir()
656                .join("tree-sitter")
657                .join("lib")
658        };
659        Ok(Self::with_parser_lib_path(parser_lib_path))
660    }
661
662    #[must_use]
663    pub fn with_parser_lib_path(parser_lib_path: PathBuf) -> Self {
664        Self {
665            parser_lib_path,
666            languages_by_id: Vec::new(),
667            language_configurations: Vec::new(),
668            language_configuration_ids_by_file_type: HashMap::new(),
669            language_configuration_in_current_path: None,
670            language_configuration_ids_by_first_line_regex: HashMap::new(),
671            #[cfg(feature = "tree-sitter-highlight")]
672            highlight_names: Box::new(Mutex::new(Vec::new())),
673            #[cfg(feature = "tree-sitter-highlight")]
674            use_all_highlight_names: true,
675            debug_build: false,
676            sanitize_build: false,
677            force_rebuild: false,
678
679            #[cfg(feature = "wasm")]
680            wasm_store: Mutex::default(),
681        }
682    }
683
684    #[cfg(feature = "tree-sitter-highlight")]
685    #[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
686    pub fn configure_highlights(&mut self, names: &[String]) {
687        self.use_all_highlight_names = false;
688        let mut highlights = self.highlight_names.lock().unwrap();
689        highlights.clear();
690        highlights.extend(names.iter().cloned());
691    }
692
693    #[must_use]
694    #[cfg(feature = "tree-sitter-highlight")]
695    #[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
696    pub fn highlight_names(&self) -> Vec<String> {
697        self.highlight_names.lock().unwrap().clone()
698    }
699
700    pub fn find_all_languages(&mut self, config: &Config) -> LoaderResult<()> {
701        if config.parser_directories.is_empty() {
702            warn!(concat!(
703                "You have not configured any parser directories!\n",
704                "Please run `tree-sitter init-config` and edit the resulting\n",
705                "configuration file to indicate where we should look for\n",
706                "language grammars.\n"
707            ));
708        }
709        for parser_container_dir in &config.parser_directories {
710            if let Ok(entries) = fs::read_dir(parser_container_dir) {
711                for entry in entries {
712                    let entry = entry.map_err(|e| LoaderError::IO(IoError::new(e, None)))?;
713                    if let Some(parser_dir_name) = entry.file_name().to_str() {
714                        if parser_dir_name.starts_with("tree-sitter-") {
715                            self.find_language_configurations_at_path(
716                                &parser_container_dir.join(parser_dir_name),
717                                false,
718                            )
719                            .ok();
720                        }
721                    }
722                }
723            }
724        }
725        Ok(())
726    }
727
728    pub fn languages_at_path(&mut self, path: &Path) -> LoaderResult<Vec<(Language, String)>> {
729        if let Ok(configurations) = self.find_language_configurations_at_path(path, true) {
730            let mut language_ids = configurations
731                .iter()
732                .map(|c| (c.language_id, c.language_name.clone()))
733                .collect::<Vec<_>>();
734            language_ids.sort_unstable();
735            language_ids.dedup();
736            language_ids
737                .into_iter()
738                .map(|(id, name)| Ok((self.language_for_id(id)?, name)))
739                .collect::<LoaderResult<Vec<_>>>()
740        } else {
741            Ok(Vec::new())
742        }
743    }
744
745    #[must_use]
746    pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> {
747        self.language_configurations
748            .iter()
749            .map(|c| (c, self.languages_by_id[c.language_id].0.as_ref()))
750            .collect()
751    }
752
753    pub fn language_configuration_for_scope(
754        &self,
755        scope: &str,
756    ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
757        for configuration in &self.language_configurations {
758            if configuration.scope.as_ref().is_some_and(|s| s == scope) {
759                let language = self.language_for_id(configuration.language_id)?;
760                return Ok(Some((language, configuration)));
761            }
762        }
763        Ok(None)
764    }
765
766    pub fn language_configuration_for_first_line_regex(
767        &self,
768        path: &Path,
769    ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
770        self.language_configuration_ids_by_first_line_regex
771            .iter()
772            .try_fold(None, |_, (regex, ids)| {
773                if let Some(regex) = Self::regex(Some(regex)) {
774                    let file = fs::File::open(path)
775                        .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
776                    let reader = BufReader::new(file);
777                    let first_line = reader
778                        .lines()
779                        .next()
780                        .transpose()
781                        .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
782                    if let Some(first_line) = first_line {
783                        if regex.is_match(&first_line) && !ids.is_empty() {
784                            let configuration = &self.language_configurations[ids[0]];
785                            let language = self.language_for_id(configuration.language_id)?;
786                            return Ok(Some((language, configuration)));
787                        }
788                    }
789                }
790
791                Ok(None)
792            })
793    }
794
795    pub fn language_configuration_for_file_name(
796        &self,
797        path: &Path,
798    ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
799        // Find all the language configurations that match this file name
800        // or a suffix of the file name.
801        let configuration_ids = path
802            .file_name()
803            .and_then(|n| n.to_str())
804            .and_then(|file_name| self.language_configuration_ids_by_file_type.get(file_name))
805            .or_else(|| {
806                let mut path = path.to_owned();
807                let mut extensions = Vec::with_capacity(2);
808                while let Some(extension) = path.extension() {
809                    extensions.push(extension.to_str()?.to_string());
810                    path = PathBuf::from(path.file_stem()?.to_os_string());
811                }
812                extensions.reverse();
813                self.language_configuration_ids_by_file_type
814                    .get(&extensions.join("."))
815            });
816
817        if let Some(configuration_ids) = configuration_ids {
818            if !configuration_ids.is_empty() {
819                let configuration = if configuration_ids.len() == 1 {
820                    &self.language_configurations[configuration_ids[0]]
821                }
822                // If multiple language configurations match, then determine which
823                // one to use by applying the configurations' content regexes.
824                else {
825                    let file_contents =
826                        fs::read(path).map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
827                    let file_contents = String::from_utf8_lossy(&file_contents);
828                    let mut best_score = -2isize;
829                    let mut best_configuration_id = None;
830                    for configuration_id in configuration_ids {
831                        let config = &self.language_configurations[*configuration_id];
832
833                        // If the language configuration has a content regex, assign
834                        // a score based on the length of the first match.
835                        let score;
836                        if let Some(content_regex) = &config.content_regex {
837                            if let Some(mat) = content_regex.find(&file_contents) {
838                                score = (mat.end() - mat.start()) as isize;
839                            }
840                            // If the content regex does not match, then *penalize* this
841                            // language configuration, so that language configurations
842                            // without content regexes are preferred over those with
843                            // non-matching content regexes.
844                            else {
845                                score = -1;
846                            }
847                        } else {
848                            score = 0;
849                        }
850                        if score > best_score {
851                            best_configuration_id = Some(*configuration_id);
852                            best_score = score;
853                        }
854                    }
855
856                    &self.language_configurations[best_configuration_id.unwrap()]
857                };
858
859                let language = self.language_for_id(configuration.language_id)?;
860                return Ok(Some((language, configuration)));
861            }
862        }
863
864        Ok(None)
865    }
866
867    pub fn language_configuration_for_injection_string(
868        &self,
869        string: &str,
870    ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
871        let mut best_match_length = 0;
872        let mut best_match_position = None;
873        for (i, configuration) in self.language_configurations.iter().enumerate() {
874            if let Some(injection_regex) = &configuration.injection_regex {
875                if let Some(mat) = injection_regex.find(string) {
876                    let length = mat.end() - mat.start();
877                    if length > best_match_length {
878                        best_match_position = Some(i);
879                        best_match_length = length;
880                    }
881                }
882            }
883        }
884
885        if let Some(i) = best_match_position {
886            let configuration = &self.language_configurations[i];
887            let language = self.language_for_id(configuration.language_id)?;
888            Ok(Some((language, configuration)))
889        } else {
890            Ok(None)
891        }
892    }
893
894    pub fn language_for_configuration(
895        &self,
896        configuration: &LanguageConfiguration,
897    ) -> LoaderResult<Language> {
898        self.language_for_id(configuration.language_id)
899    }
900
901    fn language_for_id(&self, id: usize) -> LoaderResult<Language> {
902        let (path, language, externals) = &self.languages_by_id[id];
903        language
904            .get_or_try_init(|| {
905                let src_path = path.join("src");
906                self.load_language_at_path(CompileConfig::new(
907                    &src_path,
908                    externals.as_deref(),
909                    None,
910                ))
911            })
912            .cloned()
913    }
914
915    pub fn compile_parser_at_path(
916        &self,
917        grammar_path: &Path,
918        output_path: PathBuf,
919        flags: &[&str],
920    ) -> LoaderResult<()> {
921        let src_path = grammar_path.join("src");
922        let mut config = CompileConfig::new(&src_path, None, Some(output_path));
923        config.flags = flags;
924        self.load_language_at_path(config).map(|_| ())
925    }
926
927    pub fn load_language_at_path(&self, mut config: CompileConfig) -> LoaderResult<Language> {
928        let grammar_path = config.src_path.join("grammar.json");
929        config.name = Self::grammar_json_name(&grammar_path)?;
930        self.load_language_at_path_with_name(config)
931    }
932
933    pub fn load_language_at_path_with_name(
934        &self,
935        mut config: CompileConfig,
936    ) -> LoaderResult<Language> {
937        let mut lib_name = config.name.clone();
938        let language_fn_name = format!("tree_sitter_{}", config.name.replace('-', "_"));
939        if self.debug_build {
940            lib_name.push_str(".debug._");
941        }
942
943        if self.sanitize_build {
944            lib_name.push_str(".sanitize._");
945            config.sanitize = true;
946        }
947
948        if config.output_path.is_none() {
949            fs::create_dir_all(&self.parser_lib_path).map_err(|e| {
950                LoaderError::IO(IoError::new(e, Some(self.parser_lib_path.as_path())))
951            })?;
952        }
953
954        let mut recompile = self.force_rebuild || config.output_path.is_some(); // if specified, always recompile
955
956        let output_path = config.output_path.unwrap_or_else(|| {
957            let mut path = self.parser_lib_path.join(lib_name);
958            path.set_extension(env::consts::DLL_EXTENSION);
959            #[cfg(feature = "wasm")]
960            if self.wasm_store.lock().unwrap().is_some() {
961                path.set_extension("wasm");
962            }
963            path
964        });
965        config.output_path = Some(output_path.clone());
966
967        let parser_path = config.src_path.join("parser.c");
968        config.scanner_path = self.get_scanner_path(config.src_path);
969
970        let mut paths_to_check = vec![parser_path];
971
972        if let Some(scanner_path) = config.scanner_path.as_ref() {
973            paths_to_check.push(scanner_path.clone());
974        }
975
976        paths_to_check.extend(
977            config
978                .external_files
979                .unwrap_or_default()
980                .iter()
981                .map(|p| config.src_path.join(p)),
982        );
983
984        if !recompile {
985            recompile = needs_recompile(&output_path, &paths_to_check)?;
986        }
987
988        #[cfg(feature = "wasm")]
989        if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() {
990            if recompile {
991                self.compile_parser_to_wasm(
992                    &config.name,
993                    config.src_path,
994                    config
995                        .scanner_path
996                        .as_ref()
997                        .and_then(|p| p.strip_prefix(config.src_path).ok()),
998                    &output_path,
999                )?;
1000            }
1001
1002            let wasm_bytes = fs::read(&output_path)
1003                .map_err(|e| LoaderError::IO(IoError::new(e, Some(output_path.as_path()))))?;
1004            return Ok(wasm_store.load_language(&config.name, &wasm_bytes)?);
1005        }
1006
1007        // Create a unique lock path based on the output path hash to prevent
1008        // interference when multiple processes build the same grammar (by name)
1009        // to different output locations
1010        let lock_hash = {
1011            let mut hasher = std::hash::DefaultHasher::new();
1012            output_path.hash(&mut hasher);
1013            format!("{:x}", hasher.finish())
1014        };
1015
1016        let lock_path = if env::var("CROSS_RUNNER").is_ok() {
1017            tempfile::tempdir()
1018                .expect("create a temp dir")
1019                .path()
1020                .to_path_buf()
1021        } else {
1022            etcetera::choose_base_strategy()?.cache_dir()
1023        }
1024        .join("tree-sitter")
1025        .join("lock")
1026        .join(format!("{}-{lock_hash}.lock", config.name));
1027
1028        if let Ok(lock_file) = fs::OpenOptions::new().write(true).open(&lock_path) {
1029            recompile = false;
1030            if lock_file.try_lock_exclusive().is_err() {
1031                // if we can't acquire the lock, another process is compiling the parser, wait for
1032                // it and don't recompile
1033                lock_file
1034                    .lock_exclusive()
1035                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
1036                recompile = false;
1037            } else {
1038                // if we can acquire the lock, check if the lock file is older than 30 seconds, a
1039                // run that was interrupted and left the lock file behind should not block
1040                // subsequent runs
1041                let time = lock_file
1042                    .metadata()
1043                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?
1044                    .modified()
1045                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?
1046                    .elapsed()?
1047                    .as_secs();
1048                if time > 30 {
1049                    fs::remove_file(&lock_path)
1050                        .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
1051                    recompile = true;
1052                }
1053            }
1054        }
1055
1056        if recompile {
1057            let parent_path = lock_path.parent().unwrap();
1058            fs::create_dir_all(parent_path)
1059                .map_err(|e| LoaderError::IO(IoError::new(e, Some(parent_path))))?;
1060            let lock_file = fs::OpenOptions::new()
1061                .create(true)
1062                .truncate(true)
1063                .write(true)
1064                .open(&lock_path)
1065                .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
1066            lock_file
1067                .lock_exclusive()
1068                .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
1069
1070            self.compile_parser_to_dylib(&config, &lock_file, &lock_path)?;
1071
1072            if config.scanner_path.is_some() {
1073                self.check_external_scanner(&output_path)?;
1074            }
1075        }
1076
1077        // Ensure the dynamic library exists before trying to load it. This can
1078        // happen in race conditions where we couldn't acquire the lock because
1079        // another process was compiling but it still hasn't finished by the
1080        // time we reach this point, so the output file still doesn't exist.
1081        //
1082        // Instead of allowing the `load_language` call below to fail, return a
1083        // clearer error to the user here.
1084        if !output_path.exists() {
1085            let msg = format!(
1086                "Dynamic library `{}` not found after build attempt. \
1087                Are you running multiple processes building to the same output location?",
1088                output_path.display()
1089            );
1090
1091            Err(LoaderError::IO(IoError::new(
1092                std::io::Error::new(std::io::ErrorKind::NotFound, msg),
1093                Some(output_path.as_path()),
1094            )))?;
1095        }
1096
1097        Self::load_language(&output_path, &language_fn_name)
1098    }
1099
1100    pub fn load_language(path: &Path, function_name: &str) -> LoaderResult<Language> {
1101        let library = unsafe { Library::new(path) }.map_err(|e| {
1102            LoaderError::Library(LibraryError {
1103                error: e,
1104                path: path.to_string_lossy().to_string(),
1105            })
1106        })?;
1107        let language = unsafe {
1108            let language_fn = library
1109                .get::<Symbol<unsafe extern "C" fn() -> Language>>(function_name.as_bytes())
1110                .map_err(|e| {
1111                    LoaderError::Symbol(SymbolError {
1112                        error: e,
1113                        symbol_name: function_name.to_string(),
1114                        path: path.to_string_lossy().to_string(),
1115                    })
1116                })?;
1117            language_fn()
1118        };
1119        mem::forget(library);
1120        Ok(language)
1121    }
1122
1123    fn compile_parser_to_dylib(
1124        &self,
1125        config: &CompileConfig,
1126        lock_file: &fs::File,
1127        lock_path: &Path,
1128    ) -> LoaderResult<()> {
1129        let mut cc_config = cc::Build::new();
1130        cc_config
1131            .cargo_metadata(false)
1132            .cargo_warnings(false)
1133            .target(BUILD_TARGET)
1134            // BUILD_TARGET from the build environment becomes a runtime host for cc.
1135            // Otherwise, when cross compiled, cc will keep looking for a cross-compiler
1136            // on the target system instead of the native compiler.
1137            .host(BUILD_TARGET)
1138            .debug(self.debug_build)
1139            .file(&config.parser_path)
1140            .includes(&config.header_paths)
1141            .std("c11");
1142
1143        if let Some(scanner_path) = config.scanner_path.as_ref() {
1144            cc_config.file(scanner_path);
1145        }
1146
1147        if self.debug_build {
1148            cc_config.opt_level(0).extra_warnings(true);
1149        } else {
1150            cc_config.opt_level(2).extra_warnings(false);
1151        }
1152
1153        for flag in config.flags {
1154            cc_config.define(flag, None);
1155        }
1156
1157        let compiler = cc_config.get_compiler();
1158        let mut command = Command::new(compiler.path());
1159        command.args(compiler.args());
1160        for (key, value) in compiler.env() {
1161            command.env(key, value);
1162        }
1163
1164        let output_path = config.output_path.as_ref().unwrap();
1165
1166        let temp_dir = if compiler.is_like_msvc() {
1167            let out = format!("-out:{}", output_path.to_str().unwrap());
1168            command.arg(if self.debug_build { "-LDd" } else { "-LD" });
1169            command.arg("-utf-8");
1170
1171            // Windows creates intermediate files when compiling (.exp, .lib, .obj), which causes
1172            // issues when multiple processes are compiling in the same directory. This creates a
1173            // temporary directory for those files to go into, which is deleted after compilation.
1174            let temp_dir = output_path.parent().unwrap().join(format!(
1175                "tmp_{}_{:?}",
1176                std::process::id(),
1177                std::thread::current().id()
1178            ));
1179            std::fs::create_dir_all(&temp_dir).unwrap();
1180
1181            command.arg(format!("/Fo{}\\", temp_dir.display()));
1182            command.args(cc_config.get_files());
1183            command.arg("-link").arg(out);
1184            command.arg(format!("/IMPLIB:{}.lib", temp_dir.join("temp").display()));
1185
1186            Some(temp_dir)
1187        } else {
1188            command.arg("-Werror=implicit-function-declaration");
1189            if cfg!(any(target_os = "macos", target_os = "ios")) {
1190                command.arg("-dynamiclib");
1191                // TODO: remove when supported
1192                command.arg("-UTREE_SITTER_REUSE_ALLOCATOR");
1193            } else {
1194                command.arg("-shared");
1195                command.arg("-Wl,--no-undefined");
1196            }
1197            command.args(cc_config.get_files());
1198            command.arg("-o").arg(output_path);
1199
1200            None
1201        };
1202
1203        let output = command.output().map_err(|e| {
1204            LoaderError::Compiler(CompilerError {
1205                error: e,
1206                command: Box::new(command),
1207            })
1208        })?;
1209
1210        if let Some(temp_dir) = temp_dir {
1211            let _ = fs::remove_dir_all(temp_dir);
1212        }
1213
1214        FileExt::unlock(lock_file)
1215            .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path))))?;
1216        fs::remove_file(lock_path)
1217            .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path))))?;
1218
1219        if output.status.success() {
1220            Ok(())
1221        } else {
1222            Err(LoaderError::Compilation(
1223                String::from_utf8_lossy(&output.stdout).to_string(),
1224                String::from_utf8_lossy(&output.stderr).to_string(),
1225            ))
1226        }
1227    }
1228
1229    #[cfg(unix)]
1230    fn check_external_scanner(&self, library_path: &Path) -> LoaderResult<()> {
1231        let section = " T ";
1232        // Older ppc toolchains incorrectly report functions in the Data section. This bug has been
1233        // fixed, but we still need to account for older systems.
1234        let old_ppc_section = if cfg!(all(target_arch = "powerpc64", target_os = "linux")) {
1235            Some(" D ")
1236        } else {
1237            None
1238        };
1239        let nm_cmd = env::var("NM").unwrap_or_else(|_| "nm".to_owned());
1240        let command = Command::new(nm_cmd)
1241            .arg("--defined-only")
1242            .arg(library_path)
1243            .output();
1244        if let Ok(output) = command {
1245            if output.status.success() {
1246                let mut non_static_symbols = String::new();
1247                for line in String::from_utf8_lossy(&output.stdout).lines() {
1248                    if line.contains(section) || old_ppc_section.is_some_and(|s| line.contains(s)) {
1249                        if let Some(function_name) =
1250                            line.split_whitespace().collect::<Vec<_>>().get(2)
1251                        {
1252                            if !line.contains("tree_sitter_") {
1253                                writeln!(&mut non_static_symbols, "  `{function_name}`").unwrap();
1254                            }
1255                        }
1256                    }
1257                }
1258                if !non_static_symbols.is_empty() {
1259                    warn!(
1260                        "Found non-static non-tree-sitter functions in the external scanner\n{non_static_symbols}\n{}",
1261                        concat!(
1262                            "Consider making these functions static, they can cause conflicts ",
1263                            "when another tree-sitter project uses the same function name."
1264                        )
1265                    );
1266                }
1267            }
1268        } else {
1269            warn!(
1270                "Failed to run `nm` to verify symbols in {}",
1271                library_path.display()
1272            );
1273        }
1274
1275        Ok(())
1276    }
1277
1278    #[cfg(windows)]
1279    fn check_external_scanner(&self, _library_path: &Path) -> LoaderResult<()> {
1280        // TODO: there's no nm command on windows, whoever wants to implement this can and should :)
1281        Ok(())
1282    }
1283
1284    pub fn compile_parser_to_wasm(
1285        &self,
1286        language_name: &str,
1287        src_path: &Path,
1288        scanner_filename: Option<&Path>,
1289        output_path: &Path,
1290    ) -> LoaderResult<()> {
1291        let clang_executable = self.ensure_wasi_sdk_exists()?;
1292
1293        let mut command = Command::new(&clang_executable);
1294        command.current_dir(src_path).args([
1295            "-o",
1296            output_path.to_str().unwrap(),
1297            "-fPIC",
1298            "-shared",
1299            if self.debug_build { "-g" } else { "-Os" },
1300            format!("-Wl,--export=tree_sitter_{language_name}").as_str(),
1301            "-Wl,--allow-undefined",
1302            "-Wl,--no-entry",
1303            "-nostdlib",
1304            "-fno-exceptions",
1305            "-fvisibility=hidden",
1306            "-I",
1307            ".",
1308            "parser.c",
1309        ]);
1310
1311        if let Some(scanner_filename) = scanner_filename {
1312            command.arg(scanner_filename);
1313        }
1314
1315        let output = command.output().map_err(LoaderError::WasmCompiler)?;
1316
1317        if !output.status.success() {
1318            return Err(LoaderError::WasmCompilation(
1319                String::from_utf8_lossy(&output.stderr).to_string(),
1320            ));
1321        }
1322
1323        Ok(())
1324    }
1325
1326    /// Extracts a tar.gz archive with `tar`, stripping the first path component.
1327    fn extract_tar_gz_with_strip(
1328        &self,
1329        archive_path: &Path,
1330        destination: &Path,
1331    ) -> LoaderResult<()> {
1332        let status = Command::new("tar")
1333            .arg("-xzf")
1334            .arg(archive_path)
1335            .arg("--strip-components=1")
1336            .arg("-C")
1337            .arg(destination)
1338            .status()
1339            .map_err(|e| LoaderError::Tar(archive_path.to_string_lossy().to_string(), e))?;
1340
1341        if !status.success() {
1342            return Err(LoaderError::Extraction(
1343                archive_path.to_string_lossy().to_string(),
1344                destination.to_string_lossy().to_string(),
1345            ));
1346        }
1347
1348        Ok(())
1349    }
1350
1351    /// This ensures that the wasi-sdk is available, downloading and extracting it if necessary,
1352    /// and returns the path to the `clang` executable.
1353    ///
1354    /// If `TREE_SITTER_WASI_SDK_PATH` is set, it will use that path to look for the clang executable.
1355    fn ensure_wasi_sdk_exists(&self) -> LoaderResult<PathBuf> {
1356        let possible_executables = if cfg!(windows) {
1357            vec![
1358                "clang.exe",
1359                "wasm32-unknown-wasi-clang.exe",
1360                "wasm32-wasi-clang.exe",
1361            ]
1362        } else {
1363            vec!["clang", "wasm32-unknown-wasi-clang", "wasm32-wasi-clang"]
1364        };
1365
1366        if let Ok(wasi_sdk_path) = std::env::var("TREE_SITTER_WASI_SDK_PATH") {
1367            let wasi_sdk_dir = PathBuf::from(wasi_sdk_path);
1368
1369            for exe in &possible_executables {
1370                let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1371                if clang_exe.exists() {
1372                    return Ok(clang_exe);
1373                }
1374            }
1375
1376            return Err(LoaderError::WasiSDKClang(WasiSDKClangError {
1377                wasi_sdk_dir: wasi_sdk_dir.to_string_lossy().to_string(),
1378                possible_executables,
1379                download: false,
1380            }));
1381        }
1382
1383        let cache_dir = etcetera::choose_base_strategy()?
1384            .cache_dir()
1385            .join("tree-sitter");
1386        fs::create_dir_all(&cache_dir)
1387            .map_err(|e| LoaderError::IO(IoError::new(e, Some(cache_dir.as_path()))))?;
1388
1389        let wasi_sdk_dir = cache_dir.join("wasi-sdk");
1390
1391        for exe in &possible_executables {
1392            let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1393            if clang_exe.exists() {
1394                return Ok(clang_exe);
1395            }
1396        }
1397
1398        fs::create_dir_all(&wasi_sdk_dir)
1399            .map_err(|e| LoaderError::IO(IoError::new(e, Some(wasi_sdk_dir.as_path()))))?;
1400
1401        let arch_os = if cfg!(target_os = "macos") {
1402            if cfg!(target_arch = "aarch64") {
1403                "arm64-macos"
1404            } else {
1405                "x86_64-macos"
1406            }
1407        } else if cfg!(target_os = "windows") {
1408            if cfg!(target_arch = "aarch64") {
1409                "arm64-windows"
1410            } else {
1411                "x86_64-windows"
1412            }
1413        } else if cfg!(target_os = "linux") {
1414            if cfg!(target_arch = "aarch64") {
1415                "arm64-linux"
1416            } else {
1417                "x86_64-linux"
1418            }
1419        } else {
1420            return Err(LoaderError::WasiSDKPlatform);
1421        };
1422
1423        let sdk_filename = format!("wasi-sdk-{WASI_SDK_VERSION}-{arch_os}.tar.gz");
1424        let wasi_sdk_major_version = WASI_SDK_VERSION
1425            .trim_end_matches(char::is_numeric) // trim minor version...
1426            .trim_end_matches('.'); // ...and '.' separator
1427        let sdk_url = format!(
1428            "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-{wasi_sdk_major_version}/{sdk_filename}",
1429        );
1430
1431        info!("Downloading wasi-sdk from {sdk_url}...");
1432        let temp_tar_path = cache_dir.join(sdk_filename);
1433
1434        let status = Command::new("curl")
1435            .arg("-f")
1436            .arg("-L")
1437            .arg("-o")
1438            .arg(&temp_tar_path)
1439            .arg(&sdk_url)
1440            .status()
1441            .map_err(|e| LoaderError::Curl(sdk_url.clone(), e))?;
1442
1443        if !status.success() {
1444            return Err(LoaderError::WasiSDKDownload(sdk_url));
1445        }
1446
1447        info!("Extracting wasi-sdk to {}...", wasi_sdk_dir.display());
1448        self.extract_tar_gz_with_strip(&temp_tar_path, &wasi_sdk_dir)?;
1449
1450        fs::remove_file(temp_tar_path).ok();
1451        for exe in &possible_executables {
1452            let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1453            if clang_exe.exists() {
1454                return Ok(clang_exe);
1455            }
1456        }
1457
1458        Err(LoaderError::WasiSDKClang(WasiSDKClangError {
1459            wasi_sdk_dir: wasi_sdk_dir.to_string_lossy().to_string(),
1460            possible_executables,
1461            download: true,
1462        }))
1463    }
1464
1465    #[must_use]
1466    #[cfg(feature = "tree-sitter-highlight")]
1467    pub fn highlight_config_for_injection_string<'a>(
1468        &'a self,
1469        string: &str,
1470    ) -> Option<&'a HighlightConfiguration> {
1471        match self.language_configuration_for_injection_string(string) {
1472            Err(e) => {
1473                error!("Failed to load language for injection string '{string}': {e}",);
1474                None
1475            }
1476            Ok(None) => None,
1477            Ok(Some((language, configuration))) => {
1478                match configuration.highlight_config(language, None) {
1479                    Err(e) => {
1480                        error!(
1481                            "Failed to load higlight config for injection string '{string}': {e}"
1482                        );
1483                        None
1484                    }
1485                    Ok(None) => None,
1486                    Ok(Some(config)) => Some(config),
1487                }
1488            }
1489        }
1490    }
1491
1492    #[must_use]
1493    pub fn get_language_configuration_in_current_path(&self) -> Option<&LanguageConfiguration> {
1494        self.language_configuration_in_current_path
1495            .map(|i| &self.language_configurations[i])
1496    }
1497
    /// Registers the language configurations found at `parser_path` and returns the
    /// ones added by this call.
    ///
    /// The configuration is read through `TreeSitterJSON::from_file(parser_path)`. For
    /// each grammar it lists, a language entry is added to `languages_by_id` (or an entry
    /// added earlier in this same call with an identical path is reused) and a
    /// [`LanguageConfiguration`] is appended, indexed by its file types and, when present,
    /// its first-line regex.
    ///
    /// If that yields no configuration but `parser_path/src/grammar.json` exists, a
    /// minimal configuration is created from the grammar name alone.
    ///
    /// When `set_current_path_config` is `true` and no "current path" configuration has
    /// been recorded yet, the first configuration added from the JSON file is recorded.
    ///
    /// # Errors
    ///
    /// Returns [`LoaderError::ExternalFile`] when an external file, joined onto
    /// `parser_path`, does not start with `parser_path`, and any error from reading the
    /// grammar name in the fallback path. A serialization error from the JSON file is only
    /// logged as a warning; every other error from reading it is silently ignored.
    pub fn find_language_configurations_at_path(
        &mut self,
        parser_path: &Path,
        set_current_path_config: bool,
    ) -> LoaderResult<&[LanguageConfiguration]> {
        // Remember where the new configurations start so they can be returned as a slice.
        let initial_language_configuration_count = self.language_configurations.len();

        match TreeSitterJSON::from_file(parser_path) {
            Ok(config) => {
                // Languages registered before this call are never reused below.
                let language_count = self.languages_by_id.len();
                for grammar in config.grammars {
                    // Determine the path to the parser directory. This can be specified in
                    // the tree-sitter.json, but defaults to the directory containing the
                    // tree-sitter.json.
                    let language_path =
                        parser_path.join(grammar.path.unwrap_or(PathBuf::from(".")));

                    // Determine if a previous language configuration in this tree-sitter.json
                    // file already uses the same language.
                    let mut language_id = None;
                    for (id, (path, _, _)) in
                        self.languages_by_id.iter().enumerate().skip(language_count)
                    {
                        if language_path == *path {
                            language_id = Some(id);
                        }
                    }

                    // If not, add a new language path to the list.
                    let language_id = if let Some(language_id) = language_id {
                        language_id
                    } else {
                        self.languages_by_id.push((
                            language_path,
                            OnceCell::new(),
                            grammar
                                .external_files
                                .clone()
                                .into_vec()
                                .map(|files| {
                                    files
                                        .into_iter()
                                        .map(|path| {
                                            let path = parser_path.join(path);
                                            // Reject external files that end up above/outside
                                            // of parser_path.
                                            if path.starts_with(parser_path) {
                                                Ok(path)
                                            } else {
                                                Err(LoaderError::ExternalFile(
                                                    path.to_string_lossy().to_string(),
                                                    parser_path.to_string_lossy().to_string(),
                                                ))
                                            }
                                        })
                                        .collect::<LoaderResult<Vec<_>>>()
                                })
                                .transpose()?,
                        ));
                        self.languages_by_id.len() - 1
                    };

                    let configuration = LanguageConfiguration {
                        root_path: parser_path.to_path_buf(),
                        language_name: grammar.name,
                        scope: Some(grammar.scope),
                        language_id,
                        file_types: grammar.file_types.unwrap_or_default(),
                        // A pattern that fails to compile is treated as absent.
                        content_regex: Self::regex(grammar.content_regex.as_deref()),
                        first_line_regex: Self::regex(grammar.first_line_regex.as_deref()),
                        injection_regex: Self::regex(grammar.injection_regex.as_deref()),
                        injections_filenames: grammar.injections.into_vec(),
                        locals_filenames: grammar.locals.into_vec(),
                        tags_filenames: grammar.tags.into_vec(),
                        highlights_filenames: grammar.highlights.into_vec(),
                        #[cfg(feature = "tree-sitter-highlight")]
                        highlight_config: OnceCell::new(),
                        #[cfg(feature = "tree-sitter-tags")]
                        tags_config: OnceCell::new(),
                        #[cfg(feature = "tree-sitter-highlight")]
                        highlight_names: &self.highlight_names,
                        #[cfg(feature = "tree-sitter-highlight")]
                        use_all_highlight_names: self.use_all_highlight_names,
                        _phantom: PhantomData,
                    };

                    // Index the configuration under the position it is about to occupy in
                    // `language_configurations` (it is pushed just below).
                    for file_type in &configuration.file_types {
                        self.language_configuration_ids_by_file_type
                            .entry(file_type.clone())
                            .or_default()
                            .push(self.language_configurations.len());
                    }
                    if let Some(first_line_regex) = &configuration.first_line_regex {
                        self.language_configuration_ids_by_first_line_regex
                            .entry(first_line_regex.to_string())
                            .or_default()
                            .push(self.language_configurations.len());
                    }

                    // NOTE(review): the transmute only widens the configuration's lifetime
                    // parameter to `'static`. This presumably relies on the borrowed
                    // `self.highlight_names` living as long as the stored configurations --
                    // confirm before touching either field.
                    self.language_configurations.push(unsafe {
                        mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
                            configuration,
                        )
                    });

                    // Only the first configuration found becomes the "current path" one.
                    if set_current_path_config
                        && self.language_configuration_in_current_path.is_none()
                    {
                        self.language_configuration_in_current_path =
                            Some(self.language_configurations.len() - 1);
                    }
                }
            }
            Err(LoaderError::Serialization(e)) => {
                warn!(
                    "Failed to parse {} -- {e}",
                    parser_path.join("tree-sitter.json").display()
                );
            }
            // Any other failure to read the file is ignored; the grammar.json fallback
            // below may still apply.
            _ => {}
        }

        // If we didn't find any language configurations in the tree-sitter.json file,
        // but there is a grammar.json file, then use the grammar file to form a simple
        // language configuration.
        if self.language_configurations.len() == initial_language_configuration_count
            && parser_path.join("src").join("grammar.json").exists()
        {
            let grammar_path = parser_path.join("src").join("grammar.json");
            let language_name = Self::grammar_json_name(&grammar_path)?;
            let configuration = LanguageConfiguration {
                root_path: parser_path.to_owned(),
                language_name,
                // Index of the language entry pushed at the end of this branch.
                language_id: self.languages_by_id.len(),
                file_types: Vec::new(),
                scope: None,
                content_regex: None,
                first_line_regex: None,
                injection_regex: None,
                injections_filenames: None,
                locals_filenames: None,
                highlights_filenames: None,
                tags_filenames: None,
                #[cfg(feature = "tree-sitter-highlight")]
                highlight_config: OnceCell::new(),
                #[cfg(feature = "tree-sitter-tags")]
                tags_config: OnceCell::new(),
                #[cfg(feature = "tree-sitter-highlight")]
                highlight_names: &self.highlight_names,
                #[cfg(feature = "tree-sitter-highlight")]
                use_all_highlight_names: self.use_all_highlight_names,
                _phantom: PhantomData,
            };
            // NOTE(review): same lifetime-widening transmute as in the loop above; the
            // same assumption about `self.highlight_names` applies -- confirm.
            self.language_configurations.push(unsafe {
                mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
                    configuration,
                )
            });
            self.languages_by_id
                .push((parser_path.to_owned(), OnceCell::new(), None));
        }

        Ok(&self.language_configurations[initial_language_configuration_count..])
    }
1661
1662    fn regex(pattern: Option<&str>) -> Option<Regex> {
1663        pattern.and_then(|r| RegexBuilder::new(r).multi_line(true).build().ok())
1664    }
1665
1666    fn grammar_json_name(grammar_path: &Path) -> LoaderResult<String> {
1667        let file = fs::File::open(grammar_path)
1668            .map_err(|e| LoaderError::IO(IoError::new(e, Some(grammar_path))))?;
1669
1670        let first_three_lines = BufReader::new(file)
1671            .lines()
1672            .take(3)
1673            .collect::<Result<Vec<_>, std::io::Error>>()
1674            .map_err(|_| LoaderError::GrammarJSON(grammar_path.to_string_lossy().to_string()))?
1675            .join("\n");
1676
1677        let name = GRAMMAR_NAME_REGEX
1678            .captures(&first_three_lines)
1679            .and_then(|c| c.get(1))
1680            .ok_or_else(|| LoaderError::GrammarJSON(grammar_path.to_string_lossy().to_string()))?;
1681
1682        Ok(name.as_str().to_string())
1683    }
1684
1685    pub fn select_language(
1686        &mut self,
1687        path: Option<&Path>,
1688        current_dir: &Path,
1689        scope: Option<&str>,
1690        // path to dynamic library, name of language
1691        lib_info: Option<&(PathBuf, &str)>,
1692    ) -> LoaderResult<Language> {
1693        if let Some((ref lib_path, language_name)) = lib_info {
1694            let language_fn_name = format!("tree_sitter_{}", language_name.replace('-', "_"));
1695            Self::load_language(lib_path, &language_fn_name)
1696        } else if let Some(scope) = scope {
1697            if let Some(config) = self
1698                .language_configuration_for_scope(scope)
1699                .map_err(|e| LoaderError::ScopeLoad(scope.to_string(), Box::new(e)))?
1700            {
1701                Ok(config.0)
1702            } else {
1703                Err(LoaderError::UnknownScope(scope.to_string()))
1704            }
1705        } else if let Some((lang, _)) = if let Some(path) = path {
1706            self.language_configuration_for_file_name(path)
1707                .map_err(|e| {
1708                    LoaderError::FileNameLoad(
1709                        path.file_name().unwrap().to_string_lossy().to_string(),
1710                        Box::new(e),
1711                    )
1712                })?
1713        } else {
1714            None
1715        } {
1716            Ok(lang)
1717        } else if let Some(id) = self.language_configuration_in_current_path {
1718            Ok(self.language_for_id(self.language_configurations[id].language_id)?)
1719        } else if let Some(lang) = self
1720            .languages_at_path(current_dir)
1721            .map_err(|e| LoaderError::CurrentDirectoryLoad(Box::new(e)))?
1722            .first()
1723            .cloned()
1724        {
1725            Ok(lang.0)
1726        } else if let Some(lang) = if let Some(path) = path {
1727            self.language_configuration_for_first_line_regex(path)?
1728        } else {
1729            None
1730        } {
1731            Ok(lang.0)
1732        } else {
1733            Err(LoaderError::NoLanguage)
1734        }
1735    }
1736
1737    pub const fn debug_build(&mut self, flag: bool) {
1738        self.debug_build = flag;
1739    }
1740
1741    pub const fn sanitize_build(&mut self, flag: bool) {
1742        self.sanitize_build = flag;
1743    }
1744
1745    pub const fn force_rebuild(&mut self, rebuild: bool) {
1746        self.force_rebuild = rebuild;
1747    }
1748
1749    #[cfg(feature = "wasm")]
1750    #[cfg_attr(docsrs, doc(cfg(feature = "wasm")))]
1751    pub fn use_wasm(&mut self, engine: &tree_sitter::wasmtime::Engine) {
1752        *self.wasm_store.lock().unwrap() = Some(tree_sitter::WasmStore::new(engine).unwrap());
1753    }
1754
1755    #[must_use]
1756    pub fn get_scanner_path(&self, src_path: &Path) -> Option<PathBuf> {
1757        let path = src_path.join("scanner.c");
1758        path.exists().then_some(path)
1759    }
1760}
1761
1762impl LanguageConfiguration<'_> {
1763    #[cfg(feature = "tree-sitter-highlight")]
1764    pub fn highlight_config(
1765        &self,
1766        language: Language,
1767        paths: Option<&[PathBuf]>,
1768    ) -> LoaderResult<Option<&HighlightConfiguration>> {
1769        let (highlights_filenames, injections_filenames, locals_filenames) = match paths {
1770            Some(paths) => (
1771                Some(
1772                    paths
1773                        .iter()
1774                        .filter(|p| p.ends_with(DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME))
1775                        .cloned()
1776                        .collect::<Vec<_>>(),
1777                ),
1778                Some(
1779                    paths
1780                        .iter()
1781                        .filter(|p| p.ends_with(DEFAULT_TAGS_QUERY_FILE_NAME))
1782                        .cloned()
1783                        .collect::<Vec<_>>(),
1784                ),
1785                Some(
1786                    paths
1787                        .iter()
1788                        .filter(|p| p.ends_with(DEFAULT_LOCALS_QUERY_FILE_NAME))
1789                        .cloned()
1790                        .collect::<Vec<_>>(),
1791                ),
1792            ),
1793            None => (None, None, None),
1794        };
1795        self.highlight_config
1796            .get_or_try_init(|| {
1797                let (highlights_query, highlight_ranges) = self.read_queries(
1798                    if highlights_filenames.is_some() {
1799                        highlights_filenames.as_deref()
1800                    } else {
1801                        self.highlights_filenames.as_deref()
1802                    },
1803                    DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME,
1804                )?;
1805                let (injections_query, injection_ranges) = self.read_queries(
1806                    if injections_filenames.is_some() {
1807                        injections_filenames.as_deref()
1808                    } else {
1809                        self.injections_filenames.as_deref()
1810                    },
1811                    DEFAULT_INJECTIONS_QUERY_FILE_NAME,
1812                )?;
1813                let (locals_query, locals_ranges) = self.read_queries(
1814                    if locals_filenames.is_some() {
1815                        locals_filenames.as_deref()
1816                    } else {
1817                        self.locals_filenames.as_deref()
1818                    },
1819                    DEFAULT_LOCALS_QUERY_FILE_NAME,
1820                )?;
1821
1822                if highlights_query.is_empty() {
1823                    Ok(None)
1824                } else {
1825                    let mut result = HighlightConfiguration::new(
1826                        language,
1827                        &self.language_name,
1828                        &highlights_query,
1829                        &injections_query,
1830                        &locals_query,
1831                    )
1832                    .map_err(|error| match error.kind {
1833                        QueryErrorKind::Language => {
1834                            LoaderError::Query(LoaderQueryError { error, file: None })
1835                        }
1836                        _ => {
1837                            if error.offset < injections_query.len() {
1838                                Self::include_path_in_query_error(
1839                                    error,
1840                                    &injection_ranges,
1841                                    &injections_query,
1842                                    0,
1843                                )
1844                            } else if error.offset < injections_query.len() + locals_query.len() {
1845                                Self::include_path_in_query_error(
1846                                    error,
1847                                    &locals_ranges,
1848                                    &locals_query,
1849                                    injections_query.len(),
1850                                )
1851                            } else {
1852                                Self::include_path_in_query_error(
1853                                    error,
1854                                    &highlight_ranges,
1855                                    &highlights_query,
1856                                    injections_query.len() + locals_query.len(),
1857                                )
1858                            }
1859                        }
1860                    })?;
1861                    let mut all_highlight_names = self.highlight_names.lock().unwrap();
1862                    if self.use_all_highlight_names {
1863                        for capture_name in result.query.capture_names() {
1864                            if !all_highlight_names.iter().any(|x| x == capture_name) {
1865                                all_highlight_names.push((*capture_name).to_string());
1866                            }
1867                        }
1868                    }
1869                    result.configure(all_highlight_names.as_slice());
1870                    drop(all_highlight_names);
1871                    Ok(Some(result))
1872                }
1873            })
1874            .map(Option::as_ref)
1875    }
1876
1877    #[cfg(feature = "tree-sitter-tags")]
1878    pub fn tags_config(&self, language: Language) -> LoaderResult<Option<&TagsConfiguration>> {
1879        self.tags_config
1880            .get_or_try_init(|| {
1881                let (tags_query, tags_ranges) = self
1882                    .read_queries(self.tags_filenames.as_deref(), DEFAULT_TAGS_QUERY_FILE_NAME)?;
1883                let (locals_query, locals_ranges) = self.read_queries(
1884                    self.locals_filenames.as_deref(),
1885                    DEFAULT_LOCALS_QUERY_FILE_NAME,
1886                )?;
1887                if tags_query.is_empty() {
1888                    Ok(None)
1889                } else {
1890                    TagsConfiguration::new(language, &tags_query, &locals_query)
1891                        .map(Some)
1892                        .map_err(|error| {
1893                            if let TagsError::Query(error) = error {
1894                                if error.offset < locals_query.len() {
1895                                    Self::include_path_in_query_error(
1896                                        error,
1897                                        &locals_ranges,
1898                                        &locals_query,
1899                                        0,
1900                                    )
1901                                } else {
1902                                    Self::include_path_in_query_error(
1903                                        error,
1904                                        &tags_ranges,
1905                                        &tags_query,
1906                                        locals_query.len(),
1907                                    )
1908                                }
1909                            } else {
1910                                error.into()
1911                            }
1912                        })
1913                }
1914            })
1915            .map(Option::as_ref)
1916    }
1917
1918    #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1919    fn include_path_in_query_error(
1920        mut error: QueryError,
1921        ranges: &[(PathBuf, Range<usize>)],
1922        source: &str,
1923        start_offset: usize,
1924    ) -> LoaderError {
1925        let offset_within_section = error.offset - start_offset;
1926        let (path, range) = ranges
1927            .iter()
1928            .find(|(_, range)| range.contains(&offset_within_section))
1929            .unwrap_or_else(|| ranges.last().unwrap());
1930        error.offset = offset_within_section - range.start;
1931        error.row = source[range.start..offset_within_section]
1932            .matches('\n')
1933            .count();
1934        LoaderError::Query(LoaderQueryError {
1935            error,
1936            file: Some(path.to_string_lossy().to_string()),
1937        })
1938    }
1939
1940    #[allow(clippy::type_complexity)]
1941    #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1942    fn read_queries(
1943        &self,
1944        paths: Option<&[PathBuf]>,
1945        default_path: &str,
1946    ) -> LoaderResult<(String, Vec<(PathBuf, Range<usize>)>)> {
1947        let mut query = String::new();
1948        let mut path_ranges = Vec::new();
1949        if let Some(paths) = paths {
1950            for path in paths {
1951                let abs_path = self.root_path.join(path);
1952                let prev_query_len = query.len();
1953                query += &fs::read_to_string(&abs_path)
1954                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(abs_path.as_path()))))?;
1955                path_ranges.push((path.clone(), prev_query_len..query.len()));
1956            }
1957        } else {
1958            // highlights.scm is needed to test highlights, and tags.scm to test tags
1959            if default_path == DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME
1960                || default_path == DEFAULT_TAGS_QUERY_FILE_NAME
1961            {
1962                warn!(
1963                    concat!(
1964                        "You should add a `{}` entry pointing to the {} path in the `tree-sitter` ",
1965                        "object in the grammar's tree-sitter.json file. See more here: ",
1966                        "https://tree-sitter.github.io/tree-sitter/3-syntax-highlighting#query-paths"
1967                    ),
1968                    default_path.replace(".scm", ""),
1969                    default_path
1970                );
1971            }
1972            let queries_path = self.root_path.join("queries");
1973            let path = queries_path.join(default_path);
1974            if path.exists() {
1975                query = fs::read_to_string(&path)
1976                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(path.as_path()))))?;
1977                path_ranges.push((PathBuf::from(default_path), 0..query.len()));
1978            }
1979        }
1980
1981        Ok((query, path_ranges))
1982    }
1983}
1984
1985fn needs_recompile(lib_path: &Path, paths_to_check: &[PathBuf]) -> LoaderResult<bool> {
1986    if !lib_path.exists() {
1987        return Ok(true);
1988    }
1989    let lib_mtime = mtime(lib_path).map_err(|e| LoaderError::ModifiedTime(Box::new(e)))?;
1990    for path in paths_to_check {
1991        if mtime(path).map_err(|e| LoaderError::ModifiedTime(Box::new(e)))? > lib_mtime {
1992            return Ok(true);
1993        }
1994    }
1995    Ok(false)
1996}
1997
1998fn mtime(path: &Path) -> LoaderResult<SystemTime> {
1999    fs::metadata(path)
2000        .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?
2001        .modified()
2002        .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))
2003}