Skip to main content

tree_sitter_loader/
loader.rs

1#![cfg_attr(not(any(test, doctest)), doc = include_str!("../README.md"))]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3
4#[cfg(unix)]
5use std::fmt::Write as _;
6#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
7use std::ops::Range;
8#[cfg(feature = "tree-sitter-highlight")]
9use std::sync::Mutex;
10use std::{
11    collections::HashMap,
12    env, fs,
13    hash::{Hash as _, Hasher as _},
14    io::{BufRead, BufReader},
15    marker::PhantomData,
16    mem,
17    path::{Path, PathBuf},
18    process::Command,
19    sync::LazyLock,
20    time::{SystemTime, SystemTimeError},
21};
22
23use etcetera::BaseStrategy as _;
24use fs4::fs_std::FileExt;
25use libloading::{Library, Symbol};
26use log::{error, info, warn};
27use once_cell::unsync::OnceCell;
28use regex::{Regex, RegexBuilder};
29use semver::Version;
30use serde::{Deserialize, Deserializer, Serialize};
31use thiserror::Error;
32use tree_sitter::Language;
33#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
34use tree_sitter::QueryError;
35#[cfg(feature = "tree-sitter-highlight")]
36use tree_sitter::QueryErrorKind;
37#[cfg(feature = "wasm")]
38use tree_sitter::WasmError;
39#[cfg(feature = "tree-sitter-highlight")]
40use tree_sitter_highlight::HighlightConfiguration;
41#[cfg(feature = "tree-sitter-tags")]
42use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
43
/// Matches a `"name": "<value>"` pair and captures the quoted value
/// (non-greedy) in group 1. Compiled lazily on first use.
static GRAMMAR_NAME_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#""name":\s*"(.*?)""#).unwrap());

/// Contents of the `../wasi-sdk-version` file, embedded at compile time with
/// leading and trailing ASCII whitespace removed.
const WASI_SDK_VERSION: &str = include_str!("../wasi-sdk-version").trim_ascii();

/// `Result` alias whose error type is [`LoaderError`].
pub type LoaderResult<T> = Result<T, LoaderError>;
50
51#[derive(Debug, Error)]
52pub enum LoaderError {
53    #[error(transparent)]
54    Compiler(CompilerError),
55    #[error("Parser compilation failed.\nStdout: {0}\nStderr: {1}")]
56    Compilation(String, String),
57    #[error("Failed to execute curl for {0} -- {1}")]
58    Curl(String, std::io::Error),
59    #[error("Failed to load language in current directory:\n{0}")]
60    CurrentDirectoryLoad(Box<Self>),
61    #[error("External file path {0} is outside of parser directory {1}")]
62    ExternalFile(String, String),
63    #[error("Failed to extract archive {0} to {1}")]
64    Extraction(String, String),
65    #[error("Failed to load language for file name {0}:\n{1}")]
66    FileNameLoad(String, Box<Self>),
67    #[error("Failed to parse the language name from grammar.json at {0}")]
68    GrammarJSON(String),
69    #[error(transparent)]
70    HomeDir(#[from] etcetera::HomeDirError),
71    #[error(transparent)]
72    IO(IoError),
73    #[error(transparent)]
74    Library(LibraryError),
75    #[error("Failed to compare binary and source timestamps:\n{0}")]
76    ModifiedTime(Box<Self>),
77    #[error("No language found")]
78    NoLanguage,
79    #[error(transparent)]
80    Query(LoaderQueryError),
81    #[error("Failed to load language for scope '{0}':\n{1}")]
82    ScopeLoad(String, Box<Self>),
83    #[error(transparent)]
84    Serialization(#[from] serde_json::Error),
85    #[error(transparent)]
86    Symbol(SymbolError),
87    #[error(transparent)]
88    Tags(#[from] TagsError),
89    #[error("Failed to execute tar for {0} -- {1}")]
90    Tar(String, std::io::Error),
91    #[error(transparent)]
92    Time(#[from] SystemTimeError),
93    #[error("Unknown scope '{0}'")]
94    UnknownScope(String),
95    #[error("Failed to download wasi-sdk from {0}")]
96    WasiSDKDownload(String),
97    #[error(transparent)]
98    WasiSDKClang(#[from] WasiSDKClangError),
99    #[error("Unsupported platform for wasi-sdk")]
100    WasiSDKPlatform,
101    #[cfg(feature = "wasm")]
102    #[error(transparent)]
103    Wasm(#[from] WasmError),
104    #[error("Failed to run wasi-sdk clang -- {0}")]
105    WasmCompiler(std::io::Error),
106    #[error("wasi-sdk clang command failed: {0}")]
107    WasmCompilation(String),
108}
109
110#[derive(Debug, Error)]
111pub struct CompilerError {
112    pub error: std::io::Error,
113    pub command: Box<Command>,
114}
115
116impl std::fmt::Display for CompilerError {
117    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
118        write!(
119            f,
120            "Failed to execute the C compiler with the following command:\n{:?}\nError: {}",
121            *self.command, self.error
122        )?;
123        Ok(())
124    }
125}
126
127#[derive(Debug, Error)]
128pub struct IoError {
129    pub error: std::io::Error,
130    pub path: Option<String>,
131}
132
133impl IoError {
134    fn new(error: std::io::Error, path: Option<&Path>) -> Self {
135        Self {
136            error,
137            path: path.map(|p| p.to_string_lossy().to_string()),
138        }
139    }
140}
141
142impl std::fmt::Display for IoError {
143    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
144        write!(f, "{}", self.error)?;
145        if let Some(ref path) = self.path {
146            write!(f, " ({path})")?;
147        }
148        Ok(())
149    }
150}
151
152#[derive(Debug, Error)]
153pub struct LibraryError {
154    pub error: libloading::Error,
155    pub path: String,
156}
157
158impl std::fmt::Display for LibraryError {
159    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
160        write!(
161            f,
162            "Error opening dynamic library {} -- {}",
163            self.path, self.error
164        )?;
165        Ok(())
166    }
167}
168
/// A query error, optionally tagged with the query file it came from.
///
/// Gated on the same features as the `QueryError` import so the type is
/// not compiled when that name is unavailable.
#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
#[derive(Debug, Error)]
pub struct LoaderQueryError {
    /// The underlying query error.
    pub error: QueryError,
    /// The query file involved, if known.
    pub file: Option<String>,
}

#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
impl std::fmt::Display for LoaderQueryError {
    /// Prints an "Error in query file" header line when a file is recorded,
    /// followed by the query error itself.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if let Some(path) = &self.file {
            writeln!(f, "Error in query file {path}:")?;
        }
        write!(f, "{}", self.error)
    }
}
184
185#[derive(Debug, Error)]
186pub struct SymbolError {
187    pub error: libloading::Error,
188    pub symbol_name: String,
189    pub path: String,
190}
191
192impl std::fmt::Display for SymbolError {
193    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
194        write!(
195            f,
196            "Failed to load symbol {} from {} -- {}",
197            self.symbol_name, self.path, self.error
198        )?;
199        Ok(())
200    }
201}
202
203#[derive(Debug, Error)]
204pub struct WasiSDKClangError {
205    pub wasi_sdk_dir: String,
206    pub possible_executables: Vec<&'static str>,
207    pub download: bool,
208}
209
210impl std::fmt::Display for WasiSDKClangError {
211    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
212        if self.download {
213            write!(
214                f,
215                "Failed to find clang executable in downloaded wasi-sdk at '{}'.",
216                self.wasi_sdk_dir
217            )?;
218        } else {
219            write!(f, "TREE_SITTER_WASI_SDK_PATH is set to '{}', but no clang executable found in 'bin/' directory.", self.wasi_sdk_dir)?;
220        }
221
222        let possible_exes = self.possible_executables.join(", ");
223        write!(f, " Looked for: {possible_exes}.")?;
224
225        Ok(())
226    }
227}
228
/// Default file name for a highlights query.
pub const DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME: &str = "highlights.scm";

/// Default file name for an injections query.
pub const DEFAULT_INJECTIONS_QUERY_FILE_NAME: &str = "injections.scm";

/// Default file name for a locals query.
pub const DEFAULT_LOCALS_QUERY_FILE_NAME: &str = "locals.scm";

/// Default file name for a tags query.
pub const DEFAULT_TAGS_QUERY_FILE_NAME: &str = "tags.scm";
236
/// Loader configuration, (de)serializable with serde.
#[derive(Default, Deserialize, Serialize)]
pub struct Config {
    /// Directories listed under the `parser-directories` key. Defaults to an
    /// empty list when the key is missing; on deserialization the entries are
    /// passed through `deserialize_parser_directories`.
    #[serde(default)]
    #[serde(
        rename = "parser-directories",
        deserialize_with = "deserialize_parser_directories"
    )]
    pub parser_directories: Vec<PathBuf>,
}
246
247#[derive(Serialize, Deserialize, Clone, Default)]
248#[serde(untagged)]
249pub enum PathsJSON {
250    #[default]
251    Empty,
252    Single(PathBuf),
253    Multiple(Vec<PathBuf>),
254}
255
256impl PathsJSON {
257    fn into_vec(self) -> Option<Vec<PathBuf>> {
258        match self {
259            Self::Empty => None,
260            Self::Single(s) => Some(vec![s]),
261            Self::Multiple(s) => Some(s),
262        }
263    }
264
265    const fn is_empty(&self) -> bool {
266        matches!(self, Self::Empty)
267    }
268
269    /// Represent this set of paths as a string that can be included in templates
270    #[must_use]
271    pub fn to_variable_value<'a>(&'a self, default: &'a PathBuf) -> &'a str {
272        match self {
273            Self::Empty => Some(default),
274            Self::Single(path_buf) => Some(path_buf),
275            Self::Multiple(paths) => paths.first(),
276        }
277        .map_or("", |path| path.as_os_str().to_str().unwrap_or(""))
278    }
279}
280
/// An author entry, accepted either as a plain string or as an object.
///
/// Untagged: serde picks the variant from the shape of the JSON value.
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONAuthor {
    /// Author given as a single string.
    String(String),
    /// Author given as an object with a required `name` and optional
    /// `email` / `url`.
    Object {
        name: String,
        email: Option<String>,
        url: Option<String>,
    },
}
291
/// A repository entry, accepted either as a plain string or as an object
/// with a `url` field.
///
/// Untagged: serde picks the variant from the shape of the JSON value.
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONRepository {
    String(String),
    Object { url: String },
}
298
/// Serde model of a package manifest with an optional `tree-sitter` section.
#[derive(Serialize, Deserialize)]
pub struct PackageJSON {
    pub name: String,
    /// Parsed as a semantic version.
    pub version: Version,
    pub description: Option<String>,
    pub author: Option<PackageJSONAuthor>,
    pub maintainers: Option<Vec<PackageJSONAuthor>>,
    pub license: Option<String>,
    pub repository: Option<PackageJSONRepository>,
    /// Language configurations stored under the `tree-sitter` key. Defaults
    /// to `None` when absent and is omitted from output when `None`.
    #[serde(default)]
    #[serde(rename = "tree-sitter", skip_serializing_if = "Option::is_none")]
    pub tree_sitter: Option<Vec<LanguageConfigurationJSON>>,
}
312
/// Serde default for `LanguageConfigurationJSON::path`: the current
/// directory, `"."`.
fn default_path() -> PathBuf {
    let mut current_dir = PathBuf::new();
    current_dir.push(".");
    current_dir
}
316
/// Serde model of one language configuration entry. Field names are
/// (de)serialized in kebab-case (e.g. `file-types`, `content-regex`).
#[derive(Serialize, Deserialize, Clone)]
#[serde(rename_all = "kebab-case")]
pub struct LanguageConfigurationJSON {
    /// Defaults to `"."` (see `default_path`) when absent.
    #[serde(default = "default_path")]
    pub path: PathBuf,
    pub scope: Option<String>,
    pub file_types: Option<Vec<String>>,
    pub content_regex: Option<String>,
    pub first_line_regex: Option<String>,
    pub injection_regex: Option<String>,
    // The path-set fields below default to `PathsJSON::Empty` when absent
    // and are omitted from serialized output while empty.
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
}
338
/// Serde model of a `tree-sitter.json` file (see `TreeSitterJSON::from_file`).
/// Field names are (de)serialized in kebab-case.
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct TreeSitterJSON {
    /// Stored under the `$schema` key.
    #[serde(rename = "$schema")]
    pub schema: Option<String>,
    pub grammars: Vec<Grammar>,
    pub metadata: Metadata,
    /// Falls back to `Bindings::default()` when absent.
    #[serde(default)]
    pub bindings: Bindings,
}
349
350impl TreeSitterJSON {
351    pub fn from_file(path: &Path) -> LoaderResult<Self> {
352        let path = path.join("tree-sitter.json");
353        Ok(serde_json::from_str(&fs::read_to_string(&path).map_err(
354            |e| LoaderError::IO(IoError::new(e, Some(path.as_path()))),
355        )?)?)
356    }
357
358    #[must_use]
359    pub fn has_multiple_language_configs(&self) -> bool {
360        self.grammars.len() > 1
361    }
362}
363
/// Serde model of one entry in the `grammars` list of `TreeSitterJSON`.
/// Field names are (de)serialized in kebab-case. `Option` fields marked
/// `skip_serializing_if` are omitted from output when `None`, and `PathsJSON`
/// fields are omitted while empty.
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Grammar {
    pub name: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub camelcase: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    pub scope: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub path: Option<PathBuf>,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
    // NOTE(review): unlike the other optional fields, this one has no
    // `skip_serializing_if`, so `None` is written out as `null`.
    pub file_types: Option<Vec<String>>,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub injection_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub first_line_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub class_name: Option<String>,
}
395
/// Serde model of the `metadata` section of `TreeSitterJSON`. Optional
/// fields are omitted from output when `None`.
#[derive(Serialize, Deserialize)]
pub struct Metadata {
    /// Parsed as a semantic version.
    pub version: Version,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub authors: Option<Vec<Author>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub links: Option<Links>,
    /// Never read from or written to JSON (`serde(skip)`); set in code only.
    #[serde(skip)]
    pub namespace: Option<String>,
}
410
/// An author entry in `Metadata::authors`. `email` and `url` are omitted
/// from output when `None`.
#[derive(Serialize, Deserialize)]
pub struct Author {
    pub name: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub email: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub url: Option<String>,
}
419
/// The `links` entry of `Metadata`: a required repository plus an optional
/// funding link (omitted from output when `None`).
#[derive(Serialize, Deserialize)]
pub struct Links {
    pub repository: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub funding: Option<String>,
}
426
/// One on/off flag per binding language.
///
/// With `serde(default)` on the struct, any flag missing from the input
/// takes its value from `Bindings::default()`.
#[derive(Serialize, Deserialize, Clone)]
#[serde(default)]
pub struct Bindings {
    pub c: bool,
    pub go: bool,
    pub java: bool,
    /// Never read from or written to JSON (`serde(skip)`).
    #[serde(skip)]
    pub kotlin: bool,
    pub node: bool,
    pub python: bool,
    pub rust: bool,
    pub swift: bool,
    pub zig: bool,
}
441
442impl Bindings {
443    /// return available languages and its default enabled state.
444    #[must_use]
445    pub const fn languages(&self) -> [(&'static str, bool); 8] {
446        [
447            ("c", true),
448            ("go", true),
449            ("java", false),
450            // Comment out Kotlin until the bindings are actually available.
451            // ("kotlin", false),
452            ("node", true),
453            ("python", true),
454            ("rust", true),
455            ("swift", true),
456            ("zig", false),
457        ]
458    }
459
460    /// construct Bindings from a language list. If a language isn't supported, its name will be put on the error part.
461    pub fn with_enabled_languages<'a, I>(languages: I) -> Result<Self, &'a str>
462    where
463        I: Iterator<Item = &'a str>,
464    {
465        let mut out = Self {
466            c: false,
467            go: false,
468            java: false,
469            kotlin: false,
470            node: false,
471            python: false,
472            rust: false,
473            swift: false,
474            zig: false,
475        };
476
477        for v in languages {
478            match v {
479                "c" => out.c = true,
480                "go" => out.go = true,
481                "java" => out.java = true,
482                // Comment out Kotlin until the bindings are actually available.
483                // "kotlin" => out.kotlin = true,
484                "node" => out.node = true,
485                "python" => out.python = true,
486                "rust" => out.rust = true,
487                "swift" => out.swift = true,
488                "zig" => out.zig = true,
489                unsupported => return Err(unsupported),
490            }
491        }
492
493        Ok(out)
494    }
495}
496
impl Default for Bindings {
    /// Default binding selection: `c`, `go`, `node`, `python`, `rust` and
    /// `swift` enabled; `java`, `kotlin` and `zig` disabled. These values
    /// mirror the table returned by `Bindings::languages`.
    fn default() -> Self {
        Self {
            c: true,
            go: true,
            java: false,
            kotlin: false,
            node: true,
            python: true,
            rust: true,
            swift: true,
            zig: false,
        }
    }
}
512
513// Replace `~` or `$HOME` with home path string.
514// (While paths like "~/.tree-sitter/config.json" can be deserialized,
515// they're not valid path for I/O modules.)
516fn deserialize_parser_directories<'de, D>(deserializer: D) -> Result<Vec<PathBuf>, D::Error>
517where
518    D: Deserializer<'de>,
519{
520    let paths = Vec::<PathBuf>::deserialize(deserializer)?;
521    let Ok(home) = etcetera::home_dir() else {
522        return Ok(paths);
523    };
524    let standardized = paths
525        .into_iter()
526        .map(|path| standardize_path(path, &home))
527        .collect();
528    Ok(standardized)
529}
530
/// Expands a leading `~` or `$HOME` path component to `home`.
///
/// Only a whole first component is matched, so `~user/x` is returned
/// unchanged. Paths without either prefix are returned as given.
fn standardize_path(path: PathBuf, home: &Path) -> PathBuf {
    let expanded = ["~", "$HOME"]
        .iter()
        .find_map(|prefix| path.strip_prefix(prefix).ok())
        .map(|rest| home.join(rest));
    expanded.unwrap_or(path)
}
540
541impl Config {
542    #[must_use]
543    pub fn initial() -> Self {
544        let home_dir = etcetera::home_dir().expect("Cannot determine home directory");
545        Self {
546            parser_directories: vec![
547                home_dir.join("github"),
548                home_dir.join("src"),
549                home_dir.join("source"),
550                home_dir.join("projects"),
551                home_dir.join("dev"),
552                home_dir.join("git"),
553            ],
554        }
555    }
556}
557
/// Value of the `BUILD_TARGET` environment variable, captured at compile time.
const BUILD_TARGET: &str = env!("BUILD_TARGET");
559
/// One language configuration held by a `Loader`, with its regexes already
/// compiled.
pub struct LanguageConfiguration<'a> {
    pub scope: Option<String>,
    pub content_regex: Option<Regex>,
    pub first_line_regex: Option<Regex>,
    pub injection_regex: Option<Regex>,
    pub file_types: Vec<String>,
    pub root_path: PathBuf,
    pub highlights_filenames: Option<Vec<PathBuf>>,
    pub injections_filenames: Option<Vec<PathBuf>>,
    pub locals_filenames: Option<Vec<PathBuf>>,
    pub tags_filenames: Option<Vec<PathBuf>>,
    pub language_name: String,
    /// Index into the loader's `languages_by_id` table.
    language_id: usize,
    /// Highlight configuration cache, filled at most once.
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_config: OnceCell<Option<HighlightConfiguration>>,
    /// Tags configuration cache, filled at most once.
    #[cfg(feature = "tree-sitter-tags")]
    tags_config: OnceCell<Option<TagsConfiguration>>,
    /// Borrowed list of highlight names.
    /// NOTE(review): presumably points at the owning `Loader`'s
    /// `highlight_names` — confirm where this reference is created.
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_names: &'a Mutex<Vec<String>>,
    #[cfg(feature = "tree-sitter-highlight")]
    use_all_highlight_names: bool,
    /// Keeps `'a` in use when the feature-gated `highlight_names` field,
    /// the only other user of `'a`, is compiled out.
    _phantom: PhantomData<&'a ()>,
}
583
/// Holds the known language configurations and the lazily loaded
/// `Language` values they refer to.
pub struct Loader {
    /// Set from `TREE_SITTER_LIBDIR` or a cache directory by `Loader::new`.
    pub parser_lib_path: PathBuf,
    /// One entry per language: the grammar directory (its `src`
    /// subdirectory is what gets loaded), a load-once cell for the
    /// `Language`, and optional external files passed to compilation.
    languages_by_id: Vec<(PathBuf, OnceCell<Language>, Option<Vec<PathBuf>>)>,
    language_configurations: Vec<LanguageConfiguration<'static>>,
    /// Maps a file name or joined extension string to indices into
    /// `language_configurations`.
    language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
    language_configuration_in_current_path: Option<usize>,
    /// Maps a first-line regex source string to indices into
    /// `language_configurations`.
    language_configuration_ids_by_first_line_regex: HashMap<String, Vec<usize>>,
    /// Boxed list of highlight names.
    /// NOTE(review): presumably boxed so the address stays stable for the
    /// `&Mutex` held by `LanguageConfiguration` — confirm.
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_names: Box<Mutex<Vec<String>>>,
    /// Starts `true`; cleared by `configure_highlights`.
    #[cfg(feature = "tree-sitter-highlight")]
    use_all_highlight_names: bool,
    debug_build: bool,
    sanitize_build: bool,
    force_rebuild: bool,

    // NOTE(review): `Mutex` is imported only under the
    // `tree-sitter-highlight` feature in the imports visible at the top of
    // this file; confirm this field compiles with `wasm` enabled on its own.
    #[cfg(feature = "wasm")]
    wasm_store: Mutex<Option<tree_sitter::WasmStore>>,
}
602
/// Inputs for compiling (or loading) a single parser.
pub struct CompileConfig<'a> {
    /// Directory holding the parser sources.
    pub src_path: &'a Path,
    /// Header search paths; `new` seeds this with `src_path`.
    pub header_paths: Vec<&'a Path>,
    /// Path to `parser.c`; `new` sets it to `src_path/parser.c`.
    pub parser_path: PathBuf,
    pub scanner_path: Option<PathBuf>,
    pub external_files: Option<&'a [PathBuf]>,
    pub output_path: Option<PathBuf>,
    pub flags: &'a [&'a str],
    pub sanitize: bool,
    /// Grammar name; empty until filled in by the caller.
    pub name: String,
}

impl<'a> CompileConfig<'a> {
    /// Creates a configuration for the sources in `src_path`.
    ///
    /// The result has no scanner, no extra flags, sanitizing off and an
    /// empty `name`; `externals` and `output_path` are stored unchanged.
    #[must_use]
    pub fn new(
        src_path: &'a Path,
        externals: Option<&'a [PathBuf]>,
        output_path: Option<PathBuf>,
    ) -> Self {
        let parser_path = src_path.join("parser.c");
        let header_paths = vec![src_path];
        Self {
            src_path,
            header_paths,
            parser_path,
            scanner_path: None,
            external_files: externals,
            output_path,
            flags: &[],
            sanitize: false,
            name: String::new(),
        }
    }
}
635
// NOTE(review): no safety justification is visible for this impl. `Loader`
// stores `once_cell::unsync::OnceCell` values (see `languages_by_id`), which
// are not `Sync`, so this presumably relies on callers never initializing
// languages from several threads at once — TODO confirm and document the
// invariant as a `SAFETY:` comment.
unsafe impl Sync for Loader {}
637
638impl Loader {
639    pub fn new() -> LoaderResult<Self> {
640        let parser_lib_path = if let Ok(path) = env::var("TREE_SITTER_LIBDIR") {
641            PathBuf::from(path)
642        } else {
643            if cfg!(target_os = "macos") {
644                let legacy_apple_path = etcetera::base_strategy::Apple::new()?
645                    .cache_dir() // `$HOME/Library/Caches/`
646                    .join("tree-sitter");
647                if legacy_apple_path.exists() && legacy_apple_path.is_dir() {
648                    std::fs::remove_dir_all(&legacy_apple_path).map_err(|e| {
649                        LoaderError::IO(IoError::new(e, Some(legacy_apple_path.as_path())))
650                    })?;
651                }
652            }
653
654            etcetera::choose_base_strategy()?
655                .cache_dir()
656                .join("tree-sitter")
657                .join("lib")
658        };
659        Ok(Self::with_parser_lib_path(parser_lib_path))
660    }
661
    /// Creates an empty loader that uses `parser_lib_path` as its library
    /// directory. No languages are registered, all build flags are off, and
    /// (with `tree-sitter-highlight`) the highlight name list is empty with
    /// `use_all_highlight_names` set.
    #[must_use]
    pub fn with_parser_lib_path(parser_lib_path: PathBuf) -> Self {
        Self {
            parser_lib_path,
            languages_by_id: Vec::new(),
            language_configurations: Vec::new(),
            language_configuration_ids_by_file_type: HashMap::new(),
            language_configuration_in_current_path: None,
            language_configuration_ids_by_first_line_regex: HashMap::new(),
            #[cfg(feature = "tree-sitter-highlight")]
            highlight_names: Box::new(Mutex::new(Vec::new())),
            #[cfg(feature = "tree-sitter-highlight")]
            use_all_highlight_names: true,
            debug_build: false,
            sanitize_build: false,
            force_rebuild: false,

            #[cfg(feature = "wasm")]
            wasm_store: Mutex::default(),
        }
    }
683
684    #[cfg(feature = "tree-sitter-highlight")]
685    #[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
686    pub fn configure_highlights(&mut self, names: &[String]) {
687        self.use_all_highlight_names = false;
688        let mut highlights = self.highlight_names.lock().unwrap();
689        highlights.clear();
690        highlights.extend(names.iter().cloned());
691    }
692
693    #[must_use]
694    #[cfg(feature = "tree-sitter-highlight")]
695    #[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
696    pub fn highlight_names(&self) -> Vec<String> {
697        self.highlight_names.lock().unwrap().clone()
698    }
699
700    pub fn find_all_languages(&mut self, config: &Config) -> LoaderResult<()> {
701        if config.parser_directories.is_empty() {
702            warn!(concat!(
703                "You have not configured any parser directories!\n",
704                "Please run `tree-sitter init-config` and edit the resulting\n",
705                "configuration file to indicate where we should look for\n",
706                "language grammars.\n"
707            ));
708        }
709        for parser_container_dir in &config.parser_directories {
710            if let Ok(entries) = fs::read_dir(parser_container_dir) {
711                for entry in entries {
712                    let entry = entry.map_err(|e| LoaderError::IO(IoError::new(e, None)))?;
713                    if let Some(parser_dir_name) = entry.file_name().to_str() {
714                        if parser_dir_name.starts_with("tree-sitter-") {
715                            self.find_language_configurations_at_path(
716                                &parser_container_dir.join(parser_dir_name),
717                                false,
718                            )
719                            .ok();
720                        }
721                    }
722                }
723            }
724        }
725        Ok(())
726    }
727
728    pub fn languages_at_path(&mut self, path: &Path) -> LoaderResult<Vec<(Language, String)>> {
729        if let Ok(configurations) = self.find_language_configurations_at_path(path, true) {
730            let mut language_ids = configurations
731                .iter()
732                .map(|c| (c.language_id, c.language_name.clone()))
733                .collect::<Vec<_>>();
734            language_ids.sort_unstable();
735            language_ids.dedup();
736            language_ids
737                .into_iter()
738                .map(|(id, name)| Ok((self.language_for_id(id)?, name)))
739                .collect::<LoaderResult<Vec<_>>>()
740        } else {
741            Ok(Vec::new())
742        }
743    }
744
745    #[must_use]
746    pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> {
747        self.language_configurations
748            .iter()
749            .map(|c| (c, self.languages_by_id[c.language_id].0.as_ref()))
750            .collect()
751    }
752
753    pub fn language_configuration_for_scope(
754        &self,
755        scope: &str,
756    ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
757        for configuration in &self.language_configurations {
758            if configuration.scope.as_ref().is_some_and(|s| s == scope) {
759                let language = self.language_for_id(configuration.language_id)?;
760                return Ok(Some((language, configuration)));
761            }
762        }
763        Ok(None)
764    }
765
766    pub fn language_configuration_for_first_line_regex(
767        &self,
768        path: &Path,
769    ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
770        self.language_configuration_ids_by_first_line_regex
771            .iter()
772            .try_fold(None, |_, (regex, ids)| {
773                if let Some(regex) = Self::regex(Some(regex)) {
774                    let file = fs::File::open(path)
775                        .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
776                    let reader = BufReader::new(file);
777                    let first_line = reader
778                        .lines()
779                        .next()
780                        .transpose()
781                        .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
782                    if let Some(first_line) = first_line {
783                        if regex.is_match(&first_line) && !ids.is_empty() {
784                            let configuration = &self.language_configurations[ids[0]];
785                            let language = self.language_for_id(configuration.language_id)?;
786                            return Ok(Some((language, configuration)));
787                        }
788                    }
789                }
790
791                Ok(None)
792            })
793    }
794
    /// Picks a language configuration for the file at `path` based on its
    /// name, and loads the corresponding language.
    ///
    /// Lookup order in `language_configuration_ids_by_file_type`:
    /// 1. the exact file name;
    /// 2. otherwise, all of the file's extensions joined with `.`
    ///    (e.g. `tar.gz` for `foo.tar.gz`).
    ///
    /// When several configurations are registered for the key, the file is
    /// read and each candidate is scored by its content regex; the first
    /// candidate with the highest score wins. Returns `Ok(None)` when no
    /// configuration is registered for the file.
    ///
    /// # Errors
    ///
    /// Returns `LoaderError::IO` if the file must be read and cannot be, or
    /// a load error if the chosen language cannot be loaded.
    pub fn language_configuration_for_file_name(
        &self,
        path: &Path,
    ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
        // Find all the language configurations that match this file name
        // or a suffix of the file name.
        let configuration_ids = path
            .file_name()
            .and_then(|n| n.to_str())
            .and_then(|file_name| self.language_configuration_ids_by_file_type.get(file_name))
            .or_else(|| {
                // Peel extensions off one at a time (last first), then look up
                // the whole chain in its original order. A non-UTF-8
                // extension aborts the fallback via `?`.
                let mut path = path.to_owned();
                let mut extensions = Vec::with_capacity(2);
                while let Some(extension) = path.extension() {
                    extensions.push(extension.to_str()?.to_string());
                    path = PathBuf::from(path.file_stem()?.to_os_string());
                }
                extensions.reverse();
                self.language_configuration_ids_by_file_type
                    .get(&extensions.join("."))
            });

        if let Some(configuration_ids) = configuration_ids {
            if !configuration_ids.is_empty() {
                let configuration = if configuration_ids.len() == 1 {
                    &self.language_configurations[configuration_ids[0]]
                }
                // If multiple language configurations match, then determine which
                // one to use by applying the configurations' content regexes.
                else {
                    let file_contents =
                        fs::read(path).map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
                    let file_contents = String::from_utf8_lossy(&file_contents);
                    // Starts below the lowest possible score (-1) so that the
                    // first candidate is always accepted.
                    let mut best_score = -2isize;
                    let mut best_configuration_id = None;
                    for configuration_id in configuration_ids {
                        let config = &self.language_configurations[*configuration_id];

                        // If the language configuration has a content regex, assign
                        // a score based on the length of the first match.
                        let score;
                        if let Some(content_regex) = &config.content_regex {
                            if let Some(mat) = content_regex.find(&file_contents) {
                                score = (mat.end() - mat.start()) as isize;
                            }
                            // If the content regex does not match, then *penalize* this
                            // language configuration, so that language configurations
                            // without content regexes are preferred over those with
                            // non-matching content regexes.
                            else {
                                score = -1;
                            }
                        } else {
                            score = 0;
                        }
                        // Strict `>` keeps the earliest candidate on ties.
                        if score > best_score {
                            best_configuration_id = Some(*configuration_id);
                            best_score = score;
                        }
                    }

                    // The list is non-empty and every score exceeds -2, so a
                    // best candidate has always been recorded here.
                    &self.language_configurations[best_configuration_id.unwrap()]
                };

                let language = self.language_for_id(configuration.language_id)?;
                return Ok(Some((language, configuration)));
            }
        }

        Ok(None)
    }
866
867    pub fn language_configuration_for_injection_string(
868        &self,
869        string: &str,
870    ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
871        let mut best_match_length = 0;
872        let mut best_match_position = None;
873        for (i, configuration) in self.language_configurations.iter().enumerate() {
874            if let Some(injection_regex) = &configuration.injection_regex {
875                if let Some(mat) = injection_regex.find(string) {
876                    let length = mat.end() - mat.start();
877                    if length > best_match_length {
878                        best_match_position = Some(i);
879                        best_match_length = length;
880                    }
881                }
882            }
883        }
884
885        if let Some(i) = best_match_position {
886            let configuration = &self.language_configurations[i];
887            let language = self.language_for_id(configuration.language_id)?;
888            Ok(Some((language, configuration)))
889        } else {
890            Ok(None)
891        }
892    }
893
894    pub fn language_for_configuration(
895        &self,
896        configuration: &LanguageConfiguration,
897    ) -> LoaderResult<Language> {
898        self.language_for_id(configuration.language_id)
899    }
900
901    fn language_for_id(&self, id: usize) -> LoaderResult<Language> {
902        let (path, language, externals) = &self.languages_by_id[id];
903        language
904            .get_or_try_init(|| {
905                let src_path = path.join("src");
906                self.load_language_at_path(CompileConfig::new(
907                    &src_path,
908                    externals.as_deref(),
909                    None,
910                ))
911            })
912            .cloned()
913    }
914
915    pub fn compile_parser_at_path(
916        &self,
917        grammar_path: &Path,
918        output_path: PathBuf,
919        flags: &[&str],
920    ) -> LoaderResult<()> {
921        let src_path = grammar_path.join("src");
922        let mut config = CompileConfig::new(&src_path, None, Some(output_path));
923        config.flags = flags;
924        self.load_language_at_path(config).map(|_| ())
925    }
926
927    pub fn load_language_at_path(&self, mut config: CompileConfig) -> LoaderResult<Language> {
928        let grammar_path = config.src_path.join("grammar.json");
929        config.name = Self::grammar_json_name(&grammar_path)?;
930        self.load_language_at_path_with_name(config)
931    }
932
/// Loads the language named `config.name` from the sources in `config.src_path`,
/// compiling it first when required.
///
/// Outline of what the body does:
/// - The library file name is `config.name`, extended when `debug_build` /
///   `sanitize_build` are set; the exported symbol looked up at the end is
///   `tree_sitter_<name>` with `-` replaced by `_`.
/// - When `config.output_path` is `None`, the output goes into `parser_lib_path`
///   (created if missing) with the platform's dynamic-library extension, or `.wasm`
///   when the `wasm` feature is enabled and a wasm store is set.
/// - A rebuild is forced by `force_rebuild` or by an explicit output path; otherwise
///   `needs_recompile` decides, given `parser.c`, the scanner (if any) and the external
///   files.
/// - With the `wasm` feature and a wasm store set, the parser is compiled to wasm if
///   needed and loaded through the store; the function returns from that branch.
/// - Otherwise a per-output lock file coordinates concurrent builds, the parser is
///   compiled with `compile_parser_to_dylib` if needed, and the library is loaded with
///   `load_language`.
///
/// # Errors
///
/// Returns an error when a directory or lock-file operation fails, when compilation
/// fails, when the output file does not exist after the build step, or when loading the
/// library or the wasm module fails.
933    pub fn load_language_at_path_with_name(
934        &self,
935        mut config: CompileConfig,
936    ) -> LoaderResult<Language> {
937        let mut lib_name = config.name.clone();
938        let language_fn_name = format!("tree_sitter_{}", config.name.replace('-', "_"));
        // NOTE(review): the trailing `._` presumably acts as a placeholder extension that
        // `set_extension` below replaces -- confirm.
939        if self.debug_build {
940            lib_name.push_str(".debug._");
941        }
942
943        if self.sanitize_build {
944            lib_name.push_str(".sanitize._");
945            config.sanitize = true;
946        }
947
        // The shared parser library directory is only needed when no explicit output path
        // was supplied.
948        if config.output_path.is_none() {
949            fs::create_dir_all(&self.parser_lib_path).map_err(|e| {
950                LoaderError::IO(IoError::new(e, Some(self.parser_lib_path.as_path())))
951            })?;
952        }
953
954        let mut recompile = self.force_rebuild || config.output_path.is_some(); // if specified, always recompile
955
956        let output_path = config.output_path.unwrap_or_else(|| {
957            let mut path = self.parser_lib_path.join(lib_name);
958            path.set_extension(env::consts::DLL_EXTENSION);
959            #[cfg(feature = "wasm")]
960            if self.wasm_store.lock().unwrap().is_some() {
961                path.set_extension("wasm");
962            }
963            path
964        });
965        config.output_path = Some(output_path.clone());
966
967        let parser_path = config.src_path.join("parser.c");
968        config.scanner_path = self.get_scanner_path(config.src_path);
969
        // Every input whose state is handed to `needs_recompile`: the generated parser,
        // the scanner if there is one, and the external files joined onto `src_path`.
970        let mut paths_to_check = vec![parser_path];
971
972        if let Some(scanner_path) = config.scanner_path.as_ref() {
973            paths_to_check.push(scanner_path.clone());
974        }
975
976        paths_to_check.extend(
977            config
978                .external_files
979                .unwrap_or_default()
980                .iter()
981                .map(|p| config.src_path.join(p)),
982        );
983
        // Only consult `needs_recompile` when a rebuild has not already been forced.
984        if !recompile {
985            recompile = needs_recompile(&output_path, &paths_to_check)?;
986        }
987
        // With a wasm store set, build (if needed) and load a wasm module instead of a
        // native library. This branch returns, so the lock-file handling below is skipped.
988        #[cfg(feature = "wasm")]
989        if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() {
990            if recompile {
991                self.compile_parser_to_wasm(
992                    &config.name,
993                    config.src_path,
994                    config
995                        .scanner_path
996                        .as_ref()
997                        .and_then(|p| p.strip_prefix(config.src_path).ok()),
998                    &output_path,
999                )?;
1000            }
1001
1002            let wasm_bytes = fs::read(&output_path)
1003                .map_err(|e| LoaderError::IO(IoError::new(e, Some(output_path.as_path()))))?;
1004            return Ok(wasm_store.load_language(&config.name, &wasm_bytes)?);
1005        }
1006
1007        // Create a unique lock path based on the output path hash to prevent
1008        // interference when multiple processes build the same grammar (by name)
1009        // to different output locations
1010        let lock_hash = {
1011            let mut hasher = std::hash::DefaultHasher::new();
1012            output_path.hash(&mut hasher);
1013            format!("{:x}", hasher.finish())
1014        };
1015
        // The lock file lives at `<base>/tree-sitter/lock/<name>-<hash>.lock`, where
        // `<base>` is the cache directory, or a temp-dir path when `CROSS_RUNNER` is set.
        // NOTE(review): the `TempDir` value is a temporary here and only its path is kept
        // -- confirm that this is intended.
1016        let lock_path = if env::var("CROSS_RUNNER").is_ok() {
1017            tempfile::tempdir()
1018                .expect("create a temp dir")
1019                .path()
1020                .to_path_buf()
1021        } else {
1022            etcetera::choose_base_strategy()?.cache_dir()
1023        }
1024        .join("tree-sitter")
1025        .join("lock")
1026        .join(format!("{}-{lock_hash}.lock", config.name));
1027
        // If the lock file already exists, start from "do not recompile"; only a lock
        // file that can be locked and is older than 30 seconds switches this back on.
        // NOTE(review): the second `recompile = false` inside the first branch is
        // redundant with the assignment made right after opening the file.
1028        if let Ok(lock_file) = fs::OpenOptions::new().write(true).open(&lock_path) {
1029            recompile = false;
1030            if lock_file.try_lock_exclusive().is_err() {
1031                // if we can't acquire the lock, another process is compiling the parser, wait for
1032                // it and don't recompile
1033                lock_file
1034                    .lock_exclusive()
1035                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
1036                recompile = false;
1037            } else {
1038                // if we can acquire the lock, check if the lock file is older than 30 seconds, a
1039                // run that was interrupted and left the lock file behind should not block
1040                // subsequent runs
1041                let time = lock_file
1042                    .metadata()
1043                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?
1044                    .modified()
1045                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?
1046                    .elapsed()?
1047                    .as_secs();
1048                if time > 30 {
1049                    fs::remove_file(&lock_path)
1050                        .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
1051                    recompile = true;
1052                }
1053            }
1054        }
1055
        // Create and lock the lock file, then build. The lock file and its path are
        // handed to `compile_parser_to_dylib`.
1056        if recompile {
1057            let parent_path = lock_path.parent().unwrap();
1058            fs::create_dir_all(parent_path)
1059                .map_err(|e| LoaderError::IO(IoError::new(e, Some(parent_path))))?;
1060            let lock_file = fs::OpenOptions::new()
1061                .create(true)
1062                .truncate(true)
1063                .write(true)
1064                .open(&lock_path)
1065                .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
1066            lock_file
1067                .lock_exclusive()
1068                .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
1069
1070            self.compile_parser_to_dylib(&config, &lock_file, &lock_path)?;
1071
1072            if config.scanner_path.is_some() {
1073                self.check_external_scanner(&output_path)?;
1074            }
1075        }
1076
1077        // Ensure the dynamic library exists before trying to load it. This can
1078        // happen in race conditions where we couldn't acquire the lock because
1079        // another process was compiling but it still hasn't finished by the
1080        // time we reach this point, so the output file still doesn't exist.
1081        //
1082        // Instead of allowing the `load_language` call below to fail, return a
1083        // clearer error to the user here.
1084        if !output_path.exists() {
1085            let msg = format!(
1086                "Dynamic library `{}` not found after build attempt. \
1087                Are you running multiple processes building to the same output location?",
1088                output_path.display()
1089            );
1090
1091            Err(LoaderError::IO(IoError::new(
1092                std::io::Error::new(std::io::ErrorKind::NotFound, msg),
1093                Some(output_path.as_path()),
1094            )))?;
1095        }
1096
1097        Self::load_language(&output_path, &language_fn_name)
1098    }
1099
1100    pub fn load_language(path: &Path, function_name: &str) -> LoaderResult<Language> {
1101        let library = unsafe { Library::new(path) }.map_err(|e| {
1102            LoaderError::Library(LibraryError {
1103                error: e,
1104                path: path.to_string_lossy().to_string(),
1105            })
1106        })?;
1107        let language = unsafe {
1108            let language_fn = library
1109                .get::<Symbol<unsafe extern "C" fn() -> Language>>(function_name.as_bytes())
1110                .map_err(|e| {
1111                    LoaderError::Symbol(SymbolError {
1112                        error: e,
1113                        symbol_name: function_name.to_string(),
1114                        path: path.to_string_lossy().to_string(),
1115                    })
1116                })?;
1117            language_fn()
1118        };
1119        mem::forget(library);
1120        Ok(language)
1121    }
1122
1123    fn compile_parser_to_dylib(
1124        &self,
1125        config: &CompileConfig,
1126        lock_file: &fs::File,
1127        lock_path: &Path,
1128    ) -> LoaderResult<()> {
1129        let mut cc_config = cc::Build::new();
1130        cc_config
1131            .cargo_metadata(false)
1132            .cargo_warnings(false)
1133            .target(BUILD_TARGET)
1134            // BUILD_TARGET from the build environment becomes a runtime host for cc.
1135            // Otherwise, when cross compiled, cc will keep looking for a cross-compiler
1136            // on the target system instead of the native compiler.
1137            .host(BUILD_TARGET)
1138            .debug(self.debug_build)
1139            .file(&config.parser_path)
1140            .includes(&config.header_paths)
1141            .std("c11");
1142
1143        if let Some(scanner_path) = config.scanner_path.as_ref() {
1144            cc_config.file(scanner_path);
1145        }
1146
1147        if self.debug_build {
1148            cc_config.opt_level(0).extra_warnings(true);
1149        } else {
1150            cc_config.opt_level(2).extra_warnings(false);
1151        }
1152
1153        for flag in config.flags {
1154            cc_config.define(flag, None);
1155        }
1156
1157        let compiler = cc_config.get_compiler();
1158        let mut command = Command::new(compiler.path());
1159        command.args(compiler.args());
1160        for (key, value) in compiler.env() {
1161            command.env(key, value);
1162        }
1163
1164        let output_path = config.output_path.as_ref().unwrap();
1165
1166        let temp_dir = if compiler.is_like_msvc() {
1167            let out = format!("-out:{}", output_path.to_str().unwrap());
1168            command.arg(if self.debug_build { "-LDd" } else { "-LD" });
1169            command.arg("-utf-8");
1170
1171            // Windows creates intermediate files when compiling (.exp, .lib, .obj), which causes
1172            // issues when multiple processes are compiling in the same directory. This creates a
1173            // temporary directory for those files to go into, which is deleted after compilation.
1174            let temp_dir = output_path.parent().unwrap().join(format!(
1175                "tmp_{}_{:?}",
1176                std::process::id(),
1177                std::thread::current().id()
1178            ));
1179            std::fs::create_dir_all(&temp_dir).unwrap();
1180
1181            command.arg(format!("/Fo{}\\", temp_dir.display()));
1182            command.args(cc_config.get_files());
1183            command.arg("-link").arg(out);
1184            command.arg(format!("/IMPLIB:{}.lib", temp_dir.join("temp").display()));
1185
1186            Some(temp_dir)
1187        } else {
1188            command.arg("-Werror=implicit-function-declaration");
1189            if cfg!(any(target_os = "macos", target_os = "ios")) {
1190                command.arg("-dynamiclib");
1191                // TODO: remove when supported
1192                command.arg("-UTREE_SITTER_REUSE_ALLOCATOR");
1193            } else {
1194                command.arg("-shared");
1195                command.arg("-Wl,--no-undefined");
1196                #[cfg(target_os = "openbsd")]
1197                command.arg("-lc");
1198            }
1199            command.args(cc_config.get_files());
1200            command.arg("-o").arg(output_path);
1201
1202            None
1203        };
1204
1205        let output = command.output().map_err(|e| {
1206            LoaderError::Compiler(CompilerError {
1207                error: e,
1208                command: Box::new(command),
1209            })
1210        })?;
1211
1212        if let Some(temp_dir) = temp_dir {
1213            let _ = fs::remove_dir_all(temp_dir);
1214        }
1215
1216        FileExt::unlock(lock_file)
1217            .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path))))?;
1218        fs::remove_file(lock_path)
1219            .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path))))?;
1220
1221        if output.status.success() {
1222            Ok(())
1223        } else {
1224            Err(LoaderError::Compilation(
1225                String::from_utf8_lossy(&output.stdout).to_string(),
1226                String::from_utf8_lossy(&output.stderr).to_string(),
1227            ))
1228        }
1229    }
1230
1231    #[cfg(unix)]
1232    fn check_external_scanner(&self, library_path: &Path) -> LoaderResult<()> {
1233        let section = " T ";
1234        // Older ppc toolchains incorrectly report functions in the Data section. This bug has been
1235        // fixed, but we still need to account for older systems.
1236        let old_ppc_section = if cfg!(all(target_arch = "powerpc64", target_os = "linux")) {
1237            Some(" D ")
1238        } else {
1239            None
1240        };
1241        let nm_cmd = env::var("NM").unwrap_or_else(|_| "nm".to_owned());
1242        let command = Command::new(nm_cmd)
1243            .arg("--defined-only")
1244            .arg(library_path)
1245            .output();
1246        if let Ok(output) = command {
1247            if output.status.success() {
1248                let mut non_static_symbols = String::new();
1249                for line in String::from_utf8_lossy(&output.stdout).lines() {
1250                    if line.contains(section) || old_ppc_section.is_some_and(|s| line.contains(s)) {
1251                        if let Some(function_name) =
1252                            line.split_whitespace().collect::<Vec<_>>().get(2)
1253                        {
1254                            if !line.contains("tree_sitter_") {
1255                                writeln!(&mut non_static_symbols, "  `{function_name}`").unwrap();
1256                            }
1257                        }
1258                    }
1259                }
1260                if !non_static_symbols.is_empty() {
1261                    warn!(
1262                        "Found non-static non-tree-sitter functions in the external scanner\n{non_static_symbols}\n{}",
1263                        concat!(
1264                            "Consider making these functions static, they can cause conflicts ",
1265                            "when another tree-sitter project uses the same function name."
1266                        )
1267                    );
1268                }
1269            }
1270        } else {
1271            warn!(
1272                "Failed to run `nm` to verify symbols in {}",
1273                library_path.display()
1274            );
1275        }
1276
1277        Ok(())
1278    }
1279
/// Windows variant of the external scanner symbol check: performs no inspection and
/// always returns `Ok(())`.
#[cfg(windows)]
fn check_external_scanner(&self, _library_path: &Path) -> LoaderResult<()> {
    // TODO: there is no `nm` command on Windows, so the symbol check is not implemented
    // here yet; contributions are welcome.
    Ok(())
}
1285
1286    pub fn compile_parser_to_wasm(
1287        &self,
1288        language_name: &str,
1289        src_path: &Path,
1290        scanner_filename: Option<&Path>,
1291        output_path: &Path,
1292    ) -> LoaderResult<()> {
1293        let clang_executable = self.ensure_wasi_sdk_exists()?;
1294
1295        let mut command = Command::new(&clang_executable);
1296        command.current_dir(src_path).args([
1297            "--target=wasm32-unknown-wasi",
1298            "-o",
1299            output_path.to_str().unwrap(),
1300            "-fPIC",
1301            "-shared",
1302            if self.debug_build { "-g" } else { "-Os" },
1303            format!("-Wl,--export=tree_sitter_{language_name}").as_str(),
1304            "-Wl,--allow-undefined",
1305            "-Wl,--no-entry",
1306            "-nostdlib",
1307            "-fno-exceptions",
1308            "-fvisibility=hidden",
1309            "-I",
1310            ".",
1311            "parser.c",
1312        ]);
1313
1314        if let Some(scanner_filename) = scanner_filename {
1315            command.arg(scanner_filename);
1316        }
1317
1318        let output = command.output().map_err(LoaderError::WasmCompiler)?;
1319
1320        if !output.status.success() {
1321            return Err(LoaderError::WasmCompilation(
1322                String::from_utf8_lossy(&output.stderr).to_string(),
1323            ));
1324        }
1325
1326        Ok(())
1327    }
1328
1329    /// Extracts a tar.gz archive with `tar`, stripping the first path component.
1330    fn extract_tar_gz_with_strip(
1331        &self,
1332        archive_path: &Path,
1333        destination: &Path,
1334    ) -> LoaderResult<()> {
1335        let status = Command::new("tar")
1336            .arg("-xzf")
1337            .arg(archive_path)
1338            .arg("--strip-components=1")
1339            .arg("-C")
1340            .arg(destination)
1341            .status()
1342            .map_err(|e| LoaderError::Tar(archive_path.to_string_lossy().to_string(), e))?;
1343
1344        if !status.success() {
1345            return Err(LoaderError::Extraction(
1346                archive_path.to_string_lossy().to_string(),
1347                destination.to_string_lossy().to_string(),
1348            ));
1349        }
1350
1351        Ok(())
1352    }
1353
1354    /// This ensures that the wasi-sdk is available, downloading and extracting it if necessary,
1355    /// and returns the path to the `clang` executable.
1356    ///
1357    /// If `TREE_SITTER_WASI_SDK_PATH` is set, it will use that path to look for the clang executable.
1358    fn ensure_wasi_sdk_exists(&self) -> LoaderResult<PathBuf> {
1359        let possible_executables = if cfg!(windows) {
1360            vec![
1361                "clang.exe",
1362                "wasm32-unknown-wasi-clang.exe",
1363                "wasm32-wasi-clang.exe",
1364            ]
1365        } else {
1366            vec!["clang", "wasm32-unknown-wasi-clang", "wasm32-wasi-clang"]
1367        };
1368
1369        if let Ok(wasi_sdk_path) = std::env::var("TREE_SITTER_WASI_SDK_PATH") {
1370            let wasi_sdk_dir = PathBuf::from(wasi_sdk_path);
1371
1372            for exe in &possible_executables {
1373                let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1374                if clang_exe.exists() {
1375                    return Ok(clang_exe);
1376                }
1377            }
1378
1379            return Err(LoaderError::WasiSDKClang(WasiSDKClangError {
1380                wasi_sdk_dir: wasi_sdk_dir.to_string_lossy().to_string(),
1381                possible_executables,
1382                download: false,
1383            }));
1384        }
1385
1386        let cache_dir = etcetera::choose_base_strategy()?
1387            .cache_dir()
1388            .join("tree-sitter");
1389        fs::create_dir_all(&cache_dir)
1390            .map_err(|e| LoaderError::IO(IoError::new(e, Some(cache_dir.as_path()))))?;
1391
1392        let wasi_sdk_dir = cache_dir.join("wasi-sdk");
1393
1394        for exe in &possible_executables {
1395            let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1396            if clang_exe.exists() {
1397                return Ok(clang_exe);
1398            }
1399        }
1400
1401        fs::create_dir_all(&wasi_sdk_dir)
1402            .map_err(|e| LoaderError::IO(IoError::new(e, Some(wasi_sdk_dir.as_path()))))?;
1403
1404        let arch_os = if cfg!(target_os = "macos") {
1405            if cfg!(target_arch = "aarch64") {
1406                "arm64-macos"
1407            } else {
1408                "x86_64-macos"
1409            }
1410        } else if cfg!(target_os = "windows") {
1411            if cfg!(target_arch = "aarch64") {
1412                "arm64-windows"
1413            } else {
1414                "x86_64-windows"
1415            }
1416        } else if cfg!(target_os = "linux") {
1417            if cfg!(target_arch = "aarch64") {
1418                "arm64-linux"
1419            } else {
1420                "x86_64-linux"
1421            }
1422        } else {
1423            return Err(LoaderError::WasiSDKPlatform);
1424        };
1425
1426        let sdk_filename = format!("wasi-sdk-{WASI_SDK_VERSION}-{arch_os}.tar.gz");
1427        let wasi_sdk_major_version = WASI_SDK_VERSION
1428            .trim_end_matches(char::is_numeric) // trim minor version...
1429            .trim_end_matches('.'); // ...and '.' separator
1430        let sdk_url = format!(
1431            "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-{wasi_sdk_major_version}/{sdk_filename}",
1432        );
1433
1434        info!("Downloading wasi-sdk from {sdk_url}...");
1435        let temp_tar_path = cache_dir.join(sdk_filename);
1436
1437        let status = Command::new("curl")
1438            .arg("-f")
1439            .arg("-L")
1440            .arg("-o")
1441            .arg(&temp_tar_path)
1442            .arg(&sdk_url)
1443            .status()
1444            .map_err(|e| LoaderError::Curl(sdk_url.clone(), e))?;
1445
1446        if !status.success() {
1447            return Err(LoaderError::WasiSDKDownload(sdk_url));
1448        }
1449
1450        info!("Extracting wasi-sdk to {}...", wasi_sdk_dir.display());
1451        self.extract_tar_gz_with_strip(&temp_tar_path, &wasi_sdk_dir)?;
1452
1453        fs::remove_file(temp_tar_path).ok();
1454        for exe in &possible_executables {
1455            let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1456            if clang_exe.exists() {
1457                return Ok(clang_exe);
1458            }
1459        }
1460
1461        Err(LoaderError::WasiSDKClang(WasiSDKClangError {
1462            wasi_sdk_dir: wasi_sdk_dir.to_string_lossy().to_string(),
1463            possible_executables,
1464            download: true,
1465        }))
1466    }
1467
/// Returns the highlight configuration of the language selected for the injection
/// string `string`, if there is one.
///
/// Failures are logged with `error!` and reported as `None`: both a failure to find or
/// load the language and a failure to build its highlight configuration. `None` is also
/// returned when no configuration matches or the match has no highlight configuration.
#[must_use]
#[cfg(feature = "tree-sitter-highlight")]
pub fn highlight_config_for_injection_string<'a>(
    &'a self,
    string: &str,
) -> Option<&'a HighlightConfiguration> {
    let (language, configuration) =
        match self.language_configuration_for_injection_string(string) {
            Ok(found) => found?,
            Err(e) => {
                error!("Failed to load language for injection string '{string}': {e}");
                return None;
            }
        };

    match configuration.highlight_config(language, None) {
        Ok(config) => config,
        Err(e) => {
            error!("Failed to load highlight config for injection string '{string}': {e}");
            None
        }
    }
}
1494
1495    #[must_use]
1496    pub fn get_language_configuration_in_current_path(&self) -> Option<&LanguageConfiguration> {
1497        self.language_configuration_in_current_path
1498            .map(|i| &self.language_configurations[i])
1499    }
1500
/// Registers the language configurations found at `parser_path` and returns the slice of
/// configurations added by this call.
///
/// The configurations come from `TreeSitterJSON::from_file(parser_path)`, one per grammar
/// entry. Grammar entries that resolve to the same language path within this call share a
/// single entry in `languages_by_id`. A serialization error is logged as a warning and any
/// other error from `from_file` is ignored. If this leaves no new configuration but
/// `<parser_path>/src/grammar.json` exists, a minimal configuration is built from the
/// grammar's name instead.
///
/// When `set_current_path_config` is true and no current-path configuration has been
/// recorded yet, the first configuration added from the JSON file is recorded as the
/// configuration of the current path. The `grammar.json` fallback does not record one.
///
/// # Errors
///
/// Returns `LoaderError::ExternalFile` when an external file, once joined onto
/// `parser_path`, does not start with `parser_path`, and propagates errors from
/// `grammar_json_name` in the fallback case.
1501    pub fn find_language_configurations_at_path(
1502        &mut self,
1503        parser_path: &Path,
1504        set_current_path_config: bool,
1505    ) -> LoaderResult<&[LanguageConfiguration]> {
1506        let initial_language_configuration_count = self.language_configurations.len();
1507
1508        match TreeSitterJSON::from_file(parser_path) {
1509            Ok(config) => {
                // Languages registered before this call are never reused; only entries
                // from this index onwards are compared below.
1510                let language_count = self.languages_by_id.len();
1511                for grammar in config.grammars {
1512                    // Determine the path to the parser directory. This can be specified in
1513                    // the tree-sitter.json, but defaults to the directory containing the
1514                    // tree-sitter.json.
1515                    let language_path =
1516                        parser_path.join(grammar.path.unwrap_or(PathBuf::from(".")));
1517
1518                    // Determine if a previous language configuration in this tree-sitter.json
1519                    // file already uses the same language.
1520                    let mut language_id = None;
1521                    for (id, (path, _, _)) in
1522                        self.languages_by_id.iter().enumerate().skip(language_count)
1523                    {
1524                        if language_path == *path {
1525                            language_id = Some(id);
1526                        }
1527                    }
1528
1529                    // If not, add a new language path to the list.
1530                    let language_id = if let Some(language_id) = language_id {
1531                        language_id
1532                    } else {
1533                        self.languages_by_id.push((
1534                            language_path,
1535                            OnceCell::new(),
1536                            grammar
1537                                .external_files
1538                                .clone()
1539                                .into_vec()
1540                                .map(|files| {
1541                                    files
1542                                        .into_iter()
1543                                        .map(|path| {
1544                                            let path = parser_path.join(path);
1545                                            // prevent p being above/outside of parser_path
                                            // NOTE(review): this is a prefix check on the
                                            // joined path; whether `..` components are
                                            // handled elsewhere is not visible here.
1546                                            if path.starts_with(parser_path) {
1547                                                Ok(path)
1548                                            } else {
1549                                                Err(LoaderError::ExternalFile(
1550                                                    path.to_string_lossy().to_string(),
1551                                                    parser_path.to_string_lossy().to_string(),
1552                                                ))
1553                                            }
1554                                        })
1555                                        .collect::<LoaderResult<Vec<_>>>()
1556                                })
1557                                .transpose()?,
1558                        ));
1559                        self.languages_by_id.len() - 1
1560                    };
1561
1562                    let configuration = LanguageConfiguration {
1563                        root_path: parser_path.to_path_buf(),
1564                        language_name: grammar.name,
1565                        scope: Some(grammar.scope),
1566                        language_id,
1567                        file_types: grammar.file_types.unwrap_or_default(),
1568                        content_regex: Self::regex(grammar.content_regex.as_deref()),
1569                        first_line_regex: Self::regex(grammar.first_line_regex.as_deref()),
1570                        injection_regex: Self::regex(grammar.injection_regex.as_deref()),
1571                        injections_filenames: grammar.injections.into_vec(),
1572                        locals_filenames: grammar.locals.into_vec(),
1573                        tags_filenames: grammar.tags.into_vec(),
1574                        highlights_filenames: grammar.highlights.into_vec(),
1575                        #[cfg(feature = "tree-sitter-highlight")]
1576                        highlight_config: OnceCell::new(),
1577                        #[cfg(feature = "tree-sitter-tags")]
1578                        tags_config: OnceCell::new(),
1579                        #[cfg(feature = "tree-sitter-highlight")]
1580                        highlight_names: &self.highlight_names,
1581                        #[cfg(feature = "tree-sitter-highlight")]
1582                        use_all_highlight_names: self.use_all_highlight_names,
1583                        _phantom: PhantomData,
1584                    };
1585
                    // Index the configuration under the position it is about to occupy
                    // in `language_configurations`.
1586                    for file_type in &configuration.file_types {
1587                        self.language_configuration_ids_by_file_type
1588                            .entry(file_type.clone())
1589                            .or_default()
1590                            .push(self.language_configurations.len());
1591                    }
1592                    if let Some(first_line_regex) = &configuration.first_line_regex {
1593                        self.language_configuration_ids_by_first_line_regex
1594                            .entry(first_line_regex.to_string())
1595                            .or_default()
1596                            .push(self.language_configurations.len());
1597                    }
1598
                    // NOTE(review): the transmute rewrites the configuration's lifetime
                    // to `'static` so it can be stored in `self`. With the
                    // `tree-sitter-highlight` feature the configuration borrows
                    // `self.highlight_names`; confirm that this borrow stays valid for
                    // as long as the configuration is stored.
1599                    self.language_configurations.push(unsafe {
1600                        mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
1601                            configuration,
1602                        )
1603                    });
1604
1605                    if set_current_path_config
1606                        && self.language_configuration_in_current_path.is_none()
1607                    {
1608                        self.language_configuration_in_current_path =
1609                            Some(self.language_configurations.len() - 1);
1610                    }
1611                }
1612            }
1613            Err(LoaderError::Serialization(e)) => {
1614                warn!(
1615                    "Failed to parse {} -- {e}",
1616                    parser_path.join("tree-sitter.json").display()
1617                );
1618            }
            // Any other error from `TreeSitterJSON::from_file` is ignored here.
1619            _ => {}
1620        }
1621
1622        // If we didn't find any language configurations in the tree-sitter.json file,
1623        // but there is a grammar.json file, then use the grammar file to form a simple
1624        // language configuration.
1625        if self.language_configurations.len() == initial_language_configuration_count
1626            && parser_path.join("src").join("grammar.json").exists()
1627        {
1628            let grammar_path = parser_path.join("src").join("grammar.json");
1629            let language_name = Self::grammar_json_name(&grammar_path)?;
            // The new language takes the next free id; the matching `languages_by_id`
            // entry is pushed right after the configuration.
1630            let configuration = LanguageConfiguration {
1631                root_path: parser_path.to_owned(),
1632                language_name,
1633                language_id: self.languages_by_id.len(),
1634                file_types: Vec::new(),
1635                scope: None,
1636                content_regex: None,
1637                first_line_regex: None,
1638                injection_regex: None,
1639                injections_filenames: None,
1640                locals_filenames: None,
1641                highlights_filenames: None,
1642                tags_filenames: None,
1643                #[cfg(feature = "tree-sitter-highlight")]
1644                highlight_config: OnceCell::new(),
1645                #[cfg(feature = "tree-sitter-tags")]
1646                tags_config: OnceCell::new(),
1647                #[cfg(feature = "tree-sitter-highlight")]
1648                highlight_names: &self.highlight_names,
1649                #[cfg(feature = "tree-sitter-highlight")]
1650                use_all_highlight_names: self.use_all_highlight_names,
1651                _phantom: PhantomData,
1652            };
1653            self.language_configurations.push(unsafe {
1654                mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
1655                    configuration,
1656                )
1657            });
1658            self.languages_by_id
1659                .push((parser_path.to_owned(), OnceCell::new(), None));
1660        }
1661
1662        Ok(&self.language_configurations[initial_language_configuration_count..])
1663    }
1664
1665    fn regex(pattern: Option<&str>) -> Option<Regex> {
1666        pattern.and_then(|r| RegexBuilder::new(r).multi_line(true).build().ok())
1667    }
1668
1669    fn grammar_json_name(grammar_path: &Path) -> LoaderResult<String> {
1670        let file = fs::File::open(grammar_path)
1671            .map_err(|e| LoaderError::IO(IoError::new(e, Some(grammar_path))))?;
1672
1673        let first_three_lines = BufReader::new(file)
1674            .lines()
1675            .take(3)
1676            .collect::<Result<Vec<_>, std::io::Error>>()
1677            .map_err(|_| LoaderError::GrammarJSON(grammar_path.to_string_lossy().to_string()))?
1678            .join("\n");
1679
1680        let name = GRAMMAR_NAME_REGEX
1681            .captures(&first_three_lines)
1682            .and_then(|c| c.get(1))
1683            .ok_or_else(|| LoaderError::GrammarJSON(grammar_path.to_string_lossy().to_string()))?;
1684
1685        Ok(name.as_str().to_string())
1686    }
1687
1688    pub fn select_language(
1689        &mut self,
1690        path: Option<&Path>,
1691        current_dir: &Path,
1692        scope: Option<&str>,
1693        // path to dynamic library, name of language
1694        lib_info: Option<&(PathBuf, &str)>,
1695    ) -> LoaderResult<Language> {
1696        if let Some((ref lib_path, language_name)) = lib_info {
1697            let language_fn_name = format!("tree_sitter_{}", language_name.replace('-', "_"));
1698            Self::load_language(lib_path, &language_fn_name)
1699        } else if let Some(scope) = scope {
1700            if let Some(config) = self
1701                .language_configuration_for_scope(scope)
1702                .map_err(|e| LoaderError::ScopeLoad(scope.to_string(), Box::new(e)))?
1703            {
1704                Ok(config.0)
1705            } else {
1706                Err(LoaderError::UnknownScope(scope.to_string()))
1707            }
1708        } else if let Some((lang, _)) = if let Some(path) = path {
1709            self.language_configuration_for_file_name(path)
1710                .map_err(|e| {
1711                    LoaderError::FileNameLoad(
1712                        path.file_name().unwrap().to_string_lossy().to_string(),
1713                        Box::new(e),
1714                    )
1715                })?
1716        } else {
1717            None
1718        } {
1719            Ok(lang)
1720        } else if let Some(id) = self.language_configuration_in_current_path {
1721            Ok(self.language_for_id(self.language_configurations[id].language_id)?)
1722        } else if let Some(lang) = self
1723            .languages_at_path(current_dir)
1724            .map_err(|e| LoaderError::CurrentDirectoryLoad(Box::new(e)))?
1725            .first()
1726            .cloned()
1727        {
1728            Ok(lang.0)
1729        } else if let Some(lang) = if let Some(path) = path {
1730            self.language_configuration_for_first_line_regex(path)?
1731        } else {
1732            None
1733        } {
1734            Ok(lang.0)
1735        } else {
1736            Err(LoaderError::NoLanguage)
1737        }
1738    }
1739
    /// Sets the loader's `debug_build` flag to `flag`.
    ///
    /// NOTE(review): presumably consulted when parser libraries are compiled; confirm
    /// at the place where `debug_build` is read.
    pub const fn debug_build(&mut self, flag: bool) {
        self.debug_build = flag;
    }
1743
    /// Sets the loader's `sanitize_build` flag to `flag`.
    ///
    /// NOTE(review): presumably enables sanitizer instrumentation for compiled parsers;
    /// confirm at the place where `sanitize_build` is read.
    pub const fn sanitize_build(&mut self, flag: bool) {
        self.sanitize_build = flag;
    }
1747
    /// Sets the loader's `force_rebuild` flag to `rebuild`.
    ///
    /// NOTE(review): presumably makes the loader recompile parsers even when the
    /// existing library looks up to date; confirm at the place where `force_rebuild`
    /// is read.
    pub const fn force_rebuild(&mut self, rebuild: bool) {
        self.force_rebuild = rebuild;
    }
1751
1752    #[cfg(feature = "wasm")]
1753    #[cfg_attr(docsrs, doc(cfg(feature = "wasm")))]
1754    pub fn use_wasm(&mut self, engine: &tree_sitter::wasmtime::Engine) {
1755        *self.wasm_store.lock().unwrap() = Some(tree_sitter::WasmStore::new(engine).unwrap());
1756    }
1757
1758    #[must_use]
1759    pub fn get_scanner_path(&self, src_path: &Path) -> Option<PathBuf> {
1760        let path = src_path.join("scanner.c");
1761        path.exists().then_some(path)
1762    }
1763}
1764
1765impl LanguageConfiguration<'_> {
1766    #[cfg(feature = "tree-sitter-highlight")]
1767    pub fn highlight_config(
1768        &self,
1769        language: Language,
1770        paths: Option<&[PathBuf]>,
1771    ) -> LoaderResult<Option<&HighlightConfiguration>> {
1772        let (highlights_filenames, injections_filenames, locals_filenames) = match paths {
1773            Some(paths) => (
1774                Some(
1775                    paths
1776                        .iter()
1777                        .filter(|p| p.ends_with(DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME))
1778                        .cloned()
1779                        .collect::<Vec<_>>(),
1780                ),
1781                Some(
1782                    paths
1783                        .iter()
1784                        .filter(|p| p.ends_with(DEFAULT_TAGS_QUERY_FILE_NAME))
1785                        .cloned()
1786                        .collect::<Vec<_>>(),
1787                ),
1788                Some(
1789                    paths
1790                        .iter()
1791                        .filter(|p| p.ends_with(DEFAULT_LOCALS_QUERY_FILE_NAME))
1792                        .cloned()
1793                        .collect::<Vec<_>>(),
1794                ),
1795            ),
1796            None => (None, None, None),
1797        };
1798        self.highlight_config
1799            .get_or_try_init(|| {
1800                let (highlights_query, highlight_ranges) = self.read_queries(
1801                    if highlights_filenames.is_some() {
1802                        highlights_filenames.as_deref()
1803                    } else {
1804                        self.highlights_filenames.as_deref()
1805                    },
1806                    DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME,
1807                )?;
1808                let (injections_query, injection_ranges) = self.read_queries(
1809                    if injections_filenames.is_some() {
1810                        injections_filenames.as_deref()
1811                    } else {
1812                        self.injections_filenames.as_deref()
1813                    },
1814                    DEFAULT_INJECTIONS_QUERY_FILE_NAME,
1815                )?;
1816                let (locals_query, locals_ranges) = self.read_queries(
1817                    if locals_filenames.is_some() {
1818                        locals_filenames.as_deref()
1819                    } else {
1820                        self.locals_filenames.as_deref()
1821                    },
1822                    DEFAULT_LOCALS_QUERY_FILE_NAME,
1823                )?;
1824
1825                if highlights_query.is_empty() {
1826                    Ok(None)
1827                } else {
1828                    let mut result = HighlightConfiguration::new(
1829                        language,
1830                        &self.language_name,
1831                        &highlights_query,
1832                        &injections_query,
1833                        &locals_query,
1834                    )
1835                    .map_err(|error| match error.kind {
1836                        QueryErrorKind::Language => {
1837                            LoaderError::Query(LoaderQueryError { error, file: None })
1838                        }
1839                        _ => {
1840                            if error.offset < injections_query.len() {
1841                                Self::include_path_in_query_error(
1842                                    error,
1843                                    &injection_ranges,
1844                                    &injections_query,
1845                                    0,
1846                                )
1847                            } else if error.offset < injections_query.len() + locals_query.len() {
1848                                Self::include_path_in_query_error(
1849                                    error,
1850                                    &locals_ranges,
1851                                    &locals_query,
1852                                    injections_query.len(),
1853                                )
1854                            } else {
1855                                Self::include_path_in_query_error(
1856                                    error,
1857                                    &highlight_ranges,
1858                                    &highlights_query,
1859                                    injections_query.len() + locals_query.len(),
1860                                )
1861                            }
1862                        }
1863                    })?;
1864                    let mut all_highlight_names = self.highlight_names.lock().unwrap();
1865                    if self.use_all_highlight_names {
1866                        for capture_name in result.query.capture_names() {
1867                            if !all_highlight_names.iter().any(|x| x == capture_name) {
1868                                all_highlight_names.push((*capture_name).to_string());
1869                            }
1870                        }
1871                    }
1872                    result.configure(all_highlight_names.as_slice());
1873                    drop(all_highlight_names);
1874                    Ok(Some(result))
1875                }
1876            })
1877            .map(Option::as_ref)
1878    }
1879
1880    #[cfg(feature = "tree-sitter-tags")]
1881    pub fn tags_config(&self, language: Language) -> LoaderResult<Option<&TagsConfiguration>> {
1882        self.tags_config
1883            .get_or_try_init(|| {
1884                let (tags_query, tags_ranges) = self
1885                    .read_queries(self.tags_filenames.as_deref(), DEFAULT_TAGS_QUERY_FILE_NAME)?;
1886                let (locals_query, locals_ranges) = self.read_queries(
1887                    self.locals_filenames.as_deref(),
1888                    DEFAULT_LOCALS_QUERY_FILE_NAME,
1889                )?;
1890                if tags_query.is_empty() {
1891                    Ok(None)
1892                } else {
1893                    TagsConfiguration::new(language, &tags_query, &locals_query)
1894                        .map(Some)
1895                        .map_err(|error| {
1896                            if let TagsError::Query(error) = error {
1897                                if error.offset < locals_query.len() {
1898                                    Self::include_path_in_query_error(
1899                                        error,
1900                                        &locals_ranges,
1901                                        &locals_query,
1902                                        0,
1903                                    )
1904                                } else {
1905                                    Self::include_path_in_query_error(
1906                                        error,
1907                                        &tags_ranges,
1908                                        &tags_query,
1909                                        locals_query.len(),
1910                                    )
1911                                }
1912                            } else {
1913                                error.into()
1914                            }
1915                        })
1916                }
1917            })
1918            .map(Option::as_ref)
1919    }
1920
1921    #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1922    fn include_path_in_query_error(
1923        mut error: QueryError,
1924        ranges: &[(PathBuf, Range<usize>)],
1925        source: &str,
1926        start_offset: usize,
1927    ) -> LoaderError {
1928        let offset_within_section = error.offset - start_offset;
1929        let (path, range) = ranges
1930            .iter()
1931            .find(|(_, range)| range.contains(&offset_within_section))
1932            .unwrap_or_else(|| ranges.last().unwrap());
1933        error.offset = offset_within_section - range.start;
1934        error.row = source[range.start..offset_within_section]
1935            .matches('\n')
1936            .count();
1937        LoaderError::Query(LoaderQueryError {
1938            error,
1939            file: Some(path.to_string_lossy().to_string()),
1940        })
1941    }
1942
1943    #[allow(clippy::type_complexity)]
1944    #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1945    fn read_queries(
1946        &self,
1947        paths: Option<&[PathBuf]>,
1948        default_path: &str,
1949    ) -> LoaderResult<(String, Vec<(PathBuf, Range<usize>)>)> {
1950        let mut query = String::new();
1951        let mut path_ranges = Vec::new();
1952        if let Some(paths) = paths {
1953            for path in paths {
1954                let abs_path = self.root_path.join(path);
1955                let prev_query_len = query.len();
1956                query += &fs::read_to_string(&abs_path)
1957                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(abs_path.as_path()))))?;
1958                path_ranges.push((path.clone(), prev_query_len..query.len()));
1959            }
1960        } else {
1961            // highlights.scm is needed to test highlights, and tags.scm to test tags
1962            if default_path == DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME
1963                || default_path == DEFAULT_TAGS_QUERY_FILE_NAME
1964            {
1965                warn!(
1966                    concat!(
1967                        "You should add a `{}` entry pointing to the {} path in the `tree-sitter` ",
1968                        "object in the grammar's tree-sitter.json file. See more here: ",
1969                        "https://tree-sitter.github.io/tree-sitter/3-syntax-highlighting#query-paths"
1970                    ),
1971                    default_path.replace(".scm", ""),
1972                    default_path
1973                );
1974            }
1975            let queries_path = self.root_path.join("queries");
1976            let path = queries_path.join(default_path);
1977            if path.exists() {
1978                query = fs::read_to_string(&path)
1979                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(path.as_path()))))?;
1980                path_ranges.push((PathBuf::from(default_path), 0..query.len()));
1981            }
1982        }
1983
1984        Ok((query, path_ranges))
1985    }
1986}
1987
1988fn needs_recompile(lib_path: &Path, paths_to_check: &[PathBuf]) -> LoaderResult<bool> {
1989    if !lib_path.exists() {
1990        return Ok(true);
1991    }
1992    let lib_mtime = mtime(lib_path).map_err(|e| LoaderError::ModifiedTime(Box::new(e)))?;
1993    for path in paths_to_check {
1994        if mtime(path).map_err(|e| LoaderError::ModifiedTime(Box::new(e)))? > lib_mtime {
1995            return Ok(true);
1996        }
1997    }
1998    Ok(false)
1999}
2000
2001fn mtime(path: &Path) -> LoaderResult<SystemTime> {
2002    fs::metadata(path)
2003        .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?
2004        .modified()
2005        .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))
2006}