tree_sitter_loader/
lib.rs

1#![doc = include_str!("../README.md")]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3
4#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
5use std::ops::Range;
6#[cfg(feature = "tree-sitter-highlight")]
7use std::sync::Mutex;
8use std::{
9    collections::HashMap,
10    env,
11    ffi::{OsStr, OsString},
12    fs,
13    io::{BufRead, BufReader},
14    marker::PhantomData,
15    mem,
16    path::{Path, PathBuf},
17    process::Command,
18    sync::LazyLock,
19    time::SystemTime,
20};
21
22use anyhow::Error;
23use anyhow::{anyhow, Context, Result};
24use etcetera::BaseStrategy as _;
25use fs4::fs_std::FileExt;
26use indoc::indoc;
27use libloading::{Library, Symbol};
28use once_cell::unsync::OnceCell;
29use path_slash::PathBufExt as _;
30use regex::{Regex, RegexBuilder};
31use semver::Version;
32use serde::{Deserialize, Deserializer, Serialize};
33use tree_sitter::Language;
34#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
35use tree_sitter::QueryError;
36#[cfg(feature = "tree-sitter-highlight")]
37use tree_sitter::QueryErrorKind;
38#[cfg(feature = "tree-sitter-highlight")]
39use tree_sitter_highlight::HighlightConfiguration;
40#[cfg(feature = "tree-sitter-tags")]
41use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
42use url::Url;
43
44static GRAMMAR_NAME_REGEX: LazyLock<Regex> =
45    LazyLock::new(|| Regex::new(r#""name":\s*"(.*?)""#).unwrap());
46
47pub const EMSCRIPTEN_TAG: &str = concat!("docker.io/emscripten/emsdk:", env!("EMSCRIPTEN_VERSION"));
48
49#[derive(Default, Deserialize, Serialize)]
50pub struct Config {
51    #[serde(default)]
52    #[serde(
53        rename = "parser-directories",
54        deserialize_with = "deserialize_parser_directories"
55    )]
56    pub parser_directories: Vec<PathBuf>,
57}
58
59#[derive(Serialize, Deserialize, Clone, Default)]
60#[serde(untagged)]
61pub enum PathsJSON {
62    #[default]
63    Empty,
64    Single(PathBuf),
65    Multiple(Vec<PathBuf>),
66}
67
68impl PathsJSON {
69    fn into_vec(self) -> Option<Vec<PathBuf>> {
70        match self {
71            Self::Empty => None,
72            Self::Single(s) => Some(vec![s]),
73            Self::Multiple(s) => Some(s),
74        }
75    }
76
77    const fn is_empty(&self) -> bool {
78        matches!(self, Self::Empty)
79    }
80}
81
82#[derive(Serialize, Deserialize, Clone)]
83#[serde(untagged)]
84pub enum PackageJSONAuthor {
85    String(String),
86    Object {
87        name: String,
88        email: Option<String>,
89        url: Option<String>,
90    },
91}
92
93#[derive(Serialize, Deserialize, Clone)]
94#[serde(untagged)]
95pub enum PackageJSONRepository {
96    String(String),
97    Object { url: String },
98}
99
100#[derive(Serialize, Deserialize)]
101pub struct PackageJSON {
102    pub name: String,
103    pub version: Version,
104    pub description: Option<String>,
105    pub author: Option<PackageJSONAuthor>,
106    pub maintainers: Option<Vec<PackageJSONAuthor>>,
107    pub license: Option<String>,
108    pub repository: Option<PackageJSONRepository>,
109    #[serde(default)]
110    #[serde(rename = "tree-sitter", skip_serializing_if = "Option::is_none")]
111    pub tree_sitter: Option<Vec<LanguageConfigurationJSON>>,
112}
113
/// Serde default for `LanguageConfigurationJSON::path`: the current directory (`.`).
fn default_path() -> PathBuf {
    Path::new(".").to_path_buf()
}
117
118#[derive(Serialize, Deserialize, Clone)]
119#[serde(rename_all = "kebab-case")]
120pub struct LanguageConfigurationJSON {
121    #[serde(default = "default_path")]
122    pub path: PathBuf,
123    pub scope: Option<String>,
124    pub file_types: Option<Vec<String>>,
125    pub content_regex: Option<String>,
126    pub first_line_regex: Option<String>,
127    pub injection_regex: Option<String>,
128    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
129    pub highlights: PathsJSON,
130    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
131    pub injections: PathsJSON,
132    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
133    pub locals: PathsJSON,
134    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
135    pub tags: PathsJSON,
136    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
137    pub external_files: PathsJSON,
138}
139
140#[derive(Serialize, Deserialize)]
141#[serde(rename_all = "kebab-case")]
142pub struct TreeSitterJSON {
143    #[serde(rename = "$schema")]
144    pub schema: Option<String>,
145    pub grammars: Vec<Grammar>,
146    pub metadata: Metadata,
147    #[serde(default)]
148    pub bindings: Bindings,
149}
150
151impl TreeSitterJSON {
152    pub fn from_file(path: &Path) -> Result<Self> {
153        Ok(serde_json::from_str(&fs::read_to_string(
154            path.join("tree-sitter.json"),
155        )?)?)
156    }
157
158    #[must_use]
159    pub fn has_multiple_language_configs(&self) -> bool {
160        self.grammars.len() > 1
161    }
162}
163
164#[derive(Serialize, Deserialize)]
165#[serde(rename_all = "kebab-case")]
166pub struct Grammar {
167    pub name: String,
168    #[serde(skip_serializing_if = "Option::is_none")]
169    pub camelcase: Option<String>,
170    #[serde(skip_serializing_if = "Option::is_none")]
171    pub title: Option<String>,
172    pub scope: String,
173    #[serde(skip_serializing_if = "Option::is_none")]
174    pub path: Option<PathBuf>,
175    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
176    pub external_files: PathsJSON,
177    pub file_types: Option<Vec<String>>,
178    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
179    pub highlights: PathsJSON,
180    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
181    pub injections: PathsJSON,
182    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
183    pub locals: PathsJSON,
184    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
185    pub tags: PathsJSON,
186    #[serde(skip_serializing_if = "Option::is_none")]
187    pub injection_regex: Option<String>,
188    #[serde(skip_serializing_if = "Option::is_none")]
189    pub first_line_regex: Option<String>,
190    #[serde(skip_serializing_if = "Option::is_none")]
191    pub content_regex: Option<String>,
192    #[serde(skip_serializing_if = "Option::is_none")]
193    pub class_name: Option<String>,
194}
195
196#[derive(Serialize, Deserialize)]
197pub struct Metadata {
198    pub version: Version,
199    #[serde(skip_serializing_if = "Option::is_none")]
200    pub license: Option<String>,
201    #[serde(skip_serializing_if = "Option::is_none")]
202    pub description: Option<String>,
203    #[serde(skip_serializing_if = "Option::is_none")]
204    pub authors: Option<Vec<Author>>,
205    #[serde(skip_serializing_if = "Option::is_none")]
206    pub links: Option<Links>,
207    #[serde(skip)]
208    pub namespace: Option<String>,
209}
210
211#[derive(Serialize, Deserialize)]
212pub struct Author {
213    pub name: String,
214    #[serde(skip_serializing_if = "Option::is_none")]
215    pub email: Option<String>,
216    #[serde(skip_serializing_if = "Option::is_none")]
217    pub url: Option<String>,
218}
219
220#[derive(Serialize, Deserialize)]
221pub struct Links {
222    pub repository: Url,
223    #[serde(skip_serializing_if = "Option::is_none")]
224    pub funding: Option<Url>,
225    #[serde(skip_serializing_if = "Option::is_none")]
226    pub homepage: Option<String>,
227}
228
229#[derive(Serialize, Deserialize)]
230#[serde(default)]
231pub struct Bindings {
232    pub c: bool,
233    pub go: bool,
234    #[serde(skip)]
235    pub java: bool,
236    #[serde(skip)]
237    pub kotlin: bool,
238    pub node: bool,
239    pub python: bool,
240    pub rust: bool,
241    pub swift: bool,
242    pub zig: bool,
243}
244
245impl Default for Bindings {
246    fn default() -> Self {
247        Self {
248            c: true,
249            go: true,
250            java: false,
251            kotlin: false,
252            node: true,
253            python: true,
254            rust: true,
255            swift: true,
256            zig: false,
257        }
258    }
259}
260
261// Replace `~` or `$HOME` with home path string.
262// (While paths like "~/.tree-sitter/config.json" can be deserialized,
263// they're not valid path for I/O modules.)
264fn deserialize_parser_directories<'de, D>(deserializer: D) -> Result<Vec<PathBuf>, D::Error>
265where
266    D: Deserializer<'de>,
267{
268    let paths = Vec::<PathBuf>::deserialize(deserializer)?;
269    let Ok(home) = etcetera::home_dir() else {
270        return Ok(paths);
271    };
272    let standardized = paths
273        .into_iter()
274        .map(|path| standardize_path(path, &home))
275        .collect();
276    Ok(standardized)
277}
278
/// Rebases `path` onto `home` when its first component is exactly `~` or `$HOME`;
/// any other path is returned untouched.
fn standardize_path(path: PathBuf, home: &Path) -> PathBuf {
    // `~` is tried before `$HOME`; prefix matching is per path component, so
    // something like `~user/x` is not rewritten.
    for prefix in ["~", "$HOME"] {
        if let Ok(rest) = path.strip_prefix(prefix) {
            return home.join(rest);
        }
    }
    path
}
288
289impl Config {
290    #[must_use]
291    pub fn initial() -> Self {
292        let home_dir = etcetera::home_dir().expect("Cannot determine home directory");
293        Self {
294            parser_directories: vec![
295                home_dir.join("github"),
296                home_dir.join("src"),
297                home_dir.join("source"),
298                home_dir.join("projects"),
299                home_dir.join("dev"),
300                home_dir.join("git"),
301            ],
302        }
303    }
304}
305
306const BUILD_TARGET: &str = env!("BUILD_TARGET");
307const BUILD_HOST: &str = env!("BUILD_HOST");
308
309pub struct LanguageConfiguration<'a> {
310    pub scope: Option<String>,
311    pub content_regex: Option<Regex>,
312    pub first_line_regex: Option<Regex>,
313    pub injection_regex: Option<Regex>,
314    pub file_types: Vec<String>,
315    pub root_path: PathBuf,
316    pub highlights_filenames: Option<Vec<PathBuf>>,
317    pub injections_filenames: Option<Vec<PathBuf>>,
318    pub locals_filenames: Option<Vec<PathBuf>>,
319    pub tags_filenames: Option<Vec<PathBuf>>,
320    pub language_name: String,
321    language_id: usize,
322    #[cfg(feature = "tree-sitter-highlight")]
323    highlight_config: OnceCell<Option<HighlightConfiguration>>,
324    #[cfg(feature = "tree-sitter-tags")]
325    tags_config: OnceCell<Option<TagsConfiguration>>,
326    #[cfg(feature = "tree-sitter-highlight")]
327    highlight_names: &'a Mutex<Vec<String>>,
328    #[cfg(feature = "tree-sitter-highlight")]
329    use_all_highlight_names: bool,
330    _phantom: PhantomData<&'a ()>,
331}
332
333pub struct Loader {
334    pub parser_lib_path: PathBuf,
335    languages_by_id: Vec<(PathBuf, OnceCell<Language>, Option<Vec<PathBuf>>)>,
336    language_configurations: Vec<LanguageConfiguration<'static>>,
337    language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
338    language_configuration_in_current_path: Option<usize>,
339    language_configuration_ids_by_first_line_regex: HashMap<String, Vec<usize>>,
340    #[cfg(feature = "tree-sitter-highlight")]
341    highlight_names: Box<Mutex<Vec<String>>>,
342    #[cfg(feature = "tree-sitter-highlight")]
343    use_all_highlight_names: bool,
344    debug_build: bool,
345    sanitize_build: bool,
346    force_rebuild: bool,
347
348    #[cfg(feature = "wasm")]
349    wasm_store: Mutex<Option<tree_sitter::WasmStore>>,
350}
351
/// Inputs for compiling a single parser.
pub struct CompileConfig<'a> {
    /// Directory containing the generated parser sources.
    pub src_path: &'a Path,
    /// Include directories handed to the C compiler.
    pub header_paths: Vec<&'a Path>,
    pub parser_path: PathBuf,
    pub scanner_path: Option<PathBuf>,
    pub external_files: Option<&'a [PathBuf]>,
    pub output_path: Option<PathBuf>,
    pub flags: &'a [&'a str],
    pub sanitize: bool,
    pub name: String,
}

impl<'a> CompileConfig<'a> {
    /// Creates a configuration for the sources in `src_path`.
    ///
    /// `parser_path` is set to `src_path/parser.c` and `src_path` is the only header
    /// path; the scanner path is unset, `flags` is empty, `sanitize` is off and `name`
    /// is empty.
    #[must_use]
    pub fn new(
        src_path: &'a Path,
        externals: Option<&'a [PathBuf]>,
        output_path: Option<PathBuf>,
    ) -> Self {
        let parser_path = src_path.join("parser.c");
        let header_paths = vec![src_path];
        Self {
            src_path,
            header_paths,
            parser_path,
            scanner_path: None,
            external_files: externals,
            output_path,
            flags: &[],
            sanitize: false,
            name: String::new(),
        }
    }
}
384
385unsafe impl Sync for Loader {}
386
387impl Loader {
388    pub fn new() -> Result<Self> {
389        let parser_lib_path = if let Ok(path) = env::var("TREE_SITTER_LIBDIR") {
390            PathBuf::from(path)
391        } else {
392            if cfg!(target_os = "macos") {
393                let legacy_apple_path = etcetera::base_strategy::Apple::new()?
394                    .cache_dir() // `$HOME/Library/Caches/`
395                    .join("tree-sitter");
396                if legacy_apple_path.exists() && legacy_apple_path.is_dir() {
397                    std::fs::remove_dir_all(legacy_apple_path)?;
398                }
399            }
400
401            etcetera::choose_base_strategy()?
402                .cache_dir()
403                .join("tree-sitter")
404                .join("lib")
405        };
406        Ok(Self::with_parser_lib_path(parser_lib_path))
407    }
408
409    #[must_use]
410    pub fn with_parser_lib_path(parser_lib_path: PathBuf) -> Self {
411        Self {
412            parser_lib_path,
413            languages_by_id: Vec::new(),
414            language_configurations: Vec::new(),
415            language_configuration_ids_by_file_type: HashMap::new(),
416            language_configuration_in_current_path: None,
417            language_configuration_ids_by_first_line_regex: HashMap::new(),
418            #[cfg(feature = "tree-sitter-highlight")]
419            highlight_names: Box::new(Mutex::new(Vec::new())),
420            #[cfg(feature = "tree-sitter-highlight")]
421            use_all_highlight_names: true,
422            debug_build: false,
423            sanitize_build: false,
424            force_rebuild: false,
425
426            #[cfg(feature = "wasm")]
427            wasm_store: Mutex::default(),
428        }
429    }
430
431    #[cfg(feature = "tree-sitter-highlight")]
432    #[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
433    pub fn configure_highlights(&mut self, names: &[String]) {
434        self.use_all_highlight_names = false;
435        let mut highlights = self.highlight_names.lock().unwrap();
436        highlights.clear();
437        highlights.extend(names.iter().cloned());
438    }
439
440    #[must_use]
441    #[cfg(feature = "tree-sitter-highlight")]
442    #[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
443    pub fn highlight_names(&self) -> Vec<String> {
444        self.highlight_names.lock().unwrap().clone()
445    }
446
447    pub fn find_all_languages(&mut self, config: &Config) -> Result<()> {
448        if config.parser_directories.is_empty() {
449            eprintln!("Warning: You have not configured any parser directories!");
450            eprintln!("Please run `tree-sitter init-config` and edit the resulting");
451            eprintln!("configuration file to indicate where we should look for");
452            eprintln!("language grammars.\n");
453        }
454        for parser_container_dir in &config.parser_directories {
455            if let Ok(entries) = fs::read_dir(parser_container_dir) {
456                for entry in entries {
457                    let entry = entry?;
458                    if let Some(parser_dir_name) = entry.file_name().to_str() {
459                        if parser_dir_name.starts_with("tree-sitter-") {
460                            self.find_language_configurations_at_path(
461                                &parser_container_dir.join(parser_dir_name),
462                                false,
463                            )
464                            .ok();
465                        }
466                    }
467                }
468            }
469        }
470        Ok(())
471    }
472
473    pub fn languages_at_path(&mut self, path: &Path) -> Result<Vec<(Language, String)>> {
474        if let Ok(configurations) = self.find_language_configurations_at_path(path, true) {
475            let mut language_ids = configurations
476                .iter()
477                .map(|c| (c.language_id, c.language_name.clone()))
478                .collect::<Vec<_>>();
479            language_ids.sort_unstable();
480            language_ids.dedup();
481            language_ids
482                .into_iter()
483                .map(|(id, name)| Ok((self.language_for_id(id)?, name)))
484                .collect::<Result<Vec<_>>>()
485        } else {
486            Ok(Vec::new())
487        }
488    }
489
490    #[must_use]
491    pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> {
492        self.language_configurations
493            .iter()
494            .map(|c| (c, self.languages_by_id[c.language_id].0.as_ref()))
495            .collect()
496    }
497
498    pub fn language_configuration_for_scope(
499        &self,
500        scope: &str,
501    ) -> Result<Option<(Language, &LanguageConfiguration)>> {
502        for configuration in &self.language_configurations {
503            if configuration.scope.as_ref().is_some_and(|s| s == scope) {
504                let language = self.language_for_id(configuration.language_id)?;
505                return Ok(Some((language, configuration)));
506            }
507        }
508        Ok(None)
509    }
510
511    pub fn language_configuration_for_first_line_regex(
512        &self,
513        path: &Path,
514    ) -> Result<Option<(Language, &LanguageConfiguration)>> {
515        self.language_configuration_ids_by_first_line_regex
516            .iter()
517            .try_fold(None, |_, (regex, ids)| {
518                if let Some(regex) = Self::regex(Some(regex)) {
519                    let file = fs::File::open(path)?;
520                    let reader = BufReader::new(file);
521                    let first_line = reader.lines().next().transpose()?;
522                    if let Some(first_line) = first_line {
523                        if regex.is_match(&first_line) && !ids.is_empty() {
524                            let configuration = &self.language_configurations[ids[0]];
525                            let language = self.language_for_id(configuration.language_id)?;
526                            return Ok(Some((language, configuration)));
527                        }
528                    }
529                }
530
531                Ok(None)
532            })
533    }
534
535    pub fn language_configuration_for_file_name(
536        &self,
537        path: &Path,
538    ) -> Result<Option<(Language, &LanguageConfiguration)>> {
539        // Find all the language configurations that match this file name
540        // or a suffix of the file name.
541        let configuration_ids = path
542            .file_name()
543            .and_then(|n| n.to_str())
544            .and_then(|file_name| self.language_configuration_ids_by_file_type.get(file_name))
545            .or_else(|| {
546                let mut path = path.to_owned();
547                let mut extensions = Vec::with_capacity(2);
548                while let Some(extension) = path.extension() {
549                    extensions.push(extension.to_str()?.to_string());
550                    path = PathBuf::from(path.file_stem()?.to_os_string());
551                }
552                extensions.reverse();
553                self.language_configuration_ids_by_file_type
554                    .get(&extensions.join("."))
555            });
556
557        if let Some(configuration_ids) = configuration_ids {
558            if !configuration_ids.is_empty() {
559                let configuration = if configuration_ids.len() == 1 {
560                    &self.language_configurations[configuration_ids[0]]
561                }
562                // If multiple language configurations match, then determine which
563                // one to use by applying the configurations' content regexes.
564                else {
565                    let file_contents = fs::read(path)
566                        .with_context(|| format!("Failed to read path {}", path.display()))?;
567                    let file_contents = String::from_utf8_lossy(&file_contents);
568                    let mut best_score = -2isize;
569                    let mut best_configuration_id = None;
570                    for configuration_id in configuration_ids {
571                        let config = &self.language_configurations[*configuration_id];
572
573                        // If the language configuration has a content regex, assign
574                        // a score based on the length of the first match.
575                        let score;
576                        if let Some(content_regex) = &config.content_regex {
577                            if let Some(mat) = content_regex.find(&file_contents) {
578                                score = (mat.end() - mat.start()) as isize;
579                            }
580                            // If the content regex does not match, then *penalize* this
581                            // language configuration, so that language configurations
582                            // without content regexes are preferred over those with
583                            // non-matching content regexes.
584                            else {
585                                score = -1;
586                            }
587                        } else {
588                            score = 0;
589                        }
590                        if score > best_score {
591                            best_configuration_id = Some(*configuration_id);
592                            best_score = score;
593                        }
594                    }
595
596                    &self.language_configurations[best_configuration_id.unwrap()]
597                };
598
599                let language = self.language_for_id(configuration.language_id)?;
600                return Ok(Some((language, configuration)));
601            }
602        }
603
604        Ok(None)
605    }
606
607    pub fn language_configuration_for_injection_string(
608        &self,
609        string: &str,
610    ) -> Result<Option<(Language, &LanguageConfiguration)>> {
611        let mut best_match_length = 0;
612        let mut best_match_position = None;
613        for (i, configuration) in self.language_configurations.iter().enumerate() {
614            if let Some(injection_regex) = &configuration.injection_regex {
615                if let Some(mat) = injection_regex.find(string) {
616                    let length = mat.end() - mat.start();
617                    if length > best_match_length {
618                        best_match_position = Some(i);
619                        best_match_length = length;
620                    }
621                }
622            }
623        }
624
625        if let Some(i) = best_match_position {
626            let configuration = &self.language_configurations[i];
627            let language = self.language_for_id(configuration.language_id)?;
628            Ok(Some((language, configuration)))
629        } else {
630            Ok(None)
631        }
632    }
633
634    pub fn language_for_configuration(
635        &self,
636        configuration: &LanguageConfiguration,
637    ) -> Result<Language> {
638        self.language_for_id(configuration.language_id)
639    }
640
641    fn language_for_id(&self, id: usize) -> Result<Language> {
642        let (path, language, externals) = &self.languages_by_id[id];
643        language
644            .get_or_try_init(|| {
645                let src_path = path.join("src");
646                self.load_language_at_path(CompileConfig::new(
647                    &src_path,
648                    externals.as_deref(),
649                    None,
650                ))
651            })
652            .cloned()
653    }
654
655    pub fn compile_parser_at_path(
656        &self,
657        grammar_path: &Path,
658        output_path: PathBuf,
659        flags: &[&str],
660    ) -> Result<()> {
661        let src_path = grammar_path.join("src");
662        let mut config = CompileConfig::new(&src_path, None, Some(output_path));
663        config.flags = flags;
664        self.load_language_at_path(config).map(|_| ())
665    }
666
667    pub fn load_language_at_path(&self, mut config: CompileConfig) -> Result<Language> {
668        let grammar_path = config.src_path.join("grammar.json");
669        config.name = Self::grammar_json_name(&grammar_path)?;
670        self.load_language_at_path_with_name(config)
671    }
672
673    pub fn load_language_at_path_with_name(&self, mut config: CompileConfig) -> Result<Language> {
674        let mut lib_name = config.name.to_string();
675        let language_fn_name = format!(
676            "tree_sitter_{}",
677            replace_dashes_with_underscores(&config.name)
678        );
679        if self.debug_build {
680            lib_name.push_str(".debug._");
681        }
682
683        if self.sanitize_build {
684            lib_name.push_str(".sanitize._");
685            config.sanitize = true;
686        }
687
688        if config.output_path.is_none() {
689            fs::create_dir_all(&self.parser_lib_path)?;
690        }
691
692        let mut recompile = self.force_rebuild || config.output_path.is_some(); // if specified, always recompile
693
694        let output_path = config.output_path.unwrap_or_else(|| {
695            let mut path = self.parser_lib_path.join(lib_name);
696            path.set_extension(env::consts::DLL_EXTENSION);
697            #[cfg(feature = "wasm")]
698            if self.wasm_store.lock().unwrap().is_some() {
699                path.set_extension("wasm");
700            }
701            path
702        });
703        config.output_path = Some(output_path.clone());
704
705        let parser_path = config.src_path.join("parser.c");
706        config.scanner_path = self.get_scanner_path(config.src_path);
707
708        let mut paths_to_check = vec![parser_path];
709
710        if let Some(scanner_path) = config.scanner_path.as_ref() {
711            paths_to_check.push(scanner_path.clone());
712        }
713
714        paths_to_check.extend(
715            config
716                .external_files
717                .unwrap_or_default()
718                .iter()
719                .map(|p| config.src_path.join(p)),
720        );
721
722        if !recompile {
723            recompile = needs_recompile(&output_path, &paths_to_check)
724                .with_context(|| "Failed to compare source and binary timestamps")?;
725        }
726
727        #[cfg(feature = "wasm")]
728        if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() {
729            if recompile {
730                self.compile_parser_to_wasm(
731                    &config.name,
732                    None,
733                    config.src_path,
734                    config
735                        .scanner_path
736                        .as_ref()
737                        .and_then(|p| p.strip_prefix(config.src_path).ok()),
738                    &output_path,
739                    false,
740                )?;
741            }
742
743            let wasm_bytes = fs::read(&output_path)?;
744            return Ok(wasm_store.load_language(&config.name, &wasm_bytes)?);
745        }
746
747        let lock_path = if env::var("CROSS_RUNNER").is_ok() {
748            tempfile::tempdir()
749                .unwrap()
750                .path()
751                .join("tree-sitter")
752                .join("lock")
753                .join(format!("{}.lock", config.name))
754        } else {
755            etcetera::choose_base_strategy()?
756                .cache_dir()
757                .join("tree-sitter")
758                .join("lock")
759                .join(format!("{}.lock", config.name))
760        };
761
762        if let Ok(lock_file) = fs::OpenOptions::new().write(true).open(&lock_path) {
763            recompile = false;
764            if lock_file.try_lock_exclusive().is_err() {
765                // if we can't acquire the lock, another process is compiling the parser, wait for
766                // it and don't recompile
767                lock_file.lock_exclusive()?;
768                recompile = false;
769            } else {
770                // if we can acquire the lock, check if the lock file is older than 30 seconds, a
771                // run that was interrupted and left the lock file behind should not block
772                // subsequent runs
773                let time = lock_file.metadata()?.modified()?.elapsed()?.as_secs();
774                if time > 30 {
775                    fs::remove_file(&lock_path)?;
776                    recompile = true;
777                }
778            }
779        }
780
781        if recompile {
782            fs::create_dir_all(lock_path.parent().unwrap()).with_context(|| {
783                format!(
784                    "Failed to create directory {}",
785                    lock_path.parent().unwrap().display()
786                )
787            })?;
788            let lock_file = fs::OpenOptions::new()
789                .create(true)
790                .truncate(true)
791                .write(true)
792                .open(&lock_path)?;
793            lock_file.lock_exclusive()?;
794
795            self.compile_parser_to_dylib(&config, &lock_file, &lock_path)?;
796
797            if config.scanner_path.is_some() {
798                self.check_external_scanner(&config.name, &output_path)?;
799            }
800        }
801
802        let library = unsafe { Library::new(&output_path) }
803            .with_context(|| format!("Error opening dynamic library {}", output_path.display()))?;
804        let language = unsafe {
805            let language_fn = library
806                .get::<Symbol<unsafe extern "C" fn() -> Language>>(language_fn_name.as_bytes())
807                .with_context(|| format!("Failed to load symbol {language_fn_name}"))?;
808            language_fn()
809        };
810        mem::forget(library);
811        Ok(language)
812    }
813
814    fn compile_parser_to_dylib(
815        &self,
816        config: &CompileConfig,
817        lock_file: &fs::File,
818        lock_path: &Path,
819    ) -> Result<(), Error> {
820        let mut cc_config = cc::Build::new();
821        cc_config
822            .cargo_metadata(false)
823            .cargo_warnings(false)
824            .target(BUILD_TARGET)
825            .host(BUILD_HOST)
826            .debug(self.debug_build)
827            .file(&config.parser_path)
828            .includes(&config.header_paths)
829            .std("c11");
830
831        if let Some(scanner_path) = config.scanner_path.as_ref() {
832            cc_config.file(scanner_path);
833        }
834
835        if self.debug_build {
836            cc_config.opt_level(0).extra_warnings(true);
837        } else {
838            cc_config.opt_level(2).extra_warnings(false);
839        }
840
841        for flag in config.flags {
842            cc_config.define(flag, None);
843        }
844
845        let compiler = cc_config.get_compiler();
846        let mut command = Command::new(compiler.path());
847        command.args(compiler.args());
848        for (key, value) in compiler.env() {
849            command.env(key, value);
850        }
851
852        let output_path = config.output_path.as_ref().unwrap();
853
854        if compiler.is_like_msvc() {
855            let out = format!("-out:{}", output_path.to_str().unwrap());
856            command.arg(if self.debug_build { "-LDd" } else { "-LD" });
857            command.arg("-utf-8");
858            command.args(cc_config.get_files());
859            command.arg("-link").arg(out);
860        } else {
861            command.arg("-Werror=implicit-function-declaration");
862            if cfg!(any(target_os = "macos", target_os = "ios")) {
863                command.arg("-dynamiclib");
864                // TODO: remove when supported
865                command.arg("-UTREE_SITTER_REUSE_ALLOCATOR");
866            } else {
867                command.arg("-shared");
868            }
869            command.args(cc_config.get_files());
870            command.arg("-o").arg(output_path);
871        }
872
873        let output = command.output().with_context(|| {
874            format!("Failed to execute the C compiler with the following command:\n{command:?}")
875        })?;
876
877        FileExt::unlock(lock_file)?;
878        fs::remove_file(lock_path)?;
879
880        if output.status.success() {
881            Ok(())
882        } else {
883            Err(anyhow!(
884                "Parser compilation failed.\nStdout: {}\nStderr: {}",
885                String::from_utf8_lossy(&output.stdout),
886                String::from_utf8_lossy(&output.stderr)
887            ))
888        }
889    }
890
891    #[cfg(unix)]
892    fn check_external_scanner(&self, name: &str, library_path: &Path) -> Result<()> {
893        let prefix = if cfg!(any(target_os = "macos", target_os = "ios")) {
894            "_"
895        } else {
896            ""
897        };
898        let mut must_have = vec![
899            format!("{prefix}tree_sitter_{name}_external_scanner_create"),
900            format!("{prefix}tree_sitter_{name}_external_scanner_destroy"),
901            format!("{prefix}tree_sitter_{name}_external_scanner_serialize"),
902            format!("{prefix}tree_sitter_{name}_external_scanner_deserialize"),
903            format!("{prefix}tree_sitter_{name}_external_scanner_scan"),
904        ];
905
906        let command = Command::new("nm")
907            .arg("-W")
908            .arg("-U")
909            .arg(library_path)
910            .output();
911        if let Ok(output) = command {
912            if output.status.success() {
913                let mut found_non_static = false;
914                for line in String::from_utf8_lossy(&output.stdout).lines() {
915                    if line.contains(" T ") {
916                        if let Some(function_name) =
917                            line.split_whitespace().collect::<Vec<_>>().get(2)
918                        {
919                            if !line.contains("tree_sitter_") {
920                                if !found_non_static {
921                                    found_non_static = true;
922                                    eprintln!("Warning: Found non-static non-tree-sitter functions in the external scannner");
923                                }
924                                eprintln!("  `{function_name}`");
925                            } else {
926                                must_have.retain(|f| f != function_name);
927                            }
928                        }
929                    }
930                }
931                if found_non_static {
932                    eprintln!("Consider making these functions static, they can cause conflicts when another tree-sitter project uses the same function name");
933                }
934
935                if !must_have.is_empty() {
936                    let missing = must_have
937                        .iter()
938                        .map(|f| format!("  `{f}`"))
939                        .collect::<Vec<_>>()
940                        .join("\n");
941
942                    return Err(anyhow!(format!(
943                        indoc! {"
944                            Missing required functions in the external scanner, parsing won't work without these!
945
946                            {}
947
948                            You can read more about this at https://tree-sitter.github.io/tree-sitter/creating-parsers/4-external-scanners
949                        "},
950                        missing,
951                    )));
952                }
953            }
954        }
955
956        Ok(())
957    }
958
959    #[cfg(windows)]
960    fn check_external_scanner(&self, _name: &str, _library_path: &Path) -> Result<()> {
961        // TODO: there's no nm command on windows, whoever wants to implement this can and should :)
962
963        // let mut must_have = vec![
964        //     format!("tree_sitter_{name}_external_scanner_create"),
965        //     format!("tree_sitter_{name}_external_scanner_destroy"),
966        //     format!("tree_sitter_{name}_external_scanner_serialize"),
967        //     format!("tree_sitter_{name}_external_scanner_deserialize"),
968        //     format!("tree_sitter_{name}_external_scanner_scan"),
969        // ];
970
971        Ok(())
972    }
973
974    pub fn compile_parser_to_wasm(
975        &self,
976        language_name: &str,
977        root_path: Option<&Path>,
978        src_path: &Path,
979        scanner_filename: Option<&Path>,
980        output_path: &Path,
981        force_docker: bool,
982    ) -> Result<(), Error> {
983        #[derive(PartialEq, Eq)]
984        enum EmccSource {
985            Native,
986            Docker,
987            Podman,
988        }
989
990        let root_path = root_path.unwrap_or(src_path);
991        let emcc_name = if cfg!(windows) { "emcc.bat" } else { "emcc" };
992
993        // Order of preference: emscripten > docker > podman > error
994        let source = if !force_docker && Command::new(emcc_name).output().is_ok() {
995            EmccSource::Native
996        } else if Command::new("docker")
997            .output()
998            .is_ok_and(|out| out.status.success())
999        {
1000            EmccSource::Docker
1001        } else if Command::new("podman")
1002            .arg("--version")
1003            .output()
1004            .is_ok_and(|out| out.status.success())
1005        {
1006            EmccSource::Podman
1007        } else {
1008            return Err(anyhow!(
1009                "You must have either emcc, docker, or podman on your PATH to run this command"
1010            ));
1011        };
1012
1013        let mut command = match source {
1014            EmccSource::Native => {
1015                let mut command = Command::new(emcc_name);
1016                command.current_dir(src_path);
1017                command
1018            }
1019
1020            EmccSource::Docker | EmccSource::Podman => {
1021                let mut command = match source {
1022                    EmccSource::Docker => Command::new("docker"),
1023                    EmccSource::Podman => Command::new("podman"),
1024                    EmccSource::Native => unreachable!(),
1025                };
1026                command.args(["run", "--rm"]);
1027
1028                // The working directory is the directory containing the parser itself
1029                let workdir = if root_path == src_path {
1030                    PathBuf::from("/src")
1031                } else {
1032                    let mut path = PathBuf::from("/src");
1033                    path.push(src_path.strip_prefix(root_path).unwrap());
1034                    path
1035                };
1036                command.args(["--workdir", &workdir.to_slash_lossy()]);
1037
1038                // Mount the root directory as a volume, which is the repo root
1039                let mut volume_string = OsString::from(&root_path);
1040                volume_string.push(":/src:Z");
1041                command.args([OsStr::new("--volume"), &volume_string]);
1042
1043                // In case `docker` is an alias to `podman`, ensure that podman
1044                // mounts the current directory as writable by the container
1045                // user which has the same uid as the host user. Setting the
1046                // podman-specific variable is more reliable than attempting to
1047                // detect whether `docker` is an alias for `podman`.
1048                // see https://docs.podman.io/en/latest/markdown/podman-run.1.html#userns-mode
1049                command.env("PODMAN_USERNS", "keep-id");
1050
1051                // Get the current user id so that files created in the docker container will have
1052                // the same owner.
1053                #[cfg(unix)]
1054                {
1055                    #[link(name = "c")]
1056                    extern "C" {
1057                        fn getuid() -> u32;
1058                    }
1059                    // don't need to set user for podman since PODMAN_USERNS=keep-id is already set
1060                    if source == EmccSource::Docker {
1061                        let user_id = unsafe { getuid() };
1062                        command.args(["--user", &user_id.to_string()]);
1063                    }
1064                };
1065
1066                // Run `emcc` in a container using the `emscripten-slim` image
1067                command.args([EMSCRIPTEN_TAG, "emcc"]);
1068                command
1069            }
1070        };
1071
1072        let output_name = "output.wasm";
1073
1074        command.args([
1075            "-o",
1076            output_name,
1077            "-Os",
1078            "-s",
1079            "WASM=1",
1080            "-s",
1081            "SIDE_MODULE=2",
1082            "-s",
1083            "TOTAL_MEMORY=33554432",
1084            "-s",
1085            "NODEJS_CATCH_EXIT=0",
1086            "-s",
1087            &format!("EXPORTED_FUNCTIONS=[\"_tree_sitter_{language_name}\"]"),
1088            "-fno-exceptions",
1089            "-fvisibility=hidden",
1090            "-I",
1091            ".",
1092        ]);
1093
1094        if let Some(scanner_filename) = scanner_filename {
1095            command.arg(scanner_filename);
1096        }
1097
1098        command.arg("parser.c");
1099        let status = command
1100            .spawn()
1101            .with_context(|| "Failed to run emcc command")?
1102            .wait()?;
1103        if !status.success() {
1104            return Err(anyhow!("emcc command failed"));
1105        }
1106
1107        fs::rename(src_path.join(output_name), output_path)
1108            .context("failed to rename wasm output file")?;
1109
1110        Ok(())
1111    }
1112
1113    #[must_use]
1114    #[cfg(feature = "tree-sitter-highlight")]
1115    pub fn highlight_config_for_injection_string<'a>(
1116        &'a self,
1117        string: &str,
1118    ) -> Option<&'a HighlightConfiguration> {
1119        match self.language_configuration_for_injection_string(string) {
1120            Err(e) => {
1121                eprintln!("Failed to load language for injection string '{string}': {e}",);
1122                None
1123            }
1124            Ok(None) => None,
1125            Ok(Some((language, configuration))) => {
1126                match configuration.highlight_config(language, None) {
1127                    Err(e) => {
1128                        eprintln!(
1129                            "Failed to load property sheet for injection string '{string}': {e}",
1130                        );
1131                        None
1132                    }
1133                    Ok(None) => None,
1134                    Ok(Some(config)) => Some(config),
1135                }
1136            }
1137        }
1138    }
1139
1140    #[must_use]
1141    pub fn get_language_configuration_in_current_path(&self) -> Option<&LanguageConfiguration> {
1142        self.language_configuration_in_current_path
1143            .map(|i| &self.language_configurations[i])
1144    }
1145
    /// Registers the language configurations found in the parser directory
    /// `parser_path` and returns the ones added by this call.
    ///
    /// Configurations are read from the directory's `tree-sitter.json`. If that
    /// yields none but `src/grammar.json` exists, a minimal configuration is
    /// created from the grammar's name instead.
    ///
    /// When `set_current_path_config` is true and no configuration has been
    /// recorded for the current path yet, the first configuration added here
    /// is recorded as such.
    ///
    /// # Errors
    ///
    /// Returns an error if an external file listed in `tree-sitter.json` does
    /// not lie under `parser_path`, or if the language name cannot be read from
    /// `grammar.json`. A `tree-sitter.json` that fails to load is only reported
    /// as a warning on stderr (and not at all when the file does not exist).
    pub fn find_language_configurations_at_path(
        &mut self,
        parser_path: &Path,
        set_current_path_config: bool,
    ) -> Result<&[LanguageConfiguration]> {
        // Remember how many configurations existed so that only the new ones
        // are returned.
        let initial_language_configuration_count = self.language_configurations.len();

        let ts_json = TreeSitterJSON::from_file(parser_path);
        if let Ok(config) = ts_json {
            let language_count = self.languages_by_id.len();
            for grammar in config.grammars {
                // Determine the path to the parser directory. This can be specified in
                // the tree-sitter.json, but defaults to the directory containing the
                // tree-sitter.json.
                let language_path = parser_path.join(grammar.path.unwrap_or(PathBuf::from(".")));

                // Determine if a previous language configuration in this tree-sitter.json
                // file already uses the same language. Only languages added during this
                // call (index >= `language_count`) are considered.
                let mut language_id = None;
                for (id, (path, _, _)) in
                    self.languages_by_id.iter().enumerate().skip(language_count)
                {
                    if language_path == *path {
                        language_id = Some(id);
                    }
                }

                // If not, add a new language path to the list.
                let language_id = if let Some(language_id) = language_id {
                    language_id
                } else {
                    self.languages_by_id.push((
                            language_path,
                            OnceCell::new(),
                            grammar.external_files.clone().into_vec().map(|files| {
                                files.into_iter()
                                    .map(|path| {
                                       let path = parser_path.join(path);
                                        // Reject external files that do not lie under `parser_path`.
                                        // NOTE(review): `Path::starts_with` compares components
                                        // without resolving `..`, so a relative path containing
                                        // `..` segments would still pass this check -- confirm
                                        // whether that is intended.
                                        if path.starts_with(parser_path) {
                                            Ok(path)
                                        } else {
                                            Err(anyhow!("External file path {path:?} is outside of parser directory {parser_path:?}"))
                                        }
                                    })
                                    .collect::<Result<Vec<_>>>()
                            }).transpose()?,
                        ));
                    self.languages_by_id.len() - 1
                };

                let configuration = LanguageConfiguration {
                    root_path: parser_path.to_path_buf(),
                    language_name: grammar.name,
                    scope: Some(grammar.scope),
                    language_id,
                    file_types: grammar.file_types.unwrap_or_default(),
                    content_regex: Self::regex(grammar.content_regex.as_deref()),
                    first_line_regex: Self::regex(grammar.first_line_regex.as_deref()),
                    injection_regex: Self::regex(grammar.injection_regex.as_deref()),
                    injections_filenames: grammar.injections.into_vec(),
                    locals_filenames: grammar.locals.into_vec(),
                    tags_filenames: grammar.tags.into_vec(),
                    highlights_filenames: grammar.highlights.into_vec(),
                    #[cfg(feature = "tree-sitter-highlight")]
                    highlight_config: OnceCell::new(),
                    #[cfg(feature = "tree-sitter-tags")]
                    tags_config: OnceCell::new(),
                    #[cfg(feature = "tree-sitter-highlight")]
                    highlight_names: &self.highlight_names,
                    #[cfg(feature = "tree-sitter-highlight")]
                    use_all_highlight_names: self.use_all_highlight_names,
                    _phantom: PhantomData,
                };

                // Index the configuration (by the position it is about to occupy in
                // `language_configurations`) under each of its file types and under
                // its first-line regex.
                for file_type in &configuration.file_types {
                    self.language_configuration_ids_by_file_type
                        .entry(file_type.to_string())
                        .or_default()
                        .push(self.language_configurations.len());
                }
                if let Some(first_line_regex) = &configuration.first_line_regex {
                    self.language_configuration_ids_by_first_line_regex
                        .entry(first_line_regex.to_string())
                        .or_default()
                        .push(self.language_configurations.len());
                }

                // The configuration may borrow `self.highlight_names`; its lifetime is
                // erased to `'static` so that it can be stored inside `self`.
                // NOTE(review): presumably sound because the borrowed data lives as
                // long as the loader itself -- confirm.
                self.language_configurations.push(unsafe {
                    mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
                        configuration,
                    )
                });

                if set_current_path_config && self.language_configuration_in_current_path.is_none()
                {
                    self.language_configuration_in_current_path =
                        Some(self.language_configurations.len() - 1);
                }
            }
        } else if let Err(e) = ts_json {
            match e.downcast_ref::<std::io::Error>() {
                // This is noisy, and not really an issue.
                Some(e) if e.kind() == std::io::ErrorKind::NotFound => {}
                _ => {
                    eprintln!(
                        "Warning: Failed to parse {} -- {e}",
                        parser_path.join("tree-sitter.json").display()
                    );
                }
            }
        }

        // If we didn't find any language configurations in the tree-sitter.json file,
        // but there is a grammar.json file, then use the grammar file to form a simple
        // language configuration.
        if self.language_configurations.len() == initial_language_configuration_count
            && parser_path.join("src").join("grammar.json").exists()
        {
            let grammar_path = parser_path.join("src").join("grammar.json");
            let language_name = Self::grammar_json_name(&grammar_path)?;
            let configuration = LanguageConfiguration {
                root_path: parser_path.to_owned(),
                language_name,
                // Index of the language entry pushed to `languages_by_id` below.
                language_id: self.languages_by_id.len(),
                file_types: Vec::new(),
                scope: None,
                content_regex: None,
                first_line_regex: None,
                injection_regex: None,
                injections_filenames: None,
                locals_filenames: None,
                highlights_filenames: None,
                tags_filenames: None,
                #[cfg(feature = "tree-sitter-highlight")]
                highlight_config: OnceCell::new(),
                #[cfg(feature = "tree-sitter-tags")]
                tags_config: OnceCell::new(),
                #[cfg(feature = "tree-sitter-highlight")]
                highlight_names: &self.highlight_names,
                #[cfg(feature = "tree-sitter-highlight")]
                use_all_highlight_names: self.use_all_highlight_names,
                _phantom: PhantomData,
            };
            // Same lifetime erasure as for configurations read from tree-sitter.json.
            self.language_configurations.push(unsafe {
                mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
                    configuration,
                )
            });
            self.languages_by_id
                .push((parser_path.to_owned(), OnceCell::new(), None));
        }

        Ok(&self.language_configurations[initial_language_configuration_count..])
    }
1301
1302    fn regex(pattern: Option<&str>) -> Option<Regex> {
1303        pattern.and_then(|r| RegexBuilder::new(r).multi_line(true).build().ok())
1304    }
1305
1306    fn grammar_json_name(grammar_path: &Path) -> Result<String> {
1307        let file = fs::File::open(grammar_path).with_context(|| {
1308            format!("Failed to open grammar.json at {}", grammar_path.display())
1309        })?;
1310
1311        let first_three_lines = BufReader::new(file)
1312            .lines()
1313            .take(3)
1314            .collect::<Result<Vec<_>, _>>()
1315            .with_context(|| {
1316                format!(
1317                    "Failed to read the first three lines of grammar.json at {}",
1318                    grammar_path.display()
1319                )
1320            })?
1321            .join("\n");
1322
1323        let name = GRAMMAR_NAME_REGEX
1324            .captures(&first_three_lines)
1325            .and_then(|c| c.get(1))
1326            .ok_or_else(|| {
1327                anyhow!(
1328                    "Failed to parse the language name from grammar.json at {}",
1329                    grammar_path.display()
1330                )
1331            })?;
1332
1333        Ok(name.as_str().to_string())
1334    }
1335
1336    pub fn select_language(
1337        &mut self,
1338        path: &Path,
1339        current_dir: &Path,
1340        scope: Option<&str>,
1341    ) -> Result<Language> {
1342        if let Some(scope) = scope {
1343            if let Some(config) = self
1344                .language_configuration_for_scope(scope)
1345                .with_context(|| format!("Failed to load language for scope '{scope}'"))?
1346            {
1347                Ok(config.0)
1348            } else {
1349                Err(anyhow!("Unknown scope '{scope}'"))
1350            }
1351        } else if let Some((lang, _)) = self
1352            .language_configuration_for_file_name(path)
1353            .with_context(|| {
1354                format!(
1355                    "Failed to load language for file name {}",
1356                    path.file_name().unwrap().to_string_lossy()
1357                )
1358            })?
1359        {
1360            Ok(lang)
1361        } else if let Some(id) = self.language_configuration_in_current_path {
1362            Ok(self.language_for_id(self.language_configurations[id].language_id)?)
1363        } else if let Some(lang) = self
1364            .languages_at_path(current_dir)
1365            .with_context(|| "Failed to load language in current directory")?
1366            .first()
1367            .cloned()
1368        {
1369            Ok(lang.0)
1370        } else if let Some(lang) = self.language_configuration_for_first_line_regex(path)? {
1371            Ok(lang.0)
1372        } else {
1373            Err(anyhow!("No language found"))
1374        }
1375    }
1376
1377    pub fn debug_build(&mut self, flag: bool) {
1378        self.debug_build = flag;
1379    }
1380
1381    pub fn sanitize_build(&mut self, flag: bool) {
1382        self.sanitize_build = flag;
1383    }
1384
1385    pub fn force_rebuild(&mut self, rebuild: bool) {
1386        self.force_rebuild = rebuild;
1387    }
1388
1389    #[cfg(feature = "wasm")]
1390    #[cfg_attr(docsrs, doc(cfg(feature = "wasm")))]
1391    pub fn use_wasm(&mut self, engine: &tree_sitter::wasmtime::Engine) {
1392        *self.wasm_store.lock().unwrap() = Some(tree_sitter::WasmStore::new(engine).unwrap());
1393    }
1394
1395    #[must_use]
1396    pub fn get_scanner_path(&self, src_path: &Path) -> Option<PathBuf> {
1397        let path = src_path.join("scanner.c");
1398        path.exists().then_some(path)
1399    }
1400}
1401
1402impl LanguageConfiguration<'_> {
1403    #[cfg(feature = "tree-sitter-highlight")]
1404    pub fn highlight_config(
1405        &self,
1406        language: Language,
1407        paths: Option<&[PathBuf]>,
1408    ) -> Result<Option<&HighlightConfiguration>> {
1409        let (highlights_filenames, injections_filenames, locals_filenames) = match paths {
1410            Some(paths) => (
1411                Some(
1412                    paths
1413                        .iter()
1414                        .filter(|p| p.ends_with("highlights.scm"))
1415                        .cloned()
1416                        .collect::<Vec<_>>(),
1417                ),
1418                Some(
1419                    paths
1420                        .iter()
1421                        .filter(|p| p.ends_with("tags.scm"))
1422                        .cloned()
1423                        .collect::<Vec<_>>(),
1424                ),
1425                Some(
1426                    paths
1427                        .iter()
1428                        .filter(|p| p.ends_with("locals.scm"))
1429                        .cloned()
1430                        .collect::<Vec<_>>(),
1431                ),
1432            ),
1433            None => (None, None, None),
1434        };
1435        self.highlight_config
1436            .get_or_try_init(|| {
1437                let (highlights_query, highlight_ranges) = self.read_queries(
1438                    if highlights_filenames.is_some() {
1439                        highlights_filenames.as_deref()
1440                    } else {
1441                        self.highlights_filenames.as_deref()
1442                    },
1443                    "highlights.scm",
1444                )?;
1445                let (injections_query, injection_ranges) = self.read_queries(
1446                    if injections_filenames.is_some() {
1447                        injections_filenames.as_deref()
1448                    } else {
1449                        self.injections_filenames.as_deref()
1450                    },
1451                    "injections.scm",
1452                )?;
1453                let (locals_query, locals_ranges) = self.read_queries(
1454                    if locals_filenames.is_some() {
1455                        locals_filenames.as_deref()
1456                    } else {
1457                        self.locals_filenames.as_deref()
1458                    },
1459                    "locals.scm",
1460                )?;
1461
1462                if highlights_query.is_empty() {
1463                    Ok(None)
1464                } else {
1465                    let mut result = HighlightConfiguration::new(
1466                        language,
1467                        &self.language_name,
1468                        &highlights_query,
1469                        &injections_query,
1470                        &locals_query,
1471                    )
1472                    .map_err(|error| match error.kind {
1473                        QueryErrorKind::Language => Error::from(error),
1474                        _ => {
1475                            if error.offset < injections_query.len() {
1476                                Self::include_path_in_query_error(
1477                                    error,
1478                                    &injection_ranges,
1479                                    &injections_query,
1480                                    0,
1481                                )
1482                            } else if error.offset < injections_query.len() + locals_query.len() {
1483                                Self::include_path_in_query_error(
1484                                    error,
1485                                    &locals_ranges,
1486                                    &locals_query,
1487                                    injections_query.len(),
1488                                )
1489                            } else {
1490                                Self::include_path_in_query_error(
1491                                    error,
1492                                    &highlight_ranges,
1493                                    &highlights_query,
1494                                    injections_query.len() + locals_query.len(),
1495                                )
1496                            }
1497                        }
1498                    })?;
1499                    let mut all_highlight_names = self.highlight_names.lock().unwrap();
1500                    if self.use_all_highlight_names {
1501                        for capture_name in result.query.capture_names() {
1502                            if !all_highlight_names.iter().any(|x| x == capture_name) {
1503                                all_highlight_names.push((*capture_name).to_string());
1504                            }
1505                        }
1506                    }
1507                    result.configure(all_highlight_names.as_slice());
1508                    drop(all_highlight_names);
1509                    Ok(Some(result))
1510                }
1511            })
1512            .map(Option::as_ref)
1513    }
1514
1515    #[cfg(feature = "tree-sitter-tags")]
1516    pub fn tags_config(&self, language: Language) -> Result<Option<&TagsConfiguration>> {
1517        self.tags_config
1518            .get_or_try_init(|| {
1519                let (tags_query, tags_ranges) =
1520                    self.read_queries(self.tags_filenames.as_deref(), "tags.scm")?;
1521                let (locals_query, locals_ranges) =
1522                    self.read_queries(self.locals_filenames.as_deref(), "locals.scm")?;
1523                if tags_query.is_empty() {
1524                    Ok(None)
1525                } else {
1526                    TagsConfiguration::new(language, &tags_query, &locals_query)
1527                        .map(Some)
1528                        .map_err(|error| {
1529                            if let TagsError::Query(error) = error {
1530                                if error.offset < locals_query.len() {
1531                                    Self::include_path_in_query_error(
1532                                        error,
1533                                        &locals_ranges,
1534                                        &locals_query,
1535                                        0,
1536                                    )
1537                                } else {
1538                                    Self::include_path_in_query_error(
1539                                        error,
1540                                        &tags_ranges,
1541                                        &tags_query,
1542                                        locals_query.len(),
1543                                    )
1544                                }
1545                            } else {
1546                                error.into()
1547                            }
1548                        })
1549                }
1550            })
1551            .map(Option::as_ref)
1552    }
1553
1554    #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1555    fn include_path_in_query_error(
1556        mut error: QueryError,
1557        ranges: &[(PathBuf, Range<usize>)],
1558        source: &str,
1559        start_offset: usize,
1560    ) -> Error {
1561        let offset_within_section = error.offset - start_offset;
1562        let (path, range) = ranges
1563            .iter()
1564            .find(|(_, range)| range.contains(&offset_within_section))
1565            .unwrap_or_else(|| ranges.last().unwrap());
1566        error.offset = offset_within_section - range.start;
1567        error.row = source[range.start..offset_within_section]
1568            .matches('\n')
1569            .count();
1570        Error::from(error).context(format!("Error in query file {}", path.display()))
1571    }
1572
1573    #[allow(clippy::type_complexity)]
1574    #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1575    fn read_queries(
1576        &self,
1577        paths: Option<&[PathBuf]>,
1578        default_path: &str,
1579    ) -> Result<(String, Vec<(PathBuf, Range<usize>)>)> {
1580        let mut query = String::new();
1581        let mut path_ranges = Vec::new();
1582        if let Some(paths) = paths {
1583            for path in paths {
1584                let abs_path = self.root_path.join(path);
1585                let prev_query_len = query.len();
1586                query += &fs::read_to_string(&abs_path)
1587                    .with_context(|| format!("Failed to read query file {}", path.display()))?;
1588                path_ranges.push((path.clone(), prev_query_len..query.len()));
1589            }
1590        } else {
1591            // highlights.scm is needed to test highlights, and tags.scm to test tags
1592            if default_path == "highlights.scm" || default_path == "tags.scm" {
1593                eprintln!(
1594                    indoc! {"
1595                        Warning: you should add a `{}` entry pointing to the highlights path in the `tree-sitter` object in the grammar's tree-sitter.json file.
1596                        See more here: https://tree-sitter.github.io/tree-sitter/3-syntax-highlighting#query-paths
1597                    "},
1598                    default_path.replace(".scm", "")
1599                );
1600            }
1601            let queries_path = self.root_path.join("queries");
1602            let path = queries_path.join(default_path);
1603            if path.exists() {
1604                query = fs::read_to_string(&path)
1605                    .with_context(|| format!("Failed to read query file {}", path.display()))?;
1606                path_ranges.push((PathBuf::from(default_path), 0..query.len()));
1607            }
1608        }
1609
1610        Ok((query, path_ranges))
1611    }
1612}
1613
1614fn needs_recompile(lib_path: &Path, paths_to_check: &[PathBuf]) -> Result<bool> {
1615    if !lib_path.exists() {
1616        return Ok(true);
1617    }
1618    let lib_mtime = mtime(lib_path)
1619        .with_context(|| format!("Failed to read mtime of {}", lib_path.display()))?;
1620    for path in paths_to_check {
1621        if mtime(path)? > lib_mtime {
1622            return Ok(true);
1623        }
1624    }
1625    Ok(false)
1626}
1627
1628fn mtime(path: &Path) -> Result<SystemTime> {
1629    Ok(fs::metadata(path)?.modified()?)
1630}
1631
/// Returns a copy of `name` in which every `-` has been replaced by `_`.
///
/// All other characters are kept unchanged and in their original order.
fn replace_dashes_with_underscores(name: &str) -> String {
    name.replace('-', "_")
}