1#![cfg_attr(not(any(test, doctest)), doc = include_str!("../README.md"))]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3
4#[cfg(unix)]
5use std::fmt::Write as _;
6#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
7use std::ops::Range;
8#[cfg(feature = "tree-sitter-highlight")]
9use std::sync::Mutex;
10use std::{
11 collections::HashMap,
12 env, fs,
13 hash::{Hash as _, Hasher as _},
14 io::{BufRead, BufReader},
15 marker::PhantomData,
16 mem,
17 path::{Path, PathBuf},
18 process::Command,
19 sync::LazyLock,
20 time::{SystemTime, SystemTimeError},
21};
22
23use etcetera::BaseStrategy as _;
24use fs4::fs_std::FileExt;
25use libloading::{Library, Symbol};
26use log::{error, info, warn};
27use once_cell::unsync::OnceCell;
28use regex::{Regex, RegexBuilder};
29use semver::Version;
30use serde::{Deserialize, Deserializer, Serialize};
31use thiserror::Error;
32use tree_sitter::Language;
33#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
34use tree_sitter::QueryError;
35#[cfg(feature = "tree-sitter-highlight")]
36use tree_sitter::QueryErrorKind;
37#[cfg(feature = "wasm")]
38use tree_sitter::WasmError;
39#[cfg(feature = "tree-sitter-highlight")]
40use tree_sitter_highlight::HighlightConfiguration;
41#[cfg(feature = "tree-sitter-tags")]
42use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
43
/// Matches a `"name": "<value>"` pair in JSON text and captures the value
/// (non-greedy, so the capture stops at the next `"`).
///
/// NOTE(review): presumably used to pull the grammar name out of
/// `grammar.json`; the call site is outside this view.
static GRAMMAR_NAME_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#""name":\s*"(.*?)""#).unwrap());

/// Contents of the `../wasi-sdk-version` file (relative to this source file),
/// embedded at compile time with leading/trailing ASCII whitespace removed.
const WASI_SDK_VERSION: &str = include_str!("../wasi-sdk-version").trim_ascii();

/// `Result` alias whose error type is [`LoaderError`].
pub type LoaderResult<T> = Result<T, LoaderError>;
50
51#[derive(Debug, Error)]
52pub enum LoaderError {
53 #[error(transparent)]
54 Compiler(CompilerError),
55 #[error("Parser compilation failed.\nStdout: {0}\nStderr: {1}")]
56 Compilation(String, String),
57 #[error("Failed to execute curl for {0} -- {1}")]
58 Curl(String, std::io::Error),
59 #[error("Failed to load language in current directory:\n{0}")]
60 CurrentDirectoryLoad(Box<Self>),
61 #[error("External file path {0} is outside of parser directory {1}")]
62 ExternalFile(String, String),
63 #[error("Failed to extract archive {0} to {1}")]
64 Extraction(String, String),
65 #[error("Failed to load language for file name {0}:\n{1}")]
66 FileNameLoad(String, Box<Self>),
67 #[error("Failed to parse the language name from grammar.json at {0}")]
68 GrammarJSON(String),
69 #[error(transparent)]
70 HomeDir(#[from] etcetera::HomeDirError),
71 #[error(transparent)]
72 IO(IoError),
73 #[error(transparent)]
74 Library(LibraryError),
75 #[error("Failed to compare binary and source timestamps:\n{0}")]
76 ModifiedTime(Box<Self>),
77 #[error("No language found")]
78 NoLanguage,
79 #[error(transparent)]
80 Query(LoaderQueryError),
81 #[error("Failed to load language for scope '{0}':\n{1}")]
82 ScopeLoad(String, Box<Self>),
83 #[error(transparent)]
84 Serialization(#[from] serde_json::Error),
85 #[error(transparent)]
86 Symbol(SymbolError),
87 #[error(transparent)]
88 Tags(#[from] TagsError),
89 #[error("Failed to execute tar for {0} -- {1}")]
90 Tar(String, std::io::Error),
91 #[error(transparent)]
92 Time(#[from] SystemTimeError),
93 #[error("Unknown scope '{0}'")]
94 UnknownScope(String),
95 #[error("Failed to download wasi-sdk from {0}")]
96 WasiSDKDownload(String),
97 #[error(transparent)]
98 WasiSDKClang(#[from] WasiSDKClangError),
99 #[error("Unsupported platform for wasi-sdk")]
100 WasiSDKPlatform,
101 #[cfg(feature = "wasm")]
102 #[error(transparent)]
103 Wasm(#[from] WasmError),
104 #[error("Failed to run wasi-sdk clang -- {0}")]
105 WasmCompiler(std::io::Error),
106 #[error("wasi-sdk clang command failed: {0}")]
107 WasmCompilation(String),
108}
109
110#[derive(Debug, Error)]
111pub struct CompilerError {
112 pub error: std::io::Error,
113 pub command: Box<Command>,
114}
115
116impl std::fmt::Display for CompilerError {
117 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
118 write!(
119 f,
120 "Failed to execute the C compiler with the following command:\n{:?}\nError: {}",
121 *self.command, self.error
122 )?;
123 Ok(())
124 }
125}
126
127#[derive(Debug, Error)]
128pub struct IoError {
129 pub error: std::io::Error,
130 pub path: Option<String>,
131}
132
133impl IoError {
134 fn new(error: std::io::Error, path: Option<&Path>) -> Self {
135 Self {
136 error,
137 path: path.map(|p| p.to_string_lossy().to_string()),
138 }
139 }
140}
141
142impl std::fmt::Display for IoError {
143 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
144 write!(f, "{}", self.error)?;
145 if let Some(ref path) = self.path {
146 write!(f, " ({path})")?;
147 }
148 Ok(())
149 }
150}
151
152#[derive(Debug, Error)]
153pub struct LibraryError {
154 pub error: libloading::Error,
155 pub path: String,
156}
157
158impl std::fmt::Display for LibraryError {
159 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
160 write!(
161 f,
162 "Error opening dynamic library {} -- {}",
163 self.path, self.error
164 )?;
165 Ok(())
166 }
167}
168
169#[derive(Debug, Error)]
170pub struct LoaderQueryError {
171 pub error: QueryError,
172 pub file: Option<String>,
173}
174
175impl std::fmt::Display for LoaderQueryError {
176 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
177 if let Some(ref path) = self.file {
178 writeln!(f, "Error in query file {path}:")?;
179 }
180 write!(f, "{}", self.error)?;
181 Ok(())
182 }
183}
184
185#[derive(Debug, Error)]
186pub struct SymbolError {
187 pub error: libloading::Error,
188 pub symbol_name: String,
189 pub path: String,
190}
191
192impl std::fmt::Display for SymbolError {
193 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
194 write!(
195 f,
196 "Failed to load symbol {} from {} -- {}",
197 self.symbol_name, self.path, self.error
198 )?;
199 Ok(())
200 }
201}
202
203#[derive(Debug, Error)]
204pub struct WasiSDKClangError {
205 pub wasi_sdk_dir: String,
206 pub possible_executables: Vec<&'static str>,
207 pub download: bool,
208}
209
210impl std::fmt::Display for WasiSDKClangError {
211 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
212 if self.download {
213 write!(
214 f,
215 "Failed to find clang executable in downloaded wasi-sdk at '{}'.",
216 self.wasi_sdk_dir
217 )?;
218 } else {
219 write!(f, "TREE_SITTER_WASI_SDK_PATH is set to '{}', but no clang executable found in 'bin/' directory.", self.wasi_sdk_dir)?;
220 }
221
222 let possible_exes = self.possible_executables.join(", ");
223 write!(f, " Looked for: {possible_exes}.")?;
224
225 Ok(())
226 }
227}
228
/// Default file name of the highlights query.
pub const DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME: &str = "highlights.scm";

/// Default file name of the injections query.
pub const DEFAULT_INJECTIONS_QUERY_FILE_NAME: &str = "injections.scm";

/// Default file name of the locals query.
pub const DEFAULT_LOCALS_QUERY_FILE_NAME: &str = "locals.scm";

/// Default file name of the tags query.
pub const DEFAULT_TAGS_QUERY_FILE_NAME: &str = "tags.scm";
236
/// Loader configuration: the directories that are scanned for grammars.
#[derive(Default, Deserialize, Serialize)]
pub struct Config {
    /// Directories scanned by `Loader::find_all_languages`.
    ///
    /// Stored under the `parser-directories` key; defaults to empty when the
    /// key is missing. On deserialization each entry goes through
    /// `deserialize_parser_directories`, which expands a leading `~` or
    /// `$HOME` component when the home directory can be determined.
    #[serde(default)]
    #[serde(
        rename = "parser-directories",
        deserialize_with = "deserialize_parser_directories"
    )]
    pub parser_directories: Vec<PathBuf>,
}
246
/// Zero, one, or several paths, as they may appear in a JSON configuration.
///
/// The enum is `untagged`, so no variant name appears in the JSON.
#[derive(Serialize, Deserialize, Clone, Default)]
#[serde(untagged)]
pub enum PathsJSON {
    /// No path given (the default).
    #[default]
    Empty,
    /// Exactly one path.
    Single(PathBuf),
    /// A list of paths.
    Multiple(Vec<PathBuf>),
}
255
256impl PathsJSON {
257 fn into_vec(self) -> Option<Vec<PathBuf>> {
258 match self {
259 Self::Empty => None,
260 Self::Single(s) => Some(vec![s]),
261 Self::Multiple(s) => Some(s),
262 }
263 }
264
265 const fn is_empty(&self) -> bool {
266 matches!(self, Self::Empty)
267 }
268
269 #[must_use]
271 pub fn to_variable_value<'a>(&'a self, default: &'a PathBuf) -> &'a str {
272 match self {
273 Self::Empty => Some(default),
274 Self::Single(path_buf) => Some(path_buf),
275 Self::Multiple(paths) => paths.first(),
276 }
277 .map_or("", |path| path.as_os_str().to_str().unwrap_or(""))
278 }
279}
280
/// An author entry of a `package.json`: either a plain string or an object.
///
/// The enum is `untagged`, so no variant name appears in the JSON.
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONAuthor {
    /// The author given as a single string.
    String(String),
    /// The author given as an object with a required `name`.
    Object {
        name: String,
        email: Option<String>,
        url: Option<String>,
    },
}
291
/// The repository entry of a `package.json`: either a plain string or an
/// object carrying a `url`.
///
/// The enum is `untagged`, so no variant name appears in the JSON.
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONRepository {
    /// The repository given as a single string.
    String(String),
    /// The repository given as an object.
    Object { url: String },
}
298
/// The subset of `package.json` fields modelled by this crate.
#[derive(Serialize, Deserialize)]
pub struct PackageJSON {
    pub name: String,
    /// Parsed as a semantic version.
    pub version: Version,
    pub description: Option<String>,
    pub author: Option<PackageJSONAuthor>,
    pub maintainers: Option<Vec<PackageJSONAuthor>>,
    pub license: Option<String>,
    pub repository: Option<PackageJSONRepository>,
    /// Language configurations stored under the `tree-sitter` key; the key is
    /// optional when reading and omitted when writing `None`.
    #[serde(default)]
    #[serde(rename = "tree-sitter", skip_serializing_if = "Option::is_none")]
    pub tree_sitter: Option<Vec<LanguageConfigurationJSON>>,
}
312
/// Serde default for `LanguageConfigurationJSON::path`: the current directory (`.`).
fn default_path() -> PathBuf {
    Path::new(".").to_path_buf()
}
316
/// One language configuration entry as stored in JSON.
///
/// Field names are mapped to kebab-case keys (e.g. `file_types` is stored as
/// `file-types`).
#[derive(Serialize, Deserialize, Clone)]
#[serde(rename_all = "kebab-case")]
pub struct LanguageConfigurationJSON {
    /// Defaults to `.` (see `default_path`) when the key is missing.
    #[serde(default = "default_path")]
    pub path: PathBuf,
    pub scope: Option<String>,
    pub file_types: Option<Vec<String>>,
    pub content_regex: Option<String>,
    pub first_line_regex: Option<String>,
    pub injection_regex: Option<String>,
    // The path lists below default to `PathsJSON::Empty` when missing and are
    // left out of the serialized output while empty.
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
}
338
/// In-memory form of a `tree-sitter.json` file (see `TreeSitterJSON::from_file`).
///
/// Field names are mapped to kebab-case keys.
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct TreeSitterJSON {
    /// Stored under the `$schema` key.
    #[serde(rename = "$schema")]
    pub schema: Option<String>,
    pub grammars: Vec<Grammar>,
    pub metadata: Metadata,
    /// Falls back to `Bindings::default()` when the key is missing.
    #[serde(default)]
    pub bindings: Bindings,
}
349
350impl TreeSitterJSON {
351 pub fn from_file(path: &Path) -> LoaderResult<Self> {
352 let path = path.join("tree-sitter.json");
353 Ok(serde_json::from_str(&fs::read_to_string(&path).map_err(
354 |e| LoaderError::IO(IoError::new(e, Some(path.as_path()))),
355 )?)?)
356 }
357
358 #[must_use]
359 pub fn has_multiple_language_configs(&self) -> bool {
360 self.grammars.len() > 1
361 }
362}
363
/// One grammar entry of `tree-sitter.json`.
///
/// Field names are mapped to kebab-case keys. Optional fields are omitted from
/// the serialized output when `None`, and path lists when empty.
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Grammar {
    pub name: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub camelcase: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    pub scope: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub path: Option<PathBuf>,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
    // Unlike the other optional fields, a `None` here is still serialized.
    pub file_types: Option<Vec<String>>,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub injection_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub first_line_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub class_name: Option<String>,
}
395
/// The `metadata` section of `tree-sitter.json`.
///
/// Optional fields are omitted from the serialized output when `None`.
#[derive(Serialize, Deserialize)]
pub struct Metadata {
    /// Parsed as a semantic version.
    pub version: Version,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub authors: Option<Vec<Author>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub links: Option<Links>,
    /// Never read from or written to JSON (`serde(skip)`); it is `None` after
    /// deserialization.
    #[serde(skip)]
    pub namespace: Option<String>,
}
410
/// An author listed in the metadata; only `name` is required.
#[derive(Serialize, Deserialize)]
pub struct Author {
    pub name: String,
    /// Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub email: Option<String>,
    /// Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub url: Option<String>,
}
419
/// Links listed in the metadata; only `repository` is required.
#[derive(Serialize, Deserialize)]
pub struct Links {
    pub repository: String,
    /// Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub funding: Option<String>,
}
426
427#[derive(Serialize, Deserialize, Clone)]
428#[serde(default)]
429pub struct Bindings {
430 pub c: bool,
431 pub go: bool,
432 pub java: bool,
433 #[serde(skip)]
434 pub kotlin: bool,
435 pub node: bool,
436 pub python: bool,
437 pub rust: bool,
438 pub swift: bool,
439 pub zig: bool,
440}
441
442impl Bindings {
443 #[must_use]
445 pub const fn languages(&self) -> [(&'static str, bool); 8] {
446 [
447 ("c", true),
448 ("go", true),
449 ("java", false),
450 ("node", true),
453 ("python", true),
454 ("rust", true),
455 ("swift", true),
456 ("zig", false),
457 ]
458 }
459
460 pub fn with_enabled_languages<'a, I>(languages: I) -> Result<Self, &'a str>
462 where
463 I: Iterator<Item = &'a str>,
464 {
465 let mut out = Self {
466 c: false,
467 go: false,
468 java: false,
469 kotlin: false,
470 node: false,
471 python: false,
472 rust: false,
473 swift: false,
474 zig: false,
475 };
476
477 for v in languages {
478 match v {
479 "c" => out.c = true,
480 "go" => out.go = true,
481 "java" => out.java = true,
482 "node" => out.node = true,
485 "python" => out.python = true,
486 "rust" => out.rust = true,
487 "swift" => out.swift = true,
488 "zig" => out.zig = true,
489 unsupported => return Err(unsupported),
490 }
491 }
492
493 Ok(out)
494 }
495}
496
497impl Default for Bindings {
498 fn default() -> Self {
499 Self {
500 c: true,
501 go: true,
502 java: false,
503 kotlin: false,
504 node: true,
505 python: true,
506 rust: true,
507 swift: true,
508 zig: false,
509 }
510 }
511}
512
513fn deserialize_parser_directories<'de, D>(deserializer: D) -> Result<Vec<PathBuf>, D::Error>
517where
518 D: Deserializer<'de>,
519{
520 let paths = Vec::<PathBuf>::deserialize(deserializer)?;
521 let Ok(home) = etcetera::home_dir() else {
522 return Ok(paths);
523 };
524 let standardized = paths
525 .into_iter()
526 .map(|path| standardize_path(path, &home))
527 .collect();
528 Ok(standardized)
529}
530
/// Replaces a leading `~` or `$HOME` path component with `home`.
///
/// Only a whole first component is replaced, so a path such as `~user/x` is
/// returned unchanged, as is any path without either prefix.
fn standardize_path(path: PathBuf, home: &Path) -> PathBuf {
    for prefix in ["~", "$HOME"] {
        if let Ok(rest) = path.strip_prefix(prefix) {
            return home.join(rest);
        }
    }
    path
}
540
541impl Config {
542 #[must_use]
543 pub fn initial() -> Self {
544 let home_dir = etcetera::home_dir().expect("Cannot determine home directory");
545 Self {
546 parser_directories: vec![
547 home_dir.join("github"),
548 home_dir.join("src"),
549 home_dir.join("source"),
550 home_dir.join("projects"),
551 home_dir.join("dev"),
552 home_dir.join("git"),
553 ],
554 }
555 }
556}
557
/// Value of the `BUILD_TARGET` environment variable captured at compile time
/// (`env!` makes the build fail if it is unset). It is passed as both the
/// target and the host of the `cc` build in `compile_parser_to_dylib`.
const BUILD_TARGET: &str = env!("BUILD_TARGET");
559
/// A language configuration held by a [`Loader`].
///
/// `language_id` is an index into the loader's table of grammars
/// (`Loader::languages_by_id`); the language itself is obtained through the
/// loader, e.g. with `Loader::language_for_configuration`.
pub struct LanguageConfiguration<'a> {
    /// Scope name, compared for equality in `Loader::language_configuration_for_scope`.
    pub scope: Option<String>,
    /// Used to rank candidates when several configurations match one file
    /// name (see `Loader::language_configuration_for_file_name`).
    pub content_regex: Option<Regex>,
    pub first_line_regex: Option<Regex>,
    /// Matched against injection strings in
    /// `Loader::language_configuration_for_injection_string`.
    pub injection_regex: Option<Regex>,
    pub file_types: Vec<String>,
    pub root_path: PathBuf,
    pub highlights_filenames: Option<Vec<PathBuf>>,
    pub injections_filenames: Option<Vec<PathBuf>>,
    pub locals_filenames: Option<Vec<PathBuf>>,
    pub tags_filenames: Option<Vec<PathBuf>>,
    pub language_name: String,
    /// Index into the owning loader's grammar table.
    language_id: usize,
    /// Write-once cell for the highlight configuration; it is filled in
    /// outside this view.
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_config: OnceCell<Option<HighlightConfiguration>>,
    /// Write-once cell for the tags configuration; it is filled in outside
    /// this view.
    #[cfg(feature = "tree-sitter-tags")]
    tags_config: OnceCell<Option<TagsConfiguration>>,
    /// Borrowed, mutex-guarded list of highlight names.
    /// NOTE(review): presumably points at the owning `Loader`'s
    /// `highlight_names` — confirm where configurations are constructed.
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_names: &'a Mutex<Vec<String>>,
    #[cfg(feature = "tree-sitter-highlight")]
    use_all_highlight_names: bool,
    /// Keeps `'a` in use when the highlight feature (the only other user of
    /// the lifetime) is disabled.
    _phantom: PhantomData<&'a ()>,
}
583
/// Finds grammars on disk, compiles them when needed, and loads them as
/// [`Language`] values.
pub struct Loader {
    /// Directory in which compiled parser libraries are placed when no
    /// explicit output path is given.
    pub parser_lib_path: PathBuf,
    /// One entry per grammar: its directory (the sources are read from its
    /// `src` subdirectory), a write-once cell caching the loaded language, and
    /// the grammar's external files, if any.
    languages_by_id: Vec<(PathBuf, OnceCell<Language>, Option<Vec<PathBuf>>)>,
    language_configurations: Vec<LanguageConfiguration<'static>>,
    /// Maps a file name or a dot-joined extension to indices into
    /// `language_configurations`.
    language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
    language_configuration_in_current_path: Option<usize>,
    /// Maps a first-line regex (as source text) to indices into
    /// `language_configurations`.
    language_configuration_ids_by_first_line_regex: HashMap<String, Vec<usize>>,
    /// Highlight names, replaced wholesale by `configure_highlights`.
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_names: Box<Mutex<Vec<String>>>,
    /// Starts as `true`; cleared by `configure_highlights`.
    #[cfg(feature = "tree-sitter-highlight")]
    use_all_highlight_names: bool,
    /// Build without optimization, with debug info and extra warnings; also
    /// adds a `.debug` marker to the default library name.
    debug_build: bool,
    /// Sets `CompileConfig::sanitize` and adds a `.sanitize` marker to the
    /// default library name.
    sanitize_build: bool,
    /// Request recompilation regardless of timestamps.
    force_rebuild: bool,

    /// When it holds a store, languages are compiled to and loaded from Wasm
    /// instead of native dynamic libraries.
    #[cfg(feature = "wasm")]
    wasm_store: Mutex<Option<tree_sitter::WasmStore>>,
}
602
/// Everything needed to compile one parser.
pub struct CompileConfig<'a> {
    /// Directory containing the generated sources (`parser.c`, ...).
    pub src_path: &'a Path,
    /// Include directories handed to the compiler.
    pub header_paths: Vec<&'a Path>,
    /// Path of `parser.c`.
    pub parser_path: PathBuf,
    /// Path of the external scanner, if there is one.
    pub scanner_path: Option<PathBuf>,
    /// Additional files, given relative to `src_path`.
    pub external_files: Option<&'a [PathBuf]>,
    /// Where the compiled library is written; `None` lets the loader choose.
    pub output_path: Option<PathBuf>,
    /// Names passed to the compiler as preprocessor definitions.
    pub flags: &'a [&'a str],
    pub sanitize: bool,
    /// The grammar name.
    pub name: String,
}

impl<'a> CompileConfig<'a> {
    /// Creates a configuration for the sources in `src_path`.
    ///
    /// `src_path` is the only header path, `parser_path` is
    /// `<src_path>/parser.c`, and the scanner path, flags, sanitize switch,
    /// and name start out unset/empty.
    #[must_use]
    pub fn new(
        src_path: &'a Path,
        externals: Option<&'a [PathBuf]>,
        output_path: Option<PathBuf>,
    ) -> Self {
        let parser_path = src_path.join("parser.c");
        let header_paths = vec![src_path];
        Self {
            name: String::new(),
            sanitize: false,
            flags: &[],
            scanner_path: None,
            external_files: externals,
            output_path,
            parser_path,
            header_paths,
            src_path,
        }
    }
}
635
// NOTE(review): `Loader` stores `once_cell::unsync::OnceCell` values (in
// `languages_by_id`, and in its `LanguageConfiguration`s when the highlight or
// tags features are on), and that cell type is not `Sync` by itself. This
// `unsafe impl` therefore promises that sharing a `&Loader` between threads is
// sound; nothing visible here enforces it (e.g. `language_for_id` initializes
// a cell through `&self`) — confirm the invariant callers must uphold.
unsafe impl Sync for Loader {}
637
638impl Loader {
639 pub fn new() -> LoaderResult<Self> {
640 let parser_lib_path = if let Ok(path) = env::var("TREE_SITTER_LIBDIR") {
641 PathBuf::from(path)
642 } else {
643 if cfg!(target_os = "macos") {
644 let legacy_apple_path = etcetera::base_strategy::Apple::new()?
645 .cache_dir() .join("tree-sitter");
647 if legacy_apple_path.exists() && legacy_apple_path.is_dir() {
648 std::fs::remove_dir_all(&legacy_apple_path).map_err(|e| {
649 LoaderError::IO(IoError::new(e, Some(legacy_apple_path.as_path())))
650 })?;
651 }
652 }
653
654 etcetera::choose_base_strategy()?
655 .cache_dir()
656 .join("tree-sitter")
657 .join("lib")
658 };
659 Ok(Self::with_parser_lib_path(parser_lib_path))
660 }
661
662 #[must_use]
663 pub fn with_parser_lib_path(parser_lib_path: PathBuf) -> Self {
664 Self {
665 parser_lib_path,
666 languages_by_id: Vec::new(),
667 language_configurations: Vec::new(),
668 language_configuration_ids_by_file_type: HashMap::new(),
669 language_configuration_in_current_path: None,
670 language_configuration_ids_by_first_line_regex: HashMap::new(),
671 #[cfg(feature = "tree-sitter-highlight")]
672 highlight_names: Box::new(Mutex::new(Vec::new())),
673 #[cfg(feature = "tree-sitter-highlight")]
674 use_all_highlight_names: true,
675 debug_build: false,
676 sanitize_build: false,
677 force_rebuild: false,
678
679 #[cfg(feature = "wasm")]
680 wasm_store: Mutex::default(),
681 }
682 }
683
684 #[cfg(feature = "tree-sitter-highlight")]
685 #[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
686 pub fn configure_highlights(&mut self, names: &[String]) {
687 self.use_all_highlight_names = false;
688 let mut highlights = self.highlight_names.lock().unwrap();
689 highlights.clear();
690 highlights.extend(names.iter().cloned());
691 }
692
693 #[must_use]
694 #[cfg(feature = "tree-sitter-highlight")]
695 #[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
696 pub fn highlight_names(&self) -> Vec<String> {
697 self.highlight_names.lock().unwrap().clone()
698 }
699
700 pub fn find_all_languages(&mut self, config: &Config) -> LoaderResult<()> {
701 if config.parser_directories.is_empty() {
702 warn!(concat!(
703 "You have not configured any parser directories!\n",
704 "Please run `tree-sitter init-config` and edit the resulting\n",
705 "configuration file to indicate where we should look for\n",
706 "language grammars.\n"
707 ));
708 }
709 for parser_container_dir in &config.parser_directories {
710 if let Ok(entries) = fs::read_dir(parser_container_dir) {
711 for entry in entries {
712 let entry = entry.map_err(|e| LoaderError::IO(IoError::new(e, None)))?;
713 if let Some(parser_dir_name) = entry.file_name().to_str() {
714 if parser_dir_name.starts_with("tree-sitter-") {
715 self.find_language_configurations_at_path(
716 &parser_container_dir.join(parser_dir_name),
717 false,
718 )
719 .ok();
720 }
721 }
722 }
723 }
724 }
725 Ok(())
726 }
727
728 pub fn languages_at_path(&mut self, path: &Path) -> LoaderResult<Vec<(Language, String)>> {
729 if let Ok(configurations) = self.find_language_configurations_at_path(path, true) {
730 let mut language_ids = configurations
731 .iter()
732 .map(|c| (c.language_id, c.language_name.clone()))
733 .collect::<Vec<_>>();
734 language_ids.sort_unstable();
735 language_ids.dedup();
736 language_ids
737 .into_iter()
738 .map(|(id, name)| Ok((self.language_for_id(id)?, name)))
739 .collect::<LoaderResult<Vec<_>>>()
740 } else {
741 Ok(Vec::new())
742 }
743 }
744
745 #[must_use]
746 pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> {
747 self.language_configurations
748 .iter()
749 .map(|c| (c, self.languages_by_id[c.language_id].0.as_ref()))
750 .collect()
751 }
752
753 pub fn language_configuration_for_scope(
754 &self,
755 scope: &str,
756 ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
757 for configuration in &self.language_configurations {
758 if configuration.scope.as_ref().is_some_and(|s| s == scope) {
759 let language = self.language_for_id(configuration.language_id)?;
760 return Ok(Some((language, configuration)));
761 }
762 }
763 Ok(None)
764 }
765
766 pub fn language_configuration_for_first_line_regex(
767 &self,
768 path: &Path,
769 ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
770 self.language_configuration_ids_by_first_line_regex
771 .iter()
772 .try_fold(None, |_, (regex, ids)| {
773 if let Some(regex) = Self::regex(Some(regex)) {
774 let file = fs::File::open(path)
775 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
776 let reader = BufReader::new(file);
777 let first_line = reader
778 .lines()
779 .next()
780 .transpose()
781 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
782 if let Some(first_line) = first_line {
783 if regex.is_match(&first_line) && !ids.is_empty() {
784 let configuration = &self.language_configurations[ids[0]];
785 let language = self.language_for_id(configuration.language_id)?;
786 return Ok(Some((language, configuration)));
787 }
788 }
789 }
790
791 Ok(None)
792 })
793 }
794
795 pub fn language_configuration_for_file_name(
796 &self,
797 path: &Path,
798 ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
799 let configuration_ids = path
802 .file_name()
803 .and_then(|n| n.to_str())
804 .and_then(|file_name| self.language_configuration_ids_by_file_type.get(file_name))
805 .or_else(|| {
806 let mut path = path.to_owned();
807 let mut extensions = Vec::with_capacity(2);
808 while let Some(extension) = path.extension() {
809 extensions.push(extension.to_str()?.to_string());
810 path = PathBuf::from(path.file_stem()?.to_os_string());
811 }
812 extensions.reverse();
813 self.language_configuration_ids_by_file_type
814 .get(&extensions.join("."))
815 });
816
817 if let Some(configuration_ids) = configuration_ids {
818 if !configuration_ids.is_empty() {
819 let configuration = if configuration_ids.len() == 1 {
820 &self.language_configurations[configuration_ids[0]]
821 }
822 else {
825 let file_contents =
826 fs::read(path).map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
827 let file_contents = String::from_utf8_lossy(&file_contents);
828 let mut best_score = -2isize;
829 let mut best_configuration_id = None;
830 for configuration_id in configuration_ids {
831 let config = &self.language_configurations[*configuration_id];
832
833 let score;
836 if let Some(content_regex) = &config.content_regex {
837 if let Some(mat) = content_regex.find(&file_contents) {
838 score = (mat.end() - mat.start()) as isize;
839 }
840 else {
845 score = -1;
846 }
847 } else {
848 score = 0;
849 }
850 if score > best_score {
851 best_configuration_id = Some(*configuration_id);
852 best_score = score;
853 }
854 }
855
856 &self.language_configurations[best_configuration_id.unwrap()]
857 };
858
859 let language = self.language_for_id(configuration.language_id)?;
860 return Ok(Some((language, configuration)));
861 }
862 }
863
864 Ok(None)
865 }
866
867 pub fn language_configuration_for_injection_string(
868 &self,
869 string: &str,
870 ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
871 let mut best_match_length = 0;
872 let mut best_match_position = None;
873 for (i, configuration) in self.language_configurations.iter().enumerate() {
874 if let Some(injection_regex) = &configuration.injection_regex {
875 if let Some(mat) = injection_regex.find(string) {
876 let length = mat.end() - mat.start();
877 if length > best_match_length {
878 best_match_position = Some(i);
879 best_match_length = length;
880 }
881 }
882 }
883 }
884
885 if let Some(i) = best_match_position {
886 let configuration = &self.language_configurations[i];
887 let language = self.language_for_id(configuration.language_id)?;
888 Ok(Some((language, configuration)))
889 } else {
890 Ok(None)
891 }
892 }
893
    /// Returns the language that `configuration` refers to, by delegating to
    /// `language_for_id` with the configuration's `language_id`.
    ///
    /// # Errors
    ///
    /// Propagates any error from `language_for_id`.
    pub fn language_for_configuration(
        &self,
        configuration: &LanguageConfiguration,
    ) -> LoaderResult<Language> {
        self.language_for_id(configuration.language_id)
    }
900
901 fn language_for_id(&self, id: usize) -> LoaderResult<Language> {
902 let (path, language, externals) = &self.languages_by_id[id];
903 language
904 .get_or_try_init(|| {
905 let src_path = path.join("src");
906 self.load_language_at_path(CompileConfig::new(
907 &src_path,
908 externals.as_deref(),
909 None,
910 ))
911 })
912 .cloned()
913 }
914
915 pub fn compile_parser_at_path(
916 &self,
917 grammar_path: &Path,
918 output_path: PathBuf,
919 flags: &[&str],
920 ) -> LoaderResult<()> {
921 let src_path = grammar_path.join("src");
922 let mut config = CompileConfig::new(&src_path, None, Some(output_path));
923 config.flags = flags;
924 self.load_language_at_path(config).map(|_| ())
925 }
926
927 pub fn load_language_at_path(&self, mut config: CompileConfig) -> LoaderResult<Language> {
928 let grammar_path = config.src_path.join("grammar.json");
929 config.name = Self::grammar_json_name(&grammar_path)?;
930 self.load_language_at_path_with_name(config)
931 }
932
    /// Loads the language named `config.name`, compiling it first when that
    /// is required.
    ///
    /// Steps, in order:
    /// 1. Choose the output path: `config.output_path` if given, otherwise a
    ///    file in `parser_lib_path` named after the grammar.
    /// 2. Decide whether to compile: always when `force_rebuild` is set or an
    ///    explicit output path was given, otherwise whatever
    ///    `needs_recompile` reports for the source files.
    /// 3. With the `wasm` feature and a Wasm store present: compile to Wasm
    ///    if needed, load the module through the store, and return.
    /// 4. Otherwise coordinate through a lock file, compile to a dynamic
    ///    library if needed, and load `tree_sitter_<name>` from it.
    ///
    /// # Errors
    ///
    /// Returns I/O errors from directory, lock-file, and output-file handling,
    /// errors from the compile steps, and `LoaderError::IO` with kind
    /// `NotFound` when the library is missing after the build attempt.
    pub fn load_language_at_path_with_name(
        &self,
        mut config: CompileConfig,
    ) -> LoaderResult<Language> {
        let mut lib_name = config.name.clone();
        // Exported symbol name: dashes in the grammar name become underscores.
        let language_fn_name = format!("tree_sitter_{}", config.name.replace('-', "_"));
        // The trailing `._` in the suffixes below is a placeholder extension:
        // `set_extension` further down replaces it with the real one.
        if self.debug_build {
            lib_name.push_str(".debug._");
        }

        if self.sanitize_build {
            lib_name.push_str(".sanitize._");
            config.sanitize = true;
        }

        // The default output location must exist before anything is written.
        if config.output_path.is_none() {
            fs::create_dir_all(&self.parser_lib_path).map_err(|e| {
                LoaderError::IO(IoError::new(e, Some(self.parser_lib_path.as_path())))
            })?;
        }

        // An explicit output path (or `force_rebuild`) forces compilation. The
        // default output is `<parser_lib_path>/<lib_name>` with the platform's
        // dynamic-library extension, or `.wasm` when a Wasm store is present.
        let mut recompile = self.force_rebuild || config.output_path.is_some(); let output_path = config.output_path.unwrap_or_else(|| {
            let mut path = self.parser_lib_path.join(lib_name);
            path.set_extension(env::consts::DLL_EXTENSION);
            #[cfg(feature = "wasm")]
            if self.wasm_store.lock().unwrap().is_some() {
                path.set_extension("wasm");
            }
            path
        });
        config.output_path = Some(output_path.clone());

        let parser_path = config.src_path.join("parser.c");
        config.scanner_path = self.get_scanner_path(config.src_path);

        // Sources that can make the output stale: parser.c, the scanner (if
        // any), and the external files (resolved against `src_path`).
        let mut paths_to_check = vec![parser_path];

        if let Some(scanner_path) = config.scanner_path.as_ref() {
            paths_to_check.push(scanner_path.clone());
        }

        paths_to_check.extend(
            config
                .external_files
                .unwrap_or_default()
                .iter()
                .map(|p| config.src_path.join(p)),
        );

        // `needs_recompile` is defined outside this view; presumably it
        // compares the output's modification time with those of the sources.
        if !recompile {
            recompile = needs_recompile(&output_path, &paths_to_check)?;
        }

        // Wasm path: compile if needed, then load the module through the
        // store and return without touching the lock file.
        #[cfg(feature = "wasm")]
        if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() {
            if recompile {
                self.compile_parser_to_wasm(
                    &config.name,
                    config.src_path,
                    config
                        .scanner_path
                        .as_ref()
                        .and_then(|p| p.strip_prefix(config.src_path).ok()),
                    &output_path,
                )?;
            }

            let wasm_bytes = fs::read(&output_path)
                .map_err(|e| LoaderError::IO(IoError::new(e, Some(output_path.as_path()))))?;
            return Ok(wasm_store.load_language(&config.name, &wasm_bytes)?);
        }

        // The lock file name includes a hash of the output path, so different
        // output locations get different lock files.
        let lock_hash = {
            let mut hasher = std::hash::DefaultHasher::new();
            output_path.hash(&mut hasher);
            format!("{:x}", hasher.finish())
        };

        // Lock file: `<base>/tree-sitter/lock/<name>-<hash>.lock`, where
        // `<base>` is the cache directory, or a fresh temporary directory when
        // `CROSS_RUNNER` is set.
        // NOTE(review): the `TempDir` handle is dropped at the end of that
        // expression, which presumably deletes the directory again; the path
        // is recreated by `create_dir_all` below when compiling — confirm.
        let lock_path = if env::var("CROSS_RUNNER").is_ok() {
            tempfile::tempdir()
                .expect("create a temp dir")
                .path()
                .to_path_buf()
        } else {
            etcetera::choose_base_strategy()?.cache_dir()
        }
        .join("tree-sitter")
        .join("lock")
        .join(format!("{}-{lock_hash}.lock", config.name));

        // A lock file already exists.
        // NOTE(review): `recompile` is reset unconditionally here, so an
        // existing fresh lock file overrides `force_rebuild` and the staleness
        // check above — confirm this is intended.
        if let Ok(lock_file) = fs::OpenOptions::new().write(true).open(&lock_path) {
            recompile = false;
            if lock_file.try_lock_exclusive().is_err() {
                // The lock is not available right now (presumably another
                // process is building): block until it is, and do not build.
                lock_file
                    .lock_exclusive()
                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
                recompile = false;
            } else {
                // The lock was free. A lock file last modified more than 30
                // seconds ago is removed and a build is requested (presumably
                // it was left behind by an interrupted run).
                let time = lock_file
                    .metadata()
                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?
                    .modified()
                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?
                    .elapsed()?
                    .as_secs();
                if time > 30 {
                    fs::remove_file(&lock_path)
                        .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
                    recompile = true;
                }
            }
        }

        if recompile {
            // Create (or truncate) the lock file and hold it exclusively for
            // the duration of the build.
            let parent_path = lock_path.parent().unwrap();
            fs::create_dir_all(parent_path)
                .map_err(|e| LoaderError::IO(IoError::new(e, Some(parent_path))))?;
            let lock_file = fs::OpenOptions::new()
                .create(true)
                .truncate(true)
                .write(true)
                .open(&lock_path)
                .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
            lock_file
                .lock_exclusive()
                .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;

            self.compile_parser_to_dylib(&config, &lock_file, &lock_path)?;

            if config.scanner_path.is_some() {
                self.check_external_scanner(&output_path)?;
            }
        }

        // Report a missing library explicitly instead of leaving it to the
        // dynamic loader.
        if !output_path.exists() {
            let msg = format!(
                "Dynamic library `{}` not found after build attempt. \
                Are you running multiple processes building to the same output location?",
                output_path.display()
            );

            Err(LoaderError::IO(IoError::new(
                std::io::Error::new(std::io::ErrorKind::NotFound, msg),
                Some(output_path.as_path()),
            )))?;
        }

        Self::load_language(&output_path, &language_fn_name)
    }
1099
1100 pub fn load_language(path: &Path, function_name: &str) -> LoaderResult<Language> {
1101 let library = unsafe { Library::new(path) }.map_err(|e| {
1102 LoaderError::Library(LibraryError {
1103 error: e,
1104 path: path.to_string_lossy().to_string(),
1105 })
1106 })?;
1107 let language = unsafe {
1108 let language_fn = library
1109 .get::<Symbol<unsafe extern "C" fn() -> Language>>(function_name.as_bytes())
1110 .map_err(|e| {
1111 LoaderError::Symbol(SymbolError {
1112 error: e,
1113 symbol_name: function_name.to_string(),
1114 path: path.to_string_lossy().to_string(),
1115 })
1116 })?;
1117 language_fn()
1118 };
1119 mem::forget(library);
1120 Ok(language)
1121 }
1122
1123 fn compile_parser_to_dylib(
1124 &self,
1125 config: &CompileConfig,
1126 lock_file: &fs::File,
1127 lock_path: &Path,
1128 ) -> LoaderResult<()> {
1129 let mut cc_config = cc::Build::new();
1130 cc_config
1131 .cargo_metadata(false)
1132 .cargo_warnings(false)
1133 .target(BUILD_TARGET)
1134 .host(BUILD_TARGET)
1138 .debug(self.debug_build)
1139 .file(&config.parser_path)
1140 .includes(&config.header_paths)
1141 .std("c11");
1142
1143 if let Some(scanner_path) = config.scanner_path.as_ref() {
1144 cc_config.file(scanner_path);
1145 }
1146
1147 if self.debug_build {
1148 cc_config.opt_level(0).extra_warnings(true);
1149 } else {
1150 cc_config.opt_level(2).extra_warnings(false);
1151 }
1152
1153 for flag in config.flags {
1154 cc_config.define(flag, None);
1155 }
1156
1157 let compiler = cc_config.get_compiler();
1158 let mut command = Command::new(compiler.path());
1159 command.args(compiler.args());
1160 for (key, value) in compiler.env() {
1161 command.env(key, value);
1162 }
1163
1164 let output_path = config.output_path.as_ref().unwrap();
1165
1166 let temp_dir = if compiler.is_like_msvc() {
1167 let out = format!("-out:{}", output_path.to_str().unwrap());
1168 command.arg(if self.debug_build { "-LDd" } else { "-LD" });
1169 command.arg("-utf-8");
1170
1171 let temp_dir = output_path.parent().unwrap().join(format!(
1175 "tmp_{}_{:?}",
1176 std::process::id(),
1177 std::thread::current().id()
1178 ));
1179 std::fs::create_dir_all(&temp_dir).unwrap();
1180
1181 command.arg(format!("/Fo{}\\", temp_dir.display()));
1182 command.args(cc_config.get_files());
1183 command.arg("-link").arg(out);
1184 command.arg(format!("/IMPLIB:{}.lib", temp_dir.join("temp").display()));
1185
1186 Some(temp_dir)
1187 } else {
1188 command.arg("-Werror=implicit-function-declaration");
1189 if cfg!(any(target_os = "macos", target_os = "ios")) {
1190 command.arg("-dynamiclib");
1191 command.arg("-UTREE_SITTER_REUSE_ALLOCATOR");
1193 } else {
1194 command.arg("-shared");
1195 command.arg("-Wl,--no-undefined");
1196 }
1197 command.args(cc_config.get_files());
1198 command.arg("-o").arg(output_path);
1199
1200 None
1201 };
1202
1203 let output = command.output().map_err(|e| {
1204 LoaderError::Compiler(CompilerError {
1205 error: e,
1206 command: Box::new(command),
1207 })
1208 })?;
1209
1210 if let Some(temp_dir) = temp_dir {
1211 let _ = fs::remove_dir_all(temp_dir);
1212 }
1213
1214 FileExt::unlock(lock_file)
1215 .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path))))?;
1216 fs::remove_file(lock_path)
1217 .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path))))?;
1218
1219 if output.status.success() {
1220 Ok(())
1221 } else {
1222 Err(LoaderError::Compilation(
1223 String::from_utf8_lossy(&output.stdout).to_string(),
1224 String::from_utf8_lossy(&output.stderr).to_string(),
1225 ))
1226 }
1227 }
1228
1229 #[cfg(unix)]
1230 fn check_external_scanner(&self, library_path: &Path) -> LoaderResult<()> {
1231 let section = " T ";
1232 let old_ppc_section = if cfg!(all(target_arch = "powerpc64", target_os = "linux")) {
1235 Some(" D ")
1236 } else {
1237 None
1238 };
1239 let nm_cmd = env::var("NM").unwrap_or_else(|_| "nm".to_owned());
1240 let command = Command::new(nm_cmd)
1241 .arg("--defined-only")
1242 .arg(library_path)
1243 .output();
1244 if let Ok(output) = command {
1245 if output.status.success() {
1246 let mut non_static_symbols = String::new();
1247 for line in String::from_utf8_lossy(&output.stdout).lines() {
1248 if line.contains(section) || old_ppc_section.is_some_and(|s| line.contains(s)) {
1249 if let Some(function_name) =
1250 line.split_whitespace().collect::<Vec<_>>().get(2)
1251 {
1252 if !line.contains("tree_sitter_") {
1253 writeln!(&mut non_static_symbols, " `{function_name}`").unwrap();
1254 }
1255 }
1256 }
1257 }
1258 if !non_static_symbols.is_empty() {
1259 warn!(
1260 "Found non-static non-tree-sitter functions in the external scanner\n{non_static_symbols}\n{}",
1261 concat!(
1262 "Consider making these functions static, they can cause conflicts ",
1263 "when another tree-sitter project uses the same function name."
1264 )
1265 );
1266 }
1267 }
1268 } else {
1269 warn!(
1270 "Failed to run `nm` to verify symbols in {}",
1271 library_path.display()
1272 );
1273 }
1274
1275 Ok(())
1276 }
1277
    /// Windows variant of the external scanner symbol check: it does not inspect
    /// `_library_path` and always returns `Ok(())`.
    #[cfg(windows)]
    fn check_external_scanner(&self, _library_path: &Path) -> LoaderResult<()> {
        Ok(())
    }
1283
1284 pub fn compile_parser_to_wasm(
1285 &self,
1286 language_name: &str,
1287 src_path: &Path,
1288 scanner_filename: Option<&Path>,
1289 output_path: &Path,
1290 ) -> LoaderResult<()> {
1291 let clang_executable = self.ensure_wasi_sdk_exists()?;
1292
1293 let mut command = Command::new(&clang_executable);
1294 command.current_dir(src_path).args([
1295 "-o",
1296 output_path.to_str().unwrap(),
1297 "-fPIC",
1298 "-shared",
1299 if self.debug_build { "-g" } else { "-Os" },
1300 format!("-Wl,--export=tree_sitter_{language_name}").as_str(),
1301 "-Wl,--allow-undefined",
1302 "-Wl,--no-entry",
1303 "-nostdlib",
1304 "-fno-exceptions",
1305 "-fvisibility=hidden",
1306 "-I",
1307 ".",
1308 "parser.c",
1309 ]);
1310
1311 if let Some(scanner_filename) = scanner_filename {
1312 command.arg(scanner_filename);
1313 }
1314
1315 let output = command.output().map_err(LoaderError::WasmCompiler)?;
1316
1317 if !output.status.success() {
1318 return Err(LoaderError::WasmCompilation(
1319 String::from_utf8_lossy(&output.stderr).to_string(),
1320 ));
1321 }
1322
1323 Ok(())
1324 }
1325
1326 fn extract_tar_gz_with_strip(
1328 &self,
1329 archive_path: &Path,
1330 destination: &Path,
1331 ) -> LoaderResult<()> {
1332 let status = Command::new("tar")
1333 .arg("-xzf")
1334 .arg(archive_path)
1335 .arg("--strip-components=1")
1336 .arg("-C")
1337 .arg(destination)
1338 .status()
1339 .map_err(|e| LoaderError::Tar(archive_path.to_string_lossy().to_string(), e))?;
1340
1341 if !status.success() {
1342 return Err(LoaderError::Extraction(
1343 archive_path.to_string_lossy().to_string(),
1344 destination.to_string_lossy().to_string(),
1345 ));
1346 }
1347
1348 Ok(())
1349 }
1350
1351 fn ensure_wasi_sdk_exists(&self) -> LoaderResult<PathBuf> {
1356 let possible_executables = if cfg!(windows) {
1357 vec![
1358 "clang.exe",
1359 "wasm32-unknown-wasi-clang.exe",
1360 "wasm32-wasi-clang.exe",
1361 ]
1362 } else {
1363 vec!["clang", "wasm32-unknown-wasi-clang", "wasm32-wasi-clang"]
1364 };
1365
1366 if let Ok(wasi_sdk_path) = std::env::var("TREE_SITTER_WASI_SDK_PATH") {
1367 let wasi_sdk_dir = PathBuf::from(wasi_sdk_path);
1368
1369 for exe in &possible_executables {
1370 let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1371 if clang_exe.exists() {
1372 return Ok(clang_exe);
1373 }
1374 }
1375
1376 return Err(LoaderError::WasiSDKClang(WasiSDKClangError {
1377 wasi_sdk_dir: wasi_sdk_dir.to_string_lossy().to_string(),
1378 possible_executables,
1379 download: false,
1380 }));
1381 }
1382
1383 let cache_dir = etcetera::choose_base_strategy()?
1384 .cache_dir()
1385 .join("tree-sitter");
1386 fs::create_dir_all(&cache_dir)
1387 .map_err(|e| LoaderError::IO(IoError::new(e, Some(cache_dir.as_path()))))?;
1388
1389 let wasi_sdk_dir = cache_dir.join("wasi-sdk");
1390
1391 for exe in &possible_executables {
1392 let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1393 if clang_exe.exists() {
1394 return Ok(clang_exe);
1395 }
1396 }
1397
1398 fs::create_dir_all(&wasi_sdk_dir)
1399 .map_err(|e| LoaderError::IO(IoError::new(e, Some(wasi_sdk_dir.as_path()))))?;
1400
1401 let arch_os = if cfg!(target_os = "macos") {
1402 if cfg!(target_arch = "aarch64") {
1403 "arm64-macos"
1404 } else {
1405 "x86_64-macos"
1406 }
1407 } else if cfg!(target_os = "windows") {
1408 if cfg!(target_arch = "aarch64") {
1409 "arm64-windows"
1410 } else {
1411 "x86_64-windows"
1412 }
1413 } else if cfg!(target_os = "linux") {
1414 if cfg!(target_arch = "aarch64") {
1415 "arm64-linux"
1416 } else {
1417 "x86_64-linux"
1418 }
1419 } else {
1420 return Err(LoaderError::WasiSDKPlatform);
1421 };
1422
1423 let sdk_filename = format!("wasi-sdk-{WASI_SDK_VERSION}-{arch_os}.tar.gz");
1424 let wasi_sdk_major_version = WASI_SDK_VERSION
1425 .trim_end_matches(char::is_numeric) .trim_end_matches('.'); let sdk_url = format!(
1428 "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-{wasi_sdk_major_version}/{sdk_filename}",
1429 );
1430
1431 info!("Downloading wasi-sdk from {sdk_url}...");
1432 let temp_tar_path = cache_dir.join(sdk_filename);
1433
1434 let status = Command::new("curl")
1435 .arg("-f")
1436 .arg("-L")
1437 .arg("-o")
1438 .arg(&temp_tar_path)
1439 .arg(&sdk_url)
1440 .status()
1441 .map_err(|e| LoaderError::Curl(sdk_url.clone(), e))?;
1442
1443 if !status.success() {
1444 return Err(LoaderError::WasiSDKDownload(sdk_url));
1445 }
1446
1447 info!("Extracting wasi-sdk to {}...", wasi_sdk_dir.display());
1448 self.extract_tar_gz_with_strip(&temp_tar_path, &wasi_sdk_dir)?;
1449
1450 fs::remove_file(temp_tar_path).ok();
1451 for exe in &possible_executables {
1452 let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1453 if clang_exe.exists() {
1454 return Ok(clang_exe);
1455 }
1456 }
1457
1458 Err(LoaderError::WasiSDKClang(WasiSDKClangError {
1459 wasi_sdk_dir: wasi_sdk_dir.to_string_lossy().to_string(),
1460 possible_executables,
1461 download: true,
1462 }))
1463 }
1464
1465 #[must_use]
1466 #[cfg(feature = "tree-sitter-highlight")]
1467 pub fn highlight_config_for_injection_string<'a>(
1468 &'a self,
1469 string: &str,
1470 ) -> Option<&'a HighlightConfiguration> {
1471 match self.language_configuration_for_injection_string(string) {
1472 Err(e) => {
1473 error!("Failed to load language for injection string '{string}': {e}",);
1474 None
1475 }
1476 Ok(None) => None,
1477 Ok(Some((language, configuration))) => {
1478 match configuration.highlight_config(language, None) {
1479 Err(e) => {
1480 error!(
1481 "Failed to load higlight config for injection string '{string}': {e}"
1482 );
1483 None
1484 }
1485 Ok(None) => None,
1486 Ok(Some(config)) => Some(config),
1487 }
1488 }
1489 }
1490 }
1491
1492 #[must_use]
1493 pub fn get_language_configuration_in_current_path(&self) -> Option<&LanguageConfiguration> {
1494 self.language_configuration_in_current_path
1495 .map(|i| &self.language_configurations[i])
1496 }
1497
    /// Registers the language configurations found at `parser_path` and returns the ones
    /// added by this call.
    ///
    /// Configurations are read from the `TreeSitterJSON` loaded from `parser_path`, one per
    /// grammar entry. If that adds nothing and `parser_path/src/grammar.json` exists, a
    /// minimal configuration is registered using the name read from `grammar.json`.
    ///
    /// When `set_current_path_config` is true and no current-path configuration has been
    /// recorded yet, the first configuration added here is recorded as such.
    ///
    /// # Errors
    ///
    /// Returns [`LoaderError::ExternalFile`] if an external file resolves to a path that
    /// does not start with `parser_path`, and any error from reading the grammar name out
    /// of `grammar.json`. A serialization error while loading the JSON config is only
    /// logged as a warning; other load errors are ignored.
    pub fn find_language_configurations_at_path(
        &mut self,
        parser_path: &Path,
        set_current_path_config: bool,
    ) -> LoaderResult<&[LanguageConfiguration]> {
        // Everything at or after this index is new and makes up the returned slice.
        let initial_language_configuration_count = self.language_configurations.len();

        match TreeSitterJSON::from_file(parser_path) {
            Ok(config) => {
                let language_count = self.languages_by_id.len();
                for grammar in config.grammars {
                    // A grammar without an explicit path lives in `parser_path` itself.
                    let language_path =
                        parser_path.join(grammar.path.unwrap_or(PathBuf::from(".")));

                    // Reuse a language entry added earlier in this call for the same path;
                    // entries that existed before this call are skipped.
                    let mut language_id = None;
                    for (id, (path, _, _)) in
                        self.languages_by_id.iter().enumerate().skip(language_count)
                    {
                        if language_path == *path {
                            language_id = Some(id);
                        }
                    }

                    let language_id = if let Some(language_id) = language_id {
                        language_id
                    } else {
                        self.languages_by_id.push((
                            language_path,
                            OnceCell::new(),
                            grammar
                                .external_files
                                .clone()
                                .into_vec()
                                .map(|files| {
                                    files
                                        .into_iter()
                                        .map(|path| {
                                            let path = parser_path.join(path);
                                            // Reject external files whose joined path does
                                            // not start with `parser_path`.
                                            if path.starts_with(parser_path) {
                                                Ok(path)
                                            } else {
                                                Err(LoaderError::ExternalFile(
                                                    path.to_string_lossy().to_string(),
                                                    parser_path.to_string_lossy().to_string(),
                                                ))
                                            }
                                        })
                                        .collect::<LoaderResult<Vec<_>>>()
                                })
                                .transpose()?,
                        ));
                        self.languages_by_id.len() - 1
                    };

                    let configuration = LanguageConfiguration {
                        root_path: parser_path.to_path_buf(),
                        language_name: grammar.name,
                        scope: Some(grammar.scope),
                        language_id,
                        file_types: grammar.file_types.unwrap_or_default(),
                        content_regex: Self::regex(grammar.content_regex.as_deref()),
                        first_line_regex: Self::regex(grammar.first_line_regex.as_deref()),
                        injection_regex: Self::regex(grammar.injection_regex.as_deref()),
                        injections_filenames: grammar.injections.into_vec(),
                        locals_filenames: grammar.locals.into_vec(),
                        tags_filenames: grammar.tags.into_vec(),
                        highlights_filenames: grammar.highlights.into_vec(),
                        #[cfg(feature = "tree-sitter-highlight")]
                        highlight_config: OnceCell::new(),
                        #[cfg(feature = "tree-sitter-tags")]
                        tags_config: OnceCell::new(),
                        #[cfg(feature = "tree-sitter-highlight")]
                        highlight_names: &self.highlight_names,
                        #[cfg(feature = "tree-sitter-highlight")]
                        use_all_highlight_names: self.use_all_highlight_names,
                        _phantom: PhantomData,
                    };

                    // Index the configuration (by the position it is about to occupy) under
                    // each of its file types and under its first-line regex.
                    for file_type in &configuration.file_types {
                        self.language_configuration_ids_by_file_type
                            .entry(file_type.clone())
                            .or_default()
                            .push(self.language_configurations.len());
                    }
                    if let Some(first_line_regex) = &configuration.first_line_regex {
                        self.language_configuration_ids_by_first_line_regex
                            .entry(first_line_regex.to_string())
                            .or_default()
                            .push(self.language_configurations.len());
                    }

                    // The configuration may borrow `self.highlight_names`; its lifetime is
                    // widened to `'static` so it can be stored inside `self`.
                    // NOTE(review): soundness depends on `highlight_names` outliving the
                    // stored configurations -- confirm against the struct definition.
                    self.language_configurations.push(unsafe {
                        mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
                            configuration,
                        )
                    });

                    if set_current_path_config
                        && self.language_configuration_in_current_path.is_none()
                    {
                        self.language_configuration_in_current_path =
                            Some(self.language_configurations.len() - 1);
                    }
                }
            }
            Err(LoaderError::Serialization(e)) => {
                warn!(
                    "Failed to parse {} -- {e}",
                    parser_path.join("tree-sitter.json").display()
                );
            }
            // Any other failure to load the JSON config is ignored.
            _ => {}
        }

        // Fallback: nothing was added above, but a generated grammar.json is present.
        if self.language_configurations.len() == initial_language_configuration_count
            && parser_path.join("src").join("grammar.json").exists()
        {
            let grammar_path = parser_path.join("src").join("grammar.json");
            let language_name = Self::grammar_json_name(&grammar_path)?;
            let configuration = LanguageConfiguration {
                root_path: parser_path.to_owned(),
                language_name,
                // Index of the `languages_by_id` entry pushed at the end of this branch.
                language_id: self.languages_by_id.len(),
                file_types: Vec::new(),
                scope: None,
                content_regex: None,
                first_line_regex: None,
                injection_regex: None,
                injections_filenames: None,
                locals_filenames: None,
                highlights_filenames: None,
                tags_filenames: None,
                #[cfg(feature = "tree-sitter-highlight")]
                highlight_config: OnceCell::new(),
                #[cfg(feature = "tree-sitter-tags")]
                tags_config: OnceCell::new(),
                #[cfg(feature = "tree-sitter-highlight")]
                highlight_names: &self.highlight_names,
                #[cfg(feature = "tree-sitter-highlight")]
                use_all_highlight_names: self.use_all_highlight_names,
                _phantom: PhantomData,
            };
            // Same lifetime widening as for the configurations built from the JSON config.
            self.language_configurations.push(unsafe {
                mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
                    configuration,
                )
            });
            self.languages_by_id
                .push((parser_path.to_owned(), OnceCell::new(), None));
        }

        Ok(&self.language_configurations[initial_language_configuration_count..])
    }
1661
1662 fn regex(pattern: Option<&str>) -> Option<Regex> {
1663 pattern.and_then(|r| RegexBuilder::new(r).multi_line(true).build().ok())
1664 }
1665
1666 fn grammar_json_name(grammar_path: &Path) -> LoaderResult<String> {
1667 let file = fs::File::open(grammar_path)
1668 .map_err(|e| LoaderError::IO(IoError::new(e, Some(grammar_path))))?;
1669
1670 let first_three_lines = BufReader::new(file)
1671 .lines()
1672 .take(3)
1673 .collect::<Result<Vec<_>, std::io::Error>>()
1674 .map_err(|_| LoaderError::GrammarJSON(grammar_path.to_string_lossy().to_string()))?
1675 .join("\n");
1676
1677 let name = GRAMMAR_NAME_REGEX
1678 .captures(&first_three_lines)
1679 .and_then(|c| c.get(1))
1680 .ok_or_else(|| LoaderError::GrammarJSON(grammar_path.to_string_lossy().to_string()))?;
1681
1682 Ok(name.as_str().to_string())
1683 }
1684
1685 pub fn select_language(
1686 &mut self,
1687 path: Option<&Path>,
1688 current_dir: &Path,
1689 scope: Option<&str>,
1690 lib_info: Option<&(PathBuf, &str)>,
1692 ) -> LoaderResult<Language> {
1693 if let Some((ref lib_path, language_name)) = lib_info {
1694 let language_fn_name = format!("tree_sitter_{}", language_name.replace('-', "_"));
1695 Self::load_language(lib_path, &language_fn_name)
1696 } else if let Some(scope) = scope {
1697 if let Some(config) = self
1698 .language_configuration_for_scope(scope)
1699 .map_err(|e| LoaderError::ScopeLoad(scope.to_string(), Box::new(e)))?
1700 {
1701 Ok(config.0)
1702 } else {
1703 Err(LoaderError::UnknownScope(scope.to_string()))
1704 }
1705 } else if let Some((lang, _)) = if let Some(path) = path {
1706 self.language_configuration_for_file_name(path)
1707 .map_err(|e| {
1708 LoaderError::FileNameLoad(
1709 path.file_name().unwrap().to_string_lossy().to_string(),
1710 Box::new(e),
1711 )
1712 })?
1713 } else {
1714 None
1715 } {
1716 Ok(lang)
1717 } else if let Some(id) = self.language_configuration_in_current_path {
1718 Ok(self.language_for_id(self.language_configurations[id].language_id)?)
1719 } else if let Some(lang) = self
1720 .languages_at_path(current_dir)
1721 .map_err(|e| LoaderError::CurrentDirectoryLoad(Box::new(e)))?
1722 .first()
1723 .cloned()
1724 {
1725 Ok(lang.0)
1726 } else if let Some(lang) = if let Some(path) = path {
1727 self.language_configuration_for_first_line_regex(path)?
1728 } else {
1729 None
1730 } {
1731 Ok(lang.0)
1732 } else {
1733 Err(LoaderError::NoLanguage)
1734 }
1735 }
1736
    /// Sets the `debug_build` flag.
    ///
    /// The flag is read when compiling parsers: `compile_parser_to_dylib` uses it to pick
    /// debug info, optimization level, and extra warnings, and `compile_parser_to_wasm`
    /// uses it to choose between `-g` and `-Os`.
    pub const fn debug_build(&mut self, flag: bool) {
        self.debug_build = flag;
    }
1740
    /// Sets the `sanitize_build` flag.
    ///
    /// NOTE(review): the code that reads this flag is not in this part of the file;
    /// presumably it enables sanitizer instrumentation for compiled parsers -- confirm.
    pub const fn sanitize_build(&mut self, flag: bool) {
        self.sanitize_build = flag;
    }
1744
    /// Sets the `force_rebuild` flag.
    ///
    /// NOTE(review): the code that reads this flag is not in this part of the file;
    /// presumably it forces recompilation even when the library is up to date -- confirm.
    pub const fn force_rebuild(&mut self, rebuild: bool) {
        self.force_rebuild = rebuild;
    }
1748
1749 #[cfg(feature = "wasm")]
1750 #[cfg_attr(docsrs, doc(cfg(feature = "wasm")))]
1751 pub fn use_wasm(&mut self, engine: &tree_sitter::wasmtime::Engine) {
1752 *self.wasm_store.lock().unwrap() = Some(tree_sitter::WasmStore::new(engine).unwrap());
1753 }
1754
1755 #[must_use]
1756 pub fn get_scanner_path(&self, src_path: &Path) -> Option<PathBuf> {
1757 let path = src_path.join("scanner.c");
1758 path.exists().then_some(path)
1759 }
1760}
1761
1762impl LanguageConfiguration<'_> {
1763 #[cfg(feature = "tree-sitter-highlight")]
1764 pub fn highlight_config(
1765 &self,
1766 language: Language,
1767 paths: Option<&[PathBuf]>,
1768 ) -> LoaderResult<Option<&HighlightConfiguration>> {
1769 let (highlights_filenames, injections_filenames, locals_filenames) = match paths {
1770 Some(paths) => (
1771 Some(
1772 paths
1773 .iter()
1774 .filter(|p| p.ends_with(DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME))
1775 .cloned()
1776 .collect::<Vec<_>>(),
1777 ),
1778 Some(
1779 paths
1780 .iter()
1781 .filter(|p| p.ends_with(DEFAULT_TAGS_QUERY_FILE_NAME))
1782 .cloned()
1783 .collect::<Vec<_>>(),
1784 ),
1785 Some(
1786 paths
1787 .iter()
1788 .filter(|p| p.ends_with(DEFAULT_LOCALS_QUERY_FILE_NAME))
1789 .cloned()
1790 .collect::<Vec<_>>(),
1791 ),
1792 ),
1793 None => (None, None, None),
1794 };
1795 self.highlight_config
1796 .get_or_try_init(|| {
1797 let (highlights_query, highlight_ranges) = self.read_queries(
1798 if highlights_filenames.is_some() {
1799 highlights_filenames.as_deref()
1800 } else {
1801 self.highlights_filenames.as_deref()
1802 },
1803 DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME,
1804 )?;
1805 let (injections_query, injection_ranges) = self.read_queries(
1806 if injections_filenames.is_some() {
1807 injections_filenames.as_deref()
1808 } else {
1809 self.injections_filenames.as_deref()
1810 },
1811 DEFAULT_INJECTIONS_QUERY_FILE_NAME,
1812 )?;
1813 let (locals_query, locals_ranges) = self.read_queries(
1814 if locals_filenames.is_some() {
1815 locals_filenames.as_deref()
1816 } else {
1817 self.locals_filenames.as_deref()
1818 },
1819 DEFAULT_LOCALS_QUERY_FILE_NAME,
1820 )?;
1821
1822 if highlights_query.is_empty() {
1823 Ok(None)
1824 } else {
1825 let mut result = HighlightConfiguration::new(
1826 language,
1827 &self.language_name,
1828 &highlights_query,
1829 &injections_query,
1830 &locals_query,
1831 )
1832 .map_err(|error| match error.kind {
1833 QueryErrorKind::Language => {
1834 LoaderError::Query(LoaderQueryError { error, file: None })
1835 }
1836 _ => {
1837 if error.offset < injections_query.len() {
1838 Self::include_path_in_query_error(
1839 error,
1840 &injection_ranges,
1841 &injections_query,
1842 0,
1843 )
1844 } else if error.offset < injections_query.len() + locals_query.len() {
1845 Self::include_path_in_query_error(
1846 error,
1847 &locals_ranges,
1848 &locals_query,
1849 injections_query.len(),
1850 )
1851 } else {
1852 Self::include_path_in_query_error(
1853 error,
1854 &highlight_ranges,
1855 &highlights_query,
1856 injections_query.len() + locals_query.len(),
1857 )
1858 }
1859 }
1860 })?;
1861 let mut all_highlight_names = self.highlight_names.lock().unwrap();
1862 if self.use_all_highlight_names {
1863 for capture_name in result.query.capture_names() {
1864 if !all_highlight_names.iter().any(|x| x == capture_name) {
1865 all_highlight_names.push((*capture_name).to_string());
1866 }
1867 }
1868 }
1869 result.configure(all_highlight_names.as_slice());
1870 drop(all_highlight_names);
1871 Ok(Some(result))
1872 }
1873 })
1874 .map(Option::as_ref)
1875 }
1876
1877 #[cfg(feature = "tree-sitter-tags")]
1878 pub fn tags_config(&self, language: Language) -> LoaderResult<Option<&TagsConfiguration>> {
1879 self.tags_config
1880 .get_or_try_init(|| {
1881 let (tags_query, tags_ranges) = self
1882 .read_queries(self.tags_filenames.as_deref(), DEFAULT_TAGS_QUERY_FILE_NAME)?;
1883 let (locals_query, locals_ranges) = self.read_queries(
1884 self.locals_filenames.as_deref(),
1885 DEFAULT_LOCALS_QUERY_FILE_NAME,
1886 )?;
1887 if tags_query.is_empty() {
1888 Ok(None)
1889 } else {
1890 TagsConfiguration::new(language, &tags_query, &locals_query)
1891 .map(Some)
1892 .map_err(|error| {
1893 if let TagsError::Query(error) = error {
1894 if error.offset < locals_query.len() {
1895 Self::include_path_in_query_error(
1896 error,
1897 &locals_ranges,
1898 &locals_query,
1899 0,
1900 )
1901 } else {
1902 Self::include_path_in_query_error(
1903 error,
1904 &tags_ranges,
1905 &tags_query,
1906 locals_query.len(),
1907 )
1908 }
1909 } else {
1910 error.into()
1911 }
1912 })
1913 }
1914 })
1915 .map(Option::as_ref)
1916 }
1917
1918 #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1919 fn include_path_in_query_error(
1920 mut error: QueryError,
1921 ranges: &[(PathBuf, Range<usize>)],
1922 source: &str,
1923 start_offset: usize,
1924 ) -> LoaderError {
1925 let offset_within_section = error.offset - start_offset;
1926 let (path, range) = ranges
1927 .iter()
1928 .find(|(_, range)| range.contains(&offset_within_section))
1929 .unwrap_or_else(|| ranges.last().unwrap());
1930 error.offset = offset_within_section - range.start;
1931 error.row = source[range.start..offset_within_section]
1932 .matches('\n')
1933 .count();
1934 LoaderError::Query(LoaderQueryError {
1935 error,
1936 file: Some(path.to_string_lossy().to_string()),
1937 })
1938 }
1939
1940 #[allow(clippy::type_complexity)]
1941 #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1942 fn read_queries(
1943 &self,
1944 paths: Option<&[PathBuf]>,
1945 default_path: &str,
1946 ) -> LoaderResult<(String, Vec<(PathBuf, Range<usize>)>)> {
1947 let mut query = String::new();
1948 let mut path_ranges = Vec::new();
1949 if let Some(paths) = paths {
1950 for path in paths {
1951 let abs_path = self.root_path.join(path);
1952 let prev_query_len = query.len();
1953 query += &fs::read_to_string(&abs_path)
1954 .map_err(|e| LoaderError::IO(IoError::new(e, Some(abs_path.as_path()))))?;
1955 path_ranges.push((path.clone(), prev_query_len..query.len()));
1956 }
1957 } else {
1958 if default_path == DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME
1960 || default_path == DEFAULT_TAGS_QUERY_FILE_NAME
1961 {
1962 warn!(
1963 concat!(
1964 "You should add a `{}` entry pointing to the {} path in the `tree-sitter` ",
1965 "object in the grammar's tree-sitter.json file. See more here: ",
1966 "https://tree-sitter.github.io/tree-sitter/3-syntax-highlighting#query-paths"
1967 ),
1968 default_path.replace(".scm", ""),
1969 default_path
1970 );
1971 }
1972 let queries_path = self.root_path.join("queries");
1973 let path = queries_path.join(default_path);
1974 if path.exists() {
1975 query = fs::read_to_string(&path)
1976 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path.as_path()))))?;
1977 path_ranges.push((PathBuf::from(default_path), 0..query.len()));
1978 }
1979 }
1980
1981 Ok((query, path_ranges))
1982 }
1983}
1984
1985fn needs_recompile(lib_path: &Path, paths_to_check: &[PathBuf]) -> LoaderResult<bool> {
1986 if !lib_path.exists() {
1987 return Ok(true);
1988 }
1989 let lib_mtime = mtime(lib_path).map_err(|e| LoaderError::ModifiedTime(Box::new(e)))?;
1990 for path in paths_to_check {
1991 if mtime(path).map_err(|e| LoaderError::ModifiedTime(Box::new(e)))? > lib_mtime {
1992 return Ok(true);
1993 }
1994 }
1995 Ok(false)
1996}
1997
1998fn mtime(path: &Path) -> LoaderResult<SystemTime> {
1999 fs::metadata(path)
2000 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?
2001 .modified()
2002 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))
2003}