1#![cfg_attr(not(any(test, doctest)), doc = include_str!("../README.md"))]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3
4#[cfg(unix)]
5use std::fmt::Write as _;
6#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
7use std::ops::Range;
8#[cfg(feature = "tree-sitter-highlight")]
9use std::sync::Mutex;
10use std::{
11 collections::HashMap,
12 env, fs,
13 hash::{Hash as _, Hasher as _},
14 io::{BufRead, BufReader},
15 marker::PhantomData,
16 mem,
17 path::{Path, PathBuf},
18 process::Command,
19 sync::LazyLock,
20 time::{SystemTime, SystemTimeError},
21};
22
23use etcetera::BaseStrategy as _;
24use fs4::fs_std::FileExt;
25use libloading::{Library, Symbol};
26use log::{error, info, warn};
27use once_cell::unsync::OnceCell;
28use regex::{Regex, RegexBuilder};
29use semver::Version;
30use serde::{Deserialize, Deserializer, Serialize};
31use thiserror::Error;
32use tree_sitter::Language;
33#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
34use tree_sitter::QueryError;
35#[cfg(feature = "tree-sitter-highlight")]
36use tree_sitter::QueryErrorKind;
37#[cfg(feature = "wasm")]
38use tree_sitter::WasmError;
39#[cfg(feature = "tree-sitter-highlight")]
40use tree_sitter_highlight::HighlightConfiguration;
41#[cfg(feature = "tree-sitter-tags")]
42use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
43
/// Lazily compiled regex matching a JSON `"name": "<value>"` pair and capturing `<value>`
/// (non-greedy) in group 1.
// NOTE(review): presumably used to pull the grammar name out of `grammar.json` without a full
// JSON parse — the call site is outside this chunk; confirm.
static GRAMMAR_NAME_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#""name":\s*"(.*?)""#).unwrap());
46
/// Contents of the `../wasi-sdk-version` file, embedded at compile time with surrounding ASCII
/// whitespace trimmed.
const WASI_SDK_VERSION: &str = include_str!("../wasi-sdk-version").trim_ascii();
48
/// Shorthand for a `Result` whose error type is [`LoaderError`].
pub type LoaderResult<T> = Result<T, LoaderError>;
50
51#[derive(Debug, Error)]
52pub enum LoaderError {
53 #[error(transparent)]
54 Compiler(CompilerError),
55 #[error("Parser compilation failed.\nStdout: {0}\nStderr: {1}")]
56 Compilation(String, String),
57 #[error("Failed to execute curl for {0} -- {1}")]
58 Curl(String, std::io::Error),
59 #[error("Failed to load language in current directory:\n{0}")]
60 CurrentDirectoryLoad(Box<Self>),
61 #[error("External file path {0} is outside of parser directory {1}")]
62 ExternalFile(String, String),
63 #[error("Failed to extract archive {0} to {1}")]
64 Extraction(String, String),
65 #[error("Failed to load language for file name {0}:\n{1}")]
66 FileNameLoad(String, Box<Self>),
67 #[error("Failed to parse the language name from grammar.json at {0}")]
68 GrammarJSON(String),
69 #[error(transparent)]
70 HomeDir(#[from] etcetera::HomeDirError),
71 #[error(transparent)]
72 IO(IoError),
73 #[error(transparent)]
74 Library(LibraryError),
75 #[error("Failed to compare binary and source timestamps:\n{0}")]
76 ModifiedTime(Box<Self>),
77 #[error("No language found")]
78 NoLanguage,
79 #[error(transparent)]
80 Query(LoaderQueryError),
81 #[error("Failed to load language for scope '{0}':\n{1}")]
82 ScopeLoad(String, Box<Self>),
83 #[error(transparent)]
84 Serialization(#[from] serde_json::Error),
85 #[error(transparent)]
86 Symbol(SymbolError),
87 #[error(transparent)]
88 Tags(#[from] TagsError),
89 #[error("Failed to execute tar for {0} -- {1}")]
90 Tar(String, std::io::Error),
91 #[error(transparent)]
92 Time(#[from] SystemTimeError),
93 #[error("Unknown scope '{0}'")]
94 UnknownScope(String),
95 #[error("Failed to download wasi-sdk from {0}")]
96 WasiSDKDownload(String),
97 #[error(transparent)]
98 WasiSDKClang(#[from] WasiSDKClangError),
99 #[error("Unsupported platform for wasi-sdk")]
100 WasiSDKPlatform,
101 #[cfg(feature = "wasm")]
102 #[error(transparent)]
103 Wasm(#[from] WasmError),
104 #[error("Failed to run wasi-sdk clang -- {0}")]
105 WasmCompiler(std::io::Error),
106 #[error("wasi-sdk clang command failed: {0}")]
107 WasmCompilation(String),
108}
109
110#[derive(Debug, Error)]
111pub struct CompilerError {
112 pub error: std::io::Error,
113 pub command: Box<Command>,
114}
115
116impl std::fmt::Display for CompilerError {
117 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
118 write!(
119 f,
120 "Failed to execute the C compiler with the following command:\n{:?}\nError: {}",
121 *self.command, self.error
122 )?;
123 Ok(())
124 }
125}
126
127#[derive(Debug, Error)]
128pub struct IoError {
129 pub error: std::io::Error,
130 pub path: Option<String>,
131}
132
133impl IoError {
134 fn new(error: std::io::Error, path: Option<&Path>) -> Self {
135 Self {
136 error,
137 path: path.map(|p| p.to_string_lossy().to_string()),
138 }
139 }
140}
141
142impl std::fmt::Display for IoError {
143 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
144 write!(f, "{}", self.error)?;
145 if let Some(ref path) = self.path {
146 write!(f, " ({path})")?;
147 }
148 Ok(())
149 }
150}
151
152#[derive(Debug, Error)]
153pub struct LibraryError {
154 pub error: libloading::Error,
155 pub path: String,
156}
157
158impl std::fmt::Display for LibraryError {
159 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
160 write!(
161 f,
162 "Error opening dynamic library {} -- {}",
163 self.path, self.error
164 )?;
165 Ok(())
166 }
167}
168
/// A query compilation error, optionally tagged with the query file it came from.
// `QueryError` is only imported when `tree-sitter-highlight` or `tree-sitter-tags` is enabled,
// so this type (and its `Display` impl) carries the same gate; without it the file fails to
// compile when both features are disabled.
#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
#[derive(Debug, Error)]
pub struct LoaderQueryError {
    /// The underlying query error.
    pub error: QueryError,
    /// The query file the error originated from, when known.
    pub file: Option<String>,
}

#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
impl std::fmt::Display for LoaderQueryError {
    /// Writes an `Error in query file <path>:` header line when the file is known, then the
    /// underlying query error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if let Some(ref path) = self.file {
            writeln!(f, "Error in query file {path}:")?;
        }
        write!(f, "{}", self.error)
    }
}
184
185#[derive(Debug, Error)]
186pub struct SymbolError {
187 pub error: libloading::Error,
188 pub symbol_name: String,
189 pub path: String,
190}
191
192impl std::fmt::Display for SymbolError {
193 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
194 write!(
195 f,
196 "Failed to load symbol {} from {} -- {}",
197 self.symbol_name, self.path, self.error
198 )?;
199 Ok(())
200 }
201}
202
203#[derive(Debug, Error)]
204pub struct WasiSDKClangError {
205 pub wasi_sdk_dir: String,
206 pub possible_executables: Vec<&'static str>,
207 pub download: bool,
208}
209
210impl std::fmt::Display for WasiSDKClangError {
211 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
212 if self.download {
213 write!(
214 f,
215 "Failed to find clang executable in downloaded wasi-sdk at '{}'.",
216 self.wasi_sdk_dir
217 )?;
218 } else {
219 write!(f, "TREE_SITTER_WASI_SDK_PATH is set to '{}', but no clang executable found in 'bin/' directory.", self.wasi_sdk_dir)?;
220 }
221
222 let possible_exes = self.possible_executables.join(", ");
223 write!(f, " Looked for: {possible_exes}.")?;
224
225 Ok(())
226 }
227}
228
/// Default file name for the highlights query.
pub const DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME: &str = "highlights.scm";

/// Default file name for the injections query.
pub const DEFAULT_INJECTIONS_QUERY_FILE_NAME: &str = "injections.scm";

/// Default file name for the locals query.
pub const DEFAULT_LOCALS_QUERY_FILE_NAME: &str = "locals.scm";

/// Default file name for the tags query.
pub const DEFAULT_TAGS_QUERY_FILE_NAME: &str = "tags.scm";
236
/// Loader configuration listing where grammars should be searched for.
#[derive(Default, Deserialize, Serialize)]
pub struct Config {
    /// Directories whose `tree-sitter-*` children are scanned by `Loader::find_all_languages`.
    ///
    /// Stored under the `parser-directories` key; defaults to empty when absent, and on
    /// deserialization every entry is passed through `deserialize_parser_directories`.
    #[serde(default)]
    #[serde(
        rename = "parser-directories",
        deserialize_with = "deserialize_parser_directories"
    )]
    pub parser_directories: Vec<PathBuf>,
}
246
/// Zero, one, or many paths. Because the enum is `untagged`, the JSON form is a bare string
/// (`Single`) or an array of strings (`Multiple`).
#[derive(Serialize, Deserialize, Clone, Default)]
#[serde(untagged)]
pub enum PathsJSON {
    /// No path given (the default).
    #[default]
    Empty,
    /// Exactly one path.
    Single(PathBuf),
    /// A list of paths.
    Multiple(Vec<PathBuf>),
}
255
256impl PathsJSON {
257 fn into_vec(self) -> Option<Vec<PathBuf>> {
258 match self {
259 Self::Empty => None,
260 Self::Single(s) => Some(vec![s]),
261 Self::Multiple(s) => Some(s),
262 }
263 }
264
265 const fn is_empty(&self) -> bool {
266 matches!(self, Self::Empty)
267 }
268
269 #[must_use]
271 pub fn to_variable_value<'a>(&'a self, default: &'a PathBuf) -> &'a str {
272 match self {
273 Self::Empty => Some(default),
274 Self::Single(path_buf) => Some(path_buf),
275 Self::Multiple(paths) => paths.first(),
276 }
277 .map_or("", |path| path.as_os_str().to_str().unwrap_or(""))
278 }
279}
280
/// An author entry that is either a plain string or an object (`untagged`, so the JSON shape
/// selects the variant).
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONAuthor {
    /// Author given as a single string.
    String(String),
    /// Author given as an object with a required `name` and optional `email` / `url`.
    Object {
        name: String,
        email: Option<String>,
        url: Option<String>,
    },
}
291
/// A repository entry that is either a plain string or an object with a `url` field
/// (`untagged`, so the JSON shape selects the variant).
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONRepository {
    /// Repository given as a single string.
    String(String),
    /// Repository given as an object.
    Object { url: String },
}
298
/// Serde model of a package manifest carrying an optional `tree-sitter` section.
// NOTE(review): presumably mirrors a grammar's `package.json`; the reader of this type is
// outside this chunk — confirm.
#[derive(Serialize, Deserialize)]
pub struct PackageJSON {
    pub name: String,
    /// Parsed as a semantic version.
    pub version: Version,
    pub description: Option<String>,
    pub author: Option<PackageJSONAuthor>,
    pub maintainers: Option<Vec<PackageJSONAuthor>>,
    pub license: Option<String>,
    pub repository: Option<PackageJSONRepository>,
    /// Language configurations stored under the `tree-sitter` key; `None` when the key is
    /// absent, and omitted from the output when `None`.
    #[serde(default)]
    #[serde(rename = "tree-sitter", skip_serializing_if = "Option::is_none")]
    pub tree_sitter: Option<Vec<LanguageConfigurationJSON>>,
}
312
/// Serde default for `LanguageConfigurationJSON::path`: the current directory (`.`).
fn default_path() -> PathBuf {
    Path::new(".").to_path_buf()
}
316
/// One language configuration as it appears in JSON; keys use kebab-case
/// (e.g. `file-types`, `content-regex`).
#[derive(Serialize, Deserialize, Clone)]
#[serde(rename_all = "kebab-case")]
pub struct LanguageConfigurationJSON {
    /// Defaults to `.` when omitted.
    #[serde(default = "default_path")]
    pub path: PathBuf,
    pub scope: Option<String>,
    pub file_types: Option<Vec<String>>,
    pub content_regex: Option<String>,
    pub first_line_regex: Option<String>,
    pub injection_regex: Option<String>,
    // The query-path fields below default to `PathsJSON::Empty` and are left out of the
    // serialized output while empty.
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
}
338
/// Serde model of `tree-sitter.json` (the file name read by `TreeSitterJSON::from_file`).
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct TreeSitterJSON {
    /// Value of the `$schema` key, if present.
    #[serde(rename = "$schema")]
    pub schema: Option<String>,
    /// The grammars described by the file.
    pub grammars: Vec<Grammar>,
    pub metadata: Metadata,
    /// Falls back to `Bindings::default()` when the key is missing.
    #[serde(default)]
    pub bindings: Bindings,
}
349
350impl TreeSitterJSON {
351 pub fn from_file(path: &Path) -> LoaderResult<Self> {
352 let path = path.join("tree-sitter.json");
353 Ok(serde_json::from_str(&fs::read_to_string(&path).map_err(
354 |e| LoaderError::IO(IoError::new(e, Some(path.as_path()))),
355 )?)?)
356 }
357
358 #[must_use]
359 pub fn has_multiple_language_configs(&self) -> bool {
360 self.grammars.len() > 1
361 }
362}
363
/// One grammar entry of `TreeSitterJSON::grammars`; keys use kebab-case.
///
/// Optional fields marked `skip_serializing_if` are left out of the serialized output when
/// they are `None` / empty.
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Grammar {
    pub name: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub camelcase: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    pub scope: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub path: Option<PathBuf>,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
    // Unlike its neighbours, this field has no `skip_serializing_if`, so `None` is serialized.
    pub file_types: Option<Vec<String>>,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub injection_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub first_line_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub class_name: Option<String>,
}
395
/// The `metadata` section of `TreeSitterJSON`.
#[derive(Serialize, Deserialize)]
pub struct Metadata {
    /// Parsed as a semantic version.
    pub version: Version,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub authors: Option<Vec<Author>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub links: Option<Links>,
    /// Never read from or written to JSON (`serde(skip)`); always `None` after
    /// deserialization.
    #[serde(skip)]
    pub namespace: Option<String>,
}
410
/// An author entry in `Metadata::authors`.
#[derive(Serialize, Deserialize)]
pub struct Author {
    pub name: String,
    /// Omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub email: Option<String>,
    /// Omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub url: Option<String>,
}
419
/// The `links` entry of `Metadata`.
#[derive(Serialize, Deserialize)]
pub struct Links {
    /// Required repository link.
    pub repository: String,
    /// Optional funding link; omitted from the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub funding: Option<String>,
}
426
/// Per-language switches for bindings.
///
/// With the container-level `serde(default)`, any key missing from the input takes its value
/// from `Bindings::default()`.
#[derive(Serialize, Deserialize, Clone)]
#[serde(default)]
pub struct Bindings {
    pub c: bool,
    pub go: bool,
    pub java: bool,
    /// Excluded from (de)serialization via `serde(skip)`; it is also absent from
    /// `Bindings::languages` and rejected by `Bindings::with_enabled_languages`.
    #[serde(skip)]
    pub kotlin: bool,
    pub node: bool,
    pub python: bool,
    pub rust: bool,
    pub swift: bool,
    pub zig: bool,
}
441
442impl Bindings {
443 #[must_use]
445 pub const fn languages(&self) -> [(&'static str, bool); 8] {
446 [
447 ("c", true),
448 ("go", true),
449 ("java", false),
450 ("node", true),
453 ("python", true),
454 ("rust", true),
455 ("swift", true),
456 ("zig", false),
457 ]
458 }
459
460 pub fn with_enabled_languages<'a, I>(languages: I) -> Result<Self, &'a str>
462 where
463 I: Iterator<Item = &'a str>,
464 {
465 let mut out = Self {
466 c: false,
467 go: false,
468 java: false,
469 kotlin: false,
470 node: false,
471 python: false,
472 rust: false,
473 swift: false,
474 zig: false,
475 };
476
477 for v in languages {
478 match v {
479 "c" => out.c = true,
480 "go" => out.go = true,
481 "java" => out.java = true,
482 "node" => out.node = true,
485 "python" => out.python = true,
486 "rust" => out.rust = true,
487 "swift" => out.swift = true,
488 "zig" => out.zig = true,
489 unsupported => return Err(unsupported),
490 }
491 }
492
493 Ok(out)
494 }
495}
496
497impl Default for Bindings {
498 fn default() -> Self {
499 Self {
500 c: true,
501 go: true,
502 java: false,
503 kotlin: false,
504 node: true,
505 python: true,
506 rust: true,
507 swift: true,
508 zig: false,
509 }
510 }
511}
512
513fn deserialize_parser_directories<'de, D>(deserializer: D) -> Result<Vec<PathBuf>, D::Error>
517where
518 D: Deserializer<'de>,
519{
520 let paths = Vec::<PathBuf>::deserialize(deserializer)?;
521 let Ok(home) = etcetera::home_dir() else {
522 return Ok(paths);
523 };
524 let standardized = paths
525 .into_iter()
526 .map(|path| standardize_path(path, &home))
527 .collect();
528 Ok(standardized)
529}
530
/// Replaces a leading `~` or `$HOME` path component with `home`.
///
/// Matching is per component, so `~user/x` is left untouched. Paths without either prefix are
/// returned unchanged.
fn standardize_path(path: PathBuf, home: &Path) -> PathBuf {
    // `~` is tried before `$HOME`, the same order as two consecutive checks.
    let expanded = ["~", "$HOME"]
        .iter()
        .find_map(|prefix| path.strip_prefix(prefix).ok())
        .map(|rest| home.join(rest));
    expanded.unwrap_or(path)
}
540
541impl Config {
542 #[must_use]
543 pub fn initial() -> Self {
544 let home_dir = etcetera::home_dir().expect("Cannot determine home directory");
545 Self {
546 parser_directories: vec![
547 home_dir.join("github"),
548 home_dir.join("src"),
549 home_dir.join("source"),
550 home_dir.join("projects"),
551 home_dir.join("dev"),
552 home_dir.join("git"),
553 ],
554 }
555 }
556}
557
/// Value of the `BUILD_TARGET` environment variable captured at compile time; passed to `cc`
/// as both the target and the host when compiling parsers to dynamic libraries.
const BUILD_TARGET: &str = env!("BUILD_TARGET");
559
/// A loaded language configuration: how to recognise files of the language and where its
/// query files live.
pub struct LanguageConfiguration<'a> {
    /// Scope string matched exactly by `Loader::language_configuration_for_scope`.
    pub scope: Option<String>,
    /// Used to choose between configurations that claim the same file type.
    pub content_regex: Option<Regex>,
    pub first_line_regex: Option<Regex>,
    /// Matched against injection strings; the longest match wins.
    pub injection_regex: Option<Regex>,
    pub file_types: Vec<String>,
    pub root_path: PathBuf,
    pub highlights_filenames: Option<Vec<PathBuf>>,
    pub injections_filenames: Option<Vec<PathBuf>>,
    pub locals_filenames: Option<Vec<PathBuf>>,
    pub tags_filenames: Option<Vec<PathBuf>>,
    pub language_name: String,
    /// Index into the loader's `languages_by_id` table.
    language_id: usize,
    /// Write-once cache slot for the highlight configuration.
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_config: OnceCell<Option<HighlightConfiguration>>,
    /// Write-once cache slot for the tags configuration.
    #[cfg(feature = "tree-sitter-tags")]
    tags_config: OnceCell<Option<TagsConfiguration>>,
    /// Borrowed list of highlight names.
    // NOTE(review): presumably points at `Loader::highlight_names` — confirm where it is set.
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_names: &'a Mutex<Vec<String>>,
    #[cfg(feature = "tree-sitter-highlight")]
    use_all_highlight_names: bool,
    /// Keeps `'a` in use when the highlight feature (the only other user of `'a`) is off.
    _phantom: PhantomData<&'a ()>,
}
583
/// Locates grammars, compiles them on demand, and caches the resulting languages.
pub struct Loader {
    /// Directory in which compiled parser libraries are stored.
    pub parser_lib_path: PathBuf,
    /// Per language: its root path, the lazily loaded `Language`, and optional external files.
    languages_by_id: Vec<(PathBuf, OnceCell<Language>, Option<Vec<PathBuf>>)>,
    language_configurations: Vec<LanguageConfiguration<'static>>,
    /// Maps a file name or (possibly compound) extension to indices into
    /// `language_configurations`.
    language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
    language_configuration_in_current_path: Option<usize>,
    /// Maps a first-line regex pattern to indices into `language_configurations`.
    language_configuration_ids_by_first_line_regex: HashMap<String, Vec<usize>>,
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_names: Box<Mutex<Vec<String>>>,
    /// Starts as `true`; cleared by `configure_highlights`.
    #[cfg(feature = "tree-sitter-highlight")]
    use_all_highlight_names: bool,
    /// Adds `.debug._` to the library name and builds with debug info at `-O0`.
    debug_build: bool,
    /// Adds `.sanitize._` to the library name and sets `CompileConfig::sanitize`.
    sanitize_build: bool,
    /// Requests recompilation regardless of timestamps.
    force_rebuild: bool,

    /// When it holds a store, parsers are compiled to and loaded from `.wasm` files instead of
    /// native libraries.
    #[cfg(feature = "wasm")]
    wasm_store: Mutex<Option<tree_sitter::WasmStore>>,
}
602
/// Everything needed to compile one parser.
pub struct CompileConfig<'a> {
    /// Directory containing the generated parser sources.
    pub src_path: &'a Path,
    /// Include directories passed to the compiler.
    pub header_paths: Vec<&'a Path>,
    /// Path of `parser.c`.
    pub parser_path: PathBuf,
    /// Path of the external scanner source, if one exists.
    pub scanner_path: Option<PathBuf>,
    /// Additional files, relative to `src_path`, that take part in staleness checks.
    pub external_files: Option<&'a [PathBuf]>,
    /// Where the compiled artifact is written; `None` lets the loader choose.
    pub output_path: Option<PathBuf>,
    /// Preprocessor symbols to define.
    pub flags: &'a [&'a str],
    pub sanitize: bool,
    /// Grammar name; left empty by `new` and filled in by the loader.
    pub name: String,
}

impl<'a> CompileConfig<'a> {
    /// Creates a configuration for the sources in `src_path`.
    ///
    /// `src_path` doubles as the only include directory and `parser.c` is expected directly
    /// inside it. The scanner path, flags, sanitizer switch and name all start out empty/off.
    #[must_use]
    pub fn new(
        src_path: &'a Path,
        externals: Option<&'a [PathBuf]>,
        output_path: Option<PathBuf>,
    ) -> Self {
        let parser_path = src_path.join("parser.c");
        let header_paths = vec![src_path];
        Self {
            src_path,
            header_paths,
            parser_path,
            external_files: externals,
            output_path,
            scanner_path: None,
            sanitize: false,
            flags: &[],
            name: String::new(),
        }
    }
}
635
// SAFETY (NOTE(review)): `Loader` contains `once_cell::unsync::OnceCell` fields, which are not
// `Sync`, so this impl is a manual promise that shared access from multiple threads is sound.
// Nothing in the visible code enforces that — confirm the invariant callers are expected to
// uphold.
unsafe impl Sync for Loader {}
637
638impl Loader {
639 pub fn new() -> LoaderResult<Self> {
640 let parser_lib_path = if let Ok(path) = env::var("TREE_SITTER_LIBDIR") {
641 PathBuf::from(path)
642 } else {
643 if cfg!(target_os = "macos") {
644 let legacy_apple_path = etcetera::base_strategy::Apple::new()?
645 .cache_dir() .join("tree-sitter");
647 if legacy_apple_path.exists() && legacy_apple_path.is_dir() {
648 std::fs::remove_dir_all(&legacy_apple_path).map_err(|e| {
649 LoaderError::IO(IoError::new(e, Some(legacy_apple_path.as_path())))
650 })?;
651 }
652 }
653
654 etcetera::choose_base_strategy()?
655 .cache_dir()
656 .join("tree-sitter")
657 .join("lib")
658 };
659 Ok(Self::with_parser_lib_path(parser_lib_path))
660 }
661
662 #[must_use]
663 pub fn with_parser_lib_path(parser_lib_path: PathBuf) -> Self {
664 Self {
665 parser_lib_path,
666 languages_by_id: Vec::new(),
667 language_configurations: Vec::new(),
668 language_configuration_ids_by_file_type: HashMap::new(),
669 language_configuration_in_current_path: None,
670 language_configuration_ids_by_first_line_regex: HashMap::new(),
671 #[cfg(feature = "tree-sitter-highlight")]
672 highlight_names: Box::new(Mutex::new(Vec::new())),
673 #[cfg(feature = "tree-sitter-highlight")]
674 use_all_highlight_names: true,
675 debug_build: false,
676 sanitize_build: false,
677 force_rebuild: false,
678
679 #[cfg(feature = "wasm")]
680 wasm_store: Mutex::default(),
681 }
682 }
683
/// Replaces the stored highlight names with `names` and stops using all highlight names.
///
/// # Panics
///
/// Panics if the highlight-name mutex is poisoned.
#[cfg(feature = "tree-sitter-highlight")]
#[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
pub fn configure_highlights(&mut self, names: &[String]) {
    self.use_all_highlight_names = false;
    let mut stored_names = self.highlight_names.lock().unwrap();
    stored_names.clear();
    stored_names.extend_from_slice(names);
}
692
/// Returns a copy of the currently stored highlight names.
///
/// # Panics
///
/// Panics if the highlight-name mutex is poisoned.
#[must_use]
#[cfg(feature = "tree-sitter-highlight")]
#[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
pub fn highlight_names(&self) -> Vec<String> {
    let stored_names = self.highlight_names.lock().unwrap();
    stored_names.to_vec()
}
699
700 pub fn find_all_languages(&mut self, config: &Config) -> LoaderResult<()> {
701 if config.parser_directories.is_empty() {
702 warn!(concat!(
703 "You have not configured any parser directories!\n",
704 "Please run `tree-sitter init-config` and edit the resulting\n",
705 "configuration file to indicate where we should look for\n",
706 "language grammars.\n"
707 ));
708 }
709 for parser_container_dir in &config.parser_directories {
710 if let Ok(entries) = fs::read_dir(parser_container_dir) {
711 for entry in entries {
712 let entry = entry.map_err(|e| LoaderError::IO(IoError::new(e, None)))?;
713 if let Some(parser_dir_name) = entry.file_name().to_str() {
714 if parser_dir_name.starts_with("tree-sitter-") {
715 self.find_language_configurations_at_path(
716 &parser_container_dir.join(parser_dir_name),
717 false,
718 )
719 .ok();
720 }
721 }
722 }
723 }
724 }
725 Ok(())
726 }
727
728 pub fn languages_at_path(&mut self, path: &Path) -> LoaderResult<Vec<(Language, String)>> {
729 if let Ok(configurations) = self.find_language_configurations_at_path(path, true) {
730 let mut language_ids = configurations
731 .iter()
732 .map(|c| (c.language_id, c.language_name.clone()))
733 .collect::<Vec<_>>();
734 language_ids.sort_unstable();
735 language_ids.dedup();
736 language_ids
737 .into_iter()
738 .map(|(id, name)| Ok((self.language_for_id(id)?, name)))
739 .collect::<LoaderResult<Vec<_>>>()
740 } else {
741 Ok(Vec::new())
742 }
743 }
744
745 #[must_use]
746 pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> {
747 self.language_configurations
748 .iter()
749 .map(|c| (c, self.languages_by_id[c.language_id].0.as_ref()))
750 .collect()
751 }
752
753 pub fn language_configuration_for_scope(
754 &self,
755 scope: &str,
756 ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
757 for configuration in &self.language_configurations {
758 if configuration.scope.as_ref().is_some_and(|s| s == scope) {
759 let language = self.language_for_id(configuration.language_id)?;
760 return Ok(Some((language, configuration)));
761 }
762 }
763 Ok(None)
764 }
765
766 pub fn language_configuration_for_first_line_regex(
767 &self,
768 path: &Path,
769 ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
770 self.language_configuration_ids_by_first_line_regex
771 .iter()
772 .try_fold(None, |_, (regex, ids)| {
773 if let Some(regex) = Self::regex(Some(regex)) {
774 let file = fs::File::open(path)
775 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
776 let reader = BufReader::new(file);
777 let first_line = reader
778 .lines()
779 .next()
780 .transpose()
781 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
782 if let Some(first_line) = first_line {
783 if regex.is_match(&first_line) && !ids.is_empty() {
784 let configuration = &self.language_configurations[ids[0]];
785 let language = self.language_for_id(configuration.language_id)?;
786 return Ok(Some((language, configuration)));
787 }
788 }
789 }
790
791 Ok(None)
792 })
793 }
794
795 pub fn language_configuration_for_file_name(
796 &self,
797 path: &Path,
798 ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
799 let configuration_ids = path
802 .file_name()
803 .and_then(|n| n.to_str())
804 .and_then(|file_name| self.language_configuration_ids_by_file_type.get(file_name))
805 .or_else(|| {
806 let mut path = path.to_owned();
807 let mut extensions = Vec::with_capacity(2);
808 while let Some(extension) = path.extension() {
809 extensions.push(extension.to_str()?.to_string());
810 path = PathBuf::from(path.file_stem()?.to_os_string());
811 }
812 extensions.reverse();
813 self.language_configuration_ids_by_file_type
814 .get(&extensions.join("."))
815 });
816
817 if let Some(configuration_ids) = configuration_ids {
818 if !configuration_ids.is_empty() {
819 let configuration = if configuration_ids.len() == 1 {
820 &self.language_configurations[configuration_ids[0]]
821 }
822 else {
825 let file_contents =
826 fs::read(path).map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
827 let file_contents = String::from_utf8_lossy(&file_contents);
828 let mut best_score = -2isize;
829 let mut best_configuration_id = None;
830 for configuration_id in configuration_ids {
831 let config = &self.language_configurations[*configuration_id];
832
833 let score;
836 if let Some(content_regex) = &config.content_regex {
837 if let Some(mat) = content_regex.find(&file_contents) {
838 score = (mat.end() - mat.start()) as isize;
839 }
840 else {
845 score = -1;
846 }
847 } else {
848 score = 0;
849 }
850 if score > best_score {
851 best_configuration_id = Some(*configuration_id);
852 best_score = score;
853 }
854 }
855
856 &self.language_configurations[best_configuration_id.unwrap()]
857 };
858
859 let language = self.language_for_id(configuration.language_id)?;
860 return Ok(Some((language, configuration)));
861 }
862 }
863
864 Ok(None)
865 }
866
867 pub fn language_configuration_for_injection_string(
868 &self,
869 string: &str,
870 ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
871 let mut best_match_length = 0;
872 let mut best_match_position = None;
873 for (i, configuration) in self.language_configurations.iter().enumerate() {
874 if let Some(injection_regex) = &configuration.injection_regex {
875 if let Some(mat) = injection_regex.find(string) {
876 let length = mat.end() - mat.start();
877 if length > best_match_length {
878 best_match_position = Some(i);
879 best_match_length = length;
880 }
881 }
882 }
883 }
884
885 if let Some(i) = best_match_position {
886 let configuration = &self.language_configurations[i];
887 let language = self.language_for_id(configuration.language_id)?;
888 Ok(Some((language, configuration)))
889 } else {
890 Ok(None)
891 }
892 }
893
894 pub fn language_for_configuration(
895 &self,
896 configuration: &LanguageConfiguration,
897 ) -> LoaderResult<Language> {
898 self.language_for_id(configuration.language_id)
899 }
900
901 fn language_for_id(&self, id: usize) -> LoaderResult<Language> {
902 let (path, language, externals) = &self.languages_by_id[id];
903 language
904 .get_or_try_init(|| {
905 let src_path = path.join("src");
906 self.load_language_at_path(CompileConfig::new(
907 &src_path,
908 externals.as_deref(),
909 None,
910 ))
911 })
912 .cloned()
913 }
914
915 pub fn compile_parser_at_path(
916 &self,
917 grammar_path: &Path,
918 output_path: PathBuf,
919 flags: &[&str],
920 ) -> LoaderResult<()> {
921 let src_path = grammar_path.join("src");
922 let mut config = CompileConfig::new(&src_path, None, Some(output_path));
923 config.flags = flags;
924 self.load_language_at_path(config).map(|_| ())
925 }
926
927 pub fn load_language_at_path(&self, mut config: CompileConfig) -> LoaderResult<Language> {
928 let grammar_path = config.src_path.join("grammar.json");
929 config.name = Self::grammar_json_name(&grammar_path)?;
930 self.load_language_at_path_with_name(config)
931 }
932
/// Compiles the parser named `config.name` if needed, then loads it.
///
/// The output location is `config.output_path` when given, otherwise a file named after the
/// grammar inside `parser_lib_path`. With the `wasm` feature and a Wasm store present, the
/// parser is compiled to and loaded from a `.wasm` file; otherwise a native dynamic library is
/// built under a file lock and loaded via [`Self::load_language`].
///
/// # Errors
///
/// Returns an error on any I/O, locking, compilation, or loading failure, and a `NotFound`
/// I/O error if the library does not exist once the build step is over.
///
/// # Panics
///
/// Panics if a temporary directory cannot be created while `CROSS_RUNNER` is set.
pub fn load_language_at_path_with_name(
    &self,
    mut config: CompileConfig,
) -> LoaderResult<Language> {
    let mut lib_name = config.name.clone();
    // The exported symbol uses underscores where the grammar name has dashes.
    let language_fn_name = format!("tree_sitter_{}", config.name.replace('-', "_"));
    // Debug and sanitizer builds get distinct library names.
    if self.debug_build {
        lib_name.push_str(".debug._");
    }

    if self.sanitize_build {
        lib_name.push_str(".sanitize._");
        config.sanitize = true;
    }

    // Only the default output location needs to be created here.
    if config.output_path.is_none() {
        fs::create_dir_all(&self.parser_lib_path).map_err(|e| {
            LoaderError::IO(IoError::new(e, Some(self.parser_lib_path.as_path())))
        })?;
    }

    // An explicitly requested output path always triggers a rebuild.
    let mut recompile = self.force_rebuild || config.output_path.is_some();
    let output_path = config.output_path.unwrap_or_else(|| {
        let mut path = self.parser_lib_path.join(lib_name);
        path.set_extension(env::consts::DLL_EXTENSION);
        #[cfg(feature = "wasm")]
        if self.wasm_store.lock().unwrap().is_some() {
            path.set_extension("wasm");
        }
        path
    });
    config.output_path = Some(output_path.clone());

    let parser_path = config.src_path.join("parser.c");
    config.scanner_path = self.get_scanner_path(config.src_path);

    // Inputs handed to `needs_recompile`: parser, optional scanner, and external files
    // (resolved relative to the source directory).
    let mut paths_to_check = vec![parser_path];

    if let Some(scanner_path) = config.scanner_path.as_ref() {
        paths_to_check.push(scanner_path.clone());
    }

    paths_to_check.extend(
        config
            .external_files
            .unwrap_or_default()
            .iter()
            .map(|p| config.src_path.join(p)),
    );

    if !recompile {
        recompile = needs_recompile(&output_path, &paths_to_check)?;
    }

    // Wasm path: compile if required, then load the module bytes and return early. None of
    // the lock handling below applies.
    #[cfg(feature = "wasm")]
    if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() {
        if recompile {
            self.compile_parser_to_wasm(
                &config.name,
                config.src_path,
                config
                    .scanner_path
                    .as_ref()
                    .and_then(|p| p.strip_prefix(config.src_path).ok()),
                &output_path,
            )?;
        }

        let wasm_bytes = fs::read(&output_path)
            .map_err(|e| LoaderError::IO(IoError::new(e, Some(output_path.as_path()))))?;
        return Ok(wasm_store.load_language(&config.name, &wasm_bytes)?);
    }

    // The lock file name is derived from a hash of the output path, so builds to different
    // outputs use different lock files.
    let lock_hash = {
        let mut hasher = std::hash::DefaultHasher::new();
        output_path.hash(&mut hasher);
        format!("{:x}", hasher.finish())
    };

    // NOTE(review): in the `CROSS_RUNNER` branch the `TempDir` is a temporary that is dropped
    // at the end of this statement, which presumably deletes the directory right away; it is
    // only recreated by `create_dir_all` on the recompile path below — confirm this is
    // intended.
    let lock_path = if env::var("CROSS_RUNNER").is_ok() {
        tempfile::tempdir()
            .expect("create a temp dir")
            .path()
            .to_path_buf()
    } else {
        etcetera::choose_base_strategy()?.cache_dir()
    }
    .join("tree-sitter")
    .join("lock")
    .join(format!("{}-{lock_hash}.lock", config.name));

    // A lock file that already exists overrides the decision made so far: `recompile` is
    // reset to false and only set again if the lock turns out to be stale.
    if let Ok(lock_file) = fs::OpenOptions::new().write(true).open(&lock_path) {
        recompile = false;
        if lock_file.try_lock_exclusive().is_err() {
            // The lock is held elsewhere: block until it is released and skip compiling here.
            lock_file
                .lock_exclusive()
                .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
            recompile = false;
        } else {
            // The lock was free. If the file was last modified more than 30 seconds ago, treat
            // it as a leftover: remove it and rebuild.
            let time = lock_file
                .metadata()
                .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?
                .modified()
                .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?
                .elapsed()?
                .as_secs();
            if time > 30 {
                fs::remove_file(&lock_path)
                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
                recompile = true;
            }
        }
    }

    if recompile {
        // Create (or truncate) the lock file and hold it exclusively across the build.
        let parent_path = lock_path.parent().unwrap();
        fs::create_dir_all(parent_path)
            .map_err(|e| LoaderError::IO(IoError::new(e, Some(parent_path))))?;
        let lock_file = fs::OpenOptions::new()
            .create(true)
            .truncate(true)
            .write(true)
            .open(&lock_path)
            .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
        lock_file
            .lock_exclusive()
            .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;

        self.compile_parser_to_dylib(&config, &lock_file, &lock_path)?;

        if config.scanner_path.is_some() {
            self.check_external_scanner(&output_path)?;
        }
    }

    // Reached after building or after deciding not to build; either way the library has to
    // exist before it can be loaded.
    if !output_path.exists() {
        let msg = format!(
            "Dynamic library `{}` not found after build attempt. \
            Are you running multiple processes building to the same output location?",
            output_path.display()
        );

        Err(LoaderError::IO(IoError::new(
            std::io::Error::new(std::io::ErrorKind::NotFound, msg),
            Some(output_path.as_path()),
        )))?;
    }

    Self::load_language(&output_path, &language_fn_name)
}
1099
/// Opens the dynamic library at `path`, resolves `function_name` in it, and calls that
/// function to obtain a [`Language`].
///
/// # Errors
///
/// Returns `LoaderError::Library` if the library cannot be opened, or `LoaderError::Symbol` if
/// the symbol cannot be found.
pub fn load_language(path: &Path, function_name: &str) -> LoaderResult<Language> {
    // NOTE(review): opening a library runs its initialisers; soundness relies on `path`
    // pointing at a well-formed parser library — confirm callers guarantee this.
    let library = unsafe { Library::new(path) }.map_err(|e| {
        LoaderError::Library(LibraryError {
            error: e,
            path: path.to_string_lossy().to_string(),
        })
    })?;
    // NOTE(review): the symbol is assumed to have the signature
    // `unsafe extern "C" fn() -> Language`; nothing here checks that.
    let language = unsafe {
        let language_fn = library
            .get::<Symbol<unsafe extern "C" fn() -> Language>>(function_name.as_bytes())
            .map_err(|e| {
                LoaderError::Symbol(SymbolError {
                    error: e,
                    symbol_name: function_name.to_string(),
                    path: path.to_string_lossy().to_string(),
                })
            })?;
        language_fn()
    };
    // Deliberately leak the handle so the library is never unloaded — presumably because the
    // returned `Language` refers to data inside it (confirm).
    mem::forget(library);
    Ok(language)
}
1122
1123 fn compile_parser_to_dylib(
1124 &self,
1125 config: &CompileConfig,
1126 lock_file: &fs::File,
1127 lock_path: &Path,
1128 ) -> LoaderResult<()> {
1129 let mut cc_config = cc::Build::new();
1130 cc_config
1131 .cargo_metadata(false)
1132 .cargo_warnings(false)
1133 .target(BUILD_TARGET)
1134 .host(BUILD_TARGET)
1138 .debug(self.debug_build)
1139 .file(&config.parser_path)
1140 .includes(&config.header_paths)
1141 .std("c11");
1142
1143 if let Some(scanner_path) = config.scanner_path.as_ref() {
1144 cc_config.file(scanner_path);
1145 }
1146
1147 if self.debug_build {
1148 cc_config.opt_level(0).extra_warnings(true);
1149 } else {
1150 cc_config.opt_level(2).extra_warnings(false);
1151 }
1152
1153 for flag in config.flags {
1154 cc_config.define(flag, None);
1155 }
1156
1157 let compiler = cc_config.get_compiler();
1158 let mut command = Command::new(compiler.path());
1159 command.args(compiler.args());
1160 for (key, value) in compiler.env() {
1161 command.env(key, value);
1162 }
1163
1164 let output_path = config.output_path.as_ref().unwrap();
1165
1166 let temp_dir = if compiler.is_like_msvc() {
1167 let out = format!("-out:{}", output_path.to_str().unwrap());
1168 command.arg(if self.debug_build { "-LDd" } else { "-LD" });
1169 command.arg("-utf-8");
1170
1171 let temp_dir = output_path.parent().unwrap().join(format!(
1175 "tmp_{}_{:?}",
1176 std::process::id(),
1177 std::thread::current().id()
1178 ));
1179 std::fs::create_dir_all(&temp_dir).unwrap();
1180
1181 command.arg(format!("/Fo{}\\", temp_dir.display()));
1182 command.args(cc_config.get_files());
1183 command.arg("-link").arg(out);
1184 command.arg(format!("/IMPLIB:{}.lib", temp_dir.join("temp").display()));
1185
1186 Some(temp_dir)
1187 } else {
1188 command.arg("-Werror=implicit-function-declaration");
1189 if cfg!(any(target_os = "macos", target_os = "ios")) {
1190 command.arg("-dynamiclib");
1191 command.arg("-UTREE_SITTER_REUSE_ALLOCATOR");
1193 } else {
1194 command.arg("-shared");
1195 command.arg("-Wl,--no-undefined");
1196 #[cfg(target_os = "openbsd")]
1197 command.arg("-lc");
1198 }
1199 command.args(cc_config.get_files());
1200 command.arg("-o").arg(output_path);
1201
1202 None
1203 };
1204
1205 let output = command.output().map_err(|e| {
1206 LoaderError::Compiler(CompilerError {
1207 error: e,
1208 command: Box::new(command),
1209 })
1210 })?;
1211
1212 if let Some(temp_dir) = temp_dir {
1213 let _ = fs::remove_dir_all(temp_dir);
1214 }
1215
1216 FileExt::unlock(lock_file)
1217 .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path))))?;
1218 fs::remove_file(lock_path)
1219 .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path))))?;
1220
1221 if output.status.success() {
1222 Ok(())
1223 } else {
1224 Err(LoaderError::Compilation(
1225 String::from_utf8_lossy(&output.stdout).to_string(),
1226 String::from_utf8_lossy(&output.stderr).to_string(),
1227 ))
1228 }
1229 }
1230
1231 #[cfg(unix)]
1232 fn check_external_scanner(&self, library_path: &Path) -> LoaderResult<()> {
1233 let section = " T ";
1234 let old_ppc_section = if cfg!(all(target_arch = "powerpc64", target_os = "linux")) {
1237 Some(" D ")
1238 } else {
1239 None
1240 };
1241 let nm_cmd = env::var("NM").unwrap_or_else(|_| "nm".to_owned());
1242 let command = Command::new(nm_cmd)
1243 .arg("--defined-only")
1244 .arg(library_path)
1245 .output();
1246 if let Ok(output) = command {
1247 if output.status.success() {
1248 let mut non_static_symbols = String::new();
1249 for line in String::from_utf8_lossy(&output.stdout).lines() {
1250 if line.contains(section) || old_ppc_section.is_some_and(|s| line.contains(s)) {
1251 if let Some(function_name) =
1252 line.split_whitespace().collect::<Vec<_>>().get(2)
1253 {
1254 if !line.contains("tree_sitter_") {
1255 writeln!(&mut non_static_symbols, " `{function_name}`").unwrap();
1256 }
1257 }
1258 }
1259 }
1260 if !non_static_symbols.is_empty() {
1261 warn!(
1262 "Found non-static non-tree-sitter functions in the external scanner\n{non_static_symbols}\n{}",
1263 concat!(
1264 "Consider making these functions static, they can cause conflicts ",
1265 "when another tree-sitter project uses the same function name."
1266 )
1267 );
1268 }
1269 }
1270 } else {
1271 warn!(
1272 "Failed to run `nm` to verify symbols in {}",
1273 library_path.display()
1274 );
1275 }
1276
1277 Ok(())
1278 }
1279
    /// Windows counterpart of the external-scanner symbol check.
    ///
    /// This variant performs no inspection of the library and always returns `Ok(())`;
    /// the check that shells out to `nm` exists only in the `#[cfg(unix)]` variant.
    #[cfg(windows)]
    fn check_external_scanner(&self, _library_path: &Path) -> LoaderResult<()> {
        Ok(())
    }
1285
1286 pub fn compile_parser_to_wasm(
1287 &self,
1288 language_name: &str,
1289 src_path: &Path,
1290 scanner_filename: Option<&Path>,
1291 output_path: &Path,
1292 ) -> LoaderResult<()> {
1293 let clang_executable = self.ensure_wasi_sdk_exists()?;
1294
1295 let mut command = Command::new(&clang_executable);
1296 command.current_dir(src_path).args([
1297 "--target=wasm32-unknown-wasi",
1298 "-o",
1299 output_path.to_str().unwrap(),
1300 "-fPIC",
1301 "-shared",
1302 if self.debug_build { "-g" } else { "-Os" },
1303 format!("-Wl,--export=tree_sitter_{language_name}").as_str(),
1304 "-Wl,--allow-undefined",
1305 "-Wl,--no-entry",
1306 "-nostdlib",
1307 "-fno-exceptions",
1308 "-fvisibility=hidden",
1309 "-I",
1310 ".",
1311 "parser.c",
1312 ]);
1313
1314 if let Some(scanner_filename) = scanner_filename {
1315 command.arg(scanner_filename);
1316 }
1317
1318 let output = command.output().map_err(LoaderError::WasmCompiler)?;
1319
1320 if !output.status.success() {
1321 return Err(LoaderError::WasmCompilation(
1322 String::from_utf8_lossy(&output.stderr).to_string(),
1323 ));
1324 }
1325
1326 Ok(())
1327 }
1328
1329 fn extract_tar_gz_with_strip(
1331 &self,
1332 archive_path: &Path,
1333 destination: &Path,
1334 ) -> LoaderResult<()> {
1335 let status = Command::new("tar")
1336 .arg("-xzf")
1337 .arg(archive_path)
1338 .arg("--strip-components=1")
1339 .arg("-C")
1340 .arg(destination)
1341 .status()
1342 .map_err(|e| LoaderError::Tar(archive_path.to_string_lossy().to_string(), e))?;
1343
1344 if !status.success() {
1345 return Err(LoaderError::Extraction(
1346 archive_path.to_string_lossy().to_string(),
1347 destination.to_string_lossy().to_string(),
1348 ));
1349 }
1350
1351 Ok(())
1352 }
1353
1354 fn ensure_wasi_sdk_exists(&self) -> LoaderResult<PathBuf> {
1359 let possible_executables = if cfg!(windows) {
1360 vec![
1361 "clang.exe",
1362 "wasm32-unknown-wasi-clang.exe",
1363 "wasm32-wasi-clang.exe",
1364 ]
1365 } else {
1366 vec!["clang", "wasm32-unknown-wasi-clang", "wasm32-wasi-clang"]
1367 };
1368
1369 if let Ok(wasi_sdk_path) = std::env::var("TREE_SITTER_WASI_SDK_PATH") {
1370 let wasi_sdk_dir = PathBuf::from(wasi_sdk_path);
1371
1372 for exe in &possible_executables {
1373 let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1374 if clang_exe.exists() {
1375 return Ok(clang_exe);
1376 }
1377 }
1378
1379 return Err(LoaderError::WasiSDKClang(WasiSDKClangError {
1380 wasi_sdk_dir: wasi_sdk_dir.to_string_lossy().to_string(),
1381 possible_executables,
1382 download: false,
1383 }));
1384 }
1385
1386 let cache_dir = etcetera::choose_base_strategy()?
1387 .cache_dir()
1388 .join("tree-sitter");
1389 fs::create_dir_all(&cache_dir)
1390 .map_err(|e| LoaderError::IO(IoError::new(e, Some(cache_dir.as_path()))))?;
1391
1392 let wasi_sdk_dir = cache_dir.join("wasi-sdk");
1393
1394 for exe in &possible_executables {
1395 let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1396 if clang_exe.exists() {
1397 return Ok(clang_exe);
1398 }
1399 }
1400
1401 fs::create_dir_all(&wasi_sdk_dir)
1402 .map_err(|e| LoaderError::IO(IoError::new(e, Some(wasi_sdk_dir.as_path()))))?;
1403
1404 let arch_os = if cfg!(target_os = "macos") {
1405 if cfg!(target_arch = "aarch64") {
1406 "arm64-macos"
1407 } else {
1408 "x86_64-macos"
1409 }
1410 } else if cfg!(target_os = "windows") {
1411 if cfg!(target_arch = "aarch64") {
1412 "arm64-windows"
1413 } else {
1414 "x86_64-windows"
1415 }
1416 } else if cfg!(target_os = "linux") {
1417 if cfg!(target_arch = "aarch64") {
1418 "arm64-linux"
1419 } else {
1420 "x86_64-linux"
1421 }
1422 } else {
1423 return Err(LoaderError::WasiSDKPlatform);
1424 };
1425
1426 let sdk_filename = format!("wasi-sdk-{WASI_SDK_VERSION}-{arch_os}.tar.gz");
1427 let wasi_sdk_major_version = WASI_SDK_VERSION
1428 .trim_end_matches(char::is_numeric) .trim_end_matches('.'); let sdk_url = format!(
1431 "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-{wasi_sdk_major_version}/{sdk_filename}",
1432 );
1433
1434 info!("Downloading wasi-sdk from {sdk_url}...");
1435 let temp_tar_path = cache_dir.join(sdk_filename);
1436
1437 let status = Command::new("curl")
1438 .arg("-f")
1439 .arg("-L")
1440 .arg("-o")
1441 .arg(&temp_tar_path)
1442 .arg(&sdk_url)
1443 .status()
1444 .map_err(|e| LoaderError::Curl(sdk_url.clone(), e))?;
1445
1446 if !status.success() {
1447 return Err(LoaderError::WasiSDKDownload(sdk_url));
1448 }
1449
1450 info!("Extracting wasi-sdk to {}...", wasi_sdk_dir.display());
1451 self.extract_tar_gz_with_strip(&temp_tar_path, &wasi_sdk_dir)?;
1452
1453 fs::remove_file(temp_tar_path).ok();
1454 for exe in &possible_executables {
1455 let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1456 if clang_exe.exists() {
1457 return Ok(clang_exe);
1458 }
1459 }
1460
1461 Err(LoaderError::WasiSDKClang(WasiSDKClangError {
1462 wasi_sdk_dir: wasi_sdk_dir.to_string_lossy().to_string(),
1463 possible_executables,
1464 download: true,
1465 }))
1466 }
1467
1468 #[must_use]
1469 #[cfg(feature = "tree-sitter-highlight")]
1470 pub fn highlight_config_for_injection_string<'a>(
1471 &'a self,
1472 string: &str,
1473 ) -> Option<&'a HighlightConfiguration> {
1474 match self.language_configuration_for_injection_string(string) {
1475 Err(e) => {
1476 error!("Failed to load language for injection string '{string}': {e}",);
1477 None
1478 }
1479 Ok(None) => None,
1480 Ok(Some((language, configuration))) => {
1481 match configuration.highlight_config(language, None) {
1482 Err(e) => {
1483 error!(
1484 "Failed to load higlight config for injection string '{string}': {e}"
1485 );
1486 None
1487 }
1488 Ok(None) => None,
1489 Ok(Some(config)) => Some(config),
1490 }
1491 }
1492 }
1493 }
1494
1495 #[must_use]
1496 pub fn get_language_configuration_in_current_path(&self) -> Option<&LanguageConfiguration> {
1497 self.language_configuration_in_current_path
1498 .map(|i| &self.language_configurations[i])
1499 }
1500
    /// Registers the language configurations found at `parser_path` and returns the
    /// slice of configurations added by this call.
    ///
    /// The configuration is loaded with `TreeSitterJSON::from_file(parser_path)`; one
    /// `LanguageConfiguration` is created per grammar it lists. When that adds nothing
    /// and `<parser_path>/src/grammar.json` exists, a single bare configuration is
    /// created from the grammar name found in that file.
    ///
    /// When `set_current_path_config` is true and no current-path configuration has been
    /// recorded yet, the first configuration added here is recorded as such.
    ///
    /// # Errors
    ///
    /// - [`LoaderError::ExternalFile`] if an external file path, once joined onto
    ///   `parser_path`, does not start with `parser_path`.
    /// - Errors from `grammar_json_name` in the `grammar.json` fallback.
    ///
    /// A serialization error from the config file is only logged; any other error from
    /// loading it is ignored.
    pub fn find_language_configurations_at_path(
        &mut self,
        parser_path: &Path,
        set_current_path_config: bool,
    ) -> LoaderResult<&[LanguageConfiguration]> {
        // Remembered so that only the configurations added by this call are returned.
        let initial_language_configuration_count = self.language_configurations.len();

        match TreeSitterJSON::from_file(parser_path) {
            Ok(config) => {
                // Languages registered before this call are never reused below.
                let language_count = self.languages_by_id.len();
                for grammar in config.grammars {
                    // A grammar without an explicit path lives in `parser_path` itself.
                    let language_path =
                        parser_path.join(grammar.path.unwrap_or(PathBuf::from(".")));

                    // Look for a language with the same path among those registered
                    // during this call, so grammars sharing a path share a language id.
                    let mut language_id = None;
                    for (id, (path, _, _)) in
                        self.languages_by_id.iter().enumerate().skip(language_count)
                    {
                        if language_path == *path {
                            language_id = Some(id);
                        }
                    }

                    let language_id = if let Some(language_id) = language_id {
                        language_id
                    } else {
                        // First grammar at this path: register a new, not-yet-loaded
                        // language together with its resolved external files.
                        self.languages_by_id.push((
                            language_path,
                            OnceCell::new(),
                            grammar
                                .external_files
                                .clone()
                                .into_vec()
                                .map(|files| {
                                    files
                                        .into_iter()
                                        .map(|path| {
                                            let path = parser_path.join(path);
                                            // NOTE(review): this is a component-wise prefix
                                            // check. It rejects absolute paths, but `..`
                                            // components are not normalized, so a relative
                                            // path can still point outside `parser_path`.
                                            // Confirm whether that is intended.
                                            if path.starts_with(parser_path) {
                                                Ok(path)
                                            } else {
                                                Err(LoaderError::ExternalFile(
                                                    path.to_string_lossy().to_string(),
                                                    parser_path.to_string_lossy().to_string(),
                                                ))
                                            }
                                        })
                                        .collect::<LoaderResult<Vec<_>>>()
                                })
                                .transpose()?,
                        ));
                        self.languages_by_id.len() - 1
                    };

                    let configuration = LanguageConfiguration {
                        root_path: parser_path.to_path_buf(),
                        language_name: grammar.name,
                        scope: Some(grammar.scope),
                        language_id,
                        file_types: grammar.file_types.unwrap_or_default(),
                        // Patterns that fail to compile are silently dropped (see `regex`).
                        content_regex: Self::regex(grammar.content_regex.as_deref()),
                        first_line_regex: Self::regex(grammar.first_line_regex.as_deref()),
                        injection_regex: Self::regex(grammar.injection_regex.as_deref()),
                        injections_filenames: grammar.injections.into_vec(),
                        locals_filenames: grammar.locals.into_vec(),
                        tags_filenames: grammar.tags.into_vec(),
                        highlights_filenames: grammar.highlights.into_vec(),
                        #[cfg(feature = "tree-sitter-highlight")]
                        highlight_config: OnceCell::new(),
                        #[cfg(feature = "tree-sitter-tags")]
                        tags_config: OnceCell::new(),
                        #[cfg(feature = "tree-sitter-highlight")]
                        highlight_names: &self.highlight_names,
                        #[cfg(feature = "tree-sitter-highlight")]
                        use_all_highlight_names: self.use_all_highlight_names,
                        _phantom: PhantomData,
                    };

                    // Index the configuration (by its upcoming position in
                    // `language_configurations`) under each file type and under its
                    // first-line regex.
                    for file_type in &configuration.file_types {
                        self.language_configuration_ids_by_file_type
                            .entry(file_type.clone())
                            .or_default()
                            .push(self.language_configurations.len());
                    }
                    if let Some(first_line_regex) = &configuration.first_line_regex {
                        self.language_configuration_ids_by_first_line_regex
                            .entry(first_line_regex.to_string())
                            .or_default()
                            .push(self.language_configurations.len());
                    }

                    // The configuration may borrow `self.highlight_names`; its lifetime is
                    // erased to `'static` so it can be stored inside `self`.
                    // NOTE(review): soundness relies on `highlight_names` living at least
                    // as long as the stored configurations — confirm.
                    self.language_configurations.push(unsafe {
                        mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
                            configuration,
                        )
                    });

                    if set_current_path_config
                        && self.language_configuration_in_current_path.is_none()
                    {
                        self.language_configuration_in_current_path =
                            Some(self.language_configurations.len() - 1);
                    }
                }
            }
            Err(LoaderError::Serialization(e)) => {
                warn!(
                    "Failed to parse {} -- {e}",
                    parser_path.join("tree-sitter.json").display()
                );
            }
            // Any other failure to load the config file is ignored.
            _ => {}
        }

        // Fallback: nothing was added above, but a generated grammar is present.
        if self.language_configurations.len() == initial_language_configuration_count
            && parser_path.join("src").join("grammar.json").exists()
        {
            let grammar_path = parser_path.join("src").join("grammar.json");
            let language_name = Self::grammar_json_name(&grammar_path)?;
            let configuration = LanguageConfiguration {
                root_path: parser_path.to_owned(),
                language_name,
                // Index of the language entry pushed at the end of this branch.
                language_id: self.languages_by_id.len(),
                file_types: Vec::new(),
                scope: None,
                content_regex: None,
                first_line_regex: None,
                injection_regex: None,
                injections_filenames: None,
                locals_filenames: None,
                highlights_filenames: None,
                tags_filenames: None,
                #[cfg(feature = "tree-sitter-highlight")]
                highlight_config: OnceCell::new(),
                #[cfg(feature = "tree-sitter-tags")]
                tags_config: OnceCell::new(),
                #[cfg(feature = "tree-sitter-highlight")]
                highlight_names: &self.highlight_names,
                #[cfg(feature = "tree-sitter-highlight")]
                use_all_highlight_names: self.use_all_highlight_names,
                _phantom: PhantomData,
            };
            // Same lifetime erasure as for the configurations created above.
            self.language_configurations.push(unsafe {
                mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
                    configuration,
                )
            });
            self.languages_by_id
                .push((parser_path.to_owned(), OnceCell::new(), None));
        }

        Ok(&self.language_configurations[initial_language_configuration_count..])
    }
1664
1665 fn regex(pattern: Option<&str>) -> Option<Regex> {
1666 pattern.and_then(|r| RegexBuilder::new(r).multi_line(true).build().ok())
1667 }
1668
1669 fn grammar_json_name(grammar_path: &Path) -> LoaderResult<String> {
1670 let file = fs::File::open(grammar_path)
1671 .map_err(|e| LoaderError::IO(IoError::new(e, Some(grammar_path))))?;
1672
1673 let first_three_lines = BufReader::new(file)
1674 .lines()
1675 .take(3)
1676 .collect::<Result<Vec<_>, std::io::Error>>()
1677 .map_err(|_| LoaderError::GrammarJSON(grammar_path.to_string_lossy().to_string()))?
1678 .join("\n");
1679
1680 let name = GRAMMAR_NAME_REGEX
1681 .captures(&first_three_lines)
1682 .and_then(|c| c.get(1))
1683 .ok_or_else(|| LoaderError::GrammarJSON(grammar_path.to_string_lossy().to_string()))?;
1684
1685 Ok(name.as_str().to_string())
1686 }
1687
1688 pub fn select_language(
1689 &mut self,
1690 path: Option<&Path>,
1691 current_dir: &Path,
1692 scope: Option<&str>,
1693 lib_info: Option<&(PathBuf, &str)>,
1695 ) -> LoaderResult<Language> {
1696 if let Some((ref lib_path, language_name)) = lib_info {
1697 let language_fn_name = format!("tree_sitter_{}", language_name.replace('-', "_"));
1698 Self::load_language(lib_path, &language_fn_name)
1699 } else if let Some(scope) = scope {
1700 if let Some(config) = self
1701 .language_configuration_for_scope(scope)
1702 .map_err(|e| LoaderError::ScopeLoad(scope.to_string(), Box::new(e)))?
1703 {
1704 Ok(config.0)
1705 } else {
1706 Err(LoaderError::UnknownScope(scope.to_string()))
1707 }
1708 } else if let Some((lang, _)) = if let Some(path) = path {
1709 self.language_configuration_for_file_name(path)
1710 .map_err(|e| {
1711 LoaderError::FileNameLoad(
1712 path.file_name().unwrap().to_string_lossy().to_string(),
1713 Box::new(e),
1714 )
1715 })?
1716 } else {
1717 None
1718 } {
1719 Ok(lang)
1720 } else if let Some(id) = self.language_configuration_in_current_path {
1721 Ok(self.language_for_id(self.language_configurations[id].language_id)?)
1722 } else if let Some(lang) = self
1723 .languages_at_path(current_dir)
1724 .map_err(|e| LoaderError::CurrentDirectoryLoad(Box::new(e)))?
1725 .first()
1726 .cloned()
1727 {
1728 Ok(lang.0)
1729 } else if let Some(lang) = if let Some(path) = path {
1730 self.language_configuration_for_first_line_regex(path)?
1731 } else {
1732 None
1733 } {
1734 Ok(lang.0)
1735 } else {
1736 Err(LoaderError::NoLanguage)
1737 }
1738 }
1739
    /// Sets the `debug_build` flag.
    ///
    /// When the flag is set, `compile_parser_to_dylib` builds with debug info,
    /// optimization level 0 and extra warnings (level 2 without extra warnings
    /// otherwise), and `compile_parser_to_wasm` passes `-g` instead of `-Os`.
    pub const fn debug_build(&mut self, flag: bool) {
        self.debug_build = flag;
    }
1743
    /// Sets the `sanitize_build` flag.
    ///
    /// NOTE(review): the code that reads this flag is not visible near this setter;
    /// presumably it enables sanitizer instrumentation for native builds — confirm.
    pub const fn sanitize_build(&mut self, flag: bool) {
        self.sanitize_build = flag;
    }
1747
    /// Sets the `force_rebuild` flag.
    ///
    /// NOTE(review): presumably makes the loader recompile a parser even when an
    /// up-to-date library already exists — confirm against the compile path.
    pub const fn force_rebuild(&mut self, rebuild: bool) {
        self.force_rebuild = rebuild;
    }
1751
    /// Creates a `tree_sitter::WasmStore` for `engine` and stores it in
    /// `self.wasm_store`, replacing any store that was there before.
    ///
    /// # Panics
    ///
    /// Panics if locking `self.wasm_store` fails or if `WasmStore::new` returns an error.
    #[cfg(feature = "wasm")]
    #[cfg_attr(docsrs, doc(cfg(feature = "wasm")))]
    pub fn use_wasm(&mut self, engine: &tree_sitter::wasmtime::Engine) {
        *self.wasm_store.lock().unwrap() = Some(tree_sitter::WasmStore::new(engine).unwrap());
    }
1757
1758 #[must_use]
1759 pub fn get_scanner_path(&self, src_path: &Path) -> Option<PathBuf> {
1760 let path = src_path.join("scanner.c");
1761 path.exists().then_some(path)
1762 }
1763}
1764
1765impl LanguageConfiguration<'_> {
1766 #[cfg(feature = "tree-sitter-highlight")]
1767 pub fn highlight_config(
1768 &self,
1769 language: Language,
1770 paths: Option<&[PathBuf]>,
1771 ) -> LoaderResult<Option<&HighlightConfiguration>> {
1772 let (highlights_filenames, injections_filenames, locals_filenames) = match paths {
1773 Some(paths) => (
1774 Some(
1775 paths
1776 .iter()
1777 .filter(|p| p.ends_with(DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME))
1778 .cloned()
1779 .collect::<Vec<_>>(),
1780 ),
1781 Some(
1782 paths
1783 .iter()
1784 .filter(|p| p.ends_with(DEFAULT_TAGS_QUERY_FILE_NAME))
1785 .cloned()
1786 .collect::<Vec<_>>(),
1787 ),
1788 Some(
1789 paths
1790 .iter()
1791 .filter(|p| p.ends_with(DEFAULT_LOCALS_QUERY_FILE_NAME))
1792 .cloned()
1793 .collect::<Vec<_>>(),
1794 ),
1795 ),
1796 None => (None, None, None),
1797 };
1798 self.highlight_config
1799 .get_or_try_init(|| {
1800 let (highlights_query, highlight_ranges) = self.read_queries(
1801 if highlights_filenames.is_some() {
1802 highlights_filenames.as_deref()
1803 } else {
1804 self.highlights_filenames.as_deref()
1805 },
1806 DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME,
1807 )?;
1808 let (injections_query, injection_ranges) = self.read_queries(
1809 if injections_filenames.is_some() {
1810 injections_filenames.as_deref()
1811 } else {
1812 self.injections_filenames.as_deref()
1813 },
1814 DEFAULT_INJECTIONS_QUERY_FILE_NAME,
1815 )?;
1816 let (locals_query, locals_ranges) = self.read_queries(
1817 if locals_filenames.is_some() {
1818 locals_filenames.as_deref()
1819 } else {
1820 self.locals_filenames.as_deref()
1821 },
1822 DEFAULT_LOCALS_QUERY_FILE_NAME,
1823 )?;
1824
1825 if highlights_query.is_empty() {
1826 Ok(None)
1827 } else {
1828 let mut result = HighlightConfiguration::new(
1829 language,
1830 &self.language_name,
1831 &highlights_query,
1832 &injections_query,
1833 &locals_query,
1834 )
1835 .map_err(|error| match error.kind {
1836 QueryErrorKind::Language => {
1837 LoaderError::Query(LoaderQueryError { error, file: None })
1838 }
1839 _ => {
1840 if error.offset < injections_query.len() {
1841 Self::include_path_in_query_error(
1842 error,
1843 &injection_ranges,
1844 &injections_query,
1845 0,
1846 )
1847 } else if error.offset < injections_query.len() + locals_query.len() {
1848 Self::include_path_in_query_error(
1849 error,
1850 &locals_ranges,
1851 &locals_query,
1852 injections_query.len(),
1853 )
1854 } else {
1855 Self::include_path_in_query_error(
1856 error,
1857 &highlight_ranges,
1858 &highlights_query,
1859 injections_query.len() + locals_query.len(),
1860 )
1861 }
1862 }
1863 })?;
1864 let mut all_highlight_names = self.highlight_names.lock().unwrap();
1865 if self.use_all_highlight_names {
1866 for capture_name in result.query.capture_names() {
1867 if !all_highlight_names.iter().any(|x| x == capture_name) {
1868 all_highlight_names.push((*capture_name).to_string());
1869 }
1870 }
1871 }
1872 result.configure(all_highlight_names.as_slice());
1873 drop(all_highlight_names);
1874 Ok(Some(result))
1875 }
1876 })
1877 .map(Option::as_ref)
1878 }
1879
1880 #[cfg(feature = "tree-sitter-tags")]
1881 pub fn tags_config(&self, language: Language) -> LoaderResult<Option<&TagsConfiguration>> {
1882 self.tags_config
1883 .get_or_try_init(|| {
1884 let (tags_query, tags_ranges) = self
1885 .read_queries(self.tags_filenames.as_deref(), DEFAULT_TAGS_QUERY_FILE_NAME)?;
1886 let (locals_query, locals_ranges) = self.read_queries(
1887 self.locals_filenames.as_deref(),
1888 DEFAULT_LOCALS_QUERY_FILE_NAME,
1889 )?;
1890 if tags_query.is_empty() {
1891 Ok(None)
1892 } else {
1893 TagsConfiguration::new(language, &tags_query, &locals_query)
1894 .map(Some)
1895 .map_err(|error| {
1896 if let TagsError::Query(error) = error {
1897 if error.offset < locals_query.len() {
1898 Self::include_path_in_query_error(
1899 error,
1900 &locals_ranges,
1901 &locals_query,
1902 0,
1903 )
1904 } else {
1905 Self::include_path_in_query_error(
1906 error,
1907 &tags_ranges,
1908 &tags_query,
1909 locals_query.len(),
1910 )
1911 }
1912 } else {
1913 error.into()
1914 }
1915 })
1916 }
1917 })
1918 .map(Option::as_ref)
1919 }
1920
1921 #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1922 fn include_path_in_query_error(
1923 mut error: QueryError,
1924 ranges: &[(PathBuf, Range<usize>)],
1925 source: &str,
1926 start_offset: usize,
1927 ) -> LoaderError {
1928 let offset_within_section = error.offset - start_offset;
1929 let (path, range) = ranges
1930 .iter()
1931 .find(|(_, range)| range.contains(&offset_within_section))
1932 .unwrap_or_else(|| ranges.last().unwrap());
1933 error.offset = offset_within_section - range.start;
1934 error.row = source[range.start..offset_within_section]
1935 .matches('\n')
1936 .count();
1937 LoaderError::Query(LoaderQueryError {
1938 error,
1939 file: Some(path.to_string_lossy().to_string()),
1940 })
1941 }
1942
1943 #[allow(clippy::type_complexity)]
1944 #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1945 fn read_queries(
1946 &self,
1947 paths: Option<&[PathBuf]>,
1948 default_path: &str,
1949 ) -> LoaderResult<(String, Vec<(PathBuf, Range<usize>)>)> {
1950 let mut query = String::new();
1951 let mut path_ranges = Vec::new();
1952 if let Some(paths) = paths {
1953 for path in paths {
1954 let abs_path = self.root_path.join(path);
1955 let prev_query_len = query.len();
1956 query += &fs::read_to_string(&abs_path)
1957 .map_err(|e| LoaderError::IO(IoError::new(e, Some(abs_path.as_path()))))?;
1958 path_ranges.push((path.clone(), prev_query_len..query.len()));
1959 }
1960 } else {
1961 if default_path == DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME
1963 || default_path == DEFAULT_TAGS_QUERY_FILE_NAME
1964 {
1965 warn!(
1966 concat!(
1967 "You should add a `{}` entry pointing to the {} path in the `tree-sitter` ",
1968 "object in the grammar's tree-sitter.json file. See more here: ",
1969 "https://tree-sitter.github.io/tree-sitter/3-syntax-highlighting#query-paths"
1970 ),
1971 default_path.replace(".scm", ""),
1972 default_path
1973 );
1974 }
1975 let queries_path = self.root_path.join("queries");
1976 let path = queries_path.join(default_path);
1977 if path.exists() {
1978 query = fs::read_to_string(&path)
1979 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path.as_path()))))?;
1980 path_ranges.push((PathBuf::from(default_path), 0..query.len()));
1981 }
1982 }
1983
1984 Ok((query, path_ranges))
1985 }
1986}
1987
1988fn needs_recompile(lib_path: &Path, paths_to_check: &[PathBuf]) -> LoaderResult<bool> {
1989 if !lib_path.exists() {
1990 return Ok(true);
1991 }
1992 let lib_mtime = mtime(lib_path).map_err(|e| LoaderError::ModifiedTime(Box::new(e)))?;
1993 for path in paths_to_check {
1994 if mtime(path).map_err(|e| LoaderError::ModifiedTime(Box::new(e)))? > lib_mtime {
1995 return Ok(true);
1996 }
1997 }
1998 Ok(false)
1999}
2000
2001fn mtime(path: &Path) -> LoaderResult<SystemTime> {
2002 fs::metadata(path)
2003 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?
2004 .modified()
2005 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))
2006}