1#![cfg_attr(not(any(test, doctest)), doc = include_str!("../README.md"))]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3
4#[cfg(unix)]
5use std::fmt::Write as _;
6#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
7use std::ops::Range;
8#[cfg(feature = "tree-sitter-highlight")]
9use std::sync::Mutex;
10use std::{
11 collections::HashMap,
12 env, fs,
13 hash::{Hash as _, Hasher as _},
14 io::{BufRead, BufReader},
15 marker::PhantomData,
16 mem,
17 path::{Path, PathBuf},
18 process::Command,
19 sync::LazyLock,
20 time::{SystemTime, SystemTimeError},
21};
22
23use etcetera::BaseStrategy as _;
24use fs4::fs_std::FileExt;
25use libloading::{Library, Symbol};
26use log::{error, info, warn};
27use once_cell::unsync::OnceCell;
28use regex::{Regex, RegexBuilder};
29use semver::Version;
30use serde::{Deserialize, Deserializer, Serialize};
31use thiserror::Error;
32use tree_sitter::Language;
33#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
34use tree_sitter::QueryError;
35#[cfg(feature = "tree-sitter-highlight")]
36use tree_sitter::QueryErrorKind;
37#[cfg(feature = "wasm")]
38use tree_sitter::WasmError;
39#[cfg(feature = "tree-sitter-highlight")]
40use tree_sitter_highlight::HighlightConfiguration;
41#[cfg(feature = "tree-sitter-tags")]
42use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
43
/// Matches a `"name": "<value>"` pair and captures the value (non-greedily) in group 1.
static GRAMMAR_NAME_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#""name":\s*"(.*?)""#).unwrap());

/// Contents of the crate's `wasi-sdk-version` file with surrounding ASCII whitespace trimmed.
const WASI_SDK_VERSION: &str = include_str!("../wasi-sdk-version").trim_ascii();

/// Result alias whose error type is always [`LoaderError`].
pub type LoaderResult<T> = Result<T, LoaderError>;
50
51#[derive(Debug, Error)]
52pub enum LoaderError {
53 #[error(transparent)]
54 Compiler(CompilerError),
55 #[error("Parser compilation failed.\nStdout: {0}\nStderr: {1}")]
56 Compilation(String, String),
57 #[error("Failed to execute curl for {0} -- {1}")]
58 Curl(String, std::io::Error),
59 #[error("Failed to load language in current directory:\n{0}")]
60 CurrentDirectoryLoad(Box<Self>),
61 #[error("External file path {0} is outside of parser directory {1}")]
62 ExternalFile(String, String),
63 #[error("Failed to extract archive {0} to {1}")]
64 Extraction(String, String),
65 #[error("Failed to load language for file name {0}:\n{1}")]
66 FileNameLoad(String, Box<Self>),
67 #[error("Failed to parse the language name from grammar.json at {0}")]
68 GrammarJSON(String),
69 #[error(transparent)]
70 HomeDir(#[from] etcetera::HomeDirError),
71 #[error(transparent)]
72 IO(IoError),
73 #[error(transparent)]
74 Library(LibraryError),
75 #[error("Failed to compare binary and source timestamps:\n{0}")]
76 ModifiedTime(Box<Self>),
77 #[error("No language found")]
78 NoLanguage,
79 #[error(transparent)]
80 Query(LoaderQueryError),
81 #[error("Failed to load language for scope '{0}':\n{1}")]
82 ScopeLoad(String, Box<Self>),
83 #[error(transparent)]
84 Serialization(#[from] serde_json::Error),
85 #[error(transparent)]
86 Symbol(SymbolError),
87 #[error(transparent)]
88 Tags(#[from] TagsError),
89 #[error("Failed to execute tar for {0} -- {1}")]
90 Tar(String, std::io::Error),
91 #[error(transparent)]
92 Time(#[from] SystemTimeError),
93 #[error("Unknown scope '{0}'")]
94 UnknownScope(String),
95 #[error("Failed to download wasi-sdk from {0}")]
96 WasiSDKDownload(String),
97 #[error(transparent)]
98 WasiSDKClang(#[from] WasiSDKClangError),
99 #[error("Unsupported platform for wasi-sdk")]
100 WasiSDKPlatform,
101 #[cfg(feature = "wasm")]
102 #[error(transparent)]
103 Wasm(#[from] WasmError),
104 #[error("Failed to run wasi-sdk clang -- {0}")]
105 WasmCompiler(std::io::Error),
106 #[error("wasi-sdk clang command failed: {0}")]
107 WasmCompilation(String),
108}
109
110#[derive(Debug, Error)]
111pub struct CompilerError {
112 pub error: std::io::Error,
113 pub command: Box<Command>,
114}
115
116impl std::fmt::Display for CompilerError {
117 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
118 write!(
119 f,
120 "Failed to execute the C compiler with the following command:\n{:?}\nError: {}",
121 *self.command, self.error
122 )?;
123 Ok(())
124 }
125}
126
127#[derive(Debug, Error)]
128pub struct IoError {
129 pub error: std::io::Error,
130 pub path: Option<String>,
131}
132
133impl IoError {
134 fn new(error: std::io::Error, path: Option<&Path>) -> Self {
135 Self {
136 error,
137 path: path.map(|p| p.to_string_lossy().to_string()),
138 }
139 }
140}
141
142impl std::fmt::Display for IoError {
143 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
144 write!(f, "{}", self.error)?;
145 if let Some(ref path) = self.path {
146 write!(f, " ({path})")?;
147 }
148 Ok(())
149 }
150}
151
152#[derive(Debug, Error)]
153pub struct LibraryError {
154 pub error: libloading::Error,
155 pub path: String,
156}
157
158impl std::fmt::Display for LibraryError {
159 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
160 write!(
161 f,
162 "Error opening dynamic library {} -- {}",
163 self.path, self.error
164 )?;
165 Ok(())
166 }
167}
168
169#[derive(Debug, Error)]
170pub struct LoaderQueryError {
171 pub error: QueryError,
172 pub file: Option<String>,
173}
174
175impl std::fmt::Display for LoaderQueryError {
176 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
177 if let Some(ref path) = self.file {
178 writeln!(f, "Error in query file {path}:")?;
179 }
180 write!(f, "{}", self.error)?;
181 Ok(())
182 }
183}
184
185#[derive(Debug, Error)]
186pub struct SymbolError {
187 pub error: libloading::Error,
188 pub symbol_name: String,
189 pub path: String,
190}
191
192impl std::fmt::Display for SymbolError {
193 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
194 write!(
195 f,
196 "Failed to load symbol {} from {} -- {}",
197 self.symbol_name, self.path, self.error
198 )?;
199 Ok(())
200 }
201}
202
203#[derive(Debug, Error)]
204pub struct WasiSDKClangError {
205 pub wasi_sdk_dir: String,
206 pub possible_executables: Vec<&'static str>,
207 pub download: bool,
208}
209
210impl std::fmt::Display for WasiSDKClangError {
211 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
212 if self.download {
213 write!(
214 f,
215 "Failed to find clang executable in downloaded wasi-sdk at '{}'.",
216 self.wasi_sdk_dir
217 )?;
218 } else {
219 write!(f, "TREE_SITTER_WASI_SDK_PATH is set to '{}', but no clang executable found in 'bin/' directory.", self.wasi_sdk_dir)?;
220 }
221
222 let possible_exes = self.possible_executables.join(", ");
223 write!(f, " Looked for: {possible_exes}.")?;
224
225 Ok(())
226 }
227}
228
/// Default file name for highlight queries.
pub const DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME: &str = "highlights.scm";

/// Default file name for injection queries.
pub const DEFAULT_INJECTIONS_QUERY_FILE_NAME: &str = "injections.scm";

/// Default file name for locals queries.
pub const DEFAULT_LOCALS_QUERY_FILE_NAME: &str = "locals.scm";

/// Default file name for tags queries.
pub const DEFAULT_TAGS_QUERY_FILE_NAME: &str = "tags.scm";
236
/// Loader configuration: the directories that are scanned for `tree-sitter-*` grammars.
#[derive(Default, Deserialize, Serialize)]
pub struct Config {
    /// Serialized as `parser-directories`. On deserialization each entry goes through
    /// `deserialize_parser_directories`, which expands a leading `~` or `$HOME`.
    #[serde(default)]
    #[serde(
        rename = "parser-directories",
        deserialize_with = "deserialize_parser_directories"
    )]
    pub parser_directories: Vec<PathBuf>,
}
246
247#[derive(Serialize, Deserialize, Clone, Default)]
248#[serde(untagged)]
249pub enum PathsJSON {
250 #[default]
251 Empty,
252 Single(PathBuf),
253 Multiple(Vec<PathBuf>),
254}
255
256impl PathsJSON {
257 fn into_vec(self) -> Option<Vec<PathBuf>> {
258 match self {
259 Self::Empty => None,
260 Self::Single(s) => Some(vec![s]),
261 Self::Multiple(s) => Some(s),
262 }
263 }
264
265 const fn is_empty(&self) -> bool {
266 matches!(self, Self::Empty)
267 }
268
269 #[must_use]
271 pub fn to_variable_value<'a>(&'a self, default: &'a PathBuf) -> &'a str {
272 match self {
273 Self::Empty => Some(default),
274 Self::Single(path_buf) => Some(path_buf),
275 Self::Multiple(paths) => paths.first(),
276 }
277 .map_or("", |path| path.as_os_str().to_str().unwrap_or(""))
278 }
279}
280
/// An author entry: either a plain string or an object with `name` and optional
/// `email` / `url` (untagged, so either JSON shape is accepted).
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONAuthor {
    String(String),
    Object {
        name: String,
        email: Option<String>,
        url: Option<String>,
    },
}

/// A repository entry: either a plain string or an object with a `url` field.
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONRepository {
    String(String),
    Object { url: String },
}

/// Serde model of the package metadata fields read by this crate.
#[derive(Serialize, Deserialize)]
pub struct PackageJSON {
    pub name: String,
    pub version: Version,
    pub description: Option<String>,
    pub author: Option<PackageJSONAuthor>,
    pub maintainers: Option<Vec<PackageJSONAuthor>>,
    pub license: Option<String>,
    pub repository: Option<PackageJSONRepository>,
    /// Stored under the `tree-sitter` key; omitted from the output when `None`.
    #[serde(default)]
    #[serde(rename = "tree-sitter", skip_serializing_if = "Option::is_none")]
    pub tree_sitter: Option<Vec<LanguageConfigurationJSON>>,
}
312
/// Serde default for `LanguageConfigurationJSON::path`: the current directory (`.`).
fn default_path() -> PathBuf {
    ".".into()
}
316
/// Serde model of one language configuration entry. Field names map to kebab-case keys
/// (for example `file_types` is stored as `file-types`).
#[derive(Serialize, Deserialize, Clone)]
#[serde(rename_all = "kebab-case")]
pub struct LanguageConfigurationJSON {
    /// Defaults to `.` when the key is absent.
    #[serde(default = "default_path")]
    pub path: PathBuf,
    pub scope: Option<String>,
    pub file_types: Option<Vec<String>>,
    pub content_regex: Option<String>,
    pub first_line_regex: Option<String>,
    pub injection_regex: Option<String>,
    // The path lists below default to `PathsJSON::Empty` and are left out of the
    // serialized output while empty.
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
}
338
/// Serde model of a `tree-sitter.json` file (see [`TreeSitterJSON::from_file`]).
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct TreeSitterJSON {
    /// Stored under the `$schema` key.
    #[serde(rename = "$schema")]
    pub schema: Option<String>,
    pub grammars: Vec<Grammar>,
    pub metadata: Metadata,
    /// Falls back to `Bindings::default()` when the key is absent.
    #[serde(default)]
    pub bindings: Bindings,
}
349
350impl TreeSitterJSON {
351 pub fn from_file(path: &Path) -> LoaderResult<Self> {
352 let path = path.join("tree-sitter.json");
353 Ok(serde_json::from_str(&fs::read_to_string(&path).map_err(
354 |e| LoaderError::IO(IoError::new(e, Some(path.as_path()))),
355 )?)?)
356 }
357
358 #[must_use]
359 pub fn has_multiple_language_configs(&self) -> bool {
360 self.grammars.len() > 1
361 }
362}
363
/// Serde model of one entry in `TreeSitterJSON::grammars`. Field names map to kebab-case
/// keys; optional fields and empty path lists are omitted when serializing.
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Grammar {
    pub name: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub camelcase: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    pub scope: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub path: Option<PathBuf>,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
    // NOTE: unlike the other optional fields, `file_types` has no `skip_serializing_if`,
    // so a `None` value is written out explicitly.
    pub file_types: Option<Vec<String>>,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub injection_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub first_line_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub class_name: Option<String>,
}
395
/// Serde model of the `metadata` section of `TreeSitterJSON`.
#[derive(Serialize, Deserialize)]
pub struct Metadata {
    pub version: Version,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub authors: Option<Vec<Author>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub links: Option<Links>,
    /// Never read from or written to JSON (`serde(skip)`); only set programmatically.
    #[serde(skip)]
    pub namespace: Option<String>,
}

/// An author entry in `Metadata::authors`.
#[derive(Serialize, Deserialize)]
pub struct Author {
    pub name: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub email: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub url: Option<String>,
}

/// The `links` entry of `Metadata`.
#[derive(Serialize, Deserialize)]
pub struct Links {
    pub repository: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub funding: Option<String>,
}
426
/// One on/off flag per binding language. Keys missing from the input take their value
/// from `Bindings::default()` (container-level `serde(default)`).
#[derive(Serialize, Deserialize, Clone)]
#[serde(default)]
pub struct Bindings {
    pub c: bool,
    pub go: bool,
    pub java: bool,
    /// Excluded from (de)serialization; `with_enabled_languages` does not accept it either.
    #[serde(skip)]
    pub kotlin: bool,
    pub node: bool,
    pub python: bool,
    pub rust: bool,
    pub swift: bool,
    pub zig: bool,
}
441
442impl Bindings {
443 #[must_use]
445 pub const fn languages(&self) -> [(&'static str, bool); 8] {
446 [
447 ("c", true),
448 ("go", true),
449 ("java", false),
450 ("node", true),
453 ("python", true),
454 ("rust", true),
455 ("swift", true),
456 ("zig", false),
457 ]
458 }
459
460 pub fn with_enabled_languages<'a, I>(languages: I) -> Result<Self, &'a str>
462 where
463 I: Iterator<Item = &'a str>,
464 {
465 let mut out = Self {
466 c: false,
467 go: false,
468 java: false,
469 kotlin: false,
470 node: false,
471 python: false,
472 rust: false,
473 swift: false,
474 zig: false,
475 };
476
477 for v in languages {
478 match v {
479 "c" => out.c = true,
480 "go" => out.go = true,
481 "java" => out.java = true,
482 "node" => out.node = true,
485 "python" => out.python = true,
486 "rust" => out.rust = true,
487 "swift" => out.swift = true,
488 "zig" => out.zig = true,
489 unsupported => return Err(unsupported),
490 }
491 }
492
493 Ok(out)
494 }
495}
496
497impl Default for Bindings {
498 fn default() -> Self {
499 Self {
500 c: true,
501 go: true,
502 java: false,
503 kotlin: false,
504 node: true,
505 python: true,
506 rust: true,
507 swift: true,
508 zig: false,
509 }
510 }
511}
512
513fn deserialize_parser_directories<'de, D>(deserializer: D) -> Result<Vec<PathBuf>, D::Error>
517where
518 D: Deserializer<'de>,
519{
520 let paths = Vec::<PathBuf>::deserialize(deserializer)?;
521 let Ok(home) = etcetera::home_dir() else {
522 return Ok(paths);
523 };
524 let standardized = paths
525 .into_iter()
526 .map(|path| standardize_path(path, &home))
527 .collect();
528 Ok(standardized)
529}
530
/// Replaces a leading `~` or `$HOME` path component with `home`.
///
/// Matching is per path component, so `~user/x` is left untouched. Paths without either
/// prefix are returned as-is.
fn standardize_path(path: PathBuf, home: &Path) -> PathBuf {
    for home_alias in ["~", "$HOME"] {
        if let Ok(relative) = path.strip_prefix(home_alias) {
            return home.join(relative);
        }
    }
    path
}
540
541impl Config {
542 #[must_use]
543 pub fn initial() -> Self {
544 let home_dir = etcetera::home_dir().expect("Cannot determine home directory");
545 Self {
546 parser_directories: vec![
547 home_dir.join("github"),
548 home_dir.join("src"),
549 home_dir.join("source"),
550 home_dir.join("projects"),
551 home_dir.join("dev"),
552 home_dir.join("git"),
553 ],
554 }
555 }
556}
557
/// Value of the `BUILD_TARGET` environment variable captured at compile time; passed to
/// `cc` as both target and host when compiling parsers.
const BUILD_TARGET: &str = env!("BUILD_TARGET");
559
/// A loaded language configuration: how to recognise files of the language and where its
/// query files live.
pub struct LanguageConfiguration<'a> {
    /// Scope string compared against in `Loader::language_configuration_for_scope`.
    pub scope: Option<String>,
    /// Used to rank candidates when several configurations match one file name.
    pub content_regex: Option<Regex>,
    pub first_line_regex: Option<Regex>,
    /// Matched against injection strings; the longest match wins.
    pub injection_regex: Option<Regex>,
    pub file_types: Vec<String>,
    pub root_path: PathBuf,
    pub highlights_filenames: Option<Vec<PathBuf>>,
    pub injections_filenames: Option<Vec<PathBuf>>,
    pub locals_filenames: Option<Vec<PathBuf>>,
    pub tags_filenames: Option<Vec<PathBuf>>,
    pub language_name: String,
    /// Index into `Loader::languages_by_id`.
    language_id: usize,
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_config: OnceCell<Option<HighlightConfiguration>>,
    #[cfg(feature = "tree-sitter-tags")]
    tags_config: OnceCell<Option<TagsConfiguration>>,
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_names: &'a Mutex<Vec<String>>,
    #[cfg(feature = "tree-sitter-highlight")]
    use_all_highlight_names: bool,
    // Keeps `'a` in use when the `tree-sitter-highlight` feature (and with it the only
    // `'a` reference field) is compiled out.
    _phantom: PhantomData<&'a ()>,
}
583
/// Finds, compiles and loads tree-sitter languages and their configurations.
pub struct Loader {
    /// Directory where compiled parser libraries are written and looked up.
    pub parser_lib_path: PathBuf,
    /// Per language: grammar directory, the lazily loaded `Language`, and optional
    /// external files (joined onto the `src` directory when checking for recompilation).
    languages_by_id: Vec<(PathBuf, OnceCell<Language>, Option<Vec<PathBuf>>)>,
    language_configurations: Vec<LanguageConfiguration<'static>>,
    /// Maps a file name or (possibly compound) extension to indices into
    /// `language_configurations`.
    language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
    language_configuration_in_current_path: Option<usize>,
    /// Maps a first-line regex source string to indices into `language_configurations`.
    language_configuration_ids_by_first_line_regex: HashMap<String, Vec<usize>>,
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_names: Box<Mutex<Vec<String>>>,
    #[cfg(feature = "tree-sitter-highlight")]
    use_all_highlight_names: bool,
    /// Adds `.debug._` to library names and turns on compiler debug settings.
    debug_build: bool,
    /// Adds `.sanitize._` to library names and sets `CompileConfig::sanitize`.
    sanitize_build: bool,
    /// Forces recompilation regardless of timestamps.
    force_rebuild: bool,

    #[cfg(feature = "wasm")]
    wasm_store: Mutex<Option<tree_sitter::WasmStore>>,
}
602
/// Inputs for compiling one parser.
pub struct CompileConfig<'a> {
    /// Directory containing the generated sources (`parser.c`, `grammar.json`, ...).
    pub src_path: &'a Path,
    /// Include directories handed to the compiler.
    pub header_paths: Vec<&'a Path>,
    pub parser_path: PathBuf,
    pub scanner_path: Option<PathBuf>,
    pub external_files: Option<&'a [PathBuf]>,
    /// Where to write the compiled library; `None` lets the loader choose.
    pub output_path: Option<PathBuf>,
    /// Extra preprocessor definitions.
    pub flags: &'a [&'a str],
    pub sanitize: bool,
    /// Grammar name; empty until filled in by the loader.
    pub name: String,
}

impl<'a> CompileConfig<'a> {
    /// Creates a configuration rooted at `src_path`.
    ///
    /// `src_path` is the only header path, `parser_path` is `<src_path>/parser.c`, and
    /// the scanner path, flags, sanitize switch and name all start out empty/off.
    #[must_use]
    pub fn new(
        src_path: &'a Path,
        externals: Option<&'a [PathBuf]>,
        output_path: Option<PathBuf>,
    ) -> Self {
        let parser_path = src_path.join("parser.c");
        let header_paths = Vec::from([src_path]);
        Self {
            src_path,
            header_paths,
            parser_path,
            scanner_path: None,
            external_files: externals,
            output_path,
            flags: &[],
            sanitize: false,
            name: String::new(),
        }
    }
}
635
// NOTE(review): `Loader` stores `once_cell::unsync::OnceCell` values, which are not `Sync`
// by themselves. This impl asserts thread-safety by hand — confirm that callers never
// initialise those cells from several threads at once.
unsafe impl Sync for Loader {}
637
638impl Loader {
639 pub fn new() -> LoaderResult<Self> {
640 let parser_lib_path = if let Ok(path) = env::var("TREE_SITTER_LIBDIR") {
641 PathBuf::from(path)
642 } else {
643 if cfg!(target_os = "macos") {
644 let legacy_apple_path = etcetera::base_strategy::Apple::new()?
645 .cache_dir() .join("tree-sitter");
647 if legacy_apple_path.exists() && legacy_apple_path.is_dir() {
648 std::fs::remove_dir_all(&legacy_apple_path).map_err(|e| {
649 LoaderError::IO(IoError::new(e, Some(legacy_apple_path.as_path())))
650 })?;
651 }
652 }
653
654 etcetera::choose_base_strategy()?
655 .cache_dir()
656 .join("tree-sitter")
657 .join("lib")
658 };
659 Ok(Self::with_parser_lib_path(parser_lib_path))
660 }
661
    /// Creates an empty loader that stores compiled parsers in `parser_lib_path`.
    ///
    /// No languages are registered, all build switches (debug, sanitize, force rebuild)
    /// are off, and, with the highlight feature, all highlight names are used.
    #[must_use]
    pub fn with_parser_lib_path(parser_lib_path: PathBuf) -> Self {
        Self {
            parser_lib_path,
            languages_by_id: Vec::new(),
            language_configurations: Vec::new(),
            language_configuration_ids_by_file_type: HashMap::new(),
            language_configuration_in_current_path: None,
            language_configuration_ids_by_first_line_regex: HashMap::new(),
            #[cfg(feature = "tree-sitter-highlight")]
            highlight_names: Box::new(Mutex::new(Vec::new())),
            #[cfg(feature = "tree-sitter-highlight")]
            use_all_highlight_names: true,
            debug_build: false,
            sanitize_build: false,
            force_rebuild: false,

            #[cfg(feature = "wasm")]
            wasm_store: Mutex::default(),
        }
    }
683
684 #[cfg(feature = "tree-sitter-highlight")]
685 #[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
686 pub fn configure_highlights(&mut self, names: &[String]) {
687 self.use_all_highlight_names = false;
688 let mut highlights = self.highlight_names.lock().unwrap();
689 highlights.clear();
690 highlights.extend(names.iter().cloned());
691 }
692
693 #[must_use]
694 #[cfg(feature = "tree-sitter-highlight")]
695 #[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
696 pub fn highlight_names(&self) -> Vec<String> {
697 self.highlight_names.lock().unwrap().clone()
698 }
699
700 pub fn find_all_languages(&mut self, config: &Config) -> LoaderResult<()> {
701 if config.parser_directories.is_empty() {
702 warn!(concat!(
703 "You have not configured any parser directories!\n",
704 "Please run `tree-sitter init-config` and edit the resulting\n",
705 "configuration file to indicate where we should look for\n",
706 "language grammars.\n"
707 ));
708 }
709 for parser_container_dir in &config.parser_directories {
710 if let Ok(entries) = fs::read_dir(parser_container_dir) {
711 for entry in entries {
712 let entry = entry.map_err(|e| LoaderError::IO(IoError::new(e, None)))?;
713 if let Some(parser_dir_name) = entry.file_name().to_str() {
714 if parser_dir_name.starts_with("tree-sitter-") {
715 self.find_language_configurations_at_path(
716 &parser_container_dir.join(parser_dir_name),
717 false,
718 )
719 .ok();
720 }
721 }
722 }
723 }
724 }
725 Ok(())
726 }
727
728 pub fn languages_at_path(&mut self, path: &Path) -> LoaderResult<Vec<(Language, String)>> {
729 if let Ok(configurations) = self.find_language_configurations_at_path(path, true) {
730 let mut language_ids = configurations
731 .iter()
732 .map(|c| (c.language_id, c.language_name.clone()))
733 .collect::<Vec<_>>();
734 language_ids.sort_unstable();
735 language_ids.dedup();
736 language_ids
737 .into_iter()
738 .map(|(id, name)| Ok((self.language_for_id(id)?, name)))
739 .collect::<LoaderResult<Vec<_>>>()
740 } else {
741 Ok(Vec::new())
742 }
743 }
744
745 #[must_use]
746 pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> {
747 self.language_configurations
748 .iter()
749 .map(|c| (c, self.languages_by_id[c.language_id].0.as_ref()))
750 .collect()
751 }
752
753 pub fn language_configuration_for_scope(
754 &self,
755 scope: &str,
756 ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
757 for configuration in &self.language_configurations {
758 if configuration.scope.as_ref().is_some_and(|s| s == scope) {
759 let language = self.language_for_id(configuration.language_id)?;
760 return Ok(Some((language, configuration)));
761 }
762 }
763 Ok(None)
764 }
765
766 pub fn language_configuration_for_first_line_regex(
767 &self,
768 path: &Path,
769 ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
770 self.language_configuration_ids_by_first_line_regex
771 .iter()
772 .try_fold(None, |_, (regex, ids)| {
773 if let Some(regex) = Self::regex(Some(regex)) {
774 let file = fs::File::open(path)
775 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
776 let reader = BufReader::new(file);
777 let first_line = reader
778 .lines()
779 .next()
780 .transpose()
781 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
782 if let Some(first_line) = first_line {
783 if regex.is_match(&first_line) && !ids.is_empty() {
784 let configuration = &self.language_configurations[ids[0]];
785 let language = self.language_for_id(configuration.language_id)?;
786 return Ok(Some((language, configuration)));
787 }
788 }
789 }
790
791 Ok(None)
792 })
793 }
794
795 pub fn language_configuration_for_file_name(
796 &self,
797 path: &Path,
798 ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
799 let configuration_ids = path
802 .file_name()
803 .and_then(|n| n.to_str())
804 .and_then(|file_name| self.language_configuration_ids_by_file_type.get(file_name))
805 .or_else(|| {
806 let mut path = path.to_owned();
807 let mut extensions = Vec::with_capacity(2);
808 while let Some(extension) = path.extension() {
809 extensions.push(extension.to_str()?.to_string());
810 path = PathBuf::from(path.file_stem()?.to_os_string());
811 }
812 extensions.reverse();
813 (0..extensions.len())
816 .map(|i| extensions[i..].join("."))
817 .find_map(|key| self.language_configuration_ids_by_file_type.get(&key))
818 });
819
820 if let Some(configuration_ids) = configuration_ids {
821 if !configuration_ids.is_empty() {
822 let configuration = if configuration_ids.len() == 1 {
823 &self.language_configurations[configuration_ids[0]]
824 }
825 else {
828 let file_contents =
829 fs::read(path).map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?;
830 let file_contents = String::from_utf8_lossy(&file_contents);
831 let mut best_score = -2isize;
832 let mut best_configuration_id = None;
833 for configuration_id in configuration_ids {
834 let config = &self.language_configurations[*configuration_id];
835
836 let score;
839 if let Some(content_regex) = &config.content_regex {
840 if let Some(mat) = content_regex.find(&file_contents) {
841 score = (mat.end() - mat.start()) as isize;
842 }
843 else {
848 score = -1;
849 }
850 } else {
851 score = 0;
852 }
853 if score > best_score {
854 best_configuration_id = Some(*configuration_id);
855 best_score = score;
856 }
857 }
858
859 &self.language_configurations[best_configuration_id.unwrap()]
860 };
861
862 let language = self.language_for_id(configuration.language_id)?;
863 return Ok(Some((language, configuration)));
864 }
865 }
866
867 Ok(None)
868 }
869
870 pub fn language_configuration_for_injection_string(
871 &self,
872 string: &str,
873 ) -> LoaderResult<Option<(Language, &LanguageConfiguration)>> {
874 let mut best_match_length = 0;
875 let mut best_match_position = None;
876 for (i, configuration) in self.language_configurations.iter().enumerate() {
877 if let Some(injection_regex) = &configuration.injection_regex {
878 if let Some(mat) = injection_regex.find(string) {
879 let length = mat.end() - mat.start();
880 if length > best_match_length {
881 best_match_position = Some(i);
882 best_match_length = length;
883 }
884 }
885 }
886 }
887
888 if let Some(i) = best_match_position {
889 let configuration = &self.language_configurations[i];
890 let language = self.language_for_id(configuration.language_id)?;
891 Ok(Some((language, configuration)))
892 } else {
893 Ok(None)
894 }
895 }
896
897 pub fn language_for_configuration(
898 &self,
899 configuration: &LanguageConfiguration,
900 ) -> LoaderResult<Language> {
901 self.language_for_id(configuration.language_id)
902 }
903
904 fn language_for_id(&self, id: usize) -> LoaderResult<Language> {
905 let (path, language, externals) = &self.languages_by_id[id];
906 language
907 .get_or_try_init(|| {
908 let src_path = path.join("src");
909 self.load_language_at_path(CompileConfig::new(
910 &src_path,
911 externals.as_deref(),
912 None,
913 ))
914 })
915 .cloned()
916 }
917
918 pub fn compile_parser_at_path(
919 &self,
920 grammar_path: &Path,
921 output_path: PathBuf,
922 flags: &[&str],
923 ) -> LoaderResult<()> {
924 let src_path = grammar_path.join("src");
925 let mut config = CompileConfig::new(&src_path, None, Some(output_path));
926 config.flags = flags;
927 self.load_language_at_path(config).map(|_| ())
928 }
929
930 pub fn load_language_at_path(&self, mut config: CompileConfig) -> LoaderResult<Language> {
931 let grammar_path = config.src_path.join("grammar.json");
932 config.name = Self::grammar_json_name(&grammar_path)?;
933 self.load_language_at_path_with_name(config)
934 }
935
    /// Compiles the parser described by `config` if needed and loads the resulting language.
    ///
    /// `config.name` must already be set; it determines the library file name and the
    /// `tree_sitter_<name>` entry symbol (dashes replaced by underscores).
    ///
    /// A recompile is requested when `force_rebuild` is set, when an explicit output path
    /// is given, or when `needs_recompile` reports the sources as newer than the output.
    /// Compilation to a dynamic library is guarded by a lock file under
    /// `<cache dir>/tree-sitter/lock`.
    ///
    /// # Errors
    ///
    /// Returns a `LoaderError` on I/O failures, compilation failures, or when the library
    /// or its entry symbol cannot be loaded.
    pub fn load_language_at_path_with_name(
        &self,
        mut config: CompileConfig,
    ) -> LoaderResult<Language> {
        let mut lib_name = config.name.clone();
        let language_fn_name = format!("tree_sitter_{}", config.name.replace('-', "_"));
        // Debug and sanitize builds get distinct library names so they do not overwrite
        // the regular build.
        if self.debug_build {
            lib_name.push_str(".debug._");
        }

        if self.sanitize_build {
            lib_name.push_str(".sanitize._");
            config.sanitize = true;
        }

        // The shared library directory is only needed when the caller gave no output path.
        if config.output_path.is_none() {
            fs::create_dir_all(&self.parser_lib_path).map_err(|e| {
                LoaderError::IO(IoError::new(e, Some(self.parser_lib_path.as_path())))
            })?;
        }

        // An explicit output path always triggers a recompile.
        let mut recompile = self.force_rebuild || config.output_path.is_some();
        let output_path = config.output_path.unwrap_or_else(|| {
            let mut path = self.parser_lib_path.join(lib_name);
            path.set_extension(env::consts::DLL_EXTENSION);
            // With a wasm store installed the default artifact is a `.wasm` file instead.
            #[cfg(feature = "wasm")]
            if self.wasm_store.lock().unwrap().is_some() {
                path.set_extension("wasm");
            }
            path
        });
        config.output_path = Some(output_path.clone());

        let parser_path = config.src_path.join("parser.c");
        config.scanner_path = self.get_scanner_path(config.src_path);

        // Sources whose timestamps are compared against the output file.
        let mut paths_to_check = vec![parser_path];

        if let Some(scanner_path) = config.scanner_path.as_ref() {
            paths_to_check.push(scanner_path.clone());
        }

        paths_to_check.extend(
            config
                .external_files
                .unwrap_or_default()
                .iter()
                .map(|p| config.src_path.join(p)),
        );

        if !recompile {
            recompile = needs_recompile(&output_path, &paths_to_check)?;
        }

        // Wasm path: compile if needed, then load the bytes through the wasm store and return.
        #[cfg(feature = "wasm")]
        if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() {
            if recompile {
                self.compile_parser_to_wasm(
                    &config.name,
                    config.src_path,
                    config
                        .scanner_path
                        .as_ref()
                        .and_then(|p| p.strip_prefix(config.src_path).ok()),
                    &output_path,
                )?;
            }

            let wasm_bytes = fs::read(&output_path)
                .map_err(|e| LoaderError::IO(IoError::new(e, Some(output_path.as_path()))))?;
            return Ok(wasm_store.load_language(&config.name, &wasm_bytes)?);
        }

        // The lock file name includes a hash of the output path, so builds of the same
        // grammar to different locations use different lock files.
        let lock_hash = {
            let mut hasher = std::hash::DefaultHasher::new();
            output_path.hash(&mut hasher);
            format!("{:x}", hasher.finish())
        };

        // When `CROSS_RUNNER` is set, a temporary directory replaces the cache directory.
        let lock_path = if env::var("CROSS_RUNNER").is_ok() {
            tempfile::tempdir()
                .expect("create a temp dir")
                .path()
                .to_path_buf()
        } else {
            etcetera::choose_base_strategy()?.cache_dir()
        }
        .join("tree-sitter")
        .join("lock")
        .join(format!("{}-{lock_hash}.lock", config.name));

        // An existing lock file resets `recompile` to false, whatever was decided above.
        if let Ok(lock_file) = fs::OpenOptions::new().write(true).open(&lock_path) {
            recompile = false;
            if lock_file.try_lock_exclusive().is_err() {
                // The lock is held elsewhere: block until it is released and reuse the
                // result instead of compiling again.
                lock_file
                    .lock_exclusive()
                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
                recompile = false;
            } else {
                // The lock was free: a lock file last modified more than 30 seconds ago
                // is removed and a recompile is forced.
                let time = lock_file
                    .metadata()
                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?
                    .modified()
                    .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?
                    .elapsed()?
                    .as_secs();
                if time > 30 {
                    fs::remove_file(&lock_path)
                        .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
                    recompile = true;
                }
            }
        }

        if recompile {
            let parent_path = lock_path.parent().unwrap();
            fs::create_dir_all(parent_path)
                .map_err(|e| LoaderError::IO(IoError::new(e, Some(parent_path))))?;
            let lock_file = fs::OpenOptions::new()
                .create(true)
                .truncate(true)
                .write(true)
                .open(&lock_path)
                .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;
            lock_file
                .lock_exclusive()
                .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path.as_path()))))?;

            self.compile_parser_to_dylib(&config, &lock_file, &lock_path)?;

            if config.scanner_path.is_some() {
                self.check_external_scanner(&output_path)?;
            }
        }

        // Report a missing library with an explicit message before trying to load it.
        if !output_path.exists() {
            let msg = format!(
                "Dynamic library `{}` not found after build attempt. \
                Are you running multiple processes building to the same output location?",
                output_path.display()
            );

            Err(LoaderError::IO(IoError::new(
                std::io::Error::new(std::io::ErrorKind::NotFound, msg),
                Some(output_path.as_path()),
            )))?;
        }

        Self::load_language(&output_path, &language_fn_name)
    }
1102
1103 pub fn load_language(path: &Path, function_name: &str) -> LoaderResult<Language> {
1104 let library = unsafe { Library::new(path) }.map_err(|e| {
1105 LoaderError::Library(LibraryError {
1106 error: e,
1107 path: path.to_string_lossy().to_string(),
1108 })
1109 })?;
1110 let language = unsafe {
1111 let language_fn = library
1112 .get::<Symbol<unsafe extern "C" fn() -> Language>>(function_name.as_bytes())
1113 .map_err(|e| {
1114 LoaderError::Symbol(SymbolError {
1115 error: e,
1116 symbol_name: function_name.to_string(),
1117 path: path.to_string_lossy().to_string(),
1118 })
1119 })?;
1120 language_fn()
1121 };
1122 mem::forget(library);
1123 Ok(language)
1124 }
1125
1126 fn compile_parser_to_dylib(
1127 &self,
1128 config: &CompileConfig,
1129 lock_file: &fs::File,
1130 lock_path: &Path,
1131 ) -> LoaderResult<()> {
1132 let mut cc_config = cc::Build::new();
1133 cc_config
1134 .cargo_metadata(false)
1135 .cargo_warnings(false)
1136 .target(BUILD_TARGET)
1137 .host(BUILD_TARGET)
1141 .debug(self.debug_build)
1142 .file(&config.parser_path)
1143 .includes(&config.header_paths)
1144 .std("c11");
1145
1146 if let Some(scanner_path) = config.scanner_path.as_ref() {
1147 cc_config.file(scanner_path);
1148 }
1149
1150 if self.debug_build {
1151 cc_config.opt_level(0).extra_warnings(true);
1152 } else {
1153 cc_config.opt_level(2).extra_warnings(false);
1154 }
1155
1156 for flag in config.flags {
1157 cc_config.define(flag, None);
1158 }
1159
1160 let compiler = cc_config.get_compiler();
1161 let mut command = Command::new(compiler.path());
1162 command.args(compiler.args());
1163 for (key, value) in compiler.env() {
1164 command.env(key, value);
1165 }
1166
1167 let output_path = config.output_path.as_ref().unwrap();
1168
1169 let temp_dir = if compiler.is_like_msvc() {
1170 let out = format!("-out:{}", output_path.to_str().unwrap());
1171 command.arg(if self.debug_build { "-LDd" } else { "-LD" });
1172 command.arg("-utf-8");
1173
1174 let temp_dir = output_path.parent().unwrap().join(format!(
1178 "tmp_{}_{:?}",
1179 std::process::id(),
1180 std::thread::current().id()
1181 ));
1182 std::fs::create_dir_all(&temp_dir).unwrap();
1183
1184 command.arg(format!("/Fo{}\\", temp_dir.display()));
1185 command.args(cc_config.get_files());
1186 command.arg("-link").arg(out);
1187 command.arg(format!("/IMPLIB:{}.lib", temp_dir.join("temp").display()));
1188
1189 Some(temp_dir)
1190 } else {
1191 command.arg("-Werror=implicit-function-declaration");
1192 if cfg!(any(target_os = "macos", target_os = "ios")) {
1193 command.arg("-dynamiclib");
1194 command.arg("-UTREE_SITTER_REUSE_ALLOCATOR");
1196 } else {
1197 command.arg("-shared");
1198 command.arg("-Wl,--no-undefined");
1199 #[cfg(target_os = "openbsd")]
1200 command.arg("-lc");
1201 }
1202 command.args(cc_config.get_files());
1203 command.arg("-o").arg(output_path);
1204
1205 None
1206 };
1207
1208 let output = command.output().map_err(|e| {
1209 LoaderError::Compiler(CompilerError {
1210 error: e,
1211 command: Box::new(command),
1212 })
1213 })?;
1214
1215 if let Some(temp_dir) = temp_dir {
1216 let _ = fs::remove_dir_all(temp_dir);
1217 }
1218
1219 FileExt::unlock(lock_file)
1220 .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path))))?;
1221 fs::remove_file(lock_path)
1222 .map_err(|e| LoaderError::IO(IoError::new(e, Some(lock_path))))?;
1223
1224 if output.status.success() {
1225 Ok(())
1226 } else {
1227 Err(LoaderError::Compilation(
1228 String::from_utf8_lossy(&output.stdout).to_string(),
1229 String::from_utf8_lossy(&output.stderr).to_string(),
1230 ))
1231 }
1232 }
1233
1234 #[cfg(unix)]
1235 fn check_external_scanner(&self, library_path: &Path) -> LoaderResult<()> {
1236 let section = " T ";
1237 let old_ppc_section = if cfg!(all(target_arch = "powerpc64", target_os = "linux")) {
1240 Some(" D ")
1241 } else {
1242 None
1243 };
1244 let nm_cmd = env::var("NM").unwrap_or_else(|_| "nm".to_owned());
1245 let command = Command::new(nm_cmd)
1246 .arg("--defined-only")
1247 .arg(library_path)
1248 .output();
1249 if let Ok(output) = command {
1250 if output.status.success() {
1251 let mut non_static_symbols = String::new();
1252 for line in String::from_utf8_lossy(&output.stdout).lines() {
1253 if line.contains(section) || old_ppc_section.is_some_and(|s| line.contains(s)) {
1254 if let Some(function_name) =
1255 line.split_whitespace().collect::<Vec<_>>().get(2)
1256 {
1257 if !line.contains("tree_sitter_") {
1258 writeln!(&mut non_static_symbols, " `{function_name}`").unwrap();
1259 }
1260 }
1261 }
1262 }
1263 if !non_static_symbols.is_empty() {
1264 warn!(
1265 "Found non-static non-tree-sitter functions in the external scanner\n{non_static_symbols}\n{}",
1266 concat!(
1267 "Consider making these functions static, they can cause conflicts ",
1268 "when another tree-sitter project uses the same function name."
1269 )
1270 );
1271 }
1272 }
1273 } else {
1274 warn!(
1275 "Failed to run `nm` to verify symbols in {}",
1276 library_path.display()
1277 );
1278 }
1279
1280 Ok(())
1281 }
1282
    /// Windows variant of the external-scanner symbol check: it does not
    /// inspect the library at all and always returns `Ok(())`.
    #[cfg(windows)]
    fn check_external_scanner(&self, _library_path: &Path) -> LoaderResult<()> {
        Ok(())
    }
1288
1289 pub fn compile_parser_to_wasm(
1290 &self,
1291 language_name: &str,
1292 src_path: &Path,
1293 scanner_filename: Option<&Path>,
1294 output_path: &Path,
1295 ) -> LoaderResult<()> {
1296 let clang_executable = self.ensure_wasi_sdk_exists()?;
1297
1298 let mut command = Command::new(&clang_executable);
1299 command.current_dir(src_path).args([
1300 "--target=wasm32-unknown-wasi",
1301 "-o",
1302 output_path.to_str().unwrap(),
1303 "-fPIC",
1304 "-shared",
1305 if self.debug_build { "-g" } else { "-Os" },
1306 format!("-Wl,--export=tree_sitter_{language_name}").as_str(),
1307 "-Wl,--allow-undefined",
1308 "-Wl,--no-entry",
1309 "-nostdlib",
1310 "-fno-exceptions",
1311 "-fvisibility=hidden",
1312 "-I",
1313 ".",
1314 "parser.c",
1315 ]);
1316
1317 if let Some(scanner_filename) = scanner_filename {
1318 command.arg(scanner_filename);
1319 }
1320
1321 let output = command.output().map_err(LoaderError::WasmCompiler)?;
1322
1323 if !output.status.success() {
1324 return Err(LoaderError::WasmCompilation(
1325 String::from_utf8_lossy(&output.stderr).to_string(),
1326 ));
1327 }
1328
1329 Ok(())
1330 }
1331
1332 fn extract_tar_gz_with_strip(
1334 &self,
1335 archive_path: &Path,
1336 destination: &Path,
1337 ) -> LoaderResult<()> {
1338 let status = Command::new("tar")
1339 .arg("-xzf")
1340 .arg(archive_path)
1341 .arg("--strip-components=1")
1342 .arg("-C")
1343 .arg(destination)
1344 .status()
1345 .map_err(|e| LoaderError::Tar(archive_path.to_string_lossy().to_string(), e))?;
1346
1347 if !status.success() {
1348 return Err(LoaderError::Extraction(
1349 archive_path.to_string_lossy().to_string(),
1350 destination.to_string_lossy().to_string(),
1351 ));
1352 }
1353
1354 Ok(())
1355 }
1356
1357 fn ensure_wasi_sdk_exists(&self) -> LoaderResult<PathBuf> {
1362 let possible_executables = if cfg!(windows) {
1363 vec![
1364 "clang.exe",
1365 "wasm32-unknown-wasi-clang.exe",
1366 "wasm32-wasi-clang.exe",
1367 ]
1368 } else {
1369 vec!["clang", "wasm32-unknown-wasi-clang", "wasm32-wasi-clang"]
1370 };
1371
1372 if let Ok(wasi_sdk_path) = std::env::var("TREE_SITTER_WASI_SDK_PATH") {
1373 let wasi_sdk_dir = PathBuf::from(wasi_sdk_path);
1374
1375 for exe in &possible_executables {
1376 let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1377 if clang_exe.exists() {
1378 return Ok(clang_exe);
1379 }
1380 }
1381
1382 return Err(LoaderError::WasiSDKClang(WasiSDKClangError {
1383 wasi_sdk_dir: wasi_sdk_dir.to_string_lossy().to_string(),
1384 possible_executables,
1385 download: false,
1386 }));
1387 }
1388
1389 let cache_dir = etcetera::choose_base_strategy()?
1390 .cache_dir()
1391 .join("tree-sitter");
1392 fs::create_dir_all(&cache_dir)
1393 .map_err(|e| LoaderError::IO(IoError::new(e, Some(cache_dir.as_path()))))?;
1394
1395 let wasi_sdk_dir = cache_dir.join("wasi-sdk");
1396
1397 for exe in &possible_executables {
1398 let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1399 if clang_exe.exists() {
1400 return Ok(clang_exe);
1401 }
1402 }
1403
1404 fs::create_dir_all(&wasi_sdk_dir)
1405 .map_err(|e| LoaderError::IO(IoError::new(e, Some(wasi_sdk_dir.as_path()))))?;
1406
1407 let arch_os = if cfg!(target_os = "macos") {
1408 if cfg!(target_arch = "aarch64") {
1409 "arm64-macos"
1410 } else {
1411 "x86_64-macos"
1412 }
1413 } else if cfg!(target_os = "windows") {
1414 if cfg!(target_arch = "aarch64") {
1415 "arm64-windows"
1416 } else {
1417 "x86_64-windows"
1418 }
1419 } else if cfg!(target_os = "linux") {
1420 if cfg!(target_arch = "aarch64") {
1421 "arm64-linux"
1422 } else {
1423 "x86_64-linux"
1424 }
1425 } else {
1426 return Err(LoaderError::WasiSDKPlatform);
1427 };
1428
1429 let sdk_filename = format!("wasi-sdk-{WASI_SDK_VERSION}-{arch_os}.tar.gz");
1430 let wasi_sdk_major_version = WASI_SDK_VERSION
1431 .trim_end_matches(char::is_numeric) .trim_end_matches('.'); let sdk_url = format!(
1434 "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-{wasi_sdk_major_version}/{sdk_filename}",
1435 );
1436
1437 info!("Downloading wasi-sdk from {sdk_url}...");
1438 let temp_tar_path = cache_dir.join(sdk_filename);
1439
1440 let status = Command::new("curl")
1441 .arg("-f")
1442 .arg("-L")
1443 .arg("-o")
1444 .arg(&temp_tar_path)
1445 .arg(&sdk_url)
1446 .status()
1447 .map_err(|e| LoaderError::Curl(sdk_url.clone(), e))?;
1448
1449 if !status.success() {
1450 return Err(LoaderError::WasiSDKDownload(sdk_url));
1451 }
1452
1453 info!("Extracting wasi-sdk to {}...", wasi_sdk_dir.display());
1454 self.extract_tar_gz_with_strip(&temp_tar_path, &wasi_sdk_dir)?;
1455
1456 fs::remove_file(temp_tar_path).ok();
1457 for exe in &possible_executables {
1458 let clang_exe = wasi_sdk_dir.join("bin").join(exe);
1459 if clang_exe.exists() {
1460 return Ok(clang_exe);
1461 }
1462 }
1463
1464 Err(LoaderError::WasiSDKClang(WasiSDKClangError {
1465 wasi_sdk_dir: wasi_sdk_dir.to_string_lossy().to_string(),
1466 possible_executables,
1467 download: true,
1468 }))
1469 }
1470
1471 #[must_use]
1472 #[cfg(feature = "tree-sitter-highlight")]
1473 pub fn highlight_config_for_injection_string<'a>(
1474 &'a self,
1475 string: &str,
1476 ) -> Option<&'a HighlightConfiguration> {
1477 match self.language_configuration_for_injection_string(string) {
1478 Err(e) => {
1479 error!("Failed to load language for injection string '{string}': {e}");
1480 None
1481 }
1482 Ok(None) => None,
1483 Ok(Some((language, configuration))) => {
1484 match configuration.highlight_config(language, None) {
1485 Err(e) => {
1486 error!(
1487 "Failed to load higlight config for injection string '{string}': {e}"
1488 );
1489 None
1490 }
1491 Ok(None) => None,
1492 Ok(Some(config)) => Some(config),
1493 }
1494 }
1495 }
1496 }
1497
1498 #[must_use]
1499 pub fn get_language_configuration_in_current_path(&self) -> Option<&LanguageConfiguration> {
1500 self.language_configuration_in_current_path
1501 .map(|i| &self.language_configurations[i])
1502 }
1503
    /// Registers the language configurations found under `parser_path` and
    /// returns the slice of configurations added by this call.
    ///
    /// Configurations come from `TreeSitterJSON::from_file(parser_path)`, one
    /// per grammar entry. If that yields no configuration and
    /// `<parser_path>/src/grammar.json` exists, a single minimal configuration
    /// is created whose name is read from that file.
    ///
    /// # Parameters
    ///
    /// * `parser_path` - directory to inspect.
    /// * `set_current_path_config` - when `true` and no "current path"
    ///   configuration is recorded yet, the first configuration created from
    ///   the JSON grammars is recorded as such. The `grammar.json` fallback
    ///   never records one.
    ///
    /// # Errors
    ///
    /// * [`LoaderError::ExternalFile`] if an external file of a grammar does
    ///   not stay under `parser_path` after joining.
    /// * Any error from reading the grammar name out of `grammar.json`.
    ///
    /// A [`LoaderError::Serialization`] error from loading the JSON
    /// configuration is only logged as a warning; every other error from that
    /// step is ignored.
    pub fn find_language_configurations_at_path(
        &mut self,
        parser_path: &Path,
        set_current_path_config: bool,
    ) -> LoaderResult<&[LanguageConfiguration]> {
        // Everything pushed past this index belongs to this call.
        let initial_language_configuration_count = self.language_configurations.len();

        match TreeSitterJSON::from_file(parser_path) {
            Ok(config) => {
                let language_count = self.languages_by_id.len();
                for grammar in config.grammars {
                    // A grammar without an explicit path lives directly in `parser_path`.
                    let language_path =
                        parser_path.join(grammar.path.unwrap_or(PathBuf::from(".")));

                    // Reuse a language entry with the same path, but only among the
                    // entries added during this call (index >= `language_count`).
                    let mut language_id = None;
                    for (id, (path, _, _)) in
                        self.languages_by_id.iter().enumerate().skip(language_count)
                    {
                        if language_path == *path {
                            language_id = Some(id);
                        }
                    }

                    let language_id = if let Some(language_id) = language_id {
                        language_id
                    } else {
                        // No entry yet: add one, resolving the external files against
                        // `parser_path`.
                        self.languages_by_id.push((
                            language_path,
                            OnceCell::new(),
                            grammar
                                .external_files
                                .clone()
                                .into_vec()
                                .map(|files| {
                                    files
                                        .into_iter()
                                        .map(|path| {
                                            let path = parser_path.join(path);
                                            // Reject files whose joined path no longer
                                            // starts with `parser_path`.
                                            if path.starts_with(parser_path) {
                                                Ok(path)
                                            } else {
                                                Err(LoaderError::ExternalFile(
                                                    path.to_string_lossy().to_string(),
                                                    parser_path.to_string_lossy().to_string(),
                                                ))
                                            }
                                        })
                                        .collect::<LoaderResult<Vec<_>>>()
                                })
                                .transpose()?,
                        ));
                        self.languages_by_id.len() - 1
                    };

                    let configuration = LanguageConfiguration {
                        root_path: parser_path.to_path_buf(),
                        language_name: grammar.name,
                        scope: Some(grammar.scope),
                        language_id,
                        file_types: grammar.file_types.unwrap_or_default(),
                        content_regex: Self::regex(grammar.content_regex.as_deref()),
                        first_line_regex: Self::regex(grammar.first_line_regex.as_deref()),
                        injection_regex: Self::regex(grammar.injection_regex.as_deref()),
                        injections_filenames: grammar.injections.into_vec(),
                        locals_filenames: grammar.locals.into_vec(),
                        tags_filenames: grammar.tags.into_vec(),
                        highlights_filenames: grammar.highlights.into_vec(),
                        #[cfg(feature = "tree-sitter-highlight")]
                        highlight_config: OnceCell::new(),
                        #[cfg(feature = "tree-sitter-tags")]
                        tags_config: OnceCell::new(),
                        #[cfg(feature = "tree-sitter-highlight")]
                        highlight_names: &self.highlight_names,
                        #[cfg(feature = "tree-sitter-highlight")]
                        use_all_highlight_names: self.use_all_highlight_names,
                        _phantom: PhantomData,
                    };

                    // Index the configuration (by the position it is about to occupy)
                    // under each of its file types and under its first-line regex.
                    for file_type in &configuration.file_types {
                        self.language_configuration_ids_by_file_type
                            .entry(file_type.clone())
                            .or_default()
                            .push(self.language_configurations.len());
                    }
                    if let Some(first_line_regex) = &configuration.first_line_regex {
                        self.language_configuration_ids_by_first_line_regex
                            .entry(first_line_regex.to_string())
                            .or_default()
                            .push(self.language_configurations.len());
                    }

                    // The transmute only changes the lifetime parameter to `'static`
                    // so the value can be stored inside `self`.
                    // NOTE(review): with the highlight feature the configuration holds
                    // `&self.highlight_names`; soundness presumably relies on that
                    // field outliving the stored configurations — confirm.
                    self.language_configurations.push(unsafe {
                        mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
                            configuration,
                        )
                    });

                    // Only the first configuration found can become the "current path" one.
                    if set_current_path_config
                        && self.language_configuration_in_current_path.is_none()
                    {
                        self.language_configuration_in_current_path =
                            Some(self.language_configurations.len() - 1);
                    }
                }
            }
            Err(LoaderError::Serialization(e)) => {
                warn!(
                    "Failed to parse {} -- {e}",
                    parser_path.join("tree-sitter.json").display()
                );
            }
            // Any other failure to load the JSON configuration is ignored.
            _ => {}
        }

        // Fallback: nothing was registered above, but a generated grammar exists, so
        // build a bare configuration that only carries the grammar's name.
        if self.language_configurations.len() == initial_language_configuration_count
            && parser_path.join("src").join("grammar.json").exists()
        {
            let grammar_path = parser_path.join("src").join("grammar.json");
            let language_name = Self::grammar_json_name(&grammar_path)?;
            let configuration = LanguageConfiguration {
                root_path: parser_path.to_owned(),
                language_name,
                // Index of the `languages_by_id` entry pushed just below.
                language_id: self.languages_by_id.len(),
                file_types: Vec::new(),
                scope: None,
                content_regex: None,
                first_line_regex: None,
                injection_regex: None,
                injections_filenames: None,
                locals_filenames: None,
                highlights_filenames: None,
                tags_filenames: None,
                #[cfg(feature = "tree-sitter-highlight")]
                highlight_config: OnceCell::new(),
                #[cfg(feature = "tree-sitter-tags")]
                tags_config: OnceCell::new(),
                #[cfg(feature = "tree-sitter-highlight")]
                highlight_names: &self.highlight_names,
                #[cfg(feature = "tree-sitter-highlight")]
                use_all_highlight_names: self.use_all_highlight_names,
                _phantom: PhantomData,
            };
            // Same lifetime-only transmute as for the JSON-derived configurations.
            self.language_configurations.push(unsafe {
                mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
                    configuration,
                )
            });
            self.languages_by_id
                .push((parser_path.to_owned(), OnceCell::new(), None));
        }

        Ok(&self.language_configurations[initial_language_configuration_count..])
    }
1667
1668 fn regex(pattern: Option<&str>) -> Option<Regex> {
1669 pattern.and_then(|r| RegexBuilder::new(r).multi_line(true).build().ok())
1670 }
1671
1672 fn grammar_json_name(grammar_path: &Path) -> LoaderResult<String> {
1673 let file = fs::File::open(grammar_path)
1674 .map_err(|e| LoaderError::IO(IoError::new(e, Some(grammar_path))))?;
1675
1676 let first_three_lines = BufReader::new(file)
1677 .lines()
1678 .take(3)
1679 .collect::<Result<Vec<_>, std::io::Error>>()
1680 .map_err(|_| LoaderError::GrammarJSON(grammar_path.to_string_lossy().to_string()))?
1681 .join("\n");
1682
1683 let name = GRAMMAR_NAME_REGEX
1684 .captures(&first_three_lines)
1685 .and_then(|c| c.get(1))
1686 .ok_or_else(|| LoaderError::GrammarJSON(grammar_path.to_string_lossy().to_string()))?;
1687
1688 Ok(name.as_str().to_string())
1689 }
1690
1691 pub fn select_language(
1692 &mut self,
1693 path: Option<&Path>,
1694 current_dir: &Path,
1695 scope: Option<&str>,
1696 lib_info: Option<&(PathBuf, &str)>,
1698 ) -> LoaderResult<Language> {
1699 if let Some((ref lib_path, language_name)) = lib_info {
1700 let language_fn_name = format!("tree_sitter_{}", language_name.replace('-', "_"));
1701 Self::load_language(lib_path, &language_fn_name)
1702 } else if let Some(scope) = scope {
1703 if let Some(config) = self
1704 .language_configuration_for_scope(scope)
1705 .map_err(|e| LoaderError::ScopeLoad(scope.to_string(), Box::new(e)))?
1706 {
1707 Ok(config.0)
1708 } else {
1709 Err(LoaderError::UnknownScope(scope.to_string()))
1710 }
1711 } else if let Some((lang, _)) = if let Some(path) = path {
1712 self.language_configuration_for_file_name(path)
1713 .map_err(|e| {
1714 LoaderError::FileNameLoad(
1715 path.file_name().unwrap().to_string_lossy().to_string(),
1716 Box::new(e),
1717 )
1718 })?
1719 } else {
1720 None
1721 } {
1722 Ok(lang)
1723 } else if let Some(id) = self.language_configuration_in_current_path {
1724 Ok(self.language_for_id(self.language_configurations[id].language_id)?)
1725 } else if let Some(lang) = self
1726 .languages_at_path(current_dir)
1727 .map_err(|e| LoaderError::CurrentDirectoryLoad(Box::new(e)))?
1728 .first()
1729 .cloned()
1730 {
1731 Ok(lang.0)
1732 } else if let Some(lang) = if let Some(path) = path {
1733 self.language_configuration_for_first_line_regex(path)?
1734 } else {
1735 None
1736 } {
1737 Ok(lang.0)
1738 } else {
1739 Err(LoaderError::NoLanguage)
1740 }
1741 }
1742
    /// Sets whether parsers are compiled as debug builds.
    ///
    /// The native compilation path reads this flag to choose optimisation
    /// level 0 with extra warnings and debug info instead of level 2; the Wasm
    /// compilation path uses it to pass `-g` instead of `-Os`.
    pub const fn debug_build(&mut self, flag: bool) {
        self.debug_build = flag;
    }
1746
    /// Stores `flag` in the loader's `sanitize_build` setting.
    ///
    /// NOTE(review): presumably this enables sanitizer instrumentation when
    /// parsers are compiled — confirm where the field is read.
    pub const fn sanitize_build(&mut self, flag: bool) {
        self.sanitize_build = flag;
    }
1750
    /// Stores `rebuild` in the loader's `force_rebuild` setting.
    ///
    /// NOTE(review): presumably this makes the loader recompile a parser even
    /// when its library looks up to date — confirm where the field is read.
    pub const fn force_rebuild(&mut self, rebuild: bool) {
        self.force_rebuild = rebuild;
    }
1754
1755 #[cfg(feature = "wasm")]
1756 #[cfg_attr(docsrs, doc(cfg(feature = "wasm")))]
1757 pub fn use_wasm(&mut self, engine: &tree_sitter::wasmtime::Engine) {
1758 *self.wasm_store.lock().unwrap() = Some(tree_sitter::WasmStore::new(engine).unwrap());
1759 }
1760
1761 #[must_use]
1762 pub fn get_scanner_path(&self, src_path: &Path) -> Option<PathBuf> {
1763 let path = src_path.join("scanner.c");
1764 path.exists().then_some(path)
1765 }
1766}
1767
1768impl LanguageConfiguration<'_> {
1769 #[cfg(feature = "tree-sitter-highlight")]
1770 pub fn highlight_config(
1771 &self,
1772 language: Language,
1773 paths: Option<&[PathBuf]>,
1774 ) -> LoaderResult<Option<&HighlightConfiguration>> {
1775 let (highlights_filenames, injections_filenames, locals_filenames) = match paths {
1776 Some(paths) => (
1777 Some(
1778 paths
1779 .iter()
1780 .filter(|p| p.ends_with(DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME))
1781 .cloned()
1782 .collect::<Vec<_>>(),
1783 ),
1784 Some(
1785 paths
1786 .iter()
1787 .filter(|p| p.ends_with(DEFAULT_TAGS_QUERY_FILE_NAME))
1788 .cloned()
1789 .collect::<Vec<_>>(),
1790 ),
1791 Some(
1792 paths
1793 .iter()
1794 .filter(|p| p.ends_with(DEFAULT_LOCALS_QUERY_FILE_NAME))
1795 .cloned()
1796 .collect::<Vec<_>>(),
1797 ),
1798 ),
1799 None => (None, None, None),
1800 };
1801 self.highlight_config
1802 .get_or_try_init(|| {
1803 let (highlights_query, highlight_ranges) = self.read_queries(
1804 if highlights_filenames.is_some() {
1805 highlights_filenames.as_deref()
1806 } else {
1807 self.highlights_filenames.as_deref()
1808 },
1809 DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME,
1810 )?;
1811 let (injections_query, injection_ranges) = self.read_queries(
1812 if injections_filenames.is_some() {
1813 injections_filenames.as_deref()
1814 } else {
1815 self.injections_filenames.as_deref()
1816 },
1817 DEFAULT_INJECTIONS_QUERY_FILE_NAME,
1818 )?;
1819 let (locals_query, locals_ranges) = self.read_queries(
1820 if locals_filenames.is_some() {
1821 locals_filenames.as_deref()
1822 } else {
1823 self.locals_filenames.as_deref()
1824 },
1825 DEFAULT_LOCALS_QUERY_FILE_NAME,
1826 )?;
1827
1828 if highlights_query.is_empty() {
1829 Ok(None)
1830 } else {
1831 let mut result = HighlightConfiguration::new(
1832 language,
1833 &self.language_name,
1834 &highlights_query,
1835 &injections_query,
1836 &locals_query,
1837 )
1838 .map_err(|error| match error.kind {
1839 QueryErrorKind::Language => {
1840 LoaderError::Query(LoaderQueryError { error, file: None })
1841 }
1842 _ => {
1843 if error.offset < injections_query.len() {
1844 Self::include_path_in_query_error(
1845 error,
1846 &injection_ranges,
1847 &injections_query,
1848 0,
1849 )
1850 } else if error.offset < injections_query.len() + locals_query.len() {
1851 Self::include_path_in_query_error(
1852 error,
1853 &locals_ranges,
1854 &locals_query,
1855 injections_query.len(),
1856 )
1857 } else {
1858 Self::include_path_in_query_error(
1859 error,
1860 &highlight_ranges,
1861 &highlights_query,
1862 injections_query.len() + locals_query.len(),
1863 )
1864 }
1865 }
1866 })?;
1867 let mut all_highlight_names = self.highlight_names.lock().unwrap();
1868 if self.use_all_highlight_names {
1869 for capture_name in result.query.capture_names() {
1870 if !all_highlight_names.iter().any(|x| x == capture_name) {
1871 all_highlight_names.push((*capture_name).to_string());
1872 }
1873 }
1874 }
1875 result.configure(all_highlight_names.as_slice());
1876 drop(all_highlight_names);
1877 Ok(Some(result))
1878 }
1879 })
1880 .map(Option::as_ref)
1881 }
1882
1883 #[cfg(feature = "tree-sitter-tags")]
1884 pub fn tags_config(&self, language: Language) -> LoaderResult<Option<&TagsConfiguration>> {
1885 self.tags_config
1886 .get_or_try_init(|| {
1887 let (tags_query, tags_ranges) = self
1888 .read_queries(self.tags_filenames.as_deref(), DEFAULT_TAGS_QUERY_FILE_NAME)?;
1889 let (locals_query, locals_ranges) = self.read_queries(
1890 self.locals_filenames.as_deref(),
1891 DEFAULT_LOCALS_QUERY_FILE_NAME,
1892 )?;
1893 if tags_query.is_empty() {
1894 Ok(None)
1895 } else {
1896 TagsConfiguration::new(language, &tags_query, &locals_query)
1897 .map(Some)
1898 .map_err(|error| {
1899 if let TagsError::Query(error) = error {
1900 if error.offset < locals_query.len() {
1901 Self::include_path_in_query_error(
1902 error,
1903 &locals_ranges,
1904 &locals_query,
1905 0,
1906 )
1907 } else {
1908 Self::include_path_in_query_error(
1909 error,
1910 &tags_ranges,
1911 &tags_query,
1912 locals_query.len(),
1913 )
1914 }
1915 } else {
1916 error.into()
1917 }
1918 })
1919 }
1920 })
1921 .map(Option::as_ref)
1922 }
1923
1924 #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1925 fn include_path_in_query_error(
1926 mut error: QueryError,
1927 ranges: &[(PathBuf, Range<usize>)],
1928 source: &str,
1929 start_offset: usize,
1930 ) -> LoaderError {
1931 let offset_within_section = error.offset - start_offset;
1932 let (path, range) = ranges
1933 .iter()
1934 .find(|(_, range)| range.contains(&offset_within_section))
1935 .unwrap_or_else(|| ranges.last().unwrap());
1936 error.offset = offset_within_section - range.start;
1937 error.row = source[range.start..offset_within_section]
1938 .matches('\n')
1939 .count();
1940 LoaderError::Query(LoaderQueryError {
1941 error,
1942 file: Some(path.to_string_lossy().to_string()),
1943 })
1944 }
1945
1946 #[allow(clippy::type_complexity)]
1947 #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1948 fn read_queries(
1949 &self,
1950 paths: Option<&[PathBuf]>,
1951 default_path: &str,
1952 ) -> LoaderResult<(String, Vec<(PathBuf, Range<usize>)>)> {
1953 let mut query = String::new();
1954 let mut path_ranges = Vec::new();
1955 if let Some(paths) = paths {
1956 for path in paths {
1957 let abs_path = self.root_path.join(path);
1958 let prev_query_len = query.len();
1959 query += &fs::read_to_string(&abs_path)
1960 .map_err(|e| LoaderError::IO(IoError::new(e, Some(abs_path.as_path()))))?;
1961 path_ranges.push((path.clone(), prev_query_len..query.len()));
1962 }
1963 } else {
1964 if default_path == DEFAULT_HIGHLIGHTS_QUERY_FILE_NAME
1966 || default_path == DEFAULT_TAGS_QUERY_FILE_NAME
1967 {
1968 warn!(
1969 concat!(
1970 "You should add a `{}` entry pointing to the {} path in the `tree-sitter` ",
1971 "object in the grammar's tree-sitter.json file. See more here: ",
1972 "https://tree-sitter.github.io/tree-sitter/3-syntax-highlighting#query-paths"
1973 ),
1974 default_path.replace(".scm", ""),
1975 default_path
1976 );
1977 }
1978 let queries_path = self.root_path.join("queries");
1979 let path = queries_path.join(default_path);
1980 if path.exists() {
1981 query = fs::read_to_string(&path)
1982 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path.as_path()))))?;
1983 path_ranges.push((PathBuf::from(default_path), 0..query.len()));
1984 }
1985 }
1986
1987 Ok((query, path_ranges))
1988 }
1989}
1990
1991fn needs_recompile(lib_path: &Path, paths_to_check: &[PathBuf]) -> LoaderResult<bool> {
1992 if !lib_path.exists() {
1993 return Ok(true);
1994 }
1995 let lib_mtime = mtime(lib_path).map_err(|e| LoaderError::ModifiedTime(Box::new(e)))?;
1996 for path in paths_to_check {
1997 if mtime(path).map_err(|e| LoaderError::ModifiedTime(Box::new(e)))? > lib_mtime {
1998 return Ok(true);
1999 }
2000 }
2001 Ok(false)
2002}
2003
2004fn mtime(path: &Path) -> LoaderResult<SystemTime> {
2005 fs::metadata(path)
2006 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))?
2007 .modified()
2008 .map_err(|e| LoaderError::IO(IoError::new(e, Some(path))))
2009}