1#![doc = include_str!("../README.md")]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3
4#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
5use std::ops::Range;
6#[cfg(feature = "tree-sitter-highlight")]
7use std::sync::Mutex;
8use std::{
9 collections::HashMap,
10 env,
11 ffi::{OsStr, OsString},
12 fs,
13 io::{BufRead, BufReader},
14 marker::PhantomData,
15 mem,
16 path::{Path, PathBuf},
17 process::Command,
18 sync::LazyLock,
19 time::SystemTime,
20};
21
22use anyhow::Error;
23use anyhow::{anyhow, Context, Result};
24use etcetera::BaseStrategy as _;
25use fs4::fs_std::FileExt;
26use indoc::indoc;
27use libloading::{Library, Symbol};
28use once_cell::unsync::OnceCell;
29use path_slash::PathBufExt as _;
30use regex::{Regex, RegexBuilder};
31use semver::Version;
32use serde::{Deserialize, Deserializer, Serialize};
33use tree_sitter::Language;
34#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
35use tree_sitter::QueryError;
36#[cfg(feature = "tree-sitter-highlight")]
37use tree_sitter::QueryErrorKind;
38#[cfg(feature = "tree-sitter-highlight")]
39use tree_sitter_highlight::HighlightConfiguration;
40#[cfg(feature = "tree-sitter-tags")]
41use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
42use url::Url;
43
/// Matches a `"name": "<value>"` pair in JSON text; capture group 1 is the
/// (non-greedy) value between the quotes. Compiled lazily on first use.
static GRAMMAR_NAME_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#""name":\s*"(.*?)""#).unwrap());

/// Container image reference for the Emscripten SDK. The tag is taken from the
/// `EMSCRIPTEN_VERSION` environment variable at compile time.
pub const EMSCRIPTEN_TAG: &str = concat!("docker.io/emscripten/emsdk:", env!("EMSCRIPTEN_VERSION"));
48
/// Loader configuration that can be (de)serialized with serde.
#[derive(Default, Deserialize, Serialize)]
pub struct Config {
    /// Directories that are scanned for `tree-sitter-*` grammar directories.
    ///
    /// Stored under the key `parser-directories`. Missing keys deserialize to an
    /// empty list, and present values go through `deserialize_parser_directories`.
    #[serde(default)]
    #[serde(
        rename = "parser-directories",
        deserialize_with = "deserialize_parser_directories"
    )]
    pub parser_directories: Vec<PathBuf>,
}
58
/// A JSON value that may be absent, a single path, or a list of paths.
///
/// The enum is `untagged`, so serde tries the variants in declaration order
/// when deserializing.
#[derive(Serialize, Deserialize, Clone, Default)]
#[serde(untagged)]
pub enum PathsJSON {
    /// No path given; this is the default.
    #[default]
    Empty,
    /// Exactly one path.
    Single(PathBuf),
    /// A list of paths.
    Multiple(Vec<PathBuf>),
}
67
68impl PathsJSON {
69 fn into_vec(self) -> Option<Vec<PathBuf>> {
70 match self {
71 Self::Empty => None,
72 Self::Single(s) => Some(vec![s]),
73 Self::Multiple(s) => Some(s),
74 }
75 }
76
77 const fn is_empty(&self) -> bool {
78 matches!(self, Self::Empty)
79 }
80}
81
/// An author entry of a `package.json`, either a plain string or an object.
///
/// The enum is `untagged`: serde picks whichever variant matches the JSON shape.
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONAuthor {
    /// Author given as a single string.
    String(String),
    /// Author given as an object with a mandatory name and optional contact details.
    Object {
        name: String,
        email: Option<String>,
        url: Option<String>,
    },
}
92
/// The `repository` entry of a `package.json`, either a plain string or an
/// object carrying a `url` (untagged: chosen by JSON shape).
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum PackageJSONRepository {
    /// Repository given as a single string.
    String(String),
    /// Repository given as an object with a `url` field.
    Object { url: String },
}
99
/// The subset of a `package.json` file that this crate reads and writes.
#[derive(Serialize, Deserialize)]
pub struct PackageJSON {
    pub name: String,
    /// Parsed as a semantic version; deserialization fails if it is not one.
    pub version: Version,
    pub description: Option<String>,
    pub author: Option<PackageJSONAuthor>,
    pub maintainers: Option<Vec<PackageJSONAuthor>>,
    pub license: Option<String>,
    pub repository: Option<PackageJSONRepository>,
    /// Language configurations stored under the `tree-sitter` key. Defaults to
    /// `None` when absent and is omitted from the output when `None`.
    #[serde(default)]
    #[serde(rename = "tree-sitter", skip_serializing_if = "Option::is_none")]
    pub tree_sitter: Option<Vec<LanguageConfigurationJSON>>,
}
113
/// Serde default for `LanguageConfigurationJSON::path`: the current directory (`.`).
fn default_path() -> PathBuf {
    Path::new(".").to_path_buf()
}
117
/// One language configuration entry as stored in JSON.
///
/// Field names are converted to kebab-case on the wire (for example
/// `file_types` becomes `file-types`).
#[derive(Serialize, Deserialize, Clone)]
#[serde(rename_all = "kebab-case")]
pub struct LanguageConfigurationJSON {
    /// Defaults to `.` (see `default_path`) when the key is absent.
    #[serde(default = "default_path")]
    pub path: PathBuf,
    pub scope: Option<String>,
    pub file_types: Option<Vec<String>>,
    pub content_regex: Option<String>,
    pub first_line_regex: Option<String>,
    pub injection_regex: Option<String>,
    // Each of the following path lists defaults to `PathsJSON::Empty` when the
    // key is absent and is left out of the serialized output when empty.
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
}
139
/// In-memory form of a `tree-sitter.json` file (see [`TreeSitterJSON::from_file`]).
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct TreeSitterJSON {
    /// Stored under the literal key `$schema`.
    #[serde(rename = "$schema")]
    pub schema: Option<String>,
    /// The grammars declared by this file.
    pub grammars: Vec<Grammar>,
    pub metadata: Metadata,
    /// Falls back to `Bindings::default()` when the key is absent.
    #[serde(default)]
    pub bindings: Bindings,
}
150
151impl TreeSitterJSON {
152 pub fn from_file(path: &Path) -> Result<Self> {
153 Ok(serde_json::from_str(&fs::read_to_string(
154 path.join("tree-sitter.json"),
155 )?)?)
156 }
157
158 #[must_use]
159 pub fn has_multiple_language_configs(&self) -> bool {
160 self.grammars.len() > 1
161 }
162}
163
/// One grammar entry of a `tree-sitter.json` file.
///
/// Field names are kebab-case on the wire. Optional fields are omitted from
/// the serialized output when unset, except `file_types`, which carries no
/// `skip_serializing_if` attribute.
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct Grammar {
    /// Grammar name; used as the language name of the resulting configuration.
    pub name: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub camelcase: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    pub scope: String,
    /// Location of the grammar relative to the directory containing the JSON
    /// file; the loader treats a missing value as `.`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub path: Option<PathBuf>,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub external_files: PathsJSON,
    pub file_types: Option<Vec<String>>,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub highlights: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub injections: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub locals: PathsJSON,
    #[serde(default, skip_serializing_if = "PathsJSON::is_empty")]
    pub tags: PathsJSON,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub injection_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub first_line_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_regex: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub class_name: Option<String>,
}
195
/// The `metadata` section of a `tree-sitter.json` file.
#[derive(Serialize, Deserialize)]
pub struct Metadata {
    /// Parsed as a semantic version.
    pub version: Version,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub license: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub authors: Option<Vec<Author>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub links: Option<Links>,
    /// Never read from or written to JSON (`#[serde(skip)]`); it is `None`
    /// after deserialization and can only be set programmatically.
    #[serde(skip)]
    pub namespace: Option<String>,
}
210
/// An author listed in [`Metadata::authors`]; only the name is mandatory.
#[derive(Serialize, Deserialize)]
pub struct Author {
    pub name: String,
    /// Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub email: Option<String>,
    /// Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub url: Option<String>,
}
219
/// Project links listed in [`Metadata::links`].
#[derive(Serialize, Deserialize)]
pub struct Links {
    /// Mandatory, and must parse as a URL.
    pub repository: Url,
    /// Optional; must parse as a URL when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub funding: Option<Url>,
    /// Optional; kept as a plain string, so it is not validated as a URL here.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub homepage: Option<String>,
}
228
/// Per-language binding switches from the `bindings` section of `tree-sitter.json`.
///
/// The container-level `#[serde(default)]` means any key missing from the JSON
/// takes its value from `Bindings::default()`.
#[derive(Serialize, Deserialize)]
#[serde(default)]
pub struct Bindings {
    pub c: bool,
    pub go: bool,
    /// Skipped by serde in both directions.
    #[serde(skip)]
    pub java: bool,
    /// Skipped by serde in both directions.
    #[serde(skip)]
    pub kotlin: bool,
    pub node: bool,
    pub python: bool,
    pub rust: bool,
    pub swift: bool,
    pub zig: bool,
}
244
245impl Default for Bindings {
246 fn default() -> Self {
247 Self {
248 c: true,
249 go: true,
250 java: false,
251 kotlin: false,
252 node: true,
253 python: true,
254 rust: true,
255 swift: true,
256 zig: false,
257 }
258 }
259}
260
261fn deserialize_parser_directories<'de, D>(deserializer: D) -> Result<Vec<PathBuf>, D::Error>
265where
266 D: Deserializer<'de>,
267{
268 let paths = Vec::<PathBuf>::deserialize(deserializer)?;
269 let Ok(home) = etcetera::home_dir() else {
270 return Ok(paths);
271 };
272 let standardized = paths
273 .into_iter()
274 .map(|path| standardize_path(path, &home))
275 .collect();
276 Ok(standardized)
277}
278
/// Replaces a leading `~` or `$HOME` path component with `home`.
///
/// The prefix is matched per path component, so `~user/x` is left untouched.
/// Paths without either prefix are returned unchanged.
fn standardize_path(path: PathBuf, home: &Path) -> PathBuf {
    for home_alias in ["~", "$HOME"] {
        if let Ok(rest) = path.strip_prefix(home_alias) {
            return home.join(rest);
        }
    }
    path
}
288
289impl Config {
290 #[must_use]
291 pub fn initial() -> Self {
292 let home_dir = etcetera::home_dir().expect("Cannot determine home directory");
293 Self {
294 parser_directories: vec![
295 home_dir.join("github"),
296 home_dir.join("src"),
297 home_dir.join("source"),
298 home_dir.join("projects"),
299 home_dir.join("dev"),
300 home_dir.join("git"),
301 ],
302 }
303 }
304}
305
/// Target triple passed to the C compiler configuration; read from the
/// `BUILD_TARGET` environment variable when this crate is compiled.
const BUILD_TARGET: &str = env!("BUILD_TARGET");
/// Host triple passed to the C compiler configuration; read from the
/// `BUILD_HOST` environment variable when this crate is compiled.
const BUILD_HOST: &str = env!("BUILD_HOST");
308
/// A loaded language configuration: how to recognise files of a language and
/// where its query files live.
pub struct LanguageConfiguration<'a> {
    pub scope: Option<String>,
    /// Matched against file contents to choose between several configurations
    /// registered for the same file type.
    pub content_regex: Option<Regex>,
    pub first_line_regex: Option<Regex>,
    /// Matched against injection strings to pick a language for injected code.
    pub injection_regex: Option<Regex>,
    pub file_types: Vec<String>,
    /// Directory the configuration was discovered in.
    pub root_path: PathBuf,
    pub highlights_filenames: Option<Vec<PathBuf>>,
    pub injections_filenames: Option<Vec<PathBuf>>,
    pub locals_filenames: Option<Vec<PathBuf>>,
    pub tags_filenames: Option<Vec<PathBuf>>,
    pub language_name: String,
    /// Index into the owning loader's `languages_by_id` table.
    language_id: usize,
    /// Highlight configuration cell, filled in after construction.
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_config: OnceCell<Option<HighlightConfiguration>>,
    /// Tags configuration cell, filled in after construction.
    #[cfg(feature = "tree-sitter-tags")]
    tags_config: OnceCell<Option<TagsConfiguration>>,
    /// Shared list of highlight names; presumably the one owned by the
    /// loader — TODO confirm at the construction site.
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_names: &'a Mutex<Vec<String>>,
    #[cfg(feature = "tree-sitter-highlight")]
    use_all_highlight_names: bool,
    /// Keeps the `'a` parameter in use even when the highlight feature (and
    /// with it the only `'a` reference) is compiled out.
    _phantom: PhantomData<&'a ()>,
}
332
/// Discovers grammars on disk, compiles them on demand and hands out the
/// resulting [`Language`] values.
pub struct Loader {
    /// Directory that holds the compiled parser libraries.
    pub parser_lib_path: PathBuf,
    /// One entry per known language: its directory, the lazily loaded
    /// `Language`, and its optional external files.
    languages_by_id: Vec<(PathBuf, OnceCell<Language>, Option<Vec<PathBuf>>)>,
    language_configurations: Vec<LanguageConfiguration<'static>>,
    /// Maps a file name or (possibly multi-part) extension to indices into
    /// `language_configurations`.
    language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
    /// Index of the configuration returned by
    /// `get_language_configuration_in_current_path`, if one was recorded.
    language_configuration_in_current_path: Option<usize>,
    /// Maps a first-line regex source string to indices into
    /// `language_configurations`.
    language_configuration_ids_by_first_line_regex: HashMap<String, Vec<usize>>,
    /// Boxed list of highlight names; replaced wholesale by `configure_highlights`.
    #[cfg(feature = "tree-sitter-highlight")]
    highlight_names: Box<Mutex<Vec<String>>>,
    /// Starts as `true`; set to `false` once `configure_highlights` is called.
    #[cfg(feature = "tree-sitter-highlight")]
    use_all_highlight_names: bool,
    /// Compile parsers with debug info and without optimisation.
    debug_build: bool,
    /// Request a sanitizer build (forwarded as `CompileConfig::sanitize`).
    sanitize_build: bool,
    /// Recompile even when the cached library looks up to date.
    force_rebuild: bool,

    /// When this holds a store, languages are compiled to and loaded from Wasm.
    #[cfg(feature = "wasm")]
    wasm_store: Mutex<Option<tree_sitter::WasmStore>>,
}
351
/// Everything needed to compile one parser.
pub struct CompileConfig<'a> {
    /// Directory containing the generated parser sources.
    pub src_path: &'a Path,
    /// Include directories passed to the C compiler.
    pub header_paths: Vec<&'a Path>,
    /// Path of the parser source file.
    pub parser_path: PathBuf,
    /// Path of the external scanner source, if any.
    pub scanner_path: Option<PathBuf>,
    /// Additional files that are taken into account by the staleness check.
    pub external_files: Option<&'a [PathBuf]>,
    /// Where the compiled artifact is written; `None` means "not chosen yet".
    pub output_path: Option<PathBuf>,
    /// Preprocessor names to define during compilation.
    pub flags: &'a [&'a str],
    pub sanitize: bool,
    /// Grammar name; empty until it is filled in by the loader.
    pub name: String,
}

impl<'a> CompileConfig<'a> {
    /// Creates a configuration for the sources in `src_path`.
    ///
    /// `src_path` is also the only header search path and `parser.c` inside it
    /// is the parser source. No scanner, no flags, no sanitizer and an empty
    /// name are set; callers adjust those fields afterwards.
    #[must_use]
    pub fn new(
        src_path: &'a Path,
        externals: Option<&'a [PathBuf]>,
        output_path: Option<PathBuf>,
    ) -> Self {
        let parser_path = src_path.join("parser.c");
        let header_paths = vec![src_path];
        Self {
            name: String::new(),
            sanitize: false,
            flags: &[],
            scanner_path: None,
            external_files: externals,
            header_paths,
            parser_path,
            src_path,
            output_path,
        }
    }
}
384
// SAFETY: NOTE(review) — `Loader` stores `once_cell::unsync::OnceCell` values,
// which are not `Sync` by themselves. This impl asserts that sharing a `Loader`
// across threads is nevertheless sound; the justification is not visible in
// this file section. Confirm that the cells are never initialized concurrently.
unsafe impl Sync for Loader {}
386
387impl Loader {
388 pub fn new() -> Result<Self> {
389 let parser_lib_path = if let Ok(path) = env::var("TREE_SITTER_LIBDIR") {
390 PathBuf::from(path)
391 } else {
392 if cfg!(target_os = "macos") {
393 let legacy_apple_path = etcetera::base_strategy::Apple::new()?
394 .cache_dir() .join("tree-sitter");
396 if legacy_apple_path.exists() && legacy_apple_path.is_dir() {
397 std::fs::remove_dir_all(legacy_apple_path)?;
398 }
399 }
400
401 etcetera::choose_base_strategy()?
402 .cache_dir()
403 .join("tree-sitter")
404 .join("lib")
405 };
406 Ok(Self::with_parser_lib_path(parser_lib_path))
407 }
408
409 #[must_use]
410 pub fn with_parser_lib_path(parser_lib_path: PathBuf) -> Self {
411 Self {
412 parser_lib_path,
413 languages_by_id: Vec::new(),
414 language_configurations: Vec::new(),
415 language_configuration_ids_by_file_type: HashMap::new(),
416 language_configuration_in_current_path: None,
417 language_configuration_ids_by_first_line_regex: HashMap::new(),
418 #[cfg(feature = "tree-sitter-highlight")]
419 highlight_names: Box::new(Mutex::new(Vec::new())),
420 #[cfg(feature = "tree-sitter-highlight")]
421 use_all_highlight_names: true,
422 debug_build: false,
423 sanitize_build: false,
424 force_rebuild: false,
425
426 #[cfg(feature = "wasm")]
427 wasm_store: Mutex::default(),
428 }
429 }
430
/// Replaces the list of recognised highlight names with `names` and stops
/// accepting every name implicitly.
///
/// # Panics
///
/// Panics if the highlight-name mutex is poisoned.
#[cfg(feature = "tree-sitter-highlight")]
#[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
pub fn configure_highlights(&mut self, names: &[String]) {
    self.use_all_highlight_names = false;
    let mut recognised = self.highlight_names.lock().unwrap();
    recognised.clear();
    recognised.extend_from_slice(names);
}
439
/// Returns a copy of the current list of highlight names.
///
/// # Panics
///
/// Panics if the highlight-name mutex is poisoned.
#[must_use]
#[cfg(feature = "tree-sitter-highlight")]
#[cfg_attr(docsrs, doc(cfg(feature = "tree-sitter-highlight")))]
pub fn highlight_names(&self) -> Vec<String> {
    let names = self.highlight_names.lock().unwrap();
    names.to_vec()
}
446
447 pub fn find_all_languages(&mut self, config: &Config) -> Result<()> {
448 if config.parser_directories.is_empty() {
449 eprintln!("Warning: You have not configured any parser directories!");
450 eprintln!("Please run `tree-sitter init-config` and edit the resulting");
451 eprintln!("configuration file to indicate where we should look for");
452 eprintln!("language grammars.\n");
453 }
454 for parser_container_dir in &config.parser_directories {
455 if let Ok(entries) = fs::read_dir(parser_container_dir) {
456 for entry in entries {
457 let entry = entry?;
458 if let Some(parser_dir_name) = entry.file_name().to_str() {
459 if parser_dir_name.starts_with("tree-sitter-") {
460 self.find_language_configurations_at_path(
461 &parser_container_dir.join(parser_dir_name),
462 false,
463 )
464 .ok();
465 }
466 }
467 }
468 }
469 }
470 Ok(())
471 }
472
473 pub fn languages_at_path(&mut self, path: &Path) -> Result<Vec<(Language, String)>> {
474 if let Ok(configurations) = self.find_language_configurations_at_path(path, true) {
475 let mut language_ids = configurations
476 .iter()
477 .map(|c| (c.language_id, c.language_name.clone()))
478 .collect::<Vec<_>>();
479 language_ids.sort_unstable();
480 language_ids.dedup();
481 language_ids
482 .into_iter()
483 .map(|(id, name)| Ok((self.language_for_id(id)?, name)))
484 .collect::<Result<Vec<_>>>()
485 } else {
486 Ok(Vec::new())
487 }
488 }
489
490 #[must_use]
491 pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> {
492 self.language_configurations
493 .iter()
494 .map(|c| (c, self.languages_by_id[c.language_id].0.as_ref()))
495 .collect()
496 }
497
498 pub fn language_configuration_for_scope(
499 &self,
500 scope: &str,
501 ) -> Result<Option<(Language, &LanguageConfiguration)>> {
502 for configuration in &self.language_configurations {
503 if configuration.scope.as_ref().is_some_and(|s| s == scope) {
504 let language = self.language_for_id(configuration.language_id)?;
505 return Ok(Some((language, configuration)));
506 }
507 }
508 Ok(None)
509 }
510
511 pub fn language_configuration_for_first_line_regex(
512 &self,
513 path: &Path,
514 ) -> Result<Option<(Language, &LanguageConfiguration)>> {
515 self.language_configuration_ids_by_first_line_regex
516 .iter()
517 .try_fold(None, |_, (regex, ids)| {
518 if let Some(regex) = Self::regex(Some(regex)) {
519 let file = fs::File::open(path)?;
520 let reader = BufReader::new(file);
521 let first_line = reader.lines().next().transpose()?;
522 if let Some(first_line) = first_line {
523 if regex.is_match(&first_line) && !ids.is_empty() {
524 let configuration = &self.language_configurations[ids[0]];
525 let language = self.language_for_id(configuration.language_id)?;
526 return Ok(Some((language, configuration)));
527 }
528 }
529 }
530
531 Ok(None)
532 })
533 }
534
535 pub fn language_configuration_for_file_name(
536 &self,
537 path: &Path,
538 ) -> Result<Option<(Language, &LanguageConfiguration)>> {
539 let configuration_ids = path
542 .file_name()
543 .and_then(|n| n.to_str())
544 .and_then(|file_name| self.language_configuration_ids_by_file_type.get(file_name))
545 .or_else(|| {
546 let mut path = path.to_owned();
547 let mut extensions = Vec::with_capacity(2);
548 while let Some(extension) = path.extension() {
549 extensions.push(extension.to_str()?.to_string());
550 path = PathBuf::from(path.file_stem()?.to_os_string());
551 }
552 extensions.reverse();
553 self.language_configuration_ids_by_file_type
554 .get(&extensions.join("."))
555 });
556
557 if let Some(configuration_ids) = configuration_ids {
558 if !configuration_ids.is_empty() {
559 let configuration = if configuration_ids.len() == 1 {
560 &self.language_configurations[configuration_ids[0]]
561 }
562 else {
565 let file_contents = fs::read(path)
566 .with_context(|| format!("Failed to read path {}", path.display()))?;
567 let file_contents = String::from_utf8_lossy(&file_contents);
568 let mut best_score = -2isize;
569 let mut best_configuration_id = None;
570 for configuration_id in configuration_ids {
571 let config = &self.language_configurations[*configuration_id];
572
573 let score;
576 if let Some(content_regex) = &config.content_regex {
577 if let Some(mat) = content_regex.find(&file_contents) {
578 score = (mat.end() - mat.start()) as isize;
579 }
580 else {
585 score = -1;
586 }
587 } else {
588 score = 0;
589 }
590 if score > best_score {
591 best_configuration_id = Some(*configuration_id);
592 best_score = score;
593 }
594 }
595
596 &self.language_configurations[best_configuration_id.unwrap()]
597 };
598
599 let language = self.language_for_id(configuration.language_id)?;
600 return Ok(Some((language, configuration)));
601 }
602 }
603
604 Ok(None)
605 }
606
607 pub fn language_configuration_for_injection_string(
608 &self,
609 string: &str,
610 ) -> Result<Option<(Language, &LanguageConfiguration)>> {
611 let mut best_match_length = 0;
612 let mut best_match_position = None;
613 for (i, configuration) in self.language_configurations.iter().enumerate() {
614 if let Some(injection_regex) = &configuration.injection_regex {
615 if let Some(mat) = injection_regex.find(string) {
616 let length = mat.end() - mat.start();
617 if length > best_match_length {
618 best_match_position = Some(i);
619 best_match_length = length;
620 }
621 }
622 }
623 }
624
625 if let Some(i) = best_match_position {
626 let configuration = &self.language_configurations[i];
627 let language = self.language_for_id(configuration.language_id)?;
628 Ok(Some((language, configuration)))
629 } else {
630 Ok(None)
631 }
632 }
633
/// Returns the language that `configuration` refers to, loading (and, if
/// necessary, compiling) it on first use.
///
/// # Errors
///
/// Returns an error if the language fails to load.
pub fn language_for_configuration(
    &self,
    configuration: &LanguageConfiguration,
) -> Result<Language> {
    self.language_for_id(configuration.language_id)
}
640
641 fn language_for_id(&self, id: usize) -> Result<Language> {
642 let (path, language, externals) = &self.languages_by_id[id];
643 language
644 .get_or_try_init(|| {
645 let src_path = path.join("src");
646 self.load_language_at_path(CompileConfig::new(
647 &src_path,
648 externals.as_deref(),
649 None,
650 ))
651 })
652 .cloned()
653 }
654
655 pub fn compile_parser_at_path(
656 &self,
657 grammar_path: &Path,
658 output_path: PathBuf,
659 flags: &[&str],
660 ) -> Result<()> {
661 let src_path = grammar_path.join("src");
662 let mut config = CompileConfig::new(&src_path, None, Some(output_path));
663 config.flags = flags;
664 self.load_language_at_path(config).map(|_| ())
665 }
666
667 pub fn load_language_at_path(&self, mut config: CompileConfig) -> Result<Language> {
668 let grammar_path = config.src_path.join("grammar.json");
669 config.name = Self::grammar_json_name(&grammar_path)?;
670 self.load_language_at_path_with_name(config)
671 }
672
/// Loads the language named `config.name`, compiling it first when needed.
///
/// A recompile happens when a rebuild is forced, when an explicit output path
/// was supplied, or when `needs_recompile` reports the cached library as
/// stale. A lock file named after the grammar coordinates concurrent builds.
/// With the `wasm` feature and a configured Wasm store, the language is
/// compiled to and loaded from a `.wasm` file instead of a dynamic library.
///
/// # Errors
///
/// Returns an error if directories or lock files cannot be created, the
/// compilation fails, or the library or its `tree_sitter_<name>` symbol cannot
/// be loaded.
pub fn load_language_at_path_with_name(&self, mut config: CompileConfig) -> Result<Language> {
    let mut lib_name = config.name.to_string();
    let language_fn_name = format!(
        "tree_sitter_{}",
        replace_dashes_with_underscores(&config.name)
    );
    // The trailing `._` acts as a placeholder extension: `set_extension` below
    // replaces only the part after the last dot, so `.debug` / `.sanitize`
    // remain part of the file name.
    if self.debug_build {
        lib_name.push_str(".debug._");
    }

    if self.sanitize_build {
        lib_name.push_str(".sanitize._");
        config.sanitize = true;
    }

    // Without an explicit output path the library goes into the cache directory.
    if config.output_path.is_none() {
        fs::create_dir_all(&self.parser_lib_path)?;
    }

    // An explicit output path always forces a recompile.
    let mut recompile = self.force_rebuild || config.output_path.is_some();
    let output_path = config.output_path.unwrap_or_else(|| {
        let mut path = self.parser_lib_path.join(lib_name);
        path.set_extension(env::consts::DLL_EXTENSION);
        #[cfg(feature = "wasm")]
        if self.wasm_store.lock().unwrap().is_some() {
            path.set_extension("wasm");
        }
        path
    });
    config.output_path = Some(output_path.clone());

    let parser_path = config.src_path.join("parser.c");
    config.scanner_path = self.get_scanner_path(config.src_path);

    // Every source file whose modification can make the cached library stale.
    let mut paths_to_check = vec![parser_path];

    if let Some(scanner_path) = config.scanner_path.as_ref() {
        paths_to_check.push(scanner_path.clone());
    }

    paths_to_check.extend(
        config
            .external_files
            .unwrap_or_default()
            .iter()
            .map(|p| config.src_path.join(p)),
    );

    if !recompile {
        recompile = needs_recompile(&output_path, &paths_to_check)
            .with_context(|| "Failed to compare source and binary timestamps")?;
    }

    // Wasm path: compile if needed, then load the bytes into the store and
    // return early. The lock-file protocol below is not used here.
    #[cfg(feature = "wasm")]
    if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() {
        if recompile {
            self.compile_parser_to_wasm(
                &config.name,
                None,
                config.src_path,
                config
                    .scanner_path
                    .as_ref()
                    .and_then(|p| p.strip_prefix(config.src_path).ok()),
                &output_path,
                false,
            )?;
        }

        let wasm_bytes = fs::read(&output_path)?;
        return Ok(wasm_store.load_language(&config.name, &wasm_bytes)?);
    }

    // NOTE(review): in the `CROSS_RUNNER` branch the `TempDir` value is a
    // temporary that is dropped at the end of the expression, so only the path
    // survives — confirm this is intended.
    let lock_path = if env::var("CROSS_RUNNER").is_ok() {
        tempfile::tempdir()
            .unwrap()
            .path()
            .join("tree-sitter")
            .join("lock")
            .join(format!("{}.lock", config.name))
    } else {
        etcetera::choose_base_strategy()?
            .cache_dir()
            .join("tree-sitter")
            .join("lock")
            .join(format!("{}.lock", config.name))
    };

    // An existing lock file means a build is, or recently was, in progress.
    if let Ok(lock_file) = fs::OpenOptions::new().write(true).open(&lock_path) {
        recompile = false;
        if lock_file.try_lock_exclusive().is_err() {
            // Lock is held elsewhere: wait for it to be released, then reuse
            // the result of that build.
            lock_file.lock_exclusive()?;
            recompile = false;
        } else {
            // Lock was free: a lock file older than 30 seconds is treated as
            // stale, removed, and the parser is rebuilt.
            let time = lock_file.metadata()?.modified()?.elapsed()?.as_secs();
            if time > 30 {
                fs::remove_file(&lock_path)?;
                recompile = true;
            }
        }
    }

    if recompile {
        fs::create_dir_all(lock_path.parent().unwrap()).with_context(|| {
            format!(
                "Failed to create directory {}",
                lock_path.parent().unwrap().display()
            )
        })?;
        let lock_file = fs::OpenOptions::new()
            .create(true)
            .truncate(true)
            .write(true)
            .open(&lock_path)?;
        lock_file.lock_exclusive()?;

        // `compile_parser_to_dylib` unlocks and removes the lock file itself.
        self.compile_parser_to_dylib(&config, &lock_file, &lock_path)?;

        if config.scanner_path.is_some() {
            self.check_external_scanner(&config.name, &output_path)?;
        }
    }

    let library = unsafe { Library::new(&output_path) }
        .with_context(|| format!("Error opening dynamic library {}", output_path.display()))?;
    let language = unsafe {
        let language_fn = library
            .get::<Symbol<unsafe extern "C" fn() -> Language>>(language_fn_name.as_bytes())
            .with_context(|| format!("Failed to load symbol {language_fn_name}"))?;
        language_fn()
    };
    // Deliberately leak the library handle so it is never unloaded while the
    // returned `Language` may still be in use.
    mem::forget(library);
    Ok(language)
}
813
814 fn compile_parser_to_dylib(
815 &self,
816 config: &CompileConfig,
817 lock_file: &fs::File,
818 lock_path: &Path,
819 ) -> Result<(), Error> {
820 let mut cc_config = cc::Build::new();
821 cc_config
822 .cargo_metadata(false)
823 .cargo_warnings(false)
824 .target(BUILD_TARGET)
825 .host(BUILD_HOST)
826 .debug(self.debug_build)
827 .file(&config.parser_path)
828 .includes(&config.header_paths)
829 .std("c11");
830
831 if let Some(scanner_path) = config.scanner_path.as_ref() {
832 cc_config.file(scanner_path);
833 }
834
835 if self.debug_build {
836 cc_config.opt_level(0).extra_warnings(true);
837 } else {
838 cc_config.opt_level(2).extra_warnings(false);
839 }
840
841 for flag in config.flags {
842 cc_config.define(flag, None);
843 }
844
845 let compiler = cc_config.get_compiler();
846 let mut command = Command::new(compiler.path());
847 command.args(compiler.args());
848 for (key, value) in compiler.env() {
849 command.env(key, value);
850 }
851
852 let output_path = config.output_path.as_ref().unwrap();
853
854 if compiler.is_like_msvc() {
855 let out = format!("-out:{}", output_path.to_str().unwrap());
856 command.arg(if self.debug_build { "-LDd" } else { "-LD" });
857 command.arg("-utf-8");
858 command.args(cc_config.get_files());
859 command.arg("-link").arg(out);
860 } else {
861 command.arg("-Werror=implicit-function-declaration");
862 if cfg!(any(target_os = "macos", target_os = "ios")) {
863 command.arg("-dynamiclib");
864 command.arg("-UTREE_SITTER_REUSE_ALLOCATOR");
866 } else {
867 command.arg("-shared");
868 }
869 command.args(cc_config.get_files());
870 command.arg("-o").arg(output_path);
871 }
872
873 let output = command.output().with_context(|| {
874 format!("Failed to execute the C compiler with the following command:\n{command:?}")
875 })?;
876
877 FileExt::unlock(lock_file)?;
878 fs::remove_file(lock_path)?;
879
880 if output.status.success() {
881 Ok(())
882 } else {
883 Err(anyhow!(
884 "Parser compilation failed.\nStdout: {}\nStderr: {}",
885 String::from_utf8_lossy(&output.stdout),
886 String::from_utf8_lossy(&output.stderr)
887 ))
888 }
889 }
890
891 #[cfg(unix)]
892 fn check_external_scanner(&self, name: &str, library_path: &Path) -> Result<()> {
893 let prefix = if cfg!(any(target_os = "macos", target_os = "ios")) {
894 "_"
895 } else {
896 ""
897 };
898 let mut must_have = vec![
899 format!("{prefix}tree_sitter_{name}_external_scanner_create"),
900 format!("{prefix}tree_sitter_{name}_external_scanner_destroy"),
901 format!("{prefix}tree_sitter_{name}_external_scanner_serialize"),
902 format!("{prefix}tree_sitter_{name}_external_scanner_deserialize"),
903 format!("{prefix}tree_sitter_{name}_external_scanner_scan"),
904 ];
905
906 let command = Command::new("nm")
907 .arg("-W")
908 .arg("-U")
909 .arg(library_path)
910 .output();
911 if let Ok(output) = command {
912 if output.status.success() {
913 let mut found_non_static = false;
914 for line in String::from_utf8_lossy(&output.stdout).lines() {
915 if line.contains(" T ") {
916 if let Some(function_name) =
917 line.split_whitespace().collect::<Vec<_>>().get(2)
918 {
919 if !line.contains("tree_sitter_") {
920 if !found_non_static {
921 found_non_static = true;
922 eprintln!("Warning: Found non-static non-tree-sitter functions in the external scannner");
923 }
924 eprintln!(" `{function_name}`");
925 } else {
926 must_have.retain(|f| f != function_name);
927 }
928 }
929 }
930 }
931 if found_non_static {
932 eprintln!("Consider making these functions static, they can cause conflicts when another tree-sitter project uses the same function name");
933 }
934
935 if !must_have.is_empty() {
936 let missing = must_have
937 .iter()
938 .map(|f| format!(" `{f}`"))
939 .collect::<Vec<_>>()
940 .join("\n");
941
942 return Err(anyhow!(format!(
943 indoc! {"
944 Missing required functions in the external scanner, parsing won't work without these!
945
946 {}
947
948 You can read more about this at https://tree-sitter.github.io/tree-sitter/creating-parsers/4-external-scanners
949 "},
950 missing,
951 )));
952 }
953 }
954 }
955
956 Ok(())
957 }
958
/// Windows counterpart of the external-scanner check: performs no inspection
/// and always returns `Ok(())`.
#[cfg(windows)]
fn check_external_scanner(&self, _name: &str, _library_path: &Path) -> Result<()> {
    Ok(())
}
973
/// Compiles a parser to a Wasm side module with `emcc`.
///
/// `emcc` is run natively when it can be executed and `force_docker` is
/// `false`; otherwise it is run inside the `EMSCRIPTEN_TAG` image via `docker`
/// or, failing that, `podman`. The compiler writes `output.wasm` into
/// `src_path`, which is then renamed to `output_path`.
///
/// * `language_name` — used to build the exported symbol `_tree_sitter_<name>`.
/// * `root_path` — directory mounted at `/src` in the container; defaults to `src_path`.
/// * `src_path` — directory containing `parser.c`; the compiler's working directory.
/// * `scanner_filename` — optional scanner source, passed to `emcc` as given.
///
/// # Errors
///
/// Returns an error if none of `emcc`, `docker` or `podman` is usable, if the
/// compiler cannot be started or exits unsuccessfully, or if the output file
/// cannot be renamed.
///
/// # Panics
///
/// Panics in container mode if `src_path` is not located under `root_path`.
pub fn compile_parser_to_wasm(
    &self,
    language_name: &str,
    root_path: Option<&Path>,
    src_path: &Path,
    scanner_filename: Option<&Path>,
    output_path: &Path,
    force_docker: bool,
) -> Result<(), Error> {
    #[derive(PartialEq, Eq)]
    enum EmccSource {
        Native,
        Docker,
        Podman,
    }

    let root_path = root_path.unwrap_or(src_path);
    let emcc_name = if cfg!(windows) { "emcc.bat" } else { "emcc" };

    // Probe for a usable compiler. For native `emcc` only the ability to spawn
    // it is checked, not its exit status.
    // NOTE(review): `docker` is probed with no arguments and must exit
    // successfully — confirm this holds for all supported Docker versions.
    let source = if !force_docker && Command::new(emcc_name).output().is_ok() {
        EmccSource::Native
    } else if Command::new("docker")
        .output()
        .is_ok_and(|out| out.status.success())
    {
        EmccSource::Docker
    } else if Command::new("podman")
        .arg("--version")
        .output()
        .is_ok_and(|out| out.status.success())
    {
        EmccSource::Podman
    } else {
        return Err(anyhow!(
            "You must have either emcc, docker, or podman on your PATH to run this command"
        ));
    };

    let mut command = match source {
        EmccSource::Native => {
            let mut command = Command::new(emcc_name);
            command.current_dir(src_path);
            command
        }

        EmccSource::Docker | EmccSource::Podman => {
            let mut command = match source {
                EmccSource::Docker => Command::new("docker"),
                EmccSource::Podman => Command::new("podman"),
                EmccSource::Native => unreachable!(),
            };
            command.args(["run", "--rm"]);

            // `root_path` is mounted at `/src`; the working directory is the
            // location of `src_path` inside that mount.
            let workdir = if root_path == src_path {
                PathBuf::from("/src")
            } else {
                let mut path = PathBuf::from("/src");
                path.push(src_path.strip_prefix(root_path).unwrap());
                path
            };
            command.args(["--workdir", &workdir.to_slash_lossy()]);

            let mut volume_string = OsString::from(&root_path);
            volume_string.push(":/src:Z");
            command.args([OsStr::new("--volume"), &volume_string]);

            // Set unconditionally; presumably only podman reads it.
            command.env("PODMAN_USERNS", "keep-id");

            // With docker on Unix, run the container as the calling user's uid.
            #[cfg(unix)]
            {
                #[link(name = "c")]
                extern "C" {
                    fn getuid() -> u32;
                }
                if source == EmccSource::Docker {
                    let user_id = unsafe { getuid() };
                    command.args(["--user", &user_id.to_string()]);
                }
            };

            command.args([EMSCRIPTEN_TAG, "emcc"]);
            command
        }
    };

    let output_name = "output.wasm";

    command.args([
        "-o",
        output_name,
        "-Os",
        "-s",
        "WASM=1",
        "-s",
        "SIDE_MODULE=2",
        "-s",
        "TOTAL_MEMORY=33554432",
        "-s",
        "NODEJS_CATCH_EXIT=0",
        "-s",
        &format!("EXPORTED_FUNCTIONS=[\"_tree_sitter_{language_name}\"]"),
        "-fno-exceptions",
        "-fvisibility=hidden",
        "-I",
        ".",
    ]);

    if let Some(scanner_filename) = scanner_filename {
        command.arg(scanner_filename);
    }

    command.arg("parser.c");
    // `spawn` + `wait` (rather than `output`) leaves the child's stdout and
    // stderr connected to this process.
    let status = command
        .spawn()
        .with_context(|| "Failed to run emcc command")?
        .wait()?;
    if !status.success() {
        return Err(anyhow!("emcc command failed"));
    }

    fs::rename(src_path.join(output_name), output_path)
        .context("failed to rename wasm output file")?;

    Ok(())
}
1112
/// Returns the highlight configuration of the language selected for the
/// injection string `string`, if any.
///
/// Failures while loading the language or its highlight configuration are
/// reported on stderr and turned into `None`.
#[must_use]
#[cfg(feature = "tree-sitter-highlight")]
pub fn highlight_config_for_injection_string<'a>(
    &'a self,
    string: &str,
) -> Option<&'a HighlightConfiguration> {
    let lookup = self.language_configuration_for_injection_string(string);
    let (language, configuration) = match lookup {
        Ok(found) => found?,
        Err(e) => {
            eprintln!("Failed to load language for injection string '{string}': {e}");
            return None;
        }
    };

    match configuration.highlight_config(language, None) {
        Ok(Some(config)) => Some(config),
        Ok(None) => None,
        Err(e) => {
            eprintln!("Failed to load property sheet for injection string '{string}': {e}");
            None
        }
    }
}
1139
1140 #[must_use]
1141 pub fn get_language_configuration_in_current_path(&self) -> Option<&LanguageConfiguration> {
1142 self.language_configuration_in_current_path
1143 .map(|i| &self.language_configurations[i])
1144 }
1145
1146 pub fn find_language_configurations_at_path(
1147 &mut self,
1148 parser_path: &Path,
1149 set_current_path_config: bool,
1150 ) -> Result<&[LanguageConfiguration]> {
1151 let initial_language_configuration_count = self.language_configurations.len();
1152
1153 let ts_json = TreeSitterJSON::from_file(parser_path);
1154 if let Ok(config) = ts_json {
1155 let language_count = self.languages_by_id.len();
1156 for grammar in config.grammars {
1157 let language_path = parser_path.join(grammar.path.unwrap_or(PathBuf::from(".")));
1161
1162 let mut language_id = None;
1165 for (id, (path, _, _)) in
1166 self.languages_by_id.iter().enumerate().skip(language_count)
1167 {
1168 if language_path == *path {
1169 language_id = Some(id);
1170 }
1171 }
1172
1173 let language_id = if let Some(language_id) = language_id {
1175 language_id
1176 } else {
1177 self.languages_by_id.push((
1178 language_path,
1179 OnceCell::new(),
1180 grammar.external_files.clone().into_vec().map(|files| {
1181 files.into_iter()
1182 .map(|path| {
1183 let path = parser_path.join(path);
1184 if path.starts_with(parser_path) {
1186 Ok(path)
1187 } else {
1188 Err(anyhow!("External file path {path:?} is outside of parser directory {parser_path:?}"))
1189 }
1190 })
1191 .collect::<Result<Vec<_>>>()
1192 }).transpose()?,
1193 ));
1194 self.languages_by_id.len() - 1
1195 };
1196
1197 let configuration = LanguageConfiguration {
1198 root_path: parser_path.to_path_buf(),
1199 language_name: grammar.name,
1200 scope: Some(grammar.scope),
1201 language_id,
1202 file_types: grammar.file_types.unwrap_or_default(),
1203 content_regex: Self::regex(grammar.content_regex.as_deref()),
1204 first_line_regex: Self::regex(grammar.first_line_regex.as_deref()),
1205 injection_regex: Self::regex(grammar.injection_regex.as_deref()),
1206 injections_filenames: grammar.injections.into_vec(),
1207 locals_filenames: grammar.locals.into_vec(),
1208 tags_filenames: grammar.tags.into_vec(),
1209 highlights_filenames: grammar.highlights.into_vec(),
1210 #[cfg(feature = "tree-sitter-highlight")]
1211 highlight_config: OnceCell::new(),
1212 #[cfg(feature = "tree-sitter-tags")]
1213 tags_config: OnceCell::new(),
1214 #[cfg(feature = "tree-sitter-highlight")]
1215 highlight_names: &self.highlight_names,
1216 #[cfg(feature = "tree-sitter-highlight")]
1217 use_all_highlight_names: self.use_all_highlight_names,
1218 _phantom: PhantomData,
1219 };
1220
1221 for file_type in &configuration.file_types {
1222 self.language_configuration_ids_by_file_type
1223 .entry(file_type.to_string())
1224 .or_default()
1225 .push(self.language_configurations.len());
1226 }
1227 if let Some(first_line_regex) = &configuration.first_line_regex {
1228 self.language_configuration_ids_by_first_line_regex
1229 .entry(first_line_regex.to_string())
1230 .or_default()
1231 .push(self.language_configurations.len());
1232 }
1233
1234 self.language_configurations.push(unsafe {
1235 mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
1236 configuration,
1237 )
1238 });
1239
1240 if set_current_path_config && self.language_configuration_in_current_path.is_none()
1241 {
1242 self.language_configuration_in_current_path =
1243 Some(self.language_configurations.len() - 1);
1244 }
1245 }
1246 } else if let Err(e) = ts_json {
1247 match e.downcast_ref::<std::io::Error>() {
1248 Some(e) if e.kind() == std::io::ErrorKind::NotFound => {}
1250 _ => {
1251 eprintln!(
1252 "Warning: Failed to parse {} -- {e}",
1253 parser_path.join("tree-sitter.json").display()
1254 );
1255 }
1256 }
1257 }
1258
1259 if self.language_configurations.len() == initial_language_configuration_count
1263 && parser_path.join("src").join("grammar.json").exists()
1264 {
1265 let grammar_path = parser_path.join("src").join("grammar.json");
1266 let language_name = Self::grammar_json_name(&grammar_path)?;
1267 let configuration = LanguageConfiguration {
1268 root_path: parser_path.to_owned(),
1269 language_name,
1270 language_id: self.languages_by_id.len(),
1271 file_types: Vec::new(),
1272 scope: None,
1273 content_regex: None,
1274 first_line_regex: None,
1275 injection_regex: None,
1276 injections_filenames: None,
1277 locals_filenames: None,
1278 highlights_filenames: None,
1279 tags_filenames: None,
1280 #[cfg(feature = "tree-sitter-highlight")]
1281 highlight_config: OnceCell::new(),
1282 #[cfg(feature = "tree-sitter-tags")]
1283 tags_config: OnceCell::new(),
1284 #[cfg(feature = "tree-sitter-highlight")]
1285 highlight_names: &self.highlight_names,
1286 #[cfg(feature = "tree-sitter-highlight")]
1287 use_all_highlight_names: self.use_all_highlight_names,
1288 _phantom: PhantomData,
1289 };
1290 self.language_configurations.push(unsafe {
1291 mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
1292 configuration,
1293 )
1294 });
1295 self.languages_by_id
1296 .push((parser_path.to_owned(), OnceCell::new(), None));
1297 }
1298
1299 Ok(&self.language_configurations[initial_language_configuration_count..])
1300 }
1301
1302 fn regex(pattern: Option<&str>) -> Option<Regex> {
1303 pattern.and_then(|r| RegexBuilder::new(r).multi_line(true).build().ok())
1304 }
1305
1306 fn grammar_json_name(grammar_path: &Path) -> Result<String> {
1307 let file = fs::File::open(grammar_path).with_context(|| {
1308 format!("Failed to open grammar.json at {}", grammar_path.display())
1309 })?;
1310
1311 let first_three_lines = BufReader::new(file)
1312 .lines()
1313 .take(3)
1314 .collect::<Result<Vec<_>, _>>()
1315 .with_context(|| {
1316 format!(
1317 "Failed to read the first three lines of grammar.json at {}",
1318 grammar_path.display()
1319 )
1320 })?
1321 .join("\n");
1322
1323 let name = GRAMMAR_NAME_REGEX
1324 .captures(&first_three_lines)
1325 .and_then(|c| c.get(1))
1326 .ok_or_else(|| {
1327 anyhow!(
1328 "Failed to parse the language name from grammar.json at {}",
1329 grammar_path.display()
1330 )
1331 })?;
1332
1333 Ok(name.as_str().to_string())
1334 }
1335
1336 pub fn select_language(
1337 &mut self,
1338 path: &Path,
1339 current_dir: &Path,
1340 scope: Option<&str>,
1341 ) -> Result<Language> {
1342 if let Some(scope) = scope {
1343 if let Some(config) = self
1344 .language_configuration_for_scope(scope)
1345 .with_context(|| format!("Failed to load language for scope '{scope}'"))?
1346 {
1347 Ok(config.0)
1348 } else {
1349 Err(anyhow!("Unknown scope '{scope}'"))
1350 }
1351 } else if let Some((lang, _)) = self
1352 .language_configuration_for_file_name(path)
1353 .with_context(|| {
1354 format!(
1355 "Failed to load language for file name {}",
1356 path.file_name().unwrap().to_string_lossy()
1357 )
1358 })?
1359 {
1360 Ok(lang)
1361 } else if let Some(id) = self.language_configuration_in_current_path {
1362 Ok(self.language_for_id(self.language_configurations[id].language_id)?)
1363 } else if let Some(lang) = self
1364 .languages_at_path(current_dir)
1365 .with_context(|| "Failed to load language in current directory")?
1366 .first()
1367 .cloned()
1368 {
1369 Ok(lang.0)
1370 } else if let Some(lang) = self.language_configuration_for_first_line_regex(path)? {
1371 Ok(lang.0)
1372 } else {
1373 Err(anyhow!("No language found"))
1374 }
1375 }
1376
1377 pub fn debug_build(&mut self, flag: bool) {
1378 self.debug_build = flag;
1379 }
1380
1381 pub fn sanitize_build(&mut self, flag: bool) {
1382 self.sanitize_build = flag;
1383 }
1384
1385 pub fn force_rebuild(&mut self, rebuild: bool) {
1386 self.force_rebuild = rebuild;
1387 }
1388
/// Creates a `tree_sitter::WasmStore` from `engine` and installs it as this
/// loader's wasm store, replacing any previous one.
///
/// # Panics
///
/// Panics if the store cannot be created or if locking `wasm_store` fails.
#[cfg(feature = "wasm")]
#[cfg_attr(docsrs, doc(cfg(feature = "wasm")))]
pub fn use_wasm(&mut self, engine: &tree_sitter::wasmtime::Engine) {
    // Build the store before taking the lock, as the original assignment did.
    let store = tree_sitter::WasmStore::new(engine).unwrap();
    *self.wasm_store.lock().unwrap() = Some(store);
}
1394
1395 #[must_use]
1396 pub fn get_scanner_path(&self, src_path: &Path) -> Option<PathBuf> {
1397 let path = src_path.join("scanner.c");
1398 path.exists().then_some(path)
1399 }
1400}
1401
1402impl LanguageConfiguration<'_> {
1403 #[cfg(feature = "tree-sitter-highlight")]
1404 pub fn highlight_config(
1405 &self,
1406 language: Language,
1407 paths: Option<&[PathBuf]>,
1408 ) -> Result<Option<&HighlightConfiguration>> {
1409 let (highlights_filenames, injections_filenames, locals_filenames) = match paths {
1410 Some(paths) => (
1411 Some(
1412 paths
1413 .iter()
1414 .filter(|p| p.ends_with("highlights.scm"))
1415 .cloned()
1416 .collect::<Vec<_>>(),
1417 ),
1418 Some(
1419 paths
1420 .iter()
1421 .filter(|p| p.ends_with("tags.scm"))
1422 .cloned()
1423 .collect::<Vec<_>>(),
1424 ),
1425 Some(
1426 paths
1427 .iter()
1428 .filter(|p| p.ends_with("locals.scm"))
1429 .cloned()
1430 .collect::<Vec<_>>(),
1431 ),
1432 ),
1433 None => (None, None, None),
1434 };
1435 self.highlight_config
1436 .get_or_try_init(|| {
1437 let (highlights_query, highlight_ranges) = self.read_queries(
1438 if highlights_filenames.is_some() {
1439 highlights_filenames.as_deref()
1440 } else {
1441 self.highlights_filenames.as_deref()
1442 },
1443 "highlights.scm",
1444 )?;
1445 let (injections_query, injection_ranges) = self.read_queries(
1446 if injections_filenames.is_some() {
1447 injections_filenames.as_deref()
1448 } else {
1449 self.injections_filenames.as_deref()
1450 },
1451 "injections.scm",
1452 )?;
1453 let (locals_query, locals_ranges) = self.read_queries(
1454 if locals_filenames.is_some() {
1455 locals_filenames.as_deref()
1456 } else {
1457 self.locals_filenames.as_deref()
1458 },
1459 "locals.scm",
1460 )?;
1461
1462 if highlights_query.is_empty() {
1463 Ok(None)
1464 } else {
1465 let mut result = HighlightConfiguration::new(
1466 language,
1467 &self.language_name,
1468 &highlights_query,
1469 &injections_query,
1470 &locals_query,
1471 )
1472 .map_err(|error| match error.kind {
1473 QueryErrorKind::Language => Error::from(error),
1474 _ => {
1475 if error.offset < injections_query.len() {
1476 Self::include_path_in_query_error(
1477 error,
1478 &injection_ranges,
1479 &injections_query,
1480 0,
1481 )
1482 } else if error.offset < injections_query.len() + locals_query.len() {
1483 Self::include_path_in_query_error(
1484 error,
1485 &locals_ranges,
1486 &locals_query,
1487 injections_query.len(),
1488 )
1489 } else {
1490 Self::include_path_in_query_error(
1491 error,
1492 &highlight_ranges,
1493 &highlights_query,
1494 injections_query.len() + locals_query.len(),
1495 )
1496 }
1497 }
1498 })?;
1499 let mut all_highlight_names = self.highlight_names.lock().unwrap();
1500 if self.use_all_highlight_names {
1501 for capture_name in result.query.capture_names() {
1502 if !all_highlight_names.iter().any(|x| x == capture_name) {
1503 all_highlight_names.push((*capture_name).to_string());
1504 }
1505 }
1506 }
1507 result.configure(all_highlight_names.as_slice());
1508 drop(all_highlight_names);
1509 Ok(Some(result))
1510 }
1511 })
1512 .map(Option::as_ref)
1513 }
1514
1515 #[cfg(feature = "tree-sitter-tags")]
1516 pub fn tags_config(&self, language: Language) -> Result<Option<&TagsConfiguration>> {
1517 self.tags_config
1518 .get_or_try_init(|| {
1519 let (tags_query, tags_ranges) =
1520 self.read_queries(self.tags_filenames.as_deref(), "tags.scm")?;
1521 let (locals_query, locals_ranges) =
1522 self.read_queries(self.locals_filenames.as_deref(), "locals.scm")?;
1523 if tags_query.is_empty() {
1524 Ok(None)
1525 } else {
1526 TagsConfiguration::new(language, &tags_query, &locals_query)
1527 .map(Some)
1528 .map_err(|error| {
1529 if let TagsError::Query(error) = error {
1530 if error.offset < locals_query.len() {
1531 Self::include_path_in_query_error(
1532 error,
1533 &locals_ranges,
1534 &locals_query,
1535 0,
1536 )
1537 } else {
1538 Self::include_path_in_query_error(
1539 error,
1540 &tags_ranges,
1541 &tags_query,
1542 locals_query.len(),
1543 )
1544 }
1545 } else {
1546 error.into()
1547 }
1548 })
1549 }
1550 })
1551 .map(Option::as_ref)
1552 }
1553
1554 #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1555 fn include_path_in_query_error(
1556 mut error: QueryError,
1557 ranges: &[(PathBuf, Range<usize>)],
1558 source: &str,
1559 start_offset: usize,
1560 ) -> Error {
1561 let offset_within_section = error.offset - start_offset;
1562 let (path, range) = ranges
1563 .iter()
1564 .find(|(_, range)| range.contains(&offset_within_section))
1565 .unwrap_or_else(|| ranges.last().unwrap());
1566 error.offset = offset_within_section - range.start;
1567 error.row = source[range.start..offset_within_section]
1568 .matches('\n')
1569 .count();
1570 Error::from(error).context(format!("Error in query file {}", path.display()))
1571 }
1572
1573 #[allow(clippy::type_complexity)]
1574 #[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
1575 fn read_queries(
1576 &self,
1577 paths: Option<&[PathBuf]>,
1578 default_path: &str,
1579 ) -> Result<(String, Vec<(PathBuf, Range<usize>)>)> {
1580 let mut query = String::new();
1581 let mut path_ranges = Vec::new();
1582 if let Some(paths) = paths {
1583 for path in paths {
1584 let abs_path = self.root_path.join(path);
1585 let prev_query_len = query.len();
1586 query += &fs::read_to_string(&abs_path)
1587 .with_context(|| format!("Failed to read query file {}", path.display()))?;
1588 path_ranges.push((path.clone(), prev_query_len..query.len()));
1589 }
1590 } else {
1591 if default_path == "highlights.scm" || default_path == "tags.scm" {
1593 eprintln!(
1594 indoc! {"
1595 Warning: you should add a `{}` entry pointing to the highlights path in the `tree-sitter` object in the grammar's tree-sitter.json file.
1596 See more here: https://tree-sitter.github.io/tree-sitter/3-syntax-highlighting#query-paths
1597 "},
1598 default_path.replace(".scm", "")
1599 );
1600 }
1601 let queries_path = self.root_path.join("queries");
1602 let path = queries_path.join(default_path);
1603 if path.exists() {
1604 query = fs::read_to_string(&path)
1605 .with_context(|| format!("Failed to read query file {}", path.display()))?;
1606 path_ranges.push((PathBuf::from(default_path), 0..query.len()));
1607 }
1608 }
1609
1610 Ok((query, path_ranges))
1611 }
1612}
1613
1614fn needs_recompile(lib_path: &Path, paths_to_check: &[PathBuf]) -> Result<bool> {
1615 if !lib_path.exists() {
1616 return Ok(true);
1617 }
1618 let lib_mtime = mtime(lib_path)
1619 .with_context(|| format!("Failed to read mtime of {}", lib_path.display()))?;
1620 for path in paths_to_check {
1621 if mtime(path)? > lib_mtime {
1622 return Ok(true);
1623 }
1624 }
1625 Ok(false)
1626}
1627
1628fn mtime(path: &Path) -> Result<SystemTime> {
1629 Ok(fs::metadata(path)?.modified()?)
1630}
1631
/// Returns a copy of `name` in which every `-` is replaced by `_`.
fn replace_dashes_with_underscores(name: &str) -> String {
    name.chars()
        .map(|c| if c == '-' { '_' } else { c })
        .collect()
}