#![doc = include_str!("../README.md")]
#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
use std::ops::Range;
#[cfg(feature = "tree-sitter-highlight")]
use std::sync::Mutex;
use std::{
collections::HashMap,
env,
ffi::{OsStr, OsString},
fs,
io::{BufRead, BufReader},
mem,
path::{Path, PathBuf},
process::Command,
time::SystemTime,
};
#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
use anyhow::Error;
use anyhow::{anyhow, Context, Result};
use fs4::FileExt;
use indoc::indoc;
use libloading::{Library, Symbol};
use once_cell::unsync::OnceCell;
use regex::{Regex, RegexBuilder};
use serde::{Deserialize, Deserializer, Serialize};
use tree_sitter::Language;
#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
use tree_sitter::QueryError;
#[cfg(feature = "tree-sitter-highlight")]
use tree_sitter::QueryErrorKind;
#[cfg(feature = "tree-sitter-highlight")]
use tree_sitter_highlight::HighlightConfiguration;
#[cfg(feature = "tree-sitter-tags")]
use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
/// Container image reference passed to `docker run` / `podman run` when `emcc` is
/// executed inside a container. The version tag comes from the `EMSCRIPTEN_VERSION`
/// environment variable read at compile time.
pub const EMSCRIPTEN_TAG: &str = concat!("docker.io/emscripten/emsdk:", env!("EMSCRIPTEN_VERSION"));
/// Loader configuration: the directories that are searched for grammar repositories.
#[derive(Default, Deserialize, Serialize)]
pub struct Config {
/// Directories scanned by `Loader::find_all_languages` for `tree-sitter-*`
/// subdirectories. Stored under the key `parser-directories`; when deserialized, a
/// leading `~` or `$HOME` component is replaced by the user's home directory if it
/// can be determined.
#[serde(default)]
#[serde(
rename = "parser-directories",
deserialize_with = "deserialize_parser_directories"
)]
pub parser_directories: Vec<PathBuf>,
}
/// Serde `deserialize_with` helper for `Config::parser_directories`.
///
/// Reads a list of paths and rewrites each one with `standardize_path` so that a
/// leading `~` or `$HOME` resolves to the home directory. If the home directory is
/// unknown, the paths are returned exactly as they were read.
fn deserialize_parser_directories<'de, D>(deserializer: D) -> Result<Vec<PathBuf>, D::Error>
where
    D: Deserializer<'de>,
{
    let raw_paths = Vec::<PathBuf>::deserialize(deserializer)?;
    match dirs::home_dir() {
        Some(home) => Ok(raw_paths
            .into_iter()
            .map(|raw| standardize_path(raw, &home))
            .collect()),
        None => Ok(raw_paths),
    }
}
/// Expands a leading `~` or `$HOME` path component into `home`.
///
/// The prefix must be a whole component (`~/x`, not `~user/x`). Paths without such a
/// prefix are returned unchanged.
fn standardize_path(path: PathBuf, home: &Path) -> PathBuf {
    for home_alias in ["~", "$HOME"] {
        if let Ok(rest) = path.strip_prefix(home_alias) {
            return home.join(rest);
        }
    }
    path
}
impl Config {
    /// Builds the initial configuration: a fixed set of commonly used source
    /// directories directly under the user's home directory.
    ///
    /// # Panics
    ///
    /// Panics if the home directory cannot be determined.
    #[must_use]
    pub fn initial() -> Self {
        let home_dir = dirs::home_dir().expect("Cannot determine home directory");
        let parser_directories = ["github", "src", "source", "projects", "dev", "git"]
            .iter()
            .map(|dir_name| home_dir.join(dir_name))
            .collect();
        Self { parser_directories }
    }
}
// Target and host identifiers captured from the `BUILD_TARGET` / `BUILD_HOST`
// environment variables at compile time; they are handed to `cc::Build` when a parser
// is compiled to a dynamic library.
const BUILD_TARGET: &str = env!("BUILD_TARGET");
const BUILD_HOST: &str = env!("BUILD_HOST");
/// One entry of the `tree-sitter` array of a grammar's `package.json` (or a bare
/// fallback entry when only `src/grammar.json` exists), describing how to recognise
/// files of a language and where its query files live.
pub struct LanguageConfiguration<'a> {
/// Scope string used by `Loader::language_configuration_for_scope`.
pub scope: Option<String>,
/// Regex run against file contents to choose between several configurations that
/// share a file type.
pub content_regex: Option<Regex>,
/// Regex matched against the first line of a file.
pub first_line_regex: Option<Regex>,
/// Regex matched against injection strings.
pub injection_regex: Option<Regex>,
/// File names or extensions this configuration is registered under.
pub file_types: Vec<String>,
/// Directory the configuration was found in; query paths are joined onto it.
pub root_path: PathBuf,
/// Query file paths for highlighting, relative to `root_path`.
pub highlights_filenames: Option<Vec<String>>,
/// Query file paths for injections, relative to `root_path`.
pub injections_filenames: Option<Vec<String>>,
/// Query file paths for locals, relative to `root_path`.
pub locals_filenames: Option<Vec<String>>,
/// Query file paths for tags, relative to `root_path`.
pub tags_filenames: Option<Vec<String>>,
/// Language name read from `grammar.json`.
pub language_name: String,
// Index into `Loader::languages_by_id`.
language_id: usize,
// Highlight configuration, built on first use and cached.
#[cfg(feature = "tree-sitter-highlight")]
highlight_config: OnceCell<Option<HighlightConfiguration>>,
// Tags configuration, built on first use and cached.
#[cfg(feature = "tree-sitter-tags")]
tags_config: OnceCell<Option<TagsConfiguration>>,
// Highlight name list shared with the owning `Loader`.
#[cfg(feature = "tree-sitter-highlight")]
highlight_names: &'a Mutex<Vec<String>>,
// When true, capture names found in the highlight query are appended to
// `highlight_names` before the configuration is configured with that list.
#[cfg(feature = "tree-sitter-highlight")]
use_all_highlight_names: bool,
}
/// Discovers grammars on disk, compiles them on demand and hands out loaded
/// `Language` values together with their `LanguageConfiguration`s.
pub struct Loader {
/// Directory in which compiled parser libraries are stored when no explicit output
/// path is requested.
pub parser_lib_path: PathBuf,
// Per language: grammar directory, lazily loaded `Language`, optional external files.
languages_by_id: Vec<(PathBuf, OnceCell<Language>, Option<Vec<PathBuf>>)>,
// All configurations discovered so far, in discovery order.
language_configurations: Vec<LanguageConfiguration<'static>>,
// File type (file name or extension) -> indices into `language_configurations`.
language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
// Index of the first configuration found by a lookup that asked for the
// "current path" configuration to be recorded.
language_configuration_in_current_path: Option<usize>,
// First-line regex pattern text -> indices into `language_configurations`.
language_configuration_ids_by_first_line_regex: HashMap<String, Vec<usize>>,
// Shared highlight names. Configurations keep a reference to this mutex whose
// lifetime is transmuted to `'static`; NOTE(review): it is presumably boxed so that
// reference stays valid when the `Loader` itself is moved — confirm.
#[cfg(feature = "tree-sitter-highlight")]
highlight_names: Box<Mutex<Vec<String>>>,
// Copied into each new configuration; cleared by `configure_highlights`.
#[cfg(feature = "tree-sitter-highlight")]
use_all_highlight_names: bool,
// Build parsers with debug info and without optimisation.
debug_build: bool,
// Marks compile configurations as sanitized and changes the library file name.
sanitize_build: bool,
// When set, languages are compiled to and loaded from wasm instead of native code.
#[cfg(feature = "wasm")]
wasm_store: Mutex<Option<tree_sitter::WasmStore>>,
}
/// Everything needed to compile (or locate) the parser library of one grammar.
pub struct CompileConfig<'a> {
/// The grammar's source directory, containing `grammar.json` and `parser.c`.
pub src_path: &'a Path,
/// Include directories passed to the C compiler.
pub header_paths: Vec<&'a Path>,
/// Path of the generated parser source file.
pub parser_path: PathBuf,
/// Path of the external scanner source, if any; filled in by
/// `Loader::load_language_at_path_with_name`.
pub scanner_path: Option<PathBuf>,
/// Additional files whose modification times are checked to decide on recompilation.
pub external_files: Option<&'a [PathBuf]>,
/// Where to write the compiled library. An explicit path forces recompilation; when
/// `None`, a path inside the loader's `parser_lib_path` is chosen.
pub output_path: Option<PathBuf>,
/// Names passed to the C compiler as preprocessor definitions without a value.
pub flags: &'a [&'a str],
/// Set to true by the loader when its sanitize build option is enabled.
pub sanitize: bool,
/// Language name; used for the library file name and the exported symbol name.
pub name: String,
}
impl<'a> CompileConfig<'a> {
    /// Creates a configuration for the grammar sources in `src_path`.
    ///
    /// The parser is expected at `src_path/parser.c` and `src_path` is the only include
    /// directory. The scanner path, flags and name start out empty, and `sanitize`
    /// starts out false.
    #[must_use]
    pub fn new(
        src_path: &'a Path,
        externals: Option<&'a [PathBuf]>,
        output_path: Option<PathBuf>,
    ) -> Self {
        let parser_path = src_path.join("parser.c");
        let header_paths = vec![src_path];
        Self {
            name: String::new(),
            src_path,
            parser_path,
            header_paths,
            scanner_path: None,
            external_files: externals,
            output_path,
            sanitize: false,
            flags: &[],
        }
    }
}
// NOTE(review): `Loader` holds `once_cell::unsync::OnceCell` fields, which are not
// `Sync` by themselves. These impls assert that sharing a `Loader` across threads is
// nevertheless sound; the justification is not visible in this file — confirm before
// relying on concurrent access.
unsafe impl Send for Loader {}
unsafe impl Sync for Loader {}
impl Loader {
/// Creates a loader whose compiled parsers live in the directory named by the
/// `TREE_SITTER_LIBDIR` environment variable, or in `<cache dir>/tree-sitter/lib`
/// when that variable cannot be read.
///
/// # Errors
///
/// Fails if the variable is unavailable and no cache directory can be determined.
pub fn new() -> Result<Self> {
    let parser_lib_path = if let Ok(lib_dir) = env::var("TREE_SITTER_LIBDIR") {
        PathBuf::from(lib_dir)
    } else {
        let cache_dir =
            dirs::cache_dir().ok_or_else(|| anyhow!("Cannot determine cache directory"))?;
        cache_dir.join("tree-sitter").join("lib")
    };
    Ok(Self::with_parser_lib_path(parser_lib_path))
}
/// Creates an empty loader that stores compiled parsers in `parser_lib_path`.
///
/// No languages are registered, debug and sanitize builds are off, and all highlight
/// names found in queries are collected until `configure_highlights` is called.
#[must_use]
pub fn with_parser_lib_path(parser_lib_path: PathBuf) -> Self {
    Self {
        // Build options.
        debug_build: false,
        sanitize_build: false,
        #[cfg(feature = "wasm")]
        wasm_store: Mutex::default(),
        // Highlight name bookkeeping.
        #[cfg(feature = "tree-sitter-highlight")]
        use_all_highlight_names: true,
        #[cfg(feature = "tree-sitter-highlight")]
        highlight_names: Box::new(Mutex::new(Vec::new())),
        // Language registry, initially empty.
        language_configuration_in_current_path: None,
        language_configuration_ids_by_first_line_regex: HashMap::new(),
        language_configuration_ids_by_file_type: HashMap::new(),
        language_configurations: Vec::new(),
        languages_by_id: Vec::new(),
        parser_lib_path,
    }
}
/// Replaces the shared highlight name list with `names` and stops collecting
/// additional names from highlight queries.
///
/// # Panics
///
/// Panics if the highlight name mutex is poisoned.
#[cfg(feature = "tree-sitter-highlight")]
pub fn configure_highlights(&mut self, names: &[String]) {
    self.use_all_highlight_names = false;
    let mut shared_names = self.highlight_names.lock().unwrap();
    shared_names.clear();
    shared_names.extend_from_slice(names);
}
/// Returns a copy of the current shared highlight name list.
///
/// # Panics
///
/// Panics if the highlight name mutex is poisoned.
#[must_use]
#[cfg(feature = "tree-sitter-highlight")]
pub fn highlight_names(&self) -> Vec<String> {
    let shared_names = self.highlight_names.lock().unwrap();
    shared_names.to_vec()
}
/// Registers the language configurations of every `tree-sitter-*` directory found
/// directly inside the configured parser directories.
///
/// Parser directories that cannot be listed are skipped, and failures while reading
/// an individual grammar are ignored. A warning is printed when no parser directory
/// is configured.
///
/// # Errors
///
/// Fails if a directory entry cannot be read while iterating a parser directory.
pub fn find_all_languages(&mut self, config: &Config) -> Result<()> {
    if config.parser_directories.is_empty() {
        eprintln!("Warning: You have not configured any parser directories!");
        eprintln!("Please run `tree-sitter init-config` and edit the resulting");
        eprintln!("configuration file to indicate where we should look for");
        eprintln!("language grammars.\n");
    }
    for container_dir in &config.parser_directories {
        let Ok(entries) = fs::read_dir(container_dir) else {
            continue;
        };
        for entry in entries {
            let file_name = entry?.file_name();
            let Some(dir_name) = file_name.to_str() else {
                continue;
            };
            if !dir_name.starts_with("tree-sitter-") {
                continue;
            }
            // Best effort: a broken grammar directory must not abort the scan.
            let _ = self.find_language_configurations_at_path(&container_dir.join(dir_name), false);
        }
    }
    Ok(())
}
/// Loads every distinct language configured at `path`, paired with its name.
///
/// Returns an empty list when no configuration can be read from `path`. The result
/// is ordered by internal language id and then by name, without duplicates.
///
/// # Errors
///
/// Fails if one of the languages cannot be loaded or compiled.
pub fn languages_at_path(&mut self, path: &Path) -> Result<Vec<(Language, String)>> {
    let Ok(configurations) = self.find_language_configurations_at_path(path, true) else {
        return Ok(Vec::new());
    };
    let mut ids_and_names: Vec<_> = configurations
        .iter()
        .map(|configuration| (configuration.language_id, configuration.language_name.clone()))
        .collect();
    ids_and_names.sort_unstable();
    ids_and_names.dedup();

    let mut languages = Vec::with_capacity(ids_and_names.len());
    for (id, name) in ids_and_names {
        languages.push((self.language_for_id(id)?, name));
    }
    Ok(languages)
}
/// Returns every known configuration together with the directory of the grammar it
/// belongs to, in discovery order.
#[must_use]
pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> {
    let mut pairs = Vec::with_capacity(self.language_configurations.len());
    for configuration in &self.language_configurations {
        let (grammar_dir, _, _) = &self.languages_by_id[configuration.language_id];
        pairs.push((configuration, grammar_dir.as_path()));
    }
    pairs
}
/// Finds the first configuration whose scope equals `scope` and loads its language.
///
/// Returns `Ok(None)` when no configuration declares that scope.
///
/// # Errors
///
/// Fails if the matching language cannot be loaded or compiled.
pub fn language_configuration_for_scope(
    &self,
    scope: &str,
) -> Result<Option<(Language, &LanguageConfiguration)>> {
    let matching = self
        .language_configurations
        .iter()
        .find(|configuration| configuration.scope.as_deref() == Some(scope));
    let Some(configuration) = matching else {
        return Ok(None);
    };
    let language = self.language_for_id(configuration.language_id)?;
    Ok(Some((language, configuration)))
}
/// Finds a configuration whose first-line regex matches the first line of the file
/// at `path`, and loads its language.
///
/// The file is opened and its first line read once. The registered patterns are then
/// tried in the map's iteration order, and the first pattern that matches and has at
/// least one configuration wins; the first configuration registered for that pattern
/// is returned.
///
/// Returns `Ok(None)` when no first-line regex is registered, the file is empty, or
/// nothing matches.
///
/// # Errors
///
/// Fails if the file cannot be opened or its first line cannot be read, or if the
/// matching language cannot be loaded or compiled.
pub fn language_configuration_for_first_line_regex(
    &self,
    path: &Path,
) -> Result<Option<(Language, &LanguageConfiguration)>> {
    // Without registered patterns there is nothing to match, so do not touch the file.
    if self
        .language_configuration_ids_by_first_line_regex
        .is_empty()
    {
        return Ok(None);
    }

    let file = fs::File::open(path)?;
    let Some(first_line) = BufReader::new(file).lines().next().transpose()? else {
        return Ok(None);
    };

    for (pattern, ids) in &self.language_configuration_ids_by_first_line_regex {
        let Some(regex) = Self::regex(Some(pattern.as_str())) else {
            continue;
        };
        let Some(&configuration_id) = ids.first() else {
            continue;
        };
        if regex.is_match(&first_line) {
            // Return at the first hit so that later, non-matching patterns cannot
            // overwrite the result.
            let configuration = &self.language_configurations[configuration_id];
            let language = self.language_for_id(configuration.language_id)?;
            return Ok(Some((language, configuration)));
        }
    }
    Ok(None)
}
pub fn language_configuration_for_file_name(
&self,
path: &Path,
) -> Result<Option<(Language, &LanguageConfiguration)>> {
let configuration_ids = path
.file_name()
.and_then(|n| n.to_str())
.and_then(|file_name| self.language_configuration_ids_by_file_type.get(file_name))
.or_else(|| {
path.extension()
.and_then(|extension| extension.to_str())
.and_then(|extension| {
self.language_configuration_ids_by_file_type.get(extension)
})
});
if let Some(configuration_ids) = configuration_ids {
if !configuration_ids.is_empty() {
let configuration = if configuration_ids.len() == 1 {
&self.language_configurations[configuration_ids[0]]
}
else {
let file_contents =
fs::read(path).with_context(|| format!("Failed to read path {path:?}"))?;
let file_contents = String::from_utf8_lossy(&file_contents);
let mut best_score = -2isize;
let mut best_configuration_id = None;
for configuration_id in configuration_ids {
let config = &self.language_configurations[*configuration_id];
let score;
if let Some(content_regex) = &config.content_regex {
if let Some(mat) = content_regex.find(&file_contents) {
score = (mat.end() - mat.start()) as isize;
}
else {
score = -1;
}
} else {
score = 0;
}
if score > best_score {
best_configuration_id = Some(*configuration_id);
best_score = score;
}
}
&self.language_configurations[best_configuration_id.unwrap()]
};
let language = self.language_for_id(configuration.language_id)?;
return Ok(Some((language, configuration)));
}
}
Ok(None)
}
/// Finds the configuration whose injection regex has the longest first match in
/// `string`, and loads its language.
///
/// Empty matches never win, and ties go to the configuration registered first.
/// Returns `Ok(None)` when no injection regex produces a non-empty match.
///
/// # Errors
///
/// Fails if the chosen language cannot be loaded or compiled.
pub fn language_configuration_for_injection_string(
    &self,
    string: &str,
) -> Result<Option<(Language, &LanguageConfiguration)>> {
    let match_lengths = self.language_configurations.iter().filter_map(|configuration| {
        let mat = configuration.injection_regex.as_ref()?.find(string)?;
        Some((configuration, mat.end() - mat.start()))
    });

    let mut longest = 0;
    let mut winner = None;
    for (configuration, length) in match_lengths {
        if length > longest {
            longest = length;
            winner = Some(configuration);
        }
    }

    match winner {
        Some(configuration) => {
            let language = self.language_for_id(configuration.language_id)?;
            Ok(Some((language, configuration)))
        }
        None => Ok(None),
    }
}
/// Returns the language with the given internal id, loading it (and compiling it if
/// necessary) from the `src` directory of its grammar on first use.
///
/// # Panics
///
/// Panics if `id` is not a valid index into the registered languages.
fn language_for_id(&self, id: usize) -> Result<Language> {
    let (grammar_dir, cell, externals) = &self.languages_by_id[id];
    let language = cell.get_or_try_init(|| {
        let src_path = grammar_dir.join("src");
        let config = CompileConfig::new(&src_path, externals.as_deref(), None);
        self.load_language_at_path(config)
    })?;
    Ok(language.clone())
}
pub fn compile_parser_at_path(
&self,
grammar_path: &Path,
output_path: PathBuf,
flags: &[&str],
) -> Result<()> {
let src_path = grammar_path.join("src");
let mut config = CompileConfig::new(&src_path, None, Some(output_path));
config.flags = flags;
self.load_language_at_path(config).map(|_| ())
}
/// Reads the language name from `grammar.json` in `config.src_path`, stores it in
/// the configuration and loads the language with `load_language_at_path_with_name`.
///
/// # Errors
///
/// Fails if `grammar.json` cannot be opened or parsed, or if loading fails.
pub fn load_language_at_path(&self, mut config: CompileConfig) -> Result<Language> {
    // Only the `name` field of grammar.json is needed here.
    #[derive(Deserialize)]
    struct GrammarJSON {
        name: String,
    }

    let grammar_path = config.src_path.join("grammar.json");
    let grammar_file = fs::File::open(&grammar_path).with_context(|| {
        format!(
            "Failed to read grammar.json file at the following path:\n{:?}",
            &grammar_path
        )
    })?;
    let grammar: GrammarJSON = serde_json::from_reader(BufReader::new(grammar_file))
        .with_context(|| {
            format!(
                "Failed to parse grammar.json file at the following path:\n{:?}",
                &grammar_path
            )
        })?;

    config.name = grammar.name;
    self.load_language_at_path_with_name(config)
}
/// Loads the language named `config.name`, compiling its parser first when needed.
///
/// The library is recompiled when an explicit `config.output_path` is given, or when
/// the library is missing or older than `parser.c`, the scanner or an external file.
/// A lock file named `<name>.lock` coordinates concurrent compilations. With the
/// `wasm` feature and a configured wasm store, the parser is compiled to and loaded
/// from a `.wasm` file instead of a native library.
///
/// # Errors
///
/// Fails on I/O errors, compilation errors, missing external scanner functions, or if
/// the library or its `tree_sitter_<name>` symbol cannot be loaded.
pub fn load_language_at_path_with_name(&self, mut config: CompileConfig) -> Result<Language> {
let mut lib_name = config.name.to_string();
// The exported symbol uses underscores where the language name has dashes.
let language_fn_name = format!(
"tree_sitter_{}",
replace_dashes_with_underscores(&config.name)
);
// Debug and sanitize builds get their own library file names.
if self.debug_build {
lib_name.push_str(".debug._");
}
if self.sanitize_build {
lib_name.push_str(".sanitize._");
config.sanitize = true;
}
if config.output_path.is_none() {
fs::create_dir_all(&self.parser_lib_path)?;
}
// An explicitly requested output path always forces a recompile. Otherwise the
// library goes into `parser_lib_path` with the platform's dynamic library extension
// (or `.wasm` when a wasm store is configured).
let mut recompile = config.output_path.is_some(); let output_path = config.output_path.unwrap_or_else(|| {
let mut path = self.parser_lib_path.join(lib_name);
path.set_extension(env::consts::DLL_EXTENSION);
#[cfg(feature = "wasm")]
if self.wasm_store.lock().unwrap().is_some() {
path.set_extension("wasm");
}
path
});
config.output_path = Some(output_path.clone());
// Collect every source whose modification time can invalidate the library.
let parser_path = config.src_path.join("parser.c");
config.scanner_path = self.get_scanner_path(config.src_path);
let mut paths_to_check = vec![parser_path];
if let Some(scanner_path) = config.scanner_path.as_ref() {
paths_to_check.push(scanner_path.clone());
}
paths_to_check.extend(
config
.external_files
.unwrap_or_default()
.iter()
.map(|p| config.src_path.join(p)),
);
if !recompile {
recompile = needs_recompile(&output_path, &paths_to_check)
.with_context(|| "Failed to compare source and binary timestamps")?;
}
// Wasm path: compile if needed, then load the module bytes into the wasm store.
#[cfg(feature = "wasm")]
if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() {
if recompile {
self.compile_parser_to_wasm(
&config.name,
None,
config.src_path,
config
.scanner_path
.as_ref()
.and_then(|p| p.strip_prefix(config.src_path).ok()),
&output_path,
false,
)?;
}
let wasm_bytes = fs::read(&output_path)?;
return Ok(wasm_store.load_language(&config.name, &wasm_bytes)?);
}
// Lock file location: a temporary directory when `CROSS_RUNNER` is set, the cache
// directory otherwise.
// NOTE(review): the `tempdir()` value is a temporary dropped at the end of this
// statement, which presumably deletes the directory again; it is recreated below
// only when recompiling — confirm this is intended.
let lock_path = if env::var("CROSS_RUNNER").is_ok() {
tempfile::tempdir()
.unwrap()
.path()
.join("tree-sitter")
.join("lock")
.join(format!("{}.lock", config.name))
} else {
dirs::cache_dir()
.ok_or_else(|| anyhow!("Cannot determine cache directory"))?
.join("tree-sitter")
.join("lock")
.join(format!("{}.lock", config.name))
};
// An existing lock file cancels the pending recompile. If the lock cannot be taken
// immediately, block until its holder releases it. If it can be taken and the file
// was last modified more than 30 seconds ago, delete it and recompile.
if let Ok(lock_file) = fs::OpenOptions::new().write(true).open(&lock_path) {
recompile = false;
if lock_file.try_lock_exclusive().is_err() {
lock_file.lock_exclusive()?;
recompile = false;
} else {
let time = lock_file.metadata()?.modified()?.elapsed()?.as_secs();
if time > 30 {
fs::remove_file(&lock_path)?;
recompile = true;
}
}
}
if recompile {
fs::create_dir_all(lock_path.parent().unwrap()).with_context(|| {
format!(
"Failed to create directory {:?}",
lock_path.parent().unwrap()
)
})?;
let lock_file = fs::OpenOptions::new()
.create(true)
.truncate(true)
.write(true)
.open(&lock_path)?;
lock_file.lock_exclusive()?;
self.compile_parser_to_dylib(&config, &lock_file, &lock_path)?;
// Only grammars with an external scanner are checked for the scanner symbols.
if config.scanner_path.is_some() {
self.check_external_scanner(&config.name, &output_path)?;
}
}
let library = unsafe { Library::new(&output_path) }
.with_context(|| format!("Error opening dynamic library {output_path:?}"))?;
let language = unsafe {
let language_fn = library
.get::<Symbol<unsafe extern "C" fn() -> Language>>(language_fn_name.as_bytes())
.with_context(|| format!("Failed to load symbol {language_fn_name}"))?;
language_fn()
};
// The library handle is leaked on purpose so it is never unloaded; presumably the
// returned `Language` refers to data inside it — confirm.
mem::forget(library);
Ok(language)
}
/// Compiles the parser (and external scanner, if any) described by `config` into a
/// dynamic library at `config.output_path`, using the C compiler selected by the `cc`
/// crate for `BUILD_TARGET` / `BUILD_HOST`.
///
/// `lock_file` / `lock_path` identify the lock held by the caller. The lock is
/// released and the lock file deleted once the compiler has run, and also when the
/// compiler could not be started, so that a failed attempt does not leave a fresh
/// lock file that makes the next run skip recompilation.
///
/// # Errors
///
/// Fails if the compiler cannot be executed, if releasing or removing the lock
/// fails, or if the compiler exits unsuccessfully (stdout and stderr are included).
///
/// # Panics
///
/// Panics if `config.output_path` is `None`, or with an MSVC-like compiler if the
/// output path is not valid UTF-8.
fn compile_parser_to_dylib(
    &self,
    config: &CompileConfig,
    lock_file: &fs::File,
    lock_path: &Path,
) -> Result<(), Error> {
    let mut cc_config = cc::Build::new();
    cc_config
        .cargo_metadata(false)
        .cargo_warnings(false)
        .target(BUILD_TARGET)
        .host(BUILD_HOST)
        .debug(self.debug_build)
        .file(&config.parser_path)
        .includes(&config.header_paths);

    if let Some(scanner_path) = config.scanner_path.as_ref() {
        // Any scanner whose extension is not `.c` is built as C++.
        if scanner_path.extension() != Some("c".as_ref()) {
            cc_config.cpp(true);
            eprintln!("Warning: Using a C++ scanner is now deprecated. Please migrate your scanner code to C, as C++ support will be removed in the near future.");
        } else {
            cc_config.std("c11");
        }
        cc_config.file(scanner_path);
    }

    if self.debug_build {
        cc_config.opt_level(0).extra_warnings(true);
    } else {
        cc_config.opt_level(2).extra_warnings(false);
    }

    for flag in config.flags {
        cc_config.define(flag, None);
    }

    // `cc` only supplies the compiler, its arguments and environment; the link step
    // into a shared library is assembled by hand below.
    let compiler = cc_config.get_compiler();
    let mut command = Command::new(compiler.path());
    command.args(compiler.args());
    for (key, value) in compiler.env() {
        command.env(key, value);
    }

    let output_path = config.output_path.as_ref().unwrap();

    if compiler.is_like_msvc() {
        let out = format!("-out:{}", output_path.to_str().unwrap());
        command.arg(if self.debug_build { "-LDd" } else { "-LD" });
        command.arg("-utf-8");
        command.args(cc_config.get_files());
        command.arg("-link").arg(out);
    } else {
        command.arg("-Werror=implicit-function-declaration");
        if cfg!(any(target_os = "macos", target_os = "ios")) {
            command.arg("-dynamiclib");
            command.arg("-UTREE_SITTER_REUSE_ALLOCATOR");
        } else {
            command.arg("-shared");
        }
        command.args(cc_config.get_files());
        command.arg("-o").arg(output_path);
    }

    let output = command.output().with_context(|| {
        format!("Failed to execute the C compiler with the following command:\n{command:?}")
    });

    // Clean up the lock before looking at the compiler result. A spawn failure is
    // reported in preference to a cleanup failure.
    let cleanup = lock_file
        .unlock()
        .and_then(|()| fs::remove_file(lock_path));
    let output = output?;
    cleanup?;

    if output.status.success() {
        Ok(())
    } else {
        Err(anyhow!(
            "Parser compilation failed.\nStdout: {}\nStderr: {}",
            String::from_utf8_lossy(&output.stdout),
            String::from_utf8_lossy(&output.stderr)
        ))
    }
}
/// Unix only: inspects the symbols of the compiled library with `nm` and checks the
/// external scanner's exported functions.
///
/// Warns about exported functions whose `nm` line does not contain `tree_sitter_`,
/// and fails when any of the five `tree_sitter_<name>_external_scanner_*` functions
/// (create, destroy, serialize, deserialize, scan) is missing. If `nm` cannot be run
/// or exits unsuccessfully, the check is skipped and `Ok(())` is returned.
#[cfg(unix)]
fn check_external_scanner(&self, name: &str, library_path: &Path) -> Result<()> {
// On macOS and iOS the expected symbol names carry a leading underscore.
let prefix = if cfg!(any(target_os = "macos", target_os = "ios")) {
"_"
} else {
""
};
// Required symbols; each one found in the `nm` output is removed from this list.
let mut must_have = vec![
format!("{prefix}tree_sitter_{name}_external_scanner_create"),
format!("{prefix}tree_sitter_{name}_external_scanner_destroy"),
format!("{prefix}tree_sitter_{name}_external_scanner_serialize"),
format!("{prefix}tree_sitter_{name}_external_scanner_deserialize"),
format!("{prefix}tree_sitter_{name}_external_scanner_scan"),
];
let command = Command::new("nm")
.arg("-W")
.arg("-U")
.arg(library_path)
.output();
if let Ok(output) = command {
if output.status.success() {
let mut found_non_static = false;
for line in String::from_utf8_lossy(&output.stdout).lines() {
// Only lines containing " T " are considered; the symbol name is taken from
// the third whitespace-separated column.
if line.contains(" T ") {
if let Some(function_name) =
line.split_whitespace().collect::<Vec<_>>().get(2)
{
if !line.contains("tree_sitter_") {
// Print the warning header once, then one line per offending symbol.
if !found_non_static {
found_non_static = true;
eprintln!("Warning: Found non-static non-tree-sitter functions in the external scannner");
}
eprintln!(" `{function_name}`");
} else {
must_have.retain(|f| f != function_name);
}
}
}
}
if found_non_static {
eprintln!("Consider making these functions static, they can cause conflicts when another tree-sitter project uses the same function name");
}
if !must_have.is_empty() {
let missing = must_have
.iter()
.map(|f| format!(" `{f}`"))
.collect::<Vec<_>>()
.join("\n");
return Err(anyhow!(format!(
indoc! {"
Missing required functions in the external scanner, parsing won't work without these!
{}
You can read more about this at https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners
"},
missing,
)));
}
}
}
Ok(())
}
/// Windows variant of the external scanner check: performs no inspection and always
/// succeeds.
#[cfg(windows)]
fn check_external_scanner(&self, _name: &str, _library_path: &Path) -> Result<()> {
Ok(())
}
/// Compiles `parser.c` (and the optional scanner) in `src_path` to a wasm side
/// module with `emcc`, then moves the result to `output_path`.
///
/// A native `emcc` is used when it can be executed and `force_docker` is false.
/// Otherwise `emcc` is run inside the `EMSCRIPTEN_TAG` container through docker, or
/// through podman if docker is unavailable. `root_path` (default: `src_path`) is
/// mounted at `/src` in the container; `scanner_filename` is passed to `emcc` as
/// given, with the working directory set to the source directory.
///
/// # Errors
///
/// Fails if neither emcc, docker nor podman is usable, if the emcc command cannot be
/// run or exits unsuccessfully, or if the output file cannot be moved.
///
/// # Panics
///
/// In container mode, panics if `src_path` is not located under `root_path`.
pub fn compile_parser_to_wasm(
&self,
language_name: &str,
root_path: Option<&Path>,
src_path: &Path,
scanner_filename: Option<&Path>,
output_path: &Path,
force_docker: bool,
) -> Result<(), Error> {
// Where the `emcc` executable comes from.
#[derive(PartialEq, Eq)]
enum EmccSource {
Native,
Docker,
Podman,
}
let root_path = root_path.unwrap_or(src_path);
let emcc_name = if cfg!(windows) { "emcc.bat" } else { "emcc" };
// Probe in order of preference: native emcc (only that it can be spawned), then a
// successful `docker info`, then a successful `podman --version`.
let source = if !force_docker && Command::new(emcc_name).output().is_ok() {
EmccSource::Native
} else if Command::new("docker")
.arg("info")
.output()
.map_or(false, |out| out.status.success())
{
EmccSource::Docker
} else if Command::new("podman")
.arg("--version")
.output()
.map_or(false, |out| out.status.success())
{
EmccSource::Podman
} else {
return Err(anyhow!(
"You must have either emcc, docker, or podman on your PATH to run this command"
));
};
let mut command = match source {
EmccSource::Native => {
let mut command = Command::new(emcc_name);
command.current_dir(src_path);
command
}
EmccSource::Docker | EmccSource::Podman => {
let mut command = match source {
EmccSource::Docker => Command::new("docker"),
EmccSource::Podman => Command::new("podman"),
_ => unreachable!(),
};
command.args(["run", "--rm"]);
// Working directory inside the container: `/src` plus the position of `src_path`
// below `root_path`.
let workdir = if root_path == src_path {
PathBuf::from("/src")
} else {
let mut path = PathBuf::from("/src");
path.push(src_path.strip_prefix(root_path).unwrap());
path
};
command.args(["--workdir", &workdir.to_string_lossy()]);
// Mount the root directory at `/src` in the container.
let mut volume_string = OsString::from(&root_path);
volume_string.push(":/src:Z");
command.args([OsStr::new("--volume"), &volume_string]);
// Set unconditionally, i.e. also when the command being run is `docker`.
command.env("PODMAN_USERNS", "keep-id");
#[cfg(unix)]
{
#[link(name = "c")]
extern "C" {
fn getuid() -> u32;
}
// For docker only, run the container as the current user id; presumably so
// that created files are owned by the host user — confirm.
if source == EmccSource::Docker {
let user_id = unsafe { getuid() };
command.args(["--user", &user_id.to_string()]);
}
};
command.args([EMSCRIPTEN_TAG, "emcc"]);
command
}
};
// emcc writes into the source directory first; the file is moved afterwards.
let output_name = "output.wasm";
command.args([
"-o",
output_name,
"-Os",
"-s",
"WASM=1",
"-s",
"SIDE_MODULE=2",
"-s",
"TOTAL_MEMORY=33554432",
"-s",
"NODEJS_CATCH_EXIT=0",
"-s",
&format!("EXPORTED_FUNCTIONS=[\"_tree_sitter_{language_name}\"]"),
"-fno-exceptions",
"-fvisibility=hidden",
"-I",
".",
]);
if let Some(scanner_filename) = scanner_filename {
// Scanners with a `cc` or `cpp` extension are compiled as C++.
if scanner_filename
.extension()
.and_then(|ext| ext.to_str())
.map_or(false, |ext| ["cc", "cpp"].contains(&ext))
{
eprintln!("Warning: Using a C++ scanner is now deprecated. Please migrate your scanner code to C, as C++ support will be removed in the near future.");
command.arg("-xc++");
}
command.arg(scanner_filename);
}
command.arg("parser.c");
let status = command
.spawn()
.with_context(|| "Failed to run emcc command")?
.wait()?;
if !status.success() {
return Err(anyhow!("emcc command failed"));
}
fs::rename(src_path.join(output_name), output_path)
.context("failed to rename wasm output file")?;
Ok(())
}
/// Returns the highlight configuration of the language that best matches the
/// injection string `string`.
///
/// Failures are not propagated: a message is printed to stderr and `None` is
/// returned. `None` is also returned when no language matches or the language has no
/// highlight query.
#[must_use]
#[cfg(feature = "tree-sitter-highlight")]
pub fn highlight_config_for_injection_string<'a>(
    &'a self,
    string: &str,
) -> Option<&'a HighlightConfiguration> {
    let (language, configuration) = match self.language_configuration_for_injection_string(string)
    {
        Ok(found) => found?,
        Err(e) => {
            eprintln!("Failed to load language for injection string '{string}': {e}",);
            return None;
        }
    };
    match configuration.highlight_config(language, None) {
        Ok(config) => config,
        Err(e) => {
            eprintln!(
                "Failed to load property sheet for injection string '{string}': {e}",
            );
            None
        }
    }
}
/// Registers the language configurations found in the grammar directory
/// `parser_path` and returns the newly added ones.
///
/// Configurations come from the `tree-sitter` array of `package.json`. If that
/// yields nothing but `src/grammar.json` exists, a single bare configuration with
/// only the language name is registered. With `set_current_path_config`, the first
/// configuration read from `package.json` is remembered as the current-path
/// configuration, unless one is already set.
///
/// # Errors
///
/// Fails if a referenced `grammar.json` cannot be read or parsed, or if an external
/// file path does not start with `parser_path` after joining. An unreadable or
/// unparsable `package.json` is not an error.
pub fn find_language_configurations_at_path(
&mut self,
parser_path: &Path,
set_current_path_config: bool,
) -> Result<&[LanguageConfiguration]> {
// A JSON value that may be absent, a single string, or a list of strings.
#[derive(Deserialize, Clone, Default)]
#[serde(untagged)]
enum PathsJSON {
#[default]
Empty,
Single(String),
Multiple(Vec<String>),
}
impl PathsJSON {
// `Empty` becomes `None`; a single string becomes a one-element list.
fn into_vec(self) -> Option<Vec<String>> {
match self {
Self::Empty => None,
Self::Single(s) => Some(vec![s]),
Self::Multiple(s) => Some(s),
}
}
}
// One element of the `tree-sitter` array in package.json.
#[derive(Deserialize)]
struct LanguageConfigurationJSON {
#[serde(default)]
path: PathBuf,
scope: Option<String>,
#[serde(rename = "file-types")]
file_types: Option<Vec<String>>,
#[serde(rename = "content-regex")]
content_regex: Option<String>,
#[serde(rename = "first-line-regex")]
first_line_regex: Option<String>,
#[serde(rename = "injection-regex")]
injection_regex: Option<String>,
#[serde(default)]
highlights: PathsJSON,
#[serde(default)]
injections: PathsJSON,
#[serde(default)]
locals: PathsJSON,
#[serde(default)]
tags: PathsJSON,
#[serde(default, rename = "external-files")]
external_files: PathsJSON,
}
// The part of package.json this loader reads.
#[derive(Deserialize)]
struct PackageJSON {
#[serde(default)]
#[serde(rename = "tree-sitter")]
tree_sitter: Vec<LanguageConfigurationJSON>,
}
// The part of grammar.json this loader reads.
#[derive(Deserialize)]
struct GrammarJSON {
name: String,
}
// Everything pushed beyond this index is returned to the caller.
let initial_language_configuration_count = self.language_configurations.len();
if let Ok(package_json_contents) = fs::read_to_string(parser_path.join("package.json")) {
let package_json = serde_json::from_str::<PackageJSON>(&package_json_contents);
if let Ok(package_json) = package_json {
// Languages registered before this call are not considered for reuse below.
let language_count = self.languages_by_id.len();
for config_json in package_json.tree_sitter {
let language_path = parser_path.join(config_json.path);
let grammar_path = language_path.join("src").join("grammar.json");
let mut grammar_file = fs::File::open(grammar_path)
.with_context(|| "Failed to read grammar.json")?;
let grammar_json: GrammarJSON =
serde_json::from_reader(BufReader::new(&mut grammar_file))
.with_context(|| "Failed to parse grammar.json")?;
// Reuse the language id if an earlier entry of this package.json already
// registered the same grammar directory.
let mut language_id = None;
for (id, (path, _, _)) in
self.languages_by_id.iter().enumerate().skip(language_count)
{
if language_path == *path {
language_id = Some(id);
}
}
let language_id = if let Some(language_id) = language_id {
language_id
} else {
// New language: external file paths are joined onto `parser_path` and
// rejected unless the joined path still starts with `parser_path`.
self.languages_by_id.push((
language_path,
OnceCell::new(),
config_json.external_files.clone().into_vec().map(|files| {
files.into_iter()
.map(|path| {
let path = parser_path.join(path);
if path.starts_with(parser_path) {
Ok(path)
} else {
Err(anyhow!("External file path {path:?} is outside of parser directory {parser_path:?}"))
}
})
.collect::<Result<Vec<_>>>()
}).transpose()?,
));
self.languages_by_id.len() - 1
};
// Regexes that fail to compile are silently dropped (see `Self::regex`).
let configuration = LanguageConfiguration {
root_path: parser_path.to_path_buf(),
language_name: grammar_json.name.clone(),
scope: config_json.scope,
language_id,
file_types: config_json.file_types.unwrap_or_default(),
content_regex: Self::regex(config_json.content_regex.as_deref()),
first_line_regex: Self::regex(config_json.first_line_regex.as_deref()),
injection_regex: Self::regex(config_json.injection_regex.as_deref()),
injections_filenames: config_json.injections.into_vec(),
locals_filenames: config_json.locals.into_vec(),
tags_filenames: config_json.tags.into_vec(),
highlights_filenames: config_json.highlights.into_vec(),
#[cfg(feature = "tree-sitter-highlight")]
highlight_config: OnceCell::new(),
#[cfg(feature = "tree-sitter-tags")]
tags_config: OnceCell::new(),
#[cfg(feature = "tree-sitter-highlight")]
highlight_names: &self.highlight_names,
#[cfg(feature = "tree-sitter-highlight")]
use_all_highlight_names: self.use_all_highlight_names,
};
// Index the configuration (by the position it is about to get) under each of
// its file types and under its first-line regex.
for file_type in &configuration.file_types {
self.language_configuration_ids_by_file_type
.entry(file_type.to_string())
.or_default()
.push(self.language_configurations.len());
}
if let Some(first_line_regex) = &configuration.first_line_regex {
self.language_configuration_ids_by_first_line_regex
.entry(first_line_regex.to_string())
.or_default()
.push(self.language_configurations.len());
}
// The configuration borrows `self.highlight_names`; its lifetime is extended
// to `'static` so it can be stored inside `self`.
// NOTE(review): soundness presumably relies on the boxed mutex outliving and
// never being replaced while configurations exist — confirm.
self.language_configurations.push(unsafe {
mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
configuration,
)
});
if set_current_path_config
&& self.language_configuration_in_current_path.is_none()
{
self.language_configuration_in_current_path =
Some(self.language_configurations.len() - 1);
}
}
}
}
// Fallback: nothing came from package.json, but a bare grammar exists.
if self.language_configurations.len() == initial_language_configuration_count
&& parser_path.join("src").join("grammar.json").exists()
{
let grammar_path = parser_path.join("src").join("grammar.json");
let mut grammar_file =
fs::File::open(grammar_path).with_context(|| "Failed to read grammar.json")?;
let grammar_json: GrammarJSON =
serde_json::from_reader(BufReader::new(&mut grammar_file))
.with_context(|| "Failed to parse grammar.json")?;
// `language_id` is the index of the language entry pushed just below.
let configuration = LanguageConfiguration {
root_path: parser_path.to_owned(),
language_name: grammar_json.name,
language_id: self.languages_by_id.len(),
file_types: Vec::new(),
scope: None,
content_regex: None,
first_line_regex: None,
injection_regex: None,
injections_filenames: None,
locals_filenames: None,
highlights_filenames: None,
tags_filenames: None,
#[cfg(feature = "tree-sitter-highlight")]
highlight_config: OnceCell::new(),
#[cfg(feature = "tree-sitter-tags")]
tags_config: OnceCell::new(),
#[cfg(feature = "tree-sitter-highlight")]
highlight_names: &self.highlight_names,
#[cfg(feature = "tree-sitter-highlight")]
use_all_highlight_names: self.use_all_highlight_names,
};
self.language_configurations.push(unsafe {
mem::transmute::<LanguageConfiguration<'_>, LanguageConfiguration<'static>>(
configuration,
)
});
self.languages_by_id
.push((parser_path.to_owned(), OnceCell::new(), None));
}
Ok(&self.language_configurations[initial_language_configuration_count..])
}
/// Compiles `pattern` as a multi-line regex. Returns `None` when no pattern is given
/// or when the pattern does not compile.
fn regex(pattern: Option<&str>) -> Option<Regex> {
    let source = pattern?;
    RegexBuilder::new(source).multi_line(true).build().ok()
}
/// Chooses the language to use for the file at `path`.
///
/// The first source that yields a language wins:
/// 1. the configuration for `scope`, when a scope is given (an unknown scope is an
///    error and nothing else is tried);
/// 2. the configuration registered for the file's name or extension;
/// 3. the configuration recorded for the current path;
/// 4. the first language found in `current_dir`;
/// 5. a configuration whose first-line regex matches the file.
///
/// # Errors
///
/// Fails if a language cannot be loaded, if `scope` is unknown, or if no language is
/// found.
pub fn select_language(
    &mut self,
    path: &Path,
    current_dir: &Path,
    scope: Option<&str>,
) -> Result<Language> {
    if let Some(scope) = scope {
        let found = self
            .language_configuration_for_scope(scope)
            .with_context(|| format!("Failed to load language for scope '{scope}'"))?;
        return match found {
            Some((language, _)) => Ok(language),
            None => Err(anyhow!("Unknown scope '{scope}'")),
        };
    }

    let by_file_name = self
        .language_configuration_for_file_name(path)
        .with_context(|| {
            format!(
                "Failed to load language for file name {}",
                &path.file_name().unwrap().to_string_lossy()
            )
        })?;
    if let Some((language, _)) = by_file_name {
        return Ok(language);
    }

    if let Some(id) = self.language_configuration_in_current_path {
        return self.language_for_id(self.language_configurations[id].language_id);
    }

    let in_current_dir = self
        .languages_at_path(current_dir)
        .with_context(|| "Failed to load language in current directory")?;
    if let Some((language, _)) = in_current_dir.into_iter().next() {
        return Ok(language);
    }

    match self.language_configuration_for_first_line_regex(path)? {
        Some((language, _)) => Ok(language),
        None => Err(anyhow!("No language found")),
    }
}
/// Enables or disables debug builds: debug info, no optimisation, extra warnings, and
/// a `.debug._` suffix in the library file name.
pub fn debug_build(&mut self, flag: bool) {
self.debug_build = flag;
}
/// Enables or disables sanitize builds: compile configurations are marked as
/// sanitized and the library file name gets a `.sanitize._` suffix.
pub fn sanitize_build(&mut self, flag: bool) {
self.sanitize_build = flag;
}
/// Switches the loader to wasm: installs a new `WasmStore` created from `engine`, so
/// that languages are subsequently compiled to and loaded from `.wasm` files.
///
/// # Panics
///
/// Panics if the store cannot be created or the store mutex is poisoned.
#[cfg(feature = "wasm")]
pub fn use_wasm(&mut self, engine: &tree_sitter::wasmtime::Engine) {
*self.wasm_store.lock().unwrap() = Some(tree_sitter::WasmStore::new(engine).unwrap());
}
/// Returns the path of the external scanner in `src_path`, trying `scanner.c`,
/// `scanner.cc` and `scanner.cpp` in that order, or `None` if none of them exists.
#[must_use]
pub fn get_scanner_path(&self, src_path: &Path) -> Option<PathBuf> {
    let base = src_path.join("scanner.c");
    ["c", "cc", "cpp"]
        .iter()
        .map(|extension| base.with_extension(extension))
        .find(|candidate| candidate.exists())
}
}
impl<'a> LanguageConfiguration<'a> {
/// Returns the highlight configuration for this language, building it on first use.
///
/// When `paths` is given, it overrides the query files from the configuration: the
/// paths are split by suffix into highlights (`tree-sitter-highlights.scm`),
/// injections (`injections.scm`) and locals (`locals.scm`) queries, matching the
/// default file names used below. The result is cached, so `paths` only has an effect
/// on the call that actually builds the configuration.
///
/// Returns `Ok(None)` when the highlights query is empty.
///
/// # Errors
///
/// Fails if a query file cannot be read or a query is invalid; query errors are
/// annotated with the file they originate from where possible.
///
/// # Panics
///
/// Panics if the shared highlight name mutex is poisoned.
#[cfg(feature = "tree-sitter-highlight")]
pub fn highlight_config(
    &self,
    language: Language,
    paths: Option<&[String]>,
) -> Result<Option<&HighlightConfiguration>> {
    // Selects the caller-supplied paths that end in `suffix`; `None` without override.
    let with_suffix = |suffix: &str| -> Option<Vec<String>> {
        paths.map(|paths| {
            paths
                .iter()
                .filter(|path| path.ends_with(suffix))
                .cloned()
                .collect()
        })
    };
    let highlights_filenames = with_suffix("tree-sitter-highlights.scm");
    // Injection overrides are selected by the injections file name, not the tags one.
    let injections_filenames = with_suffix("injections.scm");
    let locals_filenames = with_suffix("locals.scm");

    self.highlight_config
        .get_or_try_init(|| {
            let (highlights_query, highlight_ranges) = self.read_queries(
                highlights_filenames
                    .as_deref()
                    .or(self.highlights_filenames.as_deref()),
                "tree-sitter-highlights.scm",
            )?;
            let (injections_query, injection_ranges) = self.read_queries(
                injections_filenames
                    .as_deref()
                    .or(self.injections_filenames.as_deref()),
                "injections.scm",
            )?;
            let (locals_query, locals_ranges) = self.read_queries(
                locals_filenames
                    .as_deref()
                    .or(self.locals_filenames.as_deref()),
                "locals.scm",
            )?;

            if highlights_query.is_empty() {
                return Ok(None);
            }

            let mut result = HighlightConfiguration::new(
                language,
                &self.language_name,
                &highlights_query,
                &injections_query,
                &locals_query,
            )
            .map_err(|error| match error.kind {
                QueryErrorKind::Language => Error::from(error),
                _ => {
                    // Error offsets are interpreted relative to the concatenation
                    // injections + locals + highlights; map them back to the section
                    // and file they fall into.
                    if error.offset < injections_query.len() {
                        Self::include_path_in_query_error(
                            error,
                            &injection_ranges,
                            &injections_query,
                            0,
                        )
                    } else if error.offset < injections_query.len() + locals_query.len() {
                        Self::include_path_in_query_error(
                            error,
                            &locals_ranges,
                            &locals_query,
                            injections_query.len(),
                        )
                    } else {
                        Self::include_path_in_query_error(
                            error,
                            &highlight_ranges,
                            &highlights_query,
                            injections_query.len() + locals_query.len(),
                        )
                    }
                }
            })?;

            let mut all_highlight_names = self.highlight_names.lock().unwrap();
            if self.use_all_highlight_names {
                // Collect capture names not seen before into the shared list.
                for capture_name in result.query.capture_names() {
                    if !all_highlight_names.iter().any(|x| x == capture_name) {
                        all_highlight_names.push((*capture_name).to_string());
                    }
                }
            }
            result.configure(all_highlight_names.as_slice());
            drop(all_highlight_names);
            Ok(Some(result))
        })
        .map(Option::as_ref)
}
/// Returns the tags configuration for this language, building it on first use from
/// the tags and locals queries.
///
/// Returns `Ok(None)` when the tags query is empty.
///
/// # Errors
///
/// Fails if a query file cannot be read or the configuration cannot be created;
/// query errors are annotated with the file they originate from where possible.
#[cfg(feature = "tree-sitter-tags")]
pub fn tags_config(&self, language: Language) -> Result<Option<&TagsConfiguration>> {
    self.tags_config
        .get_or_try_init(|| {
            let (tags_query, tags_ranges) =
                self.read_queries(self.tags_filenames.as_deref(), "tree-sitter-tags.scm")?;
            let (locals_query, locals_ranges) =
                self.read_queries(self.locals_filenames.as_deref(), "locals.scm")?;
            if tags_query.is_empty() {
                return Ok(None);
            }

            match TagsConfiguration::new(language, &tags_query, &locals_query) {
                Ok(config) => Ok(Some(config)),
                Err(TagsError::Query(error)) => {
                    // Offsets below the locals length belong to the locals query, the
                    // rest to the tags query that follows it.
                    let (ranges, source, start_offset) = if error.offset < locals_query.len() {
                        (&locals_ranges, &locals_query, 0)
                    } else {
                        (&tags_ranges, &tags_query, locals_query.len())
                    };
                    Err(Self::include_path_in_query_error(
                        error,
                        ranges,
                        source,
                        start_offset,
                    ))
                }
                Err(other) => Err(other.into()),
            }
        })
        .map(Option::as_ref)
}
/// Rewrites a query error so that its offset and row are relative to the query file
/// it occurred in, and attaches that file's path as context.
///
/// `ranges` lists the byte range each file occupies within `source`, and
/// `start_offset` is the position of `source` within the combined query text. If no
/// range contains the offset, the last file is blamed.
///
/// # Panics
///
/// Panics if `ranges` is empty and therefore has no file to blame.
#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
fn include_path_in_query_error(
    mut error: QueryError,
    ranges: &[(String, Range<usize>)],
    source: &str,
    start_offset: usize,
) -> Error {
    let section_offset = error.offset - start_offset;
    let containing = ranges
        .iter()
        .find(|(_, file_range)| file_range.contains(&section_offset));
    let (path, range) = match containing {
        Some(entry) => entry,
        None => ranges.last().unwrap(),
    };
    error.offset = section_offset - range.start;
    // The row is the number of line breaks between the file start and the error.
    error.row = source[range.start..section_offset]
        .bytes()
        .filter(|byte| *byte == b'\n')
        .count();
    Error::from(error).context(format!("Error in query file {path:?}"))
}
/// Reads and concatenates query files.
///
/// With `paths`, each path is resolved against `root_path` and read in order. Without
/// it, `queries/<default_path>` under `root_path` is read if it exists. Returns the
/// concatenated query text plus, for each file read, its path and the byte range it
/// occupies in that text. Both are empty when nothing was read.
///
/// # Errors
///
/// Fails if a listed query file, or an existing default query file, cannot be read.
// The tuple-in-tuple return type trips clippy's type complexity lint.
#[allow(clippy::type_complexity)]
#[cfg(any(feature = "tree-sitter-highlight", feature = "tree-sitter-tags"))]
fn read_queries(
&self,
paths: Option<&[String]>,
default_path: &str,
) -> Result<(String, Vec<(String, Range<usize>)>)> {
let mut query = String::new();
let mut path_ranges = Vec::new();
if let Some(paths) = paths {
for path in paths {
let abs_path = self.root_path.join(path);
// Remember where this file starts within the concatenated text.
let prev_query_len = query.len();
query += &fs::read_to_string(&abs_path)
.with_context(|| format!("Failed to read query file {path:?}"))?;
path_ranges.push((path.clone(), prev_query_len..query.len()));
}
} else {
// Falling back to the default highlights or tags file prints a warning asking for
// an explicit entry in package.json.
if default_path == "tree-sitter-highlights.scm"
|| default_path == "tree-sitter-tags.scm"
{
eprintln!(
indoc! {"
Warning: you should add a `{}` entry pointing to the highlights path in `tree-sitter` language list in the grammar's package.json
See more here: https://tree-sitter.github.io/tree-sitter/syntax-highlighting#query-paths
"},
default_path.replace(".scm", "")
);
}
let queries_path = self.root_path.join("queries");
let path = queries_path.join(default_path);
if path.exists() {
query = fs::read_to_string(&path)
.with_context(|| format!("Failed to read query file {path:?}"))?;
path_ranges.push((default_path.to_string(), 0..query.len()));
}
}
Ok((query, path_ranges))
}
}
/// Decides whether the library at `lib_path` is out of date.
///
/// Returns true when the library does not exist or when any of `paths_to_check` was
/// modified after it. Paths are examined in order and the check stops at the first
/// newer file.
///
/// # Errors
///
/// Fails if a modification time cannot be read before a newer file is found.
fn needs_recompile(lib_path: &Path, paths_to_check: &[PathBuf]) -> Result<bool> {
    if lib_path.exists() {
        let lib_mtime =
            mtime(lib_path).with_context(|| format!("Failed to read mtime of {lib_path:?}"))?;
        // Lazily walk the sources; stop at the first newer file or the first error.
        paths_to_check
            .iter()
            .map(|source| mtime(source))
            .find_map(|source_mtime| match source_mtime {
                Ok(modified) if modified > lib_mtime => Some(Ok(true)),
                Ok(_) => None,
                Err(error) => Some(Err(error)),
            })
            .unwrap_or(Ok(false))
    } else {
        Ok(true)
    }
}
/// Returns the last modification time of the file at `path`.
///
/// # Errors
///
/// Fails if the metadata cannot be read or does not provide a modification time.
fn mtime(path: &Path) -> Result<SystemTime> {
    let metadata = fs::metadata(path)?;
    let modified = metadata.modified()?;
    Ok(modified)
}
/// Returns `name` with every `-` replaced by `_`, as needed to form the C symbol name
/// of a language.
fn replace_dashes_with_underscores(name: &str) -> String {
    name.chars()
        .map(|c| if c == '-' { '_' } else { c })
        .collect()
}