#[cfg(feature = "static-grammar-libs")]
include!(concat!(env!("OUT_DIR"), "/generated_grammar.rs"));
#[cfg(feature = "static-grammar-libs")]
use lazy_static::lazy_static;
#[cfg(feature = "static-grammar-libs")]
lazy_static! {
    /// Names of every grammar in the generated `LANGUAGES` table, sorted so the
    /// listing is stable between runs.
    pub static ref SUPPORTED_LANGUAGES: Vec<&'static str> = {
        let mut names: Vec<&'static str> = Vec::new();
        names.extend(LANGUAGES.keys().copied());
        names.sort_unstable();
        names
    };
}
#[cfg(not(feature = "static-grammar-libs"))]
use phf::phf_map;
#[cfg(not(feature = "static-grammar-libs"))]
use tree_sitter::Language;
use log::{debug, error, info};
use logging_timer::time;
use serde::{Deserialize, Serialize};
use std::{
collections::HashMap,
fs, io,
path::{Path, PathBuf},
};
use thiserror::Error;
use tree_sitter::{Parser, Tree, LANGUAGE_VERSION, MIN_COMPATIBLE_LANGUAGE_VERSION};
/// Default mapping from a file extension (without the leading dot) to the name
/// of the tree-sitter grammar used to parse files with that extension.
///
/// Every value must be a grammar name that the loaders in this module can
/// resolve; user-supplied file associations are consulted before this table.
static FILE_EXTS: phf::Map<&'static str, &'static str> = phf_map! {
    "hs" => "haskell",
    "rs" => "rust",
    "go" => "go",
    "c" => "c",
    "cc" => "cpp",
    "cpp" => "cpp",
    "cs" => "c_sharp",
    "java" => "java",
    "py" => "python",
    "css" => "css",
    "sh" => "bash",
    "bash" => "bash",
    "jl" => "julia",
    "ml" => "ocaml",
    "rb" => "ruby",
    "scala" => "scala",
    "sc" => "scala",
    "swift" => "swift",
    "php" => "php",
    "json" => "json",
    "hcl" => "hcl",
    "ts" => "typescript",
    "tsx" => "tsx",
    // JavaScript sources are parsed with the TypeScript grammars.
    "js" => "typescript",
    "jsx" => "tsx",
    "hpp" => "cpp",
    // `.tpp` is a C++ template implementation file, so it uses the C++ grammar
    // like the other C++ extensions; there is no separate "tpp" grammar name.
    "tpp" => "cpp",
    "h" => "c",
    "tf" => "hcl",
    "md" => "markdown",
};
/// Errors produced while resolving a language, loading its tree-sitter grammar,
/// or parsing a file with it.
#[derive(Error, Debug)]
pub enum LoadingError {
/// The requested language has no entry in the statically compiled grammar table.
#[cfg(feature = "static-grammar-libs")]
#[error("The program was not compiled with support for {0}")]
StaticNotCompiled(String),
/// No grammar candidate was available to try at all.
#[error("This program was not compiled with support for any grammars")]
NoGrammars,
/// The file extension maps to no language, in user associations or the defaults.
#[error("Unsupported extension: {0}")]
UnsupportedExt(String),
/// The path has no file extension to infer a language from; holds the path text.
#[error("Did not find a valid file extension from filename {0}")]
NoFileExt(String),
/// Converted from `tree_sitter::LanguageError` (e.g. when setting the parser language).
#[error("tree-sitter had an error")]
LanguageError(#[from] tree_sitter::LanguageError),
/// The tree-sitter parser returned no tree for the file at this path.
#[error("could not parse {0} with tree-sitter")]
TSParseFailure(PathBuf),
/// Converted from `std::io::Error` (e.g. when reading the source file).
#[error("Some IO error was encountered")]
IoError(#[from] io::Error),
/// Converted from `libloading::Error` when opening a shared library or
/// looking up its constructor symbol fails.
// NOTE(review): unlike the dynamic-loading helpers, this variant is not gated
// on the `dynamic-grammar-libs` feature — confirm `libloading` is an
// unconditional dependency.
#[error("Unable to dynamically load grammar")]
LibloadingError(#[from] libloading::Error),
/// The loaded grammar's ABI version is outside the supported range.
/// Fields are: loaded version, minimum supported, maximum supported.
#[error("Attempted to load a tree-sitter grammar with incompatible language ABI version: {0} (supported range: {1} - {2})")]
AbiOutOfRange(usize, usize, usize),
}
/// Shorthand for the string-to-string maps used in the grammar configuration.
type StringMap = HashMap<String, String>;
/// User configuration for grammar loading.
///
/// Fields are (de)serialized with kebab-case keys, i.e. `dylib-overrides` and
/// `file-associations`.
#[derive(Debug, Eq, PartialEq, Serialize, Deserialize, Clone, Default)]
#[serde(rename_all = "kebab-case")]
pub struct GrammarConfig {
/// Maps a language name to the shared-library file name/path to load for it
/// instead of the default library name.
pub dylib_overrides: Option<StringMap>,
/// Maps a file extension to a language name; consulted before the built-in
/// extension table.
pub file_associations: Option<StringMap>,
}
/// Look up `lang` in the statically compiled grammar table and construct it.
///
/// # Errors
///
/// Returns [`LoadingError::StaticNotCompiled`] when the table has no entry for
/// `lang`.
#[cfg(feature = "static-grammar-libs")]
fn generate_language_static(lang: &str) -> Result<Language, LoadingError> {
    info!("Using tree-sitter parser for language {}", lang);
    LANGUAGES
        .get(lang)
        // Entries of the generated table are unsafe grammar constructor functions.
        .map(|grammar_fn| unsafe { grammar_fn() })
        .ok_or_else(|| LoadingError::StaticNotCompiled(lang.to_string()))
}
/// Build the name of the constructor symbol for a grammar: `tree_sitter_`
/// followed by `lang` with every `-` replaced by `_`.
#[must_use]
pub fn tree_sitter_constructor_symbol_name(lang: &str) -> String {
    const PREFIX: &str = "tree_sitter_";
    let mut symbol = String::with_capacity(PREFIX.len() + lang.len());
    symbol.push_str(PREFIX);
    symbol.extend(lang.chars().map(|c| if c == '-' { '_' } else { c }));
    symbol
}
/// Build the default shared-library file name for a grammar:
/// `libtree-sitter-<lang>.<ext>`, with `_` in `lang` replaced by `-` and the
/// extension chosen from the compilation target.
///
/// # Panics
///
/// Panics when compiled for a target OS other than macOS, Linux, NetBSD, or
/// Windows.
#[cfg(feature = "dynamic-grammar-libs")]
fn lib_name_from_lang(lang: &str) -> String {
    let extension = match () {
        () if cfg!(target_os = "macos") => "dylib",
        () if cfg!(any(target_os = "linux", target_os = "netbsd")) => "so",
        () if cfg!(target_os = "windows") => "dll",
        () => panic!("Dynamic libraries are not supported for this platform."),
    };
    let stem = lang.replace('_', "-");
    format!("libtree-sitter-{}.{}", stem, extension)
}
/// Load a tree-sitter grammar from the shared library at `parser_path`.
///
/// The constructor symbol is derived from `language_name` via
/// [`tree_sitter_constructor_symbol_name`]; it is looked up in the library and
/// called to obtain the [`Language`].
///
/// # Errors
///
/// Returns [`LoadingError::LibloadingError`] when the library cannot be opened
/// or the constructor symbol cannot be found in it.
pub fn construct_ts_lang_from_shared_lib(
language_name: &str,
parser_path: &Path,
) -> Result<Language, LoadingError> {
info!(
"Loading dynamic library for language '{}' path '{}'",
language_name,
parser_path.to_string_lossy(),
);
let constructor_symbol_name = tree_sitter_constructor_symbol_name(language_name);
debug!(
"Using '{}' as symbol name for parser constructor method",
constructor_symbol_name
);
// NOTE(review): this block trusts that the library exports the symbol with the
// signature `unsafe extern "C" fn() -> Language`; nothing here can verify that.
let grammar = unsafe {
let shared_library = Box::new(libloading::Library::new(parser_path.as_os_str())?);
// Leak the library handle so it is never dropped (and thus never unloaded) —
// presumably because the returned `Language` refers to code/data inside it.
let static_shared_library = Box::leak(shared_library);
let constructor = static_shared_library.get::<libloading::Symbol<
unsafe extern "C" fn() -> Language,
>>(constructor_symbol_name.as_bytes())?;
constructor()
};
Ok(grammar)
}
/// Load the grammar for `lang` from a shared library.
///
/// The library file name comes from `overrides[lang]` when present, otherwise
/// from the default name built by `lib_name_from_lang`.
///
/// # Errors
///
/// Propagates any error from [`construct_ts_lang_from_shared_lib`].
#[cfg(feature = "dynamic-grammar-libs")]
fn generate_language_dynamic(
    lang: &str,
    overrides: Option<&StringMap>,
) -> Result<Language, LoadingError> {
    // Always computed first, so the fallback is ready if the map lacks `lang`.
    let default_fname = lib_name_from_lang(lang);
    let lib_fname = match overrides {
        Some(user_libs) => {
            debug!("Overriding dynamic library name because of user config");
            user_libs.get(lang).unwrap_or(&default_fname)
        }
        None => &default_fname,
    };
    construct_ts_lang_from_shared_lib(lang, Path::new(lib_fname))
}
/// Load the tree-sitter grammar for `lang`, trying every loader compiled into
/// this build.
///
/// Candidates, in order of precedence:
/// 1. the dynamic loader, when `config.dylib_overrides` is set
///    (feature `dynamic-grammar-libs`);
/// 2. the statically compiled grammar (feature `static-grammar-libs`);
/// 3. the dynamic loader with default library names, when no overrides are set
///    (feature `dynamic-grammar-libs`).
///
/// All candidates are constructed up front; the first one that loaded
/// successfully is ABI-checked and returned.
///
/// # Errors
///
/// * The error of the last candidate when every candidate failed to load.
/// * [`LoadingError::AbiOutOfRange`] when the first successfully loaded grammar
///   has an unsupported ABI version.
/// * [`LoadingError::NoGrammars`] when no loader is compiled in, so there was
///   nothing to try.
// The pushes are feature-gated, so the list cannot be written as a `vec![]` literal.
#[allow(clippy::vec_init_then_push)]
// With no grammar feature enabled, `config` and the `mut` binding are unused.
#[allow(unused)]
pub fn generate_language(lang: &str, config: &GrammarConfig) -> Result<Language, LoadingError> {
    // Explicit element type: with no grammar feature enabled nothing is pushed,
    // so the type should not depend on the feature-gated pushes below.
    let mut grammar_candidates: Vec<Result<Language, LoadingError>> = Vec::new();

    #[cfg(feature = "dynamic-grammar-libs")]
    if config.dylib_overrides.is_some() {
        grammar_candidates.push(generate_language_dynamic(
            lang,
            config.dylib_overrides.as_ref(),
        ));
    }

    #[cfg(feature = "static-grammar-libs")]
    grammar_candidates.push(generate_language_static(lang));

    #[cfg(feature = "dynamic-grammar-libs")]
    if config.dylib_overrides.is_none() {
        grammar_candidates.push(generate_language_dynamic(
            lang,
            config.dylib_overrides.as_ref(),
        ));
    }

    // Remember the most recent failure instead of computing `len() - 1`: that
    // subtraction underflows (and panics in debug builds) for an empty list,
    // which made the `NoGrammars` path below unreachable there.
    let mut last_error = None;
    for candidate in grammar_candidates {
        match candidate {
            Ok(grammar) => {
                info!("Succeeded loading grammar for {}", lang);
                ts_language_abi_checked(&grammar)?;
                return Ok(grammar);
            }
            Err(e) => {
                debug!("Failed to load candidate grammar for {}: {}", lang, &e);
                last_error = Some(e);
            }
        }
    }

    match last_error {
        Some(e) => {
            error!("Failed to load all candidate grammars for {}", lang);
            Err(e)
        }
        None => {
            error!("No grammars were loaded at all");
            Err(LoadingError::NoGrammars)
        }
    }
}
/// Resolve a file extension to a language name.
///
/// A match in the user-supplied `overrides` map wins; otherwise the built-in
/// extension table is consulted. Returns `None` (and logs an error) when
/// neither knows the extension.
#[must_use]
pub fn resolve_language_str<'a>(
    ext: &str,
    overrides: Option<&'a HashMap<String, String>>,
) -> Option<&'a str> {
    let user_choice = overrides.and_then(|associations| associations.get(ext));
    if let Some(lang) = user_choice {
        info!(
            "Deduced language \"{}\" from extension \"{}\" provided from user mappings",
            lang, ext
        );
        return Some(lang);
    }

    match FILE_EXTS.get(ext) {
        Some(lang) => {
            info!(
                "Deduced language \"{}\" from extension \"{}\" from default mappings",
                lang, ext
            );
            Some(lang)
        }
        None => {
            error!(
                "Was not able to find a language string for extension {}",
                ext
            );
            None
        }
    }
}
/// Resolve a file extension to a language and load that language's grammar.
///
/// # Errors
///
/// Returns [`LoadingError::UnsupportedExt`] when the extension maps to no
/// language, or any error from [`generate_language`].
#[deprecated(
    since = "0.8.1",
    note = "You should use lang_name_from_file_ext instead."
)]
pub fn language_from_ext(
    ext: &str,
    grammar_config: &GrammarConfig,
) -> Result<Language, LoadingError> {
    match resolve_language_str(ext, grammar_config.file_associations.as_ref()) {
        Some(language_str) => generate_language(language_str, grammar_config),
        None => Err(LoadingError::UnsupportedExt(ext.to_string())),
    }
}
/// Resolve a file extension to a language name, consulting the user's file
/// associations before the built-in table.
///
/// # Errors
///
/// Returns [`LoadingError::UnsupportedExt`] when no mapping exists for `ext`.
pub fn lang_name_from_file_ext<'cfg>(
    ext: &str,
    grammar_config: &'cfg GrammarConfig,
) -> Result<&'cfg str, LoadingError> {
    resolve_language_str(ext, grammar_config.file_associations.as_ref())
        .ok_or_else(|| LoadingError::UnsupportedExt(ext.to_string()))
}
/// Check that a grammar's ABI version lies within
/// `MIN_COMPATIBLE_LANGUAGE_VERSION..=LANGUAGE_VERSION`.
///
/// # Errors
///
/// Returns [`LoadingError::AbiOutOfRange`] carrying the loaded version and the
/// supported bounds when the version is outside that range.
pub fn ts_language_abi_checked(ts_language: &Language) -> Result<(), LoadingError> {
    match ts_language.abi_version() {
        version if (MIN_COMPATIBLE_LANGUAGE_VERSION..=LANGUAGE_VERSION).contains(&version) => {
            Ok(())
        }
        version => Err(LoadingError::AbiOutOfRange(
            version,
            MIN_COMPATIBLE_LANGUAGE_VERSION,
            LANGUAGE_VERSION,
        )),
    }
}
/// Parse the file at `p` with tree-sitter and return the resulting tree.
///
/// When `language` is `None`, the language is inferred from the file extension
/// using `config`'s file associations and the built-in extension table.
///
/// # Errors
///
/// * [`LoadingError::NoFileExt`] when no language is given and the path has no
///   extension.
/// * [`LoadingError::UnsupportedExt`] when the extension maps to no language.
/// * Any error from [`generate_language`] or from setting the parser language.
/// * [`LoadingError::IoError`] when the file cannot be read as a string.
/// * [`LoadingError::TSParseFailure`] when the parser produces no tree.
#[time("info", "parse::{}")]
pub fn parse_file(
    p: &Path,
    language: Option<&str>,
    config: &GrammarConfig,
) -> Result<Tree, LoadingError> {
    let resolved_language = if let Some(explicit) = language {
        explicit
    } else {
        let ext = p
            .extension()
            .ok_or_else(|| LoadingError::NoFileExt(p.to_string_lossy().to_string()))?;
        let ext_text = ext.to_string_lossy();
        lang_name_from_file_ext(&ext_text, config)?
    };

    let mut parser = Parser::new();
    let ts_lang = generate_language(resolved_language, config)?;
    parser.set_language(&ts_lang)?;

    let source = fs::read_to_string(p)?;
    let ast = parser
        .parse(&source, None)
        .ok_or_else(|| LoadingError::TSParseFailure(p.to_owned()))?;
    debug!("Parsed AST");
    Ok(ast)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every statically compiled grammar must be accepted by a fresh parser.
    #[cfg(feature = "static-grammar-libs")]
    #[test]
    fn static_load_parsers() {
        let failures: Vec<_> = (&LANGUAGES)
            .into_iter()
            .filter_map(|(&name, ctor)| {
                let mut parser = tree_sitter::Parser::new();
                let outcome = unsafe {
                    let ts_lang = ctor();
                    parser.set_language(&ts_lang)
                };
                outcome.err().map(|e| (name, e))
            })
            .collect();
        assert!(failures.is_empty(), "{failures:#?}");
    }

    /// Each listed grammar must be loadable from its default shared library.
    /// Ignored by default.
    #[cfg(feature = "dynamic-grammar-libs")]
    #[test]
    #[ignore]
    fn dynamic_load_parsers() {
        let languages = [
            "rust", "cpp", "python", "bash", "ocaml", "go", "ruby", "java", "c_sharp", "css",
            "php", "json", "tsx", "hcl",
        ];
        let failures: Vec<&str> = languages
            .iter()
            .copied()
            .filter(|name| generate_language_dynamic(name, None).is_err())
            .collect();
        assert!(failures.is_empty(), "{:#?}", failures);
    }

    /// Every statically compiled grammar must pass the ABI version check.
    #[cfg(feature = "static-grammar-libs")]
    #[test]
    fn test_static_grammar_tree_sitter_abi_compatibility() -> Result<(), LoadingError> {
        (&LANGUAGES).into_iter().try_for_each(|(_, language_ctor)| {
            let language = unsafe { language_ctor() };
            ts_language_abi_checked(&language)
        })
    }
}