#[cfg(feature = "static-grammar-libs")]
include!(concat!(env!("OUT_DIR"), "/generated_grammar.rs"));
#[cfg(not(feature = "static-grammar-libs"))]
use phf::phf_map;
#[cfg(not(feature = "static-grammar-libs"))]
use tree_sitter::Language;
use anyhow::Result;
use log::{debug, error, info};
use logging_timer::time;
use serde::{Deserialize, Serialize};
use std::{
collections::HashMap,
fs, io,
path::{Path, PathBuf},
};
use thiserror::Error;
use tree_sitter::{Parser, Tree};
/// Built-in mapping from a file extension (without the leading dot) to the
/// name of the tree-sitter language used to parse files with that extension.
///
/// User-supplied file associations are consulted before this table; see
/// `resolve_language_str`.
static FILE_EXTS: phf::Map<&'static str, &'static str> = phf_map! {
    "hs" => "haskell",
    "rs" => "rust",
    "go" => "go",
    "c" => "c",
    "cc" => "cpp",
    "cpp" => "cpp",
    "cs" => "c_sharp",
    "java" => "java",
    "py" => "python",
    "css" => "css",
    "sh" => "bash",
    "bash" => "bash",
    "jl" => "julia",
    "ml" => "ocaml",
    "rb" => "ruby",
    "scala" => "scala",
    "sc" => "scala",
    "swift" => "swift",
    "php" => "php",
    "json" => "json",
    "hcl" => "hcl",
    "ts" => "typescript",
    "tsx" => "tsx",
    "hpp" => "cpp",
    // `.tpp` files hold C++ template implementations, so they use the C++
    // grammar like the other C++ extensions (there is no "tpp" language).
    "tpp" => "cpp",
    "h" => "c",
    "tf" => "hcl",
    "js" => "javascript",
};
/// Errors that can occur while resolving a grammar, loading it, or parsing a
/// file with it.
#[derive(Error, Debug)]
pub enum LoadingError {
/// The requested language is not among the statically compiled grammars.
/// The payload is the language name that was looked up.
#[cfg(feature = "static-grammar-libs")]
#[error("The program was not compiled with support for {0}")]
StaticNotCompiled(String),
/// No grammar candidate was available to try at all.
#[error("This program was not compiled with support for any grammars")]
NoGrammars,
/// Neither the user file associations nor the default table know the
/// extension carried in the payload.
#[error("Unsupported extension: {0}")]
UnsupportedExt(String),
/// The path (stored lossily as a string) has no file extension, so no
/// language could be deduced from it.
#[error("Did not find a valid file extension from filename {0}")]
NoFileExt(String),
/// Converted from `tree_sitter::LanguageError`, e.g. when the parser
/// rejects the language passed to `set_language`.
#[error("tree-sitter had an error")]
LanguageError(#[from] tree_sitter::LanguageError),
/// tree-sitter returned no tree for the file at the given path.
#[error("could not parse {0} with tree-sitter")]
TSParseFailure(PathBuf),
/// Converted from `std::io::Error`, e.g. when the source file cannot be read.
#[error("Some IO error was encountered")]
IoError(#[from] io::Error),
/// Converted from `libloading::Error` when opening a shared library or
/// resolving a symbol in it fails.
#[cfg(feature = "dynamic-grammar-libs")]
#[error("Unable to dynamically load grammar")]
LibloadingError(#[from] libloading::Error),
}
/// Shorthand for the string-to-string maps used by the grammar configuration.
type StringMap = HashMap<String, String>;
/// User-configurable settings that influence how grammars are located.
///
/// Field names are (de)serialized in kebab-case, i.e. `dylib-overrides` and
/// `file-associations`.
#[derive(Debug, Eq, PartialEq, Serialize, Deserialize, Clone, Default)]
#[serde(rename_all = "kebab-case")]
pub struct GrammarConfig {
/// Maps a language name to the shared-library file name to load for it,
/// replacing the default name derived from the language.
pub dylib_overrides: Option<StringMap>,
/// Maps a file extension to a language name; consulted before the built-in
/// extension table.
pub file_associations: Option<StringMap>,
}
/// Look up `lang` among the grammars that were compiled into the binary.
///
/// # Errors
///
/// Returns [`LoadingError::StaticNotCompiled`] when `LANGUAGES` has no entry
/// for `lang`.
#[cfg(feature = "static-grammar-libs")]
fn generate_language_static(lang: &str) -> Result<Language, LoadingError> {
    info!("Using tree-sitter parser for language {}", lang);
    LANGUAGES
        .get(lang)
        // SAFETY: the entries of `LANGUAGES` come from the generated grammar
        // table and are presumed to be valid tree-sitter grammar constructors
        // (TODO confirm against the build script).
        .map(|constructor| unsafe { constructor() })
        .ok_or_else(|| LoadingError::StaticNotCompiled(lang.to_string()))
}
#[cfg(feature = "dynamic-grammar-libs")]
/// Build the name of the constructor symbol exported by a grammar library:
/// `tree_sitter_` followed by `lang` with every `-` replaced by `_`.
fn fn_name_from_lang(lang: &str) -> String {
    let symbol_suffix: String = lang
        .chars()
        .map(|c| if c == '-' { '_' } else { c })
        .collect();
    format!("tree_sitter_{}", symbol_suffix)
}
#[cfg(feature = "dynamic-grammar-libs")]
/// Build the default shared-library file name for a grammar:
/// `libtree-sitter-<lang>.<ext>`, with every `_` in `lang` replaced by `-`
/// and `<ext>` chosen from the target operating system.
///
/// # Panics
///
/// Panics when compiled for a target OS other than macOS, Linux or Windows.
fn lib_name_from_lang(lang: &str) -> String {
    // `cfg!` expands to a boolean literal, so exactly one guard can match.
    let extension = match () {
        () if cfg!(target_os = "macos") => "dylib",
        () if cfg!(target_os = "linux") => "so",
        () if cfg!(target_os = "windows") => "dll",
        () => panic!("Dynamic libraries are not supported for this platform."),
    };
    let dashed_lang: String = lang
        .chars()
        .map(|c| if c == '_' { '-' } else { c })
        .collect();
    format!("libtree-sitter-{}.{}", dashed_lang, extension)
}
/// Load the grammar for `lang` from a shared library at runtime.
///
/// The library file name defaults to the result of `lib_name_from_lang`. When
/// `overrides` contains an entry for `lang`, that entry is used instead. The
/// grammar constructor is looked up under the symbol name returned by
/// `fn_name_from_lang`.
///
/// The opened library is intentionally leaked and therefore never unloaded.
///
/// # Errors
///
/// Returns [`LoadingError::LibloadingError`] when the library cannot be opened
/// or the constructor symbol cannot be resolved.
#[cfg(feature = "dynamic-grammar-libs")]
fn generate_language_dynamic(
    lang: &str,
    overrides: Option<&StringMap>,
) -> Result<Language, LoadingError> {
    let default_fname = lib_name_from_lang(lang);
    // Only report an override when one is actually configured for this
    // language; an override map without an entry for `lang` uses the default.
    let lib_fname = match overrides.and_then(|map| map.get(lang)) {
        Some(user_fname) => {
            debug!("Overriding dynamic library name because of user config");
            user_fname
        }
        None => &default_fname,
    };
    info!("Loading dynamic library from {}", lib_fname);
    let fn_name = fn_name_from_lang(lang);
    debug!("Using name {} for dynamic function", fn_name);
    // SAFETY: loading a library runs its initialisers, and the resolved symbol
    // is trusted to be a tree-sitter grammar constructor with the signature
    // `unsafe extern "C" fn() -> Language`. Neither can be checked here; both
    // rely on the library being a genuine tree-sitter grammar.
    let grammar = unsafe {
        // Leak the handle so the library is never closed; presumably the
        // returned `Language` keeps pointing into the library's memory.
        let lib = Box::leak(Box::new(libloading::Library::new(lib_fname)?));
        // `get::<T>` already returns a `Symbol<T>`, so `T` is the plain
        // function-pointer type.
        let constructor = lib.get::<unsafe extern "C" fn() -> Language>(fn_name.as_bytes())?;
        constructor()
    };
    Ok(grammar)
}
/// Load the tree-sitter grammar for `lang` using whichever grammar backends
/// were compiled in.
///
/// Candidates are tried in this order, stopping at the first success:
///
/// 1. the dynamic loader, when `config.dylib_overrides` is set;
/// 2. the statically compiled grammars;
/// 3. the dynamic loader, when `config.dylib_overrides` is not set.
///
/// Each candidate is only evaluated when every earlier one has failed, so no
/// shared library is opened once a grammar has already been found.
///
/// # Errors
///
/// Returns the error of the last candidate when all of them fail, or
/// [`LoadingError::NoGrammars`] when no backend is compiled in at all.
// The pushes are behind `cfg` attributes, so the vector cannot be built with
// a single `vec![]` literal.
#[allow(clippy::vec_init_then_push)]
// Depending on the enabled features, parameters and `mut` may go unused.
#[allow(unused)]
pub fn generate_language(lang: &str, config: &GrammarConfig) -> Result<Language, LoadingError> {
    type Loader<'a> = Box<dyn Fn() -> Result<Language, LoadingError> + 'a>;

    let dylib_overrides = config.dylib_overrides.as_ref();
    let mut loaders: Vec<Loader<'_>> = Vec::new();

    #[cfg(feature = "dynamic-grammar-libs")]
    if dylib_overrides.is_some() {
        loaders.push(Box::new(move || {
            generate_language_dynamic(lang, dylib_overrides)
        }));
    }
    #[cfg(feature = "static-grammar-libs")]
    loaders.push(Box::new(move || generate_language_static(lang)));
    #[cfg(feature = "dynamic-grammar-libs")]
    if dylib_overrides.is_none() {
        loaders.push(Box::new(move || {
            generate_language_dynamic(lang, dylib_overrides)
        }));
    }

    // Remember the most recent failure instead of computing `len() - 1`,
    // which underflows when there are no candidates.
    let mut last_error = None;
    for load in loaders {
        match load() {
            Ok(grammar) => {
                info!("Succeeded loading grammar for {}", lang);
                return Ok(grammar);
            }
            Err(e) => {
                debug!("Failed to load candidate grammar for {}: {}", lang, &e);
                last_error = Some(e);
            }
        }
    }

    match last_error {
        Some(e) => {
            error!("Failed to load all candidate grammars for {}", lang);
            Err(e)
        }
        None => {
            error!("No grammars were loaded at all");
            Err(LoadingError::NoGrammars)
        }
    }
}
/// Resolve a file extension to a language name.
///
/// A matching entry in `overrides` takes precedence; otherwise the built-in
/// `FILE_EXTS` table is consulted. Returns `None` (and logs an error) when
/// neither source knows `ext`.
pub fn resolve_language_str<'a>(
    ext: &str,
    overrides: Option<&'a HashMap<String, String>>,
) -> Option<&'a str> {
    let user_choice = overrides.and_then(|mapping| mapping.get(ext));
    let default_choice = FILE_EXTS.get(ext);

    // User-provided associations win over the defaults.
    if let Some(lang) = user_choice {
        info!(
            "Deduced language \"{}\" from extension \"{}\" provided from user mappings",
            lang, ext
        );
        return Some(lang.as_str());
    }

    match default_choice {
        Some(lang) => {
            info!(
                "Deduced language \"{}\" from extension \"{}\" from default mappings",
                lang, ext
            );
            Some(*lang)
        }
        None => {
            error!(
                "Was not able to find a language string for extension {}",
                ext
            );
            None
        }
    }
}
/// Load the grammar associated with a file extension.
///
/// The extension is resolved to a language name via `resolve_language_str`,
/// using `grammar_config.file_associations` as overrides, and the grammar is
/// then loaded with `generate_language`.
///
/// # Errors
///
/// Returns [`LoadingError::UnsupportedExt`] when the extension cannot be
/// resolved, or whatever error `generate_language` produces.
pub fn language_from_ext(
    ext: &str,
    grammar_config: &GrammarConfig,
) -> Result<Language, LoadingError> {
    let associations = grammar_config.file_associations.as_ref();
    let language_str = resolve_language_str(ext, associations)
        .ok_or_else(|| LoadingError::UnsupportedExt(ext.to_string()))?;
    generate_language(language_str, grammar_config)
}
/// Read the file at `p` and parse it into a tree-sitter syntax tree.
///
/// When `language` is given it names the grammar to use; otherwise the grammar
/// is deduced from the file extension of `p`.
///
/// # Errors
///
/// * [`LoadingError::IoError`] when the file cannot be read.
/// * [`LoadingError::NoFileExt`] when no language was given and `p` has no
///   extension.
/// * Any error from `generate_language` / `language_from_ext`.
/// * [`LoadingError::LanguageError`] when the parser rejects the grammar.
/// * [`LoadingError::TSParseFailure`] when tree-sitter produces no tree.
#[time("info", "parse::{}")]
pub fn parse_file(
    p: &Path,
    language: Option<&str>,
    config: &GrammarConfig,
) -> Result<Tree, LoadingError> {
    // The file is read first, so an unreadable file is reported before any
    // grammar-resolution problem.
    let source = fs::read_to_string(p)?;
    let mut ts_parser = Parser::new();

    let grammar = if let Some(name) = language {
        info!("Using language {} with parser", name);
        generate_language(name, config)?
    } else {
        let ext = p
            .extension()
            .ok_or_else(|| LoadingError::NoFileExt(p.to_string_lossy().to_string()))?;
        language_from_ext(&ext.to_string_lossy(), config)?
    };

    ts_parser.set_language(grammar)?;
    debug!("Constructed parser");

    let ast = ts_parser
        .parse(&source, None)
        .ok_or_else(|| LoadingError::TSParseFailure(p.to_owned()))?;
    debug!("Parsed AST");
    Ok(ast)
}
/// Return the names of all statically compiled grammars in lexicographic order.
#[cfg(feature = "static-grammar-libs")]
pub fn supported_languages() -> Vec<&'static str> {
    // This function only exists when `static-grammar-libs` is enabled, so no
    // additional `cfg!` check (and no empty fallback) is needed here.
    let mut keys: Vec<&'static str> = LANGUAGES.keys().copied().collect();
    keys.sort_unstable();
    keys
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every statically compiled grammar must be accepted by a fresh parser.
    #[cfg(feature = "static-grammar-libs")]
    #[test]
    fn static_load_parsers() {
        let failures: Vec<_> = (&LANGUAGES)
            .into_iter()
            .filter_map(|(&name, lang)| {
                let mut parser = tree_sitter::Parser::new();
                parser
                    .set_language(unsafe { lang() })
                    .err()
                    .map(|e| (name, e))
            })
            .collect();
        assert!(failures.is_empty(), "{:#?}", failures);
    }

    /// Each listed grammar must be loadable through the dynamic loader using
    /// its default library name.
    // Ignored by default; presumably because it needs the grammar shared
    // libraries to be installed on the host (TODO confirm).
    #[cfg(feature = "dynamic-grammar-libs")]
    #[test]
    #[ignore]
    fn dynamic_load_parsers() {
        let language_names = [
            "rust", "cpp", "python", "bash", "ocaml", "go", "ruby", "java", "c_sharp", "css",
            "php", "json", "tsx", "hcl",
        ];
        let failures: Vec<&str> = language_names
            .iter()
            .copied()
            .filter(|name| generate_language_dynamic(name, None).is_err())
            .collect();
        assert!(failures.is_empty(), "{:#?}", failures);
    }
}