use ahash::{AHashMap, AHashSet};
#[cfg(feature = "dynamic-loading")]
use std::path::PathBuf;
#[cfg(feature = "dynamic-loading")]
use std::sync::Arc;
use tree_sitter::Language;
use crate::error::Error;
include!(concat!(env!("OUT_DIR"), "/registry_generated.rs"));
/// Alternate language names accepted by the registry, stored as
/// `(alias, canonical name)` pairs.
///
/// `resolve_alias` scans this table and replaces an alias with its canonical
/// name; `LanguageRegistry::available_languages` also reports an alias
/// whenever its canonical target is available.
const LANGUAGE_ALIASES: &[(&str, &str)] = &[
("bazel", "starlark"),
("gradle", "groovy"),
("ignorefile", "gitignore"),
("lisp", "commonlisp"),
("makefile", "make"),
("shell", "bash"),
];
/// Returns the C symbol suffix used for the language `name`.
///
/// If `C_SYMBOL_OVERRIDES` contains an entry whose language equals `name`,
/// the overriding symbol from the first such entry is returned; otherwise
/// `name` itself is returned unchanged.
#[cfg(any(feature = "dynamic-loading", feature = "download"))]
#[inline(always)]
pub(crate) fn c_symbol_for(name: &str) -> &str {
    C_SYMBOL_OVERRIDES
        .iter()
        .find(|(lang, _)| *lang == name)
        .map_or(name, |(_, sym)| *sym)
}
/// Returns the language name that corresponds to the C symbol suffix `symbol`.
///
/// This is the reverse lookup of `c_symbol_for`: the first entry of
/// `C_SYMBOL_OVERRIDES` whose symbol equals `symbol` yields its language
/// name; when no entry matches, `symbol` is returned unchanged.
#[cfg(any(feature = "dynamic-loading", feature = "download"))]
#[inline(always)]
pub(crate) fn lang_name_for_symbol(symbol: &str) -> &str {
    match C_SYMBOL_OVERRIDES.iter().find(|entry| entry.1 == symbol) {
        Some(&(lang, _)) => lang,
        None => symbol,
    }
}
/// Maps a language alias to its canonical name.
///
/// Looks `name` up in `LANGUAGE_ALIASES` (exact, case-sensitive match) and
/// returns the canonical target of the first matching alias. Names that are
/// not aliases are returned unchanged.
#[inline(always)]
fn resolve_alias(name: &str) -> &str {
    LANGUAGE_ALIASES
        .iter()
        .find_map(|&(alias, target)| (alias == name).then_some(target))
        .unwrap_or(name)
}
/// Builds the path of the shared library for language `name` inside `dir`.
///
/// The file name is `tree_sitter_<symbol>` (with `<symbol>` taken from
/// `c_symbol_for`) decorated per target OS:
/// `lib….dylib` on macOS, `….dll` (no prefix) on Windows, and `lib….so`
/// everywhere else. The path is only computed; the file system is not touched.
#[cfg(feature = "dynamic-loading")]
fn lib_path_in(dir: &std::path::Path, name: &str) -> PathBuf {
    let symbol = c_symbol_for(name);
    let file_name = if cfg!(target_os = "macos") {
        format!("libtree_sitter_{symbol}.dylib")
    } else if cfg!(target_os = "windows") {
        format!("tree_sitter_{symbol}.dll")
    } else {
        format!("libtree_sitter_{symbol}.so")
    };
    dir.join(file_name)
}
/// Runtime loading and caching of tree-sitter grammars from shared libraries.
#[cfg(feature = "dynamic-loading")]
mod dynamic {
    use std::collections::HashMap;
    use std::path::{Path, PathBuf};
    use std::sync::RwLock;

    use tree_sitter::Language;

    use crate::error::Error;

    /// Grammars loaded so far, keyed by the language name they were requested under.
    pub(crate) struct DynamicLibs {
        /// Each entry keeps the library handle alive next to the `Language`
        /// obtained from it.
        ///
        /// The tuple order is deliberate: tuple fields are dropped in
        /// declaration order, so the `Language` (whose pointer was returned by
        /// a function inside the library) is released before the library
        /// itself is unloaded.
        libs: HashMap<String, (Language, libloading::Library)>,
    }

    /// Loads grammars from shared libraries and caches them behind a lock.
    pub(crate) struct DynamicLoader {
        /// Cache of already loaded libraries.
        inner: RwLock<DynamicLibs>,
        /// Default directory searched by [`DynamicLoader::load`].
        pub(crate) libs_dir: PathBuf,
        /// Language names supplied by the caller at construction time.
        pub(crate) dynamic_names: Vec<&'static str>,
    }

    impl DynamicLoader {
        /// Creates a loader with an empty cache.
        pub(crate) fn new(libs_dir: PathBuf, dynamic_names: Vec<&'static str>) -> Self {
            Self {
                inner: RwLock::new(DynamicLibs { libs: HashMap::new() }),
                libs_dir,
                dynamic_names,
            }
        }

        /// Returns a clone of the cached `Language` for `name`, or `None` if it
        /// has not been loaded yet.
        ///
        /// # Errors
        /// Returns `Error::LockPoisoned` if the cache lock is poisoned.
        pub(crate) fn get_cached(&self, name: &str) -> Result<Option<Language>, Error> {
            let dynamic = self.inner.read().map_err(|e| Error::LockPoisoned(e.to_string()))?;
            Ok(dynamic.libs.get(name).map(|(lang, _)| lang.clone()))
        }

        /// Returns the names of all cached languages.
        ///
        /// Best-effort: a poisoned lock yields an empty list instead of an error.
        pub(crate) fn cached_names(&self) -> Vec<String> {
            match self.inner.read() {
                Ok(dynamic) => dynamic.libs.keys().cloned().collect(),
                Err(_) => Vec::new(),
            }
        }

        /// Reports whether the library file for `name` exists in `libs_dir`.
        pub(crate) fn lib_file_exists(&self, name: &str) -> bool {
            self.lib_path(name).exists()
        }

        /// Path of the library for `name` inside the default `libs_dir`.
        fn lib_path(&self, name: &str) -> PathBuf {
            super::lib_path_in(&self.libs_dir, name)
        }

        /// Loads the grammar for `name` from the library file expected in `dir`.
        ///
        /// # Errors
        /// Returns `Error::LanguageNotFound` when the file does not exist, and
        /// otherwise any error produced while loading the library.
        pub(crate) fn load_from_dir(&self, name: &str, dir: &Path) -> Result<Language, Error> {
            let lib_path = super::lib_path_in(dir, name);
            if !lib_path.exists() {
                return Err(Error::LanguageNotFound(format!(
                    "Dynamic library for '{}' not found at {}",
                    name,
                    lib_path.display()
                )));
            }
            self.load_from_path(name, &lib_path)
        }

        /// Loads the grammar for `name` from the default `libs_dir`.
        ///
        /// # Errors
        /// Same as [`DynamicLoader::load_from_dir`].
        pub(crate) fn load(&self, name: &str) -> Result<Language, Error> {
            self.load_from_dir(name, &self.libs_dir)
        }

        /// Opens the library at `lib_path`, resolves its `tree_sitter_<symbol>`
        /// entry point, and caches the resulting `Language` under `name`.
        ///
        /// The write lock is taken first and the cache is re-checked under it,
        /// so a language that is already cached is returned without opening
        /// the library again.
        ///
        /// # Errors
        /// * `Error::LockPoisoned` if the cache lock is poisoned.
        /// * `Error::DynamicLoad` if the library cannot be opened or the symbol
        ///   is missing.
        /// * `Error::NullLanguagePointer` if the entry point returns null.
        fn load_from_path(&self, name: &str, lib_path: &Path) -> Result<Language, Error> {
            let mut dynamic = self.inner.write().map_err(|e| Error::LockPoisoned(e.to_string()))?;
            if let Some((lang, _)) = dynamic.libs.get(name) {
                return Ok(lang.clone());
            }

            let func_name = format!("tree_sitter_{}", super::c_symbol_for(name));

            // SAFETY: opening a shared library runs its initialisation code.
            // The file is assumed to be a tree-sitter grammar library placed
            // in a directory the caller trusts.
            let lib = unsafe { libloading::Library::new(lib_path) }
                .map_err(|e| Error::DynamicLoad(format!("Failed to load library {}: {}", lib_path.display(), e)))?;

            let language = {
                // SAFETY: the symbol is assumed to have the signature declared
                // in the type annotation (no arguments, returning a
                // `TSLanguage` pointer); the code cannot verify this.
                let func: libloading::Symbol<unsafe extern "C" fn() -> *const tree_sitter::ffi::TSLanguage> =
                    unsafe { lib.get(func_name.as_bytes()) }.map_err(|e| {
                        Error::DynamicLoad(format!(
                            "Symbol '{}' not found in {}: {}",
                            func_name,
                            lib_path.display(),
                            e
                        ))
                    })?;
                // SAFETY: `lib` is still loaded, so the function pointer is valid to call.
                let ptr = unsafe { func() };
                if ptr.is_null() {
                    return Err(Error::NullLanguagePointer(name.to_string()));
                }
                // SAFETY: `ptr` is non-null and was returned by the grammar's
                // entry point; `lib` is stored in the cache below so the
                // library stays loaded while the cache entry exists.
                unsafe { Language::from_raw(ptr) }
            };

            dynamic.libs.insert(name.to_string(), (language.clone(), lib));
            Ok(language)
        }
    }
}
/// Resolves language names to tree-sitter [`Language`] values.
///
/// Statically available grammars are always consulted first; with the
/// `dynamic-loading` feature enabled, grammars can additionally be loaded
/// from shared libraries.
pub struct LanguageRegistry {
/// Language name → function returning that language's grammar.
static_lookup: AHashMap<&'static str, fn() -> Language>,
/// Loader and cache for grammars that live in shared libraries.
#[cfg(feature = "dynamic-loading")]
dynamic_loader: dynamic::DynamicLoader,
/// Additional directories searched for grammar libraries. The inner `Arc`
/// lets readers clone a snapshot of the list and release the lock before
/// iterating over it.
#[cfg(feature = "dynamic-loading")]
extra_lib_dirs: Arc<std::sync::RwLock<Arc<Vec<PathBuf>>>>,
}
impl LanguageRegistry {
    /// Creates a registry containing every entry of `STATIC_LANGUAGES`.
    ///
    /// With the `dynamic-loading` feature, the dynamic loader is pointed at
    /// `LIBS_DIR` and seeded with `DYNAMIC_LANGUAGE_NAMES`; no extra library
    /// directories are registered.
    pub fn new() -> Self {
        let mut static_lookup = AHashMap::with_capacity(STATIC_LANGUAGES.len());
        for &(name, loader) in STATIC_LANGUAGES {
            static_lookup.insert(name, loader);
        }
        Self {
            static_lookup,
            #[cfg(feature = "dynamic-loading")]
            dynamic_loader: dynamic::DynamicLoader::new(PathBuf::from(LIBS_DIR), DYNAMIC_LANGUAGE_NAMES.to_vec()),
            #[cfg(feature = "dynamic-loading")]
            extra_lib_dirs: Arc::new(std::sync::RwLock::new(Arc::new(Vec::new()))),
        }
    }

    /// Like [`LanguageRegistry::new`], but loads dynamic grammars from
    /// `libs_dir` instead of the default directory.
    #[cfg(feature = "dynamic-loading")]
    pub fn with_libs_dir(libs_dir: PathBuf) -> Self {
        let mut reg = Self::new();
        reg.dynamic_loader.libs_dir = libs_dir;
        reg
    }

    /// Registers an additional directory to search for grammar libraries.
    ///
    /// Directories that are already registered are ignored. If the internal
    /// lock is poisoned the call does nothing.
    #[cfg(feature = "dynamic-loading")]
    pub fn add_extra_libs_dir(&self, dir: PathBuf) {
        let Ok(mut dirs) = self.extra_lib_dirs.write() else {
            return;
        };
        if !dirs.contains(&dir) {
            // Clone-on-write: snapshots already handed out to readers keep
            // the old list; the vector is only copied if one is still alive.
            Arc::make_mut(&mut *dirs).push(dir);
        }
    }

    /// Returns a snapshot of the registered extra library directories.
    ///
    /// The lock is released before returning; a poisoned lock yields an
    /// empty list.
    #[cfg(feature = "dynamic-loading")]
    fn extra_dirs_snapshot(&self) -> Arc<Vec<PathBuf>> {
        self.extra_lib_dirs
            .read()
            .map(|dirs| Arc::clone(&dirs))
            .unwrap_or_default()
    }

    /// Extracts the language name from a grammar library file name.
    ///
    /// Accepts `[lib]tree_sitter_<symbol>.{so,dylib,dll}` and maps `<symbol>`
    /// back to its language name, mirroring how `lib_path_in` derives the
    /// file name from a language name. Returns `None` for any other file
    /// name or when `<symbol>` is empty.
    #[cfg(feature = "dynamic-loading")]
    fn language_from_lib_file_name(file_name: &str) -> Option<&str> {
        let stem = file_name.strip_prefix("lib").unwrap_or(file_name);
        let rest = stem.strip_prefix("tree_sitter_")?;
        let symbol = rest
            .strip_suffix(".so")
            .or_else(|| rest.strip_suffix(".dylib"))
            .or_else(|| rest.strip_suffix(".dll"))?;
        if symbol.is_empty() {
            return None;
        }
        Some(lang_name_for_symbol(symbol))
    }

    /// Collects language names known only at run time: everything in the
    /// dynamic loader's cache plus every grammar library found in the extra
    /// directories. Directories that cannot be read are skipped.
    #[cfg(feature = "dynamic-loading")]
    fn discovered_dynamic_names(&self) -> Vec<String> {
        let mut names = self.dynamic_loader.cached_names();
        let extra_dirs = self.extra_dirs_snapshot();
        for dir in extra_dirs.iter() {
            let Ok(entries) = std::fs::read_dir(dir) else {
                continue;
            };
            for entry in entries.flatten() {
                let file_name = entry.file_name();
                let file_name = file_name.to_string_lossy();
                if let Some(lang) = Self::language_from_lib_file_name(&file_name) {
                    names.push(lang.to_string());
                }
            }
        }
        names
    }

    /// Returns the grammar for `name` (aliases are resolved first).
    ///
    /// Lookup order: static grammars, then — with `dynamic-loading` — the
    /// cache of loaded libraries, the default library directory, and finally
    /// each extra directory in registration order. When `name` is a known
    /// dynamic language or its library exists in the default directory, the
    /// result of loading from there is returned as-is (including an error)
    /// and the extra directories are not consulted. Load failures in extra
    /// directories are skipped.
    ///
    /// # Errors
    /// Returns `Error::LanguageNotFound` when no source provides the
    /// language, or the error produced by the cache lookup or by loading
    /// from the default directory.
    pub fn get_language(&self, name: &str) -> Result<Language, Error> {
        let name = resolve_alias(name);
        if let Some(loader) = self.static_lookup.get(name) {
            return Ok(loader());
        }
        #[cfg(feature = "dynamic-loading")]
        {
            if let Some(lang) = self.dynamic_loader.get_cached(name)? {
                return Ok(lang);
            }
            if self.dynamic_loader.dynamic_names.contains(&name) || self.dynamic_loader.lib_file_exists(name) {
                return self.dynamic_loader.load(name);
            }
            let extra_dirs = self.extra_dirs_snapshot();
            for extra_dir in extra_dirs.iter() {
                // `load_from_dir` already returns the loaded grammar, so
                // there is no need to read it back from the cache.
                if let Ok(lang) = self.dynamic_loader.load_from_dir(name, extra_dir) {
                    return Ok(lang);
                }
            }
        }
        Err(Error::LanguageNotFound(name.to_string()))
    }

    /// Returns the sorted, de-duplicated list of all language names this
    /// registry knows about.
    ///
    /// Includes static languages, and with `dynamic-loading` also the known
    /// dynamic names, cached languages and grammar libraries found in the
    /// extra directories. An alias is listed whenever its canonical target is.
    pub fn available_languages(&self) -> Vec<String> {
        // Declared before `seen` because `seen` borrows from it.
        #[cfg(feature = "dynamic-loading")]
        let discovered: Vec<String> = self.discovered_dynamic_names();

        let mut seen: AHashSet<&str> = self.static_lookup.keys().copied().collect();
        #[cfg(feature = "dynamic-loading")]
        {
            seen.extend(self.dynamic_loader.dynamic_names.iter().copied());
            seen.extend(discovered.iter().map(String::as_str));
        }
        for &(alias, target) in LANGUAGE_ALIASES {
            if seen.contains(target) {
                seen.insert(alias);
            }
        }
        let mut langs: Vec<String> = seen.into_iter().map(String::from).collect();
        langs.sort_unstable();
        langs
    }

    /// Reports whether `name` (after alias resolution) can be provided.
    ///
    /// For dynamic languages this checks the known dynamic names and whether
    /// the library file exists in the default or any extra directory; the
    /// library is not loaded.
    pub fn has_language(&self, name: &str) -> bool {
        let name = resolve_alias(name);
        if self.static_lookup.contains_key(name) {
            return true;
        }
        #[cfg(feature = "dynamic-loading")]
        {
            if self.dynamic_loader.dynamic_names.contains(&name) || self.dynamic_loader.lib_file_exists(name) {
                return true;
            }
            let extra_dirs = self.extra_dirs_snapshot();
            if extra_dirs.iter().any(|dir| lib_path_in(dir, name).exists()) {
                return true;
            }
        }
        false
    }

    /// Number of entries returned by [`LanguageRegistry::available_languages`]
    /// (aliases included).
    pub fn language_count(&self) -> usize {
        self.available_languages().len()
    }

    /// Runs `crate::intel::process` on `source` with this registry.
    ///
    /// If `config.language` is an alias, a copy of the config with the
    /// canonical language name is passed on; otherwise `config` is used
    /// unchanged.
    ///
    /// # Errors
    /// Propagates the error returned by `crate::intel::process`.
    pub fn process(
        &self,
        source: &str,
        config: &crate::process_config::ProcessConfig,
    ) -> Result<crate::intel::types::ProcessResult, Error> {
        let resolved_lang = resolve_alias(&config.language);
        if resolved_lang == config.language.as_ref() {
            return crate::intel::process(source, config, self);
        }
        let mut resolved_config = config.clone();
        resolved_config.language = std::borrow::Cow::Owned(resolved_lang.to_string());
        crate::intel::process(source, &resolved_config, self)
    }
}
impl Default for LanguageRegistry {
/// Equivalent to [`LanguageRegistry::new`].
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::process_config::ProcessConfig;

    /// First entry of the default registry's (sorted) language list, or
    /// `None` when no language is available in this build.
    fn first_available_lang() -> Option<String> {
        LanguageRegistry::new().available_languages().into_iter().next()
    }

    /// Processing a trivial source with an available language succeeds.
    #[test]
    fn test_registry_process() {
        let lang = match first_available_lang() {
            Some(lang) => lang,
            None => return,
        };
        let registry = LanguageRegistry::new();
        let config = ProcessConfig::new(&lang);
        let outcome = registry.process("x", &config);
        assert!(outcome.is_ok(), "registry.process() should succeed");
        let intel = outcome.unwrap();
        assert_eq!(intel.language, lang);
        assert!(intel.metrics.total_lines >= 1);
    }

    /// With chunking enabled the result contains at least one chunk.
    #[test]
    fn test_registry_process_with_chunking() {
        let lang = match first_available_lang() {
            Some(lang) => lang,
            None => return,
        };
        let registry = LanguageRegistry::new();
        let config = ProcessConfig::new(&lang).with_chunking(1000);
        let outcome = registry.process("x", &config);
        assert!(outcome.is_ok(), "registry.process() with chunking should succeed");
        let intel = outcome.unwrap();
        assert_eq!(intel.language, lang);
        assert!(!intel.chunks.is_empty());
    }

    /// An unknown language name is reported as an error.
    #[test]
    fn test_registry_process_invalid_language() {
        let config = ProcessConfig::new("nonexistent_lang_xyz");
        let outcome = LanguageRegistry::new().process("x", &config);
        assert!(outcome.is_err());
    }

    /// `language_count` and `has_language` agree with `available_languages`.
    #[test]
    fn test_registry_has_language_and_count() {
        let registry = LanguageRegistry::new();
        let langs = registry.available_languages();
        assert_eq!(registry.language_count(), langs.len());
        match langs.first() {
            Some(lang) => assert!(registry.has_language(lang)),
            None => {}
        }
        assert!(!registry.has_language("nonexistent_lang_xyz"));
    }

    /// A `ProcessResult` survives a JSON serialize/deserialize round trip.
    #[cfg(feature = "serde")]
    #[test]
    fn test_process_result_serde_roundtrip() {
        let lang = match first_available_lang() {
            Some(lang) => lang,
            None => return,
        };
        let registry = LanguageRegistry::new();
        let source = "x";
        let config = ProcessConfig::new(&lang);
        let intel = registry.process(source, &config).unwrap();
        let json = serde_json::to_string(&intel).expect("serialize should succeed");
        let deserialized: crate::intel::types::ProcessResult =
            serde_json::from_str(&json).expect("deserialize should succeed");
        assert_eq!(deserialized.language, intel.language);
        assert_eq!(deserialized.metrics.total_lines, intel.metrics.total_lines);
        assert_eq!(deserialized.metrics.total_bytes, intel.metrics.total_bytes);
    }
}