pub mod error;
pub mod extensions;
pub mod extract;
pub mod intel;
#[cfg(feature = "serde")]
pub mod json_utils;
pub mod node;
pub mod pack_config;
pub mod parse;
pub mod process_config;
pub mod queries;
pub mod query;
pub mod registry;
pub mod text_splitter;
#[cfg(feature = "config")]
pub mod config;
#[cfg(feature = "config")]
pub mod definitions;
#[cfg(feature = "download")]
pub mod download;
pub use error::Error;
#[cfg(feature = "serde")]
pub use extensions::extension_ambiguity_json;
pub use extensions::{
detect_language_from_content, detect_language_from_extension, detect_language_from_path, extension_ambiguity,
};
pub use extract::{
CaptureOutput, CaptureResult, CompiledExtraction, ExtractionConfig, ExtractionPattern, ExtractionResult,
MatchResult, PatternResult, PatternValidation, ValidationResult,
};
pub use intel::types::{
ChunkContext, CodeChunk, CommentInfo, CommentKind, Diagnostic, DiagnosticSeverity, DocSection, DocstringFormat,
DocstringInfo, ExportInfo, ExportKind, FileMetrics, ImportInfo, ProcessResult, Span, StructureItem, StructureKind,
SymbolInfo, SymbolKind,
};
pub use node::{NodeInfo, extract_text, find_nodes_by_type, named_children_info, node_info_from_node, root_node_info};
pub use pack_config::PackConfig;
pub use parse::{parse_string, tree_contains_node_type, tree_error_count, tree_has_error_nodes, tree_to_sexp};
pub use process_config::ProcessConfig;
pub use queries::{get_highlights_query, get_injections_query, get_locals_query};
pub use query::{QueryMatch, run_query};
pub use registry::LanguageRegistry;
pub use text_splitter::split_code;
pub use tree_sitter::{Language, Parser, Tree};
#[cfg(feature = "download")]
pub use download::DownloadManager;
use std::sync::LazyLock;
#[cfg(feature = "download")]
use std::sync::RwLock;
/// Process-wide language registry, built on first access via `LanguageRegistry::new`.
static REGISTRY: LazyLock<LanguageRegistry> = LazyLock::new(LanguageRegistry::new);
/// Flag recording whether the effective cache directory has been added to `REGISTRY`.
///
/// Set to `true` by `ensure_cache_registered`; reset to `false` by `configure`
/// (when a cache directory is supplied) and by `clean_cache`.
#[cfg(feature = "download")]
static CACHE_REGISTERED: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false);
/// Optional user-supplied cache directory; starts as `None`.
///
/// Written by `configure` and read by `effective_cache_dir`.
#[cfg(feature = "download")]
static CUSTOM_CACHE_DIR: LazyLock<RwLock<Option<std::path::PathBuf>>> = LazyLock::new(|| RwLock::new(None));
/// Resolves a tree-sitter [`Language`] by name.
///
/// The global registry is consulted first. When that lookup fails and the
/// `download` feature is enabled, the cache directory is registered with the
/// registry, a [`DownloadManager`] is asked to ensure the language, and the
/// registry lookup is retried.
///
/// # Errors
///
/// Without the `download` feature, returns [`Error::LanguageNotFound`] when the
/// registry lookup fails. With it, propagates errors from cache setup, the
/// download manager, or the second registry lookup.
pub fn get_language(name: &str) -> Result<Language, Error> {
    match REGISTRY.get_language(name) {
        Ok(found) => Ok(found),
        Err(_) => {
            #[cfg(feature = "download")]
            {
                ensure_cache_registered()?;
                let mut manager =
                    DownloadManager::with_cache_dir(env!("CARGO_PKG_VERSION"), effective_cache_dir()?);
                manager.ensure_languages(&[name])?;
                REGISTRY.get_language(name)
            }
            #[cfg(not(feature = "download"))]
            Err(Error::LanguageNotFound(name.to_string()))
        }
    }
}
pub fn get_parser(name: &str) -> Result<tree_sitter::Parser, Error> {
let language = get_language(name)?;
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&language)
.map_err(|e| Error::ParserSetup(format!("{e}")))?;
Ok(parser)
}
pub fn available_languages() -> Vec<String> {
REGISTRY.available_languages()
}
pub fn has_language(name: &str) -> bool {
REGISTRY.has_language(name)
}
pub fn language_count() -> usize {
REGISTRY.language_count()
}
/// Runs the global registry's `process` over `source` using `config`.
///
/// With the `download` feature enabled, the configured language is first
/// resolved through [`get_language`] so that it is available to the registry.
///
/// # Errors
///
/// Propagates errors from the language lookup (download builds only) and from
/// the registry's processing step.
pub fn process(source: &str, config: &ProcessConfig) -> Result<ProcessResult, Error> {
    #[cfg(feature = "download")]
    {
        get_language(&config.language)?;
    }
    REGISTRY.process(source, config)
}
pub fn extract_patterns(source: &str, config: &ExtractionConfig) -> Result<ExtractionResult, Error> {
extract::extract(source, config)
}
pub fn validate_extraction(config: &ExtractionConfig) -> Result<ValidationResult, Error> {
extract::validate_extraction(config)
}
/// Adds the effective cache directory to the global registry unless the
/// `CACHE_REGISTERED` flag says this has already been done.
///
/// # Errors
///
/// Propagates any failure to determine the effective cache directory.
#[cfg(feature = "download")]
fn ensure_cache_registered() -> Result<(), Error> {
    use std::sync::atomic::Ordering;

    let already_registered = CACHE_REGISTERED.load(Ordering::Acquire);
    if !already_registered {
        let dir = effective_cache_dir()?;
        REGISTRY.add_extra_libs_dir(dir);
        // Flip the flag only after the directory has been handed to the registry.
        CACHE_REGISTERED.store(true, Ordering::Release);
    }
    Ok(())
}
/// Returns the cache directory currently in effect.
///
/// A directory stored in `CUSTOM_CACHE_DIR` takes precedence; otherwise the
/// result of `DownloadManager::default_cache_dir` for this crate version is used.
///
/// # Errors
///
/// Returns [`Error::LockPoisoned`] if the custom-directory lock is poisoned, or
/// whatever error the default-directory lookup reports.
#[cfg(feature = "download")]
fn effective_cache_dir() -> Result<std::path::PathBuf, Error> {
    let guard = CUSTOM_CACHE_DIR
        .read()
        .map_err(|poison| Error::LockPoisoned(poison.to_string()))?;
    if let Some(dir) = guard.as_ref() {
        return Ok(dir.clone());
    }
    DownloadManager::default_cache_dir(env!("CARGO_PKG_VERSION"))
}
/// Applies `config` and ensures the languages and groups it lists.
///
/// Steps, in order: apply the configuration via [`configure`], pass any listed
/// languages to [`download`], ask a [`DownloadManager`] to ensure each listed
/// group, and finally make sure the cache directory is registered with the
/// global registry.
///
/// # Errors
///
/// Stops at and returns the first error produced by any of those steps.
#[cfg(feature = "download")]
pub fn init(config: &PackConfig) -> Result<(), Error> {
    configure(config)?;

    if let Some(languages) = config.languages.as_ref() {
        let names: Vec<&str> = languages.iter().map(|lang| lang.as_str()).collect();
        download(&names)?;
    }

    if let Some(groups) = config.groups.as_ref() {
        let mut manager = DownloadManager::with_cache_dir(env!("CARGO_PKG_VERSION"), effective_cache_dir()?);
        for group in groups {
            manager.ensure_group(group)?;
        }
    }

    ensure_cache_registered()
}
/// Records a custom cache directory from `config`, if one is given.
///
/// When `config.cache_dir` is `None` this is a no-op. Otherwise the directory
/// is stored in `CUSTOM_CACHE_DIR` and `CACHE_REGISTERED` is cleared so the new
/// directory gets registered on the next call to `ensure_cache_registered`.
///
/// # Errors
///
/// Returns [`Error::LockPoisoned`] if the custom-directory lock is poisoned.
#[cfg(feature = "download")]
pub fn configure(config: &PackConfig) -> Result<(), Error> {
    let Some(dir) = config.cache_dir.as_ref() else {
        return Ok(());
    };
    let mut slot = CUSTOM_CACHE_DIR
        .write()
        .map_err(|poison| Error::LockPoisoned(poison.to_string()))?;
    *slot = Some(dir.clone());
    // Cleared while the write guard is still held, matching the store order above.
    CACHE_REGISTERED.store(false, std::sync::atomic::Ordering::Release);
    Ok(())
}
/// Ensures the named languages through a [`DownloadManager`] and reports how
/// many more languages are installed afterwards.
///
/// The count is the difference in the length of the manager's installed-language
/// list before and after the call, saturating at zero.
///
/// # Errors
///
/// Propagates errors from cache registration, cache-directory lookup, or the
/// download manager.
#[cfg(feature = "download")]
pub fn download(names: &[&str]) -> Result<usize, Error> {
    ensure_cache_registered()?;
    let mut manager = DownloadManager::with_cache_dir(env!("CARGO_PKG_VERSION"), effective_cache_dir()?);
    let installed_before = manager.installed_languages().len();
    manager.ensure_languages(names)?;
    let installed_after = manager.installed_languages().len();
    Ok(installed_after.saturating_sub(installed_before))
}
/// Passes every language listed by [`manifest_languages`] to [`download`].
///
/// # Errors
///
/// Propagates errors from reading the manifest or from [`download`].
#[cfg(feature = "download")]
pub fn download_all() -> Result<usize, Error> {
    let all = manifest_languages()?;
    let names = all.iter().map(|lang| lang.as_str()).collect::<Vec<&str>>();
    download(&names)
}
/// Returns the sorted language names found in the manifest obtained from
/// `DownloadManager::fetch_manifest`.
///
/// # Errors
///
/// Propagates errors from the cache-directory lookup or from fetching the manifest.
#[cfg(feature = "download")]
pub fn manifest_languages() -> Result<Vec<String>, Error> {
    let mut manager = DownloadManager::with_cache_dir(env!("CARGO_PKG_VERSION"), effective_cache_dir()?);
    let manifest = manager.fetch_manifest()?;
    let mut names = manifest.languages.keys().cloned().collect::<Vec<String>>();
    names.sort_unstable();
    Ok(names)
}
/// Lists the languages the download manager reports as installed in the
/// effective cache directory.
///
/// If the cache directory cannot be determined, an empty list is returned
/// instead of an error.
#[cfg(feature = "download")]
pub fn downloaded_languages() -> Vec<String> {
    let Ok(dir) = effective_cache_dir() else {
        return Vec::new();
    };
    DownloadManager::with_cache_dir(env!("CARGO_PKG_VERSION"), dir).installed_languages()
}
/// Asks the download manager to clean the effective cache directory, then
/// clears `CACHE_REGISTERED`.
///
/// # Errors
///
/// Propagates errors from the cache-directory lookup or from the manager's
/// clean-up; the flag is left untouched in that case.
#[cfg(feature = "download")]
pub fn clean_cache() -> Result<(), Error> {
    let manager = DownloadManager::with_cache_dir(env!("CARGO_PKG_VERSION"), effective_cache_dir()?);
    manager.clean_cache()?;
    CACHE_REGISTERED.store(false, std::sync::atomic::Ordering::Release);
    Ok(())
}
/// Public accessor for the cache directory currently in effect.
///
/// # Errors
///
/// Propagates any error from the internal cache-directory lookup.
#[cfg(feature = "download")]
pub fn cache_dir() -> Result<std::path::PathBuf, Error> {
    let dir = effective_cache_dir()?;
    Ok(dir)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: listing languages must not panic.
    #[test]
    fn test_available_languages() {
        let _ = available_languages();
    }

    /// The first listed language (if any) is known; a made-up name is not.
    #[test]
    fn test_has_language() {
        let langs = available_languages();
        if let Some(first) = langs.first() {
            assert!(has_language(first));
        }
        assert!(!has_language("nonexistent_language_xyz"));
    }

    /// Looking up a made-up language yields an error.
    #[test]
    fn test_get_language_invalid() {
        assert!(get_language("nonexistent_language_xyz").is_err());
    }

    /// Every listed language can be loaded, set on a parser, and used to parse.
    #[test]
    #[ignore = "loads all 305 dynamic libraries — run with --ignored"]
    fn test_get_language_and_parse() {
        for lang_name in available_languages() {
            let lang = match get_language(lang_name.as_str()) {
                Ok(loaded) => loaded,
                Err(e) => panic!("Failed to load language '{lang_name}': {e}"),
            };
            let mut parser = tree_sitter::Parser::new();
            if let Err(e) = parser.set_language(&lang) {
                panic!("Failed to set language '{lang_name}': {e}");
            }
            let tree = parser.parse("x", None);
            assert!(tree.is_some(), "Parser for '{lang_name}' should parse a string");
        }
    }

    /// A parser can be built for the first listed language, if there is one.
    #[test]
    fn test_get_parser() {
        let langs = available_languages();
        let Some(first) = langs.first() else {
            return;
        };
        let parser = get_parser(first.as_str());
        assert!(parser.is_ok(), "get_parser should succeed for '{first}'");
    }

    /// A default `PackConfig` has no cache dir, languages, or groups set.
    #[test]
    fn test_pack_config_default() {
        let PackConfig {
            cache_dir,
            languages,
            groups,
            ..
        } = PackConfig::default();
        assert!(cache_dir.is_none());
        assert!(languages.is_none());
        assert!(groups.is_none());
    }
}