Skip to main content

tree_sitter_language_pack/
lib.rs

1//! # tree-sitter-language-pack
2//!
3//! Pre-compiled tree-sitter grammars for 248 programming languages with
4//! a unified API for parsing, analysis, and intelligent code chunking.
5//!
6//! ## Quick Start
7//!
8//! ```no_run
9//! use tree_sitter_language_pack::{ProcessConfig, available_languages, has_language, process};
10//!
11//! // Check available languages
12//! let langs = available_languages();
13//! assert!(has_language("python"));
14//!
15//! // Process source code
16//! let config = ProcessConfig::new("python").all();
17//! let result = process("def hello(): pass", &config).unwrap();
18//! println!("Language: {}", result.language);
19//! println!("Functions: {}", result.structure.len());
20//! ```
21//!
22//! ## Modules
23//!
24//! - [`registry`] - Thread-safe language registry for parser lookup
25//! - [`intel`] - Source code intelligence extraction (structure, imports, exports, etc.)
26//! - [`parse`] - Low-level tree-sitter parsing utilities
27//! - [`node`] - Tree node traversal and information extraction
28//! - [`query`] - Tree-sitter query execution
29//! - [`text_splitter`] - Syntax-aware code chunking
30//! - [`process_config`] - Configuration for the `process` pipeline
31//! - [`pack_config`] - Configuration for the language pack (cache dir, languages to download)
32//! - [`error`] - Error types
33
34pub mod error;
35pub mod extensions;
36pub mod extract;
37pub mod intel;
38#[cfg(feature = "serde")]
39pub mod json_utils;
40pub mod node;
41pub mod pack_config;
42pub mod parse;
43pub mod process_config;
44pub mod queries;
45pub mod query;
46pub mod registry;
47pub mod text_splitter;
48
49#[cfg(feature = "config")]
50pub mod config;
51#[cfg(feature = "config")]
52pub mod definitions;
53#[cfg(feature = "download")]
54pub mod download;
55
56pub use error::Error;
57#[cfg(feature = "serde")]
58pub use extensions::extension_ambiguity_json;
59pub use extensions::{
60    detect_language_from_content, detect_language_from_extension, detect_language_from_path, extension_ambiguity,
61};
62pub use extract::{
63    CaptureOutput, CaptureResult, CompiledExtraction, ExtractionConfig, ExtractionPattern, ExtractionResult,
64    MatchResult, PatternResult, PatternValidation, ValidationResult,
65};
66pub use intel::types::{
67    ChunkContext, CodeChunk, CommentInfo, CommentKind, Diagnostic, DiagnosticSeverity, DocSection, DocstringFormat,
68    DocstringInfo, ExportInfo, ExportKind, FileMetrics, ImportInfo, ProcessResult, Span, StructureItem, StructureKind,
69    SymbolInfo, SymbolKind,
70};
71pub use node::{NodeInfo, extract_text, find_nodes_by_type, named_children_info, node_info_from_node, root_node_info};
72pub use pack_config::PackConfig;
73pub use parse::{parse_string, tree_contains_node_type, tree_error_count, tree_has_error_nodes, tree_to_sexp};
74pub use process_config::ProcessConfig;
75pub use queries::{get_highlights_query, get_injections_query, get_locals_query};
76pub use query::{QueryMatch, run_query};
77pub use registry::LanguageRegistry;
78pub use text_splitter::split_code;
79pub use tree_sitter::{Language, Parser, Tree};
80
81#[cfg(feature = "download")]
82pub use download::DownloadManager;
83
84use std::sync::LazyLock;
85#[cfg(feature = "download")]
86use std::sync::RwLock;
87
88static REGISTRY: LazyLock<LanguageRegistry> = LazyLock::new(LanguageRegistry::new);
89
90#[cfg(feature = "download")]
91static CACHE_REGISTERED: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false);
92
93#[cfg(feature = "download")]
94static CUSTOM_CACHE_DIR: LazyLock<RwLock<Option<std::path::PathBuf>>> = LazyLock::new(|| RwLock::new(None));
95
96/// Get a tree-sitter [`Language`] by name using the global registry.
97///
98/// Resolves language aliases (e.g., `"shell"` maps to `"bash"`).
99/// When the `download` feature is enabled (default), automatically downloads
100/// the parser from GitHub releases if not found locally.
101///
102/// # Errors
103///
104/// Returns [`Error::LanguageNotFound`] if the language is not recognized,
105/// or [`Error::Download`] if auto-download fails.
106///
107/// # Example
108///
109/// ```no_run
110/// use tree_sitter_language_pack::get_language;
111///
112/// let lang = get_language("python").unwrap();
113/// // Use the Language with a tree-sitter Parser
114/// let mut parser = tree_sitter::Parser::new();
115/// parser.set_language(&lang).unwrap();
116/// let tree = parser.parse("x = 1", None).unwrap();
117/// assert_eq!(tree.root_node().kind(), "module");
118/// ```
119pub fn get_language(name: &str) -> Result<Language, Error> {
120    // Fast path: check registry directly (no outer lock needed)
121    if let Ok(lang) = REGISTRY.get_language(name) {
122        return Ok(lang);
123    }
124    // Slow path: auto-download if feature enabled
125    #[cfg(feature = "download")]
126    {
127        ensure_cache_registered()?;
128        let cache_dir = effective_cache_dir()?;
129        let mut dm = DownloadManager::with_cache_dir(env!("CARGO_PKG_VERSION"), cache_dir);
130        dm.ensure_languages(&[name])?;
131        REGISTRY.get_language(name)
132    }
133    #[cfg(not(feature = "download"))]
134    Err(Error::LanguageNotFound(name.to_string()))
135}
136
137/// Get a tree-sitter [`Parser`] pre-configured for the given language.
138///
139/// This is a convenience function that calls [`get_language`] and configures
140/// a new parser in one step.
141///
142/// # Errors
143///
144/// Returns [`Error::LanguageNotFound`] if the language is not recognized, or
145/// [`Error::ParserSetup`] if the language cannot be applied to the parser.
146///
147/// # Example
148///
149/// ```no_run
150/// use tree_sitter_language_pack::get_parser;
151///
152/// let mut parser = get_parser("rust").unwrap();
153/// let tree = parser.parse("fn main() {}", None).unwrap();
154/// assert!(!tree.root_node().has_error());
155/// ```
156pub fn get_parser(name: &str) -> Result<tree_sitter::Parser, Error> {
157    let language = get_language(name)?;
158    let mut parser = tree_sitter::Parser::new();
159    parser
160        .set_language(&language)
161        .map_err(|e| Error::ParserSetup(format!("{e}")))?;
162    Ok(parser)
163}
164
165/// List all available language names (sorted, deduplicated, includes aliases).
166///
167/// Returns names of both statically compiled and dynamically loadable languages,
168/// plus any configured aliases.
169///
170/// # Example
171///
172/// ```no_run
173/// use tree_sitter_language_pack::available_languages;
174///
175/// let langs = available_languages();
176/// for name in &langs {
177///     println!("{}", name);
178/// }
179/// ```
180pub fn available_languages() -> Vec<String> {
181    REGISTRY.available_languages()
182}
183
184/// Check if a language is available by name or alias.
185///
186/// Returns `true` if the language can be loaded (statically compiled,
187/// dynamically available, or a known alias for one of these).
188///
189/// # Example
190///
191/// ```no_run
192/// use tree_sitter_language_pack::has_language;
193///
194/// assert!(has_language("python"));
195/// assert!(has_language("shell")); // alias for "bash"
196/// assert!(!has_language("nonexistent_language"));
197/// ```
198pub fn has_language(name: &str) -> bool {
199    REGISTRY.has_language(name)
200}
201
202/// Return the number of available languages.
203///
204/// Includes statically compiled languages, dynamically loadable languages,
205/// and aliases.
206///
207/// # Example
208///
209/// ```no_run
210/// use tree_sitter_language_pack::language_count;
211///
212/// let count = language_count();
213/// println!("{} languages available", count);
214/// ```
215pub fn language_count() -> usize {
216    REGISTRY.language_count()
217}
218
219/// Process source code and extract file intelligence using the global registry.
220///
221/// Parses the source with tree-sitter and extracts metrics, structure, imports,
222/// exports, comments, docstrings, symbols, diagnostics, and/or chunks based on
223/// the flags set in [`ProcessConfig`].
224///
225/// # Errors
226///
227/// Returns an error if the language is not found or parsing fails.
228///
229/// # Example
230///
231/// ```no_run
232/// use tree_sitter_language_pack::{ProcessConfig, process};
233///
234/// let config = ProcessConfig::new("python").all();
235/// let result = process("def hello(): pass", &config).unwrap();
236/// println!("Language: {}", result.language);
237/// println!("Lines: {}", result.metrics.total_lines);
238/// println!("Structures: {}", result.structure.len());
239/// ```
240pub fn process(source: &str, config: &ProcessConfig) -> Result<ProcessResult, Error> {
241    // Ensure cache is registered before attempting to process
242    #[cfg(feature = "download")]
243    ensure_cache_registered()?;
244
245    REGISTRY.process(source, config)
246}
247
248/// Run extraction patterns against source code.
249///
250/// Convenience wrapper around [`extract::extract`].
251///
252/// # Errors
253///
254/// Returns an error if the language is not found, parsing fails, or a query
255/// pattern is invalid.
256///
257/// # Example
258///
259/// ```no_run
260/// use ahash::AHashMap;
261/// use tree_sitter_language_pack::{ExtractionConfig, ExtractionPattern, CaptureOutput, extract_patterns};
262///
263/// let mut patterns = AHashMap::new();
264/// patterns.insert("fns".to_string(), ExtractionPattern {
265///     query: "(function_definition name: (identifier) @fn_name)".to_string(),
266///     capture_output: CaptureOutput::default(),
267///     child_fields: Vec::new(),
268///     max_results: None,
269///     byte_range: None,
270/// });
271/// let config = ExtractionConfig { language: "python".to_string(), patterns };
272/// let result = extract_patterns("def hello(): pass", &config).unwrap();
273/// ```
274pub fn extract_patterns(source: &str, config: &ExtractionConfig) -> Result<ExtractionResult, Error> {
275    extract::extract(source, config)
276}
277
278/// Validate extraction patterns without running them.
279///
280/// Convenience wrapper around [`extract::validate_extraction`].
281///
282/// # Errors
283///
284/// Returns an error if the language cannot be loaded.
285pub fn validate_extraction(config: &ExtractionConfig) -> Result<ValidationResult, Error> {
286    extract::validate_extraction(config)
287}
288
289// ──────────────────────────────────────────────────────────────────────────────
290// Download feature helpers and public API
291// ──────────────────────────────────────────────────────────────────────────────
292
/// Register the effective cache directory with the global registry unless
/// the `CACHE_REGISTERED` flag says that has already happened.
///
/// Fails only if the effective cache directory cannot be determined.
#[cfg(feature = "download")]
fn ensure_cache_registered() -> Result<(), Error> {
    use std::sync::atomic::Ordering;

    if !CACHE_REGISTERED.load(Ordering::Acquire) {
        // add_extra_libs_dir uses interior mutability, so no outer write lock
        // is taken around the registry here.
        REGISTRY.add_extra_libs_dir(effective_cache_dir()?);
        CACHE_REGISTERED.store(true, Ordering::Release);
    }
    Ok(())
}
304
/// Resolve the cache directory currently in effect: the override stored in
/// `CUSTOM_CACHE_DIR` when one is set, otherwise the default directory for
/// this crate version.
///
/// Returns [`Error::LockPoisoned`] if the override lock is poisoned, or
/// whatever error `DownloadManager::default_cache_dir` reports.
#[cfg(feature = "download")]
fn effective_cache_dir() -> Result<std::path::PathBuf, Error> {
    let override_dir = CUSTOM_CACHE_DIR
        .read()
        .map_err(|poisoned| Error::LockPoisoned(poisoned.to_string()))?;
    if let Some(dir) = override_dir.as_ref() {
        return Ok(dir.clone());
    }
    DownloadManager::default_cache_dir(env!("CARGO_PKG_VERSION"))
}
315
/// Set up the language pack from `config` and pre-download what it lists.
///
/// The custom cache directory (if any) is applied first, then every language
/// and every group named in the config is downloaded. Use this as the entry
/// point when the cache should be warm before the first lookup.
///
/// # Errors
///
/// Returns an error if configuration cannot be applied or if downloads fail.
///
/// # Example
///
/// ```no_run
/// use tree_sitter_language_pack::{PackConfig, init};
///
/// let config = PackConfig {
///     cache_dir: None,
///     languages: Some(vec!["python".to_string(), "rust".to_string()]),
///     groups: None,
/// };
/// init(&config).unwrap();
/// ```
#[cfg(feature = "download")]
pub fn init(config: &PackConfig) -> Result<(), Error> {
    // The cache directory must be settled before anything is downloaded.
    configure(config)?;

    if let Some(languages) = config.languages.as_ref() {
        let names: Vec<&str> = languages.iter().map(String::as_str).collect();
        download(&names)?;
    }

    if let Some(groups) = config.groups.as_ref() {
        let mut manager =
            DownloadManager::with_cache_dir(env!("CARGO_PKG_VERSION"), effective_cache_dir()?);
        for group in groups {
            manager.ensure_group(group)?;
        }
    }

    // Covers the case where nothing above triggered the registration.
    ensure_cache_registered()
}
355
/// Apply the download configuration; nothing is downloaded.
///
/// Call this to set a custom cache directory before the first call to
/// [`get_language`] or any download function. Changing the cache dir after
/// languages have been registered has no effect on already-loaded languages.
/// A config without `cache_dir` leaves the current setting untouched.
///
/// # Errors
///
/// Returns an error if the lock cannot be acquired.
///
/// # Example
///
/// ```no_run
/// use std::path::PathBuf;
/// use tree_sitter_language_pack::{PackConfig, configure};
///
/// let config = PackConfig {
///     cache_dir: Some(PathBuf::from("/tmp/my-parsers")),
///     languages: None,
///     groups: None,
/// };
/// configure(&config).unwrap();
/// ```
#[cfg(feature = "download")]
pub fn configure(config: &PackConfig) -> Result<(), Error> {
    let Some(dir) = config.cache_dir.as_ref() else {
        return Ok(());
    };

    // The write guard stays alive until the function returns, so the flag
    // reset below happens while the override is still locked.
    let mut override_dir = CUSTOM_CACHE_DIR
        .write()
        .map_err(|poisoned| Error::LockPoisoned(poisoned.to_string()))?;
    *override_dir = Some(dir.clone());

    // Clear the flag so the new directory is registered on next use.
    // NOTE: directories registered earlier stay in the registry. That is
    // accepted: add_extra_libs_dir deduplicates and each path is scanned
    // independently, so no cleanup logic is needed here.
    CACHE_REGISTERED.store(false, std::sync::atomic::Ordering::Release);
    Ok(())
}
395
/// Fetch the named languages into the local cache.
///
/// The return value is the number of newly downloaded languages, measured as
/// the growth of the installed set; languages that were already cached are
/// not counted.
///
/// # Errors
///
/// Returns an error if any language is not available in the manifest or if
/// the download fails.
///
/// # Example
///
/// ```no_run
/// use tree_sitter_language_pack::download;
///
/// let count = download(&["python", "rust", "typescript"]).unwrap();
/// println!("Downloaded {} new languages", count);
/// ```
#[cfg(feature = "download")]
pub fn download(names: &[&str]) -> Result<usize, Error> {
    ensure_cache_registered()?;
    let mut manager =
        DownloadManager::with_cache_dir(env!("CARGO_PKG_VERSION"), effective_cache_dir()?);

    // Compare the installed count before and after to find what is new.
    let installed_before = manager.installed_languages().len();
    manager.ensure_languages(names)?;
    let installed_after = manager.installed_languages().len();

    Ok(installed_after.saturating_sub(installed_before))
}
424
/// Fetch every language listed in the remote manifest.
///
/// Returns the number of newly downloaded languages.
///
/// # Errors
///
/// Returns an error if the manifest cannot be fetched or a download fails.
///
/// # Example
///
/// ```no_run
/// use tree_sitter_language_pack::download_all;
///
/// let count = download_all().unwrap();
/// println!("Downloaded {} languages", count);
/// ```
#[cfg(feature = "download")]
pub fn download_all() -> Result<usize, Error> {
    let names = manifest_languages()?;
    // `download` takes borrowed names, so lend out the owned strings.
    download(&names.iter().map(String::as_str).collect::<Vec<_>>())
}
447
/// List, in sorted order, every language name in the remote manifest (248).
///
/// The remote manifest is fetched (and cached) to discover the full set of
/// downloadable languages. Use [`downloaded_languages`] to list what is
/// already cached locally.
///
/// # Errors
///
/// Returns an error if the manifest cannot be fetched.
///
/// # Example
///
/// ```no_run
/// use tree_sitter_language_pack::manifest_languages;
///
/// let langs = manifest_languages().unwrap();
/// println!("{} languages available for download", langs.len());
/// ```
#[cfg(feature = "download")]
pub fn manifest_languages() -> Result<Vec<String>, Error> {
    let mut manager =
        DownloadManager::with_cache_dir(env!("CARGO_PKG_VERSION"), effective_cache_dir()?);
    let mut names: Vec<String> = manager.fetch_manifest()?.languages.keys().cloned().collect();
    // Sort the collected keys so callers get a stable, ordered list.
    names.sort_unstable();
    Ok(names)
}
475
/// List the languages already present in the local cache.
///
/// No network requests are made. An empty list is returned if the cache
/// directory does not exist or cannot be read.
///
/// # Example
///
/// ```no_run
/// use tree_sitter_language_pack::downloaded_languages;
///
/// let langs = downloaded_languages();
/// println!("{} languages already cached", langs.len());
/// ```
#[cfg(feature = "download")]
pub fn downloaded_languages() -> Vec<String> {
    // Deliberately best-effort: an unresolvable cache dir means "nothing cached".
    let Ok(dir) = effective_cache_dir() else {
        return Vec::new();
    };
    DownloadManager::with_cache_dir(env!("CARGO_PKG_VERSION"), dir).installed_languages()
}
498
/// Remove every cached parser shared library.
///
/// The cache registration flag is cleared afterwards, so the next call to
/// [`get_language`] or a download function re-registers the (now empty)
/// cache directory.
///
/// # Errors
///
/// Returns an error if the cache directory cannot be removed.
///
/// # Example
///
/// ```no_run
/// use tree_sitter_language_pack::clean_cache;
///
/// clean_cache().unwrap();
/// println!("Cache cleared");
/// ```
#[cfg(feature = "download")]
pub fn clean_cache() -> Result<(), Error> {
    let dir = effective_cache_dir()?;
    DownloadManager::with_cache_dir(env!("CARGO_PKG_VERSION"), dir).clean_cache()?;
    // Only reached when the removal succeeded.
    CACHE_REGISTERED.store(false, std::sync::atomic::Ordering::Release);
    Ok(())
}
524
/// Report the cache directory currently in effect.
///
/// That is the custom path set via [`configure`] / [`init`] when there is
/// one, and otherwise the default:
/// `~/.cache/tree-sitter-language-pack/v{version}/libs/`.
///
/// # Errors
///
/// Returns an error if the system cache directory cannot be determined.
///
/// # Example
///
/// ```no_run
/// use tree_sitter_language_pack::cache_dir;
///
/// let dir = cache_dir().unwrap();
/// println!("Cache directory: {}", dir.display());
/// ```
#[cfg(feature = "download")]
pub fn cache_dir() -> Result<std::path::PathBuf, Error> {
    // Public face of the private resolver.
    effective_cache_dir()
}
546
#[cfg(test)]
mod tests {
    use super::*;

    /// A name no grammar is expected to be registered under.
    const UNKNOWN_LANGUAGE: &str = "nonexistent_language_xyz";

    /// Smoke test: listing languages must not panic. With zero default
    /// parsers the list may be empty unless lang-* features are enabled.
    #[test]
    fn test_available_languages() {
        let _names = available_languages();
    }

    /// Any listed language must be reported as available; an unknown name must not.
    #[test]
    fn test_has_language() {
        if let Some(first) = available_languages().first() {
            assert!(has_language(first));
        }
        assert!(!has_language(UNKNOWN_LANGUAGE));
    }

    /// Looking up an unknown name yields an error rather than a language.
    #[test]
    fn test_get_language_invalid() {
        assert!(get_language(UNKNOWN_LANGUAGE).is_err());
    }

    /// Every listed language can be loaded, set on a parser, and used to parse.
    #[test]
    #[ignore = "loads all 248 dynamic libraries — run with --ignored"]
    fn test_get_language_and_parse() {
        for lang_name in available_languages() {
            let lang = get_language(&lang_name)
                .unwrap_or_else(|e| panic!("Failed to load language '{lang_name}': {e}"));
            let mut parser = tree_sitter::Parser::new();
            if let Err(e) = parser.set_language(&lang) {
                panic!("Failed to set language '{lang_name}': {e}");
            }
            assert!(
                parser.parse("x", None).is_some(),
                "Parser for '{lang_name}' should parse a string"
            );
        }
    }

    /// A parser can be built for the first listed language, when there is one.
    #[test]
    fn test_get_parser() {
        let langs = available_languages();
        let Some(first) = langs.first() else {
            return;
        };
        assert!(get_parser(first).is_ok(), "get_parser should succeed for '{first}'");
    }

    /// The default pack configuration leaves every option unset.
    #[test]
    fn test_pack_config_default() {
        let defaults = PackConfig::default();
        assert!(defaults.cache_dir.is_none());
        assert!(defaults.languages.is_none());
        assert!(defaults.groups.is_none());
    }
}