Skip to main content

normalize_languages/
grammar_loader.rs

1//! Dynamic grammar loading for tree-sitter.
2//!
3//! Loads tree-sitter grammars from shared libraries (.so/.dylib/.dll).
4//! Also loads highlight queries (.scm files) for syntax highlighting.
5//! Grammars are compiled from arborium sources via `cargo xtask build-grammars`.
6//!
7//! # ABI Compatibility
8//!
9//! Tree-sitter grammars have an ABI version embedded at compile time. The tree-sitter
10//! library only loads grammars within its supported version range:
11//! - tree-sitter 0.24: ABI 13-14
12//! - tree-sitter 0.25+: ABI 13-15
13//!
14//! Arborium grammar crates embed the ABI version in their parser.c source. When arborium
15//! updates to use newer tree-sitter, grammars must be recompiled. Stale grammars in
16//! `~/.config/normalize/grammars/` may cause `LanguageError { version: N }` if incompatible.
17//!
18//! # Lifetime Requirements
19//!
20//! **IMPORTANT**: The `GrammarLoader` must outlive any `Language` or `Tree` obtained from it.
21//! The loader holds the shared library (`Library`) that contains the grammar's code. If the
22//! loader is dropped, the library is unloaded, and any `Language`/`Tree` references become
23//! dangling pointers (use-after-free, likely segfault).
24//!
25//! Safe patterns:
26//! - Use the global singleton (see [`crate::parsers::grammar_loader()`])
27//! - Keep the loader in scope for the duration of tree usage
28//! - Return `(Tree, GrammarLoader)` tuples from helper functions
29//!
30//! Unsafe pattern (causes segfault):
31//! ```ignore
32//! fn parse(code: &str) -> Tree {
33//!     let loader = GrammarLoader::new();  // Created here
34//!     let lang = loader.get("python").ok().unwrap();
35//!     let mut parser = Parser::new();
36//!     parser.set_language(&lang).unwrap();
37//!     parser.parse(code, None).unwrap()   // Tree returned
38//! }  // loader dropped here - library unloaded!
39//! // Tree now has dangling pointers -> segfault on use
40//! ```
41
42use libloading::{Library, Symbol};
43use std::collections::HashMap;
44use std::path::{Path, PathBuf};
45use std::sync::{Arc, RwLock};
46use tree_sitter::Language;
47use tree_sitter_language::LanguageFn;
48
/// Error returned by [`GrammarLoader::get`].
///
/// Both variants carry the grammar name that was requested so callers can
/// report which grammar failed.
#[derive(Debug, thiserror::Error)]
pub enum GrammarLoadError {
    /// No shared library (`.so`/`.dylib`/`.dll`) for this grammar exists in
    /// any search path. The payload is the grammar name.
    #[error("grammar '{0}' not found in search paths")]
    NotFound(String),
    /// The shared library was found but could not be loaded (e.g., missing
    /// symbols, OS-level `dlopen` failure).
    #[error("failed to load grammar '{grammar}': {detail}")]
    LoadFailed {
        /// Grammar name (e.g. `"python"`).
        grammar: String,
        /// Human-readable description of the failure: either the libloading
        /// error text or a "symbol not found" message.
        detail: String,
    },
}
65
66/// Loaded grammar with its backing library.
67///
68/// The `_library` field keeps the shared library loaded in memory. The `language`
69/// field contains pointers into this library's memory. Dropping the library while
70/// the language is in use causes undefined behavior (typically segfault).
71struct LoadedGrammar {
72    /// Backing shared library - must outlive any use of `language`.
73    _library: Library,
74    /// Tree-sitter Language (contains pointers into `_library`).
75    language: Language,
76}
77
/// Dynamic grammar loader with caching.
///
/// Grammars and query files are looked up in `search_paths`, in order. Every
/// cache sits behind its own `RwLock`, so lookups work through `&self`.
/// The query-text caches are keyed by grammar name and hold the raw `.scm`
/// source; the compiled-query cache is keyed by `"grammar:query_type"`.
pub struct GrammarLoader {
    /// Search paths for grammar libraries.
    search_paths: Vec<PathBuf>,
    /// Cached loaded grammars.
    cache: RwLock<HashMap<String, Arc<LoadedGrammar>>>,
    /// Cached highlight queries.
    highlight_cache: RwLock<HashMap<String, Arc<String>>>,
    /// Cached injection queries.
    injection_cache: RwLock<HashMap<String, Arc<String>>>,
    /// Cached locals queries.
    locals_cache: RwLock<HashMap<String, Arc<String>>>,
    /// Cached complexity queries.
    complexity_cache: RwLock<HashMap<String, Arc<String>>>,
    /// Cached calls queries.
    calls_cache: RwLock<HashMap<String, Arc<String>>>,
    /// Cached type queries.
    types_cache: RwLock<HashMap<String, Arc<String>>>,
    /// Cached tags queries.
    tags_cache: RwLock<HashMap<String, Arc<String>>>,
    /// Cached imports queries.
    imports_cache: RwLock<HashMap<String, Arc<String>>>,
    /// Cached decorations queries.
    decorations_cache: RwLock<HashMap<String, Arc<String>>>,
    /// Cached test-regions queries.
    test_regions_cache: RwLock<HashMap<String, Arc<String>>>,
    /// Cached CFG queries.
    cfg_cache: RwLock<HashMap<String, Arc<String>>>,
    /// Cached compiled tree-sitter queries (keyed by "grammar:query_type").
    compiled_query_cache: RwLock<HashMap<String, Arc<tree_sitter::Query>>>,
}
109
110impl GrammarLoader {
111    /// Create a new grammar loader with default search paths.
112    ///
113    /// Search order:
114    /// 1. `NORMALIZE_GRAMMAR_PATH` environment variable (colon-separated)
115    /// 2. `~/.config/normalize/grammars/`
116    pub fn new() -> Self {
117        let mut paths = Vec::new();
118
119        // Environment variable takes priority
120        if let Ok(env_path) = std::env::var("NORMALIZE_GRAMMAR_PATH") {
121            for p in env_path.split(':') {
122                if !p.is_empty() {
123                    paths.push(PathBuf::from(p));
124                }
125            }
126        }
127
128        // User config directory
129        if let Some(config) = dirs::config_dir() {
130            paths.push(config.join("normalize/grammars"));
131        }
132
133        Self {
134            search_paths: paths,
135            cache: RwLock::new(HashMap::new()),
136            highlight_cache: RwLock::new(HashMap::new()),
137            injection_cache: RwLock::new(HashMap::new()),
138            locals_cache: RwLock::new(HashMap::new()),
139            complexity_cache: RwLock::new(HashMap::new()),
140            calls_cache: RwLock::new(HashMap::new()),
141            types_cache: RwLock::new(HashMap::new()),
142            tags_cache: RwLock::new(HashMap::new()),
143            imports_cache: RwLock::new(HashMap::new()),
144            decorations_cache: RwLock::new(HashMap::new()),
145            test_regions_cache: RwLock::new(HashMap::new()),
146            cfg_cache: RwLock::new(HashMap::new()),
147            compiled_query_cache: RwLock::new(HashMap::new()),
148        }
149    }
150
151    /// Create a loader with custom search paths.
152    pub fn with_paths(paths: Vec<PathBuf>) -> Self {
153        Self {
154            search_paths: paths,
155            cache: RwLock::new(HashMap::new()),
156            highlight_cache: RwLock::new(HashMap::new()),
157            injection_cache: RwLock::new(HashMap::new()),
158            locals_cache: RwLock::new(HashMap::new()),
159            complexity_cache: RwLock::new(HashMap::new()),
160            calls_cache: RwLock::new(HashMap::new()),
161            types_cache: RwLock::new(HashMap::new()),
162            tags_cache: RwLock::new(HashMap::new()),
163            imports_cache: RwLock::new(HashMap::new()),
164            decorations_cache: RwLock::new(HashMap::new()),
165            test_regions_cache: RwLock::new(HashMap::new()),
166            cfg_cache: RwLock::new(HashMap::new()),
167            compiled_query_cache: RwLock::new(HashMap::new()),
168        }
169    }
170
    /// Add a search path.
    ///
    /// The path is appended to the end of the search list. Lookups walk that
    /// list front to back and stop at the first match, so the new path is
    /// consulted only after every path that was already registered.
    pub fn add_path(&mut self, path: PathBuf) {
        self.search_paths.push(path);
    }
175
176    /// Get a grammar by name.
177    ///
178    /// Returns `Ok(lang)` if found and loaded successfully,
179    /// `Err(GrammarLoadError::NotFound)` if no `.so`/`.dylib` exists in any
180    /// search path, and other `Err` variants for load or ABI failures.
181    pub fn get(&self, name: &str) -> Result<Language, GrammarLoadError> {
182        // Check cache first
183        if let Some(loaded) = self
184            .cache
185            .read()
186            .unwrap_or_else(|e| e.into_inner())
187            .get(name)
188        {
189            return Ok(loaded.language.clone());
190        }
191
192        self.load_external(name)
193    }
194
195    /// Get the highlight query for a grammar.
196    ///
197    /// Returns None if no highlight query found for the grammar.
198    /// Query files are {name}.highlights.scm in the grammar search paths.
199    pub fn get_highlights(&self, name: &str) -> Option<Arc<String>> {
200        // Check cache first
201        if let Some(query) = self
202            .highlight_cache
203            .read()
204            .unwrap_or_else(|e| e.into_inner())
205            .get(name)
206        {
207            return Some(Arc::clone(query));
208        }
209
210        self.load_query(name, "highlights", &self.highlight_cache)
211    }
212
213    /// Get the injection query for a grammar.
214    ///
215    /// Returns None if no injection query found for the grammar.
216    /// Query files are {name}.injections.scm in the grammar search paths.
217    pub fn get_injections(&self, name: &str) -> Option<Arc<String>> {
218        // Check cache first
219        if let Some(query) = self
220            .injection_cache
221            .read()
222            .unwrap_or_else(|e| e.into_inner())
223            .get(name)
224        {
225            return Some(Arc::clone(query));
226        }
227
228        self.load_query(name, "injections", &self.injection_cache)
229    }
230
231    /// Get the locals query for a grammar.
232    ///
233    /// Returns None if no locals query found for the grammar.
234    /// Query files are {name}.locals.scm in the grammar search paths.
235    pub fn get_locals(&self, name: &str) -> Option<Arc<String>> {
236        // Check cache first
237        if let Some(query) = self
238            .locals_cache
239            .read()
240            .unwrap_or_else(|e| e.into_inner())
241            .get(name)
242        {
243            return Some(Arc::clone(query));
244        }
245
246        self.load_query(name, "locals", &self.locals_cache)
247    }
248
249    /// Get the complexity query for a grammar.
250    ///
251    /// Returns None if no complexity query found for the grammar.
252    /// Query files are {name}.complexity.scm in the grammar search paths.
253    /// Uses `@complexity` captures for nodes that increase cyclomatic complexity,
254    /// and `@nesting` captures for nodes that increase nesting depth.
255    pub fn get_complexity(&self, name: &str) -> Option<Arc<String>> {
256        // Check cache first
257        if let Some(query) = self
258            .complexity_cache
259            .read()
260            .unwrap_or_else(|e| e.into_inner())
261            .get(name)
262        {
263            return Some(Arc::clone(query));
264        }
265
266        // Try external files, then fall back to bundled queries
267        self.load_query(name, "complexity", &self.complexity_cache)
268            .or_else(|| {
269                let content = bundled_complexity_query(name)?;
270                let query = Arc::new(content.to_string());
271                self.complexity_cache
272                    .write()
273                    .unwrap_or_else(|e| e.into_inner())
274                    .insert(name.to_string(), Arc::clone(&query));
275                Some(query)
276            })
277    }
278
279    /// Get the calls query for a grammar.
280    ///
281    /// Returns None if no calls query found for the grammar.
282    /// Query files are {name}.calls.scm in the grammar search paths.
283    /// Uses `@call` captures for call expressions and `@call.qualifier` for
284    /// method call receivers (e.g. `foo` in `foo.bar()`).
285    pub fn get_calls(&self, name: &str) -> Option<Arc<String>> {
286        // Check cache first
287        if let Some(query) = self
288            .calls_cache
289            .read()
290            .unwrap_or_else(|e| e.into_inner())
291            .get(name)
292        {
293            return Some(Arc::clone(query));
294        }
295
296        // Try external files, then fall back to bundled queries
297        self.load_query(name, "calls", &self.calls_cache)
298            .or_else(|| {
299                let content = bundled_calls_query(name)?;
300                let query = Arc::new(content.to_string());
301                self.calls_cache
302                    .write()
303                    .unwrap_or_else(|e| e.into_inner())
304                    .insert(name.to_string(), Arc::clone(&query));
305                Some(query)
306            })
307    }
308
309    /// Get the types query for a grammar.
310    ///
311    /// Returns the bundled query for supported languages, or an external file if one
312    /// exists at `{name}.types.scm` in the grammar search paths (external wins).
313    pub fn get_types(&self, name: &str) -> Option<Arc<String>> {
314        // Check cache first
315        if let Some(query) = self
316            .types_cache
317            .read()
318            .unwrap_or_else(|e| e.into_inner())
319            .get(name)
320        {
321            return Some(Arc::clone(query));
322        }
323
324        // External file takes priority over bundled
325        if let Some(q) = self.load_query(name, "types", &self.types_cache) {
326            return Some(q);
327        }
328
329        // Fall back to bundled query
330        let bundled = bundled_types_query(name)?;
331        let query = Arc::new(bundled.to_string());
332        self.types_cache
333            .write()
334            .unwrap_or_else(|e| e.into_inner())
335            .insert(name.to_string(), Arc::clone(&query));
336        Some(query)
337    }
338
339    /// Get the tags query for a grammar.
340    ///
341    /// Tags queries use the tree-sitter tags format with `@name.definition.*` and
342    /// `@name.reference.*` captures for symbol navigation (used by GitHub Linguist,
343    /// nvim-treesitter, etc.).
344    ///
345    /// Returns the bundled query for supported languages, or an external file if one
346    /// exists at `{name}.tags.scm` in the grammar search paths (external wins).
347    pub fn get_tags(&self, name: &str) -> Option<Arc<String>> {
348        // Check cache first
349        if let Some(query) = self
350            .tags_cache
351            .read()
352            .unwrap_or_else(|e| e.into_inner())
353            .get(name)
354        {
355            return Some(Arc::clone(query));
356        }
357
358        // External file takes priority over bundled
359        if let Some(q) = self.load_query(name, "tags", &self.tags_cache) {
360            return Some(q);
361        }
362
363        // Fall back to bundled query
364        let bundled = bundled_tags_query(name)?;
365        let query = Arc::new(bundled.to_string());
366        self.tags_cache
367            .write()
368            .unwrap_or_else(|e| e.into_inner())
369            .insert(name.to_string(), Arc::clone(&query));
370        Some(query)
371    }
372
373    /// Get the imports query for a grammar.
374    ///
375    /// Returns the bundled query for supported languages, or an external file if one
376    /// exists at `{name}.imports.scm` in the grammar search paths (external wins).
377    pub fn get_imports(&self, name: &str) -> Option<Arc<String>> {
378        // Check cache first
379        if let Some(query) = self
380            .imports_cache
381            .read()
382            .unwrap_or_else(|e| e.into_inner())
383            .get(name)
384        {
385            return Some(Arc::clone(query));
386        }
387
388        // External file takes priority over bundled
389        if let Some(q) = self.load_query(name, "imports", &self.imports_cache) {
390            return Some(q);
391        }
392
393        // Fall back to bundled query
394        let bundled = bundled_imports_query(name)?;
395        let query = Arc::new(bundled.to_string());
396        self.imports_cache
397            .write()
398            .unwrap_or_else(|e| e.into_inner())
399            .insert(name.to_string(), Arc::clone(&query));
400        Some(query)
401    }
402
403    /// Get the decorations query for a grammar.
404    ///
405    /// Returns the bundled query for supported languages, or an external file if one
406    /// exists at `{name}.decorations.scm` in the grammar search paths (external wins).
407    /// Uses `@decoration` captures for doc comments, attributes, decorators, and
408    /// annotations that immediately precede a definition.
409    pub fn get_decorations(&self, name: &str) -> Option<Arc<String>> {
410        // Check cache first
411        if let Some(query) = self
412            .decorations_cache
413            .read()
414            .unwrap_or_else(|e| e.into_inner())
415            .get(name)
416        {
417            return Some(Arc::clone(query));
418        }
419
420        // External file takes priority over bundled
421        if let Some(q) = self.load_query(name, "decorations", &self.decorations_cache) {
422            return Some(q);
423        }
424
425        // Fall back to bundled query
426        let bundled = bundled_decorations_query(name)?;
427        let query = Arc::new(bundled.to_string());
428        self.decorations_cache
429            .write()
430            .unwrap_or_else(|e| e.into_inner())
431            .insert(name.to_string(), Arc::clone(&query));
432        Some(query)
433    }
434
435    /// Get the test-regions query for a grammar.
436    ///
437    /// Returns None if no test-regions query exists for the grammar.
438    /// Query files are `{name}.test_regions.scm` in the grammar search paths.
439    /// Uses `@test_region` captures for byte ranges of test-only code that
440    /// rules may opt to skip (via `applies_in_tests = false`, the default).
441    ///
442    /// Languages without a `.test_regions.scm` simply have no AST-based test
443    /// detection — path-based excludes (e.g. `**/tests/**` or `*_test.go`)
444    /// remain the way to scope rules in those cases.
445    pub fn get_test_regions(&self, name: &str) -> Option<Arc<String>> {
446        // Check cache first
447        if let Some(query) = self
448            .test_regions_cache
449            .read()
450            .unwrap_or_else(|e| e.into_inner())
451            .get(name)
452        {
453            return Some(Arc::clone(query));
454        }
455
456        // External file takes priority over bundled
457        if let Some(q) = self.load_query(name, "test_regions", &self.test_regions_cache) {
458            return Some(q);
459        }
460
461        // Fall back to bundled query
462        let bundled = bundled_test_regions_query(name)?;
463        let query = Arc::new(bundled.to_string());
464        self.test_regions_cache
465            .write()
466            .unwrap_or_else(|e| e.into_inner())
467            .insert(name.to_string(), Arc::clone(&query));
468        Some(query)
469    }
470
471    /// Get the CFG query for a grammar.
472    ///
473    /// Returns `None` if no CFG query is found for the grammar.
474    /// Query files are `{name}.cfg.scm` in the grammar search paths.
475    /// Uses `@cfg.*` captures for control flow nodes (see `normalize-cfg` documentation).
476    pub fn get_cfg(&self, name: &str) -> Option<Arc<String>> {
477        // Check cache first
478        if let Some(query) = self
479            .cfg_cache
480            .read()
481            .unwrap_or_else(|e| e.into_inner())
482            .get(name)
483        {
484            return Some(Arc::clone(query));
485        }
486
487        // Try external files, then fall back to bundled queries
488        self.load_query(name, "cfg", &self.cfg_cache).or_else(|| {
489            let content = bundled_cfg_query(name)?;
490            let query = Arc::new(content.to_string());
491            self.cfg_cache
492                .write()
493                .unwrap_or_else(|e| e.into_inner())
494                .insert(name.to_string(), Arc::clone(&query));
495            Some(query)
496        })
497    }
498
499    /// Load a query file (.scm) from external file.
500    fn load_query(
501        &self,
502        name: &str,
503        query_type: &str,
504        cache: &RwLock<HashMap<String, Arc<String>>>,
505    ) -> Option<Arc<String>> {
506        let scm_name = format!("{name}.{query_type}.scm");
507
508        for search_path in &self.search_paths {
509            let scm_path = search_path.join(&scm_name);
510            if scm_path.exists()
511                && let Ok(content) = std::fs::read_to_string(&scm_path)
512            {
513                let query = Arc::new(content);
514
515                // Cache it
516                cache
517                    .write()
518                    .unwrap_or_else(|e| e.into_inner())
519                    .insert(name.to_string(), Arc::clone(&query));
520
521                return Some(query);
522            }
523        }
524
525        None
526    }
527
528    /// Get a compiled tree-sitter query, using the cache to avoid recompilation.
529    ///
530    /// `grammar_name` is the grammar name (e.g. "rust", "python").
531    /// `query_type` is the query category (e.g. "tags", "complexity", "calls").
532    /// `query_str` is the raw .scm query string.
533    ///
534    /// Returns the compiled query or None if compilation fails.
535    pub fn get_compiled_query(
536        &self,
537        grammar_name: &str,
538        query_type: &str,
539        query_str: &str,
540    ) -> Option<Arc<tree_sitter::Query>> {
541        let key = format!("{grammar_name}:{query_type}");
542
543        // Check cache
544        {
545            let cache = self
546                .compiled_query_cache
547                .read()
548                .unwrap_or_else(|e| e.into_inner());
549            if let Some(q) = cache.get(&key) {
550                return Some(Arc::clone(q));
551            }
552        }
553
554        // Compile and cache
555        let grammar = self.get(grammar_name).ok()?;
556        let compiled = tree_sitter::Query::new(&grammar, query_str).ok()?;
557        let arc = Arc::new(compiled);
558
559        self.compiled_query_cache
560            .write()
561            .unwrap_or_else(|e| e.into_inner())
562            .insert(key, Arc::clone(&arc));
563
564        Some(arc)
565    }
566
567    /// Load a grammar from external .so file.
568    fn load_external(&self, name: &str) -> Result<Language, GrammarLoadError> {
569        let lib_name = grammar_lib_name(name);
570
571        for search_path in &self.search_paths {
572            let lib_path = search_path.join(&lib_name);
573            if lib_path.exists() {
574                return self.load_from_path(name, &lib_path);
575            }
576        }
577
578        Err(GrammarLoadError::NotFound(name.to_string()))
579    }
580
    /// Load grammar from a specific path.
    ///
    /// Opens the shared library at `path`, resolves the entry-point symbol
    /// computed by `grammar_symbol_name(name)`, wraps it in a tree-sitter
    /// `Language`, and stores the library together with the language in the
    /// grammar cache under `name`.
    ///
    /// # Errors
    ///
    /// Returns `GrammarLoadError::LoadFailed` when the library cannot be
    /// opened or when the symbol lookup fails. Both failures are also logged
    /// at debug level.
    fn load_from_path(&self, name: &str, path: &Path) -> Result<Language, GrammarLoadError> {
        // SAFETY: Loading shared libraries is inherently unsafe. We accept this risk because:
        // 1. Grammars come from arborium (bundled) or user-configured search paths
        // 2. The alternative (no dynamic loading) would require compiling all grammars statically
        // 3. Tree-sitter grammars are widely used and well-tested
        let library = unsafe {
            Library::new(path).map_err(|e| {
                log::debug!("Failed to load grammar at {}: {}", path.display(), e);
                GrammarLoadError::LoadFailed {
                    grammar: name.to_string(),
                    detail: e.to_string(),
                }
            })?
        };

        let symbol_name = grammar_symbol_name(name);
        // SAFETY: We call the tree-sitter grammar function which returns a Language pointer.
        // The function signature is defined by tree-sitter's C ABI. We trust that:
        // 1. The symbol exists (checked by library.get)
        // 2. The function conforms to tree-sitter's expected signature
        // 3. The returned Language is valid for the lifetime of the library
        let language = unsafe {
            let func: Result<Symbol<unsafe extern "C" fn() -> *const ()>, _> =
                library.get(symbol_name.as_bytes());
            match func {
                Ok(f) => {
                    // Copy the raw function pointer out of the `Symbol` and
                    // hand it to tree-sitter.
                    let lang_fn = LanguageFn::from_raw(*f);
                    Language::new(lang_fn)
                }
                Err(e) => {
                    log::debug!(
                        "Grammar '{}' at {} missing symbol '{}': {}",
                        name,
                        path.display(),
                        symbol_name,
                        e
                    );
                    return Err(GrammarLoadError::LoadFailed {
                        grammar: name.to_string(),
                        detail: format!("symbol '{}' not found: {}", symbol_name, e),
                    });
                }
            }
        };

        // Cache the loaded grammar. The library handle is stored next to the
        // language so it stays loaded for as long as the cache entry lives.
        let loaded = Arc::new(LoadedGrammar {
            _library: library,
            language: language.clone(),
        });

        self.cache
            .write()
            .unwrap_or_else(|e| e.into_inner())
            .insert(name.to_string(), loaded);

        Ok(language)
    }
640
641    /// List available grammars in search paths.
642    pub fn available_external(&self) -> Vec<String> {
643        let mut grammars = Vec::new();
644        let ext = grammar_extension();
645
646        for path in &self.search_paths {
647            if let Ok(entries) = std::fs::read_dir(path) {
648                for entry in entries.flatten() {
649                    let name = entry.file_name();
650                    let name_str = name.to_string_lossy();
651                    if name_str.ends_with(ext) {
652                        let grammar_name = name_str.trim_end_matches(ext);
653                        if !grammars.contains(&grammar_name.to_string()) {
654                            grammars.push(grammar_name.to_string());
655                        }
656                    }
657                }
658            }
659        }
660
661        grammars.sort();
662        grammars
663    }
664
665    /// List available grammars in search paths, with their file paths.
666    pub fn available_external_with_paths(&self) -> Vec<(String, std::path::PathBuf)> {
667        let mut grammars: Vec<(String, std::path::PathBuf)> = Vec::new();
668        let ext = grammar_extension();
669
670        for dir in &self.search_paths {
671            if let Ok(entries) = std::fs::read_dir(dir) {
672                for entry in entries.flatten() {
673                    let name = entry.file_name();
674                    let name_str = name.to_string_lossy();
675                    if name_str.ends_with(ext) {
676                        let grammar_name = name_str.trim_end_matches(ext).to_string();
677                        if !grammars.iter().any(|(n, _)| n == &grammar_name) {
678                            grammars.push((grammar_name, entry.path()));
679                        }
680                    }
681                }
682            }
683        }
684
685        grammars.sort_by(|a, b| a.0.cmp(&b.0));
686        grammars
687    }
688}
689
690impl Default for GrammarLoader {
691    fn default() -> Self {
692        Self::new()
693    }
694}
695
696/// Get the library file name for a grammar.
697fn grammar_lib_name(name: &str) -> String {
698    let ext = grammar_extension();
699    format!("{name}{ext}")
700}
701
/// Compute the entry-point symbol expected inside a grammar's shared library.
///
/// Most grammars export `tree_sitter_{name}` with hyphens in the name
/// replaced by underscores. A couple of arborium grammars use non-standard
/// symbol names and are special-cased.
fn grammar_symbol_name(name: &str) -> String {
    match name {
        // Arborium grammars with non-standard symbol names.
        "rust" => String::from("tree_sitter_rust_orchard"),
        "vb" => String::from("tree_sitter_vb_dotnet"),
        // Everything else: tree_sitter_{name}, hyphens -> underscores.
        other => format!("tree_sitter_{}", other.replace('-', "_")),
    }
}
714
715/// Return a bundled types query for languages with built-in support.
716/// Returns None for languages without a bundled query.
717fn bundled_types_query(name: &str) -> Option<&'static str> {
718    match name {
719        "rust" => Some(include_str!("queries/rust.types.scm")),
720        "typescript" => Some(include_str!("queries/typescript.types.scm")),
721        "tsx" => Some(include_str!("queries/tsx.types.scm")),
722        "python" => Some(include_str!("queries/python.types.scm")),
723        "java" => Some(include_str!("queries/java.types.scm")),
724        "go" => Some(include_str!("queries/go.types.scm")),
725        "c" => Some(include_str!("queries/c.types.scm")),
726        "cpp" => Some(include_str!("queries/cpp.types.scm")),
727        "kotlin" => Some(include_str!("queries/kotlin.types.scm")),
728        "swift" => Some(include_str!("queries/swift.types.scm")),
729        "c-sharp" => Some(include_str!("queries/c-sharp.types.scm")),
730        "scala" => Some(include_str!("queries/scala.types.scm")),
731        "haskell" => Some(include_str!("queries/haskell.types.scm")),
732        "ruby" => Some(include_str!("queries/ruby.types.scm")),
733        "dart" => Some(include_str!("queries/dart.types.scm")),
734        "elixir" => Some(include_str!("queries/elixir.types.scm")),
735        "ocaml" => Some(include_str!("queries/ocaml.types.scm")),
736        "erlang" => Some(include_str!("queries/erlang.types.scm")),
737        "zig" => Some(include_str!("queries/zig.types.scm")),
738        "fsharp" => Some(include_str!("queries/fsharp.types.scm")),
739        "gleam" => Some(include_str!("queries/gleam.types.scm")),
740        "julia" => Some(include_str!("queries/julia.types.scm")),
741        "r" => Some(include_str!("queries/r.types.scm")),
742        "d" => Some(include_str!("queries/d.types.scm")),
743        "objc" => Some(include_str!("queries/objc.types.scm")),
744        "vb" => Some(include_str!("queries/vb.types.scm")),
745        "groovy" => Some(include_str!("queries/groovy.types.scm")),
746        "ada" => Some(include_str!("queries/ada.types.scm")),
747        "agda" => Some(include_str!("queries/agda.types.scm")),
748        "elm" => Some(include_str!("queries/elm.types.scm")),
749        "idris" => Some(include_str!("queries/idris.types.scm")),
750        "lean" => Some(include_str!("queries/lean.types.scm")),
751        "php" => Some(include_str!("queries/php.types.scm")),
752        "powershell" => Some(include_str!("queries/powershell.types.scm")),
753        "rescript" => Some(include_str!("queries/rescript.types.scm")),
754        "verilog" => Some(include_str!("queries/verilog.types.scm")),
755        "vhdl" => Some(include_str!("queries/vhdl.types.scm")),
756        "sql" => Some(include_str!("queries/sql.types.scm")),
757        "hcl" => Some(include_str!("queries/hcl.types.scm")),
758        "glsl" => Some(include_str!("queries/glsl.types.scm")),
759        "hlsl" => Some(include_str!("queries/hlsl.types.scm")),
760        "clojure" => Some(include_str!("queries/clojure.types.scm")),
761        "commonlisp" => Some(include_str!("queries/commonlisp.types.scm")),
762        "elisp" => Some(include_str!("queries/elisp.types.scm")),
763        "javascript" => Some(include_str!("queries/javascript.types.scm")),
764        "lua" => Some(include_str!("queries/lua.types.scm")),
765        "scheme" => Some(include_str!("queries/scheme.types.scm")),
766        "graphql" => Some(include_str!("queries/graphql.types.scm")),
767        "nix" => Some(include_str!("queries/nix.types.scm")),
768        "starlark" => Some(include_str!("queries/starlark.types.scm")),
769        "matlab" => Some(include_str!("queries/matlab.types.scm")),
770        "tlaplus" => Some(include_str!("queries/tlaplus.types.scm")),
771        "typst" => Some(include_str!("queries/typst.types.scm")),
772        _ => None,
773    }
774}
775
776/// Return a bundled tags query for languages with built-in support.
777///
778/// Tags queries use the tree-sitter tags format (`@name.definition.*` and
779/// `@name.reference.*` captures) for symbol navigation. Sources are vendored from
780/// official tree-sitter grammar repositories (MIT licensed).
781fn bundled_tags_query(name: &str) -> Option<&'static str> {
782    match name {
783        "rust" => Some(include_str!("queries/rust.tags.scm")),
784        "python" => Some(include_str!("queries/python.tags.scm")),
785        "javascript" => Some(include_str!("queries/javascript.tags.scm")),
786        "typescript" => Some(include_str!("queries/typescript.tags.scm")),
787        "tsx" => Some(include_str!("queries/tsx.tags.scm")),
788        "go" => Some(include_str!("queries/go.tags.scm")),
789        "java" => Some(include_str!("queries/java.tags.scm")),
790        "c" => Some(include_str!("queries/c.tags.scm")),
791        "cpp" => Some(include_str!("queries/cpp.tags.scm")),
792        "ruby" => Some(include_str!("queries/ruby.tags.scm")),
793        "kotlin" => Some(include_str!("queries/kotlin.tags.scm")),
794        "scala" => Some(include_str!("queries/scala.tags.scm")),
795        "elixir" => Some(include_str!("queries/elixir.tags.scm")),
796        "swift" => Some(include_str!("queries/swift.tags.scm")),
797        "haskell" => Some(include_str!("queries/haskell.tags.scm")),
798        "dart" => Some(include_str!("queries/dart.tags.scm")),
799        "ocaml" => Some(include_str!("queries/ocaml.tags.scm")),
800        "fsharp" => Some(include_str!("queries/fsharp.tags.scm")),
801        "gleam" => Some(include_str!("queries/gleam.tags.scm")),
802        "zig" => Some(include_str!("queries/zig.tags.scm")),
803        "julia" => Some(include_str!("queries/julia.tags.scm")),
804        "erlang" => Some(include_str!("queries/erlang.tags.scm")),
805        "lua" => Some(include_str!("queries/lua.tags.scm")),
806        "php" => Some(include_str!("queries/php.tags.scm")),
807        "perl" => Some(include_str!("queries/perl.tags.scm")),
808        "r" => Some(include_str!("queries/r.tags.scm")),
809        "groovy" => Some(include_str!("queries/groovy.tags.scm")),
810        "c-sharp" => Some(include_str!("queries/c-sharp.tags.scm")),
811        "d" => Some(include_str!("queries/d.tags.scm")),
812        "graphql" => Some(include_str!("queries/graphql.tags.scm")),
813        "objc" => Some(include_str!("queries/objc.tags.scm")),
814        "vb" => Some(include_str!("queries/vb.tags.scm")),
815        "powershell" => Some(include_str!("queries/powershell.tags.scm")),
816        "clojure" => Some(include_str!("queries/clojure.tags.scm")),
817        "commonlisp" => Some(include_str!("queries/commonlisp.tags.scm")),
818        "scheme" => Some(include_str!("queries/scheme.tags.scm")),
819        "elisp" => Some(include_str!("queries/elisp.tags.scm")),
820        "bash" => Some(include_str!("queries/bash.tags.scm")),
821        "fish" => Some(include_str!("queries/fish.tags.scm")),
822        "zsh" => Some(include_str!("queries/zsh.tags.scm")),
823        "ada" => Some(include_str!("queries/ada.tags.scm")),
824        "idris" => Some(include_str!("queries/idris.tags.scm")),
825        "lean" => Some(include_str!("queries/lean.tags.scm")),
826        "rescript" => Some(include_str!("queries/rescript.tags.scm")),
827        "elm" => Some(include_str!("queries/elm.tags.scm")),
828        "markdown" => Some(include_str!("queries/markdown.tags.scm")),
829        "nix" => Some(include_str!("queries/nix.tags.scm")),
830        "prolog" => Some(include_str!("queries/prolog.tags.scm")),
831        "agda" => Some(include_str!("queries/agda.tags.scm")),
832        "awk" => Some(include_str!("queries/awk.tags.scm")),
833        "cmake" => Some(include_str!("queries/cmake.tags.scm")),
834        "glsl" => Some(include_str!("queries/glsl.tags.scm")),
835        "hcl" => Some(include_str!("queries/hcl.tags.scm")),
836        "hlsl" => Some(include_str!("queries/hlsl.tags.scm")),
837        "jq" => Some(include_str!("queries/jq.tags.scm")),
838        "matlab" => Some(include_str!("queries/matlab.tags.scm")),
839        "meson" => Some(include_str!("queries/meson.tags.scm")),
840        "nginx" => Some(include_str!("queries/nginx.tags.scm")),
841        "scss" => Some(include_str!("queries/scss.tags.scm")),
842        "sql" => Some(include_str!("queries/sql.tags.scm")),
843        "starlark" => Some(include_str!("queries/starlark.tags.scm")),
844        "svelte" => Some(include_str!("queries/svelte.tags.scm")),
845        "tlaplus" => Some(include_str!("queries/tlaplus.tags.scm")),
846        "typst" => Some(include_str!("queries/typst.tags.scm")),
847        "verilog" => Some(include_str!("queries/verilog.tags.scm")),
848        "vhdl" => Some(include_str!("queries/vhdl.tags.scm")),
849        "vim" => Some(include_str!("queries/vim.tags.scm")),
850        "vue" => Some(include_str!("queries/vue.tags.scm")),
851        "jinja2" => Some(include_str!("queries/jinja2.tags.scm")),
852        "json" => Some(include_str!("queries/json.tags.scm")),
853        "toml" => Some(include_str!("queries/toml.tags.scm")),
854        "yaml" => Some(include_str!("queries/yaml.tags.scm")),
855        "css" => Some(include_str!("queries/css.tags.scm")),
856        "html" => Some(include_str!("queries/html.tags.scm")),
857        "xml" => Some(include_str!("queries/xml.tags.scm")),
858        "thrift" => Some(include_str!("queries/thrift.tags.scm")),
859        "dockerfile" => Some(include_str!("queries/dockerfile.tags.scm")),
860        "caddy" => Some(include_str!("queries/caddy.tags.scm")),
861        _ => None,
862    }
863}
864
/// Shared-library file suffix (including the leading dot) for the target platform.
///
/// Yields `".dylib"` on macOS, `".dll"` on Windows, and `".so"` on every
/// other target.
fn grammar_extension() -> &'static str {
    // `cfg!` expands to a compile-time boolean, so only one path survives
    // optimization for any given target.
    if cfg!(target_os = "macos") {
        return ".dylib";
    }
    if cfg!(target_os = "windows") {
        return ".dll";
    }
    ".so"
}
875
876/// Return a bundled complexity query for a grammar, if available.
877///
878/// These are compiled into the binary so they work without external .scm files.
879/// External files in search paths take priority (for user customization).
880fn bundled_complexity_query(name: &str) -> Option<&'static str> {
881    match name {
882        "rust" => Some(include_str!("queries/rust.complexity.scm")),
883        "python" => Some(include_str!("queries/python.complexity.scm")),
884        "go" => Some(include_str!("queries/go.complexity.scm")),
885        "javascript" => Some(include_str!("queries/javascript.complexity.scm")),
886        "typescript" => Some(include_str!("queries/typescript.complexity.scm")),
887        "tsx" => Some(include_str!("queries/tsx.complexity.scm")),
888        "java" => Some(include_str!("queries/java.complexity.scm")),
889        "c" => Some(include_str!("queries/c.complexity.scm")),
890        "cpp" => Some(include_str!("queries/cpp.complexity.scm")),
891        "ruby" => Some(include_str!("queries/ruby.complexity.scm")),
892        "kotlin" => Some(include_str!("queries/kotlin.complexity.scm")),
893        "swift" => Some(include_str!("queries/swift.complexity.scm")),
894        "c-sharp" => Some(include_str!("queries/c-sharp.complexity.scm")),
895        "bash" => Some(include_str!("queries/bash.complexity.scm")),
896        "lua" => Some(include_str!("queries/lua.complexity.scm")),
897        "elixir" => Some(include_str!("queries/elixir.complexity.scm")),
898        "scala" => Some(include_str!("queries/scala.complexity.scm")),
899        "dart" => Some(include_str!("queries/dart.complexity.scm")),
900        "zig" => Some(include_str!("queries/zig.complexity.scm")),
901        "ocaml" => Some(include_str!("queries/ocaml.complexity.scm")),
902        "erlang" => Some(include_str!("queries/erlang.complexity.scm")),
903        "php" => Some(include_str!("queries/php.complexity.scm")),
904        "haskell" => Some(include_str!("queries/haskell.complexity.scm")),
905        "r" => Some(include_str!("queries/r.complexity.scm")),
906        "julia" => Some(include_str!("queries/julia.complexity.scm")),
907        "perl" => Some(include_str!("queries/perl.complexity.scm")),
908        "groovy" => Some(include_str!("queries/groovy.complexity.scm")),
909        "elm" => Some(include_str!("queries/elm.complexity.scm")),
910        "powershell" => Some(include_str!("queries/powershell.complexity.scm")),
911        "fish" => Some(include_str!("queries/fish.complexity.scm")),
912        "fsharp" => Some(include_str!("queries/fsharp.complexity.scm")),
913        "gleam" => Some(include_str!("queries/gleam.complexity.scm")),
914        "clojure" => Some(include_str!("queries/clojure.complexity.scm")),
915        "commonlisp" => Some(include_str!("queries/commonlisp.complexity.scm")),
916        "scheme" => Some(include_str!("queries/scheme.complexity.scm")),
917        "d" => Some(include_str!("queries/d.complexity.scm")),
918        "objc" => Some(include_str!("queries/objc.complexity.scm")),
919        "vb" => Some(include_str!("queries/vb.complexity.scm")),
920        "elisp" => Some(include_str!("queries/elisp.complexity.scm")),
921        "hcl" => Some(include_str!("queries/hcl.complexity.scm")),
922        "matlab" => Some(include_str!("queries/matlab.complexity.scm")),
923        "nix" => Some(include_str!("queries/nix.complexity.scm")),
924        "sql" => Some(include_str!("queries/sql.complexity.scm")),
925        "starlark" => Some(include_str!("queries/starlark.complexity.scm")),
926        "vim" => Some(include_str!("queries/vim.complexity.scm")),
927        "zsh" => Some(include_str!("queries/zsh.complexity.scm")),
928        "rescript" => Some(include_str!("queries/rescript.complexity.scm")),
929        "idris" => Some(include_str!("queries/idris.complexity.scm")),
930        "lean" => Some(include_str!("queries/lean.complexity.scm")),
931        "ada" => Some(include_str!("queries/ada.complexity.scm")),
932        "agda" => Some(include_str!("queries/agda.complexity.scm")),
933        "awk" => Some(include_str!("queries/awk.complexity.scm")),
934        "cmake" => Some(include_str!("queries/cmake.complexity.scm")),
935        "glsl" => Some(include_str!("queries/glsl.complexity.scm")),
936        "graphql" => Some(include_str!("queries/graphql.complexity.scm")),
937        "hlsl" => Some(include_str!("queries/hlsl.complexity.scm")),
938        "jq" => Some(include_str!("queries/jq.complexity.scm")),
939        "meson" => Some(include_str!("queries/meson.complexity.scm")),
940        "nginx" => Some(include_str!("queries/nginx.complexity.scm")),
941        "prolog" => Some(include_str!("queries/prolog.complexity.scm")),
942        "scss" => Some(include_str!("queries/scss.complexity.scm")),
943        "svelte" => Some(include_str!("queries/svelte.complexity.scm")),
944        "tlaplus" => Some(include_str!("queries/tlaplus.complexity.scm")),
945        "typst" => Some(include_str!("queries/typst.complexity.scm")),
946        "verilog" => Some(include_str!("queries/verilog.complexity.scm")),
947        "vhdl" => Some(include_str!("queries/vhdl.complexity.scm")),
948        "vue" => Some(include_str!("queries/vue.complexity.scm")),
949        "batch" => Some(include_str!("queries/batch.complexity.scm")),
950        "thrift" => Some(include_str!("queries/thrift.complexity.scm")),
951        "jinja2" => Some(include_str!("queries/jinja2.complexity.scm")),
952        _ => None,
953    }
954}
955
956/// Return a bundled calls query for a grammar, if available.
957fn bundled_calls_query(name: &str) -> Option<&'static str> {
958    match name {
959        "python" => Some(include_str!("queries/python.calls.scm")),
960        "rust" => Some(include_str!("queries/rust.calls.scm")),
961        "typescript" => Some(include_str!("queries/typescript.calls.scm")),
962        "tsx" => Some(include_str!("queries/tsx.calls.scm")),
963        "javascript" => Some(include_str!("queries/javascript.calls.scm")),
964        "java" => Some(include_str!("queries/java.calls.scm")),
965        "go" => Some(include_str!("queries/go.calls.scm")),
966        "c" => Some(include_str!("queries/c.calls.scm")),
967        "cpp" => Some(include_str!("queries/cpp.calls.scm")),
968        "ruby" => Some(include_str!("queries/ruby.calls.scm")),
969        "kotlin" => Some(include_str!("queries/kotlin.calls.scm")),
970        "swift" => Some(include_str!("queries/swift.calls.scm")),
971        "c-sharp" => Some(include_str!("queries/c-sharp.calls.scm")),
972        "bash" => Some(include_str!("queries/bash.calls.scm")),
973        "scala" => Some(include_str!("queries/scala.calls.scm")),
974        "elixir" => Some(include_str!("queries/elixir.calls.scm")),
975        "lua" => Some(include_str!("queries/lua.calls.scm")),
976        "dart" => Some(include_str!("queries/dart.calls.scm")),
977        "graphql" => Some(include_str!("queries/graphql.calls.scm")),
978        "ocaml" => Some(include_str!("queries/ocaml.calls.scm")),
979        "erlang" => Some(include_str!("queries/erlang.calls.scm")),
980        "zig" => Some(include_str!("queries/zig.calls.scm")),
981        "julia" => Some(include_str!("queries/julia.calls.scm")),
982        "r" => Some(include_str!("queries/r.calls.scm")),
983        "haskell" => Some(include_str!("queries/haskell.calls.scm")),
984        "php" => Some(include_str!("queries/php.calls.scm")),
985        "perl" => Some(include_str!("queries/perl.calls.scm")),
986        "fsharp" => Some(include_str!("queries/fsharp.calls.scm")),
987        "gleam" => Some(include_str!("queries/gleam.calls.scm")),
988        "groovy" => Some(include_str!("queries/groovy.calls.scm")),
989        "clojure" => Some(include_str!("queries/clojure.calls.scm")),
990        "d" => Some(include_str!("queries/d.calls.scm")),
991        "objc" => Some(include_str!("queries/objc.calls.scm")),
992        "elisp" => Some(include_str!("queries/elisp.calls.scm")),
993        "hcl" => Some(include_str!("queries/hcl.calls.scm")),
994        "matlab" => Some(include_str!("queries/matlab.calls.scm")),
995        "nix" => Some(include_str!("queries/nix.calls.scm")),
996        "starlark" => Some(include_str!("queries/starlark.calls.scm")),
997        "vim" => Some(include_str!("queries/vim.calls.scm")),
998        "zsh" => Some(include_str!("queries/zsh.calls.scm")),
999        "rescript" => Some(include_str!("queries/rescript.calls.scm")),
1000        "prolog" => Some(include_str!("queries/prolog.calls.scm")),
1001        "sql" => Some(include_str!("queries/sql.calls.scm")),
1002        "ada" => Some(include_str!("queries/ada.calls.scm")),
1003        "agda" => Some(include_str!("queries/agda.calls.scm")),
1004        "awk" => Some(include_str!("queries/awk.calls.scm")),
1005        "batch" => Some(include_str!("queries/batch.calls.scm")),
1006        "cmake" => Some(include_str!("queries/cmake.calls.scm")),
1007        "elm" => Some(include_str!("queries/elm.calls.scm")),
1008        "fish" => Some(include_str!("queries/fish.calls.scm")),
1009        "idris" => Some(include_str!("queries/idris.calls.scm")),
1010        "lean" => Some(include_str!("queries/lean.calls.scm")),
1011        "meson" => Some(include_str!("queries/meson.calls.scm")),
1012        "powershell" => Some(include_str!("queries/powershell.calls.scm")),
1013        "scheme" => Some(include_str!("queries/scheme.calls.scm")),
1014        "thrift" => Some(include_str!("queries/thrift.calls.scm")),
1015        "tlaplus" => Some(include_str!("queries/tlaplus.calls.scm")),
1016        "verilog" => Some(include_str!("queries/verilog.calls.scm")),
1017        "vhdl" => Some(include_str!("queries/vhdl.calls.scm")),
1018        "vb" => Some(include_str!("queries/vb.calls.scm")),
1019        "commonlisp" => Some(include_str!("queries/commonlisp.calls.scm")),
1020        "scss" => Some(include_str!("queries/scss.calls.scm")),
1021        "glsl" => Some(include_str!("queries/glsl.calls.scm")),
1022        "hlsl" => Some(include_str!("queries/hlsl.calls.scm")),
1023        "typst" => Some(include_str!("queries/typst.calls.scm")),
1024        "svelte" => Some(include_str!("queries/svelte.calls.scm")),
1025        "vue" => Some(include_str!("queries/vue.calls.scm")),
1026        "jq" => Some(include_str!("queries/jq.calls.scm")),
1027        "jinja2" => Some(include_str!("queries/jinja2.calls.scm")),
1028        "nginx" => Some(include_str!("queries/nginx.calls.scm")),
1029        _ => None,
1030    }
1031}
1032
1033/// Return a bundled imports query for a grammar, if available.
1034fn bundled_imports_query(name: &str) -> Option<&'static str> {
1035    match name {
1036        "python" => Some(include_str!("queries/python.imports.scm")),
1037        "javascript" => Some(include_str!("queries/javascript.imports.scm")),
1038        "go" => Some(include_str!("queries/go.imports.scm")),
1039        "lua" => Some(include_str!("queries/lua.imports.scm")),
1040        "rust" => Some(include_str!("queries/rust.imports.scm")),
1041        "typescript" => Some(include_str!("queries/typescript.imports.scm")),
1042        "tsx" => Some(include_str!("queries/tsx.imports.scm")),
1043        "java" => Some(include_str!("queries/java.imports.scm")),
1044        "kotlin" => Some(include_str!("queries/kotlin.imports.scm")),
1045        "c-sharp" => Some(include_str!("queries/c-sharp.imports.scm")),
1046        "ruby" => Some(include_str!("queries/ruby.imports.scm")),
1047        "swift" => Some(include_str!("queries/swift.imports.scm")),
1048        "scala" => Some(include_str!("queries/scala.imports.scm")),
1049        "elixir" => Some(include_str!("queries/elixir.imports.scm")),
1050        "dart" => Some(include_str!("queries/dart.imports.scm")),
1051        "php" => Some(include_str!("queries/php.imports.scm")),
1052        "c" => Some(include_str!("queries/c.imports.scm")),
1053        "cpp" => Some(include_str!("queries/cpp.imports.scm")),
1054        "bash" => Some(include_str!("queries/bash.imports.scm")),
1055        "zsh" => Some(include_str!("queries/zsh.imports.scm")),
1056        "fish" => Some(include_str!("queries/fish.imports.scm")),
1057        "perl" => Some(include_str!("queries/perl.imports.scm")),
1058        "r" => Some(include_str!("queries/r.imports.scm")),
1059        "haskell" => Some(include_str!("queries/haskell.imports.scm")),
1060        "ocaml" => Some(include_str!("queries/ocaml.imports.scm")),
1061        "fsharp" => Some(include_str!("queries/fsharp.imports.scm")),
1062        "erlang" => Some(include_str!("queries/erlang.imports.scm")),
1063        "gleam" => Some(include_str!("queries/gleam.imports.scm")),
1064        "zig" => Some(include_str!("queries/zig.imports.scm")),
1065        "julia" => Some(include_str!("queries/julia.imports.scm")),
1066        "groovy" => Some(include_str!("queries/groovy.imports.scm")),
1067        "clojure" => Some(include_str!("queries/clojure.imports.scm")),
1068        "commonlisp" => Some(include_str!("queries/commonlisp.imports.scm")),
1069        "scheme" => Some(include_str!("queries/scheme.imports.scm")),
1070        "elisp" => Some(include_str!("queries/elisp.imports.scm")),
1071        "d" => Some(include_str!("queries/d.imports.scm")),
1072        "objc" => Some(include_str!("queries/objc.imports.scm")),
1073        "vb" => Some(include_str!("queries/vb.imports.scm")),
1074        "powershell" => Some(include_str!("queries/powershell.imports.scm")),
1075        "vim" => Some(include_str!("queries/vim.imports.scm")),
1076        "matlab" => Some(include_str!("queries/matlab.imports.scm")),
1077        "nix" => Some(include_str!("queries/nix.imports.scm")),
1078        "starlark" => Some(include_str!("queries/starlark.imports.scm")),
1079        "rescript" => Some(include_str!("queries/rescript.imports.scm")),
1080        "idris" => Some(include_str!("queries/idris.imports.scm")),
1081        "ada" => Some(include_str!("queries/ada.imports.scm")),
1082        "agda" => Some(include_str!("queries/agda.imports.scm")),
1083        "asciidoc" => Some(include_str!("queries/asciidoc.imports.scm")),
1084        "caddy" => Some(include_str!("queries/caddy.imports.scm")),
1085        "capnp" => Some(include_str!("queries/capnp.imports.scm")),
1086        "cmake" => Some(include_str!("queries/cmake.imports.scm")),
1087        "devicetree" => Some(include_str!("queries/devicetree.imports.scm")),
1088        "dockerfile" => Some(include_str!("queries/dockerfile.imports.scm")),
1089        "elm" => Some(include_str!("queries/elm.imports.scm")),
1090        "hcl" => Some(include_str!("queries/hcl.imports.scm")),
1091        "hlsl" => Some(include_str!("queries/hlsl.imports.scm")),
1092        "jq" => Some(include_str!("queries/jq.imports.scm")),
1093        "lean" => Some(include_str!("queries/lean.imports.scm")),
1094        "meson" => Some(include_str!("queries/meson.imports.scm")),
1095        "nginx" => Some(include_str!("queries/nginx.imports.scm")),
1096        "ninja" => Some(include_str!("queries/ninja.imports.scm")),
1097        "prolog" => Some(include_str!("queries/prolog.imports.scm")),
1098        "awk" => Some(include_str!("queries/awk.imports.scm")),
1099        "css" => Some(include_str!("queries/css.imports.scm")),
1100        "glsl" => Some(include_str!("queries/glsl.imports.scm")),
1101        "html" => Some(include_str!("queries/html.imports.scm")),
1102        "jinja2" => Some(include_str!("queries/jinja2.imports.scm")),
1103        "scss" => Some(include_str!("queries/scss.imports.scm")),
1104        "thrift" => Some(include_str!("queries/thrift.imports.scm")),
1105        "tlaplus" => Some(include_str!("queries/tlaplus.imports.scm")),
1106        "typst" => Some(include_str!("queries/typst.imports.scm")),
1107        "verilog" => Some(include_str!("queries/verilog.imports.scm")),
1108        "vhdl" => Some(include_str!("queries/vhdl.imports.scm")),
1109        "wit" => Some(include_str!("queries/wit.imports.scm")),
1110        _ => None,
1111    }
1112}
1113
1114/// Return a bundled decorations query for a grammar, if available.
1115///
1116/// Uses `@decoration` captures for doc comments, attributes, decorators,
1117/// and annotations that immediately precede a definition.
1118fn bundled_decorations_query(name: &str) -> Option<&'static str> {
1119    match name {
1120        "rust" => Some(include_str!("queries/rust.decorations.scm")),
1121        "python" => Some(include_str!("queries/python.decorations.scm")),
1122        "javascript" => Some(include_str!("queries/javascript.decorations.scm")),
1123        "typescript" => Some(include_str!("queries/typescript.decorations.scm")),
1124        "tsx" => Some(include_str!("queries/tsx.decorations.scm")),
1125        "java" => Some(include_str!("queries/java.decorations.scm")),
1126        "kotlin" => Some(include_str!("queries/kotlin.decorations.scm")),
1127        "scala" => Some(include_str!("queries/scala.decorations.scm")),
1128        "c-sharp" => Some(include_str!("queries/c-sharp.decorations.scm")),
1129        "php" => Some(include_str!("queries/php.decorations.scm")),
1130        "swift" => Some(include_str!("queries/swift.decorations.scm")),
1131        "dart" => Some(include_str!("queries/dart.decorations.scm")),
1132        "ocaml" => Some(include_str!("queries/ocaml.decorations.scm")),
1133        "rescript" => Some(include_str!("queries/rescript.decorations.scm")),
1134        "fsharp" => Some(include_str!("queries/fsharp.decorations.scm")),
1135        "elixir" => Some(include_str!("queries/elixir.decorations.scm")),
1136        "erlang" => Some(include_str!("queries/erlang.decorations.scm")),
1137        "gleam" => Some(include_str!("queries/gleam.decorations.scm")),
1138        "lean" => Some(include_str!("queries/lean.decorations.scm")),
1139        "groovy" => Some(include_str!("queries/groovy.decorations.scm")),
1140        "vb" => Some(include_str!("queries/vb.decorations.scm")),
1141        "haskell" => Some(include_str!("queries/haskell.decorations.scm")),
1142        "go" => Some(include_str!("queries/go.decorations.scm")),
1143        "c" => Some(include_str!("queries/c.decorations.scm")),
1144        "cpp" => Some(include_str!("queries/cpp.decorations.scm")),
1145        "objc" => Some(include_str!("queries/objc.decorations.scm")),
1146        "ruby" => Some(include_str!("queries/ruby.decorations.scm")),
1147        "r" => Some(include_str!("queries/r.decorations.scm")),
1148        "lua" => Some(include_str!("queries/lua.decorations.scm")),
1149        "zig" => Some(include_str!("queries/zig.decorations.scm")),
1150        "idris" => Some(include_str!("queries/idris.decorations.scm")),
1151        "agda" => Some(include_str!("queries/agda.decorations.scm")),
1152        "elm" => Some(include_str!("queries/elm.decorations.scm")),
1153        "julia" => Some(include_str!("queries/julia.decorations.scm")),
1154        "perl" => Some(include_str!("queries/perl.decorations.scm")),
1155        "verilog" => Some(include_str!("queries/verilog.decorations.scm")),
1156        "vhdl" => Some(include_str!("queries/vhdl.decorations.scm")),
1157        "ada" => Some(include_str!("queries/ada.decorations.scm")),
1158        "capnp" => Some(include_str!("queries/capnp.decorations.scm")),
1159        "thrift" => Some(include_str!("queries/thrift.decorations.scm")),
1160        "graphql" => Some(include_str!("queries/graphql.decorations.scm")),
1161        "wit" => Some(include_str!("queries/wit.decorations.scm")),
1162        "clojure" => Some(include_str!("queries/clojure.decorations.scm")),
1163        "scheme" => Some(include_str!("queries/scheme.decorations.scm")),
1164        "prolog" => Some(include_str!("queries/prolog.decorations.scm")),
1165        _ => None,
1166    }
1167}
1168
1169/// Return a bundled test-regions query for a grammar, if available.
1170///
1171/// Captures `@test_region` for byte ranges of test-only source regions
1172/// (e.g. inline `#[cfg(test)] mod ...` blocks in Rust). Languages whose
1173/// test conventions are filename-based (e.g. Go's `*_test.go`) or where
1174/// no AST-level distinction exists return None and rely on path-based
1175/// excludes instead.
1176fn bundled_test_regions_query(name: &str) -> Option<&'static str> {
1177    match name {
1178        "rust" => Some(include_str!("queries/rust.test_regions.scm")),
1179        _ => None,
1180    }
1181}
1182
1183/// Return a bundled CFG query for a grammar, if available.
1184///
1185/// CFG queries use `@cfg.*` captures to identify control flow nodes
1186/// (branches, loops, exits). See `normalize-cfg` for the full capture vocabulary.
1187fn bundled_cfg_query(name: &str) -> Option<&'static str> {
1188    match name {
1189        // Seed languages (Phase 1 — verified grammars)
1190        "rust" => Some(include_str!("queries/rust.cfg.scm")),
1191        "python" => Some(include_str!("queries/python.cfg.scm")),
1192        "go" => Some(include_str!("queries/go.cfg.scm")),
1193        "typescript" => Some(include_str!("queries/typescript.cfg.scm")),
1194        "tsx" => Some(include_str!("queries/tsx.cfg.scm")),
1195        "javascript" => Some(include_str!("queries/javascript.cfg.scm")),
1196        "java" => Some(include_str!("queries/java.cfg.scm")),
1197        // Batch A: C-family
1198        "c" => Some(include_str!("queries/c.cfg.scm")),
1199        "cpp" => Some(include_str!("queries/cpp.cfg.scm")),
1200        "objc" => Some(include_str!("queries/objc.cfg.scm")),
1201        "c-sharp" => Some(include_str!("queries/c-sharp.cfg.scm")),
1202        "kotlin" => Some(include_str!("queries/kotlin.cfg.scm")),
1203        "swift" => Some(include_str!("queries/swift.cfg.scm")),
1204        "dart" => Some(include_str!("queries/dart.cfg.scm")),
1205        // Batch B: JVM/functional
1206        "scala" => Some(include_str!("queries/scala.cfg.scm")),
1207        "groovy" => Some(include_str!("queries/groovy.cfg.scm")),
1208        "vb" => Some(include_str!("queries/vb.cfg.scm")),
1209        "haskell" => Some(include_str!("queries/haskell.cfg.scm")),
1210        "ocaml" => Some(include_str!("queries/ocaml.cfg.scm")),
1211        "fsharp" => Some(include_str!("queries/fsharp.cfg.scm")),
1212        "elixir" => Some(include_str!("queries/elixir.cfg.scm")),
1213        "erlang" => Some(include_str!("queries/erlang.cfg.scm")),
1214        "clojure" => Some(include_str!("queries/clojure.cfg.scm")),
1215        "gleam" => Some(include_str!("queries/gleam.cfg.scm")),
1216        "rescript" => Some(include_str!("queries/rescript.cfg.scm")),
1217        "idris" => Some(include_str!("queries/idris.cfg.scm")),
1218        "agda" => Some(include_str!("queries/agda.cfg.scm")),
1219        "lean" => Some(include_str!("queries/lean.cfg.scm")),
1220        "commonlisp" => Some(include_str!("queries/commonlisp.cfg.scm")),
1221        "scheme" => Some(include_str!("queries/scheme.cfg.scm")),
1222        "elisp" => Some(include_str!("queries/elisp.cfg.scm")),
1223        // Batch C: Scripting
1224        "ruby" => Some(include_str!("queries/ruby.cfg.scm")),
1225        "lua" => Some(include_str!("queries/lua.cfg.scm")),
1226        "php" => Some(include_str!("queries/php.cfg.scm")),
1227        "perl" => Some(include_str!("queries/perl.cfg.scm")),
1228        "bash" => Some(include_str!("queries/bash.cfg.scm")),
1229        "fish" => Some(include_str!("queries/fish.cfg.scm")),
1230        "awk" => Some(include_str!("queries/awk.cfg.scm")),
1231        "zsh" => Some(include_str!("queries/zsh.cfg.scm")),
1232        "powershell" => Some(include_str!("queries/powershell.cfg.scm")),
1233        "batch" => Some(include_str!("queries/batch.cfg.scm")),
1234        "vim" => Some(include_str!("queries/vim.cfg.scm")),
1235        // Batch D: Systems/other
1236        "zig" => Some(include_str!("queries/zig.cfg.scm")),
1237        "ada" => Some(include_str!("queries/ada.cfg.scm")),
1238        "d" => Some(include_str!("queries/d.cfg.scm")),
1239        "prolog" => Some(include_str!("queries/prolog.cfg.scm")),
1240        "r" => Some(include_str!("queries/r.cfg.scm")),
1241        "julia" => Some(include_str!("queries/julia.cfg.scm")),
1242        "matlab" => Some(include_str!("queries/matlab.cfg.scm")),
1243        "glsl" => Some(include_str!("queries/glsl.cfg.scm")),
1244        "hlsl" => Some(include_str!("queries/hlsl.cfg.scm")),
1245        "verilog" => Some(include_str!("queries/verilog.cfg.scm")),
1246        "vhdl" => Some(include_str!("queries/vhdl.cfg.scm")),
1247        // Batch E: Domain/config
1248        "nix" => Some(include_str!("queries/nix.cfg.scm")),
1249        "hcl" => Some(include_str!("queries/hcl.cfg.scm")),
1250        "starlark" => Some(include_str!("queries/starlark.cfg.scm")),
1251        "elm" => Some(include_str!("queries/elm.cfg.scm")),
1252        "jinja2" => Some(include_str!("queries/jinja2.cfg.scm")),
1253        "svelte" => Some(include_str!("queries/svelte.cfg.scm")),
1254        "vue" => Some(include_str!("queries/vue.cfg.scm")),
1255        "cmake" => Some(include_str!("queries/cmake.cfg.scm")),
1256        "meson" => Some(include_str!("queries/meson.cfg.scm")),
1257        "tlaplus" => Some(include_str!("queries/tlaplus.cfg.scm")),
1258        "jq" => Some(include_str!("queries/jq.cfg.scm")),
1259        _ => None,
1260    }
1261}
1262
1263#[cfg(test)]
1264mod tests {
1265    use super::*;
1266
1267    #[test]
1268    fn test_grammar_lib_name() {
1269        let name = grammar_lib_name("python");
1270        assert!(name.starts_with("python."));
1271    }
1272
1273    #[test]
1274    fn test_grammar_symbol_name() {
1275        assert_eq!(grammar_symbol_name("python"), "tree_sitter_python");
1276        assert_eq!(grammar_symbol_name("rust"), "tree_sitter_rust_orchard");
1277        assert_eq!(grammar_symbol_name("ssh-config"), "tree_sitter_ssh_config");
1278        assert_eq!(grammar_symbol_name("vb"), "tree_sitter_vb_dotnet");
1279    }
1280
1281    #[test]
1282    fn test_bundled_tags_queries() {
1283        for lang in &[
1284            "rust",
1285            "python",
1286            "javascript",
1287            "typescript",
1288            "tsx",
1289            "go",
1290            "java",
1291            "c",
1292            "cpp",
1293            "ruby",
1294            "kotlin",
1295            "scala",
1296            "elixir",
1297            "swift",
1298            "haskell",
1299            "dart",
1300            "ocaml",
1301            "fsharp",
1302            "gleam",
1303            "zig",
1304            "julia",
1305            "erlang",
1306            "lua",
1307            "php",
1308            "perl",
1309            "r",
1310            "groovy",
1311            "d",
1312            "objc",
1313            "vb",
1314            "powershell",
1315            "clojure",
1316            "commonlisp",
1317            "scheme",
1318            "elisp",
1319            "bash",
1320            "fish",
1321            "zsh",
1322            "ada",
1323            "idris",
1324            "lean",
1325            "rescript",
1326            "elm",
1327        ] {
1328            let query = bundled_tags_query(lang);
1329            assert!(query.is_some(), "Missing bundled tags query for {lang}");
1330            assert!(
1331                !query.unwrap().is_empty(),
1332                "Empty bundled tags query for {lang}"
1333            );
1334        }
1335    }
1336
1337    #[test]
1338    fn test_bundled_types_queries() {
1339        for lang in &[
1340            "rust",
1341            "python",
1342            "typescript",
1343            "tsx",
1344            "java",
1345            "go",
1346            "c",
1347            "cpp",
1348            "kotlin",
1349            "swift",
1350            "c-sharp",
1351            "scala",
1352            "haskell",
1353            "ruby",
1354            "dart",
1355            "elixir",
1356            "ocaml",
1357            "erlang",
1358            "zig",
1359            "fsharp",
1360            "gleam",
1361            "julia",
1362            "r",
1363            "d",
1364            "objc",
1365            "vb",
1366            "groovy",
1367            "ada",
1368            "agda",
1369            "elm",
1370            "idris",
1371            "lean",
1372            "php",
1373            "powershell",
1374            "rescript",
1375            "verilog",
1376            "vhdl",
1377            "sql",
1378            "hcl",
1379            "glsl",
1380            "hlsl",
1381            "clojure",
1382            "commonlisp",
1383            "elisp",
1384            "javascript",
1385            "lua",
1386            "scheme",
1387            "graphql",
1388            "nix",
1389            "starlark",
1390            "matlab",
1391            "tlaplus",
1392            "typst",
1393        ] {
1394            let query = bundled_types_query(lang);
1395            assert!(query.is_some(), "Missing bundled types query for {lang}");
1396            assert!(
1397                !query.unwrap().is_empty(),
1398                "Empty bundled types query for {lang}"
1399            );
1400        }
1401    }
1402
1403    #[test]
1404    fn test_bundled_complexity_queries() {
1405        for lang in &[
1406            "rust",
1407            "python",
1408            "go",
1409            "javascript",
1410            "typescript",
1411            "tsx",
1412            "java",
1413            "c",
1414            "cpp",
1415            "ruby",
1416            "kotlin",
1417            "swift",
1418            "c-sharp",
1419            "bash",
1420            "lua",
1421            "elixir",
1422            "scala",
1423            "dart",
1424            "zig",
1425            "ocaml",
1426            "erlang",
1427            "php",
1428            "haskell",
1429            "r",
1430            "julia",
1431            "perl",
1432            "groovy",
1433            "elm",
1434            "powershell",
1435            "fish",
1436            "fsharp",
1437            "gleam",
1438            "clojure",
1439            "commonlisp",
1440            "scheme",
1441            "d",
1442            "objc",
1443            "vb",
1444            "elisp",
1445            "hcl",
1446            "matlab",
1447            "nix",
1448            "sql",
1449            "starlark",
1450            "vim",
1451            "zsh",
1452            "rescript",
1453            "idris",
1454            "lean",
1455        ] {
1456            let query = bundled_complexity_query(lang);
1457            assert!(
1458                query.is_some(),
1459                "Missing bundled complexity query for {lang}"
1460            );
1461            assert!(
1462                !query.unwrap().is_empty(),
1463                "Empty bundled complexity query for {lang}"
1464            );
1465        }
1466    }
1467
1468    #[test]
1469    fn test_bundled_calls_queries() {
1470        for lang in &[
1471            "python",
1472            "rust",
1473            "typescript",
1474            "tsx",
1475            "javascript",
1476            "java",
1477            "go",
1478            "c",
1479            "cpp",
1480            "ruby",
1481            "kotlin",
1482            "swift",
1483            "c-sharp",
1484            "bash",
1485            "scala",
1486            "elixir",
1487            "lua",
1488            "dart",
1489            "ocaml",
1490            "erlang",
1491            "zig",
1492            "julia",
1493            "r",
1494            "haskell",
1495            "php",
1496            "perl",
1497            "fsharp",
1498            "gleam",
1499            "groovy",
1500            "clojure",
1501            "d",
1502            "objc",
1503            "elisp",
1504            "hcl",
1505            "matlab",
1506            "nix",
1507            "starlark",
1508            "vim",
1509            "zsh",
1510            "rescript",
1511            "prolog",
1512            "sql",
1513            "ada",
1514            "agda",
1515            "awk",
1516            "batch",
1517            "cmake",
1518            "elm",
1519            "fish",
1520            "idris",
1521            "lean",
1522            "meson",
1523            "powershell",
1524            "scheme",
1525            "thrift",
1526            "tlaplus",
1527            "verilog",
1528            "vhdl",
1529            "vb",
1530            "commonlisp",
1531            "scss",
1532            "jinja2",
1533            "nginx",
1534        ] {
1535            let query = bundled_calls_query(lang);
1536            assert!(query.is_some(), "Missing bundled calls query for {lang}");
1537            assert!(
1538                !query.unwrap().is_empty(),
1539                "Empty bundled calls query for {lang}"
1540            );
1541        }
1542    }
1543
1544    #[test]
1545    fn test_bundled_imports_queries() {
1546        for lang in &[
1547            "awk",
1548            "python",
1549            "javascript",
1550            "go",
1551            "lua",
1552            "rust",
1553            "typescript",
1554            "tsx",
1555            "java",
1556            "kotlin",
1557            "c-sharp",
1558            "ruby",
1559            "swift",
1560            "scala",
1561            "elixir",
1562            "dart",
1563            "php",
1564            "c",
1565            "cpp",
1566            "bash",
1567            "zsh",
1568            "fish",
1569            "perl",
1570            "r",
1571            "haskell",
1572            "ocaml",
1573            "fsharp",
1574            "erlang",
1575            "gleam",
1576            "zig",
1577            "julia",
1578            "groovy",
1579            "clojure",
1580            "commonlisp",
1581            "scheme",
1582            "elisp",
1583            "d",
1584            "objc",
1585            "vb",
1586            "powershell",
1587            "vim",
1588            "matlab",
1589            "nix",
1590            "starlark",
1591            "rescript",
1592            "idris",
1593            "ada",
1594            "agda",
1595            "asciidoc",
1596            "caddy",
1597            "capnp",
1598            "cmake",
1599            "devicetree",
1600            "dockerfile",
1601            "elm",
1602            "hcl",
1603            "hlsl",
1604            "jq",
1605            "lean",
1606            "meson",
1607            "nginx",
1608            "ninja",
1609            "prolog",
1610            "css",
1611            "glsl",
1612            "html",
1613            "jinja2",
1614            "scss",
1615            "thrift",
1616            "tlaplus",
1617            "typst",
1618            "verilog",
1619            "vhdl",
1620            "wit",
1621        ] {
1622            let query = bundled_imports_query(lang);
1623            assert!(query.is_some(), "Missing bundled imports query for {lang}");
1624            assert!(
1625                !query.unwrap().is_empty(),
1626                "Empty bundled imports query for {lang}"
1627            );
1628        }
1629    }
1630
1631    #[test]
1632    fn test_get_imports_returns_bundled() {
1633        let loader = GrammarLoader::with_paths(vec![]);
1634        assert!(loader.get_imports("awk").is_some());
1635        assert!(loader.get_imports("python").is_some());
1636        assert!(loader.get_imports("javascript").is_some());
1637        assert!(loader.get_imports("go").is_some());
1638        assert!(loader.get_imports("lua").is_some());
1639        assert!(loader.get_imports("rust").is_some());
1640        assert!(loader.get_imports("typescript").is_some());
1641        assert!(loader.get_imports("tsx").is_some());
1642        assert!(loader.get_imports("java").is_some());
1643        assert!(loader.get_imports("kotlin").is_some());
1644        assert!(loader.get_imports("c-sharp").is_some());
1645        assert!(loader.get_imports("ruby").is_some());
1646        assert!(loader.get_imports("swift").is_some());
1647        assert!(loader.get_imports("scala").is_some());
1648        assert!(loader.get_imports("elixir").is_some());
1649        assert!(loader.get_imports("dart").is_some());
1650        assert!(loader.get_imports("php").is_some());
1651        assert!(loader.get_imports("c").is_some());
1652        assert!(loader.get_imports("cpp").is_some());
1653        assert!(loader.get_imports("bash").is_some());
1654        assert!(loader.get_imports("zsh").is_some());
1655        assert!(loader.get_imports("fish").is_some());
1656        assert!(loader.get_imports("perl").is_some());
1657        assert!(loader.get_imports("r").is_some());
1658        assert!(loader.get_imports("haskell").is_some());
1659        assert!(loader.get_imports("ocaml").is_some());
1660        assert!(loader.get_imports("fsharp").is_some());
1661        assert!(loader.get_imports("erlang").is_some());
1662        assert!(loader.get_imports("gleam").is_some());
1663        assert!(loader.get_imports("zig").is_some());
1664        assert!(loader.get_imports("julia").is_some());
1665        assert!(loader.get_imports("groovy").is_some());
1666        assert!(loader.get_imports("clojure").is_some());
1667        assert!(loader.get_imports("commonlisp").is_some());
1668        assert!(loader.get_imports("scheme").is_some());
1669        assert!(loader.get_imports("elisp").is_some());
1670        assert!(loader.get_imports("d").is_some());
1671        assert!(loader.get_imports("objc").is_some());
1672        assert!(loader.get_imports("vb").is_some());
1673        assert!(loader.get_imports("powershell").is_some());
1674        assert!(loader.get_imports("vim").is_some());
1675        assert!(loader.get_imports("matlab").is_some());
1676        assert!(loader.get_imports("nix").is_some());
1677        assert!(loader.get_imports("starlark").is_some());
1678        assert!(loader.get_imports("rescript").is_some());
1679        assert!(loader.get_imports("idris").is_some());
1680        assert!(loader.get_imports("ada").is_some());
1681        assert!(loader.get_imports("agda").is_some());
1682        assert!(loader.get_imports("asciidoc").is_some());
1683        assert!(loader.get_imports("caddy").is_some());
1684        assert!(loader.get_imports("capnp").is_some());
1685        assert!(loader.get_imports("cmake").is_some());
1686        assert!(loader.get_imports("devicetree").is_some());
1687        assert!(loader.get_imports("dockerfile").is_some());
1688        assert!(loader.get_imports("elm").is_some());
1689        assert!(loader.get_imports("hcl").is_some());
1690        assert!(loader.get_imports("hlsl").is_some());
1691        assert!(loader.get_imports("jq").is_some());
1692        assert!(loader.get_imports("lean").is_some());
1693        assert!(loader.get_imports("meson").is_some());
1694        assert!(loader.get_imports("nginx").is_some());
1695        assert!(loader.get_imports("ninja").is_some());
1696        assert!(loader.get_imports("prolog").is_some());
1697        assert!(loader.get_imports("css").is_some());
1698        assert!(loader.get_imports("glsl").is_some());
1699        assert!(loader.get_imports("html").is_some());
1700        assert!(loader.get_imports("jinja2").is_some());
1701        assert!(loader.get_imports("scss").is_some());
1702        assert!(loader.get_imports("thrift").is_some());
1703        assert!(loader.get_imports("tlaplus").is_some());
1704        assert!(loader.get_imports("typst").is_some());
1705        assert!(loader.get_imports("verilog").is_some());
1706        assert!(loader.get_imports("vhdl").is_some());
1707        assert!(loader.get_imports("wit").is_some());
1708        assert!(loader.get_imports("unknown-lang-xyz").is_none());
1709    }
1710
1711    #[test]
1712    fn test_get_tags_returns_bundled() {
1713        let loader = GrammarLoader::with_paths(vec![]);
1714        assert!(loader.get_tags("rust").is_some());
1715        assert!(loader.get_tags("python").is_some());
1716        assert!(loader.get_tags("go").is_some());
1717        assert!(loader.get_tags("unknown-lang-xyz").is_none());
1718    }
1719
1720    #[test]
1721    fn test_tags_queries_compile() {
1722        let loader = GrammarLoader::new();
1723        let langs = [
1724            "zig",
1725            "clojure",
1726            "scheme",
1727            "nix",
1728            "prolog",
1729            "toml",
1730            "json",
1731            "yaml",
1732            "css",
1733            "html",
1734            "xml",
1735            "thrift",
1736            "dockerfile",
1737            "caddy",
1738        ];
1739        for lang in langs {
1740            let tags = loader.get_tags(lang);
1741            assert!(
1742                tags.is_some(),
1743                "{lang}: no tags query found (missing from bundled_tags_query)"
1744            );
1745            let tags_str = tags.unwrap();
1746            let grammar = loader.get(lang).ok();
1747            if grammar.is_none() {
1748                eprintln!("{lang}: grammar .so not found, skipping compilation check");
1749                continue;
1750            }
1751            let result = tree_sitter::Query::new(&grammar.unwrap(), &tags_str);
1752            assert!(
1753                result.is_ok(),
1754                "{lang}: tags query compilation failed: {:?}",
1755                result.err()
1756            );
1757        }
1758    }
1759
1760    #[test]
1761    fn test_scheme_node_kinds() {
1762        let loader = GrammarLoader::new();
1763        let grammar = loader.get("scheme").ok();
1764        if grammar.is_none() {
1765            eprintln!("scheme: grammar .so not found or load failed");
1766            return;
1767        }
1768        eprintln!("scheme: grammar loaded ok");
1769        let g = grammar.unwrap();
1770        // Test which node names compile in queries against the installed grammar
1771        let test_cases = [
1772            ("list", "(list) @x"),
1773            ("symbol", "(symbol) @x"),
1774            ("named_node", "(named_node) @x"),
1775            ("identifier", "(identifier) @x"),
1776        ];
1777        for (name, query_str) in test_cases {
1778            match tree_sitter::Query::new(&g, query_str) {
1779                Ok(_) => eprintln!("scheme node '{name}': valid"),
1780                Err(e) => eprintln!("scheme node '{name}': INVALID - {e:?}"),
1781            }
1782        }
1783        // Now test parsing the fixture
1784        use tree_sitter::Parser;
1785        let src = "(define (add a b) (+ a b))\n(define (multiply a b) (* a b))\n";
1786        let mut parser = Parser::new();
1787        parser.set_language(&g).unwrap();
1788        let tree = parser.parse(src, None).unwrap();
1789        eprintln!("scheme sexp: {}", tree.root_node().to_sexp());
1790        // Walk the tree and print node kinds
1791        fn walk(node: tree_sitter::Node, depth: usize) {
1792            let prefix = "  ".repeat(depth);
1793            eprintln!(
1794                "{prefix}kind={} named={} text_len={}",
1795                node.kind(),
1796                node.is_named(),
1797                node.byte_range().len()
1798            );
1799            let mut cursor = node.walk();
1800            for child in node.children(&mut cursor) {
1801                walk(child, depth + 1);
1802            }
1803        }
1804        walk(tree.root_node(), 0);
1805    }
1806
1807    #[test]
1808    fn test_get_types_returns_bundled() {
1809        let loader = GrammarLoader::with_paths(vec![]);
1810        assert!(loader.get_types("rust").is_some());
1811        assert!(loader.get_types("python").is_some());
1812        assert!(loader.get_types("java").is_some());
1813        assert!(loader.get_types("go").is_some());
1814        assert!(loader.get_types("c").is_some());
1815        assert!(loader.get_types("cpp").is_some());
1816        assert!(loader.get_types("kotlin").is_some());
1817        assert!(loader.get_types("swift").is_some());
1818        assert!(loader.get_types("c-sharp").is_some());
1819        assert!(loader.get_types("unknown-lang-xyz").is_none());
1820    }
1821
1822    #[test]
1823    fn test_get_calls_returns_bundled() {
1824        let loader = GrammarLoader::with_paths(vec![]);
1825        assert!(loader.get_calls("rust").is_some());
1826        assert!(loader.get_calls("python").is_some());
1827        assert!(loader.get_calls("go").is_some());
1828        assert!(loader.get_calls("c").is_some());
1829        assert!(loader.get_calls("cpp").is_some());
1830        assert!(loader.get_calls("ruby").is_some());
1831        assert!(loader.get_calls("kotlin").is_some());
1832        assert!(loader.get_calls("swift").is_some());
1833        assert!(loader.get_calls("c-sharp").is_some());
1834        assert!(loader.get_calls("bash").is_some());
1835        assert!(loader.get_calls("unknown-lang-xyz").is_none());
1836    }
1837
1838    #[test]
1839    fn test_load_from_env() {
1840        // Set up env var pointing to target/grammars
1841        let grammar_path = std::env::current_dir().unwrap().join("target/grammars");
1842
1843        if !grammar_path.exists() {
1844            eprintln!("Skipping: run `cargo xtask build-grammars` first");
1845            return;
1846        }
1847
1848        // SAFETY: This is a test that runs single-threaded
1849        unsafe {
1850            std::env::set_var("NORMALIZE_GRAMMAR_PATH", grammar_path.to_str().unwrap());
1851        }
1852
1853        let loader = GrammarLoader::new();
1854
1855        // Should load python from .so
1856        let ext = grammar_extension();
1857        if grammar_path.join(format!("python{ext}")).exists() {
1858            let lang = loader.get("python").ok();
1859            assert!(lang.is_some(), "Failed to load python grammar");
1860        }
1861
1862        // Clean up
1863        // SAFETY: This is a test that runs single-threaded
1864        unsafe {
1865            std::env::remove_var("NORMALIZE_GRAMMAR_PATH");
1866        }
1867    }
1868}