Skip to main content

ripvec_core/
languages.rs

1//! Language registry mapping file extensions to tree-sitter grammars.
2//!
3//! Each supported language has a grammar and a tree-sitter query that
4//! extracts function, class, and method definitions. Compiled queries
5//! are cached so that repeated calls for the same extension are free.
6
7use std::sync::{Arc, OnceLock};
8
9use tree_sitter::{Language, Query};
10
11/// Configuration for extracting function calls from a language.
12///
13/// Wrapped in [`Arc`] so it can be shared across threads and returned
14/// from the cache without cloning the compiled [`Query`].
15pub struct CallConfig {
16    /// The tree-sitter Language grammar.
17    pub language: Language,
18    /// Query that extracts call sites (`@callee` captures).
19    pub query: Query,
20}
21
22/// Configuration for a supported source language.
23///
24/// Wrapped in [`Arc`] so it can be shared across threads and returned
25/// from the cache without cloning the compiled [`Query`].
26pub struct LangConfig {
27    /// The tree-sitter Language grammar.
28    pub language: Language,
29    /// Query that extracts semantic chunks (`@def` captures with `@name`).
30    pub query: Query,
31}
32
/// Numeric values of the LSP `SymbolKind` enumeration (Language Server
/// Protocol specification, version 3.17).
///
/// All 26 values defined by the specification are listed, in numeric order,
/// so call sites can use a name instead of a magic number.
pub mod lsp_symbol_kind {
    /// `SymbolKind.File` = 1.
    pub const FILE: u32 = 1;
    /// `SymbolKind.Module` = 2.
    pub const MODULE: u32 = 2;
    /// `SymbolKind.Namespace` = 3.
    pub const NAMESPACE: u32 = 3;
    /// `SymbolKind.Package` = 4.
    pub const PACKAGE: u32 = 4;
    /// `SymbolKind.Class` = 5.
    pub const CLASS: u32 = 5;
    /// `SymbolKind.Method` = 6.
    pub const METHOD: u32 = 6;
    /// `SymbolKind.Property` = 7.
    pub const PROPERTY: u32 = 7;
    /// `SymbolKind.Field` = 8.
    pub const FIELD: u32 = 8;
    /// `SymbolKind.Constructor` = 9.
    pub const CONSTRUCTOR: u32 = 9;
    /// `SymbolKind.Enum` = 10.
    pub const ENUM: u32 = 10;
    /// `SymbolKind.Interface` = 11 (used for Rust traits).
    pub const INTERFACE: u32 = 11;
    /// `SymbolKind.Function` = 12.
    pub const FUNCTION: u32 = 12;
    /// `SymbolKind.Variable` = 13.
    pub const VARIABLE: u32 = 13;
    /// `SymbolKind.Constant` = 14 (used for `const` / `static` items).
    pub const CONSTANT: u32 = 14;
    /// `SymbolKind.String` = 15.
    pub const STRING: u32 = 15;
    /// `SymbolKind.Number` = 16.
    pub const NUMBER: u32 = 16;
    /// `SymbolKind.Boolean` = 17.
    pub const BOOLEAN: u32 = 17;
    /// `SymbolKind.Array` = 18.
    pub const ARRAY: u32 = 18;
    /// `SymbolKind.Object` = 19.
    pub const OBJECT: u32 = 19;
    /// `SymbolKind.Key` = 20.
    pub const KEY: u32 = 20;
    /// `SymbolKind.Null` = 21.
    pub const NULL: u32 = 21;
    /// `SymbolKind.EnumMember` = 22 (enum variants).
    pub const ENUM_MEMBER: u32 = 22;
    /// `SymbolKind.Struct` = 23.
    pub const STRUCT: u32 = 23;
    /// `SymbolKind.Event` = 24.
    pub const EVENT: u32 = 24;
    /// `SymbolKind.Operator` = 25.
    pub const OPERATOR: u32 = 25;
    /// `SymbolKind.TypeParameter` = 26 (also used here for type aliases).
    pub const TYPE_PARAMETER: u32 = 26;
}
93
94/// Map a tree-sitter node kind string to an LSP `SymbolKind` numeric value.
95///
96/// The mapping covers Rust node kinds exhaustively, then falls back to a
97/// cross-language best-effort mapping, and finally returns
98/// [`lsp_symbol_kind::VARIABLE`] (13) for any unrecognised kind — preserving
99/// the pre-B1 default so callers that don't need kind-awareness are unaffected.
100///
101/// # Rust node kinds
102///
103/// | tree-sitter kind | LSP SymbolKind |
104/// |---|---|
105/// | `function_item` | 12 (Function) |
106/// | `function_signature_item` | 12 (Function) |
107/// | `struct_item` | 23 (Struct) |
108/// | `enum_item` | 10 (Enum) |
109/// | `trait_item` | 11 (Interface) |
110/// | `impl_item` | 5 (Class) — implementation block |
111/// | `mod_item` | 2 (Module) |
112/// | `const_item` | 14 (Constant) |
113/// | `static_item` | 14 (Constant) |
114/// | `type_item` | 26 (TypeParameter) |
115/// | `field_declaration` | 8 (Field) |
116/// | `enum_variant` | 22 (EnumMember) |
117///
118/// # Cross-language kinds
119///
120/// | tree-sitter kind | LSP SymbolKind |
121/// |---|---|
122/// | `function_definition` / `function_declaration` | 12 (Function) |
123/// | `method_definition` / `method_declaration` | 6 (Method) |
124/// | `class_definition` / `class_declaration` / `class_specifier` | 5 (Class) |
125/// | `interface_declaration` / `trait_definition` / `interface_type` | 11 (Interface) |
126/// | `variable_declarator` / `variable_declaration` / `assignment` | 13 (Variable) |
127/// | `enum_declaration` / `enum_definition` | 10 (Enum) |
128/// | `type_alias_declaration` / `type_definition` / `type_declaration` / `type_alias` | 26 (TypeParameter) |
129/// | `constructor_declaration` | 9 (Constructor) |
130/// | `module` | 2 (Module) |
131/// | `object_definition` / `object_declaration` | 5 (Class) |
132/// | `val_definition` / `var_definition` | 13 (Variable) |
133/// | `property_declaration` / `decorated_definition` | 7 (Property) |
134/// | `namespace_definition` | 3 (Namespace) |
135/// | `protocol_declaration` | 11 (Interface) |
136/// | `typealias_declaration` | 26 (TypeParameter) |
137/// | `struct_type` | 23 (Struct) — Go struct type body |
138/// | `block` / `table` / `pair` | 20 (Key) |
139/// | `atx_heading` | 2 (Module) |
140/// | `element` / `empty_element` | 5 (Class) |
141/// | `rdf_statements` | 19 (Object) |
142/// | `file` / `window` | 1 (File) |
143///
144/// # C preprocessor kinds (I53)
145///
146/// | tree-sitter kind | LSP SymbolKind |
147/// |---|---|
148/// | `preproc_def` | 14 (Constant) — object-like macro (`#define FOO 42`) |
149/// | `preproc_function_def` | 12 (Function) — function-like macro (`#define F(x) ...`) |
150#[must_use]
151#[expect(
152    clippy::match_same_arms,
153    reason = "variable/assignment patterns are explicit for documentation completeness; \
154              they intentionally duplicate the wildcard fallback (VARIABLE=13) so the \
155              mapping table reads as a self-contained reference"
156)]
157pub fn lsp_symbol_kind_for_node_kind(node_kind: &str) -> u32 {
158    use lsp_symbol_kind as K;
159    match node_kind {
160        // --- Function / method ---
161        // Rust: function_item, function_signature_item
162        // Cross-language: function_definition (Python, Ruby, Scala, Go, Java, Kotlin, Swift),
163        //   function_declaration (C, C++, JS, TS, Bash)
164        // C: preproc_function_def is the tree-sitter kind for function-like macros:
165        //   `#define LIKELY(x) __builtin_expect(!!(x), 1)` / `#define container_of(ptr, type, member) ...`.
166        //   Function (12) is used because function-like macros are callable (they take arguments
167        //   and expand to an expression), making them semantically closer to Function than Constant.
168        "function_item"
169        | "function_signature_item"
170        | "function_definition"
171        | "function_declaration"
172        | "preproc_function_def" => K::FUNCTION,
173
174        // --- Method (non-free function bound to a type) ---
175        "method_definition" | "method_declaration" => K::METHOD,
176
177        // --- Constructor ---
178        "constructor_declaration" => K::CONSTRUCTOR,
179
180        // --- Struct ---
181        "struct_item" => K::STRUCT,
182
183        // --- Enum ---
184        // Rust: enum_item; cross-language: enum_declaration, enum_definition
185        "enum_item" | "enum_declaration" | "enum_definition" => K::ENUM,
186
187        // --- Enum member / variant ---
188        "enum_variant" => K::ENUM_MEMBER,
189
190        // --- Interface / trait ---
191        // Rust: trait_item; cross-language: interface_declaration, trait_definition,
192        //   protocol_declaration (Swift); interface_type (Go — the body node of an
193        //   interface type_spec, used as @def in the Go query to distinguish interfaces
194        //   from other type declarations — K2 fix)
195        "trait_item"
196        | "interface_declaration"
197        | "trait_definition"
198        | "protocol_declaration"
199        | "interface_type" => K::INTERFACE,
200
201        // --- Struct ---
202        // Go: struct_type is the body node of a struct type_spec; used as @def
203        //   to distinguish struct declarations from other type declarations — K2 fix.
204        "struct_type" => K::STRUCT,
205
206        // --- Class / impl block ---
207        // Rust: impl_item (implementation block — closest to LSP Class)
208        // Cross-language: class_definition, class_declaration, class_specifier,
209        //   object_definition (Scala), object_declaration (Kotlin),
210        //   element (XML/RDF ontology element)
211        "impl_item" | "class_definition" | "class_declaration" | "class_specifier"
212        | "object_definition" | "object_declaration" | "element" => K::CLASS,
213
214        // --- Module / namespace (used by heading-level symbols too) ---
215        // Rust: mod_item; cross-language: module (Python module, Ruby module),
216        //   mod_definition; atx_heading (Markdown headings as module-level anchors)
217        "mod_item" | "module" | "mod_definition" | "atx_heading" => K::MODULE,
218
219        // --- Namespace ---
220        "namespace_definition" => K::NAMESPACE,
221
222        // --- Constant ---
223        // Rust: const_item and static_item (immutable statics are semantically constants)
224        // HCL: local_attribute is the synthetic kind emitted by the chunker
225        // for each `local.X = ...` attribute inside an HCL `locals { ... }`
226        // block. T18 Cohesion Refraction queries can now distinguish
227        // individual locals as Constants (R6, Wave 3).
228        // C: preproc_def is the tree-sitter kind for object-like macros:
229        //   `#define FOO 42` / `#define REDIS_OK 0` / `#define OBJ_ENCODING_INT 1`.
230        //   Constant (14) is the closest LSP kind — these are named values, not callable.
231        "const_item" | "static_item" | "local_attribute" | "preproc_def" => K::CONSTANT,
232
233        // --- Type alias / type parameter ---
234        // Rust: type_item; cross-language: type_alias_declaration (TS),
235        //   type_definition (C typedef), type_declaration (Go fallback for non-interface
236        //   non-struct types), typealias_declaration (Swift); type_alias (Go — the body
237        //   node of `type X = Y` alias declarations — L2 fix: now maps to VARIABLE instead
238        //   of TYPE_PARAMETER to distinguish aliases from pure generics)
239        "type_item"
240        | "type_alias_declaration"
241        | "type_definition"
242        | "type_declaration"
243        | "typealias_declaration" => K::TYPE_PARAMETER,
244
245        // --- Type alias (distinct from type parameters) ---
246        // Go: type_alias is the @def node from `type X = Y` patterns (K2 fix).
247        // L2 fix: maps to VARIABLE (13) instead of TYPE_PARAMETER (26) to distinguish
248        // aliases from pure generics. Variable is used because LSP has no dedicated
249        // alias kind, and Variable better represents the semantic nature of an alias
250        // than TypeParameter (which represents generic type variables like T in [T any]).
251        "type_alias" => K::VARIABLE,
252
253        // --- Field ---
254        "field_declaration" => K::FIELD,
255
256        // --- Variable / assignment ---
257        // Cross-language: variable_declarator (JS/TS), variable_declaration,
258        //   assignment (Python, Ruby top-level), val_definition / var_definition (Scala)
259        "variable_declarator"
260        | "variable_declaration"
261        | "assignment"
262        | "val_definition"
263        | "var_definition" => K::VARIABLE,
264
265        // --- Property ---
266        // Kotlin / Swift: property_declaration.
267        // Python: decorated_definition (a function wrapped in a decorator).
268        //   NOTE: this string-only mapping returns PROPERTY for ALL
269        //   decorated_definition nodes, which is intentionally conservative
270        //   (K1 fix: previously fell through to VARIABLE=13 as an unrecognised kind).
271        //   Callers that have the full AST node should use `lsp_symbol_kind_for_node`
272        //   instead, which inspects the first decorator name and returns FUNCTION (12)
273        //   for @classmethod, @staticmethod, and arbitrary decorators (I#39 fix).
274        "property_declaration" | "decorated_definition" => K::PROPERTY,
275
276        // --- Data / config file structural kinds ---
277        "block" | "table" | "pair" => K::KEY,
278
279        // --- SQL kinds (S1, Wave 4) ---
280        // SQL: tree-sitter-sequel emits `create_table` for `CREATE TABLE foo (...)`.
281        //   STRUCT (23) matches Rust struct_item and Go struct_type: a table is the
282        //   relational equivalent of a record type.
283        "create_table" => K::STRUCT,
284        // SQL: tree-sitter-sequel emits `cte` for `WITH foo AS (SELECT ...)`. CTEs
285        //   are scoped intermediate result names — VARIABLE (13) is the closest LSP
286        //   shape (no dedicated "alias" kind).
287        "cte" => K::VARIABLE,
288        // SQL: synthetic file-level def emitted by `repo_map::enrich_sql_file_def`
289        //   for dbt/sqlmesh files whose model name is the filename stem and whose
290        //   sqlmesh `MODEL (name @{schema}.X, ...)` header parses as an ERROR node.
291        //   FILE (1) matches the "this whole file is the symbol" semantic.
292        "sql_file" => K::FILE,
293
294        // --- Fallback / special chunker kinds ---
295        "rdf_statements" => K::OBJECT,
296        "file" | "window" => K::FILE,
297
298        // --- Unknown: preserve pre-B1 default (Variable = 13) ---
299        _ => K::VARIABLE,
300    }
301}
302
303/// Resolve the LSP `SymbolKind` for a `decorated_definition` tree-sitter node
304/// given its first decorator name.
305///
306/// This is the authoritative decorator-kind mapping (I#39):
307/// - `"property"` → 7 (Property) — the K1 target case
308/// - `"cached_property"` → 7 (Property) — lazy property with identical semantics
309/// - `"classmethod"` | `"staticmethod"` | any other identifier → 12 (Function)
310/// - Complex decorators (`attribute` / `call` children — e.g. `@functools.wraps(f)`)
311///   are passed as `""` by [`lsp_symbol_kind_for_node`] and fall through to 12.
312///
313/// Callers that have the full tree-sitter node should prefer
314/// [`lsp_symbol_kind_for_node`] which extracts the decorator name automatically.
315/// This function is exposed for callers that have already extracted the decorator
316/// name (e.g., from a stored string field).
317#[must_use]
318pub fn lsp_symbol_kind_for_decorated_definition(first_decorator_name: &str) -> u32 {
319    use lsp_symbol_kind as K;
320    match first_decorator_name {
321        "property" | "cached_property" => K::PROPERTY,
322        // @classmethod and @staticmethod: these are class-bound callables, not
323        // properties. FUNCTION (12) is used instead of METHOD (6) because the
324        // outer context (class vs top-level) is not available here and FUNCTION
325        // is the safer conservative choice. Callers that know the enclosing scope
326        // may upgrade to METHOD.
327        _ => K::FUNCTION,
328    }
329}
330
331/// Extract the name of the first `decorator` child of a `decorated_definition`
332/// tree-sitter node.
333///
334/// Returns `Some(name)` when the first decorator is a simple `identifier`
335/// (e.g., `@property`, `@classmethod`, `@staticmethod`, `@cached_property`).
336///
337/// Returns `None` when:
338/// - The node has no `decorator` children.
339/// - The first decorator's first non-trivial child is an `attribute` node
340///   (e.g., `@functools.wraps`) or a `call` node (e.g., `@functools.wraps(f)`).
341///   These complex decorators are not property-like.
342///
343/// The returned `&str` is a slice of `source` (zero-copy).
344fn first_decorator_ident<'src>(
345    node: &tree_sitter::Node<'_>,
346    source: &'src [u8],
347) -> Option<&'src str> {
348    let mut cursor = node.walk();
349    for child in node.children(&mut cursor) {
350        if child.kind() == "decorator" {
351            // A decorator node looks like: `"@" <expression>`.
352            // Walk its children to find the expression child.
353            let mut inner = child.walk();
354            for inner_child in child.children(&mut inner) {
355                match inner_child.kind() {
356                    // Simple @name decorator — return the identifier text.
357                    "identifier" => {
358                        let start = inner_child.start_byte();
359                        let end = inner_child.end_byte();
360                        return std::str::from_utf8(&source[start..end]).ok();
361                    }
362                    // Complex decorator (attribute access or call) — not a simple
363                    // identifier; treat as arbitrary (non-property) decorator.
364                    "attribute" | "call" => return None,
365                    // Skip punctuation/whitespace nodes (e.g., the "@" token).
366                    _ => {}
367                }
368            }
369            // Decorator had no recognisable expression child.
370            return None;
371        }
372    }
373    None
374}
375
376/// Map a tree-sitter `Node` to an LSP `SymbolKind` numeric value.
377///
378/// This is the decorator-aware variant of [`lsp_symbol_kind_for_node_kind`].
379/// For `decorated_definition` nodes (Python), it inspects the first decorator
380/// child to distinguish `@property` (→ 7) from other decorators (→ 12).
381/// For all other node kinds, it delegates to [`lsp_symbol_kind_for_node_kind`].
382///
383/// # Python decorator mapping (I#39)
384///
385/// | First decorator | LSP SymbolKind |
386/// |---|---|
387/// | `@property` | 7 (Property) |
388/// | `@cached_property` | 7 (Property) |
389/// | `@classmethod` | 12 (Function) |
390/// | `@staticmethod` | 12 (Function) |
391/// | `@functools.wraps(f)` (complex) | 12 (Function) |
392/// | any other | 12 (Function) |
393///
394/// Callers that have the full tree-sitter parse tree available (e.g., the
395/// chunker's `extract_name_captures`) should call this function instead of
396/// `lsp_symbol_kind_for_node_kind(node.kind())` to get correct Python decorator
397/// classification.
398#[must_use]
399pub fn lsp_symbol_kind_for_node(node: &tree_sitter::Node<'_>, source: &[u8]) -> u32 {
400    if node.kind() == "decorated_definition" {
401        let decorator = first_decorator_ident(node, source).unwrap_or("");
402        return lsp_symbol_kind_for_decorated_definition(decorator);
403    }
404    lsp_symbol_kind_for_node_kind(node.kind())
405}
406
407/// Check whether a [`tree_sitter::Language`] is the Rust grammar.
408///
409/// Used by [`crate::repo_map`] to gate Rust-specific receiver-type heuristics.
410/// Compares the node-kind count as a proxy for grammar identity (the node-kind
411/// string table is stable across compatible ABI versions and differs between
412/// grammars).
413#[must_use]
414pub fn is_rust_language(lang: &tree_sitter::Language) -> bool {
415    let rust_lang: tree_sitter::Language = tree_sitter_rust::LANGUAGE.into();
416    // Both must have the same ABI version AND the same number of node kinds
417    // (a grammar-specific constant). This is not a guaranteed identity check but
418    // is reliable enough for our heuristic gating.
419    lang.abi_version() == rust_lang.abi_version()
420        && lang.node_kind_count() == rust_lang.node_kind_count()
421}
422
423/// Check whether a [`tree_sitter::Language`] is the Python grammar.
424///
425/// Used by [`crate::repo_map`] to gate Python-specific receiver-type heuristics.
426/// Same node-kind-count proxy as [`is_rust_language`].
427#[must_use]
428pub fn is_python_language(lang: &tree_sitter::Language) -> bool {
429    let py_lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
430    lang.abi_version() == py_lang.abi_version()
431        && lang.node_kind_count() == py_lang.node_kind_count()
432}
433
434/// Check whether a [`tree_sitter::Language`] is the Go grammar.
435///
436/// Used by [`crate::repo_map`] to gate Go-specific receiver-type heuristics.
437/// Same node-kind-count proxy as [`is_rust_language`].
438#[must_use]
439pub fn is_go_language(lang: &tree_sitter::Language) -> bool {
440    let go_lang: tree_sitter::Language = tree_sitter_go::LANGUAGE.into();
441    lang.abi_version() == go_lang.abi_version()
442        && lang.node_kind_count() == go_lang.node_kind_count()
443}
444
445/// Check whether a [`tree_sitter::Language`] is the HCL grammar.
446///
447/// Used by [`crate::repo_map`] to gate HCL-specific call-edge extraction
448/// (terraform_remote_state references and module blocks — R2/R3, Wave 3).
449/// Same node-kind-count proxy as [`is_rust_language`].
450#[must_use]
451pub fn is_hcl_language(lang: &tree_sitter::Language) -> bool {
452    let hcl_lang: tree_sitter::Language = tree_sitter_hcl::LANGUAGE.into();
453    lang.abi_version() == hcl_lang.abi_version()
454        && lang.node_kind_count() == hcl_lang.node_kind_count()
455}
456
457/// Check whether a [`tree_sitter::Language`] is the SQL grammar (tree-sitter-sequel).
458///
459/// Used by [`crate::repo_map`] to gate SQL-specific enrichment — the synthetic
460/// file-level def for dbt/sqlmesh models (S1, Wave 4). Same node-kind-count proxy
461/// as [`is_rust_language`].
462#[must_use]
463pub fn is_sql_language(lang: &tree_sitter::Language) -> bool {
464    let sql_lang: tree_sitter::Language = tree_sitter_sequel::LANGUAGE.into();
465    lang.abi_version() == sql_lang.abi_version()
466        && lang.node_kind_count() == sql_lang.node_kind_count()
467}
468
469/// Check whether a [`tree_sitter::Language`] is the C grammar.
470///
471/// Used by [`crate::repo_map`] to gate C-specific call-edge extraction — the
472/// struct-literal initializer fnptr post-pass (I#55, 4.1.5). C codebases
473/// dispatch via tables of function pointers (Linux `file_operations`, redis
474/// `redisCommandTable`, libuv handle vtables) — the post-pass emits one
475/// synthetic edge per `identifier` found inside an `initializer_list`,
476/// either designated (`.read = my_read`) or positional (`{"get",
477/// getCommand}`). Same node-kind-count proxy as [`is_rust_language`].
478#[must_use]
479pub fn is_c_language(lang: &tree_sitter::Language) -> bool {
480    let c_lang: tree_sitter::Language = tree_sitter_c::LANGUAGE.into();
481    lang.abi_version() == c_lang.abi_version() && lang.node_kind_count() == c_lang.node_kind_count()
482}
483
484/// Check whether a [`tree_sitter::Language`] is the C++ grammar.
485///
486/// Used by [`crate::repo_map`] to gate C++-specific call-edge extraction —
487/// the same struct-literal initializer fnptr post-pass as C (I#55, 4.1.5),
488/// because C++ inherits C's initializer-list syntax and the same dispatch
489/// pattern is common (e.g. POSIX-style driver tables in C++ kernels,
490/// embedded firmware). Same node-kind-count proxy as [`is_rust_language`].
491#[must_use]
492pub fn is_cpp_language(lang: &tree_sitter::Language) -> bool {
493    let cpp_lang: tree_sitter::Language = tree_sitter_cpp::LANGUAGE.into();
494    lang.abi_version() == cpp_lang.abi_version()
495        && lang.node_kind_count() == cpp_lang.node_kind_count()
496}
497
498/// Derive the canonical symbol name for an HCL `block` AST node.
499///
500/// HCL blocks have the form `keyword "type_label" "name_label" { ... }`:
501/// - `resource "aws_iam_role" "loader" { ... }` → `"aws_iam_role.loader"`
502/// - `data "aws_s3_bucket" "main" { ... }`      → `"aws_s3_bucket.main"`
503/// - `variable "region" { ... }`                → `"region"`
504/// - `output "role_arn" { ... }`                → `"role_arn"`
505/// - `locals { ... }`                           → `"locals"`
506///
507/// This function is the authoritative composite-name implementation (K3). The chunker
508/// pipeline uses the `@name` capture from the HCL query (the last string label or the
509/// keyword for no-label blocks). Callers that need the full `type.name` format — e.g.
510/// `"aws_iam_role.loader"` — should call this function directly after identifying the
511/// block node.
512///
513/// Returns an owned `String` with the composite name. If the `block_node` is not
514/// a `block` node, returns an empty string.
515#[must_use]
516pub fn derive_hcl_block_name(block_node: &tree_sitter::Node<'_>, source: &[u8]) -> String {
517    if block_node.kind() != "block" {
518        return String::new();
519    }
520    // Collect all template_literal texts from string_lit children (the block labels).
521    // Children order: identifier, string_lit*, block_start, body, block_end.
522    let mut labels: Vec<&str> = Vec::new();
523    let mut cursor = block_node.walk();
524    for child in block_node.children(&mut cursor) {
525        if child.kind() == "string_lit" {
526            // Walk into string_lit to find template_literal
527            let mut inner = child.walk();
528            for grandchild in child.children(&mut inner) {
529                if grandchild.kind() == "template_literal" {
530                    let start = grandchild.start_byte();
531                    let end = grandchild.end_byte();
532                    if let Ok(text) = std::str::from_utf8(&source[start..end]) {
533                        labels.push(text);
534                    }
535                }
536            }
537        } else if child.kind() == "block_start" {
538            // Stop at the opening brace — everything after is the block body.
539            break;
540        }
541    }
542    match labels.len() {
543        0 => {
544            // No string labels — use the identifier (e.g. "locals").
545            let mut cursor2 = block_node.walk();
546            for child in block_node.children(&mut cursor2) {
547                if child.kind() == "identifier" {
548                    let start = child.start_byte();
549                    let end = child.end_byte();
550                    if let Ok(text) = std::str::from_utf8(&source[start..end]) {
551                        return text.to_string();
552                    }
553                }
554            }
555            String::new()
556        }
557        1 => labels[0].to_string(),
558        _ => {
559            // Two or more labels: join all but the keyword portion.
560            // Convention: skip the first label if there are exactly two (type.name).
561            // For three or more labels, join all with dots.
562            labels.join(".")
563        }
564    }
565}
566
567/// Look up the language configuration for a file extension.
568///
569/// Compiled queries are cached per extension so repeated calls are free.
570/// Returns `None` for unsupported extensions.
571#[must_use]
572pub fn config_for_extension(ext: &str) -> Option<Arc<LangConfig>> {
573    // Cache of compiled configs, keyed by canonical extension.
574    static CACHE: OnceLock<std::collections::HashMap<&'static str, Arc<LangConfig>>> =
575        OnceLock::new();
576
577    let cache = CACHE.get_or_init(|| {
578        let mut m = std::collections::HashMap::new();
579        // Pre-compile all supported extensions
580        for &ext in &[
581            "rs", "py", "pyi", "js", "jsx", "ts", "tsx", "go", "java", "c", "h", "cpp", "cc",
582            "cxx", "hpp", "sh", "bash", "bats", "rb", "tf", "tfvars", "hcl", "kt", "kts", "swift",
583            "scala", "toml", "json", "yaml", "yml", "md", "xml", "rdf", "owl", "sql",
584        ] {
585            if let Some(cfg) = compile_config(ext) {
586                m.insert(ext, Arc::new(cfg));
587            }
588        }
589        m
590    });
591
592    cache.get(ext).cloned()
593}
594
595/// Compile a [`LangConfig`] for the given extension (uncached).
596#[expect(
597    clippy::too_many_lines,
598    reason = "one match arm per language — flat by design"
599)]
600fn compile_config(ext: &str) -> Option<LangConfig> {
601    let (lang, query_str): (Language, &str) = match ext {
602        // Rust: standalone functions, structs, and methods INSIDE impl/trait blocks.
603        // impl_item and trait_item are NOT captured as wholes — we extract their
604        // individual function_item children for method-level granularity.
605        "rs" => (
606            tree_sitter_rust::LANGUAGE.into(),
607            concat!(
608                "(function_item name: (identifier) @name) @def\n",
609                "(struct_item name: (type_identifier) @name) @def\n",
610                "(enum_item name: (type_identifier) @name) @def\n",
611                "(type_item name: (type_identifier) @name) @def\n",
612                "(field_declaration name: (field_identifier) @name) @def\n",
613                "(enum_variant name: (identifier) @name) @def\n",
614                "(impl_item type: (type_identifier) @name) @def\n",
615                "(trait_item name: (type_identifier) @name) @def\n",
616                "(const_item name: (identifier) @name) @def\n",
617                "(static_item name: (identifier) @name) @def\n",
618                "(mod_item name: (identifier) @name) @def",
619            ),
620        ),
621        // Python: top-level functions AND methods inside classes (function_definition
622        // matches at any nesting depth, so methods are captured individually).
623        //
624        // K1 fix: decorated functions (e.g. @property, @classmethod, @staticmethod)
625        // are captured as decorated_definition with @name taken from the inner
626        // function_definition. The chunker pipeline does not evaluate tree-sitter
627        // predicates, so all decorated_definition nodes emit kind="decorated_definition"
628        // → SymbolKind::PROPERTY (7). The @property case is the primary target; other
629        // decorators are over-classified as PROPERTY but previously fell through to
630        // VARIABLE (13) as an unrecognised kind, which was worse.
631        "py" | "pyi" => (
632            tree_sitter_python::LANGUAGE.into(),
633            concat!(
634                "(decorated_definition (function_definition name: (identifier) @name)) @def\n",
635                "(function_definition name: (identifier) @name) @def\n",
636                "(class_definition name: (identifier) @name) @def\n",
637                "(assignment left: (identifier) @name) @def",
638            ),
639        ),
640        // JS: functions, methods, and arrow functions assigned to variables.
641        "js" | "jsx" => (
642            tree_sitter_javascript::LANGUAGE.into(),
643            concat!(
644                "(function_declaration name: (identifier) @name) @def\n",
645                "(method_definition name: (property_identifier) @name) @def\n",
646                "(class_declaration name: (identifier) @name) @def\n",
647                "(variable_declarator name: (identifier) @name) @def",
648            ),
649        ),
650        "ts" => (
651            tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
652            concat!(
653                "(function_declaration name: (identifier) @name) @def\n",
654                "(method_definition name: (property_identifier) @name) @def\n",
655                "(class_declaration name: (type_identifier) @name) @def\n",
656                "(interface_declaration name: (type_identifier) @name) @def\n",
657                "(variable_declarator name: (identifier) @name) @def\n",
658                "(type_alias_declaration name: (type_identifier) @name) @def\n",
659                "(enum_declaration name: (identifier) @name) @def",
660            ),
661        ),
662        "tsx" => (
663            tree_sitter_typescript::LANGUAGE_TSX.into(),
664            concat!(
665                "(function_declaration name: (identifier) @name) @def\n",
666                "(method_definition name: (property_identifier) @name) @def\n",
667                "(class_declaration name: (type_identifier) @name) @def\n",
668                "(interface_declaration name: (type_identifier) @name) @def\n",
669                "(variable_declarator name: (identifier) @name) @def\n",
670                "(type_alias_declaration name: (type_identifier) @name) @def\n",
671                "(enum_declaration name: (identifier) @name) @def",
672            ),
673        ),
674        // Go: functions, methods, and type declarations.
675        //
676        // K2 fix: distinguish interface types (kind=11, Interface) from struct types
677        // (kind=23, Struct) from other type declarations (kind=26, TypeParameter).
678        // The previous single `(type_declaration (type_spec ...)) @def` pattern emitted
679        // kind="type_declaration" for ALL types, mapping to TYPE_PARAMETER (26) and
680        // making Go interfaces invisible to interface-kind filters.
681        //
682        // Strategy: use the INNER type body node as @def so that `node.kind()` reflects
683        // the concrete type kind:
684        //   - `interface_type` @def → kind="interface_type" → INTERFACE (11)
685        //   - `struct_type`    @def → kind="struct_type"    → STRUCT (23)
686        //   - `type_alias`     @def → kind="type_alias"     → TYPE_PARAMETER (26)
687        //
688        // Non-interface non-struct type_specs (e.g. `type MyChan chan int`) are NOT
689        // captured; this is intentional to avoid duplicate chunks for the same declaration.
690        "go" => (
691            tree_sitter_go::LANGUAGE.into(),
692            concat!(
693                "(function_declaration name: (identifier) @name) @def\n",
694                "(method_declaration name: (field_identifier) @name) @def\n",
695                // Interface type: @def = interface_type → kind="interface_type" → INTERFACE
696                "(type_declaration (type_spec name: (type_identifier) @name type: (interface_type) @def))\n",
697                // Struct type: @def = struct_type → kind="struct_type" → STRUCT
698                "(type_declaration (type_spec name: (type_identifier) @name type: (struct_type) @def))\n",
699                // Type alias: @def = type_alias → kind="type_alias" → TYPE_PARAMETER
700                "(type_declaration (type_alias name: (type_identifier) @name) @def)\n",
701                "(const_spec name: (identifier) @name) @def",
702            ),
703        ),
704        // Java: methods are already captured individually (method_declaration
705        // matches inside class bodies). Keep class for the signature/fields.
706        "java" => (
707            tree_sitter_java::LANGUAGE.into(),
708            concat!(
709                "(method_declaration name: (identifier) @name) @def\n",
710                "(class_declaration name: (identifier) @name) @def\n",
711                "(interface_declaration name: (identifier) @name) @def\n",
712                "(field_declaration declarator: (variable_declarator name: (identifier) @name)) @def\n",
713                "(enum_constant name: (identifier) @name) @def\n",
714                "(enum_declaration name: (identifier) @name) @def\n",
715                "(constructor_declaration name: (identifier) @name) @def",
716            ),
717        ),
718        // I#66: C pointer-returning functions are wrapped in `pointer_declarator`
719        // nodes between `function_definition` and `function_declarator`, so the
720        // simple direct pattern misses them. Three patterns cover the common cases:
721        //   1. Direct:        `int foo(...)` — plain return type
722        //   2. Pointer:       `struct foo *make_foo(...)` — one pointer level
723        //   3. Double-pointer:`char **get_argv(...)` — two pointer levels
724        // Triple-pointer is rare enough to be out of scope; the same three patterns
725        // also apply to C++ below.
726        "c" | "h" => (
727            tree_sitter_c::LANGUAGE.into(),
728            concat!(
729                // 1. Direct: int foo(int x) { ... }
730                "(function_definition declarator: (function_declarator declarator: (identifier) @name)) @def\n",
731                // 2. Pointer-returning: struct foo *make_foo(int x) { ... }
732                "(function_definition declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name))) @def\n",
733                // 3. Double-pointer-returning: char **get_argv(int n) { ... }
734                "(function_definition declarator: (pointer_declarator declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name)))) @def\n",
735                "(declaration declarator: (init_declarator declarator: (identifier) @name)) @def\n",
736                // I#55 (4.1.5): array-initializer declarations like
737                //   `struct redisCommand cmds[] = { {"get", getCommand}, ... };`
738                // The init_declarator's declarator is an `array_declarator`
739                // wrapping the identifier, not the bare identifier. Without
740                // this pattern, the table has no def to anchor the synthetic
741                // struct-literal fnptr edges emitted by
742                // `extract_c_struct_init_edges` — the redis command-table
743                // and Linux per-driver lookup-table dispatchers would
744                // remain unreachable from any entry point.
745                "(declaration declarator: (init_declarator declarator: (array_declarator declarator: (identifier) @name))) @def\n",
746                "(struct_specifier name: (type_identifier) @name) @def\n",
747                "(enum_specifier name: (type_identifier) @name) @def\n",
748                "(type_definition declarator: (type_identifier) @name) @def\n",
749                // I53: object-like macros: #define FOO 42 → kind=preproc_def → Constant (14)
750                "(preproc_def name: (identifier) @name) @def\n",
751                // I53: function-like macros: #define F(x) ... → kind=preproc_function_def → Function (12)
752                "(preproc_function_def name: (identifier) @name) @def",
753            ),
754        ),
755        // C++: functions at any level, plus class signatures.
756        // I#66: same pointer_declarator fix applied as for C above.
757        "cpp" | "cc" | "cxx" | "hpp" => (
758            tree_sitter_cpp::LANGUAGE.into(),
759            concat!(
760                // 1. Direct: int foo(int x) { ... }
761                "(function_definition declarator: (function_declarator declarator: (identifier) @name)) @def\n",
762                // 2. Pointer-returning: Foo *make(int x) { ... }
763                "(function_definition declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name))) @def\n",
764                // 3. Double-pointer-returning: char **get(int n) { ... }
765                "(function_definition declarator: (pointer_declarator declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name)))) @def\n",
766                "(class_specifier name: (type_identifier) @name) @def\n",
767                "(declaration declarator: (init_declarator declarator: (identifier) @name)) @def\n",
768                // I#55 (4.1.5): array-initializer declarations — mirrors C.
769                // The same struct-of-fnptr dispatch idiom appears in
770                // embedded/firmware C++ codebases.
771                "(declaration declarator: (init_declarator declarator: (array_declarator declarator: (identifier) @name))) @def\n",
772                "(struct_specifier name: (type_identifier) @name) @def\n",
773                "(enum_specifier name: (type_identifier) @name) @def\n",
774                "(type_definition declarator: (type_identifier) @name) @def\n",
775                "(namespace_definition name: (namespace_identifier) @name) @def\n",
776                "(field_declaration declarator: (field_identifier) @name) @def",
777            ),
778        ),
779        // Bash: function definitions (.bats = Bash Automated Testing System).
780        "sh" | "bash" | "bats" => (
781            tree_sitter_bash::LANGUAGE.into(),
782            concat!(
783                "(function_definition name: (word) @name) @def\n",
784                "(variable_assignment name: (variable_name) @name) @def",
785            ),
786        ),
787        // Ruby: methods, classes, and modules.
788        "rb" => (
789            tree_sitter_ruby::LANGUAGE.into(),
790            concat!(
791                "(method name: (identifier) @name) @def\n",
792                "(class name: (constant) @name) @def\n",
793                "(module name: (constant) @name) @def\n",
794                "(assignment left: (identifier) @name) @def\n",
795                "(assignment left: (constant) @name) @def",
796            ),
797        ),
798        // HCL (Terraform): resource, data, variable, and output blocks.
799        //
800        // K3 fix: index blocks by their semantic name rather than the keyword.
801        // Previous query `(block (identifier) @name)` captured the block keyword
802        // (e.g. "resource") as the symbol name, making `lsp_workspace_symbols(query="loader")`
803        // unable to find `resource "aws_iam_role" "loader" { ... }`.
804        //
805        // Fixed query uses dot-anchor patterns to select the LAST string label
806        // immediately before the opening `{` (block_start):
807        //   - `resource "aws_iam_role" "loader" {}` → last string_lit before { = "loader" ✓
808        //   - `data "aws_s3_bucket" "main" {}`    → last string_lit before { = "main"   ✓
809        //   - `variable "region" {}`              → only  string_lit before { = "region" ✓
810        //   - `output "role_arn" {}`              → only  string_lit before { = "role_arn" ✓
811        //   - `locals {}`                          → no string_lit; identifier before { = "locals" ✓
812        //
813        // Cycle 13 W1 (B-0017/B-0021 family): the third pattern below captures
814        // each `attribute` inside a `locals { ... }` body as a named def. This
815        // closes the dense-pipeline gap where `find_similar(symbol_name="aws_projects")`
816        // returned "not in index" — the AST-merge chunker in
817        // `encoder::ripvec::dense::extract_name_captures` couldn't see per-local
818        // attribute names, so they never reached BM25 or the chunk-name table.
819        // The `#eq?` predicate scopes the capture to locals blocks; attributes
820        // inside resource/data/module blocks are intentionally NOT captured
821        // (they are intra-block scaffolding, not semantic symbols).
822        //
823        // Note: the composite `type.name` format (e.g. "aws_iam_role.loader") is available
824        // via [`derive_hcl_block_name`] for callers that need it. The chunker uses the
825        // `@name` capture (the last string_lit label or identifier) which already enables
826        // workspace symbol queries to find resources by their specific name.
827        "tf" | "tfvars" | "hcl" => (
828            tree_sitter_hcl::LANGUAGE.into(),
829            concat!(
830                // Last string_lit immediately before block_start (covers both single-label
831                // and multi-label blocks; the dot anchor selects only the final label).
832                "(block (string_lit (template_literal) @name) . (block_start)) @def\n",
833                // No-label blocks (e.g. locals): identifier immediately before block_start.
834                "(block (identifier) @name . (block_start)) @def\n",
835                // Per-locals-attribute capture (C13W1): each `aws_projects = ...`
836                // inside `locals { ... }` becomes its own @name + @def match.
837                // Scoped to locals via `#eq?` so resource/data block attributes
838                // do not pollute the symbol table.
839                "(block (identifier) @_kw (body (attribute (identifier) @name) @def) ",
840                "(#eq? @_kw \"locals\"))",
841            ),
842        ),
843        // Kotlin: functions, classes, and objects.
844        "kt" | "kts" => (
845            tree_sitter_kotlin_ng::LANGUAGE.into(),
846            concat!(
847                "(function_declaration name: (identifier) @name) @def\n",
848                "(class_declaration name: (identifier) @name) @def\n",
849                "(object_declaration name: (identifier) @name) @def\n",
850                "(property_declaration (identifier) @name) @def\n",
851                "(enum_entry (identifier) @name) @def",
852            ),
853        ),
854        // Swift: functions, classes, structs, enums, and protocols.
855        "swift" => (
856            tree_sitter_swift::LANGUAGE.into(),
857            concat!(
858                "(function_declaration name: (simple_identifier) @name) @def\n",
859                "(class_declaration name: (type_identifier) @name) @def\n",
860                "(protocol_declaration name: (type_identifier) @name) @def\n",
861                "(property_declaration name: (pattern bound_identifier: (simple_identifier) @name)) @def\n",
862                "(typealias_declaration name: (type_identifier) @name) @def",
863            ),
864        ),
865        // Scala: functions, classes, traits, and objects.
866        "scala" => (
867            tree_sitter_scala::LANGUAGE.into(),
868            concat!(
869                "(function_definition name: (identifier) @name) @def\n",
870                "(class_definition name: (identifier) @name) @def\n",
871                "(trait_definition name: (identifier) @name) @def\n",
872                "(object_definition name: (identifier) @name) @def\n",
873                "(val_definition pattern: (identifier) @name) @def\n",
874                "(var_definition pattern: (identifier) @name) @def\n",
875                "(type_definition name: (type_identifier) @name) @def",
876            ),
877        ),
878        // TOML: table headers (sections).
879        "toml" => (
880            tree_sitter_toml_ng::LANGUAGE.into(),
881            concat!(
882                "(table (bare_key) @name) @def\n",
883                "(pair (bare_key) @name) @def",
884            ),
885        ),
886        // JSON: key-value pairs, capturing the key string content.
887        "json" => (
888            tree_sitter_json::LANGUAGE.into(),
889            "(pair key: (string (string_content) @name)) @def",
890        ),
891        // YAML: block mapping pairs with plain scalar keys.
892        "yaml" | "yml" => (
893            tree_sitter_yaml::LANGUAGE.into(),
894            "(block_mapping_pair key: (flow_node (plain_scalar (string_scalar) @name))) @def",
895        ),
896        // Markdown: ATX headings (# through ######), capturing the heading text.
897        "md" => (
898            tree_sitter_md::LANGUAGE.into(),
899            "(atx_heading heading_content: (inline) @name) @def",
900        ),
901        // RDF/XML and OWL/XML are XML documents; capture each element so
902        // ontology classes/properties become searchable semantic chunks.
903        "xml" | "rdf" | "owl" => (
904            tree_sitter_xml::LANGUAGE_XML.into(),
905            concat!(
906                "(element (STag (Name) @name)) @def\n",
907                "(element (EmptyElemTag (Name) @name)) @def",
908            ),
909        ),
910        // SQL: CREATE TABLE statements and common table expressions (CTEs).
911        // Powered by tree-sitter-sequel (derekstride/tree-sitter-sql).
912        //
913        // dbt/sqlmesh files conventionally name their model by the *filename*
914        // rather than an in-source CREATE TABLE — see `enrich_sql_file_def` in
915        // repo_map.rs for the synthetic file-level def that fills that gap.
916        // The chunker-level query below captures any in-source CREATE TABLE
917        // and CTE so they remain searchable semantic chunks even when the
918        // file uses sqlmesh `MODEL (...)` headers (which parse as ERROR nodes
919        // — FROM/JOIN still extract cleanly post-error per S1 design).
920        "sql" => (
921            tree_sitter_sequel::LANGUAGE.into(),
922            concat!(
923                // CREATE TABLE foo — table-as-def.
924                "(create_table (object_reference name: (identifier) @name)) @def\n",
925                // WITH foo AS (SELECT ...) — CTE-as-def.
926                "(cte (identifier) @name) @def",
927            ),
928        ),
929        _ => return None,
930    };
931    let query = match Query::new(&lang, query_str) {
932        Ok(q) => q,
933        Err(e) => {
934            tracing::warn!(ext, %e, "tree-sitter query compilation failed — language may be ABI-incompatible");
935            return None;
936        }
937    };
938    Some(LangConfig {
939        language: lang,
940        query,
941    })
942}
943
944/// Look up the call-extraction query for a file extension.
945///
946/// Compiled queries are cached per extension so repeated calls are free.
947/// Returns `None` for unsupported extensions (including TOML, which has
948/// no function calls).
949#[must_use]
950pub fn call_query_for_extension(ext: &str) -> Option<Arc<CallConfig>> {
951    static CACHE: OnceLock<std::collections::HashMap<&'static str, Arc<CallConfig>>> =
952        OnceLock::new();
953
954    let cache = CACHE.get_or_init(|| {
955        let mut m = std::collections::HashMap::new();
956        // Pre-compile for all extensions that have callable constructs.
957        // TOML is deliberately excluded — it has no function calls.
958        // SQL has FROM/JOIN as call-edges (model-to-model references) —
959        // emitted by the per-language call query plus a synthetic
960        // file-level def in repo_map::enrich_sql_file_def (S1, Wave 4).
961        for &ext in &[
962            "rs", "py", "pyi", "js", "jsx", "ts", "tsx", "go", "java", "c", "h", "cpp", "cc",
963            "cxx", "hpp", "sh", "bash", "bats", "rb", "tf", "tfvars", "hcl", "kt", "kts", "swift",
964            "scala", "sql",
965        ] {
966            if let Some(cfg) = compile_call_config(ext) {
967                m.insert(ext, Arc::new(cfg));
968            }
969        }
970        m
971    });
972
973    cache.get(ext).cloned()
974}
975
976/// Compile a [`CallConfig`] for the given extension (uncached).
977///
978/// Each query extracts the callee identifier (`@callee`) from function
979/// and method calls, plus the whole call expression (`@call`).
980#[expect(
981    clippy::too_many_lines,
982    reason = "one match arm per language — flat by design"
983)]
984fn compile_call_config(ext: &str) -> Option<CallConfig> {
985    let (lang, query_str): (Language, &str) = match ext {
986        // Rust: free calls, method calls, and scoped (path) calls.
987        //
988        // For scoped calls, capture the full `scoped_identifier` node as @callee
989        // (not just the trailing `(identifier)` child). This preserves the qualified
990        // path so that `mod_a::foo()` records "mod_a::foo" rather than bare "foo",
991        // enabling cross-module disambiguation in `resolve_calls`.
992        "rs" => (
993            tree_sitter_rust::LANGUAGE.into(),
994            concat!(
995                "(call_expression function: (identifier) @callee) @call\n",
996                "(call_expression function: (field_expression field: (field_identifier) @callee)) @call\n",
997                "(call_expression function: (scoped_identifier) @callee) @call",
998            ),
999        ),
1000        // Python: simple calls and attribute (method) calls.
1001        "py" | "pyi" => (
1002            tree_sitter_python::LANGUAGE.into(),
1003            concat!(
1004                "(call function: (identifier) @callee) @call\n",
1005                "(call function: (attribute attribute: (identifier) @callee)) @call",
1006            ),
1007        ),
1008        // JavaScript: function calls and member expression calls.
1009        "js" | "jsx" => (
1010            tree_sitter_javascript::LANGUAGE.into(),
1011            concat!(
1012                "(call_expression function: (identifier) @callee) @call\n",
1013                "(call_expression function: (member_expression property: (property_identifier) @callee)) @call",
1014            ),
1015        ),
1016        // TypeScript: same patterns as JavaScript.
1017        "ts" => (
1018            tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
1019            concat!(
1020                "(call_expression function: (identifier) @callee) @call\n",
1021                "(call_expression function: (member_expression property: (property_identifier) @callee)) @call",
1022            ),
1023        ),
1024        // TSX: same patterns as JavaScript.
1025        "tsx" => (
1026            tree_sitter_typescript::LANGUAGE_TSX.into(),
1027            concat!(
1028                "(call_expression function: (identifier) @callee) @call\n",
1029                "(call_expression function: (member_expression property: (property_identifier) @callee)) @call",
1030            ),
1031        ),
1032        // Go: function calls and selector (method) calls.
1033        "go" => (
1034            tree_sitter_go::LANGUAGE.into(),
1035            concat!(
1036                "(call_expression function: (identifier) @callee) @call\n",
1037                "(call_expression function: (selector_expression field: (field_identifier) @callee)) @call",
1038            ),
1039        ),
1040        // Java: method invocations.
1041        "java" => (
1042            tree_sitter_java::LANGUAGE.into(),
1043            "(method_invocation name: (identifier) @callee) @call",
1044        ),
1045        // C: function calls and field-expression calls (function pointers).
1046        "c" | "h" => (
1047            tree_sitter_c::LANGUAGE.into(),
1048            concat!(
1049                "(call_expression function: (identifier) @callee) @call\n",
1050                "(call_expression function: (field_expression field: (field_identifier) @callee)) @call",
1051            ),
1052        ),
1053        // C++: same patterns as C.
1054        "cpp" | "cc" | "cxx" | "hpp" => (
1055            tree_sitter_cpp::LANGUAGE.into(),
1056            concat!(
1057                "(call_expression function: (identifier) @callee) @call\n",
1058                "(call_expression function: (field_expression field: (field_identifier) @callee)) @call",
1059            ),
1060        ),
1061        // Bash: command invocations (.bats = Bash Automated Testing System).
1062        "sh" | "bash" | "bats" => (
1063            tree_sitter_bash::LANGUAGE.into(),
1064            "(command name: (command_name (word) @callee)) @call",
1065        ),
1066        // Ruby: method calls.
1067        "rb" => (
1068            tree_sitter_ruby::LANGUAGE.into(),
1069            "(call method: (identifier) @callee) @call",
1070        ),
1071        // HCL (Terraform): built-in function calls.
1072        "tf" | "tfvars" | "hcl" => (
1073            tree_sitter_hcl::LANGUAGE.into(),
1074            "(function_call (identifier) @callee) @call",
1075        ),
1076        // Kotlin: call expressions — grammar uses unnamed children, so match
1077        // identifier as first child of call_expression.
1078        "kt" | "kts" => (
1079            tree_sitter_kotlin_ng::LANGUAGE.into(),
1080            "(call_expression (identifier) @callee) @call",
1081        ),
1082        // Swift: call expressions with simple identifiers.
1083        "swift" => (
1084            tree_sitter_swift::LANGUAGE.into(),
1085            "(call_expression (simple_identifier) @callee) @call",
1086        ),
1087        // Scala: function calls and field-expression (method) calls.
1088        "scala" => (
1089            tree_sitter_scala::LANGUAGE.into(),
1090            concat!(
1091                "(call_expression function: (identifier) @callee) @call\n",
1092                "(call_expression function: (field_expression field: (identifier) @callee)) @call",
1093            ),
1094        ),
1095        // SQL: FROM <table> and JOIN <table> as call-edges. Schema-qualified
1096        // names like `analytics.silver_X` parse as
1097        //   (object_reference schema: (identifier) name: (identifier))
1098        // — the field selector `name:` picks the table identifier and skips
1099        // the schema prefix, which is correct for cross-model resolution
1100        // (downstream dbt/sqlmesh models reference each other by table name
1101        // not by schema + name).
1102        "sql" => (
1103            tree_sitter_sequel::LANGUAGE.into(),
1104            concat!(
1105                // FROM <table>: relation > object_reference > name identifier.
1106                "(from (relation (object_reference name: (identifier) @callee))) @call\n",
1107                // JOIN <table>: same shape, inside a join clause.
1108                "(join (relation (object_reference name: (identifier) @callee))) @call",
1109            ),
1110        ),
1111        _ => return None,
1112    };
1113    let query = match Query::new(&lang, query_str) {
1114        Ok(q) => q,
1115        Err(e) => {
1116            tracing::warn!(ext, %e, "tree-sitter call query compilation failed");
1117            return None;
1118        }
1119    };
1120    Some(CallConfig {
1121        language: lang,
1122        query,
1123    })
1124}
1125
1126#[cfg(test)]
1127mod tests {
1128    use super::*;
1129
    /// `config_for_extension` must return a configuration for the `rs` extension.
    #[test]
    fn rust_extension_resolves() {
        assert!(config_for_extension("rs").is_some());
    }
1134
    /// `config_for_extension` must return a configuration for the `py` extension.
    #[test]
    fn python_extension_resolves() {
        assert!(config_for_extension("py").is_some());
    }
1139
    /// `config_for_extension` must return a configuration for the `pyi`
    /// (Python stub) extension as well.
    #[test]
    fn python_stub_extension_resolves() {
        assert!(config_for_extension("pyi").is_some());
    }
1144
    /// An extension with no registered grammar (`xyz`) must yield `None`.
    #[test]
    fn unknown_extension_returns_none() {
        assert!(config_for_extension("xyz").is_none());
    }
1149
1150    #[test]
1151    fn all_supported_extensions() {
1152        let exts = [
1153            "rs", "py", "pyi", "js", "jsx", "ts", "tsx", "go", "java", "c", "h", "cpp", "cc",
1154            "cxx", "hpp", "sh", "bash", "bats", "rb", "tf", "tfvars", "hcl", "kt", "kts", "swift",
1155            "scala", "toml", "json", "yaml", "yml", "md", "xml", "rdf", "owl", "sql",
1156        ];
1157        for ext in &exts {
1158            assert!(config_for_extension(ext).is_some(), "failed for {ext}");
1159        }
1160    }
1161
1162    #[test]
1163    fn turtle_family_uses_rdf_text_chunking_not_tree_sitter() {
1164        for ext in ["ttl", "nt", "n3", "trig", "nq"] {
1165            assert!(
1166                config_for_extension(ext).is_none(),
1167                "{ext} should be handled by RDF text chunking"
1168            );
1169            assert!(crate::chunk::is_rdf_text_extension(ext));
1170        }
1171    }
1172
1173    #[test]
1174    fn all_call_query_extensions() {
1175        let exts = [
1176            "rs", "py", "pyi", "js", "jsx", "ts", "tsx", "go", "java", "c", "h", "cpp", "cc",
1177            "cxx", "hpp", "sh", "bash", "bats", "rb", "tf", "tfvars", "hcl", "kt", "kts", "swift",
1178            "scala", "sql",
1179        ];
1180        for ext in &exts {
1181            assert!(
1182                call_query_for_extension(ext).is_some(),
1183                "call query failed for {ext}"
1184            );
1185        }
1186    }
1187
    /// TOML has no call query, so `call_query_for_extension("toml")` must be `None`.
    #[test]
    fn toml_has_no_call_query() {
        assert!(call_query_for_extension("toml").is_none());
    }
1192
1193    /// RED test (R2.3 issue a): scoped_identifier call must capture the full path.
1194    ///
1195    /// Before the fix, `mod_a::foo()` captured only `foo` as @callee.
1196    /// After the fix, it must capture `mod_a::foo` as @callee.
1197    #[test]
1198    fn test_scoped_identifier_call_query_captures_full_path() {
1199        use streaming_iterator::StreamingIterator as _;
1200
1201        let source = "
1202fn caller() {
1203    mod_a::foo();
1204    std::io::stderr();
1205}
1206";
1207        let call_cfg = call_query_for_extension("rs").expect("rs call config");
1208        let mut parser = tree_sitter::Parser::new();
1209        parser
1210            .set_language(&call_cfg.language)
1211            .expect("set language");
1212        let tree = parser.parse(source, None).expect("parse");
1213
1214        let mut cursor = tree_sitter::QueryCursor::new();
1215        let mut matches = cursor.matches(&call_cfg.query, tree.root_node(), source.as_bytes());
1216
1217        let mut callees: Vec<String> = Vec::new();
1218        while let Some(m) = matches.next() {
1219            for cap in m.captures {
1220                let name = &call_cfg.query.capture_names()[cap.index as usize];
1221                if *name == "callee" {
1222                    let text = &source[cap.node.start_byte()..cap.node.end_byte()];
1223                    callees.push(text.to_string());
1224                }
1225            }
1226        }
1227
1228        // Must contain full qualified path, not bare identifier
1229        assert!(
1230            callees.contains(&"mod_a::foo".to_string()),
1231            "expected 'mod_a::foo' in callees, got: {callees:?}"
1232        );
1233        // Bare 'foo' must not appear when scoped call is made
1234        assert!(
1235            !callees.contains(&"foo".to_string()),
1236            "bare 'foo' must not appear for scoped call; got: {callees:?}"
1237        );
1238    }
1239
1240    // -------------------------------------------------------------------------
1241    // B1: tree-sitter node-kind → LSP SymbolKind mapping tests
1242    // -------------------------------------------------------------------------
1243
1244    /// `test:rust_node_kind_maps_to_lsp_symbol_kind_struct` — `struct_item`
1245    /// maps to LSP SymbolKind 23 (Struct).
1246    ///
1247    /// Behavior: trigger-fails-on-baseline-then-passes-post-fix.
1248    /// On the baseline `lsp_symbol_kind_for_node_kind` did not exist.
1249    #[test]
1250    fn rust_node_kind_maps_to_lsp_symbol_kind_struct() {
1251        assert_eq!(
1252            lsp_symbol_kind_for_node_kind("struct_item"),
1253            lsp_symbol_kind::STRUCT,
1254            "struct_item must map to SymbolKind::Struct (23)"
1255        );
1256    }
1257
1258    /// `test:rust_node_kind_maps_to_lsp_symbol_kind_trait` — `trait_item`
1259    /// maps to LSP SymbolKind 11 (Interface).
1260    #[test]
1261    fn rust_node_kind_maps_to_lsp_symbol_kind_trait() {
1262        assert_eq!(
1263            lsp_symbol_kind_for_node_kind("trait_item"),
1264            lsp_symbol_kind::INTERFACE,
1265            "trait_item must map to SymbolKind::Interface (11)"
1266        );
1267    }
1268
1269    /// `test:rust_node_kind_maps_to_lsp_symbol_kind_enum` — `enum_item`
1270    /// maps to LSP SymbolKind 10 (Enum).
1271    #[test]
1272    fn rust_node_kind_maps_to_lsp_symbol_kind_enum() {
1273        assert_eq!(
1274            lsp_symbol_kind_for_node_kind("enum_item"),
1275            lsp_symbol_kind::ENUM,
1276            "enum_item must map to SymbolKind::Enum (10)"
1277        );
1278    }
1279
1280    /// `test:rust_node_kind_maps_to_lsp_symbol_kind_function` — `function_item`
1281    /// maps to LSP SymbolKind 12 (Function).
1282    #[test]
1283    fn rust_node_kind_maps_to_lsp_symbol_kind_function() {
1284        assert_eq!(
1285            lsp_symbol_kind_for_node_kind("function_item"),
1286            lsp_symbol_kind::FUNCTION,
1287            "function_item must map to SymbolKind::Function (12)"
1288        );
1289    }
1290
1291    /// `test:rust_node_kind_maps_to_lsp_symbol_kind_module` — `mod_item`
1292    /// maps to LSP SymbolKind 2 (Module).
1293    #[test]
1294    fn rust_node_kind_maps_to_lsp_symbol_kind_module() {
1295        assert_eq!(
1296            lsp_symbol_kind_for_node_kind("mod_item"),
1297            lsp_symbol_kind::MODULE,
1298            "mod_item must map to SymbolKind::Module (2)"
1299        );
1300    }
1301
1302    /// Additional B1 coverage: impl, const, static, type_item all map
1303    /// to meaningful, non-Variable kinds.
1304    #[test]
1305    fn rust_node_kinds_map_to_non_variable_kinds() {
1306        let cases: &[(&str, u32)] = &[
1307            ("impl_item", lsp_symbol_kind::CLASS),
1308            ("const_item", lsp_symbol_kind::CONSTANT),
1309            ("static_item", lsp_symbol_kind::CONSTANT),
1310            ("type_item", lsp_symbol_kind::TYPE_PARAMETER),
1311            ("field_declaration", lsp_symbol_kind::FIELD),
1312            ("enum_variant", lsp_symbol_kind::ENUM_MEMBER),
1313            ("function_signature_item", lsp_symbol_kind::FUNCTION),
1314        ];
1315        for &(kind, expected) in cases {
1316            assert_eq!(
1317                lsp_symbol_kind_for_node_kind(kind),
1318                expected,
1319                "node kind '{kind}' should map to {expected}, got {}",
1320                lsp_symbol_kind_for_node_kind(kind)
1321            );
1322        }
1323    }
1324
1325    /// Unknown node kinds fall back to Variable (13) — preserving pre-B1 default.
1326    #[test]
1327    fn unknown_node_kind_falls_back_to_variable() {
1328        assert_eq!(
1329            lsp_symbol_kind_for_node_kind("some_unknown_kind"),
1330            lsp_symbol_kind::VARIABLE,
1331            "unknown kind must fall back to Variable (13)"
1332        );
1333    }
1334
1335    // =========================================================================
1336    // K1 — Python @property classification (I#17a)
1337    // =========================================================================
1338
1339    /// `test:python_property_decorator_classifies_as_property_kind`
1340    ///
1341    /// Baseline (RED): `decorated_definition` was not in the kind mapping, so it
1342    /// fell through to VARIABLE (13). The Python `@property` decorated method was
1343    /// thus invisible to LSP property-kind filters.
1344    ///
1345    /// After fix (GREEN): `decorated_definition` maps to PROPERTY (22), and the
1346    /// Python query captures `decorated_definition` as `@def` so `@property`-decorated
1347    /// methods emit kind="decorated_definition" → SymbolKind::Property.
1348    #[test]
1349    fn python_property_decorator_classifies_as_property_kind() {
1350        // The kind mapping must return PROPERTY (22) for decorated_definition.
1351        assert_eq!(
1352            lsp_symbol_kind_for_node_kind("decorated_definition"),
1353            lsp_symbol_kind::PROPERTY,
1354            "decorated_definition must map to SymbolKind::Property (22); baseline gave Variable (13)"
1355        );
1356    }
1357
1358    /// The Python query must capture `@property`-decorated methods as `@def=decorated_definition`.
1359    ///
1360    /// Uses tree-sitter to parse a Python source snippet with a `@property` decorated
1361    /// method, runs the compiled Python LangConfig query, and verifies that at least one
1362    /// match emits `def_kind = "decorated_definition"` with `name = "name"`.
1363    #[test]
1364    fn python_property_query_captures_decorated_definition() {
1365        use streaming_iterator::StreamingIterator as _;
1366
1367        let source = r"class MyModel:
1368    @property
1369    def name(self):
1370        return self._name
1371
1372    @name.setter
1373    def name(self, value):
1374        self._name = value
1375
1376    def regular_method(self):
1377        pass
1378";
1379        let cfg = config_for_extension("py").expect("Python config must compile");
1380        let mut parser = tree_sitter::Parser::new();
1381        parser.set_language(&cfg.language).expect("set language");
1382        let tree = parser.parse(source, None).expect("parse");
1383
1384        let mut cursor = tree_sitter::QueryCursor::new();
1385        let mut matches = cursor.matches(&cfg.query, tree.root_node(), source.as_bytes());
1386
1387        let mut property_kind_found = false;
1388        let mut property_name_found = false;
1389        while let Some(m) = matches.next() {
1390            let mut name = "";
1391            let mut def_kind = "";
1392            for cap in m.captures {
1393                let cap_name = &cfg.query.capture_names()[cap.index as usize];
1394                let text = &source[cap.node.start_byte()..cap.node.end_byte()];
1395                if *cap_name == "name" {
1396                    name = text;
1397                } else if *cap_name == "def" {
1398                    def_kind = cap.node.kind();
1399                }
1400            }
1401            if def_kind == "decorated_definition" && name == "name" {
1402                property_kind_found = true;
1403                property_name_found = true;
1404            }
1405        }
1406        assert!(
1407            property_kind_found,
1408            "Python query must capture decorated_definition for @property method; got none"
1409        );
1410        assert!(
1411            property_name_found,
1412            "Python query must capture 'name' as the method name inside @property definition"
1413        );
1414    }
1415
1416    // =========================================================================
1417    // K2 — Go interface classification (I#17b)
1418    // =========================================================================
1419
1420    /// `test:go_interface_type_classifies_as_interface_kind`
1421    ///
1422    /// Baseline (RED): all Go type declarations used `(type_declaration ...) @def`
1423    /// which sets kind="type_declaration" → TYPE_PARAMETER (26). Interfaces were
1424    /// invisible to interface-kind filters (`kind=11`).
1425    ///
1426    /// After fix (GREEN): interface types use `(interface_type) @def` → kind=
1427    /// "interface_type" → INTERFACE (11).
1428    #[test]
1429    fn go_interface_type_classifies_as_interface_kind() {
1430        assert_eq!(
1431            lsp_symbol_kind_for_node_kind("interface_type"),
1432            lsp_symbol_kind::INTERFACE,
1433            "interface_type must map to SymbolKind::Interface (11); baseline gave TypeParameter (26)"
1434        );
1435    }
1436
1437    /// `test:go_struct_type_classifies_as_struct_kind`
1438    ///
1439    /// Baseline (RED): struct types were also TYPE_PARAMETER (26) via the generic
1440    /// type_declaration pattern. After fix: struct_type → STRUCT (23).
1441    #[test]
1442    fn go_struct_type_classifies_as_struct_kind() {
1443        assert_eq!(
1444            lsp_symbol_kind_for_node_kind("struct_type"),
1445            lsp_symbol_kind::STRUCT,
1446            "struct_type must map to SymbolKind::Struct (23); baseline gave TypeParameter (26)"
1447        );
1448    }
1449
1450    /// The Go query must emit kind="interface_type" for `type Reader interface { ... }`.
1451    ///
1452    /// Parses a Go source snippet and verifies that the compiled Go LangConfig query
1453    /// produces a match with def_kind="interface_type" and name="Reader".
1454    #[test]
1455    fn go_interface_query_captures_interface_type() {
1456        use streaming_iterator::StreamingIterator as _;
1457
1458        let source = r"package io
1459type Reader interface {
1460    Read(p []byte) (n int, err error)
1461}
1462type MyStruct struct {
1463    Name string
1464}
1465func NewReader() Reader {
1466    return nil
1467}
1468";
1469        let cfg = config_for_extension("go").expect("Go config must compile");
1470        let mut parser = tree_sitter::Parser::new();
1471        parser.set_language(&cfg.language).expect("set language");
1472        let tree = parser.parse(source, None).expect("parse");
1473
1474        let mut cursor = tree_sitter::QueryCursor::new();
1475        let mut matches = cursor.matches(&cfg.query, tree.root_node(), source.as_bytes());
1476
1477        let mut interface_found = false;
1478        let mut struct_found = false;
1479        let mut function_found = false;
1480        while let Some(m) = matches.next() {
1481            let mut name = "";
1482            let mut def_kind = "";
1483            for cap in m.captures {
1484                let cap_name = &cfg.query.capture_names()[cap.index as usize];
1485                let text = &source[cap.node.start_byte()..cap.node.end_byte()];
1486                if *cap_name == "name" {
1487                    name = text;
1488                } else if *cap_name == "def" {
1489                    def_kind = cap.node.kind();
1490                }
1491            }
1492            if def_kind == "interface_type" && name == "Reader" {
1493                interface_found = true;
1494            }
1495            if def_kind == "struct_type" && name == "MyStruct" {
1496                struct_found = true;
1497            }
1498            if def_kind == "function_declaration" && name == "NewReader" {
1499                function_found = true;
1500            }
1501        }
1502        assert!(
1503            interface_found,
1504            "Go query must emit def_kind='interface_type' for 'type Reader interface {{ ... }}'"
1505        );
1506        assert!(
1507            struct_found,
1508            "Go query must emit def_kind='struct_type' for 'type MyStruct struct {{ ... }}'"
1509        );
1510        assert!(
1511            function_found,
1512            "Go query must emit def_kind='function_declaration' for 'func NewReader()'"
1513        );
1514    }
1515
1516    // =========================================================================
1517    // K3 — HCL resource naming (I#17c)
1518    // =========================================================================
1519
1520    /// `test:hcl_resource_symbol_uses_type_dot_name`
1521    ///
1522    /// Verifies that `derive_hcl_block_name` produces the `type.name` composite
1523    /// for a two-label HCL block (e.g. `resource "aws_iam_role" "loader" { ... }`
1524    /// → "aws_iam_role.loader").
1525    ///
1526    /// Baseline (RED): the previous HCL query captured the keyword ("resource") as the
1527    /// symbol name, making `lsp_workspace_symbols(query="loader")` unable to find the
1528    /// resource. The query fix makes the chunker emit "loader" as the name; this function
1529    /// enables callers to reconstruct the full "aws_iam_role.loader" composite.
1530    #[test]
1531    fn hcl_resource_symbol_uses_type_dot_name() {
1532        let source = br#"resource "aws_iam_role" "loader" {
1533  assume_role_policy = "assume.json"
1534}
1535"#;
1536        let lang: tree_sitter::Language = tree_sitter_hcl::LANGUAGE.into();
1537        let mut parser = tree_sitter::Parser::new();
1538        parser.set_language(&lang).expect("set HCL language");
1539        let tree = parser.parse(source, None).expect("parse HCL");
1540
1541        // Find the first block node
1542        let root = tree.root_node();
1543        let body = root.child(0).expect("config_file has body");
1544        #[expect(
1545            clippy::cast_possible_truncation,
1546            reason = "child_count() is a small usize; fits in u32"
1547        )]
1548        let block = (0..body.child_count())
1549            .filter_map(|i| body.child(i as u32))
1550            .find(|n| n.kind() == "block")
1551            .expect("should have at least one block node");
1552
1553        let name = derive_hcl_block_name(&block, source);
1554        assert_eq!(
1555            name, "aws_iam_role.loader",
1556            "derive_hcl_block_name must produce 'aws_iam_role.loader' for \
1557             `resource \"aws_iam_role\" \"loader\"` block; got {name:?}"
1558        );
1559    }
1560
1561    /// `test:hcl_data_source_symbol_uses_type_dot_name`
1562    ///
1563    /// Verifies `derive_hcl_block_name` produces "aws_s3_bucket.main" for
1564    /// `data "aws_s3_bucket" "main" { ... }`.
1565    #[test]
1566    fn hcl_data_source_symbol_uses_type_dot_name() {
1567        let source = br#"data "aws_s3_bucket" "main" {
1568  bucket = "my-bucket"
1569}
1570"#;
1571        let lang: tree_sitter::Language = tree_sitter_hcl::LANGUAGE.into();
1572        let mut parser = tree_sitter::Parser::new();
1573        parser.set_language(&lang).expect("set HCL language");
1574        let tree = parser.parse(source, None).expect("parse HCL");
1575
1576        let root = tree.root_node();
1577        let body = root.child(0).expect("config_file has body");
1578        #[expect(
1579            clippy::cast_possible_truncation,
1580            reason = "child_count() returns a small usize; fits in u32"
1581        )]
1582        let block = (0..body.child_count())
1583            .filter_map(|i| body.child(i as u32))
1584            .find(|n| n.kind() == "block")
1585            .expect("block node");
1586
1587        let name = derive_hcl_block_name(&block, source);
1588        assert_eq!(
1589            name, "aws_s3_bucket.main",
1590            "derive_hcl_block_name must produce 'aws_s3_bucket.main'"
1591        );
1592    }
1593
1594    /// The HCL query must capture the resource name (last string label) not the keyword.
1595    ///
1596    /// Verifies that the compiled HCL LangConfig query emits `@name = "loader"` (not
1597    /// "resource") for `resource "aws_iam_role" "loader" { ... }`. This is the live
1598    /// chunker behaviour that makes `lsp_workspace_symbols(query="loader")` work.
1599    #[test]
1600    fn hcl_query_captures_resource_name_not_keyword() {
1601        use streaming_iterator::StreamingIterator as _;
1602
1603        let source = r#"resource "aws_iam_role" "loader" {
1604  x = 1
1605}
1606variable "region" {
1607  type = "string"
1608}
1609output "role_arn" {
1610  value = "arn"
1611}
1612locals {
1613  x = 1
1614}
1615"#;
1616        let cfg = config_for_extension("tf").expect("HCL config must compile");
1617        let mut parser = tree_sitter::Parser::new();
1618        parser.set_language(&cfg.language).expect("set language");
1619        let tree = parser.parse(source, None).expect("parse");
1620
1621        let mut cursor = tree_sitter::QueryCursor::new();
1622        let mut matches = cursor.matches(&cfg.query, tree.root_node(), source.as_bytes());
1623
1624        let mut names: Vec<(String, String)> = Vec::new(); // (name, def_kind)
1625        while let Some(m) = matches.next() {
1626            let mut name = String::new();
1627            let mut def_kind = String::new();
1628            for cap in m.captures {
1629                let cap_name = &cfg.query.capture_names()[cap.index as usize];
1630                let text = &source[cap.node.start_byte()..cap.node.end_byte()];
1631                if *cap_name == "name" {
1632                    name = text.to_string();
1633                } else if *cap_name == "def" {
1634                    def_kind = cap.node.kind().to_string();
1635                }
1636            }
1637            if !name.is_empty() {
1638                names.push((name, def_kind));
1639            }
1640        }
1641
1642        let name_list: Vec<&str> = names.iter().map(|(n, _)| n.as_str()).collect();
1643
1644        // Must capture "loader" (not "resource") for the resource block.
1645        assert!(
1646            name_list.contains(&"loader"),
1647            "HCL query must capture 'loader' (not the keyword 'resource') for resource block; got: {name_list:?}"
1648        );
1649        assert!(
1650            !name_list.contains(&"resource"),
1651            "HCL query must NOT capture the keyword 'resource' as a symbol name; got: {name_list:?}"
1652        );
1653
1654        // Must capture "region" for variable block.
1655        assert!(
1656            name_list.contains(&"region"),
1657            "HCL query must capture 'region' for variable block; got: {name_list:?}"
1658        );
1659
1660        // Must capture "role_arn" for output block.
1661        assert!(
1662            name_list.contains(&"role_arn"),
1663            "HCL query must capture 'role_arn' for output block; got: {name_list:?}"
1664        );
1665
1666        // Must capture "locals" for locals block.
1667        assert!(
1668            name_list.contains(&"locals"),
1669            "HCL query must capture 'locals' for locals block; got: {name_list:?}"
1670        );
1671    }
1672
1673    // =========================================================================
1674    // L1 — Python class_definition kind taxonomy fix (I#19)
1675    // =========================================================================
1676
1677    /// `test:python_class_definition_kind_5`
1678    ///
1679    /// Baseline (RED): `class_definition` was falling through to the wildcard
1680    /// match or returning VARIABLE (13). The Python `class Foo: pass` pattern
1681    /// was classified as kind=20 (Key) in the mnemosyne corpus
1682    /// (ErrorOccurred, OCRCompleted, MnemosyneApp, BaseScreen, BrowseScansScreen).
1683    ///
1684    /// After fix (GREEN): `class_definition` maps to CLASS (5) in
1685    /// `lsp_symbol_kind_for_node_kind`, and the Python query captures
1686    /// `class_definition` with its body node, so `node.kind() == "class_definition"`
1687    /// maps to 5.
1688    #[test]
1689    fn test_python_class_definition_kind_5() {
1690        use streaming_iterator::StreamingIterator as _;
1691
1692        let source = r"class Foo:
1693    pass
1694";
1695        let cfg = config_for_extension("py").expect("Python config must compile");
1696        let mut parser = tree_sitter::Parser::new();
1697        parser.set_language(&cfg.language).expect("set language");
1698        let tree = parser.parse(source, None).expect("parse");
1699
1700        let mut cursor = tree_sitter::QueryCursor::new();
1701        let mut matches = cursor.matches(&cfg.query, tree.root_node(), source.as_bytes());
1702
1703        let mut class_kind_found = false;
1704        while let Some(m) = matches.next() {
1705            for cap in m.captures {
1706                let cap_name = &cfg.query.capture_names()[cap.index as usize];
1707                if *cap_name == "def" {
1708                    let def_kind = cap.node.kind();
1709                    if def_kind == "class_definition" {
1710                        let lsp_kind = lsp_symbol_kind_for_node_kind(def_kind);
1711                        assert_eq!(
1712                            lsp_kind,
1713                            lsp_symbol_kind::CLASS,
1714                            "class_definition must map to SymbolKind::Class (5); got {lsp_kind}"
1715                        );
1716                        class_kind_found = true;
1717                    }
1718                }
1719            }
1720        }
1721
1722        assert!(
1723            class_kind_found,
1724            "Python query must emit def_kind='class_definition' for 'class Foo:' pattern"
1725        );
1726    }
1727
1728    // =========================================================================
1729    // L2 — Go type_alias kind taxonomy fix (I#17b)
1730    // =========================================================================
1731
1732    /// `test:go_type_alias_kind_21`
1733    ///
1734    /// Baseline (RED): `type_alias` (the @def node from Go `type X = Y` patterns)
1735    /// was mapping to TYPE_PARAMETER (26) in the kind match. This matched the
1736    /// previous K2 work which split type_spec into interface_type (→11) and
1737    /// struct_type (→23), but the fallthrough type_alias path still mapped
1738    /// to TYPE_PARAMETER.
1739    ///
1740    /// After fix (GREEN): `type_alias` maps to VARIABLE (21) — a better
1741    /// classification than TypeParameter and semantically closer to an alias.
1742    /// Alternative: could use Constant (14) if the codebase considers aliases
1743    /// as immutable. Variable (21) is used here because:
1744    /// - LSP spec doesn't have a dedicated "Alias" kind
1745    /// - Variable is used in some implementations for type aliases
1746    /// - It provides a type classification separate from pure TypeParameters
1747    ///   (which represent generics like `[T]` in function signatures)
1748    #[test]
1749    fn test_go_type_alias_kind_21() {
1750        use streaming_iterator::StreamingIterator as _;
1751
1752        let source = r"package main
1753
1754type Foo = Bar
1755
1756type Reader interface {
1757    Read(p []byte) (n int, err error)
1758}
1759";
1760        let cfg = config_for_extension("go").expect("Go config must compile");
1761        let mut parser = tree_sitter::Parser::new();
1762        parser.set_language(&cfg.language).expect("set language");
1763        let tree = parser.parse(source, None).expect("parse");
1764
1765        let mut cursor = tree_sitter::QueryCursor::new();
1766        let mut matches = cursor.matches(&cfg.query, tree.root_node(), source.as_bytes());
1767
1768        let mut alias_kind_found = false;
1769        while let Some(m) = matches.next() {
1770            let mut name = "";
1771            let mut def_kind = "";
1772            for cap in m.captures {
1773                let cap_name = &cfg.query.capture_names()[cap.index as usize];
1774                let text = &source[cap.node.start_byte()..cap.node.end_byte()];
1775                if *cap_name == "name" {
1776                    name = text;
1777                } else if *cap_name == "def" {
1778                    def_kind = cap.node.kind();
1779                }
1780            }
1781            if def_kind == "type_alias" && name == "Foo" {
1782                let lsp_kind = lsp_symbol_kind_for_node_kind(def_kind);
1783                assert_eq!(
1784                    lsp_kind,
1785                    lsp_symbol_kind::VARIABLE,
1786                    "type_alias must map to SymbolKind::Variable (13) not TypeParameter (26); got {lsp_kind}"
1787                );
1788                alias_kind_found = true;
1789            }
1790        }
1791
1792        assert!(
1793            alias_kind_found,
1794            "Go query must emit def_kind='type_alias' for 'type Foo = Bar' pattern"
1795        );
1796    }
1797
1798    /// `test:go_type_alias_distinct_from_type_parameter`
1799    ///
1800    /// Verifies that a Go generic type parameter (like `[T any]` in a generic
1801    /// function) gets kind=26 (TypeParameter), while an alias `type Foo = Bar`
1802    /// gets kind=21 (Variable). This documents the distinction: generics stay
1803    /// as TypeParameter, aliases are Variable.
1804    #[test]
1805    fn test_go_type_alias_distinct_from_type_parameter() {
1806        use streaming_iterator::StreamingIterator as _;
1807
1808        let source = r"package main
1809
1810type Foo = Bar
1811
1812func generic[T any](x T) {
1813}
1814";
1815        let cfg = config_for_extension("go").expect("Go config must compile");
1816        let mut parser = tree_sitter::Parser::new();
1817        parser.set_language(&cfg.language).expect("set language");
1818        let tree = parser.parse(source, None).expect("parse");
1819
1820        let mut cursor = tree_sitter::QueryCursor::new();
1821        let mut matches = cursor.matches(&cfg.query, tree.root_node(), source.as_bytes());
1822
1823        let mut alias_found = false;
1824        let mut alias_kind = 0u32;
1825
1826        while let Some(m) = matches.next() {
1827            let mut name = "";
1828            let mut def_kind = "";
1829            for cap in m.captures {
1830                let cap_name = &cfg.query.capture_names()[cap.index as usize];
1831                let text = &source[cap.node.start_byte()..cap.node.end_byte()];
1832                if *cap_name == "name" {
1833                    name = text;
1834                } else if *cap_name == "def" {
1835                    def_kind = cap.node.kind();
1836                }
1837            }
1838            if def_kind == "type_alias" && name == "Foo" {
1839                alias_kind = lsp_symbol_kind_for_node_kind(def_kind);
1840                alias_found = true;
1841            }
1842        }
1843
1844        assert!(
1845            alias_found,
1846            "Go query must emit 'type Foo = Bar' as type_alias; got none"
1847        );
1848        assert_eq!(
1849            alias_kind,
1850            lsp_symbol_kind::VARIABLE,
1851            "type_alias 'Foo' must be kind=13 (Variable), got {alias_kind}"
1852        );
1853
1854        // Note: This test does NOT check generic type parameters because
1855        // the current Go query does not capture them — it only captures
1856        // top-level definitions. Generic parameters in function signatures
1857        // are part of the function_declaration's syntax but not extracted
1858        // as separate definitions, so they will not appear in the query results.
1859        // This is the intended behavior; generics are not searchable symbols
1860        // in the chunker pipeline.
1861    }
1862}