ripvec_core/languages.rs
1//! Language registry mapping file extensions to tree-sitter grammars.
2//!
3//! Each supported language has a grammar and a tree-sitter query that
4//! extracts function, class, and method definitions. Compiled queries
5//! are cached so that repeated calls for the same extension are free.
6
7use std::sync::{Arc, OnceLock};
8
9use tree_sitter::{Language, Query};
10
11/// Configuration for extracting function calls from a language.
12///
13/// Wrapped in [`Arc`] so it can be shared across threads and returned
14/// from the cache without cloning the compiled [`Query`].
15pub struct CallConfig {
16 /// The tree-sitter Language grammar.
17 pub language: Language,
18 /// Query that extracts call sites (`@callee` captures).
19 pub query: Query,
20}
21
22/// Configuration for a supported source language.
23///
24/// Wrapped in [`Arc`] so it can be shared across threads and returned
25/// from the cache without cloning the compiled [`Query`].
26pub struct LangConfig {
27 /// The tree-sitter Language grammar.
28 pub language: Language,
29 /// Query that extracts semantic chunks (`@def` captures with `@name`).
30 pub query: Query,
31}
32
/// LSP `SymbolKind` numeric values, as defined by the Language Server Protocol
/// specification (version 3.17).
///
/// All 26 values (1–26) are listed, including kinds ripvec's node-kind mapping
/// does not currently produce, so the module reads as a complete reference.
/// The constants are named for clarity and to avoid embedding magic numbers at
/// call sites.
pub mod lsp_symbol_kind {
    /// A file symbol. (1)
    pub const FILE: u32 = 1;
    /// A module. (2)
    pub const MODULE: u32 = 2;
    /// A namespace. (3)
    pub const NAMESPACE: u32 = 3;
    /// A package. (4)
    pub const PACKAGE: u32 = 4;
    /// A class. (5)
    pub const CLASS: u32 = 5;
    /// A method. (6)
    pub const METHOD: u32 = 6;
    /// A property. (7)
    pub const PROPERTY: u32 = 7;
    /// A field. (8)
    pub const FIELD: u32 = 8;
    /// A constructor. (9)
    pub const CONSTRUCTOR: u32 = 9;
    /// An enum type. (10)
    pub const ENUM: u32 = 10;
    /// An interface (trait in Rust). (11)
    pub const INTERFACE: u32 = 11;
    /// A function or free function. (12)
    pub const FUNCTION: u32 = 12;
    /// A variable. (13)
    pub const VARIABLE: u32 = 13;
    /// A constant or `const`/`static` item. (14)
    pub const CONSTANT: u32 = 14;
    /// A string symbol. (15)
    pub const STRING: u32 = 15;
    /// A number symbol. (16)
    pub const NUMBER: u32 = 16;
    /// A boolean symbol. (17)
    pub const BOOLEAN: u32 = 17;
    /// An array or slice symbol. (18)
    pub const ARRAY: u32 = 18;
    /// An object or struct-like value. (19)
    pub const OBJECT: u32 = 19;
    /// A key in a key-value pair. (20)
    pub const KEY: u32 = 20;
    /// A null value symbol. (21)
    pub const NULL: u32 = 21;
    /// An enum member / variant. (22)
    pub const ENUM_MEMBER: u32 = 22;
    /// A struct type. (23)
    pub const STRUCT: u32 = 23;
    /// An event. (24)
    pub const EVENT: u32 = 24;
    /// An operator. (25)
    pub const OPERATOR: u32 = 25;
    /// A type parameter. (26)
    ///
    /// LSP has no dedicated type-alias kind; ripvec's node-kind mapping also
    /// uses this value for most type-alias declarations.
    pub const TYPE_PARAMETER: u32 = 26;
}
93
94/// Map a tree-sitter node kind string to an LSP `SymbolKind` numeric value.
95///
96/// The mapping covers Rust node kinds exhaustively, then falls back to a
97/// cross-language best-effort mapping, and finally returns
98/// [`lsp_symbol_kind::VARIABLE`] (13) for any unrecognised kind — preserving
99/// the pre-B1 default so callers that don't need kind-awareness are unaffected.
100///
101/// # Rust node kinds
102///
103/// | tree-sitter kind | LSP SymbolKind |
104/// |---|---|
105/// | `function_item` | 12 (Function) |
106/// | `function_signature_item` | 12 (Function) |
107/// | `struct_item` | 23 (Struct) |
108/// | `enum_item` | 10 (Enum) |
109/// | `trait_item` | 11 (Interface) |
110/// | `impl_item` | 5 (Class) — implementation block |
111/// | `mod_item` | 2 (Module) |
112/// | `const_item` | 14 (Constant) |
113/// | `static_item` | 14 (Constant) |
114/// | `type_item` | 26 (TypeParameter) |
115/// | `field_declaration` | 8 (Field) |
116/// | `enum_variant` | 22 (EnumMember) |
117///
118/// # Cross-language kinds
119///
120/// | tree-sitter kind | LSP SymbolKind |
121/// |---|---|
122/// | `function_definition` / `function_declaration` | 12 (Function) |
123/// | `method_definition` / `method_declaration` | 6 (Method) |
124/// | `class_definition` / `class_declaration` / `class_specifier` | 5 (Class) |
125/// | `interface_declaration` / `trait_definition` / `interface_type` | 11 (Interface) |
126/// | `variable_declarator` / `variable_declaration` / `assignment` | 13 (Variable) |
127/// | `enum_declaration` / `enum_definition` | 10 (Enum) |
128/// | `type_alias_declaration` / `type_definition` / `type_declaration` / `type_alias` | 26 (TypeParameter) |
129/// | `constructor_declaration` | 9 (Constructor) |
130/// | `module` | 2 (Module) |
131/// | `object_definition` / `object_declaration` | 5 (Class) |
132/// | `val_definition` / `var_definition` | 13 (Variable) |
133/// | `property_declaration` / `decorated_definition` | 7 (Property) |
134/// | `namespace_definition` | 3 (Namespace) |
135/// | `protocol_declaration` | 11 (Interface) |
136/// | `typealias_declaration` | 26 (TypeParameter) |
137/// | `struct_type` | 23 (Struct) — Go struct type body |
138/// | `block` / `table` / `pair` | 20 (Key) |
139/// | `atx_heading` | 2 (Module) |
140/// | `element` / `empty_element` | 5 (Class) |
141/// | `rdf_statements` | 19 (Object) |
142/// | `file` / `window` | 1 (File) |
143#[must_use]
144#[expect(
145 clippy::match_same_arms,
146 reason = "variable/assignment patterns are explicit for documentation completeness; \
147 they intentionally duplicate the wildcard fallback (VARIABLE=13) so the \
148 mapping table reads as a self-contained reference"
149)]
150pub fn lsp_symbol_kind_for_node_kind(node_kind: &str) -> u32 {
151 use lsp_symbol_kind as K;
152 match node_kind {
153 // --- Function / method ---
154 // Rust: function_item, function_signature_item
155 // Cross-language: function_definition (Python, Ruby, Scala, Go, Java, Kotlin, Swift),
156 // function_declaration (C, C++, JS, TS, Bash)
157 "function_item"
158 | "function_signature_item"
159 | "function_definition"
160 | "function_declaration" => K::FUNCTION,
161
162 // --- Method (non-free function bound to a type) ---
163 "method_definition" | "method_declaration" => K::METHOD,
164
165 // --- Constructor ---
166 "constructor_declaration" => K::CONSTRUCTOR,
167
168 // --- Struct ---
169 "struct_item" => K::STRUCT,
170
171 // --- Enum ---
172 // Rust: enum_item; cross-language: enum_declaration, enum_definition
173 "enum_item" | "enum_declaration" | "enum_definition" => K::ENUM,
174
175 // --- Enum member / variant ---
176 "enum_variant" => K::ENUM_MEMBER,
177
178 // --- Interface / trait ---
179 // Rust: trait_item; cross-language: interface_declaration, trait_definition,
180 // protocol_declaration (Swift); interface_type (Go — the body node of an
181 // interface type_spec, used as @def in the Go query to distinguish interfaces
182 // from other type declarations — K2 fix)
183 "trait_item"
184 | "interface_declaration"
185 | "trait_definition"
186 | "protocol_declaration"
187 | "interface_type" => K::INTERFACE,
188
189 // --- Struct ---
190 // Go: struct_type is the body node of a struct type_spec; used as @def
191 // to distinguish struct declarations from other type declarations — K2 fix.
192 "struct_type" => K::STRUCT,
193
194 // --- Class / impl block ---
195 // Rust: impl_item (implementation block — closest to LSP Class)
196 // Cross-language: class_definition, class_declaration, class_specifier,
197 // object_definition (Scala), object_declaration (Kotlin),
198 // element (XML/RDF ontology element)
199 "impl_item" | "class_definition" | "class_declaration" | "class_specifier"
200 | "object_definition" | "object_declaration" | "element" => K::CLASS,
201
202 // --- Module / namespace (used by heading-level symbols too) ---
203 // Rust: mod_item; cross-language: module (Python module, Ruby module),
204 // mod_definition; atx_heading (Markdown headings as module-level anchors)
205 "mod_item" | "module" | "mod_definition" | "atx_heading" => K::MODULE,
206
207 // --- Namespace ---
208 "namespace_definition" => K::NAMESPACE,
209
210 // --- Constant ---
211 // Rust: const_item and static_item (immutable statics are semantically constants)
212 // HCL: local_attribute is the synthetic kind emitted by the chunker
213 // for each `local.X = ...` attribute inside an HCL `locals { ... }`
214 // block. T18 Cohesion Refraction queries can now distinguish
215 // individual locals as Constants (R6, Wave 3).
216 "const_item" | "static_item" | "local_attribute" => K::CONSTANT,
217
218 // --- Type alias / type parameter ---
219 // Rust: type_item; cross-language: type_alias_declaration (TS),
220 // type_definition (C typedef), type_declaration (Go fallback for non-interface
221 // non-struct types), typealias_declaration (Swift); type_alias (Go — the body
222 // node of `type X = Y` alias declarations — L2 fix: now maps to VARIABLE instead
223 // of TYPE_PARAMETER to distinguish aliases from pure generics)
224 "type_item"
225 | "type_alias_declaration"
226 | "type_definition"
227 | "type_declaration"
228 | "typealias_declaration" => K::TYPE_PARAMETER,
229
230 // --- Type alias (distinct from type parameters) ---
231 // Go: type_alias is the @def node from `type X = Y` patterns (K2 fix).
232 // L2 fix: maps to VARIABLE (13) instead of TYPE_PARAMETER (26) to distinguish
233 // aliases from pure generics. Variable is used because LSP has no dedicated
234 // alias kind, and Variable better represents the semantic nature of an alias
235 // than TypeParameter (which represents generic type variables like T in [T any]).
236 "type_alias" => K::VARIABLE,
237
238 // --- Field ---
239 "field_declaration" => K::FIELD,
240
241 // --- Variable / assignment ---
242 // Cross-language: variable_declarator (JS/TS), variable_declaration,
243 // assignment (Python, Ruby top-level), val_definition / var_definition (Scala)
244 "variable_declarator"
245 | "variable_declaration"
246 | "assignment"
247 | "val_definition"
248 | "var_definition" => K::VARIABLE,
249
250 // --- Property ---
251 // Kotlin / Swift: property_declaration.
252 // Python: decorated_definition (a function wrapped in a decorator).
253 // NOTE: this string-only mapping returns PROPERTY for ALL
254 // decorated_definition nodes, which is intentionally conservative
255 // (K1 fix: previously fell through to VARIABLE=13 as an unrecognised kind).
256 // Callers that have the full AST node should use `lsp_symbol_kind_for_node`
257 // instead, which inspects the first decorator name and returns FUNCTION (12)
258 // for @classmethod, @staticmethod, and arbitrary decorators (I#39 fix).
259 "property_declaration" | "decorated_definition" => K::PROPERTY,
260
261 // --- Data / config file structural kinds ---
262 "block" | "table" | "pair" => K::KEY,
263
264 // --- SQL kinds (S1, Wave 4) ---
265 // SQL: tree-sitter-sequel emits `create_table` for `CREATE TABLE foo (...)`.
266 // STRUCT (23) matches Rust struct_item and Go struct_type: a table is the
267 // relational equivalent of a record type.
268 "create_table" => K::STRUCT,
269 // SQL: tree-sitter-sequel emits `cte` for `WITH foo AS (SELECT ...)`. CTEs
270 // are scoped intermediate result names — VARIABLE (13) is the closest LSP
271 // shape (no dedicated "alias" kind).
272 "cte" => K::VARIABLE,
273 // SQL: synthetic file-level def emitted by `repo_map::enrich_sql_file_def`
274 // for dbt/sqlmesh files whose model name is the filename stem and whose
275 // sqlmesh `MODEL (name @{schema}.X, ...)` header parses as an ERROR node.
276 // FILE (1) matches the "this whole file is the symbol" semantic.
277 "sql_file" => K::FILE,
278
279 // --- Fallback / special chunker kinds ---
280 "rdf_statements" => K::OBJECT,
281 "file" | "window" => K::FILE,
282
283 // --- Unknown: preserve pre-B1 default (Variable = 13) ---
284 _ => K::VARIABLE,
285 }
286}
287
288/// Resolve the LSP `SymbolKind` for a `decorated_definition` tree-sitter node
289/// given its first decorator name.
290///
291/// This is the authoritative decorator-kind mapping (I#39):
292/// - `"property"` → 7 (Property) — the K1 target case
293/// - `"cached_property"` → 7 (Property) — lazy property with identical semantics
294/// - `"classmethod"` | `"staticmethod"` | any other identifier → 12 (Function)
295/// - Complex decorators (`attribute` / `call` children — e.g. `@functools.wraps(f)`)
296/// are passed as `""` by [`lsp_symbol_kind_for_node`] and fall through to 12.
297///
298/// Callers that have the full tree-sitter node should prefer
299/// [`lsp_symbol_kind_for_node`] which extracts the decorator name automatically.
300/// This function is exposed for callers that have already extracted the decorator
301/// name (e.g., from a stored string field).
302#[must_use]
303pub fn lsp_symbol_kind_for_decorated_definition(first_decorator_name: &str) -> u32 {
304 use lsp_symbol_kind as K;
305 match first_decorator_name {
306 "property" | "cached_property" => K::PROPERTY,
307 // @classmethod and @staticmethod: these are class-bound callables, not
308 // properties. FUNCTION (12) is used instead of METHOD (6) because the
309 // outer context (class vs top-level) is not available here and FUNCTION
310 // is the safer conservative choice. Callers that know the enclosing scope
311 // may upgrade to METHOD.
312 _ => K::FUNCTION,
313 }
314}
315
316/// Extract the name of the first `decorator` child of a `decorated_definition`
317/// tree-sitter node.
318///
319/// Returns `Some(name)` when the first decorator is a simple `identifier`
320/// (e.g., `@property`, `@classmethod`, `@staticmethod`, `@cached_property`).
321///
322/// Returns `None` when:
323/// - The node has no `decorator` children.
324/// - The first decorator's first non-trivial child is an `attribute` node
325/// (e.g., `@functools.wraps`) or a `call` node (e.g., `@functools.wraps(f)`).
326/// These complex decorators are not property-like.
327///
328/// The returned `&str` is a slice of `source` (zero-copy).
329fn first_decorator_ident<'src>(
330 node: &tree_sitter::Node<'_>,
331 source: &'src [u8],
332) -> Option<&'src str> {
333 let mut cursor = node.walk();
334 for child in node.children(&mut cursor) {
335 if child.kind() == "decorator" {
336 // A decorator node looks like: `"@" <expression>`.
337 // Walk its children to find the expression child.
338 let mut inner = child.walk();
339 for inner_child in child.children(&mut inner) {
340 match inner_child.kind() {
341 // Simple @name decorator — return the identifier text.
342 "identifier" => {
343 let start = inner_child.start_byte();
344 let end = inner_child.end_byte();
345 return std::str::from_utf8(&source[start..end]).ok();
346 }
347 // Complex decorator (attribute access or call) — not a simple
348 // identifier; treat as arbitrary (non-property) decorator.
349 "attribute" | "call" => return None,
350 // Skip punctuation/whitespace nodes (e.g., the "@" token).
351 _ => {}
352 }
353 }
354 // Decorator had no recognisable expression child.
355 return None;
356 }
357 }
358 None
359}
360
361/// Map a tree-sitter `Node` to an LSP `SymbolKind` numeric value.
362///
363/// This is the decorator-aware variant of [`lsp_symbol_kind_for_node_kind`].
364/// For `decorated_definition` nodes (Python), it inspects the first decorator
365/// child to distinguish `@property` (→ 7) from other decorators (→ 12).
366/// For all other node kinds, it delegates to [`lsp_symbol_kind_for_node_kind`].
367///
368/// # Python decorator mapping (I#39)
369///
370/// | First decorator | LSP SymbolKind |
371/// |---|---|
372/// | `@property` | 7 (Property) |
373/// | `@cached_property` | 7 (Property) |
374/// | `@classmethod` | 12 (Function) |
375/// | `@staticmethod` | 12 (Function) |
376/// | `@functools.wraps(f)` (complex) | 12 (Function) |
377/// | any other | 12 (Function) |
378///
379/// Callers that have the full tree-sitter parse tree available (e.g., the
380/// chunker's `extract_name_captures`) should call this function instead of
381/// `lsp_symbol_kind_for_node_kind(node.kind())` to get correct Python decorator
382/// classification.
383#[must_use]
384pub fn lsp_symbol_kind_for_node(node: &tree_sitter::Node<'_>, source: &[u8]) -> u32 {
385 if node.kind() == "decorated_definition" {
386 let decorator = first_decorator_ident(node, source).unwrap_or("");
387 return lsp_symbol_kind_for_decorated_definition(decorator);
388 }
389 lsp_symbol_kind_for_node_kind(node.kind())
390}
391
392/// Check whether a [`tree_sitter::Language`] is the Rust grammar.
393///
394/// Used by [`crate::repo_map`] to gate Rust-specific receiver-type heuristics.
395/// Compares the node-kind count as a proxy for grammar identity (the node-kind
396/// string table is stable across compatible ABI versions and differs between
397/// grammars).
398#[must_use]
399pub fn is_rust_language(lang: &tree_sitter::Language) -> bool {
400 let rust_lang: tree_sitter::Language = tree_sitter_rust::LANGUAGE.into();
401 // Both must have the same ABI version AND the same number of node kinds
402 // (a grammar-specific constant). This is not a guaranteed identity check but
403 // is reliable enough for our heuristic gating.
404 lang.abi_version() == rust_lang.abi_version()
405 && lang.node_kind_count() == rust_lang.node_kind_count()
406}
407
408/// Check whether a [`tree_sitter::Language`] is the Python grammar.
409///
410/// Used by [`crate::repo_map`] to gate Python-specific receiver-type heuristics.
411/// Same node-kind-count proxy as [`is_rust_language`].
412#[must_use]
413pub fn is_python_language(lang: &tree_sitter::Language) -> bool {
414 let py_lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
415 lang.abi_version() == py_lang.abi_version()
416 && lang.node_kind_count() == py_lang.node_kind_count()
417}
418
419/// Check whether a [`tree_sitter::Language`] is the Go grammar.
420///
421/// Used by [`crate::repo_map`] to gate Go-specific receiver-type heuristics.
422/// Same node-kind-count proxy as [`is_rust_language`].
423#[must_use]
424pub fn is_go_language(lang: &tree_sitter::Language) -> bool {
425 let go_lang: tree_sitter::Language = tree_sitter_go::LANGUAGE.into();
426 lang.abi_version() == go_lang.abi_version()
427 && lang.node_kind_count() == go_lang.node_kind_count()
428}
429
430/// Check whether a [`tree_sitter::Language`] is the HCL grammar.
431///
432/// Used by [`crate::repo_map`] to gate HCL-specific call-edge extraction
433/// (terraform_remote_state references and module blocks — R2/R3, Wave 3).
434/// Same node-kind-count proxy as [`is_rust_language`].
435#[must_use]
436pub fn is_hcl_language(lang: &tree_sitter::Language) -> bool {
437 let hcl_lang: tree_sitter::Language = tree_sitter_hcl::LANGUAGE.into();
438 lang.abi_version() == hcl_lang.abi_version()
439 && lang.node_kind_count() == hcl_lang.node_kind_count()
440}
441
442/// Check whether a [`tree_sitter::Language`] is the SQL grammar (tree-sitter-sequel).
443///
444/// Used by [`crate::repo_map`] to gate SQL-specific enrichment — the synthetic
445/// file-level def for dbt/sqlmesh models (S1, Wave 4). Same node-kind-count proxy
446/// as [`is_rust_language`].
447#[must_use]
448pub fn is_sql_language(lang: &tree_sitter::Language) -> bool {
449 let sql_lang: tree_sitter::Language = tree_sitter_sequel::LANGUAGE.into();
450 lang.abi_version() == sql_lang.abi_version()
451 && lang.node_kind_count() == sql_lang.node_kind_count()
452}
453
454/// Derive the canonical symbol name for an HCL `block` AST node.
455///
456/// HCL blocks have the form `keyword "type_label" "name_label" { ... }`:
457/// - `resource "aws_iam_role" "loader" { ... }` → `"aws_iam_role.loader"`
458/// - `data "aws_s3_bucket" "main" { ... }` → `"aws_s3_bucket.main"`
459/// - `variable "region" { ... }` → `"region"`
460/// - `output "role_arn" { ... }` → `"role_arn"`
461/// - `locals { ... }` → `"locals"`
462///
463/// This function is the authoritative composite-name implementation (K3). The chunker
464/// pipeline uses the `@name` capture from the HCL query (the last string label or the
465/// keyword for no-label blocks). Callers that need the full `type.name` format — e.g.
466/// `"aws_iam_role.loader"` — should call this function directly after identifying the
467/// block node.
468///
469/// Returns an owned `String` with the composite name. If the `block_node` is not
470/// a `block` node, returns an empty string.
471#[must_use]
472pub fn derive_hcl_block_name(block_node: &tree_sitter::Node<'_>, source: &[u8]) -> String {
473 if block_node.kind() != "block" {
474 return String::new();
475 }
476 // Collect all template_literal texts from string_lit children (the block labels).
477 // Children order: identifier, string_lit*, block_start, body, block_end.
478 let mut labels: Vec<&str> = Vec::new();
479 let mut cursor = block_node.walk();
480 for child in block_node.children(&mut cursor) {
481 if child.kind() == "string_lit" {
482 // Walk into string_lit to find template_literal
483 let mut inner = child.walk();
484 for grandchild in child.children(&mut inner) {
485 if grandchild.kind() == "template_literal" {
486 let start = grandchild.start_byte();
487 let end = grandchild.end_byte();
488 if let Ok(text) = std::str::from_utf8(&source[start..end]) {
489 labels.push(text);
490 }
491 }
492 }
493 } else if child.kind() == "block_start" {
494 // Stop at the opening brace — everything after is the block body.
495 break;
496 }
497 }
498 match labels.len() {
499 0 => {
500 // No string labels — use the identifier (e.g. "locals").
501 let mut cursor2 = block_node.walk();
502 for child in block_node.children(&mut cursor2) {
503 if child.kind() == "identifier" {
504 let start = child.start_byte();
505 let end = child.end_byte();
506 if let Ok(text) = std::str::from_utf8(&source[start..end]) {
507 return text.to_string();
508 }
509 }
510 }
511 String::new()
512 }
513 1 => labels[0].to_string(),
514 _ => {
515 // Two or more labels: join all but the keyword portion.
516 // Convention: skip the first label if there are exactly two (type.name).
517 // For three or more labels, join all with dots.
518 labels.join(".")
519 }
520 }
521}
522
523/// Look up the language configuration for a file extension.
524///
525/// Compiled queries are cached per extension so repeated calls are free.
526/// Returns `None` for unsupported extensions.
527#[must_use]
528pub fn config_for_extension(ext: &str) -> Option<Arc<LangConfig>> {
529 // Cache of compiled configs, keyed by canonical extension.
530 static CACHE: OnceLock<std::collections::HashMap<&'static str, Arc<LangConfig>>> =
531 OnceLock::new();
532
533 let cache = CACHE.get_or_init(|| {
534 let mut m = std::collections::HashMap::new();
535 // Pre-compile all supported extensions
536 for &ext in &[
537 "rs", "py", "pyi", "js", "jsx", "ts", "tsx", "go", "java", "c", "h", "cpp", "cc",
538 "cxx", "hpp", "sh", "bash", "bats", "rb", "tf", "tfvars", "hcl", "kt", "kts", "swift",
539 "scala", "toml", "json", "yaml", "yml", "md", "xml", "rdf", "owl", "sql",
540 ] {
541 if let Some(cfg) = compile_config(ext) {
542 m.insert(ext, Arc::new(cfg));
543 }
544 }
545 m
546 });
547
548 cache.get(ext).cloned()
549}
550
551/// Compile a [`LangConfig`] for the given extension (uncached).
552#[expect(
553 clippy::too_many_lines,
554 reason = "one match arm per language — flat by design"
555)]
556fn compile_config(ext: &str) -> Option<LangConfig> {
557 let (lang, query_str): (Language, &str) = match ext {
558 // Rust: standalone functions, structs, and methods INSIDE impl/trait blocks.
559 // impl_item and trait_item are NOT captured as wholes — we extract their
560 // individual function_item children for method-level granularity.
561 "rs" => (
562 tree_sitter_rust::LANGUAGE.into(),
563 concat!(
564 "(function_item name: (identifier) @name) @def\n",
565 "(struct_item name: (type_identifier) @name) @def\n",
566 "(enum_item name: (type_identifier) @name) @def\n",
567 "(type_item name: (type_identifier) @name) @def\n",
568 "(field_declaration name: (field_identifier) @name) @def\n",
569 "(enum_variant name: (identifier) @name) @def\n",
570 "(impl_item type: (type_identifier) @name) @def\n",
571 "(trait_item name: (type_identifier) @name) @def\n",
572 "(const_item name: (identifier) @name) @def\n",
573 "(static_item name: (identifier) @name) @def\n",
574 "(mod_item name: (identifier) @name) @def",
575 ),
576 ),
577 // Python: top-level functions AND methods inside classes (function_definition
578 // matches at any nesting depth, so methods are captured individually).
579 //
580 // K1 fix: decorated functions (e.g. @property, @classmethod, @staticmethod)
581 // are captured as decorated_definition with @name taken from the inner
582 // function_definition. The chunker pipeline does not evaluate tree-sitter
583 // predicates, so all decorated_definition nodes emit kind="decorated_definition"
        // → SymbolKind::PROPERTY (7). The @property case is the primary target; other
585 // decorators are over-classified as PROPERTY but previously fell through to
586 // VARIABLE (13) as an unrecognised kind, which was worse.
587 "py" | "pyi" => (
588 tree_sitter_python::LANGUAGE.into(),
589 concat!(
590 "(decorated_definition (function_definition name: (identifier) @name)) @def\n",
591 "(function_definition name: (identifier) @name) @def\n",
592 "(class_definition name: (identifier) @name) @def\n",
593 "(assignment left: (identifier) @name) @def",
594 ),
595 ),
596 // JS: functions, methods, and arrow functions assigned to variables.
597 "js" | "jsx" => (
598 tree_sitter_javascript::LANGUAGE.into(),
599 concat!(
600 "(function_declaration name: (identifier) @name) @def\n",
601 "(method_definition name: (property_identifier) @name) @def\n",
602 "(class_declaration name: (identifier) @name) @def\n",
603 "(variable_declarator name: (identifier) @name) @def",
604 ),
605 ),
606 "ts" => (
607 tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
608 concat!(
609 "(function_declaration name: (identifier) @name) @def\n",
610 "(method_definition name: (property_identifier) @name) @def\n",
611 "(class_declaration name: (type_identifier) @name) @def\n",
612 "(interface_declaration name: (type_identifier) @name) @def\n",
613 "(variable_declarator name: (identifier) @name) @def\n",
614 "(type_alias_declaration name: (type_identifier) @name) @def\n",
615 "(enum_declaration name: (identifier) @name) @def",
616 ),
617 ),
618 "tsx" => (
619 tree_sitter_typescript::LANGUAGE_TSX.into(),
620 concat!(
621 "(function_declaration name: (identifier) @name) @def\n",
622 "(method_definition name: (property_identifier) @name) @def\n",
623 "(class_declaration name: (type_identifier) @name) @def\n",
624 "(interface_declaration name: (type_identifier) @name) @def\n",
625 "(variable_declarator name: (identifier) @name) @def\n",
626 "(type_alias_declaration name: (type_identifier) @name) @def\n",
627 "(enum_declaration name: (identifier) @name) @def",
628 ),
629 ),
630 // Go: functions, methods, and type declarations.
631 //
632 // K2 fix: distinguish interface types (kind=11, Interface) from struct types
633 // (kind=23, Struct) from other type declarations (kind=26, TypeParameter).
634 // The previous single `(type_declaration (type_spec ...)) @def` pattern emitted
635 // kind="type_declaration" for ALL types, mapping to TYPE_PARAMETER (26) and
636 // making Go interfaces invisible to interface-kind filters.
637 //
638 // Strategy: use the INNER type body node as @def so that `node.kind()` reflects
639 // the concrete type kind:
640 // - `interface_type` @def → kind="interface_type" → INTERFACE (11)
641 // - `struct_type` @def → kind="struct_type" → STRUCT (23)
        //   - `type_alias` @def → kind="type_alias" → VARIABLE (13)
643 //
644 // Non-interface non-struct type_specs (e.g. `type MyChan chan int`) are NOT
645 // captured; this is intentional to avoid duplicate chunks for the same declaration.
646 "go" => (
647 tree_sitter_go::LANGUAGE.into(),
648 concat!(
649 "(function_declaration name: (identifier) @name) @def\n",
650 "(method_declaration name: (field_identifier) @name) @def\n",
651 // Interface type: @def = interface_type → kind="interface_type" → INTERFACE
652 "(type_declaration (type_spec name: (type_identifier) @name type: (interface_type) @def))\n",
653 // Struct type: @def = struct_type → kind="struct_type" → STRUCT
654 "(type_declaration (type_spec name: (type_identifier) @name type: (struct_type) @def))\n",
                // Type alias: @def = type_alias → kind="type_alias" → VARIABLE
656 "(type_declaration (type_alias name: (type_identifier) @name) @def)\n",
657 "(const_spec name: (identifier) @name) @def",
658 ),
659 ),
660 // Java: methods are already captured individually (method_declaration
661 // matches inside class bodies). Keep class for the signature/fields.
662 "java" => (
663 tree_sitter_java::LANGUAGE.into(),
664 concat!(
665 "(method_declaration name: (identifier) @name) @def\n",
666 "(class_declaration name: (identifier) @name) @def\n",
667 "(interface_declaration name: (identifier) @name) @def\n",
668 "(field_declaration declarator: (variable_declarator name: (identifier) @name)) @def\n",
669 "(enum_constant name: (identifier) @name) @def\n",
670 "(enum_declaration name: (identifier) @name) @def\n",
671 "(constructor_declaration name: (identifier) @name) @def",
672 ),
673 ),
674 "c" | "h" => (
675 tree_sitter_c::LANGUAGE.into(),
676 concat!(
677 "(function_definition declarator: (function_declarator declarator: (identifier) @name)) @def\n",
678 "(declaration declarator: (init_declarator declarator: (identifier) @name)) @def\n",
679 "(struct_specifier name: (type_identifier) @name) @def\n",
680 "(enum_specifier name: (type_identifier) @name) @def\n",
681 "(type_definition declarator: (type_identifier) @name) @def",
682 ),
683 ),
684 // C++: functions at any level, plus class signatures.
685 "cpp" | "cc" | "cxx" | "hpp" => (
686 tree_sitter_cpp::LANGUAGE.into(),
687 concat!(
688 "(function_definition declarator: (function_declarator declarator: (identifier) @name)) @def\n",
689 "(class_specifier name: (type_identifier) @name) @def\n",
690 "(declaration declarator: (init_declarator declarator: (identifier) @name)) @def\n",
691 "(struct_specifier name: (type_identifier) @name) @def\n",
692 "(enum_specifier name: (type_identifier) @name) @def\n",
693 "(type_definition declarator: (type_identifier) @name) @def\n",
694 "(namespace_definition name: (namespace_identifier) @name) @def\n",
695 "(field_declaration declarator: (field_identifier) @name) @def",
696 ),
697 ),
698 // Bash: function definitions (.bats = Bash Automated Testing System).
699 "sh" | "bash" | "bats" => (
700 tree_sitter_bash::LANGUAGE.into(),
701 concat!(
702 "(function_definition name: (word) @name) @def\n",
703 "(variable_assignment name: (variable_name) @name) @def",
704 ),
705 ),
706 // Ruby: methods, classes, and modules.
707 "rb" => (
708 tree_sitter_ruby::LANGUAGE.into(),
709 concat!(
710 "(method name: (identifier) @name) @def\n",
711 "(class name: (constant) @name) @def\n",
712 "(module name: (constant) @name) @def\n",
713 "(assignment left: (identifier) @name) @def\n",
714 "(assignment left: (constant) @name) @def",
715 ),
716 ),
717 // HCL (Terraform): resource, data, variable, and output blocks.
718 //
719 // K3 fix: index blocks by their semantic name rather than the keyword.
720 // Previous query `(block (identifier) @name)` captured the block keyword
721 // (e.g. "resource") as the symbol name, making `lsp_workspace_symbols(query="loader")`
722 // unable to find `resource "aws_iam_role" "loader" { ... }`.
723 //
724 // Fixed query uses dot-anchor patterns to select the LAST string label
725 // immediately before the opening `{` (block_start):
726 // - `resource "aws_iam_role" "loader" {}` → last string_lit before { = "loader" ✓
727 // - `data "aws_s3_bucket" "main" {}` → last string_lit before { = "main" ✓
728 // - `variable "region" {}` → only string_lit before { = "region" ✓
729 // - `output "role_arn" {}` → only string_lit before { = "role_arn" ✓
730 // - `locals {}` → no string_lit; identifier before { = "locals" ✓
731 //
732 // Note: the composite `type.name` format (e.g. "aws_iam_role.loader") is available
733 // via [`derive_hcl_block_name`] for callers that need it. The chunker uses the
734 // `@name` capture (the last string_lit label or identifier) which already enables
735 // workspace symbol queries to find resources by their specific name.
736 "tf" | "tfvars" | "hcl" => (
737 tree_sitter_hcl::LANGUAGE.into(),
738 concat!(
739 // Last string_lit immediately before block_start (covers both single-label
740 // and multi-label blocks; the dot anchor selects only the final label).
741 "(block (string_lit (template_literal) @name) . (block_start)) @def\n",
742 // No-label blocks (e.g. locals): identifier immediately before block_start.
743 "(block (identifier) @name . (block_start)) @def",
744 ),
745 ),
746 // Kotlin: functions, classes, and objects.
747 "kt" | "kts" => (
748 tree_sitter_kotlin_ng::LANGUAGE.into(),
749 concat!(
750 "(function_declaration name: (identifier) @name) @def\n",
751 "(class_declaration name: (identifier) @name) @def\n",
752 "(object_declaration name: (identifier) @name) @def\n",
753 "(property_declaration (identifier) @name) @def\n",
754 "(enum_entry (identifier) @name) @def",
755 ),
756 ),
757 // Swift: functions, classes, structs, enums, and protocols.
758 "swift" => (
759 tree_sitter_swift::LANGUAGE.into(),
760 concat!(
761 "(function_declaration name: (simple_identifier) @name) @def\n",
762 "(class_declaration name: (type_identifier) @name) @def\n",
763 "(protocol_declaration name: (type_identifier) @name) @def\n",
764 "(property_declaration name: (pattern bound_identifier: (simple_identifier) @name)) @def\n",
765 "(typealias_declaration name: (type_identifier) @name) @def",
766 ),
767 ),
768 // Scala: functions, classes, traits, and objects.
769 "scala" => (
770 tree_sitter_scala::LANGUAGE.into(),
771 concat!(
772 "(function_definition name: (identifier) @name) @def\n",
773 "(class_definition name: (identifier) @name) @def\n",
774 "(trait_definition name: (identifier) @name) @def\n",
775 "(object_definition name: (identifier) @name) @def\n",
776 "(val_definition pattern: (identifier) @name) @def\n",
777 "(var_definition pattern: (identifier) @name) @def\n",
778 "(type_definition name: (type_identifier) @name) @def",
779 ),
780 ),
781 // TOML: table headers (sections).
782 "toml" => (
783 tree_sitter_toml_ng::LANGUAGE.into(),
784 concat!(
785 "(table (bare_key) @name) @def\n",
786 "(pair (bare_key) @name) @def",
787 ),
788 ),
789 // JSON: key-value pairs, capturing the key string content.
790 "json" => (
791 tree_sitter_json::LANGUAGE.into(),
792 "(pair key: (string (string_content) @name)) @def",
793 ),
794 // YAML: block mapping pairs with plain scalar keys.
795 "yaml" | "yml" => (
796 tree_sitter_yaml::LANGUAGE.into(),
797 "(block_mapping_pair key: (flow_node (plain_scalar (string_scalar) @name))) @def",
798 ),
799 // Markdown: ATX headings (# through ######), capturing the heading text.
800 "md" => (
801 tree_sitter_md::LANGUAGE.into(),
802 "(atx_heading heading_content: (inline) @name) @def",
803 ),
804 // RDF/XML and OWL/XML are XML documents; capture each element so
805 // ontology classes/properties become searchable semantic chunks.
806 "xml" | "rdf" | "owl" => (
807 tree_sitter_xml::LANGUAGE_XML.into(),
808 concat!(
809 "(element (STag (Name) @name)) @def\n",
810 "(element (EmptyElemTag (Name) @name)) @def",
811 ),
812 ),
813 // SQL: CREATE TABLE statements and common table expressions (CTEs).
814 // Powered by tree-sitter-sequel (derekstride/tree-sitter-sql).
815 //
816 // dbt/sqlmesh files conventionally name their model by the *filename*
817 // rather than an in-source CREATE TABLE — see `enrich_sql_file_def` in
818 // repo_map.rs for the synthetic file-level def that fills that gap.
819 // The chunker-level query below captures any in-source CREATE TABLE
820 // and CTE so they remain searchable semantic chunks even when the
821 // file uses sqlmesh `MODEL (...)` headers (which parse as ERROR nodes
822 // — FROM/JOIN still extract cleanly post-error per S1 design).
823 "sql" => (
824 tree_sitter_sequel::LANGUAGE.into(),
825 concat!(
826 // CREATE TABLE foo — table-as-def.
827 "(create_table (object_reference name: (identifier) @name)) @def\n",
828 // WITH foo AS (SELECT ...) — CTE-as-def.
829 "(cte (identifier) @name) @def",
830 ),
831 ),
832 _ => return None,
833 };
834 let query = match Query::new(&lang, query_str) {
835 Ok(q) => q,
836 Err(e) => {
837 tracing::warn!(ext, %e, "tree-sitter query compilation failed — language may be ABI-incompatible");
838 return None;
839 }
840 };
841 Some(LangConfig {
842 language: lang,
843 query,
844 })
845}
846
847/// Look up the call-extraction query for a file extension.
848///
849/// Compiled queries are cached per extension so repeated calls are free.
850/// Returns `None` for unsupported extensions (including TOML, which has
851/// no function calls).
852#[must_use]
853pub fn call_query_for_extension(ext: &str) -> Option<Arc<CallConfig>> {
854 static CACHE: OnceLock<std::collections::HashMap<&'static str, Arc<CallConfig>>> =
855 OnceLock::new();
856
857 let cache = CACHE.get_or_init(|| {
858 let mut m = std::collections::HashMap::new();
859 // Pre-compile for all extensions that have callable constructs.
860 // TOML is deliberately excluded — it has no function calls.
861 // SQL has FROM/JOIN as call-edges (model-to-model references) —
862 // emitted by the per-language call query plus a synthetic
863 // file-level def in repo_map::enrich_sql_file_def (S1, Wave 4).
864 for &ext in &[
865 "rs", "py", "pyi", "js", "jsx", "ts", "tsx", "go", "java", "c", "h", "cpp", "cc",
866 "cxx", "hpp", "sh", "bash", "bats", "rb", "tf", "tfvars", "hcl", "kt", "kts", "swift",
867 "scala", "sql",
868 ] {
869 if let Some(cfg) = compile_call_config(ext) {
870 m.insert(ext, Arc::new(cfg));
871 }
872 }
873 m
874 });
875
876 cache.get(ext).cloned()
877}
878
/// Compile a [`CallConfig`] for the given extension (uncached).
///
/// Each query extracts the callee identifier (`@callee`) from function
/// and method calls, plus the whole call expression (`@call`).
///
/// Returns `None` when `ext` matches none of the arms below, or when the
/// query fails to compile against the selected grammar (a warning is
/// logged before returning in that case).
#[expect(
    clippy::too_many_lines,
    reason = "one match arm per language — flat by design"
)]
fn compile_call_config(ext: &str) -> Option<CallConfig> {
    // Select the grammar and the call-query source text for this extension.
    let (lang, query_str): (Language, &str) = match ext {
        // Rust: free calls, method calls, and scoped (path) calls.
        //
        // For scoped calls, capture the full `scoped_identifier` node as @callee
        // (not just the trailing `(identifier)` child). This preserves the qualified
        // path so that `mod_a::foo()` records "mod_a::foo" rather than bare "foo",
        // enabling cross-module disambiguation in `resolve_calls`.
        "rs" => (
            tree_sitter_rust::LANGUAGE.into(),
            concat!(
                "(call_expression function: (identifier) @callee) @call\n",
                "(call_expression function: (field_expression field: (field_identifier) @callee)) @call\n",
                "(call_expression function: (scoped_identifier) @callee) @call",
            ),
        ),
        // Python: simple calls and attribute (method) calls.
        "py" | "pyi" => (
            tree_sitter_python::LANGUAGE.into(),
            concat!(
                "(call function: (identifier) @callee) @call\n",
                "(call function: (attribute attribute: (identifier) @callee)) @call",
            ),
        ),
        // JavaScript: function calls and member expression calls.
        "js" | "jsx" => (
            tree_sitter_javascript::LANGUAGE.into(),
            concat!(
                "(call_expression function: (identifier) @callee) @call\n",
                "(call_expression function: (member_expression property: (property_identifier) @callee)) @call",
            ),
        ),
        // TypeScript: same patterns as JavaScript, compiled against the
        // TypeScript grammar.
        "ts" => (
            tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
            concat!(
                "(call_expression function: (identifier) @callee) @call\n",
                "(call_expression function: (member_expression property: (property_identifier) @callee)) @call",
            ),
        ),
        // TSX: same patterns as JavaScript, compiled against the TSX grammar.
        "tsx" => (
            tree_sitter_typescript::LANGUAGE_TSX.into(),
            concat!(
                "(call_expression function: (identifier) @callee) @call\n",
                "(call_expression function: (member_expression property: (property_identifier) @callee)) @call",
            ),
        ),
        // Go: function calls and selector (method) calls.
        "go" => (
            tree_sitter_go::LANGUAGE.into(),
            concat!(
                "(call_expression function: (identifier) @callee) @call\n",
                "(call_expression function: (selector_expression field: (field_identifier) @callee)) @call",
            ),
        ),
        // Java: method invocations.
        "java" => (
            tree_sitter_java::LANGUAGE.into(),
            "(method_invocation name: (identifier) @callee) @call",
        ),
        // C: function calls and field-expression calls (function pointers).
        "c" | "h" => (
            tree_sitter_c::LANGUAGE.into(),
            concat!(
                "(call_expression function: (identifier) @callee) @call\n",
                "(call_expression function: (field_expression field: (field_identifier) @callee)) @call",
            ),
        ),
        // C++: same patterns as C, compiled against the C++ grammar.
        "cpp" | "cc" | "cxx" | "hpp" => (
            tree_sitter_cpp::LANGUAGE.into(),
            concat!(
                "(call_expression function: (identifier) @callee) @call\n",
                "(call_expression function: (field_expression field: (field_identifier) @callee)) @call",
            ),
        ),
        // Bash: command invocations (.bats = Bash Automated Testing System).
        "sh" | "bash" | "bats" => (
            tree_sitter_bash::LANGUAGE.into(),
            "(command name: (command_name (word) @callee)) @call",
        ),
        // Ruby: method calls.
        "rb" => (
            tree_sitter_ruby::LANGUAGE.into(),
            "(call method: (identifier) @callee) @call",
        ),
        // HCL (Terraform): built-in function calls.
        "tf" | "tfvars" | "hcl" => (
            tree_sitter_hcl::LANGUAGE.into(),
            "(function_call (identifier) @callee) @call",
        ),
        // Kotlin: call expressions — grammar uses unnamed children, so match
        // identifier as first child of call_expression.
        "kt" | "kts" => (
            tree_sitter_kotlin_ng::LANGUAGE.into(),
            "(call_expression (identifier) @callee) @call",
        ),
        // Swift: call expressions with simple identifiers.
        "swift" => (
            tree_sitter_swift::LANGUAGE.into(),
            "(call_expression (simple_identifier) @callee) @call",
        ),
        // Scala: function calls and field-expression (method) calls.
        "scala" => (
            tree_sitter_scala::LANGUAGE.into(),
            concat!(
                "(call_expression function: (identifier) @callee) @call\n",
                "(call_expression function: (field_expression field: (identifier) @callee)) @call",
            ),
        ),
        // SQL: FROM <table> and JOIN <table> as call-edges. Schema-qualified
        // names like `analytics.silver_X` parse as
        //   (object_reference schema: (identifier) name: (identifier))
        // — the field selector `name:` picks the table identifier and skips
        // the schema prefix, which is correct for cross-model resolution
        // (downstream dbt/sqlmesh models reference each other by table name
        // not by schema + name).
        "sql" => (
            tree_sitter_sequel::LANGUAGE.into(),
            concat!(
                // FROM <table>: relation > object_reference > name identifier.
                "(from (relation (object_reference name: (identifier) @callee))) @call\n",
                // JOIN <table>: same shape, inside a join clause.
                "(join (relation (object_reference name: (identifier) @callee))) @call",
            ),
        ),
        // No call query is defined for any other extension.
        _ => return None,
    };
    // A query that fails to compile is logged and reported as `None`
    // instead of panicking.
    let query = match Query::new(&lang, query_str) {
        Ok(q) => q,
        Err(e) => {
            tracing::warn!(ext, %e, "tree-sitter call query compilation failed");
            return None;
        }
    };
    Some(CallConfig {
        language: lang,
        query,
    })
}
1028
1029#[cfg(test)]
1030mod tests {
1031 use super::*;
1032
/// The `rs` extension must resolve to a language configuration.
#[test]
fn rust_extension_resolves() {
    let resolved = config_for_extension("rs");
    assert!(resolved.is_some());
}
1037
/// The `py` extension must resolve to a language configuration.
#[test]
fn python_extension_resolves() {
    let resolved = config_for_extension("py");
    assert!(resolved.is_some());
}
1042
/// The `pyi` (Python stub) extension must resolve to a language configuration.
#[test]
fn python_stub_extension_resolves() {
    let resolved = config_for_extension("pyi");
    assert!(resolved.is_some());
}
1047
/// An extension with no registered language must yield `None`.
#[test]
fn unknown_extension_returns_none() {
    let resolved = config_for_extension("xyz");
    assert!(resolved.is_none());
}
1052
/// Every extension listed here must resolve to a language configuration;
/// the failure message names the first extension that does not.
#[test]
fn all_supported_extensions() {
    for ext in [
        "rs", "py", "pyi", "js", "jsx", "ts", "tsx", "go", "java", "c", "h", "cpp", "cc", "cxx",
        "hpp", "sh", "bash", "bats", "rb", "tf", "tfvars", "hcl", "kt", "kts", "swift", "scala",
        "toml", "json", "yaml", "yml", "md", "xml", "rdf", "owl", "sql",
    ] {
        let resolved = config_for_extension(ext);
        assert!(resolved.is_some(), "failed for {ext}");
    }
}
1064
/// Turtle-family extensions must have no tree-sitter configuration and must
/// be recognised by `crate::chunk::is_rdf_text_extension` instead.
#[test]
fn turtle_family_uses_rdf_text_chunking_not_tree_sitter() {
    let turtle_family = ["ttl", "nt", "n3", "trig", "nq"];
    for ext in turtle_family {
        let tree_sitter_config = config_for_extension(ext);
        assert!(
            tree_sitter_config.is_none(),
            "{ext} should be handled by RDF text chunking"
        );
        let is_rdf_text = crate::chunk::is_rdf_text_extension(ext);
        assert!(is_rdf_text);
    }
}
1075
/// Every extension listed here must have a compiled call query; the failure
/// message names the first extension that does not.
#[test]
fn all_call_query_extensions() {
    for ext in [
        "rs", "py", "pyi", "js", "jsx", "ts", "tsx", "go", "java", "c", "h", "cpp", "cc", "cxx",
        "hpp", "sh", "bash", "bats", "rb", "tf", "tfvars", "hcl", "kt", "kts", "swift", "scala",
        "sql",
    ] {
        let call_cfg = call_query_for_extension(ext);
        assert!(call_cfg.is_some(), "call query failed for {ext}");
    }
}
1090
/// TOML must not have a call query.
#[test]
fn toml_has_no_call_query() {
    let call_cfg = call_query_for_extension("toml");
    assert!(call_cfg.is_none());
}
1095
/// RED test (R2.3 issue a): a call through a scoped path must record the
/// whole path as `@callee`.
///
/// Before the fix, `mod_a::foo()` yielded only `foo` as `@callee`; after the
/// fix it must yield `mod_a::foo`, and the bare `foo` must not appear.
#[test]
fn test_scoped_identifier_call_query_captures_full_path() {
    use streaming_iterator::StreamingIterator as _;

    let source = "
fn caller() {
    mod_a::foo();
    std::io::stderr();
}
";
    let call_cfg = call_query_for_extension("rs").expect("rs call config");
    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&call_cfg.language)
        .expect("set language");
    let tree = parser.parse(source, None).expect("parse");

    let mut cursor = tree_sitter::QueryCursor::new();
    let mut matches = cursor.matches(&call_cfg.query, tree.root_node(), source.as_bytes());

    // Gather the source text of every `@callee` capture across all matches.
    let capture_names = call_cfg.query.capture_names();
    let mut callees: Vec<&str> = Vec::new();
    while let Some(found) = matches.next() {
        callees.extend(
            found
                .captures
                .iter()
                .filter(|cap| capture_names[cap.index as usize] == "callee")
                .map(|cap| &source[cap.node.start_byte()..cap.node.end_byte()]),
        );
    }

    // The fully qualified path must be present ...
    assert!(
        callees.contains(&"mod_a::foo"),
        "expected 'mod_a::foo' in callees, got: {callees:?}"
    );
    // ... and the bare trailing identifier must not be.
    assert!(
        !callees.contains(&"foo"),
        "bare 'foo' must not appear for scoped call; got: {callees:?}"
    );
}
1142
1143 // -------------------------------------------------------------------------
1144 // B1: tree-sitter node-kind → LSP SymbolKind mapping tests
1145 // -------------------------------------------------------------------------
1146
/// `test:rust_node_kind_maps_to_lsp_symbol_kind_struct` — the `struct_item`
/// node kind must map to `lsp_symbol_kind::STRUCT`.
///
/// Behavior: trigger-fails-on-baseline-then-passes-post-fix; on the baseline
/// `lsp_symbol_kind_for_node_kind` did not exist.
#[test]
fn rust_node_kind_maps_to_lsp_symbol_kind_struct() {
    let mapped = lsp_symbol_kind_for_node_kind("struct_item");
    assert_eq!(
        mapped,
        lsp_symbol_kind::STRUCT,
        "struct_item must map to SymbolKind::Struct (23)"
    );
}
1160
/// `test:rust_node_kind_maps_to_lsp_symbol_kind_trait` — the `trait_item`
/// node kind must map to `lsp_symbol_kind::INTERFACE`.
#[test]
fn rust_node_kind_maps_to_lsp_symbol_kind_trait() {
    let mapped = lsp_symbol_kind_for_node_kind("trait_item");
    assert_eq!(
        mapped,
        lsp_symbol_kind::INTERFACE,
        "trait_item must map to SymbolKind::Interface (11)"
    );
}
1171
/// `test:rust_node_kind_maps_to_lsp_symbol_kind_enum` — the `enum_item`
/// node kind must map to `lsp_symbol_kind::ENUM`.
#[test]
fn rust_node_kind_maps_to_lsp_symbol_kind_enum() {
    let mapped = lsp_symbol_kind_for_node_kind("enum_item");
    assert_eq!(
        mapped,
        lsp_symbol_kind::ENUM,
        "enum_item must map to SymbolKind::Enum (10)"
    );
}
1182
/// `test:rust_node_kind_maps_to_lsp_symbol_kind_function` — the
/// `function_item` node kind must map to `lsp_symbol_kind::FUNCTION`.
#[test]
fn rust_node_kind_maps_to_lsp_symbol_kind_function() {
    let mapped = lsp_symbol_kind_for_node_kind("function_item");
    assert_eq!(
        mapped,
        lsp_symbol_kind::FUNCTION,
        "function_item must map to SymbolKind::Function (12)"
    );
}
1193
/// `test:rust_node_kind_maps_to_lsp_symbol_kind_module` — the `mod_item`
/// node kind must map to `lsp_symbol_kind::MODULE`.
#[test]
fn rust_node_kind_maps_to_lsp_symbol_kind_module() {
    let mapped = lsp_symbol_kind_for_node_kind("mod_item");
    assert_eq!(
        mapped,
        lsp_symbol_kind::MODULE,
        "mod_item must map to SymbolKind::Module (2)"
    );
}
1204
/// Additional B1 coverage: each remaining Rust node kind in the table below
/// must map to the specific symbol kind paired with it.
#[test]
fn rust_node_kinds_map_to_non_variable_kinds() {
    // (tree-sitter node kind, expected LSP symbol kind)
    let expectations = [
        ("impl_item", lsp_symbol_kind::CLASS),
        ("const_item", lsp_symbol_kind::CONSTANT),
        ("static_item", lsp_symbol_kind::CONSTANT),
        ("type_item", lsp_symbol_kind::TYPE_PARAMETER),
        ("field_declaration", lsp_symbol_kind::FIELD),
        ("enum_variant", lsp_symbol_kind::ENUM_MEMBER),
        ("function_signature_item", lsp_symbol_kind::FUNCTION),
    ];
    for (kind, expected) in expectations {
        assert_eq!(
            lsp_symbol_kind_for_node_kind(kind),
            expected,
            "node kind '{kind}' should map to {expected}, got {}",
            lsp_symbol_kind_for_node_kind(kind)
        );
    }
}
1227
/// A node kind the mapping does not know must fall back to
/// `lsp_symbol_kind::VARIABLE`, preserving the pre-B1 default.
#[test]
fn unknown_node_kind_falls_back_to_variable() {
    let mapped = lsp_symbol_kind_for_node_kind("some_unknown_kind");
    assert_eq!(
        mapped,
        lsp_symbol_kind::VARIABLE,
        "unknown kind must fall back to Variable (13)"
    );
}
1237
1238 // =========================================================================
1239 // K1 — Python @property classification (I#17a)
1240 // =========================================================================
1241
/// `test:python_property_decorator_classifies_as_property_kind`
///
/// Baseline (RED): `decorated_definition` was not in the kind mapping, so it
/// fell through to VARIABLE (13). The Python `@property` decorated method was
/// thus invisible to LSP property-kind filters.
///
/// After fix (GREEN): `decorated_definition` maps to PROPERTY (7), and the
/// Python query captures `decorated_definition` as `@def` so `@property`-decorated
/// methods emit kind="decorated_definition" → SymbolKind::Property.
#[test]
fn python_property_decorator_classifies_as_property_kind() {
    // The kind mapping must return PROPERTY for decorated_definition.
    // `lsp_symbol_kind::PROPERTY` is 7 (22 is EnumMember in the LSP numbering),
    // so the failure message quotes 7.
    assert_eq!(
        lsp_symbol_kind_for_node_kind("decorated_definition"),
        lsp_symbol_kind::PROPERTY,
        "decorated_definition must map to SymbolKind::Property (7); baseline gave Variable (13)"
    );
}
1260
/// The Python query must capture `@property`-decorated methods as `@def=decorated_definition`.
///
/// Uses tree-sitter to parse a Python source snippet with a `@property` decorated
/// method, runs the compiled Python LangConfig query, and verifies that at least one
/// match emits `def_kind = "decorated_definition"` with `name = "name"`.
///
/// The two checks are tracked independently so a failure tells apart
/// "no `decorated_definition` was captured at all" from "one was captured,
/// but not with the method name `name`".
#[test]
fn python_property_query_captures_decorated_definition() {
    use streaming_iterator::StreamingIterator as _;

    let source = r"class MyModel:
    @property
    def name(self):
        return self._name

    @name.setter
    def name(self, value):
        self._name = value

    def regular_method(self):
        pass
";
    let cfg = config_for_extension("py").expect("Python config must compile");
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&cfg.language).expect("set language");
    let tree = parser.parse(source, None).expect("parse");

    let mut cursor = tree_sitter::QueryCursor::new();
    let mut matches = cursor.matches(&cfg.query, tree.root_node(), source.as_bytes());

    // Set when any match has a `decorated_definition` node as `@def`.
    let mut property_kind_found = false;
    // Set when such a match also captured `name` as `@name`.
    let mut property_name_found = false;
    while let Some(m) = matches.next() {
        let mut name = "";
        let mut def_kind = "";
        for cap in m.captures {
            let cap_name = &cfg.query.capture_names()[cap.index as usize];
            if *cap_name == "name" {
                name = &source[cap.node.start_byte()..cap.node.end_byte()];
            } else if *cap_name == "def" {
                def_kind = cap.node.kind();
            }
        }
        if def_kind == "decorated_definition" {
            property_kind_found = true;
            if name == "name" {
                property_name_found = true;
            }
        }
    }
    assert!(
        property_kind_found,
        "Python query must capture decorated_definition for @property method; got none"
    );
    assert!(
        property_name_found,
        "Python query must capture 'name' as the method name inside @property definition"
    );
}
1318
1319 // =========================================================================
1320 // K2 — Go interface classification (I#17b)
1321 // =========================================================================
1322
/// `test:go_interface_type_classifies_as_interface_kind`
///
/// Baseline (RED): every Go type declaration went through
/// `(type_declaration ...) @def`, giving kind="type_declaration" →
/// TYPE_PARAMETER (26), so interfaces never matched interface-kind filters
/// (`kind=11`).
///
/// After fix (GREEN): interface types are captured via `(interface_type) @def`,
/// giving kind="interface_type" → INTERFACE (11).
#[test]
fn go_interface_type_classifies_as_interface_kind() {
    let mapped = lsp_symbol_kind_for_node_kind("interface_type");
    assert_eq!(
        mapped,
        lsp_symbol_kind::INTERFACE,
        "interface_type must map to SymbolKind::Interface (11); baseline gave TypeParameter (26)"
    );
}
1339
/// `test:go_struct_type_classifies_as_struct_kind`
///
/// Baseline (RED): struct types also came out as TYPE_PARAMETER (26) through
/// the generic type_declaration pattern. After fix: struct_type → STRUCT (23).
#[test]
fn go_struct_type_classifies_as_struct_kind() {
    let mapped = lsp_symbol_kind_for_node_kind("struct_type");
    assert_eq!(
        mapped,
        lsp_symbol_kind::STRUCT,
        "struct_type must map to SymbolKind::Struct (23); baseline gave TypeParameter (26)"
    );
}
1352
/// The Go query must emit kind="interface_type" for `type Reader interface { ... }`.
///
/// Parses a small Go file and checks that the compiled Go LangConfig query
/// yields three matches: def_kind="interface_type" named "Reader",
/// def_kind="struct_type" named "MyStruct", and
/// def_kind="function_declaration" named "NewReader".
#[test]
fn go_interface_query_captures_interface_type() {
    use streaming_iterator::StreamingIterator as _;

    let source = r"package io
type Reader interface {
    Read(p []byte) (n int, err error)
}
type MyStruct struct {
    Name string
}
func NewReader() Reader {
    return nil
}
";
    let cfg = config_for_extension("go").expect("Go config must compile");
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&cfg.language).expect("set language");
    let tree = parser.parse(source, None).expect("parse");

    let mut cursor = tree_sitter::QueryCursor::new();
    let mut matches = cursor.matches(&cfg.query, tree.root_node(), source.as_bytes());

    let capture_names = cfg.query.capture_names();
    let (mut interface_found, mut struct_found, mut function_found) = (false, false, false);
    while let Some(found) = matches.next() {
        // Reduce the match to its (@def node kind, @name text) pair.
        let mut name = "";
        let mut def_kind = "";
        for cap in found.captures {
            let label = &capture_names[cap.index as usize];
            if *label == "name" {
                name = &source[cap.node.start_byte()..cap.node.end_byte()];
            } else if *label == "def" {
                def_kind = cap.node.kind();
            }
        }
        match (def_kind, name) {
            ("interface_type", "Reader") => interface_found = true,
            ("struct_type", "MyStruct") => struct_found = true,
            ("function_declaration", "NewReader") => function_found = true,
            _ => {}
        }
    }
    assert!(
        interface_found,
        "Go query must emit def_kind='interface_type' for 'type Reader interface {{ ... }}'"
    );
    assert!(
        struct_found,
        "Go query must emit def_kind='struct_type' for 'type MyStruct struct {{ ... }}'"
    );
    assert!(
        function_found,
        "Go query must emit def_kind='function_declaration' for 'func NewReader()'"
    );
}
1418
1419 // =========================================================================
1420 // K3 — HCL resource naming (I#17c)
1421 // =========================================================================
1422
/// `test:hcl_resource_symbol_uses_type_dot_name`
///
/// Verifies that `derive_hcl_block_name` produces the `type.name` composite
/// for a two-label HCL block (e.g. `resource "aws_iam_role" "loader" { ... }`
/// → "aws_iam_role.loader").
///
/// Baseline (RED): the previous HCL query captured the keyword ("resource") as the
/// symbol name, making `lsp_workspace_symbols(query="loader")` unable to find the
/// resource. The query fix makes the chunker emit "loader" as the name; this function
/// enables callers to reconstruct the full "aws_iam_role.loader" composite.
#[test]
fn hcl_resource_symbol_uses_type_dot_name() {
    let source = br#"resource "aws_iam_role" "loader" {
  assume_role_policy = "assume.json"
}
"#;
    let lang: tree_sitter::Language = tree_sitter_hcl::LANGUAGE.into();
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&lang).expect("set HCL language");
    let tree = parser.parse(source, None).expect("parse HCL");

    // Find the first `block` among the direct children of the body node.
    // Iterating with `children` avoids indexing by a narrowed integer.
    let root = tree.root_node();
    let body = root.child(0).expect("config_file has body");
    let mut walker = body.walk();
    let block = body
        .children(&mut walker)
        .find(|n| n.kind() == "block")
        .expect("should have at least one block node");

    let name = derive_hcl_block_name(&block, source);
    assert_eq!(
        name, "aws_iam_role.loader",
        "derive_hcl_block_name must produce 'aws_iam_role.loader' for \
         `resource \"aws_iam_role\" \"loader\"` block; got {name:?}"
    );
}
1463
/// `test:hcl_data_source_symbol_uses_type_dot_name`
///
/// Verifies `derive_hcl_block_name` produces "aws_s3_bucket.main" for
/// `data "aws_s3_bucket" "main" { ... }`.
#[test]
fn hcl_data_source_symbol_uses_type_dot_name() {
    let source = br#"data "aws_s3_bucket" "main" {
  bucket = "my-bucket"
}
"#;
    let lang: tree_sitter::Language = tree_sitter_hcl::LANGUAGE.into();
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&lang).expect("set HCL language");
    let tree = parser.parse(source, None).expect("parse HCL");

    // Find the first `block` among the direct children of the body node.
    // Iterating with `children` avoids indexing by a narrowed integer.
    let root = tree.root_node();
    let body = root.child(0).expect("config_file has body");
    let mut walker = body.walk();
    let block = body
        .children(&mut walker)
        .find(|n| n.kind() == "block")
        .expect("block node");

    let name = derive_hcl_block_name(&block, source);
    assert_eq!(
        name, "aws_s3_bucket.main",
        "derive_hcl_block_name must produce 'aws_s3_bucket.main'"
    );
}
1496
/// The HCL query must capture the resource name (last string label) not the keyword.
///
/// Verifies that the compiled HCL LangConfig query emits `@name = "loader"` (not
/// "resource") for `resource "aws_iam_role" "loader" { ... }`. This is the live
/// chunker behaviour that makes `lsp_workspace_symbols(query="loader")` work.
///
/// Also checks the single-label (`variable`, `output`) and no-label (`locals`)
/// block forms in the same fixture.
#[test]
fn hcl_query_captures_resource_name_not_keyword() {
    use streaming_iterator::StreamingIterator as _;

    let source = r#"resource "aws_iam_role" "loader" {
  x = 1
}
variable "region" {
  type = "string"
}
output "role_arn" {
  value = "arn"
}
locals {
  x = 1
}
"#;
    let cfg = config_for_extension("tf").expect("HCL config must compile");
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&cfg.language).expect("set language");
    let tree = parser.parse(source, None).expect("parse");

    let mut cursor = tree_sitter::QueryCursor::new();
    let mut matches = cursor.matches(&cfg.query, tree.root_node(), source.as_bytes());

    // Only the `@name` text is asserted on, so collect just that: one entry
    // per match that captured a non-empty name (the last `@name` capture wins
    // if a match carries several).
    let capture_names = cfg.query.capture_names();
    let mut name_list: Vec<&str> = Vec::new();
    while let Some(m) = matches.next() {
        let captured = m
            .captures
            .iter()
            .filter(|cap| capture_names[cap.index as usize] == "name")
            .map(|cap| &source[cap.node.start_byte()..cap.node.end_byte()])
            .last();
        if let Some(name) = captured.filter(|name| !name.is_empty()) {
            name_list.push(name);
        }
    }

    // Must capture "loader" (not "resource") for the resource block.
    assert!(
        name_list.contains(&"loader"),
        "HCL query must capture 'loader' (not the keyword 'resource') for resource block; got: {name_list:?}"
    );
    assert!(
        !name_list.contains(&"resource"),
        "HCL query must NOT capture the keyword 'resource' as a symbol name; got: {name_list:?}"
    );

    // Must capture "region" for variable block.
    assert!(
        name_list.contains(&"region"),
        "HCL query must capture 'region' for variable block; got: {name_list:?}"
    );

    // Must capture "role_arn" for output block.
    assert!(
        name_list.contains(&"role_arn"),
        "HCL query must capture 'role_arn' for output block; got: {name_list:?}"
    );

    // Must capture "locals" for locals block.
    assert!(
        name_list.contains(&"locals"),
        "HCL query must capture 'locals' for locals block; got: {name_list:?}"
    );
}
1575
1576 // =========================================================================
1577 // L1 — Python class_definition kind taxonomy fix (I#19)
1578 // =========================================================================
1579
/// `test:python_class_definition_kind_5`
///
/// Baseline (RED): `class_definition` fell through to the wildcard match or
/// returned VARIABLE (13); the Python `class Foo: pass` pattern was
/// classified as kind=20 (Key) in the mnemosyne corpus
/// (ErrorOccurred, OCRCompleted, MnemosyneApp, BaseScreen, BrowseScansScreen).
///
/// After fix (GREEN): the Python query emits a `@def` node of kind
/// `class_definition`, and `lsp_symbol_kind_for_node_kind` maps that kind
/// to CLASS (5).
#[test]
fn test_python_class_definition_kind_5() {
    use streaming_iterator::StreamingIterator as _;

    let source = r"class Foo:
    pass
";
    let cfg = config_for_extension("py").expect("Python config must compile");
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&cfg.language).expect("set language");
    let tree = parser.parse(source, None).expect("parse");

    let mut cursor = tree_sitter::QueryCursor::new();
    let mut matches = cursor.matches(&cfg.query, tree.root_node(), source.as_bytes());

    let capture_names = cfg.query.capture_names();
    let mut class_kind_found = false;
    while let Some(found) = matches.next() {
        // Keep only `@def` captures whose node is a class definition.
        let class_defs = found.captures.iter().filter(|cap| {
            capture_names[cap.index as usize] == "def" && cap.node.kind() == "class_definition"
        });
        for cap in class_defs {
            let lsp_kind = lsp_symbol_kind_for_node_kind(cap.node.kind());
            assert_eq!(
                lsp_kind,
                lsp_symbol_kind::CLASS,
                "class_definition must map to SymbolKind::Class (5); got {lsp_kind}"
            );
            class_kind_found = true;
        }
    }

    assert!(
        class_kind_found,
        "Python query must emit def_kind='class_definition' for 'class Foo:' pattern"
    );
}
1630
1631 // =========================================================================
1632 // L2 — Go type_alias kind taxonomy fix (I#17b)
1633 // =========================================================================
1634
/// `test:go_type_alias_kind_21`
///
/// Checks that the `@def` node kind `type_alias` (produced by Go
/// `type X = Y` declarations) maps to `lsp_symbol_kind::VARIABLE`.
///
/// Baseline (RED): `type_alias` was mapping to TYPE_PARAMETER (26) in the
/// kind match. This matched the previous K2 work which split type_spec into
/// interface_type (→11) and struct_type (→23), but the fallthrough
/// type_alias path still mapped to TYPE_PARAMETER.
///
/// After fix (GREEN): `type_alias` maps to VARIABLE — a better
/// classification than TypeParameter and semantically closer to an alias.
/// In the LSP `SymbolKind` enumeration Variable is 13 (21 is Null), which is
/// also the value quoted in this test's assertion message.
///
/// NOTE(review): the `_21` suffix in this test's name/id does not match that
/// numeric value; the name is left unchanged here — consider renaming.
///
/// Alternative: could use Constant (14) if the codebase considers aliases
/// as immutable. Variable is used here because:
/// - LSP spec doesn't have a dedicated "Alias" kind
/// - Variable is used in some implementations for type aliases
/// - It provides a type classification separate from pure TypeParameters
///   (which represent generics like `[T]` in function signatures)
#[test]
fn test_go_type_alias_kind_21() {
    use streaming_iterator::StreamingIterator as _;

    let source = r"package main

type Foo = Bar

type Reader interface {
    Read(p []byte) (n int, err error)
}
";
    let cfg = config_for_extension("go").expect("Go config must compile");
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&cfg.language).expect("set language");
    let tree = parser.parse(source, None).expect("parse");

    // Capture-name table is the same for every match; look it up once.
    let capture_names = cfg.query.capture_names();
    let mut cursor = tree_sitter::QueryCursor::new();
    let mut matches = cursor.matches(&cfg.query, tree.root_node(), source.as_bytes());

    let mut alias_kind_found = false;
    while let Some(m) = matches.next() {
        // Collect the symbol name and the kind of the `@def` node for this match.
        let mut name = "";
        let mut def_kind = "";
        for cap in m.captures {
            let cap_name = &capture_names[cap.index as usize];
            if *cap_name == "name" {
                name = &source[cap.node.start_byte()..cap.node.end_byte()];
            } else if *cap_name == "def" {
                def_kind = cap.node.kind();
            }
        }
        // Only the alias declaration is of interest; `Reader` is present to
        // make sure a non-alias type declaration does not satisfy the test.
        if def_kind == "type_alias" && name == "Foo" {
            let lsp_kind = lsp_symbol_kind_for_node_kind(def_kind);
            assert_eq!(
                lsp_kind,
                lsp_symbol_kind::VARIABLE,
                "type_alias must map to SymbolKind::Variable (13) not TypeParameter (26); got {lsp_kind}"
            );
            alias_kind_found = true;
        }
    }

    // Guard against a vacuous pass: the query must have produced the alias.
    assert!(
        alias_kind_found,
        "Go query must emit def_kind='type_alias' for 'type Foo = Bar' pattern"
    );
}
1700
/// `test:go_type_alias_distinct_from_type_parameter`
///
/// Verifies that a Go alias `type Foo = Bar` is classified as
/// `lsp_symbol_kind::VARIABLE` (13 in the LSP `SymbolKind` enumeration) even
/// when the same file contains a generic function with a type parameter
/// (`[T any]`).
///
/// This documents the intended distinction — generics are TypeParameter (26),
/// aliases are Variable — but only the alias side is asserted here: see the
/// note at the end of the test body for why generic parameters are not
/// checked.
#[test]
fn test_go_type_alias_distinct_from_type_parameter() {
    use streaming_iterator::StreamingIterator as _;

    let source = r"package main

type Foo = Bar

func generic[T any](x T) {
}
";
    let cfg = config_for_extension("go").expect("Go config must compile");
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&cfg.language).expect("set language");
    let tree = parser.parse(source, None).expect("parse");

    // Capture-name table is the same for every match; look it up once.
    let capture_names = cfg.query.capture_names();
    let mut cursor = tree_sitter::QueryCursor::new();
    let mut matches = cursor.matches(&cfg.query, tree.root_node(), source.as_bytes());

    // `None` until the alias match is seen; avoids a sentinel kind value.
    let mut alias_kind = None;

    while let Some(m) = matches.next() {
        // Collect the symbol name and the kind of the `@def` node for this match.
        let mut name = "";
        let mut def_kind = "";
        for cap in m.captures {
            let cap_name = &capture_names[cap.index as usize];
            if *cap_name == "name" {
                name = &source[cap.node.start_byte()..cap.node.end_byte()];
            } else if *cap_name == "def" {
                def_kind = cap.node.kind();
            }
        }
        if def_kind == "type_alias" && name == "Foo" {
            alias_kind = Some(lsp_symbol_kind_for_node_kind(def_kind));
        }
    }

    let Some(alias_kind) = alias_kind else {
        panic!("Go query must emit 'type Foo = Bar' as type_alias; got none");
    };
    assert_eq!(
        alias_kind,
        lsp_symbol_kind::VARIABLE,
        "type_alias 'Foo' must be kind=13 (Variable), got {alias_kind}"
    );

    // Note: This test does NOT check generic type parameters because
    // the current Go query does not capture them — it only captures
    // top-level definitions. Generic parameters in function signatures
    // are part of the function_declaration's syntax but not extracted
    // as separate definitions, so they will not appear in the query results.
    // This is the intended behavior; generics are not searchable symbols
    // in the chunker pipeline.
}
1765}