1use std::sync::{Arc, OnceLock};
8
9use tree_sitter::{Language, Query};
10
/// Compiled tree-sitter call-site query for one language.
///
/// Built by `compile_call_config`, whose patterns capture the called name as
/// `@callee` and the enclosing call node as `@call`.
pub struct CallConfig {
    /// Grammar the query was compiled against.
    pub language: Language,
    /// Compiled query exposing the `@callee` / `@call` captures.
    pub query: Query,
}
21
/// Compiled tree-sitter definition query for one language.
///
/// Built by `compile_config`, whose patterns capture a definition's name as
/// `@name` and the node standing for the definition as `@def`.
pub struct LangConfig {
    /// Grammar the query was compiled against.
    pub language: Language,
    /// Compiled query exposing the `@name` / `@def` captures.
    pub query: Query,
}
32
33#[must_use]
38pub fn config_for_extension(ext: &str) -> Option<Arc<LangConfig>> {
39 static CACHE: OnceLock<std::collections::HashMap<&'static str, Arc<LangConfig>>> =
41 OnceLock::new();
42
43 let cache = CACHE.get_or_init(|| {
44 let mut m = std::collections::HashMap::new();
45 for &ext in &[
47 "rs", "py", "pyi", "js", "jsx", "ts", "tsx", "go", "java", "c", "h", "cpp", "cc",
48 "cxx", "hpp", "sh", "bash", "bats", "rb", "tf", "tfvars", "hcl", "kt", "kts", "swift",
49 "scala", "toml", "json", "yaml", "yml", "md", "xml", "rdf", "owl",
50 ] {
51 if let Some(cfg) = compile_config(ext) {
52 m.insert(ext, Arc::new(cfg));
53 }
54 }
55 m
56 });
57
58 cache.get(ext).cloned()
59}
60
61#[expect(
63 clippy::too_many_lines,
64 reason = "one match arm per language — flat by design"
65)]
66fn compile_config(ext: &str) -> Option<LangConfig> {
67 let (lang, query_str): (Language, &str) = match ext {
68 "rs" => (
72 tree_sitter_rust::LANGUAGE.into(),
73 concat!(
74 "(function_item name: (identifier) @name) @def\n",
75 "(struct_item name: (type_identifier) @name) @def\n",
76 "(enum_item name: (type_identifier) @name) @def\n",
77 "(type_item name: (type_identifier) @name) @def\n",
78 "(field_declaration name: (field_identifier) @name) @def\n",
79 "(enum_variant name: (identifier) @name) @def\n",
80 "(impl_item type: (type_identifier) @name) @def\n",
81 "(trait_item name: (type_identifier) @name) @def\n",
82 "(const_item name: (identifier) @name) @def\n",
83 "(static_item name: (identifier) @name) @def\n",
84 "(mod_item name: (identifier) @name) @def",
85 ),
86 ),
87 "py" | "pyi" => (
90 tree_sitter_python::LANGUAGE.into(),
91 concat!(
92 "(function_definition name: (identifier) @name) @def\n",
93 "(class_definition name: (identifier) @name body: (block) @def)\n",
94 "(assignment left: (identifier) @name) @def",
95 ),
96 ),
97 "js" | "jsx" => (
99 tree_sitter_javascript::LANGUAGE.into(),
100 concat!(
101 "(function_declaration name: (identifier) @name) @def\n",
102 "(method_definition name: (property_identifier) @name) @def\n",
103 "(class_declaration name: (identifier) @name) @def\n",
104 "(variable_declarator name: (identifier) @name) @def",
105 ),
106 ),
107 "ts" => (
108 tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
109 concat!(
110 "(function_declaration name: (identifier) @name) @def\n",
111 "(method_definition name: (property_identifier) @name) @def\n",
112 "(class_declaration name: (type_identifier) @name) @def\n",
113 "(interface_declaration name: (type_identifier) @name) @def\n",
114 "(variable_declarator name: (identifier) @name) @def\n",
115 "(type_alias_declaration name: (type_identifier) @name) @def\n",
116 "(enum_declaration name: (identifier) @name) @def",
117 ),
118 ),
119 "tsx" => (
120 tree_sitter_typescript::LANGUAGE_TSX.into(),
121 concat!(
122 "(function_declaration name: (identifier) @name) @def\n",
123 "(method_definition name: (property_identifier) @name) @def\n",
124 "(class_declaration name: (type_identifier) @name) @def\n",
125 "(interface_declaration name: (type_identifier) @name) @def\n",
126 "(variable_declarator name: (identifier) @name) @def\n",
127 "(type_alias_declaration name: (type_identifier) @name) @def\n",
128 "(enum_declaration name: (identifier) @name) @def",
129 ),
130 ),
131 "go" => (
132 tree_sitter_go::LANGUAGE.into(),
133 concat!(
134 "(function_declaration name: (identifier) @name) @def\n",
135 "(method_declaration name: (field_identifier) @name) @def\n",
136 "(type_declaration (type_spec name: (type_identifier) @name)) @def\n",
137 "(const_spec name: (identifier) @name) @def",
138 ),
139 ),
140 "java" => (
143 tree_sitter_java::LANGUAGE.into(),
144 concat!(
145 "(method_declaration name: (identifier) @name) @def\n",
146 "(class_declaration name: (identifier) @name) @def\n",
147 "(interface_declaration name: (identifier) @name) @def\n",
148 "(field_declaration declarator: (variable_declarator name: (identifier) @name)) @def\n",
149 "(enum_constant name: (identifier) @name) @def\n",
150 "(enum_declaration name: (identifier) @name) @def\n",
151 "(constructor_declaration name: (identifier) @name) @def",
152 ),
153 ),
154 "c" | "h" => (
155 tree_sitter_c::LANGUAGE.into(),
156 concat!(
157 "(function_definition declarator: (function_declarator declarator: (identifier) @name)) @def\n",
158 "(declaration declarator: (init_declarator declarator: (identifier) @name)) @def\n",
159 "(struct_specifier name: (type_identifier) @name) @def\n",
160 "(enum_specifier name: (type_identifier) @name) @def\n",
161 "(type_definition declarator: (type_identifier) @name) @def",
162 ),
163 ),
164 "cpp" | "cc" | "cxx" | "hpp" => (
166 tree_sitter_cpp::LANGUAGE.into(),
167 concat!(
168 "(function_definition declarator: (function_declarator declarator: (identifier) @name)) @def\n",
169 "(class_specifier name: (type_identifier) @name) @def\n",
170 "(declaration declarator: (init_declarator declarator: (identifier) @name)) @def\n",
171 "(struct_specifier name: (type_identifier) @name) @def\n",
172 "(enum_specifier name: (type_identifier) @name) @def\n",
173 "(type_definition declarator: (type_identifier) @name) @def\n",
174 "(namespace_definition name: (namespace_identifier) @name) @def\n",
175 "(field_declaration declarator: (field_identifier) @name) @def",
176 ),
177 ),
178 "sh" | "bash" | "bats" => (
180 tree_sitter_bash::LANGUAGE.into(),
181 concat!(
182 "(function_definition name: (word) @name) @def\n",
183 "(variable_assignment name: (variable_name) @name) @def",
184 ),
185 ),
186 "rb" => (
188 tree_sitter_ruby::LANGUAGE.into(),
189 concat!(
190 "(method name: (identifier) @name) @def\n",
191 "(class name: (constant) @name) @def\n",
192 "(module name: (constant) @name) @def\n",
193 "(assignment left: (identifier) @name) @def\n",
194 "(assignment left: (constant) @name) @def",
195 ),
196 ),
197 "tf" | "tfvars" | "hcl" => (
199 tree_sitter_hcl::LANGUAGE.into(),
200 "(block (identifier) @name) @def",
201 ),
202 "kt" | "kts" => (
204 tree_sitter_kotlin_ng::LANGUAGE.into(),
205 concat!(
206 "(function_declaration name: (identifier) @name) @def\n",
207 "(class_declaration name: (identifier) @name) @def\n",
208 "(object_declaration name: (identifier) @name) @def\n",
209 "(property_declaration (identifier) @name) @def\n",
210 "(enum_entry (identifier) @name) @def",
211 ),
212 ),
213 "swift" => (
215 tree_sitter_swift::LANGUAGE.into(),
216 concat!(
217 "(function_declaration name: (simple_identifier) @name) @def\n",
218 "(class_declaration name: (type_identifier) @name) @def\n",
219 "(protocol_declaration name: (type_identifier) @name) @def\n",
220 "(property_declaration name: (pattern bound_identifier: (simple_identifier) @name)) @def\n",
221 "(typealias_declaration name: (type_identifier) @name) @def",
222 ),
223 ),
224 "scala" => (
226 tree_sitter_scala::LANGUAGE.into(),
227 concat!(
228 "(function_definition name: (identifier) @name) @def\n",
229 "(class_definition name: (identifier) @name) @def\n",
230 "(trait_definition name: (identifier) @name) @def\n",
231 "(object_definition name: (identifier) @name) @def\n",
232 "(val_definition pattern: (identifier) @name) @def\n",
233 "(var_definition pattern: (identifier) @name) @def\n",
234 "(type_definition name: (type_identifier) @name) @def",
235 ),
236 ),
237 "toml" => (
239 tree_sitter_toml_ng::LANGUAGE.into(),
240 concat!(
241 "(table (bare_key) @name) @def\n",
242 "(pair (bare_key) @name) @def",
243 ),
244 ),
245 "json" => (
247 tree_sitter_json::LANGUAGE.into(),
248 "(pair key: (string (string_content) @name)) @def",
249 ),
250 "yaml" | "yml" => (
252 tree_sitter_yaml::LANGUAGE.into(),
253 "(block_mapping_pair key: (flow_node (plain_scalar (string_scalar) @name))) @def",
254 ),
255 "md" => (
257 tree_sitter_md::LANGUAGE.into(),
258 "(atx_heading heading_content: (inline) @name) @def",
259 ),
260 "xml" | "rdf" | "owl" => (
263 tree_sitter_xml::LANGUAGE_XML.into(),
264 concat!(
265 "(element (STag (Name) @name)) @def\n",
266 "(element (EmptyElemTag (Name) @name)) @def",
267 ),
268 ),
269 _ => return None,
270 };
271 let query = match Query::new(&lang, query_str) {
272 Ok(q) => q,
273 Err(e) => {
274 tracing::warn!(ext, %e, "tree-sitter query compilation failed — language may be ABI-incompatible");
275 return None;
276 }
277 };
278 Some(LangConfig {
279 language: lang,
280 query,
281 })
282}
283
284#[must_use]
290pub fn call_query_for_extension(ext: &str) -> Option<Arc<CallConfig>> {
291 static CACHE: OnceLock<std::collections::HashMap<&'static str, Arc<CallConfig>>> =
292 OnceLock::new();
293
294 let cache = CACHE.get_or_init(|| {
295 let mut m = std::collections::HashMap::new();
296 for &ext in &[
299 "rs", "py", "pyi", "js", "jsx", "ts", "tsx", "go", "java", "c", "h", "cpp", "cc",
300 "cxx", "hpp", "sh", "bash", "bats", "rb", "tf", "tfvars", "hcl", "kt", "kts", "swift",
301 "scala",
302 ] {
303 if let Some(cfg) = compile_call_config(ext) {
304 m.insert(ext, Arc::new(cfg));
305 }
306 }
307 m
308 });
309
310 cache.get(ext).cloned()
311}
312
/// Builds the call-site query for one extension.
///
/// Each arm pairs a grammar with patterns that capture the called name as
/// `@callee` and the outer call node as `@call`. Returns `None` for an
/// extension with no arm, or (after logging a warning) when the query does
/// not compile against the grammar.
#[expect(
    clippy::too_many_lines,
    reason = "one match arm per language — flat by design"
)]
fn compile_call_config(ext: &str) -> Option<CallConfig> {
    let (lang, query_str): (Language, &str) = match ext {
        // Plain identifier, field-expression and scoped-identifier callees.
        "rs" => (
            tree_sitter_rust::LANGUAGE.into(),
            concat!(
                "(call_expression function: (identifier) @callee) @call\n",
                "(call_expression function: (field_expression field: (field_identifier) @callee)) @call\n",
                "(call_expression function: (scoped_identifier name: (identifier) @callee)) @call",
            ),
        ),
        // Plain identifier and attribute callees.
        "py" | "pyi" => (
            tree_sitter_python::LANGUAGE.into(),
            concat!(
                "(call function: (identifier) @callee) @call\n",
                "(call function: (attribute attribute: (identifier) @callee)) @call",
            ),
        ),
        // JavaScript, TypeScript and TSX share the same two patterns but each
        // compiles against its own grammar.
        "js" | "jsx" => (
            tree_sitter_javascript::LANGUAGE.into(),
            concat!(
                "(call_expression function: (identifier) @callee) @call\n",
                "(call_expression function: (member_expression property: (property_identifier) @callee)) @call",
            ),
        ),
        "ts" => (
            tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
            concat!(
                "(call_expression function: (identifier) @callee) @call\n",
                "(call_expression function: (member_expression property: (property_identifier) @callee)) @call",
            ),
        ),
        "tsx" => (
            tree_sitter_typescript::LANGUAGE_TSX.into(),
            concat!(
                "(call_expression function: (identifier) @callee) @call\n",
                "(call_expression function: (member_expression property: (property_identifier) @callee)) @call",
            ),
        ),
        "go" => (
            tree_sitter_go::LANGUAGE.into(),
            concat!(
                "(call_expression function: (identifier) @callee) @call\n",
                "(call_expression function: (selector_expression field: (field_identifier) @callee)) @call",
            ),
        ),
        "java" => (
            tree_sitter_java::LANGUAGE.into(),
            "(method_invocation name: (identifier) @callee) @call",
        ),
        "c" | "h" => (
            tree_sitter_c::LANGUAGE.into(),
            concat!(
                "(call_expression function: (identifier) @callee) @call\n",
                "(call_expression function: (field_expression field: (field_identifier) @callee)) @call",
            ),
        ),
        "cpp" | "cc" | "cxx" | "hpp" => (
            tree_sitter_cpp::LANGUAGE.into(),
            concat!(
                "(call_expression function: (identifier) @callee) @call\n",
                "(call_expression function: (field_expression field: (field_identifier) @callee)) @call",
            ),
        ),
        // Shell: the command name is captured as the callee.
        "sh" | "bash" | "bats" => (
            tree_sitter_bash::LANGUAGE.into(),
            "(command name: (command_name (word) @callee)) @call",
        ),
        "rb" => (
            tree_sitter_ruby::LANGUAGE.into(),
            "(call method: (identifier) @callee) @call",
        ),
        "tf" | "tfvars" | "hcl" => (
            tree_sitter_hcl::LANGUAGE.into(),
            "(function_call (identifier) @callee) @call",
        ),
        "kt" | "kts" => (
            tree_sitter_kotlin_ng::LANGUAGE.into(),
            "(call_expression (identifier) @callee) @call",
        ),
        "swift" => (
            tree_sitter_swift::LANGUAGE.into(),
            "(call_expression (simple_identifier) @callee) @call",
        ),
        "scala" => (
            tree_sitter_scala::LANGUAGE.into(),
            concat!(
                "(call_expression function: (identifier) @callee) @call\n",
                "(call_expression function: (field_expression field: (identifier) @callee)) @call",
            ),
        ),
        // No call query for any other extension.
        _ => return None,
    };
    // A failed compile is logged and reported to the caller as `None`.
    let query = match Query::new(&lang, query_str) {
        Ok(q) => q,
        Err(e) => {
            tracing::warn!(ext, %e, "tree-sitter call query compilation failed");
            return None;
        }
    };
    Some(CallConfig {
        language: lang,
        query,
    })
}
441
#[cfg(test)]
mod tests {
    use super::*;

    /// Extensions expected to resolve to a definition query.
    const DEFINITION_EXTS: &[&str] = &[
        "rs", "py", "pyi", "js", "jsx", "ts", "tsx", "go", "java", "c", "h", "cpp", "cc",
        "cxx", "hpp", "sh", "bash", "bats", "rb", "tf", "tfvars", "hcl", "kt", "kts", "swift",
        "scala", "toml", "json", "yaml", "yml", "md", "xml", "rdf", "owl",
    ];

    /// Extensions expected to resolve to a call query.
    const CALL_EXTS: &[&str] = &[
        "rs", "py", "pyi", "js", "jsx", "ts", "tsx", "go", "java", "c", "h", "cpp", "cc",
        "cxx", "hpp", "sh", "bash", "bats", "rb", "tf", "tfvars", "hcl", "kt", "kts", "swift",
        "scala",
    ];

    #[test]
    fn rust_extension_resolves() {
        let cfg = config_for_extension("rs");
        assert!(cfg.is_some());
    }

    #[test]
    fn python_extension_resolves() {
        let cfg = config_for_extension("py");
        assert!(cfg.is_some());
    }

    #[test]
    fn python_stub_extension_resolves() {
        let cfg = config_for_extension("pyi");
        assert!(cfg.is_some());
    }

    #[test]
    fn unknown_extension_returns_none() {
        let cfg = config_for_extension("xyz");
        assert!(cfg.is_none());
    }

    /// Every listed extension must compile to a definition query.
    #[test]
    fn all_supported_extensions() {
        for &ext in DEFINITION_EXTS {
            assert!(config_for_extension(ext).is_some(), "failed for {ext}");
        }
    }

    /// Turtle-family extensions have no tree-sitter config and are claimed by
    /// the RDF text chunker instead.
    #[test]
    fn turtle_family_uses_rdf_text_chunking_not_tree_sitter() {
        for ext in ["ttl", "nt", "n3", "trig", "nq"] {
            let cfg = config_for_extension(ext);
            assert!(cfg.is_none(), "{ext} should be handled by RDF text chunking");
            assert!(crate::chunk::is_rdf_text_extension(ext));
        }
    }

    /// Every listed extension must compile to a call query.
    #[test]
    fn all_call_query_extensions() {
        for &ext in CALL_EXTS {
            assert!(
                call_query_for_extension(ext).is_some(),
                "call query failed for {ext}"
            );
        }
    }

    #[test]
    fn toml_has_no_call_query() {
        let cfg = call_query_for_extension("toml");
        assert!(cfg.is_none());
    }
}