1use std::sync::{Arc, OnceLock};
8
9use tree_sitter::{Language, Query};
10
/// Tree-sitter grammar paired with a compiled query that locates call sites.
///
/// The queries built for this type tag the called name as `@callee` and the
/// enclosing call node as `@call`.
pub struct CallConfig {
    /// Grammar the query was compiled against; a parser must be set to this
    /// language before the query is run on its tree.
    pub language: Language,
    /// Compiled call-site query (`@callee` / `@call` captures).
    pub query: Query,
}
21
/// Tree-sitter grammar paired with a compiled query that locates definitions.
///
/// The queries built for this type tag the defined identifier as `@name` and
/// the node representing the definition as `@def`.
pub struct LangConfig {
    /// Grammar the query was compiled against; a parser must be set to this
    /// language before the query is run on its tree.
    pub language: Language,
    /// Compiled definition query (`@name` / `@def` captures).
    pub query: Query,
}
32
33#[must_use]
38pub fn config_for_extension(ext: &str) -> Option<Arc<LangConfig>> {
39 static CACHE: OnceLock<std::collections::HashMap<&'static str, Arc<LangConfig>>> =
41 OnceLock::new();
42
43 let cache = CACHE.get_or_init(|| {
44 let mut m = std::collections::HashMap::new();
45 for &ext in &[
47 "rs", "py", "pyi", "js", "jsx", "ts", "tsx", "go", "java", "c", "h", "cpp", "cc",
48 "cxx", "hpp", "sh", "bash", "bats", "rb", "tf", "tfvars", "hcl", "kt", "kts", "swift",
49 "scala", "toml", "json", "yaml", "yml", "md", "xml", "rdf", "owl",
50 ] {
51 if let Some(cfg) = compile_config(ext) {
52 m.insert(ext, Arc::new(cfg));
53 }
54 }
55 m
56 });
57
58 cache.get(ext).cloned()
59}
60
61#[expect(
63 clippy::too_many_lines,
64 reason = "one match arm per language — flat by design"
65)]
66fn compile_config(ext: &str) -> Option<LangConfig> {
67 let (lang, query_str): (Language, &str) = match ext {
68 "rs" => (
72 tree_sitter_rust::LANGUAGE.into(),
73 concat!(
74 "(function_item name: (identifier) @name) @def\n",
75 "(struct_item name: (type_identifier) @name) @def\n",
76 "(enum_item name: (type_identifier) @name) @def\n",
77 "(type_item name: (type_identifier) @name) @def\n",
78 "(field_declaration name: (field_identifier) @name) @def\n",
79 "(enum_variant name: (identifier) @name) @def\n",
80 "(impl_item type: (type_identifier) @name) @def\n",
81 "(trait_item name: (type_identifier) @name) @def\n",
82 "(const_item name: (identifier) @name) @def\n",
83 "(static_item name: (identifier) @name) @def\n",
84 "(mod_item name: (identifier) @name) @def",
85 ),
86 ),
87 "py" | "pyi" => (
90 tree_sitter_python::LANGUAGE.into(),
91 concat!(
92 "(function_definition name: (identifier) @name) @def\n",
93 "(class_definition name: (identifier) @name body: (block) @def)\n",
94 "(assignment left: (identifier) @name) @def",
95 ),
96 ),
97 "js" | "jsx" => (
99 tree_sitter_javascript::LANGUAGE.into(),
100 concat!(
101 "(function_declaration name: (identifier) @name) @def\n",
102 "(method_definition name: (property_identifier) @name) @def\n",
103 "(class_declaration name: (identifier) @name) @def\n",
104 "(variable_declarator name: (identifier) @name) @def",
105 ),
106 ),
107 "ts" => (
108 tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
109 concat!(
110 "(function_declaration name: (identifier) @name) @def\n",
111 "(method_definition name: (property_identifier) @name) @def\n",
112 "(class_declaration name: (type_identifier) @name) @def\n",
113 "(interface_declaration name: (type_identifier) @name) @def\n",
114 "(variable_declarator name: (identifier) @name) @def\n",
115 "(type_alias_declaration name: (type_identifier) @name) @def\n",
116 "(enum_declaration name: (identifier) @name) @def",
117 ),
118 ),
119 "tsx" => (
120 tree_sitter_typescript::LANGUAGE_TSX.into(),
121 concat!(
122 "(function_declaration name: (identifier) @name) @def\n",
123 "(method_definition name: (property_identifier) @name) @def\n",
124 "(class_declaration name: (type_identifier) @name) @def\n",
125 "(interface_declaration name: (type_identifier) @name) @def\n",
126 "(variable_declarator name: (identifier) @name) @def\n",
127 "(type_alias_declaration name: (type_identifier) @name) @def\n",
128 "(enum_declaration name: (identifier) @name) @def",
129 ),
130 ),
131 "go" => (
132 tree_sitter_go::LANGUAGE.into(),
133 concat!(
134 "(function_declaration name: (identifier) @name) @def\n",
135 "(method_declaration name: (field_identifier) @name) @def\n",
136 "(type_declaration (type_spec name: (type_identifier) @name)) @def\n",
137 "(const_spec name: (identifier) @name) @def",
138 ),
139 ),
140 "java" => (
143 tree_sitter_java::LANGUAGE.into(),
144 concat!(
145 "(method_declaration name: (identifier) @name) @def\n",
146 "(class_declaration name: (identifier) @name) @def\n",
147 "(interface_declaration name: (identifier) @name) @def\n",
148 "(field_declaration declarator: (variable_declarator name: (identifier) @name)) @def\n",
149 "(enum_constant name: (identifier) @name) @def\n",
150 "(enum_declaration name: (identifier) @name) @def\n",
151 "(constructor_declaration name: (identifier) @name) @def",
152 ),
153 ),
154 "c" | "h" => (
155 tree_sitter_c::LANGUAGE.into(),
156 concat!(
157 "(function_definition declarator: (function_declarator declarator: (identifier) @name)) @def\n",
158 "(declaration declarator: (init_declarator declarator: (identifier) @name)) @def\n",
159 "(struct_specifier name: (type_identifier) @name) @def\n",
160 "(enum_specifier name: (type_identifier) @name) @def\n",
161 "(type_definition declarator: (type_identifier) @name) @def",
162 ),
163 ),
164 "cpp" | "cc" | "cxx" | "hpp" => (
166 tree_sitter_cpp::LANGUAGE.into(),
167 concat!(
168 "(function_definition declarator: (function_declarator declarator: (identifier) @name)) @def\n",
169 "(class_specifier name: (type_identifier) @name) @def\n",
170 "(declaration declarator: (init_declarator declarator: (identifier) @name)) @def\n",
171 "(struct_specifier name: (type_identifier) @name) @def\n",
172 "(enum_specifier name: (type_identifier) @name) @def\n",
173 "(type_definition declarator: (type_identifier) @name) @def\n",
174 "(namespace_definition name: (namespace_identifier) @name) @def\n",
175 "(field_declaration declarator: (field_identifier) @name) @def",
176 ),
177 ),
178 "sh" | "bash" | "bats" => (
180 tree_sitter_bash::LANGUAGE.into(),
181 concat!(
182 "(function_definition name: (word) @name) @def\n",
183 "(variable_assignment name: (variable_name) @name) @def",
184 ),
185 ),
186 "rb" => (
188 tree_sitter_ruby::LANGUAGE.into(),
189 concat!(
190 "(method name: (identifier) @name) @def\n",
191 "(class name: (constant) @name) @def\n",
192 "(module name: (constant) @name) @def\n",
193 "(assignment left: (identifier) @name) @def\n",
194 "(assignment left: (constant) @name) @def",
195 ),
196 ),
197 "tf" | "tfvars" | "hcl" => (
199 tree_sitter_hcl::LANGUAGE.into(),
200 "(block (identifier) @name) @def",
201 ),
202 "kt" | "kts" => (
204 tree_sitter_kotlin_ng::LANGUAGE.into(),
205 concat!(
206 "(function_declaration name: (identifier) @name) @def\n",
207 "(class_declaration name: (identifier) @name) @def\n",
208 "(object_declaration name: (identifier) @name) @def\n",
209 "(property_declaration (identifier) @name) @def\n",
210 "(enum_entry (identifier) @name) @def",
211 ),
212 ),
213 "swift" => (
215 tree_sitter_swift::LANGUAGE.into(),
216 concat!(
217 "(function_declaration name: (simple_identifier) @name) @def\n",
218 "(class_declaration name: (type_identifier) @name) @def\n",
219 "(protocol_declaration name: (type_identifier) @name) @def\n",
220 "(property_declaration name: (pattern bound_identifier: (simple_identifier) @name)) @def\n",
221 "(typealias_declaration name: (type_identifier) @name) @def",
222 ),
223 ),
224 "scala" => (
226 tree_sitter_scala::LANGUAGE.into(),
227 concat!(
228 "(function_definition name: (identifier) @name) @def\n",
229 "(class_definition name: (identifier) @name) @def\n",
230 "(trait_definition name: (identifier) @name) @def\n",
231 "(object_definition name: (identifier) @name) @def\n",
232 "(val_definition pattern: (identifier) @name) @def\n",
233 "(var_definition pattern: (identifier) @name) @def\n",
234 "(type_definition name: (type_identifier) @name) @def",
235 ),
236 ),
237 "toml" => (
239 tree_sitter_toml_ng::LANGUAGE.into(),
240 concat!(
241 "(table (bare_key) @name) @def\n",
242 "(pair (bare_key) @name) @def",
243 ),
244 ),
245 "json" => (
247 tree_sitter_json::LANGUAGE.into(),
248 "(pair key: (string (string_content) @name)) @def",
249 ),
250 "yaml" | "yml" => (
252 tree_sitter_yaml::LANGUAGE.into(),
253 "(block_mapping_pair key: (flow_node (plain_scalar (string_scalar) @name))) @def",
254 ),
255 "md" => (
257 tree_sitter_md::LANGUAGE.into(),
258 "(atx_heading heading_content: (inline) @name) @def",
259 ),
260 "xml" | "rdf" | "owl" => (
263 tree_sitter_xml::LANGUAGE_XML.into(),
264 concat!(
265 "(element (STag (Name) @name)) @def\n",
266 "(element (EmptyElemTag (Name) @name)) @def",
267 ),
268 ),
269 _ => return None,
270 };
271 let query = match Query::new(&lang, query_str) {
272 Ok(q) => q,
273 Err(e) => {
274 tracing::warn!(ext, %e, "tree-sitter query compilation failed — language may be ABI-incompatible");
275 return None;
276 }
277 };
278 Some(LangConfig {
279 language: lang,
280 query,
281 })
282}
283
284#[must_use]
290pub fn call_query_for_extension(ext: &str) -> Option<Arc<CallConfig>> {
291 static CACHE: OnceLock<std::collections::HashMap<&'static str, Arc<CallConfig>>> =
292 OnceLock::new();
293
294 let cache = CACHE.get_or_init(|| {
295 let mut m = std::collections::HashMap::new();
296 for &ext in &[
299 "rs", "py", "pyi", "js", "jsx", "ts", "tsx", "go", "java", "c", "h", "cpp", "cc",
300 "cxx", "hpp", "sh", "bash", "bats", "rb", "tf", "tfvars", "hcl", "kt", "kts", "swift",
301 "scala",
302 ] {
303 if let Some(cfg) = compile_call_config(ext) {
304 m.insert(ext, Arc::new(cfg));
305 }
306 }
307 m
308 });
309
310 cache.get(ext).cloned()
311}
312
313#[expect(
318 clippy::too_many_lines,
319 reason = "one match arm per language — flat by design"
320)]
321fn compile_call_config(ext: &str) -> Option<CallConfig> {
322 let (lang, query_str): (Language, &str) = match ext {
323 "rs" => (
330 tree_sitter_rust::LANGUAGE.into(),
331 concat!(
332 "(call_expression function: (identifier) @callee) @call\n",
333 "(call_expression function: (field_expression field: (field_identifier) @callee)) @call\n",
334 "(call_expression function: (scoped_identifier) @callee) @call",
335 ),
336 ),
337 "py" | "pyi" => (
339 tree_sitter_python::LANGUAGE.into(),
340 concat!(
341 "(call function: (identifier) @callee) @call\n",
342 "(call function: (attribute attribute: (identifier) @callee)) @call",
343 ),
344 ),
345 "js" | "jsx" => (
347 tree_sitter_javascript::LANGUAGE.into(),
348 concat!(
349 "(call_expression function: (identifier) @callee) @call\n",
350 "(call_expression function: (member_expression property: (property_identifier) @callee)) @call",
351 ),
352 ),
353 "ts" => (
355 tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
356 concat!(
357 "(call_expression function: (identifier) @callee) @call\n",
358 "(call_expression function: (member_expression property: (property_identifier) @callee)) @call",
359 ),
360 ),
361 "tsx" => (
363 tree_sitter_typescript::LANGUAGE_TSX.into(),
364 concat!(
365 "(call_expression function: (identifier) @callee) @call\n",
366 "(call_expression function: (member_expression property: (property_identifier) @callee)) @call",
367 ),
368 ),
369 "go" => (
371 tree_sitter_go::LANGUAGE.into(),
372 concat!(
373 "(call_expression function: (identifier) @callee) @call\n",
374 "(call_expression function: (selector_expression field: (field_identifier) @callee)) @call",
375 ),
376 ),
377 "java" => (
379 tree_sitter_java::LANGUAGE.into(),
380 "(method_invocation name: (identifier) @callee) @call",
381 ),
382 "c" | "h" => (
384 tree_sitter_c::LANGUAGE.into(),
385 concat!(
386 "(call_expression function: (identifier) @callee) @call\n",
387 "(call_expression function: (field_expression field: (field_identifier) @callee)) @call",
388 ),
389 ),
390 "cpp" | "cc" | "cxx" | "hpp" => (
392 tree_sitter_cpp::LANGUAGE.into(),
393 concat!(
394 "(call_expression function: (identifier) @callee) @call\n",
395 "(call_expression function: (field_expression field: (field_identifier) @callee)) @call",
396 ),
397 ),
398 "sh" | "bash" | "bats" => (
400 tree_sitter_bash::LANGUAGE.into(),
401 "(command name: (command_name (word) @callee)) @call",
402 ),
403 "rb" => (
405 tree_sitter_ruby::LANGUAGE.into(),
406 "(call method: (identifier) @callee) @call",
407 ),
408 "tf" | "tfvars" | "hcl" => (
410 tree_sitter_hcl::LANGUAGE.into(),
411 "(function_call (identifier) @callee) @call",
412 ),
413 "kt" | "kts" => (
416 tree_sitter_kotlin_ng::LANGUAGE.into(),
417 "(call_expression (identifier) @callee) @call",
418 ),
419 "swift" => (
421 tree_sitter_swift::LANGUAGE.into(),
422 "(call_expression (simple_identifier) @callee) @call",
423 ),
424 "scala" => (
426 tree_sitter_scala::LANGUAGE.into(),
427 concat!(
428 "(call_expression function: (identifier) @callee) @call\n",
429 "(call_expression function: (field_expression field: (identifier) @callee)) @call",
430 ),
431 ),
432 _ => return None,
433 };
434 let query = match Query::new(&lang, query_str) {
435 Ok(q) => q,
436 Err(e) => {
437 tracing::warn!(ext, %e, "tree-sitter call query compilation failed");
438 return None;
439 }
440 };
441 Some(CallConfig {
442 language: lang,
443 query,
444 })
445}
446
#[cfg(test)]
mod tests {
    use super::*;

    /// Extensions that must resolve to a definition-query config.
    const DEFINITION_EXTENSIONS: [&str; 34] = [
        "rs", "py", "pyi", "js", "jsx", "ts", "tsx", "go", "java", "c", "h", "cpp", "cc",
        "cxx", "hpp", "sh", "bash", "bats", "rb", "tf", "tfvars", "hcl", "kt", "kts", "swift",
        "scala", "toml", "json", "yaml", "yml", "md", "xml", "rdf", "owl",
    ];

    /// Extensions that must resolve to a call-query config.
    const CALL_EXTENSIONS: [&str; 26] = [
        "rs", "py", "pyi", "js", "jsx", "ts", "tsx", "go", "java", "c", "h", "cpp", "cc",
        "cxx", "hpp", "sh", "bash", "bats", "rb", "tf", "tfvars", "hcl", "kt", "kts", "swift",
        "scala",
    ];

    #[test]
    fn rust_extension_resolves() {
        assert!(config_for_extension("rs").is_some());
    }

    #[test]
    fn python_extension_resolves() {
        assert!(config_for_extension("py").is_some());
    }

    #[test]
    fn python_stub_extension_resolves() {
        assert!(config_for_extension("pyi").is_some());
    }

    #[test]
    fn unknown_extension_returns_none() {
        assert!(config_for_extension("xyz").is_none());
    }

    /// Every listed extension compiles and is served from the cache.
    #[test]
    fn all_supported_extensions() {
        for ext in DEFINITION_EXTENSIONS {
            assert!(config_for_extension(ext).is_some(), "failed for {ext}");
        }
    }

    /// Turtle-family formats have no tree-sitter config and are instead
    /// recognised by the RDF text-extension check.
    #[test]
    fn turtle_family_uses_rdf_text_chunking_not_tree_sitter() {
        for ext in ["ttl", "nt", "n3", "trig", "nq"] {
            assert!(
                config_for_extension(ext).is_none(),
                "{ext} should be handled by RDF text chunking"
            );
            assert!(crate::chunk::is_rdf_text_extension(ext));
        }
    }

    /// Every extension with a call query compiles and is served from the cache.
    #[test]
    fn all_call_query_extensions() {
        for ext in CALL_EXTENSIONS {
            assert!(
                call_query_for_extension(ext).is_some(),
                "call query failed for {ext}"
            );
        }
    }

    #[test]
    fn toml_has_no_call_query() {
        assert!(call_query_for_extension("toml").is_none());
    }

    /// A path-qualified Rust call is reported with its full path, and the
    /// trailing segment is not additionally reported on its own.
    #[test]
    fn test_scoped_identifier_call_query_captures_full_path() {
        use streaming_iterator::StreamingIterator as _;

        let source = "
fn caller() {
    mod_a::foo();
    std::io::stderr();
}
";
        let call_cfg = call_query_for_extension("rs").expect("rs call config");
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&call_cfg.language)
            .expect("set language");
        let tree = parser.parse(source, None).expect("parse");

        let capture_names = call_cfg.query.capture_names();
        let mut cursor = tree_sitter::QueryCursor::new();
        let mut matches = cursor.matches(&call_cfg.query, tree.root_node(), source.as_bytes());

        // Gather the source text of every `@callee` capture across all matches.
        let mut callees: Vec<String> = Vec::new();
        while let Some(found) = matches.next() {
            callees.extend(
                found
                    .captures
                    .iter()
                    .filter(|cap| capture_names[cap.index as usize] == "callee")
                    .map(|cap| source[cap.node.start_byte()..cap.node.end_byte()].to_string()),
            );
        }

        // The scoped call must surface as the whole path.
        assert!(
            callees.iter().any(|callee| callee == "mod_a::foo"),
            "expected 'mod_a::foo' in callees, got: {callees:?}"
        );
        // The last path segment alone must not show up.
        assert!(
            callees.iter().all(|callee| callee != "foo"),
            "bare 'foo' must not appear for scoped call; got: {callees:?}"
        );
    }
}