1use crate::languages::get_language_info;
13use crate::types::{
14 CallInfo, ClassInfo, FunctionInfo, ImplTraitInfo, ImportInfo, ReferenceInfo, ReferenceType,
15 SemanticAnalysis,
16};
17use std::cell::RefCell;
18use std::collections::HashMap;
19use std::path::{Path, PathBuf};
20use std::sync::LazyLock;
21use thiserror::Error;
22use tracing::instrument;
23use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
24
/// Errors reported by the parsing and query helpers in this module.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum ParserError {
    /// No language information or compiled queries exist for the named language.
    #[error("Unsupported language: {0}")]
    UnsupportedLanguage(String),
    /// The parser could not be configured, the parse produced no tree, or a
    /// parse-related argument was out of range.
    #[error("Failed to parse file: {0}")]
    ParseError(String),
    /// The input was not valid UTF-8.
    #[error("Invalid UTF-8 in file")]
    InvalidUtf8,
    /// A tree-sitter query failed to compile or could not be executed.
    #[error("Query error: {0}")]
    QueryError(String),
}
37
/// The tree-sitter queries compiled for a single language.
///
/// `element` and `call` are always present; the remaining queries are `None`
/// when the language does not declare the corresponding query string.
struct CompiledQueries {
    /// Query whose captures identify functions and classes.
    element: Query,
    /// Query whose `call` captures identify call sites.
    call: Query,
    /// Query whose `import_path` captures identify imports.
    import: Option<Query>,
    /// Query matching methods declared inside impl blocks.
    impl_block: Option<Query>,
    /// Query whose `type_ref` captures identify type references.
    reference: Option<Query>,
    /// Query matching trait implementations (`trait_name` / `impl_type` captures).
    impl_trait: Option<Query>,
    /// Query whose `write.*`, `read.*` and `writeread.*` captures classify symbol uses.
    defuse: Option<Query>,
}
49
50#[cfg_attr(coverage_nightly, coverage(off))]
55fn build_compiled_queries(
56 lang_info: &crate::languages::LanguageInfo,
57) -> Result<CompiledQueries, ParserError> {
58 let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
59 ParserError::QueryError(format!(
60 "Failed to compile element query for {}: {}",
61 lang_info.name, e
62 ))
63 })?;
64
65 let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
66 ParserError::QueryError(format!(
67 "Failed to compile call query for {}: {}",
68 lang_info.name, e
69 ))
70 })?;
71
72 let import = if let Some(import_query_str) = lang_info.import_query {
73 Some(
74 Query::new(&lang_info.language, import_query_str).map_err(|e| {
75 ParserError::QueryError(format!(
76 "Failed to compile import query for {}: {}",
77 lang_info.name, e
78 ))
79 })?,
80 )
81 } else {
82 None
83 };
84
85 let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
86 Some(
87 Query::new(&lang_info.language, impl_query_str).map_err(|e| {
88 ParserError::QueryError(format!(
89 "Failed to compile impl query for {}: {}",
90 lang_info.name, e
91 ))
92 })?,
93 )
94 } else {
95 None
96 };
97
98 let reference = if let Some(ref_query_str) = lang_info.reference_query {
99 Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
100 ParserError::QueryError(format!(
101 "Failed to compile reference query for {}: {}",
102 lang_info.name, e
103 ))
104 })?)
105 } else {
106 None
107 };
108
109 let impl_trait = if let Some(impl_trait_query_str) = lang_info.impl_trait_query {
110 Some(
111 Query::new(&lang_info.language, impl_trait_query_str).map_err(|e| {
112 ParserError::QueryError(format!(
113 "Failed to compile impl_trait query for {}: {}",
114 lang_info.name, e
115 ))
116 })?,
117 )
118 } else {
119 None
120 };
121
122 let defuse = if let Some(defuse_query_str) = lang_info.defuse_query {
123 Some(
124 Query::new(&lang_info.language, defuse_query_str).map_err(|e| {
125 ParserError::QueryError(format!(
126 "Failed to compile defuse query for {}: {}",
127 lang_info.name, e
128 ))
129 })?,
130 )
131 } else {
132 None
133 };
134
135 Ok(CompiledQueries {
136 element,
137 call,
138 import,
139 impl_block,
140 reference,
141 impl_trait,
142 defuse,
143 })
144}
145
146#[cfg_attr(coverage_nightly, coverage(off))]
151fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
152 let mut cache = HashMap::new();
153
154 for lang_name in crate::lang::supported_languages() {
155 if let Some(lang_info) = get_language_info(lang_name) {
156 match build_compiled_queries(&lang_info) {
157 Ok(compiled) => {
158 cache.insert(*lang_name, compiled);
159 }
160 Err(e) => {
161 tracing::error!(
162 "Failed to compile queries for language {}: {}",
163 lang_name,
164 e
165 );
166 }
167 }
168 }
169 }
170
171 cache
172}
173
/// Compiled queries keyed by language name, built by [`init_query_cache`] the
/// first time the cache is accessed.
static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
    LazyLock::new(init_query_cache);
177
178fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
180 QUERY_CACHE
181 .get(language)
182 .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
183}
184
thread_local! {
    /// Per-thread tree-sitter parser. Users in this file set the language on it
    /// before each parse.
    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
}
188
189pub struct ElementExtractor;
191
192impl ElementExtractor {
193 #[instrument(skip_all, fields(language))]
201 pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
202 let lang_info = get_language_info(language)
203 .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
204
205 let tree = PARSER.with(|p| {
206 let mut parser = p.borrow_mut();
207 parser
208 .set_language(&lang_info.language)
209 .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
210 parser
211 .parse(source, None)
212 .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
213 })?;
214
215 let compiled = get_compiled_queries(language)?;
216
217 let mut cursor = QueryCursor::new();
218 let mut function_count = 0;
219 let mut class_count = 0;
220
221 let mut matches = cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
222 while let Some(mat) = matches.next() {
223 for capture in mat.captures {
224 let capture_name = compiled.element.capture_names()[capture.index as usize];
225 match capture_name {
226 "function" => function_count += 1,
227 "class" => class_count += 1,
228 _ => {}
229 }
230 }
231 }
232
233 tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
234
235 Ok((function_count, class_count))
236 }
237}
238
239#[allow(clippy::too_many_lines)] fn extract_imports_from_node(
244 node: &Node,
245 source: &str,
246 prefix: &str,
247 line: usize,
248 imports: &mut Vec<ImportInfo>,
249) {
250 match node.kind() {
251 "identifier" | "self" | "super" | "crate" => {
253 let name = source[node.start_byte()..node.end_byte()].to_string();
254 imports.push(ImportInfo {
255 module: prefix.to_string(),
256 items: vec![name],
257 line,
258 });
259 }
260 "scoped_identifier" => {
262 let item = node
263 .child_by_field_name("name")
264 .map(|n| source[n.start_byte()..n.end_byte()].to_string())
265 .unwrap_or_default();
266 let module = node.child_by_field_name("path").map_or_else(
267 || prefix.to_string(),
268 |p| {
269 let path_text = source[p.start_byte()..p.end_byte()].to_string();
270 if prefix.is_empty() {
271 path_text
272 } else {
273 format!("{prefix}::{path_text}")
274 }
275 },
276 );
277 if !item.is_empty() {
278 imports.push(ImportInfo {
279 module,
280 items: vec![item],
281 line,
282 });
283 }
284 }
285 "scoped_use_list" => {
287 let new_prefix = node.child_by_field_name("path").map_or_else(
288 || prefix.to_string(),
289 |p| {
290 let path_text = source[p.start_byte()..p.end_byte()].to_string();
291 if prefix.is_empty() {
292 path_text
293 } else {
294 format!("{prefix}::{path_text}")
295 }
296 },
297 );
298 if let Some(list) = node.child_by_field_name("list") {
299 extract_imports_from_node(&list, source, &new_prefix, line, imports);
300 }
301 }
302 "use_list" => {
304 let mut cursor = node.walk();
305 for child in node.children(&mut cursor) {
306 match child.kind() {
307 "{" | "}" | "," => {}
308 _ => extract_imports_from_node(&child, source, prefix, line, imports),
309 }
310 }
311 }
312 "use_wildcard" => {
314 let text = source[node.start_byte()..node.end_byte()].to_string();
315 let module = if let Some(stripped) = text.strip_suffix("::*") {
316 if prefix.is_empty() {
317 stripped.to_string()
318 } else {
319 format!("{prefix}::{stripped}")
320 }
321 } else {
322 prefix.to_string()
323 };
324 imports.push(ImportInfo {
325 module,
326 items: vec!["*".to_string()],
327 line,
328 });
329 }
330 "use_as_clause" => {
332 let alias = node
333 .child_by_field_name("alias")
334 .map(|n| source[n.start_byte()..n.end_byte()].to_string())
335 .unwrap_or_default();
336 let module = if let Some(path_node) = node.child_by_field_name("path") {
337 match path_node.kind() {
338 "scoped_identifier" => path_node.child_by_field_name("path").map_or_else(
339 || prefix.to_string(),
340 |p| {
341 let p_text = source[p.start_byte()..p.end_byte()].to_string();
342 if prefix.is_empty() {
343 p_text
344 } else {
345 format!("{prefix}::{p_text}")
346 }
347 },
348 ),
349 _ => prefix.to_string(),
350 }
351 } else {
352 prefix.to_string()
353 };
354 if !alias.is_empty() {
355 imports.push(ImportInfo {
356 module,
357 items: vec![alias],
358 line,
359 });
360 }
361 }
362 "import_from_statement" => {
364 extract_python_import_from(node, source, line, imports);
365 }
366 _ => {
368 let text = source[node.start_byte()..node.end_byte()]
369 .trim()
370 .to_string();
371 if !text.is_empty() {
372 imports.push(ImportInfo {
373 module: text,
374 items: vec![],
375 line,
376 });
377 }
378 }
379 }
380}
381
382fn extract_import_item_name(child: &Node, source: &str) -> Option<String> {
384 match child.kind() {
385 "dotted_name" => {
386 let name = source[child.start_byte()..child.end_byte()]
387 .trim()
388 .to_string();
389 if name.is_empty() { None } else { Some(name) }
390 }
391 "aliased_import" => child.child_by_field_name("name").and_then(|n| {
392 let name = source[n.start_byte()..n.end_byte()].trim().to_string();
393 if name.is_empty() { None } else { Some(name) }
394 }),
395 _ => None,
396 }
397}
398
399fn collect_import_items(
401 node: &Node,
402 source: &str,
403 is_wildcard: &mut bool,
404 items: &mut Vec<String>,
405) {
406 if let Some(import_list) = node.child_by_field_name("import_list") {
408 let mut cursor = import_list.walk();
409 for child in import_list.named_children(&mut cursor) {
410 if child.kind() == "wildcard_import" {
411 *is_wildcard = true;
412 } else if let Some(name) = extract_import_item_name(&child, source) {
413 items.push(name);
414 }
415 }
416 return;
417 }
418 let mut cursor = node.walk();
420 let mut first = true;
421 for child in node.named_children(&mut cursor) {
422 if first {
423 first = false;
424 continue;
425 }
426 if child.kind() == "wildcard_import" {
427 *is_wildcard = true;
428 } else if let Some(name) = extract_import_item_name(&child, source) {
429 items.push(name);
430 }
431 }
432}
433
434fn extract_python_import_from(
436 node: &Node,
437 source: &str,
438 line: usize,
439 imports: &mut Vec<ImportInfo>,
440) {
441 let module = if let Some(m) = node.child_by_field_name("module_name") {
442 source[m.start_byte()..m.end_byte()].trim().to_string()
443 } else if let Some(r) = node.child_by_field_name("relative_import") {
444 source[r.start_byte()..r.end_byte()].trim().to_string()
445 } else {
446 String::new()
447 };
448
449 let mut is_wildcard = false;
450 let mut items = Vec::new();
451 collect_import_items(node, source, &mut is_wildcard, &mut items);
452
453 if !module.is_empty() {
454 imports.push(ImportInfo {
455 module,
456 items: if is_wildcard {
457 vec!["*".to_string()]
458 } else {
459 items
460 },
461 line,
462 });
463 }
464}
465
/// Extracts functions, classes, imports, references, calls and trait
/// implementations from source text using the cached tree-sitter queries.
pub struct SemanticExtractor;
467
468impl SemanticExtractor {
469 #[instrument(skip_all, fields(language))]
477 pub fn extract(
478 source: &str,
479 language: &str,
480 ast_recursion_limit: Option<usize>,
481 ) -> Result<SemanticAnalysis, ParserError> {
482 let lang_info = get_language_info(language)
483 .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
484
485 let tree = PARSER.with(|p| {
486 let mut parser = p.borrow_mut();
487 parser
488 .set_language(&lang_info.language)
489 .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
490 parser
491 .parse(source, None)
492 .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
493 })?;
494
495 let max_depth: Option<u32> = ast_recursion_limit
498 .filter(|&limit| limit > 0)
499 .map(|limit| {
500 u32::try_from(limit).map_err(|_| {
501 ParserError::ParseError(format!(
502 "ast_recursion_limit {} exceeds maximum supported value {}",
503 limit,
504 u32::MAX
505 ))
506 })
507 })
508 .transpose()?;
509
510 let compiled = get_compiled_queries(language)?;
511 let root = tree.root_node();
512
513 let mut functions = Vec::new();
514 let mut classes = Vec::new();
515 let mut imports = Vec::new();
516 let mut references = Vec::new();
517 let mut call_frequency = HashMap::new();
518 let mut calls = Vec::new();
519
520 Self::extract_elements(
521 source,
522 compiled,
523 root,
524 max_depth,
525 &lang_info,
526 &mut functions,
527 &mut classes,
528 );
529 Self::extract_calls(
530 source,
531 compiled,
532 root,
533 max_depth,
534 &mut calls,
535 &mut call_frequency,
536 );
537 Self::extract_imports(source, compiled, root, max_depth, &mut imports);
538 Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes);
539 Self::extract_references(source, compiled, root, max_depth, &mut references);
540
541 let impl_traits = if language == "rust" {
543 Self::extract_impl_traits_from_tree(source, compiled, root)
544 } else {
545 vec![]
546 };
547
548 tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), impl_traits = impl_traits.len(), "extraction complete");
549
550 Ok(SemanticAnalysis {
551 functions,
552 classes,
553 imports,
554 references,
555 call_frequency,
556 calls,
557 impl_traits,
558 def_use_sites: Vec::new(),
559 })
560 }
561
562 fn extract_elements(
563 source: &str,
564 compiled: &CompiledQueries,
565 root: Node<'_>,
566 max_depth: Option<u32>,
567 lang_info: &crate::languages::LanguageInfo,
568 functions: &mut Vec<FunctionInfo>,
569 classes: &mut Vec<ClassInfo>,
570 ) {
571 let mut cursor = QueryCursor::new();
572 if let Some(depth) = max_depth {
573 cursor.set_max_start_depth(Some(depth));
574 }
575 let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
576 let mut seen_functions = std::collections::HashSet::new();
577
578 while let Some(mat) = matches.next() {
579 let mut func_node: Option<Node> = None;
580 let mut func_name_text: Option<String> = None;
581 let mut class_node: Option<Node> = None;
582 let mut class_name_text: Option<String> = None;
583
584 for capture in mat.captures {
585 let capture_name = compiled.element.capture_names()[capture.index as usize];
586 let node = capture.node;
587 match capture_name {
588 "function" => func_node = Some(node),
589 "func_name" | "method_name" => {
590 func_name_text =
591 Some(source[node.start_byte()..node.end_byte()].to_string());
592 }
593 "class" => class_node = Some(node),
594 "class_name" | "type_name" => {
595 class_name_text =
596 Some(source[node.start_byte()..node.end_byte()].to_string());
597 }
598 _ => {}
599 }
600 }
601
602 if let Some(func_node) = func_node {
603 let parent_is_template = func_node
607 .parent()
608 .map(|p| p.kind() == "template_declaration")
609 .unwrap_or(false);
610 if func_node.kind() == "function_definition" && parent_is_template {
611 } else {
613 let func_def = if func_node.kind() == "template_declaration" {
616 let mut cursor = func_node.walk();
617 func_node
618 .children(&mut cursor)
619 .find(|n| n.kind() == "function_definition")
620 .unwrap_or(func_node)
621 } else {
622 func_node
623 };
624
625 let name = func_name_text
626 .or_else(|| {
627 func_def
628 .child_by_field_name("name")
629 .map(|n| source[n.start_byte()..n.end_byte()].to_string())
630 })
631 .unwrap_or_default();
632
633 let func_key = (name.clone(), func_node.start_position().row);
634 if !name.is_empty() && seen_functions.insert(func_key) {
635 let params = func_def
638 .child_by_field_name("declarator")
639 .and_then(|d| d.child_by_field_name("parameters"))
640 .or_else(|| func_def.child_by_field_name("parameters"))
641 .map(|p| source[p.start_byte()..p.end_byte()].to_string())
642 .unwrap_or_default();
643
644 let return_type = func_def
647 .child_by_field_name("type")
648 .or_else(|| func_def.child_by_field_name("return_type"))
649 .map(|r| source[r.start_byte()..r.end_byte()].to_string());
650
651 functions.push(FunctionInfo {
652 name,
653 line: func_node.start_position().row + 1,
654 end_line: func_node.end_position().row + 1,
655 parameters: if params.is_empty() {
656 Vec::new()
657 } else {
658 vec![params]
659 },
660 return_type,
661 });
662 }
663 }
664 }
665
666 if let Some(class_node) = class_node {
667 let name = class_name_text
668 .or_else(|| {
669 class_node
670 .child_by_field_name("name")
671 .map(|n| source[n.start_byte()..n.end_byte()].to_string())
672 })
673 .unwrap_or_default();
674
675 if !name.is_empty() {
676 let inherits = if let Some(handler) = lang_info.extract_inheritance {
677 handler(&class_node, source)
678 } else {
679 Vec::new()
680 };
681 classes.push(ClassInfo {
682 name,
683 line: class_node.start_position().row + 1,
684 end_line: class_node.end_position().row + 1,
685 methods: Vec::new(),
686 fields: Vec::new(),
687 inherits,
688 });
689 }
690 }
691 }
692 }
693
694 fn enclosing_function_name(mut node: tree_sitter::Node<'_>, source: &str) -> Option<String> {
697 let mut depth = 0u32;
698 while let Some(parent) = node.parent() {
699 depth += 1;
700 if depth > 64 {
704 return None;
705 }
706 let name_node = match parent.kind() {
707 "function_item"
709 | "method_item"
710 | "function_definition"
711 | "function_declaration"
712 | "method_declaration"
713 | "method_definition" => parent.child_by_field_name("name"),
714 "subroutine" => {
716 let mut cursor = parent.walk();
717 parent
718 .children(&mut cursor)
719 .find(|c| c.kind() == "subroutine_statement")
720 .and_then(|s| s.child_by_field_name("name"))
721 }
722 "function" => {
724 let mut cursor = parent.walk();
725 parent
726 .children(&mut cursor)
727 .find(|c| c.kind() == "function_statement")
728 .and_then(|s| s.child_by_field_name("name"))
729 }
730 _ => {
731 node = parent;
732 continue;
733 }
734 };
735 return name_node.map(|n| source[n.start_byte()..n.end_byte()].to_string());
736 }
737 None
741 }
742
743 fn extract_calls(
744 source: &str,
745 compiled: &CompiledQueries,
746 root: Node<'_>,
747 max_depth: Option<u32>,
748 calls: &mut Vec<CallInfo>,
749 call_frequency: &mut HashMap<String, usize>,
750 ) {
751 let mut cursor = QueryCursor::new();
752 if let Some(depth) = max_depth {
753 cursor.set_max_start_depth(Some(depth));
754 }
755 let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
756
757 while let Some(mat) = matches.next() {
758 for capture in mat.captures {
759 let capture_name = compiled.call.capture_names()[capture.index as usize];
760 if capture_name != "call" {
761 continue;
762 }
763 let node = capture.node;
764 let call_name = source[node.start_byte()..node.end_byte()].to_string();
765 *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
766
767 let caller = Self::enclosing_function_name(node, source)
768 .unwrap_or_else(|| "<module>".to_string());
769
770 let mut arg_count = None;
771 let mut arg_node = node;
772 let mut hop = 0u32;
773 let mut cap_hit = false;
774 while let Some(parent) = arg_node.parent() {
775 hop += 1;
776 if hop > 16 {
782 cap_hit = true;
783 break;
784 }
785 if parent.kind() == "call_expression" {
786 if let Some(args) = parent.child_by_field_name("arguments") {
787 arg_count = Some(args.named_child_count());
788 }
789 break;
790 }
791 arg_node = parent;
792 }
793 debug_assert!(
794 !cap_hit,
795 "extract_calls: parent traversal cap reached (hop > 16)"
796 );
797
798 calls.push(CallInfo {
799 caller,
800 callee: call_name,
801 line: node.start_position().row + 1,
802 column: node.start_position().column,
803 arg_count,
804 });
805 }
806 }
807 }
808
809 fn extract_imports(
810 source: &str,
811 compiled: &CompiledQueries,
812 root: Node<'_>,
813 max_depth: Option<u32>,
814 imports: &mut Vec<ImportInfo>,
815 ) {
816 let Some(ref import_query) = compiled.import else {
817 return;
818 };
819 let mut cursor = QueryCursor::new();
820 if let Some(depth) = max_depth {
821 cursor.set_max_start_depth(Some(depth));
822 }
823 let mut matches = cursor.matches(import_query, root, source.as_bytes());
824
825 while let Some(mat) = matches.next() {
826 for capture in mat.captures {
827 let capture_name = import_query.capture_names()[capture.index as usize];
828 if capture_name == "import_path" {
829 let node = capture.node;
830 let line = node.start_position().row + 1;
831 extract_imports_from_node(&node, source, "", line, imports);
832 }
833 }
834 }
835 }
836
837 fn extract_impl_methods(
838 source: &str,
839 compiled: &CompiledQueries,
840 root: Node<'_>,
841 max_depth: Option<u32>,
842 classes: &mut [ClassInfo],
843 ) {
844 let Some(ref impl_query) = compiled.impl_block else {
845 return;
846 };
847 let mut cursor = QueryCursor::new();
848 if let Some(depth) = max_depth {
849 cursor.set_max_start_depth(Some(depth));
850 }
851 let mut matches = cursor.matches(impl_query, root, source.as_bytes());
852
853 while let Some(mat) = matches.next() {
854 let mut impl_type_name = String::new();
855 let mut method_name = String::new();
856 let mut method_line = 0usize;
857 let mut method_end_line = 0usize;
858 let mut method_params = String::new();
859 let mut method_return_type: Option<String> = None;
860
861 for capture in mat.captures {
862 let capture_name = impl_query.capture_names()[capture.index as usize];
863 let node = capture.node;
864 match capture_name {
865 "impl_type" => {
866 impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
867 }
868 "method_name" => {
869 method_name = source[node.start_byte()..node.end_byte()].to_string();
870 }
871 "method_params" => {
872 method_params = source[node.start_byte()..node.end_byte()].to_string();
873 }
874 "method" => {
875 method_line = node.start_position().row + 1;
876 method_end_line = node.end_position().row + 1;
877 method_return_type = node
878 .child_by_field_name("return_type")
879 .map(|r| source[r.start_byte()..r.end_byte()].to_string());
880 }
881 _ => {}
882 }
883 }
884
885 if !impl_type_name.is_empty() && !method_name.is_empty() {
886 let func = FunctionInfo {
887 name: method_name,
888 line: method_line,
889 end_line: method_end_line,
890 parameters: if method_params.is_empty() {
891 Vec::new()
892 } else {
893 vec![method_params]
894 },
895 return_type: method_return_type,
896 };
897 if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
898 class.methods.push(func);
899 }
900 }
901 }
902 }
903
904 fn extract_references(
905 source: &str,
906 compiled: &CompiledQueries,
907 root: Node<'_>,
908 max_depth: Option<u32>,
909 references: &mut Vec<ReferenceInfo>,
910 ) {
911 let Some(ref ref_query) = compiled.reference else {
912 return;
913 };
914 let mut cursor = QueryCursor::new();
915 if let Some(depth) = max_depth {
916 cursor.set_max_start_depth(Some(depth));
917 }
918 let mut seen_refs = std::collections::HashSet::new();
919 let mut matches = cursor.matches(ref_query, root, source.as_bytes());
920
921 while let Some(mat) = matches.next() {
922 for capture in mat.captures {
923 let capture_name = ref_query.capture_names()[capture.index as usize];
924 if capture_name == "type_ref" {
925 let node = capture.node;
926 let type_ref = source[node.start_byte()..node.end_byte()].to_string();
927 if seen_refs.insert(type_ref.clone()) {
928 references.push(ReferenceInfo {
929 symbol: type_ref,
930 reference_type: ReferenceType::Usage,
931 location: String::new(),
933 line: node.start_position().row + 1,
934 });
935 }
936 }
937 }
938 }
939 }
940
941 fn extract_impl_traits_from_tree(
946 source: &str,
947 compiled: &CompiledQueries,
948 root: Node<'_>,
949 ) -> Vec<ImplTraitInfo> {
950 let Some(query) = &compiled.impl_trait else {
951 return vec![];
952 };
953
954 let mut cursor = QueryCursor::new();
955 let mut matches = cursor.matches(query, root, source.as_bytes());
956 let mut results = Vec::new();
957
958 while let Some(mat) = matches.next() {
959 let mut trait_name = String::new();
960 let mut impl_type = String::new();
961 let mut line = 0usize;
962
963 for capture in mat.captures {
964 let capture_name = query.capture_names()[capture.index as usize];
965 let node = capture.node;
966 let text = source[node.start_byte()..node.end_byte()].to_string();
967 match capture_name {
968 "trait_name" => {
969 trait_name = text;
970 line = node.start_position().row + 1;
971 }
972 "impl_type" => {
973 impl_type = text;
974 }
975 _ => {}
976 }
977 }
978
979 if !trait_name.is_empty() && !impl_type.is_empty() {
980 results.push(ImplTraitInfo {
981 trait_name,
982 impl_type,
983 path: PathBuf::new(), line,
985 });
986 }
987 }
988
989 results
990 }
991
992 fn extract_def_use(
1005 source: &str,
1006 compiled: &CompiledQueries,
1007 root: Node<'_>,
1008 symbol_name: &str,
1009 file_path: &str,
1010 max_depth: Option<u32>,
1011 ) -> Vec<crate::types::DefUseSite> {
1012 let Some(ref defuse_query) = compiled.defuse else {
1013 return vec![];
1014 };
1015
1016 let mut cursor = QueryCursor::new();
1017 if let Some(depth) = max_depth {
1018 cursor.set_max_start_depth(Some(depth));
1019 }
1020 let mut matches = cursor.matches(defuse_query, root, source.as_bytes());
1021 let mut sites = Vec::new();
1022 let source_lines: Vec<&str> = source.lines().collect();
1023 let mut write_offsets = std::collections::HashSet::new();
1026
1027 while let Some(mat) = matches.next() {
1028 for capture in mat.captures {
1029 let capture_name = defuse_query.capture_names()[capture.index as usize];
1030 let node = capture.node;
1031 let node_text = node.utf8_text(source.as_bytes()).unwrap_or_default();
1032
1033 if node_text != symbol_name {
1035 continue;
1036 }
1037
1038 let kind = if capture_name.starts_with("write.") {
1040 crate::types::DefUseKind::Write
1041 } else if capture_name.starts_with("read.") {
1042 crate::types::DefUseKind::Read
1043 } else if capture_name.starts_with("writeread.") {
1044 crate::types::DefUseKind::WriteRead
1045 } else {
1046 continue;
1047 };
1048
1049 let byte_offset = node.start_byte();
1050
1051 if kind == crate::types::DefUseKind::Read && write_offsets.contains(&byte_offset) {
1053 continue;
1054 }
1055 if kind != crate::types::DefUseKind::Read {
1056 write_offsets.insert(byte_offset);
1057 }
1058
1059 let line = node.start_position().row + 1;
1062 let snippet = {
1063 let row = node.start_position().row;
1064 let last_line = source_lines.len().saturating_sub(1);
1065 let prev = if row > 0 { row - 1 } else { 0 };
1066 let next = std::cmp::min(row + 1, last_line);
1067 let prev_text = if row == 0 {
1068 ""
1069 } else {
1070 source_lines[prev].trim_end()
1071 };
1072 let cur_text = source_lines[row].trim_end();
1073 let next_text = if row >= last_line {
1074 ""
1075 } else {
1076 source_lines[next].trim_end()
1077 };
1078 format!("{prev_text}\n{cur_text}\n{next_text}")
1079 };
1080
1081 let enclosing_scope = Self::enclosing_function_name(node, source);
1083
1084 let column = node.start_position().column;
1085 sites.push(crate::types::DefUseSite {
1086 kind,
1087 symbol: node_text.to_string(),
1088 file: file_path.to_string(),
1089 line,
1090 column,
1091 snippet,
1092 enclosing_scope,
1093 });
1094 }
1095 }
1096
1097 sites
1098 }
1099
1100 pub(crate) fn extract_def_use_for_file(
1103 source: &str,
1104 language: &str,
1105 symbol: &str,
1106 file_path: &str,
1107 ast_recursion_limit: Option<usize>,
1108 ) -> Vec<crate::types::DefUseSite> {
1109 let Some(lang_info) = crate::languages::get_language_info(language) else {
1110 return vec![];
1111 };
1112 let Ok(compiled) = get_compiled_queries(language) else {
1113 return vec![];
1114 };
1115 if compiled.defuse.is_none() {
1116 return vec![];
1117 }
1118
1119 let tree = match PARSER.with(|p| {
1120 let mut parser = p.borrow_mut();
1121 if parser.set_language(&lang_info.language).is_err() {
1122 return None;
1123 }
1124 parser.parse(source, None)
1125 }) {
1126 Some(t) => t,
1127 None => return vec![],
1128 };
1129
1130 let root = tree.root_node();
1131
1132 let max_depth: Option<u32> = ast_recursion_limit
1135 .filter(|&limit| limit > 0)
1136 .and_then(|limit| u32::try_from(limit).ok());
1137
1138 Self::extract_def_use(source, compiled, root, symbol, file_path, max_depth)
1139 }
1140}
1141
1142#[must_use]
1147pub fn extract_impl_traits(source: &str, path: &Path) -> Vec<ImplTraitInfo> {
1148 let Some(lang_info) = get_language_info("rust") else {
1149 return vec![];
1150 };
1151
1152 let Ok(compiled) = get_compiled_queries("rust") else {
1153 return vec![];
1154 };
1155
1156 let Some(query) = &compiled.impl_trait else {
1157 return vec![];
1158 };
1159
1160 let Some(tree) = PARSER.with(|p| {
1161 let mut parser = p.borrow_mut();
1162 let _ = parser.set_language(&lang_info.language);
1163 parser.parse(source, None)
1164 }) else {
1165 return vec![];
1166 };
1167
1168 let root = tree.root_node();
1169 let mut cursor = QueryCursor::new();
1170 let mut matches = cursor.matches(query, root, source.as_bytes());
1171 let mut results = Vec::new();
1172
1173 while let Some(mat) = matches.next() {
1174 let mut trait_name = String::new();
1175 let mut impl_type = String::new();
1176 let mut line = 0usize;
1177
1178 for capture in mat.captures {
1179 let capture_name = query.capture_names()[capture.index as usize];
1180 let node = capture.node;
1181 let text = source[node.start_byte()..node.end_byte()].to_string();
1182 match capture_name {
1183 "trait_name" => {
1184 trait_name = text;
1185 line = node.start_position().row + 1;
1186 }
1187 "impl_type" => {
1188 impl_type = text;
1189 }
1190 _ => {}
1191 }
1192 }
1193
1194 if !trait_name.is_empty() && !impl_type.is_empty() {
1195 results.push(ImplTraitInfo {
1196 trait_name,
1197 impl_type,
1198 path: path.to_path_buf(),
1199 line,
1200 });
1201 }
1202 }
1203
1204 results
1205}
1206
1207pub fn execute_query_impl(
1211 language: &str,
1212 source: &str,
1213 query_str: &str,
1214) -> Result<Vec<crate::QueryCapture>, ParserError> {
1215 let ts_language = crate::languages::get_ts_language(language)
1217 .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
1218
1219 let mut parser = Parser::new();
1220 parser
1221 .set_language(&ts_language)
1222 .map_err(|e| ParserError::QueryError(e.to_string()))?;
1223
1224 let tree = parser
1225 .parse(source.as_bytes(), None)
1226 .ok_or_else(|| ParserError::QueryError("failed to parse source".to_string()))?;
1227
1228 let query =
1229 Query::new(&ts_language, query_str).map_err(|e| ParserError::QueryError(e.to_string()))?;
1230
1231 let mut cursor = QueryCursor::new();
1232 let source_bytes = source.as_bytes();
1233
1234 let mut captures = Vec::new();
1235 let mut matches = cursor.matches(&query, tree.root_node(), source_bytes);
1236 while let Some(m) = matches.next() {
1237 for cap in m.captures {
1238 let node = cap.node;
1239 let capture_name = query.capture_names()[cap.index as usize].to_string();
1240 let text = node.utf8_text(source_bytes).unwrap_or("").to_string();
1241 captures.push(crate::QueryCapture {
1242 capture_name,
1243 text,
1244 start_line: node.start_position().row,
1245 end_line: node.end_position().row,
1246 start_byte: node.start_byte(),
1247 end_byte: node.end_byte(),
1248 });
1249 }
1250 }
1251 Ok(captures)
1252}
1253
#[cfg(all(test, feature = "lang-rust"))]
mod tests {
    use super::*;
    use std::path::Path;

    /// `Some(0)` as the AST recursion limit must behave like `None`: both
    /// calls succeed and report the same number of functions.
    #[test]
    fn test_ast_recursion_limit_zero_is_unlimited() {
        let source = "fn hello() -> u32 { 42 }";
        let result_none = SemanticExtractor::extract(source, "rust", None);
        let result_zero = SemanticExtractor::extract(source, "rust", Some(0));
        assert!(result_none.is_ok(), "extract with None failed");
        assert!(result_zero.is_ok(), "extract with Some(0) failed");
        let analysis_none = result_none.unwrap();
        let analysis_zero = result_zero.unwrap();
        assert!(
            !analysis_none.functions.is_empty(),
            "extract with None should find at least one function in the test source"
        );
        assert_eq!(
            analysis_none.functions.len(),
            analysis_zero.functions.len(),
            "ast_recursion_limit=0 should behave identically to unset (unlimited)"
        );
    }

    /// `use path as alias;` with a scoped path must surface the alias as an
    /// import item.
    #[test]
    fn test_rust_use_as_imports() {
        let source = "use std::io as stdio;";
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        assert!(
            result
                .imports
                .iter()
                .any(|imp| imp.items.iter().any(|i| i == "stdio")),
            "expected import alias 'stdio' in {:?}",
            result.imports
        );
    }

    /// Same as the scoped case, but the aliased path is a single identifier.
    #[test]
    fn test_rust_use_as_clause_plain_identifier() {
        let source = "use io as stdio;";
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        assert!(
            result
                .imports
                .iter()
                .any(|imp| imp.items.iter().any(|i| i == "stdio")),
            "expected import alias 'stdio' from plain identifier in {:?}",
            result.imports
        );
    }

    /// Items inside a `use` list that carry their own path prefix must be
    /// reported under a module starting with `std::io`.
    #[test]
    fn test_rust_scoped_use_with_prefix() {
        let source = "use std::{io::Read, io::Write};";
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Borrow the item names instead of cloning them; only membership is checked.
        let items: Vec<&str> = result
            .imports
            .iter()
            .filter(|imp| imp.module.starts_with("std::io"))
            .flat_map(|imp| imp.items.iter().map(|s| s.as_str()))
            .collect();
        assert!(
            items.contains(&"Read") && items.contains(&"Write"),
            "expected 'Read' and 'Write' items under module with std::io, got {:?}",
            result.imports
        );
    }

    /// A braced `use` list must report each entry as an item of module `std`.
    #[test]
    fn test_rust_scoped_use_imports() {
        let source = "use std::{fs, io};";
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        let items: Vec<&str> = result
            .imports
            .iter()
            .filter(|imp| imp.module == "std")
            .flat_map(|imp| imp.items.iter().map(|s| s.as_str()))
            .collect();
        assert!(
            items.contains(&"fs") && items.contains(&"io"),
            "expected 'fs' and 'io' items under module 'std', got {:?}",
            items
        );
    }

    /// A glob import must be reported as the single item `*` under its module.
    #[test]
    fn test_rust_wildcard_imports() {
        let source = "use std::io::*;";
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Compare against an array literal so no `Vec` is allocated per import.
        let wildcard = result
            .imports
            .iter()
            .find(|imp| imp.module == "std::io" && imp.items == ["*"]);
        assert!(
            wildcard.is_some(),
            "expected wildcard import with module 'std::io', got {:?}",
            result.imports
        );
    }

    /// `extract_impl_traits` must report exactly one entry for a single
    /// `impl Trait for Type` block, with the trait and type names filled in.
    #[test]
    fn test_extract_impl_traits_standalone() {
        let source = r#"
struct Foo;
trait Display {}
impl Display for Foo {}
"#;
        let results = extract_impl_traits(source, Path::new("test.rs"));
        assert_eq!(
            results.len(),
            1,
            "expected one impl trait, got {:?}",
            results
        );
        assert_eq!(results[0].trait_name, "Display");
        assert_eq!(results[0].impl_type, "Foo");
    }

    /// A recursion limit one past `u32::MAX` must be rejected with
    /// `ParserError::ParseError`.
    ///
    /// Only compiled on 64-bit targets, where `u32::MAX + 1` fits in `usize`.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_ast_recursion_limit_overflow() {
        let source = "fn foo() {}";
        let big_limit = usize::try_from(u32::MAX).unwrap() + 1;
        let result = SemanticExtractor::extract(source, "rust", Some(big_limit));
        assert!(
            matches!(result, Err(ParserError::ParseError(_))),
            "expected ParseError for oversized limit, got {:?}",
            result
        );
    }

    /// A small positive recursion limit must still succeed and find the
    /// top-level function.
    #[test]
    fn test_ast_recursion_limit_some() {
        let source = "fn hello() -> u32 { 42 }";
        let result = SemanticExtractor::extract(source, "rust", Some(5));
        assert!(result.is_ok(), "extract with Some(5) failed: {:?}", result);
        let analysis = result.unwrap();
        assert!(
            !analysis.functions.is_empty(),
            "expected at least one function with depth limit 5"
        );
    }

    /// Def-use extraction for `count` must yield at least one write site and
    /// one read site, and tag the first site with the file path passed in.
    #[test]
    fn test_extract_def_use_for_file_finds_write_and_read() {
        let source = r#"
fn main() {
    let count = 0;
    println!("{}", count);
}
"#;
        let sites = SemanticExtractor::extract_def_use_for_file(
            source,
            "rust",
            "count",
            "src/main.rs",
            None,
        );

        assert!(
            !sites.is_empty(),
            "expected at least one def-use site for 'count'"
        );
        let has_write = sites
            .iter()
            .any(|s| s.kind == crate::types::DefUseKind::Write);
        let has_read = sites
            .iter()
            .any(|s| s.kind == crate::types::DefUseKind::Read);
        assert!(has_write, "expected a write site for 'count'");
        assert!(has_read, "expected a read site for 'count'");
        assert_eq!(sites[0].file, "src/main.rs");
    }

    /// Asking for a symbol that does not occur in the source must return no
    /// sites.
    #[test]
    fn test_extract_def_use_for_file_no_match_returns_empty() {
        let source = "fn foo() { let x = 1; }";

        let sites = SemanticExtractor::extract_def_use_for_file(
            source,
            "rust",
            "nonexistent_symbol",
            "src/lib.rs",
            None,
        );

        assert!(sites.is_empty(), "expected empty for nonexistent symbol");
    }
}
1477
#[cfg(all(test, feature = "lang-python"))]
mod tests_python {
    use super::*;

    /// `from . import foo` must produce an import whose module contains a dot.
    #[test]
    fn test_python_relative_import() {
        let analysis = SemanticExtractor::extract("from . import foo\n", "python", None).unwrap();
        let has_relative = analysis
            .imports
            .iter()
            .any(|import| import.module.contains("."));
        assert!(
            has_relative,
            "expected relative import in {:?}",
            analysis.imports
        );
    }

    /// `from os import path as p` must list `path` as an item of module `os`.
    #[test]
    fn test_python_aliased_import() {
        let analysis =
            SemanticExtractor::extract("from os import path as p\n", "python", None).unwrap();
        // Narrow to imports from `os`, then look for the original item name.
        let has_path_from_os = analysis
            .imports
            .iter()
            .filter(|import| import.module == "os")
            .any(|import| import.items.iter().any(|item| item == "path"));
        assert!(
            has_path_from_os,
            "expected import 'path' from module 'os' in {:?}",
            analysis.imports
        );
    }
}
1517
#[cfg(test)]
mod tests_unsupported {
    use super::*;

    /// Asserts that `outcome` is `Err(ParserError::UnsupportedLanguage(lang))`
    /// with `lang` equal to `expected_lang`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test rather
    /// than at this helper.
    #[track_caller]
    fn assert_unsupported<T: std::fmt::Debug>(
        outcome: &Result<T, ParserError>,
        expected_lang: &str,
    ) {
        let rejected = matches!(
            outcome,
            Err(ParserError::UnsupportedLanguage(lang)) if lang == expected_lang
        );
        assert!(
            rejected,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }

    /// The element extractor must reject a language name it does not know.
    #[test]
    fn test_element_extractor_unsupported_language() {
        let outcome = ElementExtractor::extract_with_depth("x = 1", "cobol");
        assert_unsupported(&outcome, "cobol");
    }

    /// The semantic extractor must reject a language name it does not know.
    #[test]
    fn test_semantic_extractor_unsupported_language() {
        let outcome = SemanticExtractor::extract("x = 1", "cobol", None);
        assert_unsupported(&outcome, "cobol");
    }
}