1use crate::languages::get_language_info;
13use crate::types::{
14 CallInfo, ClassInfo, FunctionInfo, ImplTraitInfo, ImportInfo, ReferenceInfo, ReferenceType,
15 SemanticAnalysis,
16};
17use std::cell::RefCell;
18use std::collections::HashMap;
19use std::path::{Path, PathBuf};
20use std::sync::LazyLock;
21use thiserror::Error;
22use tracing::instrument;
23use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
24
/// Errors reported by the parsing and query helpers in this module.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum ParserError {
    /// No language info or compiled query set is registered under the given language name.
    #[error("Unsupported language: {0}")]
    UnsupportedLanguage(String),
    /// The parser could not be configured or produced no tree; also used when
    /// `ast_recursion_limit` does not fit in a `u32`. The payload describes the failure.
    #[error("Failed to parse file: {0}")]
    ParseError(String),
    /// The input was not valid UTF-8.
    // NOTE(review): not constructed anywhere in the visible part of this file.
    #[error("Invalid UTF-8 in file")]
    InvalidUtf8,
    /// A query failed to compile, or an ad-hoc query run could not set up its parser or parse
    /// the source. The payload describes the failure.
    #[error("Query error: {0}")]
    QueryError(String),
}
37
/// The compiled tree-sitter queries for one language.
///
/// `element` and `call` are always present; the remaining queries exist only when the
/// language's info supplies a query string for them.
struct CompiledQueries {
    /// Captures functions and classes (`function`, `class` and their name captures).
    element: Query,
    /// Captures call sites (`call`).
    call: Query,
    /// Captures import paths (`import_path`).
    import: Option<Query>,
    /// Captures methods inside impl blocks (`impl_type`, `method`, `method_name`, `method_params`).
    impl_block: Option<Query>,
    /// Captures type references (`type_ref`).
    reference: Option<Query>,
    /// Captures trait implementations (`trait_name`, `impl_type`).
    impl_trait: Option<Query>,
    /// Captures definition/use sites (`write.*`, `read.*`, `writeread.*`).
    defuse: Option<Query>,
}
49
50#[cfg_attr(coverage_nightly, coverage(off))]
55fn build_compiled_queries(
56 lang_info: &crate::languages::LanguageInfo,
57) -> Result<CompiledQueries, ParserError> {
58 let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
59 ParserError::QueryError(format!(
60 "Failed to compile element query for {}: {}",
61 lang_info.name, e
62 ))
63 })?;
64
65 let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
66 ParserError::QueryError(format!(
67 "Failed to compile call query for {}: {}",
68 lang_info.name, e
69 ))
70 })?;
71
72 let import = if let Some(import_query_str) = lang_info.import_query {
73 Some(
74 Query::new(&lang_info.language, import_query_str).map_err(|e| {
75 ParserError::QueryError(format!(
76 "Failed to compile import query for {}: {}",
77 lang_info.name, e
78 ))
79 })?,
80 )
81 } else {
82 None
83 };
84
85 let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
86 Some(
87 Query::new(&lang_info.language, impl_query_str).map_err(|e| {
88 ParserError::QueryError(format!(
89 "Failed to compile impl query for {}: {}",
90 lang_info.name, e
91 ))
92 })?,
93 )
94 } else {
95 None
96 };
97
98 let reference = if let Some(ref_query_str) = lang_info.reference_query {
99 Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
100 ParserError::QueryError(format!(
101 "Failed to compile reference query for {}: {}",
102 lang_info.name, e
103 ))
104 })?)
105 } else {
106 None
107 };
108
109 let impl_trait = if let Some(impl_trait_query_str) = lang_info.impl_trait_query {
110 Some(
111 Query::new(&lang_info.language, impl_trait_query_str).map_err(|e| {
112 ParserError::QueryError(format!(
113 "Failed to compile impl_trait query for {}: {}",
114 lang_info.name, e
115 ))
116 })?,
117 )
118 } else {
119 None
120 };
121
122 let defuse = if let Some(defuse_query_str) = lang_info.defuse_query {
123 Some(
124 Query::new(&lang_info.language, defuse_query_str).map_err(|e| {
125 ParserError::QueryError(format!(
126 "Failed to compile defuse query for {}: {}",
127 lang_info.name, e
128 ))
129 })?,
130 )
131 } else {
132 None
133 };
134
135 Ok(CompiledQueries {
136 element,
137 call,
138 import,
139 impl_block,
140 reference,
141 impl_trait,
142 defuse,
143 })
144}
145
146#[cfg_attr(coverage_nightly, coverage(off))]
151fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
152 let mut cache = HashMap::new();
153
154 for lang_name in crate::lang::supported_languages() {
155 if let Some(lang_info) = get_language_info(lang_name) {
156 match build_compiled_queries(&lang_info) {
157 Ok(compiled) => {
158 cache.insert(*lang_name, compiled);
159 }
160 Err(e) => {
161 tracing::error!(
162 "Failed to compile queries for language {}: {}",
163 lang_name,
164 e
165 );
166 }
167 }
168 }
169 }
170
171 cache
172}
173
/// Process-wide table of compiled queries keyed by language name, built by
/// `init_query_cache` the first time it is accessed.
static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
    LazyLock::new(init_query_cache);
177
178fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
180 QUERY_CACHE
181 .get(language)
182 .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
183}
184
thread_local! {
    /// Per-thread parser; users in this file set the language before every parse.
    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
    /// Per-thread query cursor; users in this file reset `max_start_depth` before every run.
    static QUERY_CURSOR: RefCell<QueryCursor> = RefCell::new(QueryCursor::new());
}
189
/// Namespace type for the lightweight function/class counting entry point.
pub struct ElementExtractor;
192
193impl ElementExtractor {
194 #[instrument(skip_all, fields(language))]
202 pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
203 let lang_info = get_language_info(language)
204 .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
205
206 let tree = PARSER.with(|p| {
207 let mut parser = p.borrow_mut();
208 parser
209 .set_language(&lang_info.language)
210 .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
211 parser
212 .parse(source, None)
213 .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
214 })?;
215
216 let compiled = get_compiled_queries(language)?;
217
218 let (function_count, class_count) = QUERY_CURSOR.with(|c| {
219 let mut cursor = c.borrow_mut();
220 cursor.set_max_start_depth(None);
221 let mut function_count = 0;
222 let mut class_count = 0;
223
224 let mut matches =
225 cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
226 while let Some(mat) = matches.next() {
227 for capture in mat.captures {
228 let capture_name = compiled.element.capture_names()[capture.index as usize];
229 match capture_name {
230 "function" => function_count += 1,
231 "class" => class_count += 1,
232 _ => {}
233 }
234 }
235 }
236 (function_count, class_count)
237 });
238
239 tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
240
241 Ok((function_count, class_count))
242 }
243}
244
245#[allow(clippy::too_many_lines)] fn extract_imports_from_node(
250 node: &Node,
251 source: &str,
252 prefix: &str,
253 line: usize,
254 imports: &mut Vec<ImportInfo>,
255) {
256 match node.kind() {
257 "identifier" | "self" | "super" | "crate" => {
259 let name = source[node.start_byte()..node.end_byte()].to_string();
260 imports.push(ImportInfo {
261 module: prefix.to_string(),
262 items: vec![name],
263 line,
264 });
265 }
266 "scoped_identifier" => {
268 let item = node
269 .child_by_field_name("name")
270 .map(|n| source[n.start_byte()..n.end_byte()].to_string())
271 .unwrap_or_default();
272 let module = node.child_by_field_name("path").map_or_else(
273 || prefix.to_string(),
274 |p| {
275 let path_text = source[p.start_byte()..p.end_byte()].to_string();
276 if prefix.is_empty() {
277 path_text
278 } else {
279 format!("{prefix}::{path_text}")
280 }
281 },
282 );
283 if !item.is_empty() {
284 imports.push(ImportInfo {
285 module,
286 items: vec![item],
287 line,
288 });
289 }
290 }
291 "scoped_use_list" => {
293 let new_prefix = node.child_by_field_name("path").map_or_else(
294 || prefix.to_string(),
295 |p| {
296 let path_text = source[p.start_byte()..p.end_byte()].to_string();
297 if prefix.is_empty() {
298 path_text
299 } else {
300 format!("{prefix}::{path_text}")
301 }
302 },
303 );
304 if let Some(list) = node.child_by_field_name("list") {
305 extract_imports_from_node(&list, source, &new_prefix, line, imports);
306 }
307 }
308 "use_list" => {
310 let mut cursor = node.walk();
311 for child in node.children(&mut cursor) {
312 match child.kind() {
313 "{" | "}" | "," => {}
314 _ => extract_imports_from_node(&child, source, prefix, line, imports),
315 }
316 }
317 }
318 "use_wildcard" => {
320 let text = source[node.start_byte()..node.end_byte()].to_string();
321 let module = if let Some(stripped) = text.strip_suffix("::*") {
322 if prefix.is_empty() {
323 stripped.to_string()
324 } else {
325 format!("{prefix}::{stripped}")
326 }
327 } else {
328 prefix.to_string()
329 };
330 imports.push(ImportInfo {
331 module,
332 items: vec!["*".to_string()],
333 line,
334 });
335 }
336 "use_as_clause" => {
338 let alias = node
339 .child_by_field_name("alias")
340 .map(|n| source[n.start_byte()..n.end_byte()].to_string())
341 .unwrap_or_default();
342 let module = if let Some(path_node) = node.child_by_field_name("path") {
343 match path_node.kind() {
344 "scoped_identifier" => path_node.child_by_field_name("path").map_or_else(
345 || prefix.to_string(),
346 |p| {
347 let p_text = source[p.start_byte()..p.end_byte()].to_string();
348 if prefix.is_empty() {
349 p_text
350 } else {
351 format!("{prefix}::{p_text}")
352 }
353 },
354 ),
355 _ => prefix.to_string(),
356 }
357 } else {
358 prefix.to_string()
359 };
360 if !alias.is_empty() {
361 imports.push(ImportInfo {
362 module,
363 items: vec![alias],
364 line,
365 });
366 }
367 }
368 "import_from_statement" => {
370 extract_python_import_from(node, source, line, imports);
371 }
372 _ => {
374 let text = source[node.start_byte()..node.end_byte()]
375 .trim()
376 .to_string();
377 if !text.is_empty() {
378 imports.push(ImportInfo {
379 module: text,
380 items: vec![],
381 line,
382 });
383 }
384 }
385 }
386}
387
388fn extract_import_item_name(child: &Node, source: &str) -> Option<String> {
390 match child.kind() {
391 "dotted_name" => {
392 let name = source[child.start_byte()..child.end_byte()]
393 .trim()
394 .to_string();
395 if name.is_empty() { None } else { Some(name) }
396 }
397 "aliased_import" => child.child_by_field_name("name").and_then(|n| {
398 let name = source[n.start_byte()..n.end_byte()].trim().to_string();
399 if name.is_empty() { None } else { Some(name) }
400 }),
401 _ => None,
402 }
403}
404
405fn collect_import_items(
407 node: &Node,
408 source: &str,
409 is_wildcard: &mut bool,
410 items: &mut Vec<String>,
411) {
412 if let Some(import_list) = node.child_by_field_name("import_list") {
414 let mut cursor = import_list.walk();
415 for child in import_list.named_children(&mut cursor) {
416 if child.kind() == "wildcard_import" {
417 *is_wildcard = true;
418 } else if let Some(name) = extract_import_item_name(&child, source) {
419 items.push(name);
420 }
421 }
422 return;
423 }
424 let mut cursor = node.walk();
426 let mut first = true;
427 for child in node.named_children(&mut cursor) {
428 if first {
429 first = false;
430 continue;
431 }
432 if child.kind() == "wildcard_import" {
433 *is_wildcard = true;
434 } else if let Some(name) = extract_import_item_name(&child, source) {
435 items.push(name);
436 }
437 }
438}
439
440fn extract_python_import_from(
442 node: &Node,
443 source: &str,
444 line: usize,
445 imports: &mut Vec<ImportInfo>,
446) {
447 let module = if let Some(m) = node.child_by_field_name("module_name") {
448 source[m.start_byte()..m.end_byte()].trim().to_string()
449 } else if let Some(r) = node.child_by_field_name("relative_import") {
450 source[r.start_byte()..r.end_byte()].trim().to_string()
451 } else {
452 String::new()
453 };
454
455 let mut is_wildcard = false;
456 let mut items = Vec::new();
457 collect_import_items(node, source, &mut is_wildcard, &mut items);
458
459 if !module.is_empty() {
460 imports.push(ImportInfo {
461 module,
462 items: if is_wildcard {
463 vec!["*".to_string()]
464 } else {
465 items
466 },
467 line,
468 });
469 }
470}
471
/// Namespace type for the full semantic extraction entry points (functions, classes, imports,
/// calls, references, trait implementations and def-use sites).
pub struct SemanticExtractor;
473
474impl SemanticExtractor {
475 #[instrument(skip_all, fields(language))]
483 pub fn extract(
484 source: &str,
485 language: &str,
486 ast_recursion_limit: Option<usize>,
487 ) -> Result<SemanticAnalysis, ParserError> {
488 let lang_info = get_language_info(language)
489 .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
490
491 let tree = PARSER.with(|p| {
492 let mut parser = p.borrow_mut();
493 parser
494 .set_language(&lang_info.language)
495 .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
496 parser
497 .parse(source, None)
498 .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
499 })?;
500
501 let max_depth: Option<u32> = ast_recursion_limit
504 .filter(|&limit| limit > 0)
505 .map(|limit| {
506 u32::try_from(limit).map_err(|_| {
507 ParserError::ParseError(format!(
508 "ast_recursion_limit {} exceeds maximum supported value {}",
509 limit,
510 u32::MAX
511 ))
512 })
513 })
514 .transpose()?;
515
516 let compiled = get_compiled_queries(language)?;
517 let root = tree.root_node();
518
519 let mut functions = Vec::new();
520 let mut classes = Vec::new();
521 let mut imports = Vec::new();
522 let mut references = Vec::new();
523 let mut call_frequency = HashMap::new();
524 let mut calls = Vec::new();
525
526 Self::extract_elements(
527 source,
528 compiled,
529 root,
530 max_depth,
531 &lang_info,
532 &mut functions,
533 &mut classes,
534 );
535 Self::extract_calls(
536 source,
537 compiled,
538 root,
539 max_depth,
540 &mut calls,
541 &mut call_frequency,
542 );
543 Self::extract_imports(source, compiled, root, max_depth, &mut imports);
544 Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes);
545 Self::extract_references(source, compiled, root, max_depth, &mut references);
546
547 let impl_traits = if language == "rust" {
549 Self::extract_impl_traits_from_tree(source, compiled, root)
550 } else {
551 vec![]
552 };
553
554 tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), impl_traits = impl_traits.len(), "extraction complete");
555
556 Ok(SemanticAnalysis {
557 functions,
558 classes,
559 imports,
560 references,
561 call_frequency,
562 calls,
563 impl_traits,
564 def_use_sites: Vec::new(),
565 })
566 }
567
568 fn extract_elements(
569 source: &str,
570 compiled: &CompiledQueries,
571 root: Node<'_>,
572 max_depth: Option<u32>,
573 lang_info: &crate::languages::LanguageInfo,
574 functions: &mut Vec<FunctionInfo>,
575 classes: &mut Vec<ClassInfo>,
576 ) {
577 let mut seen_functions = std::collections::HashSet::new();
578
579 QUERY_CURSOR.with(|c| {
580 let mut cursor = c.borrow_mut();
581 cursor.set_max_start_depth(None);
582 if let Some(depth) = max_depth {
583 cursor.set_max_start_depth(Some(depth));
584 }
585 let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
586
587 while let Some(mat) = matches.next() {
588 let mut func_node: Option<Node> = None;
589 let mut func_name_text: Option<String> = None;
590 let mut class_node: Option<Node> = None;
591 let mut class_name_text: Option<String> = None;
592
593 for capture in mat.captures {
594 let capture_name = compiled.element.capture_names()[capture.index as usize];
595 let node = capture.node;
596 match capture_name {
597 "function" => func_node = Some(node),
598 "func_name" | "method_name" => {
599 func_name_text =
600 Some(source[node.start_byte()..node.end_byte()].to_string());
601 }
602 "class" => class_node = Some(node),
603 "class_name" | "type_name" => {
604 class_name_text =
605 Some(source[node.start_byte()..node.end_byte()].to_string());
606 }
607 _ => {}
608 }
609 }
610
611 if let Some(func_node) = func_node {
612 let parent_is_template = func_node
616 .parent()
617 .map(|p| p.kind() == "template_declaration")
618 .unwrap_or(false);
619 if func_node.kind() == "function_definition" && parent_is_template {
620 } else {
622 let func_def = if func_node.kind() == "template_declaration" {
625 let mut cursor = func_node.walk();
626 func_node
627 .children(&mut cursor)
628 .find(|n| n.kind() == "function_definition")
629 .unwrap_or(func_node)
630 } else {
631 func_node
632 };
633
634 let name = func_name_text
635 .or_else(|| {
636 func_def
637 .child_by_field_name("name")
638 .map(|n| source[n.start_byte()..n.end_byte()].to_string())
639 })
640 .unwrap_or_default();
641
642 let func_key = (name.clone(), func_node.start_position().row);
643 if !name.is_empty() && seen_functions.insert(func_key) {
644 let params = func_def
647 .child_by_field_name("declarator")
648 .and_then(|d| d.child_by_field_name("parameters"))
649 .or_else(|| func_def.child_by_field_name("parameters"))
650 .map(|p| source[p.start_byte()..p.end_byte()].to_string())
651 .unwrap_or_default();
652
653 let return_type = func_def
656 .child_by_field_name("type")
657 .or_else(|| func_def.child_by_field_name("return_type"))
658 .map(|r| source[r.start_byte()..r.end_byte()].to_string());
659
660 functions.push(FunctionInfo {
661 name,
662 line: func_node.start_position().row + 1,
663 end_line: func_node.end_position().row + 1,
664 parameters: if params.is_empty() {
665 Vec::new()
666 } else {
667 vec![params]
668 },
669 return_type,
670 });
671 }
672 }
673 }
674
675 if let Some(class_node) = class_node {
676 let name = class_name_text
677 .or_else(|| {
678 class_node
679 .child_by_field_name("name")
680 .map(|n| source[n.start_byte()..n.end_byte()].to_string())
681 })
682 .unwrap_or_default();
683
684 if !name.is_empty() {
685 let inherits = if let Some(handler) = lang_info.extract_inheritance {
686 handler(&class_node, source)
687 } else {
688 Vec::new()
689 };
690 classes.push(ClassInfo {
691 name,
692 line: class_node.start_position().row + 1,
693 end_line: class_node.end_position().row + 1,
694 methods: Vec::new(),
695 fields: Vec::new(),
696 inherits,
697 });
698 }
699 }
700 }
701 });
702 }
703
704 fn enclosing_function_name(mut node: tree_sitter::Node<'_>, source: &str) -> Option<String> {
707 let mut depth = 0u32;
708 while let Some(parent) = node.parent() {
709 depth += 1;
710 if depth > 64 {
714 return None;
715 }
716 let name_node = match parent.kind() {
717 "function_item"
719 | "method_item"
720 | "function_definition"
721 | "function_declaration"
722 | "method_declaration"
723 | "method_definition" => parent.child_by_field_name("name"),
724 "subroutine" => {
726 let mut cursor = parent.walk();
727 parent
728 .children(&mut cursor)
729 .find(|c| c.kind() == "subroutine_statement")
730 .and_then(|s| s.child_by_field_name("name"))
731 }
732 "function" => {
734 let mut cursor = parent.walk();
735 parent
736 .children(&mut cursor)
737 .find(|c| c.kind() == "function_statement")
738 .and_then(|s| s.child_by_field_name("name"))
739 }
740 _ => {
741 node = parent;
742 continue;
743 }
744 };
745 return name_node.map(|n| source[n.start_byte()..n.end_byte()].to_string());
746 }
747 None
751 }
752
753 fn extract_calls(
754 source: &str,
755 compiled: &CompiledQueries,
756 root: Node<'_>,
757 max_depth: Option<u32>,
758 calls: &mut Vec<CallInfo>,
759 call_frequency: &mut HashMap<String, usize>,
760 ) {
761 QUERY_CURSOR.with(|c| {
762 let mut cursor = c.borrow_mut();
763 cursor.set_max_start_depth(None);
764 if let Some(depth) = max_depth {
765 cursor.set_max_start_depth(Some(depth));
766 }
767 let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
768
769 while let Some(mat) = matches.next() {
770 for capture in mat.captures {
771 let capture_name = compiled.call.capture_names()[capture.index as usize];
772 if capture_name != "call" {
773 continue;
774 }
775 let node = capture.node;
776 let call_name = source[node.start_byte()..node.end_byte()].to_string();
777 *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
778
779 let caller = Self::enclosing_function_name(node, source)
780 .unwrap_or_else(|| "<module>".to_string());
781
782 let mut arg_count = None;
783 let mut arg_node = node;
784 let mut hop = 0u32;
785 let mut cap_hit = false;
786 while let Some(parent) = arg_node.parent() {
787 hop += 1;
788 if hop > 16 {
794 cap_hit = true;
795 break;
796 }
797 if parent.kind() == "call_expression" {
798 if let Some(args) = parent.child_by_field_name("arguments") {
799 arg_count = Some(args.named_child_count());
800 }
801 break;
802 }
803 arg_node = parent;
804 }
805 debug_assert!(
806 !cap_hit,
807 "extract_calls: parent traversal cap reached (hop > 16)"
808 );
809
810 calls.push(CallInfo {
811 caller,
812 callee: call_name,
813 line: node.start_position().row + 1,
814 column: node.start_position().column,
815 arg_count,
816 });
817 }
818 }
819 });
820 }
821
822 fn extract_imports(
823 source: &str,
824 compiled: &CompiledQueries,
825 root: Node<'_>,
826 max_depth: Option<u32>,
827 imports: &mut Vec<ImportInfo>,
828 ) {
829 let Some(ref import_query) = compiled.import else {
830 return;
831 };
832 QUERY_CURSOR.with(|c| {
833 let mut cursor = c.borrow_mut();
834 cursor.set_max_start_depth(None);
835 if let Some(depth) = max_depth {
836 cursor.set_max_start_depth(Some(depth));
837 }
838 let mut matches = cursor.matches(import_query, root, source.as_bytes());
839
840 while let Some(mat) = matches.next() {
841 for capture in mat.captures {
842 let capture_name = import_query.capture_names()[capture.index as usize];
843 if capture_name == "import_path" {
844 let node = capture.node;
845 let line = node.start_position().row + 1;
846 extract_imports_from_node(&node, source, "", line, imports);
847 }
848 }
849 }
850 });
851 }
852
853 fn extract_impl_methods(
854 source: &str,
855 compiled: &CompiledQueries,
856 root: Node<'_>,
857 max_depth: Option<u32>,
858 classes: &mut [ClassInfo],
859 ) {
860 let Some(ref impl_query) = compiled.impl_block else {
861 return;
862 };
863 QUERY_CURSOR.with(|c| {
864 let mut cursor = c.borrow_mut();
865 cursor.set_max_start_depth(None);
866 if let Some(depth) = max_depth {
867 cursor.set_max_start_depth(Some(depth));
868 }
869 let mut matches = cursor.matches(impl_query, root, source.as_bytes());
870
871 while let Some(mat) = matches.next() {
872 let mut impl_type_name = String::new();
873 let mut method_name = String::new();
874 let mut method_line = 0usize;
875 let mut method_end_line = 0usize;
876 let mut method_params = String::new();
877 let mut method_return_type: Option<String> = None;
878
879 for capture in mat.captures {
880 let capture_name = impl_query.capture_names()[capture.index as usize];
881 let node = capture.node;
882 match capture_name {
883 "impl_type" => {
884 impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
885 }
886 "method_name" => {
887 method_name = source[node.start_byte()..node.end_byte()].to_string();
888 }
889 "method_params" => {
890 method_params = source[node.start_byte()..node.end_byte()].to_string();
891 }
892 "method" => {
893 method_line = node.start_position().row + 1;
894 method_end_line = node.end_position().row + 1;
895 method_return_type = node
896 .child_by_field_name("return_type")
897 .map(|r| source[r.start_byte()..r.end_byte()].to_string());
898 }
899 _ => {}
900 }
901 }
902
903 if !impl_type_name.is_empty() && !method_name.is_empty() {
904 let func = FunctionInfo {
905 name: method_name,
906 line: method_line,
907 end_line: method_end_line,
908 parameters: if method_params.is_empty() {
909 Vec::new()
910 } else {
911 vec![method_params]
912 },
913 return_type: method_return_type,
914 };
915 if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
916 class.methods.push(func);
917 }
918 }
919 }
920 });
921 }
922
923 fn extract_references(
924 source: &str,
925 compiled: &CompiledQueries,
926 root: Node<'_>,
927 max_depth: Option<u32>,
928 references: &mut Vec<ReferenceInfo>,
929 ) {
930 let Some(ref ref_query) = compiled.reference else {
931 return;
932 };
933 let mut seen_refs = std::collections::HashSet::new();
934 QUERY_CURSOR.with(|c| {
935 let mut cursor = c.borrow_mut();
936 cursor.set_max_start_depth(None);
937 if let Some(depth) = max_depth {
938 cursor.set_max_start_depth(Some(depth));
939 }
940 let mut matches = cursor.matches(ref_query, root, source.as_bytes());
941
942 while let Some(mat) = matches.next() {
943 for capture in mat.captures {
944 let capture_name = ref_query.capture_names()[capture.index as usize];
945 if capture_name == "type_ref" {
946 let node = capture.node;
947 let type_ref = source[node.start_byte()..node.end_byte()].to_string();
948 if seen_refs.insert(type_ref.clone()) {
949 references.push(ReferenceInfo {
950 symbol: type_ref,
951 reference_type: ReferenceType::Usage,
952 location: String::new(),
954 line: node.start_position().row + 1,
955 });
956 }
957 }
958 }
959 }
960 });
961 }
962
963 fn extract_impl_traits_from_tree(
968 source: &str,
969 compiled: &CompiledQueries,
970 root: Node<'_>,
971 ) -> Vec<ImplTraitInfo> {
972 let Some(query) = &compiled.impl_trait else {
973 return vec![];
974 };
975
976 let mut results = Vec::new();
977 QUERY_CURSOR.with(|c| {
978 let mut cursor = c.borrow_mut();
979 cursor.set_max_start_depth(None);
980 let mut matches = cursor.matches(query, root, source.as_bytes());
981
982 while let Some(mat) = matches.next() {
983 let mut trait_name = String::new();
984 let mut impl_type = String::new();
985 let mut line = 0usize;
986
987 for capture in mat.captures {
988 let capture_name = query.capture_names()[capture.index as usize];
989 let node = capture.node;
990 let text = source[node.start_byte()..node.end_byte()].to_string();
991 match capture_name {
992 "trait_name" => {
993 trait_name = text;
994 line = node.start_position().row + 1;
995 }
996 "impl_type" => {
997 impl_type = text;
998 }
999 _ => {}
1000 }
1001 }
1002
1003 if !trait_name.is_empty() && !impl_type.is_empty() {
1004 results.push(ImplTraitInfo {
1005 trait_name,
1006 impl_type,
1007 path: PathBuf::new(), line,
1009 });
1010 }
1011 }
1012 });
1013
1014 results
1015 }
1016
1017 fn extract_def_use(
1030 source: &str,
1031 compiled: &CompiledQueries,
1032 root: Node<'_>,
1033 symbol_name: &str,
1034 file_path: &str,
1035 max_depth: Option<u32>,
1036 ) -> Vec<crate::types::DefUseSite> {
1037 let Some(ref defuse_query) = compiled.defuse else {
1038 return vec![];
1039 };
1040
1041 let mut sites = Vec::new();
1042 let source_lines: Vec<&str> = source.lines().collect();
1043 let mut write_offsets = std::collections::HashSet::new();
1046
1047 QUERY_CURSOR.with(|c| {
1048 let mut cursor = c.borrow_mut();
1049 cursor.set_max_start_depth(None);
1050 if let Some(depth) = max_depth {
1051 cursor.set_max_start_depth(Some(depth));
1052 }
1053 let mut matches = cursor.matches(defuse_query, root, source.as_bytes());
1054
1055 while let Some(mat) = matches.next() {
1056 for capture in mat.captures {
1057 let capture_name = defuse_query.capture_names()[capture.index as usize];
1058 let node = capture.node;
1059 let node_text = node.utf8_text(source.as_bytes()).unwrap_or_default();
1060
1061 if node_text != symbol_name {
1063 continue;
1064 }
1065
1066 let kind = if capture_name.starts_with("write.") {
1068 crate::types::DefUseKind::Write
1069 } else if capture_name.starts_with("read.") {
1070 crate::types::DefUseKind::Read
1071 } else if capture_name.starts_with("writeread.") {
1072 crate::types::DefUseKind::WriteRead
1073 } else {
1074 continue;
1075 };
1076
1077 let byte_offset = node.start_byte();
1078
1079 if kind == crate::types::DefUseKind::Read
1081 && write_offsets.contains(&byte_offset)
1082 {
1083 continue;
1084 }
1085 if kind != crate::types::DefUseKind::Read {
1086 write_offsets.insert(byte_offset);
1087 }
1088
1089 let line = node.start_position().row + 1;
1092 let snippet = {
1093 let row = node.start_position().row;
1094 let last_line = source_lines.len().saturating_sub(1);
1095 let prev = if row > 0 { row - 1 } else { 0 };
1096 let next = std::cmp::min(row + 1, last_line);
1097 let prev_text = if row == 0 {
1098 ""
1099 } else {
1100 source_lines[prev].trim_end()
1101 };
1102 let cur_text = source_lines[row].trim_end();
1103 let next_text = if row >= last_line {
1104 ""
1105 } else {
1106 source_lines[next].trim_end()
1107 };
1108 format!("{prev_text}\n{cur_text}\n{next_text}")
1109 };
1110
1111 let enclosing_scope = Self::enclosing_function_name(node, source);
1113
1114 let column = node.start_position().column;
1115 sites.push(crate::types::DefUseSite {
1116 kind,
1117 symbol: node_text.to_string(),
1118 file: file_path.to_string(),
1119 line,
1120 column,
1121 snippet,
1122 enclosing_scope,
1123 });
1124 }
1125 }
1126 });
1127
1128 sites
1129 }
1130
1131 pub(crate) fn extract_def_use_for_file(
1134 source: &str,
1135 language: &str,
1136 symbol: &str,
1137 file_path: &str,
1138 ast_recursion_limit: Option<usize>,
1139 ) -> Vec<crate::types::DefUseSite> {
1140 let Some(lang_info) = crate::languages::get_language_info(language) else {
1141 return vec![];
1142 };
1143 let Ok(compiled) = get_compiled_queries(language) else {
1144 return vec![];
1145 };
1146 if compiled.defuse.is_none() {
1147 return vec![];
1148 }
1149
1150 let tree = match PARSER.with(|p| {
1151 let mut parser = p.borrow_mut();
1152 if parser.set_language(&lang_info.language).is_err() {
1153 return None;
1154 }
1155 parser.parse(source, None)
1156 }) {
1157 Some(t) => t,
1158 None => return vec![],
1159 };
1160
1161 let root = tree.root_node();
1162
1163 let max_depth: Option<u32> = ast_recursion_limit
1166 .filter(|&limit| limit > 0)
1167 .and_then(|limit| u32::try_from(limit).ok());
1168
1169 Self::extract_def_use(source, compiled, root, symbol, file_path, max_depth)
1170 }
1171}
1172
1173#[must_use]
1178pub fn extract_impl_traits(source: &str, path: &Path) -> Vec<ImplTraitInfo> {
1179 let Some(lang_info) = get_language_info("rust") else {
1180 return vec![];
1181 };
1182
1183 let Ok(compiled) = get_compiled_queries("rust") else {
1184 return vec![];
1185 };
1186
1187 let Some(query) = &compiled.impl_trait else {
1188 return vec![];
1189 };
1190
1191 let Some(tree) = PARSER.with(|p| {
1192 let mut parser = p.borrow_mut();
1193 let _ = parser.set_language(&lang_info.language);
1194 parser.parse(source, None)
1195 }) else {
1196 return vec![];
1197 };
1198
1199 let root = tree.root_node();
1200 let mut results = Vec::new();
1201
1202 QUERY_CURSOR.with(|c| {
1203 let mut cursor = c.borrow_mut();
1204 cursor.set_max_start_depth(None);
1205 let mut matches = cursor.matches(query, root, source.as_bytes());
1206
1207 while let Some(mat) = matches.next() {
1208 let mut trait_name = String::new();
1209 let mut impl_type = String::new();
1210 let mut line = 0usize;
1211
1212 for capture in mat.captures {
1213 let capture_name = query.capture_names()[capture.index as usize];
1214 let node = capture.node;
1215 let text = source[node.start_byte()..node.end_byte()].to_string();
1216 match capture_name {
1217 "trait_name" => {
1218 trait_name = text;
1219 line = node.start_position().row + 1;
1220 }
1221 "impl_type" => {
1222 impl_type = text;
1223 }
1224 _ => {}
1225 }
1226 }
1227
1228 if !trait_name.is_empty() && !impl_type.is_empty() {
1229 results.push(ImplTraitInfo {
1230 trait_name,
1231 impl_type,
1232 path: path.to_path_buf(),
1233 line,
1234 });
1235 }
1236 }
1237 });
1238
1239 results
1240}
1241
1242pub fn execute_query_impl(
1246 language: &str,
1247 source: &str,
1248 query_str: &str,
1249) -> Result<Vec<crate::QueryCapture>, ParserError> {
1250 let ts_language = crate::languages::get_ts_language(language)
1252 .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
1253
1254 let mut parser = Parser::new();
1255 parser
1256 .set_language(&ts_language)
1257 .map_err(|e| ParserError::QueryError(e.to_string()))?;
1258
1259 let tree = parser
1260 .parse(source.as_bytes(), None)
1261 .ok_or_else(|| ParserError::QueryError("failed to parse source".to_string()))?;
1262
1263 let query =
1264 Query::new(&ts_language, query_str).map_err(|e| ParserError::QueryError(e.to_string()))?;
1265
1266 let source_bytes = source.as_bytes();
1267
1268 let mut captures = Vec::new();
1269 QUERY_CURSOR.with(|c| {
1270 let mut cursor = c.borrow_mut();
1271 cursor.set_max_start_depth(None);
1272 let mut matches = cursor.matches(&query, tree.root_node(), source_bytes);
1273 while let Some(m) = matches.next() {
1274 for cap in m.captures {
1275 let node = cap.node;
1276 let capture_name = query.capture_names()[cap.index as usize].to_string();
1277 let text = node.utf8_text(source_bytes).unwrap_or("").to_string();
1278 captures.push(crate::QueryCapture {
1279 capture_name,
1280 text,
1281 start_line: node.start_position().row,
1282 end_line: node.end_position().row,
1283 start_byte: node.start_byte(),
1284 end_byte: node.end_byte(),
1285 });
1286 }
1287 }
1288 });
1289 Ok(captures)
1290}
1291
#[cfg(all(test, feature = "lang-rust"))]
mod tests {
    use super::*;
    use std::path::Path;

    /// A recursion limit of `Some(0)` must yield the same function count as `None`.
    #[test]
    fn test_ast_recursion_limit_zero_is_unlimited() {
        let code = r#"fn hello() -> u32 { 42 }"#;
        let unlimited = SemanticExtractor::extract(code, "rust", None);
        let zero_limit = SemanticExtractor::extract(code, "rust", Some(0));
        assert!(unlimited.is_ok(), "extract with None failed");
        assert!(zero_limit.is_ok(), "extract with Some(0) failed");

        let unlimited_count = unlimited.unwrap().functions.len();
        let zero_limit_count = zero_limit.unwrap().functions.len();
        assert!(
            unlimited_count > 0,
            "extract with None should find at least one function in the test source"
        );
        assert_eq!(
            unlimited_count, zero_limit_count,
            "ast_recursion_limit=0 should behave identically to unset (unlimited)"
        );
    }

    /// `use path as alias;` with a scoped path must report the alias as an item.
    #[test]
    fn test_rust_use_as_imports() {
        let analysis =
            SemanticExtractor::extract("use std::io as stdio;", "rust", None).unwrap();

        let alias_found = analysis
            .imports
            .iter()
            .flat_map(|import| import.items.iter())
            .any(|item| item == "stdio");

        assert!(
            alias_found,
            "expected import alias 'stdio' in {:?}",
            analysis.imports
        );
    }

    /// `use ident as alias;` with a bare identifier must report the alias as an item.
    #[test]
    fn test_rust_use_as_clause_plain_identifier() {
        let analysis = SemanticExtractor::extract("use io as stdio;", "rust", None).unwrap();

        let alias_found = analysis
            .imports
            .iter()
            .flat_map(|import| import.items.iter())
            .any(|item| item == "stdio");

        assert!(
            alias_found,
            "expected import alias 'stdio' from plain identifier in {:?}",
            analysis.imports
        );
    }

    /// Items nested under a prefix inside a use-list are attributed to the joined module.
    #[test]
    fn test_rust_scoped_use_with_prefix() {
        let analysis =
            SemanticExtractor::extract("use std::{io::Read, io::Write};", "rust", None).unwrap();

        let io_items: Vec<&str> = analysis
            .imports
            .iter()
            .filter(|import| import.module.starts_with("std::io"))
            .flat_map(|import| import.items.iter())
            .map(|item| item.as_str())
            .collect();
        let has_both = ["Read", "Write"]
            .iter()
            .all(|wanted| io_items.contains(wanted));

        assert!(
            has_both,
            "expected 'Read' and 'Write' items under module with std::io, got {:?}",
            analysis.imports
        );
    }

    /// Plain names in a use-list are attributed to the list's parent module.
    #[test]
    fn test_rust_scoped_use_imports() {
        let analysis = SemanticExtractor::extract("use std::{fs, io};", "rust", None).unwrap();

        let std_items: Vec<&str> = analysis
            .imports
            .iter()
            .filter(|import| import.module == "std")
            .flat_map(|import| import.items.iter())
            .map(|item| item.as_str())
            .collect();

        assert!(
            std_items.contains(&"fs") && std_items.contains(&"io"),
            "expected 'fs' and 'io' items under module 'std', got {:?}",
            std_items
        );
    }

    /// A glob import is reported as the single item `*` under its module.
    #[test]
    fn test_rust_wildcard_imports() {
        let analysis = SemanticExtractor::extract("use std::io::*;", "rust", None).unwrap();

        let has_wildcard = analysis
            .imports
            .iter()
            .any(|import| import.module == "std::io" && import.items == ["*"]);

        assert!(
            has_wildcard,
            "expected wildcard import with module 'std::io', got {:?}",
            analysis.imports
        );
    }

    /// `extract_impl_traits` finds exactly the one `impl Trait for Type` in the snippet.
    #[test]
    fn test_extract_impl_traits_standalone() {
        let source = r#"
struct Foo;
trait Display {}
impl Display for Foo {}
"#;

        let impls = extract_impl_traits(source, Path::new("test.rs"));

        assert_eq!(impls.len(), 1, "expected one impl trait, got {:?}", impls);
        let only = &impls[0];
        assert_eq!(only.trait_name, "Display");
        assert_eq!(only.impl_type, "Foo");
    }

    /// A limit one past `u32::MAX` must be rejected with `ParseError`.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_ast_recursion_limit_overflow() {
        let oversized = usize::try_from(u32::MAX).unwrap() + 1;

        let outcome = SemanticExtractor::extract("fn foo() {}", "rust", Some(oversized));

        let is_parse_error = matches!(outcome, Err(ParserError::ParseError(_)));
        assert!(
            is_parse_error,
            "expected ParseError for oversized limit, got {:?}",
            outcome
        );
    }

    /// A small positive limit still lets the top-level function be discovered.
    #[test]
    fn test_ast_recursion_limit_some() {
        let code = r#"fn hello() -> u32 { 42 }"#;

        let outcome = SemanticExtractor::extract(code, "rust", Some(5));
        assert!(outcome.is_ok(), "extract with Some(5) failed: {:?}", outcome);

        let function_count = outcome.unwrap().functions.len();
        assert!(
            function_count > 0,
            "expected at least one function with depth limit 5"
        );
    }

    /// Both the binding and the later use of `count` are reported, tagged with the file.
    #[test]
    fn test_extract_def_use_for_file_finds_write_and_read() {
        use crate::types::DefUseKind;

        let source = r#"
fn main() {
    let count = 0;
    println!("{}", count);
}
"#;

        let sites = SemanticExtractor::extract_def_use_for_file(
            source,
            "rust",
            "count",
            "src/main.rs",
            None,
        );

        assert!(
            !sites.is_empty(),
            "expected at least one def-use site for 'count'"
        );
        let saw_write = sites.iter().any(|site| site.kind == DefUseKind::Write);
        let saw_read = sites.iter().any(|site| site.kind == DefUseKind::Read);
        assert!(saw_write, "expected a write site for 'count'");
        assert!(saw_read, "expected a read site for 'count'");
        assert_eq!(sites[0].file, "src/main.rs");
    }

    /// A symbol that never appears in the source produces no def-use sites.
    #[test]
    fn test_extract_def_use_for_file_no_match_returns_empty() {
        let sites = SemanticExtractor::extract_def_use_for_file(
            "fn foo() { let x = 1; }",
            "rust",
            "nonexistent_symbol",
            "src/lib.rs",
            None,
        );

        assert!(sites.is_empty(), "expected empty for nonexistent symbol");
    }
}
1515
#[cfg(all(test, feature = "lang-python"))]
mod tests_python {
    use super::*;

    /// `from . import foo` must yield an import whose module contains a dot.
    #[test]
    fn test_python_relative_import() {
        let analysis =
            SemanticExtractor::extract("from . import foo\n", "python", None).unwrap();

        let has_relative = analysis
            .imports
            .iter()
            .any(|import| import.module.contains('.'));

        assert!(
            has_relative,
            "expected relative import in {:?}",
            analysis.imports
        );
    }

    /// An aliased `from os import path as p` is still reported as item `path` of `os`.
    #[test]
    fn test_python_aliased_import() {
        let analysis =
            SemanticExtractor::extract("from os import path as p\n", "python", None).unwrap();

        let has_path_from_os = analysis.imports.iter().any(|import| {
            import.module == "os" && import.items.iter().any(|item| item == "path")
        });

        assert!(
            has_path_from_os,
            "expected import 'path' from module 'os' in {:?}",
            analysis.imports
        );
    }
}
1555
#[cfg(test)]
mod tests_unsupported {
    use super::*;

    /// `ElementExtractor` rejects an unknown language and echoes its name in the error.
    #[test]
    fn test_element_extractor_unsupported_language() {
        let outcome = ElementExtractor::extract_with_depth("x = 1", "cobol");

        let rejected_as_cobol = match &outcome {
            Err(ParserError::UnsupportedLanguage(lang)) => lang == "cobol",
            _ => false,
        };
        assert!(
            rejected_as_cobol,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }

    /// `SemanticExtractor` rejects an unknown language and echoes its name in the error.
    #[test]
    fn test_semantic_extractor_unsupported_language() {
        let outcome = SemanticExtractor::extract("x = 1", "cobol", None);

        let rejected_as_cobol = match &outcome {
            Err(ParserError::UnsupportedLanguage(lang)) => lang == "cobol",
            _ => false,
        };
        assert!(
            rejected_as_cobol,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }
}