1use crate::languages::get_language_info;
13use crate::types::{
14 CallInfo, ClassInfo, FunctionInfo, ImplTraitInfo, ImportInfo, ReferenceInfo, ReferenceType,
15 SemanticAnalysis,
16};
17use std::cell::RefCell;
18use std::collections::HashMap;
19use std::path::{Path, PathBuf};
20use std::sync::LazyLock;
21use thiserror::Error;
22use tracing::instrument;
23use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
24
25#[derive(Debug, Error)]
26#[non_exhaustive]
27pub enum ParserError {
28 #[error("Unsupported language: {0}")]
29 UnsupportedLanguage(String),
30 #[error("Failed to parse file: {0}")]
31 ParseError(String),
32 #[error("Invalid UTF-8 in file")]
33 InvalidUtf8,
34 #[error("Query error: {0}")]
35 QueryError(String),
36}
37
38struct CompiledQueries {
41 element: Query,
42 call: Query,
43 import: Option<Query>,
44 impl_block: Option<Query>,
45 reference: Option<Query>,
46 impl_trait: Option<Query>,
47 defuse: Option<Query>,
48}
49
50#[cfg_attr(coverage_nightly, coverage(off))]
55fn build_compiled_queries(
56 lang_info: &crate::languages::LanguageInfo,
57) -> Result<CompiledQueries, ParserError> {
58 let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
59 ParserError::QueryError(format!(
60 "Failed to compile element query for {}: {}",
61 lang_info.name, e
62 ))
63 })?;
64
65 let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
66 ParserError::QueryError(format!(
67 "Failed to compile call query for {}: {}",
68 lang_info.name, e
69 ))
70 })?;
71
72 let import = if let Some(import_query_str) = lang_info.import_query {
73 Some(
74 Query::new(&lang_info.language, import_query_str).map_err(|e| {
75 ParserError::QueryError(format!(
76 "Failed to compile import query for {}: {}",
77 lang_info.name, e
78 ))
79 })?,
80 )
81 } else {
82 None
83 };
84
85 let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
86 Some(
87 Query::new(&lang_info.language, impl_query_str).map_err(|e| {
88 ParserError::QueryError(format!(
89 "Failed to compile impl query for {}: {}",
90 lang_info.name, e
91 ))
92 })?,
93 )
94 } else {
95 None
96 };
97
98 let reference = if let Some(ref_query_str) = lang_info.reference_query {
99 Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
100 ParserError::QueryError(format!(
101 "Failed to compile reference query for {}: {}",
102 lang_info.name, e
103 ))
104 })?)
105 } else {
106 None
107 };
108
109 let impl_trait = if let Some(impl_trait_query_str) = lang_info.impl_trait_query {
110 Some(
111 Query::new(&lang_info.language, impl_trait_query_str).map_err(|e| {
112 ParserError::QueryError(format!(
113 "Failed to compile impl_trait query for {}: {}",
114 lang_info.name, e
115 ))
116 })?,
117 )
118 } else {
119 None
120 };
121
122 let defuse = if let Some(defuse_query_str) = lang_info.defuse_query {
123 Some(
124 Query::new(&lang_info.language, defuse_query_str).map_err(|e| {
125 ParserError::QueryError(format!(
126 "Failed to compile defuse query for {}: {}",
127 lang_info.name, e
128 ))
129 })?,
130 )
131 } else {
132 None
133 };
134
135 Ok(CompiledQueries {
136 element,
137 call,
138 import,
139 impl_block,
140 reference,
141 impl_trait,
142 defuse,
143 })
144}
145
146#[cfg_attr(coverage_nightly, coverage(off))]
151fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
152 let mut cache = HashMap::new();
153
154 for lang_name in crate::lang::supported_languages() {
155 if let Some(lang_info) = get_language_info(lang_name) {
156 match build_compiled_queries(&lang_info) {
157 Ok(compiled) => {
158 cache.insert(*lang_name, compiled);
159 }
160 Err(e) => {
161 tracing::error!(
162 "Failed to compile queries for language {}: {}",
163 lang_name,
164 e
165 );
166 }
167 }
168 }
169 }
170
171 cache
172}
173
174static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
176 LazyLock::new(init_query_cache);
177
178fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
180 QUERY_CACHE
181 .get(language)
182 .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
183}
184
185thread_local! {
186 static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
187 static QUERY_CURSOR: RefCell<QueryCursor> = RefCell::new(QueryCursor::new());
188}
189
190pub struct ElementExtractor;
192
193impl ElementExtractor {
194 #[instrument(skip_all, fields(language))]
202 pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
203 let lang_info = get_language_info(language)
204 .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
205
206 let tree = PARSER.with(|p| {
207 let mut parser = p.borrow_mut();
208 parser
209 .set_language(&lang_info.language)
210 .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
211 parser
212 .parse(source, None)
213 .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
214 })?;
215
216 let compiled = get_compiled_queries(language)?;
217
218 let (function_count, class_count) = QUERY_CURSOR.with(|c| {
219 let mut cursor = c.borrow_mut();
220 cursor.set_max_start_depth(None);
221 let mut function_count = 0;
222 let mut class_count = 0;
223
224 let mut matches =
225 cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
226 while let Some(mat) = matches.next() {
227 for capture in mat.captures {
228 let capture_name = compiled.element.capture_names()[capture.index as usize];
229 match capture_name {
230 "function" => function_count += 1,
231 "class" => class_count += 1,
232 _ => {}
233 }
234 }
235 }
236 (function_count, class_count)
237 });
238
239 tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
240
241 Ok((function_count, class_count))
242 }
243}
244
245#[allow(clippy::too_many_lines)] fn extract_imports_from_node(
250 node: &Node,
251 source: &str,
252 prefix: &str,
253 line: usize,
254 imports: &mut Vec<ImportInfo>,
255) {
256 match node.kind() {
257 "identifier" | "self" | "super" | "crate" => {
259 let name = source[node.start_byte()..node.end_byte()].to_string();
260 imports.push(ImportInfo {
261 module: prefix.to_string(),
262 items: vec![name],
263 line,
264 });
265 }
266 "scoped_identifier" => {
268 let item = node
269 .child_by_field_name("name")
270 .map(|n| source[n.start_byte()..n.end_byte()].to_string())
271 .unwrap_or_default();
272 let module = node.child_by_field_name("path").map_or_else(
273 || prefix.to_string(),
274 |p| {
275 let path_text = source[p.start_byte()..p.end_byte()].to_string();
276 if prefix.is_empty() {
277 path_text
278 } else {
279 format!("{prefix}::{path_text}")
280 }
281 },
282 );
283 if !item.is_empty() {
284 imports.push(ImportInfo {
285 module,
286 items: vec![item],
287 line,
288 });
289 }
290 }
291 "scoped_use_list" => {
293 let new_prefix = node.child_by_field_name("path").map_or_else(
294 || prefix.to_string(),
295 |p| {
296 let path_text = source[p.start_byte()..p.end_byte()].to_string();
297 if prefix.is_empty() {
298 path_text
299 } else {
300 format!("{prefix}::{path_text}")
301 }
302 },
303 );
304 if let Some(list) = node.child_by_field_name("list") {
305 extract_imports_from_node(&list, source, &new_prefix, line, imports);
306 }
307 }
308 "use_list" => {
310 let mut cursor = node.walk();
311 for child in node.children(&mut cursor) {
312 match child.kind() {
313 "{" | "}" | "," => {}
314 _ => extract_imports_from_node(&child, source, prefix, line, imports),
315 }
316 }
317 }
318 "use_wildcard" => {
320 let text = source[node.start_byte()..node.end_byte()].to_string();
321 let module = if let Some(stripped) = text.strip_suffix("::*") {
322 if prefix.is_empty() {
323 stripped.to_string()
324 } else {
325 format!("{prefix}::{stripped}")
326 }
327 } else {
328 prefix.to_string()
329 };
330 imports.push(ImportInfo {
331 module,
332 items: vec!["*".to_string()],
333 line,
334 });
335 }
336 "use_as_clause" => {
338 let alias = node
339 .child_by_field_name("alias")
340 .map(|n| source[n.start_byte()..n.end_byte()].to_string())
341 .unwrap_or_default();
342 let module = if let Some(path_node) = node.child_by_field_name("path") {
343 match path_node.kind() {
344 "scoped_identifier" => path_node.child_by_field_name("path").map_or_else(
345 || prefix.to_string(),
346 |p| {
347 let p_text = source[p.start_byte()..p.end_byte()].to_string();
348 if prefix.is_empty() {
349 p_text
350 } else {
351 format!("{prefix}::{p_text}")
352 }
353 },
354 ),
355 _ => prefix.to_string(),
356 }
357 } else {
358 prefix.to_string()
359 };
360 if !alias.is_empty() {
361 imports.push(ImportInfo {
362 module,
363 items: vec![alias],
364 line,
365 });
366 }
367 }
368 "import_from_statement" => {
370 extract_python_import_from(node, source, line, imports);
371 }
372 _ => {
374 let text = source[node.start_byte()..node.end_byte()]
375 .trim()
376 .to_string();
377 if !text.is_empty() {
378 imports.push(ImportInfo {
379 module: text,
380 items: vec![],
381 line,
382 });
383 }
384 }
385 }
386}
387
388fn extract_import_item_name(child: &Node, source: &str) -> Option<String> {
390 match child.kind() {
391 "dotted_name" => {
392 let name = source[child.start_byte()..child.end_byte()]
393 .trim()
394 .to_string();
395 if name.is_empty() { None } else { Some(name) }
396 }
397 "aliased_import" => child.child_by_field_name("name").and_then(|n| {
398 let name = source[n.start_byte()..n.end_byte()].trim().to_string();
399 if name.is_empty() { None } else { Some(name) }
400 }),
401 _ => None,
402 }
403}
404
405fn collect_import_items(
407 node: &Node,
408 source: &str,
409 is_wildcard: &mut bool,
410 items: &mut Vec<String>,
411) {
412 if let Some(import_list) = node.child_by_field_name("import_list") {
414 let mut cursor = import_list.walk();
415 for child in import_list.named_children(&mut cursor) {
416 if child.kind() == "wildcard_import" {
417 *is_wildcard = true;
418 } else if let Some(name) = extract_import_item_name(&child, source) {
419 items.push(name);
420 }
421 }
422 return;
423 }
424 let mut cursor = node.walk();
426 let mut first = true;
427 for child in node.named_children(&mut cursor) {
428 if first {
429 first = false;
430 continue;
431 }
432 if child.kind() == "wildcard_import" {
433 *is_wildcard = true;
434 } else if let Some(name) = extract_import_item_name(&child, source) {
435 items.push(name);
436 }
437 }
438}
439
440fn extract_python_import_from(
442 node: &Node,
443 source: &str,
444 line: usize,
445 imports: &mut Vec<ImportInfo>,
446) {
447 let module = if let Some(m) = node.child_by_field_name("module_name") {
448 source[m.start_byte()..m.end_byte()].trim().to_string()
449 } else if let Some(r) = node.child_by_field_name("relative_import") {
450 source[r.start_byte()..r.end_byte()].trim().to_string()
451 } else {
452 String::new()
453 };
454
455 let mut is_wildcard = false;
456 let mut items = Vec::new();
457 collect_import_items(node, source, &mut is_wildcard, &mut items);
458
459 if !module.is_empty() {
460 imports.push(ImportInfo {
461 module,
462 items: if is_wildcard {
463 vec!["*".to_string()]
464 } else {
465 items
466 },
467 line,
468 });
469 }
470}
471
/// Namespace for the semantic extraction passes (functions, classes, calls, imports,
/// references, trait implementations and def/use sites).
pub struct SemanticExtractor;
473
474impl SemanticExtractor {
475 #[instrument(skip_all, fields(language))]
483 pub fn extract(
484 source: &str,
485 language: &str,
486 ast_recursion_limit: Option<usize>,
487 ) -> Result<SemanticAnalysis, ParserError> {
488 let lang_info = get_language_info(language)
489 .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
490
491 let tree = PARSER.with(|p| {
492 let mut parser = p.borrow_mut();
493 parser
494 .set_language(&lang_info.language)
495 .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
496 parser
497 .parse(source, None)
498 .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
499 })?;
500
501 let max_depth: Option<u32> = ast_recursion_limit
504 .filter(|&limit| limit > 0)
505 .map(|limit| {
506 u32::try_from(limit).map_err(|_| {
507 ParserError::ParseError(format!(
508 "ast_recursion_limit {} exceeds maximum supported value {}",
509 limit,
510 u32::MAX
511 ))
512 })
513 })
514 .transpose()?;
515
516 let compiled = get_compiled_queries(language)?;
517 let root = tree.root_node();
518
519 let mut functions = Vec::new();
520 let mut classes = Vec::new();
521 let mut imports = Vec::new();
522 let mut references = Vec::new();
523 let mut call_frequency = HashMap::new();
524 let mut calls = Vec::new();
525
526 Self::extract_elements(
527 source,
528 compiled,
529 root,
530 max_depth,
531 &lang_info,
532 &mut functions,
533 &mut classes,
534 );
535 Self::extract_calls(
536 source,
537 compiled,
538 root,
539 max_depth,
540 &mut calls,
541 &mut call_frequency,
542 );
543 Self::extract_imports(source, compiled, root, max_depth, &mut imports);
544 Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes);
545 Self::extract_references(source, compiled, root, max_depth, &mut references);
546
547 let impl_traits = if language == "rust" {
549 Self::extract_impl_traits_from_tree(source, compiled, root)
550 } else {
551 vec![]
552 };
553
554 tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), impl_traits = impl_traits.len(), "extraction complete");
555
556 Ok(SemanticAnalysis {
557 functions,
558 classes,
559 imports,
560 references,
561 call_frequency,
562 calls,
563 impl_traits,
564 def_use_sites: Vec::new(),
565 })
566 }
567
568 fn extract_elements(
569 source: &str,
570 compiled: &CompiledQueries,
571 root: Node<'_>,
572 max_depth: Option<u32>,
573 lang_info: &crate::languages::LanguageInfo,
574 functions: &mut Vec<FunctionInfo>,
575 classes: &mut Vec<ClassInfo>,
576 ) {
577 let mut seen_functions = std::collections::HashSet::new();
578
579 QUERY_CURSOR.with(|c| {
580 let mut cursor = c.borrow_mut();
581 cursor.set_max_start_depth(None);
582 if let Some(depth) = max_depth {
583 cursor.set_max_start_depth(Some(depth));
584 }
585 let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
586
587 while let Some(mat) = matches.next() {
588 let mut func_node: Option<Node> = None;
589 let mut func_name_text: Option<String> = None;
590 let mut class_node: Option<Node> = None;
591 let mut class_name_text: Option<String> = None;
592
593 for capture in mat.captures {
594 let capture_name = compiled.element.capture_names()[capture.index as usize];
595 let node = capture.node;
596 match capture_name {
597 "function" => func_node = Some(node),
598 "func_name" | "method_name" => {
599 func_name_text =
600 Some(source[node.start_byte()..node.end_byte()].to_string());
601 }
602 "class" => class_node = Some(node),
603 "class_name" | "type_name" => {
604 class_name_text =
605 Some(source[node.start_byte()..node.end_byte()].to_string());
606 }
607 _ => {}
608 }
609 }
610
611 if let Some(func_node) = func_node {
612 let parent_kind = func_node.parent().map(|p| p.kind());
616 let parent_is_wrapper = parent_kind
617 .map(|k| k == "template_declaration" || k == "decorated_definition")
618 .unwrap_or(false);
619 if func_node.kind() == "function_definition" && parent_is_wrapper {
620 } else {
622 let func_def = if func_node.kind() == "template_declaration" {
625 let mut cursor = func_node.walk();
626 func_node
627 .children(&mut cursor)
628 .find(|n| n.kind() == "function_definition")
629 .unwrap_or(func_node)
630 } else if func_node.kind() == "decorated_definition" {
631 func_node
632 .child_by_field_name("definition")
633 .unwrap_or(func_node)
634 } else {
635 func_node
636 };
637
638 let name = func_name_text
639 .or_else(|| {
640 func_def
641 .child_by_field_name("name")
642 .map(|n| source[n.start_byte()..n.end_byte()].to_string())
643 })
644 .unwrap_or_default();
645
646 let func_key = (name.clone(), func_node.start_position().row);
647 if !name.is_empty() && seen_functions.insert(func_key) {
648 let params = func_def
651 .child_by_field_name("declarator")
652 .and_then(|d| d.child_by_field_name("parameters"))
653 .or_else(|| func_def.child_by_field_name("parameters"))
654 .map(|p| source[p.start_byte()..p.end_byte()].to_string())
655 .unwrap_or_default();
656
657 let return_type = func_def
660 .child_by_field_name("type")
661 .or_else(|| func_def.child_by_field_name("return_type"))
662 .map(|r| source[r.start_byte()..r.end_byte()].to_string());
663
664 let first_line = if func_node.kind() == "function_item" {
667 let mut attrs: Vec<Node> = Vec::new();
668 let mut sib = func_node.prev_named_sibling();
669 while let Some(s) = sib {
670 if s.kind() == "attribute_item" {
671 attrs.push(s);
672 sib = s.prev_named_sibling();
673 } else {
674 break;
675 }
676 }
677 attrs
678 .last()
679 .map(|n| n.start_position().row + 1)
680 .unwrap_or_else(|| func_node.start_position().row + 1)
681 } else {
682 func_node.start_position().row + 1
683 };
684
685 functions.push(FunctionInfo {
686 name,
687 line: first_line,
688 end_line: func_node.end_position().row + 1,
689 parameters: if params.is_empty() {
690 Vec::new()
691 } else {
692 vec![params]
693 },
694 return_type,
695 });
696 }
697 }
698 }
699
700 if let Some(class_node) = class_node {
701 let name = class_name_text
702 .or_else(|| {
703 class_node
704 .child_by_field_name("name")
705 .map(|n| source[n.start_byte()..n.end_byte()].to_string())
706 })
707 .unwrap_or_default();
708
709 if !name.is_empty() {
710 let inherits = if let Some(handler) = lang_info.extract_inheritance {
711 handler(&class_node, source)
712 } else {
713 Vec::new()
714 };
715 classes.push(ClassInfo {
716 name,
717 line: class_node.start_position().row + 1,
718 end_line: class_node.end_position().row + 1,
719 methods: Vec::new(),
720 fields: Vec::new(),
721 inherits,
722 });
723 }
724 }
725 }
726 });
727 }
728
729 fn enclosing_function_name(mut node: tree_sitter::Node<'_>, source: &str) -> Option<String> {
732 let mut depth = 0u32;
733 while let Some(parent) = node.parent() {
734 depth += 1;
735 if depth > 64 {
739 return None;
740 }
741 let name_node = match parent.kind() {
742 "function_item"
744 | "method_item"
745 | "function_definition"
746 | "function_declaration"
747 | "method_declaration"
748 | "method_definition" => parent.child_by_field_name("name"),
749 "subroutine" => {
751 let mut cursor = parent.walk();
752 parent
753 .children(&mut cursor)
754 .find(|c| c.kind() == "subroutine_statement")
755 .and_then(|s| s.child_by_field_name("name"))
756 }
757 "function" => {
759 let mut cursor = parent.walk();
760 parent
761 .children(&mut cursor)
762 .find(|c| c.kind() == "function_statement")
763 .and_then(|s| s.child_by_field_name("name"))
764 }
765 _ => {
766 node = parent;
767 continue;
768 }
769 };
770 return name_node.map(|n| source[n.start_byte()..n.end_byte()].to_string());
771 }
772 None
776 }
777
778 fn extract_calls(
779 source: &str,
780 compiled: &CompiledQueries,
781 root: Node<'_>,
782 max_depth: Option<u32>,
783 calls: &mut Vec<CallInfo>,
784 call_frequency: &mut HashMap<String, usize>,
785 ) {
786 QUERY_CURSOR.with(|c| {
787 let mut cursor = c.borrow_mut();
788 cursor.set_max_start_depth(None);
789 if let Some(depth) = max_depth {
790 cursor.set_max_start_depth(Some(depth));
791 }
792 let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
793
794 while let Some(mat) = matches.next() {
795 for capture in mat.captures {
796 let capture_name = compiled.call.capture_names()[capture.index as usize];
797 if capture_name != "call" {
798 continue;
799 }
800 let node = capture.node;
801 let call_name = source[node.start_byte()..node.end_byte()].to_string();
802 *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
803
804 let caller = Self::enclosing_function_name(node, source)
805 .unwrap_or_else(|| "<module>".to_string());
806
807 let mut arg_count = None;
808 let mut arg_node = node;
809 let mut hop = 0u32;
810 let mut cap_hit = false;
811 while let Some(parent) = arg_node.parent() {
812 hop += 1;
813 if hop > 16 {
819 cap_hit = true;
820 break;
821 }
822 if parent.kind() == "call_expression" {
823 if let Some(args) = parent.child_by_field_name("arguments") {
824 arg_count = Some(args.named_child_count());
825 }
826 break;
827 }
828 arg_node = parent;
829 }
830 debug_assert!(
831 !cap_hit,
832 "extract_calls: parent traversal cap reached (hop > 16)"
833 );
834
835 calls.push(CallInfo {
836 caller,
837 callee: call_name,
838 line: node.start_position().row + 1,
839 column: node.start_position().column,
840 arg_count,
841 });
842 }
843 }
844 });
845 }
846
847 fn extract_imports(
848 source: &str,
849 compiled: &CompiledQueries,
850 root: Node<'_>,
851 max_depth: Option<u32>,
852 imports: &mut Vec<ImportInfo>,
853 ) {
854 let Some(ref import_query) = compiled.import else {
855 return;
856 };
857 QUERY_CURSOR.with(|c| {
858 let mut cursor = c.borrow_mut();
859 cursor.set_max_start_depth(None);
860 if let Some(depth) = max_depth {
861 cursor.set_max_start_depth(Some(depth));
862 }
863 let mut matches = cursor.matches(import_query, root, source.as_bytes());
864
865 while let Some(mat) = matches.next() {
866 for capture in mat.captures {
867 let capture_name = import_query.capture_names()[capture.index as usize];
868 if capture_name == "import_path" {
869 let node = capture.node;
870 let line = node.start_position().row + 1;
871 extract_imports_from_node(&node, source, "", line, imports);
872 }
873 }
874 }
875 });
876 }
877
878 fn extract_impl_methods(
879 source: &str,
880 compiled: &CompiledQueries,
881 root: Node<'_>,
882 max_depth: Option<u32>,
883 classes: &mut [ClassInfo],
884 ) {
885 let Some(ref impl_query) = compiled.impl_block else {
886 return;
887 };
888 QUERY_CURSOR.with(|c| {
889 let mut cursor = c.borrow_mut();
890 cursor.set_max_start_depth(None);
891 if let Some(depth) = max_depth {
892 cursor.set_max_start_depth(Some(depth));
893 }
894 let mut matches = cursor.matches(impl_query, root, source.as_bytes());
895
896 while let Some(mat) = matches.next() {
897 let mut impl_type_name = String::new();
898 let mut method_name = String::new();
899 let mut method_line = 0usize;
900 let mut method_end_line = 0usize;
901 let mut method_params = String::new();
902 let mut method_return_type: Option<String> = None;
903
904 for capture in mat.captures {
905 let capture_name = impl_query.capture_names()[capture.index as usize];
906 let node = capture.node;
907 match capture_name {
908 "impl_type" => {
909 impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
910 }
911 "method_name" => {
912 method_name = source[node.start_byte()..node.end_byte()].to_string();
913 }
914 "method_params" => {
915 method_params = source[node.start_byte()..node.end_byte()].to_string();
916 }
917 "method" => {
918 let mut method_attrs: Vec<Node> = Vec::new();
919 let mut msib = node.prev_named_sibling();
920 while let Some(s) = msib {
921 if s.kind() == "attribute_item" {
922 method_attrs.push(s);
923 msib = s.prev_named_sibling();
924 } else {
925 break;
926 }
927 }
928 method_line = method_attrs
929 .last()
930 .map(|n| n.start_position().row + 1)
931 .unwrap_or_else(|| node.start_position().row + 1);
932 method_end_line = node.end_position().row + 1;
933 method_return_type = node
934 .child_by_field_name("return_type")
935 .map(|r| source[r.start_byte()..r.end_byte()].to_string());
936 }
937 _ => {}
938 }
939 }
940
941 if !impl_type_name.is_empty() && !method_name.is_empty() {
942 let func = FunctionInfo {
943 name: method_name,
944 line: method_line,
945 end_line: method_end_line,
946 parameters: if method_params.is_empty() {
947 Vec::new()
948 } else {
949 vec![method_params]
950 },
951 return_type: method_return_type,
952 };
953 if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
954 class.methods.push(func);
955 }
956 }
957 }
958 });
959 }
960
961 fn extract_references(
962 source: &str,
963 compiled: &CompiledQueries,
964 root: Node<'_>,
965 max_depth: Option<u32>,
966 references: &mut Vec<ReferenceInfo>,
967 ) {
968 let Some(ref ref_query) = compiled.reference else {
969 return;
970 };
971 let mut seen_refs = std::collections::HashSet::new();
972 QUERY_CURSOR.with(|c| {
973 let mut cursor = c.borrow_mut();
974 cursor.set_max_start_depth(None);
975 if let Some(depth) = max_depth {
976 cursor.set_max_start_depth(Some(depth));
977 }
978 let mut matches = cursor.matches(ref_query, root, source.as_bytes());
979
980 while let Some(mat) = matches.next() {
981 for capture in mat.captures {
982 let capture_name = ref_query.capture_names()[capture.index as usize];
983 if capture_name == "type_ref" {
984 let node = capture.node;
985 let type_ref = source[node.start_byte()..node.end_byte()].to_string();
986 if seen_refs.insert(type_ref.clone()) {
987 references.push(ReferenceInfo {
988 symbol: type_ref,
989 reference_type: ReferenceType::Usage,
990 location: String::new(),
992 line: node.start_position().row + 1,
993 });
994 }
995 }
996 }
997 }
998 });
999 }
1000
1001 fn extract_impl_traits_from_tree(
1006 source: &str,
1007 compiled: &CompiledQueries,
1008 root: Node<'_>,
1009 ) -> Vec<ImplTraitInfo> {
1010 let Some(query) = &compiled.impl_trait else {
1011 return vec![];
1012 };
1013
1014 let mut results = Vec::new();
1015 QUERY_CURSOR.with(|c| {
1016 let mut cursor = c.borrow_mut();
1017 cursor.set_max_start_depth(None);
1018 let mut matches = cursor.matches(query, root, source.as_bytes());
1019
1020 while let Some(mat) = matches.next() {
1021 let mut trait_name = String::new();
1022 let mut impl_type = String::new();
1023 let mut line = 0usize;
1024
1025 for capture in mat.captures {
1026 let capture_name = query.capture_names()[capture.index as usize];
1027 let node = capture.node;
1028 let text = source[node.start_byte()..node.end_byte()].to_string();
1029 match capture_name {
1030 "trait_name" => {
1031 trait_name = text;
1032 line = node.start_position().row + 1;
1033 }
1034 "impl_type" => {
1035 impl_type = text;
1036 }
1037 _ => {}
1038 }
1039 }
1040
1041 if !trait_name.is_empty() && !impl_type.is_empty() {
1042 results.push(ImplTraitInfo {
1043 trait_name,
1044 impl_type,
1045 path: PathBuf::new(), line,
1047 });
1048 }
1049 }
1050 });
1051
1052 results
1053 }
1054
1055 fn extract_def_use(
1068 source: &str,
1069 compiled: &CompiledQueries,
1070 root: Node<'_>,
1071 symbol_name: &str,
1072 file_path: &str,
1073 max_depth: Option<u32>,
1074 ) -> Vec<crate::types::DefUseSite> {
1075 let Some(ref defuse_query) = compiled.defuse else {
1076 return vec![];
1077 };
1078
1079 let mut sites = Vec::new();
1080 let source_lines: Vec<&str> = source.lines().collect();
1081 let mut write_offsets = std::collections::HashSet::new();
1084
1085 QUERY_CURSOR.with(|c| {
1086 let mut cursor = c.borrow_mut();
1087 cursor.set_max_start_depth(None);
1088 if let Some(depth) = max_depth {
1089 cursor.set_max_start_depth(Some(depth));
1090 }
1091 let mut matches = cursor.matches(defuse_query, root, source.as_bytes());
1092
1093 while let Some(mat) = matches.next() {
1094 for capture in mat.captures {
1095 let capture_name = defuse_query.capture_names()[capture.index as usize];
1096 let node = capture.node;
1097 let node_text = node.utf8_text(source.as_bytes()).unwrap_or_default();
1098
1099 if node_text != symbol_name {
1101 continue;
1102 }
1103
1104 let kind = if capture_name.starts_with("write.") {
1106 crate::types::DefUseKind::Write
1107 } else if capture_name.starts_with("read.") {
1108 crate::types::DefUseKind::Read
1109 } else if capture_name.starts_with("writeread.") {
1110 crate::types::DefUseKind::WriteRead
1111 } else {
1112 continue;
1113 };
1114
1115 let byte_offset = node.start_byte();
1116
1117 if kind == crate::types::DefUseKind::Read
1119 && write_offsets.contains(&byte_offset)
1120 {
1121 continue;
1122 }
1123 if kind != crate::types::DefUseKind::Read {
1124 write_offsets.insert(byte_offset);
1125 }
1126
1127 let line = node.start_position().row + 1;
1130 let snippet = {
1131 let row = node.start_position().row;
1132 let last_line = source_lines.len().saturating_sub(1);
1133 let prev = if row > 0 { row - 1 } else { 0 };
1134 let next = std::cmp::min(row + 1, last_line);
1135 let prev_text = if row == 0 {
1136 ""
1137 } else {
1138 source_lines[prev].trim_end()
1139 };
1140 let cur_text = source_lines[row].trim_end();
1141 let next_text = if row >= last_line {
1142 ""
1143 } else {
1144 source_lines[next].trim_end()
1145 };
1146 format!("{prev_text}\n{cur_text}\n{next_text}")
1147 };
1148
1149 let enclosing_scope = Self::enclosing_function_name(node, source);
1151
1152 let column = node.start_position().column;
1153 sites.push(crate::types::DefUseSite {
1154 kind,
1155 symbol: node_text.to_string(),
1156 file: file_path.to_string(),
1157 line,
1158 column,
1159 snippet,
1160 enclosing_scope,
1161 });
1162 }
1163 }
1164 });
1165
1166 sites
1167 }
1168
1169 pub(crate) fn extract_def_use_for_file(
1172 source: &str,
1173 language: &str,
1174 symbol: &str,
1175 file_path: &str,
1176 ast_recursion_limit: Option<usize>,
1177 ) -> Vec<crate::types::DefUseSite> {
1178 let Some(lang_info) = crate::languages::get_language_info(language) else {
1179 return vec![];
1180 };
1181 let Ok(compiled) = get_compiled_queries(language) else {
1182 return vec![];
1183 };
1184 if compiled.defuse.is_none() {
1185 return vec![];
1186 }
1187
1188 let tree = match PARSER.with(|p| {
1189 let mut parser = p.borrow_mut();
1190 if parser.set_language(&lang_info.language).is_err() {
1191 return None;
1192 }
1193 parser.parse(source, None)
1194 }) {
1195 Some(t) => t,
1196 None => return vec![],
1197 };
1198
1199 let root = tree.root_node();
1200
1201 let max_depth: Option<u32> = ast_recursion_limit
1204 .filter(|&limit| limit > 0)
1205 .and_then(|limit| u32::try_from(limit).ok());
1206
1207 Self::extract_def_use(source, compiled, root, symbol, file_path, max_depth)
1208 }
1209}
1210
1211#[must_use]
1216pub fn extract_impl_traits(source: &str, path: &Path) -> Vec<ImplTraitInfo> {
1217 let Some(lang_info) = get_language_info("rust") else {
1218 return vec![];
1219 };
1220
1221 let Ok(compiled) = get_compiled_queries("rust") else {
1222 return vec![];
1223 };
1224
1225 let Some(query) = &compiled.impl_trait else {
1226 return vec![];
1227 };
1228
1229 let Some(tree) = PARSER.with(|p| {
1230 let mut parser = p.borrow_mut();
1231 let _ = parser.set_language(&lang_info.language);
1232 parser.parse(source, None)
1233 }) else {
1234 return vec![];
1235 };
1236
1237 let root = tree.root_node();
1238 let mut results = Vec::new();
1239
1240 QUERY_CURSOR.with(|c| {
1241 let mut cursor = c.borrow_mut();
1242 cursor.set_max_start_depth(None);
1243 let mut matches = cursor.matches(query, root, source.as_bytes());
1244
1245 while let Some(mat) = matches.next() {
1246 let mut trait_name = String::new();
1247 let mut impl_type = String::new();
1248 let mut line = 0usize;
1249
1250 for capture in mat.captures {
1251 let capture_name = query.capture_names()[capture.index as usize];
1252 let node = capture.node;
1253 let text = source[node.start_byte()..node.end_byte()].to_string();
1254 match capture_name {
1255 "trait_name" => {
1256 trait_name = text;
1257 line = node.start_position().row + 1;
1258 }
1259 "impl_type" => {
1260 impl_type = text;
1261 }
1262 _ => {}
1263 }
1264 }
1265
1266 if !trait_name.is_empty() && !impl_type.is_empty() {
1267 results.push(ImplTraitInfo {
1268 trait_name,
1269 impl_type,
1270 path: path.to_path_buf(),
1271 line,
1272 });
1273 }
1274 }
1275 });
1276
1277 results
1278}
1279
1280pub fn execute_query_impl(
1284 language: &str,
1285 source: &str,
1286 query_str: &str,
1287) -> Result<Vec<crate::QueryCapture>, ParserError> {
1288 let ts_language = crate::languages::get_ts_language(language)
1290 .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
1291
1292 let mut parser = Parser::new();
1293 parser
1294 .set_language(&ts_language)
1295 .map_err(|e| ParserError::QueryError(e.to_string()))?;
1296
1297 let tree = parser
1298 .parse(source.as_bytes(), None)
1299 .ok_or_else(|| ParserError::QueryError("failed to parse source".to_string()))?;
1300
1301 let query =
1302 Query::new(&ts_language, query_str).map_err(|e| ParserError::QueryError(e.to_string()))?;
1303
1304 let source_bytes = source.as_bytes();
1305
1306 let mut captures = Vec::new();
1307 QUERY_CURSOR.with(|c| {
1308 let mut cursor = c.borrow_mut();
1309 cursor.set_max_start_depth(None);
1310 let mut matches = cursor.matches(&query, tree.root_node(), source_bytes);
1311 while let Some(m) = matches.next() {
1312 for cap in m.captures {
1313 let node = cap.node;
1314 let capture_name = query.capture_names()[cap.index as usize].to_string();
1315 let text = node.utf8_text(source_bytes).unwrap_or("").to_string();
1316 captures.push(crate::QueryCapture {
1317 capture_name,
1318 text,
1319 start_line: node.start_position().row,
1320 end_line: node.end_position().row,
1321 start_byte: node.start_byte(),
1322 end_byte: node.end_byte(),
1323 });
1324 }
1325 }
1326 });
1327 Ok(captures)
1328}
1329
/// Unit tests for the Rust grammar: import extraction, impl-trait extraction,
/// AST recursion limits and def-use site discovery.
#[cfg(all(test, feature = "lang-rust"))]
mod tests {
    use super::*;
    use std::path::Path;

    /// A recursion limit of `Some(0)` must yield the same functions as `None`.
    #[test]
    fn test_ast_recursion_limit_zero_is_unlimited() {
        let source = "fn hello() -> u32 { 42 }";

        let analysis_none =
            SemanticExtractor::extract(source, "rust", None).expect("extract with None failed");
        let analysis_zero = SemanticExtractor::extract(source, "rust", Some(0))
            .expect("extract with Some(0) failed");

        assert!(
            !analysis_none.functions.is_empty(),
            "extract with None should find at least one function in the test source"
        );
        assert_eq!(
            analysis_none.functions.len(),
            analysis_zero.functions.len(),
            "ast_recursion_limit=0 should behave identically to unset (unlimited)"
        );
    }

    /// `use path as alias;` must record the alias among the import items.
    #[test]
    fn test_rust_use_as_imports() {
        let source = "use std::io as stdio;";

        let result = SemanticExtractor::extract(source, "rust", None).unwrap();

        assert!(
            result
                .imports
                .iter()
                .any(|imp| imp.items.iter().any(|i| i == "stdio")),
            "expected import alias 'stdio' in {:?}",
            result.imports
        );
    }

    /// The alias must also be recorded when the aliased path is a bare identifier.
    #[test]
    fn test_rust_use_as_clause_plain_identifier() {
        let source = "use io as stdio;";

        let result = SemanticExtractor::extract(source, "rust", None).unwrap();

        assert!(
            result
                .imports
                .iter()
                .any(|imp| imp.items.iter().any(|i| i == "stdio")),
            "expected import alias 'stdio' from plain identifier in {:?}",
            result.imports
        );
    }

    /// Nested paths inside a brace group must be reported under the combined module.
    #[test]
    fn test_rust_scoped_use_with_prefix() {
        let source = "use std::{io::Read, io::Write};";

        let result = SemanticExtractor::extract(source, "rust", None).unwrap();

        // Borrow the item names instead of cloning every item vector.
        let items: Vec<&str> = result
            .imports
            .iter()
            .filter(|imp| imp.module.starts_with("std::io"))
            .flat_map(|imp| imp.items.iter().map(|s| s.as_str()))
            .collect();
        assert!(
            items.contains(&"Read") && items.contains(&"Write"),
            "expected 'Read' and 'Write' items under module with std::io, got {:?}",
            result.imports
        );
    }

    /// A flat brace group must report each member as an item of the prefix module.
    #[test]
    fn test_rust_scoped_use_imports() {
        let source = "use std::{fs, io};";

        let result = SemanticExtractor::extract(source, "rust", None).unwrap();

        let items: Vec<&str> = result
            .imports
            .iter()
            .filter(|imp| imp.module == "std")
            .flat_map(|imp| imp.items.iter().map(|s| s.as_str()))
            .collect();
        assert!(
            items.contains(&"fs") && items.contains(&"io"),
            "expected 'fs' and 'io' items under module 'std', got {:?}",
            items
        );
    }

    /// A glob import must be reported as the single item `*` under its module.
    #[test]
    fn test_rust_wildcard_imports() {
        let source = "use std::io::*;";

        let result = SemanticExtractor::extract(source, "rust", None).unwrap();

        // Compare against an array so no temporary Vec is allocated per import.
        let has_wildcard = result
            .imports
            .iter()
            .any(|imp| imp.module == "std::io" && imp.items == ["*"]);
        assert!(
            has_wildcard,
            "expected wildcard import with module 'std::io', got {:?}",
            result.imports
        );
    }

    /// `extract_impl_traits` must find a single `impl Trait for Type` block.
    #[test]
    fn test_extract_impl_traits_standalone() {
        let source = r"
struct Foo;
trait Display {}
impl Display for Foo {}
";

        let results = extract_impl_traits(source, Path::new("test.rs"));

        assert_eq!(
            results.len(),
            1,
            "expected one impl trait, got {:?}",
            results
        );
        assert_eq!(results[0].trait_name, "Display");
        assert_eq!(results[0].impl_type, "Foo");
    }

    /// A limit one past `u32::MAX` must be rejected with `ParseError`.
    /// Only meaningful where `usize` is wider than `u32`.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_ast_recursion_limit_overflow() {
        let source = "fn foo() {}";
        let big_limit = usize::try_from(u32::MAX).unwrap() + 1;

        let result = SemanticExtractor::extract(source, "rust", Some(big_limit));

        assert!(
            matches!(result, Err(ParserError::ParseError(_))),
            "expected ParseError for oversized limit, got {:?}",
            result
        );
    }

    /// A small positive limit must still find the top-level function.
    #[test]
    fn test_ast_recursion_limit_some() {
        let source = "fn hello() -> u32 { 42 }";

        let analysis = SemanticExtractor::extract(source, "rust", Some(5))
            .expect("extract with Some(5) failed");

        assert!(
            !analysis.functions.is_empty(),
            "expected at least one function with depth limit 5"
        );
    }

    /// A `let` binding and a later use must produce a write site and a read site.
    #[test]
    fn test_extract_def_use_for_file_finds_write_and_read() {
        let source = r#"
fn main() {
    let count = 0;
    println!("{}", count);
}
"#;

        let sites = SemanticExtractor::extract_def_use_for_file(
            source,
            "rust",
            "count",
            "src/main.rs",
            None,
        );

        assert!(
            !sites.is_empty(),
            "expected at least one def-use site for 'count'"
        );
        let has_write = sites
            .iter()
            .any(|s| s.kind == crate::types::DefUseKind::Write);
        let has_read = sites
            .iter()
            .any(|s| s.kind == crate::types::DefUseKind::Read);
        assert!(has_write, "expected a write site for 'count'");
        assert!(has_read, "expected a read site for 'count'");
        // Indexing is safe: non-emptiness was asserted above.
        assert_eq!(sites[0].file, "src/main.rs");
    }

    /// A symbol that never appears in the source must yield no sites.
    #[test]
    fn test_extract_def_use_for_file_no_match_returns_empty() {
        let source = "fn foo() { let x = 1; }";

        let sites = SemanticExtractor::extract_def_use_for_file(
            source,
            "rust",
            "nonexistent_symbol",
            "src/lib.rs",
            None,
        );

        assert!(sites.is_empty(), "expected empty for nonexistent symbol");
    }
}
1553
/// Unit tests for import extraction with the Python grammar.
#[cfg(all(test, feature = "lang-python"))]
mod tests_python {
    use super::*;

    /// `from . import foo` must produce an import whose module contains a dot.
    #[test]
    fn test_python_relative_import() {
        let analysis = SemanticExtractor::extract("from . import foo\n", "python", None).unwrap();

        let has_relative = analysis
            .imports
            .iter()
            .any(|import| import.module.contains('.'));

        assert!(
            has_relative,
            "expected relative import in {:?}",
            analysis.imports
        );
    }

    /// `from os import path as p` must list `path` as an item of module `os`.
    #[test]
    fn test_python_aliased_import() {
        let analysis =
            SemanticExtractor::extract("from os import path as p\n", "python", None).unwrap();

        let has_path_from_os = analysis.imports.iter().any(|import| {
            import.module == "os" && import.items.iter().any(|item| item == "path")
        });

        assert!(
            has_path_from_os,
            "expected import 'path' from module 'os' in {:?}",
            analysis.imports
        );
    }
}
1593
/// Tests that both extractors reject a language that is not supported.
#[cfg(test)]
mod tests_unsupported {
    use super::*;

    /// Language name used by both tests as the unsupported input.
    const UNKNOWN_LANGUAGE: &str = "cobol";

    /// `ElementExtractor` must return `UnsupportedLanguage` carrying the requested name.
    #[test]
    fn test_element_extractor_unsupported_language() {
        let outcome = ElementExtractor::extract_with_depth("x = 1", UNKNOWN_LANGUAGE);

        let rejected = matches!(
            &outcome,
            Err(ParserError::UnsupportedLanguage(lang)) if lang == UNKNOWN_LANGUAGE
        );

        assert!(
            rejected,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }

    /// `SemanticExtractor` must return `UnsupportedLanguage` carrying the requested name.
    #[test]
    fn test_semantic_extractor_unsupported_language() {
        let outcome = SemanticExtractor::extract("x = 1", UNKNOWN_LANGUAGE, None);

        let rejected = matches!(
            &outcome,
            Err(ParserError::UnsupportedLanguage(lang)) if lang == UNKNOWN_LANGUAGE
        );

        assert!(
            rejected,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }
}