1use crate::languages::get_language_info;
11use crate::types::{
12 AssignmentInfo, CallInfo, ClassInfo, FieldAccessInfo, FunctionInfo, ImportInfo, ReferenceInfo,
13 ReferenceType, SemanticAnalysis,
14};
15use std::cell::RefCell;
16use std::collections::HashMap;
17use std::sync::LazyLock;
18use thiserror::Error;
19use tracing::instrument;
20use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
21
22#[derive(Debug, Error)]
23pub enum ParserError {
24 #[error("Unsupported language: {0}")]
25 UnsupportedLanguage(String),
26 #[error("Failed to parse file: {0}")]
27 ParseError(String),
28 #[error("Invalid UTF-8 in file")]
29 InvalidUtf8,
30 #[error("Query error: {0}")]
31 QueryError(String),
32}
33
34struct CompiledQueries {
37 element: Query,
38 call: Query,
39 import: Option<Query>,
40 impl_block: Option<Query>,
41 reference: Option<Query>,
42 assignment: Option<Query>,
43 field: Option<Query>,
44}
45
46fn build_compiled_queries(
48 lang_info: &crate::languages::LanguageInfo,
49) -> Result<CompiledQueries, ParserError> {
50 let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
51 ParserError::QueryError(format!(
52 "Failed to compile element query for {}: {}",
53 lang_info.name, e
54 ))
55 })?;
56
57 let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
58 ParserError::QueryError(format!(
59 "Failed to compile call query for {}: {}",
60 lang_info.name, e
61 ))
62 })?;
63
64 let import = if let Some(import_query_str) = lang_info.import_query {
65 Some(
66 Query::new(&lang_info.language, import_query_str).map_err(|e| {
67 ParserError::QueryError(format!(
68 "Failed to compile import query for {}: {}",
69 lang_info.name, e
70 ))
71 })?,
72 )
73 } else {
74 None
75 };
76
77 let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
78 Some(
79 Query::new(&lang_info.language, impl_query_str).map_err(|e| {
80 ParserError::QueryError(format!(
81 "Failed to compile impl query for {}: {}",
82 lang_info.name, e
83 ))
84 })?,
85 )
86 } else {
87 None
88 };
89
90 let reference = if let Some(ref_query_str) = lang_info.reference_query {
91 Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
92 ParserError::QueryError(format!(
93 "Failed to compile reference query for {}: {}",
94 lang_info.name, e
95 ))
96 })?)
97 } else {
98 None
99 };
100
101 let assignment = if let Some(assignment_query_str) = lang_info.assignment_query {
102 Some(
103 Query::new(&lang_info.language, assignment_query_str).map_err(|e| {
104 ParserError::QueryError(format!(
105 "Failed to compile assignment query for {}: {}",
106 lang_info.name, e
107 ))
108 })?,
109 )
110 } else {
111 None
112 };
113
114 let field = if let Some(field_query_str) = lang_info.field_query {
115 Some(
116 Query::new(&lang_info.language, field_query_str).map_err(|e| {
117 ParserError::QueryError(format!(
118 "Failed to compile field query for {}: {}",
119 lang_info.name, e
120 ))
121 })?,
122 )
123 } else {
124 None
125 };
126
127 Ok(CompiledQueries {
128 element,
129 call,
130 import,
131 impl_block,
132 reference,
133 assignment,
134 field,
135 })
136}
137
138fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
140 let supported_languages = ["rust", "python", "typescript", "tsx", "go", "java"];
141 let mut cache = HashMap::new();
142
143 for lang_name in &supported_languages {
144 if let Some(lang_info) = get_language_info(lang_name) {
145 match build_compiled_queries(&lang_info) {
146 Ok(compiled) => {
147 cache.insert(*lang_name, compiled);
148 }
149 Err(e) => {
150 tracing::error!(
151 "Failed to compile queries for language {}: {}",
152 lang_name,
153 e
154 );
155 }
156 }
157 }
158 }
159
160 cache
161}
162
163static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
165 LazyLock::new(init_query_cache);
166
167fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
169 QUERY_CACHE
170 .get(language)
171 .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
172}
173
174thread_local! {
175 static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
176}
177
178pub struct ElementExtractor;
180
181impl ElementExtractor {
182 #[instrument(skip_all, fields(language))]
190 pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
191 let lang_info = get_language_info(language)
192 .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
193
194 let tree = PARSER.with(|p| {
195 let mut parser = p.borrow_mut();
196 parser
197 .set_language(&lang_info.language)
198 .map_err(|e| ParserError::ParseError(format!("Failed to set language: {}", e)))?;
199 parser
200 .parse(source, None)
201 .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
202 })?;
203
204 let compiled = get_compiled_queries(language)?;
205
206 let mut cursor = QueryCursor::new();
207 let mut function_count = 0;
208 let mut class_count = 0;
209
210 let mut matches = cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
211 while let Some(mat) = matches.next() {
212 for capture in mat.captures {
213 let capture_name = compiled.element.capture_names()[capture.index as usize];
214 match capture_name {
215 "function" => function_count += 1,
216 "class" => class_count += 1,
217 _ => {}
218 }
219 }
220 }
221
222 tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
223
224 Ok((function_count, class_count))
225 }
226}
227
228fn extract_imports_from_node(
232 node: &Node,
233 source: &str,
234 prefix: &str,
235 line: usize,
236 imports: &mut Vec<ImportInfo>,
237) {
238 match node.kind() {
239 "identifier" | "self" | "super" | "crate" => {
241 let name = source[node.start_byte()..node.end_byte()].to_string();
242 imports.push(ImportInfo {
243 module: prefix.to_string(),
244 items: vec![name],
245 line,
246 });
247 }
248 "scoped_identifier" => {
250 let item = node
251 .child_by_field_name("name")
252 .map(|n| source[n.start_byte()..n.end_byte()].to_string())
253 .unwrap_or_default();
254 let module = node
255 .child_by_field_name("path")
256 .map(|p| {
257 let path_text = source[p.start_byte()..p.end_byte()].to_string();
258 if prefix.is_empty() {
259 path_text
260 } else {
261 format!("{}::{}", prefix, path_text)
262 }
263 })
264 .unwrap_or_else(|| prefix.to_string());
265 if !item.is_empty() {
266 imports.push(ImportInfo {
267 module,
268 items: vec![item],
269 line,
270 });
271 }
272 }
273 "scoped_use_list" => {
275 let new_prefix = node
276 .child_by_field_name("path")
277 .map(|p| {
278 let path_text = source[p.start_byte()..p.end_byte()].to_string();
279 if prefix.is_empty() {
280 path_text
281 } else {
282 format!("{}::{}", prefix, path_text)
283 }
284 })
285 .unwrap_or_else(|| prefix.to_string());
286 if let Some(list) = node.child_by_field_name("list") {
287 extract_imports_from_node(&list, source, &new_prefix, line, imports);
288 }
289 }
290 "use_list" => {
292 let mut cursor = node.walk();
293 for child in node.children(&mut cursor) {
294 match child.kind() {
295 "{" | "}" | "," => {}
296 _ => extract_imports_from_node(&child, source, prefix, line, imports),
297 }
298 }
299 }
300 "use_wildcard" => {
302 let text = source[node.start_byte()..node.end_byte()].to_string();
303 let module = if let Some(stripped) = text.strip_suffix("::*") {
304 if prefix.is_empty() {
305 stripped.to_string()
306 } else {
307 format!("{}::{}", prefix, stripped)
308 }
309 } else {
310 prefix.to_string()
311 };
312 imports.push(ImportInfo {
313 module,
314 items: vec!["*".to_string()],
315 line,
316 });
317 }
318 "use_as_clause" => {
320 let alias = node
321 .child_by_field_name("alias")
322 .map(|n| source[n.start_byte()..n.end_byte()].to_string())
323 .unwrap_or_default();
324 let module = if let Some(path_node) = node.child_by_field_name("path") {
325 match path_node.kind() {
326 "scoped_identifier" => path_node
327 .child_by_field_name("path")
328 .map(|p| {
329 let p_text = source[p.start_byte()..p.end_byte()].to_string();
330 if prefix.is_empty() {
331 p_text
332 } else {
333 format!("{}::{}", prefix, p_text)
334 }
335 })
336 .unwrap_or_else(|| prefix.to_string()),
337 _ => prefix.to_string(),
338 }
339 } else {
340 prefix.to_string()
341 };
342 if !alias.is_empty() {
343 imports.push(ImportInfo {
344 module,
345 items: vec![alias],
346 line,
347 });
348 }
349 }
350 _ => {
352 let text = source[node.start_byte()..node.end_byte()]
353 .trim()
354 .to_string();
355 if !text.is_empty() {
356 imports.push(ImportInfo {
357 module: text,
358 items: vec![],
359 line,
360 });
361 }
362 }
363 }
364}
365
366pub struct SemanticExtractor;
367
368impl SemanticExtractor {
369 #[instrument(skip_all, fields(language))]
377 pub fn extract(
378 source: &str,
379 language: &str,
380 ast_recursion_limit: Option<usize>,
381 ) -> Result<SemanticAnalysis, ParserError> {
382 let lang_info = get_language_info(language)
383 .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
384
385 let tree = PARSER.with(|p| {
386 let mut parser = p.borrow_mut();
387 parser
388 .set_language(&lang_info.language)
389 .map_err(|e| ParserError::ParseError(format!("Failed to set language: {}", e)))?;
390 parser
391 .parse(source, None)
392 .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
393 })?;
394
395 let mut functions = Vec::new();
396 let mut classes = Vec::new();
397 let mut imports = Vec::new();
398 let mut references = Vec::new();
399 let mut call_frequency = HashMap::new();
400 let mut calls = Vec::new();
401 let mut assignments: Vec<AssignmentInfo> = Vec::new();
402 let mut field_accesses: Vec<FieldAccessInfo> = Vec::new();
403
404 let max_depth: Option<u32> = ast_recursion_limit
406 .map(|limit| {
407 u32::try_from(limit).map_err(|_| {
408 ParserError::ParseError(format!(
409 "ast_recursion_limit {} exceeds maximum supported value {}",
410 limit,
411 u32::MAX
412 ))
413 })
414 })
415 .transpose()?;
416
417 let compiled = get_compiled_queries(language)?;
419 let mut cursor = QueryCursor::new();
420 if let Some(depth) = max_depth {
421 cursor.set_max_start_depth(Some(depth));
422 }
423
424 let mut matches = cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
425 let mut seen_functions = std::collections::HashSet::new();
426
427 while let Some(mat) = matches.next() {
428 for capture in mat.captures {
429 let capture_name = compiled.element.capture_names()[capture.index as usize];
430 let node = capture.node;
431
432 match capture_name {
433 "function" => {
434 if let Some(name_node) = node.child_by_field_name("name") {
435 let name =
436 source[name_node.start_byte()..name_node.end_byte()].to_string();
437 let func_key = (name.clone(), node.start_position().row);
438
439 if !seen_functions.contains(&func_key) {
440 seen_functions.insert(func_key);
441
442 let params = node
443 .child_by_field_name("parameters")
444 .map(|p| source[p.start_byte()..p.end_byte()].to_string())
445 .unwrap_or_default();
446 let return_type = node
447 .child_by_field_name("return_type")
448 .map(|r| source[r.start_byte()..r.end_byte()].to_string());
449
450 functions.push(FunctionInfo {
451 name,
452 line: node.start_position().row + 1,
453 end_line: node.end_position().row + 1,
454 parameters: if params.is_empty() {
455 Vec::new()
456 } else {
457 vec![params]
458 },
459 return_type,
460 });
461 }
462 }
463 }
464 "class" => {
465 if let Some(name_node) = node.child_by_field_name("name") {
466 let name =
467 source[name_node.start_byte()..name_node.end_byte()].to_string();
468 let inherits = if let Some(handler) = lang_info.extract_inheritance {
469 handler(&node, source)
470 } else {
471 Vec::new()
472 };
473 classes.push(ClassInfo {
474 name,
475 line: node.start_position().row + 1,
476 end_line: node.end_position().row + 1,
477 methods: Vec::new(),
478 fields: Vec::new(),
479 inherits,
480 });
481 }
482 }
483 _ => {}
484 }
485 }
486 }
487
488 let mut cursor = QueryCursor::new();
490 if let Some(depth) = max_depth {
491 cursor.set_max_start_depth(Some(depth));
492 }
493
494 let mut matches = cursor.matches(&compiled.call, tree.root_node(), source.as_bytes());
495 while let Some(mat) = matches.next() {
496 for capture in mat.captures {
497 let capture_name = compiled.call.capture_names()[capture.index as usize];
498 if capture_name == "call" {
499 let node = capture.node;
500 let call_name = source[node.start_byte()..node.end_byte()].to_string();
501 *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
502
503 let mut current = node;
505 let mut caller = "<module>".to_string();
506 while let Some(parent) = current.parent() {
507 if parent.kind() == "function_item"
508 && let Some(name_node) = parent.child_by_field_name("name")
509 {
510 caller =
511 source[name_node.start_byte()..name_node.end_byte()].to_string();
512 break;
513 }
514 current = parent;
515 }
516
517 let mut arg_count = None;
519 let mut arg_node = node;
520 while let Some(parent) = arg_node.parent() {
521 if parent.kind() == "call_expression" {
522 if let Some(args) = parent.child_by_field_name("arguments") {
523 arg_count = Some(args.named_child_count());
524 }
525 break;
526 }
527 arg_node = parent;
528 }
529
530 calls.push(CallInfo {
531 caller,
532 callee: call_name,
533 line: node.start_position().row + 1,
534 column: node.start_position().column,
535 arg_count,
536 });
537 }
538 }
539 }
540
541 if let Some(ref import_query) = compiled.import {
543 let mut cursor = QueryCursor::new();
544 if let Some(depth) = max_depth {
545 cursor.set_max_start_depth(Some(depth));
546 }
547
548 let mut matches = cursor.matches(import_query, tree.root_node(), source.as_bytes());
549 while let Some(mat) = matches.next() {
550 for capture in mat.captures {
551 let capture_name = import_query.capture_names()[capture.index as usize];
552 if capture_name == "import_path" {
553 let node = capture.node;
554 let line = node.start_position().row + 1;
555 extract_imports_from_node(&node, source, "", line, &mut imports);
556 }
557 }
558 }
559 }
560
561 if let Some(ref impl_query) = compiled.impl_block {
563 let mut cursor = QueryCursor::new();
564 if let Some(depth) = max_depth {
565 cursor.set_max_start_depth(Some(depth));
566 }
567
568 let mut matches = cursor.matches(impl_query, tree.root_node(), source.as_bytes());
569 while let Some(mat) = matches.next() {
570 let mut impl_type_name = String::new();
571 let mut method_name = String::new();
572 let mut method_line = 0usize;
573 let mut method_end_line = 0usize;
574 let mut method_params = String::new();
575 let mut method_return_type: Option<String> = None;
576
577 for capture in mat.captures {
578 let capture_name = impl_query.capture_names()[capture.index as usize];
579 let node = capture.node;
580 match capture_name {
581 "impl_type" => {
582 impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
583 }
584 "method_name" => {
585 method_name = source[node.start_byte()..node.end_byte()].to_string();
586 }
587 "method_params" => {
588 method_params = source[node.start_byte()..node.end_byte()].to_string();
589 }
590 "method" => {
591 method_line = node.start_position().row + 1;
592 method_end_line = node.end_position().row + 1;
593 method_return_type = node
594 .child_by_field_name("return_type")
595 .map(|r| source[r.start_byte()..r.end_byte()].to_string());
596 }
597 _ => {}
598 }
599 }
600
601 if !impl_type_name.is_empty() && !method_name.is_empty() {
602 let func = FunctionInfo {
603 name: method_name,
604 line: method_line,
605 end_line: method_end_line,
606 parameters: if method_params.is_empty() {
607 Vec::new()
608 } else {
609 vec![method_params]
610 },
611 return_type: method_return_type,
612 };
613 if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
614 class.methods.push(func);
615 }
616 }
617 }
618 }
619
620 if let Some(ref ref_query) = compiled.reference {
622 let mut cursor = QueryCursor::new();
623 if let Some(depth) = max_depth {
624 cursor.set_max_start_depth(Some(depth));
625 }
626
627 let mut seen_refs = std::collections::HashSet::new();
628 let mut matches = cursor.matches(ref_query, tree.root_node(), source.as_bytes());
629 while let Some(mat) = matches.next() {
630 for capture in mat.captures {
631 let capture_name = ref_query.capture_names()[capture.index as usize];
632 if capture_name == "type_ref" {
633 let node = capture.node;
634 let type_ref = source[node.start_byte()..node.end_byte()].to_string();
635 if seen_refs.insert(type_ref.clone()) {
636 references.push(ReferenceInfo {
637 symbol: type_ref,
638 reference_type: ReferenceType::Usage,
639 location: String::new(),
641 line: node.start_position().row + 1,
642 });
643 }
644 }
645 }
646 }
647 }
648
649 if let Some(ref assignment_query) = compiled.assignment {
651 let mut cursor = QueryCursor::new();
652 if let Some(depth) = max_depth {
653 cursor.set_max_start_depth(Some(depth));
654 }
655
656 let mut matches = cursor.matches(assignment_query, tree.root_node(), source.as_bytes());
657 while let Some(mat) = matches.next() {
658 let mut variable = String::new();
659 let mut value = String::new();
660 let mut line = 0usize;
661
662 for capture in mat.captures {
663 let capture_name = assignment_query.capture_names()[capture.index as usize];
664 let node = capture.node;
665 match capture_name {
666 "variable" => {
667 variable = source[node.start_byte()..node.end_byte()].to_string();
668 }
669 "value" => {
670 value = source[node.start_byte()..node.end_byte()].to_string();
671 line = node.start_position().row + 1;
672 }
673 _ => {}
674 }
675 }
676
677 if !variable.is_empty() && !value.is_empty() {
678 let mut current = mat.captures[0].node;
679 let mut scope = "global".to_string();
680 while let Some(parent) = current.parent() {
681 if parent.kind() == "function_item"
682 && let Some(name_node) = parent.child_by_field_name("name")
683 {
684 scope =
685 source[name_node.start_byte()..name_node.end_byte()].to_string();
686 break;
687 }
688 current = parent;
689 }
690
691 assignments.push(AssignmentInfo {
692 variable,
693 value,
694 line,
695 scope,
696 });
697 }
698 }
699 }
700
701 if let Some(ref field_query) = compiled.field {
703 let mut cursor = QueryCursor::new();
704 if let Some(depth) = max_depth {
705 cursor.set_max_start_depth(Some(depth));
706 }
707
708 let mut matches = cursor.matches(field_query, tree.root_node(), source.as_bytes());
709 while let Some(mat) = matches.next() {
710 let mut object = String::new();
711 let mut field = String::new();
712 let mut line = 0usize;
713
714 for capture in mat.captures {
715 let capture_name = field_query.capture_names()[capture.index as usize];
716 let node = capture.node;
717 match capture_name {
718 "object" => {
719 object = source[node.start_byte()..node.end_byte()].to_string();
720 }
721 "field" => {
722 field = source[node.start_byte()..node.end_byte()].to_string();
723 line = node.start_position().row + 1;
724 }
725 _ => {}
726 }
727 }
728
729 if !object.is_empty() && !field.is_empty() {
730 let mut current = mat.captures[0].node;
731 let mut scope = "global".to_string();
732 while let Some(parent) = current.parent() {
733 if parent.kind() == "function_item"
734 && let Some(name_node) = parent.child_by_field_name("name")
735 {
736 scope =
737 source[name_node.start_byte()..name_node.end_byte()].to_string();
738 break;
739 }
740 current = parent;
741 }
742
743 field_accesses.push(FieldAccessInfo {
744 object,
745 field,
746 line,
747 scope,
748 });
749 }
750 }
751 }
752
753 tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), "extraction complete");
754
755 Ok(SemanticAnalysis {
756 functions,
757 classes,
758 imports,
759 references,
760 call_frequency,
761 calls,
762 assignments,
763 field_accesses,
764 })
765 }
766}
767
#[cfg(test)]
mod tests {
    use super::*;

    /// A `let` binding inside `main` is reported with its variable, value and
    /// the enclosing function as scope.
    #[test]
    fn test_extract_assignments() {
        let source = r#"
fn main() {
    let x = 42;
    let y = x + 1;
}
"#;
        let outcome = SemanticExtractor::extract(source, "rust", None);
        assert!(outcome.is_ok());
        let analysis = outcome.unwrap();
        assert!(!analysis.assignments.is_empty());

        let first = &analysis.assignments[0];
        assert_eq!(first.variable, "x");
        assert_eq!(first.value, "42");
        assert_eq!(first.scope, "main");
    }

    /// Field reads on a parameter are reported with object, field and the
    /// enclosing function as scope.
    #[test]
    fn test_extract_field_accesses() {
        let source = r#"
fn process(user: &User) {
    let name = user.name;
    let age = user.age;
}
"#;
        let outcome = SemanticExtractor::extract(source, "rust", None);
        assert!(outcome.is_ok());
        let analysis = outcome.unwrap();
        assert!(!analysis.field_accesses.is_empty());

        let has_user_name = analysis
            .field_accesses
            .iter()
            .any(|access| access.object == "user" && access.field == "name");
        assert!(has_user_name);
        assert_eq!(analysis.field_accesses[0].scope, "process");
    }
}