Skip to main content

code_analyze_mcp/
parser.rs

1//! Tree-sitter-based parser for extracting semantic structure from source code.
2//!
3//! This module provides language-agnostic parsing using tree-sitter queries to extract
4//! functions, classes, imports, references, and other semantic elements from source files.
5//! Two main extractors handle different use cases:
6//!
7//! - [`ElementExtractor`]: Quick extraction of function and class counts.
8//! - [`SemanticExtractor`]: Detailed semantic analysis with calls, imports, and references.
9
10use crate::languages::get_language_info;
11use crate::types::{
12    AssignmentInfo, CallInfo, ClassInfo, FieldAccessInfo, FunctionInfo, ImportInfo, ReferenceInfo,
13    ReferenceType, SemanticAnalysis,
14};
15use std::cell::RefCell;
16use std::collections::HashMap;
17use std::sync::LazyLock;
18use thiserror::Error;
19use tracing::instrument;
20use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
21
/// Errors reported by the extractors in this module.
#[derive(Debug, Error)]
pub enum ParserError {
    /// No language info, or no compiled query set, is available for the named language.
    #[error("Unsupported language: {0}")]
    UnsupportedLanguage(String),
    /// The parser could not be configured for the language, produced no tree, or was
    /// given an out-of-range recursion limit. The payload is a human-readable detail.
    #[error("Failed to parse file: {0}")]
    ParseError(String),
    /// The input was not valid UTF-8.
    #[error("Invalid UTF-8 in file")]
    InvalidUtf8,
    /// A tree-sitter query string failed to compile. The payload names the query and language.
    #[error("Query error: {0}")]
    QueryError(String),
}
33
/// Compiled tree-sitter queries for a language.
/// Stores all query types: mandatory (element, call) and optional (import, impl,
/// reference, assignment, field).
struct CompiledQueries {
    /// Element query; its `function` and `class` captures are consumed.
    element: Query,
    /// Call query; its `call` capture is consumed.
    call: Query,
    /// Import query, if the language defines one; its `import_path` capture is consumed.
    import: Option<Query>,
    /// Impl-block query, if the language defines one (`impl_type`, `method`,
    /// `method_name`, `method_params` captures).
    impl_block: Option<Query>,
    /// Reference query, if the language defines one; its `type_ref` capture is consumed.
    reference: Option<Query>,
    /// Assignment query, if the language defines one (`variable` and `value` captures).
    assignment: Option<Query>,
    /// Field-access query, if the language defines one.
    field: Option<Query>,
}
45
46/// Build compiled queries for a given language.
47fn build_compiled_queries(
48    lang_info: &crate::languages::LanguageInfo,
49) -> Result<CompiledQueries, ParserError> {
50    let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
51        ParserError::QueryError(format!(
52            "Failed to compile element query for {}: {}",
53            lang_info.name, e
54        ))
55    })?;
56
57    let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
58        ParserError::QueryError(format!(
59            "Failed to compile call query for {}: {}",
60            lang_info.name, e
61        ))
62    })?;
63
64    let import = if let Some(import_query_str) = lang_info.import_query {
65        Some(
66            Query::new(&lang_info.language, import_query_str).map_err(|e| {
67                ParserError::QueryError(format!(
68                    "Failed to compile import query for {}: {}",
69                    lang_info.name, e
70                ))
71            })?,
72        )
73    } else {
74        None
75    };
76
77    let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
78        Some(
79            Query::new(&lang_info.language, impl_query_str).map_err(|e| {
80                ParserError::QueryError(format!(
81                    "Failed to compile impl query for {}: {}",
82                    lang_info.name, e
83                ))
84            })?,
85        )
86    } else {
87        None
88    };
89
90    let reference = if let Some(ref_query_str) = lang_info.reference_query {
91        Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
92            ParserError::QueryError(format!(
93                "Failed to compile reference query for {}: {}",
94                lang_info.name, e
95            ))
96        })?)
97    } else {
98        None
99    };
100
101    let assignment = if let Some(assignment_query_str) = lang_info.assignment_query {
102        Some(
103            Query::new(&lang_info.language, assignment_query_str).map_err(|e| {
104                ParserError::QueryError(format!(
105                    "Failed to compile assignment query for {}: {}",
106                    lang_info.name, e
107                ))
108            })?,
109        )
110    } else {
111        None
112    };
113
114    let field = if let Some(field_query_str) = lang_info.field_query {
115        Some(
116            Query::new(&lang_info.language, field_query_str).map_err(|e| {
117                ParserError::QueryError(format!(
118                    "Failed to compile field query for {}: {}",
119                    lang_info.name, e
120                ))
121            })?,
122        )
123    } else {
124        None
125    };
126
127    Ok(CompiledQueries {
128        element,
129        call,
130        import,
131        impl_block,
132        reference,
133        assignment,
134        field,
135    })
136}
137
138/// Initialize the query cache with compiled queries for all supported languages.
139fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
140    let supported_languages = ["rust", "python", "typescript", "tsx", "go", "java"];
141    let mut cache = HashMap::new();
142
143    for lang_name in &supported_languages {
144        if let Some(lang_info) = get_language_info(lang_name) {
145            match build_compiled_queries(&lang_info) {
146                Ok(compiled) => {
147                    cache.insert(*lang_name, compiled);
148                }
149                Err(e) => {
150                    tracing::error!(
151                        "Failed to compile queries for language {}: {}",
152                        lang_name,
153                        e
154                    );
155                }
156            }
157        }
158    }
159
160    cache
161}
162
/// Lazily initialized cache of compiled queries per language.
///
/// Populated on first access by `init_query_cache`; keyed by language name. Languages
/// whose queries failed to compile are absent from the map.
static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
    LazyLock::new(init_query_cache);
166
167/// Get compiled queries for a language from the cache.
168fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
169    QUERY_CACHE
170        .get(language)
171        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
172}
173
thread_local! {
    /// Per-thread tree-sitter parser. Callers set the language on it before each parse,
    /// so one instance is reused across languages.
    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
}
177
178/// Canonical API for extracting element counts from source code.
179pub struct ElementExtractor;
180
181impl ElementExtractor {
182    /// Extract function and class counts from source code.
183    ///
184    /// # Errors
185    ///
186    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
187    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
188    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
189    #[instrument(skip_all, fields(language))]
190    pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
191        let lang_info = get_language_info(language)
192            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
193
194        let tree = PARSER.with(|p| {
195            let mut parser = p.borrow_mut();
196            parser
197                .set_language(&lang_info.language)
198                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {}", e)))?;
199            parser
200                .parse(source, None)
201                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
202        })?;
203
204        let compiled = get_compiled_queries(language)?;
205
206        let mut cursor = QueryCursor::new();
207        let mut function_count = 0;
208        let mut class_count = 0;
209
210        let mut matches = cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
211        while let Some(mat) = matches.next() {
212            for capture in mat.captures {
213                let capture_name = compiled.element.capture_names()[capture.index as usize];
214                match capture_name {
215                    "function" => function_count += 1,
216                    "class" => class_count += 1,
217                    _ => {}
218                }
219            }
220        }
221
222        tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
223
224        Ok((function_count, class_count))
225    }
226}
227
228/// Recursively extract `ImportInfo` entries from a use-clause node, respecting all Rust
229/// use-declaration forms (`scoped_identifier`, `scoped_use_list`, `use_list`,
230/// `use_as_clause`, `use_wildcard`, bare `identifier`).
231fn extract_imports_from_node(
232    node: &Node,
233    source: &str,
234    prefix: &str,
235    line: usize,
236    imports: &mut Vec<ImportInfo>,
237) {
238    match node.kind() {
239        // Simple identifier: `use foo;` or an item inside `{foo, bar}`
240        "identifier" | "self" | "super" | "crate" => {
241            let name = source[node.start_byte()..node.end_byte()].to_string();
242            imports.push(ImportInfo {
243                module: prefix.to_string(),
244                items: vec![name],
245                line,
246            });
247        }
248        // Qualified path: `std::collections::HashMap`
249        "scoped_identifier" => {
250            let item = node
251                .child_by_field_name("name")
252                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
253                .unwrap_or_default();
254            let module = node
255                .child_by_field_name("path")
256                .map(|p| {
257                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
258                    if prefix.is_empty() {
259                        path_text
260                    } else {
261                        format!("{}::{}", prefix, path_text)
262                    }
263                })
264                .unwrap_or_else(|| prefix.to_string());
265            if !item.is_empty() {
266                imports.push(ImportInfo {
267                    module,
268                    items: vec![item],
269                    line,
270                });
271            }
272        }
273        // `std::{io, fs}` — path prefix followed by a brace list
274        "scoped_use_list" => {
275            let new_prefix = node
276                .child_by_field_name("path")
277                .map(|p| {
278                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
279                    if prefix.is_empty() {
280                        path_text
281                    } else {
282                        format!("{}::{}", prefix, path_text)
283                    }
284                })
285                .unwrap_or_else(|| prefix.to_string());
286            if let Some(list) = node.child_by_field_name("list") {
287                extract_imports_from_node(&list, source, &new_prefix, line, imports);
288            }
289        }
290        // `{HashMap, HashSet}` — brace-enclosed list of items
291        "use_list" => {
292            let mut cursor = node.walk();
293            for child in node.children(&mut cursor) {
294                match child.kind() {
295                    "{" | "}" | "," => {}
296                    _ => extract_imports_from_node(&child, source, prefix, line, imports),
297                }
298            }
299        }
300        // `std::io::*` — glob import
301        "use_wildcard" => {
302            let text = source[node.start_byte()..node.end_byte()].to_string();
303            let module = if let Some(stripped) = text.strip_suffix("::*") {
304                if prefix.is_empty() {
305                    stripped.to_string()
306                } else {
307                    format!("{}::{}", prefix, stripped)
308                }
309            } else {
310                prefix.to_string()
311            };
312            imports.push(ImportInfo {
313                module,
314                items: vec!["*".to_string()],
315                line,
316            });
317        }
318        // `io as stdio` or `std::io as stdio`
319        "use_as_clause" => {
320            let alias = node
321                .child_by_field_name("alias")
322                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
323                .unwrap_or_default();
324            let module = if let Some(path_node) = node.child_by_field_name("path") {
325                match path_node.kind() {
326                    "scoped_identifier" => path_node
327                        .child_by_field_name("path")
328                        .map(|p| {
329                            let p_text = source[p.start_byte()..p.end_byte()].to_string();
330                            if prefix.is_empty() {
331                                p_text
332                            } else {
333                                format!("{}::{}", prefix, p_text)
334                            }
335                        })
336                        .unwrap_or_else(|| prefix.to_string()),
337                    _ => prefix.to_string(),
338                }
339            } else {
340                prefix.to_string()
341            };
342            if !alias.is_empty() {
343                imports.push(ImportInfo {
344                    module,
345                    items: vec![alias],
346                    line,
347                });
348            }
349        }
350        // Python import_from_statement: `from module import name` or `from . import *`
351        "import_from_statement" => {
352            extract_python_import_from(node, source, line, imports);
353        }
354        // Fallback for non-Rust import nodes: capture full text as module
355        _ => {
356            let text = source[node.start_byte()..node.end_byte()]
357                .trim()
358                .to_string();
359            if !text.is_empty() {
360                imports.push(ImportInfo {
361                    module: text,
362                    items: vec![],
363                    line,
364                });
365            }
366        }
367    }
368}
369
370/// Extract an item name from a dotted_name or aliased_import child node.
371fn extract_import_item_name(child: &Node, source: &str) -> Option<String> {
372    match child.kind() {
373        "dotted_name" => {
374            let name = source[child.start_byte()..child.end_byte()]
375                .trim()
376                .to_string();
377            if name.is_empty() { None } else { Some(name) }
378        }
379        "aliased_import" => child.child_by_field_name("name").and_then(|n| {
380            let name = source[n.start_byte()..n.end_byte()].trim().to_string();
381            if name.is_empty() { None } else { Some(name) }
382        }),
383        _ => None,
384    }
385}
386
387/// Collect wildcard/named imports from an import_list node or from direct named children.
388fn collect_import_items(
389    node: &Node,
390    source: &str,
391    is_wildcard: &mut bool,
392    items: &mut Vec<String>,
393) {
394    // Prefer import_list child (wraps `from x import a, b`)
395    if let Some(import_list) = node.child_by_field_name("import_list") {
396        let mut cursor = import_list.walk();
397        for child in import_list.named_children(&mut cursor) {
398            if child.kind() == "wildcard_import" {
399                *is_wildcard = true;
400            } else if let Some(name) = extract_import_item_name(&child, source) {
401                items.push(name);
402            }
403        }
404        return;
405    }
406    // No import_list: single-name or wildcard as direct child (skip first named child = module_name)
407    let mut cursor = node.walk();
408    let mut first = true;
409    for child in node.named_children(&mut cursor) {
410        if first {
411            first = false;
412            continue;
413        }
414        if child.kind() == "wildcard_import" {
415            *is_wildcard = true;
416        } else if let Some(name) = extract_import_item_name(&child, source) {
417            items.push(name);
418        }
419    }
420}
421
422/// Handle Python `import_from_statement` node.
423fn extract_python_import_from(
424    node: &Node,
425    source: &str,
426    line: usize,
427    imports: &mut Vec<ImportInfo>,
428) {
429    let module = if let Some(m) = node.child_by_field_name("module_name") {
430        source[m.start_byte()..m.end_byte()].trim().to_string()
431    } else if let Some(r) = node.child_by_field_name("relative_import") {
432        source[r.start_byte()..r.end_byte()].trim().to_string()
433    } else {
434        String::new()
435    };
436
437    let mut is_wildcard = false;
438    let mut items = Vec::new();
439    collect_import_items(node, source, &mut is_wildcard, &mut items);
440
441    if !module.is_empty() {
442        imports.push(ImportInfo {
443            module,
444            items: if is_wildcard {
445                vec!["*".to_string()]
446            } else {
447                items
448            },
449            line,
450        });
451    }
452}
453
/// Detailed semantic extractor: functions, classes, imports, references, calls,
/// assignments, and field accesses. See [`SemanticExtractor::extract`].
pub struct SemanticExtractor;
455
456impl SemanticExtractor {
457    /// Extract semantic information from source code.
458    ///
459    /// # Errors
460    ///
461    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
462    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
463    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
464    #[instrument(skip_all, fields(language))]
465    pub fn extract(
466        source: &str,
467        language: &str,
468        ast_recursion_limit: Option<usize>,
469    ) -> Result<SemanticAnalysis, ParserError> {
470        let lang_info = get_language_info(language)
471            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
472
473        let tree = PARSER.with(|p| {
474            let mut parser = p.borrow_mut();
475            parser
476                .set_language(&lang_info.language)
477                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {}", e)))?;
478            parser
479                .parse(source, None)
480                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
481        })?;
482
483        // 0 is not a useful depth (visits root node only, returning zero results).
484        // Treat 0 as None (unlimited). See #339.
485        let max_depth: Option<u32> = ast_recursion_limit
486            .filter(|&limit| limit > 0)
487            .map(|limit| {
488                u32::try_from(limit).map_err(|_| {
489                    ParserError::ParseError(format!(
490                        "ast_recursion_limit {} exceeds maximum supported value {}",
491                        limit,
492                        u32::MAX
493                    ))
494                })
495            })
496            .transpose()?;
497
498        let compiled = get_compiled_queries(language)?;
499        let root = tree.root_node();
500
501        let mut functions = Vec::new();
502        let mut classes = Vec::new();
503        let mut imports = Vec::new();
504        let mut references = Vec::new();
505        let mut call_frequency = HashMap::new();
506        let mut calls = Vec::new();
507        let mut assignments: Vec<AssignmentInfo> = Vec::new();
508        let mut field_accesses: Vec<FieldAccessInfo> = Vec::new();
509
510        Self::extract_elements(
511            source,
512            compiled,
513            root,
514            max_depth,
515            &lang_info,
516            &mut functions,
517            &mut classes,
518        );
519        Self::extract_calls(
520            source,
521            compiled,
522            root,
523            max_depth,
524            &mut calls,
525            &mut call_frequency,
526        );
527        Self::extract_imports(source, compiled, root, max_depth, &mut imports);
528        Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes);
529        Self::extract_references(source, compiled, root, max_depth, &mut references);
530        Self::extract_assignments(source, compiled, root, max_depth, &mut assignments);
531        Self::extract_field_accesses(source, compiled, root, max_depth, &mut field_accesses);
532
533        tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), "extraction complete");
534
535        Ok(SemanticAnalysis {
536            functions,
537            classes,
538            imports,
539            references,
540            call_frequency,
541            calls,
542            assignments,
543            field_accesses,
544        })
545    }
546
547    fn extract_elements(
548        source: &str,
549        compiled: &CompiledQueries,
550        root: Node<'_>,
551        max_depth: Option<u32>,
552        lang_info: &crate::languages::LanguageInfo,
553        functions: &mut Vec<FunctionInfo>,
554        classes: &mut Vec<ClassInfo>,
555    ) {
556        let mut cursor = QueryCursor::new();
557        if let Some(depth) = max_depth {
558            cursor.set_max_start_depth(Some(depth));
559        }
560        let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
561        let mut seen_functions = std::collections::HashSet::new();
562
563        while let Some(mat) = matches.next() {
564            for capture in mat.captures {
565                let capture_name = compiled.element.capture_names()[capture.index as usize];
566                let node = capture.node;
567                match capture_name {
568                    "function" => {
569                        if let Some(name_node) = node.child_by_field_name("name") {
570                            let name =
571                                source[name_node.start_byte()..name_node.end_byte()].to_string();
572                            let func_key = (name.clone(), node.start_position().row);
573                            if !seen_functions.contains(&func_key) {
574                                seen_functions.insert(func_key);
575                                let params = node
576                                    .child_by_field_name("parameters")
577                                    .map(|p| source[p.start_byte()..p.end_byte()].to_string())
578                                    .unwrap_or_default();
579                                let return_type = node
580                                    .child_by_field_name("return_type")
581                                    .map(|r| source[r.start_byte()..r.end_byte()].to_string());
582                                functions.push(FunctionInfo {
583                                    name,
584                                    line: node.start_position().row + 1,
585                                    end_line: node.end_position().row + 1,
586                                    parameters: if params.is_empty() {
587                                        Vec::new()
588                                    } else {
589                                        vec![params]
590                                    },
591                                    return_type,
592                                });
593                            }
594                        }
595                    }
596                    "class" => {
597                        if let Some(name_node) = node.child_by_field_name("name") {
598                            let name =
599                                source[name_node.start_byte()..name_node.end_byte()].to_string();
600                            let inherits = if let Some(handler) = lang_info.extract_inheritance {
601                                handler(&node, source)
602                            } else {
603                                Vec::new()
604                            };
605                            classes.push(ClassInfo {
606                                name,
607                                line: node.start_position().row + 1,
608                                end_line: node.end_position().row + 1,
609                                methods: Vec::new(),
610                                fields: Vec::new(),
611                                inherits,
612                            });
613                        }
614                    }
615                    _ => {}
616                }
617            }
618        }
619    }
620
621    fn extract_calls(
622        source: &str,
623        compiled: &CompiledQueries,
624        root: Node<'_>,
625        max_depth: Option<u32>,
626        calls: &mut Vec<CallInfo>,
627        call_frequency: &mut HashMap<String, usize>,
628    ) {
629        let mut cursor = QueryCursor::new();
630        if let Some(depth) = max_depth {
631            cursor.set_max_start_depth(Some(depth));
632        }
633        let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
634
635        while let Some(mat) = matches.next() {
636            for capture in mat.captures {
637                let capture_name = compiled.call.capture_names()[capture.index as usize];
638                if capture_name != "call" {
639                    continue;
640                }
641                let node = capture.node;
642                let call_name = source[node.start_byte()..node.end_byte()].to_string();
643                *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
644
645                let mut current = node;
646                let mut caller = "<module>".to_string();
647                while let Some(parent) = current.parent() {
648                    if parent.kind() == "function_item"
649                        && let Some(name_node) = parent.child_by_field_name("name")
650                    {
651                        caller = source[name_node.start_byte()..name_node.end_byte()].to_string();
652                        break;
653                    }
654                    current = parent;
655                }
656
657                let mut arg_count = None;
658                let mut arg_node = node;
659                while let Some(parent) = arg_node.parent() {
660                    if parent.kind() == "call_expression" {
661                        if let Some(args) = parent.child_by_field_name("arguments") {
662                            arg_count = Some(args.named_child_count());
663                        }
664                        break;
665                    }
666                    arg_node = parent;
667                }
668
669                calls.push(CallInfo {
670                    caller,
671                    callee: call_name,
672                    line: node.start_position().row + 1,
673                    column: node.start_position().column,
674                    arg_count,
675                });
676            }
677        }
678    }
679
680    fn extract_imports(
681        source: &str,
682        compiled: &CompiledQueries,
683        root: Node<'_>,
684        max_depth: Option<u32>,
685        imports: &mut Vec<ImportInfo>,
686    ) {
687        let Some(ref import_query) = compiled.import else {
688            return;
689        };
690        let mut cursor = QueryCursor::new();
691        if let Some(depth) = max_depth {
692            cursor.set_max_start_depth(Some(depth));
693        }
694        let mut matches = cursor.matches(import_query, root, source.as_bytes());
695
696        while let Some(mat) = matches.next() {
697            for capture in mat.captures {
698                let capture_name = import_query.capture_names()[capture.index as usize];
699                if capture_name == "import_path" {
700                    let node = capture.node;
701                    let line = node.start_position().row + 1;
702                    extract_imports_from_node(&node, source, "", line, imports);
703                }
704            }
705        }
706    }
707
708    fn extract_impl_methods(
709        source: &str,
710        compiled: &CompiledQueries,
711        root: Node<'_>,
712        max_depth: Option<u32>,
713        classes: &mut [ClassInfo],
714    ) {
715        let Some(ref impl_query) = compiled.impl_block else {
716            return;
717        };
718        let mut cursor = QueryCursor::new();
719        if let Some(depth) = max_depth {
720            cursor.set_max_start_depth(Some(depth));
721        }
722        let mut matches = cursor.matches(impl_query, root, source.as_bytes());
723
724        while let Some(mat) = matches.next() {
725            let mut impl_type_name = String::new();
726            let mut method_name = String::new();
727            let mut method_line = 0usize;
728            let mut method_end_line = 0usize;
729            let mut method_params = String::new();
730            let mut method_return_type: Option<String> = None;
731
732            for capture in mat.captures {
733                let capture_name = impl_query.capture_names()[capture.index as usize];
734                let node = capture.node;
735                match capture_name {
736                    "impl_type" => {
737                        impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
738                    }
739                    "method_name" => {
740                        method_name = source[node.start_byte()..node.end_byte()].to_string();
741                    }
742                    "method_params" => {
743                        method_params = source[node.start_byte()..node.end_byte()].to_string();
744                    }
745                    "method" => {
746                        method_line = node.start_position().row + 1;
747                        method_end_line = node.end_position().row + 1;
748                        method_return_type = node
749                            .child_by_field_name("return_type")
750                            .map(|r| source[r.start_byte()..r.end_byte()].to_string());
751                    }
752                    _ => {}
753                }
754            }
755
756            if !impl_type_name.is_empty() && !method_name.is_empty() {
757                let func = FunctionInfo {
758                    name: method_name,
759                    line: method_line,
760                    end_line: method_end_line,
761                    parameters: if method_params.is_empty() {
762                        Vec::new()
763                    } else {
764                        vec![method_params]
765                    },
766                    return_type: method_return_type,
767                };
768                if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
769                    class.methods.push(func);
770                }
771            }
772        }
773    }
774
775    fn extract_references(
776        source: &str,
777        compiled: &CompiledQueries,
778        root: Node<'_>,
779        max_depth: Option<u32>,
780        references: &mut Vec<ReferenceInfo>,
781    ) {
782        let Some(ref ref_query) = compiled.reference else {
783            return;
784        };
785        let mut cursor = QueryCursor::new();
786        if let Some(depth) = max_depth {
787            cursor.set_max_start_depth(Some(depth));
788        }
789        let mut seen_refs = std::collections::HashSet::new();
790        let mut matches = cursor.matches(ref_query, root, source.as_bytes());
791
792        while let Some(mat) = matches.next() {
793            for capture in mat.captures {
794                let capture_name = ref_query.capture_names()[capture.index as usize];
795                if capture_name == "type_ref" {
796                    let node = capture.node;
797                    let type_ref = source[node.start_byte()..node.end_byte()].to_string();
798                    if seen_refs.insert(type_ref.clone()) {
799                        references.push(ReferenceInfo {
800                            symbol: type_ref,
801                            reference_type: ReferenceType::Usage,
802                            // location is intentionally empty here; set by the caller (analyze_file)
803                            location: String::new(),
804                            line: node.start_position().row + 1,
805                        });
806                    }
807                }
808            }
809        }
810    }
811
812    fn extract_assignments(
813        source: &str,
814        compiled: &CompiledQueries,
815        root: Node<'_>,
816        max_depth: Option<u32>,
817        assignments: &mut Vec<AssignmentInfo>,
818    ) {
819        let Some(ref assignment_query) = compiled.assignment else {
820            return;
821        };
822        let mut cursor = QueryCursor::new();
823        if let Some(depth) = max_depth {
824            cursor.set_max_start_depth(Some(depth));
825        }
826        let mut matches = cursor.matches(assignment_query, root, source.as_bytes());
827
828        while let Some(mat) = matches.next() {
829            let mut variable = String::new();
830            let mut value = String::new();
831            let mut line = 0usize;
832
833            for capture in mat.captures {
834                let capture_name = assignment_query.capture_names()[capture.index as usize];
835                let node = capture.node;
836                match capture_name {
837                    "variable" => {
838                        variable = source[node.start_byte()..node.end_byte()].to_string();
839                    }
840                    "value" => {
841                        value = source[node.start_byte()..node.end_byte()].to_string();
842                        line = node.start_position().row + 1;
843                    }
844                    _ => {}
845                }
846            }
847
848            if !variable.is_empty() && !value.is_empty() {
849                let mut current = mat.captures[0].node;
850                let mut scope = "global".to_string();
851                while let Some(parent) = current.parent() {
852                    if parent.kind() == "function_item"
853                        && let Some(name_node) = parent.child_by_field_name("name")
854                    {
855                        scope = source[name_node.start_byte()..name_node.end_byte()].to_string();
856                        break;
857                    }
858                    current = parent;
859                }
860                assignments.push(AssignmentInfo {
861                    variable,
862                    value,
863                    line,
864                    scope,
865                });
866            }
867        }
868    }
869
870    fn extract_field_accesses(
871        source: &str,
872        compiled: &CompiledQueries,
873        root: Node<'_>,
874        max_depth: Option<u32>,
875        field_accesses: &mut Vec<FieldAccessInfo>,
876    ) {
877        let Some(ref field_query) = compiled.field else {
878            return;
879        };
880        let mut cursor = QueryCursor::new();
881        if let Some(depth) = max_depth {
882            cursor.set_max_start_depth(Some(depth));
883        }
884        let mut matches = cursor.matches(field_query, root, source.as_bytes());
885
886        while let Some(mat) = matches.next() {
887            let mut object = String::new();
888            let mut field = String::new();
889            let mut line = 0usize;
890
891            for capture in mat.captures {
892                let capture_name = field_query.capture_names()[capture.index as usize];
893                let node = capture.node;
894                match capture_name {
895                    "object" => {
896                        object = source[node.start_byte()..node.end_byte()].to_string();
897                    }
898                    "field" => {
899                        field = source[node.start_byte()..node.end_byte()].to_string();
900                        line = node.start_position().row + 1;
901                    }
902                    _ => {}
903                }
904            }
905
906            if !object.is_empty() && !field.is_empty() {
907                let mut current = mat.captures[0].node;
908                let mut scope = "global".to_string();
909                while let Some(parent) = current.parent() {
910                    if parent.kind() == "function_item"
911                        && let Some(name_node) = parent.child_by_field_name("name")
912                    {
913                        scope = source[name_node.start_byte()..name_node.end_byte()].to_string();
914                        break;
915                    }
916                    current = parent;
917                }
918                field_accesses.push(FieldAccessInfo {
919                    object,
920                    field,
921                    line,
922                    scope,
923                });
924            }
925        }
926    }
927}
928
#[cfg(test)]
mod tests {
    use super::*;

    /// `let` bindings inside a function are reported with their variable text,
    /// value text, and the enclosing function's name as scope.
    #[test]
    fn test_extract_assignments() {
        let source = r#"
fn main() {
    let x = 42;
    let y = x + 1;
}
"#;
        // `expect` surfaces the underlying `ParserError` if extraction fails.
        let analysis = SemanticExtractor::extract(source, "rust", None)
            .expect("extraction of valid Rust source should succeed");
        let first = analysis
            .assignments
            .first()
            .expect("at least one assignment should be extracted");
        assert_eq!(first.variable, "x");
        assert_eq!(first.value, "42");
        assert_eq!(first.scope, "main");
    }

    /// Field expressions inside a function are reported with object, field,
    /// and the enclosing function's name as scope.
    #[test]
    fn test_extract_field_accesses() {
        let source = r#"
fn process(user: &User) {
    let name = user.name;
    let age = user.age;
}
"#;
        let analysis = SemanticExtractor::extract(source, "rust", None)
            .expect("extraction of valid Rust source should succeed");
        let first = analysis
            .field_accesses
            .first()
            .expect("at least one field access should be extracted");
        assert!(
            analysis
                .field_accesses
                .iter()
                .any(|fa| fa.object == "user" && fa.field == "name")
        );
        assert_eq!(first.scope, "process");
    }

    /// Passing `Some(0)` as the limit must find the same number of functions as `None`.
    #[test]
    fn test_ast_recursion_limit_zero_is_unlimited() {
        let source = r#"fn hello() -> u32 { 42 }"#;
        let analysis_none =
            SemanticExtractor::extract(source, "rust", None).expect("extract with None failed");
        let analysis_zero = SemanticExtractor::extract(source, "rust", Some(0))
            .expect("extract with Some(0) failed");
        assert!(
            !analysis_none.functions.is_empty(),
            "extract with None should find at least one function in the test source"
        );
        assert_eq!(
            analysis_none.functions.len(),
            analysis_zero.functions.len(),
            "ast_recursion_limit=0 should behave identically to unset (unlimited)"
        );
    }
}