Skip to main content

code_analyze_mcp/
parser.rs

1//! Tree-sitter-based parser for extracting semantic structure from source code.
2//!
3//! This module provides language-agnostic parsing using tree-sitter queries to extract
4//! functions, classes, imports, references, and other semantic elements from source files.
5//! Two main extractors handle different use cases:
6//!
7//! - [`ElementExtractor`]: Quick extraction of function and class counts.
8//! - [`SemanticExtractor`]: Detailed semantic analysis with calls, imports, and references.
9
10use crate::languages::get_language_info;
11use crate::types::{
12    AssignmentInfo, CallInfo, ClassInfo, FieldAccessInfo, FunctionInfo, ImportInfo, ReferenceInfo,
13    ReferenceType, SemanticAnalysis,
14};
15use std::cell::RefCell;
16use std::collections::HashMap;
17use std::sync::LazyLock;
18use thiserror::Error;
19use tracing::instrument;
20use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
21
22#[derive(Debug, Error)]
23pub enum ParserError {
24    #[error("Unsupported language: {0}")]
25    UnsupportedLanguage(String),
26    #[error("Failed to parse file: {0}")]
27    ParseError(String),
28    #[error("Invalid UTF-8 in file")]
29    InvalidUtf8,
30    #[error("Query error: {0}")]
31    QueryError(String),
32}
33
34/// Compiled tree-sitter queries for a language.
35/// Stores all query types: mandatory (element, call) and optional (import, impl, reference).
36struct CompiledQueries {
37    element: Query,
38    call: Query,
39    import: Option<Query>,
40    impl_block: Option<Query>,
41    reference: Option<Query>,
42    assignment: Option<Query>,
43    field: Option<Query>,
44}
45
46/// Build compiled queries for a given language.
47fn build_compiled_queries(
48    lang_info: &crate::languages::LanguageInfo,
49) -> Result<CompiledQueries, ParserError> {
50    let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
51        ParserError::QueryError(format!(
52            "Failed to compile element query for {}: {}",
53            lang_info.name, e
54        ))
55    })?;
56
57    let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
58        ParserError::QueryError(format!(
59            "Failed to compile call query for {}: {}",
60            lang_info.name, e
61        ))
62    })?;
63
64    let import = if let Some(import_query_str) = lang_info.import_query {
65        Some(
66            Query::new(&lang_info.language, import_query_str).map_err(|e| {
67                ParserError::QueryError(format!(
68                    "Failed to compile import query for {}: {}",
69                    lang_info.name, e
70                ))
71            })?,
72        )
73    } else {
74        None
75    };
76
77    let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
78        Some(
79            Query::new(&lang_info.language, impl_query_str).map_err(|e| {
80                ParserError::QueryError(format!(
81                    "Failed to compile impl query for {}: {}",
82                    lang_info.name, e
83                ))
84            })?,
85        )
86    } else {
87        None
88    };
89
90    let reference = if let Some(ref_query_str) = lang_info.reference_query {
91        Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
92            ParserError::QueryError(format!(
93                "Failed to compile reference query for {}: {}",
94                lang_info.name, e
95            ))
96        })?)
97    } else {
98        None
99    };
100
101    let assignment = if let Some(assignment_query_str) = lang_info.assignment_query {
102        Some(
103            Query::new(&lang_info.language, assignment_query_str).map_err(|e| {
104                ParserError::QueryError(format!(
105                    "Failed to compile assignment query for {}: {}",
106                    lang_info.name, e
107                ))
108            })?,
109        )
110    } else {
111        None
112    };
113
114    let field = if let Some(field_query_str) = lang_info.field_query {
115        Some(
116            Query::new(&lang_info.language, field_query_str).map_err(|e| {
117                ParserError::QueryError(format!(
118                    "Failed to compile field query for {}: {}",
119                    lang_info.name, e
120                ))
121            })?,
122        )
123    } else {
124        None
125    };
126
127    Ok(CompiledQueries {
128        element,
129        call,
130        import,
131        impl_block,
132        reference,
133        assignment,
134        field,
135    })
136}
137
138/// Initialize the query cache with compiled queries for all supported languages.
139fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
140    let supported_languages = [
141        "rust",
142        "python",
143        "typescript",
144        "tsx",
145        "go",
146        "java",
147        "fortran",
148    ];
149    let mut cache = HashMap::new();
150
151    for lang_name in &supported_languages {
152        if let Some(lang_info) = get_language_info(lang_name) {
153            match build_compiled_queries(&lang_info) {
154                Ok(compiled) => {
155                    cache.insert(*lang_name, compiled);
156                }
157                Err(e) => {
158                    tracing::error!(
159                        "Failed to compile queries for language {}: {}",
160                        lang_name,
161                        e
162                    );
163                }
164            }
165        }
166    }
167
168    cache
169}
170
171/// Lazily initialized cache of compiled queries per language.
172static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
173    LazyLock::new(init_query_cache);
174
175/// Get compiled queries for a language from the cache.
176fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
177    QUERY_CACHE
178        .get(language)
179        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
180}
181
182thread_local! {
183    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
184}
185
186/// Canonical API for extracting element counts from source code.
187pub struct ElementExtractor;
188
189impl ElementExtractor {
190    /// Extract function and class counts from source code.
191    ///
192    /// # Errors
193    ///
194    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
195    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
196    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
197    #[instrument(skip_all, fields(language))]
198    pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
199        let lang_info = get_language_info(language)
200            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
201
202        let tree = PARSER.with(|p| {
203            let mut parser = p.borrow_mut();
204            parser
205                .set_language(&lang_info.language)
206                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {}", e)))?;
207            parser
208                .parse(source, None)
209                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
210        })?;
211
212        let compiled = get_compiled_queries(language)?;
213
214        let mut cursor = QueryCursor::new();
215        let mut function_count = 0;
216        let mut class_count = 0;
217
218        let mut matches = cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
219        while let Some(mat) = matches.next() {
220            for capture in mat.captures {
221                let capture_name = compiled.element.capture_names()[capture.index as usize];
222                match capture_name {
223                    "function" => function_count += 1,
224                    "class" => class_count += 1,
225                    _ => {}
226                }
227            }
228        }
229
230        tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
231
232        Ok((function_count, class_count))
233    }
234}
235
236/// Recursively extract `ImportInfo` entries from a use-clause node, respecting all Rust
237/// use-declaration forms (`scoped_identifier`, `scoped_use_list`, `use_list`,
238/// `use_as_clause`, `use_wildcard`, bare `identifier`).
239fn extract_imports_from_node(
240    node: &Node,
241    source: &str,
242    prefix: &str,
243    line: usize,
244    imports: &mut Vec<ImportInfo>,
245) {
246    match node.kind() {
247        // Simple identifier: `use foo;` or an item inside `{foo, bar}`
248        "identifier" | "self" | "super" | "crate" => {
249            let name = source[node.start_byte()..node.end_byte()].to_string();
250            imports.push(ImportInfo {
251                module: prefix.to_string(),
252                items: vec![name],
253                line,
254            });
255        }
256        // Qualified path: `std::collections::HashMap`
257        "scoped_identifier" => {
258            let item = node
259                .child_by_field_name("name")
260                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
261                .unwrap_or_default();
262            let module = node
263                .child_by_field_name("path")
264                .map(|p| {
265                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
266                    if prefix.is_empty() {
267                        path_text
268                    } else {
269                        format!("{}::{}", prefix, path_text)
270                    }
271                })
272                .unwrap_or_else(|| prefix.to_string());
273            if !item.is_empty() {
274                imports.push(ImportInfo {
275                    module,
276                    items: vec![item],
277                    line,
278                });
279            }
280        }
281        // `std::{io, fs}` — path prefix followed by a brace list
282        "scoped_use_list" => {
283            let new_prefix = node
284                .child_by_field_name("path")
285                .map(|p| {
286                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
287                    if prefix.is_empty() {
288                        path_text
289                    } else {
290                        format!("{}::{}", prefix, path_text)
291                    }
292                })
293                .unwrap_or_else(|| prefix.to_string());
294            if let Some(list) = node.child_by_field_name("list") {
295                extract_imports_from_node(&list, source, &new_prefix, line, imports);
296            }
297        }
298        // `{HashMap, HashSet}` — brace-enclosed list of items
299        "use_list" => {
300            let mut cursor = node.walk();
301            for child in node.children(&mut cursor) {
302                match child.kind() {
303                    "{" | "}" | "," => {}
304                    _ => extract_imports_from_node(&child, source, prefix, line, imports),
305                }
306            }
307        }
308        // `std::io::*` — glob import
309        "use_wildcard" => {
310            let text = source[node.start_byte()..node.end_byte()].to_string();
311            let module = if let Some(stripped) = text.strip_suffix("::*") {
312                if prefix.is_empty() {
313                    stripped.to_string()
314                } else {
315                    format!("{}::{}", prefix, stripped)
316                }
317            } else {
318                prefix.to_string()
319            };
320            imports.push(ImportInfo {
321                module,
322                items: vec!["*".to_string()],
323                line,
324            });
325        }
326        // `io as stdio` or `std::io as stdio`
327        "use_as_clause" => {
328            let alias = node
329                .child_by_field_name("alias")
330                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
331                .unwrap_or_default();
332            let module = if let Some(path_node) = node.child_by_field_name("path") {
333                match path_node.kind() {
334                    "scoped_identifier" => path_node
335                        .child_by_field_name("path")
336                        .map(|p| {
337                            let p_text = source[p.start_byte()..p.end_byte()].to_string();
338                            if prefix.is_empty() {
339                                p_text
340                            } else {
341                                format!("{}::{}", prefix, p_text)
342                            }
343                        })
344                        .unwrap_or_else(|| prefix.to_string()),
345                    _ => prefix.to_string(),
346                }
347            } else {
348                prefix.to_string()
349            };
350            if !alias.is_empty() {
351                imports.push(ImportInfo {
352                    module,
353                    items: vec![alias],
354                    line,
355                });
356            }
357        }
358        // Python import_from_statement: `from module import name` or `from . import *`
359        "import_from_statement" => {
360            extract_python_import_from(node, source, line, imports);
361        }
362        // Fallback for non-Rust import nodes: capture full text as module
363        _ => {
364            let text = source[node.start_byte()..node.end_byte()]
365                .trim()
366                .to_string();
367            if !text.is_empty() {
368                imports.push(ImportInfo {
369                    module: text,
370                    items: vec![],
371                    line,
372                });
373            }
374        }
375    }
376}
377
378/// Extract an item name from a dotted_name or aliased_import child node.
379fn extract_import_item_name(child: &Node, source: &str) -> Option<String> {
380    match child.kind() {
381        "dotted_name" => {
382            let name = source[child.start_byte()..child.end_byte()]
383                .trim()
384                .to_string();
385            if name.is_empty() { None } else { Some(name) }
386        }
387        "aliased_import" => child.child_by_field_name("name").and_then(|n| {
388            let name = source[n.start_byte()..n.end_byte()].trim().to_string();
389            if name.is_empty() { None } else { Some(name) }
390        }),
391        _ => None,
392    }
393}
394
395/// Collect wildcard/named imports from an import_list node or from direct named children.
396fn collect_import_items(
397    node: &Node,
398    source: &str,
399    is_wildcard: &mut bool,
400    items: &mut Vec<String>,
401) {
402    // Prefer import_list child (wraps `from x import a, b`)
403    if let Some(import_list) = node.child_by_field_name("import_list") {
404        let mut cursor = import_list.walk();
405        for child in import_list.named_children(&mut cursor) {
406            if child.kind() == "wildcard_import" {
407                *is_wildcard = true;
408            } else if let Some(name) = extract_import_item_name(&child, source) {
409                items.push(name);
410            }
411        }
412        return;
413    }
414    // No import_list: single-name or wildcard as direct child (skip first named child = module_name)
415    let mut cursor = node.walk();
416    let mut first = true;
417    for child in node.named_children(&mut cursor) {
418        if first {
419            first = false;
420            continue;
421        }
422        if child.kind() == "wildcard_import" {
423            *is_wildcard = true;
424        } else if let Some(name) = extract_import_item_name(&child, source) {
425            items.push(name);
426        }
427    }
428}
429
430/// Handle Python `import_from_statement` node.
431fn extract_python_import_from(
432    node: &Node,
433    source: &str,
434    line: usize,
435    imports: &mut Vec<ImportInfo>,
436) {
437    let module = if let Some(m) = node.child_by_field_name("module_name") {
438        source[m.start_byte()..m.end_byte()].trim().to_string()
439    } else if let Some(r) = node.child_by_field_name("relative_import") {
440        source[r.start_byte()..r.end_byte()].trim().to_string()
441    } else {
442        String::new()
443    };
444
445    let mut is_wildcard = false;
446    let mut items = Vec::new();
447    collect_import_items(node, source, &mut is_wildcard, &mut items);
448
449    if !module.is_empty() {
450        imports.push(ImportInfo {
451            module,
452            items: if is_wildcard {
453                vec!["*".to_string()]
454            } else {
455                items
456            },
457            line,
458        });
459    }
460}
461
/// Entry point for detailed semantic extraction: functions, classes, imports, references,
/// calls, assignments and field accesses.
pub struct SemanticExtractor;
463
464impl SemanticExtractor {
465    /// Extract semantic information from source code.
466    ///
467    /// # Errors
468    ///
469    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
470    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
471    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
472    #[instrument(skip_all, fields(language))]
473    pub fn extract(
474        source: &str,
475        language: &str,
476        ast_recursion_limit: Option<usize>,
477    ) -> Result<SemanticAnalysis, ParserError> {
478        let lang_info = get_language_info(language)
479            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
480
481        let tree = PARSER.with(|p| {
482            let mut parser = p.borrow_mut();
483            parser
484                .set_language(&lang_info.language)
485                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {}", e)))?;
486            parser
487                .parse(source, None)
488                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
489        })?;
490
491        // 0 is not a useful depth (visits root node only, returning zero results).
492        // Treat 0 as None (unlimited). See #339.
493        let max_depth: Option<u32> = ast_recursion_limit
494            .filter(|&limit| limit > 0)
495            .map(|limit| {
496                u32::try_from(limit).map_err(|_| {
497                    ParserError::ParseError(format!(
498                        "ast_recursion_limit {} exceeds maximum supported value {}",
499                        limit,
500                        u32::MAX
501                    ))
502                })
503            })
504            .transpose()?;
505
506        let compiled = get_compiled_queries(language)?;
507        let root = tree.root_node();
508
509        let mut functions = Vec::new();
510        let mut classes = Vec::new();
511        let mut imports = Vec::new();
512        let mut references = Vec::new();
513        let mut call_frequency = HashMap::new();
514        let mut calls = Vec::new();
515        let mut assignments: Vec<AssignmentInfo> = Vec::new();
516        let mut field_accesses: Vec<FieldAccessInfo> = Vec::new();
517
518        Self::extract_elements(
519            source,
520            compiled,
521            root,
522            max_depth,
523            &lang_info,
524            &mut functions,
525            &mut classes,
526        );
527        Self::extract_calls(
528            source,
529            compiled,
530            root,
531            max_depth,
532            &mut calls,
533            &mut call_frequency,
534        );
535        Self::extract_imports(source, compiled, root, max_depth, &mut imports);
536        Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes);
537        Self::extract_references(source, compiled, root, max_depth, &mut references);
538        Self::extract_assignments(source, compiled, root, max_depth, &mut assignments);
539        Self::extract_field_accesses(source, compiled, root, max_depth, &mut field_accesses);
540
541        tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), "extraction complete");
542
543        Ok(SemanticAnalysis {
544            functions,
545            classes,
546            imports,
547            references,
548            call_frequency,
549            calls,
550            assignments,
551            field_accesses,
552        })
553    }
554
555    fn extract_elements(
556        source: &str,
557        compiled: &CompiledQueries,
558        root: Node<'_>,
559        max_depth: Option<u32>,
560        lang_info: &crate::languages::LanguageInfo,
561        functions: &mut Vec<FunctionInfo>,
562        classes: &mut Vec<ClassInfo>,
563    ) {
564        let mut cursor = QueryCursor::new();
565        if let Some(depth) = max_depth {
566            cursor.set_max_start_depth(Some(depth));
567        }
568        let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
569        let mut seen_functions = std::collections::HashSet::new();
570
571        while let Some(mat) = matches.next() {
572            for capture in mat.captures {
573                let capture_name = compiled.element.capture_names()[capture.index as usize];
574                let node = capture.node;
575                match capture_name {
576                    "function" => {
577                        if let Some(name_node) = node.child_by_field_name("name") {
578                            let name =
579                                source[name_node.start_byte()..name_node.end_byte()].to_string();
580                            let func_key = (name.clone(), node.start_position().row);
581                            if !seen_functions.contains(&func_key) {
582                                seen_functions.insert(func_key);
583                                let params = node
584                                    .child_by_field_name("parameters")
585                                    .map(|p| source[p.start_byte()..p.end_byte()].to_string())
586                                    .unwrap_or_default();
587                                let return_type = node
588                                    .child_by_field_name("return_type")
589                                    .map(|r| source[r.start_byte()..r.end_byte()].to_string());
590                                functions.push(FunctionInfo {
591                                    name,
592                                    line: node.start_position().row + 1,
593                                    end_line: node.end_position().row + 1,
594                                    parameters: if params.is_empty() {
595                                        Vec::new()
596                                    } else {
597                                        vec![params]
598                                    },
599                                    return_type,
600                                });
601                            }
602                        }
603                    }
604                    "class" => {
605                        if let Some(name_node) = node.child_by_field_name("name") {
606                            let name =
607                                source[name_node.start_byte()..name_node.end_byte()].to_string();
608                            let inherits = if let Some(handler) = lang_info.extract_inheritance {
609                                handler(&node, source)
610                            } else {
611                                Vec::new()
612                            };
613                            classes.push(ClassInfo {
614                                name,
615                                line: node.start_position().row + 1,
616                                end_line: node.end_position().row + 1,
617                                methods: Vec::new(),
618                                fields: Vec::new(),
619                                inherits,
620                            });
621                        }
622                    }
623                    _ => {}
624                }
625            }
626        }
627    }
628
629    /// Returns the name of the enclosing function/method/subroutine for a given AST node,
630    /// by walking ancestors and matching all language-specific function container kinds.
631    fn enclosing_function_name(mut node: tree_sitter::Node<'_>, source: &str) -> Option<String> {
632        while let Some(parent) = node.parent() {
633            let name_node = match parent.kind() {
634                // Direct name field: Rust, Python, Go, Java, TypeScript/TSX
635                "function_item"
636                | "method_item"
637                | "function_definition"
638                | "function_declaration"
639                | "method_declaration"
640                | "method_definition" => parent.child_by_field_name("name"),
641                // Fortran subroutine: name is inside subroutine_statement child
642                "subroutine" => {
643                    let mut cursor = parent.walk();
644                    parent
645                        .children(&mut cursor)
646                        .find(|c| c.kind() == "subroutine_statement")
647                        .and_then(|s| s.child_by_field_name("name"))
648                }
649                // Fortran function: name is inside function_statement child
650                "function" => {
651                    let mut cursor = parent.walk();
652                    parent
653                        .children(&mut cursor)
654                        .find(|c| c.kind() == "function_statement")
655                        .and_then(|s| s.child_by_field_name("name"))
656                }
657                _ => {
658                    node = parent;
659                    continue;
660                }
661            };
662            return name_node.map(|n| source[n.start_byte()..n.end_byte()].to_string());
663        }
664        None
665    }
666
667    fn extract_calls(
668        source: &str,
669        compiled: &CompiledQueries,
670        root: Node<'_>,
671        max_depth: Option<u32>,
672        calls: &mut Vec<CallInfo>,
673        call_frequency: &mut HashMap<String, usize>,
674    ) {
675        let mut cursor = QueryCursor::new();
676        if let Some(depth) = max_depth {
677            cursor.set_max_start_depth(Some(depth));
678        }
679        let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
680
681        while let Some(mat) = matches.next() {
682            for capture in mat.captures {
683                let capture_name = compiled.call.capture_names()[capture.index as usize];
684                if capture_name != "call" {
685                    continue;
686                }
687                let node = capture.node;
688                let call_name = source[node.start_byte()..node.end_byte()].to_string();
689                *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
690
691                let caller = Self::enclosing_function_name(node, source)
692                    .unwrap_or_else(|| "<module>".to_string());
693
694                let mut arg_count = None;
695                let mut arg_node = node;
696                while let Some(parent) = arg_node.parent() {
697                    if parent.kind() == "call_expression" {
698                        if let Some(args) = parent.child_by_field_name("arguments") {
699                            arg_count = Some(args.named_child_count());
700                        }
701                        break;
702                    }
703                    arg_node = parent;
704                }
705
706                calls.push(CallInfo {
707                    caller,
708                    callee: call_name,
709                    line: node.start_position().row + 1,
710                    column: node.start_position().column,
711                    arg_count,
712                });
713            }
714        }
715    }
716
717    fn extract_imports(
718        source: &str,
719        compiled: &CompiledQueries,
720        root: Node<'_>,
721        max_depth: Option<u32>,
722        imports: &mut Vec<ImportInfo>,
723    ) {
724        let Some(ref import_query) = compiled.import else {
725            return;
726        };
727        let mut cursor = QueryCursor::new();
728        if let Some(depth) = max_depth {
729            cursor.set_max_start_depth(Some(depth));
730        }
731        let mut matches = cursor.matches(import_query, root, source.as_bytes());
732
733        while let Some(mat) = matches.next() {
734            for capture in mat.captures {
735                let capture_name = import_query.capture_names()[capture.index as usize];
736                if capture_name == "import_path" {
737                    let node = capture.node;
738                    let line = node.start_position().row + 1;
739                    extract_imports_from_node(&node, source, "", line, imports);
740                }
741            }
742        }
743    }
744
745    fn extract_impl_methods(
746        source: &str,
747        compiled: &CompiledQueries,
748        root: Node<'_>,
749        max_depth: Option<u32>,
750        classes: &mut [ClassInfo],
751    ) {
752        let Some(ref impl_query) = compiled.impl_block else {
753            return;
754        };
755        let mut cursor = QueryCursor::new();
756        if let Some(depth) = max_depth {
757            cursor.set_max_start_depth(Some(depth));
758        }
759        let mut matches = cursor.matches(impl_query, root, source.as_bytes());
760
761        while let Some(mat) = matches.next() {
762            let mut impl_type_name = String::new();
763            let mut method_name = String::new();
764            let mut method_line = 0usize;
765            let mut method_end_line = 0usize;
766            let mut method_params = String::new();
767            let mut method_return_type: Option<String> = None;
768
769            for capture in mat.captures {
770                let capture_name = impl_query.capture_names()[capture.index as usize];
771                let node = capture.node;
772                match capture_name {
773                    "impl_type" => {
774                        impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
775                    }
776                    "method_name" => {
777                        method_name = source[node.start_byte()..node.end_byte()].to_string();
778                    }
779                    "method_params" => {
780                        method_params = source[node.start_byte()..node.end_byte()].to_string();
781                    }
782                    "method" => {
783                        method_line = node.start_position().row + 1;
784                        method_end_line = node.end_position().row + 1;
785                        method_return_type = node
786                            .child_by_field_name("return_type")
787                            .map(|r| source[r.start_byte()..r.end_byte()].to_string());
788                    }
789                    _ => {}
790                }
791            }
792
793            if !impl_type_name.is_empty() && !method_name.is_empty() {
794                let func = FunctionInfo {
795                    name: method_name,
796                    line: method_line,
797                    end_line: method_end_line,
798                    parameters: if method_params.is_empty() {
799                        Vec::new()
800                    } else {
801                        vec![method_params]
802                    },
803                    return_type: method_return_type,
804                };
805                if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
806                    class.methods.push(func);
807                }
808            }
809        }
810    }
811
812    fn extract_references(
813        source: &str,
814        compiled: &CompiledQueries,
815        root: Node<'_>,
816        max_depth: Option<u32>,
817        references: &mut Vec<ReferenceInfo>,
818    ) {
819        let Some(ref ref_query) = compiled.reference else {
820            return;
821        };
822        let mut cursor = QueryCursor::new();
823        if let Some(depth) = max_depth {
824            cursor.set_max_start_depth(Some(depth));
825        }
826        let mut seen_refs = std::collections::HashSet::new();
827        let mut matches = cursor.matches(ref_query, root, source.as_bytes());
828
829        while let Some(mat) = matches.next() {
830            for capture in mat.captures {
831                let capture_name = ref_query.capture_names()[capture.index as usize];
832                if capture_name == "type_ref" {
833                    let node = capture.node;
834                    let type_ref = source[node.start_byte()..node.end_byte()].to_string();
835                    if seen_refs.insert(type_ref.clone()) {
836                        references.push(ReferenceInfo {
837                            symbol: type_ref,
838                            reference_type: ReferenceType::Usage,
839                            // location is intentionally empty here; set by the caller (analyze_file)
840                            location: String::new(),
841                            line: node.start_position().row + 1,
842                        });
843                    }
844                }
845            }
846        }
847    }
848
849    fn extract_assignments(
850        source: &str,
851        compiled: &CompiledQueries,
852        root: Node<'_>,
853        max_depth: Option<u32>,
854        assignments: &mut Vec<AssignmentInfo>,
855    ) {
856        let Some(ref assignment_query) = compiled.assignment else {
857            return;
858        };
859        let mut cursor = QueryCursor::new();
860        if let Some(depth) = max_depth {
861            cursor.set_max_start_depth(Some(depth));
862        }
863        let mut matches = cursor.matches(assignment_query, root, source.as_bytes());
864
865        while let Some(mat) = matches.next() {
866            let mut variable = String::new();
867            let mut value = String::new();
868            let mut line = 0usize;
869
870            for capture in mat.captures {
871                let capture_name = assignment_query.capture_names()[capture.index as usize];
872                let node = capture.node;
873                match capture_name {
874                    "variable" => {
875                        variable = source[node.start_byte()..node.end_byte()].to_string();
876                    }
877                    "value" => {
878                        value = source[node.start_byte()..node.end_byte()].to_string();
879                        line = node.start_position().row + 1;
880                    }
881                    _ => {}
882                }
883            }
884
885            if !variable.is_empty() && !value.is_empty() {
886                let scope = Self::enclosing_function_name(mat.captures[0].node, source)
887                    .unwrap_or_else(|| "global".to_string());
888                assignments.push(AssignmentInfo {
889                    variable,
890                    value,
891                    line,
892                    scope,
893                });
894            }
895        }
896    }
897
898    fn extract_field_accesses(
899        source: &str,
900        compiled: &CompiledQueries,
901        root: Node<'_>,
902        max_depth: Option<u32>,
903        field_accesses: &mut Vec<FieldAccessInfo>,
904    ) {
905        let Some(ref field_query) = compiled.field else {
906            return;
907        };
908        let mut cursor = QueryCursor::new();
909        if let Some(depth) = max_depth {
910            cursor.set_max_start_depth(Some(depth));
911        }
912        let mut matches = cursor.matches(field_query, root, source.as_bytes());
913
914        while let Some(mat) = matches.next() {
915            let mut object = String::new();
916            let mut field = String::new();
917            let mut line = 0usize;
918
919            for capture in mat.captures {
920                let capture_name = field_query.capture_names()[capture.index as usize];
921                let node = capture.node;
922                match capture_name {
923                    "object" => {
924                        object = source[node.start_byte()..node.end_byte()].to_string();
925                    }
926                    "field" => {
927                        field = source[node.start_byte()..node.end_byte()].to_string();
928                        line = node.start_position().row + 1;
929                    }
930                    _ => {}
931                }
932            }
933
934            if !object.is_empty() && !field.is_empty() {
935                let scope = Self::enclosing_function_name(mat.captures[0].node, source)
936                    .unwrap_or_else(|| "global".to_string());
937                field_accesses.push(FieldAccessInfo {
938                    object,
939                    field,
940                    line,
941                    scope,
942                });
943            }
944        }
945    }
946}
947
#[cfg(test)]
mod tests {
    use super::*;

    /// `let` bindings inside a function are reported as assignments whose scope
    /// is the enclosing function's name.
    #[test]
    fn test_extract_assignments() {
        let source = r#"
fn main() {
    let x = 42;
    let y = x + 1;
}
"#;
        // `expect` surfaces the actual error on failure instead of a bare
        // "assertion failed: result.is_ok()".
        let analysis = SemanticExtractor::extract(source, "rust", None)
            .expect("extracting assignments from valid Rust source should succeed");
        let first = analysis
            .assignments
            .first()
            .expect("at least one assignment should be extracted");
        assert_eq!(first.variable, "x");
        assert_eq!(first.value, "42");
        assert_eq!(first.scope, "main");
    }

    /// `object.field` expressions are reported with the object, the field and
    /// the enclosing function as scope.
    #[test]
    fn test_extract_field_accesses() {
        let source = r#"
fn process(user: &User) {
    let name = user.name;
    let age = user.age;
}
"#;
        let analysis = SemanticExtractor::extract(source, "rust", None)
            .expect("extracting field accesses from valid Rust source should succeed");
        let first = analysis
            .field_accesses
            .first()
            .expect("at least one field access should be extracted");
        assert!(
            analysis
                .field_accesses
                .iter()
                .any(|fa| fa.object == "user" && fa.field == "name")
        );
        assert_eq!(first.scope, "process");
    }

    /// Passing `Some(0)` as the limit must find the same number of functions as
    /// passing `None`.
    #[test]
    fn test_ast_recursion_limit_zero_is_unlimited() {
        let source = r#"fn hello() -> u32 { 42 }"#;
        let analysis_none =
            SemanticExtractor::extract(source, "rust", None).expect("extract with None failed");
        let analysis_zero = SemanticExtractor::extract(source, "rust", Some(0))
            .expect("extract with Some(0) failed");
        assert!(
            !analysis_none.functions.is_empty(),
            "extract with None should find at least one function in the test source"
        );
        assert_eq!(
            analysis_none.functions.len(),
            analysis_zero.functions.len(),
            "ast_recursion_limit=0 should behave identically to unset (unlimited)"
        );
    }
}