Skip to main content

code_analyze_mcp/
parser.rs

1//! Tree-sitter-based parser for extracting semantic structure from source code.
2//!
3//! This module provides language-agnostic parsing using tree-sitter queries to extract
4//! functions, classes, imports, references, and other semantic elements from source files.
5//! Two main extractors handle different use cases:
6//!
7//! - [`ElementExtractor`]: Quick extraction of function and class counts.
8//! - [`SemanticExtractor`]: Detailed semantic analysis with calls, imports, and references.
9
10use crate::languages::get_language_info;
11use crate::types::{
12    AssignmentInfo, CallInfo, ClassInfo, FieldAccessInfo, FunctionInfo, ImportInfo, ReferenceInfo,
13    ReferenceType, SemanticAnalysis,
14};
15use std::cell::RefCell;
16use std::collections::HashMap;
17use std::sync::LazyLock;
18use thiserror::Error;
19use tracing::instrument;
20use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
21
22#[derive(Debug, Error)]
23pub enum ParserError {
24    #[error("Unsupported language: {0}")]
25    UnsupportedLanguage(String),
26    #[error("Failed to parse file: {0}")]
27    ParseError(String),
28    #[error("Invalid UTF-8 in file")]
29    InvalidUtf8,
30    #[error("Query error: {0}")]
31    QueryError(String),
32}
33
34/// Compiled tree-sitter queries for a language.
35/// Stores all query types: mandatory (element, call) and optional (import, impl, reference).
36struct CompiledQueries {
37    element: Query,
38    call: Query,
39    import: Option<Query>,
40    impl_block: Option<Query>,
41    reference: Option<Query>,
42    assignment: Option<Query>,
43    field: Option<Query>,
44}
45
46/// Build compiled queries for a given language.
47fn build_compiled_queries(
48    lang_info: &crate::languages::LanguageInfo,
49) -> Result<CompiledQueries, ParserError> {
50    let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
51        ParserError::QueryError(format!(
52            "Failed to compile element query for {}: {}",
53            lang_info.name, e
54        ))
55    })?;
56
57    let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
58        ParserError::QueryError(format!(
59            "Failed to compile call query for {}: {}",
60            lang_info.name, e
61        ))
62    })?;
63
64    let import = if let Some(import_query_str) = lang_info.import_query {
65        Some(
66            Query::new(&lang_info.language, import_query_str).map_err(|e| {
67                ParserError::QueryError(format!(
68                    "Failed to compile import query for {}: {}",
69                    lang_info.name, e
70                ))
71            })?,
72        )
73    } else {
74        None
75    };
76
77    let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
78        Some(
79            Query::new(&lang_info.language, impl_query_str).map_err(|e| {
80                ParserError::QueryError(format!(
81                    "Failed to compile impl query for {}: {}",
82                    lang_info.name, e
83                ))
84            })?,
85        )
86    } else {
87        None
88    };
89
90    let reference = if let Some(ref_query_str) = lang_info.reference_query {
91        Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
92            ParserError::QueryError(format!(
93                "Failed to compile reference query for {}: {}",
94                lang_info.name, e
95            ))
96        })?)
97    } else {
98        None
99    };
100
101    let assignment = if let Some(assignment_query_str) = lang_info.assignment_query {
102        Some(
103            Query::new(&lang_info.language, assignment_query_str).map_err(|e| {
104                ParserError::QueryError(format!(
105                    "Failed to compile assignment query for {}: {}",
106                    lang_info.name, e
107                ))
108            })?,
109        )
110    } else {
111        None
112    };
113
114    let field = if let Some(field_query_str) = lang_info.field_query {
115        Some(
116            Query::new(&lang_info.language, field_query_str).map_err(|e| {
117                ParserError::QueryError(format!(
118                    "Failed to compile field query for {}: {}",
119                    lang_info.name, e
120                ))
121            })?,
122        )
123    } else {
124        None
125    };
126
127    Ok(CompiledQueries {
128        element,
129        call,
130        import,
131        impl_block,
132        reference,
133        assignment,
134        field,
135    })
136}
137
138/// Initialize the query cache with compiled queries for all supported languages.
139fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
140    let supported_languages = [
141        "rust",
142        "python",
143        "typescript",
144        "tsx",
145        "go",
146        "java",
147        "fortran",
148    ];
149    let mut cache = HashMap::new();
150
151    for lang_name in &supported_languages {
152        if let Some(lang_info) = get_language_info(lang_name) {
153            match build_compiled_queries(&lang_info) {
154                Ok(compiled) => {
155                    cache.insert(*lang_name, compiled);
156                }
157                Err(e) => {
158                    tracing::error!(
159                        "Failed to compile queries for language {}: {}",
160                        lang_name,
161                        e
162                    );
163                }
164            }
165        }
166    }
167
168    cache
169}
170
171/// Lazily initialized cache of compiled queries per language.
172static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
173    LazyLock::new(init_query_cache);
174
175/// Get compiled queries for a language from the cache.
176fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
177    QUERY_CACHE
178        .get(language)
179        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
180}
181
182thread_local! {
183    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
184}
185
186/// Canonical API for extracting element counts from source code.
187pub struct ElementExtractor;
188
189impl ElementExtractor {
190    /// Extract function and class counts from source code.
191    ///
192    /// # Errors
193    ///
194    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
195    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
196    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
197    #[instrument(skip_all, fields(language))]
198    pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
199        let lang_info = get_language_info(language)
200            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
201
202        let tree = PARSER.with(|p| {
203            let mut parser = p.borrow_mut();
204            parser
205                .set_language(&lang_info.language)
206                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {}", e)))?;
207            parser
208                .parse(source, None)
209                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
210        })?;
211
212        let compiled = get_compiled_queries(language)?;
213
214        let mut cursor = QueryCursor::new();
215        let mut function_count = 0;
216        let mut class_count = 0;
217
218        let mut matches = cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
219        while let Some(mat) = matches.next() {
220            for capture in mat.captures {
221                let capture_name = compiled.element.capture_names()[capture.index as usize];
222                match capture_name {
223                    "function" => function_count += 1,
224                    "class" => class_count += 1,
225                    _ => {}
226                }
227            }
228        }
229
230        tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
231
232        Ok((function_count, class_count))
233    }
234}
235
236/// Recursively extract `ImportInfo` entries from a use-clause node, respecting all Rust
237/// use-declaration forms (`scoped_identifier`, `scoped_use_list`, `use_list`,
238/// `use_as_clause`, `use_wildcard`, bare `identifier`).
239fn extract_imports_from_node(
240    node: &Node,
241    source: &str,
242    prefix: &str,
243    line: usize,
244    imports: &mut Vec<ImportInfo>,
245) {
246    match node.kind() {
247        // Simple identifier: `use foo;` or an item inside `{foo, bar}`
248        "identifier" | "self" | "super" | "crate" => {
249            let name = source[node.start_byte()..node.end_byte()].to_string();
250            imports.push(ImportInfo {
251                module: prefix.to_string(),
252                items: vec![name],
253                line,
254            });
255        }
256        // Qualified path: `std::collections::HashMap`
257        "scoped_identifier" => {
258            let item = node
259                .child_by_field_name("name")
260                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
261                .unwrap_or_default();
262            let module = node
263                .child_by_field_name("path")
264                .map(|p| {
265                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
266                    if prefix.is_empty() {
267                        path_text
268                    } else {
269                        format!("{}::{}", prefix, path_text)
270                    }
271                })
272                .unwrap_or_else(|| prefix.to_string());
273            if !item.is_empty() {
274                imports.push(ImportInfo {
275                    module,
276                    items: vec![item],
277                    line,
278                });
279            }
280        }
281        // `std::{io, fs}` — path prefix followed by a brace list
282        "scoped_use_list" => {
283            let new_prefix = node
284                .child_by_field_name("path")
285                .map(|p| {
286                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
287                    if prefix.is_empty() {
288                        path_text
289                    } else {
290                        format!("{}::{}", prefix, path_text)
291                    }
292                })
293                .unwrap_or_else(|| prefix.to_string());
294            if let Some(list) = node.child_by_field_name("list") {
295                extract_imports_from_node(&list, source, &new_prefix, line, imports);
296            }
297        }
298        // `{HashMap, HashSet}` — brace-enclosed list of items
299        "use_list" => {
300            let mut cursor = node.walk();
301            for child in node.children(&mut cursor) {
302                match child.kind() {
303                    "{" | "}" | "," => {}
304                    _ => extract_imports_from_node(&child, source, prefix, line, imports),
305                }
306            }
307        }
308        // `std::io::*` — glob import
309        "use_wildcard" => {
310            let text = source[node.start_byte()..node.end_byte()].to_string();
311            let module = if let Some(stripped) = text.strip_suffix("::*") {
312                if prefix.is_empty() {
313                    stripped.to_string()
314                } else {
315                    format!("{}::{}", prefix, stripped)
316                }
317            } else {
318                prefix.to_string()
319            };
320            imports.push(ImportInfo {
321                module,
322                items: vec!["*".to_string()],
323                line,
324            });
325        }
326        // `io as stdio` or `std::io as stdio`
327        "use_as_clause" => {
328            let alias = node
329                .child_by_field_name("alias")
330                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
331                .unwrap_or_default();
332            let module = if let Some(path_node) = node.child_by_field_name("path") {
333                match path_node.kind() {
334                    "scoped_identifier" => path_node
335                        .child_by_field_name("path")
336                        .map(|p| {
337                            let p_text = source[p.start_byte()..p.end_byte()].to_string();
338                            if prefix.is_empty() {
339                                p_text
340                            } else {
341                                format!("{}::{}", prefix, p_text)
342                            }
343                        })
344                        .unwrap_or_else(|| prefix.to_string()),
345                    _ => prefix.to_string(),
346                }
347            } else {
348                prefix.to_string()
349            };
350            if !alias.is_empty() {
351                imports.push(ImportInfo {
352                    module,
353                    items: vec![alias],
354                    line,
355                });
356            }
357        }
358        // Python import_from_statement: `from module import name` or `from . import *`
359        "import_from_statement" => {
360            extract_python_import_from(node, source, line, imports);
361        }
362        // Fallback for non-Rust import nodes: capture full text as module
363        _ => {
364            let text = source[node.start_byte()..node.end_byte()]
365                .trim()
366                .to_string();
367            if !text.is_empty() {
368                imports.push(ImportInfo {
369                    module: text,
370                    items: vec![],
371                    line,
372                });
373            }
374        }
375    }
376}
377
378/// Extract an item name from a dotted_name or aliased_import child node.
379fn extract_import_item_name(child: &Node, source: &str) -> Option<String> {
380    match child.kind() {
381        "dotted_name" => {
382            let name = source[child.start_byte()..child.end_byte()]
383                .trim()
384                .to_string();
385            if name.is_empty() { None } else { Some(name) }
386        }
387        "aliased_import" => child.child_by_field_name("name").and_then(|n| {
388            let name = source[n.start_byte()..n.end_byte()].trim().to_string();
389            if name.is_empty() { None } else { Some(name) }
390        }),
391        _ => None,
392    }
393}
394
395/// Collect wildcard/named imports from an import_list node or from direct named children.
396fn collect_import_items(
397    node: &Node,
398    source: &str,
399    is_wildcard: &mut bool,
400    items: &mut Vec<String>,
401) {
402    // Prefer import_list child (wraps `from x import a, b`)
403    if let Some(import_list) = node.child_by_field_name("import_list") {
404        let mut cursor = import_list.walk();
405        for child in import_list.named_children(&mut cursor) {
406            if child.kind() == "wildcard_import" {
407                *is_wildcard = true;
408            } else if let Some(name) = extract_import_item_name(&child, source) {
409                items.push(name);
410            }
411        }
412        return;
413    }
414    // No import_list: single-name or wildcard as direct child (skip first named child = module_name)
415    let mut cursor = node.walk();
416    let mut first = true;
417    for child in node.named_children(&mut cursor) {
418        if first {
419            first = false;
420            continue;
421        }
422        if child.kind() == "wildcard_import" {
423            *is_wildcard = true;
424        } else if let Some(name) = extract_import_item_name(&child, source) {
425            items.push(name);
426        }
427    }
428}
429
430/// Handle Python `import_from_statement` node.
431fn extract_python_import_from(
432    node: &Node,
433    source: &str,
434    line: usize,
435    imports: &mut Vec<ImportInfo>,
436) {
437    let module = if let Some(m) = node.child_by_field_name("module_name") {
438        source[m.start_byte()..m.end_byte()].trim().to_string()
439    } else if let Some(r) = node.child_by_field_name("relative_import") {
440        source[r.start_byte()..r.end_byte()].trim().to_string()
441    } else {
442        String::new()
443    };
444
445    let mut is_wildcard = false;
446    let mut items = Vec::new();
447    collect_import_items(node, source, &mut is_wildcard, &mut items);
448
449    if !module.is_empty() {
450        imports.push(ImportInfo {
451            module,
452            items: if is_wildcard {
453                vec!["*".to_string()]
454            } else {
455                items
456            },
457            line,
458        });
459    }
460}
461
/// Extractor that produces a full `SemanticAnalysis` for a source file: functions,
/// classes, imports, references, calls, assignments, and field accesses.
pub struct SemanticExtractor;
463
464impl SemanticExtractor {
465    /// Extract semantic information from source code.
466    ///
467    /// # Errors
468    ///
469    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
470    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
471    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
472    #[instrument(skip_all, fields(language))]
473    pub fn extract(
474        source: &str,
475        language: &str,
476        ast_recursion_limit: Option<usize>,
477    ) -> Result<SemanticAnalysis, ParserError> {
478        let lang_info = get_language_info(language)
479            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
480
481        let tree = PARSER.with(|p| {
482            let mut parser = p.borrow_mut();
483            parser
484                .set_language(&lang_info.language)
485                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {}", e)))?;
486            parser
487                .parse(source, None)
488                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
489        })?;
490
491        // 0 is not a useful depth (visits root node only, returning zero results).
492        // Treat 0 as None (unlimited). See #339.
493        let max_depth: Option<u32> = ast_recursion_limit
494            .filter(|&limit| limit > 0)
495            .map(|limit| {
496                u32::try_from(limit).map_err(|_| {
497                    ParserError::ParseError(format!(
498                        "ast_recursion_limit {} exceeds maximum supported value {}",
499                        limit,
500                        u32::MAX
501                    ))
502                })
503            })
504            .transpose()?;
505
506        let compiled = get_compiled_queries(language)?;
507        let root = tree.root_node();
508
509        let mut functions = Vec::new();
510        let mut classes = Vec::new();
511        let mut imports = Vec::new();
512        let mut references = Vec::new();
513        let mut call_frequency = HashMap::new();
514        let mut calls = Vec::new();
515        let mut assignments: Vec<AssignmentInfo> = Vec::new();
516        let mut field_accesses: Vec<FieldAccessInfo> = Vec::new();
517
518        Self::extract_elements(
519            source,
520            compiled,
521            root,
522            max_depth,
523            &lang_info,
524            &mut functions,
525            &mut classes,
526        );
527        Self::extract_calls(
528            source,
529            compiled,
530            root,
531            max_depth,
532            &mut calls,
533            &mut call_frequency,
534        );
535        Self::extract_imports(source, compiled, root, max_depth, &mut imports);
536        Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes);
537        Self::extract_references(source, compiled, root, max_depth, &mut references);
538        Self::extract_assignments(source, compiled, root, max_depth, &mut assignments);
539        Self::extract_field_accesses(source, compiled, root, max_depth, &mut field_accesses);
540
541        tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), "extraction complete");
542
543        Ok(SemanticAnalysis {
544            functions,
545            classes,
546            imports,
547            references,
548            call_frequency,
549            calls,
550            assignments,
551            field_accesses,
552        })
553    }
554
555    fn extract_elements(
556        source: &str,
557        compiled: &CompiledQueries,
558        root: Node<'_>,
559        max_depth: Option<u32>,
560        lang_info: &crate::languages::LanguageInfo,
561        functions: &mut Vec<FunctionInfo>,
562        classes: &mut Vec<ClassInfo>,
563    ) {
564        let mut cursor = QueryCursor::new();
565        if let Some(depth) = max_depth {
566            cursor.set_max_start_depth(Some(depth));
567        }
568        let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
569        let mut seen_functions = std::collections::HashSet::new();
570
571        while let Some(mat) = matches.next() {
572            for capture in mat.captures {
573                let capture_name = compiled.element.capture_names()[capture.index as usize];
574                let node = capture.node;
575                match capture_name {
576                    "function" => {
577                        if let Some(name_node) = node.child_by_field_name("name") {
578                            let name =
579                                source[name_node.start_byte()..name_node.end_byte()].to_string();
580                            let func_key = (name.clone(), node.start_position().row);
581                            if !seen_functions.contains(&func_key) {
582                                seen_functions.insert(func_key);
583                                let params = node
584                                    .child_by_field_name("parameters")
585                                    .map(|p| source[p.start_byte()..p.end_byte()].to_string())
586                                    .unwrap_or_default();
587                                let return_type = node
588                                    .child_by_field_name("return_type")
589                                    .map(|r| source[r.start_byte()..r.end_byte()].to_string());
590                                functions.push(FunctionInfo {
591                                    name,
592                                    line: node.start_position().row + 1,
593                                    end_line: node.end_position().row + 1,
594                                    parameters: if params.is_empty() {
595                                        Vec::new()
596                                    } else {
597                                        vec![params]
598                                    },
599                                    return_type,
600                                });
601                            }
602                        }
603                    }
604                    "class" => {
605                        if let Some(name_node) = node.child_by_field_name("name") {
606                            let name =
607                                source[name_node.start_byte()..name_node.end_byte()].to_string();
608                            let inherits = if let Some(handler) = lang_info.extract_inheritance {
609                                handler(&node, source)
610                            } else {
611                                Vec::new()
612                            };
613                            classes.push(ClassInfo {
614                                name,
615                                line: node.start_position().row + 1,
616                                end_line: node.end_position().row + 1,
617                                methods: Vec::new(),
618                                fields: Vec::new(),
619                                inherits,
620                            });
621                        }
622                    }
623                    _ => {}
624                }
625            }
626        }
627    }
628
629    fn extract_calls(
630        source: &str,
631        compiled: &CompiledQueries,
632        root: Node<'_>,
633        max_depth: Option<u32>,
634        calls: &mut Vec<CallInfo>,
635        call_frequency: &mut HashMap<String, usize>,
636    ) {
637        let mut cursor = QueryCursor::new();
638        if let Some(depth) = max_depth {
639            cursor.set_max_start_depth(Some(depth));
640        }
641        let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
642
643        while let Some(mat) = matches.next() {
644            for capture in mat.captures {
645                let capture_name = compiled.call.capture_names()[capture.index as usize];
646                if capture_name != "call" {
647                    continue;
648                }
649                let node = capture.node;
650                let call_name = source[node.start_byte()..node.end_byte()].to_string();
651                *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
652
653                let mut current = node;
654                let mut caller = "<module>".to_string();
655                while let Some(parent) = current.parent() {
656                    if parent.kind() == "function_item"
657                        && let Some(name_node) = parent.child_by_field_name("name")
658                    {
659                        caller = source[name_node.start_byte()..name_node.end_byte()].to_string();
660                        break;
661                    }
662                    current = parent;
663                }
664
665                let mut arg_count = None;
666                let mut arg_node = node;
667                while let Some(parent) = arg_node.parent() {
668                    if parent.kind() == "call_expression" {
669                        if let Some(args) = parent.child_by_field_name("arguments") {
670                            arg_count = Some(args.named_child_count());
671                        }
672                        break;
673                    }
674                    arg_node = parent;
675                }
676
677                calls.push(CallInfo {
678                    caller,
679                    callee: call_name,
680                    line: node.start_position().row + 1,
681                    column: node.start_position().column,
682                    arg_count,
683                });
684            }
685        }
686    }
687
688    fn extract_imports(
689        source: &str,
690        compiled: &CompiledQueries,
691        root: Node<'_>,
692        max_depth: Option<u32>,
693        imports: &mut Vec<ImportInfo>,
694    ) {
695        let Some(ref import_query) = compiled.import else {
696            return;
697        };
698        let mut cursor = QueryCursor::new();
699        if let Some(depth) = max_depth {
700            cursor.set_max_start_depth(Some(depth));
701        }
702        let mut matches = cursor.matches(import_query, root, source.as_bytes());
703
704        while let Some(mat) = matches.next() {
705            for capture in mat.captures {
706                let capture_name = import_query.capture_names()[capture.index as usize];
707                if capture_name == "import_path" {
708                    let node = capture.node;
709                    let line = node.start_position().row + 1;
710                    extract_imports_from_node(&node, source, "", line, imports);
711                }
712            }
713        }
714    }
715
716    fn extract_impl_methods(
717        source: &str,
718        compiled: &CompiledQueries,
719        root: Node<'_>,
720        max_depth: Option<u32>,
721        classes: &mut [ClassInfo],
722    ) {
723        let Some(ref impl_query) = compiled.impl_block else {
724            return;
725        };
726        let mut cursor = QueryCursor::new();
727        if let Some(depth) = max_depth {
728            cursor.set_max_start_depth(Some(depth));
729        }
730        let mut matches = cursor.matches(impl_query, root, source.as_bytes());
731
732        while let Some(mat) = matches.next() {
733            let mut impl_type_name = String::new();
734            let mut method_name = String::new();
735            let mut method_line = 0usize;
736            let mut method_end_line = 0usize;
737            let mut method_params = String::new();
738            let mut method_return_type: Option<String> = None;
739
740            for capture in mat.captures {
741                let capture_name = impl_query.capture_names()[capture.index as usize];
742                let node = capture.node;
743                match capture_name {
744                    "impl_type" => {
745                        impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
746                    }
747                    "method_name" => {
748                        method_name = source[node.start_byte()..node.end_byte()].to_string();
749                    }
750                    "method_params" => {
751                        method_params = source[node.start_byte()..node.end_byte()].to_string();
752                    }
753                    "method" => {
754                        method_line = node.start_position().row + 1;
755                        method_end_line = node.end_position().row + 1;
756                        method_return_type = node
757                            .child_by_field_name("return_type")
758                            .map(|r| source[r.start_byte()..r.end_byte()].to_string());
759                    }
760                    _ => {}
761                }
762            }
763
764            if !impl_type_name.is_empty() && !method_name.is_empty() {
765                let func = FunctionInfo {
766                    name: method_name,
767                    line: method_line,
768                    end_line: method_end_line,
769                    parameters: if method_params.is_empty() {
770                        Vec::new()
771                    } else {
772                        vec![method_params]
773                    },
774                    return_type: method_return_type,
775                };
776                if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
777                    class.methods.push(func);
778                }
779            }
780        }
781    }
782
783    fn extract_references(
784        source: &str,
785        compiled: &CompiledQueries,
786        root: Node<'_>,
787        max_depth: Option<u32>,
788        references: &mut Vec<ReferenceInfo>,
789    ) {
790        let Some(ref ref_query) = compiled.reference else {
791            return;
792        };
793        let mut cursor = QueryCursor::new();
794        if let Some(depth) = max_depth {
795            cursor.set_max_start_depth(Some(depth));
796        }
797        let mut seen_refs = std::collections::HashSet::new();
798        let mut matches = cursor.matches(ref_query, root, source.as_bytes());
799
800        while let Some(mat) = matches.next() {
801            for capture in mat.captures {
802                let capture_name = ref_query.capture_names()[capture.index as usize];
803                if capture_name == "type_ref" {
804                    let node = capture.node;
805                    let type_ref = source[node.start_byte()..node.end_byte()].to_string();
806                    if seen_refs.insert(type_ref.clone()) {
807                        references.push(ReferenceInfo {
808                            symbol: type_ref,
809                            reference_type: ReferenceType::Usage,
810                            // location is intentionally empty here; set by the caller (analyze_file)
811                            location: String::new(),
812                            line: node.start_position().row + 1,
813                        });
814                    }
815                }
816            }
817        }
818    }
819
820    fn extract_assignments(
821        source: &str,
822        compiled: &CompiledQueries,
823        root: Node<'_>,
824        max_depth: Option<u32>,
825        assignments: &mut Vec<AssignmentInfo>,
826    ) {
827        let Some(ref assignment_query) = compiled.assignment else {
828            return;
829        };
830        let mut cursor = QueryCursor::new();
831        if let Some(depth) = max_depth {
832            cursor.set_max_start_depth(Some(depth));
833        }
834        let mut matches = cursor.matches(assignment_query, root, source.as_bytes());
835
836        while let Some(mat) = matches.next() {
837            let mut variable = String::new();
838            let mut value = String::new();
839            let mut line = 0usize;
840
841            for capture in mat.captures {
842                let capture_name = assignment_query.capture_names()[capture.index as usize];
843                let node = capture.node;
844                match capture_name {
845                    "variable" => {
846                        variable = source[node.start_byte()..node.end_byte()].to_string();
847                    }
848                    "value" => {
849                        value = source[node.start_byte()..node.end_byte()].to_string();
850                        line = node.start_position().row + 1;
851                    }
852                    _ => {}
853                }
854            }
855
856            if !variable.is_empty() && !value.is_empty() {
857                let mut current = mat.captures[0].node;
858                let mut scope = "global".to_string();
859                while let Some(parent) = current.parent() {
860                    if parent.kind() == "function_item"
861                        && let Some(name_node) = parent.child_by_field_name("name")
862                    {
863                        scope = source[name_node.start_byte()..name_node.end_byte()].to_string();
864                        break;
865                    }
866                    current = parent;
867                }
868                assignments.push(AssignmentInfo {
869                    variable,
870                    value,
871                    line,
872                    scope,
873                });
874            }
875        }
876    }
877
878    fn extract_field_accesses(
879        source: &str,
880        compiled: &CompiledQueries,
881        root: Node<'_>,
882        max_depth: Option<u32>,
883        field_accesses: &mut Vec<FieldAccessInfo>,
884    ) {
885        let Some(ref field_query) = compiled.field else {
886            return;
887        };
888        let mut cursor = QueryCursor::new();
889        if let Some(depth) = max_depth {
890            cursor.set_max_start_depth(Some(depth));
891        }
892        let mut matches = cursor.matches(field_query, root, source.as_bytes());
893
894        while let Some(mat) = matches.next() {
895            let mut object = String::new();
896            let mut field = String::new();
897            let mut line = 0usize;
898
899            for capture in mat.captures {
900                let capture_name = field_query.capture_names()[capture.index as usize];
901                let node = capture.node;
902                match capture_name {
903                    "object" => {
904                        object = source[node.start_byte()..node.end_byte()].to_string();
905                    }
906                    "field" => {
907                        field = source[node.start_byte()..node.end_byte()].to_string();
908                        line = node.start_position().row + 1;
909                    }
910                    _ => {}
911                }
912            }
913
914            if !object.is_empty() && !field.is_empty() {
915                let mut current = mat.captures[0].node;
916                let mut scope = "global".to_string();
917                while let Some(parent) = current.parent() {
918                    if parent.kind() == "function_item"
919                        && let Some(name_node) = parent.child_by_field_name("name")
920                    {
921                        scope = source[name_node.start_byte()..name_node.end_byte()].to_string();
922                        break;
923                    }
924                    current = parent;
925                }
926                field_accesses.push(FieldAccessInfo {
927                    object,
928                    field,
929                    line,
930                    scope,
931                });
932            }
933        }
934    }
935}
936
#[cfg(test)]
mod tests {
    use super::*;

    /// The first assignment found in a Rust function body carries its variable,
    /// value text, and the enclosing function name as scope.
    #[test]
    fn test_extract_assignments() {
        let source = r#"
fn main() {
    let x = 42;
    let y = x + 1;
}
"#;
        let outcome = SemanticExtractor::extract(source, "rust", None);
        assert!(outcome.is_ok());
        let analysis = outcome.unwrap();
        assert!(!analysis.assignments.is_empty());

        let first = &analysis.assignments[0];
        assert_eq!(first.variable, "x");
        assert_eq!(first.value, "42");
        assert_eq!(first.scope, "main");
    }

    /// Field accesses on a parameter are reported with object, field, and the
    /// enclosing function name as scope.
    #[test]
    fn test_extract_field_accesses() {
        let source = r#"
fn process(user: &User) {
    let name = user.name;
    let age = user.age;
}
"#;
        let outcome = SemanticExtractor::extract(source, "rust", None);
        assert!(outcome.is_ok());
        let analysis = outcome.unwrap();
        let accesses = &analysis.field_accesses;

        assert!(!accesses.is_empty());
        let has_user_name = accesses
            .iter()
            .any(|access| access.object == "user" && access.field == "name");
        assert!(has_user_name);
        assert_eq!(accesses[0].scope, "process");
    }

    /// A recursion limit of `Some(0)` must find the same number of functions as `None`.
    #[test]
    fn test_ast_recursion_limit_zero_is_unlimited() {
        let source = r#"fn hello() -> u32 { 42 }"#;
        let unlimited = SemanticExtractor::extract(source, "rust", None);
        let zero_limit = SemanticExtractor::extract(source, "rust", Some(0));
        assert!(unlimited.is_ok(), "extract with None failed");
        assert!(zero_limit.is_ok(), "extract with Some(0) failed");

        let baseline = unlimited.unwrap();
        let limited = zero_limit.unwrap();
        assert!(
            !baseline.functions.is_empty(),
            "extract with None should find at least one function in the test source"
        );
        assert_eq!(
            baseline.functions.len(),
            limited.functions.len(),
            "ast_recursion_limit=0 should behave identically to unset (unlimited)"
        );
    }
}