Skip to main content

code_analyze_mcp/
parser.rs

1//! Tree-sitter-based parser for extracting semantic structure from source code.
2//!
3//! This module provides language-agnostic parsing using tree-sitter queries to extract
4//! functions, classes, imports, references, and other semantic elements from source files.
5//! Two main extractors handle different use cases:
6//!
7//! - [`ElementExtractor`]: Quick extraction of function and class counts.
8//! - [`SemanticExtractor`]: Detailed semantic analysis with calls, imports, and references.
9
10use crate::languages::get_language_info;
11use crate::types::{
12    CallInfo, ClassInfo, FunctionInfo, ImplTraitInfo, ImportInfo, ReferenceInfo, ReferenceType,
13    SemanticAnalysis,
14};
15use std::cell::RefCell;
16use std::collections::HashMap;
17use std::path::Path;
18use std::sync::LazyLock;
19use thiserror::Error;
20use tracing::instrument;
21use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
22
23#[derive(Debug, Error)]
24pub enum ParserError {
25    #[error("Unsupported language: {0}")]
26    UnsupportedLanguage(String),
27    #[error("Failed to parse file: {0}")]
28    ParseError(String),
29    #[error("Invalid UTF-8 in file")]
30    InvalidUtf8,
31    #[error("Query error: {0}")]
32    QueryError(String),
33}
34
35/// Compiled tree-sitter queries for a language.
36/// Stores all query types: mandatory (element, call) and optional (import, impl, reference).
37struct CompiledQueries {
38    element: Query,
39    call: Query,
40    import: Option<Query>,
41    impl_block: Option<Query>,
42    reference: Option<Query>,
43    impl_trait: Option<Query>,
44}
45
46/// Build compiled queries for a given language.
47fn build_compiled_queries(
48    lang_info: &crate::languages::LanguageInfo,
49) -> Result<CompiledQueries, ParserError> {
50    let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
51        ParserError::QueryError(format!(
52            "Failed to compile element query for {}: {}",
53            lang_info.name, e
54        ))
55    })?;
56
57    let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
58        ParserError::QueryError(format!(
59            "Failed to compile call query for {}: {}",
60            lang_info.name, e
61        ))
62    })?;
63
64    let import = if let Some(import_query_str) = lang_info.import_query {
65        Some(
66            Query::new(&lang_info.language, import_query_str).map_err(|e| {
67                ParserError::QueryError(format!(
68                    "Failed to compile import query for {}: {}",
69                    lang_info.name, e
70                ))
71            })?,
72        )
73    } else {
74        None
75    };
76
77    let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
78        Some(
79            Query::new(&lang_info.language, impl_query_str).map_err(|e| {
80                ParserError::QueryError(format!(
81                    "Failed to compile impl query for {}: {}",
82                    lang_info.name, e
83                ))
84            })?,
85        )
86    } else {
87        None
88    };
89
90    let reference = if let Some(ref_query_str) = lang_info.reference_query {
91        Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
92            ParserError::QueryError(format!(
93                "Failed to compile reference query for {}: {}",
94                lang_info.name, e
95            ))
96        })?)
97    } else {
98        None
99    };
100
101    let impl_trait = if let Some(impl_trait_query_str) = lang_info.impl_trait_query {
102        Some(
103            Query::new(&lang_info.language, impl_trait_query_str).map_err(|e| {
104                ParserError::QueryError(format!(
105                    "Failed to compile impl_trait query for {}: {}",
106                    lang_info.name, e
107                ))
108            })?,
109        )
110    } else {
111        None
112    };
113
114    Ok(CompiledQueries {
115        element,
116        call,
117        import,
118        impl_block,
119        reference,
120        impl_trait,
121    })
122}
123
124/// Initialize the query cache with compiled queries for all supported languages.
125fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
126    let supported_languages = [
127        "rust",
128        "python",
129        "typescript",
130        "tsx",
131        "go",
132        "java",
133        "fortran",
134    ];
135    let mut cache = HashMap::new();
136
137    for lang_name in &supported_languages {
138        if let Some(lang_info) = get_language_info(lang_name) {
139            match build_compiled_queries(&lang_info) {
140                Ok(compiled) => {
141                    cache.insert(*lang_name, compiled);
142                }
143                Err(e) => {
144                    tracing::error!(
145                        "Failed to compile queries for language {}: {}",
146                        lang_name,
147                        e
148                    );
149                }
150            }
151        }
152    }
153
154    cache
155}
156
157/// Lazily initialized cache of compiled queries per language.
158static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
159    LazyLock::new(init_query_cache);
160
161/// Get compiled queries for a language from the cache.
162fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
163    QUERY_CACHE
164        .get(language)
165        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
166}
167
168thread_local! {
169    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
170}
171
172/// Canonical API for extracting element counts from source code.
173pub struct ElementExtractor;
174
175impl ElementExtractor {
176    /// Extract function and class counts from source code.
177    ///
178    /// # Errors
179    ///
180    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
181    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
182    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
183    #[instrument(skip_all, fields(language))]
184    pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
185        let lang_info = get_language_info(language)
186            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
187
188        let tree = PARSER.with(|p| {
189            let mut parser = p.borrow_mut();
190            parser
191                .set_language(&lang_info.language)
192                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {}", e)))?;
193            parser
194                .parse(source, None)
195                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
196        })?;
197
198        let compiled = get_compiled_queries(language)?;
199
200        let mut cursor = QueryCursor::new();
201        let mut function_count = 0;
202        let mut class_count = 0;
203
204        let mut matches = cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
205        while let Some(mat) = matches.next() {
206            for capture in mat.captures {
207                let capture_name = compiled.element.capture_names()[capture.index as usize];
208                match capture_name {
209                    "function" => function_count += 1,
210                    "class" => class_count += 1,
211                    _ => {}
212                }
213            }
214        }
215
216        tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
217
218        Ok((function_count, class_count))
219    }
220}
221
222/// Recursively extract `ImportInfo` entries from a use-clause node, respecting all Rust
223/// use-declaration forms (`scoped_identifier`, `scoped_use_list`, `use_list`,
224/// `use_as_clause`, `use_wildcard`, bare `identifier`).
225fn extract_imports_from_node(
226    node: &Node,
227    source: &str,
228    prefix: &str,
229    line: usize,
230    imports: &mut Vec<ImportInfo>,
231) {
232    match node.kind() {
233        // Simple identifier: `use foo;` or an item inside `{foo, bar}`
234        "identifier" | "self" | "super" | "crate" => {
235            let name = source[node.start_byte()..node.end_byte()].to_string();
236            imports.push(ImportInfo {
237                module: prefix.to_string(),
238                items: vec![name],
239                line,
240            });
241        }
242        // Qualified path: `std::collections::HashMap`
243        "scoped_identifier" => {
244            let item = node
245                .child_by_field_name("name")
246                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
247                .unwrap_or_default();
248            let module = node
249                .child_by_field_name("path")
250                .map(|p| {
251                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
252                    if prefix.is_empty() {
253                        path_text
254                    } else {
255                        format!("{}::{}", prefix, path_text)
256                    }
257                })
258                .unwrap_or_else(|| prefix.to_string());
259            if !item.is_empty() {
260                imports.push(ImportInfo {
261                    module,
262                    items: vec![item],
263                    line,
264                });
265            }
266        }
267        // `std::{io, fs}` — path prefix followed by a brace list
268        "scoped_use_list" => {
269            let new_prefix = node
270                .child_by_field_name("path")
271                .map(|p| {
272                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
273                    if prefix.is_empty() {
274                        path_text
275                    } else {
276                        format!("{}::{}", prefix, path_text)
277                    }
278                })
279                .unwrap_or_else(|| prefix.to_string());
280            if let Some(list) = node.child_by_field_name("list") {
281                extract_imports_from_node(&list, source, &new_prefix, line, imports);
282            }
283        }
284        // `{HashMap, HashSet}` — brace-enclosed list of items
285        "use_list" => {
286            let mut cursor = node.walk();
287            for child in node.children(&mut cursor) {
288                match child.kind() {
289                    "{" | "}" | "," => {}
290                    _ => extract_imports_from_node(&child, source, prefix, line, imports),
291                }
292            }
293        }
294        // `std::io::*` — glob import
295        "use_wildcard" => {
296            let text = source[node.start_byte()..node.end_byte()].to_string();
297            let module = if let Some(stripped) = text.strip_suffix("::*") {
298                if prefix.is_empty() {
299                    stripped.to_string()
300                } else {
301                    format!("{}::{}", prefix, stripped)
302                }
303            } else {
304                prefix.to_string()
305            };
306            imports.push(ImportInfo {
307                module,
308                items: vec!["*".to_string()],
309                line,
310            });
311        }
312        // `io as stdio` or `std::io as stdio`
313        "use_as_clause" => {
314            let alias = node
315                .child_by_field_name("alias")
316                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
317                .unwrap_or_default();
318            let module = if let Some(path_node) = node.child_by_field_name("path") {
319                match path_node.kind() {
320                    "scoped_identifier" => path_node
321                        .child_by_field_name("path")
322                        .map(|p| {
323                            let p_text = source[p.start_byte()..p.end_byte()].to_string();
324                            if prefix.is_empty() {
325                                p_text
326                            } else {
327                                format!("{}::{}", prefix, p_text)
328                            }
329                        })
330                        .unwrap_or_else(|| prefix.to_string()),
331                    _ => prefix.to_string(),
332                }
333            } else {
334                prefix.to_string()
335            };
336            if !alias.is_empty() {
337                imports.push(ImportInfo {
338                    module,
339                    items: vec![alias],
340                    line,
341                });
342            }
343        }
344        // Python import_from_statement: `from module import name` or `from . import *`
345        "import_from_statement" => {
346            extract_python_import_from(node, source, line, imports);
347        }
348        // Fallback for non-Rust import nodes: capture full text as module
349        _ => {
350            let text = source[node.start_byte()..node.end_byte()]
351                .trim()
352                .to_string();
353            if !text.is_empty() {
354                imports.push(ImportInfo {
355                    module: text,
356                    items: vec![],
357                    line,
358                });
359            }
360        }
361    }
362}
363
364/// Extract an item name from a dotted_name or aliased_import child node.
365fn extract_import_item_name(child: &Node, source: &str) -> Option<String> {
366    match child.kind() {
367        "dotted_name" => {
368            let name = source[child.start_byte()..child.end_byte()]
369                .trim()
370                .to_string();
371            if name.is_empty() { None } else { Some(name) }
372        }
373        "aliased_import" => child.child_by_field_name("name").and_then(|n| {
374            let name = source[n.start_byte()..n.end_byte()].trim().to_string();
375            if name.is_empty() { None } else { Some(name) }
376        }),
377        _ => None,
378    }
379}
380
381/// Collect wildcard/named imports from an import_list node or from direct named children.
382fn collect_import_items(
383    node: &Node,
384    source: &str,
385    is_wildcard: &mut bool,
386    items: &mut Vec<String>,
387) {
388    // Prefer import_list child (wraps `from x import a, b`)
389    if let Some(import_list) = node.child_by_field_name("import_list") {
390        let mut cursor = import_list.walk();
391        for child in import_list.named_children(&mut cursor) {
392            if child.kind() == "wildcard_import" {
393                *is_wildcard = true;
394            } else if let Some(name) = extract_import_item_name(&child, source) {
395                items.push(name);
396            }
397        }
398        return;
399    }
400    // No import_list: single-name or wildcard as direct child (skip first named child = module_name)
401    let mut cursor = node.walk();
402    let mut first = true;
403    for child in node.named_children(&mut cursor) {
404        if first {
405            first = false;
406            continue;
407        }
408        if child.kind() == "wildcard_import" {
409            *is_wildcard = true;
410        } else if let Some(name) = extract_import_item_name(&child, source) {
411            items.push(name);
412        }
413    }
414}
415
416/// Handle Python `import_from_statement` node.
417fn extract_python_import_from(
418    node: &Node,
419    source: &str,
420    line: usize,
421    imports: &mut Vec<ImportInfo>,
422) {
423    let module = if let Some(m) = node.child_by_field_name("module_name") {
424        source[m.start_byte()..m.end_byte()].trim().to_string()
425    } else if let Some(r) = node.child_by_field_name("relative_import") {
426        source[r.start_byte()..r.end_byte()].trim().to_string()
427    } else {
428        String::new()
429    };
430
431    let mut is_wildcard = false;
432    let mut items = Vec::new();
433    collect_import_items(node, source, &mut is_wildcard, &mut items);
434
435    if !module.is_empty() {
436        imports.push(ImportInfo {
437            module,
438            items: if is_wildcard {
439                vec!["*".to_string()]
440            } else {
441                items
442            },
443            line,
444        });
445    }
446}
447
/// Entry point for the detailed analysis: functions, classes, imports,
/// references and calls extracted from a single source text.
pub struct SemanticExtractor;
449
450impl SemanticExtractor {
451    /// Extract semantic information from source code.
452    ///
453    /// # Errors
454    ///
455    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
456    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
457    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
458    #[instrument(skip_all, fields(language))]
459    pub fn extract(
460        source: &str,
461        language: &str,
462        ast_recursion_limit: Option<usize>,
463    ) -> Result<SemanticAnalysis, ParserError> {
464        let lang_info = get_language_info(language)
465            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
466
467        let tree = PARSER.with(|p| {
468            let mut parser = p.borrow_mut();
469            parser
470                .set_language(&lang_info.language)
471                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {}", e)))?;
472            parser
473                .parse(source, None)
474                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
475        })?;
476
477        // 0 is not a useful depth (visits root node only, returning zero results).
478        // Treat 0 as None (unlimited). See #339.
479        let max_depth: Option<u32> = ast_recursion_limit
480            .filter(|&limit| limit > 0)
481            .map(|limit| {
482                u32::try_from(limit).map_err(|_| {
483                    ParserError::ParseError(format!(
484                        "ast_recursion_limit {} exceeds maximum supported value {}",
485                        limit,
486                        u32::MAX
487                    ))
488                })
489            })
490            .transpose()?;
491
492        let compiled = get_compiled_queries(language)?;
493        let root = tree.root_node();
494
495        let mut functions = Vec::new();
496        let mut classes = Vec::new();
497        let mut imports = Vec::new();
498        let mut references = Vec::new();
499        let mut call_frequency = HashMap::new();
500        let mut calls = Vec::new();
501
502        Self::extract_elements(
503            source,
504            compiled,
505            root,
506            max_depth,
507            &lang_info,
508            &mut functions,
509            &mut classes,
510        );
511        Self::extract_calls(
512            source,
513            compiled,
514            root,
515            max_depth,
516            &mut calls,
517            &mut call_frequency,
518        );
519        Self::extract_imports(source, compiled, root, max_depth, &mut imports);
520        Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes);
521        Self::extract_references(source, compiled, root, max_depth, &mut references);
522
523        tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), "extraction complete");
524
525        Ok(SemanticAnalysis {
526            functions,
527            classes,
528            imports,
529            references,
530            call_frequency,
531            calls,
532            impl_traits: vec![],
533        })
534    }
535
536    fn extract_elements(
537        source: &str,
538        compiled: &CompiledQueries,
539        root: Node<'_>,
540        max_depth: Option<u32>,
541        lang_info: &crate::languages::LanguageInfo,
542        functions: &mut Vec<FunctionInfo>,
543        classes: &mut Vec<ClassInfo>,
544    ) {
545        let mut cursor = QueryCursor::new();
546        if let Some(depth) = max_depth {
547            cursor.set_max_start_depth(Some(depth));
548        }
549        let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
550        let mut seen_functions = std::collections::HashSet::new();
551
552        while let Some(mat) = matches.next() {
553            for capture in mat.captures {
554                let capture_name = compiled.element.capture_names()[capture.index as usize];
555                let node = capture.node;
556                match capture_name {
557                    "function" => {
558                        if let Some(name_node) = node.child_by_field_name("name") {
559                            let name =
560                                source[name_node.start_byte()..name_node.end_byte()].to_string();
561                            let func_key = (name.clone(), node.start_position().row);
562                            if !seen_functions.contains(&func_key) {
563                                seen_functions.insert(func_key);
564                                let params = node
565                                    .child_by_field_name("parameters")
566                                    .map(|p| source[p.start_byte()..p.end_byte()].to_string())
567                                    .unwrap_or_default();
568                                let return_type = node
569                                    .child_by_field_name("return_type")
570                                    .map(|r| source[r.start_byte()..r.end_byte()].to_string());
571                                functions.push(FunctionInfo {
572                                    name,
573                                    line: node.start_position().row + 1,
574                                    end_line: node.end_position().row + 1,
575                                    parameters: if params.is_empty() {
576                                        Vec::new()
577                                    } else {
578                                        vec![params]
579                                    },
580                                    return_type,
581                                });
582                            }
583                        }
584                    }
585                    "class" => {
586                        if let Some(name_node) = node.child_by_field_name("name") {
587                            let name =
588                                source[name_node.start_byte()..name_node.end_byte()].to_string();
589                            let inherits = if let Some(handler) = lang_info.extract_inheritance {
590                                handler(&node, source)
591                            } else {
592                                Vec::new()
593                            };
594                            classes.push(ClassInfo {
595                                name,
596                                line: node.start_position().row + 1,
597                                end_line: node.end_position().row + 1,
598                                methods: Vec::new(),
599                                fields: Vec::new(),
600                                inherits,
601                            });
602                        }
603                    }
604                    _ => {}
605                }
606            }
607        }
608    }
609
610    /// Returns the name of the enclosing function/method/subroutine for a given AST node,
611    /// by walking ancestors and matching all language-specific function container kinds.
612    fn enclosing_function_name(mut node: tree_sitter::Node<'_>, source: &str) -> Option<String> {
613        while let Some(parent) = node.parent() {
614            let name_node = match parent.kind() {
615                // Direct name field: Rust, Python, Go, Java, TypeScript/TSX
616                "function_item"
617                | "method_item"
618                | "function_definition"
619                | "function_declaration"
620                | "method_declaration"
621                | "method_definition" => parent.child_by_field_name("name"),
622                // Fortran subroutine: name is inside subroutine_statement child
623                "subroutine" => {
624                    let mut cursor = parent.walk();
625                    parent
626                        .children(&mut cursor)
627                        .find(|c| c.kind() == "subroutine_statement")
628                        .and_then(|s| s.child_by_field_name("name"))
629                }
630                // Fortran function: name is inside function_statement child
631                "function" => {
632                    let mut cursor = parent.walk();
633                    parent
634                        .children(&mut cursor)
635                        .find(|c| c.kind() == "function_statement")
636                        .and_then(|s| s.child_by_field_name("name"))
637                }
638                _ => {
639                    node = parent;
640                    continue;
641                }
642            };
643            return name_node.map(|n| source[n.start_byte()..n.end_byte()].to_string());
644        }
645        None
646    }
647
648    fn extract_calls(
649        source: &str,
650        compiled: &CompiledQueries,
651        root: Node<'_>,
652        max_depth: Option<u32>,
653        calls: &mut Vec<CallInfo>,
654        call_frequency: &mut HashMap<String, usize>,
655    ) {
656        let mut cursor = QueryCursor::new();
657        if let Some(depth) = max_depth {
658            cursor.set_max_start_depth(Some(depth));
659        }
660        let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
661
662        while let Some(mat) = matches.next() {
663            for capture in mat.captures {
664                let capture_name = compiled.call.capture_names()[capture.index as usize];
665                if capture_name != "call" {
666                    continue;
667                }
668                let node = capture.node;
669                let call_name = source[node.start_byte()..node.end_byte()].to_string();
670                *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
671
672                let caller = Self::enclosing_function_name(node, source)
673                    .unwrap_or_else(|| "<module>".to_string());
674
675                let mut arg_count = None;
676                let mut arg_node = node;
677                while let Some(parent) = arg_node.parent() {
678                    if parent.kind() == "call_expression" {
679                        if let Some(args) = parent.child_by_field_name("arguments") {
680                            arg_count = Some(args.named_child_count());
681                        }
682                        break;
683                    }
684                    arg_node = parent;
685                }
686
687                calls.push(CallInfo {
688                    caller,
689                    callee: call_name,
690                    line: node.start_position().row + 1,
691                    column: node.start_position().column,
692                    arg_count,
693                });
694            }
695        }
696    }
697
698    fn extract_imports(
699        source: &str,
700        compiled: &CompiledQueries,
701        root: Node<'_>,
702        max_depth: Option<u32>,
703        imports: &mut Vec<ImportInfo>,
704    ) {
705        let Some(ref import_query) = compiled.import else {
706            return;
707        };
708        let mut cursor = QueryCursor::new();
709        if let Some(depth) = max_depth {
710            cursor.set_max_start_depth(Some(depth));
711        }
712        let mut matches = cursor.matches(import_query, root, source.as_bytes());
713
714        while let Some(mat) = matches.next() {
715            for capture in mat.captures {
716                let capture_name = import_query.capture_names()[capture.index as usize];
717                if capture_name == "import_path" {
718                    let node = capture.node;
719                    let line = node.start_position().row + 1;
720                    extract_imports_from_node(&node, source, "", line, imports);
721                }
722            }
723        }
724    }
725
726    fn extract_impl_methods(
727        source: &str,
728        compiled: &CompiledQueries,
729        root: Node<'_>,
730        max_depth: Option<u32>,
731        classes: &mut [ClassInfo],
732    ) {
733        let Some(ref impl_query) = compiled.impl_block else {
734            return;
735        };
736        let mut cursor = QueryCursor::new();
737        if let Some(depth) = max_depth {
738            cursor.set_max_start_depth(Some(depth));
739        }
740        let mut matches = cursor.matches(impl_query, root, source.as_bytes());
741
742        while let Some(mat) = matches.next() {
743            let mut impl_type_name = String::new();
744            let mut method_name = String::new();
745            let mut method_line = 0usize;
746            let mut method_end_line = 0usize;
747            let mut method_params = String::new();
748            let mut method_return_type: Option<String> = None;
749
750            for capture in mat.captures {
751                let capture_name = impl_query.capture_names()[capture.index as usize];
752                let node = capture.node;
753                match capture_name {
754                    "impl_type" => {
755                        impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
756                    }
757                    "method_name" => {
758                        method_name = source[node.start_byte()..node.end_byte()].to_string();
759                    }
760                    "method_params" => {
761                        method_params = source[node.start_byte()..node.end_byte()].to_string();
762                    }
763                    "method" => {
764                        method_line = node.start_position().row + 1;
765                        method_end_line = node.end_position().row + 1;
766                        method_return_type = node
767                            .child_by_field_name("return_type")
768                            .map(|r| source[r.start_byte()..r.end_byte()].to_string());
769                    }
770                    _ => {}
771                }
772            }
773
774            if !impl_type_name.is_empty() && !method_name.is_empty() {
775                let func = FunctionInfo {
776                    name: method_name,
777                    line: method_line,
778                    end_line: method_end_line,
779                    parameters: if method_params.is_empty() {
780                        Vec::new()
781                    } else {
782                        vec![method_params]
783                    },
784                    return_type: method_return_type,
785                };
786                if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
787                    class.methods.push(func);
788                }
789            }
790        }
791    }
792
793    fn extract_references(
794        source: &str,
795        compiled: &CompiledQueries,
796        root: Node<'_>,
797        max_depth: Option<u32>,
798        references: &mut Vec<ReferenceInfo>,
799    ) {
800        let Some(ref ref_query) = compiled.reference else {
801            return;
802        };
803        let mut cursor = QueryCursor::new();
804        if let Some(depth) = max_depth {
805            cursor.set_max_start_depth(Some(depth));
806        }
807        let mut seen_refs = std::collections::HashSet::new();
808        let mut matches = cursor.matches(ref_query, root, source.as_bytes());
809
810        while let Some(mat) = matches.next() {
811            for capture in mat.captures {
812                let capture_name = ref_query.capture_names()[capture.index as usize];
813                if capture_name == "type_ref" {
814                    let node = capture.node;
815                    let type_ref = source[node.start_byte()..node.end_byte()].to_string();
816                    if seen_refs.insert(type_ref.clone()) {
817                        references.push(ReferenceInfo {
818                            symbol: type_ref,
819                            reference_type: ReferenceType::Usage,
820                            // location is intentionally empty here; set by the caller (analyze_file)
821                            location: String::new(),
822                            line: node.start_position().row + 1,
823                        });
824                    }
825                }
826            }
827        }
828    }
829}
830
831/// Extract `impl Trait for Type` blocks from Rust source.
832///
833/// Runs independently of `extract_references` to avoid shared deduplication state.
834/// Returns an empty vec for non-Rust source (no error; caller decides).
835pub fn extract_impl_traits(source: &str, path: &Path) -> Vec<ImplTraitInfo> {
836    let lang_info = match get_language_info("rust") {
837        Some(info) => info,
838        None => return vec![],
839    };
840
841    let compiled = match get_compiled_queries("rust") {
842        Ok(c) => c,
843        Err(_) => return vec![],
844    };
845
846    let query = match &compiled.impl_trait {
847        Some(q) => q,
848        None => return vec![],
849    };
850
851    let tree = match PARSER.with(|p| {
852        let mut parser = p.borrow_mut();
853        let _ = parser.set_language(&lang_info.language);
854        parser.parse(source, None)
855    }) {
856        Some(t) => t,
857        None => return vec![],
858    };
859
860    let root = tree.root_node();
861    let mut cursor = QueryCursor::new();
862    let mut matches = cursor.matches(query, root, source.as_bytes());
863    let mut results = Vec::new();
864
865    while let Some(mat) = matches.next() {
866        let mut trait_name = String::new();
867        let mut impl_type = String::new();
868        let mut line = 0usize;
869
870        for capture in mat.captures {
871            let capture_name = query.capture_names()[capture.index as usize];
872            let node = capture.node;
873            let text = source[node.start_byte()..node.end_byte()].to_string();
874            match capture_name {
875                "trait_name" => {
876                    trait_name = text;
877                    line = node.start_position().row + 1;
878                }
879                "impl_type" => {
880                    impl_type = text;
881                }
882                _ => {}
883            }
884        }
885
886        if !trait_name.is_empty() && !impl_type.is_empty() {
887            results.push(ImplTraitInfo {
888                trait_name,
889                impl_type,
890                path: path.to_path_buf(),
891                line,
892            });
893        }
894    }
895
896    results
897}
898
#[cfg(test)]
mod tests {
    use super::*;

    /// A recursion limit of `Some(0)` must yield the same number of functions as `None`.
    #[test]
    fn test_ast_recursion_limit_zero_is_unlimited() {
        let source = r#"fn hello() -> u32 { 42 }"#;

        // Run both extractions before asserting on either outcome.
        let unset_outcome = SemanticExtractor::extract(source, "rust", None);
        let zero_outcome = SemanticExtractor::extract(source, "rust", Some(0));
        assert!(unset_outcome.is_ok(), "extract with None failed");
        assert!(zero_outcome.is_ok(), "extract with Some(0) failed");

        let unset_count = unset_outcome.unwrap().functions.len();
        let zero_count = zero_outcome.unwrap().functions.len();

        // Guard against a vacuous comparison of two empty results.
        assert!(
            unset_count > 0,
            "extract with None should find at least one function in the test source"
        );
        assert_eq!(
            unset_count, zero_count,
            "ast_recursion_limit=0 should behave identically to unset (unlimited)"
        );
    }
}