Skip to main content

code_analyze_mcp/
parser.rs

1//! Tree-sitter-based parser for extracting semantic structure from source code.
2//!
3//! This module provides language-agnostic parsing using tree-sitter queries to extract
4//! functions, classes, imports, references, and other semantic elements from source files.
5//! Two main extractors handle different use cases:
6//!
7//! - [`ElementExtractor`]: Quick extraction of function and class counts.
8//! - [`SemanticExtractor`]: Detailed semantic analysis with calls, imports, and references.
9
10use crate::languages::get_language_info;
11use crate::types::{
12    CallInfo, ClassInfo, FunctionInfo, ImplTraitInfo, ImportInfo, ReferenceInfo, ReferenceType,
13    SemanticAnalysis,
14};
15use std::cell::RefCell;
16use std::collections::HashMap;
17use std::path::{Path, PathBuf};
18use std::sync::LazyLock;
19use thiserror::Error;
20use tracing::instrument;
21use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
22
/// Errors returned by the extractors in this module.
#[derive(Debug, Error)]
pub enum ParserError {
    /// The requested language has no registered language info or no compiled queries.
    /// The payload is the language name that was requested.
    #[error("Unsupported language: {0}")]
    UnsupportedLanguage(String),
    /// The parser could not be configured for the language, parsing produced no tree,
    /// or a parsing parameter (e.g. the recursion limit) was out of range.
    #[error("Failed to parse file: {0}")]
    ParseError(String),
    /// NOTE(review): not raised anywhere in the code visible here; presumably used by
    /// callers that read files from disk — confirm.
    #[error("Invalid UTF-8 in file")]
    InvalidUtf8,
    /// A tree-sitter query failed to compile; the payload names the query kind and language.
    #[error("Query error: {0}")]
    QueryError(String),
}
34
/// Compiled tree-sitter queries for a language.
/// Stores all query types: mandatory (element, call) and optional
/// (import, impl block, reference, impl trait).
struct CompiledQueries {
    /// Query whose `function` / `class` captures identify definitions.
    element: Query,
    /// Query whose `call` capture identifies call sites.
    call: Query,
    /// Query whose `import_path` capture identifies import nodes, if the language has one.
    import: Option<Query>,
    /// Query capturing methods together with the type they are implemented on, if any.
    impl_block: Option<Query>,
    /// Query for references, if the language declares one.
    reference: Option<Query>,
    /// Query for impl-trait blocks, if the language declares one.
    impl_trait: Option<Query>,
}
45
46/// Build compiled queries for a given language.
47fn build_compiled_queries(
48    lang_info: &crate::languages::LanguageInfo,
49) -> Result<CompiledQueries, ParserError> {
50    let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
51        ParserError::QueryError(format!(
52            "Failed to compile element query for {}: {}",
53            lang_info.name, e
54        ))
55    })?;
56
57    let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
58        ParserError::QueryError(format!(
59            "Failed to compile call query for {}: {}",
60            lang_info.name, e
61        ))
62    })?;
63
64    let import = if let Some(import_query_str) = lang_info.import_query {
65        Some(
66            Query::new(&lang_info.language, import_query_str).map_err(|e| {
67                ParserError::QueryError(format!(
68                    "Failed to compile import query for {}: {}",
69                    lang_info.name, e
70                ))
71            })?,
72        )
73    } else {
74        None
75    };
76
77    let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
78        Some(
79            Query::new(&lang_info.language, impl_query_str).map_err(|e| {
80                ParserError::QueryError(format!(
81                    "Failed to compile impl query for {}: {}",
82                    lang_info.name, e
83                ))
84            })?,
85        )
86    } else {
87        None
88    };
89
90    let reference = if let Some(ref_query_str) = lang_info.reference_query {
91        Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
92            ParserError::QueryError(format!(
93                "Failed to compile reference query for {}: {}",
94                lang_info.name, e
95            ))
96        })?)
97    } else {
98        None
99    };
100
101    let impl_trait = if let Some(impl_trait_query_str) = lang_info.impl_trait_query {
102        Some(
103            Query::new(&lang_info.language, impl_trait_query_str).map_err(|e| {
104                ParserError::QueryError(format!(
105                    "Failed to compile impl_trait query for {}: {}",
106                    lang_info.name, e
107                ))
108            })?,
109        )
110    } else {
111        None
112    };
113
114    Ok(CompiledQueries {
115        element,
116        call,
117        import,
118        impl_block,
119        reference,
120        impl_trait,
121    })
122}
123
124/// Initialize the query cache with compiled queries for all supported languages.
125fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
126    let supported_languages = [
127        "rust",
128        "python",
129        "typescript",
130        "tsx",
131        "go",
132        "java",
133        "fortran",
134    ];
135    let mut cache = HashMap::new();
136
137    for lang_name in &supported_languages {
138        if let Some(lang_info) = get_language_info(lang_name) {
139            match build_compiled_queries(&lang_info) {
140                Ok(compiled) => {
141                    cache.insert(*lang_name, compiled);
142                }
143                Err(e) => {
144                    tracing::error!(
145                        "Failed to compile queries for language {}: {}",
146                        lang_name,
147                        e
148                    );
149                }
150            }
151        }
152    }
153
154    cache
155}
156
/// Lazily initialized cache of compiled queries per language.
///
/// Populated by `init_query_cache` on first access and keyed by language name.
static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
    LazyLock::new(init_query_cache);
160
161/// Get compiled queries for a language from the cache.
162fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
163    QUERY_CACHE
164        .get(language)
165        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
166}
167
// One tree-sitter parser per thread, reused across extraction calls. Callers set the
// language on it before every parse, so no state from a previous call is relied upon.
thread_local! {
    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
}
171
172/// Canonical API for extracting element counts from source code.
173pub struct ElementExtractor;
174
175impl ElementExtractor {
176    /// Extract function and class counts from source code.
177    ///
178    /// # Errors
179    ///
180    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
181    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
182    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
183    #[instrument(skip_all, fields(language))]
184    pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
185        let lang_info = get_language_info(language)
186            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
187
188        let tree = PARSER.with(|p| {
189            let mut parser = p.borrow_mut();
190            parser
191                .set_language(&lang_info.language)
192                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
193            parser
194                .parse(source, None)
195                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
196        })?;
197
198        let compiled = get_compiled_queries(language)?;
199
200        let mut cursor = QueryCursor::new();
201        let mut function_count = 0;
202        let mut class_count = 0;
203
204        let mut matches = cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
205        while let Some(mat) = matches.next() {
206            for capture in mat.captures {
207                let capture_name = compiled.element.capture_names()[capture.index as usize];
208                match capture_name {
209                    "function" => function_count += 1,
210                    "class" => class_count += 1,
211                    _ => {}
212                }
213            }
214        }
215
216        tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
217
218        Ok((function_count, class_count))
219    }
220}
221
222/// Recursively extract `ImportInfo` entries from a use-clause node, respecting all Rust
223/// use-declaration forms (`scoped_identifier`, `scoped_use_list`, `use_list`,
224/// `use_as_clause`, `use_wildcard`, bare `identifier`).
225#[allow(clippy::too_many_lines)] // exhaustive match over all supported Rust use-clause forms; splitting harms readability
226fn extract_imports_from_node(
227    node: &Node,
228    source: &str,
229    prefix: &str,
230    line: usize,
231    imports: &mut Vec<ImportInfo>,
232) {
233    match node.kind() {
234        // Simple identifier: `use foo;` or an item inside `{foo, bar}`
235        "identifier" | "self" | "super" | "crate" => {
236            let name = source[node.start_byte()..node.end_byte()].to_string();
237            imports.push(ImportInfo {
238                module: prefix.to_string(),
239                items: vec![name],
240                line,
241            });
242        }
243        // Qualified path: `std::collections::HashMap`
244        "scoped_identifier" => {
245            let item = node
246                .child_by_field_name("name")
247                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
248                .unwrap_or_default();
249            let module = node.child_by_field_name("path").map_or_else(
250                || prefix.to_string(),
251                |p| {
252                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
253                    if prefix.is_empty() {
254                        path_text
255                    } else {
256                        format!("{prefix}::{path_text}")
257                    }
258                },
259            );
260            if !item.is_empty() {
261                imports.push(ImportInfo {
262                    module,
263                    items: vec![item],
264                    line,
265                });
266            }
267        }
268        // `std::{io, fs}` — path prefix followed by a brace list
269        "scoped_use_list" => {
270            let new_prefix = node.child_by_field_name("path").map_or_else(
271                || prefix.to_string(),
272                |p| {
273                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
274                    if prefix.is_empty() {
275                        path_text
276                    } else {
277                        format!("{prefix}::{path_text}")
278                    }
279                },
280            );
281            if let Some(list) = node.child_by_field_name("list") {
282                extract_imports_from_node(&list, source, &new_prefix, line, imports);
283            }
284        }
285        // `{HashMap, HashSet}` — brace-enclosed list of items
286        "use_list" => {
287            let mut cursor = node.walk();
288            for child in node.children(&mut cursor) {
289                match child.kind() {
290                    "{" | "}" | "," => {}
291                    _ => extract_imports_from_node(&child, source, prefix, line, imports),
292                }
293            }
294        }
295        // `std::io::*` — glob import
296        "use_wildcard" => {
297            let text = source[node.start_byte()..node.end_byte()].to_string();
298            let module = if let Some(stripped) = text.strip_suffix("::*") {
299                if prefix.is_empty() {
300                    stripped.to_string()
301                } else {
302                    format!("{prefix}::{stripped}")
303                }
304            } else {
305                prefix.to_string()
306            };
307            imports.push(ImportInfo {
308                module,
309                items: vec!["*".to_string()],
310                line,
311            });
312        }
313        // `io as stdio` or `std::io as stdio`
314        "use_as_clause" => {
315            let alias = node
316                .child_by_field_name("alias")
317                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
318                .unwrap_or_default();
319            let module = if let Some(path_node) = node.child_by_field_name("path") {
320                match path_node.kind() {
321                    "scoped_identifier" => path_node.child_by_field_name("path").map_or_else(
322                        || prefix.to_string(),
323                        |p| {
324                            let p_text = source[p.start_byte()..p.end_byte()].to_string();
325                            if prefix.is_empty() {
326                                p_text
327                            } else {
328                                format!("{prefix}::{p_text}")
329                            }
330                        },
331                    ),
332                    _ => prefix.to_string(),
333                }
334            } else {
335                prefix.to_string()
336            };
337            if !alias.is_empty() {
338                imports.push(ImportInfo {
339                    module,
340                    items: vec![alias],
341                    line,
342                });
343            }
344        }
345        // Python import_from_statement: `from module import name` or `from . import *`
346        "import_from_statement" => {
347            extract_python_import_from(node, source, line, imports);
348        }
349        // Fallback for non-Rust import nodes: capture full text as module
350        _ => {
351            let text = source[node.start_byte()..node.end_byte()]
352                .trim()
353                .to_string();
354            if !text.is_empty() {
355                imports.push(ImportInfo {
356                    module: text,
357                    items: vec![],
358                    line,
359                });
360            }
361        }
362    }
363}
364
365/// Extract an item name from a `dotted_name` or `aliased_import` child node.
366fn extract_import_item_name(child: &Node, source: &str) -> Option<String> {
367    match child.kind() {
368        "dotted_name" => {
369            let name = source[child.start_byte()..child.end_byte()]
370                .trim()
371                .to_string();
372            if name.is_empty() { None } else { Some(name) }
373        }
374        "aliased_import" => child.child_by_field_name("name").and_then(|n| {
375            let name = source[n.start_byte()..n.end_byte()].trim().to_string();
376            if name.is_empty() { None } else { Some(name) }
377        }),
378        _ => None,
379    }
380}
381
382/// Collect wildcard/named imports from an `import_list` node or from direct named children.
383fn collect_import_items(
384    node: &Node,
385    source: &str,
386    is_wildcard: &mut bool,
387    items: &mut Vec<String>,
388) {
389    // Prefer import_list child (wraps `from x import a, b`)
390    if let Some(import_list) = node.child_by_field_name("import_list") {
391        let mut cursor = import_list.walk();
392        for child in import_list.named_children(&mut cursor) {
393            if child.kind() == "wildcard_import" {
394                *is_wildcard = true;
395            } else if let Some(name) = extract_import_item_name(&child, source) {
396                items.push(name);
397            }
398        }
399        return;
400    }
401    // No import_list: single-name or wildcard as direct child (skip first named child = module_name)
402    let mut cursor = node.walk();
403    let mut first = true;
404    for child in node.named_children(&mut cursor) {
405        if first {
406            first = false;
407            continue;
408        }
409        if child.kind() == "wildcard_import" {
410            *is_wildcard = true;
411        } else if let Some(name) = extract_import_item_name(&child, source) {
412            items.push(name);
413        }
414    }
415}
416
417/// Handle Python `import_from_statement` node.
418fn extract_python_import_from(
419    node: &Node,
420    source: &str,
421    line: usize,
422    imports: &mut Vec<ImportInfo>,
423) {
424    let module = if let Some(m) = node.child_by_field_name("module_name") {
425        source[m.start_byte()..m.end_byte()].trim().to_string()
426    } else if let Some(r) = node.child_by_field_name("relative_import") {
427        source[r.start_byte()..r.end_byte()].trim().to_string()
428    } else {
429        String::new()
430    };
431
432    let mut is_wildcard = false;
433    let mut items = Vec::new();
434    collect_import_items(node, source, &mut is_wildcard, &mut items);
435
436    if !module.is_empty() {
437        imports.push(ImportInfo {
438            module,
439            items: if is_wildcard {
440                vec!["*".to_string()]
441            } else {
442                items
443            },
444            line,
445        });
446    }
447}
448
/// Detailed semantic analysis of a source file: functions, classes, imports, references,
/// calls (with call frequency) and, for Rust, impl-trait blocks.
pub struct SemanticExtractor;
450
451impl SemanticExtractor {
452    /// Extract semantic information from source code.
453    ///
454    /// # Errors
455    ///
456    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
457    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
458    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
459    #[instrument(skip_all, fields(language))]
460    pub fn extract(
461        source: &str,
462        language: &str,
463        ast_recursion_limit: Option<usize>,
464    ) -> Result<SemanticAnalysis, ParserError> {
465        let lang_info = get_language_info(language)
466            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
467
468        let tree = PARSER.with(|p| {
469            let mut parser = p.borrow_mut();
470            parser
471                .set_language(&lang_info.language)
472                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
473            parser
474                .parse(source, None)
475                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
476        })?;
477
478        // 0 is not a useful depth (visits root node only, returning zero results).
479        // Treat 0 as None (unlimited). See #339.
480        let max_depth: Option<u32> = ast_recursion_limit
481            .filter(|&limit| limit > 0)
482            .map(|limit| {
483                u32::try_from(limit).map_err(|_| {
484                    ParserError::ParseError(format!(
485                        "ast_recursion_limit {} exceeds maximum supported value {}",
486                        limit,
487                        u32::MAX
488                    ))
489                })
490            })
491            .transpose()?;
492
493        let compiled = get_compiled_queries(language)?;
494        let root = tree.root_node();
495
496        let mut functions = Vec::new();
497        let mut classes = Vec::new();
498        let mut imports = Vec::new();
499        let mut references = Vec::new();
500        let mut call_frequency = HashMap::new();
501        let mut calls = Vec::new();
502
503        Self::extract_elements(
504            source,
505            compiled,
506            root,
507            max_depth,
508            &lang_info,
509            &mut functions,
510            &mut classes,
511        );
512        Self::extract_calls(
513            source,
514            compiled,
515            root,
516            max_depth,
517            &mut calls,
518            &mut call_frequency,
519        );
520        Self::extract_imports(source, compiled, root, max_depth, &mut imports);
521        Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes);
522        Self::extract_references(source, compiled, root, max_depth, &mut references);
523
524        // Extract impl-trait blocks for Rust files (empty for other languages)
525        let impl_traits = if language == "rust" {
526            Self::extract_impl_traits_from_tree(source, compiled, root)
527        } else {
528            vec![]
529        };
530
531        tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), impl_traits = impl_traits.len(), "extraction complete");
532
533        Ok(SemanticAnalysis {
534            functions,
535            classes,
536            imports,
537            references,
538            call_frequency,
539            calls,
540            impl_traits,
541        })
542    }
543
544    fn extract_elements(
545        source: &str,
546        compiled: &CompiledQueries,
547        root: Node<'_>,
548        max_depth: Option<u32>,
549        lang_info: &crate::languages::LanguageInfo,
550        functions: &mut Vec<FunctionInfo>,
551        classes: &mut Vec<ClassInfo>,
552    ) {
553        let mut cursor = QueryCursor::new();
554        if let Some(depth) = max_depth {
555            cursor.set_max_start_depth(Some(depth));
556        }
557        let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
558        let mut seen_functions = std::collections::HashSet::new();
559
560        while let Some(mat) = matches.next() {
561            for capture in mat.captures {
562                let capture_name = compiled.element.capture_names()[capture.index as usize];
563                let node = capture.node;
564                match capture_name {
565                    "function" => {
566                        if let Some(name_node) = node.child_by_field_name("name") {
567                            let name =
568                                source[name_node.start_byte()..name_node.end_byte()].to_string();
569                            let func_key = (name.clone(), node.start_position().row);
570                            if !seen_functions.contains(&func_key) {
571                                seen_functions.insert(func_key);
572                                let params = node
573                                    .child_by_field_name("parameters")
574                                    .map(|p| source[p.start_byte()..p.end_byte()].to_string())
575                                    .unwrap_or_default();
576                                let return_type = node
577                                    .child_by_field_name("return_type")
578                                    .map(|r| source[r.start_byte()..r.end_byte()].to_string());
579                                functions.push(FunctionInfo {
580                                    name,
581                                    line: node.start_position().row + 1,
582                                    end_line: node.end_position().row + 1,
583                                    parameters: if params.is_empty() {
584                                        Vec::new()
585                                    } else {
586                                        vec![params]
587                                    },
588                                    return_type,
589                                });
590                            }
591                        }
592                    }
593                    "class" => {
594                        if let Some(name_node) = node.child_by_field_name("name") {
595                            let name =
596                                source[name_node.start_byte()..name_node.end_byte()].to_string();
597                            let inherits = if let Some(handler) = lang_info.extract_inheritance {
598                                handler(&node, source)
599                            } else {
600                                Vec::new()
601                            };
602                            classes.push(ClassInfo {
603                                name,
604                                line: node.start_position().row + 1,
605                                end_line: node.end_position().row + 1,
606                                methods: Vec::new(),
607                                fields: Vec::new(),
608                                inherits,
609                            });
610                        }
611                    }
612                    _ => {}
613                }
614            }
615        }
616    }
617
618    /// Returns the name of the enclosing function/method/subroutine for a given AST node,
619    /// by walking ancestors and matching all language-specific function container kinds.
620    fn enclosing_function_name(mut node: tree_sitter::Node<'_>, source: &str) -> Option<String> {
621        let mut depth = 0u32;
622        while let Some(parent) = node.parent() {
623            depth += 1;
624            // Cap at 64 hops: real function nesting rarely exceeds ~10 levels; 64 is a generous
625            // upper bound that guards against pathological/malformed ASTs without false negatives
626            // on legitimate code. Returns None (treated as <module>) when the cap is hit.
627            if depth > 64 {
628                return None;
629            }
630            let name_node = match parent.kind() {
631                // Direct name field: Rust, Python, Go, Java, TypeScript/TSX
632                "function_item"
633                | "method_item"
634                | "function_definition"
635                | "function_declaration"
636                | "method_declaration"
637                | "method_definition" => parent.child_by_field_name("name"),
638                // Fortran subroutine: name is inside subroutine_statement child
639                "subroutine" => {
640                    let mut cursor = parent.walk();
641                    parent
642                        .children(&mut cursor)
643                        .find(|c| c.kind() == "subroutine_statement")
644                        .and_then(|s| s.child_by_field_name("name"))
645                }
646                // Fortran function: name is inside function_statement child
647                "function" => {
648                    let mut cursor = parent.walk();
649                    parent
650                        .children(&mut cursor)
651                        .find(|c| c.kind() == "function_statement")
652                        .and_then(|s| s.child_by_field_name("name"))
653                }
654                _ => {
655                    node = parent;
656                    continue;
657                }
658            };
659            return name_node.map(|n| source[n.start_byte()..n.end_byte()].to_string());
660        }
661        // The loop exits here only when no parent was found (i.e., we reached the tree root
662        // without finding a function container). If the depth cap fired, we returned None early
663        // above. Nothing to assert here.
664        None
665    }
666
667    fn extract_calls(
668        source: &str,
669        compiled: &CompiledQueries,
670        root: Node<'_>,
671        max_depth: Option<u32>,
672        calls: &mut Vec<CallInfo>,
673        call_frequency: &mut HashMap<String, usize>,
674    ) {
675        let mut cursor = QueryCursor::new();
676        if let Some(depth) = max_depth {
677            cursor.set_max_start_depth(Some(depth));
678        }
679        let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
680
681        while let Some(mat) = matches.next() {
682            for capture in mat.captures {
683                let capture_name = compiled.call.capture_names()[capture.index as usize];
684                if capture_name != "call" {
685                    continue;
686                }
687                let node = capture.node;
688                let call_name = source[node.start_byte()..node.end_byte()].to_string();
689                *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
690
691                let caller = Self::enclosing_function_name(node, source)
692                    .unwrap_or_else(|| "<module>".to_string());
693
694                let mut arg_count = None;
695                let mut arg_node = node;
696                let mut hop = 0u32;
697                let mut cap_hit = false;
698                while let Some(parent) = arg_node.parent() {
699                    hop += 1;
700                    // Bounded parent traversal: cap at 16 hops to guard against pathological
701                    // walks on malformed/degenerate trees. Real call-expression nesting is
702                    // shallow (typically 1-3 levels). When the cap is hit we stop searching and
703                    // leave arg_count as None; the caller is still recorded, just without
704                    // argument-count information.
705                    if hop > 16 {
706                        cap_hit = true;
707                        break;
708                    }
709                    if parent.kind() == "call_expression" {
710                        if let Some(args) = parent.child_by_field_name("arguments") {
711                            arg_count = Some(args.named_child_count());
712                        }
713                        break;
714                    }
715                    arg_node = parent;
716                }
717                debug_assert!(
718                    !cap_hit,
719                    "extract_calls: parent traversal cap reached (hop > 16)"
720                );
721
722                calls.push(CallInfo {
723                    caller,
724                    callee: call_name,
725                    line: node.start_position().row + 1,
726                    column: node.start_position().column,
727                    arg_count,
728                });
729            }
730        }
731    }
732
733    fn extract_imports(
734        source: &str,
735        compiled: &CompiledQueries,
736        root: Node<'_>,
737        max_depth: Option<u32>,
738        imports: &mut Vec<ImportInfo>,
739    ) {
740        let Some(ref import_query) = compiled.import else {
741            return;
742        };
743        let mut cursor = QueryCursor::new();
744        if let Some(depth) = max_depth {
745            cursor.set_max_start_depth(Some(depth));
746        }
747        let mut matches = cursor.matches(import_query, root, source.as_bytes());
748
749        while let Some(mat) = matches.next() {
750            for capture in mat.captures {
751                let capture_name = import_query.capture_names()[capture.index as usize];
752                if capture_name == "import_path" {
753                    let node = capture.node;
754                    let line = node.start_position().row + 1;
755                    extract_imports_from_node(&node, source, "", line, imports);
756                }
757            }
758        }
759    }
760
761    fn extract_impl_methods(
762        source: &str,
763        compiled: &CompiledQueries,
764        root: Node<'_>,
765        max_depth: Option<u32>,
766        classes: &mut [ClassInfo],
767    ) {
768        let Some(ref impl_query) = compiled.impl_block else {
769            return;
770        };
771        let mut cursor = QueryCursor::new();
772        if let Some(depth) = max_depth {
773            cursor.set_max_start_depth(Some(depth));
774        }
775        let mut matches = cursor.matches(impl_query, root, source.as_bytes());
776
777        while let Some(mat) = matches.next() {
778            let mut impl_type_name = String::new();
779            let mut method_name = String::new();
780            let mut method_line = 0usize;
781            let mut method_end_line = 0usize;
782            let mut method_params = String::new();
783            let mut method_return_type: Option<String> = None;
784
785            for capture in mat.captures {
786                let capture_name = impl_query.capture_names()[capture.index as usize];
787                let node = capture.node;
788                match capture_name {
789                    "impl_type" => {
790                        impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
791                    }
792                    "method_name" => {
793                        method_name = source[node.start_byte()..node.end_byte()].to_string();
794                    }
795                    "method_params" => {
796                        method_params = source[node.start_byte()..node.end_byte()].to_string();
797                    }
798                    "method" => {
799                        method_line = node.start_position().row + 1;
800                        method_end_line = node.end_position().row + 1;
801                        method_return_type = node
802                            .child_by_field_name("return_type")
803                            .map(|r| source[r.start_byte()..r.end_byte()].to_string());
804                    }
805                    _ => {}
806                }
807            }
808
809            if !impl_type_name.is_empty() && !method_name.is_empty() {
810                let func = FunctionInfo {
811                    name: method_name,
812                    line: method_line,
813                    end_line: method_end_line,
814                    parameters: if method_params.is_empty() {
815                        Vec::new()
816                    } else {
817                        vec![method_params]
818                    },
819                    return_type: method_return_type,
820                };
821                if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
822                    class.methods.push(func);
823                }
824            }
825        }
826    }
827
828    fn extract_references(
829        source: &str,
830        compiled: &CompiledQueries,
831        root: Node<'_>,
832        max_depth: Option<u32>,
833        references: &mut Vec<ReferenceInfo>,
834    ) {
835        let Some(ref ref_query) = compiled.reference else {
836            return;
837        };
838        let mut cursor = QueryCursor::new();
839        if let Some(depth) = max_depth {
840            cursor.set_max_start_depth(Some(depth));
841        }
842        let mut seen_refs = std::collections::HashSet::new();
843        let mut matches = cursor.matches(ref_query, root, source.as_bytes());
844
845        while let Some(mat) = matches.next() {
846            for capture in mat.captures {
847                let capture_name = ref_query.capture_names()[capture.index as usize];
848                if capture_name == "type_ref" {
849                    let node = capture.node;
850                    let type_ref = source[node.start_byte()..node.end_byte()].to_string();
851                    if seen_refs.insert(type_ref.clone()) {
852                        references.push(ReferenceInfo {
853                            symbol: type_ref,
854                            reference_type: ReferenceType::Usage,
855                            // location is intentionally empty here; set by the caller (analyze_file)
856                            location: String::new(),
857                            line: node.start_position().row + 1,
858                        });
859                    }
860                }
861            }
862        }
863    }
864
865    /// Extract impl-trait blocks from an already-parsed tree.
866    ///
867    /// Called during `extract()` for Rust files to avoid a second parse.
868    /// Returns an empty vec if the query is not available.
869    fn extract_impl_traits_from_tree(
870        source: &str,
871        compiled: &CompiledQueries,
872        root: Node<'_>,
873    ) -> Vec<ImplTraitInfo> {
874        let Some(query) = &compiled.impl_trait else {
875            return vec![];
876        };
877
878        let mut cursor = QueryCursor::new();
879        let mut matches = cursor.matches(query, root, source.as_bytes());
880        let mut results = Vec::new();
881
882        while let Some(mat) = matches.next() {
883            let mut trait_name = String::new();
884            let mut impl_type = String::new();
885            let mut line = 0usize;
886
887            for capture in mat.captures {
888                let capture_name = query.capture_names()[capture.index as usize];
889                let node = capture.node;
890                let text = source[node.start_byte()..node.end_byte()].to_string();
891                match capture_name {
892                    "trait_name" => {
893                        trait_name = text;
894                        line = node.start_position().row + 1;
895                    }
896                    "impl_type" => {
897                        impl_type = text;
898                    }
899                    _ => {}
900                }
901            }
902
903            if !trait_name.is_empty() && !impl_type.is_empty() {
904                results.push(ImplTraitInfo {
905                    trait_name,
906                    impl_type,
907                    path: PathBuf::new(), // Path will be set by caller
908                    line,
909                });
910            }
911        }
912
913        results
914    }
915}
916
917/// Extract `impl Trait for Type` blocks from Rust source.
918///
919/// Runs independently of `extract_references` to avoid shared deduplication state.
920/// Returns an empty vec for non-Rust source (no error; caller decides).
921#[must_use]
922pub fn extract_impl_traits(source: &str, path: &Path) -> Vec<ImplTraitInfo> {
923    let Some(lang_info) = get_language_info("rust") else {
924        return vec![];
925    };
926
927    let Ok(compiled) = get_compiled_queries("rust") else {
928        return vec![];
929    };
930
931    let Some(query) = &compiled.impl_trait else {
932        return vec![];
933    };
934
935    let Some(tree) = PARSER.with(|p| {
936        let mut parser = p.borrow_mut();
937        let _ = parser.set_language(&lang_info.language);
938        parser.parse(source, None)
939    }) else {
940        return vec![];
941    };
942
943    let root = tree.root_node();
944    let mut cursor = QueryCursor::new();
945    let mut matches = cursor.matches(query, root, source.as_bytes());
946    let mut results = Vec::new();
947
948    while let Some(mat) = matches.next() {
949        let mut trait_name = String::new();
950        let mut impl_type = String::new();
951        let mut line = 0usize;
952
953        for capture in mat.captures {
954            let capture_name = query.capture_names()[capture.index as usize];
955            let node = capture.node;
956            let text = source[node.start_byte()..node.end_byte()].to_string();
957            match capture_name {
958                "trait_name" => {
959                    trait_name = text;
960                    line = node.start_position().row + 1;
961                }
962                "impl_type" => {
963                    impl_type = text;
964                }
965                _ => {}
966            }
967        }
968
969        if !trait_name.is_empty() && !impl_type.is_empty() {
970            results.push(ImplTraitInfo {
971                trait_name,
972                impl_type,
973                path: path.to_path_buf(),
974                line,
975            });
976        }
977    }
978
979    results
980}
981
#[cfg(test)]
mod tests {
    use super::*;

    /// A recursion limit of `Some(0)` must find the same number of functions as `None`.
    #[test]
    fn test_ast_recursion_limit_zero_is_unlimited() {
        let source = "fn hello() -> u32 { 42 }";

        let unlimited = SemanticExtractor::extract(source, "rust", None);
        let zero_limit = SemanticExtractor::extract(source, "rust", Some(0));
        assert!(unlimited.is_ok(), "extract with None failed");
        assert!(zero_limit.is_ok(), "extract with Some(0) failed");

        let unlimited_count = unlimited.unwrap().functions.len();
        let zero_limit_count = zero_limit.unwrap().functions.len();

        // Guard against a vacuous comparison of two empty results.
        assert!(
            unlimited_count >= 1,
            "extract with None should find at least one function in the test source"
        );
        assert_eq!(
            unlimited_count, zero_limit_count,
            "ast_recursion_limit=0 should behave identically to unset (unlimited)"
        );
    }
}