Skip to main content

code_analyze_core/
parser.rs

1// SPDX-FileCopyrightText: 2026 code-analyze-mcp contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Tree-sitter-based parser for extracting semantic structure from source code.
4//!
5//! This module provides language-agnostic parsing using tree-sitter queries to extract
6//! functions, classes, imports, references, and other semantic elements from source files.
7//! Two main extractors handle different use cases:
8//!
9//! - [`ElementExtractor`]: Quick extraction of function and class counts.
10//! - [`SemanticExtractor`]: Detailed semantic analysis with calls, imports, and references.
11
12use crate::languages::get_language_info;
13use crate::types::{
14    CallInfo, ClassInfo, FunctionInfo, ImplTraitInfo, ImportInfo, ReferenceInfo, ReferenceType,
15    SemanticAnalysis,
16};
17use std::cell::RefCell;
18use std::collections::HashMap;
19use std::path::{Path, PathBuf};
20use std::sync::LazyLock;
21use thiserror::Error;
22use tracing::instrument;
23use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
24
25#[derive(Debug, Error)]
26#[non_exhaustive]
27pub enum ParserError {
28    #[error("Unsupported language: {0}")]
29    UnsupportedLanguage(String),
30    #[error("Failed to parse file: {0}")]
31    ParseError(String),
32    #[error("Invalid UTF-8 in file")]
33    InvalidUtf8,
34    #[error("Query error: {0}")]
35    QueryError(String),
36}
37
38/// Compiled tree-sitter queries for a language.
39/// Stores all query types: mandatory (element, call) and optional (import, impl, reference).
40struct CompiledQueries {
41    element: Query,
42    call: Query,
43    import: Option<Query>,
44    impl_block: Option<Query>,
45    reference: Option<Query>,
46    impl_trait: Option<Query>,
47}
48
49/// Build compiled queries for a given language.
50///
51/// The `map_err` closures inside are only reachable if a hardcoded query string is
52/// invalid, which cannot happen at runtime -- exclude them from coverage instrumentation.
53#[cfg_attr(coverage_nightly, coverage(off))]
54fn build_compiled_queries(
55    lang_info: &crate::languages::LanguageInfo,
56) -> Result<CompiledQueries, ParserError> {
57    let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
58        ParserError::QueryError(format!(
59            "Failed to compile element query for {}: {}",
60            lang_info.name, e
61        ))
62    })?;
63
64    let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
65        ParserError::QueryError(format!(
66            "Failed to compile call query for {}: {}",
67            lang_info.name, e
68        ))
69    })?;
70
71    let import = if let Some(import_query_str) = lang_info.import_query {
72        Some(
73            Query::new(&lang_info.language, import_query_str).map_err(|e| {
74                ParserError::QueryError(format!(
75                    "Failed to compile import query for {}: {}",
76                    lang_info.name, e
77                ))
78            })?,
79        )
80    } else {
81        None
82    };
83
84    let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
85        Some(
86            Query::new(&lang_info.language, impl_query_str).map_err(|e| {
87                ParserError::QueryError(format!(
88                    "Failed to compile impl query for {}: {}",
89                    lang_info.name, e
90                ))
91            })?,
92        )
93    } else {
94        None
95    };
96
97    let reference = if let Some(ref_query_str) = lang_info.reference_query {
98        Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
99            ParserError::QueryError(format!(
100                "Failed to compile reference query for {}: {}",
101                lang_info.name, e
102            ))
103        })?)
104    } else {
105        None
106    };
107
108    let impl_trait = if let Some(impl_trait_query_str) = lang_info.impl_trait_query {
109        Some(
110            Query::new(&lang_info.language, impl_trait_query_str).map_err(|e| {
111                ParserError::QueryError(format!(
112                    "Failed to compile impl_trait query for {}: {}",
113                    lang_info.name, e
114                ))
115            })?,
116        )
117    } else {
118        None
119    };
120
121    Ok(CompiledQueries {
122        element,
123        call,
124        import,
125        impl_block,
126        reference,
127        impl_trait,
128    })
129}
130
131/// Initialize the query cache with compiled queries for all supported languages.
132///
133/// Excluded from coverage: the `Err` arm is unreachable because `build_compiled_queries`
134/// only fails on invalid hardcoded query strings.
135#[cfg_attr(coverage_nightly, coverage(off))]
136fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
137    let supported_languages = [
138        "rust",
139        "python",
140        "typescript",
141        "tsx",
142        "go",
143        "java",
144        "fortran",
145    ];
146    let mut cache = HashMap::new();
147
148    for lang_name in &supported_languages {
149        if let Some(lang_info) = get_language_info(lang_name) {
150            match build_compiled_queries(&lang_info) {
151                Ok(compiled) => {
152                    cache.insert(*lang_name, compiled);
153                }
154                Err(e) => {
155                    tracing::error!(
156                        "Failed to compile queries for language {}: {}",
157                        lang_name,
158                        e
159                    );
160                }
161            }
162        }
163    }
164
165    cache
166}
167
168/// Lazily initialized cache of compiled queries per language.
169static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
170    LazyLock::new(init_query_cache);
171
172/// Get compiled queries for a language from the cache.
173fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
174    QUERY_CACHE
175        .get(language)
176        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
177}
178
179thread_local! {
180    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
181}
182
183/// Canonical API for extracting element counts from source code.
184pub struct ElementExtractor;
185
186impl ElementExtractor {
187    /// Extract function and class counts from source code.
188    ///
189    /// # Errors
190    ///
191    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
192    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
193    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
194    #[instrument(skip_all, fields(language))]
195    pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
196        let lang_info = get_language_info(language)
197            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
198
199        let tree = PARSER.with(|p| {
200            let mut parser = p.borrow_mut();
201            parser
202                .set_language(&lang_info.language)
203                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
204            parser
205                .parse(source, None)
206                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
207        })?;
208
209        let compiled = get_compiled_queries(language)?;
210
211        let mut cursor = QueryCursor::new();
212        let mut function_count = 0;
213        let mut class_count = 0;
214
215        let mut matches = cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
216        while let Some(mat) = matches.next() {
217            for capture in mat.captures {
218                let capture_name = compiled.element.capture_names()[capture.index as usize];
219                match capture_name {
220                    "function" => function_count += 1,
221                    "class" => class_count += 1,
222                    _ => {}
223                }
224            }
225        }
226
227        tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
228
229        Ok((function_count, class_count))
230    }
231}
232
233/// Recursively extract `ImportInfo` entries from a use-clause node, respecting all Rust
234/// use-declaration forms (`scoped_identifier`, `scoped_use_list`, `use_list`,
235/// `use_as_clause`, `use_wildcard`, bare `identifier`).
236#[allow(clippy::too_many_lines)] // exhaustive match over all supported Rust use-clause forms; splitting harms readability
237fn extract_imports_from_node(
238    node: &Node,
239    source: &str,
240    prefix: &str,
241    line: usize,
242    imports: &mut Vec<ImportInfo>,
243) {
244    match node.kind() {
245        // Simple identifier: `use foo;` or an item inside `{foo, bar}`
246        "identifier" | "self" | "super" | "crate" => {
247            let name = source[node.start_byte()..node.end_byte()].to_string();
248            imports.push(ImportInfo {
249                module: prefix.to_string(),
250                items: vec![name],
251                line,
252            });
253        }
254        // Qualified path: `std::collections::HashMap`
255        "scoped_identifier" => {
256            let item = node
257                .child_by_field_name("name")
258                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
259                .unwrap_or_default();
260            let module = node.child_by_field_name("path").map_or_else(
261                || prefix.to_string(),
262                |p| {
263                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
264                    if prefix.is_empty() {
265                        path_text
266                    } else {
267                        format!("{prefix}::{path_text}")
268                    }
269                },
270            );
271            if !item.is_empty() {
272                imports.push(ImportInfo {
273                    module,
274                    items: vec![item],
275                    line,
276                });
277            }
278        }
279        // `std::{io, fs}` — path prefix followed by a brace list
280        "scoped_use_list" => {
281            let new_prefix = node.child_by_field_name("path").map_or_else(
282                || prefix.to_string(),
283                |p| {
284                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
285                    if prefix.is_empty() {
286                        path_text
287                    } else {
288                        format!("{prefix}::{path_text}")
289                    }
290                },
291            );
292            if let Some(list) = node.child_by_field_name("list") {
293                extract_imports_from_node(&list, source, &new_prefix, line, imports);
294            }
295        }
296        // `{HashMap, HashSet}` — brace-enclosed list of items
297        "use_list" => {
298            let mut cursor = node.walk();
299            for child in node.children(&mut cursor) {
300                match child.kind() {
301                    "{" | "}" | "," => {}
302                    _ => extract_imports_from_node(&child, source, prefix, line, imports),
303                }
304            }
305        }
306        // `std::io::*` — glob import
307        "use_wildcard" => {
308            let text = source[node.start_byte()..node.end_byte()].to_string();
309            let module = if let Some(stripped) = text.strip_suffix("::*") {
310                if prefix.is_empty() {
311                    stripped.to_string()
312                } else {
313                    format!("{prefix}::{stripped}")
314                }
315            } else {
316                prefix.to_string()
317            };
318            imports.push(ImportInfo {
319                module,
320                items: vec!["*".to_string()],
321                line,
322            });
323        }
324        // `io as stdio` or `std::io as stdio`
325        "use_as_clause" => {
326            let alias = node
327                .child_by_field_name("alias")
328                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
329                .unwrap_or_default();
330            let module = if let Some(path_node) = node.child_by_field_name("path") {
331                match path_node.kind() {
332                    "scoped_identifier" => path_node.child_by_field_name("path").map_or_else(
333                        || prefix.to_string(),
334                        |p| {
335                            let p_text = source[p.start_byte()..p.end_byte()].to_string();
336                            if prefix.is_empty() {
337                                p_text
338                            } else {
339                                format!("{prefix}::{p_text}")
340                            }
341                        },
342                    ),
343                    _ => prefix.to_string(),
344                }
345            } else {
346                prefix.to_string()
347            };
348            if !alias.is_empty() {
349                imports.push(ImportInfo {
350                    module,
351                    items: vec![alias],
352                    line,
353                });
354            }
355        }
356        // Python import_from_statement: `from module import name` or `from . import *`
357        "import_from_statement" => {
358            extract_python_import_from(node, source, line, imports);
359        }
360        // Fallback for non-Rust import nodes: capture full text as module
361        _ => {
362            let text = source[node.start_byte()..node.end_byte()]
363                .trim()
364                .to_string();
365            if !text.is_empty() {
366                imports.push(ImportInfo {
367                    module: text,
368                    items: vec![],
369                    line,
370                });
371            }
372        }
373    }
374}
375
376/// Extract an item name from a `dotted_name` or `aliased_import` child node.
377fn extract_import_item_name(child: &Node, source: &str) -> Option<String> {
378    match child.kind() {
379        "dotted_name" => {
380            let name = source[child.start_byte()..child.end_byte()]
381                .trim()
382                .to_string();
383            if name.is_empty() { None } else { Some(name) }
384        }
385        "aliased_import" => child.child_by_field_name("name").and_then(|n| {
386            let name = source[n.start_byte()..n.end_byte()].trim().to_string();
387            if name.is_empty() { None } else { Some(name) }
388        }),
389        _ => None,
390    }
391}
392
393/// Collect wildcard/named imports from an `import_list` node or from direct named children.
394fn collect_import_items(
395    node: &Node,
396    source: &str,
397    is_wildcard: &mut bool,
398    items: &mut Vec<String>,
399) {
400    // Prefer import_list child (wraps `from x import a, b`)
401    if let Some(import_list) = node.child_by_field_name("import_list") {
402        let mut cursor = import_list.walk();
403        for child in import_list.named_children(&mut cursor) {
404            if child.kind() == "wildcard_import" {
405                *is_wildcard = true;
406            } else if let Some(name) = extract_import_item_name(&child, source) {
407                items.push(name);
408            }
409        }
410        return;
411    }
412    // No import_list: single-name or wildcard as direct child (skip first named child = module_name)
413    let mut cursor = node.walk();
414    let mut first = true;
415    for child in node.named_children(&mut cursor) {
416        if first {
417            first = false;
418            continue;
419        }
420        if child.kind() == "wildcard_import" {
421            *is_wildcard = true;
422        } else if let Some(name) = extract_import_item_name(&child, source) {
423            items.push(name);
424        }
425    }
426}
427
428/// Handle Python `import_from_statement` node.
429fn extract_python_import_from(
430    node: &Node,
431    source: &str,
432    line: usize,
433    imports: &mut Vec<ImportInfo>,
434) {
435    let module = if let Some(m) = node.child_by_field_name("module_name") {
436        source[m.start_byte()..m.end_byte()].trim().to_string()
437    } else if let Some(r) = node.child_by_field_name("relative_import") {
438        source[r.start_byte()..r.end_byte()].trim().to_string()
439    } else {
440        String::new()
441    };
442
443    let mut is_wildcard = false;
444    let mut items = Vec::new();
445    collect_import_items(node, source, &mut is_wildcard, &mut items);
446
447    if !module.is_empty() {
448        imports.push(ImportInfo {
449            module,
450            items: if is_wildcard {
451                vec!["*".to_string()]
452            } else {
453                items
454            },
455            line,
456        });
457    }
458}
459
/// Canonical API for detailed semantic analysis: functions, classes, imports, references,
/// calls and impl-trait blocks.
pub struct SemanticExtractor;
461
462impl SemanticExtractor {
463    /// Extract semantic information from source code.
464    ///
465    /// # Errors
466    ///
467    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
468    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
469    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
470    #[instrument(skip_all, fields(language))]
471    pub fn extract(
472        source: &str,
473        language: &str,
474        ast_recursion_limit: Option<usize>,
475    ) -> Result<SemanticAnalysis, ParserError> {
476        let lang_info = get_language_info(language)
477            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
478
479        let tree = PARSER.with(|p| {
480            let mut parser = p.borrow_mut();
481            parser
482                .set_language(&lang_info.language)
483                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
484            parser
485                .parse(source, None)
486                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
487        })?;
488
489        // 0 is not a useful depth (visits root node only, returning zero results).
490        // Treat 0 as None (unlimited). See #339.
491        let max_depth: Option<u32> = ast_recursion_limit
492            .filter(|&limit| limit > 0)
493            .map(|limit| {
494                u32::try_from(limit).map_err(|_| {
495                    ParserError::ParseError(format!(
496                        "ast_recursion_limit {} exceeds maximum supported value {}",
497                        limit,
498                        u32::MAX
499                    ))
500                })
501            })
502            .transpose()?;
503
504        let compiled = get_compiled_queries(language)?;
505        let root = tree.root_node();
506
507        let mut functions = Vec::new();
508        let mut classes = Vec::new();
509        let mut imports = Vec::new();
510        let mut references = Vec::new();
511        let mut call_frequency = HashMap::new();
512        let mut calls = Vec::new();
513
514        Self::extract_elements(
515            source,
516            compiled,
517            root,
518            max_depth,
519            &lang_info,
520            &mut functions,
521            &mut classes,
522        );
523        Self::extract_calls(
524            source,
525            compiled,
526            root,
527            max_depth,
528            &mut calls,
529            &mut call_frequency,
530        );
531        Self::extract_imports(source, compiled, root, max_depth, &mut imports);
532        Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes);
533        Self::extract_references(source, compiled, root, max_depth, &mut references);
534
535        // Extract impl-trait blocks for Rust files (empty for other languages)
536        let impl_traits = if language == "rust" {
537            Self::extract_impl_traits_from_tree(source, compiled, root)
538        } else {
539            vec![]
540        };
541
542        tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), impl_traits = impl_traits.len(), "extraction complete");
543
544        Ok(SemanticAnalysis {
545            functions,
546            classes,
547            imports,
548            references,
549            call_frequency,
550            calls,
551            impl_traits,
552        })
553    }
554
555    fn extract_elements(
556        source: &str,
557        compiled: &CompiledQueries,
558        root: Node<'_>,
559        max_depth: Option<u32>,
560        lang_info: &crate::languages::LanguageInfo,
561        functions: &mut Vec<FunctionInfo>,
562        classes: &mut Vec<ClassInfo>,
563    ) {
564        let mut cursor = QueryCursor::new();
565        if let Some(depth) = max_depth {
566            cursor.set_max_start_depth(Some(depth));
567        }
568        let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
569        let mut seen_functions = std::collections::HashSet::new();
570
571        while let Some(mat) = matches.next() {
572            for capture in mat.captures {
573                let capture_name = compiled.element.capture_names()[capture.index as usize];
574                let node = capture.node;
575                match capture_name {
576                    "function" => {
577                        if let Some(name_node) = node.child_by_field_name("name") {
578                            let name =
579                                source[name_node.start_byte()..name_node.end_byte()].to_string();
580                            let func_key = (name.clone(), node.start_position().row);
581                            if !seen_functions.contains(&func_key) {
582                                seen_functions.insert(func_key);
583                                let params = node
584                                    .child_by_field_name("parameters")
585                                    .map(|p| source[p.start_byte()..p.end_byte()].to_string())
586                                    .unwrap_or_default();
587                                let return_type = node
588                                    .child_by_field_name("return_type")
589                                    .map(|r| source[r.start_byte()..r.end_byte()].to_string());
590                                functions.push(FunctionInfo {
591                                    name,
592                                    line: node.start_position().row + 1,
593                                    end_line: node.end_position().row + 1,
594                                    parameters: if params.is_empty() {
595                                        Vec::new()
596                                    } else {
597                                        vec![params]
598                                    },
599                                    return_type,
600                                });
601                            }
602                        }
603                    }
604                    "class" => {
605                        if let Some(name_node) = node.child_by_field_name("name") {
606                            let name =
607                                source[name_node.start_byte()..name_node.end_byte()].to_string();
608                            let inherits = if let Some(handler) = lang_info.extract_inheritance {
609                                handler(&node, source)
610                            } else {
611                                Vec::new()
612                            };
613                            classes.push(ClassInfo {
614                                name,
615                                line: node.start_position().row + 1,
616                                end_line: node.end_position().row + 1,
617                                methods: Vec::new(),
618                                fields: Vec::new(),
619                                inherits,
620                            });
621                        }
622                    }
623                    _ => {}
624                }
625            }
626        }
627    }
628
629    /// Returns the name of the enclosing function/method/subroutine for a given AST node,
630    /// by walking ancestors and matching all language-specific function container kinds.
631    fn enclosing_function_name(mut node: tree_sitter::Node<'_>, source: &str) -> Option<String> {
632        let mut depth = 0u32;
633        while let Some(parent) = node.parent() {
634            depth += 1;
635            // Cap at 64 hops: real function nesting rarely exceeds ~10 levels; 64 is a generous
636            // upper bound that guards against pathological/malformed ASTs without false negatives
637            // on legitimate code. Returns None (treated as <module>) when the cap is hit.
638            if depth > 64 {
639                return None;
640            }
641            let name_node = match parent.kind() {
642                // Direct name field: Rust, Python, Go, Java, TypeScript/TSX
643                "function_item"
644                | "method_item"
645                | "function_definition"
646                | "function_declaration"
647                | "method_declaration"
648                | "method_definition" => parent.child_by_field_name("name"),
649                // Fortran subroutine: name is inside subroutine_statement child
650                "subroutine" => {
651                    let mut cursor = parent.walk();
652                    parent
653                        .children(&mut cursor)
654                        .find(|c| c.kind() == "subroutine_statement")
655                        .and_then(|s| s.child_by_field_name("name"))
656                }
657                // Fortran function: name is inside function_statement child
658                "function" => {
659                    let mut cursor = parent.walk();
660                    parent
661                        .children(&mut cursor)
662                        .find(|c| c.kind() == "function_statement")
663                        .and_then(|s| s.child_by_field_name("name"))
664                }
665                _ => {
666                    node = parent;
667                    continue;
668                }
669            };
670            return name_node.map(|n| source[n.start_byte()..n.end_byte()].to_string());
671        }
672        // The loop exits here only when no parent was found (i.e., we reached the tree root
673        // without finding a function container). If the depth cap fired, we returned None early
674        // above. Nothing to assert here.
675        None
676    }
677
678    fn extract_calls(
679        source: &str,
680        compiled: &CompiledQueries,
681        root: Node<'_>,
682        max_depth: Option<u32>,
683        calls: &mut Vec<CallInfo>,
684        call_frequency: &mut HashMap<String, usize>,
685    ) {
686        let mut cursor = QueryCursor::new();
687        if let Some(depth) = max_depth {
688            cursor.set_max_start_depth(Some(depth));
689        }
690        let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
691
692        while let Some(mat) = matches.next() {
693            for capture in mat.captures {
694                let capture_name = compiled.call.capture_names()[capture.index as usize];
695                if capture_name != "call" {
696                    continue;
697                }
698                let node = capture.node;
699                let call_name = source[node.start_byte()..node.end_byte()].to_string();
700                *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
701
702                let caller = Self::enclosing_function_name(node, source)
703                    .unwrap_or_else(|| "<module>".to_string());
704
705                let mut arg_count = None;
706                let mut arg_node = node;
707                let mut hop = 0u32;
708                let mut cap_hit = false;
709                while let Some(parent) = arg_node.parent() {
710                    hop += 1;
711                    // Bounded parent traversal: cap at 16 hops to guard against pathological
712                    // walks on malformed/degenerate trees. Real call-expression nesting is
713                    // shallow (typically 1-3 levels). When the cap is hit we stop searching and
714                    // leave arg_count as None; the caller is still recorded, just without
715                    // argument-count information.
716                    if hop > 16 {
717                        cap_hit = true;
718                        break;
719                    }
720                    if parent.kind() == "call_expression" {
721                        if let Some(args) = parent.child_by_field_name("arguments") {
722                            arg_count = Some(args.named_child_count());
723                        }
724                        break;
725                    }
726                    arg_node = parent;
727                }
728                debug_assert!(
729                    !cap_hit,
730                    "extract_calls: parent traversal cap reached (hop > 16)"
731                );
732
733                calls.push(CallInfo {
734                    caller,
735                    callee: call_name,
736                    line: node.start_position().row + 1,
737                    column: node.start_position().column,
738                    arg_count,
739                });
740            }
741        }
742    }
743
744    fn extract_imports(
745        source: &str,
746        compiled: &CompiledQueries,
747        root: Node<'_>,
748        max_depth: Option<u32>,
749        imports: &mut Vec<ImportInfo>,
750    ) {
751        let Some(ref import_query) = compiled.import else {
752            return;
753        };
754        let mut cursor = QueryCursor::new();
755        if let Some(depth) = max_depth {
756            cursor.set_max_start_depth(Some(depth));
757        }
758        let mut matches = cursor.matches(import_query, root, source.as_bytes());
759
760        while let Some(mat) = matches.next() {
761            for capture in mat.captures {
762                let capture_name = import_query.capture_names()[capture.index as usize];
763                if capture_name == "import_path" {
764                    let node = capture.node;
765                    let line = node.start_position().row + 1;
766                    extract_imports_from_node(&node, source, "", line, imports);
767                }
768            }
769        }
770    }
771
772    fn extract_impl_methods(
773        source: &str,
774        compiled: &CompiledQueries,
775        root: Node<'_>,
776        max_depth: Option<u32>,
777        classes: &mut [ClassInfo],
778    ) {
779        let Some(ref impl_query) = compiled.impl_block else {
780            return;
781        };
782        let mut cursor = QueryCursor::new();
783        if let Some(depth) = max_depth {
784            cursor.set_max_start_depth(Some(depth));
785        }
786        let mut matches = cursor.matches(impl_query, root, source.as_bytes());
787
788        while let Some(mat) = matches.next() {
789            let mut impl_type_name = String::new();
790            let mut method_name = String::new();
791            let mut method_line = 0usize;
792            let mut method_end_line = 0usize;
793            let mut method_params = String::new();
794            let mut method_return_type: Option<String> = None;
795
796            for capture in mat.captures {
797                let capture_name = impl_query.capture_names()[capture.index as usize];
798                let node = capture.node;
799                match capture_name {
800                    "impl_type" => {
801                        impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
802                    }
803                    "method_name" => {
804                        method_name = source[node.start_byte()..node.end_byte()].to_string();
805                    }
806                    "method_params" => {
807                        method_params = source[node.start_byte()..node.end_byte()].to_string();
808                    }
809                    "method" => {
810                        method_line = node.start_position().row + 1;
811                        method_end_line = node.end_position().row + 1;
812                        method_return_type = node
813                            .child_by_field_name("return_type")
814                            .map(|r| source[r.start_byte()..r.end_byte()].to_string());
815                    }
816                    _ => {}
817                }
818            }
819
820            if !impl_type_name.is_empty() && !method_name.is_empty() {
821                let func = FunctionInfo {
822                    name: method_name,
823                    line: method_line,
824                    end_line: method_end_line,
825                    parameters: if method_params.is_empty() {
826                        Vec::new()
827                    } else {
828                        vec![method_params]
829                    },
830                    return_type: method_return_type,
831                };
832                if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
833                    class.methods.push(func);
834                }
835            }
836        }
837    }
838
839    fn extract_references(
840        source: &str,
841        compiled: &CompiledQueries,
842        root: Node<'_>,
843        max_depth: Option<u32>,
844        references: &mut Vec<ReferenceInfo>,
845    ) {
846        let Some(ref ref_query) = compiled.reference else {
847            return;
848        };
849        let mut cursor = QueryCursor::new();
850        if let Some(depth) = max_depth {
851            cursor.set_max_start_depth(Some(depth));
852        }
853        let mut seen_refs = std::collections::HashSet::new();
854        let mut matches = cursor.matches(ref_query, root, source.as_bytes());
855
856        while let Some(mat) = matches.next() {
857            for capture in mat.captures {
858                let capture_name = ref_query.capture_names()[capture.index as usize];
859                if capture_name == "type_ref" {
860                    let node = capture.node;
861                    let type_ref = source[node.start_byte()..node.end_byte()].to_string();
862                    if seen_refs.insert(type_ref.clone()) {
863                        references.push(ReferenceInfo {
864                            symbol: type_ref,
865                            reference_type: ReferenceType::Usage,
866                            // location is intentionally empty here; set by the caller (analyze_file)
867                            location: String::new(),
868                            line: node.start_position().row + 1,
869                        });
870                    }
871                }
872            }
873        }
874    }
875
876    /// Extract impl-trait blocks from an already-parsed tree.
877    ///
878    /// Called during `extract()` for Rust files to avoid a second parse.
879    /// Returns an empty vec if the query is not available.
880    fn extract_impl_traits_from_tree(
881        source: &str,
882        compiled: &CompiledQueries,
883        root: Node<'_>,
884    ) -> Vec<ImplTraitInfo> {
885        let Some(query) = &compiled.impl_trait else {
886            return vec![];
887        };
888
889        let mut cursor = QueryCursor::new();
890        let mut matches = cursor.matches(query, root, source.as_bytes());
891        let mut results = Vec::new();
892
893        while let Some(mat) = matches.next() {
894            let mut trait_name = String::new();
895            let mut impl_type = String::new();
896            let mut line = 0usize;
897
898            for capture in mat.captures {
899                let capture_name = query.capture_names()[capture.index as usize];
900                let node = capture.node;
901                let text = source[node.start_byte()..node.end_byte()].to_string();
902                match capture_name {
903                    "trait_name" => {
904                        trait_name = text;
905                        line = node.start_position().row + 1;
906                    }
907                    "impl_type" => {
908                        impl_type = text;
909                    }
910                    _ => {}
911                }
912            }
913
914            if !trait_name.is_empty() && !impl_type.is_empty() {
915                results.push(ImplTraitInfo {
916                    trait_name,
917                    impl_type,
918                    path: PathBuf::new(), // Path will be set by caller
919                    line,
920                });
921            }
922        }
923
924        results
925    }
926}
927
928/// Extract `impl Trait for Type` blocks from Rust source.
929///
930/// Runs independently of `extract_references` to avoid shared deduplication state.
931/// Returns an empty vec for non-Rust source (no error; caller decides).
932#[must_use]
933pub fn extract_impl_traits(source: &str, path: &Path) -> Vec<ImplTraitInfo> {
934    let Some(lang_info) = get_language_info("rust") else {
935        return vec![];
936    };
937
938    let Ok(compiled) = get_compiled_queries("rust") else {
939        return vec![];
940    };
941
942    let Some(query) = &compiled.impl_trait else {
943        return vec![];
944    };
945
946    let Some(tree) = PARSER.with(|p| {
947        let mut parser = p.borrow_mut();
948        let _ = parser.set_language(&lang_info.language);
949        parser.parse(source, None)
950    }) else {
951        return vec![];
952    };
953
954    let root = tree.root_node();
955    let mut cursor = QueryCursor::new();
956    let mut matches = cursor.matches(query, root, source.as_bytes());
957    let mut results = Vec::new();
958
959    while let Some(mat) = matches.next() {
960        let mut trait_name = String::new();
961        let mut impl_type = String::new();
962        let mut line = 0usize;
963
964        for capture in mat.captures {
965            let capture_name = query.capture_names()[capture.index as usize];
966            let node = capture.node;
967            let text = source[node.start_byte()..node.end_byte()].to_string();
968            match capture_name {
969                "trait_name" => {
970                    trait_name = text;
971                    line = node.start_position().row + 1;
972                }
973                "impl_type" => {
974                    impl_type = text;
975                }
976                _ => {}
977            }
978        }
979
980        if !trait_name.is_empty() && !impl_type.is_empty() {
981            results.push(ImplTraitInfo {
982                trait_name,
983                impl_type,
984                path: path.to_path_buf(),
985                line,
986            });
987        }
988    }
989
990    results
991}
992
993/// Execute a custom tree-sitter query against source code.
994///
995/// This is the internal implementation of the public `execute_query` function.
996pub fn execute_query_impl(
997    language: &str,
998    source: &str,
999    query_str: &str,
1000) -> Result<Vec<crate::QueryCapture>, ParserError> {
1001    // Get the tree-sitter language from the language name
1002    let ts_language = crate::languages::get_ts_language(language)
1003        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
1004
1005    let mut parser = Parser::new();
1006    parser
1007        .set_language(&ts_language)
1008        .map_err(|e| ParserError::QueryError(e.to_string()))?;
1009
1010    let tree = parser
1011        .parse(source.as_bytes(), None)
1012        .ok_or_else(|| ParserError::QueryError("failed to parse source".to_string()))?;
1013
1014    let query =
1015        Query::new(&ts_language, query_str).map_err(|e| ParserError::QueryError(e.to_string()))?;
1016
1017    let mut cursor = QueryCursor::new();
1018    let source_bytes = source.as_bytes();
1019
1020    let mut captures = Vec::new();
1021    let mut matches = cursor.matches(&query, tree.root_node(), source_bytes);
1022    while let Some(m) = matches.next() {
1023        for cap in m.captures {
1024            let node = cap.node;
1025            let capture_name = query.capture_names()[cap.index as usize].to_string();
1026            let text = node.utf8_text(source_bytes).unwrap_or("").to_string();
1027            captures.push(crate::QueryCapture {
1028                capture_name,
1029                text,
1030                start_line: node.start_position().row,
1031                end_line: node.end_position().row,
1032                start_byte: node.start_byte(),
1033                end_byte: node.end_byte(),
1034            });
1035        }
1036    }
1037    Ok(captures)
1038}
1039
// Language-feature-gated tests (require lang-rust); see also tests_unsupported below
#[cfg(all(test, feature = "lang-rust"))]
mod tests {
    use super::*;
    use std::path::Path;

    /// A depth limit of zero must give the same result as no limit at all.
    #[test]
    fn test_ast_recursion_limit_zero_is_unlimited() {
        // Arrange
        let source = "fn hello() -> u32 { 42 }";
        // Act: run once without a limit and once with an explicit zero
        let unlimited = SemanticExtractor::extract(source, "rust", None);
        let zero = SemanticExtractor::extract(source, "rust", Some(0));
        // Assert: both succeed and report the same, non-zero, number of functions
        let unlimited = unlimited.expect("extract with None failed");
        let zero = zero.expect("extract with Some(0) failed");
        assert!(
            !unlimited.functions.is_empty(),
            "extract with None should find at least one function in the test source"
        );
        assert_eq!(
            unlimited.functions.len(),
            zero.functions.len(),
            "ast_recursion_limit=0 should behave identically to unset (unlimited)"
        );
    }

    /// `use path as alias;` with a scoped path records the alias as an import item.
    #[test]
    fn test_rust_use_as_imports() {
        // Arrange
        let source = "use std::io as stdio;";
        // Act
        let analysis = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: alias "stdio" is captured as an import item
        let alias_found = analysis
            .imports
            .iter()
            .flat_map(|imp| imp.items.iter())
            .any(|item| item == "stdio");
        assert!(
            alias_found,
            "expected import alias 'stdio' in {:?}",
            analysis.imports
        );
    }

    /// `use ident as alias;` with a bare identifier also records the alias.
    #[test]
    fn test_rust_use_as_clause_plain_identifier() {
        // Arrange: use_as_clause with plain identifier (no scoped_identifier)
        // exercises the _ => prefix.to_string() arm
        let source = "use io as stdio;";
        // Act
        let analysis = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: alias "stdio" is captured as an import item
        let alias_found = analysis
            .imports
            .iter()
            .flat_map(|imp| imp.items.iter())
            .any(|item| item == "stdio");
        assert!(
            alias_found,
            "expected import alias 'stdio' from plain identifier in {:?}",
            analysis.imports
        );
    }

    /// Nested paths inside a use-list keep the outer prefix in their module.
    #[test]
    fn test_rust_scoped_use_with_prefix() {
        // Arrange: scoped_use_list with non-empty prefix
        let source = "use std::{io::Read, io::Write};";
        // Act
        let analysis = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: both Read and Write appear as items with std::io module
        let items: Vec<&str> = analysis
            .imports
            .iter()
            .filter(|imp| imp.module.starts_with("std::io"))
            .flat_map(|imp| imp.items.iter().map(String::as_str))
            .collect();
        assert!(
            items.contains(&"Read") && items.contains(&"Write"),
            "expected 'Read' and 'Write' items under module with std::io, got {:?}",
            analysis.imports
        );
    }

    /// A flat use-list records each entry as an item of the shared module.
    #[test]
    fn test_rust_scoped_use_imports() {
        // Arrange
        let source = "use std::{fs, io};";
        // Act
        let analysis = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: both "fs" and "io" appear as import items under module "std"
        let items: Vec<&str> = analysis
            .imports
            .iter()
            .filter(|imp| imp.module == "std")
            .flat_map(|imp| imp.items.iter().map(String::as_str))
            .collect();
        assert!(
            items.contains(&"fs") && items.contains(&"io"),
            "expected 'fs' and 'io' items under module 'std', got {:?}",
            items
        );
    }

    /// A glob import is recorded as the single item "*" under its module.
    #[test]
    fn test_rust_wildcard_imports() {
        // Arrange
        let source = "use std::io::*;";
        // Act
        let analysis = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: wildcard import with module "std::io"
        let has_wildcard = analysis
            .imports
            .iter()
            .any(|imp| imp.module == "std::io" && imp.items == ["*"]);
        assert!(
            has_wildcard,
            "expected wildcard import with module 'std::io', got {:?}",
            analysis.imports
        );
    }

    /// The standalone extractor finds a single `impl Trait for Type` pair.
    #[test]
    fn test_extract_impl_traits_standalone() {
        // Arrange: source with a simple impl Trait for Type
        let source = "\nstruct Foo;\ntrait Display {}\nimpl Display for Foo {}\n";
        // Act
        let results = extract_impl_traits(source, Path::new("test.rs"));
        // Assert
        assert_eq!(
            results.len(),
            1,
            "expected one impl trait, got {:?}",
            results
        );
        assert_eq!(results[0].trait_name, "Display");
        assert_eq!(results[0].impl_type, "Foo");
    }

    /// A limit that does not fit in `u32` is rejected with a `ParseError`.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_ast_recursion_limit_overflow() {
        // Arrange: limit larger than u32::MAX triggers a ParseError on 64-bit targets
        let source = "fn foo() {}";
        let big_limit = usize::try_from(u32::MAX).unwrap() + 1;
        // Act
        let result = SemanticExtractor::extract(source, "rust", Some(big_limit));
        // Assert
        assert!(
            matches!(result, Err(ParserError::ParseError(_))),
            "expected ParseError for oversized limit, got {:?}",
            result
        );
    }

    /// A small positive limit still lets top-level functions be found.
    #[test]
    fn test_ast_recursion_limit_some() {
        // Arrange: ast_recursion_limit with Some(depth) to exercise max_depth Some branch
        let source = "fn hello() -> u32 { 42 }";
        // Act
        let result = SemanticExtractor::extract(source, "rust", Some(5));
        // Assert: should succeed without error and extract functions
        assert!(result.is_ok(), "extract with Some(5) failed: {:?}", result);
        let analysis = result.unwrap();
        assert!(
            !analysis.functions.is_empty(),
            "expected at least one function with depth limit 5"
        );
    }
}
1211
// Language-feature-gated tests for Python
#[cfg(all(test, feature = "lang-python"))]
mod tests_python {
    use super::*;

    /// `from . import foo` must produce an import whose module contains a dot.
    #[test]
    fn test_python_relative_import() {
        // Arrange: relative import (from . import foo)
        let source = "from . import foo\n";
        // Act
        let analysis = SemanticExtractor::extract(source, "python", None).unwrap();
        // Assert: relative import should be captured
        let has_relative = analysis
            .imports
            .iter()
            .any(|imp| imp.module.contains("."));
        assert!(
            has_relative,
            "expected relative import in {:?}",
            analysis.imports
        );
    }

    /// `from os import path as p` must record `path` under module `os`.
    #[test]
    fn test_python_aliased_import() {
        // Arrange: aliased import (from os import path as p)
        // Note: tree-sitter-python extracts "path" (the original name), not the alias "p"
        let source = "from os import path as p\n";
        // Act
        let analysis = SemanticExtractor::extract(source, "python", None).unwrap();
        // Assert: "path" should be in items (alias is captured separately by aliased_import node)
        let has_path = analysis
            .imports
            .iter()
            .filter(|imp| imp.module == "os")
            .flat_map(|imp| imp.items.iter())
            .any(|item| item == "path");
        assert!(
            has_path,
            "expected import 'path' from module 'os' in {:?}",
            analysis.imports
        );
    }
}
1251
// Tests that do not require any language feature gate
#[cfg(test)]
mod tests_unsupported {
    use super::*;

    /// `ElementExtractor` reports an unknown language name back in the error.
    #[test]
    fn test_element_extractor_unsupported_language() {
        // Arrange + Act
        let outcome = ElementExtractor::extract_with_depth("x = 1", "cobol");
        // Assert
        let rejected_cobol = match &outcome {
            Err(ParserError::UnsupportedLanguage(lang)) => lang == "cobol",
            _ => false,
        };
        assert!(
            rejected_cobol,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }

    /// `SemanticExtractor` reports an unknown language name back in the error.
    #[test]
    fn test_semantic_extractor_unsupported_language() {
        // Arrange + Act
        let outcome = SemanticExtractor::extract("x = 1", "cobol", None);
        // Assert
        let rejected_cobol = match &outcome {
            Err(ParserError::UnsupportedLanguage(lang)) => lang == "cobol",
            _ => false,
        };
        assert!(
            rejected_cobol,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }
}