Skip to main content

code_analyze_core/
parser.rs

1// SPDX-FileCopyrightText: 2026 code-analyze-mcp contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Tree-sitter-based parser for extracting semantic structure from source code.
4//!
5//! This module provides language-agnostic parsing using tree-sitter queries to extract
6//! functions, classes, imports, references, and other semantic elements from source files.
7//! Two main extractors handle different use cases:
8//!
9//! - [`ElementExtractor`]: Quick extraction of function and class counts.
10//! - [`SemanticExtractor`]: Detailed semantic analysis with calls, imports, and references.
11
12use crate::languages::get_language_info;
13use crate::types::{
14    CallInfo, ClassInfo, FunctionInfo, ImplTraitInfo, ImportInfo, ReferenceInfo, ReferenceType,
15    SemanticAnalysis,
16};
17use std::cell::RefCell;
18use std::collections::HashMap;
19use std::path::{Path, PathBuf};
20use std::sync::LazyLock;
21use thiserror::Error;
22use tracing::instrument;
23use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
24
/// Errors produced while parsing source code and running tree-sitter queries.
///
/// The enum is `#[non_exhaustive]`, so downstream `match` expressions need a wildcard arm.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum ParserError {
    /// The requested language has no registered language info or no compiled queries.
    /// The payload is the language name that was requested.
    #[error("Unsupported language: {0}")]
    UnsupportedLanguage(String),
    /// The parser could not be configured for the language, produced no tree, or was
    /// given an `ast_recursion_limit` that does not fit in `u32`. The payload is a
    /// human-readable reason.
    #[error("Failed to parse file: {0}")]
    ParseError(String),
    /// The input was not valid UTF-8.
    // NOTE(review): no construction site for this variant is visible in this part of
    // the file -- confirm where it is raised.
    #[error("Invalid UTF-8 in file")]
    InvalidUtf8,
    /// A tree-sitter query string failed to compile. The payload names the query kind,
    /// the language, and the underlying error.
    #[error("Query error: {0}")]
    QueryError(String),
}
37
/// Compiled tree-sitter queries for a language.
/// Stores all query types: mandatory (element, call) and optional (import, impl,
/// reference, impl_trait). An optional query is `None` when the language defines no
/// query string for it.
struct CompiledQueries {
    /// Compiled from the language's `element_query`; its captures drive function and
    /// class extraction.
    element: Query,
    /// Compiled from the language's `call_query`; the `call` capture marks call sites.
    call: Query,
    /// Compiled from the language's `import_query`, when present.
    import: Option<Query>,
    /// Compiled from the language's `impl_query`, when present.
    impl_block: Option<Query>,
    /// Compiled from the language's `reference_query`, when present.
    reference: Option<Query>,
    /// Compiled from the language's `impl_trait_query`, when present.
    impl_trait: Option<Query>,
}
48
49/// Build compiled queries for a given language.
50///
51/// The `map_err` closures inside are only reachable if a hardcoded query string is
52/// invalid, which cannot happen at runtime -- exclude them from coverage instrumentation.
53#[cfg_attr(coverage_nightly, coverage(off))]
54fn build_compiled_queries(
55    lang_info: &crate::languages::LanguageInfo,
56) -> Result<CompiledQueries, ParserError> {
57    let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
58        ParserError::QueryError(format!(
59            "Failed to compile element query for {}: {}",
60            lang_info.name, e
61        ))
62    })?;
63
64    let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
65        ParserError::QueryError(format!(
66            "Failed to compile call query for {}: {}",
67            lang_info.name, e
68        ))
69    })?;
70
71    let import = if let Some(import_query_str) = lang_info.import_query {
72        Some(
73            Query::new(&lang_info.language, import_query_str).map_err(|e| {
74                ParserError::QueryError(format!(
75                    "Failed to compile import query for {}: {}",
76                    lang_info.name, e
77                ))
78            })?,
79        )
80    } else {
81        None
82    };
83
84    let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
85        Some(
86            Query::new(&lang_info.language, impl_query_str).map_err(|e| {
87                ParserError::QueryError(format!(
88                    "Failed to compile impl query for {}: {}",
89                    lang_info.name, e
90                ))
91            })?,
92        )
93    } else {
94        None
95    };
96
97    let reference = if let Some(ref_query_str) = lang_info.reference_query {
98        Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
99            ParserError::QueryError(format!(
100                "Failed to compile reference query for {}: {}",
101                lang_info.name, e
102            ))
103        })?)
104    } else {
105        None
106    };
107
108    let impl_trait = if let Some(impl_trait_query_str) = lang_info.impl_trait_query {
109        Some(
110            Query::new(&lang_info.language, impl_trait_query_str).map_err(|e| {
111                ParserError::QueryError(format!(
112                    "Failed to compile impl_trait query for {}: {}",
113                    lang_info.name, e
114                ))
115            })?,
116        )
117    } else {
118        None
119    };
120
121    Ok(CompiledQueries {
122        element,
123        call,
124        import,
125        impl_block,
126        reference,
127        impl_trait,
128    })
129}
130
131/// Initialize the query cache with compiled queries for all supported languages.
132///
133/// Excluded from coverage: the `Err` arm is unreachable because `build_compiled_queries`
134/// only fails on invalid hardcoded query strings.
135#[cfg_attr(coverage_nightly, coverage(off))]
136fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
137    let mut cache = HashMap::new();
138
139    for lang_name in crate::lang::supported_languages() {
140        if let Some(lang_info) = get_language_info(lang_name) {
141            match build_compiled_queries(&lang_info) {
142                Ok(compiled) => {
143                    cache.insert(*lang_name, compiled);
144                }
145                Err(e) => {
146                    tracing::error!(
147                        "Failed to compile queries for language {}: {}",
148                        lang_name,
149                        e
150                    );
151                }
152            }
153        }
154    }
155
156    cache
157}
158
/// Lazily initialized cache of compiled queries per language.
///
/// Populated by `init_query_cache` on first access and keyed by language name.
/// Languages whose queries failed to compile are absent from the map.
static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
    LazyLock::new(init_query_cache);
162
163/// Get compiled queries for a language from the cache.
164fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
165    QUERY_CACHE
166        .get(language)
167        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
168}
169
thread_local! {
    /// One reusable tree-sitter `Parser` per thread, wrapped in a `RefCell` so callers
    /// can borrow it mutably. Callers set the language on it before every parse.
    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
}
173
174/// Canonical API for extracting element counts from source code.
175pub struct ElementExtractor;
176
177impl ElementExtractor {
178    /// Extract function and class counts from source code.
179    ///
180    /// # Errors
181    ///
182    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
183    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
184    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
185    #[instrument(skip_all, fields(language))]
186    pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
187        let lang_info = get_language_info(language)
188            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
189
190        let tree = PARSER.with(|p| {
191            let mut parser = p.borrow_mut();
192            parser
193                .set_language(&lang_info.language)
194                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
195            parser
196                .parse(source, None)
197                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
198        })?;
199
200        let compiled = get_compiled_queries(language)?;
201
202        let mut cursor = QueryCursor::new();
203        let mut function_count = 0;
204        let mut class_count = 0;
205
206        let mut matches = cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
207        while let Some(mat) = matches.next() {
208            for capture in mat.captures {
209                let capture_name = compiled.element.capture_names()[capture.index as usize];
210                match capture_name {
211                    "function" => function_count += 1,
212                    "class" => class_count += 1,
213                    _ => {}
214                }
215            }
216        }
217
218        tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
219
220        Ok((function_count, class_count))
221    }
222}
223
224/// Recursively extract `ImportInfo` entries from a use-clause node, respecting all Rust
225/// use-declaration forms (`scoped_identifier`, `scoped_use_list`, `use_list`,
226/// `use_as_clause`, `use_wildcard`, bare `identifier`).
227#[allow(clippy::too_many_lines)] // exhaustive match over all supported Rust use-clause forms; splitting harms readability
228fn extract_imports_from_node(
229    node: &Node,
230    source: &str,
231    prefix: &str,
232    line: usize,
233    imports: &mut Vec<ImportInfo>,
234) {
235    match node.kind() {
236        // Simple identifier: `use foo;` or an item inside `{foo, bar}`
237        "identifier" | "self" | "super" | "crate" => {
238            let name = source[node.start_byte()..node.end_byte()].to_string();
239            imports.push(ImportInfo {
240                module: prefix.to_string(),
241                items: vec![name],
242                line,
243            });
244        }
245        // Qualified path: `std::collections::HashMap`
246        "scoped_identifier" => {
247            let item = node
248                .child_by_field_name("name")
249                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
250                .unwrap_or_default();
251            let module = node.child_by_field_name("path").map_or_else(
252                || prefix.to_string(),
253                |p| {
254                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
255                    if prefix.is_empty() {
256                        path_text
257                    } else {
258                        format!("{prefix}::{path_text}")
259                    }
260                },
261            );
262            if !item.is_empty() {
263                imports.push(ImportInfo {
264                    module,
265                    items: vec![item],
266                    line,
267                });
268            }
269        }
270        // `std::{io, fs}` — path prefix followed by a brace list
271        "scoped_use_list" => {
272            let new_prefix = node.child_by_field_name("path").map_or_else(
273                || prefix.to_string(),
274                |p| {
275                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
276                    if prefix.is_empty() {
277                        path_text
278                    } else {
279                        format!("{prefix}::{path_text}")
280                    }
281                },
282            );
283            if let Some(list) = node.child_by_field_name("list") {
284                extract_imports_from_node(&list, source, &new_prefix, line, imports);
285            }
286        }
287        // `{HashMap, HashSet}` — brace-enclosed list of items
288        "use_list" => {
289            let mut cursor = node.walk();
290            for child in node.children(&mut cursor) {
291                match child.kind() {
292                    "{" | "}" | "," => {}
293                    _ => extract_imports_from_node(&child, source, prefix, line, imports),
294                }
295            }
296        }
297        // `std::io::*` — glob import
298        "use_wildcard" => {
299            let text = source[node.start_byte()..node.end_byte()].to_string();
300            let module = if let Some(stripped) = text.strip_suffix("::*") {
301                if prefix.is_empty() {
302                    stripped.to_string()
303                } else {
304                    format!("{prefix}::{stripped}")
305                }
306            } else {
307                prefix.to_string()
308            };
309            imports.push(ImportInfo {
310                module,
311                items: vec!["*".to_string()],
312                line,
313            });
314        }
315        // `io as stdio` or `std::io as stdio`
316        "use_as_clause" => {
317            let alias = node
318                .child_by_field_name("alias")
319                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
320                .unwrap_or_default();
321            let module = if let Some(path_node) = node.child_by_field_name("path") {
322                match path_node.kind() {
323                    "scoped_identifier" => path_node.child_by_field_name("path").map_or_else(
324                        || prefix.to_string(),
325                        |p| {
326                            let p_text = source[p.start_byte()..p.end_byte()].to_string();
327                            if prefix.is_empty() {
328                                p_text
329                            } else {
330                                format!("{prefix}::{p_text}")
331                            }
332                        },
333                    ),
334                    _ => prefix.to_string(),
335                }
336            } else {
337                prefix.to_string()
338            };
339            if !alias.is_empty() {
340                imports.push(ImportInfo {
341                    module,
342                    items: vec![alias],
343                    line,
344                });
345            }
346        }
347        // Python import_from_statement: `from module import name` or `from . import *`
348        "import_from_statement" => {
349            extract_python_import_from(node, source, line, imports);
350        }
351        // Fallback for non-Rust import nodes: capture full text as module
352        _ => {
353            let text = source[node.start_byte()..node.end_byte()]
354                .trim()
355                .to_string();
356            if !text.is_empty() {
357                imports.push(ImportInfo {
358                    module: text,
359                    items: vec![],
360                    line,
361                });
362            }
363        }
364    }
365}
366
367/// Extract an item name from a `dotted_name` or `aliased_import` child node.
368fn extract_import_item_name(child: &Node, source: &str) -> Option<String> {
369    match child.kind() {
370        "dotted_name" => {
371            let name = source[child.start_byte()..child.end_byte()]
372                .trim()
373                .to_string();
374            if name.is_empty() { None } else { Some(name) }
375        }
376        "aliased_import" => child.child_by_field_name("name").and_then(|n| {
377            let name = source[n.start_byte()..n.end_byte()].trim().to_string();
378            if name.is_empty() { None } else { Some(name) }
379        }),
380        _ => None,
381    }
382}
383
384/// Collect wildcard/named imports from an `import_list` node or from direct named children.
385fn collect_import_items(
386    node: &Node,
387    source: &str,
388    is_wildcard: &mut bool,
389    items: &mut Vec<String>,
390) {
391    // Prefer import_list child (wraps `from x import a, b`)
392    if let Some(import_list) = node.child_by_field_name("import_list") {
393        let mut cursor = import_list.walk();
394        for child in import_list.named_children(&mut cursor) {
395            if child.kind() == "wildcard_import" {
396                *is_wildcard = true;
397            } else if let Some(name) = extract_import_item_name(&child, source) {
398                items.push(name);
399            }
400        }
401        return;
402    }
403    // No import_list: single-name or wildcard as direct child (skip first named child = module_name)
404    let mut cursor = node.walk();
405    let mut first = true;
406    for child in node.named_children(&mut cursor) {
407        if first {
408            first = false;
409            continue;
410        }
411        if child.kind() == "wildcard_import" {
412            *is_wildcard = true;
413        } else if let Some(name) = extract_import_item_name(&child, source) {
414            items.push(name);
415        }
416    }
417}
418
419/// Handle Python `import_from_statement` node.
420fn extract_python_import_from(
421    node: &Node,
422    source: &str,
423    line: usize,
424    imports: &mut Vec<ImportInfo>,
425) {
426    let module = if let Some(m) = node.child_by_field_name("module_name") {
427        source[m.start_byte()..m.end_byte()].trim().to_string()
428    } else if let Some(r) = node.child_by_field_name("relative_import") {
429        source[r.start_byte()..r.end_byte()].trim().to_string()
430    } else {
431        String::new()
432    };
433
434    let mut is_wildcard = false;
435    let mut items = Vec::new();
436    collect_import_items(node, source, &mut is_wildcard, &mut items);
437
438    if !module.is_empty() {
439        imports.push(ImportInfo {
440            module,
441            items: if is_wildcard {
442                vec!["*".to_string()]
443            } else {
444                items
445            },
446            line,
447        });
448    }
449}
450
/// Detailed semantic extractor: functions, classes, imports, references, calls, and
/// impl-trait blocks. All functionality is exposed through associated functions.
pub struct SemanticExtractor;
452
453impl SemanticExtractor {
454    /// Extract semantic information from source code.
455    ///
456    /// # Errors
457    ///
458    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
459    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
460    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
461    #[instrument(skip_all, fields(language))]
462    pub fn extract(
463        source: &str,
464        language: &str,
465        ast_recursion_limit: Option<usize>,
466    ) -> Result<SemanticAnalysis, ParserError> {
467        let lang_info = get_language_info(language)
468            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
469
470        let tree = PARSER.with(|p| {
471            let mut parser = p.borrow_mut();
472            parser
473                .set_language(&lang_info.language)
474                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
475            parser
476                .parse(source, None)
477                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
478        })?;
479
480        // 0 is not a useful depth (visits root node only, returning zero results).
481        // Treat 0 as None (unlimited). See #339.
482        let max_depth: Option<u32> = ast_recursion_limit
483            .filter(|&limit| limit > 0)
484            .map(|limit| {
485                u32::try_from(limit).map_err(|_| {
486                    ParserError::ParseError(format!(
487                        "ast_recursion_limit {} exceeds maximum supported value {}",
488                        limit,
489                        u32::MAX
490                    ))
491                })
492            })
493            .transpose()?;
494
495        let compiled = get_compiled_queries(language)?;
496        let root = tree.root_node();
497
498        let mut functions = Vec::new();
499        let mut classes = Vec::new();
500        let mut imports = Vec::new();
501        let mut references = Vec::new();
502        let mut call_frequency = HashMap::new();
503        let mut calls = Vec::new();
504
505        Self::extract_elements(
506            source,
507            compiled,
508            root,
509            max_depth,
510            &lang_info,
511            &mut functions,
512            &mut classes,
513        );
514        Self::extract_calls(
515            source,
516            compiled,
517            root,
518            max_depth,
519            &mut calls,
520            &mut call_frequency,
521        );
522        Self::extract_imports(source, compiled, root, max_depth, &mut imports);
523        Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes);
524        Self::extract_references(source, compiled, root, max_depth, &mut references);
525
526        // Extract impl-trait blocks for Rust files (empty for other languages)
527        let impl_traits = if language == "rust" {
528            Self::extract_impl_traits_from_tree(source, compiled, root)
529        } else {
530            vec![]
531        };
532
533        tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), impl_traits = impl_traits.len(), "extraction complete");
534
535        Ok(SemanticAnalysis {
536            functions,
537            classes,
538            imports,
539            references,
540            call_frequency,
541            calls,
542            impl_traits,
543        })
544    }
545
546    fn extract_elements(
547        source: &str,
548        compiled: &CompiledQueries,
549        root: Node<'_>,
550        max_depth: Option<u32>,
551        lang_info: &crate::languages::LanguageInfo,
552        functions: &mut Vec<FunctionInfo>,
553        classes: &mut Vec<ClassInfo>,
554    ) {
555        let mut cursor = QueryCursor::new();
556        if let Some(depth) = max_depth {
557            cursor.set_max_start_depth(Some(depth));
558        }
559        let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
560        let mut seen_functions = std::collections::HashSet::new();
561
562        while let Some(mat) = matches.next() {
563            let mut func_node: Option<Node> = None;
564            let mut func_name_text: Option<String> = None;
565            let mut class_node: Option<Node> = None;
566            let mut class_name_text: Option<String> = None;
567
568            for capture in mat.captures {
569                let capture_name = compiled.element.capture_names()[capture.index as usize];
570                let node = capture.node;
571                match capture_name {
572                    "function" => func_node = Some(node),
573                    "func_name" | "method_name" => {
574                        func_name_text =
575                            Some(source[node.start_byte()..node.end_byte()].to_string());
576                    }
577                    "class" => class_node = Some(node),
578                    "class_name" | "type_name" => {
579                        class_name_text =
580                            Some(source[node.start_byte()..node.end_byte()].to_string());
581                    }
582                    _ => {}
583                }
584            }
585
586            if let Some(func_node) = func_node {
587                // When a plain function_definition is nested inside a template_declaration,
588                // it is also matched by the explicit template_declaration pattern. Skip it
589                // here to avoid duplicates; the template_declaration match will emit it.
590                let parent_is_template = func_node
591                    .parent()
592                    .map(|p| p.kind() == "template_declaration")
593                    .unwrap_or(false);
594                if func_node.kind() == "function_definition" && parent_is_template {
595                    // Handled by the template_declaration @function match instead.
596                } else {
597                    // Resolve template_declaration to its inner function_definition for
598                    // declarator/field walks. The captured node may be the template wrapper.
599                    let func_def = if func_node.kind() == "template_declaration" {
600                        let mut cursor = func_node.walk();
601                        func_node
602                            .children(&mut cursor)
603                            .find(|n| n.kind() == "function_definition")
604                            .unwrap_or(func_node)
605                    } else {
606                        func_node
607                    };
608
609                    let name = func_name_text
610                        .or_else(|| {
611                            func_def
612                                .child_by_field_name("name")
613                                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
614                        })
615                        .unwrap_or_default();
616
617                    let func_key = (name.clone(), func_node.start_position().row);
618                    if !name.is_empty() && seen_functions.insert(func_key) {
619                        // For C/C++: parameters live under declarator -> parameters.
620                        // For other languages: parameters is a direct child field.
621                        let params = func_def
622                            .child_by_field_name("declarator")
623                            .and_then(|d| d.child_by_field_name("parameters"))
624                            .or_else(|| func_def.child_by_field_name("parameters"))
625                            .map(|p| source[p.start_byte()..p.end_byte()].to_string())
626                            .unwrap_or_default();
627
628                        // Try "type" first (C/C++ uses this field for the return type);
629                        // fall back to "return_type" (Rust, Python, TypeScript, etc.).
630                        let return_type = func_def
631                            .child_by_field_name("type")
632                            .or_else(|| func_def.child_by_field_name("return_type"))
633                            .map(|r| source[r.start_byte()..r.end_byte()].to_string());
634
635                        functions.push(FunctionInfo {
636                            name,
637                            line: func_node.start_position().row + 1,
638                            end_line: func_node.end_position().row + 1,
639                            parameters: if params.is_empty() {
640                                Vec::new()
641                            } else {
642                                vec![params]
643                            },
644                            return_type,
645                        });
646                    }
647                }
648            }
649
650            if let Some(class_node) = class_node {
651                let name = class_name_text
652                    .or_else(|| {
653                        class_node
654                            .child_by_field_name("name")
655                            .map(|n| source[n.start_byte()..n.end_byte()].to_string())
656                    })
657                    .unwrap_or_default();
658
659                if !name.is_empty() {
660                    let inherits = if let Some(handler) = lang_info.extract_inheritance {
661                        handler(&class_node, source)
662                    } else {
663                        Vec::new()
664                    };
665                    classes.push(ClassInfo {
666                        name,
667                        line: class_node.start_position().row + 1,
668                        end_line: class_node.end_position().row + 1,
669                        methods: Vec::new(),
670                        fields: Vec::new(),
671                        inherits,
672                    });
673                }
674            }
675        }
676    }
677
678    /// Returns the name of the enclosing function/method/subroutine for a given AST node,
679    /// by walking ancestors and matching all language-specific function container kinds.
680    fn enclosing_function_name(mut node: tree_sitter::Node<'_>, source: &str) -> Option<String> {
681        let mut depth = 0u32;
682        while let Some(parent) = node.parent() {
683            depth += 1;
684            // Cap at 64 hops: real function nesting rarely exceeds ~10 levels; 64 is a generous
685            // upper bound that guards against pathological/malformed ASTs without false negatives
686            // on legitimate code. Returns None (treated as <module>) when the cap is hit.
687            if depth > 64 {
688                return None;
689            }
690            let name_node = match parent.kind() {
691                // Direct name field: Rust, Python, Go, Java, TypeScript/TSX
692                "function_item"
693                | "method_item"
694                | "function_definition"
695                | "function_declaration"
696                | "method_declaration"
697                | "method_definition" => parent.child_by_field_name("name"),
698                // Fortran subroutine: name is inside subroutine_statement child
699                "subroutine" => {
700                    let mut cursor = parent.walk();
701                    parent
702                        .children(&mut cursor)
703                        .find(|c| c.kind() == "subroutine_statement")
704                        .and_then(|s| s.child_by_field_name("name"))
705                }
706                // Fortran function: name is inside function_statement child
707                "function" => {
708                    let mut cursor = parent.walk();
709                    parent
710                        .children(&mut cursor)
711                        .find(|c| c.kind() == "function_statement")
712                        .and_then(|s| s.child_by_field_name("name"))
713                }
714                _ => {
715                    node = parent;
716                    continue;
717                }
718            };
719            return name_node.map(|n| source[n.start_byte()..n.end_byte()].to_string());
720        }
721        // The loop exits here only when no parent was found (i.e., we reached the tree root
722        // without finding a function container). If the depth cap fired, we returned None early
723        // above. Nothing to assert here.
724        None
725    }
726
727    fn extract_calls(
728        source: &str,
729        compiled: &CompiledQueries,
730        root: Node<'_>,
731        max_depth: Option<u32>,
732        calls: &mut Vec<CallInfo>,
733        call_frequency: &mut HashMap<String, usize>,
734    ) {
735        let mut cursor = QueryCursor::new();
736        if let Some(depth) = max_depth {
737            cursor.set_max_start_depth(Some(depth));
738        }
739        let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
740
741        while let Some(mat) = matches.next() {
742            for capture in mat.captures {
743                let capture_name = compiled.call.capture_names()[capture.index as usize];
744                if capture_name != "call" {
745                    continue;
746                }
747                let node = capture.node;
748                let call_name = source[node.start_byte()..node.end_byte()].to_string();
749                *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
750
751                let caller = Self::enclosing_function_name(node, source)
752                    .unwrap_or_else(|| "<module>".to_string());
753
754                let mut arg_count = None;
755                let mut arg_node = node;
756                let mut hop = 0u32;
757                let mut cap_hit = false;
758                while let Some(parent) = arg_node.parent() {
759                    hop += 1;
760                    // Bounded parent traversal: cap at 16 hops to guard against pathological
761                    // walks on malformed/degenerate trees. Real call-expression nesting is
762                    // shallow (typically 1-3 levels). When the cap is hit we stop searching and
763                    // leave arg_count as None; the caller is still recorded, just without
764                    // argument-count information.
765                    if hop > 16 {
766                        cap_hit = true;
767                        break;
768                    }
769                    if parent.kind() == "call_expression" {
770                        if let Some(args) = parent.child_by_field_name("arguments") {
771                            arg_count = Some(args.named_child_count());
772                        }
773                        break;
774                    }
775                    arg_node = parent;
776                }
777                debug_assert!(
778                    !cap_hit,
779                    "extract_calls: parent traversal cap reached (hop > 16)"
780                );
781
782                calls.push(CallInfo {
783                    caller,
784                    callee: call_name,
785                    line: node.start_position().row + 1,
786                    column: node.start_position().column,
787                    arg_count,
788                });
789            }
790        }
791    }
792
793    fn extract_imports(
794        source: &str,
795        compiled: &CompiledQueries,
796        root: Node<'_>,
797        max_depth: Option<u32>,
798        imports: &mut Vec<ImportInfo>,
799    ) {
800        let Some(ref import_query) = compiled.import else {
801            return;
802        };
803        let mut cursor = QueryCursor::new();
804        if let Some(depth) = max_depth {
805            cursor.set_max_start_depth(Some(depth));
806        }
807        let mut matches = cursor.matches(import_query, root, source.as_bytes());
808
809        while let Some(mat) = matches.next() {
810            for capture in mat.captures {
811                let capture_name = import_query.capture_names()[capture.index as usize];
812                if capture_name == "import_path" {
813                    let node = capture.node;
814                    let line = node.start_position().row + 1;
815                    extract_imports_from_node(&node, source, "", line, imports);
816                }
817            }
818        }
819    }
820
821    fn extract_impl_methods(
822        source: &str,
823        compiled: &CompiledQueries,
824        root: Node<'_>,
825        max_depth: Option<u32>,
826        classes: &mut [ClassInfo],
827    ) {
828        let Some(ref impl_query) = compiled.impl_block else {
829            return;
830        };
831        let mut cursor = QueryCursor::new();
832        if let Some(depth) = max_depth {
833            cursor.set_max_start_depth(Some(depth));
834        }
835        let mut matches = cursor.matches(impl_query, root, source.as_bytes());
836
837        while let Some(mat) = matches.next() {
838            let mut impl_type_name = String::new();
839            let mut method_name = String::new();
840            let mut method_line = 0usize;
841            let mut method_end_line = 0usize;
842            let mut method_params = String::new();
843            let mut method_return_type: Option<String> = None;
844
845            for capture in mat.captures {
846                let capture_name = impl_query.capture_names()[capture.index as usize];
847                let node = capture.node;
848                match capture_name {
849                    "impl_type" => {
850                        impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
851                    }
852                    "method_name" => {
853                        method_name = source[node.start_byte()..node.end_byte()].to_string();
854                    }
855                    "method_params" => {
856                        method_params = source[node.start_byte()..node.end_byte()].to_string();
857                    }
858                    "method" => {
859                        method_line = node.start_position().row + 1;
860                        method_end_line = node.end_position().row + 1;
861                        method_return_type = node
862                            .child_by_field_name("return_type")
863                            .map(|r| source[r.start_byte()..r.end_byte()].to_string());
864                    }
865                    _ => {}
866                }
867            }
868
869            if !impl_type_name.is_empty() && !method_name.is_empty() {
870                let func = FunctionInfo {
871                    name: method_name,
872                    line: method_line,
873                    end_line: method_end_line,
874                    parameters: if method_params.is_empty() {
875                        Vec::new()
876                    } else {
877                        vec![method_params]
878                    },
879                    return_type: method_return_type,
880                };
881                if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
882                    class.methods.push(func);
883                }
884            }
885        }
886    }
887
888    fn extract_references(
889        source: &str,
890        compiled: &CompiledQueries,
891        root: Node<'_>,
892        max_depth: Option<u32>,
893        references: &mut Vec<ReferenceInfo>,
894    ) {
895        let Some(ref ref_query) = compiled.reference else {
896            return;
897        };
898        let mut cursor = QueryCursor::new();
899        if let Some(depth) = max_depth {
900            cursor.set_max_start_depth(Some(depth));
901        }
902        let mut seen_refs = std::collections::HashSet::new();
903        let mut matches = cursor.matches(ref_query, root, source.as_bytes());
904
905        while let Some(mat) = matches.next() {
906            for capture in mat.captures {
907                let capture_name = ref_query.capture_names()[capture.index as usize];
908                if capture_name == "type_ref" {
909                    let node = capture.node;
910                    let type_ref = source[node.start_byte()..node.end_byte()].to_string();
911                    if seen_refs.insert(type_ref.clone()) {
912                        references.push(ReferenceInfo {
913                            symbol: type_ref,
914                            reference_type: ReferenceType::Usage,
915                            // location is intentionally empty here; set by the caller (analyze_file)
916                            location: String::new(),
917                            line: node.start_position().row + 1,
918                        });
919                    }
920                }
921            }
922        }
923    }
924
925    /// Extract impl-trait blocks from an already-parsed tree.
926    ///
927    /// Called during `extract()` for Rust files to avoid a second parse.
928    /// Returns an empty vec if the query is not available.
929    fn extract_impl_traits_from_tree(
930        source: &str,
931        compiled: &CompiledQueries,
932        root: Node<'_>,
933    ) -> Vec<ImplTraitInfo> {
934        let Some(query) = &compiled.impl_trait else {
935            return vec![];
936        };
937
938        let mut cursor = QueryCursor::new();
939        let mut matches = cursor.matches(query, root, source.as_bytes());
940        let mut results = Vec::new();
941
942        while let Some(mat) = matches.next() {
943            let mut trait_name = String::new();
944            let mut impl_type = String::new();
945            let mut line = 0usize;
946
947            for capture in mat.captures {
948                let capture_name = query.capture_names()[capture.index as usize];
949                let node = capture.node;
950                let text = source[node.start_byte()..node.end_byte()].to_string();
951                match capture_name {
952                    "trait_name" => {
953                        trait_name = text;
954                        line = node.start_position().row + 1;
955                    }
956                    "impl_type" => {
957                        impl_type = text;
958                    }
959                    _ => {}
960                }
961            }
962
963            if !trait_name.is_empty() && !impl_type.is_empty() {
964                results.push(ImplTraitInfo {
965                    trait_name,
966                    impl_type,
967                    path: PathBuf::new(), // Path will be set by caller
968                    line,
969                });
970            }
971        }
972
973        results
974    }
975}
976
977/// Extract `impl Trait for Type` blocks from Rust source.
978///
979/// Runs independently of `extract_references` to avoid shared deduplication state.
980/// Returns an empty vec for non-Rust source (no error; caller decides).
981#[must_use]
982pub fn extract_impl_traits(source: &str, path: &Path) -> Vec<ImplTraitInfo> {
983    let Some(lang_info) = get_language_info("rust") else {
984        return vec![];
985    };
986
987    let Ok(compiled) = get_compiled_queries("rust") else {
988        return vec![];
989    };
990
991    let Some(query) = &compiled.impl_trait else {
992        return vec![];
993    };
994
995    let Some(tree) = PARSER.with(|p| {
996        let mut parser = p.borrow_mut();
997        let _ = parser.set_language(&lang_info.language);
998        parser.parse(source, None)
999    }) else {
1000        return vec![];
1001    };
1002
1003    let root = tree.root_node();
1004    let mut cursor = QueryCursor::new();
1005    let mut matches = cursor.matches(query, root, source.as_bytes());
1006    let mut results = Vec::new();
1007
1008    while let Some(mat) = matches.next() {
1009        let mut trait_name = String::new();
1010        let mut impl_type = String::new();
1011        let mut line = 0usize;
1012
1013        for capture in mat.captures {
1014            let capture_name = query.capture_names()[capture.index as usize];
1015            let node = capture.node;
1016            let text = source[node.start_byte()..node.end_byte()].to_string();
1017            match capture_name {
1018                "trait_name" => {
1019                    trait_name = text;
1020                    line = node.start_position().row + 1;
1021                }
1022                "impl_type" => {
1023                    impl_type = text;
1024                }
1025                _ => {}
1026            }
1027        }
1028
1029        if !trait_name.is_empty() && !impl_type.is_empty() {
1030            results.push(ImplTraitInfo {
1031                trait_name,
1032                impl_type,
1033                path: path.to_path_buf(),
1034                line,
1035            });
1036        }
1037    }
1038
1039    results
1040}
1041
1042/// Execute a custom tree-sitter query against source code.
1043///
1044/// This is the internal implementation of the public `execute_query` function.
1045pub fn execute_query_impl(
1046    language: &str,
1047    source: &str,
1048    query_str: &str,
1049) -> Result<Vec<crate::QueryCapture>, ParserError> {
1050    // Get the tree-sitter language from the language name
1051    let ts_language = crate::languages::get_ts_language(language)
1052        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
1053
1054    let mut parser = Parser::new();
1055    parser
1056        .set_language(&ts_language)
1057        .map_err(|e| ParserError::QueryError(e.to_string()))?;
1058
1059    let tree = parser
1060        .parse(source.as_bytes(), None)
1061        .ok_or_else(|| ParserError::QueryError("failed to parse source".to_string()))?;
1062
1063    let query =
1064        Query::new(&ts_language, query_str).map_err(|e| ParserError::QueryError(e.to_string()))?;
1065
1066    let mut cursor = QueryCursor::new();
1067    let source_bytes = source.as_bytes();
1068
1069    let mut captures = Vec::new();
1070    let mut matches = cursor.matches(&query, tree.root_node(), source_bytes);
1071    while let Some(m) = matches.next() {
1072        for cap in m.captures {
1073            let node = cap.node;
1074            let capture_name = query.capture_names()[cap.index as usize].to_string();
1075            let text = node.utf8_text(source_bytes).unwrap_or("").to_string();
1076            captures.push(crate::QueryCapture {
1077                capture_name,
1078                text,
1079                start_line: node.start_position().row,
1080                end_line: node.end_position().row,
1081                start_byte: node.start_byte(),
1082                end_byte: node.end_byte(),
1083            });
1084        }
1085    }
1086    Ok(captures)
1087}
1088
// Language-feature-gated tests (require lang-rust); see also tests_unsupported below
#[cfg(all(test, feature = "lang-rust"))]
mod tests {
    use super::*;
    use std::path::Path;

    /// A limit of `Some(0)` must find the same number of functions as no limit.
    #[test]
    fn test_ast_recursion_limit_zero_is_unlimited() {
        // Arrange
        let source = "fn hello() -> u32 { 42 }";
        // Act
        let result_none = SemanticExtractor::extract(source, "rust", None);
        let result_zero = SemanticExtractor::extract(source, "rust", Some(0));
        // Assert
        assert!(result_none.is_ok(), "extract with None failed");
        assert!(result_zero.is_ok(), "extract with Some(0) failed");
        let analysis_none = result_none.unwrap();
        let analysis_zero = result_zero.unwrap();
        assert!(
            !analysis_none.functions.is_empty(),
            "extract with None should find at least one function in the test source"
        );
        assert_eq!(
            analysis_none.functions.len(),
            analysis_zero.functions.len(),
            "ast_recursion_limit=0 should behave identically to unset (unlimited)"
        );
    }

    /// `use path as alias;` records the alias as an import item.
    #[test]
    fn test_rust_use_as_imports() {
        // Arrange
        let source = "use std::io as stdio;";
        // Act
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: alias "stdio" is captured as an import item
        assert!(
            result
                .imports
                .iter()
                .any(|imp| imp.items.iter().any(|i| i == "stdio")),
            "expected import alias 'stdio' in {:?}",
            result.imports
        );
    }

    /// Same as above, but the aliased path is a plain identifier.
    #[test]
    fn test_rust_use_as_clause_plain_identifier() {
        // Arrange: use_as_clause with plain identifier (no scoped_identifier)
        // exercises the _ => prefix.to_string() arm
        let source = "use io as stdio;";
        // Act
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: alias "stdio" is captured as an import item
        assert!(
            result
                .imports
                .iter()
                .any(|imp| imp.items.iter().any(|i| i == "stdio")),
            "expected import alias 'stdio' from plain identifier in {:?}",
            result.imports
        );
    }

    /// Items nested in a scoped use list keep their full module prefix.
    #[test]
    fn test_rust_scoped_use_with_prefix() {
        // Arrange: scoped_use_list with non-empty prefix
        let source = "use std::{io::Read, io::Write};";
        // Act
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: both Read and Write appear as items with std::io module
        let items: Vec<&str> = result
            .imports
            .iter()
            .filter(|imp| imp.module.starts_with("std::io"))
            .flat_map(|imp| imp.items.iter().map(String::as_str))
            .collect();
        assert!(
            items.contains(&"Read") && items.contains(&"Write"),
            "expected 'Read' and 'Write' items under module with std::io, got {:?}",
            result.imports
        );
    }

    /// A flat scoped use list yields each entry as an item of the shared module.
    #[test]
    fn test_rust_scoped_use_imports() {
        // Arrange
        let source = "use std::{fs, io};";
        // Act
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: both "fs" and "io" appear as import items under module "std"
        let items: Vec<&str> = result
            .imports
            .iter()
            .filter(|imp| imp.module == "std")
            .flat_map(|imp| imp.items.iter().map(String::as_str))
            .collect();
        assert!(
            items.contains(&"fs") && items.contains(&"io"),
            "expected 'fs' and 'io' items under module 'std', got {items:?}"
        );
    }

    /// A glob import is reported as the single item `*` under its module.
    #[test]
    fn test_rust_wildcard_imports() {
        // Arrange
        let source = "use std::io::*;";
        // Act
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: wildcard import with module "std::io"
        let has_wildcard = result
            .imports
            .iter()
            .any(|imp| imp.module == "std::io" && imp.items == ["*"]);
        assert!(
            has_wildcard,
            "expected wildcard import with module 'std::io', got {:?}",
            result.imports
        );
    }

    /// The standalone entry point finds a single `impl Trait for Type`.
    #[test]
    fn test_extract_impl_traits_standalone() {
        // Arrange: source with a simple impl Trait for Type
        let source = "
struct Foo;
trait Display {}
impl Display for Foo {}
";
        // Act
        let results = extract_impl_traits(source, Path::new("test.rs"));
        // Assert
        assert_eq!(
            results.len(),
            1,
            "expected one impl trait, got {results:?}"
        );
        assert_eq!(results[0].trait_name, "Display");
        assert_eq!(results[0].impl_type, "Foo");
    }

    /// A limit that does not fit in `u32` is rejected with `ParseError`.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_ast_recursion_limit_overflow() {
        // Arrange: limit larger than u32::MAX triggers a ParseError on 64-bit targets
        let source = "fn foo() {}";
        let big_limit = usize::try_from(u32::MAX).unwrap() + 1;
        // Act
        let result = SemanticExtractor::extract(source, "rust", Some(big_limit));
        // Assert
        assert!(
            matches!(result, Err(ParserError::ParseError(_))),
            "expected ParseError for oversized limit, got {result:?}"
        );
    }

    /// A small positive limit still extracts top-level functions.
    #[test]
    fn test_ast_recursion_limit_some() {
        // Arrange: ast_recursion_limit with Some(depth) to exercise max_depth Some branch
        let source = "fn hello() -> u32 { 42 }";
        // Act
        let result = SemanticExtractor::extract(source, "rust", Some(5));
        // Assert: should succeed without error and extract functions
        assert!(result.is_ok(), "extract with Some(5) failed: {result:?}");
        let analysis = result.unwrap();
        assert!(
            !analysis.functions.is_empty(),
            "expected at least one function with depth limit 5"
        );
    }
}
1260
// Language-feature-gated tests for Python
#[cfg(all(test, feature = "lang-python"))]
mod tests_python {
    use super::*;

    /// `from . import foo` is reported with a module text that contains a dot.
    #[test]
    fn test_python_relative_import() {
        // Arrange: a relative import of the form `from . import foo`
        let source = "from . import foo\n";
        // Act
        let analysis = SemanticExtractor::extract(source, "python", None).unwrap();
        // Assert: at least one import has a dotted module
        let has_relative = analysis
            .imports
            .iter()
            .any(|imp| imp.module.contains('.'));
        assert!(
            has_relative,
            "expected relative import in {:?}",
            analysis.imports
        );
    }

    /// `from os import path as p` is reported under its original name `path`.
    #[test]
    fn test_python_aliased_import() {
        // Arrange: an aliased import; the expectation below is on the original
        // name "path", not on the alias "p"
        let source = "from os import path as p\n";
        // Act
        let analysis = SemanticExtractor::extract(source, "python", None).unwrap();
        // Assert: module "os" lists "path" among its items
        let has_path = analysis
            .imports
            .iter()
            .any(|imp| imp.module == "os" && imp.items.iter().any(|item| item == "path"));
        assert!(
            has_path,
            "expected import 'path' from module 'os' in {:?}",
            analysis.imports
        );
    }
}
1300
// Tests that do not require any language feature gate
#[cfg(test)]
mod tests_unsupported {
    use super::*;

    /// `ElementExtractor` reports the offending language name for an unknown language.
    #[test]
    fn test_element_extractor_unsupported_language() {
        // Arrange + Act
        let outcome = ElementExtractor::extract_with_depth("x = 1", "cobol");
        // Assert
        let rejected_cobol = match &outcome {
            Err(ParserError::UnsupportedLanguage(lang)) => lang == "cobol",
            _ => false,
        };
        assert!(
            rejected_cobol,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }

    /// `SemanticExtractor` reports the offending language name for an unknown language.
    #[test]
    fn test_semantic_extractor_unsupported_language() {
        // Arrange + Act
        let outcome = SemanticExtractor::extract("x = 1", "cobol", None);
        // Assert
        let rejected_cobol = match &outcome {
            Err(ParserError::UnsupportedLanguage(lang)) => lang == "cobol",
            _ => false,
        };
        assert!(
            rejected_cobol,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }
}
1329}