Skip to main content

code_analyze_core/
parser.rs

1// SPDX-FileCopyrightText: 2026 code-analyze-mcp contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Tree-sitter-based parser for extracting semantic structure from source code.
4//!
5//! This module provides language-agnostic parsing using tree-sitter queries to extract
6//! functions, classes, imports, references, and other semantic elements from source files.
7//! Two main extractors handle different use cases:
8//!
9//! - [`ElementExtractor`]: Quick extraction of function and class counts.
10//! - [`SemanticExtractor`]: Detailed semantic analysis with calls, imports, and references.
11
12use crate::languages::get_language_info;
13use crate::types::{
14    CallInfo, ClassInfo, FunctionInfo, ImplTraitInfo, ImportInfo, ReferenceInfo, ReferenceType,
15    SemanticAnalysis,
16};
17use std::cell::RefCell;
18use std::collections::HashMap;
19use std::path::{Path, PathBuf};
20use std::sync::LazyLock;
21use thiserror::Error;
22use tracing::instrument;
23use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
24
25#[derive(Debug, Error)]
26#[non_exhaustive]
27pub enum ParserError {
28    #[error("Unsupported language: {0}")]
29    UnsupportedLanguage(String),
30    #[error("Failed to parse file: {0}")]
31    ParseError(String),
32    #[error("Invalid UTF-8 in file")]
33    InvalidUtf8,
34    #[error("Query error: {0}")]
35    QueryError(String),
36}
37
38/// Compiled tree-sitter queries for a language.
39/// Stores all query types: mandatory (element, call) and optional (import, impl, reference).
40struct CompiledQueries {
41    element: Query,
42    call: Query,
43    import: Option<Query>,
44    impl_block: Option<Query>,
45    reference: Option<Query>,
46    impl_trait: Option<Query>,
47}
48
49/// Build compiled queries for a given language.
50///
51/// The `map_err` closures inside are only reachable if a hardcoded query string is
52/// invalid, which cannot happen at runtime -- exclude them from coverage instrumentation.
53#[cfg_attr(coverage_nightly, coverage(off))]
54fn build_compiled_queries(
55    lang_info: &crate::languages::LanguageInfo,
56) -> Result<CompiledQueries, ParserError> {
57    let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
58        ParserError::QueryError(format!(
59            "Failed to compile element query for {}: {}",
60            lang_info.name, e
61        ))
62    })?;
63
64    let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
65        ParserError::QueryError(format!(
66            "Failed to compile call query for {}: {}",
67            lang_info.name, e
68        ))
69    })?;
70
71    let import = if let Some(import_query_str) = lang_info.import_query {
72        Some(
73            Query::new(&lang_info.language, import_query_str).map_err(|e| {
74                ParserError::QueryError(format!(
75                    "Failed to compile import query for {}: {}",
76                    lang_info.name, e
77                ))
78            })?,
79        )
80    } else {
81        None
82    };
83
84    let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
85        Some(
86            Query::new(&lang_info.language, impl_query_str).map_err(|e| {
87                ParserError::QueryError(format!(
88                    "Failed to compile impl query for {}: {}",
89                    lang_info.name, e
90                ))
91            })?,
92        )
93    } else {
94        None
95    };
96
97    let reference = if let Some(ref_query_str) = lang_info.reference_query {
98        Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
99            ParserError::QueryError(format!(
100                "Failed to compile reference query for {}: {}",
101                lang_info.name, e
102            ))
103        })?)
104    } else {
105        None
106    };
107
108    let impl_trait = if let Some(impl_trait_query_str) = lang_info.impl_trait_query {
109        Some(
110            Query::new(&lang_info.language, impl_trait_query_str).map_err(|e| {
111                ParserError::QueryError(format!(
112                    "Failed to compile impl_trait query for {}: {}",
113                    lang_info.name, e
114                ))
115            })?,
116        )
117    } else {
118        None
119    };
120
121    Ok(CompiledQueries {
122        element,
123        call,
124        import,
125        impl_block,
126        reference,
127        impl_trait,
128    })
129}
130
131/// Initialize the query cache with compiled queries for all supported languages.
132///
133/// Excluded from coverage: the `Err` arm is unreachable because `build_compiled_queries`
134/// only fails on invalid hardcoded query strings.
135#[cfg_attr(coverage_nightly, coverage(off))]
136fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
137    let mut cache = HashMap::new();
138
139    for lang_name in crate::lang::supported_languages() {
140        if let Some(lang_info) = get_language_info(lang_name) {
141            match build_compiled_queries(&lang_info) {
142                Ok(compiled) => {
143                    cache.insert(*lang_name, compiled);
144                }
145                Err(e) => {
146                    tracing::error!(
147                        "Failed to compile queries for language {}: {}",
148                        lang_name,
149                        e
150                    );
151                }
152            }
153        }
154    }
155
156    cache
157}
158
159/// Lazily initialized cache of compiled queries per language.
160static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
161    LazyLock::new(init_query_cache);
162
163/// Get compiled queries for a language from the cache.
164fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
165    QUERY_CACHE
166        .get(language)
167        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
168}
169
170thread_local! {
171    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
172}
173
174/// Canonical API for extracting element counts from source code.
175pub struct ElementExtractor;
176
177impl ElementExtractor {
178    /// Extract function and class counts from source code.
179    ///
180    /// # Errors
181    ///
182    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
183    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
184    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
185    #[instrument(skip_all, fields(language))]
186    pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
187        let lang_info = get_language_info(language)
188            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
189
190        let tree = PARSER.with(|p| {
191            let mut parser = p.borrow_mut();
192            parser
193                .set_language(&lang_info.language)
194                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
195            parser
196                .parse(source, None)
197                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
198        })?;
199
200        let compiled = get_compiled_queries(language)?;
201
202        let mut cursor = QueryCursor::new();
203        let mut function_count = 0;
204        let mut class_count = 0;
205
206        let mut matches = cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
207        while let Some(mat) = matches.next() {
208            for capture in mat.captures {
209                let capture_name = compiled.element.capture_names()[capture.index as usize];
210                match capture_name {
211                    "function" => function_count += 1,
212                    "class" => class_count += 1,
213                    _ => {}
214                }
215            }
216        }
217
218        tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
219
220        Ok((function_count, class_count))
221    }
222}
223
224/// Recursively extract `ImportInfo` entries from a use-clause node, respecting all Rust
225/// use-declaration forms (`scoped_identifier`, `scoped_use_list`, `use_list`,
226/// `use_as_clause`, `use_wildcard`, bare `identifier`).
227#[allow(clippy::too_many_lines)] // exhaustive match over all supported Rust use-clause forms; splitting harms readability
228fn extract_imports_from_node(
229    node: &Node,
230    source: &str,
231    prefix: &str,
232    line: usize,
233    imports: &mut Vec<ImportInfo>,
234) {
235    match node.kind() {
236        // Simple identifier: `use foo;` or an item inside `{foo, bar}`
237        "identifier" | "self" | "super" | "crate" => {
238            let name = source[node.start_byte()..node.end_byte()].to_string();
239            imports.push(ImportInfo {
240                module: prefix.to_string(),
241                items: vec![name],
242                line,
243            });
244        }
245        // Qualified path: `std::collections::HashMap`
246        "scoped_identifier" => {
247            let item = node
248                .child_by_field_name("name")
249                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
250                .unwrap_or_default();
251            let module = node.child_by_field_name("path").map_or_else(
252                || prefix.to_string(),
253                |p| {
254                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
255                    if prefix.is_empty() {
256                        path_text
257                    } else {
258                        format!("{prefix}::{path_text}")
259                    }
260                },
261            );
262            if !item.is_empty() {
263                imports.push(ImportInfo {
264                    module,
265                    items: vec![item],
266                    line,
267                });
268            }
269        }
270        // `std::{io, fs}` — path prefix followed by a brace list
271        "scoped_use_list" => {
272            let new_prefix = node.child_by_field_name("path").map_or_else(
273                || prefix.to_string(),
274                |p| {
275                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
276                    if prefix.is_empty() {
277                        path_text
278                    } else {
279                        format!("{prefix}::{path_text}")
280                    }
281                },
282            );
283            if let Some(list) = node.child_by_field_name("list") {
284                extract_imports_from_node(&list, source, &new_prefix, line, imports);
285            }
286        }
287        // `{HashMap, HashSet}` — brace-enclosed list of items
288        "use_list" => {
289            let mut cursor = node.walk();
290            for child in node.children(&mut cursor) {
291                match child.kind() {
292                    "{" | "}" | "," => {}
293                    _ => extract_imports_from_node(&child, source, prefix, line, imports),
294                }
295            }
296        }
297        // `std::io::*` — glob import
298        "use_wildcard" => {
299            let text = source[node.start_byte()..node.end_byte()].to_string();
300            let module = if let Some(stripped) = text.strip_suffix("::*") {
301                if prefix.is_empty() {
302                    stripped.to_string()
303                } else {
304                    format!("{prefix}::{stripped}")
305                }
306            } else {
307                prefix.to_string()
308            };
309            imports.push(ImportInfo {
310                module,
311                items: vec!["*".to_string()],
312                line,
313            });
314        }
315        // `io as stdio` or `std::io as stdio`
316        "use_as_clause" => {
317            let alias = node
318                .child_by_field_name("alias")
319                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
320                .unwrap_or_default();
321            let module = if let Some(path_node) = node.child_by_field_name("path") {
322                match path_node.kind() {
323                    "scoped_identifier" => path_node.child_by_field_name("path").map_or_else(
324                        || prefix.to_string(),
325                        |p| {
326                            let p_text = source[p.start_byte()..p.end_byte()].to_string();
327                            if prefix.is_empty() {
328                                p_text
329                            } else {
330                                format!("{prefix}::{p_text}")
331                            }
332                        },
333                    ),
334                    _ => prefix.to_string(),
335                }
336            } else {
337                prefix.to_string()
338            };
339            if !alias.is_empty() {
340                imports.push(ImportInfo {
341                    module,
342                    items: vec![alias],
343                    line,
344                });
345            }
346        }
347        // Python import_from_statement: `from module import name` or `from . import *`
348        "import_from_statement" => {
349            extract_python_import_from(node, source, line, imports);
350        }
351        // Fallback for non-Rust import nodes: capture full text as module
352        _ => {
353            let text = source[node.start_byte()..node.end_byte()]
354                .trim()
355                .to_string();
356            if !text.is_empty() {
357                imports.push(ImportInfo {
358                    module: text,
359                    items: vec![],
360                    line,
361                });
362            }
363        }
364    }
365}
366
367/// Extract an item name from a `dotted_name` or `aliased_import` child node.
368fn extract_import_item_name(child: &Node, source: &str) -> Option<String> {
369    match child.kind() {
370        "dotted_name" => {
371            let name = source[child.start_byte()..child.end_byte()]
372                .trim()
373                .to_string();
374            if name.is_empty() { None } else { Some(name) }
375        }
376        "aliased_import" => child.child_by_field_name("name").and_then(|n| {
377            let name = source[n.start_byte()..n.end_byte()].trim().to_string();
378            if name.is_empty() { None } else { Some(name) }
379        }),
380        _ => None,
381    }
382}
383
384/// Collect wildcard/named imports from an `import_list` node or from direct named children.
385fn collect_import_items(
386    node: &Node,
387    source: &str,
388    is_wildcard: &mut bool,
389    items: &mut Vec<String>,
390) {
391    // Prefer import_list child (wraps `from x import a, b`)
392    if let Some(import_list) = node.child_by_field_name("import_list") {
393        let mut cursor = import_list.walk();
394        for child in import_list.named_children(&mut cursor) {
395            if child.kind() == "wildcard_import" {
396                *is_wildcard = true;
397            } else if let Some(name) = extract_import_item_name(&child, source) {
398                items.push(name);
399            }
400        }
401        return;
402    }
403    // No import_list: single-name or wildcard as direct child (skip first named child = module_name)
404    let mut cursor = node.walk();
405    let mut first = true;
406    for child in node.named_children(&mut cursor) {
407        if first {
408            first = false;
409            continue;
410        }
411        if child.kind() == "wildcard_import" {
412            *is_wildcard = true;
413        } else if let Some(name) = extract_import_item_name(&child, source) {
414            items.push(name);
415        }
416    }
417}
418
419/// Handle Python `import_from_statement` node.
420fn extract_python_import_from(
421    node: &Node,
422    source: &str,
423    line: usize,
424    imports: &mut Vec<ImportInfo>,
425) {
426    let module = if let Some(m) = node.child_by_field_name("module_name") {
427        source[m.start_byte()..m.end_byte()].trim().to_string()
428    } else if let Some(r) = node.child_by_field_name("relative_import") {
429        source[r.start_byte()..r.end_byte()].trim().to_string()
430    } else {
431        String::new()
432    };
433
434    let mut is_wildcard = false;
435    let mut items = Vec::new();
436    collect_import_items(node, source, &mut is_wildcard, &mut items);
437
438    if !module.is_empty() {
439        imports.push(ImportInfo {
440            module,
441            items: if is_wildcard {
442                vec!["*".to_string()]
443            } else {
444                items
445            },
446            line,
447        });
448    }
449}
450
/// Canonical API for detailed semantic analysis: functions, classes, calls, imports,
/// and references.
pub struct SemanticExtractor;
452
453impl SemanticExtractor {
454    /// Extract semantic information from source code.
455    ///
456    /// # Errors
457    ///
458    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
459    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
460    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
461    #[instrument(skip_all, fields(language))]
462    pub fn extract(
463        source: &str,
464        language: &str,
465        ast_recursion_limit: Option<usize>,
466    ) -> Result<SemanticAnalysis, ParserError> {
467        let lang_info = get_language_info(language)
468            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
469
470        let tree = PARSER.with(|p| {
471            let mut parser = p.borrow_mut();
472            parser
473                .set_language(&lang_info.language)
474                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
475            parser
476                .parse(source, None)
477                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
478        })?;
479
480        // 0 is not a useful depth (visits root node only, returning zero results).
481        // Treat 0 as None (unlimited). See #339.
482        let max_depth: Option<u32> = ast_recursion_limit
483            .filter(|&limit| limit > 0)
484            .map(|limit| {
485                u32::try_from(limit).map_err(|_| {
486                    ParserError::ParseError(format!(
487                        "ast_recursion_limit {} exceeds maximum supported value {}",
488                        limit,
489                        u32::MAX
490                    ))
491                })
492            })
493            .transpose()?;
494
495        let compiled = get_compiled_queries(language)?;
496        let root = tree.root_node();
497
498        let mut functions = Vec::new();
499        let mut classes = Vec::new();
500        let mut imports = Vec::new();
501        let mut references = Vec::new();
502        let mut call_frequency = HashMap::new();
503        let mut calls = Vec::new();
504
505        Self::extract_elements(
506            source,
507            compiled,
508            root,
509            max_depth,
510            &lang_info,
511            &mut functions,
512            &mut classes,
513        );
514        Self::extract_calls(
515            source,
516            compiled,
517            root,
518            max_depth,
519            &mut calls,
520            &mut call_frequency,
521        );
522        Self::extract_imports(source, compiled, root, max_depth, &mut imports);
523        Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes);
524        Self::extract_references(source, compiled, root, max_depth, &mut references);
525
526        // Extract impl-trait blocks for Rust files (empty for other languages)
527        let impl_traits = if language == "rust" {
528            Self::extract_impl_traits_from_tree(source, compiled, root)
529        } else {
530            vec![]
531        };
532
533        tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), impl_traits = impl_traits.len(), "extraction complete");
534
535        Ok(SemanticAnalysis {
536            functions,
537            classes,
538            imports,
539            references,
540            call_frequency,
541            calls,
542            impl_traits,
543        })
544    }
545
546    fn extract_elements(
547        source: &str,
548        compiled: &CompiledQueries,
549        root: Node<'_>,
550        max_depth: Option<u32>,
551        lang_info: &crate::languages::LanguageInfo,
552        functions: &mut Vec<FunctionInfo>,
553        classes: &mut Vec<ClassInfo>,
554    ) {
555        let mut cursor = QueryCursor::new();
556        if let Some(depth) = max_depth {
557            cursor.set_max_start_depth(Some(depth));
558        }
559        let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
560        let mut seen_functions = std::collections::HashSet::new();
561
562        while let Some(mat) = matches.next() {
563            for capture in mat.captures {
564                let capture_name = compiled.element.capture_names()[capture.index as usize];
565                let node = capture.node;
566                match capture_name {
567                    "function" => {
568                        if let Some(name_node) = node.child_by_field_name("name") {
569                            let name =
570                                source[name_node.start_byte()..name_node.end_byte()].to_string();
571                            let func_key = (name.clone(), node.start_position().row);
572                            if !seen_functions.contains(&func_key) {
573                                seen_functions.insert(func_key);
574                                let params = node
575                                    .child_by_field_name("parameters")
576                                    .map(|p| source[p.start_byte()..p.end_byte()].to_string())
577                                    .unwrap_or_default();
578                                let return_type = node
579                                    .child_by_field_name("return_type")
580                                    .map(|r| source[r.start_byte()..r.end_byte()].to_string());
581                                functions.push(FunctionInfo {
582                                    name,
583                                    line: node.start_position().row + 1,
584                                    end_line: node.end_position().row + 1,
585                                    parameters: if params.is_empty() {
586                                        Vec::new()
587                                    } else {
588                                        vec![params]
589                                    },
590                                    return_type,
591                                });
592                            }
593                        }
594                    }
595                    "class" => {
596                        if let Some(name_node) = node.child_by_field_name("name") {
597                            let name =
598                                source[name_node.start_byte()..name_node.end_byte()].to_string();
599                            let inherits = if let Some(handler) = lang_info.extract_inheritance {
600                                handler(&node, source)
601                            } else {
602                                Vec::new()
603                            };
604                            classes.push(ClassInfo {
605                                name,
606                                line: node.start_position().row + 1,
607                                end_line: node.end_position().row + 1,
608                                methods: Vec::new(),
609                                fields: Vec::new(),
610                                inherits,
611                            });
612                        }
613                    }
614                    _ => {}
615                }
616            }
617        }
618    }
619
620    /// Returns the name of the enclosing function/method/subroutine for a given AST node,
621    /// by walking ancestors and matching all language-specific function container kinds.
622    fn enclosing_function_name(mut node: tree_sitter::Node<'_>, source: &str) -> Option<String> {
623        let mut depth = 0u32;
624        while let Some(parent) = node.parent() {
625            depth += 1;
626            // Cap at 64 hops: real function nesting rarely exceeds ~10 levels; 64 is a generous
627            // upper bound that guards against pathological/malformed ASTs without false negatives
628            // on legitimate code. Returns None (treated as <module>) when the cap is hit.
629            if depth > 64 {
630                return None;
631            }
632            let name_node = match parent.kind() {
633                // Direct name field: Rust, Python, Go, Java, TypeScript/TSX
634                "function_item"
635                | "method_item"
636                | "function_definition"
637                | "function_declaration"
638                | "method_declaration"
639                | "method_definition" => parent.child_by_field_name("name"),
640                // Fortran subroutine: name is inside subroutine_statement child
641                "subroutine" => {
642                    let mut cursor = parent.walk();
643                    parent
644                        .children(&mut cursor)
645                        .find(|c| c.kind() == "subroutine_statement")
646                        .and_then(|s| s.child_by_field_name("name"))
647                }
648                // Fortran function: name is inside function_statement child
649                "function" => {
650                    let mut cursor = parent.walk();
651                    parent
652                        .children(&mut cursor)
653                        .find(|c| c.kind() == "function_statement")
654                        .and_then(|s| s.child_by_field_name("name"))
655                }
656                _ => {
657                    node = parent;
658                    continue;
659                }
660            };
661            return name_node.map(|n| source[n.start_byte()..n.end_byte()].to_string());
662        }
663        // The loop exits here only when no parent was found (i.e., we reached the tree root
664        // without finding a function container). If the depth cap fired, we returned None early
665        // above. Nothing to assert here.
666        None
667    }
668
669    fn extract_calls(
670        source: &str,
671        compiled: &CompiledQueries,
672        root: Node<'_>,
673        max_depth: Option<u32>,
674        calls: &mut Vec<CallInfo>,
675        call_frequency: &mut HashMap<String, usize>,
676    ) {
677        let mut cursor = QueryCursor::new();
678        if let Some(depth) = max_depth {
679            cursor.set_max_start_depth(Some(depth));
680        }
681        let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
682
683        while let Some(mat) = matches.next() {
684            for capture in mat.captures {
685                let capture_name = compiled.call.capture_names()[capture.index as usize];
686                if capture_name != "call" {
687                    continue;
688                }
689                let node = capture.node;
690                let call_name = source[node.start_byte()..node.end_byte()].to_string();
691                *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
692
693                let caller = Self::enclosing_function_name(node, source)
694                    .unwrap_or_else(|| "<module>".to_string());
695
696                let mut arg_count = None;
697                let mut arg_node = node;
698                let mut hop = 0u32;
699                let mut cap_hit = false;
700                while let Some(parent) = arg_node.parent() {
701                    hop += 1;
702                    // Bounded parent traversal: cap at 16 hops to guard against pathological
703                    // walks on malformed/degenerate trees. Real call-expression nesting is
704                    // shallow (typically 1-3 levels). When the cap is hit we stop searching and
705                    // leave arg_count as None; the caller is still recorded, just without
706                    // argument-count information.
707                    if hop > 16 {
708                        cap_hit = true;
709                        break;
710                    }
711                    if parent.kind() == "call_expression" {
712                        if let Some(args) = parent.child_by_field_name("arguments") {
713                            arg_count = Some(args.named_child_count());
714                        }
715                        break;
716                    }
717                    arg_node = parent;
718                }
719                debug_assert!(
720                    !cap_hit,
721                    "extract_calls: parent traversal cap reached (hop > 16)"
722                );
723
724                calls.push(CallInfo {
725                    caller,
726                    callee: call_name,
727                    line: node.start_position().row + 1,
728                    column: node.start_position().column,
729                    arg_count,
730                });
731            }
732        }
733    }
734
735    fn extract_imports(
736        source: &str,
737        compiled: &CompiledQueries,
738        root: Node<'_>,
739        max_depth: Option<u32>,
740        imports: &mut Vec<ImportInfo>,
741    ) {
742        let Some(ref import_query) = compiled.import else {
743            return;
744        };
745        let mut cursor = QueryCursor::new();
746        if let Some(depth) = max_depth {
747            cursor.set_max_start_depth(Some(depth));
748        }
749        let mut matches = cursor.matches(import_query, root, source.as_bytes());
750
751        while let Some(mat) = matches.next() {
752            for capture in mat.captures {
753                let capture_name = import_query.capture_names()[capture.index as usize];
754                if capture_name == "import_path" {
755                    let node = capture.node;
756                    let line = node.start_position().row + 1;
757                    extract_imports_from_node(&node, source, "", line, imports);
758                }
759            }
760        }
761    }
762
763    fn extract_impl_methods(
764        source: &str,
765        compiled: &CompiledQueries,
766        root: Node<'_>,
767        max_depth: Option<u32>,
768        classes: &mut [ClassInfo],
769    ) {
770        let Some(ref impl_query) = compiled.impl_block else {
771            return;
772        };
773        let mut cursor = QueryCursor::new();
774        if let Some(depth) = max_depth {
775            cursor.set_max_start_depth(Some(depth));
776        }
777        let mut matches = cursor.matches(impl_query, root, source.as_bytes());
778
779        while let Some(mat) = matches.next() {
780            let mut impl_type_name = String::new();
781            let mut method_name = String::new();
782            let mut method_line = 0usize;
783            let mut method_end_line = 0usize;
784            let mut method_params = String::new();
785            let mut method_return_type: Option<String> = None;
786
787            for capture in mat.captures {
788                let capture_name = impl_query.capture_names()[capture.index as usize];
789                let node = capture.node;
790                match capture_name {
791                    "impl_type" => {
792                        impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
793                    }
794                    "method_name" => {
795                        method_name = source[node.start_byte()..node.end_byte()].to_string();
796                    }
797                    "method_params" => {
798                        method_params = source[node.start_byte()..node.end_byte()].to_string();
799                    }
800                    "method" => {
801                        method_line = node.start_position().row + 1;
802                        method_end_line = node.end_position().row + 1;
803                        method_return_type = node
804                            .child_by_field_name("return_type")
805                            .map(|r| source[r.start_byte()..r.end_byte()].to_string());
806                    }
807                    _ => {}
808                }
809            }
810
811            if !impl_type_name.is_empty() && !method_name.is_empty() {
812                let func = FunctionInfo {
813                    name: method_name,
814                    line: method_line,
815                    end_line: method_end_line,
816                    parameters: if method_params.is_empty() {
817                        Vec::new()
818                    } else {
819                        vec![method_params]
820                    },
821                    return_type: method_return_type,
822                };
823                if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
824                    class.methods.push(func);
825                }
826            }
827        }
828    }
829
830    fn extract_references(
831        source: &str,
832        compiled: &CompiledQueries,
833        root: Node<'_>,
834        max_depth: Option<u32>,
835        references: &mut Vec<ReferenceInfo>,
836    ) {
837        let Some(ref ref_query) = compiled.reference else {
838            return;
839        };
840        let mut cursor = QueryCursor::new();
841        if let Some(depth) = max_depth {
842            cursor.set_max_start_depth(Some(depth));
843        }
844        let mut seen_refs = std::collections::HashSet::new();
845        let mut matches = cursor.matches(ref_query, root, source.as_bytes());
846
847        while let Some(mat) = matches.next() {
848            for capture in mat.captures {
849                let capture_name = ref_query.capture_names()[capture.index as usize];
850                if capture_name == "type_ref" {
851                    let node = capture.node;
852                    let type_ref = source[node.start_byte()..node.end_byte()].to_string();
853                    if seen_refs.insert(type_ref.clone()) {
854                        references.push(ReferenceInfo {
855                            symbol: type_ref,
856                            reference_type: ReferenceType::Usage,
857                            // location is intentionally empty here; set by the caller (analyze_file)
858                            location: String::new(),
859                            line: node.start_position().row + 1,
860                        });
861                    }
862                }
863            }
864        }
865    }
866
867    /// Extract impl-trait blocks from an already-parsed tree.
868    ///
869    /// Called during `extract()` for Rust files to avoid a second parse.
870    /// Returns an empty vec if the query is not available.
871    fn extract_impl_traits_from_tree(
872        source: &str,
873        compiled: &CompiledQueries,
874        root: Node<'_>,
875    ) -> Vec<ImplTraitInfo> {
876        let Some(query) = &compiled.impl_trait else {
877            return vec![];
878        };
879
880        let mut cursor = QueryCursor::new();
881        let mut matches = cursor.matches(query, root, source.as_bytes());
882        let mut results = Vec::new();
883
884        while let Some(mat) = matches.next() {
885            let mut trait_name = String::new();
886            let mut impl_type = String::new();
887            let mut line = 0usize;
888
889            for capture in mat.captures {
890                let capture_name = query.capture_names()[capture.index as usize];
891                let node = capture.node;
892                let text = source[node.start_byte()..node.end_byte()].to_string();
893                match capture_name {
894                    "trait_name" => {
895                        trait_name = text;
896                        line = node.start_position().row + 1;
897                    }
898                    "impl_type" => {
899                        impl_type = text;
900                    }
901                    _ => {}
902                }
903            }
904
905            if !trait_name.is_empty() && !impl_type.is_empty() {
906                results.push(ImplTraitInfo {
907                    trait_name,
908                    impl_type,
909                    path: PathBuf::new(), // Path will be set by caller
910                    line,
911                });
912            }
913        }
914
915        results
916    }
917}
918
919/// Extract `impl Trait for Type` blocks from Rust source.
920///
921/// Runs independently of `extract_references` to avoid shared deduplication state.
922/// Returns an empty vec for non-Rust source (no error; caller decides).
923#[must_use]
924pub fn extract_impl_traits(source: &str, path: &Path) -> Vec<ImplTraitInfo> {
925    let Some(lang_info) = get_language_info("rust") else {
926        return vec![];
927    };
928
929    let Ok(compiled) = get_compiled_queries("rust") else {
930        return vec![];
931    };
932
933    let Some(query) = &compiled.impl_trait else {
934        return vec![];
935    };
936
937    let Some(tree) = PARSER.with(|p| {
938        let mut parser = p.borrow_mut();
939        let _ = parser.set_language(&lang_info.language);
940        parser.parse(source, None)
941    }) else {
942        return vec![];
943    };
944
945    let root = tree.root_node();
946    let mut cursor = QueryCursor::new();
947    let mut matches = cursor.matches(query, root, source.as_bytes());
948    let mut results = Vec::new();
949
950    while let Some(mat) = matches.next() {
951        let mut trait_name = String::new();
952        let mut impl_type = String::new();
953        let mut line = 0usize;
954
955        for capture in mat.captures {
956            let capture_name = query.capture_names()[capture.index as usize];
957            let node = capture.node;
958            let text = source[node.start_byte()..node.end_byte()].to_string();
959            match capture_name {
960                "trait_name" => {
961                    trait_name = text;
962                    line = node.start_position().row + 1;
963                }
964                "impl_type" => {
965                    impl_type = text;
966                }
967                _ => {}
968            }
969        }
970
971        if !trait_name.is_empty() && !impl_type.is_empty() {
972            results.push(ImplTraitInfo {
973                trait_name,
974                impl_type,
975                path: path.to_path_buf(),
976                line,
977            });
978        }
979    }
980
981    results
982}
983
984/// Execute a custom tree-sitter query against source code.
985///
986/// This is the internal implementation of the public `execute_query` function.
987pub fn execute_query_impl(
988    language: &str,
989    source: &str,
990    query_str: &str,
991) -> Result<Vec<crate::QueryCapture>, ParserError> {
992    // Get the tree-sitter language from the language name
993    let ts_language = crate::languages::get_ts_language(language)
994        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
995
996    let mut parser = Parser::new();
997    parser
998        .set_language(&ts_language)
999        .map_err(|e| ParserError::QueryError(e.to_string()))?;
1000
1001    let tree = parser
1002        .parse(source.as_bytes(), None)
1003        .ok_or_else(|| ParserError::QueryError("failed to parse source".to_string()))?;
1004
1005    let query =
1006        Query::new(&ts_language, query_str).map_err(|e| ParserError::QueryError(e.to_string()))?;
1007
1008    let mut cursor = QueryCursor::new();
1009    let source_bytes = source.as_bytes();
1010
1011    let mut captures = Vec::new();
1012    let mut matches = cursor.matches(&query, tree.root_node(), source_bytes);
1013    while let Some(m) = matches.next() {
1014        for cap in m.captures {
1015            let node = cap.node;
1016            let capture_name = query.capture_names()[cap.index as usize].to_string();
1017            let text = node.utf8_text(source_bytes).unwrap_or("").to_string();
1018            captures.push(crate::QueryCapture {
1019                capture_name,
1020                text,
1021                start_line: node.start_position().row,
1022                end_line: node.end_position().row,
1023                start_byte: node.start_byte(),
1024                end_byte: node.end_byte(),
1025            });
1026        }
1027    }
1028    Ok(captures)
1029}
1030
1031// Language-feature-gated tests (require lang-rust); see also tests_unsupported below
#[cfg(all(test, feature = "lang-rust"))]
mod tests {
    use super::*;
    use std::path::Path;

    /// A limit of `Some(0)` must find the same number of functions as no limit.
    #[test]
    fn test_ast_recursion_limit_zero_is_unlimited() {
        let source = "fn hello() -> u32 { 42 }";
        let unlimited = SemanticExtractor::extract(source, "rust", None);
        let zero_limit = SemanticExtractor::extract(source, "rust", Some(0));
        assert!(unlimited.is_ok(), "extract with None failed");
        assert!(zero_limit.is_ok(), "extract with Some(0) failed");

        let unlimited_count = unlimited.unwrap().functions.len();
        let zero_limit_count = zero_limit.unwrap().functions.len();
        assert!(
            unlimited_count > 0,
            "extract with None should find at least one function in the test source"
        );
        assert_eq!(
            unlimited_count, zero_limit_count,
            "ast_recursion_limit=0 should behave identically to unset (unlimited)"
        );
    }

    /// `use a::b as alias;` should expose the alias as an import item.
    #[test]
    fn test_rust_use_as_imports() {
        // Arrange + Act
        let analysis = SemanticExtractor::extract("use std::io as stdio;", "rust", None).unwrap();
        // Assert
        let alias_found = analysis
            .imports
            .iter()
            .flat_map(|imp| imp.items.iter())
            .any(|item| item == "stdio");
        assert!(
            alias_found,
            "expected import alias 'stdio' in {:?}",
            analysis.imports
        );
    }

    /// Same as above, but the aliased path is a plain identifier rather than a
    /// scoped one (exercises the `_ => prefix.to_string()` arm).
    #[test]
    fn test_rust_use_as_clause_plain_identifier() {
        // Arrange + Act
        let analysis = SemanticExtractor::extract("use io as stdio;", "rust", None).unwrap();
        // Assert
        let alias_found = analysis
            .imports
            .iter()
            .flat_map(|imp| imp.items.iter())
            .any(|item| item == "stdio");
        assert!(
            alias_found,
            "expected import alias 'stdio' from plain identifier in {:?}",
            analysis.imports
        );
    }

    /// Nested paths inside a use-list keep the outer prefix in their module.
    #[test]
    fn test_rust_scoped_use_with_prefix() {
        // Arrange + Act
        let analysis =
            SemanticExtractor::extract("use std::{io::Read, io::Write};", "rust", None).unwrap();
        // Assert: both names are reported under a module starting with `std::io`
        let std_io_items: Vec<&str> = analysis
            .imports
            .iter()
            .filter(|imp| imp.module.starts_with("std::io"))
            .flat_map(|imp| imp.items.iter().map(String::as_str))
            .collect();
        let has_both = ["Read", "Write"]
            .iter()
            .all(|wanted| std_io_items.contains(wanted));
        assert!(
            has_both,
            "expected 'Read' and 'Write' items under module with std::io, got {:?}",
            analysis.imports
        );
    }

    /// A flat use-list reports each entry as an item of the shared module.
    #[test]
    fn test_rust_scoped_use_imports() {
        // Arrange + Act
        let analysis = SemanticExtractor::extract("use std::{fs, io};", "rust", None).unwrap();
        // Assert
        let items: Vec<&str> = analysis
            .imports
            .iter()
            .filter(|imp| imp.module == "std")
            .flat_map(|imp| imp.items.iter().map(String::as_str))
            .collect();
        let has_both = ["fs", "io"].iter().all(|wanted| items.contains(wanted));
        assert!(
            has_both,
            "expected 'fs' and 'io' items under module 'std', got {:?}",
            items
        );
    }

    /// A glob import is reported as the single item `*` of its module.
    #[test]
    fn test_rust_wildcard_imports() {
        // Arrange + Act
        let analysis = SemanticExtractor::extract("use std::io::*;", "rust", None).unwrap();
        // Assert
        let has_wildcard = analysis
            .imports
            .iter()
            .any(|imp| imp.module == "std::io" && imp.items == ["*"]);
        assert!(
            has_wildcard,
            "expected wildcard import with module 'std::io', got {:?}",
            analysis.imports
        );
    }

    /// The standalone extractor finds a simple `impl Trait for Type`.
    #[test]
    fn test_extract_impl_traits_standalone() {
        // Arrange
        let source = "\nstruct Foo;\ntrait Display {}\nimpl Display for Foo {}\n";
        // Act
        let found = extract_impl_traits(source, Path::new("test.rs"));
        // Assert
        assert_eq!(found.len(), 1, "expected one impl trait, got {:?}", found);
        let only = &found[0];
        assert_eq!(only.trait_name, "Display");
        assert_eq!(only.impl_type, "Foo");
    }

    /// A limit above `u32::MAX` is rejected with `ParseError` (64-bit targets only,
    /// where such a `usize` can exist).
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_ast_recursion_limit_overflow() {
        // Arrange
        let oversized = usize::try_from(u64::from(u32::MAX) + 1).unwrap();
        // Act
        let outcome = SemanticExtractor::extract("fn foo() {}", "rust", Some(oversized));
        // Assert
        let is_parse_error = matches!(outcome, Err(ParserError::ParseError(_)));
        assert!(
            is_parse_error,
            "expected ParseError for oversized limit, got {:?}",
            outcome
        );
    }

    /// A small positive limit still succeeds and finds the function
    /// (exercises the `Some(depth)` branch).
    #[test]
    fn test_ast_recursion_limit_some() {
        // Arrange + Act
        let outcome = SemanticExtractor::extract("fn hello() -> u32 { 42 }", "rust", Some(5));
        // Assert
        assert!(outcome.is_ok(), "extract with Some(5) failed: {:?}", outcome);
        let function_count = outcome.unwrap().functions.len();
        assert!(
            function_count > 0,
            "expected at least one function with depth limit 5"
        );
    }
}
1202
1203// Language-feature-gated tests for Python
#[cfg(all(test, feature = "lang-python"))]
mod tests_python {
    use super::*;

    /// `from . import foo` must yield an import whose module contains a dot.
    #[test]
    fn test_python_relative_import() {
        // Arrange + Act
        let analysis = SemanticExtractor::extract("from . import foo\n", "python", None).unwrap();
        // Assert
        let has_relative = analysis.imports.iter().any(|imp| imp.module.contains('.'));
        assert!(
            has_relative,
            "expected relative import in {:?}",
            analysis.imports
        );
    }

    /// `from os import path as p` is reported under the original name `path`,
    /// not the alias `p`.
    #[test]
    fn test_python_aliased_import() {
        // Arrange + Act
        let analysis =
            SemanticExtractor::extract("from os import path as p\n", "python", None).unwrap();
        // Assert: module `os` lists `path` among its items
        let has_path = analysis
            .imports
            .iter()
            .filter(|imp| imp.module == "os")
            .flat_map(|imp| imp.items.iter())
            .any(|item| item == "path");
        assert!(
            has_path,
            "expected import 'path' from module 'os' in {:?}",
            analysis.imports
        );
    }
}
1242
1243// Tests that do not require any language feature gate
#[cfg(test)]
mod tests_unsupported {
    use super::*;

    /// `ElementExtractor` reports an unknown language by name.
    #[test]
    fn test_element_extractor_unsupported_language() {
        // Arrange + Act
        let outcome = ElementExtractor::extract_with_depth("x = 1", "cobol");
        // Assert
        let rejected_cobol = matches!(
            &outcome,
            Err(ParserError::UnsupportedLanguage(lang)) if lang == "cobol"
        );
        assert!(
            rejected_cobol,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }

    /// `SemanticExtractor` reports an unknown language by name.
    #[test]
    fn test_semantic_extractor_unsupported_language() {
        // Arrange + Act
        let outcome = SemanticExtractor::extract("x = 1", "cobol", None);
        // Assert
        let rejected_cobol = matches!(
            &outcome,
            Err(ParserError::UnsupportedLanguage(lang)) if lang == "cobol"
        );
        assert!(
            rejected_cobol,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }
}
1271}