Skip to main content

code_analyze_core/
parser.rs

1// SPDX-FileCopyrightText: 2026 code-analyze-mcp contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Tree-sitter-based parser for extracting semantic structure from source code.
4//!
5//! This module provides language-agnostic parsing using tree-sitter queries to extract
6//! functions, classes, imports, references, and other semantic elements from source files.
7//! Two main extractors handle different use cases:
8//!
9//! - [`ElementExtractor`]: Quick extraction of function and class counts.
10//! - [`SemanticExtractor`]: Detailed semantic analysis with calls, imports, and references.
11
12use crate::languages::get_language_info;
13use crate::types::{
14    CallInfo, ClassInfo, FunctionInfo, ImplTraitInfo, ImportInfo, ReferenceInfo, ReferenceType,
15    SemanticAnalysis,
16};
17use std::cell::RefCell;
18use std::collections::HashMap;
19use std::path::{Path, PathBuf};
20use std::sync::LazyLock;
21use thiserror::Error;
22use tracing::instrument;
23use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
24
/// Errors reported by the extractors in this module.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum ParserError {
    /// The language name has no registered language info or no cached compiled queries.
    #[error("Unsupported language: {0}")]
    UnsupportedLanguage(String),
    /// The parser could not be configured for the language or produced no tree;
    /// also used to reject an `ast_recursion_limit` that does not fit in `u32`.
    #[error("Failed to parse file: {0}")]
    ParseError(String),
    /// The input was not valid UTF-8.
    // NOTE(review): not constructed in the visible part of this file -- confirm the producer.
    #[error("Invalid UTF-8 in file")]
    InvalidUtf8,
    /// A tree-sitter query failed to compile; the payload names the query and language.
    #[error("Query error: {0}")]
    QueryError(String),
}
37
/// Compiled tree-sitter queries for a language.
/// Stores all query types: mandatory (element, call) and optional
/// (import, impl, reference, impl_trait, defuse). An optional query is `None`
/// when the language info does not declare a query string for it.
struct CompiledQueries {
    /// Element query; its `function` / `class` captures drive element extraction.
    element: Query,
    /// Call query; its `call` capture marks callee names.
    call: Query,
    /// Import query, compiled from `import_query` when present.
    import: Option<Query>,
    /// Impl-block query, compiled from `impl_query` when present.
    impl_block: Option<Query>,
    /// Reference query, compiled from `reference_query` when present.
    reference: Option<Query>,
    /// Impl-trait query, compiled from `impl_trait_query` when present.
    impl_trait: Option<Query>,
    /// Def-use query, compiled from `defuse_query` when present.
    defuse: Option<Query>,
}
49
50/// Build compiled queries for a given language.
51///
52/// The `map_err` closures inside are only reachable if a hardcoded query string is
53/// invalid, which cannot happen at runtime -- exclude them from coverage instrumentation.
54#[cfg_attr(coverage_nightly, coverage(off))]
55fn build_compiled_queries(
56    lang_info: &crate::languages::LanguageInfo,
57) -> Result<CompiledQueries, ParserError> {
58    let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
59        ParserError::QueryError(format!(
60            "Failed to compile element query for {}: {}",
61            lang_info.name, e
62        ))
63    })?;
64
65    let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
66        ParserError::QueryError(format!(
67            "Failed to compile call query for {}: {}",
68            lang_info.name, e
69        ))
70    })?;
71
72    let import = if let Some(import_query_str) = lang_info.import_query {
73        Some(
74            Query::new(&lang_info.language, import_query_str).map_err(|e| {
75                ParserError::QueryError(format!(
76                    "Failed to compile import query for {}: {}",
77                    lang_info.name, e
78                ))
79            })?,
80        )
81    } else {
82        None
83    };
84
85    let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
86        Some(
87            Query::new(&lang_info.language, impl_query_str).map_err(|e| {
88                ParserError::QueryError(format!(
89                    "Failed to compile impl query for {}: {}",
90                    lang_info.name, e
91                ))
92            })?,
93        )
94    } else {
95        None
96    };
97
98    let reference = if let Some(ref_query_str) = lang_info.reference_query {
99        Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
100            ParserError::QueryError(format!(
101                "Failed to compile reference query for {}: {}",
102                lang_info.name, e
103            ))
104        })?)
105    } else {
106        None
107    };
108
109    let impl_trait = if let Some(impl_trait_query_str) = lang_info.impl_trait_query {
110        Some(
111            Query::new(&lang_info.language, impl_trait_query_str).map_err(|e| {
112                ParserError::QueryError(format!(
113                    "Failed to compile impl_trait query for {}: {}",
114                    lang_info.name, e
115                ))
116            })?,
117        )
118    } else {
119        None
120    };
121
122    let defuse = if let Some(defuse_query_str) = lang_info.defuse_query {
123        Some(
124            Query::new(&lang_info.language, defuse_query_str).map_err(|e| {
125                ParserError::QueryError(format!(
126                    "Failed to compile defuse query for {}: {}",
127                    lang_info.name, e
128                ))
129            })?,
130        )
131    } else {
132        None
133    };
134
135    Ok(CompiledQueries {
136        element,
137        call,
138        import,
139        impl_block,
140        reference,
141        impl_trait,
142        defuse,
143    })
144}
145
146/// Initialize the query cache with compiled queries for all supported languages.
147///
148/// Excluded from coverage: the `Err` arm is unreachable because `build_compiled_queries`
149/// only fails on invalid hardcoded query strings.
150#[cfg_attr(coverage_nightly, coverage(off))]
151fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
152    let mut cache = HashMap::new();
153
154    for lang_name in crate::lang::supported_languages() {
155        if let Some(lang_info) = get_language_info(lang_name) {
156            match build_compiled_queries(&lang_info) {
157                Ok(compiled) => {
158                    cache.insert(*lang_name, compiled);
159                }
160                Err(e) => {
161                    tracing::error!(
162                        "Failed to compile queries for language {}: {}",
163                        lang_name,
164                        e
165                    );
166                }
167            }
168        }
169    }
170
171    cache
172}
173
/// Lazily initialized cache of compiled queries per language.
///
/// Populated by `init_query_cache` on first access. Languages whose queries failed
/// to compile are absent from the map.
static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
    LazyLock::new(init_query_cache);
177
178/// Get compiled queries for a language from the cache.
179fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
180    QUERY_CACHE
181        .get(language)
182        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
183}
184
// One tree-sitter parser per thread, shared by both extractors. Each extraction
// call sets the parser's language before parsing, so the parser can be reused
// across languages.
thread_local! {
    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
}
188
189/// Canonical API for extracting element counts from source code.
190pub struct ElementExtractor;
191
192impl ElementExtractor {
193    /// Extract function and class counts from source code.
194    ///
195    /// # Errors
196    ///
197    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
198    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
199    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
200    #[instrument(skip_all, fields(language))]
201    pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
202        let lang_info = get_language_info(language)
203            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
204
205        let tree = PARSER.with(|p| {
206            let mut parser = p.borrow_mut();
207            parser
208                .set_language(&lang_info.language)
209                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
210            parser
211                .parse(source, None)
212                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
213        })?;
214
215        let compiled = get_compiled_queries(language)?;
216
217        let mut cursor = QueryCursor::new();
218        let mut function_count = 0;
219        let mut class_count = 0;
220
221        let mut matches = cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
222        while let Some(mat) = matches.next() {
223            for capture in mat.captures {
224                let capture_name = compiled.element.capture_names()[capture.index as usize];
225                match capture_name {
226                    "function" => function_count += 1,
227                    "class" => class_count += 1,
228                    _ => {}
229                }
230            }
231        }
232
233        tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
234
235        Ok((function_count, class_count))
236    }
237}
238
239/// Recursively extract `ImportInfo` entries from a use-clause node, respecting all Rust
240/// use-declaration forms (`scoped_identifier`, `scoped_use_list`, `use_list`,
241/// `use_as_clause`, `use_wildcard`, bare `identifier`).
242#[allow(clippy::too_many_lines)] // exhaustive match over all supported Rust use-clause forms; splitting harms readability
243fn extract_imports_from_node(
244    node: &Node,
245    source: &str,
246    prefix: &str,
247    line: usize,
248    imports: &mut Vec<ImportInfo>,
249) {
250    match node.kind() {
251        // Simple identifier: `use foo;` or an item inside `{foo, bar}`
252        "identifier" | "self" | "super" | "crate" => {
253            let name = source[node.start_byte()..node.end_byte()].to_string();
254            imports.push(ImportInfo {
255                module: prefix.to_string(),
256                items: vec![name],
257                line,
258            });
259        }
260        // Qualified path: `std::collections::HashMap`
261        "scoped_identifier" => {
262            let item = node
263                .child_by_field_name("name")
264                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
265                .unwrap_or_default();
266            let module = node.child_by_field_name("path").map_or_else(
267                || prefix.to_string(),
268                |p| {
269                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
270                    if prefix.is_empty() {
271                        path_text
272                    } else {
273                        format!("{prefix}::{path_text}")
274                    }
275                },
276            );
277            if !item.is_empty() {
278                imports.push(ImportInfo {
279                    module,
280                    items: vec![item],
281                    line,
282                });
283            }
284        }
285        // `std::{io, fs}` — path prefix followed by a brace list
286        "scoped_use_list" => {
287            let new_prefix = node.child_by_field_name("path").map_or_else(
288                || prefix.to_string(),
289                |p| {
290                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
291                    if prefix.is_empty() {
292                        path_text
293                    } else {
294                        format!("{prefix}::{path_text}")
295                    }
296                },
297            );
298            if let Some(list) = node.child_by_field_name("list") {
299                extract_imports_from_node(&list, source, &new_prefix, line, imports);
300            }
301        }
302        // `{HashMap, HashSet}` — brace-enclosed list of items
303        "use_list" => {
304            let mut cursor = node.walk();
305            for child in node.children(&mut cursor) {
306                match child.kind() {
307                    "{" | "}" | "," => {}
308                    _ => extract_imports_from_node(&child, source, prefix, line, imports),
309                }
310            }
311        }
312        // `std::io::*` — glob import
313        "use_wildcard" => {
314            let text = source[node.start_byte()..node.end_byte()].to_string();
315            let module = if let Some(stripped) = text.strip_suffix("::*") {
316                if prefix.is_empty() {
317                    stripped.to_string()
318                } else {
319                    format!("{prefix}::{stripped}")
320                }
321            } else {
322                prefix.to_string()
323            };
324            imports.push(ImportInfo {
325                module,
326                items: vec!["*".to_string()],
327                line,
328            });
329        }
330        // `io as stdio` or `std::io as stdio`
331        "use_as_clause" => {
332            let alias = node
333                .child_by_field_name("alias")
334                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
335                .unwrap_or_default();
336            let module = if let Some(path_node) = node.child_by_field_name("path") {
337                match path_node.kind() {
338                    "scoped_identifier" => path_node.child_by_field_name("path").map_or_else(
339                        || prefix.to_string(),
340                        |p| {
341                            let p_text = source[p.start_byte()..p.end_byte()].to_string();
342                            if prefix.is_empty() {
343                                p_text
344                            } else {
345                                format!("{prefix}::{p_text}")
346                            }
347                        },
348                    ),
349                    _ => prefix.to_string(),
350                }
351            } else {
352                prefix.to_string()
353            };
354            if !alias.is_empty() {
355                imports.push(ImportInfo {
356                    module,
357                    items: vec![alias],
358                    line,
359                });
360            }
361        }
362        // Python import_from_statement: `from module import name` or `from . import *`
363        "import_from_statement" => {
364            extract_python_import_from(node, source, line, imports);
365        }
366        // Fallback for non-Rust import nodes: capture full text as module
367        _ => {
368            let text = source[node.start_byte()..node.end_byte()]
369                .trim()
370                .to_string();
371            if !text.is_empty() {
372                imports.push(ImportInfo {
373                    module: text,
374                    items: vec![],
375                    line,
376                });
377            }
378        }
379    }
380}
381
382/// Extract an item name from a `dotted_name` or `aliased_import` child node.
383fn extract_import_item_name(child: &Node, source: &str) -> Option<String> {
384    match child.kind() {
385        "dotted_name" => {
386            let name = source[child.start_byte()..child.end_byte()]
387                .trim()
388                .to_string();
389            if name.is_empty() { None } else { Some(name) }
390        }
391        "aliased_import" => child.child_by_field_name("name").and_then(|n| {
392            let name = source[n.start_byte()..n.end_byte()].trim().to_string();
393            if name.is_empty() { None } else { Some(name) }
394        }),
395        _ => None,
396    }
397}
398
399/// Collect wildcard/named imports from an `import_list` node or from direct named children.
400fn collect_import_items(
401    node: &Node,
402    source: &str,
403    is_wildcard: &mut bool,
404    items: &mut Vec<String>,
405) {
406    // Prefer import_list child (wraps `from x import a, b`)
407    if let Some(import_list) = node.child_by_field_name("import_list") {
408        let mut cursor = import_list.walk();
409        for child in import_list.named_children(&mut cursor) {
410            if child.kind() == "wildcard_import" {
411                *is_wildcard = true;
412            } else if let Some(name) = extract_import_item_name(&child, source) {
413                items.push(name);
414            }
415        }
416        return;
417    }
418    // No import_list: single-name or wildcard as direct child (skip first named child = module_name)
419    let mut cursor = node.walk();
420    let mut first = true;
421    for child in node.named_children(&mut cursor) {
422        if first {
423            first = false;
424            continue;
425        }
426        if child.kind() == "wildcard_import" {
427            *is_wildcard = true;
428        } else if let Some(name) = extract_import_item_name(&child, source) {
429            items.push(name);
430        }
431    }
432}
433
434/// Handle Python `import_from_statement` node.
435fn extract_python_import_from(
436    node: &Node,
437    source: &str,
438    line: usize,
439    imports: &mut Vec<ImportInfo>,
440) {
441    let module = if let Some(m) = node.child_by_field_name("module_name") {
442        source[m.start_byte()..m.end_byte()].trim().to_string()
443    } else if let Some(r) = node.child_by_field_name("relative_import") {
444        source[r.start_byte()..r.end_byte()].trim().to_string()
445    } else {
446        String::new()
447    };
448
449    let mut is_wildcard = false;
450    let mut items = Vec::new();
451    collect_import_items(node, source, &mut is_wildcard, &mut items);
452
453    if !module.is_empty() {
454        imports.push(ImportInfo {
455            module,
456            items: if is_wildcard {
457                vec!["*".to_string()]
458            } else {
459                items
460            },
461            line,
462        });
463    }
464}
465
/// Canonical API for detailed semantic extraction: functions, classes, imports,
/// references and calls.
pub struct SemanticExtractor;
467
468impl SemanticExtractor {
469    /// Extract semantic information from source code.
470    ///
471    /// # Errors
472    ///
473    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
474    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
475    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
476    #[instrument(skip_all, fields(language))]
477    pub fn extract(
478        source: &str,
479        language: &str,
480        ast_recursion_limit: Option<usize>,
481    ) -> Result<SemanticAnalysis, ParserError> {
482        let lang_info = get_language_info(language)
483            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
484
485        let tree = PARSER.with(|p| {
486            let mut parser = p.borrow_mut();
487            parser
488                .set_language(&lang_info.language)
489                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
490            parser
491                .parse(source, None)
492                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
493        })?;
494
495        // 0 is not a useful depth (visits root node only, returning zero results).
496        // Treat 0 as None (unlimited). See #339.
497        let max_depth: Option<u32> = ast_recursion_limit
498            .filter(|&limit| limit > 0)
499            .map(|limit| {
500                u32::try_from(limit).map_err(|_| {
501                    ParserError::ParseError(format!(
502                        "ast_recursion_limit {} exceeds maximum supported value {}",
503                        limit,
504                        u32::MAX
505                    ))
506                })
507            })
508            .transpose()?;
509
510        let compiled = get_compiled_queries(language)?;
511        let root = tree.root_node();
512
513        let mut functions = Vec::new();
514        let mut classes = Vec::new();
515        let mut imports = Vec::new();
516        let mut references = Vec::new();
517        let mut call_frequency = HashMap::new();
518        let mut calls = Vec::new();
519
520        Self::extract_elements(
521            source,
522            compiled,
523            root,
524            max_depth,
525            &lang_info,
526            &mut functions,
527            &mut classes,
528        );
529        Self::extract_calls(
530            source,
531            compiled,
532            root,
533            max_depth,
534            &mut calls,
535            &mut call_frequency,
536        );
537        Self::extract_imports(source, compiled, root, max_depth, &mut imports);
538        Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes);
539        Self::extract_references(source, compiled, root, max_depth, &mut references);
540
541        // Extract impl-trait blocks for Rust files (empty for other languages)
542        let impl_traits = if language == "rust" {
543            Self::extract_impl_traits_from_tree(source, compiled, root)
544        } else {
545            vec![]
546        };
547
548        tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), impl_traits = impl_traits.len(), "extraction complete");
549
550        Ok(SemanticAnalysis {
551            functions,
552            classes,
553            imports,
554            references,
555            call_frequency,
556            calls,
557            impl_traits,
558            def_use_sites: Vec::new(),
559        })
560    }
561
562    fn extract_elements(
563        source: &str,
564        compiled: &CompiledQueries,
565        root: Node<'_>,
566        max_depth: Option<u32>,
567        lang_info: &crate::languages::LanguageInfo,
568        functions: &mut Vec<FunctionInfo>,
569        classes: &mut Vec<ClassInfo>,
570    ) {
571        let mut cursor = QueryCursor::new();
572        if let Some(depth) = max_depth {
573            cursor.set_max_start_depth(Some(depth));
574        }
575        let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
576        let mut seen_functions = std::collections::HashSet::new();
577
578        while let Some(mat) = matches.next() {
579            let mut func_node: Option<Node> = None;
580            let mut func_name_text: Option<String> = None;
581            let mut class_node: Option<Node> = None;
582            let mut class_name_text: Option<String> = None;
583
584            for capture in mat.captures {
585                let capture_name = compiled.element.capture_names()[capture.index as usize];
586                let node = capture.node;
587                match capture_name {
588                    "function" => func_node = Some(node),
589                    "func_name" | "method_name" => {
590                        func_name_text =
591                            Some(source[node.start_byte()..node.end_byte()].to_string());
592                    }
593                    "class" => class_node = Some(node),
594                    "class_name" | "type_name" => {
595                        class_name_text =
596                            Some(source[node.start_byte()..node.end_byte()].to_string());
597                    }
598                    _ => {}
599                }
600            }
601
602            if let Some(func_node) = func_node {
603                // When a plain function_definition is nested inside a template_declaration,
604                // it is also matched by the explicit template_declaration pattern. Skip it
605                // here to avoid duplicates; the template_declaration match will emit it.
606                let parent_is_template = func_node
607                    .parent()
608                    .map(|p| p.kind() == "template_declaration")
609                    .unwrap_or(false);
610                if func_node.kind() == "function_definition" && parent_is_template {
611                    // Handled by the template_declaration @function match instead.
612                } else {
613                    // Resolve template_declaration to its inner function_definition for
614                    // declarator/field walks. The captured node may be the template wrapper.
615                    let func_def = if func_node.kind() == "template_declaration" {
616                        let mut cursor = func_node.walk();
617                        func_node
618                            .children(&mut cursor)
619                            .find(|n| n.kind() == "function_definition")
620                            .unwrap_or(func_node)
621                    } else {
622                        func_node
623                    };
624
625                    let name = func_name_text
626                        .or_else(|| {
627                            func_def
628                                .child_by_field_name("name")
629                                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
630                        })
631                        .unwrap_or_default();
632
633                    let func_key = (name.clone(), func_node.start_position().row);
634                    if !name.is_empty() && seen_functions.insert(func_key) {
635                        // For C/C++: parameters live under declarator -> parameters.
636                        // For other languages: parameters is a direct child field.
637                        let params = func_def
638                            .child_by_field_name("declarator")
639                            .and_then(|d| d.child_by_field_name("parameters"))
640                            .or_else(|| func_def.child_by_field_name("parameters"))
641                            .map(|p| source[p.start_byte()..p.end_byte()].to_string())
642                            .unwrap_or_default();
643
644                        // Try "type" first (C/C++ uses this field for the return type);
645                        // fall back to "return_type" (Rust, Python, TypeScript, etc.).
646                        let return_type = func_def
647                            .child_by_field_name("type")
648                            .or_else(|| func_def.child_by_field_name("return_type"))
649                            .map(|r| source[r.start_byte()..r.end_byte()].to_string());
650
651                        functions.push(FunctionInfo {
652                            name,
653                            line: func_node.start_position().row + 1,
654                            end_line: func_node.end_position().row + 1,
655                            parameters: if params.is_empty() {
656                                Vec::new()
657                            } else {
658                                vec![params]
659                            },
660                            return_type,
661                        });
662                    }
663                }
664            }
665
666            if let Some(class_node) = class_node {
667                let name = class_name_text
668                    .or_else(|| {
669                        class_node
670                            .child_by_field_name("name")
671                            .map(|n| source[n.start_byte()..n.end_byte()].to_string())
672                    })
673                    .unwrap_or_default();
674
675                if !name.is_empty() {
676                    let inherits = if let Some(handler) = lang_info.extract_inheritance {
677                        handler(&class_node, source)
678                    } else {
679                        Vec::new()
680                    };
681                    classes.push(ClassInfo {
682                        name,
683                        line: class_node.start_position().row + 1,
684                        end_line: class_node.end_position().row + 1,
685                        methods: Vec::new(),
686                        fields: Vec::new(),
687                        inherits,
688                    });
689                }
690            }
691        }
692    }
693
694    /// Returns the name of the enclosing function/method/subroutine for a given AST node,
695    /// by walking ancestors and matching all language-specific function container kinds.
696    fn enclosing_function_name(mut node: tree_sitter::Node<'_>, source: &str) -> Option<String> {
697        let mut depth = 0u32;
698        while let Some(parent) = node.parent() {
699            depth += 1;
700            // Cap at 64 hops: real function nesting rarely exceeds ~10 levels; 64 is a generous
701            // upper bound that guards against pathological/malformed ASTs without false negatives
702            // on legitimate code. Returns None (treated as <module>) when the cap is hit.
703            if depth > 64 {
704                return None;
705            }
706            let name_node = match parent.kind() {
707                // Direct name field: Rust, Python, Go, Java, TypeScript/TSX
708                "function_item"
709                | "method_item"
710                | "function_definition"
711                | "function_declaration"
712                | "method_declaration"
713                | "method_definition" => parent.child_by_field_name("name"),
714                // Fortran subroutine: name is inside subroutine_statement child
715                "subroutine" => {
716                    let mut cursor = parent.walk();
717                    parent
718                        .children(&mut cursor)
719                        .find(|c| c.kind() == "subroutine_statement")
720                        .and_then(|s| s.child_by_field_name("name"))
721                }
722                // Fortran function: name is inside function_statement child
723                "function" => {
724                    let mut cursor = parent.walk();
725                    parent
726                        .children(&mut cursor)
727                        .find(|c| c.kind() == "function_statement")
728                        .and_then(|s| s.child_by_field_name("name"))
729                }
730                _ => {
731                    node = parent;
732                    continue;
733                }
734            };
735            return name_node.map(|n| source[n.start_byte()..n.end_byte()].to_string());
736        }
737        // The loop exits here only when no parent was found (i.e., we reached the tree root
738        // without finding a function container). If the depth cap fired, we returned None early
739        // above. Nothing to assert here.
740        None
741    }
742
743    fn extract_calls(
744        source: &str,
745        compiled: &CompiledQueries,
746        root: Node<'_>,
747        max_depth: Option<u32>,
748        calls: &mut Vec<CallInfo>,
749        call_frequency: &mut HashMap<String, usize>,
750    ) {
751        let mut cursor = QueryCursor::new();
752        if let Some(depth) = max_depth {
753            cursor.set_max_start_depth(Some(depth));
754        }
755        let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
756
757        while let Some(mat) = matches.next() {
758            for capture in mat.captures {
759                let capture_name = compiled.call.capture_names()[capture.index as usize];
760                if capture_name != "call" {
761                    continue;
762                }
763                let node = capture.node;
764                let call_name = source[node.start_byte()..node.end_byte()].to_string();
765                *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
766
767                let caller = Self::enclosing_function_name(node, source)
768                    .unwrap_or_else(|| "<module>".to_string());
769
770                let mut arg_count = None;
771                let mut arg_node = node;
772                let mut hop = 0u32;
773                let mut cap_hit = false;
774                while let Some(parent) = arg_node.parent() {
775                    hop += 1;
776                    // Bounded parent traversal: cap at 16 hops to guard against pathological
777                    // walks on malformed/degenerate trees. Real call-expression nesting is
778                    // shallow (typically 1-3 levels). When the cap is hit we stop searching and
779                    // leave arg_count as None; the caller is still recorded, just without
780                    // argument-count information.
781                    if hop > 16 {
782                        cap_hit = true;
783                        break;
784                    }
785                    if parent.kind() == "call_expression" {
786                        if let Some(args) = parent.child_by_field_name("arguments") {
787                            arg_count = Some(args.named_child_count());
788                        }
789                        break;
790                    }
791                    arg_node = parent;
792                }
793                debug_assert!(
794                    !cap_hit,
795                    "extract_calls: parent traversal cap reached (hop > 16)"
796                );
797
798                calls.push(CallInfo {
799                    caller,
800                    callee: call_name,
801                    line: node.start_position().row + 1,
802                    column: node.start_position().column,
803                    arg_count,
804                });
805            }
806        }
807    }
808
809    fn extract_imports(
810        source: &str,
811        compiled: &CompiledQueries,
812        root: Node<'_>,
813        max_depth: Option<u32>,
814        imports: &mut Vec<ImportInfo>,
815    ) {
816        let Some(ref import_query) = compiled.import else {
817            return;
818        };
819        let mut cursor = QueryCursor::new();
820        if let Some(depth) = max_depth {
821            cursor.set_max_start_depth(Some(depth));
822        }
823        let mut matches = cursor.matches(import_query, root, source.as_bytes());
824
825        while let Some(mat) = matches.next() {
826            for capture in mat.captures {
827                let capture_name = import_query.capture_names()[capture.index as usize];
828                if capture_name == "import_path" {
829                    let node = capture.node;
830                    let line = node.start_position().row + 1;
831                    extract_imports_from_node(&node, source, "", line, imports);
832                }
833            }
834        }
835    }
836
837    fn extract_impl_methods(
838        source: &str,
839        compiled: &CompiledQueries,
840        root: Node<'_>,
841        max_depth: Option<u32>,
842        classes: &mut [ClassInfo],
843    ) {
844        let Some(ref impl_query) = compiled.impl_block else {
845            return;
846        };
847        let mut cursor = QueryCursor::new();
848        if let Some(depth) = max_depth {
849            cursor.set_max_start_depth(Some(depth));
850        }
851        let mut matches = cursor.matches(impl_query, root, source.as_bytes());
852
853        while let Some(mat) = matches.next() {
854            let mut impl_type_name = String::new();
855            let mut method_name = String::new();
856            let mut method_line = 0usize;
857            let mut method_end_line = 0usize;
858            let mut method_params = String::new();
859            let mut method_return_type: Option<String> = None;
860
861            for capture in mat.captures {
862                let capture_name = impl_query.capture_names()[capture.index as usize];
863                let node = capture.node;
864                match capture_name {
865                    "impl_type" => {
866                        impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
867                    }
868                    "method_name" => {
869                        method_name = source[node.start_byte()..node.end_byte()].to_string();
870                    }
871                    "method_params" => {
872                        method_params = source[node.start_byte()..node.end_byte()].to_string();
873                    }
874                    "method" => {
875                        method_line = node.start_position().row + 1;
876                        method_end_line = node.end_position().row + 1;
877                        method_return_type = node
878                            .child_by_field_name("return_type")
879                            .map(|r| source[r.start_byte()..r.end_byte()].to_string());
880                    }
881                    _ => {}
882                }
883            }
884
885            if !impl_type_name.is_empty() && !method_name.is_empty() {
886                let func = FunctionInfo {
887                    name: method_name,
888                    line: method_line,
889                    end_line: method_end_line,
890                    parameters: if method_params.is_empty() {
891                        Vec::new()
892                    } else {
893                        vec![method_params]
894                    },
895                    return_type: method_return_type,
896                };
897                if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
898                    class.methods.push(func);
899                }
900            }
901        }
902    }
903
904    fn extract_references(
905        source: &str,
906        compiled: &CompiledQueries,
907        root: Node<'_>,
908        max_depth: Option<u32>,
909        references: &mut Vec<ReferenceInfo>,
910    ) {
911        let Some(ref ref_query) = compiled.reference else {
912            return;
913        };
914        let mut cursor = QueryCursor::new();
915        if let Some(depth) = max_depth {
916            cursor.set_max_start_depth(Some(depth));
917        }
918        let mut seen_refs = std::collections::HashSet::new();
919        let mut matches = cursor.matches(ref_query, root, source.as_bytes());
920
921        while let Some(mat) = matches.next() {
922            for capture in mat.captures {
923                let capture_name = ref_query.capture_names()[capture.index as usize];
924                if capture_name == "type_ref" {
925                    let node = capture.node;
926                    let type_ref = source[node.start_byte()..node.end_byte()].to_string();
927                    if seen_refs.insert(type_ref.clone()) {
928                        references.push(ReferenceInfo {
929                            symbol: type_ref,
930                            reference_type: ReferenceType::Usage,
931                            // location is intentionally empty here; set by the caller (analyze_file)
932                            location: String::new(),
933                            line: node.start_position().row + 1,
934                        });
935                    }
936                }
937            }
938        }
939    }
940
941    /// Extract impl-trait blocks from an already-parsed tree.
942    ///
943    /// Called during `extract()` for Rust files to avoid a second parse.
944    /// Returns an empty vec if the query is not available.
945    fn extract_impl_traits_from_tree(
946        source: &str,
947        compiled: &CompiledQueries,
948        root: Node<'_>,
949    ) -> Vec<ImplTraitInfo> {
950        let Some(query) = &compiled.impl_trait else {
951            return vec![];
952        };
953
954        let mut cursor = QueryCursor::new();
955        let mut matches = cursor.matches(query, root, source.as_bytes());
956        let mut results = Vec::new();
957
958        while let Some(mat) = matches.next() {
959            let mut trait_name = String::new();
960            let mut impl_type = String::new();
961            let mut line = 0usize;
962
963            for capture in mat.captures {
964                let capture_name = query.capture_names()[capture.index as usize];
965                let node = capture.node;
966                let text = source[node.start_byte()..node.end_byte()].to_string();
967                match capture_name {
968                    "trait_name" => {
969                        trait_name = text;
970                        line = node.start_position().row + 1;
971                    }
972                    "impl_type" => {
973                        impl_type = text;
974                    }
975                    _ => {}
976                }
977            }
978
979            if !trait_name.is_empty() && !impl_type.is_empty() {
980                results.push(ImplTraitInfo {
981                    trait_name,
982                    impl_type,
983                    path: PathBuf::new(), // Path will be set by caller
984                    line,
985                });
986            }
987        }
988
989        results
990    }
991
992    /// Extract def-use sites (write/read locations) for a given symbol within a file.
993    ///
994    /// Runs the defuse query to find all definition and use sites of a symbol.
995    /// Returns empty vec if no defuse query is available for this language.
996    ///
997    /// # Arguments
998    ///
999    /// * `source` - The source code text
1000    /// * `compiled` - Compiled tree-sitter queries
1001    /// * `root` - Root node of the AST
1002    /// * `symbol_name` - The symbol to search for (must match exactly)
1003    /// * `file_path` - Relative file path for site reporting
1004    fn extract_def_use(
1005        source: &str,
1006        compiled: &CompiledQueries,
1007        root: Node<'_>,
1008        symbol_name: &str,
1009        file_path: &str,
1010        max_depth: Option<u32>,
1011    ) -> Vec<crate::types::DefUseSite> {
1012        let Some(ref defuse_query) = compiled.defuse else {
1013            return vec![];
1014        };
1015
1016        let mut cursor = QueryCursor::new();
1017        if let Some(depth) = max_depth {
1018            cursor.set_max_start_depth(Some(depth));
1019        }
1020        let mut matches = cursor.matches(defuse_query, root, source.as_bytes());
1021        let mut sites = Vec::new();
1022        let source_lines: Vec<&str> = source.lines().collect();
1023        // Track byte offsets that already have a write or writeread capture so
1024        // duplicate read captures for the same identifier are suppressed.
1025        let mut write_offsets = std::collections::HashSet::new();
1026
1027        while let Some(mat) = matches.next() {
1028            for capture in mat.captures {
1029                let capture_name = defuse_query.capture_names()[capture.index as usize];
1030                let node = capture.node;
1031                let node_text = node.utf8_text(source.as_bytes()).unwrap_or_default();
1032
1033                // Only collect if the captured node matches the target symbol
1034                if node_text != symbol_name {
1035                    continue;
1036                }
1037
1038                // Classify capture by prefix
1039                let kind = if capture_name.starts_with("write.") {
1040                    crate::types::DefUseKind::Write
1041                } else if capture_name.starts_with("read.") {
1042                    crate::types::DefUseKind::Read
1043                } else if capture_name.starts_with("writeread.") {
1044                    crate::types::DefUseKind::WriteRead
1045                } else {
1046                    continue;
1047                };
1048
1049                let byte_offset = node.start_byte();
1050
1051                // De-duplicate: skip read captures for offsets already captured as write/writeread
1052                if kind == crate::types::DefUseKind::Read && write_offsets.contains(&byte_offset) {
1053                    continue;
1054                }
1055                if kind != crate::types::DefUseKind::Read {
1056                    write_offsets.insert(byte_offset);
1057                }
1058
1059                // Get line number (1-indexed) and center-line snippet.
1060                // Always produce a 3-line window so snippet_one_line (index 1) is safe.
1061                let line = node.start_position().row + 1;
1062                let snippet = {
1063                    let row = node.start_position().row;
1064                    let last_line = source_lines.len().saturating_sub(1);
1065                    let prev = if row > 0 { row - 1 } else { 0 };
1066                    let next = std::cmp::min(row + 1, last_line);
1067                    let prev_text = if row == 0 {
1068                        ""
1069                    } else {
1070                        source_lines[prev].trim_end()
1071                    };
1072                    let cur_text = source_lines[row].trim_end();
1073                    let next_text = if row >= last_line {
1074                        ""
1075                    } else {
1076                        source_lines[next].trim_end()
1077                    };
1078                    format!("{prev_text}\n{cur_text}\n{next_text}")
1079                };
1080
1081                // Get enclosing function scope
1082                let enclosing_scope = Self::enclosing_function_name(node, source);
1083
1084                let column = node.start_position().column;
1085                sites.push(crate::types::DefUseSite {
1086                    kind,
1087                    symbol: node_text.to_string(),
1088                    file: file_path.to_string(),
1089                    line,
1090                    column,
1091                    snippet,
1092                    enclosing_scope,
1093                });
1094            }
1095        }
1096
1097        sites
1098    }
1099
1100    /// Parse `source` in `language`, run the defuse query for `symbol`, and return all sites.
1101    /// Returns an empty vec if the language has no defuse query or parsing fails.
1102    pub(crate) fn extract_def_use_for_file(
1103        source: &str,
1104        language: &str,
1105        symbol: &str,
1106        file_path: &str,
1107        ast_recursion_limit: Option<usize>,
1108    ) -> Vec<crate::types::DefUseSite> {
1109        let Some(lang_info) = crate::languages::get_language_info(language) else {
1110            return vec![];
1111        };
1112        let Ok(compiled) = get_compiled_queries(language) else {
1113            return vec![];
1114        };
1115        if compiled.defuse.is_none() {
1116            return vec![];
1117        }
1118
1119        let tree = match PARSER.with(|p| {
1120            let mut parser = p.borrow_mut();
1121            if parser.set_language(&lang_info.language).is_err() {
1122                return None;
1123            }
1124            parser.parse(source, None)
1125        }) {
1126            Some(t) => t,
1127            None => return vec![],
1128        };
1129
1130        let root = tree.root_node();
1131
1132        // Convert ast_recursion_limit the same way extract() does:
1133        // 0 means unlimited (None); positive values become Some(u32).
1134        let max_depth: Option<u32> = ast_recursion_limit
1135            .filter(|&limit| limit > 0)
1136            .and_then(|limit| u32::try_from(limit).ok());
1137
1138        Self::extract_def_use(source, compiled, root, symbol, file_path, max_depth)
1139    }
1140}
1141
1142/// Extract `impl Trait for Type` blocks from Rust source.
1143///
1144/// Runs independently of `extract_references` to avoid shared deduplication state.
1145/// Returns an empty vec for non-Rust source (no error; caller decides).
1146#[must_use]
1147pub fn extract_impl_traits(source: &str, path: &Path) -> Vec<ImplTraitInfo> {
1148    let Some(lang_info) = get_language_info("rust") else {
1149        return vec![];
1150    };
1151
1152    let Ok(compiled) = get_compiled_queries("rust") else {
1153        return vec![];
1154    };
1155
1156    let Some(query) = &compiled.impl_trait else {
1157        return vec![];
1158    };
1159
1160    let Some(tree) = PARSER.with(|p| {
1161        let mut parser = p.borrow_mut();
1162        let _ = parser.set_language(&lang_info.language);
1163        parser.parse(source, None)
1164    }) else {
1165        return vec![];
1166    };
1167
1168    let root = tree.root_node();
1169    let mut cursor = QueryCursor::new();
1170    let mut matches = cursor.matches(query, root, source.as_bytes());
1171    let mut results = Vec::new();
1172
1173    while let Some(mat) = matches.next() {
1174        let mut trait_name = String::new();
1175        let mut impl_type = String::new();
1176        let mut line = 0usize;
1177
1178        for capture in mat.captures {
1179            let capture_name = query.capture_names()[capture.index as usize];
1180            let node = capture.node;
1181            let text = source[node.start_byte()..node.end_byte()].to_string();
1182            match capture_name {
1183                "trait_name" => {
1184                    trait_name = text;
1185                    line = node.start_position().row + 1;
1186                }
1187                "impl_type" => {
1188                    impl_type = text;
1189                }
1190                _ => {}
1191            }
1192        }
1193
1194        if !trait_name.is_empty() && !impl_type.is_empty() {
1195            results.push(ImplTraitInfo {
1196                trait_name,
1197                impl_type,
1198                path: path.to_path_buf(),
1199                line,
1200            });
1201        }
1202    }
1203
1204    results
1205}
1206
1207/// Execute a custom tree-sitter query against source code.
1208///
1209/// This is the internal implementation of the public `execute_query` function.
1210pub fn execute_query_impl(
1211    language: &str,
1212    source: &str,
1213    query_str: &str,
1214) -> Result<Vec<crate::QueryCapture>, ParserError> {
1215    // Get the tree-sitter language from the language name
1216    let ts_language = crate::languages::get_ts_language(language)
1217        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
1218
1219    let mut parser = Parser::new();
1220    parser
1221        .set_language(&ts_language)
1222        .map_err(|e| ParserError::QueryError(e.to_string()))?;
1223
1224    let tree = parser
1225        .parse(source.as_bytes(), None)
1226        .ok_or_else(|| ParserError::QueryError("failed to parse source".to_string()))?;
1227
1228    let query =
1229        Query::new(&ts_language, query_str).map_err(|e| ParserError::QueryError(e.to_string()))?;
1230
1231    let mut cursor = QueryCursor::new();
1232    let source_bytes = source.as_bytes();
1233
1234    let mut captures = Vec::new();
1235    let mut matches = cursor.matches(&query, tree.root_node(), source_bytes);
1236    while let Some(m) = matches.next() {
1237        for cap in m.captures {
1238            let node = cap.node;
1239            let capture_name = query.capture_names()[cap.index as usize].to_string();
1240            let text = node.utf8_text(source_bytes).unwrap_or("").to_string();
1241            captures.push(crate::QueryCapture {
1242                capture_name,
1243                text,
1244                start_line: node.start_position().row,
1245                end_line: node.end_position().row,
1246                start_byte: node.start_byte(),
1247                end_byte: node.end_byte(),
1248            });
1249        }
1250    }
1251    Ok(captures)
1252}
1253
// Language-feature-gated tests (require lang-rust); see also tests_unsupported below
#[cfg(all(test, feature = "lang-rust"))]
mod tests {
    use super::*;
    use std::path::Path;

    /// `Some(0)` must be treated exactly like an unset recursion limit.
    #[test]
    fn test_ast_recursion_limit_zero_is_unlimited() {
        let source = "fn hello() -> u32 { 42 }";
        let unset = SemanticExtractor::extract(source, "rust", None);
        let zero = SemanticExtractor::extract(source, "rust", Some(0));
        assert!(unset.is_ok(), "extract with None failed");
        assert!(zero.is_ok(), "extract with Some(0) failed");

        let unset_count = unset.unwrap().functions.len();
        let zero_count = zero.unwrap().functions.len();
        assert!(
            unset_count >= 1,
            "extract with None should find at least one function in the test source"
        );
        assert_eq!(
            unset_count, zero_count,
            "ast_recursion_limit=0 should behave identically to unset (unlimited)"
        );
    }

    /// `use a::b as c;` exposes the alias as an import item.
    #[test]
    fn test_rust_use_as_imports() {
        // Arrange
        let source = "use std::io as stdio;";
        // Act
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: alias "stdio" is captured as an import item
        let alias_found = result
            .imports
            .iter()
            .flat_map(|imp| imp.items.iter())
            .any(|item| item == "stdio");
        assert!(
            alias_found,
            "expected import alias 'stdio' in {:?}",
            result.imports
        );
    }

    /// Same as above, but the aliased path is a plain identifier.
    #[test]
    fn test_rust_use_as_clause_plain_identifier() {
        // Arrange: use_as_clause with plain identifier (no scoped_identifier)
        // exercises the _ => prefix.to_string() arm
        let source = "use io as stdio;";
        // Act
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: alias "stdio" is captured as an import item
        let alias_found = result
            .imports
            .iter()
            .flat_map(|imp| imp.items.iter())
            .any(|item| item == "stdio");
        assert!(
            alias_found,
            "expected import alias 'stdio' from plain identifier in {:?}",
            result.imports
        );
    }

    /// Items nested below a scoped use list keep their module prefix.
    #[test]
    fn test_rust_scoped_use_with_prefix() {
        // Arrange: scoped_use_list with non-empty prefix
        let source = "use std::{io::Read, io::Write};";
        // Act
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: both Read and Write appear as items with std::io module
        let items: Vec<&str> = result
            .imports
            .iter()
            .filter(|imp| imp.module.starts_with("std::io"))
            .flat_map(|imp| imp.items.iter().map(String::as_str))
            .collect();
        assert!(
            items.contains(&"Read") && items.contains(&"Write"),
            "expected 'Read' and 'Write' items under module with std::io, got {:?}",
            result.imports
        );
    }

    /// A flat scoped use list reports each entry under the shared module.
    #[test]
    fn test_rust_scoped_use_imports() {
        // Arrange
        let source = "use std::{fs, io};";
        // Act
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: both "fs" and "io" appear as import items under module "std"
        let items: Vec<&str> = result
            .imports
            .iter()
            .filter(|imp| imp.module == "std")
            .flat_map(|imp| imp.items.iter().map(String::as_str))
            .collect();
        assert!(
            ["fs", "io"].iter().all(|wanted| items.contains(wanted)),
            "expected 'fs' and 'io' items under module 'std', got {:?}",
            items
        );
    }

    /// Glob imports are reported as a single `*` item.
    #[test]
    fn test_rust_wildcard_imports() {
        // Arrange
        let source = "use std::io::*;";
        // Act
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: wildcard import with module "std::io"
        let has_wildcard = result
            .imports
            .iter()
            .any(|imp| imp.module == "std::io" && imp.items == ["*"]);
        assert!(
            has_wildcard,
            "expected wildcard import with module 'std::io', got {:?}",
            result.imports
        );
    }

    /// The standalone entry point finds a simple `impl Trait for Type`.
    #[test]
    fn test_extract_impl_traits_standalone() {
        // Arrange: source with a simple impl Trait for Type
        let source = "\nstruct Foo;\ntrait Display {}\nimpl Display for Foo {}\n";
        // Act
        let results = extract_impl_traits(source, Path::new("test.rs"));
        // Assert
        assert_eq!(
            results.len(),
            1,
            "expected one impl trait, got {:?}",
            results
        );
        let only = &results[0];
        assert_eq!(only.trait_name, "Display");
        assert_eq!(only.impl_type, "Foo");
    }

    /// Limits that do not fit in `u32` are rejected by `extract`.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_ast_recursion_limit_overflow() {
        // Arrange: limit larger than u32::MAX triggers a ParseError on 64-bit targets
        let source = "fn foo() {}";
        let big_limit = usize::try_from(u32::MAX).expect("u32 fits in usize on 64-bit") + 1;
        // Act
        let result = SemanticExtractor::extract(source, "rust", Some(big_limit));
        // Assert
        match &result {
            Err(ParserError::ParseError(_)) => {}
            _ => panic!("expected ParseError for oversized limit, got {:?}", result),
        }
    }

    /// A positive depth limit still lets top-level functions through.
    #[test]
    fn test_ast_recursion_limit_some() {
        // Arrange: ast_recursion_limit with Some(depth) to exercise max_depth Some branch
        let source = "fn hello() -> u32 { 42 }";
        // Act
        let result = SemanticExtractor::extract(source, "rust", Some(5));
        // Assert: should succeed without error and extract functions
        assert!(result.is_ok(), "extract with Some(5) failed: {:?}", result);
        let function_count = result.unwrap().functions.len();
        assert!(
            function_count >= 1,
            "expected at least one function with depth limit 5"
        );
    }

    /// A `let` binding and a later use produce a write and a read site.
    #[test]
    fn test_extract_def_use_for_file_finds_write_and_read() {
        // Arrange
        let source = r#"
fn main() {
    let count = 0;
    println!("{}", count);
}
"#;
        // Act
        let sites = SemanticExtractor::extract_def_use_for_file(
            source,
            "rust",
            "count",
            "src/main.rs",
            None,
        );

        // Assert
        assert!(
            !sites.is_empty(),
            "expected at least one def-use site for 'count'"
        );
        let has_kind =
            |wanted: crate::types::DefUseKind| sites.iter().any(|site| site.kind == wanted);
        assert!(
            has_kind(crate::types::DefUseKind::Write),
            "expected a write site for 'count'"
        );
        assert!(
            has_kind(crate::types::DefUseKind::Read),
            "expected a read site for 'count'"
        );
        assert_eq!(sites[0].file, "src/main.rs");
    }

    /// Searching for a symbol that never occurs yields nothing.
    #[test]
    fn test_extract_def_use_for_file_no_match_returns_empty() {
        // Arrange
        let source = "fn foo() { let x = 1; }";

        // Act
        let sites = SemanticExtractor::extract_def_use_for_file(
            source,
            "rust",
            "nonexistent_symbol",
            "src/lib.rs",
            None,
        );

        // Assert
        assert!(sites.is_empty(), "expected empty for nonexistent symbol");
    }
}
1477
// Language-feature-gated tests for Python
#[cfg(all(test, feature = "lang-python"))]
mod tests_python {
    use super::*;

    /// `from . import foo` is reported with a module containing a dot.
    #[test]
    fn test_python_relative_import() {
        // Arrange: relative import (from . import foo)
        let source = "from . import foo\n";
        // Act
        let result = SemanticExtractor::extract(source, "python", None).unwrap();
        // Assert: relative import should be captured
        let has_relative = result.imports.iter().any(|imp| imp.module.contains('.'));
        assert!(
            has_relative,
            "expected relative import in {:?}",
            result.imports
        );
    }

    /// `from os import path as p` reports the original name `path` under `os`.
    #[test]
    fn test_python_aliased_import() {
        // Arrange: aliased import (from os import path as p)
        // Note: tree-sitter-python extracts "path" (the original name), not the alias "p"
        let source = "from os import path as p\n";
        // Act
        let result = SemanticExtractor::extract(source, "python", None).unwrap();
        // Assert: "path" should be in items (alias is captured separately by aliased_import node)
        let has_path = result
            .imports
            .iter()
            .filter(|imp| imp.module == "os")
            .flat_map(|imp| imp.items.iter())
            .any(|item| item == "path");
        assert!(
            has_path,
            "expected import 'path' from module 'os' in {:?}",
            result.imports
        );
    }
}
1517
// Tests that do not require any language feature gate
#[cfg(test)]
mod tests_unsupported {
    use super::*;

    /// The element extractor rejects a language it has no grammar for.
    #[test]
    fn test_element_extractor_unsupported_language() {
        // Arrange + Act
        let result = ElementExtractor::extract_with_depth("x = 1", "cobol");
        // Assert
        match &result {
            Err(ParserError::UnsupportedLanguage(lang)) if lang == "cobol" => {}
            _ => panic!("expected UnsupportedLanguage error, got {:?}", result),
        }
    }

    /// The semantic extractor rejects a language it has no grammar for.
    #[test]
    fn test_semantic_extractor_unsupported_language() {
        // Arrange + Act
        let result = SemanticExtractor::extract("x = 1", "cobol", None);
        // Assert
        match &result {
            Err(ParserError::UnsupportedLanguage(lang)) if lang == "cobol" => {}
            _ => panic!("expected UnsupportedLanguage error, got {:?}", result),
        }
    }
}
1546}