Skip to main content

aptu_coder_core/
parser.rs

1// SPDX-FileCopyrightText: 2026 aptu-coder contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Tree-sitter-based parser for extracting semantic structure from source code.
4//!
5//! This module provides language-agnostic parsing using tree-sitter queries to extract
6//! functions, classes, imports, references, and other semantic elements from source files.
7//! Two main extractors handle different use cases:
8//!
9//! - [`ElementExtractor`]: Quick extraction of function and class counts.
10//! - [`SemanticExtractor`]: Detailed semantic analysis with calls, imports, and references.
11
12use crate::languages::get_language_info;
13use crate::types::{
14    CallInfo, ClassInfo, FunctionInfo, ImplTraitInfo, ImportInfo, ReferenceInfo, ReferenceType,
15    SemanticAnalysis,
16};
17use std::cell::RefCell;
18use std::collections::HashMap;
19use std::path::{Path, PathBuf};
20use std::sync::LazyLock;
21use thiserror::Error;
22use tracing::instrument;
23use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
24
/// Errors produced while parsing source code or preparing tree-sitter queries.
///
/// Marked `#[non_exhaustive]`, so downstream `match` expressions need a wildcard arm.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum ParserError {
    /// The requested language has no registered language info or no compiled queries.
    /// The payload is the language name that was asked for.
    #[error("Unsupported language: {0}")]
    UnsupportedLanguage(String),
    /// Configuring the parser, producing a syntax tree, or validating a parse option
    /// failed. The payload is a human-readable reason.
    #[error("Failed to parse file: {0}")]
    ParseError(String),
    /// The input was not valid UTF-8.
    // NOTE(review): not constructed anywhere in the visible part of this file;
    // presumably raised by callers that read raw bytes -- confirm.
    #[error("Invalid UTF-8 in file")]
    InvalidUtf8,
    /// A tree-sitter query string failed to compile. The payload names the query
    /// kind, the language, and the underlying compiler error.
    #[error("Query error: {0}")]
    QueryError(String),
}
37
/// Compiled tree-sitter queries for a language.
///
/// Stores all query types: mandatory (`element`, `call`) and optional
/// (`import`, `impl_block`, `reference`, `impl_trait`, `defuse`). An optional
/// query is `None` when the language's `LanguageInfo` does not declare it.
struct CompiledQueries {
    /// Query whose `function` / `class` captures drive element extraction.
    element: Query,
    /// Query whose `call` captures drive call extraction.
    call: Query,
    /// Compiled from `LanguageInfo::import_query`, when present.
    import: Option<Query>,
    /// Compiled from `LanguageInfo::impl_query`, when present.
    impl_block: Option<Query>,
    /// Compiled from `LanguageInfo::reference_query`, when present.
    reference: Option<Query>,
    /// Compiled from `LanguageInfo::impl_trait_query`, when present.
    impl_trait: Option<Query>,
    /// Compiled from `LanguageInfo::defuse_query`, when present.
    defuse: Option<Query>,
}
49
50/// Build compiled queries for a given language.
51///
52/// The `map_err` closures inside are only reachable if a hardcoded query string is
53/// invalid, which cannot happen at runtime -- exclude them from coverage instrumentation.
54#[cfg_attr(coverage_nightly, coverage(off))]
55fn build_compiled_queries(
56    lang_info: &crate::languages::LanguageInfo,
57) -> Result<CompiledQueries, ParserError> {
58    let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
59        ParserError::QueryError(format!(
60            "Failed to compile element query for {}: {}",
61            lang_info.name, e
62        ))
63    })?;
64
65    let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
66        ParserError::QueryError(format!(
67            "Failed to compile call query for {}: {}",
68            lang_info.name, e
69        ))
70    })?;
71
72    let import = if let Some(import_query_str) = lang_info.import_query {
73        Some(
74            Query::new(&lang_info.language, import_query_str).map_err(|e| {
75                ParserError::QueryError(format!(
76                    "Failed to compile import query for {}: {}",
77                    lang_info.name, e
78                ))
79            })?,
80        )
81    } else {
82        None
83    };
84
85    let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
86        Some(
87            Query::new(&lang_info.language, impl_query_str).map_err(|e| {
88                ParserError::QueryError(format!(
89                    "Failed to compile impl query for {}: {}",
90                    lang_info.name, e
91                ))
92            })?,
93        )
94    } else {
95        None
96    };
97
98    let reference = if let Some(ref_query_str) = lang_info.reference_query {
99        Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
100            ParserError::QueryError(format!(
101                "Failed to compile reference query for {}: {}",
102                lang_info.name, e
103            ))
104        })?)
105    } else {
106        None
107    };
108
109    let impl_trait = if let Some(impl_trait_query_str) = lang_info.impl_trait_query {
110        Some(
111            Query::new(&lang_info.language, impl_trait_query_str).map_err(|e| {
112                ParserError::QueryError(format!(
113                    "Failed to compile impl_trait query for {}: {}",
114                    lang_info.name, e
115                ))
116            })?,
117        )
118    } else {
119        None
120    };
121
122    let defuse = if let Some(defuse_query_str) = lang_info.defuse_query {
123        Some(
124            Query::new(&lang_info.language, defuse_query_str).map_err(|e| {
125                ParserError::QueryError(format!(
126                    "Failed to compile defuse query for {}: {}",
127                    lang_info.name, e
128                ))
129            })?,
130        )
131    } else {
132        None
133    };
134
135    Ok(CompiledQueries {
136        element,
137        call,
138        import,
139        impl_block,
140        reference,
141        impl_trait,
142        defuse,
143    })
144}
145
146/// Initialize the query cache with compiled queries for all supported languages.
147///
148/// Excluded from coverage: the `Err` arm is unreachable because `build_compiled_queries`
149/// only fails on invalid hardcoded query strings.
150#[cfg_attr(coverage_nightly, coverage(off))]
151fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
152    let mut cache = HashMap::new();
153
154    for lang_name in crate::lang::supported_languages() {
155        if let Some(lang_info) = get_language_info(lang_name) {
156            match build_compiled_queries(&lang_info) {
157                Ok(compiled) => {
158                    cache.insert(*lang_name, compiled);
159                }
160                Err(e) => {
161                    tracing::error!(
162                        "Failed to compile queries for language {}: {}",
163                        lang_name,
164                        e
165                    );
166                }
167            }
168        }
169    }
170
171    cache
172}
173
/// Lazily initialized cache of compiled queries per language.
///
/// Populated by `init_query_cache` on first access and keyed by language name.
/// A language missing from this map is reported as unsupported by
/// `get_compiled_queries`.
static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
    LazyLock::new(init_query_cache);
177
178/// Get compiled queries for a language from the cache.
179fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
180    QUERY_CACHE
181        .get(language)
182        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
183}
184
// Per-thread parser and query cursor, reused across extraction calls instead of
// being constructed on every call. Users in this file reconfigure them before each
// use: the parser gets `set_language`, the cursor gets `set_max_start_depth`.
thread_local! {
    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
    static QUERY_CURSOR: RefCell<QueryCursor> = RefCell::new(QueryCursor::new());
}
189
190/// Canonical API for extracting element counts from source code.
191pub struct ElementExtractor;
192
193impl ElementExtractor {
194    /// Extract function and class counts from source code.
195    ///
196    /// # Errors
197    ///
198    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
199    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
200    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
201    #[instrument(skip_all, fields(language))]
202    pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
203        let lang_info = get_language_info(language)
204            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
205
206        let tree = PARSER.with(|p| {
207            let mut parser = p.borrow_mut();
208            parser
209                .set_language(&lang_info.language)
210                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
211            parser
212                .parse(source, None)
213                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
214        })?;
215
216        let compiled = get_compiled_queries(language)?;
217
218        let (function_count, class_count) = QUERY_CURSOR.with(|c| {
219            let mut cursor = c.borrow_mut();
220            cursor.set_max_start_depth(None);
221            let mut function_count = 0;
222            let mut class_count = 0;
223
224            let mut matches =
225                cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
226            while let Some(mat) = matches.next() {
227                for capture in mat.captures {
228                    let capture_name = compiled.element.capture_names()[capture.index as usize];
229                    match capture_name {
230                        "function" => function_count += 1,
231                        "class" => class_count += 1,
232                        _ => {}
233                    }
234                }
235            }
236            (function_count, class_count)
237        });
238
239        tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
240
241        Ok((function_count, class_count))
242    }
243}
244
245/// Recursively extract `ImportInfo` entries from a use-clause node, respecting all Rust
246/// use-declaration forms (`scoped_identifier`, `scoped_use_list`, `use_list`,
247/// `use_as_clause`, `use_wildcard`, bare `identifier`).
248#[allow(clippy::too_many_lines)] // exhaustive match over all supported Rust use-clause forms; splitting harms readability
249fn extract_imports_from_node(
250    node: &Node,
251    source: &str,
252    prefix: &str,
253    line: usize,
254    imports: &mut Vec<ImportInfo>,
255) {
256    match node.kind() {
257        // Simple identifier: `use foo;` or an item inside `{foo, bar}`
258        "identifier" | "self" | "super" | "crate" => {
259            let name = source[node.start_byte()..node.end_byte()].to_string();
260            imports.push(ImportInfo {
261                module: prefix.to_string(),
262                items: vec![name],
263                line,
264            });
265        }
266        // Qualified path: `std::collections::HashMap`
267        "scoped_identifier" => {
268            let item = node
269                .child_by_field_name("name")
270                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
271                .unwrap_or_default();
272            let module = node.child_by_field_name("path").map_or_else(
273                || prefix.to_string(),
274                |p| {
275                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
276                    if prefix.is_empty() {
277                        path_text
278                    } else {
279                        format!("{prefix}::{path_text}")
280                    }
281                },
282            );
283            if !item.is_empty() {
284                imports.push(ImportInfo {
285                    module,
286                    items: vec![item],
287                    line,
288                });
289            }
290        }
291        // `std::{io, fs}` — path prefix followed by a brace list
292        "scoped_use_list" => {
293            let new_prefix = node.child_by_field_name("path").map_or_else(
294                || prefix.to_string(),
295                |p| {
296                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
297                    if prefix.is_empty() {
298                        path_text
299                    } else {
300                        format!("{prefix}::{path_text}")
301                    }
302                },
303            );
304            if let Some(list) = node.child_by_field_name("list") {
305                extract_imports_from_node(&list, source, &new_prefix, line, imports);
306            }
307        }
308        // `{HashMap, HashSet}` — brace-enclosed list of items
309        "use_list" => {
310            let mut cursor = node.walk();
311            for child in node.children(&mut cursor) {
312                match child.kind() {
313                    "{" | "}" | "," => {}
314                    _ => extract_imports_from_node(&child, source, prefix, line, imports),
315                }
316            }
317        }
318        // `std::io::*` — glob import
319        "use_wildcard" => {
320            let text = source[node.start_byte()..node.end_byte()].to_string();
321            let module = if let Some(stripped) = text.strip_suffix("::*") {
322                if prefix.is_empty() {
323                    stripped.to_string()
324                } else {
325                    format!("{prefix}::{stripped}")
326                }
327            } else {
328                prefix.to_string()
329            };
330            imports.push(ImportInfo {
331                module,
332                items: vec!["*".to_string()],
333                line,
334            });
335        }
336        // `io as stdio` or `std::io as stdio`
337        "use_as_clause" => {
338            let alias = node
339                .child_by_field_name("alias")
340                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
341                .unwrap_or_default();
342            let module = if let Some(path_node) = node.child_by_field_name("path") {
343                match path_node.kind() {
344                    "scoped_identifier" => path_node.child_by_field_name("path").map_or_else(
345                        || prefix.to_string(),
346                        |p| {
347                            let p_text = source[p.start_byte()..p.end_byte()].to_string();
348                            if prefix.is_empty() {
349                                p_text
350                            } else {
351                                format!("{prefix}::{p_text}")
352                            }
353                        },
354                    ),
355                    _ => prefix.to_string(),
356                }
357            } else {
358                prefix.to_string()
359            };
360            if !alias.is_empty() {
361                imports.push(ImportInfo {
362                    module,
363                    items: vec![alias],
364                    line,
365                });
366            }
367        }
368        // Python import_from_statement: `from module import name` or `from . import *`
369        "import_from_statement" => {
370            extract_python_import_from(node, source, line, imports);
371        }
372        // Fallback for non-Rust import nodes: capture full text as module
373        _ => {
374            let text = source[node.start_byte()..node.end_byte()]
375                .trim()
376                .to_string();
377            if !text.is_empty() {
378                imports.push(ImportInfo {
379                    module: text,
380                    items: vec![],
381                    line,
382                });
383            }
384        }
385    }
386}
387
388/// Extract an item name from a `dotted_name` or `aliased_import` child node.
389fn extract_import_item_name(child: &Node, source: &str) -> Option<String> {
390    match child.kind() {
391        "dotted_name" => {
392            let name = source[child.start_byte()..child.end_byte()]
393                .trim()
394                .to_string();
395            if name.is_empty() { None } else { Some(name) }
396        }
397        "aliased_import" => child.child_by_field_name("name").and_then(|n| {
398            let name = source[n.start_byte()..n.end_byte()].trim().to_string();
399            if name.is_empty() { None } else { Some(name) }
400        }),
401        _ => None,
402    }
403}
404
405/// Collect wildcard/named imports from an `import_list` node or from direct named children.
406fn collect_import_items(
407    node: &Node,
408    source: &str,
409    is_wildcard: &mut bool,
410    items: &mut Vec<String>,
411) {
412    // Prefer import_list child (wraps `from x import a, b`)
413    if let Some(import_list) = node.child_by_field_name("import_list") {
414        let mut cursor = import_list.walk();
415        for child in import_list.named_children(&mut cursor) {
416            if child.kind() == "wildcard_import" {
417                *is_wildcard = true;
418            } else if let Some(name) = extract_import_item_name(&child, source) {
419                items.push(name);
420            }
421        }
422        return;
423    }
424    // No import_list: single-name or wildcard as direct child (skip first named child = module_name)
425    let mut cursor = node.walk();
426    let mut first = true;
427    for child in node.named_children(&mut cursor) {
428        if first {
429            first = false;
430            continue;
431        }
432        if child.kind() == "wildcard_import" {
433            *is_wildcard = true;
434        } else if let Some(name) = extract_import_item_name(&child, source) {
435            items.push(name);
436        }
437    }
438}
439
440/// Handle Python `import_from_statement` node.
441fn extract_python_import_from(
442    node: &Node,
443    source: &str,
444    line: usize,
445    imports: &mut Vec<ImportInfo>,
446) {
447    let module = if let Some(m) = node.child_by_field_name("module_name") {
448        source[m.start_byte()..m.end_byte()].trim().to_string()
449    } else if let Some(r) = node.child_by_field_name("relative_import") {
450        source[r.start_byte()..r.end_byte()].trim().to_string()
451    } else {
452        String::new()
453    };
454
455    let mut is_wildcard = false;
456    let mut items = Vec::new();
457    collect_import_items(node, source, &mut is_wildcard, &mut items);
458
459    if !module.is_empty() {
460        imports.push(ImportInfo {
461            module,
462            items: if is_wildcard {
463                vec!["*".to_string()]
464            } else {
465                items
466            },
467            line,
468        });
469    }
470}
471
/// Detailed semantic analysis of source code: functions, classes, calls, imports,
/// and references.
pub struct SemanticExtractor;
473
474impl SemanticExtractor {
475    /// Extract semantic information from source code.
476    ///
477    /// # Errors
478    ///
479    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
480    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
481    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
482    #[instrument(skip_all, fields(language))]
483    pub fn extract(
484        source: &str,
485        language: &str,
486        ast_recursion_limit: Option<usize>,
487    ) -> Result<SemanticAnalysis, ParserError> {
488        let lang_info = get_language_info(language)
489            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
490
491        let tree = PARSER.with(|p| {
492            let mut parser = p.borrow_mut();
493            parser
494                .set_language(&lang_info.language)
495                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
496            parser
497                .parse(source, None)
498                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
499        })?;
500
501        // 0 is not a useful depth (visits root node only, returning zero results).
502        // Treat 0 as None (unlimited). See #339.
503        let max_depth: Option<u32> = ast_recursion_limit
504            .filter(|&limit| limit > 0)
505            .map(|limit| {
506                u32::try_from(limit).map_err(|_| {
507                    ParserError::ParseError(format!(
508                        "ast_recursion_limit {} exceeds maximum supported value {}",
509                        limit,
510                        u32::MAX
511                    ))
512                })
513            })
514            .transpose()?;
515
516        let compiled = get_compiled_queries(language)?;
517        let root = tree.root_node();
518
519        let mut functions = Vec::new();
520        let mut classes = Vec::new();
521        let mut imports = Vec::new();
522        let mut references = Vec::new();
523        let mut call_frequency = HashMap::new();
524        let mut calls = Vec::new();
525
526        Self::extract_elements(
527            source,
528            compiled,
529            root,
530            max_depth,
531            &lang_info,
532            &mut functions,
533            &mut classes,
534        );
535        Self::extract_calls(
536            source,
537            compiled,
538            root,
539            max_depth,
540            &mut calls,
541            &mut call_frequency,
542        );
543        Self::extract_imports(source, compiled, root, max_depth, &mut imports);
544        Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes);
545        Self::extract_references(source, compiled, root, max_depth, &mut references);
546
547        // Extract impl-trait blocks for Rust files (empty for other languages)
548        let impl_traits = if language == "rust" {
549            Self::extract_impl_traits_from_tree(source, compiled, root)
550        } else {
551            vec![]
552        };
553
554        tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), impl_traits = impl_traits.len(), "extraction complete");
555
556        Ok(SemanticAnalysis {
557            functions,
558            classes,
559            imports,
560            references,
561            call_frequency,
562            calls,
563            impl_traits,
564            def_use_sites: Vec::new(),
565        })
566    }
567
568    fn extract_elements(
569        source: &str,
570        compiled: &CompiledQueries,
571        root: Node<'_>,
572        max_depth: Option<u32>,
573        lang_info: &crate::languages::LanguageInfo,
574        functions: &mut Vec<FunctionInfo>,
575        classes: &mut Vec<ClassInfo>,
576    ) {
577        let mut seen_functions = std::collections::HashSet::new();
578
579        QUERY_CURSOR.with(|c| {
580            let mut cursor = c.borrow_mut();
581            cursor.set_max_start_depth(None);
582            if let Some(depth) = max_depth {
583                cursor.set_max_start_depth(Some(depth));
584            }
585            let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
586
587            while let Some(mat) = matches.next() {
588                let mut func_node: Option<Node> = None;
589                let mut func_name_text: Option<String> = None;
590                let mut class_node: Option<Node> = None;
591                let mut class_name_text: Option<String> = None;
592
593                for capture in mat.captures {
594                    let capture_name = compiled.element.capture_names()[capture.index as usize];
595                    let node = capture.node;
596                    match capture_name {
597                        "function" => func_node = Some(node),
598                        "func_name" | "method_name" => {
599                            func_name_text =
600                                Some(source[node.start_byte()..node.end_byte()].to_string());
601                        }
602                        "class" => class_node = Some(node),
603                        "class_name" | "type_name" => {
604                            class_name_text =
605                                Some(source[node.start_byte()..node.end_byte()].to_string());
606                        }
607                        _ => {}
608                    }
609                }
610
611                if let Some(func_node) = func_node {
612                    // When a plain function_definition is nested inside a template_declaration,
613                    // it is also matched by the explicit template_declaration pattern. Skip it
614                    // here to avoid duplicates; the template_declaration match will emit it.
615                    let parent_is_template = func_node
616                        .parent()
617                        .map(|p| p.kind() == "template_declaration")
618                        .unwrap_or(false);
619                    if func_node.kind() == "function_definition" && parent_is_template {
620                        // Handled by the template_declaration @function match instead.
621                    } else {
622                        // Resolve template_declaration to its inner function_definition for
623                        // declarator/field walks. The captured node may be the template wrapper.
624                        let func_def = if func_node.kind() == "template_declaration" {
625                            let mut cursor = func_node.walk();
626                            func_node
627                                .children(&mut cursor)
628                                .find(|n| n.kind() == "function_definition")
629                                .unwrap_or(func_node)
630                        } else {
631                            func_node
632                        };
633
634                        let name = func_name_text
635                            .or_else(|| {
636                                func_def
637                                    .child_by_field_name("name")
638                                    .map(|n| source[n.start_byte()..n.end_byte()].to_string())
639                            })
640                            .unwrap_or_default();
641
642                        let func_key = (name.clone(), func_node.start_position().row);
643                        if !name.is_empty() && seen_functions.insert(func_key) {
644                            // For C/C++: parameters live under declarator -> parameters.
645                            // For other languages: parameters is a direct child field.
646                            let params = func_def
647                                .child_by_field_name("declarator")
648                                .and_then(|d| d.child_by_field_name("parameters"))
649                                .or_else(|| func_def.child_by_field_name("parameters"))
650                                .map(|p| source[p.start_byte()..p.end_byte()].to_string())
651                                .unwrap_or_default();
652
653                            // Try "type" first (C/C++ uses this field for the return type);
654                            // fall back to "return_type" (Rust, Python, TypeScript, etc.).
655                            let return_type = func_def
656                                .child_by_field_name("type")
657                                .or_else(|| func_def.child_by_field_name("return_type"))
658                                .map(|r| source[r.start_byte()..r.end_byte()].to_string());
659
660                            functions.push(FunctionInfo {
661                                name,
662                                line: func_node.start_position().row + 1,
663                                end_line: func_node.end_position().row + 1,
664                                parameters: if params.is_empty() {
665                                    Vec::new()
666                                } else {
667                                    vec![params]
668                                },
669                                return_type,
670                            });
671                        }
672                    }
673                }
674
675                if let Some(class_node) = class_node {
676                    let name = class_name_text
677                        .or_else(|| {
678                            class_node
679                                .child_by_field_name("name")
680                                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
681                        })
682                        .unwrap_or_default();
683
684                    if !name.is_empty() {
685                        let inherits = if let Some(handler) = lang_info.extract_inheritance {
686                            handler(&class_node, source)
687                        } else {
688                            Vec::new()
689                        };
690                        classes.push(ClassInfo {
691                            name,
692                            line: class_node.start_position().row + 1,
693                            end_line: class_node.end_position().row + 1,
694                            methods: Vec::new(),
695                            fields: Vec::new(),
696                            inherits,
697                        });
698                    }
699                }
700            }
701        });
702    }
703
704    /// Returns the name of the enclosing function/method/subroutine for a given AST node,
705    /// by walking ancestors and matching all language-specific function container kinds.
706    fn enclosing_function_name(mut node: tree_sitter::Node<'_>, source: &str) -> Option<String> {
707        let mut depth = 0u32;
708        while let Some(parent) = node.parent() {
709            depth += 1;
710            // Cap at 64 hops: real function nesting rarely exceeds ~10 levels; 64 is a generous
711            // upper bound that guards against pathological/malformed ASTs without false negatives
712            // on legitimate code. Returns None (treated as <module>) when the cap is hit.
713            if depth > 64 {
714                return None;
715            }
716            let name_node = match parent.kind() {
717                // Direct name field: Rust, Python, Go, Java, TypeScript/TSX
718                "function_item"
719                | "method_item"
720                | "function_definition"
721                | "function_declaration"
722                | "method_declaration"
723                | "method_definition" => parent.child_by_field_name("name"),
724                // Fortran subroutine: name is inside subroutine_statement child
725                "subroutine" => {
726                    let mut cursor = parent.walk();
727                    parent
728                        .children(&mut cursor)
729                        .find(|c| c.kind() == "subroutine_statement")
730                        .and_then(|s| s.child_by_field_name("name"))
731                }
732                // Fortran function: name is inside function_statement child
733                "function" => {
734                    let mut cursor = parent.walk();
735                    parent
736                        .children(&mut cursor)
737                        .find(|c| c.kind() == "function_statement")
738                        .and_then(|s| s.child_by_field_name("name"))
739                }
740                _ => {
741                    node = parent;
742                    continue;
743                }
744            };
745            return name_node.map(|n| source[n.start_byte()..n.end_byte()].to_string());
746        }
747        // The loop exits here only when no parent was found (i.e., we reached the tree root
748        // without finding a function container). If the depth cap fired, we returned None early
749        // above. Nothing to assert here.
750        None
751    }
752
753    fn extract_calls(
754        source: &str,
755        compiled: &CompiledQueries,
756        root: Node<'_>,
757        max_depth: Option<u32>,
758        calls: &mut Vec<CallInfo>,
759        call_frequency: &mut HashMap<String, usize>,
760    ) {
761        QUERY_CURSOR.with(|c| {
762            let mut cursor = c.borrow_mut();
763            cursor.set_max_start_depth(None);
764            if let Some(depth) = max_depth {
765                cursor.set_max_start_depth(Some(depth));
766            }
767            let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
768
769            while let Some(mat) = matches.next() {
770                for capture in mat.captures {
771                    let capture_name = compiled.call.capture_names()[capture.index as usize];
772                    if capture_name != "call" {
773                        continue;
774                    }
775                    let node = capture.node;
776                    let call_name = source[node.start_byte()..node.end_byte()].to_string();
777                    *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
778
779                    let caller = Self::enclosing_function_name(node, source)
780                        .unwrap_or_else(|| "<module>".to_string());
781
782                    let mut arg_count = None;
783                    let mut arg_node = node;
784                    let mut hop = 0u32;
785                    let mut cap_hit = false;
786                    while let Some(parent) = arg_node.parent() {
787                        hop += 1;
788                        // Bounded parent traversal: cap at 16 hops to guard against pathological
789                        // walks on malformed/degenerate trees. Real call-expression nesting is
790                        // shallow (typically 1-3 levels). When the cap is hit we stop searching and
791                        // leave arg_count as None; the caller is still recorded, just without
792                        // argument-count information.
793                        if hop > 16 {
794                            cap_hit = true;
795                            break;
796                        }
797                        if parent.kind() == "call_expression" {
798                            if let Some(args) = parent.child_by_field_name("arguments") {
799                                arg_count = Some(args.named_child_count());
800                            }
801                            break;
802                        }
803                        arg_node = parent;
804                    }
805                    debug_assert!(
806                        !cap_hit,
807                        "extract_calls: parent traversal cap reached (hop > 16)"
808                    );
809
810                    calls.push(CallInfo {
811                        caller,
812                        callee: call_name,
813                        line: node.start_position().row + 1,
814                        column: node.start_position().column,
815                        arg_count,
816                    });
817                }
818            }
819        });
820    }
821
822    fn extract_imports(
823        source: &str,
824        compiled: &CompiledQueries,
825        root: Node<'_>,
826        max_depth: Option<u32>,
827        imports: &mut Vec<ImportInfo>,
828    ) {
829        let Some(ref import_query) = compiled.import else {
830            return;
831        };
832        QUERY_CURSOR.with(|c| {
833            let mut cursor = c.borrow_mut();
834            cursor.set_max_start_depth(None);
835            if let Some(depth) = max_depth {
836                cursor.set_max_start_depth(Some(depth));
837            }
838            let mut matches = cursor.matches(import_query, root, source.as_bytes());
839
840            while let Some(mat) = matches.next() {
841                for capture in mat.captures {
842                    let capture_name = import_query.capture_names()[capture.index as usize];
843                    if capture_name == "import_path" {
844                        let node = capture.node;
845                        let line = node.start_position().row + 1;
846                        extract_imports_from_node(&node, source, "", line, imports);
847                    }
848                }
849            }
850        });
851    }
852
853    fn extract_impl_methods(
854        source: &str,
855        compiled: &CompiledQueries,
856        root: Node<'_>,
857        max_depth: Option<u32>,
858        classes: &mut [ClassInfo],
859    ) {
860        let Some(ref impl_query) = compiled.impl_block else {
861            return;
862        };
863        QUERY_CURSOR.with(|c| {
864            let mut cursor = c.borrow_mut();
865            cursor.set_max_start_depth(None);
866            if let Some(depth) = max_depth {
867                cursor.set_max_start_depth(Some(depth));
868            }
869            let mut matches = cursor.matches(impl_query, root, source.as_bytes());
870
871            while let Some(mat) = matches.next() {
872                let mut impl_type_name = String::new();
873                let mut method_name = String::new();
874                let mut method_line = 0usize;
875                let mut method_end_line = 0usize;
876                let mut method_params = String::new();
877                let mut method_return_type: Option<String> = None;
878
879                for capture in mat.captures {
880                    let capture_name = impl_query.capture_names()[capture.index as usize];
881                    let node = capture.node;
882                    match capture_name {
883                        "impl_type" => {
884                            impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
885                        }
886                        "method_name" => {
887                            method_name = source[node.start_byte()..node.end_byte()].to_string();
888                        }
889                        "method_params" => {
890                            method_params = source[node.start_byte()..node.end_byte()].to_string();
891                        }
892                        "method" => {
893                            method_line = node.start_position().row + 1;
894                            method_end_line = node.end_position().row + 1;
895                            method_return_type = node
896                                .child_by_field_name("return_type")
897                                .map(|r| source[r.start_byte()..r.end_byte()].to_string());
898                        }
899                        _ => {}
900                    }
901                }
902
903                if !impl_type_name.is_empty() && !method_name.is_empty() {
904                    let func = FunctionInfo {
905                        name: method_name,
906                        line: method_line,
907                        end_line: method_end_line,
908                        parameters: if method_params.is_empty() {
909                            Vec::new()
910                        } else {
911                            vec![method_params]
912                        },
913                        return_type: method_return_type,
914                    };
915                    if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
916                        class.methods.push(func);
917                    }
918                }
919            }
920        });
921    }
922
923    fn extract_references(
924        source: &str,
925        compiled: &CompiledQueries,
926        root: Node<'_>,
927        max_depth: Option<u32>,
928        references: &mut Vec<ReferenceInfo>,
929    ) {
930        let Some(ref ref_query) = compiled.reference else {
931            return;
932        };
933        let mut seen_refs = std::collections::HashSet::new();
934        QUERY_CURSOR.with(|c| {
935            let mut cursor = c.borrow_mut();
936            cursor.set_max_start_depth(None);
937            if let Some(depth) = max_depth {
938                cursor.set_max_start_depth(Some(depth));
939            }
940            let mut matches = cursor.matches(ref_query, root, source.as_bytes());
941
942            while let Some(mat) = matches.next() {
943                for capture in mat.captures {
944                    let capture_name = ref_query.capture_names()[capture.index as usize];
945                    if capture_name == "type_ref" {
946                        let node = capture.node;
947                        let type_ref = source[node.start_byte()..node.end_byte()].to_string();
948                        if seen_refs.insert(type_ref.clone()) {
949                            references.push(ReferenceInfo {
950                                symbol: type_ref,
951                                reference_type: ReferenceType::Usage,
952                                // location is intentionally empty here; set by the caller (analyze_file)
953                                location: String::new(),
954                                line: node.start_position().row + 1,
955                            });
956                        }
957                    }
958                }
959            }
960        });
961    }
962
963    /// Extract impl-trait blocks from an already-parsed tree.
964    ///
965    /// Called during `extract()` for Rust files to avoid a second parse.
966    /// Returns an empty vec if the query is not available.
967    fn extract_impl_traits_from_tree(
968        source: &str,
969        compiled: &CompiledQueries,
970        root: Node<'_>,
971    ) -> Vec<ImplTraitInfo> {
972        let Some(query) = &compiled.impl_trait else {
973            return vec![];
974        };
975
976        let mut results = Vec::new();
977        QUERY_CURSOR.with(|c| {
978            let mut cursor = c.borrow_mut();
979            cursor.set_max_start_depth(None);
980            let mut matches = cursor.matches(query, root, source.as_bytes());
981
982            while let Some(mat) = matches.next() {
983                let mut trait_name = String::new();
984                let mut impl_type = String::new();
985                let mut line = 0usize;
986
987                for capture in mat.captures {
988                    let capture_name = query.capture_names()[capture.index as usize];
989                    let node = capture.node;
990                    let text = source[node.start_byte()..node.end_byte()].to_string();
991                    match capture_name {
992                        "trait_name" => {
993                            trait_name = text;
994                            line = node.start_position().row + 1;
995                        }
996                        "impl_type" => {
997                            impl_type = text;
998                        }
999                        _ => {}
1000                    }
1001                }
1002
1003                if !trait_name.is_empty() && !impl_type.is_empty() {
1004                    results.push(ImplTraitInfo {
1005                        trait_name,
1006                        impl_type,
1007                        path: PathBuf::new(), // Path will be set by caller
1008                        line,
1009                    });
1010                }
1011            }
1012        });
1013
1014        results
1015    }
1016
1017    /// Extract def-use sites (write/read locations) for a given symbol within a file.
1018    ///
1019    /// Runs the defuse query to find all definition and use sites of a symbol.
1020    /// Returns empty vec if no defuse query is available for this language.
1021    ///
1022    /// # Arguments
1023    ///
1024    /// * `source` - The source code text
1025    /// * `compiled` - Compiled tree-sitter queries
1026    /// * `root` - Root node of the AST
1027    /// * `symbol_name` - The symbol to search for (must match exactly)
1028    /// * `file_path` - Relative file path for site reporting
1029    fn extract_def_use(
1030        source: &str,
1031        compiled: &CompiledQueries,
1032        root: Node<'_>,
1033        symbol_name: &str,
1034        file_path: &str,
1035        max_depth: Option<u32>,
1036    ) -> Vec<crate::types::DefUseSite> {
1037        let Some(ref defuse_query) = compiled.defuse else {
1038            return vec![];
1039        };
1040
1041        let mut sites = Vec::new();
1042        let source_lines: Vec<&str> = source.lines().collect();
1043        // Track byte offsets that already have a write or writeread capture so
1044        // duplicate read captures for the same identifier are suppressed.
1045        let mut write_offsets = std::collections::HashSet::new();
1046
1047        QUERY_CURSOR.with(|c| {
1048            let mut cursor = c.borrow_mut();
1049            cursor.set_max_start_depth(None);
1050            if let Some(depth) = max_depth {
1051                cursor.set_max_start_depth(Some(depth));
1052            }
1053            let mut matches = cursor.matches(defuse_query, root, source.as_bytes());
1054
1055            while let Some(mat) = matches.next() {
1056                for capture in mat.captures {
1057                    let capture_name = defuse_query.capture_names()[capture.index as usize];
1058                    let node = capture.node;
1059                    let node_text = node.utf8_text(source.as_bytes()).unwrap_or_default();
1060
1061                    // Only collect if the captured node matches the target symbol
1062                    if node_text != symbol_name {
1063                        continue;
1064                    }
1065
1066                    // Classify capture by prefix
1067                    let kind = if capture_name.starts_with("write.") {
1068                        crate::types::DefUseKind::Write
1069                    } else if capture_name.starts_with("read.") {
1070                        crate::types::DefUseKind::Read
1071                    } else if capture_name.starts_with("writeread.") {
1072                        crate::types::DefUseKind::WriteRead
1073                    } else {
1074                        continue;
1075                    };
1076
1077                    let byte_offset = node.start_byte();
1078
1079                    // De-duplicate: skip read captures for offsets already captured as write/writeread
1080                    if kind == crate::types::DefUseKind::Read
1081                        && write_offsets.contains(&byte_offset)
1082                    {
1083                        continue;
1084                    }
1085                    if kind != crate::types::DefUseKind::Read {
1086                        write_offsets.insert(byte_offset);
1087                    }
1088
1089                    // Get line number (1-indexed) and center-line snippet.
1090                    // Always produce a 3-line window so snippet_one_line (index 1) is safe.
1091                    let line = node.start_position().row + 1;
1092                    let snippet = {
1093                        let row = node.start_position().row;
1094                        let last_line = source_lines.len().saturating_sub(1);
1095                        let prev = if row > 0 { row - 1 } else { 0 };
1096                        let next = std::cmp::min(row + 1, last_line);
1097                        let prev_text = if row == 0 {
1098                            ""
1099                        } else {
1100                            source_lines[prev].trim_end()
1101                        };
1102                        let cur_text = source_lines[row].trim_end();
1103                        let next_text = if row >= last_line {
1104                            ""
1105                        } else {
1106                            source_lines[next].trim_end()
1107                        };
1108                        format!("{prev_text}\n{cur_text}\n{next_text}")
1109                    };
1110
1111                    // Get enclosing function scope
1112                    let enclosing_scope = Self::enclosing_function_name(node, source);
1113
1114                    let column = node.start_position().column;
1115                    sites.push(crate::types::DefUseSite {
1116                        kind,
1117                        symbol: node_text.to_string(),
1118                        file: file_path.to_string(),
1119                        line,
1120                        column,
1121                        snippet,
1122                        enclosing_scope,
1123                    });
1124                }
1125            }
1126        });
1127
1128        sites
1129    }
1130
1131    /// Parse `source` in `language`, run the defuse query for `symbol`, and return all sites.
1132    /// Returns an empty vec if the language has no defuse query or parsing fails.
1133    pub(crate) fn extract_def_use_for_file(
1134        source: &str,
1135        language: &str,
1136        symbol: &str,
1137        file_path: &str,
1138        ast_recursion_limit: Option<usize>,
1139    ) -> Vec<crate::types::DefUseSite> {
1140        let Some(lang_info) = crate::languages::get_language_info(language) else {
1141            return vec![];
1142        };
1143        let Ok(compiled) = get_compiled_queries(language) else {
1144            return vec![];
1145        };
1146        if compiled.defuse.is_none() {
1147            return vec![];
1148        }
1149
1150        let tree = match PARSER.with(|p| {
1151            let mut parser = p.borrow_mut();
1152            if parser.set_language(&lang_info.language).is_err() {
1153                return None;
1154            }
1155            parser.parse(source, None)
1156        }) {
1157            Some(t) => t,
1158            None => return vec![],
1159        };
1160
1161        let root = tree.root_node();
1162
1163        // Convert ast_recursion_limit the same way extract() does:
1164        // 0 means unlimited (None); positive values become Some(u32).
1165        let max_depth: Option<u32> = ast_recursion_limit
1166            .filter(|&limit| limit > 0)
1167            .and_then(|limit| u32::try_from(limit).ok());
1168
1169        Self::extract_def_use(source, compiled, root, symbol, file_path, max_depth)
1170    }
1171}
1172
1173/// Extract `impl Trait for Type` blocks from Rust source.
1174///
1175/// Runs independently of `extract_references` to avoid shared deduplication state.
1176/// Returns an empty vec for non-Rust source (no error; caller decides).
1177#[must_use]
1178pub fn extract_impl_traits(source: &str, path: &Path) -> Vec<ImplTraitInfo> {
1179    let Some(lang_info) = get_language_info("rust") else {
1180        return vec![];
1181    };
1182
1183    let Ok(compiled) = get_compiled_queries("rust") else {
1184        return vec![];
1185    };
1186
1187    let Some(query) = &compiled.impl_trait else {
1188        return vec![];
1189    };
1190
1191    let Some(tree) = PARSER.with(|p| {
1192        let mut parser = p.borrow_mut();
1193        let _ = parser.set_language(&lang_info.language);
1194        parser.parse(source, None)
1195    }) else {
1196        return vec![];
1197    };
1198
1199    let root = tree.root_node();
1200    let mut results = Vec::new();
1201
1202    QUERY_CURSOR.with(|c| {
1203        let mut cursor = c.borrow_mut();
1204        cursor.set_max_start_depth(None);
1205        let mut matches = cursor.matches(query, root, source.as_bytes());
1206
1207        while let Some(mat) = matches.next() {
1208            let mut trait_name = String::new();
1209            let mut impl_type = String::new();
1210            let mut line = 0usize;
1211
1212            for capture in mat.captures {
1213                let capture_name = query.capture_names()[capture.index as usize];
1214                let node = capture.node;
1215                let text = source[node.start_byte()..node.end_byte()].to_string();
1216                match capture_name {
1217                    "trait_name" => {
1218                        trait_name = text;
1219                        line = node.start_position().row + 1;
1220                    }
1221                    "impl_type" => {
1222                        impl_type = text;
1223                    }
1224                    _ => {}
1225                }
1226            }
1227
1228            if !trait_name.is_empty() && !impl_type.is_empty() {
1229                results.push(ImplTraitInfo {
1230                    trait_name,
1231                    impl_type,
1232                    path: path.to_path_buf(),
1233                    line,
1234                });
1235            }
1236        }
1237    });
1238
1239    results
1240}
1241
1242/// Execute a custom tree-sitter query against source code.
1243///
1244/// This is the internal implementation of the public `execute_query` function.
1245pub fn execute_query_impl(
1246    language: &str,
1247    source: &str,
1248    query_str: &str,
1249) -> Result<Vec<crate::QueryCapture>, ParserError> {
1250    // Get the tree-sitter language from the language name
1251    let ts_language = crate::languages::get_ts_language(language)
1252        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
1253
1254    let mut parser = Parser::new();
1255    parser
1256        .set_language(&ts_language)
1257        .map_err(|e| ParserError::QueryError(e.to_string()))?;
1258
1259    let tree = parser
1260        .parse(source.as_bytes(), None)
1261        .ok_or_else(|| ParserError::QueryError("failed to parse source".to_string()))?;
1262
1263    let query =
1264        Query::new(&ts_language, query_str).map_err(|e| ParserError::QueryError(e.to_string()))?;
1265
1266    let source_bytes = source.as_bytes();
1267
1268    let mut captures = Vec::new();
1269    QUERY_CURSOR.with(|c| {
1270        let mut cursor = c.borrow_mut();
1271        cursor.set_max_start_depth(None);
1272        let mut matches = cursor.matches(&query, tree.root_node(), source_bytes);
1273        while let Some(m) = matches.next() {
1274            for cap in m.captures {
1275                let node = cap.node;
1276                let capture_name = query.capture_names()[cap.index as usize].to_string();
1277                let text = node.utf8_text(source_bytes).unwrap_or("").to_string();
1278                captures.push(crate::QueryCapture {
1279                    capture_name,
1280                    text,
1281                    start_line: node.start_position().row,
1282                    end_line: node.end_position().row,
1283                    start_byte: node.start_byte(),
1284                    end_byte: node.end_byte(),
1285                });
1286            }
1287        }
1288    });
1289    Ok(captures)
1290}
1291
// Language-feature-gated tests (require lang-rust); see also tests_unsupported below
#[cfg(all(test, feature = "lang-rust"))]
mod tests {
    use super::*;
    use std::path::Path;

    /// A limit of zero must be treated exactly like "no limit".
    #[test]
    fn test_ast_recursion_limit_zero_is_unlimited() {
        let source = "fn hello() -> u32 { 42 }";

        let unlimited =
            SemanticExtractor::extract(source, "rust", None).expect("extract with None failed");
        let zero = SemanticExtractor::extract(source, "rust", Some(0))
            .expect("extract with Some(0) failed");

        assert!(
            !unlimited.functions.is_empty(),
            "extract with None should find at least one function in the test source"
        );
        assert_eq!(
            unlimited.functions.len(),
            zero.functions.len(),
            "ast_recursion_limit=0 should behave identically to unset (unlimited)"
        );
    }

    /// `use a::b as c;` reports the alias as an import item.
    #[test]
    fn test_rust_use_as_imports() {
        // Arrange + Act
        let analysis = SemanticExtractor::extract("use std::io as stdio;", "rust", None).unwrap();

        // Assert: alias "stdio" is captured as an import item
        let alias_found = analysis
            .imports
            .iter()
            .any(|imp| imp.items.iter().any(|i| i == "stdio"));
        assert!(
            alias_found,
            "expected import alias 'stdio' in {:?}",
            analysis.imports
        );
    }

    /// use_as_clause with a plain identifier (no scoped_identifier)
    /// exercises the _ => prefix.to_string() arm.
    #[test]
    fn test_rust_use_as_clause_plain_identifier() {
        // Arrange + Act
        let analysis = SemanticExtractor::extract("use io as stdio;", "rust", None).unwrap();

        // Assert: alias "stdio" is captured as an import item
        let alias_found = analysis
            .imports
            .iter()
            .any(|imp| imp.items.iter().any(|i| i == "stdio"));
        assert!(
            alias_found,
            "expected import alias 'stdio' from plain identifier in {:?}",
            analysis.imports
        );
    }

    /// scoped_use_list entries with a non-empty prefix keep that prefix in the module.
    #[test]
    fn test_rust_scoped_use_with_prefix() {
        // Arrange + Act
        let analysis =
            SemanticExtractor::extract("use std::{io::Read, io::Write};", "rust", None).unwrap();

        // Assert: both Read and Write appear as items with std::io module
        let io_items: Vec<&str> = analysis
            .imports
            .iter()
            .filter(|imp| imp.module.starts_with("std::io"))
            .flat_map(|imp| imp.items.iter().map(|s| s.as_str()))
            .collect();
        let has_both = ["Read", "Write"]
            .iter()
            .all(|wanted| io_items.contains(wanted));
        assert!(
            has_both,
            "expected 'Read' and 'Write' items under module with std::io, got {:?}",
            analysis.imports
        );
    }

    /// `use std::{fs, io};` lists both names under module `std`.
    #[test]
    fn test_rust_scoped_use_imports() {
        // Arrange + Act
        let analysis = SemanticExtractor::extract("use std::{fs, io};", "rust", None).unwrap();

        // Assert: both "fs" and "io" appear as import items under module "std"
        let std_items: Vec<&str> = analysis
            .imports
            .iter()
            .filter(|imp| imp.module == "std")
            .flat_map(|imp| imp.items.iter().map(|s| s.as_str()))
            .collect();
        let has_both = ["fs", "io"].iter().all(|wanted| std_items.contains(wanted));
        assert!(
            has_both,
            "expected 'fs' and 'io' items under module 'std', got {:?}",
            std_items
        );
    }

    /// A glob import is reported as the single item `*`.
    #[test]
    fn test_rust_wildcard_imports() {
        // Arrange + Act
        let analysis = SemanticExtractor::extract("use std::io::*;", "rust", None).unwrap();

        // Assert: wildcard import with module "std::io"
        let has_wildcard = analysis
            .imports
            .iter()
            .any(|imp| imp.module == "std::io" && imp.items == ["*"]);
        assert!(
            has_wildcard,
            "expected wildcard import with module 'std::io', got {:?}",
            analysis.imports
        );
    }

    /// The standalone entry point finds a simple `impl Trait for Type`.
    #[test]
    fn test_extract_impl_traits_standalone() {
        // Arrange: source with a simple impl Trait for Type
        let source = "\nstruct Foo;\ntrait Display {}\nimpl Display for Foo {}\n";

        // Act
        let found = extract_impl_traits(source, Path::new("test.rs"));

        // Assert
        assert_eq!(found.len(), 1, "expected one impl trait, got {:?}", found);
        let only = &found[0];
        assert_eq!(only.trait_name, "Display");
        assert_eq!(only.impl_type, "Foo");
    }

    /// A limit larger than u32::MAX triggers a ParseError on 64-bit targets.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_ast_recursion_limit_overflow() {
        // Arrange
        let oversized = usize::try_from(u32::MAX).unwrap() + 1;

        // Act
        let outcome = SemanticExtractor::extract("fn foo() {}", "rust", Some(oversized));

        // Assert
        let is_parse_error = matches!(&outcome, Err(ParserError::ParseError(_)));
        assert!(
            is_parse_error,
            "expected ParseError for oversized limit, got {:?}",
            outcome
        );
    }

    /// `Some(depth)` exercises the max_depth Some branch.
    #[test]
    fn test_ast_recursion_limit_some() {
        // Arrange + Act
        let outcome = SemanticExtractor::extract("fn hello() -> u32 { 42 }", "rust", Some(5));

        // Assert: should succeed without error and extract functions
        assert!(
            outcome.is_ok(),
            "extract with Some(5) failed: {:?}",
            outcome
        );
        let analysis = outcome.unwrap();
        assert!(
            !analysis.functions.is_empty(),
            "expected at least one function with depth limit 5"
        );
    }

    /// A `let` binding followed by a use yields both a write and a read site.
    #[test]
    fn test_extract_def_use_for_file_finds_write_and_read() {
        // Arrange
        let source = "\nfn main() {\n    let count = 0;\n    println!(\"{}\", count);\n}\n";

        // Act
        let sites = SemanticExtractor::extract_def_use_for_file(
            source,
            "rust",
            "count",
            "src/main.rs",
            None,
        );

        // Assert
        assert!(
            !sites.is_empty(),
            "expected at least one def-use site for 'count'"
        );
        let saw = |wanted: crate::types::DefUseKind| sites.iter().any(|s| s.kind == wanted);
        assert!(
            saw(crate::types::DefUseKind::Write),
            "expected a write site for 'count'"
        );
        assert!(
            saw(crate::types::DefUseKind::Read),
            "expected a read site for 'count'"
        );
        assert_eq!(sites[0].file, "src/main.rs");
    }

    /// A symbol that never occurs produces no sites.
    #[test]
    fn test_extract_def_use_for_file_no_match_returns_empty() {
        // Arrange + Act
        let sites = SemanticExtractor::extract_def_use_for_file(
            "fn foo() { let x = 1; }",
            "rust",
            "nonexistent_symbol",
            "src/lib.rs",
            None,
        );

        // Assert
        assert!(sites.is_empty(), "expected empty for nonexistent symbol");
    }
}
1515
// Language-feature-gated tests for Python
#[cfg(all(test, feature = "lang-python"))]
mod tests_python {
    use super::*;

    /// Relative imports (`from . import foo`) are reported with a dotted module.
    #[test]
    fn test_python_relative_import() {
        // Arrange + Act
        let analysis = SemanticExtractor::extract("from . import foo\n", "python", None).unwrap();

        // Assert: relative import should be captured
        let has_relative = analysis.imports.iter().any(|imp| imp.module.contains('.'));
        assert!(
            has_relative,
            "expected relative import in {:?}",
            analysis.imports
        );
    }

    /// Aliased imports (`from os import path as p`) report the original name.
    ///
    /// Note: tree-sitter-python extracts "path" (the original name), not the alias "p".
    #[test]
    fn test_python_aliased_import() {
        // Arrange + Act
        let analysis =
            SemanticExtractor::extract("from os import path as p\n", "python", None).unwrap();

        // Assert: "path" should be in items (alias is captured separately by aliased_import node)
        let has_path = analysis
            .imports
            .iter()
            .any(|imp| imp.module == "os" && imp.items.iter().any(|i| i == "path"));
        assert!(
            has_path,
            "expected import 'path' from module 'os' in {:?}",
            analysis.imports
        );
    }
}
1555
// Tests that do not require any language feature gate
#[cfg(test)]
mod tests_unsupported {
    use super::*;

    /// The element extractor rejects a language it has no grammar for.
    #[test]
    fn test_element_extractor_unsupported_language() {
        // Arrange + Act
        let outcome = ElementExtractor::extract_with_depth("x = 1", "cobol");

        // Assert
        let rejected = matches!(
            &outcome,
            Err(ParserError::UnsupportedLanguage(lang)) if lang == "cobol"
        );
        assert!(
            rejected,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }

    /// The semantic extractor rejects a language it has no grammar for.
    #[test]
    fn test_semantic_extractor_unsupported_language() {
        // Arrange + Act
        let outcome = SemanticExtractor::extract("x = 1", "cobol", None);

        // Assert
        let rejected = matches!(
            &outcome,
            Err(ParserError::UnsupportedLanguage(lang)) if lang == "cobol"
        );
        assert!(
            rejected,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }
}