Skip to main content

aptu_coder_core/
parser.rs

1// SPDX-FileCopyrightText: 2026 aptu-coder contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Tree-sitter-based parser for extracting semantic structure from source code.
4//!
5//! This module provides language-agnostic parsing using tree-sitter queries to extract
6//! functions, classes, imports, references, and other semantic elements from source files.
7//! Two main extractors handle different use cases:
8//!
9//! - [`ElementExtractor`]: Quick extraction of function and class counts.
10//! - [`SemanticExtractor`]: Detailed semantic analysis with calls, imports, and references.
11
12use crate::languages::get_language_info;
13use crate::types::{
14    CallInfo, ClassInfo, FunctionInfo, ImplTraitInfo, ImportInfo, ReferenceInfo, ReferenceType,
15    SemanticAnalysis,
16};
17use std::cell::RefCell;
18use std::collections::HashMap;
19use std::path::{Path, PathBuf};
20use std::sync::LazyLock;
21use thiserror::Error;
22use tracing::instrument;
23use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
24
/// Errors produced by the parsing and extraction APIs in this module.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum ParserError {
    /// No language info or compiled queries are available for the requested language;
    /// the payload is the language name that was asked for.
    #[error("Unsupported language: {0}")]
    UnsupportedLanguage(String),
    /// The parser could not be configured, produced no tree, or was given an
    /// out-of-range `ast_recursion_limit`; the payload describes which.
    #[error("Failed to parse file: {0}")]
    ParseError(String),
    /// The file contents are not valid UTF-8.
    #[error("Invalid UTF-8 in file")]
    InvalidUtf8,
    /// A tree-sitter query string failed to compile; the payload names the query
    /// and the language.
    #[error("Query error: {0}")]
    QueryError(String),
}
37
/// Compiled tree-sitter queries for a language.
///
/// `element` and `call` are mandatory. Every other query is optional and is only
/// present when the language's `LanguageInfo` supplies a query string for it.
struct CompiledQueries {
    /// Element query; its `function` / `class` captures drive element extraction.
    element: Query,
    /// Call query; its `call` capture marks call sites.
    call: Query,
    /// Import query, if the language defines one.
    import: Option<Query>,
    /// Impl-block query, if the language defines one.
    impl_block: Option<Query>,
    /// Reference query, if the language defines one.
    reference: Option<Query>,
    /// Impl-trait query, if the language defines one.
    impl_trait: Option<Query>,
    /// Def-use query, if the language defines one.
    defuse: Option<Query>,
}
49
50/// Build compiled queries for a given language.
51///
52/// The `map_err` closures inside are only reachable if a hardcoded query string is
53/// invalid, which cannot happen at runtime -- exclude them from coverage instrumentation.
54#[cfg_attr(coverage_nightly, coverage(off))]
55fn build_compiled_queries(
56    lang_info: &crate::languages::LanguageInfo,
57) -> Result<CompiledQueries, ParserError> {
58    let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
59        ParserError::QueryError(format!(
60            "Failed to compile element query for {}: {}",
61            lang_info.name, e
62        ))
63    })?;
64
65    let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
66        ParserError::QueryError(format!(
67            "Failed to compile call query for {}: {}",
68            lang_info.name, e
69        ))
70    })?;
71
72    let import = if let Some(import_query_str) = lang_info.import_query {
73        Some(
74            Query::new(&lang_info.language, import_query_str).map_err(|e| {
75                ParserError::QueryError(format!(
76                    "Failed to compile import query for {}: {}",
77                    lang_info.name, e
78                ))
79            })?,
80        )
81    } else {
82        None
83    };
84
85    let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
86        Some(
87            Query::new(&lang_info.language, impl_query_str).map_err(|e| {
88                ParserError::QueryError(format!(
89                    "Failed to compile impl query for {}: {}",
90                    lang_info.name, e
91                ))
92            })?,
93        )
94    } else {
95        None
96    };
97
98    let reference = if let Some(ref_query_str) = lang_info.reference_query {
99        Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
100            ParserError::QueryError(format!(
101                "Failed to compile reference query for {}: {}",
102                lang_info.name, e
103            ))
104        })?)
105    } else {
106        None
107    };
108
109    let impl_trait = if let Some(impl_trait_query_str) = lang_info.impl_trait_query {
110        Some(
111            Query::new(&lang_info.language, impl_trait_query_str).map_err(|e| {
112                ParserError::QueryError(format!(
113                    "Failed to compile impl_trait query for {}: {}",
114                    lang_info.name, e
115                ))
116            })?,
117        )
118    } else {
119        None
120    };
121
122    let defuse = if let Some(defuse_query_str) = lang_info.defuse_query {
123        Some(
124            Query::new(&lang_info.language, defuse_query_str).map_err(|e| {
125                ParserError::QueryError(format!(
126                    "Failed to compile defuse query for {}: {}",
127                    lang_info.name, e
128                ))
129            })?,
130        )
131    } else {
132        None
133    };
134
135    Ok(CompiledQueries {
136        element,
137        call,
138        import,
139        impl_block,
140        reference,
141        impl_trait,
142        defuse,
143    })
144}
145
146/// Initialize the query cache with compiled queries for all supported languages.
147///
148/// Excluded from coverage: the `Err` arm is unreachable because `build_compiled_queries`
149/// only fails on invalid hardcoded query strings.
150#[cfg_attr(coverage_nightly, coverage(off))]
151fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
152    let mut cache = HashMap::new();
153
154    for lang_name in crate::lang::supported_languages() {
155        if let Some(lang_info) = get_language_info(lang_name) {
156            match build_compiled_queries(&lang_info) {
157                Ok(compiled) => {
158                    cache.insert(*lang_name, compiled);
159                }
160                Err(e) => {
161                    tracing::error!(
162                        "Failed to compile queries for language {}: {}",
163                        lang_name,
164                        e
165                    );
166                }
167            }
168        }
169    }
170
171    cache
172}
173
/// Lazily initialized cache of compiled queries per language.
///
/// Populated by `init_query_cache` on first access and keyed by language name.
static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
    LazyLock::new(init_query_cache);
177
178/// Get compiled queries for a language from the cache.
179fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
180    QUERY_CACHE
181        .get(language)
182        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
183}
184
thread_local! {
    /// Per-thread parser, reused across calls. The extractors in this file set the
    /// language on it before every parse.
    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
    /// Per-thread query cursor, reused across queries. The extractors in this file
    /// set its maximum start depth before every use.
    static QUERY_CURSOR: RefCell<QueryCursor> = RefCell::new(QueryCursor::new());
}
189
190/// Canonical API for extracting element counts from source code.
191pub struct ElementExtractor;
192
193impl ElementExtractor {
194    /// Extract function and class counts from source code.
195    ///
196    /// # Errors
197    ///
198    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
199    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
200    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
201    #[instrument(skip_all, fields(language))]
202    pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
203        let lang_info = get_language_info(language)
204            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
205
206        let tree = PARSER.with(|p| {
207            let mut parser = p.borrow_mut();
208            parser
209                .set_language(&lang_info.language)
210                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
211            parser
212                .parse(source, None)
213                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
214        })?;
215
216        let compiled = get_compiled_queries(language)?;
217
218        let (function_count, class_count) = QUERY_CURSOR.with(|c| {
219            let mut cursor = c.borrow_mut();
220            cursor.set_max_start_depth(None);
221            let mut function_count = 0;
222            let mut class_count = 0;
223
224            let mut matches =
225                cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
226            while let Some(mat) = matches.next() {
227                for capture in mat.captures {
228                    let capture_name = compiled.element.capture_names()[capture.index as usize];
229                    match capture_name {
230                        "function" => function_count += 1,
231                        "class" => class_count += 1,
232                        _ => {}
233                    }
234                }
235            }
236            (function_count, class_count)
237        });
238
239        tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
240
241        Ok((function_count, class_count))
242    }
243}
244
245/// Recursively extract `ImportInfo` entries from a use-clause node, respecting all Rust
246/// use-declaration forms (`scoped_identifier`, `scoped_use_list`, `use_list`,
247/// `use_as_clause`, `use_wildcard`, bare `identifier`).
248#[allow(clippy::too_many_lines)] // exhaustive match over all supported Rust use-clause forms; splitting harms readability
249fn extract_imports_from_node(
250    node: &Node,
251    source: &str,
252    prefix: &str,
253    line: usize,
254    imports: &mut Vec<ImportInfo>,
255) {
256    match node.kind() {
257        // Simple identifier: `use foo;` or an item inside `{foo, bar}`
258        "identifier" | "self" | "super" | "crate" => {
259            let name = source[node.start_byte()..node.end_byte()].to_string();
260            imports.push(ImportInfo {
261                module: prefix.to_string(),
262                items: vec![name],
263                line,
264            });
265        }
266        // Qualified path: `std::collections::HashMap`
267        "scoped_identifier" => {
268            let item = node
269                .child_by_field_name("name")
270                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
271                .unwrap_or_default();
272            let module = node.child_by_field_name("path").map_or_else(
273                || prefix.to_string(),
274                |p| {
275                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
276                    if prefix.is_empty() {
277                        path_text
278                    } else {
279                        format!("{prefix}::{path_text}")
280                    }
281                },
282            );
283            if !item.is_empty() {
284                imports.push(ImportInfo {
285                    module,
286                    items: vec![item],
287                    line,
288                });
289            }
290        }
291        // `std::{io, fs}` — path prefix followed by a brace list
292        "scoped_use_list" => {
293            let new_prefix = node.child_by_field_name("path").map_or_else(
294                || prefix.to_string(),
295                |p| {
296                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
297                    if prefix.is_empty() {
298                        path_text
299                    } else {
300                        format!("{prefix}::{path_text}")
301                    }
302                },
303            );
304            if let Some(list) = node.child_by_field_name("list") {
305                extract_imports_from_node(&list, source, &new_prefix, line, imports);
306            }
307        }
308        // `{HashMap, HashSet}` — brace-enclosed list of items
309        "use_list" => {
310            let mut cursor = node.walk();
311            for child in node.children(&mut cursor) {
312                match child.kind() {
313                    "{" | "}" | "," => {}
314                    _ => extract_imports_from_node(&child, source, prefix, line, imports),
315                }
316            }
317        }
318        // `std::io::*` — glob import
319        "use_wildcard" => {
320            let text = source[node.start_byte()..node.end_byte()].to_string();
321            let module = if let Some(stripped) = text.strip_suffix("::*") {
322                if prefix.is_empty() {
323                    stripped.to_string()
324                } else {
325                    format!("{prefix}::{stripped}")
326                }
327            } else {
328                prefix.to_string()
329            };
330            imports.push(ImportInfo {
331                module,
332                items: vec!["*".to_string()],
333                line,
334            });
335        }
336        // `io as stdio` or `std::io as stdio`
337        "use_as_clause" => {
338            let alias = node
339                .child_by_field_name("alias")
340                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
341                .unwrap_or_default();
342            let module = if let Some(path_node) = node.child_by_field_name("path") {
343                match path_node.kind() {
344                    "scoped_identifier" => path_node.child_by_field_name("path").map_or_else(
345                        || prefix.to_string(),
346                        |p| {
347                            let p_text = source[p.start_byte()..p.end_byte()].to_string();
348                            if prefix.is_empty() {
349                                p_text
350                            } else {
351                                format!("{prefix}::{p_text}")
352                            }
353                        },
354                    ),
355                    _ => prefix.to_string(),
356                }
357            } else {
358                prefix.to_string()
359            };
360            if !alias.is_empty() {
361                imports.push(ImportInfo {
362                    module,
363                    items: vec![alias],
364                    line,
365                });
366            }
367        }
368        // Python import_from_statement: `from module import name` or `from . import *`
369        "import_from_statement" => {
370            extract_python_import_from(node, source, line, imports);
371        }
372        // Fallback for non-Rust import nodes: capture full text as module
373        _ => {
374            let text = source[node.start_byte()..node.end_byte()]
375                .trim()
376                .to_string();
377            if !text.is_empty() {
378                imports.push(ImportInfo {
379                    module: text,
380                    items: vec![],
381                    line,
382                });
383            }
384        }
385    }
386}
387
388/// Extract an item name from a `dotted_name` or `aliased_import` child node.
389fn extract_import_item_name(child: &Node, source: &str) -> Option<String> {
390    match child.kind() {
391        "dotted_name" => {
392            let name = source[child.start_byte()..child.end_byte()]
393                .trim()
394                .to_string();
395            if name.is_empty() { None } else { Some(name) }
396        }
397        "aliased_import" => child.child_by_field_name("name").and_then(|n| {
398            let name = source[n.start_byte()..n.end_byte()].trim().to_string();
399            if name.is_empty() { None } else { Some(name) }
400        }),
401        _ => None,
402    }
403}
404
405/// Collect wildcard/named imports from an `import_list` node or from direct named children.
406fn collect_import_items(
407    node: &Node,
408    source: &str,
409    is_wildcard: &mut bool,
410    items: &mut Vec<String>,
411) {
412    // Prefer import_list child (wraps `from x import a, b`)
413    if let Some(import_list) = node.child_by_field_name("import_list") {
414        let mut cursor = import_list.walk();
415        for child in import_list.named_children(&mut cursor) {
416            if child.kind() == "wildcard_import" {
417                *is_wildcard = true;
418            } else if let Some(name) = extract_import_item_name(&child, source) {
419                items.push(name);
420            }
421        }
422        return;
423    }
424    // No import_list: single-name or wildcard as direct child (skip first named child = module_name)
425    let mut cursor = node.walk();
426    let mut first = true;
427    for child in node.named_children(&mut cursor) {
428        if first {
429            first = false;
430            continue;
431        }
432        if child.kind() == "wildcard_import" {
433            *is_wildcard = true;
434        } else if let Some(name) = extract_import_item_name(&child, source) {
435            items.push(name);
436        }
437    }
438}
439
440/// Handle Python `import_from_statement` node.
441fn extract_python_import_from(
442    node: &Node,
443    source: &str,
444    line: usize,
445    imports: &mut Vec<ImportInfo>,
446) {
447    let module = if let Some(m) = node.child_by_field_name("module_name") {
448        source[m.start_byte()..m.end_byte()].trim().to_string()
449    } else if let Some(r) = node.child_by_field_name("relative_import") {
450        source[r.start_byte()..r.end_byte()].trim().to_string()
451    } else {
452        String::new()
453    };
454
455    let mut is_wildcard = false;
456    let mut items = Vec::new();
457    collect_import_items(node, source, &mut is_wildcard, &mut items);
458
459    if !module.is_empty() {
460        imports.push(ImportInfo {
461            module,
462            items: if is_wildcard {
463                vec!["*".to_string()]
464            } else {
465                items
466            },
467            line,
468        });
469    }
470}
471
/// Detailed semantic analysis of source code: functions, classes, imports, references
/// and calls.
pub struct SemanticExtractor;
473
474impl SemanticExtractor {
475    /// Extract semantic information from source code.
476    ///
477    /// # Errors
478    ///
479    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
480    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
481    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
482    #[instrument(skip_all, fields(language))]
483    pub fn extract(
484        source: &str,
485        language: &str,
486        ast_recursion_limit: Option<usize>,
487    ) -> Result<SemanticAnalysis, ParserError> {
488        let lang_info = get_language_info(language)
489            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
490
491        let tree = PARSER.with(|p| {
492            let mut parser = p.borrow_mut();
493            parser
494                .set_language(&lang_info.language)
495                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
496            parser
497                .parse(source, None)
498                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
499        })?;
500
501        // 0 is not a useful depth (visits root node only, returning zero results).
502        // Treat 0 as None (unlimited). See #339.
503        let max_depth: Option<u32> = ast_recursion_limit
504            .filter(|&limit| limit > 0)
505            .map(|limit| {
506                u32::try_from(limit).map_err(|_| {
507                    ParserError::ParseError(format!(
508                        "ast_recursion_limit {} exceeds maximum supported value {}",
509                        limit,
510                        u32::MAX
511                    ))
512                })
513            })
514            .transpose()?;
515
516        let compiled = get_compiled_queries(language)?;
517        let root = tree.root_node();
518
519        let mut functions = Vec::new();
520        let mut classes = Vec::new();
521        let mut imports = Vec::new();
522        let mut references = Vec::new();
523        let mut call_frequency = HashMap::new();
524        let mut calls = Vec::new();
525
526        Self::extract_elements(
527            source,
528            compiled,
529            root,
530            max_depth,
531            &lang_info,
532            &mut functions,
533            &mut classes,
534        );
535        Self::extract_calls(
536            source,
537            compiled,
538            root,
539            max_depth,
540            &mut calls,
541            &mut call_frequency,
542        );
543        Self::extract_imports(source, compiled, root, max_depth, &mut imports);
544        Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes);
545        Self::extract_references(source, compiled, root, max_depth, &mut references);
546
547        // Extract impl-trait blocks for Rust files (empty for other languages)
548        let impl_traits = if language == "rust" {
549            Self::extract_impl_traits_from_tree(source, compiled, root)
550        } else {
551            vec![]
552        };
553
554        tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), impl_traits = impl_traits.len(), "extraction complete");
555
556        Ok(SemanticAnalysis {
557            functions,
558            classes,
559            imports,
560            references,
561            call_frequency,
562            calls,
563            impl_traits,
564            def_use_sites: Vec::new(),
565        })
566    }
567
568    fn extract_elements(
569        source: &str,
570        compiled: &CompiledQueries,
571        root: Node<'_>,
572        max_depth: Option<u32>,
573        lang_info: &crate::languages::LanguageInfo,
574        functions: &mut Vec<FunctionInfo>,
575        classes: &mut Vec<ClassInfo>,
576    ) {
577        let mut seen_functions = std::collections::HashSet::new();
578
579        QUERY_CURSOR.with(|c| {
580            let mut cursor = c.borrow_mut();
581            cursor.set_max_start_depth(None);
582            if let Some(depth) = max_depth {
583                cursor.set_max_start_depth(Some(depth));
584            }
585            let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
586
587            while let Some(mat) = matches.next() {
588                let mut func_node: Option<Node> = None;
589                let mut func_name_text: Option<String> = None;
590                let mut class_node: Option<Node> = None;
591                let mut class_name_text: Option<String> = None;
592
593                for capture in mat.captures {
594                    let capture_name = compiled.element.capture_names()[capture.index as usize];
595                    let node = capture.node;
596                    match capture_name {
597                        "function" => func_node = Some(node),
598                        "func_name" | "method_name" => {
599                            func_name_text =
600                                Some(source[node.start_byte()..node.end_byte()].to_string());
601                        }
602                        "class" => class_node = Some(node),
603                        "class_name" | "type_name" => {
604                            class_name_text =
605                                Some(source[node.start_byte()..node.end_byte()].to_string());
606                        }
607                        _ => {}
608                    }
609                }
610
611                if let Some(func_node) = func_node {
612                    // When a plain function_definition is nested inside a template_declaration
613                    // or decorated_definition, it is also matched by the explicit wrapper pattern.
614                    // Skip it here to avoid duplicates; the wrapper match will emit it.
615                    let parent_kind = func_node.parent().map(|p| p.kind());
616                    let parent_is_wrapper = parent_kind
617                        .map(|k| k == "template_declaration" || k == "decorated_definition")
618                        .unwrap_or(false);
619                    if func_node.kind() == "function_definition" && parent_is_wrapper {
620                        // Handled by the template_declaration or decorated_definition @function match instead.
621                    } else {
622                        // Resolve template_declaration or decorated_definition to inner function_definition
623                        // for declarator/field walks. The captured node may be a wrapper.
624                        let func_def = if func_node.kind() == "template_declaration" {
625                            let mut cursor = func_node.walk();
626                            func_node
627                                .children(&mut cursor)
628                                .find(|n| n.kind() == "function_definition")
629                                .unwrap_or(func_node)
630                        } else if func_node.kind() == "decorated_definition" {
631                            func_node
632                                .child_by_field_name("definition")
633                                .unwrap_or(func_node)
634                        } else {
635                            func_node
636                        };
637
638                        let name = func_name_text
639                            .or_else(|| {
640                                func_def
641                                    .child_by_field_name("name")
642                                    .map(|n| source[n.start_byte()..n.end_byte()].to_string())
643                            })
644                            .unwrap_or_default();
645
646                        let func_key = (name.clone(), func_node.start_position().row);
647                        if !name.is_empty() && seen_functions.insert(func_key) {
648                            // For C/C++: parameters live under declarator -> parameters.
649                            // For other languages: parameters is a direct child field.
650                            let params = func_def
651                                .child_by_field_name("declarator")
652                                .and_then(|d| d.child_by_field_name("parameters"))
653                                .or_else(|| func_def.child_by_field_name("parameters"))
654                                .map(|p| source[p.start_byte()..p.end_byte()].to_string())
655                                .unwrap_or_default();
656
657                            // Try "type" first (C/C++ uses this field for the return type);
658                            // fall back to "return_type" (Rust, Python, TypeScript, etc.).
659                            let return_type = func_def
660                                .child_by_field_name("type")
661                                .or_else(|| func_def.child_by_field_name("return_type"))
662                                .map(|r| source[r.start_byte()..r.end_byte()].to_string());
663
664                            // Walk backward through contiguous attribute_item siblings
665                            // to find the first attribute line (Rust only).
666                            let first_line = if func_node.kind() == "function_item" {
667                                let mut attrs: Vec<Node> = Vec::new();
668                                let mut sib = func_node.prev_named_sibling();
669                                while let Some(s) = sib {
670                                    if s.kind() == "attribute_item" {
671                                        attrs.push(s);
672                                        sib = s.prev_named_sibling();
673                                    } else {
674                                        break;
675                                    }
676                                }
677                                attrs
678                                    .last()
679                                    .map(|n| n.start_position().row + 1)
680                                    .unwrap_or_else(|| func_node.start_position().row + 1)
681                            } else {
682                                func_node.start_position().row + 1
683                            };
684
685                            functions.push(FunctionInfo {
686                                name,
687                                line: first_line,
688                                end_line: func_node.end_position().row + 1,
689                                parameters: if params.is_empty() {
690                                    Vec::new()
691                                } else {
692                                    vec![params]
693                                },
694                                return_type,
695                            });
696                        }
697                    }
698                }
699
700                if let Some(class_node) = class_node {
701                    let name = class_name_text
702                        .or_else(|| {
703                            class_node
704                                .child_by_field_name("name")
705                                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
706                        })
707                        .unwrap_or_default();
708
709                    if !name.is_empty() {
710                        let inherits = if let Some(handler) = lang_info.extract_inheritance {
711                            handler(&class_node, source)
712                        } else {
713                            Vec::new()
714                        };
715                        classes.push(ClassInfo {
716                            name,
717                            line: class_node.start_position().row + 1,
718                            end_line: class_node.end_position().row + 1,
719                            methods: Vec::new(),
720                            fields: Vec::new(),
721                            inherits,
722                        });
723                    }
724                }
725            }
726        });
727    }
728
729    /// Returns the name of the enclosing function/method/subroutine for a given AST node,
730    /// by walking ancestors and matching all language-specific function container kinds.
731    fn enclosing_function_name(mut node: tree_sitter::Node<'_>, source: &str) -> Option<String> {
732        let mut depth = 0u32;
733        while let Some(parent) = node.parent() {
734            depth += 1;
735            // Cap at 64 hops: real function nesting rarely exceeds ~10 levels; 64 is a generous
736            // upper bound that guards against pathological/malformed ASTs without false negatives
737            // on legitimate code. Returns None (treated as <module>) when the cap is hit.
738            if depth > 64 {
739                return None;
740            }
741            let name_node = match parent.kind() {
742                // Direct name field: Rust, Python, Go, Java, TypeScript/TSX
743                "function_item"
744                | "method_item"
745                | "function_definition"
746                | "function_declaration"
747                | "method_declaration"
748                | "method_definition" => parent.child_by_field_name("name"),
749                // Fortran subroutine: name is inside subroutine_statement child
750                "subroutine" => {
751                    let mut cursor = parent.walk();
752                    parent
753                        .children(&mut cursor)
754                        .find(|c| c.kind() == "subroutine_statement")
755                        .and_then(|s| s.child_by_field_name("name"))
756                }
757                // Fortran function: name is inside function_statement child
758                "function" => {
759                    let mut cursor = parent.walk();
760                    parent
761                        .children(&mut cursor)
762                        .find(|c| c.kind() == "function_statement")
763                        .and_then(|s| s.child_by_field_name("name"))
764                }
765                _ => {
766                    node = parent;
767                    continue;
768                }
769            };
770            return name_node.map(|n| source[n.start_byte()..n.end_byte()].to_string());
771        }
772        // The loop exits here only when no parent was found (i.e., we reached the tree root
773        // without finding a function container). If the depth cap fired, we returned None early
774        // above. Nothing to assert here.
775        None
776    }
777
778    fn extract_calls(
779        source: &str,
780        compiled: &CompiledQueries,
781        root: Node<'_>,
782        max_depth: Option<u32>,
783        calls: &mut Vec<CallInfo>,
784        call_frequency: &mut HashMap<String, usize>,
785    ) {
786        QUERY_CURSOR.with(|c| {
787            let mut cursor = c.borrow_mut();
788            cursor.set_max_start_depth(None);
789            if let Some(depth) = max_depth {
790                cursor.set_max_start_depth(Some(depth));
791            }
792            let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
793
794            while let Some(mat) = matches.next() {
795                for capture in mat.captures {
796                    let capture_name = compiled.call.capture_names()[capture.index as usize];
797                    if capture_name != "call" {
798                        continue;
799                    }
800                    let node = capture.node;
801                    let call_name = source[node.start_byte()..node.end_byte()].to_string();
802                    *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
803
804                    let caller = Self::enclosing_function_name(node, source)
805                        .unwrap_or_else(|| "<module>".to_string());
806
807                    let mut arg_count = None;
808                    let mut arg_node = node;
809                    let mut hop = 0u32;
810                    let mut cap_hit = false;
811                    while let Some(parent) = arg_node.parent() {
812                        hop += 1;
813                        // Bounded parent traversal: cap at 16 hops to guard against pathological
814                        // walks on malformed/degenerate trees. Real call-expression nesting is
815                        // shallow (typically 1-3 levels). When the cap is hit we stop searching and
816                        // leave arg_count as None; the caller is still recorded, just without
817                        // argument-count information.
818                        if hop > 16 {
819                            cap_hit = true;
820                            break;
821                        }
822                        if parent.kind() == "call_expression" {
823                            if let Some(args) = parent.child_by_field_name("arguments") {
824                                arg_count = Some(args.named_child_count());
825                            }
826                            break;
827                        }
828                        arg_node = parent;
829                    }
830                    debug_assert!(
831                        !cap_hit,
832                        "extract_calls: parent traversal cap reached (hop > 16)"
833                    );
834
835                    calls.push(CallInfo {
836                        caller,
837                        callee: call_name,
838                        line: node.start_position().row + 1,
839                        column: node.start_position().column,
840                        arg_count,
841                    });
842                }
843            }
844        });
845    }
846
847    fn extract_imports(
848        source: &str,
849        compiled: &CompiledQueries,
850        root: Node<'_>,
851        max_depth: Option<u32>,
852        imports: &mut Vec<ImportInfo>,
853    ) {
854        let Some(ref import_query) = compiled.import else {
855            return;
856        };
857        QUERY_CURSOR.with(|c| {
858            let mut cursor = c.borrow_mut();
859            cursor.set_max_start_depth(None);
860            if let Some(depth) = max_depth {
861                cursor.set_max_start_depth(Some(depth));
862            }
863            let mut matches = cursor.matches(import_query, root, source.as_bytes());
864
865            while let Some(mat) = matches.next() {
866                for capture in mat.captures {
867                    let capture_name = import_query.capture_names()[capture.index as usize];
868                    if capture_name == "import_path" {
869                        let node = capture.node;
870                        let line = node.start_position().row + 1;
871                        extract_imports_from_node(&node, source, "", line, imports);
872                    }
873                }
874            }
875        });
876    }
877
878    fn extract_impl_methods(
879        source: &str,
880        compiled: &CompiledQueries,
881        root: Node<'_>,
882        max_depth: Option<u32>,
883        classes: &mut [ClassInfo],
884    ) {
885        let Some(ref impl_query) = compiled.impl_block else {
886            return;
887        };
888        QUERY_CURSOR.with(|c| {
889            let mut cursor = c.borrow_mut();
890            cursor.set_max_start_depth(None);
891            if let Some(depth) = max_depth {
892                cursor.set_max_start_depth(Some(depth));
893            }
894            let mut matches = cursor.matches(impl_query, root, source.as_bytes());
895
896            while let Some(mat) = matches.next() {
897                let mut impl_type_name = String::new();
898                let mut method_name = String::new();
899                let mut method_line = 0usize;
900                let mut method_end_line = 0usize;
901                let mut method_params = String::new();
902                let mut method_return_type: Option<String> = None;
903
904                for capture in mat.captures {
905                    let capture_name = impl_query.capture_names()[capture.index as usize];
906                    let node = capture.node;
907                    match capture_name {
908                        "impl_type" => {
909                            impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
910                        }
911                        "method_name" => {
912                            method_name = source[node.start_byte()..node.end_byte()].to_string();
913                        }
914                        "method_params" => {
915                            method_params = source[node.start_byte()..node.end_byte()].to_string();
916                        }
917                        "method" => {
918                            let mut method_attrs: Vec<Node> = Vec::new();
919                            let mut msib = node.prev_named_sibling();
920                            while let Some(s) = msib {
921                                if s.kind() == "attribute_item" {
922                                    method_attrs.push(s);
923                                    msib = s.prev_named_sibling();
924                                } else {
925                                    break;
926                                }
927                            }
928                            method_line = method_attrs
929                                .last()
930                                .map(|n| n.start_position().row + 1)
931                                .unwrap_or_else(|| node.start_position().row + 1);
932                            method_end_line = node.end_position().row + 1;
933                            method_return_type = node
934                                .child_by_field_name("return_type")
935                                .map(|r| source[r.start_byte()..r.end_byte()].to_string());
936                        }
937                        _ => {}
938                    }
939                }
940
941                if !impl_type_name.is_empty() && !method_name.is_empty() {
942                    let func = FunctionInfo {
943                        name: method_name,
944                        line: method_line,
945                        end_line: method_end_line,
946                        parameters: if method_params.is_empty() {
947                            Vec::new()
948                        } else {
949                            vec![method_params]
950                        },
951                        return_type: method_return_type,
952                    };
953                    if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
954                        class.methods.push(func);
955                    }
956                }
957            }
958        });
959    }
960
961    fn extract_references(
962        source: &str,
963        compiled: &CompiledQueries,
964        root: Node<'_>,
965        max_depth: Option<u32>,
966        references: &mut Vec<ReferenceInfo>,
967    ) {
968        let Some(ref ref_query) = compiled.reference else {
969            return;
970        };
971        let mut seen_refs = std::collections::HashSet::new();
972        QUERY_CURSOR.with(|c| {
973            let mut cursor = c.borrow_mut();
974            cursor.set_max_start_depth(None);
975            if let Some(depth) = max_depth {
976                cursor.set_max_start_depth(Some(depth));
977            }
978            let mut matches = cursor.matches(ref_query, root, source.as_bytes());
979
980            while let Some(mat) = matches.next() {
981                for capture in mat.captures {
982                    let capture_name = ref_query.capture_names()[capture.index as usize];
983                    if capture_name == "type_ref" {
984                        let node = capture.node;
985                        let type_ref = source[node.start_byte()..node.end_byte()].to_string();
986                        if seen_refs.insert(type_ref.clone()) {
987                            references.push(ReferenceInfo {
988                                symbol: type_ref,
989                                reference_type: ReferenceType::Usage,
990                                // location is intentionally empty here; set by the caller (analyze_file)
991                                location: String::new(),
992                                line: node.start_position().row + 1,
993                            });
994                        }
995                    }
996                }
997            }
998        });
999    }
1000
1001    /// Extract impl-trait blocks from an already-parsed tree.
1002    ///
1003    /// Called during `extract()` for Rust files to avoid a second parse.
1004    /// Returns an empty vec if the query is not available.
1005    fn extract_impl_traits_from_tree(
1006        source: &str,
1007        compiled: &CompiledQueries,
1008        root: Node<'_>,
1009    ) -> Vec<ImplTraitInfo> {
1010        let Some(query) = &compiled.impl_trait else {
1011            return vec![];
1012        };
1013
1014        let mut results = Vec::new();
1015        QUERY_CURSOR.with(|c| {
1016            let mut cursor = c.borrow_mut();
1017            cursor.set_max_start_depth(None);
1018            let mut matches = cursor.matches(query, root, source.as_bytes());
1019
1020            while let Some(mat) = matches.next() {
1021                let mut trait_name = String::new();
1022                let mut impl_type = String::new();
1023                let mut line = 0usize;
1024
1025                for capture in mat.captures {
1026                    let capture_name = query.capture_names()[capture.index as usize];
1027                    let node = capture.node;
1028                    let text = source[node.start_byte()..node.end_byte()].to_string();
1029                    match capture_name {
1030                        "trait_name" => {
1031                            trait_name = text;
1032                            line = node.start_position().row + 1;
1033                        }
1034                        "impl_type" => {
1035                            impl_type = text;
1036                        }
1037                        _ => {}
1038                    }
1039                }
1040
1041                if !trait_name.is_empty() && !impl_type.is_empty() {
1042                    results.push(ImplTraitInfo {
1043                        trait_name,
1044                        impl_type,
1045                        path: PathBuf::new(), // Path will be set by caller
1046                        line,
1047                    });
1048                }
1049            }
1050        });
1051
1052        results
1053    }
1054
1055    /// Extract def-use sites (write/read locations) for a given symbol within a file.
1056    ///
1057    /// Runs the defuse query to find all definition and use sites of a symbol.
1058    /// Returns empty vec if no defuse query is available for this language.
1059    ///
1060    /// # Arguments
1061    ///
1062    /// * `source` - The source code text
1063    /// * `compiled` - Compiled tree-sitter queries
1064    /// * `root` - Root node of the AST
1065    /// * `symbol_name` - The symbol to search for (must match exactly)
1066    /// * `file_path` - Relative file path for site reporting
1067    fn extract_def_use(
1068        source: &str,
1069        compiled: &CompiledQueries,
1070        root: Node<'_>,
1071        symbol_name: &str,
1072        file_path: &str,
1073        max_depth: Option<u32>,
1074    ) -> Vec<crate::types::DefUseSite> {
1075        let Some(ref defuse_query) = compiled.defuse else {
1076            return vec![];
1077        };
1078
1079        let mut sites = Vec::new();
1080        let source_lines: Vec<&str> = source.lines().collect();
1081        // Track byte offsets that already have a write or writeread capture so
1082        // duplicate read captures for the same identifier are suppressed.
1083        let mut write_offsets = std::collections::HashSet::new();
1084
1085        QUERY_CURSOR.with(|c| {
1086            let mut cursor = c.borrow_mut();
1087            cursor.set_max_start_depth(None);
1088            if let Some(depth) = max_depth {
1089                cursor.set_max_start_depth(Some(depth));
1090            }
1091            let mut matches = cursor.matches(defuse_query, root, source.as_bytes());
1092
1093            while let Some(mat) = matches.next() {
1094                for capture in mat.captures {
1095                    let capture_name = defuse_query.capture_names()[capture.index as usize];
1096                    let node = capture.node;
1097                    let node_text = node.utf8_text(source.as_bytes()).unwrap_or_default();
1098
1099                    // Only collect if the captured node matches the target symbol
1100                    if node_text != symbol_name {
1101                        continue;
1102                    }
1103
1104                    // Classify capture by prefix
1105                    let kind = if capture_name.starts_with("write.") {
1106                        crate::types::DefUseKind::Write
1107                    } else if capture_name.starts_with("read.") {
1108                        crate::types::DefUseKind::Read
1109                    } else if capture_name.starts_with("writeread.") {
1110                        crate::types::DefUseKind::WriteRead
1111                    } else {
1112                        continue;
1113                    };
1114
1115                    let byte_offset = node.start_byte();
1116
1117                    // De-duplicate: skip read captures for offsets already captured as write/writeread
1118                    if kind == crate::types::DefUseKind::Read
1119                        && write_offsets.contains(&byte_offset)
1120                    {
1121                        continue;
1122                    }
1123                    if kind != crate::types::DefUseKind::Read {
1124                        write_offsets.insert(byte_offset);
1125                    }
1126
1127                    // Get line number (1-indexed) and center-line snippet.
1128                    // Always produce a 3-line window so snippet_one_line (index 1) is safe.
1129                    let line = node.start_position().row + 1;
1130                    let snippet = {
1131                        let row = node.start_position().row;
1132                        let last_line = source_lines.len().saturating_sub(1);
1133                        let prev = if row > 0 { row - 1 } else { 0 };
1134                        let next = std::cmp::min(row + 1, last_line);
1135                        let prev_text = if row == 0 {
1136                            ""
1137                        } else {
1138                            source_lines[prev].trim_end()
1139                        };
1140                        let cur_text = source_lines[row].trim_end();
1141                        let next_text = if row >= last_line {
1142                            ""
1143                        } else {
1144                            source_lines[next].trim_end()
1145                        };
1146                        format!("{prev_text}\n{cur_text}\n{next_text}")
1147                    };
1148
1149                    // Get enclosing function scope
1150                    let enclosing_scope = Self::enclosing_function_name(node, source);
1151
1152                    let column = node.start_position().column;
1153                    sites.push(crate::types::DefUseSite {
1154                        kind,
1155                        symbol: node_text.to_string(),
1156                        file: file_path.to_string(),
1157                        line,
1158                        column,
1159                        snippet,
1160                        enclosing_scope,
1161                    });
1162                }
1163            }
1164        });
1165
1166        sites
1167    }
1168
1169    /// Parse `source` in `language`, run the defuse query for `symbol`, and return all sites.
1170    /// Returns an empty vec if the language has no defuse query or parsing fails.
1171    pub(crate) fn extract_def_use_for_file(
1172        source: &str,
1173        language: &str,
1174        symbol: &str,
1175        file_path: &str,
1176        ast_recursion_limit: Option<usize>,
1177    ) -> Vec<crate::types::DefUseSite> {
1178        let Some(lang_info) = crate::languages::get_language_info(language) else {
1179            return vec![];
1180        };
1181        let Ok(compiled) = get_compiled_queries(language) else {
1182            return vec![];
1183        };
1184        if compiled.defuse.is_none() {
1185            return vec![];
1186        }
1187
1188        let tree = match PARSER.with(|p| {
1189            let mut parser = p.borrow_mut();
1190            if parser.set_language(&lang_info.language).is_err() {
1191                return None;
1192            }
1193            parser.parse(source, None)
1194        }) {
1195            Some(t) => t,
1196            None => return vec![],
1197        };
1198
1199        let root = tree.root_node();
1200
1201        // Convert ast_recursion_limit the same way extract() does:
1202        // 0 means unlimited (None); positive values become Some(u32).
1203        let max_depth: Option<u32> = ast_recursion_limit
1204            .filter(|&limit| limit > 0)
1205            .and_then(|limit| u32::try_from(limit).ok());
1206
1207        Self::extract_def_use(source, compiled, root, symbol, file_path, max_depth)
1208    }
1209}
1210
1211/// Extract `impl Trait for Type` blocks from Rust source.
1212///
1213/// Runs independently of `extract_references` to avoid shared deduplication state.
1214/// Returns an empty vec for non-Rust source (no error; caller decides).
1215#[must_use]
1216pub fn extract_impl_traits(source: &str, path: &Path) -> Vec<ImplTraitInfo> {
1217    let Some(lang_info) = get_language_info("rust") else {
1218        return vec![];
1219    };
1220
1221    let Ok(compiled) = get_compiled_queries("rust") else {
1222        return vec![];
1223    };
1224
1225    let Some(query) = &compiled.impl_trait else {
1226        return vec![];
1227    };
1228
1229    let Some(tree) = PARSER.with(|p| {
1230        let mut parser = p.borrow_mut();
1231        let _ = parser.set_language(&lang_info.language);
1232        parser.parse(source, None)
1233    }) else {
1234        return vec![];
1235    };
1236
1237    let root = tree.root_node();
1238    let mut results = Vec::new();
1239
1240    QUERY_CURSOR.with(|c| {
1241        let mut cursor = c.borrow_mut();
1242        cursor.set_max_start_depth(None);
1243        let mut matches = cursor.matches(query, root, source.as_bytes());
1244
1245        while let Some(mat) = matches.next() {
1246            let mut trait_name = String::new();
1247            let mut impl_type = String::new();
1248            let mut line = 0usize;
1249
1250            for capture in mat.captures {
1251                let capture_name = query.capture_names()[capture.index as usize];
1252                let node = capture.node;
1253                let text = source[node.start_byte()..node.end_byte()].to_string();
1254                match capture_name {
1255                    "trait_name" => {
1256                        trait_name = text;
1257                        line = node.start_position().row + 1;
1258                    }
1259                    "impl_type" => {
1260                        impl_type = text;
1261                    }
1262                    _ => {}
1263                }
1264            }
1265
1266            if !trait_name.is_empty() && !impl_type.is_empty() {
1267                results.push(ImplTraitInfo {
1268                    trait_name,
1269                    impl_type,
1270                    path: path.to_path_buf(),
1271                    line,
1272                });
1273            }
1274        }
1275    });
1276
1277    results
1278}
1279
1280/// Execute a custom tree-sitter query against source code.
1281///
1282/// This is the internal implementation of the public `execute_query` function.
1283pub fn execute_query_impl(
1284    language: &str,
1285    source: &str,
1286    query_str: &str,
1287) -> Result<Vec<crate::QueryCapture>, ParserError> {
1288    // Get the tree-sitter language from the language name
1289    let ts_language = crate::languages::get_ts_language(language)
1290        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
1291
1292    let mut parser = Parser::new();
1293    parser
1294        .set_language(&ts_language)
1295        .map_err(|e| ParserError::QueryError(e.to_string()))?;
1296
1297    let tree = parser
1298        .parse(source.as_bytes(), None)
1299        .ok_or_else(|| ParserError::QueryError("failed to parse source".to_string()))?;
1300
1301    let query =
1302        Query::new(&ts_language, query_str).map_err(|e| ParserError::QueryError(e.to_string()))?;
1303
1304    let source_bytes = source.as_bytes();
1305
1306    let mut captures = Vec::new();
1307    QUERY_CURSOR.with(|c| {
1308        let mut cursor = c.borrow_mut();
1309        cursor.set_max_start_depth(None);
1310        let mut matches = cursor.matches(&query, tree.root_node(), source_bytes);
1311        while let Some(m) = matches.next() {
1312            for cap in m.captures {
1313                let node = cap.node;
1314                let capture_name = query.capture_names()[cap.index as usize].to_string();
1315                let text = node.utf8_text(source_bytes).unwrap_or("").to_string();
1316                captures.push(crate::QueryCapture {
1317                    capture_name,
1318                    text,
1319                    start_line: node.start_position().row,
1320                    end_line: node.end_position().row,
1321                    start_byte: node.start_byte(),
1322                    end_byte: node.end_byte(),
1323                });
1324            }
1325        }
1326    });
1327    Ok(captures)
1328}
1329
// Language-feature-gated tests (require lang-rust); see also tests_unsupported below
#[cfg(all(test, feature = "lang-rust"))]
mod tests {
    use super::*;
    use std::path::Path;

    /// `Some(0)` must be treated exactly like `None` (no depth limit).
    #[test]
    fn test_ast_recursion_limit_zero_is_unlimited() {
        let source = "fn hello() -> u32 { 42 }";
        let analysis_none =
            SemanticExtractor::extract(source, "rust", None).expect("extract with None failed");
        let analysis_zero = SemanticExtractor::extract(source, "rust", Some(0))
            .expect("extract with Some(0) failed");
        assert!(
            !analysis_none.functions.is_empty(),
            "extract with None should find at least one function in the test source"
        );
        assert_eq!(
            analysis_none.functions.len(),
            analysis_zero.functions.len(),
            "ast_recursion_limit=0 should behave identically to unset (unlimited)"
        );
    }

    #[test]
    fn test_rust_use_as_imports() {
        // Arrange
        let source = "use std::io as stdio;";
        // Act
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: alias "stdio" is captured as an import item
        assert!(
            result
                .imports
                .iter()
                .any(|imp| imp.items.iter().any(|i| i == "stdio")),
            "expected import alias 'stdio' in {:?}",
            result.imports
        );
    }

    #[test]
    fn test_rust_use_as_clause_plain_identifier() {
        // Arrange: use_as_clause with plain identifier (no scoped_identifier)
        // exercises the _ => prefix.to_string() arm
        let source = "use io as stdio;";
        // Act
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: alias "stdio" is captured as an import item
        assert!(
            result
                .imports
                .iter()
                .any(|imp| imp.items.iter().any(|i| i == "stdio")),
            "expected import alias 'stdio' from plain identifier in {:?}",
            result.imports
        );
    }

    #[test]
    fn test_rust_scoped_use_with_prefix() {
        // Arrange: scoped_use_list with non-empty prefix
        let source = "use std::{io::Read, io::Write};";
        // Act
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: both Read and Write appear as items with std::io module
        let items: Vec<&str> = result
            .imports
            .iter()
            .filter(|imp| imp.module.starts_with("std::io"))
            .flat_map(|imp| imp.items.iter().map(|s| s.as_str()))
            .collect();
        assert!(
            items.contains(&"Read") && items.contains(&"Write"),
            "expected 'Read' and 'Write' items under module with std::io, got {:?}",
            result.imports
        );
    }

    #[test]
    fn test_rust_scoped_use_imports() {
        // Arrange
        let source = "use std::{fs, io};";
        // Act
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: both "fs" and "io" appear as import items under module "std"
        let items: Vec<&str> = result
            .imports
            .iter()
            .filter(|imp| imp.module == "std")
            .flat_map(|imp| imp.items.iter().map(|s| s.as_str()))
            .collect();
        assert!(
            items.contains(&"fs") && items.contains(&"io"),
            "expected 'fs' and 'io' items under module 'std', got {items:?}"
        );
    }

    #[test]
    fn test_rust_wildcard_imports() {
        // Arrange
        let source = "use std::io::*;";
        // Act
        let result = SemanticExtractor::extract(source, "rust", None).unwrap();
        // Assert: wildcard import with module "std::io"
        let wildcard = result
            .imports
            .iter()
            .find(|imp| imp.module == "std::io" && imp.items == vec!["*"]);
        assert!(
            wildcard.is_some(),
            "expected wildcard import with module 'std::io', got {:?}",
            result.imports
        );
    }

    #[test]
    fn test_extract_impl_traits_standalone() {
        // Arrange: source with a simple impl Trait for Type
        let source = r"
struct Foo;
trait Display {}
impl Display for Foo {}
";
        // Act
        let results = extract_impl_traits(source, Path::new("test.rs"));
        // Assert
        assert_eq!(
            results.len(),
            1,
            "expected one impl trait, got {results:?}"
        );
        assert_eq!(results[0].trait_name, "Display");
        assert_eq!(results[0].impl_type, "Foo");
    }

    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_ast_recursion_limit_overflow() {
        // Arrange: limit larger than u32::MAX triggers a ParseError on 64-bit targets
        let source = "fn foo() {}";
        let big_limit = usize::try_from(u32::MAX).unwrap() + 1;
        // Act
        let result = SemanticExtractor::extract(source, "rust", Some(big_limit));
        // Assert
        assert!(
            matches!(result, Err(ParserError::ParseError(_))),
            "expected ParseError for oversized limit, got {result:?}"
        );
    }

    #[test]
    fn test_ast_recursion_limit_some() {
        // Arrange: ast_recursion_limit with Some(depth) to exercise max_depth Some branch
        let source = "fn hello() -> u32 { 42 }";
        // Act: should succeed without error
        let analysis =
            SemanticExtractor::extract(source, "rust", Some(5)).expect("extract with Some(5) failed");
        // Assert: functions are still extracted under the depth limit
        assert!(
            !analysis.functions.is_empty(),
            "expected at least one function with depth limit 5"
        );
    }

    #[test]
    fn test_extract_def_use_for_file_finds_write_and_read() {
        // Arrange
        let source = r#"
fn main() {
    let count = 0;
    println!("{}", count);
}
"#;
        // Act
        let sites = SemanticExtractor::extract_def_use_for_file(
            source,
            "rust",
            "count",
            "src/main.rs",
            None,
        );

        // Assert
        assert!(
            !sites.is_empty(),
            "expected at least one def-use site for 'count'"
        );
        let has_write = sites
            .iter()
            .any(|s| s.kind == crate::types::DefUseKind::Write);
        let has_read = sites
            .iter()
            .any(|s| s.kind == crate::types::DefUseKind::Read);
        assert!(has_write, "expected a write site for 'count'");
        assert!(has_read, "expected a read site for 'count'");
        assert_eq!(sites[0].file, "src/main.rs");
    }

    #[test]
    fn test_extract_def_use_for_file_no_match_returns_empty() {
        // Arrange
        let source = "fn foo() { let x = 1; }";

        // Act
        let sites = SemanticExtractor::extract_def_use_for_file(
            source,
            "rust",
            "nonexistent_symbol",
            "src/lib.rs",
            None,
        );

        // Assert
        assert!(sites.is_empty(), "expected empty for nonexistent symbol");
    }
}
1553
// Language-feature-gated tests for Python
#[cfg(all(test, feature = "lang-python"))]
mod tests_python {
    use super::*;

    /// `from . import foo` must yield at least one import whose module text contains a dot.
    #[test]
    fn test_python_relative_import() {
        // Arrange: relative import (from . import foo)
        let source = "from . import foo\n";
        // Act
        let result = SemanticExtractor::extract(source, "python", None).unwrap();
        // Assert: relative import should be captured
        let has_relative = result.imports.iter().any(|imp| imp.module.contains('.'));
        assert!(
            has_relative,
            "expected relative import in {:?}",
            result.imports
        );
    }

    /// `from os import path as p` must yield an import of module `os` that lists the
    /// original name `path` among its items.
    #[test]
    fn test_python_aliased_import() {
        // Arrange: aliased import (from os import path as p)
        // Note: tree-sitter-python extracts "path" (the original name), not the alias "p"
        let source = "from os import path as p\n";
        // Act
        let result = SemanticExtractor::extract(source, "python", None).unwrap();
        // Assert: "path" should be in items (alias is captured separately by aliased_import node)
        let has_path_import = result
            .imports
            .iter()
            .any(|imp| imp.module == "os" && imp.items.iter().any(|item| item == "path"));
        assert!(
            has_path_import,
            "expected import 'path' from module 'os' in {:?}",
            result.imports
        );
    }
}
1593
// Tests that do not require any language feature gate
#[cfg(test)]
mod tests_unsupported {
    use super::*;

    /// `ElementExtractor` must reject an unknown language name and echo it in the error.
    #[test]
    fn test_element_extractor_unsupported_language() {
        // Arrange + Act: "cobol" is the language name expected to be rejected
        let outcome = ElementExtractor::extract_with_depth("x = 1", "cobol");

        // Assert: the error variant must carry the offending language name
        let rejected_cobol = matches!(
            &outcome,
            Err(ParserError::UnsupportedLanguage(lang)) if lang == "cobol"
        );
        assert!(
            rejected_cobol,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }

    /// `SemanticExtractor` must reject an unknown language name and echo it in the error.
    #[test]
    fn test_semantic_extractor_unsupported_language() {
        // Arrange + Act: same input as the element-extractor test, no recursion limit
        let outcome = SemanticExtractor::extract("x = 1", "cobol", None);

        // Assert: the error variant must carry the offending language name
        let rejected_cobol = matches!(
            &outcome,
            Err(ParserError::UnsupportedLanguage(lang)) if lang == "cobol"
        );
        assert!(
            rejected_cobol,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }
}