Skip to main content

aptu_coder_core/
parser.rs

1// SPDX-FileCopyrightText: 2026 aptu-coder contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Tree-sitter-based parser for extracting semantic structure from source code.
4//!
5//! This module provides language-agnostic parsing using tree-sitter queries to extract
6//! functions, classes, imports, references, and other semantic elements from source files.
7//! Two main extractors handle different use cases:
8//!
9//! - [`ElementExtractor`]: Quick extraction of function and class counts.
10//! - [`SemanticExtractor`]: Detailed semantic analysis with calls, imports, and references.
11
12use crate::languages::get_language_info;
13use crate::types::{
14    CallInfo, ClassInfo, FunctionInfo, ImplTraitInfo, ImportInfo, ReferenceInfo, ReferenceType,
15    SemanticAnalysis,
16};
17use std::cell::RefCell;
18use std::collections::HashMap;
19use std::path::{Path, PathBuf};
20use std::sync::LazyLock;
21use thiserror::Error;
22use tracing::instrument;
23use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
24
/// Errors returned by the extractors in this module.
///
/// Marked `#[non_exhaustive]`, so downstream `match` expressions need a wildcard arm.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum ParserError {
    /// The requested language has no language info or no compiled queries;
    /// carries the language name that was requested.
    #[error("Unsupported language: {0}")]
    UnsupportedLanguage(String),
    /// Setting the parser language failed, the parser produced no tree, or an
    /// argument (such as an oversized `ast_recursion_limit`) was rejected;
    /// carries a human-readable description.
    #[error("Failed to parse file: {0}")]
    ParseError(String),
    /// The input was not valid UTF-8.
    // NOTE(review): not constructed in the visible part of this file — confirm where it is raised.
    #[error("Invalid UTF-8 in file")]
    InvalidUtf8,
    /// A tree-sitter query string failed to compile; carries the formatted cause.
    #[error("Query error: {0}")]
    QueryError(String),
    /// The configured deadline passed during extraction; carries the configured
    /// timeout in microseconds.
    #[error("Parse timeout exceeded: {0} microseconds")]
    Timeout(u64),
}
39
/// Groups a query deadline with the configured timeout duration for use in private extract helpers.
/// Avoids threading two separate values through every helper signature.
#[derive(Clone, Copy)]
struct TimeoutConfig {
    /// Absolute deadline; `None` means no timeout.
    deadline: Option<std::time::Instant>,
    /// The configured timeout in microseconds (used in `ParserError::Timeout`).
    micros: u64,
}

impl TimeoutConfig {
    /// Builds a config whose deadline is `timeout_micros` from now.
    ///
    /// `None` disables the timeout and records `micros` as `0`. A timeout so large
    /// that the deadline cannot be represented as an `Instant` is also treated as
    /// "no deadline" rather than panicking, since such a deadline could never be
    /// reached anyway; `micros` still records the requested value.
    fn new(timeout_micros: Option<u64>) -> Self {
        // `Instant + Duration` panics on overflow; `checked_add` reports it as `None`.
        let deadline = timeout_micros.and_then(|us| {
            std::time::Instant::now().checked_add(std::time::Duration::from_micros(us))
        });
        Self {
            deadline,
            micros: timeout_micros.unwrap_or(0),
        }
    }

    /// Returns `true` if the deadline has been reached.
    ///
    /// Always `false` when no deadline is set.
    fn is_exceeded(self) -> bool {
        self.deadline
            .is_some_and(|d| std::time::Instant::now() >= d)
    }
}
66
/// Compiled tree-sitter queries for a language.
/// Stores all query types: mandatory (element, call) and optional
/// (import, impl, reference, impl_trait, defuse). An optional query is `None`
/// when the language info does not define a query string for it.
struct CompiledQueries {
    /// Mandatory query compiled from the language's `element_query`.
    element: Query,
    /// Mandatory query compiled from the language's `call_query`.
    call: Query,
    /// Optional query compiled from the language's `import_query`.
    import: Option<Query>,
    /// Optional query compiled from the language's `impl_query`.
    impl_block: Option<Query>,
    /// Optional query compiled from the language's `reference_query`.
    reference: Option<Query>,
    /// Optional query compiled from the language's `impl_trait_query`.
    impl_trait: Option<Query>,
    /// Optional query compiled from the language's `defuse_query`.
    defuse: Option<Query>,
}
78
79/// Build compiled queries for a given language.
80///
81/// The `map_err` closures inside are only reachable if a hardcoded query string is
82/// invalid, which cannot happen at runtime -- exclude them from coverage instrumentation.
83#[cfg_attr(coverage_nightly, coverage(off))]
84fn build_compiled_queries(
85    lang_info: &crate::languages::LanguageInfo,
86) -> Result<CompiledQueries, ParserError> {
87    let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
88        ParserError::QueryError(format!(
89            "Failed to compile element query for {}: {}",
90            lang_info.name, e
91        ))
92    })?;
93
94    let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
95        ParserError::QueryError(format!(
96            "Failed to compile call query for {}: {}",
97            lang_info.name, e
98        ))
99    })?;
100
101    let import = if let Some(import_query_str) = lang_info.import_query {
102        Some(
103            Query::new(&lang_info.language, import_query_str).map_err(|e| {
104                ParserError::QueryError(format!(
105                    "Failed to compile import query for {}: {}",
106                    lang_info.name, e
107                ))
108            })?,
109        )
110    } else {
111        None
112    };
113
114    let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
115        Some(
116            Query::new(&lang_info.language, impl_query_str).map_err(|e| {
117                ParserError::QueryError(format!(
118                    "Failed to compile impl query for {}: {}",
119                    lang_info.name, e
120                ))
121            })?,
122        )
123    } else {
124        None
125    };
126
127    let reference = if let Some(ref_query_str) = lang_info.reference_query {
128        Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
129            ParserError::QueryError(format!(
130                "Failed to compile reference query for {}: {}",
131                lang_info.name, e
132            ))
133        })?)
134    } else {
135        None
136    };
137
138    let impl_trait = if let Some(impl_trait_query_str) = lang_info.impl_trait_query {
139        Some(
140            Query::new(&lang_info.language, impl_trait_query_str).map_err(|e| {
141                ParserError::QueryError(format!(
142                    "Failed to compile impl_trait query for {}: {}",
143                    lang_info.name, e
144                ))
145            })?,
146        )
147    } else {
148        None
149    };
150
151    let defuse = if let Some(defuse_query_str) = lang_info.defuse_query {
152        Some(
153            Query::new(&lang_info.language, defuse_query_str).map_err(|e| {
154                ParserError::QueryError(format!(
155                    "Failed to compile defuse query for {}: {}",
156                    lang_info.name, e
157                ))
158            })?,
159        )
160    } else {
161        None
162    };
163
164    Ok(CompiledQueries {
165        element,
166        call,
167        import,
168        impl_block,
169        reference,
170        impl_trait,
171        defuse,
172    })
173}
174
175/// Initialize the query cache with compiled queries for all supported languages.
176///
177/// Excluded from coverage: the `Err` arm is unreachable because `build_compiled_queries`
178/// only fails on invalid hardcoded query strings.
179#[cfg_attr(coverage_nightly, coverage(off))]
180fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
181    let mut cache = HashMap::new();
182
183    for lang_name in crate::lang::supported_languages() {
184        if let Some(lang_info) = get_language_info(lang_name) {
185            match build_compiled_queries(&lang_info) {
186                Ok(compiled) => {
187                    cache.insert(*lang_name, compiled);
188                }
189                Err(e) => {
190                    tracing::error!(
191                        "Failed to compile queries for language {}: {}",
192                        lang_name,
193                        e
194                    );
195                }
196            }
197        }
198    }
199
200    cache
201}
202
/// Lazily initialized cache of compiled queries per language.
///
/// Populated on first access by `init_query_cache` and keyed by language name.
/// A language whose queries failed to compile has no entry.
static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
    LazyLock::new(init_query_cache);
206
207/// Get compiled queries for a language from the cache.
208fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
209    QUERY_CACHE
210        .get(language)
211        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
212}
213
// Per-thread tree-sitter state, reused across calls instead of being rebuilt each time.
// Both values sit in a `RefCell` because the extractors borrow them mutably.
thread_local! {
    // Parser; callers set its language before every parse.
    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
    // Query cursor; callers reset its max start depth before running a query.
    static QUERY_CURSOR: RefCell<QueryCursor> = RefCell::new(QueryCursor::new());
}
218
219/// Canonical API for extracting element counts from source code.
220pub struct ElementExtractor;
221
222impl ElementExtractor {
223    /// Extract function and class counts from source code.
224    ///
225    /// # Errors
226    ///
227    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
228    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
229    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
230    #[instrument(skip_all, fields(language))]
231    pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
232        let lang_info = get_language_info(language)
233            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
234
235        let tree = PARSER.with(|p| {
236            let mut parser = p.borrow_mut();
237            parser
238                .set_language(&lang_info.language)
239                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
240            parser
241                .parse(source, None)
242                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
243        })?;
244
245        let compiled = get_compiled_queries(language)?;
246
247        let (function_count, class_count) = QUERY_CURSOR.with(|c| {
248            let mut cursor = c.borrow_mut();
249            cursor.set_max_start_depth(None);
250            let mut function_count = 0;
251            let mut class_count = 0;
252
253            let mut matches =
254                cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
255            while let Some(mat) = matches.next() {
256                for capture in mat.captures {
257                    let capture_name = compiled.element.capture_names()[capture.index as usize];
258                    match capture_name {
259                        "function" => function_count += 1,
260                        "class" => class_count += 1,
261                        _ => {}
262                    }
263                }
264            }
265            (function_count, class_count)
266        });
267
268        tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
269
270        Ok((function_count, class_count))
271    }
272}
273
274/// Recursively extract `ImportInfo` entries from a use-clause node, respecting all Rust
275/// use-declaration forms (`scoped_identifier`, `scoped_use_list`, `use_list`,
276/// `use_as_clause`, `use_wildcard`, bare `identifier`).
277#[allow(clippy::too_many_lines)] // exhaustive match over all supported Rust use-clause forms; splitting harms readability
278fn extract_imports_from_node(
279    node: &Node,
280    source: &str,
281    prefix: &str,
282    line: usize,
283    imports: &mut Vec<ImportInfo>,
284) {
285    match node.kind() {
286        // Simple identifier: `use foo;` or an item inside `{foo, bar}`
287        "identifier" | "self" | "super" | "crate" => {
288            let name = source[node.start_byte()..node.end_byte()].to_string();
289            imports.push(ImportInfo {
290                module: prefix.to_string(),
291                items: vec![name],
292                line,
293            });
294        }
295        // Qualified path: `std::collections::HashMap`
296        "scoped_identifier" => {
297            let item = node
298                .child_by_field_name("name")
299                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
300                .unwrap_or_default();
301            let module = node.child_by_field_name("path").map_or_else(
302                || prefix.to_string(),
303                |p| {
304                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
305                    if prefix.is_empty() {
306                        path_text
307                    } else {
308                        format!("{prefix}::{path_text}")
309                    }
310                },
311            );
312            if !item.is_empty() {
313                imports.push(ImportInfo {
314                    module,
315                    items: vec![item],
316                    line,
317                });
318            }
319        }
320        // `std::{io, fs}` — path prefix followed by a brace list
321        "scoped_use_list" => {
322            let new_prefix = node.child_by_field_name("path").map_or_else(
323                || prefix.to_string(),
324                |p| {
325                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
326                    if prefix.is_empty() {
327                        path_text
328                    } else {
329                        format!("{prefix}::{path_text}")
330                    }
331                },
332            );
333            if let Some(list) = node.child_by_field_name("list") {
334                extract_imports_from_node(&list, source, &new_prefix, line, imports);
335            }
336        }
337        // `{HashMap, HashSet}` — brace-enclosed list of items
338        "use_list" => {
339            let mut cursor = node.walk();
340            for child in node.children(&mut cursor) {
341                match child.kind() {
342                    "{" | "}" | "," => {}
343                    _ => extract_imports_from_node(&child, source, prefix, line, imports),
344                }
345            }
346        }
347        // `std::io::*` — glob import
348        "use_wildcard" => {
349            let text = source[node.start_byte()..node.end_byte()].to_string();
350            let module = if let Some(stripped) = text.strip_suffix("::*") {
351                if prefix.is_empty() {
352                    stripped.to_string()
353                } else {
354                    format!("{prefix}::{stripped}")
355                }
356            } else {
357                prefix.to_string()
358            };
359            imports.push(ImportInfo {
360                module,
361                items: vec!["*".to_string()],
362                line,
363            });
364        }
365        // `io as stdio` or `std::io as stdio`
366        "use_as_clause" => {
367            let alias = node
368                .child_by_field_name("alias")
369                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
370                .unwrap_or_default();
371            let module = if let Some(path_node) = node.child_by_field_name("path") {
372                match path_node.kind() {
373                    "scoped_identifier" => path_node.child_by_field_name("path").map_or_else(
374                        || prefix.to_string(),
375                        |p| {
376                            let p_text = source[p.start_byte()..p.end_byte()].to_string();
377                            if prefix.is_empty() {
378                                p_text
379                            } else {
380                                format!("{prefix}::{p_text}")
381                            }
382                        },
383                    ),
384                    _ => prefix.to_string(),
385                }
386            } else {
387                prefix.to_string()
388            };
389            if !alias.is_empty() {
390                imports.push(ImportInfo {
391                    module,
392                    items: vec![alias],
393                    line,
394                });
395            }
396        }
397        // Python import_from_statement: `from module import name` or `from . import *`
398        "import_from_statement" => {
399            extract_python_import_from(node, source, line, imports);
400        }
401        // Fallback for non-Rust import nodes: capture full text as module
402        _ => {
403            let text = source[node.start_byte()..node.end_byte()]
404                .trim()
405                .to_string();
406            if !text.is_empty() {
407                imports.push(ImportInfo {
408                    module: text,
409                    items: vec![],
410                    line,
411                });
412            }
413        }
414    }
415}
416
417/// Extract an item name from a `dotted_name` or `aliased_import` child node.
418fn extract_import_item_name(child: &Node, source: &str) -> Option<String> {
419    match child.kind() {
420        "dotted_name" => {
421            let name = source[child.start_byte()..child.end_byte()]
422                .trim()
423                .to_string();
424            if name.is_empty() { None } else { Some(name) }
425        }
426        "aliased_import" => child.child_by_field_name("name").and_then(|n| {
427            let name = source[n.start_byte()..n.end_byte()].trim().to_string();
428            if name.is_empty() { None } else { Some(name) }
429        }),
430        _ => None,
431    }
432}
433
434/// Collect wildcard/named imports from an `import_list` node or from direct named children.
435fn collect_import_items(
436    node: &Node,
437    source: &str,
438    is_wildcard: &mut bool,
439    items: &mut Vec<String>,
440) {
441    // Prefer import_list child (wraps `from x import a, b`)
442    if let Some(import_list) = node.child_by_field_name("import_list") {
443        let mut cursor = import_list.walk();
444        for child in import_list.named_children(&mut cursor) {
445            if child.kind() == "wildcard_import" {
446                *is_wildcard = true;
447            } else if let Some(name) = extract_import_item_name(&child, source) {
448                items.push(name);
449            }
450        }
451        return;
452    }
453    // No import_list: single-name or wildcard as direct child (skip first named child = module_name)
454    let mut cursor = node.walk();
455    let mut first = true;
456    for child in node.named_children(&mut cursor) {
457        if first {
458            first = false;
459            continue;
460        }
461        if child.kind() == "wildcard_import" {
462            *is_wildcard = true;
463        } else if let Some(name) = extract_import_item_name(&child, source) {
464            items.push(name);
465        }
466    }
467}
468
469/// Handle Python `import_from_statement` node.
470fn extract_python_import_from(
471    node: &Node,
472    source: &str,
473    line: usize,
474    imports: &mut Vec<ImportInfo>,
475) {
476    let module = if let Some(m) = node.child_by_field_name("module_name") {
477        source[m.start_byte()..m.end_byte()].trim().to_string()
478    } else if let Some(r) = node.child_by_field_name("relative_import") {
479        source[r.start_byte()..r.end_byte()].trim().to_string()
480    } else {
481        String::new()
482    };
483
484    let mut is_wildcard = false;
485    let mut items = Vec::new();
486    collect_import_items(node, source, &mut is_wildcard, &mut items);
487
488    if !module.is_empty() {
489        imports.push(ImportInfo {
490            module,
491            items: if is_wildcard {
492                vec!["*".to_string()]
493            } else {
494                items
495            },
496            line,
497        });
498    }
499}
500
/// Entry point for detailed semantic extraction: functions, classes, imports,
/// references, calls and impl-trait blocks. A unit struct that only namespaces
/// its associated functions.
pub struct SemanticExtractor;
502
503impl SemanticExtractor {
504    /// Extract semantic information from source code.
505    ///
506    /// # Errors
507    ///
508    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
509    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
510    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
511    #[instrument(skip_all, fields(language))]
512    pub fn extract(
513        source: &str,
514        language: &str,
515        ast_recursion_limit: Option<usize>,
516        timeout_micros: Option<u64>,
517    ) -> Result<SemanticAnalysis, ParserError> {
518        let tc = TimeoutConfig::new(timeout_micros);
519
520        // Check deadline at the start before any parsing work.
521        if tc.is_exceeded() {
522            return Err(ParserError::Timeout(tc.micros));
523        }
524        let lang_info = get_language_info(language)
525            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
526
527        let tree = PARSER.with(|p| {
528            let mut parser = p.borrow_mut();
529            parser
530                .set_language(&lang_info.language)
531                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
532            parser
533                .parse(source, None)
534                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
535        })?;
536
537        // 0 is not a useful depth (visits root node only, returning zero results).
538        // Treat 0 as None (unlimited). See #339.
539        let max_depth: Option<u32> = ast_recursion_limit
540            .filter(|&limit| limit > 0)
541            .map(|limit| {
542                u32::try_from(limit).map_err(|_| {
543                    ParserError::ParseError(format!(
544                        "ast_recursion_limit {} exceeds maximum supported value {}",
545                        limit,
546                        u32::MAX
547                    ))
548                })
549            })
550            .transpose()?;
551
552        let compiled = get_compiled_queries(language)?;
553        let root = tree.root_node();
554
555        let mut functions = Vec::new();
556        let mut classes = Vec::new();
557        let mut imports = Vec::new();
558        let mut references = Vec::new();
559        let mut call_frequency = HashMap::new();
560        let mut calls = Vec::new();
561
562        Self::extract_elements(
563            source,
564            compiled,
565            root,
566            max_depth,
567            &lang_info,
568            &mut functions,
569            &mut classes,
570            tc,
571        )?;
572        Self::extract_calls(
573            source,
574            compiled,
575            root,
576            max_depth,
577            &mut calls,
578            &mut call_frequency,
579            tc,
580        )?;
581        Self::extract_imports(source, compiled, root, max_depth, &mut imports, tc)?;
582        Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes, tc)?;
583        Self::extract_references(source, compiled, root, max_depth, &mut references, tc)?;
584
585        // Extract impl-trait blocks for Rust files (empty for other languages)
586        let impl_traits = if language == "rust" {
587            Self::extract_impl_traits_from_tree(source, compiled, root, tc)?
588        } else {
589            vec![]
590        };
591
592        tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), impl_traits = impl_traits.len(), "extraction complete");
593
594        Ok(SemanticAnalysis {
595            functions,
596            classes,
597            imports,
598            references,
599            call_frequency,
600            calls,
601            impl_traits,
602            def_use_sites: Vec::new(),
603        })
604    }
605
606    #[allow(clippy::too_many_arguments)]
607    #[allow(clippy::too_many_arguments)]
608    fn extract_elements(
609        source: &str,
610        compiled: &CompiledQueries,
611        root: Node<'_>,
612        max_depth: Option<u32>,
613        lang_info: &crate::languages::LanguageInfo,
614        functions: &mut Vec<FunctionInfo>,
615        classes: &mut Vec<ClassInfo>,
616        tc: TimeoutConfig,
617    ) -> Result<(), ParserError> {
618        let mut seen_functions = std::collections::HashSet::new();
619        let mut timed_out = false;
620
621        QUERY_CURSOR.with(|c| {
622            let mut cursor = c.borrow_mut();
623            cursor.set_max_start_depth(None);
624            if let Some(depth) = max_depth {
625                cursor.set_max_start_depth(Some(depth));
626            }
627
628            let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
629
630            while let Some(mat) = matches.next() {
631                // Check if we've hit the deadline
632                if tc.is_exceeded() {
633                    timed_out = true;
634                    break;
635                }
636                let mut func_node: Option<Node> = None;
637                let mut func_name_text: Option<String> = None;
638                let mut class_node: Option<Node> = None;
639                let mut class_name_text: Option<String> = None;
640
641                for capture in mat.captures {
642                    let capture_name = compiled.element.capture_names()[capture.index as usize];
643                    let node = capture.node;
644                    match capture_name {
645                        "function" => func_node = Some(node),
646                        "func_name" | "method_name" => {
647                            func_name_text =
648                                Some(source[node.start_byte()..node.end_byte()].to_string());
649                        }
650                        "class" => class_node = Some(node),
651                        "class_name" | "type_name" => {
652                            class_name_text =
653                                Some(source[node.start_byte()..node.end_byte()].to_string());
654                        }
655                        _ => {}
656                    }
657                }
658
659                if let Some(func_node) = func_node {
660                    // When a plain function_definition is nested inside a template_declaration
661                    // or decorated_definition, it is also matched by the explicit wrapper pattern.
662                    // Skip it here to avoid duplicates; the wrapper match will emit it.
663                    let parent_kind = func_node.parent().map(|p| p.kind());
664                    let parent_is_wrapper = parent_kind
665                        .map(|k| k == "template_declaration" || k == "decorated_definition")
666                        .unwrap_or(false);
667                    if func_node.kind() == "function_definition" && parent_is_wrapper {
668                        // Handled by the template_declaration or decorated_definition @function match instead.
669                    } else {
670                        // Resolve template_declaration or decorated_definition to inner function_definition
671                        // for declarator/field walks. The captured node may be a wrapper.
672                        let func_def = if func_node.kind() == "template_declaration" {
673                            let mut cursor = func_node.walk();
674                            func_node
675                                .children(&mut cursor)
676                                .find(|n| n.kind() == "function_definition")
677                                .unwrap_or(func_node)
678                        } else if func_node.kind() == "decorated_definition" {
679                            func_node
680                                .child_by_field_name("definition")
681                                .unwrap_or(func_node)
682                        } else {
683                            func_node
684                        };
685
686                        let name = func_name_text
687                            .or_else(|| {
688                                func_def
689                                    .child_by_field_name("name")
690                                    .map(|n| source[n.start_byte()..n.end_byte()].to_string())
691                            })
692                            .unwrap_or_default();
693
694                        let func_key = (name.clone(), func_node.start_position().row);
695                        if !name.is_empty() && seen_functions.insert(func_key) {
696                            // For C/C++: parameters live under declarator -> parameters.
697                            // For other languages: parameters is a direct child field.
698                            let params = func_def
699                                .child_by_field_name("declarator")
700                                .and_then(|d| d.child_by_field_name("parameters"))
701                                .or_else(|| func_def.child_by_field_name("parameters"))
702                                .map(|p| source[p.start_byte()..p.end_byte()].to_string())
703                                .unwrap_or_default();
704
705                            // Try "type" first (C/C++ uses this field for the return type);
706                            // fall back to "return_type" (Rust, Python, TypeScript, etc.).
707                            let return_type = func_def
708                                .child_by_field_name("type")
709                                .or_else(|| func_def.child_by_field_name("return_type"))
710                                .map(|r| source[r.start_byte()..r.end_byte()].to_string());
711
712                            // Walk backward through contiguous attribute_item siblings
713                            // to find the first attribute line (Rust only).
714                            let first_line = if func_node.kind() == "function_item" {
715                                let mut attrs: Vec<Node> = Vec::new();
716                                let mut sib = func_node.prev_named_sibling();
717                                while let Some(s) = sib {
718                                    if s.kind() == "attribute_item" {
719                                        attrs.push(s);
720                                        sib = s.prev_named_sibling();
721                                    } else {
722                                        break;
723                                    }
724                                }
725                                attrs
726                                    .last()
727                                    .map(|n| n.start_position().row + 1)
728                                    .unwrap_or_else(|| func_node.start_position().row + 1)
729                            } else {
730                                func_node.start_position().row + 1
731                            };
732
733                            functions.push(FunctionInfo {
734                                name,
735                                line: first_line,
736                                end_line: func_node.end_position().row + 1,
737                                parameters: if params.is_empty() {
738                                    Vec::new()
739                                } else {
740                                    vec![params]
741                                },
742                                return_type,
743                            });
744                        }
745                    }
746                }
747
748                if let Some(class_node) = class_node {
749                    let name = class_name_text
750                        .or_else(|| {
751                            class_node
752                                .child_by_field_name("name")
753                                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
754                        })
755                        .unwrap_or_default();
756
757                    if !name.is_empty() {
758                        let inherits = if let Some(handler) = lang_info.extract_inheritance {
759                            handler(&class_node, source)
760                        } else {
761                            Vec::new()
762                        };
763                        classes.push(ClassInfo {
764                            name,
765                            line: class_node.start_position().row + 1,
766                            end_line: class_node.end_position().row + 1,
767                            methods: Vec::new(),
768                            fields: Vec::new(),
769                            inherits,
770                        });
771                    }
772                }
773            }
774        });
775
776        if timed_out {
777            return Err(ParserError::Timeout(tc.micros));
778        }
779
780        Ok(())
781    }
782
783    /// Returns the name of the enclosing function/method/subroutine for a given AST node,
784    /// by walking ancestors and matching all language-specific function container kinds.
785    fn enclosing_function_name(mut node: tree_sitter::Node<'_>, source: &str) -> Option<String> {
786        let mut depth = 0u32;
787        while let Some(parent) = node.parent() {
788            depth += 1;
789            // Cap at 64 hops: real function nesting rarely exceeds ~10 levels; 64 is a generous
790            // upper bound that guards against pathological/malformed ASTs without false negatives
791            // on legitimate code. Returns None (treated as <module>) when the cap is hit.
792            if depth > 64 {
793                return None;
794            }
795            let name_node = match parent.kind() {
796                // Direct name field: Rust, Python, Go, Java, TypeScript/TSX
797                "function_item"
798                | "method_item"
799                | "function_definition"
800                | "function_declaration"
801                | "method_declaration"
802                | "method_definition" => parent.child_by_field_name("name"),
803                // Fortran subroutine: name is inside subroutine_statement child
804                "subroutine" => {
805                    let mut cursor = parent.walk();
806                    parent
807                        .children(&mut cursor)
808                        .find(|c| c.kind() == "subroutine_statement")
809                        .and_then(|s| s.child_by_field_name("name"))
810                }
811                // Fortran function: name is inside function_statement child
812                "function" => {
813                    let mut cursor = parent.walk();
814                    parent
815                        .children(&mut cursor)
816                        .find(|c| c.kind() == "function_statement")
817                        .and_then(|s| s.child_by_field_name("name"))
818                }
819                _ => {
820                    node = parent;
821                    continue;
822                }
823            };
824            return name_node.map(|n| source[n.start_byte()..n.end_byte()].to_string());
825        }
826        // The loop exits here only when no parent was found (i.e., we reached the tree root
827        // without finding a function container). If the depth cap fired, we returned None early
828        // above. Nothing to assert here.
829        None
830    }
831
832    #[allow(clippy::too_many_arguments)]
833    fn extract_calls(
834        source: &str,
835        compiled: &CompiledQueries,
836        root: Node<'_>,
837        max_depth: Option<u32>,
838        calls: &mut Vec<CallInfo>,
839        call_frequency: &mut HashMap<String, usize>,
840        tc: TimeoutConfig,
841    ) -> Result<(), ParserError> {
842        let mut timed_out = false;
843
844        QUERY_CURSOR.with(|c| {
845            let mut cursor = c.borrow_mut();
846            cursor.set_max_start_depth(None);
847            if let Some(depth) = max_depth {
848                cursor.set_max_start_depth(Some(depth));
849            }
850
851            let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
852
853            while let Some(mat) = matches.next() {
854                // Check if we've hit the deadline
855                if tc.is_exceeded() {
856                    timed_out = true;
857                    break;
858                }
859                for capture in mat.captures {
860                    let capture_name = compiled.call.capture_names()[capture.index as usize];
861                    if capture_name != "call" {
862                        continue;
863                    }
864                    let node = capture.node;
865                    let call_name = source[node.start_byte()..node.end_byte()].to_string();
866                    *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
867
868                    let caller = Self::enclosing_function_name(node, source)
869                        .unwrap_or_else(|| "<module>".to_string());
870
871                    let mut arg_count = None;
872                    let mut arg_node = node;
873                    let mut hop = 0u32;
874                    let mut cap_hit = false;
875                    while let Some(parent) = arg_node.parent() {
876                        hop += 1;
877                        // Bounded parent traversal: cap at 16 hops to guard against pathological
878                        // walks on malformed/degenerate trees. Real call-expression nesting is
879                        // shallow (typically 1-3 levels). When the cap is hit we stop searching and
880                        // leave arg_count as None; the caller is still recorded, just without
881                        // argument-count information.
882                        if hop > 16 {
883                            cap_hit = true;
884                            break;
885                        }
886                        if parent.kind() == "call_expression" {
887                            if let Some(args) = parent.child_by_field_name("arguments") {
888                                arg_count = Some(args.named_child_count());
889                            }
890                            break;
891                        }
892                        arg_node = parent;
893                    }
894                    debug_assert!(
895                        !cap_hit,
896                        "extract_calls: parent traversal cap reached (hop > 16)"
897                    );
898
899                    calls.push(CallInfo {
900                        caller,
901                        callee: call_name,
902                        line: node.start_position().row + 1,
903                        column: node.start_position().column,
904                        arg_count,
905                    });
906                }
907            }
908        });
909
910        if timed_out {
911            return Err(ParserError::Timeout(tc.micros));
912        }
913
914        Ok(())
915    }
916
917    fn extract_imports(
918        source: &str,
919        compiled: &CompiledQueries,
920        root: Node<'_>,
921        max_depth: Option<u32>,
922        imports: &mut Vec<ImportInfo>,
923        tc: TimeoutConfig,
924    ) -> Result<(), ParserError> {
925        let Some(ref import_query) = compiled.import else {
926            return Ok(());
927        };
928        let mut timed_out = false;
929
930        QUERY_CURSOR.with(|c| {
931            let mut cursor = c.borrow_mut();
932            cursor.set_max_start_depth(None);
933            if let Some(depth) = max_depth {
934                cursor.set_max_start_depth(Some(depth));
935            }
936
937            let mut matches = cursor.matches(import_query, root, source.as_bytes());
938
939            while let Some(mat) = matches.next() {
940                // Check if we've hit the deadline
941                if tc.is_exceeded() {
942                    timed_out = true;
943                    break;
944                }
945                for capture in mat.captures {
946                    let capture_name = import_query.capture_names()[capture.index as usize];
947                    if capture_name == "import_path" {
948                        let node = capture.node;
949                        let line = node.start_position().row + 1;
950                        extract_imports_from_node(&node, source, "", line, imports);
951                    }
952                }
953            }
954        });
955
956        if timed_out {
957            return Err(ParserError::Timeout(tc.micros));
958        }
959
960        Ok(())
961    }
962
963    fn extract_impl_methods(
964        source: &str,
965        compiled: &CompiledQueries,
966        root: Node<'_>,
967        max_depth: Option<u32>,
968        classes: &mut [ClassInfo],
969        tc: TimeoutConfig,
970    ) -> Result<(), ParserError> {
971        let Some(ref impl_query) = compiled.impl_block else {
972            return Ok(());
973        };
974        let mut timed_out = false;
975
976        QUERY_CURSOR.with(|c| {
977            let mut cursor = c.borrow_mut();
978            cursor.set_max_start_depth(None);
979            if let Some(depth) = max_depth {
980                cursor.set_max_start_depth(Some(depth));
981            }
982
983            let mut matches = cursor.matches(impl_query, root, source.as_bytes());
984
985            while let Some(mat) = matches.next() {
986                // Check if we've hit the deadline
987                if tc.is_exceeded() {
988                    timed_out = true;
989                    break;
990                }
991
992                let mut impl_type_name = String::new();
993                let mut method_name = String::new();
994                let mut method_line = 0usize;
995                let mut method_end_line = 0usize;
996                let mut method_params = String::new();
997                let mut method_return_type: Option<String> = None;
998
999                for capture in mat.captures {
1000                    let capture_name = impl_query.capture_names()[capture.index as usize];
1001                    let node = capture.node;
1002                    match capture_name {
1003                        "impl_type" => {
1004                            impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
1005                        }
1006                        "method_name" => {
1007                            method_name = source[node.start_byte()..node.end_byte()].to_string();
1008                        }
1009                        "method_params" => {
1010                            method_params = source[node.start_byte()..node.end_byte()].to_string();
1011                        }
1012                        "method" => {
1013                            let mut method_attrs: Vec<Node> = Vec::new();
1014                            let mut msib = node.prev_named_sibling();
1015                            while let Some(s) = msib {
1016                                if s.kind() == "attribute_item" {
1017                                    method_attrs.push(s);
1018                                    msib = s.prev_named_sibling();
1019                                } else {
1020                                    break;
1021                                }
1022                            }
1023                            method_line = method_attrs
1024                                .last()
1025                                .map(|n| n.start_position().row + 1)
1026                                .unwrap_or_else(|| node.start_position().row + 1);
1027                            method_end_line = node.end_position().row + 1;
1028                            method_return_type = node
1029                                .child_by_field_name("return_type")
1030                                .map(|r| source[r.start_byte()..r.end_byte()].to_string());
1031                        }
1032                        _ => {}
1033                    }
1034                }
1035
1036                if !impl_type_name.is_empty() && !method_name.is_empty() {
1037                    let func = FunctionInfo {
1038                        name: method_name,
1039                        line: method_line,
1040                        end_line: method_end_line,
1041                        parameters: if method_params.is_empty() {
1042                            Vec::new()
1043                        } else {
1044                            vec![method_params]
1045                        },
1046                        return_type: method_return_type,
1047                    };
1048                    if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
1049                        class.methods.push(func);
1050                    }
1051                }
1052            }
1053        });
1054
1055        if timed_out {
1056            return Err(ParserError::Timeout(tc.micros));
1057        }
1058
1059        Ok(())
1060    }
1061
1062    fn extract_references(
1063        source: &str,
1064        compiled: &CompiledQueries,
1065        root: Node<'_>,
1066        max_depth: Option<u32>,
1067        references: &mut Vec<ReferenceInfo>,
1068        tc: TimeoutConfig,
1069    ) -> Result<(), ParserError> {
1070        let Some(ref ref_query) = compiled.reference else {
1071            return Ok(());
1072        };
1073        let mut seen_refs = std::collections::HashSet::new();
1074        let mut timed_out = false;
1075
1076        QUERY_CURSOR.with(|c| {
1077            let mut cursor = c.borrow_mut();
1078            cursor.set_max_start_depth(None);
1079            if let Some(depth) = max_depth {
1080                cursor.set_max_start_depth(Some(depth));
1081            }
1082
1083            let mut matches = cursor.matches(ref_query, root, source.as_bytes());
1084
1085            while let Some(mat) = matches.next() {
1086                // Check if we've hit the deadline
1087                if tc.is_exceeded() {
1088                    timed_out = true;
1089                    break;
1090                }
1091
1092                for capture in mat.captures {
1093                    let capture_name = ref_query.capture_names()[capture.index as usize];
1094                    if capture_name == "type_ref" {
1095                        let node = capture.node;
1096                        let type_ref = source[node.start_byte()..node.end_byte()].to_string();
1097                        if seen_refs.insert(type_ref.clone()) {
1098                            references.push(ReferenceInfo {
1099                                symbol: type_ref,
1100                                reference_type: ReferenceType::Usage,
1101                                // location is intentionally empty here; set by the caller (analyze_file)
1102                                location: String::new(),
1103                                line: node.start_position().row + 1,
1104                            });
1105                        }
1106                    }
1107                }
1108            }
1109        });
1110
1111        if timed_out {
1112            return Err(ParserError::Timeout(tc.micros));
1113        }
1114
1115        Ok(())
1116    }
1117
1118    /// Extract impl-trait blocks from an already-parsed tree.
1119    ///
1120    /// Called during `extract()` for Rust files to avoid a second parse.
1121    /// Returns an empty vec if the query is not available.
1122    fn extract_impl_traits_from_tree(
1123        source: &str,
1124        compiled: &CompiledQueries,
1125        root: Node<'_>,
1126        tc: TimeoutConfig,
1127    ) -> Result<Vec<ImplTraitInfo>, ParserError> {
1128        let Some(query) = &compiled.impl_trait else {
1129            return Ok(vec![]);
1130        };
1131
1132        let mut results = Vec::new();
1133        let mut timed_out = false;
1134
1135        QUERY_CURSOR.with(|c| {
1136            let mut cursor = c.borrow_mut();
1137            cursor.set_max_start_depth(None);
1138
1139            let mut matches = cursor.matches(query, root, source.as_bytes());
1140
1141            while let Some(mat) = matches.next() {
1142                // Check if we've hit the deadline
1143                if tc.is_exceeded() {
1144                    timed_out = true;
1145                    break;
1146                }
1147
1148                let mut trait_name = String::new();
1149                let mut impl_type = String::new();
1150                let mut line = 0usize;
1151
1152                for capture in mat.captures {
1153                    let capture_name = query.capture_names()[capture.index as usize];
1154                    let node = capture.node;
1155                    let text = source[node.start_byte()..node.end_byte()].to_string();
1156                    match capture_name {
1157                        "trait_name" => {
1158                            trait_name = text;
1159                            line = node.start_position().row + 1;
1160                        }
1161                        "impl_type" => {
1162                            impl_type = text;
1163                        }
1164                        _ => {}
1165                    }
1166                }
1167
1168                if !trait_name.is_empty() && !impl_type.is_empty() {
1169                    results.push(ImplTraitInfo {
1170                        trait_name,
1171                        impl_type,
1172                        path: PathBuf::new(), // Path will be set by caller
1173                        line,
1174                    });
1175                }
1176            }
1177        });
1178
1179        if timed_out {
1180            return Err(ParserError::Timeout(tc.micros));
1181        }
1182
1183        Ok(results)
1184    }
1185
1186    /// Extract def-use sites (write/read locations) for a given symbol within a file.
1187    ///
1188    /// Runs the defuse query to find all definition and use sites of a symbol.
1189    /// Returns empty vec if no defuse query is available for this language.
1190    ///
1191    /// # Arguments
1192    ///
1193    /// * `source` - The source code text
1194    /// * `compiled` - Compiled tree-sitter queries
1195    /// * `root` - Root node of the AST
1196    /// * `symbol_name` - The symbol to search for (must match exactly)
1197    /// * `file_path` - Relative file path for site reporting
1198    fn extract_def_use(
1199        source: &str,
1200        compiled: &CompiledQueries,
1201        root: Node<'_>,
1202        symbol_name: &str,
1203        file_path: &str,
1204        max_depth: Option<u32>,
1205    ) -> Vec<crate::types::DefUseSite> {
1206        let Some(ref defuse_query) = compiled.defuse else {
1207            return vec![];
1208        };
1209
1210        let mut sites = Vec::new();
1211        let source_lines: Vec<&str> = source.lines().collect();
1212        // Track byte offsets that already have a write or writeread capture so
1213        // duplicate read captures for the same identifier are suppressed.
1214        let mut write_offsets = std::collections::HashSet::new();
1215
1216        QUERY_CURSOR.with(|c| {
1217            let mut cursor = c.borrow_mut();
1218            cursor.set_max_start_depth(None);
1219            if let Some(depth) = max_depth {
1220                cursor.set_max_start_depth(Some(depth));
1221            }
1222            let mut matches = cursor.matches(defuse_query, root, source.as_bytes());
1223
1224            while let Some(mat) = matches.next() {
1225                for capture in mat.captures {
1226                    let capture_name = defuse_query.capture_names()[capture.index as usize];
1227                    let node = capture.node;
1228                    let node_text = node.utf8_text(source.as_bytes()).unwrap_or_default();
1229
1230                    // Only collect if the captured node matches the target symbol
1231                    if node_text != symbol_name {
1232                        continue;
1233                    }
1234
1235                    // Classify capture by prefix
1236                    let kind = if capture_name.starts_with("write.") {
1237                        crate::types::DefUseKind::Write
1238                    } else if capture_name.starts_with("read.") {
1239                        crate::types::DefUseKind::Read
1240                    } else if capture_name.starts_with("writeread.") {
1241                        crate::types::DefUseKind::WriteRead
1242                    } else {
1243                        continue;
1244                    };
1245
1246                    let byte_offset = node.start_byte();
1247
1248                    // De-duplicate: skip read captures for offsets already captured as write/writeread
1249                    if kind == crate::types::DefUseKind::Read
1250                        && write_offsets.contains(&byte_offset)
1251                    {
1252                        continue;
1253                    }
1254                    if kind != crate::types::DefUseKind::Read {
1255                        write_offsets.insert(byte_offset);
1256                    }
1257
1258                    // Get line number (1-indexed) and center-line snippet.
1259                    // Always produce a 3-line window so snippet_one_line (index 1) is safe.
1260                    let line = node.start_position().row + 1;
1261                    let snippet = {
1262                        let row = node.start_position().row;
1263                        let last_line = source_lines.len().saturating_sub(1);
1264                        let prev = if row > 0 { row - 1 } else { 0 };
1265                        let next = std::cmp::min(row + 1, last_line);
1266                        let prev_text = if row == 0 {
1267                            ""
1268                        } else {
1269                            source_lines[prev].trim_end()
1270                        };
1271                        let cur_text = source_lines[row].trim_end();
1272                        let next_text = if row >= last_line {
1273                            ""
1274                        } else {
1275                            source_lines[next].trim_end()
1276                        };
1277                        format!("{prev_text}\n{cur_text}\n{next_text}")
1278                    };
1279
1280                    // Get enclosing function scope
1281                    let enclosing_scope = Self::enclosing_function_name(node, source);
1282
1283                    let column = node.start_position().column;
1284                    sites.push(crate::types::DefUseSite {
1285                        kind,
1286                        symbol: node_text.to_string(),
1287                        file: file_path.to_string(),
1288                        line,
1289                        column,
1290                        snippet,
1291                        enclosing_scope,
1292                    });
1293                }
1294            }
1295        });
1296
1297        sites
1298    }
1299
1300    /// Parse `source` in `language`, run the defuse query for `symbol`, and return all sites.
1301    /// Returns an empty vec if the language has no defuse query or parsing fails.
1302    pub(crate) fn extract_def_use_for_file(
1303        source: &str,
1304        language: &str,
1305        symbol: &str,
1306        file_path: &str,
1307        ast_recursion_limit: Option<usize>,
1308    ) -> Vec<crate::types::DefUseSite> {
1309        let Some(lang_info) = crate::languages::get_language_info(language) else {
1310            return vec![];
1311        };
1312        let Ok(compiled) = get_compiled_queries(language) else {
1313            return vec![];
1314        };
1315        if compiled.defuse.is_none() {
1316            return vec![];
1317        }
1318
1319        let tree = match PARSER.with(|p| {
1320            let mut parser = p.borrow_mut();
1321            if parser.set_language(&lang_info.language).is_err() {
1322                return None;
1323            }
1324            parser.parse(source, None)
1325        }) {
1326            Some(t) => t,
1327            None => return vec![],
1328        };
1329
1330        let root = tree.root_node();
1331
1332        // Convert ast_recursion_limit the same way extract() does:
1333        // 0 means unlimited (None); positive values become Some(u32).
1334        let max_depth: Option<u32> = ast_recursion_limit
1335            .filter(|&limit| limit > 0)
1336            .and_then(|limit| u32::try_from(limit).ok());
1337
1338        Self::extract_def_use(source, compiled, root, symbol, file_path, max_depth)
1339    }
1340}
1341
1342/// Extract `impl Trait for Type` blocks from Rust source.
1343///
1344/// Runs independently of `extract_references` to avoid shared deduplication state.
1345/// Returns an empty vec for non-Rust source (no error; caller decides).
1346#[must_use]
1347pub fn extract_impl_traits(source: &str, path: &Path) -> Vec<ImplTraitInfo> {
1348    let Some(lang_info) = get_language_info("rust") else {
1349        return vec![];
1350    };
1351
1352    let Ok(compiled) = get_compiled_queries("rust") else {
1353        return vec![];
1354    };
1355
1356    let Some(query) = &compiled.impl_trait else {
1357        return vec![];
1358    };
1359
1360    let Some(tree) = PARSER.with(|p| {
1361        let mut parser = p.borrow_mut();
1362        let _ = parser.set_language(&lang_info.language);
1363        parser.parse(source, None)
1364    }) else {
1365        return vec![];
1366    };
1367
1368    let root = tree.root_node();
1369    let mut results = Vec::new();
1370
1371    QUERY_CURSOR.with(|c| {
1372        let mut cursor = c.borrow_mut();
1373        cursor.set_max_start_depth(None);
1374        let mut matches = cursor.matches(query, root, source.as_bytes());
1375
1376        while let Some(mat) = matches.next() {
1377            let mut trait_name = String::new();
1378            let mut impl_type = String::new();
1379            let mut line = 0usize;
1380
1381            for capture in mat.captures {
1382                let capture_name = query.capture_names()[capture.index as usize];
1383                let node = capture.node;
1384                let text = source[node.start_byte()..node.end_byte()].to_string();
1385                match capture_name {
1386                    "trait_name" => {
1387                        trait_name = text;
1388                        line = node.start_position().row + 1;
1389                    }
1390                    "impl_type" => {
1391                        impl_type = text;
1392                    }
1393                    _ => {}
1394                }
1395            }
1396
1397            if !trait_name.is_empty() && !impl_type.is_empty() {
1398                results.push(ImplTraitInfo {
1399                    trait_name,
1400                    impl_type,
1401                    path: path.to_path_buf(),
1402                    line,
1403                });
1404            }
1405        }
1406    });
1407
1408    results
1409}
1410
1411/// Execute a custom tree-sitter query against source code.
1412///
1413/// This is the internal implementation of the public `execute_query` function.
1414pub fn execute_query_impl(
1415    language: &str,
1416    source: &str,
1417    query_str: &str,
1418) -> Result<Vec<crate::QueryCapture>, ParserError> {
1419    // Get the tree-sitter language from the language name
1420    let ts_language = crate::languages::get_ts_language(language)
1421        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
1422
1423    let mut parser = Parser::new();
1424    parser
1425        .set_language(&ts_language)
1426        .map_err(|e| ParserError::QueryError(e.to_string()))?;
1427
1428    let tree = parser
1429        .parse(source.as_bytes(), None)
1430        .ok_or_else(|| ParserError::QueryError("failed to parse source".to_string()))?;
1431
1432    let query =
1433        Query::new(&ts_language, query_str).map_err(|e| ParserError::QueryError(e.to_string()))?;
1434
1435    let source_bytes = source.as_bytes();
1436
1437    let mut captures = Vec::new();
1438    QUERY_CURSOR.with(|c| {
1439        let mut cursor = c.borrow_mut();
1440        cursor.set_max_start_depth(None);
1441        let mut matches = cursor.matches(&query, tree.root_node(), source_bytes);
1442        while let Some(m) = matches.next() {
1443            for cap in m.captures {
1444                let node = cap.node;
1445                let capture_name = query.capture_names()[cap.index as usize].to_string();
1446                let text = node.utf8_text(source_bytes).unwrap_or("").to_string();
1447                captures.push(crate::QueryCapture {
1448                    capture_name,
1449                    text,
1450                    start_line: node.start_position().row,
1451                    end_line: node.end_position().row,
1452                    start_byte: node.start_byte(),
1453                    end_byte: node.end_byte(),
1454                });
1455            }
1456        }
1457    });
1458    Ok(captures)
1459}
1460
1461// Language-feature-gated tests (require lang-rust); see also tests_unsupported below
1462#[cfg(all(test, feature = "lang-rust"))]
1463mod tests {
1464    use super::*;
1465    use std::path::Path;
1466
1467    #[test]
1468    fn test_ast_recursion_limit_zero_is_unlimited() {
1469        let source = r#"fn hello() -> u32 { 42 }"#;
1470        let result_none = SemanticExtractor::extract(source, "rust", None, None);
1471        let result_zero = SemanticExtractor::extract(source, "rust", Some(0), None);
1472        assert!(result_none.is_ok(), "extract with None failed");
1473        assert!(result_zero.is_ok(), "extract with Some(0) failed");
1474        let analysis_none = result_none.unwrap();
1475        let analysis_zero = result_zero.unwrap();
1476        assert!(
1477            analysis_none.functions.len() >= 1,
1478            "extract with None should find at least one function in the test source"
1479        );
1480        assert_eq!(
1481            analysis_none.functions.len(),
1482            analysis_zero.functions.len(),
1483            "ast_recursion_limit=0 should behave identically to unset (unlimited)"
1484        );
1485    }
1486
1487    #[test]
1488    fn test_rust_use_as_imports() {
1489        // Arrange
1490        let source = "use std::io as stdio;";
1491        // Act
1492        let result = SemanticExtractor::extract(source, "rust", None, None).unwrap();
1493        // Assert: alias "stdio" is captured as an import item
1494        assert!(
1495            result
1496                .imports
1497                .iter()
1498                .any(|imp| imp.items.iter().any(|i| i == "stdio")),
1499            "expected import alias 'stdio' in {:?}",
1500            result.imports
1501        );
1502    }
1503
1504    #[test]
1505    fn test_rust_use_as_clause_plain_identifier() {
1506        // Arrange: use_as_clause with plain identifier (no scoped_identifier)
1507        // exercises the _ => prefix.to_string() arm
1508        let source = "use io as stdio;";
1509        // Act
1510        let result = SemanticExtractor::extract(source, "rust", None, None).unwrap();
1511        // Assert: alias "stdio" is captured as an import item
1512        assert!(
1513            result
1514                .imports
1515                .iter()
1516                .any(|imp| imp.items.iter().any(|i| i == "stdio")),
1517            "expected import alias 'stdio' from plain identifier in {:?}",
1518            result.imports
1519        );
1520    }
1521
1522    #[test]
1523    fn test_rust_scoped_use_with_prefix() {
1524        // Arrange: scoped_use_list with non-empty prefix
1525        let source = "use std::{io::Read, io::Write};";
1526        // Act
1527        let result = SemanticExtractor::extract(source, "rust", None, None).unwrap();
1528        // Assert: both Read and Write appear as items with std::io module
1529        let items: Vec<String> = result
1530            .imports
1531            .iter()
1532            .filter(|imp| imp.module.starts_with("std::io"))
1533            .flat_map(|imp| imp.items.clone())
1534            .collect();
1535        assert!(
1536            items.contains(&"Read".to_string()) && items.contains(&"Write".to_string()),
1537            "expected 'Read' and 'Write' items under module with std::io, got {:?}",
1538            result.imports
1539        );
1540    }
1541
1542    #[test]
1543    fn test_rust_scoped_use_imports() {
1544        // Arrange
1545        let source = "use std::{fs, io};";
1546        // Act
1547        let result = SemanticExtractor::extract(source, "rust", None, None).unwrap();
1548        // Assert: both "fs" and "io" appear as import items under module "std"
1549        let items: Vec<&str> = result
1550            .imports
1551            .iter()
1552            .filter(|imp| imp.module == "std")
1553            .flat_map(|imp| imp.items.iter().map(|s| s.as_str()))
1554            .collect();
1555        assert!(
1556            items.contains(&"fs") && items.contains(&"io"),
1557            "expected 'fs' and 'io' items under module 'std', got {:?}",
1558            items
1559        );
1560    }
1561
1562    #[test]
1563    fn test_rust_wildcard_imports() {
1564        // Arrange
1565        let source = "use std::io::*;";
1566        // Act
1567        let result = SemanticExtractor::extract(source, "rust", None, None).unwrap();
1568        // Assert: wildcard import with module "std::io"
1569        let wildcard = result
1570            .imports
1571            .iter()
1572            .find(|imp| imp.module == "std::io" && imp.items == vec!["*"]);
1573        assert!(
1574            wildcard.is_some(),
1575            "expected wildcard import with module 'std::io', got {:?}",
1576            result.imports
1577        );
1578    }
1579
1580    #[test]
1581    fn test_extract_impl_traits_standalone() {
1582        // Arrange: source with a simple impl Trait for Type
1583        let source = r#"
1584struct Foo;
1585trait Display {}
1586impl Display for Foo {}
1587"#;
1588        // Act
1589        let results = extract_impl_traits(source, Path::new("test.rs"));
1590        // Assert
1591        assert_eq!(
1592            results.len(),
1593            1,
1594            "expected one impl trait, got {:?}",
1595            results
1596        );
1597        assert_eq!(results[0].trait_name, "Display");
1598        assert_eq!(results[0].impl_type, "Foo");
1599    }
1600
1601    #[cfg(target_pointer_width = "64")]
1602    #[test]
1603    fn test_ast_recursion_limit_overflow() {
1604        // Arrange: limit larger than u32::MAX triggers a ParseError on 64-bit targets
1605        let source = "fn foo() {}";
1606        let big_limit = usize::try_from(u32::MAX).unwrap() + 1;
1607        // Act
1608        let result = SemanticExtractor::extract(source, "rust", Some(big_limit), None);
1609        // Assert
1610        assert!(
1611            matches!(result, Err(ParserError::ParseError(_))),
1612            "expected ParseError for oversized limit, got {:?}",
1613            result
1614        );
1615    }
1616
1617    #[test]
1618    fn test_ast_recursion_limit_some() {
1619        // Arrange: ast_recursion_limit with Some(depth) to exercise max_depth Some branch
1620        let source = r#"fn hello() -> u32 { 42 }"#;
1621        // Act
1622        let result = SemanticExtractor::extract(source, "rust", Some(5), None);
1623        // Assert: should succeed without error and extract functions
1624        assert!(result.is_ok(), "extract with Some(5) failed: {:?}", result);
1625        let analysis = result.unwrap();
1626        assert!(
1627            analysis.functions.len() >= 1,
1628            "expected at least one function with depth limit 5"
1629        );
1630    }
1631
1632    #[test]
1633    fn test_extract_def_use_for_file_finds_write_and_read() {
1634        // Arrange
1635        let source = r#"
1636fn main() {
1637    let count = 0;
1638    println!("{}", count);
1639}
1640"#;
1641        // Act
1642        let sites = SemanticExtractor::extract_def_use_for_file(
1643            source,
1644            "rust",
1645            "count",
1646            "src/main.rs",
1647            None,
1648        );
1649
1650        // Assert
1651        assert!(
1652            !sites.is_empty(),
1653            "expected at least one def-use site for 'count'"
1654        );
1655        let has_write = sites
1656            .iter()
1657            .any(|s| s.kind == crate::types::DefUseKind::Write);
1658        let has_read = sites
1659            .iter()
1660            .any(|s| s.kind == crate::types::DefUseKind::Read);
1661        assert!(has_write, "expected a write site for 'count'");
1662        assert!(has_read, "expected a read site for 'count'");
1663        assert_eq!(sites[0].file, "src/main.rs");
1664    }
1665
1666    #[test]
1667    fn test_extract_def_use_for_file_no_match_returns_empty() {
1668        // Arrange
1669        let source = "fn foo() { let x = 1; }";
1670
1671        // Act
1672        let sites = SemanticExtractor::extract_def_use_for_file(
1673            source,
1674            "rust",
1675            "nonexistent_symbol",
1676            "src/lib.rs",
1677            None,
1678        );
1679
1680        // Assert
1681        assert!(sites.is_empty(), "expected empty for nonexistent symbol");
1682    }
1683}
1684
1685// Language-feature-gated tests for Python
#[cfg(all(test, feature = "lang-python"))]
mod tests_python {
    //! Tests compiled only when the `lang-python` feature is enabled. Every test
    //! here parses Python source, so the module depends on no other language
    //! feature being active.

    use super::*;

    /// Minimal Python module defining a single function; used by the timeout tests.
    const PY_SINGLE_FUNCTION: &str = "def hello():\n    return 42\n";

    #[test]
    fn test_python_relative_import() {
        // Arrange: relative import (from . import foo)
        let source = "from . import foo\n";
        // Act
        let result = SemanticExtractor::extract(source, "python", None, None).unwrap();
        // Assert: at least one import's module text contains a '.'
        let relative = result.imports.iter().find(|imp| imp.module.contains('.'));
        assert!(
            relative.is_some(),
            "expected relative import in {:?}",
            result.imports
        );
    }

    #[test]
    fn test_python_aliased_import() {
        // Arrange: aliased import (from os import path as p)
        // Note: tree-sitter-python extracts "path" (the original name), not the alias "p"
        let source = "from os import path as p\n";
        // Act
        let result = SemanticExtractor::extract(source, "python", None, None).unwrap();
        // Assert: "path" should be in items (alias is captured separately by aliased_import node)
        let path_import = result
            .imports
            .iter()
            .find(|imp| imp.module == "os" && imp.items.iter().any(|i| i == "path"));
        assert!(
            path_import.is_some(),
            "expected import 'path' from module 'os' in {:?}",
            result.imports
        );
    }

    #[test]
    fn test_parse_no_timeout_when_none() {
        // Arrange + Act: extract Python source with no timeout configured.
        // Python (not Rust) source is used so this test relies only on the
        // feature that gates this module.
        let result = SemanticExtractor::extract(PY_SINGLE_FUNCTION, "python", None, None);
        // Assert: should succeed normally and report the function
        let analysis = result.expect("extract with deadline=None should succeed");
        assert!(
            !analysis.functions.is_empty(),
            "should find at least one function"
        );
    }

    #[test]
    fn test_parse_timeout_triggers_error() {
        // Arrange + Act: extract with a 1-microsecond timeout, which is expected
        // to have elapsed before extraction can finish.
        let result = SemanticExtractor::extract(PY_SINGLE_FUNCTION, "python", None, Some(1u64));
        // Assert: should return a Timeout error
        assert!(
            matches!(result, Err(ParserError::Timeout(_))),
            "expected Timeout error, got {:?}",
            result
        );
    }
}
1753
1754// Tests that do not require any language feature gate
#[cfg(test)]
mod tests_unsupported {
    //! Both extractors must reject a language name they do not know, echoing
    //! the offending name back in `ParserError::UnsupportedLanguage`.

    use super::*;

    #[test]
    fn test_element_extractor_unsupported_language() {
        // Arrange + Act: "cobol" is not a language the extractor knows.
        let outcome = ElementExtractor::extract_with_depth("x = 1", "cobol");

        // Assert: the error names the rejected language.
        let rejected_cobol =
            matches!(&outcome, Err(ParserError::UnsupportedLanguage(lang)) if lang == "cobol");
        assert!(
            rejected_cobol,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }

    #[test]
    fn test_semantic_extractor_unsupported_language() {
        // Arrange + Act: same unknown language, through the semantic extractor.
        let outcome = SemanticExtractor::extract("x = 1", "cobol", None, None);

        // Assert: the error names the rejected language.
        let rejected_cobol =
            matches!(&outcome, Err(ParserError::UnsupportedLanguage(lang)) if lang == "cobol");
        assert!(
            rejected_cobol,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }
}