Skip to main content

aptu_coder_core/
parser.rs

1// SPDX-FileCopyrightText: 2026 aptu-coder contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Tree-sitter-based parser for extracting semantic structure from source code.
4//!
5//! This module provides language-agnostic parsing using tree-sitter queries to extract
6//! functions, classes, imports, references, and other semantic elements from source files.
7//! Two main extractors handle different use cases:
8//!
9//! - [`ElementExtractor`]: Quick extraction of function and class counts.
10//! - [`SemanticExtractor`]: Detailed semantic analysis with calls, imports, and references.
11
12use crate::languages::get_language_info;
13use crate::types::{
14    CallInfo, ClassInfo, FunctionInfo, ImplTraitInfo, ImportInfo, ReferenceInfo, ReferenceType,
15    SemanticAnalysis,
16};
17use std::cell::RefCell;
18use std::collections::HashMap;
19use std::path::{Path, PathBuf};
20use std::sync::LazyLock;
21use thiserror::Error;
22use tracing::instrument;
23use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
24
/// Errors reported by the extractors in this module.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum ParserError {
    /// No language entry or cached query set exists for the requested language.
    /// The payload is the language name exactly as the caller supplied it.
    #[error("Unsupported language: {0}")]
    UnsupportedLanguage(String),
    /// The parser could not be configured or produced no tree, or an argument that
    /// controls parsing was out of range. The payload is a human-readable detail.
    #[error("Failed to parse file: {0}")]
    ParseError(String),
    /// The input was not valid UTF-8.
    #[error("Invalid UTF-8 in file")]
    InvalidUtf8,
    /// A tree-sitter query string failed to compile. The payload names the query
    /// kind, the language, and the underlying compile error.
    #[error("Query error: {0}")]
    QueryError(String),
    /// The configured deadline passed before extraction finished. The payload is the
    /// configured timeout in microseconds, not the elapsed time.
    #[error("Parse timeout exceeded: {0} microseconds")]
    Timeout(u64),
}
39
/// Groups a query deadline with the configured timeout duration for use in private extract helpers.
/// Avoids threading two separate values through every helper signature.
#[derive(Clone, Copy)]
struct TimeoutConfig {
    /// Absolute deadline; `None` means no timeout.
    pub deadline: Option<std::time::Instant>,
    /// The configured timeout in microseconds (used in `ParserError::Timeout`).
    pub micros: u64,
}

impl TimeoutConfig {
    /// Builds a config whose deadline is `timeout_micros` from now.
    ///
    /// `None` produces a config with no deadline and `micros == 0`.
    ///
    /// A timeout so large that the resulting instant cannot be represented is treated
    /// as "no deadline" instead of panicking: `Instant + Duration` panics on overflow,
    /// and a caller-supplied budget must never be able to abort the process.
    fn new(timeout_micros: Option<u64>) -> Self {
        let deadline = timeout_micros.and_then(|us| {
            std::time::Instant::now().checked_add(std::time::Duration::from_micros(us))
        });
        Self {
            deadline,
            micros: timeout_micros.unwrap_or(0),
        }
    }

    /// Returns `true` if the deadline has been reached.
    ///
    /// Always `false` when no deadline is configured.
    fn is_exceeded(self) -> bool {
        self.deadline
            .is_some_and(|d| std::time::Instant::now() >= d)
    }
}
66
/// Compiled tree-sitter queries for a language.
/// Stores all query types: mandatory (element, call) and optional
/// (import, impl, reference, impl_trait, defuse). An optional query is `None` when
/// the language definition supplies no query string for it.
struct CompiledQueries {
    /// Compiled from the language's `element_query`; always present.
    pub element: Query,
    /// Compiled from the language's `call_query`; always present.
    pub call: Query,
    /// Compiled from the language's `import_query`, if any.
    pub import: Option<Query>,
    /// Compiled from the language's `impl_query`, if any.
    pub impl_block: Option<Query>,
    /// Compiled from the language's `reference_query`, if any.
    pub reference: Option<Query>,
    /// Compiled from the language's `impl_trait_query`, if any.
    pub impl_trait: Option<Query>,
    /// Compiled from the language's `defuse_query`, if any.
    pub defuse: Option<Query>,
}
78
79/// Build compiled queries for a given language.
80///
81/// The `map_err` closures inside are only reachable if a hardcoded query string is
82/// invalid, which cannot happen at runtime -- exclude them from coverage instrumentation.
83#[cfg_attr(coverage_nightly, coverage(off))]
84fn build_compiled_queries(
85    lang_info: &crate::languages::LanguageInfo,
86) -> Result<CompiledQueries, ParserError> {
87    let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
88        ParserError::QueryError(format!(
89            "Failed to compile element query for {}: {}",
90            lang_info.name, e
91        ))
92    })?;
93
94    let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
95        ParserError::QueryError(format!(
96            "Failed to compile call query for {}: {}",
97            lang_info.name, e
98        ))
99    })?;
100
101    let import = if let Some(import_query_str) = lang_info.import_query {
102        Some(
103            Query::new(&lang_info.language, import_query_str).map_err(|e| {
104                ParserError::QueryError(format!(
105                    "Failed to compile import query for {}: {}",
106                    lang_info.name, e
107                ))
108            })?,
109        )
110    } else {
111        None
112    };
113
114    let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
115        Some(
116            Query::new(&lang_info.language, impl_query_str).map_err(|e| {
117                ParserError::QueryError(format!(
118                    "Failed to compile impl query for {}: {}",
119                    lang_info.name, e
120                ))
121            })?,
122        )
123    } else {
124        None
125    };
126
127    let reference = if let Some(ref_query_str) = lang_info.reference_query {
128        Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
129            ParserError::QueryError(format!(
130                "Failed to compile reference query for {}: {}",
131                lang_info.name, e
132            ))
133        })?)
134    } else {
135        None
136    };
137
138    let impl_trait = if let Some(impl_trait_query_str) = lang_info.impl_trait_query {
139        Some(
140            Query::new(&lang_info.language, impl_trait_query_str).map_err(|e| {
141                ParserError::QueryError(format!(
142                    "Failed to compile impl_trait query for {}: {}",
143                    lang_info.name, e
144                ))
145            })?,
146        )
147    } else {
148        None
149    };
150
151    let defuse = if let Some(defuse_query_str) = lang_info.defuse_query {
152        Some(
153            Query::new(&lang_info.language, defuse_query_str).map_err(|e| {
154                ParserError::QueryError(format!(
155                    "Failed to compile defuse query for {}: {}",
156                    lang_info.name, e
157                ))
158            })?,
159        )
160    } else {
161        None
162    };
163
164    Ok(CompiledQueries {
165        element,
166        call,
167        import,
168        impl_block,
169        reference,
170        impl_trait,
171        defuse,
172    })
173}
174
175/// Initialize the query cache with compiled queries for all supported languages.
176///
177/// Excluded from coverage: the `Err` arm is unreachable because `build_compiled_queries`
178/// only fails on invalid hardcoded query strings.
179#[cfg_attr(coverage_nightly, coverage(off))]
180fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
181    let mut cache = HashMap::new();
182
183    for lang_name in crate::lang::supported_languages() {
184        if let Some(lang_info) = get_language_info(lang_name) {
185            match build_compiled_queries(&lang_info) {
186                Ok(compiled) => {
187                    cache.insert(*lang_name, compiled);
188                }
189                Err(e) => {
190                    tracing::error!(
191                        "Failed to compile queries for language {}: {}",
192                        lang_name,
193                        e
194                    );
195                }
196            }
197        }
198    }
199
200    cache
201}
202
203/// Lazily initialized cache of compiled queries per language.
204static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
205    LazyLock::new(init_query_cache);
206
207/// Get compiled queries for a language from the cache.
208fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
209    QUERY_CACHE
210        .get(language)
211        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
212}
213
// Per-thread tree-sitter state used by the extractors in this file. Each value lives in
// a `RefCell` and is mutably borrowed inside a `with` closure for one parse or one
// query pass. Because the instances are reused, the visible callers set the parser
// language before every parse and reset the cursor's max start depth before querying.
thread_local! {
    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
    static QUERY_CURSOR: RefCell<QueryCursor> = RefCell::new(QueryCursor::new());
}
218
219/// Canonical API for extracting element counts from source code.
220pub struct ElementExtractor;
221
222impl ElementExtractor {
223    /// Extract function and class counts from source code.
224    ///
225    /// # Errors
226    ///
227    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
228    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
229    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
230    #[instrument(skip_all, fields(language))]
231    pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
232        let lang_info = get_language_info(language)
233            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
234
235        let tree = PARSER.with(|p| {
236            let mut parser = p.borrow_mut();
237            parser
238                .set_language(&lang_info.language)
239                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
240            parser
241                .parse(source, None)
242                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
243        })?;
244
245        let compiled = get_compiled_queries(language)?;
246
247        let (function_count, class_count) = QUERY_CURSOR.with(|c| {
248            let mut cursor = c.borrow_mut();
249            cursor.set_max_start_depth(None);
250            let mut function_count = 0;
251            let mut class_count = 0;
252
253            let mut matches =
254                cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
255            while let Some(mat) = matches.next() {
256                for capture in mat.captures {
257                    let capture_name = compiled.element.capture_names()[capture.index as usize];
258                    match capture_name {
259                        "function" => function_count += 1,
260                        "class" => class_count += 1,
261                        _ => {}
262                    }
263                }
264            }
265            (function_count, class_count)
266        });
267
268        tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
269
270        Ok((function_count, class_count))
271    }
272}
273
274/// Recursively extract `ImportInfo` entries from a use-clause node, respecting all Rust
275/// use-declaration forms (`scoped_identifier`, `scoped_use_list`, `use_list`,
276/// `use_as_clause`, `use_wildcard`, bare `identifier`).
277#[allow(clippy::too_many_lines)] // exhaustive match over all supported Rust use-clause forms; splitting harms readability
278fn extract_imports_from_node(
279    node: &Node,
280    source: &str,
281    prefix: &str,
282    line: usize,
283    imports: &mut Vec<ImportInfo>,
284) {
285    match node.kind() {
286        // Simple identifier: `use foo;` or an item inside `{foo, bar}`
287        "identifier" | "self" | "super" | "crate" => {
288            let name = source[node.start_byte()..node.end_byte()].to_string();
289            imports.push(ImportInfo {
290                module: prefix.to_string(),
291                items: vec![name],
292                line,
293            });
294        }
295        // Qualified path: `std::collections::HashMap`
296        "scoped_identifier" => {
297            let item = node
298                .child_by_field_name("name")
299                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
300                .unwrap_or_default();
301            let module = node.child_by_field_name("path").map_or_else(
302                || prefix.to_string(),
303                |p| {
304                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
305                    if prefix.is_empty() {
306                        path_text
307                    } else {
308                        format!("{prefix}::{path_text}")
309                    }
310                },
311            );
312            if !item.is_empty() {
313                imports.push(ImportInfo {
314                    module,
315                    items: vec![item],
316                    line,
317                });
318            }
319        }
320        // `std::{io, fs}` — path prefix followed by a brace list
321        "scoped_use_list" => {
322            let new_prefix = node.child_by_field_name("path").map_or_else(
323                || prefix.to_string(),
324                |p| {
325                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
326                    if prefix.is_empty() {
327                        path_text
328                    } else {
329                        format!("{prefix}::{path_text}")
330                    }
331                },
332            );
333            if let Some(list) = node.child_by_field_name("list") {
334                extract_imports_from_node(&list, source, &new_prefix, line, imports);
335            }
336        }
337        // `{HashMap, HashSet}` — brace-enclosed list of items
338        "use_list" => {
339            let mut cursor = node.walk();
340            for child in node.children(&mut cursor) {
341                match child.kind() {
342                    "{" | "}" | "," => {}
343                    _ => extract_imports_from_node(&child, source, prefix, line, imports),
344                }
345            }
346        }
347        // `std::io::*` — glob import
348        "use_wildcard" => {
349            let text = source[node.start_byte()..node.end_byte()].to_string();
350            let module = if let Some(stripped) = text.strip_suffix("::*") {
351                if prefix.is_empty() {
352                    stripped.to_string()
353                } else {
354                    format!("{prefix}::{stripped}")
355                }
356            } else {
357                prefix.to_string()
358            };
359            imports.push(ImportInfo {
360                module,
361                items: vec!["*".to_string()],
362                line,
363            });
364        }
365        // `io as stdio` or `std::io as stdio`
366        "use_as_clause" => {
367            let alias = node
368                .child_by_field_name("alias")
369                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
370                .unwrap_or_default();
371            let module = if let Some(path_node) = node.child_by_field_name("path") {
372                match path_node.kind() {
373                    "scoped_identifier" => path_node.child_by_field_name("path").map_or_else(
374                        || prefix.to_string(),
375                        |p| {
376                            let p_text = source[p.start_byte()..p.end_byte()].to_string();
377                            if prefix.is_empty() {
378                                p_text
379                            } else {
380                                format!("{prefix}::{p_text}")
381                            }
382                        },
383                    ),
384                    _ => prefix.to_string(),
385                }
386            } else {
387                prefix.to_string()
388            };
389            if !alias.is_empty() {
390                imports.push(ImportInfo {
391                    module,
392                    items: vec![alias],
393                    line,
394                });
395            }
396        }
397        // Python import_from_statement: `from module import name` or `from . import *`
398        "import_from_statement" => {
399            extract_python_import_from(node, source, line, imports);
400        }
401        // Fallback for non-Rust import nodes: capture full text as module
402        _ => {
403            let text = source[node.start_byte()..node.end_byte()]
404                .trim()
405                .to_string();
406            if !text.is_empty() {
407                imports.push(ImportInfo {
408                    module: text,
409                    items: vec![],
410                    line,
411                });
412            }
413        }
414    }
415}
416
417/// Extract an item name from a `dotted_name` or `aliased_import` child node.
418fn extract_import_item_name(child: &Node, source: &str) -> Option<String> {
419    match child.kind() {
420        "dotted_name" => {
421            let name = source[child.start_byte()..child.end_byte()]
422                .trim()
423                .to_string();
424            if name.is_empty() { None } else { Some(name) }
425        }
426        "aliased_import" => child.child_by_field_name("name").and_then(|n| {
427            let name = source[n.start_byte()..n.end_byte()].trim().to_string();
428            if name.is_empty() { None } else { Some(name) }
429        }),
430        _ => None,
431    }
432}
433
434/// Collect wildcard/named imports from an `import_list` node or from direct named children.
435fn collect_import_items(
436    node: &Node,
437    source: &str,
438    is_wildcard: &mut bool,
439    items: &mut Vec<String>,
440) {
441    // Prefer import_list child (wraps `from x import a, b`)
442    if let Some(import_list) = node.child_by_field_name("import_list") {
443        let mut cursor = import_list.walk();
444        for child in import_list.named_children(&mut cursor) {
445            if child.kind() == "wildcard_import" {
446                *is_wildcard = true;
447            } else if let Some(name) = extract_import_item_name(&child, source) {
448                items.push(name);
449            }
450        }
451        return;
452    }
453    // No import_list: single-name or wildcard as direct child (skip first named child = module_name)
454    let mut cursor = node.walk();
455    let mut first = true;
456    for child in node.named_children(&mut cursor) {
457        if first {
458            first = false;
459            continue;
460        }
461        if child.kind() == "wildcard_import" {
462            *is_wildcard = true;
463        } else if let Some(name) = extract_import_item_name(&child, source) {
464            items.push(name);
465        }
466    }
467}
468
469/// Handle Python `import_from_statement` node.
470fn extract_python_import_from(
471    node: &Node,
472    source: &str,
473    line: usize,
474    imports: &mut Vec<ImportInfo>,
475) {
476    let module = if let Some(m) = node.child_by_field_name("module_name") {
477        source[m.start_byte()..m.end_byte()].trim().to_string()
478    } else if let Some(r) = node.child_by_field_name("relative_import") {
479        source[r.start_byte()..r.end_byte()].trim().to_string()
480    } else {
481        String::new()
482    };
483
484    let mut is_wildcard = false;
485    let mut items = Vec::new();
486    collect_import_items(node, source, &mut is_wildcard, &mut items);
487
488    if !module.is_empty() {
489        imports.push(ImportInfo {
490            module,
491            items: if is_wildcard {
492                vec!["*".to_string()]
493            } else {
494                items
495            },
496            line,
497        });
498    }
499}
500
/// Entry point for detailed semantic analysis of a source file: functions, classes,
/// imports, references, and calls. All functionality is exposed through associated
/// functions; the type itself holds no state.
pub struct SemanticExtractor;
502
503impl SemanticExtractor {
504    /// Extract semantic information from source code.
505    ///
506    /// # Errors
507    ///
508    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
509    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
510    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
511    #[instrument(skip_all, fields(language))]
512    pub fn extract(
513        source: &str,
514        language: &str,
515        ast_recursion_limit: Option<usize>,
516        timeout_micros: Option<u64>,
517    ) -> Result<SemanticAnalysis, ParserError> {
518        let tc = TimeoutConfig::new(timeout_micros);
519
520        // Check deadline at the start before any parsing work.
521        if tc.is_exceeded() {
522            return Err(ParserError::Timeout(tc.micros));
523        }
524        let lang_info = get_language_info(language)
525            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
526
527        let tree = PARSER.with(|p| {
528            let mut parser = p.borrow_mut();
529            parser
530                .set_language(&lang_info.language)
531                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
532            parser
533                .parse(source, None)
534                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
535        })?;
536
537        // 0 is not a useful depth (visits root node only, returning zero results).
538        // Treat 0 as None (unlimited). See #339.
539        let max_depth: Option<u32> = ast_recursion_limit
540            .filter(|&limit| limit > 0)
541            .map(|limit| {
542                u32::try_from(limit).map_err(|_| {
543                    ParserError::ParseError(format!(
544                        "ast_recursion_limit {} exceeds maximum supported value {}",
545                        limit,
546                        u32::MAX
547                    ))
548                })
549            })
550            .transpose()?;
551
552        let compiled = get_compiled_queries(language)?;
553        let root = tree.root_node();
554
555        let mut functions = Vec::new();
556        let mut classes = Vec::new();
557        let mut imports = Vec::new();
558        let mut references = Vec::new();
559        let mut call_frequency = HashMap::new();
560        let mut calls = Vec::new();
561
562        Self::extract_elements(
563            source,
564            compiled,
565            root,
566            max_depth,
567            &lang_info,
568            &mut functions,
569            &mut classes,
570            tc,
571        )?;
572        Self::extract_calls(
573            source,
574            compiled,
575            root,
576            max_depth,
577            &mut calls,
578            &mut call_frequency,
579            tc,
580        )?;
581        Self::extract_imports(source, compiled, root, max_depth, &mut imports, tc)?;
582        Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes, tc)?;
583        Self::extract_references(source, compiled, root, max_depth, &mut references, tc)?;
584
585        // Extract impl-trait blocks for Rust files (empty for other languages)
586        let impl_traits = if language == "rust" {
587            Self::extract_impl_traits_from_tree(source, compiled, root, tc)?
588        } else {
589            vec![]
590        };
591
592        tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), impl_traits = impl_traits.len(), "extraction complete");
593
594        Ok(SemanticAnalysis {
595            functions,
596            classes,
597            imports,
598            references,
599            call_frequency,
600            calls,
601            impl_traits,
602            def_use_sites: Vec::new(),
603        })
604    }
605
606    /// Fast path for extracting module metadata: functions and imports only.
607    ///
608    /// This method is optimized for the `analyze_module` tool, which only needs function
609    /// definitions and import statements. It skips the more expensive extractors (calls,
610    /// references, impl traits) and returns a lightweight `ModuleInfo` directly.
611    ///
612    /// # Arguments
613    ///
614    /// * `source` - The source code as a string
615    /// * `language` - The programming language (e.g., "rust", "python")
616    /// * `timeout` - Optional timeout configuration in microseconds
617    ///
618    /// # Returns
619    ///
620    /// A `ModuleInfo` containing the file name, line count, language, functions, and imports.
621    ///
622    /// # Errors
623    ///
624    /// Returns a `ParserError` if:
625    /// * `ParserError::Timeout` - The operation exceeds the specified timeout
626    /// * `ParserError::UnsupportedLanguage` - The language is not supported
627    /// * `ParserError::ParseError` - Tree-sitter parsing fails
628    #[instrument(skip_all, fields(language))]
629    pub fn extract_module_info(
630        source: &str,
631        language: &str,
632        timeout_micros: Option<u64>,
633    ) -> Result<crate::types::ModuleInfo, ParserError> {
634        let tc = TimeoutConfig::new(timeout_micros);
635
636        // Check deadline at the start before any parsing work.
637        if tc.is_exceeded() {
638            return Err(ParserError::Timeout(tc.micros));
639        }
640
641        let lang_info = get_language_info(language)
642            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
643
644        let tree = PARSER.with(|p| {
645            let mut parser = p.borrow_mut();
646            parser
647                .set_language(&lang_info.language)
648                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
649            parser
650                .parse(source, None)
651                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
652        })?;
653
654        // Check deadline after parsing
655        if tc.is_exceeded() {
656            return Err(ParserError::Timeout(tc.micros));
657        }
658
659        let compiled = get_compiled_queries(language)?;
660        let root = tree.root_node();
661
662        let mut functions = Vec::new();
663        let mut classes = Vec::new();
664        let mut imports = Vec::new();
665
666        // Extract functions and classes
667        Self::extract_elements(
668            source,
669            compiled,
670            root,
671            None,
672            &lang_info,
673            &mut functions,
674            &mut classes,
675            tc,
676        )?;
677
678        // Check deadline after extract_elements
679        if tc.is_exceeded() {
680            return Err(ParserError::Timeout(tc.micros));
681        }
682
683        // Extract imports
684        Self::extract_imports(source, compiled, root, None, &mut imports, tc)?;
685
686        // Check deadline after extract_imports
687        if tc.is_exceeded() {
688            return Err(ParserError::Timeout(tc.micros));
689        }
690
691        // Map to ModuleInfo
692        let module_functions = functions
693            .into_iter()
694            .map(|f| crate::types::ModuleFunctionInfo {
695                name: f.name,
696                line: f.line,
697            })
698            .collect();
699
700        let module_imports = imports
701            .into_iter()
702            .map(|i| crate::types::ModuleImportInfo {
703                module: i.module,
704                items: i.items,
705            })
706            .collect();
707
708        let line_count = source.lines().count();
709
710        Ok(crate::types::ModuleInfo::new(
711            String::new(), // Will be set by caller
712            line_count,
713            language.to_string(),
714            module_functions,
715            module_imports,
716        ))
717    }
718
719    // Extracts function and class definitions from a pre-parsed syntax tree.
720    #[allow(clippy::too_many_arguments)]
721    fn extract_elements(
722        source: &str,
723        compiled: &CompiledQueries,
724        root: Node<'_>,
725        max_depth: Option<u32>,
726        lang_info: &crate::languages::LanguageInfo,
727        functions: &mut Vec<FunctionInfo>,
728        classes: &mut Vec<ClassInfo>,
729        tc: TimeoutConfig,
730    ) -> Result<(), ParserError> {
731        let mut seen_functions = std::collections::HashSet::new();
732        let mut timed_out = false;
733
734        QUERY_CURSOR.with(|c| {
735            let mut cursor = c.borrow_mut();
736            cursor.set_max_start_depth(None);
737            if let Some(depth) = max_depth {
738                cursor.set_max_start_depth(Some(depth));
739            }
740
741            let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
742
743            while let Some(mat) = matches.next() {
744                // Check if we've hit the deadline
745                if tc.is_exceeded() {
746                    timed_out = true;
747                    break;
748                }
749                let mut func_node: Option<Node> = None;
750                let mut func_name_text: Option<String> = None;
751                let mut class_node: Option<Node> = None;
752                let mut class_name_text: Option<String> = None;
753
754                for capture in mat.captures {
755                    let capture_name = compiled.element.capture_names()[capture.index as usize];
756                    let node = capture.node;
757                    match capture_name {
758                        "function" => func_node = Some(node),
759                        "func_name" | "method_name" => {
760                            func_name_text =
761                                Some(source[node.start_byte()..node.end_byte()].to_string());
762                        }
763                        "class" => class_node = Some(node),
764                        "class_name" | "type_name" => {
765                            class_name_text =
766                                Some(source[node.start_byte()..node.end_byte()].to_string());
767                        }
768                        _ => {}
769                    }
770                }
771
772                if let Some(func_node) = func_node {
773                    // When a plain function_definition is nested inside a template_declaration
774                    // or decorated_definition, it is also matched by the explicit wrapper pattern.
775                    // Skip it here to avoid duplicates; the wrapper match will emit it.
776                    let parent_kind = func_node.parent().map(|p| p.kind());
777                    let parent_is_wrapper = parent_kind
778                        .map(|k| k == "template_declaration" || k == "decorated_definition")
779                        .unwrap_or(false);
780                    if func_node.kind() == "function_definition" && parent_is_wrapper {
781                        // Handled by the template_declaration or decorated_definition @function match instead.
782                    } else {
783                        // Resolve template_declaration or decorated_definition to inner function_definition
784                        // for declarator/field walks. The captured node may be a wrapper.
785                        let func_def = if func_node.kind() == "template_declaration" {
786                            let mut cursor = func_node.walk();
787                            func_node
788                                .children(&mut cursor)
789                                .find(|n| n.kind() == "function_definition")
790                                .unwrap_or(func_node)
791                        } else if func_node.kind() == "decorated_definition" {
792                            func_node
793                                .child_by_field_name("definition")
794                                .unwrap_or(func_node)
795                        } else {
796                            func_node
797                        };
798
799                        let name = func_name_text
800                            .or_else(|| {
801                                func_def
802                                    .child_by_field_name("name")
803                                    .map(|n| source[n.start_byte()..n.end_byte()].to_string())
804                            })
805                            .unwrap_or_default();
806
807                        let func_key = (name.clone(), func_node.start_position().row);
808                        if !name.is_empty() && seen_functions.insert(func_key) {
809                            // For C/C++: parameters live under declarator -> parameters.
810                            // For other languages: parameters is a direct child field.
811                            let params = func_def
812                                .child_by_field_name("declarator")
813                                .and_then(|d| d.child_by_field_name("parameters"))
814                                .or_else(|| func_def.child_by_field_name("parameters"))
815                                .map(|p| source[p.start_byte()..p.end_byte()].to_string())
816                                .unwrap_or_default();
817
818                            // Try "type" first (C/C++ uses this field for the return type);
819                            // fall back to "return_type" (Rust, Python, TypeScript, etc.).
820                            let return_type = func_def
821                                .child_by_field_name("type")
822                                .or_else(|| func_def.child_by_field_name("return_type"))
823                                .map(|r| source[r.start_byte()..r.end_byte()].to_string());
824
825                            // Walk backward through contiguous attribute_item siblings
826                            // to find the first attribute line (Rust only).
827                            let first_line = if func_node.kind() == "function_item" {
828                                let mut attrs: Vec<Node> = Vec::new();
829                                let mut sib = func_node.prev_named_sibling();
830                                while let Some(s) = sib {
831                                    if s.kind() == "attribute_item" {
832                                        attrs.push(s);
833                                        sib = s.prev_named_sibling();
834                                    } else {
835                                        break;
836                                    }
837                                }
838                                attrs
839                                    .last()
840                                    .map(|n| n.start_position().row + 1)
841                                    .unwrap_or_else(|| func_node.start_position().row + 1)
842                            } else {
843                                func_node.start_position().row + 1
844                            };
845
846                            functions.push(FunctionInfo {
847                                name,
848                                line: first_line,
849                                end_line: func_node.end_position().row + 1,
850                                parameters: if params.is_empty() {
851                                    Vec::new()
852                                } else {
853                                    vec![params]
854                                },
855                                return_type,
856                            });
857                        }
858                    }
859                }
860
861                if let Some(class_node) = class_node {
862                    let name = class_name_text
863                        .or_else(|| {
864                            class_node
865                                .child_by_field_name("name")
866                                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
867                        })
868                        .unwrap_or_default();
869
870                    if !name.is_empty() {
871                        let inherits = if let Some(handler) = lang_info.extract_inheritance {
872                            handler(&class_node, source)
873                        } else {
874                            Vec::new()
875                        };
876                        classes.push(ClassInfo {
877                            name,
878                            line: class_node.start_position().row + 1,
879                            end_line: class_node.end_position().row + 1,
880                            methods: Vec::new(),
881                            fields: Vec::new(),
882                            inherits,
883                        });
884                    }
885                }
886            }
887        });
888
889        if timed_out {
890            return Err(ParserError::Timeout(tc.micros));
891        }
892
893        Ok(())
894    }
895
896    /// Returns the name of the enclosing function/method/subroutine for a given AST node,
897    /// by walking ancestors and matching all language-specific function container kinds.
898    fn enclosing_function_name(mut node: tree_sitter::Node<'_>, source: &str) -> Option<String> {
899        let mut depth = 0u32;
900        while let Some(parent) = node.parent() {
901            depth += 1;
902            // Cap at 64 hops: real function nesting rarely exceeds ~10 levels; 64 is a generous
903            // upper bound that guards against pathological/malformed ASTs without false negatives
904            // on legitimate code. Returns None (treated as <module>) when the cap is hit.
905            if depth > 64 {
906                return None;
907            }
908            let name_node = match parent.kind() {
909                // Direct name field: Rust, Python, Go, Java, TypeScript/TSX
910                "function_item"
911                | "method_item"
912                | "function_definition"
913                | "function_declaration"
914                | "method_declaration"
915                | "method_definition" => parent.child_by_field_name("name"),
916                // Fortran subroutine: name is inside subroutine_statement child
917                "subroutine" => {
918                    let mut cursor = parent.walk();
919                    parent
920                        .children(&mut cursor)
921                        .find(|c| c.kind() == "subroutine_statement")
922                        .and_then(|s| s.child_by_field_name("name"))
923                }
924                // Fortran function: name is inside function_statement child
925                "function" => {
926                    let mut cursor = parent.walk();
927                    parent
928                        .children(&mut cursor)
929                        .find(|c| c.kind() == "function_statement")
930                        .and_then(|s| s.child_by_field_name("name"))
931                }
932                _ => {
933                    node = parent;
934                    continue;
935                }
936            };
937            return name_node.map(|n| source[n.start_byte()..n.end_byte()].to_string());
938        }
939        // The loop exits here only when no parent was found (i.e., we reached the tree root
940        // without finding a function container). If the depth cap fired, we returned None early
941        // above. Nothing to assert here.
942        None
943    }
944
945    #[allow(clippy::too_many_arguments)]
946    fn extract_calls(
947        source: &str,
948        compiled: &CompiledQueries,
949        root: Node<'_>,
950        max_depth: Option<u32>,
951        calls: &mut Vec<CallInfo>,
952        call_frequency: &mut HashMap<String, usize>,
953        tc: TimeoutConfig,
954    ) -> Result<(), ParserError> {
955        let mut timed_out = false;
956
957        QUERY_CURSOR.with(|c| {
958            let mut cursor = c.borrow_mut();
959            cursor.set_max_start_depth(None);
960            if let Some(depth) = max_depth {
961                cursor.set_max_start_depth(Some(depth));
962            }
963
964            let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
965
966            while let Some(mat) = matches.next() {
967                // Check if we've hit the deadline
968                if tc.is_exceeded() {
969                    timed_out = true;
970                    break;
971                }
972                for capture in mat.captures {
973                    let capture_name = compiled.call.capture_names()[capture.index as usize];
974                    if capture_name != "call" {
975                        continue;
976                    }
977                    let node = capture.node;
978                    let call_name = source[node.start_byte()..node.end_byte()].to_string();
979                    *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
980
981                    let caller = Self::enclosing_function_name(node, source)
982                        .unwrap_or_else(|| "<module>".to_string());
983
984                    let mut arg_count = None;
985                    let mut arg_node = node;
986                    let mut hop = 0u32;
987                    let mut cap_hit = false;
988                    while let Some(parent) = arg_node.parent() {
989                        hop += 1;
990                        // Bounded parent traversal: cap at 16 hops to guard against pathological
991                        // walks on malformed/degenerate trees. Real call-expression nesting is
992                        // shallow (typically 1-3 levels). When the cap is hit we stop searching and
993                        // leave arg_count as None; the caller is still recorded, just without
994                        // argument-count information.
995                        if hop > 16 {
996                            cap_hit = true;
997                            break;
998                        }
999                        if parent.kind() == "call_expression" {
1000                            if let Some(args) = parent.child_by_field_name("arguments") {
1001                                arg_count = Some(args.named_child_count());
1002                            }
1003                            break;
1004                        }
1005                        arg_node = parent;
1006                    }
1007                    debug_assert!(
1008                        !cap_hit,
1009                        "extract_calls: parent traversal cap reached (hop > 16)"
1010                    );
1011
1012                    calls.push(CallInfo {
1013                        caller,
1014                        callee: call_name,
1015                        line: node.start_position().row + 1,
1016                        column: node.start_position().column,
1017                        arg_count,
1018                    });
1019                }
1020            }
1021        });
1022
1023        if timed_out {
1024            return Err(ParserError::Timeout(tc.micros));
1025        }
1026
1027        Ok(())
1028    }
1029
1030    // Extracts import statements from a pre-parsed syntax tree.
1031    fn extract_imports(
1032        source: &str,
1033        compiled: &CompiledQueries,
1034        root: Node<'_>,
1035        max_depth: Option<u32>,
1036        imports: &mut Vec<ImportInfo>,
1037        tc: TimeoutConfig,
1038    ) -> Result<(), ParserError> {
1039        let Some(ref import_query) = compiled.import else {
1040            return Ok(());
1041        };
1042        let mut timed_out = false;
1043
1044        QUERY_CURSOR.with(|c| {
1045            let mut cursor = c.borrow_mut();
1046            cursor.set_max_start_depth(None);
1047            if let Some(depth) = max_depth {
1048                cursor.set_max_start_depth(Some(depth));
1049            }
1050
1051            let mut matches = cursor.matches(import_query, root, source.as_bytes());
1052
1053            while let Some(mat) = matches.next() {
1054                // Check if we've hit the deadline
1055                if tc.is_exceeded() {
1056                    timed_out = true;
1057                    break;
1058                }
1059                for capture in mat.captures {
1060                    let capture_name = import_query.capture_names()[capture.index as usize];
1061                    if capture_name == "import_path" {
1062                        let node = capture.node;
1063                        let line = node.start_position().row + 1;
1064                        extract_imports_from_node(&node, source, "", line, imports);
1065                    }
1066                }
1067            }
1068        });
1069
1070        if timed_out {
1071            return Err(ParserError::Timeout(tc.micros));
1072        }
1073
1074        Ok(())
1075    }
1076
1077    fn extract_impl_methods(
1078        source: &str,
1079        compiled: &CompiledQueries,
1080        root: Node<'_>,
1081        max_depth: Option<u32>,
1082        classes: &mut [ClassInfo],
1083        tc: TimeoutConfig,
1084    ) -> Result<(), ParserError> {
1085        let Some(ref impl_query) = compiled.impl_block else {
1086            return Ok(());
1087        };
1088        let mut timed_out = false;
1089
1090        QUERY_CURSOR.with(|c| {
1091            let mut cursor = c.borrow_mut();
1092            cursor.set_max_start_depth(None);
1093            if let Some(depth) = max_depth {
1094                cursor.set_max_start_depth(Some(depth));
1095            }
1096
1097            let mut matches = cursor.matches(impl_query, root, source.as_bytes());
1098
1099            while let Some(mat) = matches.next() {
1100                // Check if we've hit the deadline
1101                if tc.is_exceeded() {
1102                    timed_out = true;
1103                    break;
1104                }
1105
1106                let mut impl_type_name = String::new();
1107                let mut method_name = String::new();
1108                let mut method_line = 0usize;
1109                let mut method_end_line = 0usize;
1110                let mut method_params = String::new();
1111                let mut method_return_type: Option<String> = None;
1112
1113                for capture in mat.captures {
1114                    let capture_name = impl_query.capture_names()[capture.index as usize];
1115                    let node = capture.node;
1116                    match capture_name {
1117                        "impl_type" => {
1118                            impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
1119                        }
1120                        "method_name" => {
1121                            method_name = source[node.start_byte()..node.end_byte()].to_string();
1122                        }
1123                        "method_params" => {
1124                            method_params = source[node.start_byte()..node.end_byte()].to_string();
1125                        }
1126                        "method" => {
1127                            let mut method_attrs: Vec<Node> = Vec::new();
1128                            let mut msib = node.prev_named_sibling();
1129                            while let Some(s) = msib {
1130                                if s.kind() == "attribute_item" {
1131                                    method_attrs.push(s);
1132                                    msib = s.prev_named_sibling();
1133                                } else {
1134                                    break;
1135                                }
1136                            }
1137                            method_line = method_attrs
1138                                .last()
1139                                .map(|n| n.start_position().row + 1)
1140                                .unwrap_or_else(|| node.start_position().row + 1);
1141                            method_end_line = node.end_position().row + 1;
1142                            method_return_type = node
1143                                .child_by_field_name("return_type")
1144                                .map(|r| source[r.start_byte()..r.end_byte()].to_string());
1145                        }
1146                        _ => {}
1147                    }
1148                }
1149
1150                if !impl_type_name.is_empty() && !method_name.is_empty() {
1151                    let func = FunctionInfo {
1152                        name: method_name,
1153                        line: method_line,
1154                        end_line: method_end_line,
1155                        parameters: if method_params.is_empty() {
1156                            Vec::new()
1157                        } else {
1158                            vec![method_params]
1159                        },
1160                        return_type: method_return_type,
1161                    };
1162                    if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
1163                        class.methods.push(func);
1164                    }
1165                }
1166            }
1167        });
1168
1169        if timed_out {
1170            return Err(ParserError::Timeout(tc.micros));
1171        }
1172
1173        Ok(())
1174    }
1175
1176    fn extract_references(
1177        source: &str,
1178        compiled: &CompiledQueries,
1179        root: Node<'_>,
1180        max_depth: Option<u32>,
1181        references: &mut Vec<ReferenceInfo>,
1182        tc: TimeoutConfig,
1183    ) -> Result<(), ParserError> {
1184        let Some(ref ref_query) = compiled.reference else {
1185            return Ok(());
1186        };
1187        let mut seen_refs = std::collections::HashSet::new();
1188        let mut timed_out = false;
1189
1190        QUERY_CURSOR.with(|c| {
1191            let mut cursor = c.borrow_mut();
1192            cursor.set_max_start_depth(None);
1193            if let Some(depth) = max_depth {
1194                cursor.set_max_start_depth(Some(depth));
1195            }
1196
1197            let mut matches = cursor.matches(ref_query, root, source.as_bytes());
1198
1199            while let Some(mat) = matches.next() {
1200                // Check if we've hit the deadline
1201                if tc.is_exceeded() {
1202                    timed_out = true;
1203                    break;
1204                }
1205
1206                for capture in mat.captures {
1207                    let capture_name = ref_query.capture_names()[capture.index as usize];
1208                    if capture_name == "type_ref" {
1209                        let node = capture.node;
1210                        let type_ref = source[node.start_byte()..node.end_byte()].to_string();
1211                        if seen_refs.insert(type_ref.clone()) {
1212                            references.push(ReferenceInfo {
1213                                symbol: type_ref,
1214                                reference_type: ReferenceType::Usage,
1215                                // location is intentionally empty here; set by the caller (analyze_file)
1216                                location: String::new(),
1217                                line: node.start_position().row + 1,
1218                            });
1219                        }
1220                    }
1221                }
1222            }
1223        });
1224
1225        if timed_out {
1226            return Err(ParserError::Timeout(tc.micros));
1227        }
1228
1229        Ok(())
1230    }
1231
1232    /// Extract impl-trait blocks from an already-parsed tree.
1233    ///
1234    /// Called during `extract()` for Rust files to avoid a second parse.
1235    /// Returns an empty vec if the query is not available.
1236    fn extract_impl_traits_from_tree(
1237        source: &str,
1238        compiled: &CompiledQueries,
1239        root: Node<'_>,
1240        tc: TimeoutConfig,
1241    ) -> Result<Vec<ImplTraitInfo>, ParserError> {
1242        let Some(query) = &compiled.impl_trait else {
1243            return Ok(vec![]);
1244        };
1245
1246        let mut results = Vec::new();
1247        let mut timed_out = false;
1248
1249        QUERY_CURSOR.with(|c| {
1250            let mut cursor = c.borrow_mut();
1251            cursor.set_max_start_depth(None);
1252
1253            let mut matches = cursor.matches(query, root, source.as_bytes());
1254
1255            while let Some(mat) = matches.next() {
1256                // Check if we've hit the deadline
1257                if tc.is_exceeded() {
1258                    timed_out = true;
1259                    break;
1260                }
1261
1262                let mut trait_name = String::new();
1263                let mut impl_type = String::new();
1264                let mut line = 0usize;
1265
1266                for capture in mat.captures {
1267                    let capture_name = query.capture_names()[capture.index as usize];
1268                    let node = capture.node;
1269                    let text = source[node.start_byte()..node.end_byte()].to_string();
1270                    match capture_name {
1271                        "trait_name" => {
1272                            trait_name = text;
1273                            line = node.start_position().row + 1;
1274                        }
1275                        "impl_type" => {
1276                            impl_type = text;
1277                        }
1278                        _ => {}
1279                    }
1280                }
1281
1282                if !trait_name.is_empty() && !impl_type.is_empty() {
1283                    results.push(ImplTraitInfo {
1284                        trait_name,
1285                        impl_type,
1286                        path: PathBuf::new(), // Path will be set by caller
1287                        line,
1288                    });
1289                }
1290            }
1291        });
1292
1293        if timed_out {
1294            return Err(ParserError::Timeout(tc.micros));
1295        }
1296
1297        Ok(results)
1298    }
1299
1300    /// Extract def-use sites (write/read locations) for a given symbol within a file.
1301    ///
1302    /// Runs the defuse query to find all definition and use sites of a symbol.
1303    /// Returns empty vec if no defuse query is available for this language.
1304    ///
1305    /// # Arguments
1306    ///
1307    /// * `source` - The source code text
1308    /// * `compiled` - Compiled tree-sitter queries
1309    /// * `root` - Root node of the AST
1310    /// * `symbol_name` - The symbol to search for (must match exactly)
1311    /// * `file_path` - Relative file path for site reporting
1312    fn extract_def_use(
1313        source: &str,
1314        compiled: &CompiledQueries,
1315        root: Node<'_>,
1316        symbol_name: &str,
1317        file_path: &str,
1318        max_depth: Option<u32>,
1319    ) -> Vec<crate::types::DefUseSite> {
1320        let Some(ref defuse_query) = compiled.defuse else {
1321            return vec![];
1322        };
1323
1324        let mut sites = Vec::new();
1325        let source_lines: Vec<&str> = source.lines().collect();
1326        // Track byte offsets that already have a write or writeread capture so
1327        // duplicate read captures for the same identifier are suppressed.
1328        let mut write_offsets = std::collections::HashSet::new();
1329
1330        QUERY_CURSOR.with(|c| {
1331            let mut cursor = c.borrow_mut();
1332            cursor.set_max_start_depth(None);
1333            if let Some(depth) = max_depth {
1334                cursor.set_max_start_depth(Some(depth));
1335            }
1336            let mut matches = cursor.matches(defuse_query, root, source.as_bytes());
1337
1338            while let Some(mat) = matches.next() {
1339                for capture in mat.captures {
1340                    let capture_name = defuse_query.capture_names()[capture.index as usize];
1341                    let node = capture.node;
1342                    let node_text = node.utf8_text(source.as_bytes()).unwrap_or_default();
1343
1344                    // Only collect if the captured node matches the target symbol
1345                    if node_text != symbol_name {
1346                        continue;
1347                    }
1348
1349                    // Classify capture by prefix
1350                    let kind = if capture_name.starts_with("write.") {
1351                        crate::types::DefUseKind::Write
1352                    } else if capture_name.starts_with("read.") {
1353                        crate::types::DefUseKind::Read
1354                    } else if capture_name.starts_with("writeread.") {
1355                        crate::types::DefUseKind::WriteRead
1356                    } else {
1357                        continue;
1358                    };
1359
1360                    let byte_offset = node.start_byte();
1361
1362                    // De-duplicate: skip read captures for offsets already captured as write/writeread
1363                    if kind == crate::types::DefUseKind::Read
1364                        && write_offsets.contains(&byte_offset)
1365                    {
1366                        continue;
1367                    }
1368                    if kind != crate::types::DefUseKind::Read {
1369                        write_offsets.insert(byte_offset);
1370                    }
1371
1372                    // Get line number (1-indexed) and center-line snippet.
1373                    // Always produce a 3-line window so snippet_one_line (index 1) is safe.
1374                    let line = node.start_position().row + 1;
1375                    let snippet = {
1376                        let row = node.start_position().row;
1377                        let last_line = source_lines.len().saturating_sub(1);
1378                        let prev = if row > 0 { row - 1 } else { 0 };
1379                        let next = std::cmp::min(row + 1, last_line);
1380                        let prev_text = if row == 0 {
1381                            ""
1382                        } else {
1383                            source_lines[prev].trim_end()
1384                        };
1385                        let cur_text = source_lines[row].trim_end();
1386                        let next_text = if row >= last_line {
1387                            ""
1388                        } else {
1389                            source_lines[next].trim_end()
1390                        };
1391                        format!("{prev_text}\n{cur_text}\n{next_text}")
1392                    };
1393
1394                    // Get enclosing function scope
1395                    let enclosing_scope = Self::enclosing_function_name(node, source);
1396
1397                    let column = node.start_position().column;
1398                    sites.push(crate::types::DefUseSite {
1399                        kind,
1400                        symbol: node_text.to_string(),
1401                        file: file_path.to_string(),
1402                        line,
1403                        column,
1404                        snippet,
1405                        enclosing_scope,
1406                    });
1407                }
1408            }
1409        });
1410
1411        sites
1412    }
1413
1414    /// Parse `source` in `language`, run the defuse query for `symbol`, and return all sites.
1415    /// Returns an empty vec if the language has no defuse query or parsing fails.
1416    pub(crate) fn extract_def_use_for_file(
1417        source: &str,
1418        language: &str,
1419        symbol: &str,
1420        file_path: &str,
1421        ast_recursion_limit: Option<usize>,
1422    ) -> Vec<crate::types::DefUseSite> {
1423        let Some(lang_info) = crate::languages::get_language_info(language) else {
1424            return vec![];
1425        };
1426        let Ok(compiled) = get_compiled_queries(language) else {
1427            return vec![];
1428        };
1429        if compiled.defuse.is_none() {
1430            return vec![];
1431        }
1432
1433        let tree = match PARSER.with(|p| {
1434            let mut parser = p.borrow_mut();
1435            if parser.set_language(&lang_info.language).is_err() {
1436                return None;
1437            }
1438            parser.parse(source, None)
1439        }) {
1440            Some(t) => t,
1441            None => return vec![],
1442        };
1443
1444        let root = tree.root_node();
1445
1446        // Convert ast_recursion_limit the same way extract() does:
1447        // 0 means unlimited (None); positive values become Some(u32).
1448        let max_depth: Option<u32> = ast_recursion_limit
1449            .filter(|&limit| limit > 0)
1450            .and_then(|limit| u32::try_from(limit).ok());
1451
1452        Self::extract_def_use(source, compiled, root, symbol, file_path, max_depth)
1453    }
1454}
1455
1456/// Extract `impl Trait for Type` blocks from Rust source.
1457///
1458/// Runs independently of `extract_references` to avoid shared deduplication state.
1459/// Returns an empty vec for non-Rust source (no error; caller decides).
1460#[must_use]
1461pub fn extract_impl_traits(source: &str, path: &Path) -> Vec<ImplTraitInfo> {
1462    let Some(lang_info) = get_language_info("rust") else {
1463        return vec![];
1464    };
1465
1466    let Ok(compiled) = get_compiled_queries("rust") else {
1467        return vec![];
1468    };
1469
1470    let Some(query) = &compiled.impl_trait else {
1471        return vec![];
1472    };
1473
1474    let Some(tree) = PARSER.with(|p| {
1475        let mut parser = p.borrow_mut();
1476        let _ = parser.set_language(&lang_info.language);
1477        parser.parse(source, None)
1478    }) else {
1479        return vec![];
1480    };
1481
1482    let root = tree.root_node();
1483    let mut results = Vec::new();
1484
1485    QUERY_CURSOR.with(|c| {
1486        let mut cursor = c.borrow_mut();
1487        cursor.set_max_start_depth(None);
1488        let mut matches = cursor.matches(query, root, source.as_bytes());
1489
1490        while let Some(mat) = matches.next() {
1491            let mut trait_name = String::new();
1492            let mut impl_type = String::new();
1493            let mut line = 0usize;
1494
1495            for capture in mat.captures {
1496                let capture_name = query.capture_names()[capture.index as usize];
1497                let node = capture.node;
1498                let text = source[node.start_byte()..node.end_byte()].to_string();
1499                match capture_name {
1500                    "trait_name" => {
1501                        trait_name = text;
1502                        line = node.start_position().row + 1;
1503                    }
1504                    "impl_type" => {
1505                        impl_type = text;
1506                    }
1507                    _ => {}
1508                }
1509            }
1510
1511            if !trait_name.is_empty() && !impl_type.is_empty() {
1512                results.push(ImplTraitInfo {
1513                    trait_name,
1514                    impl_type,
1515                    path: path.to_path_buf(),
1516                    line,
1517                });
1518            }
1519        }
1520    });
1521
1522    results
1523}
1524
1525/// Execute a custom tree-sitter query against source code.
1526///
1527/// This is the internal implementation of the public `execute_query` function.
1528pub fn execute_query_impl(
1529    language: &str,
1530    source: &str,
1531    query_str: &str,
1532) -> Result<Vec<crate::QueryCapture>, ParserError> {
1533    // Get the tree-sitter language from the language name
1534    let ts_language = crate::languages::get_ts_language(language)
1535        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
1536
1537    let mut parser = Parser::new();
1538    parser
1539        .set_language(&ts_language)
1540        .map_err(|e| ParserError::QueryError(e.to_string()))?;
1541
1542    let tree = parser
1543        .parse(source.as_bytes(), None)
1544        .ok_or_else(|| ParserError::QueryError("failed to parse source".to_string()))?;
1545
1546    let query =
1547        Query::new(&ts_language, query_str).map_err(|e| ParserError::QueryError(e.to_string()))?;
1548
1549    let source_bytes = source.as_bytes();
1550
1551    let mut captures = Vec::new();
1552    QUERY_CURSOR.with(|c| {
1553        let mut cursor = c.borrow_mut();
1554        cursor.set_max_start_depth(None);
1555        let mut matches = cursor.matches(&query, tree.root_node(), source_bytes);
1556        while let Some(m) = matches.next() {
1557            for cap in m.captures {
1558                let node = cap.node;
1559                let capture_name = query.capture_names()[cap.index as usize].to_string();
1560                let text = node.utf8_text(source_bytes).unwrap_or("").to_string();
1561                captures.push(crate::QueryCapture {
1562                    capture_name,
1563                    text,
1564                    start_line: node.start_position().row,
1565                    end_line: node.end_position().row,
1566                    start_byte: node.start_byte(),
1567                    end_byte: node.end_byte(),
1568                });
1569            }
1570        }
1571    });
1572    Ok(captures)
1573}
1574
// Language-feature-gated tests (require lang-rust); see also tests_unsupported below
#[cfg(all(test, feature = "lang-rust"))]
mod tests {
    use super::*;
    use std::path::Path;

    /// Minimal single-function Rust source shared by the recursion-limit tests.
    const HELLO_FN: &str = "fn hello() -> u32 { 42 }";

    /// A recursion limit of `Some(0)` must find as many functions as `None`.
    #[test]
    fn test_ast_recursion_limit_zero_is_unlimited() {
        // Arrange + Act: run both extractions before inspecting either outcome
        let unset = SemanticExtractor::extract(HELLO_FN, "rust", None, None);
        let zero = SemanticExtractor::extract(HELLO_FN, "rust", Some(0), None);

        // Assert
        let Ok(unset) = unset else {
            panic!("extract with None failed");
        };
        let Ok(zero) = zero else {
            panic!("extract with Some(0) failed");
        };
        let unset_count = unset.functions.len();
        assert!(
            unset_count > 0,
            "extract with None should find at least one function in the test source"
        );
        assert_eq!(
            unset_count,
            zero.functions.len(),
            "ast_recursion_limit=0 should behave identically to unset (unlimited)"
        );
    }

    /// `use a::b as c;` records the alias `c` as an import item.
    #[test]
    fn test_rust_use_as_imports() {
        // Arrange
        let source = "use std::io as stdio;";
        // Act
        let analysis = SemanticExtractor::extract(source, "rust", None, None).unwrap();
        // Assert: alias "stdio" is captured as an import item
        let alias_seen = analysis
            .imports
            .iter()
            .flat_map(|imp| imp.items.iter())
            .any(|item| item == "stdio");
        assert!(
            alias_seen,
            "expected import alias 'stdio' in {:?}",
            analysis.imports
        );
    }

    /// Same as above, but the aliased path is a bare identifier.
    #[test]
    fn test_rust_use_as_clause_plain_identifier() {
        // Arrange: use_as_clause with plain identifier (no scoped_identifier)
        // exercises the _ => prefix.to_string() arm
        let source = "use io as stdio;";
        // Act
        let analysis = SemanticExtractor::extract(source, "rust", None, None).unwrap();
        // Assert: alias "stdio" is captured as an import item
        let alias_seen = analysis
            .imports
            .iter()
            .flat_map(|imp| imp.items.iter())
            .any(|item| item == "stdio");
        assert!(
            alias_seen,
            "expected import alias 'stdio' from plain identifier in {:?}",
            analysis.imports
        );
    }

    /// Entries of a scoped use list that carry their own prefix resolve under `std::io`.
    #[test]
    fn test_rust_scoped_use_with_prefix() {
        // Arrange: scoped_use_list with non-empty prefix
        let source = "use std::{io::Read, io::Write};";
        // Act
        let analysis = SemanticExtractor::extract(source, "rust", None, None).unwrap();
        // Assert: both Read and Write appear as items with std::io module
        let io_items: Vec<&str> = analysis
            .imports
            .iter()
            .filter(|imp| imp.module.starts_with("std::io"))
            .flat_map(|imp| imp.items.iter().map(|s| s.as_str()))
            .collect();
        let all_present = ["Read", "Write"]
            .iter()
            .all(|wanted| io_items.contains(wanted));
        assert!(
            all_present,
            "expected 'Read' and 'Write' items under module with std::io, got {:?}",
            analysis.imports
        );
    }

    /// A scoped use list of bare names yields one item per name under module `std`.
    #[test]
    fn test_rust_scoped_use_imports() {
        // Arrange
        let source = "use std::{fs, io};";
        // Act
        let analysis = SemanticExtractor::extract(source, "rust", None, None).unwrap();
        // Assert: both "fs" and "io" appear as import items under module "std"
        let mut items: Vec<&str> = Vec::new();
        for imp in analysis.imports.iter().filter(|imp| imp.module == "std") {
            items.extend(imp.items.iter().map(|s| s.as_str()));
        }
        let all_present = ["fs", "io"].iter().all(|wanted| items.contains(wanted));
        assert!(
            all_present,
            "expected 'fs' and 'io' items under module 'std', got {:?}",
            items
        );
    }

    /// A glob import is recorded as the single item `*` under its module path.
    #[test]
    fn test_rust_wildcard_imports() {
        // Arrange
        let source = "use std::io::*;";
        // Act
        let analysis = SemanticExtractor::extract(source, "rust", None, None).unwrap();
        // Assert: wildcard import with module "std::io"
        let has_wildcard = analysis
            .imports
            .iter()
            .any(|imp| imp.module == "std::io" && imp.items == ["*"]);
        assert!(
            has_wildcard,
            "expected wildcard import with module 'std::io', got {:?}",
            analysis.imports
        );
    }

    /// `extract_impl_traits` reports the trait and the implementing type of an impl block.
    #[test]
    fn test_extract_impl_traits_standalone() {
        // Arrange: source with a simple impl Trait for Type
        let source = "\nstruct Foo;\ntrait Display {}\nimpl Display for Foo {}\n";
        // Act
        let found = extract_impl_traits(source, Path::new("test.rs"));
        // Assert
        assert_eq!(found.len(), 1, "expected one impl trait, got {:?}", found);
        let only = &found[0];
        assert_eq!(only.trait_name, "Display");
        assert_eq!(only.impl_type, "Foo");
    }

    /// A recursion limit that does not fit in `u32` is rejected with `ParseError`.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_ast_recursion_limit_overflow() {
        // Arrange: limit larger than u32::MAX triggers a ParseError on 64-bit targets
        let source = "fn foo() {}";
        let too_big = usize::try_from(u64::from(u32::MAX) + 1).unwrap();
        // Act
        let outcome = SemanticExtractor::extract(source, "rust", Some(too_big), None);
        // Assert
        let rejected = matches!(outcome, Err(ParserError::ParseError(_)));
        assert!(
            rejected,
            "expected ParseError for oversized limit, got {:?}",
            outcome
        );
    }

    /// A small positive recursion limit still extracts the top-level function.
    #[test]
    fn test_ast_recursion_limit_some() {
        // Arrange + Act: Some(depth) exercises the max_depth Some branch
        let outcome = SemanticExtractor::extract(HELLO_FN, "rust", Some(5), None);
        // Assert: should succeed without error and extract functions
        assert!(
            outcome.is_ok(),
            "extract with Some(5) failed: {:?}",
            outcome
        );
        let analysis = outcome.unwrap();
        assert!(
            !analysis.functions.is_empty(),
            "expected at least one function with depth limit 5"
        );
    }

    /// A `let` binding followed by a use yields both a write site and a read site.
    #[test]
    fn test_extract_def_use_for_file_finds_write_and_read() {
        use crate::types::DefUseKind;

        // Arrange
        let source = r#"
fn main() {
    let count = 0;
    println!("{}", count);
}
"#;
        // Act
        let sites =
            SemanticExtractor::extract_def_use_for_file(source, "rust", "count", "src/main.rs", None);

        // Assert
        assert!(
            !sites.is_empty(),
            "expected at least one def-use site for 'count'"
        );
        let has_kind = |wanted: DefUseKind| sites.iter().any(|site| site.kind == wanted);
        let has_write = has_kind(DefUseKind::Write);
        let has_read = has_kind(DefUseKind::Read);
        assert!(has_write, "expected a write site for 'count'");
        assert!(has_read, "expected a read site for 'count'");
        assert_eq!(sites[0].file, "src/main.rs");
    }

    /// Looking up a symbol that never appears produces no sites.
    #[test]
    fn test_extract_def_use_for_file_no_match_returns_empty() {
        // Arrange
        let source = "fn foo() { let x = 1; }";

        // Act
        let sites = SemanticExtractor::extract_def_use_for_file(
            source,
            "rust",
            "nonexistent_symbol",
            "src/lib.rs",
            None,
        );

        // Assert
        assert!(sites.is_empty(), "expected empty for nonexistent symbol");
    }
}
1798
// Language-feature-gated tests for Python
#[cfg(all(test, feature = "lang-python"))]
mod tests_python {
    use super::*;

    /// `from . import foo` is recorded as an import whose module contains a dot.
    #[test]
    fn test_python_relative_import() {
        // Arrange: relative import (from . import foo)
        let source = "from . import foo\n";
        // Act
        let result = SemanticExtractor::extract(source, "python", None, None).unwrap();
        // Assert: relative import should be captured
        let has_relative = result.imports.iter().any(|imp| imp.module.contains('.'));
        assert!(
            has_relative,
            "expected relative import in {:?}",
            result.imports
        );
    }

    /// `from os import path as p` records the original name `path` under module `os`.
    #[test]
    fn test_python_aliased_import() {
        // Arrange: aliased import (from os import path as p)
        // Note: tree-sitter-python extracts "path" (the original name), not the alias "p"
        let source = "from os import path as p\n";
        // Act
        let result = SemanticExtractor::extract(source, "python", None, None).unwrap();
        // Assert: "path" should be in items (alias is captured separately by aliased_import node)
        let has_path = result
            .imports
            .iter()
            .any(|imp| imp.module == "os" && imp.items.iter().any(|i| i == "path"));
        assert!(
            has_path,
            "expected import 'path' from module 'os' in {:?}",
            result.imports
        );
    }
}

// Timeout tests parse Rust source, so they are gated on lang-rust rather than lang-python.
// (They previously lived in tests_python, where they failed without lang-rust and were
// skipped entirely when only lang-rust was enabled.)
#[cfg(all(test, feature = "lang-rust"))]
mod tests_timeout {
    use super::*;

    /// With no timeout configured, extraction of a trivial source succeeds.
    #[test]
    fn test_parse_no_timeout_when_none() {
        // Arrange: simple Rust source with no deadline
        let source = "fn hello() -> u32 { 42 }";
        // Act: extract with deadline=None (no timeout)
        let result = SemanticExtractor::extract(source, "rust", None, None);
        // Assert: should succeed normally
        assert!(result.is_ok(), "extract with deadline=None should succeed");
        let analysis = result.unwrap();
        assert!(
            !analysis.functions.is_empty(),
            "should find at least one function"
        );
    }

    /// A 1-microsecond timeout is reported as `ParserError::Timeout`.
    #[test]
    fn test_parse_timeout_triggers_error() {
        // Arrange: simple Rust source with a very short timeout (1 microsecond)
        let source = "fn hello() -> u32 { 42 }";
        // Act: extract with a very short timeout that will expire immediately.
        // NOTE(review): relies on extraction always taking longer than 1 microsecond.
        let result = SemanticExtractor::extract(source, "rust", None, Some(1u64));
        // Assert: should return a Timeout error
        assert!(
            matches!(result, Err(ParserError::Timeout(_))),
            "expected Timeout error, got {:?}",
            result
        );
    }
}
1867
// Tests that do not require any language feature gate
#[cfg(test)]
mod tests_unsupported {
    use super::*;

    /// Fails the calling test unless `result` is
    /// `Err(ParserError::UnsupportedLanguage(..))` naming the language "cobol".
    #[track_caller]
    fn assert_cobol_rejected<T: std::fmt::Debug>(result: &Result<T, ParserError>) {
        let rejected =
            matches!(result, Err(ParserError::UnsupportedLanguage(lang)) if lang == "cobol");
        assert!(
            rejected,
            "expected UnsupportedLanguage error, got {:?}",
            result
        );
    }

    /// `ElementExtractor` reports an unknown language by name.
    #[test]
    fn test_element_extractor_unsupported_language() {
        // Arrange + Act
        let outcome = ElementExtractor::extract_with_depth("x = 1", "cobol");
        // Assert
        assert_cobol_rejected(&outcome);
    }

    /// `SemanticExtractor` reports an unknown language by name.
    #[test]
    fn test_semantic_extractor_unsupported_language() {
        // Arrange + Act
        let outcome = SemanticExtractor::extract("x = 1", "cobol", None, None);
        // Assert
        assert_cobol_rejected(&outcome);
    }
}