Skip to main content

aptu_coder_core/
parser.rs

1// SPDX-FileCopyrightText: 2026 aptu-coder contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Tree-sitter-based parser for extracting semantic structure from source code.
4//!
5//! This module provides language-agnostic parsing using tree-sitter queries to extract
6//! functions, classes, imports, references, and other semantic elements from source files.
7//! Two main extractors handle different use cases:
8//!
9//! - [`ElementExtractor`]: Quick extraction of function and class counts.
10//! - [`SemanticExtractor`]: Detailed semantic analysis with calls, imports, and references.
11
12use crate::languages::{get_language_info, try_regex_fallback};
13use crate::types::{
14    CallInfo, ClassInfo, FunctionInfo, ImplTraitInfo, ImportInfo, ReferenceInfo, ReferenceType,
15    SemanticAnalysis,
16};
17use std::cell::RefCell;
18use std::collections::HashMap;
19use std::path::{Path, PathBuf};
20use std::sync::LazyLock;
21use thiserror::Error;
22use tracing::instrument;
23use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
24
25#[derive(Debug, Error)]
26#[non_exhaustive]
27pub enum ParserError {
28    #[error("Unsupported language: {0}")]
29    UnsupportedLanguage(String),
30    #[error("Failed to parse file: {0}")]
31    ParseError(String),
32    #[error("Invalid UTF-8 in file")]
33    InvalidUtf8,
34    #[error("Query error: {0}")]
35    QueryError(String),
36    #[error("Parse timeout exceeded: {0} microseconds")]
37    Timeout(u64),
38}
39
/// Groups a query deadline with the configured timeout duration for use in private extract helpers.
/// Avoids threading two separate values through every helper signature.
#[derive(Clone, Copy)]
struct TimeoutConfig {
    /// Absolute deadline; `None` means no timeout.
    pub deadline: Option<std::time::Instant>,
    /// The configured timeout in microseconds (used in `ParserError::Timeout`);
    /// `0` when no timeout was configured.
    pub micros: u64,
}

impl TimeoutConfig {
    /// Builds a config whose deadline lies `timeout_micros` microseconds from now.
    ///
    /// `None` disables the timeout. A timeout so large that the resulting
    /// `Instant` cannot be represented is treated as "no deadline" instead of
    /// panicking, which is what `Instant + Duration` would do on overflow.
    fn new(timeout_micros: Option<u64>) -> Self {
        let deadline = timeout_micros.and_then(|us| {
            std::time::Instant::now().checked_add(std::time::Duration::from_micros(us))
        });
        Self {
            deadline,
            micros: timeout_micros.unwrap_or(0),
        }
    }

    /// Returns `true` if the deadline has been reached.
    ///
    /// Always `false` when no deadline is set.
    fn is_exceeded(self) -> bool {
        self.deadline
            .is_some_and(|d| std::time::Instant::now() >= d)
    }
}
66
67/// Compiled tree-sitter queries for a language.
68/// Stores all query types: mandatory (element, call) and optional (import, impl, reference).
69struct CompiledQueries {
70    pub element: Query,
71    pub call: Query,
72    pub import: Option<Query>,
73    pub impl_block: Option<Query>,
74    pub reference: Option<Query>,
75    pub impl_trait: Option<Query>,
76    pub defuse: Option<Query>,
77}
78
79/// Build compiled queries for a given language.
80///
81/// The `map_err` closures inside are only reachable if a hardcoded query string is
82/// invalid, which cannot happen at runtime -- exclude them from coverage instrumentation.
83#[cfg_attr(coverage_nightly, coverage(off))]
84fn build_compiled_queries(
85    lang_info: &crate::languages::LanguageInfo,
86) -> Result<CompiledQueries, ParserError> {
87    let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
88        ParserError::QueryError(format!(
89            "Failed to compile element query for {}: {}",
90            lang_info.name, e
91        ))
92    })?;
93
94    let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
95        ParserError::QueryError(format!(
96            "Failed to compile call query for {}: {}",
97            lang_info.name, e
98        ))
99    })?;
100
101    let import = if let Some(import_query_str) = lang_info.import_query {
102        Some(
103            Query::new(&lang_info.language, import_query_str).map_err(|e| {
104                ParserError::QueryError(format!(
105                    "Failed to compile import query for {}: {}",
106                    lang_info.name, e
107                ))
108            })?,
109        )
110    } else {
111        None
112    };
113
114    let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
115        Some(
116            Query::new(&lang_info.language, impl_query_str).map_err(|e| {
117                ParserError::QueryError(format!(
118                    "Failed to compile impl query for {}: {}",
119                    lang_info.name, e
120                ))
121            })?,
122        )
123    } else {
124        None
125    };
126
127    let reference = if let Some(ref_query_str) = lang_info.reference_query {
128        Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
129            ParserError::QueryError(format!(
130                "Failed to compile reference query for {}: {}",
131                lang_info.name, e
132            ))
133        })?)
134    } else {
135        None
136    };
137
138    let impl_trait = if let Some(impl_trait_query_str) = lang_info.impl_trait_query {
139        Some(
140            Query::new(&lang_info.language, impl_trait_query_str).map_err(|e| {
141                ParserError::QueryError(format!(
142                    "Failed to compile impl_trait query for {}: {}",
143                    lang_info.name, e
144                ))
145            })?,
146        )
147    } else {
148        None
149    };
150
151    let defuse = if let Some(defuse_query_str) = lang_info.defuse_query {
152        Some(
153            Query::new(&lang_info.language, defuse_query_str).map_err(|e| {
154                ParserError::QueryError(format!(
155                    "Failed to compile defuse query for {}: {}",
156                    lang_info.name, e
157                ))
158            })?,
159        )
160    } else {
161        None
162    };
163
164    Ok(CompiledQueries {
165        element,
166        call,
167        import,
168        impl_block,
169        reference,
170        impl_trait,
171        defuse,
172    })
173}
174
175/// Initialize the query cache with compiled queries for all supported languages.
176///
177/// Excluded from coverage: the `Err` arm is unreachable because `build_compiled_queries`
178/// only fails on invalid hardcoded query strings.
179#[cfg_attr(coverage_nightly, coverage(off))]
180fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
181    let mut cache = HashMap::new();
182
183    for lang_name in crate::lang::supported_languages() {
184        if let Some(lang_info) = get_language_info(lang_name) {
185            match build_compiled_queries(&lang_info) {
186                Ok(compiled) => {
187                    cache.insert(*lang_name, compiled);
188                }
189                Err(e) => {
190                    tracing::error!(
191                        "Failed to compile queries for language {}: {}",
192                        lang_name,
193                        e
194                    );
195                }
196            }
197        }
198    }
199
200    cache
201}
202
203/// Lazily initialized cache of compiled queries per language.
204static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
205    LazyLock::new(init_query_cache);
206
207/// Get compiled queries for a language from the cache.
208fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
209    QUERY_CACHE
210        .get(language)
211        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
212}
213
214thread_local! {
215    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
216    static QUERY_CURSOR: RefCell<QueryCursor> = RefCell::new(QueryCursor::new());
217}
218
219/// Canonical API for extracting element counts from source code.
220pub struct ElementExtractor;
221
222impl ElementExtractor {
223    /// Extract function and class counts from source code.
224    ///
225    /// # Errors
226    ///
227    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
228    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
229    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
230    #[instrument(skip_all, fields(language))]
231    pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
232        let lang_info = get_language_info(language)
233            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
234
235        let tree = PARSER.with(|p| {
236            let mut parser = p.borrow_mut();
237            parser
238                .set_language(&lang_info.language)
239                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
240            parser
241                .parse(source, None)
242                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
243        })?;
244
245        let compiled = get_compiled_queries(language)?;
246
247        let (function_count, class_count) = QUERY_CURSOR.with(|c| {
248            let mut cursor = c.borrow_mut();
249            cursor.set_max_start_depth(None);
250            let mut function_count = 0;
251            let mut class_count = 0;
252
253            let mut matches =
254                cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
255            while let Some(mat) = matches.next() {
256                for capture in mat.captures {
257                    let capture_name = compiled.element.capture_names()[capture.index as usize];
258                    match capture_name {
259                        "function" => function_count += 1,
260                        "class" => class_count += 1,
261                        _ => {}
262                    }
263                }
264            }
265            (function_count, class_count)
266        });
267
268        tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
269
270        Ok((function_count, class_count))
271    }
272}
273
274/// Recursively extract `ImportInfo` entries from a use-clause node, respecting all Rust
275/// use-declaration forms (`scoped_identifier`, `scoped_use_list`, `use_list`,
276/// `use_as_clause`, `use_wildcard`, bare `identifier`).
277#[allow(clippy::too_many_lines)] // exhaustive match over all supported Rust use-clause forms; splitting harms readability
278fn extract_imports_from_node(
279    node: &Node,
280    source: &str,
281    prefix: &str,
282    line: usize,
283    imports: &mut Vec<ImportInfo>,
284) {
285    match node.kind() {
286        // Simple identifier: `use foo;` or an item inside `{foo, bar}`
287        "identifier" | "self" | "super" | "crate" => {
288            let name = source[node.start_byte()..node.end_byte()].to_string();
289            imports.push(ImportInfo {
290                module: prefix.to_string(),
291                items: vec![name],
292                line,
293            });
294        }
295        // Qualified path: `std::collections::HashMap`
296        "scoped_identifier" => {
297            let item = node
298                .child_by_field_name("name")
299                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
300                .unwrap_or_default();
301            let module = node.child_by_field_name("path").map_or_else(
302                || prefix.to_string(),
303                |p| {
304                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
305                    if prefix.is_empty() {
306                        path_text
307                    } else {
308                        format!("{prefix}::{path_text}")
309                    }
310                },
311            );
312            if !item.is_empty() {
313                imports.push(ImportInfo {
314                    module,
315                    items: vec![item],
316                    line,
317                });
318            }
319        }
320        // `std::{io, fs}` — path prefix followed by a brace list
321        "scoped_use_list" => {
322            let new_prefix = node.child_by_field_name("path").map_or_else(
323                || prefix.to_string(),
324                |p| {
325                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
326                    if prefix.is_empty() {
327                        path_text
328                    } else {
329                        format!("{prefix}::{path_text}")
330                    }
331                },
332            );
333            if let Some(list) = node.child_by_field_name("list") {
334                extract_imports_from_node(&list, source, &new_prefix, line, imports);
335            }
336        }
337        // `{HashMap, HashSet}` — brace-enclosed list of items
338        "use_list" => {
339            let mut cursor = node.walk();
340            for child in node.children(&mut cursor) {
341                match child.kind() {
342                    "{" | "}" | "," => {}
343                    _ => extract_imports_from_node(&child, source, prefix, line, imports),
344                }
345            }
346        }
347        // `std::io::*` — glob import
348        "use_wildcard" => {
349            let text = source[node.start_byte()..node.end_byte()].to_string();
350            let module = if let Some(stripped) = text.strip_suffix("::*") {
351                if prefix.is_empty() {
352                    stripped.to_string()
353                } else {
354                    format!("{prefix}::{stripped}")
355                }
356            } else {
357                prefix.to_string()
358            };
359            imports.push(ImportInfo {
360                module,
361                items: vec!["*".to_string()],
362                line,
363            });
364        }
365        // `io as stdio` or `std::io as stdio`
366        "use_as_clause" => {
367            let alias = node
368                .child_by_field_name("alias")
369                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
370                .unwrap_or_default();
371            let module = if let Some(path_node) = node.child_by_field_name("path") {
372                match path_node.kind() {
373                    "scoped_identifier" => path_node.child_by_field_name("path").map_or_else(
374                        || prefix.to_string(),
375                        |p| {
376                            let p_text = source[p.start_byte()..p.end_byte()].to_string();
377                            if prefix.is_empty() {
378                                p_text
379                            } else {
380                                format!("{prefix}::{p_text}")
381                            }
382                        },
383                    ),
384                    _ => prefix.to_string(),
385                }
386            } else {
387                prefix.to_string()
388            };
389            if !alias.is_empty() {
390                imports.push(ImportInfo {
391                    module,
392                    items: vec![alias],
393                    line,
394                });
395            }
396        }
397        // Python import_from_statement: `from module import name` or `from . import *`
398        "import_from_statement" => {
399            extract_python_import_from(node, source, line, imports);
400        }
401        // Fallback for non-Rust import nodes: capture full text as module
402        _ => {
403            let text = source[node.start_byte()..node.end_byte()]
404                .trim()
405                .to_string();
406            if !text.is_empty() {
407                imports.push(ImportInfo {
408                    module: text,
409                    items: vec![],
410                    line,
411                });
412            }
413        }
414    }
415}
416
417/// Extract an item name from a `dotted_name` or `aliased_import` child node.
418fn extract_import_item_name(child: &Node, source: &str) -> Option<String> {
419    match child.kind() {
420        "dotted_name" => {
421            let name = source[child.start_byte()..child.end_byte()]
422                .trim()
423                .to_string();
424            if name.is_empty() { None } else { Some(name) }
425        }
426        "aliased_import" => child.child_by_field_name("name").and_then(|n| {
427            let name = source[n.start_byte()..n.end_byte()].trim().to_string();
428            if name.is_empty() { None } else { Some(name) }
429        }),
430        _ => None,
431    }
432}
433
434/// Collect wildcard/named imports from an `import_list` node or from direct named children.
435fn collect_import_items(
436    node: &Node,
437    source: &str,
438    is_wildcard: &mut bool,
439    items: &mut Vec<String>,
440) {
441    // Prefer import_list child (wraps `from x import a, b`)
442    if let Some(import_list) = node.child_by_field_name("import_list") {
443        let mut cursor = import_list.walk();
444        for child in import_list.named_children(&mut cursor) {
445            if child.kind() == "wildcard_import" {
446                *is_wildcard = true;
447            } else if let Some(name) = extract_import_item_name(&child, source) {
448                items.push(name);
449            }
450        }
451        return;
452    }
453    // No import_list: single-name or wildcard as direct child (skip first named child = module_name)
454    let mut cursor = node.walk();
455    let mut first = true;
456    for child in node.named_children(&mut cursor) {
457        if first {
458            first = false;
459            continue;
460        }
461        if child.kind() == "wildcard_import" {
462            *is_wildcard = true;
463        } else if let Some(name) = extract_import_item_name(&child, source) {
464            items.push(name);
465        }
466    }
467}
468
469/// Handle Python `import_from_statement` node.
470fn extract_python_import_from(
471    node: &Node,
472    source: &str,
473    line: usize,
474    imports: &mut Vec<ImportInfo>,
475) {
476    let module = if let Some(m) = node.child_by_field_name("module_name") {
477        source[m.start_byte()..m.end_byte()].trim().to_string()
478    } else if let Some(r) = node.child_by_field_name("relative_import") {
479        source[r.start_byte()..r.end_byte()].trim().to_string()
480    } else {
481        String::new()
482    };
483
484    let mut is_wildcard = false;
485    let mut items = Vec::new();
486    collect_import_items(node, source, &mut is_wildcard, &mut items);
487
488    if !module.is_empty() {
489        imports.push(ImportInfo {
490            module,
491            items: if is_wildcard {
492                vec!["*".to_string()]
493            } else {
494                items
495            },
496            line,
497        });
498    }
499}
500
/// Entry point for detailed semantic analysis: functions, classes, imports,
/// references, calls, and impl-trait blocks.
pub struct SemanticExtractor;
502
503impl SemanticExtractor {
504    /// Extract semantic information from source code.
505    ///
506    /// # Errors
507    ///
508    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
509    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
510    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
511    #[instrument(skip_all, fields(language))]
512    pub fn extract(
513        source: &str,
514        language: &str,
515        ast_recursion_limit: Option<usize>,
516        timeout_micros: Option<u64>,
517    ) -> Result<SemanticAnalysis, ParserError> {
518        let tc = TimeoutConfig::new(timeout_micros);
519
520        // Check deadline at the start before any parsing work.
521        if tc.is_exceeded() {
522            return Err(ParserError::Timeout(tc.micros));
523        }
524
525        // Try regex-based fallback for formats without a tree-sitter grammar.
526        if let Some(analysis) = try_regex_fallback(source, language) {
527            return Ok(analysis);
528        }
529
530        let lang_info = get_language_info(language)
531            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
532
533        let tree = PARSER.with(|p| {
534            let mut parser = p.borrow_mut();
535            parser
536                .set_language(&lang_info.language)
537                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
538            parser
539                .parse(source, None)
540                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
541        })?;
542
543        // 0 is not a useful depth (visits root node only, returning zero results).
544        // Treat 0 as None (unlimited). See #339.
545        let max_depth: Option<u32> = ast_recursion_limit
546            .filter(|&limit| limit > 0)
547            .map(|limit| {
548                u32::try_from(limit).map_err(|_| {
549                    ParserError::ParseError(format!(
550                        "ast_recursion_limit {} exceeds maximum supported value {}",
551                        limit,
552                        u32::MAX
553                    ))
554                })
555            })
556            .transpose()?;
557
558        let compiled = get_compiled_queries(language)?;
559        let root = tree.root_node();
560
561        let mut functions = Vec::new();
562        let mut classes = Vec::new();
563        let mut imports = Vec::new();
564        let mut references = Vec::new();
565        let mut call_frequency = HashMap::new();
566        let mut calls = Vec::new();
567
568        Self::extract_elements(
569            source,
570            compiled,
571            root,
572            max_depth,
573            &lang_info,
574            &mut functions,
575            &mut classes,
576            tc,
577        )?;
578        Self::extract_calls(
579            source,
580            compiled,
581            root,
582            max_depth,
583            &mut calls,
584            &mut call_frequency,
585            tc,
586        )?;
587        Self::extract_imports(source, compiled, root, max_depth, &mut imports, tc)?;
588        Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes, tc)?;
589        Self::extract_references(source, compiled, root, max_depth, &mut references, tc)?;
590
591        // Extract impl-trait blocks for Rust files (empty for other languages)
592        let impl_traits = if language == "rust" {
593            Self::extract_impl_traits_from_tree(source, compiled, root, tc)?
594        } else {
595            vec![]
596        };
597
598        tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), impl_traits = impl_traits.len(), "extraction complete");
599
600        Ok(SemanticAnalysis {
601            functions,
602            classes,
603            imports,
604            references,
605            call_frequency,
606            calls,
607            impl_traits,
608            def_use_sites: Vec::new(),
609        })
610    }
611
612    /// Fast path for extracting module metadata: functions and imports only.
613    ///
614    /// This method is optimized for the `analyze_module` tool, which only needs function
615    /// definitions and import statements. It skips the more expensive extractors (calls,
616    /// references, impl traits) and returns a lightweight `ModuleInfo` directly.
617    ///
618    /// # Arguments
619    ///
620    /// * `source` - The source code as a string
621    /// * `language` - The programming language (e.g., "rust", "python")
622    /// * `timeout` - Optional timeout configuration in microseconds
623    ///
624    /// # Returns
625    ///
626    /// A `ModuleInfo` containing the file name, line count, language, functions, and imports.
627    ///
628    /// # Errors
629    ///
630    /// Returns a `ParserError` if:
631    /// * `ParserError::Timeout` - The operation exceeds the specified timeout
632    /// * `ParserError::UnsupportedLanguage` - The language is not supported
633    /// * `ParserError::ParseError` - Tree-sitter parsing fails
634    #[instrument(skip_all, fields(language))]
635    pub fn extract_module_info(
636        source: &str,
637        language: &str,
638        timeout_micros: Option<u64>,
639    ) -> Result<crate::types::ModuleInfo, ParserError> {
640        let tc = TimeoutConfig::new(timeout_micros);
641
642        // Check deadline at the start before any parsing work.
643        if tc.is_exceeded() {
644            return Err(ParserError::Timeout(tc.micros));
645        }
646
647        let lang_info = get_language_info(language)
648            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
649
650        let tree = PARSER.with(|p| {
651            let mut parser = p.borrow_mut();
652            parser
653                .set_language(&lang_info.language)
654                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
655            parser
656                .parse(source, None)
657                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
658        })?;
659
660        // Check deadline after parsing
661        if tc.is_exceeded() {
662            return Err(ParserError::Timeout(tc.micros));
663        }
664
665        let compiled = get_compiled_queries(language)?;
666        let root = tree.root_node();
667
668        let mut functions = Vec::new();
669        let mut classes = Vec::new();
670        let mut imports = Vec::new();
671
672        // Extract functions and classes
673        Self::extract_elements(
674            source,
675            compiled,
676            root,
677            None,
678            &lang_info,
679            &mut functions,
680            &mut classes,
681            tc,
682        )?;
683
684        // Check deadline after extract_elements
685        if tc.is_exceeded() {
686            return Err(ParserError::Timeout(tc.micros));
687        }
688
689        // Extract imports
690        Self::extract_imports(source, compiled, root, None, &mut imports, tc)?;
691
692        // Check deadline after extract_imports
693        if tc.is_exceeded() {
694            return Err(ParserError::Timeout(tc.micros));
695        }
696
697        // Map to ModuleInfo
698        let module_functions = functions
699            .into_iter()
700            .map(|f| crate::types::ModuleFunctionInfo {
701                name: f.name,
702                line: f.line,
703            })
704            .collect();
705
706        let module_imports = imports
707            .into_iter()
708            .map(|i| crate::types::ModuleImportInfo {
709                module: i.module,
710                items: i.items,
711            })
712            .collect();
713
714        let line_count = source.lines().count();
715
716        Ok(crate::types::ModuleInfo::new(
717            String::new(), // Will be set by caller
718            line_count,
719            language.to_string(),
720            module_functions,
721            module_imports,
722        ))
723    }
724
725    // Extracts function and class definitions from a pre-parsed syntax tree.
726    #[allow(clippy::too_many_arguments)]
727    fn extract_elements(
728        source: &str,
729        compiled: &CompiledQueries,
730        root: Node<'_>,
731        max_depth: Option<u32>,
732        lang_info: &crate::languages::LanguageInfo,
733        functions: &mut Vec<FunctionInfo>,
734        classes: &mut Vec<ClassInfo>,
735        tc: TimeoutConfig,
736    ) -> Result<(), ParserError> {
737        let mut seen_functions = std::collections::HashSet::new();
738        let mut timed_out = false;
739
740        QUERY_CURSOR.with(|c| {
741            let mut cursor = c.borrow_mut();
742            cursor.set_max_start_depth(None);
743            if let Some(depth) = max_depth {
744                cursor.set_max_start_depth(Some(depth));
745            }
746
747            let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
748
749            while let Some(mat) = matches.next() {
750                // Check if we've hit the deadline
751                if tc.is_exceeded() {
752                    timed_out = true;
753                    break;
754                }
755                let mut func_node: Option<Node> = None;
756                let mut func_name_text: Option<String> = None;
757                let mut class_node: Option<Node> = None;
758                let mut class_name_text: Option<String> = None;
759
760                for capture in mat.captures {
761                    let capture_name = compiled.element.capture_names()[capture.index as usize];
762                    let node = capture.node;
763                    match capture_name {
764                        "function" => func_node = Some(node),
765                        "func_name" | "method_name" => {
766                            func_name_text =
767                                Some(source[node.start_byte()..node.end_byte()].to_string());
768                        }
769                        "class" => class_node = Some(node),
770                        "class_name" | "type_name" => {
771                            class_name_text =
772                                Some(source[node.start_byte()..node.end_byte()].to_string());
773                        }
774                        _ => {}
775                    }
776                }
777
778                if let Some(func_node) = func_node {
779                    // When a plain function_definition is nested inside a template_declaration
780                    // or decorated_definition, it is also matched by the explicit wrapper pattern.
781                    // Skip it here to avoid duplicates; the wrapper match will emit it.
782                    let parent_kind = func_node.parent().map(|p| p.kind());
783                    let parent_is_wrapper = parent_kind
784                        .map(|k| k == "template_declaration" || k == "decorated_definition")
785                        .unwrap_or(false);
786                    if func_node.kind() == "function_definition" && parent_is_wrapper {
787                        // Handled by the template_declaration or decorated_definition @function match instead.
788                    } else {
789                        // Resolve template_declaration or decorated_definition to inner function_definition
790                        // for declarator/field walks. The captured node may be a wrapper.
791                        let func_def = if func_node.kind() == "template_declaration" {
792                            let mut cursor = func_node.walk();
793                            func_node
794                                .children(&mut cursor)
795                                .find(|n| n.kind() == "function_definition")
796                                .unwrap_or(func_node)
797                        } else if func_node.kind() == "decorated_definition" {
798                            func_node
799                                .child_by_field_name("definition")
800                                .unwrap_or(func_node)
801                        } else {
802                            func_node
803                        };
804
805                        let name = func_name_text
806                            .or_else(|| {
807                                func_def
808                                    .child_by_field_name("name")
809                                    .map(|n| source[n.start_byte()..n.end_byte()].to_string())
810                            })
811                            .unwrap_or_default();
812
813                        let func_key = (name.clone(), func_node.start_position().row);
814                        if !name.is_empty() && seen_functions.insert(func_key) {
815                            // For C/C++: parameters live under declarator -> parameters.
816                            // For other languages: parameters is a direct child field.
817                            let params = func_def
818                                .child_by_field_name("declarator")
819                                .and_then(|d| d.child_by_field_name("parameters"))
820                                .or_else(|| func_def.child_by_field_name("parameters"))
821                                .map(|p| source[p.start_byte()..p.end_byte()].to_string())
822                                .unwrap_or_default();
823
824                            // Try "type" first (C/C++ uses this field for the return type);
825                            // fall back to "return_type" (Rust, Python, TypeScript, etc.).
826                            let return_type = func_def
827                                .child_by_field_name("type")
828                                .or_else(|| func_def.child_by_field_name("return_type"))
829                                .map(|r| source[r.start_byte()..r.end_byte()].to_string());
830
831                            // Walk backward through contiguous attribute_item siblings
832                            // to find the first attribute line (Rust only).
833                            let first_line = if func_node.kind() == "function_item" {
834                                let mut attrs: Vec<Node> = Vec::new();
835                                let mut sib = func_node.prev_named_sibling();
836                                while let Some(s) = sib {
837                                    if s.kind() == "attribute_item" {
838                                        attrs.push(s);
839                                        sib = s.prev_named_sibling();
840                                    } else {
841                                        break;
842                                    }
843                                }
844                                attrs
845                                    .last()
846                                    .map(|n| n.start_position().row + 1)
847                                    .unwrap_or_else(|| func_node.start_position().row + 1)
848                            } else {
849                                func_node.start_position().row + 1
850                            };
851
852                            functions.push(FunctionInfo {
853                                name,
854                                line: first_line,
855                                end_line: func_node.end_position().row + 1,
856                                parameters: if params.is_empty() {
857                                    Vec::new()
858                                } else {
859                                    vec![params]
860                                },
861                                return_type,
862                            });
863                        }
864                    }
865                }
866
867                if let Some(class_node) = class_node {
868                    let name = class_name_text
869                        .or_else(|| {
870                            class_node
871                                .child_by_field_name("name")
872                                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
873                        })
874                        .unwrap_or_default();
875
876                    if !name.is_empty() {
877                        let inherits = if let Some(handler) = lang_info.extract_inheritance {
878                            handler(&class_node, source)
879                        } else {
880                            Vec::new()
881                        };
882                        classes.push(ClassInfo {
883                            name,
884                            line: class_node.start_position().row + 1,
885                            end_line: class_node.end_position().row + 1,
886                            methods: Vec::new(),
887                            fields: Vec::new(),
888                            inherits,
889                        });
890                    }
891                }
892            }
893        });
894
895        if timed_out {
896            return Err(ParserError::Timeout(tc.micros));
897        }
898
899        Ok(())
900    }
901
902    /// Returns the name of the enclosing function/method/subroutine for a given AST node,
903    /// by walking ancestors and matching all language-specific function container kinds.
904    fn enclosing_function_name(mut node: tree_sitter::Node<'_>, source: &str) -> Option<String> {
905        let mut depth = 0u32;
906        while let Some(parent) = node.parent() {
907            depth += 1;
908            // Cap at 64 hops: real function nesting rarely exceeds ~10 levels; 64 is a generous
909            // upper bound that guards against pathological/malformed ASTs without false negatives
910            // on legitimate code. Returns None (treated as <module>) when the cap is hit.
911            if depth > 64 {
912                return None;
913            }
914            let name_node = match parent.kind() {
915                // Direct name field: Rust, Python, Go, Java, TypeScript/TSX
916                "function_item"
917                | "method_item"
918                | "function_definition"
919                | "function_declaration"
920                | "method_declaration"
921                | "method_definition" => parent.child_by_field_name("name"),
922                // Fortran subroutine: name is inside subroutine_statement child
923                "subroutine" => {
924                    let mut cursor = parent.walk();
925                    parent
926                        .children(&mut cursor)
927                        .find(|c| c.kind() == "subroutine_statement")
928                        .and_then(|s| s.child_by_field_name("name"))
929                }
930                // Fortran function: name is inside function_statement child
931                "function" => {
932                    let mut cursor = parent.walk();
933                    parent
934                        .children(&mut cursor)
935                        .find(|c| c.kind() == "function_statement")
936                        .and_then(|s| s.child_by_field_name("name"))
937                }
938                _ => {
939                    node = parent;
940                    continue;
941                }
942            };
943            return name_node.map(|n| source[n.start_byte()..n.end_byte()].to_string());
944        }
945        // The loop exits here only when no parent was found (i.e., we reached the tree root
946        // without finding a function container). If the depth cap fired, we returned None early
947        // above. Nothing to assert here.
948        None
949    }
950
951    #[allow(clippy::too_many_arguments)]
952    fn extract_calls(
953        source: &str,
954        compiled: &CompiledQueries,
955        root: Node<'_>,
956        max_depth: Option<u32>,
957        calls: &mut Vec<CallInfo>,
958        call_frequency: &mut HashMap<String, usize>,
959        tc: TimeoutConfig,
960    ) -> Result<(), ParserError> {
961        let mut timed_out = false;
962
963        QUERY_CURSOR.with(|c| {
964            let mut cursor = c.borrow_mut();
965            cursor.set_max_start_depth(None);
966            if let Some(depth) = max_depth {
967                cursor.set_max_start_depth(Some(depth));
968            }
969
970            let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
971
972            while let Some(mat) = matches.next() {
973                // Check if we've hit the deadline
974                if tc.is_exceeded() {
975                    timed_out = true;
976                    break;
977                }
978                for capture in mat.captures {
979                    let capture_name = compiled.call.capture_names()[capture.index as usize];
980                    if capture_name != "call" {
981                        continue;
982                    }
983                    let node = capture.node;
984                    let call_name = source[node.start_byte()..node.end_byte()].to_string();
985                    *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
986
987                    let caller = Self::enclosing_function_name(node, source)
988                        .unwrap_or_else(|| "<module>".to_string());
989
990                    let mut arg_count = None;
991                    let mut arg_node = node;
992                    let mut hop = 0u32;
993                    let mut cap_hit = false;
994                    while let Some(parent) = arg_node.parent() {
995                        hop += 1;
996                        // Bounded parent traversal: cap at 16 hops to guard against pathological
997                        // walks on malformed/degenerate trees. Real call-expression nesting is
998                        // shallow (typically 1-3 levels). When the cap is hit we stop searching and
999                        // leave arg_count as None; the caller is still recorded, just without
1000                        // argument-count information.
1001                        if hop > 16 {
1002                            cap_hit = true;
1003                            break;
1004                        }
1005                        if parent.kind() == "call_expression" {
1006                            if let Some(args) = parent.child_by_field_name("arguments") {
1007                                arg_count = Some(args.named_child_count());
1008                            }
1009                            break;
1010                        }
1011                        arg_node = parent;
1012                    }
1013                    debug_assert!(
1014                        !cap_hit,
1015                        "extract_calls: parent traversal cap reached (hop > 16)"
1016                    );
1017
1018                    calls.push(CallInfo {
1019                        caller,
1020                        callee: call_name,
1021                        line: node.start_position().row + 1,
1022                        column: node.start_position().column,
1023                        arg_count,
1024                    });
1025                }
1026            }
1027        });
1028
1029        if timed_out {
1030            return Err(ParserError::Timeout(tc.micros));
1031        }
1032
1033        Ok(())
1034    }
1035
1036    // Extracts import statements from a pre-parsed syntax tree.
1037    fn extract_imports(
1038        source: &str,
1039        compiled: &CompiledQueries,
1040        root: Node<'_>,
1041        max_depth: Option<u32>,
1042        imports: &mut Vec<ImportInfo>,
1043        tc: TimeoutConfig,
1044    ) -> Result<(), ParserError> {
1045        let Some(ref import_query) = compiled.import else {
1046            return Ok(());
1047        };
1048        let mut timed_out = false;
1049
1050        QUERY_CURSOR.with(|c| {
1051            let mut cursor = c.borrow_mut();
1052            cursor.set_max_start_depth(None);
1053            if let Some(depth) = max_depth {
1054                cursor.set_max_start_depth(Some(depth));
1055            }
1056
1057            let mut matches = cursor.matches(import_query, root, source.as_bytes());
1058
1059            while let Some(mat) = matches.next() {
1060                // Check if we've hit the deadline
1061                if tc.is_exceeded() {
1062                    timed_out = true;
1063                    break;
1064                }
1065                for capture in mat.captures {
1066                    let capture_name = import_query.capture_names()[capture.index as usize];
1067                    if capture_name == "import_path" {
1068                        let node = capture.node;
1069                        let line = node.start_position().row + 1;
1070                        extract_imports_from_node(&node, source, "", line, imports);
1071                    }
1072                }
1073            }
1074        });
1075
1076        if timed_out {
1077            return Err(ParserError::Timeout(tc.micros));
1078        }
1079
1080        Ok(())
1081    }
1082
1083    fn extract_impl_methods(
1084        source: &str,
1085        compiled: &CompiledQueries,
1086        root: Node<'_>,
1087        max_depth: Option<u32>,
1088        classes: &mut [ClassInfo],
1089        tc: TimeoutConfig,
1090    ) -> Result<(), ParserError> {
1091        let Some(ref impl_query) = compiled.impl_block else {
1092            return Ok(());
1093        };
1094        let mut timed_out = false;
1095
1096        QUERY_CURSOR.with(|c| {
1097            let mut cursor = c.borrow_mut();
1098            cursor.set_max_start_depth(None);
1099            if let Some(depth) = max_depth {
1100                cursor.set_max_start_depth(Some(depth));
1101            }
1102
1103            let mut matches = cursor.matches(impl_query, root, source.as_bytes());
1104
1105            while let Some(mat) = matches.next() {
1106                // Check if we've hit the deadline
1107                if tc.is_exceeded() {
1108                    timed_out = true;
1109                    break;
1110                }
1111
1112                let mut impl_type_name = String::new();
1113                let mut method_name = String::new();
1114                let mut method_line = 0usize;
1115                let mut method_end_line = 0usize;
1116                let mut method_params = String::new();
1117                let mut method_return_type: Option<String> = None;
1118
1119                for capture in mat.captures {
1120                    let capture_name = impl_query.capture_names()[capture.index as usize];
1121                    let node = capture.node;
1122                    match capture_name {
1123                        "impl_type" => {
1124                            impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
1125                        }
1126                        "method_name" => {
1127                            method_name = source[node.start_byte()..node.end_byte()].to_string();
1128                        }
1129                        "method_params" => {
1130                            method_params = source[node.start_byte()..node.end_byte()].to_string();
1131                        }
1132                        "method" => {
1133                            let mut method_attrs: Vec<Node> = Vec::new();
1134                            let mut msib = node.prev_named_sibling();
1135                            while let Some(s) = msib {
1136                                if s.kind() == "attribute_item" {
1137                                    method_attrs.push(s);
1138                                    msib = s.prev_named_sibling();
1139                                } else {
1140                                    break;
1141                                }
1142                            }
1143                            method_line = method_attrs
1144                                .last()
1145                                .map(|n| n.start_position().row + 1)
1146                                .unwrap_or_else(|| node.start_position().row + 1);
1147                            method_end_line = node.end_position().row + 1;
1148                            method_return_type = node
1149                                .child_by_field_name("return_type")
1150                                .map(|r| source[r.start_byte()..r.end_byte()].to_string());
1151                        }
1152                        _ => {}
1153                    }
1154                }
1155
1156                if !impl_type_name.is_empty() && !method_name.is_empty() {
1157                    let func = FunctionInfo {
1158                        name: method_name,
1159                        line: method_line,
1160                        end_line: method_end_line,
1161                        parameters: if method_params.is_empty() {
1162                            Vec::new()
1163                        } else {
1164                            vec![method_params]
1165                        },
1166                        return_type: method_return_type,
1167                    };
1168                    if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
1169                        class.methods.push(func);
1170                    }
1171                }
1172            }
1173        });
1174
1175        if timed_out {
1176            return Err(ParserError::Timeout(tc.micros));
1177        }
1178
1179        Ok(())
1180    }
1181
1182    fn extract_references(
1183        source: &str,
1184        compiled: &CompiledQueries,
1185        root: Node<'_>,
1186        max_depth: Option<u32>,
1187        references: &mut Vec<ReferenceInfo>,
1188        tc: TimeoutConfig,
1189    ) -> Result<(), ParserError> {
1190        let Some(ref ref_query) = compiled.reference else {
1191            return Ok(());
1192        };
1193        let mut seen_refs = std::collections::HashSet::new();
1194        let mut timed_out = false;
1195
1196        QUERY_CURSOR.with(|c| {
1197            let mut cursor = c.borrow_mut();
1198            cursor.set_max_start_depth(None);
1199            if let Some(depth) = max_depth {
1200                cursor.set_max_start_depth(Some(depth));
1201            }
1202
1203            let mut matches = cursor.matches(ref_query, root, source.as_bytes());
1204
1205            while let Some(mat) = matches.next() {
1206                // Check if we've hit the deadline
1207                if tc.is_exceeded() {
1208                    timed_out = true;
1209                    break;
1210                }
1211
1212                for capture in mat.captures {
1213                    let capture_name = ref_query.capture_names()[capture.index as usize];
1214                    if capture_name == "type_ref" {
1215                        let node = capture.node;
1216                        let type_ref = source[node.start_byte()..node.end_byte()].to_string();
1217                        if seen_refs.insert(type_ref.clone()) {
1218                            references.push(ReferenceInfo {
1219                                symbol: type_ref,
1220                                reference_type: ReferenceType::Usage,
1221                                // location is intentionally empty here; set by the caller (analyze_file)
1222                                location: String::new(),
1223                                line: node.start_position().row + 1,
1224                            });
1225                        }
1226                    }
1227                }
1228            }
1229        });
1230
1231        if timed_out {
1232            return Err(ParserError::Timeout(tc.micros));
1233        }
1234
1235        Ok(())
1236    }
1237
1238    /// Extract impl-trait blocks from an already-parsed tree.
1239    ///
1240    /// Called during `extract()` for Rust files to avoid a second parse.
1241    /// Returns an empty vec if the query is not available.
1242    fn extract_impl_traits_from_tree(
1243        source: &str,
1244        compiled: &CompiledQueries,
1245        root: Node<'_>,
1246        tc: TimeoutConfig,
1247    ) -> Result<Vec<ImplTraitInfo>, ParserError> {
1248        let Some(query) = &compiled.impl_trait else {
1249            return Ok(vec![]);
1250        };
1251
1252        let mut results = Vec::new();
1253        let mut timed_out = false;
1254
1255        QUERY_CURSOR.with(|c| {
1256            let mut cursor = c.borrow_mut();
1257            cursor.set_max_start_depth(None);
1258
1259            let mut matches = cursor.matches(query, root, source.as_bytes());
1260
1261            while let Some(mat) = matches.next() {
1262                // Check if we've hit the deadline
1263                if tc.is_exceeded() {
1264                    timed_out = true;
1265                    break;
1266                }
1267
1268                let mut trait_name = String::new();
1269                let mut impl_type = String::new();
1270                let mut line = 0usize;
1271
1272                for capture in mat.captures {
1273                    let capture_name = query.capture_names()[capture.index as usize];
1274                    let node = capture.node;
1275                    let text = source[node.start_byte()..node.end_byte()].to_string();
1276                    match capture_name {
1277                        "trait_name" => {
1278                            trait_name = text;
1279                            line = node.start_position().row + 1;
1280                        }
1281                        "impl_type" => {
1282                            impl_type = text;
1283                        }
1284                        _ => {}
1285                    }
1286                }
1287
1288                if !trait_name.is_empty() && !impl_type.is_empty() {
1289                    results.push(ImplTraitInfo {
1290                        trait_name,
1291                        impl_type,
1292                        path: PathBuf::new(), // Path will be set by caller
1293                        line,
1294                    });
1295                }
1296            }
1297        });
1298
1299        if timed_out {
1300            return Err(ParserError::Timeout(tc.micros));
1301        }
1302
1303        Ok(results)
1304    }
1305
1306    /// Extract def-use sites (write/read locations) for a given symbol within a file.
1307    ///
1308    /// Runs the defuse query to find all definition and use sites of a symbol.
1309    /// Returns empty vec if no defuse query is available for this language.
1310    ///
1311    /// # Arguments
1312    ///
1313    /// * `source` - The source code text
1314    /// * `compiled` - Compiled tree-sitter queries
1315    /// * `root` - Root node of the AST
1316    /// * `symbol_name` - The symbol to search for (must match exactly)
1317    /// * `file_path` - Relative file path for site reporting
1318    fn extract_def_use(
1319        source: &str,
1320        compiled: &CompiledQueries,
1321        root: Node<'_>,
1322        symbol_name: &str,
1323        file_path: &str,
1324        max_depth: Option<u32>,
1325    ) -> Vec<crate::types::DefUseSite> {
1326        let Some(ref defuse_query) = compiled.defuse else {
1327            return vec![];
1328        };
1329
1330        let mut sites = Vec::new();
1331        let source_lines: Vec<&str> = source.lines().collect();
1332        // Track byte offsets that already have a write or writeread capture so
1333        // duplicate read captures for the same identifier are suppressed.
1334        let mut write_offsets = std::collections::HashSet::new();
1335
1336        QUERY_CURSOR.with(|c| {
1337            let mut cursor = c.borrow_mut();
1338            cursor.set_max_start_depth(None);
1339            if let Some(depth) = max_depth {
1340                cursor.set_max_start_depth(Some(depth));
1341            }
1342            let mut matches = cursor.matches(defuse_query, root, source.as_bytes());
1343
1344            while let Some(mat) = matches.next() {
1345                for capture in mat.captures {
1346                    let capture_name = defuse_query.capture_names()[capture.index as usize];
1347                    let node = capture.node;
1348                    let node_text = node.utf8_text(source.as_bytes()).unwrap_or_default();
1349
1350                    // Only collect if the captured node matches the target symbol
1351                    if node_text != symbol_name {
1352                        continue;
1353                    }
1354
1355                    // Classify capture by prefix
1356                    let kind = if capture_name.starts_with("write.") {
1357                        crate::types::DefUseKind::Write
1358                    } else if capture_name.starts_with("read.") {
1359                        crate::types::DefUseKind::Read
1360                    } else if capture_name.starts_with("writeread.") {
1361                        crate::types::DefUseKind::WriteRead
1362                    } else {
1363                        continue;
1364                    };
1365
1366                    let byte_offset = node.start_byte();
1367
1368                    // De-duplicate: skip read captures for offsets already captured as write/writeread
1369                    if kind == crate::types::DefUseKind::Read
1370                        && write_offsets.contains(&byte_offset)
1371                    {
1372                        continue;
1373                    }
1374                    if kind != crate::types::DefUseKind::Read {
1375                        write_offsets.insert(byte_offset);
1376                    }
1377
1378                    // Get line number (1-indexed) and center-line snippet.
1379                    // Always produce a 3-line window so snippet_one_line (index 1) is safe.
1380                    let line = node.start_position().row + 1;
1381                    let snippet = {
1382                        let row = node.start_position().row;
1383                        let last_line = source_lines.len().saturating_sub(1);
1384                        let prev = if row > 0 { row - 1 } else { 0 };
1385                        let next = std::cmp::min(row + 1, last_line);
1386                        let prev_text = if row == 0 {
1387                            ""
1388                        } else {
1389                            source_lines[prev].trim_end()
1390                        };
1391                        let cur_text = source_lines[row].trim_end();
1392                        let next_text = if row >= last_line {
1393                            ""
1394                        } else {
1395                            source_lines[next].trim_end()
1396                        };
1397                        format!("{prev_text}\n{cur_text}\n{next_text}")
1398                    };
1399
1400                    // Get enclosing function scope
1401                    let enclosing_scope = Self::enclosing_function_name(node, source);
1402
1403                    let column = node.start_position().column;
1404                    sites.push(crate::types::DefUseSite {
1405                        kind,
1406                        symbol: node_text.to_string(),
1407                        file: file_path.to_string(),
1408                        line,
1409                        column,
1410                        snippet,
1411                        enclosing_scope,
1412                    });
1413                }
1414            }
1415        });
1416
1417        sites
1418    }
1419
1420    /// Parse `source` in `language`, run the defuse query for `symbol`, and return all sites.
1421    /// Returns an empty vec if the language has no defuse query or parsing fails.
1422    pub(crate) fn extract_def_use_for_file(
1423        source: &str,
1424        language: &str,
1425        symbol: &str,
1426        file_path: &str,
1427        ast_recursion_limit: Option<usize>,
1428    ) -> Vec<crate::types::DefUseSite> {
1429        let Some(lang_info) = crate::languages::get_language_info(language) else {
1430            return vec![];
1431        };
1432        let Ok(compiled) = get_compiled_queries(language) else {
1433            return vec![];
1434        };
1435        if compiled.defuse.is_none() {
1436            return vec![];
1437        }
1438
1439        let tree = match PARSER.with(|p| {
1440            let mut parser = p.borrow_mut();
1441            if parser.set_language(&lang_info.language).is_err() {
1442                return None;
1443            }
1444            parser.parse(source, None)
1445        }) {
1446            Some(t) => t,
1447            None => return vec![],
1448        };
1449
1450        let root = tree.root_node();
1451
1452        // Convert ast_recursion_limit the same way extract() does:
1453        // 0 means unlimited (None); positive values become Some(u32).
1454        let max_depth: Option<u32> = ast_recursion_limit
1455            .filter(|&limit| limit > 0)
1456            .and_then(|limit| u32::try_from(limit).ok());
1457
1458        Self::extract_def_use(source, compiled, root, symbol, file_path, max_depth)
1459    }
1460}
1461
1462/// Extract `impl Trait for Type` blocks from Rust source.
1463///
1464/// Runs independently of `extract_references` to avoid shared deduplication state.
1465/// Returns an empty vec for non-Rust source (no error; caller decides).
1466#[must_use]
1467pub fn extract_impl_traits(source: &str, path: &Path) -> Vec<ImplTraitInfo> {
1468    let Some(lang_info) = get_language_info("rust") else {
1469        return vec![];
1470    };
1471
1472    let Ok(compiled) = get_compiled_queries("rust") else {
1473        return vec![];
1474    };
1475
1476    let Some(query) = &compiled.impl_trait else {
1477        return vec![];
1478    };
1479
1480    let Some(tree) = PARSER.with(|p| {
1481        let mut parser = p.borrow_mut();
1482        let _ = parser.set_language(&lang_info.language);
1483        parser.parse(source, None)
1484    }) else {
1485        return vec![];
1486    };
1487
1488    let root = tree.root_node();
1489    let mut results = Vec::new();
1490
1491    QUERY_CURSOR.with(|c| {
1492        let mut cursor = c.borrow_mut();
1493        cursor.set_max_start_depth(None);
1494        let mut matches = cursor.matches(query, root, source.as_bytes());
1495
1496        while let Some(mat) = matches.next() {
1497            let mut trait_name = String::new();
1498            let mut impl_type = String::new();
1499            let mut line = 0usize;
1500
1501            for capture in mat.captures {
1502                let capture_name = query.capture_names()[capture.index as usize];
1503                let node = capture.node;
1504                let text = source[node.start_byte()..node.end_byte()].to_string();
1505                match capture_name {
1506                    "trait_name" => {
1507                        trait_name = text;
1508                        line = node.start_position().row + 1;
1509                    }
1510                    "impl_type" => {
1511                        impl_type = text;
1512                    }
1513                    _ => {}
1514                }
1515            }
1516
1517            if !trait_name.is_empty() && !impl_type.is_empty() {
1518                results.push(ImplTraitInfo {
1519                    trait_name,
1520                    impl_type,
1521                    path: path.to_path_buf(),
1522                    line,
1523                });
1524            }
1525        }
1526    });
1527
1528    results
1529}
1530
1531/// Execute a custom tree-sitter query against source code.
1532///
1533/// This is the internal implementation of the public `execute_query` function.
1534pub(crate) fn execute_query_impl(
1535    language: &str,
1536    source: &str,
1537    query_str: &str,
1538) -> Result<Vec<crate::QueryCapture>, ParserError> {
1539    // Get the tree-sitter language from the language name
1540    let ts_language = crate::languages::get_ts_language(language)
1541        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
1542
1543    let mut parser = Parser::new();
1544    parser
1545        .set_language(&ts_language)
1546        .map_err(|e| ParserError::QueryError(e.to_string()))?;
1547
1548    let tree = parser
1549        .parse(source.as_bytes(), None)
1550        .ok_or_else(|| ParserError::QueryError("failed to parse source".to_string()))?;
1551
1552    let query =
1553        Query::new(&ts_language, query_str).map_err(|e| ParserError::QueryError(e.to_string()))?;
1554
1555    let source_bytes = source.as_bytes();
1556
1557    let mut captures = Vec::new();
1558    QUERY_CURSOR.with(|c| {
1559        let mut cursor = c.borrow_mut();
1560        cursor.set_max_start_depth(None);
1561        let mut matches = cursor.matches(&query, tree.root_node(), source_bytes);
1562        while let Some(m) = matches.next() {
1563            for cap in m.captures {
1564                let node = cap.node;
1565                let capture_name = query.capture_names()[cap.index as usize].to_string();
1566                let text = node.utf8_text(source_bytes).unwrap_or("").to_string();
1567                captures.push(crate::QueryCapture {
1568                    capture_name,
1569                    text,
1570                    start_line: node.start_position().row,
1571                    end_line: node.end_position().row,
1572                    start_byte: node.start_byte(),
1573                    end_byte: node.end_byte(),
1574                });
1575            }
1576        }
1577    });
1578    Ok(captures)
1579}
1580
// Tests that need the `lang-rust` feature; feature-independent tests live in
// `tests_unsupported` below.
#[cfg(all(test, feature = "lang-rust"))]
mod tests {
    use super::*;
    use std::path::Path;

    /// `Some(0)` as recursion limit must find as many functions as `None`.
    #[test]
    fn test_ast_recursion_limit_zero_is_unlimited() {
        let source = "fn hello() -> u32 { 42 }";
        let unlimited = SemanticExtractor::extract(source, "rust", None, None);
        let zero_limit = SemanticExtractor::extract(source, "rust", Some(0), None);
        assert!(unlimited.is_ok(), "extract with None failed");
        assert!(zero_limit.is_ok(), "extract with Some(0) failed");

        let baseline = unlimited.unwrap();
        let with_zero = zero_limit.unwrap();
        assert!(
            !baseline.functions.is_empty(),
            "extract with None should find at least one function in the test source"
        );
        assert_eq!(
            baseline.functions.len(),
            with_zero.functions.len(),
            "ast_recursion_limit=0 should behave identically to unset (unlimited)"
        );
    }

    /// `use path as alias;` records the alias among the import items.
    #[test]
    fn test_rust_use_as_imports() {
        // Arrange
        let source = "use std::io as stdio;";
        // Act
        let analysis = SemanticExtractor::extract(source, "rust", None, None).unwrap();
        // Assert: alias "stdio" is captured as an import item
        let alias_found = analysis
            .imports
            .iter()
            .flat_map(|imp| imp.items.iter())
            .any(|item| item == "stdio");
        assert!(
            alias_found,
            "expected import alias 'stdio' in {:?}",
            analysis.imports
        );
    }

    /// Same as above, but the aliased path is a plain identifier.
    #[test]
    fn test_rust_use_as_clause_plain_identifier() {
        // Arrange: use_as_clause with plain identifier (no scoped_identifier)
        // exercises the _ => prefix.to_string() arm
        let source = "use io as stdio;";
        // Act
        let analysis = SemanticExtractor::extract(source, "rust", None, None).unwrap();
        // Assert: alias "stdio" is captured as an import item
        let alias_found = analysis
            .imports
            .iter()
            .flat_map(|imp| imp.items.iter())
            .any(|item| item == "stdio");
        assert!(
            alias_found,
            "expected import alias 'stdio' from plain identifier in {:?}",
            analysis.imports
        );
    }

    /// Nested paths inside a use list keep the outer prefix in the module name.
    #[test]
    fn test_rust_scoped_use_with_prefix() {
        // Arrange: scoped_use_list with non-empty prefix
        let source = "use std::{io::Read, io::Write};";
        // Act
        let analysis = SemanticExtractor::extract(source, "rust", None, None).unwrap();
        // Assert: both Read and Write appear as items with std::io module
        let io_items: Vec<&str> = analysis
            .imports
            .iter()
            .filter(|imp| imp.module.starts_with("std::io"))
            .flat_map(|imp| imp.items.iter())
            .map(String::as_str)
            .collect();
        let has_read = io_items.contains(&"Read");
        let has_write = io_items.contains(&"Write");
        assert!(
            has_read && has_write,
            "expected 'Read' and 'Write' items under module with std::io, got {:?}",
            analysis.imports
        );
    }

    /// A flat use list yields one item per entry under the shared module.
    #[test]
    fn test_rust_scoped_use_imports() {
        // Arrange
        let source = "use std::{fs, io};";
        // Act
        let analysis = SemanticExtractor::extract(source, "rust", None, None).unwrap();
        // Assert: both "fs" and "io" appear as import items under module "std"
        let std_items: Vec<&str> = analysis
            .imports
            .iter()
            .filter(|imp| imp.module == "std")
            .flat_map(|imp| imp.items.iter())
            .map(String::as_str)
            .collect();
        let has_fs = std_items.contains(&"fs");
        let has_io = std_items.contains(&"io");
        assert!(
            has_fs && has_io,
            "expected 'fs' and 'io' items under module 'std', got {:?}",
            std_items
        );
    }

    /// A glob import is reported as the single item `*`.
    #[test]
    fn test_rust_wildcard_imports() {
        // Arrange
        let source = "use std::io::*;";
        // Act
        let analysis = SemanticExtractor::extract(source, "rust", None, None).unwrap();
        // Assert: wildcard import with module "std::io"
        let has_wildcard = analysis
            .imports
            .iter()
            .any(|imp| imp.module == "std::io" && imp.items == ["*"]);
        assert!(
            has_wildcard,
            "expected wildcard import with module 'std::io', got {:?}",
            analysis.imports
        );
    }

    /// `extract_impl_traits` reports trait and implementing type for a simple impl.
    #[test]
    fn test_extract_impl_traits_standalone() {
        // Arrange: source with a simple impl Trait for Type
        let source = r"
struct Foo;
trait Display {}
impl Display for Foo {}
";
        // Act
        let found = extract_impl_traits(source, Path::new("test.rs"));
        // Assert
        assert_eq!(found.len(), 1, "expected one impl trait, got {:?}", found);
        let only = &found[0];
        assert_eq!(only.trait_name, "Display");
        assert_eq!(only.impl_type, "Foo");
    }

    /// A recursion limit that does not fit in `u32` is rejected with `ParseError`.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_ast_recursion_limit_overflow() {
        // Arrange: limit larger than u32::MAX triggers a ParseError on 64-bit targets
        let source = "fn foo() {}";
        let big_limit = usize::try_from(u32::MAX).unwrap() + 1;
        // Act
        let outcome = SemanticExtractor::extract(source, "rust", Some(big_limit), None);
        // Assert
        let is_parse_error = match &outcome {
            Err(ParserError::ParseError(_)) => true,
            _ => false,
        };
        assert!(
            is_parse_error,
            "expected ParseError for oversized limit, got {:?}",
            outcome
        );
    }

    /// A small positive recursion limit still extracts top-level functions.
    #[test]
    fn test_ast_recursion_limit_some() {
        // Arrange: ast_recursion_limit with Some(depth) to exercise max_depth Some branch
        let source = "fn hello() -> u32 { 42 }";
        // Act
        let outcome = SemanticExtractor::extract(source, "rust", Some(5), None);
        // Assert: should succeed without error and extract functions
        assert!(outcome.is_ok(), "extract with Some(5) failed: {:?}", outcome);
        let analysis = outcome.unwrap();
        assert!(
            !analysis.functions.is_empty(),
            "expected at least one function with depth limit 5"
        );
    }

    /// A `let` binding followed by a use yields both a write and a read site.
    #[test]
    fn test_extract_def_use_for_file_finds_write_and_read() {
        use crate::types::DefUseKind;

        // Arrange
        let source = r#"
fn main() {
    let count = 0;
    println!("{}", count);
}
"#;
        // Act
        let sites =
            SemanticExtractor::extract_def_use_for_file(source, "rust", "count", "src/main.rs", None);

        // Assert
        assert!(
            !sites.is_empty(),
            "expected at least one def-use site for 'count'"
        );
        let has_write = sites.iter().any(|site| site.kind == DefUseKind::Write);
        let has_read = sites.iter().any(|site| site.kind == DefUseKind::Read);
        assert!(has_write, "expected a write site for 'count'");
        assert!(has_read, "expected a read site for 'count'");
        assert_eq!(sites[0].file, "src/main.rs");
    }

    /// Looking up a symbol that never occurs produces no sites.
    #[test]
    fn test_extract_def_use_for_file_no_match_returns_empty() {
        // Arrange
        let source = "fn foo() { let x = 1; }";

        // Act
        let sites = SemanticExtractor::extract_def_use_for_file(
            source,
            "rust",
            "nonexistent_symbol",
            "src/lib.rs",
            None,
        );

        // Assert
        assert!(sites.is_empty(), "expected empty for nonexistent symbol");
    }
}
1804
// Language-feature-gated tests for Python (require lang-python).
//
// The two timeout tests at the bottom parse Rust source, so they are additionally
// gated on lang-rust; otherwise a build with only lang-python enabled would run
// them against a language that is not compiled in.
#[cfg(all(test, feature = "lang-python"))]
mod tests_python {
    use super::*;

    /// `from . import foo` is recorded with a module name containing a dot.
    #[test]
    fn test_python_relative_import() {
        // Arrange: relative import (from . import foo)
        let source = "from . import foo\n";
        // Act
        let result = SemanticExtractor::extract(source, "python", None, None).unwrap();
        // Assert: relative import should be captured
        let has_relative = result.imports.iter().any(|imp| imp.module.contains('.'));
        assert!(
            has_relative,
            "expected relative import in {:?}",
            result.imports
        );
    }

    /// `from os import path as p` records the original name `path` under module `os`.
    #[test]
    fn test_python_aliased_import() {
        // Arrange: aliased import (from os import path as p)
        // Note: tree-sitter-python extracts "path" (the original name), not the alias "p"
        let source = "from os import path as p\n";
        // Act
        let result = SemanticExtractor::extract(source, "python", None, None).unwrap();
        // Assert: "path" should be in items (alias is captured separately by aliased_import node)
        let has_path = result
            .imports
            .iter()
            .any(|imp| imp.module == "os" && imp.items.iter().any(|i| i == "path"));
        assert!(
            has_path,
            "expected import 'path' from module 'os' in {:?}",
            result.imports
        );
    }

    /// Without a timeout, extraction of a trivial Rust function succeeds.
    // Parses Rust source, so it also needs the lang-rust feature.
    #[cfg(feature = "lang-rust")]
    #[test]
    fn test_parse_no_timeout_when_none() {
        // Arrange: simple Rust source with no deadline
        let source = r#"fn hello() -> u32 { 42 }"#;
        // Act: extract with deadline=None (no timeout)
        let result = SemanticExtractor::extract(source, "rust", None, None);
        // Assert: should succeed normally
        assert!(result.is_ok(), "extract with deadline=None should succeed");
        let analysis = result.unwrap();
        assert!(
            !analysis.functions.is_empty(),
            "should find at least one function"
        );
    }

    /// A one-microsecond timeout is expected to surface as `ParserError::Timeout`.
    // Parses Rust source, so it also needs the lang-rust feature.
    #[cfg(feature = "lang-rust")]
    #[test]
    fn test_parse_timeout_triggers_error() {
        // Arrange: simple Rust source with a very short timeout (1 microsecond)
        let source = r#"fn hello() -> u32 { 42 }"#;
        // Act: extract with a very short timeout that will expire immediately
        let result = SemanticExtractor::extract(source, "rust", None, Some(1u64));
        // Assert: should return a Timeout error
        assert!(
            matches!(result, Err(ParserError::Timeout(_))),
            "expected Timeout error, got {:?}",
            result
        );
    }
}
1873
// Feature-independent tests: these only exercise the unsupported-language path,
// so no language feature gate is required.
#[cfg(test)]
mod tests_unsupported {
    use super::*;

    /// `ElementExtractor` rejects an unknown language and echoes its name back.
    #[test]
    fn test_element_extractor_unsupported_language() {
        // Arrange + Act
        let outcome = ElementExtractor::extract_with_depth("x = 1", "cobol");
        // Assert
        let rejected_cobol = match &outcome {
            Err(ParserError::UnsupportedLanguage(lang)) => lang == "cobol",
            _ => false,
        };
        assert!(
            rejected_cobol,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }

    /// `SemanticExtractor` rejects an unknown language and echoes its name back.
    #[test]
    fn test_semantic_extractor_unsupported_language() {
        // Arrange + Act
        let outcome = SemanticExtractor::extract("x = 1", "cobol", None, None);
        // Assert
        let rejected_cobol = match &outcome {
            Err(ParserError::UnsupportedLanguage(lang)) => lang == "cobol",
            _ => false,
        };
        assert!(
            rejected_cobol,
            "expected UnsupportedLanguage error, got {:?}",
            outcome
        );
    }
}