Skip to main content

code_analyze_core/
parser.rs

//! Tree-sitter-based parser for extracting semantic structure from source code.
//!
//! This module provides language-agnostic parsing using tree-sitter queries to extract
//! functions, classes, imports, references, and other semantic elements from source files.
//! Two main extractors handle different use cases:
//!
//! - [`ElementExtractor`]: Quick extraction of function and class counts.
//! - [`SemanticExtractor`]: Detailed semantic analysis with calls, imports, and references.
9
10use crate::languages::get_language_info;
11use crate::types::{
12    CallInfo, ClassInfo, FunctionInfo, ImplTraitInfo, ImportInfo, ReferenceInfo, ReferenceType,
13    SemanticAnalysis,
14};
15use std::cell::RefCell;
16use std::collections::HashMap;
17use std::path::{Path, PathBuf};
18use std::sync::LazyLock;
19use thiserror::Error;
20use tracing::instrument;
21use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
22
23#[derive(Debug, Error)]
24#[non_exhaustive]
25pub enum ParserError {
26    #[error("Unsupported language: {0}")]
27    UnsupportedLanguage(String),
28    #[error("Failed to parse file: {0}")]
29    ParseError(String),
30    #[error("Invalid UTF-8 in file")]
31    InvalidUtf8,
32    #[error("Query error: {0}")]
33    QueryError(String),
34}
35
36/// Compiled tree-sitter queries for a language.
37/// Stores all query types: mandatory (element, call) and optional (import, impl, reference).
38struct CompiledQueries {
39    element: Query,
40    call: Query,
41    import: Option<Query>,
42    impl_block: Option<Query>,
43    reference: Option<Query>,
44    impl_trait: Option<Query>,
45}
46
47/// Build compiled queries for a given language.
48fn build_compiled_queries(
49    lang_info: &crate::languages::LanguageInfo,
50) -> Result<CompiledQueries, ParserError> {
51    let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
52        ParserError::QueryError(format!(
53            "Failed to compile element query for {}: {}",
54            lang_info.name, e
55        ))
56    })?;
57
58    let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
59        ParserError::QueryError(format!(
60            "Failed to compile call query for {}: {}",
61            lang_info.name, e
62        ))
63    })?;
64
65    let import = if let Some(import_query_str) = lang_info.import_query {
66        Some(
67            Query::new(&lang_info.language, import_query_str).map_err(|e| {
68                ParserError::QueryError(format!(
69                    "Failed to compile import query for {}: {}",
70                    lang_info.name, e
71                ))
72            })?,
73        )
74    } else {
75        None
76    };
77
78    let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
79        Some(
80            Query::new(&lang_info.language, impl_query_str).map_err(|e| {
81                ParserError::QueryError(format!(
82                    "Failed to compile impl query for {}: {}",
83                    lang_info.name, e
84                ))
85            })?,
86        )
87    } else {
88        None
89    };
90
91    let reference = if let Some(ref_query_str) = lang_info.reference_query {
92        Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
93            ParserError::QueryError(format!(
94                "Failed to compile reference query for {}: {}",
95                lang_info.name, e
96            ))
97        })?)
98    } else {
99        None
100    };
101
102    let impl_trait = if let Some(impl_trait_query_str) = lang_info.impl_trait_query {
103        Some(
104            Query::new(&lang_info.language, impl_trait_query_str).map_err(|e| {
105                ParserError::QueryError(format!(
106                    "Failed to compile impl_trait query for {}: {}",
107                    lang_info.name, e
108                ))
109            })?,
110        )
111    } else {
112        None
113    };
114
115    Ok(CompiledQueries {
116        element,
117        call,
118        import,
119        impl_block,
120        reference,
121        impl_trait,
122    })
123}
124
125/// Initialize the query cache with compiled queries for all supported languages.
126fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
127    let supported_languages = [
128        "rust",
129        "python",
130        "typescript",
131        "tsx",
132        "go",
133        "java",
134        "fortran",
135    ];
136    let mut cache = HashMap::new();
137
138    for lang_name in &supported_languages {
139        if let Some(lang_info) = get_language_info(lang_name) {
140            match build_compiled_queries(&lang_info) {
141                Ok(compiled) => {
142                    cache.insert(*lang_name, compiled);
143                }
144                Err(e) => {
145                    tracing::error!(
146                        "Failed to compile queries for language {}: {}",
147                        lang_name,
148                        e
149                    );
150                }
151            }
152        }
153    }
154
155    cache
156}
157
158/// Lazily initialized cache of compiled queries per language.
159static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
160    LazyLock::new(init_query_cache);
161
162/// Get compiled queries for a language from the cache.
163fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
164    QUERY_CACHE
165        .get(language)
166        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
167}
168
169thread_local! {
170    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
171}
172
173/// Canonical API for extracting element counts from source code.
174pub struct ElementExtractor;
175
176impl ElementExtractor {
177    /// Extract function and class counts from source code.
178    ///
179    /// # Errors
180    ///
181    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
182    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
183    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
184    #[instrument(skip_all, fields(language))]
185    pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
186        let lang_info = get_language_info(language)
187            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
188
189        let tree = PARSER.with(|p| {
190            let mut parser = p.borrow_mut();
191            parser
192                .set_language(&lang_info.language)
193                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
194            parser
195                .parse(source, None)
196                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
197        })?;
198
199        let compiled = get_compiled_queries(language)?;
200
201        let mut cursor = QueryCursor::new();
202        let mut function_count = 0;
203        let mut class_count = 0;
204
205        let mut matches = cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
206        while let Some(mat) = matches.next() {
207            for capture in mat.captures {
208                let capture_name = compiled.element.capture_names()[capture.index as usize];
209                match capture_name {
210                    "function" => function_count += 1,
211                    "class" => class_count += 1,
212                    _ => {}
213                }
214            }
215        }
216
217        tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
218
219        Ok((function_count, class_count))
220    }
221}
222
223/// Recursively extract `ImportInfo` entries from a use-clause node, respecting all Rust
224/// use-declaration forms (`scoped_identifier`, `scoped_use_list`, `use_list`,
225/// `use_as_clause`, `use_wildcard`, bare `identifier`).
226#[allow(clippy::too_many_lines)] // exhaustive match over all supported Rust use-clause forms; splitting harms readability
227fn extract_imports_from_node(
228    node: &Node,
229    source: &str,
230    prefix: &str,
231    line: usize,
232    imports: &mut Vec<ImportInfo>,
233) {
234    match node.kind() {
235        // Simple identifier: `use foo;` or an item inside `{foo, bar}`
236        "identifier" | "self" | "super" | "crate" => {
237            let name = source[node.start_byte()..node.end_byte()].to_string();
238            imports.push(ImportInfo {
239                module: prefix.to_string(),
240                items: vec![name],
241                line,
242            });
243        }
244        // Qualified path: `std::collections::HashMap`
245        "scoped_identifier" => {
246            let item = node
247                .child_by_field_name("name")
248                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
249                .unwrap_or_default();
250            let module = node.child_by_field_name("path").map_or_else(
251                || prefix.to_string(),
252                |p| {
253                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
254                    if prefix.is_empty() {
255                        path_text
256                    } else {
257                        format!("{prefix}::{path_text}")
258                    }
259                },
260            );
261            if !item.is_empty() {
262                imports.push(ImportInfo {
263                    module,
264                    items: vec![item],
265                    line,
266                });
267            }
268        }
269        // `std::{io, fs}` — path prefix followed by a brace list
270        "scoped_use_list" => {
271            let new_prefix = node.child_by_field_name("path").map_or_else(
272                || prefix.to_string(),
273                |p| {
274                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
275                    if prefix.is_empty() {
276                        path_text
277                    } else {
278                        format!("{prefix}::{path_text}")
279                    }
280                },
281            );
282            if let Some(list) = node.child_by_field_name("list") {
283                extract_imports_from_node(&list, source, &new_prefix, line, imports);
284            }
285        }
286        // `{HashMap, HashSet}` — brace-enclosed list of items
287        "use_list" => {
288            let mut cursor = node.walk();
289            for child in node.children(&mut cursor) {
290                match child.kind() {
291                    "{" | "}" | "," => {}
292                    _ => extract_imports_from_node(&child, source, prefix, line, imports),
293                }
294            }
295        }
296        // `std::io::*` — glob import
297        "use_wildcard" => {
298            let text = source[node.start_byte()..node.end_byte()].to_string();
299            let module = if let Some(stripped) = text.strip_suffix("::*") {
300                if prefix.is_empty() {
301                    stripped.to_string()
302                } else {
303                    format!("{prefix}::{stripped}")
304                }
305            } else {
306                prefix.to_string()
307            };
308            imports.push(ImportInfo {
309                module,
310                items: vec!["*".to_string()],
311                line,
312            });
313        }
314        // `io as stdio` or `std::io as stdio`
315        "use_as_clause" => {
316            let alias = node
317                .child_by_field_name("alias")
318                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
319                .unwrap_or_default();
320            let module = if let Some(path_node) = node.child_by_field_name("path") {
321                match path_node.kind() {
322                    "scoped_identifier" => path_node.child_by_field_name("path").map_or_else(
323                        || prefix.to_string(),
324                        |p| {
325                            let p_text = source[p.start_byte()..p.end_byte()].to_string();
326                            if prefix.is_empty() {
327                                p_text
328                            } else {
329                                format!("{prefix}::{p_text}")
330                            }
331                        },
332                    ),
333                    _ => prefix.to_string(),
334                }
335            } else {
336                prefix.to_string()
337            };
338            if !alias.is_empty() {
339                imports.push(ImportInfo {
340                    module,
341                    items: vec![alias],
342                    line,
343                });
344            }
345        }
346        // Python import_from_statement: `from module import name` or `from . import *`
347        "import_from_statement" => {
348            extract_python_import_from(node, source, line, imports);
349        }
350        // Fallback for non-Rust import nodes: capture full text as module
351        _ => {
352            let text = source[node.start_byte()..node.end_byte()]
353                .trim()
354                .to_string();
355            if !text.is_empty() {
356                imports.push(ImportInfo {
357                    module: text,
358                    items: vec![],
359                    line,
360                });
361            }
362        }
363    }
364}
365
366/// Extract an item name from a `dotted_name` or `aliased_import` child node.
367fn extract_import_item_name(child: &Node, source: &str) -> Option<String> {
368    match child.kind() {
369        "dotted_name" => {
370            let name = source[child.start_byte()..child.end_byte()]
371                .trim()
372                .to_string();
373            if name.is_empty() { None } else { Some(name) }
374        }
375        "aliased_import" => child.child_by_field_name("name").and_then(|n| {
376            let name = source[n.start_byte()..n.end_byte()].trim().to_string();
377            if name.is_empty() { None } else { Some(name) }
378        }),
379        _ => None,
380    }
381}
382
383/// Collect wildcard/named imports from an `import_list` node or from direct named children.
384fn collect_import_items(
385    node: &Node,
386    source: &str,
387    is_wildcard: &mut bool,
388    items: &mut Vec<String>,
389) {
390    // Prefer import_list child (wraps `from x import a, b`)
391    if let Some(import_list) = node.child_by_field_name("import_list") {
392        let mut cursor = import_list.walk();
393        for child in import_list.named_children(&mut cursor) {
394            if child.kind() == "wildcard_import" {
395                *is_wildcard = true;
396            } else if let Some(name) = extract_import_item_name(&child, source) {
397                items.push(name);
398            }
399        }
400        return;
401    }
402    // No import_list: single-name or wildcard as direct child (skip first named child = module_name)
403    let mut cursor = node.walk();
404    let mut first = true;
405    for child in node.named_children(&mut cursor) {
406        if first {
407            first = false;
408            continue;
409        }
410        if child.kind() == "wildcard_import" {
411            *is_wildcard = true;
412        } else if let Some(name) = extract_import_item_name(&child, source) {
413            items.push(name);
414        }
415    }
416}
417
418/// Handle Python `import_from_statement` node.
419fn extract_python_import_from(
420    node: &Node,
421    source: &str,
422    line: usize,
423    imports: &mut Vec<ImportInfo>,
424) {
425    let module = if let Some(m) = node.child_by_field_name("module_name") {
426        source[m.start_byte()..m.end_byte()].trim().to_string()
427    } else if let Some(r) = node.child_by_field_name("relative_import") {
428        source[r.start_byte()..r.end_byte()].trim().to_string()
429    } else {
430        String::new()
431    };
432
433    let mut is_wildcard = false;
434    let mut items = Vec::new();
435    collect_import_items(node, source, &mut is_wildcard, &mut items);
436
437    if !module.is_empty() {
438        imports.push(ImportInfo {
439            module,
440            items: if is_wildcard {
441                vec!["*".to_string()]
442            } else {
443                items
444            },
445            line,
446        });
447    }
448}
449
/// Entry point for detailed semantic analysis: functions, classes, imports, references,
/// calls and impl-trait blocks.
pub struct SemanticExtractor;
451
452impl SemanticExtractor {
453    /// Extract semantic information from source code.
454    ///
455    /// # Errors
456    ///
457    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
458    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
459    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
460    #[instrument(skip_all, fields(language))]
461    pub fn extract(
462        source: &str,
463        language: &str,
464        ast_recursion_limit: Option<usize>,
465    ) -> Result<SemanticAnalysis, ParserError> {
466        let lang_info = get_language_info(language)
467            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
468
469        let tree = PARSER.with(|p| {
470            let mut parser = p.borrow_mut();
471            parser
472                .set_language(&lang_info.language)
473                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {e}")))?;
474            parser
475                .parse(source, None)
476                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
477        })?;
478
479        // 0 is not a useful depth (visits root node only, returning zero results).
480        // Treat 0 as None (unlimited). See #339.
481        let max_depth: Option<u32> = ast_recursion_limit
482            .filter(|&limit| limit > 0)
483            .map(|limit| {
484                u32::try_from(limit).map_err(|_| {
485                    ParserError::ParseError(format!(
486                        "ast_recursion_limit {} exceeds maximum supported value {}",
487                        limit,
488                        u32::MAX
489                    ))
490                })
491            })
492            .transpose()?;
493
494        let compiled = get_compiled_queries(language)?;
495        let root = tree.root_node();
496
497        let mut functions = Vec::new();
498        let mut classes = Vec::new();
499        let mut imports = Vec::new();
500        let mut references = Vec::new();
501        let mut call_frequency = HashMap::new();
502        let mut calls = Vec::new();
503
504        Self::extract_elements(
505            source,
506            compiled,
507            root,
508            max_depth,
509            &lang_info,
510            &mut functions,
511            &mut classes,
512        );
513        Self::extract_calls(
514            source,
515            compiled,
516            root,
517            max_depth,
518            &mut calls,
519            &mut call_frequency,
520        );
521        Self::extract_imports(source, compiled, root, max_depth, &mut imports);
522        Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes);
523        Self::extract_references(source, compiled, root, max_depth, &mut references);
524
525        // Extract impl-trait blocks for Rust files (empty for other languages)
526        let impl_traits = if language == "rust" {
527            Self::extract_impl_traits_from_tree(source, compiled, root)
528        } else {
529            vec![]
530        };
531
532        tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), impl_traits = impl_traits.len(), "extraction complete");
533
534        Ok(SemanticAnalysis {
535            functions,
536            classes,
537            imports,
538            references,
539            call_frequency,
540            calls,
541            impl_traits,
542        })
543    }
544
545    fn extract_elements(
546        source: &str,
547        compiled: &CompiledQueries,
548        root: Node<'_>,
549        max_depth: Option<u32>,
550        lang_info: &crate::languages::LanguageInfo,
551        functions: &mut Vec<FunctionInfo>,
552        classes: &mut Vec<ClassInfo>,
553    ) {
554        let mut cursor = QueryCursor::new();
555        if let Some(depth) = max_depth {
556            cursor.set_max_start_depth(Some(depth));
557        }
558        let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
559        let mut seen_functions = std::collections::HashSet::new();
560
561        while let Some(mat) = matches.next() {
562            for capture in mat.captures {
563                let capture_name = compiled.element.capture_names()[capture.index as usize];
564                let node = capture.node;
565                match capture_name {
566                    "function" => {
567                        if let Some(name_node) = node.child_by_field_name("name") {
568                            let name =
569                                source[name_node.start_byte()..name_node.end_byte()].to_string();
570                            let func_key = (name.clone(), node.start_position().row);
571                            if !seen_functions.contains(&func_key) {
572                                seen_functions.insert(func_key);
573                                let params = node
574                                    .child_by_field_name("parameters")
575                                    .map(|p| source[p.start_byte()..p.end_byte()].to_string())
576                                    .unwrap_or_default();
577                                let return_type = node
578                                    .child_by_field_name("return_type")
579                                    .map(|r| source[r.start_byte()..r.end_byte()].to_string());
580                                functions.push(FunctionInfo {
581                                    name,
582                                    line: node.start_position().row + 1,
583                                    end_line: node.end_position().row + 1,
584                                    parameters: if params.is_empty() {
585                                        Vec::new()
586                                    } else {
587                                        vec![params]
588                                    },
589                                    return_type,
590                                });
591                            }
592                        }
593                    }
594                    "class" => {
595                        if let Some(name_node) = node.child_by_field_name("name") {
596                            let name =
597                                source[name_node.start_byte()..name_node.end_byte()].to_string();
598                            let inherits = if let Some(handler) = lang_info.extract_inheritance {
599                                handler(&node, source)
600                            } else {
601                                Vec::new()
602                            };
603                            classes.push(ClassInfo {
604                                name,
605                                line: node.start_position().row + 1,
606                                end_line: node.end_position().row + 1,
607                                methods: Vec::new(),
608                                fields: Vec::new(),
609                                inherits,
610                            });
611                        }
612                    }
613                    _ => {}
614                }
615            }
616        }
617    }
618
619    /// Returns the name of the enclosing function/method/subroutine for a given AST node,
620    /// by walking ancestors and matching all language-specific function container kinds.
621    fn enclosing_function_name(mut node: tree_sitter::Node<'_>, source: &str) -> Option<String> {
622        let mut depth = 0u32;
623        while let Some(parent) = node.parent() {
624            depth += 1;
625            // Cap at 64 hops: real function nesting rarely exceeds ~10 levels; 64 is a generous
626            // upper bound that guards against pathological/malformed ASTs without false negatives
627            // on legitimate code. Returns None (treated as <module>) when the cap is hit.
628            if depth > 64 {
629                return None;
630            }
631            let name_node = match parent.kind() {
632                // Direct name field: Rust, Python, Go, Java, TypeScript/TSX
633                "function_item"
634                | "method_item"
635                | "function_definition"
636                | "function_declaration"
637                | "method_declaration"
638                | "method_definition" => parent.child_by_field_name("name"),
639                // Fortran subroutine: name is inside subroutine_statement child
640                "subroutine" => {
641                    let mut cursor = parent.walk();
642                    parent
643                        .children(&mut cursor)
644                        .find(|c| c.kind() == "subroutine_statement")
645                        .and_then(|s| s.child_by_field_name("name"))
646                }
647                // Fortran function: name is inside function_statement child
648                "function" => {
649                    let mut cursor = parent.walk();
650                    parent
651                        .children(&mut cursor)
652                        .find(|c| c.kind() == "function_statement")
653                        .and_then(|s| s.child_by_field_name("name"))
654                }
655                _ => {
656                    node = parent;
657                    continue;
658                }
659            };
660            return name_node.map(|n| source[n.start_byte()..n.end_byte()].to_string());
661        }
662        // The loop exits here only when no parent was found (i.e., we reached the tree root
663        // without finding a function container). If the depth cap fired, we returned None early
664        // above. Nothing to assert here.
665        None
666    }
667
668    fn extract_calls(
669        source: &str,
670        compiled: &CompiledQueries,
671        root: Node<'_>,
672        max_depth: Option<u32>,
673        calls: &mut Vec<CallInfo>,
674        call_frequency: &mut HashMap<String, usize>,
675    ) {
676        let mut cursor = QueryCursor::new();
677        if let Some(depth) = max_depth {
678            cursor.set_max_start_depth(Some(depth));
679        }
680        let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
681
682        while let Some(mat) = matches.next() {
683            for capture in mat.captures {
684                let capture_name = compiled.call.capture_names()[capture.index as usize];
685                if capture_name != "call" {
686                    continue;
687                }
688                let node = capture.node;
689                let call_name = source[node.start_byte()..node.end_byte()].to_string();
690                *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
691
692                let caller = Self::enclosing_function_name(node, source)
693                    .unwrap_or_else(|| "<module>".to_string());
694
695                let mut arg_count = None;
696                let mut arg_node = node;
697                let mut hop = 0u32;
698                let mut cap_hit = false;
699                while let Some(parent) = arg_node.parent() {
700                    hop += 1;
701                    // Bounded parent traversal: cap at 16 hops to guard against pathological
702                    // walks on malformed/degenerate trees. Real call-expression nesting is
703                    // shallow (typically 1-3 levels). When the cap is hit we stop searching and
704                    // leave arg_count as None; the caller is still recorded, just without
705                    // argument-count information.
706                    if hop > 16 {
707                        cap_hit = true;
708                        break;
709                    }
710                    if parent.kind() == "call_expression" {
711                        if let Some(args) = parent.child_by_field_name("arguments") {
712                            arg_count = Some(args.named_child_count());
713                        }
714                        break;
715                    }
716                    arg_node = parent;
717                }
718                debug_assert!(
719                    !cap_hit,
720                    "extract_calls: parent traversal cap reached (hop > 16)"
721                );
722
723                calls.push(CallInfo {
724                    caller,
725                    callee: call_name,
726                    line: node.start_position().row + 1,
727                    column: node.start_position().column,
728                    arg_count,
729                });
730            }
731        }
732    }
733
734    fn extract_imports(
735        source: &str,
736        compiled: &CompiledQueries,
737        root: Node<'_>,
738        max_depth: Option<u32>,
739        imports: &mut Vec<ImportInfo>,
740    ) {
741        let Some(ref import_query) = compiled.import else {
742            return;
743        };
744        let mut cursor = QueryCursor::new();
745        if let Some(depth) = max_depth {
746            cursor.set_max_start_depth(Some(depth));
747        }
748        let mut matches = cursor.matches(import_query, root, source.as_bytes());
749
750        while let Some(mat) = matches.next() {
751            for capture in mat.captures {
752                let capture_name = import_query.capture_names()[capture.index as usize];
753                if capture_name == "import_path" {
754                    let node = capture.node;
755                    let line = node.start_position().row + 1;
756                    extract_imports_from_node(&node, source, "", line, imports);
757                }
758            }
759        }
760    }
761
762    fn extract_impl_methods(
763        source: &str,
764        compiled: &CompiledQueries,
765        root: Node<'_>,
766        max_depth: Option<u32>,
767        classes: &mut [ClassInfo],
768    ) {
769        let Some(ref impl_query) = compiled.impl_block else {
770            return;
771        };
772        let mut cursor = QueryCursor::new();
773        if let Some(depth) = max_depth {
774            cursor.set_max_start_depth(Some(depth));
775        }
776        let mut matches = cursor.matches(impl_query, root, source.as_bytes());
777
778        while let Some(mat) = matches.next() {
779            let mut impl_type_name = String::new();
780            let mut method_name = String::new();
781            let mut method_line = 0usize;
782            let mut method_end_line = 0usize;
783            let mut method_params = String::new();
784            let mut method_return_type: Option<String> = None;
785
786            for capture in mat.captures {
787                let capture_name = impl_query.capture_names()[capture.index as usize];
788                let node = capture.node;
789                match capture_name {
790                    "impl_type" => {
791                        impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
792                    }
793                    "method_name" => {
794                        method_name = source[node.start_byte()..node.end_byte()].to_string();
795                    }
796                    "method_params" => {
797                        method_params = source[node.start_byte()..node.end_byte()].to_string();
798                    }
799                    "method" => {
800                        method_line = node.start_position().row + 1;
801                        method_end_line = node.end_position().row + 1;
802                        method_return_type = node
803                            .child_by_field_name("return_type")
804                            .map(|r| source[r.start_byte()..r.end_byte()].to_string());
805                    }
806                    _ => {}
807                }
808            }
809
810            if !impl_type_name.is_empty() && !method_name.is_empty() {
811                let func = FunctionInfo {
812                    name: method_name,
813                    line: method_line,
814                    end_line: method_end_line,
815                    parameters: if method_params.is_empty() {
816                        Vec::new()
817                    } else {
818                        vec![method_params]
819                    },
820                    return_type: method_return_type,
821                };
822                if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
823                    class.methods.push(func);
824                }
825            }
826        }
827    }
828
829    fn extract_references(
830        source: &str,
831        compiled: &CompiledQueries,
832        root: Node<'_>,
833        max_depth: Option<u32>,
834        references: &mut Vec<ReferenceInfo>,
835    ) {
836        let Some(ref ref_query) = compiled.reference else {
837            return;
838        };
839        let mut cursor = QueryCursor::new();
840        if let Some(depth) = max_depth {
841            cursor.set_max_start_depth(Some(depth));
842        }
843        let mut seen_refs = std::collections::HashSet::new();
844        let mut matches = cursor.matches(ref_query, root, source.as_bytes());
845
846        while let Some(mat) = matches.next() {
847            for capture in mat.captures {
848                let capture_name = ref_query.capture_names()[capture.index as usize];
849                if capture_name == "type_ref" {
850                    let node = capture.node;
851                    let type_ref = source[node.start_byte()..node.end_byte()].to_string();
852                    if seen_refs.insert(type_ref.clone()) {
853                        references.push(ReferenceInfo {
854                            symbol: type_ref,
855                            reference_type: ReferenceType::Usage,
856                            // location is intentionally empty here; set by the caller (analyze_file)
857                            location: String::new(),
858                            line: node.start_position().row + 1,
859                        });
860                    }
861                }
862            }
863        }
864    }
865
866    /// Extract impl-trait blocks from an already-parsed tree.
867    ///
868    /// Called during `extract()` for Rust files to avoid a second parse.
869    /// Returns an empty vec if the query is not available.
870    fn extract_impl_traits_from_tree(
871        source: &str,
872        compiled: &CompiledQueries,
873        root: Node<'_>,
874    ) -> Vec<ImplTraitInfo> {
875        let Some(query) = &compiled.impl_trait else {
876            return vec![];
877        };
878
879        let mut cursor = QueryCursor::new();
880        let mut matches = cursor.matches(query, root, source.as_bytes());
881        let mut results = Vec::new();
882
883        while let Some(mat) = matches.next() {
884            let mut trait_name = String::new();
885            let mut impl_type = String::new();
886            let mut line = 0usize;
887
888            for capture in mat.captures {
889                let capture_name = query.capture_names()[capture.index as usize];
890                let node = capture.node;
891                let text = source[node.start_byte()..node.end_byte()].to_string();
892                match capture_name {
893                    "trait_name" => {
894                        trait_name = text;
895                        line = node.start_position().row + 1;
896                    }
897                    "impl_type" => {
898                        impl_type = text;
899                    }
900                    _ => {}
901                }
902            }
903
904            if !trait_name.is_empty() && !impl_type.is_empty() {
905                results.push(ImplTraitInfo {
906                    trait_name,
907                    impl_type,
908                    path: PathBuf::new(), // Path will be set by caller
909                    line,
910                });
911            }
912        }
913
914        results
915    }
916}
917
918/// Extract `impl Trait for Type` blocks from Rust source.
919///
920/// Runs independently of `extract_references` to avoid shared deduplication state.
921/// Returns an empty vec for non-Rust source (no error; caller decides).
922#[must_use]
923pub fn extract_impl_traits(source: &str, path: &Path) -> Vec<ImplTraitInfo> {
924    let Some(lang_info) = get_language_info("rust") else {
925        return vec![];
926    };
927
928    let Ok(compiled) = get_compiled_queries("rust") else {
929        return vec![];
930    };
931
932    let Some(query) = &compiled.impl_trait else {
933        return vec![];
934    };
935
936    let Some(tree) = PARSER.with(|p| {
937        let mut parser = p.borrow_mut();
938        let _ = parser.set_language(&lang_info.language);
939        parser.parse(source, None)
940    }) else {
941        return vec![];
942    };
943
944    let root = tree.root_node();
945    let mut cursor = QueryCursor::new();
946    let mut matches = cursor.matches(query, root, source.as_bytes());
947    let mut results = Vec::new();
948
949    while let Some(mat) = matches.next() {
950        let mut trait_name = String::new();
951        let mut impl_type = String::new();
952        let mut line = 0usize;
953
954        for capture in mat.captures {
955            let capture_name = query.capture_names()[capture.index as usize];
956            let node = capture.node;
957            let text = source[node.start_byte()..node.end_byte()].to_string();
958            match capture_name {
959                "trait_name" => {
960                    trait_name = text;
961                    line = node.start_position().row + 1;
962                }
963                "impl_type" => {
964                    impl_type = text;
965                }
966                _ => {}
967            }
968        }
969
970        if !trait_name.is_empty() && !impl_type.is_empty() {
971            results.push(ImplTraitInfo {
972                trait_name,
973                impl_type,
974                path: path.to_path_buf(),
975                line,
976            });
977        }
978    }
979
980    results
981}
982
983/// Execute a custom tree-sitter query against source code.
984///
985/// This is the internal implementation of the public `execute_query` function.
986pub fn execute_query_impl(
987    language: &str,
988    source: &str,
989    query_str: &str,
990) -> Result<Vec<crate::QueryCapture>, ParserError> {
991    // Get the tree-sitter language from the language name
992    let ts_language = crate::languages::get_ts_language(language)
993        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
994
995    let mut parser = Parser::new();
996    parser
997        .set_language(&ts_language)
998        .map_err(|e| ParserError::QueryError(e.to_string()))?;
999
1000    let tree = parser
1001        .parse(source.as_bytes(), None)
1002        .ok_or_else(|| ParserError::QueryError("failed to parse source".to_string()))?;
1003
1004    let query =
1005        Query::new(&ts_language, query_str).map_err(|e| ParserError::QueryError(e.to_string()))?;
1006
1007    let mut cursor = QueryCursor::new();
1008    let source_bytes = source.as_bytes();
1009
1010    let mut captures = Vec::new();
1011    let mut matches = cursor.matches(&query, tree.root_node(), source_bytes);
1012    while let Some(m) = matches.next() {
1013        for cap in m.captures {
1014            let node = cap.node;
1015            let capture_name = query.capture_names()[cap.index as usize].to_string();
1016            let text = node.utf8_text(source_bytes).unwrap_or("").to_string();
1017            captures.push(crate::QueryCapture {
1018                capture_name,
1019                text,
1020                start_line: node.start_position().row,
1021                end_line: node.end_position().row,
1022                start_byte: node.start_byte(),
1023                end_byte: node.end_byte(),
1024            });
1025        }
1026    }
1027    Ok(captures)
1028}
1029
#[cfg(all(test, feature = "lang-rust"))]
mod tests {
    use super::*;

    /// A recursion limit of `Some(0)` must yield the same number of functions as
    /// passing no limit at all.
    #[test]
    fn test_ast_recursion_limit_zero_is_unlimited() {
        let source = "fn hello() -> u32 { 42 }";

        let unlimited = SemanticExtractor::extract(source, "rust", None);
        let zero_limit = SemanticExtractor::extract(source, "rust", Some(0));
        assert!(unlimited.is_ok(), "extract with None failed");
        assert!(zero_limit.is_ok(), "extract with Some(0) failed");

        let unlimited_count = unlimited.unwrap().functions.len();
        let zero_limit_count = zero_limit.unwrap().functions.len();

        // Guard against a vacuous comparison of two empty results.
        assert!(
            unlimited_count >= 1,
            "extract with None should find at least one function in the test source"
        );
        assert_eq!(
            unlimited_count, zero_limit_count,
            "ast_recursion_limit=0 should behave identically to unset (unlimited)"
        );
    }
}