Skip to main content

code_analyze_mcp/
parser.rs

1//! Tree-sitter-based parser for extracting semantic structure from source code.
2//!
3//! This module provides language-agnostic parsing using tree-sitter queries to extract
4//! functions, classes, imports, references, and other semantic elements from source files.
5//! Two main extractors handle different use cases:
6//!
7//! - [`ElementExtractor`]: Quick extraction of function and class counts.
8//! - [`SemanticExtractor`]: Detailed semantic analysis with calls, imports, and references.
9
10use crate::languages::get_language_info;
11use crate::types::{
12    AssignmentInfo, CallInfo, ClassInfo, FieldAccessInfo, FunctionInfo, ImportInfo, ReferenceInfo,
13    ReferenceType, SemanticAnalysis,
14};
15use std::cell::RefCell;
16use std::collections::HashMap;
17use std::sync::LazyLock;
18use thiserror::Error;
19use tracing::instrument;
20use tree_sitter::{Node, Parser, Query, QueryCursor, StreamingIterator};
21
/// Errors reported by the parsing and query-extraction routines in this module.
#[derive(Debug, Error)]
pub enum ParserError {
    /// No language information (or no compiled query set) is available for the
    /// requested language; carries the language name as given by the caller.
    #[error("Unsupported language: {0}")]
    UnsupportedLanguage(String),
    /// The parser could not be configured or did not produce a tree; also used when
    /// `ast_recursion_limit` does not fit in a `u32`. Carries a description.
    #[error("Failed to parse file: {0}")]
    ParseError(String),
    /// The input was not valid UTF-8.
    // NOTE(review): not constructed anywhere in the visible part of this file.
    #[error("Invalid UTF-8 in file")]
    InvalidUtf8,
    /// A tree-sitter query failed to compile; carries a description naming the
    /// query kind and language.
    #[error("Query error: {0}")]
    QueryError(String),
}
33
/// Compiled tree-sitter queries for a language.
///
/// `element` and `call` are mandatory. `import`, `impl_block`, `reference`,
/// `assignment` and `field` are optional and are `None` when the language does not
/// declare the corresponding query.
struct CompiledQueries {
    /// Captures `function` and `class` definitions.
    element: Query,
    /// Captures `call` sites.
    call: Query,
    /// Captures `import_path` nodes.
    import: Option<Query>,
    /// Captures `impl_type`, `method`, `method_name` and `method_params`.
    impl_block: Option<Query>,
    /// Captures `type_ref` nodes.
    reference: Option<Query>,
    /// Captures `variable` / `value` pairs.
    assignment: Option<Query>,
    /// Captures `object` / `field` pairs.
    field: Option<Query>,
}
45
46/// Build compiled queries for a given language.
47fn build_compiled_queries(
48    lang_info: &crate::languages::LanguageInfo,
49) -> Result<CompiledQueries, ParserError> {
50    let element = Query::new(&lang_info.language, lang_info.element_query).map_err(|e| {
51        ParserError::QueryError(format!(
52            "Failed to compile element query for {}: {}",
53            lang_info.name, e
54        ))
55    })?;
56
57    let call = Query::new(&lang_info.language, lang_info.call_query).map_err(|e| {
58        ParserError::QueryError(format!(
59            "Failed to compile call query for {}: {}",
60            lang_info.name, e
61        ))
62    })?;
63
64    let import = if let Some(import_query_str) = lang_info.import_query {
65        Some(
66            Query::new(&lang_info.language, import_query_str).map_err(|e| {
67                ParserError::QueryError(format!(
68                    "Failed to compile import query for {}: {}",
69                    lang_info.name, e
70                ))
71            })?,
72        )
73    } else {
74        None
75    };
76
77    let impl_block = if let Some(impl_query_str) = lang_info.impl_query {
78        Some(
79            Query::new(&lang_info.language, impl_query_str).map_err(|e| {
80                ParserError::QueryError(format!(
81                    "Failed to compile impl query for {}: {}",
82                    lang_info.name, e
83                ))
84            })?,
85        )
86    } else {
87        None
88    };
89
90    let reference = if let Some(ref_query_str) = lang_info.reference_query {
91        Some(Query::new(&lang_info.language, ref_query_str).map_err(|e| {
92            ParserError::QueryError(format!(
93                "Failed to compile reference query for {}: {}",
94                lang_info.name, e
95            ))
96        })?)
97    } else {
98        None
99    };
100
101    let assignment = if let Some(assignment_query_str) = lang_info.assignment_query {
102        Some(
103            Query::new(&lang_info.language, assignment_query_str).map_err(|e| {
104                ParserError::QueryError(format!(
105                    "Failed to compile assignment query for {}: {}",
106                    lang_info.name, e
107                ))
108            })?,
109        )
110    } else {
111        None
112    };
113
114    let field = if let Some(field_query_str) = lang_info.field_query {
115        Some(
116            Query::new(&lang_info.language, field_query_str).map_err(|e| {
117                ParserError::QueryError(format!(
118                    "Failed to compile field query for {}: {}",
119                    lang_info.name, e
120                ))
121            })?,
122        )
123    } else {
124        None
125    };
126
127    Ok(CompiledQueries {
128        element,
129        call,
130        import,
131        impl_block,
132        reference,
133        assignment,
134        field,
135    })
136}
137
138/// Initialize the query cache with compiled queries for all supported languages.
139fn init_query_cache() -> HashMap<&'static str, CompiledQueries> {
140    let supported_languages = ["rust", "python", "typescript", "tsx", "go", "java"];
141    let mut cache = HashMap::new();
142
143    for lang_name in &supported_languages {
144        if let Some(lang_info) = get_language_info(lang_name) {
145            match build_compiled_queries(&lang_info) {
146                Ok(compiled) => {
147                    cache.insert(*lang_name, compiled);
148                }
149                Err(e) => {
150                    tracing::error!(
151                        "Failed to compile queries for language {}: {}",
152                        lang_name,
153                        e
154                    );
155                }
156            }
157        }
158    }
159
160    cache
161}
162
/// Lazily initialized cache of compiled queries per language.
///
/// Populated by `init_query_cache` on first access; keyed by language name.
static QUERY_CACHE: LazyLock<HashMap<&'static str, CompiledQueries>> =
    LazyLock::new(init_query_cache);
166
167/// Get compiled queries for a language from the cache.
168fn get_compiled_queries(language: &str) -> Result<&'static CompiledQueries, ParserError> {
169    QUERY_CACHE
170        .get(language)
171        .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))
172}
173
// One tree-sitter `Parser` per thread, held in a `RefCell` so it can be borrowed
// mutably inside `PARSER.with`. Both extractors set the language on it before every
// parse, so no language is configured here.
thread_local! {
    static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
}
177
178/// Canonical API for extracting element counts from source code.
179pub struct ElementExtractor;
180
181impl ElementExtractor {
182    /// Extract function and class counts from source code.
183    ///
184    /// # Errors
185    ///
186    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
187    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
188    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
189    #[instrument(skip_all, fields(language))]
190    pub fn extract_with_depth(source: &str, language: &str) -> Result<(usize, usize), ParserError> {
191        let lang_info = get_language_info(language)
192            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
193
194        let tree = PARSER.with(|p| {
195            let mut parser = p.borrow_mut();
196            parser
197                .set_language(&lang_info.language)
198                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {}", e)))?;
199            parser
200                .parse(source, None)
201                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
202        })?;
203
204        let compiled = get_compiled_queries(language)?;
205
206        let mut cursor = QueryCursor::new();
207        let mut function_count = 0;
208        let mut class_count = 0;
209
210        let mut matches = cursor.matches(&compiled.element, tree.root_node(), source.as_bytes());
211        while let Some(mat) = matches.next() {
212            for capture in mat.captures {
213                let capture_name = compiled.element.capture_names()[capture.index as usize];
214                match capture_name {
215                    "function" => function_count += 1,
216                    "class" => class_count += 1,
217                    _ => {}
218                }
219            }
220        }
221
222        tracing::debug!(language = %language, functions = function_count, classes = class_count, "parse complete");
223
224        Ok((function_count, class_count))
225    }
226}
227
228/// Recursively extract `ImportInfo` entries from a use-clause node, respecting all Rust
229/// use-declaration forms (`scoped_identifier`, `scoped_use_list`, `use_list`,
230/// `use_as_clause`, `use_wildcard`, bare `identifier`).
231fn extract_imports_from_node(
232    node: &Node,
233    source: &str,
234    prefix: &str,
235    line: usize,
236    imports: &mut Vec<ImportInfo>,
237) {
238    match node.kind() {
239        // Simple identifier: `use foo;` or an item inside `{foo, bar}`
240        "identifier" | "self" | "super" | "crate" => {
241            let name = source[node.start_byte()..node.end_byte()].to_string();
242            imports.push(ImportInfo {
243                module: prefix.to_string(),
244                items: vec![name],
245                line,
246            });
247        }
248        // Qualified path: `std::collections::HashMap`
249        "scoped_identifier" => {
250            let item = node
251                .child_by_field_name("name")
252                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
253                .unwrap_or_default();
254            let module = node
255                .child_by_field_name("path")
256                .map(|p| {
257                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
258                    if prefix.is_empty() {
259                        path_text
260                    } else {
261                        format!("{}::{}", prefix, path_text)
262                    }
263                })
264                .unwrap_or_else(|| prefix.to_string());
265            if !item.is_empty() {
266                imports.push(ImportInfo {
267                    module,
268                    items: vec![item],
269                    line,
270                });
271            }
272        }
273        // `std::{io, fs}` — path prefix followed by a brace list
274        "scoped_use_list" => {
275            let new_prefix = node
276                .child_by_field_name("path")
277                .map(|p| {
278                    let path_text = source[p.start_byte()..p.end_byte()].to_string();
279                    if prefix.is_empty() {
280                        path_text
281                    } else {
282                        format!("{}::{}", prefix, path_text)
283                    }
284                })
285                .unwrap_or_else(|| prefix.to_string());
286            if let Some(list) = node.child_by_field_name("list") {
287                extract_imports_from_node(&list, source, &new_prefix, line, imports);
288            }
289        }
290        // `{HashMap, HashSet}` — brace-enclosed list of items
291        "use_list" => {
292            let mut cursor = node.walk();
293            for child in node.children(&mut cursor) {
294                match child.kind() {
295                    "{" | "}" | "," => {}
296                    _ => extract_imports_from_node(&child, source, prefix, line, imports),
297                }
298            }
299        }
300        // `std::io::*` — glob import
301        "use_wildcard" => {
302            let text = source[node.start_byte()..node.end_byte()].to_string();
303            let module = if let Some(stripped) = text.strip_suffix("::*") {
304                if prefix.is_empty() {
305                    stripped.to_string()
306                } else {
307                    format!("{}::{}", prefix, stripped)
308                }
309            } else {
310                prefix.to_string()
311            };
312            imports.push(ImportInfo {
313                module,
314                items: vec!["*".to_string()],
315                line,
316            });
317        }
318        // `io as stdio` or `std::io as stdio`
319        "use_as_clause" => {
320            let alias = node
321                .child_by_field_name("alias")
322                .map(|n| source[n.start_byte()..n.end_byte()].to_string())
323                .unwrap_or_default();
324            let module = if let Some(path_node) = node.child_by_field_name("path") {
325                match path_node.kind() {
326                    "scoped_identifier" => path_node
327                        .child_by_field_name("path")
328                        .map(|p| {
329                            let p_text = source[p.start_byte()..p.end_byte()].to_string();
330                            if prefix.is_empty() {
331                                p_text
332                            } else {
333                                format!("{}::{}", prefix, p_text)
334                            }
335                        })
336                        .unwrap_or_else(|| prefix.to_string()),
337                    _ => prefix.to_string(),
338                }
339            } else {
340                prefix.to_string()
341            };
342            if !alias.is_empty() {
343                imports.push(ImportInfo {
344                    module,
345                    items: vec![alias],
346                    line,
347                });
348            }
349        }
350        // Fallback for non-Rust import nodes: capture full text as module
351        _ => {
352            let text = source[node.start_byte()..node.end_byte()]
353                .trim()
354                .to_string();
355            if !text.is_empty() {
356                imports.push(ImportInfo {
357                    module: text,
358                    items: vec![],
359                    line,
360                });
361            }
362        }
363    }
364}
365
/// Entry point for detailed semantic extraction: functions, classes, imports,
/// references, calls, assignments and field accesses.
pub struct SemanticExtractor;
367
368impl SemanticExtractor {
369    /// Extract semantic information from source code.
370    ///
371    /// # Errors
372    ///
373    /// Returns `ParserError::UnsupportedLanguage` if the language is not recognized.
374    /// Returns `ParserError::ParseError` if the source code cannot be parsed.
375    /// Returns `ParserError::QueryError` if the tree-sitter query fails.
376    #[instrument(skip_all, fields(language))]
377    pub fn extract(
378        source: &str,
379        language: &str,
380        ast_recursion_limit: Option<usize>,
381    ) -> Result<SemanticAnalysis, ParserError> {
382        let lang_info = get_language_info(language)
383            .ok_or_else(|| ParserError::UnsupportedLanguage(language.to_string()))?;
384
385        let tree = PARSER.with(|p| {
386            let mut parser = p.borrow_mut();
387            parser
388                .set_language(&lang_info.language)
389                .map_err(|e| ParserError::ParseError(format!("Failed to set language: {}", e)))?;
390            parser
391                .parse(source, None)
392                .ok_or_else(|| ParserError::ParseError("Failed to parse".to_string()))
393        })?;
394
395        let max_depth: Option<u32> = ast_recursion_limit
396            .map(|limit| {
397                u32::try_from(limit).map_err(|_| {
398                    ParserError::ParseError(format!(
399                        "ast_recursion_limit {} exceeds maximum supported value {}",
400                        limit,
401                        u32::MAX
402                    ))
403                })
404            })
405            .transpose()?;
406
407        let compiled = get_compiled_queries(language)?;
408        let root = tree.root_node();
409
410        let mut functions = Vec::new();
411        let mut classes = Vec::new();
412        let mut imports = Vec::new();
413        let mut references = Vec::new();
414        let mut call_frequency = HashMap::new();
415        let mut calls = Vec::new();
416        let mut assignments: Vec<AssignmentInfo> = Vec::new();
417        let mut field_accesses: Vec<FieldAccessInfo> = Vec::new();
418
419        Self::extract_elements(
420            source,
421            compiled,
422            root,
423            max_depth,
424            &lang_info,
425            &mut functions,
426            &mut classes,
427        );
428        Self::extract_calls(
429            source,
430            compiled,
431            root,
432            max_depth,
433            &mut calls,
434            &mut call_frequency,
435        );
436        Self::extract_imports(source, compiled, root, max_depth, &mut imports);
437        Self::extract_impl_methods(source, compiled, root, max_depth, &mut classes);
438        Self::extract_references(source, compiled, root, max_depth, &mut references);
439        Self::extract_assignments(source, compiled, root, max_depth, &mut assignments);
440        Self::extract_field_accesses(source, compiled, root, max_depth, &mut field_accesses);
441
442        tracing::debug!(language = %language, functions = functions.len(), classes = classes.len(), imports = imports.len(), references = references.len(), calls = calls.len(), "extraction complete");
443
444        Ok(SemanticAnalysis {
445            functions,
446            classes,
447            imports,
448            references,
449            call_frequency,
450            calls,
451            assignments,
452            field_accesses,
453        })
454    }
455
456    fn extract_elements(
457        source: &str,
458        compiled: &CompiledQueries,
459        root: Node<'_>,
460        max_depth: Option<u32>,
461        lang_info: &crate::languages::LanguageInfo,
462        functions: &mut Vec<FunctionInfo>,
463        classes: &mut Vec<ClassInfo>,
464    ) {
465        let mut cursor = QueryCursor::new();
466        if let Some(depth) = max_depth {
467            cursor.set_max_start_depth(Some(depth));
468        }
469        let mut matches = cursor.matches(&compiled.element, root, source.as_bytes());
470        let mut seen_functions = std::collections::HashSet::new();
471
472        while let Some(mat) = matches.next() {
473            for capture in mat.captures {
474                let capture_name = compiled.element.capture_names()[capture.index as usize];
475                let node = capture.node;
476                match capture_name {
477                    "function" => {
478                        if let Some(name_node) = node.child_by_field_name("name") {
479                            let name =
480                                source[name_node.start_byte()..name_node.end_byte()].to_string();
481                            let func_key = (name.clone(), node.start_position().row);
482                            if !seen_functions.contains(&func_key) {
483                                seen_functions.insert(func_key);
484                                let params = node
485                                    .child_by_field_name("parameters")
486                                    .map(|p| source[p.start_byte()..p.end_byte()].to_string())
487                                    .unwrap_or_default();
488                                let return_type = node
489                                    .child_by_field_name("return_type")
490                                    .map(|r| source[r.start_byte()..r.end_byte()].to_string());
491                                functions.push(FunctionInfo {
492                                    name,
493                                    line: node.start_position().row + 1,
494                                    end_line: node.end_position().row + 1,
495                                    parameters: if params.is_empty() {
496                                        Vec::new()
497                                    } else {
498                                        vec![params]
499                                    },
500                                    return_type,
501                                });
502                            }
503                        }
504                    }
505                    "class" => {
506                        if let Some(name_node) = node.child_by_field_name("name") {
507                            let name =
508                                source[name_node.start_byte()..name_node.end_byte()].to_string();
509                            let inherits = if let Some(handler) = lang_info.extract_inheritance {
510                                handler(&node, source)
511                            } else {
512                                Vec::new()
513                            };
514                            classes.push(ClassInfo {
515                                name,
516                                line: node.start_position().row + 1,
517                                end_line: node.end_position().row + 1,
518                                methods: Vec::new(),
519                                fields: Vec::new(),
520                                inherits,
521                            });
522                        }
523                    }
524                    _ => {}
525                }
526            }
527        }
528    }
529
530    fn extract_calls(
531        source: &str,
532        compiled: &CompiledQueries,
533        root: Node<'_>,
534        max_depth: Option<u32>,
535        calls: &mut Vec<CallInfo>,
536        call_frequency: &mut HashMap<String, usize>,
537    ) {
538        let mut cursor = QueryCursor::new();
539        if let Some(depth) = max_depth {
540            cursor.set_max_start_depth(Some(depth));
541        }
542        let mut matches = cursor.matches(&compiled.call, root, source.as_bytes());
543
544        while let Some(mat) = matches.next() {
545            for capture in mat.captures {
546                let capture_name = compiled.call.capture_names()[capture.index as usize];
547                if capture_name != "call" {
548                    continue;
549                }
550                let node = capture.node;
551                let call_name = source[node.start_byte()..node.end_byte()].to_string();
552                *call_frequency.entry(call_name.clone()).or_insert(0) += 1;
553
554                let mut current = node;
555                let mut caller = "<module>".to_string();
556                while let Some(parent) = current.parent() {
557                    if parent.kind() == "function_item"
558                        && let Some(name_node) = parent.child_by_field_name("name")
559                    {
560                        caller = source[name_node.start_byte()..name_node.end_byte()].to_string();
561                        break;
562                    }
563                    current = parent;
564                }
565
566                let mut arg_count = None;
567                let mut arg_node = node;
568                while let Some(parent) = arg_node.parent() {
569                    if parent.kind() == "call_expression" {
570                        if let Some(args) = parent.child_by_field_name("arguments") {
571                            arg_count = Some(args.named_child_count());
572                        }
573                        break;
574                    }
575                    arg_node = parent;
576                }
577
578                calls.push(CallInfo {
579                    caller,
580                    callee: call_name,
581                    line: node.start_position().row + 1,
582                    column: node.start_position().column,
583                    arg_count,
584                });
585            }
586        }
587    }
588
589    fn extract_imports(
590        source: &str,
591        compiled: &CompiledQueries,
592        root: Node<'_>,
593        max_depth: Option<u32>,
594        imports: &mut Vec<ImportInfo>,
595    ) {
596        let Some(ref import_query) = compiled.import else {
597            return;
598        };
599        let mut cursor = QueryCursor::new();
600        if let Some(depth) = max_depth {
601            cursor.set_max_start_depth(Some(depth));
602        }
603        let mut matches = cursor.matches(import_query, root, source.as_bytes());
604
605        while let Some(mat) = matches.next() {
606            for capture in mat.captures {
607                let capture_name = import_query.capture_names()[capture.index as usize];
608                if capture_name == "import_path" {
609                    let node = capture.node;
610                    let line = node.start_position().row + 1;
611                    extract_imports_from_node(&node, source, "", line, imports);
612                }
613            }
614        }
615    }
616
617    fn extract_impl_methods(
618        source: &str,
619        compiled: &CompiledQueries,
620        root: Node<'_>,
621        max_depth: Option<u32>,
622        classes: &mut [ClassInfo],
623    ) {
624        let Some(ref impl_query) = compiled.impl_block else {
625            return;
626        };
627        let mut cursor = QueryCursor::new();
628        if let Some(depth) = max_depth {
629            cursor.set_max_start_depth(Some(depth));
630        }
631        let mut matches = cursor.matches(impl_query, root, source.as_bytes());
632
633        while let Some(mat) = matches.next() {
634            let mut impl_type_name = String::new();
635            let mut method_name = String::new();
636            let mut method_line = 0usize;
637            let mut method_end_line = 0usize;
638            let mut method_params = String::new();
639            let mut method_return_type: Option<String> = None;
640
641            for capture in mat.captures {
642                let capture_name = impl_query.capture_names()[capture.index as usize];
643                let node = capture.node;
644                match capture_name {
645                    "impl_type" => {
646                        impl_type_name = source[node.start_byte()..node.end_byte()].to_string();
647                    }
648                    "method_name" => {
649                        method_name = source[node.start_byte()..node.end_byte()].to_string();
650                    }
651                    "method_params" => {
652                        method_params = source[node.start_byte()..node.end_byte()].to_string();
653                    }
654                    "method" => {
655                        method_line = node.start_position().row + 1;
656                        method_end_line = node.end_position().row + 1;
657                        method_return_type = node
658                            .child_by_field_name("return_type")
659                            .map(|r| source[r.start_byte()..r.end_byte()].to_string());
660                    }
661                    _ => {}
662                }
663            }
664
665            if !impl_type_name.is_empty() && !method_name.is_empty() {
666                let func = FunctionInfo {
667                    name: method_name,
668                    line: method_line,
669                    end_line: method_end_line,
670                    parameters: if method_params.is_empty() {
671                        Vec::new()
672                    } else {
673                        vec![method_params]
674                    },
675                    return_type: method_return_type,
676                };
677                if let Some(class) = classes.iter_mut().find(|c| c.name == impl_type_name) {
678                    class.methods.push(func);
679                }
680            }
681        }
682    }
683
684    fn extract_references(
685        source: &str,
686        compiled: &CompiledQueries,
687        root: Node<'_>,
688        max_depth: Option<u32>,
689        references: &mut Vec<ReferenceInfo>,
690    ) {
691        let Some(ref ref_query) = compiled.reference else {
692            return;
693        };
694        let mut cursor = QueryCursor::new();
695        if let Some(depth) = max_depth {
696            cursor.set_max_start_depth(Some(depth));
697        }
698        let mut seen_refs = std::collections::HashSet::new();
699        let mut matches = cursor.matches(ref_query, root, source.as_bytes());
700
701        while let Some(mat) = matches.next() {
702            for capture in mat.captures {
703                let capture_name = ref_query.capture_names()[capture.index as usize];
704                if capture_name == "type_ref" {
705                    let node = capture.node;
706                    let type_ref = source[node.start_byte()..node.end_byte()].to_string();
707                    if seen_refs.insert(type_ref.clone()) {
708                        references.push(ReferenceInfo {
709                            symbol: type_ref,
710                            reference_type: ReferenceType::Usage,
711                            // location is intentionally empty here; set by the caller (analyze_file)
712                            location: String::new(),
713                            line: node.start_position().row + 1,
714                        });
715                    }
716                }
717            }
718        }
719    }
720
721    fn extract_assignments(
722        source: &str,
723        compiled: &CompiledQueries,
724        root: Node<'_>,
725        max_depth: Option<u32>,
726        assignments: &mut Vec<AssignmentInfo>,
727    ) {
728        let Some(ref assignment_query) = compiled.assignment else {
729            return;
730        };
731        let mut cursor = QueryCursor::new();
732        if let Some(depth) = max_depth {
733            cursor.set_max_start_depth(Some(depth));
734        }
735        let mut matches = cursor.matches(assignment_query, root, source.as_bytes());
736
737        while let Some(mat) = matches.next() {
738            let mut variable = String::new();
739            let mut value = String::new();
740            let mut line = 0usize;
741
742            for capture in mat.captures {
743                let capture_name = assignment_query.capture_names()[capture.index as usize];
744                let node = capture.node;
745                match capture_name {
746                    "variable" => {
747                        variable = source[node.start_byte()..node.end_byte()].to_string();
748                    }
749                    "value" => {
750                        value = source[node.start_byte()..node.end_byte()].to_string();
751                        line = node.start_position().row + 1;
752                    }
753                    _ => {}
754                }
755            }
756
757            if !variable.is_empty() && !value.is_empty() {
758                let mut current = mat.captures[0].node;
759                let mut scope = "global".to_string();
760                while let Some(parent) = current.parent() {
761                    if parent.kind() == "function_item"
762                        && let Some(name_node) = parent.child_by_field_name("name")
763                    {
764                        scope = source[name_node.start_byte()..name_node.end_byte()].to_string();
765                        break;
766                    }
767                    current = parent;
768                }
769                assignments.push(AssignmentInfo {
770                    variable,
771                    value,
772                    line,
773                    scope,
774                });
775            }
776        }
777    }
778
779    fn extract_field_accesses(
780        source: &str,
781        compiled: &CompiledQueries,
782        root: Node<'_>,
783        max_depth: Option<u32>,
784        field_accesses: &mut Vec<FieldAccessInfo>,
785    ) {
786        let Some(ref field_query) = compiled.field else {
787            return;
788        };
789        let mut cursor = QueryCursor::new();
790        if let Some(depth) = max_depth {
791            cursor.set_max_start_depth(Some(depth));
792        }
793        let mut matches = cursor.matches(field_query, root, source.as_bytes());
794
795        while let Some(mat) = matches.next() {
796            let mut object = String::new();
797            let mut field = String::new();
798            let mut line = 0usize;
799
800            for capture in mat.captures {
801                let capture_name = field_query.capture_names()[capture.index as usize];
802                let node = capture.node;
803                match capture_name {
804                    "object" => {
805                        object = source[node.start_byte()..node.end_byte()].to_string();
806                    }
807                    "field" => {
808                        field = source[node.start_byte()..node.end_byte()].to_string();
809                        line = node.start_position().row + 1;
810                    }
811                    _ => {}
812                }
813            }
814
815            if !object.is_empty() && !field.is_empty() {
816                let mut current = mat.captures[0].node;
817                let mut scope = "global".to_string();
818                while let Some(parent) = current.parent() {
819                    if parent.kind() == "function_item"
820                        && let Some(name_node) = parent.child_by_field_name("name")
821                    {
822                        scope = source[name_node.start_byte()..name_node.end_byte()].to_string();
823                        break;
824                    }
825                    current = parent;
826                }
827                field_accesses.push(FieldAccessInfo {
828                    object,
829                    field,
830                    line,
831                    scope,
832                });
833            }
834        }
835    }
836}
837
#[cfg(test)]
mod tests {
    use super::*;

    /// `let` bindings in a Rust function are reported with their variable, value
    /// and enclosing function name.
    #[test]
    fn test_extract_assignments() {
        let source = r#"
fn main() {
    let x = 42;
    let y = x + 1;
}
"#;
        let analysis = SemanticExtractor::extract(source, "rust", None).unwrap();
        assert!(!analysis.assignments.is_empty());

        let first = &analysis.assignments[0];
        assert_eq!(first.variable, "x");
        assert_eq!(first.value, "42");
        assert_eq!(first.scope, "main");
    }

    /// Field reads on a parameter are reported with object, field and enclosing
    /// function name.
    #[test]
    fn test_extract_field_accesses() {
        let source = r#"
fn process(user: &User) {
    let name = user.name;
    let age = user.age;
}
"#;
        let analysis = SemanticExtractor::extract(source, "rust", None).unwrap();
        let accesses = &analysis.field_accesses;
        assert!(!accesses.is_empty());

        let has_user_name = accesses
            .iter()
            .any(|fa| fa.object == "user" && fa.field == "name");
        assert!(has_user_name);
        assert_eq!(accesses[0].scope, "process");
    }
}
879}