Skip to main content

cgx_engine/parsers/
java.rs

1use tree_sitter::{Parser, Query, QueryCursor, Node};
2
3use crate::parser::{EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult};
4use crate::walker::SourceFile;
5
6pub struct JavaParser {
7    language: tree_sitter::Language,
8}
9
10impl JavaParser {
11    pub fn new() -> Self {
12        Self {
13            language: tree_sitter_java::language(),
14        }
15    }
16}
17
18impl Default for JavaParser {
19    fn default() -> Self {
20        Self::new()
21    }
22}
23
24impl LanguageParser for JavaParser {
25    fn extensions(&self) -> &[&str] {
26        &["java"]
27    }
28
29    fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
30        let mut parser = Parser::new();
31        parser.set_language(&self.language)?;
32
33        let tree = parser.parse(&file.content, None).ok_or_else(|| {
34            anyhow::anyhow!("failed to parse {}", file.relative_path)
35        })?;
36
37        let source_bytes = file.content.as_bytes();
38        let root = tree.root_node();
39        let mut nodes = Vec::new();
40        let mut edges = Vec::new();
41
42        let fp = file_node_id(&file.relative_path);
43
44        // Parse class declarations
45        if let Ok(query) = Query::new(
46            &self.language,
47            "(class_declaration name: (identifier) @name) @cls",
48        ) {
49            extract_nodes(
50                &mut nodes, &mut edges, file, &query, root, source_bytes,
51                NodeKind::Class, "cls", &fp,
52            );
53        }
54
55        // Parse interface declarations
56        if let Ok(query) = Query::new(
57            &self.language,
58            "(interface_declaration name: (identifier) @name) @cls",
59        ) {
60            extract_nodes(
61                &mut nodes, &mut edges, file, &query, root, source_bytes,
62                NodeKind::Class, "cls", &fp,
63            );
64        }
65
66        // Parse method declarations
67        if let Ok(query) = Query::new(
68            &self.language,
69            "(method_declaration name: (identifier) @name) @fn",
70        ) {
71            extract_nodes(
72                &mut nodes, &mut edges, file, &query, root, source_bytes,
73                NodeKind::Function, "fn", &fp,
74            );
75        }
76
77        // Parse constructor declarations
78        if let Ok(query) = Query::new(
79            &self.language,
80            "(constructor_declaration name: (identifier) @name) @fn",
81        ) {
82            extract_nodes(
83                &mut nodes, &mut edges, file, &query, root, source_bytes,
84                NodeKind::Function, "fn", &fp,
85            );
86        }
87
88        // Parse imports
89        extract_imports(&mut edges, root, source_bytes, &fp, file);
90
91        // Extract calls
92        extract_calls(&mut edges, root, source_bytes, file);
93
94        Ok(ParseResult { nodes, edges })
95    }
96}
97
/// Builds the graph id of a file node: the relative path prefixed with `file:`.
fn file_node_id(rel_path: &str) -> String {
    ["file:", rel_path].concat()
}
101
102#[allow(clippy::too_many_arguments)]
103fn extract_nodes(
104    nodes: &mut Vec<NodeDef>,
105    edges: &mut Vec<EdgeDef>,
106    file: &SourceFile,
107    query: &Query,
108    root: tree_sitter::Node,
109    source_bytes: &[u8],
110    kind: NodeKind,
111    prefix: &str,
112    file_id: &str,
113) {
114    let mut cursor = QueryCursor::new();
115    for m in cursor.matches(query, root, source_bytes) {
116        let Some(name_capture) = m
117            .captures
118            .iter()
119            .find(|c| query.capture_names()[c.index as usize] == "name")
120        else {
121            continue;
122        };
123
124        let name = node_text(name_capture.node, source_bytes);
125        let node_start = name_capture.node.start_position();
126
127        let body_end = m
128            .captures
129            .iter()
130            .find(|c| {
131                let cap_name = &query.capture_names()[c.index as usize];
132                *cap_name == "fn" || *cap_name == "cls"
133            })
134            .map(|c| c.node.end_position())
135            .unwrap_or_else(|| name_capture.node.end_position());
136
137        let id = format!("{}:{}:{}", prefix, file.relative_path, name);
138
139        nodes.push(NodeDef {
140            id: id.clone(),
141            kind: kind.clone(),
142            name: name.clone(),
143            path: file.relative_path.clone(),
144            line_start: node_start.row as u32 + 1,
145            line_end: body_end.row as u32 + 1,
146            ..Default::default()
147        });
148
149        edges.push(EdgeDef {
150            src: file_id.to_string(),
151            dst: id,
152            kind: EdgeKind::Exports,
153            ..Default::default()
154        });
155    }
156}
157
158fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
159    node.utf8_text(source).unwrap_or("").to_string()
160}
161
162fn extract_imports(
163    edges: &mut Vec<EdgeDef>,
164    root: tree_sitter::Node,
165    source_bytes: &[u8],
166    file_id: &str,
167    file: &SourceFile,
168) {
169    let mut cursor = root.walk();
170    traverse_imports(edges, root, source_bytes, file_id, file, &mut cursor);
171}
172
173fn traverse_imports(
174    edges: &mut Vec<EdgeDef>,
175    node: tree_sitter::Node,
176    source_bytes: &[u8],
177    file_id: &str,
178    file: &SourceFile,
179    cursor: &mut tree_sitter::TreeCursor,
180) {
181    if node.kind() == "import_declaration" {
182        // Java: import com.foo.Bar; or import com.foo.*;
183        for j in 0..node.child_count() {
184            let Some(import_child) = node.child(j) else { continue };
185            if import_child.kind() == "scoped_identifier" || import_child.kind() == "identifier" {
186                let import_path = node_text(import_child, source_bytes);
187                // Convert package path to potential file path heuristic
188                if !import_path.is_empty() {
189                    let resolved = resolve_java_import(&file.relative_path, &import_path);
190                    if !resolved.is_empty() {
191                        edges.push(EdgeDef {
192                            src: file_id.to_string(),
193                            dst: file_node_id(&resolved),
194                            kind: EdgeKind::Imports,
195                            ..Default::default()
196                        });
197                    }
198                }
199            }
200        }
201    }
202
203    if cursor.goto_first_child() {
204        loop {
205            let child = cursor.node();
206            traverse_imports(edges, child, source_bytes, file_id, file, cursor);
207            if !cursor.goto_next_sibling() {
208                break;
209            }
210        }
211        cursor.goto_parent();
212    }
213}
214
/// Maps a dotted Java import to a heuristic relative path.
///
/// Relies on the Java naming convention that packages are lowercase and types
/// start with an uppercase letter:
///
/// * `com.example.Foo` → `com/example/Foo.java`
/// * `com.example.Outer.Inner` (nested type) → `com/example/Outer.java`
/// * `com.example.Foo.bar` (static member import) → `com/example/Foo.java`
/// * `com.example` (package / wildcard import) → `com/example`
///
/// Returns an empty string when `import` has fewer than two dot-separated
/// segments. `_current` (the importing file's path) is currently unused.
fn resolve_java_import(_current: &str, import: &str) -> String {
    let parts: Vec<&str> = import.split('.').collect();
    if parts.len() < 2 {
        return String::new();
    }

    // The first capitalised segment is taken as the top-level class; anything
    // after it (nested types, static members) lives in that class's file.
    let class_idx = parts
        .iter()
        .position(|seg| seg.chars().next().map_or(false, char::is_uppercase));

    match class_idx {
        Some(idx) => format!("{}.java", parts[..=idx].join("/")),
        // No capitalised segment: treat the whole path as a package directory.
        None => parts.join("/"),
    }
}
231
232fn extract_calls(edges: &mut Vec<EdgeDef>, root: Node, source: &[u8], file: &SourceFile) {
233    let mut fn_stack: Vec<String> = Vec::new();
234    walk_for_calls(edges, root, source, file, &mut fn_stack);
235}
236
/// Reports whether a tree-sitter node kind opens a function-like scope
/// (method, constructor or lambda).
fn is_fn_node(kind: &str) -> bool {
    const FN_KINDS: [&str; 3] = [
        "method_declaration",
        "constructor_declaration",
        "lambda_expression",
    ];
    FN_KINDS.contains(&kind)
}
240
241fn fn_name_from_node(node: Node, source: &[u8], file: &SourceFile) -> Option<String> {
242    if let Some(name_node) = node.child_by_field_name("name") {
243        let name = name_node.utf8_text(source).unwrap_or("").to_string();
244        if !name.is_empty() {
245            return Some(format!("fn:{}:{}", file.relative_path, name));
246        }
247    }
248    None
249}
250
251fn walk_for_calls(
252    edges: &mut Vec<EdgeDef>,
253    node: Node,
254    source: &[u8],
255    file: &SourceFile,
256    fn_stack: &mut Vec<String>,
257) {
258    let kind = node.kind();
259    let pushed = is_fn_node(kind);
260
261    if pushed {
262        if let Some(id) = fn_name_from_node(node, source, file) {
263            fn_stack.push(id);
264        } else {
265            fn_stack.push(String::new());
266        }
267    }
268
269    if kind == "method_invocation" {
270        if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
271            let callee_name = node
272                .child_by_field_name("name")
273                .map(|n| n.utf8_text(source).unwrap_or("").to_string())
274                .unwrap_or_default();
275
276            if !callee_name.is_empty() {
277                edges.push(EdgeDef {
278                    src: caller_id.clone(),
279                    dst: callee_name,
280                    kind: EdgeKind::Calls,
281                    confidence: 0.7,
282                    ..Default::default()
283                });
284            }
285        }
286    }
287
288    let mut cursor = node.walk();
289    if cursor.goto_first_child() {
290        loop {
291            walk_for_calls(edges, cursor.node(), source, file, fn_stack);
292            if !cursor.goto_next_sibling() {
293                break;
294            }
295        }
296    }
297
298    if pushed {
299        fn_stack.pop();
300    }
301}