Skip to main content

cgx_engine/parsers/
java.rs

1use tree_sitter::{Node, Parser, Query, QueryCursor};
2
3use crate::parser::{EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult};
4use crate::walker::SourceFile;
5
6pub struct JavaParser {
7    language: tree_sitter::Language,
8}
9
10impl JavaParser {
11    pub fn new() -> Self {
12        Self {
13            language: tree_sitter_java::language(),
14        }
15    }
16}
17
18impl Default for JavaParser {
19    fn default() -> Self {
20        Self::new()
21    }
22}
23
24impl LanguageParser for JavaParser {
25    fn extensions(&self) -> &[&str] {
26        &["java"]
27    }
28
29    fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
30        let mut parser = Parser::new();
31        parser.set_language(&self.language)?;
32
33        let tree = parser
34            .parse(&file.content, None)
35            .ok_or_else(|| anyhow::anyhow!("failed to parse {}", file.relative_path))?;
36
37        let source_bytes = file.content.as_bytes();
38        let root = tree.root_node();
39        let mut nodes = Vec::new();
40        let mut edges = Vec::new();
41
42        let fp = file_node_id(&file.relative_path);
43
44        // Parse class declarations
45        if let Ok(query) = Query::new(
46            &self.language,
47            "(class_declaration name: (identifier) @name) @cls",
48        ) {
49            extract_nodes(
50                &mut nodes,
51                &mut edges,
52                file,
53                &query,
54                root,
55                source_bytes,
56                NodeKind::Class,
57                "cls",
58                &fp,
59            );
60        }
61
62        // Parse interface declarations
63        if let Ok(query) = Query::new(
64            &self.language,
65            "(interface_declaration name: (identifier) @name) @cls",
66        ) {
67            extract_nodes(
68                &mut nodes,
69                &mut edges,
70                file,
71                &query,
72                root,
73                source_bytes,
74                NodeKind::Class,
75                "cls",
76                &fp,
77            );
78        }
79
80        // Parse method declarations
81        if let Ok(query) = Query::new(
82            &self.language,
83            "(method_declaration name: (identifier) @name) @fn",
84        ) {
85            extract_nodes(
86                &mut nodes,
87                &mut edges,
88                file,
89                &query,
90                root,
91                source_bytes,
92                NodeKind::Function,
93                "fn",
94                &fp,
95            );
96        }
97
98        // Parse constructor declarations
99        if let Ok(query) = Query::new(
100            &self.language,
101            "(constructor_declaration name: (identifier) @name) @fn",
102        ) {
103            extract_nodes(
104                &mut nodes,
105                &mut edges,
106                file,
107                &query,
108                root,
109                source_bytes,
110                NodeKind::Function,
111                "fn",
112                &fp,
113            );
114        }
115
116        // Parse imports
117        extract_imports(&mut edges, root, source_bytes, &fp, file);
118
119        // Extract calls
120        extract_calls(&mut edges, root, source_bytes, file);
121
122        Ok(ParseResult { nodes, edges })
123    }
124}
125
/// Builds the graph id of a file node: the relative path prefixed with `file:`.
fn file_node_id(rel_path: &str) -> String {
    let mut id = String::with_capacity("file:".len() + rel_path.len());
    id.push_str("file:");
    id.push_str(rel_path);
    id
}
129
130#[allow(clippy::too_many_arguments)]
131fn extract_nodes(
132    nodes: &mut Vec<NodeDef>,
133    edges: &mut Vec<EdgeDef>,
134    file: &SourceFile,
135    query: &Query,
136    root: tree_sitter::Node,
137    source_bytes: &[u8],
138    kind: NodeKind,
139    prefix: &str,
140    file_id: &str,
141) {
142    let mut cursor = QueryCursor::new();
143    for m in cursor.matches(query, root, source_bytes) {
144        let Some(name_capture) = m
145            .captures
146            .iter()
147            .find(|c| query.capture_names()[c.index as usize] == "name")
148        else {
149            continue;
150        };
151
152        let name = node_text(name_capture.node, source_bytes);
153        let node_start = name_capture.node.start_position();
154
155        let body_end = m
156            .captures
157            .iter()
158            .find(|c| {
159                let cap_name = &query.capture_names()[c.index as usize];
160                *cap_name == "fn" || *cap_name == "cls"
161            })
162            .map(|c| c.node.end_position())
163            .unwrap_or_else(|| name_capture.node.end_position());
164
165        let id = format!("{}:{}:{}", prefix, file.relative_path, name);
166
167        nodes.push(NodeDef {
168            id: id.clone(),
169            kind: kind.clone(),
170            name: name.clone(),
171            path: file.relative_path.clone(),
172            line_start: node_start.row as u32 + 1,
173            line_end: body_end.row as u32 + 1,
174            ..Default::default()
175        });
176
177        edges.push(EdgeDef {
178            src: file_id.to_string(),
179            dst: id,
180            kind: EdgeKind::Exports,
181            ..Default::default()
182        });
183    }
184}
185
186fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
187    node.utf8_text(source).unwrap_or("").to_string()
188}
189
190fn extract_imports(
191    edges: &mut Vec<EdgeDef>,
192    root: tree_sitter::Node,
193    source_bytes: &[u8],
194    file_id: &str,
195    file: &SourceFile,
196) {
197    let mut cursor = root.walk();
198    traverse_imports(edges, root, source_bytes, file_id, file, &mut cursor);
199}
200
201fn traverse_imports(
202    edges: &mut Vec<EdgeDef>,
203    node: tree_sitter::Node,
204    source_bytes: &[u8],
205    file_id: &str,
206    file: &SourceFile,
207    cursor: &mut tree_sitter::TreeCursor,
208) {
209    if node.kind() == "import_declaration" {
210        // Java: import com.foo.Bar; or import com.foo.*;
211        for j in 0..node.child_count() {
212            let Some(import_child) = node.child(j) else {
213                continue;
214            };
215            if import_child.kind() == "scoped_identifier" || import_child.kind() == "identifier" {
216                let import_path = node_text(import_child, source_bytes);
217                // Convert package path to potential file path heuristic
218                if !import_path.is_empty() {
219                    let resolved = resolve_java_import(&file.relative_path, &import_path);
220                    if !resolved.is_empty() {
221                        edges.push(EdgeDef {
222                            src: file_id.to_string(),
223                            dst: file_node_id(&resolved),
224                            kind: EdgeKind::Imports,
225                            ..Default::default()
226                        });
227                    }
228                }
229            }
230        }
231    }
232
233    if cursor.goto_first_child() {
234        loop {
235            let child = cursor.node();
236            traverse_imports(edges, child, source_bytes, file_id, file, cursor);
237            if !cursor.goto_next_sibling() {
238                break;
239            }
240        }
241        cursor.goto_parent();
242    }
243}
244
/// Maps a dotted Java import name to a candidate source path.
///
/// Heuristic, relying on the convention that package segments start with a
/// lowercase letter and type names with an uppercase one:
///
/// * `com.example.Foo` → `com/example/Foo.java`
/// * `com.example.Outer.Inner` → `com/example/Outer.java` (nested types live
///   in the file of their top-level type)
/// * `java.lang.Math.max` (static member import) → `java/lang/Math.java`
/// * `com.example` (the name left over from `import com.example.*;`) →
///   `com/example`
///
/// Names with fewer than two segments yield an empty string. Packages whose
/// segments start with an uppercase letter are misread as types.
///
/// `_current` (the importing file's path) is accepted for signature
/// compatibility and is not consulted.
fn resolve_java_import(_current: &str, import: &str) -> String {
    let parts: Vec<&str> = import.split('.').collect();
    if parts.len() < 2 {
        return String::new();
    }

    // The first uppercase-initial segment is taken to be the top-level type;
    // anything after it (nested type, static member) lives in the same file.
    let top_level_type = parts
        .iter()
        .position(|segment| segment.chars().next().map_or(false, char::is_uppercase));

    match top_level_type {
        Some(idx) => format!("{}.java", parts[..=idx].join("/")),
        // No type segment at all: treat the name as a package directory.
        None => parts.join("/"),
    }
}
268
269fn extract_calls(edges: &mut Vec<EdgeDef>, root: Node, source: &[u8], file: &SourceFile) {
270    let mut fn_stack: Vec<String> = Vec::new();
271    walk_for_calls(edges, root, source, file, &mut fn_stack);
272}
273
/// Reports whether a tree-sitter node kind opens a new function scope for call
/// attribution: methods, constructors and lambdas.
fn is_fn_node(kind: &str) -> bool {
    match kind {
        "method_declaration" | "constructor_declaration" | "lambda_expression" => true,
        _ => false,
    }
}
280
281fn fn_name_from_node(node: Node, source: &[u8], file: &SourceFile) -> Option<String> {
282    if let Some(name_node) = node.child_by_field_name("name") {
283        let name = name_node.utf8_text(source).unwrap_or("").to_string();
284        if !name.is_empty() {
285            return Some(format!("fn:{}:{}", file.relative_path, name));
286        }
287    }
288    None
289}
290
291fn walk_for_calls(
292    edges: &mut Vec<EdgeDef>,
293    node: Node,
294    source: &[u8],
295    file: &SourceFile,
296    fn_stack: &mut Vec<String>,
297) {
298    let kind = node.kind();
299    let pushed = is_fn_node(kind);
300
301    if pushed {
302        if let Some(id) = fn_name_from_node(node, source, file) {
303            fn_stack.push(id);
304        } else {
305            fn_stack.push(String::new());
306        }
307    }
308
309    if kind == "method_invocation" {
310        if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
311            let callee_name = node
312                .child_by_field_name("name")
313                .map(|n| n.utf8_text(source).unwrap_or("").to_string())
314                .unwrap_or_default();
315
316            if !callee_name.is_empty() {
317                edges.push(EdgeDef {
318                    src: caller_id.clone(),
319                    dst: callee_name,
320                    kind: EdgeKind::Calls,
321                    confidence: 0.7,
322                    ..Default::default()
323                });
324            }
325        }
326    }
327
328    let mut cursor = node.walk();
329    if cursor.goto_first_child() {
330        loop {
331            walk_for_calls(edges, cursor.node(), source, file, fn_stack);
332            if !cursor.goto_next_sibling() {
333                break;
334            }
335        }
336    }
337
338    if pushed {
339        fn_stack.pop();
340    }
341}