Skip to main content

// cgx_engine/parsers/java.rs

1use tree_sitter::{Node, Parser, Query, QueryCursor};
2
3use crate::parser::{
4    collect_doc_block_above, meta_set, EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind,
5    ParseResult,
6};
7use crate::walker::SourceFile;
8
9pub struct JavaParser {
10    language: tree_sitter::Language,
11}
12
13impl JavaParser {
14    pub fn new() -> Self {
15        Self {
16            language: tree_sitter_java::language(),
17        }
18    }
19}
20
21impl Default for JavaParser {
22    fn default() -> Self {
23        Self::new()
24    }
25}
26
27impl LanguageParser for JavaParser {
28    fn extensions(&self) -> &[&str] {
29        &["java"]
30    }
31
32    fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
33        let mut parser = Parser::new();
34        parser.set_language(&self.language)?;
35
36        let tree = parser
37            .parse(&file.content, None)
38            .ok_or_else(|| anyhow::anyhow!("failed to parse {}", file.relative_path))?;
39
40        let source_bytes = file.content.as_bytes();
41        let root = tree.root_node();
42        let mut nodes = Vec::new();
43        let mut edges = Vec::new();
44
45        let fp = file_node_id(&file.relative_path);
46
47        // Parse class declarations
48        if let Ok(query) = Query::new(
49            &self.language,
50            "(class_declaration name: (identifier) @name) @cls",
51        ) {
52            extract_nodes(
53                &mut nodes,
54                &mut edges,
55                file,
56                &query,
57                root,
58                source_bytes,
59                NodeKind::Class,
60                "cls",
61                &fp,
62            );
63        }
64
65        // Parse interface declarations
66        if let Ok(query) = Query::new(
67            &self.language,
68            "(interface_declaration name: (identifier) @name) @cls",
69        ) {
70            extract_nodes(
71                &mut nodes,
72                &mut edges,
73                file,
74                &query,
75                root,
76                source_bytes,
77                NodeKind::Class,
78                "cls",
79                &fp,
80            );
81        }
82
83        // Parse method declarations
84        if let Ok(query) = Query::new(
85            &self.language,
86            "(method_declaration name: (identifier) @name) @fn",
87        ) {
88            extract_nodes(
89                &mut nodes,
90                &mut edges,
91                file,
92                &query,
93                root,
94                source_bytes,
95                NodeKind::Function,
96                "fn",
97                &fp,
98            );
99        }
100
101        // Parse constructor declarations
102        if let Ok(query) = Query::new(
103            &self.language,
104            "(constructor_declaration name: (identifier) @name) @fn",
105        ) {
106            extract_nodes(
107                &mut nodes,
108                &mut edges,
109                file,
110                &query,
111                root,
112                source_bytes,
113                NodeKind::Function,
114                "fn",
115                &fp,
116            );
117        }
118
119        // Parse imports
120        extract_imports(&mut edges, root, source_bytes, &fp, file);
121
122        // Extract calls
123        extract_calls(&mut edges, root, source_bytes, file);
124
125        Ok(ParseResult {
126            nodes,
127            edges,
128            ..Default::default()
129        })
130    }
131}
132
/// Builds the graph id of a file node: the literal `file:` followed by the path.
fn file_node_id(rel_path: &str) -> String {
    ["file:", rel_path].concat()
}
136
137#[allow(clippy::too_many_arguments)]
138fn extract_nodes(
139    nodes: &mut Vec<NodeDef>,
140    edges: &mut Vec<EdgeDef>,
141    file: &SourceFile,
142    query: &Query,
143    root: tree_sitter::Node,
144    source_bytes: &[u8],
145    kind: NodeKind,
146    prefix: &str,
147    file_id: &str,
148) {
149    let mut cursor = QueryCursor::new();
150    for m in cursor.matches(query, root, source_bytes) {
151        let Some(name_capture) = m
152            .captures
153            .iter()
154            .find(|c| query.capture_names()[c.index as usize] == "name")
155        else {
156            continue;
157        };
158
159        let name = node_text(name_capture.node, source_bytes);
160        let node_start = name_capture.node.start_position();
161
162        let item_node = m
163            .captures
164            .iter()
165            .find(|c| {
166                let cap_name = &query.capture_names()[c.index as usize];
167                *cap_name == "fn" || *cap_name == "cls"
168            })
169            .map(|c| c.node);
170        let body_end = item_node
171            .map(|n| n.end_position())
172            .unwrap_or_else(|| name_capture.node.end_position());
173
174        let id = format!("{}:{}:{}", prefix, file.relative_path, name);
175
176        let doc_comment = item_node
177            .and_then(|n| collect_doc_block_above(n, source_bytes, is_jdoc_comment))
178            .map(strip_jdoc_markers);
179
180        let mut def = NodeDef {
181            id: id.clone(),
182            kind: kind.clone(),
183            name: name.clone(),
184            path: file.relative_path.clone(),
185            line_start: node_start.row as u32 + 1,
186            line_end: body_end.row as u32 + 1,
187            ..Default::default()
188        };
189        if let Some(doc) = doc_comment {
190            meta_set(&mut def, "doc_comment", serde_json::Value::String(doc));
191        }
192        nodes.push(def);
193
194        edges.push(EdgeDef {
195            src: file_id.to_string(),
196            dst: id,
197            kind: EdgeKind::Exports,
198            ..Default::default()
199        });
200    }
201}
202
/// Returns `true` when `text`, ignoring leading whitespace, opens with `/**`.
///
/// That is the Javadoc / PHPDoc opener; `//` line comments and plain `/* */`
/// block comments are rejected.
fn is_jdoc_comment(text: &str) -> bool {
    let body = text.trim_start();
    body.strip_prefix("/**").is_some()
}
207
/// Removes Javadoc delimiters from `raw` and returns only the documentation text.
///
/// Every line is cleaned independently (see [`strip_jdoc_line`]), the lines are
/// re-joined with `\n`, and the result is trimmed so that the empty lines left
/// behind by a bare `/**` or `*/` do not pad the text. Blank lines inside the
/// comment are preserved.
fn strip_jdoc_markers(raw: String) -> String {
    let cleaned: Vec<&str> = raw.lines().map(strip_jdoc_line).collect();
    cleaned.join("\n").trim().to_string()
}

/// Strips the comment markers from a single line of a Javadoc block.
///
/// Handles the opening `/**`, the closing `*/` wherever it ends the line
/// (including `* text */` and a `**/` terminator), and the single leading `*`
/// gutter character of continuation lines.
fn strip_jdoc_line(line: &str) -> &str {
    let line = line.trim();

    let (rest, is_opening) = match line.strip_prefix("/**") {
        Some(rest) => (rest, true),
        None => (line, false),
    };

    // The comment can close on any line, not only on a line of its own.
    let rest = rest.strip_suffix("*/").unwrap_or(rest);

    // Only continuation lines carry a gutter `*`; on the opening line a leading
    // `*` after `/**` is content (e.g. `/** *emphasis* */`) and must be kept.
    let rest = if is_opening {
        rest
    } else {
        rest.strip_prefix('*').unwrap_or(rest)
    };

    rest.trim()
}
228
229fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
230    node.utf8_text(source).unwrap_or("").to_string()
231}
232
233fn extract_imports(
234    edges: &mut Vec<EdgeDef>,
235    root: tree_sitter::Node,
236    source_bytes: &[u8],
237    file_id: &str,
238    file: &SourceFile,
239) {
240    let mut cursor = root.walk();
241    traverse_imports(edges, root, source_bytes, file_id, file, &mut cursor);
242}
243
244fn traverse_imports(
245    edges: &mut Vec<EdgeDef>,
246    node: tree_sitter::Node,
247    source_bytes: &[u8],
248    file_id: &str,
249    file: &SourceFile,
250    cursor: &mut tree_sitter::TreeCursor,
251) {
252    if node.kind() == "import_declaration" {
253        // Java: import com.foo.Bar; or import com.foo.*;
254        for j in 0..node.child_count() {
255            let Some(import_child) = node.child(j) else {
256                continue;
257            };
258            if import_child.kind() == "scoped_identifier" || import_child.kind() == "identifier" {
259                let import_path = node_text(import_child, source_bytes);
260                // Convert package path to potential file path heuristic
261                if !import_path.is_empty() {
262                    let resolved = resolve_java_import(&file.relative_path, &import_path);
263                    if !resolved.is_empty() {
264                        edges.push(EdgeDef {
265                            src: file_id.to_string(),
266                            dst: file_node_id(&resolved),
267                            kind: EdgeKind::Imports,
268                            ..Default::default()
269                        });
270                    }
271                }
272            }
273        }
274    }
275
276    if cursor.goto_first_child() {
277        loop {
278            let child = cursor.node();
279            traverse_imports(edges, child, source_bytes, file_id, file, cursor);
280            if !cursor.goto_next_sibling() {
281                break;
282            }
283        }
284        cursor.goto_parent();
285    }
286}
287
/// Maps a dotted Java import path to a candidate source path (heuristic).
///
/// * `_current` - path of the importing file; currently unused.
/// * `import` - dotted name as written in the import, without a trailing `.*`.
///
/// The first segment that starts with an uppercase letter is taken to be the
/// top-level class, following the usual Java naming convention (lowercase
/// packages, capitalised types). Everything up to and including it becomes
/// the file path, so nested types and statically imported members resolve to
/// the file that declares them:
///
/// * `com.example.Foo` -> `com/example/Foo.java`
/// * `com.example.Outer.Inner` -> `com/example/Outer.java`
/// * `org.junit.Assert.assertEquals` -> `org/junit/Assert.java`
///
/// A path with no capitalised segment is treated as a package and returned as
/// a directory path without a trailing slash (`com.example` -> `com/example`).
/// A single-segment import yields an empty string, meaning "unresolvable".
fn resolve_java_import(_current: &str, import: &str) -> String {
    let parts: Vec<&str> = import.split('.').collect();
    if parts.len() < 2 {
        return String::new();
    }

    let class_idx = parts.iter().position(|segment| {
        segment
            .chars()
            .next()
            .map_or(false, |first| first.is_uppercase())
    });

    match class_idx {
        // Segments after the top-level class name nested types or members of
        // that class; they live in the same source file.
        Some(idx) => format!("{}.java", parts[..=idx].join("/")),
        None => parts.join("/"),
    }
}
311
312fn extract_calls(edges: &mut Vec<EdgeDef>, root: Node, source: &[u8], file: &SourceFile) {
313    let mut fn_stack: Vec<String> = Vec::new();
314    walk_for_calls(edges, root, source, file, &mut fn_stack);
315}
316
/// Tells whether a tree-sitter node kind opens a function-like scope:
/// a method, a constructor, or a lambda expression.
fn is_fn_node(kind: &str) -> bool {
    const FN_KINDS: [&str; 3] = [
        "method_declaration",
        "constructor_declaration",
        "lambda_expression",
    ];
    FN_KINDS.iter().any(|candidate| *candidate == kind)
}
323
324fn fn_name_from_node(node: Node, source: &[u8], file: &SourceFile) -> Option<String> {
325    if let Some(name_node) = node.child_by_field_name("name") {
326        let name = name_node.utf8_text(source).unwrap_or("").to_string();
327        if !name.is_empty() {
328            return Some(format!("fn:{}:{}", file.relative_path, name));
329        }
330    }
331    None
332}
333
334fn walk_for_calls(
335    edges: &mut Vec<EdgeDef>,
336    node: Node,
337    source: &[u8],
338    file: &SourceFile,
339    fn_stack: &mut Vec<String>,
340) {
341    let kind = node.kind();
342    let pushed = is_fn_node(kind);
343
344    if pushed {
345        if let Some(id) = fn_name_from_node(node, source, file) {
346            fn_stack.push(id);
347        } else {
348            fn_stack.push(String::new());
349        }
350    }
351
352    if kind == "method_invocation" {
353        if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
354            let callee_name = node
355                .child_by_field_name("name")
356                .map(|n| n.utf8_text(source).unwrap_or("").to_string())
357                .unwrap_or_default();
358
359            if !callee_name.is_empty() {
360                edges.push(EdgeDef {
361                    src: caller_id.clone(),
362                    dst: callee_name,
363                    kind: EdgeKind::Calls,
364                    confidence: 0.7,
365                    ..Default::default()
366                });
367            }
368        }
369    }
370
371    let mut cursor = node.walk();
372    if cursor.goto_first_child() {
373        loop {
374            walk_for_calls(edges, cursor.node(), source, file, fn_stack);
375            if !cursor.goto_next_sibling() {
376                break;
377            }
378        }
379    }
380
381    if pushed {
382        fn_stack.pop();
383    }
384}