1use tree_sitter::{Node, Parser, Query, QueryCursor};
2
3use crate::parser::{EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult};
4use crate::walker::SourceFile;
5
/// Java source parser backed by the tree-sitter Java grammar.
pub struct JavaParser {
    /// Grammar handle passed to every `Parser` and `Query` built by this type.
    language: tree_sitter::Language,
}
9
10impl JavaParser {
11 pub fn new() -> Self {
12 Self {
13 language: tree_sitter_java::language(),
14 }
15 }
16}
17
18impl Default for JavaParser {
19 fn default() -> Self {
20 Self::new()
21 }
22}
23
24impl LanguageParser for JavaParser {
25 fn extensions(&self) -> &[&str] {
26 &["java"]
27 }
28
29 fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
30 let mut parser = Parser::new();
31 parser.set_language(&self.language)?;
32
33 let tree = parser
34 .parse(&file.content, None)
35 .ok_or_else(|| anyhow::anyhow!("failed to parse {}", file.relative_path))?;
36
37 let source_bytes = file.content.as_bytes();
38 let root = tree.root_node();
39 let mut nodes = Vec::new();
40 let mut edges = Vec::new();
41
42 let fp = file_node_id(&file.relative_path);
43
44 if let Ok(query) = Query::new(
46 &self.language,
47 "(class_declaration name: (identifier) @name) @cls",
48 ) {
49 extract_nodes(
50 &mut nodes,
51 &mut edges,
52 file,
53 &query,
54 root,
55 source_bytes,
56 NodeKind::Class,
57 "cls",
58 &fp,
59 );
60 }
61
62 if let Ok(query) = Query::new(
64 &self.language,
65 "(interface_declaration name: (identifier) @name) @cls",
66 ) {
67 extract_nodes(
68 &mut nodes,
69 &mut edges,
70 file,
71 &query,
72 root,
73 source_bytes,
74 NodeKind::Class,
75 "cls",
76 &fp,
77 );
78 }
79
80 if let Ok(query) = Query::new(
82 &self.language,
83 "(method_declaration name: (identifier) @name) @fn",
84 ) {
85 extract_nodes(
86 &mut nodes,
87 &mut edges,
88 file,
89 &query,
90 root,
91 source_bytes,
92 NodeKind::Function,
93 "fn",
94 &fp,
95 );
96 }
97
98 if let Ok(query) = Query::new(
100 &self.language,
101 "(constructor_declaration name: (identifier) @name) @fn",
102 ) {
103 extract_nodes(
104 &mut nodes,
105 &mut edges,
106 file,
107 &query,
108 root,
109 source_bytes,
110 NodeKind::Function,
111 "fn",
112 &fp,
113 );
114 }
115
116 extract_imports(&mut edges, root, source_bytes, &fp, file);
118
119 extract_calls(&mut edges, root, source_bytes, file);
121
122 Ok(ParseResult { nodes, edges })
123 }
124}
125
/// Builds the graph id of a file: its relative path prefixed with `file:`.
fn file_node_id(rel_path: &str) -> String {
    const PREFIX: &str = "file:";
    let mut id = String::with_capacity(PREFIX.len() + rel_path.len());
    id.push_str(PREFIX);
    id.push_str(rel_path);
    id
}
129
130#[allow(clippy::too_many_arguments)]
131fn extract_nodes(
132 nodes: &mut Vec<NodeDef>,
133 edges: &mut Vec<EdgeDef>,
134 file: &SourceFile,
135 query: &Query,
136 root: tree_sitter::Node,
137 source_bytes: &[u8],
138 kind: NodeKind,
139 prefix: &str,
140 file_id: &str,
141) {
142 let mut cursor = QueryCursor::new();
143 for m in cursor.matches(query, root, source_bytes) {
144 let Some(name_capture) = m
145 .captures
146 .iter()
147 .find(|c| query.capture_names()[c.index as usize] == "name")
148 else {
149 continue;
150 };
151
152 let name = node_text(name_capture.node, source_bytes);
153 let node_start = name_capture.node.start_position();
154
155 let body_end = m
156 .captures
157 .iter()
158 .find(|c| {
159 let cap_name = &query.capture_names()[c.index as usize];
160 *cap_name == "fn" || *cap_name == "cls"
161 })
162 .map(|c| c.node.end_position())
163 .unwrap_or_else(|| name_capture.node.end_position());
164
165 let id = format!("{}:{}:{}", prefix, file.relative_path, name);
166
167 nodes.push(NodeDef {
168 id: id.clone(),
169 kind: kind.clone(),
170 name: name.clone(),
171 path: file.relative_path.clone(),
172 line_start: node_start.row as u32 + 1,
173 line_end: body_end.row as u32 + 1,
174 ..Default::default()
175 });
176
177 edges.push(EdgeDef {
178 src: file_id.to_string(),
179 dst: id,
180 kind: EdgeKind::Exports,
181 ..Default::default()
182 });
183 }
184}
185
186fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
187 node.utf8_text(source).unwrap_or("").to_string()
188}
189
190fn extract_imports(
191 edges: &mut Vec<EdgeDef>,
192 root: tree_sitter::Node,
193 source_bytes: &[u8],
194 file_id: &str,
195 file: &SourceFile,
196) {
197 let mut cursor = root.walk();
198 traverse_imports(edges, root, source_bytes, file_id, file, &mut cursor);
199}
200
201fn traverse_imports(
202 edges: &mut Vec<EdgeDef>,
203 node: tree_sitter::Node,
204 source_bytes: &[u8],
205 file_id: &str,
206 file: &SourceFile,
207 cursor: &mut tree_sitter::TreeCursor,
208) {
209 if node.kind() == "import_declaration" {
210 for j in 0..node.child_count() {
212 let Some(import_child) = node.child(j) else {
213 continue;
214 };
215 if import_child.kind() == "scoped_identifier" || import_child.kind() == "identifier" {
216 let import_path = node_text(import_child, source_bytes);
217 if !import_path.is_empty() {
219 let resolved = resolve_java_import(&file.relative_path, &import_path);
220 if !resolved.is_empty() {
221 edges.push(EdgeDef {
222 src: file_id.to_string(),
223 dst: file_node_id(&resolved),
224 kind: EdgeKind::Imports,
225 ..Default::default()
226 });
227 }
228 }
229 }
230 }
231 }
232
233 if cursor.goto_first_child() {
234 loop {
235 let child = cursor.node();
236 traverse_imports(edges, child, source_bytes, file_id, file, cursor);
237 if !cursor.goto_next_sibling() {
238 break;
239 }
240 }
241 cursor.goto_parent();
242 }
243}
244
/// Maps a dotted Java import name to a slash-separated relative path.
///
/// The mapping relies on the Java naming convention that package segments are
/// lowercase and type names start with an uppercase letter:
///
/// * The path is cut at the first segment that starts with an uppercase
///   letter and `.java` is appended, so the result names the file of the
///   top-level type. This covers plain type imports (`java.util.List` ->
///   `java/util/List.java`), nested types (`java.util.Map.Entry` ->
///   `java/util/Map.java`) and static members
///   (`org.junit.Assert.assertEquals` -> `org/junit/Assert.java`).
/// * With no uppercase segment the name is treated as a package and only the
///   dots are replaced (`java.util` -> `java/util`).
/// * A name without any dot yields an empty string, meaning "unresolved".
///
/// `_current` (the importing file's path) is accepted but currently unused.
fn resolve_java_import(_current: &str, import: &str) -> String {
    let parts: Vec<&str> = import.split('.').collect();
    if parts.len() < 2 {
        return String::new();
    }

    let is_type_name = |segment: &&str| segment.starts_with(char::is_uppercase);
    match parts.iter().position(is_type_name) {
        // Everything after the top-level type (nested types, static members)
        // lives in that type's source file.
        Some(type_idx) => format!("{}.java", parts[..=type_idx].join("/")),
        None => parts.join("/"),
    }
}
268
269fn extract_calls(edges: &mut Vec<EdgeDef>, root: Node, source: &[u8], file: &SourceFile) {
270 let mut fn_stack: Vec<String> = Vec::new();
271 walk_for_calls(edges, root, source, file, &mut fn_stack);
272}
273
/// Reports whether a tree-sitter node kind opens a new function scope for
/// call attribution: methods, constructors and lambdas.
fn is_fn_node(kind: &str) -> bool {
    const FN_KINDS: [&str; 3] = [
        "method_declaration",
        "constructor_declaration",
        "lambda_expression",
    ];
    FN_KINDS.contains(&kind)
}
280
281fn fn_name_from_node(node: Node, source: &[u8], file: &SourceFile) -> Option<String> {
282 if let Some(name_node) = node.child_by_field_name("name") {
283 let name = name_node.utf8_text(source).unwrap_or("").to_string();
284 if !name.is_empty() {
285 return Some(format!("fn:{}:{}", file.relative_path, name));
286 }
287 }
288 None
289}
290
291fn walk_for_calls(
292 edges: &mut Vec<EdgeDef>,
293 node: Node,
294 source: &[u8],
295 file: &SourceFile,
296 fn_stack: &mut Vec<String>,
297) {
298 let kind = node.kind();
299 let pushed = is_fn_node(kind);
300
301 if pushed {
302 if let Some(id) = fn_name_from_node(node, source, file) {
303 fn_stack.push(id);
304 } else {
305 fn_stack.push(String::new());
306 }
307 }
308
309 if kind == "method_invocation" {
310 if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
311 let callee_name = node
312 .child_by_field_name("name")
313 .map(|n| n.utf8_text(source).unwrap_or("").to_string())
314 .unwrap_or_default();
315
316 if !callee_name.is_empty() {
317 edges.push(EdgeDef {
318 src: caller_id.clone(),
319 dst: callee_name,
320 kind: EdgeKind::Calls,
321 confidence: 0.7,
322 ..Default::default()
323 });
324 }
325 }
326 }
327
328 let mut cursor = node.walk();
329 if cursor.goto_first_child() {
330 loop {
331 walk_for_calls(edges, cursor.node(), source, file, fn_stack);
332 if !cursor.goto_next_sibling() {
333 break;
334 }
335 }
336 }
337
338 if pushed {
339 fn_stack.pop();
340 }
341}