1use tree_sitter::{Node, Parser, Query, QueryCursor};
2
3use crate::parser::{EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult};
4use crate::walker::SourceFile;
5
6pub struct JavaParser {
7 language: tree_sitter::Language,
8}
9
10impl JavaParser {
11 pub fn new() -> Self {
12 Self {
13 language: tree_sitter_java::language(),
14 }
15 }
16}
17
18impl Default for JavaParser {
19 fn default() -> Self {
20 Self::new()
21 }
22}
23
24impl LanguageParser for JavaParser {
25 fn extensions(&self) -> &[&str] {
26 &["java"]
27 }
28
29 fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
30 let mut parser = Parser::new();
31 parser.set_language(&self.language)?;
32
33 let tree = parser
34 .parse(&file.content, None)
35 .ok_or_else(|| anyhow::anyhow!("failed to parse {}", file.relative_path))?;
36
37 let source_bytes = file.content.as_bytes();
38 let root = tree.root_node();
39 let mut nodes = Vec::new();
40 let mut edges = Vec::new();
41
42 let fp = file_node_id(&file.relative_path);
43
44 if let Ok(query) = Query::new(
46 &self.language,
47 "(class_declaration name: (identifier) @name) @cls",
48 ) {
49 extract_nodes(
50 &mut nodes,
51 &mut edges,
52 file,
53 &query,
54 root,
55 source_bytes,
56 NodeKind::Class,
57 "cls",
58 &fp,
59 );
60 }
61
62 if let Ok(query) = Query::new(
64 &self.language,
65 "(interface_declaration name: (identifier) @name) @cls",
66 ) {
67 extract_nodes(
68 &mut nodes,
69 &mut edges,
70 file,
71 &query,
72 root,
73 source_bytes,
74 NodeKind::Class,
75 "cls",
76 &fp,
77 );
78 }
79
80 if let Ok(query) = Query::new(
82 &self.language,
83 "(method_declaration name: (identifier) @name) @fn",
84 ) {
85 extract_nodes(
86 &mut nodes,
87 &mut edges,
88 file,
89 &query,
90 root,
91 source_bytes,
92 NodeKind::Function,
93 "fn",
94 &fp,
95 );
96 }
97
98 if let Ok(query) = Query::new(
100 &self.language,
101 "(constructor_declaration name: (identifier) @name) @fn",
102 ) {
103 extract_nodes(
104 &mut nodes,
105 &mut edges,
106 file,
107 &query,
108 root,
109 source_bytes,
110 NodeKind::Function,
111 "fn",
112 &fp,
113 );
114 }
115
116 extract_imports(&mut edges, root, source_bytes, &fp, file);
118
119 extract_calls(&mut edges, root, source_bytes, file);
121
122 Ok(ParseResult {
123 nodes,
124 edges,
125 ..Default::default()
126 })
127 }
128}
129
/// Builds the graph id of a file node: the literal prefix `file:` followed by
/// the given relative path, unchanged.
fn file_node_id(rel_path: &str) -> String {
    ["file:", rel_path].concat()
}
133
134#[allow(clippy::too_many_arguments)]
135fn extract_nodes(
136 nodes: &mut Vec<NodeDef>,
137 edges: &mut Vec<EdgeDef>,
138 file: &SourceFile,
139 query: &Query,
140 root: tree_sitter::Node,
141 source_bytes: &[u8],
142 kind: NodeKind,
143 prefix: &str,
144 file_id: &str,
145) {
146 let mut cursor = QueryCursor::new();
147 for m in cursor.matches(query, root, source_bytes) {
148 let Some(name_capture) = m
149 .captures
150 .iter()
151 .find(|c| query.capture_names()[c.index as usize] == "name")
152 else {
153 continue;
154 };
155
156 let name = node_text(name_capture.node, source_bytes);
157 let node_start = name_capture.node.start_position();
158
159 let body_end = m
160 .captures
161 .iter()
162 .find(|c| {
163 let cap_name = &query.capture_names()[c.index as usize];
164 *cap_name == "fn" || *cap_name == "cls"
165 })
166 .map(|c| c.node.end_position())
167 .unwrap_or_else(|| name_capture.node.end_position());
168
169 let id = format!("{}:{}:{}", prefix, file.relative_path, name);
170
171 nodes.push(NodeDef {
172 id: id.clone(),
173 kind: kind.clone(),
174 name: name.clone(),
175 path: file.relative_path.clone(),
176 line_start: node_start.row as u32 + 1,
177 line_end: body_end.row as u32 + 1,
178 ..Default::default()
179 });
180
181 edges.push(EdgeDef {
182 src: file_id.to_string(),
183 dst: id,
184 kind: EdgeKind::Exports,
185 ..Default::default()
186 });
187 }
188}
189
190fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
191 node.utf8_text(source).unwrap_or("").to_string()
192}
193
194fn extract_imports(
195 edges: &mut Vec<EdgeDef>,
196 root: tree_sitter::Node,
197 source_bytes: &[u8],
198 file_id: &str,
199 file: &SourceFile,
200) {
201 let mut cursor = root.walk();
202 traverse_imports(edges, root, source_bytes, file_id, file, &mut cursor);
203}
204
205fn traverse_imports(
206 edges: &mut Vec<EdgeDef>,
207 node: tree_sitter::Node,
208 source_bytes: &[u8],
209 file_id: &str,
210 file: &SourceFile,
211 cursor: &mut tree_sitter::TreeCursor,
212) {
213 if node.kind() == "import_declaration" {
214 for j in 0..node.child_count() {
216 let Some(import_child) = node.child(j) else {
217 continue;
218 };
219 if import_child.kind() == "scoped_identifier" || import_child.kind() == "identifier" {
220 let import_path = node_text(import_child, source_bytes);
221 if !import_path.is_empty() {
223 let resolved = resolve_java_import(&file.relative_path, &import_path);
224 if !resolved.is_empty() {
225 edges.push(EdgeDef {
226 src: file_id.to_string(),
227 dst: file_node_id(&resolved),
228 kind: EdgeKind::Imports,
229 ..Default::default()
230 });
231 }
232 }
233 }
234 }
235 }
236
237 if cursor.goto_first_child() {
238 loop {
239 let child = cursor.node();
240 traverse_imports(edges, child, source_bytes, file_id, file, cursor);
241 if !cursor.goto_next_sibling() {
242 break;
243 }
244 }
245 cursor.goto_parent();
246 }
247}
248
/// Maps a dotted Java import path to a slash-separated relative path.
///
/// The mapping is a naming-convention heuristic: the first segment that starts
/// with an uppercase letter is taken to be the top-level class, so the result
/// is the path up to and including that segment plus `.java`. Anything after
/// it (a nested class or a statically imported member) lives in the same
/// file and is dropped:
///
/// * `java.util.List`              -> `java/util/List.java`
/// * `com.foo.Outer.Inner`         -> `com/foo/Outer.java`
/// * `com.foo.Constants.MAX_VALUE` -> `com/foo/Constants.java`
/// * `java.lang.Math.abs`          -> `java/lang/Math.java`
///
/// When no segment is capitalised the import is treated as a package (for
/// example the `java.util` part of a wildcard import) and is returned as a
/// directory-like path without extension.
///
/// Returns an empty string for imports with fewer than two segments.
/// `_current` is accepted for signature compatibility and is not used.
fn resolve_java_import(_current: &str, import: &str) -> String {
    let parts: Vec<&str> = import.split('.').collect();
    if parts.len() < 2 {
        return String::new();
    }

    let starts_uppercase =
        |segment: &&str| segment.chars().next().map_or(false, char::is_uppercase);

    match parts.iter().position(starts_uppercase) {
        // Cut at the top-level class; trailing segments are members of it.
        Some(class_idx) => format!("{}.java", parts[..=class_idx].join("/")),
        None => parts.join("/"),
    }
}
272
273fn extract_calls(edges: &mut Vec<EdgeDef>, root: Node, source: &[u8], file: &SourceFile) {
274 let mut fn_stack: Vec<String> = Vec::new();
275 walk_for_calls(edges, root, source, file, &mut fn_stack);
276}
277
/// Tells whether a syntax-node kind opens a new function scope for call
/// attribution: method declarations, constructor declarations and lambdas.
fn is_fn_node(kind: &str) -> bool {
    const FN_KINDS: [&str; 3] = [
        "method_declaration",
        "constructor_declaration",
        "lambda_expression",
    ];
    FN_KINDS.iter().any(|candidate| *candidate == kind)
}
284
285fn fn_name_from_node(node: Node, source: &[u8], file: &SourceFile) -> Option<String> {
286 if let Some(name_node) = node.child_by_field_name("name") {
287 let name = name_node.utf8_text(source).unwrap_or("").to_string();
288 if !name.is_empty() {
289 return Some(format!("fn:{}:{}", file.relative_path, name));
290 }
291 }
292 None
293}
294
295fn walk_for_calls(
296 edges: &mut Vec<EdgeDef>,
297 node: Node,
298 source: &[u8],
299 file: &SourceFile,
300 fn_stack: &mut Vec<String>,
301) {
302 let kind = node.kind();
303 let pushed = is_fn_node(kind);
304
305 if pushed {
306 if let Some(id) = fn_name_from_node(node, source, file) {
307 fn_stack.push(id);
308 } else {
309 fn_stack.push(String::new());
310 }
311 }
312
313 if kind == "method_invocation" {
314 if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
315 let callee_name = node
316 .child_by_field_name("name")
317 .map(|n| n.utf8_text(source).unwrap_or("").to_string())
318 .unwrap_or_default();
319
320 if !callee_name.is_empty() {
321 edges.push(EdgeDef {
322 src: caller_id.clone(),
323 dst: callee_name,
324 kind: EdgeKind::Calls,
325 confidence: 0.7,
326 ..Default::default()
327 });
328 }
329 }
330 }
331
332 let mut cursor = node.walk();
333 if cursor.goto_first_child() {
334 loop {
335 walk_for_calls(edges, cursor.node(), source, file, fn_stack);
336 if !cursor.goto_next_sibling() {
337 break;
338 }
339 }
340 }
341
342 if pushed {
343 fn_stack.pop();
344 }
345}