1use tree_sitter::{Node, Parser, Query, QueryCursor};
2
3use crate::parser::{
4 collect_doc_block_above, meta_set, EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind,
5 ParseResult,
6};
7use crate::walker::SourceFile;
8
9pub struct JavaParser {
10 language: tree_sitter::Language,
11}
12
13impl JavaParser {
14 pub fn new() -> Self {
15 Self {
16 language: tree_sitter_java::language(),
17 }
18 }
19}
20
21impl Default for JavaParser {
22 fn default() -> Self {
23 Self::new()
24 }
25}
26
27impl LanguageParser for JavaParser {
28 fn extensions(&self) -> &[&str] {
29 &["java"]
30 }
31
32 fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
33 let mut parser = Parser::new();
34 parser.set_language(&self.language)?;
35
36 let tree = parser
37 .parse(&file.content, None)
38 .ok_or_else(|| anyhow::anyhow!("failed to parse {}", file.relative_path))?;
39
40 let source_bytes = file.content.as_bytes();
41 let root = tree.root_node();
42 let mut nodes = Vec::new();
43 let mut edges = Vec::new();
44
45 let fp = file_node_id(&file.relative_path);
46
47 if let Ok(query) = Query::new(
49 &self.language,
50 "(class_declaration name: (identifier) @name) @cls",
51 ) {
52 extract_nodes(
53 &mut nodes,
54 &mut edges,
55 file,
56 &query,
57 root,
58 source_bytes,
59 NodeKind::Class,
60 "cls",
61 &fp,
62 );
63 }
64
65 if let Ok(query) = Query::new(
67 &self.language,
68 "(interface_declaration name: (identifier) @name) @cls",
69 ) {
70 extract_nodes(
71 &mut nodes,
72 &mut edges,
73 file,
74 &query,
75 root,
76 source_bytes,
77 NodeKind::Class,
78 "cls",
79 &fp,
80 );
81 }
82
83 if let Ok(query) = Query::new(
85 &self.language,
86 "(method_declaration name: (identifier) @name) @fn",
87 ) {
88 extract_nodes(
89 &mut nodes,
90 &mut edges,
91 file,
92 &query,
93 root,
94 source_bytes,
95 NodeKind::Function,
96 "fn",
97 &fp,
98 );
99 }
100
101 if let Ok(query) = Query::new(
103 &self.language,
104 "(constructor_declaration name: (identifier) @name) @fn",
105 ) {
106 extract_nodes(
107 &mut nodes,
108 &mut edges,
109 file,
110 &query,
111 root,
112 source_bytes,
113 NodeKind::Function,
114 "fn",
115 &fp,
116 );
117 }
118
119 extract_imports(&mut edges, root, source_bytes, &fp, file);
121
122 extract_calls(&mut edges, root, source_bytes, file);
124
125 Ok(ParseResult {
126 nodes,
127 edges,
128 ..Default::default()
129 })
130 }
131}
132
/// Builds the graph id of a file node: the literal `file:` followed by the
/// relative path.
fn file_node_id(rel_path: &str) -> String {
    ["file:", rel_path].concat()
}
136
137#[allow(clippy::too_many_arguments)]
138fn extract_nodes(
139 nodes: &mut Vec<NodeDef>,
140 edges: &mut Vec<EdgeDef>,
141 file: &SourceFile,
142 query: &Query,
143 root: tree_sitter::Node,
144 source_bytes: &[u8],
145 kind: NodeKind,
146 prefix: &str,
147 file_id: &str,
148) {
149 let mut cursor = QueryCursor::new();
150 for m in cursor.matches(query, root, source_bytes) {
151 let Some(name_capture) = m
152 .captures
153 .iter()
154 .find(|c| query.capture_names()[c.index as usize] == "name")
155 else {
156 continue;
157 };
158
159 let name = node_text(name_capture.node, source_bytes);
160 let node_start = name_capture.node.start_position();
161
162 let item_node = m
163 .captures
164 .iter()
165 .find(|c| {
166 let cap_name = &query.capture_names()[c.index as usize];
167 *cap_name == "fn" || *cap_name == "cls"
168 })
169 .map(|c| c.node);
170 let body_end = item_node
171 .map(|n| n.end_position())
172 .unwrap_or_else(|| name_capture.node.end_position());
173
174 let id = format!("{}:{}:{}", prefix, file.relative_path, name);
175
176 let doc_comment = item_node
177 .and_then(|n| collect_doc_block_above(n, source_bytes, is_jdoc_comment))
178 .map(strip_jdoc_markers);
179
180 let mut def = NodeDef {
181 id: id.clone(),
182 kind: kind.clone(),
183 name: name.clone(),
184 path: file.relative_path.clone(),
185 line_start: node_start.row as u32 + 1,
186 line_end: body_end.row as u32 + 1,
187 ..Default::default()
188 };
189 if let Some(doc) = doc_comment {
190 meta_set(&mut def, "doc_comment", serde_json::Value::String(doc));
191 }
192 nodes.push(def);
193
194 edges.push(EdgeDef {
195 src: file_id.to_string(),
196 dst: id,
197 kind: EdgeKind::Exports,
198 ..Default::default()
199 });
200 }
201}
202
/// Returns `true` when `text`, ignoring leading whitespace, opens with the
/// Javadoc marker `/**`.
fn is_jdoc_comment(text: &str) -> bool {
    let without_indent = text.trim_start();
    without_indent.strip_prefix("/**").is_some()
}
207
/// Removes Javadoc comment markers from `raw` and returns the plain text.
///
/// Each line is trimmed and then cleaned:
/// * an opening `/**` is dropped, together with a closing `*/` on the same
///   line (`/** text */` becomes `text`);
/// * a line that starts with `*/` becomes empty;
/// * one leading `*` gutter character is dropped from continuation lines;
/// * a closing `*/` at the end of a continuation line is dropped as well, so
///   `* last line */` becomes `last line` and `**/` becomes empty.
///
/// The cleaned lines are joined with `\n` and the result is trimmed, so
/// blank lines inside the comment are kept while leading and trailing blank
/// lines are removed.
fn strip_jdoc_markers(raw: String) -> String {
    /// Cleans a single comment line; the result borrows from `line`.
    fn clean_line(line: &str) -> &str {
        let line = line.trim();

        if line.starts_with("/**") {
            let rest = line.trim_start_matches("/**");
            // `/**/` is an empty comment: the opener consumed the closer's `*`.
            if rest == "/" {
                return "";
            }
            return rest.trim_end_matches("*/").trim();
        }

        if line.starts_with("*/") {
            return "";
        }

        let rest = line.strip_prefix('*').unwrap_or(line);
        // The closer may share a line with text (`* text */`) or be written as
        // `**/`; strip it here so it does not leak into the documentation.
        rest.trim().trim_end_matches("*/").trim_end()
    }

    let cleaned: Vec<&str> = raw.lines().map(clean_line).collect();
    cleaned.join("\n").trim().to_string()
}
228
229fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
230 node.utf8_text(source).unwrap_or("").to_string()
231}
232
233fn extract_imports(
234 edges: &mut Vec<EdgeDef>,
235 root: tree_sitter::Node,
236 source_bytes: &[u8],
237 file_id: &str,
238 file: &SourceFile,
239) {
240 let mut cursor = root.walk();
241 traverse_imports(edges, root, source_bytes, file_id, file, &mut cursor);
242}
243
244fn traverse_imports(
245 edges: &mut Vec<EdgeDef>,
246 node: tree_sitter::Node,
247 source_bytes: &[u8],
248 file_id: &str,
249 file: &SourceFile,
250 cursor: &mut tree_sitter::TreeCursor,
251) {
252 if node.kind() == "import_declaration" {
253 for j in 0..node.child_count() {
255 let Some(import_child) = node.child(j) else {
256 continue;
257 };
258 if import_child.kind() == "scoped_identifier" || import_child.kind() == "identifier" {
259 let import_path = node_text(import_child, source_bytes);
260 if !import_path.is_empty() {
262 let resolved = resolve_java_import(&file.relative_path, &import_path);
263 if !resolved.is_empty() {
264 edges.push(EdgeDef {
265 src: file_id.to_string(),
266 dst: file_node_id(&resolved),
267 kind: EdgeKind::Imports,
268 ..Default::default()
269 });
270 }
271 }
272 }
273 }
274 }
275
276 if cursor.goto_first_child() {
277 loop {
278 let child = cursor.node();
279 traverse_imports(edges, child, source_bytes, file_id, file, cursor);
280 if !cursor.goto_next_sibling() {
281 break;
282 }
283 }
284 cursor.goto_parent();
285 }
286}
287
/// Turns a dotted Java import into a slash-separated relative path.
///
/// * An import without any `.` yields an empty string.
/// * When the last segment starts with an uppercase character, `.java` is
///   appended (`com.foo.Bar` becomes `com/foo/Bar.java`).
/// * Otherwise the dots are only replaced by slashes (`java.util` becomes
///   `java/util`).
///
/// `_current` is accepted but not used.
fn resolve_java_import(_current: &str, import: &str) -> String {
    if !import.contains('.') {
        return String::new();
    }

    let last_segment = import.rsplit('.').next().unwrap_or("");
    let starts_uppercase = last_segment
        .chars()
        .next()
        .map_or(false, char::is_uppercase);

    let mut path = import.replace('.', "/");
    if starts_uppercase {
        path.push_str(".java");
    }
    path
}
311
312fn extract_calls(edges: &mut Vec<EdgeDef>, root: Node, source: &[u8], file: &SourceFile) {
313 let mut fn_stack: Vec<String> = Vec::new();
314 walk_for_calls(edges, root, source, file, &mut fn_stack);
315}
316
/// Returns `true` for the node kinds that open a new function scope while
/// walking for calls: methods, constructors and lambda expressions.
fn is_fn_node(kind: &str) -> bool {
    const FN_KINDS: [&str; 3] = [
        "method_declaration",
        "constructor_declaration",
        "lambda_expression",
    ];
    FN_KINDS.contains(&kind)
}
323
324fn fn_name_from_node(node: Node, source: &[u8], file: &SourceFile) -> Option<String> {
325 if let Some(name_node) = node.child_by_field_name("name") {
326 let name = name_node.utf8_text(source).unwrap_or("").to_string();
327 if !name.is_empty() {
328 return Some(format!("fn:{}:{}", file.relative_path, name));
329 }
330 }
331 None
332}
333
334fn walk_for_calls(
335 edges: &mut Vec<EdgeDef>,
336 node: Node,
337 source: &[u8],
338 file: &SourceFile,
339 fn_stack: &mut Vec<String>,
340) {
341 let kind = node.kind();
342 let pushed = is_fn_node(kind);
343
344 if pushed {
345 if let Some(id) = fn_name_from_node(node, source, file) {
346 fn_stack.push(id);
347 } else {
348 fn_stack.push(String::new());
349 }
350 }
351
352 if kind == "method_invocation" {
353 if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
354 let callee_name = node
355 .child_by_field_name("name")
356 .map(|n| n.utf8_text(source).unwrap_or("").to_string())
357 .unwrap_or_default();
358
359 if !callee_name.is_empty() {
360 edges.push(EdgeDef {
361 src: caller_id.clone(),
362 dst: callee_name,
363 kind: EdgeKind::Calls,
364 confidence: 0.7,
365 ..Default::default()
366 });
367 }
368 }
369 }
370
371 let mut cursor = node.walk();
372 if cursor.goto_first_child() {
373 loop {
374 walk_for_calls(edges, cursor.node(), source, file, fn_stack);
375 if !cursor.goto_next_sibling() {
376 break;
377 }
378 }
379 }
380
381 if pushed {
382 fn_stack.pop();
383 }
384}