Skip to main content

agentic_codebase/parse/
java.rs

1//! Java parsing using tree-sitter.
2//!
3//! Extracts classes, interfaces, enums, methods, constructors, imports, packages.
4
5use std::path::Path;
6
7use crate::types::{AcbResult, CodeUnitType, Language, Visibility};
8
9use super::treesitter::{get_node_text, node_to_span};
10use super::{LanguageParser, RawCodeUnit};
11
/// Java language parser.
///
/// A stateless unit type: all parsing state is passed explicitly to the
/// extraction methods, so values are trivially copyable.
#[derive(Debug, Clone, Copy)]
pub struct JavaParser;
14
15impl Default for JavaParser {
16    fn default() -> Self {
17        Self::new()
18    }
19}
20
21impl JavaParser {
22    /// Create a new Java parser.
23    pub fn new() -> Self {
24        Self
25    }
26
27    fn extract_from_node(
28        &self,
29        node: tree_sitter::Node,
30        source: &str,
31        file_path: &Path,
32        units: &mut Vec<RawCodeUnit>,
33        next_id: &mut u64,
34        parent_qname: &str,
35    ) {
36        let mut cursor = node.walk();
37        for child in node.children(&mut cursor) {
38            match child.kind() {
39                "class_declaration" => {
40                    self.extract_class(
41                        child,
42                        source,
43                        file_path,
44                        units,
45                        next_id,
46                        parent_qname,
47                        CodeUnitType::Type,
48                    );
49                }
50                "interface_declaration" => {
51                    self.extract_class(
52                        child,
53                        source,
54                        file_path,
55                        units,
56                        next_id,
57                        parent_qname,
58                        CodeUnitType::Trait,
59                    );
60                }
61                "enum_declaration" => {
62                    self.extract_class(
63                        child,
64                        source,
65                        file_path,
66                        units,
67                        next_id,
68                        parent_qname,
69                        CodeUnitType::Type,
70                    );
71                }
72                "record_declaration" => {
73                    self.extract_class(
74                        child,
75                        source,
76                        file_path,
77                        units,
78                        next_id,
79                        parent_qname,
80                        CodeUnitType::Type,
81                    );
82                }
83                "method_declaration" => {
84                    if let Some(unit) =
85                        self.extract_method(child, source, file_path, parent_qname, next_id)
86                    {
87                        units.push(unit);
88                    }
89                }
90                "constructor_declaration" => {
91                    if let Some(unit) =
92                        self.extract_method(child, source, file_path, parent_qname, next_id)
93                    {
94                        units.push(unit);
95                    }
96                }
97                "import_declaration" => {
98                    if let Some(unit) =
99                        self.extract_import(child, source, file_path, parent_qname, next_id)
100                    {
101                        units.push(unit);
102                    }
103                }
104                "package_declaration" => {
105                    // Captured as module-level metadata, skip as unit
106                }
107                _ => {}
108            }
109        }
110    }
111
112    #[allow(clippy::too_many_arguments)]
113    fn extract_class(
114        &self,
115        node: tree_sitter::Node,
116        source: &str,
117        file_path: &Path,
118        units: &mut Vec<RawCodeUnit>,
119        next_id: &mut u64,
120        parent_qname: &str,
121        unit_type: CodeUnitType,
122    ) {
123        let name = match node.child_by_field_name("name") {
124            Some(n) => get_node_text(n, source).to_string(),
125            None => return,
126        };
127        let qname = java_qname(parent_qname, &name);
128        let span = node_to_span(node);
129        let vis = extract_java_visibility(node, source);
130
131        let id = *next_id;
132        *next_id += 1;
133
134        let mut unit = RawCodeUnit::new(
135            unit_type,
136            Language::Java,
137            name,
138            file_path.to_path_buf(),
139            span,
140        );
141        unit.temp_id = id;
142        unit.qualified_name = qname.clone();
143        unit.visibility = vis;
144        units.push(unit);
145
146        // Recurse into the class body
147        if let Some(body) = node.child_by_field_name("body") {
148            self.extract_from_node(body, source, file_path, units, next_id, &qname);
149        }
150    }
151
152    fn extract_method(
153        &self,
154        node: tree_sitter::Node,
155        source: &str,
156        file_path: &Path,
157        parent_qname: &str,
158        next_id: &mut u64,
159    ) -> Option<RawCodeUnit> {
160        let name_node = node.child_by_field_name("name")?;
161        let name = get_node_text(name_node, source).to_string();
162        let qname = java_qname(parent_qname, &name);
163        let span = node_to_span(node);
164        let vis = extract_java_visibility(node, source);
165
166        let id = *next_id;
167        *next_id += 1;
168
169        let is_test = has_annotation(node, source, "Test")
170            || has_annotation(node, source, "ParameterizedTest")
171            || name.starts_with("test");
172        let unit_type = if is_test {
173            CodeUnitType::Test
174        } else {
175            CodeUnitType::Function
176        };
177
178        let mut unit = RawCodeUnit::new(
179            unit_type,
180            Language::Java,
181            name,
182            file_path.to_path_buf(),
183            span,
184        );
185        unit.temp_id = id;
186        unit.qualified_name = qname;
187        unit.visibility = vis;
188
189        Some(unit)
190    }
191
192    fn extract_import(
193        &self,
194        node: tree_sitter::Node,
195        source: &str,
196        file_path: &Path,
197        parent_qname: &str,
198        next_id: &mut u64,
199    ) -> Option<RawCodeUnit> {
200        let text = get_node_text(node, source)
201            .trim_start_matches("import ")
202            .trim_start_matches("static ")
203            .trim_end_matches(';')
204            .trim()
205            .to_string();
206        let span = node_to_span(node);
207
208        let id = *next_id;
209        *next_id += 1;
210
211        let mut unit = RawCodeUnit::new(
212            CodeUnitType::Import,
213            Language::Java,
214            text,
215            file_path.to_path_buf(),
216            span,
217        );
218        unit.temp_id = id;
219        unit.qualified_name = java_qname(parent_qname, "import");
220
221        Some(unit)
222    }
223}
224
225impl LanguageParser for JavaParser {
226    fn extract_units(
227        &self,
228        tree: &tree_sitter::Tree,
229        source: &str,
230        file_path: &Path,
231    ) -> AcbResult<Vec<RawCodeUnit>> {
232        let mut units = Vec::new();
233        let mut next_id = 0u64;
234
235        let module_name = file_path
236            .file_stem()
237            .and_then(|s| s.to_str())
238            .unwrap_or("unknown")
239            .to_string();
240
241        let root_span = node_to_span(tree.root_node());
242        let mut module_unit = RawCodeUnit::new(
243            CodeUnitType::Module,
244            Language::Java,
245            module_name.clone(),
246            file_path.to_path_buf(),
247            root_span,
248        );
249        module_unit.temp_id = next_id;
250        module_unit.qualified_name = module_name.clone();
251        next_id += 1;
252        units.push(module_unit);
253
254        self.extract_from_node(
255            tree.root_node(),
256            source,
257            file_path,
258            &mut units,
259            &mut next_id,
260            &module_name,
261        );
262
263        Ok(units)
264    }
265
266    fn is_test_file(&self, path: &Path, _source: &str) -> bool {
267        let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
268        name.ends_with("Test.java")
269            || name.ends_with("Tests.java")
270            || name.starts_with("Test")
271            || name.ends_with("IT.java")
272    }
273}
274
/// Join `parent` and `name` with a `.` to form a qualified name.
///
/// An empty `parent` yields `name` unchanged.
fn java_qname(parent: &str, name: &str) -> String {
    match parent {
        "" => name.to_owned(),
        prefix => [prefix, name].join("."),
    }
}
282
283/// Extract visibility from Java modifiers.
284fn extract_java_visibility(node: tree_sitter::Node, source: &str) -> Visibility {
285    let mut cursor = node.walk();
286    for child in node.children(&mut cursor) {
287        if child.kind() == "modifiers" {
288            let text = get_node_text(child, source);
289            if text.contains("public") {
290                return Visibility::Public;
291            } else if text.contains("private") {
292                return Visibility::Private;
293            } else if text.contains("protected") {
294                return Visibility::Public; // close enough for graph purposes
295            }
296        }
297    }
298    // Java default (package-private) — treat as public for graph
299    Visibility::Public
300}
301
302/// Check if a method/class has a specific annotation.
303fn has_annotation(node: tree_sitter::Node, source: &str, annotation: &str) -> bool {
304    let mut cursor = node.walk();
305    for child in node.children(&mut cursor) {
306        if child.kind() == "modifiers" {
307            let mut inner_cursor = child.walk();
308            for modifier in child.children(&mut inner_cursor) {
309                if modifier.kind() == "marker_annotation" || modifier.kind() == "annotation" {
310                    let text = get_node_text(modifier, source);
311                    if text.contains(annotation) {
312                        return true;
313                    }
314                }
315            }
316        }
317    }
318    false
319}