Skip to main content

agentic_codebase/parse/
java.rs

1//! Java parsing using tree-sitter.
2//!
3//! Extracts classes, interfaces, enums, methods, constructors, imports, packages.
4
5use std::path::Path;
6
7use crate::types::{AcbResult, CodeUnitType, Language, Visibility};
8
9use super::treesitter::{get_node_text, node_to_span};
10use super::{LanguageParser, RawCodeUnit};
11
/// Java language parser.
///
/// The parser is a stateless unit struct, so it is cheap to copy and can be
/// printed in diagnostics.
#[derive(Debug, Clone, Copy)]
pub struct JavaParser;
14
15impl Default for JavaParser {
16    fn default() -> Self {
17        Self::new()
18    }
19}
20
21impl JavaParser {
22    /// Create a new Java parser.
23    pub fn new() -> Self {
24        Self
25    }
26
27    fn extract_from_node(
28        &self,
29        node: tree_sitter::Node,
30        source: &str,
31        file_path: &Path,
32        units: &mut Vec<RawCodeUnit>,
33        next_id: &mut u64,
34        parent_qname: &str,
35    ) {
36        let mut cursor = node.walk();
37        for child in node.children(&mut cursor) {
38            match child.kind() {
39                "class_declaration" => {
40                    self.extract_class(
41                        child,
42                        source,
43                        file_path,
44                        units,
45                        next_id,
46                        parent_qname,
47                        CodeUnitType::Type,
48                    );
49                }
50                "interface_declaration" => {
51                    self.extract_class(
52                        child,
53                        source,
54                        file_path,
55                        units,
56                        next_id,
57                        parent_qname,
58                        CodeUnitType::Trait,
59                    );
60                }
61                "enum_declaration" => {
62                    self.extract_class(
63                        child,
64                        source,
65                        file_path,
66                        units,
67                        next_id,
68                        parent_qname,
69                        CodeUnitType::Type,
70                    );
71                }
72                "record_declaration" => {
73                    self.extract_class(
74                        child,
75                        source,
76                        file_path,
77                        units,
78                        next_id,
79                        parent_qname,
80                        CodeUnitType::Type,
81                    );
82                }
83                "method_declaration" => {
84                    if let Some(unit) =
85                        self.extract_method(child, source, file_path, parent_qname, next_id)
86                    {
87                        units.push(unit);
88                    }
89                }
90                "constructor_declaration" => {
91                    if let Some(unit) =
92                        self.extract_method(child, source, file_path, parent_qname, next_id)
93                    {
94                        units.push(unit);
95                    }
96                }
97                "import_declaration" => {
98                    if let Some(unit) =
99                        self.extract_import(child, source, file_path, parent_qname, next_id)
100                    {
101                        units.push(unit);
102                    }
103                }
104                "package_declaration" => {
105                    // Captured as module-level metadata, skip as unit
106                }
107                _ => {}
108            }
109        }
110    }
111
112    #[allow(clippy::too_many_arguments)]
113    fn extract_class(
114        &self,
115        node: tree_sitter::Node,
116        source: &str,
117        file_path: &Path,
118        units: &mut Vec<RawCodeUnit>,
119        next_id: &mut u64,
120        parent_qname: &str,
121        unit_type: CodeUnitType,
122    ) {
123        let name = match node.child_by_field_name("name") {
124            Some(n) => get_node_text(n, source).to_string(),
125            None => return,
126        };
127        let qname = java_qname(parent_qname, &name);
128        let span = node_to_span(node);
129        let vis = extract_java_visibility(node, source);
130
131        let id = *next_id;
132        *next_id += 1;
133
134        let mut unit = RawCodeUnit::new(
135            unit_type,
136            Language::Java,
137            name,
138            file_path.to_path_buf(),
139            span,
140        );
141        unit.temp_id = id;
142        unit.qualified_name = qname.clone();
143        unit.visibility = vis;
144        units.push(unit);
145
146        // Recurse into the class body
147        if let Some(body) = node.child_by_field_name("body") {
148            self.extract_from_node(body, source, file_path, units, next_id, &qname);
149        }
150    }
151
152    fn extract_method(
153        &self,
154        node: tree_sitter::Node,
155        source: &str,
156        file_path: &Path,
157        parent_qname: &str,
158        next_id: &mut u64,
159    ) -> Option<RawCodeUnit> {
160        let name_node = node.child_by_field_name("name")?;
161        let name = get_node_text(name_node, source).to_string();
162        let qname = java_qname(parent_qname, &name);
163        let span = node_to_span(node);
164        let vis = extract_java_visibility(node, source);
165
166        let id = *next_id;
167        *next_id += 1;
168
169        let is_test = has_annotation(node, source, "Test")
170            || has_annotation(node, source, "ParameterizedTest")
171            || name.starts_with("test");
172        let unit_type = if is_test {
173            CodeUnitType::Test
174        } else {
175            CodeUnitType::Function
176        };
177
178        let mut unit =
179            RawCodeUnit::new(unit_type, Language::Java, name, file_path.to_path_buf(), span);
180        unit.temp_id = id;
181        unit.qualified_name = qname;
182        unit.visibility = vis;
183
184        Some(unit)
185    }
186
187    fn extract_import(
188        &self,
189        node: tree_sitter::Node,
190        source: &str,
191        file_path: &Path,
192        parent_qname: &str,
193        next_id: &mut u64,
194    ) -> Option<RawCodeUnit> {
195        let text = get_node_text(node, source)
196            .trim_start_matches("import ")
197            .trim_start_matches("static ")
198            .trim_end_matches(';')
199            .trim()
200            .to_string();
201        let span = node_to_span(node);
202
203        let id = *next_id;
204        *next_id += 1;
205
206        let mut unit = RawCodeUnit::new(
207            CodeUnitType::Import,
208            Language::Java,
209            text,
210            file_path.to_path_buf(),
211            span,
212        );
213        unit.temp_id = id;
214        unit.qualified_name = java_qname(parent_qname, "import");
215
216        Some(unit)
217    }
218}
219
220impl LanguageParser for JavaParser {
221    fn extract_units(
222        &self,
223        tree: &tree_sitter::Tree,
224        source: &str,
225        file_path: &Path,
226    ) -> AcbResult<Vec<RawCodeUnit>> {
227        let mut units = Vec::new();
228        let mut next_id = 0u64;
229
230        let module_name = file_path
231            .file_stem()
232            .and_then(|s| s.to_str())
233            .unwrap_or("unknown")
234            .to_string();
235
236        let root_span = node_to_span(tree.root_node());
237        let mut module_unit = RawCodeUnit::new(
238            CodeUnitType::Module,
239            Language::Java,
240            module_name.clone(),
241            file_path.to_path_buf(),
242            root_span,
243        );
244        module_unit.temp_id = next_id;
245        module_unit.qualified_name = module_name.clone();
246        next_id += 1;
247        units.push(module_unit);
248
249        self.extract_from_node(
250            tree.root_node(),
251            source,
252            file_path,
253            &mut units,
254            &mut next_id,
255            &module_name,
256        );
257
258        Ok(units)
259    }
260
261    fn is_test_file(&self, path: &Path, _source: &str) -> bool {
262        let name = path
263            .file_name()
264            .and_then(|n| n.to_str())
265            .unwrap_or("");
266        name.ends_with("Test.java")
267            || name.ends_with("Tests.java")
268            || name.starts_with("Test")
269            || name.ends_with("IT.java")
270    }
271}
272
/// Join a parent scope and a simple name into a dotted qualified name.
///
/// An empty `parent` yields `name` unchanged; otherwise the result is
/// `parent.name`.
fn java_qname(parent: &str, name: &str) -> String {
    match parent {
        "" => name.to_owned(),
        prefix => [prefix, name].join("."),
    }
}
280
281/// Extract visibility from Java modifiers.
282fn extract_java_visibility(node: tree_sitter::Node, source: &str) -> Visibility {
283    let mut cursor = node.walk();
284    for child in node.children(&mut cursor) {
285        if child.kind() == "modifiers" {
286            let text = get_node_text(child, source);
287            if text.contains("public") {
288                return Visibility::Public;
289            } else if text.contains("private") {
290                return Visibility::Private;
291            } else if text.contains("protected") {
292                return Visibility::Public; // close enough for graph purposes
293            }
294        }
295    }
296    // Java default (package-private) — treat as public for graph
297    Visibility::Public
298}
299
300/// Check if a method/class has a specific annotation.
301fn has_annotation(node: tree_sitter::Node, source: &str, annotation: &str) -> bool {
302    let mut cursor = node.walk();
303    for child in node.children(&mut cursor) {
304        if child.kind() == "modifiers" {
305            let mut inner_cursor = child.walk();
306            for modifier in child.children(&mut inner_cursor) {
307                if modifier.kind() == "marker_annotation" || modifier.kind() == "annotation" {
308                    let text = get_node_text(modifier, source);
309                    if text.contains(annotation) {
310                        return true;
311                    }
312                }
313            }
314        }
315    }
316    false
317}