// normalize_languages/java.rs

1//! Java language support.
2
3use crate::traits::{ImportSpec, ModuleId, ModuleResolver, Resolution, ResolverConfig};
4use crate::{ContainerBody, Import, Language, LanguageSymbols, Visibility};
5use std::path::Path;
6use tree_sitter::Node;
7
/// Marker type for the Java language.
///
/// Carries no data; all behaviour lives in its trait implementations.
pub struct Java;
10
11impl Language for Java {
12    fn name(&self) -> &'static str {
13        "Java"
14    }
15    fn extensions(&self) -> &'static [&'static str] {
16        &["java"]
17    }
18    fn grammar_name(&self) -> &'static str {
19        "java"
20    }
21
22    fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
23        Some(self)
24    }
25
26    fn signature_suffix(&self) -> &'static str {
27        " {}"
28    }
29
30    fn extract_docstring(&self, node: &Node, content: &str) -> Option<String> {
31        extract_javadoc(node, content)
32    }
33
34    fn extract_attributes(&self, node: &Node, content: &str) -> Vec<String> {
35        extract_annotations(node, content)
36    }
37
38    fn refine_kind(
39        &self,
40        node: &Node,
41        _content: &str,
42        tag_kind: crate::SymbolKind,
43    ) -> crate::SymbolKind {
44        match node.kind() {
45            "enum_declaration" => crate::SymbolKind::Enum,
46            "interface_declaration" | "annotation_type_declaration" => crate::SymbolKind::Interface,
47            "record_declaration" => crate::SymbolKind::Struct,
48            _ => tag_kind,
49        }
50    }
51
52    fn extract_implements(&self, node: &Node, content: &str) -> crate::ImplementsInfo {
53        let mut implements = Vec::new();
54        let mut cursor = node.walk();
55        for child in node.children(&mut cursor) {
56            if child.kind() == "superclass" {
57                let mut sc = child.walk();
58                for t in child.children(&mut sc) {
59                    if t.kind() == "type_identifier" {
60                        implements.push(content[t.byte_range()].to_string());
61                    }
62                }
63            } else if child.kind() == "super_interfaces" {
64                let mut si = child.walk();
65                for list in child.children(&mut si) {
66                    if list.kind() == "type_list" {
67                        let mut tc = list.walk();
68                        for t in list.children(&mut tc) {
69                            if t.kind() == "type_identifier" {
70                                implements.push(content[t.byte_range()].to_string());
71                            }
72                        }
73                    }
74                }
75            }
76        }
77        crate::ImplementsInfo {
78            is_interface: node.kind() == "interface_declaration",
79            implements,
80        }
81    }
82
83    fn build_signature(&self, node: &Node, content: &str) -> String {
84        let name = match self.node_name(node, content) {
85            Some(n) => n,
86            None => {
87                return content[node.byte_range()]
88                    .lines()
89                    .next()
90                    .unwrap_or("")
91                    .trim()
92                    .to_string();
93            }
94        };
95        match node.kind() {
96            "method_declaration" | "constructor_declaration" => {
97                let params = node
98                    .child_by_field_name("parameters")
99                    .map(|p| content[p.byte_range()].to_string())
100                    .unwrap_or_else(|| "()".to_string());
101                format!("{}{}", name, params)
102            }
103            "class_declaration" => format!("class {}", name),
104            "interface_declaration" => format!("interface {}", name),
105            "enum_declaration" => format!("enum {}", name),
106            _ => {
107                let text = &content[node.byte_range()];
108                text.lines().next().unwrap_or(text).trim().to_string()
109            }
110        }
111    }
112
113    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
114        if node.kind() != "import_declaration" {
115            return Vec::new();
116        }
117
118        let line = node.start_position().row + 1;
119        let text = &content[node.byte_range()];
120
121        // Extract import path
122        let is_static = text.contains("static ");
123        let is_wildcard = text.contains(".*");
124
125        // Get the scoped_identifier
126        let mut cursor = node.walk();
127        for child in node.children(&mut cursor) {
128            if child.kind() == "scoped_identifier" || child.kind() == "identifier" {
129                let module = content[child.byte_range()].to_string();
130                return vec![Import {
131                    module,
132                    names: Vec::new(),
133                    alias: if is_static {
134                        Some("static".to_string())
135                    } else {
136                        None
137                    },
138                    is_wildcard,
139                    is_relative: false,
140                    line,
141                }];
142            }
143        }
144
145        Vec::new()
146    }
147
148    fn format_import(&self, import: &Import, _names: Option<&[&str]>) -> String {
149        // Java: import pkg.Class; or import pkg.*;
150        if import.is_wildcard {
151            format!("import {}.*;", import.module)
152        } else {
153            format!("import {};", import.module)
154        }
155    }
156
157    fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
158        let has_test_attr = symbol.attributes.iter().any(|a| a.contains("@Test"));
159        if has_test_attr {
160            return true;
161        }
162        match symbol.kind {
163            crate::SymbolKind::Class => {
164                symbol.name.starts_with("Test") || symbol.name.ends_with("Test")
165            }
166            _ => false,
167        }
168    }
169
170    fn test_file_globs(&self) -> &'static [&'static str] {
171        &[
172            "**/src/test/**/*.java",
173            "**/Test*.java",
174            "**/*Test.java",
175            "**/*Tests.java",
176        ]
177    }
178
179    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
180        node.child_by_field_name("body")
181    }
182
183    fn analyze_container_body(
184        &self,
185        body_node: &Node,
186        content: &str,
187        inner_indent: &str,
188    ) -> Option<ContainerBody> {
189        crate::body::analyze_brace_body(body_node, content, inner_indent)
190    }
191
192    fn get_visibility(&self, node: &Node, content: &str) -> Visibility {
193        let mut cursor = node.walk();
194        for child in node.children(&mut cursor) {
195            if child.kind() == "modifiers" {
196                let mods = &content[child.byte_range()];
197                if mods.contains("private") {
198                    return Visibility::Private;
199                }
200                if mods.contains("protected") {
201                    return Visibility::Protected;
202                }
203                // public or no modifier = visible in skeleton
204                return Visibility::Public;
205            }
206        }
207        // No modifier = package-private, but still visible for skeleton purposes
208        Visibility::Public
209    }
210
211    fn module_resolver(&self) -> Option<&dyn ModuleResolver> {
212        static RESOLVER: JavaModuleResolver = JavaModuleResolver;
213        Some(&RESOLVER)
214    }
215}
216
217impl LanguageSymbols for Java {}
218
219// =============================================================================
220// Java Module Resolver
221// =============================================================================
222
/// Resolves Java modules using Maven/Gradle directory conventions.
///
/// A Java package maps onto a directory hierarchy: `com.example.Foo` is
/// expected at `src/main/java/com/example/Foo.java` (or under
/// `src/test/java/...`).
pub struct JavaModuleResolver;
228
/// Directories, relative to the workspace root, that are searched for sources.
const JAVA_SRC_DIRS: &[&str] = &[
    "src/main/java",
    "src/test/java",
    // The workspace root itself.
    "",
];
231
232impl ModuleResolver for JavaModuleResolver {
233    fn workspace_config(&self, root: &Path) -> ResolverConfig {
234        ResolverConfig {
235            workspace_root: root.to_path_buf(),
236            path_mappings: Vec::new(),
237            search_roots: JAVA_SRC_DIRS.iter().map(|d| root.join(d)).collect(),
238        }
239    }
240
241    fn module_of_file(&self, _root: &Path, file: &Path, cfg: &ResolverConfig) -> Vec<ModuleId> {
242        let ext = file.extension().and_then(|e| e.to_str()).unwrap_or("");
243        if ext != "java" {
244            return Vec::new();
245        }
246        for search_root in &cfg.search_roots {
247            if let Ok(rel) = file.strip_prefix(search_root) {
248                let rel_str = rel
249                    .to_str()
250                    .unwrap_or("")
251                    .trim_end_matches(".java")
252                    .replace(['/', '\\'], ".");
253                if !rel_str.is_empty() {
254                    return vec![ModuleId {
255                        canonical_path: rel_str,
256                    }];
257                }
258            }
259        }
260        Vec::new()
261    }
262
263    fn resolve(&self, from_file: &Path, spec: &ImportSpec, cfg: &ResolverConfig) -> Resolution {
264        let ext = from_file.extension().and_then(|e| e.to_str()).unwrap_or("");
265        if ext != "java" {
266            return Resolution::NotApplicable;
267        }
268
269        let raw = &spec.raw;
270        // Convert dotted package to path: com.example.Foo → com/example/Foo.java
271        let path_part = raw.replace('.', "/");
272        let file_name = format!("{}.java", path_part);
273        let exported_name = raw.rsplit('.').next().unwrap_or(raw).to_string();
274
275        for search_root in &cfg.search_roots {
276            let candidate = search_root.join(&file_name);
277            if candidate.exists() {
278                return Resolution::Resolved(candidate, exported_name);
279            }
280        }
281
282        Resolution::NotFound
283    }
284}
285
286/// Extract a JavaDoc comment (`/** ... */`) preceding a node.
287///
288/// Walks backwards through siblings looking for a `block_comment` starting with `/**`.
289fn extract_javadoc(node: &Node, content: &str) -> Option<String> {
290    let mut prev = node.prev_sibling();
291    while let Some(sibling) = prev {
292        match sibling.kind() {
293            "block_comment" => {
294                let text = &content[sibling.byte_range()];
295                if text.starts_with("/**") {
296                    return Some(clean_block_doc_comment(text));
297                }
298                return None;
299            }
300            "line_comment" => {
301                // Skip line comments, keep looking for a block comment
302            }
303            "modifiers" | "marker_annotation" | "annotation" => {
304                // Skip annotations/modifiers between doc comment and declaration
305            }
306            _ => return None,
307        }
308        prev = sibling.prev_sibling();
309    }
310    None
311}
312
/// Clean a `/** ... */` block doc comment into plain text.
///
/// The comment delimiters are removed, each line is trimmed and stripped of one
/// leading `*`, blank lines are dropped, and the remaining lines are joined
/// with single spaces.
///
/// A missing delimiter is simply skipped: an unterminated `/** text` still
/// loses its opener, and the empty comment `/**/` cleans to an empty string.
fn clean_block_doc_comment(text: &str) -> String {
    // Peel the delimiters one at a time, each falling back to what is left so
    // far rather than to the original text (which would restore the opener).
    let inner = text.strip_prefix("/*").unwrap_or(text);
    let inner = inner.strip_suffix("*/").unwrap_or(inner);
    // The second `*` of the `/**` opener, when it was not consumed as part of
    // the closing `*/` (as happens for `/**/`).
    let inner = inner.strip_prefix('*').unwrap_or(inner);

    inner
        .lines()
        .map(|line| {
            let line = line.trim();
            line.strip_prefix('*').unwrap_or(line).trim()
        })
        .filter(|line| !line.is_empty())
        .collect::<Vec<&str>>()
        .join(" ")
}
326
327/// Extract annotations from a Java definition node.
328fn extract_annotations(node: &Node, content: &str) -> Vec<String> {
329    let mut attrs = Vec::new();
330    if let Some(modifiers) = node.child_by_field_name("modifiers").or_else(|| {
331        let mut cursor = node.walk();
332        node.children(&mut cursor).find(|c| c.kind() == "modifiers")
333    }) {
334        let mut cursor = modifiers.walk();
335        for child in modifiers.children(&mut cursor) {
336            if child.kind() == "marker_annotation" || child.kind() == "annotation" {
337                attrs.push(content[child.byte_range()].to_string());
338            }
339        }
340    }
341    attrs
342}
343
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validate_unused_kinds_audit;

    /// Records the Java grammar node kinds that are not used in trait methods.
    ///
    /// Run `cross_check_node_kinds` in registry.rs to list every potentially
    /// useful kind.
    #[test]
    fn unused_node_kinds_audit() {
        #[rustfmt::skip]
        let unused_kinds: &[&str] = &[
            // --- structural ---
            "block_comment",           // comments
            "class_body",              // class body
            "class_literal",           // Foo.class
            "constructor_body",        // constructor body
            "enum_body",               // enum body
            "enum_body_declarations",  // enum body decls
            "enum_constant",           // enum value
            "field_declaration",       // field decl
            "formal_parameter",        // method param
            "formal_parameters",       // param list
            "identifier",              // too common
            "interface_body",          // interface body
            "modifiers",               // access modifiers
            "scoped_identifier",       // pkg.Class
            "scoped_type_identifier",  // pkg.Type
            "super_interfaces",        // implements

            // --- clauses ---
            "catch_formal_parameter",  // catch param
            "catch_type",              // catch type
            "extends_interfaces",      // extends for interfaces
            "finally_clause",          // finally block
            "switch_block",            // switch body
            "switch_block_statement_group", // case group
            "throws",                  // throws clause

            // --- expressions ---
            "array_creation_expression", // new T[]
            "assignment_expression",   // x = y
            "cast_expression",         // (T)x
            "instanceof_expression",   // x instanceof T
            "lambda_expression",       // x -> y
            "method_reference",        // Class::method
            "parenthesized_expression",// (expr)
            "template_expression",     // string template
            "unary_expression",        // -x, !x
            "update_expression",       // x++
            "yield_statement",         // yield x

            // --- types ---
            "annotated_type",          // @Ann Type
            "array_type",              // T[]
            "boolean_type",            // boolean
            "floating_point_type",     // float, double
            "generic_type",            // T<U>
            "integral_type",           // int, long
            "type_arguments",          // <T, U>
            "type_bound",              // T extends X
            "type_parameter",          // T
            "type_parameters",         // <T, U>
            "type_pattern",            // type pattern
            "void_type",               // void

            // --- declarations ---
            "annotation_type_body",    // @interface body
            "annotation_type_declaration", // @interface
            "annotation_type_element_declaration", // @interface element
            "assert_statement",        // assert
            "compact_constructor_declaration", // record constructor
            "constant_declaration",    // const decl
            "explicit_constructor_invocation", // this(), super()
            "expression_statement",    // expr;
            "labeled_statement",       // label: stmt
            "local_variable_declaration", // local var
            "record_declaration",      // record
            "record_pattern_body",     // record pattern

            // --- modules ---
            "exports_module_directive",// exports
            "module_body",             // module body
            "module_declaration",      // module
            "opens_module_directive",  // opens
            "package_declaration",     // package
            "provides_module_directive", // provides
            "requires_modifier",       // requires modifier
            "requires_module_directive", // requires
            "uses_module_directive",   // uses

            // --- other ---
            "resource_specification", // try-with-resources
            "synchronized_statement",  // synchronized
            "try_with_resources_statement", // try-with
            // remaining kinds (mostly control flow) — not extracted as symbols
            "do_statement",
            "return_statement",
            "constructor_declaration",
            "binary_expression",
            "try_statement",
            "continue_statement",
            "switch_expression",
            "ternary_expression",
            "while_statement",
            "break_statement",
            "enhanced_for_statement",
            "import_declaration",
            "for_statement",
            "block",
            "throw_statement",
            "catch_clause",
            "if_statement",
        ];

        let outcome = validate_unused_kinds_audit(&Java, unused_kinds);
        outcome.expect("Java unused node kinds audit failed");
    }
}