Skip to main content

normalize_languages/
java.rs

1//! Java language support.
2
3use crate::{ContainerBody, Import, Language, LanguageSymbols, Visibility};
4use tree_sitter::Node;
5
/// Java language support.
///
/// A stateless unit type: all behaviour lives in its [`Language`] and
/// [`LanguageSymbols`] implementations, so values are free to copy, compare by
/// construction, and create via `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct Java;
8
9impl Language for Java {
10    fn name(&self) -> &'static str {
11        "Java"
12    }
13    fn extensions(&self) -> &'static [&'static str] {
14        &["java"]
15    }
16    fn grammar_name(&self) -> &'static str {
17        "java"
18    }
19
20    fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
21        Some(self)
22    }
23
24    fn signature_suffix(&self) -> &'static str {
25        " {}"
26    }
27
28    fn extract_docstring(&self, node: &Node, content: &str) -> Option<String> {
29        extract_javadoc(node, content)
30    }
31
32    fn extract_attributes(&self, node: &Node, content: &str) -> Vec<String> {
33        extract_annotations(node, content)
34    }
35
36    fn refine_kind(
37        &self,
38        node: &Node,
39        _content: &str,
40        tag_kind: crate::SymbolKind,
41    ) -> crate::SymbolKind {
42        match node.kind() {
43            "enum_declaration" => crate::SymbolKind::Enum,
44            "interface_declaration" | "annotation_type_declaration" => crate::SymbolKind::Interface,
45            "record_declaration" => crate::SymbolKind::Struct,
46            _ => tag_kind,
47        }
48    }
49
50    fn extract_implements(&self, node: &Node, content: &str) -> crate::ImplementsInfo {
51        let mut implements = Vec::new();
52        let mut cursor = node.walk();
53        for child in node.children(&mut cursor) {
54            if child.kind() == "superclass" {
55                let mut sc = child.walk();
56                for t in child.children(&mut sc) {
57                    if t.kind() == "type_identifier" {
58                        implements.push(content[t.byte_range()].to_string());
59                    }
60                }
61            } else if child.kind() == "super_interfaces" {
62                let mut si = child.walk();
63                for list in child.children(&mut si) {
64                    if list.kind() == "type_list" {
65                        let mut tc = list.walk();
66                        for t in list.children(&mut tc) {
67                            if t.kind() == "type_identifier" {
68                                implements.push(content[t.byte_range()].to_string());
69                            }
70                        }
71                    }
72                }
73            }
74        }
75        crate::ImplementsInfo {
76            is_interface: node.kind() == "interface_declaration",
77            implements,
78        }
79    }
80
81    fn build_signature(&self, node: &Node, content: &str) -> String {
82        let name = match self.node_name(node, content) {
83            Some(n) => n,
84            None => {
85                return content[node.byte_range()]
86                    .lines()
87                    .next()
88                    .unwrap_or("")
89                    .trim()
90                    .to_string();
91            }
92        };
93        match node.kind() {
94            "method_declaration" | "constructor_declaration" => {
95                let params = node
96                    .child_by_field_name("parameters")
97                    .map(|p| content[p.byte_range()].to_string())
98                    .unwrap_or_else(|| "()".to_string());
99                format!("{}{}", name, params)
100            }
101            "class_declaration" => format!("class {}", name),
102            "interface_declaration" => format!("interface {}", name),
103            "enum_declaration" => format!("enum {}", name),
104            _ => {
105                let text = &content[node.byte_range()];
106                text.lines().next().unwrap_or(text).trim().to_string()
107            }
108        }
109    }
110
111    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
112        if node.kind() != "import_declaration" {
113            return Vec::new();
114        }
115
116        let line = node.start_position().row + 1;
117        let text = &content[node.byte_range()];
118
119        // Extract import path
120        let is_static = text.contains("static ");
121        let is_wildcard = text.contains(".*");
122
123        // Get the scoped_identifier
124        let mut cursor = node.walk();
125        for child in node.children(&mut cursor) {
126            if child.kind() == "scoped_identifier" || child.kind() == "identifier" {
127                let module = content[child.byte_range()].to_string();
128                return vec![Import {
129                    module,
130                    names: Vec::new(),
131                    alias: if is_static {
132                        Some("static".to_string())
133                    } else {
134                        None
135                    },
136                    is_wildcard,
137                    is_relative: false,
138                    line,
139                }];
140            }
141        }
142
143        Vec::new()
144    }
145
146    fn format_import(&self, import: &Import, _names: Option<&[&str]>) -> String {
147        // Java: import pkg.Class; or import pkg.*;
148        if import.is_wildcard {
149            format!("import {}.*;", import.module)
150        } else {
151            format!("import {};", import.module)
152        }
153    }
154
155    fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
156        let has_test_attr = symbol.attributes.iter().any(|a| a.contains("@Test"));
157        if has_test_attr {
158            return true;
159        }
160        match symbol.kind {
161            crate::SymbolKind::Class => {
162                symbol.name.starts_with("Test") || symbol.name.ends_with("Test")
163            }
164            _ => false,
165        }
166    }
167
168    fn test_file_globs(&self) -> &'static [&'static str] {
169        &[
170            "**/src/test/**/*.java",
171            "**/Test*.java",
172            "**/*Test.java",
173            "**/*Tests.java",
174        ]
175    }
176
177    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
178        node.child_by_field_name("body")
179    }
180
181    fn analyze_container_body(
182        &self,
183        body_node: &Node,
184        content: &str,
185        inner_indent: &str,
186    ) -> Option<ContainerBody> {
187        crate::body::analyze_brace_body(body_node, content, inner_indent)
188    }
189
190    fn get_visibility(&self, node: &Node, content: &str) -> Visibility {
191        let mut cursor = node.walk();
192        for child in node.children(&mut cursor) {
193            if child.kind() == "modifiers" {
194                let mods = &content[child.byte_range()];
195                if mods.contains("private") {
196                    return Visibility::Private;
197                }
198                if mods.contains("protected") {
199                    return Visibility::Protected;
200                }
201                // public or no modifier = visible in skeleton
202                return Visibility::Public;
203            }
204        }
205        // No modifier = package-private, but still visible for skeleton purposes
206        Visibility::Public
207    }
208}
209
// Empty impl: no `LanguageSymbols` items are overridden for Java. This impl is
// what allows `Language::as_symbols` above to return `Some(self)`.
impl LanguageSymbols for Java {}
211
212/// Extract a JavaDoc comment (`/** ... */`) preceding a node.
213///
214/// Walks backwards through siblings looking for a `block_comment` starting with `/**`.
215fn extract_javadoc(node: &Node, content: &str) -> Option<String> {
216    let mut prev = node.prev_sibling();
217    while let Some(sibling) = prev {
218        match sibling.kind() {
219            "block_comment" => {
220                let text = &content[sibling.byte_range()];
221                if text.starts_with("/**") {
222                    return Some(clean_block_doc_comment(text));
223                }
224                return None;
225            }
226            "line_comment" => {
227                // Skip line comments, keep looking for a block comment
228            }
229            "modifiers" | "marker_annotation" | "annotation" => {
230                // Skip annotations/modifiers between doc comment and declaration
231            }
232            _ => return None,
233        }
234        prev = sibling.prev_sibling();
235    }
236    None
237}
238
/// Clean a `/** ... */` block doc comment into plain text.
///
/// Removes the comment delimiters, strips the leading `*` gutter and the
/// surrounding whitespace from every line, drops empty lines and joins the
/// remaining lines with single spaces.
///
/// The delimiters are removed independently of each other, so a comment that
/// lacks its closing `*/` still loses its opening `/**`, and the empty comment
/// `/**/` yields an empty string. A plain `/* ... */` comment is unwrapped the
/// same way.
fn clean_block_doc_comment(text: &str) -> String {
    // Strip the terminator first so that `/**/` is seen as `/*` + `*/`.
    let body = text.strip_suffix("*/").unwrap_or(text);
    let body = body
        .strip_prefix("/**")
        .or_else(|| body.strip_prefix("/*"))
        .unwrap_or(body);

    body.lines()
        .map(|line| {
            let line = line.trim();
            line.strip_prefix('*').unwrap_or(line).trim()
        })
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
252
253/// Extract annotations from a Java definition node.
254fn extract_annotations(node: &Node, content: &str) -> Vec<String> {
255    let mut attrs = Vec::new();
256    if let Some(modifiers) = node.child_by_field_name("modifiers").or_else(|| {
257        let mut cursor = node.walk();
258        node.children(&mut cursor).find(|c| c.kind() == "modifiers")
259    }) {
260        let mut cursor = modifiers.walk();
261        for child in modifiers.children(&mut cursor) {
262            if child.kind() == "marker_annotation" || child.kind() == "annotation" {
263                attrs.push(content[child.byte_range()].to_string());
264            }
265        }
266    }
267    attrs
268}
269
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validate_unused_kinds_audit;

    /// Documents node kinds that exist in the Java grammar but aren't used in trait methods.
    /// Run `cross_check_node_kinds` in registry.rs to see all potentially useful kinds.
    ///
    /// The list is passed to `validate_unused_kinds_audit`; the test fails if that
    /// check returns an error.
    #[test]
    fn unused_node_kinds_audit() {
        #[rustfmt::skip]
        let documented_unused: &[&str] = &[
            // STRUCTURAL
            "block_comment",           // comments
            "class_body",              // class body
            "class_literal",           // Foo.class
            "constructor_body",        // constructor body
            "enum_body",               // enum body
            "enum_body_declarations",  // enum body decls
            "enum_constant",           // enum value
            "field_declaration",       // field decl
            "formal_parameter",        // method param
            "formal_parameters",       // param list
            "identifier",              // too common
            "interface_body",          // interface body
            "modifiers",               // access modifiers
            "scoped_identifier",       // pkg.Class
            "scoped_type_identifier",  // pkg.Type
            "super_interfaces",        // implements

            // CLAUSE
            "catch_formal_parameter",  // catch param
            "catch_type",              // catch type
            "extends_interfaces",      // extends for interfaces
            "finally_clause",          // finally block
            "switch_block",            // switch body
            "switch_block_statement_group", // case group
            "throws",                  // throws clause

            // EXPRESSION
            "array_creation_expression", // new T[]
            "assignment_expression",   // x = y
            "cast_expression",         // (T)x
            "instanceof_expression",   // x instanceof T
            "lambda_expression",       // x -> y
            "method_reference",        // Class::method
            "parenthesized_expression",// (expr)
            "template_expression",     // string template
            "unary_expression",        // -x, !x
            "update_expression",       // x++
            "yield_statement",         // yield x

            // TYPE
            "annotated_type",          // @Ann Type
            "array_type",              // T[]
            "boolean_type",            // boolean
            "floating_point_type",     // float, double
            "generic_type",            // T<U>
            "integral_type",           // int, long
            "type_arguments",          // <T, U>
            "type_bound",              // T extends X
            "type_parameter",          // T
            "type_parameters",         // <T, U>
            "type_pattern",            // type pattern
            "void_type",               // void

            // DECLARATION
            "annotation_type_body",    // @interface body
            "annotation_type_declaration", // @interface
            "annotation_type_element_declaration", // @interface element
            "assert_statement",        // assert
            "compact_constructor_declaration", // record constructor
            "constant_declaration",    // const decl
            "explicit_constructor_invocation", // this(), super()
            "expression_statement",    // expr;
            "labeled_statement",       // label: stmt
            "local_variable_declaration", // local var
            "record_declaration",      // record
            "record_pattern_body",     // record pattern

            // MODULE
            "exports_module_directive",// exports
            "module_body",             // module body
            "module_declaration",      // module
            "opens_module_directive",  // opens
            "package_declaration",     // package
            "provides_module_directive", // provides
            "requires_modifier",       // requires modifier
            "requires_module_directive", // requires
            "uses_module_directive",   // uses

            // OTHER
            "resource_specification", // try-with-resources
            "synchronized_statement",  // synchronized
            "try_with_resources_statement", // try-with
            // remaining kinds (mostly control flow, plus a few expressions and
            // declarations) — not extracted as symbols
            "do_statement",
            "return_statement",
            "constructor_declaration",
            "binary_expression",
            "try_statement",
            "continue_statement",
            "switch_expression",
            "ternary_expression",
            "while_statement",
            "break_statement",
            "enhanced_for_statement",
            "import_declaration",
            "for_statement",
            "block",
            "throw_statement",
            "catch_clause",
            "if_statement",
        ];

        validate_unused_kinds_audit(&Java, documented_unused)
            .expect("Java unused node kinds audit failed");
    }
}