Skip to main content

normalize_languages/
kotlin.rs

1//! Kotlin language support.
2
3use crate::{ContainerBody, Import, Language, LanguageSymbols, Visibility};
4use tree_sitter::Node;
5
/// Kotlin language support.
///
/// Unit struct with no state; all behavior comes from the [`Language`] and
/// [`LanguageSymbols`] implementations in this module.
pub struct Kotlin;
8
9impl Kotlin {
10    /// Find the first type_identifier in a delegation_specifier subtree.
11    fn find_type_identifier(node: &Node, content: &str, out: &mut Vec<String>) {
12        let before = out.len();
13        if node.kind() == "type_identifier" {
14            out.push(content[node.byte_range()].to_string());
15            return;
16        }
17        let mut cursor = node.walk();
18        for child in node.children(&mut cursor) {
19            Self::find_type_identifier(&child, content, out);
20            if out.len() > before {
21                return;
22            }
23        }
24    }
25}
26
27impl Language for Kotlin {
    /// Display name of the language: always `"Kotlin"`.
    fn name(&self) -> &'static str {
        "Kotlin"
    }
    /// File extensions (without a leading dot) handled by this language: `kt` and `kts`.
    fn extensions(&self) -> &'static [&'static str] {
        &["kt", "kts"]
    }
    /// Grammar identifier for Kotlin: always `"kotlin"`.
    // NOTE(review): presumably the key used to look up the tree-sitter grammar — confirm
    // against the grammar loader/registry.
    fn grammar_name(&self) -> &'static str {
        "kotlin"
    }
37
    /// Expose this language as a [`LanguageSymbols`] trait object.
    ///
    /// Always returns `Some(self)` because `Kotlin` implements [`LanguageSymbols`].
    fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
        Some(self)
    }
41
    /// Text returned as the signature suffix: an empty brace pair preceded by a space.
    // NOTE(review): presumably appended to signatures so they read as complete
    // declarations — confirm against the caller.
    fn signature_suffix(&self) -> &'static str {
        " {}"
    }
45
    /// Return the KDoc text attached to `node`, if any.
    ///
    /// Delegates to [`extract_kdoc`], which looks at the siblings preceding `node`.
    fn extract_docstring(&self, node: &Node, content: &str) -> Option<String> {
        extract_kdoc(node, content)
    }
49
50    fn refine_kind(
51        &self,
52        node: &Node,
53        _content: &str,
54        tag_kind: crate::SymbolKind,
55    ) -> crate::SymbolKind {
56        if node.kind() == "class_declaration" {
57            // Kotlin uses class_declaration for class, interface, enum class.
58            // Distinguished by keyword children: "interface", "enum", "class".
59            let mut cursor = node.walk();
60            for child in node.children(&mut cursor) {
61                match child.kind() {
62                    "interface" => return crate::SymbolKind::Interface,
63                    "enum" => return crate::SymbolKind::Enum,
64                    // Stop before body/name to avoid scanning the entire tree
65                    "type_identifier" | "class_body" | "enum_class_body" => break,
66                    _ => {}
67                }
68            }
69        }
70        tag_kind
71    }
72
73    fn extract_implements(&self, node: &Node, content: &str) -> crate::ImplementsInfo {
74        let mut implements = Vec::new();
75        for i in 0..node.child_count() {
76            if let Some(child) = node.child(i as u32)
77                && child.kind() == "delegation_specifier"
78            {
79                Self::find_type_identifier(&child, content, &mut implements);
80            }
81        }
82        crate::ImplementsInfo {
83            is_interface: false,
84            implements,
85        }
86    }
87
88    fn build_signature(&self, node: &Node, content: &str) -> String {
89        let name = match self.node_name(node, content) {
90            Some(n) => n,
91            None => {
92                return content[node.byte_range()]
93                    .lines()
94                    .next()
95                    .unwrap_or("")
96                    .trim()
97                    .to_string();
98            }
99        };
100        match node.kind() {
101            "function_declaration" | "function_definition" => {
102                let params = node
103                    .child_by_field_name("value_parameters")
104                    .or_else(|| node.child_by_field_name("parameters"))
105                    .map(|p| content[p.byte_range()].to_string())
106                    .unwrap_or_else(|| "()".to_string());
107                let return_type = node
108                    .child_by_field_name("type")
109                    .map(|t| format!(": {}", content[t.byte_range()].trim()))
110                    .unwrap_or_default();
111                format!("fun {}{}{}", name, params, return_type)
112            }
113            "class_declaration" => format!("class {}", name),
114            "object_declaration" => format!("object {}", name),
115            "type_alias" => {
116                let target = node
117                    .child_by_field_name("type")
118                    .map(|t| content[t.byte_range()].to_string())
119                    .unwrap_or_default();
120                format!("typealias {} = {}", name, target)
121            }
122            _ => {
123                let text = &content[node.byte_range()];
124                text.lines().next().unwrap_or(text).trim().to_string()
125            }
126        }
127    }
128
129    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
130        if node.kind() != "import_header" {
131            return Vec::new();
132        }
133
134        let line = node.start_position().row + 1;
135
136        // Get the import identifier
137        let mut cursor = node.walk();
138        for child in node.children(&mut cursor) {
139            if child.kind() == "identifier" || child.kind() == "user_type" {
140                let module = content[child.byte_range()].to_string();
141                let is_wildcard = content[node.byte_range()].contains(".*");
142                return vec![Import {
143                    module,
144                    names: Vec::new(),
145                    alias: None,
146                    is_wildcard,
147                    is_relative: false,
148                    line,
149                }];
150            }
151        }
152
153        Vec::new()
154    }
155
156    fn format_import(&self, import: &Import, _names: Option<&[&str]>) -> String {
157        // Kotlin: import pkg.Class or import pkg.*
158        if import.is_wildcard {
159            format!("import {}.*", import.module)
160        } else {
161            format!("import {}", import.module)
162        }
163    }
164
165    fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
166        let has_test_attr = symbol.attributes.iter().any(|a| a.contains("@Test"));
167        if has_test_attr {
168            return true;
169        }
170        match symbol.kind {
171            crate::SymbolKind::Class => {
172                symbol.name.starts_with("Test") || symbol.name.ends_with("Test")
173            }
174            _ => false,
175        }
176    }
177
    /// Glob patterns identifying Kotlin test files.
    ///
    /// Covers anything under a `src/test` directory plus `.kt` files whose name
    /// starts with `Test` or ends with `Test`/`Tests`.
    fn test_file_globs(&self) -> &'static [&'static str] {
        &[
            "**/src/test/**/*.kt",
            "**/Test*.kt",
            "**/*Test.kt",
            "**/*Tests.kt",
        ]
    }
186
187    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
188        node.child_by_field_name("class_body")
189            .or_else(|| node.child_by_field_name("body"))
190    }
191
    /// Analyze a container body node.
    ///
    /// Forwards all arguments unchanged to `crate::body::analyze_brace_body` and
    /// returns its result.
    fn analyze_container_body(
        &self,
        body_node: &Node,
        content: &str,
        inner_indent: &str,
    ) -> Option<ContainerBody> {
        crate::body::analyze_brace_body(body_node, content, inner_indent)
    }
200
201    fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
202        // Try "name" field first (most declarations)
203        if let Some(name_node) = node.child_by_field_name("name") {
204            return Some(&content[name_node.byte_range()]);
205        }
206        // Try first type_identifier (class/object declarations) or simple_identifier
207        for i in 0..node.child_count() {
208            if let Some(child) = node.child(i as u32)
209                && (child.kind() == "type_identifier" || child.kind() == "simple_identifier")
210            {
211                return Some(&content[child.byte_range()]);
212            }
213        }
214        None
215    }
216
    /// Return the annotations attached to a definition node.
    ///
    /// Delegates to [`extract_kotlin_annotations`].
    fn extract_attributes(&self, node: &Node, content: &str) -> Vec<String> {
        extract_kotlin_annotations(node, content)
    }
220
221    fn get_visibility(&self, node: &Node, content: &str) -> Visibility {
222        let mut cursor = node.walk();
223        for child in node.children(&mut cursor) {
224            if child.kind() == "modifiers" {
225                let mods = &content[child.byte_range()];
226                if mods.contains("private") {
227                    return Visibility::Private;
228                }
229                if mods.contains("protected") {
230                    return Visibility::Protected;
231                }
232                if mods.contains("internal") {
233                    return Visibility::Protected;
234                } // internal ≈ protected for our purposes
235                if mods.contains("public") {
236                    return Visibility::Public;
237                }
238            }
239            // Also check visibility_modifier directly
240            if child.kind() == "visibility_modifier" {
241                let vis = &content[child.byte_range()];
242                if vis == "private" {
243                    return Visibility::Private;
244                }
245                if vis == "protected" {
246                    return Visibility::Protected;
247                }
248                if vis == "internal" {
249                    return Visibility::Protected;
250                }
251                if vis == "public" {
252                    return Visibility::Public;
253                }
254            }
255        }
256        // Kotlin default is public (unlike Java's package-private)
257        Visibility::Public
258    }
259}
260
// No overrides: Kotlin uses whatever default behavior `LanguageSymbols` provides.
impl LanguageSymbols for Kotlin {}
262
263/// Extract a KDoc comment (`/** ... */`) preceding a node.
264///
265/// Walks backwards through siblings looking for a `multiline_comment` starting with `/**`.
266fn extract_kdoc(node: &Node, content: &str) -> Option<String> {
267    let mut prev = node.prev_sibling();
268    while let Some(sibling) = prev {
269        match sibling.kind() {
270            "multiline_comment" => {
271                let text = &content[sibling.byte_range()];
272                if text.starts_with("/**") {
273                    // Strip /** and */ and leading *
274                    let lines: Vec<&str> = text
275                        .strip_prefix("/**")
276                        .unwrap_or(text)
277                        .strip_suffix("*/")
278                        .unwrap_or(text)
279                        .lines()
280                        .map(|l| l.trim().strip_prefix("*").unwrap_or(l).trim())
281                        .filter(|l| !l.is_empty())
282                        .collect();
283                    if !lines.is_empty() {
284                        return Some(lines.join(" "));
285                    }
286                }
287                return None;
288            }
289            "line_comment" => {
290                // Skip single-line comments
291            }
292            _ => return None,
293        }
294        prev = sibling.prev_sibling();
295    }
296    None
297}
298
299/// Extract annotations from a Kotlin definition node.
300/// Kotlin annotations live inside a `modifiers` child (e.g. `@JvmStatic`, `@Deprecated`).
301fn extract_kotlin_annotations(node: &Node, content: &str) -> Vec<String> {
302    let mut attrs = Vec::new();
303    let mut cursor = node.walk();
304    for child in node.children(&mut cursor) {
305        if child.kind() == "modifiers" {
306            let mut mod_cursor = child.walk();
307            for mod_child in child.children(&mut mod_cursor) {
308                if mod_child.kind() == "annotation" {
309                    attrs.push(content[mod_child.byte_range()].to_string());
310                }
311            }
312        }
313    }
314    attrs
315}
316
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validate_unused_kinds_audit;

    /// Documents node kinds that exist in the Kotlin grammar but aren't used in trait methods.
    /// Run `cross_check_node_kinds` in registry.rs to see all potentially useful kinds.
    ///
    /// The list is passed to `validate_unused_kinds_audit`; the test panics if that
    /// returns an error.
    #[test]
    fn unused_node_kinds_audit() {
        // rustfmt is disabled so the aligned trailing comments stay readable.
        #[rustfmt::skip]
        let documented_unused: &[&str] = &[
            // STRUCTURAL
            "annotated_lambda",        // @Ann { }
            "class_body",              // class body
            "class_modifier",          // class modifiers
            "class_parameter",         // class param
            "constructor_delegation_call", // this(), super()
            "control_structure_body",  // control body
            "delegation_specifier",    // delegation
            "function_body",           // function body
            "function_modifier",       // fun modifiers
            "function_type_parameters",// (T) -> U params
            "function_value_parameters", // fun params
            "identifier",              // too common
            "import_alias",            // import as
            "import_list",             // imports
            "inheritance_modifier",    // open, final
            "interpolated_expression", // ${expr}
            "interpolated_identifier", // $id
            "lambda_parameters",       // lambda params
            "member_modifier",         // member modifiers
            "modifiers",               // modifiers
            "multi_variable_declaration", // val (a, b)
            "parameter_modifier",      // param modifiers
            "parameter_modifiers",     // param modifiers list
            "parameter_with_optional_type", // optional type param
            "platform_modifier",       // expect, actual
            "primary_constructor",     // primary constructor
            "property_modifier",       // property modifiers
            "reification_modifier",    // reified
            "secondary_constructor",   // secondary constructor
            "statements",              // statement list
            "visibility_modifier",     // public, private

            // EXPRESSION
            "additive_expression",     // a + b
            "as_expression",           // x as T
            "check_expression",        // is, !is
            "comparison_expression",   // a < b
            "directly_assignable_expression", // assignable
            "equality_expression",     // a == b
            "indexing_expression",     // arr[i]
            "infix_expression",        // a infix b
            "multiplicative_expression", // a * b
            "parenthesized_expression",// (expr)
            "postfix_expression",      // x++
            "prefix_expression",       // ++x
            "range_expression",        // 0..10
            "spread_expression",       // *arr
            "super_expression",        // super
            "this_expression",         // this
            "wildcard_import",         // import.*

            // TYPE
            "function_type",           // (T) -> U
            "not_nullable_type",       // T & Any
            "nullable_type",           // T?
            "parenthesized_type",      // (T)
            "parenthesized_user_type", // (UserType)
            "receiver_type",           // T.
            "type_arguments",          // <T, U>
            "type_constraint",         // T : Bound
            "type_constraints",        // where clause
            "type_modifiers",          // type modifiers
            "type_parameter",          // T
            "type_parameter_modifiers",// type param mods
            "type_parameters",         // <T, U>
            "type_projection",         // out T, in T
            "type_projection_modifiers", // projection mods
            "type_test",               // is T
            "variance_modifier",       // in, out

            // OTHER
            "finally_block",           // finally
            // property_declaration and variable_declaration are intentionally excluded from
            // tags.scm: the Kotlin grammar uses the same node kind for class-level properties
            // AND local val/var declarations inside function bodies. Including them in tags
            // causes collect_symbols_from_tags to fail because node_name() returns None for
            // property_declaration (name is nested inside variable_declaration, not a direct
            // "name" field), silently dropping all symbols in the file.
            "property_declaration",
            "variable_declaration",
            // control flow, lambdas, bodies and import headers — not extracted as symbols
            "if_expression",
            "anonymous_function",
            "when_entry",
            "conjunction_expression",
            "disjunction_expression",
            "while_statement",
            "do_while_statement",
            "enum_class_body",
            "for_statement",
            "import_header",
            "elvis_expression",
            "jump_expression",
            "when_expression",
            "try_expression",
            "lambda_literal",
            "catch_block",
        ];

        validate_unused_kinds_audit(&Kotlin, documented_unused)
            .expect("Kotlin unused node kinds audit failed");
    }
}