Skip to main content

normalize_languages/
rust.rs

1//! Rust language support.
2
3use crate::{ContainerBody, Import, Language, LanguageSymbols, Visibility};
4use tree_sitter::Node;
5
/// Rust language support.
///
/// Zero-sized marker type; all behaviour lives in its [`Language`] and
/// [`LanguageSymbols`] implementations. The common std traits are derived so the
/// value can be logged, copied and default-constructed like any other unit type.
#[derive(Debug, Clone, Copy, Default)]
pub struct Rust;
8
9impl Language for Rust {
10    fn name(&self) -> &'static str {
11        "Rust"
12    }
13    fn extensions(&self) -> &'static [&'static str] {
14        &["rs"]
15    }
16    fn grammar_name(&self) -> &'static str {
17        "rust"
18    }
19
20    fn as_symbols(&self) -> Option<&dyn LanguageSymbols> {
21        Some(self)
22    }
23
24    fn signature_suffix(&self) -> &'static str {
25        " {}"
26    }
27
28    fn extract_docstring(&self, node: &Node, content: &str) -> Option<String> {
29        extract_docstring(node, content)
30    }
31
32    fn extract_attributes(&self, node: &Node, content: &str) -> Vec<String> {
33        extract_attributes(node, content)
34    }
35
36    fn extract_implements(&self, node: &Node, content: &str) -> crate::ImplementsInfo {
37        if node.kind() == "impl_item" {
38            let type_node = match node.child_by_field_name("type") {
39                Some(n) => n,
40                None => return crate::ImplementsInfo::default(),
41            };
42            let _ = &content[type_node.byte_range()]; // used below
43            let is_interface = node.child_by_field_name("trait").is_some();
44            let implements = if let Some(trait_node) = node.child_by_field_name("trait") {
45                vec![content[trait_node.byte_range()].to_string()]
46            } else {
47                Vec::new()
48            };
49            crate::ImplementsInfo {
50                is_interface,
51                implements,
52            }
53        } else {
54            crate::ImplementsInfo::default()
55        }
56    }
57
58    fn refine_kind(
59        &self,
60        node: &Node,
61        _content: &str,
62        tag_kind: crate::SymbolKind,
63    ) -> crate::SymbolKind {
64        match node.kind() {
65            "struct_item" => crate::SymbolKind::Struct,
66            "enum_item" => crate::SymbolKind::Enum,
67            "type_item" => crate::SymbolKind::Type,
68            "union_item" => crate::SymbolKind::Struct,
69            "trait_item" => crate::SymbolKind::Trait,
70            _ => tag_kind,
71        }
72    }
73
74    fn build_signature(&self, node: &Node, content: &str) -> String {
75        match node.kind() {
76            "function_item" | "function_signature_item" => {
77                let name = match self.node_name(node, content) {
78                    Some(n) => n,
79                    None => {
80                        return content[node.byte_range()]
81                            .lines()
82                            .next()
83                            .unwrap_or("")
84                            .trim()
85                            .to_string();
86                    }
87                };
88                let vis = self.extract_visibility_prefix(node, content);
89                let params = node
90                    .child_by_field_name("parameters")
91                    .map(|p| content[p.byte_range()].to_string())
92                    .unwrap_or_else(|| "()".to_string());
93                let return_type = node
94                    .child_by_field_name("return_type")
95                    .map(|r| format!(" -> {}", &content[r.byte_range()]))
96                    .unwrap_or_default();
97                format!("{}fn {}{}{}", vis, name, params, return_type)
98            }
99            "impl_item" => {
100                let type_node = node.child_by_field_name("type");
101                let type_name = type_node
102                    .map(|n| content[n.byte_range()].to_string())
103                    .unwrap_or_default();
104                if let Some(trait_node) = node.child_by_field_name("trait") {
105                    let trait_name = &content[trait_node.byte_range()];
106                    format!("impl {} for {}", trait_name, type_name)
107                } else {
108                    format!("impl {}", type_name)
109                }
110            }
111            "trait_item" => {
112                let name = self.node_name(node, content).unwrap_or("");
113                let vis = self.extract_visibility_prefix(node, content);
114                format!("{}trait {}", vis, name)
115            }
116            "mod_item" => {
117                let name = self.node_name(node, content).unwrap_or("");
118                let vis = self.extract_visibility_prefix(node, content);
119                format!("{}mod {}", vis, name)
120            }
121            "struct_item" => {
122                let name = self.node_name(node, content).unwrap_or("");
123                let vis = self.extract_visibility_prefix(node, content);
124                format!("{}struct {}", vis, name)
125            }
126            "enum_item" => {
127                let name = self.node_name(node, content).unwrap_or("");
128                let vis = self.extract_visibility_prefix(node, content);
129                format!("{}enum {}", vis, name)
130            }
131            "type_item" => {
132                let name = self.node_name(node, content).unwrap_or("");
133                let vis = self.extract_visibility_prefix(node, content);
134                format!("{}type {}", vis, name)
135            }
136            _ => {
137                let text = &content[node.byte_range()];
138                text.lines().next().unwrap_or(text).trim().to_string()
139            }
140        }
141    }
142
143    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
144        if node.kind() != "use_declaration" {
145            return Vec::new();
146        }
147
148        let line = node.start_position().row + 1;
149        let text = &content[node.byte_range()];
150        let module = text.trim_start_matches("use ").trim_end_matches(';').trim();
151
152        // Check for braced imports: use foo::{bar, baz}
153        let mut names = Vec::new();
154        let is_relative = module.starts_with("crate")
155            || module.starts_with("self")
156            || module.starts_with("super");
157
158        if let Some(brace_start) = module.find('{') {
159            let prefix = module[..brace_start].trim_end_matches("::");
160            // Find matching closing brace using depth counter to handle nested groups
161            // like `use std::{io::{Read, Write}, fs}`.
162            let brace_end = {
163                let mut depth = 0u32;
164                let mut end = None;
165                for (i, c) in module[brace_start..].char_indices() {
166                    match c {
167                        '{' => depth += 1,
168                        '}' => {
169                            depth -= 1;
170                            if depth == 0 {
171                                end = Some(brace_start + i);
172                                break;
173                            }
174                        }
175                        _ => {}
176                    }
177                }
178                end
179            };
180            if let Some(brace_end) = brace_end {
181                let items = &module[brace_start + 1..brace_end];
182                for item in items.split(',') {
183                    let trimmed = item.trim();
184                    if !trimmed.is_empty() {
185                        names.push(trimmed.to_string());
186                    }
187                }
188            }
189            vec![Import {
190                module: prefix.to_string(),
191                names,
192                alias: None,
193                is_wildcard: false,
194                is_relative,
195                line,
196            }]
197        } else {
198            // Simple import: use foo::bar or use foo::bar as baz
199            let (module_part, alias) = if let Some(as_pos) = module.find(" as ") {
200                (&module[..as_pos], Some(module[as_pos + 4..].to_string()))
201            } else {
202                (module, None)
203            };
204
205            vec![Import {
206                module: module_part.to_string(),
207                names: Vec::new(),
208                alias,
209                is_wildcard: module_part.ends_with("::*"),
210                is_relative,
211                line,
212            }]
213        }
214    }
215
216    fn format_import(&self, import: &Import, names: Option<&[&str]>) -> String {
217        let names_to_use: Vec<&str> = names
218            .map(|n| n.to_vec())
219            .unwrap_or_else(|| import.names.iter().map(|s| s.as_str()).collect());
220
221        if import.is_wildcard {
222            // Module already contains ::* from parsing
223            format!("use {};", import.module)
224        } else if names_to_use.is_empty() {
225            format!("use {};", import.module)
226        } else if names_to_use.len() == 1 {
227            format!("use {}::{};", import.module, names_to_use[0])
228        } else {
229            format!("use {}::{{{}}};", import.module, names_to_use.join(", "))
230        }
231    }
232
233    fn get_visibility(&self, node: &Node, content: &str) -> Visibility {
234        let mut cursor = node.walk();
235        for child in node.children(&mut cursor) {
236            if child.kind() == "visibility_modifier" {
237                let vis = &content[child.byte_range()];
238                if vis == "pub" {
239                    return Visibility::Public;
240                } else if vis.starts_with("pub(crate)") {
241                    return Visibility::Internal;
242                } else if vis.starts_with("pub(super)") || vis.starts_with("pub(in") {
243                    return Visibility::Protected;
244                }
245            }
246        }
247        Visibility::Private
248    }
249
250    fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
251        let in_attrs = symbol
252            .attributes
253            .iter()
254            .any(|a| a.contains("#[test]") || a.contains("#[cfg(test)]"));
255        let in_sig =
256            symbol.signature.contains("#[test]") || symbol.signature.contains("#[cfg(test)]");
257        if in_attrs || in_sig {
258            return true;
259        }
260        match symbol.kind {
261            crate::SymbolKind::Function | crate::SymbolKind::Method => {
262                symbol.name.starts_with("test_")
263            }
264            crate::SymbolKind::Module => symbol.name == "tests",
265            _ => false,
266        }
267    }
268
269    fn test_file_globs(&self) -> &'static [&'static str] {
270        &[
271            "**/tests/**",
272            "**/test_*.rs",
273            "**/*_test.rs",
274            "**/*_tests.rs",
275        ]
276    }
277
278    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
279        node.child_by_field_name("body")
280    }
281
282    fn analyze_container_body(
283        &self,
284        body_node: &Node,
285        content: &str,
286        inner_indent: &str,
287    ) -> Option<ContainerBody> {
288        crate::body::analyze_brace_body(body_node, content, inner_indent)
289    }
290
291    fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
292        // impl_item uses "type" field; trait_item and mod_item use "name"
293        let name_node = node
294            .child_by_field_name("name")
295            .or_else(|| node.child_by_field_name("type"))?;
296        Some(&content[name_node.byte_range()])
297    }
298
299    fn extract_module_doc(&self, src: &str) -> Option<String> {
300        extract_rust_module_doc(src)
301    }
302}
303
// This impl provides no method bodies, so `Rust` relies on whatever defaults
// the `LanguageSymbols` trait supplies.
impl LanguageSymbols for Rust {}
305
306impl Rust {
307    fn extract_visibility_prefix(&self, node: &Node, content: &str) -> String {
308        let mut cursor = node.walk();
309        for child in node.children(&mut cursor) {
310            if child.kind() == "visibility_modifier" {
311                return format!("{} ", &content[child.byte_range()]);
312            }
313        }
314        String::new()
315    }
316}
317
318/// Extract a Rust doc comment from a node's `attributes` child.
319///
320/// Looks for `line_outer_doc_comment` nodes (`///`) and joins their text.
321fn extract_docstring(node: &Node, content: &str) -> Option<String> {
322    let mut cursor = node.walk();
323    for child in node.children(&mut cursor) {
324        if child.kind() == "attributes" {
325            let mut doc_lines = Vec::new();
326            let mut attr_cursor = child.walk();
327            for attr_child in child.children(&mut attr_cursor) {
328                if attr_child.kind() == "line_outer_doc_comment" {
329                    let text = &content[attr_child.byte_range()];
330                    let doc = text.trim_start_matches("///").trim();
331                    if !doc.is_empty() {
332                        doc_lines.push(doc.to_string());
333                    }
334                }
335            }
336            if !doc_lines.is_empty() {
337                return Some(doc_lines.join("\n"));
338            }
339        }
340    }
341    None
342}
343
344/// Extract Rust `#[...]` attribute items from a node.
345///
346/// Checks both the `attributes` child field and preceding sibling `attribute_item` nodes.
347fn extract_attributes(node: &Node, content: &str) -> Vec<String> {
348    let mut attrs = Vec::new();
349
350    // Check for attributes child (e.g., #[test], #[cfg(test)])
351    if let Some(attr_node) = node.child_by_field_name("attributes") {
352        let mut cursor = attr_node.walk();
353        for child in attr_node.children(&mut cursor) {
354            if child.kind() == "attribute_item" {
355                attrs.push(content[child.byte_range()].to_string());
356            }
357        }
358    }
359
360    // Also check preceding siblings for outer attributes
361    let mut prev = node.prev_sibling();
362    while let Some(sibling) = prev {
363        if sibling.kind() == "attribute_item" {
364            // Insert at beginning to maintain order
365            attrs.insert(0, content[sibling.byte_range()].to_string());
366            prev = sibling.prev_sibling();
367        } else {
368            break;
369        }
370    }
371
372    attrs
373}
374
/// Extracts the module-level (`//!`) documentation from the top of `src`.
///
/// Leading blank lines are skipped. The consecutive lines that then start with
/// `//!` (after trimming) are collected, with the marker and the whitespace after
/// it removed. Collection stops at the first line that does not start with `//!`,
/// including a blank line. Trailing empty doc lines are discarded, and `None` is
/// returned when nothing remains.
fn extract_rust_module_doc(src: &str) -> Option<String> {
    const MARKER: &str = "//!";

    let mut doc: Vec<&str> = src
        .lines()
        .map(str::trim)
        .skip_while(|line| line.is_empty())
        .take_while(|line| line.starts_with(MARKER))
        .map(|line| line[MARKER.len()..].trim_start())
        .collect();

    // Drop empty doc lines at the end of the collected block.
    while matches!(doc.last(), Some(last) if last.is_empty()) {
        doc.pop();
    }

    if doc.is_empty() {
        None
    } else {
        Some(doc.join("\n"))
    }
}
405
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validate_unused_kinds_audit;

    /// Documents node kinds that exist in the Rust grammar but aren't used in trait methods.
    /// Run `cross_check_node_kinds` in registry.rs to see all potentially useful kinds.
    ///
    /// The kinds are grouped by category below and passed to the audit as one
    /// list, in the order the groups are declared.
    #[test]
    fn unused_node_kinds_audit() {
        // Internal/wrapper nodes.
        #[rustfmt::skip]
        const STRUCTURAL: &[&str] = &[
            "block_comment",           // comments
            "field_declaration",       // struct field
            "field_declaration_list",  // struct body
            "field_expression",        // foo.bar
            "field_identifier",        // field name
            "identifier",              // too common
            "lifetime",                // 'a
            "lifetime_parameter",      // <'a>
            "ordered_field_declaration_list", // tuple struct fields
            "scoped_identifier",       // path::to::thing
            "scoped_type_identifier",  // path::to::Type
            "shorthand_field_identifier", // struct init shorthand
            "type_identifier",         // type names
            "visibility_modifier",     // pub, pub(crate)
        ];

        // Sub-parts of larger constructs.
        #[rustfmt::skip]
        const CLAUSE: &[&str] = &[
            "else_clause",             // part of if
            "enum_variant",            // enum variant
            "enum_variant_list",       // enum body
            "match_block",             // match body
            "match_pattern",           // match arm pattern
            "trait_bounds",            // T: Foo + Bar
            "where_clause",            // where T: Foo
        ];

        // Expressions (we track statements/definitions).
        #[rustfmt::skip]
        const EXPRESSION: &[&str] = &[
            "array_expression",        // [1, 2, 3]
            "assignment_expression",   // x = y
            "async_block",             // async { }
            "await_expression",        // foo.await
            "generic_function",        // foo::<T>()
            "index_expression",        // arr[i]
            "parenthesized_expression",// (expr)
            "range_expression",        // 0..10
            "reference_expression",    // &x
            "struct_expression",       // Foo { x: 1 }
            "try_expression",          // foo?
            "tuple_expression",        // (a, b)
            "type_cast_expression",    // x as T
            "unary_expression",        // -x, !x
            "unit_expression",         // ()
            "yield_expression",        // yield x
        ];

        // Type-related nodes.
        #[rustfmt::skip]
        const TYPE: &[&str] = &[
            "abstract_type",           // impl Trait
            "array_type",              // [T; N]
            "bounded_type",            // T: Foo
            "bracketed_type",          // <T>
            "dynamic_type",            // dyn Trait
            "function_type",           // fn(T) -> U
            "generic_type",            // Vec<T>
            "generic_type_with_turbofish", // Vec::<T>
            "higher_ranked_trait_bound", // for<'a>
            "never_type",              // !
            "pointer_type",            // *const T
            "primitive_type",          // i32, bool
            "qualified_type",          // <T as Trait>::Item
            "reference_type",          // &T
            "removed_trait_bound",     // ?Sized
            "tuple_type",              // (A, B)
            "type_arguments",          // <T, U>
            "type_binding",            // Item = T
            "type_parameter",          // T
            "type_parameters",         // <T, U>
            "unit_type",               // ()
            "unsafe_bound_type",       // unsafe trait bound
        ];

        // Modifiers and doc-comment markers.
        #[rustfmt::skip]
        const MODIFIER: &[&str] = &[
            "block_outer_doc_comment", // /** ... */
            "extern_modifier",         // extern "C"
            "function_modifiers",      // async, const, unsafe
            "mutable_specifier",       // mut
        ];

        // Pattern matching internals.
        #[rustfmt::skip]
        const PATTERN: &[&str] = &[
            "struct_pattern",          // Foo { x, y }
            "tuple_struct_pattern",    // Foo(x, y)
        ];

        // Macro-related nodes.
        #[rustfmt::skip]
        const MACRO: &[&str] = &[
            "fragment_specifier",      // $x:expr
            "macro_arguments_declaration", // macro args
            "macro_body_v2",           // macro body
            "macro_definition_v2",     // macro 2.0
        ];

        // Everything that fits no other category.
        #[rustfmt::skip]
        const OTHER: &[&str] = &[
            "block_expression_with_attribute", // #[attr] { }
            "const_block",             // const { }
            "expression_statement",    // expr;
            "expression_with_attribute", // #[attr] expr
            "extern_crate_declaration",// extern crate
            "foreign_mod_item",        // extern block item
            "function_signature_item", // fn signature in trait
            "gen_block",               // gen { }
            "let_declaration",         // let x = y
            "try_block",               // try { }
            "unsafe_block",            // unsafe { }
            "use_as_clause",           // use foo as bar
            "empty_statement",         // ;
        ];

        // Control flow and similar nodes that are not extracted as symbols.
        #[rustfmt::skip]
        const NOT_SYMBOLS: &[&str] = &[
            "closure_expression",
            "continue_expression",
            "match_expression",
            "use_declaration",
            "for_expression",
            "match_arm",
            "break_expression",
            "while_expression",
            "loop_expression",
            "return_expression",
            "if_expression",
            "block",
            "binary_expression",
        ];

        let documented_unused: Vec<&str> = [
            STRUCTURAL,
            CLAUSE,
            EXPRESSION,
            TYPE,
            MODIFIER,
            PATTERN,
            MACRO,
            OTHER,
            NOT_SYMBOLS,
        ]
        .concat();

        validate_unused_kinds_audit(&Rust, documented_unused.as_slice())
            .expect("Rust unused node kinds audit failed");
    }
}