Skip to main content

normalize_languages/
r.rs

//! R language support: symbol, import, and visibility extraction for R source files.
2
3use crate::external_packages::ResolvedPackage;
4use crate::{Export, Import, Language, Symbol, SymbolKind, Visibility, VisibilityMechanism};
5use std::path::{Path, PathBuf};
6use tree_sitter::Node;
7
/// R language support.
///
/// Zero-sized marker type; all behaviour is provided through its `Language`
/// implementation. The derives make the marker cheap to copy, printable in
/// diagnostics, and constructible via `Default`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct R;
10
11impl Language for R {
12    fn name(&self) -> &'static str {
13        "R"
14    }
15    fn extensions(&self) -> &'static [&'static str] {
16        &["r", "R", "rmd", "Rmd"]
17    }
18    fn grammar_name(&self) -> &'static str {
19        "r"
20    }
21
22    fn has_symbols(&self) -> bool {
23        true
24    }
25
26    fn container_kinds(&self) -> &'static [&'static str] {
27        &[]
28    }
29
30    fn function_kinds(&self) -> &'static [&'static str] {
31        &["function_definition"]
32    }
33
34    fn type_kinds(&self) -> &'static [&'static str] {
35        &[]
36    }
37
38    fn import_kinds(&self) -> &'static [&'static str] {
39        &["call"] // library(), require()
40    }
41
42    fn public_symbol_kinds(&self) -> &'static [&'static str] {
43        &["binary_operator"] // assignments in R are binary operators
44    }
45
46    fn visibility_mechanism(&self) -> VisibilityMechanism {
47        VisibilityMechanism::NamingConvention // . prefix for internal
48    }
49
50    fn extract_public_symbols(&self, node: &Node, content: &str) -> Vec<Export> {
51        // Look for assignments like: foo <- function(...) or foo = function(...)
52        // In R grammar, these are binary_operator nodes
53        if node.kind() != "binary_operator" {
54            return Vec::new();
55        }
56
57        // Check if it's an assignment (contains <- or =)
58        let text = &content[node.byte_range()];
59        if !text.contains("<-") && !text.contains("=") {
60            return Vec::new();
61        }
62
63        let name = match node.child(0).map(|n| &content[n.byte_range()]) {
64            Some(n) => n.to_string(),
65            None => return Vec::new(),
66        };
67
68        // Check if RHS is a function
69        let rhs = node.child(2);
70        let is_function = rhs.map_or(false, |n| n.kind() == "function_definition");
71
72        if !is_function {
73            return Vec::new();
74        }
75
76        // . prefix is internal by convention
77        if name.starts_with('.') {
78            return Vec::new();
79        }
80
81        vec![Export {
82            name,
83            kind: SymbolKind::Function,
84            line: node.start_position().row + 1,
85        }]
86    }
87
88    fn scope_creating_kinds(&self) -> &'static [&'static str] {
89        &["function_definition", "braced_expression"]
90    }
91
92    fn control_flow_kinds(&self) -> &'static [&'static str] {
93        &[
94            "if_statement",
95            "for_statement",
96            "while_statement",
97            "repeat_statement",
98        ]
99    }
100
101    fn complexity_nodes(&self) -> &'static [&'static str] {
102        &["if_statement", "for_statement", "while_statement"]
103    }
104
105    fn nesting_nodes(&self) -> &'static [&'static str] {
106        &[
107            "function_definition",
108            "if_statement",
109            "for_statement",
110            "braced_expression",
111        ]
112    }
113
114    fn signature_suffix(&self) -> &'static str {
115        ""
116    }
117
118    fn extract_function(&self, node: &Node, content: &str, _in_container: bool) -> Option<Symbol> {
119        // R functions are typically assigned: name <- function(...) {}
120        // We need to look at the parent assignment (binary_operator in R grammar)
121        let parent = node.parent()?;
122        if parent.kind() != "binary_operator" {
123            return None;
124        }
125
126        let name = parent
127            .child(0)
128            .map(|n| content[n.byte_range()].to_string())?;
129        let text = &content[parent.byte_range()];
130        let first_line = text.lines().next().unwrap_or(text);
131
132        Some(Symbol {
133            name: name.clone(),
134            kind: SymbolKind::Function,
135            signature: first_line.trim().to_string(),
136            docstring: self.extract_docstring(&parent, content),
137            attributes: Vec::new(),
138            start_line: parent.start_position().row + 1,
139            end_line: parent.end_position().row + 1,
140            visibility: if name.starts_with('.') {
141                Visibility::Private
142            } else {
143                Visibility::Public
144            },
145            children: Vec::new(),
146            is_interface_impl: false,
147            implements: Vec::new(),
148        })
149    }
150
151    fn extract_container(&self, _node: &Node, _content: &str) -> Option<Symbol> {
152        None
153    }
154    fn extract_type(&self, _node: &Node, _content: &str) -> Option<Symbol> {
155        None
156    }
157
158    fn extract_docstring(&self, node: &Node, content: &str) -> Option<String> {
159        // R uses # for comments, roxygen2 uses #' for docs
160        let mut prev = node.prev_sibling();
161        let mut doc_lines = Vec::new();
162
163        while let Some(sibling) = prev {
164            let text = &content[sibling.byte_range()];
165            if sibling.kind() == "comment" {
166                if text.starts_with("#'") {
167                    let line = text.strip_prefix("#'").unwrap_or(text).trim();
168                    if !line.starts_with('@') {
169                        doc_lines.push(line.to_string());
170                    }
171                }
172                prev = sibling.prev_sibling();
173            } else {
174                break;
175            }
176        }
177
178        if doc_lines.is_empty() {
179            return None;
180        }
181
182        doc_lines.reverse();
183        Some(doc_lines.join(" "))
184    }
185
186    fn extract_attributes(&self, _node: &Node, _content: &str) -> Vec<String> {
187        Vec::new()
188    }
189
190    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
191        if node.kind() != "call" {
192            return Vec::new();
193        }
194
195        let text = &content[node.byte_range()];
196        if !text.starts_with("library(") && !text.starts_with("require(") {
197            return Vec::new();
198        }
199
200        // Extract package name from library(pkg) or require(pkg)
201        let inner = text
202            .split('(')
203            .nth(1)
204            .and_then(|s| s.split(')').next())
205            .map(|s| s.trim().trim_matches('"').trim_matches('\'').to_string());
206
207        if let Some(module) = inner {
208            return vec![Import {
209                module,
210                names: Vec::new(),
211                alias: None,
212                is_wildcard: true,
213                is_relative: false,
214                line: node.start_position().row + 1,
215            }];
216        }
217
218        Vec::new()
219    }
220
221    fn format_import(&self, import: &Import, _names: Option<&[&str]>) -> String {
222        // R: library(package)
223        format!("library({})", import.module)
224    }
225
226    fn is_public(&self, node: &Node, content: &str) -> bool {
227        node.child(0)
228            .map_or(true, |n| !content[n.byte_range()].starts_with('.'))
229    }
230
231    fn get_visibility(&self, node: &Node, content: &str) -> Visibility {
232        if self.is_public(node, content) {
233            Visibility::Public
234        } else {
235            Visibility::Private
236        }
237    }
238
239    fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
240        let name = symbol.name.as_str();
241        match symbol.kind {
242            crate::SymbolKind::Function | crate::SymbolKind::Method => name.starts_with("test_"),
243            crate::SymbolKind::Module => name == "tests" || name == "test",
244            _ => false,
245        }
246    }
247
248    fn embedded_content(&self, _node: &Node, _content: &str) -> Option<crate::EmbeddedBlock> {
249        None
250    }
251
252    fn container_body<'a>(&self, _node: &'a Node<'a>) -> Option<Node<'a>> {
253        None
254    }
255    fn body_has_docstring(&self, _body: &Node, _content: &str) -> bool {
256        false
257    }
258    fn node_name<'a>(&self, _node: &Node, _content: &'a str) -> Option<&'a str> {
259        None
260    }
261
262    fn file_path_to_module_name(&self, path: &Path) -> Option<String> {
263        let ext = path.extension()?.to_str()?.to_lowercase();
264        if ext != "r" {
265            return None;
266        }
267        let stem = path.file_stem()?.to_str()?;
268        Some(stem.to_string())
269    }
270
271    fn module_name_to_paths(&self, module: &str) -> Vec<String> {
272        vec![format!("{}.R", module), format!("{}.r", module)]
273    }
274
275    fn lang_key(&self) -> &'static str {
276        "r"
277    }
278
279    fn is_stdlib_import(&self, import_name: &str, _project_root: &Path) -> bool {
280        matches!(
281            import_name,
282            "base"
283                | "stats"
284                | "graphics"
285                | "grDevices"
286                | "utils"
287                | "datasets"
288                | "methods"
289                | "grid"
290                | "tools"
291                | "compiler"
292        )
293    }
294
295    fn find_stdlib(&self, _project_root: &Path) -> Option<PathBuf> {
296        None
297    }
298
299    fn resolve_local_import(
300        &self,
301        import: &str,
302        _current_file: &Path,
303        project_root: &Path,
304    ) -> Option<PathBuf> {
305        let full = project_root.join("R").join(format!("{}.R", import));
306        if full.is_file() { Some(full) } else { None }
307    }
308
309    fn resolve_external_import(
310        &self,
311        _import_name: &str,
312        _project_root: &Path,
313    ) -> Option<ResolvedPackage> {
314        None
315    }
316
317    fn get_version(&self, project_root: &Path) -> Option<String> {
318        if project_root.join("DESCRIPTION").is_file() {
319            return Some("R package".to_string());
320        }
321        None
322    }
323
324    fn find_package_cache(&self, _project_root: &Path) -> Option<PathBuf> {
325        // R library paths
326        if let Some(home) = std::env::var_os("HOME") {
327            let lib = PathBuf::from(home).join("R/library");
328            if lib.is_dir() {
329                return Some(lib);
330            }
331        }
332        None
333    }
334
335    fn indexable_extensions(&self) -> &'static [&'static str] {
336        &["r", "R"]
337    }
338    fn package_sources(&self, _project_root: &Path) -> Vec<crate::PackageSource> {
339        Vec::new()
340    }
341
342    fn should_skip_package_entry(&self, name: &str, is_dir: bool) -> bool {
343        use crate::traits::skip_dotfiles;
344        if skip_dotfiles(name) {
345            return true;
346        }
347        if is_dir && (name == "man" || name == "inst") {
348            return true;
349        }
350        !is_dir && !name.to_lowercase().ends_with(".r")
351    }
352
353    fn discover_packages(&self, _source: &crate::PackageSource) -> Vec<(String, PathBuf)> {
354        Vec::new()
355    }
356
357    fn package_module_name(&self, entry_name: &str) -> String {
358        entry_name
359            .strip_suffix(".R")
360            .or_else(|| entry_name.strip_suffix(".r"))
361            .unwrap_or(entry_name)
362            .to_string()
363    }
364
365    fn find_package_entry(&self, path: &Path) -> Option<PathBuf> {
366        if path.is_file() {
367            Some(path.to_path_buf())
368        } else {
369            None
370        }
371    }
372}
373
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validate_unused_kinds_audit;

    /// Runs the unused-node-kinds audit for `R` against the list of grammar
    /// node kinds that are documented as deliberately unused.
    #[test]
    fn unused_node_kinds_audit() {
        #[rustfmt::skip]
        const DOCUMENTED_UNUSED: &[&str] = &[
            "extract_operator", "identifier",
            "namespace_operator", "parenthesized_expression", "return", "unary_operator",
        ];

        let outcome = validate_unused_kinds_audit(&R, DOCUMENTED_UNUSED);
        outcome.expect("R unused node kinds audit failed");
    }
}