Skip to main content

normalize_languages/
perl.rs

//! Perl language support.
2
3use crate::external_packages::ResolvedPackage;
4use crate::{Export, Import, Language, Symbol, SymbolKind, Visibility, VisibilityMechanism};
5use std::path::{Path, PathBuf};
6use tree_sitter::Node;
7
/// Perl language support.
///
/// Zero-sized marker type: it carries no state, so it is cheap to copy and can
/// be created with `Perl` or `Perl::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct Perl;
10
11impl Language for Perl {
12    fn name(&self) -> &'static str {
13        "Perl"
14    }
15    fn extensions(&self) -> &'static [&'static str] {
16        &["pl", "pm", "t"]
17    }
18    fn grammar_name(&self) -> &'static str {
19        "perl"
20    }
21
22    fn has_symbols(&self) -> bool {
23        true
24    }
25
26    fn container_kinds(&self) -> &'static [&'static str] {
27        &["package_statement"]
28    }
29
30    fn function_kinds(&self) -> &'static [&'static str] {
31        &["subroutine_declaration_statement"]
32    }
33
34    fn type_kinds(&self) -> &'static [&'static str] {
35        &[]
36    }
37
38    fn import_kinds(&self) -> &'static [&'static str] {
39        &["use_statement", "require_expression"]
40    }
41
42    fn public_symbol_kinds(&self) -> &'static [&'static str] {
43        &["subroutine_declaration_statement"]
44    }
45
46    fn visibility_mechanism(&self) -> VisibilityMechanism {
47        VisibilityMechanism::NamingConvention // _ prefix for private
48    }
49
50    fn extract_public_symbols(&self, node: &Node, content: &str) -> Vec<Export> {
51        let name = match self.node_name(node, content) {
52            Some(n) => n.to_string(),
53            None => return Vec::new(),
54        };
55
56        // _ prefix is conventionally private
57        if name.starts_with('_') {
58            return Vec::new();
59        }
60
61        vec![Export {
62            name,
63            kind: SymbolKind::Function,
64            line: node.start_position().row + 1,
65        }]
66    }
67
68    fn scope_creating_kinds(&self) -> &'static [&'static str] {
69        &["block", "subroutine_declaration_statement"]
70    }
71
72    fn control_flow_kinds(&self) -> &'static [&'static str] {
73        &[
74            "conditional_statement",
75            "loop_statement",
76            "for_statement",
77            "cstyle_for_statement",
78        ]
79    }
80
81    fn complexity_nodes(&self) -> &'static [&'static str] {
82        &[
83            "conditional_statement",
84            "loop_statement",
85            "for_statement",
86            "conditional_expression",
87        ]
88    }
89
90    fn nesting_nodes(&self) -> &'static [&'static str] {
91        &[
92            "subroutine_declaration_statement",
93            "conditional_statement",
94            "loop_statement",
95            "block",
96        ]
97    }
98
99    fn signature_suffix(&self) -> &'static str {
100        ""
101    }
102
103    fn extract_function(&self, node: &Node, content: &str, _in_container: bool) -> Option<Symbol> {
104        let name = self.node_name(node, content)?;
105        let text = &content[node.byte_range()];
106        let first_line = text.lines().next().unwrap_or(text);
107
108        Some(Symbol {
109            name: name.to_string(),
110            kind: SymbolKind::Function,
111            signature: first_line.trim().to_string(),
112            docstring: self.extract_docstring(node, content),
113            attributes: Vec::new(),
114            start_line: node.start_position().row + 1,
115            end_line: node.end_position().row + 1,
116            visibility: if name.starts_with('_') {
117                Visibility::Private
118            } else {
119                Visibility::Public
120            },
121            children: Vec::new(),
122            is_interface_impl: false,
123            implements: Vec::new(),
124        })
125    }
126
127    fn extract_container(&self, node: &Node, content: &str) -> Option<Symbol> {
128        if node.kind() != "package_statement" {
129            return None;
130        }
131
132        let text = &content[node.byte_range()];
133        let name = text
134            .strip_prefix("package ")
135            .and_then(|s| s.split(';').next())
136            .map(|s| s.trim().to_string())
137            .unwrap_or_else(|| "main".to_string());
138
139        Some(Symbol {
140            name: name.clone(),
141            kind: SymbolKind::Module,
142            signature: format!("package {}", name),
143            docstring: None,
144            attributes: Vec::new(),
145            start_line: node.start_position().row + 1,
146            end_line: node.end_position().row + 1,
147            visibility: Visibility::Public,
148            children: Vec::new(),
149            is_interface_impl: false,
150            implements: Vec::new(),
151        })
152    }
153
154    fn extract_type(&self, _node: &Node, _content: &str) -> Option<Symbol> {
155        None
156    }
157
158    fn extract_docstring(&self, node: &Node, content: &str) -> Option<String> {
159        // Perl uses # for comments, POD for docs
160        let mut prev = node.prev_sibling();
161        let mut doc_lines = Vec::new();
162
163        while let Some(sibling) = prev {
164            let text = &content[sibling.byte_range()];
165            if sibling.kind() == "comment" && text.starts_with('#') {
166                let line = text.strip_prefix('#').unwrap_or(text).trim();
167                doc_lines.push(line.to_string());
168                prev = sibling.prev_sibling();
169            } else {
170                break;
171            }
172        }
173
174        if doc_lines.is_empty() {
175            return None;
176        }
177
178        doc_lines.reverse();
179        Some(doc_lines.join(" "))
180    }
181
182    fn extract_attributes(&self, _node: &Node, _content: &str) -> Vec<String> {
183        Vec::new()
184    }
185
186    fn extract_imports(&self, node: &Node, content: &str) -> Vec<Import> {
187        let text = &content[node.byte_range()];
188        let line = node.start_position().row + 1;
189
190        // use Module::Name;
191        // require Module::Name;
192        let module = if let Some(rest) = text.strip_prefix("use ") {
193            rest.split(|c| c == ';' || c == ' ').next()
194        } else if let Some(rest) = text.strip_prefix("require ") {
195            rest.split(|c| c == ';' || c == ' ').next()
196        } else {
197            None
198        };
199
200        if let Some(module) = module {
201            let module = module.trim().to_string();
202            return vec![Import {
203                module: module.clone(),
204                names: Vec::new(),
205                alias: None,
206                is_wildcard: false,
207                is_relative: false,
208                line,
209            }];
210        }
211
212        Vec::new()
213    }
214
215    fn format_import(&self, import: &Import, names: Option<&[&str]>) -> String {
216        // Perl: use Module; or use Module qw(a b c);
217        let names_to_use: Vec<&str> = names
218            .map(|n| n.to_vec())
219            .unwrap_or_else(|| import.names.iter().map(|s| s.as_str()).collect());
220        if names_to_use.is_empty() {
221            format!("use {};", import.module)
222        } else {
223            format!("use {} qw({});", import.module, names_to_use.join(" "))
224        }
225    }
226
227    fn is_public(&self, node: &Node, content: &str) -> bool {
228        self.node_name(node, content)
229            .map_or(true, |n| !n.starts_with('_'))
230    }
231
232    fn get_visibility(&self, node: &Node, content: &str) -> Visibility {
233        if self.is_public(node, content) {
234            Visibility::Public
235        } else {
236            Visibility::Private
237        }
238    }
239
240    fn is_test_symbol(&self, symbol: &crate::Symbol) -> bool {
241        let name = symbol.name.as_str();
242        match symbol.kind {
243            crate::SymbolKind::Function | crate::SymbolKind::Method => name.starts_with("test_"),
244            crate::SymbolKind::Module => name == "tests" || name == "test",
245            _ => false,
246        }
247    }
248
249    fn embedded_content(&self, _node: &Node, _content: &str) -> Option<crate::EmbeddedBlock> {
250        None
251    }
252
253    fn container_body<'a>(&self, node: &'a Node<'a>) -> Option<Node<'a>> {
254        node.child_by_field_name("body")
255    }
256
257    fn body_has_docstring(&self, _body: &Node, _content: &str) -> bool {
258        false
259    }
260
261    fn node_name<'a>(&self, node: &Node, content: &'a str) -> Option<&'a str> {
262        node.child_by_field_name("name")
263            .map(|n| &content[n.byte_range()])
264    }
265
266    fn file_path_to_module_name(&self, path: &Path) -> Option<String> {
267        let ext = path.extension()?.to_str()?;
268        if !["pl", "pm"].contains(&ext) {
269            return None;
270        }
271        let stem = path.file_stem()?.to_str()?;
272        Some(stem.to_string())
273    }
274
275    fn module_name_to_paths(&self, module: &str) -> Vec<String> {
276        let path = module.replace("::", "/");
277        vec![format!("{}.pm", path), format!("{}.pl", path)]
278    }
279
280    fn lang_key(&self) -> &'static str {
281        "perl"
282    }
283
284    fn is_stdlib_import(&self, import_name: &str, _project_root: &Path) -> bool {
285        // Core Perl modules
286        import_name == "strict"
287            || import_name == "warnings"
288            || import_name.starts_with("File::")
289            || import_name.starts_with("IO::")
290            || import_name.starts_with("Data::")
291            || import_name.starts_with("Carp")
292    }
293
294    fn find_stdlib(&self, _project_root: &Path) -> Option<PathBuf> {
295        None
296    }
297
298    fn resolve_local_import(
299        &self,
300        import: &str,
301        _current_file: &Path,
302        project_root: &Path,
303    ) -> Option<PathBuf> {
304        let path = import.replace("::", "/");
305        let full = project_root.join("lib").join(format!("{}.pm", path));
306        if full.is_file() { Some(full) } else { None }
307    }
308
309    fn resolve_external_import(
310        &self,
311        _import_name: &str,
312        _project_root: &Path,
313    ) -> Option<ResolvedPackage> {
314        None
315    }
316
317    fn get_version(&self, project_root: &Path) -> Option<String> {
318        if project_root.join("cpanfile").is_file() {
319            return Some("cpan".to_string());
320        }
321        if project_root.join("Makefile.PL").is_file() {
322            return Some("ExtUtils::MakeMaker".to_string());
323        }
324        None
325    }
326
327    fn find_package_cache(&self, _project_root: &Path) -> Option<PathBuf> {
328        None
329    }
330    fn indexable_extensions(&self) -> &'static [&'static str] {
331        &["pl", "pm"]
332    }
333    fn package_sources(&self, _project_root: &Path) -> Vec<crate::PackageSource> {
334        Vec::new()
335    }
336
337    fn should_skip_package_entry(&self, name: &str, is_dir: bool) -> bool {
338        use crate::traits::{has_extension, skip_dotfiles};
339        if skip_dotfiles(name) {
340            return true;
341        }
342        if is_dir && name == "blib" {
343            return true;
344        }
345        !is_dir && !has_extension(name, self.indexable_extensions())
346    }
347
348    fn discover_packages(&self, _source: &crate::PackageSource) -> Vec<(String, PathBuf)> {
349        Vec::new()
350    }
351
352    fn package_module_name(&self, entry_name: &str) -> String {
353        entry_name
354            .strip_suffix(".pm")
355            .or_else(|| entry_name.strip_suffix(".pl"))
356            .unwrap_or(entry_name)
357            .to_string()
358    }
359
360    fn find_package_entry(&self, path: &Path) -> Option<PathBuf> {
361        if path.is_file() {
362            Some(path.to_path_buf())
363        } else {
364            None
365        }
366    }
367}
368
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validate_unused_kinds_audit;

    /// Node kinds handed to the audit as documented-unused for the Perl support.
    #[rustfmt::skip]
    const DOCUMENTED_UNUSED: &[&str] = &[
        "ambiguous_function_call_expression", "amper_deref_expression",
        "anonymous_array_expression", "anonymous_hash_expression",
        "anonymous_method_expression", "anonymous_slice_expression",
        "anonymous_subroutine_expression", "array_deref_expression",
        "array_element_expression", "arraylen_deref_expression", "assignment_expression",
        "await_expression", "binary_expression", "block_statement", "class_phaser_statement",
        "class_statement", "coderef_call_expression",
        "defer_statement", "do_expression", "else", "elsif",
        "equality_expression", "eval_expression", "expression_statement",
        "fileglob_expression", "func0op_call_expression", "func1op_call_expression",
        "function", "function_call_expression", "glob_deref_expression",
        "glob_slot_expression", "goto_expression", "hash_deref_expression",
        "hash_element_expression", "identifier", "keyval_expression",
        "list_expression", "localization_expression",
        "loopex_expression", "lowprec_logical_expression", "map_grep_expression",
        "match_regexp", "match_regexp_modifiers", "method", "method_call_expression",
        "method_declaration_statement", "phaser_statement", "postfix_conditional_expression",
        "postfix_for_expression", "postfix_loop_expression", "postinc_expression",
        "preinc_expression", "prototype", "quoted_regexp_modifiers", "readline_expression",
        "refgen_expression", "relational_expression",
        "require_version_expression", "return_expression", "role_statement",
        "scalar_deref_expression", "slice_expression", "sort_expression", "statement_label",
        "stub_expression", "substitution_regexp_modifiers", "transliteration_expression",
        "transliteration_modifiers", "try_statement", "unary_expression", "undef_expression",
        "use_version_statement", "variable_declaration",
    ];

    /// Runs the crate's unused-kinds audit against `Perl` with the list above
    /// and fails the test if the audit reports an error.
    #[test]
    fn unused_node_kinds_audit() {
        let outcome = validate_unused_kinds_audit(&Perl, DOCUMENTED_UNUSED);
        outcome.expect("Perl unused node kinds audit failed");
    }
}