// perl_parser_core/syntax/source_file.rs
use std::path::Path;
7
/// Number of leading bytes inspected when probing text for binary content.
const BINARY_PROBE_BYTES: usize = 4096;

/// Reports whether `text` looks like binary data rather than source text.
///
/// Only the first `BINARY_PROBE_BYTES` bytes are examined. The input counts as
/// binary as soon as a NUL byte (`0x00`) appears inside that window; NUL bytes
/// past the window are ignored, and an empty string is never binary.
#[must_use]
pub fn is_binary_content(text: &str) -> bool {
    let bytes = text.as_bytes();
    // `get` yields `None` when the input is shorter than the probe window, in
    // which case the whole input is the window.
    let window = bytes.get(..BINARY_PROBE_BYTES).unwrap_or(bytes);
    window.contains(&0)
}
31
/// File extensions, lowercase and without the leading dot, that are treated as
/// Perl source.
pub const PERL_SOURCE_EXTENSIONS: [&str; 9] =
    ["pl", "pm", "t", "psgi", "cgi", "ep", "tt", "tt2", "mason"];

/// Reports whether `extension` is one of [`PERL_SOURCE_EXTENSIONS`].
///
/// A single leading `.` is tolerated (`".pm"` and `"pm"` are equivalent) and
/// the comparison ignores ASCII case, so `"PL"` and `"PsGi"` are accepted.
#[must_use]
pub fn is_perl_source_extension(extension: &str) -> bool {
    // Only one dot is removed; `"..pl"` still carries a dot and will not match.
    let wanted = extension.strip_prefix('.').unwrap_or(extension);
    PERL_SOURCE_EXTENSIONS
        .iter()
        .copied()
        .any(|known| wanted.eq_ignore_ascii_case(known))
}
49
50#[must_use]
52pub fn is_perl_source_path(path: &Path) -> bool {
53 path.extension().and_then(|ext| ext.to_str()).is_some_and(is_perl_source_extension)
54}
55
56#[must_use]
63pub fn is_perl_source_uri(uri: &str) -> bool {
64 let without_fragment = uri.split('#').next().unwrap_or(uri);
65 let without_query = without_fragment.split('?').next().unwrap_or(without_fragment);
66 is_perl_source_path(Path::new(without_query))
67}
68
#[cfg(test)]
mod tests {
    use super::{
        BINARY_PROBE_BYTES, PERL_SOURCE_EXTENSIONS, is_binary_content, is_perl_source_extension,
        is_perl_source_path, is_perl_source_uri,
    };
    use std::path::Path;

    /// Checks one extension in both letter cases, plus a sample path and URI
    /// that end in it.
    #[track_caller]
    fn assert_recognized(lower: &str, upper: &str, path: &str, uri: &str) {
        assert!(is_perl_source_extension(lower), "extension {lower:?} must be accepted");
        assert!(is_perl_source_extension(upper), "extension {upper:?} must be accepted");
        assert!(is_perl_source_path(Path::new(path)), "path {path:?} must be accepted");
        assert!(is_perl_source_uri(uri), "URI {uri:?} must be accepted");
    }

    /// Checks that none of the given extensions is classified as Perl source.
    #[track_caller]
    fn assert_extensions_rejected(extensions: &[&str]) {
        for rejected in extensions {
            assert!(
                !is_perl_source_extension(rejected),
                "extension {rejected:?} must be rejected"
            );
        }
    }

    #[test]
    fn exposes_expected_extension_set() {
        let expected = ["pl", "pm", "t", "psgi", "cgi", "ep", "tt", "tt2", "mason"];
        assert_eq!(PERL_SOURCE_EXTENSIONS, expected);
    }

    #[test]
    fn classifies_extensions_case_insensitively() {
        for accepted in ["pl", ".pm", "T", "PsGi", "cgi", ".CGI"] {
            assert!(
                is_perl_source_extension(accepted),
                "extension {accepted:?} must be accepted"
            );
        }
        assert_extensions_rejected(&["txt"]);
    }

    #[test]
    fn classifies_filesystem_paths() {
        let perl_paths = [
            "/workspace/script.pl",
            "/workspace/lib/Foo/Bar.PM",
            "/workspace/app.psgi",
            "/var/www/cgi-bin/form.cgi",
            "/var/www/cgi-bin/upload.CGI",
        ];
        for accepted in perl_paths {
            assert!(is_perl_source_path(Path::new(accepted)), "path {accepted:?} must be accepted");
        }

        for rejected in ["/workspace/README.md", "/workspace/no_extension"] {
            assert!(
                !is_perl_source_path(Path::new(rejected)),
                "path {rejected:?} must be rejected"
            );
        }
    }

    #[test]
    fn classifies_uri_like_inputs() {
        let perl_uris = [
            "file:///workspace/script.pl",
            "file:///workspace/lib/Foo/Bar.pm",
            "file:///workspace/app.psgi",
            "file:///workspace/app.psgi?version=1#section",
            "file:///var/www/cgi-bin/form.cgi",
            "file:///var/www/cgi-bin/search.cgi?q=perl#results",
        ];
        for accepted in perl_uris {
            assert!(is_perl_source_uri(accepted), "URI {accepted:?} must be accepted");
        }

        let rejected = "file:///workspace/README.md";
        assert!(!is_perl_source_uri(rejected), "URI {rejected:?} must be rejected");
    }

    #[test]
    fn cgi_and_psgi_are_recognized() {
        assert_recognized(
            "cgi",
            "CGI",
            "/var/www/cgi-bin/form.cgi",
            "file:///var/www/cgi-bin/form.cgi",
        );
        assert_recognized("psgi", "PSGI", "/workspace/app.psgi", "file:///workspace/app.psgi");

        assert_extensions_rejected(&["sh", "py"]);
    }

    #[test]
    fn template_extensions_are_recognized() {
        let templates = [
            ("ep", "EP", "/app/templates/index.html.ep", "file:///app/templates/index.html.ep"),
            ("tt", "TT", "/app/templates/page.tt", "file:///app/templates/page.tt"),
            ("tt2", "TT2", "/app/templates/layout.tt2", "file:///app/templates/layout.tt2"),
            ("mason", "MASON", "/app/comp/header.mason", "file:///app/comp/header.mason"),
        ];
        for (lower, upper, path, uri) in templates {
            assert_recognized(lower, upper, path, uri);
        }

        assert_extensions_rejected(&["html", "tmpl"]);
    }

    #[test]
    fn supports_windows_style_paths() {
        for accepted in [r"C:\workspace\script.pl", r"file:///C:/workspace/lib/Foo.pm"] {
            assert!(is_perl_source_uri(accepted), "input {accepted:?} must be accepted");
        }

        let rejected = r"C:\workspace\README.txt";
        assert!(!is_perl_source_uri(rejected), "input {rejected:?} must be rejected");
    }

    #[test]
    fn binary_content_null_byte_is_detected() {
        assert!(
            is_binary_content("PK\x00\x03some binary content\x00\x00\x00"),
            "null bytes must trigger binary guard"
        );
    }

    #[test]
    fn binary_content_single_null_byte_triggers_guard() {
        assert!(
            is_binary_content("use strict;\x00\nuse warnings;\n"),
            "single null byte must trigger binary guard"
        );
    }

    #[test]
    fn binary_content_clean_perl_is_not_binary() {
        let code = "#!/usr/bin/perl\nuse strict;\nuse warnings;\n\nprint \"Hello, World!\\n\";\n";
        let flagged = is_binary_content(code);
        assert!(!flagged, "clean Perl source must not be classified as binary");
    }

    #[test]
    fn binary_content_empty_string_is_not_binary() {
        let empty = "";
        assert!(!is_binary_content(empty), "empty string must not be classified as binary");
    }

    #[test]
    fn binary_content_unicode_text_is_not_binary() {
        let flagged = is_binary_content(
            "# Perl with Unicode: \u{00e9}t\u{00e9}\nprint \"caf\u{00e9}\\n\";\n",
        );
        assert!(!flagged, "UTF-8 text without null bytes must not be binary");
    }

    #[test]
    fn binary_content_only_scans_first_probe_window() {
        // The NUL byte sits at index BINARY_PROBE_BYTES, one past the window.
        let mut late_null = "a".repeat(BINARY_PROBE_BYTES);
        late_null.push_str("\x00trailing");
        let flagged = is_binary_content(&late_null);
        assert!(!flagged, "null byte beyond probe window must not trigger the guard");
    }

    #[test]
    fn binary_content_null_byte_at_probe_boundary() {
        // The NUL byte is the last byte still inside the probe window.
        let mut boundary_null = "a".repeat(BINARY_PROBE_BYTES - 1);
        boundary_null.push_str("\x00rest");
        let flagged = is_binary_content(&boundary_null);
        assert!(flagged, "null byte at probe boundary must trigger binary guard");
    }

    #[test]
    fn binary_content_elf_header_is_detected() {
        assert!(
            is_binary_content("\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00"),
            "ELF-like header with null bytes must be binary"
        );
    }

    #[test]
    fn binary_content_zip_pk_header_is_detected() {
        assert!(
            is_binary_content("PK\x03\x04\x14\x00\x00\x00\x08\x00"),
            "ZIP-like header with null bytes must be binary"
        );
    }
}