use std::path::Path;
/// Number of leading bytes inspected when probing content for NUL bytes (4 KiB).
const BINARY_PROBE_BYTES: usize = 4 * 1024;
/// Reports whether `text` looks like binary data.
///
/// Only the first `BINARY_PROBE_BYTES` bytes are examined. The content is
/// classified as binary when that window holds at least one NUL (`0x00`) byte;
/// a NUL byte located past the window is never seen. Empty input is not binary.
#[must_use]
pub fn is_binary_content(text: &str) -> bool {
    let bytes = text.as_bytes();
    // Clamp the window so short inputs are scanned in full without panicking.
    let window_len = bytes.len().min(BINARY_PROBE_BYTES);
    bytes[..window_len].contains(&0)
}
/// File extensions (lowercase, without a leading dot) that are classified as
/// Perl source.
pub const PERL_SOURCE_EXTENSIONS: [&str; 9] = [
    "pl",
    "pm",
    "t",
    "psgi",
    "cgi",
    "ep",
    "tt",
    "tt2",
    "mason",
];
/// Checks whether `extension` is one of the entries in `PERL_SOURCE_EXTENSIONS`.
///
/// A single leading `.` is ignored, so both `"pm"` and `".pm"` are accepted.
/// The comparison is ASCII case-insensitive.
#[must_use]
pub fn is_perl_source_extension(extension: &str) -> bool {
    // Only one leading dot is removed; "..pl" keeps its second dot and will not match.
    let needle = extension.strip_prefix('.').unwrap_or(extension);
    for known in &PERL_SOURCE_EXTENSIONS {
        if needle.eq_ignore_ascii_case(known) {
            return true;
        }
    }
    false
}
#[must_use]
pub fn is_perl_source_path(path: &Path) -> bool {
path.extension().and_then(|ext| ext.to_str()).is_some_and(is_perl_source_extension)
}
/// Checks whether a URI-like string refers to a Perl source file.
///
/// Everything from the first `#` (fragment) or `?` (query) onward is
/// discarded, and the remaining text is classified with
/// `is_perl_source_path`. Because the remainder is simply wrapped in a
/// `Path`, plain filesystem paths are accepted as well.
#[must_use]
pub fn is_perl_source_uri(uri: &str) -> bool {
    // Whichever of '#' or '?' appears first marks the end of the path part.
    let path_part = uri
        .find(|c: char| matches!(c, '#' | '?'))
        .map_or(uri, |end| &uri[..end]);
    is_perl_source_path(Path::new(path_part))
}
#[cfg(test)]
mod tests {
    //! Unit tests for the extension, path, URI and binary-content classifiers.

    use super::{
        BINARY_PROBE_BYTES, PERL_SOURCE_EXTENSIONS, is_binary_content, is_perl_source_extension,
        is_perl_source_path, is_perl_source_uri,
    };
    use std::path::Path;

    // The public extension list must keep its exact contents and order.
    #[test]
    fn exposes_expected_extension_set() {
        let expected = ["pl", "pm", "t", "psgi", "cgi", "ep", "tt", "tt2", "mason"];
        assert_eq!(PERL_SOURCE_EXTENSIONS, expected);
    }

    // Mixed case and an optional leading dot are both accepted.
    #[test]
    fn classifies_extensions_case_insensitively() {
        for accepted in ["pl", ".pm", "T", "PsGi", "cgi", ".CGI"] {
            assert!(is_perl_source_extension(accepted));
        }
        assert!(!is_perl_source_extension("txt"));
    }

    #[test]
    fn classifies_filesystem_paths() {
        let perl_paths = [
            "/workspace/script.pl",
            "/workspace/lib/Foo/Bar.PM",
            "/workspace/app.psgi",
            "/var/www/cgi-bin/form.cgi",
            "/var/www/cgi-bin/upload.CGI",
        ];
        for raw in perl_paths {
            assert!(is_perl_source_path(Path::new(raw)));
        }

        let other_paths = ["/workspace/README.md", "/workspace/no_extension"];
        for raw in other_paths {
            assert!(!is_perl_source_path(Path::new(raw)));
        }
    }

    // Query strings and fragments must not affect the classification.
    #[test]
    fn classifies_uri_like_inputs() {
        let perl_uris = [
            "file:///workspace/script.pl",
            "file:///workspace/lib/Foo/Bar.pm",
            "file:///workspace/app.psgi",
            "file:///workspace/app.psgi?version=1#section",
            "file:///var/www/cgi-bin/form.cgi",
            "file:///var/www/cgi-bin/search.cgi?q=perl#results",
        ];
        for uri in perl_uris {
            assert!(is_perl_source_uri(uri));
        }
        assert!(!is_perl_source_uri("file:///workspace/README.md"));
    }

    #[test]
    fn cgi_and_psgi_are_recognized() {
        for extension in ["cgi", "CGI", "psgi", "PSGI"] {
            assert!(is_perl_source_extension(extension));
        }
        for raw in ["/var/www/cgi-bin/form.cgi", "/workspace/app.psgi"] {
            assert!(is_perl_source_path(Path::new(raw)));
        }
        for uri in ["file:///var/www/cgi-bin/form.cgi", "file:///workspace/app.psgi"] {
            assert!(is_perl_source_uri(uri));
        }
        for rejected in ["sh", "py"] {
            assert!(!is_perl_source_extension(rejected));
        }
    }

    #[test]
    fn template_extensions_are_recognized() {
        // (lowercase extension, uppercase extension, filesystem path, URI)
        let cases = [
            ("ep", "EP", "/app/templates/index.html.ep", "file:///app/templates/index.html.ep"),
            ("tt", "TT", "/app/templates/page.tt", "file:///app/templates/page.tt"),
            ("tt2", "TT2", "/app/templates/layout.tt2", "file:///app/templates/layout.tt2"),
            ("mason", "MASON", "/app/comp/header.mason", "file:///app/comp/header.mason"),
        ];
        for (lower, upper, raw_path, uri) in cases {
            assert!(is_perl_source_extension(lower));
            assert!(is_perl_source_extension(upper));
            assert!(is_perl_source_path(Path::new(raw_path)));
            assert!(is_perl_source_uri(uri));
        }

        for rejected in ["html", "tmpl"] {
            assert!(!is_perl_source_extension(rejected));
        }
    }

    #[test]
    fn supports_windows_style_paths() {
        let backslash_script = r"C:\workspace\script.pl";
        let drive_letter_uri = r"file:///C:/workspace/lib/Foo.pm";
        let backslash_readme = r"C:\workspace\README.txt";

        assert!(is_perl_source_uri(backslash_script));
        assert!(is_perl_source_uri(drive_letter_uri));
        assert!(!is_perl_source_uri(backslash_readme));
    }

    #[test]
    fn binary_content_null_byte_is_detected() {
        assert!(
            is_binary_content("PK\x00\x03some binary content\x00\x00\x00"),
            "null bytes must trigger binary guard"
        );
    }

    #[test]
    fn binary_content_single_null_byte_triggers_guard() {
        assert!(
            is_binary_content("use strict;\x00\nuse warnings;\n"),
            "single null byte must trigger binary guard"
        );
    }

    #[test]
    fn binary_content_clean_perl_is_not_binary() {
        let source = "#!/usr/bin/perl\nuse strict;\nuse warnings;\n\nprint \"Hello, World!\\n\";\n";
        let flagged = is_binary_content(source);
        assert!(!flagged, "clean Perl source must not be classified as binary");
    }

    #[test]
    fn binary_content_empty_string_is_not_binary() {
        let flagged = is_binary_content("");
        assert!(!flagged, "empty string must not be classified as binary");
    }

    #[test]
    fn binary_content_unicode_text_is_not_binary() {
        let source = "# Perl with Unicode: \u{00e9}t\u{00e9}\nprint \"caf\u{00e9}\\n\";\n";
        let flagged = is_binary_content(source);
        assert!(!flagged, "UTF-8 text without null bytes must not be binary");
    }

    // The NUL byte sits immediately after the probe window, so it is never scanned.
    #[test]
    fn binary_content_only_scans_first_probe_window() {
        let mut late_null = "a".repeat(BINARY_PROBE_BYTES);
        late_null.push_str("\x00trailing");
        assert!(
            !is_binary_content(&late_null),
            "null byte beyond probe window must not trigger the guard"
        );
    }

    // The NUL byte is the last byte inside the probe window.
    #[test]
    fn binary_content_null_byte_at_probe_boundary() {
        let mut boundary_null = "a".repeat(BINARY_PROBE_BYTES - 1);
        boundary_null.push_str("\x00rest");
        assert!(
            is_binary_content(&boundary_null),
            "null byte at probe boundary must trigger binary guard"
        );
    }

    #[test]
    fn binary_content_elf_header_is_detected() {
        let header = "\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00";
        assert!(is_binary_content(header), "ELF-like header with null bytes must be binary");
    }

    #[test]
    fn binary_content_zip_pk_header_is_detected() {
        let header = "PK\x03\x04\x14\x00\x00\x00\x08\x00";
        assert!(is_binary_content(header), "ZIP-like header with null bytes must be binary");
    }
}