use std::env;
use std::fs;
use std::path::{Path, PathBuf};
/// Name of the environment variable that [`CorpusPaths::discover`] consults
/// to override the corpus root directory.
pub const CORPUS_ROOT_ENV: &str = "PERL_CORPUS_ROOT";
/// File extensions (without the leading dot) accepted when collecting files
/// from the test corpus directory; see [`get_test_files_from`].
const TEST_EXTENSIONS: &[&str] = &["pl", "pm", "t", "psgi", "cgi"];
/// Directories that corpus files are collected from.
///
/// Usually built with [`CorpusPaths::discover`] or [`CorpusPaths::from_root`],
/// which derive `test_corpus` and `fuzz` from `root`.
#[derive(Debug, Clone)]
pub struct CorpusPaths {
/// Root directory the other two paths are derived from by `from_root`.
pub root: PathBuf,
/// Directory scanned for test corpus files (`<root>/test_corpus` when built by `from_root`).
pub test_corpus: PathBuf,
/// Directory scanned for fuzz files (`<root>/crates/perl-corpus/fuzz` when built by `from_root`).
pub fuzz: PathBuf,
}
/// Identifies which corpus directory a [`CorpusFile`] was collected from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CorpusLayer {
/// The file was found under [`CorpusPaths::test_corpus`].
TestCorpus,
/// The file was found under [`CorpusPaths::fuzz`].
Fuzz,
}
/// A corpus file together with the layer it was collected from.
#[derive(Debug, Clone)]
pub struct CorpusFile {
/// Path of the file as produced by the directory walk.
pub path: PathBuf,
/// Layer (test corpus or fuzz) whose directory contained the file.
pub layer: CorpusLayer,
}
impl CorpusPaths {
pub fn discover() -> Self {
if let Ok(root) = env::var(CORPUS_ROOT_ENV) {
return Self::from_root(PathBuf::from(root));
}
Self::from_root(find_workspace_root())
}
pub fn from_root(root: PathBuf) -> Self {
Self {
test_corpus: root.join("test_corpus"),
fuzz: root.join("crates/perl-corpus/fuzz"),
root,
}
}
}
/// Returns the test corpus files found under the automatically discovered
/// corpus paths (see [`CorpusPaths::discover`]).
pub fn get_test_files() -> Vec<PathBuf> {
    let discovered = CorpusPaths::discover();
    get_test_files_from(&discovered)
}
/// Returns the files under `paths.test_corpus` whose extension is listed in
/// `TEST_EXTENSIONS`, as produced by `collect_files`.
pub fn get_test_files_from(paths: &CorpusPaths) -> Vec<PathBuf> {
    let corpus_dir = paths.test_corpus.as_path();
    collect_files(corpus_dir, TEST_EXTENSIONS)
}
/// Returns the fuzz files found under the automatically discovered corpus
/// paths (see [`CorpusPaths::discover`]).
pub fn get_fuzz_files() -> Vec<PathBuf> {
    let discovered = CorpusPaths::discover();
    get_fuzz_files_from(&discovered)
}
/// Returns the `.pl` files under `paths.fuzz`, as produced by `collect_files`.
pub fn get_fuzz_files_from(paths: &CorpusPaths) -> Vec<PathBuf> {
    // Only the `pl` extension is collected from the fuzz directory.
    const FUZZ_EXTENSIONS: &[&str] = &["pl"];
    collect_files(paths.fuzz.as_path(), FUZZ_EXTENSIONS)
}
/// Returns every corpus file (test corpus and fuzz), tagged with its layer,
/// using the automatically discovered corpus paths.
pub fn get_corpus_files() -> Vec<CorpusFile> {
    let discovered = CorpusPaths::discover();
    get_corpus_files_from(&discovered)
}
/// Collects the test corpus and fuzz files below `paths`, tagging each with
/// the [`CorpusLayer`] it came from.
///
/// The result is sorted by path and contains each path at most once. The
/// sort is stable and test corpus entries are gathered first, so if the same
/// path shows up in both layers the `TestCorpus` entry is the one retained.
pub fn get_corpus_files_from(paths: &CorpusPaths) -> Vec<CorpusFile> {
    let test_layer = get_test_files_from(paths)
        .into_iter()
        .map(|path| CorpusFile { path, layer: CorpusLayer::TestCorpus });
    let fuzz_layer = get_fuzz_files_from(paths)
        .into_iter()
        .map(|path| CorpusFile { path, layer: CorpusLayer::Fuzz });

    let mut tagged: Vec<CorpusFile> = test_layer.chain(fuzz_layer).collect();
    tagged.sort_by(|lhs, rhs| lhs.path.cmp(&rhs.path));
    // `dedup_by` hands the later element first; on a match that one is dropped.
    tagged.dedup_by(|later, earlier| later.path == earlier.path);
    tagged
}
/// Returns the paths of all discovered corpus files (both layers), sorted
/// and without duplicates.
pub fn get_all_test_files() -> Vec<PathBuf> {
    let mut all_paths: Vec<PathBuf> = Vec::new();
    for corpus_file in get_corpus_files() {
        all_paths.push(corpus_file.path);
    }
    all_paths.sort();
    all_paths.dedup();
    all_paths
}
fn find_workspace_root() -> PathBuf {
let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
for ancestor in manifest_dir.ancestors() {
let cargo_toml = ancestor.join("Cargo.toml");
if !cargo_toml.exists() {
continue;
}
if let Ok(contents) = fs::read_to_string(&cargo_toml)
&& contents.contains("[workspace]")
{
return ancestor.to_path_buf();
}
}
manifest_dir
}
/// Walks the directory tree below `root` and returns every regular file
/// whose extension is accepted by `has_allowed_extension` for `extensions`.
///
/// Entries whose name starts with `.` or `_` are skipped; for directories
/// this means the whole subtree is skipped. Directories or entries that
/// cannot be read are ignored rather than reported. Entries that
/// `DirEntry::file_type` reports as neither a directory nor a regular file
/// are ignored as well.
///
/// Returns an empty vector if `root` does not exist. The result is sorted
/// and free of duplicates.
fn collect_files(root: &Path, extensions: &[&str]) -> Vec<PathBuf> {
    let mut found = Vec::new();
    if !root.exists() {
        return found;
    }

    // Explicit work list instead of recursion.
    let mut pending = vec![root.to_path_buf()];
    while let Some(current) = pending.pop() {
        let Ok(listing) = fs::read_dir(&current) else {
            continue;
        };
        // `flatten` drops entries that failed to be read.
        for entry in listing.flatten() {
            let Ok(kind) = entry.file_type() else {
                continue;
            };

            let raw_name = entry.file_name();
            let name = raw_name.to_string_lossy();
            if matches!(name.chars().next(), Some('.' | '_')) {
                continue;
            }

            if kind.is_dir() {
                pending.push(entry.path());
            } else if kind.is_file() {
                let candidate = entry.path();
                if has_allowed_extension(&candidate, extensions) {
                    found.push(candidate);
                }
            }
        }
    }

    found.sort();
    found.dedup();
    found
}
/// Reports whether `path` has an extension that matches one of `extensions`,
/// ignoring ASCII case.
///
/// Returns `false` when the path has no extension or the extension is not
/// valid UTF-8.
fn has_allowed_extension(path: &Path, extensions: &[&str]) -> bool {
    match path.extension().and_then(|raw| raw.to_str()) {
        Some(ext) => extensions.iter().any(|candidate| candidate.eq_ignore_ascii_case(ext)),
        None => false,
    }
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Scratch directory under the system temp dir that is deleted on drop.
    ///
    /// Cleanup lives in `Drop` so the directory is removed even when a test
    /// leaves early through `?` or a failed assertion, instead of only on
    /// the success path.
    struct TempRoot(PathBuf);

    impl TempRoot {
        /// Creates a fresh directory named from `prefix`, the process id and
        /// the current time in nanoseconds.
        fn new(prefix: &str) -> std::io::Result<Self> {
            let nanos =
                SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_nanos();
            let mut root = std::env::temp_dir();
            root.push(format!("{}_{}_{}", prefix, std::process::id(), nanos));
            fs::create_dir_all(&root)?;
            Ok(Self(root))
        }

        /// Path of the scratch directory.
        fn path(&self) -> &Path {
            &self.0
        }
    }

    impl Drop for TempRoot {
        fn drop(&mut self) {
            // Best effort: a cleanup failure must not mask the test outcome.
            let _ = fs::remove_dir_all(&self.0);
        }
    }

    /// `collect_files` keeps only allowed extensions, recurses into ordinary
    /// directories, and skips names starting with `.` or `_`.
    #[test]
    fn collect_files_filters_extensions_and_skips_hidden() -> Result<(), Box<dyn std::error::Error>>
    {
        let scratch = TempRoot::new("perl_corpus_files")?;
        let root = scratch.path();
        let keep_dir = root.join("keep");
        fs::create_dir_all(&keep_dir)?;
        fs::create_dir_all(root.join("_skip"))?;
        fs::create_dir_all(root.join(".hidden_dir"))?;

        let fixtures = [
            root.join("case.pl"),
            root.join("case.pm"),
            root.join("case.t"),
            root.join("case.psgi"),
            root.join("case.cgi"),
            keep_dir.join("nested.pl"),
        ];
        for fixture in &fixtures {
            fs::write(fixture, "print 1;\n")?;
        }
        // None of these may show up in the result.
        fs::write(root.join("case.txt"), "ignore\n")?;
        fs::write(root.join(".hidden.pl"), "ignore\n")?;
        fs::write(root.join("_skip/inner.pl"), "ignore\n")?;
        fs::write(root.join(".hidden_dir/inner.pm"), "ignore\n")?;

        let files = collect_files(root, TEST_EXTENSIONS);
        let mut names: Vec<_> = files
            .iter()
            .map(|path| {
                path.file_name().map(|n| n.to_string_lossy().to_string()).unwrap_or_default()
            })
            .collect();
        names.sort();

        let expected = vec!["case.cgi", "case.pl", "case.pm", "case.psgi", "case.t", "nested.pl"];
        assert_eq!(names, expected);
        Ok(())
    }

    /// Files from both corpus directories are returned and tagged with the
    /// layer they were found in.
    #[test]
    fn corpus_files_include_layer_info() -> Result<(), Box<dyn std::error::Error>> {
        let scratch = TempRoot::new("perl_corpus_layers")?;
        let root = scratch.path();
        let test_dir = root.join("test_corpus");
        let fuzz_dir = root.join("crates/perl-corpus/fuzz");
        fs::create_dir_all(&test_dir)?;
        fs::create_dir_all(&fuzz_dir)?;

        let test_file = test_dir.join("case.pl");
        let fuzz_file = fuzz_dir.join("fuzz_case.pl");
        fs::write(&test_file, "print 1;\n")?;
        fs::write(&fuzz_file, "print 2;\n")?;

        let paths = CorpusPaths::from_root(root.to_path_buf());
        let files = get_corpus_files_from(&paths);

        assert!(
            files
                .iter()
                .any(|file| file.layer == CorpusLayer::TestCorpus && file.path == test_file),
            "Expected test corpus file in results"
        );
        assert!(
            files.iter().any(|file| file.layer == CorpusLayer::Fuzz && file.path == fuzz_file),
            "Expected fuzz file in results"
        );
        Ok(())
    }
}