use sha1::{Digest, Sha1};
use std::fs;
use std::io::Cursor;
use std::path::Path;
use std::sync::OnceLock;
/// Errors reported while locating, downloading, or verifying test data.
#[derive(Debug)]
pub enum TestDataError {
    /// An underlying I/O operation failed.
    Io(std::io::Error),
    /// A file's computed digest differs from the one listed for it.
    ChecksumMismatch {
        /// Name of the file as listed in the checksum file.
        file: String,
        /// Digest recorded in the checksum file.
        expected: String,
        /// Digest computed from the file on disk.
        actual: String,
    },
    /// The checksum file is missing or contains a malformed line.
    ///
    /// The payload is either the checksum file's path or a description of
    /// the offending line.
    InvalidChecksumFile(String),
    /// A required directory or file could not be found.
    ///
    /// The payload is either a path or a longer explanatory message.
    DirectoryNotFound(String),
}

impl std::fmt::Display for TestDataError {
    /// Renders a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Io(err) => write!(f, "IO error: {err}"),
            Self::ChecksumMismatch {
                file,
                expected,
                actual,
            } => write!(
                f,
                "Checksum mismatch for {file}: expected {expected}, got {actual}"
            ),
            Self::InvalidChecksumFile(detail) => {
                write!(f, "Invalid or missing checksum file: {detail}")
            }
            Self::DirectoryNotFound(detail) => {
                write!(f, "Test data directory not found: {detail}")
            }
        }
    }
}

// The default `source()` is kept on purpose: the `Io` message already embeds
// the wrapped error's text.
impl std::error::Error for TestDataError {}

impl From<std::io::Error> for TestDataError {
    /// Wraps an I/O error so that `?` can be used on `std::io` results.
    fn from(source: std::io::Error) -> Self {
        Self::Io(source)
    }
}
/// Computes the SHA-1 digest of the file at `file_path`.
///
/// The file is streamed into the hasher rather than read into memory first.
///
/// # Errors
///
/// Returns [`TestDataError::Io`] if the file cannot be opened or read.
fn calculate_file_sha1<P: AsRef<Path>>(file_path: P) -> Result<String, TestDataError> {
    let mut digest = Sha1::new();
    let mut source = fs::File::open(file_path)?;
    std::io::copy(&mut source, &mut digest)?;
    // `{:x}` renders the digest bytes as lowercase hexadecimal.
    Ok(format!("{:x}", digest.finalize()))
}
/// Parses one `<hash> <filename>` line of a checksum file.
///
/// The hash and filename may be separated by any run of whitespace, so both
/// a single space and the two-space layout written by `sha1sum` are accepted.
/// Everything after the separator is the filename, which may itself contain
/// spaces.
///
/// Returns `(hash, filename)` with the hash normalised to lowercase so it can
/// be compared directly with a lowercase hexadecimal digest. Returns `None`
/// when the line has no separator, the filename is empty, or the first field
/// is not made up solely of hexadecimal digits.
fn parse_checksum_line(line: &str) -> Option<(String, String)> {
    let (hash, rest) = line.trim().split_once(char::is_whitespace)?;
    // Drop the remaining separator whitespace; otherwise a two-space
    // separator would leave a leading space in the filename.
    let filename = rest.trim_start();
    let hash_is_hex = !hash.is_empty() && hash.bytes().all(|b| b.is_ascii_hexdigit());
    if !hash_is_hex || filename.is_empty() {
        return None;
    }
    Some((hash.to_ascii_lowercase(), filename.to_string()))
}
/// Verifies every file listed in `<test_data_dir>/checksums.sha1`.
///
/// Blank lines in the checksum file are ignored. Each remaining line names a
/// file, resolved relative to `test_data_dir`, and the SHA-1 digest it is
/// expected to have. Verification stops at the first problem.
///
/// # Errors
///
/// * [`TestDataError::DirectoryNotFound`] if `test_data_dir` or a listed file
///   does not exist.
/// * [`TestDataError::InvalidChecksumFile`] if the checksum file is missing or
///   a line cannot be parsed.
/// * [`TestDataError::ChecksumMismatch`] if a computed digest differs from
///   the listed one.
/// * [`TestDataError::Io`] if reading the checksum file or a listed file fails.
pub fn verify_test_data_checksums<P: AsRef<Path>>(test_data_dir: P) -> Result<(), TestDataError> {
    let root = test_data_dir.as_ref();
    if !root.exists() {
        return Err(TestDataError::DirectoryNotFound(
            root.display().to_string(),
        ));
    }

    let manifest = root.join("checksums.sha1");
    if !manifest.exists() {
        return Err(TestDataError::InvalidChecksumFile(
            manifest.display().to_string(),
        ));
    }

    let listing = fs::read_to_string(&manifest)?;
    let entries = listing
        .lines()
        .enumerate()
        // Report 1-based line numbers; blank lines still count towards them.
        .map(|(index, raw)| (index + 1, raw.trim()))
        .filter(|(_, entry)| !entry.is_empty());

    for (line_number, entry) in entries {
        let Some((expected, name)) = parse_checksum_line(entry) else {
            return Err(TestDataError::InvalidChecksumFile(format!(
                "Invalid checksum format at line {} in {}",
                line_number,
                manifest.display()
            )));
        };

        let target = root.join(&name);
        if !target.exists() {
            return Err(TestDataError::DirectoryNotFound(format!(
                "Referenced file not found: {}",
                name
            )));
        }

        let actual = calculate_file_sha1(&target)?;
        if actual != expected {
            return Err(TestDataError::ChecksumMismatch {
                file: name,
                expected,
                actual,
            });
        }
    }

    Ok(())
}
/// Lists the filenames named in `<test_data_dir>/checksums.sha1`, in file order.
///
/// Blank lines and lines that do not parse as a checksum entry are skipped
/// silently.
///
/// # Errors
///
/// Returns [`TestDataError::Io`] if the checksum file cannot be read.
fn get_expected_files<P: AsRef<Path>>(test_data_dir: P) -> Result<Vec<String>, TestDataError> {
    let manifest = test_data_dir.as_ref().join("checksums.sha1");
    let listing = fs::read_to_string(&manifest)?;
    let names = listing
        .lines()
        .map(str::trim)
        .filter(|entry| !entry.is_empty())
        .filter_map(|entry| parse_checksum_line(entry).map(|(_, name)| name))
        .collect();
    Ok(names)
}
/// Returns the files listed in the checksum file that do not exist on disk.
///
/// Names are resolved relative to `test_data_dir` and returned in the order
/// they appear in the checksum file.
///
/// # Errors
///
/// Propagates any error from reading the checksum file.
fn get_missing_files<P: AsRef<Path>>(test_data_dir: P) -> Result<Vec<String>, TestDataError> {
    let root = test_data_dir.as_ref();
    let mut names = get_expected_files(root)?;
    // Keep only the entries with no matching path on disk.
    names.retain(|name| !root.join(name).exists());
    Ok(names)
}
fn discover_test_data_directories<P: AsRef<Path>>(
base_dir: P,
) -> Result<Vec<std::path::PathBuf>, TestDataError> {
let base_dir = base_dir.as_ref();
if !base_dir.exists() {
return Ok(Vec::new());
}
let mut directories = Vec::new();
let read_dir = fs::read_dir(base_dir).map_err(TestDataError::Io)?;
for entry in read_dir {
let entry = entry.map_err(TestDataError::Io)?;
let path = entry.path();
if path.is_dir() {
let checksum_file = path.join("checksums.sha1");
if checksum_file.exists() {
directories.push(path);
}
}
}
Ok(directories)
}
/// Makes sure the test data under `path` is present and passes its checksums.
///
/// `path` is either a single data directory (one that directly contains
/// `checksums.sha1`) or a parent whose immediate subdirectories are searched
/// for such directories. If none are found the function succeeds without
/// doing anything.
///
/// When listed files are missing, a download from GitHub is attempted unless
/// the `no-large-tests` feature is enabled. After any download, the
/// directories that had missing files are re-checked, and finally every data
/// directory is checksum-verified.
///
/// # Errors
///
/// * [`TestDataError::DirectoryNotFound`] if files are missing and the
///   download is disabled, fails, or does not supply them.
/// * Any error from reading the checksum files or verifying checksums.
pub fn ensure_test_data_available<P: AsRef<Path>>(path: P) -> Result<(), TestDataError> {
    let root = path.as_ref();
    let data_dirs = if root.join("checksums.sha1").exists() {
        vec![root.to_path_buf()]
    } else {
        discover_test_data_directories(root)?
    };
    if data_dirs.is_empty() {
        return Ok(());
    }

    // Collect what is missing across all directories before deciding whether
    // a download is needed.
    let mut missing_everywhere: Vec<String> = Vec::new();
    let mut incomplete_dirs = Vec::new();
    for dir in &data_dirs {
        let absent = get_missing_files(dir)?;
        if absent.is_empty() {
            continue;
        }
        println!(
            "Missing files in {}: {}",
            dir.file_name().unwrap_or_default().to_string_lossy(),
            absent.join(", ")
        );
        missing_everywhere.extend(absent);
        incomplete_dirs.push(dir);
    }

    if !missing_everywhere.is_empty() {
        if cfg!(feature = "no-large-tests") {
            return Err(TestDataError::DirectoryNotFound(format!(
                "Missing test data files: {}. Download disabled by --features no-large-tests.",
                missing_everywhere.join(", ")
            )));
        }

        println!("Attempting to download test data from GitHub...");
        if let Err(e) = download_test_data_from_github() {
            eprintln!("Error: Failed to download test data: {}", e);
            eprintln!("To skip large tests, run: cargo test --features no-large-tests");
            eprintln!("To download manually:");
            eprintln!(" git clone --depth=1 https://github.com/varioustoxins/ustar.git temp_ustar");
            eprintln!(" cp -r temp_ustar/ustar-parser/tests/test_data/* <your-project>/tests/test_data/");
            eprintln!(" rm -rf temp_ustar");
            return Err(TestDataError::DirectoryNotFound(format!(
                "Missing test data files and download failed: {}",
                e
            )));
        }
        println!("Test data download completed successfully!");

        // A successful download must have supplied everything that was missing.
        for dir in incomplete_dirs {
            let still_missing = get_missing_files(dir)?;
            if !still_missing.is_empty() {
                return Err(TestDataError::DirectoryNotFound(format!(
                    "Download completed but still missing files in {}: {}",
                    dir.display(),
                    still_missing.join(", ")
                )));
            }
        }
    }

    data_dirs
        .iter()
        .try_for_each(|dir| verify_test_data_checksums(dir))
}
/// Outcome of the first download attempt, stored by
/// `download_test_data_from_github` via `get_or_init`.
///
/// Later calls reuse the stored value instead of downloading again; a stored
/// failure (kept as its message string) is reused as well, so it is not retried.
static DOWNLOAD_RESULT: OnceLock<Result<(), String>> = OnceLock::new();
/// Downloads the test data, reusing the outcome of an earlier call if any.
///
/// The first call runs `perform_download` and stores its result in
/// `DOWNLOAD_RESULT`; every call then returns a copy of the stored result.
///
/// # Errors
///
/// Returns the stored failure message as a boxed error.
fn download_test_data_from_github() -> Result<(), Box<dyn std::error::Error>> {
    let cached = DOWNLOAD_RESULT.get_or_init(|| match perform_download() {
        Ok(()) => Ok(()),
        // Keep only the message so the stored value can be copied later.
        Err(err) => Err(err.to_string()),
    });
    match cached {
        Ok(()) => Ok(()),
        Err(message) => Err(message.clone().into()),
    }
}
/// Runs the asynchronous archive download to completion on a new Tokio runtime.
///
/// # Errors
///
/// Fails if the runtime cannot be created or the download itself fails.
fn perform_download() -> Result<(), Box<dyn std::error::Error>> {
    tokio::runtime::Runtime::new()?.block_on(download_github_archive())
}
/// Downloads the repository's `main` branch archive from GitHub and extracts
/// its test data files.
///
/// Archive entries selected by `extract_test_data_path` are written beneath
/// `<CARGO_MANIFEST_DIR>/tests/test_data`, using `.` when that variable is
/// unset. Parent directories are created as needed. The whole archive is held
/// in memory while it is extracted.
///
/// # Errors
///
/// Fails on a request error, a non-success HTTP status, an unreadable
/// archive, or any filesystem error during extraction.
async fn download_github_archive() -> Result<(), Box<dyn std::error::Error>> {
    const ARCHIVE_URL: &str = "https://github.com/varioustoxins/ustar/archive/refs/heads/main.zip";

    println!("Downloading repository archive from GitHub...");
    let response = reqwest::get(ARCHIVE_URL).await?;
    let status = response.status();
    if !status.is_success() {
        return Err(format!("Failed to download archive: HTTP {}", status).into());
    }

    let payload = response.bytes().await?;
    println!(
        "Downloaded {} bytes, extracting test data...",
        payload.len()
    );
    let mut archive = zip::ZipArchive::new(Cursor::new(payload))?;

    let crate_root = std::env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".to_string());
    let target_base = format!("{}/tests/test_data", crate_root);
    fs::create_dir_all(&target_base)?;

    let mut file_count = 0;
    // First path component of each extracted file, used for progress output.
    let mut top_level_dirs = std::collections::HashSet::new();
    for index in 0..archive.len() {
        let mut entry = archive.by_index(index)?;
        let Some(relative_path) = extract_test_data_path(entry.name()) else {
            continue;
        };

        if let Some(top) = relative_path.split('/').next() {
            // Announce each top-level directory only once.
            if top_level_dirs.insert(top.to_string()) {
                println!("Extracting {}...", top);
            }
        }

        let destination = format!("{}/{}", target_base, relative_path);
        if let Some(parent) = Path::new(&destination).parent() {
            fs::create_dir_all(parent)?;
        }
        let mut output = fs::File::create(&destination)?;
        std::io::copy(&mut entry, &mut output)?;
        file_count += 1;
    }

    println!(
        "Extracted {} files from {} directories",
        file_count,
        top_level_dirs.len()
    );
    Ok(())
}
/// Maps an archive entry name to its path relative to the test data directory.
///
/// Returns the part of `archive_path` after the first `/tests/test_data/`
/// marker. Returns `None` when:
///
/// * the marker is absent,
/// * nothing follows it, or the entry ends with `/` (a directory entry),
/// * the remainder contains a `..` component. Entry names come from a
///   downloaded archive and are joined onto a filesystem path by the caller,
///   so such an entry could otherwise be written outside the target directory.
fn extract_test_data_path(archive_path: &str) -> Option<String> {
    const MARKER: &str = "/tests/test_data/";

    let (_, relative) = archive_path.split_once(MARKER)?;
    if relative.is_empty() || relative.ends_with('/') {
        return None;
    }
    // Check both separator styles so the guard also holds on Windows.
    let escapes_target = relative.split(['/', '\\']).any(|part| part == "..");
    if escapes_target {
        return None;
    }
    Some(relative.to_string())
}
/// Returns the temporary directory reported by `std::env::temp_dir`.
// NOTE(review): no caller is visible in this file, which is presumably why
// `dead_code` is allowed here — confirm the helper is still needed.
#[allow(dead_code)]
fn get_temp_dir() -> std::path::PathBuf {
std::env::temp_dir()
}
// Unit tests for the checksum helpers. Each test builds its fixtures in a
// fresh temporary directory; none of them calls the download path.
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
/// Filenames are returned in the order they appear in `checksums.sha1`.
#[test]
fn test_get_expected_files() {
let temp_dir = TempDir::new().unwrap();
let checksum_content = "abc123 file1.txt\ndef456 file2.txt\n";
fs::write(temp_dir.path().join("checksums.sha1"), checksum_content).unwrap();
let files = get_expected_files(temp_dir.path()).unwrap();
assert_eq!(files, vec!["file1.txt", "file2.txt"]);
}
/// Only the listed file that is absent from disk is reported as missing.
#[test]
fn test_missing_files_detection() {
let temp_dir = TempDir::new().unwrap();
let checksum_content = "abc123 file1.txt\ndef456 file2.txt\n";
fs::write(temp_dir.path().join("checksums.sha1"), checksum_content).unwrap();
// Only file1.txt is created, so file2.txt must be reported.
fs::write(temp_dir.path().join("file1.txt"), "content").unwrap();
let missing = get_missing_files(temp_dir.path()).unwrap();
assert_eq!(missing, vec!["file2.txt"]);
}
/// Verification succeeds when the listed digest matches the file content.
#[test]
fn test_sha1_verification_success() {
let temp_dir = TempDir::new().unwrap();
let test_content = "Hello, world!";
fs::write(temp_dir.path().join("test.txt"), test_content).unwrap();
// Digest the test expects for `test_content`.
let expected_hash = "943a702d06f34599aee1f8da8ef9f7296031d699";
let checksum_content = format!("{} test.txt\n", expected_hash);
fs::write(temp_dir.path().join("checksums.sha1"), checksum_content).unwrap();
let result = verify_test_data_checksums(temp_dir.path());
assert!(result.is_ok());
}
/// A wrong listed digest yields `ChecksumMismatch` carrying the file name
/// and both digests.
#[test]
fn test_sha1_verification_failure() {
let temp_dir = TempDir::new().unwrap();
fs::write(temp_dir.path().join("test.txt"), "Hello, world!").unwrap();
let wrong_hash = "0000000000000000000000000000000000000000";
let checksum_content = format!("{} test.txt\n", wrong_hash);
fs::write(temp_dir.path().join("checksums.sha1"), checksum_content).unwrap();
let result = verify_test_data_checksums(temp_dir.path());
assert!(result.is_err());
match result.unwrap_err() {
TestDataError::ChecksumMismatch {
file,
expected,
actual,
} => {
assert_eq!(file, "test.txt");
assert_eq!(expected, wrong_hash);
assert_eq!(actual, "943a702d06f34599aee1f8da8ef9f7296031d699");
}
_ => panic!("Expected ChecksumMismatch error"),
}
}
/// A well-formed line yields `(hash, filename)`; the line "invalid line" is
/// expected to be rejected.
#[test]
fn test_parse_checksum_line() {
assert_eq!(
parse_checksum_line("abc123 file.txt"),
Some(("abc123".to_string(), "file.txt".to_string()))
);
assert_eq!(parse_checksum_line("invalid line"), None);
}
/// Only subdirectories that contain `checksums.sha1` are discovered.
#[test]
fn test_discover_test_data_directories() {
let temp_dir = TempDir::new().unwrap();
let dir1 = temp_dir.path().join("dir1");
let dir2 = temp_dir.path().join("dir2");
let dir3 = temp_dir.path().join("dir3");
fs::create_dir_all(&dir1).unwrap();
fs::create_dir_all(&dir2).unwrap();
fs::create_dir_all(&dir3).unwrap();
fs::write(dir1.join("checksums.sha1"), "abc123 file1.txt\n").unwrap();
fs::write(dir3.join("checksums.sha1"), "def456 file3.txt\n").unwrap();
// dir2 has a file but no checksum file, so it must not be discovered.
fs::write(dir2.join("some_other_file.txt"), "content").unwrap();
let discovered = discover_test_data_directories(temp_dir.path()).unwrap();
assert_eq!(discovered.len(), 2);
// Compare by directory name; the assertions do not depend on listing order.
let dir_names: Vec<String> = discovered
.iter()
.map(|p| p.file_name().unwrap().to_string_lossy().to_string())
.collect();
assert!(dir_names.contains(&"dir1".to_string()));
assert!(dir_names.contains(&"dir3".to_string()));
assert!(!dir_names.contains(&"dir2".to_string()));
}
}