1use sha1::{Digest, Sha1};
16use std::fs;
17use std::io::Cursor;
18use std::path::Path;
19use std::sync::OnceLock;
20
/// Errors produced while locating, downloading, or validating test data.
#[derive(Debug)]
pub enum TestDataError {
    /// An underlying file-system operation failed.
    Io(std::io::Error),

    /// The SHA-1 digest computed for `file` differs from the digest recorded
    /// in the checksum manifest.
    ChecksumMismatch {
        /// File name as listed in the checksum manifest.
        file: String,
        /// Digest recorded in the checksum manifest.
        expected: String,
        /// Digest computed from the file on disk.
        actual: String,
    },

    /// The checksum manifest is missing or contains a malformed line.
    /// The payload is a path or a descriptive message.
    InvalidChecksumFile(String),

    /// A required directory or file could not be found. This variant is also
    /// used in this file for missing referenced files and failed downloads;
    /// the payload is a path or a descriptive message.
    DirectoryNotFound(String),
}
37
38impl std::fmt::Display for TestDataError {
39 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40 match self {
41 TestDataError::Io(e) => write!(f, "IO error: {}", e),
42 TestDataError::ChecksumMismatch {
43 file,
44 expected,
45 actual,
46 } => {
47 write!(
48 f,
49 "Checksum mismatch for {}: expected {}, got {}",
50 file, expected, actual
51 )
52 }
53 TestDataError::InvalidChecksumFile(path) => {
54 write!(f, "Invalid or missing checksum file: {}", path)
55 }
56 TestDataError::DirectoryNotFound(path) => {
57 write!(f, "Test data directory not found: {}", path)
58 }
59 }
60 }
61}
62
63impl std::error::Error for TestDataError {}
64
65impl From<std::io::Error> for TestDataError {
66 fn from(error: std::io::Error) -> Self {
67 TestDataError::Io(error)
68 }
69}
70
71fn calculate_file_sha1<P: AsRef<Path>>(file_path: P) -> Result<String, TestDataError> {
73 let mut file = fs::File::open(file_path)?;
74 let mut hasher = Sha1::new();
75 std::io::copy(&mut file, &mut hasher)?;
76 let result = hasher.finalize();
77 Ok(format!("{:x}", result))
78}
79
/// Parses one line of a `sha1sum`-style manifest into `(hash, filename)`.
///
/// Accepted shapes (after trimming the line):
///
/// * `<hex> <name>`   – single whitespace separator
/// * `<hex>  <name>`  – text mode as written by `sha1sum`
/// * `<hex> *<name>`  – binary mode as written by `sha1sum`
///
/// The hash must be a non-empty run of ASCII hex digits; its case is
/// preserved. The filename may itself contain spaces.
///
/// Returns `None` when there is no separator, the first token is not
/// hexadecimal, or the filename is empty.
fn parse_checksum_line(line: &str) -> Option<(String, String)> {
    let (hash, rest) = line.trim().split_once(char::is_whitespace)?;

    if hash.is_empty() || !hash.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }

    // Directly after the single separator comes the mode marker: `*` for
    // binary mode, otherwise whitespace padding in front of the name.
    let filename = match rest.strip_prefix('*') {
        Some(name) => name,
        None => rest.trim_start(),
    };

    if filename.is_empty() {
        return None;
    }

    Some((hash.to_string(), filename.to_string()))
}
90
91pub fn verify_test_data_checksums<P: AsRef<Path>>(test_data_dir: P) -> Result<(), TestDataError> {
96 let test_data_dir = test_data_dir.as_ref();
97
98 if !test_data_dir.exists() {
99 return Err(TestDataError::DirectoryNotFound(
100 test_data_dir.display().to_string(),
101 ));
102 }
103
104 let checksum_file = test_data_dir.join("checksums.sha1");
105 if !checksum_file.exists() {
106 return Err(TestDataError::InvalidChecksumFile(
107 checksum_file.display().to_string(),
108 ));
109 }
110
111 let checksum_content = fs::read_to_string(&checksum_file)?;
112
113 for (line_num, line) in checksum_content.lines().enumerate() {
114 let line = line.trim();
115 if line.is_empty() {
116 continue;
117 }
118
119 let (expected_hash, filename) = parse_checksum_line(line).ok_or_else(|| {
120 TestDataError::InvalidChecksumFile(format!(
121 "Invalid checksum format at line {} in {}",
122 line_num + 1,
123 checksum_file.display()
124 ))
125 })?;
126
127 let file_path = test_data_dir.join(&filename);
128 if !file_path.exists() {
129 return Err(TestDataError::DirectoryNotFound(format!(
130 "Referenced file not found: {}",
131 filename
132 )));
133 }
134
135 let actual_hash = calculate_file_sha1(&file_path)?;
136 if expected_hash != actual_hash {
137 return Err(TestDataError::ChecksumMismatch {
138 file: filename,
139 expected: expected_hash,
140 actual: actual_hash,
141 });
142 }
143 }
144
145 Ok(())
146}
147
148fn get_expected_files<P: AsRef<Path>>(test_data_dir: P) -> Result<Vec<String>, TestDataError> {
150 let checksum_file = test_data_dir.as_ref().join("checksums.sha1");
151 let content = fs::read_to_string(&checksum_file)?;
152
153 let mut files = Vec::new();
154 for line in content.lines() {
155 let line = line.trim();
156 if line.is_empty() {
157 continue;
158 }
159
160 if let Some((_, filename)) = parse_checksum_line(line) {
161 files.push(filename);
162 }
163 }
164
165 Ok(files)
166}
167
168fn get_missing_files<P: AsRef<Path>>(test_data_dir: P) -> Result<Vec<String>, TestDataError> {
170 let test_data_dir = test_data_dir.as_ref();
171 let expected_files = get_expected_files(test_data_dir)?;
172
173 let missing_files: Vec<String> = expected_files
174 .into_iter()
175 .filter(|filename| !test_data_dir.join(filename).exists())
176 .collect();
177
178 Ok(missing_files)
179}
180
181fn discover_test_data_directories<P: AsRef<Path>>(
183 base_dir: P,
184) -> Result<Vec<std::path::PathBuf>, TestDataError> {
185 let base_dir = base_dir.as_ref();
186
187 if !base_dir.exists() {
188 return Ok(Vec::new());
189 }
190
191 let mut directories = Vec::new();
192
193 let read_dir = fs::read_dir(base_dir).map_err(TestDataError::Io)?;
195
196 for entry in read_dir {
197 let entry = entry.map_err(TestDataError::Io)?;
198 let path = entry.path();
199
200 if path.is_dir() {
201 let checksum_file = path.join("checksums.sha1");
202 if checksum_file.exists() {
203 directories.push(path);
204 }
205 }
206 }
207
208 Ok(directories)
209}
210
211pub fn ensure_test_data_available<P: AsRef<Path>>(path: P) -> Result<(), TestDataError> {
221 let path = path.as_ref();
222
223 let (_base_dir, specific_dirs) = if path.join("checksums.sha1").exists() {
225 (path.parent().unwrap_or(path), vec![path.to_path_buf()])
227 } else {
228 let discovered = discover_test_data_directories(path)?;
230 (path, discovered)
231 };
232
233 if specific_dirs.is_empty() {
234 return Ok(()); }
236
237 let mut all_missing_files = Vec::new();
239 let mut dirs_with_missing = Vec::new();
240
241 for dir in &specific_dirs {
242 let missing_files = get_missing_files(dir)?;
243 if !missing_files.is_empty() {
244 println!(
245 "Missing files in {}: {}",
246 dir.file_name().unwrap_or_default().to_string_lossy(),
247 missing_files.join(", ")
248 );
249 all_missing_files.extend(missing_files.iter().cloned());
250 dirs_with_missing.push(dir.clone());
251 }
252 }
253
254 if !all_missing_files.is_empty() {
256 if !cfg!(feature = "no-large-tests") {
257 println!("Attempting to download test data from GitHub...");
258
259 match download_test_data_from_github() {
260 Ok(_) => {
261 println!("Test data download completed successfully!");
262
263 for dir in &dirs_with_missing {
265 let still_missing = get_missing_files(dir)?;
266 if !still_missing.is_empty() {
267 return Err(TestDataError::DirectoryNotFound(format!(
268 "Download completed but still missing files in {}: {}",
269 dir.display(),
270 still_missing.join(", ")
271 )));
272 }
273 }
274 }
275 Err(e) => {
276 eprintln!("Error: Failed to download test data: {}", e);
277 eprintln!("To skip large tests, run: cargo test --features no-large-tests");
278 eprintln!("To download manually:");
279 eprintln!(" git clone --depth=1 https://github.com/varioustoxins/ustar.git temp_ustar");
280 eprintln!(" cp -r temp_ustar/ustar-parser/tests/test_data/* <your-project>/tests/test_data/");
281 eprintln!(" rm -rf temp_ustar");
282
283 return Err(TestDataError::DirectoryNotFound(format!(
284 "Missing test data files and download failed: {}",
285 e
286 )));
287 }
288 }
289 } else {
290 return Err(TestDataError::DirectoryNotFound(format!(
291 "Missing test data files: {}. Download disabled by --features no-large-tests.",
292 all_missing_files.join(", ")
293 )));
294 }
295 }
296
297 for dir in &specific_dirs {
299 verify_test_data_checksums(dir)?;
300 }
301
302 Ok(())
303}
304
/// Process-wide cache of the first download attempt's outcome, so the download
/// runs at most once however many callers request it. The error is kept as a
/// `String` so the cached result can be cloned for each caller.
static DOWNLOAD_RESULT: OnceLock<Result<(), String>> = OnceLock::new();
306
307fn download_test_data_from_github() -> Result<(), Box<dyn std::error::Error>> {
310 let result = DOWNLOAD_RESULT.get_or_init(|| perform_download().map_err(|e| e.to_string()));
312
313 result.clone().map_err(|e| e.into())
315}
316
317fn perform_download() -> Result<(), Box<dyn std::error::Error>> {
319 let rt = tokio::runtime::Runtime::new()?;
321 rt.block_on(download_github_archive())
322}
323
324async fn download_github_archive() -> Result<(), Box<dyn std::error::Error>> {
326 let archive_url = "https://github.com/varioustoxins/ustar/archive/refs/heads/main.zip";
327
328 println!("Downloading repository archive from GitHub...");
329
330 let response = reqwest::get(archive_url).await?;
332 if !response.status().is_success() {
333 return Err(format!("Failed to download archive: HTTP {}", response.status()).into());
334 }
335
336 let zip_bytes = response.bytes().await?;
337 println!(
338 "Downloaded {} bytes, extracting test data...",
339 zip_bytes.len()
340 );
341
342 let cursor = Cursor::new(zip_bytes);
344 let mut archive = zip::ZipArchive::new(cursor)?;
345
346 let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".to_string());
348 let target_base = format!("{}/tests/test_data", manifest_dir);
349
350 fs::create_dir_all(&target_base)?;
352
353 let mut extracted_files = 0;
354 let mut extracted_dirs = std::collections::HashSet::new();
355
356 for i in 0..archive.len() {
358 let mut file = archive.by_index(i)?;
359 let file_path = file.name();
360
361 if let Some(relative_path) = extract_test_data_path(file_path) {
363 let target_path = format!("{}/{}", target_base, relative_path);
364
365 if let Some(dir) = relative_path.split('/').next() {
367 if extracted_dirs.insert(dir.to_string()) {
368 println!("Extracting {}...", dir);
369 }
370 }
371
372 if let Some(parent) = Path::new(&target_path).parent() {
374 fs::create_dir_all(parent)?;
375 }
376
377 let mut target_file = fs::File::create(&target_path)?;
379 std::io::copy(&mut file, &mut target_file)?;
380 extracted_files += 1;
381 }
382 }
383
384 println!(
385 "Extracted {} files from {} directories",
386 extracted_files,
387 extracted_dirs.len()
388 );
389 Ok(())
390}
391
/// Maps an archive entry name to its path relative to the first
/// `/tests/test_data/` directory it contains.
///
/// Returns `None` when:
///
/// * the entry is not under a `/tests/test_data/` directory;
/// * the entry is a directory (nothing after the marker, or a trailing `/`);
/// * the relative path has an empty, `.`, or `..` segment or contains a
///   backslash — such names could escape the extraction directory, and
///   archive entry names are untrusted input.
fn extract_test_data_path(archive_path: &str) -> Option<String> {
    const MARKER: &str = "/tests/test_data/";

    let (_, relative) = archive_path.split_once(MARKER)?;

    if relative.is_empty() || relative.ends_with('/') {
        return None;
    }

    let is_safe = relative.split('/').all(|segment| {
        !segment.is_empty() && segment != "." && segment != ".." && !segment.contains('\\')
    });

    is_safe.then(|| relative.to_string())
}
405
/// Returns the platform's temporary directory, as reported by
/// `std::env::temp_dir`.
// Nothing in the visible part of this file calls this helper, hence the lint
// suppression.
#[allow(dead_code)]
fn get_temp_dir() -> std::path::PathBuf {
    use std::env::temp_dir;

    temp_dir()
}
412
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// SHA-1 digest of the text `Hello, world!`.
    const HELLO_WORLD_SHA1: &str = "943a702d06f34599aee1f8da8ef9f7296031d699";

    /// Writes `content` as the `checksums.sha1` manifest inside `dir`.
    fn write_manifest(dir: &Path, content: &str) {
        fs::write(dir.join("checksums.sha1"), content).unwrap();
    }

    /// Manifest entries are returned by name, in order.
    #[test]
    fn test_get_expected_files() {
        let sandbox = TempDir::new().unwrap();
        write_manifest(sandbox.path(), "abc123 file1.txt\ndef456 file2.txt\n");

        let listed = get_expected_files(sandbox.path()).unwrap();
        assert_eq!(listed, vec!["file1.txt", "file2.txt"]);
    }

    /// Only the entry without a file on disk is reported as missing.
    #[test]
    fn test_missing_files_detection() {
        let sandbox = TempDir::new().unwrap();
        write_manifest(sandbox.path(), "abc123 file1.txt\ndef456 file2.txt\n");
        fs::write(sandbox.path().join("file1.txt"), "content").unwrap();

        let absent = get_missing_files(sandbox.path()).unwrap();
        assert_eq!(absent, vec!["file2.txt"]);
    }

    /// A file whose digest matches the manifest verifies successfully.
    #[test]
    fn test_sha1_verification_success() {
        let sandbox = TempDir::new().unwrap();
        fs::write(sandbox.path().join("test.txt"), "Hello, world!").unwrap();
        write_manifest(
            sandbox.path(),
            &format!("{} test.txt\n", HELLO_WORLD_SHA1),
        );

        assert!(verify_test_data_checksums(sandbox.path()).is_ok());
    }

    /// A wrong digest in the manifest yields `ChecksumMismatch` with both
    /// digests reported.
    #[test]
    fn test_sha1_verification_failure() {
        let sandbox = TempDir::new().unwrap();
        fs::write(sandbox.path().join("test.txt"), "Hello, world!").unwrap();

        let wrong_hash = "0000000000000000000000000000000000000000";
        write_manifest(sandbox.path(), &format!("{} test.txt\n", wrong_hash));

        let outcome = verify_test_data_checksums(sandbox.path());
        assert!(outcome.is_err());

        let Err(TestDataError::ChecksumMismatch {
            file,
            expected,
            actual,
        }) = outcome
        else {
            panic!("Expected ChecksumMismatch error");
        };

        assert_eq!(file, "test.txt");
        assert_eq!(expected, wrong_hash);
        assert_eq!(actual, HELLO_WORLD_SHA1);
    }

    /// A well-formed line parses; a malformed one is rejected.
    #[test]
    fn test_parse_checksum_line() {
        let parsed = parse_checksum_line("abc123 file.txt");
        assert_eq!(
            parsed,
            Some(("abc123".to_string(), "file.txt".to_string()))
        );

        assert_eq!(parse_checksum_line("invalid line"), None);
    }

    /// Only subdirectories that contain a manifest are discovered.
    #[test]
    fn test_discover_test_data_directories() {
        let sandbox = TempDir::new().unwrap();

        let with_manifest_a = sandbox.path().join("dir1");
        let without_manifest = sandbox.path().join("dir2");
        let with_manifest_b = sandbox.path().join("dir3");
        for dir in [&with_manifest_a, &without_manifest, &with_manifest_b] {
            fs::create_dir_all(dir).unwrap();
        }

        write_manifest(&with_manifest_a, "abc123 file1.txt\n");
        write_manifest(&with_manifest_b, "def456 file3.txt\n");
        fs::write(without_manifest.join("some_other_file.txt"), "content").unwrap();

        let discovered = discover_test_data_directories(sandbox.path()).unwrap();
        assert_eq!(discovered.len(), 2);

        let names: Vec<String> = discovered
            .iter()
            .map(|p| p.file_name().unwrap().to_string_lossy().to_string())
            .collect();

        assert!(names.contains(&"dir1".to_string()));
        assert!(names.contains(&"dir3".to_string()));
        assert!(!names.contains(&"dir2".to_string()));
    }
}