use std::io::Read as _;
use std::path::Path;
use crate::ArchiveError;
use crate::Result;
/// Number of leading bytes read from a file when sniffing its format.
///
/// 262 is exactly enough to cover the deepest entry in [`MAGIC_SIGNATURES`]:
/// the 5-byte `ustar` marker that starts at offset 257 (257 + 5 = 262).
const MAGIC_READ_LEN: usize = 262;
/// Known magic-byte signatures as `(offset, expected bytes, archive type)`.
///
/// `detect_format_from_magic` walks this table from top to bottom and returns
/// the type of the first entry whose bytes appear at the given offset, so the
/// order of entries is significant if two signatures could ever overlap.
///
/// The compressed-stream entries identify only the compression wrapper; the
/// table maps them straight to the corresponding `Tar*` variant without any
/// check that the payload is actually a tar archive.
const MAGIC_SIGNATURES: &[(usize, &[u8], ArchiveType)] = &[
// 1f 8b -> reported as gzip-compressed tar.
(0, b"\x1f\x8b", ArchiveType::TarGz),
// 28 b5 2f fd -> reported as zstd-compressed tar.
(0, b"\x28\xb5\x2f\xfd", ArchiveType::TarZst),
// ASCII "BZh" -> reported as bzip2-compressed tar.
(0, b"\x42\x5a\x68", ArchiveType::TarBz2),
// ASCII "PK" followed by 03 04, 05 06 or 07 08 -> ZIP. The tests exercise
// the 05 06 form as an empty archive and the 07 08 form as a split archive.
(0, b"\x50\x4b\x03\x04", ArchiveType::Zip),
(0, b"\x50\x4b\x05\x06", ArchiveType::Zip),
(0, b"\x50\x4b\x07\x08", ArchiveType::Zip),
// ASCII "7z" followed by bc af 27 1c -> 7z.
(0, b"\x37\x7a\xbc\xaf\x27\x1c", ArchiveType::SevenZ),
// fd, ASCII "7zXZ", 00 -> reported as xz-compressed tar.
(0, b"\xfd\x37\x7a\x58\x5a\x00", ArchiveType::TarXz),
// "ustar" at offset 257 -> plain tar. This is the only entry that does not
// sit at the start of the file, and it dictates `MAGIC_READ_LEN`.
(257, b"ustar", ArchiveType::Tar),
];
/// File extensions (without the leading dot) of container formats that are
/// treated as ordinary ZIP archives.
///
/// Every entry is lower-case ASCII; lookups through [`is_zip_family_alias`]
/// ignore ASCII case.
pub const ZIP_FAMILY_ALIASES: &[&str] = &[
    "jar", "war", "ear", "nar", "nbm", "apk", "aab", "ipa", "appx", "msix", "whl", "vsix", "xpi",
    "epub",
];

/// Returns `true` when `ext` is one of [`ZIP_FAMILY_ALIASES`], compared
/// without regard to ASCII case.
///
/// `ext` is expected to be a bare extension such as `"jar"`; a leading dot is
/// not stripped, so `".jar"` does not match.
pub(crate) fn is_zip_family_alias(ext: &str) -> bool {
    ZIP_FAMILY_ALIASES
        .iter()
        .any(|alias| alias.eq_ignore_ascii_case(ext))
}
/// Archive formats that format detection can report.
///
/// The `Tar*` variants name a tar archive together with its compression
/// wrapper; the extension and magic-byte tables in this file decide which
/// variant a given file maps to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ArchiveType {
/// Uncompressed tar (`.tar`, or a `ustar` marker at offset 257).
Tar,
/// Gzip-compressed tar (`.tar.gz`, `.tgz`).
TarGz,
/// Bzip2-compressed tar (`.bz2`, `.tbz`, `.tbz2`).
TarBz2,
/// Xz-compressed tar (`.xz`, `.txz`).
TarXz,
/// Zstandard-compressed tar (`.zst`, `.tzst`).
TarZst,
/// ZIP archive (`.zip` and every extension in `ZIP_FAMILY_ALIASES`).
Zip,
/// 7z archive (`.7z`).
SevenZ,
}
/// Determines the archive type of `path`, trusting file content over file name.
///
/// Both detectors are consulted: the extension-based one and the magic-byte
/// sniffer. Whenever the sniffer recognises the content its answer is used,
/// even if the extension says otherwise; the extension is only the fallback
/// for files that cannot be read or carry no known signature.
///
/// # Errors
///
/// Returns [`ArchiveError::UnknownFormat`] carrying `path` when neither the
/// extension nor the leading bytes identify a supported format.
pub fn detect_format(path: &Path) -> Result<ArchiveType> {
    // The specific extension error is irrelevant here: every failure is
    // reported as `UnknownFormat` for this same path below.
    let named = detect_format_from_extension(path).ok();
    let sniffed = detect_format_from_magic(path);

    sniffed
        .or(named)
        .ok_or_else(|| ArchiveError::UnknownFormat {
            path: path.to_path_buf(),
        })
}
/// Determines the archive type from the file name alone.
///
/// The final extension is matched without regard to ASCII case. A `gz`
/// extension is accepted only when the remaining file stem ends in `.tar`
/// (also compared case-insensitively, so `ARCHIVE.TAR.GZ` is recognised just
/// like `archive.tar.gz`); a bare `.gz` file is not an archive this crate
/// handles.
///
/// NOTE(review): unlike `gz`, the `bz2`, `xz` and `zst` extensions are accepted
/// without a `.tar` stem. That asymmetry is preserved here because callers may
/// rely on it — confirm whether it is intended.
///
/// # Errors
///
/// Returns [`ArchiveError::UnknownFormat`] carrying `path` when the path has no
/// extension, the extension is not valid UTF-8, or it is not a supported one.
pub(crate) fn detect_format_from_extension(path: &Path) -> Result<ArchiveType> {
    // Every failure path reports the same error, so build it in one place.
    let unknown = || ArchiveError::UnknownFormat {
        path: path.to_path_buf(),
    };

    let extension = path
        .extension()
        .and_then(|e| e.to_str())
        .ok_or_else(unknown)?;

    match extension.to_ascii_lowercase().as_str() {
        "tar" => Ok(ArchiveType::Tar),
        "tgz" => Ok(ArchiveType::TarGz),
        "gz" => {
            // Lower-case the stem before the suffix test so that the check is
            // as case-insensitive as the extension match itself.
            let wraps_tar = path.file_stem().is_some_and(|stem| {
                stem.to_string_lossy()
                    .to_ascii_lowercase()
                    .ends_with(".tar")
            });
            if wraps_tar {
                Ok(ArchiveType::TarGz)
            } else {
                Err(unknown())
            }
        }
        "bz2" | "tbz" | "tbz2" => Ok(ArchiveType::TarBz2),
        "xz" | "txz" => Ok(ArchiveType::TarXz),
        "zst" | "tzst" => Ok(ArchiveType::TarZst),
        "zip" => Ok(ArchiveType::Zip),
        "7z" => Ok(ArchiveType::SevenZ),
        ext if is_zip_family_alias(ext) => Ok(ArchiveType::Zip),
        _ => Err(unknown()),
    }
}
/// Sniffs the archive type from the leading bytes of the file at `path`.
///
/// At most `MAGIC_READ_LEN` bytes are read; a shorter file simply yields a
/// shorter header, against which only the signatures that fit are matched.
///
/// Returns `None` when the file cannot be opened or read, or when no entry of
/// `MAGIC_SIGNATURES` matches. Entries are tried in table order and the first
/// match wins.
fn detect_format_from_magic(path: &Path) -> Option<ArchiveType> {
    let file = std::fs::File::open(path).ok()?;

    // `take` caps the read at the sniffing window, and `read_to_end` keeps
    // reading until that cap or EOF, retrying interrupted reads on its own.
    // The `usize` -> `u64` cast is a lossless widening.
    let mut header = Vec::with_capacity(MAGIC_READ_LEN);
    file.take(MAGIC_READ_LEN as u64)
        .read_to_end(&mut header)
        .ok()?;

    for &(start, signature, kind) in MAGIC_SIGNATURES {
        let stop = start.checked_add(signature.len())?;
        // `get` yields `None` when the header is too short for this entry.
        if header.get(start..stop) == Some(signature) {
            return Some(kind);
        }
    }
    None
}
// Unit tests for format detection.
//
// The first group uses bare file names that are not created by the tests, so
// (assuming no such files exist in the working directory) the magic sniffer
// finds nothing and only the extension logic is exercised. The second group
// writes real files into a temporary directory to exercise magic-byte
// detection and its interaction with the extension.
#[cfg(test)]
// Tests unwrap freely: a panic is the failure signal.
#[allow(clippy::unwrap_used)]
mod tests {
use super::*;
use std::path::PathBuf;
// ---- Extension-based detection -------------------------------------------
#[test]
fn test_detect_tar() {
let path = PathBuf::from("archive.tar");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::Tar);
}
#[test]
fn test_detect_tar_gz_still_works() {
let path = PathBuf::from("archive.tar.gz");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::TarGz);
let path2 = PathBuf::from("archive.tgz");
assert_eq!(detect_format(&path2).unwrap(), ArchiveType::TarGz);
}
/// A `.gz` file whose stem does not end in `.tar` is rejected.
#[test]
fn test_detect_bare_gz_returns_unknown_format() {
let path = PathBuf::from("archive.gz");
assert!(matches!(
detect_format(&path),
Err(ArchiveError::UnknownFormat { .. })
));
}
/// The rejection error reports the offending path unchanged.
#[test]
fn test_detect_bare_gz_error_carries_path() {
let path = PathBuf::from("archive.gz");
let err = detect_format(&path).unwrap_err();
assert!(matches!(
err,
ArchiveError::UnknownFormat { path: ref p } if p == &PathBuf::from("archive.gz")
));
}
#[test]
fn test_detect_tar_bz2() {
let path = PathBuf::from("archive.tar.bz2");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::TarBz2);
let path2 = PathBuf::from("archive.tbz");
assert_eq!(detect_format(&path2).unwrap(), ArchiveType::TarBz2);
let path3 = PathBuf::from("archive.tbz2");
assert_eq!(detect_format(&path3).unwrap(), ArchiveType::TarBz2);
}
#[test]
fn test_detect_tar_xz() {
let path = PathBuf::from("archive.tar.xz");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::TarXz);
let path2 = PathBuf::from("archive.txz");
assert_eq!(detect_format(&path2).unwrap(), ArchiveType::TarXz);
}
#[test]
fn test_detect_tar_zst() {
let path = PathBuf::from("archive.tar.zst");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::TarZst);
let path2 = PathBuf::from("archive.tzst");
assert_eq!(detect_format(&path2).unwrap(), ArchiveType::TarZst);
}
#[test]
fn test_detect_zip() {
let path = PathBuf::from("archive.zip");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::Zip);
}
/// Every alias in `ZIP_FAMILY_ALIASES` maps to ZIP, in lower and upper case.
#[test]
fn test_detect_zip_family_extensions() {
for ext in ZIP_FAMILY_ALIASES {
let path = PathBuf::from(format!("archive.{ext}"));
assert_eq!(
detect_format(&path).unwrap(),
ArchiveType::Zip,
"{ext} should detect as ZIP",
);
let upper = PathBuf::from(format!("archive.{}", ext.to_ascii_uppercase()));
assert_eq!(
detect_format(&upper).unwrap(),
ArchiveType::Zip,
"{ext} uppercase should detect as ZIP",
);
}
}
#[test]
fn test_detect_7z() {
let path = PathBuf::from("archive.7z");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::SevenZ);
}
#[test]
fn test_detect_7z_case_insensitive() {
let path = PathBuf::from("ARCHIVE.7Z");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::SevenZ);
let path2 = PathBuf::from("Archive.7Z");
assert_eq!(detect_format(&path2).unwrap(), ArchiveType::SevenZ);
}
#[test]
fn test_detect_unknown_format() {
let path = PathBuf::from("archive.rar");
assert!(matches!(
detect_format(&path),
Err(ArchiveError::UnknownFormat { .. })
));
}
#[test]
fn test_detect_unknown_format_error_carries_path() {
let path = PathBuf::from("archive.rar");
let err = detect_format(&path).unwrap_err();
assert!(matches!(
err,
ArchiveError::UnknownFormat { path: ref p } if p == &PathBuf::from("archive.rar")
));
}
// ---- Derived trait behaviour of `ArchiveType` ----------------------------
#[test]
fn test_archive_type_sevenz_equality() {
assert_eq!(ArchiveType::SevenZ, ArchiveType::SevenZ);
assert_ne!(ArchiveType::SevenZ, ArchiveType::Zip);
}
#[test]
fn test_archive_type_sevenz_debug() {
let format = ArchiveType::SevenZ;
let debug_str = format!("{format:?}");
assert_eq!(debug_str, "SevenZ");
}
// ---- Helpers for magic-byte tests ----------------------------------------
/// Writes a file of `MAGIC_READ_LEN` zero bytes named `name` inside `dir`,
/// with `header` copied over its start (truncated if it is longer than the
/// file), and returns the file's path.
fn write_magic_file(dir: &tempfile::TempDir, name: &str, header: &[u8]) -> PathBuf {
let path = dir.path().join(name);
let mut data = vec![0u8; MAGIC_READ_LEN];
let n = header.len().min(data.len());
data[..n].copy_from_slice(&header[..n]);
std::fs::write(&path, &data).unwrap();
path
}
/// Writes a file of `MAGIC_READ_LEN` zero bytes named `name` inside `dir`
/// with `sig` placed at `offset`, and returns the file's path.
///
/// If `sig` would not fit inside the file it is silently skipped and the file
/// stays all zeros.
fn write_magic_file_at_offset(
dir: &tempfile::TempDir,
name: &str,
offset: usize,
sig: &[u8],
) -> PathBuf {
let path = dir.path().join(name);
let mut data = vec![0u8; MAGIC_READ_LEN];
let end = offset + sig.len();
if end <= data.len() {
data[offset..end].copy_from_slice(sig);
}
std::fs::write(&path, &data).unwrap();
path
}
// ---- Magic-byte detection on extension-less files ------------------------
#[test]
fn test_magic_zip_no_extension() {
let dir = tempfile::tempdir().unwrap();
let path = write_magic_file(&dir, "data", b"\x50\x4b\x03\x04");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::Zip);
}
#[test]
fn test_magic_gzip_no_extension() {
let dir = tempfile::tempdir().unwrap();
let path = write_magic_file(&dir, "data", b"\x1f\x8b\x00");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::TarGz);
}
#[test]
fn test_magic_bz2_no_extension() {
let dir = tempfile::tempdir().unwrap();
let path = write_magic_file(&dir, "data", b"\x42\x5a\x68");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::TarBz2);
}
#[test]
fn test_magic_xz_no_extension() {
let dir = tempfile::tempdir().unwrap();
let path = write_magic_file(&dir, "data", b"\xfd\x37\x7a\x58\x5a\x00");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::TarXz);
}
#[test]
fn test_magic_zstd_no_extension() {
let dir = tempfile::tempdir().unwrap();
let path = write_magic_file(&dir, "data", b"\x28\xb5\x2f\xfd");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::TarZst);
}
#[test]
fn test_magic_sevenz_no_extension() {
let dir = tempfile::tempdir().unwrap();
let path = write_magic_file(&dir, "data", b"\x37\x7a\xbc\xaf\x27\x1c");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::SevenZ);
}
#[test]
fn test_magic_tar_ustar_no_extension() {
let dir = tempfile::tempdir().unwrap();
let path = write_magic_file_at_offset(&dir, "data", 257, b"ustar");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::Tar);
}
// ---- Interaction between extension and magic bytes -----------------------
/// Content that contradicts the extension overrides it.
#[test]
fn test_magic_wins_over_wrong_extension() {
let dir = tempfile::tempdir().unwrap();
let path = write_magic_file(&dir, "archive.zip", b"\x1f\x8b\x00");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::TarGz);
}
/// A readable file with no recognised signature (four zero bytes) falls back
/// to the extension.
#[test]
fn test_extension_wins_when_no_magic_mismatch() {
let dir = tempfile::tempdir().unwrap();
let path = dir.path().join("archive.zip");
std::fs::write(&path, b"\x00\x00\x00\x00").unwrap();
assert_eq!(detect_format(&path).unwrap(), ArchiveType::Zip);
}
#[test]
fn test_nonexistent_file_without_extension_returns_unknown() {
let path = PathBuf::from("/nonexistent/path/to/archive");
assert!(matches!(
detect_format(&path),
Err(ArchiveError::UnknownFormat { .. })
));
}
/// An unreadable file is still classified when its extension is known.
#[test]
fn test_nonexistent_file_with_known_extension_uses_extension() {
let path = PathBuf::from("/nonexistent/archive.zip");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::Zip);
}
// ---- Alternative ZIP signatures and tar boundary cases -------------------
#[test]
fn test_magic_empty_zip_eocd_no_extension() {
let dir = tempfile::tempdir().unwrap();
let path = write_magic_file(&dir, "empty", b"\x50\x4b\x05\x06");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::Zip);
}
#[test]
fn test_magic_split_zip_no_extension() {
let dir = tempfile::tempdir().unwrap();
let path = write_magic_file(&dir, "split", b"\x50\x4b\x07\x08");
assert_eq!(detect_format(&path).unwrap(), ArchiveType::Zip);
}
/// File is exactly `MAGIC_READ_LEN` bytes, so `ustar` ends on the last byte.
// NOTE(review): the file is named `minimal.tar`, so the extension alone already
// yields `Tar`; this test would still pass if the magic sniffer missed the
// signature. An extension-less name would pin the boundary behaviour.
#[test]
fn test_magic_tar_ustar_exact_boundary_file() {
let dir = tempfile::tempdir().unwrap();
let path = dir.path().join("minimal.tar");
let mut data = vec![0u8; MAGIC_READ_LEN];
data[257..262].copy_from_slice(b"ustar");
std::fs::write(&path, &data).unwrap();
assert_eq!(detect_format(&path).unwrap(), ArchiveType::Tar);
}
/// File is 263 bytes, one more than the sniffing window, and has no extension,
/// so detection relies on the magic bytes alone.
#[test]
fn test_magic_tar_ustar_minimal_263_byte_file() {
let dir = tempfile::tempdir().unwrap();
let path = dir.path().join("data");
let mut data = vec![0u8; 263];
data[257..262].copy_from_slice(b"ustar");
std::fs::write(&path, &data).unwrap();
assert_eq!(detect_format(&path).unwrap(), ArchiveType::Tar);
}
}