#![forbid(unsafe_code)]
#![doc = include_str!("../README.md")]
use core::{convert::Infallible, fmt, str::FromStr};
/// Archive container formats that this crate can name and detect.
///
/// The derived `Ord`/`PartialOrd` follow the declaration order of the variants, and the
/// derived `Default` is [`ArchiveFormat::Unknown`].
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum ArchiveFormat {
/// Labelled `"tar"`.
Tar,
/// Labelled `"zip"`.
Zip,
/// Labelled `"7z"`; `from_label` also accepts `sevenzip` and `seven-zip`.
SevenZip,
/// Labelled `"cpio"`.
Cpio,
/// Labelled `"ar"`; extension detection also maps `a` and `deb` to this variant.
Ar,
/// Labelled `"iso"`; `from_label` also accepts `iso9660` and `iso-9660`, and extension
/// detection also maps `img` to this variant.
Iso,
/// Labelled `"rar"`; `from_label` also accepts `rar4` and `rar5`.
Rar,
/// Labelled `"cab"`; `from_label` also accepts `cabinet`.
Cab,
/// Labelled `"warc"`; `arc` (and the label `web-archive`) map to this variant as well.
Warc,
/// Labelled `"mtree"`.
Mtree,
/// Fallback for any label or extension that is not recognised; this is the default value.
#[default]
Unknown,
}
impl ArchiveFormat {
#[must_use]
pub const fn as_str(self) -> &'static str {
match self {
Self::Tar => "tar",
Self::Zip => "zip",
Self::SevenZip => "7z",
Self::Cpio => "cpio",
Self::Ar => "ar",
Self::Iso => "iso",
Self::Rar => "rar",
Self::Cab => "cab",
Self::Warc => "warc",
Self::Mtree => "mtree",
Self::Unknown => "unknown",
}
}
#[must_use]
pub const fn extension(self) -> Option<&'static str> {
match self {
Self::Tar => Some("tar"),
Self::Zip => Some("zip"),
Self::SevenZip => Some("7z"),
Self::Cpio => Some("cpio"),
Self::Ar => Some("ar"),
Self::Iso => Some("iso"),
Self::Rar => Some("rar"),
Self::Cab => Some("cab"),
Self::Warc => Some("warc"),
Self::Mtree => Some("mtree"),
Self::Unknown => None,
}
}
#[must_use]
pub fn from_label(input: &str) -> Self {
match input.trim().to_ascii_lowercase().as_str() {
"tar" => Self::Tar,
"zip" => Self::Zip,
"7z" | "sevenzip" | "seven-zip" => Self::SevenZip,
"cpio" => Self::Cpio,
"ar" => Self::Ar,
"iso" | "iso9660" | "iso-9660" => Self::Iso,
"rar" | "rar4" | "rar5" => Self::Rar,
"cab" | "cabinet" => Self::Cab,
"warc" | "arc" | "web-archive" => Self::Warc,
"mtree" => Self::Mtree,
_ => Self::Unknown,
}
}
#[must_use]
pub fn from_extension(input: &str) -> Self {
ArchiveEncoding::from_extension(input).archive
}
#[must_use]
pub const fn is_known(self) -> bool {
!matches!(self, Self::Unknown)
}
}
/// Displays the format as its canonical label (see `ArchiveFormat::as_str`).
impl fmt::Display for ArchiveFormat {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label: &'static str = (*self).as_str();
        f.write_str(label)
    }
}
/// Parsing never fails: unrecognised input becomes `ArchiveFormat::Unknown`.
impl FromStr for ArchiveFormat {
    type Err = Infallible;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let format = Self::from_label(s);
        Ok(format)
    }
}
/// Compression layers that this crate can name and detect.
///
/// The derived `Ord`/`PartialOrd` follow the declaration order of the variants, and the
/// derived `Default` is [`CompressionFormat::None`].
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum CompressionFormat {
/// No compression; labelled `"none"` (the labels `""` and `stored` also map here).
/// This is the default value.
#[default]
None,
/// Labelled `"gzip"`, extension `gz`.
Gzip,
/// Labelled `"bzip2"`, extension `bz2`.
Bzip2,
/// Labelled `"xz"`, extension `xz`; `lzma` is mapped to this variant as well.
Xz,
/// Labelled `"zstd"`, extension `zst`; `from_label` also accepts `zstandard`.
Zstd,
/// Labelled `"brotli"`, extension `br`.
Brotli,
/// Labelled `"lz4"`, extension `lz4`.
Lz4,
/// Fallback for any label or extension that is not recognised. It has no extension and is
/// not reported as compressed by `is_compressed`.
Unknown,
}
impl CompressionFormat {
#[must_use]
pub const fn as_str(self) -> &'static str {
match self {
Self::None => "none",
Self::Gzip => "gzip",
Self::Bzip2 => "bzip2",
Self::Xz => "xz",
Self::Zstd => "zstd",
Self::Brotli => "brotli",
Self::Lz4 => "lz4",
Self::Unknown => "unknown",
}
}
#[must_use]
pub const fn extension(self) -> Option<&'static str> {
match self {
Self::Gzip => Some("gz"),
Self::Bzip2 => Some("bz2"),
Self::Xz => Some("xz"),
Self::Zstd => Some("zst"),
Self::Brotli => Some("br"),
Self::Lz4 => Some("lz4"),
Self::None | Self::Unknown => None,
}
}
#[must_use]
pub fn from_label(input: &str) -> Self {
match input.trim().to_ascii_lowercase().as_str() {
"" | "none" | "stored" => Self::None,
"gz" | "gzip" => Self::Gzip,
"bz2" | "bzip2" => Self::Bzip2,
"xz" | "lzma" => Self::Xz,
"zst" | "zstd" | "zstandard" => Self::Zstd,
"br" | "brotli" => Self::Brotli,
"lz4" => Self::Lz4,
_ => Self::Unknown,
}
}
#[must_use]
pub fn from_extension(input: &str) -> Self {
ArchiveEncoding::from_extension(input).compression
}
#[must_use]
pub const fn is_compressed(self) -> bool {
!matches!(self, Self::None | Self::Unknown)
}
}
/// Displays the format as its canonical label (see `CompressionFormat::as_str`).
impl fmt::Display for CompressionFormat {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label: &'static str = (*self).as_str();
        f.write_str(label)
    }
}
/// Parsing never fails: unrecognised input becomes `CompressionFormat::Unknown`.
impl FromStr for CompressionFormat {
    type Err = Infallible;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let format = Self::from_label(s);
        Ok(format)
    }
}
/// An archive container paired with the compression applied to it.
///
/// The derived `Default` is `ArchiveFormat::Unknown` with `CompressionFormat::None`; use
/// `ArchiveEncoding::unknown` when the compression is not known either.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct ArchiveEncoding {
/// Archive container, or `ArchiveFormat::Unknown` when none was recognised.
pub archive: ArchiveFormat,
/// Compression layer, `CompressionFormat::None` when absent, or
/// `CompressionFormat::Unknown` when it could not be determined.
pub compression: CompressionFormat,
}
impl ArchiveEncoding {
#[must_use]
pub const fn new(archive: ArchiveFormat, compression: CompressionFormat) -> Self {
Self {
archive,
compression,
}
}
#[must_use]
pub const fn unknown() -> Self {
Self::new(ArchiveFormat::Unknown, CompressionFormat::Unknown)
}
#[must_use]
pub fn from_extension(input: &str) -> Self {
let normalized = input.trim().to_ascii_lowercase();
let leaf = normalized
.rsplit(['/', '\\'])
.next()
.unwrap_or(normalized.as_str());
let extension_like = leaf.trim_start_matches('.');
let parts = extension_like
.split('.')
.filter(|part| !part.is_empty())
.collect::<Vec<_>>();
let Some(last) = parts.last().copied() else {
return Self::unknown();
};
if Self::is_seven_zip_volume(&parts) {
return Self::new(ArchiveFormat::SevenZip, CompressionFormat::None);
}
if Self::is_rar_part(&parts) {
return Self::new(ArchiveFormat::Rar, CompressionFormat::None);
}
if let Some(previous) = parts
.len()
.checked_sub(2)
.and_then(|index| parts.get(index))
.copied()
&& let Some(encoding) = Self::from_combined_parts(previous, last)
{
return encoding;
}
Self::from_single_extension(last)
}
fn from_combined_parts(previous: &str, last: &str) -> Option<Self> {
let compression = Self::compression_from_extension_part(last)?;
let archive = match previous {
"tar" => ArchiveFormat::Tar,
"cpio" => ArchiveFormat::Cpio,
"warc" | "arc" => ArchiveFormat::Warc,
"mtree" => ArchiveFormat::Mtree,
_ => return None,
};
Some(Self::new(archive, compression))
}
fn compression_from_extension_part(extension: &str) -> Option<CompressionFormat> {
match extension {
"gz" | "gzip" => Some(CompressionFormat::Gzip),
"bz2" | "bzip2" => Some(CompressionFormat::Bzip2),
"xz" | "lzma" => Some(CompressionFormat::Xz),
"zst" | "zstd" => Some(CompressionFormat::Zstd),
"br" | "brotli" => Some(CompressionFormat::Brotli),
"lz4" => Some(CompressionFormat::Lz4),
_ => None,
}
}
fn is_seven_zip_volume(parts: &[&str]) -> bool {
let Some(last) = parts.last().copied() else {
return false;
};
let Some(previous) = parts
.len()
.checked_sub(2)
.and_then(|index| parts.get(index))
.copied()
else {
return false;
};
previous == "7z" && is_three_digit_part(last)
}
fn is_rar_part(parts: &[&str]) -> bool {
let Some(last) = parts.last().copied() else {
return false;
};
if is_rar_old_part(last) {
return true;
}
let Some(previous) = parts
.len()
.checked_sub(2)
.and_then(|index| parts.get(index))
.copied()
else {
return false;
};
last == "rar" && is_part_label(previous)
}
fn from_single_extension(extension: &str) -> Self {
match extension {
"tgz" => Self::new(ArchiveFormat::Tar, CompressionFormat::Gzip),
"tbz" | "tbz2" => Self::new(ArchiveFormat::Tar, CompressionFormat::Bzip2),
"txz" | "tlz" => Self::new(ArchiveFormat::Tar, CompressionFormat::Xz),
"tzst" => Self::new(ArchiveFormat::Tar, CompressionFormat::Zstd),
"tbr" => Self::new(ArchiveFormat::Tar, CompressionFormat::Brotli),
"tar" => Self::new(ArchiveFormat::Tar, CompressionFormat::None),
"zip" => Self::new(ArchiveFormat::Zip, CompressionFormat::None),
"7z" => Self::new(ArchiveFormat::SevenZip, CompressionFormat::None),
"cpio" => Self::new(ArchiveFormat::Cpio, CompressionFormat::None),
"a" | "ar" | "deb" => Self::new(ArchiveFormat::Ar, CompressionFormat::None),
"iso" | "img" => Self::new(ArchiveFormat::Iso, CompressionFormat::None),
"rar" => Self::new(ArchiveFormat::Rar, CompressionFormat::None),
"cab" => Self::new(ArchiveFormat::Cab, CompressionFormat::None),
"warc" | "arc" => Self::new(ArchiveFormat::Warc, CompressionFormat::None),
"mtree" => Self::new(ArchiveFormat::Mtree, CompressionFormat::None),
"gz" | "gzip" => Self::new(ArchiveFormat::Unknown, CompressionFormat::Gzip),
"bz2" | "bzip2" => Self::new(ArchiveFormat::Unknown, CompressionFormat::Bzip2),
"xz" | "lzma" => Self::new(ArchiveFormat::Unknown, CompressionFormat::Xz),
"zst" | "zstd" => Self::new(ArchiveFormat::Unknown, CompressionFormat::Zstd),
"br" | "brotli" => Self::new(ArchiveFormat::Unknown, CompressionFormat::Brotli),
"lz4" => Self::new(ArchiveFormat::Unknown, CompressionFormat::Lz4),
_ => Self::unknown(),
}
}
#[must_use]
pub fn from_filename(input: &str) -> Self {
Self::from_extension(input)
}
#[must_use]
pub const fn has_archive(self) -> bool {
self.archive.is_known()
}
#[must_use]
pub const fn is_compressed(self) -> bool {
self.compression.is_compressed()
}
}
/// Returns `true` when `part` consists of exactly three ASCII digits (e.g. `001`).
fn is_three_digit_part(part: &str) -> bool {
    matches!(
        part.as_bytes(),
        [first, second, third]
            if first.is_ascii_digit() && second.is_ascii_digit() && third.is_ascii_digit()
    )
}
/// Returns `true` when `part` is a lowercase `r` followed by exactly two ASCII digits
/// (e.g. `r00`).
fn is_rar_old_part(part: &str) -> bool {
    matches!(
        part.as_bytes(),
        [b'r', tens, ones] if tens.is_ascii_digit() && ones.is_ascii_digit()
    )
}
/// Returns `true` when `part` is the literal `part` followed by one or more ASCII digits
/// (e.g. `part1`, `part01`).
fn is_part_label(part: &str) -> bool {
    match part.strip_prefix("part") {
        Some(digits) if !digits.is_empty() => digits.bytes().all(|digit| digit.is_ascii_digit()),
        _ => false,
    }
}
/// Displays the archive label alone when the compression is `CompressionFormat::None`,
/// otherwise `<archive>+<compression>`.
impl fmt::Display for ArchiveEncoding {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            archive,
            compression,
        } = *self;
        match compression {
            CompressionFormat::None => f.write_str(archive.as_str()),
            _ => write!(f, "{archive}+{compression}"),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::{ArchiveEncoding, ArchiveFormat, CompressionFormat};

    /// Compound suffixes and shorthands resolve to the expected archive/compression pair.
    #[test]
    fn detects_common_archive_encodings() {
        let cases = [
            ("release.tar.zst", ArchiveFormat::Tar, CompressionFormat::Zstd),
            ("bundle.tgz", ArchiveFormat::Tar, CompressionFormat::Gzip),
            ("assets.zip", ArchiveFormat::Zip, CompressionFormat::None),
            ("initramfs.cpio.gz", ArchiveFormat::Cpio, CompressionFormat::Gzip),
            ("crawl.warc.gz", ArchiveFormat::Warc, CompressionFormat::Gzip),
            ("manifest.mtree.gz", ArchiveFormat::Mtree, CompressionFormat::Gzip),
        ];
        for (input, archive, compression) in cases {
            assert_eq!(
                ArchiveEncoding::from_extension(input),
                ArchiveEncoding::new(archive, compression),
                "unexpected encoding for {input:?}"
            );
        }
    }

    /// The per-enum `from_extension` helpers pick the right half of the detected encoding.
    #[test]
    fn detects_extension_labels() {
        let archives = [
            (".tar", ArchiveFormat::Tar),
            ("libexample.a", ArchiveFormat::Ar),
            ("installer.img", ArchiveFormat::Iso),
            ("bundle.7z.001", ArchiveFormat::SevenZip),
            ("backup.part1.rar", ArchiveFormat::Rar),
            ("driver.cab", ArchiveFormat::Cab),
            ("crawl.arc.gz", ArchiveFormat::Warc),
        ];
        for (input, expected) in archives {
            assert_eq!(
                ArchiveFormat::from_extension(input),
                expected,
                "unexpected archive format for {input:?}"
            );
        }

        assert_eq!(
            CompressionFormat::from_extension("xz"),
            CompressionFormat::Xz
        );
    }

    /// An unrecognised extension leaves both halves as `Unknown`.
    #[test]
    fn preserves_unknown_archive_and_compression() {
        let ArchiveEncoding {
            archive,
            compression,
        } = ArchiveEncoding::from_filename("notes.txt");
        assert_eq!(archive, ArchiveFormat::Unknown);
        assert_eq!(compression, CompressionFormat::Unknown);
    }
}